diff --git a/CMakeLists.txt b/CMakeLists.txt
index e037af3bc..eaee5d5e2 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -1,6 +1,6 @@
 project(Eigen)

-cmake_minimum_required(VERSION 2.8.4)
+cmake_minimum_required(VERSION 2.8.5)

 # guard against in-source builds

@@ -55,6 +55,7 @@ endif(EIGEN_HG_CHANGESET)

 include(CheckCXXCompilerFlag)
+include(GNUInstallDirs)

 set(CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/cmake)

@@ -118,11 +119,7 @@ endmacro(ei_add_cxx_compiler_flag)
 if(NOT MSVC)
   # We assume that other compilers are partly compatible with GNUCC

-#   set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fexceptions")
-  set(CMAKE_CXX_FLAGS_DEBUG "-g3")
-  set(CMAKE_CXX_FLAGS_RELEASE "-g0 -O2")
-
+
   # clang outputs some warnings for unknwon flags that are not caught by check_cxx_compiler_flag
   # adding -Werror turns such warnings into errors
   check_cxx_compiler_flag("-Werror" COMPILER_SUPPORT_WERROR)
@@ -341,24 +338,29 @@ option(EIGEN_TEST_CXX11 "Enable testing with C++11 and C++11 features (e.g. Tensor module)." OFF)

 include_directories(${CMAKE_CURRENT_SOURCE_DIR} ${CMAKE_CURRENT_BINARY_DIR})

-# the user modifiable install path for header files
-set(EIGEN_INCLUDE_INSTALL_DIR ${EIGEN_INCLUDE_INSTALL_DIR} CACHE PATH "The directory where we install the header files (optional)")
-
-# set the internal install path for header files which depends on wether the user modifiable
-# EIGEN_INCLUDE_INSTALL_DIR has been set by the user or not.
+# Backward compatibility support for EIGEN_INCLUDE_INSTALL_DIR
 if(EIGEN_INCLUDE_INSTALL_DIR)
-  set(INCLUDE_INSTALL_DIR
-    ${EIGEN_INCLUDE_INSTALL_DIR}
-    CACHE INTERNAL
-    "The directory where we install the header files (internal)"
-  )
+  message(WARNING "EIGEN_INCLUDE_INSTALL_DIR is deprecated. Use INCLUDE_INSTALL_DIR instead.")
+endif()
+
+if(EIGEN_INCLUDE_INSTALL_DIR AND NOT INCLUDE_INSTALL_DIR)
+  set(INCLUDE_INSTALL_DIR ${EIGEN_INCLUDE_INSTALL_DIR}
+      CACHE PATH "The directory relative to CMAKE_PREFIX_PATH where Eigen header files are installed")
 else()
   set(INCLUDE_INSTALL_DIR
-    "${CMAKE_INSTALL_PREFIX}/include/eigen3"
-    CACHE INTERNAL
-    "The directory where we install the header files (internal)"
-  )
+      "${CMAKE_INSTALL_INCLUDEDIR}/eigen3"
+      CACHE PATH "The directory relative to CMAKE_PREFIX_PATH where Eigen header files are installed"
+      )
 endif()
+set(CMAKEPACKAGE_INSTALL_DIR
+    "${CMAKE_INSTALL_LIBDIR}/cmake/eigen3"
+    CACHE PATH "The directory relative to CMAKE_PREFIX_PATH where Eigen3Config.cmake is installed"
+    )
+set(PKGCONFIG_INSTALL_DIR
+    "${CMAKE_INSTALL_DATADIR}/pkgconfig"
+    CACHE PATH "The directory relative to CMAKE_PREFIX_PATH where eigen3.pc is installed"
+    )

 # similar to set_target_properties but append the property instead of overwriting it
 macro(ei_add_target_property target prop value)
@@ -377,21 +379,9 @@ install(FILES
   )

 if(EIGEN_BUILD_PKGCONFIG)
-    SET(path_separator ":")
-    STRING(REPLACE ${path_separator} ";" pkg_config_libdir_search "$ENV{PKG_CONFIG_LIBDIR}")
-    message(STATUS "searching for 'pkgconfig' directory in PKG_CONFIG_LIBDIR ( $ENV{PKG_CONFIG_LIBDIR} ), ${CMAKE_INSTALL_PREFIX}/share, and ${CMAKE_INSTALL_PREFIX}/lib")
-    FIND_PATH(pkg_config_libdir pkgconfig ${pkg_config_libdir_search} ${CMAKE_INSTALL_PREFIX}/share ${CMAKE_INSTALL_PREFIX}/lib ${pkg_config_libdir_search})
-    if(pkg_config_libdir)
-        SET(pkg_config_install_dir ${pkg_config_libdir})
-        message(STATUS "found ${pkg_config_libdir}/pkgconfig" )
-    else(pkg_config_libdir)
-        SET(pkg_config_install_dir ${CMAKE_INSTALL_PREFIX}/share)
-        message(STATUS "pkgconfig not found; installing in ${pkg_config_install_dir}" )
-    endif(pkg_config_libdir)
-
-    configure_file(eigen3.pc.in eigen3.pc)
+    configure_file(eigen3.pc.in eigen3.pc @ONLY)
     install(FILES ${CMAKE_CURRENT_BINARY_DIR}/eigen3.pc
-            DESTINATION ${pkg_config_install_dir}/pkgconfig
+            DESTINATION ${PKGCONFIG_INSTALL_DIR}
             )
 endif(EIGEN_BUILD_PKGCONFIG)

@@ -454,12 +444,15 @@ if(cmake_generator_tolower MATCHES "makefile")
   message(STATUS "--------------+--------------------------------------------------------------")
   message(STATUS "Command       |   Description")
   message(STATUS "--------------+--------------------------------------------------------------")
-  message(STATUS "make install  | Install to ${CMAKE_INSTALL_PREFIX}. To change that:")
-  message(STATUS "              |     cmake . -DCMAKE_INSTALL_PREFIX=yourpath")
-  message(STATUS "              |   Eigen headers will then be installed to:")
-  message(STATUS "              |     ${INCLUDE_INSTALL_DIR}")
-  message(STATUS "              |   To install Eigen headers to a separate location, do:")
-  message(STATUS "              |     cmake . -DEIGEN_INCLUDE_INSTALL_DIR=yourpath")
+  message(STATUS "make install  | Install Eigen. Headers will be installed to:")
+  message(STATUS "              |     <CMAKE_INSTALL_PREFIX>/<INCLUDE_INSTALL_DIR>")
+  message(STATUS "              |   Using the following values:")
+  message(STATUS "              |     CMAKE_INSTALL_PREFIX: ${CMAKE_INSTALL_PREFIX}")
+  message(STATUS "              |     INCLUDE_INSTALL_DIR:  ${INCLUDE_INSTALL_DIR}")
+  message(STATUS "              |   Change the install location of Eigen headers using:")
+  message(STATUS "              |     cmake . -DCMAKE_INSTALL_PREFIX=yourprefix")
+  message(STATUS "              |   Or:")
+  message(STATUS "              |     cmake . -DINCLUDE_INSTALL_DIR=yourdir")
   message(STATUS "make doc     | Generate the API documentation, requires Doxygen & LaTeX")
   message(STATUS "make check   | Build and run the unit-tests. Read this page:")
   message(STATUS "             |   http://eigen.tuxfamily.org/index.php?title=Tests")
@@ -473,21 +466,13 @@ endif()

 message(STATUS "")

-set ( EIGEN_CONFIG_CMAKE_PATH
-      lib${LIB_SUFFIX}/cmake/eigen3
-      CACHE PATH "The directory where the CMake files are installed"
-    )
-if ( NOT IS_ABSOLUTE EIGEN_CONFIG_CMAKE_PATH )
-  set ( EIGEN_CONFIG_CMAKE_PATH ${CMAKE_INSTALL_PREFIX}/${EIGEN_CONFIG_CMAKE_PATH} )
-endif ()
-set ( EIGEN_USE_FILE ${EIGEN_CONFIG_CMAKE_PATH}/UseEigen3.cmake )
 set ( EIGEN_VERSION_STRING ${EIGEN_VERSION_NUMBER} )
 set ( EIGEN_VERSION_MAJOR  ${EIGEN_WORLD_VERSION} )
 set ( EIGEN_VERSION_MINOR  ${EIGEN_MAJOR_VERSION} )
 set ( EIGEN_VERSION_PATCH  ${EIGEN_MINOR_VERSION} )
 set ( EIGEN_DEFINITIONS "")
-set ( EIGEN_INCLUDE_DIR ${INCLUDE_INSTALL_DIR} )
+set ( EIGEN_INCLUDE_DIR "${CMAKE_INSTALL_PREFIX}/${INCLUDE_INSTALL_DIR}" )
 set ( EIGEN_INCLUDE_DIRS ${EIGEN_INCLUDE_DIR} )
 set ( EIGEN_ROOT_DIR ${CMAKE_INSTALL_PREFIX} )

@@ -498,7 +483,7 @@ configure_file ( ${CMAKE_CURRENT_SOURCE_DIR}/cmake/Eigen3Config.cmake.in
 install ( FILES ${CMAKE_CURRENT_SOURCE_DIR}/cmake/UseEigen3.cmake
                 ${CMAKE_CURRENT_BINARY_DIR}/Eigen3Config.cmake
-          DESTINATION ${EIGEN_CONFIG_CMAKE_PATH}
+          DESTINATION ${CMAKEPACKAGE_INSTALL_DIR}
           )

 # Add uninstall target
diff --git a/Eigen/Cholesky b/Eigen/Cholesky
index dd0ca911c..705a04cc4 100644
--- a/Eigen/Cholesky
+++ b/Eigen/Cholesky
@@ -1,3 +1,10 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
 #ifndef EIGEN_CHOLESKY_MODULE_H
 #define EIGEN_CHOLESKY_MODULE_H

diff --git a/Eigen/CholmodSupport b/Eigen/CholmodSupport
index 687cd9777..83e2c1da4 100644
--- a/Eigen/CholmodSupport
+++ b/Eigen/CholmodSupport
@@ -1,3 +1,10 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
 #ifndef EIGEN_CHOLMODSUPPORT_MODULE_H
 #define EIGEN_CHOLMODSUPPORT_MODULE_H

diff --git a/Eigen/Core b/Eigen/Core
index 713d18a6d..63602f4c3 100644
--- a/Eigen/Core
+++ b/Eigen/Core
@@ -300,6 +300,7 @@ using std::ptrdiff_t;

 #include "src/Core/NumTraits.h"
 #include "src/Core/MathFunctions.h"
+#include "src/Core/SpecialFunctions.h"
 #include "src/Core/GenericPacketMath.h"

 #if defined EIGEN_VECTORIZE_AVX
@@ -382,8 +383,6 @@ using std::ptrdiff_t;
 #include "src/Core/DiagonalMatrix.h"
 #include "src/Core/Diagonal.h"
 #include "src/Core/DiagonalProduct.h"
-#include "src/Core/PermutationMatrix.h"
-#include "src/Core/Transpositions.h"
 #include "src/Core/Redux.h"
 #include "src/Core/Visitor.h"
 #include "src/Core/Fuzzy.h"
@@ -393,6 +392,9 @@ using std::ptrdiff_t;
 #include "src/Core/GeneralProduct.h"
 #include "src/Core/Solve.h"
 #include "src/Core/Inverse.h"
+#include "src/Core/SolverBase.h"
+#include "src/Core/PermutationMatrix.h"
+#include "src/Core/Transpositions.h"
 #include "src/Core/TriangularMatrix.h"
 #include "src/Core/SelfAdjointView.h"
 #include "src/Core/products/GeneralBlockPanelKernel.h"
diff --git a/Eigen/Eigenvalues b/Eigen/Eigenvalues
index 53c5a73a2..ea93eb303 100644
--- a/Eigen/Eigenvalues
+++ b/Eigen/Eigenvalues
@@ -1,3 +1,10 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
 #ifndef EIGEN_EIGENVALUES_MODULE_H
 #define EIGEN_EIGENVALUES_MODULE_H

diff --git a/Eigen/Geometry b/Eigen/Geometry
index 11aea8025..06b736e3f 100644
--- a/Eigen/Geometry
+++ b/Eigen/Geometry
@@ -1,3 +1,10 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
 #ifndef EIGEN_GEOMETRY_MODULE_H
 #define EIGEN_GEOMETRY_MODULE_H

diff --git a/Eigen/Householder b/Eigen/Householder
index 6e348db5c..89cd81b1a 100644
--- a/Eigen/Householder
+++ b/Eigen/Householder
@@ -1,3 +1,10 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
 #ifndef EIGEN_HOUSEHOLDER_MODULE_H
 #define EIGEN_HOUSEHOLDER_MODULE_H

diff --git a/Eigen/IterativeLinearSolvers b/Eigen/IterativeLinearSolvers
index f5fdcd9e5..957d5750b 100644
--- a/Eigen/IterativeLinearSolvers
+++ b/Eigen/IterativeLinearSolvers
@@ -1,3 +1,10 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
 #ifndef EIGEN_ITERATIVELINEARSOLVERS_MODULE_H
 #define EIGEN_ITERATIVELINEARSOLVERS_MODULE_H

@@ -34,6 +41,7 @@
 #include "src/IterativeLinearSolvers/LeastSquareConjugateGradient.h"
 #include "src/IterativeLinearSolvers/BiCGSTAB.h"
 #include "src/IterativeLinearSolvers/IncompleteLUT.h"
+#include "src/IterativeLinearSolvers/IncompleteCholesky.h"

 #include "src/Core/util/ReenableStupidWarnings.h"
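Not part of the patch: the new IncompleteCholesky include makes that preconditioner available directly from Eigen/IterativeLinearSolvers. A minimal sketch of how a user might pair it with ConjugateGradient against the 3.3-era API; the tridiagonal test matrix and sizes are illustrative only:

```cpp
#include <Eigen/SparseCore>
#include <Eigen/IterativeLinearSolvers>
#include <iostream>
#include <vector>

int main()
{
  using SpMat = Eigen::SparseMatrix<double>;
  const int n = 100;

  // Assemble a small SPD tridiagonal matrix from triplets.
  std::vector<Eigen::Triplet<double>> coeffs;
  for (int i = 0; i < n; ++i) {
    coeffs.emplace_back(i, i, 2.0);
    if (i > 0)     coeffs.emplace_back(i, i - 1, -1.0);
    if (i < n - 1) coeffs.emplace_back(i, i + 1, -1.0);
  }
  SpMat A(n, n);
  A.setFromTriplets(coeffs.begin(), coeffs.end());
  Eigen::VectorXd b = Eigen::VectorXd::Ones(n);

  // Conjugate gradient preconditioned with the incomplete Cholesky factorization.
  Eigen::ConjugateGradient<SpMat, Eigen::Lower | Eigen::Upper,
                           Eigen::IncompleteCholesky<double>> cg;
  cg.compute(A);
  Eigen::VectorXd x = cg.solve(b);
  std::cout << "#iterations: " << cg.iterations()
            << ", estimated error: " << cg.error() << "\n";
}
```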
diff --git a/Eigen/Jacobi b/Eigen/Jacobi
index ba8a4dc36..17c1d785a 100644
--- a/Eigen/Jacobi
+++ b/Eigen/Jacobi
@@ -1,3 +1,10 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
 #ifndef EIGEN_JACOBI_MODULE_H
 #define EIGEN_JACOBI_MODULE_H

diff --git a/Eigen/LU b/Eigen/LU
index 132ecc42c..2d70c92de 100644
--- a/Eigen/LU
+++ b/Eigen/LU
@@ -1,3 +1,10 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
 #ifndef EIGEN_LU_MODULE_H
 #define EIGEN_LU_MODULE_H

diff --git a/Eigen/MetisSupport b/Eigen/MetisSupport
index 6a113f7a8..85c41bf34 100644
--- a/Eigen/MetisSupport
+++ b/Eigen/MetisSupport
@@ -1,3 +1,10 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
 #ifndef EIGEN_METISSUPPORT_MODULE_H
 #define EIGEN_METISSUPPORT_MODULE_H

diff --git a/Eigen/OrderingMethods b/Eigen/OrderingMethods
index 7c0f1ffff..d8ea36193 100644
--- a/Eigen/OrderingMethods
+++ b/Eigen/OrderingMethods
@@ -1,3 +1,10 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
 #ifndef EIGEN_ORDERINGMETHODS_MODULE_H
 #define EIGEN_ORDERINGMETHODS_MODULE_H

diff --git a/Eigen/PaStiXSupport b/Eigen/PaStiXSupport
index e7d275f97..3411dface 100644
--- a/Eigen/PaStiXSupport
+++ b/Eigen/PaStiXSupport
@@ -1,3 +1,10 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
 #ifndef EIGEN_PASTIXSUPPORT_MODULE_H
 #define EIGEN_PASTIXSUPPORT_MODULE_H

diff --git a/Eigen/PardisoSupport b/Eigen/PardisoSupport
old mode 100644
new mode 100755
index 99330ce7a..340edf51f
--- a/Eigen/PardisoSupport
+++ b/Eigen/PardisoSupport
@@ -1,3 +1,10 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
 #ifndef EIGEN_PARDISOSUPPORT_MODULE_H
 #define EIGEN_PARDISOSUPPORT_MODULE_H

@@ -7,8 +14,6 @@

 #include

-#include
-
 /** \ingroup Support_modules
   * \defgroup PardisoSupport_Module PardisoSupport module
   *
diff --git a/Eigen/QR b/Eigen/QR
index 230cb079a..f74f365f1 100644
--- a/Eigen/QR
+++ b/Eigen/QR
@@ -1,3 +1,10 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
 #ifndef EIGEN_QR_MODULE_H
 #define EIGEN_QR_MODULE_H

diff --git a/Eigen/QtAlignedMalloc b/Eigen/QtAlignedMalloc
index 46f7d83b7..4044d5ac5 100644
--- a/Eigen/QtAlignedMalloc
+++ b/Eigen/QtAlignedMalloc
@@ -1,3 +1,9 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
 #ifndef EIGEN_QTMALLOC_MODULE_H
 #define EIGEN_QTMALLOC_MODULE_H

diff --git a/Eigen/SPQRSupport b/Eigen/SPQRSupport
index e3f49bb5a..f9489dcd8 100644
--- a/Eigen/SPQRSupport
+++ b/Eigen/SPQRSupport
@@ -1,3 +1,10 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
 #ifndef EIGEN_SPQRSUPPORT_MODULE_H
 #define EIGEN_SPQRSUPPORT_MODULE_H

diff --git a/Eigen/SVD b/Eigen/SVD
index dbd37b17a..b353f3f54 100644
--- a/Eigen/SVD
+++ b/Eigen/SVD
@@ -1,3 +1,10 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
 #ifndef EIGEN_SVD_MODULE_H
 #define EIGEN_SVD_MODULE_H

diff --git a/Eigen/Sparse b/Eigen/Sparse
index a540f0eec..a2ef7a665 100644
--- a/Eigen/Sparse
+++ b/Eigen/Sparse
@@ -1,3 +1,10 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
 #ifndef EIGEN_SPARSE_MODULE_H
 #define EIGEN_SPARSE_MODULE_H

diff --git a/Eigen/SparseCore b/Eigen/SparseCore
index 48ed967b8..76966c4c4 100644
--- a/Eigen/SparseCore
+++ b/Eigen/SparseCore
@@ -1,3 +1,10 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
 #ifndef EIGEN_SPARSECORE_MODULE_H
 #define EIGEN_SPARSECORE_MODULE_H

@@ -14,7 +21,7 @@
 /**
   * \defgroup SparseCore_Module SparseCore module
   *
- * This module provides a sparse matrix representation, and basic associatd matrix manipulations
+ * This module provides a sparse matrix representation, and basic associated matrix manipulations
  * and operations.
  *
 * See the \ref TutorialSparse "Sparse tutorial"
diff --git a/Eigen/SparseQR b/Eigen/SparseQR
index efb2695ba..a6f3b7f7d 100644
--- a/Eigen/SparseQR
+++ b/Eigen/SparseQR
@@ -1,3 +1,10 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
 #ifndef EIGEN_SPARSEQR_MODULE_H
 #define EIGEN_SPARSEQR_MODULE_H

diff --git a/Eigen/SuperLUSupport b/Eigen/SuperLUSupport
index d1eac9464..113f58ee5 100644
--- a/Eigen/SuperLUSupport
+++ b/Eigen/SuperLUSupport
@@ -1,3 +1,10 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
 #ifndef EIGEN_SUPERLUSUPPORT_MODULE_H
 #define EIGEN_SUPERLUSUPPORT_MODULE_H

@@ -36,6 +43,8 @@ namespace Eigen { struct SluMatrix; }
  * - class SuperLU: a supernodal sequential LU factorization.
  * - class SuperILU: a supernodal sequential incomplete LU factorization (to be used as a preconditioner for iterative methods).
  *
+ * \warning This wrapper is only for the 4.x versions of SuperLU. The 3.x and 5.x versions are not supported.
+ *
  * \warning When including this module, you have to use SUPERLU_EMPTY instead of EMPTY which is no longer defined because it is too polluting.
  *
  * \code
diff --git a/Eigen/UmfPackSupport b/Eigen/UmfPackSupport
index 0efad5dee..4a9f46a1e 100644
--- a/Eigen/UmfPackSupport
+++ b/Eigen/UmfPackSupport
@@ -1,3 +1,10 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
 #ifndef EIGEN_UMFPACKSUPPORT_MODULE_H
 #define EIGEN_UMFPACKSUPPORT_MODULE_H

diff --git a/Eigen/src/Cholesky/LLT.h b/Eigen/src/Cholesky/LLT.h
index dc73304e8..1f0091f3c 100644
--- a/Eigen/src/Cholesky/LLT.h
+++ b/Eigen/src/Cholesky/LLT.h
@@ -285,7 +285,7 @@ template struct llt_inplace
         return k;
       mat.coeffRef(k,k) = x = sqrt(x);
       if (k>0 && rs>0) A21.noalias() -= A20 * A10.adjoint();
-      if (rs>0) A21 *= RealScalar(1)/x;
+      if (rs>0) A21 /= x;
     }
     return -1;
   }
diff --git a/Eigen/src/CholmodSupport/CholmodSupport.h b/Eigen/src/CholmodSupport/CholmodSupport.h
index d2b0fb282..06421d5ed 100644
--- a/Eigen/src/CholmodSupport/CholmodSupport.h
+++ b/Eigen/src/CholmodSupport/CholmodSupport.h
@@ -78,7 +78,7 @@ cholmod_sparse viewAsCholmod(SparseMatrix<_Scalar,_Options,_StorageIndex>& mat)
   {
     res.itype = CHOLMOD_INT;
   }
-  else if (internal::is_same<_StorageIndex,UF_long>::value)
+  else if (internal::is_same<_StorageIndex,SuiteSparse_long>::value)
   {
     res.itype = CHOLMOD_LONG;
   }
@@ -170,6 +170,10 @@ class CholmodBase : public SparseSolverBase
     typedef typename MatrixType::RealScalar RealScalar;
     typedef MatrixType CholMatrixType;
     typedef typename MatrixType::StorageIndex StorageIndex;
+    enum {
+      ColsAtCompileTime = MatrixType::ColsAtCompileTime,
+      MaxColsAtCompileTime = MatrixType::MaxColsAtCompileTime
+    };

   public:

@@ -350,6 +354,8 @@ class CholmodBase : public SparseSolverBase
  * \tparam _UpLo the triangular part that will be used for the computations. It can be Lower
  *               or Upper. Default is Lower.
  *
+ * \implsparsesolverconcept
+ *
  * This class supports all kind of SparseMatrix<>: row or column major; upper, lower, or both; compressed or non compressed.
  *
  * \sa \ref TutorialSparseDirectSolvers, class CholmodSupernodalLLT, class SimplicialLLT
@@ -397,6 +403,8 @@ class CholmodSimplicialLLT : public CholmodBase<_MatrixType, _UpLo, CholmodSimpl
  * \tparam _UpLo the triangular part that will be used for the computations. It can be Lower
  *               or Upper. Default is Lower.
  *
+ * \implsparsesolverconcept
+ *
  * This class supports all kind of SparseMatrix<>: row or column major; upper, lower, or both; compressed or non compressed.
  *
  * \sa \ref TutorialSparseDirectSolvers, class CholmodSupernodalLLT, class SimplicialLDLT
@@ -442,6 +450,8 @@ class CholmodSimplicialLDLT : public CholmodBase<_MatrixType, _UpLo, CholmodSimp
  * \tparam _UpLo the triangular part that will be used for the computations. It can be Lower
  *               or Upper. Default is Lower.
  *
+ * \implsparsesolverconcept
+ *
  * This class supports all kind of SparseMatrix<>: row or column major; upper, lower, or both; compressed or non compressed.
  *
  * \sa \ref TutorialSparseDirectSolvers
@@ -489,6 +499,8 @@ class CholmodSupernodalLLT : public CholmodBase<_MatrixType, _UpLo, CholmodSuper
  * \tparam _UpLo the triangular part that will be used for the computations. It can be Lower
  *               or Upper. Default is Lower.
  *
+ * \implsparsesolverconcept
+ *
  * This class supports all kind of SparseMatrix<>: row or column major; upper, lower, or both; compressed or non compressed.
  *
  * \sa \ref TutorialSparseDirectSolvers
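Not part of the patch: the \implsparsesolverconcept tag documents that these Cholmod wrappers follow Eigen's common sparse-solver API (compute()/solve()/info()), so they are drop-in replacements for one another. A minimal sketch of that surface, assuming SuiteSparse/CHOLMOD is installed and linked; the helper name solveSpd is ours:

```cpp
#include <Eigen/SparseCore>
#include <Eigen/CholmodSupport>
#include <stdexcept>

// Solve A x = b with CHOLMOD's supernodal Cholesky factorization.
// Any class tagged \implsparsesolverconcept exposes this same interface,
// so CholmodSimplicialLDLT, SimplicialLLT, etc. could be swapped in.
Eigen::VectorXd solveSpd(const Eigen::SparseMatrix<double>& A,
                         const Eigen::VectorXd& b)
{
  Eigen::CholmodSupernodalLLT<Eigen::SparseMatrix<double>> solver;
  solver.compute(A);                       // analyze pattern + factorize
  if (solver.info() != Eigen::Success)
    throw std::runtime_error("factorization failed");
  return solver.solve(b);                  // forward/backward substitution
}
```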
diff --git a/Eigen/src/Core/ArrayBase.h b/Eigen/src/Core/ArrayBase.h
index 151c05526..b4c24a27a 100644
--- a/Eigen/src/Core/ArrayBase.h
+++ b/Eigen/src/Core/ArrayBase.h
@@ -46,15 +46,14 @@ template class ArrayBase

     typedef ArrayBase Eigen_BaseClassForSpecializationOfGlobalMathFuncImpl;

-    using internal::special_scalar_op_base::Scalar,
-                  typename NumTraits::Scalar>::Real>::operator*;
-
     typedef typename internal::traits::StorageKind StorageKind;
     typedef typename internal::traits::Scalar Scalar;
     typedef typename internal::packet_traits::type PacketScalar;
     typedef typename NumTraits::Real RealScalar;

     typedef DenseBase Base;
+    using Base::operator*;
+    using Base::operator/;
     using Base::RowsAtCompileTime;
     using Base::ColsAtCompileTime;
     using Base::SizeAtCompileTime;
diff --git a/Eigen/src/Core/AssignEvaluator.h b/Eigen/src/Core/AssignEvaluator.h
old mode 100644
new mode 100755
index c4ba60d6d..9dfffbcc4
--- a/Eigen/src/Core/AssignEvaluator.h
+++ b/Eigen/src/Core/AssignEvaluator.h
@@ -54,6 +54,7 @@ private:
     InnerMaxSize = int(Dst::IsVectorAtCompileTime) ? int(Dst::MaxSizeAtCompileTime)
               : int(DstFlags)&RowMajorBit ? int(Dst::MaxColsAtCompileTime)
               : int(Dst::MaxRowsAtCompileTime),
+    OuterStride = int(outer_stride_at_compile_time::ret),
     MaxSizeAtCompileTime = Dst::SizeAtCompileTime,
     PacketSize = unpacket_traits::size
   };
@@ -65,7 +66,9 @@ private:
     MightVectorize = StorageOrdersAgree
                   && (int(DstFlags) & int(SrcFlags) & ActualPacketAccessBit)
                   && (functor_traits::PacketAccess),
-    MayInnerVectorize = MightVectorize && int(InnerSize)!=Dynamic && int(InnerSize)%int(PacketSize)==0
+    MayInnerVectorize = MightVectorize
+                     && int(InnerSize)!=Dynamic && int(InnerSize)%int(PacketSize)==0
+                     && int(OuterStride)!=Dynamic && int(OuterStride)%int(PacketSize)==0
                      && int(JointAlignment)>=int(RequiredAlignment),
     MayLinearize = StorageOrdersAgree && (int(DstFlags) & int(SrcFlags) & LinearAccessBit),
     MayLinearVectorize = MightVectorize && MayLinearize && DstHasDirectAccess
@@ -95,10 +98,8 @@ private:
   enum {
     UnrollingLimit      = EIGEN_UNROLLING_LIMIT * (Vectorized ? int(PacketSize) : 1),
     MayUnrollCompletely = int(Dst::SizeAtCompileTime) != Dynamic
-                       && int(SrcEvaluator::CoeffReadCost) != Dynamic
                        && int(Dst::SizeAtCompileTime) * int(SrcEvaluator::CoeffReadCost) <= int(UnrollingLimit),
     MayUnrollInner      = int(InnerSize) != Dynamic
-                       && int(SrcEvaluator::CoeffReadCost) != Dynamic
                        && int(InnerSize) * int(SrcEvaluator::CoeffReadCost) <= int(UnrollingLimit)
   };
@@ -125,8 +126,8 @@ public:
     std::cerr << "DstXpr: " << typeid(typename DstEvaluator::XprType).name() << std::endl;
     std::cerr << "SrcXpr: " << typeid(typename SrcEvaluator::XprType).name() << std::endl;
     std::cerr.setf(std::ios::hex, std::ios::basefield);
-    EIGEN_DEBUG_VAR(DstFlags)
-    EIGEN_DEBUG_VAR(SrcFlags)
+    std::cerr << "DstFlags" << " = " << DstFlags << " (" << demangle_flags(DstFlags) << " )" << std::endl;
+    std::cerr << "SrcFlags" << " = " << SrcFlags << " (" << demangle_flags(SrcFlags) << " )" << std::endl;
     std::cerr.unsetf(std::ios::hex);
     EIGEN_DEBUG_VAR(DstAlignment)
     EIGEN_DEBUG_VAR(SrcAlignment)
@@ -141,11 +142,11 @@ public:
     EIGEN_DEBUG_VAR(MayInnerVectorize)
     EIGEN_DEBUG_VAR(MayLinearVectorize)
     EIGEN_DEBUG_VAR(MaySliceVectorize)
-    EIGEN_DEBUG_VAR(Traversal)
+    std::cerr << "Traversal" << " = " << Traversal << " (" << demangle_traversal(Traversal) << ")" << std::endl;
     EIGEN_DEBUG_VAR(UnrollingLimit)
     EIGEN_DEBUG_VAR(MayUnrollCompletely)
     EIGEN_DEBUG_VAR(MayUnrollInner)
-    EIGEN_DEBUG_VAR(Unrolling)
+    std::cerr << "Unrolling" << " = " << Unrolling << " (" << demangle_unrolling(Unrolling) << ")" << std::endl;
     std::cerr << std::endl;
   }
#endif
@@ -288,7 +289,7 @@ struct dense_assignment_loop;
template
struct dense_assignment_loop
{
-  EIGEN_DEVICE_FUNC static void run(Kernel &kernel)
+  EIGEN_DEVICE_FUNC static void EIGEN_STRONG_INLINE run(Kernel &kernel)
  {
    for(Index outer = 0; outer < kernel.outerSize(); ++outer) {
      for(Index inner = 0; inner < kernel.innerSize(); ++inner) {
@@ -311,7 +312,6 @@ struct dense_assignment_loop
template
struct dense_assignment_loop
{
-  typedef typename Kernel::StorageIndex StorageIndex;
  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
  {
    typedef typename Kernel::DstEvaluatorType::XprType DstXprType;
@@ -392,7 +392,6 @@ struct dense_assignment_loop
template
struct dense_assignment_loop
{
-  typedef typename Kernel::StorageIndex StorageIndex;
  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
  {
    typedef typename Kernel::DstEvaluatorType::XprType DstXprType;
@@ -414,7 +413,7 @@
template
struct dense_assignment_loop
{
  typedef typename Kernel::PacketType PacketType;
-  EIGEN_DEVICE_FUNC static inline void run(Kernel &kernel)
+  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
  {
    const Index innerSize = kernel.innerSize();
    const Index outerSize = kernel.outerSize();
@@ -438,7 +437,6 @@ struct dense_assignment_loop
struct dense_assignment_loop
{
-  typedef typename Kernel::StorageIndex StorageIndex;
  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
  {
    typedef typename Kernel::DstEvaluatorType::XprType DstXprType;
@@ -455,7 +453,7 @@ struct dense_assignment_loop
template
struct dense_assignment_loop
{
-  EIGEN_DEVICE_FUNC static inline void run(Kernel &kernel)
+  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
  {
    const Index size = kernel.size();
    for(Index i = 0; i < size; ++i)
@@ -545,7 +543,6 @@ public:
  typedef DstEvaluatorTypeT DstEvaluatorType;
  typedef SrcEvaluatorTypeT SrcEvaluatorType;
  typedef typename DstEvaluatorType::Scalar Scalar;
-  typedef typename DstEvaluatorType::StorageIndex StorageIndex;
  typedef copy_using_evaluator_traits AssignmentTraits;
  typedef typename AssignmentTraits::PacketType PacketType;
@@ -565,26 +562,23 @@ public:
  EIGEN_DEVICE_FUNC Index cols() const { return m_dstExpr.cols(); }
  EIGEN_DEVICE_FUNC Index outerStride() const { return m_dstExpr.outerStride(); }

-  // TODO get rid of this one:
-  EIGEN_DEVICE_FUNC DstXprType& dstExpression() const { return m_dstExpr; }
-
  EIGEN_DEVICE_FUNC DstEvaluatorType& dstEvaluator() { return m_dst; }
  EIGEN_DEVICE_FUNC const SrcEvaluatorType& srcEvaluator() const { return m_src; }

  /// Assign src(row,col) to dst(row,col) through the assignment functor.
-  EIGEN_DEVICE_FUNC void assignCoeff(Index row, Index col)
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignCoeff(Index row, Index col)
  {
    m_functor.assignCoeff(m_dst.coeffRef(row,col), m_src.coeff(row,col));
  }

  /// \sa assignCoeff(Index,Index)
-  EIGEN_DEVICE_FUNC void assignCoeff(Index index)
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignCoeff(Index index)
  {
    m_functor.assignCoeff(m_dst.coeffRef(index), m_src.coeff(index));
  }

  /// \sa assignCoeff(Index,Index)
-  EIGEN_DEVICE_FUNC void assignCoeffByOuterInner(Index outer, Index inner)
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignCoeffByOuterInner(Index outer, Index inner)
  {
    Index row = rowIndexByOuterInner(outer, inner);
    Index col = colIndexByOuterInner(outer, inner);
@@ -593,26 +587,26 @@ public:

  template
-  EIGEN_DEVICE_FUNC void assignPacket(Index row, Index col)
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignPacket(Index row, Index col)
  {
    m_functor.template assignPacket(&m_dst.coeffRef(row,col), m_src.template packet(row,col));
  }

  template
-  EIGEN_DEVICE_FUNC void assignPacket(Index index)
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignPacket(Index index)
  {
    m_functor.template assignPacket(&m_dst.coeffRef(index), m_src.template packet(index));
  }

  template
-  EIGEN_DEVICE_FUNC void assignPacketByOuterInner(Index outer, Index inner)
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignPacketByOuterInner(Index outer, Index inner)
  {
    Index row = rowIndexByOuterInner(outer, inner);
    Index col = colIndexByOuterInner(outer, inner);
    assignPacket(row, col);
  }

-  EIGEN_DEVICE_FUNC static Index rowIndexByOuterInner(Index outer, Index inner)
+  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Index rowIndexByOuterInner(Index outer, Index inner)
  {
    typedef typename DstEvaluatorType::ExpressionTraits Traits;
    return int(Traits::RowsAtCompileTime) == 1 ? 0
@@ -621,7 +615,7 @@ public:
           : inner;
  }

-  EIGEN_DEVICE_FUNC static Index colIndexByOuterInner(Index outer, Index inner)
+  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Index colIndexByOuterInner(Index outer, Index inner)
  {
    typedef typename DstEvaluatorType::ExpressionTraits Traits;
    return int(Traits::ColsAtCompileTime) == 1 ? 0
@@ -719,14 +713,8 @@ EIGEN_DEVICE_FUNC void call_assignment(Dst& dst, const Src& src, const Func& func)
}

// by-pass AssumeAliasing
-// FIXME the const version should probably not be needed
// When there is no aliasing, we require that 'dst' has been properly resized
template class StorageBase, typename Src, typename Func>
-EIGEN_DEVICE_FUNC void call_assignment(const NoAlias& dst, const Src& src, const Func& func)
-{
-  call_assignment_no_alias(dst.expression(), src, func);
-}
-template class StorageBase, typename Src, typename Func>
EIGEN_DEVICE_FUNC void call_assignment(NoAlias& dst, const Src& src, const Func& func)
{
  call_assignment_no_alias(dst.expression(), src, func);
@@ -737,11 +725,9 @@ template
EIGEN_DEVICE_FUNC void call_assignment_no_alias(Dst& dst, const Src& src, const Func& func)
{
  enum {
-    NeedToTranspose = (    (int(Dst::RowsAtCompileTime) == 1 && int(Src::ColsAtCompileTime) == 1)
-                        |  // FIXME | instead of || to please GCC 4.4.0 stupid warning "suggest parentheses around &&".
-                           // revert to || as soon as not needed anymore.
-                           (int(Dst::ColsAtCompileTime) == 1 && int(Src::RowsAtCompileTime) == 1))
-                      && int(Dst::SizeAtCompileTime) != 1
+    NeedToTranspose = (    (int(Dst::RowsAtCompileTime) == 1 && int(Src::ColsAtCompileTime) == 1)
+                        || (int(Dst::ColsAtCompileTime) == 1 && int(Src::RowsAtCompileTime) == 1)
+                      ) && int(Dst::SizeAtCompileTime) != 1
  };

  Index dstRows = NeedToTranspose ? src.cols() : src.rows();
@@ -756,11 +742,7 @@ EIGEN_DEVICE_FUNC void call_assignment_no_alias(Dst& dst, const Src& src, const
  // TODO check whether this is the right place to perform these checks:
  EIGEN_STATIC_ASSERT_LVALUE(Dst)
  EIGEN_STATIC_ASSERT_SAME_MATRIX_SIZE(ActualDstTypeCleaned,Src)
-
-  // TODO this line is commented to allow matrix = permutation
-  // Actually, the "Scalar" type for a permutation matrix does not really make sense,
-  // perhaps it could be void, and EIGEN_CHECK_BINARY_COMPATIBILIY could allow micing void with anything...?
-//  EIGEN_CHECK_BINARY_COMPATIBILIY(Func,typename ActualDstTypeCleaned::Scalar,typename Src::Scalar);
+  EIGEN_CHECK_BINARY_COMPATIBILIY(Func,typename ActualDstTypeCleaned::Scalar,typename Src::Scalar);

  Assignment::run(actualDst, src, func);
}
diff --git a/Eigen/src/Core/BooleanRedux.h b/Eigen/src/Core/BooleanRedux.h
index ba45cf5c3..8409d8749 100644
--- a/Eigen/src/Core/BooleanRedux.h
+++ b/Eigen/src/Core/BooleanRedux.h
@@ -83,8 +83,6 @@ inline bool DenseBase::all() const
  typedef internal::evaluator Evaluator;
  enum {
    unroll = SizeAtCompileTime != Dynamic
-          && Evaluator::CoeffReadCost != Dynamic
-          && NumTraits::AddCost != Dynamic
          && SizeAtCompileTime * (Evaluator::CoeffReadCost + NumTraits::AddCost) <= EIGEN_UNROLLING_LIMIT
  };
  Evaluator evaluator(derived());
@@ -109,8 +107,6 @@ inline bool DenseBase::any() const
  typedef internal::evaluator Evaluator;
  enum {
    unroll = SizeAtCompileTime != Dynamic
-          && Evaluator::CoeffReadCost != Dynamic
-          && NumTraits::AddCost != Dynamic
          && SizeAtCompileTime * (Evaluator::CoeffReadCost + NumTraits::AddCost) <= EIGEN_UNROLLING_LIMIT
  };
  Evaluator evaluator(derived());
@@ -142,7 +138,11 @@ inline Eigen::Index DenseBase::count() const
template
inline bool DenseBase::hasNaN() const
{
+#if EIGEN_COMP_MSVC || (defined __FAST_MATH__)
+  return derived().array().isNaN().any();
+#else
  return !((derived().array()==derived().array()).all());
+#endif
}

/** \returns true if \c *this contains only finite numbers, i.e., no NaN and no +/-INF values.
@@ -152,7 +152,11 @@ inline bool DenseBase::hasNaN() const
template
inline bool DenseBase::allFinite() const
{
+#if EIGEN_COMP_MSVC || (defined __FAST_MATH__)
+  return derived().array().isFinite().all();
+#else
  return !((derived()-derived()).hasNaN());
+#endif
}

} // end namespace Eigen
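Not part of the patch: the BooleanRedux hunks switch hasNaN()/allFinite() to the isNaN()/isFinite() array predicates on MSVC and under fast-math, where the classic x == x trick can be constant-folded away. A small sketch of the user-facing behavior:

```cpp
#include <Eigen/Dense>
#include <iostream>
#include <limits>

int main()
{
  Eigen::Matrix2d m;
  m << 1.0, std::numeric_limits<double>::quiet_NaN(),
       3.0, std::numeric_limits<double>::infinity();

  std::cout << std::boolalpha;
  std::cout << m.hasNaN() << "\n";                    // true: one coefficient is NaN
  std::cout << m.allFinite() << "\n";                 // false: NaN and Inf present
  std::cout << m.array().isFinite().count() << "\n";  // 2 finite coefficients
}
```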
diff --git a/Eigen/src/Core/CommaInitializer.h b/Eigen/src/Core/CommaInitializer.h
index dc772277d..89bcd750c 100644
--- a/Eigen/src/Core/CommaInitializer.h
+++ b/Eigen/src/Core/CommaInitializer.h
@@ -106,7 +106,7 @@ struct CommaInitializer
  EIGEN_DEVICE_FUNC
  inline ~CommaInitializer()
#if defined VERIFY_RAISES_ASSERT && (!defined EIGEN_NO_ASSERTION_CHECKING) && defined EIGEN_EXCEPTIONS
-  throw(Eigen::eigen_assert_exception)
+  EIGEN_EXCEPTION_SPEC(Eigen::eigen_assert_exception)
#endif
  {
    eigen_assert((m_row+m_currentBlockRows) == m_xpr.rows()
diff --git a/Eigen/src/Core/CoreEvaluators.h b/Eigen/src/Core/CoreEvaluators.h
index 2cbb6cd44..f97dc33de 100644
--- a/Eigen/src/Core/CoreEvaluators.h
+++ b/Eigen/src/Core/CoreEvaluators.h
@@ -29,6 +29,7 @@ struct storage_kind_to_evaluator_kind {
template struct storage_kind_to_shape;

template<> struct storage_kind_to_shape { typedef DenseShape Shape; };
+template<> struct storage_kind_to_shape { typedef SolverShape Shape; };
template<> struct storage_kind_to_shape { typedef PermutationShape Shape; };
template<> struct storage_kind_to_shape { typedef TranspositionsShape Shape; };
@@ -98,9 +99,6 @@ struct evaluator
template
struct evaluator_base : public noncopyable
{
-  // FIXME is it really usefull?
-  typedef typename traits::StorageIndex StorageIndex;
-
  // TODO that's not very nice to have to propagate all these traits. They are currently only needed to handle outer,inner indices.
  typedef traits ExpressionTraits;
@@ -140,11 +138,15 @@ struct evaluator >
      m_outerStride(IsVectorAtCompileTime ? 0
                                          : int(IsRowMajor) ? ColsAtCompileTime
                                          : RowsAtCompileTime)
-  {}
+  {
+    EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost);
+  }

  EIGEN_DEVICE_FUNC explicit evaluator(const PlainObjectType& m)
    : m_data(m.data()), m_outerStride(IsVectorAtCompileTime ? 0 : m.outerStride())
-  { }
+  {
+    EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost);
+  }

  EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index row, Index col) const
  {
@@ -324,13 +326,15 @@ struct evaluator >
                     & (  HereditaryBits
                       | (functor_has_linear_access::ret ? LinearAccessBit : 0)
                       | (functor_traits::PacketAccess ? PacketAccessBit : 0)))
-          | (functor_traits::IsRepeatable ? 0 : EvalBeforeNestingBit), // FIXME EvalBeforeNestingBit should be needed anymore
-    Alignment = 0 // FIXME alignment should not matter here, perhaps we could set it to AlignMax??
+          | (functor_traits::IsRepeatable ? 0 : EvalBeforeNestingBit),
+    Alignment = AlignedMax
  };

  EIGEN_DEVICE_FUNC explicit evaluator(const XprType& n)
    : m_functor(n.functor())
-  { }
+  {
+    EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost);
+  }

  typedef typename XprType::CoeffReturnType CoeffReturnType;
@@ -379,7 +383,10 @@ struct unary_evaluator, IndexBased >
  EIGEN_DEVICE_FUNC explicit unary_evaluator(const XprType& op)
    : m_functor(op.functor()),
      m_argImpl(op.nestedExpression())
-  { }
+  {
+    EIGEN_INTERNAL_CHECK_COST_VALUE(functor_traits::Cost);
+    EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost);
+  }

  typedef typename XprType::CoeffReturnType CoeffReturnType;
@@ -452,7 +459,10 @@ struct binary_evaluator, IndexBased, IndexBase
    : m_functor(xpr.functor()),
      m_lhsImpl(xpr.lhs()),
      m_rhsImpl(xpr.rhs())
-  { }
+  {
+    EIGEN_INTERNAL_CHECK_COST_VALUE(functor_traits::Cost);
+    EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost);
+  }

  typedef typename XprType::CoeffReturnType CoeffReturnType;
@@ -505,7 +515,10 @@ struct unary_evaluator, IndexBased>
  EIGEN_DEVICE_FUNC explicit unary_evaluator(const XprType& op)
    : m_unaryOp(op.functor()),
      m_argImpl(op.nestedExpression())
-  { }
+  {
+    EIGEN_INTERNAL_CHECK_COST_VALUE(functor_traits::Cost);
+    EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost);
+  }

  typedef typename XprType::Scalar Scalar;
  typedef typename XprType::CoeffReturnType CoeffReturnType;
@@ -562,6 +575,7 @@ struct mapbase_evaluator : evaluator_base
  {
    EIGEN_STATIC_ASSERT(EIGEN_IMPLIES(evaluator::Flags&PacketAccessBit, internal::inner_stride_at_compile_time::ret==1),
                        PACKET_ACCESS_REQUIRES_TO_HAVE_INNER_STRIDE_FIXED_TO_1);
+    EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost);
  }

  EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index row, Index col) const
@@ -636,17 +650,9 @@ struct evaluator >
    HasNoStride = HasNoInnerStride && HasNoOuterStride,
    IsDynamicSize = PlainObjectType::SizeAtCompileTime==Dynamic,

-    PacketAlignment = unpacket_traits::alignment,
-
-    KeepsPacketAccess = bool(HasNoInnerStride)
-                     && ( bool(IsDynamicSize)
-                       || HasNoOuterStride
-                       || ( OuterStrideAtCompileTime!=Dynamic
-                         && ((static_cast(sizeof(Scalar))*OuterStrideAtCompileTime) % PacketAlignment)==0 ) ),
-    Flags0 = evaluator::Flags,
-    Flags1 = (bool(HasNoStride) || bool(PlainObjectType::IsVectorAtCompileTime))
-           ? int(Flags0) : int(Flags0 & ~LinearAccessBit),
-    Flags = KeepsPacketAccess ? int(Flags1) : (int(Flags1) & ~PacketAccessBit),
+    PacketAccessMask = bool(HasNoInnerStride) ? ~int(0) : ~int(PacketAccessBit),
+    LinearAccessMask = bool(HasNoStride) || bool(PlainObjectType::IsVectorAtCompileTime) ? ~int(0) : ~int(LinearAccessBit),
+    Flags = int( evaluator::Flags) & (LinearAccessMask&PacketAccessMask),

    Alignment = int(MapOptions)&int(AlignedMask)
  };
@@ -724,7 +730,10 @@ struct evaluator >
    Alignment = EIGEN_PLAIN_ENUM_MIN(evaluator::Alignment, Alignment0)
  };
  typedef block_evaluator block_evaluator_type;
-  EIGEN_DEVICE_FUNC explicit evaluator(const XprType& block) : block_evaluator_type(block) {}
+  EIGEN_DEVICE_FUNC explicit evaluator(const XprType& block) : block_evaluator_type(block)
+  {
+    EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost);
+  }
};

// no direct-access => dispatch to a unary evaluator
@@ -825,14 +834,14 @@ struct block_evaluator(block)
  {
-    // FIXME this should be an internal assertion
+    // TODO: for the 3.3 release, this should be turned to an internal assertion, but let's keep it as is for the beta lifetime
    eigen_assert(((size_t(block.data()) % EIGEN_PLAIN_ENUM_MAX(1,evaluator::Alignment)) == 0) && "data is not aligned");
  }
};

// -------------------- Select --------------------
-// TODO shall we introduce a ternary_evaluator?
+// NOTE shall we introduce a ternary_evaluator?

// TODO enable vectorization for Select
template
struct evaluator >
{
  typedef Select XprType;
  enum {
    CoeffReadCost = evaluator::CoeffReadCost
-                  + EIGEN_SIZE_MAX(evaluator::CoeffReadCost,
-                                   evaluator::CoeffReadCost),
+                  + EIGEN_PLAIN_ENUM_MAX(evaluator::CoeffReadCost,
+                                         evaluator::CoeffReadCost),

    Flags = (unsigned int)evaluator::Flags & evaluator::Flags & HereditaryBits,
@@ -854,7 +863,9 @@
    : m_conditionImpl(select.conditionMatrix()),
      m_thenImpl(select.thenMatrix()),
      m_elseImpl(select.elseMatrix())
-  { }
+  {
+    EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost);
+  }

  typedef typename XprType::CoeffReturnType CoeffReturnType;
@@ -897,8 +908,8 @@ struct unary_evaluator >
  enum {
    CoeffReadCost = evaluator::CoeffReadCost,
-
-    Flags = (evaluator::Flags & HereditaryBits & ~RowMajorBit) | (traits::Flags & RowMajorBit),
+    LinearAccessMask = XprType::IsVectorAtCompileTime ? LinearAccessBit : 0,
+    Flags = (evaluator::Flags & (HereditaryBits|LinearAccessMask) & ~RowMajorBit) | (traits::Flags & RowMajorBit),

    Alignment = evaluator::Alignment
  };
@@ -957,7 +968,7 @@
  }

protected:
-  const ArgTypeNested m_arg; // FIXME is it OK to store both the argument and its evaluator?? (we have the same situation in evaluator_product)
+  const ArgTypeNested m_arg;
  evaluator m_argImpl;
  const variable_if_dynamic m_rows;
  const variable_if_dynamic m_cols;
@@ -965,48 +976,57 @@ protected:

// -------------------- PartialReduxExpr --------------------
-//
-// This is a wrapper around the expression object.
-// TODO: Find out how to write a proper evaluator without duplicating
-//       the row() and col() member functions.

template< typename ArgType, typename MemberOp, int Direction>
struct evaluator >
  : evaluator_base >
{
  typedef PartialReduxExpr XprType;
-  typedef typename XprType::Scalar InputScalar;
+  typedef typename internal::nested_eval::type ArgTypeNested;
+  typedef typename internal::remove_all::type ArgTypeNestedCleaned;
+  typedef typename ArgType::Scalar InputScalar;
+  typedef typename XprType::Scalar Scalar;
  enum {
-    TraversalSize = Direction==int(Vertical) ? int(ArgType::RowsAtCompileTime) : int(XprType::ColsAtCompileTime)
+    TraversalSize = Direction==int(Vertical) ? int(ArgType::RowsAtCompileTime) : int(ArgType::ColsAtCompileTime)
  };
  typedef typename MemberOp::template Cost CostOpType;
  enum {
-    CoeffReadCost = TraversalSize==Dynamic ? Dynamic
+    CoeffReadCost = TraversalSize==Dynamic ? HugeCost
                  : TraversalSize * evaluator::CoeffReadCost + int(CostOpType::value),

-    Flags = (traits::Flags&RowMajorBit) | (evaluator::Flags&HereditaryBits),
+    Flags = (traits::Flags&RowMajorBit) | (evaluator::Flags&(HereditaryBits&(~RowMajorBit))),

-    Alignment = 0 // FIXME this could be improved
+    Alignment = 0 // FIXME this will need to be improved once PartialReduxExpr is vectorized
  };

-  EIGEN_DEVICE_FUNC explicit evaluator(const XprType expr)
-    : m_expr(expr)
-  {}
+  EIGEN_DEVICE_FUNC explicit evaluator(const XprType xpr)
+    : m_arg(xpr.nestedExpression()), m_functor(xpr.functor())
+  {
+    EIGEN_INTERNAL_CHECK_COST_VALUE(TraversalSize==Dynamic ? HugeCost : int(CostOpType::value));
+    EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost);
+  }

  typedef typename XprType::CoeffReturnType CoeffReturnType;

-  EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index row, Index col) const
-  {
-    return m_expr.coeff(row, col);
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar coeff(Index i, Index j) const
+  {
+    if (Direction==Vertical)
+      return m_functor(m_arg.col(j));
+    else
+      return m_functor(m_arg.row(i));
  }

-  EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index index) const
-  {
-    return m_expr.coeff(index);
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar coeff(Index index) const
+  {
+    if (Direction==Vertical)
+      return m_functor(m_arg.col(index));
+    else
+      return m_functor(m_arg.row(index));
  }

protected:
-  const XprType m_expr;
+  const ArgTypeNested m_arg;
+  const MemberOp m_functor;
};
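Not part of the patch: the rewritten PartialReduxExpr evaluator no longer wraps the whole expression; each coefficient is produced on demand by applying the member functor to a single column (or row) of the nested argument. A small sketch of what that evaluates:

```cpp
#include <Eigen/Dense>
#include <iostream>

int main()
{
  Eigen::MatrixXd A(2, 3);
  A << 1, 2, 3,
       4, 5, 6;

  // colwise().sum() is a PartialReduxExpr. With the evaluator above, the
  // j-th coefficient of the result is computed as sum(A.col(j)) when read.
  Eigen::RowVector3d s = A.colwise().sum();
  std::cout << s << "\n";                 // 5 7 9

  // Same mechanism along the other direction (one functor call per row):
  std::cout << A.rowwise().sum() << "\n"; // 6 and 15
}
```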
@@ -1130,6 +1150,7 @@ struct unary_evaluator >
    // FIXME enable DirectAccess with negative strides?
    Flags0 = evaluator::Flags,
    LinearAccess = ( (Direction==BothDirections) && (int(Flags0)&PacketAccessBit) )
+                  || ((ReverseRow && XprType::ColsAtCompileTime==1) || (ReverseCol && XprType::RowsAtCompileTime==1))
                 ? LinearAccessBit : 0,

    Flags = int(Flags0) & (HereditaryBits | PacketAccessBit | LinearAccess),
@@ -1139,8 +1160,8 @@

  EIGEN_DEVICE_FUNC explicit unary_evaluator(const XprType& reverse)
    : m_argImpl(reverse.nestedExpression()),
-      m_rows(ReverseRow ? reverse.nestedExpression().rows() : 0),
-      m_cols(ReverseCol ? reverse.nestedExpression().cols() : 0)
+      m_rows(ReverseRow ? reverse.nestedExpression().rows() : 1),
+      m_cols(ReverseCol ? reverse.nestedExpression().cols() : 1)
  { }

  EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index row, Index col) const
@@ -1214,8 +1235,9 @@ protected:
  evaluator m_argImpl;

  // If we do not reverse rows, then we do not need to know the number of rows; same for columns
-  const variable_if_dynamic m_rows;
-  const variable_if_dynamic m_cols;
+  // Nonetheless, in this case it is important to set to 1 such that the coeff(index) method works fine for vectors.
+  const variable_if_dynamic m_rows;
+  const variable_if_dynamic m_cols;
@@ -1331,20 +1353,16 @@ struct evaluator >
  typedef evaluator Base;

  EIGEN_DEVICE_FUNC explicit evaluator(const XprType& xpr)
-    : m_result(xpr.rows(), xpr.cols())
+    : m_result(xpr.arg())
  {
    ::new (static_cast(this)) Base(m_result);
-    // TODO we should simply do m_result(xpr.arg());
-    call_dense_assignment_loop(m_result, xpr.arg());
  }

  // This constructor is used when nesting an EvalTo evaluator in another evaluator
  EIGEN_DEVICE_FUNC evaluator(const ArgType& arg)
-    : m_result(arg.rows(), arg.cols())
+    : m_result(arg)
  {
    ::new (static_cast(this)) Base(m_result);
-    // TODO we should simply do m_result(xpr.arg());
-    call_dense_assignment_loop(m_result, arg);
  }

protected:
diff --git a/Eigen/src/Core/DenseBase.h b/Eigen/src/Core/DenseBase.h
index 488f15061..e181dafaf 100644
--- a/Eigen/src/Core/DenseBase.h
+++ b/Eigen/src/Core/DenseBase.h
@@ -40,18 +40,14 @@ static inline void check_DenseIndex_is_signed() {
  */
template class DenseBase
#ifndef EIGEN_PARSED_BY_DOXYGEN
-  : public internal::special_scalar_op_base::Scalar,
-                                             typename NumTraits::Scalar>::Real>
+  : public internal::special_scalar_op_base::Scalar,
+                                             typename NumTraits::Scalar>::Real,
+                                             DenseCoeffsBase >
#else
  : public DenseCoeffsBase
#endif // not EIGEN_PARSED_BY_DOXYGEN
{
  public:
-    using internal::special_scalar_op_base::Scalar,
-                typename NumTraits::Scalar>::Real>::operator*;
-    using internal::special_scalar_op_base::Scalar,
-                typename NumTraits::Scalar>::Real>::operator/;

    /** Inner iterator type to iterate over the coefficients of a row or column.
      * \sa class InnerIterator
      */
@@ -77,9 +73,10 @@ template class DenseBase
    typedef Scalar value_type;

    typedef typename NumTraits::Real RealScalar;
+    typedef internal::special_scalar_op_base > Base;

-    typedef internal::special_scalar_op_base::Scalar,
-                 typename NumTraits::Scalar>::Real> Base;
+    using Base::operator*;
+    using Base::operator/;
    using Base::derived;
    using Base::const_cast_derived;
    using Base::rows;
diff --git a/Eigen/src/Core/DenseCoeffsBase.h b/Eigen/src/Core/DenseCoeffsBase.h
index 9581757f3..820a90e6f 100644
--- a/Eigen/src/Core/DenseCoeffsBase.h
+++ b/Eigen/src/Core/DenseCoeffsBase.h
@@ -138,6 +138,8 @@ class DenseCoeffsBase : public EigenBase
    EIGEN_STRONG_INLINE CoeffReturnType
    coeff(Index index) const
    {
+      EIGEN_STATIC_ASSERT(internal::evaluator::Flags & LinearAccessBit,
+                          THIS_COEFFICIENT_ACCESSOR_TAKING_ONE_ACCESS_IS_ONLY_FOR_EXPRESSIONS_ALLOWING_LINEAR_ACCESS)
      eigen_internal_assert(index >= 0 && index < size());
      return internal::evaluator(derived()).coeff(index);
    }
@@ -243,6 +245,8 @@ class DenseCoeffsBase : public EigenBase
    template
    EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const
    {
+      EIGEN_STATIC_ASSERT(internal::evaluator::Flags & LinearAccessBit,
+                          THIS_COEFFICIENT_ACCESSOR_TAKING_ONE_ACCESS_IS_ONLY_FOR_EXPRESSIONS_ALLOWING_LINEAR_ACCESS)
      typedef typename internal::packet_traits::type DefaultPacketType;
      eigen_internal_assert(index >= 0 && index < size());
      return internal::evaluator(derived()).template packet(index);
@@ -370,6 +374,8 @@ class DenseCoeffsBase : public DenseCoeffsBase
    EIGEN_STRONG_INLINE Scalar&
    coeffRef(Index index)
    {
+      EIGEN_STATIC_ASSERT(internal::evaluator::Flags & LinearAccessBit,
+                          THIS_COEFFICIENT_ACCESSOR_TAKING_ONE_ACCESS_IS_ONLY_FOR_EXPRESSIONS_ALLOWING_LINEAR_ACCESS)
      eigen_internal_assert(index >= 0 && index < size());
      return internal::evaluator(derived()).coeffRef(index);
    }
@@ -617,7 +623,7 @@ static inline Index first_default_aligned(const DenseBase& m)
{
  typedef typename Derived::Scalar Scalar;
  typedef typename packet_traits::type DefaultPacketType;
-  return first_aligned::alignment>(m);
+  return internal::first_aligned::alignment),Derived>(m);
}

template::ret>
diff --git a/Eigen/src/Core/Dot.h b/Eigen/src/Core/Dot.h
index 94b058466..003450f1a 100644
--- a/Eigen/src/Core/Dot.h
+++ b/Eigen/src/Core/Dot.h
@@ -178,9 +178,11 @@ struct lpNorm_selector

} // end namespace internal

-/** \returns the \f$ \ell^p \f$ norm of *this, that is, returns the p-th root of the sum of the p-th powers of the absolute values
-  * of the coefficients of *this. If \a p is the special value \a Eigen::Infinity, this function returns the \f$ \ell^\infty \f$
-  * norm, that is the maximum of the absolute values of the coefficients of *this.
+/** \returns the \b coefficient-wise \f$ \ell^p \f$ norm of \c *this, that is, returns the p-th root of the sum of the p-th powers of the absolute values
+  * of the coefficients of \c *this. If \a p is the special value \a Eigen::Infinity, this function returns the \f$ \ell^\infty \f$
+  * norm, that is the maximum of the absolute values of the coefficients of \c *this.
+  *
+  * \note For matrices, this function does not compute the operator-norm. That is, if \c *this is a matrix, then its coefficients are interpreted as a 1D vector. Nonetheless, you can easily compute the 1-norm and \f$\infty\f$-norm matrix operator norms using \link TutorialReductionsVisitorsBroadcastingReductionsNorm partial reductions \endlink.
  *
  * \sa norm()
  */
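Not part of the patch: the amended lpNorm() documentation distinguishes the coefficient-wise norm from operator norms. A short sketch of both, using the partial reductions the new note points to:

```cpp
#include <Eigen/Dense>
#include <iostream>

int main()
{
  Eigen::Vector3d v(1.0, -2.0, 3.0);
  std::cout << v.lpNorm<1>() << "\n";               // 6: sum of |coefficients|
  std::cout << v.lpNorm<Eigen::Infinity>() << "\n"; // 3: max |coefficient|

  // For a matrix, lpNorm<> flattens the coefficients into one vector.
  // The 1-norm and infinity-norm *operator* norms come from partial reductions:
  Eigen::Matrix2d A;
  A << 1, -2,
       3,  4;
  double opNorm1   = A.cwiseAbs().colwise().sum().maxCoeff(); // max column sum: 6
  double opNormInf = A.cwiseAbs().rowwise().sum().maxCoeff(); // max row sum: 7
  std::cout << opNorm1 << " " << opNormInf << "\n";
}
```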
diff --git a/Eigen/src/Core/GeneralProduct.h b/Eigen/src/Core/GeneralProduct.h
index 475d6f4aa..fe8204ac3 100644
--- a/Eigen/src/Core/GeneralProduct.h
+++ b/Eigen/src/Core/GeneralProduct.h
@@ -160,7 +160,7 @@ template<> struct product_type_selector { enum

namespace internal {

template
-struct gemv_dense_sense_selector;
+struct gemv_dense_selector;

} // end namespace internal
@@ -204,19 +204,19 @@ struct gemv_static_vector_if

// The vector is on the left => transposition
template
-struct gemv_dense_sense_selector
+struct gemv_dense_selector
{
  template
  static void run(const Lhs &lhs, const Rhs &rhs, Dest& dest, const typename Dest::Scalar& alpha)
  {
    Transpose destT(dest);
    enum { OtherStorageOrder = StorageOrder == RowMajor ? ColMajor : RowMajor };
-    gemv_dense_sense_selector
+    gemv_dense_selector
      ::run(rhs.transpose(), lhs.transpose(), destT, alpha);
  }
};

-template<> struct gemv_dense_sense_selector
+template<> struct gemv_dense_selector
{
  template
  static inline void run(const Lhs &lhs, const Rhs &rhs, Dest& dest, const typename Dest::Scalar& alpha)
@@ -292,7 +292,7 @@ template<> struct gemv_dense_sense_selector
  }
};

-template<> struct gemv_dense_sense_selector
+template<> struct gemv_dense_selector
{
  template
  static void run(const Lhs &lhs, const Rhs &rhs, Dest& dest, const typename Dest::Scalar& alpha)
@@ -345,27 +345,28 @@ template<> struct gemv_dense_sense_selector
  }
};

-template<> struct gemv_dense_sense_selector
+template<> struct gemv_dense_selector
{
  template
  static void run(const Lhs &lhs, const Rhs &rhs, Dest& dest, const typename Dest::Scalar& alpha)
  {
-    // TODO makes sure dest is sequentially stored in memory, otherwise use a temp
+    // TODO if rhs is large enough it might be beneficial to make sure that dest is sequentially stored in memory, otherwise use a temp
+    typename nested_eval::type actual_rhs(rhs);
    const Index size = rhs.rows();
    for(Index k=0; k
-template<> struct gemv_dense_sense_selector
+template<> struct gemv_dense_selector
{
  template
  static void run(const Lhs &lhs, const Rhs &rhs, Dest& dest, const typename Dest::Scalar& alpha)
  {
-    // TODO makes sure rhs is sequentially stored in memory, otherwise use a temp
+    typename nested_eval::type actual_rhs(rhs);
    const Index rows = dest.rows();
    for(Index i=0; i
diff --git a/Eigen/src/Core/GenericPacketMath.h b/Eigen/src/Core/GenericPacketMath.h
--- a/Eigen/src/Core/GenericPacketMath.h
+++ b/Eigen/src/Core/GenericPacketMath.h
template EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
Packet pceil(const Packet& a) { using numext::ceil; return ceil(a); }

+/** \internal \returns the ln(|gamma(\a a)|) (coeff-wise) */
+template EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
+Packet plgamma(const Packet& a) { using numext::lgamma; return lgamma(a); }
+
+/** \internal \returns the erf(\a a) (coeff-wise) */
+template EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
+Packet perf(const Packet& a) { using numext::erf; return erf(a); }
+
+/** \internal \returns the erfc(\a a) (coeff-wise) */
+template EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
+Packet perfc(const Packet& a) { using numext::erfc; return erfc(a); }
+
/***************************************************************************
* The following functions might not have to be overwritten for vectorized types
***************************************************************************/
diff --git a/Eigen/src/Core/GlobalFunctions.h b/Eigen/src/Core/GlobalFunctions.h
index aaa076701..62fec7008 100644
--- a/Eigen/src/Core/GlobalFunctions.h
+++ b/Eigen/src/Core/GlobalFunctions.h
@@ -49,6 +49,9 @@ namespace Eigen
  EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(sinh,scalar_sinh_op)
  EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(cosh,scalar_cosh_op)
  EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(tanh,scalar_tanh_op)
+  EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(lgamma,scalar_lgamma_op)
+  EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(erf,scalar_erf_op)
+  EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(erfc,scalar_erfc_op)
  EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(exp,scalar_exp_op)
  EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(log,scalar_log_op)
  EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(log10,scalar_log10_op)
@@ -64,6 +67,7 @@ namespace Eigen
  EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(isnan,scalar_isnan_op)
  EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(isinf,scalar_isinf_op)
  EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(isfinite,scalar_isfinite_op)
+  EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(sign,scalar_sign_op)

  template
  inline const Eigen::CwiseUnaryOp, const Derived>
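Not part of the patch: with plgamma/perf/perfc and the matching scalar_*_op declarations above, the special functions become available as coefficient-wise array operations. A sketch assuming the tree as patched here, where src/Core/SpecialFunctions.h ships in Core (in released 3.3 these functions moved to the unsupported SpecialFunctions module):

```cpp
#include <Eigen/Core>
#include <iostream>

int main()
{
  Eigen::ArrayXd x = Eigen::ArrayXd::LinSpaced(4, 0.5, 2.0);

  // Coefficient-wise special functions added by this patch:
  std::cout << Eigen::lgamma(x) << "\n"; // ln|Gamma(x_i)| per coefficient
  std::cout << Eigen::erf(x)    << "\n";
  std::cout << Eigen::erfc(x)   << "\n";
}
```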
diff --git a/Eigen/src/Core/Inverse.h b/Eigen/src/Core/Inverse.h
index b359e1287..f3ec84990 100644
--- a/Eigen/src/Core/Inverse.h
+++ b/Eigen/src/Core/Inverse.h
@@ -12,8 +12,6 @@

namespace Eigen {

-// TODO move the general declaration in Core, and rename this file DenseInverseImpl.h, or something like this...
-
template class InverseImpl;

namespace internal {
@@ -49,11 +47,13 @@ public:
  typedef typename XprType::PlainObject                       PlainObject;
  typedef typename internal::ref_selector::type      XprTypeNested;
  typedef typename internal::remove_all::type  XprTypeNestedCleaned;
+  typedef typename internal::ref_selector::type Nested;
+  typedef typename internal::remove_all::type NestedExpression;

  explicit Inverse(const XprType &xpr)
    : m_xpr(xpr)
  {}
-
+
  EIGEN_DEVICE_FUNC Index rows() const { return m_xpr.rows(); }
  EIGEN_DEVICE_FUNC Index cols() const { return m_xpr.cols(); }
@@ -63,25 +63,16 @@ protected:
  XprTypeNested m_xpr;
};

-/** \internal
-  * Specialization of the Inverse expression for dense expressions.
-  * Direct access to the coefficients are discared.
-  * FIXME this intermediate class is probably not needed anymore.
-  */
-template
-class InverseImpl
-  : public MatrixBase >
+// Generic API dispatcher
+template
+class InverseImpl
+  : public internal::generic_xpr_base >::type
{
-  typedef Inverse Derived;
-
public:
-
-  typedef MatrixBase Base;
-  EIGEN_DENSE_PUBLIC_INTERFACE(Derived)
-  typedef typename internal::remove_all::type NestedExpression;
-
+  typedef typename internal::generic_xpr_base >::type Base;
+  typedef typename XprType::Scalar Scalar;
private:
-
+
  Scalar coeff(Index row, Index col) const;
  Scalar coeff(Index i) const;
};
diff --git a/Eigen/src/Core/MapBase.h b/Eigen/src/Core/MapBase.h
index ae28d4db6..75a80daaa 100644
--- a/Eigen/src/Core/MapBase.h
+++ b/Eigen/src/Core/MapBase.h
@@ -155,6 +155,10 @@ template class MapBase
      checkSanity();
    }

+    #ifdef EIGEN_MAPBASE_PLUGIN
+    #include EIGEN_MAPBASE_PLUGIN
+    #endif
+
  protected:

    EIGEN_DEVICE_FUNC
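Not part of the patch: EIGEN_MAPBASE_PLUGIN mirrors Eigen's existing plugin macros (e.g. EIGEN_MATRIXBASE_PLUGIN) and splices a user-supplied header into the MapBase class body. A sketch; the file name my_mapbase_addons.h and the rawData() member are hypothetical:

```cpp
// --- my_mapbase_addons.h (hypothetical plugin file) ---------------------
// This text is pasted verbatim inside MapBase, so it may use its typedefs,
// e.g. Scalar, and its members, e.g. data().
// inline const Scalar* rawData() const { return this->data(); }

// --- main.cpp ------------------------------------------------------------
#define EIGEN_MAPBASE_PLUGIN "my_mapbase_addons.h"
#include <Eigen/Dense>
#include <iostream>

int main()
{
  double buf[4] = {1, 2, 3, 4};
  Eigen::Map<Eigen::Vector4d> v(buf);
  std::cout << *v.rawData() << "\n"; // 1: the injected member is now available
}
```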
+#else
+#define EIGEN_USE_STD_FPCLASSIFY 0
+#endif
+
+template<typename T>
+EIGEN_DEVICE_FUNC
+typename internal::enable_if<internal::is_integral<T>::value,bool>::type
+isnan_impl(const T&) { return false; }
+
+template<typename T>
+EIGEN_DEVICE_FUNC
+typename internal::enable_if<internal::is_integral<T>::value,bool>::type
+isinf_impl(const T&) { return false; }
+
+template<typename T>
+EIGEN_DEVICE_FUNC
+typename internal::enable_if<internal::is_integral<T>::value,bool>::type
+isfinite_impl(const T&) { return true; }
+
+template<typename T>
+EIGEN_DEVICE_FUNC
+typename internal::enable_if<(!internal::is_integral<T>::value)&&(!NumTraits<T>::IsComplex),bool>::type
+isfinite_impl(const T& x)
+{
+  #if EIGEN_USE_STD_FPCLASSIFY
+    using std::isfinite;
+    return isfinite EIGEN_NOT_A_MACRO (x);
+  #else
+    return x<NumTraits<T>::highest() && x>NumTraits<T>::lowest();
+  #endif
+}
+
+template<typename T>
+EIGEN_DEVICE_FUNC
+typename internal::enable_if<(!internal::is_integral<T>::value)&&(!NumTraits<T>::IsComplex),bool>::type
+isinf_impl(const T& x)
+{
+  #if EIGEN_USE_STD_FPCLASSIFY
+    using std::isinf;
+    return isinf EIGEN_NOT_A_MACRO (x);
+  #else
+    return x>NumTraits<T>::highest() || x<NumTraits<T>::lowest();
+  #endif
+}
+
+template<typename T>
+EIGEN_DEVICE_FUNC
+typename internal::enable_if<(!internal::is_integral<T>::value)&&(!NumTraits<T>::IsComplex),bool>::type
+isnan_impl(const T& x)
+{
+  #if EIGEN_USE_STD_FPCLASSIFY
+    using std::isnan;
+    return isnan EIGEN_NOT_A_MACRO (x);
+  #else
+    return x != x;
+  #endif
+}
+
+#if (!EIGEN_USE_STD_FPCLASSIFY)
+
+#if EIGEN_COMP_MSVC
+
+template<typename T> EIGEN_DEVICE_FUNC bool isinf_msvc_helper(T x)
+{
+  return _fpclass(x)==_FPCLASS_NINF || _fpclass(x)==_FPCLASS_PINF;
+}
+
+//MSVC defines a _isnan builtin function, but for double only
+EIGEN_DEVICE_FUNC inline bool isnan_impl(const long double& x) { return _isnan(x); }
+EIGEN_DEVICE_FUNC inline bool isnan_impl(const double& x) { return _isnan(x); }
+EIGEN_DEVICE_FUNC inline bool isnan_impl(const float& x) { return _isnan(x); }
+
+EIGEN_DEVICE_FUNC inline bool isinf_impl(const long double& x) { return isinf_msvc_helper(x); }
+EIGEN_DEVICE_FUNC inline bool isinf_impl(const double& x) { return isinf_msvc_helper(x); }
+EIGEN_DEVICE_FUNC inline bool isinf_impl(const float& x) { return isinf_msvc_helper(x); }
+
+#elif (defined __FINITE_MATH_ONLY__ && __FINITE_MATH_ONLY__ && EIGEN_COMP_GNUC)
+
+#if EIGEN_GNUC_AT_LEAST(5,0)
+  #define EIGEN_TMP_NOOPT_ATTRIB EIGEN_DEVICE_FUNC inline __attribute__((optimize("no-finite-math-only")))
+#else
+  // NOTE the inline qualifier and noinline attribute are both needed: the former is to avoid linking issues (duplicate symbol),
+  // while the latter prevents too aggressive optimizations in fast-math mode:
+  #define EIGEN_TMP_NOOPT_ATTRIB EIGEN_DEVICE_FUNC inline __attribute__((noinline,optimize("no-finite-math-only")))
+#endif
+
+template<> EIGEN_TMP_NOOPT_ATTRIB bool isnan_impl(const long double& x) { return __builtin_isnan(x); }
+template<> EIGEN_TMP_NOOPT_ATTRIB bool isnan_impl(const double& x) { return __builtin_isnan(x); }
+template<> EIGEN_TMP_NOOPT_ATTRIB bool isnan_impl(const float& x) { return __builtin_isnan(x); }
+template<> EIGEN_TMP_NOOPT_ATTRIB bool isinf_impl(const double& x) { return __builtin_isinf(x); }
+template<> EIGEN_TMP_NOOPT_ATTRIB bool isinf_impl(const float& x) { return __builtin_isinf(x); }
+template<> EIGEN_TMP_NOOPT_ATTRIB bool isinf_impl(const long double& x) { return __builtin_isinf(x); }
+
+#undef EIGEN_TMP_NOOPT_ATTRIB
+
+#endif
+
+#endif
+
+// The following overloads are defined at the end of this file
+template<typename T> bool isfinite_impl(const std::complex<T>& x);
+template<typename T> bool isnan_impl(const std::complex<T>& x);
+template<typename T> bool isinf_impl(const std::complex<T>& x);
+
 }
// end namespace internal /**************************************************************************** @@ -810,59 +927,9 @@ inline EIGEN_MATHFUNC_RETVAL(pow, Scalar) pow(const Scalar& x, const Scalar& y) return EIGEN_MATHFUNC_IMPL(pow, Scalar)::run(x, y); } -template -EIGEN_DEVICE_FUNC -bool (isfinite)(const T& x) -{ - #if EIGEN_HAS_CXX11_MATH - using std::isfinite; - return isfinite EIGEN_NOT_A_MACRO (x); - #else - return x::highest() && x>NumTraits::lowest(); - #endif -} - -template -EIGEN_DEVICE_FUNC -bool (isnan)(const T& x) -{ - #if EIGEN_HAS_CXX11_MATH - using std::isnan; - return isnan EIGEN_NOT_A_MACRO (x); - #else - return x != x; - #endif -} - -template -EIGEN_DEVICE_FUNC -bool (isinf)(const T& x) -{ - #if EIGEN_HAS_CXX11_MATH - using std::isinf; - return isinf EIGEN_NOT_A_MACRO (x); - #else - return x>NumTraits::highest() || x::lowest(); - #endif -} - -template -bool (isfinite)(const std::complex& x) -{ - return (numext::isfinite)(numext::real(x)) && (numext::isfinite)(numext::imag(x)); -} - -template -bool (isnan)(const std::complex& x) -{ - return (numext::isnan)(numext::real(x)) || (numext::isnan)(numext::imag(x)); -} - -template -bool (isinf)(const std::complex& x) -{ - return ((numext::isinf)(numext::real(x)) || (numext::isinf)(numext::imag(x))) && (!(numext::isnan)(x)); -} +template EIGEN_DEVICE_FUNC bool (isnan) (const T &x) { return internal::isnan_impl(x); } +template EIGEN_DEVICE_FUNC bool (isinf) (const T &x) { return internal::isinf_impl(x); } +template EIGEN_DEVICE_FUNC bool (isfinite)(const T &x) { return internal::isfinite_impl(x); } template EIGEN_DEVICE_FUNC @@ -906,6 +973,24 @@ inline int log2(int x) namespace internal { +template +bool isfinite_impl(const std::complex& x) +{ + return (numext::isfinite)(numext::real(x)) && (numext::isfinite)(numext::imag(x)); +} + +template +bool isnan_impl(const std::complex& x) +{ + return (numext::isnan)(numext::real(x)) || (numext::isnan)(numext::imag(x)); +} + +template +bool isinf_impl(const std::complex& x) +{ + return ((numext::isinf)(numext::real(x)) || (numext::isinf)(numext::imag(x))) && (!(numext::isnan)(x)); +} + /**************************************************************************** * Implementation of fuzzy comparisons * ****************************************************************************/ @@ -928,9 +1013,8 @@ struct scalar_fuzzy_default_impl EIGEN_DEVICE_FUNC static inline bool isApprox(const Scalar& x, const Scalar& y, const RealScalar& prec) { - EIGEN_USING_STD_MATH(min); EIGEN_USING_STD_MATH(abs); - return abs(x - y) <= (min)(abs(x), abs(y)) * prec; + return abs(x - y) <= numext::mini(abs(x), abs(y)) * prec; } EIGEN_DEVICE_FUNC static inline bool isApproxOrLessThan(const Scalar& x, const Scalar& y, const RealScalar& prec) @@ -971,8 +1055,7 @@ struct scalar_fuzzy_default_impl } static inline bool isApprox(const Scalar& x, const Scalar& y, const RealScalar& prec) { - EIGEN_USING_STD_MATH(min); - return numext::abs2(x - y) <= (min)(numext::abs2(x), numext::abs2(y)) * prec * prec; + return numext::abs2(x - y) <= numext::mini(numext::abs2(x), numext::abs2(y)) * prec * prec; } }; diff --git a/Eigen/src/Core/MatrixBase.h b/Eigen/src/Core/MatrixBase.h index 7c66572d1..9d612c852 100644 --- a/Eigen/src/Core/MatrixBase.h +++ b/Eigen/src/Core/MatrixBase.h @@ -328,23 +328,26 @@ template class MatrixBase /////////// LU module /////////// - EIGEN_DEVICE_FUNC const FullPivLU fullPivLu() const; - EIGEN_DEVICE_FUNC const PartialPivLU partialPivLu() const; - - const PartialPivLU lu() const; + EIGEN_DEVICE_FUNC + 
inline const FullPivLU fullPivLu() const; + EIGEN_DEVICE_FUNC + inline const PartialPivLU partialPivLu() const; EIGEN_DEVICE_FUNC - const Inverse inverse() const; + inline const PartialPivLU lu() const; + + EIGEN_DEVICE_FUNC + inline const Inverse inverse() const; template - void computeInverseAndDetWithCheck( + inline void computeInverseAndDetWithCheck( ResultType& inverse, typename ResultType::Scalar& determinant, bool& invertible, const RealScalar& absDeterminantThreshold = NumTraits::dummy_precision() ) const; template - void computeInverseWithCheck( + inline void computeInverseWithCheck( ResultType& inverse, bool& invertible, const RealScalar& absDeterminantThreshold = NumTraits::dummy_precision() @@ -353,22 +356,24 @@ template class MatrixBase /////////// Cholesky module /////////// - const LLT llt() const; - const LDLT ldlt() const; + inline const LLT llt() const; + inline const LDLT ldlt() const; /////////// QR module /////////// - const HouseholderQR householderQr() const; - const ColPivHouseholderQR colPivHouseholderQr() const; - const FullPivHouseholderQR fullPivHouseholderQr() const; + inline const HouseholderQR householderQr() const; + inline const ColPivHouseholderQR colPivHouseholderQr() const; + inline const FullPivHouseholderQR fullPivHouseholderQr() const; - EigenvaluesReturnType eigenvalues() const; - RealScalar operatorNorm() const; +/////////// Eigenvalues module /////////// + + inline EigenvaluesReturnType eigenvalues() const; + inline RealScalar operatorNorm() const; /////////// SVD module /////////// - JacobiSVD jacobiSvd(unsigned int computationOptions = 0) const; - BDCSVD bdcSvd(unsigned int computationOptions = 0) const; + inline JacobiSVD jacobiSvd(unsigned int computationOptions = 0) const; + inline BDCSVD bdcSvd(unsigned int computationOptions = 0) const; /////////// Geometry module /////////// @@ -381,24 +386,24 @@ template class MatrixBase #endif // EIGEN_PARSED_BY_DOXYGEN template EIGEN_DEVICE_FUNC - typename cross_product_return_type::type + inline typename cross_product_return_type::type cross(const MatrixBase& other) const; template EIGEN_DEVICE_FUNC - PlainObject cross3(const MatrixBase& other) const; + inline PlainObject cross3(const MatrixBase& other) const; EIGEN_DEVICE_FUNC - PlainObject unitOrthogonal(void) const; + inline PlainObject unitOrthogonal(void) const; - Matrix eulerAngles(Index a0, Index a1, Index a2) const; + inline Matrix eulerAngles(Index a0, Index a1, Index a2) const; - ScalarMultipleReturnType operator*(const UniformScaling& s) const; + inline ScalarMultipleReturnType operator*(const UniformScaling& s) const; // put this as separate enum value to work around possible GCC 4.3 bug (?) enum { HomogeneousReturnTypeDirection = ColsAtCompileTime==1&&RowsAtCompileTime==1 ? ((internal::traits::Flags&RowMajorBit)==RowMajorBit ? Horizontal : Vertical) : ColsAtCompileTime==1 ? Vertical : Horizontal }; typedef Homogeneous HomogeneousReturnType; - HomogeneousReturnType homogeneous() const; + inline HomogeneousReturnType homogeneous() const; enum { SizeMinusOne = SizeAtCompileTime==Dynamic ? 
Dynamic : SizeAtCompileTime-1
@@ -409,7 +414,7 @@ template class MatrixBase
     typedef CwiseUnaryOp::Scalar>, const ConstStartMinusOne > HNormalizedReturnType;
-    const HNormalizedReturnType hnormalized() const;
+    inline const HNormalizedReturnType hnormalized() const;
     ////////// Householder module ///////////
@@ -433,6 +438,15 @@ template class MatrixBase
     template
     void applyOnTheRight(Index p, Index q, const JacobiRotation& j);
+///////// SparseCore module /////////
+
+    template
+    EIGEN_STRONG_INLINE const typename SparseMatrixBase::template CwiseProductDenseReturnType::Type
+    cwiseProduct(const SparseMatrixBase &other) const
+    {
+      return other.cwiseProduct(derived());
+    }
+
     ///////// MatrixFunctions module /////////
     typedef typename internal::stem_function::type StemFunction;
diff --git a/Eigen/src/Core/NumTraits.h b/Eigen/src/Core/NumTraits.h
index 61ec2f533..1d85dec72 100644
--- a/Eigen/src/Core/NumTraits.h
+++ b/Eigen/src/Core/NumTraits.h
@@ -157,9 +157,9 @@ struct NumTraits >
     IsInteger = NumTraits::IsInteger,
     IsSigned = NumTraits::IsSigned,
     RequireInitialization = 1,
-    ReadCost = ArrayType::SizeAtCompileTime==Dynamic ? Dynamic : ArrayType::SizeAtCompileTime * NumTraits::ReadCost,
-    AddCost = ArrayType::SizeAtCompileTime==Dynamic ? Dynamic : ArrayType::SizeAtCompileTime * NumTraits::AddCost,
-    MulCost = ArrayType::SizeAtCompileTime==Dynamic ? Dynamic : ArrayType::SizeAtCompileTime * NumTraits::MulCost
+    ReadCost = ArrayType::SizeAtCompileTime==Dynamic ? HugeCost : ArrayType::SizeAtCompileTime * NumTraits::ReadCost,
+    AddCost = ArrayType::SizeAtCompileTime==Dynamic ? HugeCost : ArrayType::SizeAtCompileTime * NumTraits::AddCost,
+    MulCost = ArrayType::SizeAtCompileTime==Dynamic ? HugeCost : ArrayType::SizeAtCompileTime * NumTraits::MulCost
   };
   static inline RealScalar epsilon() { return NumTraits::epsilon(); }
diff --git a/Eigen/src/Core/PermutationMatrix.h b/Eigen/src/Core/PermutationMatrix.h
index bfe6f899a..90e1df233 100644
--- a/Eigen/src/Core/PermutationMatrix.h
+++ b/Eigen/src/Core/PermutationMatrix.h
@@ -2,7 +2,7 @@
 // for linear algebra.
 //
 // Copyright (C) 2009 Benoit Jacob
-// Copyright (C) 2009-2011 Gael Guennebaud
+// Copyright (C) 2009-2015 Gael Guennebaud
 //
 // This Source Code Form is subject to the terms of the Mozilla
 // Public License v. 2.0. If a copy of the MPL was not distributed
@@ -13,9 +13,6 @@ namespace Eigen {
-// TODO: this does not seems to be needed at all:
-// template class PermutedImpl;
-
 /** \class PermutationBase
   * \ingroup Core_Module
   *
@@ -67,8 +64,10 @@ class PermutationBase : public EigenBase
     DenseMatrixType;
     typedef PermutationMatrix PlainPermutationType;
+    typedef PlainPermutationType PlainObject;
     using Base::derived;
-    typedef Transpose TransposeReturnType;
+    typedef Inverse InverseReturnType;
+    typedef void Scalar;
 #endif
     /** Copies the other permutation into *this */
@@ -195,14 +194,14 @@ class PermutationBase : public EigenBase
     *
     * \note \note_try_to_help_rvo */
-    inline TransposeReturnType inverse() const
-    { return TransposeReturnType(derived()); }
+    inline InverseReturnType inverse() const
+    { return InverseReturnType(derived()); }
     /** \returns the transpose permutation matrix.
* * \note \note_try_to_help_rvo */ - inline TransposeReturnType transpose() const - { return TransposeReturnType(derived()); } + inline InverseReturnType transpose() const + { return InverseReturnType(derived()); } /**** multiplication helpers to hopefully get RVO ****/ @@ -237,7 +236,7 @@ class PermutationBase : public EigenBase * \note \note_try_to_help_rvo */ template - inline PlainPermutationType operator*(const Transpose >& other) const + inline PlainPermutationType operator*(const InverseImpl& other) const { return PlainPermutationType(internal::PermPermProduct, *this, other.eval()); } /** \returns the product of an inverse permutation with another permutation. @@ -245,7 +244,7 @@ class PermutationBase : public EigenBase * \note \note_try_to_help_rvo */ template friend - inline PlainPermutationType operator*(const Transpose >& other, const PermutationBase& perm) + inline PlainPermutationType operator*(const InverseImpl& other, const PermutationBase& perm) { return PlainPermutationType(internal::PermPermProduct, other.eval(), perm); } /** \returns the determinant of the permutation matrix, which is either 1 or -1 depending on the parity of the permutation. @@ -303,6 +302,7 @@ struct traits IndicesType; typedef _StorageIndex StorageIndex; + typedef void Scalar; }; } @@ -396,13 +396,13 @@ class PermutationMatrix : public PermutationBase - PermutationMatrix(const Transpose >& other) - : m_indices(other.nestedExpression().size()) + PermutationMatrix(const InverseImpl& other) + : m_indices(other.derived().nestedExpression().size()) { eigen_internal_assert(m_indices.size() <= NumTraits::highest()); StorageIndex end = StorageIndex(m_indices.size()); for (StorageIndex i=0; i PermutationMatrix(internal::PermPermProduct_t, const Lhs& lhs, const Rhs& rhs) @@ -426,6 +426,7 @@ struct traits, _PacketAccess> IndicesType; typedef _StorageIndex StorageIndex; + typedef void Scalar; }; } @@ -499,7 +500,7 @@ template struct traits > { typedef PermutationStorage StorageKind; - typedef typename _IndicesType::Scalar Scalar; + typedef void Scalar; typedef typename _IndicesType::Scalar StorageIndex; typedef _IndicesType IndicesType; enum { @@ -561,84 +562,61 @@ operator*(const PermutationBase &permutation, (permutation.derived(), matrix.derived()); } -namespace internal { -/* Template partial specialization for transposed/inverse permutations */ - -template -struct traits > > - : traits -{}; - -} // end namespace internal - -// TODO: the specificties should be handled by the evaluator, -// at the very least we should only specialize TransposeImpl -template -class Transpose > - : public EigenBase > > +template +class InverseImpl + : public EigenBase > { - typedef Derived PermutationType; - typedef typename PermutationType::IndicesType IndicesType; typedef typename PermutationType::PlainPermutationType PlainPermutationType; + typedef internal::traits PermTraits; + protected: + InverseImpl() {} public: + typedef Inverse InverseType; + using EigenBase >::derived; #ifndef EIGEN_PARSED_BY_DOXYGEN - typedef internal::traits Traits; - typedef typename Derived::DenseMatrixType DenseMatrixType; + typedef typename PermutationType::DenseMatrixType DenseMatrixType; enum { - Flags = Traits::Flags, - RowsAtCompileTime = Traits::RowsAtCompileTime, - ColsAtCompileTime = Traits::ColsAtCompileTime, - MaxRowsAtCompileTime = Traits::MaxRowsAtCompileTime, - MaxColsAtCompileTime = Traits::MaxColsAtCompileTime + RowsAtCompileTime = PermTraits::RowsAtCompileTime, + ColsAtCompileTime = PermTraits::ColsAtCompileTime, + 
MaxRowsAtCompileTime = PermTraits::MaxRowsAtCompileTime,
+      MaxColsAtCompileTime = PermTraits::MaxColsAtCompileTime
     };
-    typedef typename Traits::Scalar Scalar;
-    typedef typename Traits::StorageIndex StorageIndex;
 #endif
-    Transpose(const PermutationType& p) : m_permutation(p) {}
-
-    inline Index rows() const { return m_permutation.rows(); }
-    inline Index cols() const { return m_permutation.cols(); }
-
 #ifndef EIGEN_PARSED_BY_DOXYGEN
     template
     void evalTo(MatrixBase& other) const
     {
       other.setZero();
-      for (Index i=0; i
 friend
-    const Product
-    operator*(const MatrixBase& matrix, const Transpose& trPerm)
+    const Product
+    operator*(const MatrixBase& matrix, const InverseType& trPerm)
     {
-      return Product(matrix.derived(), trPerm.derived());
+      return Product(matrix.derived(), trPerm.derived());
     }
     /** \returns the matrix with the inverse permutation applied to the rows. */
     template
-    const Product
+    const Product
     operator*(const MatrixBase& matrix) const
     {
-      return Product(*this, matrix.derived());
+      return Product(derived(), matrix.derived());
     }
-
-    const PermutationType& nestedExpression() const { return m_permutation; }
-
-  protected:
-    const PermutationType& m_permutation;
 };
 template
diff --git a/Eigen/src/Core/PlainObjectBase.h b/Eigen/src/Core/PlainObjectBase.h
index 48e29ebdc..1225e85b4 100644
--- a/Eigen/src/Core/PlainObjectBase.h
+++ b/Eigen/src/Core/PlainObjectBase.h
@@ -263,7 +263,6 @@ class PlainObjectBase : public internal::dense_xpr_base::type
       m_storage.resize(size, rows, cols);
       if(size_changed) EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED
 #else
-      internal::check_rows_cols_for_overflow::run(rows, cols);
       m_storage.resize(rows*cols, rows, cols);
 #endif
     }
@@ -450,6 +449,10 @@ class PlainObjectBase : public internal::dense_xpr_base::type
       return Base::operator=(func);
     }
+    // Prevent users from trying to instantiate PlainObjectBase objects
+    // by making all its constructors protected. See bug 1074.
+  protected:
+
     EIGEN_DEVICE_FUNC
     EIGEN_STRONG_INLINE PlainObjectBase() : m_storage()
     {
@@ -496,17 +499,6 @@ class PlainObjectBase : public internal::dense_xpr_base::type
       // EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED
     }
-    /** \copydoc MatrixBase::operator=(const EigenBase&)
-      */
-    template
-    EIGEN_DEVICE_FUNC
-    EIGEN_STRONG_INLINE Derived& operator=(const EigenBase &other)
-    {
-      _resize_to_match(other);
-      Base::operator=(other.derived());
-      return this->derived();
-    }
-
     /** \sa PlainObjectBase::operator=(const EigenBase&) */
     template
     EIGEN_DEVICE_FUNC
@@ -520,7 +512,7 @@ class PlainObjectBase : public internal::dense_xpr_base::type
     /** \sa PlainObjectBase::operator=(const EigenBase&) */
     template
-    EIGEN_DEVICE_FUNC
+    EIGEN_DEVICE_FUNC
     EIGEN_STRONG_INLINE PlainObjectBase(const EigenBase &other)
       : m_storage()
     {
@@ -539,6 +531,19 @@ class PlainObjectBase : public internal::dense_xpr_base::type
       other.evalTo(this->derived());
     }
+  public:
+
+    /** \copydoc MatrixBase::operator=(const EigenBase&)
+      */
+    template
+    EIGEN_DEVICE_FUNC
+    EIGEN_STRONG_INLINE Derived& operator=(const EigenBase &other)
+    {
+      _resize_to_match(other);
+      Base::operator=(other.derived());
+      return this->derived();
+    }
+
     /** \name Map
       * These are convenience functions returning Map objects.
The Map() static functions return unaligned Map objects, * while the AlignedMap() functions return aligned Map objects and thus should be called only with 16-byte-aligned diff --git a/Eigen/src/Core/Product.h b/Eigen/src/Core/Product.h index b79236f15..fdd2fed3f 100644 --- a/Eigen/src/Core/Product.h +++ b/Eigen/src/Core/Product.h @@ -217,29 +217,6 @@ class ProductImpl }; -/*************************************************************************** -* Implementation of matrix base methods -***************************************************************************/ - - -/** \internal used to test the evaluator only - */ -template -const Product -prod(const Lhs& lhs, const Rhs& rhs) -{ - return Product(lhs,rhs); -} - -/** \internal used to test the evaluator only - */ -template -const Product -lazyprod(const Lhs& lhs, const Rhs& rhs) -{ - return Product(lhs,rhs); -} - } // end namespace Eigen #endif // EIGEN_PRODUCT_H diff --git a/Eigen/src/Core/ProductEvaluators.h b/Eigen/src/Core/ProductEvaluators.h index 04e5e5e37..794038a2a 100755 --- a/Eigen/src/Core/ProductEvaluators.h +++ b/Eigen/src/Core/ProductEvaluators.h @@ -38,6 +38,12 @@ struct evaluator > // Catch scalar * ( A * B ) and transform it to (A*scalar) * B // TODO we should apply that rule only if that's really helpful template +struct evaluator_traits, const Product > > + : evaluator_traits_base, const Product > > +{ + enum { AssumeAliasing = 1 }; +}; +template struct evaluator, const Product > > : public evaluator,const Lhs>, Rhs, DefaultProduct> > { @@ -91,8 +97,7 @@ struct evaluator_traits > // This is the default evaluator implementation for products: // It creates a temporary and call generic_product_impl template -struct product_evaluator, ProductTag, LhsShape, RhsShape, typename traits::Scalar, typename traits::Scalar, - EnableIf<(Options==DefaultProduct || Options==AliasFreeProduct)> > +struct product_evaluator, ProductTag, LhsShape, RhsShape> : public evaluator::PlainObject> { typedef Product XprType; @@ -177,11 +182,41 @@ struct Assignment > SrcXprType; static void run(DstXprType &dst, const SrcXprType &src, const AssignFunc& func) { - // TODO use operator* instead of prod() once we have made enough progress - call_assignment(dst.noalias(), prod(src.functor().m_other * src.nestedExpression().lhs(), src.nestedExpression().rhs()), func); + call_assignment_no_alias(dst, (src.functor().m_other * src.nestedExpression().lhs())*src.nestedExpression().rhs(), func); } }; +//---------------------------------------- +// Catch "Dense ?= xpr + Product<>" expression to save one temporary +// FIXME we could probably enable these rules for any product, i.e., not only Dense and DefaultProduct + +template +struct assignment_from_xpr_plus_product +{ + typedef CwiseBinaryOp, const OtherXpr, const ProductType> SrcXprType; + static void run(DstXprType &dst, const SrcXprType &src, const Func1& func) + { + call_assignment_no_alias(dst, src.lhs(), func); + call_assignment_no_alias(dst, src.rhs(), Func2()); + } +}; + +template< typename DstXprType, typename OtherXpr, typename Lhs, typename Rhs, typename Scalar> +struct Assignment, const OtherXpr, + const Product >, internal::assign_op, Dense2Dense> + : assignment_from_xpr_plus_product, Scalar, internal::assign_op, internal::add_assign_op > +{}; +template< typename DstXprType, typename OtherXpr, typename Lhs, typename Rhs, typename Scalar> +struct Assignment, const OtherXpr, + const Product >, internal::add_assign_op, Dense2Dense> + : assignment_from_xpr_plus_product, Scalar, 
internal::add_assign_op, internal::add_assign_op > +{}; +template< typename DstXprType, typename OtherXpr, typename Lhs, typename Rhs, typename Scalar> +struct Assignment, const OtherXpr, + const Product >, internal::sub_assign_op, Dense2Dense> + : assignment_from_xpr_plus_product, Scalar, internal::sub_assign_op, internal::sub_assign_op > +{}; +//---------------------------------------- template struct generic_product_impl @@ -213,12 +248,12 @@ template EIGEN_DONT_INLINE void outer_product_selector_run(Dst& dst, const Lhs &lhs, const Rhs &rhs, const Func& func, const false_type&) { evaluator rhsEval(rhs); - // FIXME make sure lhs is sequentially stored + typename nested_eval::type actual_lhs(lhs); + // FIXME if cols is large enough, then it might be useful to make sure that lhs is sequentially stored // FIXME not very good if rhs is real and lhs complex while alpha is real too - // FIXME we should probably build an evaluator for dst const Index cols = dst.cols(); for (Index j=0; j EIGEN_DONT_INLINE void outer_product_selector_run(Dst& dst, const Lhs &lhs, const Rhs &rhs, const Func& func, const true_type&) { evaluator lhsEval(lhs); - // FIXME make sure rhs is sequentially stored + typename nested_eval::type actual_rhs(rhs); + // FIXME if rows is large enough, then it might be useful to make sure that rhs is sequentially stored // FIXME not very good if lhs is real and rhs complex while alpha is real too - // FIXME we should probably build an evaluator for dst const Index rows = dst.rows(); for (Index i=0; i @@ -314,7 +349,7 @@ struct generic_product_impl template static void scaleAndAddTo(Dest& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha) { - internal::gemv_dense_sense_selector::HasUsableDirectAccess) >::run(lhs, rhs, dst, alpha); @@ -329,28 +364,28 @@ struct generic_product_impl template static inline void evalTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) { - // TODO: use the following instead of calling call_assignment, same for the other methods - // dst = lazyprod(lhs,rhs); - call_assignment(dst, lazyprod(lhs,rhs), internal::assign_op()); + // Same as: dst.noalias() = lhs.lazyProduct(rhs); + // but easier on the compiler side + call_assignment_no_alias(dst, lhs.lazyProduct(rhs), internal::assign_op()); } template static inline void addTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) { - // dst += lazyprod(lhs,rhs); - call_assignment(dst, lazyprod(lhs,rhs), internal::add_assign_op()); + // dst.noalias() += lhs.lazyProduct(rhs); + call_assignment_no_alias(dst, lhs.lazyProduct(rhs), internal::add_assign_op()); } template static inline void subTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) { - // dst -= lazyprod(lhs,rhs); - call_assignment(dst, lazyprod(lhs,rhs), internal::sub_assign_op()); + // dst.noalias() -= lhs.lazyProduct(rhs); + call_assignment_no_alias(dst, lhs.lazyProduct(rhs), internal::sub_assign_op()); } // template // static inline void scaleAndAddTo(Dst& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha) -// { dst += alpha * lazyprod(lhs,rhs); } +// { dst.noalias() += alpha * lhs.lazyProduct(rhs); } }; // This specialization enforces the use of a coefficient-based evaluation strategy @@ -371,7 +406,7 @@ template -struct product_evaluator, ProductTag, DenseShape, DenseShape, typename Lhs::Scalar, typename Rhs::Scalar > +struct product_evaluator, ProductTag, DenseShape, DenseShape> : evaluator_base > { typedef Product XprType; @@ -387,7 +422,11 @@ struct product_evaluator, ProductTag, DenseShape, m_rhsImpl(m_rhs), // Moreover, they are only useful for the packet 
path, so we could completely disable them when not needed, // or perhaps declare them on the fly on the packet method... We have experiment to check what's best. m_innerDim(xpr.lhs().cols()) - { } + { + EIGEN_INTERNAL_CHECK_COST_VALUE(NumTraits::MulCost); + EIGEN_INTERNAL_CHECK_COST_VALUE(NumTraits::AddCost); + EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost); + } // Everything below here is taken from CoeffBasedProduct.h @@ -408,15 +447,15 @@ struct product_evaluator, ProductTag, DenseShape, MaxColsAtCompileTime = RhsNestedCleaned::MaxColsAtCompileTime, PacketSize = packet_traits::size, - + LhsCoeffReadCost = LhsEtorType::CoeffReadCost, RhsCoeffReadCost = RhsEtorType::CoeffReadCost, CoeffReadCost = InnerSize==0 ? NumTraits::ReadCost - : (InnerSize == Dynamic || LhsCoeffReadCost==Dynamic || RhsCoeffReadCost==Dynamic || NumTraits::AddCost==Dynamic || NumTraits::MulCost==Dynamic) ? Dynamic + : InnerSize == Dynamic ? HugeCost : InnerSize * (NumTraits::MulCost + LhsCoeffReadCost + RhsCoeffReadCost) + (InnerSize - 1) * NumTraits::AddCost, - Unroll = CoeffReadCost != Dynamic && CoeffReadCost <= EIGEN_UNROLLING_LIMIT, + Unroll = CoeffReadCost <= EIGEN_UNROLLING_LIMIT, LhsFlags = LhsEtorType::Flags, RhsFlags = RhsEtorType::Flags, @@ -424,19 +463,16 @@ struct product_evaluator, ProductTag, DenseShape, LhsAlignment = LhsEtorType::Alignment, RhsAlignment = RhsEtorType::Alignment, - LhsIsAligned = int(LhsAlignment) >= int(unpacket_traits::alignment), - RhsIsAligned = int(RhsAlignment) >= int(unpacket_traits::alignment), - LhsRowMajor = LhsFlags & RowMajorBit, RhsRowMajor = RhsFlags & RowMajorBit, SameType = is_same::value, CanVectorizeRhs = RhsRowMajor && (RhsFlags & PacketAccessBit) - && (ColsAtCompileTime == Dynamic || ( (ColsAtCompileTime % PacketSize) == 0 && RhsIsAligned ) ), + && (ColsAtCompileTime == Dynamic || ((ColsAtCompileTime % PacketSize) == 0) ), CanVectorizeLhs = (!LhsRowMajor) && (LhsFlags & PacketAccessBit) - && (RowsAtCompileTime == Dynamic || ( (RowsAtCompileTime % PacketSize) == 0 && LhsIsAligned ) ), + && (RowsAtCompileTime == Dynamic || ((RowsAtCompileTime % PacketSize) == 0) ), EvalToRowMajor = (MaxRowsAtCompileTime==1&&MaxColsAtCompileTime!=1) ? 1 : (MaxColsAtCompileTime==1&&MaxRowsAtCompileTime!=1) ? 0 @@ -445,12 +481,16 @@ struct product_evaluator, ProductTag, DenseShape, Flags = ((unsigned int)(LhsFlags | RhsFlags) & HereditaryBits & ~RowMajorBit) | (EvalToRowMajor ? RowMajorBit : 0) // TODO enable vectorization for mixed types - | (SameType && (CanVectorizeLhs || CanVectorizeRhs) ? PacketAccessBit : 0), + | (SameType && (CanVectorizeLhs || CanVectorizeRhs) ? PacketAccessBit : 0) + | (XprType::IsVectorAtCompileTime ? LinearAccessBit : 0), - Alignment = CanVectorizeLhs ? LhsAlignment - : CanVectorizeRhs ? RhsAlignment + LhsOuterStrideBytes = int(LhsNestedCleaned::OuterStrideAtCompileTime) * int(sizeof(typename LhsNestedCleaned::Scalar)), + RhsOuterStrideBytes = int(RhsNestedCleaned::OuterStrideAtCompileTime) * int(sizeof(typename RhsNestedCleaned::Scalar)), + + Alignment = CanVectorizeLhs ? (LhsOuterStrideBytes<0 || (int(LhsOuterStrideBytes) % EIGEN_PLAIN_ENUM_MAX(1,LhsAlignment))!=0 ? 0 : LhsAlignment) + : CanVectorizeRhs ? (RhsOuterStrideBytes<0 || (int(RhsOuterStrideBytes) % EIGEN_PLAIN_ENUM_MAX(1,RhsAlignment))!=0 ? 0 : RhsAlignment) : 0, - + /* CanVectorizeInner deserves special explanation. It does not affect the product flags. It is not used outside * of Product. 
If the Product itself is not a packet-access expression, there is still a chance that the inner * loop of the product might be vectorized. This is the meaning of CanVectorizeInner. Since it doesn't affect @@ -460,13 +500,11 @@ struct product_evaluator, ProductTag, DenseShape, && LhsRowMajor && (!RhsRowMajor) && (LhsFlags & RhsFlags & ActualPacketAccessBit) - && (LhsIsAligned && RhsIsAligned) && (InnerSize % packet_traits::size == 0) }; - EIGEN_DEVICE_FUNC const CoeffReturnType coeff(Index row, Index col) const + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const CoeffReturnType coeff(Index row, Index col) const { - // TODO check performance regression wrt to Eigen 3.2 which has special handling of this function return (m_lhs.row(row).transpose().cwiseProduct( m_rhs.col(col) )).sum(); } @@ -478,7 +516,6 @@ struct product_evaluator, ProductTag, DenseShape, { const Index row = RowsAtCompileTime == 1 ? 0 : index; const Index col = RowsAtCompileTime == 1 ? index : 0; - // TODO check performance regression wrt to Eigen 3.2 which has special handling of this function return (m_lhs.row(row).transpose().cwiseProduct( m_rhs.col(col) )).sum(); } @@ -486,14 +523,21 @@ struct product_evaluator, ProductTag, DenseShape, const PacketType packet(Index row, Index col) const { PacketType res; - typedef etor_product_packet_impl PacketImpl; - PacketImpl::run(row, col, m_lhsImpl, m_rhsImpl, m_innerDim, res); return res; } + template + const PacketType packet(Index index) const + { + const Index row = RowsAtCompileTime == 1 ? 0 : index; + const Index col = RowsAtCompileTime == 1 ? index : 0; + return packet(row,col); + } + protected: const LhsNested m_lhs; const RhsNested m_rhs; @@ -506,12 +550,12 @@ protected: }; template -struct product_evaluator, LazyCoeffBasedProductMode, DenseShape, DenseShape, typename traits::Scalar, typename traits::Scalar > - : product_evaluator, CoeffBasedProductMode, DenseShape, DenseShape, typename traits::Scalar, typename traits::Scalar > +struct product_evaluator, LazyCoeffBasedProductMode, DenseShape, DenseShape> + : product_evaluator, CoeffBasedProductMode, DenseShape, DenseShape> { typedef Product XprType; typedef Product BaseProduct; - typedef product_evaluator Base; + typedef product_evaluator Base; enum { Flags = Base::Flags | EvalBeforeNestingBit }; @@ -703,6 +747,8 @@ public: diagonal_product_evaluator_base(const MatrixType &mat, const DiagonalType &diag) : m_diagImpl(diag), m_matImpl(mat) { + EIGEN_INTERNAL_CHECK_COST_VALUE(NumTraits::MulCost); + EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost); } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar coeff(Index idx) const @@ -735,7 +781,7 @@ protected: // diagonal * dense template -struct product_evaluator, ProductTag, DiagonalShape, DenseShape, typename Lhs::Scalar, typename Rhs::Scalar> +struct product_evaluator, ProductTag, DiagonalShape, DenseShape> : diagonal_product_evaluator_base, OnTheLeft> { typedef diagonal_product_evaluator_base, OnTheLeft> Base; @@ -781,7 +827,7 @@ struct product_evaluator, ProductTag, DiagonalSha // dense * diagonal template -struct product_evaluator, ProductTag, DenseShape, DiagonalShape, typename Lhs::Scalar, typename Rhs::Scalar> +struct product_evaluator, ProductTag, DenseShape, DiagonalShape> : diagonal_product_evaluator_base, OnTheRight> { typedef diagonal_product_evaluator_base, OnTheRight> Base; @@ -911,20 +957,20 @@ struct generic_product_impl }; template -struct generic_product_impl, Rhs, PermutationShape, MatrixShape, ProductTag> +struct generic_product_impl, Rhs, PermutationShape, 
MatrixShape, ProductTag> { template - static void evalTo(Dest& dst, const Transpose& lhs, const Rhs& rhs) + static void evalTo(Dest& dst, const Inverse& lhs, const Rhs& rhs) { permutation_matrix_product::run(dst, lhs.nestedExpression(), rhs); } }; template -struct generic_product_impl, MatrixShape, PermutationShape, ProductTag> +struct generic_product_impl, MatrixShape, PermutationShape, ProductTag> { template - static void evalTo(Dest& dst, const Lhs& lhs, const Transpose& rhs) + static void evalTo(Dest& dst, const Lhs& lhs, const Inverse& rhs) { permutation_matrix_product::run(dst, rhs.nestedExpression(), lhs); } diff --git a/Eigen/src/Core/Redux.h b/Eigen/src/Core/Redux.h index c427a4d58..d170cae29 100644 --- a/Eigen/src/Core/Redux.h +++ b/Eigen/src/Core/Redux.h @@ -50,20 +50,14 @@ public: public: enum { - Cost = ( Derived::SizeAtCompileTime == Dynamic - || Derived::CoeffReadCost == Dynamic - || (Derived::SizeAtCompileTime!=1 && functor_traits::Cost == Dynamic) - ) ? Dynamic - : Derived::SizeAtCompileTime * Derived::CoeffReadCost - + (Derived::SizeAtCompileTime-1) * functor_traits::Cost, + Cost = Derived::SizeAtCompileTime == Dynamic ? HugeCost + : Derived::SizeAtCompileTime * Derived::CoeffReadCost + (Derived::SizeAtCompileTime-1) * functor_traits::Cost, UnrollingLimit = EIGEN_UNROLLING_LIMIT * (int(Traversal) == int(DefaultTraversal) ? 1 : int(PacketSize)) }; public: enum { - Unrolling = Cost != Dynamic && Cost <= UnrollingLimit - ? CompleteUnrolling - : NoUnrolling + Unrolling = Cost <= UnrollingLimit ? CompleteUnrolling : NoUnrolling }; #ifdef EIGEN_DEBUG_ASSIGN @@ -269,8 +263,9 @@ struct redux_impl } }; -template -struct redux_impl +// NOTE: for SliceVectorizedTraversal we simply bypass unrolling +template +struct redux_impl { typedef typename Derived::Scalar Scalar; typedef typename packet_traits::type PacketType; @@ -414,17 +409,7 @@ typename internal::traits::Scalar DenseBase::redux(const Func& func) const { eigen_assert(this->rows()>0 && this->cols()>0 && "you are using an empty matrix"); - - // FIXME, eval_nest should be handled by redux_evaluator, however: - // - it is currently difficult to provide the right Flags since they are still handled by the expressions - // - handling it here might reduce the number of template instantiations -// typedef typename internal::nested_eval::type ThisNested; -// typedef typename internal::remove_all::type ThisNestedCleaned; -// typedef typename internal::redux_evaluator ThisEvaluator; -// -// ThisNested thisNested(derived()); -// ThisEvaluator thisEval(thisNested); - + typedef typename internal::redux_evaluator ThisEvaluator; ThisEvaluator thisEval(derived()); diff --git a/Eigen/src/Core/Solve.h b/Eigen/src/Core/Solve.h index 4857a7c42..ba2ee53b8 100644 --- a/Eigen/src/Core/Solve.h +++ b/Eigen/src/Core/Solve.h @@ -34,12 +34,11 @@ template struct s template struct solve_traits { - typedef typename Decomposition::MatrixType MatrixType; typedef Matrix PlainObject; }; @@ -52,7 +51,7 @@ struct traits > typedef traits BaseTraits; enum { Flags = BaseTraits::Flags & RowMajorBit, - CoeffReadCost = Dynamic + CoeffReadCost = HugeCost }; }; @@ -118,6 +117,8 @@ struct evaluator > typedef Solve SolveType; typedef typename SolveType::PlainObject PlainObject; typedef evaluator Base; + + enum { Flags = Base::Flags | EvalBeforeNestingBit }; EIGEN_DEVICE_FUNC explicit evaluator(const SolveType& solve) : m_result(solve.rows(), solve.cols()) @@ -143,6 +144,28 @@ struct Assignment, internal::assign_op +struct Assignment,RhsType>, internal::assign_op, 
Dense2Dense, Scalar>
+{
+  typedef Solve,RhsType> SrcXprType;
+  static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op &)
+  {
+    src.dec().nestedExpression().template _solve_impl_transposed(src.rhs(), dst);
+  }
+};
+
+// Specialization for "dst = dec.adjoint().solve(rhs)"
+template
+struct Assignment, const Transpose >,RhsType>, internal::assign_op, Dense2Dense, Scalar>
+{
+  typedef Solve, const Transpose >,RhsType> SrcXprType;
+  static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op &)
+  {
+    src.dec().nestedExpression().nestedExpression().template _solve_impl_transposed(src.rhs(), dst);
+  }
+};
+
 } // end namespace internal
 } // end namespace Eigen
diff --git a/Eigen/src/Core/SolveTriangular.h b/Eigen/src/Core/SolveTriangular.h
index ded42e0e8..5a2010449 100644
--- a/Eigen/src/Core/SolveTriangular.h
+++ b/Eigen/src/Core/SolveTriangular.h
@@ -107,32 +107,32 @@ struct triangular_solver_selector
 * meta-unrolling implementation
 ***************************************************************************/
-template
+template
 struct triangular_solver_unroller;
-template
-struct triangular_solver_unroller {
+template
+struct triangular_solver_unroller {
   enum {
     IsLower = ((Mode&Lower)==Lower),
-    I = IsLower ? Index : Size - Index - 1,
-    S = IsLower ? 0 : I+1
+    DiagIndex = IsLower ? LoopIndex : Size - LoopIndex - 1,
+    StartIndex = IsLower ? 0 : DiagIndex+1
   };
   static void run(const Lhs& lhs, Rhs& rhs)
   {
-    if (Index>0)
-      rhs.coeffRef(I) -= lhs.row(I).template segment(S).transpose()
-                         .cwiseProduct(rhs.template segment(S)).sum();
+    if (LoopIndex>0)
+      rhs.coeffRef(DiagIndex) -= lhs.row(DiagIndex).template segment(StartIndex).transpose()
+                                 .cwiseProduct(rhs.template segment(StartIndex)).sum();
     if(!(Mode & UnitDiag))
-      rhs.coeffRef(I) /= lhs.coeff(I,I);
+      rhs.coeffRef(DiagIndex) /= lhs.coeff(DiagIndex,DiagIndex);
-    triangular_solver_unroller::run(lhs,rhs);
+    triangular_solver_unroller::run(lhs,rhs);
   }
 };
-template
-struct triangular_solver_unroller {
+template
+struct triangular_solver_unroller {
   static void run(const Lhs&, Rhs&) {}
 };
@@ -161,13 +161,6 @@ struct triangular_solver_selector {
 * TriangularView methods
 ***************************************************************************/
-/** "in-place" version of TriangularView::solve() where the result is written in \a other
-  *
-  * \warning The parameter is only marked 'const' to make the C++ compiler accept a temporary expression here.
-  * This function will const_cast it, so constness isn't honored here.
-  *
-  * See TriangularView:solve() for the details.
-  */
 template
 template
 void TriangularViewImpl::solveInPlace(const MatrixBase& _other) const
@@ -188,27 +181,6 @@ void TriangularViewImpl::solveInPlace(const MatrixBase
 template
 const internal::triangular_solve_retval,Other>
diff --git a/Eigen/src/Core/SolverBase.h b/Eigen/src/Core/SolverBase.h
new file mode 100644
index 000000000..8a4adc229
--- /dev/null
+++ b/Eigen/src/Core/SolverBase.h
@@ -0,0 +1,130 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2015 Gael Guennebaud
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
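The Assignment specializations above, together with the SolverBase class introduced in the new file below, are what make expressions like dec.transpose().solve(rhs) and dec.adjoint().solve(rhs) dispatch to _solve_impl_transposed() instead of materializing a transposed decomposition. As an illustration only, a minimal usage sketch, assuming an Eigen build that contains this patch (the matrix values are arbitrary):

#include <Eigen/Dense>
#include <iostream>

int main()
{
  Eigen::Matrix3d A = Eigen::Matrix3d::Random();
  Eigen::Vector3d b = Eigen::Vector3d::Random();

  Eigen::PartialPivLU<Eigen::Matrix3d> lu(A); // factor A once

  Eigen::Vector3d x  = lu.solve(b);             // solves A   * x = b
  Eigen::Vector3d xt = lu.transpose().solve(b); // solves A^T * x = b, reusing the factorization
  Eigen::Vector3d xa = lu.adjoint().solve(b);   // same as transpose() for real scalars

  std::cout << "residual: " << (A * x - b).norm() << std::endl;
}

Both the transposed and the adjoint solve reuse the existing LU factors, so no second factorization and no explicit transpose of A is ever formed.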
+
+#ifndef EIGEN_SOLVERBASE_H
+#define EIGEN_SOLVERBASE_H
+
+namespace Eigen {
+
+namespace internal {
+
+
+
+} // end namespace internal
+
+/** \class SolverBase
+  * \brief A base class for matrix decompositions and solvers
+  *
+  * \tparam Derived the actual type of the decomposition/solver.
+  *
+  * Any matrix decomposition inheriting this base class provides the following API:
+  *
+  * \code
+  * MatrixType A, b, x;
+  * DecompositionType dec(A);
+  * x = dec.solve(b);             // solve A   * x = b
+  * x = dec.transpose().solve(b); // solve A^T * x = b
+  * x = dec.adjoint().solve(b);   // solve A'  * x = b
+  * \endcode
+  *
+  * \warning Currently, any other usage of transpose() and adjoint() is not supported and will produce compilation errors.
+  *
+  * \sa class PartialPivLU, class FullPivLU
+  */
+template<typename Derived>
+class SolverBase : public EigenBase<Derived>
+{
+  public:
+
+    typedef EigenBase<Derived> Base;
+    typedef typename internal::traits<Derived>::Scalar Scalar;
+    typedef Scalar CoeffReturnType;
+
+    enum {
+      RowsAtCompileTime = internal::traits<Derived>::RowsAtCompileTime,
+      ColsAtCompileTime = internal::traits<Derived>::ColsAtCompileTime,
+      SizeAtCompileTime = (internal::size_at_compile_time<internal::traits<Derived>::RowsAtCompileTime,
+                                                          internal::traits<Derived>::ColsAtCompileTime>::ret),
+      MaxRowsAtCompileTime = internal::traits<Derived>::MaxRowsAtCompileTime,
+      MaxColsAtCompileTime = internal::traits<Derived>::MaxColsAtCompileTime,
+      MaxSizeAtCompileTime = (internal::size_at_compile_time<internal::traits<Derived>::MaxRowsAtCompileTime,
+                                                             internal::traits<Derived>::MaxColsAtCompileTime>::ret),
+      IsVectorAtCompileTime = internal::traits<Derived>::MaxRowsAtCompileTime == 1
+                           || internal::traits<Derived>::MaxColsAtCompileTime == 1
+    };
+
+    /** Default constructor */
+    SolverBase()
+    {}
+
+    ~SolverBase()
+    {}
+
+    using Base::derived;
+
+    /** \returns an expression of the solution x of \f$ A x = b \f$ using the current decomposition of A.
+      */
+    template<typename Rhs>
+    inline const Solve<Derived, Rhs>
+    solve(const MatrixBase<Rhs>& b) const
+    {
+      eigen_assert(derived().rows()==b.rows() && "solve(): invalid number of rows of the right hand side matrix b");
+      return Solve<Derived, Rhs>(derived(), b.derived());
+    }
+
+    /** \internal the return type of transpose() */
+    typedef typename internal::add_const<Transpose<const Derived> >::type ConstTransposeReturnType;
+    /** \returns an expression of the transpose of the factored matrix.
+      *
+      * A typical usage is to solve for the transposed problem A^T x = b:
+      * \code x = dec.transpose().solve(b); \endcode
+      *
+      * \sa adjoint(), solve()
+      */
+    inline ConstTransposeReturnType transpose() const
+    {
+      return ConstTransposeReturnType(derived());
+    }
+
+    /** \internal the return type of adjoint() */
+    typedef typename internal::conditional<NumTraits<Scalar>::IsComplex,
+                        CwiseUnaryOp<internal::scalar_conjugate_op<Scalar>, ConstTransposeReturnType>,
+                        ConstTransposeReturnType
+                     >::type AdjointReturnType;
+    /** \returns an expression of the adjoint of the factored matrix
+      *
+      * A typical usage is to solve for the adjoint problem A' x = b:
+      * \code x = dec.adjoint().solve(b); \endcode
+      *
+      * For real scalar types, this function is equivalent to transpose().
+ * + * \sa transpose(), solve() + */ + inline AdjointReturnType adjoint() const + { + return AdjointReturnType(derived().transpose()); + } + + protected: +}; + +namespace internal { + +template +struct generic_xpr_base +{ + typedef SolverBase type; + +}; + +} // end namespace internal + +} // end namespace Eigen + +#endif // EIGEN_SOLVERBASE_H diff --git a/Eigen/src/Core/SpecialFunctions.h b/Eigen/src/Core/SpecialFunctions.h new file mode 100644 index 000000000..d43cf23a1 --- /dev/null +++ b/Eigen/src/Core/SpecialFunctions.h @@ -0,0 +1,160 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2015 Eugene Brevdo +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_SPECIAL_FUNCTIONS_H +#define EIGEN_SPECIAL_FUNCTIONS_H + +namespace Eigen { +namespace internal { + +/**************************************************************************** + * Implementation of lgamma * + ****************************************************************************/ + +template +struct lgamma_impl +{ + EIGEN_DEVICE_FUNC + static EIGEN_STRONG_INLINE Scalar run(const Scalar&) + { + EIGEN_STATIC_ASSERT((internal::is_same::value == false), + THIS_TYPE_IS_NOT_SUPPORTED); + return Scalar(0); + } +}; + +template +struct lgamma_retval +{ + typedef Scalar type; +}; + +#ifdef EIGEN_HAS_C99_MATH +template<> +struct lgamma_impl +{ + EIGEN_DEVICE_FUNC + static EIGEN_STRONG_INLINE double run(const float& x) { return ::lgammaf(x); } +}; + +template<> +struct lgamma_impl +{ + EIGEN_DEVICE_FUNC + static EIGEN_STRONG_INLINE double run(const double& x) { return ::lgamma(x); } +}; +#endif + +/**************************************************************************** + * Implementation of erf * + ****************************************************************************/ + +template +struct erf_impl +{ + EIGEN_DEVICE_FUNC + static EIGEN_STRONG_INLINE Scalar run(const Scalar&) + { + EIGEN_STATIC_ASSERT((internal::is_same::value == false), + THIS_TYPE_IS_NOT_SUPPORTED); + return Scalar(0); + } +}; + +template +struct erf_retval +{ + typedef Scalar type; +}; + +#ifdef EIGEN_HAS_C99_MATH +template<> +struct erf_impl +{ + EIGEN_DEVICE_FUNC + static EIGEN_STRONG_INLINE float run(const float& x) { return ::erff(x); } +}; + +template<> +struct erf_impl +{ + EIGEN_DEVICE_FUNC + static EIGEN_STRONG_INLINE double run(const double& x) { return ::erf(x); } +}; +#endif // EIGEN_HAS_C99_MATH + +/*************************************************************************** +* Implementation of erfc * +****************************************************************************/ + +template +struct erfc_impl +{ + EIGEN_DEVICE_FUNC + static EIGEN_STRONG_INLINE Scalar run(const Scalar&) + { + EIGEN_STATIC_ASSERT((internal::is_same::value == false), + THIS_TYPE_IS_NOT_SUPPORTED); + return Scalar(0); + } +}; + +template +struct erfc_retval +{ + typedef Scalar type; +}; + +#ifdef EIGEN_HAS_C99_MATH +template<> +struct erfc_impl +{ + EIGEN_DEVICE_FUNC + static EIGEN_STRONG_INLINE float run(const float x) { return ::erfcf(x); } +}; + +template<> +struct erfc_impl +{ + EIGEN_DEVICE_FUNC + static EIGEN_STRONG_INLINE double run(const double x) { return ::erfc(x); } +}; +#endif // EIGEN_HAS_C99_MATH + +} // end namespace internal + + +namespace numext { + +template +EIGEN_DEVICE_FUNC +inline EIGEN_MATHFUNC_RETVAL(lgamma, 
Scalar) lgamma(const Scalar& x) +{ + return EIGEN_MATHFUNC_IMPL(lgamma, Scalar)::run(x); +} + +template +EIGEN_DEVICE_FUNC +inline EIGEN_MATHFUNC_RETVAL(erf, Scalar) erf(const Scalar& x) +{ + return EIGEN_MATHFUNC_IMPL(erf, Scalar)::run(x); +} + +template +EIGEN_DEVICE_FUNC +inline EIGEN_MATHFUNC_RETVAL(erfc, Scalar) erfc(const Scalar& x) +{ + return EIGEN_MATHFUNC_IMPL(erfc, Scalar)::run(x); +} + +} // end namespace numext + +} // end namespace Eigen + +#endif // EIGEN_SPECIAL_FUNCTIONS_H diff --git a/Eigen/src/Core/Transpose.h b/Eigen/src/Core/Transpose.h index 2152405d5..5b66eb5e1 100644 --- a/Eigen/src/Core/Transpose.h +++ b/Eigen/src/Core/Transpose.h @@ -39,7 +39,7 @@ struct traits > : public traits MaxRowsAtCompileTime = MatrixType::MaxColsAtCompileTime, MaxColsAtCompileTime = MatrixType::MaxRowsAtCompileTime, FlagsLvalueBit = is_lvalue::value ? LvalueBit : 0, - Flags0 = MatrixTypeNestedPlain::Flags & ~(LvalueBit | NestByRefBit), + Flags0 = traits::Flags & ~(LvalueBit | NestByRefBit), Flags1 = Flags0 | FlagsLvalueBit, Flags = Flags1 ^ RowMajorBit, InnerStrideAtCompileTime = inner_stride_at_compile_time::ret, diff --git a/Eigen/src/Core/TriangularMatrix.h b/Eigen/src/Core/TriangularMatrix.h index 63a1af8c1..099a02ec3 100644 --- a/Eigen/src/Core/TriangularMatrix.h +++ b/Eigen/src/Core/TriangularMatrix.h @@ -222,18 +222,23 @@ template class TriangularView TriangularView& operator=(const TriangularView &other) { return Base::operator=(other); } + /** \copydoc EigenBase::rows() */ EIGEN_DEVICE_FUNC inline Index rows() const { return m_matrix.rows(); } + /** \copydoc EigenBase::cols() */ EIGEN_DEVICE_FUNC inline Index cols() const { return m_matrix.cols(); } + /** \returns a const reference to the nested expression */ EIGEN_DEVICE_FUNC const NestedExpression& nestedExpression() const { return m_matrix; } + + /** \returns a reference to the nested expression */ EIGEN_DEVICE_FUNC NestedExpression& nestedExpression() { return *const_cast(&m_matrix); } - /** \sa MatrixBase::conjugate() const */ typedef TriangularView ConjugateReturnType; + /** \sa MatrixBase::conjugate() const */ EIGEN_DEVICE_FUNC inline const ConjugateReturnType conjugate() const { return ConjugateReturnType(m_matrix.conjugate()); } @@ -279,19 +284,28 @@ template class TriangularView using Base::solve; #endif - EIGEN_DEVICE_FUNC - const SelfAdjointView selfadjointView() const - { - EIGEN_STATIC_ASSERT((Mode&UnitDiag)==0,PROGRAMMING_ERROR); - return SelfAdjointView(m_matrix); - } + /** \returns a selfadjoint view of the referenced triangular part which must be either \c #Upper or \c #Lower. 
+  *
+  * This is a shortcut for \code this->nestedExpression().selfadjointView<(*this)::Mode>() \endcode
+  * \sa MatrixBase::selfadjointView() */
     EIGEN_DEVICE_FUNC
     SelfAdjointView selfadjointView()
     {
-      EIGEN_STATIC_ASSERT((Mode&UnitDiag)==0,PROGRAMMING_ERROR);
+      EIGEN_STATIC_ASSERT((Mode&(UnitDiag|ZeroDiag))==0,PROGRAMMING_ERROR);
       return SelfAdjointView(m_matrix);
     }
+    /** This is the const version of selfadjointView() */
+    EIGEN_DEVICE_FUNC
+    const SelfAdjointView selfadjointView() const
+    {
+      EIGEN_STATIC_ASSERT((Mode&(UnitDiag|ZeroDiag))==0,PROGRAMMING_ERROR);
+      return SelfAdjointView(m_matrix);
+    }
+
+
+    /** \returns the determinant of the triangular matrix
+      * \sa MatrixBase::determinant() */
     EIGEN_DEVICE_FUNC
     Scalar determinant() const
     {
@@ -341,12 +355,16 @@ template class TriangularViewImpl<_Mat
       Flags = internal::traits::Flags
     };
+    /** \returns the outer-stride of the underlying dense matrix
+      * \sa DenseCoeffsBase::outerStride() */
     EIGEN_DEVICE_FUNC
     inline Index outerStride() const { return derived().nestedExpression().outerStride(); }
+    /** \returns the inner-stride of the underlying dense matrix
+      * \sa DenseCoeffsBase::innerStride() */
     EIGEN_DEVICE_FUNC
     inline Index innerStride() const { return derived().nestedExpression().innerStride(); }
-    /** \sa MatrixBase::operator+=() */
+    /** \sa MatrixBase::operator+=() */
     template
     EIGEN_DEVICE_FUNC
     TriangularViewType& operator+=(const DenseBase& other) {
@@ -364,7 +382,7 @@ template class TriangularViewImpl<_Mat
     /** \sa MatrixBase::operator*=() */
     EIGEN_DEVICE_FUNC
     TriangularViewType& operator*=(const typename internal::traits::Scalar& other) { return *this = derived().nestedExpression() * other; }
-    /** \sa MatrixBase::operator/=() */
+    /** \sa DenseBase::operator/=() */
     EIGEN_DEVICE_FUNC
     TriangularViewType& operator/=(const typename internal::traits::Scalar& other) { return *this = derived().nestedExpression() / other; }
@@ -408,21 +426,26 @@ template class TriangularViewImpl<_Mat
     EIGEN_DEVICE_FUNC
     TriangularViewType& operator=(const TriangularBase& other);
+    /** Shortcut for \code *this = other.triangularView<(*this)::Mode>() \endcode */
     template
     EIGEN_DEVICE_FUNC
     TriangularViewType& operator=(const MatrixBase& other);
+#ifndef EIGEN_PARSED_BY_DOXYGEN
     EIGEN_DEVICE_FUNC
     TriangularViewType& operator=(const TriangularViewImpl& other)
     { return *this = other.derived().nestedExpression(); }
+    /** \deprecated */
     template
     EIGEN_DEVICE_FUNC
     void lazyAssign(const TriangularBase& other);
+    /** \deprecated */
     template
     EIGEN_DEVICE_FUNC
-    void lazyAssign(const MatrixBase& other);
+    void lazyAssign(const MatrixBase& other);
+#endif
     /** Efficient triangular matrix times vector/matrix product */
     template
@@ -442,11 +465,39 @@ template class TriangularViewImpl<_Mat
       return Product(lhs.derived(),rhs.derived());
     }
+    /** \returns the product of the inverse of \c *this with \a other, \a *this being triangular.
+      *
+      * This function computes the inverse-matrix matrix product inverse(\c *this) * \a other if
+      * \a Side==OnTheLeft (the default), or the right-inverse-multiply \a other * inverse(\c *this) if
+      * \a Side==OnTheRight.
+      *
+      * The matrix \c *this must be triangular and invertible (i.e., all the coefficients of the
+      * diagonal must be non-zero). It works as a backward (resp. forward) substitution if \c *this
+      * is an upper (resp. lower) triangular matrix.
+      *
+      * Example: \include Triangular_solve.cpp
+      * Output: \verbinclude Triangular_solve.out
+      *
+      * This function returns an expression of the inverse-multiply and can work in-place if it is assigned
+      * to the same matrix or vector \a other.
+      *
+      * For users coming from BLAS, this function (and more specifically solveInPlace()) offers
+      * all the operations supported by the \c *TRSV and \c *TRSM BLAS routines.
+      *
+      * \sa TriangularView::solveInPlace()
+      */
     template
     EIGEN_DEVICE_FUNC
     inline const internal::triangular_solve_retval solve(const MatrixBase& other) const;
+    /** "in-place" version of TriangularView::solve() where the result is written in \a other
+      *
+      * \warning The parameter is only marked 'const' to make the C++ compiler accept a temporary expression here.
+      * This function will const_cast it, so constness isn't honored here.
+      *
+      * See TriangularView::solve() for the details.
+      */
     template
     EIGEN_DEVICE_FUNC
     void solveInPlace(const MatrixBase& other) const;
@@ -456,18 +507,26 @@ template class TriangularViewImpl<_Mat
     void solveInPlace(const MatrixBase& other) const
     { return solveInPlace(other); }
+    /** Swaps the coefficients of the common triangular parts of two matrices */
     template
     EIGEN_DEVICE_FUNC
+#ifdef EIGEN_PARSED_BY_DOXYGEN
+    void swap(TriangularBase &other)
+#else
     void swap(TriangularBase const & other)
+#endif
     {
+      EIGEN_STATIC_ASSERT_LVALUE(OtherDerived);
       call_assignment(derived(), other.const_cast_derived(), internal::swap_assign_op());
     }
-    // TODO: this overload is ambiguous and it should be deprecated (Gael)
+    /** \deprecated
+      * Shortcut for \code (*this).swap(other.triangularView<(*this)::Mode>()) \endcode */
     template
     EIGEN_DEVICE_FUNC
     void swap(MatrixBase const & other)
     {
+      EIGEN_STATIC_ASSERT_LVALUE(OtherDerived);
       call_assignment(derived(), other.const_cast_derived(), internal::swap_assign_op());
     }
@@ -503,7 +562,7 @@ template
 template
 void TriangularViewImpl::lazyAssign(const MatrixBase& other)
 {
-  internal::call_assignment(derived().noalias(), other.template triangularView());
+  internal::call_assignment_no_alias(derived(), other.template triangularView());
 }
@@ -523,7 +582,7 @@ template
 void TriangularViewImpl::lazyAssign(const TriangularBase& other)
 {
   eigen_assert(Mode == int(OtherDerived::Mode));
-  internal::call_assignment(derived().noalias(), other.derived());
+  internal::call_assignment_no_alias(derived(), other.derived());
 }
 /***************************************************************************
@@ -745,7 +804,7 @@ EIGEN_DEVICE_FUNC void call_triangular_assignment_loop(const DstXprType& dst, co
   enum {
     unroll = DstXprType::SizeAtCompileTime != Dynamic
-          && SrcEvaluatorType::CoeffReadCost != Dynamic
+          && SrcEvaluatorType::CoeffReadCost < HugeCost
           && DstXprType::SizeAtCompileTime * SrcEvaluatorType::CoeffReadCost / 2 <= EIGEN_UNROLLING_LIMIT
   };
diff --git a/Eigen/src/Core/VectorwiseOp.h b/Eigen/src/Core/VectorwiseOp.h
old mode 100644
new mode 100755
index 37171aaa0..483f71909
--- a/Eigen/src/Core/VectorwiseOp.h
+++ b/Eigen/src/Core/VectorwiseOp.h
@@ -41,8 +41,6 @@ struct traits >
   typedef typename traits::StorageKind StorageKind;
   typedef typename traits::XprKind XprKind;
   typedef typename MatrixType::Scalar InputScalar;
-  typedef typename ref_selector::type MatrixTypeNested;
-  typedef typename remove_all::type _MatrixTypeNested;
   enum {
     RowsAtCompileTime = Direction==Vertical ? 1 : MatrixType::RowsAtCompileTime,
     ColsAtCompileTime = Direction==Horizontal ?
1 : MatrixType::ColsAtCompileTime, @@ -62,8 +60,6 @@ class PartialReduxExpr : public internal::dense_xpr_base< PartialReduxExpr::type Base; EIGEN_DENSE_PUBLIC_INTERFACE(PartialReduxExpr) - typedef typename internal::traits::MatrixTypeNested MatrixTypeNested; - typedef typename internal::traits::_MatrixTypeNested _MatrixTypeNested; EIGEN_DEVICE_FUNC explicit PartialReduxExpr(const MatrixType& mat, const MemberOp& func = MemberOp()) @@ -74,24 +70,14 @@ class PartialReduxExpr : public internal::dense_xpr_base< PartialReduxExpr::AddCost); EIGEN_MEMBER_FUNCTOR(count, (Size-1)*NumTraits::AddCost); EIGEN_MEMBER_FUNCTOR(prod, (Size-1)*NumTraits::MulCost); +template +struct member_lpnorm { + typedef ResultType result_type; + template struct Cost + { enum { value = (Size+5) * NumTraits::MulCost + (Size-1)*NumTraits::AddCost }; }; + EIGEN_DEVICE_FUNC member_lpnorm() {} + template + EIGEN_DEVICE_FUNC inline ResultType operator()(const XprType& mat) const + { return mat.template lpNorm
<p>
(); } +}; template struct member_redux { @@ -290,6 +286,10 @@ template class VectorwiseOp typedef typename ReturnType::Type ProdReturnType; typedef Reverse ReverseReturnType; + template struct LpNormReturnType { + typedef PartialReduxExpr,Direction> Type; + }; + /** \returns a row (or column) vector expression of the smallest coefficient * of each column (or row) of the referenced expression. * @@ -340,6 +340,19 @@ template class VectorwiseOp const NormReturnType norm() const { return NormReturnType(_expression()); } + /** \returns a row (or column) vector expression of the norm + * of each column (or row) of the referenced expression. + * This is a vector with real entries, even if the original matrix has complex entries. + * + * Example: \include PartialRedux_norm.cpp + * Output: \verbinclude PartialRedux_norm.out + * + * \sa DenseBase::norm() */ + template + EIGEN_DEVICE_FUNC + const typename LpNormReturnType
<p>
::Type lpNorm() const + { return typename LpNormReturnType
<p>
::Type(_expression()); } + /** \returns a row (or column) vector expression of the norm * of each column (or row) of the referenced expression, using diff --git a/Eigen/src/Core/Visitor.h b/Eigen/src/Core/Visitor.h index a4e2cebab..7aac0b6e1 100644 --- a/Eigen/src/Core/Visitor.h +++ b/Eigen/src/Core/Visitor.h @@ -109,14 +109,11 @@ void DenseBase::visit(Visitor& visitor) const typedef typename internal::visitor_evaluator ThisEvaluator; ThisEvaluator thisEval(derived()); - enum { unroll = SizeAtCompileTime != Dynamic - && ThisEvaluator::CoeffReadCost != Dynamic - && (SizeAtCompileTime == 1 || internal::functor_traits::Cost != Dynamic) - && SizeAtCompileTime * ThisEvaluator::CoeffReadCost + (SizeAtCompileTime-1) * internal::functor_traits::Cost - <= EIGEN_UNROLLING_LIMIT }; - return internal::visitor_impl::run(thisEval, visitor); + enum { + unroll = SizeAtCompileTime != Dynamic + && SizeAtCompileTime * ThisEvaluator::CoeffReadCost + (SizeAtCompileTime-1) * internal::functor_traits::Cost <= EIGEN_UNROLLING_LIMIT + }; + return internal::visitor_impl::run(thisEval, visitor); } namespace internal { diff --git a/Eigen/src/Core/arch/AVX/MathFunctions.h b/Eigen/src/Core/arch/AVX/MathFunctions.h index 06cd56684..7baf57eca 100644 --- a/Eigen/src/Core/arch/AVX/MathFunctions.h +++ b/Eigen/src/Core/arch/AVX/MathFunctions.h @@ -10,11 +10,6 @@ #ifndef EIGEN_MATH_FUNCTIONS_AVX_H #define EIGEN_MATH_FUNCTIONS_AVX_H -// For some reason, this function didn't make it into the avxintirn.h -// used by the compiler, so we'll just wrap it. -#define _mm256_setr_m128(lo, hi) \ - _mm256_insertf128_si256(_mm256_castsi128_si256(lo), (hi), 1) - /* The sin, cos, exp, and log functions of this file are loosely derived from * Julien Pommier's sse math library: http://gruntthepeon.free.fr/ssemath/ */ @@ -38,10 +33,10 @@ psin(const Packet8f& _x) { _EIGEN_DECLARE_CONST_Packet8f(two, 2.0f); _EIGEN_DECLARE_CONST_Packet8f(one_over_four, 0.25f); _EIGEN_DECLARE_CONST_Packet8f(one_over_pi, 3.183098861837907e-01f); - _EIGEN_DECLARE_CONST_Packet8f(neg_pi_first, -3.140625000000000e+00); - _EIGEN_DECLARE_CONST_Packet8f(neg_pi_second, -9.670257568359375e-04); - _EIGEN_DECLARE_CONST_Packet8f(neg_pi_third, -6.278329571784980e-07); - _EIGEN_DECLARE_CONST_Packet8f(four_over_pi, 1.273239544735163e+00); + _EIGEN_DECLARE_CONST_Packet8f(neg_pi_first, -3.140625000000000e+00f); + _EIGEN_DECLARE_CONST_Packet8f(neg_pi_second, -9.670257568359375e-04f); + _EIGEN_DECLARE_CONST_Packet8f(neg_pi_third, -6.278329571784980e-07f); + _EIGEN_DECLARE_CONST_Packet8f(four_over_pi, 1.273239544735163e+00f); // Map x from [-Pi/4,3*Pi/4] to z in [-1,3] and subtract the shifted period. Packet8f z = pmul(x, p8f_one_over_pi); @@ -55,15 +50,15 @@ psin(const Packet8f& _x) { // is odd. Packet8i shift_ints = _mm256_cvtps_epi32(shift); Packet8i shift_isodd = - (__m256i)_mm256_and_ps((__m256)shift_ints, (__m256)p8i_one); + _mm256_castps_si256(_mm256_and_ps(_mm256_castsi256_ps(shift_ints), _mm256_castsi256_ps(p8i_one))); #ifdef EIGEN_VECTORIZE_AVX2 Packet8i sign_flip_mask = _mm256_slli_epi32(shift_isodd, 31); #else __m128i lo = - _mm_slli_epi32(_mm256_extractf128_si256((__m256i)shift_isodd, 0), 31); + _mm_slli_epi32(_mm256_extractf128_si256(shift_isodd, 0), 31); __m128i hi = - _mm_slli_epi32(_mm256_extractf128_si256((__m256i)shift_isodd, 1), 31); - Packet8i sign_flip_mask = _mm256_setr_m128(lo, hi); + _mm_slli_epi32(_mm256_extractf128_si256(shift_isodd, 1), 31); + Packet8i sign_flip_mask = _mm256_set_m128(hi, lo); #endif // Create a mask for which interpolant to use, i.e. 
if z > 1, then the mask @@ -72,9 +67,9 @@ psin(const Packet8f& _x) { // Evaluate the polynomial for the interval [1,3] in z. _EIGEN_DECLARE_CONST_Packet8f(coeff_right_0, 9.999999724233232e-01f); - _EIGEN_DECLARE_CONST_Packet8f(coeff_right_2, -3.084242535619928e-01); - _EIGEN_DECLARE_CONST_Packet8f(coeff_right_4, 1.584991525700324e-02); - _EIGEN_DECLARE_CONST_Packet8f(coeff_right_6, -3.188805084631342e-04); + _EIGEN_DECLARE_CONST_Packet8f(coeff_right_2, -3.084242535619928e-01f); + _EIGEN_DECLARE_CONST_Packet8f(coeff_right_4, 1.584991525700324e-02f); + _EIGEN_DECLARE_CONST_Packet8f(coeff_right_6, -3.188805084631342e-04f); Packet8f z_minus_two = psub(z, p8f_two); Packet8f z_minus_two2 = pmul(z_minus_two, z_minus_two); Packet8f right = pmadd(p8f_coeff_right_6, z_minus_two2, p8f_coeff_right_4); @@ -82,10 +77,10 @@ psin(const Packet8f& _x) { right = pmadd(right, z_minus_two2, p8f_coeff_right_0); // Evaluate the polynomial for the interval [-1,1] in z. - _EIGEN_DECLARE_CONST_Packet8f(coeff_left_1, 7.853981525427295e-01); - _EIGEN_DECLARE_CONST_Packet8f(coeff_left_3, -8.074536727092352e-02); - _EIGEN_DECLARE_CONST_Packet8f(coeff_left_5, 2.489871967827018e-03); - _EIGEN_DECLARE_CONST_Packet8f(coeff_left_7, -3.587725841214251e-05); + _EIGEN_DECLARE_CONST_Packet8f(coeff_left_1, 7.853981525427295e-01f); + _EIGEN_DECLARE_CONST_Packet8f(coeff_left_3, -8.074536727092352e-02f); + _EIGEN_DECLARE_CONST_Packet8f(coeff_left_5, 2.489871967827018e-03f); + _EIGEN_DECLARE_CONST_Packet8f(coeff_left_7, -3.587725841214251e-05f); Packet8f z2 = pmul(z, z); Packet8f left = pmadd(p8f_coeff_left_7, z2, p8f_coeff_left_5); left = pmadd(left, z2, p8f_coeff_left_3); @@ -98,7 +93,7 @@ psin(const Packet8f& _x) { Packet8f res = _mm256_or_ps(left, right); // Flip the sign on the odd intervals and return the result. - res = _mm256_xor_ps(res, (__m256)sign_flip_mask); + res = _mm256_xor_ps(res, _mm256_castsi256_ps(sign_flip_mask)); return res; } @@ -145,11 +140,11 @@ plog(const Packet8f& _x) { // Extract the shifted exponents (No bitwise shifting in regular AVX, so // convert to SSE and do it there). #ifdef EIGEN_VECTORIZE_AVX2 - Packet8f emm0 = _mm256_cvtepi32_ps(_mm256_srli_epi32((__m256i)x, 23)); + Packet8f emm0 = _mm256_cvtepi32_ps(_mm256_srli_epi32(_mm256_castps_si256(x), 23)); #else - __m128i lo = _mm_srli_epi32(_mm256_extractf128_si256((__m256i)x, 0), 23); - __m128i hi = _mm_srli_epi32(_mm256_extractf128_si256((__m256i)x, 1), 23); - Packet8f emm0 = _mm256_cvtepi32_ps(_mm256_setr_m128(lo, hi)); + __m128i lo = _mm_srli_epi32(_mm256_extractf128_si256(_mm256_castps_si256(x), 0), 23); + __m128i hi = _mm_srli_epi32(_mm256_extractf128_si256(_mm256_castps_si256(x), 1), 23); + Packet8f emm0 = _mm256_cvtepi32_ps(_mm256_set_m128(hi,lo)); #endif Packet8f e = _mm256_sub_ps(emm0, p8f_126f); @@ -264,7 +259,7 @@ pexp(const Packet8f& _x) { #else __m128i lo = _mm_slli_epi32(_mm256_extractf128_si256(emm0, 0), 23); __m128i hi = _mm_slli_epi32(_mm256_extractf128_si256(emm0, 1), 23); - emm0 = _mm256_setr_m128(lo, hi); + emm0 = _mm256_set_m128(hi,lo); #endif // Return 2^m * exp(r). @@ -348,7 +343,7 @@ pexp(const Packet4d& _x) { // Construct the result 2^n * exp(g) = e * x. The max is used to catch // non-finite values in the input. - return pmax(pmul(x, Packet4d(e)), _x); + return pmax(pmul(x, _mm256_castsi256_pd(e)), _x); } // Functions for sqrt. 
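The hunks above replace C-style bit casts between __m256 and __m256i with the dedicated cast intrinsics, and drop the local _mm256_setr_m128 macro in favor of the standard _mm256_set_m128. A minimal sketch of both idioms, assuming an AVX-enabled compiler that provides _mm256_set_m128; the helper names below are hypothetical and not part of Eigen or of this patch:

#include <immintrin.h>

// Reinterpret integer lanes as float lanes, bit for bit. The cast intrinsics
// generate no instructions, and unlike C-style casts such as (__m256)v (a GNU
// extension) they are also accepted by MSVC and ICC.
static inline __m256 int_bits_as_float(__m256i v) {
  return _mm256_castsi256_ps(v);
}

// Build a 256-bit vector from two 128-bit halves. _mm256_set_m128 takes the
// high half first, so the removed setr-style macro with arguments (lo, hi)
// becomes _mm256_set_m128(hi, lo).
static inline __m256 join_halves(__m128 lo, __m128 hi) {
  return _mm256_set_m128(hi, lo);
}
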
@@ -393,7 +388,7 @@ Packet4d psqrt(const Packet4d& x) { template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet8f prsqrt(const Packet8f& _x) { - _EIGEN_DECLARE_CONST_Packet8f_FROM_INT(inf, 0x7f800000); + _EIGEN_DECLARE_CONST_Packet8f_FROM_INT(inf, 0x7f800000); _EIGEN_DECLARE_CONST_Packet8f_FROM_INT(nan, 0x7fc00000); _EIGEN_DECLARE_CONST_Packet8f(one_point_five, 1.5f); _EIGEN_DECLARE_CONST_Packet8f(minus_half, -0.5f); diff --git a/Eigen/src/Core/arch/AVX/PacketMath.h b/Eigen/src/Core/arch/AVX/PacketMath.h index 32c121ab6..717ae67c5 100644 --- a/Eigen/src/Core/arch/AVX/PacketMath.h +++ b/Eigen/src/Core/arch/AVX/PacketMath.h @@ -43,7 +43,7 @@ template<> struct is_arithmetic<__m256d> { enum { value = true }; }; const Packet4d p4d_##NAME = pset1(X) #define _EIGEN_DECLARE_CONST_Packet8f_FROM_INT(NAME,X) \ - const Packet8f p8f_##NAME = (__m256)pset1(X) + const Packet8f p8f_##NAME = _mm256_castsi256_ps(pset1(X)) #define _EIGEN_DECLARE_CONST_Packet8i(NAME,X) \ const Packet8i p8i_##NAME = pset1(X) @@ -66,7 +66,10 @@ template<> struct packet_traits : default_packet_traits HasExp = 1, HasSqrt = 1, HasRsqrt = 1, - HasBlend = 1 + HasBlend = 1, + HasRound = 1, + HasFloor = 1, + HasCeil = 1 }; }; template<> struct packet_traits : default_packet_traits @@ -83,7 +86,10 @@ template<> struct packet_traits : default_packet_traits HasExp = 1, HasSqrt = 1, HasRsqrt = 1, - HasBlend = 1 + HasBlend = 1, + HasRound = 1, + HasFloor = 1, + HasCeil = 1 }; }; @@ -176,6 +182,15 @@ template<> EIGEN_STRONG_INLINE Packet4d pmin(const Packet4d& a, const template<> EIGEN_STRONG_INLINE Packet8f pmax(const Packet8f& a, const Packet8f& b) { return _mm256_max_ps(a,b); } template<> EIGEN_STRONG_INLINE Packet4d pmax(const Packet4d& a, const Packet4d& b) { return _mm256_max_pd(a,b); } +template<> EIGEN_STRONG_INLINE Packet8f pround(const Packet8f& a) { return _mm256_round_ps(a, _MM_FROUND_CUR_DIRECTION); } +template<> EIGEN_STRONG_INLINE Packet4d pround(const Packet4d& a) { return _mm256_round_pd(a, _MM_FROUND_CUR_DIRECTION); } + +template<> EIGEN_STRONG_INLINE Packet8f pceil(const Packet8f& a) { return _mm256_ceil_ps(a); } +template<> EIGEN_STRONG_INLINE Packet4d pceil(const Packet4d& a) { return _mm256_ceil_pd(a); } + +template<> EIGEN_STRONG_INLINE Packet8f pfloor(const Packet8f& a) { return _mm256_floor_ps(a); } +template<> EIGEN_STRONG_INLINE Packet4d pfloor(const Packet4d& a) { return _mm256_floor_pd(a); } + template<> EIGEN_STRONG_INLINE Packet8f pand(const Packet8f& a, const Packet8f& b) { return _mm256_and_ps(a,b); } template<> EIGEN_STRONG_INLINE Packet4d pand(const Packet4d& a, const Packet4d& b) { return _mm256_and_pd(a,b); } diff --git a/Eigen/src/Core/arch/CUDA/MathFunctions.h b/Eigen/src/Core/arch/CUDA/MathFunctions.h index 3bea88bea..ecd5c444e 100644 --- a/Eigen/src/Core/arch/CUDA/MathFunctions.h +++ b/Eigen/src/Core/arch/CUDA/MathFunctions.h @@ -66,6 +66,43 @@ double2 prsqrt(const double2& a) return make_double2(rsqrt(a.x), rsqrt(a.y)); } +template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE +float4 plgamma(const float4& a) +{ + return make_float4(lgammaf(a.x), lgammaf(a.y), lgammaf(a.z), lgammaf(a.w)); +} + +template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE +double2 plgamma(const double2& a) +{ + return make_double2(lgamma(a.x), lgamma(a.y)); +} + +template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE +float4 perf(const float4& a) +{ + return make_float4(erf(a.x), erf(a.y), erf(a.z), erf(a.w)); +} + +template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE +double2 perf(const double2& a) +{ + return 
make_double2(erf(a.x), erf(a.y)); +} + +template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE +float4 perfc(const float4& a) +{ + return make_float4(erfc(a.x), erfc(a.y), erfc(a.z), erfc(a.w)); +} + +template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE +double2 perfc(const double2& a) +{ + return make_double2(erfc(a.x), erfc(a.y)); +} + + #endif } // end namespace internal diff --git a/Eigen/src/Core/arch/CUDA/PacketMath.h b/Eigen/src/Core/arch/CUDA/PacketMath.h index a2d803c06..cb1b547e0 100644 --- a/Eigen/src/Core/arch/CUDA/PacketMath.h +++ b/Eigen/src/Core/arch/CUDA/PacketMath.h @@ -39,6 +39,9 @@ template<> struct packet_traits : default_packet_traits HasExp = 1, HasSqrt = 1, HasRsqrt = 1, + HasLGamma = 1, + HasErf = 1, + HasErfc = 1, HasBlend = 0, }; @@ -59,6 +62,9 @@ template<> struct packet_traits : default_packet_traits HasExp = 1, HasSqrt = 1, HasRsqrt = 1, + HasLGamma = 1, + HasErf = 1, + HasErfc = 1, HasBlend = 0, }; @@ -177,7 +183,7 @@ template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void pstoreu(double* to to[1] = from.y; } -#ifdef __CUDA_ARCH__ +#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 350 template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float4 ploadt_ro(const float* from) { return __ldg((const float4*)from); diff --git a/Eigen/src/Core/arch/NEON/Complex.h b/Eigen/src/Core/arch/NEON/Complex.h index d2322b307..d2d467936 100644 --- a/Eigen/src/Core/arch/NEON/Complex.h +++ b/Eigen/src/Core/arch/NEON/Complex.h @@ -73,7 +73,7 @@ template<> EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& a, con // Get the real values of a | a1_re | a1_re | a2_re | a2_re | v1 = vcombine_f32(vdup_lane_f32(vget_low_f32(a.v), 0), vdup_lane_f32(vget_high_f32(a.v), 0)); - // Get the real values of a | a1_im | a1_im | a2_im | a2_im | + // Get the imag values of a | a1_im | a1_im | a2_im | a2_im | v2 = vcombine_f32(vdup_lane_f32(vget_low_f32(a.v), 1), vdup_lane_f32(vget_high_f32(a.v), 1)); // Multiply the real a with b v1 = vmulq_f32(v1, b.v); @@ -325,8 +325,8 @@ template<> EIGEN_STRONG_INLINE Packet1cd pmul(const Packet1cd& a, con // Get the real values of a v1 = vdupq_lane_f64(vget_low_f64(a.v), 0); - // Get the real values of a - v2 = vdupq_lane_f64(vget_high_f64(a.v), 1); + // Get the imag values of a + v2 = vdupq_lane_f64(vget_high_f64(a.v), 0); // Multiply the real a with b v1 = vmulq_f64(v1, b.v); // Multiply the imag a with b diff --git a/Eigen/src/Core/arch/SSE/Complex.h b/Eigen/src/Core/arch/SSE/Complex.h index 2a44b6272..4f45ddfbf 100644 --- a/Eigen/src/Core/arch/SSE/Complex.h +++ b/Eigen/src/Core/arch/SSE/Complex.h @@ -67,7 +67,6 @@ template<> EIGEN_STRONG_INLINE Packet2cf pconj(const Packet2cf& a) template<> EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& a, const Packet2cf& b) { - // TODO optimize it for SSE3 and 4 #ifdef EIGEN_VECTORIZE_SSE3 return Packet2cf(_mm_addsub_ps(_mm_mul_ps(_mm_moveldup_ps(a.v), b.v), _mm_mul_ps(_mm_movehdup_ps(a.v), @@ -310,9 +309,8 @@ template<> EIGEN_STRONG_INLINE Packet1cd pconj(const Packet1cd& a) template<> EIGEN_STRONG_INLINE Packet1cd pmul(const Packet1cd& a, const Packet1cd& b) { - // TODO optimize it for SSE3 and 4 #ifdef EIGEN_VECTORIZE_SSE3 - return Packet1cd(_mm_addsub_pd(_mm_mul_pd(vec2d_swizzle1(a.v, 0, 0), b.v), + return Packet1cd(_mm_addsub_pd(_mm_mul_pd(_mm_movedup_pd(a.v), b.v), _mm_mul_pd(vec2d_swizzle1(a.v, 1, 1), vec2d_swizzle1(b.v, 1, 0)))); #else diff --git a/Eigen/src/Core/arch/SSE/PacketMath.h b/Eigen/src/Core/arch/SSE/PacketMath.h index 7eb7278af..eb517b871 100755 --- a/Eigen/src/Core/arch/SSE/PacketMath.h +++ 
b/Eigen/src/Core/arch/SSE/PacketMath.h @@ -110,6 +110,13 @@ template<> struct packet_traits : default_packet_traits HasSqrt = 1, HasRsqrt = 1, HasBlend = 1 + +#ifdef EIGEN_VECTORIZE_SSE4_1 + , + HasRound = 1, + HasFloor = 1, + HasCeil = 1 +#endif }; }; template<> struct packet_traits : default_packet_traits @@ -127,6 +134,13 @@ template<> struct packet_traits : default_packet_traits HasSqrt = 1, HasRsqrt = 1, HasBlend = 1 + +#ifdef EIGEN_VECTORIZE_SSE4_1 + , + HasRound = 1, + HasFloor = 1, + HasCeil = 1 +#endif }; }; #endif @@ -135,7 +149,6 @@ template<> struct packet_traits : default_packet_traits typedef Packet4i type; typedef Packet4i half; enum { - // FIXME check the Has* Vectorizable = 1, AlignedOnScalar = 1, size=4, @@ -223,10 +236,6 @@ template<> EIGEN_STRONG_INLINE Packet4i pmul(const Packet4i& a, const template<> EIGEN_STRONG_INLINE Packet4f pdiv(const Packet4f& a, const Packet4f& b) { return _mm_div_ps(a,b); } template<> EIGEN_STRONG_INLINE Packet2d pdiv(const Packet2d& a, const Packet2d& b) { return _mm_div_pd(a,b); } -template<> EIGEN_STRONG_INLINE Packet4i pdiv(const Packet4i& /*a*/, const Packet4i& /*b*/) -{ eigen_assert(false && "packet integer division are not supported by SSE"); - return pset1(0); -} // for some weird raisons, it has to be overloaded for packet of integers template<> EIGEN_STRONG_INLINE Packet4i pmadd(const Packet4i& a, const Packet4i& b, const Packet4i& c) { return padd(pmul(a,b), c); } @@ -261,6 +270,17 @@ template<> EIGEN_STRONG_INLINE Packet4i pmax(const Packet4i& a, const #endif } +#ifdef EIGEN_VECTORIZE_SSE4_1 +template<> EIGEN_STRONG_INLINE Packet4f pround(const Packet4f& a) { return _mm_round_ps(a, 0); } +template<> EIGEN_STRONG_INLINE Packet2d pround(const Packet2d& a) { return _mm_round_pd(a, 0); } + +template<> EIGEN_STRONG_INLINE Packet4f pceil(const Packet4f& a) { return _mm_ceil_ps(a); } +template<> EIGEN_STRONG_INLINE Packet2d pceil(const Packet2d& a) { return _mm_ceil_pd(a); } + +template<> EIGEN_STRONG_INLINE Packet4f pfloor(const Packet4f& a) { return _mm_floor_ps(a); } +template<> EIGEN_STRONG_INLINE Packet2d pfloor(const Packet2d& a) { return _mm_floor_pd(a); } +#endif + template<> EIGEN_STRONG_INLINE Packet4f pand(const Packet4f& a, const Packet4f& b) { return _mm_and_ps(a,b); } template<> EIGEN_STRONG_INLINE Packet2d pand(const Packet2d& a, const Packet2d& b) { return _mm_and_pd(a,b); } template<> EIGEN_STRONG_INLINE Packet4i pand(const Packet4i& a, const Packet4i& b) { return _mm_and_si128(a,b); } @@ -287,8 +307,6 @@ template<> EIGEN_STRONG_INLINE Packet4i pload(const int* from) { E #if (EIGEN_COMP_MSVC==1600) // NOTE Some version of MSVC10 generates bad code when using _mm_loadu_ps // (i.e., it does not generate an unaligned load!! - // TODO On most architectures this version should also be faster than a single _mm_loadu_ps - // so we could also enable it for MSVC08 but first we have to make this later does not generate crap when doing so... __m128 res = _mm_loadl_pi(_mm_set1_ps(0.0f), (const __m64*)(from)); res = _mm_loadh_pi(res, (const __m64*)(from+2)); return res; @@ -299,24 +317,16 @@ template<> EIGEN_STRONG_INLINE Packet4i pload(const int* from) { E template<> EIGEN_STRONG_INLINE Packet2d ploadu(const double* from) { EIGEN_DEBUG_UNALIGNED_LOAD return _mm_loadu_pd(from); } template<> EIGEN_STRONG_INLINE Packet4i ploadu(const int* from) { EIGEN_DEBUG_UNALIGNED_LOAD return _mm_loadu_si128(reinterpret_cast(from)); } #else -// Fast unaligned loads. 
Note that here we cannot directly use intrinsics: this would -// require pointer casting to incompatible pointer types and leads to invalid code -// because of the strict aliasing rule. The "dummy" stuff are required to enforce -// a correct instruction dependency. -// TODO: do the same for MSVC (ICC is compatible) // NOTE: with the code below, MSVC's compiler crashes! #if EIGEN_COMP_GNUC && (EIGEN_ARCH_i386 || (EIGEN_ARCH_x86_64 && EIGEN_GNUC_AT_LEAST(4, 8))) // bug 195: gcc/i386 emits weird x87 fldl/fstpl instructions for _mm_load_sd #define EIGEN_AVOID_CUSTOM_UNALIGNED_LOADS 1 - #define EIGEN_AVOID_CUSTOM_UNALIGNED_STORES 1 #elif EIGEN_COMP_CLANG // bug 201: Segfaults in __mm_loadh_pd with clang 2.8 #define EIGEN_AVOID_CUSTOM_UNALIGNED_LOADS 1 - #define EIGEN_AVOID_CUSTOM_UNALIGNED_STORES 0 #else #define EIGEN_AVOID_CUSTOM_UNALIGNED_LOADS 0 - #define EIGEN_AVOID_CUSTOM_UNALIGNED_STORES 0 #endif template<> EIGEN_STRONG_INLINE Packet4f ploadu(const float* from) @@ -374,17 +384,9 @@ template<> EIGEN_STRONG_INLINE void pstore(float* to, const Packet4f& f template<> EIGEN_STRONG_INLINE void pstore(double* to, const Packet2d& from) { EIGEN_DEBUG_ALIGNED_STORE _mm_store_pd(to, from); } template<> EIGEN_STRONG_INLINE void pstore(int* to, const Packet4i& from) { EIGEN_DEBUG_ALIGNED_STORE _mm_store_si128(reinterpret_cast<__m128i*>(to), from); } -template<> EIGEN_STRONG_INLINE void pstoreu(double* to, const Packet2d& from) { - EIGEN_DEBUG_UNALIGNED_STORE -#if EIGEN_AVOID_CUSTOM_UNALIGNED_STORES - _mm_storeu_pd(to, from); -#else - _mm_storel_pd((to), from); - _mm_storeh_pd((to+1), from); -#endif -} -template<> EIGEN_STRONG_INLINE void pstoreu(float* to, const Packet4f& from) { EIGEN_DEBUG_UNALIGNED_STORE pstoreu(reinterpret_cast(to), Packet2d(_mm_castps_pd(from))); } -template<> EIGEN_STRONG_INLINE void pstoreu(int* to, const Packet4i& from) { EIGEN_DEBUG_UNALIGNED_STORE pstoreu(reinterpret_cast(to), Packet2d(_mm_castsi128_pd(from))); } +template<> EIGEN_STRONG_INLINE void pstoreu(double* to, const Packet2d& from) { EIGEN_DEBUG_UNALIGNED_STORE _mm_storeu_pd(to, from); } +template<> EIGEN_STRONG_INLINE void pstoreu(float* to, const Packet4f& from) { EIGEN_DEBUG_UNALIGNED_STORE _mm_storeu_ps(to, from); } +template<> EIGEN_STRONG_INLINE void pstoreu(int* to, const Packet4i& from) { EIGEN_DEBUG_UNALIGNED_STORE _mm_storeu_si128(reinterpret_cast<__m128i*>(to), from); } template<> EIGEN_DEVICE_FUNC inline Packet4f pgather(const float* from, Index stride) { @@ -547,7 +549,6 @@ EIGEN_STRONG_INLINE void punpackp(Packet4f* vecs) } #ifdef EIGEN_VECTORIZE_SSE3 -// TODO implement SSE2 versions as well as integer versions template<> EIGEN_STRONG_INLINE Packet4f preduxp(const Packet4f* vecs) { return _mm_hadd_ps(_mm_hadd_ps(vecs[0], vecs[1]),_mm_hadd_ps(vecs[2], vecs[3])); @@ -556,11 +557,6 @@ template<> EIGEN_STRONG_INLINE Packet2d preduxp(const Packet2d* vecs) { return _mm_hadd_pd(vecs[0], vecs[1]); } -// SSSE3 version: -// EIGEN_STRONG_INLINE Packet4i preduxp(const Packet4i* vecs) -// { -// return _mm_hadd_epi32(_mm_hadd_epi32(vecs[0], vecs[1]),_mm_hadd_epi32(vecs[2], vecs[3])); -// } template<> EIGEN_STRONG_INLINE float predux(const Packet4f& a) { @@ -569,23 +565,16 @@ template<> EIGEN_STRONG_INLINE float predux(const Packet4f& a) } template<> EIGEN_STRONG_INLINE double predux(const Packet2d& a) { return pfirst(_mm_hadd_pd(a, a)); } - -// SSSE3 version: -// EIGEN_STRONG_INLINE float predux(const Packet4i& a) -// { -// Packet4i tmp0 = _mm_hadd_epi32(a,a); -// return pfirst(_mm_hadd_epi32(tmp0, tmp0)); -// } 
#else // SSE2 versions template<> EIGEN_STRONG_INLINE float predux(const Packet4f& a) { Packet4f tmp = _mm_add_ps(a, _mm_movehl_ps(a,a)); - return pfirst(_mm_add_ss(tmp, _mm_shuffle_ps(tmp,tmp, 1))); + return pfirst(_mm_add_ss(tmp, _mm_shuffle_ps(tmp,tmp, 1))); } template<> EIGEN_STRONG_INLINE double predux(const Packet2d& a) { - return pfirst(_mm_add_sd(a, _mm_unpackhi_pd(a,a))); + return pfirst(_mm_add_sd(a, _mm_unpackhi_pd(a,a))); } template<> EIGEN_STRONG_INLINE Packet4f preduxp(const Packet4f* vecs) @@ -608,6 +597,18 @@ template<> EIGEN_STRONG_INLINE Packet2d preduxp(const Packet2d* vecs) } #endif // SSE3 + +#ifdef EIGEN_VECTORIZE_SSSE3 +template<> EIGEN_STRONG_INLINE Packet4i preduxp(const Packet4i* vecs) +{ + return _mm_hadd_epi32(_mm_hadd_epi32(vecs[0], vecs[1]),_mm_hadd_epi32(vecs[2], vecs[3])); +} +template<> EIGEN_STRONG_INLINE int predux(const Packet4i& a) +{ + Packet4i tmp0 = _mm_hadd_epi32(a,a); + return pfirst(_mm_hadd_epi32(tmp0,tmp0)); +} +#else template<> EIGEN_STRONG_INLINE int predux(const Packet4i& a) { Packet4i tmp = _mm_add_epi32(a, _mm_unpackhi_epi64(a,a)); @@ -627,7 +628,7 @@ template<> EIGEN_STRONG_INLINE Packet4i preduxp(const Packet4i* vecs) tmp0 = _mm_unpackhi_epi64(tmp0, tmp1); return _mm_add_epi32(tmp0, tmp2); } - +#endif // Other reduction functions: // mul diff --git a/Eigen/src/Core/functors/BinaryFunctors.h b/Eigen/src/Core/functors/BinaryFunctors.h index cc0e80a33..4962d625c 100644 --- a/Eigen/src/Core/functors/BinaryFunctors.h +++ b/Eigen/src/Core/functors/BinaryFunctors.h @@ -26,10 +26,10 @@ template struct scalar_sum_op { EIGEN_EMPTY_STRUCT_CTOR(scalar_sum_op) EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& a, const Scalar& b) const { return a + b; } template - EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a, const Packet& b) const + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a, const Packet& b) const { return internal::padd(a,b); } template - EIGEN_STRONG_INLINE const Scalar predux(const Packet& a) const + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar predux(const Packet& a) const { return internal::predux(a); } }; template @@ -65,10 +65,10 @@ template struct scalar_product_op { EIGEN_EMPTY_STRUCT_CTOR(scalar_product_op) EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const result_type operator() (const LhsScalar& a, const RhsScalar& b) const { return a * b; } template - EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a, const Packet& b) const + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a, const Packet& b) const { return internal::pmul(a,b); } template - EIGEN_STRONG_INLINE const result_type predux(const Packet& a) const + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const result_type predux(const Packet& a) const { return internal::predux_mul(a); } }; template @@ -97,7 +97,7 @@ template struct scalar_conj_product_op { { return conj_helper().pmul(a,b); } template - EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a, const Packet& b) const + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a, const Packet& b) const { return conj_helper().pmul(a,b); } }; template @@ -117,10 +117,10 @@ template struct scalar_min_op { EIGEN_EMPTY_STRUCT_CTOR(scalar_min_op) EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& a, const Scalar& b) const { return numext::mini(a, b); } template - EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a, const Packet& b) const + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const 
Packet packetOp(const Packet& a, const Packet& b) const { return internal::pmin(a,b); } template - EIGEN_STRONG_INLINE const Scalar predux(const Packet& a) const + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar predux(const Packet& a) const { return internal::predux_min(a); } }; template @@ -140,10 +140,10 @@ template struct scalar_max_op { EIGEN_EMPTY_STRUCT_CTOR(scalar_max_op) EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& a, const Scalar& b) const { return numext::maxi(a, b); } template - EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a, const Packet& b) const + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a, const Packet& b) const { return internal::pmax(a,b); } template - EIGEN_STRONG_INLINE const Scalar predux(const Packet& a) const + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar predux(const Packet& a) const { return internal::predux_max(a); } }; template @@ -175,22 +175,37 @@ struct result_of(Scalar,Scalar)> { template struct scalar_cmp_op { + typedef bool result_type; EIGEN_EMPTY_STRUCT_CTOR(scalar_cmp_op) EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool operator()(const Scalar& a, const Scalar& b) const {return a==b;} }; template struct scalar_cmp_op { + typedef bool result_type; EIGEN_EMPTY_STRUCT_CTOR(scalar_cmp_op) EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool operator()(const Scalar& a, const Scalar& b) const {return a struct scalar_cmp_op { + typedef bool result_type; EIGEN_EMPTY_STRUCT_CTOR(scalar_cmp_op) EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool operator()(const Scalar& a, const Scalar& b) const {return a<=b;} }; +template struct scalar_cmp_op { + typedef bool result_type; + EIGEN_EMPTY_STRUCT_CTOR(scalar_cmp_op) + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool operator()(const Scalar& a, const Scalar& b) const {return a>b;} +}; +template struct scalar_cmp_op { + typedef bool result_type; + EIGEN_EMPTY_STRUCT_CTOR(scalar_cmp_op) + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool operator()(const Scalar& a, const Scalar& b) const {return a>=b;} +}; template struct scalar_cmp_op { + typedef bool result_type; EIGEN_EMPTY_STRUCT_CTOR(scalar_cmp_op) EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool operator()(const Scalar& a, const Scalar& b) const {return !(a<=b || b<=a);} }; template struct scalar_cmp_op { + typedef bool result_type; EIGEN_EMPTY_STRUCT_CTOR(scalar_cmp_op) EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool operator()(const Scalar& a, const Scalar& b) const {return a!=b;} }; @@ -252,7 +267,7 @@ template struct scalar_difference_op { EIGEN_EMPTY_STRUCT_CTOR(scalar_difference_op) EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& a, const Scalar& b) const { return a - b; } template - EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a, const Packet& b) const + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a, const Packet& b) const { return internal::psub(a,b); } }; template @@ -277,7 +292,7 @@ template struct scalar_quotient_op { EIGEN_EMPTY_STRUCT_CTOR(scalar_quotient_op) EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const result_type operator() (const LhsScalar& a, const RhsScalar& b) const { return a / b; } template - EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a, const Packet& b) const + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a, const Packet& b) const { return internal::pdiv(a,b); } }; template @@ -349,7 +364,7 @@ struct scalar_multiple_op { EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar operator() (const Scalar& a) 
const { return a * m_other; } template - EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a) const + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a) const { return internal::pmul(a, pset1(m_other)); } typename add_const_on_value_type::Nested>::type m_other; }; @@ -384,7 +399,7 @@ struct scalar_quotient1_op { EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE scalar_quotient1_op(const Scalar& other) : m_other(other) {} EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar operator() (const Scalar& a) const { return a / m_other; } template - EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a) const + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a) const { return internal::pdiv(a, pset1(m_other)); } typename add_const_on_value_type::Nested>::type m_other; }; @@ -426,7 +441,7 @@ struct scalar_add_op { EIGEN_DEVICE_FUNC inline scalar_add_op(const Scalar& other) : m_other(other) { } EIGEN_DEVICE_FUNC inline Scalar operator() (const Scalar& a) const { return a + m_other; } template - inline const Packet packetOp(const Packet& a) const + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a) const { return internal::padd(a, pset1(m_other)); } const Scalar m_other; }; @@ -440,11 +455,11 @@ struct functor_traits > */ template struct scalar_sub_op { - inline scalar_sub_op(const scalar_sub_op& other) : m_other(other.m_other) { } - inline scalar_sub_op(const Scalar& other) : m_other(other) { } - inline Scalar operator() (const Scalar& a) const { return a - m_other; } + EIGEN_DEVICE_FUNC inline scalar_sub_op(const scalar_sub_op& other) : m_other(other.m_other) { } + EIGEN_DEVICE_FUNC inline scalar_sub_op(const Scalar& other) : m_other(other) { } + EIGEN_DEVICE_FUNC inline Scalar operator() (const Scalar& a) const { return a - m_other; } template - inline const Packet packetOp(const Packet& a) const + EIGEN_DEVICE_FUNC inline const Packet packetOp(const Packet& a) const { return internal::psub(a, pset1(m_other)); } const Scalar m_other; }; @@ -458,11 +473,11 @@ struct functor_traits > */ template struct scalar_rsub_op { - inline scalar_rsub_op(const scalar_rsub_op& other) : m_other(other.m_other) { } - inline scalar_rsub_op(const Scalar& other) : m_other(other) { } - inline Scalar operator() (const Scalar& a) const { return m_other - a; } + EIGEN_DEVICE_FUNC inline scalar_rsub_op(const scalar_rsub_op& other) : m_other(other.m_other) { } + EIGEN_DEVICE_FUNC inline scalar_rsub_op(const Scalar& other) : m_other(other) { } + EIGEN_DEVICE_FUNC inline Scalar operator() (const Scalar& a) const { return m_other - a; } template - inline const Packet packetOp(const Packet& a) const + EIGEN_DEVICE_FUNC inline const Packet packetOp(const Packet& a) const { return internal::psub(pset1(m_other), a); } const Scalar m_other; }; @@ -477,8 +492,8 @@ struct functor_traits > template struct scalar_pow_op { // FIXME default copy constructors seems bugged with std::complex<> - inline scalar_pow_op(const scalar_pow_op& other) : m_exponent(other.m_exponent) { } - inline scalar_pow_op(const Scalar& exponent) : m_exponent(exponent) {} + EIGEN_DEVICE_FUNC inline scalar_pow_op(const scalar_pow_op& other) : m_exponent(other.m_exponent) { } + EIGEN_DEVICE_FUNC inline scalar_pow_op(const Scalar& exponent) : m_exponent(exponent) {} EIGEN_DEVICE_FUNC inline Scalar operator() (const Scalar& a) const { return numext::pow(a, m_exponent); } const Scalar m_exponent; @@ -493,10 +508,10 @@ struct functor_traits > */ template struct scalar_inverse_mult_op { - 
scalar_inverse_mult_op(const Scalar& other) : m_other(other) {} + EIGEN_DEVICE_FUNC scalar_inverse_mult_op(const Scalar& other) : m_other(other) {} EIGEN_DEVICE_FUNC inline Scalar operator() (const Scalar& a) const { return m_other / a; } template - inline const Packet packetOp(const Packet& a) const + EIGEN_DEVICE_FUNC inline const Packet packetOp(const Packet& a) const { return internal::pdiv(pset1(m_other),a); } Scalar m_other; }; diff --git a/Eigen/src/Core/functors/NullaryFunctors.h b/Eigen/src/Core/functors/NullaryFunctors.h index 130f20868..cd9fbf267 100644 --- a/Eigen/src/Core/functors/NullaryFunctors.h +++ b/Eigen/src/Core/functors/NullaryFunctors.h @@ -21,12 +21,11 @@ struct scalar_constant_op { template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (Index, Index = 0) const { return m_other; } template - EIGEN_STRONG_INLINE const PacketType packetOp(Index, Index = 0) const { return internal::pset1(m_other); } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const PacketType packetOp(Index, Index = 0) const { return internal::pset1(m_other); } const Scalar m_other; }; template struct functor_traits > -// FIXME replace this packet test by a safe one { enum { Cost = 1, PacketAccess = packet_traits::Vectorizable, IsRepeatable = true }; }; template struct scalar_identity_op { @@ -64,7 +63,7 @@ struct linspaced_op_impl } template - EIGEN_STRONG_INLINE const Packet packetOp(Index) const { return m_base = padd(m_base,m_packetStep); } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(Index) const { return m_base = padd(m_base,m_packetStep); } const Scalar m_low; const Scalar m_step; @@ -86,7 +85,7 @@ struct linspaced_op_impl EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (Index i) const { return m_low+i*m_step; } template - EIGEN_STRONG_INLINE const Packet packetOp(Index i) const + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(Index i) const { return internal::padd(m_lowPacket, pmul(m_stepPacket, padd(pset1(Scalar(i)),m_interPacket))); } const Scalar m_low; @@ -121,12 +120,12 @@ template struct linspa } template - EIGEN_STRONG_INLINE const Packet packetOp(Index i) const { return impl.packetOp(i); } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(Index i) const { return impl.packetOp(i); } // We need this function when assigning e.g. a RowVectorXd to a MatrixXd since // there row==0 and col is used for the actual iteration. template - EIGEN_STRONG_INLINE const Packet packetOp(Index row, Index col) const + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(Index row, Index col) const { eigen_assert(col==0 || row==0); return impl.packetOp(col + row); @@ -135,14 +134,12 @@ template struct linspa // This proxy object handles the actual required temporaries, the different // implementations (random vs. sequential access) as well as the // correct piping to size 2/4 packet operations. - // TODO find a way to make the packet type configurable const linspaced_op_impl impl; }; // all functors allow linear access, except scalar_identity_op. So we fix here a quick meta // to indicate whether a functor allows linear access, just always answering 'yes' except for // scalar_identity_op. 
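The functor hunks above all apply one pattern: the vectorized packetOp() (and, where they were missing, the constructors and the scalar operator()) gain EIGEN_DEVICE_FUNC so a single functor definition is callable from both host code and CUDA kernels. A minimal sketch of the pattern, using a hypothetical clamp functor that is not part of Eigen and assuming the Eigen::internal context of the files above:

// Hypothetical unary functor following the same conventions: empty-struct
// constructor, device-callable scalar path, device-callable packet path.
template<typename Scalar> struct scalar_clamp01_op {
  EIGEN_EMPTY_STRUCT_CTOR(scalar_clamp01_op)
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& a) const
  { return numext::mini(numext::maxi(a, Scalar(0)), Scalar(1)); }
  template<typename Packet>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a) const
  { return internal::pmin(internal::pmax(a, pset1<Packet>(Scalar(0))), pset1<Packet>(Scalar(1))); }
};
template<typename Scalar>
struct functor_traits<scalar_clamp01_op<Scalar> >
{ enum { Cost = 2*NumTraits<Scalar>::AddCost,
         PacketAccess = packet_traits<Scalar>::HasMin && packet_traits<Scalar>::HasMax }; };
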
-// FIXME move this to functor_traits adding a functor_default template struct functor_has_linear_access { enum { ret = 1 }; }; template struct functor_has_linear_access > { enum { ret = 0 }; }; diff --git a/Eigen/src/Core/functors/UnaryFunctors.h b/Eigen/src/Core/functors/UnaryFunctors.h index 2aab9d1ba..6891cfdda 100644 --- a/Eigen/src/Core/functors/UnaryFunctors.h +++ b/Eigen/src/Core/functors/UnaryFunctors.h @@ -23,7 +23,7 @@ template struct scalar_opposite_op { EIGEN_EMPTY_STRUCT_CTOR(scalar_opposite_op) EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& a) const { return -a; } template - EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a) const + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a) const { return internal::pnegate(a); } }; template @@ -43,7 +43,7 @@ template struct scalar_abs_op { typedef typename NumTraits::Real result_type; EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const result_type operator() (const Scalar& a) const { using std::abs; return abs(a); } template - EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a) const + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a) const { return internal::pabs(a); } }; template @@ -94,7 +94,7 @@ template struct scalar_abs2_op { EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const result_type operator() (const Scalar& a) const { return numext::abs2(a); } template - EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a) const + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a) const { return internal::pmul(a,a); } }; template @@ -111,7 +111,7 @@ template struct scalar_conjugate_op { EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& a) const { using numext::conj; return conj(a); } template - EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a) const { return internal::pconj(a); } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a) const { return internal::pconj(a); } }; template struct functor_traits > @@ -132,7 +132,7 @@ template struct scalar_arg_op { typedef typename NumTraits::Real result_type; EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const result_type operator() (const Scalar& a) const { using numext::arg; return arg(a); } template - EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a) const + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a) const { return internal::parg(a); } }; template @@ -232,7 +232,7 @@ template struct scalar_exp_op { EIGEN_EMPTY_STRUCT_CTOR(scalar_exp_op) EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const { using std::exp; return exp(a); } template - inline Packet packetOp(const Packet& a) const { return internal::pexp(a); } + EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& a) const { return internal::pexp(a); } }; template struct functor_traits > @@ -248,7 +248,7 @@ template struct scalar_log_op { EIGEN_EMPTY_STRUCT_CTOR(scalar_log_op) EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const { using std::log; return log(a); } template - inline Packet packetOp(const Packet& a) const { return internal::plog(a); } + EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& a) const { return internal::plog(a); } }; template struct functor_traits > @@ -264,7 +264,7 @@ template struct scalar_log10_op { EIGEN_EMPTY_STRUCT_CTOR(scalar_log10_op) EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const { using std::log10; return log10(a); } template - 
inline Packet packetOp(const Packet& a) const { return internal::plog10(a); } + EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& a) const { return internal::plog10(a); } }; template struct functor_traits > @@ -278,7 +278,7 @@ template struct scalar_sqrt_op { EIGEN_EMPTY_STRUCT_CTOR(scalar_sqrt_op) EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const { using std::sqrt; return sqrt(a); } template - inline Packet packetOp(const Packet& a) const { return internal::psqrt(a); } + EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& a) const { return internal::psqrt(a); } }; template struct functor_traits > @@ -296,7 +296,7 @@ template struct scalar_rsqrt_op { EIGEN_EMPTY_STRUCT_CTOR(scalar_rsqrt_op) EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const { using std::sqrt; return Scalar(1)/sqrt(a); } template - inline Packet packetOp(const Packet& a) const { return internal::prsqrt(a); } + EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& a) const { return internal::prsqrt(a); } }; template @@ -315,7 +315,7 @@ template struct scalar_cos_op { EIGEN_EMPTY_STRUCT_CTOR(scalar_cos_op) EIGEN_DEVICE_FUNC inline Scalar operator() (const Scalar& a) const { using std::cos; return cos(a); } template - inline Packet packetOp(const Packet& a) const { return internal::pcos(a); } + EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& a) const { return internal::pcos(a); } }; template struct functor_traits > @@ -334,7 +334,7 @@ template struct scalar_sin_op { EIGEN_EMPTY_STRUCT_CTOR(scalar_sin_op) EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const { using std::sin; return sin(a); } template - inline Packet packetOp(const Packet& a) const { return internal::psin(a); } + EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& a) const { return internal::psin(a); } }; template struct functor_traits > @@ -354,7 +354,7 @@ template struct scalar_tan_op { EIGEN_EMPTY_STRUCT_CTOR(scalar_tan_op) EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const { using std::tan; return tan(a); } template - inline Packet packetOp(const Packet& a) const { return internal::ptan(a); } + EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& a) const { return internal::ptan(a); } }; template struct functor_traits > @@ -373,7 +373,7 @@ template struct scalar_acos_op { EIGEN_EMPTY_STRUCT_CTOR(scalar_acos_op) EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const { using std::acos; return acos(a); } template - inline Packet packetOp(const Packet& a) const { return internal::pacos(a); } + EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& a) const { return internal::pacos(a); } }; template struct functor_traits > @@ -392,7 +392,7 @@ template struct scalar_asin_op { EIGEN_EMPTY_STRUCT_CTOR(scalar_asin_op) EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const { using std::asin; return asin(a); } template - inline Packet packetOp(const Packet& a) const { return internal::pasin(a); } + EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& a) const { return internal::pasin(a); } }; template struct functor_traits > @@ -403,15 +403,86 @@ struct functor_traits > }; }; + +/** \internal + * \brief Template functor to compute the natural log of the absolute + * value of Gamma of a scalar + * \sa class CwiseUnaryOp, Cwise::lgamma() + */ +template struct scalar_lgamma_op { + EIGEN_EMPTY_STRUCT_CTOR(scalar_lgamma_op) + EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const { + using numext::lgamma; return 
lgamma(a); + } + typedef typename packet_traits::type Packet; + inline Packet packetOp(const Packet& a) const { return internal::plgamma(a); } +}; +template +struct functor_traits > +{ + enum { + // Guesstimate + Cost = 10 * NumTraits::MulCost + 5 * NumTraits::AddCost, + PacketAccess = packet_traits::HasLGamma + }; +}; + +/** \internal + * \brief Template functor to compute the Gauss error function of a + * scalar + * \sa class CwiseUnaryOp, Cwise::erf() + */ +template struct scalar_erf_op { + EIGEN_EMPTY_STRUCT_CTOR(scalar_erf_op) + EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const { + using numext::erf; return erf(a); + } + typedef typename packet_traits::type Packet; + inline Packet packetOp(const Packet& a) const { return internal::perf(a); } +}; +template +struct functor_traits > +{ + enum { + // Guesstimate + Cost = 10 * NumTraits::MulCost + 5 * NumTraits::AddCost, + PacketAccess = packet_traits::HasErf + }; +}; + +/** \internal + * \brief Template functor to compute the Complementary Error Function + * of a scalar + * \sa class CwiseUnaryOp, Cwise::erfc() + */ +template struct scalar_erfc_op { + EIGEN_EMPTY_STRUCT_CTOR(scalar_erfc_op) + EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const { + using numext::erfc; return erfc(a); + } + typedef typename packet_traits::type Packet; + inline Packet packetOp(const Packet& a) const { return internal::perfc(a); } +}; +template +struct functor_traits > +{ + enum { + // Guesstimate + Cost = 10 * NumTraits::MulCost + 5 * NumTraits::AddCost, + PacketAccess = packet_traits::HasErfc + }; +}; + + /** \internal * \brief Template functor to compute the atan of a scalar * \sa class CwiseUnaryOp, ArrayBase::atan() */ template struct scalar_atan_op { EIGEN_EMPTY_STRUCT_CTOR(scalar_atan_op) - inline const Scalar operator() (const Scalar& a) const { using std::atan; return atan(a); } + EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const { using std::atan; return atan(a); } template - inline Packet packetOp(const Packet& a) const { return internal::patan(a); } + EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& a) const { return internal::patan(a); } }; template struct functor_traits > @@ -422,15 +493,16 @@ struct functor_traits > }; }; + /** \internal * \brief Template functor to compute the tanh of a scalar * \sa class CwiseUnaryOp, ArrayBase::tanh() */ template struct scalar_tanh_op { EIGEN_EMPTY_STRUCT_CTOR(scalar_tanh_op) - inline const Scalar operator() (const Scalar& a) const { using std::tanh; return tanh(a); } + EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const { using std::tanh; return tanh(a); } template - inline Packet packetOp(const Packet& a) const { return internal::ptanh(a); } + EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& a) const { return internal::ptanh(a); } }; template struct functor_traits > @@ -447,9 +519,9 @@ struct functor_traits > */ template struct scalar_sinh_op { EIGEN_EMPTY_STRUCT_CTOR(scalar_sinh_op) - inline const Scalar operator() (const Scalar& a) const { using std::sinh; return sinh(a); } + EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const { using std::sinh; return sinh(a); } template - inline Packet packetOp(const Packet& a) const { return internal::psinh(a); } + EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& a) const { return internal::psinh(a); } }; template struct functor_traits > @@ -466,9 +538,9 @@ struct functor_traits > */ template struct scalar_cosh_op { 
EIGEN_EMPTY_STRUCT_CTOR(scalar_cosh_op) - inline const Scalar operator() (const Scalar& a) const { using std::cosh; return cosh(a); } + EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const { using std::cosh; return cosh(a); } template - inline Packet packetOp(const Packet& a) const { return internal::pcosh(a); } + EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& a) const { return internal::pcosh(a); } }; template struct functor_traits > @@ -488,7 +560,7 @@ struct scalar_inverse_op { EIGEN_EMPTY_STRUCT_CTOR(scalar_inverse_op) EIGEN_DEVICE_FUNC inline Scalar operator() (const Scalar& a) const { return Scalar(1)/a; } template - inline const Packet packetOp(const Packet& a) const + EIGEN_DEVICE_FUNC inline const Packet packetOp(const Packet& a) const { return internal::pdiv(pset1(Scalar(1)),a); } }; template @@ -504,7 +576,7 @@ struct scalar_square_op { EIGEN_EMPTY_STRUCT_CTOR(scalar_square_op) EIGEN_DEVICE_FUNC inline Scalar operator() (const Scalar& a) const { return a*a; } template - inline const Packet packetOp(const Packet& a) const + EIGEN_DEVICE_FUNC inline const Packet packetOp(const Packet& a) const { return internal::pmul(a,a); } }; template @@ -520,7 +592,7 @@ struct scalar_cube_op { EIGEN_EMPTY_STRUCT_CTOR(scalar_cube_op) EIGEN_DEVICE_FUNC inline Scalar operator() (const Scalar& a) const { return a*a*a; } template - inline const Packet packetOp(const Packet& a) const + EIGEN_DEVICE_FUNC inline const Packet packetOp(const Packet& a) const { return internal::pmul(a,pmul(a,a)); } }; template @@ -535,7 +607,7 @@ template struct scalar_round_op { EIGEN_EMPTY_STRUCT_CTOR(scalar_round_op) EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& a) const { return numext::round(a); } template - inline Packet packetOp(const Packet& a) const { return internal::pround(a); } + EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& a) const { return internal::pround(a); } }; template struct functor_traits > @@ -554,7 +626,7 @@ template struct scalar_floor_op { EIGEN_EMPTY_STRUCT_CTOR(scalar_floor_op) EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& a) const { return numext::floor(a); } template - inline Packet packetOp(const Packet& a) const { return internal::pfloor(a); } + EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& a) const { return internal::pfloor(a); } }; template struct functor_traits > @@ -573,7 +645,7 @@ template struct scalar_ceil_op { EIGEN_EMPTY_STRUCT_CTOR(scalar_ceil_op) EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& a) const { return numext::ceil(a); } typedef typename packet_traits::type Packet; - inline Packet packetOp(const Packet& a) const { return internal::pceil(a); } + EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& a) const { return internal::pceil(a); } }; template struct functor_traits > @@ -655,6 +727,49 @@ struct functor_traits > { }; }; +/** \internal + * \brief Template functor to compute the signum of a scalar + * \sa class CwiseUnaryOp, Cwise::sign() + */ +template::IsComplex!=0) > struct scalar_sign_op; +template +struct scalar_sign_op { + EIGEN_EMPTY_STRUCT_CTOR(scalar_sign_op) + EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const + { + return Scalar( (a>Scalar(0)) - (a + //EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& a) const { return internal::psign(a); } +}; +template +struct scalar_sign_op { + EIGEN_EMPTY_STRUCT_CTOR(scalar_sign_op) + EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const + 
{ + using std::abs; + typedef typename NumTraits::Real real_type; + real_type aa = abs(a); + if (aa==0) + return Scalar(0); + aa = 1./aa; + return Scalar(real(a)*aa, imag(a)*aa ); + } + //TODO + //template + //EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& a) const { return internal::psign(a); } +}; +template +struct functor_traits > +{ enum { + Cost = + NumTraits::IsComplex + ? ( 8*NumTraits::MulCost ) // roughly + : ( 3*NumTraits::AddCost), + PacketAccess = packet_traits::HasSign + }; +}; } // end namespace internal diff --git a/Eigen/src/Core/products/GeneralBlockPanelKernel.h b/Eigen/src/Core/products/GeneralBlockPanelKernel.h index 79eaa7432..229e96ceb 100644 --- a/Eigen/src/Core/products/GeneralBlockPanelKernel.h +++ b/Eigen/src/Core/products/GeneralBlockPanelKernel.h @@ -36,37 +36,40 @@ const std::ptrdiff_t defaultL3CacheSize = 512*1024; #endif /** \internal */ -inline void manage_caching_sizes(Action action, std::ptrdiff_t* l1, std::ptrdiff_t* l2, std::ptrdiff_t* l3) -{ - static bool m_cache_sizes_initialized = false; - static std::ptrdiff_t m_l1CacheSize = 0; - static std::ptrdiff_t m_l2CacheSize = 0; - static std::ptrdiff_t m_l3CacheSize = 0; - - if(!m_cache_sizes_initialized) - { +struct CacheSizes { + CacheSizes(): m_l1(-1),m_l2(-1),m_l3(-1) { int l1CacheSize, l2CacheSize, l3CacheSize; queryCacheSizes(l1CacheSize, l2CacheSize, l3CacheSize); - m_l1CacheSize = manage_caching_sizes_helper(l1CacheSize, defaultL1CacheSize); - m_l2CacheSize = manage_caching_sizes_helper(l2CacheSize, defaultL2CacheSize); - m_l3CacheSize = manage_caching_sizes_helper(l3CacheSize, defaultL3CacheSize); - m_cache_sizes_initialized = true; + m_l1 = manage_caching_sizes_helper(l1CacheSize, defaultL1CacheSize); + m_l2 = manage_caching_sizes_helper(l2CacheSize, defaultL2CacheSize); + m_l3 = manage_caching_sizes_helper(l3CacheSize, defaultL3CacheSize); } + std::ptrdiff_t m_l1; + std::ptrdiff_t m_l2; + std::ptrdiff_t m_l3; +}; + + +/** \internal */ +inline void manage_caching_sizes(Action action, std::ptrdiff_t* l1, std::ptrdiff_t* l2, std::ptrdiff_t* l3) +{ + static CacheSizes m_cacheSizes; + if(action==SetAction) { // set the cpu cache size and cache all block sizes from a global cache size in byte eigen_internal_assert(l1!=0 && l2!=0); - m_l1CacheSize = *l1; - m_l2CacheSize = *l2; - m_l3CacheSize = *l3; + m_cacheSizes.m_l1 = *l1; + m_cacheSizes.m_l2 = *l2; + m_cacheSizes.m_l3 = *l3; } else if(action==GetAction) { eigen_internal_assert(l1!=0 && l2!=0); - *l1 = m_l1CacheSize; - *l2 = m_l2CacheSize; - *l3 = m_l3CacheSize; + *l1 = m_cacheSizes.m_l1; + *l2 = m_cacheSizes.m_l2; + *l3 = m_cacheSizes.m_l3; } else { @@ -200,8 +203,6 @@ void evaluateProductBlockingSizesHeuristic(Index& k, Index& m, Index& n, Index n const Index actual_l2 = 1572864; // == 1.5 MB #endif - - // Here, nc is chosen such that a block of kc x nc of the rhs fit within half of L2. // The second half is implicitly reserved to access the result and lhs coefficients. // When k1 only works for openmp, what if the user does not use openmp? - if((!Condition) || (omp_get_num_threads()>1)) - return func(0,rows, 0,cols); - - Index size = transpose ? rows : cols; - - // 2- compute the maximal number of threads from the size of the product: + // compute the maximal number of threads from the size of the product: // FIXME this has to be fine tuned - Index max_threads = std::max(1,size / 32); + Index size = transpose ? 
rows : cols; + Index pb_max_threads = std::max(1,size / 32); + // compute the number of threads we are going to use + Index threads = std::min(nbThreads(), pb_max_threads); - // 3 - compute the number of threads we are going to use - Index threads = std::min(nbThreads(), max_threads); - - if(threads==1) + // if multi-threading is explicitly disabled, not useful, or if we are already in a parallel session, + // then abort multi-threading + // FIXME omp_get_num_threads()>1 only works for openmp, what if the user does not use openmp? + if((!Condition) || (threads==1) || (omp_get_num_threads()>1)) return func(0,rows, 0,cols); Eigen::initParallel();
diff --git a/Eigen/src/Core/products/SelfadjointMatrixVector.h b/Eigen/src/Core/products/SelfadjointMatrixVector.h index f3443bd10..d8d30267e 100644 --- a/Eigen/src/Core/products/SelfadjointMatrixVector.h +++ b/Eigen/src/Core/products/SelfadjointMatrixVector.h @@ -30,7 +30,7 @@ struct selfadjoint_matrix_vector_product static EIGEN_DONT_INLINE void run( Index size, const Scalar* lhs, Index lhsStride, - const Scalar* _rhs, Index rhsIncr, + const Scalar* rhs, Scalar* res, Scalar alpha); };
@@ -39,11 +39,12 @@ template::run( Index size, const Scalar* lhs, Index lhsStride, - const Scalar* _rhs, Index rhsIncr, + const Scalar* rhs, Scalar* res, Scalar alpha) { typedef typename packet_traits::type Packet; + typedef typename NumTraits::Real RealScalar; const Index PacketSize = sizeof(Packet)/sizeof(Scalar); enum {
@@ -54,23 +55,13 @@ EIGEN_DONT_INLINE void selfadjoint_matrix_vector_product::IsComplex && EIGEN_LOGICAL_XOR(ConjugateLhs, IsRowMajor), ConjugateRhs> cj0; conj_helper::IsComplex && EIGEN_LOGICAL_XOR(ConjugateLhs, !IsRowMajor), ConjugateRhs> cj1; - conj_helper::IsComplex, ConjugateRhs> cjd; + conj_helper cjd; conj_helper::IsComplex && EIGEN_LOGICAL_XOR(ConjugateLhs, IsRowMajor), ConjugateRhs> pcj0; conj_helper::IsComplex && EIGEN_LOGICAL_XOR(ConjugateLhs, !IsRowMajor), ConjugateRhs> pcj1; Scalar cjAlpha = ConjugateRhs ? numext::conj(alpha) : alpha; - // FIXME this copy is now handled outside product_selfadjoint_vector, so it could probably be removed. - // if the rhs is not sequentially stored in memory we copy it to a temporary buffer, - // this is because we need to extract packets - ei_declare_aligned_stack_constructed_variable(Scalar,rhs,size,rhsIncr==1 ?
const_cast(_rhs) : 0); - if (rhsIncr!=1) - { - const Scalar* it = _rhs; - for (Index i=0; i ( lhs.rows(), // size &lhs.coeffRef(0,0), lhs.outerStride(), // lhs info - actualRhsPtr, 1, // rhs info + actualRhsPtr, // rhs info actualDestPtr, // result info actualAlpha // scale factor ); diff --git a/Eigen/src/Core/products/SelfadjointMatrixVector_MKL.h b/Eigen/src/Core/products/SelfadjointMatrixVector_MKL.h old mode 100644 new mode 100755 index 86684b66d..a08f385bc --- a/Eigen/src/Core/products/SelfadjointMatrixVector_MKL.h +++ b/Eigen/src/Core/products/SelfadjointMatrixVector_MKL.h @@ -52,16 +52,16 @@ template { \ static void run( \ Index size, const Scalar* lhs, Index lhsStride, \ - const Scalar* _rhs, Index rhsIncr, Scalar* res, Scalar alpha) { \ + const Scalar* _rhs, Scalar* res, Scalar alpha) { \ enum {\ IsColMajor = StorageOrder==ColMajor \ }; \ if (IsColMajor == ConjugateLhs) {\ selfadjoint_matrix_vector_product::run( \ - size, lhs, lhsStride, _rhs, rhsIncr, res, alpha); \ + size, lhs, lhsStride, _rhs, res, alpha); \ } else {\ selfadjoint_matrix_vector_product_symv::run( \ - size, lhs, lhsStride, _rhs, rhsIncr, res, alpha); \ + size, lhs, lhsStride, _rhs, res, alpha); \ }\ } \ }; \ @@ -79,13 +79,13 @@ typedef Matrix SYMVVector;\ \ static void run( \ Index size, const EIGTYPE* lhs, Index lhsStride, \ -const EIGTYPE* _rhs, Index rhsIncr, EIGTYPE* res, EIGTYPE alpha) \ +const EIGTYPE* _rhs, EIGTYPE* res, EIGTYPE alpha) \ { \ enum {\ IsRowMajor = StorageOrder==RowMajor ? 1 : 0, \ IsLower = UpLo == Lower ? 1 : 0 \ }; \ - MKL_INT n=size, lda=lhsStride, incx=rhsIncr, incy=1; \ + MKL_INT n=size, lda=lhsStride, incx=1, incy=1; \ MKLTYPE alpha_, beta_; \ const EIGTYPE *x_ptr, myone(1); \ char uplo=(IsRowMajor) ? (IsLower ? 'U' : 'L') : (IsLower ? 'L' : 'U'); \ @@ -93,10 +93,9 @@ const EIGTYPE* _rhs, Index rhsIncr, EIGTYPE* res, EIGTYPE alpha) \ assign_scalar_eig2mkl(beta_, myone); \ SYMVVector x_tmp; \ if (ConjugateRhs) { \ - Map > map_x(_rhs,size,1,InnerStride<>(incx)); \ + Map map_x(_rhs,size,1); \ x_tmp=map_x.conjugate(); \ x_ptr=x_tmp.data(); \ - incx=1; \ } else x_ptr=_rhs; \ MKLFUNC(&uplo, &n, &alpha_, (const MKLTYPE*)lhs, &lda, (const MKLTYPE*)x_ptr, &incx, &beta_, (MKLTYPE*)res, &incy); \ }\ diff --git a/Eigen/src/Core/products/TriangularSolverMatrix.h b/Eigen/src/Core/products/TriangularSolverMatrix.h index a9a198d64..208593718 100644 --- a/Eigen/src/Core/products/TriangularSolverMatrix.h +++ b/Eigen/src/Core/products/TriangularSolverMatrix.h @@ -304,9 +304,12 @@ EIGEN_DONT_INLINE void triangular_solve_matrix struct CommaInitializer; template class ReturnByValue; template class ArrayWrapper; template class MatrixWrapper; +template class SolverBase; template class InnerIterator; namespace internal { @@ -160,8 +161,7 @@ template< typename T, typename LhsShape = typename evaluator_traits::Shape, typename RhsShape = typename evaluator_traits::Shape, typename LhsScalar = typename traits::Scalar, - typename RhsScalar = typename traits::Scalar, - typename = EnableIf // extra template parameter for SFINAE-based specialization + typename RhsScalar = typename traits::Scalar > struct product_evaluator; } @@ -209,6 +209,7 @@ template struct scalar_random_op; template struct scalar_add_op; template struct scalar_constant_op; template struct scalar_identity_op; +template struct scalar_sign_op; template struct scalar_product_op; template struct scalar_multiple2_op; @@ -266,7 +267,6 @@ template class Rotation2D; template class AngleAxis; template class Translation; template class AlignedBox; - template 
class Quaternion; template class Transform; template class ParametrizedLine; @@ -274,6 +274,9 @@ template class Hyperp template class UniformScaling; template class Homogeneous; +// Sparse module: +template class SparseMatrixBase; + // MatrixFunctions module template struct MatrixExponentialReturnValue; template class MatrixFunctionReturnValue; diff --git a/Eigen/src/Core/util/Macros.h b/Eigen/src/Core/util/Macros.h index abc69f866..9b4f8faa7 100644 --- a/Eigen/src/Core/util/Macros.h +++ b/Eigen/src/Core/util/Macros.h @@ -13,7 +13,7 @@ #define EIGEN_WORLD_VERSION 3 #define EIGEN_MAJOR_VERSION 2 -#define EIGEN_MINOR_VERSION 91 +#define EIGEN_MINOR_VERSION 92 #define EIGEN_VERSION_AT_LEAST(x,y,z) (EIGEN_WORLD_VERSION>x || (EIGEN_WORLD_VERSION>=x && \ (EIGEN_MAJOR_VERSION>y || (EIGEN_MAJOR_VERSION>=y && \ @@ -341,6 +341,13 @@ #define EIGEN_HAVE_RVALUE_REFERENCES #endif +// Does the compiler support C99? +#if (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901)) \ + || (defined(__GNUC__) && defined(_GLIBCXX_USE_C99)) \ + || (defined(_LIBCPP_VERSION) && !defined(_MSC_VER)) +#define EIGEN_HAS_C99_MATH 1 +#endif + // Does the compiler support result_of? #if (__has_feature(cxx_lambdas) || (defined(__cplusplus) && __cplusplus >= 201103L)) #define EIGEN_HAS_STD_RESULT_OF 1 @@ -353,16 +360,19 @@ // Does the compiler support const expressions? #ifdef __CUDACC__ - // Const expressions are not supported regardless of what host compiler is used +// Const expressions are supported provided that c++11 is enabled and we're using nvcc 7.5 or above +#if defined(__CUDACC_VER__) && __CUDACC_VER__ >= 70500 && __cplusplus > 199711L + #define EIGEN_HAS_CONSTEXPR 1 +#endif #elif (defined(__cplusplus) && __cplusplus >= 201402L) || \ - EIGEN_GNUC_AT_LEAST(4,9) + EIGEN_GNUC_AT_LEAST(4,8) #define EIGEN_HAS_CONSTEXPR 1 #endif // Does the compiler support C++11 math? // Let's be conservative and enable the default C++11 implementation only if we are sure it exists #ifndef EIGEN_HAS_CXX11_MATH - #if (__cplusplus >= 201103L) && (EIGEN_COMP_GNUC_STRICT || EIGEN_COMP_CLANG || EIGEN_COMP_MSVC || EIGEN_COMP_ICC) \ + #if (__cplusplus > 201103L) || (__cplusplus >= 201103L) && (EIGEN_COMP_GNUC_STRICT || EIGEN_COMP_CLANG || EIGEN_COMP_MSVC || EIGEN_COMP_ICC) \ && (EIGEN_ARCH_i386_OR_x86_64) && (EIGEN_OS_GNULINUX || EIGEN_OS_WIN_STRICT || EIGEN_OS_MAC) #define EIGEN_HAS_CXX11_MATH 1 #else @@ -372,17 +382,30 @@ // Does the compiler support proper C++11 containers? #ifndef EIGEN_HAS_CXX11_CONTAINERS - #if ((__cplusplus >= 201103L) && (EIGEN_COMP_GNUC_STRICT || EIGEN_COMP_CLANG)) || EIGEN_COMP_MSVC >= 1900 + #if (__cplusplus > 201103L) \ + || ((__cplusplus >= 201103L) && (EIGEN_COMP_GNUC_STRICT || EIGEN_COMP_CLANG || EIGEN_COMP_ICC>=1400)) \ + || EIGEN_COMP_MSVC >= 1900 #define EIGEN_HAS_CXX11_CONTAINERS 1 #else #define EIGEN_HAS_CXX11_CONTAINERS 0 #endif #endif +// Does the compiler support C++11 noexcept? +#ifndef EIGEN_HAS_CXX11_NOEXCEPT + #if (__cplusplus > 201103L) \ + || ((__cplusplus >= 201103L) && (EIGEN_COMP_GNUC_STRICT || EIGEN_COMP_CLANG || EIGEN_COMP_ICC>=1400)) \ + || EIGEN_COMP_MSVC >= 1900 + #define EIGEN_HAS_CXX11_NOEXCEPT 1 + #else + #define EIGEN_HAS_CXX11_NOEXCEPT 0 + #endif +#endif + /** Allows to disable some optimizations which might affect the accuracy of the result. * Such optimization are enabled by default, and set EIGEN_FAST_MATH to 0 to disable them. * They currently include: - * - single precision ArrayBase::sin() and ArrayBase::cos() when SSE vectorization is enabled. 
+ * - single precision ArrayBase::sin() and ArrayBase::cos() for SSE and AVX vectorization. */ #ifndef EIGEN_FAST_MATH #define EIGEN_FAST_MATH 1 @@ -609,10 +632,14 @@ namespace Eigen { // 16 byte alignment on all platforms where vectorization might be enabled. In theory we could always // enable alignment, but it can be a cause of problems on some platforms, so we just disable it in // certain common platform (compiler+architecture combinations) to avoid these problems. - // Only static alignment is really problematic (relies on nonstandard compiler extensions that don't - // work everywhere, for example don't work on GCC/ARM), try to keep heap alignment even - // when we have to disable static alignment. - #if EIGEN_COMP_GNUC && !(EIGEN_ARCH_i386_OR_x86_64 || EIGEN_ARCH_PPC || EIGEN_ARCH_IA64) + // Only static alignment is really problematic (relies on nonstandard compiler extensions), + // try to keep heap alignment even when we have to disable static alignment. + #if EIGEN_COMP_GNUC && !(EIGEN_ARCH_i386_OR_x86_64 || EIGEN_ARCH_ARM_OR_ARM64 || EIGEN_ARCH_PPC || EIGEN_ARCH_IA64) + #define EIGEN_GCC_AND_ARCH_DOESNT_WANT_STACK_ALIGNMENT 1 + #elif EIGEN_ARCH_ARM_OR_ARM64 && EIGEN_COMP_GNUC_STRICT && EIGEN_GNUC_AT_MOST(4, 6) + // Old versions of GCC on ARM, at least 4.4, were once seen to have buggy static alignment support. + // Not sure which version fixed it, hopefully it doesn't affect 4.7, which is still somewhat in use. + // 4.8 and newer seem definitely unaffected. #define EIGEN_GCC_AND_ARCH_DOESNT_WANT_STACK_ALIGNMENT 1 #else #define EIGEN_GCC_AND_ARCH_DOESNT_WANT_STACK_ALIGNMENT 0 @@ -747,8 +774,6 @@ namespace Eigen { * documentation in a single line. **/ -// TODO The EIGEN_DENSE_PUBLIC_INTERFACE should not exists anymore - #define EIGEN_GENERIC_PUBLIC_INTERFACE(Derived) \ typedef typename Eigen::internal::traits::Scalar Scalar; /*!< \brief Numeric type, e.g. float, double, int or std::complex. */ \ typedef typename Eigen::NumTraits::Real RealScalar; /*!< \brief The underlying numeric type for composed scalar types. \details In cases where Scalar is e.g. std::complex, T were corresponding to RealScalar. */ \ @@ -761,17 +786,17 @@ namespace Eigen { Flags = Eigen::internal::traits::Flags, \ SizeAtCompileTime = Base::SizeAtCompileTime, \ MaxSizeAtCompileTime = Base::MaxSizeAtCompileTime, \ - IsVectorAtCompileTime = Base::IsVectorAtCompileTime }; - - -#define EIGEN_DENSE_PUBLIC_INTERFACE(Derived) \ - EIGEN_GENERIC_PUBLIC_INTERFACE(Derived) \ - typedef typename Base::PacketScalar PacketScalar; \ - enum { MaxRowsAtCompileTime = Eigen::internal::traits::MaxRowsAtCompileTime, \ - MaxColsAtCompileTime = Eigen::internal::traits::MaxColsAtCompileTime}; \ + IsVectorAtCompileTime = Base::IsVectorAtCompileTime }; \ using Base::derived; \ using Base::const_cast_derived; + +// FIXME Maybe the EIGEN_DENSE_PUBLIC_INTERFACE could be removed as importing PacketScalar is rarely needed +#define EIGEN_DENSE_PUBLIC_INTERFACE(Derived) \ + EIGEN_GENERIC_PUBLIC_INTERFACE(Derived) \ + typedef typename Base::PacketScalar PacketScalar; + + #define EIGEN_PLAIN_ENUM_MIN(a,b) (((int)a <= (int)b) ? (int)a : (int)b) #define EIGEN_PLAIN_ENUM_MAX(a,b) (((int)a >= (int)b) ? 
(int)a : (int)b) @@ -837,4 +862,12 @@ namespace Eigen { # define EIGEN_CATCH(X) else #endif +#if EIGEN_HAS_CXX11_NOEXCEPT +# define EIGEN_NO_THROW noexcept(true) +# define EIGEN_EXCEPTION_SPEC(X) noexcept(false) +#else +# define EIGEN_NO_THROW throw() +# define EIGEN_EXCEPTION_SPEC(X) throw(X) +#endif + #endif // EIGEN_MACROS_H diff --git a/Eigen/src/Core/util/Memory.h b/Eigen/src/Core/util/Memory.h index 69a489d43..1fc535a3a 100644 --- a/Eigen/src/Core/util/Memory.h +++ b/Eigen/src/Core/util/Memory.h @@ -732,7 +732,7 @@ template void swap(scoped_array &a,scoped_array &b) #if EIGEN_MAX_ALIGN_BYTES!=0 #define EIGEN_MAKE_ALIGNED_OPERATOR_NEW_NOTHROW(NeedsToAlign) \ - void* operator new(size_t size, const std::nothrow_t&) throw() { \ + void* operator new(size_t size, const std::nothrow_t&) EIGEN_NO_THROW { \ EIGEN_TRY { return Eigen::internal::conditional_aligned_malloc(size); } \ EIGEN_CATCH (...) { return 0; } \ } @@ -743,20 +743,20 @@ template void swap(scoped_array &a,scoped_array &b) void *operator new[](size_t size) { \ return Eigen::internal::conditional_aligned_malloc(size); \ } \ - void operator delete(void * ptr) throw() { Eigen::internal::conditional_aligned_free(ptr); } \ - void operator delete[](void * ptr) throw() { Eigen::internal::conditional_aligned_free(ptr); } \ - void operator delete(void * ptr, std::size_t /* sz */) throw() { Eigen::internal::conditional_aligned_free(ptr); } \ - void operator delete[](void * ptr, std::size_t /* sz */) throw() { Eigen::internal::conditional_aligned_free(ptr); } \ + void operator delete(void * ptr) EIGEN_NO_THROW { Eigen::internal::conditional_aligned_free(ptr); } \ + void operator delete[](void * ptr) EIGEN_NO_THROW { Eigen::internal::conditional_aligned_free(ptr); } \ + void operator delete(void * ptr, std::size_t /* sz */) EIGEN_NO_THROW { Eigen::internal::conditional_aligned_free(ptr); } \ + void operator delete[](void * ptr, std::size_t /* sz */) EIGEN_NO_THROW { Eigen::internal::conditional_aligned_free(ptr); } \ /* in-place new and delete. since (at least afaik) there is no actual */ \ /* memory allocated we can safely let the default implementation handle */ \ /* this particular case. */ \ static void *operator new(size_t size, void *ptr) { return ::operator new(size,ptr); } \ static void *operator new[](size_t size, void* ptr) { return ::operator new[](size,ptr); } \ - void operator delete(void * memory, void *ptr) throw() { return ::operator delete(memory,ptr); } \ - void operator delete[](void * memory, void *ptr) throw() { return ::operator delete[](memory,ptr); } \ + void operator delete(void * memory, void *ptr) EIGEN_NO_THROW { return ::operator delete(memory,ptr); } \ + void operator delete[](void * memory, void *ptr) EIGEN_NO_THROW { return ::operator delete[](memory,ptr); } \ /* nothrow-new (returns zero instead of std::bad_alloc) */ \ EIGEN_MAKE_ALIGNED_OPERATOR_NEW_NOTHROW(NeedsToAlign) \ - void operator delete(void *ptr, const std::nothrow_t&) throw() { \ + void operator delete(void *ptr, const std::nothrow_t&) EIGEN_NO_THROW { \ Eigen::internal::conditional_aligned_free(ptr); \ } \ typedef void eigen_aligned_operator_new_marker_type; diff --git a/Eigen/src/Core/util/Meta.h b/Eigen/src/Core/util/Meta.h index 6eb409194..3dee2bd7c 100644 --- a/Eigen/src/Core/util/Meta.h +++ b/Eigen/src/Core/util/Meta.h @@ -1,7 +1,7 @@ // This file is part of Eigen, a lightweight C++ template library // for linear algebra. 
// -// Copyright (C) 2008-2009 Gael Guennebaud +// Copyright (C) 2008-2015 Gael Guennebaud // Copyright (C) 2006-2008 Benoit Jacob // // This Source Code Form is subject to the terms of the Mozilla @@ -11,6 +11,11 @@ #ifndef EIGEN_META_H #define EIGEN_META_H +#if defined(__CUDA_ARCH__) +#include +#include +#endif + namespace Eigen { namespace internal { @@ -68,6 +73,18 @@ template<> struct is_arithmetic { enum { value = true }; }; template<> struct is_arithmetic { enum { value = true }; }; template<> struct is_arithmetic { enum { value = true }; }; +template struct is_integral { enum { value = false }; }; +template<> struct is_integral { enum { value = true }; }; +template<> struct is_integral { enum { value = true }; }; +template<> struct is_integral { enum { value = true }; }; +template<> struct is_integral { enum { value = true }; }; +template<> struct is_integral { enum { value = true }; }; +template<> struct is_integral { enum { value = true }; }; +template<> struct is_integral { enum { value = true }; }; +template<> struct is_integral { enum { value = true }; }; +template<> struct is_integral { enum { value = true }; }; +template<> struct is_integral { enum { value = true }; }; + template struct add_const { typedef const T type; }; template struct add_const { typedef T& type; }; @@ -138,16 +155,16 @@ template<> struct numeric_limits EIGEN_DEVICE_FUNC static float (max)() { return CUDART_MAX_NORMAL_F; } EIGEN_DEVICE_FUNC - static float (min)() { return __FLT_EPSILON__; } + static float (min)() { return FLT_MIN; } }; template<> struct numeric_limits { EIGEN_DEVICE_FUNC static double epsilon() { return __DBL_EPSILON__; } EIGEN_DEVICE_FUNC - static double (max)() { return CUDART_INF; } + static double (max)() { return DBL_MAX; } EIGEN_DEVICE_FUNC - static double (min)() { return __DBL_EPSILON__; } + static double (min)() { return DBL_MIN; } }; template<> struct numeric_limits { @@ -158,6 +175,15 @@ template<> struct numeric_limits EIGEN_DEVICE_FUNC static int (min)() { return INT_MIN; } }; +template<> struct numeric_limits +{ + EIGEN_DEVICE_FUNC + static unsigned int epsilon() { return 0; } + EIGEN_DEVICE_FUNC + static unsigned int (max)() { return UINT_MAX; } + EIGEN_DEVICE_FUNC + static unsigned int (min)() { return 0; } +}; template<> struct numeric_limits { EIGEN_DEVICE_FUNC @@ -167,6 +193,15 @@ template<> struct numeric_limits EIGEN_DEVICE_FUNC static long (min)() { return LONG_MIN; } }; +template<> struct numeric_limits +{ + EIGEN_DEVICE_FUNC + static unsigned long epsilon() { return 0; } + EIGEN_DEVICE_FUNC + static unsigned long (max)() { return ULONG_MAX; } + EIGEN_DEVICE_FUNC + static unsigned long (min)() { return 0; } +}; template<> struct numeric_limits { EIGEN_DEVICE_FUNC @@ -176,6 +211,15 @@ template<> struct numeric_limits EIGEN_DEVICE_FUNC static long long (min)() { return LLONG_MIN; } }; +template<> struct numeric_limits +{ + EIGEN_DEVICE_FUNC + static unsigned long long epsilon() { return 0; } + EIGEN_DEVICE_FUNC + static unsigned long long (max)() { return ULLONG_MAX; } + EIGEN_DEVICE_FUNC + static unsigned long long (min)() { return 0; } +}; } @@ -193,7 +237,6 @@ protected: EIGEN_DEVICE_FUNC ~noncopyable() {} }; - /** \internal * Convenient struct to get the result type of a unary or binary functor. 
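The is_integral specializations and device-side numeric_limits added above replicate small pieces of the standard library for device code, where <type_traits> and <limits> may be unavailable. A minimal standalone sketch of the trait idiom (illustrative only, not Eigen's exact code):

  #include <cstdio>

  // Primary template: false for every type...
  template<typename T> struct is_integral { enum { value = false }; };
  // ...overridden by one explicit specialization per supported integer type.
  template<> struct is_integral<int>          { enum { value = true }; };
  template<> struct is_integral<unsigned int> { enum { value = true }; };
  template<> struct is_integral<long>         { enum { value = true }; };

  int main()
  {
    // The enum member is a compile-time constant, usable for template dispatch.
    std::printf("int: %d, float: %d\n",
                int(is_integral<int>::value), int(is_integral<float>::value));
    return 0;
  }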
* diff --git a/Eigen/src/Core/util/StaticAssert.h b/Eigen/src/Core/util/StaticAssert.h index 7538a0633..1fe365aa7 100644 --- a/Eigen/src/Core/util/StaticAssert.h +++ b/Eigen/src/Core/util/StaticAssert.h @@ -93,7 +93,11 @@ THE_STORAGE_ORDER_OF_BOTH_SIDES_MUST_MATCH, OBJECT_ALLOCATED_ON_STACK_IS_TOO_BIG, IMPLICIT_CONVERSION_TO_SCALAR_IS_FOR_INNER_PRODUCT_ONLY, - STORAGE_LAYOUT_DOES_NOT_MATCH + STORAGE_LAYOUT_DOES_NOT_MATCH, + EIGEN_INTERNAL_ERROR_PLEASE_FILE_A_BUG_REPORT__INVALID_COST_VALUE, + THIS_COEFFICIENT_ACCESSOR_TAKING_ONE_ACCESS_IS_ONLY_FOR_EXPRESSIONS_ALLOWING_LINEAR_ACCESS, + MATRIX_FREE_CONJUGATE_GRADIENT_IS_COMPATIBLE_WITH_UPPER_UNION_LOWER_MODE_ONLY, + THIS_TYPE_IS_NOT_SUPPORTED }; }; @@ -200,5 +204,9 @@ >::value), \ YOU_CANNOT_MIX_ARRAYS_AND_MATRICES) +// Check that a cost value is positive, and that it stays within a reasonable range +// TODO this check could be enabled for internal debugging only +#define EIGEN_INTERNAL_CHECK_COST_VALUE(C) \ + EIGEN_STATIC_ASSERT((C)>=0 && (C)<=HugeCost*HugeCost, EIGEN_INTERNAL_ERROR_PLEASE_FILE_A_BUG_REPORT__INVALID_COST_VALUE); #endif // EIGEN_STATIC_ASSERT_H diff --git a/Eigen/src/Core/util/XprHelper.h b/Eigen/src/Core/util/XprHelper.h index c31cd4801..f9e2959cc 100644 --- a/Eigen/src/Core/util/XprHelper.h +++ b/Eigen/src/Core/util/XprHelper.h @@ -233,33 +233,33 @@ template struct size_of_xpr_at_compile_time */ template::StorageKind> struct plain_matrix_type; -template struct plain_matrix_type_dense; +template struct plain_matrix_type_dense; template struct plain_matrix_type { - typedef typename plain_matrix_type_dense::XprKind>::type type; + typedef typename plain_matrix_type_dense::XprKind, traits::Flags>::type type; }; template struct plain_matrix_type { typedef typename T::PlainObject type; }; -template struct plain_matrix_type_dense +template struct plain_matrix_type_dense { typedef Matrix::Scalar, traits::RowsAtCompileTime, traits::ColsAtCompileTime, - AutoAlign | (traits::Flags&RowMajorBit ? RowMajor : ColMajor), + AutoAlign | (Flags&RowMajorBit ? RowMajor : ColMajor), traits::MaxRowsAtCompileTime, traits::MaxColsAtCompileTime > type; }; -template struct plain_matrix_type_dense +template struct plain_matrix_type_dense { typedef Array::Scalar, traits::RowsAtCompileTime, traits::ColsAtCompileTime, - AutoAlign | (traits::Flags&RowMajorBit ? RowMajor : ColMajor), + AutoAlign | (Flags&RowMajorBit ? RowMajor : ColMajor), traits::MaxRowsAtCompileTime, traits::MaxColsAtCompileTime > type; @@ -303,6 +303,15 @@ struct eval, Dense> }; +/* similar to plain_matrix_type, but using the evaluator's Flags */ +template::StorageKind> struct plain_object_eval; + +template +struct plain_object_eval +{ + typedef typename plain_matrix_type_dense::XprKind, evaluator::Flags>::type type; +}; + /* plain_matrix_type_column_major : same as plain_matrix_type but guaranteed to be column-major */ @@ -385,29 +394,23 @@ struct transfer_constness * \param n the number of coefficient accesses in the nested expression for each coefficient access in the bigger expression. * \param PlainObject the type of the temporary if needed. */ -template::type> struct nested_eval +template::type> struct nested_eval { enum { - // For the purpose of this test, to keep it reasonably simple, we arbitrarily choose a value of Dynamic values. - // the choice of 10000 makes it larger than any practical fixed value and even most dynamic values. - // in extreme cases where these assumptions would be wrong, we would still at worst suffer performance issues - // (poor choice of temporaries).
- // It's important that this value can still be squared without integer overflowing. - DynamicAsInteger = 10000, ScalarReadCost = NumTraits::Scalar>::ReadCost, - ScalarReadCostAsInteger = ScalarReadCost == Dynamic ? int(DynamicAsInteger) : int(ScalarReadCost), - CoeffReadCost = evaluator::CoeffReadCost, // TODO What if an evaluator evaluate itself into a tempory? - // Then CoeffReadCost will be small but we still have to evaluate if n>1... - // The solution might be to ask the evaluator if it creates a temp. Perhaps we could even ask the number of temps? - CoeffReadCostAsInteger = CoeffReadCost == Dynamic ? int(DynamicAsInteger) : int(CoeffReadCost), - NAsInteger = n == Dynamic ? int(DynamicAsInteger) : n, - CostEvalAsInteger = (NAsInteger+1) * ScalarReadCostAsInteger + CoeffReadCostAsInteger, - CostNoEvalAsInteger = NAsInteger * CoeffReadCostAsInteger + CoeffReadCost = evaluator::CoeffReadCost, // NOTE What if an evaluator evaluates itself into a temporary? + // Then CoeffReadCost will be small (e.g., 1) but we still have to evaluate, especially if n>1. + // This situation is already taken care of by the EvalBeforeNestingBit flag, which is turned ON + // for all evaluators creating a temporary. This flag is then propagated by the parent evaluators. + // Another solution could be to count the number of temps? + NAsInteger = n == Dynamic ? HugeCost : n, + CostEval = (NAsInteger+1) * ScalarReadCost + CoeffReadCost, + CostNoEval = NAsInteger * CoeffReadCost }; typedef typename conditional< ( (int(evaluator::Flags) & EvalBeforeNestingBit) || - (int(CostEvalAsInteger) < int(CostNoEvalAsInteger)) ), + (int(CostEval) < int(CostNoEval)) ), PlainObject, typename ref_selector::type >::type type; @@ -449,9 +452,9 @@ struct generic_xpr_base /** \internal Helper base class to add a scalar multiple operator * overloads for complex types */ -template::value > -struct special_scalar_op_base : public DenseCoeffsBase +struct special_scalar_op_base : public BaseType { // dummy operator* so that the // "using special_scalar_op_base::operator*" compiles @@ -460,8 +463,8 @@ struct special_scalar_op_base : public DenseCoeffsBase void operator/(dummy) const; }; -template -struct special_scalar_op_base : public DenseCoeffsBase +template +struct special_scalar_op_base : public BaseType { const CwiseUnaryOp, Derived> operator*(const OtherScalar& scalar) const @@ -654,6 +657,43 @@ bool is_same_dense(const T1 &, const T2 &, typename enable_if struct is_same_or_void { enum { value = is_same::value }; }; +template struct is_same_or_void { enum { value = 1 }; }; +template struct is_same_or_void { enum { value = 1 }; }; +template<> struct is_same_or_void { enum { value = 1 }; }; + +#ifdef EIGEN_DEBUG_ASSIGN +std::string demangle_traversal(int t) +{ + if(t==DefaultTraversal) return "DefaultTraversal"; + if(t==LinearTraversal) return "LinearTraversal"; + if(t==InnerVectorizedTraversal) return "InnerVectorizedTraversal"; + if(t==LinearVectorizedTraversal) return "LinearVectorizedTraversal"; + if(t==SliceVectorizedTraversal) return "SliceVectorizedTraversal"; + return "?"; +} +std::string demangle_unrolling(int t) +{ + if(t==NoUnrolling) return "NoUnrolling"; + if(t==InnerUnrolling) return "InnerUnrolling"; + if(t==CompleteUnrolling) return "CompleteUnrolling"; + return "?"; +} +std::string demangle_flags(int f) +{ + std::string res; + if(f&RowMajorBit) res += " | RowMajor"; + if(f&PacketAccessBit) res += " | Packet"; + if(f&LinearAccessBit) res += " | Linear"; + if(f&LvalueBit) res += " | Lvalue"; + if(f&DirectAccessBit) res += "
| Direct"; + if(f&NestByRefBit) res += " | NestByRef"; + if(f&NoPreferredStorageOrderBit) res += " | NoPreferredStorageOrderBit"; + + return res; +} +#endif + } // end namespace internal // we require Lhs and Rhs to have the same scalar type. Currently there is no example of a binary functor @@ -666,7 +706,7 @@ bool is_same_dense(const T1 &, const T2 &, typename enable_if::ret \ ? int(internal::scalar_product_traits::Defined) \ - : int(internal::is_same::value)), \ + : int(internal::is_same_or_void::value)), \ YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY) } // end namespace Eigen diff --git a/Eigen/src/Eigenvalues/ComplexSchur_MKL.h b/Eigen/src/Eigenvalues/ComplexSchur_MKL.h old mode 100644 new mode 100755 index 27aed923c..e20c3725b --- a/Eigen/src/Eigenvalues/ComplexSchur_MKL.h +++ b/Eigen/src/Eigenvalues/ComplexSchur_MKL.h @@ -40,9 +40,9 @@ namespace Eigen { /** \internal Specialization for the data types supported by MKL */ #define EIGEN_MKL_SCHUR_COMPLEX(EIGTYPE, MKLTYPE, MKLPREFIX, MKLPREFIX_U, EIGCOLROW, MKLCOLROW) \ -template<> inline \ +template<> template inline \ ComplexSchur >& \ -ComplexSchur >::compute(const Matrix& matrix, bool computeU) \ +ComplexSchur >::compute(const EigenBase& matrix, bool computeU) \ { \ typedef Matrix MatrixType; \ typedef MatrixType::RealScalar RealScalar; \ @@ -53,7 +53,7 @@ ComplexSchur >::compute(const Matri m_matUisUptodate = false; \ if(matrix.cols() == 1) \ { \ - m_matT = matrix.cast(); \ + m_matT = matrix.derived().template cast(); \ if(computeU) m_matU = ComplexMatrixType::Identity(1,1); \ m_info = Success; \ m_isInitialized = true; \ @@ -61,7 +61,6 @@ ComplexSchur >::compute(const Matri return *this; \ } \ lapack_int n = matrix.cols(), sdim, info; \ - lapack_int lda = matrix.outerStride(); \ lapack_int matrix_order = MKLCOLROW; \ char jobvs, sort='N'; \ LAPACK_##MKLPREFIX_U##_SELECT1 select = 0; \ @@ -69,6 +68,7 @@ ComplexSchur >::compute(const Matri m_matU.resize(n, n); \ lapack_int ldvs = m_matU.outerStride(); \ m_matT = matrix; \ + lapack_int lda = m_matT.outerStride(); \ Matrix w; \ w.resize(n, 1);\ info = LAPACKE_##MKLPREFIX##gees( matrix_order, jobvs, sort, select, n, (MKLTYPE*)m_matT.data(), lda, &sdim, (MKLTYPE*)w.data(), (MKLTYPE*)m_matU.data(), ldvs ); \ diff --git a/Eigen/src/Eigenvalues/GeneralizedEigenSolver.h b/Eigen/src/Eigenvalues/GeneralizedEigenSolver.h old mode 100644 new mode 100755 index e2e28cd4a..a9d6790d5 --- a/Eigen/src/Eigenvalues/GeneralizedEigenSolver.h +++ b/Eigen/src/Eigenvalues/GeneralizedEigenSolver.h @@ -145,7 +145,7 @@ template class GeneralizedEigenSolver * * \sa compute() */ - explicit GeneralizedEigenSolver(const MatrixType& A, const MatrixType& B, bool computeEigenvectors = true) + GeneralizedEigenSolver(const MatrixType& A, const MatrixType& B, bool computeEigenvectors = true) : m_eivec(A.rows(), A.cols()), m_alphas(A.cols()), m_betas(A.cols()), diff --git a/Eigen/src/Eigenvalues/RealQZ.h b/Eigen/src/Eigenvalues/RealQZ.h old mode 100644 new mode 100755 index 02ebb7d17..a62071d42 --- a/Eigen/src/Eigenvalues/RealQZ.h +++ b/Eigen/src/Eigenvalues/RealQZ.h @@ -101,7 +101,7 @@ namespace Eigen { * * This constructor calls compute() to compute the QZ decomposition. 
*/ - explicit RealQZ(const MatrixType& A, const MatrixType& B, bool computeQZ = true) : + RealQZ(const MatrixType& A, const MatrixType& B, bool computeQZ = true) : m_S(A.rows(),A.cols()), m_T(A.rows(),A.cols()), m_Q(A.rows(),A.cols()), diff --git a/Eigen/src/Eigenvalues/RealSchur_MKL.h b/Eigen/src/Eigenvalues/RealSchur_MKL.h old mode 100644 new mode 100755 index c3089b468..e80926400 --- a/Eigen/src/Eigenvalues/RealSchur_MKL.h +++ b/Eigen/src/Eigenvalues/RealSchur_MKL.h @@ -40,14 +40,13 @@ namespace Eigen { /** \internal Specialization for the data types supported by MKL */ #define EIGEN_MKL_SCHUR_REAL(EIGTYPE, MKLTYPE, MKLPREFIX, MKLPREFIX_U, EIGCOLROW, MKLCOLROW) \ -template<> inline \ +template<> template inline \ RealSchur >& \ -RealSchur >::compute(const Matrix& matrix, bool computeU) \ +RealSchur >::compute(const EigenBase& matrix, bool computeU) \ { \ eigen_assert(matrix.cols() == matrix.rows()); \ \ lapack_int n = matrix.cols(), sdim, info; \ - lapack_int lda = matrix.outerStride(); \ lapack_int matrix_order = MKLCOLROW; \ char jobvs, sort='N'; \ LAPACK_##MKLPREFIX_U##_SELECT2 select = 0; \ @@ -55,6 +54,7 @@ RealSchur >::compute(const Matrix wr, wi; \ wr.resize(n, 1); wi.resize(n, 1); \ info = LAPACKE_##MKLPREFIX##gees( matrix_order, jobvs, sort, select, n, (MKLTYPE*)m_matT.data(), lda, &sdim, (MKLTYPE*)wr.data(), (MKLTYPE*)wi.data(), (MKLTYPE*)m_matU.data(), ldvs ); \ diff --git a/Eigen/src/Eigenvalues/SelfAdjointEigenSolver.h b/Eigen/src/Eigenvalues/SelfAdjointEigenSolver.h index 4d62708ad..c64555096 100644 --- a/Eigen/src/Eigenvalues/SelfAdjointEigenSolver.h +++ b/Eigen/src/Eigenvalues/SelfAdjointEigenSolver.h @@ -411,7 +411,7 @@ SelfAdjointEigenSolver& SelfAdjointEigenSolver if(n==1) { - m_eivalues.coeffRef(0,0) = numext::real(matrix.coeff(0,0)); + m_eivalues.coeffRef(0,0) = numext::real(matrix(0,0)); if(computeEigenvectors) m_eivec.setOnes(n,n); m_info = Success; diff --git a/Eigen/src/Eigenvalues/SelfAdjointEigenSolver_MKL.h b/Eigen/src/Eigenvalues/SelfAdjointEigenSolver_MKL.h old mode 100644 new mode 100755 index 17c0dadd2..3499dc78a --- a/Eigen/src/Eigenvalues/SelfAdjointEigenSolver_MKL.h +++ b/Eigen/src/Eigenvalues/SelfAdjointEigenSolver_MKL.h @@ -40,9 +40,9 @@ namespace Eigen { /** \internal Specialization for the data types supported by MKL */ #define EIGEN_MKL_EIG_SELFADJ(EIGTYPE, MKLTYPE, MKLRTYPE, MKLNAME, EIGCOLROW, MKLCOLROW ) \ -template<> inline \ +template<> template inline \ SelfAdjointEigenSolver >& \ -SelfAdjointEigenSolver >::compute(const Matrix& matrix, int options) \ +SelfAdjointEigenSolver >::compute(const EigenBase& matrix, int options) \ { \ eigen_assert(matrix.cols() == matrix.rows()); \ eigen_assert((options&~(EigVecMask|GenEigMask))==0 \ @@ -56,7 +56,7 @@ SelfAdjointEigenSolver >::compute(c \ if(n==1) \ { \ - m_eivalues.coeffRef(0,0) = numext::real(matrix.coeff(0,0)); \ + m_eivalues.coeffRef(0,0) = numext::real(m_eivec.coeff(0,0)); \ if(computeEigenvectors) m_eivec.setOnes(n,n); \ m_info = Success; \ m_isInitialized = true; \ @@ -64,7 +64,7 @@ SelfAdjointEigenSolver >::compute(c return *this; \ } \ \ - lda = matrix.outerStride(); \ + lda = m_eivec.outerStride(); \ matrix_order=MKLCOLROW; \ char jobz, uplo='L'/*, range='A'*/; \ jobz = computeEigenvectors ? 
'V' : 'N'; \ diff --git a/Eigen/src/Geometry/AlignedBox.h b/Eigen/src/Geometry/AlignedBox.h index 186d4ecad..03f1a11f8 100644 --- a/Eigen/src/Geometry/AlignedBox.h +++ b/Eigen/src/Geometry/AlignedBox.h @@ -163,7 +163,7 @@ EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF_VECTORIZABLE_FIXED_SIZE(_Scalar,_AmbientDim) * a uniform distribution */ inline VectorType sample() const { - VectorType r; + VectorType r(dim()); for(Index d=0; d inline explicit AngleAxis(const MatrixBase& m) { *this = m; } + /** \returns the value of the rotation angle in radians */ Scalar angle() const { return m_angle; } + /** \returns a read-write reference to the stored angle in radians */ Scalar& angle() { return m_angle; } + /** \returns the rotation axis */ const Vector3& axis() const { return m_axis; } + /** \returns a read-write reference to the stored rotation axis. + * + * \warning The rotation axis must remain a \b unit vector. + */ Vector3& axis() { return m_axis; } /** Concatenates two rotations */ @@ -133,7 +140,7 @@ public: m_angle = Scalar(other.angle()); } - static inline const AngleAxis Identity() { return AngleAxis(0, Vector3::UnitX()); } + static inline const AngleAxis Identity() { return AngleAxis(Scalar(0), Vector3::UnitX()); } /** \returns \c true if \c *this is approximately equal to \a other, within the precision * determined by \a prec. @@ -170,8 +177,8 @@ AngleAxis& AngleAxis::operator=(const QuaternionBase, Homogeneous +struct permutation_matrix_product + : public permutation_matrix_product +{}; + } // end namespace internal } // end namespace Eigen diff --git a/Eigen/src/Geometry/Quaternion.h b/Eigen/src/Geometry/Quaternion.h index 56fa2bfbf..32e7e76fa 100644 --- a/Eigen/src/Geometry/Quaternion.h +++ b/Eigen/src/Geometry/Quaternion.h @@ -739,8 +739,9 @@ template struct quaternionbase_assign_impl { typedef typename Other::Scalar Scalar; - template static inline void run(QuaternionBase& q, const Other& mat) + template static inline void run(QuaternionBase& q, const Other& a_mat) { + const typename internal::nested_eval::type mat(a_mat); using std::sqrt; // This algorithm comes from "Quaternion Calculus and Fast Animation", // Ken Shoemake, 1987 SIGGRAPH course notes diff --git a/Eigen/src/Geometry/Rotation2D.h b/Eigen/src/Geometry/Rotation2D.h index 65aa83be5..8b0ddcfb0 100644 --- a/Eigen/src/Geometry/Rotation2D.h +++ b/Eigen/src/Geometry/Rotation2D.h @@ -64,6 +64,16 @@ public: /** Default constructor without initialization. The represented rotation is undefined. */ Rotation2D() {} + /** Construct a 2D rotation from a 2x2 rotation matrix \a mat. + * + * \sa fromRotationMatrix() + */ + template + explicit Rotation2D(const MatrixBase& m) + { + fromRotationMatrix(m.derived()); + } + /** \returns the rotation angle */ inline Scalar angle() const { return m_angle; } @@ -103,6 +113,17 @@ public: Rotation2D& fromRotationMatrix(const MatrixBase& m); Matrix2 toRotationMatrix() const; + /** Set \c *this from a 2x2 rotation matrix \a mat. + * In other words, this function extracts the rotation angle from the rotation matrix. + * + * This method is an alias for fromRotationMatrix() + * + * \sa fromRotationMatrix() + */ + template + Rotation2D& operator=(const MatrixBase& m) + { return fromRotationMatrix(m.derived()); } + /** \returns the spherical interpolation between \c *this and \a other using * parameter \a t. It is in fact equivalent to a linear interpolation.
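The new Rotation2D matrix constructor and operator= above both forward to fromRotationMatrix(), which recovers the angle of a 2x2 rotation matrix. The underlying arithmetic, sketched without Eigen types (assuming the usual [cos -sin; sin cos] layout):

  #include <cmath>
  #include <cstdio>

  // Hypothetical standalone analogue of Rotation2D::fromRotationMatrix():
  // for a 2D rotation matrix, angle = atan2(m(1,0), m(0,0)).
  double angleFromRotationMatrix(double m00, double m10)
  {
    return std::atan2(m10, m00);
  }

  int main()
  {
    const double a = 0.3;
    double recovered = angleFromRotationMatrix(std::cos(a), std::sin(a));
    std::printf("recovered angle: %f (expected 0.3)\n", recovered);
    return 0;
  }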
*/ diff --git a/Eigen/src/Geometry/Transform.h b/Eigen/src/Geometry/Transform.h index 8c9d7049b..75f20bda6 100644 --- a/Eigen/src/Geometry/Transform.h +++ b/Eigen/src/Geometry/Transform.h @@ -118,15 +118,15 @@ template struct transform_make_affine; * * However, unlike a plain matrix, the Transform class provides many features * simplifying both its assembly and usage. In particular, it can be composed - * with any other transformations (Transform,Translation,RotationBase,Matrix) + * with any other transformations (Transform,Translation,RotationBase,DiagonalMatrix) * and can be directly used to transform implicit homogeneous vectors. All these * operations are handled via the operator*. For the composition of transformations, * the principle is to first convert the right/left hand sides of the product * to a compatible (Dim+1)^2 matrix and then perform a pure matrix product. * Of course, internally, operator* tries to perform the minimal number of operations * according to the nature of each term. Likewise, when applying the transform - * to non homogeneous vectors, the latters are automatically promoted to homogeneous - * one before doing the matrix product. The convertions to homogeneous representations + * to points, the latter are automatically promoted to homogeneous vectors + * before doing the matrix product. The conversions to homogeneous representations * are performed as follows: * * \b Translation t (Dim)x(1): @@ -140,7 +140,7 @@ template struct transform_make_affine; * R & 0\\ * 0\,...\,0 & 1 * \end{array} \right) \f$ - * + * + * \b Scaling \b DiagonalMatrix S (Dim)x(Dim): + * \f$ \left( \begin{array}{cc} + * S & 0\\ + * 0\,...\,0 & 1 + * \end{array} \right) \f$ * - * \b Column \b vector v (Dim)x(1): + * \b Column \b point v (Dim)x(1): * \f$ \left( \begin{array}{c} * v\\ * 1 * \end{array} \right) \f$ * - * \b Set \b of \b column \b vectors V1...Vn (Dim)x(n): + * \b Set \b of \b column \b points V1...Vn (Dim)x(n): * \f$ \left( \begin{array}{ccc} * v_1 & ... & v_n\\ * 1 & ... & 1 @@ -404,26 +410,39 @@ public: /** \returns a writable expression of the translation vector of the transformation */ inline TranslationPart translation() { return TranslationPart(m_matrix,0,Dim); } - /** \returns an expression of the product between the transform \c *this and a matrix expression \a other + /** \returns an expression of the product between the transform \c *this and a matrix expression \a other. * - * The right hand side \a other might be either: - * \li a vector of size Dim, + * The right-hand-side \a other can be either: * \li an homogeneous vector of size Dim+1, - * \li a set of vectors of size Dim x Dynamic, - * \li a set of homogeneous vectors of size Dim+1 x Dynamic, - * \li a linear transformation matrix of size Dim x Dim, - * \li an affine transformation matrix of size Dim x Dim+1, + * \li a set of homogeneous vectors of size Dim+1 x N, * \li a transformation matrix of size Dim+1 x Dim+1. + * + * Moreover, if \c *this represents an affine transformation (i.e., Mode!=Projective), then \a other can also be: + * \li a point of size Dim (computes: \code this->linear() * other + this->translation()\endcode), + * \li a set of N points as a Dim x N matrix (computes: \code (this->linear() * other).colwise() + this->translation()\endcode), + * + * In all cases, the return type is a matrix or vector of the same size as the right-hand side \a other.
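The rewritten operator* documentation above distinguishes points, which receive the translation, from directions, which should only see the linear part. A short usage sketch of that distinction, assuming the Eigen/Geometry API as documented:

  #include <Eigen/Geometry>

  int main()
  {
    using namespace Eigen;
    Affine3f A = Translation3f(1.f, 0.f, 0.f)
               * AngleAxisf(0.5f, Vector3f::UnitZ());
    Vector3f p(1.f, 2.f, 3.f), d(0.f, 0.f, 1.f);
    Vector3f tp = A * p;           // point: linear() * p + translation()
    Vector3f td = A.linear() * d;  // direction: rotation only, no translation
    (void)tp; (void)td;
    return 0;
  }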
+ * + * If you want to interpret \a other as a linear or affine transformation, then first convert it to a Transform<> type, + * or do your own cooking. + * + * Finally, if you want to apply Affine transformations to vectors, then explicitly apply the linear part only: + * \code + * Affine3f A; + * Vector3f v1, v2; + * v2 = A.linear() * v1; + * \endcode + * */ // note: this function is defined here because some compilers cannot find the respective declaration template - EIGEN_STRONG_INLINE const typename internal::transform_right_product_impl::ResultType + EIGEN_STRONG_INLINE const typename OtherDerived::PlainObject operator * (const EigenBase &other) const { return internal::transform_right_product_impl::run(*this,other.derived()); } /** \returns the product expression of a transformation matrix \a a times a transform \a b * - * The left hand side \a other might be either: + * The left hand side \a other can be either: * \li a linear transformation matrix of size Dim x Dim, * \li an affine transformation matrix of size Dim x Dim+1, * \li a general transformation matrix of size Dim+1 x Dim+1. diff --git a/Eigen/src/IterativeLinearSolvers/BasicPreconditioners.h b/Eigen/src/IterativeLinearSolvers/BasicPreconditioners.h index ff7f08c1c..358444aff 100644 --- a/Eigen/src/IterativeLinearSolvers/BasicPreconditioners.h +++ b/Eigen/src/IterativeLinearSolvers/BasicPreconditioners.h @@ -23,6 +23,8 @@ namespace Eigen { * * \tparam _Scalar the type of the scalar. * + * \implsparsesolverconcept + * * This preconditioner is suitable for both selfadjoint and general problems. * The diagonal entries are pre-inverted and stored into a dense vector. * @@ -37,8 +39,10 @@ class DiagonalPreconditioner typedef Matrix Vector; public: typedef typename Vector::StorageIndex StorageIndex; - // this typedef is only to export the scalar type and compile-time dimensions to solve_retval - typedef Matrix MatrixType; + enum { + ColsAtCompileTime = Dynamic, + MaxColsAtCompileTime = Dynamic + }; DiagonalPreconditioner() : m_isInitialized(false) {} @@ -114,6 +118,8 @@ class DiagonalPreconditioner * * \tparam _Scalar the type of the scalar. * + * \implsparsesolverconcept + * * The diagonal entries are pre-inverted and stored into a dense vector. * * \sa class LeastSquaresConjugateGradient, class DiagonalPreconditioner @@ -172,6 +178,8 @@ class LeastSquareDiagonalPreconditioner : public DiagonalPreconditioner<_Scalar> /** \ingroup IterativeLinearSolvers_Module * \brief A naive preconditioner which approximates any matrix as the identity matrix * + * \implsparsesolverconcept + * * \sa class DiagonalPreconditioner */ class IdentityPreconditioner diff --git a/Eigen/src/IterativeLinearSolvers/BiCGSTAB.h b/Eigen/src/IterativeLinearSolvers/BiCGSTAB.h index a34ee7628..454f46814 100644 --- a/Eigen/src/IterativeLinearSolvers/BiCGSTAB.h +++ b/Eigen/src/IterativeLinearSolvers/BiCGSTAB.h @@ -132,6 +132,8 @@ struct traits > * \tparam _MatrixType the type of the sparse matrix A, can be a dense or a sparse matrix. * \tparam _Preconditioner the type of the preconditioner. Default is DiagonalPreconditioner * + * \implsparsesolverconcept + * * The maximal number of iterations and tolerance value can be controlled via the setMaxIterations() * and setTolerance() methods. The defaults are the size of the problem for the maximal number of iterations * and NumTraits::epsilon() for the tolerance. @@ -148,13 +150,15 @@ struct traits > * By default the iterations start with x=0 as an initial guess of the solution. 
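The surrounding BiCGSTAB documentation describes warm-starting the iterations through solveWithGuess(). A minimal usage sketch (the matrix values are arbitrary placeholders):

  #include <Eigen/Sparse>
  #include <Eigen/IterativeLinearSolvers>

  int main()
  {
    typedef Eigen::SparseMatrix<double> SpMat;
    SpMat A(2, 2);
    A.insert(0, 0) = 4; A.insert(0, 1) = 1;
    A.insert(1, 0) = 1; A.insert(1, 1) = 3;
    A.makeCompressed();

    Eigen::VectorXd b(2); b << 1, 2;
    Eigen::BiCGSTAB<SpMat> solver(A);

    Eigen::VectorXd x0 = Eigen::VectorXd::Zero(2);  // initial guess
    Eigen::VectorXd x  = solver.solveWithGuess(b, x0);
    return solver.info() == Eigen::Success ? 0 : 1;
  }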
* One can control the start using the solveWithGuess() method. * + * BiCGSTAB can also be used in a matrix-free context, see the following \link MatrixfreeSolverExample example \endlink. + * * \sa class SimplicialCholesky, DiagonalPreconditioner, IdentityPreconditioner */ template< typename _MatrixType, typename _Preconditioner> class BiCGSTAB : public IterativeSolverBase > { typedef IterativeSolverBase Base; - using Base::mp_matrix; + using Base::matrix; using Base::m_error; using Base::m_iterations; using Base::m_info; @@ -180,7 +184,8 @@ public: * this class becomes invalid. Call compute() to update it with the new * matrix A, or modify a copy of A. */ - explicit BiCGSTAB(const MatrixType& A) : Base(A) {} + template + explicit BiCGSTAB(const EigenBase& A) : Base(A.derived()) {} ~BiCGSTAB() {} @@ -195,7 +200,7 @@ public: m_error = Base::m_tolerance; typename Dest::ColXpr xj(x,j); - if(!internal::bicgstab(mp_matrix, b.col(j), xj, Base::m_preconditioner, m_iterations, m_error)) + if(!internal::bicgstab(matrix(), b.col(j), xj, Base::m_preconditioner, m_iterations, m_error)) failed = true; } m_info = failed ? NumericalIssue diff --git a/Eigen/src/IterativeLinearSolvers/ConjugateGradient.h b/Eigen/src/IterativeLinearSolvers/ConjugateGradient.h index 8f33c446d..395daa8e4 100644 --- a/Eigen/src/IterativeLinearSolvers/ConjugateGradient.h +++ b/Eigen/src/IterativeLinearSolvers/ConjugateGradient.h @@ -118,6 +118,8 @@ struct traits > * Default is \c Lower, best performance is \c Lower|Upper. * \tparam _Preconditioner the type of the preconditioner. Default is DiagonalPreconditioner * + * \implsparsesolverconcept + * * The maximal number of iterations and tolerance value can be controlled via the setMaxIterations() * and setTolerance() methods. The defaults are the size of the problem for the maximal number of iterations * and NumTraits::epsilon() for the tolerance. @@ -147,13 +149,15 @@ struct traits > * By default the iterations start with x=0 as an initial guess of the solution. * One can control the start using the solveWithGuess() method. * + * ConjugateGradient can also be used in a matrix-free context, see the following \link MatrixfreeSolverExample example \endlink. + * * \sa class LeastSquaresConjugateGradient, class SimplicialCholesky, DiagonalPreconditioner, IdentityPreconditioner */ template< typename _MatrixType, int _UpLo, typename _Preconditioner> class ConjugateGradient : public IterativeSolverBase > { typedef IterativeSolverBase Base; - using Base::mp_matrix; + using Base::matrix; using Base::m_error; using Base::m_iterations; using Base::m_info; @@ -183,7 +187,8 @@ public: * this class becomes invalid. Call compute() to update it with the new * matrix A, or modify a copy of A. 
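Per the ConjugateGradient documentation above (default \c Lower, best performance with \c Lower|Upper when the full matrix is stored), a usage sketch of the fast mode:

  #include <Eigen/Sparse>
  #include <Eigen/IterativeLinearSolvers>

  int main()
  {
    typedef Eigen::SparseMatrix<double> SpMat;
    SpMat A(2, 2);  // symmetric positive definite, both triangles stored
    A.insert(0, 0) = 4; A.insert(0, 1) = 1;
    A.insert(1, 0) = 1; A.insert(1, 1) = 3;
    A.makeCompressed();

    Eigen::VectorXd b(2); b << 1, 2;
    Eigen::ConjugateGradient<SpMat, Eigen::Lower | Eigen::Upper> cg(A);
    Eigen::VectorXd x = cg.solve(b);
    return cg.info() == Eigen::Success ? 0 : 1;
  }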
*/ - explicit ConjugateGradient(const MatrixType& A) : Base(A) {} + template + explicit ConjugateGradient(const EigenBase& A) : Base(A.derived()) {} ~ConjugateGradient() {} @@ -191,12 +196,19 @@ public: template void _solve_with_guess_impl(const Rhs& b, Dest& x) const { - typedef Ref MatRef; - typedef typename internal::conditional::IsComplex), - Transpose, MatRef const&>::type RowMajorWrapper; + typedef typename Base::MatrixWrapper MatrixWrapper; + typedef typename Base::ActualMatrixType ActualMatrixType; + enum { + TransposeInput = (!MatrixWrapper::MatrixFree) + && (UpLo==(Lower|Upper)) + && (!MatrixType::IsRowMajor) + && (!NumTraits::IsComplex) + }; + typedef typename internal::conditional, ActualMatrixType const&>::type RowMajorWrapper; + EIGEN_STATIC_ASSERT(EIGEN_IMPLIES(MatrixWrapper::MatrixFree,UpLo==(Lower|Upper)),MATRIX_FREE_CONJUGATE_GRADIENT_IS_COMPATIBLE_WITH_UPPER_UNION_LOWER_MODE_ONLY); typedef typename internal::conditional::Type + typename MatrixWrapper::template ConstSelfAdjointViewReturnType::Type >::type SelfAdjointWrapper; m_iterations = Base::maxIterations(); m_error = Base::m_tolerance; @@ -207,7 +219,7 @@ public: m_error = Base::m_tolerance; typename Dest::ColXpr xj(x,j); - RowMajorWrapper row_mat(mp_matrix); + RowMajorWrapper row_mat(matrix()); internal::conjugate_gradient(SelfAdjointWrapper(row_mat), b.col(j), xj, Base::m_preconditioner, m_iterations, m_error); } diff --git a/unsupported/Eigen/src/IterativeSolvers/IncompleteCholesky.h b/Eigen/src/IterativeLinearSolvers/IncompleteCholesky.h similarity index 68% rename from unsupported/Eigen/src/IterativeSolvers/IncompleteCholesky.h rename to Eigen/src/IterativeLinearSolvers/IncompleteCholesky.h index 2e2d9a851..284e37f13 100644 --- a/unsupported/Eigen/src/IterativeSolvers/IncompleteCholesky.h +++ b/Eigen/src/IterativeLinearSolvers/IncompleteCholesky.h @@ -2,6 +2,7 @@ // for linear algebra. // // Copyright (C) 2012 Désiré Nuentsa-Wakam +// Copyright (C) 2015 Gael Guennebaud // // This Source Code Form is subject to the terms of the Mozilla // Public License v. 2.0. If a copy of the MPL was not distributed @@ -9,24 +10,42 @@ #ifndef EIGEN_INCOMPLETE_CHOlESKY_H #define EIGEN_INCOMPLETE_CHOlESKY_H -#include "Eigen/src/IterativeLinearSolvers/IncompleteLUT.h" -#include + +#include #include namespace Eigen { /** - * \brief Modified Incomplete Cholesky with dual threshold - * - * References : C-J. Lin and J. J. Moré, Incomplete Cholesky Factorizations with - * Limited memory, SIAM J. Sci. Comput. 21(1), pp. 24-45, 1999 - * - * \tparam _MatrixType The type of the sparse matrix. It should be a symmetric - * matrix. It is advised to give a row-oriented sparse matrix - * \tparam _UpLo The triangular part of the matrix to reference. - * \tparam _OrderingType - */ - -template > + * \brief Modified Incomplete Cholesky with dual threshold + * + * References : C-J. Lin and J. J. Moré, Incomplete Cholesky Factorizations with + * Limited memory, SIAM J. Sci. Comput. 21(1), pp. 24-45, 1999 + * + * \tparam _MatrixType The type of the sparse matrix. It is advised to give a row-oriented sparse matrix + * \tparam _UpLo The triangular part that will be used for the computations. It can be Lower + * or Upper. Default is Lower. + * \tparam _OrderingType The ordering method to use, either AMDOrdering<> or NaturalOrdering<>. Default is AMDOrdering, + * unless EIGEN_MPL2_ONLY is defined, in which case the default is NaturalOrdering. 
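IncompleteCholesky is relocated from unsupported/ into IterativeLinearSolvers here. Assuming the module header exposes it after the move, it can then serve as a drop-in conjugate-gradient preconditioner:

  #include <Eigen/Sparse>
  #include <Eigen/IterativeLinearSolvers>

  int main()
  {
    typedef Eigen::SparseMatrix<double> SpMat;
    SpMat A(2, 2);
    A.insert(0, 0) = 4; A.insert(0, 1) = 1;
    A.insert(1, 0) = 1; A.insert(1, 1) = 3;
    A.makeCompressed();

    Eigen::VectorXd b(2); b << 1, 2;
    // Remaining template arguments take their defaults: UpLo = Lower,
    // AMD ordering (or Natural under EIGEN_MPL2_ONLY).
    Eigen::ConjugateGradient<SpMat, Eigen::Lower,
                             Eigen::IncompleteCholesky<double> > cg(A);
    Eigen::VectorXd x = cg.solve(b);
    return cg.info() == Eigen::Success ? 0 : 1;
  }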
+ * + * \implsparsesolverconcept + * + * It performs the following incomplete factorization: \f$ S P A P' S \approx L L' \f$ + * where L is a lower triangular factor, S is a diagonal scaling matrix, and P is a + * fill-in reducing permutation as computed by the ordering method. + * + * \b Shifting \b strategy: Let \f$ B = S P A P' S \f$ be the scaled matrix on which the factorization is carried out, + * and \f$ \beta \f$ be the minimum value of the diagonal. If \f$ \beta > 0 \f$, then the factorization is directly performed + * on the matrix B. Otherwise, the factorization is performed on the shifted matrix \f$ B + (\sigma+|\beta|) I \f$ where + * \f$ \sigma \f$ is the initial shift value as returned and set by the setInitialShift() method. The default value is \f$ \sigma = 10^{-3} \f$. + * + */ +template +#else +NaturalOrdering +#endif +> class IncompleteCholesky : public SparseSolverBase > { protected: @@ -38,45 +57,60 @@ class IncompleteCholesky : public SparseSolverBase FactorType; - typedef FactorType MatrixType; typedef Matrix VectorSx; typedef Matrix VectorRx; typedef Matrix VectorIx; typedef std::vector > VectorList; enum { UpLo = _UpLo }; + enum { + ColsAtCompileTime = Dynamic, + MaxColsAtCompileTime = Dynamic + }; public: + + /** Default constructor leaving the object in a partly non-initialized state. + * + * You must call compute() or the pair analyzePattern()/factorize() to make it valid. + * + * \sa IncompleteCholesky(const MatrixType&) + */ IncompleteCholesky() : m_initialShift(1e-3),m_factorizationIsOk(false) {} + /** Constructor computing the incomplete factorization for the given matrix \a matrix. + */ template IncompleteCholesky(const MatrixType& matrix) : m_initialShift(1e-3),m_factorizationIsOk(false) { compute(matrix); } + /** \returns number of rows of the factored matrix */ Index rows() const { return m_L.rows(); } + /** \returns number of columns of the factored matrix */ Index cols() const { return m_L.cols(); } /** \brief Reports whether previous computation was successful. * - * \returns \c Success if computation was succesful, + * It triggers an assertion if \c *this has not been initialized through the respective constructor, + * or a call to compute() or analyzePattern(). + * + * \returns \c Success if computation was successful, * \c NumericalIssue if the matrix appears to be negative. */ ComputationInfo info() const { - eigen_assert(m_isInitialized && "IncompleteLLT is not initialized."); + eigen_assert(m_isInitialized && "IncompleteCholesky is not initialized."); return m_info; } - /** - * \brief Set the initial shift parameter - */ + /** \brief Set the initial shift parameter \f$ \sigma \f$. + */ void setInitialShift(RealScalar shift) { m_initialShift = shift; } - /** - * \brief Computes the fill reducing permutation vector. - */ + /** \brief Computes the fill reducing permutation vector using the sparsity pattern of \a mat + */ template void analyzePattern(const MatrixType& mat) { @@ -85,19 +119,36 @@ class IncompleteCholesky : public SparseSolverBase(), pinv); if(pinv.size()>0) m_perm = pinv.inverse(); else m_perm.resize(0); - m_analysisIsOk = true; + m_L.resize(mat.rows(), mat.cols()); + m_analysisIsOk = true; + m_isInitialized = true; + m_info = Success; } + /** \brief Performs the numerical factorization of the input matrix \a mat + * + * The method analyzePattern() or compute() must have been called beforehand + * with a matrix having the same pattern.
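A tiny numeric sketch of the shifting strategy documented above: when the minimum diagonal entry beta of the scaled matrix is not positive, the factorization runs on B + (sigma+|beta|) I. The values below are made up:

  #include <cmath>
  #include <cstdio>

  int main()
  {
    double sigma = 1e-3;   // default initial shift, see setInitialShift()
    double beta  = -0.25;  // hypothetical minimum diagonal of the scaled matrix
    double shift = (beta > 0.0) ? 0.0 : sigma + std::fabs(beta);
    std::printf("diagonal shift applied: %g\n", shift);  // prints 0.251
    return 0;
  }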
+ * + * \sa compute(), analyzePattern() + */ template - void factorize(const MatrixType& amat); + void factorize(const MatrixType& mat); + /** Computes or re-computes the incomplete Cholesky factorization of the input matrix \a mat + * + * It is a shortcut for a sequential call to the analyzePattern() and factorize() methods. + * + * \sa analyzePattern(), factorize() + */ template - void compute(const MatrixType& matrix) + void compute(const MatrixType& mat) { - analyzePattern(matrix); - factorize(matrix); + analyzePattern(mat); + factorize(mat); } + // internal template void _solve_impl(const Rhs& b, Dest& x) const { @@ -110,9 +161,17 @@ class IncompleteCholesky : public SparseSolverBase colPtr, Ref rowIdx, Ref vals, const Index& col, const Index& jk, VectorIx& firstElt, VectorList& listCol); }; @@ -135,8 +194,6 @@ void IncompleteCholesky::factorize(const _MatrixType // Dropping strategy : Keep only the p largest elements per column, where p is the number of elements in the column of the original matrix. Other strategies will be added - m_L.resize(mat.rows(), mat.cols()); - // Apply the fill-reducing permutation computed in analyzePattern() if (m_perm.rows() == mat.rows() ) // To detect the null permutation { @@ -176,13 +233,21 @@ void IncompleteCholesky::factorize(const _MatrixType } m_scale = m_scale.cwiseSqrt().cwiseSqrt(); + + for (Index j = 0; j < n; ++j) + if(m_scale(j)>(std::numeric_limits::min)()) + m_scale(j) = RealScalar(1)/m_scale(j); + else + m_scale(j) = 1; + + // FIXME disable scaling if not needed, i.e., if it is roughly uniform? (this will make solve() faster) // Scale and compute the shift for the matrix RealScalar mindiag = NumTraits::highest(); for (Index j = 0; j < n; j++) { for (Index k = colPtr[j]; k < colPtr[j+1]; k++) - vals[k] /= (m_scale(j)*m_scale(rowIdx[k])); + vals[k] *= (m_scale(j)*m_scale(rowIdx[k])); eigen_internal_assert(rowIdx[colPtr[j]]==j && "IncompleteCholesky: only the lower triangular part must be stored"); mindiag = numext::mini(numext::real(vals[colPtr[j]]), mindiag); } @@ -240,7 +305,6 @@ void IncompleteCholesky::factorize(const _MatrixType // Scale the current column if(numext::real(diag) <= 0) { - std::cerr << "\nNegative diagonal during Incomplete factorization at position " << j << " (value = " << diag << ")\n"; m_info = NumericalIssue; return; } @@ -276,8 +340,7 @@ void IncompleteCholesky::factorize(const _MatrixType updateList(colPtr,rowIdx,vals,j,jk,firstElt,listCol); } m_factorizationIsOk = true; - m_isInitialized = true; - m_info = Success; + m_info = Success; } template diff --git a/Eigen/src/IterativeLinearSolvers/IncompleteLUT.h b/Eigen/src/IterativeLinearSolvers/IncompleteLUT.h index b644163f1..338e6f10a 100644 --- a/Eigen/src/IterativeLinearSolvers/IncompleteLUT.h +++ b/Eigen/src/IterativeLinearSolvers/IncompleteLUT.h @@ -67,6 +67,8 @@ Index QuickSplit(VectorV &row, VectorI &ind, Index ncut) * \class IncompleteLUT * \brief Incomplete LU factorization with dual-threshold strategy * + * \implsparsesolverconcept + * * During the numerical factorization, two dropping rules are used : * 1) any element whose magnitude is less than some tolerance is dropped. 
* This tolerance is obtained by multiplying the input tolerance @p droptol @@ -107,11 +109,13 @@ class IncompleteLUT : public SparseSolverBase VectorI; typedef SparseMatrix FactorType; + enum { + ColsAtCompileTime = Dynamic, + MaxColsAtCompileTime = Dynamic + }; + public: - // this typedef is only to export the scalar type and compile-time dimensions to solve_retval - typedef Matrix MatrixType; - IncompleteLUT() : m_droptol(NumTraits::dummy_precision()), m_fillfactor(10), m_analysisIsOk(false), m_factorizationIsOk(false) @@ -166,7 +170,7 @@ class IncompleteLUT : public SparseSolverBase void _solve_impl(const Rhs& b, Dest& x) const { - x = m_Pinv * b; + x = m_Pinv * b; x = m_lu.template triangularView().solve(x); x = m_lu.template triangularView().solve(x); x = m_P * x; @@ -219,16 +223,25 @@ template void IncompleteLUT::analyzePattern(const _MatrixType& amat) { // Compute the Fill-reducing permutation + // Since ILUT does not perform any numerical pivoting, + // it is highly preferable to keep the diagonal through symmetric permutations. +#ifndef EIGEN_MPL2_ONLY + // To this end, let's symmetrize the pattern and perform AMD on it. SparseMatrix mat1 = amat; SparseMatrix mat2 = amat.transpose(); - // Symmetrize the pattern // FIXME for a matrix with nearly symmetric pattern, mat2+mat1 is the appropriate choice. // on the other hand for a really non-symmetric pattern, mat2*mat1 should be prefered... SparseMatrix AtA = mat2 + mat1; - AtA.prune(keep_diag()); - internal::minimum_degree_ordering(AtA, m_P); // Then compute the AMD ordering... - - m_Pinv = m_P.inverse(); // ... and the inverse permutation + AMDOrdering ordering; + ordering(AtA,m_P); + m_Pinv = m_P.inverse(); // cache the inverse permutation +#else + // If AMD is not available, (MPL2-only), then let's use the slower COLAMD routine. 
+ SparseMatrix mat1 = amat; + COLAMDOrdering ordering; + ordering(mat1,m_Pinv); + m_P = m_Pinv.inverse(); +#endif m_analysisIsOk = true; m_factorizationIsOk = false; diff --git a/Eigen/src/IterativeLinearSolvers/IterativeSolverBase.h b/Eigen/src/IterativeLinearSolvers/IterativeSolverBase.h index 5f4bcea11..3d62fef6e 100644 --- a/Eigen/src/IterativeLinearSolvers/IterativeSolverBase.h +++ b/Eigen/src/IterativeLinearSolvers/IterativeSolverBase.h @@ -12,6 +12,128 @@ namespace Eigen { +namespace internal { + +template +struct is_ref_compatible_impl +{ +private: + template + struct any_conversion + { + template any_conversion(const volatile T&); + template any_conversion(T&); + }; + struct yes {int a[1];}; + struct no {int a[2];}; + + template + static yes test(const Ref&, int); + template + static no test(any_conversion, ...); + +public: + static MatrixType ms_from; + enum { value = sizeof(test(ms_from, 0))==sizeof(yes) }; +}; + +template +struct is_ref_compatible +{ + enum { value = is_ref_compatible_impl::type>::value }; +}; + +template::value> +class generic_matrix_wrapper; + +// We have an explicit matrix at hand, compatible with Ref<> +template +class generic_matrix_wrapper +{ +public: + typedef Ref ActualMatrixType; + template struct ConstSelfAdjointViewReturnType { + typedef typename ActualMatrixType::template ConstSelfAdjointViewReturnType::Type Type; + }; + + enum { + MatrixFree = false + }; + + generic_matrix_wrapper() + : m_dummy(0,0), m_matrix(m_dummy) + {} + + template + generic_matrix_wrapper(const InputType &mat) + : m_matrix(mat) + {} + + const ActualMatrixType& matrix() const + { + return m_matrix; + } + + template + void grab(const EigenBase &mat) + { + m_matrix.~Ref(); + ::new (&m_matrix) Ref(mat.derived()); + } + + void grab(const Ref &mat) + { + if(&(mat.derived()) != &m_matrix) + { + m_matrix.~Ref(); + ::new (&m_matrix) Ref(mat); + } + } + +protected: + MatrixType m_dummy; // used to default initialize the Ref<> object + ActualMatrixType m_matrix; +}; + +// MatrixType is not compatible with Ref<> -> matrix-free wrapper +template +class generic_matrix_wrapper +{ +public: + typedef MatrixType ActualMatrixType; + template struct ConstSelfAdjointViewReturnType + { + typedef ActualMatrixType Type; + }; + + enum { + MatrixFree = true + }; + + generic_matrix_wrapper() + : mp_matrix(0) + {} + + generic_matrix_wrapper(const MatrixType &mat) + : mp_matrix(&mat) + {} + + const ActualMatrixType& matrix() const + { + return *mp_matrix; + } + + void grab(const MatrixType &mat) + { + mp_matrix = &mat; + } + +protected: + const ActualMatrixType *mp_matrix; +}; + +} + /** \ingroup IterativeLinearSolvers_Module * \brief Base class for linear iterative solvers * @@ -31,13 +153,17 @@ public: typedef typename MatrixType::StorageIndex StorageIndex; typedef typename MatrixType::RealScalar RealScalar; + enum { + ColsAtCompileTime = MatrixType::ColsAtCompileTime, + MaxColsAtCompileTime = MatrixType::MaxColsAtCompileTime + }; + public: using Base::derived; /** Default constructor. 
*/ IterativeSolverBase() - : m_dummy(0,0), mp_matrix(m_dummy) { init(); } @@ -54,10 +180,10 @@ public: */ template explicit IterativeSolverBase(const EigenBase& A) - : mp_matrix(A.derived()) + : m_matrixWrapper(A.derived()) { init(); - compute(mp_matrix); + compute(matrix()); } ~IterativeSolverBase() {} @@ -71,7 +197,7 @@ public: Derived& analyzePattern(const EigenBase& A) { grab(A.derived()); - m_preconditioner.analyzePattern(mp_matrix); + m_preconditioner.analyzePattern(matrix()); m_isInitialized = true; m_analysisIsOk = true; m_info = m_preconditioner.info(); @@ -92,7 +218,7 @@ public: { eigen_assert(m_analysisIsOk && "You must first call analyzePattern()"); grab(A.derived()); - m_preconditioner.factorize(mp_matrix); + m_preconditioner.factorize(matrix()); m_factorizationIsOk = true; m_info = m_preconditioner.info(); return derived(); @@ -112,7 +238,7 @@ public: Derived& compute(const EigenBase& A) { grab(A.derived()); - m_preconditioner.compute(mp_matrix); + m_preconditioner.compute(matrix()); m_isInitialized = true; m_analysisIsOk = true; m_factorizationIsOk = true; @@ -121,10 +247,10 @@ public: } /** \internal */ - Index rows() const { return mp_matrix.rows(); } + Index rows() const { return matrix().rows(); } /** \internal */ - Index cols() const { return mp_matrix.cols(); } + Index cols() const { return matrix().cols(); } /** \returns the tolerance threshold used by the stopping criteria. * \sa setTolerance() @@ -154,7 +280,7 @@ public: */ Index maxIterations() const { - return (m_maxIterations<0) ? 2*mp_matrix.cols() : m_maxIterations; + return (m_maxIterations<0) ? 2*matrix().cols() : m_maxIterations; } /** Sets the max number of iterations. @@ -234,25 +360,22 @@ protected: m_maxIterations = -1; m_tolerance = NumTraits::epsilon(); } - - template - void grab(const EigenBase &A) + + typedef internal::generic_matrix_wrapper MatrixWrapper; + typedef typename MatrixWrapper::ActualMatrixType ActualMatrixType; + + const ActualMatrixType& matrix() const { - mp_matrix.~Ref(); - ::new (&mp_matrix) Ref(A.derived()); + return m_matrixWrapper.matrix(); } - void grab(const Ref &A) + template + void grab(const InputType &A) { - if(&(A.derived()) != &mp_matrix) - { - mp_matrix.~Ref(); - ::new (&mp_matrix) Ref(A); - } + m_matrixWrapper.grab(A); } - MatrixType m_dummy; - Ref mp_matrix; + MatrixWrapper m_matrixWrapper; Preconditioner m_preconditioner; Index m_maxIterations; diff --git a/Eigen/src/IterativeLinearSolvers/LeastSquareConjugateGradient.h b/Eigen/src/IterativeLinearSolvers/LeastSquareConjugateGradient.h index 1d819927e..0aea0e099 100644 --- a/Eigen/src/IterativeLinearSolvers/LeastSquareConjugateGradient.h +++ b/Eigen/src/IterativeLinearSolvers/LeastSquareConjugateGradient.h @@ -119,6 +119,8 @@ struct traits > * \tparam _MatrixType the type of the matrix A, can be a dense or a sparse matrix. * \tparam _Preconditioner the type of the preconditioner. Default is LeastSquareDiagonalPreconditioner * + * \implsparsesolverconcept + * * The maximal number of iterations and tolerance value can be controlled via the setMaxIterations() * and setTolerance() methods. The defaults are the size of the problem for the maximal number of iterations * and NumTraits::epsilon() for the tolerance. 
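The grab() members above re-seat an Eigen::Ref by ending its lifetime and reconstructing it in place, since Ref has no rebinding assignment. A standalone sketch of that placement-new idiom:

  #include <new>
  #include <Eigen/Core>

  int main()
  {
    Eigen::MatrixXd A = Eigen::MatrixXd::Identity(2, 2);
    Eigen::MatrixXd B = 2.0 * A;

    Eigen::Ref<const Eigen::MatrixXd> ref(A);
    // Destroy, then reconstruct in the same storage, as grab() does.
    ref.~Ref();
    ::new (&ref) Eigen::Ref<const Eigen::MatrixXd>(B);

    return ref(0, 0) == 2.0 ? 0 : 1;  // ref now views B
  }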
@@ -147,7 +149,7 @@ template< typename _MatrixType, typename _Preconditioner> class LeastSquaresConjugateGradient : public IterativeSolverBase > { typedef IterativeSolverBase Base; - using Base::mp_matrix; + using Base::matrix; using Base::m_error; using Base::m_iterations; using Base::m_info; @@ -173,7 +175,8 @@ public: * this class becomes invalid. Call compute() to update it with the new * matrix A, or modify a copy of A. */ - explicit LeastSquaresConjugateGradient(const MatrixType& A) : Base(A) {} + template + explicit LeastSquaresConjugateGradient(const EigenBase& A) : Base(A.derived()) {} ~LeastSquaresConjugateGradient() {} @@ -190,7 +193,7 @@ public: m_error = Base::m_tolerance; typename Dest::ColXpr xj(x,j); - internal::least_square_conjugate_gradient(mp_matrix, b.col(j), xj, Base::m_preconditioner, m_iterations, m_error); + internal::least_square_conjugate_gradient(matrix(), b.col(j), xj, Base::m_preconditioner, m_iterations, m_error); } m_isInitialized = true; diff --git a/Eigen/src/LU/FullPivLU.h b/Eigen/src/LU/FullPivLU.h index 07a87cbc6..0c4d63923 100644 --- a/Eigen/src/LU/FullPivLU.h +++ b/Eigen/src/LU/FullPivLU.h @@ -10,12 +10,14 @@ #ifndef EIGEN_LU_H #define EIGEN_LU_H -namespace Eigen { +namespace Eigen { namespace internal { template struct traits > : traits<_MatrixType> { + typedef MatrixXpr XprKind; + typedef SolverStorage StorageKind; enum { Flags = 0 }; }; @@ -53,21 +55,18 @@ template struct traits > * \sa MatrixBase::fullPivLu(), MatrixBase::determinant(), MatrixBase::inverse() */ template class FullPivLU + : public SolverBase > { public: typedef _MatrixType MatrixType; + typedef SolverBase Base; + + EIGEN_GENERIC_PUBLIC_INTERFACE(FullPivLU) + // FIXME StorageIndex defined in EIGEN_GENERIC_PUBLIC_INTERFACE should be int enum { - RowsAtCompileTime = MatrixType::RowsAtCompileTime, - ColsAtCompileTime = MatrixType::ColsAtCompileTime, - Options = MatrixType::Options, MaxRowsAtCompileTime = MatrixType::MaxRowsAtCompileTime, MaxColsAtCompileTime = MatrixType::MaxColsAtCompileTime }; - typedef typename MatrixType::Scalar Scalar; - typedef typename NumTraits::Real RealScalar; - typedef typename internal::traits::StorageKind StorageKind; - // FIXME should be int - typedef typename MatrixType::StorageIndex StorageIndex; typedef typename internal::plain_row_type::type IntRowVectorType; typedef typename internal::plain_col_type::type IntColVectorType; typedef PermutationMatrix PermutationQType; @@ -223,6 +222,7 @@ template class FullPivLU * * \sa TriangularView::solve(), kernel(), inverse() */ + // FIXME this is a copy-paste of the base-class member to add the isInitialized assertion. 
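  // [Editorial sketch] With FullPivLU deriving from SolverBase, the usual
  // solving pattern applies together with the rank-revealing queries; the
  // values below are illustrative only:
  //   MatrixXd A(3,3);  A << 1,2,3, 4,5,6, 7,8,10;
  //   VectorXd b(3);    b << 3,3,4;
  //   FullPivLU<MatrixXd> lu(A);
  //   VectorXd x = lu.solve(b);   // solves A x = b
  //   Index r = lu.rank();        // 3 for this non-singular example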
template inline const Solve solve(const MatrixBase& b) const @@ -384,22 +384,26 @@ template class FullPivLU inline Index rows() const { return m_lu.rows(); } inline Index cols() const { return m_lu.cols(); } - + #ifndef EIGEN_PARSED_BY_DOXYGEN template EIGEN_DEVICE_FUNC void _solve_impl(const RhsType &rhs, DstType &dst) const; + + template + EIGEN_DEVICE_FUNC + void _solve_impl_transposed(const RhsType &rhs, DstType &dst) const; #endif protected: - + static void check_template_parameters() { EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar); } - + void computeInPlace(); - + MatrixType m_lu; PermutationPType m_p; PermutationQType m_q; @@ -447,15 +451,15 @@ template FullPivLU& FullPivLU::compute(const EigenBase& matrix) { check_template_parameters(); - + // the permutations are stored as int indices, so just to be sure: eigen_assert(matrix.rows()<=NumTraits::highest() && matrix.cols()<=NumTraits::highest()); - + m_isInitialized = true; m_lu = matrix.derived(); - + computeInPlace(); - + return *this; } @@ -709,7 +713,7 @@ struct image_retval > template template void FullPivLU<_MatrixType>::_solve_impl(const RhsType &rhs, DstType &dst) const -{ +{ /* The decomposition PAQ = LU can be rewritten as A = P^{-1} L U Q^{-1}. * So we proceed as follows: * Step 1: compute c = P * rhs. @@ -720,7 +724,7 @@ void FullPivLU<_MatrixType>::_solve_impl(const RhsType &rhs, DstType &dst) const const Index rows = this->rows(), cols = this->cols(), - nonzero_pivots = this->nonzeroPivots(); + nonzero_pivots = this->rank(); eigen_assert(rhs.rows() == rows); const Index smalldim = (std::min)(rows, cols); @@ -753,6 +757,70 @@ void FullPivLU<_MatrixType>::_solve_impl(const RhsType &rhs, DstType &dst) const for(Index i = nonzero_pivots; i < m_lu.cols(); ++i) dst.row(permutationQ().indices().coeff(i)).setZero(); } + +template +template +void FullPivLU<_MatrixType>::_solve_impl_transposed(const RhsType &rhs, DstType &dst) const +{ + /* The decomposition PAQ = LU can be rewritten as A = P^{-1} L U Q^{-1}, + * and since permutations are real and unitary, we can write this + * as A^T = Q U^T L^T P, + * So we proceed as follows: + * Step 1: compute c = Q^T rhs. + * Step 2: replace c by the solution x to U^T x = c. May or may not exist. + * Step 3: replace c by the solution x to L^T x = c. + * Step 4: result = P^T c. + * If Conjugate is true, replace "^T" by "^*" above. 
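+   *
+   * At the API level, this path is what lets the SolverBase interface reuse
+   * the same factorization for transposed systems, e.g. (illustrative):
+   *   FullPivLU<MatrixXd> lu(A);
+   *   VectorXd xt = lu.transpose().solve(b);   // solves A^T x = b
+   *   VectorXd xa = lu.adjoint().solve(b);     // solves A^* x = b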
+ */ + + const Index rows = this->rows(), cols = this->cols(), + nonzero_pivots = this->rank(); + eigen_assert(rhs.rows() == cols); + const Index smalldim = (std::min)(rows, cols); + + if(nonzero_pivots == 0) + { + dst.setZero(); + return; + } + + typename RhsType::PlainObject c(rhs.rows(), rhs.cols()); + + // Step 1 + c = permutationQ().inverse() * rhs; + + if (Conjugate) { + // Step 2 + m_lu.topLeftCorner(nonzero_pivots, nonzero_pivots) + .template triangularView() + .adjoint() + .solveInPlace(c.topRows(nonzero_pivots)); + // Step 3 + m_lu.topLeftCorner(smalldim, smalldim) + .template triangularView() + .adjoint() + .solveInPlace(c.topRows(smalldim)); + } else { + // Step 2 + m_lu.topLeftCorner(nonzero_pivots, nonzero_pivots) + .template triangularView() + .transpose() + .solveInPlace(c.topRows(nonzero_pivots)); + // Step 3 + m_lu.topLeftCorner(smalldim, smalldim) + .template triangularView() + .transpose() + .solveInPlace(c.topRows(smalldim)); + } + + // Step 4 + PermutationPType invp = permutationP().inverse().eval(); + for(Index i = 0; i < smalldim; ++i) + dst.row(invp.indices().coeff(i)) = c.row(i); + for(Index i = smalldim; i < rows; ++i) + dst.row(invp.indices().coeff(i)).setZero(); +} + #endif namespace internal { @@ -765,7 +833,7 @@ struct Assignment >, internal::assign_ typedef FullPivLU LuType; typedef Inverse SrcXprType; static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op &) - { + { dst = src.nestedExpression().solve(MatrixType::Identity(src.rows(), src.cols())); } }; diff --git a/Eigen/src/LU/PartialPivLU.h b/Eigen/src/LU/PartialPivLU.h index 2c28818a3..50e920609 100644 --- a/Eigen/src/LU/PartialPivLU.h +++ b/Eigen/src/LU/PartialPivLU.h @@ -11,12 +11,14 @@ #ifndef EIGEN_PARTIALLU_H #define EIGEN_PARTIALLU_H -namespace Eigen { +namespace Eigen { namespace internal { template struct traits > : traits<_MatrixType> { + typedef MatrixXpr XprKind; + typedef SolverStorage StorageKind; typedef traits<_MatrixType> BaseTraits; enum { Flags = BaseTraits::Flags & RowMajorBit, @@ -58,33 +60,29 @@ template struct traits > * \sa MatrixBase::partialPivLu(), MatrixBase::determinant(), MatrixBase::inverse(), MatrixBase::computeInverse(), class FullPivLU */ template class PartialPivLU + : public SolverBase > { public: typedef _MatrixType MatrixType; + typedef SolverBase Base; + EIGEN_GENERIC_PUBLIC_INTERFACE(PartialPivLU) + // FIXME StorageIndex defined in EIGEN_GENERIC_PUBLIC_INTERFACE should be int enum { - RowsAtCompileTime = MatrixType::RowsAtCompileTime, - ColsAtCompileTime = MatrixType::ColsAtCompileTime, - Options = MatrixType::Options, MaxRowsAtCompileTime = MatrixType::MaxRowsAtCompileTime, MaxColsAtCompileTime = MatrixType::MaxColsAtCompileTime }; - typedef typename MatrixType::Scalar Scalar; - typedef typename NumTraits::Real RealScalar; - typedef typename internal::traits::StorageKind StorageKind; - // FIXME should be int - typedef typename MatrixType::StorageIndex StorageIndex; typedef PermutationMatrix PermutationType; typedef Transpositions TranspositionType; typedef typename MatrixType::PlainObject PlainObject; /** - * \brief Default Constructor. - * - * The default constructor is useful in cases in which the user intends to - * perform decompositions via PartialPivLU::compute(const MatrixType&). - */ + * \brief Default Constructor. + * + * The default constructor is useful in cases in which the user intends to + * perform decompositions via PartialPivLU::compute(const MatrixType&). 
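+     *
+     * For instance (illustrative):
+     * \code
+     * PartialPivLU<MatrixXd> lu;   // nothing decomposed yet
+     * lu.compute(A);               // factorizes A as PA = LU
+     * VectorXd x = lu.solve(b);
+     * \endcode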
+ */ PartialPivLU(); /** \brief Default Constructor with memory preallocation @@ -145,6 +143,7 @@ template class PartialPivLU * * \sa TriangularView::solve(), inverse(), computeInverse() */ + // FIXME this is a copy-paste of the base-class member to add the isInitialized assertion. template inline const Solve solve(const MatrixBase& b) const @@ -185,7 +184,7 @@ template class PartialPivLU inline Index rows() const { return m_lu.rows(); } inline Index cols() const { return m_lu.cols(); } - + #ifndef EIGEN_PARSED_BY_DOXYGEN template EIGEN_DEVICE_FUNC @@ -206,17 +205,44 @@ template class PartialPivLU m_lu.template triangularView().solveInPlace(dst); // Step 3 - m_lu.template triangularView().solveInPlace(dst); + m_lu.template triangularView().solveInPlace(dst); + } + + template + EIGEN_DEVICE_FUNC + void _solve_impl_transposed(const RhsType &rhs, DstType &dst) const { + /* The decomposition PA = LU can be rewritten as A = P^{-1} L U. + * So we proceed as follows: + * Step 1: compute c = Pb. + * Step 2: replace c by the solution x to Lx = c. + * Step 3: replace c by the solution x to Ux = c. + */ + + eigen_assert(rhs.rows() == m_lu.cols()); + + if (Conjugate) { + // Step 1 + dst = m_lu.template triangularView().adjoint().solve(rhs); + // Step 2 + m_lu.template triangularView().adjoint().solveInPlace(dst); + } else { + // Step 1 + dst = m_lu.template triangularView().transpose().solve(rhs); + // Step 2 + m_lu.template triangularView().transpose().solveInPlace(dst); + } + // Step 3 + dst = permutationP().transpose() * dst; } #endif protected: - + static void check_template_parameters() { EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar); } - + MatrixType m_lu; PermutationType m_p; TranspositionType m_rowsTranspositions; @@ -295,7 +321,7 @@ struct partial_lu_impl { Index rrows = rows-k-1; Index rcols = cols-k-1; - + Index row_of_biggest_in_col; Score biggest_in_corner = lu.col(k).tail(rows-k).unaryExpr(Scoring()).maxCoeff(&row_of_biggest_in_col); @@ -436,10 +462,10 @@ template PartialPivLU& PartialPivLU::compute(const EigenBase& matrix) { check_template_parameters(); - + // the row permutation is stored as int indices, so just to be sure: eigen_assert(matrix.rows()::highest()); - + m_lu = matrix.derived(); eigen_assert(matrix.rows() == matrix.cols() && "PartialPivLU is only for square (and moreover invertible) matrices"); @@ -481,7 +507,7 @@ MatrixType PartialPivLU::reconstructedMatrix() const return res; } -/***** Implementation of solve() *****************************************************/ +/***** Implementation details *****************************************************/ namespace internal { @@ -492,7 +518,7 @@ struct Assignment >, internal::assi typedef PartialPivLU LuType; typedef Inverse SrcXprType; static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op &) - { + { dst = src.nestedExpression().solve(MatrixType::Identity(src.rows(), src.cols())); } }; diff --git a/Eigen/src/PaStiXSupport/PaStiXSupport.h b/Eigen/src/PaStiXSupport/PaStiXSupport.h index 4e73edf5b..1999fd289 100644 --- a/Eigen/src/PaStiXSupport/PaStiXSupport.h +++ b/Eigen/src/PaStiXSupport/PaStiXSupport.h @@ -141,6 +141,10 @@ class PastixBase : public SparseSolverBase typedef typename MatrixType::StorageIndex StorageIndex; typedef Matrix Vector; typedef SparseMatrix ColSpMatrix; + enum { + ColsAtCompileTime = MatrixType::ColsAtCompileTime, + MaxColsAtCompileTime = MatrixType::MaxColsAtCompileTime + }; public: @@ -398,7 +402,9 @@ bool PastixBase::_solve_impl(const MatrixBase &b, MatrixBase &x * NOTE : Note 
that if the analysis and factorization phase are called separately, * the input matrix will be symmetrized at each call, hence it is advised to * symmetrize the matrix in a end-user program and set \p IsStrSym to true - * + * + * \implsparsesolverconcept + * * \sa \ref TutorialSparseDirectSolvers * */ @@ -509,7 +515,9 @@ class PastixLU : public PastixBase< PastixLU<_MatrixType> > * * \tparam MatrixType the type of the sparse matrix A, it must be a SparseMatrix<> * \tparam UpLo The part of the matrix to use : Lower or Upper. The default is Lower as required by PaStiX - * + * + * \implsparsesolverconcept + * * \sa \ref TutorialSparseDirectSolvers */ template @@ -590,7 +598,9 @@ class PastixLLT : public PastixBase< PastixLLT<_MatrixType, _UpLo> > * * \tparam MatrixType the type of the sparse matrix A, it must be a SparseMatrix<> * \tparam UpLo The part of the matrix to use : Lower or Upper. The default is Lower as required by PaStiX - * + * + * \implsparsesolverconcept + * * \sa \ref TutorialSparseDirectSolvers */ template diff --git a/Eigen/src/PardisoSupport/PardisoSupport.h b/Eigen/src/PardisoSupport/PardisoSupport.h index 234e3213b..7c238ce3c 100755 --- a/Eigen/src/PardisoSupport/PardisoSupport.h +++ b/Eigen/src/PardisoSupport/PardisoSupport.h @@ -117,7 +117,9 @@ class PardisoImpl : public SparseSolverBase typedef Matrix IntColVectorType; typedef Array ParameterType; enum { - ScalarIsComplex = NumTraits::IsComplex + ScalarIsComplex = NumTraits::IsComplex, + ColsAtCompileTime = Dynamic, + MaxColsAtCompileTime = Dynamic }; PardisoImpl() @@ -371,6 +373,8 @@ void PardisoImpl::_solve_impl(const MatrixBase &b, MatrixBase * * \tparam _MatrixType the type of the sparse matrix A, it must be a SparseMatrix<> * + * \implsparsesolverconcept + * * \sa \ref TutorialSparseDirectSolvers */ template @@ -421,6 +425,8 @@ class PardisoLU : public PardisoImpl< PardisoLU > * \tparam UpLo can be any bitwise combination of Upper, Lower. The default is Upper, meaning only the upper triangular part has to be used. * Upper|Lower can be used to tell both triangular parts can be used as input. * + * \implsparsesolverconcept + * * \sa \ref TutorialSparseDirectSolvers */ template @@ -479,6 +485,8 @@ class PardisoLLT : public PardisoImpl< PardisoLLT > * Symmetric can be used for symmetric, non-selfadjoint complex matrices, the default being to assume a selfadjoint matrix. * Upper|Lower can be used to tell both triangular parts can be used as input. * + * \implsparsesolverconcept + * * \sa \ref TutorialSparseDirectSolvers */ template diff --git a/Eigen/src/QR/ColPivHouseholderQR_MKL.h b/Eigen/src/QR/ColPivHouseholderQR_MKL.h old mode 100644 new mode 100755 index 7b6ba0a5e..1203d0d36 --- a/Eigen/src/QR/ColPivHouseholderQR_MKL.h +++ b/Eigen/src/QR/ColPivHouseholderQR_MKL.h @@ -41,10 +41,10 @@ namespace Eigen { /** \internal Specialization for the data types supported by MKL */ #define EIGEN_MKL_QR_COLPIV(EIGTYPE, MKLTYPE, MKLPREFIX, EIGCOLROW, MKLCOLROW) \ -template<> inline \ +template<> template inline \ ColPivHouseholderQR >& \ ColPivHouseholderQR >::compute( \ - const Matrix& matrix) \ + const EigenBase& matrix) \ \ { \ using std::abs; \ @@ -52,9 +52,9 @@ ColPivHouseholderQR - * NOTE - * - */ + * \ingroup SPQRSupport_Module + * \class SPQR + * \brief Sparse QR factorization based on SuiteSparseQR library + * + * This class is used to perform a multithreaded and multifrontal rank-revealing QR decomposition + * of sparse matrices. The result is then used to solve linear leasts_square systems. 
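+  *
+  * Like every solver tagged \implsparsesolverconcept in this changeset, SPQR
+  * exposes the common compute()/solve()/info() interface, so generic code can
+  * be written once against it. Editorial sketch (\c solve_with is a
+  * hypothetical helper, not part of Eigen):
+  * \code
+  * template<typename Solver>
+  * VectorXd solve_with(Solver& solver, const SparseMatrix<double>& A, const VectorXd& b)
+  * {
+  *   solver.compute(A);                   // analyzePattern() + factorize()
+  *   if(solver.info() != Success)
+  *     return VectorXd::Zero(b.size());   // illustrative fallback only
+  *   return solver.solve(b);
+  * }
+  * \endcode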
+ * Clearly, a QR factorization is returned such that A*P = Q*R where : + * + * P is the column permutation. Use colsPermutation() to get it. + * + * Q is the orthogonal matrix represented as Householder reflectors. + * Use matrixQ() to get an expression and matrixQ().transpose() to get the transpose. + * You can then apply it to a vector. + * + * R is the sparse triangular factor. Use matrixQR() to get it as SparseMatrix. + * NOTE : The Index type of R is always SuiteSparse_long. You can get it with SPQR::Index + * + * \tparam _MatrixType The type of the sparse matrix A, must be a column-major SparseMatrix<> + * + * \implsparsesolverconcept + * + * + */ template class SPQR : public SparseSolverBase > { @@ -63,9 +65,13 @@ class SPQR : public SparseSolverBase > public: typedef typename _MatrixType::Scalar Scalar; typedef typename _MatrixType::RealScalar RealScalar; - typedef UF_long StorageIndex ; + typedef SuiteSparse_long StorageIndex ; typedef SparseMatrix MatrixType; typedef Map > PermutationType; + enum { + ColsAtCompileTime = Dynamic, + MaxColsAtCompileTime = Dynamic + }; public: SPQR() : m_ordering(SPQR_ORDERING_DEFAULT), m_allow_tol(SPQR_DEFAULT_TOL), m_tolerance (NumTraits::epsilon()), m_useDefaultThreshold(true) diff --git a/Eigen/src/SVD/JacobiSVD.h b/Eigen/src/SVD/JacobiSVD.h old mode 100644 new mode 100755 index e29d36cf2..59c965e15 --- a/Eigen/src/SVD/JacobiSVD.h +++ b/Eigen/src/SVD/JacobiSVD.h @@ -539,7 +539,7 @@ template class JacobiSVD * according to the specified problem size. * \sa JacobiSVD() */ - explicit JacobiSVD(Index rows, Index cols, unsigned int computationOptions = 0) + JacobiSVD(Index rows, Index cols, unsigned int computationOptions = 0) { allocate(rows, cols, computationOptions); } @@ -666,7 +666,7 @@ void JacobiSVD::allocate(Index rows, Index cols, u if(m_cols>m_rows) m_qr_precond_morecols.allocate(*this); if(m_rows>m_cols) m_qr_precond_morerows.allocate(*this); - if(m_cols!=m_cols) m_scaledMatrix.resize(rows,cols); + if(m_rows!=m_cols) m_scaledMatrix.resize(rows,cols); } template diff --git a/Eigen/src/SparseCholesky/SimplicialCholesky.h b/Eigen/src/SparseCholesky/SimplicialCholesky.h index f56298e8c..1343eb15c 100644 --- a/Eigen/src/SparseCholesky/SimplicialCholesky.h +++ b/Eigen/src/SparseCholesky/SimplicialCholesky.h @@ -71,6 +71,11 @@ class SimplicialCholeskyBase : public SparseSolverBase typedef Matrix VectorType; typedef Matrix VectorI; + enum { + ColsAtCompileTime = MatrixType::ColsAtCompileTime, + MaxColsAtCompileTime = MatrixType::MaxColsAtCompileTime + }; + public: using Base::derived; @@ -319,6 +324,8 @@ template struct traits or NaturalOrdering<>. Default is AMDOrdering<> * + * \implsparsesolverconcept + * * \sa class SimplicialLDLT, class AMDOrdering, class NaturalOrdering */ template @@ -408,6 +415,8 @@ public: * or Upper. Default is Lower. * \tparam _Ordering The ordering method to use, either AMDOrdering<> or NaturalOrdering<>. Default is AMDOrdering<> * + * \implsparsesolverconcept + * * \sa class SimplicialLLT, class AMDOrdering, class NaturalOrdering */ template diff --git a/Eigen/src/SparseCore/ConservativeSparseSparseProduct.h b/Eigen/src/SparseCore/ConservativeSparseSparseProduct.h index 6e664515d..0f6835846 100644 --- a/Eigen/src/SparseCore/ConservativeSparseSparseProduct.h +++ b/Eigen/src/SparseCore/ConservativeSparseSparseProduct.h @@ -1,7 +1,7 @@ // This file is part of Eigen, a lightweight C++ template library // for linear algebra. 
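// [Editorial sketch] Usage of the SimplicialLDLT solver documented above, on
// a trivially SPD system; all values are illustrative:
//   SparseMatrix<double> A(2,2);
//   A.insert(0,0) = 4; A.insert(1,1) = 9;
//   VectorXd b(2); b << 8, 27;
//   SimplicialLDLT<SparseMatrix<double> > ldlt(A);
//   VectorXd x = ldlt.solve(b);   // x = (2, 3)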
// -// Copyright (C) 2008-2014 Gael Guennebaud +// Copyright (C) 2008-2015 Gael Guennebaud // // This Source Code Form is subject to the terms of the Mozilla // Public License v. 2.0. If a copy of the MPL was not distributed @@ -138,7 +138,7 @@ struct conservative_sparse_sparse_product_selector RowMajorMatrix; typedef SparseMatrix ColMajorMatrixAux; - typedef typename sparse_eval::type ColMajorMatrix; + typedef typename sparse_eval::type ColMajorMatrix; // If the result is tall and thin (in the extreme case a column vector) // then it is faster to sort the coefficients inplace instead of transposing twice. @@ -255,6 +255,89 @@ struct conservative_sparse_sparse_product_selector +static void sparse_sparse_to_dense_product_impl(const Lhs& lhs, const Rhs& rhs, ResultType& res) +{ + typedef typename remove_all::type::Scalar Scalar; + Index cols = rhs.outerSize(); + eigen_assert(lhs.outerSize() == rhs.innerSize()); + + evaluator lhsEval(lhs); + evaluator rhsEval(rhs); + + for (Index j=0; j::InnerIterator rhsIt(rhsEval, j); rhsIt; ++rhsIt) + { + Scalar y = rhsIt.value(); + Index k = rhsIt.index(); + for (typename evaluator::InnerIterator lhsIt(lhsEval, k); lhsIt; ++lhsIt) + { + Index i = lhsIt.index(); + Scalar x = lhsIt.value(); + res.coeffRef(i,j) += x * y; + } + } + } +} + + +} // end namespace internal + +namespace internal { + +template::Flags&RowMajorBit) ? RowMajor : ColMajor, + int RhsStorageOrder = (traits::Flags&RowMajorBit) ? RowMajor : ColMajor> +struct sparse_sparse_to_dense_product_selector; + +template +struct sparse_sparse_to_dense_product_selector +{ + static void run(const Lhs& lhs, const Rhs& rhs, ResultType& res) + { + internal::sparse_sparse_to_dense_product_impl(lhs, rhs, res); + } +}; + +template +struct sparse_sparse_to_dense_product_selector +{ + static void run(const Lhs& lhs, const Rhs& rhs, ResultType& res) + { + typedef SparseMatrix ColMajorMatrix; + ColMajorMatrix lhsCol(lhs); + internal::sparse_sparse_to_dense_product_impl(lhsCol, rhs, res); + } +}; + +template +struct sparse_sparse_to_dense_product_selector +{ + static void run(const Lhs& lhs, const Rhs& rhs, ResultType& res) + { + typedef SparseMatrix ColMajorMatrix; + ColMajorMatrix rhsCol(rhs); + internal::sparse_sparse_to_dense_product_impl(lhs, rhsCol, res); + } +}; + +template +struct sparse_sparse_to_dense_product_selector +{ + static void run(const Lhs& lhs, const Rhs& rhs, ResultType& res) + { + Transpose trRes(res); + internal::sparse_sparse_to_dense_product_impl >(rhs, lhs, trRes); + } +}; + + } // end namespace internal } // end namespace Eigen diff --git a/Eigen/src/SparseCore/SparseAssign.h b/Eigen/src/SparseCore/SparseAssign.h index e984bbdb3..4a8dd12e4 100644 --- a/Eigen/src/SparseCore/SparseAssign.h +++ b/Eigen/src/SparseCore/SparseAssign.h @@ -64,6 +64,7 @@ struct Sparse2Dense {}; template<> struct AssignmentKind { typedef Sparse2Sparse Kind; }; template<> struct AssignmentKind { typedef Sparse2Sparse Kind; }; template<> struct AssignmentKind { typedef Sparse2Dense Kind; }; +template<> struct AssignmentKind { typedef Sparse2Dense Kind; }; template @@ -132,13 +133,16 @@ struct Assignment } }; -// Sparse to Dense assignment +// Generic Sparse to Dense assignment template< typename DstXprType, typename SrcXprType, typename Functor, typename Scalar> struct Assignment { static void run(DstXprType &dst, const SrcXprType &src, const Functor &func) { eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols()); + + if(internal::is_same >::value) + dst.setZero(); internal::evaluator srcEval(src); 
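      // Note: the is_same test above makes a plain assignment (dst = src)
      // start from a zeroed destination, while compound updates such as
      // dst += src keep the existing dense coefficients and only combine the
      // nonzeros visited by the iteration below.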
internal::evaluator dstEval(dst); @@ -149,23 +153,6 @@ struct Assignment } }; -template< typename DstXprType, typename SrcXprType, typename Scalar> -struct Assignment, Sparse2Dense, Scalar> -{ - static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op &) - { - eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols()); - - dst.setZero(); - internal::evaluator srcEval(src); - internal::evaluator dstEval(dst); - const Index outerEvaluationSize = (internal::evaluator::Flags&RowMajorBit) ? src.rows() : src.cols(); - for (Index j=0; j::InnerIterator i(srcEval,j); i; ++i) - dstEval.coeffRef(i.row(),i.col()) = i.value(); - } -}; - // Specialization for "dst = dec.solve(rhs)" // NOTE we need to specialize it for Sparse2Sparse to avoid ambiguous specialization error template diff --git a/Eigen/src/SparseCore/SparseBlock.h b/Eigen/src/SparseCore/SparseBlock.h index 9afb5327e..10be84856 100644 --- a/Eigen/src/SparseCore/SparseBlock.h +++ b/Eigen/src/SparseCore/SparseBlock.h @@ -23,6 +23,8 @@ public: enum { IsRowMajor = internal::traits::IsRowMajor }; protected: enum { OuterSize = IsRowMajor ? BlockRows : BlockCols }; + typedef SparseMatrixBase Base; + using Base::convert_index; public: EIGEN_SPARSE_PUBLIC_INTERFACE(BlockType) @@ -88,10 +90,11 @@ class sparse_matrix_block_impl { typedef typename internal::remove_all::type _MatrixTypeNested; typedef Block BlockType; + typedef SparseCompressedBase > Base; + using Base::convert_index; public: enum { IsRowMajor = internal::traits::IsRowMajor }; - typedef SparseCompressedBase > Base; - _EIGEN_SPARSE_PUBLIC_INTERFACE(BlockType) + EIGEN_SPARSE_PUBLIC_INTERFACE(BlockType) protected: typedef typename Base::IndexVector IndexVector; enum { OuterSize = IsRowMajor ? BlockRows : BlockCols }; @@ -114,7 +117,8 @@ public: // and/or it is not at the end of the nonzeros of the underlying matrix. // 1 - eval to a temporary to avoid transposition and/or aliasing issues - SparseMatrix tmp(other); + Ref > tmp(other.derived()); + eigen_internal_assert(tmp.outerSize()==m_outerSize.value()); // 2 - let's check whether there is enough allocated memory Index nnz = tmp.nonZeros(); @@ -127,6 +131,7 @@ public: ? 
Index(matrix.data().allocatedSize()) + block_size : block_size; + bool update_trailing_pointers = false; if(nnz>free_size) { // realloc manually to reduce copies @@ -135,8 +140,8 @@ public: internal::smart_copy(&m_matrix.data().value(0), &m_matrix.data().value(0) + start, &newdata.value(0)); internal::smart_copy(&m_matrix.data().index(0), &m_matrix.data().index(0) + start, &newdata.index(0)); - internal::smart_copy(&tmp.data().value(0), &tmp.data().value(0) + nnz, &newdata.value(start)); - internal::smart_copy(&tmp.data().index(0), &tmp.data().index(0) + nnz, &newdata.index(start)); + internal::smart_copy(tmp.valuePtr(), tmp.valuePtr() + nnz, &newdata.value(start)); + internal::smart_copy(tmp.innerIndexPtr(), tmp.innerIndexPtr() + nnz, &newdata.index(start)); internal::smart_copy(&matrix.data().value(end), &matrix.data().value(end) + tail_size, &newdata.value(start+nnz)); internal::smart_copy(&matrix.data().index(end), &matrix.data().index(end) + tail_size, &newdata.index(start+nnz)); @@ -144,35 +149,53 @@ public: newdata.resize(m_matrix.outerIndexPtr()[m_matrix.outerSize()] - block_size + nnz); matrix.data().swap(newdata); + + update_trailing_pointers = true; } else { - // no need to realloc, simply copy the tail at its respective position and insert tmp - matrix.data().resize(start + nnz + tail_size); + if(m_matrix.isCompressed()) + { + // no need to realloc, simply copy the tail at its respective position and insert tmp + matrix.data().resize(start + nnz + tail_size); - internal::smart_memmove(&matrix.data().value(end), &matrix.data().value(end) + tail_size, &matrix.data().value(start + nnz)); - internal::smart_memmove(&matrix.data().index(end), &matrix.data().index(end) + tail_size, &matrix.data().index(start + nnz)); + internal::smart_memmove(&matrix.data().value(end), &matrix.data().value(end) + tail_size, &matrix.data().value(start + nnz)); + internal::smart_memmove(&matrix.data().index(end), &matrix.data().index(end) + tail_size, &matrix.data().index(start + nnz)); - internal::smart_copy(&tmp.data().value(0), &tmp.data().value(0) + nnz, &matrix.data().value(start)); - internal::smart_copy(&tmp.data().index(0), &tmp.data().index(0) + nnz, &matrix.data().index(start)); + update_trailing_pointers = true; + } + + internal::smart_copy(tmp.valuePtr(), tmp.valuePtr() + nnz, &matrix.data().value(start)); + internal::smart_copy(tmp.innerIndexPtr(), tmp.innerIndexPtr() + nnz, &matrix.data().index(start)); } - - // update innerNonZeros - if(!m_matrix.isCompressed()) - for(Index j=0; j(nnz - block_size); - for(Index k = m_outerStart + m_outerSize.value(); k<=matrix.outerSize(); ++k) + else { - matrix.outerIndexPtr()[k] += offset; + StorageIndex p = StorageIndex(start); + for(Index k=0; k(nnz - block_size); + for(Index k = m_outerStart + m_outerSize.value(); k<=matrix.outerSize(); ++k) + { + matrix.outerIndexPtr()[k] += offset; + } } return derived(); @@ -289,7 +312,7 @@ private: template BlockImpl(const SparseMatrixBase& xpr, Index i); template BlockImpl(const SparseMatrixBase& xpr); }; - + //---------- /** \returns the \a outer -th column (resp. 
row) of the matrix \c *this if \c *this @@ -339,7 +362,9 @@ template class BlockImpl : public SparseMatrixBase >, internal::no_assignment_operator { - typedef Block BlockType; + typedef Block BlockType; + typedef SparseMatrixBase Base; + using Base::convert_index; public: enum { IsRowMajor = internal::traits::IsRowMajor }; EIGEN_SPARSE_PUBLIC_INTERFACE(BlockType) diff --git a/Eigen/src/SparseCore/SparseCompressedBase.h b/Eigen/src/SparseCore/SparseCompressedBase.h index 0dbb94faf..c223e4f42 100644 --- a/Eigen/src/SparseCore/SparseCompressedBase.h +++ b/Eigen/src/SparseCore/SparseCompressedBase.h @@ -28,7 +28,7 @@ class SparseCompressedBase { public: typedef SparseMatrixBase Base; - _EIGEN_SPARSE_PUBLIC_INTERFACE(SparseCompressedBase) + EIGEN_SPARSE_PUBLIC_INTERFACE(SparseCompressedBase) using Base::operator=; using Base::IsRowMajor; @@ -45,13 +45,14 @@ class SparseCompressedBase /** \returns the number of non zero coefficients */ inline Index nonZeros() const { - if(isCompressed()) + if(Derived::IsVectorAtCompileTime && outerIndexPtr()==0) + return derived().nonZeros(); + else if(isCompressed()) return outerIndexPtr()[derived().outerSize()]-outerIndexPtr()[0]; else if(derived().outerSize()==0) return 0; else return innerNonZeros().sum(); - } /** \returns a const pointer to the array of values. @@ -74,10 +75,12 @@ class SparseCompressedBase /** \returns a const pointer to the array of the starting positions of the inner vectors. * This function is aimed at interoperability with other libraries. + * \warning it returns the null pointer 0 for SparseVector * \sa valuePtr(), innerIndexPtr() */ inline const StorageIndex* outerIndexPtr() const { return derived().outerIndexPtr(); } /** \returns a non-const pointer to the array of the starting positions of the inner vectors. * This function is aimed at interoperability with other libraries. + * \warning it returns the null pointer 0 for SparseVector * \sa valuePtr(), innerIndexPtr() */ inline StorageIndex* outerIndexPtr() { return derived().outerIndexPtr(); } @@ -92,7 +95,12 @@ class SparseCompressedBase /** \returns whether \c *this is in compressed form. */ inline bool isCompressed() const { return innerNonZeroPtr()==0; } - + + protected: + /** Default constructor. Do nothing. 
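     * It is protected so that SparseCompressedBase is only instantiated as a
     * base of its derived classes; the private templated copy constructor
     * declared below similarly prevents copies through the base class.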
*/ + SparseCompressedBase() {} + private: + template explicit SparseCompressedBase(const SparseCompressedBase&); }; template @@ -100,12 +108,33 @@ class SparseCompressedBase::InnerIterator { public: InnerIterator(const SparseCompressedBase& mat, Index outer) - : m_values(mat.valuePtr()), m_indices(mat.innerIndexPtr()), m_outer(outer), m_id(mat.outerIndexPtr()[outer]) + : m_values(mat.valuePtr()), m_indices(mat.innerIndexPtr()), m_outer(outer) { - if(mat.isCompressed()) - m_end = mat.outerIndexPtr()[outer+1]; + if(Derived::IsVectorAtCompileTime && mat.outerIndexPtr()==0) + { + m_id = 0; + m_end = mat.nonZeros(); + } else - m_end = m_id + mat.innerNonZeroPtr()[outer]; + { + m_id = mat.outerIndexPtr()[outer]; + if(mat.isCompressed()) + m_end = mat.outerIndexPtr()[outer+1]; + else + m_end = m_id + mat.innerNonZeroPtr()[outer]; + } + } + + explicit InnerIterator(const SparseCompressedBase& mat) + : m_values(mat.valuePtr()), m_indices(mat.innerIndexPtr()), m_outer(0), m_id(0), m_end(mat.nonZeros()) + { + EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived); + } + + explicit InnerIterator(const internal::CompressedStorage& data) + : m_values(&data.value(0)), m_indices(&data.index(0)), m_outer(0), m_id(0), m_end(data.size()) + { + EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived); } inline InnerIterator& operator++() { m_id++; return *this; } @@ -114,16 +143,16 @@ class SparseCompressedBase::InnerIterator inline Scalar& valueRef() { return const_cast(m_values[m_id]); } inline StorageIndex index() const { return m_indices[m_id]; } - inline Index outer() const { return m_outer; } - inline Index row() const { return IsRowMajor ? m_outer : index(); } - inline Index col() const { return IsRowMajor ? index() : m_outer; } + inline Index outer() const { return m_outer.value(); } + inline Index row() const { return IsRowMajor ? m_outer.value() : index(); } + inline Index col() const { return IsRowMajor ? 
index() : m_outer.value(); } inline operator bool() const { return (m_id < m_end); } protected: const Scalar* m_values; const StorageIndex* m_indices; - const Index m_outer; + const internal::variable_if_dynamic m_outer; Index m_id; Index m_end; private: @@ -138,12 +167,33 @@ class SparseCompressedBase::ReverseInnerIterator { public: ReverseInnerIterator(const SparseCompressedBase& mat, Index outer) - : m_values(mat.valuePtr()), m_indices(mat.innerIndexPtr()), m_outer(outer), m_start(mat.outerIndexPtr()[outer]) + : m_values(mat.valuePtr()), m_indices(mat.innerIndexPtr()), m_outer(outer) { - if(mat.isCompressed()) - m_id = mat.outerIndexPtr()[outer+1]; + if(Derived::IsVectorAtCompileTime && mat.outerIndexPtr()==0) + { + m_start = 0; + m_id = mat.nonZeros(); + } else - m_id = m_start + mat.innerNonZeroPtr()[outer]; + { + m_start.value() = mat.outerIndexPtr()[outer]; + if(mat.isCompressed()) + m_id = mat.outerIndexPtr()[outer+1]; + else + m_id = m_start.value() + mat.innerNonZeroPtr()[outer]; + } + } + + explicit ReverseInnerIterator(const SparseCompressedBase& mat) + : m_values(mat.valuePtr()), m_indices(mat.innerIndexPtr()), m_outer(0), m_start(0), m_id(mat.nonZeros()) + { + EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived); + } + + explicit ReverseInnerIterator(const internal::CompressedStorage& data) + : m_values(&data.value(0)), m_indices(&data.index(0)), m_outer(0), m_start(0), m_id(data.size()) + { + EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived); } inline ReverseInnerIterator& operator--() { --m_id; return *this; } @@ -152,18 +202,18 @@ class SparseCompressedBase::ReverseInnerIterator inline Scalar& valueRef() { return const_cast(m_values[m_id-1]); } inline StorageIndex index() const { return m_indices[m_id-1]; } - inline Index outer() const { return m_outer; } - inline Index row() const { return IsRowMajor ? m_outer : index(); } - inline Index col() const { return IsRowMajor ? index() : m_outer; } + inline Index outer() const { return m_outer.value(); } + inline Index row() const { return IsRowMajor ? m_outer.value() : index(); } + inline Index col() const { return IsRowMajor ? 
index() : m_outer.value(); } - inline operator bool() const { return (m_id > m_start); } + inline operator bool() const { return (m_id > m_start.value()); } protected: const Scalar* m_values; const StorageIndex* m_indices; - const Index m_outer; + const internal::variable_if_dynamic m_outer; Index m_id; - const Index m_start; + const internal::variable_if_dynamic m_start; }; namespace internal { @@ -181,8 +231,14 @@ struct evaluator > Flags = Derived::Flags }; - evaluator() : m_matrix(0) {} - explicit evaluator(const Derived &mat) : m_matrix(&mat) {} + evaluator() : m_matrix(0) + { + EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost); + } + explicit evaluator(const Derived &mat) : m_matrix(&mat) + { + EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost); + } inline Index nonZerosEstimate() const { return m_matrix->nonZeros(); diff --git a/Eigen/src/SparseCore/SparseCwiseBinaryOp.h b/Eigen/src/SparseCore/SparseCwiseBinaryOp.h index 973b80095..d9420ac63 100644 --- a/Eigen/src/SparseCore/SparseCwiseBinaryOp.h +++ b/Eigen/src/SparseCore/SparseCwiseBinaryOp.h @@ -35,13 +35,13 @@ class CwiseBinaryOpImpl { public: typedef CwiseBinaryOp Derived; + typedef SparseMatrixBase Base; EIGEN_SPARSE_PUBLIC_INTERFACE(Derived) CwiseBinaryOpImpl() { - typedef typename internal::traits::StorageKind LhsStorageKind; - typedef typename internal::traits::StorageKind RhsStorageKind; EIGEN_STATIC_ASSERT(( - (!internal::is_same::value) + (!internal::is_same::StorageKind, + typename internal::traits::StorageKind>::value) || ((Lhs::Flags&RowMajorBit) == (Rhs::Flags&RowMajorBit))), THE_STORAGE_ORDER_OF_BOTH_SIDES_MUST_MATCH); } @@ -138,7 +138,10 @@ public: : m_functor(xpr.functor()), m_lhsImpl(xpr.lhs()), m_rhsImpl(xpr.rhs()) - { } + { + EIGEN_INTERNAL_CHECK_COST_VALUE(functor_traits::Cost); + EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost); + } inline Index nonZerosEstimate() const { return m_lhsImpl.nonZerosEstimate() + m_rhsImpl.nonZerosEstimate(); @@ -219,7 +222,10 @@ public: : m_functor(xpr.functor()), m_lhsImpl(xpr.lhs()), m_rhsImpl(xpr.rhs()) - { } + { + EIGEN_INTERNAL_CHECK_COST_VALUE(functor_traits::Cost); + EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost); + } inline Index nonZerosEstimate() const { return (std::min)(m_lhsImpl.nonZerosEstimate(), m_rhsImpl.nonZerosEstimate()); @@ -288,7 +294,10 @@ public: : m_functor(xpr.functor()), m_lhsImpl(xpr.lhs()), m_rhsImpl(xpr.rhs()) - { } + { + EIGEN_INTERNAL_CHECK_COST_VALUE(functor_traits::Cost); + EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost); + } inline Index nonZerosEstimate() const { return m_rhsImpl.nonZerosEstimate(); @@ -358,7 +367,10 @@ public: : m_functor(xpr.functor()), m_lhsImpl(xpr.lhs()), m_rhsImpl(xpr.rhs()) - { } + { + EIGEN_INTERNAL_CHECK_COST_VALUE(functor_traits::Cost); + EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost); + } inline Index nonZerosEstimate() const { return m_lhsImpl.nonZerosEstimate(); @@ -410,10 +422,10 @@ Derived& SparseMatrixBase::operator-=(const DiagonalBase& template template -EIGEN_STRONG_INLINE const EIGEN_SPARSE_CWISE_PRODUCT_RETURN_TYPE +EIGEN_STRONG_INLINE const typename SparseMatrixBase::template CwiseProductDenseReturnType::Type SparseMatrixBase::cwiseProduct(const MatrixBase &other) const { - return EIGEN_SPARSE_CWISE_PRODUCT_RETURN_TYPE(derived(), other.derived()); + return typename CwiseProductDenseReturnType::Type(derived(), other.derived()); } } // end namespace Eigen diff --git a/Eigen/src/SparseCore/SparseCwiseUnaryOp.h b/Eigen/src/SparseCore/SparseCwiseUnaryOp.h index 469bac36e..fe4a97120 100644 --- 
a/Eigen/src/SparseCore/SparseCwiseUnaryOp.h +++ b/Eigen/src/SparseCore/SparseCwiseUnaryOp.h @@ -1,7 +1,7 @@ // This file is part of Eigen, a lightweight C++ template library // for linear algebra. // -// Copyright (C) 2008-2014 Gael Guennebaud +// Copyright (C) 2008-2015 Gael Guennebaud // // This Source Code Form is subject to the terms of the Mozilla // Public License v. 2.0. If a copy of the MPL was not distributed @@ -29,7 +29,11 @@ struct unary_evaluator, IteratorBased> Flags = XprType::Flags }; - explicit unary_evaluator(const XprType& op) : m_functor(op.functor()), m_argImpl(op.nestedExpression()) {} + explicit unary_evaluator(const XprType& op) : m_functor(op.functor()), m_argImpl(op.nestedExpression()) + { + EIGEN_INTERNAL_CHECK_COST_VALUE(functor_traits::Cost); + EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost); + } inline Index nonZerosEstimate() const { return m_argImpl.nonZerosEstimate(); @@ -108,7 +112,11 @@ struct unary_evaluator, IteratorBased> Flags = XprType::Flags }; - explicit unary_evaluator(const XprType& op) : m_functor(op.functor()), m_argImpl(op.nestedExpression()) {} + explicit unary_evaluator(const XprType& op) : m_functor(op.functor()), m_argImpl(op.nestedExpression()) + { + EIGEN_INTERNAL_CHECK_COST_VALUE(functor_traits::Cost); + EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost); + } protected: typedef typename evaluator::InnerIterator EvalIterator; diff --git a/Eigen/src/SparseCore/SparseDenseProduct.h b/Eigen/src/SparseCore/SparseDenseProduct.h index 67b3c9c1b..87c946b9b 100644 --- a/Eigen/src/SparseCore/SparseDenseProduct.h +++ b/Eigen/src/SparseCore/SparseDenseProduct.h @@ -1,7 +1,7 @@ // This file is part of Eigen, a lightweight C++ template library // for linear algebra. // -// Copyright (C) 2008-2014 Gael Guennebaud +// Copyright (C) 2008-2015 Gael Guennebaud // // This Source Code Form is subject to the terms of the Mozilla // Public License v. 2.0. If a copy of the MPL was not distributed @@ -160,8 +160,8 @@ struct generic_product_impl template static void scaleAndAddTo(Dest& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha) { - typedef typename nested_eval::type LhsNested; - typedef typename nested_eval::type RhsNested; + typedef typename nested_eval::type LhsNested; + typedef typename nested_eval::type RhsNested; LhsNested lhsNested(lhs); RhsNested rhsNested(rhs); internal::sparse_time_dense_product(lhsNested, rhsNested, dst, alpha); @@ -182,8 +182,8 @@ struct generic_product_impl template static void scaleAndAddTo(Dst& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha) { - typedef typename nested_eval::type LhsNested; - typedef typename nested_eval::type RhsNested; + typedef typename nested_eval::type LhsNested; + typedef typename nested_eval::type RhsNested; LhsNested lhsNested(lhs); RhsNested rhsNested(rhs); @@ -221,7 +221,7 @@ protected: public: enum { Flags = NeedToTranspose ? 
RowMajorBit : 0, - CoeffReadCost = Dynamic + CoeffReadCost = HugeCost }; class InnerIterator : public LhsIterator @@ -263,12 +263,16 @@ public: sparse_dense_outer_product_evaluator(const Lhs1 &lhs, const ActualRhs &rhs) : m_lhs(lhs), m_lhsXprImpl(m_lhs), m_rhsXprImpl(rhs) - {} + { + EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost); + } // transpose case sparse_dense_outer_product_evaluator(const ActualRhs &rhs, const Lhs1 &lhs) : m_lhs(lhs), m_lhsXprImpl(m_lhs), m_rhsXprImpl(rhs) - {} + { + EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost); + } protected: const LhsArg m_lhs; @@ -278,7 +282,7 @@ protected: // sparse * dense outer product template -struct product_evaluator, OuterProduct, SparseShape, DenseShape, typename traits::Scalar, typename traits::Scalar> +struct product_evaluator, OuterProduct, SparseShape, DenseShape> : sparse_dense_outer_product_evaluator { typedef sparse_dense_outer_product_evaluator Base; @@ -293,7 +297,7 @@ struct product_evaluator, OuterProduct, Sparse }; template -struct product_evaluator, OuterProduct, DenseShape, SparseShape, typename traits::Scalar, typename traits::Scalar> +struct product_evaluator, OuterProduct, DenseShape, SparseShape> : sparse_dense_outer_product_evaluator { typedef sparse_dense_outer_product_evaluator Base; diff --git a/Eigen/src/SparseCore/SparseDiagonalProduct.h b/Eigen/src/SparseCore/SparseDiagonalProduct.h index 42e29cf70..e4af49e09 100644 --- a/Eigen/src/SparseCore/SparseDiagonalProduct.h +++ b/Eigen/src/SparseCore/SparseDiagonalProduct.h @@ -1,7 +1,7 @@ // This file is part of Eigen, a lightweight C++ template library // for linear algebra. // -// Copyright (C) 2009-2014 Gael Guennebaud +// Copyright (C) 2009-2015 Gael Guennebaud // // This Source Code Form is subject to the terms of the Mozilla // Public License v. 2.0. 
If a copy of the MPL was not distributed @@ -35,22 +35,22 @@ template struct sparse_diagonal_product_evaluator; template -struct product_evaluator, ProductTag, DiagonalShape, SparseShape, typename traits::Scalar, typename traits::Scalar> +struct product_evaluator, ProductTag, DiagonalShape, SparseShape> : public sparse_diagonal_product_evaluator { typedef Product XprType; - enum { CoeffReadCost = Dynamic, Flags = Rhs::Flags&RowMajorBit, Alignment = 0 }; // FIXME CoeffReadCost & Flags + enum { CoeffReadCost = HugeCost, Flags = Rhs::Flags&RowMajorBit, Alignment = 0 }; // FIXME CoeffReadCost & Flags typedef sparse_diagonal_product_evaluator Base; explicit product_evaluator(const XprType& xpr) : Base(xpr.rhs(), xpr.lhs().diagonal()) {} }; template -struct product_evaluator, ProductTag, SparseShape, DiagonalShape, typename traits::Scalar, typename traits::Scalar> +struct product_evaluator, ProductTag, SparseShape, DiagonalShape> : public sparse_diagonal_product_evaluator, Lhs::Flags&RowMajorBit?SDP_AsCwiseProduct:SDP_AsScalarProduct> { typedef Product XprType; - enum { CoeffReadCost = Dynamic, Flags = Lhs::Flags&RowMajorBit, Alignment = 0 }; // FIXME CoeffReadCost & Flags + enum { CoeffReadCost = HugeCost, Flags = Lhs::Flags&RowMajorBit, Alignment = 0 }; // FIXME CoeffReadCost & Flags typedef sparse_diagonal_product_evaluator, Lhs::Flags&RowMajorBit?SDP_AsCwiseProduct:SDP_AsScalarProduct> Base; explicit product_evaluator(const XprType& xpr) : Base(xpr.lhs(), xpr.rhs().diagonal().transpose()) {} diff --git a/Eigen/src/SparseCore/SparseMap.h b/Eigen/src/SparseCore/SparseMap.h index 7c512d9fe..36c09ab0c 100644 --- a/Eigen/src/SparseCore/SparseMap.h +++ b/Eigen/src/SparseCore/SparseMap.h @@ -63,7 +63,7 @@ class SparseMapBase Index m_outerSize; Index m_innerSize; - Index m_nnz; + Array m_zero_nnz; IndexPointer m_outerIndex; IndexPointer m_innerIndices; ScalarPointer m_values; @@ -75,6 +75,7 @@ class SparseMapBase inline Index cols() const { return IsRowMajor ? 
m_innerSize : m_outerSize; } inline Index innerSize() const { return m_innerSize; } inline Index outerSize() const { return m_outerSize; } + inline Index nonZeros() const { return m_zero_nnz[1]; } bool isCompressed() const { return m_innerNonZeros==0; } @@ -107,12 +108,21 @@ class SparseMapBase inline SparseMapBase(Index rows, Index cols, Index nnz, IndexPointer outerIndexPtr, IndexPointer innerIndexPtr, ScalarPointer valuePtr, IndexPointer innerNonZerosPtr = 0) - : m_outerSize(IsRowMajor?rows:cols), m_innerSize(IsRowMajor?cols:rows), m_nnz(nnz), m_outerIndex(outerIndexPtr), + : m_outerSize(IsRowMajor?rows:cols), m_innerSize(IsRowMajor?cols:rows), m_zero_nnz(0,internal::convert_index(nnz)), m_outerIndex(outerIndexPtr), m_innerIndices(innerIndexPtr), m_values(valuePtr), m_innerNonZeros(innerNonZerosPtr) {} + // for vectors + inline SparseMapBase(Index size, Index nnz, IndexPointer innerIndexPtr, ScalarPointer valuePtr) + : m_outerSize(1), m_innerSize(size), m_zero_nnz(0,internal::convert_index(nnz)), m_outerIndex(m_zero_nnz.data()), + m_innerIndices(innerIndexPtr), m_values(valuePtr), m_innerNonZeros(0) + {} + /** Empty destructor */ inline ~SparseMapBase() {} + + protected: + inline SparseMapBase() {} }; template @@ -163,8 +173,16 @@ class SparseMapBase : Base(rows, cols, nnz, outerIndexPtr, innerIndexPtr, valuePtr, innerNonZerosPtr) {} + // for vectors + inline SparseMapBase(Index size, Index nnz, StorageIndex* innerIndexPtr, Scalar* valuePtr) + : Base(size, nnz, innerIndexPtr, valuePtr) + {} + /** Empty destructor */ inline ~SparseMapBase() {} + + protected: + inline SparseMapBase() {} }; template @@ -173,7 +191,7 @@ class Map, Options, StrideType> { public: typedef SparseMapBase Base; - _EIGEN_SPARSE_PUBLIC_INTERFACE(Map) + EIGEN_SPARSE_PUBLIC_INTERFACE(Map) enum { IsRowMajor = Base::IsRowMajor }; public: @@ -193,7 +211,7 @@ class Map, Options, StrideType { public: typedef SparseMapBase Base; - _EIGEN_SPARSE_PUBLIC_INTERFACE(Map) + EIGEN_SPARSE_PUBLIC_INTERFACE(Map) enum { IsRowMajor = Base::IsRowMajor }; public: diff --git a/Eigen/src/SparseCore/SparseMatrix.h b/Eigen/src/SparseCore/SparseMatrix.h index f18829866..91bada40f 100644 --- a/Eigen/src/SparseCore/SparseMatrix.h +++ b/Eigen/src/SparseCore/SparseMatrix.h @@ -92,11 +92,12 @@ template class SparseMatrix : public SparseCompressedBase > { - public: typedef SparseCompressedBase Base; + using Base::convert_index; + public: using Base::isCompressed; using Base::nonZeros; - _EIGEN_SPARSE_PUBLIC_INTERFACE(SparseMatrix) + EIGEN_SPARSE_PUBLIC_INTERFACE(SparseMatrix) using Base::operator+=; using Base::operator-=; @@ -436,7 +437,13 @@ class SparseMatrix template void setFromTriplets(const InputIterators& begin, const InputIterators& end); - void sumupDuplicates(); + template + void setFromTriplets(const InputIterators& begin, const InputIterators& end, DupFunctor dup_func); + + void sumupDuplicates() { collapseDuplicates(internal::scalar_sum_op()); } + + template + void collapseDuplicates(DupFunctor dup_func = DupFunctor()); //--- @@ -508,7 +515,6 @@ class SparseMatrix void prune(const KeepFunc& keep = KeepFunc()) { // TODO optimize the uncompressed mode to avoid moving and allocating the data twice - // TODO also implement a unit test makeCompressed(); StorageIndex k = 0; @@ -532,7 +538,7 @@ class SparseMatrix } /** Resizes the matrix to a \a rows x \a cols matrix leaving old values untouched. 
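   * For example (illustrative):
   * \code
   * SparseMatrix<double> m(3,3);
   * m.insert(0,0) = 1;
   * m.conservativeResize(4,5);   // the (0,0) entry is preserved
   * \endcode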
- * \sa resizeNonZeros(Index), reserve(), setZero() + * \sa reserve(), setZero() */ void conservativeResize(Index rows, Index cols) { @@ -600,7 +606,7 @@ class SparseMatrix * This function does not free the currently allocated memory. To release as much memory as possible, * call \code mat.data().squeeze(); \endcode after resizing it. * - * \sa resizeNonZeros(Index), reserve(), setZero() + * \sa reserve(), setZero() */ void resize(Index rows, Index cols) { @@ -627,7 +633,6 @@ class SparseMatrix * Resize the nonzero vector to \a size */ void resizeNonZeros(Index size) { - // TODO remove this function m_data.resize(size); } @@ -665,8 +670,15 @@ class SparseMatrix YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY) check_template_parameters(); const bool needToTranspose = (Flags & RowMajorBit) != (internal::evaluator::Flags & RowMajorBit); - if (needToTranspose) *this = other.derived(); - else internal::call_assignment_no_alias(*this, other.derived()); + if (needToTranspose) + *this = other.derived(); + else + { + #ifdef EIGEN_SPARSE_CREATE_TEMPORARY_PLUGIN + EIGEN_SPARSE_CREATE_TEMPORARY_PLUGIN + #endif + internal::call_assignment_no_alias(*this, other.derived()); + } } /** Constructs a sparse matrix from the sparse selfadjoint view \a other */ @@ -717,7 +729,8 @@ class SparseMatrix m_data.swap(other.m_data); } - /** Sets *this to the identity matrix */ + /** Sets *this to the identity matrix. + * This function also turns the matrix into compressed mode, and drops any reserved memory. */ inline void setIdentity() { eigen_assert(rows() == cols() && "ONLY FOR SQUARED MATRICES"); @@ -725,6 +738,8 @@ class SparseMatrix Eigen::Map(&this->m_data.index(0), rows()).setLinSpaced(0, StorageIndex(rows()-1)); Eigen::Map(&this->m_data.value(0), rows()).setOnes(); Eigen::Map(this->m_outerIndex, rows()+1).setLinSpaced(0, StorageIndex(rows())); + std::free(m_innerNonZeros); + m_innerNonZeros = 0; } inline SparseMatrix& operator=(const SparseMatrix& other) { @@ -883,10 +898,9 @@ private: namespace internal { -template -void set_from_triplets(const InputIterator& begin, const InputIterator& end, SparseMatrixType& mat, int Options = 0) +template +void set_from_triplets(const InputIterator& begin, const InputIterator& end, SparseMatrixType& mat, DupFunctor dup_func) { - EIGEN_UNUSED_VARIABLE(Options); enum { IsRowMajor = SparseMatrixType::IsRowMajor }; typedef typename SparseMatrixType::Scalar Scalar; typedef typename SparseMatrixType::StorageIndex StorageIndex; @@ -909,7 +923,7 @@ void set_from_triplets(const InputIterator& begin, const InputIterator& end, Spa trMat.insertBackUncompressed(it->row(),it->col()) = it->value(); // pass 3: - trMat.sumupDuplicates(); + trMat.collapseDuplicates(dup_func); } // pass 4: transposed copy -> implicit sorting @@ -960,12 +974,29 @@ template template void SparseMatrix::setFromTriplets(const InputIterators& begin, const InputIterators& end) { - internal::set_from_triplets(begin, end, *this); + internal::set_from_triplets >(begin, end, *this, internal::scalar_sum_op()); +} + +/** The same as setFromTriplets() but when duplicates are encountered the functor \a dup_func is applied: + * \code + * value = dup_func(OldValue, NewValue) + * \endcode + * Here is a C++11 example keeping the latest entry only: + * \code + * mat.setFromTriplets(triplets.begin(), triplets.end(), [] (const Scalar&,const Scalar &b) { return b; }); + * \endcode + */ +template +template +void SparseMatrix::setFromTriplets(const InputIterators& begin,
const InputIterators& end, DupFunctor dup_func) +{ + internal::set_from_triplets, DupFunctor>(begin, end, *this, dup_func); } /** \internal */ template -void SparseMatrix::sumupDuplicates() +template +void SparseMatrix::collapseDuplicates(DupFunctor dup_func) { eigen_assert(!isCompressed()); // TODO, in practice we should be able to use m_innerNonZeros for that task @@ -983,7 +1014,7 @@ void SparseMatrix::sumupDuplicates() if(wi(i)>=start) { // we already meet this entry => accumulate it - m_data.value(wi(i)) += m_data.value(k); + m_data.value(wi(i)) = dup_func(m_data.value(wi(i)), m_data.value(k)); } else { @@ -1017,6 +1048,9 @@ EIGEN_DONT_INLINE SparseMatrix& SparseMatrix::Flags & RowMajorBit); if (needToTranspose) { + #ifdef EIGEN_SPARSE_TRANSPOSED_COPY_PLUGIN + EIGEN_SPARSE_TRANSPOSED_COPY_PLUGIN + #endif // two passes algorithm: // 1 - compute the number of coeffs per dest inner vector // 2 - do the actual copy/eval @@ -1101,6 +1135,14 @@ typename SparseMatrix<_Scalar,_Options,_Index>::Scalar& SparseMatrix<_Scalar,_Op for(Index j=1; j<=m_outerSize; ++j) m_outerIndex[j] = end; } + else + { + // turn the matrix into non-compressed mode + m_innerNonZeros = static_cast(std::malloc(m_outerSize * sizeof(StorageIndex))); + if(!m_innerNonZeros) internal::throw_std_bad_alloc(); + for(Index j=0; j::Scalar& SparseMatrix<_Scalar,_Op { // make sure the matrix is compatible to random un-compressed insertion: m_data.resize(m_data.allocatedSize()); - this->reserveInnerVectors(Array::Constant(2*m_outerSize, convert_index(m_outerSize))); + this->reserveInnerVectors(Array::Constant(m_outerSize, 2)); } return insertUncompressed(row,col); diff --git a/Eigen/src/SparseCore/SparseMatrixBase.h b/Eigen/src/SparseCore/SparseMatrixBase.h index 4e720904e..648ae1f8a 100644 --- a/Eigen/src/SparseCore/SparseMatrixBase.h +++ b/Eigen/src/SparseCore/SparseMatrixBase.h @@ -23,7 +23,14 @@ namespace Eigen { * This class can be extended with the help of the plugin mechanism described on the page * \ref TopicCustomizingEigen by defining the preprocessor symbol \c EIGEN_SPARSEMATRIXBASE_PLUGIN. 
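 *
 * For instance (illustrative), a plugin file can inject an extra member:
 * \code
 * // my_sparse_plugin.h (hypothetical file name)
 * inline Scalar sumAbs() const { return derived().cwiseAbs().sum(); }
 * \endcode
 * enabled by defining \c EIGEN_SPARSEMATRIXBASE_PLUGIN to \c "my_sparse_plugin.h".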
*/ -template class SparseMatrixBase : public EigenBase +template class SparseMatrixBase +#ifndef EIGEN_PARSED_BY_DOXYGEN + : public internal::special_scalar_op_base::Scalar, + typename NumTraits::Scalar>::Real, + EigenBase > +#else + : public EigenBase +#endif // not EIGEN_PARSED_BY_DOXYGEN { public: @@ -42,7 +49,7 @@ template class SparseMatrixBase : public EigenBase >::type PacketReturnType; typedef SparseMatrixBase StorageBaseType; - typedef EigenBase Base; + typedef Matrix IndexVector; typedef Matrix ScalarVector; @@ -134,6 +141,10 @@ template class SparseMatrixBase : public EigenBase inline Derived& derived() { return *static_cast(this); } inline Derived& const_cast_derived() const { return *static_cast(const_cast(this)); } + + typedef internal::special_scalar_op_base > Base; + using Base::operator*; + using Base::operator/; #endif // not EIGEN_PARSED_BY_DOXYGEN #define EIGEN_CURRENT_STORAGE_BASE_CLASS Eigen::SparseMatrixBase @@ -251,20 +262,18 @@ template class SparseMatrixBase : public EigenBase Derived& operator*=(const Scalar& other); Derived& operator/=(const Scalar& other); - #define EIGEN_SPARSE_CWISE_PRODUCT_RETURN_TYPE \ - CwiseBinaryOp< \ - internal::scalar_product_op< \ - typename internal::scalar_product_traits< \ - typename internal::traits::Scalar, \ - typename internal::traits::Scalar \ - >::ReturnType \ - >, \ - const Derived, \ - const OtherDerived \ - > + template struct CwiseProductDenseReturnType { + typedef CwiseBinaryOp::Scalar, + typename internal::traits::Scalar + >::ReturnType>, + const Derived, + const OtherDerived + > Type; + }; template - EIGEN_STRONG_INLINE const EIGEN_SPARSE_CWISE_PRODUCT_RETURN_TYPE + EIGEN_STRONG_INLINE const typename CwiseProductDenseReturnType::Type cwiseProduct(const MatrixBase &other) const; // sparse * diagonal @@ -281,7 +290,7 @@ template class SparseMatrixBase : public EigenBase // sparse * sparse template - const Product + const Product operator*(const SparseMatrixBase &other) const; // sparse * dense diff --git a/Eigen/src/SparseCore/SparsePermutation.h b/Eigen/src/SparseCore/SparsePermutation.h index d63607b6c..ef38357ae 100644 --- a/Eigen/src/SparseCore/SparsePermutation.h +++ b/Eigen/src/SparseCore/SparsePermutation.h @@ -16,15 +16,17 @@ namespace Eigen { namespace internal { -template -struct permutation_matrix_product +template +struct permutation_matrix_product { - typedef typename remove_all::type MatrixTypeNestedCleaned; - typedef typename MatrixTypeNestedCleaned::Scalar Scalar; - typedef typename MatrixTypeNestedCleaned::StorageIndex StorageIndex; + typedef typename nested_eval::type MatrixType; + typedef typename remove_all::type MatrixTypeCleaned; + + typedef typename MatrixTypeCleaned::Scalar Scalar; + typedef typename MatrixTypeCleaned::StorageIndex StorageIndex; enum { - SrcStorageOrder = MatrixTypeNestedCleaned::Flags&RowMajorBit ? RowMajor : ColMajor, + SrcStorageOrder = MatrixTypeCleaned::Flags&RowMajorBit ? RowMajor : ColMajor, MoveOuter = SrcStorageOrder==RowMajor ? Side==OnTheLeft : Side==OnTheRight }; @@ -33,8 +35,9 @@ struct permutation_matrix_product SparseMatrix >::type ReturnType; template - static inline void run(Dest& dst, const PermutationType& perm, const MatrixType& mat) + static inline void run(Dest& dst, const PermutationType& perm, const ExpressionType& xpr) { + MatrixType mat(xpr); if(MoveOuter) { SparseMatrix tmp(mat.rows(), mat.cols()); @@ -50,7 +53,7 @@ struct permutation_matrix_product Index jp = perm.indices().coeff(j); Index jsrc = ((Side==OnTheRight) ^ Transposed) ? 
jp : j; Index jdst = ((Side==OnTheLeft) ^ Transposed) ? jp : j; - for(typename MatrixTypeNestedCleaned::InnerIterator it(mat,jsrc); it; ++it) + for(typename MatrixTypeCleaned::InnerIterator it(mat,jsrc); it; ++it) tmp.insertByOuterInner(jdst,it.index()) = it.value(); } dst = tmp; @@ -67,11 +70,11 @@ struct permutation_matrix_product perm_cpy = perm.transpose(); for(Index j=0; j struct product_promote_storage_type >::PlainObject template -struct product_evaluator, ProductTag, PermutationShape, SparseShape, typename traits::Scalar, typename traits::Scalar> - : public evaluator::ReturnType> +struct product_evaluator, ProductTag, PermutationShape, SparseShape> + : public evaluator::ReturnType> { typedef Product XprType; - typedef typename permutation_matrix_product::ReturnType PlainObject; + typedef typename permutation_matrix_product::ReturnType PlainObject; typedef evaluator Base; + enum { + Flags = Base::Flags | EvalBeforeNestingBit + }; + explicit product_evaluator(const XprType& xpr) : m_result(xpr.rows(), xpr.cols()) { ::new (static_cast(this)) Base(m_result); generic_product_impl::evalTo(m_result, xpr.lhs(), xpr.rhs()); } - -protected: + +protected: PlainObject m_result; }; template -struct product_evaluator, ProductTag, SparseShape, PermutationShape, typename traits::Scalar, typename traits::Scalar> +struct product_evaluator, ProductTag, SparseShape, PermutationShape > : public evaluator::ReturnType> { typedef Product XprType; typedef typename permutation_matrix_product::ReturnType PlainObject; typedef evaluator Base; + enum { + Flags = Base::Flags | EvalBeforeNestingBit + }; + explicit product_evaluator(const XprType& xpr) : m_result(xpr.rows(), xpr.cols()) { ::new (static_cast(this)) Base(m_result); generic_product_impl::evalTo(m_result, xpr.lhs(), xpr.rhs()); } - -protected: + +protected: PlainObject m_result; }; @@ -132,35 +143,34 @@ protected: /** \returns the matrix with the permutation applied to the columns */ template -inline const Product +inline const Product operator*(const SparseMatrixBase& matrix, const PermutationBase& perm) -{ return Product(matrix.derived(), perm.derived()); } +{ return Product(matrix.derived(), perm.derived()); } /** \returns the matrix with the permutation applied to the rows */ template -inline const Product +inline const Product operator*( const PermutationBase& perm, const SparseMatrixBase& matrix) -{ return Product(perm.derived(), matrix.derived()); } +{ return Product(perm.derived(), matrix.derived()); } -// TODO, the following specializations should not be needed as Transpose should be a PermutationBase. /** \returns the matrix with the inverse permutation applied to the columns. */ -template -inline const Product > > -operator*(const SparseMatrixBase& matrix, const Transpose >& tperm) +template +inline const Product, AliasFreeProduct> +operator*(const SparseMatrixBase& matrix, const InverseImpl& tperm) { - return Product > >(matrix.derived(), tperm); + return Product, AliasFreeProduct>(matrix.derived(), tperm.derived()); } /** \returns the matrix with the inverse permutation applied to the rows. 
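A usage sketch of the permutation products handled above (square matrix assumed; P.inverse() exercises the new InverseImpl overloads):

#include <Eigen/Sparse>

void example_permute(const Eigen::SparseMatrix<double>& A)  // A assumed square
{
  Eigen::PermutationMatrix<Eigen::Dynamic, Eigen::Dynamic> P(A.rows());
  P.setIdentity();                                   // or fill P.indices() as needed
  Eigen::SparseMatrix<double> Pr = P * A;            // permute the rows
  Eigen::SparseMatrix<double> Pc = A * P.inverse();  // inverse permutation applied to columns
}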
*/ -template -inline const Product >, SparseDerived> -operator*(const Transpose >& tperm, const SparseMatrixBase& matrix) +template +inline const Product, SparseDerived, AliasFreeProduct> +operator*(const InverseImpl& tperm, const SparseMatrixBase& matrix) { - return Product >, SparseDerived>(tperm, matrix.derived()); + return Product, SparseDerived, AliasFreeProduct>(tperm.derived(), matrix.derived()); } } // end namespace Eigen diff --git a/Eigen/src/SparseCore/SparseProduct.h b/Eigen/src/SparseCore/SparseProduct.h index da8919ecc..cbd0db71b 100644 --- a/Eigen/src/SparseCore/SparseProduct.h +++ b/Eigen/src/SparseCore/SparseProduct.h @@ -1,7 +1,7 @@ // This file is part of Eigen, a lightweight C++ template library // for linear algebra. // -// Copyright (C) 2008-2014 Gael Guennebaud +// Copyright (C) 2008-2015 Gael Guennebaud // // This Source Code Form is subject to the terms of the Mozilla // Public License v. 2.0. If a copy of the MPL was not distributed @@ -25,10 +25,10 @@ namespace Eigen { * */ template template -inline const Product +inline const Product SparseMatrixBase::operator*(const SparseMatrixBase &other) const { - return Product(derived(), other.derived()); + return Product(derived(), other.derived()); } namespace internal { @@ -39,6 +39,34 @@ struct generic_product_impl { template static void evalTo(Dest& dst, const Lhs& lhs, const Rhs& rhs) + { + evalTo(dst, lhs, rhs, typename evaluator_traits::Shape()); + } + + // dense += sparse * sparse + template + static void addTo(Dest& dst, const ActualLhs& lhs, const Rhs& rhs, int* = typename enable_if::Shape,DenseShape>::value,int*>::type(0) ) + { + typedef typename nested_eval::type LhsNested; + typedef typename nested_eval::type RhsNested; + LhsNested lhsNested(lhs); + RhsNested rhsNested(rhs); + internal::sparse_sparse_to_dense_product_selector::type, + typename remove_all::type, Dest>::run(lhsNested,rhsNested,dst); + } + + // dense -= sparse * sparse + template + static void subTo(Dest& dst, const Lhs& lhs, const Rhs& rhs, int* = typename enable_if::Shape,DenseShape>::value,int*>::type(0) ) + { + addTo(dst, -lhs, rhs); + } + +protected: + + // sparse = sparse * sparse + template + static void evalTo(Dest& dst, const Lhs& lhs, const Rhs& rhs, SparseShape) { typedef typename nested_eval::type LhsNested; typedef typename nested_eval::type RhsNested; @@ -47,6 +75,14 @@ struct generic_product_impl internal::conservative_sparse_sparse_product_selector::type, typename remove_all::type, Dest>::run(lhsNested,rhsNested,dst); } + + // dense = sparse * sparse + template + static void evalTo(Dest& dst, const Lhs& lhs, const Rhs& rhs, DenseShape) + { + dst.setZero(); + addTo(dst, lhs, rhs); + } }; // sparse * sparse-triangular @@ -61,6 +97,39 @@ struct generic_product_impl {}; +// dense = sparse-product (can be sparse*sparse, sparse*perm, etc.) +template< typename DstXprType, typename Lhs, typename Rhs> +struct Assignment, internal::assign_op, Sparse2Dense> +{ + typedef Product SrcXprType; + static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op &) + { + generic_product_impl::evalTo(dst,src.lhs(),src.rhs()); + } +}; + +// dense += sparse-product (can be sparse*sparse, sparse*perm, etc.) 
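The new addTo/subTo paths let a sparse*sparse product accumulate directly into a dense destination; a short sketch (sizes illustrative):

#include <Eigen/Dense>
#include <Eigen/Sparse>

void example_sparse_to_dense(const Eigen::SparseMatrix<double>& A,
                             const Eigen::SparseMatrix<double>& B)
{
  Eigen::MatrixXd D = Eigen::MatrixXd::Zero(A.rows(), B.cols());
  D += A * B;                 // dense += sparse*sparse, no sparse temporary
  D -= A * B;                 // routed through addTo(dst, -lhs, rhs)
  Eigen::MatrixXd E = A * B;  // dense = sparse*sparse: setZero() then addTo()
}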
+template< typename DstXprType, typename Lhs, typename Rhs> +struct Assignment, internal::add_assign_op, Sparse2Dense> +{ + typedef Product SrcXprType; + static void run(DstXprType &dst, const SrcXprType &src, const internal::add_assign_op &) + { + generic_product_impl::addTo(dst,src.lhs(),src.rhs()); + } +}; + +// dense -= sparse-product (can be sparse*sparse, sparse*perm, etc.) +template< typename DstXprType, typename Lhs, typename Rhs> +struct Assignment, internal::sub_assign_op, Sparse2Dense> +{ + typedef Product SrcXprType; + static void run(DstXprType &dst, const SrcXprType &src, const internal::sub_assign_op &) + { + generic_product_impl::subTo(dst,src.lhs(),src.rhs()); + } +}; + template struct evaluator > > : public evaluator::PlainObject> diff --git a/Eigen/src/SparseCore/SparseRef.h b/Eigen/src/SparseCore/SparseRef.h index 8df62a119..19e06fc80 100644 --- a/Eigen/src/SparseCore/SparseRef.h +++ b/Eigen/src/SparseCore/SparseRef.h @@ -19,7 +19,7 @@ enum { namespace internal { template class SparseRefBase; - + template struct traits, _Options, _StrideType> > : public traits > @@ -27,7 +27,7 @@ struct traits, _Options, _Stride typedef SparseMatrix PlainObjectType; enum { Options = _Options, - Flags = traits >::Flags | CompressedAccessBit | NestByRefBit + Flags = traits::Flags | CompressedAccessBit | NestByRefBit }; template struct match { @@ -48,7 +48,35 @@ struct traits, _Options, _ Flags = (traits >::Flags | CompressedAccessBit | NestByRefBit) & ~LvalueBit }; }; - + +template +struct traits, _Options, _StrideType> > + : public traits > +{ + typedef SparseVector PlainObjectType; + enum { + Options = _Options, + Flags = traits::Flags | CompressedAccessBit | NestByRefBit + }; + + template struct match { + enum { + MatchAtCompileTime = (Derived::Flags&CompressedAccessBit) && Derived::IsVectorAtCompileTime + }; + typedef typename internal::conditional::type type; + }; + +}; + +template +struct traits, _Options, _StrideType> > + : public traits, _Options, _StrideType> > +{ + enum { + Flags = (traits >::Flags | CompressedAccessBit | NestByRefBit) & ~LvalueBit + }; +}; + template struct traits > : public traits {}; @@ -58,7 +86,7 @@ template class SparseRefBase public: typedef SparseMapBase Base; - _EIGEN_SPARSE_PUBLIC_INTERFACE(SparseRefBase) + EIGEN_SPARSE_PUBLIC_INTERFACE(SparseRefBase) SparseRefBase() : Base(RowsAtCompileTime==Dynamic?0:RowsAtCompileTime,ColsAtCompileTime==Dynamic?0:ColsAtCompileTime, 0, 0, 0, 0, 0) @@ -66,11 +94,13 @@ public: protected: - template void construct(Expression& expr) { - ::new (static_cast(this)) Base(expr.rows(), expr.cols(), expr.nonZeros(), expr.outerIndexPtr(), expr.innerIndexPtr(), expr.valuePtr(), expr.innerNonZeroPtr()); + if(expr.outerIndexPtr()==0) + ::new (static_cast(this)) Base(expr.size(), expr.nonZeros(), expr.innerIndexPtr(), expr.valuePtr()); + else + ::new (static_cast(this)) Base(expr.rows(), expr.cols(), expr.nonZeros(), expr.outerIndexPtr(), expr.innerIndexPtr(), expr.valuePtr(), expr.innerNonZeroPtr()); } }; @@ -102,7 +132,7 @@ class Ref, Options, StrideType > public: typedef internal::SparseRefBase Base; - _EIGEN_SPARSE_PUBLIC_INTERFACE(Ref) + EIGEN_SPARSE_PUBLIC_INTERFACE(Ref) #ifndef EIGEN_PARSED_BY_DOXYGEN @@ -146,7 +176,7 @@ class Ref, Options, StrideType public: typedef internal::SparseRefBase Base; - _EIGEN_SPARSE_PUBLIC_INTERFACE(Ref) + EIGEN_SPARSE_PUBLIC_INTERFACE(Ref) template inline Ref(const SparseMatrixBase& expr) @@ -170,8 +200,9 @@ class Ref, Options, StrideType { if((Options & int(StandardCompressedFormat)) && 
(!expr.isCompressed())) { - m_object = expr; - Base::construct(m_object); + TPlainObjectType* obj = reinterpret_cast(m_object_bytes); + ::new (obj) TPlainObjectType(expr); + Base::construct(*obj); } else { @@ -182,17 +213,113 @@ class Ref, Options, StrideType template void construct(const Expression& expr, internal::false_type) { - m_object = expr; - Base::construct(m_object); + TPlainObjectType* obj = reinterpret_cast(m_object_bytes); + ::new (obj) TPlainObjectType(expr); + Base::construct(*obj); } protected: - TPlainObjectType m_object; + char m_object_bytes[sizeof(TPlainObjectType)]; }; + +/** + * \ingroup Sparse_Module + * + * \brief A sparse vector expression referencing an existing sparse vector expression + * + * \tparam PlainObjectType the equivalent sparse matrix type of the referenced data + * \tparam Options Not used for SparseVector. + * \tparam StrideType Only used for dense Ref + * + * \sa class Ref + */ +template +class Ref, Options, StrideType > + : public internal::SparseRefBase, Options, StrideType > > +{ + typedef SparseVector PlainObjectType; + typedef internal::traits Traits; + template + inline Ref(const SparseVector& expr); + public: + + typedef internal::SparseRefBase Base; + EIGEN_SPARSE_PUBLIC_INTERFACE(Ref) + + #ifndef EIGEN_PARSED_BY_DOXYGEN + template + inline Ref(SparseVector& expr) + { + EIGEN_STATIC_ASSERT(bool(Traits::template match >::MatchAtCompileTime), STORAGE_LAYOUT_DOES_NOT_MATCH); + Base::construct(expr.derived()); + } + + template + inline Ref(const SparseCompressedBase& expr) + #else + template + inline Ref(SparseCompressedBase& expr) + #endif + { + EIGEN_STATIC_ASSERT(bool(internal::is_lvalue::value), THIS_EXPRESSION_IS_NOT_A_LVALUE__IT_IS_READ_ONLY); + EIGEN_STATIC_ASSERT(bool(Traits::template match::MatchAtCompileTime), STORAGE_LAYOUT_DOES_NOT_MATCH); + Base::construct(expr.const_cast_derived()); + } +}; + +// this is the const ref version +template +class Ref, Options, StrideType> + : public internal::SparseRefBase, Options, StrideType> > +{ + typedef SparseVector TPlainObjectType; + typedef internal::traits Traits; + public: + + typedef internal::SparseRefBase Base; + EIGEN_SPARSE_PUBLIC_INTERFACE(Ref) + + template + inline Ref(const SparseMatrixBase& expr) + { + construct(expr.derived(), typename Traits::template match::type()); + } + + inline Ref(const Ref& other) : Base(other) { + // copy constructor shall not copy the m_object, to avoid unnecessary malloc and copy + } + + template + inline Ref(const RefBase& other) { + construct(other.derived(), typename Traits::template match::type()); + } + + protected: + + template + void construct(const Expression& expr,internal::true_type) + { + Base::construct(expr); + } + + template + void construct(const Expression& expr, internal::false_type) + { + TPlainObjectType* obj = reinterpret_cast(m_object_bytes); + ::new (obj) TPlainObjectType(expr); + Base::construct(*obj); + } + + protected: + char m_object_bytes[sizeof(TPlainObjectType)]; +}; + namespace internal { +// FIXME shall we introduce a general evaluatior_ref that we can specialize for any sparse object once, and thus remove this copy-pasta thing... 
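A sketch of what the in-place byte storage enables: Ref<const SparseMatrix> binds matching objects directly and only constructs a temporary, via placement new into the reserved bytes, for non-matching expressions (function name illustrative):

#include <Eigen/Sparse>

double first_stored_value(const Eigen::Ref<const Eigen::SparseMatrix<double> >& m)
{
  return m.nonZeros() > 0 ? m.valuePtr()[0] : 0.0;
}
// first_stored_value(A);      // binds A directly, no copy
// first_stored_value(A + A);  // evaluates into the Ref's internal storage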
+ template struct evaluator, Options, StrideType> > : evaluator, Options, StrideType> > > @@ -213,6 +340,26 @@ struct evaluator, Options, explicit evaluator(const XprType &mat) : Base(mat) {} }; +template +struct evaluator, Options, StrideType> > + : evaluator, Options, StrideType> > > +{ + typedef evaluator, Options, StrideType> > > Base; + typedef Ref, Options, StrideType> XprType; + evaluator() : Base() {} + explicit evaluator(const XprType &mat) : Base(mat) {} +}; + +template +struct evaluator, Options, StrideType> > + : evaluator, Options, StrideType> > > +{ + typedef evaluator, Options, StrideType> > > Base; + typedef Ref, Options, StrideType> XprType; + evaluator() : Base() {} + explicit evaluator(const XprType &mat) : Base(mat) {} +}; + } } // end namespace Eigen diff --git a/Eigen/src/SparseCore/SparseSelfAdjointView.h b/Eigen/src/SparseCore/SparseSelfAdjointView.h index b0c2e472e..46c6ce1d3 100644 --- a/Eigen/src/SparseCore/SparseSelfAdjointView.h +++ b/Eigen/src/SparseCore/SparseSelfAdjointView.h @@ -137,14 +137,14 @@ template class SparseSelfAdjointView SparseSelfAdjointView& operator=(const SparseSelfAdjointView& src) { - PermutationMatrix pnull; + PermutationMatrix pnull; return *this = src.twistedBy(pnull); } template SparseSelfAdjointView& operator=(const SparseSelfAdjointView& src) { - PermutationMatrix pnull; + PermutationMatrix pnull; return *this = src.twistedBy(pnull); } @@ -336,7 +336,7 @@ struct generic_product_impl -struct product_evaluator, ProductTag, SparseSelfAdjointShape, SparseShape, typename traits::Scalar, typename traits::Scalar> +struct product_evaluator, ProductTag, SparseSelfAdjointShape, SparseShape> : public evaluator::PlainObject> { typedef Product XprType; @@ -356,7 +356,7 @@ protected: }; template -struct product_evaluator, ProductTag, SparseShape, SparseSelfAdjointShape, typename traits::Scalar, typename traits::Scalar> +struct product_evaluator, ProductTag, SparseShape, SparseSelfAdjointShape> : public evaluator::PlainObject> { typedef Product XprType; diff --git a/Eigen/src/SparseCore/SparseTranspose.h b/Eigen/src/SparseCore/SparseTranspose.h index c2d4ac549..b6f180a41 100644 --- a/Eigen/src/SparseCore/SparseTranspose.h +++ b/Eigen/src/SparseCore/SparseTranspose.h @@ -27,12 +27,14 @@ namespace internal { using Base::derived; typedef typename Base::Scalar Scalar; typedef typename Base::StorageIndex StorageIndex; + + inline Index nonZeros() const { return derived().nestedExpression().nonZeros(); } inline const Scalar* valuePtr() const { return derived().nestedExpression().valuePtr(); } inline const StorageIndex* innerIndexPtr() const { return derived().nestedExpression().innerIndexPtr(); } inline const StorageIndex* outerIndexPtr() const { return derived().nestedExpression().outerIndexPtr(); } inline const StorageIndex* innerNonZeroPtr() const { return derived().nestedExpression().innerNonZeroPtr(); } - + inline Scalar* valuePtr() { return derived().nestedExpression().valuePtr(); } inline StorageIndex* innerIndexPtr() { return derived().nestedExpression().innerIndexPtr(); } inline StorageIndex* outerIndexPtr() { return derived().nestedExpression().outerIndexPtr(); } diff --git a/Eigen/src/SparseCore/SparseTriangularView.h b/Eigen/src/SparseCore/SparseTriangularView.h index 3d9946149..7c718e4e1 100644 --- a/Eigen/src/SparseCore/SparseTriangularView.h +++ b/Eigen/src/SparseCore/SparseTriangularView.h @@ -1,7 +1,7 @@ // This file is part of Eigen, a lightweight C++ template library // for linear algebra. 
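Usage sketch for the self-adjoint view paths touched above (only one triangle of A is referenced):

#include <Eigen/Sparse>

void example_selfadjoint(const Eigen::SparseMatrix<double>& A, const Eigen::VectorXd& x)
{
  Eigen::VectorXd y = A.selfadjointView<Eigen::Lower>() * x;  // full symmetric product
  Eigen::SparseMatrix<double> B(A.rows(), A.cols());
  // view-to-view copy: the path fixed above to use a StorageIndex-typed empty permutation
  B.selfadjointView<Eigen::Upper>() = A.selfadjointView<Eigen::Lower>();
}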
// -// Copyright (C) 2009-2014 Gael Guennebaud +// Copyright (C) 2009-2015 Gael Guennebaud // Copyright (C) 2012 Désiré Nuentsa-Wakam // // This Source Code Form is subject to the terms of the Mozilla @@ -34,10 +34,11 @@ template class TriangularViewImpl TriangularViewType; -protected: + protected: // dummy solve function to make TriangularView happy. void solve() const; + typedef SparseMatrixBase Base; public: EIGEN_SPARSE_PUBLIC_INTERFACE(TriangularViewType) diff --git a/Eigen/src/SparseCore/SparseUtil.h b/Eigen/src/SparseCore/SparseUtil.h index d53a9cb17..74df0d496 100644 --- a/Eigen/src/SparseCore/SparseUtil.h +++ b/Eigen/src/SparseCore/SparseUtil.h @@ -39,32 +39,16 @@ EIGEN_STRONG_INLINE Derived& operator Op(const Other& scalar) \ #define EIGEN_SPARSE_INHERIT_ASSIGNMENT_OPERATORS(Derived) \ EIGEN_SPARSE_INHERIT_ASSIGNMENT_OPERATOR(Derived, =) -// TODO this is mostly the same as EIGEN_GENERIC_PUBLIC_INTERFACE -#define _EIGEN_SPARSE_PUBLIC_INTERFACE(Derived) \ - typedef typename Eigen::internal::traits::Scalar Scalar; \ - typedef typename Eigen::NumTraits::Real RealScalar; \ - typedef typename Eigen::internal::ref_selector::type Nested; \ - typedef typename Eigen::internal::traits::StorageKind StorageKind; \ - typedef typename Eigen::internal::traits::StorageIndex StorageIndex; \ - enum { RowsAtCompileTime = Eigen::internal::traits::RowsAtCompileTime, \ - ColsAtCompileTime = Eigen::internal::traits::ColsAtCompileTime, \ - Flags = Eigen::internal::traits::Flags, \ - SizeAtCompileTime = Base::SizeAtCompileTime, \ - IsVectorAtCompileTime = Base::IsVectorAtCompileTime }; \ - using Base::derived; \ - using Base::const_cast_derived; \ - using Base::convert_index; - -#define EIGEN_SPARSE_PUBLIC_INTERFACE(Derived) \ - typedef Eigen::SparseMatrixBase Base; \ - _EIGEN_SPARSE_PUBLIC_INTERFACE(Derived) +#define EIGEN_SPARSE_PUBLIC_INTERFACE(Derived) \ + EIGEN_GENERIC_PUBLIC_INTERFACE(Derived) + + const int CoherentAccessPattern = 0x1; const int InnerRandomAccessPattern = 0x2 | CoherentAccessPattern; const int OuterRandomAccessPattern = 0x4 | CoherentAccessPattern; const int RandomAccessPattern = 0x8 | OuterRandomAccessPattern | InnerRandomAccessPattern; -template class SparseMatrixBase; template class SparseMatrix; template class DynamicSparseMatrix; template class SparseVector; @@ -89,20 +73,20 @@ template class SparseSymmetricPermutationProduct; namespace internal { -template struct sparse_eval; +template struct sparse_eval; template struct eval - : public sparse_eval::RowsAtCompileTime,traits::ColsAtCompileTime> + : sparse_eval::RowsAtCompileTime,traits::ColsAtCompileTime,traits::Flags> {}; -template struct sparse_eval { +template struct sparse_eval { typedef typename traits::Scalar _Scalar; typedef typename traits::StorageIndex _StorageIndex; public: typedef SparseVector<_Scalar, RowMajor, _StorageIndex> type; }; -template struct sparse_eval { +template struct sparse_eval { typedef typename traits::Scalar _Scalar; typedef typename traits::StorageIndex _StorageIndex; public: @@ -110,15 +94,15 @@ template struct sparse_eval { }; // TODO this seems almost identical to plain_matrix_type -template struct sparse_eval { +template struct sparse_eval { typedef typename traits::Scalar _Scalar; typedef typename traits::StorageIndex _StorageIndex; - enum { _Options = ((traits::Flags&RowMajorBit)==RowMajorBit) ? RowMajor : ColMajor }; + enum { _Options = ((Flags&RowMajorBit)==RowMajorBit) ? 
RowMajor : ColMajor }; public: typedef SparseMatrix<_Scalar, _Options, _StorageIndex> type; }; -template struct sparse_eval { +template struct sparse_eval { typedef typename traits::Scalar _Scalar; public: typedef Matrix<_Scalar, 1, 1> type; @@ -133,10 +117,15 @@ template struct plain_matrix_type typedef SparseMatrix<_Scalar, _Options, _StorageIndex> type; }; +template +struct plain_object_eval + : sparse_eval::RowsAtCompileTime,traits::ColsAtCompileTime, evaluator::Flags> +{}; + template struct solve_traits { - typedef typename sparse_eval::type PlainObject; + typedef typename sparse_eval::Flags>::type PlainObject; }; template diff --git a/Eigen/src/SparseCore/SparseVector.h b/Eigen/src/SparseCore/SparseVector.h index ccf9364f2..7ec73a365 100644 --- a/Eigen/src/SparseCore/SparseVector.h +++ b/Eigen/src/SparseCore/SparseVector.h @@ -1,7 +1,7 @@ // This file is part of Eigen, a lightweight C++ template library // for linear algebra. // -// Copyright (C) 2008-2014 Gael Guennebaud +// Copyright (C) 2008-2015 Gael Guennebaud // // This Source Code Form is subject to the terms of the Mozilla // Public License v. 2.0. If a copy of the MPL was not distributed @@ -40,8 +40,7 @@ struct traits > ColsAtCompileTime = IsColVector ? 1 : Dynamic, MaxRowsAtCompileTime = RowsAtCompileTime, MaxColsAtCompileTime = ColsAtCompileTime, - Flags = _Options | NestByRefBit | LvalueBit | (IsColVector ? 0 : RowMajorBit), - CoeffReadCost = NumTraits::ReadCost, + Flags = _Options | NestByRefBit | LvalueBit | (IsColVector ? 0 : RowMajorBit) | CompressedAccessBit, SupportedAccessPatterns = InnerRandomAccessPattern }; }; @@ -63,10 +62,10 @@ struct sparse_vector_assign_selector; template class SparseVector - : public SparseMatrixBase > + : public SparseCompressedBase > { - typedef SparseMatrixBase SparseBase; - + typedef SparseCompressedBase Base; + using Base::convert_index; public: EIGEN_SPARSE_PUBLIC_INTERFACE(SparseVector) EIGEN_SPARSE_INHERIT_ASSIGNMENT_OPERATOR(SparseVector, +=) @@ -89,6 +88,11 @@ class SparseVector EIGEN_STRONG_INLINE const StorageIndex* innerIndexPtr() const { return &m_data.index(0); } EIGEN_STRONG_INLINE StorageIndex* innerIndexPtr() { return &m_data.index(0); } + + inline const StorageIndex* outerIndexPtr() const { return 0; } + inline StorageIndex* outerIndexPtr() { return 0; } + inline const StorageIndex* innerNonZeroPtr() const { return 0; } + inline StorageIndex* innerNonZeroPtr() { return 0; } /** \internal */ inline Storage& data() { return m_data; } @@ -126,8 +130,8 @@ class SparseVector public: - class InnerIterator; - class ReverseInnerIterator; + typedef typename Base::InnerIterator InnerIterator; + typedef typename Base::ReverseInnerIterator ReverseInnerIterator; inline void setZero() { m_data.clear(); } @@ -230,12 +234,15 @@ class SparseVector inline SparseVector(const SparseMatrixBase& other) : m_size(0) { + #ifdef EIGEN_SPARSE_CREATE_TEMPORARY_PLUGIN + EIGEN_SPARSE_CREATE_TEMPORARY_PLUGIN + #endif check_template_parameters(); *this = other.derived(); } inline SparseVector(const SparseVector& other) - : SparseBase(other), m_size(0) + : Base(other), m_size(0) { check_template_parameters(); *this = other.derived(); @@ -357,75 +364,6 @@ protected: Index m_size; }; -template -class SparseVector::InnerIterator -{ - public: - explicit InnerIterator(const SparseVector& vec, Index outer=0) - : m_data(vec.m_data), m_id(0), m_end(m_data.size()) - { - EIGEN_UNUSED_VARIABLE(outer); - eigen_assert(outer==0); - } - - explicit InnerIterator(const internal::CompressedStorage& data) - : 
m_data(data), m_id(0), m_end(m_data.size()) - {} - - inline InnerIterator& operator++() { m_id++; return *this; } - - inline Scalar value() const { return m_data.value(m_id); } - inline Scalar& valueRef() { return const_cast(m_data.value(m_id)); } - - inline StorageIndex index() const { return m_data.index(m_id); } - inline Index row() const { return IsColVector ? index() : 0; } - inline Index col() const { return IsColVector ? 0 : index(); } - - inline operator bool() const { return (m_id < m_end); } - - protected: - const internal::CompressedStorage& m_data; - Index m_id; - const Index m_end; - private: - // If you get here, then you're not using the right InnerIterator type, e.g.: - // SparseMatrix A; - // SparseMatrix::InnerIterator it(A,0); - template InnerIterator(const SparseMatrixBase&,Index outer=0); -}; - -template -class SparseVector::ReverseInnerIterator -{ - public: - explicit ReverseInnerIterator(const SparseVector& vec, Index outer=0) - : m_data(vec.m_data), m_id(m_data.size()), m_start(0) - { - EIGEN_UNUSED_VARIABLE(outer); - eigen_assert(outer==0); - } - - explicit ReverseInnerIterator(const internal::CompressedStorage& data) - : m_data(data), m_id(m_data.size()), m_start(0) - {} - - inline ReverseInnerIterator& operator--() { m_id--; return *this; } - - inline Scalar value() const { return m_data.value(m_id-1); } - inline Scalar& valueRef() { return const_cast(m_data.value(m_id-1)); } - - inline StorageIndex index() const { return m_data.index(m_id-1); } - inline Index row() const { return IsColVector ? index() : 0; } - inline Index col() const { return IsColVector ? 0 : index(); } - - inline operator bool() const { return (m_id > m_start); } - - protected: - const internal::CompressedStorage& m_data; - Index m_id; - const Index m_start; -}; - namespace internal { template @@ -441,7 +379,10 @@ struct evaluator > Flags = SparseVectorType::Flags }; - explicit evaluator(const SparseVectorType &mat) : m_matrix(mat) {} + explicit evaluator(const SparseVectorType &mat) : m_matrix(mat) + { + EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost); + } inline Index nonZerosEstimate() const { return m_matrix.nonZeros(); diff --git a/Eigen/src/SparseCore/SparseView.h b/Eigen/src/SparseCore/SparseView.h index 761e72038..c945c4dab 100644 --- a/Eigen/src/SparseCore/SparseView.h +++ b/Eigen/src/SparseCore/SparseView.h @@ -32,6 +32,7 @@ class SparseView : public SparseMatrixBase > { typedef typename MatrixType::Nested MatrixTypeNested; typedef typename internal::remove_all::type _MatrixTypeNested; + typedef SparseMatrixBase Base; public: EIGEN_SPARSE_PUBLIC_INTERFACE(SparseView) typedef typename internal::remove_all::type NestedExpression; diff --git a/Eigen/src/SparseLU/SparseLU.h b/Eigen/src/SparseLU/SparseLU.h old mode 100644 new mode 100755 index 8cdd29c7b..d33d27f46 --- a/Eigen/src/SparseLU/SparseLU.h +++ b/Eigen/src/SparseLU/SparseLU.h @@ -64,7 +64,8 @@ template struct SparseLUMatrixURetu * * \tparam _MatrixType The type of the sparse matrix. It must be a column-major SparseMatrix<> * \tparam _OrderingType The ordering method to use, either AMD, COLAMD or METIS. 
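A usage sketch of the solver documented here (standard SparseLU interface assumed; A must be column-major and compressed):

#include <Eigen/SparseLU>

void example_sparselu(const Eigen::SparseMatrix<double>& A, const Eigen::VectorXd& b)
{
  Eigen::SparseLU<Eigen::SparseMatrix<double>, Eigen::COLAMDOrdering<int> > solver;
  solver.analyzePattern(A);   // fill-reducing ordering from the sparsity pattern
  solver.factorize(A);        // numerical factorization
  Eigen::VectorXd x;
  if (solver.info() == Eigen::Success)
    x = solver.solve(b);
}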
Default is COLMAD - * + * + * \implsparsesolverconcept * * \sa \ref TutorialSparseDirectSolvers * \sa \ref OrderingMethods_Module @@ -89,13 +90,19 @@ class SparseLU : public SparseSolverBase >, typedef Matrix IndexVector; typedef PermutationMatrix PermutationType; typedef internal::SparseLUImpl Base; + + enum { + ColsAtCompileTime = MatrixType::ColsAtCompileTime, + MaxColsAtCompileTime = MatrixType::MaxColsAtCompileTime + }; public: SparseLU():m_lastError(""),m_Ustore(0,0,0,0,0,0),m_symmetricmode(false),m_diagpivotthresh(1.0),m_detPermR(1) { initperfvalues(); } - explicit SparseLU(const MatrixType& matrix):m_lastError(""),m_Ustore(0,0,0,0,0,0),m_symmetricmode(false),m_diagpivotthresh(1.0),m_detPermR(1) + explicit SparseLU(const MatrixType& matrix) + : m_lastError(""),m_Ustore(0,0,0,0,0,0),m_symmetricmode(false),m_diagpivotthresh(1.0),m_detPermR(1) { initperfvalues(); compute(matrix); @@ -713,7 +720,7 @@ template struct SparseLUMatrixUReturnType : internal::no_assignment_operator { typedef typename MatrixLType::Scalar Scalar; - explicit SparseLUMatrixUReturnType(const MatrixLType& mapL, const MatrixUType& mapU) + SparseLUMatrixUReturnType(const MatrixLType& mapL, const MatrixUType& mapU) : m_mapL(mapL),m_mapU(mapU) { } Index rows() { return m_mapL.rows(); } diff --git a/Eigen/src/SparseQR/SparseQR.h b/Eigen/src/SparseQR/SparseQR.h index 548b3f9b0..4f26c19ca 100644 --- a/Eigen/src/SparseQR/SparseQR.h +++ b/Eigen/src/SparseQR/SparseQR.h @@ -62,6 +62,8 @@ namespace internal { * \tparam _OrderingType The fill-reducing ordering method. See the \link OrderingMethods_Module * OrderingMethods \endlink module for the list of built-in and external ordering methods. * + * \implsparsesolverconcept + * * \warning The input sparse matrix A must be in compressed mode (see SparseMatrix::makeCompressed()). * */ @@ -82,6 +84,12 @@ class SparseQR : public SparseSolverBase > typedef Matrix IndexVector; typedef Matrix ScalarVector; typedef PermutationMatrix PermutationType; + + enum { + ColsAtCompileTime = MatrixType::ColsAtCompileTime, + MaxColsAtCompileTime = MatrixType::MaxColsAtCompileTime + }; + public: SparseQR () : m_analysisIsok(false), m_lastError(""), m_useDefaultThreshold(true),m_isQSorted(false),m_isEtreeOk(false) { } diff --git a/Eigen/src/SuperLUSupport/SuperLUSupport.h b/Eigen/src/SuperLUSupport/SuperLUSupport.h index d067d8fdf..fd2b26581 100644 --- a/Eigen/src/SuperLUSupport/SuperLUSupport.h +++ b/Eigen/src/SuperLUSupport/SuperLUSupport.h @@ -1,7 +1,7 @@ // This file is part of Eigen, a lightweight C++ template library // for linear algebra. // -// Copyright (C) 2008-2014 Gael Guennebaud +// Copyright (C) 2008-2015 Gael Guennebaud // // This Source Code Form is subject to the terms of the Mozilla // Public License v. 2.0. 
If a copy of the MPL was not distributed @@ -14,12 +14,11 @@ namespace Eigen { #define DECL_GSSVX(PREFIX,FLOATTYPE,KEYTYPE) \ extern "C" { \ - typedef struct { FLOATTYPE for_lu; FLOATTYPE total_needed; int expansions; } PREFIX##mem_usage_t; \ extern void PREFIX##gssvx(superlu_options_t *, SuperMatrix *, int *, int *, int *, \ char *, FLOATTYPE *, FLOATTYPE *, SuperMatrix *, SuperMatrix *, \ void *, int, SuperMatrix *, SuperMatrix *, \ FLOATTYPE *, FLOATTYPE *, FLOATTYPE *, FLOATTYPE *, \ - PREFIX##mem_usage_t *, SuperLUStat_t *, int *); \ + mem_usage_t *, SuperLUStat_t *, int *); \ } \ inline float SuperLU_gssvx(superlu_options_t *options, SuperMatrix *A, \ int *perm_c, int *perm_r, int *etree, char *equed, \ @@ -29,7 +28,7 @@ namespace Eigen { FLOATTYPE *recip_pivot_growth, \ FLOATTYPE *rcond, FLOATTYPE *ferr, FLOATTYPE *berr, \ SuperLUStat_t *stats, int *info, KEYTYPE) { \ - PREFIX##mem_usage_t mem_usage; \ + mem_usage_t mem_usage; \ PREFIX##gssvx(options, A, perm_c, perm_r, etree, equed, R, C, L, \ U, work, lwork, B, X, recip_pivot_growth, rcond, \ ferr, berr, &mem_usage, stats, info); \ @@ -53,7 +52,7 @@ DECL_GSSVX(z,double,std::complex) extern void PREFIX##gsisx(superlu_options_t *, SuperMatrix *, int *, int *, int *, \ char *, FLOATTYPE *, FLOATTYPE *, SuperMatrix *, SuperMatrix *, \ void *, int, SuperMatrix *, SuperMatrix *, FLOATTYPE *, FLOATTYPE *, \ - PREFIX##mem_usage_t *, SuperLUStat_t *, int *); \ + mem_usage_t *, SuperLUStat_t *, int *); \ } \ inline float SuperLU_gsisx(superlu_options_t *options, SuperMatrix *A, \ int *perm_c, int *perm_r, int *etree, char *equed, \ @@ -63,7 +62,7 @@ DECL_GSSVX(z,double,std::complex) FLOATTYPE *recip_pivot_growth, \ FLOATTYPE *rcond, \ SuperLUStat_t *stats, int *info, KEYTYPE) { \ - PREFIX##mem_usage_t mem_usage; \ + mem_usage_t mem_usage; \ PREFIX##gsisx(options, A, perm_c, perm_r, etree, equed, R, C, L, \ U, work, lwork, B, X, recip_pivot_growth, rcond, \ &mem_usage, stats, info); \ @@ -305,6 +304,10 @@ class SuperLUBase : public SparseSolverBase typedef Matrix IntColVectorType; typedef Map > PermutationMap; typedef SparseMatrix LUMatrixType; + enum { + ColsAtCompileTime = MatrixType::ColsAtCompileTime, + MaxColsAtCompileTime = MatrixType::MaxColsAtCompileTime + }; public: @@ -449,6 +452,10 @@ class SuperLUBase : public SparseSolverBase * * \tparam _MatrixType the type of the sparse matrix A, it must be a SparseMatrix<> * + * \warning This class is only for the 4.x versions of SuperLU. The 3.x and 5.x versions are not supported. + * + * \implsparsesolverconcept + * * \sa \ref TutorialSparseDirectSolvers */ template @@ -657,7 +664,7 @@ void SuperLU::_solve_impl(const MatrixBase &b, MatrixBase &m_sluStat, &info, Scalar()); StatFree(&m_sluStat); - if(&x.coeffRef(0) != x_ref.data()) + if(x.derived().data() != x_ref.data()) x = x_ref; m_info = info==0 ? Success : NumericalIssue; @@ -796,10 +803,12 @@ typename SuperLU::Scalar SuperLU::determinant() const * This class allows to solve for an approximate solution of A.X = B sparse linear problems via an incomplete LU factorization * using the SuperLU library. This class is aimed to be used as a preconditioner of the iterative linear solvers. * - * \warning This class requires SuperLU 4 or later. + * \warning This class is only for the 4.x versions of SuperLU. The 3.x and 5.x versions are not supported. 
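For reference, the wrapped solver in action (sketch; requires linking against SuperLU 4.x, per the warning below):

#include <Eigen/SuperLUSupport>

void example_superlu(const Eigen::SparseMatrix<double>& A, const Eigen::VectorXd& b)
{
  Eigen::SuperLU<Eigen::SparseMatrix<double> > solver;
  solver.compute(A);
  Eigen::VectorXd x;
  if (solver.info() == Eigen::Success)
    x = solver.solve(b);
}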
* * \tparam _MatrixType the type of the sparse matrix A, it must be a SparseMatrix<> * + * \implsparsesolverconcept + * * \sa \ref TutorialSparseDirectSolvers, class ConjugateGradient, class BiCGSTAB */ diff --git a/Eigen/src/UmfPackSupport/UmfPackSupport.h b/Eigen/src/UmfPackSupport/UmfPackSupport.h index 0a5043ef2..aaec8c6f1 100644 --- a/Eigen/src/UmfPackSupport/UmfPackSupport.h +++ b/Eigen/src/UmfPackSupport/UmfPackSupport.h @@ -16,6 +16,13 @@ namespace Eigen { // generic double/complex wrapper functions: + +inline void umfpack_defaults(double control[UMFPACK_CONTROL], double) +{ umfpack_di_defaults(control); } + +inline void umfpack_defaults(double control[UMFPACK_CONTROL], std::complex) +{ umfpack_zi_defaults(control); } + inline void umfpack_free_numeric(void **Numeric, double) { umfpack_di_free_numeric(Numeric); *Numeric = 0; } @@ -139,16 +146,23 @@ class UmfPackLU : public SparseSolverBase > typedef SparseMatrix LUMatrixType; typedef SparseMatrix UmfpackMatrixType; typedef Ref UmfpackMatrixRef; + enum { + ColsAtCompileTime = MatrixType::ColsAtCompileTime, + MaxColsAtCompileTime = MatrixType::MaxColsAtCompileTime + }; public: + typedef Array UmfpackControl; + UmfPackLU() : m_dummy(0,0), mp_matrix(m_dummy) { init(); } - explicit UmfPackLU(const MatrixType& matrix) + template + explicit UmfPackLU(const InputMatrixType& matrix) : mp_matrix(matrix) { init(); @@ -230,6 +244,39 @@ class UmfPackLU : public SparseSolverBase > analyzePattern_impl(); } + /** Provides the return status code returned by UmfPack during the numeric + * factorization. + * + * \sa factorize(), compute() + */ + inline int umfpackFactorizeReturncode() const + { + eigen_assert(m_numeric && "UmfPackLU: you must first call factorize()"); + return m_fact_errorCode; + } + + /** Provides access to the control settings array used by UmfPack. + * + * If this array contains NaN's, the default values are used. + * + * See UMFPACK documentation for details. + */ + inline const UmfpackControl& umfpackControl() const + { + return m_control; + } + + /** Provides access to the control settings array used by UmfPack. + * + * If this array contains NaN's, the default values are used. + * + * See UMFPACK documentation for details. + */ + inline UmfpackControl& umfpackControl() + { + return m_control; + } + /** Performs a numeric decomposition of \a matrix * * The given matrix must has the same sparcity than the matrix on which the pattern anylysis has been performed. @@ -269,11 +316,12 @@ class UmfPackLU : public SparseSolverBase > void analyzePattern_impl() { + umfpack_defaults(m_control.data(), Scalar()); int errorCode = 0; errorCode = umfpack_symbolic(internal::convert_index(mp_matrix.rows()), internal::convert_index(mp_matrix.cols()), mp_matrix.outerIndexPtr(), mp_matrix.innerIndexPtr(), mp_matrix.valuePtr(), - &m_symbolic, 0, 0); + &m_symbolic, m_control.data(), 0); m_isInitialized = true; m_info = errorCode ? InvalidInput : Success; @@ -284,11 +332,10 @@ class UmfPackLU : public SparseSolverBase > void factorize_impl() { - int errorCode; - errorCode = umfpack_numeric(mp_matrix.outerIndexPtr(), mp_matrix.innerIndexPtr(), mp_matrix.valuePtr(), - m_symbolic, &m_numeric, 0, 0); + m_fact_errorCode = umfpack_numeric(mp_matrix.outerIndexPtr(), mp_matrix.innerIndexPtr(), mp_matrix.valuePtr(), + m_symbolic, &m_numeric, m_control.data(), 0); - m_info = errorCode ? NumericalIssue : Success; + m_info = m_fact_errorCode == UMFPACK_OK ? 
Success : NumericalIssue; m_factorizationIsOk = true; m_extractedDataAreDirty = true; } @@ -311,6 +358,9 @@ class UmfPackLU : public SparseSolverBase > // cached data to reduce reallocation, etc. mutable LUMatrixType m_l; + int m_fact_errorCode; + UmfpackControl m_control; + mutable LUMatrixType m_u; mutable IntColVectorType m_p; mutable IntRowVectorType m_q; @@ -390,7 +440,7 @@ bool UmfPackLU::_solve_impl(const MatrixBase &b, MatrixBas x_ptr = &x.col(j).coeffRef(0); errorCode = umfpack_solve(UMFPACK_A, mp_matrix.outerIndexPtr(), mp_matrix.innerIndexPtr(), mp_matrix.valuePtr(), - x_ptr, &b.const_cast_derived().col(j).coeffRef(0), m_numeric, 0, 0); + x_ptr, &b.const_cast_derived().col(j).coeffRef(0), m_numeric, m_control.data(), 0); if(x.innerStride()!=1) x.col(j) = x_tmp; if (errorCode!=0) diff --git a/Eigen/src/plugins/ArrayCwiseUnaryOps.h b/Eigen/src/plugins/ArrayCwiseUnaryOps.h index 5a3c92ea2..01432e2f3 100644 --- a/Eigen/src/plugins/ArrayCwiseUnaryOps.h +++ b/Eigen/src/plugins/ArrayCwiseUnaryOps.h @@ -4,6 +4,8 @@ typedef CwiseUnaryOp, const Derived> AbsReturnTy typedef CwiseUnaryOp, const Derived> ArgReturnType; typedef CwiseUnaryOp, const Derived> Abs2ReturnType; typedef CwiseUnaryOp, const Derived> SqrtReturnType; +typedef CwiseUnaryOp, const Derived> RsqrtReturnType; +typedef CwiseUnaryOp, const Derived> SignReturnType; typedef CwiseUnaryOp, const Derived> InverseReturnType; typedef CwiseUnaryOp, const Derived> BooleanNotReturnType; @@ -19,6 +21,9 @@ typedef CwiseUnaryOp, const Derived> AtanReturn typedef CwiseUnaryOp, const Derived> TanhReturnType; typedef CwiseUnaryOp, const Derived> SinhReturnType; typedef CwiseUnaryOp, const Derived> CoshReturnType; +typedef CwiseUnaryOp, const Derived> LgammaReturnType; +typedef CwiseUnaryOp, const Derived> ErfReturnType; +typedef CwiseUnaryOp, const Derived> ErfcReturnType; typedef CwiseUnaryOp, const Derived> PowReturnType; typedef CwiseUnaryOp, const Derived> SquareReturnType; typedef CwiseUnaryOp, const Derived> CubeReturnType; @@ -138,6 +143,39 @@ sqrt() const return SqrtReturnType(derived()); } +/** \returns an expression of the coefficient-wise inverse square root of *this. + * + * This function computes the coefficient-wise inverse square root. + * + * Example: \include Cwise_sqrt.cpp + * Output: \verbinclude Cwise_sqrt.out + * + * \sa pow(), square() + */ +EIGEN_DEVICE_FUNC +inline const RsqrtReturnType +rsqrt() const +{ + return RsqrtReturnType(derived()); +} + +/** \returns an expression of the coefficient-wise signum of *this. + * + * This function computes the coefficient-wise signum. + * + * Example: \include Cwise_sign.cpp + * Output: \verbinclude Cwise_sign.out + * + * \sa pow(), square() + */ +EIGEN_DEVICE_FUNC +inline const SignReturnType +sign() const +{ + return SignReturnType(derived()); +} + + /** \returns an expression of the coefficient-wise cosine of *this. * * This function computes the coefficient-wise cosine. The function MatrixBase::cos() in the @@ -267,6 +305,47 @@ cosh() const return CoshReturnType(derived()); } +/** \returns an expression of the coefficient-wise ln(|gamma(*this)|). + * + * Example: \include Cwise_lgamma.cpp + * Output: \verbinclude Cwise_lgamma.out + * + * \sa cos(), sin(), tan() + */ +inline const LgammaReturnType +lgamma() const +{ + return LgammaReturnType(derived()); +} + +/** \returns an expression of the coefficient-wise Gauss error + * function of *this. 
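A quick sketch of the coefficient-wise kernels introduced here (values illustrative; erf/erfc/lgamma are available as of this changeset):

#include <Eigen/Core>

void example_cwise()
{
  Eigen::ArrayXd a = Eigen::ArrayXd::LinSpaced(4, 1.0, 4.0);
  Eigen::ArrayXd r = a.rsqrt();         // coefficient-wise 1/sqrt(x)
  Eigen::ArrayXd s = (a - 2.5).sign();  // -1, 0 or +1 per coefficient
  Eigen::ArrayXd e = a.erf();           // Gauss error function
}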
+ * + * Example: \include Cwise_erf.cpp + * Output: \verbinclude Cwise_erf.out + * + * \sa cos(), sin(), tan() + */ +inline const ErfReturnType +erf() const +{ + return ErfReturnType(derived()); +} + +/** \returns an expression of the coefficient-wise Complementary error + * function of *this. + * + * Example: \include Cwise_erfc.cpp + * Output: \verbinclude Cwise_erfc.out + * + * \sa cos(), sin(), tan() + */ +inline const ErfcReturnType +erfc() const +{ + return ErfcReturnType(derived()); +} + /** \returns an expression of the coefficient-wise power of *this to the given exponent. * * This function computes the coefficient-wise power. The function MatrixBase::pow() in the diff --git a/Eigen/src/plugins/MatrixCwiseUnaryOps.h b/Eigen/src/plugins/MatrixCwiseUnaryOps.h index e339140bf..e16bb374b 100644 --- a/Eigen/src/plugins/MatrixCwiseUnaryOps.h +++ b/Eigen/src/plugins/MatrixCwiseUnaryOps.h @@ -14,6 +14,7 @@ typedef CwiseUnaryOp, const Derived> CwiseAbsReturnType; typedef CwiseUnaryOp, const Derived> CwiseAbs2ReturnType; typedef CwiseUnaryOp, const Derived> CwiseSqrtReturnType; +typedef CwiseUnaryOp, const Derived> CwiseSignReturnType; typedef CwiseUnaryOp, const Derived> CwiseInverseReturnType; /** \returns an expression of the coefficient-wise absolute value of \c *this @@ -49,6 +50,17 @@ EIGEN_DEVICE_FUNC inline const CwiseSqrtReturnType cwiseSqrt() const { return CwiseSqrtReturnType(derived()); } +/** \returns an expression of the coefficient-wise signum of *this. + * + * Example: \include MatrixBase_cwiseSign.cpp + * Output: \verbinclude MatrixBase_cwiseSign.out + * + */ +EIGEN_DEVICE_FUNC +inline const CwiseSignReturnType +cwiseSign() const { return CwiseSignReturnType(derived()); } + + /** \returns an expression of the coefficient-wise inverse of *this. 
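And the Matrix-world spelling added alongside (sketch):

#include <Eigen/Core>

void example_cwise_sign()
{
  Eigen::MatrixXd M = Eigen::MatrixXd::Random(3, 3);
  Eigen::MatrixXd S = M.cwiseSign();     // same operation as Array's sign()
  Eigen::MatrixXd I = M.cwiseInverse();  // coefficient-wise 1/x
}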
* * Example: \include MatrixBase_cwiseInverse.cpp diff --git a/bench/BenchTimer.h b/bench/BenchTimer.h index 28e2bcaea..64666d75f 100644 --- a/bench/BenchTimer.h +++ b/bench/BenchTimer.h @@ -28,6 +28,14 @@ # include #endif +static void escape(void *p) { + asm volatile("" : : "g"(p) : "memory"); +} + +static void clobber() { + asm volatile("" : : : "memory"); +} + #include namespace Eigen @@ -168,6 +176,7 @@ public: CODE; \ } \ TIMER.stop(); \ + clobber(); \ } \ } diff --git a/bench/bench_gemm.cpp b/bench/bench_gemm.cpp index 0974ebe4c..8528c5587 100644 --- a/bench/bench_gemm.cpp +++ b/bench/bench_gemm.cpp @@ -203,9 +203,10 @@ int main(int argc, char ** argv) return 1; } - if(cache_size1>0) - setCpuCacheSizes(cache_size1,cache_size2,cache_size3); - +#if EIGEN_VERSION_AT_LEAST(3,2,90) + if(cache_size1>0) + setCpuCacheSizes(cache_size1,cache_size2,cache_size3); +#endif A a(m,p); a.setRandom(); B b(p,n); b.setRandom(); diff --git a/bench/btl/CMakeLists.txt b/bench/btl/CMakeLists.txt index 9444b450c..38ff9f483 100644 --- a/bench/btl/CMakeLists.txt +++ b/bench/btl/CMakeLists.txt @@ -11,29 +11,24 @@ SET(CMAKE_INCLUDE_CURRENT_DIR ON) string(REGEX MATCH icpc IS_ICPC ${CMAKE_CXX_COMPILER}) IF(CMAKE_COMPILER_IS_GNUCXX OR IS_ICPC) - SET(CMAKE_CXX_FLAGS "-g0 -O3 -DNDEBUG") - SET(CMAKE_Fortran_FLAGS "-g0 -O3 -DNDEBUG") - IF(NOT BTL_NOVEC) - SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -msse2") - SET(CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS} -msse2") - ELSE(NOT BTL_NOVEC) + SET(CMAKE_CXX_FLAGS "-g0 -O3 -DNDEBUG ${CMAKE_CXX_FLAGS}") + SET(CMAKE_Fortran_FLAGS "-g0 -O3 -DNDEBUG ${CMAKE_Fortran_FLAGS}") + IF(BTL_NOVEC) SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DEIGEN_DONT_VECTORIZE") - ENDIF(NOT BTL_NOVEC) + ENDIF(BTL_NOVEC) ENDIF(CMAKE_COMPILER_IS_GNUCXX OR IS_ICPC) IF(MSVC) SET(CMAKE_CXX_FLAGS " /O2 /Ot /GL /fp:fast -DNDEBUG") # SET(CMAKE_Fortran_FLAGS "-g0 -O3 -DNDEBUG") - IF(NOT BTL_NOVEC) - SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /arch:SSE2") - ELSE(NOT BTL_NOVEC) + IF(BTL_NOVEC) SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DEIGEN_DONT_VECTORIZE") - ENDIF(NOT BTL_NOVEC) + ENDIF(BTL_NOVEC) ENDIF(MSVC) if(IS_ICPC) - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fast") - set(CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS} -fast") + set(CMAKE_CXX_FLAGS "-fast ${CMAKE_CXX_FLAGS}") + set(CMAKE_Fortran_FLAGS "-fast ${CMAKE_Fortran_FLAGS}") endif(IS_ICPC) include_directories( diff --git a/bench/btl/generic_bench/bench_parameter.hh b/bench/btl/generic_bench/bench_parameter.hh index 0f62bd421..2b01149f9 100644 --- a/bench/btl/generic_bench/bench_parameter.hh +++ b/bench/btl/generic_bench/bench_parameter.hh @@ -29,7 +29,7 @@ // min vector size for axpy bench #define MIN_AXPY 5 // max vector size for axpy bench -#define MAX_AXPY 1000000 +#define MAX_AXPY 3000000 // min matrix size for matrix vector product bench #define MIN_MV 5 // max matrix size for matrix vector product bench diff --git a/bench/perf_monitoring/gemm/changesets.txt b/bench/perf_monitoring/gemm/changesets.txt index 40a71c781..fb3e48e99 100644 --- a/bench/perf_monitoring/gemm/changesets.txt +++ b/bench/perf_monitoring/gemm/changesets.txt @@ -42,4 +42,6 @@ before-evaluators 6984:45f26866c091 # rm dynamic loop swapping, adjust lhs's micro panel height to fully exploit L1 cache 6986:a675d05b6f8f # blocking heuristic: block on the rhs in L1 if the lhs fit in L1. 
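Why the escape()/clobber() barriers above matter: without them the compiler may prove a benchmarked result unused and delete the entire timed loop. A self-contained sketch (GCC/Clang inline asm, as in BenchTimer.h):

#include <cstddef>
#include <vector>

static void escape(void* p) { asm volatile("" : : "g"(p) : "memory"); }

double checksum(const std::vector<double>& v)
{
  double acc = 0;
  for (std::size_t i = 0; i < v.size(); ++i) acc += v[i];
  escape(&acc);  // the result "escapes": the loop cannot be optimized away
  return acc;
}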
7013:f875e75f07e5 # organize a little our default cache sizes, and use a saner default L1 outside of x86 (10% faster on Nexus 5) +7591:09a8e2186610 # 3.3-alpha1 +7650:b0f3c8f43025 # help clang inlining diff --git a/bench/perf_monitoring/gemm/gemm.cpp b/bench/perf_monitoring/gemm/gemm.cpp index 72eb9cab6..614bd4737 100644 --- a/bench/perf_monitoring/gemm/gemm.cpp +++ b/bench/perf_monitoring/gemm/gemm.cpp @@ -53,7 +53,7 @@ int main(int argc, char **argv) { std::vector results; - std::ifstream settings("settings.txt"); + std::ifstream settings("gemm_settings.txt"); long m, n, k; while(settings >> m >> n >> k) { diff --git a/bench/perf_monitoring/gemm/settings.txt b/bench/perf_monitoring/gemm/gemm_settings.txt similarity index 100% rename from bench/perf_monitoring/gemm/settings.txt rename to bench/perf_monitoring/gemm/gemm_settings.txt diff --git a/bench/perf_monitoring/gemm/lazy_gemm.cpp b/bench/perf_monitoring/gemm/lazy_gemm.cpp new file mode 100644 index 000000000..b443218d7 --- /dev/null +++ b/bench/perf_monitoring/gemm/lazy_gemm.cpp @@ -0,0 +1,97 @@ +#include +#include +#include +#include +#include "../../BenchTimer.h" +using namespace Eigen; + +#ifndef SCALAR +#error SCALAR must be defined +#endif + +typedef SCALAR Scalar; + +template +inline void lazy_gemm(const MatA &A, const MatB &B, MatC &C) +{ + escape((void*)A.data()); + escape((void*)B.data()); + C.noalias() += A.lazyProduct(B); + escape((void*)C.data()); +} + +template +EIGEN_DONT_INLINE +double bench() +{ + typedef Matrix MatA; + typedef Matrix MatB; + typedef Matrix MatC; + + MatA A(m,k); + MatB B(k,n); + MatC C(m,n); + A.setRandom(); + B.setRandom(); + C.setZero(); + + BenchTimer t; + + double up = 1e7*4/sizeof(Scalar); + double tm0 = 10, tm1 = 20; + + double flops = 2. * m * n * k; + long rep = std::max(10., std::min(10000., up/flops) ); + long tries = std::max(tm0, std::min(tm1, up/flops) ); + + BENCH(t, tries, rep, lazy_gemm(A,B,C)); + + return 1e-9 * rep * flops / t.best(); +} + +template +double bench_t(int t) +{ + if(t) + return bench(); + else + return bench(); +} + +EIGEN_DONT_INLINE +double bench_mnk(int m, int n, int k, int t) +{ + int id = m*10000 + n*100 + k; + switch(id) { + case 10101 : return bench_t< 1, 1, 1>(t); break; + case 20202 : return bench_t< 2, 2, 2>(t); break; + case 30303 : return bench_t< 3, 3, 3>(t); break; + case 40404 : return bench_t< 4, 4, 4>(t); break; + case 50505 : return bench_t< 5, 5, 5>(t); break; + case 60606 : return bench_t< 6, 6, 6>(t); break; + case 70707 : return bench_t< 7, 7, 7>(t); break; + case 80808 : return bench_t< 8, 8, 8>(t); break; + case 90909 : return bench_t< 9, 9, 9>(t); break; + case 101010 : return bench_t<10,10,10>(t); break; + case 111111 : return bench_t<11,11,11>(t); break; + case 121212 : return bench_t<12,12,12>(t); break; + } + return 0; +} + +int main(int argc, char **argv) +{ + std::vector results; + + std::ifstream settings("lazy_gemm_settings.txt"); + long m, n, k, t; + while(settings >> m >> n >> k >> t) + { + //std::cerr << " Testing " << m << " " << n << " " << k << std::endl; + results.push_back( bench_mnk(m, n, k, t) ); + } + + std::cout << RowVectorXd::Map(results.data(), results.size()); + + return 0; +} diff --git a/bench/perf_monitoring/gemm/lazy_gemm_settings.txt b/bench/perf_monitoring/gemm/lazy_gemm_settings.txt new file mode 100644 index 000000000..407d5d4fa --- /dev/null +++ b/bench/perf_monitoring/gemm/lazy_gemm_settings.txt @@ -0,0 +1,15 @@ +1 1 1 0 +2 2 2 0 +3 3 3 0 +4 4 4 0 +4 4 4 1 +5 5 5 0 +6 6 6 0 +7 7 7 0 +7 7 7 1 +8 8 8 0 +9 9 9 0 
+10 10 10 0 +11 11 11 0 +12 12 12 0 +12 12 12 1 diff --git a/bench/perf_monitoring/gemm/make_plot.sh b/bench/perf_monitoring/gemm/make_plot.sh index 609c471f9..4d6053501 100755 --- a/bench/perf_monitoring/gemm/make_plot.sh +++ b/bench/perf_monitoring/gemm/make_plot.sh @@ -4,6 +4,7 @@ # it reads $1.out # and generates $1.pdf WHAT=$1 +bench=$2 header="rev " while read line @@ -11,7 +12,7 @@ do if [ ! -z '$line' ]; then header="$header \"$line\"" fi -done < settings.txt +done < $bench"_settings.txt" echo $header > $WHAT.out.header cat $WHAT.out >> $WHAT.out.header diff --git a/bench/perf_monitoring/gemm/run_gemm.sh b/bench/perf_monitoring/gemm/run.sh similarity index 78% rename from bench/perf_monitoring/gemm/run_gemm.sh rename to bench/perf_monitoring/gemm/run.sh index 3fa6a3661..bfb4ecfac 100755 --- a/bench/perf_monitoring/gemm/run_gemm.sh +++ b/bench/perf_monitoring/gemm/run.sh @@ -1,5 +1,8 @@ #!/bin/bash +# ./run.sh gemm +# ./run.sh lazy_gemm + # Examples of environment variables to be set: # PREFIX="haswell-fma-" # CXX_FLAGS="-mfma" @@ -8,6 +11,7 @@ # -up : enforce the recomputation of existing data, and keep best results as a merging strategy # -s : recompute selected changesets only and keep bests +bench=$1 if echo "$*" | grep '\-up' > /dev/null; then update=true @@ -84,7 +88,7 @@ function test_current fi res=$prev count_rev=`echo $prev | wc -w` - count_ref=`cat "settings.txt" | wc -l` + count_ref=`cat $bench"_settings.txt" | wc -l` if echo "$global_args" | grep "$rev" > /dev/null; then rev_found=true else @@ -93,7 +97,7 @@ function test_current # echo $update et $selected et $rev_found because $rev et "$global_args" # echo $count_rev et $count_ref if [ $update == true ] || [ $count_rev != $count_ref ] || ([ $selected == true ] && [ $rev_found == true ]); then - if $CXX -O2 -DNDEBUG -march=native $CXX_FLAGS -I eigen_src gemm.cpp -DSCALAR=$scalar -o $name; then + if $CXX -O2 -DNDEBUG -march=native $CXX_FLAGS -I eigen_src $bench.cpp -DSCALAR=$scalar -o $name; then curr=`./$name` if [ $count_rev == $count_ref ]; then echo "merge previous $prev" @@ -113,9 +117,9 @@ function test_current fi } -make_backup $PREFIX"sgemm" -make_backup $PREFIX"dgemm" -make_backup $PREFIX"cgemm" +make_backup $PREFIX"s"$bench +make_backup $PREFIX"d"$bench +make_backup $PREFIX"c"$bench cut -f1 -d"#" < changesets.txt | grep -E '[[:alnum:]]' | while read rev do @@ -126,27 +130,27 @@ do actual_rev=`hg identify | cut -f1 -d' '` cd .. 
- test_current $actual_rev float $PREFIX"sgemm" - test_current $actual_rev double $PREFIX"dgemm" - test_current $actual_rev "std::complex" $PREFIX"cgemm" + test_current $actual_rev float $PREFIX"s"$bench + test_current $actual_rev double $PREFIX"d"$bench + test_current $actual_rev "std::complex" $PREFIX"c"$bench fi done echo "Float:" -cat $PREFIX"sgemm.out" +cat $PREFIX"s"$bench.out" echo "" echo "Double:" -cat $PREFIX"dgemm.out" +cat $PREFIX"d"$bench.out" echo "" echo "Complex:" -cat $PREFIX"cgemm.out" +cat $PREFIX"c"$bench.out" echo "" -./make_plot.sh $PREFIX"sgemm" -./make_plot.sh $PREFIX"dgemm" -./make_plot.sh $PREFIX"cgemm" +./make_plot.sh $PREFIX"s"$bench $bench +./make_plot.sh $PREFIX"d"$bench $bench +./make_plot.sh $PREFIX"c"$bench $bench diff --git a/bench/spbench/CMakeLists.txt b/bench/spbench/CMakeLists.txt index 6e0e1b103..8d53f4ae2 100644 --- a/bench/spbench/CMakeLists.txt +++ b/bench/spbench/CMakeLists.txt @@ -29,7 +29,7 @@ if(UMFPACK_FOUND AND BLAS_FOUND) set(UMFPACK_ALL_LIBS ${UMFPACK_LIBRARIES} ${BLAS_LIBRARIES}) endif() -find_package(SuperLU) +find_package(SuperLU 4.0) if(SUPERLU_FOUND AND BLAS_FOUND) add_definitions("-DEIGEN_SUPERLU_SUPPORT") include_directories(${SUPERLU_INCLUDES}) diff --git a/blas/level2_cplx_impl.h b/blas/level2_cplx_impl.h index afa9a7493..9b845de22 100644 --- a/blas/level2_cplx_impl.h +++ b/blas/level2_cplx_impl.h @@ -18,7 +18,7 @@ */ int EIGEN_BLAS_FUNC(hemv)(char *uplo, int *n, RealScalar *palpha, RealScalar *pa, int *lda, RealScalar *px, int *incx, RealScalar *pbeta, RealScalar *py, int *incy) { - typedef void (*functype)(int, const Scalar*, int, const Scalar*, int, Scalar*, Scalar); + typedef void (*functype)(int, const Scalar*, int, const Scalar*, Scalar*, Scalar); static functype func[2]; static bool init = false; @@ -67,7 +67,7 @@ int EIGEN_BLAS_FUNC(hemv)(char *uplo, int *n, RealScalar *palpha, RealScalar *pa if(code>=2 || func[code]==0) return 0; - func[code](*n, a, *lda, actual_x, 1, actual_y, alpha); + func[code](*n, a, *lda, actual_x, actual_y, alpha); } if(actual_x!=x) delete[] actual_x; diff --git a/blas/level2_real_impl.h b/blas/level2_real_impl.h index 9722a4674..cac89b268 100644 --- a/blas/level2_real_impl.h +++ b/blas/level2_real_impl.h @@ -12,7 +12,7 @@ // y = alpha*A*x + beta*y int EIGEN_BLAS_FUNC(symv) (char *uplo, int *n, RealScalar *palpha, RealScalar *pa, int *lda, RealScalar *px, int *incx, RealScalar *pbeta, RealScalar *py, int *incy) { - typedef void (*functype)(int, const Scalar*, int, const Scalar*, int, Scalar*, Scalar); + typedef void (*functype)(int, const Scalar*, int, const Scalar*, Scalar*, Scalar); static functype func[2]; static bool init = false; @@ -59,7 +59,7 @@ int EIGEN_BLAS_FUNC(symv) (char *uplo, int *n, RealScalar *palpha, RealScalar *p if(code>=2 || func[code]==0) return 0; - func[code](*n, a, *lda, actual_x, 1, actual_y, alpha); + func[code](*n, a, *lda, actual_x, actual_y, alpha); if(actual_x!=x) delete[] actual_x; if(actual_y!=y) delete[] copy_back(actual_y,y,*n,*incy); diff --git a/blas/level3_impl.h b/blas/level3_impl.h index 563101dfc..6a6b00728 100644 --- a/blas/level3_impl.h +++ b/blas/level3_impl.h @@ -6,7 +6,7 @@ // This Source Code Form is subject to the terms of the Mozilla // Public License v. 2.0. If a copy of the MPL was not distributed // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
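The hemv/symv kernel signatures above drop their stride arguments because the BLAS wrappers always pack strided vectors into contiguous temporaries first; a sketch of that pattern (names illustrative; negative increments, which the real wrappers handle, are omitted):

#include <vector>

const double* packed(const double* x, int n, int incx, std::vector<double>& tmp)
{
  if (incx == 1) return x;  // already contiguous, pass through unchanged
  tmp.resize(n);
  for (int i = 0; i < n; ++i) tmp[i] = x[i * incx];
  return tmp.data();        // the kernel can now assume unit stride
}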
- +#include #include "common.h" int EIGEN_BLAS_FUNC(gemm)(char *opa, char *opb, int *m, int *n, int *k, RealScalar *palpha, RealScalar *pa, int *lda, RealScalar *pb, int *ldb, RealScalar *pbeta, RealScalar *pc, int *ldc) @@ -133,6 +133,9 @@ int EIGEN_BLAS_FUNC(trsm)(char *side, char *uplo, char *opa, char *diag, int *m, if(info) return xerbla_(SCALAR_SUFFIX_UP"TRSM ",&info,6); + if(*m==0 || *n==0) + return 0; + int code = OP(*opa) | (SIDE(*side) << 2) | (UPLO(*uplo) << 3) | (DIAG(*diag) << 4); if(SIDE(*side)==LEFT) @@ -358,6 +361,9 @@ int EIGEN_BLAS_FUNC(syrk)(char *uplo, char *op, int *n, int *k, RealScalar *palp else matrix(c, *n, *n, *ldc).triangularView() *= beta; } + if(*n==0 || *k==0) + return 0; + #if ISCOMPLEX // FIXME add support for symmetric complex matrix if(UPLO(*uplo)==UP) @@ -392,6 +398,8 @@ int EIGEN_BLAS_FUNC(syr2k)(char *uplo, char *op, int *n, int *k, RealScalar *pal Scalar alpha = *reinterpret_cast(palpha); Scalar beta = *reinterpret_cast(pbeta); +// std::cerr << "in syr2k " << *uplo << " " << *op << " " << *n << " " << *k << " " << alpha << " " << *lda << " " << *ldb << " " << beta << " " << *ldc << "\n"; + int info = 0; if(UPLO(*uplo)==INVALID) info = 1; else if(OP(*op)==INVALID) info = 2; @@ -506,6 +514,8 @@ int EIGEN_BLAS_FUNC(hemm)(char *side, char *uplo, int *m, int *n, RealScalar *pa // c = alpha*conj(a')*a + beta*c for op = 'C'or'c' int EIGEN_BLAS_FUNC(herk)(char *uplo, char *op, int *n, int *k, RealScalar *palpha, RealScalar *pa, int *lda, RealScalar *pbeta, RealScalar *pc, int *ldc) { +// std::cerr << "in herk " << *uplo << " " << *op << " " << *n << " " << *k << " " << *palpha << " " << *lda << " " << *pbeta << " " << *ldc << "\n"; + typedef void (*functype)(DenseIndex, DenseIndex, const Scalar *, DenseIndex, const Scalar *, DenseIndex, Scalar *, DenseIndex, const Scalar&); static functype func[8]; @@ -577,6 +587,8 @@ int EIGEN_BLAS_FUNC(her2k)(char *uplo, char *op, int *n, int *k, RealScalar *pal Scalar alpha = *reinterpret_cast(palpha); RealScalar beta = *pbeta; +// std::cerr << "in her2k " << *uplo << " " << *op << " " << *n << " " << *k << " " << alpha << " " << *lda << " " << *ldb << " " << beta << " " << *ldc << "\n"; + int info = 0; if(UPLO(*uplo)==INVALID) info = 1; else if((OP(*op)==INVALID) || (OP(*op)==TR)) info = 2; diff --git a/blas/testing/cblat1.f b/blas/testing/cblat1.f index a4c996fda..8ca67fb19 100644 --- a/blas/testing/cblat1.f +++ b/blas/testing/cblat1.f @@ -1,7 +1,49 @@ +*> \brief \b CBLAT1 +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +* Definition: +* =========== +* +* PROGRAM CBLAT1 +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> Test program for the COMPLEX Level 1 BLAS. +*> Based upon the original BLAS test routine together with: +*> +*> F06GAF Example Program Text +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date April 2012 +* +*> \ingroup complex_blas_testing +* +* ===================================================================== PROGRAM CBLAT1 -* Test program for the COMPLEX Level 1 BLAS. -* Based upon the original BLAS test routine together with: -* F06GAF Example Program Text +* +* -- Reference BLAS test routine (version 3.4.1) -- +* -- Reference BLAS is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. 
of Colorado Denver and NAG Ltd..-- +* April 2012 +* +* ===================================================================== +* * .. Parameters .. INTEGER NOUT PARAMETER (NOUT=6) @@ -114,8 +156,8 @@ + (5.0E0,6.0E0), (5.0E0,6.0E0), (0.1E0,0.1E0), + (-0.6E0,0.1E0), (0.1E0,-0.3E0), (7.0E0,8.0E0), + (7.0E0,8.0E0), (7.0E0,8.0E0), (7.0E0,8.0E0), - + (7.0E0,8.0E0), (0.3E0,0.1E0), (0.1E0,0.4E0), - + (0.4E0,0.1E0), (0.1E0,0.2E0), (2.0E0,3.0E0), + + (7.0E0,8.0E0), (0.3E0,0.1E0), (0.5E0,0.0E0), + + (0.0E0,0.5E0), (0.0E0,0.2E0), (2.0E0,3.0E0), + (2.0E0,3.0E0), (2.0E0,3.0E0), (2.0E0,3.0E0)/ DATA ((CV(I,J,2),I=1,8),J=1,5)/(0.1E0,0.1E0), + (4.0E0,5.0E0), (4.0E0,5.0E0), (4.0E0,5.0E0), @@ -129,10 +171,10 @@ + (3.0E0,6.0E0), (-0.6E0,0.1E0), (4.0E0,7.0E0), + (0.1E0,-0.3E0), (7.0E0,2.0E0), (7.0E0,2.0E0), + (7.0E0,2.0E0), (0.3E0,0.1E0), (5.0E0,8.0E0), - + (0.1E0,0.4E0), (6.0E0,9.0E0), (0.4E0,0.1E0), - + (8.0E0,3.0E0), (0.1E0,0.2E0), (9.0E0,4.0E0)/ - DATA STRUE2/0.0E0, 0.5E0, 0.6E0, 0.7E0, 0.7E0/ - DATA STRUE4/0.0E0, 0.7E0, 1.0E0, 1.3E0, 1.7E0/ + + (0.5E0,0.0E0), (6.0E0,9.0E0), (0.0E0,0.5E0), + + (8.0E0,3.0E0), (0.0E0,0.2E0), (9.0E0,4.0E0)/ + DATA STRUE2/0.0E0, 0.5E0, 0.6E0, 0.7E0, 0.8E0/ + DATA STRUE4/0.0E0, 0.7E0, 1.0E0, 1.3E0, 1.6E0/ DATA ((CTRUE5(I,J,1),I=1,8),J=1,5)/(0.1E0,0.1E0), + (1.0E0,2.0E0), (1.0E0,2.0E0), (1.0E0,2.0E0), + (1.0E0,2.0E0), (1.0E0,2.0E0), (1.0E0,2.0E0), @@ -145,8 +187,8 @@ + (0.11E0,-0.03E0), (-0.17E0,0.46E0), + (-0.17E0,-0.19E0), (7.0E0,8.0E0), (7.0E0,8.0E0), + (7.0E0,8.0E0), (7.0E0,8.0E0), (7.0E0,8.0E0), - + (0.19E0,-0.17E0), (0.32E0,0.09E0), - + (0.23E0,-0.24E0), (0.18E0,0.01E0), + + (0.19E0,-0.17E0), (0.20E0,-0.35E0), + + (0.35E0,0.20E0), (0.14E0,0.08E0), + (2.0E0,3.0E0), (2.0E0,3.0E0), (2.0E0,3.0E0), + (2.0E0,3.0E0)/ DATA ((CTRUE5(I,J,2),I=1,8),J=1,5)/(0.1E0,0.1E0), @@ -162,9 +204,9 @@ + (-0.17E0,0.46E0), (4.0E0,7.0E0), + (-0.17E0,-0.19E0), (7.0E0,2.0E0), (7.0E0,2.0E0), + (7.0E0,2.0E0), (0.19E0,-0.17E0), (5.0E0,8.0E0), - + (0.32E0,0.09E0), (6.0E0,9.0E0), - + (0.23E0,-0.24E0), (8.0E0,3.0E0), - + (0.18E0,0.01E0), (9.0E0,4.0E0)/ + + (0.20E0,-0.35E0), (6.0E0,9.0E0), + + (0.35E0,0.20E0), (8.0E0,3.0E0), + + (0.14E0,0.08E0), (9.0E0,4.0E0)/ DATA ((CTRUE6(I,J,1),I=1,8),J=1,5)/(0.1E0,0.1E0), + (1.0E0,2.0E0), (1.0E0,2.0E0), (1.0E0,2.0E0), + (1.0E0,2.0E0), (1.0E0,2.0E0), (1.0E0,2.0E0), @@ -177,8 +219,8 @@ + (0.03E0,0.03E0), (-0.18E0,0.03E0), + (0.03E0,-0.09E0), (7.0E0,8.0E0), (7.0E0,8.0E0), + (7.0E0,8.0E0), (7.0E0,8.0E0), (7.0E0,8.0E0), - + (0.09E0,0.03E0), (0.03E0,0.12E0), - + (0.12E0,0.03E0), (0.03E0,0.06E0), (2.0E0,3.0E0), + + (0.09E0,0.03E0), (0.15E0,0.00E0), + + (0.00E0,0.15E0), (0.00E0,0.06E0), (2.0E0,3.0E0), + (2.0E0,3.0E0), (2.0E0,3.0E0), (2.0E0,3.0E0)/ DATA ((CTRUE6(I,J,2),I=1,8),J=1,5)/(0.1E0,0.1E0), + (4.0E0,5.0E0), (4.0E0,5.0E0), (4.0E0,5.0E0), @@ -193,8 +235,8 @@ + (-0.18E0,0.03E0), (4.0E0,7.0E0), + (0.03E0,-0.09E0), (7.0E0,2.0E0), (7.0E0,2.0E0), + (7.0E0,2.0E0), (0.09E0,0.03E0), (5.0E0,8.0E0), - + (0.03E0,0.12E0), (6.0E0,9.0E0), (0.12E0,0.03E0), - + (8.0E0,3.0E0), (0.03E0,0.06E0), (9.0E0,4.0E0)/ + + (0.15E0,0.00E0), (6.0E0,9.0E0), (0.00E0,0.15E0), + + (8.0E0,3.0E0), (0.00E0,0.06E0), (9.0E0,4.0E0)/ DATA ITRUE3/0, 1, 2, 2, 2/ * .. Executable Statements .. DO 60 INCX = 1, 2 @@ -529,7 +571,8 @@ * * .. Parameters .. INTEGER NOUT - PARAMETER (NOUT=6) + REAL ZERO + PARAMETER (NOUT=6, ZERO=0.0E0) * .. Scalar Arguments .. 
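
The DATA hunks above swap in new test elements and fix the expected SCNRM2/SCASUM results STRUE2(5) = 0.8 and STRUE4(5) = 1.6. Reading the column layout, the four active elements of the N = 4 test vector (the rest is filler) are (0.3,0.1), (0.5,0.0), (0.0,0.5), (0.0,0.2), and the corrected expectations check out directly:

```cpp
// Quick verification of the corrected expected values above:
// SCNRM2 = sqrt(sum |z_i|^2), SCASUM = sum (|Re z_i| + |Im z_i|).
#include <cmath>
#include <complex>
#include <iostream>

int main()
{
  const std::complex<float> z[4] = {
    {0.3f, 0.1f}, {0.5f, 0.0f}, {0.0f, 0.5f}, {0.0f, 0.2f}};

  float nrm2 = 0.0f, asum = 0.0f;
  for (const auto& v : z)
  {
    nrm2 += std::norm(v);                            // squared magnitude
    asum += std::abs(v.real()) + std::abs(v.imag()); // BLAS-style 1-"norm"
  }
  nrm2 = std::sqrt(nrm2);   // sqrt(0.09+0.01+0.25+0.25+0.04) = sqrt(0.64)

  std::cout << nrm2 << " " << asum << "\n";  // prints: 0.8 1.6
}
```

The old values 0.7 and 1.7 did not match any consistent reading of the old data, which is presumably why both the vector and the expectations change together.
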
REAL SFAC INTEGER LEN @@ -552,7 +595,7 @@ * DO 40 I = 1, LEN SD = SCOMP(I) - STRUE(I) - IF (SDIFF(ABS(SSIZE(I))+ABS(SFAC*SD),ABS(SSIZE(I))).EQ.0.0E0) + IF (ABS(SFAC*SD) .LE. ABS(SSIZE(I))*EPSILON(ZERO)) + GO TO 40 * * HERE SCOMP(I) IS NOT CLOSE TO STRUE(I). diff --git a/blas/testing/cblat2.f b/blas/testing/cblat2.f index 20f188100..5833ea81a 100644 --- a/blas/testing/cblat2.f +++ b/blas/testing/cblat2.f @@ -1,68 +1,114 @@ +*> \brief \b CBLAT2 +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +* Definition: +* =========== +* +* PROGRAM CBLAT2 +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> Test program for the COMPLEX Level 2 Blas. +*> +*> The program must be driven by a short data file. The first 18 records +*> of the file are read using list-directed input, the last 17 records +*> are read using the format ( A6, L2 ). An annotated example of a data +*> file can be obtained by deleting the first 3 characters from the +*> following 35 lines: +*> 'cblat2.out' NAME OF SUMMARY OUTPUT FILE +*> 6 UNIT NUMBER OF SUMMARY FILE +*> 'CBLA2T.SNAP' NAME OF SNAPSHOT OUTPUT FILE +*> -1 UNIT NUMBER OF SNAPSHOT FILE (NOT USED IF .LT. 0) +*> F LOGICAL FLAG, T TO REWIND SNAPSHOT FILE AFTER EACH RECORD. +*> F LOGICAL FLAG, T TO STOP ON FAILURES. +*> T LOGICAL FLAG, T TO TEST ERROR EXITS. +*> 16.0 THRESHOLD VALUE OF TEST RATIO +*> 6 NUMBER OF VALUES OF N +*> 0 1 2 3 5 9 VALUES OF N +*> 4 NUMBER OF VALUES OF K +*> 0 1 2 4 VALUES OF K +*> 4 NUMBER OF VALUES OF INCX AND INCY +*> 1 2 -1 -2 VALUES OF INCX AND INCY +*> 3 NUMBER OF VALUES OF ALPHA +*> (0.0,0.0) (1.0,0.0) (0.7,-0.9) VALUES OF ALPHA +*> 3 NUMBER OF VALUES OF BETA +*> (0.0,0.0) (1.0,0.0) (1.3,-1.1) VALUES OF BETA +*> CGEMV T PUT F FOR NO TEST. SAME COLUMNS. +*> CGBMV T PUT F FOR NO TEST. SAME COLUMNS. +*> CHEMV T PUT F FOR NO TEST. SAME COLUMNS. +*> CHBMV T PUT F FOR NO TEST. SAME COLUMNS. +*> CHPMV T PUT F FOR NO TEST. SAME COLUMNS. +*> CTRMV T PUT F FOR NO TEST. SAME COLUMNS. +*> CTBMV T PUT F FOR NO TEST. SAME COLUMNS. +*> CTPMV T PUT F FOR NO TEST. SAME COLUMNS. +*> CTRSV T PUT F FOR NO TEST. SAME COLUMNS. +*> CTBSV T PUT F FOR NO TEST. SAME COLUMNS. +*> CTPSV T PUT F FOR NO TEST. SAME COLUMNS. +*> CGERC T PUT F FOR NO TEST. SAME COLUMNS. +*> CGERU T PUT F FOR NO TEST. SAME COLUMNS. +*> CHER T PUT F FOR NO TEST. SAME COLUMNS. +*> CHPR T PUT F FOR NO TEST. SAME COLUMNS. +*> CHER2 T PUT F FOR NO TEST. SAME COLUMNS. +*> CHPR2 T PUT F FOR NO TEST. SAME COLUMNS. +*> +*> Further Details +*> =============== +*> +*> See: +*> +*> Dongarra J. J., Du Croz J. J., Hammarling S. and Hanson R. J.. +*> An extended set of Fortran Basic Linear Algebra Subprograms. +*> +*> Technical Memoranda Nos. 41 (revision 3) and 81, Mathematics +*> and Computer Science Division, Argonne National Laboratory, +*> 9700 South Cass Avenue, Argonne, Illinois 60439, US. +*> +*> Or +*> +*> NAG Technical Reports TR3/87 and TR4/87, Numerical Algorithms +*> Group Ltd., NAG Central Office, 256 Banbury Road, Oxford +*> OX2 7DE, UK, and Numerical Algorithms Group Inc., 1101 31st +*> Street, Suite 100, Downers Grove, Illinois 60515-1263, USA. +*> +*> +*> -- Written on 10-August-1987. +*> Richard Hanson, Sandia National Labs. +*> Jeremy Du Croz, NAG Central Office. +*> +*> 10-9-00: Change STATUS='NEW' to 'UNKNOWN' so that the testers +*> can be run multiple times without deleting generated +*> output files (susan) +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. 
of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date April 2012 +* +*> \ingroup complex_blas_testing +* +* ===================================================================== PROGRAM CBLAT2 * -* Test program for the COMPLEX Level 2 Blas. +* -- Reference BLAS test routine (version 3.4.1) -- +* -- Reference BLAS is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* April 2012 * -* The program must be driven by a short data file. The first 18 records -* of the file are read using list-directed input, the last 17 records -* are read using the format ( A6, L2 ). An annotated example of a data -* file can be obtained by deleting the first 3 characters from the -* following 35 lines: -* 'CBLAT2.SUMM' NAME OF SUMMARY OUTPUT FILE -* 6 UNIT NUMBER OF SUMMARY FILE -* 'CBLA2T.SNAP' NAME OF SNAPSHOT OUTPUT FILE -* -1 UNIT NUMBER OF SNAPSHOT FILE (NOT USED IF .LT. 0) -* F LOGICAL FLAG, T TO REWIND SNAPSHOT FILE AFTER EACH RECORD. -* F LOGICAL FLAG, T TO STOP ON FAILURES. -* T LOGICAL FLAG, T TO TEST ERROR EXITS. -* 16.0 THRESHOLD VALUE OF TEST RATIO -* 6 NUMBER OF VALUES OF N -* 0 1 2 3 5 9 VALUES OF N -* 4 NUMBER OF VALUES OF K -* 0 1 2 4 VALUES OF K -* 4 NUMBER OF VALUES OF INCX AND INCY -* 1 2 -1 -2 VALUES OF INCX AND INCY -* 3 NUMBER OF VALUES OF ALPHA -* (0.0,0.0) (1.0,0.0) (0.7,-0.9) VALUES OF ALPHA -* 3 NUMBER OF VALUES OF BETA -* (0.0,0.0) (1.0,0.0) (1.3,-1.1) VALUES OF BETA -* CGEMV T PUT F FOR NO TEST. SAME COLUMNS. -* CGBMV T PUT F FOR NO TEST. SAME COLUMNS. -* CHEMV T PUT F FOR NO TEST. SAME COLUMNS. -* CHBMV T PUT F FOR NO TEST. SAME COLUMNS. -* CHPMV T PUT F FOR NO TEST. SAME COLUMNS. -* CTRMV T PUT F FOR NO TEST. SAME COLUMNS. -* CTBMV T PUT F FOR NO TEST. SAME COLUMNS. -* CTPMV T PUT F FOR NO TEST. SAME COLUMNS. -* CTRSV T PUT F FOR NO TEST. SAME COLUMNS. -* CTBSV T PUT F FOR NO TEST. SAME COLUMNS. -* CTPSV T PUT F FOR NO TEST. SAME COLUMNS. -* CGERC T PUT F FOR NO TEST. SAME COLUMNS. -* CGERU T PUT F FOR NO TEST. SAME COLUMNS. -* CHER T PUT F FOR NO TEST. SAME COLUMNS. -* CHPR T PUT F FOR NO TEST. SAME COLUMNS. -* CHER2 T PUT F FOR NO TEST. SAME COLUMNS. -* CHPR2 T PUT F FOR NO TEST. SAME COLUMNS. -* -* See: -* -* Dongarra J. J., Du Croz J. J., Hammarling S. and Hanson R. J.. -* An extended set of Fortran Basic Linear Algebra Subprograms. -* -* Technical Memoranda Nos. 41 (revision 3) and 81, Mathematics -* and Computer Science Division, Argonne National Laboratory, -* 9700 South Cass Avenue, Argonne, Illinois 60439, US. -* -* Or -* -* NAG Technical Reports TR3/87 and TR4/87, Numerical Algorithms -* Group Ltd., NAG Central Office, 256 Banbury Road, Oxford -* OX2 7DE, UK, and Numerical Algorithms Group Inc., 1101 31st -* Street, Suite 100, Downers Grove, Illinois 60515-1263, USA. -* -* -* -- Written on 10-August-1987. -* Richard Hanson, Sandia National Labs. -* Jeremy Du Croz, NAG Central Office. +* ===================================================================== * * .. Parameters .. 
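
Two related cleanups recur in every tester from here on: EPS is now taken from the Fortran 90 EPSILON intrinsic instead of the classic halve-until-it-vanishes loop, and the closeness test in the check routines becomes a direct comparison against |SSIZE| * EPSILON. A C++ rendering of both, with std::numeric_limits standing in for EPSILON (the sample values are made up for illustration):

```cpp
// Old vs. new machine precision, plus the new SCHECK-style closeness test.
#include <cmath>
#include <iostream>
#include <limits>

int main()
{
  // Old scheme: halve until 1 + eps rounds to 1. (On x87-style excess
  // precision this can land on a smaller value than intended, one reason
  // the intrinsic is preferable.)
  float eps = 1.0f;
  while (1.0f + eps / 2.0f != 1.0f)
    eps /= 2.0f;

  // New scheme: ask the implementation directly (EPSILON(ZERO) in Fortran).
  const float eps2 = std::numeric_limits<float>::epsilon();
  std::cout << eps << " vs " << eps2 << "\n";

  // New test: accept SCOMP when |SFAC*(SCOMP-STRUE)| <= |SSIZE| * epsilon.
  const float strue = 0.8f, ssize = 0.8f, sfac = 1.0f;
  const float scomp = std::nextafter(strue, 1.0f);   // one ulp off
  const bool close = std::fabs(sfac * (scomp - strue)) <= std::fabs(ssize) * eps2;
  std::cout << (close ? "close\n" : "not close\n");  // prints: close
}
```
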
INTEGER NIN @@ -71,8 +117,8 @@ PARAMETER ( NSUBS = 17 ) COMPLEX ZERO, ONE PARAMETER ( ZERO = ( 0.0, 0.0 ), ONE = ( 1.0, 0.0 ) ) - REAL RZERO, RHALF, RONE - PARAMETER ( RZERO = 0.0, RHALF = 0.5, RONE = 1.0 ) + REAL RZERO + PARAMETER ( RZERO = 0.0 ) INTEGER NMAX, INCMAX PARAMETER ( NMAX = 65, INCMAX = 2 ) INTEGER NINMAX, NIDMAX, NKBMAX, NALMAX, NBEMAX @@ -126,7 +172,7 @@ * READ( NIN, FMT = * )SUMMRY READ( NIN, FMT = * )NOUT - OPEN( NOUT, FILE = SUMMRY, STATUS = 'NEW' ) + OPEN( NOUT, FILE = SUMMRY, STATUS = 'UNKNOWN' ) NOUTC = NOUT * * Read name and unit number for snapshot output file and open file. @@ -135,7 +181,7 @@ READ( NIN, FMT = * )NTRA TRACE = NTRA.GE.0 IF( TRACE )THEN - OPEN( NTRA, FILE = SNAPS, STATUS = 'NEW' ) + OPEN( NTRA, FILE = SNAPS, STATUS = 'UNKNOWN' ) END IF * Read the flag that directs rewinding of the snapshot file. READ( NIN, FMT = * )REWI @@ -240,14 +286,7 @@ * * Compute EPS (the machine precision). * - EPS = RONE - 90 CONTINUE - IF( SDIFF( RONE + EPS, RONE ).EQ.RZERO ) - $ GO TO 100 - EPS = RHALF*EPS - GO TO 90 - 100 CONTINUE - EPS = EPS + EPS + EPS = EPSILON(RZERO) WRITE( NOUT, FMT = 9998 )EPS * * Check the reliability of CMVCH using exact data. @@ -3079,7 +3118,6 @@ 50 CONTINUE END IF * - 60 CONTINUE LCERES = .TRUE. GO TO 80 70 CONTINUE diff --git a/blas/testing/cblat3.f b/blas/testing/cblat3.f index b26be91e6..09f2cb9c5 100644 --- a/blas/testing/cblat3.f +++ b/blas/testing/cblat3.f @@ -1,50 +1,96 @@ +*> \brief \b CBLAT3 +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +* Definition: +* =========== +* +* PROGRAM CBLAT3 +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> Test program for the COMPLEX Level 3 Blas. +*> +*> The program must be driven by a short data file. The first 14 records +*> of the file are read using list-directed input, the last 9 records +*> are read using the format ( A6, L2 ). An annotated example of a data +*> file can be obtained by deleting the first 3 characters from the +*> following 23 lines: +*> 'cblat3.out' NAME OF SUMMARY OUTPUT FILE +*> 6 UNIT NUMBER OF SUMMARY FILE +*> 'CBLAT3.SNAP' NAME OF SNAPSHOT OUTPUT FILE +*> -1 UNIT NUMBER OF SNAPSHOT FILE (NOT USED IF .LT. 0) +*> F LOGICAL FLAG, T TO REWIND SNAPSHOT FILE AFTER EACH RECORD. +*> F LOGICAL FLAG, T TO STOP ON FAILURES. +*> T LOGICAL FLAG, T TO TEST ERROR EXITS. +*> 16.0 THRESHOLD VALUE OF TEST RATIO +*> 6 NUMBER OF VALUES OF N +*> 0 1 2 3 5 9 VALUES OF N +*> 3 NUMBER OF VALUES OF ALPHA +*> (0.0,0.0) (1.0,0.0) (0.7,-0.9) VALUES OF ALPHA +*> 3 NUMBER OF VALUES OF BETA +*> (0.0,0.0) (1.0,0.0) (1.3,-1.1) VALUES OF BETA +*> CGEMM T PUT F FOR NO TEST. SAME COLUMNS. +*> CHEMM T PUT F FOR NO TEST. SAME COLUMNS. +*> CSYMM T PUT F FOR NO TEST. SAME COLUMNS. +*> CTRMM T PUT F FOR NO TEST. SAME COLUMNS. +*> CTRSM T PUT F FOR NO TEST. SAME COLUMNS. +*> CHERK T PUT F FOR NO TEST. SAME COLUMNS. +*> CSYRK T PUT F FOR NO TEST. SAME COLUMNS. +*> CHER2K T PUT F FOR NO TEST. SAME COLUMNS. +*> CSYR2K T PUT F FOR NO TEST. SAME COLUMNS. +*> +*> Further Details +*> =============== +*> +*> See: +*> +*> Dongarra J. J., Du Croz J. J., Duff I. S. and Hammarling S. +*> A Set of Level 3 Basic Linear Algebra Subprograms. +*> +*> Technical Memorandum No.88 (Revision 1), Mathematics and +*> Computer Science Division, Argonne National Laboratory, 9700 +*> South Cass Avenue, Argonne, Illinois 60439, US. +*> +*> -- Written on 8-February-1989. +*> Jack Dongarra, Argonne National Laboratory. 
+*> Iain Duff, AERE Harwell. +*> Jeremy Du Croz, Numerical Algorithms Group Ltd. +*> Sven Hammarling, Numerical Algorithms Group Ltd. +*> +*> 10-9-00: Change STATUS='NEW' to 'UNKNOWN' so that the testers +*> can be run multiple times without deleting generated +*> output files (susan) +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date April 2012 +* +*> \ingroup complex_blas_testing +* +* ===================================================================== PROGRAM CBLAT3 * -* Test program for the COMPLEX Level 3 Blas. +* -- Reference BLAS test routine (version 3.4.1) -- +* -- Reference BLAS is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* April 2012 * -* The program must be driven by a short data file. The first 14 records -* of the file are read using list-directed input, the last 9 records -* are read using the format ( A6, L2 ). An annotated example of a data -* file can be obtained by deleting the first 3 characters from the -* following 23 lines: -* 'CBLAT3.SUMM' NAME OF SUMMARY OUTPUT FILE -* 6 UNIT NUMBER OF SUMMARY FILE -* 'CBLAT3.SNAP' NAME OF SNAPSHOT OUTPUT FILE -* -1 UNIT NUMBER OF SNAPSHOT FILE (NOT USED IF .LT. 0) -* F LOGICAL FLAG, T TO REWIND SNAPSHOT FILE AFTER EACH RECORD. -* F LOGICAL FLAG, T TO STOP ON FAILURES. -* T LOGICAL FLAG, T TO TEST ERROR EXITS. -* 16.0 THRESHOLD VALUE OF TEST RATIO -* 6 NUMBER OF VALUES OF N -* 0 1 2 3 5 9 VALUES OF N -* 3 NUMBER OF VALUES OF ALPHA -* (0.0,0.0) (1.0,0.0) (0.7,-0.9) VALUES OF ALPHA -* 3 NUMBER OF VALUES OF BETA -* (0.0,0.0) (1.0,0.0) (1.3,-1.1) VALUES OF BETA -* CGEMM T PUT F FOR NO TEST. SAME COLUMNS. -* CHEMM T PUT F FOR NO TEST. SAME COLUMNS. -* CSYMM T PUT F FOR NO TEST. SAME COLUMNS. -* CTRMM T PUT F FOR NO TEST. SAME COLUMNS. -* CTRSM T PUT F FOR NO TEST. SAME COLUMNS. -* CHERK T PUT F FOR NO TEST. SAME COLUMNS. -* CSYRK T PUT F FOR NO TEST. SAME COLUMNS. -* CHER2K T PUT F FOR NO TEST. SAME COLUMNS. -* CSYR2K T PUT F FOR NO TEST. SAME COLUMNS. -* -* See: -* -* Dongarra J. J., Du Croz J. J., Duff I. S. and Hammarling S. -* A Set of Level 3 Basic Linear Algebra Subprograms. -* -* Technical Memorandum No.88 (Revision 1), Mathematics and -* Computer Science Division, Argonne National Laboratory, 9700 -* South Cass Avenue, Argonne, Illinois 60439, US. -* -* -- Written on 8-February-1989. -* Jack Dongarra, Argonne National Laboratory. -* Iain Duff, AERE Harwell. -* Jeremy Du Croz, Numerical Algorithms Group Ltd. -* Sven Hammarling, Numerical Algorithms Group Ltd. +* ===================================================================== * * .. Parameters .. INTEGER NIN @@ -53,8 +99,8 @@ PARAMETER ( NSUBS = 9 ) COMPLEX ZERO, ONE PARAMETER ( ZERO = ( 0.0, 0.0 ), ONE = ( 1.0, 0.0 ) ) - REAL RZERO, RHALF, RONE - PARAMETER ( RZERO = 0.0, RHALF = 0.5, RONE = 1.0 ) + REAL RZERO + PARAMETER ( RZERO = 0.0 ) INTEGER NMAX PARAMETER ( NMAX = 65 ) INTEGER NIDMAX, NALMAX, NBEMAX @@ -103,7 +149,7 @@ * READ( NIN, FMT = * )SUMMRY READ( NIN, FMT = * )NOUT - OPEN( NOUT, FILE = SUMMRY, STATUS = 'NEW' ) + OPEN( NOUT, FILE = SUMMRY ) NOUTC = NOUT * * Read name and unit number for snapshot output file and open file. 
@@ -112,7 +158,7 @@ READ( NIN, FMT = * )NTRA TRACE = NTRA.GE.0 IF( TRACE )THEN - OPEN( NTRA, FILE = SNAPS, STATUS = 'NEW' ) + OPEN( NTRA, FILE = SNAPS ) END IF * Read the flag that directs rewinding of the snapshot file. READ( NIN, FMT = * )REWI @@ -189,14 +235,7 @@ * * Compute EPS (the machine precision). * - EPS = RONE - 70 CONTINUE - IF( SDIFF( RONE + EPS, RONE ).EQ.RZERO ) - $ GO TO 80 - EPS = RHALF*EPS - GO TO 70 - 80 CONTINUE - EPS = EPS + EPS + EPS = EPSILON(RZERO) WRITE( NOUT, FMT = 9998 )EPS * * Check the reliability of CMMCH using exact data. @@ -1946,7 +1985,7 @@ * * Tests the error exits from the Level 3 Blas. * Requires a special version of the error-handling routine XERBLA. -* ALPHA, RALPHA, BETA, RBETA, A, B and C should not need to be defined. +* A, B and C should not need to be defined. * * Auxiliary routine for test program for Level 3 Blas. * @@ -1956,12 +1995,19 @@ * Jeremy Du Croz, Numerical Algorithms Group Ltd. * Sven Hammarling, Numerical Algorithms Group Ltd. * +* 3-19-92: Initialize ALPHA, BETA, RALPHA, and RBETA (eca) +* 3-19-92: Fix argument 12 in calls to CSYMM and CHEMM +* with INFOT = 9 (eca) +* * .. Scalar Arguments .. INTEGER ISNUM, NOUT CHARACTER*6 SRNAMT * .. Scalars in Common .. INTEGER INFOT, NOUTC LOGICAL LERR, OK +* .. Parameters .. + REAL ONE, TWO + PARAMETER ( ONE = 1.0E0, TWO = 2.0E0 ) * .. Local Scalars .. COMPLEX ALPHA, BETA REAL RALPHA, RBETA @@ -1979,6 +2025,14 @@ * LERR is set to .TRUE. by the special version of XERBLA each time * it is called, and is then tested and re-set by CHKXER. LERR = .FALSE. +* +* Initialize ALPHA, BETA, RALPHA, and RBETA. +* + ALPHA = CMPLX( ONE, -ONE ) + BETA = CMPLX( TWO, -TWO ) + RALPHA = ONE + RBETA = TWO +* GO TO ( 10, 20, 30, 40, 50, 60, 70, 80, $ 90 )ISNUM 10 INFOT = 1 @@ -2205,16 +2259,16 @@ CALL CHEMM( 'R', 'L', 0, 2, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 - CALL CHEMM( 'L', 'U', 2, 0, ALPHA, A, 2, B, 1, BETA, C, 1 ) + CALL CHEMM( 'L', 'U', 2, 0, ALPHA, A, 2, B, 1, BETA, C, 2 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 - CALL CHEMM( 'R', 'U', 2, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) + CALL CHEMM( 'R', 'U', 2, 0, ALPHA, A, 1, B, 1, BETA, C, 2 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 - CALL CHEMM( 'L', 'L', 2, 0, ALPHA, A, 2, B, 1, BETA, C, 1 ) + CALL CHEMM( 'L', 'L', 2, 0, ALPHA, A, 2, B, 1, BETA, C, 2 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 - CALL CHEMM( 'R', 'L', 2, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) + CALL CHEMM( 'R', 'L', 2, 0, ALPHA, A, 1, B, 1, BETA, C, 2 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 12 CALL CHEMM( 'L', 'U', 2, 0, ALPHA, A, 2, B, 2, BETA, C, 1 ) @@ -2272,16 +2326,16 @@ CALL CSYMM( 'R', 'L', 0, 2, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 - CALL CSYMM( 'L', 'U', 2, 0, ALPHA, A, 2, B, 1, BETA, C, 1 ) + CALL CSYMM( 'L', 'U', 2, 0, ALPHA, A, 2, B, 1, BETA, C, 2 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 - CALL CSYMM( 'R', 'U', 2, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) + CALL CSYMM( 'R', 'U', 2, 0, ALPHA, A, 1, B, 1, BETA, C, 2 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 - CALL CSYMM( 'L', 'L', 2, 0, ALPHA, A, 2, B, 1, BETA, C, 1 ) + CALL CSYMM( 'L', 'L', 2, 0, ALPHA, A, 2, B, 1, BETA, C, 2 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 - CALL CSYMM( 'R', 'L', 2, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) + CALL CSYMM( 'R', 'L', 2, 0, ALPHA, A, 1, B, 1, BETA, C, 2 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 12 CALL 
CSYMM( 'L', 'U', 2, 0, ALPHA, A, 2, B, 2, BETA, C, 1 ) @@ -3268,7 +3322,6 @@ 50 CONTINUE END IF * - 60 CONTINUE LCERES = .TRUE. GO TO 80 70 CONTINUE diff --git a/blas/testing/dblat2.f b/blas/testing/dblat2.f index 4002d4368..0fa80afa4 100644 --- a/blas/testing/dblat2.f +++ b/blas/testing/dblat2.f @@ -1,75 +1,121 @@ +*> \brief \b DBLAT2 +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +* Definition: +* =========== +* +* PROGRAM DBLAT2 +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> Test program for the DOUBLE PRECISION Level 2 Blas. +*> +*> The program must be driven by a short data file. The first 18 records +*> of the file are read using list-directed input, the last 16 records +*> are read using the format ( A6, L2 ). An annotated example of a data +*> file can be obtained by deleting the first 3 characters from the +*> following 34 lines: +*> 'dblat2.out' NAME OF SUMMARY OUTPUT FILE +*> 6 UNIT NUMBER OF SUMMARY FILE +*> 'DBLAT2.SNAP' NAME OF SNAPSHOT OUTPUT FILE +*> -1 UNIT NUMBER OF SNAPSHOT FILE (NOT USED IF .LT. 0) +*> F LOGICAL FLAG, T TO REWIND SNAPSHOT FILE AFTER EACH RECORD. +*> F LOGICAL FLAG, T TO STOP ON FAILURES. +*> T LOGICAL FLAG, T TO TEST ERROR EXITS. +*> 16.0 THRESHOLD VALUE OF TEST RATIO +*> 6 NUMBER OF VALUES OF N +*> 0 1 2 3 5 9 VALUES OF N +*> 4 NUMBER OF VALUES OF K +*> 0 1 2 4 VALUES OF K +*> 4 NUMBER OF VALUES OF INCX AND INCY +*> 1 2 -1 -2 VALUES OF INCX AND INCY +*> 3 NUMBER OF VALUES OF ALPHA +*> 0.0 1.0 0.7 VALUES OF ALPHA +*> 3 NUMBER OF VALUES OF BETA +*> 0.0 1.0 0.9 VALUES OF BETAC +*> DGEMV T PUT F FOR NO TEST. SAME COLUMNS. +*> DGBMV T PUT F FOR NO TEST. SAME COLUMNS. +*> DSYMV T PUT F FOR NO TEST. SAME COLUMNS. +*> DSBMV T PUT F FOR NO TEST. SAME COLUMNS. +*> DSPMV T PUT F FOR NO TEST. SAME COLUMNS. +*> DTRMV T PUT F FOR NO TEST. SAME COLUMNS. +*> DTBMV T PUT F FOR NO TEST. SAME COLUMNS. +*> DTPMV T PUT F FOR NO TEST. SAME COLUMNS. +*> DTRSV T PUT F FOR NO TEST. SAME COLUMNS. +*> DTBSV T PUT F FOR NO TEST. SAME COLUMNS. +*> DTPSV T PUT F FOR NO TEST. SAME COLUMNS. +*> DGER T PUT F FOR NO TEST. SAME COLUMNS. +*> DSYR T PUT F FOR NO TEST. SAME COLUMNS. +*> DSPR T PUT F FOR NO TEST. SAME COLUMNS. +*> DSYR2 T PUT F FOR NO TEST. SAME COLUMNS. +*> DSPR2 T PUT F FOR NO TEST. SAME COLUMNS. +*> +*> Further Details +*> =============== +*> +*> See: +*> +*> Dongarra J. J., Du Croz J. J., Hammarling S. and Hanson R. J.. +*> An extended set of Fortran Basic Linear Algebra Subprograms. +*> +*> Technical Memoranda Nos. 41 (revision 3) and 81, Mathematics +*> and Computer Science Division, Argonne National Laboratory, +*> 9700 South Cass Avenue, Argonne, Illinois 60439, US. +*> +*> Or +*> +*> NAG Technical Reports TR3/87 and TR4/87, Numerical Algorithms +*> Group Ltd., NAG Central Office, 256 Banbury Road, Oxford +*> OX2 7DE, UK, and Numerical Algorithms Group Inc., 1101 31st +*> Street, Suite 100, Downers Grove, Illinois 60515-1263, USA. +*> +*> +*> -- Written on 10-August-1987. +*> Richard Hanson, Sandia National Labs. +*> Jeremy Du Croz, NAG Central Office. +*> +*> 10-9-00: Change STATUS='NEW' to 'UNKNOWN' so that the testers +*> can be run multiple times without deleting generated +*> output files (susan) +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. 
+* +*> \date April 2012 +* +*> \ingroup double_blas_testing +* +* ===================================================================== PROGRAM DBLAT2 * -* Test program for the DOUBLE PRECISION Level 2 Blas. +* -- Reference BLAS test routine (version 3.4.1) -- +* -- Reference BLAS is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* April 2012 * -* The program must be driven by a short data file. The first 18 records -* of the file are read using list-directed input, the last 16 records -* are read using the format ( A6, L2 ). An annotated example of a data -* file can be obtained by deleting the first 3 characters from the -* following 34 lines: -* 'DBLAT2.SUMM' NAME OF SUMMARY OUTPUT FILE -* 6 UNIT NUMBER OF SUMMARY FILE -* 'DBLAT2.SNAP' NAME OF SNAPSHOT OUTPUT FILE -* -1 UNIT NUMBER OF SNAPSHOT FILE (NOT USED IF .LT. 0) -* F LOGICAL FLAG, T TO REWIND SNAPSHOT FILE AFTER EACH RECORD. -* F LOGICAL FLAG, T TO STOP ON FAILURES. -* T LOGICAL FLAG, T TO TEST ERROR EXITS. -* 16.0 THRESHOLD VALUE OF TEST RATIO -* 6 NUMBER OF VALUES OF N -* 0 1 2 3 5 9 VALUES OF N -* 4 NUMBER OF VALUES OF K -* 0 1 2 4 VALUES OF K -* 4 NUMBER OF VALUES OF INCX AND INCY -* 1 2 -1 -2 VALUES OF INCX AND INCY -* 3 NUMBER OF VALUES OF ALPHA -* 0.0 1.0 0.7 VALUES OF ALPHA -* 3 NUMBER OF VALUES OF BETA -* 0.0 1.0 0.9 VALUES OF BETA -* DGEMV T PUT F FOR NO TEST. SAME COLUMNS. -* DGBMV T PUT F FOR NO TEST. SAME COLUMNS. -* DSYMV T PUT F FOR NO TEST. SAME COLUMNS. -* DSBMV T PUT F FOR NO TEST. SAME COLUMNS. -* DSPMV T PUT F FOR NO TEST. SAME COLUMNS. -* DTRMV T PUT F FOR NO TEST. SAME COLUMNS. -* DTBMV T PUT F FOR NO TEST. SAME COLUMNS. -* DTPMV T PUT F FOR NO TEST. SAME COLUMNS. -* DTRSV T PUT F FOR NO TEST. SAME COLUMNS. -* DTBSV T PUT F FOR NO TEST. SAME COLUMNS. -* DTPSV T PUT F FOR NO TEST. SAME COLUMNS. -* DGER T PUT F FOR NO TEST. SAME COLUMNS. -* DSYR T PUT F FOR NO TEST. SAME COLUMNS. -* DSPR T PUT F FOR NO TEST. SAME COLUMNS. -* DSYR2 T PUT F FOR NO TEST. SAME COLUMNS. -* DSPR2 T PUT F FOR NO TEST. SAME COLUMNS. -* -* See: -* -* Dongarra J. J., Du Croz J. J., Hammarling S. and Hanson R. J.. -* An extended set of Fortran Basic Linear Algebra Subprograms. -* -* Technical Memoranda Nos. 41 (revision 3) and 81, Mathematics -* and Computer Science Division, Argonne National Laboratory, -* 9700 South Cass Avenue, Argonne, Illinois 60439, US. -* -* Or -* -* NAG Technical Reports TR3/87 and TR4/87, Numerical Algorithms -* Group Ltd., NAG Central Office, 256 Banbury Road, Oxford -* OX2 7DE, UK, and Numerical Algorithms Group Inc., 1101 31st -* Street, Suite 100, Downers Grove, Illinois 60515-1263, USA. -* -* -* -- Written on 10-August-1987. -* Richard Hanson, Sandia National Labs. -* Jeremy Du Croz, NAG Central Office. +* ===================================================================== * * .. Parameters .. INTEGER NIN PARAMETER ( NIN = 5 ) INTEGER NSUBS PARAMETER ( NSUBS = 16 ) - DOUBLE PRECISION ZERO, HALF, ONE - PARAMETER ( ZERO = 0.0D0, HALF = 0.5D0, ONE = 1.0D0 ) + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D0, ONE = 1.0D0 ) INTEGER NMAX, INCMAX PARAMETER ( NMAX = 65, INCMAX = 2 ) INTEGER NINMAX, NIDMAX, NKBMAX, NALMAX, NBEMAX @@ -121,7 +167,7 @@ * READ( NIN, FMT = * )SUMMRY READ( NIN, FMT = * )NOUT - OPEN( NOUT, FILE = SUMMRY, STATUS = 'NEW' ) + OPEN( NOUT, FILE = SUMMRY, STATUS = 'UNKNOWN' ) NOUTC = NOUT * * Read name and unit number for snapshot output file and open file. 
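
The CHEMM/CSYMM error-exit fixes above (and the matching DSYMM/SSYMM ones below) all address the same flaw: the INFOT = 9 calls passed LDC = 1 with M = 2, so argument 12 was also illegal, and an implementation that checks arguments in a different order from the reference could legitimately report 12 instead of the expected 9. A simplified checker illustrating the ambiguity; argument positions follow the reference SYMM interface, and the routine itself is a stand-in:

```cpp
// Why the INFOT = 9 tests now pass LDC = 2: with M = 2, LDC = 1 is itself
// illegal (argument 12), so the old calls had two bad arguments at once.
#include <algorithm>
#include <iostream>

// Returns the index of the first illegal argument, or 0 if all are legal.
// (Simplified: only SIDE, M, N, LDA, LDB, LDC are checked.)
int symm_check(char side, int m, int n, int lda, int ldb, int ldc)
{
  const int nrowa = (side == 'L') ? m : n;  // A is m-by-m or n-by-n
  if (side != 'L' && side != 'R')  return 1;
  if (m < 0)                       return 3;
  if (n < 0)                       return 4;
  if (lda < std::max(1, nrowa))    return 7;
  if (ldb < std::max(1, m))        return 9;
  if (ldc < std::max(1, m))        return 12;
  return 0;
}

int main()
{
  // Old test call: argument 9 (LDB) is bad, but so is 12 (LDC); a checker
  // that happens to test LDC first would report 12 and fail the test.
  std::cout << symm_check('L', 2, 0, 2, 1, 1) << "\n";  // 9 here, but fragile
  // Fixed call: LDC = 2 is legal, so argument 9 is unambiguously the culprit.
  std::cout << symm_check('L', 2, 0, 2, 1, 2) << "\n";  // 9
}
```
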
@@ -130,7 +176,7 @@ READ( NIN, FMT = * )NTRA TRACE = NTRA.GE.0 IF( TRACE )THEN - OPEN( NTRA, FILE = SNAPS, STATUS = 'NEW' ) + OPEN( NTRA, FILE = SNAPS, STATUS = 'UNKNOWN' ) END IF * Read the flag that directs rewinding of the snapshot file. READ( NIN, FMT = * )REWI @@ -235,14 +281,7 @@ * * Compute EPS (the machine precision). * - EPS = ONE - 90 CONTINUE - IF( DDIFF( ONE + EPS, ONE ).EQ.ZERO ) - $ GO TO 100 - EPS = HALF*EPS - GO TO 90 - 100 CONTINUE - EPS = EPS + EPS + EPS = EPSILON(ZERO) WRITE( NOUT, FMT = 9998 )EPS * * Check the reliability of DMVCH using exact data. @@ -2982,7 +3021,6 @@ 50 CONTINUE END IF * - 60 CONTINUE LDERES = .TRUE. GO TO 80 70 CONTINUE diff --git a/blas/testing/dblat3.f b/blas/testing/dblat3.f index 082e03e5e..8d37c7453 100644 --- a/blas/testing/dblat3.f +++ b/blas/testing/dblat3.f @@ -1,55 +1,101 @@ +*> \brief \b DBLAT3 +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +* Definition: +* =========== +* +* PROGRAM DBLAT3 +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> Test program for the DOUBLE PRECISION Level 3 Blas. +*> +*> The program must be driven by a short data file. The first 14 records +*> of the file are read using list-directed input, the last 6 records +*> are read using the format ( A6, L2 ). An annotated example of a data +*> file can be obtained by deleting the first 3 characters from the +*> following 20 lines: +*> 'dblat3.out' NAME OF SUMMARY OUTPUT FILE +*> 6 UNIT NUMBER OF SUMMARY FILE +*> 'DBLAT3.SNAP' NAME OF SNAPSHOT OUTPUT FILE +*> -1 UNIT NUMBER OF SNAPSHOT FILE (NOT USED IF .LT. 0) +*> F LOGICAL FLAG, T TO REWIND SNAPSHOT FILE AFTER EACH RECORD. +*> F LOGICAL FLAG, T TO STOP ON FAILURES. +*> T LOGICAL FLAG, T TO TEST ERROR EXITS. +*> 16.0 THRESHOLD VALUE OF TEST RATIO +*> 6 NUMBER OF VALUES OF N +*> 0 1 2 3 5 9 VALUES OF N +*> 3 NUMBER OF VALUES OF ALPHA +*> 0.0 1.0 0.7 VALUES OF ALPHA +*> 3 NUMBER OF VALUES OF BETA +*> 0.0 1.0 1.3 VALUES OF BETA +*> DGEMM T PUT F FOR NO TEST. SAME COLUMNS. +*> DSYMM T PUT F FOR NO TEST. SAME COLUMNS. +*> DTRMM T PUT F FOR NO TEST. SAME COLUMNS. +*> DTRSM T PUT F FOR NO TEST. SAME COLUMNS. +*> DSYRK T PUT F FOR NO TEST. SAME COLUMNS. +*> DSYR2K T PUT F FOR NO TEST. SAME COLUMNS. +*> +*> Further Details +*> =============== +*> +*> See: +*> +*> Dongarra J. J., Du Croz J. J., Duff I. S. and Hammarling S. +*> A Set of Level 3 Basic Linear Algebra Subprograms. +*> +*> Technical Memorandum No.88 (Revision 1), Mathematics and +*> Computer Science Division, Argonne National Laboratory, 9700 +*> South Cass Avenue, Argonne, Illinois 60439, US. +*> +*> -- Written on 8-February-1989. +*> Jack Dongarra, Argonne National Laboratory. +*> Iain Duff, AERE Harwell. +*> Jeremy Du Croz, Numerical Algorithms Group Ltd. +*> Sven Hammarling, Numerical Algorithms Group Ltd. +*> +*> 10-9-00: Change STATUS='NEW' to 'UNKNOWN' so that the testers +*> can be run multiple times without deleting generated +*> output files (susan) +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date April 2012 +* +*> \ingroup double_blas_testing +* +* ===================================================================== PROGRAM DBLAT3 * -* Test program for the DOUBLE PRECISION Level 3 Blas. +* -- Reference BLAS test routine (version 3.4.1) -- +* -- Reference BLAS is a software package provided by Univ. 
of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* April 2012 * -* The program must be driven by a short data file. The first 14 records -* of the file are read using list-directed input, the last 6 records -* are read using the format ( A6, L2 ). An annotated example of a data -* file can be obtained by deleting the first 3 characters from the -* following 20 lines: -* 'DBLAT3.SUMM' NAME OF SUMMARY OUTPUT FILE -* 6 UNIT NUMBER OF SUMMARY FILE -* 'DBLAT3.SNAP' NAME OF SNAPSHOT OUTPUT FILE -* -1 UNIT NUMBER OF SNAPSHOT FILE (NOT USED IF .LT. 0) -* F LOGICAL FLAG, T TO REWIND SNAPSHOT FILE AFTER EACH RECORD. -* F LOGICAL FLAG, T TO STOP ON FAILURES. -* T LOGICAL FLAG, T TO TEST ERROR EXITS. -* 16.0 THRESHOLD VALUE OF TEST RATIO -* 6 NUMBER OF VALUES OF N -* 0 1 2 3 5 9 VALUES OF N -* 3 NUMBER OF VALUES OF ALPHA -* 0.0 1.0 0.7 VALUES OF ALPHA -* 3 NUMBER OF VALUES OF BETA -* 0.0 1.0 1.3 VALUES OF BETA -* DGEMM T PUT F FOR NO TEST. SAME COLUMNS. -* DSYMM T PUT F FOR NO TEST. SAME COLUMNS. -* DTRMM T PUT F FOR NO TEST. SAME COLUMNS. -* DTRSM T PUT F FOR NO TEST. SAME COLUMNS. -* DSYRK T PUT F FOR NO TEST. SAME COLUMNS. -* DSYR2K T PUT F FOR NO TEST. SAME COLUMNS. -* -* See: -* -* Dongarra J. J., Du Croz J. J., Duff I. S. and Hammarling S. -* A Set of Level 3 Basic Linear Algebra Subprograms. -* -* Technical Memorandum No.88 (Revision 1), Mathematics and -* Computer Science Division, Argonne National Laboratory, 9700 -* South Cass Avenue, Argonne, Illinois 60439, US. -* -* -- Written on 8-February-1989. -* Jack Dongarra, Argonne National Laboratory. -* Iain Duff, AERE Harwell. -* Jeremy Du Croz, Numerical Algorithms Group Ltd. -* Sven Hammarling, Numerical Algorithms Group Ltd. +* ===================================================================== * * .. Parameters .. INTEGER NIN PARAMETER ( NIN = 5 ) INTEGER NSUBS PARAMETER ( NSUBS = 6 ) - DOUBLE PRECISION ZERO, HALF, ONE - PARAMETER ( ZERO = 0.0D0, HALF = 0.5D0, ONE = 1.0D0 ) + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D0, ONE = 1.0D0 ) INTEGER NMAX PARAMETER ( NMAX = 65 ) INTEGER NIDMAX, NALMAX, NBEMAX @@ -96,7 +142,7 @@ * READ( NIN, FMT = * )SUMMRY READ( NIN, FMT = * )NOUT - OPEN( NOUT, FILE = SUMMRY, STATUS = 'NEW' ) + OPEN( NOUT, FILE = SUMMRY, STATUS = 'UNKNOWN' ) NOUTC = NOUT * * Read name and unit number for snapshot output file and open file. @@ -105,7 +151,7 @@ READ( NIN, FMT = * )NTRA TRACE = NTRA.GE.0 IF( TRACE )THEN - OPEN( NTRA, FILE = SNAPS, STATUS = 'NEW' ) + OPEN( NTRA, FILE = SNAPS, STATUS = 'UNKNOWN' ) END IF * Read the flag that directs rewinding of the snapshot file. READ( NIN, FMT = * )REWI @@ -182,14 +228,7 @@ * * Compute EPS (the machine precision). * - EPS = ONE - 70 CONTINUE - IF( DDIFF( ONE + EPS, ONE ).EQ.ZERO ) - $ GO TO 80 - EPS = HALF*EPS - GO TO 70 - 80 CONTINUE - EPS = EPS + EPS + EPS = EPSILON(ZERO) WRITE( NOUT, FMT = 9998 )EPS * * Check the reliability of DMMCH using exact data. @@ -1802,7 +1841,7 @@ * * Tests the error exits from the Level 3 Blas. * Requires a special version of the error-handling routine XERBLA. -* ALPHA, BETA, A, B and C should not need to be defined. +* A, B and C should not need to be defined. * * Auxiliary routine for test program for Level 3 Blas. * @@ -1812,12 +1851,18 @@ * Jeremy Du Croz, Numerical Algorithms Group Ltd. * Sven Hammarling, Numerical Algorithms Group Ltd. * +* 3-19-92: Initialize ALPHA and BETA (eca) +* 3-19-92: Fix argument 12 in calls to SSYMM with INFOT = 9 (eca) +* * .. Scalar Arguments .. 
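
The other recurring CHKE fix is initializing ALPHA and BETA (and RALPHA/RBETA in the complex testers) before the error-exit calls: the routines never read them on the error path, but passing undefined values is still invalid and can trap under some compilers or runtime checkers. A sketch of the same idea in C++ terms, with a stand-in routine:

```cpp
// Error-exit testers should pass defined scalars: copying an indeterminate
// value into an argument is undefined behavior even if the callee ignores it.
#include <complex>
#include <iostream>

// Stand-in for a BLAS routine that rejects its first argument and never
// reads alpha or beta on that path.
int gemm_like(char transa, std::complex<float> alpha, std::complex<float> beta)
{
  if (transa != 'N' && transa != 'T' && transa != 'C')
    return 1;  // error exit: alpha/beta untouched
  (void)alpha; (void)beta;
  return 0;
}

int main()
{
  // The fix, in spirit: give the scalars arbitrary but defined values
  // before provoking the error exits (ALPHA = CMPLX(ONE,-ONE) etc. above).
  const std::complex<float> alpha(1.0f, -1.0f);
  const std::complex<float> beta(2.0f, -2.0f);
  std::cout << gemm_like('/', alpha, beta) << "\n";  // prints: 1
}
```
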
INTEGER ISNUM, NOUT CHARACTER*6 SRNAMT * .. Scalars in Common .. INTEGER INFOT, NOUTC LOGICAL LERR, OK +* .. Parameters .. + DOUBLE PRECISION ONE, TWO + PARAMETER ( ONE = 1.0D0, TWO = 2.0D0 ) * .. Local Scalars .. DOUBLE PRECISION ALPHA, BETA * .. Local Arrays .. @@ -1834,6 +1879,12 @@ * LERR is set to .TRUE. by the special version of XERBLA each time * it is called, and is then tested and re-set by CHKXER. LERR = .FALSE. +* +* Initialize ALPHA and BETA. +* + ALPHA = ONE + BETA = TWO +* GO TO ( 10, 20, 30, 40, 50, 60 )ISNUM 10 INFOT = 1 CALL DGEMM( '/', 'N', 0, 0, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) @@ -1963,16 +2014,16 @@ CALL DSYMM( 'R', 'L', 0, 2, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 - CALL DSYMM( 'L', 'U', 2, 0, ALPHA, A, 2, B, 1, BETA, C, 1 ) + CALL DSYMM( 'L', 'U', 2, 0, ALPHA, A, 2, B, 1, BETA, C, 2 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 - CALL DSYMM( 'R', 'U', 2, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) + CALL DSYMM( 'R', 'U', 2, 0, ALPHA, A, 1, B, 1, BETA, C, 2 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 - CALL DSYMM( 'L', 'L', 2, 0, ALPHA, A, 2, B, 1, BETA, C, 1 ) + CALL DSYMM( 'L', 'L', 2, 0, ALPHA, A, 2, B, 1, BETA, C, 2 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 - CALL DSYMM( 'R', 'L', 2, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) + CALL DSYMM( 'R', 'L', 2, 0, ALPHA, A, 1, B, 1, BETA, C, 2 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 12 CALL DSYMM( 'L', 'U', 2, 0, ALPHA, A, 2, B, 2, BETA, C, 1 ) @@ -2660,7 +2711,6 @@ 50 CONTINUE END IF * - 60 CONTINUE LDERES = .TRUE. GO TO 80 70 CONTINUE diff --git a/blas/testing/sblat2.f b/blas/testing/sblat2.f index 057a85429..71605ed31 100644 --- a/blas/testing/sblat2.f +++ b/blas/testing/sblat2.f @@ -1,75 +1,121 @@ +*> \brief \b SBLAT2 +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +* Definition: +* =========== +* +* PROGRAM SBLAT2 +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> Test program for the REAL Level 2 Blas. +*> +*> The program must be driven by a short data file. The first 18 records +*> of the file are read using list-directed input, the last 16 records +*> are read using the format ( A6, L2 ). An annotated example of a data +*> file can be obtained by deleting the first 3 characters from the +*> following 34 lines: +*> 'sblat2.out' NAME OF SUMMARY OUTPUT FILE +*> 6 UNIT NUMBER OF SUMMARY FILE +*> 'SBLAT2.SNAP' NAME OF SNAPSHOT OUTPUT FILE +*> -1 UNIT NUMBER OF SNAPSHOT FILE (NOT USED IF .LT. 0) +*> F LOGICAL FLAG, T TO REWIND SNAPSHOT FILE AFTER EACH RECORD. +*> F LOGICAL FLAG, T TO STOP ON FAILURES. +*> T LOGICAL FLAG, T TO TEST ERROR EXITS. +*> 16.0 THRESHOLD VALUE OF TEST RATIO +*> 6 NUMBER OF VALUES OF N +*> 0 1 2 3 5 9 VALUES OF N +*> 4 NUMBER OF VALUES OF K +*> 0 1 2 4 VALUES OF K +*> 4 NUMBER OF VALUES OF INCX AND INCY +*> 1 2 -1 -2 VALUES OF INCX AND INCY +*> 3 NUMBER OF VALUES OF ALPHA +*> 0.0 1.0 0.7 VALUES OF ALPHA +*> 3 NUMBER OF VALUES OF BETA +*> 0.0 1.0 0.9 VALUES OF BETA +*> SGEMV T PUT F FOR NO TEST. SAME COLUMNS. +*> SGBMV T PUT F FOR NO TEST. SAME COLUMNS. +*> SSYMV T PUT F FOR NO TEST. SAME COLUMNS. +*> SSBMV T PUT F FOR NO TEST. SAME COLUMNS. +*> SSPMV T PUT F FOR NO TEST. SAME COLUMNS. +*> STRMV T PUT F FOR NO TEST. SAME COLUMNS. +*> STBMV T PUT F FOR NO TEST. SAME COLUMNS. +*> STPMV T PUT F FOR NO TEST. SAME COLUMNS. +*> STRSV T PUT F FOR NO TEST. SAME COLUMNS. +*> STBSV T PUT F FOR NO TEST. SAME COLUMNS. 
+*> STPSV T PUT F FOR NO TEST. SAME COLUMNS. +*> SGER T PUT F FOR NO TEST. SAME COLUMNS. +*> SSYR T PUT F FOR NO TEST. SAME COLUMNS. +*> SSPR T PUT F FOR NO TEST. SAME COLUMNS. +*> SSYR2 T PUT F FOR NO TEST. SAME COLUMNS. +*> SSPR2 T PUT F FOR NO TEST. SAME COLUMNS. +*> +*> Further Details +*> =============== +*> +*> See: +*> +*> Dongarra J. J., Du Croz J. J., Hammarling S. and Hanson R. J.. +*> An extended set of Fortran Basic Linear Algebra Subprograms. +*> +*> Technical Memoranda Nos. 41 (revision 3) and 81, Mathematics +*> and Computer Science Division, Argonne National Laboratory, +*> 9700 South Cass Avenue, Argonne, Illinois 60439, US. +*> +*> Or +*> +*> NAG Technical Reports TR3/87 and TR4/87, Numerical Algorithms +*> Group Ltd., NAG Central Office, 256 Banbury Road, Oxford +*> OX2 7DE, UK, and Numerical Algorithms Group Inc., 1101 31st +*> Street, Suite 100, Downers Grove, Illinois 60515-1263, USA. +*> +*> +*> -- Written on 10-August-1987. +*> Richard Hanson, Sandia National Labs. +*> Jeremy Du Croz, NAG Central Office. +*> +*> 10-9-00: Change STATUS='NEW' to 'UNKNOWN' so that the testers +*> can be run multiple times without deleting generated +*> output files (susan) +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date April 2012 +* +*> \ingroup single_blas_testing +* +* ===================================================================== PROGRAM SBLAT2 * -* Test program for the REAL Level 2 Blas. +* -- Reference BLAS test routine (version 3.4.1) -- +* -- Reference BLAS is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* April 2012 * -* The program must be driven by a short data file. The first 18 records -* of the file are read using list-directed input, the last 16 records -* are read using the format ( A6, L2 ). An annotated example of a data -* file can be obtained by deleting the first 3 characters from the -* following 34 lines: -* 'SBLAT2.SUMM' NAME OF SUMMARY OUTPUT FILE -* 6 UNIT NUMBER OF SUMMARY FILE -* 'SBLAT2.SNAP' NAME OF SNAPSHOT OUTPUT FILE -* -1 UNIT NUMBER OF SNAPSHOT FILE (NOT USED IF .LT. 0) -* F LOGICAL FLAG, T TO REWIND SNAPSHOT FILE AFTER EACH RECORD. -* F LOGICAL FLAG, T TO STOP ON FAILURES. -* T LOGICAL FLAG, T TO TEST ERROR EXITS. -* 16.0 THRESHOLD VALUE OF TEST RATIO -* 6 NUMBER OF VALUES OF N -* 0 1 2 3 5 9 VALUES OF N -* 4 NUMBER OF VALUES OF K -* 0 1 2 4 VALUES OF K -* 4 NUMBER OF VALUES OF INCX AND INCY -* 1 2 -1 -2 VALUES OF INCX AND INCY -* 3 NUMBER OF VALUES OF ALPHA -* 0.0 1.0 0.7 VALUES OF ALPHA -* 3 NUMBER OF VALUES OF BETA -* 0.0 1.0 0.9 VALUES OF BETA -* SGEMV T PUT F FOR NO TEST. SAME COLUMNS. -* SGBMV T PUT F FOR NO TEST. SAME COLUMNS. -* SSYMV T PUT F FOR NO TEST. SAME COLUMNS. -* SSBMV T PUT F FOR NO TEST. SAME COLUMNS. -* SSPMV T PUT F FOR NO TEST. SAME COLUMNS. -* STRMV T PUT F FOR NO TEST. SAME COLUMNS. -* STBMV T PUT F FOR NO TEST. SAME COLUMNS. -* STPMV T PUT F FOR NO TEST. SAME COLUMNS. -* STRSV T PUT F FOR NO TEST. SAME COLUMNS. -* STBSV T PUT F FOR NO TEST. SAME COLUMNS. -* STPSV T PUT F FOR NO TEST. SAME COLUMNS. -* SGER T PUT F FOR NO TEST. SAME COLUMNS. -* SSYR T PUT F FOR NO TEST. SAME COLUMNS. -* SSPR T PUT F FOR NO TEST. SAME COLUMNS. -* SSYR2 T PUT F FOR NO TEST. SAME COLUMNS. -* SSPR2 T PUT F FOR NO TEST. SAME COLUMNS. -* -* See: -* -* Dongarra J. J., Du Croz J. J., Hammarling S. and Hanson R. J.. 
-* An extended set of Fortran Basic Linear Algebra Subprograms. -* -* Technical Memoranda Nos. 41 (revision 3) and 81, Mathematics -* and Computer Science Division, Argonne National Laboratory, -* 9700 South Cass Avenue, Argonne, Illinois 60439, US. -* -* Or -* -* NAG Technical Reports TR3/87 and TR4/87, Numerical Algorithms -* Group Ltd., NAG Central Office, 256 Banbury Road, Oxford -* OX2 7DE, UK, and Numerical Algorithms Group Inc., 1101 31st -* Street, Suite 100, Downers Grove, Illinois 60515-1263, USA. -* -* -* -- Written on 10-August-1987. -* Richard Hanson, Sandia National Labs. -* Jeremy Du Croz, NAG Central Office. +* ===================================================================== * * .. Parameters .. INTEGER NIN PARAMETER ( NIN = 5 ) INTEGER NSUBS PARAMETER ( NSUBS = 16 ) - REAL ZERO, HALF, ONE - PARAMETER ( ZERO = 0.0, HALF = 0.5, ONE = 1.0 ) + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0, ONE = 1.0 ) INTEGER NMAX, INCMAX PARAMETER ( NMAX = 65, INCMAX = 2 ) INTEGER NINMAX, NIDMAX, NKBMAX, NALMAX, NBEMAX @@ -121,7 +167,7 @@ * READ( NIN, FMT = * )SUMMRY READ( NIN, FMT = * )NOUT - OPEN( NOUT, FILE = SUMMRY, STATUS = 'NEW' ) + OPEN( NOUT, FILE = SUMMRY, STATUS = 'UNKNOWN' ) NOUTC = NOUT * * Read name and unit number for snapshot output file and open file. @@ -130,7 +176,7 @@ READ( NIN, FMT = * )NTRA TRACE = NTRA.GE.0 IF( TRACE )THEN - OPEN( NTRA, FILE = SNAPS, STATUS = 'NEW' ) + OPEN( NTRA, FILE = SNAPS, STATUS = 'UNKNOWN' ) END IF * Read the flag that directs rewinding of the snapshot file. READ( NIN, FMT = * )REWI @@ -235,14 +281,7 @@ * * Compute EPS (the machine precision). * - EPS = ONE - 90 CONTINUE - IF( SDIFF( ONE + EPS, ONE ).EQ.ZERO ) - $ GO TO 100 - EPS = HALF*EPS - GO TO 90 - 100 CONTINUE - EPS = EPS + EPS + EPS = EPSILON(ZERO) WRITE( NOUT, FMT = 9998 )EPS * * Check the reliability of SMVCH using exact data. @@ -2982,7 +3021,6 @@ 50 CONTINUE END IF * - 60 CONTINUE LSERES = .TRUE. GO TO 80 70 CONTINUE diff --git a/blas/testing/sblat3.f b/blas/testing/sblat3.f index 325a9eb92..879269633 100644 --- a/blas/testing/sblat3.f +++ b/blas/testing/sblat3.f @@ -1,55 +1,101 @@ +*> \brief \b SBLAT3 +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +* Definition: +* =========== +* +* PROGRAM SBLAT3 +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> Test program for the REAL Level 3 Blas. +*> +*> The program must be driven by a short data file. The first 14 records +*> of the file are read using list-directed input, the last 6 records +*> are read using the format ( A6, L2 ). An annotated example of a data +*> file can be obtained by deleting the first 3 characters from the +*> following 20 lines: +*> 'sblat3.out' NAME OF SUMMARY OUTPUT FILE +*> 6 UNIT NUMBER OF SUMMARY FILE +*> 'SBLAT3.SNAP' NAME OF SNAPSHOT OUTPUT FILE +*> -1 UNIT NUMBER OF SNAPSHOT FILE (NOT USED IF .LT. 0) +*> F LOGICAL FLAG, T TO REWIND SNAPSHOT FILE AFTER EACH RECORD. +*> F LOGICAL FLAG, T TO STOP ON FAILURES. +*> T LOGICAL FLAG, T TO TEST ERROR EXITS. +*> 16.0 THRESHOLD VALUE OF TEST RATIO +*> 6 NUMBER OF VALUES OF N +*> 0 1 2 3 5 9 VALUES OF N +*> 3 NUMBER OF VALUES OF ALPHA +*> 0.0 1.0 0.7 VALUES OF ALPHA +*> 3 NUMBER OF VALUES OF BETA +*> 0.0 1.0 1.3 VALUES OF BETA +*> SGEMM T PUT F FOR NO TEST. SAME COLUMNS. +*> SSYMM T PUT F FOR NO TEST. SAME COLUMNS. +*> STRMM T PUT F FOR NO TEST. SAME COLUMNS. +*> STRSM T PUT F FOR NO TEST. SAME COLUMNS. +*> SSYRK T PUT F FOR NO TEST. SAME COLUMNS. 
+*> SSYR2K T PUT F FOR NO TEST. SAME COLUMNS. +*> +*> Further Details +*> =============== +*> +*> See: +*> +*> Dongarra J. J., Du Croz J. J., Duff I. S. and Hammarling S. +*> A Set of Level 3 Basic Linear Algebra Subprograms. +*> +*> Technical Memorandum No.88 (Revision 1), Mathematics and +*> Computer Science Division, Argonne National Laboratory, 9700 +*> South Cass Avenue, Argonne, Illinois 60439, US. +*> +*> -- Written on 8-February-1989. +*> Jack Dongarra, Argonne National Laboratory. +*> Iain Duff, AERE Harwell. +*> Jeremy Du Croz, Numerical Algorithms Group Ltd. +*> Sven Hammarling, Numerical Algorithms Group Ltd. +*> +*> 10-9-00: Change STATUS='NEW' to 'UNKNOWN' so that the testers +*> can be run multiple times without deleting generated +*> output files (susan) +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date April 2012 +* +*> \ingroup single_blas_testing +* +* ===================================================================== PROGRAM SBLAT3 * -* Test program for the REAL Level 3 Blas. +* -- Reference BLAS test routine (version 3.4.1) -- +* -- Reference BLAS is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* April 2012 * -* The program must be driven by a short data file. The first 14 records -* of the file are read using list-directed input, the last 6 records -* are read using the format ( A6, L2 ). An annotated example of a data -* file can be obtained by deleting the first 3 characters from the -* following 20 lines: -* 'SBLAT3.SUMM' NAME OF SUMMARY OUTPUT FILE -* 6 UNIT NUMBER OF SUMMARY FILE -* 'SBLAT3.SNAP' NAME OF SNAPSHOT OUTPUT FILE -* -1 UNIT NUMBER OF SNAPSHOT FILE (NOT USED IF .LT. 0) -* F LOGICAL FLAG, T TO REWIND SNAPSHOT FILE AFTER EACH RECORD. -* F LOGICAL FLAG, T TO STOP ON FAILURES. -* T LOGICAL FLAG, T TO TEST ERROR EXITS. -* 16.0 THRESHOLD VALUE OF TEST RATIO -* 6 NUMBER OF VALUES OF N -* 0 1 2 3 5 9 VALUES OF N -* 3 NUMBER OF VALUES OF ALPHA -* 0.0 1.0 0.7 VALUES OF ALPHA -* 3 NUMBER OF VALUES OF BETA -* 0.0 1.0 1.3 VALUES OF BETA -* SGEMM T PUT F FOR NO TEST. SAME COLUMNS. -* SSYMM T PUT F FOR NO TEST. SAME COLUMNS. -* STRMM T PUT F FOR NO TEST. SAME COLUMNS. -* STRSM T PUT F FOR NO TEST. SAME COLUMNS. -* SSYRK T PUT F FOR NO TEST. SAME COLUMNS. -* SSYR2K T PUT F FOR NO TEST. SAME COLUMNS. -* -* See: -* -* Dongarra J. J., Du Croz J. J., Duff I. S. and Hammarling S. -* A Set of Level 3 Basic Linear Algebra Subprograms. -* -* Technical Memorandum No.88 (Revision 1), Mathematics and -* Computer Science Division, Argonne National Laboratory, 9700 -* South Cass Avenue, Argonne, Illinois 60439, US. -* -* -- Written on 8-February-1989. -* Jack Dongarra, Argonne National Laboratory. -* Iain Duff, AERE Harwell. -* Jeremy Du Croz, Numerical Algorithms Group Ltd. -* Sven Hammarling, Numerical Algorithms Group Ltd. +* ===================================================================== * * .. Parameters .. 
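
One more change runs through every tester, including the sblat3 hunks just below: OPEN with STATUS = 'NEW' fails when the file already exists, so reruns used to require deleting the generated outputs first; the patch switches to 'UNKNOWN', or omits STATUS entirely, whose default is also 'UNKNOWN'. The closest C/C++ analogue is exclusive create versus plain truncation; the "wx" mode is C11 and not universally supported, and the file name is just the one from the example data file above:

```cpp
// fopen "wx" fails if the file exists (like STATUS='NEW'); plain "w"
// truncates and reuses it (the usual effect of STATUS='UNKNOWN').
#include <cstdio>

int main()
{
  const char* path = "sblat3.out";  // summary file name from the data file

  if (std::FILE* f = std::fopen(path, "w"))   // first run: create/truncate
    std::fclose(f);

  // Second run: exclusive create fails because the file is already there.
  if (std::FILE* f = std::fopen(path, "wx"))  // C11 'x': fail if file exists
  {
    std::puts("created exclusively");
    std::fclose(f);
  }
  else
    std::puts("already exists -- what STATUS='NEW' reruns used to hit");

  std::remove(path);
  return 0;
}
```
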
INTEGER NIN PARAMETER ( NIN = 5 ) INTEGER NSUBS PARAMETER ( NSUBS = 6 ) - REAL ZERO, HALF, ONE - PARAMETER ( ZERO = 0.0, HALF = 0.5, ONE = 1.0 ) + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0, ONE = 1.0 ) INTEGER NMAX PARAMETER ( NMAX = 65 ) INTEGER NIDMAX, NALMAX, NBEMAX @@ -96,7 +142,7 @@ * READ( NIN, FMT = * )SUMMRY READ( NIN, FMT = * )NOUT - OPEN( NOUT, FILE = SUMMRY, STATUS = 'NEW' ) + OPEN( NOUT, FILE = SUMMRY ) NOUTC = NOUT * * Read name and unit number for snapshot output file and open file. @@ -105,7 +151,7 @@ READ( NIN, FMT = * )NTRA TRACE = NTRA.GE.0 IF( TRACE )THEN - OPEN( NTRA, FILE = SNAPS, STATUS = 'NEW' ) + OPEN( NTRA, FILE = SNAPS ) END IF * Read the flag that directs rewinding of the snapshot file. READ( NIN, FMT = * )REWI @@ -182,14 +228,7 @@ * * Compute EPS (the machine precision). * - EPS = ONE - 70 CONTINUE - IF( SDIFF( ONE + EPS, ONE ).EQ.ZERO ) - $ GO TO 80 - EPS = HALF*EPS - GO TO 70 - 80 CONTINUE - EPS = EPS + EPS + EPS = EPSILON(ZERO) WRITE( NOUT, FMT = 9998 )EPS * * Check the reliability of SMMCH using exact data. @@ -1802,7 +1841,7 @@ * * Tests the error exits from the Level 3 Blas. * Requires a special version of the error-handling routine XERBLA. -* ALPHA, BETA, A, B and C should not need to be defined. +* A, B and C should not need to be defined. * * Auxiliary routine for test program for Level 3 Blas. * @@ -1812,12 +1851,18 @@ * Jeremy Du Croz, Numerical Algorithms Group Ltd. * Sven Hammarling, Numerical Algorithms Group Ltd. * +* 3-19-92: Initialize ALPHA and BETA (eca) +* 3-19-92: Fix argument 12 in calls to SSYMM with INFOT = 9 (eca) +* * .. Scalar Arguments .. INTEGER ISNUM, NOUT CHARACTER*6 SRNAMT * .. Scalars in Common .. INTEGER INFOT, NOUTC LOGICAL LERR, OK +* .. Parameters .. + REAL ONE, TWO + PARAMETER ( ONE = 1.0E0, TWO = 2.0E0 ) * .. Local Scalars .. REAL ALPHA, BETA * .. Local Arrays .. @@ -1834,6 +1879,12 @@ * LERR is set to .TRUE. by the special version of XERBLA each time * it is called, and is then tested and re-set by CHKXER. LERR = .FALSE. +* +* Initialize ALPHA and BETA. +* + ALPHA = ONE + BETA = TWO +* GO TO ( 10, 20, 30, 40, 50, 60 )ISNUM 10 INFOT = 1 CALL SGEMM( '/', 'N', 0, 0, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) @@ -1963,16 +2014,16 @@ CALL SSYMM( 'R', 'L', 0, 2, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 - CALL SSYMM( 'L', 'U', 2, 0, ALPHA, A, 2, B, 1, BETA, C, 1 ) + CALL SSYMM( 'L', 'U', 2, 0, ALPHA, A, 2, B, 1, BETA, C, 2 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 - CALL SSYMM( 'R', 'U', 2, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) + CALL SSYMM( 'R', 'U', 2, 0, ALPHA, A, 1, B, 1, BETA, C, 2 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 - CALL SSYMM( 'L', 'L', 2, 0, ALPHA, A, 2, B, 1, BETA, C, 1 ) + CALL SSYMM( 'L', 'L', 2, 0, ALPHA, A, 2, B, 1, BETA, C, 2 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 - CALL SSYMM( 'R', 'L', 2, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) + CALL SSYMM( 'R', 'L', 2, 0, ALPHA, A, 1, B, 1, BETA, C, 2 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 12 CALL SSYMM( 'L', 'U', 2, 0, ALPHA, A, 2, B, 2, BETA, C, 1 ) @@ -2660,7 +2711,6 @@ 50 CONTINUE END IF * - 60 CONTINUE LSERES = .TRUE. 
GO TO 80 70 CONTINUE diff --git a/blas/testing/zblat1.f b/blas/testing/zblat1.f index e2415e1c4..d30112c63 100644 --- a/blas/testing/zblat1.f +++ b/blas/testing/zblat1.f @@ -1,7 +1,49 @@ +*> \brief \b ZBLAT1 +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +* Definition: +* =========== +* +* PROGRAM ZBLAT1 +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> Test program for the COMPLEX*16 Level 1 BLAS. +*> +*> Based upon the original BLAS test routine together with: +*> F06GAF Example Program Text +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date April 2012 +* +*> \ingroup complex16_blas_testing +* +* ===================================================================== PROGRAM ZBLAT1 -* Test program for the COMPLEX*16 Level 1 BLAS. -* Based upon the original BLAS test routine together with: -* F06GAF Example Program Text +* +* -- Reference BLAS test routine (version 3.4.1) -- +* -- Reference BLAS is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* April 2012 +* +* ===================================================================== +* * .. Parameters .. INTEGER NOUT PARAMETER (NOUT=6) @@ -114,8 +156,8 @@ + (5.0D0,6.0D0), (5.0D0,6.0D0), (0.1D0,0.1D0), + (-0.6D0,0.1D0), (0.1D0,-0.3D0), (7.0D0,8.0D0), + (7.0D0,8.0D0), (7.0D0,8.0D0), (7.0D0,8.0D0), - + (7.0D0,8.0D0), (0.3D0,0.1D0), (0.1D0,0.4D0), - + (0.4D0,0.1D0), (0.1D0,0.2D0), (2.0D0,3.0D0), + + (7.0D0,8.0D0), (0.3D0,0.1D0), (0.5D0,0.0D0), + + (0.0D0,0.5D0), (0.0D0,0.2D0), (2.0D0,3.0D0), + (2.0D0,3.0D0), (2.0D0,3.0D0), (2.0D0,3.0D0)/ DATA ((CV(I,J,2),I=1,8),J=1,5)/(0.1D0,0.1D0), + (4.0D0,5.0D0), (4.0D0,5.0D0), (4.0D0,5.0D0), @@ -129,10 +171,10 @@ + (3.0D0,6.0D0), (-0.6D0,0.1D0), (4.0D0,7.0D0), + (0.1D0,-0.3D0), (7.0D0,2.0D0), (7.0D0,2.0D0), + (7.0D0,2.0D0), (0.3D0,0.1D0), (5.0D0,8.0D0), - + (0.1D0,0.4D0), (6.0D0,9.0D0), (0.4D0,0.1D0), - + (8.0D0,3.0D0), (0.1D0,0.2D0), (9.0D0,4.0D0)/ - DATA STRUE2/0.0D0, 0.5D0, 0.6D0, 0.7D0, 0.7D0/ - DATA STRUE4/0.0D0, 0.7D0, 1.0D0, 1.3D0, 1.7D0/ + + (0.5D0,0.0D0), (6.0D0,9.0D0), (0.0D0,0.5D0), + + (8.0D0,3.0D0), (0.0D0,0.2D0), (9.0D0,4.0D0)/ + DATA STRUE2/0.0D0, 0.5D0, 0.6D0, 0.7D0, 0.8D0/ + DATA STRUE4/0.0D0, 0.7D0, 1.0D0, 1.3D0, 1.6D0/ DATA ((CTRUE5(I,J,1),I=1,8),J=1,5)/(0.1D0,0.1D0), + (1.0D0,2.0D0), (1.0D0,2.0D0), (1.0D0,2.0D0), + (1.0D0,2.0D0), (1.0D0,2.0D0), (1.0D0,2.0D0), @@ -145,8 +187,8 @@ + (0.11D0,-0.03D0), (-0.17D0,0.46D0), + (-0.17D0,-0.19D0), (7.0D0,8.0D0), (7.0D0,8.0D0), + (7.0D0,8.0D0), (7.0D0,8.0D0), (7.0D0,8.0D0), - + (0.19D0,-0.17D0), (0.32D0,0.09D0), - + (0.23D0,-0.24D0), (0.18D0,0.01D0), + + (0.19D0,-0.17D0), (0.20D0,-0.35D0), + + (0.35D0,0.20D0), (0.14D0,0.08D0), + (2.0D0,3.0D0), (2.0D0,3.0D0), (2.0D0,3.0D0), + (2.0D0,3.0D0)/ DATA ((CTRUE5(I,J,2),I=1,8),J=1,5)/(0.1D0,0.1D0), @@ -162,9 +204,9 @@ + (-0.17D0,0.46D0), (4.0D0,7.0D0), + (-0.17D0,-0.19D0), (7.0D0,2.0D0), (7.0D0,2.0D0), + (7.0D0,2.0D0), (0.19D0,-0.17D0), (5.0D0,8.0D0), - + (0.32D0,0.09D0), (6.0D0,9.0D0), - + (0.23D0,-0.24D0), (8.0D0,3.0D0), - + (0.18D0,0.01D0), (9.0D0,4.0D0)/ + + (0.20D0,-0.35D0), (6.0D0,9.0D0), + + (0.35D0,0.20D0), (8.0D0,3.0D0), + + (0.14D0,0.08D0), (9.0D0,4.0D0)/ DATA ((CTRUE6(I,J,1),I=1,8),J=1,5)/(0.1D0,0.1D0), + (1.0D0,2.0D0), (1.0D0,2.0D0), (1.0D0,2.0D0), + (1.0D0,2.0D0), 
(1.0D0,2.0D0), (1.0D0,2.0D0), @@ -177,8 +219,8 @@ + (0.03D0,0.03D0), (-0.18D0,0.03D0), + (0.03D0,-0.09D0), (7.0D0,8.0D0), (7.0D0,8.0D0), + (7.0D0,8.0D0), (7.0D0,8.0D0), (7.0D0,8.0D0), - + (0.09D0,0.03D0), (0.03D0,0.12D0), - + (0.12D0,0.03D0), (0.03D0,0.06D0), (2.0D0,3.0D0), + + (0.09D0,0.03D0), (0.15D0,0.00D0), + + (0.00D0,0.15D0), (0.00D0,0.06D0), (2.0D0,3.0D0), + (2.0D0,3.0D0), (2.0D0,3.0D0), (2.0D0,3.0D0)/ DATA ((CTRUE6(I,J,2),I=1,8),J=1,5)/(0.1D0,0.1D0), + (4.0D0,5.0D0), (4.0D0,5.0D0), (4.0D0,5.0D0), @@ -193,8 +235,8 @@ + (-0.18D0,0.03D0), (4.0D0,7.0D0), + (0.03D0,-0.09D0), (7.0D0,2.0D0), (7.0D0,2.0D0), + (7.0D0,2.0D0), (0.09D0,0.03D0), (5.0D0,8.0D0), - + (0.03D0,0.12D0), (6.0D0,9.0D0), (0.12D0,0.03D0), - + (8.0D0,3.0D0), (0.03D0,0.06D0), (9.0D0,4.0D0)/ + + (0.15D0,0.00D0), (6.0D0,9.0D0), (0.00D0,0.15D0), + + (8.0D0,3.0D0), (0.00D0,0.06D0), (9.0D0,4.0D0)/ DATA ITRUE3/0, 1, 2, 2, 2/ * .. Executable Statements .. DO 60 INCX = 1, 2 @@ -529,7 +571,8 @@ * * .. Parameters .. INTEGER NOUT - PARAMETER (NOUT=6) + DOUBLE PRECISION ZERO + PARAMETER (NOUT=6, ZERO=0.0D0) * .. Scalar Arguments .. DOUBLE PRECISION SFAC INTEGER LEN @@ -552,7 +595,7 @@ * DO 40 I = 1, LEN SD = SCOMP(I) - STRUE(I) - IF (SDIFF(ABS(SSIZE(I))+ABS(SFAC*SD),ABS(SSIZE(I))).EQ.0.0D0) + IF (ABS(SFAC*SD) .LE. ABS(SSIZE(I))*EPSILON(ZERO)) + GO TO 40 * * HERE SCOMP(I) IS NOT CLOSE TO STRUE(I). diff --git a/blas/testing/zblat2.f b/blas/testing/zblat2.f index e65cdcc70..53129a11e 100644 --- a/blas/testing/zblat2.f +++ b/blas/testing/zblat2.f @@ -1,68 +1,114 @@ +*> \brief \b ZBLAT2 +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +* Definition: +* =========== +* +* PROGRAM ZBLAT2 +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> Test program for the COMPLEX*16 Level 2 Blas. +*> +*> The program must be driven by a short data file. The first 18 records +*> of the file are read using list-directed input, the last 17 records +*> are read using the format ( A6, L2 ). An annotated example of a data +*> file can be obtained by deleting the first 3 characters from the +*> following 35 lines: +*> 'zblat2.out' NAME OF SUMMARY OUTPUT FILE +*> 6 UNIT NUMBER OF SUMMARY FILE +*> 'CBLA2T.SNAP' NAME OF SNAPSHOT OUTPUT FILE +*> -1 UNIT NUMBER OF SNAPSHOT FILE (NOT USED IF .LT. 0) +*> F LOGICAL FLAG, T TO REWIND SNAPSHOT FILE AFTER EACH RECORD. +*> F LOGICAL FLAG, T TO STOP ON FAILURES. +*> T LOGICAL FLAG, T TO TEST ERROR EXITS. +*> 16.0 THRESHOLD VALUE OF TEST RATIO +*> 6 NUMBER OF VALUES OF N +*> 0 1 2 3 5 9 VALUES OF N +*> 4 NUMBER OF VALUES OF K +*> 0 1 2 4 VALUES OF K +*> 4 NUMBER OF VALUES OF INCX AND INCY +*> 1 2 -1 -2 VALUES OF INCX AND INCY +*> 3 NUMBER OF VALUES OF ALPHA +*> (0.0,0.0) (1.0,0.0) (0.7,-0.9) VALUES OF ALPHA +*> 3 NUMBER OF VALUES OF BETA +*> (0.0,0.0) (1.0,0.0) (1.3,-1.1) VALUES OF BETA +*> ZGEMV T PUT F FOR NO TEST. SAME COLUMNS. +*> ZGBMV T PUT F FOR NO TEST. SAME COLUMNS. +*> ZHEMV T PUT F FOR NO TEST. SAME COLUMNS. +*> ZHBMV T PUT F FOR NO TEST. SAME COLUMNS. +*> ZHPMV T PUT F FOR NO TEST. SAME COLUMNS. +*> ZTRMV T PUT F FOR NO TEST. SAME COLUMNS. +*> ZTBMV T PUT F FOR NO TEST. SAME COLUMNS. +*> ZTPMV T PUT F FOR NO TEST. SAME COLUMNS. +*> ZTRSV T PUT F FOR NO TEST. SAME COLUMNS. +*> ZTBSV T PUT F FOR NO TEST. SAME COLUMNS. +*> ZTPSV T PUT F FOR NO TEST. SAME COLUMNS. +*> ZGERC T PUT F FOR NO TEST. SAME COLUMNS. +*> ZGERU T PUT F FOR NO TEST. SAME COLUMNS. +*> ZHER T PUT F FOR NO TEST. SAME COLUMNS. 
+*> ZHPR T PUT F FOR NO TEST. SAME COLUMNS. +*> ZHER2 T PUT F FOR NO TEST. SAME COLUMNS. +*> ZHPR2 T PUT F FOR NO TEST. SAME COLUMNS. +*> +*> Further Details +*> =============== +*> +*> See: +*> +*> Dongarra J. J., Du Croz J. J., Hammarling S. and Hanson R. J.. +*> An extended set of Fortran Basic Linear Algebra Subprograms. +*> +*> Technical Memoranda Nos. 41 (revision 3) and 81, Mathematics +*> and Computer Science Division, Argonne National Laboratory, +*> 9700 South Cass Avenue, Argonne, Illinois 60439, US. +*> +*> Or +*> +*> NAG Technical Reports TR3/87 and TR4/87, Numerical Algorithms +*> Group Ltd., NAG Central Office, 256 Banbury Road, Oxford +*> OX2 7DE, UK, and Numerical Algorithms Group Inc., 1101 31st +*> Street, Suite 100, Downers Grove, Illinois 60515-1263, USA. +*> +*> +*> -- Written on 10-August-1987. +*> Richard Hanson, Sandia National Labs. +*> Jeremy Du Croz, NAG Central Office. +*> +*> 10-9-00: Change STATUS='NEW' to 'UNKNOWN' so that the testers +*> can be run multiple times without deleting generated +*> output files (susan) +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date April 2012 +* +*> \ingroup complex16_blas_testing +* +* ===================================================================== PROGRAM ZBLAT2 * -* Test program for the COMPLEX*16 Level 2 Blas. +* -- Reference BLAS test routine (version 3.4.1) -- +* -- Reference BLAS is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* April 2012 * -* The program must be driven by a short data file. The first 18 records -* of the file are read using list-directed input, the last 17 records -* are read using the format ( A6, L2 ). An annotated example of a data -* file can be obtained by deleting the first 3 characters from the -* following 35 lines: -* 'ZBLAT2.SUMM' NAME OF SUMMARY OUTPUT FILE -* 6 UNIT NUMBER OF SUMMARY FILE -* 'CBLA2T.SNAP' NAME OF SNAPSHOT OUTPUT FILE -* -1 UNIT NUMBER OF SNAPSHOT FILE (NOT USED IF .LT. 0) -* F LOGICAL FLAG, T TO REWIND SNAPSHOT FILE AFTER EACH RECORD. -* F LOGICAL FLAG, T TO STOP ON FAILURES. -* T LOGICAL FLAG, T TO TEST ERROR EXITS. -* 16.0 THRESHOLD VALUE OF TEST RATIO -* 6 NUMBER OF VALUES OF N -* 0 1 2 3 5 9 VALUES OF N -* 4 NUMBER OF VALUES OF K -* 0 1 2 4 VALUES OF K -* 4 NUMBER OF VALUES OF INCX AND INCY -* 1 2 -1 -2 VALUES OF INCX AND INCY -* 3 NUMBER OF VALUES OF ALPHA -* (0.0,0.0) (1.0,0.0) (0.7,-0.9) VALUES OF ALPHA -* 3 NUMBER OF VALUES OF BETA -* (0.0,0.0) (1.0,0.0) (1.3,-1.1) VALUES OF BETA -* ZGEMV T PUT F FOR NO TEST. SAME COLUMNS. -* ZGBMV T PUT F FOR NO TEST. SAME COLUMNS. -* ZHEMV T PUT F FOR NO TEST. SAME COLUMNS. -* ZHBMV T PUT F FOR NO TEST. SAME COLUMNS. -* ZHPMV T PUT F FOR NO TEST. SAME COLUMNS. -* ZTRMV T PUT F FOR NO TEST. SAME COLUMNS. -* ZTBMV T PUT F FOR NO TEST. SAME COLUMNS. -* ZTPMV T PUT F FOR NO TEST. SAME COLUMNS. -* ZTRSV T PUT F FOR NO TEST. SAME COLUMNS. -* ZTBSV T PUT F FOR NO TEST. SAME COLUMNS. -* ZTPSV T PUT F FOR NO TEST. SAME COLUMNS. -* ZGERC T PUT F FOR NO TEST. SAME COLUMNS. -* ZGERU T PUT F FOR NO TEST. SAME COLUMNS. -* ZHER T PUT F FOR NO TEST. SAME COLUMNS. -* ZHPR T PUT F FOR NO TEST. SAME COLUMNS. -* ZHER2 T PUT F FOR NO TEST. SAME COLUMNS. -* ZHPR2 T PUT F FOR NO TEST. SAME COLUMNS. -* -* See: -* -* Dongarra J. J., Du Croz J. J., Hammarling S. and Hanson R. J.. 
-* An extended set of Fortran Basic Linear Algebra Subprograms. -* -* Technical Memoranda Nos. 41 (revision 3) and 81, Mathematics -* and Computer Science Division, Argonne National Laboratory, -* 9700 South Cass Avenue, Argonne, Illinois 60439, US. -* -* Or -* -* NAG Technical Reports TR3/87 and TR4/87, Numerical Algorithms -* Group Ltd., NAG Central Office, 256 Banbury Road, Oxford -* OX2 7DE, UK, and Numerical Algorithms Group Inc., 1101 31st -* Street, Suite 100, Downers Grove, Illinois 60515-1263, USA. -* -* -* -- Written on 10-August-1987. -* Richard Hanson, Sandia National Labs. -* Jeremy Du Croz, NAG Central Office. +* ===================================================================== * * .. Parameters .. INTEGER NIN @@ -72,8 +118,8 @@ COMPLEX*16 ZERO, ONE PARAMETER ( ZERO = ( 0.0D0, 0.0D0 ), $ ONE = ( 1.0D0, 0.0D0 ) ) - DOUBLE PRECISION RZERO, RHALF, RONE - PARAMETER ( RZERO = 0.0D0, RHALF = 0.5D0, RONE = 1.0D0 ) + DOUBLE PRECISION RZERO + PARAMETER ( RZERO = 0.0D0 ) INTEGER NMAX, INCMAX PARAMETER ( NMAX = 65, INCMAX = 2 ) INTEGER NINMAX, NIDMAX, NKBMAX, NALMAX, NBEMAX @@ -127,7 +173,7 @@ * READ( NIN, FMT = * )SUMMRY READ( NIN, FMT = * )NOUT - OPEN( NOUT, FILE = SUMMRY, STATUS = 'NEW' ) + OPEN( NOUT, FILE = SUMMRY, STATUS = 'UNKNOWN' ) NOUTC = NOUT * * Read name and unit number for snapshot output file and open file. @@ -136,7 +182,7 @@ READ( NIN, FMT = * )NTRA TRACE = NTRA.GE.0 IF( TRACE )THEN - OPEN( NTRA, FILE = SNAPS, STATUS = 'NEW' ) + OPEN( NTRA, FILE = SNAPS, STATUS = 'UNKNOWN' ) END IF * Read the flag that directs rewinding of the snapshot file. READ( NIN, FMT = * )REWI @@ -241,14 +287,7 @@ * * Compute EPS (the machine precision). * - EPS = RONE - 90 CONTINUE - IF( DDIFF( RONE + EPS, RONE ).EQ.RZERO ) - $ GO TO 100 - EPS = RHALF*EPS - GO TO 90 - 100 CONTINUE - EPS = EPS + EPS + EPS = EPSILON(RZERO) WRITE( NOUT, FMT = 9998 )EPS * * Check the reliability of ZMVCH using exact data. @@ -3087,7 +3126,6 @@ 50 CONTINUE END IF * - 60 CONTINUE LZERES = .TRUE. GO TO 80 70 CONTINUE diff --git a/blas/testing/zblat3.f b/blas/testing/zblat3.f index d6a522f2a..59ca24145 100644 --- a/blas/testing/zblat3.f +++ b/blas/testing/zblat3.f @@ -1,50 +1,97 @@ +*> \brief \b ZBLAT3 +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +* Definition: +* =========== +* +* PROGRAM ZBLAT3 +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> Test program for the COMPLEX*16 Level 3 Blas. +*> +*> The program must be driven by a short data file. The first 14 records +*> of the file are read using list-directed input, the last 9 records +*> are read using the format ( A6, L2 ). An annotated example of a data +*> file can be obtained by deleting the first 3 characters from the +*> following 23 lines: +*> 'zblat3.out' NAME OF SUMMARY OUTPUT FILE +*> 6 UNIT NUMBER OF SUMMARY FILE +*> 'ZBLAT3.SNAP' NAME OF SNAPSHOT OUTPUT FILE +*> -1 UNIT NUMBER OF SNAPSHOT FILE (NOT USED IF .LT. 0) +*> F LOGICAL FLAG, T TO REWIND SNAPSHOT FILE AFTER EACH RECORD. +*> F LOGICAL FLAG, T TO STOP ON FAILURES. +*> T LOGICAL FLAG, T TO TEST ERROR EXITS. +*> 16.0 THRESHOLD VALUE OF TEST RATIO +*> 6 NUMBER OF VALUES OF N +*> 0 1 2 3 5 9 VALUES OF N +*> 3 NUMBER OF VALUES OF ALPHA +*> (0.0,0.0) (1.0,0.0) (0.7,-0.9) VALUES OF ALPHA +*> 3 NUMBER OF VALUES OF BETA +*> (0.0,0.0) (1.0,0.0) (1.3,-1.1) VALUES OF BETA +*> ZGEMM T PUT F FOR NO TEST. SAME COLUMNS. +*> ZHEMM T PUT F FOR NO TEST. SAME COLUMNS. +*> ZSYMM T PUT F FOR NO TEST. 
SAME COLUMNS. +*> ZTRMM T PUT F FOR NO TEST. SAME COLUMNS. +*> ZTRSM T PUT F FOR NO TEST. SAME COLUMNS. +*> ZHERK T PUT F FOR NO TEST. SAME COLUMNS. +*> ZSYRK T PUT F FOR NO TEST. SAME COLUMNS. +*> ZHER2K T PUT F FOR NO TEST. SAME COLUMNS. +*> ZSYR2K T PUT F FOR NO TEST. SAME COLUMNS. +*> +*> +*> Further Details +*> =============== +*> +*> See: +*> +*> Dongarra J. J., Du Croz J. J., Duff I. S. and Hammarling S. +*> A Set of Level 3 Basic Linear Algebra Subprograms. +*> +*> Technical Memorandum No.88 (Revision 1), Mathematics and +*> Computer Science Division, Argonne National Laboratory, 9700 +*> South Cass Avenue, Argonne, Illinois 60439, US. +*> +*> -- Written on 8-February-1989. +*> Jack Dongarra, Argonne National Laboratory. +*> Iain Duff, AERE Harwell. +*> Jeremy Du Croz, Numerical Algorithms Group Ltd. +*> Sven Hammarling, Numerical Algorithms Group Ltd. +*> +*> 10-9-00: Change STATUS='NEW' to 'UNKNOWN' so that the testers +*> can be run multiple times without deleting generated +*> output files (susan) +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date April 2012 +* +*> \ingroup complex16_blas_testing +* +* ===================================================================== PROGRAM ZBLAT3 * -* Test program for the COMPLEX*16 Level 3 Blas. +* -- Reference BLAS test routine (version 3.4.1) -- +* -- Reference BLAS is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* April 2012 * -* The program must be driven by a short data file. The first 14 records -* of the file are read using list-directed input, the last 9 records -* are read using the format ( A6, L2 ). An annotated example of a data -* file can be obtained by deleting the first 3 characters from the -* following 23 lines: -* 'ZBLAT3.SUMM' NAME OF SUMMARY OUTPUT FILE -* 6 UNIT NUMBER OF SUMMARY FILE -* 'ZBLAT3.SNAP' NAME OF SNAPSHOT OUTPUT FILE -* -1 UNIT NUMBER OF SNAPSHOT FILE (NOT USED IF .LT. 0) -* F LOGICAL FLAG, T TO REWIND SNAPSHOT FILE AFTER EACH RECORD. -* F LOGICAL FLAG, T TO STOP ON FAILURES. -* T LOGICAL FLAG, T TO TEST ERROR EXITS. -* 16.0 THRESHOLD VALUE OF TEST RATIO -* 6 NUMBER OF VALUES OF N -* 0 1 2 3 5 9 VALUES OF N -* 3 NUMBER OF VALUES OF ALPHA -* (0.0,0.0) (1.0,0.0) (0.7,-0.9) VALUES OF ALPHA -* 3 NUMBER OF VALUES OF BETA -* (0.0,0.0) (1.0,0.0) (1.3,-1.1) VALUES OF BETA -* ZGEMM T PUT F FOR NO TEST. SAME COLUMNS. -* ZHEMM T PUT F FOR NO TEST. SAME COLUMNS. -* ZSYMM T PUT F FOR NO TEST. SAME COLUMNS. -* ZTRMM T PUT F FOR NO TEST. SAME COLUMNS. -* ZTRSM T PUT F FOR NO TEST. SAME COLUMNS. -* ZHERK T PUT F FOR NO TEST. SAME COLUMNS. -* ZSYRK T PUT F FOR NO TEST. SAME COLUMNS. -* ZHER2K T PUT F FOR NO TEST. SAME COLUMNS. -* ZSYR2K T PUT F FOR NO TEST. SAME COLUMNS. -* -* See: -* -* Dongarra J. J., Du Croz J. J., Duff I. S. and Hammarling S. -* A Set of Level 3 Basic Linear Algebra Subprograms. -* -* Technical Memorandum No.88 (Revision 1), Mathematics and -* Computer Science Division, Argonne National Laboratory, 9700 -* South Cass Avenue, Argonne, Illinois 60439, US. -* -* -- Written on 8-February-1989. -* Jack Dongarra, Argonne National Laboratory. -* Iain Duff, AERE Harwell. -* Jeremy Du Croz, Numerical Algorithms Group Ltd. -* Sven Hammarling, Numerical Algorithms Group Ltd. +* ===================================================================== * * .. Parameters .. 
INTEGER NIN @@ -54,8 +101,8 @@ COMPLEX*16 ZERO, ONE PARAMETER ( ZERO = ( 0.0D0, 0.0D0 ), $ ONE = ( 1.0D0, 0.0D0 ) ) - DOUBLE PRECISION RZERO, RHALF, RONE - PARAMETER ( RZERO = 0.0D0, RHALF = 0.5D0, RONE = 1.0D0 ) + DOUBLE PRECISION RZERO + PARAMETER ( RZERO = 0.0D0 ) INTEGER NMAX PARAMETER ( NMAX = 65 ) INTEGER NIDMAX, NALMAX, NBEMAX @@ -104,7 +151,7 @@ * READ( NIN, FMT = * )SUMMRY READ( NIN, FMT = * )NOUT - OPEN( NOUT, FILE = SUMMRY, STATUS = 'NEW' ) + OPEN( NOUT, FILE = SUMMRY, STATUS = 'UNKNOWN' ) NOUTC = NOUT * * Read name and unit number for snapshot output file and open file. @@ -113,7 +160,7 @@ READ( NIN, FMT = * )NTRA TRACE = NTRA.GE.0 IF( TRACE )THEN - OPEN( NTRA, FILE = SNAPS, STATUS = 'NEW' ) + OPEN( NTRA, FILE = SNAPS, STATUS = 'UNKNOWN' ) END IF * Read the flag that directs rewinding of the snapshot file. READ( NIN, FMT = * )REWI @@ -190,14 +237,7 @@ * * Compute EPS (the machine precision). * - EPS = RONE - 70 CONTINUE - IF( DDIFF( RONE + EPS, RONE ).EQ.RZERO ) - $ GO TO 80 - EPS = RHALF*EPS - GO TO 70 - 80 CONTINUE - EPS = EPS + EPS + EPS = EPSILON(RZERO) WRITE( NOUT, FMT = 9998 )EPS * * Check the reliability of ZMMCH using exact data. @@ -1949,7 +1989,7 @@ * * Tests the error exits from the Level 3 Blas. * Requires a special version of the error-handling routine XERBLA. -* ALPHA, RALPHA, BETA, RBETA, A, B and C should not need to be defined. +* A, B and C should not need to be defined. * * Auxiliary routine for test program for Level 3 Blas. * @@ -1959,12 +1999,20 @@ * Jeremy Du Croz, Numerical Algorithms Group Ltd. * Sven Hammarling, Numerical Algorithms Group Ltd. * +* 3-19-92: Initialize ALPHA, BETA, RALPHA, and RBETA (eca) +* 3-19-92: Fix argument 12 in calls to ZSYMM and ZHEMM +* with INFOT = 9 (eca) +* 10-9-00: Declared INTRINSIC DCMPLX (susan) +* * .. Scalar Arguments .. INTEGER ISNUM, NOUT CHARACTER*6 SRNAMT * .. Scalars in Common .. INTEGER INFOT, NOUTC LOGICAL LERR, OK +* .. Parameters .. + REAL ONE, TWO + PARAMETER ( ONE = 1.0D0, TWO = 2.0D0 ) * .. Local Scalars .. COMPLEX*16 ALPHA, BETA DOUBLE PRECISION RALPHA, RBETA @@ -1973,6 +2021,8 @@ * .. External Subroutines .. EXTERNAL ZGEMM, ZHEMM, ZHER2K, ZHERK, CHKXER, ZSYMM, $ ZSYR2K, ZSYRK, ZTRMM, ZTRSM +* .. Intrinsic Functions .. + INTRINSIC DCMPLX * .. Common blocks .. COMMON /INFOC/INFOT, NOUTC, OK, LERR * .. Executable Statements .. @@ -1982,6 +2032,14 @@ * LERR is set to .TRUE. by the special version of XERBLA each time * it is called, and is then tested and re-set by CHKXER. LERR = .FALSE. +* +* Initialize ALPHA, BETA, RALPHA, and RBETA. 
+* + ALPHA = DCMPLX( ONE, -ONE ) + BETA = DCMPLX( TWO, -TWO ) + RALPHA = ONE + RBETA = TWO +* GO TO ( 10, 20, 30, 40, 50, 60, 70, 80, $ 90 )ISNUM 10 INFOT = 1 @@ -2208,16 +2266,16 @@ CALL ZHEMM( 'R', 'L', 0, 2, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 - CALL ZHEMM( 'L', 'U', 2, 0, ALPHA, A, 2, B, 1, BETA, C, 1 ) + CALL ZHEMM( 'L', 'U', 2, 0, ALPHA, A, 2, B, 1, BETA, C, 2 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 - CALL ZHEMM( 'R', 'U', 2, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) + CALL ZHEMM( 'R', 'U', 2, 0, ALPHA, A, 1, B, 1, BETA, C, 2 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 - CALL ZHEMM( 'L', 'L', 2, 0, ALPHA, A, 2, B, 1, BETA, C, 1 ) + CALL ZHEMM( 'L', 'L', 2, 0, ALPHA, A, 2, B, 1, BETA, C, 2 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 - CALL ZHEMM( 'R', 'L', 2, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) + CALL ZHEMM( 'R', 'L', 2, 0, ALPHA, A, 1, B, 1, BETA, C, 2 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 12 CALL ZHEMM( 'L', 'U', 2, 0, ALPHA, A, 2, B, 2, BETA, C, 1 ) @@ -2275,16 +2333,16 @@ CALL ZSYMM( 'R', 'L', 0, 2, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 - CALL ZSYMM( 'L', 'U', 2, 0, ALPHA, A, 2, B, 1, BETA, C, 1 ) + CALL ZSYMM( 'L', 'U', 2, 0, ALPHA, A, 2, B, 1, BETA, C, 2 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 - CALL ZSYMM( 'R', 'U', 2, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) + CALL ZSYMM( 'R', 'U', 2, 0, ALPHA, A, 1, B, 1, BETA, C, 2 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 - CALL ZSYMM( 'L', 'L', 2, 0, ALPHA, A, 2, B, 1, BETA, C, 1 ) + CALL ZSYMM( 'L', 'L', 2, 0, ALPHA, A, 2, B, 1, BETA, C, 2 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 - CALL ZSYMM( 'R', 'L', 2, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) + CALL ZSYMM( 'R', 'L', 2, 0, ALPHA, A, 1, B, 1, BETA, C, 2 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 12 CALL ZSYMM( 'L', 'U', 2, 0, ALPHA, A, 2, B, 2, BETA, C, 1 ) @@ -3274,7 +3332,6 @@ 50 CONTINUE END IF * - 60 CONTINUE LZERES = .TRUE. 
      GO TO 80
   70 CONTINUE
diff --git a/cmake/Eigen3Config.cmake.in b/cmake/Eigen3Config.cmake.in
index e50f6dbe0..04e7886ce 100644
--- a/cmake/Eigen3Config.cmake.in
+++ b/cmake/Eigen3Config.cmake.in
@@ -15,7 +15,7 @@
 #                  EIGEN3_VERSION_PATCH - The patch version of Eigen
 
 set ( EIGEN3_FOUND 1 )
-set ( EIGEN3_USE_FILE "@EIGEN_USE_FILE@" )
+set ( EIGEN3_USE_FILE "${CMAKE_CURRENT_LIST_DIR}/UseEigen3.cmake" )
 
 set ( EIGEN3_DEFINITIONS  "@EIGEN_DEFINITIONS@" )
 set ( EIGEN3_INCLUDE_DIR  "@EIGEN_INCLUDE_DIR@" )
diff --git a/cmake/EigenConfigureTesting.cmake b/cmake/EigenConfigureTesting.cmake
index 0ee484e8c..afc24b5e9 100644
--- a/cmake/EigenConfigureTesting.cmake
+++ b/cmake/EigenConfigureTesting.cmake
@@ -46,16 +46,9 @@ if(CMAKE_COMPILER_IS_GNUCXX)
   if(EIGEN_COVERAGE_TESTING)
     set(COVERAGE_FLAGS "-fprofile-arcs -ftest-coverage")
     set(CTEST_CUSTOM_COVERAGE_EXCLUDE "/test/")
-  else(EIGEN_COVERAGE_TESTING)
-    set(COVERAGE_FLAGS "")
+    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${COVERAGE_FLAGS}")
   endif(EIGEN_COVERAGE_TESTING)
-  if(CMAKE_SYSTEM_NAME MATCHES Linux)
-    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${COVERAGE_FLAGS} -g2")
-    set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "${CMAKE_CXX_FLAGS_RELWITHDEBINFO} ${COVERAGE_FLAGS} -O2 -g2")
-    set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} ${COVERAGE_FLAGS} -fno-inline-functions")
-    set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} ${COVERAGE_FLAGS} -O0 -g3")
-  endif(CMAKE_SYSTEM_NAME MATCHES Linux)
 elseif(MSVC)
   set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /D_CRT_SECURE_NO_WARNINGS /D_SCL_SECURE_NO_WARNINGS")
 endif(CMAKE_COMPILER_IS_GNUCXX)
diff --git a/cmake/FindAdolc.cmake b/cmake/FindAdolc.cmake
index 1a7ff3628..937e54990 100644
--- a/cmake/FindAdolc.cmake
+++ b/cmake/FindAdolc.cmake
@@ -5,7 +5,7 @@ endif (ADOLC_INCLUDES AND ADOLC_LIBRARIES)
 
 find_path(ADOLC_INCLUDES
   NAMES
-  adolc/adouble.h
+  adolc/adtl.h
   PATHS
   $ENV{ADOLCDIR}
   ${INCLUDE_INSTALL_DIR}
diff --git a/cmake/FindSPQR.cmake b/cmake/FindSPQR.cmake
index 794c212af..1e958c3c1 100644
--- a/cmake/FindSPQR.cmake
+++ b/cmake/FindSPQR.cmake
@@ -26,7 +26,12 @@ if(SPQR_LIBRARIES)
   find_library(SUITESPARSE_LIBRARY SuiteSparse PATHS $ENV{SPQRDIR} ${LIB_INSTALL_DIR})
   if (SUITESPARSE_LIBRARY)
     set(SPQR_LIBRARIES ${SPQR_LIBRARIES} ${SUITESPARSE_LIBRARY})
-  endif (SUITESPARSE_LIBRARY)
+  endif()
+
+  find_library(CHOLMOD_LIBRARY cholmod PATHS $ENV{UMFPACK_LIBDIR} $ENV{UMFPACKDIR} ${LIB_INSTALL_DIR})
+  if(CHOLMOD_LIBRARY)
+    set(SPQR_LIBRARIES ${SPQR_LIBRARIES} ${CHOLMOD_LIBRARY})
+  endif()
 
 endif(SPQR_LIBRARIES)
diff --git a/cmake/FindSuperLU.cmake b/cmake/FindSuperLU.cmake
index 8a3df3666..e4142fe4d 100644
--- a/cmake/FindSuperLU.cmake
+++ b/cmake/FindSuperLU.cmake
@@ -17,10 +17,64 @@ find_path(SUPERLU_INCLUDES
     SRC
 )
 
-find_library(SUPERLU_LIBRARIES superlu PATHS $ENV{SUPERLUDIR} ${LIB_INSTALL_DIR} PATH_SUFFIXES lib)
-
+find_library(SUPERLU_LIBRARIES NAMES "superlu_4.3" "superlu_4.2" "superlu_4.1" "superlu_4.0" "superlu_3.1" "superlu_3.0" "superlu" PATHS $ENV{SUPERLUDIR} ${LIB_INSTALL_DIR} PATH_SUFFIXES lib)
+
+if(SUPERLU_INCLUDES AND SUPERLU_LIBRARIES)
+
+include(CheckCXXSourceCompiles)
+include(CMakePushCheckState)
+cmake_push_check_state()
+
+set(CMAKE_REQUIRED_INCLUDES ${CMAKE_REQUIRED_INCLUDES} ${SUPERLU_INCLUDES})
+
+# check whether struct mem_usage_t is globally defined
+check_cxx_source_compiles("
+typedef int int_t;
+#include <supermatrix.h>
+#include <slu_util.h>
+int main() {
+  mem_usage_t mem;
+  return 0;
+}"
+SUPERLU_HAS_GLOBAL_MEM_USAGE_T)
+
+
+check_cxx_source_compiles("
+typedef int int_t;
+#include <supermatrix.h>
+#include <superlu_enum_consts.h>
+int main() {
+  return SLU_SINGLE;
+}"
+SUPERLU_HAS_CLEAN_ENUMS)
+
+if(SUPERLU_HAS_CLEAN_ENUMS)
+  # at least 4.3
+  set(SUPERLU_VERSION_VAR "4.3")
+elseif(SUPERLU_HAS_GLOBAL_MEM_USAGE_T)
+  # at least 4.0
+  set(SUPERLU_VERSION_VAR "4.0")
+else()
+  set(SUPERLU_VERSION_VAR "3.0")
+endif()
+
+cmake_pop_check_state()
+
+if(SuperLU_FIND_VERSION)
+  if(${SUPERLU_VERSION_VAR} VERSION_LESS ${SuperLU_FIND_VERSION})
+    set(SUPERLU_VERSION_OK FALSE)
+  else()
+    set(SUPERLU_VERSION_OK TRUE)
+  endif()
+else()
+  set(SUPERLU_VERSION_OK TRUE)
+endif()
+
+endif()
+
 include(FindPackageHandleStandardArgs)
-find_package_handle_standard_args(SUPERLU DEFAULT_MSG
-                                  SUPERLU_INCLUDES SUPERLU_LIBRARIES)
+find_package_handle_standard_args(SUPERLU
+                                  REQUIRED_VARS SUPERLU_INCLUDES SUPERLU_LIBRARIES SUPERLU_VERSION_OK
+                                  VERSION_VAR SUPERLU_VERSION_VAR)
 
 mark_as_advanced(SUPERLU_INCLUDES SUPERLU_LIBRARIES)
diff --git a/cmake/FindUmfpack.cmake b/cmake/FindUmfpack.cmake
index 16b046cd6..53cf0b49b 100644
--- a/cmake/FindUmfpack.cmake
+++ b/cmake/FindUmfpack.cmake
@@ -20,24 +20,29 @@ find_library(UMFPACK_LIBRARIES umfpack PATHS $ENV{UMFPACKDIR} ${LIB_INSTALL_DIR}
 
 if(UMFPACK_LIBRARIES)
 
-  if (NOT UMFPACK_LIBDIR)
+  if(NOT UMFPACK_LIBDIR)
     get_filename_component(UMFPACK_LIBDIR ${UMFPACK_LIBRARIES} PATH)
   endif(NOT UMFPACK_LIBDIR)
 
   find_library(COLAMD_LIBRARY colamd PATHS ${UMFPACK_LIBDIR} $ENV{UMFPACKDIR} ${LIB_INSTALL_DIR})
-  if (COLAMD_LIBRARY)
+  if(COLAMD_LIBRARY)
     set(UMFPACK_LIBRARIES ${UMFPACK_LIBRARIES} ${COLAMD_LIBRARY})
-  endif (COLAMD_LIBRARY)
+  endif ()
 
   find_library(AMD_LIBRARY amd PATHS ${UMFPACK_LIBDIR} $ENV{UMFPACKDIR} ${LIB_INSTALL_DIR})
-  if (AMD_LIBRARY)
+  if(AMD_LIBRARY)
     set(UMFPACK_LIBRARIES ${UMFPACK_LIBRARIES} ${AMD_LIBRARY})
-  endif (AMD_LIBRARY)
+  endif ()
 
   find_library(SUITESPARSE_LIBRARY SuiteSparse PATHS ${UMFPACK_LIBDIR} $ENV{UMFPACKDIR} ${LIB_INSTALL_DIR})
-  if (SUITESPARSE_LIBRARY)
+  if(SUITESPARSE_LIBRARY)
     set(UMFPACK_LIBRARIES ${UMFPACK_LIBRARIES} ${SUITESPARSE_LIBRARY})
-  endif (SUITESPARSE_LIBRARY)
+  endif ()
+
+  find_library(CHOLMOD_LIBRARY cholmod PATHS $ENV{UMFPACK_LIBDIR} $ENV{UMFPACKDIR} ${LIB_INSTALL_DIR})
+  if(CHOLMOD_LIBRARY)
+    set(UMFPACK_LIBRARIES ${UMFPACK_LIBRARIES} ${CHOLMOD_LIBRARY})
+  endif()
 
 endif(UMFPACK_LIBRARIES)
 
@@ -45,4 +50,4 @@ include(FindPackageHandleStandardArgs)
 find_package_handle_standard_args(UMFPACK DEFAULT_MSG
                                   UMFPACK_INCLUDES UMFPACK_LIBRARIES)
 
-mark_as_advanced(UMFPACK_INCLUDES UMFPACK_LIBRARIES AMD_LIBRARY COLAMD_LIBRARY SUITESPARSE_LIBRARY)
+mark_as_advanced(UMFPACK_INCLUDES UMFPACK_LIBRARIES AMD_LIBRARY COLAMD_LIBRARY CHOLMOD_LIBRARY SUITESPARSE_LIBRARY)
diff --git a/cmake/language_support.cmake b/cmake/language_support.cmake
index 93f8a8fd8..2f14f30b8 100644
--- a/cmake/language_support.cmake
+++ b/cmake/language_support.cmake
@@ -43,7 +43,7 @@ function(workaround_9220 language language_works)
   if(return_code EQUAL 0)
     # Second run
     execute_process (
-      COMMAND ${CMAKE_COMMAND} .
+      COMMAND ${CMAKE_COMMAND} . -G "${CMAKE_GENERATOR}"
       WORKING_DIRECTORY ${CMAKE_BINARY_DIR}/language_tests/${language}
       RESULT_VARIABLE return_code
       OUTPUT_QUIET
diff --git a/doc/Doxyfile.in b/doc/Doxyfile.in
index 800bb30ee..e0c6a7e34 100644
--- a/doc/Doxyfile.in
+++ b/doc/Doxyfile.in
@@ -223,7 +223,8 @@ ALIASES = "only_for_vectors=This is only for vectors (either row-
 "note_about_using_kernel_to_study_multiple_solutions=If you need a complete analysis of the space of solutions, take the one solution obtained by this method and add to it elements of the kernel, as determined by kernel()."
\
"note_about_checking_solutions=This method just tries to find as good a solution as possible. If you want to check whether a solution exists or if it is accurate, just call this function to get a result and then compute the error of this result, or use MatrixBase::isApprox() directly, for instance like this: \code bool a_solution_exists = (A*result).isApprox(b, precision); \endcode This method avoids dividing by zero, so that the non-existence of a solution doesn't by itself mean that you'll get \c inf or \c nan values." \
"note_try_to_help_rvo=This function returns the result by value. In order to make that efficient, it is implemented as just a return statement using a special constructor, hopefully allowing the compiler to perform a RVO (return value optimization)." \
-"nonstableyet=\warning This is not considered to be part of the stable public API yet. Changes may happen in future releases. See \ref Experimental \"Experimental parts of Eigen\""
+"nonstableyet=\warning This is not considered to be part of the stable public API yet. Changes may happen in future releases. See \ref Experimental \"Experimental parts of Eigen\"" \
+"implsparsesolverconcept=This class follows the \link TutorialSparseSolverConcept sparse solver concept \endlink."
 
 ALIASES += "eigenAutoToc=  "
 
@@ -866,13 +867,13 @@ STRIP_CODE_COMMENTS    = YES
 # then for each documented function all documented
 # functions referencing it will be listed.
 
-REFERENCED_BY_RELATION = YES
+REFERENCED_BY_RELATION = NO
 
 # If the REFERENCES_RELATION tag is set to YES
 # then for each documented function all documented entities
 # called/used by that function will be listed.
 
-REFERENCES_RELATION    = YES
+REFERENCES_RELATION    = NO
 
 # If the REFERENCES_LINK_SOURCE tag is set to YES (the default)
 # and SOURCE_BROWSER tag is set to YES, then the hyperlinks from
diff --git a/doc/Manual.dox b/doc/Manual.dox
index 7f04edff4..c10c490a7 100644
--- a/doc/Manual.dox
+++ b/doc/Manual.dox
@@ -125,6 +125,8 @@ namespace Eigen {
      \ingroup Sparse_chapter */
 /** \addtogroup TopicSparseSystems
      \ingroup Sparse_chapter */
+/** \addtogroup MatrixfreeSolverExample
+     \ingroup Sparse_chapter */
 /** \addtogroup Sparse_Reference
      \ingroup Sparse_chapter */
diff --git a/doc/MatrixfreeSolverExample.dox b/doc/MatrixfreeSolverExample.dox
new file mode 100644
index 000000000..000cb0bbe
--- /dev/null
+++ b/doc/MatrixfreeSolverExample.dox
@@ -0,0 +1,20 @@
+
+namespace Eigen {
+
+/**
+
+\eigenManualPage MatrixfreeSolverExample Matrix-free solvers
+
+Iterative solvers such as ConjugateGradient and BiCGSTAB can be used in a matrix-free context. To this end, the user must provide a wrapper class inheriting EigenBase<> and implementing the following methods:
+ - Index rows() and Index cols(): return the number of rows and columns, respectively
+ - operator* with an %Eigen dense column vector (its actual implementation goes in a specialization of the internal::generic_product_impl class)
+
+Eigen::internal::traits<> must also be specialized for the wrapper type.
+
+Here is a complete example wrapping an Eigen::SparseMatrix:
+\include matrixfree_cg.cpp
+Output: \verbinclude matrixfree_cg.out
+
+*/
+
+}
\ No newline at end of file
diff --git a/doc/PreprocessorDirectives.dox b/doc/PreprocessorDirectives.dox
index 76ce2eb99..7cde1a36f 100644
--- a/doc/PreprocessorDirectives.dox
+++ b/doc/PreprocessorDirectives.dox
@@ -106,6 +106,7 @@ following macros are supported; none of them are defined by default.
 - \b EIGEN_MATRIX_PLUGIN - filename of plugin for extending the Matrix class.
 - \b EIGEN_MATRIXBASE_PLUGIN - filename of plugin for extending the MatrixBase class.
 - \b EIGEN_PLAINOBJECTBASE_PLUGIN - filename of plugin for extending the PlainObjectBase class.
+ - \b EIGEN_MAPBASE_PLUGIN - filename of plugin for extending the MapBase class.
 - \b EIGEN_QUATERNION_PLUGIN - filename of plugin for extending the Quaternion class.
 - \b EIGEN_QUATERNIONBASE_PLUGIN - filename of plugin for extending the QuaternionBase class.
 - \b EIGEN_SPARSEMATRIX_PLUGIN - filename of plugin for extending the SparseMatrix class.
diff --git a/doc/SparseLinearSystems.dox b/doc/SparseLinearSystems.dox
index 48c18f46f..9fb3282e7 100644
--- a/doc/SparseLinearSystems.dox
+++ b/doc/SparseLinearSystems.dox
@@ -4,36 +4,63 @@ In Eigen, there are several methods available to solve linear systems when the c
 \eigenAutoToc
 
-\section TutorialSparseDirectSolvers Sparse solvers
+\section TutorialSparseSolverList List of sparse solvers
 
-%Eigen currently provides a limited set of built-in solvers, as well as wrappers to external solver libraries.
-They are summarized in the following table:
+%Eigen currently provides a wide set of built-in solvers, as well as wrappers to external solver libraries.
+They are summarized in the following tables:
+
+\subsection TutorialSparseSolverList_Direct Built-in direct solvers
+
+<table class="manual">
+<tr><th>Class</th><th>Solver kind</th><th>Matrix kind</th><th>Features related to performance</th>
+    <th>License</th><th class="width20em"><p>Notes</p></th></tr>
+
+<tr><td>SimplicialLLT \n <tt>\#include <Eigen/SparseCholesky></tt></td><td>Direct LLt factorization</td><td>SPD</td><td>Fill-in reducing</td>
+    <td>LGPL</td>
+    <td>SimplicialLDLT is often preferable</td></tr>
+
+<tr><td>SimplicialLDLT \n <tt>\#include <Eigen/SparseCholesky></tt></td><td>Direct LDLt factorization</td><td>SPD</td><td>Fill-in reducing</td>
+    <td>LGPL</td>
+    <td>Recommended for very sparse and not too large problems (e.g., 2D Poisson eq.)</td></tr>
+
+<tr><td>SparseLU \n <tt>\#include <Eigen/SparseLU></tt></td><td>LU factorization</td><td>Square</td><td>Fill-in reducing, Leverage fast dense algebra</td>
+    <td>MPL2</td>
+    <td>optimized for small and large problems with irregular patterns</td></tr>
+
+<tr><td>SparseQR \n <tt>\#include <Eigen/SparseQR></tt></td><td>QR factorization</td><td>Any, rectangular</td><td>Fill-in reducing</td>
+    <td>MPL2</td>
+    <td>recommended for least-square problems, has a basic rank-revealing feature</td></tr>
+</table>
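[Editor's note: to make the direct-solver workflow implied by this table concrete, here is a minimal, self-contained usage sketch. It is not part of the patch; the 3x3 system and its values are invented for the illustration.]

\code
#include <Eigen/Sparse>
#include <Eigen/SparseLU>
#include <iostream>

int main()
{
  // Build a small, made-up sparse system A*x = b.
  Eigen::SparseMatrix<double> A(3,3);
  A.insert(0,0) = 4.0;  A.insert(1,1) = 3.0;  A.insert(2,2) = 5.0;
  A.insert(0,1) = 1.0;  A.insert(1,0) = 1.0;
  A.makeCompressed();

  Eigen::VectorXd b(3);
  b << 1.0, 2.0, 3.0;

  // SparseLU follows the compute()/solve() concept described later on this page.
  Eigen::SparseLU<Eigen::SparseMatrix<double> > solver;
  solver.compute(A);
  if(solver.info() != Eigen::Success)
    return 1;                       // factorization failed
  Eigen::VectorXd x = solver.solve(b);
  std::cout << "solution:\n" << x << std::endl;
  return 0;
}
\endcode

Any solver from the table above can be swapped in for SparseLU without changing the surrounding code; that interchangeability is the point of the common solver concept.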
+
+\subsection TutorialSparseSolverList_Iterative Built-in iterative solvers
+
+<table class="manual">
+<tr><th>Class</th><th>Solver kind</th><th>Matrix kind</th><th>Supported preconditioners, [default]</th>
+    <th>License</th><th class="width20em"><p>Notes</p></th></tr>
+
+<tr><td>ConjugateGradient \n <tt>\#include <Eigen/IterativeLinearSolvers></tt></td><td>Classic iterative CG</td><td>SPD</td><td>IdentityPreconditioner, [DiagonalPreconditioner], IncompleteCholesky</td>
+    <td>MPL2</td>
+    <td>Recommended for large symmetric problems (e.g., 3D Poisson eq.)</td></tr>
+
+<tr><td>LeastSquaresConjugateGradient \n <tt>\#include <Eigen/IterativeLinearSolvers></tt></td><td>CG for rectangular least-square problem</td><td>Rectangular</td><td>IdentityPreconditioner, [LeastSquareDiagonalPreconditioner]</td>
+    <td>MPL2</td>
+    <td>Solve for min |Ax-b|^2 without forming A'A</td></tr>
+
+<tr><td>BiCGSTAB \n <tt>\#include <Eigen/IterativeLinearSolvers></tt></td><td>Iterative stabilized bi-conjugate gradient</td><td>Square</td><td>IdentityPreconditioner, [DiagonalPreconditioner], IncompleteLUT</td>
+    <td>MPL2</td>
+    <td>To speed up the convergence, try it with the \ref IncompleteLUT preconditioner.</td></tr>
+</table>
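[Editor's note: likewise, a minimal sketch of the iterative-solver interface, not part of the patch; the tiny SPD system is invented. setTolerance(), setMaxIterations(), iterations() and error() are the standard controls and statistics of Eigen's iterative solvers.]

\code
#include <Eigen/Sparse>
#include <Eigen/IterativeLinearSolvers>
#include <iostream>

int main()
{
  // Arbitrary small SPD system for demonstration purposes.
  Eigen::SparseMatrix<double> A(2,2);
  A.insert(0,0) = 2.0;  A.insert(1,1) = 2.0;
  A.makeCompressed();
  Eigen::VectorXd b(2);
  b << 1.0, 1.0;

  Eigen::ConjugateGradient<Eigen::SparseMatrix<double>, Eigen::Lower|Eigen::Upper> cg;
  cg.setTolerance(1e-8);      // stop once the relative residual is small enough
  cg.setMaxIterations(100);
  cg.compute(A);
  Eigen::VectorXd x = cg.solve(b);
  std::cout << "#iterations: " << cg.iterations()
            << ", estimated error: " << cg.error() << std::endl;
  return 0;
}
\endcode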
+
+\subsection TutorialSparseSolverList_Wrapper Wrappers to external solvers
 
 <table class="manual">
 <tr><th>Class</th><th>Module</th><th>Solver kind</th><th>Matrix kind</th><th>Features related to performance</th>
     <th>Dependencies,License</th><th class="width20em"><p>Notes</p></th></tr>
-<tr><td>SimplicialLLT </td><td>\link SparseCholesky_Module SparseCholesky \endlink</td><td>Direct LLt factorization</td><td>SPD</td><td>Fill-in reducing</td>
-    <td>built-in, LGPL</td>
-    <td>SimplicialLDLT is often preferable</td></tr>
-<tr><td>SimplicialLDLT </td><td>\link SparseCholesky_Module SparseCholesky \endlink</td><td>Direct LDLt factorization</td><td>SPD</td><td>Fill-in reducing</td>
-    <td>built-in, LGPL</td>
-    <td>Recommended for very sparse and not too large problems (e.g., 2D Poisson eq.)</td></tr>
-<tr><td>ConjugateGradient</td><td>\link IterativeLinearSolvers_Module IterativeLinearSolvers \endlink</td><td>Classic iterative CG</td><td>SPD</td><td>Preconditionning</td>
-    <td>built-in, MPL2</td>
-    <td>Recommended for large symmetric problems (e.g., 3D Poisson eq.)</td></tr>
-<tr><td>LeastSquaresConjugateGradient</td><td>\link IterativeLinearSolvers_Module IterativeLinearSolvers \endlink</td><td>CG for rectangular least-square problem</td><td>Rectangular</td><td>Preconditionning</td>
-    <td>built-in, MPL2</td>
-    <td>Solve for min |A'Ax-b|^2 without forming A'A</td></tr>
-<tr><td>BiCGSTAB</td><td>\link IterativeLinearSolvers_Module IterativeLinearSolvers \endlink</td><td>Iterative stabilized bi-conjugate gradient</td><td>Square</td><td>Preconditionning</td>
-    <td>built-in, MPL2</td>
-    <td>To speedup the convergence, try it with the \ref IncompleteLUT preconditioner.</td></tr>
-<tr><td>SparseLU</td><td>\link SparseLU_Module SparseLU \endlink</td><td>LU factorization</td><td>Square</td><td>Fill-in reducing, Leverage fast dense algebra</td>
-    <td>built-in, MPL2</td>
-    <td>optimized for small and large problems with irregular patterns</td></tr>
-<tr><td>SparseQR</td><td>\link SparseQR_Module SparseQR \endlink</td><td>QR factorization</td><td>Any, rectangular</td><td>Fill-in reducing</td>
-    <td>built-in, MPL2</td>
-    <td>recommended for least-square problems, has a basic rank-revealing feature</td></tr>
-<tr><td colspan="7">Wrappers to external solvers</td></tr>
 <tr><td>PastixLLT \n PastixLDLT \n PastixLU</td><td>\link PaStiXSupport_Module PaStiXSupport \endlink</td><td>Direct LLt, LDLt, LU factorizations</td><td>SPD \n SPD \n Square</td><td>Fill-in reducing, Leverage fast dense algebra, Multithreading</td>
     <td>Requires the PaStiX package, \b CeCILL-C</td>
     <td>optimized for tough problems and symmetric patterns</td></tr>
@@ -53,6 +80,8 @@ They are summarized in the following table:
 
 Here \c SPD means symmetric positive definite.
 
+\section TutorialSparseSolverConcept Sparse solver concept
+
 All these solvers follow the same general concept.
 Here is a typical and general example:
 \code
@@ -104,9 +133,11 @@ x2 = solver.solve(b2);
 \endcode
 The compute() method is equivalent to calling both analyzePattern() and factorize().
 
-Finally, each solver provides some specific features, such as determinant, access to the factors, controls of the iterations, and so on.
+Each solver provides some specific features, such as determinant, access to the factors, controls of the iterations, and so on.
 More details are available in the documentations of the respective classes.
 
+Finally, most of the iterative solvers can also be used in a \b matrix-free context; see the following \link MatrixfreeSolverExample example \endlink.
+
 \section TheSparseCompute The Compute Step
 In the compute() function, the matrix is generally factorized: LLT for self-adjoint matrices, LDLT for general hermitian matrices, LU for non hermitian matrices and QR for rectangular matrices. These are the results of using direct solvers. For this class of solvers precisely, the compute step is further subdivided into analyzePattern() and factorize().
diff --git a/doc/TopicMultithreading.dox b/doc/TopicMultithreading.dox
index 95f6bf287..47c9b261f 100644
--- a/doc/TopicMultithreading.dox
+++ b/doc/TopicMultithreading.dox
@@ -43,6 +43,8 @@ int main(int argc, char** argv)
 }
 \endcode
 
+\note With Eigen 3.3 and a fully C++11 compliant compiler (i.e., thread-safe static local variable initialization), calling \c initParallel() is optional.
+
 \warning Note that all functions generating random matrices are \b not re-entrant nor thread-safe. Those include DenseBase::Random(), and DenseBase::setRandom() despite a call to Eigen::initParallel(). This is because these functions are based on std::rand which is not re-entrant. For a thread-safe random generator, we recommend the use of boost::random or the C++11 random feature.
 
 In the case your application is parallelized with OpenMP, you might want to disable Eigen's own parallelization as detailed in the previous section.
diff --git a/doc/TutorialReductionsVisitorsBroadcasting.dox b/doc/TutorialReductionsVisitorsBroadcasting.dox
index eb6787dbc..908a1b4b2 100644
--- a/doc/TutorialReductionsVisitorsBroadcasting.dox
+++ b/doc/TutorialReductionsVisitorsBroadcasting.dox
@@ -32,7 +32,7 @@ Eigen also provides the \link MatrixBase::norm() norm() \endlink method, which r
 These operations can also operate on matrices; in that case, an n-by-p matrix is seen as a vector of size (n*p), so for example the \link MatrixBase::norm() norm() \endlink method returns the "Frobenius" or "Hilbert-Schmidt" norm. We refrain from speaking of the \f$\ell^2\f$ norm of a matrix because that can mean different things.
 
-If you want other \f$\ell^p\f$ norms, use the \link MatrixBase::lpNorm() lpNorm<p>() \endlink method. The template parameter \a p can take the special value \a Infinity if you want the \f$\ell^\infty\f$ norm, which is the maximum of the absolute values of the coefficients.
+If you want other coefficient-wise \f$\ell^p\f$ norms, use the \link MatrixBase::lpNorm() lpNorm<p>() \endlink method. The template parameter \a p can take the special value \a Infinity if you want the \f$\ell^\infty\f$ norm, which is the maximum of the absolute values of the coefficients.
 
 The following example demonstrates these methods.
 
@@ -45,6 +45,17 @@ The following example demonstrates these methods.
 \verbinclude Tutorial_ReductionsVisitorsBroadcasting_reductions_norm.out
 </td></tr></table>
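[Editor's note: a tiny self-contained illustration of the lpNorm<p>() method discussed above, not part of the patch.]

\code
#include <Eigen/Dense>
#include <iostream>

int main()
{
  Eigen::VectorXd v(3);
  v << 1.0, -2.0, 3.0;
  std::cout << v.lpNorm<1>() << std::endl;                // 6 (sum of absolute values)
  std::cout << v.lpNorm<Eigen::Infinity>() << std::endl;  // 3 (max absolute value)
  std::cout << v.norm() << std::endl;                     // sqrt(14), same as lpNorm<2>()
  return 0;
}
\endcode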

+\b Operator \b norm: The 1-norm and \f$\infty\f$-norm matrix operator norms can easily be computed as follows:
+<table class="example">
+<tr><th>Example:</th><th>Output:</th></tr>
+<tr><td>
+\include Tutorial_ReductionsVisitorsBroadcasting_reductions_operatornorm.cpp
+</td>
+<td>
+\verbinclude Tutorial_ReductionsVisitorsBroadcasting_reductions_operatornorm.out
+</td></tr></table>
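[Editor's note: for reference, the identities this example relies on are the standard operator-norm formulas; this note is not part of the patch.]

\f[
  \|A\|_1 = \max_{j}\sum_{i} |a_{ij}|, \qquad
  \|A\|_\infty = \max_{i}\sum_{j} |a_{ij}|,
\f]

so the 1-norm is a column-wise absolute sum followed by a maximum over columns, and the \f$\infty\f$-norm is the row-wise analogue, exactly as coded in the example above.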
+See below for more explanations on the syntax of these expressions.
+
 \subsection TutorialReductionsVisitorsBroadcastingReductionsBool Boolean reductions
 
 The following reductions operate on boolean values:
diff --git a/doc/TutorialSparse.dox b/doc/TutorialSparse.dox
index 835c59354..fb07adaa2 100644
--- a/doc/TutorialSparse.dox
+++ b/doc/TutorialSparse.dox
@@ -83,7 +83,7 @@ There is no notion of compressed/uncompressed mode for a SparseVector.
 
 \section TutorialSparseExample First example
 
-Before describing each individual class, let's start with the following typical example: solving the Laplace equation \f$ \nabla u = 0 \f$ on a regular 2D grid using a finite difference scheme and Dirichlet boundary conditions.
+Before describing each individual class, let's start with the following typical example: solving the Laplace equation \f$ \Delta u = 0 \f$ on a regular 2D grid using a finite difference scheme and Dirichlet boundary conditions.
 Such problem can be mathematically expressed as a linear problem of the form \f$ Ax=b \f$ where \f$ x \f$ is the vector of \c m unknowns (in our case, the values of the pixels), \f$ b \f$ is the right hand side vector resulting from the boundary conditions, and \f$ A \f$ is an \f$ m \times m \f$ matrix containing only a few non-zero elements resulting from the discretization of the Laplacian operator.
diff --git a/doc/examples/Tutorial_ReductionsVisitorsBroadcasting_reductions_operatornorm.cpp b/doc/examples/Tutorial_ReductionsVisitorsBroadcasting_reductions_operatornorm.cpp
new file mode 100644
index 000000000..62e28fc31
--- /dev/null
+++ b/doc/examples/Tutorial_ReductionsVisitorsBroadcasting_reductions_operatornorm.cpp
@@ -0,0 +1,18 @@
+#include <Eigen/Dense>
+#include <iostream>
+
+using namespace Eigen;
+using namespace std;
+
+int main()
+{
+  MatrixXf m(2,2);
+  m << 1,-2,
+       -3,4;
+
+  cout << "1-norm(m)     = " << m.cwiseAbs().colwise().sum().maxCoeff()
+       << " == "             << m.colwise().lpNorm<1>().maxCoeff() << endl;
+
+  cout << "infty-norm(m) = " << m.cwiseAbs().rowwise().sum().maxCoeff()
+       << " == "             << m.rowwise().lpNorm<1>().maxCoeff() << endl;
+}
diff --git a/doc/examples/matrixfree_cg.cpp b/doc/examples/matrixfree_cg.cpp
new file mode 100644
index 000000000..6a205aea3
--- /dev/null
+++ b/doc/examples/matrixfree_cg.cpp
@@ -0,0 +1,128 @@
+#include <iostream>
+#include <Eigen/Core>
+#include <Eigen/Dense>
+#include <Eigen/IterativeLinearSolvers>
+#include <unsupported/Eigen/IterativeSolvers>
+
+class MatrixReplacement;
+using Eigen::SparseMatrix;
+
+namespace Eigen {
+namespace internal {
+  // MatrixReplacement looks like a SparseMatrix, so let's inherit its traits:
+  template<>
+  struct traits<MatrixReplacement> :  public Eigen::internal::traits<Eigen::SparseMatrix<double> >
+  {};
+}
+}
+
+// Example of a matrix-free wrapper from a user type to Eigen's compatible type.
+// For the sake of simplicity, this example simply wraps an Eigen::SparseMatrix.
+class MatrixReplacement : public Eigen::EigenBase<MatrixReplacement> {
+public:
+  // Required typedefs, constants, and method:
+  typedef double Scalar;
+  typedef double RealScalar;
+  typedef int StorageIndex;
+  enum {
+    ColsAtCompileTime = Eigen::Dynamic,
+    MaxColsAtCompileTime = Eigen::Dynamic,
+    IsRowMajor = false
+  };
+
+  Index rows() const { return mp_mat->rows(); }
+  Index cols() const { return mp_mat->cols(); }
+
+  template<typename Rhs>
+  Eigen::Product<MatrixReplacement,Rhs,Eigen::AliasFreeProduct> operator*(const Eigen::MatrixBase<Rhs>& x) const {
+    return Eigen::Product<MatrixReplacement,Rhs,Eigen::AliasFreeProduct>(*this, x.derived());
+  }
+
+  // Custom API:
+  MatrixReplacement() : mp_mat(0) {}
+
+  void attachMyMatrix(const SparseMatrix<double> &mat) {
+    mp_mat = &mat;
+  }
+  const SparseMatrix<double> my_matrix() const { return *mp_mat; }
+
+private:
+  const SparseMatrix<double> *mp_mat;
+};
+
+
+// Implementation of MatrixReplacement * Eigen::DenseVector though a specialization of internal::generic_product_impl:
+namespace Eigen {
+namespace internal {
+
+  template<typename Rhs>
+  struct generic_product_impl<MatrixReplacement, Rhs, SparseShape, DenseShape, GemvProduct> // GEMV stands for matrix-vector
+  : generic_product_impl_base<MatrixReplacement,Rhs,generic_product_impl<MatrixReplacement,Rhs> >
+  {
+    typedef typename Product<MatrixReplacement,Rhs>::Scalar Scalar;
+
+    template<typename Dest>
+    static void scaleAndAddTo(Dest& dst, const MatrixReplacement& lhs, const Rhs& rhs, const Scalar& alpha)
+    {
+      // This method should implement "dst += alpha * lhs * rhs" inplace,
+      // however, for iterative solvers, alpha is always equal to 1, so let's not bother about it.
+      assert(alpha==Scalar(1) && "scaling is not implemented");
+
+      // Here we could simply call dst.noalias() += lhs.my_matrix() * rhs,
+      // but let's do something fancier (and less efficient):
+      for(Index i=0; i<lhs.cols(); ++i)
+        dst += rhs(i) * lhs.my_matrix().col(i);
+    }
+  };
+
+}
+}
+
+int main()
+{
+  int n = 10;
+  Eigen::SparseMatrix<double> S = Eigen::MatrixXd::Random(n,n).sparseView(0.5,1);
+  S = S.transpose()*S;
+
+  MatrixReplacement A;
+  A.attachMyMatrix(S);
+
+  Eigen::VectorXd b(n), x;
+  b.setRandom();
+
+  // Solve Ax = b using various iterative solvers with the matrix-free version:
+  {
+    Eigen::ConjugateGradient<MatrixReplacement, Eigen::Lower|Eigen::Upper, Eigen::IdentityPreconditioner> cg;
+    cg.compute(A);
+    x = cg.solve(b);
+    std::cout << "CG:       #iterations: " << cg.iterations() << ", estimated error: " << cg.error() << std::endl;
+  }
+
+  {
+    Eigen::BiCGSTAB<MatrixReplacement, Eigen::IdentityPreconditioner> bicg;
+    bicg.compute(A);
+    x = bicg.solve(b);
+    std::cout << "BiCGSTAB: #iterations: " << bicg.iterations() << ", estimated error: " << bicg.error() << std::endl;
+  }
+
+  {
+    Eigen::GMRES<MatrixReplacement, Eigen::IdentityPreconditioner> gmres;
+    gmres.compute(A);
+    x = gmres.solve(b);
+    std::cout << "GMRES:    #iterations: " << gmres.iterations() << ", estimated error: " << gmres.error() << std::endl;
+  }
+
+  {
+    Eigen::DGMRES<MatrixReplacement, Eigen::IdentityPreconditioner> gmres;
+    gmres.compute(A);
+    x = gmres.solve(b);
+    std::cout << "DGMRES:   #iterations: " << gmres.iterations() << ", estimated error: " << gmres.error() << std::endl;
+  }
+
+  {
+    Eigen::MINRES<MatrixReplacement, Eigen::Lower|Eigen::Upper, Eigen::IdentityPreconditioner> minres;
+    minres.compute(A);
+    x = minres.solve(b);
+    std::cout << "MINRES:   #iterations: " << minres.iterations() << ", estimated error: " << minres.error() << std::endl;
+  }
+}
diff --git a/doc/special_examples/random_cpp11.cpp b/doc/special_examples/random_cpp11.cpp
index ccd7c77d0..adc3c110c 100644
--- a/doc/special_examples/random_cpp11.cpp
+++ b/doc/special_examples/random_cpp11.cpp
@@ -7,7 +7,7 @@ using namespace Eigen;
 int main() {
   std::default_random_engine generator;
   std::poisson_distribution<int> distribution(4.1);
-  auto poisson = [&] (int) {return distribution(generator);};
+  auto poisson = [&] (Eigen::Index) {return distribution(generator);};
 
   RowVectorXi v = RowVectorXi::NullaryExpr(10, poisson );
   std::cout << v << "\n";
diff --git a/eigen3.pc.in b/eigen3.pc.in
index c5855de33..3368a3aa1 100644
--- a/eigen3.pc.in
+++ b/eigen3.pc.in
@@ -1,6 +1,9 @@
+prefix=@CMAKE_INSTALL_PREFIX@
+exec_prefix=${prefix}
+
 Name: Eigen3
 Description: A
C++ template library for linear algebra: vectors, matrices, and related algorithms Requires: -Version: ${EIGEN_VERSION_NUMBER} +Version: @EIGEN_VERSION_NUMBER@ Libs: -Cflags: -I${INCLUDE_INSTALL_DIR} +Cflags: -I${prefix}/@INCLUDE_INSTALL_DIR@ diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 767e82f21..bbebf29cd 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -68,7 +68,7 @@ else() ei_add_property(EIGEN_MISSING_BACKENDS "UmfPack, ") endif() -find_package(SuperLU) +find_package(SuperLU 4.0) if(SUPERLU_FOUND) add_definitions("-DEIGEN_SUPERLU_SUPPORT") include_directories(${SUPERLU_INCLUDES}) @@ -236,6 +236,7 @@ ei_add_test(sparse_solvers) ei_add_test(sparse_permutations) ei_add_test(simplicial_cholesky) ei_add_test(conjugate_gradient) +ei_add_test(incomplete_cholesky) ei_add_test(bicgstab) ei_add_test(lscg) ei_add_test(sparselu) @@ -254,6 +255,19 @@ ei_add_test(special_numbers) ei_add_test(rvalue_types) ei_add_test(dense_storage) ei_add_test(ctorleak) +ei_add_test(mpl2only) + +check_cxx_compiler_flag("-ffast-math" COMPILER_SUPPORT_FASTMATH) +if(COMPILER_SUPPORT_FASTMATH) + set(EIGEN_FASTMATH_FLAGS "-ffast-math") +else() + check_cxx_compiler_flag("/fp:fast" COMPILER_SUPPORT_FPFAST) + if(COMPILER_SUPPORT_FPFAST) + set(EIGEN_FASTMATH_FLAGS "/fp:fast") + endif() +endif() + +ei_add_test(fastmath " ${EIGEN_FASTMATH_FLAGS} ") # # ei_add_test(denseLM) diff --git a/test/array.cpp b/test/array.cpp index 9f61c4b26..6adedfb06 100644 --- a/test/array.cpp +++ b/test/array.cpp @@ -202,7 +202,7 @@ template void array_real(const ArrayType& m) m2 = ArrayType::Random(rows, cols), m3(rows, cols), m4 = m1; - + m4 = (m4.abs()==Scalar(0)).select(1,m4); Scalar s1 = internal::random(); @@ -217,6 +217,11 @@ template void array_real(const ArrayType& m) VERIFY_IS_APPROX(m1.sinh(), sinh(m1)); VERIFY_IS_APPROX(m1.cosh(), cosh(m1)); VERIFY_IS_APPROX(m1.tanh(), tanh(m1)); +#ifdef EIGEN_HAS_C99_MATH + VERIFY_IS_APPROX(m1.lgamma(), lgamma(m1)); + VERIFY_IS_APPROX(m1.erf(), erf(m1)); + VERIFY_IS_APPROX(m1.erfc(), erfc(m1)); +#endif // EIGEN_HAS_C99_MATH VERIFY_IS_APPROX(m1.arg(), arg(m1)); VERIFY_IS_APPROX(m1.round(), round(m1)); VERIFY_IS_APPROX(m1.floor(), floor(m1)); @@ -230,11 +235,13 @@ template void array_real(const ArrayType& m) VERIFY_IS_APPROX(m1.square(), square(m1)); VERIFY_IS_APPROX(m1.cube(), cube(m1)); VERIFY_IS_APPROX(cos(m1+RealScalar(3)*m2), cos((m1+RealScalar(3)*m2).eval())); + VERIFY_IS_APPROX(m1.sign(), sign(m1)); // avoid NaNs with abs() so verification doesn't fail m3 = m1.abs(); VERIFY_IS_APPROX(m3.sqrt(), sqrt(abs(m1))); + VERIFY_IS_APPROX(m3.rsqrt(), Scalar(1)/sqrt(abs(m1))); VERIFY_IS_APPROX(m3.log(), log(m3)); VERIFY_IS_APPROX(m3.log10(), log10(m3)); @@ -247,7 +254,7 @@ template void array_real(const ArrayType& m) VERIFY_IS_APPROX(sinh(m1), 0.5*(exp(m1)-exp(-m1))); VERIFY_IS_APPROX(cosh(m1), 0.5*(exp(m1)+exp(-m1))); VERIFY_IS_APPROX(tanh(m1), (0.5*(exp(m1)-exp(-m1)))/(0.5*(exp(m1)+exp(-m1)))); - VERIFY_IS_APPROX(arg(m1), ((ArrayType)(m1<0))*std::acos(-1.0)); + VERIFY_IS_APPROX(arg(m1), ((m1<0).template cast())*std::acos(-1.0)); VERIFY((round(m1) <= ceil(m1) && round(m1) >= floor(m1)).all()); VERIFY((Eigen::isnan)((m1*0.0)/0.0).all()); VERIFY((Eigen::isinf)(m4/0.0).all()); @@ -255,6 +262,9 @@ template void array_real(const ArrayType& m) VERIFY_IS_APPROX(inverse(inverse(m1)),m1); VERIFY((abs(m1) == m1 || abs(m1) == -m1).all()); VERIFY_IS_APPROX(m3, sqrt(abs2(m1))); + VERIFY_IS_APPROX( m1.sign(), -(-m1).sign() ); + VERIFY_IS_APPROX( m1*m1.sign(),m1.abs()); + 
VERIFY_IS_APPROX(m1.sign() * m1.abs(), m1); VERIFY_IS_APPROX(numext::abs2(numext::real(m1)) + numext::abs2(numext::imag(m1)), numext::abs2(m1)); VERIFY_IS_APPROX(numext::abs2(real(m1)) + numext::abs2(imag(m1)), numext::abs2(m1)); @@ -288,6 +298,10 @@ template void array_real(const ArrayType& m) VERIFY_IS_APPROX(m3.pow(RealScalar(0.5)), m3.sqrt()); VERIFY_IS_APPROX(pow(m3,RealScalar(0.5)), m3.sqrt()); + + VERIFY_IS_APPROX(m3.pow(RealScalar(-0.5)), m3.rsqrt()); + VERIFY_IS_APPROX(pow(m3,RealScalar(-0.5)), m3.rsqrt()); + VERIFY_IS_APPROX(log10(m3), log(m3)/log(10)); // scalar by array division @@ -348,6 +362,7 @@ template void array_complex(const ArrayType& m) VERIFY_IS_APPROX(m1.square(), square(m1)); VERIFY_IS_APPROX(m1.cube(), cube(m1)); VERIFY_IS_APPROX(cos(m1+RealScalar(3)*m2), cos((m1+RealScalar(3)*m2).eval())); + VERIFY_IS_APPROX(m1.sign(), sign(m1)); VERIFY_IS_APPROX(m1.exp() * m2.exp(), exp(m1+m2)); @@ -365,11 +380,15 @@ template void array_complex(const ArrayType& m) std::complex zero(0.0,0.0); VERIFY((Eigen::isnan)(m1*zero/zero).all()); +#if EIGEN_COMP_MSVC + // msvc complex division is not robust + VERIFY((Eigen::isinf)(m4/RealScalar(0)).all()); +#else #if EIGEN_COMP_CLANG - // clang's complex division is notoriously broken + // clang's complex division is notoriously broken too if((numext::isinf)(m4(0,0)/RealScalar(0))) { #endif - VERIFY((Eigen::isinf)(m4/zero).all()); + VERIFY((Eigen::isinf)(m4/zero).all()); #if EIGEN_COMP_CLANG } else @@ -377,6 +396,8 @@ template void array_complex(const ArrayType& m) VERIFY((Eigen::isinf)(m4.real()/zero.real()).all()); } #endif +#endif // MSVC + VERIFY(((Eigen::isfinite)(m1) && (!(Eigen::isfinite)(m1*zero/zero)) && (!(Eigen::isfinite)(m1/zero))).all()); VERIFY_IS_APPROX(inverse(inverse(m1)),m1); @@ -385,6 +406,9 @@ template void array_complex(const ArrayType& m) VERIFY_IS_APPROX(abs(m1), sqrt(abs2(m1))); VERIFY_IS_APPROX(log10(m1), log(m1)/log(10)); + VERIFY_IS_APPROX( m1.sign(), -(-m1).sign() ); + VERIFY_IS_APPROX( m1.sign() * m1.abs(), m1); + // scalar by array division const RealScalar tiny = sqrt(std::numeric_limits::epsilon()); s1 += Scalar(tiny); diff --git a/test/dynalloc.cpp b/test/dynalloc.cpp index 3d895f2e0..6f22e1ab4 100644 --- a/test/dynalloc.cpp +++ b/test/dynalloc.cpp @@ -129,13 +129,6 @@ void test_dynalloc() for (int i=0; i() ); - CALL_SUBTEST(check_dynaligned() ); - CALL_SUBTEST(check_dynaligned() ); - CALL_SUBTEST(check_dynaligned() ); - CALL_SUBTEST(check_dynaligned() ); - CALL_SUBTEST(check_dynaligned() ); - CALL_SUBTEST( check_custom_new_delete() ); CALL_SUBTEST( check_custom_new_delete() ); CALL_SUBTEST( check_custom_new_delete() ); @@ -144,6 +137,16 @@ void test_dynalloc() // check static allocation, who knows ? 
#if EIGEN_MAX_STATIC_ALIGN_BYTES + for (int i=0; i() ); + CALL_SUBTEST(check_dynaligned() ); + CALL_SUBTEST(check_dynaligned() ); + CALL_SUBTEST(check_dynaligned() ); + CALL_SUBTEST(check_dynaligned() ); + CALL_SUBTEST(check_dynaligned() ); + } + { MyStruct foo0; VERIFY(size_t(foo0.avec.data())%ALIGNMENT==0); MyClassA fooA; VERIFY(size_t(fooA.avec.data())%ALIGNMENT==0); diff --git a/test/eigensolver_complex.cpp b/test/eigensolver_complex.cpp index 0d4e2dc87..8e2bb9ef0 100644 --- a/test/eigensolver_complex.cpp +++ b/test/eigensolver_complex.cpp @@ -118,13 +118,19 @@ template void eigensolver(const MatrixType& m) MatrixType id = MatrixType::Identity(rows, cols); VERIFY_IS_APPROX(id.operatorNorm(), RealScalar(1)); - if (rows > 1) + if (rows > 1 && rows < 20) { // Test matrix with NaN a(0,0) = std::numeric_limits::quiet_NaN(); ComplexEigenSolver eiNaN(a); VERIFY_IS_EQUAL(eiNaN.info(), NoConvergence); } + + // regression test for bug 1098 + { + ComplexEigenSolver eig(a.adjoint() * a); + eig.compute(a.adjoint() * a); + } } template void eigensolver_verify_assert(const MatrixType& m) diff --git a/test/eigensolver_generalized_real.cpp b/test/eigensolver_generalized_real.cpp index 566a4bdc6..a46a2e50e 100644 --- a/test/eigensolver_generalized_real.cpp +++ b/test/eigensolver_generalized_real.cpp @@ -39,6 +39,14 @@ template void generalized_eigensolver_real(const MatrixType VectorType realEigenvalues = eig.eigenvalues().real(); std::sort(realEigenvalues.data(), realEigenvalues.data()+realEigenvalues.size()); VERIFY_IS_APPROX(realEigenvalues, symmEig.eigenvalues()); + + // regression test for bug 1098 + { + GeneralizedSelfAdjointEigenSolver eig1(a.adjoint() * a,b.adjoint() * b); + eig1.compute(a.adjoint() * a,b.adjoint() * b); + GeneralizedEigenSolver eig2(a.adjoint() * a,b.adjoint() * b); + eig2.compute(a.adjoint() * a,b.adjoint() * b); + } } void test_eigensolver_generalized_real() diff --git a/test/eigensolver_generic.cpp b/test/eigensolver_generic.cpp index c5441ac4e..566546310 100644 --- a/test/eigensolver_generic.cpp +++ b/test/eigensolver_generic.cpp @@ -63,13 +63,19 @@ template void eigensolver(const MatrixType& m) MatrixType id = MatrixType::Identity(rows, cols); VERIFY_IS_APPROX(id.operatorNorm(), RealScalar(1)); - if (rows > 2) + if (rows > 2 && rows < 20) { // Test matrix with NaN a(0,0) = std::numeric_limits::quiet_NaN(); EigenSolver eiNaN(a); VERIFY_IS_EQUAL(eiNaN.info(), NoConvergence); } + + // regression test for bug 1098 + { + EigenSolver eig(a.adjoint() * a); + eig.compute(a.adjoint() * a); + } } template void eigensolver_verify_assert(const MatrixType& m) diff --git a/test/eigensolver_selfadjoint.cpp b/test/eigensolver_selfadjoint.cpp index 41b6d99ab..f909761a1 100644 --- a/test/eigensolver_selfadjoint.cpp +++ b/test/eigensolver_selfadjoint.cpp @@ -130,13 +130,13 @@ template void selfadjointeigensolver(const MatrixType& m) Tridiagonalization tridiag(symmC); VERIFY_IS_APPROX(tridiag.diagonal(), tridiag.matrixT().diagonal()); VERIFY_IS_APPROX(tridiag.subDiagonal(), tridiag.matrixT().template diagonal<-1>()); - MatrixType T = tridiag.matrixT(); + Matrix T = tridiag.matrixT(); if(rows>1 && cols>1) { // FIXME check that upper and lower part are 0: //VERIFY(T.topRightCorner(rows-2, cols-2).template triangularView().isZero()); } - VERIFY_IS_APPROX(tridiag.diagonal(), T.diagonal().real()); - VERIFY_IS_APPROX(tridiag.subDiagonal(), T.template diagonal<1>().real()); + VERIFY_IS_APPROX(tridiag.diagonal(), T.diagonal()); + VERIFY_IS_APPROX(tridiag.subDiagonal(), T.template diagonal<1>()); 
VERIFY_IS_APPROX(MatrixType(symmC.template selfadjointView()), tridiag.matrixQ() * tridiag.matrixT().eval() * MatrixType(tridiag.matrixQ()).adjoint()); VERIFY_IS_APPROX(MatrixType(symmC.template selfadjointView()), tridiag.matrixQ() * tridiag.matrixT() * tridiag.matrixQ().adjoint()); @@ -149,13 +149,19 @@ template void selfadjointeigensolver(const MatrixType& m) VERIFY_IS_APPROX(tridiag.matrixT(), eiSymmTridiag.eigenvectors().real() * eiSymmTridiag.eigenvalues().asDiagonal() * eiSymmTridiag.eigenvectors().real().transpose()); } - if (rows > 1) + if (rows > 1 && rows < 20) { // Test matrix with NaN symmC(0,0) = std::numeric_limits::quiet_NaN(); SelfAdjointEigenSolver eiSymmNaN(symmC); VERIFY_IS_EQUAL(eiSymmNaN.info(), NoConvergence); } + + // regression test for bug 1098 + { + SelfAdjointEigenSolver eig(a.adjoint() * a); + eig.compute(a.adjoint() * a); + } } void bug_854() diff --git a/test/evaluators.cpp b/test/evaluators.cpp index f41968da8..876dffe22 100644 --- a/test/evaluators.cpp +++ b/test/evaluators.cpp @@ -2,6 +2,20 @@ #include "main.h" namespace Eigen { + + template + const Product + prod(const Lhs& lhs, const Rhs& rhs) + { + return Product(lhs,rhs); + } + + template + const Product + lazyprod(const Lhs& lhs, const Rhs& rhs) + { + return Product(lhs,rhs); + } template EIGEN_STRONG_INLINE @@ -69,9 +83,18 @@ namespace Eigen { typedef typename DstXprType::Scalar Scalar; call_assignment(dst.const_cast_derived(), src.const_cast_derived(), internal::swap_assign_op()); } + + namespace internal { + template class StorageBase, typename Src, typename Func> + EIGEN_DEVICE_FUNC void call_assignment(const NoAlias& dst, const Src& src, const Func& func) + { + call_assignment_no_alias(dst.expression(), src, func); + } + } } +template long get_cost(const XprType& ) { return Eigen::internal::evaluator::CoeffReadCost; } using namespace std; @@ -448,7 +471,6 @@ void test_evaluators() VERIFY_IS_APPROX_EVALUATOR2(B, prod(A.triangularView(),A), MatrixXd(A.triangularView()*A)); VERIFY_IS_APPROX_EVALUATOR2(B, prod(A.selfadjointView(),A), MatrixXd(A.selfadjointView()*A)); - } { @@ -459,6 +481,19 @@ void test_evaluators() VERIFY_IS_APPROX_EVALUATOR2(B, lazyprod(d.asDiagonal(),A), MatrixXd(d.asDiagonal()*A)); VERIFY_IS_APPROX_EVALUATOR2(B, lazyprod(A,d.asDiagonal()), MatrixXd(A*d.asDiagonal())); - + } + + { + // test CoeffReadCost + Matrix4d a, b; + VERIFY_IS_EQUAL( get_cost(a), 1 ); + VERIFY_IS_EQUAL( get_cost(a+b), 3); + VERIFY_IS_EQUAL( get_cost(2*a+b), 4); + VERIFY_IS_EQUAL( get_cost(a*b), 1); + VERIFY_IS_EQUAL( get_cost(a.lazyProduct(b)), 15); + VERIFY_IS_EQUAL( get_cost(a*(a*b)), 1); + VERIFY_IS_EQUAL( get_cost(a.lazyProduct(a*b)), 15); + VERIFY_IS_EQUAL( get_cost(a*(a+b)), 1); + VERIFY_IS_EQUAL( get_cost(a.lazyProduct(a+b)), 15); } } diff --git a/test/fastmath.cpp b/test/fastmath.cpp new file mode 100644 index 000000000..efdd5b313 --- /dev/null +++ b/test/fastmath.cpp @@ -0,0 +1,98 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2015 Gael Guennebaud +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ +#include "main.h" + +void check(bool b, bool ref) +{ + std::cout << b; + if(b==ref) + std::cout << " OK "; + else + std::cout << " BAD "; +} + +#if EIGEN_COMP_MSVC && EIGEN_COMP_MSVC < 1800 +namespace std { + template bool (isfinite)(T x) { return _finite(x); } + template bool (isnan)(T x) { return _isnan(x); } + template bool (isinf)(T x) { return _fpclass(x)==_FPCLASS_NINF || _fpclass(x)==_FPCLASS_PINF; } +} +#endif + +template +void check_inf_nan(bool dryrun) { + Matrix m(10); + m.setRandom(); + m(3) = std::numeric_limits::quiet_NaN(); + + if(dryrun) + { + std::cout << "std::isfinite(" << m(3) << ") = "; check((std::isfinite)(m(3)),false); std::cout << " ; numext::isfinite = "; check((numext::isfinite)(m(3)), false); std::cout << "\n"; + std::cout << "std::isinf(" << m(3) << ") = "; check((std::isinf)(m(3)),false); std::cout << " ; numext::isinf = "; check((numext::isinf)(m(3)), false); std::cout << "\n"; + std::cout << "std::isnan(" << m(3) << ") = "; check((std::isnan)(m(3)),true); std::cout << " ; numext::isnan = "; check((numext::isnan)(m(3)), true); std::cout << "\n"; + std::cout << "allFinite: "; check(m.allFinite(), 0); std::cout << "\n"; + std::cout << "hasNaN: "; check(m.hasNaN(), 1); std::cout << "\n"; + std::cout << "\n"; + } + else + { + VERIFY( !(numext::isfinite)(m(3)) ); + VERIFY( !(numext::isinf)(m(3)) ); + VERIFY( (numext::isnan)(m(3)) ); + VERIFY( !m.allFinite() ); + VERIFY( m.hasNaN() ); + } + m(4) /= 0.0; + if(dryrun) + { + std::cout << "std::isfinite(" << m(4) << ") = "; check((std::isfinite)(m(4)),false); std::cout << " ; numext::isfinite = "; check((numext::isfinite)(m(4)), false); std::cout << "\n"; + std::cout << "std::isinf(" << m(4) << ") = "; check((std::isinf)(m(4)),true); std::cout << " ; numext::isinf = "; check((numext::isinf)(m(4)), true); std::cout << "\n"; + std::cout << "std::isnan(" << m(4) << ") = "; check((std::isnan)(m(4)),false); std::cout << " ; numext::isnan = "; check((numext::isnan)(m(4)), false); std::cout << "\n"; + std::cout << "allFinite: "; check(m.allFinite(), 0); std::cout << "\n"; + std::cout << "hasNaN: "; check(m.hasNaN(), 1); std::cout << "\n"; + std::cout << "\n"; + } + else + { + VERIFY( !(numext::isfinite)(m(4)) ); + VERIFY( (numext::isinf)(m(4)) ); + VERIFY( !(numext::isnan)(m(4)) ); + VERIFY( !m.allFinite() ); + VERIFY( m.hasNaN() ); + } + m(3) = 0; + if(dryrun) + { + std::cout << "std::isfinite(" << m(3) << ") = "; check((std::isfinite)(m(3)),true); std::cout << " ; numext::isfinite = "; check((numext::isfinite)(m(3)), true); std::cout << "\n"; + std::cout << "std::isinf(" << m(3) << ") = "; check((std::isinf)(m(3)),false); std::cout << " ; numext::isinf = "; check((numext::isinf)(m(3)), false); std::cout << "\n"; + std::cout << "std::isnan(" << m(3) << ") = "; check((std::isnan)(m(3)),false); std::cout << " ; numext::isnan = "; check((numext::isnan)(m(3)), false); std::cout << "\n"; + std::cout << "allFinite: "; check(m.allFinite(), 0); std::cout << "\n"; + std::cout << "hasNaN: "; check(m.hasNaN(), 0); std::cout << "\n"; + std::cout << "\n\n"; + } + else + { + VERIFY( (numext::isfinite)(m(3)) ); + VERIFY( !(numext::isinf)(m(3)) ); + VERIFY( !(numext::isnan)(m(3)) ); + VERIFY( !m.allFinite() ); + VERIFY( !m.hasNaN() ); + } +} + +void test_fastmath() { + std::cout << "*** float *** \n\n"; check_inf_nan(true); + std::cout << "*** double ***\n\n"; check_inf_nan(true); + std::cout << "*** long double *** \n\n"; check_inf_nan(true); + + check_inf_nan(false); + check_inf_nan(false); + check_inf_nan(false); +} diff --git 
a/test/geo_alignedbox.cpp b/test/geo_alignedbox.cpp index e2792ed18..2bdb4b7f2 100644 --- a/test/geo_alignedbox.cpp +++ b/test/geo_alignedbox.cpp @@ -16,7 +16,7 @@ using namespace std; template EIGEN_DONT_INLINE -void kill_extra_precision(T& x) { eigen_assert(&x != 0); } +void kill_extra_precision(T& x) { eigen_assert((void*)(&x) != (void*)0); } template void alignedbox(const BoxType& _box) @@ -179,6 +179,8 @@ void test_geo_alignedbox() CALL_SUBTEST_9( alignedbox(AlignedBox1i()) ); CALL_SUBTEST_10( alignedbox(AlignedBox2i()) ); CALL_SUBTEST_11( alignedbox(AlignedBox3i()) ); + + CALL_SUBTEST_14( alignedbox(AlignedBox(4)) ); } CALL_SUBTEST_12( specificTest1() ); CALL_SUBTEST_13( specificTest2() ); diff --git a/test/geo_quaternion.cpp b/test/geo_quaternion.cpp index 17229be4a..761bb52b4 100644 --- a/test/geo_quaternion.cpp +++ b/test/geo_quaternion.cpp @@ -49,6 +49,7 @@ template void quaternion(void) */ using std::abs; typedef Matrix Vector3; + typedef Matrix Matrix3; typedef Matrix Vector4; typedef Quaternion Quaternionx; typedef AngleAxis AngleAxisx; @@ -101,6 +102,11 @@ template void quaternion(void) q2 = q1.toRotationMatrix(); VERIFY_IS_APPROX(q1*v1,q2*v1); + Matrix3 rot1(q1); + VERIFY_IS_APPROX(q1*v1,rot1*v1); + Quaternionx q3(rot1.transpose()*rot1); + VERIFY_IS_APPROX(q3*v1,v1); + // angle-axis conversion AngleAxisx aa = AngleAxisx(q1); diff --git a/test/geo_transformations.cpp b/test/geo_transformations.cpp index d50c7c76a..51f90036d 100644 --- a/test/geo_transformations.cpp +++ b/test/geo_transformations.cpp @@ -12,6 +12,12 @@ #include #include +template +Matrix angleToVec(T a) +{ + return Matrix(std::cos(a), std::sin(a)); +} + template void non_projective_only() { /* this test covers the following files: @@ -130,14 +136,16 @@ template void transformations() AngleAxisx aa = AngleAxisx(q1); VERIFY_IS_APPROX(q1 * v1, Quaternionx(aa) * v1); - if(abs(aa.angle()) > NumTraits::dummy_precision()) + // The following test is stable only if 2*angle != angle and v1 is not collinear with axis + if( (abs(aa.angle()) > test_precision()) && (abs(aa.axis().dot(v1.normalized()))<(Scalar(1)-Scalar(4)*test_precision())) ) { VERIFY( !(q1 * v1).isApprox(Quaternionx(AngleAxisx(aa.angle()*2,aa.axis())) * v1) ); } aa.fromRotationMatrix(aa.toRotationMatrix()); VERIFY_IS_APPROX(q1 * v1, Quaternionx(aa) * v1); - if(abs(aa.angle()) > NumTraits::dummy_precision()) + // The following test is stable only if 2*angle != angle and v1 is not collinear with axis + if( (abs(aa.angle()) > test_precision()) && (abs(aa.axis().dot(v1.normalized()))<(Scalar(1)-Scalar(4)*test_precision())) ) { VERIFY( !(q1 * v1).isApprox(Quaternionx(AngleAxisx(aa.angle()*2,aa.axis())) * v1) ); } @@ -214,7 +222,9 @@ template void transformations() t4 *= aa3; VERIFY_IS_APPROX(t3.matrix(), t4.matrix()); - v3 = Vector3::Random(); + do { + v3 = Vector3::Random(); + } while (v3.cwiseAbs().minCoeff()::epsilon()); Translation3 tv3(v3); Transform3 t5(tv3); t4 = tv3; @@ -414,14 +424,16 @@ template void transformations() Scalar angle = internal::random(-100,100); Rotation2D rot2(angle); VERIFY( rot2.smallestPositiveAngle() >= 0 ); - VERIFY( rot2.smallestPositiveAngle() < Scalar(2)*Scalar(EIGEN_PI) ); - VERIFY_IS_APPROX( std::cos(rot2.smallestPositiveAngle()), std::cos(rot2.angle()) ); - VERIFY_IS_APPROX( std::sin(rot2.smallestPositiveAngle()), std::sin(rot2.angle()) ); + VERIFY( rot2.smallestPositiveAngle() <= Scalar(2)*Scalar(EIGEN_PI) ); + VERIFY_IS_APPROX( angleToVec(rot2.smallestPositiveAngle()), angleToVec(rot2.angle()) ); VERIFY( rot2.smallestAngle()
>= -Scalar(EIGEN_PI) ); VERIFY( rot2.smallestAngle() <= Scalar(EIGEN_PI) ); - VERIFY_IS_APPROX( std::cos(rot2.smallestAngle()), std::cos(rot2.angle()) ); - VERIFY_IS_APPROX( std::sin(rot2.smallestAngle()), std::sin(rot2.angle()) ); + VERIFY_IS_APPROX( angleToVec(rot2.smallestAngle()), angleToVec(rot2.angle()) ); + + Matrix rot2_as_mat(rot2); + Rotation2D rot3(rot2_as_mat); + VERIFY_IS_APPROX( angleToVec(rot2.smallestAngle()), angleToVec(rot3.angle()) ); } s0 = internal::random(-100,100); @@ -437,7 +449,7 @@ template void transformations() VERIFY_IS_APPROX(t20,t21); VERIFY_IS_APPROX(s0, (R0.slerp(0, R1)).angle()); - VERIFY_IS_APPROX(R1.smallestPositiveAngle(), (R0.slerp(1, R1)).smallestPositiveAngle()); + VERIFY_IS_APPROX( angleToVec(R1.smallestPositiveAngle()), angleToVec((R0.slerp(1, R1)).smallestPositiveAngle()) ); VERIFY_IS_APPROX(R0.smallestPositiveAngle(), (R0.slerp(0.5, R0)).smallestPositiveAngle()); if(std::cos(s0)>0) @@ -447,13 +459,14 @@ template void transformations() // Check path length Scalar l = 0; - for(int k=0; k<100; ++k) + int path_steps = 100; + for(int k=0; k::epsilon()*Scalar(path_steps/2))); // check basic features { diff --git a/unsupported/test/incomplete_cholesky.cpp b/test/incomplete_cholesky.cpp similarity index 96% rename from unsupported/test/incomplete_cholesky.cpp rename to test/incomplete_cholesky.cpp index cc2ed698e..435e2839a 100644 --- a/unsupported/test/incomplete_cholesky.cpp +++ b/test/incomplete_cholesky.cpp @@ -18,7 +18,7 @@ template void test_incomplete_cholesky_T() ConjugateGradient > > cg_illt_lower_amd; ConjugateGradient > > cg_illt_lower_nat; ConjugateGradient > > cg_illt_upper_amd; - ConjugateGradient > > cg_illt_upper_nat; + ConjugateGradient > > cg_illt_upper_nat; CALL_SUBTEST( check_sparse_spd_solving(cg_illt_lower_amd) ); diff --git a/test/is_same_dense.cpp b/test/is_same_dense.cpp index 318ba8717..6d7904bac 100644 --- a/test/is_same_dense.cpp +++ b/test/is_same_dense.cpp @@ -11,9 +11,10 @@ void test_is_same_dense() { - MatrixXd m1(10,10); - Ref ref_m1(m1); - Ref const_ref_m1(m1); + typedef Matrix ColMatrixXd; + ColMatrixXd m1(10,10); + Ref ref_m1(m1); + Ref const_ref_m1(m1); VERIFY(is_same_dense(m1,m1)); VERIFY(is_same_dense(m1,ref_m1)); VERIFY(is_same_dense(const_ref_m1,m1)); @@ -22,9 +23,9 @@ void test_is_same_dense() VERIFY(is_same_dense(m1.block(0,0,m1.rows(),m1.cols()),m1)); VERIFY(!is_same_dense(m1.row(0),m1.col(0))); - Ref const_ref_m1_row(m1.row(1)); + Ref const_ref_m1_row(m1.row(1)); VERIFY(!is_same_dense(m1.row(1),const_ref_m1_row)); - Ref const_ref_m1_col(m1.col(1)); + Ref const_ref_m1_col(m1.col(1)); VERIFY(is_same_dense(m1.col(1),const_ref_m1_col)); } diff --git a/test/linearstructure.cpp b/test/linearstructure.cpp index 3c7cdbe41..292f33969 100644 --- a/test/linearstructure.cpp +++ b/test/linearstructure.cpp @@ -108,9 +108,11 @@ void test_linearstructure() CALL_SUBTEST_7( linearStructure(MatrixXi (internal::random(1,EIGEN_TEST_MAX_SIZE), internal::random(1,EIGEN_TEST_MAX_SIZE))) ); CALL_SUBTEST_8( linearStructure(MatrixXcd(internal::random(1,EIGEN_TEST_MAX_SIZE/2), internal::random(1,EIGEN_TEST_MAX_SIZE/2))) ); CALL_SUBTEST_9( linearStructure(ArrayXXf (internal::random(1,EIGEN_TEST_MAX_SIZE), internal::random(1,EIGEN_TEST_MAX_SIZE))) ); + CALL_SUBTEST_10( linearStructure(ArrayXXcf (internal::random(1,EIGEN_TEST_MAX_SIZE), internal::random(1,EIGEN_TEST_MAX_SIZE))) ); - CALL_SUBTEST_10( real_complex() ); - CALL_SUBTEST_10( real_complex(10,10) ); + CALL_SUBTEST_11( real_complex() ); + CALL_SUBTEST_11( real_complex(10,10) ); + 
CALL_SUBTEST_11( real_complex(10,10) ); } #ifdef EIGEN_TEST_PART_4 diff --git a/test/lu.cpp b/test/lu.cpp index b90367438..f14435114 100644 --- a/test/lu.cpp +++ b/test/lu.cpp @@ -92,6 +92,26 @@ template void lu_non_invertible() // test that the code, which does resize(), may be applied to an xpr m2.block(0,0,m2.rows(),m2.cols()) = lu.solve(m3); VERIFY_IS_APPROX(m3, m1*m2); + + // test solve with transposed + m3 = MatrixType::Random(rows,cols2); + m2 = m1.transpose()*m3; + m3 = MatrixType::Random(rows,cols2); + lu.template _solve_impl_transposed(m2, m3); + VERIFY_IS_APPROX(m2, m1.transpose()*m3); + m3 = MatrixType::Random(rows,cols2); + m3 = lu.transpose().solve(m2); + VERIFY_IS_APPROX(m2, m1.transpose()*m3); + + // test solve with conjugate transposed + m3 = MatrixType::Random(rows,cols2); + m2 = m1.adjoint()*m3; + m3 = MatrixType::Random(rows,cols2); + lu.template _solve_impl_transposed(m2, m3); + VERIFY_IS_APPROX(m2, m1.adjoint()*m3); + m3 = MatrixType::Random(rows,cols2); + m3 = lu.adjoint().solve(m2); + VERIFY_IS_APPROX(m2, m1.adjoint()*m3); } template void lu_invertible() @@ -125,6 +145,20 @@ template void lu_invertible() VERIFY_IS_APPROX(m3, m1*m2); VERIFY_IS_APPROX(m2, lu.inverse()*m3); + // test solve with transposed + lu.template _solve_impl_transposed(m3, m2); + VERIFY_IS_APPROX(m3, m1.transpose()*m2); + m3 = MatrixType::Random(size,size); + m3 = lu.transpose().solve(m2); + VERIFY_IS_APPROX(m2, m1.transpose()*m3); + + // test solve with conjugate transposed + lu.template _solve_impl_transposed(m3, m2); + VERIFY_IS_APPROX(m3, m1.adjoint()*m2); + m3 = MatrixType::Random(size,size); + m3 = lu.adjoint().solve(m2); + VERIFY_IS_APPROX(m2, m1.adjoint()*m3); + // Regression test for Bug 302 MatrixType m4 = MatrixType::Random(size,size); VERIFY_IS_APPROX(lu.solve(m3*m4), lu.solve(m3)*m4); @@ -136,14 +170,32 @@ template void lu_partial_piv() PartialPivLU.h */ typedef typename MatrixType::Index Index; - Index rows = internal::random(1,4); - Index cols = rows; + Index size = internal::random(1,4); - MatrixType m1(cols, rows); + MatrixType m1(size, size), m2(size, size), m3(size, size); m1.setRandom(); PartialPivLU plu(m1); VERIFY_IS_APPROX(m1, plu.reconstructedMatrix()); + + m3 = MatrixType::Random(size,size); + m2 = plu.solve(m3); + VERIFY_IS_APPROX(m3, m1*m2); + VERIFY_IS_APPROX(m2, plu.inverse()*m3); + + // test solve with transposed + plu.template _solve_impl_transposed(m3, m2); + VERIFY_IS_APPROX(m3, m1.transpose()*m2); + m3 = MatrixType::Random(size,size); + m3 = plu.transpose().solve(m2); + VERIFY_IS_APPROX(m2, m1.transpose()*m3); + + // test solve with conjugate transposed + plu.template _solve_impl_transposed(m3, m2); + VERIFY_IS_APPROX(m3, m1.adjoint()*m2); + m3 = MatrixType::Random(size,size); + m3 = plu.adjoint().solve(m2); + VERIFY_IS_APPROX(m2, m1.adjoint()*m3); } template void lu_verify_assert() diff --git a/test/metis_support.cpp b/test/metis_support.cpp index 932b04074..d87c56a13 100644 --- a/test/metis_support.cpp +++ b/test/metis_support.cpp @@ -3,24 +3,10 @@ // // Copyright (C) 2012 Désiré Nuentsa-Wakam // -// Eigen is free software; you can redistribute it and/or -// modify it under the terms of the GNU Lesser General Public -// License as published by the Free Software Foundation; either -// version 3 of the License, or (at your option) any later version. 
-// -// Alternatively, you can redistribute it and/or -// modify it under the terms of the GNU General Public License as -// published by the Free Software Foundation; either version 2 of -// the License, or (at your option) any later version. -// -// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY -// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the -// GNU General Public License for more details. -// -// You should have received a copy of the GNU Lesser General Public -// License and a copy of the GNU General Public License along with -// Eigen. If not, see . +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + #include "sparse_solver.h" #include #include diff --git a/test/mixingtypes.cpp b/test/mixingtypes.cpp index 71f099bb8..32d9d0be9 100644 --- a/test/mixingtypes.cpp +++ b/test/mixingtypes.cpp @@ -1,7 +1,7 @@ // This file is part of Eigen, a lightweight C++ template library // for linear algebra. // -// Copyright (C) 2008 Gael Guennebaud +// Copyright (C) 2008-2015 Gael Guennebaud // Copyright (C) 2008 Benoit Jacob // // This Source Code Form is subject to the terms of the Mozilla @@ -15,9 +15,13 @@ #define EIGEN_NO_STATIC_ASSERT // turn static asserts into runtime asserts in order to check them #endif -// #ifndef EIGEN_DONT_VECTORIZE -// #define EIGEN_DONT_VECTORIZE // SSE intrinsics aren't designed to allow mixing types -// #endif +#if defined(EIGEN_TEST_PART_1) || defined(EIGEN_TEST_PART_2) || defined(EIGEN_TEST_PART_3) + +#ifndef EIGEN_DONT_VECTORIZE +#define EIGEN_DONT_VECTORIZE +#endif + +#endif #include "main.h" @@ -56,10 +60,12 @@ template void mixingtypes(int size = SizeAtCompileType) // this one does not even compile with C++11 VERIFY_RAISES_ASSERT(mf+mcf); #endif - // the following do not even compile since the introduction of evaluators -// VERIFY_RAISES_ASSERT(vf=vd); -// VERIFY_RAISES_ASSERT(vf+=vd); -// VERIFY_RAISES_ASSERT(mcd=md); + +#ifdef EIGEN_DONT_VECTORIZE + VERIFY_RAISES_ASSERT(vf=vd); + VERIFY_RAISES_ASSERT(vf+=vd); + VERIFY_RAISES_ASSERT(mcd=md); +#endif // check scalar products VERIFY_IS_APPROX(vcf * sf , vcf * complex(sf)); @@ -79,6 +85,7 @@ template void mixingtypes(int size = SizeAtCompileType) VERIFY_IS_APPROX(vcd.asDiagonal() * md, vcd.asDiagonal() * md.template cast >()); VERIFY_IS_APPROX(mcf * vf.asDiagonal(), mcf * vf.template cast >().asDiagonal()); VERIFY_IS_APPROX(md * vcd.asDiagonal(), md.template cast >() * vcd.asDiagonal()); + // vd.asDiagonal() * mf; // does not even compile // vcd.asDiagonal() * mf; // does not even compile @@ -148,5 +155,9 @@ void test_mixingtypes() CALL_SUBTEST_1(mixingtypes<3>()); CALL_SUBTEST_2(mixingtypes<4>()); CALL_SUBTEST_3(mixingtypes(internal::random(1,EIGEN_TEST_MAX_SIZE))); + + CALL_SUBTEST_4(mixingtypes<3>()); + CALL_SUBTEST_5(mixingtypes<4>()); + CALL_SUBTEST_6(mixingtypes(internal::random(1,EIGEN_TEST_MAX_SIZE))); } } diff --git a/test/mpl2only.cpp b/test/mpl2only.cpp new file mode 100644 index 000000000..5ef0d2b2e --- /dev/null +++ b/test/mpl2only.cpp @@ -0,0 +1,20 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2015 Gael Guennebaud +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. 
If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#define EIGEN_MPL2_ONLY +#include +#include +#include +#include +#include + +int main() +{ + return 0; +} diff --git a/test/nesting_ops.cpp b/test/nesting_ops.cpp index 6e772c70f..2f5025305 100644 --- a/test/nesting_ops.cpp +++ b/test/nesting_ops.cpp @@ -2,14 +2,35 @@ // for linear algebra. // // Copyright (C) 2010 Hauke Heibel +// Copyright (C) 2015 Gael Guennebaud // // This Source Code Form is subject to the terms of the Mozilla // Public License v. 2.0. If a copy of the MPL was not distributed // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. +#define TEST_ENABLE_TEMPORARY_TRACKING + #include "main.h" -template void run_nesting_ops(const MatrixType& _m) +template +void use_n_times(const XprType &xpr) +{ + typename internal::nested_eval::type mat(xpr); + typename XprType::PlainObject res(mat.rows(), mat.cols()); + nb_temporaries--; // remove res + res.setZero(); + for(int i=0; i +bool verify_eval_type(const XprType &, const ReferenceType&) +{ + typedef typename internal::nested_eval::type EvalType; + return internal::is_same::type, typename internal::remove_all::type>::value; +} + +template void run_nesting_ops_1(const MatrixType& _m) { typename internal::nested_eval::type m(_m); @@ -24,10 +45,63 @@ template void run_nesting_ops(const MatrixType& _m) VERIFY_IS_APPROX( (m.transpose() * m).array().abs().sum(), (m.transpose() * m).array().abs().sum() ); } +template void run_nesting_ops_2(const MatrixType& _m) +{ + typedef typename MatrixType::Scalar Scalar; + Index rows = _m.rows(); + Index cols = _m.cols(); + MatrixType m1 = MatrixType::Random(rows,cols); + Matrix m2; + + if((MatrixType::SizeAtCompileTime==Dynamic)) + { + VERIFY_EVALUATION_COUNT( use_n_times<1>(m1 + m1*m1), 1 ); + VERIFY_EVALUATION_COUNT( use_n_times<10>(m1 + m1*m1), 1 ); + + VERIFY_EVALUATION_COUNT( use_n_times<1>(m1.template triangularView().solve(m1.col(0))), 1 ); + VERIFY_EVALUATION_COUNT( use_n_times<10>(m1.template triangularView().solve(m1.col(0))), 1 ); + + VERIFY_EVALUATION_COUNT( use_n_times<1>(Scalar(2)*m1.template triangularView().solve(m1.col(0))), 2 ); // FIXME could be one by applying the scaling in-place on the solve result + VERIFY_EVALUATION_COUNT( use_n_times<1>(m1.col(0)+m1.template triangularView().solve(m1.col(0))), 2 ); // FIXME could be one by adding m1.col() inplace + VERIFY_EVALUATION_COUNT( use_n_times<10>(m1.col(0)+m1.template triangularView().solve(m1.col(0))), 2 ); + } + + { + VERIFY( verify_eval_type<10>(m1, m1) ); + if(!NumTraits::IsComplex) + { + VERIFY( verify_eval_type<3>(2*m1, 2*m1) ); + VERIFY( verify_eval_type<4>(2*m1, m1) ); + } + else + { + VERIFY( verify_eval_type<1>(2*m1, 2*m1) ); + VERIFY( verify_eval_type<2>(2*m1, m1) ); + } + VERIFY( verify_eval_type<2>(m1+m1, m1+m1) ); + VERIFY( verify_eval_type<3>(m1+m1, m1) ); + VERIFY( verify_eval_type<1>(m1*m1.transpose(), m2) ); + VERIFY( verify_eval_type<1>(m1*(m1+m1).transpose(), m2) ); + VERIFY( verify_eval_type<2>(m1*m1.transpose(), m2) ); + VERIFY( verify_eval_type<1>(m1+m1*m1, m1) ); + + VERIFY( verify_eval_type<1>(m1.template triangularView().solve(m1), m1) ); + VERIFY( verify_eval_type<1>(m1+m1.template triangularView().solve(m1), m1) ); + } +} + + void test_nesting_ops() { - CALL_SUBTEST_1(run_nesting_ops(MatrixXf::Random(25,25))); - CALL_SUBTEST_2(run_nesting_ops(MatrixXd::Random(25,25))); - CALL_SUBTEST_3(run_nesting_ops(Matrix4f::Random())); - 
CALL_SUBTEST_4(run_nesting_ops(Matrix4d::Random())); + CALL_SUBTEST_1(run_nesting_ops_1(MatrixXf::Random(25,25))); + CALL_SUBTEST_2(run_nesting_ops_1(MatrixXcd::Random(25,25))); + CALL_SUBTEST_3(run_nesting_ops_1(Matrix4f::Random())); + CALL_SUBTEST_4(run_nesting_ops_1(Matrix2d::Random())); + + Index s = internal::random(1,EIGEN_TEST_MAX_SIZE); + CALL_SUBTEST_1( run_nesting_ops_2(MatrixXf(s,s)) ); + CALL_SUBTEST_2( run_nesting_ops_2(MatrixXcd(s,s)) ); + CALL_SUBTEST_3( run_nesting_ops_2(Matrix4f()) ); + CALL_SUBTEST_4( run_nesting_ops_2(Matrix2d()) ); + TEST_SET_BUT_UNUSED_VARIABLE(s) } diff --git a/test/nullary.cpp b/test/nullary.cpp index 2c148e205..4844f2952 100644 --- a/test/nullary.cpp +++ b/test/nullary.cpp @@ -123,6 +123,8 @@ void test_nullary() CALL_SUBTEST_6( testVectorType(Vector3d()) ); CALL_SUBTEST_7( testVectorType(VectorXf(internal::random(1,300))) ); CALL_SUBTEST_8( testVectorType(Vector3f()) ); + CALL_SUBTEST_8( testVectorType(Vector4f()) ); + CALL_SUBTEST_8( testVectorType(Matrix()) ); CALL_SUBTEST_8( testVectorType(Matrix()) ); } } diff --git a/test/packetmath.cpp b/test/packetmath.cpp index b2b1cadc9..e09a361bf 100644 --- a/test/packetmath.cpp +++ b/test/packetmath.cpp @@ -18,7 +18,9 @@ template T negate(const T& x) { return -x; } } } -template bool isApproxAbs(const Scalar& a, const Scalar& b, const typename NumTraits::Real& refvalue) +// NOTE: we disable inlining for this function to work around a GCC issue when using -O3 and the i387 FPU. +template EIGEN_DONT_INLINE +bool isApproxAbs(const Scalar& a, const Scalar& b, const typename NumTraits::Real& refvalue) { return internal::isMuchSmallerThan(a-b, refvalue); } @@ -29,7 +31,7 @@ template bool areApproxAbs(const Scalar* a, const Scalar* b, in { if (!isApproxAbs(a[i],b[i],refvalue)) { - std::cout << "[" << Map >(a,size) << "]" << " != " << Map >(b,size) << "\n"; + std::cout << "ref: [" << Map >(a,size) << "]" << " != vec: [" << Map >(b,size) << "]\n"; return false; } } @@ -42,21 +44,13 @@ template bool areApprox(const Scalar* a, const Scalar* b, int s { if (a[i]!=b[i] && !internal::isApprox(a[i],b[i])) { - std::cout << "[" << Map >(a,size) << "]" << " != " << Map >(b,size) << "\n"; + std::cout << "ref: [" << Map >(a,size) << "]" << " != vec: [" << Map >(b,size) << "]\n"; return false; } } return true; } - -#define CHECK_CWISE2(REFOP, POP) { \ - for (int i=0; i(data1), internal::pload(data1+PacketSize))); \ - VERIFY(areApprox(ref, data2, PacketSize) && #POP); \ -} - #define CHECK_CWISE1(REFOP, POP) { \ for (int i=0; i VERIFY(areApprox(ref, data2, PacketSize) && #POP); \ } +#define CHECK_CWISE2_IF(COND, REFOP, POP) if(COND) { \ + packet_helper<COND,Packet> h; \ + for (int i=0; i<PacketSize; ++i) \ + ref[i] = REFOP(data1[i], data1[i+PacketSize]); \ + h.store(data2, POP(h.load(data1), h.load(data1+PacketSize))); \ + VERIFY(areApprox(ref, data2, PacketSize) && #POP); \ +} template void packetmath() { using std::abs; - typedef typename internal::packet_traits::type Packet; - const int PacketSize = internal::packet_traits::size; + typedef internal::packet_traits PacketTraits; + typedef typename PacketTraits::type Packet; + const int PacketSize = PacketTraits::size; typedef typename NumTraits::Real RealScalar; const int max_size = PacketSize > 4 ?
PacketSize : 4; @@ -153,13 +156,17 @@ template void packetmath() VERIFY(areApprox(ref, data2, PacketSize) && "internal::palign"); } - CHECK_CWISE2(REF_ADD, internal::padd); - CHECK_CWISE2(REF_SUB, internal::psub); - CHECK_CWISE2(REF_MUL, internal::pmul); - #if !defined(EIGEN_VECTORIZE_ALTIVEC) && !defined(EIGEN_VECTORIZE_VSX) - if (!internal::is_same::value) - CHECK_CWISE2(REF_DIV, internal::pdiv); - #endif + VERIFY((!PacketTraits::Vectorizable) || PacketTraits::HasAdd); + VERIFY((!PacketTraits::Vectorizable) || PacketTraits::HasSub); + VERIFY((!PacketTraits::Vectorizable) || PacketTraits::HasMul); + VERIFY((!PacketTraits::Vectorizable) || PacketTraits::HasNegate); + VERIFY((internal::is_same::value) || (!PacketTraits::Vectorizable) || PacketTraits::HasDiv); + + CHECK_CWISE2_IF(PacketTraits::HasAdd, REF_ADD, internal::padd); + CHECK_CWISE2_IF(PacketTraits::HasSub, REF_SUB, internal::psub); + CHECK_CWISE2_IF(PacketTraits::HasMul, REF_MUL, internal::pmul); + CHECK_CWISE2_IF(PacketTraits::HasDiv, REF_DIV, internal::pdiv); + CHECK_CWISE1(internal::negate, internal::pnegate); CHECK_CWISE1(numext::conj, internal::pconj); @@ -262,7 +269,7 @@ template void packetmath() } } - if (internal::packet_traits::HasBlend) { + if (PacketTraits::HasBlend) { Packet thenPacket = internal::pload(data1); Packet elsePacket = internal::pload(data2); EIGEN_ALIGN_MAX internal::Selector selector; @@ -282,42 +289,47 @@ template void packetmath() template void packetmath_real() { using std::abs; - typedef typename internal::packet_traits::type Packet; - const int PacketSize = internal::packet_traits::size; + typedef internal::packet_traits PacketTraits; + typedef typename PacketTraits::type Packet; + const int PacketSize = PacketTraits::size; const int size = PacketSize*4; - EIGEN_ALIGN_MAX Scalar data1[internal::packet_traits::size*4]; - EIGEN_ALIGN_MAX Scalar data2[internal::packet_traits::size*4]; - EIGEN_ALIGN_MAX Scalar ref[internal::packet_traits::size*4]; + EIGEN_ALIGN_MAX Scalar data1[PacketTraits::size*4]; + EIGEN_ALIGN_MAX Scalar data2[PacketTraits::size*4]; + EIGEN_ALIGN_MAX Scalar ref[PacketTraits::size*4]; for (int i=0; i(-1,1) * std::pow(Scalar(10), internal::random(-3,3)); data2[i] = internal::random(-1,1) * std::pow(Scalar(10), internal::random(-3,3)); } - CHECK_CWISE1_IF(internal::packet_traits::HasSin, std::sin, internal::psin); - CHECK_CWISE1_IF(internal::packet_traits::HasCos, std::cos, internal::pcos); - CHECK_CWISE1_IF(internal::packet_traits::HasTan, std::tan, internal::ptan); + CHECK_CWISE1_IF(PacketTraits::HasSin, std::sin, internal::psin); + CHECK_CWISE1_IF(PacketTraits::HasCos, std::cos, internal::pcos); + CHECK_CWISE1_IF(PacketTraits::HasTan, std::tan, internal::ptan); + + CHECK_CWISE1_IF(PacketTraits::HasRound, numext::round, internal::pround); + CHECK_CWISE1_IF(PacketTraits::HasCeil, numext::ceil, internal::pceil); + CHECK_CWISE1_IF(PacketTraits::HasFloor, numext::floor, internal::pfloor); for (int i=0; i(-1,1); data2[i] = internal::random(-1,1); } - CHECK_CWISE1_IF(internal::packet_traits::HasASin, std::asin, internal::pasin); - CHECK_CWISE1_IF(internal::packet_traits::HasACos, std::acos, internal::pacos); + CHECK_CWISE1_IF(PacketTraits::HasASin, std::asin, internal::pasin); + CHECK_CWISE1_IF(PacketTraits::HasACos, std::acos, internal::pacos); for (int i=0; i(-87,88); data2[i] = internal::random(-87,88); } - CHECK_CWISE1_IF(internal::packet_traits::HasExp, std::exp, internal::pexp); - if(internal::packet_traits::HasExp && internal::packet_traits::size>=2) + 
CHECK_CWISE1_IF(PacketTraits::HasExp, std::exp, internal::pexp); + if(PacketTraits::HasExp && PacketTraits::size>=2) { data1[0] = std::numeric_limits::quiet_NaN(); data1[1] = std::numeric_limits::epsilon(); - packet_helper::HasExp,Packet> h; + packet_helper h; h.store(data2, internal::pexp(h.load(data1))); VERIFY((numext::isnan)(data2[0])); VERIFY_IS_EQUAL(std::exp(std::numeric_limits::epsilon()), data2[1]); @@ -326,7 +338,7 @@ template void packetmath_real() data1[1] = 0; h.store(data2, internal::pexp(h.load(data1))); VERIFY_IS_EQUAL(std::exp(-std::numeric_limits::epsilon()), data2[0]); - VERIFY_IS_EQUAL(std::exp(0), data2[1]); + VERIFY_IS_EQUAL(std::exp(Scalar(0)), data2[1]); data1[0] = (std::numeric_limits::min)(); data1[1] = -(std::numeric_limits::min)(); @@ -341,20 +353,48 @@ template void packetmath_real() VERIFY_IS_EQUAL(std::exp(-std::numeric_limits::denorm_min()), data2[1]); } +#ifdef EIGEN_HAS_C99_MATH + { + data1[0] = std::numeric_limits::quiet_NaN(); + packet_helper::HasLGamma,Packet> h; + h.store(data2, internal::plgamma(h.load(data1))); + VERIFY((numext::isnan)(data2[0])); + } + { + data1[0] = std::numeric_limits::quiet_NaN(); + packet_helper::HasErf,Packet> h; + h.store(data2, internal::perf(h.load(data1))); + VERIFY((numext::isnan)(data2[0])); + } + { + data1[0] = std::numeric_limits::quiet_NaN(); + packet_helper::HasErfc,Packet> h; + h.store(data2, internal::perfc(h.load(data1))); + VERIFY((numext::isnan)(data2[0])); + } +#endif // EIGEN_HAS_C99_MATH + for (int i=0; i(0,1) * std::pow(Scalar(10), internal::random(-6,6)); data2[i] = internal::random(0,1) * std::pow(Scalar(10), internal::random(-6,6)); } + if(internal::random(0,1)<0.1) data1[internal::random(0, PacketSize)] = 0; - CHECK_CWISE1_IF(internal::packet_traits::HasSqrt, std::sqrt, internal::psqrt); - CHECK_CWISE1_IF(internal::packet_traits::HasLog, std::log, internal::plog); - if(internal::packet_traits::HasLog && internal::packet_traits::size>=2) + CHECK_CWISE1_IF(PacketTraits::HasSqrt, std::sqrt, internal::psqrt); + CHECK_CWISE1_IF(PacketTraits::HasLog, std::log, internal::plog); +#if defined(EIGEN_HAS_C99_MATH) && (__cplusplus > 199711L) + CHECK_CWISE1_IF(internal::packet_traits::HasLGamma, std::lgamma, internal::plgamma); + CHECK_CWISE1_IF(internal::packet_traits::HasErf, std::erf, internal::perf); + CHECK_CWISE1_IF(internal::packet_traits::HasErfc, std::erfc, internal::perfc); +#endif + + if(PacketTraits::HasLog && PacketTraits::size>=2) { data1[0] = std::numeric_limits::quiet_NaN(); data1[1] = std::numeric_limits::epsilon(); - packet_helper::HasLog,Packet> h; + packet_helper h; h.store(data2, internal::plog(h.load(data1))); VERIFY((numext::isnan)(data2[0])); VERIFY_IS_EQUAL(std::log(std::numeric_limits::epsilon()), data2[1]); @@ -363,7 +403,7 @@ template void packetmath_real() data1[1] = 0; h.store(data2, internal::plog(h.load(data1))); VERIFY((numext::isnan)(data2[0])); - VERIFY_IS_EQUAL(std::log(0), data2[1]); + VERIFY_IS_EQUAL(std::log(Scalar(0)), data2[1]); data1[0] = (std::numeric_limits::min)(); data1[1] = -(std::numeric_limits::min)(); @@ -391,22 +431,26 @@ template void packetmath_real() template void packetmath_notcomplex() { using std::abs; - typedef typename internal::packet_traits::type Packet; - const int PacketSize = internal::packet_traits::size; + typedef internal::packet_traits PacketTraits; + typedef typename PacketTraits::type Packet; + const int PacketSize = PacketTraits::size; - EIGEN_ALIGN_MAX Scalar data1[internal::packet_traits::size*4]; - EIGEN_ALIGN_MAX Scalar 
data2[internal::packet_traits::size*4]; - EIGEN_ALIGN_MAX Scalar ref[internal::packet_traits::size*4]; + EIGEN_ALIGN_MAX Scalar data1[PacketTraits::size*4]; + EIGEN_ALIGN_MAX Scalar data2[PacketTraits::size*4]; + EIGEN_ALIGN_MAX Scalar ref[PacketTraits::size*4]; - Array::Map(data1, internal::packet_traits::size*4).setRandom(); + Array::Map(data1, PacketTraits::size*4).setRandom(); ref[0] = data1[0]; for (int i=0; i(data1))) && "internal::predux_min"); - CHECK_CWISE2((std::min), internal::pmin); - CHECK_CWISE2((std::max), internal::pmax); + VERIFY((!PacketTraits::Vectorizable) || PacketTraits::HasMin); + VERIFY((!PacketTraits::Vectorizable) || PacketTraits::HasMax); + + CHECK_CWISE2_IF(PacketTraits::HasMin, (std::min), internal::pmin); + CHECK_CWISE2_IF(PacketTraits::HasMax, (std::max), internal::pmax); CHECK_CWISE1(abs, internal::pabs); ref[0] = data1[0]; @@ -422,8 +466,9 @@ template void packetmath_notcomplex() template void test_conj_helper(Scalar* data1, Scalar* data2, Scalar* ref, Scalar* pval) { - typedef typename internal::packet_traits::type Packet; - const int PacketSize = internal::packet_traits::size; + typedef internal::packet_traits PacketTraits; + typedef typename PacketTraits::type Packet; + const int PacketSize = PacketTraits::size; internal::conj_if cj0; internal::conj_if cj1; @@ -450,8 +495,9 @@ template void test_conj_helper(Scalar template void packetmath_complex() { - typedef typename internal::packet_traits::type Packet; - const int PacketSize = internal::packet_traits::size; + typedef internal::packet_traits PacketTraits; + typedef typename PacketTraits::type Packet; + const int PacketSize = PacketTraits::size; const int size = PacketSize*4; EIGEN_ALIGN_MAX Scalar data1[PacketSize*4]; @@ -478,10 +524,12 @@ template void packetmath_complex() } } -template void packetmath_scatter_gather() { - typedef typename internal::packet_traits::type Packet; +template void packetmath_scatter_gather() +{ + typedef internal::packet_traits PacketTraits; + typedef typename PacketTraits::type Packet; typedef typename NumTraits::Real RealScalar; - const int PacketSize = internal::packet_traits::size; + const int PacketSize = PacketTraits::size; EIGEN_ALIGN_MAX Scalar data1[PacketSize]; RealScalar refvalue = 0; for (int i=0; i void product(const MatrixType& m) vcres.noalias() -= m1.transpose() * v1; VERIFY_IS_APPROX(vcres, vc2 - m1.transpose() * v1); + // test d ?= a+b*c rules + res.noalias() = square + m1 * m2.transpose(); + VERIFY_IS_APPROX(res, square + m1 * m2.transpose()); + res.noalias() += square + m1 * m2.transpose(); + VERIFY_IS_APPROX(res, 2*(square + m1 * m2.transpose())); + res.noalias() -= square + m1 * m2.transpose(); + VERIFY_IS_APPROX(res, square + m1 * m2.transpose()); + + tm1 = m1; VERIFY_IS_APPROX(tm1.transpose() * v1, m1.transpose() * v1); VERIFY_IS_APPROX(v1.transpose() * tm1, v1.transpose() * m1); diff --git a/test/product_large.cpp b/test/product_large.cpp index 84c489580..7207973c2 100644 --- a/test/product_large.cpp +++ b/test/product_large.cpp @@ -61,6 +61,17 @@ void test_product_large() MatrixXf r2 = mat1.row(2)*mat2; VERIFY_IS_APPROX(r2, (mat1.row(2)*mat2).eval()); } + + { + Eigen::MatrixXd A(10,10), B, C; + A.setRandom(); + C = A; + for(int k=0; k<79; ++k) + C = C * A; + B.noalias() = (((A*A)*(A*A))*((A*A)*(A*A))*((A*A)*(A*A))*((A*A)*(A*A))*((A*A)*(A*A)) * ((A*A)*(A*A))*((A*A)*(A*A))*((A*A)*(A*A))*((A*A)*(A*A))*((A*A)*(A*A))) + * (((A*A)*(A*A))*((A*A)*(A*A))*((A*A)*(A*A))*((A*A)*(A*A))*((A*A)*(A*A)) * 
((A*A)*(A*A))*((A*A)*(A*A))*((A*A)*(A*A))*((A*A)*(A*A))*((A*A)*(A*A))); + VERIFY_IS_APPROX(B,C); + } #endif // Regression test for bug 714: diff --git a/test/product_notemporary.cpp b/test/product_notemporary.cpp index 9fa69d901..ff93cb881 100644 --- a/test/product_notemporary.cpp +++ b/test/product_notemporary.cpp @@ -47,6 +47,10 @@ template void product_notemporary(const MatrixType& m) VERIFY_EVALUATION_COUNT( m3.noalias() = s1 * (m1 * m2.transpose()), 0); + VERIFY_EVALUATION_COUNT( m3.noalias() = m3 + m1 * m2.transpose(), 0); + VERIFY_EVALUATION_COUNT( m3.noalias() += m3 + m1 * m2.transpose(), 0); + VERIFY_EVALUATION_COUNT( m3.noalias() -= m3 + m1 * m2.transpose(), 0); + VERIFY_EVALUATION_COUNT( m3.noalias() = s1 * m1 * s2 * m2.adjoint(), 0); VERIFY_EVALUATION_COUNT( m3.noalias() = s1 * m1 * s2 * (m1*s3+m2*s2).adjoint(), 1); VERIFY_EVALUATION_COUNT( m3.noalias() = (s1 * m1).adjoint() * s2 * m2, 0); @@ -62,7 +66,7 @@ template void product_notemporary(const MatrixType& m) VERIFY_EVALUATION_COUNT( m3.noalias() -= (s1 * m1).template triangularView() * m2, 0); VERIFY_EVALUATION_COUNT( rm3.noalias() = (s1 * m1.adjoint()).template triangularView() * (m2+m2), 1); VERIFY_EVALUATION_COUNT( rm3.noalias() = (s1 * m1.adjoint()).template triangularView() * m2.adjoint(), 0); - + VERIFY_EVALUATION_COUNT( m3.template triangularView() = (m1 * m2.adjoint()), 0); VERIFY_EVALUATION_COUNT( m3.template triangularView() -= (m1 * m2.adjoint()), 0); @@ -107,6 +111,22 @@ template void product_notemporary(const MatrixType& m) VERIFY_EVALUATION_COUNT( cvres.noalias() -= m1 * m2.col(0), 0 ); VERIFY_EVALUATION_COUNT( cvres.noalias() -= m1 * rv1.adjoint(), 0 ); VERIFY_EVALUATION_COUNT( cvres.noalias() -= m1 * m2.row(0).transpose(), 0 ); + + VERIFY_EVALUATION_COUNT( cvres.noalias() = (m1+m1) * cv1, 0 ); + VERIFY_EVALUATION_COUNT( cvres.noalias() = (rm3+rm3) * cv1, 0 ); + VERIFY_EVALUATION_COUNT( cvres.noalias() = (m1+m1) * (m1*cv1), 1 ); + VERIFY_EVALUATION_COUNT( cvres.noalias() = (rm3+rm3) * (m1*cv1), 1 ); + + // Check outer products + m3 = cv1 * rv1; + VERIFY_EVALUATION_COUNT( m3.noalias() = cv1 * rv1, 0 ); + VERIFY_EVALUATION_COUNT( m3.noalias() = (cv1+cv1) * (rv1+rv1), 1 ); + VERIFY_EVALUATION_COUNT( m3.noalias() = (m1*cv1) * (rv1), 1 ); + VERIFY_EVALUATION_COUNT( m3.noalias() += (m1*cv1) * (rv1), 1 ); + VERIFY_EVALUATION_COUNT( rm3.noalias() = (cv1) * (rv1 * m1), 1 ); + VERIFY_EVALUATION_COUNT( rm3.noalias() -= (cv1) * (rv1 * m1), 1 ); + VERIFY_EVALUATION_COUNT( rm3.noalias() = (m1*cv1) * (rv1 * m1), 2 ); + VERIFY_EVALUATION_COUNT( rm3.noalias() += (m1*cv1) * (rv1 * m1), 2 ); } void test_product_notemporary() diff --git a/test/product_small.cpp b/test/product_small.cpp index 091955a0f..c35db6f65 100644 --- a/test/product_small.cpp +++ b/test/product_small.cpp @@ -29,6 +29,153 @@ void product1x1() matAdynamic.cwiseProduct(matBdynamic.transpose()).sum() ); } +template +const TC& ref_prod(TC &C, const TA &A, const TB &B) +{ + for(Index i=0;i +typename internal::enable_if::type +test_lazy_single(int rows, int cols, int depth) +{ + Matrix A(rows,depth); A.setRandom(); + Matrix B(depth,cols); B.setRandom(); + Matrix C(rows,cols); C.setRandom(); + Matrix D(C); + VERIFY_IS_APPROX(C+=A.lazyProduct(B), ref_prod(D,A,B)); +} + +template +typename internal::enable_if< ( (Rows ==1&&Depth!=1&&OA==ColMajor) + || (Depth==1&&Rows !=1&&OA==RowMajor) + || (Cols ==1&&Depth!=1&&OB==RowMajor) + || (Depth==1&&Cols !=1&&OB==ColMajor) + || (Rows ==1&&Cols !=1&&OC==ColMajor) + || (Cols ==1&&Rows !=1&&OC==RowMajor)),void>::type 
+test_lazy_single(int, int, int) +{ +} + +template +void test_lazy_all_layout(int rows=Rows, int cols=Cols, int depth=Depth) +{ + CALL_SUBTEST(( test_lazy_single(rows,cols,depth) )); + CALL_SUBTEST(( test_lazy_single(rows,cols,depth) )); + CALL_SUBTEST(( test_lazy_single(rows,cols,depth) )); + CALL_SUBTEST(( test_lazy_single(rows,cols,depth) )); + CALL_SUBTEST(( test_lazy_single(rows,cols,depth) )); + CALL_SUBTEST(( test_lazy_single(rows,cols,depth) )); + CALL_SUBTEST(( test_lazy_single(rows,cols,depth) )); + CALL_SUBTEST(( test_lazy_single(rows,cols,depth) )); +} + +template +void test_lazy_l1() +{ + int rows = internal::random(1,12); + int cols = internal::random(1,12); + int depth = internal::random(1,12); + + // Inner + CALL_SUBTEST(( test_lazy_all_layout() )); + CALL_SUBTEST(( test_lazy_all_layout() )); + CALL_SUBTEST(( test_lazy_all_layout() )); + CALL_SUBTEST(( test_lazy_all_layout() )); + CALL_SUBTEST(( test_lazy_all_layout() )); + CALL_SUBTEST(( test_lazy_all_layout(1,1,depth) )); + + // Outer + CALL_SUBTEST(( test_lazy_all_layout() )); + CALL_SUBTEST(( test_lazy_all_layout() )); + CALL_SUBTEST(( test_lazy_all_layout() )); + CALL_SUBTEST(( test_lazy_all_layout() )); + CALL_SUBTEST(( test_lazy_all_layout() )); + CALL_SUBTEST(( test_lazy_all_layout() )); + CALL_SUBTEST(( test_lazy_all_layout(4,cols) )); + CALL_SUBTEST(( test_lazy_all_layout(7,cols) )); + CALL_SUBTEST(( test_lazy_all_layout(rows) )); + CALL_SUBTEST(( test_lazy_all_layout(rows) )); + CALL_SUBTEST(( test_lazy_all_layout(rows,cols) )); +} + +template +void test_lazy_l2() +{ + int rows = internal::random(1,12); + int cols = internal::random(1,12); + int depth = internal::random(1,12); + + // mat-vec + CALL_SUBTEST(( test_lazy_all_layout() )); + CALL_SUBTEST(( test_lazy_all_layout() )); + CALL_SUBTEST(( test_lazy_all_layout() )); + CALL_SUBTEST(( test_lazy_all_layout() )); + CALL_SUBTEST(( test_lazy_all_layout() )); + CALL_SUBTEST(( test_lazy_all_layout() )); + CALL_SUBTEST(( test_lazy_all_layout() )); + CALL_SUBTEST(( test_lazy_all_layout() )); + CALL_SUBTEST(( test_lazy_all_layout() )); + CALL_SUBTEST(( test_lazy_all_layout(rows) )); + CALL_SUBTEST(( test_lazy_all_layout(4,1,depth) )); + CALL_SUBTEST(( test_lazy_all_layout(rows,1,depth) )); + + // vec-mat + CALL_SUBTEST(( test_lazy_all_layout() )); + CALL_SUBTEST(( test_lazy_all_layout() )); + CALL_SUBTEST(( test_lazy_all_layout() )); + CALL_SUBTEST(( test_lazy_all_layout() )); + CALL_SUBTEST(( test_lazy_all_layout() )); + CALL_SUBTEST(( test_lazy_all_layout() )); + CALL_SUBTEST(( test_lazy_all_layout() )); + CALL_SUBTEST(( test_lazy_all_layout() )); + CALL_SUBTEST(( test_lazy_all_layout() )); + CALL_SUBTEST(( test_lazy_all_layout(1,cols) )); + CALL_SUBTEST(( test_lazy_all_layout(1,4,depth) )); + CALL_SUBTEST(( test_lazy_all_layout(1,cols,depth) )); +} + +template +void test_lazy_l3() +{ + int rows = internal::random(1,12); + int cols = internal::random(1,12); + int depth = internal::random(1,12); + // mat-mat + CALL_SUBTEST(( test_lazy_all_layout() )); + CALL_SUBTEST(( test_lazy_all_layout() )); + CALL_SUBTEST(( test_lazy_all_layout() )); + CALL_SUBTEST(( test_lazy_all_layout() )); + CALL_SUBTEST(( test_lazy_all_layout() )); + CALL_SUBTEST(( test_lazy_all_layout() )); + CALL_SUBTEST(( test_lazy_all_layout() )); + CALL_SUBTEST(( test_lazy_all_layout() )); + CALL_SUBTEST(( test_lazy_all_layout() )); + CALL_SUBTEST(( test_lazy_all_layout(rows) )); + CALL_SUBTEST(( test_lazy_all_layout(4,3,depth) )); + CALL_SUBTEST(( test_lazy_all_layout(rows,6,depth) )); + CALL_SUBTEST(( 
test_lazy_all_layout() )); + CALL_SUBTEST(( test_lazy_all_layout() )); + CALL_SUBTEST(( test_lazy_all_layout() )); + CALL_SUBTEST(( test_lazy_all_layout() )); + CALL_SUBTEST(( test_lazy_all_layout() )); + CALL_SUBTEST(( test_lazy_all_layout() )); + CALL_SUBTEST(( test_lazy_all_layout() )); + CALL_SUBTEST(( test_lazy_all_layout() )); + CALL_SUBTEST(( test_lazy_all_layout() )); + CALL_SUBTEST(( test_lazy_all_layout(8,cols) )); + CALL_SUBTEST(( test_lazy_all_layout(3,4,depth) )); + CALL_SUBTEST(( test_lazy_all_layout(4,cols,depth) )); +} void test_product_small() { @@ -39,6 +186,22 @@ void test_product_small() CALL_SUBTEST_4( product(Matrix4d()) ); CALL_SUBTEST_5( product(Matrix4f()) ); CALL_SUBTEST_6( product1x1() ); + + CALL_SUBTEST_11( test_lazy_l1() ); + CALL_SUBTEST_12( test_lazy_l2() ); + CALL_SUBTEST_13( test_lazy_l3() ); + + CALL_SUBTEST_21( test_lazy_l1() ); + CALL_SUBTEST_22( test_lazy_l2() ); + CALL_SUBTEST_23( test_lazy_l3() ); + + CALL_SUBTEST_31( test_lazy_l1 >() ); + CALL_SUBTEST_32( test_lazy_l2 >() ); + CALL_SUBTEST_33( test_lazy_l3 >() ); + + CALL_SUBTEST_41( test_lazy_l1 >() ); + CALL_SUBTEST_42( test_lazy_l2 >() ); + CALL_SUBTEST_43( test_lazy_l3 >() ); } #ifdef EIGEN_TEST_PART_6 @@ -56,5 +219,16 @@ void test_product_small() VERIFY_IS_APPROX(B * A.inverse(), B * A.inverse()[0]); VERIFY_IS_APPROX(A.inverse() * C, A.inverse()[0] * C); } + + { + Eigen::Matrix A, B, C; + A.setRandom(); + C = A; + for(int k=0; k<79; ++k) + C = C * A; + B.noalias() = (((A*A)*(A*A))*((A*A)*(A*A))*((A*A)*(A*A))*((A*A)*(A*A))*((A*A)*(A*A)) * ((A*A)*(A*A))*((A*A)*(A*A))*((A*A)*(A*A))*((A*A)*(A*A))*((A*A)*(A*A))) + * (((A*A)*(A*A))*((A*A)*(A*A))*((A*A)*(A*A))*((A*A)*(A*A))*((A*A)*(A*A)) * ((A*A)*(A*A))*((A*A)*(A*A))*((A*A)*(A*A))*((A*A)*(A*A))*((A*A)*(A*A))); + VERIFY_IS_APPROX(B,C); + } #endif } diff --git a/test/product_trmm.cpp b/test/product_trmm.cpp index d715b9a36..12e554410 100644 --- a/test/product_trmm.cpp +++ b/test/product_trmm.cpp @@ -9,10 +9,18 @@ #include "main.h" +template +int get_random_size() +{ + const int factor = NumTraits::ReadCost; + const int max_test_size = EIGEN_TEST_MAX_SIZE>2*factor ? 
EIGEN_TEST_MAX_SIZE/factor : EIGEN_TEST_MAX_SIZE; + return internal::random(1,max_test_size); +} + template -void trmm(int rows=internal::random(1,EIGEN_TEST_MAX_SIZE), - int cols=internal::random(1,EIGEN_TEST_MAX_SIZE), - int otherCols = OtherCols==Dynamic?internal::random(1,EIGEN_TEST_MAX_SIZE):OtherCols) +void trmm(int rows=get_random_size(), + int cols=get_random_size(), + int otherCols = OtherCols==Dynamic?get_random_size():OtherCols) { typedef Matrix TriMatrix; typedef Matrix OnTheRight; @@ -42,13 +50,13 @@ void trmm(int rows=internal::random(1,EIGEN_TEST_MAX_SIZE), VERIFY_IS_APPROX( ge_xs.noalias() = mat.template triangularView() * ge_right, tri * ge_right); VERIFY_IS_APPROX( ge_sx.noalias() = ge_left * mat.template triangularView(), ge_left * tri); - + VERIFY_IS_APPROX( ge_xs.noalias() = (s1*mat.adjoint()).template triangularView() * (s2*ge_left.transpose()), s1*triTr.conjugate() * (s2*ge_left.transpose())); VERIFY_IS_APPROX( ge_sx.noalias() = ge_right.transpose() * mat.adjoint().template triangularView(), ge_right.transpose() * triTr.conjugate()); VERIFY_IS_APPROX( ge_xs.noalias() = (s1*mat.adjoint()).template triangularView() * (s2*ge_left.adjoint()), s1*triTr.conjugate() * (s2*ge_left.adjoint())); VERIFY_IS_APPROX( ge_sx.noalias() = ge_right.adjoint() * mat.adjoint().template triangularView(), ge_right.adjoint() * triTr.conjugate()); - + ge_xs_save = ge_xs; VERIFY_IS_APPROX( (ge_xs_save + s1*triTr.conjugate() * (s2*ge_left.adjoint())).eval(), ge_xs.noalias() += (s1*mat.adjoint()).template triangularView() * (s2*ge_left.adjoint()) ); ge_sx.setRandom(); @@ -61,13 +69,13 @@ void trmm(int rows=internal::random(1,EIGEN_TEST_MAX_SIZE), } template -void trmv(int rows=internal::random(1,EIGEN_TEST_MAX_SIZE), int cols=internal::random(1,EIGEN_TEST_MAX_SIZE)) +void trmv(int rows=get_random_size(), int cols=get_random_size()) { trmm(rows,cols,1); } template -void trmm(int rows=internal::random(1,EIGEN_TEST_MAX_SIZE), int cols=internal::random(1,EIGEN_TEST_MAX_SIZE), int otherCols = internal::random(1,EIGEN_TEST_MAX_SIZE)) +void trmm(int rows=get_random_size(), int cols=get_random_size(), int otherCols = get_random_size()) { trmm(rows,cols,otherCols); } diff --git a/test/rand.cpp b/test/rand.cpp index 7c8068a3b..6790acf15 100644 --- a/test/rand.cpp +++ b/test/rand.cpp @@ -35,8 +35,8 @@ template void check_all_in_range(Scalar x, Scalar y) void test_rand() { long long_ref = NumTraits::highest()/10; - char char_offset = (std::min)(g_repeat,64); - char short_offset = (std::min)(g_repeat,16000); + signed char char_offset = (std::min)(g_repeat,64); + signed char short_offset = (std::min)(g_repeat,16000); for(int i = 0; i < g_repeat*10; i++) { CALL_SUBTEST(check_in_range(10,11)); @@ -57,13 +57,13 @@ void test_rand() CALL_SUBTEST(check_in_range(-long_ref,long_ref)); } - CALL_SUBTEST(check_all_in_range(11,11)); - CALL_SUBTEST(check_all_in_range(11,11+char_offset)); - CALL_SUBTEST(check_all_in_range(-5,5)); - CALL_SUBTEST(check_all_in_range(-11-char_offset,-11)); - CALL_SUBTEST(check_all_in_range(-126,-126+char_offset)); - CALL_SUBTEST(check_all_in_range(126-char_offset,126)); - CALL_SUBTEST(check_all_in_range(-126,126)); + CALL_SUBTEST(check_all_in_range(11,11)); + CALL_SUBTEST(check_all_in_range(11,11+char_offset)); + CALL_SUBTEST(check_all_in_range(-5,5)); + CALL_SUBTEST(check_all_in_range(-11-char_offset,-11)); + CALL_SUBTEST(check_all_in_range(-126,-126+char_offset)); + CALL_SUBTEST(check_all_in_range(126-char_offset,126)); + CALL_SUBTEST(check_all_in_range(-126,126)); 
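An aside on the rand.cpp hunk above, which swaps char for signed char: whether plain char can represent negative values such as -126 is implementation-defined, and it is unsigned by default on common ARM and PowerPC ABIs. A standalone sketch (not part of the patch) to probe a toolchain:

#include <iostream>
#include <limits>

int main()
{
  // Typically 1 on x86 toolchains and 0 on ARM defaults, where a range
  // like [-126,126] silently collapses for plain char.
  std::cout << "char is signed: " << std::numeric_limits<char>::is_signed << '\n';
  std::cout << "char min: " << static_cast<int>(std::numeric_limits<char>::min()) << '\n';
}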
CALL_SUBTEST(check_all_in_range(11,11)); CALL_SUBTEST(check_all_in_range(11,11+short_offset)); diff --git a/test/redux.cpp b/test/redux.cpp index 0d176e500..6ddc59c18 100644 --- a/test/redux.cpp +++ b/test/redux.cpp @@ -2,11 +2,14 @@ // for linear algebra. // // Copyright (C) 2008 Benoit Jacob +// Copyright (C) 2015 Gael Guennebaud // // This Source Code Form is subject to the terms of the Mozilla // Public License v. 2.0. If a copy of the MPL was not distributed // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. +#define TEST_ENABLE_TEMPORARY_TRACKING + #include "main.h" template void matrixRedux(const MatrixType& m) @@ -21,7 +24,7 @@ template void matrixRedux(const MatrixType& m) MatrixType m1 = MatrixType::Random(rows, cols); // The entries of m1 are uniformly distributed in [0,1], so m1.prod() is very small. This may lead to test - // failures if we underflow into denormals. Thus, we scale so that entires are close to 1. + // failures if we underflow into denormals. Thus, we scale so that entries are close to 1. MatrixType m1_for_prod = MatrixType::Ones(rows, cols) + RealScalar(0.2) * m1; VERIFY_IS_MUCH_SMALLER_THAN(MatrixType::Zero(rows, cols).sum(), Scalar(1)); @@ -53,10 +56,24 @@ template void matrixRedux(const MatrixType& m) VERIFY_IS_APPROX(m1_for_prod.block(r0,c0,r1,c1).prod(), m1_for_prod.block(r0,c0,r1,c1).eval().prod()); VERIFY_IS_APPROX(m1.block(r0,c0,r1,c1).real().minCoeff(), m1.block(r0,c0,r1,c1).real().eval().minCoeff()); VERIFY_IS_APPROX(m1.block(r0,c0,r1,c1).real().maxCoeff(), m1.block(r0,c0,r1,c1).real().eval().maxCoeff()); + + // regression for bug 1090 + const int R1 = MatrixType::RowsAtCompileTime>=2 ? MatrixType::RowsAtCompileTime/2 : 6; + const int C1 = MatrixType::ColsAtCompileTime>=2 ? MatrixType::ColsAtCompileTime/2 : 6; + if(R1<=rows-r0 && C1<=cols-c0) + { + VERIFY_IS_APPROX( (m1.template block(r0,c0).sum()), m1.block(r0,c0,R1,C1).sum() ); + } // test empty objects VERIFY_IS_APPROX(m1.block(r0,c0,0,0).sum(), Scalar(0)); VERIFY_IS_APPROX(m1.block(r0,c0,0,0).prod(), Scalar(1)); + + // test nesting complex expression + VERIFY_EVALUATION_COUNT( (m1.matrix()*m1.matrix().transpose()).sum(), (MatrixType::SizeAtCompileTime==Dynamic ? 1 : 0) ); + Matrix m2(rows,rows); + m2.setRandom(); + VERIFY_EVALUATION_COUNT( ((m1.matrix()*m1.matrix().transpose())+m2).sum(), (MatrixType::SizeAtCompileTime==Dynamic ? 
1 : 0) ); } template void vectorRedux(const VectorType& w) diff --git a/test/ref.cpp b/test/ref.cpp index 1341dfef7..769db0414 100644 --- a/test/ref.cpp +++ b/test/ref.cpp @@ -18,6 +18,18 @@ // test Ref.h +// Deal with i387 extended precision +#if EIGEN_ARCH_i386 && !(EIGEN_ARCH_x86_64) + +#if EIGEN_COMP_GNUC_STRICT && EIGEN_GNUC_AT_LEAST(4,4) +#pragma GCC optimize ("-ffloat-store") +#else +#undef VERIFY_IS_EQUAL +#define VERIFY_IS_EQUAL(X,Y) VERIFY_IS_APPROX(X,Y) +#endif + +#endif + template void ref_matrix(const MatrixType& m) { typedef typename MatrixType::Index Index; @@ -55,7 +67,6 @@ template void ref_matrix(const MatrixType& m) rm2 = m2.block(i,j,brows,bcols); VERIFY_IS_EQUAL(m1, m2); - ConstRefDynMat rm3 = m1.block(i,j,brows,bcols); m1.block(i,j,brows,bcols) *= 2; m2.block(i,j,brows,bcols) *= 2; diff --git a/test/schur_complex.cpp b/test/schur_complex.cpp index 5e869790f..deb78e44e 100644 --- a/test/schur_complex.cpp +++ b/test/schur_complex.cpp @@ -25,7 +25,7 @@ template void schur(int size = MatrixType::ColsAtCompileTim ComplexMatrixType T = schurOfA.matrixT(); for(int row = 1; row < size; ++row) { for(int col = 0; col < row; ++col) { - VERIFY(T(row,col) == (typename MatrixType::Scalar)0); + VERIFY(T(row,col) == (typename MatrixType::Scalar)0); } } VERIFY_IS_APPROX(A.template cast(), U * T * U.adjoint()); @@ -70,7 +70,7 @@ template void schur(int size = MatrixType::ColsAtCompileTim VERIFY_IS_EQUAL(cs1.matrixT(), csOnlyT.matrixT()); VERIFY_RAISES_ASSERT(csOnlyT.matrixU()); - if (size > 1) + if (size > 1 && size < 20) { // Test matrix with NaN A(0,0) = std::numeric_limits::quiet_NaN(); diff --git a/test/schur_real.cpp b/test/schur_real.cpp index 36b9c24d1..cfe4570d4 100644 --- a/test/schur_real.cpp +++ b/test/schur_real.cpp @@ -91,7 +91,7 @@ template void schur(int size = MatrixType::ColsAtCompileTim VERIFY_IS_EQUAL(rs1.matrixT(), rsOnlyT.matrixT()); VERIFY_RAISES_ASSERT(rsOnlyT.matrixU()); - if (size > 2) + if (size > 2 && size < 20) { // Test matrix with NaN A(0,0) = std::numeric_limits::quiet_NaN(); diff --git a/test/sparse_basic.cpp b/test/sparse_basic.cpp index 492b3a4f2..d803e7dae 100644 --- a/test/sparse_basic.cpp +++ b/test/sparse_basic.cpp @@ -188,6 +188,8 @@ template void sparse_basic(const SparseMatrixType& re refM4.setRandom(); // sparse cwise* dense VERIFY_IS_APPROX(m3.cwiseProduct(refM4), refM3.cwiseProduct(refM4)); + // dense cwise* sparse + VERIFY_IS_APPROX(refM4.cwiseProduct(m3), refM4.cwiseProduct(refM3)); // VERIFY_IS_APPROX(m3.cwise()/refM4, refM3.cwise()/refM4); // test aliasing @@ -219,10 +221,10 @@ template void sparse_basic(const SparseMatrixType& re refM2.setZero(); int countFalseNonZero = 0; int countTrueNonZero = 0; - for (Index j=0; j(0,1); if (x<0.1) @@ -232,22 +234,21 @@ template void sparse_basic(const SparseMatrixType& re else if (x<0.5) { countFalseNonZero++; - m2.insertBackByOuterInner(j,i) = Scalar(0); + m2.insert(i,j) = Scalar(0); } else { countTrueNonZero++; - m2.insertBackByOuterInner(j,i) = Scalar(1); - if(SparseMatrixType::IsRowMajor) - refM2(j,i) = Scalar(1); - else - refM2(i,j) = Scalar(1); + m2.insert(i,j) = Scalar(1); + refM2(i,j) = Scalar(1); } } } - m2.finalize(); + if(internal::random()) + m2.makeCompressed(); VERIFY(countFalseNonZero+countTrueNonZero == m2.nonZeros()); - VERIFY_IS_APPROX(m2, refM2); + if(countTrueNonZero>0) + VERIFY_IS_APPROX(m2, refM2); m2.prune(Scalar(1)); VERIFY(countTrueNonZero==m2.nonZeros()); VERIFY_IS_APPROX(m2, refM2); @@ -259,19 +260,33 @@ template void sparse_basic(const SparseMatrixType& re std::vector 
triplets; Index ntriplets = rows*cols; triplets.reserve(ntriplets); - DenseMatrix refMat(rows,cols); - refMat.setZero(); + DenseMatrix refMat_sum = DenseMatrix::Zero(rows,cols); + DenseMatrix refMat_prod = DenseMatrix::Zero(rows,cols); + DenseMatrix refMat_last = DenseMatrix::Zero(rows,cols); + for(Index i=0;i<ntriplets;++i) { StorageIndex r = internal::random<StorageIndex>(0,StorageIndex(rows-1)); StorageIndex c = internal::random(0,StorageIndex(cols-1)); Scalar v = internal::random(); triplets.push_back(TripletType(r,c,v)); - refMat(r,c) += v; + refMat_sum(r,c) += v; + if(std::abs(refMat_prod(r,c))==0) + refMat_prod(r,c) = v; + else + refMat_prod(r,c) *= v; + refMat_last(r,c) = v; } SparseMatrixType m(rows,cols); m.setFromTriplets(triplets.begin(), triplets.end()); - VERIFY_IS_APPROX(m, refMat); + VERIFY_IS_APPROX(m, refMat_sum); + + m.setFromTriplets(triplets.begin(), triplets.end(), std::multiplies()); + VERIFY_IS_APPROX(m, refMat_prod); +#if (defined(__cplusplus) && __cplusplus >= 201103L) + m.setFromTriplets(triplets.begin(), triplets.end(), [] (Scalar,Scalar b) { return b; }); + VERIFY_IS_APPROX(m, refMat_last); +#endif } // test Map @@ -325,6 +340,10 @@ template void sparse_basic(const SparseMatrixType& re refMat3 = refMat2.template triangularView(); m3 = m2.template triangularView(); VERIFY_IS_APPROX(m3, refMat3); + + // check sparse-triangular to dense + refMat3 = m2.template triangularView(); + VERIFY_IS_APPROX(refMat3, DenseMatrix(refMat2.template triangularView())); } // test selfadjointView @@ -421,6 +440,20 @@ template void sparse_basic(const SparseMatrixType& re SparseMatrixType m1(rows, rows); m1.setIdentity(); VERIFY_IS_APPROX(m1, refMat1); + for(int k=0; k<rows*rows/4; ++k) + { + Index i = internal::random<Index>(0,rows-1); + Index j = internal::random(0,rows-1); + Scalar v = internal::random(); + m1.coeffRef(i,j) = v; + refMat1.coeffRef(i,j) = v; + VERIFY_IS_APPROX(m1, refMat1); + if(internal::random(0,10)<2) + m1.makeCompressed(); + } + m1.setIdentity(); + refMat1.setIdentity(); + VERIFY_IS_APPROX(m1, refMat1); } } @@ -480,4 +513,19 @@ void test_sparse_basic() // Regression test for bug 900: (manually insert higher values here, if you have enough RAM): CALL_SUBTEST_3((big_sparse_triplet >(10000, 10000, 0.125))); CALL_SUBTEST_4((big_sparse_triplet >(10000, 10000, 0.125))); + + // Regression test for bug 1105 +#ifdef EIGEN_TEST_PART_6 + { + int n = Eigen::internal::random(200,600); + SparseMatrix<std::complex<double>,0, long> mat(n, n); + std::complex<double> val; + + for(int i=0; i<n; ++i) + { + mat.coeffRef(0, i) = val; + VERIFY(mat.data().allocatedSize() < 20*n); + } + } +#endif } diff --git a/test/sparse_permutations.cpp b/test/sparse_permutations.cpp --- a/test/sparse_permutations.cpp +++ b/test/sparse_permutations.cpp @@ -2,11 +2,43 @@ // This file is part of Eigen, a lightweight C++ template library // for linear algebra. // -// Copyright (C) 2011 Gael Guennebaud +// Copyright (C) 2011-2015 Gael Guennebaud // // This Source Code Form is subject to the terms of the Mozilla // Public License v. 2.0. If a copy of the MPL was not distributed // with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
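The sparse_permutations.cpp additions that follow rely on a counting hook: the library expands a user-supplied plugin macro (EIGEN_SPARSE_TRANSPOSED_COPY_PLUGIN) at the instrumented code path, and a VERIFY macro resets and checks a global counter around the expression under test. The same pattern in miniature, with illustrative names (Tracked and ON_COPY_HOOK are ours, not Eigen's):

#include <iostream>

static long nb_copies = 0;
#define ON_COPY_HOOK { ++nb_copies; } // the library-side hook expands here

struct Tracked
{
  Tracked() {}
  Tracked(const Tracked&) ON_COPY_HOOK // instrumented copy path
};

#define VERIFY_COPY_COUNT(XPR, N) { \
  nb_copies = 0; \
  XPR; \
  if(nb_copies != (N)) std::cerr << "nb_copies == " << nb_copies << "\n"; \
}

int main()
{
  Tracked a;
  VERIFY_COPY_COUNT(Tracked b(a), 1); // exactly one copy expected
  return 0;
}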
+ +static long int nb_transposed_copies; +#define EIGEN_SPARSE_TRANSPOSED_COPY_PLUGIN {nb_transposed_copies++;} +#define VERIFY_TRANSPOSITION_COUNT(XPR,N) {\ + nb_transposed_copies = 0; \ + XPR; \ + if(nb_transposed_copies!=N) std::cerr << "nb_transposed_copies == " << nb_transposed_copies << "\n"; \ + VERIFY( (#XPR) && nb_transposed_copies==N ); \ + } + #include "sparse.h" +template +bool is_sorted(const T& mat) { + for(Index k = 0; k=it.index()) + return false; + prev = it.index(); + } + } + return true; +} + +template +typename internal::nested_eval::type eval(const T &xpr) +{ + VERIFY( int(internal::nested_eval::type::Flags&RowMajorBit) == int(internal::evaluator::Flags&RowMajorBit) ); + return xpr; +} + template void sparse_permutations(const SparseMatrixType& ref) { const Index rows = ref.rows(); @@ -18,6 +50,8 @@ template void sparse_permutations(c typedef SparseMatrix OtherSparseMatrixType; typedef Matrix DenseMatrix; typedef Matrix VectorI; +// bool IsRowMajor1 = SparseMatrixType::IsRowMajor; +// bool IsRowMajor2 = OtherSparseMatrixType::IsRowMajor; double density = (std::max)(8./(rows*cols), 0.01); @@ -42,58 +76,69 @@ template void sparse_permutations(c randomPermutationVector(pi, cols); p.indices() = pi; - res = mat*p; + VERIFY( is_sorted( ::eval(mat*p) )); + VERIFY( is_sorted( res = mat*p )); + VERIFY_TRANSPOSITION_COUNT( ::eval(mat*p), 0); + //VERIFY_TRANSPOSITION_COUNT( res = mat*p, IsRowMajor ? 1 : 0 ); res_d = mat_d*p; VERIFY(res.isApprox(res_d) && "mat*p"); - res = p*mat; + VERIFY( is_sorted( ::eval(p*mat) )); + VERIFY( is_sorted( res = p*mat )); + VERIFY_TRANSPOSITION_COUNT( ::eval(p*mat), 0); res_d = p*mat_d; VERIFY(res.isApprox(res_d) && "p*mat"); - res = mat*p.inverse(); + VERIFY( is_sorted( (mat*p).eval() )); + VERIFY( is_sorted( res = mat*p.inverse() )); + VERIFY_TRANSPOSITION_COUNT( ::eval(mat*p.inverse()), 0); res_d = mat*p.inverse(); VERIFY(res.isApprox(res_d) && "mat*inv(p)"); - res = p.inverse()*mat; + VERIFY( is_sorted( (p*mat+p*mat).eval() )); + VERIFY( is_sorted( res = p.inverse()*mat )); + VERIFY_TRANSPOSITION_COUNT( ::eval(p.inverse()*mat), 0); res_d = p.inverse()*mat_d; VERIFY(res.isApprox(res_d) && "inv(p)*mat"); - res = mat.twistedBy(p); + VERIFY( is_sorted( (p * mat * p.inverse()).eval() )); + VERIFY( is_sorted( res = mat.twistedBy(p) )); + VERIFY_TRANSPOSITION_COUNT( ::eval(p * mat * p.inverse()), 0); res_d = (p * mat_d) * p.inverse(); VERIFY(res.isApprox(res_d) && "p*mat*inv(p)"); - res = mat.template selfadjointView().twistedBy(p_null); + VERIFY( is_sorted( res = mat.template selfadjointView().twistedBy(p_null) )); res_d = up_sym_d; VERIFY(res.isApprox(res_d) && "full selfadjoint upper to full"); - res = mat.template selfadjointView().twistedBy(p_null); + VERIFY( is_sorted( res = mat.template selfadjointView().twistedBy(p_null) )); res_d = lo_sym_d; VERIFY(res.isApprox(res_d) && "full selfadjoint lower to full"); - res = up.template selfadjointView().twistedBy(p_null); + VERIFY( is_sorted( res = up.template selfadjointView().twistedBy(p_null) )); res_d = up_sym_d; VERIFY(res.isApprox(res_d) && "upper selfadjoint to full"); - res = lo.template selfadjointView().twistedBy(p_null); + VERIFY( is_sorted( res = lo.template selfadjointView().twistedBy(p_null) )); res_d = lo_sym_d; VERIFY(res.isApprox(res_d) && "lower selfadjoint full"); - res = mat.template selfadjointView(); + VERIFY( is_sorted( res = mat.template selfadjointView() )); res_d = up_sym_d; VERIFY(res.isApprox(res_d) && "full selfadjoint upper to full"); - res = mat.template selfadjointView(); 
+ VERIFY( is_sorted( res = mat.template selfadjointView() )); res_d = lo_sym_d; VERIFY(res.isApprox(res_d) && "full selfadjoint lower to full"); - res = up.template selfadjointView(); + VERIFY( is_sorted( res = up.template selfadjointView() )); res_d = up_sym_d; VERIFY(res.isApprox(res_d) && "upper selfadjoint to full"); - res = lo.template selfadjointView(); + VERIFY( is_sorted( res = lo.template selfadjointView() )); res_d = lo_sym_d; VERIFY(res.isApprox(res_d) && "lower selfadjoint full"); @@ -150,19 +195,19 @@ template void sparse_permutations(c VERIFY(res.isApprox(res_d) && "upper selfadjoint twisted to lower"); - res = mat.template selfadjointView().twistedBy(p); + VERIFY( is_sorted( res = mat.template selfadjointView().twistedBy(p) )); res_d = (p * up_sym_d) * p.inverse(); VERIFY(res.isApprox(res_d) && "full selfadjoint upper twisted to full"); - res = mat.template selfadjointView().twistedBy(p); + VERIFY( is_sorted( res = mat.template selfadjointView().twistedBy(p) )); res_d = (p * lo_sym_d) * p.inverse(); VERIFY(res.isApprox(res_d) && "full selfadjoint lower twisted to full"); - res = up.template selfadjointView().twistedBy(p); + VERIFY( is_sorted( res = up.template selfadjointView().twistedBy(p) )); res_d = (p * up_sym_d) * p.inverse(); VERIFY(res.isApprox(res_d) && "upper selfadjoint twisted to full"); - res = lo.template selfadjointView().twistedBy(p); + VERIFY( is_sorted( res = lo.template selfadjointView().twistedBy(p) )); res_d = (p * lo_sym_d) * p.inverse(); VERIFY(res.isApprox(res_d) && "lower selfadjoint twisted to full"); } @@ -182,4 +227,10 @@ void test_sparse_permutations() CALL_SUBTEST_1(( sparse_permutations_all(s) )); CALL_SUBTEST_2(( sparse_permutations_all >(s) )); } + + VERIFY((internal::is_same,OnTheRight,false,SparseShape>::ReturnType, + internal::nested_eval,PermutationMatrix,AliasFreeProduct>,1>::type>::value)); + + VERIFY((internal::is_same,OnTheLeft,false,SparseShape>::ReturnType, + internal::nested_eval,SparseMatrix,AliasFreeProduct>,1>::type>::value)); } diff --git a/test/sparse_product.cpp b/test/sparse_product.cpp index f1e5b8e4c..7ec5270e8 100644 --- a/test/sparse_product.cpp +++ b/test/sparse_product.cpp @@ -76,6 +76,21 @@ template void sparse_product() VERIFY_IS_APPROX(m4=(m2t.transpose()*m3t.transpose()).pruned(0), refMat4=refMat2t.transpose()*refMat3t.transpose()); VERIFY_IS_APPROX(m4=(m2*m3t.transpose()).pruned(0), refMat4=refMat2*refMat3t.transpose()); + // dense ?= sparse * sparse + VERIFY_IS_APPROX(dm4 =m2*m3, refMat4 =refMat2*refMat3); + VERIFY_IS_APPROX(dm4+=m2*m3, refMat4+=refMat2*refMat3); + VERIFY_IS_APPROX(dm4-=m2*m3, refMat4-=refMat2*refMat3); + VERIFY_IS_APPROX(dm4 =m2t.transpose()*m3, refMat4 =refMat2t.transpose()*refMat3); + VERIFY_IS_APPROX(dm4+=m2t.transpose()*m3, refMat4+=refMat2t.transpose()*refMat3); + VERIFY_IS_APPROX(dm4-=m2t.transpose()*m3, refMat4-=refMat2t.transpose()*refMat3); + VERIFY_IS_APPROX(dm4 =m2t.transpose()*m3t.transpose(), refMat4 =refMat2t.transpose()*refMat3t.transpose()); + VERIFY_IS_APPROX(dm4+=m2t.transpose()*m3t.transpose(), refMat4+=refMat2t.transpose()*refMat3t.transpose()); + VERIFY_IS_APPROX(dm4-=m2t.transpose()*m3t.transpose(), refMat4-=refMat2t.transpose()*refMat3t.transpose()); + VERIFY_IS_APPROX(dm4 =m2*m3t.transpose(), refMat4 =refMat2*refMat3t.transpose()); + VERIFY_IS_APPROX(dm4+=m2*m3t.transpose(), refMat4+=refMat2*refMat3t.transpose()); + VERIFY_IS_APPROX(dm4-=m2*m3t.transpose(), refMat4-=refMat2*refMat3t.transpose()); + VERIFY_IS_APPROX(dm4 = m2*m3*s1, refMat4 = refMat2*refMat3*s1); + // test 
   m4 = m2; refMat4 = refMat2;
   VERIFY_IS_APPROX(m4=m4*m3, refMat4=refMat4*refMat3);
diff --git a/test/sparse_ref.cpp b/test/sparse_ref.cpp
index d173ee658..f4aefbb48 100644
--- a/test/sparse_ref.cpp
+++ b/test/sparse_ref.cpp
@@ -26,7 +26,7 @@ inline void on_temporary_creation() {
 #define VERIFY_EVALUATION_COUNT(XPR,N) {\
     nb_temporaries = 0; \
-    XPR; \
+    CALL_SUBTEST( XPR ); \
     if(nb_temporaries!=N) std::cerr << "nb_temporaries == " << nb_temporaries << "\n"; \
     VERIFY( (#XPR) && nb_temporaries==N ); \
   }
@@ -53,10 +53,14 @@ EIGEN_DONT_INLINE void call_ref_3(const Ref<const SparseMatrix<float>, StandardCompressedFormat>& a, const B &b)
   VERIFY_IS_EQUAL(a.toDense(),b.toDense());
 }
 
+template<typename B>
+EIGEN_DONT_INLINE void call_ref_4(Ref<SparseVector<float> > a, const B &b) { VERIFY_IS_EQUAL(a.toDense(),b.toDense()); }
+
+template<typename B>
+EIGEN_DONT_INLINE void call_ref_5(const Ref<const SparseVector<float> >& a, const B &b) { VERIFY_IS_EQUAL(a.toDense(),b.toDense()); }
+
 void call_ref()
 {
-//   SparseVector<std::complex<float> > ca = VectorXcf::Random(10).sparseView();
-//   SparseVector<float> a = VectorXf::Random(10).sparseView();
   SparseMatrix<float>          A = MatrixXf::Random(10,10).sparseView(0.5,1);
   SparseMatrix<float,RowMajor> B = MatrixXf::Random(10,10).sparseView(0.5,1);
   SparseMatrix<float>          C = MatrixXf::Random(10,10).sparseView(0.5,1);
@@ -64,6 +68,9 @@ void call_ref()
   const SparseMatrix<float>& Ac(A);
   Block<SparseMatrix<float> > Ab(A,0,1, 3,3);
   const Block<const SparseMatrix<float> > Abc(A,0,1,3,3);
+  SparseVector<float>          vc = VectorXf::Random(10).sparseView(0.5,1);
+  SparseVector<float,RowMajor> vr = VectorXf::Random(10).sparseView(0.5,1);
+  SparseMatrix<float> AA = A*A;
 
   VERIFY_EVALUATION_COUNT( call_ref_1(A, A), 0);
@@ -80,8 +87,8 @@ void call_ref()
   VERIFY_EVALUATION_COUNT( call_ref_3(B, B), 1);
   VERIFY_EVALUATION_COUNT( call_ref_2(B.transpose(), B.transpose()), 0);
   VERIFY_EVALUATION_COUNT( call_ref_3(B.transpose(), B.transpose()), 0);
-  VERIFY_EVALUATION_COUNT( call_ref_2(A*A, A*A), 1);
-  VERIFY_EVALUATION_COUNT( call_ref_3(A*A, A*A), 1);
+  VERIFY_EVALUATION_COUNT( call_ref_2(A*A, AA), 1);
+  VERIFY_EVALUATION_COUNT( call_ref_3(A*A, AA), 1);
   VERIFY(!C.isCompressed());
   VERIFY_EVALUATION_COUNT( call_ref_3(C, C), 1);
@@ -103,8 +110,20 @@ void call_ref()
   VERIFY_EVALUATION_COUNT( call_ref_2(A.middleCols(1,3), A.middleCols(1,3)), 0);
   VERIFY_EVALUATION_COUNT( call_ref_2(A.col(2), A.col(2)), 0);
+  VERIFY_EVALUATION_COUNT( call_ref_2(vc, vc), 0);
+  VERIFY_EVALUATION_COUNT( call_ref_2(vr.transpose(), vr.transpose()), 0);
+  VERIFY_EVALUATION_COUNT( call_ref_2(vr, vr.transpose()), 0);
   VERIFY_EVALUATION_COUNT( call_ref_2(A.block(1,1,3,3), A.block(1,1,3,3)), 1); // should be 0 (allocate starts/nnz only)
+
+  VERIFY_EVALUATION_COUNT( call_ref_4(vc, vc), 0);
+  VERIFY_EVALUATION_COUNT( call_ref_4(vr, vr.transpose()), 0);
+  VERIFY_EVALUATION_COUNT( call_ref_5(vc, vc), 0);
+  VERIFY_EVALUATION_COUNT( call_ref_5(vr, vr.transpose()), 0);
+  VERIFY_EVALUATION_COUNT( call_ref_4(A.col(2), A.col(2)), 0);
+  VERIFY_EVALUATION_COUNT( call_ref_5(A.col(2), A.col(2)), 0);
+  // VERIFY_EVALUATION_COUNT( call_ref_4(A.row(2), A.row(2).transpose()), 1); // does not compile on purpose
+  VERIFY_EVALUATION_COUNT( call_ref_5(A.row(2), A.row(2).transpose()), 1);
 }
 
 void test_sparse_ref()
@@ -113,5 +132,8 @@ void test_sparse_ref()
   CALL_SUBTEST_1( check_const_correctness(SparseMatrix<float>()) );
   CALL_SUBTEST_1( check_const_correctness(SparseMatrix<double,RowMajor>()) );
   CALL_SUBTEST_2( call_ref() );
+
+  CALL_SUBTEST_3( check_const_correctness(SparseVector<float>()) );
+  CALL_SUBTEST_3( check_const_correctness(SparseVector<double,RowMajor>()) );
   }
 }
diff --git a/test/sparse_solver.h b/test/sparse_solver.h
index a0254ff1c..b67653496 100644
--- a/test/sparse_solver.h
+++ b/test/sparse_solver.h
@@ -63,32 +63,47 @@ void check_sparse_solving(Solver& solver, const typename Solver::MatrixType& A, const Rhs& b, const DenseMat& dA, const DenseRhs& db)
     VERIFY(xm.isApprox(refX,test_precision<Scalar>()));
   }
 
-  // test initialization ctor
+  // if not too large, do some extra checks:
+  if(A.rows()<2000)
   {
-    Rhs x(b.rows(), b.cols());
-    Solver solver2(A);
-    VERIFY(solver2.info() == Success);
-    x = solver2.solve(b);
-    VERIFY(x.isApprox(refX,test_precision<Scalar>()));
-  }
-
-  // test dense Block as the result and rhs:
-  {
-    DenseRhs x(refX.rows(), refX.cols());
-    DenseRhs oldb(db);
-    x.setZero();
-    x.block(0,0,x.rows(),x.cols()) = solver.solve(db.block(0,0,db.rows(),db.cols()));
-    VERIFY(oldb.isApprox(db) && "sparse solver testing: the rhs should not be modified!");
-    VERIFY(x.isApprox(refX,test_precision<Scalar>()));
-  }
-
-  // test uncompressed inputs
-  {
-    Mat A2 = A;
-    A2.reserve((ArrayXf::Random(A.outerSize())+2).template cast<StorageIndex>().eval());
-    solver.compute(A2);
-    Rhs x = solver.solve(b);
-    VERIFY(x.isApprox(refX,test_precision<Scalar>()));
+    // test initialization ctor
+    {
+      Rhs x(b.rows(), b.cols());
+      Solver solver2(A);
+      VERIFY(solver2.info() == Success);
+      x = solver2.solve(b);
+      VERIFY(x.isApprox(refX,test_precision<Scalar>()));
+    }
+
+    // test dense Block as the result and rhs:
+    {
+      DenseRhs x(refX.rows(), refX.cols());
+      DenseRhs oldb(db);
+      x.setZero();
+      x.block(0,0,x.rows(),x.cols()) = solver.solve(db.block(0,0,db.rows(),db.cols()));
+      VERIFY(oldb.isApprox(db) && "sparse solver testing: the rhs should not be modified!");
+      VERIFY(x.isApprox(refX,test_precision<Scalar>()));
+    }
+
+    // test uncompressed inputs
+    {
+      Mat A2 = A;
+      A2.reserve((ArrayXf::Random(A.outerSize())+2).template cast<StorageIndex>().eval());
+      solver.compute(A2);
+      Rhs x = solver.solve(b);
+      VERIFY(x.isApprox(refX,test_precision<Scalar>()));
+    }
+
+    // test expression as input
+    {
+      solver.compute(0.5*(A+A));
+      Rhs x = solver.solve(b);
+      VERIFY(x.isApprox(refX,test_precision<Scalar>()));
+
+      Solver solver2(0.5*(A+A));
+      Rhs x2 = solver2.solve(b);
+      VERIFY(x2.isApprox(refX,test_precision<Scalar>()));
+    }
   }
 }
diff --git a/test/sparselu.cpp b/test/sparselu.cpp
index 78615ff3b..bd000baf1 100644
--- a/test/sparselu.cpp
+++ b/test/sparselu.cpp
@@ -3,25 +3,9 @@
 //
 // Copyright (C) 2012 Désiré Nuentsa-Wakam <desire.nuentsa_wakam@inria.fr>
 //
-// Eigen is free software; you can redistribute it and/or
-// modify it under the terms of the GNU Lesser General Public
-// License as published by the Free Software Foundation; either
-// version 3 of the License, or (at your option) any later version.
-//
-// Alternatively, you can redistribute it and/or
-// modify it under the terms of the GNU General Public License as
-// published by the Free Software Foundation; either version 2 of
-// the License, or (at your option) any later version.
-//
-// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY
-// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the
-// GNU General Public License for more details.
-//
-// You should have received a copy of the GNU Lesser General Public
-// License and a copy of the GNU General Public License along with
-// Eigen. If not, see <http://www.gnu.org/licenses/>.
-
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
 
 // SparseLU solve does not accept column major matrices for the destination.
// However, as expected, the generic check_sparse_square_solving routines produces row-major diff --git a/test/unalignedassert.cpp b/test/unalignedassert.cpp index 014cc834b..e2f03ffca 100644 --- a/test/unalignedassert.cpp +++ b/test/unalignedassert.cpp @@ -157,7 +157,9 @@ void unalignedassert() VERIFY_RAISES_ASSERT(construct_at_boundary(8)); VERIFY_RAISES_ASSERT(construct_at_boundary(8)); VERIFY_RAISES_ASSERT(construct_at_boundary(8)); - VERIFY_RAISES_ASSERT(construct_at_boundary(8)); + // Complexes are disabled because the compiler might aggressively vectorize + // the initialization of complex coeffs to 0 before we can check for alignedness + //VERIFY_RAISES_ASSERT(construct_at_boundary(8)); VERIFY_RAISES_ASSERT(construct_at_boundary(8)); } for(int b=8; b(b)); if(b<32) VERIFY_RAISES_ASSERT(construct_at_boundary(b)); if(b<128) VERIFY_RAISES_ASSERT(construct_at_boundary(b)); - if(b<32) VERIFY_RAISES_ASSERT(construct_at_boundary(b)); + //if(b<32) VERIFY_RAISES_ASSERT(construct_at_boundary(b)); } #endif } diff --git a/test/vectorization_logic.cpp b/test/vectorization_logic.cpp index 6ff38ed11..35fbb9781 100644 --- a/test/vectorization_logic.cpp +++ b/test/vectorization_logic.cpp @@ -1,45 +1,22 @@ // This file is part of Eigen, a lightweight C++ template library // for linear algebra. // -// Copyright (C) 2008 Gael Guennebaud +// Copyright (C) 2015 Gael Guennebaud // // This Source Code Form is subject to the terms of the Mozilla // Public License v. 2.0. If a copy of the MPL was not distributed // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. +#ifdef EIGEN_DEFAULT_TO_ROW_MAJOR +#undef EIGEN_DEFAULT_TO_ROW_MAJOR +#endif #define EIGEN_DEBUG_ASSIGN #include "main.h" #include -std::string demangle_traversal(int t) -{ - if(t==DefaultTraversal) return "DefaultTraversal"; - if(t==LinearTraversal) return "LinearTraversal"; - if(t==InnerVectorizedTraversal) return "InnerVectorizedTraversal"; - if(t==LinearVectorizedTraversal) return "LinearVectorizedTraversal"; - if(t==SliceVectorizedTraversal) return "SliceVectorizedTraversal"; - return "?"; -} -std::string demangle_unrolling(int t) -{ - if(t==NoUnrolling) return "NoUnrolling"; - if(t==InnerUnrolling) return "InnerUnrolling"; - if(t==CompleteUnrolling) return "CompleteUnrolling"; - return "?"; -} -std::string demangle_flags(int f) -{ - std::string res; - if(f&RowMajorBit) res += " | RowMajor"; - if(f&PacketAccessBit) res += " | Packet"; - if(f&LinearAccessBit) res += " | Linear"; - if(f&LvalueBit) res += " | Lvalue"; - if(f&DirectAccessBit) res += " | Direct"; - if(f&NestByRefBit) res += " | NestByRef"; - if(f&NoPreferredStorageOrderBit) res += " | NoPreferredStorageOrderBit"; - - return res; -} +using internal::demangle_flags; +using internal::demangle_traversal; +using internal::demangle_unrolling; template bool test_assign(const Dst&, const Src&, int traversal, int unrolling) diff --git a/test/vectorwiseop.cpp b/test/vectorwiseop.cpp index 03f50bb5a..87476f95b 100644 --- a/test/vectorwiseop.cpp +++ b/test/vectorwiseop.cpp @@ -2,11 +2,13 @@ // for linear algebra. // // Copyright (C) 2011 Benoit Jacob +// Copyright (C) 2015 Gael Guennebaud // // This Source Code Form is subject to the terms of the Mozilla // Public License v. 2.0. If a copy of the MPL was not distributed // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
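// Editor's note (annotation, not part of the patch): TEST_ENABLE_TEMPORARY_TRACKING,
// defined just below, enables temporary counting in the test harness (main.h), so
// that VERIFY_EVALUATION_COUNT(XPR,N) can assert an exact number of temporaries.
// For example, taken from the checks added later in this file:
//   VERIFY_EVALUATION_COUNT( tmp = (m1 * m1.transpose()).colwise().sum(), 1 );
// the product has to be evaluated into one temporary before the partial reduction
// when the matrix size is dynamic.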
+#define TEST_ENABLE_TEMPORARY_TRACKING #define EIGEN_NO_STATIC_ASSERT #include "main.h" @@ -156,16 +158,22 @@ template void vectorwiseop_matrix(const MatrixType& m) VERIFY_IS_APPROX(m2, m1.colwise() + colvec); VERIFY_IS_APPROX(m2.col(c), m1.col(c) + colvec); - VERIFY_RAISES_ASSERT(m2.colwise() += colvec.transpose()); - VERIFY_RAISES_ASSERT(m1.colwise() + colvec.transpose()); + if(rows>1) + { + VERIFY_RAISES_ASSERT(m2.colwise() += colvec.transpose()); + VERIFY_RAISES_ASSERT(m1.colwise() + colvec.transpose()); + } m2 = m1; m2.rowwise() += rowvec; VERIFY_IS_APPROX(m2, m1.rowwise() + rowvec); VERIFY_IS_APPROX(m2.row(r), m1.row(r) + rowvec); - VERIFY_RAISES_ASSERT(m2.rowwise() += rowvec.transpose()); - VERIFY_RAISES_ASSERT(m1.rowwise() + rowvec.transpose()); + if(cols>1) + { + VERIFY_RAISES_ASSERT(m2.rowwise() += rowvec.transpose()); + VERIFY_RAISES_ASSERT(m1.rowwise() + rowvec.transpose()); + } // test substraction @@ -174,16 +182,22 @@ template void vectorwiseop_matrix(const MatrixType& m) VERIFY_IS_APPROX(m2, m1.colwise() - colvec); VERIFY_IS_APPROX(m2.col(c), m1.col(c) - colvec); - VERIFY_RAISES_ASSERT(m2.colwise() -= colvec.transpose()); - VERIFY_RAISES_ASSERT(m1.colwise() - colvec.transpose()); + if(rows>1) + { + VERIFY_RAISES_ASSERT(m2.colwise() -= colvec.transpose()); + VERIFY_RAISES_ASSERT(m1.colwise() - colvec.transpose()); + } m2 = m1; m2.rowwise() -= rowvec; VERIFY_IS_APPROX(m2, m1.rowwise() - rowvec); VERIFY_IS_APPROX(m2.row(r), m1.row(r) - rowvec); - VERIFY_RAISES_ASSERT(m2.rowwise() -= rowvec.transpose()); - VERIFY_RAISES_ASSERT(m1.rowwise() - rowvec.transpose()); + if(cols>1) + { + VERIFY_RAISES_ASSERT(m2.rowwise() -= rowvec.transpose()); + VERIFY_RAISES_ASSERT(m1.rowwise() - rowvec.transpose()); + } // test norm rrres = m1.colwise().norm(); @@ -191,6 +205,11 @@ template void vectorwiseop_matrix(const MatrixType& m) rcres = m1.rowwise().norm(); VERIFY_IS_APPROX(rcres(r), m1.row(r).norm()); + VERIFY_IS_APPROX(m1.cwiseAbs().colwise().sum(), m1.colwise().template lpNorm<1>()); + VERIFY_IS_APPROX(m1.cwiseAbs().rowwise().sum(), m1.rowwise().template lpNorm<1>()); + VERIFY_IS_APPROX(m1.cwiseAbs().colwise().maxCoeff(), m1.colwise().template lpNorm()); + VERIFY_IS_APPROX(m1.cwiseAbs().rowwise().maxCoeff(), m1.rowwise().template lpNorm()); + // test normalized m2 = m1.colwise().normalized(); VERIFY_IS_APPROX(m2.col(c), m1.col(c).normalized()); @@ -204,14 +223,27 @@ template void vectorwiseop_matrix(const MatrixType& m) m2 = m1; m2.rowwise().normalize(); VERIFY_IS_APPROX(m2.row(r), m1.row(r).normalized()); + + // test with partial reduction of products + Matrix m1m1 = m1 * m1.transpose(); + VERIFY_IS_APPROX( (m1 * m1.transpose()).colwise().sum(), m1m1.colwise().sum()); + Matrix tmp(rows); + VERIFY_EVALUATION_COUNT( tmp = (m1 * m1.transpose()).colwise().sum(), (MatrixType::RowsAtCompileTime==Dynamic ? 1 : 0)); + + m2 = m1.rowwise() - (m1.colwise().sum()/m1.rows()).eval(); + m1 = m1.rowwise() - (m1.colwise().sum()/m1.rows()); + VERIFY_IS_APPROX( m1, m2 ); + VERIFY_EVALUATION_COUNT( m2 = (m1.rowwise() - m1.colwise().sum()/m1.rows()), (MatrixType::RowsAtCompileTime==Dynamic && MatrixType::ColsAtCompileTime!=1 ? 
1 : 0) ); } void test_vectorwiseop() { - CALL_SUBTEST_1(vectorwiseop_array(Array22cd())); - CALL_SUBTEST_2(vectorwiseop_array(Array())); - CALL_SUBTEST_3(vectorwiseop_array(ArrayXXf(3, 4))); - CALL_SUBTEST_4(vectorwiseop_matrix(Matrix4cf())); - CALL_SUBTEST_5(vectorwiseop_matrix(Matrix())); - CALL_SUBTEST_6(vectorwiseop_matrix(MatrixXd(7,2))); + CALL_SUBTEST_1( vectorwiseop_array(Array22cd()) ); + CALL_SUBTEST_2( vectorwiseop_array(Array()) ); + CALL_SUBTEST_3( vectorwiseop_array(ArrayXXf(3, 4)) ); + CALL_SUBTEST_4( vectorwiseop_matrix(Matrix4cf()) ); + CALL_SUBTEST_5( vectorwiseop_matrix(Matrix()) ); + CALL_SUBTEST_6( vectorwiseop_matrix(MatrixXd(internal::random(1,EIGEN_TEST_MAX_SIZE), internal::random(1,EIGEN_TEST_MAX_SIZE))) ); + CALL_SUBTEST_7( vectorwiseop_matrix(VectorXd(internal::random(1,EIGEN_TEST_MAX_SIZE))) ); + CALL_SUBTEST_7( vectorwiseop_matrix(RowVectorXd(internal::random(1,EIGEN_TEST_MAX_SIZE))) ); } diff --git a/test/visitor.cpp b/test/visitor.cpp index 39a5d6b5f..844170ec6 100644 --- a/test/visitor.cpp +++ b/test/visitor.cpp @@ -55,6 +55,11 @@ template void matrixVisitor(const MatrixType& p) VERIFY_IS_APPROX(maxc, eigen_maxc); VERIFY_IS_APPROX(minc, m.minCoeff()); VERIFY_IS_APPROX(maxc, m.maxCoeff()); + + eigen_maxc = (m.adjoint()*m).maxCoeff(&eigen_maxrow,&eigen_maxcol); + eigen_maxc = (m.adjoint()*m).eval().maxCoeff(&maxrow,&maxcol); + VERIFY(maxrow == eigen_maxrow); + VERIFY(maxcol == eigen_maxcol); } template void vectorVisitor(const VectorType& w) diff --git a/unsupported/Eigen/AdolcForward b/unsupported/Eigen/AdolcForward index 2627decd0..15f5f0731 100644 --- a/unsupported/Eigen/AdolcForward +++ b/unsupported/Eigen/AdolcForward @@ -25,7 +25,7 @@ #ifndef NUMBER_DIRECTIONS # define NUMBER_DIRECTIONS 2 #endif -#include +#include // adolc defines some very stupid macros: #if defined(malloc) diff --git a/unsupported/Eigen/CMakeLists.txt b/unsupported/Eigen/CMakeLists.txt index 6faf4585d..6d0cf4f9d 100644 --- a/unsupported/Eigen/CMakeLists.txt +++ b/unsupported/Eigen/CMakeLists.txt @@ -1,7 +1,24 @@ -set(Eigen_HEADERS AdolcForward BVH IterativeSolvers MatrixFunctions MoreVectorization AutoDiff AlignedVector3 Polynomials - FFT NonLinearOptimization SparseExtra IterativeSolvers - NumericalDiff Skyline MPRealSupport OpenGLSupport KroneckerProduct Splines LevenbergMarquardt - ) +set(Eigen_HEADERS + AdolcForward + AlignedVector3 + ArpackSupport + AutoDiff + BVH + FFT + IterativeSolvers + KroneckerProduct + LevenbergMarquardt + MatrixFunctions + MoreVectorization + MPRealSupport + NonLinearOptimization + NumericalDiff + OpenGLSupport + Polynomials + Skyline + SparseExtra + Splines + ) install(FILES ${Eigen_HEADERS} diff --git a/unsupported/Eigen/CXX11/Core b/unsupported/Eigen/CXX11/Core index 292f09564..c8dcf7c16 100644 --- a/unsupported/Eigen/CXX11/Core +++ b/unsupported/Eigen/CXX11/Core @@ -32,11 +32,12 @@ #include +#include "src/Core/util/EmulateArray.h" + // Emulate the cxx11 functionality that we need if the compiler doesn't support it. #if __cplusplus <= 199711L #include "src/Core/util/EmulateCXX11Meta.h" #else -#include #include "src/Core/util/CXX11Workarounds.h" #include "src/Core/util/CXX11Meta.h" #endif diff --git a/unsupported/Eigen/CXX11/Tensor b/unsupported/Eigen/CXX11/Tensor index cbe416602..7481a9ddb 100644 --- a/unsupported/Eigen/CXX11/Tensor +++ b/unsupported/Eigen/CXX11/Tensor @@ -8,8 +8,8 @@ // Public License v. 2.0. If a copy of the MPL was not distributed // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
-#ifndef EIGEN_CXX11_TENSOR_MODULE -#define EIGEN_CXX11_TENSOR_MODULE +//#ifndef EIGEN_CXX11_TENSOR_MODULE +//#define EIGEN_CXX11_TENSOR_MODULE #include "Core" @@ -28,14 +28,22 @@ #include #include + +#ifdef _WIN32 +typedef __int32 int32_t; +typedef unsigned __int32 uint32_t; +typedef __int64 int64_t; +typedef unsigned __int64 uint64_t; +#else #include +#endif #if __cplusplus > 199711 #include #endif #ifdef _WIN32 -#include +#include #elif defined(__APPLE__) #include #else @@ -57,15 +65,19 @@ #endif +#include "src/Tensor/TensorMacros.h" #include "src/Tensor/TensorForwardDeclarations.h" #include "src/Tensor/TensorMeta.h" -#include "src/Tensor/TensorDeviceType.h" +#include "src/Tensor/TensorDeviceDefault.h" +#include "src/Tensor/TensorDeviceThreadPool.h" +#include "src/Tensor/TensorDeviceCuda.h" #include "src/Tensor/TensorIndexList.h" #include "src/Tensor/TensorDimensionList.h" #include "src/Tensor/TensorDimensions.h" #include "src/Tensor/TensorInitializer.h" #include "src/Tensor/TensorTraits.h" #include "src/Tensor/TensorFunctors.h" +#include "src/Tensor/TensorUInt128.h" #include "src/Tensor/TensorIntDiv.h" #include "src/Tensor/TensorBase.h" @@ -73,6 +85,7 @@ #include "src/Tensor/TensorEvaluator.h" #include "src/Tensor/TensorExpr.h" #include "src/Tensor/TensorReduction.h" +#include "src/Tensor/TensorReductionCuda.h" #include "src/Tensor/TensorArgMax.h" #include "src/Tensor/TensorConcatenation.h" #include "src/Tensor/TensorContraction.h" @@ -80,6 +93,7 @@ #include "src/Tensor/TensorContractionCuda.h" #include "src/Tensor/TensorConversion.h" #include "src/Tensor/TensorConvolution.h" +#include "src/Tensor/TensorFFT.h" #include "src/Tensor/TensorPatch.h" #include "src/Tensor/TensorImagePatch.h" #include "src/Tensor/TensorVolumePatch.h" @@ -111,4 +125,4 @@ #include -#endif // EIGEN_CXX11_TENSOR_MODULE +//#endif // EIGEN_CXX11_TENSOR_MODULE diff --git a/unsupported/Eigen/CXX11/src/Core/util/CXX11Meta.h b/unsupported/Eigen/CXX11/src/Core/util/CXX11Meta.h index 3a08628be..3f149c6a3 100644 --- a/unsupported/Eigen/CXX11/src/Core/util/CXX11Meta.h +++ b/unsupported/Eigen/CXX11/src/Core/util/CXX11Meta.h @@ -112,7 +112,7 @@ template struct get<0, type_lis template struct get> { static_assert((n - n) < 0, "meta-template get: The element to extract from a list must be smaller than the size of the list."); }; template struct get> : get> {}; -template struct get<0, numeric_list> { constexpr static int value = a; }; +template struct get<0, numeric_list> { constexpr static T value = a; }; template struct get> { static_assert((n - n) < 0, "meta-template get: The element to extract from a list must be smaller than the size of the list."); }; /* always get type, regardless of dummy; good for parameter pack expansion */ @@ -252,6 +252,13 @@ template< typename... 
Ts > struct reduce; +template< + typename Reducer +> struct reduce +{ + constexpr static inline int run() { return Reducer::Identity; } +}; + template< typename Reducer, typename A, @@ -275,8 +282,14 @@ template< /* generic binary operations */ -struct sum_op { template constexpr static inline auto run(A a, B b) -> decltype(a + b) { return a + b; } }; -struct product_op { template constexpr static inline auto run(A a, B b) -> decltype(a * b) { return a * b; } }; +struct sum_op { + template EIGEN_DEVICE_FUNC constexpr static inline auto run(A a, B b) -> decltype(a + b) { return a + b; } + static constexpr int Identity = 0; +}; +struct product_op { + template EIGEN_DEVICE_FUNC constexpr static inline auto run(A a, B b) -> decltype(a * b) { return a * b; } + static constexpr int Identity = 1; +}; struct logical_and_op { template constexpr static inline auto run(A a, B b) -> decltype(a && b) { return a && b; } }; struct logical_or_op { template constexpr static inline auto run(A a, B b) -> decltype(a || b) { return a || b; } }; @@ -321,11 +334,12 @@ constexpr inline Array h_array_reverse(Array arr, numeric_list) } template -constexpr inline std::array array_reverse(std::array arr) +constexpr inline array array_reverse(array arr) { return h_array_reverse(arr, typename gen_numeric_list::type()); } + /* generic array reductions */ // can't reuse standard reduce() interface above because Intel's Compiler @@ -335,39 +349,48 @@ constexpr inline std::array array_reverse(std::array arr) // an infinite loop) template struct h_array_reduce { - constexpr static inline auto run(std::array arr) -> decltype(Reducer::run(h_array_reduce::run(arr), array_get(arr))) + EIGEN_DEVICE_FUNC constexpr static inline auto run(array arr, T identity) -> decltype(Reducer::run(h_array_reduce::run(arr, identity), array_get(arr))) { - return Reducer::run(h_array_reduce::run(arr), array_get(arr)); + return Reducer::run(h_array_reduce::run(arr, identity), array_get(arr)); } }; template struct h_array_reduce { - constexpr static inline T run(std::array arr) + EIGEN_DEVICE_FUNC constexpr static inline T run(const array& arr, T) { return array_get<0>(arr); } }; -template -constexpr inline auto array_reduce(std::array arr) -> decltype(h_array_reduce::run(arr)) +template +struct h_array_reduce { - return h_array_reduce::run(arr); + EIGEN_DEVICE_FUNC constexpr static inline T run(const array&, T identity) + { + return identity; + } +}; + +template +EIGEN_DEVICE_FUNC constexpr inline auto array_reduce(const array& arr, T identity) -> decltype(h_array_reduce::run(arr, identity)) +{ + return h_array_reduce::run(arr, identity); } /* standard array reductions */ template -constexpr inline auto array_sum(std::array arr) -> decltype(array_reduce(arr)) +EIGEN_DEVICE_FUNC constexpr inline auto array_sum(const array& arr) -> decltype(array_reduce(arr, static_cast(0))) { - return array_reduce(arr); + return array_reduce(arr, static_cast(0)); } template -constexpr inline auto array_prod(std::array arr) -> decltype(array_reduce(arr)) +EIGEN_DEVICE_FUNC constexpr inline auto array_prod(const array& arr) -> decltype(array_reduce(arr, static_cast(1))) { - return array_reduce(arr); + return array_reduce(arr, static_cast(1)); } template @@ -381,13 +404,13 @@ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE t array_prod(const std::vector& a) { /* zip an array */ template -constexpr inline std::array h_array_zip(std::array a, std::array b, numeric_list) +constexpr inline array h_array_zip(array a, array b, numeric_list) { - return std::array{{ 
Op::run(array_get(a), array_get(b))... }}; + return array{{ Op::run(array_get(a), array_get(b))... }}; } template -constexpr inline std::array array_zip(std::array a, std::array b) +constexpr inline array array_zip(array a, array b) { return h_array_zip(a, b, typename gen_numeric_list::type()); } @@ -395,13 +418,13 @@ constexpr inline std::array array_zip(std::array< /* zip an array and reduce the result */ template -constexpr inline auto h_array_zip_and_reduce(std::array a, std::array b, numeric_list) -> decltype(reduce::type...>::run(Op::run(array_get(a), array_get(b))...)) +constexpr inline auto h_array_zip_and_reduce(array a, array b, numeric_list) -> decltype(reduce::type...>::run(Op::run(array_get(a), array_get(b))...)) { return reduce::type...>::run(Op::run(array_get(a), array_get(b))...); } template -constexpr inline auto array_zip_and_reduce(std::array a, std::array b) -> decltype(h_array_zip_and_reduce(a, b, typename gen_numeric_list::type())) +constexpr inline auto array_zip_and_reduce(array a, array b) -> decltype(h_array_zip_and_reduce(a, b, typename gen_numeric_list::type())) { return h_array_zip_and_reduce(a, b, typename gen_numeric_list::type()); } @@ -409,13 +432,13 @@ constexpr inline auto array_zip_and_reduce(std::array a, std::array /* apply stuff to an array */ template -constexpr inline std::array h_array_apply(std::array a, numeric_list) +constexpr inline array h_array_apply(array a, numeric_list) { - return std::array{{ Op::run(array_get(a))... }}; + return array{{ Op::run(array_get(a))... }}; } template -constexpr inline std::array array_apply(std::array a) +constexpr inline array array_apply(array a) { return h_array_apply(a, typename gen_numeric_list::type()); } @@ -423,34 +446,34 @@ constexpr inline std::array array_apply(std::array -constexpr inline auto h_array_apply_and_reduce(std::array arr, numeric_list) -> decltype(reduce::type...>::run(Op::run(array_get(arr))...)) +constexpr inline auto h_array_apply_and_reduce(array arr, numeric_list) -> decltype(reduce::type...>::run(Op::run(array_get(arr))...)) { return reduce::type...>::run(Op::run(array_get(arr))...); } template -constexpr inline auto array_apply_and_reduce(std::array a) -> decltype(h_array_apply_and_reduce(a, typename gen_numeric_list::type())) +constexpr inline auto array_apply_and_reduce(array a) -> decltype(h_array_apply_and_reduce(a, typename gen_numeric_list::type())) { return h_array_apply_and_reduce(a, typename gen_numeric_list::type()); } /* repeat a value n times (and make an array out of it * usage: - * std::array = repeat<16>(42); + * array = repeat<16>(42); */ template struct h_repeat { template - constexpr static inline std::array run(t v, numeric_list) + constexpr static inline array run(t v, numeric_list) { return {{ typename id_numeric::type(v)... 
}}; } }; template -constexpr std::array repeat(t v) { return h_repeat::run(v, typename gen_numeric_list::type()); } +constexpr array repeat(t v) { return h_repeat::run(v, typename gen_numeric_list::type()); } /* instantiate a class by a C-style array */ template diff --git a/unsupported/Eigen/CXX11/src/Core/util/CXX11Workarounds.h b/unsupported/Eigen/CXX11/src/Core/util/CXX11Workarounds.h index a590cf4e1..b1528aa66 100644 --- a/unsupported/Eigen/CXX11/src/Core/util/CXX11Workarounds.h +++ b/unsupported/Eigen/CXX11/src/Core/util/CXX11Workarounds.h @@ -39,46 +39,16 @@ namespace Eigen { -// Use std::array as Eigen array -template using array = std::array; - namespace internal { /* std::get is only constexpr in C++14, not yet in C++11 - * - libstdc++ from version 4.7 onwards has it nevertheless, - * so use that - * - libstdc++ older versions: use _M_instance directly - * - libc++ all versions so far: use __elems_ directly - * - all other libs: use std::get to be portable, but - * this may not be constexpr */ -#if defined(__GLIBCXX__) && __GLIBCXX__ < 20120322 -#define STD_GET_ARR_HACK a._M_instance[I] -#elif defined(_LIBCPP_VERSION) -#define STD_GET_ARR_HACK a.__elems_[I] -#else -#define STD_GET_ARR_HACK std::template get(a) -#endif -template constexpr inline T& array_get(std::array& a) { return (T&) STD_GET_ARR_HACK; } -template constexpr inline T&& array_get(std::array&& a) { return (T&&) STD_GET_ARR_HACK; } -template constexpr inline T const& array_get(std::array const& a) { return (T const&) STD_GET_ARR_HACK; } template constexpr inline T& array_get(std::vector& a) { return a[I]; } template constexpr inline T&& array_get(std::vector&& a) { return a[I]; } template constexpr inline T const& array_get(std::vector const& a) { return a[I]; } -#undef STD_GET_ARR_HACK - -template struct array_size; -template struct array_size > { - static const size_t value = N; -}; -template struct array_size; -template struct array_size > { - static const size_t value = N; -}; - /* Suppose you have a template of the form * template struct X; * And you want to specialize it in such a way: diff --git a/unsupported/Eigen/CXX11/src/Core/util/EmulateArray.h b/unsupported/Eigen/CXX11/src/Core/util/EmulateArray.h new file mode 100644 index 000000000..ab9c2ec3e --- /dev/null +++ b/unsupported/Eigen/CXX11/src/Core/util/EmulateArray.h @@ -0,0 +1,225 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2014 Benoit Steiner +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_EMULATE_ARRAY_H +#define EIGEN_EMULATE_ARRAY_H + + + +// The array class is only available starting with cxx11. Emulate our own here +// if needed. +// Moreover, CUDA doesn't support the STL containers, so we use our own instead. 
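// Editor's note (illustrative sketch, not part of the patch), valid when this
// emulated class is selected instead of std::array (pre-C++11, CUDA, or
// EIGEN_AVOID_STL_ARRAY):
//   Eigen::array<Eigen::DenseIndex, 3> dims(2, 3, 4);            // 3-argument ctor below
//   Eigen::DenseIndex total = Eigen::internal::array_prod(dims); // 2*3*4 = 24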
+#if __cplusplus <= 199711L || defined(__CUDACC__) || defined(EIGEN_AVOID_STL_ARRAY) + +namespace Eigen { +template class array { + public: + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE T& operator[] (size_t index) { return values[index]; } + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const T& operator[] (size_t index) const { return values[index]; } + + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE + static std::size_t size() { return n; } + + T values[n]; + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE array() { } + explicit EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE array(const T& v) { + EIGEN_STATIC_ASSERT(n==1, YOU_MADE_A_PROGRAMMING_MISTAKE) + values[0] = v; + } + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE array(const T& v1, const T& v2) { + EIGEN_STATIC_ASSERT(n==2, YOU_MADE_A_PROGRAMMING_MISTAKE) + values[0] = v1; + values[1] = v2; + } + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE array(const T& v1, const T& v2, const T& v3) { + EIGEN_STATIC_ASSERT(n==3, YOU_MADE_A_PROGRAMMING_MISTAKE) + values[0] = v1; + values[1] = v2; + values[2] = v3; + } + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE array(const T& v1, const T& v2, const T& v3, + const T& v4) { + EIGEN_STATIC_ASSERT(n==4, YOU_MADE_A_PROGRAMMING_MISTAKE) + values[0] = v1; + values[1] = v2; + values[2] = v3; + values[3] = v4; + } + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE array(const T& v1, const T& v2, const T& v3, const T& v4, + const T& v5) { + EIGEN_STATIC_ASSERT(n==5, YOU_MADE_A_PROGRAMMING_MISTAKE) + values[0] = v1; + values[1] = v2; + values[2] = v3; + values[3] = v4; + values[4] = v5; + } + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE array(const T& v1, const T& v2, const T& v3, const T& v4, + const T& v5, const T& v6) { + EIGEN_STATIC_ASSERT(n==6, YOU_MADE_A_PROGRAMMING_MISTAKE) + values[0] = v1; + values[1] = v2; + values[2] = v3; + values[3] = v4; + values[4] = v5; + values[5] = v6; + } + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE array(const T& v1, const T& v2, const T& v3, const T& v4, + const T& v5, const T& v6, const T& v7) { + EIGEN_STATIC_ASSERT(n==7, YOU_MADE_A_PROGRAMMING_MISTAKE) + values[0] = v1; + values[1] = v2; + values[2] = v3; + values[3] = v4; + values[4] = v5; + values[5] = v6; + values[6] = v7; + } + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE array( + const T& v1, const T& v2, const T& v3, const T& v4, + const T& v5, const T& v6, const T& v7, const T& v8) { + EIGEN_STATIC_ASSERT(n==8, YOU_MADE_A_PROGRAMMING_MISTAKE) + values[0] = v1; + values[1] = v2; + values[2] = v3; + values[3] = v4; + values[4] = v5; + values[5] = v6; + values[6] = v7; + values[7] = v8; + } + +#ifdef EIGEN_HAS_VARIADIC_TEMPLATES + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE array(std::initializer_list l) { + eigen_assert(l.size() == n); + internal::smart_copy(l.begin(), l.end(), values); + } +#endif +}; + + +// Specialize array for zero size +template class array { + public: + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE T& operator[] (size_t) { + eigen_assert(false && "Can't index a zero size array"); + return *static_cast(NULL); + } + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const T& operator[] (size_t) const { + eigen_assert(false && "Can't index a zero size array"); + return *static_cast(NULL); + } + + static EIGEN_ALWAYS_INLINE std::size_t size() { return 0; } + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE array() { } + +#ifdef EIGEN_HAS_VARIADIC_TEMPLATES + array(std::initializer_list l) { + eigen_assert(l.size() == 0); + } +#endif +}; + +namespace internal { +template +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T& array_get(array& a) { + return a[I]; +} +template +EIGEN_DEVICE_FUNC 
EIGEN_STRONG_INLINE const T& array_get(const array& a) { + return a[I]; +} + +template struct array_size; +template struct array_size > { + static const size_t value = N; +}; +template struct array_size; +template struct array_size& > { + static const size_t value = N; +}; +template struct array_size; +template struct array_size > { + static const size_t value = N; +}; +template struct array_size; +template struct array_size& > { + static const size_t value = N; +}; + +} // end namespace internal +} // end namespace Eigen + +#else + +// The compiler supports c++11, and we're not targetting cuda: use std::array as Eigen array +#include +namespace Eigen { + +template using array = std::array; + +namespace internal { +/* std::get is only constexpr in C++14, not yet in C++11 + * - libstdc++ from version 4.7 onwards has it nevertheless, + * so use that + * - libstdc++ older versions: use _M_instance directly + * - libc++ all versions so far: use __elems_ directly + * - all other libs: use std::get to be portable, but + * this may not be constexpr + */ +#if defined(__GLIBCXX__) && __GLIBCXX__ < 20120322 +#define STD_GET_ARR_HACK a._M_instance[I] +#elif defined(_LIBCPP_VERSION) +#define STD_GET_ARR_HACK a.__elems_[I] +#else +#define STD_GET_ARR_HACK std::template get(a) +#endif + +template constexpr inline T& array_get(std::array& a) { return (T&) STD_GET_ARR_HACK; } +template constexpr inline T&& array_get(std::array&& a) { return (T&&) STD_GET_ARR_HACK; } +template constexpr inline T const& array_get(std::array const& a) { return (T const&) STD_GET_ARR_HACK; } + +#undef STD_GET_ARR_HACK + +template struct array_size; +template struct array_size > { + static const size_t value = N; +}; +template struct array_size; +template struct array_size > { + static const size_t value = N; +}; +} // end namespace internal +} // end namespace Eigen + +#endif + + + + + +#endif // EIGEN_EMULATE_ARRAY_H diff --git a/unsupported/Eigen/CXX11/src/Core/util/EmulateCXX11Meta.h b/unsupported/Eigen/CXX11/src/Core/util/EmulateCXX11Meta.h index 0ae638fb9..d685d4f9d 100644 --- a/unsupported/Eigen/CXX11/src/Core/util/EmulateCXX11Meta.h +++ b/unsupported/Eigen/CXX11/src/Core/util/EmulateCXX11Meta.h @@ -14,105 +14,6 @@ namespace Eigen { -// The array class is only available starting with cxx11. 
Emulate our own here -// if needed -template class array { - public: - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE T& operator[] (size_t index) { return values[index]; } - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE const T& operator[] (size_t index) const { return values[index]; } - - static const std::size_t size() { return n; } - - T values[n]; - - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE array() { } - explicit EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE array(const T& v) { - EIGEN_STATIC_ASSERT(n==1, YOU_MADE_A_PROGRAMMING_MISTAKE) - values[0] = v; - } - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE array(const T& v1, const T& v2) { - EIGEN_STATIC_ASSERT(n==2, YOU_MADE_A_PROGRAMMING_MISTAKE) - values[0] = v1; - values[1] = v2; - } - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE array(const T& v1, const T& v2, const T& v3) { - EIGEN_STATIC_ASSERT(n==3, YOU_MADE_A_PROGRAMMING_MISTAKE) - values[0] = v1; - values[1] = v2; - values[2] = v3; - } - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE array(const T& v1, const T& v2, const T& v3, - const T& v4) { - EIGEN_STATIC_ASSERT(n==4, YOU_MADE_A_PROGRAMMING_MISTAKE) - values[0] = v1; - values[1] = v2; - values[2] = v3; - values[3] = v4; - } - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE array(const T& v1, const T& v2, const T& v3, const T& v4, - const T& v5) { - EIGEN_STATIC_ASSERT(n==5, YOU_MADE_A_PROGRAMMING_MISTAKE) - values[0] = v1; - values[1] = v2; - values[2] = v3; - values[3] = v4; - values[4] = v5; - } - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE array(const T& v1, const T& v2, const T& v3, const T& v4, - const T& v5, const T& v6) { - EIGEN_STATIC_ASSERT(n==6, YOU_MADE_A_PROGRAMMING_MISTAKE) - values[0] = v1; - values[1] = v2; - values[2] = v3; - values[3] = v4; - values[4] = v5; - values[5] = v6; - } - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE array(const T& v1, const T& v2, const T& v3, const T& v4, - const T& v5, const T& v6, const T& v7) { - EIGEN_STATIC_ASSERT(n==7, YOU_MADE_A_PROGRAMMING_MISTAKE) - values[0] = v1; - values[1] = v2; - values[2] = v3; - values[3] = v4; - values[4] = v5; - values[5] = v6; - values[6] = v7; - } - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE array( - const T& v1, const T& v2, const T& v3, const T& v4, - const T& v5, const T& v6, const T& v7, const T& v8) { - EIGEN_STATIC_ASSERT(n==8, YOU_MADE_A_PROGRAMMING_MISTAKE) - values[0] = v1; - values[1] = v2; - values[2] = v3; - values[3] = v4; - values[4] = v5; - values[5] = v6; - values[6] = v7; - values[7] = v8; - } - -#ifdef EIGEN_HAS_VARIADIC_TEMPLATES - array(std::initializer_list l) { - eigen_assert(l.size() == n); - internal::smart_copy(l.begin(), l.end(), values); - } -#endif -}; - - namespace internal { /** \internal @@ -279,7 +180,7 @@ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename NList::HeadType::type array_prod( return arg_prod::value; } -template +template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE t array_prod(const array& a) { t prod = 1; for (size_t i = 0; i < n; ++i) { prod *= a[i]; } @@ -298,14 +199,6 @@ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE t array_prod(const std::vector& a) { return prod; } -template -EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T& array_get(array& a) { - return a[I]; -} -template -EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const T& array_get(const array& a) { - return a[I]; -} template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T& array_get(std::vector& a) { @@ -316,23 +209,6 @@ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const T& array_get(const std::vector& a return a[I]; } -template struct array_size; -template struct array_size > { - static const size_t value = N; -}; -template struct 
array_size; -template struct array_size& > { - static const size_t value = N; -}; -template struct array_size; -template struct array_size > { - static const size_t value = N; -}; -template struct array_size; -template struct array_size& > { - static const size_t value = N; -}; - struct sum_op { template static inline bool run(A a, B b) { return a + b; } }; diff --git a/unsupported/Eigen/CXX11/src/Tensor/README.md b/unsupported/Eigen/CXX11/src/Tensor/README.md index 87e57cebb..407485090 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/README.md +++ b/unsupported/Eigen/CXX11/src/Tensor/README.md @@ -1149,6 +1149,19 @@ are the smallest of the reduced values. Reduce a tensor using the prod() operator. The resulting values are the product of the reduced values. +### <Operation> all(const Dimensions& new_dims) +### <Operation> all() +Reduce a tensor using the all() operator. Casts tensor to bool and then checks +whether all elements are true. Runs through all elements rather than +short-circuiting, so may be significantly inefficient. + +### <Operation> any(const Dimensions& new_dims) +### <Operation> any() +Reduce a tensor using the any() operator. Casts tensor to bool and then checks +whether any element is true. Runs through all elements rather than +short-circuiting, so may be significantly inefficient. + + ### <Operation> reduce(const Dimensions& new_dims, const Reducer& reducer) Reduce a tensor using a user-defined reduction operator. See ```SumReducer``` diff --git a/unsupported/Eigen/CXX11/src/Tensor/Tensor.h b/unsupported/Eigen/CXX11/src/Tensor/Tensor.h index 6c16e0faa..ad525bac8 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/Tensor.h +++ b/unsupported/Eigen/CXX11/src/Tensor/Tensor.h @@ -59,7 +59,7 @@ namespace Eigen { * \ref TopicStorageOrders */ -template +template class Tensor : public TensorBase > { public: @@ -78,16 +78,25 @@ class Tensor : public TensorBase0) & !(Options_&DontAlign), PacketAccess = (internal::packet_traits::size > 1), Layout = Options_ & RowMajor ? 
RowMajor : ColMajor, - CoordAccess = true, + CoordAccess = true }; static const int Options = Options_; - static const std::size_t NumIndices = NumIndices_; + static const int NumIndices = NumIndices_; typedef DSizes Dimensions; protected: TensorStorage m_storage; +#ifdef EIGEN_HAS_SFINAE + template + struct isOfNormalIndex{ + static const bool is_array = internal::is_base_of, CustomIndices>::value; + static const bool is_int = NumTraits::IsInteger; + static const bool value = is_array | is_int; + }; +#endif + public: // Metadata EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index rank() const { return NumIndices; } @@ -113,12 +122,30 @@ class Tensor : public TensorBase& indices) const { eigen_internal_assert(checkIndexRange(indices)); return m_storage.data()[linearizedIndex(indices)]; } + // custom indices +#ifdef EIGEN_HAS_SFINAE + template::value) ) + > + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar& coeff(CustomIndices& indices) const + { + return coeff(internal::customIndices2Array(indices)); + } +#endif + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar& coeff() const + { + EIGEN_STATIC_ASSERT(NumIndices == 0, YOU_MADE_A_PROGRAMMING_MISTAKE); + return m_storage.data()[0]; + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar& coeff(Index index) const { eigen_internal_assert(index >= 0 && index < size()); @@ -135,12 +162,30 @@ class Tensor : public TensorBase& indices) { eigen_internal_assert(checkIndexRange(indices)); return m_storage.data()[linearizedIndex(indices)]; } + // custom indices +#ifdef EIGEN_HAS_SFINAE + template::value) ) + > + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& coeffRef(CustomIndices& indices) + { + return coeffRef(internal::customIndices2Array(indices)); + } +#endif + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& coeffRef() + { + EIGEN_STATIC_ASSERT(NumIndices == 0, YOU_MADE_A_PROGRAMMING_MISTAKE); + return m_storage.data()[0]; + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& coeffRef(Index index) { eigen_internal_assert(index >= 0 && index < size()); @@ -178,9 +223,20 @@ class Tensor : public TensorBase::value) ) + > + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar& operator()(CustomIndices& indices) const + { + return coeff(internal::customIndices2Array(indices)); + } +#endif + + // normal indices EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar& operator()(const array& indices) const { - eigen_assert(checkIndexRange(indices)); return coeff(indices); } @@ -190,6 +246,12 @@ class Tensor : public TensorBase& indices) { - eigen_assert(checkIndexRange(indices)); return coeffRef(indices); } + // custom indices +#ifdef EIGEN_HAS_SFINAE + template::value) ) + > + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& operator()(CustomIndices& indices) + { + return coeffRef(internal::customIndices2Array(indices)); + } +#endif + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& operator()(Index index) { eigen_assert(index >= 0 && index < size()); return coeffRef(index); } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& operator()() + { + EIGEN_STATIC_ASSERT(NumIndices == 0, YOU_MADE_A_PROGRAMMING_MISTAKE); + return coeffRef(); + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& operator[](Index index) { // The bracket operator is only for vectors, use the parenthesis operator instead @@ -261,41 +340,42 @@ class Tensor : public TensorBase - inline Tensor(Index firstDimension, IndexTypes... otherDimensions) + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Tensor(Index firstDimension, IndexTypes... 
otherDimensions) : m_storage(internal::array_prod(array{{firstDimension, otherDimensions...}}), array{{firstDimension, otherDimensions...}}) { // The number of dimensions used to construct a tensor must be equal to the rank of the tensor. EIGEN_STATIC_ASSERT(sizeof...(otherDimensions) + 1 == NumIndices, YOU_MADE_A_PROGRAMMING_MISTAKE) } #else - inline explicit Tensor(Index dim1) + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE explicit Tensor(Index dim1) : m_storage(dim1, array(dim1)) { EIGEN_STATIC_ASSERT(1 == NumIndices, YOU_MADE_A_PROGRAMMING_MISTAKE) } - inline explicit Tensor(Index dim1, Index dim2) + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE explicit Tensor(Index dim1, Index dim2) : m_storage(dim1*dim2, array(dim1, dim2)) { EIGEN_STATIC_ASSERT(2 == NumIndices, YOU_MADE_A_PROGRAMMING_MISTAKE) } - inline explicit Tensor(Index dim1, Index dim2, Index dim3) + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE explicit Tensor(Index dim1, Index dim2, Index dim3) : m_storage(dim1*dim2*dim3, array(dim1, dim2, dim3)) { EIGEN_STATIC_ASSERT(3 == NumIndices, YOU_MADE_A_PROGRAMMING_MISTAKE) } - inline explicit Tensor(Index dim1, Index dim2, Index dim3, Index dim4) + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE explicit Tensor(Index dim1, Index dim2, Index dim3, Index dim4) : m_storage(dim1*dim2*dim3*dim4, array(dim1, dim2, dim3, dim4)) { EIGEN_STATIC_ASSERT(4 == NumIndices, YOU_MADE_A_PROGRAMMING_MISTAKE) } - inline explicit Tensor(Index dim1, Index dim2, Index dim3, Index dim4, Index dim5) - : m_storage(dim1*dim2*dim3*dim4*dim5, array(dim1, dim2, dim3, dim4, dim5)) + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE explicit Tensor(Index dim1, Index dim2, Index dim3, Index dim4, Index dim5) + : m_storage(dim1*dim2*dim3*dim4*dim5, array(dim1, dim2, dim3, dim4, dim5)) { EIGEN_STATIC_ASSERT(5 == NumIndices, YOU_MADE_A_PROGRAMMING_MISTAKE) } #endif - inline explicit Tensor(const array& dimensions) + /** Normal Dimension */ + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE explicit Tensor(const array& dimensions) : m_storage(internal::array_prod(dimensions), dimensions) { EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED @@ -341,7 +421,7 @@ class Tensor : public TensorBase EIGEN_DEVICE_FUNC + template EIGEN_DEVICE_FUNC void resize(Index firstDimension, IndexTypes... otherDimensions) { // The number of dimensions used to resize a tensor must be equal to the rank of the tensor. 
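// Editor's note (illustrative sketch, not part of the patch): resize() keeps the
// rank fixed, only the extent of each dimension may change:
//   Eigen::Tensor<float, 3> t(2, 3, 4);
//   t.resize(4, 3, 2);    // OK: still rank 3, storage is reallocated
//   // t.resize(4, 3);    // rejected by the static assertion above (rank mismatch)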
@@ -350,9 +430,10 @@ class Tensor : public TensorBase& dimensions) { - std::size_t i; + int i; Index size = Index(1); for (i = 0; i < NumIndices; i++) { internal::check_rows_cols_for_overflow::run(size, dimensions[i]); @@ -367,20 +448,39 @@ class Tensor : public TensorBase& dimensions) { array dims; - for (std::size_t i = 0; i < NumIndices; ++i) { + for (int i = 0; i < NumIndices; ++i) { dims[i] = dimensions[i]; } resize(dims); } + EIGEN_DEVICE_FUNC + void resize() + { + EIGEN_STATIC_ASSERT(NumIndices == 0, YOU_MADE_A_PROGRAMMING_MISTAKE); + // Nothing to do: rank 0 tensors have fixed size + } + + /** Custom Dimension */ +#ifdef EIGEN_HAS_SFINAE + template::value) ) + > + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void resize(CustomDimension& dimensions) + { + resize(internal::customIndices2Array(dimensions)); + } +#endif + #ifndef EIGEN_EMULATE_CXX11_META_H template EIGEN_DEVICE_FUNC void resize(const Sizes& dimensions) { array dims; - for (std::size_t i = 0; i < NumIndices; ++i) { + for (int i = 0; i < NumIndices; ++i) { dims[i] = static_cast(dimensions[i]); } resize(dims); @@ -390,7 +490,7 @@ class Tensor : public TensorBase& dimensions) { array dims; - for (std::size_t i = 0; i < NumIndices; ++i) { + for (int i = 0; i < NumIndices; ++i) { dims[i] = static_cast(dimensions[i]); } resize(dims); diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorArgMax.h b/unsupported/Eigen/CXX11/src/Tensor/TensorArgMax.h index ee3bf7fe3..c783aab97 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorArgMax.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorArgMax.h @@ -215,10 +215,18 @@ struct TensorEvaluator, Devi EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device) : m_orig_impl(op.expression(), device), m_impl(op.expression().index_tuples().reduce(op.reduce_dims(), op.reduce_op()), device), - m_return_dim(op.return_dim()), - m_strides(gen_strides(m_orig_impl.dimensions())), - m_stride_mod(gen_stride_mod(m_orig_impl.dimensions())), - m_stride_div(gen_stride_div()) { } + m_return_dim(op.return_dim()) { + + gen_strides(m_orig_impl.dimensions(), m_strides); + if (Layout == static_cast(ColMajor)) { + const Index total_size = internal::array_prod(m_orig_impl.dimensions()); + m_stride_mod = (m_return_dim < NumDims - 1) ? m_strides[m_return_dim + 1] : total_size; + } else { + const Index total_size = internal::array_prod(m_orig_impl.dimensions()); + m_stride_mod = (m_return_dim > 0) ? m_strides[m_return_dim - 1] : total_size; + } + m_stride_div = m_strides[m_return_dim]; + } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_impl.dimensions(); @@ -240,9 +248,10 @@ struct TensorEvaluator, Devi EIGEN_DEVICE_FUNC Scalar* data() const { return NULL; } private: - EIGEN_DEVICE_FUNC StrideDims gen_strides(const InputDimensions& dims) { - StrideDims strides; - if (m_return_dim < 0) return strides; // Won't be using these. + EIGEN_DEVICE_FUNC void gen_strides(const InputDimensions& dims, StrideDims& strides) { + if (m_return_dim < 0) { + return; // Won't be using the strides. + } eigen_assert(m_return_dim < NumDims && "Asking to convert index to a dimension outside of the rank"); @@ -259,28 +268,15 @@ struct TensorEvaluator, Devi strides[i] = strides[i+1] * dims[i+1]; } } - return strides; - } - - EIGEN_DEVICE_FUNC Index gen_stride_mod(const InputDimensions& dims) { - if (Layout == static_cast(ColMajor)) { - return (m_return_dim < NumDims - 1) ? m_strides[m_return_dim + 1] : dims.TotalSize(); - } else { - return (m_return_dim > 0) ? 
m_strides[m_return_dim - 1] : dims.TotalSize(); - } - } - - EIGEN_DEVICE_FUNC Index gen_stride_div() { - return m_strides[m_return_dim]; } protected: TensorEvaluator, Device> m_orig_impl; TensorEvaluator >, Device> m_impl; const int m_return_dim; - const StrideDims m_strides; - const Index m_stride_mod; - const Index m_stride_div; + StrideDims m_strides; + Index m_stride_mod; + Index m_stride_div; }; } // end namespace Eigen diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h b/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h index 477e4a174..392acf302 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h @@ -86,6 +86,12 @@ class TensorBase return unaryExpr(internal::scalar_sqrt_op()); } + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const TensorCwiseUnaryOp, const Derived> + sign() const { + return unaryExpr(internal::scalar_sign_op()); + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const TensorCwiseUnaryOp, const Derived> rsqrt() const { @@ -116,6 +122,24 @@ class TensorBase return unaryExpr(internal::scalar_tanh_op()); } + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const TensorCwiseUnaryOp, const Derived> + lgamma() const { + return unaryExpr(internal::scalar_lgamma_op()); + } + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const TensorCwiseUnaryOp, const Derived> + erf() const { + return unaryExpr(internal::scalar_erf_op()); + } + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const TensorCwiseUnaryOp, const Derived> + erfc() const { + return unaryExpr(internal::scalar_erfc_op()); + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const TensorCwiseUnaryOp, const Derived> sigmoid() const { @@ -155,7 +179,7 @@ class TensorBase EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const TensorCwiseUnaryOp, const Derived> operator- (Scalar rhs) const { - EIGEN_STATIC_ASSERT((std::numeric_limits::is_signed || internal::is_same >::value), YOU_MADE_A_PROGRAMMING_MISTAKE); + EIGEN_STATIC_ASSERT((NumTraits::IsSigned || internal::is_same >::value), YOU_MADE_A_PROGRAMMING_MISTAKE); return unaryExpr(internal::scalar_sub_op(rhs)); } @@ -168,10 +192,16 @@ class TensorBase EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const TensorCwiseUnaryOp, const Derived> operator/ (Scalar rhs) const { - // EIGEN_STATIC_ASSERT(!std::numeric_limits::is_integer, YOU_MADE_A_PROGRAMMING_MISTAKE); return unaryExpr(internal::scalar_quotient1_op(rhs)); } + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const TensorCwiseUnaryOp, const Derived> + operator% (Scalar rhs) const { + EIGEN_STATIC_ASSERT(NumTraits::IsInteger, YOU_MADE_A_PROGRAMMING_MISTAKE_TRY_MOD); + return unaryExpr(internal::scalar_mod_op(rhs)); + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const TensorCwiseBinaryOp, const Derived, const TensorCwiseNullaryOp, const Derived> > cwiseMax(Scalar threshold) const { @@ -248,35 +278,67 @@ class TensorBase // Comparisons and tests. 
template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - const TensorCwiseBinaryOp, const Derived, const OtherDerived> + const TensorCwiseBinaryOp, const Derived, const OtherDerived> operator<(const OtherDerived& other) const { - return binaryExpr(other.derived(), std::less()); + return binaryExpr(other.derived(), internal::scalar_cmp_op()); } template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - const TensorCwiseBinaryOp, const Derived, const OtherDerived> + const TensorCwiseBinaryOp, const Derived, const OtherDerived> operator<=(const OtherDerived& other) const { - return binaryExpr(other.derived(), std::less_equal()); + return binaryExpr(other.derived(), internal::scalar_cmp_op()); } template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - const TensorCwiseBinaryOp, const Derived, const OtherDerived> + const TensorCwiseBinaryOp, const Derived, const OtherDerived> operator>(const OtherDerived& other) const { - return binaryExpr(other.derived(), std::greater()); + return binaryExpr(other.derived(), internal::scalar_cmp_op()); } template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - const TensorCwiseBinaryOp, const Derived, const OtherDerived> + const TensorCwiseBinaryOp, const Derived, const OtherDerived> operator>=(const OtherDerived& other) const { - return binaryExpr(other.derived(), std::greater_equal()); + return binaryExpr(other.derived(), internal::scalar_cmp_op()); } template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - const TensorCwiseBinaryOp, const Derived, const OtherDerived> + const TensorCwiseBinaryOp, const Derived, const OtherDerived> operator==(const OtherDerived& other) const { - return binaryExpr(other.derived(), std::equal_to()); + return binaryExpr(other.derived(), internal::scalar_cmp_op()); } template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - const TensorCwiseBinaryOp, const Derived, const OtherDerived> + const TensorCwiseBinaryOp, const Derived, const OtherDerived> operator!=(const OtherDerived& other) const { - return binaryExpr(other.derived(), std::not_equal_to()); + return binaryExpr(other.derived(), internal::scalar_cmp_op()); + } + + // comparisons and tests for Scalars + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const TensorCwiseBinaryOp, const Derived, const TensorCwiseNullaryOp, const Derived> > + operator<(Scalar threshold) const { + return operator<(constant(threshold)); + } + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const TensorCwiseBinaryOp, const Derived, const TensorCwiseNullaryOp, const Derived> > + operator<=(Scalar threshold) const { + return operator<=(constant(threshold)); + } + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const TensorCwiseBinaryOp, const Derived, const TensorCwiseNullaryOp, const Derived> > + operator>(Scalar threshold) const { + return operator>(constant(threshold)); + } + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const TensorCwiseBinaryOp, const Derived, const TensorCwiseNullaryOp, const Derived> > + operator>=(Scalar threshold) const { + return operator>=(constant(threshold)); + } + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const TensorCwiseBinaryOp, const Derived, const TensorCwiseNullaryOp, const Derived> > + operator==(Scalar threshold) const { + return operator==(constant(threshold)); + } + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const TensorCwiseBinaryOp, const Derived, const TensorCwiseNullaryOp, const Derived> > + operator!=(Scalar threshold) const { + return operator!=(constant(threshold)); } // Coefficient-wise ternary operators. 
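// Editor's note (illustrative sketch, not part of the patch): with the scalar
// overloads added above, comparing a tensor against a constant no longer requires
// spelling out constant():
//   Eigen::Tensor<float, 2> t(2, 3);
//   t.setRandom();
//   Eigen::Tensor<bool, 2> mask = t > 0.5f;   // same as t > t.constant(0.5f)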
@@ -302,6 +364,13 @@ class TensorBase return TensorConvolutionOp(derived(), kernel.derived(), dims); } + // Fourier transforms + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const TensorFFTOp + fft(const FFT& fft) const { + return TensorFFTOp(derived(), fft); + } + // Reductions. template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const TensorReductionOp, const Dims, const Derived> @@ -363,6 +432,32 @@ class TensorBase return TensorReductionOp, const DimensionList, const Derived>(derived(), in_dims, internal::MinReducer()); } + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const TensorReductionOp > + all(const Dims& dims) const { + return cast().reduce(dims, internal::AndReducer()); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const TensorReductionOp, const TensorConversionOp > + all() const { + DimensionList in_dims; + return cast().reduce(in_dims, internal::AndReducer()); + } + + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const TensorReductionOp > + any(const Dims& dims) const { + return cast().reduce(dims, internal::OrReducer()); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const TensorReductionOp, const TensorConversionOp > + any() const { + DimensionList in_dims; + return cast().reduce(in_dims, internal::OrReducer()); + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const TensorTupleReducerOp< internal::ArgMaxTupleReducer >, @@ -561,8 +656,8 @@ class TensorBase } protected: - template friend class Tensor; - template friend class TensorFixedSize; + template friend class Tensor; + template friend class TensorFixedSize; template friend class TensorBase; EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Derived& derived() const { return *static_cast(this); } @@ -578,7 +673,7 @@ class TensorBase : public TensorBase::type PacketReturnType; static const int NumDimensions = DerivedTraits::NumDimensions; - template friend class Tensor; + template friend class Tensor; template friend class TensorFixedSize; template friend class TensorBase; diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorBroadcasting.h b/unsupported/Eigen/CXX11/src/Tensor/TensorBroadcasting.h index 24a0df820..dc64959e1 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorBroadcasting.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorBroadcasting.h @@ -99,6 +99,10 @@ struct TensorEvaluator, Device> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device) : m_impl(op.expression(), device) { + // The broadcasting op doesn't change the rank of the tensor. One can't broadcast a scalar + // and store the result in a scalar. Instead one should reshape the scalar into a a N-D + // tensor with N >= 1 of 1 element first and then broadcast. 
+ EIGEN_STATIC_ASSERT(NumDims > 0, YOU_MADE_A_PROGRAMMING_MISTAKE); const typename TensorEvaluator::Dimensions& input_dims = m_impl.dimensions(); const Broadcast& broadcast = op.broadcast(); for (int i = 0; i < NumDims; ++i) { @@ -152,11 +156,11 @@ struct TensorEvaluator, Device> Index inputIndex = 0; for (int i = NumDims - 1; i > 0; --i) { const Index idx = index / m_outputStrides[i]; - if (internal::index_statically_eq()(i, 1)) { + if (internal::index_statically_eq(i, 1)) { eigen_assert(idx < m_impl.dimensions()[i]); inputIndex += idx * m_inputStrides[i]; } else { - if (internal::index_statically_eq()(i, 1)) { + if (internal::index_statically_eq(i, 1)) { eigen_assert(idx % m_impl.dimensions()[i] == 0); } else { inputIndex += (idx % m_impl.dimensions()[i]) * m_inputStrides[i]; @@ -164,11 +168,11 @@ struct TensorEvaluator, Device> } index -= idx * m_outputStrides[i]; } - if (internal::index_statically_eq()(0, 1)) { + if (internal::index_statically_eq(0, 1)) { eigen_assert(index < m_impl.dimensions()[0]); inputIndex += index; } else { - if (internal::index_statically_eq()(0, 1)) { + if (internal::index_statically_eq(0, 1)) { eigen_assert(index % m_impl.dimensions()[0] == 0); } else { inputIndex += (index % m_impl.dimensions()[0]); @@ -182,11 +186,11 @@ struct TensorEvaluator, Device> Index inputIndex = 0; for (int i = 0; i < NumDims - 1; ++i) { const Index idx = index / m_outputStrides[i]; - if (internal::index_statically_eq()(i, 1)) { + if (internal::index_statically_eq(i, 1)) { eigen_assert(idx < m_impl.dimensions()[i]); inputIndex += idx * m_inputStrides[i]; } else { - if (internal::index_statically_eq()(i, 1)) { + if (internal::index_statically_eq(i, 1)) { eigen_assert(idx % m_impl.dimensions()[i] == 0); } else { inputIndex += (idx % m_impl.dimensions()[i]) * m_inputStrides[i]; @@ -194,11 +198,11 @@ struct TensorEvaluator, Device> } index -= idx * m_outputStrides[i]; } - if (internal::index_statically_eq()(NumDims-1, 1)) { + if (internal::index_statically_eq(NumDims-1, 1)) { eigen_assert(index < m_impl.dimensions()[NumDims-1]); inputIndex += index; } else { - if (internal::index_statically_eq()(NumDims-1, 1)) { + if (internal::index_statically_eq(NumDims-1, 1)) { eigen_assert(index % m_impl.dimensions()[NumDims-1] == 0); } else { inputIndex += (index % m_impl.dimensions()[NumDims-1]); @@ -231,11 +235,11 @@ struct TensorEvaluator, Device> Index inputIndex = 0; for (int i = NumDims - 1; i > 0; --i) { const Index idx = index / m_outputStrides[i]; - if (internal::index_statically_eq()(i, 1)) { + if (internal::index_statically_eq(i, 1)) { eigen_assert(idx < m_impl.dimensions()[i]); inputIndex += idx * m_inputStrides[i]; } else { - if (internal::index_statically_eq()(i, 1)) { + if (internal::index_statically_eq(i, 1)) { eigen_assert(idx % m_impl.dimensions()[i] == 0); } else { inputIndex += (idx % m_impl.dimensions()[i]) * m_inputStrides[i]; @@ -244,11 +248,11 @@ struct TensorEvaluator, Device> index -= idx * m_outputStrides[i]; } Index innermostLoc; - if (internal::index_statically_eq()(0, 1)) { + if (internal::index_statically_eq(0, 1)) { eigen_assert(index < m_impl.dimensions()[0]); innermostLoc = index; } else { - if (internal::index_statically_eq()(0, 1)) { + if (internal::index_statically_eq(0, 1)) { eigen_assert(index % m_impl.dimensions()[0] == 0); innermostLoc = 0; } else { @@ -284,11 +288,11 @@ struct TensorEvaluator, Device> Index inputIndex = 0; for (int i = 0; i < NumDims - 1; ++i) { const Index idx = index / m_outputStrides[i]; - if (internal::index_statically_eq()(i, 1)) { + if 
(internal::index_statically_eq(i, 1)) { eigen_assert(idx < m_impl.dimensions()[i]); inputIndex += idx * m_inputStrides[i]; } else { - if (internal::index_statically_eq()(i, 1)) { + if (internal::index_statically_eq(i, 1)) { eigen_assert(idx % m_impl.dimensions()[i] == 0); } else { inputIndex += (idx % m_impl.dimensions()[i]) * m_inputStrides[i]; @@ -297,11 +301,11 @@ struct TensorEvaluator, Device> index -= idx * m_outputStrides[i]; } Index innermostLoc; - if (internal::index_statically_eq()(NumDims-1, 1)) { + if (internal::index_statically_eq(NumDims-1, 1)) { eigen_assert(index < m_impl.dimensions()[NumDims-1]); innermostLoc = index; } else { - if (internal::index_statically_eq()(NumDims-1, 1)) { + if (internal::index_statically_eq(NumDims-1, 1)) { eigen_assert(index % m_impl.dimensions()[NumDims-1] == 0); innermostLoc = 0; } else { diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorChipping.h b/unsupported/Eigen/CXX11/src/Tensor/TensorChipping.h index c9fa39e51..abc3c92ca 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorChipping.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorChipping.h @@ -50,7 +50,7 @@ struct nested, 1, typename eval struct DimensionId { - DimensionId(DenseIndex dim) { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE DimensionId(DenseIndex dim) { eigen_assert(dim == DimId); } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE DenseIndex actualDim() const { @@ -60,7 +60,7 @@ struct DimensionId template <> struct DimensionId { - DimensionId(DenseIndex dim) : actual_dim(dim) { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE DimensionId(DenseIndex dim) : actual_dim(dim) { eigen_assert(dim >= 0); } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE DenseIndex actualDim() const { diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorConcatenation.h b/unsupported/Eigen/CXX11/src/Tensor/TensorConcatenation.h index fa05cab30..3d153bb94 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorConcatenation.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorConcatenation.h @@ -131,7 +131,9 @@ struct TensorEvaluator(TensorEvaluator::Layout) == static_cast(TensorEvaluator::Layout) || NumDims == 1), YOU_MADE_A_PROGRAMMING_MISTAKE); - EIGEN_STATIC_ASSERT(NumDims == RightNumDims, YOU_MADE_A_PROGRAMMING_MISTAKE) + EIGEN_STATIC_ASSERT(NumDims == RightNumDims, YOU_MADE_A_PROGRAMMING_MISTAKE); + EIGEN_STATIC_ASSERT(NumDims > 0, YOU_MADE_A_PROGRAMMING_MISTAKE); + eigen_assert(0 <= m_axis && m_axis < NumDims); const Dimensions& lhs_dims = m_leftImpl.dimensions(); const Dimensions& rhs_dims = m_rightImpl.dimensions(); diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorContraction.h b/unsupported/Eigen/CXX11/src/Tensor/TensorContraction.h index e60fab713..eda93a1de 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorContraction.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorContraction.h @@ -33,14 +33,14 @@ template -class BaseTensorContractionMapper { +class SimpleTensorContractionMapper { public: EIGEN_DEVICE_FUNC - BaseTensorContractionMapper(const Tensor& tensor, - const nocontract_t& nocontract_strides, - const nocontract_t& ij_strides, - const contract_t& contract_strides, - const contract_t& k_strides) : + SimpleTensorContractionMapper(const Tensor& tensor, + const nocontract_t& nocontract_strides, + const nocontract_t& ij_strides, + const contract_t& contract_strides, + const contract_t& k_strides) : m_tensor(tensor), m_nocontract_strides(nocontract_strides), m_ij_strides(ij_strides), @@ -160,104 +160,23 @@ class BaseTensorContractionMapper { }; - template -class TensorContractionInputMapper; - -template -class 
TensorContractionSubMapper { + int packet_size, bool inner_dim_contiguous, + bool inner_dim_reordered, int Alignment> + class BaseTensorContractionMapper : public SimpleTensorContractionMapper +{ public: - typedef typename packet_traits::type Packet; - typedef typename packet_traits::half HalfPacket; - - typedef TensorContractionInputMapper ParentMapper; - typedef TensorContractionSubMapper Self; - typedef Self LinearMapper; - - EIGEN_DEVICE_FUNC TensorContractionSubMapper(const ParentMapper& base_mapper, Index vert_offset, Index horiz_offset) - : m_base_mapper(base_mapper), m_vert_offset(vert_offset), m_horiz_offset(horiz_offset) { } - - EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Scalar operator()(Index i) const { - return m_base_mapper(i + m_vert_offset, m_horiz_offset); - } - EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Scalar operator()(Index i, Index j) const { - return m_base_mapper(i + m_vert_offset, j + m_horiz_offset); - } - - EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet loadPacket(Index i) const { - return m_base_mapper.loadPacket(i + m_vert_offset, m_horiz_offset); - } - EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet loadPacket(Index i, Index j) const { - return m_base_mapper.loadPacket(i + m_vert_offset, j + m_horiz_offset); - } - - EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE HalfPacket loadHalfPacket(Index i) const { - return m_base_mapper.loadHalfPacket(i + m_vert_offset, m_horiz_offset); - } - - EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void storePacket(Index i, Packet p) const { - m_base_mapper.storePacket(i + m_vert_offset, m_horiz_offset, p); - } - - EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE LinearMapper getLinearMapper(Index i, Index j) const { - return LinearMapper(m_base_mapper, i + m_vert_offset, j + m_horiz_offset); - } - - template - EIGEN_ALWAYS_INLINE PacketT load(Index i) const { - EIGEN_STATIC_ASSERT((internal::is_same::value), YOU_MADE_A_PROGRAMMING_MISTAKE); - EIGEN_STATIC_ASSERT((AlignmentType == Aligned || Alignment == Unaligned), YOU_MADE_A_PROGRAMMING_MISTAKE); - return loadPacket(i); - } - - template - bool aligned(Index /*i*/) const { - return false; - } - - private: - const ParentMapper& m_base_mapper; - const Index m_vert_offset; - const Index m_horiz_offset; -}; - - -template::size : 1), - bool inner_dim_contiguous = false, bool inner_dim_reordered = (side != Lhs), int Alignment=Unaligned> -class TensorContractionInputMapper - : public BaseTensorContractionMapper { - - public: - typedef BaseTensorContractionMapper Base; - typedef TensorContractionSubMapper SubMapper; - typedef SubMapper VectorMapper; - - TensorContractionInputMapper(const Tensor& tensor, - const nocontract_t& nocontract_strides, - const nocontract_t& ij_strides, - const contract_t& contract_strides, - const contract_t& k_strides) - : Base(tensor, nocontract_strides, ij_strides, contract_strides, k_strides) { } + typedef SimpleTensorContractionMapper ParentMapper; EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE SubMapper getSubMapper(Index i, Index j) const { - return SubMapper(*this, i, j); - } - - EIGEN_ALWAYS_INLINE VectorMapper getVectorMapper(Index i, Index j) const { - return VectorMapper(*this, i, j); - } + BaseTensorContractionMapper(const Tensor& tensor, + const nocontract_t& nocontract_strides, + const nocontract_t& ij_strides, + const contract_t& contract_strides, + const contract_t& k_strides) : + ParentMapper(tensor, nocontract_strides, ij_strides, contract_strides, k_strides) { } typedef typename packet_traits::type Packet; typedef typename packet_traits::half HalfPacket; @@ -322,35 +241,23 @@ class 
TensorContractionInputMapper }; - - template -class TensorContractionInputMapper - : public BaseTensorContractionMapper { - + bool inner_dim_contiguous, + bool inner_dim_reordered, int Alignment> +class BaseTensorContractionMapper : public SimpleTensorContractionMapper +{ public: - typedef BaseTensorContractionMapper Base; - typedef TensorContractionSubMapper SubMapper; - typedef SubMapper VectorMapper; - - TensorContractionInputMapper(const Tensor& tensor, - const nocontract_t& nocontract_strides, - const nocontract_t& ij_strides, - const contract_t& contract_strides, - const contract_t& k_strides) - : Base(tensor, nocontract_strides, ij_strides, contract_strides, k_strides) { } + typedef SimpleTensorContractionMapper ParentMapper; EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE SubMapper getSubMapper(Index i, Index j) const { - return SubMapper(*this, i, j); - } - - EIGEN_ALWAYS_INLINE VectorMapper getVectorMapper(Index i, Index j) const { - return VectorMapper(*this, i, j); - } + BaseTensorContractionMapper(const Tensor& tensor, + const nocontract_t& nocontract_strides, + const nocontract_t& ij_strides, + const contract_t& contract_strides, + const contract_t& k_strides) : + ParentMapper(tensor, nocontract_strides, ij_strides, contract_strides, k_strides) { } typedef typename packet_traits::type Packet; EIGEN_DEVICE_FUNC @@ -365,6 +272,106 @@ class TensorContractionInputMapper +class TensorContractionInputMapper; + +template +class TensorContractionSubMapper { + public: + typedef typename packet_traits::type Packet; + typedef typename packet_traits::half HalfPacket; + + typedef TensorContractionInputMapper ParentMapper; + typedef TensorContractionSubMapper Self; + typedef Self LinearMapper; + + EIGEN_DEVICE_FUNC TensorContractionSubMapper(const ParentMapper& base_mapper, Index vert_offset, Index horiz_offset) + : m_base_mapper(base_mapper), m_vert_offset(vert_offset), m_horiz_offset(horiz_offset) { } + + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Scalar operator()(Index i) const { + return m_base_mapper(i + m_vert_offset, m_horiz_offset); + } + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Scalar operator()(Index i, Index j) const { + return m_base_mapper(i + m_vert_offset, j + m_horiz_offset); + } + + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet loadPacket(Index i) const { + return m_base_mapper.loadPacket(i + m_vert_offset, m_horiz_offset); + } + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet loadPacket(Index i, Index j) const { + return m_base_mapper.loadPacket(i + m_vert_offset, j + m_horiz_offset); + } + + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE HalfPacket loadHalfPacket(Index i) const { + return m_base_mapper.loadHalfPacket(i + m_vert_offset, m_horiz_offset); + } + + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void storePacket(Index i, Packet p) const { + m_base_mapper.storePacket(i + m_vert_offset, m_horiz_offset, p); + } + + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE LinearMapper getLinearMapper(Index i, Index j) const { + return LinearMapper(m_base_mapper, i + m_vert_offset, j + m_horiz_offset); + } + + template + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE PacketT load(Index i) const { + EIGEN_STATIC_ASSERT((internal::is_same::value), YOU_MADE_A_PROGRAMMING_MISTAKE); + EIGEN_STATIC_ASSERT((AlignmentType == Aligned || Alignment == Unaligned), YOU_MADE_A_PROGRAMMING_MISTAKE); + return loadPacket(i); + } + + template + EIGEN_DEVICE_FUNC bool aligned(Index) const { + return false; + } + + private: + const ParentMapper& m_base_mapper; + const Index m_vert_offset; + const Index m_horiz_offset; +}; + + +template +class 
TensorContractionInputMapper + : public BaseTensorContractionMapper { + + public: + typedef BaseTensorContractionMapper Base; + typedef TensorContractionSubMapper SubMapper; + typedef SubMapper VectorMapper; + + EIGEN_DEVICE_FUNC TensorContractionInputMapper(const Tensor& tensor, + const nocontract_t& nocontract_strides, + const nocontract_t& ij_strides, + const contract_t& contract_strides, + const contract_t& k_strides) + : Base(tensor, nocontract_strides, ij_strides, contract_strides, k_strides) { } + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE SubMapper getSubMapper(Index i, Index j) const { + return SubMapper(*this, i, j); + } + + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE VectorMapper getVectorMapper(Index i, Index j) const { + return VectorMapper(*this, i, j); + } +}; + + template struct traits > @@ -488,7 +495,7 @@ struct TensorContractionEvaluatorBase internal::array_size::Dimensions>::value; static const int RDims = internal::array_size::Dimensions>::value; - static const unsigned int ContractDims = internal::array_size::value; + static const int ContractDims = internal::array_size::value; static const int NumDims = max_n_1::size; typedef array left_dim_mapper_t; @@ -524,7 +531,7 @@ struct TensorContractionEvaluatorBase eval_right_dims[i] = m_rightImpl.dimensions()[i]; } // We keep the pairs of contracting indices. - for (unsigned int i = 0; i < ContractDims; i++) { + for (int i = 0; i < ContractDims; i++) { eval_op_indices[i].first = op.indices()[i].first; eval_op_indices[i].second = op.indices()[i].second; } @@ -538,7 +545,7 @@ struct TensorContractionEvaluatorBase } // We need to flip all the pairs of contracting indices as well as // reversing the dimensions. - for (unsigned int i = 0; i < ContractDims; i++) { + for (int i = 0; i < ContractDims; i++) { eval_op_indices[i].first = LDims - 1 - op.indices()[i].second; eval_op_indices[i].second = RDims - 1 - op.indices()[i].first; } @@ -577,7 +584,7 @@ struct TensorContractionEvaluatorBase for (int i = 0; i < LDims; i++) { // find if we are contracting on index i of left tensor bool contracting = false; - for (unsigned int j = 0; j < ContractDims; j++) { + for (int j = 0; j < ContractDims; j++) { if (eval_op_indices[j].first == i) { contracting = true; break; @@ -605,7 +612,7 @@ struct TensorContractionEvaluatorBase for (int i = 0; i < RDims; i++) { bool contracting = false; // find if we are contracting on index i of right tensor - for (unsigned int j = 0; j < ContractDims; j++) { + for (int j = 0; j < ContractDims; j++) { if (eval_op_indices[j].second == i) { contracting = true; break; @@ -632,7 +639,7 @@ struct TensorContractionEvaluatorBase // each tensor, we'll only look at the first tensor here. 
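Note: the mapper reshuffling above (SimpleTensorContractionMapper as the new base, with BaseTensorContractionMapper and the sub/input mappers layered on top) is internal; user-facing contractions are unchanged. For reference, a sketch of the API this evaluator serves (illustrative code, not part of the patch):

```cpp
#include <unsupported/Eigen/CXX11/Tensor>

void contraction_demo() {
  Eigen::Tensor<float, 2> a(3, 4), b(4, 5);
  a.setRandom();
  b.setRandom();

  // Contract dimension 1 of `a` with dimension 0 of `b`: an ordinary matrix product.
  Eigen::array<Eigen::IndexPair<int>, 1> dims = {{Eigen::IndexPair<int>(1, 0)}};
  Eigen::Tensor<float, 2> c = a.contract(b, dims);  // c is 3 x 5
}
```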
m_rhs_inner_dim_contiguous = true; m_rhs_inner_dim_reordered = false; - for (unsigned int i = 0; i < ContractDims; i++) { + for (int i = 0; i < ContractDims; i++) { Index left = eval_op_indices[i].first; Index right = eval_op_indices[i].second; @@ -640,7 +647,7 @@ struct TensorContractionEvaluatorBase eigen_assert(size == eval_right_dims[right] && "Contraction axes must be same size"); - if (i+1 < internal::array_size::value) { + if (i+1 < static_cast(internal::array_size::value)) { m_k_strides[i+1] = m_k_strides[i] * size; } else { m_k_size = m_k_strides[i] * size; diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorContractionCuda.h b/unsupported/Eigen/CXX11/src/Tensor/TensorContractionCuda.h index f6bd949bd..90ee50678 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorContractionCuda.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorContractionCuda.h @@ -1147,7 +1147,6 @@ EigenFloatContractionKernel(const LhsMapper lhs, const RhsMapper rhs, bool check_rhs = (base_n + 63) >= n_size; bool check_lhs128 = (base_m + 127) >= m_size; - bool check_lhs64 = (base_m + 63) >= m_size; if (!check_rhs) { if (!check_lhs128) { @@ -1227,9 +1226,9 @@ struct TensorEvaluator::type EvalLeftArgType; + static_cast(Layout) == static_cast(ColMajor), LeftArgType, RightArgType>::type EvalLeftArgType; typedef typename internal::conditional< - Layout == ColMajor, RightArgType, LeftArgType>::type EvalRightArgType; + static_cast(Layout) == static_cast(ColMajor), RightArgType, LeftArgType>::type EvalRightArgType; static const int LDims = internal::array_size::Dimensions>::value; diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorConversion.h b/unsupported/Eigen/CXX11/src/Tensor/TensorConversion.h index 4ca978ab4..3ca7daf32 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorConversion.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorConversion.h @@ -52,6 +52,7 @@ struct nested, 1, typename eval struct PacketConverter { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketConverter(const TensorEvaluator& impl) : m_impl(impl) {} @@ -67,6 +68,7 @@ struct PacketConverter { template struct PacketConverter { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketConverter(const TensorEvaluator& impl) : m_impl(impl) {} @@ -87,6 +89,7 @@ struct PacketConverter { template struct PacketConverter { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketConverter(const TensorEvaluator& impl) : m_impl(impl), m_maxIndex(impl.dimensions().TotalSize()) {} @@ -124,8 +127,8 @@ class TensorConversionOp : public TensorBase::StorageKind StorageKind; typedef typename internal::traits::Index Index; typedef typename internal::nested::type Nested; - typedef typename XprType::CoeffReturnType CoeffReturnType; - typedef typename XprType::PacketReturnType PacketReturnType; + typedef Scalar CoeffReturnType; + typedef Packet PacketReturnType; typedef typename NumTraits::Real RealScalar; EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorConversionOp(const XprType& xpr) diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorDevice.h b/unsupported/Eigen/CXX11/src/Tensor/TensorDevice.h index 17f10c07b..29e50a3b2 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorDevice.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorDevice.h @@ -63,92 +63,6 @@ template class TensorDevice { ExpressionType& m_expression; }; - -#ifdef EIGEN_USE_THREADS -template class TensorDevice { - public: - TensorDevice(const ThreadPoolDevice& device, ExpressionType& expression) : m_device(device), m_expression(expression) {} - - template - EIGEN_STRONG_INLINE TensorDevice& operator=(const OtherDerived& 
other) { - typedef TensorAssignOp Assign; - Assign assign(m_expression, other); - internal::TensorExecutor::run(assign, m_device); - return *this; - } - - template - EIGEN_STRONG_INLINE TensorDevice& operator+=(const OtherDerived& other) { - typedef typename OtherDerived::Scalar Scalar; - typedef TensorCwiseBinaryOp, const ExpressionType, const OtherDerived> Sum; - Sum sum(m_expression, other); - typedef TensorAssignOp Assign; - Assign assign(m_expression, sum); - internal::TensorExecutor::run(assign, m_device); - return *this; - } - - template - EIGEN_STRONG_INLINE TensorDevice& operator-=(const OtherDerived& other) { - typedef typename OtherDerived::Scalar Scalar; - typedef TensorCwiseBinaryOp, const ExpressionType, const OtherDerived> Difference; - Difference difference(m_expression, other); - typedef TensorAssignOp Assign; - Assign assign(m_expression, difference); - internal::TensorExecutor::run(assign, m_device); - return *this; - } - - protected: - const ThreadPoolDevice& m_device; - ExpressionType& m_expression; -}; -#endif - - -#if defined(EIGEN_USE_GPU) && defined(__CUDACC__) -template class TensorDevice -{ - public: - TensorDevice(const GpuDevice& device, ExpressionType& expression) : m_device(device), m_expression(expression) {} - - template - EIGEN_STRONG_INLINE TensorDevice& operator=(const OtherDerived& other) { - typedef TensorAssignOp Assign; - Assign assign(m_expression, other); - internal::TensorExecutor::run(assign, m_device); - return *this; - } - - template - EIGEN_STRONG_INLINE TensorDevice& operator+=(const OtherDerived& other) { - typedef typename OtherDerived::Scalar Scalar; - typedef TensorCwiseBinaryOp, const ExpressionType, const OtherDerived> Sum; - Sum sum(m_expression, other); - typedef TensorAssignOp Assign; - Assign assign(m_expression, sum); - internal::TensorExecutor::run(assign, m_device); - return *this; - } - - template - EIGEN_STRONG_INLINE TensorDevice& operator-=(const OtherDerived& other) { - typedef typename OtherDerived::Scalar Scalar; - typedef TensorCwiseBinaryOp, const ExpressionType, const OtherDerived> Difference; - Difference difference(m_expression, other); - typedef TensorAssignOp Assign; - Assign assign(m_expression, difference); - internal::TensorExecutor::run(assign, m_device); - return *this; - } - - protected: - const GpuDevice& m_device; - ExpressionType& m_expression; -}; -#endif - - } // end namespace Eigen #endif // EIGEN_CXX11_TENSOR_TENSOR_DEVICE_H diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceType.h b/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceCuda.h similarity index 51% rename from unsupported/Eigen/CXX11/src/Tensor/TensorDeviceType.h rename to unsupported/Eigen/CXX11/src/Tensor/TensorDeviceCuda.h index 2ff7d471d..4d7570077 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceType.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceCuda.h @@ -7,272 +7,12 @@ // Public License v. 2.0. If a copy of the MPL was not distributed // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
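The rename to TensorDeviceCuda.h pairs with the guard change just below: the header now compiles away entirely unless EIGEN_USE_GPU is defined. On the consumer side that means opting in before including the module (a short sketch):

```cpp
// User code opting in to the CUDA device support split out here.
// EIGEN_USE_GPU must be defined before the Tensor module is included,
// which in turn pulls in TensorDeviceCuda.h.
#define EIGEN_USE_GPU
#include <unsupported/Eigen/CXX11/Tensor>
```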
-#ifndef EIGEN_CXX11_TENSOR_TENSOR_DEVICE_TYPE_H -#define EIGEN_CXX11_TENSOR_TENSOR_DEVICE_TYPE_H +#if defined(EIGEN_USE_GPU) && !defined(EIGEN_CXX11_TENSOR_TENSOR_DEVICE_CUDA_H) +#define EIGEN_CXX11_TENSOR_TENSOR_DEVICE_CUDA_H namespace Eigen { -// Default device for the machine (typically a single cpu core) -struct DefaultDevice { - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void* allocate(size_t num_bytes) const { - return internal::aligned_malloc(num_bytes); - } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void deallocate(void* buffer) const { - internal::aligned_free(buffer); - } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void memcpy(void* dst, const void* src, size_t n) const { - ::memcpy(dst, src, n); - } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void memcpyHostToDevice(void* dst, const void* src, size_t n) const { - memcpy(dst, src, n); - } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void memcpyDeviceToHost(void* dst, const void* src, size_t n) const { - memcpy(dst, src, n); - } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void memset(void* buffer, int c, size_t n) const { - ::memset(buffer, c, n); - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE size_t numThreads() const { -#ifndef __CUDA_ARCH__ - // Running on the host CPU - return 1; -#else - // Running on a CUDA device - return 32; -#endif - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE int majorDeviceVersion() const { -#ifndef __CUDA_ARCH__ - // Running single threaded on the host CPU - // Should return an enum that encodes the ISA supported by the CPU - return 1; -#else - // Running on a CUDA device - return __CUDA_ARCH__ / 100; -#endif - } -}; - - -// Multiple cpu cores -// We should really use a thread pool here but first we need to find a portable thread pool library. -#ifdef EIGEN_USE_THREADS - -// This defines an interface that ThreadPoolDevice can take to use -// custom thread pools underneath. -class ThreadPoolInterface { - public: - virtual void Schedule(std::function fn) = 0; - - virtual ~ThreadPoolInterface() {} -}; - -// The implementation of the ThreadPool type ensures that the Schedule method -// runs the functions it is provided in FIFO order when the scheduling is done -// by a single thread. -class ThreadPool : public ThreadPoolInterface { - public: - // Construct a pool that contains "num_threads" threads. - explicit ThreadPool(int num_threads) { - for (int i = 0; i < num_threads; i++) { - threads_.push_back(new std::thread([this]() { WorkerLoop(); })); - } - } - - // Wait until all scheduled work has finished and then destroy the - // set of threads. - ~ThreadPool() - { - { - // Wait for all work to get done. - std::unique_lock l(mu_); - empty_.wait(l, [this]() { return pending_.empty(); }); - exiting_ = true; - - // Wakeup all waiters. - for (auto w : waiters_) { - w->ready = true; - w->work = nullptr; - w->cv.notify_one(); - } - } - - // Wait for threads to finish. - for (auto t : threads_) { - t->join(); - delete t; - } - } - - // Schedule fn() for execution in the pool of threads. The functions are - // executed in the order in which they are scheduled. 
- void Schedule(std::function fn) { - std::unique_lock l(mu_); - if (waiters_.empty()) { - pending_.push_back(fn); - } else { - Waiter* w = waiters_.back(); - waiters_.pop_back(); - w->ready = true; - w->work = fn; - w->cv.notify_one(); - } - } - - protected: - void WorkerLoop() { - std::unique_lock l(mu_); - Waiter w; - while (!exiting_) { - std::function fn; - if (pending_.empty()) { - // Wait for work to be assigned to me - w.ready = false; - waiters_.push_back(&w); - w.cv.wait(l, [&w]() { return w.ready; }); - fn = w.work; - w.work = nullptr; - } else { - // Pick up pending work - fn = pending_.front(); - pending_.pop_front(); - if (pending_.empty()) { - empty_.notify_all(); - } - } - if (fn) { - mu_.unlock(); - fn(); - mu_.lock(); - } - } - } - - private: - struct Waiter { - std::condition_variable cv; - std::function work; - bool ready; - }; - - std::mutex mu_; - std::vector threads_; // All threads - std::vector waiters_; // Stack of waiting threads. - std::deque> pending_; // Queue of pending work - std::condition_variable empty_; // Signaled on pending_.empty() - bool exiting_ = false; -}; - - -// Notification is an object that allows a user to to wait for another -// thread to signal a notification that an event has occurred. -// -// Multiple threads can wait on the same Notification object. -// but only one caller must call Notify() on the object. -class Notification { - public: - Notification() : notified_(false) {} - ~Notification() {} - - void Notify() { - std::unique_lock l(mu_); - eigen_assert(!notified_); - notified_ = true; - cv_.notify_all(); - } - - void WaitForNotification() { - std::unique_lock l(mu_); - cv_.wait(l, [this]() { return notified_; } ); - } - - private: - std::mutex mu_; - std::condition_variable cv_; - bool notified_; -}; - -// Runs an arbitrary function and then calls Notify() on the passed in -// Notification. -template struct FunctionWrapper -{ - static void run(Notification* n, Function f, Args... args) { - f(args...); - n->Notify(); - } -}; - -static EIGEN_STRONG_INLINE void wait_until_ready(Notification* n) { - if (n) { - n->WaitForNotification(); - } -} - - -// Build a thread pool device on top the an existing pool of threads. -struct ThreadPoolDevice { - // The ownership of the thread pool remains with the caller. - ThreadPoolDevice(ThreadPoolInterface* pool, size_t num_cores) : pool_(pool), num_threads_(num_cores) { } - - EIGEN_STRONG_INLINE void* allocate(size_t num_bytes) const { - return internal::aligned_malloc(num_bytes); - } - - EIGEN_STRONG_INLINE void deallocate(void* buffer) const { - internal::aligned_free(buffer); - } - - EIGEN_STRONG_INLINE void memcpy(void* dst, const void* src, size_t n) const { - ::memcpy(dst, src, n); - } - EIGEN_STRONG_INLINE void memcpyHostToDevice(void* dst, const void* src, size_t n) const { - memcpy(dst, src, n); - } - EIGEN_STRONG_INLINE void memcpyDeviceToHost(void* dst, const void* src, size_t n) const { - memcpy(dst, src, n); - } - - EIGEN_STRONG_INLINE void memset(void* buffer, int c, size_t n) const { - ::memset(buffer, c, n); - } - - EIGEN_STRONG_INLINE size_t numThreads() const { - return num_threads_; - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE int majorDeviceVersion() const { - // Should return an enum that encodes the ISA supported by the CPU - return 1; - } - - template - EIGEN_STRONG_INLINE Notification* enqueue(Function&& f, Args&&... 
args) const { - Notification* n = new Notification(); - std::function func = - std::bind(&FunctionWrapper::run, n, f, args...); - pool_->Schedule(func); - return n; - } - template - EIGEN_STRONG_INLINE void enqueueNoNotification(Function&& f, Args&&... args) const { - std::function func = std::bind(f, args...); - pool_->Schedule(func); - } - - private: - ThreadPoolInterface* pool_; - size_t num_threads_; -}; - -#endif - - -// GPU offloading -#ifdef EIGEN_USE_GPU - // This defines an interface that GPUDevice can take to use // CUDA streams underneath. class StreamInterface { @@ -295,6 +35,7 @@ static void initializeDeviceProp() { if (!m_devicePropInitialized) { int num_devices; cudaError_t status = cudaGetDeviceCount(&num_devices); + EIGEN_UNUSED_VARIABLE(status) assert(status == cudaSuccess); m_deviceProperties = new cudaDeviceProp[num_devices]; for (int i = 0; i < num_devices; ++i) { @@ -330,6 +71,7 @@ class CudaStreamDevice : public StreamInterface { } else { int num_devices; cudaError_t err = cudaGetDeviceCount(&num_devices); + EIGEN_UNUSED_VARIABLE(err) assert(err == cudaSuccess); assert(device < num_devices); device_ = device; @@ -343,6 +85,7 @@ class CudaStreamDevice : public StreamInterface { } virtual void* allocate(size_t num_bytes) const { cudaError_t err = cudaSetDevice(device_); + EIGEN_UNUSED_VARIABLE(err) assert(err == cudaSuccess); void* result; err = cudaMalloc(&result, num_bytes); @@ -352,6 +95,7 @@ class CudaStreamDevice : public StreamInterface { } virtual void deallocate(void* buffer) const { cudaError_t err = cudaSetDevice(device_); + EIGEN_UNUSED_VARIABLE(err) assert(err == cudaSuccess); assert(buffer != NULL); err = cudaFree(buffer); @@ -363,7 +107,6 @@ class CudaStreamDevice : public StreamInterface { int device_; }; - struct GpuDevice { // The StreamInterface is not owned: the caller is // responsible for its initialization and eventual destruction. 
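For reviewers: with the stream interface above, offloading an expression stays a one-liner through device(). A sketch, assuming nvcc compilation, EIGEN_USE_GPU, and buffers already resident on the GPU (names illustrative):

```cpp
#define EIGEN_USE_GPU
#include <unsupported/Eigen/CXX11/Tensor>

void gpu_demo(float* gpu_in, float* gpu_out, int n) {
  Eigen::CudaStreamDevice stream;    // wraps the default CUDA stream
  Eigen::GpuDevice device(&stream);  // the stream's lifetime belongs to the caller

  Eigen::TensorMap<Eigen::Tensor<float, 1> > in(gpu_in, n);
  Eigen::TensorMap<Eigen::Tensor<float, 1> > out(gpu_out, n);

  out.device(device) = in * 2.0f;    // evaluated on the GPU
}
```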
@@ -398,6 +141,7 @@ struct GpuDevice { #ifndef __CUDA_ARCH__ cudaError_t err = cudaMemcpyAsync(dst, src, n, cudaMemcpyDeviceToDevice, stream_->stream()); + EIGEN_UNUSED_VARIABLE(err) assert(err == cudaSuccess); #else eigen_assert(false && "The default device should be used instead to generate kernel code"); @@ -408,6 +152,7 @@ struct GpuDevice { #ifndef __CUDA_ARCH__ cudaError_t err = cudaMemcpyAsync(dst, src, n, cudaMemcpyHostToDevice, stream_->stream()); + EIGEN_UNUSED_VARIABLE(err) assert(err == cudaSuccess); #else eigen_assert(false && "The default device should be used instead to generate kernel code"); @@ -418,6 +163,7 @@ struct GpuDevice { #ifndef __CUDA_ARCH__ cudaError_t err = cudaMemcpyAsync(dst, src, n, cudaMemcpyDeviceToHost, stream_->stream()); + EIGEN_UNUSED_VARIABLE(err) assert(err == cudaSuccess); #else eigen_assert(false && "The default device should be used instead to generate kernel code"); @@ -427,6 +173,7 @@ struct GpuDevice { EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void memset(void* buffer, int c, size_t n) const { #ifndef __CUDA_ARCH__ cudaError_t err = cudaMemsetAsync(buffer, c, n, stream_->stream()); + EIGEN_UNUSED_VARIABLE(err) assert(err == cudaSuccess); #else eigen_assert(false && "The default device should be used instead to generate kernel code"); @@ -450,8 +197,9 @@ struct GpuDevice { } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void synchronize() const { -#ifndef __CUDA_ARCH__ +#if defined(__CUDACC__) && !defined(__CUDA_ARCH__) cudaError_t err = cudaStreamSynchronize(stream_->stream()); + EIGEN_UNUSED_VARIABLE(err) assert(err == cudaSuccess); #else assert(false && "The default device should be used instead to generate kernel code"); @@ -477,8 +225,12 @@ struct GpuDevice { // This function checks if the CUDA runtime recorded an error for the // underlying stream device. inline bool ok() const { +#ifdef __CUDACC__ cudaError_t error = cudaStreamQuery(stream_->stream()); return (error == cudaSuccess) || (error == cudaErrorNotReady); +#else + return false; +#endif } private: @@ -486,18 +238,22 @@ struct GpuDevice { }; - +#ifndef __CUDA_ARCH__ #define LAUNCH_CUDA_KERNEL(kernel, gridsize, blocksize, sharedmem, device, ...) \ (kernel) <<< (gridsize), (blocksize), (sharedmem), (device).stream() >>> (__VA_ARGS__); \ assert(cudaGetLastError() == cudaSuccess); - +#else +#define LAUNCH_CUDA_KERNEL(...) \ + eigen_assert(false && "Cannot launch a kernel from another kernel"); +#endif // FIXME: Should be device and kernel specific. +#ifdef __CUDACC__ static inline void setCudaSharedMemConfig(cudaSharedMemConfig config) { cudaError_t status = cudaDeviceSetSharedMemConfig(config); + EIGEN_UNUSED_VARIABLE(status) assert(status == cudaSuccess); } - #endif } // end namespace Eigen diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceDefault.h b/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceDefault.h new file mode 100644 index 000000000..267f6f8e3 --- /dev/null +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceDefault.h @@ -0,0 +1,61 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2014 Benoit Steiner +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
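The new TensorDeviceDefault.h below carries the DefaultDevice split out of TensorDeviceType.h. It is the device every plain tensor assignment uses implicitly, so no setup is required (a sketch):

```cpp
#include <unsupported/Eigen/CXX11/Tensor>

void default_device_demo() {
  Eigen::Tensor<float, 1> a(8), b(8);
  a.setRandom();
  // Runs through TensorExecutor with DefaultDevice: single-threaded on
  // the host, or one thread per lane when executed inside a CUDA kernel.
  b = a + a;
}
```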
+ +#ifndef EIGEN_CXX11_TENSOR_TENSOR_DEVICE_DEFAULT_H +#define EIGEN_CXX11_TENSOR_TENSOR_DEVICE_DEFAULT_H + + +namespace Eigen { + +// Default device for the machine (typically a single cpu core) +struct DefaultDevice { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void* allocate(size_t num_bytes) const { + return internal::aligned_malloc(num_bytes); + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void deallocate(void* buffer) const { + internal::aligned_free(buffer); + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void memcpy(void* dst, const void* src, size_t n) const { + ::memcpy(dst, src, n); + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void memcpyHostToDevice(void* dst, const void* src, size_t n) const { + memcpy(dst, src, n); + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void memcpyDeviceToHost(void* dst, const void* src, size_t n) const { + memcpy(dst, src, n); + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void memset(void* buffer, int c, size_t n) const { + ::memset(buffer, c, n); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE size_t numThreads() const { +#ifndef __CUDA_ARCH__ + // Running on the host CPU + return 1; +#else + // Running on a CUDA device + return 32; +#endif + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE int majorDeviceVersion() const { +#ifndef __CUDA_ARCH__ + // Running single threaded on the host CPU + // Should return an enum that encodes the ISA supported by the CPU + return 1; +#else + // Running on a CUDA device + return __CUDA_ARCH__ / 100; +#endif + } +}; + +} // namespace Eigen + +#endif // EIGEN_CXX11_TENSOR_TENSOR_DEVICE_DEFAULT_H diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceThreadPool.h b/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceThreadPool.h new file mode 100644 index 000000000..dcbef5b03 --- /dev/null +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceThreadPool.h @@ -0,0 +1,224 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2014 Benoit Steiner +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#if defined(EIGEN_USE_THREADS) && !defined(EIGEN_CXX11_TENSOR_TENSOR_DEVICE_THREAD_POOL_H) +#define EIGEN_CXX11_TENSOR_TENSOR_DEVICE_THREAD_POOL_H + +namespace Eigen { + +// This defines an interface that ThreadPoolDevice can take to use +// custom thread pools underneath. +class ThreadPoolInterface { + public: + virtual void Schedule(std::function fn) = 0; + + virtual ~ThreadPoolInterface() {} +}; + +// The implementation of the ThreadPool type ensures that the Schedule method +// runs the functions it is provided in FIFO order when the scheduling is done +// by a single thread. +class ThreadPool : public ThreadPoolInterface { + public: + // Construct a pool that contains "num_threads" threads. + explicit ThreadPool(int num_threads) { + for (int i = 0; i < num_threads; i++) { + threads_.push_back(new std::thread([this]() { WorkerLoop(); })); + } + } + + // Wait until all scheduled work has finished and then destroy the + // set of threads. + ~ThreadPool() + { + { + // Wait for all work to get done. + std::unique_lock l(mu_); + empty_.wait(l, [this]() { return pending_.empty(); }); + exiting_ = true; + + // Wakeup all waiters. + for (auto w : waiters_) { + w->ready = true; + w->work = nullptr; + w->cv.notify_one(); + } + } + + // Wait for threads to finish. 
+ for (auto t : threads_) { + t->join(); + delete t; + } + } + + // Schedule fn() for execution in the pool of threads. The functions are + // executed in the order in which they are scheduled. + void Schedule(std::function fn) { + std::unique_lock l(mu_); + if (waiters_.empty()) { + pending_.push_back(fn); + } else { + Waiter* w = waiters_.back(); + waiters_.pop_back(); + w->ready = true; + w->work = fn; + w->cv.notify_one(); + } + } + + protected: + void WorkerLoop() { + std::unique_lock l(mu_); + Waiter w; + while (!exiting_) { + std::function fn; + if (pending_.empty()) { + // Wait for work to be assigned to me + w.ready = false; + waiters_.push_back(&w); + w.cv.wait(l, [&w]() { return w.ready; }); + fn = w.work; + w.work = nullptr; + } else { + // Pick up pending work + fn = pending_.front(); + pending_.pop_front(); + if (pending_.empty()) { + empty_.notify_all(); + } + } + if (fn) { + mu_.unlock(); + fn(); + mu_.lock(); + } + } + } + + private: + struct Waiter { + std::condition_variable cv; + std::function work; + bool ready; + }; + + std::mutex mu_; + std::vector threads_; // All threads + std::vector waiters_; // Stack of waiting threads. + std::deque> pending_; // Queue of pending work + std::condition_variable empty_; // Signaled on pending_.empty() + bool exiting_ = false; +}; + + +// Notification is an object that allows a user to to wait for another +// thread to signal a notification that an event has occurred. +// +// Multiple threads can wait on the same Notification object. +// but only one caller must call Notify() on the object. +class Notification { + public: + Notification() : notified_(false) {} + ~Notification() {} + + void Notify() { + std::unique_lock l(mu_); + eigen_assert(!notified_); + notified_ = true; + cv_.notify_all(); + } + + void WaitForNotification() { + std::unique_lock l(mu_); + cv_.wait(l, [this]() { return notified_; } ); + } + + private: + std::mutex mu_; + std::condition_variable cv_; + bool notified_; +}; + +// Runs an arbitrary function and then calls Notify() on the passed in +// Notification. +template struct FunctionWrapper +{ + static void run(Notification* n, Function f, Args... args) { + f(args...); + n->Notify(); + } +}; + +static EIGEN_STRONG_INLINE void wait_until_ready(Notification* n) { + if (n) { + n->WaitForNotification(); + } +} + + +// Build a thread pool device on top the an existing pool of threads. +struct ThreadPoolDevice { + // The ownership of the thread pool remains with the caller. 
+ ThreadPoolDevice(ThreadPoolInterface* pool, size_t num_cores) : pool_(pool), num_threads_(num_cores) { } + + EIGEN_STRONG_INLINE void* allocate(size_t num_bytes) const { + return internal::aligned_malloc(num_bytes); + } + + EIGEN_STRONG_INLINE void deallocate(void* buffer) const { + internal::aligned_free(buffer); + } + + EIGEN_STRONG_INLINE void memcpy(void* dst, const void* src, size_t n) const { + ::memcpy(dst, src, n); + } + EIGEN_STRONG_INLINE void memcpyHostToDevice(void* dst, const void* src, size_t n) const { + memcpy(dst, src, n); + } + EIGEN_STRONG_INLINE void memcpyDeviceToHost(void* dst, const void* src, size_t n) const { + memcpy(dst, src, n); + } + + EIGEN_STRONG_INLINE void memset(void* buffer, int c, size_t n) const { + ::memset(buffer, c, n); + } + + EIGEN_STRONG_INLINE size_t numThreads() const { + return num_threads_; + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE int majorDeviceVersion() const { + // Should return an enum that encodes the ISA supported by the CPU + return 1; + } + + template + EIGEN_STRONG_INLINE Notification* enqueue(Function&& f, Args&&... args) const { + Notification* n = new Notification(); + std::function func = + std::bind(&FunctionWrapper::run, n, f, args...); + pool_->Schedule(func); + return n; + } + template + EIGEN_STRONG_INLINE void enqueueNoNotification(Function&& f, Args&&... args) const { + std::function func = std::bind(f, args...); + pool_->Schedule(func); + } + + private: + ThreadPoolInterface* pool_; + size_t num_threads_; +}; + + +} // end namespace Eigen + +#endif // EIGEN_CXX11_TENSOR_TENSOR_DEVICE_THREAD_POOL_H diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorDimensionList.h b/unsupported/Eigen/CXX11/src/Tensor/TensorDimensionList.h index 9773afccf..ca9ac79df 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorDimensionList.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorDimensionList.h @@ -23,6 +23,7 @@ namespace Eigen { */ template struct DimensionList { + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE const Index operator[] (const Index i) const { return i; } }; @@ -45,184 +46,184 @@ template const Index array_get(c #if defined(EIGEN_HAS_CONSTEXPR) template -struct index_known_statically > { - constexpr bool operator() (const DenseIndex) const { +struct index_known_statically_impl > { + EIGEN_DEVICE_FUNC static constexpr bool run(const DenseIndex) { return true; } }; template -struct index_known_statically > { - constexpr bool operator() (const DenseIndex) const { +struct index_known_statically_impl > { + EIGEN_DEVICE_FUNC static constexpr bool run(const DenseIndex) { return true; } }; template -struct all_indices_known_statically > { - constexpr bool operator() () const { +struct all_indices_known_statically_impl > { + EIGEN_DEVICE_FUNC static constexpr bool run() { return true; } }; template -struct all_indices_known_statically > { - constexpr bool operator() () const { +struct all_indices_known_statically_impl > { + EIGEN_DEVICE_FUNC static constexpr bool run() { return true; } }; template -struct indices_statically_known_to_increase > { - constexpr bool operator() () const { +struct indices_statically_known_to_increase_impl > { + EIGEN_DEVICE_FUNC static constexpr bool run() { return true; } }; template -struct indices_statically_known_to_increase > { - constexpr bool operator() () const { +struct indices_statically_known_to_increase_impl > { + EIGEN_DEVICE_FUNC static constexpr bool run() { return true; } }; template -struct index_statically_eq > { - constexpr bool operator() (const DenseIndex i, const DenseIndex value) 
const { +struct index_statically_eq_impl > { + static constexpr bool run(const DenseIndex i, const DenseIndex value) { return i == value; } }; template -struct index_statically_eq > { - constexpr bool operator() (const DenseIndex i, const DenseIndex value) const { +struct index_statically_eq_impl > { + EIGEN_DEVICE_FUNC static constexpr bool run(const DenseIndex i, const DenseIndex value) { return i == value; } }; template -struct index_statically_ne > { - constexpr bool operator() (const DenseIndex i, const DenseIndex value) const { +struct index_statically_ne_impl > { + EIGEN_DEVICE_FUNC static constexpr bool run(const DenseIndex i, const DenseIndex value) { return i != value; } }; template -struct index_statically_ne > { - constexpr bool operator() (const DenseIndex i, const DenseIndex value) const { +struct index_statically_ne_impl > { + static constexpr bool run(const DenseIndex i, const DenseIndex value) { return i != value; } }; template -struct index_statically_gt > { - constexpr bool operator() (const DenseIndex i, const DenseIndex value) const { +struct index_statically_gt_impl > { + EIGEN_DEVICE_FUNC static constexpr bool run(const DenseIndex i, const DenseIndex value) { return i > value; } }; template -struct index_statically_gt > { - constexpr bool operator() (const DenseIndex i, const DenseIndex value) const { +struct index_statically_gt_impl > { + EIGEN_DEVICE_FUNC static constexpr bool run(const DenseIndex i, const DenseIndex value) { return i > value; } }; template -struct index_statically_lt > { - constexpr bool operator() (const DenseIndex i, const DenseIndex value) const { +struct index_statically_lt_impl > { + EIGEN_DEVICE_FUNC static constexpr bool run(const DenseIndex i, const DenseIndex value) { return i < value; } }; template -struct index_statically_lt > { - constexpr bool operator() (const DenseIndex i, const DenseIndex value) const { +struct index_statically_lt_impl > { + EIGEN_DEVICE_FUNC static constexpr bool run(const DenseIndex i, const DenseIndex value) { return i < value; } }; #else template -struct index_known_statically > { - EIGEN_ALWAYS_INLINE bool operator() (const DenseIndex) const { +struct index_known_statically_impl > { + EIGEN_DEVICE_FUNC static EIGEN_ALWAYS_INLINE bool run(const DenseIndex) { return true; } }; template -struct index_known_statically > { - EIGEN_ALWAYS_INLINE bool operator() (const DenseIndex) const { +struct index_known_statically_impl > { + EIGEN_DEVICE_FUNC static EIGEN_ALWAYS_INLINE bool run(const DenseIndex) { return true; } }; template -struct all_indices_known_statically > { - EIGEN_ALWAYS_INLINE bool operator() () const { +struct all_indices_known_statically_impl > { + EIGEN_DEVICE_FUNC static EIGEN_ALWAYS_INLINE bool run() { return true; } }; template -struct all_indices_known_statically > { - EIGEN_ALWAYS_INLINE bool operator() () const { +struct all_indices_known_statically_impl > { + EIGEN_DEVICE_FUNC static EIGEN_ALWAYS_INLINE bool run() { return true; } }; template -struct indices_statically_known_to_increase > { - EIGEN_ALWAYS_INLINE bool operator() () const { +struct indices_statically_known_to_increase_impl > { + static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE bool run() { return true; } }; template -struct indices_statically_known_to_increase > { - EIGEN_ALWAYS_INLINE bool operator() () const { +struct indices_statically_known_to_increase_impl > { + static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE bool run() { return true; } }; template -struct index_statically_eq > { - EIGEN_ALWAYS_INLINE bool operator() (const 
DenseIndex, const DenseIndex) const { +struct index_statically_eq_impl > { + static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE bool run(const DenseIndex, const DenseIndex) { return false; } }; template -struct index_statically_eq > { - EIGEN_ALWAYS_INLINE bool operator() (const DenseIndex, const DenseIndex) const { +struct index_statically_eq_impl > { + static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE bool run(const DenseIndex, const DenseIndex) { return false; } }; template -struct index_statically_ne > { - EIGEN_ALWAYS_INLINE bool operator() (const DenseIndex, const DenseIndex) const { +struct index_statically_ne_impl > { + static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE bool run(const DenseIndex, const DenseIndex){ return false; } }; template -struct index_statically_ne > { - EIGEN_ALWAYS_INLINE bool operator() (const DenseIndex, const DenseIndex) const { +struct index_statically_ne_impl > { + static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE bool run(const DenseIndex, const DenseIndex) { return false; } }; template -struct index_statically_gt > { - EIGEN_ALWAYS_INLINE bool operator() (const DenseIndex, const DenseIndex) const { +struct index_statically_gt_impl > { + static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE bool run(const DenseIndex, const DenseIndex) { return false; } }; template -struct index_statically_gt > { - EIGEN_ALWAYS_INLINE bool operator() (const DenseIndex, const DenseIndex) const { +struct index_statically_gt_impl > { + static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE bool run(const DenseIndex, const DenseIndex) { return false; } }; template -struct index_statically_lt > { - EIGEN_ALWAYS_INLINE bool operator() (const DenseIndex, const DenseIndex) const { +struct index_statically_lt_impl > { + static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE bool run(const DenseIndex, const DenseIndex) { return false; } }; template -struct index_statically_lt > { - EIGEN_ALWAYS_INLINE bool operator() (const DenseIndex, const DenseIndex) const { +struct index_statically_lt_impl > { + static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE bool run(const DenseIndex, const DenseIndex) { return false; } }; diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorDimensions.h b/unsupported/Eigen/CXX11/src/Tensor/TensorDimensions.h index d6ec62a74..f3c9a3148 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorDimensions.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorDimensions.h @@ -52,8 +52,8 @@ struct fixed_size_tensor_index_linearization_helper static inline Index run(array const& indices, const Dimensions& dimensions) { - return array_get(indices) + - dget::value * + return array_get(indices) + + dget::value * fixed_size_tensor_index_linearization_helper::run(indices, dimensions); } }; @@ -62,10 +62,9 @@ template struct fixed_size_tensor_index_linearization_helper { template EIGEN_DEVICE_FUNC - static inline Index run(array const& indices, - const Dimensions&) + static inline Index run(array const&, const Dimensions&) { - return array_get(indices); + return 0; } }; @@ -76,8 +75,8 @@ struct fixed_size_tensor_index_extraction_helper static inline Index run(const Index index, const Dimensions& dimensions) { - const Index mult = (index == n) ? 1 : 0; - return array_get(dimensions) * mult + + const Index mult = (index == n-1) ? 
1 : 0; + return array_get(dimensions) * mult + fixed_size_tensor_index_extraction_helper::run(index, dimensions); } }; @@ -86,13 +85,12 @@ template struct fixed_size_tensor_index_extraction_helper { template EIGEN_DEVICE_FUNC - static inline Index run(const Index index, - const Dimensions& dimensions) + static inline Index run(const Index, + const Dimensions&) { - const Index mult = (index == 0) ? 1 : 0; - return array_get<0>(dimensions) * mult; + return 0; } -}; + }; } // end namespace internal @@ -130,16 +128,16 @@ struct Sizes : internal::numeric_list { } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE std::ptrdiff_t operator[] (const std::size_t index) const { - return internal::fixed_size_tensor_index_extraction_helper::run(index, *this); + return internal::fixed_size_tensor_index_extraction_helper::run(index, *this); } template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE size_t IndexOfColMajor(const array& indices) const { - return internal::fixed_size_tensor_index_linearization_helper::run(indices, *static_cast(this)); + return internal::fixed_size_tensor_index_linearization_helper::run(indices, *static_cast(this)); } template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE size_t IndexOfRowMajor(const array& indices) const { - return internal::fixed_size_tensor_index_linearization_helper::run(indices, *static_cast(this)); + return internal::fixed_size_tensor_index_linearization_helper::run(indices, *static_cast(this)); } }; @@ -216,17 +214,17 @@ template ::value; default: eigen_assert(false && "index overflow"); - return static_cast(-1); + return static_cast(-1); } } template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE size_t IndexOfColMajor(const array& indices) const { - return internal::fixed_size_tensor_index_linearization_helper::run(indices, *reinterpret_cast(this)); + return internal::fixed_size_tensor_index_linearization_helper::run(indices, *reinterpret_cast(this)); } template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE size_t IndexOfRowMajor(const array& indices) const { - return internal::fixed_size_tensor_index_linearization_helper::run(indices, *reinterpret_cast(this)); + return internal::fixed_size_tensor_index_linearization_helper::run(indices, *reinterpret_cast(this)); } }; @@ -267,10 +265,10 @@ struct tensor_index_linearization_helper // Dynamic size -template +template struct DSizes : array { typedef array Base; - static const std::size_t count = NumDims; + static const int count = NumDims; EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE size_t rank() const { return NumDims; @@ -280,8 +278,8 @@ struct DSizes : array { return internal::array_prod(*static_cast(this)); } - EIGEN_DEVICE_FUNC DSizes() { - for (std::size_t i = 0 ; i < NumDims; ++i) { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE DSizes() { + for (int i = 0 ; i < NumDims; ++i) { (*this)[i] = 0; } } @@ -371,10 +369,10 @@ struct tensor_vsize_index_linearization_helper namespace internal { -template struct array_size > { +template struct array_size > { static const size_t value = NumDims; }; -template struct array_size > { +template struct array_size > { static const size_t value = NumDims; }; #ifndef EIGEN_EMULATE_CXX11_META_H @@ -387,6 +385,10 @@ static const std::ptrdiff_t value = Sizes::count; template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE std::ptrdiff_t array_get(const Sizes&) { return get >::value; } +template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE std::ptrdiff_t array_get(const Sizes<>&) { + eigen_assert(false && "should never be called"); + return -1; +} #else template struct array_size > { static const size_t value = Sizes::count; @@ -402,22 +404,22 @@ 
template -struct sizes_match_up_to_dim { +struct sizes_match_below_dim { static inline bool run(Dims1&, Dims2&) { return false; } }; template -struct sizes_match_up_to_dim { +struct sizes_match_below_dim { static inline bool run(Dims1& dims1, Dims2& dims2) { - return (array_get(dims1) == array_get(dims2)) & - sizes_match_up_to_dim::run(dims1, dims2); + return (array_get(dims1) == array_get(dims2)) & + sizes_match_below_dim::run(dims1, dims2); } }; template -struct sizes_match_up_to_dim { - static inline bool run(Dims1& dims1, Dims2& dims2) { - return (array_get<0>(dims1) == array_get<0>(dims2)); +struct sizes_match_below_dim { + static inline bool run(Dims1&, Dims2&) { + return true; } }; @@ -426,7 +428,7 @@ struct sizes_match_up_to_dim { template bool dimensions_match(Dims1& dims1, Dims2& dims2) { - return internal::sizes_match_up_to_dim::value-1, internal::array_size::value-1>::run(dims1, dims2); + return internal::sizes_match_below_dim::value, internal::array_size::value>::run(dims1, dims2); } } // end namespace Eigen diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h b/unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h index be0b07cdf..902f25247 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h @@ -319,7 +319,7 @@ struct TensorEvaluator(TensorEvaluator::Layout) == static_cast(TensorEvaluator::Layout) || internal::traits::NumDimensions == 1), YOU_MADE_A_PROGRAMMING_MISTAKE); + EIGEN_STATIC_ASSERT((static_cast(TensorEvaluator::Layout) == static_cast(TensorEvaluator::Layout) || internal::traits::NumDimensions <= 1), YOU_MADE_A_PROGRAMMING_MISTAKE); eigen_assert(dimensions_match(m_leftImpl.dimensions(), m_rightImpl.dimensions())); } diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h b/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h index b2800aefb..c28078882 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h @@ -50,6 +50,7 @@ class TensorExecutor { public: typedef typename Expression::Index Index; + EIGEN_DEVICE_FUNC static inline void run(const Expression& expr, const DefaultDevice& device = DefaultDevice()) { TensorEvaluator evaluator(expr, device); @@ -57,7 +58,7 @@ class TensorExecutor if (needs_assign) { const Index size = array_prod(evaluator.dimensions()); - static const int PacketSize = unpacket_traits::PacketReturnType>::size; + const int PacketSize = unpacket_traits::PacketReturnType>::size; const Index VectorizedSize = (size / PacketSize) * PacketSize; for (Index i = 0; i < VectorizedSize; i += PacketSize) { @@ -149,7 +150,24 @@ class TensorExecutor // GPU: the evaluation of the expression is offloaded to a GPU. 
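Before the GPU path below, note that the thread-pool path (moved into TensorDeviceThreadPool.h earlier in this patch) is driven through the same device() mechanism. A sketch using the ThreadPool and ThreadPoolDevice defined above, assuming EIGEN_USE_THREADS:

```cpp
#define EIGEN_USE_THREADS
#include <unsupported/Eigen/CXX11/Tensor>

void thread_pool_demo() {
  Eigen::Tensor<float, 2> a(64, 64), b(64, 64), c(64, 64);
  a.setRandom();
  b.setRandom();

  Eigen::ThreadPool pool(4);                 // 4 worker threads; the pool outlives the device
  Eigen::ThreadPoolDevice device(&pool, 4);  // the device only borrows the pool

  c.device(device) = a + b;                  // evaluated on the pool
}
```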
-#if defined(EIGEN_USE_GPU) && defined(__CUDACC__) +#if defined(EIGEN_USE_GPU) + +template +class TensorExecutor { + public: + typedef typename Expression::Index Index; + EIGEN_DEVICE_FUNC static void run(const Expression& expr, const GpuDevice& device); +}; + +template +class TensorExecutor { + public: + typedef typename Expression::Index Index; + EIGEN_DEVICE_FUNC static void run(const Expression& expr, const GpuDevice& device); +}; + +#if defined(__CUDACC__) + template __global__ void __launch_bounds__(1024) @@ -193,48 +211,53 @@ EigenMetaKernel_Vectorizable(Evaluator memcopied_eval, Index size) { } } - -template -class TensorExecutor +/*static*/ +template +EIGEN_DEVICE_FUNC inline void TensorExecutor::run(const Expression& expr, const GpuDevice& device) { - public: - typedef typename Expression::Index Index; - static inline void run(const Expression& expr, const GpuDevice& device) +#ifndef __CUDA_ARCH__ + TensorEvaluator evaluator(expr, device); + const bool needs_assign = evaluator.evalSubExprsIfNeeded(NULL); + if (needs_assign) { - TensorEvaluator evaluator(expr, device); - const bool needs_assign = evaluator.evalSubExprsIfNeeded(NULL); - if (needs_assign) - { - const int num_blocks = device.getNumCudaMultiProcessors() * device.maxCudaThreadsPerMultiProcessor() / device.maxCudaThreadsPerBlock(); - const int block_size = device.maxCudaThreadsPerBlock(); - const Index size = array_prod(evaluator.dimensions()); - LAUNCH_CUDA_KERNEL((EigenMetaKernel_NonVectorizable, Index>), num_blocks, block_size, 0, device, evaluator, size); - } - evaluator.cleanup(); + const int block_size = device.maxCudaThreadsPerBlock(); + const int max_blocks = device.getNumCudaMultiProcessors() * device.maxCudaThreadsPerMultiProcessor() / block_size; + const Index size = array_prod(evaluator.dimensions()); + // Create at least one block to ensure we won't crash if we're called with tensors of size 0.
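+      // Grid-size choice, spelled out: ceil(size / block_size) blocks are enough to
+      // cover every coefficient; that count is capped at the number of blocks the
+      // device can keep resident (max_blocks) and floored at 1 so an empty tensor
+      // still produces a valid launch configuration.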
+ const int num_blocks = numext::maxi(numext::mini(max_blocks, (size + block_size - 1) / block_size), 1); + LAUNCH_CUDA_KERNEL((EigenMetaKernel_NonVectorizable, Index>), num_blocks, block_size, 0, device, evaluator, size); } -}; - -template -class TensorExecutor -{ - public: - typedef typename Expression::Index Index; - static inline void run(const Expression& expr, const GpuDevice& device) - { - TensorEvaluator evaluator(expr, device); - const bool needs_assign = evaluator.evalSubExprsIfNeeded(NULL); - if (needs_assign) - { - const int num_blocks = device.getNumCudaMultiProcessors() * device.maxCudaThreadsPerMultiProcessor() / device.maxCudaThreadsPerBlock(); - const int block_size = device.maxCudaThreadsPerBlock(); - const Index size = array_prod(evaluator.dimensions()); - LAUNCH_CUDA_KERNEL((EigenMetaKernel_Vectorizable, Index>), num_blocks, block_size, 0, device, evaluator, size); - } - evaluator.cleanup(); - } -}; - + evaluator.cleanup(); +#else + eigen_assert(false && "Cannot launch a kernel from another kernel"); #endif +} + + +/*static*/ +template +EIGEN_DEVICE_FUNC inline void TensorExecutor::run(const Expression& expr, const GpuDevice& device) +{ +#ifndef __CUDA_ARCH__ + TensorEvaluator evaluator(expr, device); + const bool needs_assign = evaluator.evalSubExprsIfNeeded(NULL); + if (needs_assign) + { + const int block_size = device.maxCudaThreadsPerBlock(); + const int max_blocks = device.getNumCudaMultiProcessors() * device.maxCudaThreadsPerMultiProcessor() / block_size; + const Index size = array_prod(evaluator.dimensions()); + // Create at least one block to ensure we won't crash if we're called with tensors of size 0. + const int num_blocks = numext::maxi(numext::mini(max_blocks, (size + block_size - 1) / block_size), 1); + LAUNCH_CUDA_KERNEL((EigenMetaKernel_Vectorizable, Index>), num_blocks, block_size, 0, device, evaluator, size); + } + evaluator.cleanup(); +#else + eigen_assert(false && "Cannot launch a kernel from another kernel"); +#endif +} + +#endif // __CUDACC__ +#endif // EIGEN_USE_GPU } // end namespace internal diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorFFT.h b/unsupported/Eigen/CXX11/src/Tensor/TensorFFT.h new file mode 100644 index 000000000..215a4ebad --- /dev/null +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorFFT.h @@ -0,0 +1,598 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2015 Jianwei Cui +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_CXX11_TENSOR_TENSOR_FFT_H +#define EIGEN_CXX11_TENSOR_TENSOR_FFT_H + +// NVCC fails to compile this code +#if !defined(__CUDACC__) + +namespace Eigen { + +/** \class TensorFFT + * \ingroup CXX11_Tensor_Module + * + * \brief Tensor FFT class.
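+ *
+ * Performs the discrete Fourier transform along a user-chosen subset of the
+ * tensor's dimensions. A usage sketch, assuming the TensorBase::fft() entry
+ * point that exposes this op (the selectors are the FFTResultType and
+ * FFTDirection enums added in TensorForwardDeclarations.h):
+ *
+ *   Eigen::Tensor<float, 2> input(4, 8);
+ *   Eigen::array<int, 1> dims2fft;
+ *   dims2fft[0] = 1;  // transform along dimension 1 only
+ *   Eigen::Tensor<std::complex<float>, 2> spectrum =
+ *       input.fft<Eigen::BothParts, Eigen::FFT_FORWARD>(dims2fft);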
+ * + * TODO: + * Vectorize the Cooley Tukey and the Bluestein algorithm + * Add support for multithreaded evaluation + * Improve the performance on GPU + */ + +template struct MakeComplex { + template + EIGEN_DEVICE_FUNC + T operator() (const T& val) const { return val; } +}; + +template <> struct MakeComplex { + template + EIGEN_DEVICE_FUNC + std::complex operator() (const T& val) const { return std::complex(val, 0); } +}; + +template <> struct MakeComplex { + template + EIGEN_DEVICE_FUNC + std::complex operator() (const std::complex& val) const { return val; } +}; + +template struct PartOf { + template T operator() (const T& val) const { return val; } +}; + +template <> struct PartOf { + template T operator() (const std::complex& val) const { return val.real(); } +}; + +template <> struct PartOf { + template T operator() (const std::complex& val) const { return val.imag(); } +}; + +namespace internal { +template +struct traits > : public traits { + typedef traits XprTraits; + typedef typename NumTraits::Real RealScalar; + typedef typename std::complex ComplexScalar; + typedef typename XprTraits::Scalar InputScalar; + typedef typename conditional::type OutputScalar; + typedef typename XprTraits::StorageKind StorageKind; + typedef typename XprTraits::Index Index; + typedef typename XprType::Nested Nested; + typedef typename remove_reference::type _Nested; + static const int NumDimensions = XprTraits::NumDimensions; + static const int Layout = XprTraits::Layout; +}; + +template +struct eval, Eigen::Dense> { + typedef const TensorFFTOp& type; +}; + +template +struct nested, 1, typename eval >::type> { + typedef TensorFFTOp type; +}; + +} // end namespace internal + +template +class TensorFFTOp : public TensorBase, ReadOnlyAccessors> { + public: + typedef typename Eigen::internal::traits::Scalar Scalar; + typedef typename Eigen::NumTraits::Real RealScalar; + typedef typename std::complex ComplexScalar; + typedef typename internal::conditional::type OutputScalar; + typedef OutputScalar CoeffReturnType; + typedef typename Eigen::internal::nested::type Nested; + typedef typename Eigen::internal::traits::StorageKind StorageKind; + typedef typename Eigen::internal::traits::Index Index; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorFFTOp(const XprType& expr, const FFT& fft) + : m_xpr(expr), m_fft(fft) {} + + EIGEN_DEVICE_FUNC + const FFT& fft() const { return m_fft; } + + EIGEN_DEVICE_FUNC + const typename internal::remove_all::type& expression() const { + return m_xpr; + } + + protected: + typename XprType::Nested m_xpr; + const FFT m_fft; +}; + +// Eval as rvalue +template +struct TensorEvaluator, Device> { + typedef TensorFFTOp XprType; + typedef typename XprType::Index Index; + static const int NumDims = internal::array_size::Dimensions>::value; + typedef DSizes Dimensions; + typedef typename XprType::Scalar Scalar; + typedef typename Eigen::NumTraits::Real RealScalar; + typedef typename std::complex ComplexScalar; + typedef typename TensorEvaluator::Dimensions InputDimensions; + typedef internal::traits XprTraits; + typedef typename XprTraits::Scalar InputScalar; + typedef typename internal::conditional::type OutputScalar; + typedef OutputScalar CoeffReturnType; + typedef typename PacketType::type PacketReturnType; + + enum { + IsAligned = false, + PacketAccess = true, + BlockAccess = false, + Layout = TensorEvaluator::Layout, + CoordAccess = false, + }; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device) : m_fft(op.fft()), 
m_impl(op.expression(), device), m_data(NULL), m_device(device) { + const typename TensorEvaluator::Dimensions& input_dims = m_impl.dimensions(); + for (int i = 0; i < NumDims; ++i) { + eigen_assert(input_dims[i] > 0); + m_dimensions[i] = input_dims[i]; + } + + if (static_cast(Layout) == static_cast(ColMajor)) { + m_strides[0] = 1; + for (int i = 1; i < NumDims; ++i) { + m_strides[i] = m_strides[i - 1] * m_dimensions[i - 1]; + } + } else { + m_strides[NumDims - 1] = 1; + for (int i = NumDims - 2; i >= 0; --i) { + m_strides[i] = m_strides[i + 1] * m_dimensions[i + 1]; + } + } + m_size = m_dimensions.TotalSize(); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { + return m_dimensions; + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(OutputScalar* data) { + m_impl.evalSubExprsIfNeeded(NULL); + if (data) { + evalToBuf(data); + return false; + } else { + m_data = (CoeffReturnType*)m_device.allocate(sizeof(CoeffReturnType) * m_size); + evalToBuf(m_data); + return true; + } + } + + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() { + if (m_data) { + m_device.deallocate(m_data); + m_data = NULL; + } + m_impl.cleanup(); + } + + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE CoeffReturnType coeff(Index index) const { + return m_data[index]; + } + + template + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE PacketReturnType packet(Index index) const { + return internal::ploadt(m_data + index); + } + + EIGEN_DEVICE_FUNC Scalar* data() const { return m_data; } + + + private: + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalToBuf(OutputScalar* data) { + const bool write_to_out = internal::is_same::value; + ComplexScalar* buf = write_to_out ? (ComplexScalar*)data : (ComplexScalar*)m_device.allocate(sizeof(ComplexScalar) * m_size); + + for (Index i = 0; i < m_size; ++i) { + buf[i] = MakeComplex::value>()(m_impl.coeff(i)); + } + + for (size_t i = 0; i < m_fft.size(); ++i) { + int dim = m_fft[i]; + eigen_assert(dim >= 0 && dim < NumDims); + Index line_len = m_dimensions[dim]; + eigen_assert(line_len >= 1); + ComplexScalar* line_buf = (ComplexScalar*)m_device.allocate(sizeof(ComplexScalar) * line_len); + const bool is_power_of_two = isPowerOfTwo(line_len); + const Index good_composite = is_power_of_two ? 0 : findGoodComposite(line_len); + const Index log_len = is_power_of_two ? getLog2(line_len) : getLog2(good_composite); + + ComplexScalar* a = is_power_of_two ? NULL : (ComplexScalar*)m_device.allocate(sizeof(ComplexScalar) * good_composite); + ComplexScalar* b = is_power_of_two ? NULL : (ComplexScalar*)m_device.allocate(sizeof(ComplexScalar) * good_composite); + ComplexScalar* pos_j_base_powered = is_power_of_two ? 
NULL : (ComplexScalar*)m_device.allocate(sizeof(ComplexScalar) * (line_len + 1)); + if (!is_power_of_two) { + ComplexScalar pos_j_base = ComplexScalar(std::cos(M_PI/line_len), std::sin(M_PI/line_len)); + for (Index j = 0; j < line_len + 1; ++j) { + pos_j_base_powered[j] = std::pow(pos_j_base, j * j); + } + } + + for (Index partial_index = 0; partial_index < m_size / line_len; ++partial_index) { + Index base_offset = getBaseOffsetFromIndex(partial_index, dim); + + // get data into line_buf + for (Index j = 0; j < line_len; ++j) { + Index offset = getIndexFromOffset(base_offset, dim, j); + line_buf[j] = buf[offset]; + } + + // process the line + if (is_power_of_two) { + processDataLineCooleyTukey(line_buf, line_len, log_len); + } + else { + processDataLineBluestein(line_buf, line_len, good_composite, log_len, a, b, pos_j_base_powered); + } + + // write back + for (Index j = 0; j < line_len; ++j) { + const ComplexScalar div_factor = (FFTDir == FFT_FORWARD) ? ComplexScalar(1, 0) : ComplexScalar(line_len, 0); + Index offset = getIndexFromOffset(base_offset, dim, j); + buf[offset] = line_buf[j] / div_factor; + } + } + m_device.deallocate(line_buf); + if (!is_power_of_two) { + m_device.deallocate(a); + m_device.deallocate(b); + m_device.deallocate(pos_j_base_powered); + } + } + + if(!write_to_out) { + for (Index i = 0; i < m_size; ++i) { + data[i] = PartOf()(buf[i]); + } + m_device.deallocate(buf); + } + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE static bool isPowerOfTwo(Index x) { + eigen_assert(x > 0); + return !(x & (x - 1)); + } + + // The composite number for padding, used in Bluestein's FFT algorithm + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE static Index findGoodComposite(Index n) { + Index i = 2; + while (i < 2 * n - 1) i *= 2; + return i; + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE static Index getLog2(Index m) { + Index log2m = 0; + while (m >>= 1) log2m++; + return log2m; + } + + // Call the Cooley-Tukey algorithm directly, data length must be power of 2 + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void processDataLineCooleyTukey(ComplexScalar* line_buf, Index line_len, Index log_len) { + eigen_assert(isPowerOfTwo(line_len)); + scramble_FFT(line_buf, line_len); + compute_1D_Butterfly(line_buf, line_len, log_len); + } + + // Call Bluestein's FFT algorithm, m is a good composite number greater than (2 * n - 1), used as the padding length + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void processDataLineBluestein(ComplexScalar* line_buf, Index line_len, Index good_composite, Index log_len, ComplexScalar* a, ComplexScalar* b, const ComplexScalar* pos_j_base_powered) { + Index n = line_len; + Index m = good_composite; + ComplexScalar* data = line_buf; + + for (Index i = 0; i < n; ++i) { + if(FFTDir == FFT_FORWARD) { + a[i] = data[i] * std::conj(pos_j_base_powered[i]); + } + else { + a[i] = data[i] * pos_j_base_powered[i]; + } + } + for (Index i = n; i < m; ++i) { + a[i] = ComplexScalar(0, 0); + } + + for (Index i = 0; i < n; ++i) { + if(FFTDir == FFT_FORWARD) { + b[i] = pos_j_base_powered[i]; + } + else { + b[i] = std::conj(pos_j_base_powered[i]); + } + } + for (Index i = n; i < m - n; ++i) { + b[i] = ComplexScalar(0, 0); + } + for (Index i = m - n; i < m; ++i) { + if(FFTDir == FFT_FORWARD) { + b[i] = pos_j_base_powered[m-i]; + } + else { + b[i] = std::conj(pos_j_base_powered[m-i]); + } + } + + scramble_FFT(a, m); + compute_1D_Butterfly(a, m, log_len); + + scramble_FFT(b, m); + compute_1D_Butterfly(b, m, log_len); + + for (Index i = 0; i < m; ++i) { + a[i] *= b[i]; + } + + scramble_FFT(a, m); + 
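+      // a[] now holds the (bit-reverse scrambled) pointwise product of the two
+      // spectra; the reverse butterfly below, together with the 1/m scaling that
+      // follows it, is the inverse FFT turning that product back into the circular
+      // convolution at the heart of Bluestein's algorithm: an arbitrary-length DFT
+      // recast as a convolution of power-of-two length m >= 2*n - 1.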
compute_1D_Butterfly(a, m, log_len); + + //Do the scaling after ifft + for (Index i = 0; i < m; ++i) { + a[i] /= m; + } + + for (Index i = 0; i < n; ++i) { + if(FFTDir == FFT_FORWARD) { + data[i] = a[i] * std::conj(pos_j_base_powered[i]); + } + else { + data[i] = a[i] * pos_j_base_powered[i]; + } + } + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE static void scramble_FFT(ComplexScalar* data, Index n) { + eigen_assert(isPowerOfTwo(n)); + Index j = 1; + for (Index i = 1; i < n; ++i){ + if (j > i) { + std::swap(data[j-1], data[i-1]); + } + Index m = n >> 1; + while (m >= 2 && j > m) { + j -= m; + m >>= 1; + } + j += m; + } + } + + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void compute_1D_Butterfly(ComplexScalar* data, Index n, Index n_power_of_2) { + eigen_assert(isPowerOfTwo(n)); + if (n == 1) { + return; + } + else if (n == 2) { + ComplexScalar tmp = data[1]; + data[1] = data[0] - data[1]; + data[0] += tmp; + return; + } + else if (n == 4) { + ComplexScalar tmp[4]; + tmp[0] = data[0] + data[1]; + tmp[1] = data[0] - data[1]; + tmp[2] = data[2] + data[3]; + if(Dir == FFT_FORWARD) { + tmp[3] = ComplexScalar(0.0, -1.0) * (data[2] - data[3]); + } + else { + tmp[3] = ComplexScalar(0.0, 1.0) * (data[2] - data[3]); + } + data[0] = tmp[0] + tmp[2]; + data[1] = tmp[1] + tmp[3]; + data[2] = tmp[0] - tmp[2]; + data[3] = tmp[1] - tmp[3]; + return; + } + else if (n == 8) { + ComplexScalar tmp_1[8]; + ComplexScalar tmp_2[8]; + + tmp_1[0] = data[0] + data[1]; + tmp_1[1] = data[0] - data[1]; + tmp_1[2] = data[2] + data[3]; + if (Dir == FFT_FORWARD) { + tmp_1[3] = (data[2] - data[3]) * ComplexScalar(0, -1); + } + else { + tmp_1[3] = (data[2] - data[3]) * ComplexScalar(0, 1); + } + tmp_1[4] = data[4] + data[5]; + tmp_1[5] = data[4] - data[5]; + tmp_1[6] = data[6] + data[7]; + if (Dir == FFT_FORWARD) { + tmp_1[7] = (data[6] - data[7]) * ComplexScalar(0, -1); + } + else { + tmp_1[7] = (data[6] - data[7]) * ComplexScalar(0, 1); + } + tmp_2[0] = tmp_1[0] + tmp_1[2]; + tmp_2[1] = tmp_1[1] + tmp_1[3]; + tmp_2[2] = tmp_1[0] - tmp_1[2]; + tmp_2[3] = tmp_1[1] - tmp_1[3]; + tmp_2[4] = tmp_1[4] + tmp_1[6]; + // SQRT2DIV2 = sqrt(2)/2 + #define SQRT2DIV2 0.7071067811865476 + if (Dir == FFT_FORWARD) { + tmp_2[5] = (tmp_1[5] + tmp_1[7]) * ComplexScalar(SQRT2DIV2, -SQRT2DIV2); + tmp_2[6] = (tmp_1[4] - tmp_1[6]) * ComplexScalar(0, -1); + tmp_2[7] = (tmp_1[5] - tmp_1[7]) * ComplexScalar(-SQRT2DIV2, -SQRT2DIV2); + } + else { + tmp_2[5] = (tmp_1[5] + tmp_1[7]) * ComplexScalar(SQRT2DIV2, SQRT2DIV2); + tmp_2[6] = (tmp_1[4] - tmp_1[6]) * ComplexScalar(0, 1); + tmp_2[7] = (tmp_1[5] - tmp_1[7]) * ComplexScalar(-SQRT2DIV2, SQRT2DIV2); + } + data[0] = tmp_2[0] + tmp_2[4]; + data[1] = tmp_2[1] + tmp_2[5]; + data[2] = tmp_2[2] + tmp_2[6]; + data[3] = tmp_2[3] + tmp_2[7]; + data[4] = tmp_2[0] - tmp_2[4]; + data[5] = tmp_2[1] - tmp_2[5]; + data[6] = tmp_2[2] - tmp_2[6]; + data[7] = tmp_2[3] - tmp_2[7]; + + return; + } + else { + compute_1D_Butterfly(data, n/2, n_power_of_2 - 1); + compute_1D_Butterfly(data + n/2, n/2, n_power_of_2 - 1); + //Original code: + //RealScalar wtemp = std::sin(M_PI/n); + //RealScalar wpi = -std::sin(2 * M_PI/n); + RealScalar wtemp = m_sin_PI_div_n_LUT[n_power_of_2]; + RealScalar wpi; + if (Dir == FFT_FORWARD) { + wpi = m_minus_sin_2_PI_div_n_LUT[n_power_of_2]; + } + else { + wpi = 0 - m_minus_sin_2_PI_div_n_LUT[n_power_of_2]; + } + + const ComplexScalar wp(wtemp, wpi); + ComplexScalar w(1.0, 0.0); + for(Index i = 0; i < n/2; i++) { + ComplexScalar temp(data[i + n/2] * w); + data[i + n/2] = data[i] - temp; + 
data[i] += temp; + w += w * wp; + } + return; + } + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index getBaseOffsetFromIndex(Index index, Index omitted_dim) const { + Index result = 0; + + if (static_cast(Layout) == static_cast(ColMajor)) { + for (int i = NumDims - 1; i > omitted_dim; --i) { + const Index partial_m_stride = m_strides[i] / m_dimensions[omitted_dim]; + const Index idx = index / partial_m_stride; + index -= idx * partial_m_stride; + result += idx * m_strides[i]; + } + result += index; + } + else { + for (Index i = 0; i < omitted_dim; ++i) { + const Index partial_m_stride = m_strides[i] / m_dimensions[omitted_dim]; + const Index idx = index / partial_m_stride; + index -= idx * partial_m_stride; + result += idx * m_strides[i]; + } + result += index; + } + // Value of index_coords[omitted_dim] is not determined to this step + return result; + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index getIndexFromOffset(Index base, Index omitted_dim, Index offset) const { + Index result = base + offset * m_strides[omitted_dim] ; + return result; + } + + protected: + Index m_size; + const FFT& m_fft; + Dimensions m_dimensions; + array m_strides; + TensorEvaluator m_impl; + CoeffReturnType* m_data; + const Device& m_device; + + // This will support a maximum FFT size of 2^32 for each dimension + // m_sin_PI_div_n_LUT[i] = (-2) * std::sin(M_PI / std::pow(2,i)) ^ 2; + RealScalar m_sin_PI_div_n_LUT[32] = { + 0.0, + -2, + -0.999999999999999, + -0.292893218813453, + -0.0761204674887130, + -0.0192147195967696, + -0.00481527332780311, + -0.00120454379482761, + -3.01181303795779e-04, + -7.52981608554592e-05, + -1.88247173988574e-05, + -4.70619042382852e-06, + -1.17654829809007e-06, + -2.94137117780840e-07, + -7.35342821488550e-08, + -1.83835707061916e-08, + -4.59589268710903e-09, + -1.14897317243732e-09, + -2.87243293150586e-10, + -7.18108232902250e-11, + -1.79527058227174e-11, + -4.48817645568941e-12, + -1.12204411392298e-12, + -2.80511028480785e-13, + -7.01277571201985e-14, + -1.75319392800498e-14, + -4.38298482001247e-15, + -1.09574620500312e-15, + -2.73936551250781e-16, + -6.84841378126949e-17, + -1.71210344531737e-17, + -4.28025861329343e-18 + }; + + // m_minus_sin_2_PI_div_n_LUT[i] = -std::sin(2 * M_PI / std::pow(2,i)); + RealScalar m_minus_sin_2_PI_div_n_LUT[32] = { + 0.0, + 0.0, + -1.00000000000000e+00, + -7.07106781186547e-01, + -3.82683432365090e-01, + -1.95090322016128e-01, + -9.80171403295606e-02, + -4.90676743274180e-02, + -2.45412285229123e-02, + -1.22715382857199e-02, + -6.13588464915448e-03, + -3.06795676296598e-03, + -1.53398018628477e-03, + -7.66990318742704e-04, + -3.83495187571396e-04, + -1.91747597310703e-04, + -9.58737990959773e-05, + -4.79368996030669e-05, + -2.39684498084182e-05, + -1.19842249050697e-05, + -5.99211245264243e-06, + -2.99605622633466e-06, + -1.49802811316901e-06, + -7.49014056584716e-07, + -3.74507028292384e-07, + -1.87253514146195e-07, + -9.36267570730981e-08, + -4.68133785365491e-08, + -2.34066892682746e-08, + -1.17033446341373e-08, + -5.85167231706864e-09, + -2.92583615853432e-09 + }; +}; + +} // end namespace Eigen + +#endif // __CUDACC__ + + +#endif // EIGEN_CXX11_TENSOR_TENSOR_FFT_H diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorFixedSize.h b/unsupported/Eigen/CXX11/src/Tensor/TensorFixedSize.h index 5f3e49e61..a4d6ce6b3 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorFixedSize.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorFixedSize.h @@ -68,7 +68,7 @@ class TensorFixedSize : public TensorBase - inline const Scalar& coeff(Index 
firstIndex, IndexTypes... otherIndices) const + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar& coeff(Index firstIndex, IndexTypes... otherIndices) const { // The number of indices used to access a tensor coefficient must be equal to the rank of the tensor. EIGEN_STATIC_ASSERT(sizeof...(otherIndices) + 1 == NumIndices, YOU_MADE_A_PROGRAMMING_MISTAKE) @@ -90,9 +90,17 @@ class TensorFixedSize : public TensorBase - inline Scalar& coeffRef(Index firstIndex, IndexTypes... otherIndices) + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& coeffRef(Index firstIndex, IndexTypes... otherIndices) { // The number of indices used to access a tensor coefficient must be equal to the rank of the tensor. EIGEN_STATIC_ASSERT(sizeof...(otherIndices) + 1 == NumIndices, YOU_MADE_A_PROGRAMMING_MISTAKE) @@ -114,9 +122,17 @@ class TensorFixedSize : public TensorBase - inline const Scalar& operator()(Index firstIndex, IndexTypes... otherIndices) const + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar& operator()(Index firstIndex, IndexTypes... otherIndices) const { // The number of indices used to access a tensor coefficient must be equal to the rank of the tensor. EIGEN_STATIC_ASSERT(sizeof...(otherIndices) + 1 == NumIndices, YOU_MADE_A_PROGRAMMING_MISTAKE) @@ -138,6 +154,13 @@ class TensorFixedSize : public TensorBase - inline Scalar& operator()(Index firstIndex, IndexTypes... otherIndices) + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& operator()(Index firstIndex, IndexTypes... otherIndices) { // The number of indices used to access a tensor coefficient must be equal to the rank of the tensor. EIGEN_STATIC_ASSERT(sizeof...(otherIndices) + 1 == NumIndices, YOU_MADE_A_PROGRAMMING_MISTAKE) @@ -170,6 +193,13 @@ class TensorFixedSize : public TensorBase, Device> EIGEN_DEVICE_FUNC const Dimensions& dimensions() const { return m_impl.dimensions(); } - EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(CoeffReturnType*) { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(CoeffReturnType*) { m_impl.evalSubExprsIfNeeded(NULL); const Index numValues = m_impl.dimensions().TotalSize(); m_buffer = (CoeffReturnType*)m_device.allocate(numValues * sizeof(CoeffReturnType)); @@ -121,7 +121,7 @@ struct TensorEvaluator, Device> m_impl.cleanup(); return true; } - EIGEN_STRONG_INLINE void cleanup() { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() { m_device.deallocate(m_buffer); m_buffer = NULL; } @@ -132,7 +132,7 @@ struct TensorEvaluator, Device> } template - EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const { return internal::ploadt(m_buffer + index); } diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorForwardDeclarations.h b/unsupported/Eigen/CXX11/src/Tensor/TensorForwardDeclarations.h index c22444e6f..a8bd8b888 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorForwardDeclarations.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorForwardDeclarations.h @@ -12,7 +12,7 @@ namespace Eigen { -template class Tensor; +template class Tensor; template class TensorFixedSize; template class TensorMap; template class TensorRef; @@ -29,6 +29,7 @@ template class Tenso template class TensorContractionOp; template class TensorConversionOp; template class TensorConvolutionOp; +template class TensorFFTOp; template class TensorPatchOp; template class TensorImagePatchOp; template class TensorVolumePatchOp; @@ -58,6 +59,18 @@ struct DefaultDevice; struct ThreadPoolDevice; struct GpuDevice; +enum FFTResultType { + RealPart = 
0, + ImagPart = 1, + BothParts = 2 +}; + +enum FFTDirection { + FFT_FORWARD = 0, + FFT_REVERSE = 1 +}; + + namespace internal { template diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorFunctors.h b/unsupported/Eigen/CXX11/src/Tensor/TensorFunctors.h index ed259399b..34ba4e392 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorFunctors.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorFunctors.h @@ -14,6 +14,20 @@ namespace Eigen { namespace internal { +/** \internal + * \brief Template functor to compute the modulo between an array and a scalar. + */ +template +struct scalar_mod_op { + EIGEN_DEVICE_FUNC scalar_mod_op(const Scalar& divisor) : m_divisor(divisor) {} + EIGEN_DEVICE_FUNC inline Scalar operator() (const Scalar& a) const { return a % m_divisor; } + const Scalar m_divisor; +}; +template +struct functor_traits > +{ enum { Cost = 2 * NumTraits::MulCost, PacketAccess = false }; }; + + /** \internal * \brief Template functor to compute the sigmoid of a scalar * \sa class CwiseUnaryOp, ArrayBase::sigmoid() @@ -26,8 +40,8 @@ struct scalar_sigmoid_op { return one / (one + std::exp(-x)); } - template - inline Packet packetOp(const Packet& x) const { + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + Packet packetOp(const Packet& x) const { const Packet one = pset1(1); return pdiv(one, padd(one, pexp(pnegate(x)))); } @@ -82,6 +96,7 @@ template struct MeanReducer static const bool PacketAccess = true; static const bool IsStateful = true; + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE MeanReducer() : scalarCount_(0), packetCount_(0) { } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reduce(const T t, T* accum) { @@ -219,6 +234,33 @@ template struct ProdReducer }; +struct AndReducer +{ + static const bool PacketAccess = false; + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reduce(bool t, bool* accum) const { + *accum = *accum && t; + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool initialize() const { + return true; + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool finalize(bool accum) const { + return accum; + } +}; + +struct OrReducer { + static const bool PacketAccess = false; + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reduce(bool t, bool* accum) const { + *accum = *accum || t; + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool initialize() const { + return false; + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool finalize(bool accum) const { + return accum; + } +}; + // Argmin/Argmax reducers template struct ArgMaxTupleReducer { diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorIO.h b/unsupported/Eigen/CXX11/src/Tensor/TensorIO.h index 3b6f2c730..38a833f82 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorIO.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorIO.h @@ -33,7 +33,10 @@ std::ostream& operator << (std::ostream& os, const TensorBase::value == 1) { + static const int rank = internal::array_size::value; + if (rank == 0) { + os << tensor.coeff(0); + } else if (rank == 1) { Map > array(const_cast(tensor.data()), total_size); os << array; } else { diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorIndexList.h b/unsupported/Eigen/CXX11/src/Tensor/TensorIndexList.h index 78e1d2bd1..74ce6d0ec 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorIndexList.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorIndexList.h @@ -77,38 +77,128 @@ struct is_compile_time_constant& > { static constexpr bool value = true; }; + + + +template +struct IndexTuple; + +template +struct IndexTuple { + constexpr IndexTuple() : head(), others() { } + constexpr IndexTuple(const T& v, const O... 
o) : head(v), others(o...) { } + + constexpr static int count = 1 + sizeof...(O); + T head; + IndexTuple others; + typedef T Head; + typedef IndexTuple Other; +}; + +template + struct IndexTuple { + constexpr IndexTuple() : head() { } + constexpr IndexTuple(const T& v) : head(v) { } + + constexpr static int count = 1; + T head; + typedef T Head; +}; + + +template +struct IndexTupleExtractor; + +template +struct IndexTupleExtractor { + + typedef typename IndexTupleExtractor::ValType ValType; + + static constexpr ValType& get_val(IndexTuple& val) { + return IndexTupleExtractor::get_val(val.others); + } + + static constexpr const ValType& get_val(const IndexTuple& val) { + return IndexTupleExtractor::get_val(val.others); + } + template + static void set_val(IndexTuple& val, V& new_val) { + IndexTupleExtractor::set_val(val.others, new_val); + } + +}; + + template + struct IndexTupleExtractor<0, T, O...> { + + typedef T ValType; + + static constexpr ValType& get_val(IndexTuple& val) { + return val.head; + } + static constexpr const ValType& get_val(const IndexTuple& val) { + return val.head; + } + template + static void set_val(IndexTuple& val, V& new_val) { + val.head = new_val; + } +}; + + + +template +constexpr typename IndexTupleExtractor::ValType& array_get(IndexTuple& tuple) { + return IndexTupleExtractor::get_val(tuple); +} +template +constexpr const typename IndexTupleExtractor::ValType& array_get(const IndexTuple& tuple) { + return IndexTupleExtractor::get_val(tuple); +} +template + struct array_size > { + static const size_t value = IndexTuple::count; +}; +template + struct array_size > { + static const size_t value = IndexTuple::count; +}; + + + + template struct tuple_coeff { template - static constexpr DenseIndex get(const DenseIndex i, const std::tuple& t) { - return std::get(t) * (i == Idx) + tuple_coeff::get(i, t) * (i != Idx); + static constexpr DenseIndex get(const DenseIndex i, const IndexTuple& t) { + return array_get(t) * (i == Idx) + tuple_coeff::get(i, t) * (i != Idx); } template - static void set(const DenseIndex i, std::tuple& t, const DenseIndex value) { + static void set(const DenseIndex i, IndexTuple& t, const DenseIndex value) { if (i == Idx) { - update_value(std::get(t), value); + update_value(array_get(t), value); } else { tuple_coeff::set(i, t, value); } } template - static constexpr bool value_known_statically(const DenseIndex i, const std::tuple& t) { - return ((i == Idx) & is_compile_time_constant >::type>::value) || + static constexpr bool value_known_statically(const DenseIndex i, const IndexTuple& t) { + return ((i == Idx) & is_compile_time_constant::ValType>::value) || tuple_coeff::value_known_statically(i, t); } template - static constexpr bool values_up_to_known_statically(const std::tuple& t) { - return is_compile_time_constant >::type>::value && + static constexpr bool values_up_to_known_statically(const IndexTuple& t) { + return is_compile_time_constant::ValType>::value && tuple_coeff::values_up_to_known_statically(t); } template - static constexpr bool values_up_to_statically_known_to_increase(const std::tuple& t) { - return is_compile_time_constant >::type>::value && - is_compile_time_constant >::type>::value && - std::get(t) > std::get(t) && + static constexpr bool values_up_to_statically_known_to_increase(const IndexTuple& t) { + return is_compile_time_constant::ValType>::value && + is_compile_time_constant::ValType>::value && + array_get(t) > array_get(t) && tuple_coeff::values_up_to_statically_known_to_increase(t); } }; @@ -116,62 +206,66 @@ 
struct tuple_coeff { template <> struct tuple_coeff<0> { template - static constexpr DenseIndex get(const DenseIndex i, const std::tuple& t) { + static constexpr DenseIndex get(const DenseIndex i, const IndexTuple& t) { // eigen_assert (i == 0); // gcc fails to compile assertions in constexpr - return std::get<0>(t) * (i == 0); + return array_get<0>(t) * (i == 0); } template - static void set(const DenseIndex i, std::tuple& t, const DenseIndex value) { + static void set(const DenseIndex i, IndexTuple& t, const DenseIndex value) { eigen_assert (i == 0); - update_value(std::get<0>(t), value); + update_value(array_get<0>(t), value); } template - static constexpr bool value_known_statically(const DenseIndex i, const std::tuple&) { - // eigen_assert (i == 0); // gcc fails to compile assertions in constexpr - return is_compile_time_constant >::type>::value & (i == 0); + static constexpr bool value_known_statically(const DenseIndex i, const IndexTuple&) { + return is_compile_time_constant::ValType>::value & (i == 0); } template - static constexpr bool values_up_to_known_statically(const std::tuple&) { - return is_compile_time_constant >::type>::value; + static constexpr bool values_up_to_known_statically(const IndexTuple&) { + return is_compile_time_constant::ValType>::value; } template - static constexpr bool values_up_to_statically_known_to_increase(const std::tuple&) { + static constexpr bool values_up_to_statically_known_to_increase(const IndexTuple&) { return true; } }; } // namespace internal + template -struct IndexList : std::tuple { + struct IndexList : internal::IndexTuple { EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC constexpr DenseIndex operator[] (const DenseIndex i) const { - return internal::tuple_coeff >::value-1>::get(i, *this); + return internal::tuple_coeff >::value-1>::get(i, *this); + } + EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC constexpr DenseIndex get(const DenseIndex i) const { + return internal::tuple_coeff >::value-1>::get(i, *this); } EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC void set(const DenseIndex i, const DenseIndex value) { - return internal::tuple_coeff >::value-1>::set(i, *this, value); + return internal::tuple_coeff >::value-1>::set(i, *this, value); } - constexpr IndexList(const std::tuple& other) : std::tuple(other) { } - constexpr IndexList() : std::tuple() { } + constexpr IndexList(const internal::IndexTuple& other) : internal::IndexTuple(other) { } + constexpr IndexList(FirstType& first, OtherTypes... other) : internal::IndexTuple(first, other...) { } + constexpr IndexList() : internal::IndexTuple() { } constexpr bool value_known_statically(const DenseIndex i) const { - return internal::tuple_coeff >::value-1>::value_known_statically(i, *this); + return internal::tuple_coeff >::value-1>::value_known_statically(i, *this); } constexpr bool all_values_known_statically() const { - return internal::tuple_coeff >::value-1>::values_up_to_known_statically(*this); + return internal::tuple_coeff >::value-1>::values_up_to_known_statically(*this); } constexpr bool values_statically_known_to_increase() const { - return internal::tuple_coeff >::value-1>::values_up_to_statically_known_to_increase(*this); + return internal::tuple_coeff >::value-1>::values_up_to_statically_known_to_increase(*this); } }; template constexpr IndexList make_index_list(FirstType val1, OtherTypes... 
other_vals) { - return std::make_tuple(val1, other_vals...); + return IndexList(val1, other_vals...); } @@ -186,172 +280,178 @@ template size_t array_prod(const Ind } template struct array_size > { - static const size_t value = std::tuple_size >::value; + static const size_t value = array_size >::value; }; template struct array_size > { - static const size_t value = std::tuple_size >::value; + static const size_t value = array_size >::value; }; -template constexpr DenseIndex array_get(IndexList& a) { - return std::get(a); +template constexpr DenseIndex array_get(IndexList& a) { + return IndexTupleExtractor::get_val(a); } -template constexpr DenseIndex array_get(const IndexList& a) { - return std::get(a); +template constexpr DenseIndex array_get(const IndexList& a) { + return IndexTupleExtractor::get_val(a); } template -struct index_known_statically { - constexpr bool operator() (DenseIndex) const { +struct index_known_statically_impl { + static constexpr bool run(const DenseIndex) { return false; } }; template -struct index_known_statically > { - constexpr bool operator() (const DenseIndex i) const { +struct index_known_statically_impl > { + static constexpr bool run(const DenseIndex i) { return IndexList().value_known_statically(i); } }; template -struct index_known_statically > { - constexpr bool operator() (const DenseIndex i) const { +struct index_known_statically_impl > { + static constexpr bool run(const DenseIndex i) { return IndexList().value_known_statically(i); } }; + template -struct all_indices_known_statically { - constexpr bool operator() () const { +struct all_indices_known_statically_impl { + static constexpr bool run() { return false; } }; template -struct all_indices_known_statically > { - constexpr bool operator() () const { +struct all_indices_known_statically_impl > { + static constexpr bool run() { return IndexList().all_values_known_statically(); } }; template -struct all_indices_known_statically > { - constexpr bool operator() () const { +struct all_indices_known_statically_impl > { + static constexpr bool run() { return IndexList().all_values_known_statically(); } }; + template -struct indices_statically_known_to_increase { - constexpr bool operator() () const { +struct indices_statically_known_to_increase_impl { + static constexpr bool run() { return false; } }; template -struct indices_statically_known_to_increase > { - constexpr bool operator() () const { - return IndexList().values_statically_known_to_increase(); + struct indices_statically_known_to_increase_impl > { + static constexpr bool run() { + return Eigen::IndexList().values_statically_known_to_increase(); } }; template -struct indices_statically_known_to_increase > { - constexpr bool operator() () const { - return IndexList().values_statically_known_to_increase(); + struct indices_statically_known_to_increase_impl > { + static constexpr bool run() { + return Eigen::IndexList().values_statically_known_to_increase(); } }; + template -struct index_statically_eq { - constexpr bool operator() (DenseIndex, DenseIndex) const { +struct index_statically_eq_impl { + EIGEN_DEVICE_FUNC static constexpr bool run(DenseIndex, DenseIndex) { return false; } }; template -struct index_statically_eq > { - constexpr bool operator() (const DenseIndex i, const DenseIndex value) const { +struct index_statically_eq_impl > { + EIGEN_DEVICE_FUNC static constexpr bool run(const DenseIndex i, const DenseIndex value) { return IndexList().value_known_statically(i) & - (IndexList()[i] == value); + (IndexList().get(i) == value); } }; 
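// A usage sketch for the machinery above, assuming the type2index wrapper from
// the same header and that <unsupported/Eigen/CXX11/Tensor> is included:
// entries encoded in the type are statically known, runtime entries are not.
void index_list_demo() {
  Eigen::IndexList<Eigen::type2index<0>, Eigen::DenseIndex> dims;
  dims.set(1, 42);                                     // dynamic slot, set at runtime
  const bool known0 = dims.value_known_statically(0);  // true: fixed to 0 in the type
  const bool known1 = dims.value_known_statically(1);  // false: runtime-only value
  (void)known0; (void)known1;
}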
template -struct index_statically_eq > { - constexpr bool operator() (const DenseIndex i, const DenseIndex value) const { +struct index_statically_eq_impl > { + EIGEN_DEVICE_FUNC static constexpr bool run(const DenseIndex i, const DenseIndex value) { return IndexList().value_known_statically(i) & - (IndexList()[i] == value); - } -}; - -template -struct index_statically_ne { - constexpr bool operator() (DenseIndex, DenseIndex) const { - return false; - } -}; - -template -struct index_statically_ne > { - constexpr bool operator() (const DenseIndex i, const DenseIndex value) const { - return IndexList().value_known_statically(i) & - (IndexList()[i] != value); - } -}; - -template -struct index_statically_ne > { - constexpr bool operator() (const DenseIndex i, const DenseIndex value) const { - return IndexList().value_known_statically(i) & - (IndexList()[i] != value); + (IndexList().get(i) == value); } }; template -struct index_statically_gt { - constexpr bool operator() (DenseIndex, DenseIndex) const { - return false; +struct index_statically_ne_impl { + EIGEN_DEVICE_FUNC static constexpr bool run(DenseIndex, DenseIndex) { + return false; } }; template -struct index_statically_gt > { - constexpr bool operator() (const DenseIndex i, const DenseIndex value) const { +struct index_statically_ne_impl > { + EIGEN_DEVICE_FUNC static constexpr bool run(const DenseIndex i, const DenseIndex value) { return IndexList().value_known_statically(i) & - (IndexList()[i] > value); + (IndexList().get(i) != value); } }; template -struct index_statically_gt > { - constexpr bool operator() (const DenseIndex i, const DenseIndex value) const { +struct index_statically_ne_impl > { + EIGEN_DEVICE_FUNC static constexpr bool run(const DenseIndex i, const DenseIndex value) { return IndexList().value_known_statically(i) & - (IndexList()[i] > value); + (IndexList().get(i) != value); } }; + template -struct index_statically_lt { - constexpr bool operator() (DenseIndex, DenseIndex) const { - return false; +struct index_statically_gt_impl { + EIGEN_DEVICE_FUNC static constexpr bool run(DenseIndex, DenseIndex) { + return false; } }; template -struct index_statically_lt > { - constexpr bool operator() (const DenseIndex i, const DenseIndex value) const { +struct index_statically_gt_impl > { + EIGEN_DEVICE_FUNC static constexpr bool run(const DenseIndex i, const DenseIndex value) { return IndexList().value_known_statically(i) & - (IndexList()[i] < value); + (IndexList().get(i) > value); } }; template -struct index_statically_lt > { - constexpr bool operator() (const DenseIndex i, const DenseIndex value) const { +struct index_statically_gt_impl > { + EIGEN_DEVICE_FUNC static constexpr bool run(const DenseIndex i, const DenseIndex value) { return IndexList().value_known_statically(i) & - (IndexList()[i] < value); + (IndexList().get(i) > value); + } +}; + + + +template +struct index_statically_lt_impl { + EIGEN_DEVICE_FUNC static constexpr bool run(DenseIndex, DenseIndex) { + return false; + } +}; + +template +struct index_statically_lt_impl > { + EIGEN_DEVICE_FUNC static constexpr bool run(const DenseIndex i, const DenseIndex value) { + return IndexList().value_known_statically(i) & + (IndexList().get(i) < value); + } +}; + +template +struct index_statically_lt_impl > { + EIGEN_DEVICE_FUNC static constexpr bool run(const DenseIndex i, const DenseIndex value) { + return IndexList().value_known_statically(i) & + (IndexList().get(i) < value); } }; @@ -363,52 +463,51 @@ struct index_statically_lt > { namespace Eigen { namespace internal 
{ -// No C++11 support template -struct index_known_statically { - EIGEN_ALWAYS_INLINE EIGEN_DEVICE_FUNC bool operator() (DenseIndex) const{ +struct index_known_statically_impl { + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE static bool run(const DenseIndex) { return false; } }; template -struct all_indices_known_statically { - EIGEN_ALWAYS_INLINE EIGEN_DEVICE_FUNC bool operator() () const { +struct all_indices_known_statically_impl { + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE static bool run() { return false; } }; template -struct indices_statically_known_to_increase { - EIGEN_ALWAYS_INLINE EIGEN_DEVICE_FUNC bool operator() () const { +struct indices_statically_known_to_increase_impl { + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE static bool run() { return false; } }; template -struct index_statically_eq { - EIGEN_ALWAYS_INLINE EIGEN_DEVICE_FUNC bool operator() (DenseIndex, DenseIndex) const{ +struct index_statically_eq_impl { + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE static bool run(DenseIndex, DenseIndex) { return false; } }; template -struct index_statically_ne { - EIGEN_ALWAYS_INLINE EIGEN_DEVICE_FUNC bool operator() (DenseIndex, DenseIndex) const{ +struct index_statically_ne_impl { + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE static bool run(DenseIndex, DenseIndex) { return false; } }; template -struct index_statically_gt { - EIGEN_ALWAYS_INLINE EIGEN_DEVICE_FUNC bool operator() (DenseIndex, DenseIndex) const{ +struct index_statically_gt_impl { + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE static bool run(DenseIndex, DenseIndex) { return false; } }; template -struct index_statically_lt { - EIGEN_ALWAYS_INLINE EIGEN_DEVICE_FUNC bool operator() (DenseIndex, DenseIndex) const{ +struct index_statically_lt_impl { + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE static bool run(DenseIndex, DenseIndex) { return false; } }; @@ -418,4 +517,46 @@ struct index_statically_lt { #endif + +namespace Eigen { +namespace internal { +template +static EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR bool index_known_statically(DenseIndex i) { + return index_known_statically_impl::run(i); +} + +template +static EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR bool all_indices_known_statically() { + return all_indices_known_statically_impl::run(); +} + +template +static EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR bool indices_statically_known_to_increase() { + return indices_statically_known_to_increase_impl::run(); +} + +template +static EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR bool index_statically_eq(DenseIndex i, DenseIndex value) { + return index_statically_eq_impl::run(i, value); +} + +template +static EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR bool index_statically_ne(DenseIndex i, DenseIndex value) { + return index_statically_ne_impl::run(i, value); +} + +template +static EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR bool index_statically_gt(DenseIndex i, DenseIndex value) { + return index_statically_gt_impl::run(i, value); +} + +template +static EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR bool index_statically_lt(DenseIndex i, DenseIndex value) { + return index_statically_lt_impl::run(i, value); +} + +} // end namespace internal +} // end namespace Eigen + + #endif // EIGEN_CXX11_TENSOR_TENSOR_INDEX_LIST_H diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorInitializer.h b/unsupported/Eigen/CXX11/src/Tensor/TensorInitializer.h index 4303e3536..ad2a1e6ac 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorInitializer.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorInitializer.h @@ -55,6 +55,18 @@ struct Initializer { } }; +template +struct Initializer { + typedef typename traits::Scalar InitList; + + 
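+  // Rank-0 specialization: the "initializer list" degenerates to a single
+  // scalar, written straight into coefficient 0 (the indices array is unused).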
static void run(TensorEvaluator& tensor, + Eigen::array::Index, traits::NumDimensions>*/* indices*/, + const InitList& v) { + tensor.coeffRef(0) = v; + } +}; + + template void initialize_tensor(TensorEvaluator& tensor, const typename Initializer::NumDimensions>::InitList& vals) { diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorIntDiv.h b/unsupported/Eigen/CXX11/src/Tensor/TensorIntDiv.h index fd2441894..b58173e58 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorIntDiv.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorIntDiv.h @@ -34,10 +34,7 @@ namespace { EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE int count_leading_zeros(const T val) { #ifdef __CUDA_ARCH__ - if (sizeof(T) == 8) { - return __clzll(val); - } - return __clz(val); + return (sizeof(T) == 8) ? __clzll(val) : __clz(val); #elif EIGEN_COMP_MSVC DWORD leading_zeros = 0; if (sizeof(T) == 8) { @@ -46,11 +43,11 @@ else { _BitScanReverse(&leading_zeros, val); } + return leading_zeros; #else - if (sizeof(T) == 8) { - return __builtin_clzl(static_cast(val)); - } - return __builtin_clz(static_cast(val)); + return (sizeof(T) == 8) ? + __builtin_clzl(static_cast(val)) : + __builtin_clz(static_cast(val)); #endif } @@ -61,13 +58,8 @@ namespace { template struct DividerTraits { -#if defined(__SIZEOF_INT128__) && !defined(__CUDACC__) typedef typename UnsignedTraits::type type; static const int N = sizeof(T) * 8; -#else - typedef uint32_t type; - static const int N = 32; -#endif }; template @@ -79,44 +71,42 @@ namespace { #endif } -#if defined(__CUDA_ARCH__) - template - EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE uint64_t muluh(const uint64_t a, const T b) { - return __umul64hi(a, b); - } -#else template - EIGEN_ALWAYS_INLINE uint64_t muluh(const uint64_t a, const T b) { -#if defined(__SIZEOF_INT128__) && !defined(__CUDACC__) + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE uint64_t muluh(const uint64_t a, const T b) { +#if defined(__CUDA_ARCH__) + return __umul64hi(a, b); +#elif defined(__SIZEOF_INT128__) __uint128_t v = static_cast<__uint128_t>(a) * static_cast<__uint128_t>(b); return static_cast(v >> 64); #else - EIGEN_STATIC_ASSERT(sizeof(T) == 4, YOU_MADE_A_PROGRAMMING_MISTAKE); - return (a * b) >> 32; + return (TensorUInt128, uint64_t>(a) * TensorUInt128, uint64_t>(b)).upper(); #endif } -#endif template struct DividerHelper { - static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE uint32_t computeMultiplier (const int log_div, const T divider) { + static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE uint32_t computeMultiplier(const int log_div, const T divider) { EIGEN_STATIC_ASSERT(N == 32, YOU_MADE_A_PROGRAMMING_MISTAKE); return static_cast((static_cast(1) << (N+log_div)) / divider - (static_cast(1) << N) + 1); } }; -#if defined(__SIZEOF_INT128__) && !defined(__CUDACC__) template struct DividerHelper<64, T> { - static EIGEN_ALWAYS_INLINE uint64_t computeMultiplier(const int log_div, const T divider) { + static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE uint64_t computeMultiplier(const int log_div, const T divider) { +#if defined(__SIZEOF_INT128__) && !defined(__CUDA_ARCH__) return static_cast((static_cast<__uint128_t>(1) << (64+log_div)) / static_cast<__uint128_t>(divider) - (static_cast<__uint128_t>(1) << 64) + 1); +#else + const uint64_t shift = 1ULL << log_div; + TensorUInt128 result = (TensorUInt128 >(shift, 0) / TensorUInt128, uint64_t>(divider) - TensorUInt128, static_val<0> >(1, 0) + TensorUInt128, static_val<1> >(1)); + return static_cast(result); +#endif } }; -#endif } -template +template struct TensorIntDivisor { public: EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE TensorIntDivisor() { @@ -166,8 +156,9 @@ struct TensorIntDivisor { // Optimized version for signed 32 bit integers. // Derived from Hacker's Delight. +// Only works for divisors strictly greater than one template <> -class TensorIntDivisor { +class TensorIntDivisor { public: EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorIntDivisor() { magic = 0; @@ -226,8 +217,8 @@ private: }; -template -static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T operator / (const T& numerator, const TensorIntDivisor& divisor) { +template +static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T operator / (const T& numerator, const TensorIntDivisor& divisor) { return divisor.divide(numerator); } diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorMacros.h b/unsupported/Eigen/CXX11/src/Tensor/TensorMacros.h new file mode 100644 index 000000000..8ed71f838 --- /dev/null +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorMacros.h @@ -0,0 +1,54 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2015 Benoit Steiner +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_CXX11_TENSOR_TENSOR_META_MACROS_H +#define EIGEN_CXX11_TENSOR_TENSOR_META_MACROS_H + + +/** use this macro in sfinae selection in templated functions + * + * template::value , int >::type = 0 + * > + * void foo(){} + * + * becomes => + * + * template::value ) + * > + * void foo(){} + */ + +// SFINAE requires variadic templates +#ifndef __CUDACC__ +#ifdef EIGEN_HAS_VARIADIC_TEMPLATES + // SFINAE doesn't work for gcc <= 4.7 + #ifdef EIGEN_COMP_GNUC + #if EIGEN_GNUC_AT_LEAST(4,8) + #define EIGEN_HAS_SFINAE + #endif + #else + #define EIGEN_HAS_SFINAE + #endif +#endif +#endif + +#define EIGEN_SFINAE_ENABLE_IF( __condition__ ) \ + typename internal::enable_if< ( __condition__ ) , int >::type = 0 + + +#if defined(EIGEN_HAS_CONSTEXPR) +#define EIGEN_CONSTEXPR constexpr +#else +#define EIGEN_CONSTEXPR +#endif + + +#endif diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorMap.h b/unsupported/Eigen/CXX11/src/Tensor/TensorMap.h index 2cb2bc7a6..5c759af09 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorMap.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorMap.h @@ -49,9 +49,15 @@ template class TensorMap : public Tensor IsAligned = ((int(Options_)&Aligned)==Aligned), PacketAccess = (internal::packet_traits::size > 1), Layout = PlainObjectType::Layout, - CoordAccess = true, + CoordAccess = true }; + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE TensorMap(PointerArgType dataPtr) : m_data(dataPtr), m_dimensions() { + // The number of dimensions used to construct a tensor must be equal to the rank of the tensor. + EIGEN_STATIC_ASSERT((0 == NumIndices || NumIndices == Dynamic), YOU_MADE_A_PROGRAMMING_MISTAKE) + } + #ifdef EIGEN_HAS_VARIADIC_TEMPLATES template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorMap(PointerArgType dataPtr, Index firstDimension, IndexTypes... otherDimensions) : m_data(dataPtr), m_dimensions(firstDimension, otherDimensions...) 
{ @@ -82,15 +88,19 @@ template class TensorMap : public Tensor } #endif - inline TensorMap(PointerArgType dataPtr, const array& dimensions) + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorMap(PointerArgType dataPtr, const array& dimensions) : m_data(dataPtr), m_dimensions(dimensions) { } template - EIGEN_STRONG_INLINE TensorMap(PointerArgType dataPtr, const Dimensions& dimensions) + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorMap(PointerArgType dataPtr, const Dimensions& dimensions) : m_data(dataPtr), m_dimensions(dimensions) { } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorMap(PlainObjectType& tensor) + : m_data(tensor.data()), m_dimensions(tensor.dimensions()) + { } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index rank() const { return m_dimensions.rank(); } EIGEN_DEVICE_FUNC @@ -117,11 +127,18 @@ template class TensorMap : public Tensor } } + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const Scalar& operator()() const + { + EIGEN_STATIC_ASSERT(NumIndices == 0, YOU_MADE_A_PROGRAMMING_MISTAKE) + return m_data[0]; + } + #ifdef EIGEN_HAS_VARIADIC_TEMPLATES template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar& operator()(Index firstIndex, IndexTypes... otherIndices) const { - static_assert(sizeof...(otherIndices) + 1 == NumIndices, "Number of indices used to access a tensor coefficient must be equal to the rank of the tensor."); + EIGEN_STATIC_ASSERT(sizeof...(otherIndices) + 1 == NumIndices, YOU_MADE_A_PROGRAMMING_MISTAKE) if (PlainObjectType::Options&RowMajor) { const Index index = m_dimensions.IndexOfRowMajor(array{{firstIndex, otherIndices...}}); return m_data[index]; @@ -141,7 +158,7 @@ template class TensorMap : public Tensor EIGEN_STRONG_INLINE const Scalar& operator()(Index i0, Index i1) const { if (PlainObjectType::Options&RowMajor) { - const Index index = i1 + i0 * m_dimensions[0]; + const Index index = i1 + i0 * m_dimensions[1]; return m_data[index]; } else { const Index index = i0 + i1 * m_dimensions[0]; @@ -152,7 +169,7 @@ template class TensorMap : public Tensor EIGEN_STRONG_INLINE const Scalar& operator()(Index i0, Index i1, Index i2) const { if (PlainObjectType::Options&RowMajor) { - const Index index = i2 + m_dimensions[1] * (i1 + m_dimensions[0] * i0); + const Index index = i2 + m_dimensions[2] * (i1 + m_dimensions[1] * i0); return m_data[index]; } else { const Index index = i0 + m_dimensions[0] * (i1 + m_dimensions[1] * i2); @@ -196,6 +213,13 @@ template class TensorMap : public Tensor } } + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE Scalar& operator()() + { + EIGEN_STATIC_ASSERT(NumIndices == 0, YOU_MADE_A_PROGRAMMING_MISTAKE) + return m_data[0]; + } + #ifdef EIGEN_HAS_VARIADIC_TEMPLATES template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& operator()(Index firstIndex, IndexTypes... 
otherIndices) @@ -221,7 +245,7 @@ template class TensorMap : public Tensor EIGEN_STRONG_INLINE Scalar& operator()(Index i0, Index i1) { if (PlainObjectType::Options&RowMajor) { - const Index index = i1 + i0 * m_dimensions[0]; + const Index index = i1 + i0 * m_dimensions[1]; return m_data[index]; } else { const Index index = i0 + i1 * m_dimensions[0]; @@ -232,7 +256,7 @@ template class TensorMap : public Tensor EIGEN_STRONG_INLINE Scalar& operator()(Index i0, Index i1, Index i2) { if (PlainObjectType::Options&RowMajor) { - const Index index = i2 + m_dimensions[1] * (i1 + m_dimensions[0] * i0); + const Index index = i2 + m_dimensions[2] * (i1 + m_dimensions[1] * i0); return m_data[index]; } else { const Index index = i0 + m_dimensions[0] * (i1 + m_dimensions[1] * i2); diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorMeta.h b/unsupported/Eigen/CXX11/src/Tensor/TensorMeta.h index 7dfa04760..f28a9699d 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorMeta.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorMeta.h @@ -32,14 +32,29 @@ template <> struct max_n_1<0> { }; +// Default packet types +template +struct PacketType { + typedef typename internal::packet_traits::type type; + static const int size = internal::unpacket_traits::size; +}; - -#if defined(EIGEN_HAS_CONSTEXPR) -#define EIGEN_CONSTEXPR constexpr -#else -#define EIGEN_CONSTEXPR +// For CUDA packet types when using a GpuDevice +#if defined(EIGEN_USE_GPU) && defined(__CUDACC__) +template <> +struct PacketType { + typedef float4 type; + static const int size = 4; +}; +template <> +struct PacketType { + typedef double2 type; + static const int size = 2; +}; #endif + + // Tuple mimics std::pair but works on e.g. nvcc. template struct Tuple { public: @@ -83,7 +98,55 @@ bool operator!=(const Tuple& x, const Tuple& y) { return !(x == y); } -#undef EIGEN_CONSTEXPR + + +#ifdef EIGEN_HAS_SFINAE +namespace internal { + + template + EIGEN_CONSTEXPR EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + array customIndices2Array(IndexType& idx, numeric_list) { + return { idx[Is]... 
+
+
+#ifdef EIGEN_HAS_SFINAE
+namespace internal {
+
+  template <typename IndexType, typename Index, Index... Is>
+  EIGEN_CONSTEXPR EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+  array<Index, sizeof...(Is)> customIndices2Array(IndexType& idx, numeric_list<Index, Is...>) {
+    return { idx[Is]... };
+  }
+  template <typename IndexType, typename Index>
+  EIGEN_CONSTEXPR EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+  array<Index, 0> customIndices2Array(IndexType&, numeric_list<Index>) {
+    return array<Index, 0>();
+  }
+
+  /** Make an array (for index/dimensions) out of a custom index */
+  template <typename Index, std::size_t NumIndices, typename IndexType>
+  EIGEN_CONSTEXPR EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+  array<Index, NumIndices> customIndices2Array(IndexType& idx) {
+    return customIndices2Array(idx, typename gen_numeric_list<Index, NumIndices>::type{});
+  }
+
+
+  template <typename B, typename D>
+  struct is_base_of
+  {
+
+    typedef char (&yes)[1];
+    typedef char (&no)[2];
+
+    template <typename BB, typename DD>
+    struct Host
+    {
+      operator BB*() const;
+      operator DD*();
+    };
+
+    template <typename T>
+    static yes check(D*, T);
+    static no check(B*, int);
+
+    static const bool value = sizeof(check(Host<B, D>(), int())) == sizeof(yes);
+  };
+
+}
+#endif
+
+

 } // namespace Eigen
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorMorphing.h b/unsupported/Eigen/CXX11/src/Tensor/TensorMorphing.h
index bdc86e0fa..d8c923d74 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorMorphing.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorMorphing.h
@@ -443,7 +443,7 @@ struct TensorEvaluator<const TensorSlicingOp<StartIndices, Sizes, ArgType>, Devi
       return rslt;
     }
     else {
-      typename internal::remove_const<CoeffReturnType>::type values[packetSize];
+      EIGEN_ALIGN_MAX typename internal::remove_const<CoeffReturnType>::type values[packetSize];
       values[0] = m_impl.coeff(inputIndices[0]);
       values[packetSize-1] = m_impl.coeff(inputIndices[1]);
       for (int i = 1; i < packetSize-1; ++i) {
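// --- Illustrative sketch, not part of the patch ---
// What the customIndices2Array/is_base_of machinery above enables when
// EIGEN_HAS_SFINAE is defined: tensor coefficients can be addressed with
// any custom container supporting operator[], e.g. a fixed-size Matrix of
// indices. This mirrors the cxx11_tensor_custom_index test added later in
// this patch; the tensor shape and indices below are made up.
//
// #include <unsupported/Eigen/CXX11/Tensor>
//
// int main() {
// #ifdef EIGEN_HAS_SFINAE
//   Eigen::Tensor<float, 4> t(2, 3, 5, 7);
//   t.setRandom();
//   Eigen::Matrix<unsigned int, 4, 1> idx;
//   idx << 1, 2, 4, 1;
//   // Same coefficient, addressed through a custom index container.
//   return t.coeff(idx) == t(1, 2, 4, 1) ? 0 : 1;
// #else
//   return 0;
// #endif
// }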
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorPadding.h b/unsupported/Eigen/CXX11/src/Tensor/TensorPadding.h
index 07a6e8d4c..91e32d200 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorPadding.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorPadding.h
@@ -98,6 +98,11 @@ struct TensorEvaluator<const TensorPaddingOp<PaddingDimensions, ArgType>, Device
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
       : m_impl(op.expression(), device), m_padding(op.padding())
   {
+    // The padding op doesn't change the rank of the tensor. Directly padding a scalar would lead
+    // to a vector, which doesn't make sense. Instead one should reshape the scalar into a vector
+    // of 1 element first and then pad.
+    EIGEN_STATIC_ASSERT(NumDims > 0, YOU_MADE_A_PROGRAMMING_MISTAKE);
+
     // Compute dimensions
     m_dimensions = m_impl.dimensions();
     for (int i = 0; i < NumDims; ++i) {
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h b/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h
index 1d22843af..aaa877185 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h
@@ -64,10 +64,10 @@ template <typename ReducedDims> struct DimInitializer {
   }
 };
-template <> struct DimInitializer<Sizes<1> > {
+template <> struct DimInitializer<Sizes<> > {
   template <typename InputDims, typename Index, size_t Rank> EIGEN_DEVICE_FUNC
   static void run(const InputDims& input_dims, const array<bool, Rank>&,
-                  Sizes<1>*, array<Index, Rank>* reduced_dims) {
+                  Sizes<>*, array<Index, Rank>* reduced_dims) {
     const int NumInputDims = internal::array_size<InputDims>::value;
     for (int i = 0; i < NumInputDims; ++i) {
       (*reduced_dims)[i] = input_dims[i];
@@ -88,30 +88,30 @@ struct preserve_inner_most_dims {
 #if defined(EIGEN_HAS_CONSTEXPR) && defined(EIGEN_HAS_VARIADIC_TEMPLATES)
 template <typename ReducedDims, int NumTensorDims>
 struct are_inner_most_dims<ReducedDims, NumTensorDims, ColMajor>{
-  static const bool tmp1 = indices_statically_known_to_increase<ReducedDims>()();
-  static const bool tmp2 = index_statically_eq<ReducedDims>()(0, 0);
-  static const bool tmp3 = index_statically_eq<ReducedDims>()(array_size<ReducedDims>::value-1, array_size<ReducedDims>::value-1);
+  static const bool tmp1 = indices_statically_known_to_increase<ReducedDims>();
+  static const bool tmp2 = index_statically_eq<ReducedDims>(0, 0);
+  static const bool tmp3 = index_statically_eq<ReducedDims>(array_size<ReducedDims>::value-1, array_size<ReducedDims>::value-1);
   static const bool value = tmp1 & tmp2 & tmp3;
 };
 template <typename ReducedDims, int NumTensorDims>
 struct are_inner_most_dims<ReducedDims, NumTensorDims, RowMajor>{
-  static const bool tmp1 = indices_statically_known_to_increase<ReducedDims>()();
-  static const bool tmp2 = index_statically_eq<ReducedDims>()(0, NumTensorDims - array_size<ReducedDims>::value);
-  static const bool tmp3 = index_statically_eq<ReducedDims>()(array_size<ReducedDims>::value - 1, NumTensorDims - 1);
+  static const bool tmp1 = indices_statically_known_to_increase<ReducedDims>();
+  static const bool tmp2 = index_statically_eq<ReducedDims>(0, NumTensorDims - array_size<ReducedDims>::value);
+  static const bool tmp3 = index_statically_eq<ReducedDims>(array_size<ReducedDims>::value - 1, NumTensorDims - 1);
   static const bool value = tmp1 & tmp2 & tmp3;
 };
 template <typename ReducedDims, int NumTensorDims>
 struct preserve_inner_most_dims<ReducedDims, NumTensorDims, ColMajor>{
-  static const bool tmp1 = indices_statically_known_to_increase<ReducedDims>()();
-  static const bool tmp2 = index_statically_gt<ReducedDims>()(0, 0);
+  static const bool tmp1 = indices_statically_known_to_increase<ReducedDims>();
+  static const bool tmp2 = index_statically_gt<ReducedDims>(0, 0);
   static const bool value = tmp1 & tmp2;
 };
 template <typename ReducedDims, int NumTensorDims>
 struct preserve_inner_most_dims<ReducedDims, NumTensorDims, RowMajor>{
-  static const bool tmp1 = indices_statically_known_to_increase<ReducedDims>()();
-  static const bool tmp2 = index_statically_lt<ReducedDims>()(array_size<ReducedDims>::value - 1, NumTensorDims - 1);
+  static const bool tmp1 = indices_statically_known_to_increase<ReducedDims>();
+  static const bool tmp2 = index_statically_lt<ReducedDims>(array_size<ReducedDims>::value - 1, NumTensorDims - 1);
   static const bool value = tmp1 & tmp2;
 };
 #endif
@@ -136,6 +136,12 @@ struct GenericDimReducer<0, Self, Op> {
     }
   }
 };
+template <typename Self, typename Op>
+struct GenericDimReducer<-1, Self, Op> {
+  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reduce(const Self& self, typename Self::Index index, Op& reducer, typename Self::CoeffReturnType* accum) {
+    reducer.reduce(self.m_impl.coeff(index), accum);
+  }
+};

 template <int DimIndex, typename Self, typename Op, bool vectorizable>
 struct InnerMostDimReducer {
@@ -192,6 +198,12 @@ struct InnerMostDimPreserver<0, Self, Op, true> {
     }
   }
 };
+template <typename Self, typename Op>
+struct InnerMostDimPreserver<-1, Self, Op, true> {
+  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reduce(const Self&, typename Self::Index, Op&, typename Self::PacketReturnType*) {
+    eigen_assert(false && "should never be called");
+  }
+};

 // Default full reducer
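// --- Illustrative sketch, not part of the patch ---
// Two user-visible consequences of the rank-0 work above: the
// reshape-then-pad workaround suggested by the TensorPadding comment, and
// full reductions now yielding a rank-0 tensor (read via operator()())
// instead of a 1-d tensor of size 1. A sketch, assuming the unsupported
// Tensor module; values and variable names are made up.
//
// #include <unsupported/Eigen/CXX11/Tensor>
// #include <utility>
//
// int main() {
//   // "Padding a scalar": reshape the rank-0 tensor to one element first.
//   Eigen::Tensor<float, 0> s;
//   s.setConstant(7.0f);
//   Eigen::array<Eigen::DenseIndex, 1> one_dim;
//   one_dim[0] = 1;
//   Eigen::array<std::pair<Eigen::DenseIndex, Eigen::DenseIndex>, 1> pads;
//   pads[0] = std::make_pair(2, 2);   // two zeros on each side
//   Eigen::Tensor<float, 1> padded = s.reshape(one_dim).pad(pads); // [0,0,7,0,0]
//
//   // Reducing over every dimension now produces a rank-0 tensor.
//   Eigen::Tensor<float, 2> t(2, 3);
//   t.setConstant(1.0f);
//   Eigen::Tensor<float, 0> total = t.sum();
//   return (padded.dimension(0) == 5 && total() == 6.0f) ? 0 : 1;
// }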
template @@ -326,184 +338,9 @@ struct FullReducer { #if defined(EIGEN_USE_GPU) && defined(__CUDACC__) -// Full reducers for GPU, don't vectorize for now - -// Reducer function that enables multiple cuda thread to safely accumulate at the same -// output address. It basically reads the current value of the output variable, and -// attempts to update it with the new value. If in the meantime another cuda thread -// updated the content of the output address it will try again. -template -__device__ EIGEN_ALWAYS_INLINE void atomicReduce(T* output, T accum, R& reducer) { -#if __CUDA_ARCH__ >= 300 - if (sizeof(T) == 4) - { - unsigned int oldval = *reinterpret_cast(output); - unsigned int newval = oldval; - reducer.reduce(accum, reinterpret_cast(&newval)); - if (newval == oldval) { - return; - } - unsigned int readback; - while ((readback = atomicCAS((unsigned int*)output, oldval, newval)) != oldval) { - oldval = readback; - newval = oldval; - reducer.reduce(accum, reinterpret_cast(&newval)); - if (newval == oldval) { - return; - } - } - } - else if (sizeof(T) == 8) { - unsigned long long oldval = *reinterpret_cast(output); - unsigned long long newval = oldval; - reducer.reduce(accum, reinterpret_cast(&newval)); - if (newval == oldval) { - return; - } - unsigned long long readback; - while ((readback = atomicCAS((unsigned long long*)output, oldval, newval)) != oldval) { - oldval = readback; - newval = oldval; - reducer.reduce(accum, reinterpret_cast(&newval)); - if (newval == oldval) { - return; - } - } - } - else { - assert(0 && "Wordsize not supported"); - } -#else - assert(0 && "Shouldn't be called on unsupported device"); +template +__global__ void FullReductionKernel(R, const S, I, typename S::CoeffReturnType*); #endif -} - -template -__device__ inline void atomicReduce(T* output, T accum, SumReducer&) { -#if __CUDA_ARCH__ >= 300 - atomicAdd(output, accum); -#else - assert(0 && "Shouldn't be called on unsupported device"); -#endif -} - -template -__global__ void FullReductionKernel(Reducer reducer, const Self input, Index num_coeffs, - typename Self::CoeffReturnType* output) { - const Index first_index = blockIdx.x * BlockSize * NumPerThread + threadIdx.x; - - if (first_index == 0) { - *output = reducer.initialize(); - } - - typename Self::CoeffReturnType accum = reducer.initialize(); - for (Index i = 0; i < NumPerThread; ++i) { - const Index index = first_index + i * BlockSize; - if (index >= num_coeffs) { - break; - } - typename Self::CoeffReturnType val = input.m_impl.coeff(index); - reducer.reduce(val, &accum); - } - - for (int offset = warpSize/2; offset > 0; offset /= 2) { - reducer.reduce(__shfl_down(accum, offset), &accum); - } - - if ((threadIdx.x & (warpSize - 1)) == 0) { - atomicReduce(output, accum, reducer); - } -} - - -template -struct FullReducer { - // Unfortunately nvidia doesn't support well exotic types such as complex, - // so reduce the scope of the optimized version of the code to the simple case - // of floats. 
- static const bool HasOptimizedImplementation = !Op::IsStateful && - internal::is_same::value; - - template - static void run(const Self& self, Op& reducer, const GpuDevice& device, OutputType* output) { - assert(false && "Should only be called on floats"); - } - - static void run(const Self& self, Op& reducer, const GpuDevice& device, float* output) { - typedef typename Self::Index Index; - - const Index num_coeffs = array_prod(self.m_impl.dimensions()); - const int block_size = 256; - const int num_per_thread = 128; - const int num_blocks = std::ceil(static_cast(num_coeffs) / (block_size * num_per_thread)); - LAUNCH_CUDA_KERNEL((FullReductionKernel), - num_blocks, block_size, 0, device, reducer, self, num_coeffs, output); - } -}; - -#endif - - -template -class BlockReducer { - public: - typedef typename Self::Index Index; - typedef typename Self::Scalar Scalar; - typedef typename Self::CoeffReturnType CoeffReturnType; - explicit BlockReducer(const Op& reducer) : op_(reducer) { - accum_ = op_.initialize(); - } - void Reduce(Index index, Index num_values_to_reduce, Scalar* data) { - for (Index i = 0; i < num_values_to_reduce; ++i) { - op_.reduce(data[index + i], &accum_); - } - } - CoeffReturnType Finalize() { - return op_.finalize(accum_); - } - - private: - CoeffReturnType accum_; - Op op_; -}; - - -template -class BlockReducer { - public: - typedef typename Self::Index Index; - typedef typename Self::Scalar Scalar; - typedef typename Self::CoeffReturnType CoeffReturnType; - typedef typename Self::PacketReturnType PacketReturnType; - explicit BlockReducer(const Op& reducer) : op_(reducer) { - vaccum_ = op_.template initializePacket(); - accum_ = op_.initialize(); - } - void Reduce(Index index, Index num_values_to_reduce, Scalar* data) { - const int packet_size = internal::unpacket_traits::size; - const typename Self::Index vectorized_size = (num_values_to_reduce / - packet_size) * packet_size; - for (typename Self::Index i = 0; i < vectorized_size; i += packet_size) { - op_.reducePacket(internal::ploadt( - &data[index + i]), &vaccum_); - } - - for (typename Self::Index i = vectorized_size; - i < num_values_to_reduce; ++i) { - op_.reduce(data[index + i], &accum_); - } - } - typename Self::CoeffReturnType Finalize() { - return op_.finalizeBoth(accum_, vaccum_); - } - - private: - typename Self::PacketReturnType vaccum_; - typename Self::CoeffReturnType accum_; - Op op_; -}; } // end namespace internal @@ -550,8 +387,8 @@ struct TensorEvaluator, Device> typedef typename TensorEvaluator::Dimensions InputDimensions; static const int NumInputDims = internal::array_size::value; static const int NumReducedDims = internal::array_size::value; - static const int NumOutputDims = (NumInputDims==NumReducedDims) ? 
1 : NumInputDims - NumReducedDims; - typedef typename internal::conditional, DSizes >::type Dimensions; + static const int NumOutputDims = NumInputDims - NumReducedDims; + typedef typename internal::conditional, DSizes >::type Dimensions; typedef typename XprType::Scalar Scalar; typedef TensorEvaluator, Device> Self; static const bool InputPacketAccess = TensorEvaluator::PacketAccess; @@ -565,7 +402,7 @@ struct TensorEvaluator, Device> static const bool ReducingInnerMostDims = internal::are_inner_most_dims::value; static const bool PreservingInnerMostDims = internal::preserve_inner_most_dims::value; - static const bool RunningFullReduction = (NumInputDims==NumReducedDims); + static const bool RunningFullReduction = (NumOutputDims==0); EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device) : m_impl(op.expression(), device), m_reducer(op.reducer()), m_result(NULL), m_device(device) @@ -589,47 +426,50 @@ struct TensorEvaluator, Device> internal::DimInitializer::run(input_dims, reduced, &m_dimensions, &m_reducedDims); // Precompute output strides. - if (static_cast(Layout) == static_cast(ColMajor)) { - m_outputStrides[0] = 1; - for (int i = 1; i < NumOutputDims; ++i) { - m_outputStrides[i] = m_outputStrides[i - 1] * m_dimensions[i - 1]; - } - } else { - m_outputStrides[NumOutputDims - 1] = 1; - for (int i = NumOutputDims - 2; i >= 0; --i) { - m_outputStrides[i] = m_outputStrides[i + 1] * m_dimensions[i + 1]; - } - } - - // Precompute input strides. - array input_strides; - if (static_cast(Layout) == static_cast(ColMajor)) { - input_strides[0] = 1; - for (int i = 1; i < NumInputDims; ++i) { - input_strides[i] = input_strides[i-1] * input_dims[i-1]; - } - } else { - input_strides[NumInputDims - 1] = 1; - for (int i = NumInputDims - 2; i >= 0; --i) { - input_strides[i] = input_strides[i + 1] * input_dims[i + 1]; - } - } - - int outputIndex = 0; - int reduceIndex = 0; - for (int i = 0; i < NumInputDims; ++i) { - if (reduced[i]) { - m_reducedStrides[reduceIndex] = input_strides[i]; - ++reduceIndex; + if (NumOutputDims > 0) { + if (static_cast(Layout) == static_cast(ColMajor)) { + m_outputStrides[0] = 1; + for (int i = 1; i < NumOutputDims; ++i) { + m_outputStrides[i] = m_outputStrides[i - 1] * m_dimensions[i - 1]; + } } else { - m_preservedStrides[outputIndex] = input_strides[i]; - ++outputIndex; + m_outputStrides[NumOutputDims - 1] = 1; + for (int i = NumOutputDims - 2; i >= 0; --i) { + m_outputStrides[i] = m_outputStrides[i + 1] * m_dimensions[i + 1]; + } + } + } + + // Precompute input strides. 
+ if (NumInputDims > 0) { + array input_strides; + if (static_cast(Layout) == static_cast(ColMajor)) { + input_strides[0] = 1; + for (int i = 1; i < NumInputDims; ++i) { + input_strides[i] = input_strides[i-1] * input_dims[i-1]; + } + } else { + input_strides[NumInputDims - 1] = 1; + for (int i = NumInputDims - 2; i >= 0; --i) { + input_strides[i] = input_strides[i + 1] * input_dims[i + 1]; + } + } + + int outputIndex = 0; + int reduceIndex = 0; + for (int i = 0; i < NumInputDims; ++i) { + if (reduced[i]) { + m_reducedStrides[reduceIndex] = input_strides[i]; + ++reduceIndex; + } else { + m_preservedStrides[outputIndex] = input_strides[i]; + ++outputIndex; + } } } // Special case for full reductions - if (NumInputDims == NumReducedDims) { - eigen_assert(m_dimensions[0] == 1); + if (NumOutputDims == 0) { m_preservedStrides[0] = internal::array_prod(input_dims); } } @@ -639,7 +479,7 @@ struct TensorEvaluator, Device> typedef typename internal::remove_const::type CoeffReturnType; typedef typename internal::remove_const::type PacketReturnType; - EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(CoeffReturnType* data) { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(CoeffReturnType* data) { m_impl.evalSubExprsIfNeeded(NULL); // Use the FullReducer if possible. @@ -674,9 +514,9 @@ struct TensorEvaluator, Device> return *m_result; } Op reducer(m_reducer); - if (ReducingInnerMostDims) { + if (ReducingInnerMostDims || RunningFullReduction) { const Index num_values_to_reduce = - (static_cast(Layout) == static_cast(ColMajor)) ? m_preservedStrides[0] : m_preservedStrides[NumOutputDims - 1]; + (static_cast(Layout) == static_cast(ColMajor)) ? m_preservedStrides[0] : m_preservedStrides[NumPreservedStrides - 1]; return internal::InnerMostDimReducer::reduce(*this, firstInput(index), num_values_to_reduce, reducer); } else { @@ -697,7 +537,7 @@ struct TensorEvaluator, Device> EIGEN_ALIGN_MAX typename internal::remove_const::type values[packetSize]; if (ReducingInnerMostDims) { const Index num_values_to_reduce = - (static_cast(Layout) == static_cast(ColMajor)) ? m_preservedStrides[0] : m_preservedStrides[NumOutputDims - 1]; + (static_cast(Layout) == static_cast(ColMajor)) ? m_preservedStrides[0] : m_preservedStrides[NumPreservedStrides - 1]; const Index firstIndex = firstInput(index); for (Index i = 0; i < packetSize; ++i) { Op reducer(m_reducer); @@ -748,7 +588,7 @@ struct TensorEvaluator, Device> if (static_cast(Layout) == static_cast(ColMajor)) { return index * m_preservedStrides[0]; } else { - return index * m_preservedStrides[NumOutputDims - 1]; + return index * m_preservedStrides[NumPreservedStrides - 1]; } } // TBD: optimize the case where we preserve the innermost dimensions. @@ -774,10 +614,10 @@ struct TensorEvaluator, Device> index -= idx * m_outputStrides[i]; } if (PreservingInnerMostDims) { - eigen_assert(m_preservedStrides[NumOutputDims - 1] == 1); + eigen_assert(m_preservedStrides[NumPreservedStrides - 1] == 1); startInput += index; } else { - startInput += index * m_preservedStrides[NumOutputDims - 1]; + startInput += index * m_preservedStrides[NumPreservedStrides - 1]; } } return startInput; @@ -789,7 +629,8 @@ struct TensorEvaluator, Device> array m_outputStrides; // Subset of strides of the input tensor for the non-reduced dimensions. // Indexed by output dimensions. - array m_preservedStrides; + static const int NumPreservedStrides = max_n_1::size; + array m_preservedStrides; // Subset of strides of the input tensor for the reduced dimensions. 
// Indexed by reduced dimensions. diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorReductionCuda.h b/unsupported/Eigen/CXX11/src/Tensor/TensorReductionCuda.h new file mode 100644 index 000000000..af1b9432c --- /dev/null +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorReductionCuda.h @@ -0,0 +1,140 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2014 Benoit Steiner +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_CXX11_TENSOR_TENSOR_REDUCTION_H +#define EIGEN_CXX11_TENSOR_TENSOR_REDUCTION_H + +namespace Eigen { +namespace internal { + + +#if defined(EIGEN_USE_GPU) && defined(__CUDACC__) +// Full reducers for GPU, don't vectorize for now + +// Reducer function that enables multiple cuda thread to safely accumulate at the same +// output address. It basically reads the current value of the output variable, and +// attempts to update it with the new value. If in the meantime another cuda thread +// updated the content of the output address it will try again. +template +__device__ EIGEN_ALWAYS_INLINE void atomicReduce(T* output, T accum, R& reducer) { +#if __CUDA_ARCH__ >= 300 + if (sizeof(T) == 4) + { + unsigned int oldval = *reinterpret_cast(output); + unsigned int newval = oldval; + reducer.reduce(accum, reinterpret_cast(&newval)); + if (newval == oldval) { + return; + } + unsigned int readback; + while ((readback = atomicCAS((unsigned int*)output, oldval, newval)) != oldval) { + oldval = readback; + newval = oldval; + reducer.reduce(accum, reinterpret_cast(&newval)); + if (newval == oldval) { + return; + } + } + } + else if (sizeof(T) == 8) { + unsigned long long oldval = *reinterpret_cast(output); + unsigned long long newval = oldval; + reducer.reduce(accum, reinterpret_cast(&newval)); + if (newval == oldval) { + return; + } + unsigned long long readback; + while ((readback = atomicCAS((unsigned long long*)output, oldval, newval)) != oldval) { + oldval = readback; + newval = oldval; + reducer.reduce(accum, reinterpret_cast(&newval)); + if (newval == oldval) { + return; + } + } + } + else { + assert(0 && "Wordsize not supported"); + } +#else + assert(0 && "Shouldn't be called on unsupported device"); +#endif +} + +template +__device__ inline void atomicReduce(T* output, T accum, SumReducer&) { +#if __CUDA_ARCH__ >= 300 + atomicAdd(output, accum); +#else + assert(0 && "Shouldn't be called on unsupported device"); +#endif +} + +template +__global__ void FullReductionKernel(Reducer reducer, const Self input, Index num_coeffs, + typename Self::CoeffReturnType* output) { + const Index first_index = blockIdx.x * BlockSize * NumPerThread + threadIdx.x; + + if (first_index == 0) { + *output = reducer.initialize(); + } + + typename Self::CoeffReturnType accum = reducer.initialize(); + Index max_iter = numext::mini(num_coeffs - first_index, NumPerThread*BlockSize); + for (Index i = 0; i < max_iter; i+=BlockSize) { + const Index index = first_index + i; + eigen_assert(index < num_coeffs); + typename Self::CoeffReturnType val = input.m_impl.coeff(index); + reducer.reduce(val, &accum); + } + +#pragma unroll + for (int offset = warpSize/2; offset > 0; offset /= 2) { + reducer.reduce(__shfl_down(accum, offset), &accum); + } + + if ((threadIdx.x & (warpSize - 1)) == 0) { + atomicReduce(output, accum, reducer); + } +} + + +template +struct FullReducer { + // 
Unfortunately nvidia doesn't support well exotic types such as complex, + // so reduce the scope of the optimized version of the code to the simple case + // of floats. + static const bool HasOptimizedImplementation = !Op::IsStateful && + internal::is_same::value; + + template + EIGEN_DEVICE_FUNC static void run(const Self& self, Op& reducer, const GpuDevice& device, OutputType* output) { + assert(false && "Should only be called on floats"); + } + + EIGEN_DEVICE_FUNC static void run(const Self& self, Op& reducer, const GpuDevice& device, float* output) { + typedef typename Self::Index Index; + + const Index num_coeffs = array_prod(self.m_impl.dimensions()); + const int block_size = 256; + const int num_per_thread = 128; + const int num_blocks = std::ceil(static_cast(num_coeffs) / (block_size * num_per_thread)); + LAUNCH_CUDA_KERNEL((FullReductionKernel), + num_blocks, block_size, 0, device, reducer, self, num_coeffs, output); + } +}; + +#endif + + +} // end namespace internal +} // end namespace Eigen + +#endif // EIGEN_CXX11_TENSOR_TENSOR_REDUCTION_H diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorReverse.h b/unsupported/Eigen/CXX11/src/Tensor/TensorReverse.h index e092c0e04..10328c61f 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorReverse.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorReverse.h @@ -66,7 +66,7 @@ class TensorReverseOp : public TensorBase, Device const Device& device) : m_impl(op.expression(), device), m_reverse(op.reverse()) { + // Reversing a scalar isn't supported yet. It would be a no-op anyway. + EIGEN_STATIC_ASSERT(NumDims > 0, YOU_MADE_A_PROGRAMMING_MISTAKE); + // Compute strides m_dimensions = m_impl.dimensions(); if (static_cast(Layout) == static_cast(ColMajor)) { diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorStorage.h b/unsupported/Eigen/CXX11/src/Tensor/TensorStorage.h index 9e4cf039d..98631fc7f 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorStorage.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorStorage.h @@ -63,7 +63,7 @@ class TensorStorage // pure dynamic -template +template class TensorStorage, Options_> { public: @@ -71,7 +71,11 @@ class TensorStorage, Options_> typedef DSizes Dimensions; typedef TensorStorage, Options_> Self; - EIGEN_DEVICE_FUNC TensorStorage() : m_data(0), m_dimensions() {} + EIGEN_DEVICE_FUNC TensorStorage() : m_data(0), m_dimensions() { + if (NumIndices_ == 0) { + m_data = internal::conditional_aligned_new_auto(1); + } + } EIGEN_DEVICE_FUNC TensorStorage(internal::constructor_without_unaligned_array_assert) : m_data(0), m_dimensions(internal::template repeat(0)) {} EIGEN_DEVICE_FUNC TensorStorage(Index size, const array& dimensions) @@ -101,13 +105,17 @@ class TensorStorage, Options_> EIGEN_DEVICE_FUNC void resize(Index size, const array& nbDimensions) { + eigen_assert(size >= 1); const Index currentSz = internal::array_prod(m_dimensions); if(size != currentSz) { internal::conditional_aligned_delete_auto(m_data, currentSz); if (size) m_data = internal::conditional_aligned_new_auto(size); - else + else if (NumIndices_ == 0) { + m_data = internal::conditional_aligned_new_auto(1); + } + else m_data = 0; EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN } diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorTraits.h b/unsupported/Eigen/CXX11/src/Tensor/TensorTraits.h index 8f1c02ea4..7a9568b36 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorTraits.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorTraits.h @@ -44,7 +44,7 @@ class compute_tensor_flags }; -template +template struct traits > { typedef Scalar_ 
Scalar; @@ -107,13 +107,13 @@ struct traits > }; -template +template struct eval, Eigen::Dense> { typedef const Tensor<_Scalar, NumIndices_, Options, IndexType_>& type; }; -template +template struct eval, Eigen::Dense> { typedef const Tensor<_Scalar, NumIndices_, Options, IndexType_>& type; @@ -161,13 +161,13 @@ template struct nested typedef typename ref_selector::type type; }; -template +template struct nested > { typedef const Tensor& type; }; -template +template struct nested > { typedef const Tensor& type; diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorUInt128.h b/unsupported/Eigen/CXX11/src/Tensor/TensorUInt128.h new file mode 100644 index 000000000..f5cca0ad7 --- /dev/null +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorUInt128.h @@ -0,0 +1,233 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2015 Benoit Steiner +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_CXX11_TENSOR_TENSOR_UINT128_H +#define EIGEN_CXX11_TENSOR_TENSOR_UINT128_H + +namespace Eigen { +namespace internal { + + +template +struct static_val { + static const uint64_t value = n; + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE operator uint64_t() const { return n; } + + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE static_val() { } + template + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE static_val(const T& v) { + eigen_assert(v == n); + } +}; + + +template +struct TensorUInt128 +{ + HIGH high; + LOW low; + + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE + TensorUInt128(int x) : high(0), low(x) { + eigen_assert(x >= 0); + } + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE + TensorUInt128(int64_t x) : high(0), low(x) { + eigen_assert(x >= 0); + } + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE + TensorUInt128(uint64_t x) : high(0), low(x) { } + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE + TensorUInt128(uint64_t y, uint64_t x) : high(y), low(x) { } + + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE operator LOW() const { + return low; + } + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE LOW lower() const { + return low; + } + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE HIGH upper() const { + return high; + } +}; + + +template +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE +static bool operator == (const TensorUInt128& lhs, const TensorUInt128& rhs) +{ + return (lhs.high == rhs.high) & (lhs.low == rhs.low); +} + +template +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE +static bool operator != (const TensorUInt128& lhs, const TensorUInt128& rhs) +{ + return (lhs.high != rhs.high) | (lhs.low != rhs.low); +} + +template +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE +static bool operator >= (const TensorUInt128& lhs, const TensorUInt128& rhs) +{ + if (lhs.high != rhs.high) { + return lhs.high > rhs.high; + } + return lhs.low >= rhs.low; +} + +template +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE +static bool operator < (const TensorUInt128& lhs, const TensorUInt128& rhs) +{ + if (lhs.high != rhs.high) { + return lhs.high < rhs.high; + } + return lhs.low < rhs.low; +} + +template +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE +static TensorUInt128 operator + (const TensorUInt128& lhs, const TensorUInt128& rhs) +{ + TensorUInt128 result(lhs.high + rhs.high, lhs.low + rhs.low); + if (result.low < rhs.low) { + result.high += 1; + } + return result; +} + +template +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE +static TensorUInt128 operator - (const TensorUInt128& lhs, const TensorUInt128& rhs) +{ + 
TensorUInt128 result(lhs.high - rhs.high, lhs.low - rhs.low); + if (result.low > lhs.low) { + result.high -= 1; + } + return result; +} + + +template +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE +static TensorUInt128 operator * (const TensorUInt128& lhs, const TensorUInt128& rhs) +{ + // Split each 128-bit integer into 4 32-bit integers, and then do the + // multiplications by hand as follow: + // lhs a b c d + // rhs e f g h + // ----------- + // ah bh ch dh + // bg cg dg + // cf df + // de + // The result is stored in 2 64bit integers, high and low. + + const uint64_t LOW = 0x00000000FFFFFFFFLL; + const uint64_t HIGH = 0xFFFFFFFF00000000LL; + + uint64_t d = lhs.low & LOW; + uint64_t c = (lhs.low & HIGH) >> 32LL; + uint64_t b = lhs.high & LOW; + uint64_t a = (lhs.high & HIGH) >> 32LL; + + uint64_t h = rhs.low & LOW; + uint64_t g = (rhs.low & HIGH) >> 32LL; + uint64_t f = rhs.high & LOW; + uint64_t e = (rhs.high & HIGH) >> 32LL; + + // Compute the low 32 bits of low + uint64_t acc = d * h; + uint64_t low = acc & LOW; + // Compute the high 32 bits of low. Add a carry every time we wrap around + acc >>= 32LL; + uint64_t carry = 0; + uint64_t acc2 = acc + c * h; + if (acc2 < acc) { + carry++; + } + acc = acc2 + d * g; + if (acc < acc2) { + carry++; + } + low |= (acc << 32LL); + + // Carry forward the high bits of acc to initiate the computation of the + // low 32 bits of high + acc2 = (acc >> 32LL) | (carry << 32LL); + carry = 0; + + acc = acc2 + b * h; + if (acc < acc2) { + carry++; + } + acc2 = acc + c * g; + if (acc2 < acc) { + carry++; + } + acc = acc2 + d * f; + if (acc < acc2) { + carry++; + } + uint64_t high = acc & LOW; + + // Start to compute the high 32 bits of high. + acc2 = (acc >> 32LL) | (carry << 32LL); + + acc = acc2 + a * h; + acc2 = acc + b * g; + acc = acc2 + c * f; + acc2 = acc + d * e; + high |= (acc2 << 32LL); + + return TensorUInt128(high, low); +} + +template +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE +static TensorUInt128 operator / (const TensorUInt128& lhs, const TensorUInt128& rhs) +{ + if (rhs == TensorUInt128, static_val<1> >(1)) { + return TensorUInt128(lhs.high, lhs.low); + } else if (lhs < rhs) { + return TensorUInt128(0); + } else { + // calculate the biggest power of 2 times rhs that's less than or equal to lhs + TensorUInt128 power2(1); + TensorUInt128 d(rhs); + TensorUInt128 tmp(lhs - d); + while (lhs >= d) { + tmp = tmp - d; + d = d + d; + power2 = power2 + power2; + } + + tmp = TensorUInt128(lhs.high, lhs.low); + TensorUInt128 result(0); + while (power2 != TensorUInt128, static_val<0> >(0)) { + if (tmp >= d) { + tmp = tmp - d; + result = result + power2; + } + // Shift right + power2 = TensorUInt128(power2.high >> 1, (power2.low >> 1) | (power2.high << 63)); + d = TensorUInt128(d.high >> 1, (d.low >> 1) | (d.high << 63)); + } + + return result; + } +} + + +} // namespace internal +} // namespace Eigen + + +#endif // EIGEN_CXX11_TENSOR_TENSOR_UINT128_H diff --git a/unsupported/Eigen/IterativeSolvers b/unsupported/Eigen/IterativeSolvers index ff0d59b6e..31e880bdc 100644 --- a/unsupported/Eigen/IterativeSolvers +++ b/unsupported/Eigen/IterativeSolvers @@ -33,7 +33,7 @@ #include "../../Eigen/Jacobi" #include "../../Eigen/Householder" #include "src/IterativeSolvers/GMRES.h" -#include "src/IterativeSolvers/IncompleteCholesky.h" +#include "src/IterativeSolvers/DGMRES.h" //#include "src/IterativeSolvers/SSORPreconditioner.h" #include "src/IterativeSolvers/MINRES.h" diff --git a/unsupported/Eigen/src/AutoDiff/AutoDiffScalar.h 
b/unsupported/Eigen/src/AutoDiff/AutoDiffScalar.h old mode 100644 new mode 100755 index 8336c2644..e30ad5b6d --- a/unsupported/Eigen/src/AutoDiff/AutoDiffScalar.h +++ b/unsupported/Eigen/src/AutoDiff/AutoDiffScalar.h @@ -99,7 +99,11 @@ class AutoDiffScalar {} template - AutoDiffScalar(const AutoDiffScalar& other) + AutoDiffScalar(const AutoDiffScalar& other +#ifndef EIGEN_PARSED_BY_DOXYGEN + , typename internal::enable_if::value,void*>::type = 0 +#endif + ) : m_value(other.value()), m_derivatives(other.derivatives()) {} @@ -127,6 +131,14 @@ class AutoDiffScalar return *this; } + inline AutoDiffScalar& operator=(const Scalar& other) + { + m_value = other; + if(m_derivatives.size()>0) + m_derivatives.setZero(); + return *this; + } + // inline operator const Scalar& () const { return m_value; } // inline operator Scalar& () { return m_value; } @@ -626,9 +638,10 @@ EIGEN_AUTODIFF_DECLARE_GLOBAL_UNARY(acos, template struct NumTraits > : NumTraits< typename NumTraits::Real > { - typedef AutoDiffScalar::Real,DerType::RowsAtCompileTime,DerType::ColsAtCompileTime> > Real; + typedef AutoDiffScalar::Real,DerType::RowsAtCompileTime,DerType::ColsAtCompileTime, + DerType::Options, DerType::MaxRowsAtCompileTime, DerType::MaxColsAtCompileTime> > Real; typedef AutoDiffScalar NonInteger; - typedef AutoDiffScalar& Nested; + typedef AutoDiffScalar Nested; enum{ RequireInitialization = 1 }; diff --git a/unsupported/Eigen/src/CMakeLists.txt b/unsupported/Eigen/src/CMakeLists.txt index fae1c5854..754953335 100644 --- a/unsupported/Eigen/src/CMakeLists.txt +++ b/unsupported/Eigen/src/CMakeLists.txt @@ -1,5 +1,6 @@ ADD_SUBDIRECTORY(AutoDiff) ADD_SUBDIRECTORY(BVH) +ADD_SUBDIRECTORY(Eigenvalues) ADD_SUBDIRECTORY(FFT) ADD_SUBDIRECTORY(IterativeSolvers) ADD_SUBDIRECTORY(LevenbergMarquardt) diff --git a/unsupported/Eigen/src/Eigenvalues/CMakeLists.txt b/unsupported/Eigen/src/Eigenvalues/CMakeLists.txt new file mode 100644 index 000000000..1d4387c82 --- /dev/null +++ b/unsupported/Eigen/src/Eigenvalues/CMakeLists.txt @@ -0,0 +1,6 @@ +FILE(GLOB Eigen_Eigenvalues_SRCS "*.h") + +INSTALL(FILES + ${Eigen_Eigenvalues_SRCS} + DESTINATION ${INCLUDE_INSTALL_DIR}/unsupported/Eigen/src/Eigenvalues COMPONENT Devel + ) diff --git a/unsupported/Eigen/src/IterativeSolvers/DGMRES.h b/unsupported/Eigen/src/IterativeSolvers/DGMRES.h index 52eb65a2f..bae04fc30 100644 --- a/unsupported/Eigen/src/IterativeSolvers/DGMRES.h +++ b/unsupported/Eigen/src/IterativeSolvers/DGMRES.h @@ -40,7 +40,6 @@ void sortWithPermutation (VectorType& vec, IndexType& perm, typename IndexType:: { eigen_assert(vec.size() == perm.size()); typedef typename IndexType::Scalar Index; - typedef typename VectorType::Scalar Scalar; bool flag; for (Index k = 0; k < ncut; k++) { @@ -84,6 +83,8 @@ void sortWithPermutation (VectorType& vec, IndexType& perm, typename IndexType:: * x = solver.solve(b); * \endcode * + * DGMRES can also be used in a matrix-free context, see the following \link MatrixfreeSolverExample example \endlink. + * * References : * [1] D. NUENTSA WAKAM and F. 
PACULL, Memory Efficient Hybrid * Algebraic Solvers for Linear Systems Arising from Compressible @@ -101,7 +102,7 @@ template< typename _MatrixType, typename _Preconditioner> class DGMRES : public IterativeSolverBase > { typedef IterativeSolverBase Base; - using Base::mp_matrix; + using Base::matrix; using Base::m_error; using Base::m_iterations; using Base::m_info; @@ -112,6 +113,7 @@ class DGMRES : public IterativeSolverBase > typedef _MatrixType MatrixType; typedef typename MatrixType::Scalar Scalar; typedef typename MatrixType::Index Index; + typedef typename MatrixType::StorageIndex StorageIndex; typedef typename MatrixType::RealScalar RealScalar; typedef _Preconditioner Preconditioner; typedef Matrix DenseMatrix; @@ -134,8 +136,8 @@ class DGMRES : public IterativeSolverBase > * this class becomes invalid. Call compute() to update it with the new * matrix A, or modify a copy of A. */ - DGMRES(const MatrixType& A) : Base(A),m_restart(30),m_neig(0),m_r(0),m_maxNeig(5),m_isDeflAllocated(false),m_isDeflInitialized(false) - {} + template + explicit DGMRES(const EigenBase& A) : Base(A.derived()), m_restart(30),m_neig(0),m_r(0),m_maxNeig(5),m_isDeflAllocated(false),m_isDeflInitialized(false) {} ~DGMRES() {} @@ -150,7 +152,7 @@ class DGMRES : public IterativeSolverBase > m_error = Base::m_tolerance; typename Dest::ColXpr xj(x,j); - dgmres(mp_matrix, b.col(j), xj, Base::m_preconditioner); + dgmres(matrix(), b.col(j), xj, Base::m_preconditioner); } m_info = failed ? NumericalIssue : m_error <= Base::m_tolerance ? Success @@ -202,7 +204,7 @@ class DGMRES : public IterativeSolverBase > template int dgmresCycle(const MatrixType& mat, const Preconditioner& precond, Dest& x, DenseVector& r0, RealScalar& beta, const RealScalar& normRhs, int& nbIts) const; // Compute data to use for deflation - int dgmresComputeDeflationData(const MatrixType& mat, const Preconditioner& precond, const Index& it, Index& neig) const; + int dgmresComputeDeflationData(const MatrixType& mat, const Preconditioner& precond, const Index& it, StorageIndex& neig) const; // Apply deflation to a vector template int dgmresApplyDeflation(const RhsType& In, DestType& Out) const; @@ -218,7 +220,7 @@ class DGMRES : public IterativeSolverBase > mutable DenseMatrix m_MU; // matrix operator applied to m_U (for next cycles) mutable DenseMatrix m_T; /* T=U^T*M^{-1}*A*U */ mutable PartialPivLU m_luT; // LU factorization of m_T - mutable int m_neig; //Number of eigenvalues to extract at each restart + mutable StorageIndex m_neig; //Number of eigenvalues to extract at each restart mutable int m_r; // Current number of deflated eigenvalues, size of m_U mutable int m_maxNeig; // Maximum number of eigenvalues to deflate mutable RealScalar m_lambdaN; //Modulus of the largest eigenvalue of A @@ -338,7 +340,7 @@ int DGMRES<_MatrixType, _Preconditioner>::dgmresCycle(const MatrixType& mat, con beta = std::abs(g(it+1)); m_error = beta/normRhs; - std::cerr << nbIts << " Relative Residual Norm " << m_error << std::endl; + // std::cerr << nbIts << " Relative Residual Norm " << m_error << std::endl; it++; nbIts++; if (m_error < m_tolerance) @@ -416,7 +418,7 @@ inline typename DGMRES<_MatrixType, _Preconditioner>::ComplexVector DGMRES<_Matr } template< typename _MatrixType, typename _Preconditioner> -int DGMRES<_MatrixType, _Preconditioner>::dgmresComputeDeflationData(const MatrixType& mat, const Preconditioner& precond, const Index& it, Index& neig) const +int DGMRES<_MatrixType, _Preconditioner>::dgmresComputeDeflationData(const MatrixType& mat, const 
Preconditioner& precond, const Index& it, StorageIndex& neig) const { // First, find the Schur form of the Hessenberg matrix H typename internal::conditional::IsComplex, ComplexSchur, RealSchur >::type schurofH; @@ -426,7 +428,7 @@ int DGMRES<_MatrixType, _Preconditioner>::dgmresComputeDeflationData(const Matri schurofH.computeFromHessenberg(m_Hes.topLeftCorner(it,it), matrixQ, computeU); ComplexVector eig(it); - Matrixperm(it); + Matrixperm(it); eig = this->schurValues(schurofH); // Reorder the absolute values of Schur values diff --git a/unsupported/Eigen/src/IterativeSolvers/GMRES.h b/unsupported/Eigen/src/IterativeSolvers/GMRES.h index 05e5862a5..fbe21fc7e 100644 --- a/unsupported/Eigen/src/IterativeSolvers/GMRES.h +++ b/unsupported/Eigen/src/IterativeSolvers/GMRES.h @@ -251,13 +251,15 @@ struct traits > * By default the iterations start with x=0 as an initial guess of the solution. * One can control the start using the solveWithGuess() method. * + * GMRES can also be used in a matrix-free context, see the following \link MatrixfreeSolverExample example \endlink. + * * \sa class SimplicialCholesky, DiagonalPreconditioner, IdentityPreconditioner */ template< typename _MatrixType, typename _Preconditioner> class GMRES : public IterativeSolverBase > { typedef IterativeSolverBase Base; - using Base::mp_matrix; + using Base::matrix; using Base::m_error; using Base::m_iterations; using Base::m_info; @@ -288,7 +290,8 @@ public: * this class becomes invalid. Call compute() to update it with the new * matrix A, or modify a copy of A. */ - GMRES(const MatrixType& A) : Base(A), m_restart(30) {} + template + explicit GMRES(const EigenBase& A) : Base(A.derived()), m_restart(30) {} ~GMRES() {} @@ -312,7 +315,7 @@ public: m_error = Base::m_tolerance; typename Dest::ColXpr xj(x,j); - if(!internal::gmres(mp_matrix, b.col(j), xj, Base::m_preconditioner, m_iterations, m_restart, m_error)) + if(!internal::gmres(matrix(), b.col(j), xj, Base::m_preconditioner, m_iterations, m_restart, m_error)) failed = true; } m_info = failed ? NumericalIssue diff --git a/unsupported/Eigen/src/IterativeSolvers/MINRES.h b/unsupported/Eigen/src/IterativeSolvers/MINRES.h index c393112a4..256990c1a 100644 --- a/unsupported/Eigen/src/IterativeSolvers/MINRES.h +++ b/unsupported/Eigen/src/IterativeSolvers/MINRES.h @@ -191,6 +191,8 @@ namespace Eigen { * By default the iterations start with x=0 as an initial guess of the solution. * One can control the start using the solveWithGuess() method. * + * MINRES can also be used in a matrix-free context, see the following \link MatrixfreeSolverExample example \endlink. + * * \sa class ConjugateGradient, BiCGSTAB, SimplicialCholesky, DiagonalPreconditioner, IdentityPreconditioner */ template< typename _MatrixType, int _UpLo, typename _Preconditioner> @@ -198,7 +200,7 @@ namespace Eigen { { typedef IterativeSolverBase Base; - using Base::mp_matrix; + using Base::matrix; using Base::m_error; using Base::m_iterations; using Base::m_info; @@ -227,7 +229,8 @@ namespace Eigen { * this class becomes invalid. Call compute() to update it with the new * matrix A, or modify a copy of A. */ - MINRES(const MatrixType& A) : Base(A) {} + template + explicit MINRES(const EigenBase& A) : Base(A.derived()) {} /** Destructor. 
*/ ~MINRES(){} @@ -236,21 +239,31 @@ namespace Eigen { template void _solve_with_guess_impl(const Rhs& b, Dest& x) const { + typedef typename Base::MatrixWrapper MatrixWrapper; + typedef typename Base::ActualMatrixType ActualMatrixType; + enum { + TransposeInput = (!MatrixWrapper::MatrixFree) + && (UpLo==(Lower|Upper)) + && (!MatrixType::IsRowMajor) + && (!NumTraits::IsComplex) + }; + typedef typename internal::conditional, ActualMatrixType const&>::type RowMajorWrapper; + EIGEN_STATIC_ASSERT(EIGEN_IMPLIES(MatrixWrapper::MatrixFree,UpLo==(Lower|Upper)),MATRIX_FREE_CONJUGATE_GRADIENT_IS_COMPATIBLE_WITH_UPPER_UNION_LOWER_MODE_ONLY); typedef typename internal::conditional&, - SparseSelfAdjointView, UpLo> - >::type MatrixWrapperType; - + RowMajorWrapper, + typename MatrixWrapper::template ConstSelfAdjointViewReturnType::Type + >::type SelfAdjointWrapper; + m_iterations = Base::maxIterations(); m_error = Base::m_tolerance; - + RowMajorWrapper row_mat(matrix()); for(int j=0; j > Flags = ((LhsFlags | RhsFlags) & HereditaryBits & RemovedBits) | EvalBeforeNestingBit | EvalBeforeAssigningBit, - CoeffReadCost = Dynamic + CoeffReadCost = HugeCost }; typedef SparseMatrix ReturnType; diff --git a/unsupported/Eigen/src/MatrixFunctions/MatrixExponential.h b/unsupported/Eigen/src/MatrixFunctions/MatrixExponential.h index b37481cbe..14a8aef58 100644 --- a/unsupported/Eigen/src/MatrixFunctions/MatrixExponential.h +++ b/unsupported/Eigen/src/MatrixFunctions/MatrixExponential.h @@ -348,7 +348,7 @@ void matrix_exp_compute(const MatrixType& arg, ResultType &result) typedef typename NumTraits::Real RealScalar; typedef typename std::complex ComplexScalar; if (sizeof(RealScalar) > 14) { - result = arg.matrixFunction(StdStemFunctions::exp); + result = arg.matrixFunction(internal::stem_function_exp); return; } #endif diff --git a/unsupported/Eigen/src/Skyline/SkylineProduct.h b/unsupported/Eigen/src/Skyline/SkylineProduct.h index d218a7c25..d9eb814c1 100644 --- a/unsupported/Eigen/src/Skyline/SkylineProduct.h +++ b/unsupported/Eigen/src/Skyline/SkylineProduct.h @@ -49,7 +49,7 @@ struct internal::traits > { | EvalBeforeAssigningBit | EvalBeforeNestingBit, - CoeffReadCost = Dynamic + CoeffReadCost = HugeCost }; typedef typename internal::conditional class DynamicSparseMatrix : public SparseMatrixBase > { + typedef SparseMatrixBase Base; + using Base::convert_index; public: EIGEN_SPARSE_PUBLIC_INTERFACE(DynamicSparseMatrix) // FIXME: why are these operator already alvailable ??? 
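// --- Illustrative sketch, not part of the patch ---
// The DGMRES/GMRES/MINRES constructors above now accept any EigenBase
// expression (the first step toward the matrix-free usage their docs now
// mention). A minimal usage sketch with MINRES on a small self-adjoint
// sparse matrix; the matrix and right-hand side below are made up.
//
// #include <Eigen/Sparse>
// #include <unsupported/Eigen/IterativeSolvers>
//
// int main() {
//   typedef Eigen::SparseMatrix<double> SpMat;
//   SpMat A(3, 3);
//   A.insert(0, 0) = 4.0;   // a diagonal SPD matrix for simplicity
//   A.insert(1, 1) = 3.0;
//   A.insert(2, 2) = 2.0;
//   A.makeCompressed();
//
//   Eigen::VectorXd b = Eigen::VectorXd::Ones(3);
//   // Lower|Upper: use the full matrix, matching the RowMajorWrapper path
//   // in the hunk above.
//   Eigen::MINRES<SpMat, Eigen::Lower|Eigen::Upper> solver(A);
//   Eigen::VectorXd x = solver.solve(b);
//   return solver.info() == Eigen::Success ? 0 : 1;
// }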
diff --git a/unsupported/test/CMakeLists.txt b/unsupported/test/CMakeLists.txt index 653392e40..81a03f582 100644 --- a/unsupported/test/CMakeLists.txt +++ b/unsupported/test/CMakeLists.txt @@ -49,11 +49,11 @@ ei_add_test(EulerAngles) find_package(MPFR 2.3.0) find_package(GMP) -if(MPFR_FOUND) +if(MPFR_FOUND AND EIGEN_COMPILER_SUPPORT_CXX11) include_directories(${MPFR_INCLUDES} ./mpreal) ei_add_property(EIGEN_TESTED_BACKENDS "MPFR C++, ") set(EIGEN_MPFR_TEST_LIBRARIES ${MPFR_LIBRARIES} ${GMP_LIBRARIES}) - ei_add_test(mpreal_support "" "${EIGEN_MPFR_TEST_LIBRARIES}" ) + ei_add_test(mpreal_support "-std=c++11" "${EIGEN_MPFR_TEST_LIBRARIES}" ) else() ei_add_property(EIGEN_MISSING_BACKENDS "MPFR C++, ") endif() @@ -93,7 +93,6 @@ endif() ei_add_test(polynomialsolver) ei_add_test(polynomialutils) ei_add_test(splines) -ei_add_test(incomplete_cholesky) ei_add_test(gmres) ei_add_test(minres) ei_add_test(levenberg_marquardt) @@ -120,6 +119,7 @@ if(EIGEN_TEST_CXX11) ei_add_test(cxx11_tensor_of_const_values "-std=c++0x") ei_add_test(cxx11_tensor_of_complex "-std=c++0x") ei_add_test(cxx11_tensor_of_strings "-std=c++0x") + ei_add_test(cxx11_tensor_uint128 "-std=c++0x") ei_add_test(cxx11_tensor_intdiv "-std=c++0x") ei_add_test(cxx11_tensor_lvalue "-std=c++0x") ei_add_test(cxx11_tensor_map "-std=c++0x") @@ -145,6 +145,10 @@ if(EIGEN_TEST_CXX11) ei_add_test(cxx11_tensor_io "-std=c++0x") ei_add_test(cxx11_tensor_generator "-std=c++0x") ei_add_test(cxx11_tensor_custom_op "-std=c++0x") + ei_add_test(cxx11_tensor_custom_index "-std=c++0x") + ei_add_test(cxx11_tensor_sugar "-std=c++0x") + ei_add_test(cxx11_tensor_fft "-std=c++0x") + ei_add_test(cxx11_tensor_ifft "-std=c++0x") # These tests needs nvcc # ei_add_test(cxx11_tensor_device "-std=c++0x") diff --git a/unsupported/test/autodiff.cpp b/unsupported/test/autodiff.cpp index 087e7c542..1aa1b3d2d 100644 --- a/unsupported/test/autodiff.cpp +++ b/unsupported/test/autodiff.cpp @@ -129,6 +129,7 @@ template void forward_jacobian(const Func& f) // TODO also check actual derivatives! +template void test_autodiff_scalar() { Vector2f p = Vector2f::Random(); @@ -140,6 +141,7 @@ void test_autodiff_scalar() } // TODO also check actual derivatives! 
+template void test_autodiff_vector() { Vector2f p = Vector2f::Random(); @@ -153,6 +155,7 @@ void test_autodiff_vector() VERIFY_IS_APPROX(res.value(), foo(p)); } +template void test_autodiff_jacobian() { CALL_SUBTEST(( forward_jacobian(TestFunc1()) )); @@ -162,12 +165,56 @@ void test_autodiff_jacobian() CALL_SUBTEST(( forward_jacobian(TestFunc1(3,3)) )); } + +template +void test_autodiff_hessian() +{ + typedef AutoDiffScalar AD; + typedef Matrix VectorAD; + typedef AutoDiffScalar ADD; + typedef Matrix VectorADD; + VectorADD x(2); + double s1 = internal::random(), s2 = internal::random(), s3 = internal::random(), s4 = internal::random(); + x(0).value()=s1; + x(1).value()=s2; + + //set unit vectors for the derivative directions (partial derivatives of the input vector) + x(0).derivatives().resize(2); + x(0).derivatives().setZero(); + x(0).derivatives()(0)= 1; + x(1).derivatives().resize(2); + x(1).derivatives().setZero(); + x(1).derivatives()(1)=1; + + //repeat partial derivatives for the inner AutoDiffScalar + x(0).value().derivatives() = VectorXd::Unit(2,0); + x(1).value().derivatives() = VectorXd::Unit(2,1); + + //set the hessian matrix to zero + for(int idx=0; idx<2; idx++) { + x(0).derivatives()(idx).derivatives() = VectorXd::Zero(2); + x(1).derivatives()(idx).derivatives() = VectorXd::Zero(2); + } + + ADD y = sin(AD(s3)*x(0) + AD(s4)*x(1)); + + VERIFY_IS_APPROX(y.value().derivatives()(0), y.derivatives()(0).value()); + VERIFY_IS_APPROX(y.value().derivatives()(1), y.derivatives()(1).value()); + VERIFY_IS_APPROX(y.value().derivatives()(0), s3*std::cos(s1*s3+s2*s4)); + VERIFY_IS_APPROX(y.value().derivatives()(1), s4*std::cos(s1*s3+s2*s4)); + VERIFY_IS_APPROX(y.derivatives()(0).derivatives(), -std::sin(s1*s3+s2*s4)*Vector2d(s3*s3,s4*s3)); + VERIFY_IS_APPROX(y.derivatives()(1).derivatives(), -std::sin(s1*s3+s2*s4)*Vector2d(s3*s4,s4*s4)); +} + + + void test_autodiff() { for(int i = 0; i < g_repeat; i++) { - CALL_SUBTEST_1( test_autodiff_scalar() ); - CALL_SUBTEST_2( test_autodiff_vector() ); - CALL_SUBTEST_3( test_autodiff_jacobian() ); + CALL_SUBTEST_1( test_autodiff_scalar<1>() ); + CALL_SUBTEST_2( test_autodiff_vector<1>() ); + CALL_SUBTEST_3( test_autodiff_jacobian<1>() ); + CALL_SUBTEST_4( test_autodiff_hessian<1>() ); } } diff --git a/unsupported/test/cxx11_tensor_argmax.cpp b/unsupported/test/cxx11_tensor_argmax.cpp index 6eeecb717..482dfa7de 100644 --- a/unsupported/test/cxx11_tensor_argmax.cpp +++ b/unsupported/test/cxx11_tensor_argmax.cpp @@ -61,14 +61,14 @@ static void test_argmax_tuple_reducer() Tensor, 4, DataLayout> index_tuples(2,3,5,7); index_tuples = tensor.index_tuples(); - Tensor, 1, DataLayout> reduced(1); + Tensor, 0, DataLayout> reduced; DimensionList dims; reduced = index_tuples.reduce( dims, internal::ArgMaxTupleReducer>()); - Tensor maxi = tensor.maximum(); + Tensor maxi = tensor.maximum(); - VERIFY_IS_EQUAL(maxi(0), reduced(0).second); + VERIFY_IS_EQUAL(maxi(), reduced(0).second); array reduce_dims; for (int d = 0; d < 3; ++d) reduce_dims[d] = d; @@ -93,14 +93,14 @@ static void test_argmin_tuple_reducer() Tensor, 4, DataLayout> index_tuples(2,3,5,7); index_tuples = tensor.index_tuples(); - Tensor, 1, DataLayout> reduced(1); + Tensor, 0, DataLayout> reduced; DimensionList dims; reduced = index_tuples.reduce( dims, internal::ArgMinTupleReducer>()); - Tensor mini = tensor.minimum(); + Tensor mini = tensor.minimum(); - VERIFY_IS_EQUAL(mini(0), reduced(0).second); + VERIFY_IS_EQUAL(mini(), reduced(0).second); array reduce_dims; for (int d = 0; d < 3; ++d) reduce_dims[d] 
= d; @@ -123,7 +123,7 @@ static void test_simple_argmax() tensor = (tensor + tensor.constant(0.5)).log(); tensor(0,0,0,0) = 10.0; - Tensor tensor_argmax(1); + Tensor tensor_argmax; tensor_argmax = tensor.argmax(); @@ -144,7 +144,7 @@ static void test_simple_argmin() tensor = (tensor + tensor.constant(0.5)).log(); tensor(0,0,0,0) = -10.0; - Tensor tensor_argmin(1); + Tensor tensor_argmin; tensor_argmin = tensor.argmin(); diff --git a/unsupported/test/cxx11_tensor_assign.cpp b/unsupported/test/cxx11_tensor_assign.cpp index d16aaf847..e5cf61fe1 100644 --- a/unsupported/test/cxx11_tensor_assign.cpp +++ b/unsupported/test/cxx11_tensor_assign.cpp @@ -29,8 +29,8 @@ static void test_1d() int row_major[6]; memset(col_major, 0, 6*sizeof(int)); memset(row_major, 0, 6*sizeof(int)); - TensorMap> vec3(col_major, 6); - TensorMap> vec4(row_major, 6); + TensorMap > vec3(col_major, 6); + TensorMap > vec4(row_major, 6); vec3 = vec1; vec4 = vec2; @@ -92,8 +92,8 @@ static void test_2d() int row_major[6]; memset(col_major, 0, 6*sizeof(int)); memset(row_major, 0, 6*sizeof(int)); - TensorMap> mat3(row_major, 2, 3); - TensorMap> mat4(col_major, 2, 3); + TensorMap > mat3(row_major, 2, 3); + TensorMap > mat4(col_major, 2, 3); mat3 = mat1; mat4 = mat2; @@ -152,8 +152,8 @@ static void test_3d() int row_major[2*3*7]; memset(col_major, 0, 2*3*7*sizeof(int)); memset(row_major, 0, 2*3*7*sizeof(int)); - TensorMap> mat3(col_major, 2, 3, 7); - TensorMap> mat4(row_major, 2, 3, 7); + TensorMap > mat3(col_major, 2, 3, 7); + TensorMap > mat4(row_major, 2, 3, 7); mat3 = mat1; mat4 = mat2; diff --git a/unsupported/test/cxx11_tensor_casts.cpp b/unsupported/test/cxx11_tensor_casts.cpp index 729e43327..3c6d0d2ff 100644 --- a/unsupported/test/cxx11_tensor_casts.cpp +++ b/unsupported/test/cxx11_tensor_casts.cpp @@ -24,12 +24,12 @@ static void test_simple_cast() cplextensor.setRandom(); chartensor = ftensor.cast(); - cplextensor = ftensor.cast>(); + cplextensor = ftensor.cast >(); for (int i = 0; i < 20; ++i) { for (int j = 0; j < 30; ++j) { VERIFY_IS_EQUAL(chartensor(i,j), static_cast(ftensor(i,j))); - VERIFY_IS_EQUAL(cplextensor(i,j), static_cast>(ftensor(i,j))); + VERIFY_IS_EQUAL(cplextensor(i,j), static_cast >(ftensor(i,j))); } } } diff --git a/unsupported/test/cxx11_tensor_cuda.cpp b/unsupported/test/cxx11_tensor_cuda.cpp index 5ff082a3a..49e1894ab 100644 --- a/unsupported/test/cxx11_tensor_cuda.cpp +++ b/unsupported/test/cxx11_tensor_cuda.cpp @@ -507,6 +507,115 @@ static void test_cuda_convolution_3d() } } + +template +void test_cuda_lgamma(const Scalar stddev) +{ + Tensor in(72,97); + in.setRandom(); + in *= in.constant(stddev); + Tensor out(72,97); + out.setZero(); + + std::size_t bytes = in.size() * sizeof(Scalar); + + Scalar* d_in; + Scalar* d_out; + cudaMalloc((void**)(&d_in), bytes); + cudaMalloc((void**)(&d_out), bytes); + + cudaMemcpy(d_in, in.data(), bytes, cudaMemcpyHostToDevice); + + Eigen::CudaStreamDevice stream; + Eigen::GpuDevice gpu_device(&stream); + + Eigen::TensorMap > gpu_in(d_in, 72, 97); + Eigen::TensorMap > gpu_out(d_out, 72, 97); + + gpu_out.device(gpu_device) = gpu_in.lgamma(); + + assert(cudaMemcpyAsync(out.data(), d_out, bytes, cudaMemcpyDeviceToHost, gpu_device.stream()) == cudaSuccess); + assert(cudaStreamSynchronize(gpu_device.stream()) == cudaSuccess); + + for (int i = 0; i < 72; ++i) { + for (int j = 0; j < 97; ++j) { + VERIFY_IS_APPROX(out(i,j), (std::lgamma)(in(i,j))); + } + } +} + +template +void test_cuda_erf(const Scalar stddev) +{ + Tensor in(72,97); + in.setRandom(); + in *= 
in.constant(stddev); + Tensor out(72,97); + out.setZero(); + + std::size_t bytes = in.size() * sizeof(Scalar); + + Scalar* d_in; + Scalar* d_out; + cudaMalloc((void**)(&d_in), bytes); + cudaMalloc((void**)(&d_out), bytes); + + cudaMemcpy(d_in, in.data(), bytes, cudaMemcpyHostToDevice); + + Eigen::CudaStreamDevice stream; + Eigen::GpuDevice gpu_device(&stream); + + Eigen::TensorMap > gpu_in(d_in, 72, 97); + Eigen::TensorMap > gpu_out(d_out, 72, 97); + + gpu_out.device(gpu_device) = gpu_in.erf(); + + assert(cudaMemcpyAsync(out.data(), d_out, bytes, cudaMemcpyDeviceToHost, gpu_device.stream()) == cudaSuccess); + assert(cudaStreamSynchronize(gpu_device.stream()) == cudaSuccess); + + for (int i = 0; i < 72; ++i) { + for (int j = 0; j < 97; ++j) { + VERIFY_IS_APPROX(out(i,j), (std::erf)(in(i,j))); + } + } +} + +template +void test_cuda_erfc(const Scalar stddev) +{ + Tensor in(72,97); + in.setRandom(); + in *= in.constant(stddev); + Tensor out(72,97); + out.setZero(); + + std::size_t bytes = in.size() * sizeof(Scalar); + + Scalar* d_in; + Scalar* d_out; + cudaMalloc((void**)(&d_in), bytes); + cudaMalloc((void**)(&d_out), bytes); + + cudaMemcpy(d_in, in.data(), bytes, cudaMemcpyHostToDevice); + + Eigen::CudaStreamDevice stream; + Eigen::GpuDevice gpu_device(&stream); + + Eigen::TensorMap > gpu_in(d_in, 72, 97); + Eigen::TensorMap > gpu_out(d_out, 72, 97); + + gpu_out.device(gpu_device) = gpu_in.erfc(); + + assert(cudaMemcpyAsync(out.data(), d_out, bytes, cudaMemcpyDeviceToHost, gpu_device.stream()) == cudaSuccess); + assert(cudaStreamSynchronize(gpu_device.stream()) == cudaSuccess); + + for (int i = 0; i < 72; ++i) { + for (int j = 0; j < 97; ++j) { + VERIFY_IS_APPROX(out(i,j), (std::erfc)(in(i,j))); + } + } +} + void test_cxx11_tensor_cuda() { CALL_SUBTEST(test_cuda_elementwise_small()); @@ -522,4 +631,34 @@ void test_cxx11_tensor_cuda() CALL_SUBTEST(test_cuda_convolution_2d()); CALL_SUBTEST(test_cuda_convolution_3d()); CALL_SUBTEST(test_cuda_convolution_3d()); + CALL_SUBTEST(test_cuda_lgamma(1.0f)); + CALL_SUBTEST(test_cuda_lgamma(100.0f)); + CALL_SUBTEST(test_cuda_lgamma(0.01f)); + CALL_SUBTEST(test_cuda_lgamma(0.001f)); + CALL_SUBTEST(test_cuda_erf(1.0f)); + CALL_SUBTEST(test_cuda_erf(100.0f)); + CALL_SUBTEST(test_cuda_erf(0.01f)); + CALL_SUBTEST(test_cuda_erf(0.001f)); + CALL_SUBTEST(test_cuda_erfc(1.0f)); + // CALL_SUBTEST(test_cuda_erfc(100.0f)); + CALL_SUBTEST(test_cuda_erfc(5.0f)); // CUDA erfc lacks precision for large inputs + CALL_SUBTEST(test_cuda_erfc(0.01f)); + CALL_SUBTEST(test_cuda_erfc(0.001f)); + CALL_SUBTEST(test_cuda_tanh(1.0)); + CALL_SUBTEST(test_cuda_tanh(100.0)); + CALL_SUBTEST(test_cuda_tanh(0.01)); + CALL_SUBTEST(test_cuda_tanh(0.001)); + CALL_SUBTEST(test_cuda_lgamma(1.0)); + CALL_SUBTEST(test_cuda_lgamma(100.0)); + CALL_SUBTEST(test_cuda_lgamma(0.01)); + CALL_SUBTEST(test_cuda_lgamma(0.001)); + CALL_SUBTEST(test_cuda_erf(1.0)); + CALL_SUBTEST(test_cuda_erf(100.0)); + CALL_SUBTEST(test_cuda_erf(0.01)); + CALL_SUBTEST(test_cuda_erf(0.001)); + CALL_SUBTEST(test_cuda_erfc(1.0)); + // CALL_SUBTEST(test_cuda_erfc(100.0)); + CALL_SUBTEST(test_cuda_erfc(5.0)); // CUDA erfc lacks precision for large inputs + CALL_SUBTEST(test_cuda_erfc(0.01)); + CALL_SUBTEST(test_cuda_erfc(0.001)); } diff --git a/unsupported/test/cxx11_tensor_custom_index.cpp b/unsupported/test/cxx11_tensor_custom_index.cpp new file mode 100644 index 000000000..4528cc176 --- /dev/null +++ b/unsupported/test/cxx11_tensor_custom_index.cpp @@ -0,0 +1,100 @@ +// This file is part of Eigen, a lightweight C++ 
template library +// for linear algebra. +// +// Copyright (C) 2015 Benoit Steiner +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#include "main.h" +#include +#include + +#include +#include + +using Eigen::Tensor; + + +template +static void test_map_as_index() +{ +#ifdef EIGEN_HAS_SFINAE + Tensor tensor(2, 3, 5, 7); + tensor.setRandom(); + + using NormalIndex = DSizes; + using CustomIndex = std::map; + CustomIndex coeffC; + coeffC[0] = 1; + coeffC[1] = 2; + coeffC[2] = 4; + coeffC[3] = 1; + NormalIndex coeff(1,2,4,1); + + VERIFY_IS_EQUAL(tensor.coeff(coeffC), tensor.coeff(coeff)); + VERIFY_IS_EQUAL(tensor.coeffRef(coeffC), tensor.coeffRef(coeff)); +#endif +} + + +template +static void test_matrix_as_index() +{ +#ifdef EIGEN_HAS_SFINAE + Tensor tensor(2, 3, 5, 7); + tensor.setRandom(); + + using NormalIndex = DSizes; + using CustomIndex = Matrix; + CustomIndex coeffC(1,2,4,1); + NormalIndex coeff(1,2,4,1); + + VERIFY_IS_EQUAL(tensor.coeff(coeffC), tensor.coeff(coeff)); + VERIFY_IS_EQUAL(tensor.coeffRef(coeffC), tensor.coeffRef(coeff)); +#endif +} + + +template +static void test_varlist_as_index() +{ +#ifdef EIGEN_HAS_SFINAE + Tensor tensor(2, 3, 5, 7); + tensor.setRandom(); + + DSizes coeff(1,2,4,1); + + VERIFY_IS_EQUAL(tensor.coeff({1,2,4,1}), tensor.coeff(coeff)); + VERIFY_IS_EQUAL(tensor.coeffRef({1,2,4,1}), tensor.coeffRef(coeff)); +#endif +} + + +template +static void test_sizes_as_index() +{ +#ifdef EIGEN_HAS_SFINAE + Tensor tensor(2, 3, 5, 7); + tensor.setRandom(); + + DSizes coeff(1,2,4,1); + Sizes<1,2,4,1> coeffC; + + VERIFY_IS_EQUAL(tensor.coeff(coeffC), tensor.coeff(coeff)); + VERIFY_IS_EQUAL(tensor.coeffRef(coeffC), tensor.coeffRef(coeff)); +#endif +} + + +void test_cxx11_tensor_custom_index() { + test_map_as_index(); + test_map_as_index(); + test_matrix_as_index(); + test_matrix_as_index(); + test_varlist_as_index(); + test_varlist_as_index(); + test_sizes_as_index(); + test_sizes_as_index(); +} diff --git a/unsupported/test/cxx11_tensor_custom_op.cpp b/unsupported/test/cxx11_tensor_custom_op.cpp index 7e33c9580..8baa477cc 100644 --- a/unsupported/test/cxx11_tensor_custom_op.cpp +++ b/unsupported/test/cxx11_tensor_custom_op.cpp @@ -25,7 +25,9 @@ struct InsertZeros { template void eval(const Tensor& input, Output& output, const Device& device) const { - array strides{{2, 2}}; + array strides; + strides[0] = 2; + strides[1] = 2; output.stride(strides).device(device) = input; Eigen::DSizes offsets(1,1); @@ -70,7 +72,8 @@ struct BatchMatMul { Output& output, const Device& device) const { typedef Tensor::DimensionPair DimPair; - array dims({{DimPair(1, 0)}}); + array dims; + dims[0] = DimPair(1, 0); for (int i = 0; i < output.dimension(2); ++i) { output.template chip<2>(i).device(device) = input1.chip<2>(i).contract(input2.chip<2>(i), dims); } @@ -88,9 +91,10 @@ static void test_custom_binary_op() Tensor result = tensor1.customOp(tensor2, BatchMatMul()); for (int i = 0; i < 5; ++i) { typedef Tensor::DimensionPair DimPair; - array dims({{DimPair(1, 0)}}); + array dims; + dims[0] = DimPair(1, 0); Tensor reference = tensor1.chip<2>(i).contract(tensor2.chip<2>(i), dims); - TensorRef> val = result.chip<2>(i); + TensorRef > val = result.chip<2>(i); for (int j = 0; j < 2; ++j) { for (int k = 0; k < 7; ++k) { VERIFY_IS_APPROX(val(j, k), reference(j, k)); diff --git a/unsupported/test/cxx11_tensor_fft.cpp 
diff --git a/unsupported/test/cxx11_tensor_custom_op.cpp b/unsupported/test/cxx11_tensor_custom_op.cpp
index 7e33c9580..8baa477cc 100644
--- a/unsupported/test/cxx11_tensor_custom_op.cpp
+++ b/unsupported/test/cxx11_tensor_custom_op.cpp
@@ -25,7 +25,9 @@ struct InsertZeros {
   template <typename Output, typename Device>
   void eval(const Tensor<float, 2>& input, Output& output, const Device& device) const
   {
-    array<DenseIndex, 2> strides{{2, 2}};
+    array<DenseIndex, 2> strides;
+    strides[0] = 2;
+    strides[1] = 2;
     output.stride(strides).device(device) = input;
 
     Eigen::DSizes<DenseIndex, 2> offsets(1,1);
@@ -70,7 +72,8 @@ struct BatchMatMul {
             Output& output, const Device& device) const
   {
     typedef Tensor<float, 1>::DimensionPair DimPair;
-    array<DimPair, 1> dims({{DimPair(1, 0)}});
+    array<DimPair, 1> dims;
+    dims[0] = DimPair(1, 0);
     for (int i = 0; i < output.dimension(2); ++i) {
       output.template chip<2>(i).device(device) = input1.chip<2>(i).contract(input2.chip<2>(i), dims);
     }
@@ -88,9 +91,10 @@ static void test_custom_binary_op()
   Tensor<float, 3> result = tensor1.customOp(tensor2, BatchMatMul());
   for (int i = 0; i < 5; ++i) {
     typedef Tensor<float, 1>::DimensionPair DimPair;
-    array<DimPair, 1> dims({{DimPair(1, 0)}});
+    array<DimPair, 1> dims;
+    dims[0] = DimPair(1, 0);
     Tensor<float, 2> reference = tensor1.chip<2>(i).contract(tensor2.chip<2>(i), dims);
-    TensorRef<Tensor<float, 2>> val = result.chip<2>(i);
+    TensorRef<Tensor<float, 2> > val = result.chip<2>(i);
     for (int j = 0; j < 2; ++j) {
       for (int k = 0; k < 7; ++k) {
         VERIFY_IS_APPROX(val(j, k), reference(j, k));
diff --git a/unsupported/test/cxx11_tensor_fft.cpp b/unsupported/test/cxx11_tensor_fft.cpp
new file mode 100644
index 000000000..0f6e09106
--- /dev/null
+++ b/unsupported/test/cxx11_tensor_fft.cpp
@@ -0,0 +1,273 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2014 Jianwei Cui <thucjw@gmail.com>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#include "main.h"
+#include <Eigen/CXX11/Tensor>
+
+using Eigen::Tensor;
+
+template <int DataLayout>
+static void test_fft_2D_golden() {
+  Tensor<float, 2, DataLayout, long> input(2, 3);
+  input(0, 0) = 1;
+  input(0, 1) = 2;
+  input(0, 2) = 3;
+  input(1, 0) = 4;
+  input(1, 1) = 5;
+  input(1, 2) = 6;
+
+  array<int, 2> fft;
+  fft[0] = 0;
+  fft[1] = 1;
+
+  Tensor<std::complex<float>, 2, DataLayout, long> output = input.template fft<Eigen::BothParts, Eigen::FFT_FORWARD>(fft);
+
+  std::complex<float> output_golden[6]; // in ColMajor order
+  output_golden[0] = std::complex<float>(21, 0);
+  output_golden[1] = std::complex<float>(-9, 0);
+  output_golden[2] = std::complex<float>(-3, 1.73205);
+  output_golden[3] = std::complex<float>( 0, 0);
+  output_golden[4] = std::complex<float>(-3, -1.73205);
+  output_golden[5] = std::complex<float>(0 ,0);
+
+  std::complex<float> c_offset = std::complex<float>(1.0, 1.0);
+
+  if (DataLayout == ColMajor) {
+    VERIFY_IS_APPROX(output(0) + c_offset, output_golden[0] + c_offset);
+    VERIFY_IS_APPROX(output(1) + c_offset, output_golden[1] + c_offset);
+    VERIFY_IS_APPROX(output(2) + c_offset, output_golden[2] + c_offset);
+    VERIFY_IS_APPROX(output(3) + c_offset, output_golden[3] + c_offset);
+    VERIFY_IS_APPROX(output(4) + c_offset, output_golden[4] + c_offset);
+    VERIFY_IS_APPROX(output(5) + c_offset, output_golden[5] + c_offset);
+  }
+  else {
+    VERIFY_IS_APPROX(output(0)+ c_offset, output_golden[0]+ c_offset);
+    VERIFY_IS_APPROX(output(1)+ c_offset, output_golden[2]+ c_offset);
+    VERIFY_IS_APPROX(output(2)+ c_offset, output_golden[4]+ c_offset);
+    VERIFY_IS_APPROX(output(3)+ c_offset, output_golden[1]+ c_offset);
+    VERIFY_IS_APPROX(output(4)+ c_offset, output_golden[3]+ c_offset);
+    VERIFY_IS_APPROX(output(5)+ c_offset, output_golden[5]+ c_offset);
+  }
+}
+
+static void test_fft_complex_input_golden() {
+  Tensor<std::complex<float>, 1, ColMajor, long> input(5);
+  input(0) = std::complex<float>(1, 1);
+  input(1) = std::complex<float>(2, 2);
+  input(2) = std::complex<float>(3, 3);
+  input(3) = std::complex<float>(4, 4);
+  input(4) = std::complex<float>(5, 5);
+
+  array<int, 1> fft;
+  fft[0] = 0;
+
+  Tensor<std::complex<float>, 1, ColMajor, long> forward_output_both_parts = input.fft<Eigen::BothParts, Eigen::FFT_FORWARD>(fft);
+  Tensor<std::complex<float>, 1, ColMajor, long> reverse_output_both_parts = input.fft<Eigen::BothParts, Eigen::FFT_REVERSE>(fft);
+
+  Tensor<float, 1, ColMajor, long> forward_output_real_part = input.fft<Eigen::RealPart, Eigen::FFT_FORWARD>(fft);
+  Tensor<float, 1, ColMajor, long> reverse_output_real_part = input.fft<Eigen::RealPart, Eigen::FFT_REVERSE>(fft);
+
+  Tensor<float, 1, ColMajor, long> forward_output_imag_part = input.fft<Eigen::ImagPart, Eigen::FFT_FORWARD>(fft);
+  Tensor<float, 1, ColMajor, long> reverse_output_imag_part = input.fft<Eigen::ImagPart, Eigen::FFT_REVERSE>(fft);
+
+  VERIFY_IS_EQUAL(forward_output_both_parts.dimension(0), input.dimension(0));
+  VERIFY_IS_EQUAL(reverse_output_both_parts.dimension(0), input.dimension(0));
+
+  VERIFY_IS_EQUAL(forward_output_real_part.dimension(0), input.dimension(0));
+  VERIFY_IS_EQUAL(reverse_output_real_part.dimension(0), input.dimension(0));
+
+  VERIFY_IS_EQUAL(forward_output_imag_part.dimension(0), input.dimension(0));
+  VERIFY_IS_EQUAL(reverse_output_imag_part.dimension(0), input.dimension(0));
+
+  std::complex<float> forward_golden_result[5];
+  std::complex<float> reverse_golden_result[5];
+
+  forward_golden_result[0] = std::complex<float>(15.000000000000000,+15.000000000000000);
+  forward_golden_result[1] = std::complex<float>(-5.940954801177935, +0.940954801177934);
+  forward_golden_result[2] = std::complex<float>(-3.312299240582266, -1.687700759417735);
+  forward_golden_result[3] = std::complex<float>(-1.687700759417735, -3.312299240582266);
+  forward_golden_result[4] = std::complex<float>( 0.940954801177934, -5.940954801177935);
+
+  reverse_golden_result[0] = std::complex<float>( 3.000000000000000, + 3.000000000000000);
+  reverse_golden_result[1] = std::complex<float>( 0.188190960235587, - 1.188190960235587);
+  reverse_golden_result[2] = std::complex<float>(-0.337540151883547, - 0.662459848116453);
+  reverse_golden_result[3] = std::complex<float>(-0.662459848116453, - 0.337540151883547);
+  reverse_golden_result[4] = std::complex<float>(-1.188190960235587, + 0.188190960235587);
+
+  for(int i = 0; i < 5; ++i) {
+    VERIFY_IS_APPROX(forward_output_both_parts(i), forward_golden_result[i]);
+    VERIFY_IS_APPROX(forward_output_real_part(i), forward_golden_result[i].real());
+    VERIFY_IS_APPROX(forward_output_imag_part(i), forward_golden_result[i].imag());
+  }
+
+  for(int i = 0; i < 5; ++i) {
+    VERIFY_IS_APPROX(reverse_output_both_parts(i), reverse_golden_result[i]);
+    VERIFY_IS_APPROX(reverse_output_real_part(i), reverse_golden_result[i].real());
+    VERIFY_IS_APPROX(reverse_output_imag_part(i), reverse_golden_result[i].imag());
+  }
+}
+
+static void test_fft_real_input_golden() {
+  Tensor<float, 1, ColMajor, long> input(5);
+  input(0) = 1.0;
+  input(1) = 2.0;
+  input(2) = 3.0;
+  input(3) = 4.0;
+  input(4) = 5.0;
+
+  array<int, 1> fft;
+  fft[0] = 0;
+
+  Tensor<std::complex<float>, 1, ColMajor, long> forward_output_both_parts = input.fft<Eigen::BothParts, Eigen::FFT_FORWARD>(fft);
+  Tensor<std::complex<float>, 1, ColMajor, long> reverse_output_both_parts = input.fft<Eigen::BothParts, Eigen::FFT_REVERSE>(fft);
+
+  Tensor<float, 1, ColMajor, long> forward_output_real_part = input.fft<Eigen::RealPart, Eigen::FFT_FORWARD>(fft);
+  Tensor<float, 1, ColMajor, long> reverse_output_real_part = input.fft<Eigen::RealPart, Eigen::FFT_REVERSE>(fft);
+
+  Tensor<float, 1, ColMajor, long> forward_output_imag_part = input.fft<Eigen::ImagPart, Eigen::FFT_FORWARD>(fft);
+  Tensor<float, 1, ColMajor, long> reverse_output_imag_part = input.fft<Eigen::ImagPart, Eigen::FFT_REVERSE>(fft);
+
+  VERIFY_IS_EQUAL(forward_output_both_parts.dimension(0), input.dimension(0));
+  VERIFY_IS_EQUAL(reverse_output_both_parts.dimension(0), input.dimension(0));
+
+  VERIFY_IS_EQUAL(forward_output_real_part.dimension(0), input.dimension(0));
+  VERIFY_IS_EQUAL(reverse_output_real_part.dimension(0), input.dimension(0));
+
+  VERIFY_IS_EQUAL(forward_output_imag_part.dimension(0), input.dimension(0));
+  VERIFY_IS_EQUAL(reverse_output_imag_part.dimension(0), input.dimension(0));
+
+  std::complex<float> forward_golden_result[5];
+  std::complex<float> reverse_golden_result[5];
+
+
+  forward_golden_result[0] = std::complex<float>(  15, 0);
+  forward_golden_result[1] = std::complex<float>(-2.5, +3.44095480117793);
+  forward_golden_result[2] = std::complex<float>(-2.5, +0.81229924058227);
+  forward_golden_result[3] = std::complex<float>(-2.5, -0.81229924058227);
+  forward_golden_result[4] = std::complex<float>(-2.5, -3.44095480117793);
+
+  reverse_golden_result[0] = std::complex<float>( 3.0, 0);
+  reverse_golden_result[1] = std::complex<float>(-0.5, -0.688190960235587);
+  reverse_golden_result[2] = std::complex<float>(-0.5, -0.162459848116453);
+  reverse_golden_result[3] = std::complex<float>(-0.5, +0.162459848116453);
+  reverse_golden_result[4] = std::complex<float>(-0.5, +0.688190960235587);
+
+  std::complex<float> c_offset(1.0, 1.0);
+  float r_offset = 1.0;
+
+  for(int i = 0; i < 5; ++i) {
+    VERIFY_IS_APPROX(forward_output_both_parts(i) + c_offset, forward_golden_result[i] + c_offset);
+    VERIFY_IS_APPROX(forward_output_real_part(i) + r_offset, forward_golden_result[i].real() + r_offset);
+    VERIFY_IS_APPROX(forward_output_imag_part(i) + r_offset, forward_golden_result[i].imag() + r_offset);
+  }
+
+  for(int i = 0; i < 5; ++i) {
+    VERIFY_IS_APPROX(reverse_output_both_parts(i) + c_offset, reverse_golden_result[i] + c_offset);
+    VERIFY_IS_APPROX(reverse_output_real_part(i) + r_offset, reverse_golden_result[i].real() + r_offset);
+    VERIFY_IS_APPROX(reverse_output_imag_part(i) + r_offset, reverse_golden_result[i].imag() + r_offset);
+  }
+}
+
+
+template <int DataLayout, typename RealScalar, bool isComplexInput, int FFTResultType, int FFTDirection, int TensorRank>
+static void test_fft_real_input_energy() {
+
+  Eigen::DSizes<long, TensorRank> dimensions;
+  int total_size = 1;
+  for (int i = 0; i < TensorRank; ++i) {
+    dimensions[i] = rand() % 20 + 1;
+    total_size *= dimensions[i];
+  }
+  const DSizes<long, TensorRank> arr = dimensions;
+
+  typedef typename internal::conditional<isComplexInput == true, std::complex<RealScalar>, RealScalar>::type InputScalar;
+
+  Tensor<InputScalar, TensorRank, DataLayout, long> input;
+  input.resize(arr);
+  input.setRandom();
+
+  array<int, TensorRank> fft;
+  for (int i = 0; i < TensorRank; ++i) {
+    fft[i] = i;
+  }
+
+  typedef typename internal::conditional<FFTResultType == Eigen::BothParts, std::complex<RealScalar>, RealScalar>::type OutputScalar;
+  Tensor<OutputScalar, TensorRank, DataLayout, long> output;
+  output = input.template fft<FFTResultType, FFTDirection>(fft);
+
+  for (int i = 0; i < TensorRank; ++i) {
+    VERIFY_IS_EQUAL(output.dimension(i), input.dimension(i));
+  }
+
+  float energy_original = 0.0;
+  float energy_after_fft = 0.0;
+
+  for (int i = 0; i < total_size; ++i) {
+    energy_original += pow(std::abs(input(i)), 2);
+  }
+
+  for (int i = 0; i < total_size; ++i) {
+    energy_after_fft += pow(std::abs(output(i)), 2);
+  }
+
+  if(FFTDirection == FFT_FORWARD) {
+    VERIFY_IS_APPROX(energy_original, energy_after_fft / total_size);
+  }
+  else {
+    VERIFY_IS_APPROX(energy_original, energy_after_fft * total_size);
+  }
+}
+
+void test_cxx11_tensor_fft() {
+    test_fft_complex_input_golden();
+    test_fft_real_input_golden();
+
+    test_fft_2D_golden<ColMajor>();
+    test_fft_2D_golden<RowMajor>();
+
+    test_fft_real_input_energy<ColMajor, float,  true,  Eigen::BothParts, FFT_FORWARD, 1>();
+    test_fft_real_input_energy<ColMajor, double, true,  Eigen::BothParts, FFT_FORWARD, 1>();
+    test_fft_real_input_energy<ColMajor, float,  false, Eigen::BothParts, FFT_FORWARD, 1>();
+    test_fft_real_input_energy<ColMajor, double, false, Eigen::BothParts, FFT_FORWARD, 1>();
+
+    test_fft_real_input_energy<ColMajor, float,  true,  Eigen::BothParts, FFT_FORWARD, 2>();
+    test_fft_real_input_energy<ColMajor, double, true,  Eigen::BothParts, FFT_FORWARD, 2>();
+    test_fft_real_input_energy<ColMajor, float,  false, Eigen::BothParts, FFT_FORWARD, 2>();
+    test_fft_real_input_energy<ColMajor, double, false, Eigen::BothParts, FFT_FORWARD, 2>();
+
+    test_fft_real_input_energy<ColMajor, float,  true,  Eigen::BothParts, FFT_FORWARD, 3>();
+    test_fft_real_input_energy<ColMajor, double, true,  Eigen::BothParts, FFT_FORWARD, 3>();
+    test_fft_real_input_energy<ColMajor, float,  false, Eigen::BothParts, FFT_FORWARD, 3>();
+    test_fft_real_input_energy<ColMajor, double, false, Eigen::BothParts, FFT_FORWARD, 3>();
+
+    test_fft_real_input_energy<ColMajor, float,  true,  Eigen::BothParts, FFT_FORWARD, 4>();
+    test_fft_real_input_energy<ColMajor, double, true,  Eigen::BothParts, FFT_FORWARD, 4>();
+    test_fft_real_input_energy<ColMajor, float,  false, Eigen::BothParts, FFT_FORWARD, 4>();
+    test_fft_real_input_energy<ColMajor, double, false, Eigen::BothParts, FFT_FORWARD, 4>();
+
+    test_fft_real_input_energy<RowMajor, float,  true,  Eigen::BothParts, FFT_FORWARD, 1>();
+    test_fft_real_input_energy<RowMajor, double, true,  Eigen::BothParts, FFT_FORWARD, 1>();
+    test_fft_real_input_energy<RowMajor, float,  false, Eigen::BothParts, FFT_FORWARD, 1>();
+    test_fft_real_input_energy<RowMajor, double, false, Eigen::BothParts, FFT_FORWARD, 1>();
+
+    test_fft_real_input_energy<RowMajor, float,  true,  Eigen::BothParts, FFT_FORWARD, 2>();
+    test_fft_real_input_energy<RowMajor, double, true,  Eigen::BothParts, FFT_FORWARD, 2>();
+    test_fft_real_input_energy<RowMajor, float,  false, Eigen::BothParts, FFT_FORWARD, 2>();
+    test_fft_real_input_energy<RowMajor, double, false, Eigen::BothParts, FFT_FORWARD, 2>();
+
+    test_fft_real_input_energy<RowMajor, float,  true,  Eigen::BothParts, FFT_FORWARD, 3>();
+    test_fft_real_input_energy<RowMajor, double, true,  Eigen::BothParts, FFT_FORWARD, 3>();
+    test_fft_real_input_energy<RowMajor, float,  false, Eigen::BothParts, FFT_FORWARD, 3>();
+    test_fft_real_input_energy<RowMajor, double, false, Eigen::BothParts, FFT_FORWARD, 3>();
+
+    test_fft_real_input_energy<RowMajor, float,  true,  Eigen::BothParts, FFT_FORWARD, 4>();
+    test_fft_real_input_energy<RowMajor, double, true,  Eigen::BothParts, FFT_FORWARD, 4>();
+    test_fft_real_input_energy<RowMajor, float,  false, Eigen::BothParts, FFT_FORWARD, 4>();
+    test_fft_real_input_energy<RowMajor, double, false, Eigen::BothParts, FFT_FORWARD, 4>();
+}
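The energy check in test_fft_real_input_energy is Parseval's theorem for the unnormalized DFT:

    \sum_{n=0}^{N-1} |x_n|^2 \;=\; \frac{1}{N} \sum_{k=0}^{N-1} |X_k|^2

The forward transform leaves the spectrum unscaled, so its energy grows by a factor of N = total_size, while the reverse transform divides by N and shrinks the energy by the same factor; that is exactly why the two branches compare against energy_after_fft / total_size and energy_after_fft * total_size respectively.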
diff --git a/unsupported/test/cxx11_tensor_fixed_size.cpp b/unsupported/test/cxx11_tensor_fixed_size.cpp
index 5252e4d72..1c33fefb3 100644
--- a/unsupported/test/cxx11_tensor_fixed_size.cpp
+++ b/unsupported/test/cxx11_tensor_fixed_size.cpp
@@ -15,6 +15,33 @@
 using Eigen::Tensor;
 using Eigen::RowMajor;
 
+static void test_0d()
+{
+  TensorFixedSize<float, Sizes<> > scalar1;
+  TensorFixedSize<float, Sizes<>, RowMajor> scalar2;
+  VERIFY_IS_EQUAL(scalar1.rank(), 0);
+
+  scalar1() = 7.0;
+  scalar2() = 13.0;
+
+  // Test against shallow copy.
+  TensorFixedSize<float, Sizes<> > copy = scalar1;
+  VERIFY_IS_NOT_EQUAL(scalar1.data(), copy.data());
+  VERIFY_IS_APPROX(scalar1(), copy());
+  copy = scalar1;
+  VERIFY_IS_NOT_EQUAL(scalar1.data(), copy.data());
+  VERIFY_IS_APPROX(scalar1(), copy());
+
+  TensorFixedSize<float, Sizes<> > scalar3 = scalar1.sqrt();
+  TensorFixedSize<float, Sizes<>, RowMajor> scalar4 = scalar2.sqrt();
+  VERIFY_IS_EQUAL(scalar3.rank(), 0);
+  VERIFY_IS_APPROX(scalar3(), sqrtf(7.0));
+  VERIFY_IS_APPROX(scalar4(), sqrtf(13.0));
+
+  scalar3 = scalar1 + scalar2;
+  VERIFY_IS_APPROX(scalar3(), 7.0f + 13.0f);
+}
+
 static void test_1d()
 {
   TensorFixedSize<float, Sizes<6> > vec1;
@@ -223,6 +250,7 @@ static void test_array()
 
 void test_cxx11_tensor_fixed_size()
 {
+  CALL_SUBTEST(test_0d());
   CALL_SUBTEST(test_1d());
   CALL_SUBTEST(test_tensor_map());
   CALL_SUBTEST(test_2d());
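The test_0d additions make the rank-0 corner case explicit: a rank-0 tensor is a scalar with size() == 1 that is read and written through an empty call operator, and the shallow-copy checks guard against a copy constructor or assignment that would alias data(). A minimal sketch of the semantics being validated:

    Eigen::TensorFixedSize<float, Eigen::Sizes<> > s;  // rank-0: holds a single value
    s() = 42.0f;                                       // written with an empty index list
    float v = s();                                     // v == 42.0f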
diff --git a/unsupported/test/cxx11_tensor_ifft.cpp b/unsupported/test/cxx11_tensor_ifft.cpp
new file mode 100644
index 000000000..5fd88fa6c
--- /dev/null
+++ b/unsupported/test/cxx11_tensor_ifft.cpp
@@ -0,0 +1,154 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2014 Jianwei Cui <thucjw@gmail.com>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#include "main.h"
+#include <complex>
+#include <cmath>
+#include <Eigen/CXX11/Tensor>
+
+using Eigen::Tensor;
+
+template <int DataLayout>
+static void test_1D_fft_ifft_invariant(int sequence_length) {
+  Tensor<double, 1, DataLayout> tensor(sequence_length);
+  tensor.setRandom();
+
+  array<int, 1> fft;
+  fft[0] = 0;
+
+  Tensor<std::complex<double>, 1, DataLayout> tensor_after_fft;
+  Tensor<std::complex<double>, 1, DataLayout> tensor_after_fft_ifft;
+
+  tensor_after_fft = tensor.template fft<Eigen::BothParts, Eigen::FFT_FORWARD>(fft);
+  tensor_after_fft_ifft = tensor_after_fft.template fft<Eigen::BothParts, Eigen::FFT_REVERSE>(fft);
+
+  VERIFY_IS_EQUAL(tensor_after_fft.dimension(0), sequence_length);
+  VERIFY_IS_EQUAL(tensor_after_fft_ifft.dimension(0), sequence_length);
+
+  for (int i = 0; i < sequence_length; ++i) {
+    VERIFY_IS_APPROX(static_cast<float>(tensor(i)), static_cast<float>(std::real(tensor_after_fft_ifft(i))));
+  }
+}
+
+template <int DataLayout>
+static void test_2D_fft_ifft_invariant(int dim0, int dim1) {
+  Tensor<double, 2, DataLayout> tensor(dim0, dim1);
+  tensor.setRandom();
+
+  array<int, 2> fft;
+  fft[0] = 0;
+  fft[1] = 1;
+
+  Tensor<std::complex<double>, 2, DataLayout> tensor_after_fft;
+  Tensor<std::complex<double>, 2, DataLayout> tensor_after_fft_ifft;
+
+  tensor_after_fft = tensor.template fft<Eigen::BothParts, Eigen::FFT_FORWARD>(fft);
+  tensor_after_fft_ifft = tensor_after_fft.template fft<Eigen::BothParts, Eigen::FFT_REVERSE>(fft);
+
+  VERIFY_IS_EQUAL(tensor_after_fft.dimension(0), dim0);
+  VERIFY_IS_EQUAL(tensor_after_fft.dimension(1), dim1);
+  VERIFY_IS_EQUAL(tensor_after_fft_ifft.dimension(0), dim0);
+  VERIFY_IS_EQUAL(tensor_after_fft_ifft.dimension(1), dim1);
+
+  for (int i = 0; i < dim0; ++i) {
+    for (int j = 0; j < dim1; ++j) {
+      //std::cout << "[" << i << "][" << j << "]" << "  Original data: " << tensor(i,j) << " Transformed data:" << tensor_after_fft_ifft(i,j) << std::endl;
+      VERIFY_IS_APPROX(static_cast<float>(tensor(i,j)), static_cast<float>(std::real(tensor_after_fft_ifft(i,j))));
+    }
+  }
+}
+
+template <int DataLayout>
+static void test_3D_fft_ifft_invariant(int dim0, int dim1, int dim2) {
+  Tensor<double, 3, DataLayout> tensor(dim0, dim1, dim2);
+  tensor.setRandom();
+
+  array<int, 3> fft;
+  fft[0] = 0;
+  fft[1] = 1;
+  fft[2] = 2;
+
+  Tensor<std::complex<double>, 3, DataLayout> tensor_after_fft;
+  Tensor<std::complex<double>, 3, DataLayout> tensor_after_fft_ifft;
+
+  tensor_after_fft = tensor.template fft<Eigen::BothParts, Eigen::FFT_FORWARD>(fft);
+  tensor_after_fft_ifft = tensor_after_fft.template fft<Eigen::BothParts, Eigen::FFT_REVERSE>(fft);
+
+  VERIFY_IS_EQUAL(tensor_after_fft.dimension(0), dim0);
+  VERIFY_IS_EQUAL(tensor_after_fft.dimension(1), dim1);
+  VERIFY_IS_EQUAL(tensor_after_fft.dimension(2), dim2);
+  VERIFY_IS_EQUAL(tensor_after_fft_ifft.dimension(0), dim0);
+  VERIFY_IS_EQUAL(tensor_after_fft_ifft.dimension(1), dim1);
+  VERIFY_IS_EQUAL(tensor_after_fft_ifft.dimension(2), dim2);
+
+  for (int i = 0; i < dim0; ++i) {
+    for (int j = 0; j < dim1; ++j) {
+      for (int k = 0; k < dim2; ++k) {
+        VERIFY_IS_APPROX(static_cast<float>(tensor(i,j,k)), static_cast<float>(std::real(tensor_after_fft_ifft(i,j,k))));
+      }
+    }
+  }
+}
+
+template <int DataLayout>
+static void test_sub_fft_ifft_invariant(int dim0, int dim1, int dim2, int dim3) {
+  Tensor<double, 4, DataLayout> tensor(dim0, dim1, dim2, dim3);
+  tensor.setRandom();
+
+  array<int, 2> fft;
+  fft[0] = 2;
+  fft[1] = 0;
+
+  Tensor<std::complex<double>, 4, DataLayout> tensor_after_fft;
+  Tensor<double, 4, DataLayout> tensor_after_fft_ifft;
+
+  tensor_after_fft = tensor.template fft<Eigen::BothParts, Eigen::FFT_FORWARD>(fft);
+  tensor_after_fft_ifft = tensor_after_fft.template fft<Eigen::RealPart, Eigen::FFT_REVERSE>(fft);
+
+  VERIFY_IS_EQUAL(tensor_after_fft.dimension(0), dim0);
+  VERIFY_IS_EQUAL(tensor_after_fft.dimension(1), dim1);
+  VERIFY_IS_EQUAL(tensor_after_fft.dimension(2), dim2);
+  VERIFY_IS_EQUAL(tensor_after_fft.dimension(3), dim3);
+  VERIFY_IS_EQUAL(tensor_after_fft_ifft.dimension(0), dim0);
+  VERIFY_IS_EQUAL(tensor_after_fft_ifft.dimension(1), dim1);
+  VERIFY_IS_EQUAL(tensor_after_fft_ifft.dimension(2), dim2);
+  VERIFY_IS_EQUAL(tensor_after_fft_ifft.dimension(3), dim3);
+
+  for (int i = 0; i < dim0; ++i) {
+    for (int j = 0; j < dim1; ++j) {
+      for (int k = 0; k < dim2; ++k) {
+        for (int l = 0; l < dim3; ++l) {
+          VERIFY_IS_APPROX(static_cast<float>(tensor(i,j,k,l)), static_cast<float>(tensor_after_fft_ifft(i,j,k,l)));
+        }
+      }
+    }
+  }
+}
+
+void test_cxx11_tensor_ifft() {
+  CALL_SUBTEST(test_1D_fft_ifft_invariant<ColMajor>(4));
+  CALL_SUBTEST(test_1D_fft_ifft_invariant<ColMajor>(16));
+  CALL_SUBTEST(test_1D_fft_ifft_invariant<ColMajor>(32));
+  CALL_SUBTEST(test_1D_fft_ifft_invariant<ColMajor>(1024*1024));
+
+  CALL_SUBTEST(test_2D_fft_ifft_invariant<ColMajor>(4,4));
+  CALL_SUBTEST(test_2D_fft_ifft_invariant<ColMajor>(8,16));
+  CALL_SUBTEST(test_2D_fft_ifft_invariant<ColMajor>(16,32));
+  CALL_SUBTEST(test_2D_fft_ifft_invariant<ColMajor>(1024,1024));
+
+  CALL_SUBTEST(test_3D_fft_ifft_invariant<ColMajor>(4,4,4));
+  CALL_SUBTEST(test_3D_fft_ifft_invariant<ColMajor>(8,16,32));
+  CALL_SUBTEST(test_3D_fft_ifft_invariant<ColMajor>(16,4,8));
+  CALL_SUBTEST(test_3D_fft_ifft_invariant<ColMajor>(256,256,256));
+
+  CALL_SUBTEST(test_sub_fft_ifft_invariant<ColMajor>(4,4,4,4));
+  CALL_SUBTEST(test_sub_fft_ifft_invariant<ColMajor>(8,16,32,64));
+  CALL_SUBTEST(test_sub_fft_ifft_invariant<ColMajor>(16,4,8,12));
+  CALL_SUBTEST(test_sub_fft_ifft_invariant<ColMajor>(64,64,64,64));
+}
diff --git a/unsupported/test/cxx11_tensor_index_list.cpp b/unsupported/test/cxx11_tensor_index_list.cpp
index ca9d18254..4ce8dea20 100644
--- a/unsupported/test/cxx11_tensor_index_list.cpp
+++ b/unsupported/test/cxx11_tensor_index_list.cpp
@@ -58,11 +58,11 @@ static void test_type2index_list()
   typedef Eigen::IndexList<Eigen::type2index<0>, Eigen::type2index<1>, Eigen::type2index<2>, Eigen::type2index<3>, Eigen::type2index<4>> Dims4;
 
 #if 0
-  EIGEN_STATIC_ASSERT((internal::indices_statically_known_to_increase<Dims0>()() == true), YOU_MADE_A_PROGRAMMING_MISTAKE);
-  EIGEN_STATIC_ASSERT((internal::indices_statically_known_to_increase<Dims1>()() == true), YOU_MADE_A_PROGRAMMING_MISTAKE);
-  EIGEN_STATIC_ASSERT((internal::indices_statically_known_to_increase<Dims2>()() == true), YOU_MADE_A_PROGRAMMING_MISTAKE);
-  EIGEN_STATIC_ASSERT((internal::indices_statically_known_to_increase<Dims3>()() == true), YOU_MADE_A_PROGRAMMING_MISTAKE);
-  EIGEN_STATIC_ASSERT((internal::indices_statically_known_to_increase<Dims4>()() == true), YOU_MADE_A_PROGRAMMING_MISTAKE);
+  EIGEN_STATIC_ASSERT((internal::indices_statically_known_to_increase<Dims0>() == true), YOU_MADE_A_PROGRAMMING_MISTAKE);
+  EIGEN_STATIC_ASSERT((internal::indices_statically_known_to_increase<Dims1>() == true), YOU_MADE_A_PROGRAMMING_MISTAKE);
+  EIGEN_STATIC_ASSERT((internal::indices_statically_known_to_increase<Dims2>() == true), YOU_MADE_A_PROGRAMMING_MISTAKE);
+  EIGEN_STATIC_ASSERT((internal::indices_statically_known_to_increase<Dims3>() == true), YOU_MADE_A_PROGRAMMING_MISTAKE);
+  EIGEN_STATIC_ASSERT((internal::indices_statically_known_to_increase<Dims4>() == true), YOU_MADE_A_PROGRAMMING_MISTAKE);
 #endif
 
   EIGEN_STATIC_ASSERT((internal::are_inner_most_dims<Dims0, 1, ColMajor>::value == true), YOU_MADE_A_PROGRAMMING_MISTAKE);
@@ -142,7 +142,7 @@ static void test_type2index_list()
   }
 
   const Dims4 reduction_axis4;
-  Tensor<float, 1> result4 = tensor.sum(reduction_axis4);
+  Tensor<float, 0> result4 = tensor.sum(reduction_axis4);
   float expected = 0.0f;
   for (int m = 0; m < 11; ++m) {
     for (int l = 0; l < 7; ++l) {
@@ -155,7 +155,7 @@ static void test_type2index_list()
       }
     }
   }
-  VERIFY_IS_APPROX(result4(0), expected);
+  VERIFY_IS_APPROX(result4(), expected);
 }
 
 
@@ -216,29 +216,29 @@ static void test_mixed_index_list()
   reduction_indices.set(3, 3);
   EIGEN_STATIC_ASSERT((internal::array_get<0>(reduction_indices) == 0), YOU_MADE_A_PROGRAMMING_MISTAKE);
   EIGEN_STATIC_ASSERT((internal::array_get<2>(reduction_indices) == 2), YOU_MADE_A_PROGRAMMING_MISTAKE);
-  EIGEN_STATIC_ASSERT((internal::index_known_statically<ReductionIndices>()(0) == true), YOU_MADE_A_PROGRAMMING_MISTAKE);
-  EIGEN_STATIC_ASSERT((internal::index_known_statically<ReductionIndices>()(2) == true), YOU_MADE_A_PROGRAMMING_MISTAKE);
-  EIGEN_STATIC_ASSERT((internal::index_statically_eq<ReductionIndices>()(0, 0) == true), YOU_MADE_A_PROGRAMMING_MISTAKE);
-  EIGEN_STATIC_ASSERT((internal::index_statically_eq<ReductionIndices>()(2, 2) == true), YOU_MADE_A_PROGRAMMING_MISTAKE);
+  EIGEN_STATIC_ASSERT((internal::index_known_statically<ReductionIndices>(0) == true), YOU_MADE_A_PROGRAMMING_MISTAKE);
+  EIGEN_STATIC_ASSERT((internal::index_known_statically<ReductionIndices>(2) == true), YOU_MADE_A_PROGRAMMING_MISTAKE);
+  EIGEN_STATIC_ASSERT((internal::index_statically_eq<ReductionIndices>(0, 0) == true), YOU_MADE_A_PROGRAMMING_MISTAKE);
+  EIGEN_STATIC_ASSERT((internal::index_statically_eq<ReductionIndices>(2, 2) == true), YOU_MADE_A_PROGRAMMING_MISTAKE);
 #if 0
-  EIGEN_STATIC_ASSERT((internal::all_indices_known_statically<ReductionIndices>()() == false), YOU_MADE_A_PROGRAMMING_MISTAKE);
-  EIGEN_STATIC_ASSERT((internal::indices_statically_known_to_increase<ReductionIndices>()() == false), YOU_MADE_A_PROGRAMMING_MISTAKE);
+  EIGEN_STATIC_ASSERT((internal::all_indices_known_statically<ReductionIndices>() == false), YOU_MADE_A_PROGRAMMING_MISTAKE);
+  EIGEN_STATIC_ASSERT((internal::indices_statically_known_to_increase<ReductionIndices>() == false), YOU_MADE_A_PROGRAMMING_MISTAKE);
 #endif
 
   typedef IndexList<type2index<0>, type2index<1>, type2index<2>, type2index<3>> ReductionList;
   ReductionList reduction_list;
-  EIGEN_STATIC_ASSERT((internal::index_statically_eq<ReductionList>()(0, 0) == true), YOU_MADE_A_PROGRAMMING_MISTAKE);
-  EIGEN_STATIC_ASSERT((internal::index_statically_eq<ReductionList>()(1, 1) == true), YOU_MADE_A_PROGRAMMING_MISTAKE);
-  EIGEN_STATIC_ASSERT((internal::index_statically_eq<ReductionList>()(2, 2) == true), YOU_MADE_A_PROGRAMMING_MISTAKE);
-  EIGEN_STATIC_ASSERT((internal::index_statically_eq<ReductionList>()(3, 3) == true), YOU_MADE_A_PROGRAMMING_MISTAKE);
+  EIGEN_STATIC_ASSERT((internal::index_statically_eq<ReductionList>(0, 0) == true), YOU_MADE_A_PROGRAMMING_MISTAKE);
+  EIGEN_STATIC_ASSERT((internal::index_statically_eq<ReductionList>(1, 1) == true), YOU_MADE_A_PROGRAMMING_MISTAKE);
+  EIGEN_STATIC_ASSERT((internal::index_statically_eq<ReductionList>(2, 2) == true), YOU_MADE_A_PROGRAMMING_MISTAKE);
+  EIGEN_STATIC_ASSERT((internal::index_statically_eq<ReductionList>(3, 3) == true), YOU_MADE_A_PROGRAMMING_MISTAKE);
 #if 0
-  EIGEN_STATIC_ASSERT((internal::all_indices_known_statically<ReductionList>()() == true), YOU_MADE_A_PROGRAMMING_MISTAKE);
-  EIGEN_STATIC_ASSERT((internal::indices_statically_known_to_increase<ReductionList>()() == true), YOU_MADE_A_PROGRAMMING_MISTAKE);
+  EIGEN_STATIC_ASSERT((internal::all_indices_known_statically<ReductionList>() == true), YOU_MADE_A_PROGRAMMING_MISTAKE);
+  EIGEN_STATIC_ASSERT((internal::indices_statically_known_to_increase<ReductionList>() == true), YOU_MADE_A_PROGRAMMING_MISTAKE);
 #endif
 
-  Tensor<float, 1> result1 = tensor.sum(reduction_axis);
-  Tensor<float, 1> result2 = tensor.sum(reduction_indices);
-  Tensor<float, 1> result3 = tensor.sum(reduction_list);
+  Tensor<float, 0> result1 = tensor.sum(reduction_axis);
+  Tensor<float, 0> result2 = tensor.sum(reduction_indices);
+  Tensor<float, 0> result3 = tensor.sum(reduction_list);
 
   float expected = 0.0f;
   for (int i = 0; i < 2; ++i) {
@@ -250,9 +250,9 @@ static void test_mixed_index_list()
       }
     }
   }
-  VERIFY_IS_APPROX(result1(0), expected);
-  VERIFY_IS_APPROX(result2(0), expected);
-  VERIFY_IS_APPROX(result3(0), expected);
+  VERIFY_IS_APPROX(result1(), expected);
+  VERIFY_IS_APPROX(result2(), expected);
+  VERIFY_IS_APPROX(result3(), expected);
 }
diff --git a/unsupported/test/cxx11_tensor_intdiv.cpp b/unsupported/test/cxx11_tensor_intdiv.cpp
index 343b37dbd..48aa6d368 100644
--- a/unsupported/test/cxx11_tensor_intdiv.cpp
+++ b/unsupported/test/cxx11_tensor_intdiv.cpp
@@ -14,8 +14,29 @@
 void test_signed_32bit()
 {
+  // Divide by one
+  const Eigen::internal::TensorIntDivisor<int32_t, false> div_by_one(1);
+
+  for (int32_t j = 0; j < 25000; ++j) {
+    const int32_t fast_div = j / div_by_one;
+    const int32_t slow_div = j / 1;
+    VERIFY_IS_EQUAL(fast_div, slow_div);
+  }
+
+  // Standard divide by 2 or more
   for (int32_t i = 2; i < 25000; ++i) {
-    const Eigen::internal::TensorIntDivisor<int32_t> div(i);
+    const Eigen::internal::TensorIntDivisor<int32_t, false> div(i);
+
+    for (int32_t j = 0; j < 25000; ++j) {
+      const int32_t fast_div = j / div;
+      const int32_t slow_div = j / i;
+      VERIFY_IS_EQUAL(fast_div, slow_div);
+    }
+  }
+
+  // Optimized divide by 2 or more
+  for (int32_t i = 2; i < 25000; ++i) {
+    const Eigen::internal::TensorIntDivisor<int32_t, true> div(i);
 
     for (int32_t j = 0; j < 25000; ++j) {
       const int32_t fast_div = j / div;
@@ -42,7 +63,7 @@ void test_unsigned_32bit()
 
 void test_signed_64bit()
 {
-  for (int64_t i = 2; i < 25000; ++i) {
+  for (int64_t i = 1; i < 25000; ++i) {
    const Eigen::internal::TensorIntDivisor<int64_t> div(i);
 
     for (int64_t j = 0; j < 25000; ++j) {
@@ -56,7 +77,7 @@ void test_signed_64bit()
 
 void test_unsigned_64bit()
 {
-  for (uint64_t i = 2; i < 25000; ++i) {
+  for (uint64_t i = 1; i < 25000; ++i) {
     const Eigen::internal::TensorIntDivisor<uint64_t> div(i);
 
     for (uint64_t j = 0; j < 25000; ++j) {
@@ -95,8 +116,7 @@ void test_powers_64bit() {
   if (start_num < 0)
     start_num = 0;
   for (int64_t num = start_num; num < end_num; num++) {
-    Eigen::internal::TensorIntDivisor<int64_t, true> divider =
-      Eigen::internal::TensorIntDivisor<int64_t, true>(div);
+    Eigen::internal::TensorIntDivisor<int64_t, true> divider(div);
     int64_t result = num/div;
     int64_t result_op = divider.divide(num);
     VERIFY_IS_EQUAL(result_op, result);
@@ -109,8 +129,7 @@ void test_specific() {
   // A particular combination that was previously failing
   int64_t div = 209715200;
   int64_t num = 3238002688;
-  Eigen::internal::TensorIntDivisor<int64_t, true> divider =
-    Eigen::internal::TensorIntDivisor<int64_t, true>(div);
+  Eigen::internal::TensorIntDivisor<int64_t, true> divider(div);
   int64_t result = num/div;
   int64_t result_op = divider.divide(num);
   VERIFY_IS_EQUAL(result, result_op);
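For context on the TensorIntDivisor tests above: the class precomputes a reciprocal so that repeated divisions by the same run-time divisor are compiled to a widening multiply plus a shift instead of a hardware divide. The sketch below shows the general idea only; the struct name is hypothetical and this is not Eigen's implementation (it uses the GCC/Clang __uint128_t extension, which the uint128 test further down also relies on):

    #include <cstdint>

    // Illustrative magic-number division: precompute M and s so that
    // x / d == (x * M) >> (32 + s) for all 32-bit x and any divisor d >= 1.
    struct FastDivisor {
      uint64_t magic;
      int shift;
      explicit FastDivisor(uint32_t d) : shift(0) {
        while ((uint64_t(1) << shift) < d) ++shift;               // shift = ceil(log2(d))
        magic = ((__uint128_t(1) << (32 + shift)) + d - 1) / d;   // ceil(2^(32+s) / d)
      }
      uint32_t divide(uint32_t x) const {
        return uint32_t((__uint128_t(x) * magic) >> (32 + shift));
      }
    };
    // divide(x) == x / d for every 32-bit x, at the cost of one multiply-high
    // and one shift per division instead of a slow hardware divide.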
diff --git a/unsupported/test/cxx11_tensor_map.cpp b/unsupported/test/cxx11_tensor_map.cpp
index 9cf2eb150..a8a095e38 100644
--- a/unsupported/test/cxx11_tensor_map.cpp
+++ b/unsupported/test/cxx11_tensor_map.cpp
@@ -14,6 +14,24 @@
 using Eigen::Tensor;
 using Eigen::RowMajor;
 
+static void test_0d()
+{
+  Tensor<int, 0> scalar1;
+  Tensor<int, 0, RowMajor> scalar2;
+
+  TensorMap<Tensor<int, 0>> scalar3(scalar1.data());
+  TensorMap<Tensor<int, 0, RowMajor>> scalar4(scalar2.data());
+
+  scalar1() = 7;
+  scalar2() = 13;
+
+  VERIFY_IS_EQUAL(scalar1.rank(), 0);
+  VERIFY_IS_EQUAL(scalar1.size(), 1);
+
+  VERIFY_IS_EQUAL(scalar3(), 7);
+  VERIFY_IS_EQUAL(scalar4(), 13);
+}
+
 static void test_1d()
 {
   Tensor<int, 1> vec1(6);
@@ -139,9 +157,117 @@ static void test_3d()
 }
 
 
+static void test_from_tensor()
+{
+  Tensor<int, 3> mat1(2,3,7);
+  Tensor<int, 3, RowMajor> mat2(2,3,7);
+
+  int val = 0;
+  for (int i = 0; i < 2; ++i) {
+    for (int j = 0; j < 3; ++j) {
+      for (int k = 0; k < 7; ++k) {
+        mat1(i,j,k) = val;
+        mat2(i,j,k) = val;
+        val++;
+      }
+    }
+  }
+
+  TensorMap<Tensor<int, 3>> mat3(mat1);
+  TensorMap<Tensor<int, 3, RowMajor>> mat4(mat2);
+
+  VERIFY_IS_EQUAL(mat3.rank(), 3);
+  VERIFY_IS_EQUAL(mat3.size(), 2*3*7);
+  VERIFY_IS_EQUAL(mat3.dimension(0), 2);
+  VERIFY_IS_EQUAL(mat3.dimension(1), 3);
+  VERIFY_IS_EQUAL(mat3.dimension(2), 7);
+
+  VERIFY_IS_EQUAL(mat4.rank(), 3);
+  VERIFY_IS_EQUAL(mat4.size(), 2*3*7);
+  VERIFY_IS_EQUAL(mat4.dimension(0), 2);
+  VERIFY_IS_EQUAL(mat4.dimension(1), 3);
+  VERIFY_IS_EQUAL(mat4.dimension(2), 7);
+
+  val = 0;
+  for (int i = 0; i < 2; ++i) {
+    for (int j = 0; j < 3; ++j) {
+      for (int k = 0; k < 7; ++k) {
+        VERIFY_IS_EQUAL(mat3(i,j,k), val);
+        VERIFY_IS_EQUAL(mat4(i,j,k), val);
+        val++;
+      }
+    }
+  }
+
+  TensorFixedSize<int, Sizes<2,3,7>> mat5;
+
+  val = 0;
+  for (int i = 0; i < 2; ++i) {
+    for (int j = 0; j < 3; ++j) {
+      for (int k = 0; k < 7; ++k) {
+        mat5(i,j,k) = val;
+        val++;
+      }
+    }
+  }
+
+  TensorMap<TensorFixedSize<int, Sizes<2,3,7>>> mat6(mat5);
+
+  VERIFY_IS_EQUAL(mat6.rank(), 3);
+  VERIFY_IS_EQUAL(mat6.size(), 2*3*7);
+  VERIFY_IS_EQUAL(mat6.dimension(0), 2);
+  VERIFY_IS_EQUAL(mat6.dimension(1), 3);
+  VERIFY_IS_EQUAL(mat6.dimension(2), 7);
+
+  val = 0;
+  for (int i = 0; i < 2; ++i) {
+    for (int j = 0; j < 3; ++j) {
+      for (int k = 0; k < 7; ++k) {
+        VERIFY_IS_EQUAL(mat6(i,j,k), val);
+        val++;
+      }
+    }
+  }
+}
+
+
+static int f(const TensorMap<Tensor<int, 3> >& tensor) {
+  //  Size<0> empty;
+  EIGEN_STATIC_ASSERT((internal::array_size<Sizes<> >::value == 0), YOU_MADE_A_PROGRAMMING_MISTAKE);
+  EIGEN_STATIC_ASSERT((internal::array_size<DSizes<int, 0> >::value == 0), YOU_MADE_A_PROGRAMMING_MISTAKE);
+  Tensor<int, 0> result = tensor.sum();
+  return result();
+}
+
+static void test_casting()
+{
+  Tensor<int, 3> tensor(2,3,7);
+
+  int val = 0;
+  for (int i = 0; i < 2; ++i) {
+    for (int j = 0; j < 3; ++j) {
+      for (int k = 0; k < 7; ++k) {
+        tensor(i,j,k) = val;
+        val++;
+      }
+    }
+  }
+
+  TensorMap<Tensor<int, 3>> map(tensor);
+  int sum1 = f(map);
+  int sum2 = f(tensor);
+
+  VERIFY_IS_EQUAL(sum1, sum2);
+  VERIFY_IS_EQUAL(sum1, 861);
+}
+
 void test_cxx11_tensor_map()
 {
+  CALL_SUBTEST(test_0d());
   CALL_SUBTEST(test_1d());
   CALL_SUBTEST(test_2d());
   CALL_SUBTEST(test_3d());
+
+  CALL_SUBTEST(test_from_tensor());
+  CALL_SUBTEST(test_casting());
 }
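The test_casting test works because a TensorMap is a non-owning view: constructing one from a Tensor shares the tensor's storage instead of copying it, which is why f(map) and f(tensor) are interchangeable (the expected sum 861 is simply 0 + 1 + ... + 41). A minimal sketch of the view semantics:

    #include <Eigen/CXX11/Tensor>

    // Sketch: TensorMap wraps existing storage without copying it.
    static void tensor_map_sketch() {
      float buf[6] = {0, 1, 2, 3, 4, 5};
      Eigen::TensorMap<Eigen::Tensor<float, 2> > view(buf, 2, 3);  // no copy
      view(1, 2) = 42.0f;  // writes through to buf[5]
    }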
diff --git a/unsupported/test/cxx11_tensor_reduction.cpp b/unsupported/test/cxx11_tensor_reduction.cpp
index b2c85a879..0ec316991 100644
--- a/unsupported/test/cxx11_tensor_reduction.cpp
+++ b/unsupported/test/cxx11_tensor_reduction.cpp
@@ -13,6 +13,45 @@
 
 using Eigen::Tensor;
 
+template <int DataLayout>
+static void test_trivial_reductions() {
+  {
+    Tensor<float, 0, DataLayout> tensor;
+    tensor.setRandom();
+    array<ptrdiff_t, 0> reduction_axis;
+
+    Tensor<float, 0, DataLayout> result = tensor.sum(reduction_axis);
+    VERIFY_IS_EQUAL(result(), tensor());
+  }
+
+  {
+    Tensor<float, 1, DataLayout> tensor(7);
+    tensor.setRandom();
+    array<ptrdiff_t, 0> reduction_axis;
+
+    Tensor<float, 1, DataLayout> result = tensor.sum(reduction_axis);
+    VERIFY_IS_EQUAL(result.dimension(0), 7);
+    for (int i = 0; i < 7; ++i) {
+      VERIFY_IS_EQUAL(result(i), tensor(i));
+    }
+  }
+
+  {
+    Tensor<float, 2, DataLayout> tensor(2, 3);
+    tensor.setRandom();
+    array<ptrdiff_t, 0> reduction_axis;
+
+    Tensor<float, 2, DataLayout> result = tensor.sum(reduction_axis);
+    VERIFY_IS_EQUAL(result.dimension(0), 2);
+    VERIFY_IS_EQUAL(result.dimension(1), 3);
+    for (int i = 0; i < 2; ++i) {
+      for (int j = 0; j < 3; ++j) {
+        VERIFY_IS_EQUAL(result(i, j), tensor(i, j));
+      }
+    }
+  }
+}
+
 template <int DataLayout>
 static void test_simple_reductions() {
   Tensor<float, 4, DataLayout> tensor(2, 3, 5, 7);
@@ -37,18 +76,18 @@ static void test_simple_reductions() {
   }
 
   {
-    Tensor<float, 1, DataLayout> sum1 = tensor.sum();
-    VERIFY_IS_EQUAL(sum1.dimension(0), 1);
+    Tensor<float, 0, DataLayout> sum1 = tensor.sum();
+    VERIFY_IS_EQUAL(sum1.rank(), 0);
 
     array<ptrdiff_t, 4> reduction_axis4;
     reduction_axis4[0] = 0;
     reduction_axis4[1] = 1;
     reduction_axis4[2] = 2;
     reduction_axis4[3] = 3;
 
-    Tensor<float, 1, DataLayout> sum2 = tensor.sum(reduction_axis4);
-    VERIFY_IS_EQUAL(sum2.dimension(0), 1);
+    Tensor<float, 0, DataLayout> sum2 = tensor.sum(reduction_axis4);
+    VERIFY_IS_EQUAL(sum2.rank(), 0);
 
-    VERIFY_IS_APPROX(sum1(0), sum2(0));
+    VERIFY_IS_APPROX(sum1(), sum2());
   }
 
   reduction_axis2[0] = 0;
@@ -69,18 +108,18 @@ static void test_simple_reductions() {
   }
 
   {
-    Tensor<float, 1, DataLayout> prod1 = tensor.prod();
-    VERIFY_IS_EQUAL(prod1.dimension(0), 1);
+    Tensor<float, 0, DataLayout> prod1 = tensor.prod();
+    VERIFY_IS_EQUAL(prod1.rank(), 0);
 
     array<ptrdiff_t, 4> reduction_axis4;
     reduction_axis4[0] = 0;
     reduction_axis4[1] = 1;
     reduction_axis4[2] = 2;
     reduction_axis4[3] = 3;
 
-    Tensor<float, 1, DataLayout> prod2 = tensor.prod(reduction_axis4);
-    VERIFY_IS_EQUAL(prod2.dimension(0), 1);
+    Tensor<float, 0, DataLayout> prod2 = tensor.prod(reduction_axis4);
+    VERIFY_IS_EQUAL(prod2.rank(), 0);
 
-    VERIFY_IS_APPROX(prod1(0), prod2(0));
+    VERIFY_IS_APPROX(prod1(), prod2());
   }
 
   reduction_axis2[0] = 0;
@@ -101,18 +140,18 @@ static void test_simple_reductions() {
   }
 
   {
-    Tensor<float, 1, DataLayout> max1 = tensor.maximum();
-    VERIFY_IS_EQUAL(max1.dimension(0), 1);
+    Tensor<float, 0, DataLayout> max1 = tensor.maximum();
+    VERIFY_IS_EQUAL(max1.rank(), 0);
 
     array<ptrdiff_t, 4> reduction_axis4;
     reduction_axis4[0] = 0;
     reduction_axis4[1] = 1;
     reduction_axis4[2] = 2;
     reduction_axis4[3] = 3;
 
-    Tensor<float, 1, DataLayout> max2 = tensor.maximum(reduction_axis4);
-    VERIFY_IS_EQUAL(max2.dimension(0), 1);
+    Tensor<float, 0, DataLayout> max2 = tensor.maximum(reduction_axis4);
+    VERIFY_IS_EQUAL(max2.rank(), 0);
 
-    VERIFY_IS_APPROX(max1(0), max2(0));
+    VERIFY_IS_APPROX(max1(), max2());
   }
 
   reduction_axis2[0] = 0;
@@ -133,18 +172,18 @@ static void test_simple_reductions() {
   }
 
   {
-    Tensor<float, 1, DataLayout> min1 = tensor.minimum();
-    VERIFY_IS_EQUAL(min1.dimension(0), 1);
+    Tensor<float, 0, DataLayout> min1 = tensor.minimum();
+    VERIFY_IS_EQUAL(min1.rank(), 0);
 
     array<ptrdiff_t, 4> reduction_axis4;
     reduction_axis4[0] = 0;
     reduction_axis4[1] = 1;
     reduction_axis4[2] = 2;
     reduction_axis4[3] = 3;
 
-    Tensor<float, 1, DataLayout> min2 = tensor.minimum(reduction_axis4);
-    VERIFY_IS_EQUAL(min2.dimension(0), 1);
+    Tensor<float, 0, DataLayout> min2 = tensor.minimum(reduction_axis4);
+    VERIFY_IS_EQUAL(min2.rank(), 0);
 
-    VERIFY_IS_APPROX(min1(0), min2(0));
+    VERIFY_IS_APPROX(min1(), min2());
  }
 
  reduction_axis2[0] = 0;
@@ -167,18 +206,35 @@ static void test_simple_reductions() {
   }
 
   {
-    Tensor<float, 1, DataLayout> mean1 = tensor.mean();
-    VERIFY_IS_EQUAL(mean1.dimension(0), 1);
+    Tensor<float, 0, DataLayout> mean1 = tensor.mean();
+    VERIFY_IS_EQUAL(mean1.rank(), 0);
 
     array<ptrdiff_t, 4> reduction_axis4;
     reduction_axis4[0] = 0;
     reduction_axis4[1] = 1;
     reduction_axis4[2] = 2;
     reduction_axis4[3] = 3;
 
-    Tensor<float, 1, DataLayout> mean2 = tensor.mean(reduction_axis4);
-    VERIFY_IS_EQUAL(mean2.dimension(0), 1);
+    Tensor<float, 0, DataLayout> mean2 = tensor.mean(reduction_axis4);
+    VERIFY_IS_EQUAL(mean2.rank(), 0);
 
-    VERIFY_IS_APPROX(mean1(0), mean2(0));
+    VERIFY_IS_APPROX(mean1(), mean2());
+  }
+
+  {
+    Tensor<int, 1, DataLayout> ints(10);
+    std::iota(ints.data(), ints.data() + ints.dimension(0), 0);
+
+    TensorFixedSize<bool, Sizes<> > all;
+    all = ints.all();
+    VERIFY(!all());
+    all = (ints >= ints.constant(0)).all();
+    VERIFY(all());
+
+    TensorFixedSize<bool, Sizes<> > any;
+    any = (ints > ints.constant(10)).any();
+    VERIFY(!any());
+    any = (ints < ints.constant(1)).any();
+    VERIFY(any());
   }
 }
 
@@ -190,8 +246,8 @@ static void test_full_reductions() {
   reduction_axis[0] = 0;
   reduction_axis[1] = 1;
 
-  Tensor<float, 1, DataLayout> result = tensor.sum(reduction_axis);
-  VERIFY_IS_EQUAL(result.dimension(0), 1);
+  Tensor<float, 0, DataLayout> result = tensor.sum(reduction_axis);
+  VERIFY_IS_EQUAL(result.rank(), 0);
 
   float sum = 0.0f;
   for (int i = 0; i < 2; ++i) {
@@ -202,7 +258,7 @@ static void test_full_reductions() {
   VERIFY_IS_APPROX(result(0), sum);
 
   result = tensor.square().sum(reduction_axis).sqrt();
-  VERIFY_IS_EQUAL(result.dimension(0), 1);
+  VERIFY_IS_EQUAL(result.rank(), 0);
 
   sum = 0.0f;
   for (int i = 0; i < 2; ++i) {
@@ -210,7 +266,7 @@ static void test_full_reductions() {
       sum += tensor(i, j) * tensor(i, j);
     }
   }
-  VERIFY_IS_APPROX(result(0), sqrtf(sum));
+  VERIFY_IS_APPROX(result(), sqrtf(sum));
 }
 
 struct UserReducer {
@@ -401,6 +457,8 @@ static void test_reduce_middle_dims() {
 }
 
 void test_cxx11_tensor_reduction() {
+  CALL_SUBTEST(test_trivial_reductions<ColMajor>());
+  CALL_SUBTEST(test_trivial_reductions<RowMajor>());
   CALL_SUBTEST(test_simple_reductions<ColMajor>());
   CALL_SUBTEST(test_simple_reductions<RowMajor>());
   CALL_SUBTEST(test_full_reductions<ColMajor>());
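The recurring pattern in these hunks is a single convention change: a full reduction now yields a rank-0 tensor instead of a 1-element rank-1 tensor, so the results are checked with rank() and read with an empty call operator. In sketch form:

    #include <Eigen/CXX11/Tensor>

    // Sketch of the convention validated above.
    static void full_reduction_sketch() {
      Eigen::Tensor<float, 2> t(2, 3);
      t.setRandom();
      Eigen::Tensor<float, 0> s = t.sum();  // rank() == 0, size() == 1
      float total = s();                    // read the scalar with an empty index list
      (void)total;
    }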
diff --git a/unsupported/test/cxx11_tensor_reduction_cuda.cpp b/unsupported/test/cxx11_tensor_reduction_cuda.cpp
index f426ebbc1..9e06eb126 100644
--- a/unsupported/test/cxx11_tensor_reduction_cuda.cpp
+++ b/unsupported/test/cxx11_tensor_reduction_cuda.cpp
@@ -28,7 +28,7 @@ static void test_full_reductions() {
   Tensor<float, 2> in(num_rows, num_cols);
   in.setRandom();
 
-  Tensor<float, 1> full_redux(1);
+  Tensor<float, 0> full_redux;
   full_redux = in.sum();
 
   std::size_t in_bytes = in.size() * sizeof(float);
@@ -38,16 +38,16 @@ static void test_full_reductions() {
   gpu_device.memcpyHostToDevice(gpu_in_ptr, in.data(), in_bytes);
 
   TensorMap<Tensor<float, 2> > in_gpu(gpu_in_ptr, num_rows, num_cols);
-  TensorMap<Tensor<float, 1> > out_gpu(gpu_out_ptr, 1);
+  TensorMap<Tensor<float, 0> > out_gpu(gpu_out_ptr);
 
   out_gpu.device(gpu_device) = in_gpu.sum();
 
-  Tensor<float, 1> full_redux_gpu(1);
+  Tensor<float, 0> full_redux_gpu;
   gpu_device.memcpyDeviceToHost(full_redux_gpu.data(), gpu_out_ptr, out_bytes);
   gpu_device.synchronize();
 
   // Check that the CPU and GPU reductions return the same result.
-  VERIFY_IS_APPROX(full_redux(0), full_redux_gpu(0));
+  VERIFY_IS_APPROX(full_redux(), full_redux_gpu());
 }
 
 void test_cxx11_tensor_reduction_cuda() {
diff --git a/unsupported/test/cxx11_tensor_reverse.cpp b/unsupported/test/cxx11_tensor_reverse.cpp
index f96c21fa3..b35b8d29e 100644
--- a/unsupported/test/cxx11_tensor_reverse.cpp
+++ b/unsupported/test/cxx11_tensor_reverse.cpp
@@ -114,10 +114,18 @@ static void test_expr_reverse(bool LValue)
 
   Tensor<float, 4, DataLayout> result(2,3,5,7);
 
-  array<ptrdiff_t, 4> src_slice_dim{{2,3,1,7}};
-  array<ptrdiff_t, 4> src_slice_start{{0,0,0,0}};
-  array<ptrdiff_t, 4> dst_slice_dim{{2,3,1,7}};
-  array<ptrdiff_t, 4> dst_slice_start{{0,0,0,0}};
+  array<ptrdiff_t, 4> src_slice_dim;
+  src_slice_dim[0] = 2;
+  src_slice_dim[1] = 3;
+  src_slice_dim[2] = 1;
+  src_slice_dim[3] = 7;
+  array<ptrdiff_t, 4> src_slice_start;
+  src_slice_start[0] = 0;
+  src_slice_start[1] = 0;
+  src_slice_start[2] = 0;
+  src_slice_start[3] = 0;
+  array<ptrdiff_t, 4> dst_slice_dim = src_slice_dim;
+  array<ptrdiff_t, 4> dst_slice_start = src_slice_start;
 
   for (int i = 0; i < 5; ++i) {
     if (LValue) {
diff --git a/unsupported/test/cxx11_tensor_simple.cpp b/unsupported/test/cxx11_tensor_simple.cpp
index 8cd2ab7fd..47d4d8636 100644
--- a/unsupported/test/cxx11_tensor_simple.cpp
+++ b/unsupported/test/cxx11_tensor_simple.cpp
@@ -14,6 +14,35 @@
 using Eigen::Tensor;
 using Eigen::RowMajor;
 
+static void test_0d()
+{
+  Tensor<int, 0> scalar1;
+  Tensor<int, 0, RowMajor> scalar2;
+  Tensor<int, 0> scalar3;
+  Tensor<int, 0, RowMajor> scalar4;
+
+  scalar3.resize();
+  scalar4.resize();
+
+  scalar1() = 7;
+  scalar2() = 13;
+  scalar3.setValues(17);
+  scalar4.setZero();
+
+  VERIFY_IS_EQUAL(scalar1.rank(), 0);
+  VERIFY_IS_EQUAL(scalar1.size(), 1);
+
+  VERIFY_IS_EQUAL(scalar1(), 7);
+  VERIFY_IS_EQUAL(scalar2(), 13);
+  VERIFY_IS_EQUAL(scalar3(), 17);
+  VERIFY_IS_EQUAL(scalar4(), 0);
+
+  Tensor<int, 0> scalar5(scalar1);
+
+  VERIFY_IS_EQUAL(scalar5(), 7);
+  VERIFY_IS_EQUAL(scalar5.data()[0], 7);
+}
+
 static void test_1d()
 {
   Tensor<int, 1> vec1(6);
@@ -287,13 +316,10 @@ static void test_resize()
 
 void test_cxx11_tensor_simple()
 {
+  CALL_SUBTEST(test_0d());
   CALL_SUBTEST(test_1d());
   CALL_SUBTEST(test_2d());
   CALL_SUBTEST(test_3d());
   CALL_SUBTEST(test_simple_assign());
   CALL_SUBTEST(test_resize());
 }
-
-/*
- * kate: space-indent on; indent-width 2; mixedindent off; indent-mode cstyle;
- */
diff --git a/unsupported/test/cxx11_tensor_sugar.cpp b/unsupported/test/cxx11_tensor_sugar.cpp
new file mode 100644
index 000000000..adac472cf
--- /dev/null
+++ b/unsupported/test/cxx11_tensor_sugar.cpp
@@ -0,0 +1,38 @@
+#include "main.h"
+
+#include <Eigen/CXX11/Tensor>
+
+using Eigen::Tensor;
+using Eigen::RowMajor;
+
+static void test_comparison_sugar() {
+  // we already trust comparisons between tensors, we're simply checking that
+  // the sugared versions are doing the same thing
+  Tensor<float, 3> t(6, 7, 5);
+
+  t.setRandom();
+  // make sure we have at least one value == 0
+  t(0,0,0) = 0;
+
+  Tensor<bool, 0> b;
+
+#define TEST_TENSOR_EQUAL(e1, e2) \
+  b = ((e1) == (e2)).all();       \
+  VERIFY(b())
+
+#define TEST_OP(op) TEST_TENSOR_EQUAL(t op 0, t op t.constant(0))
+
+  TEST_OP(==);
+  TEST_OP(!=);
+  TEST_OP(<=);
+  TEST_OP(>=);
+  TEST_OP(<);
+  TEST_OP(>);
+#undef TEST_OP
+#undef TEST_TENSOR_EQUAL
+}
+
+void test_cxx11_tensor_sugar()
+{
+  CALL_SUBTEST(test_comparison_sugar());
+}
diff --git a/unsupported/test/cxx11_tensor_uint128.cpp b/unsupported/test/cxx11_tensor_uint128.cpp
new file mode 100644
index 000000000..ee3767e58
--- /dev/null
+++ b/unsupported/test/cxx11_tensor_uint128.cpp
@@ -0,0 +1,144 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2015 Benoit Steiner <benoit.steiner.goog@gmail.com>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#include "main.h"
+
+#include <Eigen/CXX11/Tensor>
+
+using Eigen::internal::TensorUInt128;
+using Eigen::internal::static_val;
+
+void VERIFY_EQUAL(TensorUInt128<uint64_t, uint64_t> actual, __uint128_t expected) {
+  bool matchl = actual.lower() == static_cast<uint64_t>(expected);
+  bool matchh = actual.upper() == static_cast<uint64_t>(expected >> 64);
+  if (!matchl || !matchh) {
+    const char* testname = g_test_stack.back().c_str();
+    std::cerr << "Test " << testname << " failed in " << __FILE__
+              << " (" << __LINE__ << ")" << std::endl;
+    abort();
+  }
+}
+
+
+void test_add() {
+  uint64_t incr = internal::random<uint64_t>(1, 9999999999);
+  for (uint64_t i1 = 0; i1 < 100; ++i1) {
+    for (uint64_t i2 = 1; i2 < 100 * incr; i2 += incr) {
+      TensorUInt128<uint64_t, uint64_t> i(i1, i2);
+      __uint128_t a = (static_cast<__uint128_t>(i1) << 64) + static_cast<__uint128_t>(i2);
+      for (uint64_t j1 = 0; j1 < 100; ++j1) {
+        for (uint64_t j2 = 1; j2 < 100 * incr; j2 += incr) {
+          TensorUInt128<uint64_t, uint64_t> j(j1, j2);
+          __uint128_t b = (static_cast<__uint128_t>(j1) << 64) + static_cast<__uint128_t>(j2);
+          TensorUInt128<uint64_t, uint64_t> actual = i + j;
+          __uint128_t expected = a + b;
+          VERIFY_EQUAL(actual, expected);
+        }
+      }
+    }
+  }
+}
+
+void test_sub() {
+  uint64_t incr = internal::random<uint64_t>(1, 9999999999);
+  for (uint64_t i1 = 0; i1 < 100; ++i1) {
+    for (uint64_t i2 = 1; i2 < 100 * incr; i2 += incr) {
+      TensorUInt128<uint64_t, uint64_t> i(i1, i2);
+      __uint128_t a = (static_cast<__uint128_t>(i1) << 64) + static_cast<__uint128_t>(i2);
+      for (uint64_t j1 = 0; j1 < 100; ++j1) {
+        for (uint64_t j2 = 1; j2 < 100 * incr; j2 += incr) {
+          TensorUInt128<uint64_t, uint64_t> j(j1, j2);
+          __uint128_t b = (static_cast<__uint128_t>(j1) << 64) + static_cast<__uint128_t>(j2);
+          TensorUInt128<uint64_t, uint64_t> actual = i - j;
+          __uint128_t expected = a - b;
+          VERIFY_EQUAL(actual, expected);
+        }
+      }
+    }
+  }
+}
+
+void test_mul() {
+  uint64_t incr = internal::random<uint64_t>(1, 9999999999);
+  for (uint64_t i1 = 0; i1 < 100; ++i1) {
+    for (uint64_t i2 = 1; i2 < 100 * incr; i2 += incr) {
+      TensorUInt128<uint64_t, uint64_t> i(i1, i2);
+      __uint128_t a = (static_cast<__uint128_t>(i1) << 64) + static_cast<__uint128_t>(i2);
+      for (uint64_t j1 = 0; j1 < 100; ++j1) {
+        for (uint64_t j2 = 1; j2 < 100 * incr; j2 += incr) {
+          TensorUInt128<uint64_t, uint64_t> j(j1, j2);
+          __uint128_t b = (static_cast<__uint128_t>(j1) << 64) + static_cast<__uint128_t>(j2);
+          TensorUInt128<uint64_t, uint64_t> actual = i * j;
+          __uint128_t expected = a * b;
+          VERIFY_EQUAL(actual, expected);
+        }
+      }
+    }
+  }
+}
+
+void test_div() {
+  uint64_t incr = internal::random<uint64_t>(1, 9999999999);
+  for (uint64_t i1 = 0; i1 < 100; ++i1) {
+    for (uint64_t i2 = 1; i2 < 100 * incr; i2 += incr) {
+      TensorUInt128<uint64_t, uint64_t> i(i1, i2);
+      __uint128_t a = (static_cast<__uint128_t>(i1) << 64) + static_cast<__uint128_t>(i2);
+      for (uint64_t j1 = 0; j1 < 100; ++j1) {
+        for (uint64_t j2 = 1; j2 < 100 * incr; j2 += incr) {
+          TensorUInt128<uint64_t, uint64_t> j(j1, j2);
+          __uint128_t b = (static_cast<__uint128_t>(j1) << 64) + static_cast<__uint128_t>(j2);
+          TensorUInt128<uint64_t, uint64_t> actual = i / j;
+          __uint128_t expected = a / b;
+          VERIFY_EQUAL(actual, expected);
+        }
+      }
+    }
+  }
+}
+
+void test_misc1() {
+  uint64_t incr = internal::random<uint64_t>(1, 9999999999);
+  for (uint64_t i2 = 1; i2 < 100 * incr; i2 += incr) {
+    TensorUInt128<static_val<0>, uint64_t> i(0, i2);
+    __uint128_t a = static_cast<__uint128_t>(i2);
+    for (uint64_t j2 = 1; j2 < 100 * incr; j2 += incr) {
+      TensorUInt128<static_val<0>, uint64_t> j(0, j2);
+      __uint128_t b = static_cast<__uint128_t>(j2);
+      uint64_t actual = (i * j).upper();
+      uint64_t expected = (a * b) >> 64;
+      VERIFY_IS_EQUAL(actual, expected);
+    }
+  }
+}
+
+void test_misc2() {
+  int64_t incr = internal::random<int64_t>(1, 100);
+  for (int64_t log_div = 0; log_div < 63; ++log_div) {
+    for (int64_t divider = 1; divider <= 1000000 * incr; divider += incr) {
+      uint64_t expected = (static_cast<__uint128_t>(1) << (64+log_div)) / static_cast<__uint128_t>(divider) - (static_cast<__uint128_t>(1) << 64) + 1;
+      uint64_t shift = 1ULL << log_div;
+
+      TensorUInt128<uint64_t, uint64_t> result = (TensorUInt128<uint64_t, static_val<0> >(shift, 0) / TensorUInt128<static_val<0>, uint64_t>(divider) - TensorUInt128<static_val<1>, static_val<0> >(1, 0) + TensorUInt128<static_val<0>, static_val<1> >(1));
+      uint64_t actual = static_cast<uint64_t>(result);
+      VERIFY_EQUAL(actual, expected);
+    }
+  }
+}
+
+
+void test_cxx11_tensor_uint128()
+{
+  CALL_SUBTEST_1(test_add());
+  CALL_SUBTEST_2(test_sub());
+  CALL_SUBTEST_3(test_mul());
+  CALL_SUBTEST_4(test_div());
+  CALL_SUBTEST_5(test_misc1());
+  CALL_SUBTEST_6(test_misc2());
+}
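TensorUInt128 emulates a 128-bit unsigned integer as two 64-bit words for platforms without __uint128_t; the tests above simply compare every operation against the compiler builtin. The core trick is carry propagation by comparison, sketched here under the stated assumption (hypothetical names, not Eigen's code):

    #include <cstdint>

    // Illustrative two-word addition: unsigned wrap-around in the low word
    // signals a carry into the high word.
    struct U128 {
      uint64_t hi, lo;
    };

    inline U128 add(U128 a, U128 b) {
      U128 r;
      r.lo = a.lo + b.lo;
      r.hi = a.hi + b.hi + (r.lo < a.lo ? 1 : 0);  // carry detected via wrap
      return r;
    }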
diff --git a/unsupported/test/forward_adolc.cpp b/unsupported/test/forward_adolc.cpp
index d4baafe62..866db8e86 100644
--- a/unsupported/test/forward_adolc.cpp
+++ b/unsupported/test/forward_adolc.cpp
@@ -13,8 +13,6 @@
 #define NUMBER_DIRECTIONS 16
 #include <adolc/adtl.h>
 
-int adtl::ADOLC_numDir;
-
 template<typename Vector>
 EIGEN_DONT_INLINE typename Vector::Scalar foo(const Vector& p)
 {
@@ -123,7 +121,7 @@ template<typename Func> void adolc_forward_jacobian(const Func& f)
 
 void test_forward_adolc()
 {
-  adtl::ADOLC_numDir = NUMBER_DIRECTIONS;
+  adtl::setNumDir(NUMBER_DIRECTIONS);
 
   for(int i = 0; i < g_repeat; i++) {
     CALL_SUBTEST(( adolc_forward_jacobian(TestFunc1<double,2,2>()) ));
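Background for the forward_adolc change: ADOL-C's traceless forward mode used to require the user to define the global adtl::ADOLC_numDir themselves; newer releases made the direction count internal and expose adtl::setNumDir() instead, which must be called before the first adouble is created. The updated test follows that API, so no illustrative code is needed beyond the diff itself.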
diff --git a/unsupported/test/mpreal/mpreal.h b/unsupported/test/mpreal/mpreal.h
index 104cb686f..9b0cf7268 100644
--- a/unsupported/test/mpreal/mpreal.h
+++ b/unsupported/test/mpreal/mpreal.h
@@ -1,33 +1,34 @@
 /*
-    MPFR C++: Multi-precision floating point number class for C++. 
+    MPFR C++: Multi-precision floating point number class for C++.
     Based on MPFR library:    http://mpfr.org
 
     Project homepage:    http://www.holoborodko.com/pavel/mpfr
     Contact e-mail:      pavel@holoborodko.com
 
-    Copyright (c) 2008-2014 Pavel Holoborodko
+    Copyright (c) 2008-2015 Pavel Holoborodko
 
     Contributors:
-    Dmitriy Gubanov, Konstantin Holoborodko, Brian Gladman, 
-    Helmut Jarausch, Fokko Beekhof, Ulrich Mutze, Heinz van Saanen, 
-    Pere Constans, Peter van Hoof, Gael Guennebaud, Tsai Chia Cheng, 
+    Dmitriy Gubanov, Konstantin Holoborodko, Brian Gladman,
+    Helmut Jarausch, Fokko Beekhof, Ulrich Mutze, Heinz van Saanen,
+    Pere Constans, Peter van Hoof, Gael Guennebaud, Tsai Chia Cheng,
     Alexei Zubanov, Jauhien Piatlicki, Victor Berger, John Westwood,
-    Petr Aleksandrov, Orion Poplawski, Charles Karney.
+    Petr Aleksandrov, Orion Poplawski, Charles Karney, Arash Partow,
+    Rodney James, Jorge Leitao.
 
     Licensing:
     (A) MPFR C++ is under GNU General Public License ("GPL").
-    
-    (B) Non-free licenses may also be purchased from the author, for users who 
+
+    (B) Non-free licenses may also be purchased from the author, for users who
         do not want their programs protected by the GPL.
 
-        The non-free licenses are for users that wish to use MPFR C++ in 
-        their products but are unwilling to release their software 
-        under the GPL (which would require them to release source code 
+        The non-free licenses are for users that wish to use MPFR C++ in
+        their products but are unwilling to release their software
+        under the GPL (which would require them to release source code
         and allow free redistribution).
 
         Such users can purchase an unlimited-use license from the author.
         Contact us for more details.
-    
+
     GNU General Public License ("GPL") copyright permissions statement:
     **************************************************************************
     This program is free software: you can redistribute it and/or modify
@@ -55,10 +56,10 @@
 #include <cfloat>
 #include <cmath>
 #include <limits>
+#include <complex>
+#include <algorithm>
 
 // Options
-// FIXME HAVE_INT64_SUPPORT leads to clashes with long int and int64_t on some systems.
-//#define MPREAL_HAVE_INT64_SUPPORT               // Enable int64_t support if possible. Available only for MSVC 2010 & GCC.
 #define MPREAL_HAVE_MSVC_DEBUGVIEW              // Enable Debugger Visualizer for "Debug" builds in MSVC.
 #define MPREAL_HAVE_DYNAMIC_STD_NUMERIC_LIMITS  // Enable extended std::numeric_limits specialization.
                                                 // Meaning that "digits", "round_style" and similar members are defined as functions, not constants.
@@ -66,17 +67,15 @@
 
 // Library version
 #define MPREAL_VERSION_MAJOR 3
-#define MPREAL_VERSION_MINOR 5
-#define MPREAL_VERSION_PATCHLEVEL 9
-#define MPREAL_VERSION_STRING "3.5.9"
+#define MPREAL_VERSION_MINOR 6
+#define MPREAL_VERSION_PATCHLEVEL 2
+#define MPREAL_VERSION_STRING "3.6.2"
 
 // Detect compiler using signatures from http://predef.sourceforge.net/
-#if defined(__GNUC__) && defined(__INTEL_COMPILER)
-    #define IsInf(x) (isinf)(x)                 // Intel ICC compiler on Linux
-
-#elif defined(_MSC_VER)                         // Microsoft Visual C++
-    #define IsInf(x) (!_finite(x))
-
+#if defined(__GNUC__)
+    #define IsInf(x) (isinf)(x)                 // GNU C++/Intel ICC compiler on Linux
+#elif defined(_MSC_VER)                         // Microsoft Visual C++
+    #define IsInf(x) (!_finite(x))
 #else
     #define IsInf(x) (std::isinf)(x)            // GNU C/C++ (and/or other compilers), just hope for C99 conformance
 #endif
@@ -93,54 +92,27 @@
     #define MPREAL_HAVE_MOVE_SUPPORT
 
-    // Use fields in mpfr_t structure to check if it was initialized / set dummy initialization 
+    // Use fields in mpfr_t structure to check if it was initialized / set dummy initialization
     #define mpfr_is_initialized(x)      (0 != (x)->_mpfr_d)
     #define mpfr_set_uninitialized(x)   ((x)->_mpfr_d = 0 )
 #endif
 
-// Detect support for explicit converters. 
+// Detect support for explicit converters.
 #if (__has_feature(cxx_explicit_conversions) || \
-       defined(__GXX_EXPERIMENTAL_CXX0X__) || __cplusplus >= 201103L || \
-       (defined(_MSC_VER) && _MSC_VER >= 1800))
+       (defined(__GXX_EXPERIMENTAL_CXX0X__) && __GNUC_MINOR__ >= 5) || __cplusplus >= 201103L || \
+       (defined(_MSC_VER) && _MSC_VER >= 1800))
     #define MPREAL_HAVE_EXPLICIT_CONVERTERS
 #endif
 
-// Detect available 64-bit capabilities
-#if defined(MPREAL_HAVE_INT64_SUPPORT)
-
-    #define MPFR_USE_INTMAX_T                   // Should be defined before mpfr.h
-
-    #if defined(_MSC_VER)                       // MSVC + Windows
-        #if (_MSC_VER >= 1600)
-            #include <stdint.h>                 // <stdint.h> is available only in msvc2010!
-
-        #else                                   // MPFR relies on intmax_t which is available only in msvc2010
-            #undef MPREAL_HAVE_INT64_SUPPORT    // Besides, MPFR & MPIR have to be compiled with msvc2010
-            #undef MPFR_USE_INTMAX_T            // Since we cannot detect this, disable x64 by default
-                                                // Someone should change this manually if needed.
-        #endif
-
-    #elif defined (__GNUC__) && defined(__linux__)
-        #if defined(__amd64__) || defined(__amd64) || defined(__x86_64__) || defined(__x86_64) || defined(__ia64) || defined(__itanium__) || defined(_M_IA64) || defined (__PPC64__)
-            #undef MPREAL_HAVE_INT64_SUPPORT    // Remove all shaman dances for x64 builds since
-            #undef MPFR_USE_INTMAX_T            // GCC already supports x64 as of "long int" is 64-bit integer, nothing left to do
-        #else
-            #include <stdint.h>                 // use int64_t, uint64_t otherwise
-        #endif
-
-    #else
-        #include <stdint.h>                     // rely on int64_t, uint64_t in all other cases, Mac OSX, etc.
-    #endif
-
-#endif
+#define MPFR_USE_INTMAX_T                       // Enable 64-bit integer types - should be defined before mpfr.h
 
 #if defined(MPREAL_HAVE_MSVC_DEBUGVIEW) && defined(_MSC_VER) && defined(_DEBUG)
     #define MPREAL_MSVC_DEBUGVIEW_CODE     DebugView = toString();
     #define MPREAL_MSVC_DEBUGVIEW_DATA     std::string DebugView;
 #else
-    #define MPREAL_MSVC_DEBUGVIEW_CODE 
-    #define MPREAL_MSVC_DEBUGVIEW_DATA 
+    #define MPREAL_MSVC_DEBUGVIEW_CODE
+    #define MPREAL_MSVC_DEBUGVIEW_DATA
 #endif
 
 #include <mpfr.h>
@@ -150,9 +122,15 @@
 #endif
 
 // Less important options
-#define MPREAL_DOUBLE_BITS_OVERFLOW -1  // Triggers overflow exception during conversion to double if mpreal 
+#define MPREAL_DOUBLE_BITS_OVERFLOW -1  // Triggers overflow exception during conversion to double if mpreal
                                         // cannot fit in MPREAL_DOUBLE_BITS_OVERFLOW bits
                                         // = -1 disables overflow checks (default)
+
+// Fast replacement for mpfr_set_zero(x, +1):
+// (a) uses low-level data members, might not be compatible with new versions of MPFR
+// (b) sign is not set, add (x)->_mpfr_sign = 1;
+#define mpfr_set_zero_fast(x)  ((x)->_mpfr_exp = __MPFR_EXP_ZERO)
+
 #if defined(__GNUC__)
   #define MPREAL_PERMISSIVE_EXPR __extension__
 #else
@@ -164,9 +142,9 @@ namespace mpfr {
 class mpreal {
 private:
     mpfr_t mp;
-    
+
 public:
-    
+
     // Get default rounding mode & precision
     inline static mp_rnd_t   get_default_rnd()    {    return (mp_rnd_t)(mpfr_get_default_rounding_mode());    }
     inline static mp_prec_t  get_default_prec()   {    return mpfr_get_default_prec();    }
@@ -174,29 +152,26 @@ public:
    // Constructors && type conversions
     mpreal();
     mpreal(const mpreal& u);
-    mpreal(const mpf_t u);    
-    mpreal(const mpz_t u,             mp_prec_t prec = mpreal::get_default_prec(), mp_rnd_t mode = mpreal::get_default_rnd());    
-    mpreal(const mpq_t u,             mp_prec_t prec = mpreal::get_default_prec(), mp_rnd_t mode = mpreal::get_default_rnd());    
-    mpreal(const double u,            mp_prec_t prec = mpreal::get_default_prec(), mp_rnd_t mode = mpreal::get_default_rnd());
-    mpreal(const long double u,       mp_prec_t prec = mpreal::get_default_prec(), mp_rnd_t mode = mpreal::get_default_rnd());
-    mpreal(const unsigned long int u, mp_prec_t prec = mpreal::get_default_prec(), mp_rnd_t mode = mpreal::get_default_rnd());
-    mpreal(const unsigned int u,      mp_prec_t prec = mpreal::get_default_prec(), mp_rnd_t mode = mpreal::get_default_rnd());
-    mpreal(const long int u,          mp_prec_t prec = mpreal::get_default_prec(), mp_rnd_t mode = mpreal::get_default_rnd());
-    mpreal(const int u,               mp_prec_t prec = mpreal::get_default_prec(), mp_rnd_t mode = mpreal::get_default_rnd());
-
-    // Construct mpreal from mpfr_t structure.
-    // shared = true allows to avoid deep copy, so that mpreal and 'u' share the same data & pointers.    
-    mpreal(const mpfr_t  u, bool shared = false);   
+    mpreal(const mpf_t u);
+    mpreal(const mpz_t u,                  mp_prec_t prec = mpreal::get_default_prec(), mp_rnd_t mode = mpreal::get_default_rnd());
+    mpreal(const mpq_t u,                  mp_prec_t prec = mpreal::get_default_prec(), mp_rnd_t mode = mpreal::get_default_rnd());
+    mpreal(const double u,                 mp_prec_t prec = mpreal::get_default_prec(), mp_rnd_t mode = mpreal::get_default_rnd());
+    mpreal(const long double u,            mp_prec_t prec = mpreal::get_default_prec(), mp_rnd_t mode = mpreal::get_default_rnd());
+    mpreal(const unsigned long long int u, mp_prec_t prec = mpreal::get_default_prec(), mp_rnd_t mode = mpreal::get_default_rnd());
+    mpreal(const long long int u,          mp_prec_t prec = mpreal::get_default_prec(), mp_rnd_t mode = mpreal::get_default_rnd());
+    mpreal(const unsigned long int u,      mp_prec_t prec = mpreal::get_default_prec(), mp_rnd_t mode = mpreal::get_default_rnd());
+    mpreal(const unsigned int u,           mp_prec_t prec = mpreal::get_default_prec(), mp_rnd_t mode = mpreal::get_default_rnd());
+    mpreal(const long int u,               mp_prec_t prec = mpreal::get_default_prec(), mp_rnd_t mode = mpreal::get_default_rnd());
+    mpreal(const int u,                    mp_prec_t prec = mpreal::get_default_prec(), mp_rnd_t mode = mpreal::get_default_rnd());
 
-#if defined (MPREAL_HAVE_INT64_SUPPORT)
-    mpreal(const uint64_t u,          mp_prec_t prec = mpreal::get_default_prec(),  mp_rnd_t mode = mpreal::get_default_rnd());
-    mpreal(const int64_t u,           mp_prec_t prec = mpreal::get_default_prec(),  mp_rnd_t mode = mpreal::get_default_rnd());
-#endif
+    // Construct mpreal from mpfr_t structure.
+    // shared = true allows to avoid deep copy, so that mpreal and 'u' share the same data & pointers.
+    mpreal(const mpfr_t  u, bool shared = false);
 
     mpreal(const char* s,             mp_prec_t prec = mpreal::get_default_prec(), int base = 10, mp_rnd_t mode = mpreal::get_default_rnd());
     mpreal(const std::string& s,      mp_prec_t prec = mpreal::get_default_prec(), int base = 10, mp_rnd_t mode = mpreal::get_default_rnd());
 
-    ~mpreal();                           
+    ~mpreal();
 
 #ifdef MPREAL_HAVE_MOVE_SUPPORT
     mpreal& operator=(mpreal&& v);
@@ -205,7 +180,7 @@ public:
     // Operations
     // =
-    // +, -, *, /, ++, --, <<, >> 
+    // +, -, *, /, ++, --, <<, >>
     // *=, +=, -=, /=,
     // <, >, ==, <=, >=
 
@@ -215,13 +190,16 @@ public:
     mpreal& operator=(const mpz_t v);
     mpreal& operator=(const mpq_t v);
     mpreal& operator=(const long double v);
-    mpreal& operator=(const double v);        
+    mpreal& operator=(const double v);
     mpreal& operator=(const unsigned long int v);
+    mpreal& operator=(const unsigned long long int v);
+    mpreal& operator=(const long long int v);
     mpreal& operator=(const unsigned int v);
     mpreal& operator=(const long int v);
     mpreal& operator=(const int v);
     mpreal& operator=(const char* s);
     mpreal& operator=(const std::string& s);
+    template <typename real_t> mpreal& operator= (const std::complex<real_t>& z);
 
     // +
     mpreal& operator+=(const mpreal& v);
@@ -235,20 +213,18 @@ public:
     mpreal& operator+=(const long int u);
     mpreal& operator+=(const int u);
 
-#if defined (MPREAL_HAVE_INT64_SUPPORT)
-    mpreal& operator+=(const int64_t  u);
-    mpreal& operator+=(const uint64_t u);
-    mpreal& operator-=(const int64_t  u);
-    mpreal& operator-=(const uint64_t u);
-    mpreal& operator*=(const int64_t  u);
-    mpreal& operator*=(const uint64_t u);
-    mpreal& operator/=(const int64_t  u);
-    mpreal& operator/=(const uint64_t u);
-#endif
+    mpreal& operator+=(const long long int  u);
+    mpreal& operator+=(const unsigned long long int u);
+    mpreal& operator-=(const long long int  u);
+    mpreal& operator-=(const unsigned long long int u);
+    mpreal& operator*=(const long long int  u);
+    mpreal& operator*=(const unsigned long long int u);
+    mpreal& operator/=(const long long int  u);
+    mpreal& operator/=(const unsigned long long int u);
 
     const mpreal operator+() const;
     mpreal& operator++ ();
-    const mpreal  operator++ (int); 
+    const mpreal  operator++ (int);
 
     // -
     mpreal& operator-=(const mpreal& v);
@@ -266,7 +242,7 @@ public:
     friend const mpreal operator-(const long int b, const mpreal& a);
     friend const mpreal operator-(const int b, const mpreal& a);
     friend const mpreal operator-(const double b, const mpreal& a);
-    mpreal& operator-- ();    
+    mpreal& operator-- ();
     const mpreal  operator-- (int);
 
     // *
@@ -279,7 +255,7 @@ public:
     mpreal& operator*=(const unsigned int v);
     mpreal& operator*=(const long int v);
     mpreal& operator*=(const int v);
-    
+
     // /
     mpreal& operator/=(const mpreal& v);
     mpreal& operator/=(const mpz_t v);
@@ -308,51 +284,27 @@ public:
     mpreal& operator>>=(const long int u);
     mpreal& operator>>=(const int u);
 
-    // Boolean Operators
-    friend bool operator >  (const mpreal& a, const mpreal& b);
-    friend bool operator >= (const mpreal& a, const mpreal& b);
-    friend bool operator <  (const mpreal& a, const mpreal& b);
-    friend bool operator <= (const mpreal& a, const mpreal& b);
-    friend bool operator == (const mpreal& a, const mpreal& b);
-    friend bool operator != (const mpreal& a, const mpreal& b);
-
-    // Optimized specializations for boolean operators
-    friend bool operator == (const mpreal& a, const unsigned long int b);
-    friend bool operator == (const mpreal& a, const unsigned int b);
-    friend bool operator == (const mpreal& a, const long int b);
-    friend bool operator == (const mpreal& a, const int b);
-    friend bool operator == (const mpreal& a, const long double b);
-    friend bool operator == (const mpreal& a, const double b);
-
     // Type Conversion operators
-    bool            toBool      (mp_rnd_t mode = GMP_RNDZ)    const;
-    long            toLong      (mp_rnd_t mode = GMP_RNDZ)    const;
-    unsigned long   toULong     (mp_rnd_t mode = GMP_RNDZ)    const;
-    float           toFloat     (mp_rnd_t mode = GMP_RNDN)    const;
-    double          toDouble    (mp_rnd_t mode = GMP_RNDN)    const;
-    long double     toLDouble   (mp_rnd_t mode = GMP_RNDN)    const;
+    bool               toBool      (                        )    const;
+    long               toLong      (mp_rnd_t mode = GMP_RNDZ)    const;
+    unsigned long      toULong     (mp_rnd_t mode = GMP_RNDZ)    const;
+    long long          toLLong     (mp_rnd_t mode = GMP_RNDZ)    const;
+    unsigned long long toULLong    (mp_rnd_t mode = GMP_RNDZ)    const;
+    float              toFloat     (mp_rnd_t mode = GMP_RNDN)    const;
+    double             toDouble    (mp_rnd_t mode = GMP_RNDN)    const;
+    long double        toLDouble   (mp_rnd_t mode = GMP_RNDN)    const;
 
 #if defined (MPREAL_HAVE_EXPLICIT_CONVERTERS)
-    explicit operator bool               () const { return toBool();            }
-    explicit operator int                () const { return int(toLong());       }
-    explicit operator long               () const { return toLong();            }
-    explicit operator long long          () const { return toLong();            }
-    explicit operator unsigned           () const { return unsigned(toULong()); }
-    explicit operator unsigned long      () const { return toULong();           }
-    explicit operator unsigned long long () const { return toULong();           }
-    explicit operator float              () const { return toFloat();           }
-    explicit operator double             () const { return toDouble();          }
-    explicit operator long double        () const { return toLDouble();         }
-#endif
-
-#if defined (MPREAL_HAVE_INT64_SUPPORT)
-    int64_t         toInt64     (mp_rnd_t mode = GMP_RNDZ)    const;
-    uint64_t        toUInt64    (mp_rnd_t mode = GMP_RNDZ)    const;
-
-    #if defined (MPREAL_HAVE_EXPLICIT_CONVERTERS)
-    explicit operator int64_t  () const { return toInt64();  }
-    explicit operator uint64_t () const { return toUInt64(); }
-    #endif
+    explicit operator bool               () const { return toBool();            }
+    explicit operator int                () const { return int(toLong());       }
+    explicit operator long               () const { return toLong();            }
+    explicit operator long long          () const { return toLLong();           }
+    explicit operator unsigned           () const { return unsigned(toULong()); }
+    explicit operator unsigned long      () const { return toULong();           }
+    explicit operator unsigned long long () const { return toULLong();          }
+    explicit operator float              () const { return toFloat();           }
+    explicit operator double             () const { return toDouble();          }
+    explicit operator long double        () const { return toLDouble();         }
 #endif
 
     // Get raw pointers so that mpreal can be directly used in raw mpfr_* functions
@@ -391,11 +343,12 @@ public:
     friend inline const mpreal div_2ui(const mpreal& v, unsigned long int k, mp_rnd_t rnd_mode);
     friend inline const mpreal div_2si(const mpreal& v, long int k, mp_rnd_t rnd_mode);
     friend int cmpabs(const mpreal& a,const mpreal& b);
-    
+
     friend const mpreal log  (const mpreal& v, mp_rnd_t rnd_mode);
     friend const mpreal log2 (const mpreal& v, mp_rnd_t rnd_mode);
+    friend const mpreal logb (const mpreal& v, mp_rnd_t rnd_mode);
     friend const mpreal log10(const mpreal& v, mp_rnd_t rnd_mode);
-    friend const mpreal exp  (const mpreal& v, mp_rnd_t rnd_mode); 
+    friend const mpreal exp  (const mpreal& v, mp_rnd_t rnd_mode);
     friend const mpreal exp2 (const mpreal& v, mp_rnd_t rnd_mode);
     friend const mpreal exp10(const mpreal& v, mp_rnd_t rnd_mode);
     friend const mpreal log1p(const mpreal& v, mp_rnd_t rnd_mode);
@@ -436,21 +389,22 @@ public:
     friend const mpreal eint   (const mpreal& v, mp_rnd_t rnd_mode);
 
     friend const mpreal gamma    (const mpreal& v, mp_rnd_t rnd_mode);
+    friend const mpreal tgamma   (const mpreal& v, mp_rnd_t rnd_mode);
     friend const mpreal lngamma  (const mpreal& v, mp_rnd_t rnd_mode);
     friend const mpreal lgamma   (const mpreal& v, int *signp, mp_rnd_t rnd_mode);
     friend const mpreal zeta     (const mpreal& v, mp_rnd_t rnd_mode);
     friend const mpreal erf      (const mpreal& v, mp_rnd_t rnd_mode);
     friend const mpreal erfc     (const mpreal& v, mp_rnd_t rnd_mode);
-    friend const mpreal besselj0 (const mpreal& v, mp_rnd_t rnd_mode); 
-    friend const mpreal besselj1 (const mpreal& v, mp_rnd_t rnd_mode); 
+    friend const mpreal besselj0 (const mpreal& v, mp_rnd_t rnd_mode);
+    friend const mpreal besselj1 (const mpreal& v, mp_rnd_t rnd_mode);
     friend const mpreal besseljn (long n, const mpreal& v, mp_rnd_t rnd_mode);
     friend const mpreal bessely0 (const mpreal& v, mp_rnd_t rnd_mode);
     friend const mpreal bessely1 (const mpreal& v, mp_rnd_t rnd_mode);
-    friend const mpreal besselyn (long n, const mpreal& v, mp_rnd_t rnd_mode); 
+    friend const mpreal besselyn (long n, const mpreal& v, mp_rnd_t rnd_mode);
     friend const mpreal fma (const mpreal& v1, const mpreal& v2, const mpreal& v3, mp_rnd_t rnd_mode);
     friend const mpreal fms (const mpreal& v1, const mpreal& v2, const mpreal& v3, mp_rnd_t rnd_mode);
     friend const mpreal agm (const mpreal& v1, const mpreal& v2, mp_rnd_t rnd_mode);
-    friend const mpreal sum (const mpreal tab[], unsigned long int n, mp_rnd_t rnd_mode);
+    friend const mpreal sum (const mpreal tab[], const unsigned long int n, int& status, mp_rnd_t rnd_mode);
     friend int sgn(const mpreal& v); // returns -1 or +1
 
 // MPFR 2.4.0 Specifics
@@ -465,28 +419,26 @@ public:
     friend const mpreal mod (const mpreal& x, const mpreal& y, mp_rnd_t rnd_mode);      // Modulus after division
 #endif
 
-// MPFR 3.0.0 Specifics
 #if (MPFR_VERSION >= MPFR_VERSION_NUM(3,0,0))
     friend const mpreal digamma (const mpreal& v,        mp_rnd_t rnd_mode);
     friend const mpreal ai      (const mpreal& v,        mp_rnd_t rnd_mode);
     friend const mpreal urandom (gmp_randstate_t& state, mp_rnd_t rnd_mode);     // use gmp_randinit_default() to init state, gmp_randclear() to clear
+#endif
+
+#if (MPFR_VERSION >= MPFR_VERSION_NUM(3,1,0))
     friend const mpreal grandom (gmp_randstate_t& state, mp_rnd_t rnd_mode);     // use gmp_randinit_default() to init state, gmp_randclear() to clear
     friend const mpreal grandom (unsigned int seed);
 #endif
-    
+
     // Uniformly distributed random number generation in [0,1] using
     // Mersenne-Twister algorithm by default.
     // Use parameter to setup seed, e.g.: random((unsigned)time(NULL))
     // Check urandom() for more precise control.
     friend const mpreal random(unsigned int seed);
 
-    // Exponent and mantissa manipulation
-    friend const mpreal frexp(const mpreal& v, mp_exp_t* exp);    
-    friend const mpreal ldexp(const mpreal& v, mp_exp_t exp);
-
     // Splits mpreal value into fractional and integer parts.
     // Returns fractional part and stores integer part in n.
-    friend const mpreal modf(const mpreal& v, mpreal& n);    
+    friend const mpreal modf(const mpreal& v, mpreal& n);
 
     // Constants
     // don't forget to call mpfr_free_cache() for every thread where you are using const-functions
@@ -515,14 +467,14 @@ public:
     friend const mpreal frac (const mpreal& v, mp_rnd_t rnd_mode);
     friend const mpreal remainder (         const mpreal& x, const mpreal& y, mp_rnd_t rnd_mode);
     friend const mpreal remquo (long* q, const mpreal& x, const mpreal& y, mp_rnd_t rnd_mode);
-    
+
     // Miscellaneous Functions
     friend const mpreal nexttoward (const mpreal& x, const mpreal& y);
     friend const mpreal nextabove  (const mpreal& x);
     friend const mpreal nextbelow  (const mpreal& x);
 
     // use gmp_randinit_default() to init state, gmp_randclear() to clear
-    friend const mpreal urandomb (gmp_randstate_t& state); 
+    friend const mpreal urandomb (gmp_randstate_t& state);
 
 // MPFR < 2.4.2 Specifics
 #if (MPFR_VERSION <= MPFR_VERSION_NUM(2,4,2))
@@ -549,9 +501,9 @@ public:
     // Aliases for get_prec(), set_prec() - needed for compatibility with std::complex<mpreal> interface
     inline mpreal& setPrecision(int Precision, mp_rnd_t RoundingMode = get_default_rnd());
     inline int getPrecision() const;
-    
+
     // Set mpreal to +/- inf, NaN, +/-0
-    mpreal&        setInf  (int Sign = +1);    
+    mpreal&        setInf  (int Sign = +1);
     mpreal&        setNan  ();
     mpreal&        setZero (int Sign = +1);
     mpreal&        setSign (int Sign, mp_rnd_t RoundingMode = get_default_rnd());
@@ -560,7 +512,7 @@ public:
     mp_exp_t get_exp();
     int set_exp(mp_exp_t e);
     int check_range  (int t, mp_rnd_t rnd_mode = get_default_rnd());
-    int subnormalize (int t,mp_rnd_t rnd_mode = get_default_rnd());
+    int subnormalize (int t, mp_rnd_t rnd_mode = get_default_rnd());
 
     // Inexact conversion from float
     inline bool fits_in_bits(double x, int n);
@@ -580,7 +532,7 @@ public:
 
     // Efficient swapping of two mpreal values - needed for std algorithms
     friend void swap(mpreal& x, mpreal& y);
-    
+
     friend const mpreal fmax(const mpreal& x, const mpreal& y, mp_rnd_t rnd_mode);
     friend const mpreal fmin(const mpreal& x, const mpreal& y, mp_rnd_t rnd_mode);
 
@@ -590,7 +542,7 @@ private:
     //
     // mpfr::mpreal=<DebugView>                                ; Show value only
     // mpfr::mpreal=<DebugView>, <mp[0]._mpfr_prec,u>bits      ; Show value & precision
-    // 
+    //
     // at the beginning of
     // [Visual Studio Installation Folder]\Common7\Packages\Debugger\autoexp.dat
     MPREAL_MSVC_DEBUGVIEW_DATA
@@ -609,15 +561,15 @@ public:
 //////////////////////////////////////////////////////////////////////////
 // Constructors & converters
 // Default constructor: creates mp number and initializes it to 0.
-inline mpreal::mpreal() -{ - mpfr_init2 (mpfr_ptr(), mpreal::get_default_prec()); - mpfr_set_ui(mpfr_ptr(), 0, mpreal::get_default_rnd()); +inline mpreal::mpreal() +{ + mpfr_init2(mpfr_ptr(), mpreal::get_default_prec()); + mpfr_set_zero_fast(mpfr_ptr()); MPREAL_MSVC_DEBUGVIEW_CODE; } -inline mpreal::mpreal(const mpreal& u) +inline mpreal::mpreal(const mpreal& u) { mpfr_init2(mpfr_ptr(),mpfr_get_prec(u.mpfr_srcptr())); mpfr_set (mpfr_ptr(),u.mpfr_srcptr(),mpreal::get_default_rnd()); @@ -628,7 +580,7 @@ inline mpreal::mpreal(const mpreal& u) #ifdef MPREAL_HAVE_MOVE_SUPPORT inline mpreal::mpreal(mpreal&& other) { - mpfr_set_uninitialized(mpfr_ptr()); // make sure "other" holds no pinter to actual data + mpfr_set_uninitialized(mpfr_ptr()); // make sure "other" holds no pointer to actual data mpfr_swap(mpfr_ptr(), other.mpfr_ptr()); MPREAL_MSVC_DEBUGVIEW_CODE; @@ -700,15 +652,31 @@ inline mpreal::mpreal(const double u, mp_prec_t prec, mp_rnd_t mode) } inline mpreal::mpreal(const long double u, mp_prec_t prec, mp_rnd_t mode) -{ +{ mpfr_init2 (mpfr_ptr(), prec); mpfr_set_ld(mpfr_ptr(), u, mode); MPREAL_MSVC_DEBUGVIEW_CODE; } +inline mpreal::mpreal(const unsigned long long int u, mp_prec_t prec, mp_rnd_t mode) +{ + mpfr_init2 (mpfr_ptr(), prec); + mpfr_set_uj(mpfr_ptr(), u, mode); + + MPREAL_MSVC_DEBUGVIEW_CODE; +} + +inline mpreal::mpreal(const long long int u, mp_prec_t prec, mp_rnd_t mode) +{ + mpfr_init2 (mpfr_ptr(), prec); + mpfr_set_sj(mpfr_ptr(), u, mode); + + MPREAL_MSVC_DEBUGVIEW_CODE; +} + inline mpreal::mpreal(const unsigned long int u, mp_prec_t prec, mp_rnd_t mode) -{ +{ mpfr_init2 (mpfr_ptr(), prec); mpfr_set_ui(mpfr_ptr(), u, mode); @@ -716,7 +684,7 @@ inline mpreal::mpreal(const unsigned long int u, mp_prec_t prec, mp_rnd_t mode) } inline mpreal::mpreal(const unsigned int u, mp_prec_t prec, mp_rnd_t mode) -{ +{ mpfr_init2 (mpfr_ptr(), prec); mpfr_set_ui(mpfr_ptr(), u, mode); @@ -724,7 +692,7 @@ inline mpreal::mpreal(const unsigned int u, mp_prec_t prec, mp_rnd_t mode) } inline mpreal::mpreal(const long int u, mp_prec_t prec, mp_rnd_t mode) -{ +{ mpfr_init2 (mpfr_ptr(), prec); mpfr_set_si(mpfr_ptr(), u, mode); @@ -732,35 +700,17 @@ inline mpreal::mpreal(const long int u, mp_prec_t prec, mp_rnd_t mode) } inline mpreal::mpreal(const int u, mp_prec_t prec, mp_rnd_t mode) -{ +{ mpfr_init2 (mpfr_ptr(), prec); mpfr_set_si(mpfr_ptr(), u, mode); MPREAL_MSVC_DEBUGVIEW_CODE; } -#if defined (MPREAL_HAVE_INT64_SUPPORT) -inline mpreal::mpreal(const uint64_t u, mp_prec_t prec, mp_rnd_t mode) -{ - mpfr_init2 (mpfr_ptr(), prec); - mpfr_set_uj(mpfr_ptr(), u, mode); - - MPREAL_MSVC_DEBUGVIEW_CODE; -} - -inline mpreal::mpreal(const int64_t u, mp_prec_t prec, mp_rnd_t mode) -{ - mpfr_init2 (mpfr_ptr(), prec); - mpfr_set_sj(mpfr_ptr(), u, mode); - - MPREAL_MSVC_DEBUGVIEW_CODE; -} -#endif - inline mpreal::mpreal(const char* s, mp_prec_t prec, int base, mp_rnd_t mode) { mpfr_init2 (mpfr_ptr(), prec); - mpfr_set_str(mpfr_ptr(), s, base, mode); + mpfr_set_str(mpfr_ptr(), s, base, mode); MPREAL_MSVC_DEBUGVIEW_CODE; } @@ -768,7 +718,7 @@ inline mpreal::mpreal(const char* s, mp_prec_t prec, int base, mp_rnd_t mode) inline mpreal::mpreal(const std::string& s, mp_prec_t prec, int base, mp_rnd_t mode) { mpfr_init2 (mpfr_ptr(), prec); - mpfr_set_str(mpfr_ptr(), s.c_str(), base, mode); + mpfr_set_str(mpfr_ptr(), s.c_str(), base, mode); MPREAL_MSVC_DEBUGVIEW_CODE; } @@ -776,15 +726,15 @@ inline mpreal::mpreal(const std::string& s, mp_prec_t prec, int base, mp_rnd_t m inline void mpreal::clear(::mpfr_ptr x) { #ifdef 
MPREAL_HAVE_MOVE_SUPPORT - if(mpfr_is_initialized(x)) + if(mpfr_is_initialized(x)) #endif mpfr_clear(x); } -inline mpreal::~mpreal() -{ +inline mpreal::~mpreal() +{ clear(mpfr_ptr()); -} +} // internal namespace needed for template magic namespace internal{ @@ -792,58 +742,55 @@ namespace internal{ // Use SFINAE to restrict arithmetic operations instantiation only for numeric types // This is needed for smooth integration with libraries based on expression templates, like Eigen. // TODO: Do the same for boolean operators. - template <typename ArgumentType> struct result_type {}; - - template <> struct result_type<mpreal> {typedef mpreal type;}; - template <> struct result_type<mpz_t> {typedef mpreal type;}; - template <> struct result_type<mpq_t> {typedef mpreal type;}; - template <> struct result_type<long double> {typedef mpreal type;}; - template <> struct result_type<double> {typedef mpreal type;}; - template <> struct result_type<unsigned long int> {typedef mpreal type;}; - template <> struct result_type<unsigned int> {typedef mpreal type;}; - template <> struct result_type<long int> {typedef mpreal type;}; - template <> struct result_type<int> {typedef mpreal type;}; + template <typename ArgumentType> struct result_type {}; -#if defined (MPREAL_HAVE_INT64_SUPPORT) - template <> struct result_type<uint64_t> {typedef mpreal type;}; - template <> struct result_type<int64_t> {typedef mpreal type;}; -#endif + template <> struct result_type<mpreal> {typedef mpreal type;}; + template <> struct result_type<mpz_t> {typedef mpreal type;}; + template <> struct result_type<mpq_t> {typedef mpreal type;}; + template <> struct result_type<long double> {typedef mpreal type;}; + template <> struct result_type<double> {typedef mpreal type;}; + template <> struct result_type<unsigned long int> {typedef mpreal type;}; + template <> struct result_type<unsigned int> {typedef mpreal type;}; + template <> struct result_type<long int> {typedef mpreal type;}; + template <> struct result_type<int> {typedef mpreal type;}; + template <> struct result_type<long long> {typedef mpreal type;}; + template <> struct result_type<unsigned long long> {typedef mpreal type;}; } // + Addition -template <typename Rhs> -inline const typename internal::result_type<Rhs>::type +template <typename Rhs> +inline const typename internal::result_type<Rhs>::type operator+(const mpreal& lhs, const Rhs& rhs){ return mpreal(lhs) += rhs; } -template <typename Lhs> -inline const typename internal::result_type<Lhs>::type - operator+(const Lhs& lhs, const mpreal& rhs){ return mpreal(rhs) += lhs; } +template <typename Lhs> +inline const typename internal::result_type<Lhs>::type + operator+(const Lhs& lhs, const mpreal& rhs){ return mpreal(rhs) += lhs; } // - Subtraction -template <typename Rhs> -inline const typename internal::result_type<Rhs>::type +template <typename Rhs> +inline const typename internal::result_type<Rhs>::type operator-(const mpreal& lhs, const Rhs& rhs){ return mpreal(lhs) -= rhs; } -template <typename Lhs> -inline const typename internal::result_type<Lhs>::type +template <typename Lhs> +inline const typename internal::result_type<Lhs>::type operator-(const Lhs& lhs, const mpreal& rhs){ return mpreal(lhs) -= rhs; } // * Multiplication -template <typename Rhs> -inline const typename internal::result_type<Rhs>::type +template <typename Rhs> +inline const typename internal::result_type<Rhs>::type operator*(const mpreal& lhs, const Rhs& rhs){ return mpreal(lhs) *= rhs; } -template <typename Lhs> -inline const typename internal::result_type<Lhs>::type - operator*(const Lhs& lhs, const mpreal& rhs){ return mpreal(rhs) *= lhs; } +template <typename Lhs> +inline const typename internal::result_type<Lhs>::type + operator*(const Lhs& lhs, const mpreal& rhs){ return mpreal(rhs) *= lhs; } // / Division -template <typename Rhs> -inline const typename internal::result_type<Rhs>::type +template <typename Rhs> +inline const typename internal::result_type<Rhs>::type operator/(const mpreal& lhs, const Rhs& rhs){ return mpreal(lhs) /= rhs; } -template <typename Lhs> -inline const typename
internal::result_type<Lhs>::type +template <typename Lhs> +inline const typename internal::result_type<Lhs>::type operator/(const Lhs& lhs, const mpreal& rhs){ return mpreal(lhs) /= rhs; } ////////////////////////////////////////////////////////////////////////// @@ -893,17 +840,17 @@ const mpreal pow(const long int a, const double b, mp_rnd_t rnd_mode = mpreal::g const mpreal pow(const int a, const unsigned long int b, mp_rnd_t rnd_mode = mpreal::get_default_rnd()); const mpreal pow(const int a, const unsigned int b, mp_rnd_t rnd_mode = mpreal::get_default_rnd()); const mpreal pow(const int a, const long int b, mp_rnd_t rnd_mode = mpreal::get_default_rnd()); -const mpreal pow(const int a, const int b, mp_rnd_t rnd_mode = mpreal::get_default_rnd()); +const mpreal pow(const int a, const int b, mp_rnd_t rnd_mode = mpreal::get_default_rnd()); const mpreal pow(const int a, const long double b, mp_rnd_t rnd_mode = mpreal::get_default_rnd()); -const mpreal pow(const int a, const double b, mp_rnd_t rnd_mode = mpreal::get_default_rnd()); +const mpreal pow(const int a, const double b, mp_rnd_t rnd_mode = mpreal::get_default_rnd()); -const mpreal pow(const long double a, const long double b, mp_rnd_t rnd_mode = mpreal::get_default_rnd()); +const mpreal pow(const long double a, const long double b, mp_rnd_t rnd_mode = mpreal::get_default_rnd()); const mpreal pow(const long double a, const unsigned long int b, mp_rnd_t rnd_mode = mpreal::get_default_rnd()); const mpreal pow(const long double a, const unsigned int b, mp_rnd_t rnd_mode = mpreal::get_default_rnd()); const mpreal pow(const long double a, const long int b, mp_rnd_t rnd_mode = mpreal::get_default_rnd()); const mpreal pow(const long double a, const int b, mp_rnd_t rnd_mode = mpreal::get_default_rnd()); -const mpreal pow(const double a, const double b, mp_rnd_t rnd_mode = mpreal::get_default_rnd()); +const mpreal pow(const double a, const double b, mp_rnd_t rnd_mode = mpreal::get_default_rnd()); const mpreal pow(const double a, const unsigned long int b, mp_rnd_t rnd_mode = mpreal::get_default_rnd()); const mpreal pow(const double a, const unsigned int b, mp_rnd_t rnd_mode = mpreal::get_default_rnd()); const mpreal pow(const double a, const long int b, mp_rnd_t rnd_mode = mpreal::get_default_rnd()); @@ -920,9 +867,9 @@ inline const mpreal div_2si(const mpreal& v, long int k, mp_rnd_t rnd_mode = mpr inline mpreal machine_epsilon(mp_prec_t prec = mpreal::get_default_prec()); // Returns smallest eps such that x + eps != x (relative machine epsilon) -inline mpreal machine_epsilon(const mpreal& x); +inline mpreal machine_epsilon(const mpreal& x); -// Gives max & min values for the required precision, +// Gives max & min values for the required precision, // minval is 'safe' meaning 1 / minval does not overflow // maxval is 'safe' meaning 1 / maxval does not underflow inline mpreal minval(mp_prec_t prec = mpreal::get_default_prec()); @@ -935,13 +882,13 @@ inline bool isEqualFuzzy(const mpreal& a, const mpreal& b, const mpreal& eps); inline bool isEqualFuzzy(const mpreal& a, const mpreal& b); // 'Bitwise' equality check -// maxUlps - a and b can be apart by maxUlps binary numbers. +// maxUlps - a and b can be apart by maxUlps binary numbers. inline bool isEqualUlps(const mpreal& a, const mpreal& b, int maxUlps); ////////////////////////////////////////////////////////////////////////// -// Convert precision in 'bits' to decimal digits and vice versa.
-// bits = ceil(digits*log[2](10)) -// digits = floor(bits*log[10](2)) +// Convert precision in 'bits' to decimal digits and vice versa. +// bits = ceil(digits*log[2](10)) +// digits = floor(bits*log[10](2)) inline mp_prec_t digits2bits(int d); inline int bits2digits(mp_prec_t b); @@ -979,7 +926,7 @@ inline mpreal& mpreal::operator=(const mpreal& v) inline mpreal& mpreal::operator=(const mpf_t v) { mpfr_set_f(mpfr_ptr(), v, mpreal::get_default_rnd()); - + MPREAL_MSVC_DEBUGVIEW_CODE; return *this; } @@ -987,7 +934,7 @@ inline mpreal& mpreal::operator=(const mpf_t v) inline mpreal& mpreal::operator=(const mpz_t v) { mpfr_set_z(mpfr_ptr(), v, mpreal::get_default_rnd()); - + MPREAL_MSVC_DEBUGVIEW_CODE; return *this; } @@ -1000,16 +947,16 @@ inline mpreal& mpreal::operator=(const mpq_t v) return *this; } -inline mpreal& mpreal::operator=(const long double v) -{ +inline mpreal& mpreal::operator=(const long double v) +{ mpfr_set_ld(mpfr_ptr(), v, mpreal::get_default_rnd()); MPREAL_MSVC_DEBUGVIEW_CODE; return *this; } -inline mpreal& mpreal::operator=(const double v) -{ +inline mpreal& mpreal::operator=(const double v) +{ #if (MPREAL_DOUBLE_BITS_OVERFLOW > -1) if(fits_in_bits(v, MPREAL_DOUBLE_BITS_OVERFLOW)) { @@ -1024,33 +971,49 @@ inline mpreal& mpreal::operator=(const double v) return *this; } -inline mpreal& mpreal::operator=(const unsigned long int v) -{ - mpfr_set_ui(mpfr_ptr(), v, mpreal::get_default_rnd()); +inline mpreal& mpreal::operator=(const unsigned long int v) +{ + mpfr_set_ui(mpfr_ptr(), v, mpreal::get_default_rnd()); MPREAL_MSVC_DEBUGVIEW_CODE; return *this; } -inline mpreal& mpreal::operator=(const unsigned int v) -{ - mpfr_set_ui(mpfr_ptr(), v, mpreal::get_default_rnd()); +inline mpreal& mpreal::operator=(const unsigned int v) +{ + mpfr_set_ui(mpfr_ptr(), v, mpreal::get_default_rnd()); MPREAL_MSVC_DEBUGVIEW_CODE; return *this; } -inline mpreal& mpreal::operator=(const long int v) -{ - mpfr_set_si(mpfr_ptr(), v, mpreal::get_default_rnd()); +inline mpreal& mpreal::operator=(const unsigned long long int v) +{ + mpfr_set_uj(mpfr_ptr(), v, mpreal::get_default_rnd()); + + MPREAL_MSVC_DEBUGVIEW_CODE; + return *this; +} + +inline mpreal& mpreal::operator=(const long long int v) +{ + mpfr_set_sj(mpfr_ptr(), v, mpreal::get_default_rnd()); + + MPREAL_MSVC_DEBUGVIEW_CODE; + return *this; +} + +inline mpreal& mpreal::operator=(const long int v) +{ + mpfr_set_si(mpfr_ptr(), v, mpreal::get_default_rnd()); MPREAL_MSVC_DEBUGVIEW_CODE; return *this; } inline mpreal& mpreal::operator=(const int v) -{ - mpfr_set_si(mpfr_ptr(), v, mpreal::get_default_rnd()); +{ + mpfr_set_si(mpfr_ptr(), v, mpreal::get_default_rnd()); MPREAL_MSVC_DEBUGVIEW_CODE; return *this; @@ -1071,7 +1034,7 @@ inline mpreal& mpreal::operator=(const char* s) if(0 == mpfr_set_str(t, s, 10, mpreal::get_default_rnd())) { - mpfr_set(mpfr_ptr(), t, mpreal::get_default_rnd()); + mpfr_set(mpfr_ptr(), t, mpreal::get_default_rnd()); MPREAL_MSVC_DEBUGVIEW_CODE; } @@ -1094,7 +1057,7 @@ inline mpreal& mpreal::operator=(const std::string& s) if(0 == mpfr_set_str(t, s.c_str(), 10, mpreal::get_default_rnd())) { - mpfr_set(mpfr_ptr(), t, mpreal::get_default_rnd()); + mpfr_set(mpfr_ptr(), t, mpreal::get_default_rnd()); MPREAL_MSVC_DEBUGVIEW_CODE; } @@ -1102,6 +1065,11 @@ inline mpreal& mpreal::operator=(const std::string& s) return *this; } +template <typename real_t> +inline mpreal& mpreal::operator= (const std::complex<real_t>& z) +{ + return *this = z.real(); +} ////////////////////////////////////////////////////////////////////////// // + Addition @@ -1135,9 +1103,9
@@ inline mpreal& mpreal::operator+=(const mpq_t u) inline mpreal& mpreal::operator+= (const long double u) { - *this += mpreal(u); + *this += mpreal(u); MPREAL_MSVC_DEBUGVIEW_CODE; - return *this; + return *this; } inline mpreal& mpreal::operator+= (const double u) @@ -1180,16 +1148,14 @@ inline mpreal& mpreal::operator+=(const int u) return *this; } -#if defined (MPREAL_HAVE_INT64_SUPPORT) -inline mpreal& mpreal::operator+=(const int64_t u){ *this += mpreal(u); MPREAL_MSVC_DEBUGVIEW_CODE; return *this; } -inline mpreal& mpreal::operator+=(const uint64_t u){ *this += mpreal(u); MPREAL_MSVC_DEBUGVIEW_CODE; return *this; } -inline mpreal& mpreal::operator-=(const int64_t u){ *this -= mpreal(u); MPREAL_MSVC_DEBUGVIEW_CODE; return *this; } -inline mpreal& mpreal::operator-=(const uint64_t u){ *this -= mpreal(u); MPREAL_MSVC_DEBUGVIEW_CODE; return *this; } -inline mpreal& mpreal::operator*=(const int64_t u){ *this *= mpreal(u); MPREAL_MSVC_DEBUGVIEW_CODE; return *this; } -inline mpreal& mpreal::operator*=(const uint64_t u){ *this *= mpreal(u); MPREAL_MSVC_DEBUGVIEW_CODE; return *this; } -inline mpreal& mpreal::operator/=(const int64_t u){ *this /= mpreal(u); MPREAL_MSVC_DEBUGVIEW_CODE; return *this; } -inline mpreal& mpreal::operator/=(const uint64_t u){ *this /= mpreal(u); MPREAL_MSVC_DEBUGVIEW_CODE; return *this; } -#endif +inline mpreal& mpreal::operator+=(const long long int u) { *this += mpreal(u); MPREAL_MSVC_DEBUGVIEW_CODE; return *this; } +inline mpreal& mpreal::operator+=(const unsigned long long int u){ *this += mpreal(u); MPREAL_MSVC_DEBUGVIEW_CODE; return *this; } +inline mpreal& mpreal::operator-=(const long long int u) { *this -= mpreal(u); MPREAL_MSVC_DEBUGVIEW_CODE; return *this; } +inline mpreal& mpreal::operator-=(const unsigned long long int u){ *this -= mpreal(u); MPREAL_MSVC_DEBUGVIEW_CODE; return *this; } +inline mpreal& mpreal::operator*=(const long long int u) { *this *= mpreal(u); MPREAL_MSVC_DEBUGVIEW_CODE; return *this; } +inline mpreal& mpreal::operator*=(const unsigned long long int u){ *this *= mpreal(u); MPREAL_MSVC_DEBUGVIEW_CODE; return *this; } +inline mpreal& mpreal::operator/=(const long long int u) { *this /= mpreal(u); MPREAL_MSVC_DEBUGVIEW_CODE; return *this; } +inline mpreal& mpreal::operator/=(const unsigned long long int u){ *this /= mpreal(u); MPREAL_MSVC_DEBUGVIEW_CODE; return *this; } inline const mpreal mpreal::operator+()const { return mpreal(*this); } @@ -1200,7 +1166,7 @@ inline const mpreal operator+(const mpreal& a, const mpreal& b) return c; } -inline mpreal& mpreal::operator++() +inline mpreal& mpreal::operator++() { return *this += 1; } @@ -1212,7 +1178,7 @@ inline const mpreal mpreal::operator++ (int) return x; } -inline mpreal& mpreal::operator--() +inline mpreal& mpreal::operator--() { return *this -= 1; } @@ -1249,9 +1215,9 @@ inline mpreal& mpreal::operator-=(const mpq_t v) inline mpreal& mpreal::operator-=(const long double v) { - *this -= mpreal(v); + *this -= mpreal(v); MPREAL_MSVC_DEBUGVIEW_CODE; - return *this; + return *this; } inline mpreal& mpreal::operator-=(const double v) @@ -1259,7 +1225,7 @@ inline mpreal& mpreal::operator-=(const double v) #if (MPFR_VERSION >= MPFR_VERSION_NUM(2,4,0)) mpfr_sub_d(mpfr_ptr(),mpfr_srcptr(),v,mpreal::get_default_rnd()); #else - *this -= mpreal(v); + *this -= mpreal(v); #endif MPREAL_MSVC_DEBUGVIEW_CODE; @@ -1374,9 +1340,9 @@ inline mpreal& mpreal::operator*=(const mpq_t v) inline mpreal& mpreal::operator*=(const long double v) { - *this *= mpreal(v); + *this *= mpreal(v); 
MPREAL_MSVC_DEBUGVIEW_CODE; - return *this; + return *this; } inline mpreal& mpreal::operator*=(const double v) @@ -1384,7 +1350,7 @@ inline mpreal& mpreal::operator*=(const double v) #if (MPFR_VERSION >= MPFR_VERSION_NUM(2,4,0)) mpfr_mul_d(mpfr_ptr(),mpfr_srcptr(),v,mpreal::get_default_rnd()); #else - *this *= mpreal(v); + *this *= mpreal(v); #endif MPREAL_MSVC_DEBUGVIEW_CODE; return *this; @@ -1452,7 +1418,7 @@ inline mpreal& mpreal::operator/=(const long double v) { *this /= mpreal(v); MPREAL_MSVC_DEBUGVIEW_CODE; - return *this; + return *this; } inline mpreal& mpreal::operator/=(const double v) @@ -1460,7 +1426,7 @@ inline mpreal& mpreal::operator/=(const double v) #if (MPFR_VERSION >= MPFR_VERSION_NUM(2,4,0)) mpfr_div_d(mpfr_ptr(),mpfr_srcptr(),v,mpreal::get_default_rnd()); #else - *this /= mpreal(v); + *this /= mpreal(v); #endif MPREAL_MSVC_DEBUGVIEW_CODE; return *this; @@ -1671,21 +1637,65 @@ inline const mpreal div_2si(const mpreal& v, long int k, mp_rnd_t rnd_mode) } ////////////////////////////////////////////////////////////////////////// -//Boolean operators -inline bool operator > (const mpreal& a, const mpreal& b){ return (mpfr_greater_p (a.mpfr_srcptr(),b.mpfr_srcptr()) !=0 ); } -inline bool operator >= (const mpreal& a, const mpreal& b){ return (mpfr_greaterequal_p (a.mpfr_srcptr(),b.mpfr_srcptr()) !=0 ); } -inline bool operator < (const mpreal& a, const mpreal& b){ return (mpfr_less_p (a.mpfr_srcptr(),b.mpfr_srcptr()) !=0 ); } -inline bool operator <= (const mpreal& a, const mpreal& b){ return (mpfr_lessequal_p (a.mpfr_srcptr(),b.mpfr_srcptr()) !=0 ); } -inline bool operator == (const mpreal& a, const mpreal& b){ return (mpfr_equal_p (a.mpfr_srcptr(),b.mpfr_srcptr()) !=0 ); } -inline bool operator != (const mpreal& a, const mpreal& b){ return (mpfr_lessgreater_p (a.mpfr_srcptr(),b.mpfr_srcptr()) !=0 ); } +//Relational operators -inline bool operator == (const mpreal& a, const unsigned long int b ){ return (mpfr_cmp_ui(a.mpfr_srcptr(),b) == 0 ); } -inline bool operator == (const mpreal& a, const unsigned int b ){ return (mpfr_cmp_ui(a.mpfr_srcptr(),b) == 0 ); } -inline bool operator == (const mpreal& a, const long int b ){ return (mpfr_cmp_si(a.mpfr_srcptr(),b) == 0 ); } -inline bool operator == (const mpreal& a, const int b ){ return (mpfr_cmp_si(a.mpfr_srcptr(),b) == 0 ); } -inline bool operator == (const mpreal& a, const long double b ){ return (mpfr_cmp_ld(a.mpfr_srcptr(),b) == 0 ); } -inline bool operator == (const mpreal& a, const double b ){ return (mpfr_cmp_d (a.mpfr_srcptr(),b) == 0 ); } + +// WARNING: +// +// Please note that the following checks for double-NaN are guaranteed to work only in IEEE math mode: +// +// isnan(b) = (b != b) +// isnan(b) = !(b == b) (we use in code below) +// +// Be cautious if you use compiler options which break strict IEEE compliance (e.g. -ffast-math in GCC). +// Use std::isnan instead (C++11).
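To make the guard above concrete, here is a minimal standalone sketch (illustrative only, not part of the patch; it assumes mpreal.h and MPFR are installed and on the include path):

    #include <mpreal.h>
    #include <cassert>

    int main()
    {
        mpfr::mpreal x;
        x.setNan();

        assert(!(x >  0.0));   // ordered comparisons against NaN are false
        assert(!(x >= 0.0));
        assert(!(x == 0.0));
        assert(  x != 0.0 );   // operator!= is defined as !(a == b), so true for NaN

        // Note: the (b == b) guard on the double operand is reliable only
        // under strict IEEE semantics (e.g. without -ffast-math).
        return 0;
    }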
+inline bool operator > (const mpreal& a, const mpreal& b ){ return (mpfr_greater_p(a.mpfr_srcptr(),b.mpfr_srcptr()) != 0 ); } +inline bool operator > (const mpreal& a, const unsigned long int b ){ return !isnan EIGEN_NOT_A_MACRO (a) && (mpfr_cmp_ui(a.mpfr_srcptr(),b) > 0 ); } +inline bool operator > (const mpreal& a, const unsigned int b ){ return !isnan EIGEN_NOT_A_MACRO (a) && (mpfr_cmp_ui(a.mpfr_srcptr(),b) > 0 ); } +inline bool operator > (const mpreal& a, const long int b ){ return !isnan EIGEN_NOT_A_MACRO (a) && (mpfr_cmp_si(a.mpfr_srcptr(),b) > 0 ); } +inline bool operator > (const mpreal& a, const int b ){ return !isnan EIGEN_NOT_A_MACRO (a) && (mpfr_cmp_si(a.mpfr_srcptr(),b) > 0 ); } +inline bool operator > (const mpreal& a, const long double b ){ return !isnan EIGEN_NOT_A_MACRO (a) && (b == b) && (mpfr_cmp_ld(a.mpfr_srcptr(),b) > 0 ); } +inline bool operator > (const mpreal& a, const double b ){ return !isnan EIGEN_NOT_A_MACRO (a) && (b == b) && (mpfr_cmp_d (a.mpfr_srcptr(),b) > 0 ); } + +inline bool operator >= (const mpreal& a, const mpreal& b ){ return (mpfr_greaterequal_p(a.mpfr_srcptr(),b.mpfr_srcptr()) != 0 ); } +inline bool operator >= (const mpreal& a, const unsigned long int b ){ return !isnan EIGEN_NOT_A_MACRO (a) && (mpfr_cmp_ui(a.mpfr_srcptr(),b) >= 0 ); } +// inline bool operator >= (const mpreal& a, const unsigned int b ){ return !isnan EIGEN_NOT_A_MACRO (a) && (mpfr_cmp_ui(a.mpfr_srcptr(),b) >= 0 ); } +inline bool operator >= (const mpreal& a, const long int b ){ return !isnan EIGEN_NOT_A_MACRO (a) && (mpfr_cmp_si(a.mpfr_srcptr(),b) >= 0 ); } +inline bool operator >= (const mpreal& a, const int b ){ return !isnan EIGEN_NOT_A_MACRO (a) && (mpfr_cmp_si(a.mpfr_srcptr(),b) >= 0 ); } +inline bool operator >= (const mpreal& a, const long double b ){ return !isnan EIGEN_NOT_A_MACRO (a) && (b == b) && (mpfr_cmp_ld(a.mpfr_srcptr(),b) >= 0 ); } +inline bool operator >= (const mpreal& a, const double b ){ return !isnan EIGEN_NOT_A_MACRO (a) && (b == b) && (mpfr_cmp_d (a.mpfr_srcptr(),b) >= 0 ); } + +inline bool operator < (const mpreal& a, const mpreal& b ){ return (mpfr_less_p(a.mpfr_srcptr(),b.mpfr_srcptr()) != 0 ); } +inline bool operator < (const mpreal& a, const unsigned long int b ){ return !isnan EIGEN_NOT_A_MACRO (a) && (mpfr_cmp_ui(a.mpfr_srcptr(),b) < 0 ); } +inline bool operator < (const mpreal& a, const unsigned int b ){ return !isnan EIGEN_NOT_A_MACRO (a) && (mpfr_cmp_ui(a.mpfr_srcptr(),b) < 0 ); } +inline bool operator < (const mpreal& a, const long int b ){ return !isnan EIGEN_NOT_A_MACRO (a) && (mpfr_cmp_si(a.mpfr_srcptr(),b) < 0 ); } +inline bool operator < (const mpreal& a, const int b ){ return !isnan EIGEN_NOT_A_MACRO (a) && (mpfr_cmp_si(a.mpfr_srcptr(),b) < 0 ); } +inline bool operator < (const mpreal& a, const long double b ){ return !isnan EIGEN_NOT_A_MACRO (a) && (b == b) && (mpfr_cmp_ld(a.mpfr_srcptr(),b) < 0 ); } +inline bool operator < (const mpreal& a, const double b ){ return !isnan EIGEN_NOT_A_MACRO (a) && (b == b) && (mpfr_cmp_d (a.mpfr_srcptr(),b) < 0 ); } + +inline bool operator <= (const mpreal& a, const mpreal& b ){ return (mpfr_lessequal_p(a.mpfr_srcptr(),b.mpfr_srcptr()) != 0 ); } +inline bool operator <= (const mpreal& a, const unsigned long int b ){ return !isnan EIGEN_NOT_A_MACRO (a) && (mpfr_cmp_ui(a.mpfr_srcptr(),b) <= 0 ); } +inline bool operator <= (const mpreal& a, const unsigned int b ){ return !isnan EIGEN_NOT_A_MACRO (a) && (mpfr_cmp_ui(a.mpfr_srcptr(),b) <= 0 ); } +inline bool operator <= (const mpreal& a, const long
int b ){ return !isnan EIGEN_NOT_A_MACRO (a) && (mpfr_cmp_si(a.mpfr_srcptr(),b) <= 0 ); } +inline bool operator <= (const mpreal& a, const int b ){ return !isnan EIGEN_NOT_A_MACRO (a) && (mpfr_cmp_si(a.mpfr_srcptr(),b) <= 0 ); } +inline bool operator <= (const mpreal& a, const long double b ){ return !isnan EIGEN_NOT_A_MACRO (a) && (b == b) && (mpfr_cmp_ld(a.mpfr_srcptr(),b) <= 0 ); } +inline bool operator <= (const mpreal& a, const double b ){ return !isnan EIGEN_NOT_A_MACRO (a) && (b == b) && (mpfr_cmp_d (a.mpfr_srcptr(),b) <= 0 ); } + +inline bool operator == (const mpreal& a, const mpreal& b ){ return (mpfr_equal_p(a.mpfr_srcptr(),b.mpfr_srcptr()) != 0 ); } +inline bool operator == (const mpreal& a, const unsigned long int b ){ return !isnan EIGEN_NOT_A_MACRO (a) && (mpfr_cmp_ui(a.mpfr_srcptr(),b) == 0 ); } +inline bool operator == (const mpreal& a, const unsigned int b ){ return !isnan EIGEN_NOT_A_MACRO (a) && (mpfr_cmp_ui(a.mpfr_srcptr(),b) == 0 ); } +inline bool operator == (const mpreal& a, const long int b ){ return !isnan EIGEN_NOT_A_MACRO (a) && (mpfr_cmp_si(a.mpfr_srcptr(),b) == 0 ); } +inline bool operator == (const mpreal& a, const int b ){ return !isnan EIGEN_NOT_A_MACRO (a) && (mpfr_cmp_si(a.mpfr_srcptr(),b) == 0 ); } +inline bool operator == (const mpreal& a, const long double b ){ return !isnan EIGEN_NOT_A_MACRO (a) && (b == b) && (mpfr_cmp_ld(a.mpfr_srcptr(),b) == 0 ); } +inline bool operator == (const mpreal& a, const double b ){ return !isnan EIGEN_NOT_A_MACRO (a) && (b == b) && (mpfr_cmp_d (a.mpfr_srcptr(),b) == 0 ); } + +inline bool operator != (const mpreal& a, const mpreal& b ){ return !(a == b); } +inline bool operator != (const mpreal& a, const unsigned long int b ){ return !(a == b); } +inline bool operator != (const mpreal& a, const unsigned int b ){ return !(a == b); } +inline bool operator != (const mpreal& a, const long int b ){ return !(a == b); } +inline bool operator != (const mpreal& a, const int b ){ return !(a == b); } +inline bool operator != (const mpreal& a, const long double b ){ return !(a == b); } +inline bool operator != (const mpreal& a, const double b ){ return !(a == b); } inline bool (isnan) (const mpreal& op){ return (mpfr_nan_p (op.mpfr_srcptr()) != 0 ); } inline bool (isinf) (const mpreal& op){ return (mpfr_inf_p (op.mpfr_srcptr()) != 0 ); } @@ -1695,21 +1705,18 @@ inline bool isint (const mpreal& op){ return (mpfr_integer_p(op.mpfr_srcpt #if (MPFR_VERSION >= MPFR_VERSION_NUM(3,0,0)) inline bool isregular(const mpreal& op){ return (mpfr_regular_p(op.mpfr_srcptr()));} -#endif +#endif ////////////////////////////////////////////////////////////////////////// // Type Converters -inline bool mpreal::toBool (mp_rnd_t /*mode*/) const { return mpfr_zero_p (mpfr_srcptr()) == 0; } -inline long mpreal::toLong (mp_rnd_t mode) const { return mpfr_get_si (mpfr_srcptr(), mode); } -inline unsigned long mpreal::toULong (mp_rnd_t mode) const { return mpfr_get_ui (mpfr_srcptr(), mode); } -inline float mpreal::toFloat (mp_rnd_t mode) const { return mpfr_get_flt(mpfr_srcptr(), mode); } -inline double mpreal::toDouble (mp_rnd_t mode) const { return mpfr_get_d (mpfr_srcptr(), mode); } -inline long double mpreal::toLDouble(mp_rnd_t mode) const { return mpfr_get_ld (mpfr_srcptr(), mode); } - -#if defined (MPREAL_HAVE_INT64_SUPPORT) -inline int64_t mpreal::toInt64 (mp_rnd_t mode) const{ return mpfr_get_sj(mpfr_srcptr(), mode); } -inline uint64_t mpreal::toUInt64(mp_rnd_t mode) const{ return mpfr_get_uj(mpfr_srcptr(), mode); } -#endif +inline bool mpreal::toBool ( 
) const { return mpfr_zero_p (mpfr_srcptr()) == 0; } +inline long mpreal::toLong (mp_rnd_t mode) const { return mpfr_get_si (mpfr_srcptr(), mode); } +inline unsigned long mpreal::toULong (mp_rnd_t mode) const { return mpfr_get_ui (mpfr_srcptr(), mode); } +inline float mpreal::toFloat (mp_rnd_t mode) const { return mpfr_get_flt(mpfr_srcptr(), mode); } +inline double mpreal::toDouble (mp_rnd_t mode) const { return mpfr_get_d (mpfr_srcptr(), mode); } +inline long double mpreal::toLDouble(mp_rnd_t mode) const { return mpfr_get_ld (mpfr_srcptr(), mode); } +inline long long mpreal::toLLong (mp_rnd_t mode) const { return mpfr_get_sj (mpfr_srcptr(), mode); } +inline unsigned long long mpreal::toULLong (mp_rnd_t mode) const { return mpfr_get_uj (mpfr_srcptr(), mode); } inline ::mpfr_ptr mpreal::mpfr_ptr() { return mp; } inline ::mpfr_srcptr mpreal::mpfr_ptr() const { return mp; } @@ -1755,21 +1762,21 @@ inline std::string mpreal::toString(int n, int b, mp_rnd_t mode) const std::ostringstream format; - int digits = (n >= 0) ? n : bits2digits(mpfr_get_prec(mpfr_srcptr())); - + int digits = (n >= 0) ? n : 1 + bits2digits(mpfr_get_prec(mpfr_srcptr())); + format << "%." << digits << "RNg"; return toString(format.str()); #else - char *s, *ns = NULL; + char *s, *ns = NULL; size_t slen, nslen; mp_exp_t exp; std::string out; if(mpfr_inf_p(mp)) - { + { if(mpfr_sgn(mp)>0) return "+Inf"; else return "-Inf"; } @@ -1784,7 +1791,7 @@ inline std::string mpreal::toString(int n, int b, mp_rnd_t mode) const { slen = strlen(s); nslen = strlen(ns); - if(nslen<=slen) + if(nslen<=slen) { mpfr_free_str(s); s = ns; @@ -1801,7 +1808,7 @@ inline std::string mpreal::toString(int n, int b, mp_rnd_t mode) const { // Remove zeros starting from right end char* ptr = s+slen-1; - while (*ptr=='0' && ptr>s+exp) ptr--; + while (*ptr=='0' && ptr>s+exp) ptr--; if(ptr==s+exp) out = std::string(s,exp+1); else out = std::string(s,exp+1)+'.'+std::string(s+exp+1,ptr-(s+exp+1)+1); @@ -1812,7 +1819,7 @@ inline std::string mpreal::toString(int n, int b, mp_rnd_t mode) const { // Remove zeros starting from right end char* ptr = s+slen-1; - while (*ptr=='0' && ptr>s+exp-1) ptr--; + while (*ptr=='0' && ptr>s+exp-1) ptr--; if(ptr==s+exp-1) out = std::string(s,exp); else out = std::string(s,exp)+'.'+std::string(s+exp,ptr-(s+exp)+1); @@ -1825,7 +1832,7 @@ inline std::string mpreal::toString(int n, int b, mp_rnd_t mode) const { // Remove zeros starting from right end char* ptr = s+slen-1; - while (*ptr=='0' && ptr>s+1) ptr--; + while (*ptr=='0' && ptr>s+1) ptr--; if(ptr==s+1) out = std::string(s,2); else out = std::string(s,2)+'.'+std::string(s+2,ptr-(s+2)+1); @@ -1836,7 +1843,7 @@ inline std::string mpreal::toString(int n, int b, mp_rnd_t mode) const { // Remove zeros starting from right end char* ptr = s+slen-1; - while (*ptr=='0' && ptr>s) ptr--; + while (*ptr=='0' && ptr>s) ptr--; if(ptr==s) out = std::string(s,1); else out = std::string(s,1)+'.'+std::string(s+1,ptr-(s+1)+1); @@ -1863,7 +1870,7 @@ inline std::string mpreal::toString(int n, int b, mp_rnd_t mode) const ////////////////////////////////////////////////////////////////////////// // I/O -inline std::ostream& mpreal::output(std::ostream& os) const +inline std::ostream& mpreal::output(std::ostream& os) const { std::ostringstream format; const std::ios::fmtflags flags = os.flags(); @@ -1926,8 +1933,7 @@ inline int bits2digits(mp_prec_t b) // Set/Get number properties inline int sgn(const mpreal& op) { - int r = mpfr_signbit(op.mpfr_srcptr()); - return (r > 0? 
-1 : 1); + return mpfr_sgn(op.mpfr_srcptr()); } inline mpreal& mpreal::setSign(int sign, mp_rnd_t RoundingMode) @@ -1949,29 +1955,28 @@ inline mpreal& mpreal::setPrecision(int Precision, mp_rnd_t RoundingMode) return *this; } -inline mpreal& mpreal::setInf(int sign) -{ +inline mpreal& mpreal::setInf(int sign) +{ mpfr_set_inf(mpfr_ptr(), sign); MPREAL_MSVC_DEBUGVIEW_CODE; return *this; -} +} -inline mpreal& mpreal::setNan() +inline mpreal& mpreal::setNan() { mpfr_set_nan(mpfr_ptr()); MPREAL_MSVC_DEBUGVIEW_CODE; return *this; } -inline mpreal& mpreal::setZero(int sign) +inline mpreal& mpreal::setZero(int sign) { - #if (MPFR_VERSION >= MPFR_VERSION_NUM(3,0,0)) mpfr_set_zero(mpfr_ptr(), sign); #else mpfr_set_si(mpfr_ptr(), 0, (mpfr_get_default_rounding_mode)()); setSign(sign); -#endif +#endif MPREAL_MSVC_DEBUGVIEW_CODE; return *this; @@ -2000,23 +2005,32 @@ inline int mpreal::set_exp (mp_exp_t e) return x; } -inline const mpreal frexp(const mpreal& v, mp_exp_t* exp) +inline const mpreal frexp(const mpreal& x, mp_exp_t* exp, mp_rnd_t mode = mpreal::get_default_rnd()) { - mpreal x(v); - *exp = x.get_exp(); - x.set_exp(0); - return x; + mpreal y(x); +#if (MPFR_VERSION >= MPFR_VERSION_NUM(3,1,0)) + mpfr_frexp(exp,y.mpfr_ptr(),x.mpfr_srcptr(),mode); +#else + *exp = mpfr_get_exp(y.mpfr_srcptr()); + mpfr_set_exp(y.mpfr_ptr(),0); +#endif + return y; } inline const mpreal ldexp(const mpreal& v, mp_exp_t exp) { mpreal x(v); - // rounding is not important since we just increasing the exponent - mpfr_mul_2si(x.mpfr_ptr(), x.mpfr_srcptr(), exp, mpreal::get_default_rnd()); + // rounding is not important since we are just increasing the exponent (= exact operation) + mpfr_mul_2si(x.mpfr_ptr(), x.mpfr_srcptr(), exp, mpreal::get_default_rnd()); return x; } +inline const mpreal scalbn(const mpreal& v, mp_exp_t exp) +{ + return ldexp(v, exp); +} + inline mpreal machine_epsilon(mp_prec_t prec) { /* the smallest eps such that 1 + eps != 1 */ @@ -2024,7 +2038,7 @@ inline mpreal machine_epsilon(mp_prec_t prec) } inline mpreal machine_epsilon(const mpreal& x) -{ +{ /* the smallest eps such that x + eps != x */ if( x < 0) { @@ -2045,7 +2059,7 @@ inline mpreal minval(mp_prec_t prec) inline mpreal maxval(mp_prec_t prec) { /* max = (1 - eps) * 2^emax, eps is machine epsilon */ - return (mpreal(1, prec) - machine_epsilon(prec)) << mpreal::get_emax(); + return (mpreal(1, prec) - machine_epsilon(prec)) << mpreal::get_emax(); } inline bool isEqualUlps(const mpreal& a, const mpreal& b, int maxUlps) @@ -2063,12 +2077,26 @@ inline bool isEqualFuzzy(const mpreal& a, const mpreal& b) return isEqualFuzzy(a, b, machine_epsilon((max)(1, (min)(abs(a), abs(b))))); } +////////////////////////////////////////////////////////////////////////// +// C++11 sign functions. 
+inline mpreal copysign(const mpreal& x, const mpreal& y, mp_rnd_t rnd_mode = mpreal::get_default_rnd()) +{ + mpreal rop(0, mpfr_get_prec(x.mpfr_ptr())); + mpfr_setsign(rop.mpfr_ptr(), x.mpfr_srcptr(), mpfr_signbit(y.mpfr_srcptr()), rnd_mode); + return rop; +} + +inline bool signbit(const mpreal& x) +{ + return mpfr_signbit(x.mpfr_srcptr()); +} + inline const mpreal modf(const mpreal& v, mpreal& n) { mpreal f(v); // rounding is not important since we are using the same number - mpfr_frac (f.mpfr_ptr(),f.mpfr_srcptr(),mpreal::get_default_rnd()); + mpfr_frac (f.mpfr_ptr(),f.mpfr_srcptr(),mpreal::get_default_rnd()); mpfr_trunc(n.mpfr_ptr(),v.mpfr_srcptr()); return f; } @@ -2131,7 +2159,7 @@ inline mp_exp_t mpreal::get_emax_max (void) #define MPREAL_UNARY_MATH_FUNCTION_BODY(f) \ mpreal y(0, mpfr_get_prec(x.mpfr_srcptr())); \ mpfr_##f(y.mpfr_ptr(), x.mpfr_srcptr(), r); \ - return y; + return y; inline const mpreal sqr (const mpreal& x, mp_rnd_t r = mpreal::get_default_rnd()) { MPREAL_UNARY_MATH_FUNCTION_BODY(sqr ); } @@ -2154,7 +2182,7 @@ inline const mpreal sqrt(const unsigned int v, mp_rnd_t rnd_mode) inline const mpreal sqrt(const long int v, mp_rnd_t rnd_mode) { if (v>=0) return sqrt(static_cast<unsigned long int>(v),rnd_mode); - else return mpreal().setNan(); // NaN + else return mpreal().setNan(); // NaN } inline const mpreal sqrt(const int v, mp_rnd_t rnd_mode) @@ -2165,9 +2193,9 @@ inline const mpreal sqrt(const int v, mp_rnd_t rnd_mode) inline const mpreal root(const mpreal& x, unsigned long int k, mp_rnd_t r = mpreal::get_default_rnd()) { - mpreal y(0, mpfr_get_prec(x.mpfr_srcptr())); - mpfr_root(y.mpfr_ptr(), x.mpfr_srcptr(), k, r); - return y; + mpreal y(0, mpfr_get_prec(x.mpfr_srcptr())); + mpfr_root(y.mpfr_ptr(), x.mpfr_srcptr(), k, r); + return y; } inline const mpreal dim(const mpreal& a, const mpreal& b, mp_rnd_t r = mpreal::get_default_rnd()) @@ -2209,6 +2237,8 @@ inline const mpreal acos (const mpreal& x, mp_rnd_t r = mpreal::get_default_rnd inline const mpreal asin (const mpreal& x, mp_rnd_t r = mpreal::get_default_rnd()) { MPREAL_UNARY_MATH_FUNCTION_BODY(asin ); } inline const mpreal atan (const mpreal& x, mp_rnd_t r = mpreal::get_default_rnd()) { MPREAL_UNARY_MATH_FUNCTION_BODY(atan ); } +inline const mpreal logb (const mpreal& x, mp_rnd_t r = mpreal::get_default_rnd()) { return log2 (abs(x),r); } + inline const mpreal acot (const mpreal& v, mp_rnd_t r = mpreal::get_default_rnd()) { return atan (1/v, r); } inline const mpreal asec (const mpreal& v, mp_rnd_t r = mpreal::get_default_rnd()) { return acos (1/v, r); } inline const mpreal acsc (const mpreal& v, mp_rnd_t r = mpreal::get_default_rnd()) { return asin (1/v, r); } @@ -2230,6 +2260,7 @@ inline const mpreal log1p (const mpreal& x, mp_rnd_t r = mpreal::get_default_r inline const mpreal expm1 (const mpreal& x, mp_rnd_t r = mpreal::get_default_rnd()) { MPREAL_UNARY_MATH_FUNCTION_BODY(expm1 ); } inline const mpreal eint (const mpreal& x, mp_rnd_t r = mpreal::get_default_rnd()) { MPREAL_UNARY_MATH_FUNCTION_BODY(eint ); } inline const mpreal gamma (const mpreal& x, mp_rnd_t r = mpreal::get_default_rnd()) { MPREAL_UNARY_MATH_FUNCTION_BODY(gamma ); } +inline const mpreal tgamma (const mpreal& x, mp_rnd_t r = mpreal::get_default_rnd()) { MPREAL_UNARY_MATH_FUNCTION_BODY(gamma ); } inline const mpreal lngamma (const mpreal& x, mp_rnd_t r = mpreal::get_default_rnd()) { MPREAL_UNARY_MATH_FUNCTION_BODY(lngamma); } inline const mpreal zeta (const mpreal& x, mp_rnd_t r = mpreal::get_default_rnd()) { MPREAL_UNARY_MATH_FUNCTION_BODY(zeta ); } inline const
mpreal erf (const mpreal& x, mp_rnd_t r = mpreal::get_default_rnd()) { MPREAL_UNARY_MATH_FUNCTION_BODY(erf ); } @@ -2254,7 +2285,7 @@ inline const mpreal hypot (const mpreal& x, const mpreal& y, mp_rnd_t rnd_mode = } inline const mpreal remainder (const mpreal& x, const mpreal& y, mp_rnd_t rnd_mode = mpreal::get_default_rnd()) -{ +{ mpreal a(0,(std::max)(y.getPrecision(), x.getPrecision())); mpfr_remainder(a.mpfr_ptr(), x.mpfr_srcptr(), y.mpfr_srcptr(), rnd_mode); return a; @@ -2307,9 +2338,9 @@ inline const mpreal fma (const mpreal& v1, const mpreal& v2, const mpreal& v3, m mpreal a; mp_prec_t p1, p2, p3; - p1 = v1.get_prec(); - p2 = v2.get_prec(); - p3 = v3.get_prec(); + p1 = v1.get_prec(); + p2 = v2.get_prec(); + p3 = v3.get_prec(); a.set_prec(p3>p2?(p3>p1?p3:p1):(p2>p1?p2:p1)); @@ -2322,9 +2353,9 @@ inline const mpreal fms (const mpreal& v1, const mpreal& v2, const mpreal& v3, m mpreal a; mp_prec_t p1, p2, p3; - p1 = v1.get_prec(); - p2 = v2.get_prec(); - p3 = v3.get_prec(); + p1 = v1.get_prec(); + p2 = v2.get_prec(); + p3 = v3.get_prec(); a.set_prec(p3>p2?(p3>p1?p3:p1):(p2>p1?p2:p1)); @@ -2337,8 +2368,8 @@ inline const mpreal agm (const mpreal& v1, const mpreal& v2, mp_rnd_t rnd_mode = mpreal a; mp_prec_t p1, p2; - p1 = v1.get_prec(); - p2 = v2.get_prec(); + p1 = v1.get_prec(); + p2 = v2.get_prec(); a.set_prec(p1>p2?p1:p2); @@ -2347,16 +2378,17 @@ inline const mpreal agm (const mpreal& v1, const mpreal& v2, mp_rnd_t rnd_mode = return a; } -inline const mpreal sum (const mpreal tab[], unsigned long int n, mp_rnd_t rnd_mode = mpreal::get_default_rnd()) +inline const mpreal sum (const mpreal tab[], const unsigned long int n, int& status, mp_rnd_t mode = mpreal::get_default_rnd()) { - mpreal x; - mpfr_ptr* t; - unsigned long int i; + mpfr_srcptr *p = new mpfr_srcptr[n]; - t = new mpfr_ptr[n]; - for (i=0;i<n;i++) t[i] = (mpfr_ptr)tab[i].mp; - mpfr_sum(x.mp,t,n,rnd_mode); - delete[] t; - return x; + for (unsigned long int i = 0; i < n; i++) + p[i] = tab[i].mpfr_srcptr(); + + mpreal x; + status = mpfr_sum(x.mpfr_ptr(), (mpfr_ptr*)p, n, mode); + + delete [] p; + return x; } a.set_prec(yp>xp?yp:xp); @@ -2553,33 +2585,24 @@ inline const mpreal nextbelow (const mpreal& x) inline const mpreal urandomb (gmp_randstate_t& state) { mpreal x; - mpfr_urandomb(x.mp,state); + mpfr_urandomb(x.mpfr_ptr(),state); return x; } -#if (MPFR_VERSION >= MPFR_VERSION_NUM(3,1,0)) -// use gmp_randinit_default() to init state, gmp_randclear() to clear +#if (MPFR_VERSION >= MPFR_VERSION_NUM(3,0,0)) inline const mpreal urandom (gmp_randstate_t& state, mp_rnd_t rnd_mode = mpreal::get_default_rnd()) { mpreal x; - mpfr_urandom(x.mp,state,rnd_mode); + mpfr_urandom(x.mpfr_ptr(), state, rnd_mode); return x; } - -inline const mpreal grandom (gmp_randstate_t& state, mp_rnd_t rnd_mode = mpreal::get_default_rnd()) -{ - mpreal x; - mpfr_grandom(x.mp, NULL, state, rnd_mode); - return x; -} - -#endif +#endif #if (MPFR_VERSION <= MPFR_VERSION_NUM(2,4,2)) inline const mpreal random2 (mp_size_t size, mp_exp_t exp) { mpreal x; - mpfr_random2(x.mp,size,exp); + mpfr_random2(x.mpfr_ptr(),size,exp); return x; } #endif @@ -2590,16 +2613,15 @@ inline const mpreal random2 (mp_size_t size, mp_exp_t exp) // seed != 0 inline const mpreal random(unsigned int seed = 0) { - #if (MPFR_VERSION >= MPFR_VERSION_NUM(3,0,0)) static gmp_randstate_t state; - static bool isFirstTime = true; + static bool initialize = true; - if(isFirstTime) + if(initialize) { gmp_randinit_default(state); gmp_randseed_ui(state,0); - isFirstTime = false; + initialize = false; } if(seed != 0) gmp_randseed_ui(state,seed); @@ -2612,17 +2634,25 @@ } -#if (MPFR_VERSION >= MPFR_VERSION_NUM(3,0,0)) +#if (MPFR_VERSION >= MPFR_VERSION_NUM(3,1,0)) + +inline const mpreal grandom (gmp_randstate_t& state, mp_rnd_t
rnd_mode = mpreal::get_default_rnd()) +{ + mpreal x; + mpfr_grandom(x.mpfr_ptr(), NULL, state, rnd_mode); + return x; +} + inline const mpreal grandom(unsigned int seed = 0) { static gmp_randstate_t state; - static bool isFirstTime = true; + static bool initialize = true; - if(isFirstTime) + if(initialize) { gmp_randinit_default(state); gmp_randseed_ui(state,0); - isFirstTime = false; + initialize = false; } if(seed != 0) gmp_randseed_ui(state,seed); @@ -2634,17 +2664,17 @@ inline const mpreal grandom(unsigned int seed = 0) ////////////////////////////////////////////////////////////////////////// // Set/Get global properties inline void mpreal::set_default_prec(mp_prec_t prec) -{ - mpfr_set_default_prec(prec); +{ + mpfr_set_default_prec(prec); } inline void mpreal::set_default_rnd(mp_rnd_t rnd_mode) -{ - mpfr_set_default_rounding_mode(rnd_mode); +{ + mpfr_set_default_rounding_mode(rnd_mode); } inline bool mpreal::fits_in_bits(double x, int n) -{ +{ int i; double t; return IsInf(x) || (std::modf ( std::ldexp ( std::frexp ( x, &i ), n ), &t ) == 0.0); @@ -2894,7 +2924,7 @@ inline const mpreal pow(const int a, const double b, mp_rnd_t rnd_mode) else return pow(mpreal(a),mpreal(b),rnd_mode); //mpfr_pow } -// pow long double +// pow long double inline const mpreal pow(const long double a, const long double b, mp_rnd_t rnd_mode) { return pow(mpreal(a),mpreal(b),rnd_mode); @@ -2953,9 +2983,9 @@ namespace std { // we are allowed to extend namespace std with specializations only template <> - inline void swap(mpfr::mpreal& x, mpfr::mpreal& y) - { - return mpfr::swap(x, y); + inline void swap(mpfr::mpreal& x, mpfr::mpreal& y) + { + return mpfr::swap(x, y); } template<> @@ -2966,7 +2996,7 @@ namespace std static const bool is_signed = true; static const bool is_integer = false; static const bool is_exact = false; - static const int radix = 2; + static const int radix = 2; static const bool has_infinity = true; static const bool has_quiet_NaN = true; @@ -2986,7 +3016,7 @@ namespace std // Returns smallest eps such that 1 + eps != 1 (classic machine epsilon) inline static mpfr::mpreal epsilon(mp_prec_t precision = mpfr::mpreal::get_default_prec()) { return mpfr::machine_epsilon(precision); } - + // Returns smallest eps such that x + eps != x (relative machine epsilon) inline static mpfr::mpreal epsilon(const mpfr::mpreal& x) { return mpfr::machine_epsilon(x); } @@ -2994,8 +3024,8 @@ namespace std { mp_rnd_t r = mpfr::mpreal::get_default_rnd(); - if(r == GMP_RNDN) return mpfr::mpreal(0.5, precision); - else return mpfr::mpreal(1.0, precision); + if(r == GMP_RNDN) return mpfr::mpreal(0.5, precision); + else return mpfr::mpreal(1.0, precision); } inline static const mpfr::mpreal infinity() { return mpfr::const_infinity(); } @@ -3006,17 +3036,17 @@ namespace std // Please note, exponent range is not fixed in MPFR static const int min_exponent = MPFR_EMIN_DEFAULT; static const int max_exponent = MPFR_EMAX_DEFAULT; - MPREAL_PERMISSIVE_EXPR static const int min_exponent10 = (int) (MPFR_EMIN_DEFAULT * 0.3010299956639811); - MPREAL_PERMISSIVE_EXPR static const int max_exponent10 = (int) (MPFR_EMAX_DEFAULT * 0.3010299956639811); + MPREAL_PERMISSIVE_EXPR static const int min_exponent10 = (int) (MPFR_EMIN_DEFAULT * 0.3010299956639811); + MPREAL_PERMISSIVE_EXPR static const int max_exponent10 = (int) (MPFR_EMAX_DEFAULT * 0.3010299956639811); #ifdef MPREAL_HAVE_DYNAMIC_STD_NUMERIC_LIMITS // Following members should be constant according to standard, but they can be variable in MPFR - // So we define them as 
functions here. + // So we define them as functions here. // // This is preferable way for std::numeric_limits specialization. - // But it is incompatible with standard std::numeric_limits and might not work with other libraries, e.g. boost. - // See below for compatible implementation. + // But it is incompatible with standard std::numeric_limits and might not work with other libraries, e.g. boost. + // See below for compatible implementation. inline static float_round_style round_style() { mp_rnd_t r = mpfr::mpreal::get_default_rnd(); @@ -3024,9 +3054,9 @@ namespace std switch (r) { case GMP_RNDN: return round_to_nearest; - case GMP_RNDZ: return round_toward_zero; - case GMP_RNDU: return round_toward_infinity; - case GMP_RNDD: return round_toward_neg_infinity; + case GMP_RNDZ: return round_toward_zero; + case GMP_RNDU: return round_toward_infinity; + case GMP_RNDD: return round_toward_neg_infinity; default: return round_indeterminate; } } @@ -3053,13 +3083,13 @@ namespace std // If possible, please use functions digits() and round_style() defined above. // // These (default) values are preserved for compatibility with existing libraries, e.g. boost. - // Change them accordingly to your application. + // Change them accordingly to your application. // // For example, if you use 256 bits of precision uniformly in your program, then: // digits = 256 - // digits10 = 77 + // digits10 = 77 // max_digits10 = 78 - // + // // Approximate formula for decimal digits is: digits10 = floor(log10(2) * digits). See bits2digits() for more details. static const std::float_round_style round_style = round_to_nearest;
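As a rough usage sketch of the precision helpers and the std::numeric_limits<mpfr::mpreal> specialization above (illustrative only, not part of the patch; assumes mpreal.h is available):

    #include <mpreal.h>
    #include <iostream>
    #include <limits>

    int main()
    {
        using mpfr::mpreal;

        // Work uniformly at 256 bits, as in the digits/digits10 example above.
        mpreal::set_default_prec(256);

        // digits10 = floor(256 * log10(2)) = 77 decimal digits.
        std::cout << mpfr::bits2digits(256) << '\n';

        // Smallest eps such that 1 + eps != 1 at the current default precision.
        const mpreal eps = std::numeric_limits<mpreal>::epsilon();
        std::cout << eps << '\n';

        return 0;
    }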