Pull latest updates from upstream

Benoit Steiner 2016-04-11 17:20:17 -07:00
commit d6e596174d
340 changed files with 13835 additions and 5211 deletions

View File

@@ -246,7 +246,7 @@ if(NOT MSVC)
   else()
     set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mfpu=neon")
   endif()
-  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mfloat-abi=softfp")
+  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mfloat-abi=hard")
   message(STATUS "Enabling NEON in tests/examples")
 endif()
@@ -256,7 +256,11 @@ if(NOT MSVC)
     message(STATUS "Enabling NEON in tests/examples")
   endif()

+  option(EIGEN_TEST_ZVECTOR "Enable/Disable S390X(zEC13) ZVECTOR in tests/examples" OFF)
+  if(EIGEN_TEST_ZVECTOR)
+    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -march=z13 -mzvector")
+    message(STATUS "Enabling S390X(zEC13) ZVECTOR in tests/examples")
+  endif()

   check_cxx_compiler_flag("-fopenmp" COMPILER_SUPPORT_OPENMP)
   if(COMPILER_SUPPORT_OPENMP)
@@ -342,6 +346,8 @@ endif()
 option(EIGEN_TEST_CXX11 "Enable testing with C++11 and C++11 features (e.g. Tensor module)." OFF)

+set(EIGEN_CUDA_COMPUTE_ARCH 30 CACHE STRING "The CUDA compute architecture level to target when compiling CUDA code")

 include_directories(${CMAKE_CURRENT_SOURCE_DIR} ${CMAKE_CURRENT_BINARY_DIR})
 # Backward compatibility support for EIGEN_INCLUDE_INSTALL_DIR

View File

@@ -19,7 +19,7 @@ extern "C" {
 /** \ingroup Support_modules
   * \defgroup CholmodSupport_Module CholmodSupport module
   *
-  * This module provides an interface to the Cholmod library which is part of the <a href="http://www.cise.ufl.edu/research/sparse/SuiteSparse/">suitesparse</a> package.
+  * This module provides an interface to the Cholmod library which is part of the <a href="http://www.suitesparse.com">suitesparse</a> package.
   * It provides the following two main factorization classes:
   * - class CholmodSupernodalLLT: a supernodal LLT Cholesky factorization.
   * - class CholmodDecomposition: a general L(D)LT Cholesky factorization with automatic or explicit runtime selection of the underlying factorization method (supernodal or simplicial).

View File

@@ -42,7 +42,10 @@
 #endif

-#if defined(__CUDA_ARCH__)
+// When compiling CUDA device code with NVCC, pull in math functions from the
+// global namespace. In host mode, and when compiling device code with clang,
+// use the std versions.
+#if defined(__CUDA_ARCH__) && defined(__NVCC__)
 #define EIGEN_USING_STD_MATH(FUNC) using ::FUNC;
 #else
 #define EIGEN_USING_STD_MATH(FUNC) using std::FUNC;
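To make the change concrete, here is a minimal sketch of the pattern the macro enables (the length2d helper is illustrative, not Eigen source): an unqualified call resolves to ::sqrt when NVCC compiles device code, and to std::sqrt everywhere else, including clang-compiled device code.

    // Illustration only: EIGEN_USING_STD_MATH imports the overload
    // appropriate for the current compilation mode, so the unqualified
    // call below picks the right sqrt.
    template <typename T>
    T length2d(T x, T y) {
      EIGEN_USING_STD_MATH(sqrt)
      return sqrt(x * x + y * y);
    }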
@@ -202,12 +205,25 @@
     #define EIGEN_VECTORIZE
     #define EIGEN_VECTORIZE_NEON
     #include <arm_neon.h>
+  #elif (defined __s390x__ && defined __VEC__)
+    #define EIGEN_VECTORIZE
+    #define EIGEN_VECTORIZE_ZVECTOR
+    #include <vecintrin.h>
   #endif
 #endif

+#if defined(__F16C__)
+  // We can use the optimized fp16 to float and float to fp16 conversion routines
+  #define EIGEN_HAS_FP16_C
+#endif

 #if defined __CUDACC__
   #define EIGEN_VECTORIZE_CUDA
   #include <vector_types.h>
+  #if defined __CUDACC_VER__ && __CUDACC_VER__ >= 70500
+    #define EIGEN_HAS_CUDA_FP16
+    #include <cuda_fp16.h>
+  #endif
 #endif

 #if (defined _OPENMP) && (!defined EIGEN_DONT_PARALLELIZE)
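The fp16 plumbing wired up here surfaces as the Eigen::half scalar type (Half.h is added to the Core header further below). A minimal host-side sketch, assuming a build of this commit where that type is available; the printed value reflects fp16 rounding:

    #include <Eigen/Core>
    #include <iostream>

    int main() {
      Eigen::half h(3.14159f);             // float -> fp16 (uses F16C when EIGEN_HAS_FP16_C)
      float back = static_cast<float>(h);  // fp16 -> float
      std::cout << back << "\n";           // fp16-rounded, roughly 3.1406
      return 0;
    }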
@@ -273,6 +289,8 @@ inline static const char *SimdInstructionSetsInUse(void) {
   return "VSX";
 #elif defined(EIGEN_VECTORIZE_NEON)
   return "ARM NEON";
+#elif defined(EIGEN_VECTORIZE_ZVECTOR)
+  return "S390X ZVECTOR";
 #else
   return "None";
 #endif
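A quick way to confirm which path was selected at build time is the existing Eigen::SimdInstructionSetsInUse() helper extended above; on a zEC13 build configured with -march=z13 -mzvector it should now report the new backend:

    #include <Eigen/Core>
    #include <iostream>

    int main() {
      // Prints e.g. "S390X ZVECTOR", "ARM NEON", or "None".
      std::cout << Eigen::SimdInstructionSetsInUse() << "\n";
      return 0;
    }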
@@ -310,6 +328,7 @@ using std::ptrdiff_t;
 #include "src/Core/NumTraits.h"
 #include "src/Core/MathFunctions.h"
+#include "src/Core/SpecialFunctions.h"
 #include "src/Core/GenericPacketMath.h"

 #if defined EIGEN_VECTORIZE_AVX512
@@ -339,11 +358,19 @@ using std::ptrdiff_t;
   #include "src/Core/arch/NEON/PacketMath.h"
   #include "src/Core/arch/NEON/MathFunctions.h"
   #include "src/Core/arch/NEON/Complex.h"
+#elif defined EIGEN_VECTORIZE_ZVECTOR
+  #include "src/Core/arch/ZVector/PacketMath.h"
+  #include "src/Core/arch/ZVector/MathFunctions.h"
+  #include "src/Core/arch/ZVector/Complex.h"
 #endif

+#include "src/Core/arch/CUDA/Half.h"

 #if defined EIGEN_VECTORIZE_CUDA
   #include "src/Core/arch/CUDA/PacketMath.h"
+  #include "src/Core/arch/CUDA/PacketMathHalf.h"
   #include "src/Core/arch/CUDA/MathFunctions.h"
+  #include "src/Core/arch/CUDA/TypeCasting.h"
 #endif

 #include "src/Core/arch/Default/Settings.h"
@@ -438,14 +465,14 @@ using std::ptrdiff_t;
 #include "src/Core/ArrayWrapper.h"

 #ifdef EIGEN_USE_BLAS
-#include "src/Core/products/GeneralMatrixMatrix_MKL.h"
-#include "src/Core/products/GeneralMatrixVector_MKL.h"
-#include "src/Core/products/GeneralMatrixMatrixTriangular_MKL.h"
-#include "src/Core/products/SelfadjointMatrixMatrix_MKL.h"
-#include "src/Core/products/SelfadjointMatrixVector_MKL.h"
-#include "src/Core/products/TriangularMatrixMatrix_MKL.h"
-#include "src/Core/products/TriangularMatrixVector_MKL.h"
-#include "src/Core/products/TriangularSolverMatrix_MKL.h"
+#include "src/Core/products/GeneralMatrixMatrix_BLAS.h"
+#include "src/Core/products/GeneralMatrixVector_BLAS.h"
+#include "src/Core/products/GeneralMatrixMatrixTriangular_BLAS.h"
+#include "src/Core/products/SelfadjointMatrixMatrix_BLAS.h"
+#include "src/Core/products/SelfadjointMatrixVector_BLAS.h"
+#include "src/Core/products/TriangularMatrixMatrix_BLAS.h"
+#include "src/Core/products/TriangularMatrixVector_BLAS.h"
+#include "src/Core/products/TriangularSolverMatrix_BLAS.h"
 #endif // EIGEN_USE_BLAS

 #ifdef EIGEN_USE_MKL_VML

View File

@@ -17,16 +17,16 @@
 #include <limits>

 /** \defgroup Geometry_Module Geometry module
-  *
-  *
   *
   * This module provides support for:
   *  - fixed-size homogeneous transformations
   *  - translation, scaling, 2D and 3D rotations
-  *  - quaternions
-  *  - \ref MatrixBase::cross() "cross product"
-  *  - \ref MatrixBase::unitOrthogonal() "orthognal vector generation"
-  *  - some linear components: parametrized-lines and hyperplanes
+  *  - \link Quaternion quaternions \endlink
+  *  - cross products (\ref MatrixBase::cross, \ref MatrixBase::cross3)
+  *  - orthogonal vector generation (\ref MatrixBase::unitOrthogonal)
+  *  - some linear components: \link ParametrizedLine parametrized-lines \endlink and \link Hyperplane hyperplanes \endlink
+  *  - \link AlignedBox axis aligned bounding boxes \endlink
+  *  - \link umeyama least-square transformation fitting \endlink
   *
   * \code
   * #include <Eigen/Geometry>
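As a reading aid, a small sketch exercising the features listed above, using the standard Eigen/Geometry API:

    #include <Eigen/Geometry>
    #include <iostream>

    int main() {
      Eigen::Vector3d a(1, 0, 0), b(0, 1, 0);
      // Quaternion from an axis-angle rotation of 90 degrees about Z.
      Eigen::Quaterniond q(Eigen::AngleAxisd(1.5707963, Eigen::Vector3d::UnitZ()));
      std::cout << (q * a).transpose() << "\n";            // ~ (0, 1, 0)
      std::cout << a.cross(b).transpose() << "\n";         // (0, 0, 1)
      std::cout << a.unitOrthogonal().transpose() << "\n"; // unit vector orthogonal to a
      return 0;
    }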

View File

@@ -12,7 +12,6 @@
 #include "src/Core/util/DisableStupidWarnings.h"

-#include <complex.h>
 extern "C" {
 #include <pastix_nompi.h>
 #include <pastix.h>

Eigen/PardisoSupport Normal file → Executable file
View File

@@ -14,8 +14,6 @@
 #include <mkl_pardiso.h>

-#include <unsupported/Eigen/SparseExtra>

 /** \ingroup Support_modules
   * \defgroup PardisoSupport_Module PardisoSupport module
   *

View File

@@ -34,6 +34,7 @@
 #include "src/QR/HouseholderQR.h"
 #include "src/QR/FullPivHouseholderQR.h"
 #include "src/QR/ColPivHouseholderQR.h"
+#include "src/QR/CompleteOrthogonalDecomposition.h"

 #ifdef EIGEN_USE_LAPACKE
 #include "src/QR/HouseholderQR_MKL.h"
 #include "src/QR/ColPivHouseholderQR_MKL.h"

View File

@@ -17,7 +17,7 @@
 /** \ingroup Support_modules
   * \defgroup SPQRSupport_Module SuiteSparseQR module
   *
-  * This module provides an interface to the SPQR library, which is part of the <a href="http://www.cise.ufl.edu/research/sparse/SuiteSparse/">suitesparse</a> package.
+  * This module provides an interface to the SPQR library, which is part of the <a href="http://www.suitesparse.com">suitesparse</a> package.
   *
   * \code
   * #include <Eigen/SPQRSupport>

View File

@@ -43,6 +43,8 @@ namespace Eigen { struct SluMatrix; }
   * - class SuperLU: a supernodal sequential LU factorization.
   * - class SuperILU: a supernodal sequential incomplete LU factorization (to be used as a preconditioner for iterative methods).
   *
+  * \warning This wrapper is only for the 4.x versions of SuperLU. The 3.x and 5.x versions are not supported.
+  *
   * \warning When including this module, you have to use SUPERLU_EMPTY instead of EMPTY which is no longer defined because it is too polluting.
   *
   * \code

View File

@@ -19,7 +19,7 @@ extern "C" {
 /** \ingroup Support_modules
   * \defgroup UmfPackSupport_Module UmfPackSupport module
   *
-  * This module provides an interface to the UmfPack library which is part of the <a href="http://www.cise.ufl.edu/research/sparse/SuiteSparse/">suitesparse</a> package.
+  * This module provides an interface to the UmfPack library which is part of the <a href="http://www.suitesparse.com">suitesparse</a> package.
   * It provides the following factorization class:
   * - class UmfPackLU: a multifrontal sequential LU factorization.
   *

View File

@@ -28,8 +28,8 @@ namespace internal {
   *
   * \brief Robust Cholesky decomposition of a matrix with pivoting
   *
-  * \param MatrixType the type of the matrix of which to compute the LDL^T Cholesky decomposition
-  * \param UpLo the triangular part that will be used for the decompositon: Lower (default) or Upper.
+  * \tparam _MatrixType the type of the matrix of which to compute the LDL^T Cholesky decomposition
+  * \tparam _UpLo the triangular part that will be used for the decomposition: Lower (default) or Upper.
   *             The other triangular part won't be read.
   *
   * Perform a robust Cholesky decomposition of a positive semidefinite or negative semidefinite
@@ -266,8 +266,8 @@ template<> struct ldlt_inplace<Lower>
     if (size <= 1)
     {
       transpositions.setIdentity();
-      if (numext::real(mat.coeff(0,0)) > 0) sign = PositiveSemiDef;
-      else if (numext::real(mat.coeff(0,0)) < 0) sign = NegativeSemiDef;
+      if (numext::real(mat.coeff(0,0)) > static_cast<RealScalar>(0) ) sign = PositiveSemiDef;
+      else if (numext::real(mat.coeff(0,0)) < static_cast<RealScalar>(0)) sign = NegativeSemiDef;
       else sign = ZeroSign;
       return true;
     }
@@ -324,12 +324,12 @@ template<> struct ldlt_inplace<Lower>
     A21 /= realAkk;

     if (sign == PositiveSemiDef) {
-      if (realAkk < 0) sign = Indefinite;
+      if (realAkk < static_cast<RealScalar>(0)) sign = Indefinite;
     } else if (sign == NegativeSemiDef) {
-      if (realAkk > 0) sign = Indefinite;
+      if (realAkk > static_cast<RealScalar>(0)) sign = Indefinite;
     } else if (sign == ZeroSign) {
-      if (realAkk > 0) sign = PositiveSemiDef;
-      else if (realAkk < 0) sign = NegativeSemiDef;
+      if (realAkk > static_cast<RealScalar>(0)) sign = PositiveSemiDef;
+      else if (realAkk < static_cast<RealScalar>(0)) sign = NegativeSemiDef;
     }
   }
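The sign bookkeeping above is what backs the public LDLT queries; a short sketch with the standard API:

    #include <Eigen/Cholesky>
    #include <iostream>

    int main() {
      Eigen::Matrix2d A;
      A << 2, 1,
           1, 2;                              // symmetric positive definite
      Eigen::LDLT<Eigen::Matrix2d> ldlt(A);
      std::cout << ldlt.isPositive() << "\n"; // 1
      Eigen::Vector2d x = ldlt.solve(Eigen::Vector2d(1, 0));
      std::cout << x.transpose() << "\n";     // solution of A*x = (1, 0)
      return 0;
    }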

View File

@@ -22,8 +22,8 @@ template<typename MatrixType, int UpLo> struct LLT_Traits;
   *
   * \brief Standard Cholesky decomposition (LL^T) of a matrix and associated features
   *
-  * \param MatrixType the type of the matrix of which we are computing the LL^T Cholesky decomposition
-  * \param UpLo the triangular part that will be used for the decompositon: Lower (default) or Upper.
+  * \tparam _MatrixType the type of the matrix of which we are computing the LL^T Cholesky decomposition
+  * \tparam _UpLo the triangular part that will be used for the decomposition: Lower (default) or Upper.
   *             The other triangular part won't be read.
   *
   * This class performs a LL^T Cholesky decomposition of a symmetric, positive definite
@@ -436,10 +436,7 @@ void LLT<_MatrixType,_UpLo>::_solve_impl(const RhsType &rhs, DstType &dst) const
   *
   * \param bAndX represents both the right-hand side matrix b and result x.
   *
-  * \returns true always! If you need to check for existence of solutions, use another decomposition like LU, QR, or SVD.
-  *
-  * This version avoids a copy when the right hand side matrix b is not
-  * needed anymore.
+  * This version avoids a copy when the right hand side matrix b is not needed anymore.
   *
   * \sa LLT::solve(), MatrixBase::llt()
   */
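A minimal sketch of the in-place variant documented above: the right-hand side is overwritten with the solution, so no extra copy of b is made.

    #include <Eigen/Cholesky>

    void solve_in_place_demo() {
      Eigen::Matrix3d A = 4.0 * Eigen::Matrix3d::Identity();
      Eigen::Vector3d b(1, 2, 3);
      Eigen::LLT<Eigen::Matrix3d> llt(A);
      llt.solveInPlace(b);  // b now holds x such that A*x equals the old b
    }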

View File

@@ -78,7 +78,7 @@ cholmod_sparse viewAsCholmod(SparseMatrix<_Scalar,_Options,_StorageIndex>& mat)
   {
     res.itype = CHOLMOD_INT;
   }
-  else if (internal::is_same<_StorageIndex,SuiteSparse_long>::value)
+  else if (internal::is_same<_StorageIndex,long>::value)
   {
     res.itype = CHOLMOD_LONG;
   }
@@ -178,14 +178,14 @@ class CholmodBase : public SparseSolverBase<Derived>
   public:

     CholmodBase()
-      : m_cholmodFactor(0), m_info(Success)
+      : m_cholmodFactor(0), m_info(Success), m_factorizationIsOk(false), m_analysisIsOk(false)
     {
       m_shiftOffset[0] = m_shiftOffset[1] = RealScalar(0.0);
       cholmod_start(&m_cholmod);
     }

     explicit CholmodBase(const MatrixType& matrix)
-      : m_cholmodFactor(0), m_info(Success)
+      : m_cholmodFactor(0), m_info(Success), m_factorizationIsOk(false), m_analysisIsOk(false)
     {
       m_shiftOffset[0] = m_shiftOffset[1] = RealScalar(0.0);
       cholmod_start(&m_cholmod);
@@ -273,9 +273,10 @@ class CholmodBase : public SparseSolverBase<Derived>
       const Index size = m_cholmodFactor->n;
       EIGEN_UNUSED_VARIABLE(size);
       eigen_assert(size==b.rows());

-      // note: cd stands for Cholmod Dense
-      Rhs& b_ref(b.const_cast_derived());
+      // Cholmod needs column-major storage without inner-stride, which corresponds to the default behavior of Ref.
+      Ref<const Matrix<typename Rhs::Scalar,Dynamic,Dynamic,ColMajor> > b_ref(b.derived());
       cholmod_dense b_cd = viewAsCholmod(b_ref);
       cholmod_dense* x_cd = cholmod_solve(CHOLMOD_A, m_cholmodFactor, &b_cd, &m_cholmod);
       if(!x_cd)
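The Ref-based rewrite works because a plain column-major right-hand side binds to Ref without copying, while anything with an inner stride is silently evaluated into a compatible temporary. A small sketch of that behavior with the public API:

    #include <Eigen/Core>

    void ref_demo() {
      Eigen::MatrixXd M = Eigen::MatrixXd::Random(4, 4);
      Eigen::Ref<const Eigen::MatrixXd> whole(M);          // no copy: layout already matches
      Eigen::Ref<const Eigen::MatrixXd> strided(M.row(0)); // copies: a row has inner stride 4
    }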
@@ -327,6 +328,57 @@ class CholmodBase : public SparseSolverBase<Derived>
       return derived();
     }

+    /** \returns the determinant of the underlying matrix from the current factorization */
+    Scalar determinant() const
+    {
+      using std::exp;
+      return exp(logDeterminant());
+    }
+
+    /** \returns the log determinant of the underlying matrix from the current factorization */
+    Scalar logDeterminant() const
+    {
+      using std::log;
+      using numext::real;
+      eigen_assert(m_factorizationIsOk && "The decomposition is not in a valid state for solving, you must first call either compute() or symbolic()/numeric()");
+
+      RealScalar logDet = 0;
+      Scalar *x = static_cast<Scalar*>(m_cholmodFactor->x);
+      if (m_cholmodFactor->is_super)
+      {
+        // Supernodal factorization stored as a packed list of dense column-major blocks,
+        // as described by the following structure:
+
+        // super[k] == index of the first column of the k-th super node
+        StorageIndex *super = static_cast<StorageIndex*>(m_cholmodFactor->super);
+        // pi[k] == offset to the description of row indices
+        StorageIndex *pi = static_cast<StorageIndex*>(m_cholmodFactor->pi);
+        // px[k] == offset to the respective dense block
+        StorageIndex *px = static_cast<StorageIndex*>(m_cholmodFactor->px);
+
+        Index nb_super_nodes = m_cholmodFactor->nsuper;
+        for (Index k=0; k < nb_super_nodes; ++k)
+        {
+          StorageIndex ncols = super[k + 1] - super[k];
+          StorageIndex nrows = pi[k + 1] - pi[k];
+
+          Map<const Array<Scalar,1,Dynamic>, 0, InnerStride<> > sk(x + px[k], ncols, InnerStride<>(nrows+1));
+          logDet += sk.real().log().sum();
+        }
+      }
+      else
+      {
+        // Simplicial factorization stored as standard CSC matrix.
+        StorageIndex *p = static_cast<StorageIndex*>(m_cholmodFactor->p);
+        Index size = m_cholmodFactor->n;
+        for (Index k=0; k<size; ++k)
+          logDet += log(real( x[p[k]] ));
+      }
+      if (m_cholmodFactor->is_ll)
+        logDet *= 2.0;
+      return logDet;
+    }
+
     template<typename Stream>
     void dumpMemory(Stream& /*s*/)
     {}
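Usage-wise, the new accessors compose with any CholmodBase solver; a sketch assuming a CHOLMOD-enabled build of this commit:

    #include <Eigen/SparseCore>
    #include <Eigen/CholmodSupport>
    #include <iostream>

    int main() {
      Eigen::SparseMatrix<double> A(2, 2);
      A.insert(0, 0) = 4.0;
      A.insert(1, 1) = 9.0;
      A.makeCompressed();
      Eigen::CholmodSupernodalLLT<Eigen::SparseMatrix<double> > llt(A);
      std::cout << llt.logDeterminant() << "\n"; // log(36), about 3.58
      std::cout << llt.determinant() << "\n";    // about 36
      return 0;
    }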
@@ -358,7 +410,7 @@ class CholmodBase : public SparseSolverBase<Derived>
   *
   * This class supports all kinds of SparseMatrix<>: row or column major; upper, lower, or both; compressed or non compressed.
   *
-  * \sa \ref TutorialSparseDirectSolvers, class CholmodSupernodalLLT, class SimplicialLLT
+  * \sa \ref TutorialSparseSolverConcept, class CholmodSupernodalLLT, class SimplicialLLT
   */
 template<typename _MatrixType, int _UpLo = Lower>
 class CholmodSimplicialLLT : public CholmodBase<_MatrixType, _UpLo, CholmodSimplicialLLT<_MatrixType, _UpLo> >
@@ -407,7 +459,7 @@ class CholmodSimplicialLLT : public CholmodBase<_MatrixType, _UpLo, CholmodSimpl
   *
   * This class supports all kinds of SparseMatrix<>: row or column major; upper, lower, or both; compressed or non compressed.
   *
-  * \sa \ref TutorialSparseDirectSolvers, class CholmodSupernodalLLT, class SimplicialLDLT
+  * \sa \ref TutorialSparseSolverConcept, class CholmodSupernodalLLT, class SimplicialLDLT
   */
 template<typename _MatrixType, int _UpLo = Lower>
 class CholmodSimplicialLDLT : public CholmodBase<_MatrixType, _UpLo, CholmodSimplicialLDLT<_MatrixType, _UpLo> >
@@ -454,7 +506,7 @@ class CholmodSimplicialLDLT : public CholmodBase<_MatrixType, _UpLo, CholmodSimp
   *
   * This class supports all kinds of SparseMatrix<>: row or column major; upper, lower, or both; compressed or non compressed.
   *
-  * \sa \ref TutorialSparseDirectSolvers
+  * \sa \ref TutorialSparseSolverConcept
   */
 template<typename _MatrixType, int _UpLo = Lower>
 class CholmodSupernodalLLT : public CholmodBase<_MatrixType, _UpLo, CholmodSupernodalLLT<_MatrixType, _UpLo> >
@@ -503,7 +555,7 @@ class CholmodSupernodalLLT : public CholmodBase<_MatrixType, _UpLo, CholmodSuper
   *
   * This class supports all kinds of SparseMatrix<>: row or column major; upper, lower, or both; compressed or non compressed.
   *
-  * \sa \ref TutorialSparseDirectSolvers
+  * \sa \ref TutorialSparseSolverConcept
   */
 template<typename _MatrixType, int _UpLo = Lower>
 class CholmodDecomposition : public CholmodBase<_MatrixType, _UpLo, CholmodDecomposition<_MatrixType, _UpLo> >

View File

@@ -12,7 +12,16 @@
 namespace Eigen {

-/** \class Array
+namespace internal {
+template<typename _Scalar, int _Rows, int _Cols, int _Options, int _MaxRows, int _MaxCols>
+struct traits<Array<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols> > : traits<Matrix<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols> >
+{
+  typedef ArrayXpr XprKind;
+  typedef ArrayBase<Array<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols> > XprBase;
+};
+}
+
+/** \class Array
   * \ingroup Core_Module
   *
   * \brief General-purpose arrays with easy API for coefficient-wise operations
@@ -26,21 +35,12 @@ namespace Eigen {
   *
   * See documentation of class Matrix for detailed information on the template parameters
   * storage layout.
   *
   * This class can be extended with the help of the plugin mechanism described on the page
   * \ref TopicCustomizingEigen by defining the preprocessor symbol \c EIGEN_ARRAY_PLUGIN.
   *
-  * \sa \ref TutorialArrayClass, \ref TopicClassHierarchy
+  * \sa \blank \ref TutorialArrayClass, \ref TopicClassHierarchy
   */
-namespace internal {
-template<typename _Scalar, int _Rows, int _Cols, int _Options, int _MaxRows, int _MaxCols>
-struct traits<Array<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols> > : traits<Matrix<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols> >
-{
-  typedef ArrayXpr XprKind;
-  typedef ArrayBase<Array<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols> > XprBase;
-};
-}

 template<typename _Scalar, int _Rows, int _Cols, int _Options, int _MaxRows, int _MaxCols>
 class Array
   : public PlainObjectBase<Array<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols> >

View File

@@ -103,7 +103,7 @@ template<typename Derived> class ArrayBase
     /** Special case of the template operator=, in order to prevent the compiler
       * from generating a default operator= (issue hit with g++ 4.1)
       */
-    EIGEN_DEVICE_FUNC
+    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
     Derived& operator=(const ArrayBase& other)
     {
       internal::call_assignment(derived(), other.derived());
@@ -112,28 +112,28 @@ template<typename Derived> class ArrayBase
     /** Set all the entries to \a value.
       * \sa DenseBase::setConstant(), DenseBase::fill() */
-    EIGEN_DEVICE_FUNC
+    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
     Derived& operator=(const Scalar &value)
     { Base::setConstant(value); return derived(); }

-    EIGEN_DEVICE_FUNC
+    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
     Derived& operator+=(const Scalar& scalar);
-    EIGEN_DEVICE_FUNC
+    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
     Derived& operator-=(const Scalar& scalar);

     template<typename OtherDerived>
-    EIGEN_DEVICE_FUNC
+    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
     Derived& operator+=(const ArrayBase<OtherDerived>& other);
     template<typename OtherDerived>
-    EIGEN_DEVICE_FUNC
+    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
     Derived& operator-=(const ArrayBase<OtherDerived>& other);

     template<typename OtherDerived>
-    EIGEN_DEVICE_FUNC
+    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
     Derived& operator*=(const ArrayBase<OtherDerived>& other);

     template<typename OtherDerived>
-    EIGEN_DEVICE_FUNC
+    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
     Derived& operator/=(const ArrayBase<OtherDerived>& other);

   public:

View File

@@ -52,7 +52,7 @@ class ArrayWrapper : public ArrayBase<ArrayWrapper<ExpressionType> >
                       const Scalar
                     >::type ScalarWithConstIfNotLvalue;

-    typedef typename internal::ref_selector<ExpressionType>::type NestedExpressionType;
+    typedef typename internal::ref_selector<ExpressionType>::non_const_type NestedExpressionType;

     EIGEN_DEVICE_FUNC
     explicit EIGEN_STRONG_INLINE ArrayWrapper(ExpressionType& matrix) : m_expression(matrix) {}
@@ -67,7 +67,7 @@ class ArrayWrapper : public ArrayBase<ArrayWrapper<ExpressionType> >
     inline Index innerStride() const { return m_expression.innerStride(); }

     EIGEN_DEVICE_FUNC
-    inline ScalarWithConstIfNotLvalue* data() { return m_expression.const_cast_derived().data(); }
+    inline ScalarWithConstIfNotLvalue* data() { return m_expression.data(); }
     EIGEN_DEVICE_FUNC
     inline const Scalar* data() const { return m_expression.data(); }
@@ -80,13 +80,13 @@ class ArrayWrapper : public ArrayBase<ArrayWrapper<ExpressionType> >
     EIGEN_DEVICE_FUNC
     inline Scalar& coeffRef(Index rowId, Index colId)
     {
-      return m_expression.const_cast_derived().coeffRef(rowId, colId);
+      return m_expression.coeffRef(rowId, colId);
     }

     EIGEN_DEVICE_FUNC
     inline const Scalar& coeffRef(Index rowId, Index colId) const
     {
-      return m_expression.const_cast_derived().coeffRef(rowId, colId);
+      return m_expression.coeffRef(rowId, colId);
     }

     EIGEN_DEVICE_FUNC
@@ -98,13 +98,13 @@ class ArrayWrapper : public ArrayBase<ArrayWrapper<ExpressionType> >
     EIGEN_DEVICE_FUNC
     inline Scalar& coeffRef(Index index)
     {
-      return m_expression.const_cast_derived().coeffRef(index);
+      return m_expression.coeffRef(index);
     }

     EIGEN_DEVICE_FUNC
     inline const Scalar& coeffRef(Index index) const
     {
-      return m_expression.const_cast_derived().coeffRef(index);
+      return m_expression.coeffRef(index);
     }

     template<int LoadMode>
@@ -116,7 +116,7 @@ class ArrayWrapper : public ArrayBase<ArrayWrapper<ExpressionType> >
     template<int LoadMode>
     inline void writePacket(Index rowId, Index colId, const PacketScalar& val)
     {
-      m_expression.const_cast_derived().template writePacket<LoadMode>(rowId, colId, val);
+      m_expression.template writePacket<LoadMode>(rowId, colId, val);
     }

     template<int LoadMode>
@@ -128,7 +128,7 @@ class ArrayWrapper : public ArrayBase<ArrayWrapper<ExpressionType> >
     template<int LoadMode>
     inline void writePacket(Index index, const PacketScalar& val)
     {
-      m_expression.const_cast_derived().template writePacket<LoadMode>(index, val);
+      m_expression.template writePacket<LoadMode>(index, val);
     }

     template<typename Dest>
@@ -145,11 +145,11 @@ class ArrayWrapper : public ArrayBase<ArrayWrapper<ExpressionType> >
     /** Forwards the resizing request to the nested expression
       * \sa DenseBase::resize(Index) */
     EIGEN_DEVICE_FUNC
-    void resize(Index newSize) { m_expression.const_cast_derived().resize(newSize); }
+    void resize(Index newSize) { m_expression.resize(newSize); }
     /** Forwards the resizing request to the nested expression
       * \sa DenseBase::resize(Index,Index)*/
     EIGEN_DEVICE_FUNC
-    void resize(Index rows, Index cols) { m_expression.const_cast_derived().resize(rows,cols); }
+    void resize(Index rows, Index cols) { m_expression.resize(rows,cols); }

   protected:
     NestedExpressionType m_expression;
@@ -195,7 +195,7 @@ class MatrixWrapper : public MatrixBase<MatrixWrapper<ExpressionType> >
                       const Scalar
                     >::type ScalarWithConstIfNotLvalue;

-    typedef typename internal::ref_selector<ExpressionType>::type NestedExpressionType;
+    typedef typename internal::ref_selector<ExpressionType>::non_const_type NestedExpressionType;

     EIGEN_DEVICE_FUNC
     explicit inline MatrixWrapper(ExpressionType& matrix) : m_expression(matrix) {}
@@ -210,7 +210,7 @@ class MatrixWrapper : public MatrixBase<MatrixWrapper<ExpressionType> >
     inline Index innerStride() const { return m_expression.innerStride(); }

     EIGEN_DEVICE_FUNC
-    inline ScalarWithConstIfNotLvalue* data() { return m_expression.const_cast_derived().data(); }
+    inline ScalarWithConstIfNotLvalue* data() { return m_expression.data(); }
     EIGEN_DEVICE_FUNC
     inline const Scalar* data() const { return m_expression.data(); }
@@ -223,7 +223,7 @@ class MatrixWrapper : public MatrixBase<MatrixWrapper<ExpressionType> >
     EIGEN_DEVICE_FUNC
     inline Scalar& coeffRef(Index rowId, Index colId)
     {
-      return m_expression.const_cast_derived().coeffRef(rowId, colId);
+      return m_expression.coeffRef(rowId, colId);
     }

     EIGEN_DEVICE_FUNC
@@ -241,13 +241,13 @@ class MatrixWrapper : public MatrixBase<MatrixWrapper<ExpressionType> >
     EIGEN_DEVICE_FUNC
     inline Scalar& coeffRef(Index index)
     {
-      return m_expression.const_cast_derived().coeffRef(index);
+      return m_expression.coeffRef(index);
     }

     EIGEN_DEVICE_FUNC
     inline const Scalar& coeffRef(Index index) const
     {
-      return m_expression.const_cast_derived().coeffRef(index);
+      return m_expression.coeffRef(index);
     }

     template<int LoadMode>
@@ -259,7 +259,7 @@ class MatrixWrapper : public MatrixBase<MatrixWrapper<ExpressionType> >
     template<int LoadMode>
     inline void writePacket(Index rowId, Index colId, const PacketScalar& val)
     {
-      m_expression.const_cast_derived().template writePacket<LoadMode>(rowId, colId, val);
+      m_expression.template writePacket<LoadMode>(rowId, colId, val);
     }

     template<int LoadMode>
@@ -271,7 +271,7 @@ class MatrixWrapper : public MatrixBase<MatrixWrapper<ExpressionType> >
     template<int LoadMode>
     inline void writePacket(Index index, const PacketScalar& val)
     {
-      m_expression.const_cast_derived().template writePacket<LoadMode>(index, val);
+      m_expression.template writePacket<LoadMode>(index, val);
     }

     EIGEN_DEVICE_FUNC
@@ -284,11 +284,11 @@ class MatrixWrapper : public MatrixBase<MatrixWrapper<ExpressionType> >
     /** Forwards the resizing request to the nested expression
       * \sa DenseBase::resize(Index) */
     EIGEN_DEVICE_FUNC
-    void resize(Index newSize) { m_expression.const_cast_derived().resize(newSize); }
+    void resize(Index newSize) { m_expression.resize(newSize); }
     /** Forwards the resizing request to the nested expression
       * \sa DenseBase::resize(Index,Index)*/
     EIGEN_DEVICE_FUNC
-    void resize(Index rows, Index cols) { m_expression.const_cast_derived().resize(rows,cols); }
+    void resize(Index rows, Index cols) { m_expression.resize(rows,cols); }

   protected:
     NestedExpressionType m_expression;

View File

@@ -606,7 +606,7 @@ public:
     assignPacket<StoreMode,LoadMode,PacketType>(row, col);
   }

-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE static Index rowIndexByOuterInner(Index outer, Index inner)
+  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Index rowIndexByOuterInner(Index outer, Index inner)
   {
     typedef typename DstEvaluatorType::ExpressionTraits Traits;
     return int(Traits::RowsAtCompileTime) == 1 ? 0
@@ -615,7 +615,7 @@ public:
       : inner;
   }

-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE static Index colIndexByOuterInner(Index outer, Index inner)
+  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Index colIndexByOuterInner(Index outer, Index inner)
   {
     typedef typename DstEvaluatorType::ExpressionTraits Traits;
     return int(Traits::ColsAtCompileTime) == 1 ? 0
@@ -637,7 +637,7 @@ protected:
 ***************************************************************************/

 template<typename DstXprType, typename SrcXprType, typename Functor>
-EIGEN_DEVICE_FUNC void call_dense_assignment_loop(const DstXprType& dst, const SrcXprType& src, const Functor &func)
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void call_dense_assignment_loop(const DstXprType& dst, const SrcXprType& src, const Functor &func)
 {
   eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols());
@@ -654,7 +654,7 @@ EIGEN_DEVICE_FUNC void call_dense_assignment_loop(const DstXprType& dst, const S
 }

 template<typename DstXprType, typename SrcXprType>
-EIGEN_DEVICE_FUNC void call_dense_assignment_loop(const DstXprType& dst, const SrcXprType& src)
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void call_dense_assignment_loop(const DstXprType& dst, const SrcXprType& src)
 {
   call_dense_assignment_loop(dst, src, internal::assign_op<typename DstXprType::Scalar>());
 }
@@ -682,47 +682,53 @@ template< typename DstXprType, typename SrcXprType, typename Functor,
 struct Assignment;

-// The only purpose of this call_assignment() function is to deal with noalias() / AssumeAliasing and automatic transposition.
-// Indeed, I (Gael) think that this concept of AssumeAliasing was a mistake, and it makes thing quite complicated.
-// So this intermediate function removes everything related to AssumeAliasing such that Assignment
+// The only purpose of this call_assignment() function is to deal with noalias() / "assume-aliasing" and automatic transposition.
+// Indeed, I (Gael) think that this concept of "assume-aliasing" was a mistake, and it makes things quite complicated.
+// So this intermediate function removes everything related to "assume-aliasing" such that Assignment
 // does not have to bother about these annoying details.

 template<typename Dst, typename Src>
-EIGEN_DEVICE_FUNC void call_assignment(Dst& dst, const Src& src)
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+void call_assignment(Dst& dst, const Src& src)
 {
   call_assignment(dst, src, internal::assign_op<typename Dst::Scalar>());
 }
 template<typename Dst, typename Src>
-EIGEN_DEVICE_FUNC void call_assignment(const Dst& dst, const Src& src)
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+void call_assignment(const Dst& dst, const Src& src)
 {
   call_assignment(dst, src, internal::assign_op<typename Dst::Scalar>());
 }

-// Deal with AssumeAliasing
+// Deal with "assume-aliasing"
 template<typename Dst, typename Src, typename Func>
-EIGEN_DEVICE_FUNC void call_assignment(Dst& dst, const Src& src, const Func& func, typename enable_if<evaluator_traits<Src>::AssumeAliasing==1, void*>::type = 0)
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+void call_assignment(Dst& dst, const Src& src, const Func& func, typename enable_if< evaluator_assume_aliasing<Src>::value, void*>::type = 0)
 {
   typename plain_matrix_type<Src>::type tmp(src);
   call_assignment_no_alias(dst, tmp, func);
 }

 template<typename Dst, typename Src, typename Func>
-EIGEN_DEVICE_FUNC void call_assignment(Dst& dst, const Src& src, const Func& func, typename enable_if<evaluator_traits<Src>::AssumeAliasing==0, void*>::type = 0)
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+void call_assignment(Dst& dst, const Src& src, const Func& func, typename enable_if<!evaluator_assume_aliasing<Src>::value, void*>::type = 0)
 {
   call_assignment_no_alias(dst, src, func);
 }

-// by-pass AssumeAliasing
+// by-pass "assume-aliasing"
 // When there is no aliasing, we require that 'dst' has been properly resized
 template<typename Dst, template <typename> class StorageBase, typename Src, typename Func>
-EIGEN_DEVICE_FUNC void call_assignment(NoAlias<Dst,StorageBase>& dst, const Src& src, const Func& func)
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+void call_assignment(NoAlias<Dst,StorageBase>& dst, const Src& src, const Func& func)
 {
   call_assignment_no_alias(dst.expression(), src, func);
 }

 template<typename Dst, typename Src, typename Func>
-EIGEN_DEVICE_FUNC void call_assignment_no_alias(Dst& dst, const Src& src, const Func& func)
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+void call_assignment_no_alias(Dst& dst, const Src& src, const Func& func)
 {
   enum {
     NeedToTranspose = ( (int(Dst::RowsAtCompileTime) == 1 && int(Src::ColsAtCompileTime) == 1)
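For callers, the distinction these overloads handle is simply whether noalias() was used; a short sketch:

    #include <Eigen/Core>

    void aliasing_demo(Eigen::MatrixXd& C,
                       const Eigen::MatrixXd& A,
                       const Eigen::MatrixXd& B) {
      C = A * B;            // product "assumes aliasing": evaluated via a temporary
      C.noalias() = A * B;  // caller promises no overlap: direct no-alias path
    }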
@@ -747,13 +753,15 @@ EIGEN_DEVICE_FUNC void call_assignment_no_alias(Dst& dst, const Src& src, const
   Assignment<ActualDstTypeCleaned,Src,Func>::run(actualDst, src, func);
 }

 template<typename Dst, typename Src>
-EIGEN_DEVICE_FUNC void call_assignment_no_alias(Dst& dst, const Src& src)
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+void call_assignment_no_alias(Dst& dst, const Src& src)
 {
   call_assignment_no_alias(dst, src, internal::assign_op<typename Dst::Scalar>());
 }

 template<typename Dst, typename Src, typename Func>
-EIGEN_DEVICE_FUNC void call_assignment_no_alias_no_transpose(Dst& dst, const Src& src, const Func& func)
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+void call_assignment_no_alias_no_transpose(Dst& dst, const Src& src, const Func& func)
 {
   Index dstRows = src.rows();
   Index dstCols = src.cols();
@@ -767,7 +775,8 @@ EIGEN_DEVICE_FUNC void call_assignment_no_alias_no_transpose(Dst& dst, const Src
   Assignment<Dst,Src,Func>::run(dst, src, func);
 }

 template<typename Dst, typename Src>
-EIGEN_DEVICE_FUNC void call_assignment_no_alias_no_transpose(Dst& dst, const Src& src)
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+void call_assignment_no_alias_no_transpose(Dst& dst, const Src& src)
 {
   call_assignment_no_alias_no_transpose(dst, src, internal::assign_op<typename Dst::Scalar>());
 }
@@ -779,7 +788,8 @@ template<typename Dst, typename Src> void check_for_aliasing(const Dst &dst, con
 template< typename DstXprType, typename SrcXprType, typename Functor, typename Scalar>
 struct Assignment<DstXprType, SrcXprType, Functor, Dense2Dense, Scalar>
 {
-  EIGEN_DEVICE_FUNC static void run(DstXprType &dst, const SrcXprType &src, const Functor &func)
+  EIGEN_DEVICE_FUNC
+  static EIGEN_STRONG_INLINE void run(DstXprType &dst, const SrcXprType &src, const Functor &func)
   {
     eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols());
@@ -796,7 +806,8 @@ struct Assignment<DstXprType, SrcXprType, Functor, Dense2Dense, Scalar>
 template< typename DstXprType, typename SrcXprType, typename Functor, typename Scalar>
 struct Assignment<DstXprType, SrcXprType, Functor, EigenBase2EigenBase, Scalar>
 {
-  EIGEN_DEVICE_FUNC static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<typename DstXprType::Scalar> &/*func*/)
+  EIGEN_DEVICE_FUNC
+  static EIGEN_STRONG_INLINE void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<typename DstXprType::Scalar> &/*func*/)
   {
     eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols());
     src.evalTo(dst);

Eigen/src/Core/Assign_MKL.h Executable file → Normal file
View File

View File

@@ -161,15 +161,15 @@ class BandMatrixBase : public EigenBase<Derived>
   *
   * \brief Represents a rectangular matrix with a banded storage
   *
-  * \param _Scalar Numeric type, i.e. float, double, int
-  * \param Rows Number of rows, or \b Dynamic
-  * \param Cols Number of columns, or \b Dynamic
-  * \param Supers Number of super diagonal
-  * \param Subs Number of sub diagonal
-  * \param _Options A combination of either \b #RowMajor or \b #ColMajor, and of \b #SelfAdjoint
+  * \tparam _Scalar Numeric type, i.e. float, double, int
+  * \tparam _Rows Number of rows, or \b Dynamic
+  * \tparam _Cols Number of columns, or \b Dynamic
+  * \tparam _Supers Number of super diagonals
+  * \tparam _Subs Number of sub diagonals
+  * \tparam _Options A combination of either \b #RowMajor or \b #ColMajor, and of \b #SelfAdjoint
   *                  The former controls \ref TopicStorageOrders "storage order", and defaults to
   *                  column-major. The latter controls whether the matrix represents a selfadjoint
   *                  matrix in which case either Supers or Subs has to be null.
   *
   * \sa class TridiagonalMatrix
   */
@@ -302,9 +302,9 @@ class BandMatrixWrapper : public BandMatrixBase<BandMatrixWrapper<_CoefficientsT
   *
   * \brief Represents a tridiagonal matrix with a compact banded storage
   *
-  * \param _Scalar Numeric type, i.e. float, double, int
-  * \param Size Number of rows and cols, or \b Dynamic
-  * \param _Options Can be 0 or \b SelfAdjoint
+  * \tparam Scalar Numeric type, i.e. float, double, int
+  * \tparam Size Number of rows and cols, or \b Dynamic
+  * \tparam Options Can be 0 or \b SelfAdjoint
   *
   * \sa class BandMatrix
   */

View File

@@ -13,41 +13,6 @@
 namespace Eigen {

-/** \class Block
-  * \ingroup Core_Module
-  *
-  * \brief Expression of a fixed-size or dynamic-size block
-  *
-  * \param XprType the type of the expression in which we are taking a block
-  * \param BlockRows the number of rows of the block we are taking at compile time (optional)
-  * \param BlockCols the number of columns of the block we are taking at compile time (optional)
-  * \param InnerPanel is true, if the block maps to a set of rows of a row major matrix or
-  *        to set of columns of a column major matrix (optional). The parameter allows to determine
-  *        at compile time whether aligned access is possible on the block expression.
-  *
-  * This class represents an expression of either a fixed-size or dynamic-size block. It is the return
-  * type of DenseBase::block(Index,Index,Index,Index) and DenseBase::block<int,int>(Index,Index) and
-  * most of the time this is the only way it is used.
-  *
-  * However, if you want to directly maniputate block expressions,
-  * for instance if you want to write a function returning such an expression, you
-  * will need to use this class.
-  *
-  * Here is an example illustrating the dynamic case:
-  * \include class_Block.cpp
-  * Output: \verbinclude class_Block.out
-  *
-  * \note Even though this expression has dynamic size, in the case where \a XprType
-  * has fixed size, this expression inherits a fixed maximal size which means that evaluating
-  * it does not cause a dynamic memory allocation.
-  *
-  * Here is an example illustrating the fixed-size case:
-  * \include class_FixedBlock.cpp
-  * Output: \verbinclude class_FixedBlock.out
-  *
-  * \sa DenseBase::block(Index,Index,Index,Index), DenseBase::block(Index,Index), class VectorBlock
-  */
 namespace internal {
 template<typename XprType, int BlockRows, int BlockCols, bool InnerPanel>
 struct traits<Block<XprType, BlockRows, BlockCols, InnerPanel> > : traits<XprType>
@@ -101,6 +66,40 @@ template<typename XprType, int BlockRows=Dynamic, int BlockCols=Dynamic, bool In
 template<typename XprType, int BlockRows, int BlockCols, bool InnerPanel, typename StorageKind> class BlockImpl;

+/** \class Block
+  * \ingroup Core_Module
+  *
+  * \brief Expression of a fixed-size or dynamic-size block
+  *
+  * \tparam XprType the type of the expression in which we are taking a block
+  * \tparam BlockRows the number of rows of the block we are taking at compile time (optional)
+  * \tparam BlockCols the number of columns of the block we are taking at compile time (optional)
+  * \tparam InnerPanel is true, if the block maps to a set of rows of a row major matrix or
+  *         to a set of columns of a column major matrix (optional). The parameter allows to determine
+  *         at compile time whether aligned access is possible on the block expression.
+  *
+  * This class represents an expression of either a fixed-size or dynamic-size block. It is the return
+  * type of DenseBase::block(Index,Index,Index,Index) and DenseBase::block<int,int>(Index,Index) and
+  * most of the time this is the only way it is used.
+  *
+  * However, if you want to directly manipulate block expressions,
+  * for instance if you want to write a function returning such an expression, you
+  * will need to use this class.
+  *
+  * Here is an example illustrating the dynamic case:
+  * \include class_Block.cpp
+  * Output: \verbinclude class_Block.out
+  *
+  * \note Even though this expression has dynamic size, in the case where \a XprType
+  * has fixed size, this expression inherits a fixed maximal size which means that evaluating
+  * it does not cause a dynamic memory allocation.
+  *
+  * Here is an example illustrating the fixed-size case:
+  * \include class_FixedBlock.cpp
+  * Output: \verbinclude class_FixedBlock.out
+  *
+  * \sa DenseBase::block(Index,Index,Index,Index), DenseBase::block(Index,Index), class VectorBlock
+  */
 template<typename XprType, int BlockRows, int BlockCols, bool InnerPanel> class Block
   : public BlockImpl<XprType, BlockRows, BlockCols, InnerPanel, typename internal::traits<XprType>::StorageKind>
 {
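A short sketch of the two usages the documentation above refers to, via the standard DenseBase API:

    #include <Eigen/Core>
    #include <iostream>

    int main() {
      Eigen::MatrixXd m = Eigen::MatrixXd::Random(4, 4);
      std::cout << m.block(1, 1, 2, 2) << "\n\n"; // dynamic-size 2x2 block
      std::cout << m.block<2, 2>(1, 1) << "\n";   // fixed-size 2x2 block
      return 0;
    }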
@ -130,8 +129,8 @@ template<typename XprType, int BlockRows, int BlockCols, bool InnerPanel> class
: Impl(xpr, startRow, startCol) : Impl(xpr, startRow, startCol)
{ {
EIGEN_STATIC_ASSERT(RowsAtCompileTime!=Dynamic && ColsAtCompileTime!=Dynamic,THIS_METHOD_IS_ONLY_FOR_FIXED_SIZE) EIGEN_STATIC_ASSERT(RowsAtCompileTime!=Dynamic && ColsAtCompileTime!=Dynamic,THIS_METHOD_IS_ONLY_FOR_FIXED_SIZE)
eigen_assert(startRow >= 0 && BlockRows >= 1 && startRow + BlockRows <= xpr.rows() eigen_assert(startRow >= 0 && BlockRows >= 0 && startRow + BlockRows <= xpr.rows()
&& startCol >= 0 && BlockCols >= 1 && startCol + BlockCols <= xpr.cols()); && startCol >= 0 && BlockCols >= 0 && startCol + BlockCols <= xpr.cols());
} }
/** Dynamic-size constructor /** Dynamic-size constructor
@ -174,6 +173,7 @@ template<typename XprType, int BlockRows, int BlockCols, bool InnerPanel, bool H
: public internal::dense_xpr_base<Block<XprType, BlockRows, BlockCols, InnerPanel> >::type : public internal::dense_xpr_base<Block<XprType, BlockRows, BlockCols, InnerPanel> >::type
{ {
typedef Block<XprType, BlockRows, BlockCols, InnerPanel> BlockType; typedef Block<XprType, BlockRows, BlockCols, InnerPanel> BlockType;
typedef typename internal::ref_selector<XprType>::non_const_type XprTypeNested;
public: public:
typedef typename internal::dense_xpr_base<BlockType>::type Base; typedef typename internal::dense_xpr_base<BlockType>::type Base;
@ -222,15 +222,13 @@ template<typename XprType, int BlockRows, int BlockCols, bool InnerPanel, bool H
inline Scalar& coeffRef(Index rowId, Index colId) inline Scalar& coeffRef(Index rowId, Index colId)
{ {
EIGEN_STATIC_ASSERT_LVALUE(XprType) EIGEN_STATIC_ASSERT_LVALUE(XprType)
return m_xpr.const_cast_derived() return m_xpr.coeffRef(rowId + m_startRow.value(), colId + m_startCol.value());
.coeffRef(rowId + m_startRow.value(), colId + m_startCol.value());
} }
EIGEN_DEVICE_FUNC EIGEN_DEVICE_FUNC
inline const Scalar& coeffRef(Index rowId, Index colId) const inline const Scalar& coeffRef(Index rowId, Index colId) const
{ {
return m_xpr.derived() return m_xpr.derived().coeffRef(rowId + m_startRow.value(), colId + m_startCol.value());
.coeffRef(rowId + m_startRow.value(), colId + m_startCol.value());
} }
EIGEN_DEVICE_FUNC EIGEN_DEVICE_FUNC
@ -243,39 +241,34 @@ template<typename XprType, int BlockRows, int BlockCols, bool InnerPanel, bool H
inline Scalar& coeffRef(Index index) inline Scalar& coeffRef(Index index)
{ {
EIGEN_STATIC_ASSERT_LVALUE(XprType) EIGEN_STATIC_ASSERT_LVALUE(XprType)
return m_xpr.const_cast_derived() return m_xpr.coeffRef(m_startRow.value() + (RowsAtCompileTime == 1 ? 0 : index),
.coeffRef(m_startRow.value() + (RowsAtCompileTime == 1 ? 0 : index), m_startCol.value() + (RowsAtCompileTime == 1 ? index : 0));
m_startCol.value() + (RowsAtCompileTime == 1 ? index : 0));
} }
EIGEN_DEVICE_FUNC EIGEN_DEVICE_FUNC
inline const Scalar& coeffRef(Index index) const inline const Scalar& coeffRef(Index index) const
{ {
return m_xpr.const_cast_derived() return m_xpr.coeffRef(m_startRow.value() + (RowsAtCompileTime == 1 ? 0 : index),
.coeffRef(m_startRow.value() + (RowsAtCompileTime == 1 ? 0 : index), m_startCol.value() + (RowsAtCompileTime == 1 ? index : 0));
m_startCol.value() + (RowsAtCompileTime == 1 ? index : 0));
} }
EIGEN_DEVICE_FUNC EIGEN_DEVICE_FUNC
inline const CoeffReturnType coeff(Index index) const inline const CoeffReturnType coeff(Index index) const
{ {
return m_xpr return m_xpr.coeff(m_startRow.value() + (RowsAtCompileTime == 1 ? 0 : index),
.coeff(m_startRow.value() + (RowsAtCompileTime == 1 ? 0 : index), m_startCol.value() + (RowsAtCompileTime == 1 ? index : 0));
m_startCol.value() + (RowsAtCompileTime == 1 ? index : 0));
} }
template<int LoadMode> template<int LoadMode>
inline PacketScalar packet(Index rowId, Index colId) const inline PacketScalar packet(Index rowId, Index colId) const
{ {
return m_xpr.template packet<Unaligned> return m_xpr.template packet<Unaligned>(rowId + m_startRow.value(), colId + m_startCol.value());
(rowId + m_startRow.value(), colId + m_startCol.value());
} }
template<int LoadMode> template<int LoadMode>
inline void writePacket(Index rowId, Index colId, const PacketScalar& val) inline void writePacket(Index rowId, Index colId, const PacketScalar& val)
{ {
m_xpr.const_cast_derived().template writePacket<Unaligned> m_xpr.template writePacket<Unaligned>(rowId + m_startRow.value(), colId + m_startCol.value(), val);
(rowId + m_startRow.value(), colId + m_startCol.value(), val);
} }
template<int LoadMode> template<int LoadMode>
@ -289,7 +282,7 @@ template<typename XprType, int BlockRows, int BlockCols, bool InnerPanel, bool H
template<int LoadMode> template<int LoadMode>
inline void writePacket(Index index, const PacketScalar& val) inline void writePacket(Index index, const PacketScalar& val)
{ {
m_xpr.const_cast_derived().template writePacket<Unaligned> m_xpr.template writePacket<Unaligned>
(m_startRow.value() + (RowsAtCompileTime == 1 ? 0 : index), (m_startRow.value() + (RowsAtCompileTime == 1 ? 0 : index),
m_startCol.value() + (RowsAtCompileTime == 1 ? index : 0), val); m_startCol.value() + (RowsAtCompileTime == 1 ? index : 0), val);
} }
@ -302,10 +295,13 @@ template<typename XprType, int BlockRows, int BlockCols, bool InnerPanel, bool H
#endif #endif
EIGEN_DEVICE_FUNC EIGEN_DEVICE_FUNC
const typename internal::remove_all<typename XprType::Nested>::type& nestedExpression() const const typename internal::remove_all<XprTypeNested>::type& nestedExpression() const
{ {
return m_xpr; return m_xpr;
} }
EIGEN_DEVICE_FUNC
XprType& nestedExpression() { return m_xpr; }
EIGEN_DEVICE_FUNC EIGEN_DEVICE_FUNC
StorageIndex startRow() const StorageIndex startRow() const
@ -321,9 +317,9 @@ template<typename XprType, int BlockRows, int BlockCols, bool InnerPanel, bool H
protected: protected:
const typename XprType::Nested m_xpr; XprTypeNested m_xpr;
const internal::variable_if_dynamic<StorageIndex, XprType::RowsAtCompileTime == 1 ? 0 : Dynamic> m_startRow; const internal::variable_if_dynamic<StorageIndex, (XprType::RowsAtCompileTime == 1 && BlockRows==1) ? 0 : Dynamic> m_startRow;
const internal::variable_if_dynamic<StorageIndex, XprType::ColsAtCompileTime == 1 ? 0 : Dynamic> m_startCol; const internal::variable_if_dynamic<StorageIndex, (XprType::ColsAtCompileTime == 1 && BlockCols==1) ? 0 : Dynamic> m_startCol;
const internal::variable_if_dynamic<StorageIndex, RowsAtCompileTime> m_blockRows; const internal::variable_if_dynamic<StorageIndex, RowsAtCompileTime> m_blockRows;
const internal::variable_if_dynamic<StorageIndex, ColsAtCompileTime> m_blockCols; const internal::variable_if_dynamic<StorageIndex, ColsAtCompileTime> m_blockCols;
}; };
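
For context: the block's start row/column now live in internal::variable_if_dynamic, so when an offset is provably zero at compile time (a one-row expression with BlockRows==1, say) it costs neither storage nor runtime arithmetic. Below is a minimal sketch of the idea only, not Eigen's actual implementation:

    #include <cassert>

    // Sketch: the compile-time-known specialization stores nothing and
    // value() folds to a constant; only the Dynamic case (-1 here) keeps
    // a runtime member.
    template<typename T, int Value>
    struct variable_if_dynamic_sketch
    {
        explicit variable_if_dynamic_sketch(T v) { assert(v == T(Value)); (void)v; }
        static T value() { return T(Value); }
    };

    template<typename T>
    struct variable_if_dynamic_sketch<T, -1>   // -1 stands in for Eigen::Dynamic
    {
        explicit variable_if_dynamic_sketch(T v) : m_value(v) {}
        T value() const { return m_value; }
        T m_value;
    };

    int main()
    {
        variable_if_dynamic_sketch<long, 0>  fixed(0); // stateless, value() == 0
        variable_if_dynamic_sketch<long, -1> dyn(7);   // stores the runtime offset
        return int(fixed.value() + dyn.value());       // 7
    }
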
@ -334,6 +330,7 @@ class BlockImpl_dense<XprType,BlockRows,BlockCols, InnerPanel,true>
: public MapBase<Block<XprType, BlockRows, BlockCols, InnerPanel> > : public MapBase<Block<XprType, BlockRows, BlockCols, InnerPanel> >
{ {
typedef Block<XprType, BlockRows, BlockCols, InnerPanel> BlockType; typedef Block<XprType, BlockRows, BlockCols, InnerPanel> BlockType;
typedef typename internal::ref_selector<XprType>::non_const_type XprTypeNested;
enum { enum {
XprTypeIsRowMajor = (int(traits<XprType>::Flags)&RowMajorBit) != 0 XprTypeIsRowMajor = (int(traits<XprType>::Flags)&RowMajorBit) != 0
}; };
@ -351,7 +348,9 @@ class BlockImpl_dense<XprType,BlockRows,BlockCols, InnerPanel,true>
|| ((BlockRows==XprType::RowsAtCompileTime) && (BlockCols==1) && ( XprTypeIsRowMajor)) ? xpr.innerStride() : xpr.outerStride()), || ((BlockRows==XprType::RowsAtCompileTime) && (BlockCols==1) && ( XprTypeIsRowMajor)) ? xpr.innerStride() : xpr.outerStride()),
BlockRows==1 ? 1 : xpr.rows(), BlockRows==1 ? 1 : xpr.rows(),
BlockCols==1 ? 1 : xpr.cols()), BlockCols==1 ? 1 : xpr.cols()),
m_xpr(xpr) m_xpr(xpr),
m_startRow( (BlockRows==1) && (BlockCols==XprType::ColsAtCompileTime) ? i : 0),
m_startCol( (BlockRows==XprType::RowsAtCompileTime) && (BlockCols==1) ? i : 0)
{ {
init(); init();
} }
@ -361,7 +360,7 @@ class BlockImpl_dense<XprType,BlockRows,BlockCols, InnerPanel,true>
EIGEN_DEVICE_FUNC EIGEN_DEVICE_FUNC
inline BlockImpl_dense(XprType& xpr, Index startRow, Index startCol) inline BlockImpl_dense(XprType& xpr, Index startRow, Index startCol)
: Base(xpr.data()+xpr.innerStride()*(XprTypeIsRowMajor?startCol:startRow) + xpr.outerStride()*(XprTypeIsRowMajor?startRow:startCol)), : Base(xpr.data()+xpr.innerStride()*(XprTypeIsRowMajor?startCol:startRow) + xpr.outerStride()*(XprTypeIsRowMajor?startRow:startCol)),
m_xpr(xpr) m_xpr(xpr), m_startRow(startRow), m_startCol(startCol)
{ {
init(); init();
} }
@ -373,16 +372,19 @@ class BlockImpl_dense<XprType,BlockRows,BlockCols, InnerPanel,true>
Index startRow, Index startCol, Index startRow, Index startCol,
Index blockRows, Index blockCols) Index blockRows, Index blockCols)
: Base(xpr.data()+xpr.innerStride()*(XprTypeIsRowMajor?startCol:startRow) + xpr.outerStride()*(XprTypeIsRowMajor?startRow:startCol), blockRows, blockCols), : Base(xpr.data()+xpr.innerStride()*(XprTypeIsRowMajor?startCol:startRow) + xpr.outerStride()*(XprTypeIsRowMajor?startRow:startCol), blockRows, blockCols),
m_xpr(xpr) m_xpr(xpr), m_startRow(startRow), m_startCol(startCol)
{ {
init(); init();
} }
EIGEN_DEVICE_FUNC EIGEN_DEVICE_FUNC
const typename internal::remove_all<typename XprType::Nested>::type& nestedExpression() const const typename internal::remove_all<XprTypeNested>::type& nestedExpression() const
{ {
return m_xpr; return m_xpr;
} }
EIGEN_DEVICE_FUNC
XprType& nestedExpression() { return m_xpr; }
/** \sa MapBase::innerStride() */ /** \sa MapBase::innerStride() */
EIGEN_DEVICE_FUNC EIGEN_DEVICE_FUNC
@ -400,6 +402,18 @@ class BlockImpl_dense<XprType,BlockRows,BlockCols, InnerPanel,true>
return m_outerStride; return m_outerStride;
} }
EIGEN_DEVICE_FUNC
StorageIndex startRow() const
{
return m_startRow.value();
}
EIGEN_DEVICE_FUNC
StorageIndex startCol() const
{
return m_startCol.value();
}
#ifndef __SUNPRO_CC #ifndef __SUNPRO_CC
// FIXME sunstudio is not friendly with the above friend... // FIXME sunstudio is not friendly with the above friend...
// META-FIXME there is no 'friend' keyword around here. Is this obsolete? // META-FIXME there is no 'friend' keyword around here. Is this obsolete?
@ -425,7 +439,9 @@ class BlockImpl_dense<XprType,BlockRows,BlockCols, InnerPanel,true>
: m_xpr.innerStride(); : m_xpr.innerStride();
} }
typename XprType::Nested m_xpr; XprTypeNested m_xpr;
const internal::variable_if_dynamic<StorageIndex, (XprType::RowsAtCompileTime == 1 && BlockRows==1) ? 0 : Dynamic> m_startRow;
const internal::variable_if_dynamic<StorageIndex, (XprType::ColsAtCompileTime == 1 && BlockCols==1) ? 0 : Dynamic> m_startCol;
Index m_outerStride; Index m_outerStride;
}; };

View File

@ -22,7 +22,7 @@ namespace Eigen {
* the return type of MatrixBase::operator<<, and most of the time this is the only * the return type of MatrixBase::operator<<, and most of the time this is the only
* way it is used. * way it is used.
* *
* \sa \ref MatrixBaseCommaInitRef "MatrixBase::operator<<", CommaInitializer::finished() * \sa \blank \ref MatrixBaseCommaInitRef "MatrixBase::operator<<", CommaInitializer::finished()
*/ */
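
For context, the standard use of the class this file implements: operator<< returns a CommaInitializer, each comma appends the next coefficient, and finished() (invoked implicitly when the temporary dies) hands back the filled matrix.

    #include <Eigen/Dense>
    #include <iostream>

    int main()
    {
        Eigen::Matrix3d m;
        m << 1, 2, 3,   // operator<< starts the CommaInitializer
             4, 5, 6,   // operator, keeps appending coefficients
             7, 8, 9;   // finished() runs when the temporary is destroyed
        std::cout << m << std::endl;
    }
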
template<typename XprType> template<typename XprType>
struct CommaInitializer struct CommaInitializer

View File

@ -63,10 +63,6 @@ struct evaluator_traits_base
// by default, get evaluator kind and shape from storage // by default, get evaluator kind and shape from storage
typedef typename storage_kind_to_evaluator_kind<typename traits<T>::StorageKind>::Kind Kind; typedef typename storage_kind_to_evaluator_kind<typename traits<T>::StorageKind>::Kind Kind;
typedef typename storage_kind_to_shape<typename traits<T>::StorageKind>::Shape Shape; typedef typename storage_kind_to_shape<typename traits<T>::StorageKind>::Shape Shape;
// 1 if assignment A = B assumes aliasing when B is of type T and thus B needs to be evaluated into a
// temporary; 0 if not.
static const int AssumeAliasing = 0;
}; };
// Default evaluator traits // Default evaluator traits
@ -75,6 +71,10 @@ struct evaluator_traits : public evaluator_traits_base<T>
{ {
}; };
template<typename T, typename Shape = typename evaluator_traits<T>::Shape >
struct evaluator_assume_aliasing {
static const bool value = false;
};
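
The aliasing flag thus moves from evaluator_traits into a standalone trait that can be specialized per expression type (and per shape). A self-contained sketch of the pattern, with simplified names and a hypothetical MyProductXpr standing in for the product expressions that are the usual clients:

    struct DenseShape {};   // stand-in for Eigen's shape tags

    template<typename T, typename Shape = DenseShape>
    struct assume_aliasing_sketch { static const bool value = false; };

    template<typename Lhs, typename Rhs> struct MyProductXpr {};  // hypothetical

    // A = A * B reads A while writing it, so the right-hand side must be
    // evaluated into a temporary first: the expression opts in like this.
    template<typename Lhs, typename Rhs>
    struct assume_aliasing_sketch<MyProductXpr<Lhs, Rhs>, DenseShape>
    { static const bool value = true; };

    int main()
    {
        return assume_aliasing_sketch<MyProductXpr<int, int> >::value ? 0 : 1;
    }
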
// By default, we assume a unary expression: // By default, we assume a unary expression:
template<typename T> template<typename T>
@ -148,7 +148,8 @@ struct evaluator<PlainObjectBase<Derived> >
EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost); EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost);
} }
EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index row, Index col) const EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
CoeffReturnType coeff(Index row, Index col) const
{ {
if (IsRowMajor) if (IsRowMajor)
return m_data[row * m_outerStride.value() + col]; return m_data[row * m_outerStride.value() + col];
@ -156,12 +157,14 @@ struct evaluator<PlainObjectBase<Derived> >
return m_data[row + col * m_outerStride.value()]; return m_data[row + col * m_outerStride.value()];
} }
EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index index) const EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
CoeffReturnType coeff(Index index) const
{ {
return m_data[index]; return m_data[index];
} }
EIGEN_DEVICE_FUNC Scalar& coeffRef(Index row, Index col) EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
Scalar& coeffRef(Index row, Index col)
{ {
if (IsRowMajor) if (IsRowMajor)
return const_cast<Scalar*>(m_data)[row * m_outerStride.value() + col]; return const_cast<Scalar*>(m_data)[row * m_outerStride.value() + col];
@ -169,12 +172,14 @@ struct evaluator<PlainObjectBase<Derived> >
return const_cast<Scalar*>(m_data)[row + col * m_outerStride.value()]; return const_cast<Scalar*>(m_data)[row + col * m_outerStride.value()];
} }
EIGEN_DEVICE_FUNC Scalar& coeffRef(Index index) EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
Scalar& coeffRef(Index index)
{ {
return const_cast<Scalar*>(m_data)[index]; return const_cast<Scalar*>(m_data)[index];
} }
template<int LoadMode, typename PacketType> template<int LoadMode, typename PacketType>
EIGEN_STRONG_INLINE
PacketType packet(Index row, Index col) const PacketType packet(Index row, Index col) const
{ {
if (IsRowMajor) if (IsRowMajor)
@ -184,12 +189,14 @@ struct evaluator<PlainObjectBase<Derived> >
} }
template<int LoadMode, typename PacketType> template<int LoadMode, typename PacketType>
EIGEN_STRONG_INLINE
PacketType packet(Index index) const PacketType packet(Index index) const
{ {
return ploadt<PacketType, LoadMode>(m_data + index); return ploadt<PacketType, LoadMode>(m_data + index);
} }
template<int StoreMode,typename PacketType> template<int StoreMode,typename PacketType>
EIGEN_STRONG_INLINE
void writePacket(Index row, Index col, const PacketType& x) void writePacket(Index row, Index col, const PacketType& x)
{ {
if (IsRowMajor) if (IsRowMajor)
@ -201,6 +208,7 @@ struct evaluator<PlainObjectBase<Derived> >
} }
template<int StoreMode, typename PacketType> template<int StoreMode, typename PacketType>
EIGEN_STRONG_INLINE
void writePacket(Index index, const PacketType& x) void writePacket(Index index, const PacketType& x)
{ {
return pstoret<Scalar, PacketType, StoreMode>(const_cast<Scalar*>(m_data) + index, x); return pstoret<Scalar, PacketType, StoreMode>(const_cast<Scalar*>(m_data) + index, x);
@ -260,45 +268,53 @@ struct unary_evaluator<Transpose<ArgType>, IndexBased>
typedef typename XprType::Scalar Scalar; typedef typename XprType::Scalar Scalar;
typedef typename XprType::CoeffReturnType CoeffReturnType; typedef typename XprType::CoeffReturnType CoeffReturnType;
EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index row, Index col) const EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
CoeffReturnType coeff(Index row, Index col) const
{ {
return m_argImpl.coeff(col, row); return m_argImpl.coeff(col, row);
} }
EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index index) const EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
CoeffReturnType coeff(Index index) const
{ {
return m_argImpl.coeff(index); return m_argImpl.coeff(index);
} }
EIGEN_DEVICE_FUNC Scalar& coeffRef(Index row, Index col) EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
Scalar& coeffRef(Index row, Index col)
{ {
return m_argImpl.coeffRef(col, row); return m_argImpl.coeffRef(col, row);
} }
EIGEN_DEVICE_FUNC typename XprType::Scalar& coeffRef(Index index) EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
typename XprType::Scalar& coeffRef(Index index)
{ {
return m_argImpl.coeffRef(index); return m_argImpl.coeffRef(index);
} }
template<int LoadMode, typename PacketType> template<int LoadMode, typename PacketType>
EIGEN_STRONG_INLINE
PacketType packet(Index row, Index col) const PacketType packet(Index row, Index col) const
{ {
return m_argImpl.template packet<LoadMode,PacketType>(col, row); return m_argImpl.template packet<LoadMode,PacketType>(col, row);
} }
template<int LoadMode, typename PacketType> template<int LoadMode, typename PacketType>
EIGEN_STRONG_INLINE
PacketType packet(Index index) const PacketType packet(Index index) const
{ {
return m_argImpl.template packet<LoadMode,PacketType>(index); return m_argImpl.template packet<LoadMode,PacketType>(index);
} }
template<int StoreMode, typename PacketType> template<int StoreMode, typename PacketType>
EIGEN_STRONG_INLINE
void writePacket(Index row, Index col, const PacketType& x) void writePacket(Index row, Index col, const PacketType& x)
{ {
m_argImpl.template writePacket<StoreMode,PacketType>(col, row, x); m_argImpl.template writePacket<StoreMode,PacketType>(col, row, x);
} }
template<int StoreMode, typename PacketType> template<int StoreMode, typename PacketType>
EIGEN_STRONG_INLINE
void writePacket(Index index, const PacketType& x) void writePacket(Index index, const PacketType& x)
{ {
m_argImpl.template writePacket<StoreMode,PacketType>(index, x); m_argImpl.template writePacket<StoreMode,PacketType>(index, x);
@ -338,23 +354,27 @@ struct evaluator<CwiseNullaryOp<NullaryOp,PlainObjectType> >
typedef typename XprType::CoeffReturnType CoeffReturnType; typedef typename XprType::CoeffReturnType CoeffReturnType;
EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index row, Index col) const EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
CoeffReturnType coeff(Index row, Index col) const
{ {
return m_functor(row, col); return m_functor(row, col);
} }
EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index index) const EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
CoeffReturnType coeff(Index index) const
{ {
return m_functor(index); return m_functor(index);
} }
template<int LoadMode, typename PacketType> template<int LoadMode, typename PacketType>
EIGEN_STRONG_INLINE
PacketType packet(Index row, Index col) const PacketType packet(Index row, Index col) const
{ {
return m_functor.template packetOp<Index,PacketType>(row, col); return m_functor.template packetOp<Index,PacketType>(row, col);
} }
template<int LoadMode, typename PacketType> template<int LoadMode, typename PacketType>
EIGEN_STRONG_INLINE
PacketType packet(Index index) const PacketType packet(Index index) const
{ {
return m_functor.template packetOp<Index,PacketType>(index); return m_functor.template packetOp<Index,PacketType>(index);
@ -380,7 +400,8 @@ struct unary_evaluator<CwiseUnaryOp<UnaryOp, ArgType>, IndexBased >
Alignment = evaluator<ArgType>::Alignment Alignment = evaluator<ArgType>::Alignment
}; };
EIGEN_DEVICE_FUNC explicit unary_evaluator(const XprType& op) EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
explicit unary_evaluator(const XprType& op)
: m_functor(op.functor()), : m_functor(op.functor()),
m_argImpl(op.nestedExpression()) m_argImpl(op.nestedExpression())
{ {
@ -390,23 +411,27 @@ struct unary_evaluator<CwiseUnaryOp<UnaryOp, ArgType>, IndexBased >
typedef typename XprType::CoeffReturnType CoeffReturnType; typedef typename XprType::CoeffReturnType CoeffReturnType;
EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index row, Index col) const EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
CoeffReturnType coeff(Index row, Index col) const
{ {
return m_functor(m_argImpl.coeff(row, col)); return m_functor(m_argImpl.coeff(row, col));
} }
EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index index) const EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
CoeffReturnType coeff(Index index) const
{ {
return m_functor(m_argImpl.coeff(index)); return m_functor(m_argImpl.coeff(index));
} }
template<int LoadMode, typename PacketType> template<int LoadMode, typename PacketType>
EIGEN_STRONG_INLINE
PacketType packet(Index row, Index col) const PacketType packet(Index row, Index col) const
{ {
return m_functor.packetOp(m_argImpl.template packet<LoadMode, PacketType>(row, col)); return m_functor.packetOp(m_argImpl.template packet<LoadMode, PacketType>(row, col));
} }
template<int LoadMode, typename PacketType> template<int LoadMode, typename PacketType>
EIGEN_STRONG_INLINE
PacketType packet(Index index) const PacketType packet(Index index) const
{ {
return m_functor.packetOp(m_argImpl.template packet<LoadMode, PacketType>(index)); return m_functor.packetOp(m_argImpl.template packet<LoadMode, PacketType>(index));
@ -466,17 +491,20 @@ struct binary_evaluator<CwiseBinaryOp<BinaryOp, Lhs, Rhs>, IndexBased, IndexBase
typedef typename XprType::CoeffReturnType CoeffReturnType; typedef typename XprType::CoeffReturnType CoeffReturnType;
EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index row, Index col) const EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
CoeffReturnType coeff(Index row, Index col) const
{ {
return m_functor(m_lhsImpl.coeff(row, col), m_rhsImpl.coeff(row, col)); return m_functor(m_lhsImpl.coeff(row, col), m_rhsImpl.coeff(row, col));
} }
EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index index) const EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
CoeffReturnType coeff(Index index) const
{ {
return m_functor(m_lhsImpl.coeff(index), m_rhsImpl.coeff(index)); return m_functor(m_lhsImpl.coeff(index), m_rhsImpl.coeff(index));
} }
template<int LoadMode, typename PacketType> template<int LoadMode, typename PacketType>
EIGEN_STRONG_INLINE
PacketType packet(Index row, Index col) const PacketType packet(Index row, Index col) const
{ {
return m_functor.packetOp(m_lhsImpl.template packet<LoadMode,PacketType>(row, col), return m_functor.packetOp(m_lhsImpl.template packet<LoadMode,PacketType>(row, col),
@ -484,6 +512,7 @@ struct binary_evaluator<CwiseBinaryOp<BinaryOp, Lhs, Rhs>, IndexBased, IndexBase
} }
template<int LoadMode, typename PacketType> template<int LoadMode, typename PacketType>
EIGEN_STRONG_INLINE
PacketType packet(Index index) const PacketType packet(Index index) const
{ {
return m_functor.packetOp(m_lhsImpl.template packet<LoadMode,PacketType>(index), return m_functor.packetOp(m_lhsImpl.template packet<LoadMode,PacketType>(index),
@ -523,22 +552,26 @@ struct unary_evaluator<CwiseUnaryView<UnaryOp, ArgType>, IndexBased>
typedef typename XprType::Scalar Scalar; typedef typename XprType::Scalar Scalar;
typedef typename XprType::CoeffReturnType CoeffReturnType; typedef typename XprType::CoeffReturnType CoeffReturnType;
EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index row, Index col) const EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
CoeffReturnType coeff(Index row, Index col) const
{ {
return m_unaryOp(m_argImpl.coeff(row, col)); return m_unaryOp(m_argImpl.coeff(row, col));
} }
EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index index) const EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
CoeffReturnType coeff(Index index) const
{ {
return m_unaryOp(m_argImpl.coeff(index)); return m_unaryOp(m_argImpl.coeff(index));
} }
EIGEN_DEVICE_FUNC Scalar& coeffRef(Index row, Index col) EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
Scalar& coeffRef(Index row, Index col)
{ {
return m_unaryOp(m_argImpl.coeffRef(row, col)); return m_unaryOp(m_argImpl.coeffRef(row, col));
} }
EIGEN_DEVICE_FUNC Scalar& coeffRef(Index index) EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
Scalar& coeffRef(Index index)
{ {
return m_unaryOp(m_argImpl.coeffRef(index)); return m_unaryOp(m_argImpl.coeffRef(index));
} }
@ -578,47 +611,55 @@ struct mapbase_evaluator : evaluator_base<Derived>
EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost); EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost);
} }
EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index row, Index col) const EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
CoeffReturnType coeff(Index row, Index col) const
{ {
return m_data[col * m_xpr.colStride() + row * m_xpr.rowStride()]; return m_data[col * m_xpr.colStride() + row * m_xpr.rowStride()];
} }
EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index index) const EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
CoeffReturnType coeff(Index index) const
{ {
return m_data[index * m_xpr.innerStride()]; return m_data[index * m_xpr.innerStride()];
} }
EIGEN_DEVICE_FUNC Scalar& coeffRef(Index row, Index col) EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
Scalar& coeffRef(Index row, Index col)
{ {
return m_data[col * m_xpr.colStride() + row * m_xpr.rowStride()]; return m_data[col * m_xpr.colStride() + row * m_xpr.rowStride()];
} }
EIGEN_DEVICE_FUNC Scalar& coeffRef(Index index) EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
Scalar& coeffRef(Index index)
{ {
return m_data[index * m_xpr.innerStride()]; return m_data[index * m_xpr.innerStride()];
} }
template<int LoadMode, typename PacketType> template<int LoadMode, typename PacketType>
EIGEN_STRONG_INLINE
PacketType packet(Index row, Index col) const PacketType packet(Index row, Index col) const
{ {
PointerType ptr = m_data + row * m_xpr.rowStride() + col * m_xpr.colStride(); PointerType ptr = m_data + row * m_xpr.rowStride() + col * m_xpr.colStride();
return internal::ploadt<PacketType, LoadMode>(ptr); return internal::ploadt<PacketType, LoadMode>(ptr);
} }
template<int LoadMode, typename PacketType> template<int LoadMode, typename PacketType>
EIGEN_STRONG_INLINE
PacketType packet(Index index) const PacketType packet(Index index) const
{ {
return internal::ploadt<PacketType, LoadMode>(m_data + index * m_xpr.innerStride()); return internal::ploadt<PacketType, LoadMode>(m_data + index * m_xpr.innerStride());
} }
template<int StoreMode, typename PacketType> template<int StoreMode, typename PacketType>
EIGEN_STRONG_INLINE
void writePacket(Index row, Index col, const PacketType& x) void writePacket(Index row, Index col, const PacketType& x)
{ {
PointerType ptr = m_data + row * m_xpr.rowStride() + col * m_xpr.colStride(); PointerType ptr = m_data + row * m_xpr.rowStride() + col * m_xpr.colStride();
return internal::pstoret<Scalar, PacketType, StoreMode>(ptr, x); return internal::pstoret<Scalar, PacketType, StoreMode>(ptr, x);
} }
template<int StoreMode, typename PacketType> template<int StoreMode, typename PacketType>
EIGEN_STRONG_INLINE
void writePacket(Index index, const PacketType& x) void writePacket(Index index, const PacketType& x)
{ {
internal::pstoret<Scalar, PacketType, StoreMode>(m_data + index * m_xpr.innerStride(), x); internal::pstoret<Scalar, PacketType, StoreMode>(m_data + index * m_xpr.innerStride(), x);
@ -767,46 +808,54 @@ struct unary_evaluator<Block<ArgType, BlockRows, BlockCols, InnerPanel>, IndexBa
RowsAtCompileTime = XprType::RowsAtCompileTime RowsAtCompileTime = XprType::RowsAtCompileTime
}; };
EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index row, Index col) const EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
CoeffReturnType coeff(Index row, Index col) const
{ {
return m_argImpl.coeff(m_startRow.value() + row, m_startCol.value() + col); return m_argImpl.coeff(m_startRow.value() + row, m_startCol.value() + col);
} }
EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index index) const EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
CoeffReturnType coeff(Index index) const
{ {
return coeff(RowsAtCompileTime == 1 ? 0 : index, RowsAtCompileTime == 1 ? index : 0); return coeff(RowsAtCompileTime == 1 ? 0 : index, RowsAtCompileTime == 1 ? index : 0);
} }
EIGEN_DEVICE_FUNC Scalar& coeffRef(Index row, Index col) EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
Scalar& coeffRef(Index row, Index col)
{ {
return m_argImpl.coeffRef(m_startRow.value() + row, m_startCol.value() + col); return m_argImpl.coeffRef(m_startRow.value() + row, m_startCol.value() + col);
} }
EIGEN_DEVICE_FUNC Scalar& coeffRef(Index index) EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
Scalar& coeffRef(Index index)
{ {
return coeffRef(RowsAtCompileTime == 1 ? 0 : index, RowsAtCompileTime == 1 ? index : 0); return coeffRef(RowsAtCompileTime == 1 ? 0 : index, RowsAtCompileTime == 1 ? index : 0);
} }
template<int LoadMode, typename PacketType> template<int LoadMode, typename PacketType>
EIGEN_STRONG_INLINE
PacketType packet(Index row, Index col) const PacketType packet(Index row, Index col) const
{ {
return m_argImpl.template packet<LoadMode,PacketType>(m_startRow.value() + row, m_startCol.value() + col); return m_argImpl.template packet<LoadMode,PacketType>(m_startRow.value() + row, m_startCol.value() + col);
} }
template<int LoadMode, typename PacketType> template<int LoadMode, typename PacketType>
EIGEN_STRONG_INLINE
PacketType packet(Index index) const PacketType packet(Index index) const
{ {
return packet<LoadMode,PacketType>(RowsAtCompileTime == 1 ? 0 : index, return packet<LoadMode,PacketType>(RowsAtCompileTime == 1 ? 0 : index,
RowsAtCompileTime == 1 ? index : 0); RowsAtCompileTime == 1 ? index : 0);
} }
template<int StoreMode, typename PacketType> template<int StoreMode, typename PacketType>
EIGEN_STRONG_INLINE
void writePacket(Index row, Index col, const PacketType& x) void writePacket(Index row, Index col, const PacketType& x)
{ {
return m_argImpl.template writePacket<StoreMode,PacketType>(m_startRow.value() + row, m_startCol.value() + col, x); return m_argImpl.template writePacket<StoreMode,PacketType>(m_startRow.value() + row, m_startCol.value() + col, x);
} }
template<int StoreMode, typename PacketType> template<int StoreMode, typename PacketType>
EIGEN_STRONG_INLINE
void writePacket(Index index, const PacketType& x) void writePacket(Index index, const PacketType& x)
{ {
return writePacket<StoreMode,PacketType>(RowsAtCompileTime == 1 ? 0 : index, return writePacket<StoreMode,PacketType>(RowsAtCompileTime == 1 ? 0 : index,
@ -816,8 +865,8 @@ struct unary_evaluator<Block<ArgType, BlockRows, BlockCols, InnerPanel>, IndexBa
protected: protected:
evaluator<ArgType> m_argImpl; evaluator<ArgType> m_argImpl;
const variable_if_dynamic<Index, ArgType::RowsAtCompileTime == 1 ? 0 : Dynamic> m_startRow; const variable_if_dynamic<Index, (ArgType::RowsAtCompileTime == 1 && BlockRows==1) ? 0 : Dynamic> m_startRow;
const variable_if_dynamic<Index, ArgType::ColsAtCompileTime == 1 ? 0 : Dynamic> m_startCol; const variable_if_dynamic<Index, (ArgType::ColsAtCompileTime == 1 && BlockCols==1) ? 0 : Dynamic> m_startCol;
}; };
// TODO: This evaluator does not actually use the child evaluator; // TODO: This evaluator does not actually use the child evaluator;
@ -859,7 +908,7 @@ struct evaluator<Select<ConditionMatrixType, ThenMatrixType, ElseMatrixType> >
Alignment = EIGEN_PLAIN_ENUM_MIN(evaluator<ThenMatrixType>::Alignment, evaluator<ElseMatrixType>::Alignment) Alignment = EIGEN_PLAIN_ENUM_MIN(evaluator<ThenMatrixType>::Alignment, evaluator<ElseMatrixType>::Alignment)
}; };
inline EIGEN_DEVICE_FUNC explicit evaluator(const XprType& select) EIGEN_DEVICE_FUNC explicit evaluator(const XprType& select)
: m_conditionImpl(select.conditionMatrix()), : m_conditionImpl(select.conditionMatrix()),
m_thenImpl(select.thenMatrix()), m_thenImpl(select.thenMatrix()),
m_elseImpl(select.elseMatrix()) m_elseImpl(select.elseMatrix())
@ -869,7 +918,8 @@ struct evaluator<Select<ConditionMatrixType, ThenMatrixType, ElseMatrixType> >
typedef typename XprType::CoeffReturnType CoeffReturnType; typedef typename XprType::CoeffReturnType CoeffReturnType;
inline EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index row, Index col) const EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
CoeffReturnType coeff(Index row, Index col) const
{ {
if (m_conditionImpl.coeff(row, col)) if (m_conditionImpl.coeff(row, col))
return m_thenImpl.coeff(row, col); return m_thenImpl.coeff(row, col);
@ -877,7 +927,8 @@ struct evaluator<Select<ConditionMatrixType, ThenMatrixType, ElseMatrixType> >
return m_elseImpl.coeff(row, col); return m_elseImpl.coeff(row, col);
} }
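
At the API level this evaluator backs DenseBase::select(); a standard usage example:

    #include <Eigen/Dense>
    #include <iostream>

    int main()
    {
        Eigen::ArrayXd a(5);
        a << -2, -1, 0, 1, 2;
        // For each coefficient: take the 'then' value where the condition
        // holds, the 'else' value otherwise (exactly the branch above).
        Eigen::ArrayXd clamped = (a > 0).select(a, 0.0);
        std::cout << clamped.transpose() << std::endl;   // 0 0 0 1 2
    }
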
inline EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index index) const EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
CoeffReturnType coeff(Index index) const
{ {
if (m_conditionImpl.coeff(index)) if (m_conditionImpl.coeff(index))
return m_thenImpl.coeff(index); return m_thenImpl.coeff(index);
@ -921,7 +972,8 @@ struct unary_evaluator<Replicate<ArgType, RowFactor, ColFactor> >
m_cols(replicate.nestedExpression().cols()) m_cols(replicate.nestedExpression().cols())
{} {}
EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index row, Index col) const EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
CoeffReturnType coeff(Index row, Index col) const
{ {
// try to avoid using modulo; this is a pure optimization strategy // try to avoid using modulo; this is a pure optimization strategy
const Index actual_row = internal::traits<XprType>::RowsAtCompileTime==1 ? 0 const Index actual_row = internal::traits<XprType>::RowsAtCompileTime==1 ? 0
@ -934,7 +986,8 @@ struct unary_evaluator<Replicate<ArgType, RowFactor, ColFactor> >
return m_argImpl.coeff(actual_row, actual_col); return m_argImpl.coeff(actual_row, actual_col);
} }
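
The mapping being optimized here: a Replicate coefficient (row, col) reads arg(row % arg.rows(), col % arg.cols()), and the ternaries fold the modulo away whenever the traits prove it unnecessary. A plain restatement of the row branch, as a sketch only; the real code resolves these tests at compile time:

    // Sketch of the replicated-index mapping for one direction.
    inline long replicated_row(long row, long arg_rows,
                               bool arg_has_one_row,   // RowsAtCompileTime == 1
                               bool row_factor_is_one) // RowFactor == 1
    {
        if (arg_has_one_row)   return 0;     // modulo is provably 0
        if (row_factor_is_one) return row;   // modulo is the identity
        return row % arg_rows;               // general case
    }
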
EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index index) const EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
CoeffReturnType coeff(Index index) const
{ {
// try to avoid using modulo; this is a pure optimization strategy // try to avoid using modulo; this is a pure optimization strategy
const Index actual_index = internal::traits<XprType>::RowsAtCompileTime==1 const Index actual_index = internal::traits<XprType>::RowsAtCompileTime==1
@ -945,6 +998,7 @@ struct unary_evaluator<Replicate<ArgType, RowFactor, ColFactor> >
} }
template<int LoadMode, typename PacketType> template<int LoadMode, typename PacketType>
EIGEN_STRONG_INLINE
PacketType packet(Index row, Index col) const PacketType packet(Index row, Index col) const
{ {
const Index actual_row = internal::traits<XprType>::RowsAtCompileTime==1 ? 0 const Index actual_row = internal::traits<XprType>::RowsAtCompileTime==1 ? 0
@ -958,6 +1012,7 @@ struct unary_evaluator<Replicate<ArgType, RowFactor, ColFactor> >
} }
template<int LoadMode, typename PacketType> template<int LoadMode, typename PacketType>
EIGEN_STRONG_INLINE
PacketType packet(Index index) const PacketType packet(Index index) const
{ {
const Index actual_index = internal::traits<XprType>::RowsAtCompileTime==1 const Index actual_index = internal::traits<XprType>::RowsAtCompileTime==1
@ -994,7 +1049,7 @@ struct evaluator<PartialReduxExpr<ArgType, MemberOp, Direction> >
CoeffReadCost = TraversalSize==Dynamic ? HugeCost CoeffReadCost = TraversalSize==Dynamic ? HugeCost
: TraversalSize * evaluator<ArgType>::CoeffReadCost + int(CostOpType::value), : TraversalSize * evaluator<ArgType>::CoeffReadCost + int(CostOpType::value),
Flags = (traits<XprType>::Flags&RowMajorBit) | (evaluator<ArgType>::Flags&HereditaryBits), Flags = (traits<XprType>::Flags&RowMajorBit) | (evaluator<ArgType>::Flags&(HereditaryBits&(~RowMajorBit))) | LinearAccessBit,
Alignment = 0 // FIXME this will need to be improved once PartialReduxExpr is vectorized Alignment = 0 // FIXME this will need to be improved once PartialReduxExpr is vectorized
}; };
@ -1008,7 +1063,8 @@ struct evaluator<PartialReduxExpr<ArgType, MemberOp, Direction> >
typedef typename XprType::CoeffReturnType CoeffReturnType; typedef typename XprType::CoeffReturnType CoeffReturnType;
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar coeff(Index i, Index j) const EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
const Scalar coeff(Index i, Index j) const
{ {
if (Direction==Vertical) if (Direction==Vertical)
return m_functor(m_arg.col(j)); return m_functor(m_arg.col(j));
@ -1016,7 +1072,8 @@ struct evaluator<PartialReduxExpr<ArgType, MemberOp, Direction> >
return m_functor(m_arg.row(i)); return m_functor(m_arg.row(i));
} }
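
PartialReduxExpr is what colwise()/rowwise() reductions return, which is also why CoeffReadCost above scales with TraversalSize: each coefficient is a full reduction of one column or row. For example:

    #include <Eigen/Dense>
    #include <iostream>

    int main()
    {
        Eigen::MatrixXd m(2, 3);
        m << 1, 2, 3,
             4, 5, 6;
        // colwise().sum() is a PartialReduxExpr with Direction==Vertical:
        // coeff(j) evaluates the functor on m.col(j), as in the code above.
        std::cout << m.colwise().sum() << std::endl;   // 5 7 9
    }
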
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar coeff(Index index) const EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
const Scalar coeff(Index index) const
{ {
if (Direction==Vertical) if (Direction==Vertical)
return m_functor(m_arg.col(index)); return m_functor(m_arg.col(index));
@ -1051,45 +1108,53 @@ struct evaluator_wrapper_base
typedef typename ArgType::Scalar Scalar; typedef typename ArgType::Scalar Scalar;
typedef typename ArgType::CoeffReturnType CoeffReturnType; typedef typename ArgType::CoeffReturnType CoeffReturnType;
EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index row, Index col) const EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
CoeffReturnType coeff(Index row, Index col) const
{ {
return m_argImpl.coeff(row, col); return m_argImpl.coeff(row, col);
} }
EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index index) const EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
CoeffReturnType coeff(Index index) const
{ {
return m_argImpl.coeff(index); return m_argImpl.coeff(index);
} }
EIGEN_DEVICE_FUNC Scalar& coeffRef(Index row, Index col) EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
Scalar& coeffRef(Index row, Index col)
{ {
return m_argImpl.coeffRef(row, col); return m_argImpl.coeffRef(row, col);
} }
EIGEN_DEVICE_FUNC Scalar& coeffRef(Index index) EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
Scalar& coeffRef(Index index)
{ {
return m_argImpl.coeffRef(index); return m_argImpl.coeffRef(index);
} }
template<int LoadMode, typename PacketType> template<int LoadMode, typename PacketType>
EIGEN_STRONG_INLINE
PacketType packet(Index row, Index col) const PacketType packet(Index row, Index col) const
{ {
return m_argImpl.template packet<LoadMode,PacketType>(row, col); return m_argImpl.template packet<LoadMode,PacketType>(row, col);
} }
template<int LoadMode, typename PacketType> template<int LoadMode, typename PacketType>
EIGEN_STRONG_INLINE
PacketType packet(Index index) const PacketType packet(Index index) const
{ {
return m_argImpl.template packet<LoadMode,PacketType>(index); return m_argImpl.template packet<LoadMode,PacketType>(index);
} }
template<int StoreMode, typename PacketType> template<int StoreMode, typename PacketType>
EIGEN_STRONG_INLINE
void writePacket(Index row, Index col, const PacketType& x) void writePacket(Index row, Index col, const PacketType& x)
{ {
m_argImpl.template writePacket<StoreMode>(row, col, x); m_argImpl.template writePacket<StoreMode>(row, col, x);
} }
template<int StoreMode, typename PacketType> template<int StoreMode, typename PacketType>
EIGEN_STRONG_INLINE
void writePacket(Index index, const PacketType& x) void writePacket(Index index, const PacketType& x)
{ {
m_argImpl.template writePacket<StoreMode>(index, x); m_argImpl.template writePacket<StoreMode>(index, x);
@ -1164,29 +1229,34 @@ struct unary_evaluator<Reverse<ArgType, Direction> >
m_cols(ReverseCol ? reverse.nestedExpression().cols() : 1) m_cols(ReverseCol ? reverse.nestedExpression().cols() : 1)
{ } { }
EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index row, Index col) const EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
CoeffReturnType coeff(Index row, Index col) const
{ {
return m_argImpl.coeff(ReverseRow ? m_rows.value() - row - 1 : row, return m_argImpl.coeff(ReverseRow ? m_rows.value() - row - 1 : row,
ReverseCol ? m_cols.value() - col - 1 : col); ReverseCol ? m_cols.value() - col - 1 : col);
} }
EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index index) const EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
CoeffReturnType coeff(Index index) const
{ {
return m_argImpl.coeff(m_rows.value() * m_cols.value() - index - 1); return m_argImpl.coeff(m_rows.value() * m_cols.value() - index - 1);
} }
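
The mirrored indexing above is the whole of reverse(): the expression stays lazy and each read maps back into the original storage.

    #include <Eigen/Dense>
    #include <iostream>

    int main()
    {
        Eigen::VectorXd v(4);
        v << 1, 2, 3, 4;
        // Reading coefficient i of v.reverse() goes through the evaluator
        // above and fetches v(size - i - 1); no copy is made.
        std::cout << v.reverse().transpose() << std::endl;   // 4 3 2 1
    }
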
EIGEN_DEVICE_FUNC Scalar& coeffRef(Index row, Index col) EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
Scalar& coeffRef(Index row, Index col)
{ {
return m_argImpl.coeffRef(ReverseRow ? m_rows.value() - row - 1 : row, return m_argImpl.coeffRef(ReverseRow ? m_rows.value() - row - 1 : row,
ReverseCol ? m_cols.value() - col - 1 : col); ReverseCol ? m_cols.value() - col - 1 : col);
} }
EIGEN_DEVICE_FUNC Scalar& coeffRef(Index index) EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
Scalar& coeffRef(Index index)
{ {
return m_argImpl.coeffRef(m_rows.value() * m_cols.value() - index - 1); return m_argImpl.coeffRef(m_rows.value() * m_cols.value() - index - 1);
} }
template<int LoadMode, typename PacketType> template<int LoadMode, typename PacketType>
EIGEN_STRONG_INLINE
PacketType packet(Index row, Index col) const PacketType packet(Index row, Index col) const
{ {
enum { enum {
@ -1201,6 +1271,7 @@ struct unary_evaluator<Reverse<ArgType, Direction> >
} }
template<int LoadMode, typename PacketType> template<int LoadMode, typename PacketType>
EIGEN_STRONG_INLINE
PacketType packet(Index index) const PacketType packet(Index index) const
{ {
enum { PacketSize = unpacket_traits<PacketType>::size }; enum { PacketSize = unpacket_traits<PacketType>::size };
@ -1208,6 +1279,7 @@ struct unary_evaluator<Reverse<ArgType, Direction> >
} }
template<int LoadMode, typename PacketType> template<int LoadMode, typename PacketType>
EIGEN_STRONG_INLINE
void writePacket(Index row, Index col, const PacketType& x) void writePacket(Index row, Index col, const PacketType& x)
{ {
// FIXME we could factorize some code with packet(i,j) // FIXME we could factorize some code with packet(i,j)
@ -1224,6 +1296,7 @@ struct unary_evaluator<Reverse<ArgType, Direction> >
} }
template<int LoadMode, typename PacketType> template<int LoadMode, typename PacketType>
EIGEN_STRONG_INLINE
void writePacket(Index index, const PacketType& x) void writePacket(Index index, const PacketType& x)
{ {
enum { PacketSize = unpacket_traits<PacketType>::size }; enum { PacketSize = unpacket_traits<PacketType>::size };
@ -1267,22 +1340,26 @@ struct evaluator<Diagonal<ArgType, DiagIndex> >
typedef typename internal::conditional<!internal::is_same<typename ArgType::StorageKind,Sparse>::value, typedef typename internal::conditional<!internal::is_same<typename ArgType::StorageKind,Sparse>::value,
typename XprType::CoeffReturnType,Scalar>::type CoeffReturnType; typename XprType::CoeffReturnType,Scalar>::type CoeffReturnType;
EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index row, Index) const EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
CoeffReturnType coeff(Index row, Index) const
{ {
return m_argImpl.coeff(row + rowOffset(), row + colOffset()); return m_argImpl.coeff(row + rowOffset(), row + colOffset());
} }
EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index index) const EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
CoeffReturnType coeff(Index index) const
{ {
return m_argImpl.coeff(index + rowOffset(), index + colOffset()); return m_argImpl.coeff(index + rowOffset(), index + colOffset());
} }
EIGEN_DEVICE_FUNC Scalar& coeffRef(Index row, Index) EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
Scalar& coeffRef(Index row, Index)
{ {
return m_argImpl.coeffRef(row + rowOffset(), row + colOffset()); return m_argImpl.coeffRef(row + rowOffset(), row + colOffset());
} }
EIGEN_DEVICE_FUNC Scalar& coeffRef(Index index) EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
Scalar& coeffRef(Index index)
{ {
return m_argImpl.coeffRef(index + rowOffset(), index + colOffset()); return m_argImpl.coeffRef(index + rowOffset(), index + colOffset());
} }
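
In public-API terms this evaluator serves diagonal(): linear index i maps to (i + rowOffset(), i + colOffset()), with a nonzero DiagIndex selecting a super- or subdiagonal. For example:

    #include <Eigen/Dense>
    #include <iostream>

    int main()
    {
        Eigen::Matrix3d m;
        m << 1, 2, 3,
             4, 5, 6,
             7, 8, 9;
        std::cout << m.diagonal().transpose()  << std::endl;  // 1 5 9
        std::cout << m.diagonal(1).transpose() << std::endl;  // 2 6  (superdiagonal)
    }
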

View File

@ -13,26 +13,6 @@
namespace Eigen { namespace Eigen {
/** \class CwiseBinaryOp
* \ingroup Core_Module
*
* \brief Generic expression where a coefficient-wise binary operator is applied to two expressions
*
* \param BinaryOp template functor implementing the operator
* \param Lhs the type of the left-hand side
* \param Rhs the type of the right-hand side
*
* This class represents an expression where a coefficient-wise binary operator is applied to two expressions.
* It is the return type of binary operators, by which we mean only those binary operators where
* both the left-hand side and the right-hand side are Eigen expressions.
* For example, the return type of matrix1+matrix2 is a CwiseBinaryOp.
*
* Most of the time, this is the only way that it is used, so you typically don't have to name
* CwiseBinaryOp types explicitly.
*
* \sa MatrixBase::binaryExpr(const MatrixBase<OtherDerived> &,const CustomBinaryOp &) const, class CwiseUnaryOp, class CwiseNullaryOp
*/
namespace internal { namespace internal {
template<typename BinaryOp, typename Lhs, typename Rhs> template<typename BinaryOp, typename Lhs, typename Rhs>
struct traits<CwiseBinaryOp<BinaryOp, Lhs, Rhs> > struct traits<CwiseBinaryOp<BinaryOp, Lhs, Rhs> >
@ -52,8 +32,8 @@ struct traits<CwiseBinaryOp<BinaryOp, Lhs, Rhs> >
// we still want to handle the case when the result type is different. // we still want to handle the case when the result type is different.
typedef typename result_of< typedef typename result_of<
BinaryOp( BinaryOp(
typename Lhs::Scalar, const typename Lhs::Scalar&,
typename Rhs::Scalar const typename Rhs::Scalar&
) )
>::type Scalar; >::type Scalar;
typedef typename cwise_promote_storage_type<typename traits<Lhs>::StorageKind, typedef typename cwise_promote_storage_type<typename traits<Lhs>::StorageKind,
@ -74,6 +54,25 @@ struct traits<CwiseBinaryOp<BinaryOp, Lhs, Rhs> >
template<typename BinaryOp, typename Lhs, typename Rhs, typename StorageKind> template<typename BinaryOp, typename Lhs, typename Rhs, typename StorageKind>
class CwiseBinaryOpImpl; class CwiseBinaryOpImpl;
/** \class CwiseBinaryOp
* \ingroup Core_Module
*
* \brief Generic expression where a coefficient-wise binary operator is applied to two expressions
*
* \tparam BinaryOp template functor implementing the operator
* \tparam LhsType the type of the left-hand side
* \tparam RhsType the type of the right-hand side
*
* This class represents an expression where a coefficient-wise binary operator is applied to two expressions.
* It is the return type of binary operators, by which we mean only those binary operators where
* both the left-hand side and the right-hand side are Eigen expressions.
* For example, the return type of matrix1+matrix2 is a CwiseBinaryOp.
*
* Most of the time, this is the only way that it is used, so you typically don't have to name
* CwiseBinaryOp types explicitly.
*
* \sa MatrixBase::binaryExpr(const MatrixBase<OtherDerived> &,const CustomBinaryOp &) const, class CwiseUnaryOp, class CwiseNullaryOp
*/
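
A minimal usage sketch of the two ways this type typically arises; the functor's result_type typedef is only there to keep the scalar deduction working without C++11:

    #include <Eigen/Dense>
    #include <iostream>

    struct xy_plus_one
    {
        typedef double result_type;  // lets result_of<> deduce the Scalar pre-C++11
        double operator()(double x, double y) const { return x * y + 1.0; }
    };

    int main()
    {
        Eigen::Matrix2d a = Eigen::Matrix2d::Constant(1.0);
        Eigen::Matrix2d b = Eigen::Matrix2d::Constant(2.0);
        Eigen::Matrix2d c = a + b;                          // a CwiseBinaryOp, evaluated on assignment
        Eigen::Matrix2d d = a.binaryExpr(b, xy_plus_one()); // custom binary functor
        std::cout << c << "\n\n" << d << std::endl;
    }
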
template<typename BinaryOp, typename LhsType, typename RhsType> template<typename BinaryOp, typename LhsType, typename RhsType>
class CwiseBinaryOp : class CwiseBinaryOp :
public CwiseBinaryOpImpl< public CwiseBinaryOpImpl<

View File

@ -12,24 +12,6 @@
namespace Eigen { namespace Eigen {
/** \class CwiseNullaryOp
* \ingroup Core_Module
*
* \brief Generic expression of a matrix where all coefficients are defined by a functor
*
* \param NullaryOp template functor implementing the operator
* \param PlainObjectType the underlying plain matrix/array type
*
* This class represents an expression of a generic nullary operator.
* It is the return type of the Ones(), Zero(), Constant(), Identity() and Random() methods,
* and most of the time this is the only way it is used.
*
* However, if you want to write a function returning such an expression, you
* will need to use this class.
*
* \sa class CwiseUnaryOp, class CwiseBinaryOp, DenseBase::NullaryExpr()
*/
namespace internal { namespace internal {
template<typename NullaryOp, typename PlainObjectType> template<typename NullaryOp, typename PlainObjectType>
struct traits<CwiseNullaryOp<NullaryOp, PlainObjectType> > : traits<PlainObjectType> struct traits<CwiseNullaryOp<NullaryOp, PlainObjectType> > : traits<PlainObjectType>
@ -40,6 +22,23 @@ struct traits<CwiseNullaryOp<NullaryOp, PlainObjectType> > : traits<PlainObjectT
}; };
} }
/** \class CwiseNullaryOp
* \ingroup Core_Module
*
* \brief Generic expression of a matrix where all coefficients are defined by a functor
*
* \tparam NullaryOp template functor implementing the operator
* \tparam PlainObjectType the underlying plain matrix/array type
*
* This class represents an expression of a generic nullary operator.
* It is the return type of the Ones(), Zero(), Constant(), Identity() and Random() methods,
* and most of the time this is the only way it is used.
*
* However, if you want to write a function returning such an expression, you
* will need to use this class.
*
* \sa class CwiseUnaryOp, class CwiseBinaryOp, DenseBase::NullaryExpr()
*/
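
A usage sketch: the factory functions named in the documentation all return CwiseNullaryOp, and NullaryExpr() builds one from any functor of (row, col):

    #include <Eigen/Dense>
    #include <iostream>

    struct identity_op
    {
        double operator()(Eigen::Index r, Eigen::Index c) const
        { return r == c ? 1.0 : 0.0; }
    };

    int main()
    {
        // Zero(), Ones(), Constant() etc. all return CwiseNullaryOp...
        Eigen::MatrixXd z = Eigen::MatrixXd::Zero(2, 2);
        // ...and NullaryExpr() wraps a custom coefficient functor.
        Eigen::MatrixXd id = Eigen::MatrixXd::NullaryExpr(3, 3, identity_op());
        std::cout << z << "\n\n" << id << std::endl;
    }
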
template<typename NullaryOp, typename PlainObjectType> template<typename NullaryOp, typename PlainObjectType>
class CwiseNullaryOp : public internal::dense_xpr_base< CwiseNullaryOp<NullaryOp, PlainObjectType> >::type, internal::no_assignment_operator class CwiseNullaryOp : public internal::dense_xpr_base< CwiseNullaryOp<NullaryOp, PlainObjectType> >::type, internal::no_assignment_operator
{ {
@ -224,7 +223,7 @@ DenseBase<Derived>::Constant(const Scalar& value)
} }
/** /**
* \brief Sets a linearly space vector. * \brief Sets a linearly spaced vector.
* *
* The function generates 'size' equally spaced values in the closed interval [low,high]. * The function generates 'size' equally spaced values in the closed interval [low,high].
* This particular version of LinSpaced() uses sequential access, i.e. vector access is * This particular version of LinSpaced() uses sequential access, i.e. vector access is
@ -262,7 +261,7 @@ DenseBase<Derived>::LinSpaced(Sequential_t, const Scalar& low, const Scalar& hig
} }
/** /**
* \brief Sets a linearly space vector. * \brief Sets a linearly spaced vector.
* *
* The function generates 'size' equally spaced values in the closed interval [low,high]. * The function generates 'size' equally spaced values in the closed interval [low,high].
* When size is set to 1, a vector of length 1 containing 'high' is returned. * When size is set to 1, a vector of length 1 containing 'high' is returned.
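
A quick illustration of both documented behaviors:

    #include <Eigen/Dense>
    #include <iostream>

    int main()
    {
        // Five equally spaced values in the closed interval [0, 1]:
        Eigen::VectorXd v = Eigen::VectorXd::LinSpaced(5, 0.0, 1.0);
        std::cout << v.transpose() << std::endl;              // 0 0.25 0.5 0.75 1
        // The documented size==1 special case: a single entry holding 'high'.
        std::cout << Eigen::VectorXd::LinSpaced(1, 0.0, 1.0) << std::endl;  // 1
    }
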
@ -328,7 +327,7 @@ EIGEN_STRONG_INLINE void DenseBase<Derived>::fill(const Scalar& val)
setConstant(val); setConstant(val);
} }
/** Sets all coefficients in this expression to \a value. /** Sets all coefficients in this expression to value \a val.
* *
* \sa fill(), setConstant(Index,const Scalar&), setConstant(Index,Index,const Scalar&), setZero(), setOnes(), Constant(), class CwiseNullaryOp, setZero(), setOnes() * \sa fill(), setConstant(Index,const Scalar&), setConstant(Index,Index,const Scalar&), setZero(), setOnes(), Constant(), class CwiseNullaryOp, setZero(), setOnes()
*/ */
@ -338,7 +337,7 @@ EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::setConstant(const Scalar& val)
return derived() = Constant(rows(), cols(), val); return derived() = Constant(rows(), cols(), val);
} }
/** Resizes to the given \a size, and sets all coefficients in this expression to the given \a value. /** Resizes to the given \a size, and sets all coefficients in this expression to the given value \a val.
* *
* \only_for_vectors * \only_for_vectors
* *
@ -355,7 +354,7 @@ PlainObjectBase<Derived>::setConstant(Index size, const Scalar& val)
return setConstant(val); return setConstant(val);
} }
/** Resizes to the given size, and sets all coefficients in this expression to the given \a value. /** Resizes to the given size, and sets all coefficients in this expression to the given value \a val.
* *
* \param rows the new number of rows * \param rows the new number of rows
* \param cols the new number of columns * \param cols the new number of columns
@ -375,7 +374,7 @@ PlainObjectBase<Derived>::setConstant(Index rows, Index cols, const Scalar& val)
} }
/** /**
* \brief Sets a linearly space vector. * \brief Sets a linearly spaced vector.
* *
* The function generates 'size' equally spaced values in the closed interval [low,high]. * The function generates 'size' equally spaced values in the closed interval [low,high].
* When size is set to 1, a vector of length 1 containing 'high' is returned. * When size is set to 1, a vector of length 1 containing 'high' is returned.
@ -395,7 +394,7 @@ EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::setLinSpaced(Index newSize, con
} }
/** /**
* \brief Sets a linearly space vector. * \brief Sets a linearly spaced vector.
* *
 * The function fills *this with equally spaced values in the closed interval [low,high]. * The function fills *this with equally spaced values in the closed interval [low,high].
* When size is set to 1, a vector of length 1 containing 'high' is returned. * When size is set to 1, a vector of length 1 containing 'high' is returned.

View File

@ -13,33 +13,13 @@
namespace Eigen { namespace Eigen {
/** \class CwiseUnaryOp
* \ingroup Core_Module
*
* \brief Generic expression where a coefficient-wise unary operator is applied to an expression
*
* \param UnaryOp template functor implementing the operator
* \param XprType the type of the expression to which we are applying the unary operator
*
* This class represents an expression where a unary operator is applied to an expression.
* It is the return type of all operations taking exactly 1 input expression, regardless of the
* presence of other inputs such as scalars. For example, the operator* in the expression 3*matrix
* is considered unary, because only the right-hand side is an expression, and its
* return type is a specialization of CwiseUnaryOp.
*
* Most of the time, this is the only way that it is used, so you typically don't have to name
* CwiseUnaryOp types explicitly.
*
* \sa MatrixBase::unaryExpr(const CustomUnaryOp &) const, class CwiseBinaryOp, class CwiseNullaryOp
*/
namespace internal { namespace internal {
template<typename UnaryOp, typename XprType> template<typename UnaryOp, typename XprType>
struct traits<CwiseUnaryOp<UnaryOp, XprType> > struct traits<CwiseUnaryOp<UnaryOp, XprType> >
: traits<XprType> : traits<XprType>
{ {
typedef typename result_of< typedef typename result_of<
UnaryOp(typename XprType::Scalar) UnaryOp(const typename XprType::Scalar&)
>::type Scalar; >::type Scalar;
typedef typename XprType::Nested XprTypeNested; typedef typename XprType::Nested XprTypeNested;
typedef typename remove_reference<XprTypeNested>::type _XprTypeNested; typedef typename remove_reference<XprTypeNested>::type _XprTypeNested;
@ -52,6 +32,25 @@ struct traits<CwiseUnaryOp<UnaryOp, XprType> >
template<typename UnaryOp, typename XprType, typename StorageKind> template<typename UnaryOp, typename XprType, typename StorageKind>
class CwiseUnaryOpImpl; class CwiseUnaryOpImpl;
/** \class CwiseUnaryOp
* \ingroup Core_Module
*
* \brief Generic expression where a coefficient-wise unary operator is applied to an expression
*
* \tparam UnaryOp template functor implementing the operator
* \tparam XprType the type of the expression to which we are applying the unary operator
*
* This class represents an expression where a unary operator is applied to an expression.
* It is the return type of all operations taking exactly 1 input expression, regardless of the
* presence of other inputs such as scalars. For example, the operator* in the expression 3*matrix
* is considered unary, because only the right-hand side is an expression, and its
* return type is a specialization of CwiseUnaryOp.
*
* Most of the time, this is the only way that it is used, so you typically don't have to name
* CwiseUnaryOp types explicitly.
*
* \sa MatrixBase::unaryExpr(const CustomUnaryOp &) const, class CwiseBinaryOp, class CwiseNullaryOp
*/
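
A usage sketch mirroring the documentation; as above, the result_type typedef only keeps scalar deduction working without C++11:

    #include <Eigen/Dense>
    #include <iostream>

    struct clamp01_op
    {
        typedef double result_type;
        double operator()(double x) const
        { return x < 0.0 ? 0.0 : (x > 1.0 ? 1.0 : x); }
    };

    int main()
    {
        Eigen::Vector3d v(-0.5, 0.5, 1.5);
        // Both 3*v and unaryExpr() yield CwiseUnaryOp expressions; nothing
        // is computed until the assignment below evaluates them.
        Eigen::Vector3d w = v.unaryExpr(clamp01_op());
        std::cout << w.transpose() << std::endl;   // 0 0.5 1
    }
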
template<typename UnaryOp, typename XprType> template<typename UnaryOp, typename XprType>
class CwiseUnaryOp : public CwiseUnaryOpImpl<UnaryOp, XprType, typename internal::traits<XprType>::StorageKind>, internal::no_assignment_operator class CwiseUnaryOp : public CwiseUnaryOpImpl<UnaryOp, XprType, typename internal::traits<XprType>::StorageKind>, internal::no_assignment_operator
{ {
@ -59,33 +58,34 @@ class CwiseUnaryOp : public CwiseUnaryOpImpl<UnaryOp, XprType, typename internal
typedef typename CwiseUnaryOpImpl<UnaryOp, XprType,typename internal::traits<XprType>::StorageKind>::Base Base; typedef typename CwiseUnaryOpImpl<UnaryOp, XprType,typename internal::traits<XprType>::StorageKind>::Base Base;
EIGEN_GENERIC_PUBLIC_INTERFACE(CwiseUnaryOp) EIGEN_GENERIC_PUBLIC_INTERFACE(CwiseUnaryOp)
typedef typename internal::ref_selector<XprType>::type XprTypeNested;
typedef typename internal::remove_all<XprType>::type NestedExpression; typedef typename internal::remove_all<XprType>::type NestedExpression;
EIGEN_DEVICE_FUNC EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
explicit inline CwiseUnaryOp(const XprType& xpr, const UnaryOp& func = UnaryOp()) explicit CwiseUnaryOp(const XprType& xpr, const UnaryOp& func = UnaryOp())
: m_xpr(xpr), m_functor(func) {} : m_xpr(xpr), m_functor(func) {}
EIGEN_DEVICE_FUNC EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
EIGEN_STRONG_INLINE Index rows() const { return m_xpr.rows(); } Index rows() const { return m_xpr.rows(); }
EIGEN_DEVICE_FUNC EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
EIGEN_STRONG_INLINE Index cols() const { return m_xpr.cols(); } Index cols() const { return m_xpr.cols(); }
/** \returns the functor representing the unary operation */ /** \returns the functor representing the unary operation */
EIGEN_DEVICE_FUNC EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
const UnaryOp& functor() const { return m_functor; } const UnaryOp& functor() const { return m_functor; }
/** \returns the nested expression */ /** \returns the nested expression */
EIGEN_DEVICE_FUNC EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
const typename internal::remove_all<typename XprType::Nested>::type& const typename internal::remove_all<XprTypeNested>::type&
nestedExpression() const { return m_xpr; } nestedExpression() const { return m_xpr; }
/** \returns the nested expression */ /** \returns the nested expression */
EIGEN_DEVICE_FUNC EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
typename internal::remove_all<typename XprType::Nested>::type& typename internal::remove_all<XprTypeNested>::type&
nestedExpression() { return m_xpr.const_cast_derived(); } nestedExpression() { return m_xpr; }
protected: protected:
typename XprType::Nested m_xpr; XprTypeNested m_xpr;
const UnaryOp m_functor; const UnaryOp m_functor;
}; };

View File

@ -12,27 +12,13 @@
namespace Eigen { namespace Eigen {
/** \class CwiseUnaryView
* \ingroup Core_Module
*
* \brief Generic lvalue expression of a coefficient-wise unary operator of a matrix or a vector
*
* \param ViewOp template functor implementing the view
* \param MatrixType the type of the matrix we are applying the unary operator
*
* This class represents a lvalue expression of a generic unary view operator of a matrix or a vector.
* It is the return type of real() and imag(), and most of the time this is the only way it is used.
*
* \sa MatrixBase::unaryViewExpr(const CustomUnaryOp &) const, class CwiseUnaryOp
*/
namespace internal { namespace internal {
template<typename ViewOp, typename MatrixType> template<typename ViewOp, typename MatrixType>
struct traits<CwiseUnaryView<ViewOp, MatrixType> > struct traits<CwiseUnaryView<ViewOp, MatrixType> >
: traits<MatrixType> : traits<MatrixType>
{ {
typedef typename result_of< typedef typename result_of<
ViewOp(typename traits<MatrixType>::Scalar) ViewOp(const typename traits<MatrixType>::Scalar&)
>::type Scalar; >::type Scalar;
typedef typename MatrixType::Nested MatrixTypeNested; typedef typename MatrixType::Nested MatrixTypeNested;
typedef typename remove_all<MatrixTypeNested>::type _MatrixTypeNested; typedef typename remove_all<MatrixTypeNested>::type _MatrixTypeNested;
@ -55,6 +41,19 @@ struct traits<CwiseUnaryView<ViewOp, MatrixType> >
template<typename ViewOp, typename MatrixType, typename StorageKind> template<typename ViewOp, typename MatrixType, typename StorageKind>
class CwiseUnaryViewImpl; class CwiseUnaryViewImpl;
/** \class CwiseUnaryView
* \ingroup Core_Module
*
* \brief Generic lvalue expression of a coefficient-wise unary operator of a matrix or a vector
*
* \tparam ViewOp template functor implementing the view
* \tparam MatrixType the type of the matrix we are applying the unary operator
*
* This class represents a lvalue expression of a generic unary view operator of a matrix or a vector.
* It is the return type of real() and imag(), and most of the time this is the only way it is used.
*
* \sa MatrixBase::unaryViewExpr(const CustomUnaryOp &) const, class CwiseUnaryOp
*/
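
A usage sketch: real() and imag() return writable CwiseUnaryView expressions into the underlying complex coefficients.

    #include <Eigen/Dense>
    #include <iostream>

    int main()
    {
        Eigen::Matrix2cd m = Eigen::Matrix2cd::Zero();
        // Assigning through the views writes into m itself:
        m.real() = Eigen::Matrix2d::Constant(1.0);
        m.imag() = Eigen::Matrix2d::Identity();
        std::cout << m << std::endl;
    }
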
template<typename ViewOp, typename MatrixType> template<typename ViewOp, typename MatrixType>
class CwiseUnaryView : public CwiseUnaryViewImpl<ViewOp, MatrixType, typename internal::traits<MatrixType>::StorageKind> class CwiseUnaryView : public CwiseUnaryViewImpl<ViewOp, MatrixType, typename internal::traits<MatrixType>::StorageKind>
{ {
@ -62,6 +61,7 @@ class CwiseUnaryView : public CwiseUnaryViewImpl<ViewOp, MatrixType, typename in
typedef typename CwiseUnaryViewImpl<ViewOp, MatrixType,typename internal::traits<MatrixType>::StorageKind>::Base Base; typedef typename CwiseUnaryViewImpl<ViewOp, MatrixType,typename internal::traits<MatrixType>::StorageKind>::Base Base;
EIGEN_GENERIC_PUBLIC_INTERFACE(CwiseUnaryView) EIGEN_GENERIC_PUBLIC_INTERFACE(CwiseUnaryView)
typedef typename internal::ref_selector<MatrixType>::non_const_type MatrixTypeNested;
typedef typename internal::remove_all<MatrixType>::type NestedExpression; typedef typename internal::remove_all<MatrixType>::type NestedExpression;
explicit inline CwiseUnaryView(MatrixType& mat, const ViewOp& func = ViewOp()) explicit inline CwiseUnaryView(MatrixType& mat, const ViewOp& func = ViewOp())
@ -76,15 +76,15 @@ class CwiseUnaryView : public CwiseUnaryViewImpl<ViewOp, MatrixType, typename in
const ViewOp& functor() const { return m_functor; } const ViewOp& functor() const { return m_functor; }
/** \returns the nested expression */ /** \returns the nested expression */
const typename internal::remove_all<typename MatrixType::Nested>::type& const typename internal::remove_all<MatrixTypeNested>::type&
nestedExpression() const { return m_matrix; } nestedExpression() const { return m_matrix; }
/** \returns the nested expression */ /** \returns the nested expression */
typename internal::remove_all<typename MatrixType::Nested>::type& typename internal::remove_reference<MatrixTypeNested>::type&
nestedExpression() { return m_matrix.const_cast_derived(); } nestedExpression() { return m_matrix.const_cast_derived(); }
protected: protected:
typename internal::ref_selector<MatrixType>::type m_matrix; MatrixTypeNested m_matrix;
ViewOp m_functor; ViewOp m_functor;
}; };

View File

@ -36,7 +36,7 @@ static inline void check_DenseIndex_is_signed() {
* This class can be extended with the help of the plugin mechanism described on the page * This class can be extended with the help of the plugin mechanism described on the page
* \ref TopicCustomizingEigen by defining the preprocessor symbol \c EIGEN_DENSEBASE_PLUGIN. * \ref TopicCustomizingEigen by defining the preprocessor symbol \c EIGEN_DENSEBASE_PLUGIN.
* *
* \sa \ref TopicClassHierarchy * \sa \blank \ref TopicClassHierarchy
*/ */
template<typename Derived> class DenseBase template<typename Derived> class DenseBase
#ifndef EIGEN_PARSED_BY_DOXYGEN #ifndef EIGEN_PARSED_BY_DOXYGEN
@ -60,7 +60,7 @@ template<typename Derived> class DenseBase
* \brief The type used to store indices * \brief The type used to store indices
* \details This typedef is relevant for types that store multiple indices such as * \details This typedef is relevant for types that store multiple indices such as
* PermutationMatrix or Transpositions, otherwise it defaults to Eigen::Index * PermutationMatrix or Transpositions, otherwise it defaults to Eigen::Index
* \sa \ref TopicPreprocessorDirectives, Eigen::Index, SparseMatrixBase. * \sa \blank \ref TopicPreprocessorDirectives, Eigen::Index, SparseMatrixBase.
*/ */
typedef typename internal::traits<Derived>::StorageIndex StorageIndex; typedef typename internal::traits<Derived>::StorageIndex StorageIndex;
@ -275,13 +275,13 @@ template<typename Derived> class DenseBase
/** Copies \a other into *this. \returns a reference to *this. */ /** Copies \a other into *this. \returns a reference to *this. */
template<typename OtherDerived> template<typename OtherDerived>
EIGEN_DEVICE_FUNC EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
Derived& operator=(const DenseBase<OtherDerived>& other); Derived& operator=(const DenseBase<OtherDerived>& other);
/** Special case of the template operator=, in order to prevent the compiler /** Special case of the template operator=, in order to prevent the compiler
* from generating a default operator= (issue hit with g++ 4.1) * from generating a default operator= (issue hit with g++ 4.1)
*/ */
EIGEN_DEVICE_FUNC EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
Derived& operator=(const DenseBase& other); Derived& operator=(const DenseBase& other);
template<typename OtherDerived> template<typename OtherDerived>
@ -388,10 +388,10 @@ template<typename Derived> class DenseBase
inline bool hasNaN() const; inline bool hasNaN() const;
inline bool allFinite() const; inline bool allFinite() const;
EIGEN_DEVICE_FUNC EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
inline Derived& operator*=(const Scalar& other); Derived& operator*=(const Scalar& other);
EIGEN_DEVICE_FUNC EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
inline Derived& operator/=(const Scalar& other); Derived& operator/=(const Scalar& other);
typedef typename internal::add_const_on_value_type<typename internal::eval<Derived>::type>::type EvalReturnType; typedef typename internal::add_const_on_value_type<typename internal::eval<Derived>::type>::type EvalReturnType;
/** \returns the matrix or vector obtained by evaluating this expression. /** \returns the matrix or vector obtained by evaluating this expression.
View File
@ -191,19 +191,31 @@ class DenseCoeffsBase<Derived,ReadOnlyAccessors> : public EigenBase<Derived>
EIGEN_DEVICE_FUNC EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE CoeffReturnType EIGEN_STRONG_INLINE CoeffReturnType
y() const { return (*this)[1]; } y() const
{
EIGEN_STATIC_ASSERT(Derived::SizeAtCompileTime==-1 || Derived::SizeAtCompileTime>=2, OUT_OF_RANGE_ACCESS);
return (*this)[1];
}
/** equivalent to operator[](2). */ /** equivalent to operator[](2). */
EIGEN_DEVICE_FUNC EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE CoeffReturnType EIGEN_STRONG_INLINE CoeffReturnType
z() const { return (*this)[2]; } z() const
{
EIGEN_STATIC_ASSERT(Derived::SizeAtCompileTime==-1 || Derived::SizeAtCompileTime>=3, OUT_OF_RANGE_ACCESS);
return (*this)[2];
}
/** equivalent to operator[](3). */ /** equivalent to operator[](3). */
EIGEN_DEVICE_FUNC EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE CoeffReturnType EIGEN_STRONG_INLINE CoeffReturnType
w() const { return (*this)[3]; } w() const
{
EIGEN_STATIC_ASSERT(Derived::SizeAtCompileTime==-1 || Derived::SizeAtCompileTime>=4, OUT_OF_RANGE_ACCESS);
return (*this)[3];
}
/** \internal /** \internal
* \returns the packet of coefficients starting at the given row and column. It is your responsibility * \returns the packet of coefficients starting at the given row and column. It is your responsibility
@ -424,19 +436,31 @@ class DenseCoeffsBase<Derived, WriteAccessors> : public DenseCoeffsBase<Derived,
EIGEN_DEVICE_FUNC EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE Scalar& EIGEN_STRONG_INLINE Scalar&
y() { return (*this)[1]; } y()
{
EIGEN_STATIC_ASSERT(Derived::SizeAtCompileTime==-1 || Derived::SizeAtCompileTime>=2, OUT_OF_RANGE_ACCESS);
return (*this)[1];
}
/** equivalent to operator[](2). */ /** equivalent to operator[](2). */
EIGEN_DEVICE_FUNC EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE Scalar& EIGEN_STRONG_INLINE Scalar&
z() { return (*this)[2]; } z()
{
EIGEN_STATIC_ASSERT(Derived::SizeAtCompileTime==-1 || Derived::SizeAtCompileTime>=3, OUT_OF_RANGE_ACCESS);
return (*this)[2];
}
/** equivalent to operator[](3). */ /** equivalent to operator[](3). */
EIGEN_DEVICE_FUNC EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE Scalar& EIGEN_STRONG_INLINE Scalar&
w() { return (*this)[3]; } w()
{
EIGEN_STATIC_ASSERT(Derived::SizeAtCompileTime==-1 || Derived::SizeAtCompileTime>=4, OUT_OF_RANGE_ACCESS);
return (*this)[3];
}
}; };
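The EIGEN_STATIC_ASSERT added to y()/z()/w() above turns an out-of-range named accessor into a compile-time error instead of a silent out-of-bounds read. A short illustration:

    Eigen::Vector2f v(1.f, 2.f);
    float b = v.y();    // fine: SizeAtCompileTime >= 2
    // float c = v.z(); // now rejected at compile time with OUT_OF_RANGE_ACCESS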
/** \brief Base class providing direct read-only coefficient access to matrices and arrays. /** \brief Base class providing direct read-only coefficient access to matrices and arrays.
@ -448,7 +472,7 @@ class DenseCoeffsBase<Derived, WriteAccessors> : public DenseCoeffsBase<Derived,
* inherits DenseCoeffsBase<Derived, ReadOnlyAccessors> which defines functions to access entries read-only using * inherits DenseCoeffsBase<Derived, ReadOnlyAccessors> which defines functions to access entries read-only using
* \c operator() . * \c operator() .
* *
* \sa \ref TopicClassHierarchy * \sa \blank \ref TopicClassHierarchy
*/ */
template<typename Derived> template<typename Derived>
class DenseCoeffsBase<Derived, DirectAccessors> : public DenseCoeffsBase<Derived, ReadOnlyAccessors> class DenseCoeffsBase<Derived, DirectAccessors> : public DenseCoeffsBase<Derived, ReadOnlyAccessors>
@ -521,7 +545,7 @@ class DenseCoeffsBase<Derived, DirectAccessors> : public DenseCoeffsBase<Derived
* inherits DenseCoeffsBase<Derived, WriteAccessors> which defines functions to access entries read/write using * inherits DenseCoeffsBase<Derived, WriteAccessors> which defines functions to access entries read/write using
* \c operator(). * \c operator().
* *
* \sa \ref TopicClassHierarchy * \sa \blank \ref TopicClassHierarchy
*/ */
template<typename Derived> template<typename Derived>
class DenseCoeffsBase<Derived, DirectWriteAccessors> class DenseCoeffsBase<Derived, DirectWriteAccessors>
View File
@ -103,21 +103,21 @@ template<typename MatrixType, int _DiagIndex> class Diagonal
>::type ScalarWithConstIfNotLvalue; >::type ScalarWithConstIfNotLvalue;
EIGEN_DEVICE_FUNC EIGEN_DEVICE_FUNC
inline ScalarWithConstIfNotLvalue* data() { return &(m_matrix.const_cast_derived().coeffRef(rowOffset(), colOffset())); } inline ScalarWithConstIfNotLvalue* data() { return &(m_matrix.coeffRef(rowOffset(), colOffset())); }
EIGEN_DEVICE_FUNC EIGEN_DEVICE_FUNC
inline const Scalar* data() const { return &(m_matrix.const_cast_derived().coeffRef(rowOffset(), colOffset())); } inline const Scalar* data() const { return &(m_matrix.coeffRef(rowOffset(), colOffset())); }
EIGEN_DEVICE_FUNC EIGEN_DEVICE_FUNC
inline Scalar& coeffRef(Index row, Index) inline Scalar& coeffRef(Index row, Index)
{ {
EIGEN_STATIC_ASSERT_LVALUE(MatrixType) EIGEN_STATIC_ASSERT_LVALUE(MatrixType)
return m_matrix.const_cast_derived().coeffRef(row+rowOffset(), row+colOffset()); return m_matrix.coeffRef(row+rowOffset(), row+colOffset());
} }
EIGEN_DEVICE_FUNC EIGEN_DEVICE_FUNC
inline const Scalar& coeffRef(Index row, Index) const inline const Scalar& coeffRef(Index row, Index) const
{ {
return m_matrix.const_cast_derived().coeffRef(row+rowOffset(), row+colOffset()); return m_matrix.coeffRef(row+rowOffset(), row+colOffset());
} }
EIGEN_DEVICE_FUNC EIGEN_DEVICE_FUNC
@ -130,13 +130,13 @@ template<typename MatrixType, int _DiagIndex> class Diagonal
inline Scalar& coeffRef(Index idx) inline Scalar& coeffRef(Index idx)
{ {
EIGEN_STATIC_ASSERT_LVALUE(MatrixType) EIGEN_STATIC_ASSERT_LVALUE(MatrixType)
return m_matrix.const_cast_derived().coeffRef(idx+rowOffset(), idx+colOffset()); return m_matrix.coeffRef(idx+rowOffset(), idx+colOffset());
} }
EIGEN_DEVICE_FUNC EIGEN_DEVICE_FUNC
inline const Scalar& coeffRef(Index idx) const inline const Scalar& coeffRef(Index idx) const
{ {
return m_matrix.const_cast_derived().coeffRef(idx+rowOffset(), idx+colOffset()); return m_matrix.coeffRef(idx+rowOffset(), idx+colOffset());
} }
EIGEN_DEVICE_FUNC EIGEN_DEVICE_FUNC
@ -159,7 +159,7 @@ template<typename MatrixType, int _DiagIndex> class Diagonal
} }
protected: protected:
typename MatrixType::Nested m_matrix; typename internal::ref_selector<MatrixType>::non_const_type m_matrix;
const internal::variable_if_dynamicindex<Index, DiagIndex> m_index; const internal::variable_if_dynamicindex<Index, DiagIndex> m_index;
private: private:
View File
@ -82,7 +82,7 @@ MatrixBase<Derived>::dot(const MatrixBase<OtherDerived>& other) const
* In both cases, it consists of the sum of the squares of all the matrix entries. * In both cases, it consists of the sum of the squares of all the matrix entries.
* For vectors, this also equals the dot product of \c *this with itself. * For vectors, this also equals the dot product of \c *this with itself.
* *
* \sa dot(), norm() * \sa dot(), norm(), lpNorm()
*/ */
template<typename Derived> template<typename Derived>
EIGEN_STRONG_INLINE typename NumTraits<typename internal::traits<Derived>::Scalar>::Real MatrixBase<Derived>::squaredNorm() const EIGEN_STRONG_INLINE typename NumTraits<typename internal::traits<Derived>::Scalar>::Real MatrixBase<Derived>::squaredNorm() const
@ -94,16 +94,18 @@ EIGEN_STRONG_INLINE typename NumTraits<typename internal::traits<Derived>::Scala
* In both cases, it consists of the square root of the sum of the squares of all the matrix entries. * In both cases, it consists of the square root of the sum of the squares of all the matrix entries.
* For vectors, this also equals the square root of the dot product of \c *this with itself. * For vectors, this also equals the square root of the dot product of \c *this with itself.
* *
* \sa dot(), squaredNorm() * \sa lpNorm(), dot(), squaredNorm()
*/ */
template<typename Derived> template<typename Derived>
inline typename NumTraits<typename internal::traits<Derived>::Scalar>::Real MatrixBase<Derived>::norm() const inline typename NumTraits<typename internal::traits<Derived>::Scalar>::Real MatrixBase<Derived>::norm() const
{ {
EIGEN_USING_STD_MATH(sqrt) return numext::sqrt(squaredNorm());
return sqrt(squaredNorm());
} }
/** \returns an expression of the quotient of *this by its own norm. /** \returns an expression of the quotient of \c *this by its own norm.
*
* \warning If the input vector is too small (i.e., this->norm()==0),
* then this function returns a copy of the input.
* *
* \only_for_vectors * \only_for_vectors
* *
@ -115,19 +117,75 @@ MatrixBase<Derived>::normalized() const
{ {
typedef typename internal::nested_eval<Derived,2>::type _Nested; typedef typename internal::nested_eval<Derived,2>::type _Nested;
_Nested n(derived()); _Nested n(derived());
return n / n.norm(); RealScalar z = n.squaredNorm();
// NOTE: after extensive benchmarking, this conditional does not impact performance, at least on recent x86 CPUs
if(z>RealScalar(0))
return n / numext::sqrt(z);
else
return n;
} }
/** Normalizes the vector, i.e. divides it by its own norm. /** Normalizes the vector, i.e. divides it by its own norm.
* *
* \only_for_vectors * \only_for_vectors
* *
* \warning If the input vector is too small (i.e., this->norm()==0), then \c *this is left unchanged.
*
* \sa norm(), normalized() * \sa norm(), normalized()
*/ */
template<typename Derived> template<typename Derived>
inline void MatrixBase<Derived>::normalize() inline void MatrixBase<Derived>::normalize()
{ {
*this /= norm(); RealScalar z = squaredNorm();
// NOTE: after extensive benchmarking, this conditional does not impact performance, at least on recent x86 CPUs
if(z>RealScalar(0))
derived() /= numext::sqrt(z);
}
/** \returns an expression of the quotient of \c *this by its own norm while avoiding underflow and overflow.
*
* \only_for_vectors
*
* This method is analogous to the normalized() method, but it reduces the risk of
* underflow and overflow when computing the norm.
*
* \warning If the input vector is too small (i.e., this->norm()==0),
* then this function returns a copy of the input.
*
* \sa stableNorm(), stableNormalize(), normalized()
*/
template<typename Derived>
inline const typename MatrixBase<Derived>::PlainObject
MatrixBase<Derived>::stableNormalized() const
{
typedef typename internal::nested_eval<Derived,3>::type _Nested;
_Nested n(derived());
RealScalar w = n.cwiseAbs().maxCoeff();
RealScalar z = (n/w).squaredNorm();
if(z>RealScalar(0))
return n / (numext::sqrt(z)*w);
else
return n;
}
/** Normalizes the vector while avoiding underflow and overflow
*
* \only_for_vectors
*
* This method is analogous to the normalize() method, but it reduces the risk of
* underflow and overflow when computing the norm.
*
* \warning If the input vector is too small (i.e., this->norm()==0), then \c *this is left unchanged.
*
* \sa stableNorm(), stableNormalized(), normalize()
*/
template<typename Derived>
inline void MatrixBase<Derived>::stableNormalize()
{
RealScalar w = cwiseAbs().maxCoeff();
RealScalar z = (derived()/w).squaredNorm();
if(z>RealScalar(0))
derived() /= numext::sqrt(z)*w;
} }
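Two related changes above: normalized()/normalize() now guard against a zero norm, and the new stableNormalized()/stableNormalize() divide by the largest absolute coefficient before squaring, so squaredNorm() can neither overflow nor underflow. A sketch, with values chosen only to force overflow in double:

    Eigen::Vector3d v(1e200, 1e200, 1e200);
    // v.squaredNorm() overflows to +inf, so v.normalized() degrades to zeros;
    // stableNormalized() first rescales by w = 1e200 and then normalizes:
    Eigen::Vector3d u = v.stableNormalized();                  // each entry is 1/sqrt(3)
    Eigen::Vector3d z = Eigen::Vector3d::Zero().normalized();  // stays zero, no NaN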
//---------- implementation of other norms ---------- //---------- implementation of other norms ----------
@ -188,7 +246,11 @@ struct lpNorm_selector<Derived, Infinity>
*/ */
template<typename Derived> template<typename Derived>
template<int p> template<int p>
#ifndef EIGEN_PARSED_BY_DOXYGEN
inline typename NumTraits<typename internal::traits<Derived>::Scalar>::Real inline typename NumTraits<typename internal::traits<Derived>::Scalar>::Real
#else
MatrixBase<Derived>::RealScalar
#endif
MatrixBase<Derived>::lpNorm() const MatrixBase<Derived>::lpNorm() const
{ {
return internal::lpNorm_selector<Derived, p>::run(*this); return internal::lpNorm_selector<Derived, p>::run(*this);
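The EIGEN_PARSED_BY_DOXYGEN branch above only changes how the return type is documented; usage is unchanged. For reference:

    Eigen::Vector3d v(1.0, -2.0, 3.0);
    double l1   = v.lpNorm<1>();                // 6.0
    double l2   = v.lpNorm<2>();                // same as v.norm()
    double linf = v.lpNorm<Eigen::Infinity>();  // 3.0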
View File
@ -23,7 +23,7 @@ namespace Eigen {
* *
* Notice that this class is trivial, it is only used to disambiguate overloaded functions. * Notice that this class is trivial, it is only used to disambiguate overloaded functions.
* *
* \sa \ref TopicClassHierarchy * \sa \blank \ref TopicClassHierarchy
*/ */
template<typename Derived> struct EigenBase template<typename Derived> struct EigenBase
{ {
View File
@ -76,32 +76,6 @@ public:
#endif #endif
}; };
// template<typename Lhs, typename Rhs> struct product_tag
// {
// private:
//
// typedef typename remove_all<Lhs>::type _Lhs;
// typedef typename remove_all<Rhs>::type _Rhs;
// enum {
// Rows = _Lhs::RowsAtCompileTime,
// Cols = _Rhs::ColsAtCompileTime,
// Depth = EIGEN_SIZE_MIN_PREFER_FIXED(_Lhs::ColsAtCompileTime, _Rhs::RowsAtCompileTime)
// };
//
// enum {
// rows_select = Rows==1 ? int(Rows) : int(Large),
// cols_select = Cols==1 ? int(Cols) : int(Large),
// depth_select = Depth==1 ? int(Depth) : int(Large)
// };
// typedef product_type_selector<rows_select, cols_select, depth_select> selector;
//
// public:
// enum {
// ret = selector::ret
// };
//
// };
/* The following allows selecting the kind of product at compile time /* The following allows selecting the kind of product at compile time
* based on the three dimensions of the product. * based on the three dimensions of the product.
* This is a compile time mapping from {1,Small,Large}^3 -> {product types} */ * This is a compile time mapping from {1,Small,Large}^3 -> {product types} */
@ -125,8 +99,8 @@ template<> struct product_type_selector<Small,Small,Large> { enum
template<> struct product_type_selector<Large,Small,Large> { enum { ret = GemmProduct }; }; template<> struct product_type_selector<Large,Small,Large> { enum { ret = GemmProduct }; };
template<> struct product_type_selector<Small,Large,Large> { enum { ret = GemmProduct }; }; template<> struct product_type_selector<Small,Large,Large> { enum { ret = GemmProduct }; };
template<> struct product_type_selector<Large,Large,Large> { enum { ret = GemmProduct }; }; template<> struct product_type_selector<Large,Large,Large> { enum { ret = GemmProduct }; };
template<> struct product_type_selector<Large,Small,Small> { enum { ret = GemmProduct }; }; template<> struct product_type_selector<Large,Small,Small> { enum { ret = CoeffBasedProductMode }; };
template<> struct product_type_selector<Small,Large,Small> { enum { ret = GemmProduct }; }; template<> struct product_type_selector<Small,Large,Small> { enum { ret = CoeffBasedProductMode }; };
template<> struct product_type_selector<Large,Large,Small> { enum { ret = GemmProduct }; }; template<> struct product_type_selector<Large,Large,Small> { enum { ret = GemmProduct }; };
} // end namespace internal } // end namespace internal
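The two changed specializations send products with a small inner dimension and one small result dimension down the coefficient-based path instead of the full GEMM kernel. One shape that now takes the cheaper path (sizes are illustrative):

    Eigen::MatrixXd A = Eigen::MatrixXd::Random(1000, 2);  // rows: Large (dynamic)
    Eigen::Matrix2d B = Eigen::Matrix2d::Random();         // cols: Small, depth: Small
    Eigen::MatrixXd C = A * B;  // selector<Large,Small,Small> -> CoeffBasedProductMode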
@ -239,15 +213,18 @@ template<> struct gemv_dense_selector<OnTheRight,ColMajor,true>
ResScalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(lhs) ResScalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(lhs)
* RhsBlasTraits::extractScalarFactor(rhs); * RhsBlasTraits::extractScalarFactor(rhs);
// make sure Dest is a compile-time vector type (bug 1166)
typedef typename conditional<Dest::IsVectorAtCompileTime, Dest, typename Dest::ColXpr>::type ActualDest;
enum { enum {
// FIXME find a way to allow an inner stride on the result if packet_traits<Scalar>::size==1 // FIXME find a way to allow an inner stride on the result if packet_traits<Scalar>::size==1
// on the other hand, it is good for the cache to pack the vector anyway... // on the other hand, it is good for the cache to pack the vector anyway...
EvalToDestAtCompileTime = Dest::InnerStrideAtCompileTime==1, EvalToDestAtCompileTime = (ActualDest::InnerStrideAtCompileTime==1),
ComplexByReal = (NumTraits<LhsScalar>::IsComplex) && (!NumTraits<RhsScalar>::IsComplex), ComplexByReal = (NumTraits<LhsScalar>::IsComplex) && (!NumTraits<RhsScalar>::IsComplex),
MightCannotUseDest = (Dest::InnerStrideAtCompileTime!=1) || ComplexByReal MightCannotUseDest = (ActualDest::InnerStrideAtCompileTime!=1) || ComplexByReal
}; };
gemv_static_vector_if<ResScalar,Dest::SizeAtCompileTime,Dest::MaxSizeAtCompileTime,MightCannotUseDest> static_dest; gemv_static_vector_if<ResScalar,ActualDest::SizeAtCompileTime,ActualDest::MaxSizeAtCompileTime,MightCannotUseDest> static_dest;
const bool alphaIsCompatible = (!ComplexByReal) || (numext::imag(actualAlpha)==RealScalar(0)); const bool alphaIsCompatible = (!ComplexByReal) || (numext::imag(actualAlpha)==RealScalar(0));
const bool evalToDest = EvalToDestAtCompileTime && alphaIsCompatible; const bool evalToDest = EvalToDestAtCompileTime && alphaIsCompatible;
@ -340,7 +317,7 @@ template<> struct gemv_dense_selector<OnTheRight,RowMajor,true>
actualLhs.rows(), actualLhs.cols(), actualLhs.rows(), actualLhs.cols(),
LhsMapper(actualLhs.data(), actualLhs.outerStride()), LhsMapper(actualLhs.data(), actualLhs.outerStride()),
RhsMapper(actualRhsPtr, 1), RhsMapper(actualRhsPtr, 1),
dest.data(), dest.innerStride(), dest.data(), dest.col(0).innerStride(), //NOTE if dest is not a vector at compile-time, then dest.innerStride() might be wrong. (bug 1166)
actualAlpha); actualAlpha);
} }
}; };
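The bug-1166 comments above concern callers whose destination is not a compile-time vector, for example a dynamic-width block that happens to be one column wide: its InnerStrideAtCompileTime tells the GEMV kernel nothing useful, hence the Dest::ColXpr fallback and dest.col(0).innerStride(). A plausible triggering shape (a guess at the bug report, for illustration only):

    Eigen::MatrixXd A = Eigen::MatrixXd::Random(4, 4), B(4, 4);
    Eigen::VectorXd x = Eigen::VectorXd::Random(4);
    // The destination block has ColsAtCompileTime == Dynamic, so it is not a
    // compile-time vector even though it is one column wide at runtime.
    B.block(0, 0, 4, 1).noalias() = A * x;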
View File
@ -43,7 +43,7 @@ struct default_packet_traits
{ {
enum { enum {
HasHalfPacket = 0, HasHalfPacket = 0,
HasAdd = 1, HasAdd = 1,
HasSub = 1, HasSub = 1,
HasMul = 1, HasMul = 1,
@ -62,7 +62,7 @@ struct default_packet_traits
HasRsqrt = 0, HasRsqrt = 0,
HasExp = 0, HasExp = 0,
HasLog = 0, HasLog = 0,
HasLog10 = 0, HasLog10 = 0,
HasPow = 0, HasPow = 0,
HasSin = 0, HasSin = 0,
@ -71,9 +71,17 @@ struct default_packet_traits
HasASin = 0, HasASin = 0,
HasACos = 0, HasACos = 0,
HasATan = 0, HasATan = 0,
HasSinh = 0, HasSinh = 0,
HasCosh = 0, HasCosh = 0,
HasTanh = 0, HasTanh = 0,
HasLGamma = 0,
HasDiGamma = 0,
HasZeta = 0,
HasPolygamma = 0,
HasErf = 0,
HasErfc = 0,
HasIGamma = 0,
HasIGammac = 0,
HasRound = 0, HasRound = 0,
HasFloor = 0, HasFloor = 0,
@ -130,6 +138,11 @@ pcast(const SrcPacket& a, const SrcPacket& /*b*/) {
return static_cast<TgtPacket>(a); return static_cast<TgtPacket>(a);
} }
template <typename SrcPacket, typename TgtPacket>
EIGEN_DEVICE_FUNC inline TgtPacket
pcast(const SrcPacket& a, const SrcPacket& /*b*/, const SrcPacket& /*c*/, const SrcPacket& /*d*/) {
return static_cast<TgtPacket>(a);
}
/** \internal \returns a + b (coeff-wise) */ /** \internal \returns a + b (coeff-wise) */
template<typename Packet> EIGEN_DEVICE_FUNC inline Packet template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
@ -281,7 +294,7 @@ template<typename Scalar, typename Packet> EIGEN_DEVICE_FUNC inline void pstoreu
{ pstore(to, from); } { pstore(to, from); }
/** \internal tries to do cache prefetching of \a addr */ /** \internal tries to do cache prefetching of \a addr */
template<typename Scalar> inline void prefetch(const Scalar* addr) template<typename Scalar> EIGEN_DEVICE_FUNC inline void prefetch(const Scalar* addr)
{ {
#ifdef __CUDA_ARCH__ #ifdef __CUDA_ARCH__
#if defined(__LP64__) #if defined(__LP64__)
@ -432,6 +445,38 @@ Packet pfloor(const Packet& a) { using numext::floor; return floor(a); }
template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
Packet pceil(const Packet& a) { using numext::ceil; return ceil(a); } Packet pceil(const Packet& a) { using numext::ceil; return ceil(a); }
/** \internal \returns the ln(|gamma(\a a)|) (coeff-wise) */
template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
Packet plgamma(const Packet& a) { using numext::lgamma; return lgamma(a); }
/** \internal \returns the derivative of lgamma, psi(\a a) (coeff-wise) */
template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
Packet pdigamma(const Packet& a) { using numext::digamma; return digamma(a); }
/** \internal \returns the zeta function of two arguments (coeff-wise) */
template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
Packet pzeta(const Packet& x, const Packet& q) { using numext::zeta; return zeta(x, q); }
/** \internal \returns the polygamma function (coeff-wise) */
template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
Packet ppolygamma(const Packet& n, const Packet& x) { using numext::polygamma; return polygamma(n, x); }
/** \internal \returns the erf(\a a) (coeff-wise) */
template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
Packet perf(const Packet& a) { using numext::erf; return erf(a); }
/** \internal \returns the erfc(\a a) (coeff-wise) */
template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
Packet perfc(const Packet& a) { using numext::erfc; return erfc(a); }
/** \internal \returns the incomplete gamma function igamma(\a a, \a x) */
template<typename Packet> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
Packet pigamma(const Packet& a, const Packet& x) { using numext::igamma; return igamma(a, x); }
/** \internal \returns the complementary incomplete gamma function igammac(\a a, \a x) */
template<typename Packet> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
Packet pigammac(const Packet& a, const Packet& x) { using numext::igammac; return igammac(a, x); }
/*************************************************************************** /***************************************************************************
* The following functions might not have to be overwritten for vectorized types * The following functions might not have to be overwritten for vectorized types
***************************************************************************/ ***************************************************************************/
View File
@ -49,6 +49,12 @@ namespace Eigen
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(sinh,scalar_sinh_op) EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(sinh,scalar_sinh_op)
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(cosh,scalar_cosh_op) EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(cosh,scalar_cosh_op)
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(tanh,scalar_tanh_op) EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(tanh,scalar_tanh_op)
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(lgamma,scalar_lgamma_op)
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(digamma,scalar_digamma_op)
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(zeta,scalar_zeta_op)
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(polygamma,scalar_polygamma_op)
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(erf,scalar_erf_op)
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(erfc,scalar_erfc_op)
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(exp,scalar_exp_op) EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(exp,scalar_exp_op)
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(log,scalar_log_op) EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(log,scalar_log_op)
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(log10,scalar_log10_op) EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(log10,scalar_log10_op)
@ -125,6 +131,36 @@ namespace Eigen
); );
} }
/** \returns an expression of the coefficient-wise igamma(\a a, \a x) applied to the given arrays.
*
* This function computes the coefficient-wise incomplete gamma function.
*
*/
template<typename Derived,typename ExponentDerived>
inline const Eigen::CwiseBinaryOp<Eigen::internal::scalar_igamma_op<typename Derived::Scalar>, const Derived, const ExponentDerived>
igamma(const Eigen::ArrayBase<Derived>& a, const Eigen::ArrayBase<ExponentDerived>& x)
{
return Eigen::CwiseBinaryOp<Eigen::internal::scalar_igamma_op<typename Derived::Scalar>, const Derived, const ExponentDerived>(
a.derived(),
x.derived()
);
}
/** \returns an expression of the coefficient-wise igammac(\a a, \a x) applied to the given arrays.
*
* This function computes the coefficient-wise complementary incomplete gamma function.
*
*/
template<typename Derived,typename ExponentDerived>
inline const Eigen::CwiseBinaryOp<Eigen::internal::scalar_igammac_op<typename Derived::Scalar>, const Derived, const ExponentDerived>
igammac(const Eigen::ArrayBase<Derived>& a, const Eigen::ArrayBase<ExponentDerived>& x)
{
return Eigen::CwiseBinaryOp<Eigen::internal::scalar_igammac_op<typename Derived::Scalar>, const Derived, const ExponentDerived>(
a.derived(),
x.derived()
);
}
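A hedged usage sketch of the two new array functions (assuming, as the names suggest, the normalized lower and upper incomplete gamma functions, which sum to one):

    Eigen::ArrayXd a = Eigen::ArrayXd::Constant(3, 2.0);
    Eigen::ArrayXd x = Eigen::ArrayXd::LinSpaced(3, 0.5, 2.5);
    Eigen::ArrayXd p = Eigen::igamma(a, x);   // incomplete gamma
    Eigen::ArrayXd q = Eigen::igammac(a, x);  // complementary part; p + q == 1 up to rounding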
namespace internal namespace internal
{ {
EIGEN_ARRAY_DECLARE_GLOBAL_EIGEN_UNARY(real,scalar_real_op) EIGEN_ARRAY_DECLARE_GLOBAL_EIGEN_UNARY(real,scalar_real_op)
View File
@ -80,7 +80,7 @@ struct IOFormat
* *
* \brief Pseudo expression providing matrix output with given format * \brief Pseudo expression providing matrix output with given format
* *
* \param ExpressionType the type of the object on which IO stream operations are performed * \tparam ExpressionType the type of the object on which IO stream operations are performed
* *
* This class represents an expression with stream operators controlled by a given IOFormat. * This class represents an expression with stream operators controlled by a given IOFormat.
* It is the return type of DenseBase::format() * It is the return type of DenseBase::format()
View File
@ -13,6 +13,28 @@
namespace Eigen { namespace Eigen {
namespace internal {
template<typename PlainObjectType, int MapOptions, typename StrideType>
struct traits<Map<PlainObjectType, MapOptions, StrideType> >
: public traits<PlainObjectType>
{
typedef traits<PlainObjectType> TraitsBase;
enum {
InnerStrideAtCompileTime = StrideType::InnerStrideAtCompileTime == 0
? int(PlainObjectType::InnerStrideAtCompileTime)
: int(StrideType::InnerStrideAtCompileTime),
OuterStrideAtCompileTime = StrideType::OuterStrideAtCompileTime == 0
? int(PlainObjectType::OuterStrideAtCompileTime)
: int(StrideType::OuterStrideAtCompileTime),
Alignment = int(MapOptions)&int(AlignedMask),
Flags0 = TraitsBase::Flags & (~NestByRefBit),
Flags = is_lvalue<PlainObjectType>::value ? int(Flags0) : (int(Flags0) & ~LvalueBit)
};
private:
enum { Options }; // Expressions don't have Options
};
}
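In the traits block above, a zero in StrideType means "inherit that stride from the mapped plain object", which is what makes Map<MatrixXf> with the default Stride<0,0> behave like a dense matrix. A strided sketch (buffer contents are illustrative):

    float data[12] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11};
    // Inner stride 2 (every other float), outer stride 4 floats between columns.
    Eigen::Map<Eigen::MatrixXf, 0, Eigen::Stride<Eigen::Dynamic, 2> >
        view(data, 2, 3, Eigen::Stride<Eigen::Dynamic, 2>(4, 2));
    // view(i, j) reads data[4*j + 2*i], so view(1, 2) == 10.f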
/** \class Map /** \class Map
* \ingroup Core_Module * \ingroup Core_Module
* *
@ -63,29 +85,6 @@ namespace Eigen {
* *
* \sa PlainObjectBase::Map(), \ref TopicStorageOrders * \sa PlainObjectBase::Map(), \ref TopicStorageOrders
*/ */
namespace internal {
template<typename PlainObjectType, int MapOptions, typename StrideType>
struct traits<Map<PlainObjectType, MapOptions, StrideType> >
: public traits<PlainObjectType>
{
typedef traits<PlainObjectType> TraitsBase;
enum {
InnerStrideAtCompileTime = StrideType::InnerStrideAtCompileTime == 0
? int(PlainObjectType::InnerStrideAtCompileTime)
: int(StrideType::InnerStrideAtCompileTime),
OuterStrideAtCompileTime = StrideType::OuterStrideAtCompileTime == 0
? int(PlainObjectType::OuterStrideAtCompileTime)
: int(StrideType::OuterStrideAtCompileTime),
Alignment = int(MapOptions)&int(AlignedMask),
Flags0 = TraitsBase::Flags & (~NestByRefBit),
Flags = is_lvalue<PlainObjectType>::value ? int(Flags0) : (int(Flags0) & ~LvalueBit)
};
private:
enum { Options }; // Expressions don't have Options
};
}
template<typename PlainObjectType, int MapOptions, typename StrideType> class Map template<typename PlainObjectType, int MapOptions, typename StrideType> class Map
: public MapBase<Map<PlainObjectType, MapOptions, StrideType> > : public MapBase<Map<PlainObjectType, MapOptions, StrideType> >
{ {
View File
@ -130,7 +130,7 @@ template<typename Derived> class MapBase<Derived, ReadOnlyAccessors>
explicit inline MapBase(PointerType dataPtr) : m_data(dataPtr), m_rows(RowsAtCompileTime), m_cols(ColsAtCompileTime) explicit inline MapBase(PointerType dataPtr) : m_data(dataPtr), m_rows(RowsAtCompileTime), m_cols(ColsAtCompileTime)
{ {
EIGEN_STATIC_ASSERT_FIXED_SIZE(Derived) EIGEN_STATIC_ASSERT_FIXED_SIZE(Derived)
checkSanity(); checkSanity<Derived>();
} }
EIGEN_DEVICE_FUNC EIGEN_DEVICE_FUNC
@ -142,7 +142,7 @@ template<typename Derived> class MapBase<Derived, ReadOnlyAccessors>
EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived) EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
eigen_assert(vecSize >= 0); eigen_assert(vecSize >= 0);
eigen_assert(dataPtr == 0 || SizeAtCompileTime == Dynamic || SizeAtCompileTime == vecSize); eigen_assert(dataPtr == 0 || SizeAtCompileTime == Dynamic || SizeAtCompileTime == vecSize);
checkSanity(); checkSanity<Derived>();
} }
EIGEN_DEVICE_FUNC EIGEN_DEVICE_FUNC
@ -152,19 +152,30 @@ template<typename Derived> class MapBase<Derived, ReadOnlyAccessors>
eigen_assert( (dataPtr == 0) eigen_assert( (dataPtr == 0)
|| ( rows >= 0 && (RowsAtCompileTime == Dynamic || RowsAtCompileTime == rows) || ( rows >= 0 && (RowsAtCompileTime == Dynamic || RowsAtCompileTime == rows)
&& cols >= 0 && (ColsAtCompileTime == Dynamic || ColsAtCompileTime == cols))); && cols >= 0 && (ColsAtCompileTime == Dynamic || ColsAtCompileTime == cols)));
checkSanity(); checkSanity<Derived>();
} }
#ifdef EIGEN_MAPBASE_PLUGIN
#include EIGEN_MAPBASE_PLUGIN
#endif
protected: protected:
template<typename T>
EIGEN_DEVICE_FUNC EIGEN_DEVICE_FUNC
void checkSanity() const void checkSanity(typename internal::enable_if<(internal::traits<T>::Alignment>0),void*>::type = 0) const
{ {
#if EIGEN_MAX_ALIGN_BYTES>0 #if EIGEN_MAX_ALIGN_BYTES>0
eigen_assert(((size_t(m_data) % EIGEN_PLAIN_ENUM_MAX(1,internal::traits<Derived>::Alignment)) == 0) && "data is not aligned"); eigen_assert(( ((size_t(m_data) % internal::traits<Derived>::Alignment) == 0)
|| (cols() * rows() * innerStride() * sizeof(Scalar)) < internal::traits<Derived>::Alignment ) && "data is not aligned");
#endif #endif
} }
template<typename T>
EIGEN_DEVICE_FUNC
void checkSanity(typename internal::enable_if<internal::traits<T>::Alignment==0,void*>::type = 0) const
{}
PointerType m_data; PointerType m_data;
const internal::variable_if_dynamic<Index, RowsAtCompileTime> m_rows; const internal::variable_if_dynamic<Index, RowsAtCompileTime> m_rows;
const internal::variable_if_dynamic<Index, ColsAtCompileTime> m_cols; const internal::variable_if_dynamic<Index, ColsAtCompileTime> m_cols;
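checkSanity() is now dispatched on traits<T>::Alignment: maps that require no alignment skip the check entirely, and aligned maps additionally tolerate objects smaller than the alignment requirement. Roughly:

    alignas(16) float buf[8];
    Eigen::Map<Eigen::Vector4f, Eigen::Aligned16> ok(buf);       // passes the assert
    // Eigen::Map<Eigen::Vector4f, Eigen::Aligned16> bad(buf+1); // would assert "data is not aligned"
    Eigen::Map<Eigen::Vector4f> loose(buf + 1);                  // Alignment == 0: no check at all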
View File
@ -23,10 +23,10 @@ double abs(double x) { return (fabs(x)); }
float abs(float x) { return (fabsf(x)); } float abs(float x) { return (fabsf(x)); }
long double abs(long double x) { return (fabsl(x)); } long double abs(long double x) { return (fabsl(x)); }
#endif #endif
namespace internal { namespace internal {
/** \internal \struct global_math_functions_filtering_base /** \internal \class global_math_functions_filtering_base
* *
* What it does: * What it does:
* Defines a typedef 'type' as follows: * Defines a typedef 'type' as follows:
@ -496,7 +496,7 @@ template<typename Scalar, bool IsInteger>
struct pow_default_impl struct pow_default_impl
{ {
typedef Scalar retval; typedef Scalar retval;
static inline Scalar run(const Scalar& x, const Scalar& y) static EIGEN_DEVICE_FUNC inline Scalar run(const Scalar& x, const Scalar& y)
{ {
EIGEN_USING_STD_MATH(pow); EIGEN_USING_STD_MATH(pow);
return pow(x, y); return pow(x, y);
@ -506,7 +506,7 @@ struct pow_default_impl
template<typename Scalar> template<typename Scalar>
struct pow_default_impl<Scalar, true> struct pow_default_impl<Scalar, true>
{ {
static inline Scalar run(Scalar x, Scalar y) static EIGEN_DEVICE_FUNC inline Scalar run(Scalar x, Scalar y)
{ {
Scalar res(1); Scalar res(1);
eigen_assert(!NumTraits<Scalar>::IsSigned || y >= 0); eigen_assert(!NumTraits<Scalar>::IsSigned || y >= 0);
@ -704,11 +704,13 @@ EIGEN_DEVICE_FUNC
typename internal::enable_if<(!internal::is_integral<T>::value)&&(!NumTraits<T>::IsComplex),bool>::type typename internal::enable_if<(!internal::is_integral<T>::value)&&(!NumTraits<T>::IsComplex),bool>::type
isfinite_impl(const T& x) isfinite_impl(const T& x)
{ {
#if EIGEN_USE_STD_FPCLASSIFY #ifdef __CUDA_ARCH__
return (::isfinite)(x);
#elif EIGEN_USE_STD_FPCLASSIFY
using std::isfinite; using std::isfinite;
return isfinite EIGEN_NOT_A_MACRO (x); return isfinite EIGEN_NOT_A_MACRO (x);
#else #else
return x<NumTraits<T>::highest() && x>NumTraits<T>::lowest(); return x<=NumTraits<T>::highest() && x>=NumTraits<T>::lowest();
#endif #endif
} }
@ -717,7 +719,9 @@ EIGEN_DEVICE_FUNC
typename internal::enable_if<(!internal::is_integral<T>::value)&&(!NumTraits<T>::IsComplex),bool>::type typename internal::enable_if<(!internal::is_integral<T>::value)&&(!NumTraits<T>::IsComplex),bool>::type
isinf_impl(const T& x) isinf_impl(const T& x)
{ {
#if EIGEN_USE_STD_FPCLASSIFY #ifdef __CUDA_ARCH__
return (::isinf)(x);
#elif EIGEN_USE_STD_FPCLASSIFY
using std::isinf; using std::isinf;
return isinf EIGEN_NOT_A_MACRO (x); return isinf EIGEN_NOT_A_MACRO (x);
#else #else
@ -730,7 +734,9 @@ EIGEN_DEVICE_FUNC
typename internal::enable_if<(!internal::is_integral<T>::value)&&(!NumTraits<T>::IsComplex),bool>::type typename internal::enable_if<(!internal::is_integral<T>::value)&&(!NumTraits<T>::IsComplex),bool>::type
isnan_impl(const T& x) isnan_impl(const T& x)
{ {
#if EIGEN_USE_STD_FPCLASSIFY #ifdef __CUDA_ARCH__
return (::isnan)(x);
#elif EIGEN_USE_STD_FPCLASSIFY
using std::isnan; using std::isnan;
return isnan EIGEN_NOT_A_MACRO (x); return isnan EIGEN_NOT_A_MACRO (x);
#else #else
@ -748,9 +754,9 @@ template<typename T> EIGEN_DEVICE_FUNC bool isinf_msvc_helper(T x)
} }
//MSVC defines a _isnan builtin function, but for double only //MSVC defines a _isnan builtin function, but for double only
EIGEN_DEVICE_FUNC inline bool isnan_impl(const long double& x) { return _isnan(x); } EIGEN_DEVICE_FUNC inline bool isnan_impl(const long double& x) { return _isnan(x)!=0; }
EIGEN_DEVICE_FUNC inline bool isnan_impl(const double& x) { return _isnan(x); } EIGEN_DEVICE_FUNC inline bool isnan_impl(const double& x) { return _isnan(x)!=0; }
EIGEN_DEVICE_FUNC inline bool isnan_impl(const float& x) { return _isnan(x); } EIGEN_DEVICE_FUNC inline bool isnan_impl(const float& x) { return _isnan(x)!=0; }
EIGEN_DEVICE_FUNC inline bool isinf_impl(const long double& x) { return isinf_msvc_helper(x); } EIGEN_DEVICE_FUNC inline bool isinf_impl(const long double& x) { return isinf_msvc_helper(x); }
EIGEN_DEVICE_FUNC inline bool isinf_impl(const double& x) { return isinf_msvc_helper(x); } EIGEN_DEVICE_FUNC inline bool isinf_impl(const double& x) { return isinf_msvc_helper(x); }
@ -780,9 +786,9 @@ template<> EIGEN_TMP_NOOPT_ATTRIB bool isinf_impl(const long double& x) { return
#endif #endif
// The following overload are defined at the end of this file // The following overload are defined at the end of this file
template<typename T> bool isfinite_impl(const std::complex<T>& x); template<typename T> EIGEN_DEVICE_FUNC bool isfinite_impl(const std::complex<T>& x);
template<typename T> bool isnan_impl(const std::complex<T>& x); template<typename T> EIGEN_DEVICE_FUNC bool isnan_impl(const std::complex<T>& x);
template<typename T> bool isinf_impl(const std::complex<T>& x); template<typename T> EIGEN_DEVICE_FUNC bool isinf_impl(const std::complex<T>& x);
} // end namespace internal } // end namespace internal
@ -946,6 +952,14 @@ T (floor)(const T& x)
return floor(x); return floor(x);
} }
#ifdef __CUDACC__
template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
float floor(const float &x) { return ::floorf(x); }
template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
double floor(const double &x) { return ::floor(x); }
#endif
template<typename T> template<typename T>
EIGEN_DEVICE_FUNC EIGEN_DEVICE_FUNC
T (ceil)(const T& x) T (ceil)(const T& x)
@ -954,8 +968,17 @@ T (ceil)(const T& x)
return ceil(x); return ceil(x);
} }
// Log base 2 for 32 bits positive integers. #ifdef __CUDACC__
// Conveniently returns 0 for x==0. template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
float ceil(const float &x) { return ::ceilf(x); }
template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
double ceil(const double &x) { return ::ceil(x); }
#endif
/** Log base 2 for 32-bit positive integers.
* Conveniently returns 0 for x==0. */
inline int log2(int x) inline int log2(int x)
{ {
eigen_assert(x>=0); eigen_assert(x>=0);
@ -969,24 +992,122 @@ inline int log2(int x)
return table[(v * 0x07C4ACDDU) >> 27]; return table[(v * 0x07C4ACDDU) >> 27];
} }
/** \returns the square root of \a x.
*
* It is essentially equivalent to \code using std::sqrt; return sqrt(x); \endcode,
* but slightly faster for float/double and some compilers (e.g., gcc), thanks to
* specializations when SSE is enabled.
*
* Its use is justified in performance-critical functions, like norm/normalize.
*/
template<typename T>
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
T sqrt(const T &x)
{
EIGEN_USING_STD_MATH(sqrt);
return sqrt(x);
}
template<typename T>
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
T log(const T &x) {
EIGEN_USING_STD_MATH(log);
return log(x);
}
#ifdef __CUDACC__
template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
float log(const float &x) { return ::logf(x); }
template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
double log(const double &x) { return ::log(x); }
#endif
template<typename T>
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
T tan(const T &x) {
EIGEN_USING_STD_MATH(tan);
return tan(x);
}
#ifdef __CUDACC__
template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
float tan(const float &x) { return ::tanf(x); }
template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
double tan(const double &x) { return ::tan(x); }
#endif
template<typename T>
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
typename NumTraits<T>::Real abs(const T &x) {
EIGEN_USING_STD_MATH(abs);
return abs(x);
}
#ifdef __CUDACC__
template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
float abs(const float &x) { return ::fabsf(x); }
template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
double abs(const double &x) { return ::fabs(x); }
#endif
template<typename T>
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
T exp(const T &x) {
EIGEN_USING_STD_MATH(exp);
return exp(x);
}
#ifdef __CUDACC__
template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
float exp(const float &x) { return ::expf(x); }
template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
double exp(const double &x) { return ::exp(x); }
#endif
template <typename T>
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
T fmod(const T& a, const T& b) {
EIGEN_USING_STD_MATH(fmod);
return fmod(a, b);
}
#ifdef __CUDACC__
template <>
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
float fmod(const float& a, const float& b) {
return ::fmodf(a, b);
}
template <>
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
double fmod(const double& a, const double& b) {
return ::fmod(a, b);
}
#endif
} // end namespace numext } // end namespace numext
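These numext wrappers give a single spelling that resolves to the std:: functions on the host and to the CUDA runtime functions (::sqrtf, ::expf, ...) when compiled for the device, which is what lets one EIGEN_DEVICE_FUNC body serve both compilation modes. A hedged sketch of a function written against them:

    template <typename Scalar>
    EIGEN_DEVICE_FUNC Scalar softplus(const Scalar& x) {
      // Picks std::log/std::exp on the host and the CUDA intrinsics in device code.
      return Eigen::numext::log(Scalar(1) + Eigen::numext::exp(x));
    }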
namespace internal { namespace internal {
template<typename T> template<typename T>
bool isfinite_impl(const std::complex<T>& x) EIGEN_DEVICE_FUNC bool isfinite_impl(const std::complex<T>& x)
{ {
return (numext::isfinite)(numext::real(x)) && (numext::isfinite)(numext::imag(x)); return (numext::isfinite)(numext::real(x)) && (numext::isfinite)(numext::imag(x));
} }
template<typename T> template<typename T>
bool isnan_impl(const std::complex<T>& x) EIGEN_DEVICE_FUNC bool isnan_impl(const std::complex<T>& x)
{ {
return (numext::isnan)(numext::real(x)) || (numext::isnan)(numext::imag(x)); return (numext::isnan)(numext::real(x)) || (numext::isnan)(numext::imag(x));
} }
template<typename T> template<typename T>
bool isinf_impl(const std::complex<T>& x) EIGEN_DEVICE_FUNC bool isinf_impl(const std::complex<T>& x)
{ {
return ((numext::isinf)(numext::real(x)) || (numext::isinf)(numext::imag(x))) && (!(numext::isnan)(x)); return ((numext::isinf)(numext::real(x)) || (numext::isinf)(numext::imag(x))) && (!(numext::isnan)(x));
} }
@ -1007,14 +1128,12 @@ struct scalar_fuzzy_default_impl<Scalar, false, false>
template<typename OtherScalar> EIGEN_DEVICE_FUNC template<typename OtherScalar> EIGEN_DEVICE_FUNC
static inline bool isMuchSmallerThan(const Scalar& x, const OtherScalar& y, const RealScalar& prec) static inline bool isMuchSmallerThan(const Scalar& x, const OtherScalar& y, const RealScalar& prec)
{ {
EIGEN_USING_STD_MATH(abs); return numext::abs(x) <= numext::abs(y) * prec;
return abs(x) <= abs(y) * prec;
} }
EIGEN_DEVICE_FUNC EIGEN_DEVICE_FUNC
static inline bool isApprox(const Scalar& x, const Scalar& y, const RealScalar& prec) static inline bool isApprox(const Scalar& x, const Scalar& y, const RealScalar& prec)
{ {
EIGEN_USING_STD_MATH(abs); return numext::abs(x - y) <= numext::mini(numext::abs(x), numext::abs(y)) * prec;
return abs(x - y) <= numext::mini(abs(x), abs(y)) * prec;
} }
EIGEN_DEVICE_FUNC EIGEN_DEVICE_FUNC
static inline bool isApproxOrLessThan(const Scalar& x, const Scalar& y, const RealScalar& prec) static inline bool isApproxOrLessThan(const Scalar& x, const Scalar& y, const RealScalar& prec)
@ -1064,21 +1183,21 @@ struct scalar_fuzzy_impl : scalar_fuzzy_default_impl<Scalar, NumTraits<Scalar>::
template<typename Scalar, typename OtherScalar> EIGEN_DEVICE_FUNC template<typename Scalar, typename OtherScalar> EIGEN_DEVICE_FUNC
inline bool isMuchSmallerThan(const Scalar& x, const OtherScalar& y, inline bool isMuchSmallerThan(const Scalar& x, const OtherScalar& y,
typename NumTraits<Scalar>::Real precision = NumTraits<Scalar>::dummy_precision()) const typename NumTraits<Scalar>::Real &precision = NumTraits<Scalar>::dummy_precision())
{ {
return scalar_fuzzy_impl<Scalar>::template isMuchSmallerThan<OtherScalar>(x, y, precision); return scalar_fuzzy_impl<Scalar>::template isMuchSmallerThan<OtherScalar>(x, y, precision);
} }
template<typename Scalar> EIGEN_DEVICE_FUNC template<typename Scalar> EIGEN_DEVICE_FUNC
inline bool isApprox(const Scalar& x, const Scalar& y, inline bool isApprox(const Scalar& x, const Scalar& y,
typename NumTraits<Scalar>::Real precision = NumTraits<Scalar>::dummy_precision()) const typename NumTraits<Scalar>::Real &precision = NumTraits<Scalar>::dummy_precision())
{ {
return scalar_fuzzy_impl<Scalar>::isApprox(x, y, precision); return scalar_fuzzy_impl<Scalar>::isApprox(x, y, precision);
} }
template<typename Scalar> EIGEN_DEVICE_FUNC template<typename Scalar> EIGEN_DEVICE_FUNC
inline bool isApproxOrLessThan(const Scalar& x, const Scalar& y, inline bool isApproxOrLessThan(const Scalar& x, const Scalar& y,
typename NumTraits<Scalar>::Real precision = NumTraits<Scalar>::dummy_precision()) const typename NumTraits<Scalar>::Real &precision = NumTraits<Scalar>::dummy_precision())
{ {
return scalar_fuzzy_impl<Scalar>::isApproxOrLessThan(x, y, precision); return scalar_fuzzy_impl<Scalar>::isApproxOrLessThan(x, y, precision);
} }
View File
@ -13,6 +13,45 @@
namespace Eigen { namespace Eigen {
namespace internal {
template<typename _Scalar, int _Rows, int _Cols, int _Options, int _MaxRows, int _MaxCols>
struct traits<Matrix<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols> >
{
private:
enum { size = internal::size_at_compile_time<_Rows,_Cols>::ret };
typedef typename find_best_packet<_Scalar,size>::type PacketScalar;
enum {
row_major_bit = _Options&RowMajor ? RowMajorBit : 0,
is_dynamic_size_storage = _MaxRows==Dynamic || _MaxCols==Dynamic,
max_size = is_dynamic_size_storage ? Dynamic : _MaxRows*_MaxCols,
default_alignment = compute_default_alignment<_Scalar,max_size>::value,
actual_alignment = ((_Options&DontAlign)==0) ? default_alignment : 0,
required_alignment = unpacket_traits<PacketScalar>::alignment,
packet_access_bit = packet_traits<_Scalar>::Vectorizable && (actual_alignment>=required_alignment) ? PacketAccessBit : 0
};
public:
typedef _Scalar Scalar;
typedef Dense StorageKind;
typedef Eigen::Index StorageIndex;
typedef MatrixXpr XprKind;
enum {
RowsAtCompileTime = _Rows,
ColsAtCompileTime = _Cols,
MaxRowsAtCompileTime = _MaxRows,
MaxColsAtCompileTime = _MaxCols,
Flags = compute_matrix_flags<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols>::ret,
Options = _Options,
InnerStrideAtCompileTime = 1,
OuterStrideAtCompileTime = (Options&RowMajor) ? ColsAtCompileTime : RowsAtCompileTime,
// FIXME, the following flag is only used to define NeedsToAlign in PlainObjectBase
EvaluatorFlags = LinearAccessBit | DirectAccessBit | packet_access_bit | row_major_bit,
Alignment = actual_alignment
};
};
}
/** \class Matrix /** \class Matrix
* \ingroup Core_Module * \ingroup Core_Module
* *
@ -98,7 +137,7 @@ namespace Eigen {
* </dl> * </dl>
* *
* <i><b>ABI and storage layout</b></i> * <i><b>ABI and storage layout</b></i>
* *
* The table below summarizes the ABI of some possible Matrix instances which is fixed throughout the lifetime of Eigen 3. * The table below summarizes the ABI of some possible Matrix instances which is fixed throughout the lifetime of Eigen 3.
* <table class="manual"> * <table class="manual">
* <tr><th>Matrix type</th><th>Equivalent C structure</th></tr> * <tr><th>Matrix type</th><th>Equivalent C structure</th></tr>
@ -130,50 +169,11 @@ namespace Eigen {
* </table> * </table>
* Note that in this table Rows, Cols, MaxRows and MaxCols are all positive integers. A(S) is defined to be the largest possible power of two * Note that in this table Rows, Cols, MaxRows and MaxCols are all positive integers. A(S) is defined to be the largest possible power of two
* smaller than EIGEN_MAX_STATIC_ALIGN_BYTES. * smaller than EIGEN_MAX_STATIC_ALIGN_BYTES.
* *
* \see MatrixBase for the majority of the API methods for matrices, \ref TopicClassHierarchy, * \see MatrixBase for the majority of the API methods for matrices, \ref TopicClassHierarchy,
* \ref TopicStorageOrders * \ref TopicStorageOrders
*/ */
namespace internal {
template<typename _Scalar, int _Rows, int _Cols, int _Options, int _MaxRows, int _MaxCols>
struct traits<Matrix<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols> >
{
private:
enum { size = internal::size_at_compile_time<_Rows,_Cols>::ret };
typedef typename find_best_packet<_Scalar,size>::type PacketScalar;
enum {
row_major_bit = _Options&RowMajor ? RowMajorBit : 0,
is_dynamic_size_storage = _MaxRows==Dynamic || _MaxCols==Dynamic,
max_size = is_dynamic_size_storage ? Dynamic : _MaxRows*_MaxCols,
default_alignment = compute_default_alignment<_Scalar,max_size>::value,
actual_alignment = ((_Options&DontAlign)==0) ? default_alignment : 0,
required_alignment = unpacket_traits<PacketScalar>::alignment,
packet_access_bit = packet_traits<_Scalar>::Vectorizable && (actual_alignment>=required_alignment) ? PacketAccessBit : 0
};
public:
typedef _Scalar Scalar;
typedef Dense StorageKind;
typedef Eigen::Index StorageIndex;
typedef MatrixXpr XprKind;
enum {
RowsAtCompileTime = _Rows,
ColsAtCompileTime = _Cols,
MaxRowsAtCompileTime = _MaxRows,
MaxColsAtCompileTime = _MaxCols,
Flags = compute_matrix_flags<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols>::ret,
Options = _Options,
InnerStrideAtCompileTime = 1,
OuterStrideAtCompileTime = (Options&RowMajor) ? ColsAtCompileTime : RowsAtCompileTime,
// FIXME, the following flag is only used to define NeedsToAlign in PlainObjectBase
EvaluatorFlags = LinearAccessBit | DirectAccessBit | packet_access_bit | row_major_bit,
Alignment = actual_alignment
};
};
}
template<typename _Scalar, int _Rows, int _Cols, int _Options, int _MaxRows, int _MaxCols> template<typename _Scalar, int _Rows, int _Cols, int _Options, int _MaxRows, int _MaxCols>
class Matrix class Matrix
: public PlainObjectBase<Matrix<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols> > : public PlainObjectBase<Matrix<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols> >
View File
@ -43,7 +43,7 @@ namespace Eigen {
* This class can be extended with the help of the plugin mechanism described on the page * This class can be extended with the help of the plugin mechanism described on the page
* \ref TopicCustomizingEigen by defining the preprocessor symbol \c EIGEN_MATRIXBASE_PLUGIN. * \ref TopicCustomizingEigen by defining the preprocessor symbol \c EIGEN_MATRIXBASE_PLUGIN.
* *
* \sa \ref TopicClassHierarchy * \sa \blank \ref TopicClassHierarchy
*/ */
template<typename Derived> class MatrixBase template<typename Derived> class MatrixBase
: public DenseBase<Derived> : public DenseBase<Derived>
@ -66,7 +66,7 @@ template<typename Derived> class MatrixBase
using Base::MaxSizeAtCompileTime; using Base::MaxSizeAtCompileTime;
using Base::IsVectorAtCompileTime; using Base::IsVectorAtCompileTime;
using Base::Flags; using Base::Flags;
using Base::derived; using Base::derived;
using Base::const_cast_derived; using Base::const_cast_derived;
using Base::rows; using Base::rows;
@ -135,14 +135,14 @@ template<typename Derived> class MatrixBase
/** Special case of the template operator=, in order to prevent the compiler /** Special case of the template operator=, in order to prevent the compiler
* from generating a default operator= (issue hit with g++ 4.1) * from generating a default operator= (issue hit with g++ 4.1)
*/ */
EIGEN_DEVICE_FUNC EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
Derived& operator=(const MatrixBase& other); Derived& operator=(const MatrixBase& other);
// We cannot inherit here via Base::operator= since it is causing // We cannot inherit here via Base::operator= since it is causing
// trouble with MSVC. // trouble with MSVC.
template <typename OtherDerived> template <typename OtherDerived>
EIGEN_DEVICE_FUNC EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
Derived& operator=(const DenseBase<OtherDerived>& other); Derived& operator=(const DenseBase<OtherDerived>& other);
template <typename OtherDerived> template <typename OtherDerived>
@ -154,10 +154,10 @@ template<typename Derived> class MatrixBase
Derived& operator=(const ReturnByValue<OtherDerived>& other); Derived& operator=(const ReturnByValue<OtherDerived>& other);
template<typename OtherDerived> template<typename OtherDerived>
EIGEN_DEVICE_FUNC EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
Derived& operator+=(const MatrixBase<OtherDerived>& other); Derived& operator+=(const MatrixBase<OtherDerived>& other);
template<typename OtherDerived> template<typename OtherDerived>
EIGEN_DEVICE_FUNC EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
Derived& operator-=(const MatrixBase<OtherDerived>& other); Derived& operator-=(const MatrixBase<OtherDerived>& other);
#ifdef __CUDACC__ #ifdef __CUDACC__
@ -175,7 +175,7 @@ template<typename Derived> class MatrixBase
#endif #endif
template<typename OtherDerived> template<typename OtherDerived>
EIGEN_DEVICE_FUNC EIGEN_DEVICE_FUNC
const Product<Derived,OtherDerived,LazyProduct> const Product<Derived,OtherDerived,LazyProduct>
lazyProduct(const MatrixBase<OtherDerived> &other) const; lazyProduct(const MatrixBase<OtherDerived> &other) const;
@ -204,7 +204,9 @@ template<typename Derived> class MatrixBase
RealScalar blueNorm() const; RealScalar blueNorm() const;
RealScalar hypotNorm() const; RealScalar hypotNorm() const;
EIGEN_DEVICE_FUNC const PlainObject normalized() const; EIGEN_DEVICE_FUNC const PlainObject normalized() const;
EIGEN_DEVICE_FUNC const PlainObject stableNormalized() const;
EIGEN_DEVICE_FUNC void normalize(); EIGEN_DEVICE_FUNC void normalize();
EIGEN_DEVICE_FUNC void stableNormalize();
EIGEN_DEVICE_FUNC const AdjointReturnType adjoint() const; EIGEN_DEVICE_FUNC const AdjointReturnType adjoint() const;
EIGEN_DEVICE_FUNC void adjointInPlace(); EIGEN_DEVICE_FUNC void adjointInPlace();
@ -212,7 +214,7 @@ template<typename Derived> class MatrixBase
typedef Diagonal<Derived> DiagonalReturnType; typedef Diagonal<Derived> DiagonalReturnType;
EIGEN_DEVICE_FUNC EIGEN_DEVICE_FUNC
DiagonalReturnType diagonal(); DiagonalReturnType diagonal();
typedef typename internal::add_const<Diagonal<const Derived> >::type ConstDiagonalReturnType;
EIGEN_DEVICE_FUNC
ConstDiagonalReturnType diagonal() const;
@@ -220,14 +222,14 @@ template<typename Derived> class MatrixBase
template<int Index> struct DiagonalIndexReturnType { typedef Diagonal<Derived,Index> Type; };
template<int Index> struct ConstDiagonalIndexReturnType { typedef const Diagonal<const Derived,Index> Type; };
template<int Index>
EIGEN_DEVICE_FUNC
typename DiagonalIndexReturnType<Index>::Type diagonal();
template<int Index>
EIGEN_DEVICE_FUNC
typename ConstDiagonalIndexReturnType<Index>::Type diagonal() const;
typedef Diagonal<Derived,DynamicIndex> DiagonalDynamicIndexReturnType;
typedef typename internal::add_const<Diagonal<const Derived,DynamicIndex> >::type ConstDiagonalDynamicIndexReturnType;
@@ -249,7 +251,7 @@ template<typename Derived> class MatrixBase
template<unsigned int UpLo> struct SelfAdjointViewReturnType { typedef SelfAdjointView<Derived, UpLo> Type; };
template<unsigned int UpLo> struct ConstSelfAdjointViewReturnType { typedef const SelfAdjointView<const Derived, UpLo> Type; };
template<unsigned int UpLo>
EIGEN_DEVICE_FUNC
typename SelfAdjointViewReturnType<UpLo>::Type selfadjointView();
template<unsigned int UpLo>
@@ -338,7 +340,7 @@ template<typename Derived> class MatrixBase
EIGEN_DEVICE_FUNC
inline const Inverse<Derived> inverse() const;
template<typename ResultType>
inline void computeInverseAndDetWithCheck(
ResultType& inverse,
@@ -364,6 +366,7 @@ template<typename Derived> class MatrixBase
inline const HouseholderQR<PlainObject> householderQr() const;
inline const ColPivHouseholderQR<PlainObject> colPivHouseholderQr() const;
inline const FullPivHouseholderQR<PlainObject> fullPivHouseholderQr() const;
inline const CompleteOrthogonalDecomposition<PlainObject> completeOrthogonalDecomposition() const;
/////////// Eigenvalues module ///////////
@@ -386,25 +389,29 @@ template<typename Derived> class MatrixBase
#endif // EIGEN_PARSED_BY_DOXYGEN
template<typename OtherDerived>
EIGEN_DEVICE_FUNC
#ifndef EIGEN_PARSED_BY_DOXYGEN
inline typename cross_product_return_type<OtherDerived>::type
#else
inline PlainObject
#endif
cross(const MatrixBase<OtherDerived>& other) const;
template<typename OtherDerived>
EIGEN_DEVICE_FUNC
inline PlainObject cross3(const MatrixBase<OtherDerived>& other) const;
EIGEN_DEVICE_FUNC
inline PlainObject unitOrthogonal(void) const;
inline Matrix<Scalar,3,1> eulerAngles(Index a0, Index a1, Index a2) const;
inline ScalarMultipleReturnType operator*(const UniformScaling<Scalar>& s) const;
// put this as separate enum value to work around possible GCC 4.3 bug (?)
enum { HomogeneousReturnTypeDirection = ColsAtCompileTime==1&&RowsAtCompileTime==1 ? ((internal::traits<Derived>::Flags&RowMajorBit)==RowMajorBit ? Horizontal : Vertical)
                                      : ColsAtCompileTime==1 ? Vertical : Horizontal };
typedef Homogeneous<Derived, HomogeneousReturnTypeDirection> HomogeneousReturnType;
inline HomogeneousReturnType homogeneous() const;
enum {
  SizeMinusOne = SizeAtCompileTime==Dynamic ? Dynamic : SizeAtCompileTime-1
};
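For illustration (example ours, not part of the commit): the newly declared completeOrthogonalDecomposition() slots in next to the QR family and gives the minimum-norm least-squares solution. A minimal sketch, assuming a random overdetermined system:
\code
#include <Eigen/Dense>
#include <iostream>

int main()
{
  // Hypothetical 4x3 least-squares problem; any sizes work.
  Eigen::MatrixXd A = Eigen::MatrixXd::Random(4, 3);
  Eigen::VectorXd b = Eigen::VectorXd::Random(4);
  // Minimum-norm least-squares solution via the complete orthogonal decomposition.
  Eigen::VectorXd x = A.completeOrthogonalDecomposition().solve(b);
  std::cout << x << std::endl;
}
\endcode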


@@ -13,25 +13,24 @@
namespace Eigen {
/** \class NestByValue
* \ingroup Core_Module
*
* \brief Expression which must be nested by value
*
* \param ExpressionType the type of the object of which we are requiring nesting-by-value
*
* This class is the return type of MatrixBase::nestByValue()
* and most of the time this is the only way it is used.
*
* \sa MatrixBase::nestByValue()
*/
namespace internal {
template<typename ExpressionType>
struct traits<NestByValue<ExpressionType> > : public traits<ExpressionType>
{};
}
/** \class NestByValue
* \ingroup Core_Module
*
* \brief Expression which must be nested by value
*
* \tparam ExpressionType the type of the object of which we are requiring nesting-by-value
*
* This class is the return type of MatrixBase::nestByValue()
* and most of the time this is the only way it is used.
*
* \sa MatrixBase::nestByValue()
*/
template<typename ExpressionType> class NestByValue
: public internal::dense_xpr_base< NestByValue<ExpressionType> >::type
{


@@ -17,7 +17,7 @@ namespace Eigen {
*
* \brief Pseudo expression providing an operator = assuming no aliasing
*
* \param ExpressionType the type of the object on which to do the lazy assignment
* \tparam ExpressionType the type of the object on which to do the lazy assignment
*
* This class represents an expression with special assignment operators
* assuming no aliasing between the target expression and the source expression.


@@ -17,7 +17,7 @@ namespace Eigen {
*
* \brief Holds information about the various numeric (i.e. scalar) types allowed by Eigen.
*
* \param T the numeric type at hand
* \tparam T the numeric type at hand
*
* This class stores enums, typedefs and static methods giving information about a numeric type.
*
@@ -60,6 +60,23 @@ template<typename T> struct GenericNumTraits
MulCost = 1
};
// Division is messy but important, because it is expensive and throughput
// varies significantly. The following numbers are based on min division
// throughput on Haswell.
template<bool Vectorized>
struct Div {
enum {
#ifdef EIGEN_VECTORIZE_AVX
AVX = true,
#else
AVX = false,
#endif
Cost = IsInteger ? (sizeof(T) == 8 ? (IsSigned ? 24 : 21) : (IsSigned ? 8 : 9)):
Vectorized ? (sizeof(T) == 8 ? (AVX ? 16 : 8) : (AVX ? 14 : 7)) : 8
};
};
typedef T Real;
typedef typename internal::conditional<
IsInteger,
@@ -71,11 +88,7 @@ template<typename T> struct GenericNumTraits
EIGEN_DEVICE_FUNC
static inline Real epsilon()
{
#if defined(__CUDA_ARCH__)
return internal::device::numeric_limits<T>::epsilon();
#else
return std::numeric_limits<T>::epsilon();
#endif
return numext::numeric_limits<T>::epsilon();
}
EIGEN_DEVICE_FUNC
static inline Real dummy_precision()
@@ -87,20 +100,22 @@ template<typename T> struct GenericNumTraits
EIGEN_DEVICE_FUNC
static inline T highest() {
#if defined(__CUDA_ARCH__)
return (internal::device::numeric_limits<T>::max)();
#else
return (std::numeric_limits<T>::max)();
#endif
return (numext::numeric_limits<T>::max)();
}
EIGEN_DEVICE_FUNC
static inline T lowest() {
#if defined(__CUDA_ARCH__)
return IsInteger ? (internal::device::numeric_limits<T>::min)() : (-(internal::device::numeric_limits<T>::max)());
#else
return IsInteger ? (std::numeric_limits<T>::min)() : (-(std::numeric_limits<T>::max)());
#endif
return IsInteger ? (numext::numeric_limits<T>::min)() : (-(numext::numeric_limits<T>::max)());
}
EIGEN_DEVICE_FUNC
static inline T infinity() {
return numext::numeric_limits<T>::infinity();
}
EIGEN_DEVICE_FUNC
static inline T quiet_NaN() {
return numext::numeric_limits<T>::quiet_NaN();
}
};
@@ -138,7 +153,9 @@ template<typename _Real> struct NumTraits<std::complex<_Real> >
MulCost = 4 * NumTraits<Real>::MulCost + 2 * NumTraits<Real>::AddCost
};
EIGEN_DEVICE_FUNC
static inline Real epsilon() { return NumTraits<Real>::epsilon(); }
EIGEN_DEVICE_FUNC
static inline Real dummy_precision() { return NumTraits<Real>::dummy_precision(); }
};
@@ -151,7 +168,7 @@ struct NumTraits<Array<Scalar, Rows, Cols, Options, MaxRows, MaxCols> >
typedef typename NumTraits<Scalar>::NonInteger NonIntegerScalar;
typedef Array<NonIntegerScalar, Rows, Cols, Options, MaxRows, MaxCols> NonInteger;
typedef ArrayType & Nested;
enum {
IsComplex = NumTraits<Scalar>::IsComplex,
IsInteger = NumTraits<Scalar>::IsInteger,
@@ -161,8 +178,10 @@ struct NumTraits<Array<Scalar, Rows, Cols, Options, MaxRows, MaxCols> >
AddCost = ArrayType::SizeAtCompileTime==Dynamic ? HugeCost : ArrayType::SizeAtCompileTime * NumTraits<Scalar>::AddCost,
MulCost = ArrayType::SizeAtCompileTime==Dynamic ? HugeCost : ArrayType::SizeAtCompileTime * NumTraits<Scalar>::MulCost
};
EIGEN_DEVICE_FUNC
static inline RealScalar epsilon() { return NumTraits<RealScalar>::epsilon(); }
EIGEN_DEVICE_FUNC
static inline RealScalar dummy_precision() { return NumTraits<RealScalar>::dummy_precision(); }
};
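For illustration (example ours): with the switch to numext::numeric_limits, these NumTraits queries are usable identically in host and CUDA device code. A minimal host-side sketch, assuming only <Eigen/Core>:
\code
#include <Eigen/Core>
#include <iostream>

int main()
{
  // epsilon()/highest()/lowest() now route through numext::numeric_limits;
  // infinity() and quiet_NaN() are the entry points added in this hunk.
  std::cout << Eigen::NumTraits<float>::epsilon()  << "\n"
            << Eigen::NumTraits<float>::highest()  << "\n"
            << Eigen::NumTraits<float>::lowest()   << "\n"
            << Eigen::NumTraits<float>::infinity() << std::endl;
}
\endcode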


@@ -13,12 +13,18 @@
namespace Eigen {
namespace internal {
enum PermPermProduct_t {PermPermProduct};
} // end namespace internal
/** \class PermutationBase
* \ingroup Core_Module
*
* \brief Base class for permutations
*
* \param Derived the derived class
* \tparam Derived the derived class
*
* This class is the base class for all expressions representing a permutation matrix,
* internally stored as a vector of integers.
@@ -36,13 +42,6 @@ namespace Eigen {
*
* \sa class PermutationMatrix, class PermutationWrapper
*/
namespace internal {
enum PermPermProduct_t {PermPermProduct};
} // end namespace internal
template<typename Derived>
class PermutationBase : public EigenBase<Derived>
{
@@ -192,13 +191,13 @@ class PermutationBase : public EigenBase<Derived>
/** \returns the inverse permutation matrix.
*
* \note \note_try_to_help_rvo
* \note \blank \note_try_to_help_rvo
*/
inline InverseReturnType inverse() const
{ return InverseReturnType(derived()); }
/** \returns the tranpose permutation matrix.
*
* \note \note_try_to_help_rvo
* \note \blank \note_try_to_help_rvo
*/
inline InverseReturnType transpose() const
{ return InverseReturnType(derived()); }
@@ -225,7 +224,7 @@ class PermutationBase : public EigenBase<Derived>
/** \returns the product permutation matrix.
*
* \note \note_try_to_help_rvo
* \note \blank \note_try_to_help_rvo
*/
template<typename Other>
inline PlainPermutationType operator*(const PermutationBase<Other>& other) const
@@ -233,7 +232,7 @@ class PermutationBase : public EigenBase<Derived>
/** \returns the product of a permutation with another inverse permutation.
*
* \note \note_try_to_help_rvo
* \note \blank \note_try_to_help_rvo
*/
template<typename Other>
inline PlainPermutationType operator*(const InverseImpl<Other,PermutationStorage>& other) const
@@ -241,7 +240,7 @@ class PermutationBase : public EigenBase<Derived>
/** \returns the product of an inverse permutation with another permutation.
*
* \note \note_try_to_help_rvo
* \note \blank \note_try_to_help_rvo
*/
template<typename Other> friend
inline PlainPermutationType operator*(const InverseImpl<Other, PermutationStorage>& other, const PermutationBase& perm)
@@ -280,20 +279,6 @@ class PermutationBase : public EigenBase<Derived>
};
/** \class PermutationMatrix
* \ingroup Core_Module
*
* \brief Permutation matrix
*
* \param SizeAtCompileTime the number of rows/cols, or Dynamic
* \param MaxSizeAtCompileTime the maximum number of rows/cols, or Dynamic. This optional parameter defaults to SizeAtCompileTime. Most of the time, you should not have to specify it.
* \param StorageIndex the integer type of the indices
*
* This class represents a permutation matrix, internally stored as a vector of integers.
*
* \sa class PermutationBase, class PermutationWrapper, class DiagonalMatrix
*/
namespace internal {
template<int SizeAtCompileTime, int MaxSizeAtCompileTime, typename _StorageIndex>
struct traits<PermutationMatrix<SizeAtCompileTime, MaxSizeAtCompileTime, _StorageIndex> >
@@ -306,6 +291,19 @@ struct traits<PermutationMatrix<SizeAtCompileTime, MaxSizeAtCompileTime, _Storag
};
}
/** \class PermutationMatrix
* \ingroup Core_Module
*
* \brief Permutation matrix
*
* \tparam SizeAtCompileTime the number of rows/cols, or Dynamic
* \tparam MaxSizeAtCompileTime the maximum number of rows/cols, or Dynamic. This optional parameter defaults to SizeAtCompileTime. Most of the time, you should not have to specify it.
* \tparam _StorageIndex the integer type of the indices
*
* This class represents a permutation matrix, internally stored as a vector of integers.
*
* \sa class PermutationBase, class PermutationWrapper, class DiagonalMatrix
*/
template<int SizeAtCompileTime, int MaxSizeAtCompileTime, typename _StorageIndex>
class PermutationMatrix : public PermutationBase<PermutationMatrix<SizeAtCompileTime, MaxSizeAtCompileTime, _StorageIndex> >
{
@@ -482,18 +480,6 @@ class Map<PermutationMatrix<SizeAtCompileTime, MaxSizeAtCompileTime, _StorageInd
IndicesType m_indices;
};
/** \class PermutationWrapper
* \ingroup Core_Module
*
* \brief Class to view a vector of integers as a permutation matrix
*
* \param _IndicesType the type of the vector of integer (can be any compatible expression)
*
* This class allows to view any vector expression of integers as a permutation matrix.
*
* \sa class PermutationBase, class PermutationMatrix
*/
template<typename _IndicesType> class TranspositionsWrapper;
namespace internal {
template<typename _IndicesType>
@@ -513,6 +499,17 @@ struct traits<PermutationWrapper<_IndicesType> >
};
}
/** \class PermutationWrapper
* \ingroup Core_Module
*
* \brief Class to view a vector of integers as a permutation matrix
*
* \tparam _IndicesType the type of the vector of integer (can be any compatible expression)
*
* This class allows to view any vector expression of integers as a permutation matrix.
*
* \sa class PermutationBase, class PermutationMatrix
*/
template<typename _IndicesType>
class PermutationWrapper : public PermutationBase<PermutationWrapper<_IndicesType> >
{
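For illustration (example ours): a minimal PermutationMatrix usage sketch; the indices are made up, and inverse() returns a lightweight expression as the \note_try_to_help_rvo remarks above suggest:
\code
#include <Eigen/Dense>
#include <iostream>

int main()
{
  Eigen::PermutationMatrix<3> P;
  P.indices() << 1, 2, 0;                       // illustrative permutation
  Eigen::Matrix3d M = Eigen::Matrix3d::Random();
  std::cout << (P * M) << "\n";                 // apply P on the left (row permutation)
  std::cout << (M * P.inverse()) << std::endl;  // inverse() is essentially free
}
\endcode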


@@ -533,7 +533,7 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
public:
/** \copydoc MatrixBase::operator=(const EigenBase<OtherDerived>&)
/** \copydoc DenseBase::operator=(const EigenBase<OtherDerived>&)
*/
template<typename OtherDerived>
EIGEN_DEVICE_FUNC
@@ -618,8 +618,8 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
//@}
using Base::setConstant;
EIGEN_DEVICE_FUNC Derived& setConstant(Index size, const Scalar& value);
EIGEN_DEVICE_FUNC Derived& setConstant(Index size, const Scalar& val);
EIGEN_DEVICE_FUNC Derived& setConstant(Index rows, Index cols, const Scalar& value);
EIGEN_DEVICE_FUNC Derived& setConstant(Index rows, Index cols, const Scalar& val);
using Base::setZero;
EIGEN_DEVICE_FUNC Derived& setZero(Index size);


@@ -14,22 +14,6 @@ namespace Eigen {
template<typename Lhs, typename Rhs, int Option, typename StorageKind> class ProductImpl;
/** \class Product
* \ingroup Core_Module
*
* \brief Expression of the product of two arbitrary matrices or vectors
*
* \param Lhs the type of the left-hand side expression
* \param Rhs the type of the right-hand side expression
*
* This class represents an expression of the product of two arbitrary matrices.
*
* The other template parameters are:
* \tparam Option can be DefaultProduct, AliasFreeProduct, or LazyProduct
*
*/
namespace internal {
// Determine the scalar of Product<Lhs, Rhs>. This is normally the same as Lhs::Scalar times
@@ -102,7 +86,20 @@ struct traits<Product<Lhs, Rhs, Option> >
} // end namespace internal
/** \class Product
* \ingroup Core_Module
*
* \brief Expression of the product of two arbitrary matrices or vectors
*
* \tparam _Lhs the type of the left-hand side expression
* \tparam _Rhs the type of the right-hand side expression
*
* This class represents an expression of the product of two arbitrary matrices.
*
* The other template parameters are:
* \tparam Option can be DefaultProduct, AliasFreeProduct, or LazyProduct
*
*/
template<typename _Lhs, typename _Rhs, int Option>
class Product : public ProductImpl<_Lhs,_Rhs,Option,
typename internal::product_promote_storage_type<typename internal::traits<_Lhs>::StorageKind,

Eigen/src/Core/ProductEvaluators.h (Executable file → Normal file)

@@ -38,10 +38,9 @@ struct evaluator<Product<Lhs, Rhs, Options> >
// Catch scalar * ( A * B ) and transform it to (A*scalar) * B
// TODO we should apply that rule only if that's really helpful
template<typename Lhs, typename Rhs, typename Scalar>
struct evaluator_traits<CwiseUnaryOp<internal::scalar_multiple_op<Scalar>, const Product<Lhs, Rhs, DefaultProduct> > >
 : evaluator_traits_base<CwiseUnaryOp<internal::scalar_multiple_op<Scalar>, const Product<Lhs, Rhs, DefaultProduct> > >
{
enum { AssumeAliasing = 1 };
};
struct evaluator_assume_aliasing<CwiseUnaryOp<internal::scalar_multiple_op<Scalar>, const Product<Lhs, Rhs, DefaultProduct> > >
{
static const bool value = true;
};
template<typename Lhs, typename Rhs, typename Scalar>
struct evaluator<CwiseUnaryOp<internal::scalar_multiple_op<Scalar>, const Product<Lhs, Rhs, DefaultProduct> > >
@@ -81,17 +80,8 @@ template< typename Lhs, typename Rhs,
struct generic_product_impl;
template<typename Lhs, typename Rhs>
struct evaluator_traits<Product<Lhs, Rhs, DefaultProduct> >
 : evaluator_traits_base<Product<Lhs, Rhs, DefaultProduct> >
{
enum { AssumeAliasing = 1 };
};
template<typename Lhs, typename Rhs>
struct evaluator_traits<Product<Lhs, Rhs, AliasFreeProduct> >
 : evaluator_traits_base<Product<Lhs, Rhs, AliasFreeProduct> >
{
enum { AssumeAliasing = 0 };
};
struct evaluator_assume_aliasing<Product<Lhs, Rhs, DefaultProduct> > {
static const bool value = true;
};
// This is the default evaluator implementation for products:
@@ -107,7 +97,8 @@ struct product_evaluator<Product<Lhs, Rhs, Options>, ProductTag, LhsShape, RhsSh
Flags = Base::Flags | EvalBeforeNestingBit
};
EIGEN_DEVICE_FUNC explicit product_evaluator(const XprType& xpr)
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
explicit product_evaluator(const XprType& xpr)
: m_result(xpr.rows(), xpr.cols())
{
::new (static_cast<Base*>(this)) Base(m_result);
@@ -189,6 +180,13 @@ struct Assignment<DstXprType, CwiseUnaryOp<internal::scalar_multiple_op<ScalarBi
//----------------------------------------
// Catch "Dense ?= xpr + Product<>" expression to save one temporary
// FIXME we could probably enable these rules for any product, i.e., not only Dense and DefaultProduct
// TODO enable it for "Dense ?= xpr - Product<>" as well.
template<typename OtherXpr, typename Lhs, typename Rhs>
struct evaluator_assume_aliasing<CwiseBinaryOp<internal::scalar_sum_op<typename OtherXpr::Scalar>, const OtherXpr,
const Product<Lhs,Rhs,DefaultProduct> >, DenseShape > {
static const bool value = true;
};
template<typename DstXprType, typename OtherXpr, typename ProductType, typename Scalar, typename Func1, typename Func2>
struct assignment_from_xpr_plus_product
@@ -415,7 +413,8 @@ struct product_evaluator<Product<Lhs, Rhs, LazyProduct>, ProductTag, DenseShape,
typedef typename XprType::PacketScalar PacketScalar;
typedef typename XprType::PacketReturnType PacketReturnType;
EIGEN_DEVICE_FUNC explicit product_evaluator(const XprType& xpr)
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
explicit product_evaluator(const XprType& xpr)
: m_lhs(xpr.lhs()),
m_rhs(xpr.rhs()),
m_lhsImpl(m_lhs), // FIXME the creation of the evaluator objects should result in a no-op, but check that!
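For illustration (example ours): the evaluator_assume_aliasing specializations above are what make a plain product assignment go through a temporary unless the caller opts out. A hedged sketch of the user-visible effect:
\code
#include <Eigen/Dense>

int main()
{
  Eigen::MatrixXd A = Eigen::MatrixXd::Random(64, 64);
  Eigen::MatrixXd B = Eigen::MatrixXd::Random(64, 64);
  Eigen::MatrixXd C(64, 64), D = Eigen::MatrixXd::Random(64, 64);
  C = A * B;            // products assume aliasing: evaluated into a temporary first
  C.noalias() = A * B;  // caller guarantees no aliasing: written to C directly
  C = D + A * B;        // recognized by the "xpr + Product<>" rule to save one temporary
}
\endcode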


@@ -12,76 +12,6 @@
namespace Eigen {
/** \class Ref
* \ingroup Core_Module
*
* \brief A matrix or vector expression mapping an existing expression
*
* \tparam PlainObjectType the equivalent matrix type of the mapped data
* \tparam MapOptions specifies the pointer alignment in bytes. It can be: \c #Aligned128, , \c #Aligned64, \c #Aligned32, \c #Aligned16, \c #Aligned8 or \c #Unaligned.
* The default is \c #Unaligned.
* \tparam StrideType optionally specifies strides. By default, Ref implies a contiguous storage along the inner dimension (inner stride==1),
* but accepts a variable outer stride (leading dimension).
* This can be overridden by specifying strides.
* The type passed here must be a specialization of the Stride template, see examples below.
*
* This class provides a way to write non-template functions taking Eigen objects as parameters while limiting the number of copies.
* A Ref<> object can represent either a const expression or a l-value:
* \code
* // in-out argument:
* void foo1(Ref<VectorXf> x);
*
* // read-only const argument:
* void foo2(const Ref<const VectorXf>& x);
* \endcode
*
* In the in-out case, the input argument must satisfy the constraints of the actual Ref<> type, otherwise a compilation issue will be triggered.
* By default, a Ref<VectorXf> can reference any dense vector expression of float having a contiguous memory layout.
* Likewise, a Ref<MatrixXf> can reference any column-major dense matrix expression of float whose column's elements are contiguously stored with
* the possibility to have a constant space in-between each column, i.e. the inner stride must be equal to 1, but the outer stride (or leading dimension)
* can be greater than the number of rows.
*
* In the const case, if the input expression does not match the above requirement, then it is evaluated into a temporary before being passed to the function.
* Here are some examples:
* \code
* MatrixXf A;
* VectorXf a;
* foo1(a.head()); // OK
* foo1(A.col()); // OK
* foo1(A.row()); // Compilation error because here innerstride!=1
* foo2(A.row()); // Compilation error because A.row() is a 1xN object while foo2 is expecting a Nx1 object
* foo2(A.row().transpose()); // The row is copied into a contiguous temporary
* foo2(2*a); // The expression is evaluated into a temporary
* foo2(A.col().segment(2,4)); // No temporary
* \endcode
*
* The range of inputs that can be referenced without temporary can be enlarged using the last two template parameters.
* Here is an example accepting an innerstride!=1:
* \code
* // in-out argument:
* void foo3(Ref<VectorXf,0,InnerStride<> > x);
* foo3(A.row()); // OK
* \endcode
* The downside here is that the function foo3 might be significantly slower than foo1 because it won't be able to exploit vectorization, and will involve more
* expensive address computations even if the input is contiguously stored in memory. To overcome this issue, one might propose to overload internally calling a
* template function, e.g.:
* \code
* // in the .h:
* void foo(const Ref<MatrixXf>& A);
* void foo(const Ref<MatrixXf,0,Stride<> >& A);
*
* // in the .cpp:
* template<typename TypeOfA> void foo_impl(const TypeOfA& A) {
* ... // crazy code goes here
* }
* void foo(const Ref<MatrixXf>& A) { foo_impl(A); }
* void foo(const Ref<MatrixXf,0,Stride<> >& A) { foo_impl(A); }
* \endcode
*
*
* \sa PlainObjectBase::Map(), \ref TopicStorageOrders
*/
namespace internal {
template<typename _PlainObjectType, int _Options, typename _StrideType>
@@ -182,7 +112,75 @@ protected:
StrideBase m_stride;
};
/** \class Ref
* \ingroup Core_Module
*
* \brief A matrix or vector expression mapping an existing expression
*
* \tparam PlainObjectType the equivalent matrix type of the mapped data
* \tparam Options specifies the pointer alignment in bytes. It can be: \c #Aligned128, , \c #Aligned64, \c #Aligned32, \c #Aligned16, \c #Aligned8 or \c #Unaligned.
* The default is \c #Unaligned.
* \tparam StrideType optionally specifies strides. By default, Ref implies a contiguous storage along the inner dimension (inner stride==1),
* but accepts a variable outer stride (leading dimension).
* This can be overridden by specifying strides.
* The type passed here must be a specialization of the Stride template, see examples below.
*
* This class provides a way to write non-template functions taking Eigen objects as parameters while limiting the number of copies.
* A Ref<> object can represent either a const expression or a l-value:
* \code
* // in-out argument:
* void foo1(Ref<VectorXf> x);
*
* // read-only const argument:
* void foo2(const Ref<const VectorXf>& x);
* \endcode
*
* In the in-out case, the input argument must satisfy the constraints of the actual Ref<> type, otherwise a compilation issue will be triggered.
* By default, a Ref<VectorXf> can reference any dense vector expression of float having a contiguous memory layout.
* Likewise, a Ref<MatrixXf> can reference any column-major dense matrix expression of float whose column's elements are contiguously stored with
* the possibility to have a constant space in-between each column, i.e. the inner stride must be equal to 1, but the outer stride (or leading dimension)
* can be greater than the number of rows.
*
* In the const case, if the input expression does not match the above requirement, then it is evaluated into a temporary before being passed to the function.
* Here are some examples:
* \code
* MatrixXf A;
* VectorXf a;
* foo1(a.head()); // OK
* foo1(A.col()); // OK
* foo1(A.row()); // Compilation error because here innerstride!=1
* foo2(A.row()); // Compilation error because A.row() is a 1xN object while foo2 is expecting a Nx1 object
* foo2(A.row().transpose()); // The row is copied into a contiguous temporary
* foo2(2*a); // The expression is evaluated into a temporary
* foo2(A.col().segment(2,4)); // No temporary
* \endcode
*
* The range of inputs that can be referenced without temporary can be enlarged using the last two template parameters.
* Here is an example accepting an innerstride!=1:
* \code
* // in-out argument:
* void foo3(Ref<VectorXf,0,InnerStride<> > x);
* foo3(A.row()); // OK
* \endcode
* The downside here is that the function foo3 might be significantly slower than foo1 because it won't be able to exploit vectorization, and will involve more
* expensive address computations even if the input is contiguously stored in memory. To overcome this issue, one might propose to overload internally calling a
* template function, e.g.:
* \code
* // in the .h:
* void foo(const Ref<MatrixXf>& A);
* void foo(const Ref<MatrixXf,0,Stride<> >& A);
*
* // in the .cpp:
* template<typename TypeOfA> void foo_impl(const TypeOfA& A) {
* ... // crazy code goes here
* }
* void foo(const Ref<MatrixXf>& A) { foo_impl(A); }
* void foo(const Ref<MatrixXf,0,Stride<> >& A) { foo_impl(A); }
* \endcode
*
*
* \sa PlainObjectBase::Map(), \ref TopicStorageOrders
*/
template<typename PlainObjectType, int Options, typename StrideType> class Ref
: public RefBase<Ref<PlainObjectType, Options, StrideType> >
{
@@ -209,6 +207,7 @@ template<typename PlainObjectType, int Options, typename StrideType> class Ref
EIGEN_DEVICE_FUNC inline Ref(const DenseBase<Derived>& expr,
typename internal::enable_if<bool(Traits::template match<Derived>::MatchAtCompileTime),Derived>::type* = 0)
#else
/** Implicit constructor from any dense expression */
template<typename Derived>
inline Ref(DenseBase<Derived>& expr)
#endif
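A compilable sketch of the calling patterns the Ref documentation above describes (function and variable names are ours, not from the diff):
\code
#include <Eigen/Dense>
#include <iostream>

// In-out: accepts any contiguous float vector expression without copying.
void scale(Eigen::Ref<Eigen::VectorXf> x) { x *= 2.0f; }

// Read-only: non-matching expressions are evaluated into a temporary.
float first(const Eigen::Ref<const Eigen::VectorXf>& x) { return x(0); }

int main()
{
  Eigen::MatrixXf A = Eigen::MatrixXf::Random(4, 4);
  Eigen::VectorXf v = Eigen::VectorXf::Ones(4);
  scale(v);                                   // plain vector: referenced directly
  scale(A.col(1));                            // column: inner stride 1, referenced directly
  std::cout << first(2.0f * v) << std::endl;  // expression: copied into a temporary
}
\endcode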


@@ -12,21 +12,6 @@
namespace Eigen {
/**
* \class Replicate
* \ingroup Core_Module
*
* \brief Expression of the multiple replication of a matrix or vector
*
* \param MatrixType the type of the object we are replicating
*
* This class represents an expression of the multiple replication of a matrix or vector.
* It is the return type of DenseBase::replicate() and most of the time
* this is the only way it is used.
*
* \sa DenseBase::replicate()
*/
namespace internal {
template<typename MatrixType,int RowFactor,int ColFactor>
struct traits<Replicate<MatrixType,RowFactor,ColFactor> >
@@ -57,6 +42,22 @@ struct traits<Replicate<MatrixType,RowFactor,ColFactor> >
};
}
/**
* \class Replicate
* \ingroup Core_Module
*
* \brief Expression of the multiple replication of a matrix or vector
*
* \tparam MatrixType the type of the object we are replicating
* \tparam RowFactor number of repetitions at compile time along the vertical direction, can be Dynamic.
* \tparam ColFactor number of repetitions at compile time along the horizontal direction, can be Dynamic.
*
* This class represents an expression of the multiple replication of a matrix or vector.
* It is the return type of DenseBase::replicate() and most of the time
* this is the only way it is used.
*
* \sa DenseBase::replicate()
*/
template<typename MatrixType,int RowFactor,int ColFactor> class Replicate
: public internal::dense_xpr_base< Replicate<MatrixType,RowFactor,ColFactor> >::type
{
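A minimal sketch of DenseBase::replicate() with both runtime and compile-time factors (example ours):
\code
#include <Eigen/Dense>
#include <iostream>

int main()
{
  Eigen::Vector3i v(1, 2, 3);
  // Tile v twice vertically and three times horizontally -> a 6x3 matrix.
  std::cout << v.replicate(2, 3) << "\n";
  // The same with compile-time RowFactor/ColFactor:
  std::cout << v.replicate<2, 3>() << std::endl;
}
\endcode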


@@ -13,11 +13,6 @@
namespace Eigen {
/** \class ReturnByValue
* \ingroup Core_Module
*
*/
namespace internal {
template<typename Derived>
@@ -48,6 +43,10 @@ struct nested_eval<ReturnByValue<Derived>, n, PlainObject>
} // end namespace internal
/** \class ReturnByValue
* \ingroup Core_Module
*
*/
template<typename Derived> class ReturnByValue
: public internal::dense_xpr_base< ReturnByValue<Derived> >::type, internal::no_assignment_operator
{


@@ -14,20 +14,6 @@
namespace Eigen {
/** \class Reverse
* \ingroup Core_Module
*
* \brief Expression of the reverse of a vector or matrix
*
* \param MatrixType the type of the object of which we are taking the reverse
*
* This class represents an expression of the reverse of a vector.
* It is the return type of MatrixBase::reverse() and VectorwiseOp::reverse()
* and most of the time this is the only way it is used.
*
* \sa MatrixBase::reverse(), VectorwiseOp::reverse()
*/
namespace internal {
template<typename MatrixType, int Direction>
@@ -60,6 +46,20 @@ template<typename PacketType> struct reverse_packet_cond<PacketType,false>
} // end namespace internal
/** \class Reverse
* \ingroup Core_Module
*
* \brief Expression of the reverse of a vector or matrix
*
* \tparam MatrixType the type of the object of which we are taking the reverse
* \tparam Direction defines the direction of the reverse operation, can be Vertical, Horizontal, or BothDirections
*
* This class represents an expression of the reverse of a vector.
* It is the return type of MatrixBase::reverse() and VectorwiseOp::reverse()
* and most of the time this is the only way it is used.
*
* \sa MatrixBase::reverse(), VectorwiseOp::reverse()
*/
template<typename MatrixType, int Direction> class Reverse
: public internal::dense_xpr_base< Reverse<MatrixType, Direction> >::type
{
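A small usage sketch (example ours; reverseInPlace() is the in-place counterpart):
\code
#include <Eigen/Dense>
#include <iostream>

int main()
{
  Eigen::Vector4i v(1, 2, 3, 4);
  std::cout << v.reverse().transpose() << "\n";  // 4 3 2 1, as an expression
  v.reverseInPlace();                            // modifies v directly
  std::cout << v.transpose() << std::endl;
}
\endcode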


@@ -32,7 +32,7 @@ namespace internal {
template<typename MatrixType, unsigned int UpLo>
struct traits<SelfAdjointView<MatrixType, UpLo> > : traits<MatrixType>
{
typedef typename ref_selector<MatrixType>::type MatrixTypeNested;
typedef typename ref_selector<MatrixType>::non_const_type MatrixTypeNested;
typedef typename remove_all<MatrixTypeNested>::type MatrixTypeNestedCleaned;
typedef MatrixType ExpressionType;
typedef typename MatrixType::PlainObject FullMatrixType;
@@ -97,7 +97,7 @@ template<typename _MatrixType, unsigned int UpLo> class SelfAdjointView
{
EIGEN_STATIC_ASSERT_LVALUE(SelfAdjointView);
Base::check_coordinates_internal(row, col);
return m_matrix.const_cast_derived().coeffRef(row, col);
return m_matrix.coeffRef(row, col);
}
/** \internal */
@@ -107,7 +107,7 @@ template<typename _MatrixType, unsigned int UpLo> class SelfAdjointView
EIGEN_DEVICE_FUNC
const MatrixTypeNestedCleaned& nestedExpression() const { return m_matrix; }
EIGEN_DEVICE_FUNC
MatrixTypeNestedCleaned& nestedExpression() { return *const_cast<MatrixTypeNestedCleaned*>(&m_matrix); }
MatrixTypeNestedCleaned& nestedExpression() { return m_matrix; }
/** Efficient triangular matrix times vector/matrix product */
template<typename OtherDerived>
@@ -203,8 +203,6 @@ struct evaluator_traits<SelfAdjointView<MatrixType,Mode> >
{
typedef typename storage_kind_to_evaluator_kind<typename MatrixType::StorageKind>::Kind Kind;
typedef SelfAdjointShape Shape;
static const int AssumeAliasing = 0;
};
template<int UpLo, int SetOpposite, typename DstEvaluatorTypeT, typename SrcEvaluatorTypeT, typename Functor, int Version>
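For illustration (example ours): treating one triangle of a square matrix as self-adjoint via the view this file defines:
\code
#include <Eigen/Dense>
#include <iostream>

int main()
{
  Eigen::Matrix3d M = Eigen::Matrix3d::Random();
  // Interpret the lower triangle of M as a self-adjoint (symmetric) matrix.
  Eigen::Matrix3d S = M.selfadjointView<Eigen::Lower>();
  std::cout << S << std::endl;
}
\endcode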


@@ -13,7 +13,7 @@
namespace Eigen {
template<typename Derived>
inline Derived& DenseBase<Derived>::operator*=(const Scalar& other)
EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::operator*=(const Scalar& other)
{
typedef typename Derived::PlainObject PlainObject;
internal::call_assignment(this->derived(), PlainObject::Constant(rows(),cols(),other), internal::mul_assign_op<Scalar>());
@@ -21,7 +21,7 @@ inline Derived& DenseBase<Derived>::operator*=(const Scalar& other)
}
template<typename Derived>
inline Derived& ArrayBase<Derived>::operator+=(const Scalar& other)
EIGEN_STRONG_INLINE Derived& ArrayBase<Derived>::operator+=(const Scalar& other)
{
typedef typename Derived::PlainObject PlainObject;
internal::call_assignment(this->derived(), PlainObject::Constant(rows(),cols(),other), internal::add_assign_op<Scalar>());
@@ -29,7 +29,7 @@ inline Derived& ArrayBase<Derived>::operator+=(const Scalar& other)
}
template<typename Derived>
inline Derived& ArrayBase<Derived>::operator-=(const Scalar& other)
EIGEN_STRONG_INLINE Derived& ArrayBase<Derived>::operator-=(const Scalar& other)
{
typedef typename Derived::PlainObject PlainObject;
internal::call_assignment(this->derived(), PlainObject::Constant(rows(),cols(),other), internal::sub_assign_op<Scalar>());
@@ -37,7 +37,7 @@ inline Derived& ArrayBase<Derived>::operator-=(const Scalar& other)
}
template<typename Derived>
inline Derived& DenseBase<Derived>::operator/=(const Scalar& other)
EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::operator/=(const Scalar& other)
{
typedef typename Derived::PlainObject PlainObject;
internal::call_assignment(this->derived(), PlainObject::Constant(rows(),cols(),other), internal::div_assign_op<Scalar>());
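The scalar compound operators above in action (example ours):
\code
#include <Eigen/Dense>

int main()
{
  Eigen::ArrayXf a = Eigen::ArrayXf::LinSpaced(4, 0.f, 3.f);
  a += 1.0f;  // ArrayBase::operator+=(Scalar)
  a -= 0.5f;  // ArrayBase::operator-=(Scalar)
  a *= 2.0f;  // DenseBase::operator*=(Scalar)
  a /= 4.0f;  // DenseBase::operator/=(Scalar)
}
\endcode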

File diff suppressed because it is too large

@@ -31,8 +31,8 @@ namespace Eigen {
* arguments to the constructor.
*
* Indeed, this class takes two template parameters:
* \param _OuterStrideAtCompileTime the outer stride, or Dynamic if you want to specify it at runtime.
* \tparam _OuterStrideAtCompileTime the outer stride, or Dynamic if you want to specify it at runtime.
* \param _InnerStrideAtCompileTime the inner stride, or Dynamic if you want to specify it at runtime.
* \tparam _InnerStrideAtCompileTime the inner stride, or Dynamic if you want to specify it at runtime.
*
* Here is an example:
* \include Map_general_stride.cpp
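A minimal strided-Map sketch with made-up data, matching the Stride documentation above (example ours):
\code
#include <Eigen/Dense>
#include <iostream>

int main()
{
  int data[] = {1, 2, 3, 4, 5, 6, 7, 8};
  // View every other element of data as a 2x2 matrix:
  // outer stride 4 between columns, inner stride 2 within a column.
  Eigen::Map<Eigen::Matrix2i, 0, Eigen::Stride<4, 2> > m(data);
  std::cout << m << std::endl;  // picks data[0], data[2], data[4], data[6]
}
\endcode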


@@ -13,20 +13,6 @@
namespace Eigen {
/** \class Transpose
* \ingroup Core_Module
*
* \brief Expression of the transpose of a matrix
*
* \param MatrixType the type of the object of which we are taking the transpose
*
* This class represents an expression of the transpose of a matrix.
* It is the return type of MatrixBase::transpose() and MatrixBase::adjoint()
* and most of the time this is the only way it is used.
*
* \sa MatrixBase::transpose(), MatrixBase::adjoint()
*/
namespace internal {
template<typename MatrixType>
struct traits<Transpose<MatrixType> > : public traits<MatrixType>
@@ -50,11 +36,26 @@ struct traits<Transpose<MatrixType> > : public traits<MatrixType>
template<typename MatrixType, typename StorageKind> class TransposeImpl;
/** \class Transpose
* \ingroup Core_Module
*
* \brief Expression of the transpose of a matrix
*
* \tparam MatrixType the type of the object of which we are taking the transpose
*
* This class represents an expression of the transpose of a matrix.
* It is the return type of MatrixBase::transpose() and MatrixBase::adjoint()
* and most of the time this is the only way it is used.
*
* \sa MatrixBase::transpose(), MatrixBase::adjoint()
*/
template<typename MatrixType> class Transpose
: public TransposeImpl<MatrixType,typename internal::traits<MatrixType>::StorageKind>
{
public:
typedef typename internal::ref_selector<MatrixType>::non_const_type MatrixTypeNested;
typedef typename TransposeImpl<MatrixType,typename internal::traits<MatrixType>::StorageKind>::Base Base;
EIGEN_GENERIC_PUBLIC_INTERFACE(Transpose)
typedef typename internal::remove_all<MatrixType>::type NestedExpression;
@@ -69,16 +70,16 @@ template<typename MatrixType> class Transpose
/** \returns the nested expression */
EIGEN_DEVICE_FUNC
const typename internal::remove_all<typename MatrixType::Nested>::type&
const typename internal::remove_all<MatrixTypeNested>::type&
nestedExpression() const { return m_matrix; }
/** \returns the nested expression */
EIGEN_DEVICE_FUNC
typename internal::remove_all<typename MatrixType::Nested>::type&
typename internal::remove_reference<MatrixTypeNested>::type&
nestedExpression() { return m_matrix.const_cast_derived(); }
nestedExpression() { return m_matrix; }
protected:
typename MatrixType::Nested m_matrix;
typename internal::ref_selector<MatrixType>::non_const_type m_matrix;
};
namespace internal {
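Usage sketch (example ours), including the aliasing caveat that motivates transposeInPlace():
\code
#include <Eigen/Dense>

int main()
{
  Eigen::MatrixXd A = Eigen::MatrixXd::Random(3, 3), B(3, 3);
  B = A.transpose();    // fine: no aliasing between A and B
  A.transposeInPlace(); // prefer this over A = A.transpose(), which aliases
}
\endcode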


@@ -12,35 +12,6 @@
namespace Eigen {
/** \class Transpositions
* \ingroup Core_Module
*
* \brief Represents a sequence of transpositions (row/column interchange)
*
* \param SizeAtCompileTime the number of transpositions, or Dynamic
* \param MaxSizeAtCompileTime the maximum number of transpositions, or Dynamic. This optional parameter defaults to SizeAtCompileTime. Most of the time, you should not have to specify it.
*
* This class represents a permutation transformation as a sequence of \em n transpositions
* \f$[T_{n-1} \ldots T_{i} \ldots T_{0}]\f$. It is internally stored as a vector of integers \c indices.
* Each transposition \f$ T_{i} \f$ applied on the left of a matrix (\f$ T_{i} M\f$) interchanges
* the rows \c i and \c indices[i] of the matrix \c M.
* A transposition applied on the right (e.g., \f$ M T_{i}\f$) yields a column interchange.
*
* Compared to the class PermutationMatrix, such a sequence of transpositions is what is
* computed during a decomposition with pivoting, and it is faster when applying the permutation in-place.
*
* To apply a sequence of transpositions to a matrix, simply use the operator * as in the following example:
* \code
* Transpositions tr;
* MatrixXf mat;
* mat = tr * mat;
* \endcode
* In this example, we detect that the matrix appears on both side, and so the transpositions
* are applied in-place without any temporary or extra copy.
*
* \sa class PermutationMatrix
*/
template<typename Derived>
class TranspositionsBase
{
@@ -154,6 +125,35 @@ struct traits<Transpositions<SizeAtCompileTime,MaxSizeAtCompileTime,_StorageInde
};
}
/** \class Transpositions
* \ingroup Core_Module
*
* \brief Represents a sequence of transpositions (row/column interchange)
*
* \tparam SizeAtCompileTime the number of transpositions, or Dynamic
* \tparam MaxSizeAtCompileTime the maximum number of transpositions, or Dynamic. This optional parameter defaults to SizeAtCompileTime. Most of the time, you should not have to specify it.
*
* This class represents a permutation transformation as a sequence of \em n transpositions
* \f$[T_{n-1} \ldots T_{i} \ldots T_{0}]\f$. It is internally stored as a vector of integers \c indices.
* Each transposition \f$ T_{i} \f$ applied on the left of a matrix (\f$ T_{i} M\f$) interchanges
* the rows \c i and \c indices[i] of the matrix \c M.
* A transposition applied on the right (e.g., \f$ M T_{i}\f$) yields a column interchange.
*
* Compared to the class PermutationMatrix, such a sequence of transpositions is what is
* computed during a decomposition with pivoting, and it is faster when applying the permutation in-place.
*
* To apply a sequence of transpositions to a matrix, simply use the operator * as in the following example:
* \code
* Transpositions tr;
* MatrixXf mat;
* mat = tr * mat;
* \endcode
* In this example, we detect that the matrix appears on both side, and so the transpositions
* are applied in-place without any temporary or extra copy.
*
* \sa class PermutationMatrix
*/
template<int SizeAtCompileTime, int MaxSizeAtCompileTime, typename _StorageIndex>
class Transpositions : public TranspositionsBase<Transpositions<SizeAtCompileTime,MaxSizeAtCompileTime,_StorageIndex> >
{
@@ -325,7 +325,7 @@ class TranspositionsWrapper
protected:
const typename IndicesType::Nested m_indices;
typename IndicesType::Nested m_indices;
};
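A hedged, self-contained version of the usage the doc comment above sketches (assuming the size-taking Transpositions constructor; the indices are illustrative):
\code
#include <Eigen/Dense>
#include <iostream>

int main()
{
  Eigen::Transpositions<Eigen::Dynamic> tr(2);
  tr.indices() << 1, 1;  // T0 swaps rows 0 and 1; T1 swaps row 1 with itself (no-op)
  Eigen::MatrixXf mat = Eigen::MatrixXf::Random(2, 3);
  mat = tr * mat;        // mat appears on both sides: applied in place, no temporary
  std::cout << mat << std::endl;
}
\endcode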


@@ -168,7 +168,7 @@ namespace internal {
template<typename MatrixType, unsigned int _Mode>
struct traits<TriangularView<MatrixType, _Mode> > : traits<MatrixType>
{
typedef typename ref_selector<MatrixType>::type MatrixTypeNested;
typedef typename ref_selector<MatrixType>::non_const_type MatrixTypeNested;
typedef typename remove_reference<MatrixTypeNested>::type MatrixTypeNestedNonRef;
typedef typename remove_all<MatrixTypeNested>::type MatrixTypeNestedCleaned;
typedef typename MatrixType::PlainObject FullMatrixType;
@@ -213,7 +213,6 @@ template<typename _MatrixType, unsigned int _Mode> class TriangularView
IsVectorAtCompileTime = false
};
// FIXME This, combined with const_cast_derived in transpose() leads to a const-correctness loophole
EIGEN_DEVICE_FUNC
explicit inline TriangularView(MatrixType& matrix) : m_matrix(matrix)
{}
@@ -235,7 +234,7 @@ template<typename _MatrixType, unsigned int _Mode> class TriangularView
/** \returns a reference to the nested expression */
EIGEN_DEVICE_FUNC
NestedExpression& nestedExpression() { return *const_cast<NestedExpression*>(&m_matrix); }
NestedExpression& nestedExpression() { return m_matrix; }
typedef TriangularView<const MatrixConjugateReturnType,Mode> ConjugateReturnType;
/** \sa MatrixBase::conjugate() const */
@@ -255,7 +254,7 @@ template<typename _MatrixType, unsigned int _Mode> class TriangularView
inline TransposeReturnType transpose()
{
EIGEN_STATIC_ASSERT_LVALUE(MatrixType)
typename MatrixType::TransposeReturnType tmp(m_matrix.const_cast_derived());
typename MatrixType::TransposeReturnType tmp(m_matrix);
return TransposeReturnType(tmp);
}
@@ -418,7 +417,7 @@ template<typename _MatrixType, unsigned int _Mode> class TriangularViewImpl<_Mat
{
EIGEN_STATIC_ASSERT_LVALUE(TriangularViewType);
Base::check_coordinates_internal(row, col);
return derived().nestedExpression().const_cast_derived().coeffRef(row, col);
return derived().nestedExpression().coeffRef(row, col);
}
/** Assigns a triangular matrix to a triangular part of a dense matrix */
@@ -595,14 +594,7 @@ template<typename Derived>
template<typename DenseDerived>
void TriangularBase<Derived>::evalTo(MatrixBase<DenseDerived> &other) const
{
if(internal::traits<Derived>::Flags & EvalBeforeAssigningBit)
{
typename internal::plain_matrix_type<Derived>::type other_evaluated(rows(), cols());
evalToLazy(other_evaluated);
other.derived().swap(other_evaluated);
}
else
evalToLazy(other.derived());
evalToLazy(other.derived());
}
/***************************************************************************
@@ -711,10 +703,6 @@ struct evaluator_traits<TriangularView<MatrixType,Mode> >
{
typedef typename storage_kind_to_evaluator_kind<typename MatrixType::StorageKind>::Kind Kind;
typedef typename glue_shapes<typename evaluator_traits<MatrixType>::Shape, TriangularShape>::type Shape;
// 1 if assignment A = B assumes aliasing when B is of type T and thus B needs to be evaluated into a
// temporary; 0 if not.
static const int AssumeAliasing = 0;
};
template<typename MatrixType, unsigned int Mode>
@@ -788,7 +776,8 @@ public:
};
template<int Mode, bool SetOpposite, typename DstXprType, typename SrcXprType, typename Functor>
EIGEN_DEVICE_FUNC void call_triangular_assignment_loop(const DstXprType& dst, const SrcXprType& src, const Functor &func)
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
void call_triangular_assignment_loop(const DstXprType& dst, const SrcXprType& src, const Functor &func)
{
eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols());
@@ -812,7 +801,8 @@ EIGEN_DEVICE_FUNC void call_triangular_assignment_loop(const DstXprType& dst, co
}
template<int Mode, bool SetOpposite, typename DstXprType, typename SrcXprType>
EIGEN_DEVICE_FUNC void call_triangular_assignment_loop(const DstXprType& dst, const SrcXprType& src)
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
void call_triangular_assignment_loop(const DstXprType& dst, const SrcXprType& src)
{
call_triangular_assignment_loop<Mode,SetOpposite>(dst, src, internal::assign_op<typename DstXprType::Scalar>());
}
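For illustration (example ours): a triangular solve through the view this file implements:
\code
#include <Eigen/Dense>
#include <iostream>

int main()
{
  Eigen::Matrix3d A = Eigen::Matrix3d::Random();
  Eigen::Vector3d b(1.0, 2.0, 3.0);
  // Solve L x = b using only the lower triangle of A.
  Eigen::Vector3d x = A.triangularView<Eigen::Lower>().solve(b);
  std::cout << x << std::endl;
}
\endcode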


@@ -13,13 +13,23 @@
namespace Eigen {
namespace internal {
template<typename VectorType, int Size>
struct traits<VectorBlock<VectorType, Size> >
: public traits<Block<VectorType,
traits<VectorType>::Flags & RowMajorBit ? 1 : Size,
traits<VectorType>::Flags & RowMajorBit ? Size : 1> >
{
};
}
/** \class VectorBlock
* \ingroup Core_Module
*
* \brief Expression of a fixed-size or dynamic-size sub-vector
*
* \param VectorType the type of the object in which we are taking a sub-vector
* \tparam VectorType the type of the object in which we are taking a sub-vector
* \param Size size of the sub-vector we are taking at compile time (optional)
* \tparam Size size of the sub-vector we are taking at compile time (optional)
*
* This class represents an expression of either a fixed-size or dynamic-size sub-vector.
* It is the return type of DenseBase::segment(Index,Index) and DenseBase::segment<int>(Index) and
@@ -43,17 +53,6 @@ namespace Eigen {
*
* \sa class Block, DenseBase::segment(Index,Index,Index,Index), DenseBase::segment(Index,Index)
*/
namespace internal {
template<typename VectorType, int Size>
struct traits<VectorBlock<VectorType, Size> >
: public traits<Block<VectorType,
traits<VectorType>::Flags & RowMajorBit ? 1 : Size,
traits<VectorType>::Flags & RowMajorBit ? Size : 1> >
{
};
}
template<typename VectorType, int Size> class VectorBlock
: public Block<VectorType,
internal::traits<VectorType>::Flags & RowMajorBit ? 1 : Size,
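The segment() API the VectorBlock documentation refers to, in a minimal sketch (example ours):
\code
#include <Eigen/Dense>
#include <iostream>

int main()
{
  Eigen::VectorXd v(6);
  v << 1, 2, 3, 4, 5, 6;
  v.segment(1, 3) *= 10.0;    // dynamic-size sub-vector: elements 1..3
  v.segment<2>(4).setZero();  // fixed-size sub-vector of length 2
  std::cout << v.transpose() << std::endl;
}
\endcode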


@ -115,7 +115,7 @@ struct member_lpnorm {
typedef ResultType result_type; typedef ResultType result_type;
template<typename Scalar, int Size> struct Cost template<typename Scalar, int Size> struct Cost
{ enum { value = (Size+5) * NumTraits<Scalar>::MulCost + (Size-1)*NumTraits<Scalar>::AddCost }; }; { enum { value = (Size+5) * NumTraits<Scalar>::MulCost + (Size-1)*NumTraits<Scalar>::AddCost }; };
EIGEN_DEVICE_FUNC explicit member_lpnorm() {} EIGEN_DEVICE_FUNC member_lpnorm() {}
template<typename XprType> template<typename XprType>
EIGEN_DEVICE_FUNC inline ResultType operator()(const XprType& mat) const EIGEN_DEVICE_FUNC inline ResultType operator()(const XprType& mat) const
{ return mat.template lpNorm<p>(); } { return mat.template lpNorm<p>(); }
@ -124,7 +124,7 @@ struct member_lpnorm {
template <typename BinaryOp, typename Scalar> template <typename BinaryOp, typename Scalar>
struct member_redux { struct member_redux {
typedef typename result_of< typedef typename result_of<
BinaryOp(Scalar,Scalar) BinaryOp(const Scalar&,const Scalar&)
>::type result_type; >::type result_type;
template<typename _Scalar, int Size> struct Cost template<typename _Scalar, int Size> struct Cost
{ enum { value = (Size-1) * functor_traits<BinaryOp>::Cost }; }; { enum { value = (Size-1) * functor_traits<BinaryOp>::Cost }; };
@ -141,8 +141,8 @@ struct member_redux {
* *
* \brief Pseudo expression providing partial reduction operations * \brief Pseudo expression providing partial reduction operations
* *
* \param ExpressionType the type of the object on which to do partial reductions * \tparam ExpressionType the type of the object on which to do partial reductions
* \param Direction indicates the direction of the redux (#Vertical or #Horizontal) * \tparam Direction indicates the direction of the redux (#Vertical or #Horizontal)
* *
* This class represents a pseudo expression with partial reduction features. * This class represents a pseudo expression with partial reduction features.
* It is the return type of DenseBase::colwise() and DenseBase::rowwise() * It is the return type of DenseBase::colwise() and DenseBase::rowwise()
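A minimal sketch of the partial-reduction API this comment documents (values illustrative, not part of the diff):

Eigen::MatrixXf m(2, 3);
m << 1, 2, 3,
     4, 5, 6;
Eigen::RowVectorXf colSums = m.colwise().sum();   // (5, 7, 9)
Eigen::VectorXf rowMax = m.rowwise().maxCoeff();  // (3, 6)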
@ -187,11 +187,11 @@ template<typename ExpressionType, int Direction> class VectorwiseOp
protected: protected:
/** \internal
* \returns the i-th subvector according to the \c Direction */
typedef typename internal::conditional<isVertical, typedef typename internal::conditional<isVertical,
typename ExpressionType::ColXpr, typename ExpressionType::ColXpr,
typename ExpressionType::RowXpr>::type SubVector; typename ExpressionType::RowXpr>::type SubVector;
/** \internal
* \returns the i-th subvector according to the \c Direction */
EIGEN_DEVICE_FUNC EIGEN_DEVICE_FUNC
SubVector subVector(Index i) SubVector subVector(Index i)
{ {

View File

@ -197,7 +197,7 @@ struct functor_traits<max_coeff_visitor<Scalar> > {
/** \returns the minimum of all coefficients of *this and puts in *row and *col its location. /** \returns the minimum of all coefficients of *this and puts in *row and *col its location.
* \warning the result is undefined if \c *this contains NaN. * \warning the result is undefined if \c *this contains NaN.
* *
* \sa DenseBase::minCoeff(Index*), DenseBase::maxCoeff(Index*,Index*), DenseBase::visitor(), DenseBase::minCoeff() * \sa DenseBase::minCoeff(Index*), DenseBase::maxCoeff(Index*,Index*), DenseBase::visit(), DenseBase::minCoeff()
*/ */
template<typename Derived> template<typename Derived>
template<typename IndexType> template<typename IndexType>
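A short usage sketch of the visitor-backed queries documented above (values illustrative, not part of the diff):

Eigen::Matrix2f m;
m << 3, 1,
     4, 0;
Eigen::Index row, col;
float mn = m.minCoeff(&row, &col);   // mn == 0.0f, row == 1, col == 1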
@ -215,7 +215,7 @@ DenseBase<Derived>::minCoeff(IndexType* rowId, IndexType* colId) const
/** \returns the minimum of all coefficients of *this and puts in *index its location. /** \returns the minimum of all coefficients of *this and puts in *index its location.
* \warning the result is undefined if \c *this contains NaN. * \warning the result is undefined if \c *this contains NaN.
* *
* \sa DenseBase::minCoeff(IndexType*,IndexType*), DenseBase::maxCoeff(IndexType*,IndexType*), DenseBase::visitor(), DenseBase::minCoeff() * \sa DenseBase::minCoeff(IndexType*,IndexType*), DenseBase::maxCoeff(IndexType*,IndexType*), DenseBase::visit(), DenseBase::minCoeff()
*/ */
template<typename Derived> template<typename Derived>
template<typename IndexType> template<typename IndexType>
@ -233,7 +233,7 @@ DenseBase<Derived>::minCoeff(IndexType* index) const
/** \returns the maximum of all coefficients of *this and puts in *row and *col its location. /** \returns the maximum of all coefficients of *this and puts in *row and *col its location.
* \warning the result is undefined if \c *this contains NaN. * \warning the result is undefined if \c *this contains NaN.
* *
* \sa DenseBase::minCoeff(IndexType*,IndexType*), DenseBase::visitor(), DenseBase::maxCoeff() * \sa DenseBase::minCoeff(IndexType*,IndexType*), DenseBase::visit(), DenseBase::maxCoeff()
*/ */
template<typename Derived> template<typename Derived>
template<typename IndexType> template<typename IndexType>

View File

@ -10,11 +10,6 @@
#ifndef EIGEN_MATH_FUNCTIONS_AVX_H #ifndef EIGEN_MATH_FUNCTIONS_AVX_H
#define EIGEN_MATH_FUNCTIONS_AVX_H #define EIGEN_MATH_FUNCTIONS_AVX_H
// For some reason, this function didn't make it into the avxintrin.h
// used by the compiler, so we'll just wrap it.
#define _mm256_setr_m128(lo, hi) \
_mm256_insertf128_si256(_mm256_castsi128_si256(lo), (hi), 1)
/* The sin, cos, exp, and log functions of this file are loosely derived from /* The sin, cos, exp, and log functions of this file are loosely derived from
* Julien Pommier's sse math library: http://gruntthepeon.free.fr/ssemath/ * Julien Pommier's sse math library: http://gruntthepeon.free.fr/ssemath/
*/ */
@ -23,6 +18,28 @@ namespace Eigen {
namespace internal { namespace internal {
inline Packet8i pshiftleft(Packet8i v, int n)
{
#ifdef EIGEN_VECTORIZE_AVX2
return _mm256_slli_epi32(v, n);
#else
__m128i lo = _mm_slli_epi32(_mm256_extractf128_si256(v, 0), n);
__m128i hi = _mm_slli_epi32(_mm256_extractf128_si256(v, 1), n);
return _mm256_insertf128_si256(_mm256_castsi128_si256(lo), (hi), 1);
#endif
}
inline Packet8f pshiftright(Packet8f v, int n)
{
#ifdef EIGEN_VECTORIZE_AVX2
return _mm256_cvtepi32_ps(_mm256_srli_epi32(_mm256_castps_si256(v), n));
#else
__m128i lo = _mm_srli_epi32(_mm256_extractf128_si256(_mm256_castps_si256(v), 0), n);
__m128i hi = _mm_srli_epi32(_mm256_extractf128_si256(_mm256_castps_si256(v), 1), n);
return _mm256_cvtepi32_ps(_mm256_insertf128_si256(_mm256_castsi128_si256(lo), (hi), 1));
#endif
}
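A hedged sketch of what these helpers are used for below (not part of the patch): pshiftright is how plog recovers the biased IEEE-754 exponent of eight packed floats at once, independent of the AVX level.

// For x = 8.0f the bit pattern is 0x41000000 (sign 0, biased exponent 130,
// mantissa 0), so shifting the bits right by 23 leaves 130 in every lane.
Packet8f x = _mm256_set1_ps(8.0f);
Packet8f biased_exp = pshiftright(x, 23);   // each lane now holds 130.0f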
// Sine function // Sine function
// Computes sin(x) by wrapping x to the interval [-Pi/4,3*Pi/4] and // Computes sin(x) by wrapping x to the interval [-Pi/4,3*Pi/4] and
// evaluating interpolants in [-Pi/4,Pi/4] or [Pi/4,3*Pi/4]. The interpolants // evaluating interpolants in [-Pi/4,Pi/4] or [Pi/4,3*Pi/4]. The interpolants
@ -54,17 +71,8 @@ psin<Packet8f>(const Packet8f& _x) {
// Make a mask for the entries that need flipping, i.e. wherever the shift // Make a mask for the entries that need flipping, i.e. wherever the shift
// is odd. // is odd.
Packet8i shift_ints = _mm256_cvtps_epi32(shift); Packet8i shift_ints = _mm256_cvtps_epi32(shift);
Packet8i shift_isodd = Packet8i shift_isodd = _mm256_castps_si256(_mm256_and_ps(_mm256_castsi256_ps(shift_ints), _mm256_castsi256_ps(p8i_one)));
_mm256_castps_si256(_mm256_and_ps(_mm256_castsi256_ps(shift_ints), _mm256_castsi256_ps(p8i_one))); Packet8i sign_flip_mask = pshiftleft(shift_isodd, 31);
#ifdef EIGEN_VECTORIZE_AVX2
Packet8i sign_flip_mask = _mm256_slli_epi32(shift_isodd, 31);
#else
__m128i lo =
_mm_slli_epi32(_mm256_extractf128_si256(shift_isodd, 0), 31);
__m128i hi =
_mm_slli_epi32(_mm256_extractf128_si256(shift_isodd, 1), 31);
Packet8i sign_flip_mask = _mm256_setr_m128(lo, hi);
#endif
// Create a mask for which interpolant to use, i.e. if z > 1, then the mask // Create a mask for which interpolant to use, i.e. if z > 1, then the mask
// is set to ones for that entry. // is set to ones for that entry.
@ -142,15 +150,7 @@ plog<Packet8f>(const Packet8f& _x) {
// Truncate input values to the minimum positive normal. // Truncate input values to the minimum positive normal.
x = pmax(x, p8f_min_norm_pos); x = pmax(x, p8f_min_norm_pos);
// Extract the shifted exponents (No bitwise shifting in regular AVX, so Packet8f emm0 = pshiftright(x,23);
// convert to SSE and do it there).
#ifdef EIGEN_VECTORIZE_AVX2
Packet8f emm0 = _mm256_cvtepi32_ps(_mm256_srli_epi32(_mm256_castps_si256(x), 23));
#else
__m128i lo = _mm_srli_epi32(_mm256_extractf128_si256(_mm256_castps_si256(x), 0), 23);
__m128i hi = _mm_srli_epi32(_mm256_extractf128_si256(_mm256_castps_si256(x), 1), 23);
Packet8f emm0 = _mm256_cvtepi32_ps(_mm256_setr_m128(lo, hi));
#endif
Packet8f e = _mm256_sub_ps(emm0, p8f_126f); Packet8f e = _mm256_sub_ps(emm0, p8f_126f);
// Set the exponents to -1, i.e. x are in the range [0.5,1). // Set the exponents to -1, i.e. x are in the range [0.5,1).
@ -259,18 +259,61 @@ pexp<Packet8f>(const Packet8f& _x) {
// Build emm0 = 2^m. // Build emm0 = 2^m.
Packet8i emm0 = _mm256_cvttps_epi32(padd(m, p8f_127)); Packet8i emm0 = _mm256_cvttps_epi32(padd(m, p8f_127));
#ifdef EIGEN_VECTORIZE_AVX2 emm0 = pshiftleft(emm0, 23);
emm0 = _mm256_slli_epi32(emm0, 23);
#else
__m128i lo = _mm_slli_epi32(_mm256_extractf128_si256(emm0, 0), 23);
__m128i hi = _mm_slli_epi32(_mm256_extractf128_si256(emm0, 1), 23);
emm0 = _mm256_setr_m128(lo, hi);
#endif
// Return 2^m * exp(r). // Return 2^m * exp(r).
return pmax(pmul(y, _mm256_castsi256_ps(emm0)), _x); return pmax(pmul(y, _mm256_castsi256_ps(emm0)), _x);
} }
// Hyperbolic Tangent function.
// Doesn't do anything fancy, just a 13/6-degree rational interpolant which
// is accurate up to a couple of ulps in the range [-9, 9], outside of which
// fl(tanh(x)) = +/-1.
template <>
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet8f
ptanh<Packet8f>(const Packet8f& _x) {
// Clamp the inputs to the range [-9, 9] since anything outside
// this range is +/-1.0f in single-precision.
_EIGEN_DECLARE_CONST_Packet8f(plus_9, 9.0f);
_EIGEN_DECLARE_CONST_Packet8f(minus_9, -9.0f);
const Packet8f x = pmax(p8f_minus_9, pmin(p8f_plus_9, _x));
// The monomial coefficients of the numerator polynomial (odd).
_EIGEN_DECLARE_CONST_Packet8f(alpha_1, 4.89352455891786e-03f);
_EIGEN_DECLARE_CONST_Packet8f(alpha_3, 6.37261928875436e-04f);
_EIGEN_DECLARE_CONST_Packet8f(alpha_5, 1.48572235717979e-05f);
_EIGEN_DECLARE_CONST_Packet8f(alpha_7, 5.12229709037114e-08f);
_EIGEN_DECLARE_CONST_Packet8f(alpha_9, -8.60467152213735e-11f);
_EIGEN_DECLARE_CONST_Packet8f(alpha_11, 2.00018790482477e-13f);
_EIGEN_DECLARE_CONST_Packet8f(alpha_13, -2.76076847742355e-16f);
// The monomial coefficients of the denominator polynomial (even).
_EIGEN_DECLARE_CONST_Packet8f(beta_0, 4.89352518554385e-03f);
_EIGEN_DECLARE_CONST_Packet8f(beta_2, 2.26843463243900e-03f);
_EIGEN_DECLARE_CONST_Packet8f(beta_4, 1.18534705686654e-04f);
_EIGEN_DECLARE_CONST_Packet8f(beta_6, 1.19825839466702e-06f);
// Since the polynomials are odd/even, we need x^2.
const Packet8f x2 = pmul(x, x);
// Evaluate the numerator polynomial p.
Packet8f p = pmadd(x2, p8f_alpha_13, p8f_alpha_11);
p = pmadd(x2, p, p8f_alpha_9);
p = pmadd(x2, p, p8f_alpha_7);
p = pmadd(x2, p, p8f_alpha_5);
p = pmadd(x2, p, p8f_alpha_3);
p = pmadd(x2, p, p8f_alpha_1);
p = pmul(x, p);
// Evaluate the denominator polynomial q.
Packet8f q = pmadd(x2, p8f_beta_6, p8f_beta_4);
q = pmadd(x2, q, p8f_beta_2);
q = pmadd(x2, q, p8f_beta_0);
// Divide the numerator by the denominator.
return pdiv(p, q);
}
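For reference, the same 13/6-degree rational fit written as scalar Horner recurrences; a sanity-check sketch under the patch's coefficients, not part of the diff (assumes <algorithm> for the clamp):

float tanh_approx(float x) {
  x = std::max(-9.0f, std::min(9.0f, x));  // |tanh(x)| rounds to 1 beyond +/-9
  const float x2 = x * x;
  float p = -2.76076847742355e-16f;        // alpha_13
  p = p * x2 + 2.00018790482477e-13f;      // alpha_11
  p = p * x2 - 8.60467152213735e-11f;      // alpha_9
  p = p * x2 + 5.12229709037114e-08f;      // alpha_7
  p = p * x2 + 1.48572235717979e-05f;      // alpha_5
  p = p * x2 + 6.37261928875436e-04f;      // alpha_3
  p = p * x2 + 4.89352455891786e-03f;      // alpha_1
  p = p * x;                               // odd numerator
  float q = 1.19825839466702e-06f;         // beta_6
  q = q * x2 + 1.18534705686654e-04f;      // beta_4
  q = q * x2 + 2.26843463243900e-03f;      // beta_2
  q = q * x2 + 4.89352518554385e-03f;      // beta_0
  return p / q;                            // even denominator
}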
template <> template <>
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet4d EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet4d
pexp<Packet4d>(const Packet4d& _x) { pexp<Packet4d>(const Packet4d& _x) {

View File

@ -68,6 +68,7 @@ template<> struct packet_traits<float> : default_packet_traits
HasExp = 1, HasExp = 1,
HasSqrt = 1, HasSqrt = 1,
HasRsqrt = 1, HasRsqrt = 1,
HasTanh = EIGEN_FAST_MATH,
HasBlend = 1, HasBlend = 1,
HasRound = 1, HasRound = 1,
HasFloor = 1, HasFloor = 1,

View File

@ -0,0 +1,497 @@
// Standard 16-bit float type, mostly useful for GPUs. Defines a new
// class Eigen::half (inheriting from CUDA's __half struct) with
// operator overloads such that it behaves basically as an arithmetic
// type. It will be quite slow on CPUs (so it is recommended to stay
// in fp32 for CPUs, except for simple parameter conversions, I/O
// to disk and the like), but fast on GPUs.
//
//
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
//
// The conversion routines are Copyright (c) Fabian Giesen, 2016.
// The original license follows:
//
// Copyright (c) Fabian Giesen, 2016
// All rights reserved.
// Redistribution and use in source and binary forms, with or without
// modification, are permitted.
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// “AS IS” AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#ifndef EIGEN_HALF_CUDA_H
#define EIGEN_HALF_CUDA_H
#if __cplusplus > 199711L
#define EIGEN_EXPLICIT_CAST(tgt_type) explicit operator tgt_type()
#else
#define EIGEN_EXPLICIT_CAST(tgt_type) operator tgt_type()
#endif
#if !defined(EIGEN_HAS_CUDA_FP16)
// Make our own __half definition that is similar to CUDA's.
struct __half {
unsigned short x;
};
#endif
namespace Eigen {
namespace internal {
static EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC __half raw_uint16_to_half(unsigned short x);
static EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC __half float_to_half_rtne(float ff);
static EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC float half_to_float(__half h);
} // end namespace internal
// Class definition.
struct half : public __half {
EIGEN_DEVICE_FUNC half() {}
EIGEN_DEVICE_FUNC half(const __half& h) : __half(h) {}
EIGEN_DEVICE_FUNC half(const half& h) : __half(h) {}
explicit EIGEN_DEVICE_FUNC half(bool b)
: __half(internal::raw_uint16_to_half(b ? 0x3c00 : 0)) {}
explicit EIGEN_DEVICE_FUNC half(int i)
: __half(internal::float_to_half_rtne(static_cast<float>(i))) {}
explicit EIGEN_DEVICE_FUNC half(long l)
: __half(internal::float_to_half_rtne(static_cast<float>(l))) {}
explicit EIGEN_DEVICE_FUNC half(long long ll)
: __half(internal::float_to_half_rtne(static_cast<float>(ll))) {}
explicit EIGEN_DEVICE_FUNC half(float f)
: __half(internal::float_to_half_rtne(f)) {}
explicit EIGEN_DEVICE_FUNC half(double d)
: __half(internal::float_to_half_rtne(static_cast<float>(d))) {}
EIGEN_DEVICE_FUNC EIGEN_EXPLICIT_CAST(bool) const {
// +0.0 and -0.0 become false, everything else becomes true.
return (x & 0x7fff) != 0;
}
EIGEN_DEVICE_FUNC EIGEN_EXPLICIT_CAST(signed char) const {
return static_cast<signed char>(internal::half_to_float(*this));
}
EIGEN_DEVICE_FUNC EIGEN_EXPLICIT_CAST(unsigned char) const {
return static_cast<unsigned char>(internal::half_to_float(*this));
}
EIGEN_DEVICE_FUNC EIGEN_EXPLICIT_CAST(short) const {
return static_cast<short>(internal::half_to_float(*this));
}
EIGEN_DEVICE_FUNC EIGEN_EXPLICIT_CAST(unsigned short) const {
return static_cast<unsigned short>(internal::half_to_float(*this));
}
EIGEN_DEVICE_FUNC EIGEN_EXPLICIT_CAST(int) const {
return static_cast<int>(internal::half_to_float(*this));
}
EIGEN_DEVICE_FUNC EIGEN_EXPLICIT_CAST(unsigned int) const {
return static_cast<unsigned int>(internal::half_to_float(*this));
}
EIGEN_DEVICE_FUNC EIGEN_EXPLICIT_CAST(long) const {
return static_cast<long>(internal::half_to_float(*this));
}
EIGEN_DEVICE_FUNC EIGEN_EXPLICIT_CAST(unsigned long) const {
return static_cast<unsigned long>(internal::half_to_float(*this));
}
EIGEN_DEVICE_FUNC EIGEN_EXPLICIT_CAST(long long) const {
return static_cast<long long>(internal::half_to_float(*this));
}
EIGEN_DEVICE_FUNC EIGEN_EXPLICIT_CAST(unsigned long long) const {
return static_cast<unsigned long long>(internal::half_to_float(*this));
}
EIGEN_DEVICE_FUNC EIGEN_EXPLICIT_CAST(float) const {
return internal::half_to_float(*this);
}
EIGEN_DEVICE_FUNC EIGEN_EXPLICIT_CAST(double) const {
return static_cast<double>(internal::half_to_float(*this));
}
EIGEN_DEVICE_FUNC half& operator=(const half& other) {
x = other.x;
return *this;
}
};
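A hedged usage sketch of the arithmetic-type behavior described above (not part of the diff; on the host each operation round-trips through fp32):

Eigen::half a(1.5f);
Eigen::half b(2.25f);
Eigen::half c = a * b + a;            // evaluated in fp32, rounded back to fp16
float f = static_cast<float>(c);      // f == 4.875f, exactly representable
bool nonzero = static_cast<bool>(c);  // +/-0 maps to false, everything else true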
#if defined(EIGEN_HAS_CUDA_FP16) && defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530
// Intrinsics for native fp16 support. Note that on current hardware,
// these are no faster than fp32 arithmetic (you need to use the half2
// versions to get the increased ALU throughput), but you do save the
// conversion steps back and forth.
__device__ half operator + (const half& a, const half& b) {
return __hadd(a, b);
}
__device__ half operator * (const half& a, const half& b) {
return __hmul(a, b);
}
__device__ half operator - (const half& a, const half& b) {
return __hsub(a, b);
}
__device__ half operator / (const half& a, const half& b) {
float num = __half2float(a);
float denom = __half2float(b);
return __float2half(num / denom);
}
__device__ half operator - (const half& a) {
return __hneg(a);
}
__device__ half& operator += (half& a, const half& b) {
a = a + b;
return a;
}
__device__ half& operator *= (half& a, const half& b) {
a = a * b;
return a;
}
__device__ half& operator -= (half& a, const half& b) {
a = a - b;
return a;
}
__device__ half& operator /= (half& a, const half& b) {
a = a / b;
return a;
}
__device__ bool operator == (const half& a, const half& b) {
return __heq(a, b);
}
__device__ bool operator != (const half& a, const half& b) {
return __hne(a, b);
}
__device__ bool operator < (const half& a, const half& b) {
return __hlt(a, b);
}
__device__ bool operator <= (const half& a, const half& b) {
return __hle(a, b);
}
__device__ bool operator > (const half& a, const half& b) {
return __hgt(a, b);
}
__device__ bool operator >= (const half& a, const half& b) {
return __hge(a, b);
}
#else // Emulate support for half floats
// Definitions for CPUs and older CUDA, mostly working through conversion
// to/from fp32.
static EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half operator + (const half& a, const half& b) {
return half(float(a) + float(b));
}
static EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half operator * (const half& a, const half& b) {
return half(float(a) * float(b));
}
static EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half operator - (const half& a, const half& b) {
return half(float(a) - float(b));
}
static EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half operator / (const half& a, const half& b) {
return half(float(a) / float(b));
}
static EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half operator - (const half& a) {
half result;
result.x = a.x ^ 0x8000;
return result;
}
static EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half& operator += (half& a, const half& b) {
a = half(float(a) + float(b));
return a;
}
static EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half& operator *= (half& a, const half& b) {
a = half(float(a) * float(b));
return a;
}
static EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half& operator -= (half& a, const half& b) {
a = half(float(a) - float(b));
return a;
}
static EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half& operator /= (half& a, const half& b) {
a = half(float(a) / float(b));
return a;
}
static EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool operator == (const half& a, const half& b) {
return float(a) == float(b);
}
static EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool operator != (const half& a, const half& b) {
return float(a) != float(b);
}
static EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool operator < (const half& a, const half& b) {
return float(a) < float(b);
}
static EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool operator <= (const half& a, const half& b) {
return float(a) <= float(b);
}
static EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool operator > (const half& a, const half& b) {
return float(a) > float(b);
}
static EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool operator >= (const half& a, const half& b) {
return float(a) >= float(b);
}
#endif // Emulate support for half floats
// Division by an index. Do it in full float precision to avoid accuracy
// issues in converting the denominator to half.
static EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half operator / (const half& a, Index b) {
return Eigen::half(static_cast<float>(a) / static_cast<float>(b));
}
// Conversion routines, including fallbacks for the host or older CUDA.
// Note that newer Intel CPUs (Haswell or newer) have vectorized versions of
// these in hardware. If we need more performance on older/other CPUs, they
// can also be vectorized directly.
namespace internal {
static EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC __half raw_uint16_to_half(unsigned short x) {
__half h;
h.x = x;
return h;
}
union FP32 {
unsigned int u;
float f;
};
static EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC __half float_to_half_rtne(float ff) {
#if defined(EIGEN_HAS_CUDA_FP16) && defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 300
return __float2half(ff);
#elif defined(EIGEN_HAS_FP16_C)
__half h;
h.x = _cvtss_sh(ff, 0);
return h;
#else
FP32 f; f.f = ff;
const FP32 f32infty = { 255 << 23 };
const FP32 f16max = { (127 + 16) << 23 };
const FP32 denorm_magic = { ((127 - 15) + (23 - 10) + 1) << 23 };
unsigned int sign_mask = 0x80000000u;
__half o = { 0 };
unsigned int sign = f.u & sign_mask;
f.u ^= sign;
// NOTE all the integer compares in this function can be safely
// compiled into signed compares since all operands are below
// 0x80000000. Important if you want fast straight SSE2 code
// (since there's no unsigned PCMPGTD).
if (f.u >= f16max.u) { // result is Inf or NaN (all exponent bits set)
o.x = (f.u > f32infty.u) ? 0x7e00 : 0x7c00; // NaN->qNaN and Inf->Inf
} else { // (De)normalized number or zero
if (f.u < (113 << 23)) { // resulting FP16 is subnormal or zero
// use a magic value to align our 10 mantissa bits at the bottom of
// the float. as long as FP addition is round-to-nearest-even this
// just works.
f.f += denorm_magic.f;
// and one integer subtract of the bias later, we have our final float!
o.x = static_cast<unsigned short>(f.u - denorm_magic.u);
} else {
unsigned int mant_odd = (f.u >> 13) & 1; // resulting mantissa is odd
// update exponent, rounding bias part 1
f.u += ((unsigned int)(15 - 127) << 23) + 0xfff;
// rounding bias part 2
f.u += mant_odd;
// take the bits!
o.x = static_cast<unsigned short>(f.u >> 13);
}
}
o.x |= static_cast<unsigned short>(sign >> 16);
return o;
#endif
}
static EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC float half_to_float(__half h) {
#if defined(EIGEN_HAS_CUDA_FP16) && defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 300
return __half2float(h);
#elif defined(EIGEN_HAS_FP16_C)
return _cvtsh_ss(h.x);
#else
const FP32 magic = { 113 << 23 };
const unsigned int shifted_exp = 0x7c00 << 13; // exponent mask after shift
FP32 o;
o.u = (h.x & 0x7fff) << 13; // exponent/mantissa bits
unsigned int exp = shifted_exp & o.u; // just the exponent
o.u += (127 - 15) << 23; // exponent adjust
// handle exponent special cases
if (exp == shifted_exp) { // Inf/NaN?
o.u += (128 - 16) << 23; // extra exp adjust
} else if (exp == 0) { // Zero/Denormal?
o.u += 1 << 23; // extra exp adjust
o.f -= magic.f; // renormalize
}
o.u |= (h.x & 0x8000) << 16; // sign bit
return o.f;
#endif
}
} // end namespace internal
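A worked example of the conversions above (a sketch, not part of the diff):

// float 1.0f has bits 0x3F800000: sign 0, biased exponent 127, mantissa 0.
// Rebiasing the exponent from 127 to 15 and keeping the top 10 mantissa bits
// gives the half pattern 0x3C00.
__half one = internal::float_to_half_rtne(1.0f);  // one.x == 0x3C00
float back = internal::half_to_float(one);        // back == 1.0f exactly
// Rounding is to nearest even: 1.0f + 2^-11 lies exactly halfway between the
// halves 0x3C00 and 0x3C01, so the tie goes to the even mantissa, 0x3C00.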
// Traits.
namespace internal {
template<> struct is_arithmetic<half> { enum { value = true }; };
} // end namespace internal
template<> struct NumTraits<Eigen::half>
: GenericNumTraits<Eigen::half>
{
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Eigen::half epsilon() {
return internal::raw_uint16_to_half(0x0800);
}
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Eigen::half dummy_precision() { return half(1e-3f); }
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Eigen::half highest() {
return internal::raw_uint16_to_half(0x7bff);
}
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Eigen::half lowest() {
return internal::raw_uint16_to_half(0xfbff);
}
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Eigen::half infinity() {
return internal::raw_uint16_to_half(0x7c00);
}
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Eigen::half quiet_NaN() {
return internal::raw_uint16_to_half(0x7c01);
}
};
// Infinity/NaN checks.
namespace numext {
static EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool (isinf)(const Eigen::half& a) {
return (a.x & 0x7fff) == 0x7c00;
}
static EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool (isnan)(const Eigen::half& a) {
#if defined(EIGEN_HAS_CUDA_FP16) && defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530
return __hisnan(a);
#else
return (a.x & 0x7fff) > 0x7c00;
#endif
}
static EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool (isfinite)(const Eigen::half& a) {
return !(Eigen::numext::isinf)(a) && !(Eigen::numext::isnan)(a);
}
template<> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half abs(const Eigen::half& a) {
Eigen::half result;
result.x = a.x & 0x7FFF;
return result;
}
template<> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half exp(const Eigen::half& a) {
return Eigen::half(::expf(float(a)));
}
template<> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half log(const Eigen::half& a) {
return Eigen::half(::logf(float(a)));
}
template<> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half sqrt(const Eigen::half& a) {
return Eigen::half(::sqrtf(float(a)));
}
template<> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half pow(const Eigen::half& a, const Eigen::half& b) {
return Eigen::half(::powf(float(a), float(b)));
}
template<> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half floor(const Eigen::half& a) {
return Eigen::half(::floorf(float(a)));
}
template<> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half ceil(const Eigen::half& a) {
return Eigen::half(::ceilf(float(a)));
}
} // end namespace numext
} // end namespace Eigen
// Standard mathematical functions and transcendentals.
static EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half fabsh(const Eigen::half& a) {
Eigen::half result;
result.x = a.x & 0x7FFF;
return result;
}
static EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half exph(const Eigen::half& a) {
return Eigen::half(::expf(float(a)));
}
static EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half logh(const Eigen::half& a) {
return Eigen::half(::logf(float(a)));
}
static EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half sqrth(const Eigen::half& a) {
return Eigen::half(::sqrtf(float(a)));
}
static EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half powh(const Eigen::half& a, const Eigen::half& b) {
return Eigen::half(::powf(float(a), float(b)));
}
static EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half floorh(const Eigen::half& a) {
return Eigen::half(::floorf(float(a)));
}
static EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half ceilh(const Eigen::half& a) {
return Eigen::half(::ceilf(float(a)));
}
static EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC int (isnan)(const Eigen::half& a) {
return (Eigen::numext::isnan)(a);
}
static EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC int (isinf)(const Eigen::half& a) {
return (Eigen::numext::isinf)(a);
}
static EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC int (isfinite)(const Eigen::half& a) {
return !(Eigen::numext::isinf)(a) && !(Eigen::numext::isnan)(a);
}
namespace std {
#if __cplusplus > 199711L
template <>
struct hash<Eigen::half> {
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE std::size_t operator()(const Eigen::half& a) const {
return static_cast<std::size_t>(a.x);
}
};
#endif
} // end namespace std
// Add the missing shfl_xor intrinsic
#if defined(EIGEN_HAS_CUDA_FP16) && defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 300
__device__ EIGEN_STRONG_INLINE Eigen::half __shfl_xor(Eigen::half var, int laneMask, int width=warpSize) {
return static_cast<Eigen::half>(__shfl_xor(static_cast<float>(var), laneMask, width));
}
#endif
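With the overload above, a classic butterfly reduction works on Eigen::half directly; a hedged device-side sketch (warp_sum is a hypothetical helper, not part of the diff):

__device__ Eigen::half warp_sum(Eigen::half v) {
  // Each round swaps values with lane ^ mask; after log2(warpSize) rounds
  // every lane holds the sum over the whole warp.
  for (int mask = warpSize / 2; mask > 0; mask /= 2)
    v = v + __shfl_xor(v, mask);
  return v;
}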
// ldg() has an overload for __half, but we also need one for Eigen::half.
#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 320
static EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half __ldg(const Eigen::half* ptr) {
return Eigen::internal::raw_uint16_to_half(
__ldg(reinterpret_cast<const unsigned short*>(ptr)));
}
#endif
#endif // EIGEN_HALF_CUDA_H

View File

@ -66,6 +66,121 @@ double2 prsqrt<double2>(const double2& a)
return make_double2(rsqrt(a.x), rsqrt(a.y)); return make_double2(rsqrt(a.x), rsqrt(a.y));
} }
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
float4 plgamma<float4>(const float4& a)
{
return make_float4(lgammaf(a.x), lgammaf(a.y), lgammaf(a.z), lgammaf(a.w));
}
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
double2 plgamma<double2>(const double2& a)
{
return make_double2(lgamma(a.x), lgamma(a.y));
}
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
float4 pdigamma<float4>(const float4& a)
{
using numext::digamma;
return make_float4(digamma(a.x), digamma(a.y), digamma(a.z), digamma(a.w));
}
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
double2 pdigamma<double2>(const double2& a)
{
using numext::digamma;
return make_double2(digamma(a.x), digamma(a.y));
}
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
float4 pzeta<float4>(const float4& x, const float4& q)
{
using numext::zeta;
return make_float4(zeta(x.x, q.x), zeta(x.y, q.y), zeta(x.z, q.z), zeta(x.w, q.w));
}
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
double2 pzeta<double2>(const double2& x, const double2& q)
{
using numext::zeta;
return make_double2(zeta(x.x, q.x), zeta(x.y, q.y));
}
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
float4 ppolygamma<float4>(const float4& n, const float4& x)
{
using numext::polygamma;
return make_float4(polygamma(n.x, x.x), polygamma(n.y, x.y), polygamma(n.z, x.z), polygamma(n.w, x.w));
}
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
double2 ppolygamma<double2>(const double2& n, const double2& x)
{
using numext::polygamma;
return make_double2(polygamma(n.x, x.x), polygamma(n.y, x.y));
}
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
float4 perf<float4>(const float4& a)
{
return make_float4(erf(a.x), erf(a.y), erf(a.z), erf(a.w));
}
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
double2 perf<double2>(const double2& a)
{
return make_double2(erf(a.x), erf(a.y));
}
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
float4 perfc<float4>(const float4& a)
{
return make_float4(erfc(a.x), erfc(a.y), erfc(a.z), erfc(a.w));
}
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
double2 perfc<double2>(const double2& a)
{
return make_double2(erfc(a.x), erfc(a.y));
}
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
float4 pigamma<float4>(const float4& a, const float4& x)
{
using numext::igamma;
return make_float4(
igamma(a.x, x.x),
igamma(a.y, x.y),
igamma(a.z, x.z),
igamma(a.w, x.w));
}
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
double2 pigamma<double2>(const double2& a, const double2& x)
{
using numext::igamma;
return make_double2(igamma(a.x, x.x), igamma(a.y, x.y));
}
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
float4 pigammac<float4>(const float4& a, const float4& x)
{
using numext::igammac;
return make_float4(
igammac(a.x, x.x),
igammac(a.y, x.y),
igammac(a.z, x.z),
igammac(a.w, x.w));
}
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
double2 pigammac<double2>(const double2& a, const double2& x)
{
using numext::igammac;
return make_double2(igammac(a.x, x.x), igammac(a.y, x.y));
}
#endif #endif
} // end namespace internal } // end namespace internal

View File

@ -21,7 +21,6 @@ namespace internal {
template<> struct is_arithmetic<float4> { enum { value = true }; }; template<> struct is_arithmetic<float4> { enum { value = true }; };
template<> struct is_arithmetic<double2> { enum { value = true }; }; template<> struct is_arithmetic<double2> { enum { value = true }; };
template<> struct packet_traits<float> : default_packet_traits template<> struct packet_traits<float> : default_packet_traits
{ {
typedef float4 type; typedef float4 type;
@ -39,6 +38,14 @@ template<> struct packet_traits<float> : default_packet_traits
HasExp = 1, HasExp = 1,
HasSqrt = 1, HasSqrt = 1,
HasRsqrt = 1, HasRsqrt = 1,
HasLGamma = 1,
HasDiGamma = 1,
HasZeta = 1,
HasPolygamma = 1,
HasErf = 1,
HasErfc = 1,
HasIgamma = 1,
HasIGammac = 1,
HasBlend = 0, HasBlend = 0,
}; };
@ -59,6 +66,12 @@ template<> struct packet_traits<double> : default_packet_traits
HasExp = 1, HasExp = 1,
HasSqrt = 1, HasSqrt = 1,
HasRsqrt = 1, HasRsqrt = 1,
HasLGamma = 1,
HasDiGamma = 1,
HasErf = 1,
HasErfc = 1,
HasIGamma = 1,
HasIGammac = 1,
HasBlend = 0, HasBlend = 0,
}; };
@ -177,25 +190,39 @@ template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void pstoreu<double>(double* to
to[1] = from.y; to[1] = from.y;
} }
#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 350
template<> template<>
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float4 ploadt_ro<float4, Aligned>(const float* from) { EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float4 ploadt_ro<float4, Aligned>(const float* from) {
#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 350
return __ldg((const float4*)from); return __ldg((const float4*)from);
#else
return make_float4(from[0], from[1], from[2], from[3]);
#endif
} }
template<> template<>
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE double2 ploadt_ro<double2, Aligned>(const double* from) { EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE double2 ploadt_ro<double2, Aligned>(const double* from) {
#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 350
return __ldg((const double2*)from); return __ldg((const double2*)from);
#else
return make_double2(from[0], from[1]);
#endif
} }
template<> template<>
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float4 ploadt_ro<float4, Unaligned>(const float* from) { EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float4 ploadt_ro<float4, Unaligned>(const float* from) {
#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 350
return make_float4(__ldg(from+0), __ldg(from+1), __ldg(from+2), __ldg(from+3)); return make_float4(__ldg(from+0), __ldg(from+1), __ldg(from+2), __ldg(from+3));
#else
return make_float4(from[0], from[1], from[2], from[3]);
#endif
} }
template<> template<>
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE double2 ploadt_ro<double2, Unaligned>(const double* from) { EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE double2 ploadt_ro<double2, Unaligned>(const double* from) {
#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 350
return make_double2(__ldg(from+0), __ldg(from+1)); return make_double2(__ldg(from+0), __ldg(from+1));
} #else
return make_double2(from[0], from[1]);
#endif #endif
}
template<> EIGEN_DEVICE_FUNC inline float4 pgather<float, float4>(const float* from, Index stride) { template<> EIGEN_DEVICE_FUNC inline float4 pgather<float, float4>(const float* from, Index stride) {
return make_float4(from[0*stride], from[1*stride], from[2*stride], from[3*stride]); return make_float4(from[0*stride], from[1*stride], from[2*stride], from[3*stride]);
@ -251,6 +278,35 @@ template<> EIGEN_DEVICE_FUNC inline double predux_mul<double2>(const double2& a)
return a.x * a.y; return a.x * a.y;
} }
template<size_t offset>
struct protate_impl<offset, float4>
{
static float4 run(const float4& a) {
if (offset == 0) {
return make_float4(a.x, a.y, a.z, a.w);
}
if (offset == 1) {
return make_float4(a.w, a.x, a.y, a.z);
}
if (offset == 2) {
return make_float4(a.z, a.w, a.x, a.y);
}
return make_float4(a.y, a.z, a.w, a.x);
}
};
template<size_t offset>
struct protate_impl<offset, double2>
{
static double2 run(const double2& a) {
if (offset == 0) {
return make_double2(a.x, a.y);
}
return make_double2(a.y, a.x);
}
};
template<> EIGEN_DEVICE_FUNC inline float4 pabs<float4>(const float4& a) { template<> EIGEN_DEVICE_FUNC inline float4 pabs<float4>(const float4& a) {
return make_float4(fabsf(a.x), fabsf(a.y), fabsf(a.z), fabsf(a.w)); return make_float4(fabsf(a.x), fabsf(a.y), fabsf(a.z), fabsf(a.w));
} }
@ -258,7 +314,6 @@ template<> EIGEN_DEVICE_FUNC inline double2 pabs<double2>(const double2& a) {
return make_double2(fabs(a.x), fabs(a.y)); return make_double2(fabs(a.x), fabs(a.y));
} }
EIGEN_DEVICE_FUNC inline void EIGEN_DEVICE_FUNC inline void
ptranspose(PacketBlock<float4,4>& kernel) { ptranspose(PacketBlock<float4,4>& kernel) {
double tmp = kernel.packet[0].y; double tmp = kernel.packet[0].y;

View File

@ -0,0 +1,192 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2016 Benoit Steiner <benoit.steiner.goog@gmail.com>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
#ifndef EIGEN_PACKET_MATH_HALF_CUDA_H
#define EIGEN_PACKET_MATH_HALF_CUDA_H
#if defined(EIGEN_HAS_CUDA_FP16)
// Make sure this is only available when targeting a GPU: we don't want to
// introduce conflicts between these packet_traits definitions and the ones
// we'll use on the host side (SSE, AVX, ...)
#if defined(__CUDACC__) && defined(EIGEN_USE_GPU)
// Most of the following operations require arch >= 5.3
#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530
namespace Eigen {
namespace internal {
template<> struct is_arithmetic<half2> { enum { value = true }; };
template<> struct packet_traits<half> : default_packet_traits
{
typedef half2 type;
typedef half2 half;
enum {
Vectorizable = 1,
AlignedOnScalar = 1,
size=2,
HasHalfPacket = 0,
HasDiv = 1
};
};
template<> struct unpacket_traits<half2> { typedef half type; enum {size=2, alignment=Aligned16}; typedef half2 half; };
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pset1<half2>(const half& from) {
return __half2half2(from);
}
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pload<half2>(const half* from) {
return *reinterpret_cast<const half2*>(from);
}
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 ploadu<half2>(const half* from) {
return __halves2half2(from[0], from[1]);
}
template<> EIGEN_STRONG_INLINE half2 ploaddup<half2>(const half* from) {
return __halves2half2(from[0], from[0]);
}
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void pstore<half>(half* to, const half2& from) {
*reinterpret_cast<half2*>(to) = from;
}
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void pstoreu<half>(half* to, const half2& from) {
to[0] = __low2half(from);
to[1] = __high2half(from);
}
template<>
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE half2 ploadt_ro<half2, Aligned>(const half* from) {
return __ldg((const half2*)from);
}
template<>
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE half2 ploadt_ro<half2, Unaligned>(const half* from) {
return __halves2half2(__ldg(from+0), __ldg(from+1));
}
template<> EIGEN_DEVICE_FUNC inline half2 pgather<half, half2>(const half* from, Index stride) {
return __halves2half2(from[0*stride], from[1*stride]);
}
template<> EIGEN_DEVICE_FUNC inline void pscatter<half, half2>(half* to, const half2& from, Index stride) {
to[stride*0] = __low2half(from);
to[stride*1] = __high2half(from);
}
template<> EIGEN_DEVICE_FUNC inline half pfirst<half2>(const half2& a) {
return __low2half(a);
}
template<> EIGEN_DEVICE_FUNC inline half2 pabs<half2>(const half2& a) {
half2 result;
result.x = a.x & 0x7FFF7FFF;
return result;
}
EIGEN_DEVICE_FUNC inline void
ptranspose(PacketBlock<half2,2>& kernel) {
half a1 = __low2half(kernel.packet[0]);
half a2 = __high2half(kernel.packet[0]);
half b1 = __low2half(kernel.packet[1]);
half b2 = __high2half(kernel.packet[1]);
kernel.packet[0] = __halves2half2(a1, b1);
kernel.packet[1] = __halves2half2(a2, b2);
}
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 plset<half2>(const half& a) {
return __halves2half2(a, __hadd(a, __float2half(1.0f)));
}
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 padd<half2>(const half2& a, const half2& b) {
return __hadd2(a, b);
}
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 psub<half2>(const half2& a, const half2& b) {
return __hsub2(a, b);
}
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pnegate(const half2& a) {
return __hneg2(a);
}
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pconj(const half2& a) { return a; }
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pmul<half2>(const half2& a, const half2& b) {
return __hmul2(a, b);
}
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pmadd<half2>(const half2& a, const half2& b, const half2& c) {
return __hfma2(a, b, c);
}
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pdiv<half2>(const half2& a, const half2& b) {
float a1 = __low2float(a);
float a2 = __high2float(a);
float b1 = __low2float(b);
float b2 = __high2float(b);
float r1 = a1 / b1;
float r2 = a2 / b2;
return __floats2half2_rn(r1, r2);
}
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pmin<half2>(const half2& a, const half2& b) {
float a1 = __low2float(a);
float a2 = __high2float(a);
float b1 = __low2float(b);
float b2 = __high2float(b);
half r1 = a1 < b1 ? __low2half(a) : __low2half(b);
half r2 = a2 < b2 ? __high2half(a) : __high2half(b);
return __halves2half2(r1, r2);
}
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pmax<half2>(const half2& a, const half2& b) {
float a1 = __low2float(a);
float a2 = __high2float(a);
float b1 = __low2float(b);
float b2 = __high2float(b);
half r1 = a1 > b1 ? __low2half(a) : __low2half(b);
half r2 = a2 > b2 ? __high2half(a) : __high2half(b);
return __halves2half2(r1, r2);
}
template<> EIGEN_DEVICE_FUNC inline half predux<half2>(const half2& a) {
return __hadd(__low2half(a), __high2half(a));
}
template<> EIGEN_DEVICE_FUNC inline half predux_max<half2>(const half2& a) {
half first = __low2half(a);
half second = __high2half(a);
return __hgt(first, second) ? first : second;
}
template<> EIGEN_DEVICE_FUNC inline half predux_min<half2>(const half2& a) {
half first = __low2half(a);
half second = __high2half(a);
return __hlt(first, second) ? first : second;
}
template<> EIGEN_DEVICE_FUNC inline half predux_mul<half2>(const half2& a) {
return __hmul(__low2half(a), __high2half(a));
}
} // end namespace internal
} // end namespace Eigen
#endif
#endif
#endif
#endif // EIGEN_PACKET_MATH_HALF_CUDA_H
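A hedged device-side sketch of the two-wide path this file enables (axpy_half2 is a hypothetical kernel helper, not part of the diff; n is assumed even). It is these half2 packets, not scalar half, that reach the doubled fp16 ALU rate:

__device__ void axpy_half2(const Eigen::half* x, Eigen::half* y,
                           Eigen::half a, int n) {
  using namespace Eigen::internal;
  half2 pa = pset1<half2>(a);            // broadcast a into both lanes
  for (int i = 0; i < n; i += 2) {
    half2 px = ploadu<half2>(x + i);
    half2 py = ploadu<half2>(y + i);
    pstoreu(y + i, pmadd(pa, px, py));   // y = a*x + y via __hfma2
  }
}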

View File

@ -0,0 +1,112 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2016 Benoit Steiner <benoit.steiner.goog@gmail.com>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
#ifndef EIGEN_TYPE_CASTING_CUDA_H
#define EIGEN_TYPE_CASTING_CUDA_H
namespace Eigen {
namespace internal {
#if defined(EIGEN_HAS_CUDA_FP16)
template<>
struct scalar_cast_op<float, half> {
EIGEN_EMPTY_STRUCT_CTOR(scalar_cast_op)
typedef half result_type;
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half operator() (const float& a) const {
#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 300
return __float2half(a);
#else
return half(a);
#endif
}
};
template<>
struct functor_traits<scalar_cast_op<float, half> >
{ enum { Cost = NumTraits<float>::AddCost, PacketAccess = false }; };
template<>
struct scalar_cast_op<int, half> {
EIGEN_EMPTY_STRUCT_CTOR(scalar_cast_op)
typedef half result_type;
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half operator() (const int& a) const {
#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 300
return __float2half(static_cast<float>(a));
#else
return half(static_cast<float>(a));
#endif
}
};
template<>
struct functor_traits<scalar_cast_op<int, half> >
{ enum { Cost = NumTraits<float>::AddCost, PacketAccess = false }; };
template<>
struct scalar_cast_op<half, float> {
EIGEN_EMPTY_STRUCT_CTOR(scalar_cast_op)
typedef float result_type;
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float operator() (const half& a) const {
#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 300
return __half2float(a);
#else
return static_cast<float>(a);
#endif
}
};
template<>
struct functor_traits<scalar_cast_op<half, float> >
{ enum { Cost = NumTraits<float>::AddCost, PacketAccess = false }; };
#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530
template <>
struct type_casting_traits<half, float> {
enum {
VectorizedCast = 1,
SrcCoeffRatio = 2,
TgtCoeffRatio = 1
};
};
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 pcast<half2, float4>(const half2& a, const half2& b) {
float2 r1 = __half22float2(a);
float2 r2 = __half22float2(b);
return make_float4(r1.x, r1.y, r2.x, r2.y);
}
template <>
struct type_casting_traits<float, half> {
enum {
VectorizedCast = 1,
SrcCoeffRatio = 1,
TgtCoeffRatio = 2
};
};
template<> EIGEN_STRONG_INLINE half2 pcast<float4, half2>(const float4& a) {
// Simply discard the second half of the input
return __float22half2_rn(make_float2(a.x, a.y));
}
#endif
#endif
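A hedged device-side illustration of the ratio bookkeeping above (not part of the diff): SrcCoeffRatio = 2 with TgtCoeffRatio = 1 means the caster consumes two half2 packets per float4 produced, while the reverse cast covers only half of a float4, which is why pcast<float4, half2> drops a.z and a.w.

half2 lo = __floats2half2_rn(make_float2(1.0f, 2.0f));
half2 hi = __floats2half2_rn(make_float2(3.0f, 4.0f));
float4 f = pcast<half2, float4>(lo, hi);  // (1.0f, 2.0f, 3.0f, 4.0f)
half2 back = pcast<float4, half2>(f);     // keeps only (1.0f, 2.0f)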
} // end namespace internal
} // end namespace Eigen
#endif // EIGEN_TYPE_CASTING_CUDA_H

View File

@ -177,7 +177,11 @@ template<> EIGEN_STRONG_INLINE Packet4i pdiv<Packet4i>(const Packet4i& /*a*/, co
return pset1<Packet4i>(0); return pset1<Packet4i>(0);
} }
#ifdef __ARM_FEATURE_FMA // Clang/ARM wrongly advertises __ARM_FEATURE_FMA even when it's not available,
// then implements a slow software scalar fallback calling fmaf()!
// Filed LLVM bug:
// https://llvm.org/bugs/show_bug.cgi?id=27216
#if (defined __ARM_FEATURE_FMA) && !(EIGEN_COMP_CLANG && EIGEN_ARCH_ARM)
// See bug 936. // See bug 936.
// FMA is available on VFPv4 i.e. when compiling with -mfpu=neon-vfpv4. // FMA is available on VFPv4 i.e. when compiling with -mfpu=neon-vfpv4.
// FMA is a true fused multiply-add i.e. only 1 rounding at the end, no intermediate rounding. // FMA is a true fused multiply-add i.e. only 1 rounding at the end, no intermediate rounding.
@ -186,7 +190,27 @@ template<> EIGEN_STRONG_INLINE Packet4i pdiv<Packet4i>(const Packet4i& /*a*/, co
// MLA: 10 GFlop/s ; FMA: 12 GFlops/s. // MLA: 10 GFlop/s ; FMA: 12 GFlops/s.
template<> EIGEN_STRONG_INLINE Packet4f pmadd(const Packet4f& a, const Packet4f& b, const Packet4f& c) { return vfmaq_f32(c,a,b); } template<> EIGEN_STRONG_INLINE Packet4f pmadd(const Packet4f& a, const Packet4f& b, const Packet4f& c) { return vfmaq_f32(c,a,b); }
#else #else
template<> EIGEN_STRONG_INLINE Packet4f pmadd(const Packet4f& a, const Packet4f& b, const Packet4f& c) { return vmlaq_f32(c,a,b); } template<> EIGEN_STRONG_INLINE Packet4f pmadd(const Packet4f& a, const Packet4f& b, const Packet4f& c) {
#if EIGEN_COMP_CLANG && EIGEN_ARCH_ARM
// Clang/ARM will replace VMLA by VMUL+VADD for at least some values of -mcpu,
// in particular -mcpu=cortex-a8 and -mcpu=cortex-a7. Since the former is the
// default on -march=armv7-a, that is a very common case.
// See e.g. this thread:
// http://lists.llvm.org/pipermail/llvm-dev/2013-December/068806.html
// Filed LLVM bug:
// https://llvm.org/bugs/show_bug.cgi?id=27219
Packet4f r = c;
asm volatile(
"vmla.f32 %q[r], %q[a], %q[b]"
: [r] "+w" (r)
: [a] "w" (a),
[b] "w" (b)
: );
return r;
#else
return vmlaq_f32(c,a,b);
#endif
}
#endif #endif
// No FMA instruction for int, so use MLA unconditionally. // No FMA instruction for int, so use MLA unconditionally.
@ -532,20 +556,21 @@ ptranspose(PacketBlock<Packet4i,4>& kernel) {
#if EIGEN_ARCH_ARM64 && !EIGEN_APPLE_DOUBLE_NEON_BUG #if EIGEN_ARCH_ARM64 && !EIGEN_APPLE_DOUBLE_NEON_BUG
#if (EIGEN_COMP_GNUC_STRICT && defined(__ANDROID__)) || defined(__apple_build_version__)
// Bug 907: workaround missing declarations of the following two functions in the ADK // Bug 907: workaround missing declarations of the following two functions in the ADK
__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) // Defining these functions as templates ensures that if these intrinsics are
vreinterpretq_u64_f64 (float64x2_t __a) // already defined in arm_neon.h, then our workaround doesn't cause a conflict
// and has lower priority in overload resolution.
template <typename T>
uint64x2_t vreinterpretq_u64_f64(T a)
{ {
return (uint64x2_t) __a; return (uint64x2_t) a;
} }
__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) template <typename T>
vreinterpretq_f64_u64 (uint64x2_t __a) float64x2_t vreinterpretq_f64_u64(T a)
{ {
return (float64x2_t) __a; return (float64x2_t) a;
} }
#endif
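A minimal illustration of the overload-resolution point made above (pick is a hypothetical function, not part of the diff): given an exact-match non-template and a template, the non-template wins, so a real arm_neon.h declaration silently shadows the workaround.

template <typename T> int pick(T)  { return 0; }  // the template fallback
int pick(float64x2_t)              { return 1; }  // exact non-template match
// For a float64x2_t argument, overload resolution selects the non-template:
// pick(vdupq_n_f64(0.0)) == 1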
typedef float64x2_t Packet2d; typedef float64x2_t Packet2d;
typedef float64x1_t Packet1d; typedef float64x1_t Packet1d;

View File

@ -255,7 +255,7 @@ template<> EIGEN_STRONG_INLINE Packet2cf pdiv<Packet2cf>(const Packet2cf& a, con
return Packet2cf(_mm_div_ps(res.v,_mm_add_ps(s,_mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(s), 0xb1))))); return Packet2cf(_mm_div_ps(res.v,_mm_add_ps(s,_mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(s), 0xb1)))));
} }
EIGEN_STRONG_INLINE Packet2cf pcplxflip/*<Packet2cf>*/(const Packet2cf& x) EIGEN_STRONG_INLINE Packet2cf pcplxflip/* <Packet2cf> */(const Packet2cf& x)
{ {
return Packet2cf(vec4f_swizzle1(x.v, 1, 0, 3, 2)); return Packet2cf(vec4f_swizzle1(x.v, 1, 0, 3, 2));
} }
@ -456,7 +456,7 @@ template<> EIGEN_STRONG_INLINE Packet1cd pdiv<Packet1cd>(const Packet1cd& a, con
return Packet1cd(_mm_div_pd(res.v, _mm_add_pd(s,_mm_shuffle_pd(s, s, 0x1)))); return Packet1cd(_mm_div_pd(res.v, _mm_add_pd(s,_mm_shuffle_pd(s, s, 0x1))));
} }
EIGEN_STRONG_INLINE Packet1cd pcplxflip/*<Packet1cd>*/(const Packet1cd& x) EIGEN_STRONG_INLINE Packet1cd pcplxflip/* <Packet1cd> */(const Packet1cd& x)
{ {
return Packet1cd(preverse(Packet2d(x.v))); return Packet1cd(preverse(Packet2d(x.v)));
} }

View File

@ -516,8 +516,81 @@ Packet2d prsqrt<Packet2d>(const Packet2d& x) {
return _mm_div_pd(pset1<Packet2d>(1.0), _mm_sqrt_pd(x)); return _mm_div_pd(pset1<Packet2d>(1.0), _mm_sqrt_pd(x));
} }
// Hyperbolic Tangent function.
// Doesn't do anything fancy, just a 13/6-degree rational interpolant which
// is accurate up to a couple of ulps in the range [-9, 9], outside of which
// fl(tanh(x)) = +/-1.
template <>
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet4f
ptanh<Packet4f>(const Packet4f& _x) {
// Clamp the inputs to the range [-9, 9] since anything outside
// this range is +/-1.0f in single-precision.
_EIGEN_DECLARE_CONST_Packet4f(plus_9, 9.0f);
_EIGEN_DECLARE_CONST_Packet4f(minus_9, -9.0f);
const Packet4f x = pmax(p4f_minus_9, pmin(p4f_plus_9, _x));
// The monomial coefficients of the numerator polynomial (odd).
_EIGEN_DECLARE_CONST_Packet4f(alpha_1, 4.89352455891786e-03f);
_EIGEN_DECLARE_CONST_Packet4f(alpha_3, 6.37261928875436e-04f);
_EIGEN_DECLARE_CONST_Packet4f(alpha_5, 1.48572235717979e-05f);
_EIGEN_DECLARE_CONST_Packet4f(alpha_7, 5.12229709037114e-08f);
_EIGEN_DECLARE_CONST_Packet4f(alpha_9, -8.60467152213735e-11f);
_EIGEN_DECLARE_CONST_Packet4f(alpha_11, 2.00018790482477e-13f);
_EIGEN_DECLARE_CONST_Packet4f(alpha_13, -2.76076847742355e-16f);
// The monomial coefficients of the denominator polynomial (even).
_EIGEN_DECLARE_CONST_Packet4f(beta_0, 4.89352518554385e-03f);
_EIGEN_DECLARE_CONST_Packet4f(beta_2, 2.26843463243900e-03f);
_EIGEN_DECLARE_CONST_Packet4f(beta_4, 1.18534705686654e-04f);
_EIGEN_DECLARE_CONST_Packet4f(beta_6, 1.19825839466702e-06f);
// Since the polynomials are odd/even, we need x^2.
const Packet4f x2 = pmul(x, x);
// Evaluate the numerator polynomial p.
Packet4f p = pmadd(x2, p4f_alpha_13, p4f_alpha_11);
p = pmadd(x2, p, p4f_alpha_9);
p = pmadd(x2, p, p4f_alpha_7);
p = pmadd(x2, p, p4f_alpha_5);
p = pmadd(x2, p, p4f_alpha_3);
p = pmadd(x2, p, p4f_alpha_1);
p = pmul(x, p);
// Evaluate the denominator polynomial q.
Packet4f q = pmadd(x2, p4f_beta_6, p4f_beta_4);
q = pmadd(x2, q, p4f_beta_2);
q = pmadd(x2, q, p4f_beta_0);
// Divide the numerator by the denominator.
return pdiv(p, q);
}
} // end namespace internal } // end namespace internal
namespace numext {
template<>
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
float sqrt(const float &x)
{
return internal::pfirst(internal::Packet4f(_mm_sqrt_ss(_mm_set_ss(x))));
}
template<>
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
double sqrt(const double &x)
{
#if EIGEN_COMP_GNUC_STRICT
// This works around a GCC bug generating poor code for _mm_sqrt_pd
// See https://bitbucket.org/eigen/eigen/commits/14f468dba4d350d7c19c9b93072e19f7b3df563b
return internal::pfirst(internal::Packet2d(__builtin_ia32_sqrtsd(_mm_set_sd(x))));
#else
return internal::pfirst(internal::Packet2d(_mm_sqrt_pd(_mm_set_sd(x))));
#endif
}
} // end namespace numext
} // end namespace Eigen } // end namespace Eigen
#endif // EIGEN_MATH_FUNCTIONS_SSE_H #endif // EIGEN_MATH_FUNCTIONS_SSE_H

View File

@ -109,6 +109,7 @@ template<> struct packet_traits<float> : default_packet_traits
HasExp = 1, HasExp = 1,
HasSqrt = 1, HasSqrt = 1,
HasRsqrt = 1, HasRsqrt = 1,
HasTanh = EIGEN_FAST_MATH,
HasBlend = 1 HasBlend = 1
#ifdef EIGEN_VECTORIZE_SSE4_1 #ifdef EIGEN_VECTORIZE_SSE4_1
@ -314,58 +315,27 @@ template<> EIGEN_STRONG_INLINE Packet4i pload<Packet4i>(const int* from) { E
return _mm_loadu_ps(from); return _mm_loadu_ps(from);
#endif #endif
} }
template<> EIGEN_STRONG_INLINE Packet2d ploadu<Packet2d>(const double* from) { EIGEN_DEBUG_UNALIGNED_LOAD return _mm_loadu_pd(from); }
template<> EIGEN_STRONG_INLINE Packet4i ploadu<Packet4i>(const int* from) { EIGEN_DEBUG_UNALIGNED_LOAD return _mm_loadu_si128(reinterpret_cast<const __m128i*>(from)); }
#else #else
// NOTE: with the code below, MSVC's compiler crashes! // NOTE: with the code below, MSVC's compiler crashes!
#if EIGEN_COMP_GNUC && (EIGEN_ARCH_i386 || (EIGEN_ARCH_x86_64 && EIGEN_GNUC_AT_LEAST(4, 8)))
// bug 195: gcc/i386 emits weird x87 fldl/fstpl instructions for _mm_load_sd
#define EIGEN_AVOID_CUSTOM_UNALIGNED_LOADS 1
#elif EIGEN_COMP_CLANG
// bug 201: Segfaults in __mm_loadh_pd with clang 2.8
#define EIGEN_AVOID_CUSTOM_UNALIGNED_LOADS 1
#else
#define EIGEN_AVOID_CUSTOM_UNALIGNED_LOADS 0
#endif
template<> EIGEN_STRONG_INLINE Packet4f ploadu<Packet4f>(const float* from) template<> EIGEN_STRONG_INLINE Packet4f ploadu<Packet4f>(const float* from)
{ {
EIGEN_DEBUG_UNALIGNED_LOAD EIGEN_DEBUG_UNALIGNED_LOAD
#if EIGEN_AVOID_CUSTOM_UNALIGNED_LOADS
return _mm_loadu_ps(from); return _mm_loadu_ps(from);
#else
__m128d res;
res = _mm_load_sd((const double*)(from)) ;
res = _mm_loadh_pd(res, (const double*)(from+2)) ;
return _mm_castpd_ps(res);
#endif
} }
#endif
template<> EIGEN_STRONG_INLINE Packet2d ploadu<Packet2d>(const double* from) template<> EIGEN_STRONG_INLINE Packet2d ploadu<Packet2d>(const double* from)
{ {
EIGEN_DEBUG_UNALIGNED_LOAD EIGEN_DEBUG_UNALIGNED_LOAD
#if EIGEN_AVOID_CUSTOM_UNALIGNED_LOADS
return _mm_loadu_pd(from); return _mm_loadu_pd(from);
#else
__m128d res;
res = _mm_load_sd(from) ;
res = _mm_loadh_pd(res,from+1);
return res;
#endif
} }
template<> EIGEN_STRONG_INLINE Packet4i ploadu<Packet4i>(const int* from) template<> EIGEN_STRONG_INLINE Packet4i ploadu<Packet4i>(const int* from)
{ {
EIGEN_DEBUG_UNALIGNED_LOAD EIGEN_DEBUG_UNALIGNED_LOAD
#if EIGEN_AVOID_CUSTOM_UNALIGNED_LOADS
return _mm_loadu_si128(reinterpret_cast<const __m128i*>(from)); return _mm_loadu_si128(reinterpret_cast<const __m128i*>(from));
#else
__m128d res;
res = _mm_load_sd((const double*)(from)) ;
res = _mm_loadh_pd(res, (const double*)(from+2)) ;
return _mm_castpd_si128(res);
#endif
} }
#endif
template<> EIGEN_STRONG_INLINE Packet4f ploaddup<Packet4f>(const float* from) template<> EIGEN_STRONG_INLINE Packet4f ploaddup<Packet4f>(const float* from)
{ {

View File

@ -0,0 +1,6 @@
FILE(GLOB Eigen_Core_arch_ZVector_SRCS "*.h")
INSTALL(FILES
${Eigen_Core_arch_ZVector_SRCS}
DESTINATION ${INCLUDE_INSTALL_DIR}/Eigen/src/Core/arch/ZVector COMPONENT Devel
)

View File

@ -0,0 +1,201 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2010 Gael Guennebaud <gael.guennebaud@inria.fr>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
#ifndef EIGEN_COMPLEX32_ALTIVEC_H
#define EIGEN_COMPLEX32_ALTIVEC_H
namespace Eigen {
namespace internal {
static Packet2ul p2ul_CONJ_XOR1 = (Packet2ul) vec_sld((Packet4ui) p2d_ZERO_, (Packet4ui) p2l_ZERO, 8);//{ 0x8000000000000000, 0x0000000000000000 };
static Packet2ul p2ul_CONJ_XOR2 = (Packet2ul) vec_sld((Packet4ui) p2l_ZERO, (Packet4ui) p2d_ZERO_, 8);//{ 0x0000000000000000, 0x8000000000000000 };
struct Packet1cd
{
EIGEN_STRONG_INLINE Packet1cd() {}
EIGEN_STRONG_INLINE explicit Packet1cd(const Packet2d& a) : v(a) {}
Packet2d v;
};
template<> struct packet_traits<std::complex<double> > : default_packet_traits
{
typedef Packet1cd type;
typedef Packet1cd half;
enum {
Vectorizable = 1,
AlignedOnScalar = 0,
size = 1,
HasHalfPacket = 0,
HasAdd = 1,
HasSub = 1,
HasMul = 1,
HasDiv = 1,
HasNegate = 1,
HasAbs = 0,
HasAbs2 = 0,
HasMin = 0,
HasMax = 0,
HasSetLinear = 0
};
};
template<> struct unpacket_traits<Packet1cd> { typedef std::complex<double> type; enum {size=1, alignment=Aligned16}; typedef Packet1cd half; };
template<> EIGEN_STRONG_INLINE Packet1cd pload <Packet1cd>(const std::complex<double>* from) { EIGEN_DEBUG_ALIGNED_LOAD return Packet1cd(pload<Packet2d>((const double*)from)); }
template<> EIGEN_STRONG_INLINE Packet1cd ploadu<Packet1cd>(const std::complex<double>* from) { EIGEN_DEBUG_UNALIGNED_LOAD return Packet1cd(ploadu<Packet2d>((const double*)from)); }
template<> EIGEN_STRONG_INLINE void pstore <std::complex<double> >(std::complex<double> * to, const Packet1cd& from) { EIGEN_DEBUG_ALIGNED_STORE pstore((double*)to, from.v); }
template<> EIGEN_STRONG_INLINE void pstoreu<std::complex<double> >(std::complex<double> * to, const Packet1cd& from) { EIGEN_DEBUG_UNALIGNED_STORE pstoreu((double*)to, from.v); }
template<> EIGEN_STRONG_INLINE Packet1cd pset1<Packet1cd>(const std::complex<double>& from)
{ /* here we really have to use unaligned loads :( */ return ploadu<Packet1cd>(&from); }
template<> EIGEN_DEVICE_FUNC inline Packet1cd pgather<std::complex<double>, Packet1cd>(const std::complex<double>* from, Index stride)
{
std::complex<double> EIGEN_ALIGN16 af[2];
af[0] = from[0*stride];
af[1] = from[1*stride];
return pload<Packet1cd>(af);
}
template<> EIGEN_DEVICE_FUNC inline void pscatter<std::complex<double>, Packet1cd>(std::complex<double>* to, const Packet1cd& from, Index stride)
{
std::complex<double> EIGEN_ALIGN16 af[2];
pstore<std::complex<double> >(af, from);
to[0*stride] = af[0];
to[1*stride] = af[1];
}
template<> EIGEN_STRONG_INLINE Packet1cd padd<Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(a.v + b.v); }
template<> EIGEN_STRONG_INLINE Packet1cd psub<Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(a.v - b.v); }
template<> EIGEN_STRONG_INLINE Packet1cd pnegate(const Packet1cd& a) { return Packet1cd(pnegate(Packet2d(a.v))); }
template<> EIGEN_STRONG_INLINE Packet1cd pconj(const Packet1cd& a) { return Packet1cd((Packet2d)vec_xor((Packet2d)a.v, (Packet2d)p2ul_CONJ_XOR2)); }
template<> EIGEN_STRONG_INLINE Packet1cd pmul<Packet1cd>(const Packet1cd& a, const Packet1cd& b)
{
Packet2d a_re, a_im, v1, v2;
// Duplicate the real part of a in both lanes
a_re = vec_perm(a.v, a.v, p16uc_PSET64_HI);
// Duplicate the imaginary part of a in both lanes
a_im = vec_perm(a.v, a.v, p16uc_PSET64_LO);
// multiply a_re * b
v1 = vec_madd(a_re, b.v, p2d_ZERO);
// multiply a_im * b, then swap the halves and negate the new real part
v2 = vec_madd(a_im, b.v, p2d_ZERO);
v2 = (Packet2d) vec_sld((Packet4ui)v2, (Packet4ui)v2, 8);
v2 = (Packet2d) vec_xor((Packet2d)v2, (Packet2d) p2ul_CONJ_XOR1);
return Packet1cd(v1 + v2);
}
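The permute/madd/sld/xor sequence above is the textbook complex product. A scalar reference of what each lane computes (a sketch for checking, not Eigen code):
#include <complex>
// Reference: (a*b).re = a.re*b.re - a.im*b.im,  (a*b).im = a.re*b.im + a.im*b.re
static inline std::complex<double> ref_cmul(const std::complex<double>& a,
                                            const std::complex<double>& b)
{
  return std::complex<double>(a.real()*b.real() - a.imag()*b.imag(),
                              a.real()*b.imag() + a.imag()*b.real());
}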
template<> EIGEN_STRONG_INLINE Packet1cd pand <Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(vec_and(a.v,b.v)); }
template<> EIGEN_STRONG_INLINE Packet1cd por <Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(vec_or(a.v,b.v)); }
template<> EIGEN_STRONG_INLINE Packet1cd pxor <Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(vec_xor(a.v,b.v)); }
template<> EIGEN_STRONG_INLINE Packet1cd pandnot<Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(vec_and(a.v, vec_nor(b.v,b.v))); }
template<> EIGEN_STRONG_INLINE Packet1cd ploaddup<Packet1cd>(const std::complex<double>* from)
{
return pset1<Packet1cd>(*from);
}
template<> EIGEN_STRONG_INLINE void prefetch<std::complex<double> >(const std::complex<double> * addr) { EIGEN_ZVECTOR_PREFETCH(addr); }
template<> EIGEN_STRONG_INLINE std::complex<double> pfirst<Packet1cd>(const Packet1cd& a)
{
std::complex<double> EIGEN_ALIGN16 res[2];
pstore<std::complex<double> >(res, a);
return res[0];
}
template<> EIGEN_STRONG_INLINE Packet1cd preverse(const Packet1cd& a) { return a; }
template<> EIGEN_STRONG_INLINE std::complex<double> predux<Packet1cd>(const Packet1cd& a)
{
return pfirst(a);
}
template<> EIGEN_STRONG_INLINE Packet1cd preduxp<Packet1cd>(const Packet1cd* vecs)
{
return vecs[0];
}
template<> EIGEN_STRONG_INLINE std::complex<double> predux_mul<Packet1cd>(const Packet1cd& a)
{
return pfirst(a);
}
template<int Offset>
struct palign_impl<Offset,Packet1cd>
{
static EIGEN_STRONG_INLINE void run(Packet1cd& /*first*/, const Packet1cd& /*second*/)
{
// FIXME: is it certain that we never have to align a Packet1cd?
// Even though a std::complex<double> has 16 bytes, it is not necessarily aligned on a 16-byte boundary...
}
};
template<> struct conj_helper<Packet1cd, Packet1cd, false,true>
{
EIGEN_STRONG_INLINE Packet1cd pmadd(const Packet1cd& x, const Packet1cd& y, const Packet1cd& c) const
{ return padd(pmul(x,y),c); }
EIGEN_STRONG_INLINE Packet1cd pmul(const Packet1cd& a, const Packet1cd& b) const
{
return internal::pmul(a, pconj(b));
}
};
template<> struct conj_helper<Packet1cd, Packet1cd, true,false>
{
EIGEN_STRONG_INLINE Packet1cd pmadd(const Packet1cd& x, const Packet1cd& y, const Packet1cd& c) const
{ return padd(pmul(x,y),c); }
EIGEN_STRONG_INLINE Packet1cd pmul(const Packet1cd& a, const Packet1cd& b) const
{
return internal::pmul(pconj(a), b);
}
};
template<> struct conj_helper<Packet1cd, Packet1cd, true,true>
{
EIGEN_STRONG_INLINE Packet1cd pmadd(const Packet1cd& x, const Packet1cd& y, const Packet1cd& c) const
{ return padd(pmul(x,y),c); }
EIGEN_STRONG_INLINE Packet1cd pmul(const Packet1cd& a, const Packet1cd& b) const
{
return pconj(internal::pmul(a, b));
}
};
template<> EIGEN_STRONG_INLINE Packet1cd pdiv<Packet1cd>(const Packet1cd& a, const Packet1cd& b)
{
// TODO optimize it for ZVector
Packet1cd res = conj_helper<Packet1cd,Packet1cd,false,true>().pmul(a,b);
Packet2d s = vec_madd(b.v, b.v, p2d_ZERO_);
return Packet1cd(pdiv(res.v, s + vec_perm(s, s, p16uc_REVERSE64)));
}
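This is the usual a/b = a*conj(b) / |b|^2 trick: the conj_helper computes a*conj(b), vec_madd(b.v, b.v, p2d_ZERO_) squares both lanes, and adding the lane-swapped copy puts b.re^2 + b.im^2 in both lanes. A scalar reference for comparison:
#include <complex>
// Reference: a/b = a*conj(b) / (b.re^2 + b.im^2)
static inline std::complex<double> ref_cdiv(const std::complex<double>& a,
                                            const std::complex<double>& b)
{
  const double s = b.real()*b.real() + b.imag()*b.imag();
  return std::complex<double>((a.real()*b.real() + a.imag()*b.imag()) / s,
                              (a.imag()*b.real() - a.real()*b.imag()) / s);
}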
EIGEN_STRONG_INLINE Packet1cd pcplxflip/*<Packet1cd>*/(const Packet1cd& x)
{
return Packet1cd(preverse(Packet2d(x.v)));
}
EIGEN_STRONG_INLINE void ptranspose(PacketBlock<Packet1cd,2>& kernel)
{
Packet2d tmp = vec_perm(kernel.packet[0].v, kernel.packet[1].v, p16uc_TRANSPOSE64_HI);
kernel.packet[1].v = vec_perm(kernel.packet[0].v, kernel.packet[1].v, p16uc_TRANSPOSE64_LO);
kernel.packet[0].v = tmp;
}
} // end namespace internal
} // end namespace Eigen
#endif // EIGEN_COMPLEX32_ALTIVEC_H

View File

@ -0,0 +1,110 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2007 Julien Pommier
// Copyright (C) 2009 Gael Guennebaud <gael.guennebaud@inria.fr>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
/* The sin, cos, exp, and log functions of this file come from
* Julien Pommier's sse math library: http://gruntthepeon.free.fr/ssemath/
*/
#ifndef EIGEN_MATH_FUNCTIONS_ALTIVEC_H
#define EIGEN_MATH_FUNCTIONS_ALTIVEC_H
namespace Eigen {
namespace internal {
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
Packet2d pexp<Packet2d>(const Packet2d& _x)
{
Packet2d x = _x;
_EIGEN_DECLARE_CONST_Packet2d(1 , 1.0);
_EIGEN_DECLARE_CONST_Packet2d(2 , 2.0);
_EIGEN_DECLARE_CONST_Packet2d(half, 0.5);
_EIGEN_DECLARE_CONST_Packet2d(exp_hi, 709.437);
_EIGEN_DECLARE_CONST_Packet2d(exp_lo, -709.436139303);
_EIGEN_DECLARE_CONST_Packet2d(cephes_LOG2EF, 1.4426950408889634073599);
_EIGEN_DECLARE_CONST_Packet2d(cephes_exp_p0, 1.26177193074810590878e-4);
_EIGEN_DECLARE_CONST_Packet2d(cephes_exp_p1, 3.02994407707441961300e-2);
_EIGEN_DECLARE_CONST_Packet2d(cephes_exp_p2, 9.99999999999999999910e-1);
_EIGEN_DECLARE_CONST_Packet2d(cephes_exp_q0, 3.00198505138664455042e-6);
_EIGEN_DECLARE_CONST_Packet2d(cephes_exp_q1, 2.52448340349684104192e-3);
_EIGEN_DECLARE_CONST_Packet2d(cephes_exp_q2, 2.27265548208155028766e-1);
_EIGEN_DECLARE_CONST_Packet2d(cephes_exp_q3, 2.00000000000000000009e0);
_EIGEN_DECLARE_CONST_Packet2d(cephes_exp_C1, 0.693145751953125);
_EIGEN_DECLARE_CONST_Packet2d(cephes_exp_C2, 1.42860682030941723212e-6);
Packet2d tmp, fx;
Packet2l emm0;
// clamp x
x = pmax(pmin(x, p2d_exp_hi), p2d_exp_lo);
/* express exp(x) as exp(g + n*log(2)) */
fx = pmadd(p2d_cephes_LOG2EF, x, p2d_half);
fx = vec_floor(fx);
tmp = pmul(fx, p2d_cephes_exp_C1);
Packet2d z = pmul(fx, p2d_cephes_exp_C2);
x = psub(x, tmp);
x = psub(x, z);
Packet2d x2 = pmul(x,x);
Packet2d px = p2d_cephes_exp_p0;
px = pmadd(px, x2, p2d_cephes_exp_p1);
px = pmadd(px, x2, p2d_cephes_exp_p2);
px = pmul (px, x);
Packet2d qx = p2d_cephes_exp_q0;
qx = pmadd(qx, x2, p2d_cephes_exp_q1);
qx = pmadd(qx, x2, p2d_cephes_exp_q2);
qx = pmadd(qx, x2, p2d_cephes_exp_q3);
x = pdiv(px,psub(qx,px));
x = pmadd(p2d_2,x,p2d_1);
// build 2^n
emm0 = vec_ctsl(fx, 0);
static const Packet2l p2l_1023 = { 1023, 1023 };
static const Packet2ul p2ul_52 = { 52, 52 };
emm0 = emm0 + p2l_1023;
emm0 = emm0 << reinterpret_cast<Packet2l>(p2ul_52);
// Altivec's max & min operators just drop silent NaNs. Check NaNs in
// inputs and return them unmodified.
Packet2ul isnumber_mask = reinterpret_cast<Packet2ul>(vec_cmpeq(_x, _x));
return vec_sel(_x, pmax(pmul(x, reinterpret_cast<Packet2d>(emm0)), _x),
isnumber_mask);
}
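The routine follows the classic Cephes recipe: pick n = round(x*log2(e)), compute the reduced argument g = x - n*ln(2) in two pieces (C1 + C2) for accuracy, evaluate a rational approximation of exp(g), and scale by 2^n built directly in the exponent bits. A rough scalar sketch of the same recipe, ignoring clamping and NaN handling and with a deliberately crude core polynomial where the code above uses the rational P/Q form:
#include <cmath>
#include <cstdint>
#include <cstring>
// Scalar sketch of the Cephes-style range reduction used above (illustrative only).
static inline double exp_sketch(double x)
{
  const double n = std::floor(1.4426950408889634 * x + 0.5); // round(x*log2(e))
  double g = x - n * 0.693145751953125;                      // subtract n*C1
  g -= n * 1.42860682030941723212e-6;                        // ...then n*C2
  const double eg = 1.0 + g + 0.5*g*g + g*g*g/6.0;           // crude exp(g)
  const uint64_t bits =
      static_cast<uint64_t>(static_cast<int64_t>(n) + 1023) << 52;
  double two_n;
  std::memcpy(&two_n, &bits, sizeof two_n);                  // two_n = 2^n
  return eg * two_n;                                         // exp(x) = exp(g) * 2^n
}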
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
Packet2d psqrt<Packet2d>(const Packet2d& x)
{
return __builtin_s390_vfsqdb(x);
}
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
Packet2d prsqrt<Packet2d>(const Packet2d& x) {
// Unfortunately we can't use a fast reciprocal square root estimate here (as SSE's _mm_rsqrt_ps provides) since it only gives an approximation.
return pset1<Packet2d>(1.0) / psqrt<Packet2d>(x);
}
} // end namespace internal
} // end namespace Eigen
#endif // EIGEN_MATH_FUNCTIONS_ALTIVEC_H

View File

@ -0,0 +1,575 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2016 Konstantinos Margaritis <markos@freevec.org>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
#ifndef EIGEN_PACKET_MATH_ZVECTOR_H
#define EIGEN_PACKET_MATH_ZVECTOR_H
#include <stdint.h>
namespace Eigen {
namespace internal {
#ifndef EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD
#define EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD 4
#endif
#ifndef EIGEN_HAS_SINGLE_INSTRUCTION_MADD
#define EIGEN_HAS_SINGLE_INSTRUCTION_MADD
#endif
#ifndef EIGEN_HAS_SINGLE_INSTRUCTION_CJMADD
#define EIGEN_HAS_SINGLE_INSTRUCTION_CJMADD
#endif
// NOTE z13 has 32 vector registers
#ifndef EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS
#define EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS 32
#endif
typedef __vector int Packet4i;
typedef __vector unsigned int Packet4ui;
typedef __vector __bool int Packet4bi;
typedef __vector short int Packet8i;
typedef __vector unsigned char Packet16uc;
typedef __vector double Packet2d;
typedef __vector unsigned long long Packet2ul;
typedef __vector long long Packet2l;
typedef union {
int32_t i[4];
uint32_t ui[4];
int64_t l[2];
uint64_t ul[2];
double d[2];
Packet4i v4i;
Packet4ui v4ui;
Packet2l v2l;
Packet2ul v2ul;
Packet2d v2d;
} Packet;
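The union gives a portable way to move between the vector registers and their scalar lanes; the stream operators later in the file use exactly this. A minimal accessor sketch:
// Sketch: read lane 0 of a Packet2d through the union.
static inline double packet2d_lane0(const Packet2d& v)
{
  Packet u;
  u.v2d = v;
  return u.d[0];
}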
// We want to reuse these constants without duplicating code everywhere,
// and declaring them as plain globals does not work well, so we define macros instead
#define _EIGEN_DECLARE_CONST_FAST_Packet4i(NAME,X) \
Packet4i p4i_##NAME = reinterpret_cast<Packet4i>(vec_splat_s32(X))
#define _EIGEN_DECLARE_CONST_FAST_Packet2d(NAME,X) \
Packet2d p2d_##NAME = reinterpret_cast<Packet2d>(vec_splat_s64(X))
#define _EIGEN_DECLARE_CONST_FAST_Packet2l(NAME,X) \
Packet2l p2l_##NAME = reinterpret_cast<Packet2l>(vec_splat_s64(X))
#define _EIGEN_DECLARE_CONST_Packet4i(NAME,X) \
Packet4i p4i_##NAME = pset1<Packet4i>(X)
#define _EIGEN_DECLARE_CONST_Packet2d(NAME,X) \
Packet2d p2d_##NAME = pset1<Packet2d>(X)
#define _EIGEN_DECLARE_CONST_Packet2l(NAME,X) \
Packet2l p2l_##NAME = pset1<Packet2l>(X)
// These constants are endian-agnostic
//static _EIGEN_DECLARE_CONST_FAST_Packet4i(ZERO, 0); //{ 0, 0, 0, 0,}
static _EIGEN_DECLARE_CONST_FAST_Packet4i(ONE, 1); //{ 1, 1, 1, 1}
static _EIGEN_DECLARE_CONST_FAST_Packet2d(ZERO, 0);
static _EIGEN_DECLARE_CONST_FAST_Packet2l(ZERO, 0);
static _EIGEN_DECLARE_CONST_FAST_Packet2l(ONE, 1);
static Packet2d p2d_ONE = { 1.0, 1.0 };
static Packet2d p2d_ZERO_ = { -0.0, -0.0 };
static Packet4i p4i_COUNTDOWN = { 0, 1, 2, 3 };
static Packet2d p2d_COUNTDOWN = reinterpret_cast<Packet2d>(vec_sld(reinterpret_cast<Packet16uc>(p2d_ZERO), reinterpret_cast<Packet16uc>(p2d_ONE), 8));
static Packet16uc p16uc_PSET64_HI = { 0,1,2,3, 4,5,6,7, 0,1,2,3, 4,5,6,7 };
static Packet16uc p16uc_DUPLICATE32_HI = { 0,1,2,3, 0,1,2,3, 4,5,6,7, 4,5,6,7 };
// Mask alignment
#define _EIGEN_MASK_ALIGNMENT 0xfffffffffffffff0
#define _EIGEN_ALIGNED_PTR(x) ((ptrdiff_t)(x) & _EIGEN_MASK_ALIGNMENT)
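Clearing the four low address bits rounds a pointer down to the previous 16-byte boundary, for example:
// _EIGEN_ALIGNED_PTR example: 0x100b & 0xfffffffffffffff0 == 0x1000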
// Handle endianness properly while loading constants
// Define global static constants:
static Packet16uc p16uc_FORWARD = { 0,1,2,3, 4,5,6,7, 8,9,10,11, 12,13,14,15 };
static Packet16uc p16uc_REVERSE32 = { 12,13,14,15, 8,9,10,11, 4,5,6,7, 0,1,2,3 };
static Packet16uc p16uc_REVERSE64 = { 8,9,10,11, 12,13,14,15, 0,1,2,3, 4,5,6,7 };
static Packet16uc p16uc_PSET32_WODD = vec_sld((Packet16uc) vec_splat((Packet4ui)p16uc_FORWARD, 0), (Packet16uc) vec_splat((Packet4ui)p16uc_FORWARD, 2), 8);//{ 0,1,2,3, 0,1,2,3, 8,9,10,11, 8,9,10,11 };
static Packet16uc p16uc_PSET32_WEVEN = vec_sld(p16uc_DUPLICATE32_HI, (Packet16uc) vec_splat((Packet4ui)p16uc_FORWARD, 3), 8);//{ 4,5,6,7, 4,5,6,7, 12,13,14,15, 12,13,14,15 };
/*static Packet16uc p16uc_HALF64_0_16 = vec_sld((Packet16uc)p4i_ZERO, vec_splat((Packet16uc) vec_abs(p4i_MINUS16), 3), 8); //{ 0,0,0,0, 0,0,0,0, 16,16,16,16, 16,16,16,16};
static Packet16uc p16uc_PSET64_HI = (Packet16uc) vec_mergeh((Packet4ui)p16uc_PSET32_WODD, (Packet4ui)p16uc_PSET32_WEVEN); //{ 0,1,2,3, 4,5,6,7, 0,1,2,3, 4,5,6,7 };*/
static Packet16uc p16uc_PSET64_LO = (Packet16uc) vec_mergel((Packet4ui)p16uc_PSET32_WODD, (Packet4ui)p16uc_PSET32_WEVEN); //{ 8,9,10,11, 12,13,14,15, 8,9,10,11, 12,13,14,15 };
/*static Packet16uc p16uc_TRANSPOSE64_HI = vec_add(p16uc_PSET64_HI, p16uc_HALF64_0_16); //{ 0,1,2,3, 4,5,6,7, 16,17,18,19, 20,21,22,23};
static Packet16uc p16uc_TRANSPOSE64_LO = vec_add(p16uc_PSET64_LO, p16uc_HALF64_0_16); //{ 8,9,10,11, 12,13,14,15, 24,25,26,27, 28,29,30,31};*/
static Packet16uc p16uc_TRANSPOSE64_HI = { 0,1,2,3, 4,5,6,7, 16,17,18,19, 20,21,22,23};
static Packet16uc p16uc_TRANSPOSE64_LO = { 8,9,10,11, 12,13,14,15, 24,25,26,27, 28,29,30,31};
//static Packet16uc p16uc_COMPLEX32_REV = vec_sld(p16uc_REVERSE32, p16uc_REVERSE32, 8); //{ 4,5,6,7, 0,1,2,3, 12,13,14,15, 8,9,10,11 };
//static Packet16uc p16uc_COMPLEX32_REV2 = vec_sld(p16uc_FORWARD, p16uc_FORWARD, 8); //{ 8,9,10,11, 12,13,14,15, 0,1,2,3, 4,5,6,7 };
#if EIGEN_HAS_BUILTIN(__builtin_prefetch) || EIGEN_COMP_GNUC
#define EIGEN_ZVECTOR_PREFETCH(ADDR) __builtin_prefetch(ADDR);
#else
#define EIGEN_ZVECTOR_PREFETCH(ADDR) asm( " pfd [%[addr]]\n" :: [addr] "r" (ADDR) : "cc" );
#endif
template<> struct packet_traits<int> : default_packet_traits
{
typedef Packet4i type;
typedef Packet4i half;
enum {
// FIXME check the Has*
Vectorizable = 1,
AlignedOnScalar = 1,
size = 4,
HasHalfPacket = 0,
// FIXME check the Has*
HasAdd = 1,
HasSub = 1,
HasMul = 1,
HasDiv = 1,
HasBlend = 1
};
};
template<> struct packet_traits<double> : default_packet_traits
{
typedef Packet2d type;
typedef Packet2d half;
enum {
Vectorizable = 1,
AlignedOnScalar = 1,
size=2,
HasHalfPacket = 1,
// FIXME check the Has*
HasAdd = 1,
HasSub = 1,
HasMul = 1,
HasDiv = 1,
HasMin = 1,
HasMax = 1,
HasAbs = 1,
HasSin = 0,
HasCos = 0,
HasLog = 0,
HasExp = 1,
HasSqrt = 1,
HasRsqrt = 1,
HasRound = 1,
HasFloor = 1,
HasCeil = 1,
HasNegate = 1,
HasBlend = 1
};
};
template<> struct unpacket_traits<Packet4i> { typedef int type; enum {size=4, alignment=Aligned16}; typedef Packet4i half; };
template<> struct unpacket_traits<Packet2d> { typedef double type; enum {size=2, alignment=Aligned16}; typedef Packet2d half; };
inline std::ostream & operator <<(std::ostream & s, const Packet4i & v)
{
Packet vt;
vt.v4i = v;
s << vt.i[0] << ", " << vt.i[1] << ", " << vt.i[2] << ", " << vt.i[3];
return s;
}
inline std::ostream & operator <<(std::ostream & s, const Packet4ui & v)
{
Packet vt;
vt.v4ui = v;
s << vt.ui[0] << ", " << vt.ui[1] << ", " << vt.ui[2] << ", " << vt.ui[3];
return s;
}
inline std::ostream & operator <<(std::ostream & s, const Packet2l & v)
{
Packet vt;
vt.v2l = v;
s << vt.l[0] << ", " << vt.l[1];
return s;
}
inline std::ostream & operator <<(std::ostream & s, const Packet2ul & v)
{
Packet vt;
vt.v2ul = v;
s << vt.ul[0] << ", " << vt.ul[1] ;
return s;
}
inline std::ostream & operator <<(std::ostream & s, const Packet2d & v)
{
Packet vt;
vt.v2d = v;
s << vt.d[0] << ", " << vt.d[1];
return s;
}
template<int Offset>
struct palign_impl<Offset,Packet4i>
{
static EIGEN_STRONG_INLINE void run(Packet4i& first, const Packet4i& second)
{
switch (Offset % 4) {
case 1:
first = vec_sld(first, second, 4); break;
case 2:
first = vec_sld(first, second, 8); break;
case 3:
first = vec_sld(first, second, 12); break;
}
}
};
template<int Offset>
struct palign_impl<Offset,Packet2d>
{
static EIGEN_STRONG_INLINE void run(Packet2d& first, const Packet2d& second)
{
if (Offset == 1)
first = reinterpret_cast<Packet2d>(vec_sld(reinterpret_cast<Packet4i>(first), reinterpret_cast<Packet4i>(second), 8));
}
};
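palign slides an Offset-element window across two consecutive packets; vec_sld concatenates its operands and shifts left by the given byte count. A worked example:
// Packet4i, Offset == 1:
//   first = {a0, a1, a2, a3},  second = {b0, b1, b2, b3}
//   vec_sld(first, second, 4)  -> {a1, a2, a3, b0}
// Packet2d, Offset == 1:
//   vec_sld(first, second, 8)  -> {first[1], second[0]}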
template<> EIGEN_STRONG_INLINE Packet4i pload<Packet4i>(const int* from)
{
// FIXME: No intrinsic yet
EIGEN_DEBUG_ALIGNED_LOAD
Packet *vfrom;
vfrom = (Packet *) from;
return vfrom->v4i;
}
template<> EIGEN_STRONG_INLINE Packet2d pload<Packet2d>(const double* from)
{
// FIXME: No intrinsic yet
EIGEN_DEBUG_ALIGNED_LOAD
Packet *vfrom;
vfrom = (Packet *) from;
return vfrom->v2d;
}
template<> EIGEN_STRONG_INLINE void pstore<int>(int* to, const Packet4i& from)
{
// FIXME: No intrinsic yet
EIGEN_DEBUG_ALIGNED_STORE
Packet *vto;
vto = (Packet *) to;
vto->v4i = from;
}
template<> EIGEN_STRONG_INLINE void pstore<double>(double* to, const Packet2d& from)
{
// FIXME: No intrinsic yet
EIGEN_DEBUG_ALIGNED_STORE
Packet *vto;
vto = (Packet *) to;
vto->v2d = from;
}
template<> EIGEN_STRONG_INLINE Packet4i pset1<Packet4i>(const int& from)
{
return vec_splats(from);
}
template<> EIGEN_STRONG_INLINE Packet2d pset1<Packet2d>(const double& from) {
return vec_splats(from);
}
template<> EIGEN_STRONG_INLINE void
pbroadcast4<Packet4i>(const int *a,
Packet4i& a0, Packet4i& a1, Packet4i& a2, Packet4i& a3)
{
a3 = pload<Packet4i>(a);
a0 = vec_splat(a3, 0);
a1 = vec_splat(a3, 1);
a2 = vec_splat(a3, 2);
a3 = vec_splat(a3, 3);
}
template<> EIGEN_STRONG_INLINE void
pbroadcast4<Packet2d>(const double *a,
Packet2d& a0, Packet2d& a1, Packet2d& a2, Packet2d& a3)
{
a1 = pload<Packet2d>(a);
a0 = vec_splat(a1, 0);
a1 = vec_splat(a1, 1);
a3 = pload<Packet2d>(a+2);
a2 = vec_splat(a3, 0);
a3 = vec_splat(a3, 1);
}
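pbroadcast4 loads four consecutive scalars and splats each one into its own packet; after the double version above:
//   a0 = {a[0], a[0]},  a1 = {a[1], a[1]},  a2 = {a[2], a[2]},  a3 = {a[3], a[3]}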
template<> EIGEN_DEVICE_FUNC inline Packet4i pgather<int, Packet4i>(const int* from, Index stride)
{
int EIGEN_ALIGN16 ai[4];
ai[0] = from[0*stride];
ai[1] = from[1*stride];
ai[2] = from[2*stride];
ai[3] = from[3*stride];
return pload<Packet4i>(ai);
}
template<> EIGEN_DEVICE_FUNC inline Packet2d pgather<double, Packet2d>(const double* from, Index stride)
{
double EIGEN_ALIGN16 af[2];
af[0] = from[0*stride];
af[1] = from[1*stride];
return pload<Packet2d>(af);
}
template<> EIGEN_DEVICE_FUNC inline void pscatter<int, Packet4i>(int* to, const Packet4i& from, Index stride)
{
int EIGEN_ALIGN16 ai[4];
pstore<int>((int *)ai, from);
to[0*stride] = ai[0];
to[1*stride] = ai[1];
to[2*stride] = ai[2];
to[3*stride] = ai[3];
}
template<> EIGEN_DEVICE_FUNC inline void pscatter<double, Packet2d>(double* to, const Packet2d& from, Index stride)
{
double EIGEN_ALIGN16 af[2];
pstore<double>(af, from);
to[0*stride] = af[0];
to[1*stride] = af[1];
}
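gather/scatter go through a small aligned scalar buffer. A typical use is reading a column of a row-major matrix, where the stride is the row length (a sketch; the function name is illustrative):
// Gather the first two entries of column 0 of a row-major 4x4 matrix.
static inline Packet2d column0_head(const double* m)
{
  return pgather<double, Packet2d>(m, /*stride=*/4); // {m[0], m[4]}
}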
template<> EIGEN_STRONG_INLINE Packet4i padd<Packet4i>(const Packet4i& a, const Packet4i& b) { return (a + b); }
template<> EIGEN_STRONG_INLINE Packet2d padd<Packet2d>(const Packet2d& a, const Packet2d& b) { return (a + b); }
template<> EIGEN_STRONG_INLINE Packet4i psub<Packet4i>(const Packet4i& a, const Packet4i& b) { return (a - b); }
template<> EIGEN_STRONG_INLINE Packet2d psub<Packet2d>(const Packet2d& a, const Packet2d& b) { return (a - b); }
template<> EIGEN_STRONG_INLINE Packet4i pmul<Packet4i>(const Packet4i& a, const Packet4i& b) { return (a * b); }
template<> EIGEN_STRONG_INLINE Packet2d pmul<Packet2d>(const Packet2d& a, const Packet2d& b) { return (a * b); }
template<> EIGEN_STRONG_INLINE Packet4i pdiv<Packet4i>(const Packet4i& a, const Packet4i& b) { return (a / b); }
template<> EIGEN_STRONG_INLINE Packet2d pdiv<Packet2d>(const Packet2d& a, const Packet2d& b) { return (a / b); }
template<> EIGEN_STRONG_INLINE Packet4i pnegate(const Packet4i& a) { return (-a); }
template<> EIGEN_STRONG_INLINE Packet2d pnegate(const Packet2d& a) { return (-a); }
template<> EIGEN_STRONG_INLINE Packet4i pconj(const Packet4i& a) { return a; }
template<> EIGEN_STRONG_INLINE Packet2d pconj(const Packet2d& a) { return a; }
template<> EIGEN_STRONG_INLINE Packet4i pmadd(const Packet4i& a, const Packet4i& b, const Packet4i& c) { return padd<Packet4i>(pmul<Packet4i>(a, b), c); }
template<> EIGEN_STRONG_INLINE Packet2d pmadd(const Packet2d& a, const Packet2d& b, const Packet2d& c) { return vec_madd(a, b, c); }
template<> EIGEN_STRONG_INLINE Packet4i plset<Packet4i>(const int& a) { return padd<Packet4i>(pset1<Packet4i>(a), p4i_COUNTDOWN); }
template<> EIGEN_STRONG_INLINE Packet2d plset<Packet2d>(const double& a) { return padd<Packet2d>(pset1<Packet2d>(a), p2d_COUNTDOWN); }
template<> EIGEN_STRONG_INLINE Packet4i pmin<Packet4i>(const Packet4i& a, const Packet4i& b) { return vec_min(a, b); }
template<> EIGEN_STRONG_INLINE Packet2d pmin<Packet2d>(const Packet2d& a, const Packet2d& b) { return vec_min(a, b); }
template<> EIGEN_STRONG_INLINE Packet4i pmax<Packet4i>(const Packet4i& a, const Packet4i& b) { return vec_max(a, b); }
template<> EIGEN_STRONG_INLINE Packet2d pmax<Packet2d>(const Packet2d& a, const Packet2d& b) { return vec_max(a, b); }
template<> EIGEN_STRONG_INLINE Packet4i pand<Packet4i>(const Packet4i& a, const Packet4i& b) { return vec_and(a, b); }
template<> EIGEN_STRONG_INLINE Packet2d pand<Packet2d>(const Packet2d& a, const Packet2d& b) { return vec_and(a, b); }
template<> EIGEN_STRONG_INLINE Packet4i por<Packet4i>(const Packet4i& a, const Packet4i& b) { return vec_or(a, b); }
template<> EIGEN_STRONG_INLINE Packet2d por<Packet2d>(const Packet2d& a, const Packet2d& b) { return vec_or(a, b); }
template<> EIGEN_STRONG_INLINE Packet4i pxor<Packet4i>(const Packet4i& a, const Packet4i& b) { return vec_xor(a, b); }
template<> EIGEN_STRONG_INLINE Packet2d pxor<Packet2d>(const Packet2d& a, const Packet2d& b) { return vec_xor(a, b); }
template<> EIGEN_STRONG_INLINE Packet4i pandnot<Packet4i>(const Packet4i& a, const Packet4i& b) { return pand<Packet4i>(a, vec_nor(b, b)); }
template<> EIGEN_STRONG_INLINE Packet2d pandnot<Packet2d>(const Packet2d& a, const Packet2d& b) { return vec_and(a, vec_nor(b, b)); }
template<> EIGEN_STRONG_INLINE Packet2d pround<Packet2d>(const Packet2d& a) { return vec_round(a); }
template<> EIGEN_STRONG_INLINE Packet2d pceil<Packet2d>(const Packet2d& a) { return vec_ceil(a); }
template<> EIGEN_STRONG_INLINE Packet2d pfloor<Packet2d>(const Packet2d& a) { return vec_floor(a); }
template<> EIGEN_STRONG_INLINE Packet4i ploadu<Packet4i>(const int* from) { return pload<Packet4i>(from); }
template<> EIGEN_STRONG_INLINE Packet2d ploadu<Packet2d>(const double* from) { return pload<Packet2d>(from); }
template<> EIGEN_STRONG_INLINE Packet4i ploaddup<Packet4i>(const int* from)
{
Packet4i p = pload<Packet4i>(from);
return vec_perm(p, p, p16uc_DUPLICATE32_HI);
}
template<> EIGEN_STRONG_INLINE Packet2d ploaddup<Packet2d>(const double* from)
{
Packet2d p = pload<Packet2d>(from);
return vec_perm(p, p, p16uc_PSET64_HI);
}
template<> EIGEN_STRONG_INLINE void pstoreu<int>(int* to, const Packet4i& from) { pstore<int>(to, from); }
template<> EIGEN_STRONG_INLINE void pstoreu<double>(double* to, const Packet2d& from) { pstore<double>(to, from); }
template<> EIGEN_STRONG_INLINE void prefetch<int>(const int* addr) { EIGEN_ZVECTOR_PREFETCH(addr); }
template<> EIGEN_STRONG_INLINE void prefetch<double>(const double* addr) { EIGEN_ZVECTOR_PREFETCH(addr); }
template<> EIGEN_STRONG_INLINE int pfirst<Packet4i>(const Packet4i& a) { int EIGEN_ALIGN16 x[4]; pstore(x, a); return x[0]; }
template<> EIGEN_STRONG_INLINE double pfirst<Packet2d>(const Packet2d& a) { double EIGEN_ALIGN16 x[2]; pstore(x, a); return x[0]; }
template<> EIGEN_STRONG_INLINE Packet4i preverse(const Packet4i& a)
{
return reinterpret_cast<Packet4i>(vec_perm(reinterpret_cast<Packet16uc>(a), reinterpret_cast<Packet16uc>(a), p16uc_REVERSE32));
}
template<> EIGEN_STRONG_INLINE Packet2d preverse(const Packet2d& a)
{
return reinterpret_cast<Packet2d>(vec_perm(reinterpret_cast<Packet16uc>(a), reinterpret_cast<Packet16uc>(a), p16uc_REVERSE64));
}
template<> EIGEN_STRONG_INLINE Packet4i pabs(const Packet4i& a) { return vec_abs(a); }
template<> EIGEN_STRONG_INLINE Packet2d pabs(const Packet2d& a) { return vec_abs(a); }
template<> EIGEN_STRONG_INLINE int predux<Packet4i>(const Packet4i& a)
{
Packet4i b, sum;
b = vec_sld(a, a, 8);
sum = padd<Packet4i>(a, b);
b = vec_sld(sum, sum, 4);
sum = padd<Packet4i>(sum, b);
return pfirst(sum);
}
template<> EIGEN_STRONG_INLINE double predux<Packet2d>(const Packet2d& a)
{
Packet2d b, sum;
b = reinterpret_cast<Packet2d>(vec_sld(reinterpret_cast<Packet4i>(a), reinterpret_cast<Packet4i>(a), 8));
sum = padd<Packet2d>(a, b);
return pfirst(sum);
}
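The horizontal sums work by shifting half the packet over itself: for Packet2d, vec_sld by 8 bytes swaps the two lanes, so one add leaves the total in both lanes:
//   a = {x, y};  b = {y, x};  a + b = {x+y, x+y};  pfirst -> x+y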
template<> EIGEN_STRONG_INLINE Packet4i preduxp<Packet4i>(const Packet4i* vecs)
{
Packet4i v[4], sum[4];
// It's easier and faster to transpose and then sum as columns
// See http://www.freevec.org/function/matrix_4x4_transpose_floats for an explanation
// Do the transpose, first set of moves
v[0] = vec_mergeh(vecs[0], vecs[2]);
v[1] = vec_mergel(vecs[0], vecs[2]);
v[2] = vec_mergeh(vecs[1], vecs[3]);
v[3] = vec_mergel(vecs[1], vecs[3]);
// Get the resulting vectors
sum[0] = vec_mergeh(v[0], v[2]);
sum[1] = vec_mergel(v[0], v[2]);
sum[2] = vec_mergeh(v[1], v[3]);
sum[3] = vec_mergel(v[1], v[3]);
// Now do the summation:
// Lines 0+1
sum[0] = padd<Packet4i>(sum[0], sum[1]);
// Lines 2+3
sum[1] = padd<Packet4i>(sum[2], sum[3]);
// Add the results
sum[0] = padd<Packet4i>(sum[0], sum[1]);
return sum[0];
}
template<> EIGEN_STRONG_INLINE Packet2d preduxp<Packet2d>(const Packet2d* vecs)
{
Packet2d v[2], sum;
v[0] = padd<Packet2d>(vecs[0], reinterpret_cast<Packet2d>(vec_sld(reinterpret_cast<Packet4ui>(vecs[0]), reinterpret_cast<Packet4ui>(vecs[0]), 8)));
v[1] = padd<Packet2d>(vecs[1], reinterpret_cast<Packet2d>(vec_sld(reinterpret_cast<Packet4ui>(vecs[1]), reinterpret_cast<Packet4ui>(vecs[1]), 8)));
sum = reinterpret_cast<Packet2d>(vec_sld(reinterpret_cast<Packet4ui>(v[0]), reinterpret_cast<Packet4ui>(v[1]), 8));
return sum;
}
// Other reduction functions:
// mul
template<> EIGEN_STRONG_INLINE int predux_mul<Packet4i>(const Packet4i& a)
{
EIGEN_ALIGN16 int aux[4];
pstore(aux, a);
return aux[0] * aux[1] * aux[2] * aux[3];
}
template<> EIGEN_STRONG_INLINE double predux_mul<Packet2d>(const Packet2d& a)
{
return pfirst(pmul(a, reinterpret_cast<Packet2d>(vec_sld(reinterpret_cast<Packet4i>(a), reinterpret_cast<Packet4i>(a), 8))));
}
// min
template<> EIGEN_STRONG_INLINE int predux_min<Packet4i>(const Packet4i& a)
{
Packet4i b, res;
b = pmin<Packet4i>(a, vec_sld(a, a, 8));
res = pmin<Packet4i>(b, vec_sld(b, b, 4));
return pfirst(res);
}
template<> EIGEN_STRONG_INLINE double predux_min<Packet2d>(const Packet2d& a)
{
return pfirst(pmin<Packet2d>(a, reinterpret_cast<Packet2d>(vec_sld(reinterpret_cast<Packet4i>(a), reinterpret_cast<Packet4i>(a), 8))));
}
// max
template<> EIGEN_STRONG_INLINE int predux_max<Packet4i>(const Packet4i& a)
{
Packet4i b, res;
b = pmax<Packet4i>(a, vec_sld(a, a, 8));
res = pmax<Packet4i>(b, vec_sld(b, b, 4));
return pfirst(res);
}
template<> EIGEN_STRONG_INLINE double predux_max<Packet2d>(const Packet2d& a)
{
return pfirst(pmax<Packet2d>(a, reinterpret_cast<Packet2d>(vec_sld(reinterpret_cast<Packet4i>(a), reinterpret_cast<Packet4i>(a), 8))));
}
EIGEN_DEVICE_FUNC inline void
ptranspose(PacketBlock<Packet4i,4>& kernel) {
Packet4i t0 = vec_mergeh(kernel.packet[0], kernel.packet[2]);
Packet4i t1 = vec_mergel(kernel.packet[0], kernel.packet[2]);
Packet4i t2 = vec_mergeh(kernel.packet[1], kernel.packet[3]);
Packet4i t3 = vec_mergel(kernel.packet[1], kernel.packet[3]);
kernel.packet[0] = vec_mergeh(t0, t2);
kernel.packet[1] = vec_mergel(t0, t2);
kernel.packet[2] = vec_mergeh(t1, t3);
kernel.packet[3] = vec_mergel(t1, t3);
}
EIGEN_DEVICE_FUNC inline void
ptranspose(PacketBlock<Packet2d,2>& kernel) {
Packet2d t0 = vec_perm(kernel.packet[0], kernel.packet[1], p16uc_TRANSPOSE64_HI);
Packet2d t1 = vec_perm(kernel.packet[0], kernel.packet[1], p16uc_TRANSPOSE64_LO);
kernel.packet[0] = t0;
kernel.packet[1] = t1;
}
template<> EIGEN_STRONG_INLINE Packet4i pblend(const Selector<4>& ifPacket, const Packet4i& thenPacket, const Packet4i& elsePacket) {
Packet4ui select = { ifPacket.select[0], ifPacket.select[1], ifPacket.select[2], ifPacket.select[3] };
Packet4ui mask = vec_cmpeq(select, reinterpret_cast<Packet4ui>(p4i_ONE));
return vec_sel(elsePacket, thenPacket, mask);
}
template<> EIGEN_STRONG_INLINE Packet2d pblend(const Selector<2>& ifPacket, const Packet2d& thenPacket, const Packet2d& elsePacket) {
Packet2ul select = { ifPacket.select[0], ifPacket.select[1] };
Packet2ul mask = vec_cmpeq(select, reinterpret_cast<Packet2ul>(p2l_ONE));
return vec_sel(elsePacket, thenPacket, mask);
}
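pblend builds a comparison mask from the selector and routes lanes through vec_sel: lane i comes from thenPacket when ifPacket.select[i] == 1, otherwise from elsePacket, e.g.:
//   select = {1, 0}, then = {t0, t1}, else = {e0, e1}  ->  result = {t0, e1}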
} // end namespace internal
} // end namespace Eigen
#endif // EIGEN_PACKET_MATH_ZVECTOR_H

View File

@ -238,7 +238,13 @@ template<typename Scalar> struct scalar_hypot_op {
}; };
template<typename Scalar> template<typename Scalar>
struct functor_traits<scalar_hypot_op<Scalar> > { struct functor_traits<scalar_hypot_op<Scalar> > {
enum { Cost = 5 * NumTraits<Scalar>::MulCost, PacketAccess=0 }; enum
{
Cost = 3 * NumTraits<Scalar>::AddCost +
2 * NumTraits<Scalar>::MulCost +
2 * NumTraits<Scalar>::template Div<false>::Cost,
PacketAccess = false
};
}; };
/** \internal /** \internal
@ -297,9 +303,10 @@ template<typename LhsScalar,typename RhsScalar> struct scalar_quotient_op {
}; };
template<typename LhsScalar,typename RhsScalar> template<typename LhsScalar,typename RhsScalar>
struct functor_traits<scalar_quotient_op<LhsScalar,RhsScalar> > { struct functor_traits<scalar_quotient_op<LhsScalar,RhsScalar> > {
typedef typename scalar_quotient_op<LhsScalar,RhsScalar>::result_type result_type;
enum { enum {
Cost = (NumTraits<LhsScalar>::MulCost + NumTraits<RhsScalar>::MulCost), // rough estimate! PacketAccess = scalar_quotient_op<LhsScalar,RhsScalar>::Vectorizable,
PacketAccess = scalar_quotient_op<LhsScalar,RhsScalar>::Vectorizable Cost = NumTraits<result_type>::template Div<PacketAccess>::Cost
}; };
}; };
@ -337,6 +344,55 @@ template<> struct functor_traits<scalar_boolean_or_op> {
}; };
}; };
/** \internal
* \brief Template functor to compute the incomplete gamma function igamma(a, x)
*
* \sa class CwiseBinaryOp, Cwise::igamma
*/
template<typename Scalar> struct scalar_igamma_op {
EIGEN_EMPTY_STRUCT_CTOR(scalar_igamma_op)
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& a, const Scalar& x) const {
using numext::igamma; return igamma(a, x);
}
template<typename Packet>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a, const Packet& x) const {
return internal::pigamma(a, x);
}
};
template<typename Scalar>
struct functor_traits<scalar_igamma_op<Scalar> > {
enum {
// Guesstimate
Cost = 20 * NumTraits<Scalar>::MulCost + 10 * NumTraits<Scalar>::AddCost,
PacketAccess = packet_traits<Scalar>::HasIGamma
};
};
/** \internal
* \brief Template functor to compute the complementary incomplete gamma function igammac(a, x)
*
* \sa class CwiseBinaryOp, Cwise::igammac
*/
template<typename Scalar> struct scalar_igammac_op {
EIGEN_EMPTY_STRUCT_CTOR(scalar_igammac_op)
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& a, const Scalar& x) const {
using numext::igammac; return igammac(a, x);
}
template<typename Packet>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a, const Packet& x) const
{
return internal::pigammac(a, x);
}
};
template<typename Scalar>
struct functor_traits<scalar_igammac_op<Scalar> > {
enum {
// Guesstimate
Cost = 20 * NumTraits<Scalar>::MulCost + 10 * NumTraits<Scalar>::AddCost,
PacketAccess = packet_traits<Scalar>::HasIGammac
};
};
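For reference, igamma/igammac evaluate the regularized incomplete gamma functions; a standard definition, stated here for context rather than taken from this diff:
P(a,x) = \mathrm{igamma}(a,x) = \frac{1}{\Gamma(a)} \int_0^x t^{a-1} e^{-t}\,dt, \qquad
Q(a,x) = \mathrm{igammac}(a,x) = 1 - P(a,x)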
//---------- binary functors bound to a constant, thus appearing as a unary functor ---------- //---------- binary functors bound to a constant, thus appearing as a unary functor ----------
@ -515,6 +571,10 @@ struct scalar_inverse_mult_op {
{ return internal::pdiv(pset1<Packet>(m_other),a); } { return internal::pdiv(pset1<Packet>(m_other),a); }
Scalar m_other; Scalar m_other;
}; };
template<typename Scalar>
struct functor_traits<scalar_inverse_mult_op<Scalar> >
{ enum { PacketAccess = packet_traits<Scalar>::HasDiv, Cost = NumTraits<Scalar>::template Div<PacketAccess>::Cost }; };
} // end namespace internal } // end namespace internal

View File

@ -37,7 +37,7 @@ template<typename Scalar>
struct functor_traits<scalar_identity_op<Scalar> > struct functor_traits<scalar_identity_op<Scalar> >
{ enum { Cost = NumTraits<Scalar>::AddCost, PacketAccess = false, IsRepeatable = true }; }; { enum { Cost = NumTraits<Scalar>::AddCost, PacketAccess = false, IsRepeatable = true }; };
template <typename Scalar, typename Packet, bool RandomAccess> struct linspaced_op_impl; template <typename Scalar, typename Packet, bool RandomAccess, bool IsInteger> struct linspaced_op_impl;
// linear access for packet ops: // linear access for packet ops:
// 1) initialization // 1) initialization
@ -48,12 +48,12 @@ template <typename Scalar, typename Packet, bool RandomAccess> struct linspaced_
// TODO: Perhaps it's better to initialize lazily (so not in the constructor but in packetOp) // TODO: Perhaps it's better to initialize lazily (so not in the constructor but in packetOp)
// in order to avoid the padd() in operator() ? // in order to avoid the padd() in operator() ?
template <typename Scalar, typename Packet> template <typename Scalar, typename Packet>
struct linspaced_op_impl<Scalar,Packet,false> struct linspaced_op_impl<Scalar,Packet,/*RandomAccess*/false,/*IsInteger*/false>
{ {
linspaced_op_impl(const Scalar& low, const Scalar& step) : linspaced_op_impl(const Scalar& low, const Scalar& high, Index num_steps) :
m_low(low), m_step(step), m_low(low), m_step(num_steps==1 ? Scalar() : (high-low)/Scalar(num_steps-1)),
m_packetStep(pset1<Packet>(unpacket_traits<Packet>::size*step)), m_packetStep(pset1<Packet>(unpacket_traits<Packet>::size*m_step)),
m_base(padd(pset1<Packet>(low), pmul(pset1<Packet>(step),plset<Packet>(-unpacket_traits<Packet>::size)))) {} m_base(padd(pset1<Packet>(low), pmul(pset1<Packet>(m_step),plset<Packet>(-unpacket_traits<Packet>::size)))) {}
template<typename Index> template<typename Index>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (Index i) const EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (Index i) const
@ -75,11 +75,11 @@ struct linspaced_op_impl<Scalar,Packet,false>
// 1) each step // 1) each step
// [low, ..., low] + ( [step, ..., step] * ( [i, ..., i] + [0, ..., size] ) ) // [low, ..., low] + ( [step, ..., step] * ( [i, ..., i] + [0, ..., size] ) )
template <typename Scalar, typename Packet> template <typename Scalar, typename Packet>
struct linspaced_op_impl<Scalar,Packet,true> struct linspaced_op_impl<Scalar,Packet,/*RandomAccess*/true,/*IsInteger*/false>
{ {
linspaced_op_impl(const Scalar& low, const Scalar& step) : linspaced_op_impl(const Scalar& low, const Scalar& high, Index num_steps) :
m_low(low), m_step(step), m_low(low), m_step(num_steps==1 ? Scalar() : (high-low)/Scalar(num_steps-1)),
m_lowPacket(pset1<Packet>(m_low)), m_stepPacket(pset1<Packet>(m_step)), m_interPacket(plset<Packet>(0)) {} m_lowPacket(pset1<Packet>(m_low)), m_stepPacket(pset1<Packet>(m_step)), m_interPacket(plset<Packet>(0)) {}
template<typename Index> template<typename Index>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (Index i) const { return m_low+i*m_step; } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (Index i) const { return m_low+i*m_step; }
@ -95,6 +95,31 @@ struct linspaced_op_impl<Scalar,Packet,true>
const Packet m_interPacket; const Packet m_interPacket;
}; };
template <typename Scalar, typename Packet>
struct linspaced_op_impl<Scalar,Packet,/*RandomAccess*/true,/*IsInteger*/true>
{
linspaced_op_impl(const Scalar& low, const Scalar& high, Index num_steps) :
m_low(low), m_length(high-low), m_divisor(num_steps==1?1:num_steps-1), m_interPacket(plset<Packet>(0))
{}
template<typename Index>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
const Scalar operator() (Index i) const {
return m_low + (m_length*Scalar(i))/m_divisor;
}
template<typename Index>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
const Packet packetOp(Index i) const {
return internal::padd(pset1<Packet>(m_low), pdiv(pmul(pset1<Packet>(m_length), padd(pset1<Packet>(Scalar(i)),m_interPacket)),
pset1<Packet>(m_divisor))); }
const Scalar m_low;
const Scalar m_length;
const Index m_divisor;
const Packet m_interPacket;
};
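A worked example of the integer path: LinSpaced with low = 0, high = 10, num_steps = 5 gives m_length = 10 and m_divisor = 4, so operator()(i) returns 0 + (10*i)/4 for i = 0..4, i.e. 0, 2, 5, 7, 10. Both endpoints are exact and intermediate values are rounded toward zero, which an accumulated integer step could not guarantee.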
// ----- Linspace functor ---------------------------------------------------------------- // ----- Linspace functor ----------------------------------------------------------------
// Forward declaration (we default to random access which does not really give // Forward declaration (we default to random access which does not really give
@ -102,10 +127,20 @@ struct linspaced_op_impl<Scalar,Packet,true>
// nested expressions). // nested expressions).
template <typename Scalar, typename PacketType, bool RandomAccess = true> struct linspaced_op; template <typename Scalar, typename PacketType, bool RandomAccess = true> struct linspaced_op;
template <typename Scalar, typename PacketType, bool RandomAccess> struct functor_traits< linspaced_op<Scalar,PacketType,RandomAccess> > template <typename Scalar, typename PacketType, bool RandomAccess> struct functor_traits< linspaced_op<Scalar,PacketType,RandomAccess> >
{ enum { Cost = 1, PacketAccess = packet_traits<Scalar>::HasSetLinear, IsRepeatable = true }; }; {
enum
{
Cost = 1,
PacketAccess = packet_traits<Scalar>::HasSetLinear
&& ((!NumTraits<Scalar>::IsInteger) || packet_traits<Scalar>::HasDiv),
IsRepeatable = true
};
};
template <typename Scalar, typename PacketType, bool RandomAccess> struct linspaced_op template <typename Scalar, typename PacketType, bool RandomAccess> struct linspaced_op
{ {
linspaced_op(const Scalar& low, const Scalar& high, Index num_steps) : impl((num_steps==1 ? high : low), (num_steps==1 ? Scalar() : (high-low)/Scalar(num_steps-1))) {} linspaced_op(const Scalar& low, const Scalar& high, Index num_steps)
: impl((num_steps==1 ? high : low),high,num_steps)
{}
template<typename Index> template<typename Index>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (Index i) const { return impl(i); } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (Index i) const { return impl(i); }
@ -134,7 +169,9 @@ template <typename Scalar, typename PacketType, bool RandomAccess> struct linspa
// This proxy object handles the actual required temporaries, the different // This proxy object handles the actual required temporaries, the different
// implementations (random vs. sequential access) as well as the // implementations (random vs. sequential access) as well as the
// correct piping to size 2/4 packet operations. // correct piping to size 2/4 packet operations.
const linspaced_op_impl<Scalar,PacketType,RandomAccess> impl; // As long as we don't have a Bresenham-like implementation for linear-access and integer types,
// we have to by-pass RandomAccess for integer types. See bug 698.
const linspaced_op_impl<Scalar,PacketType,(NumTraits<Scalar>::IsInteger?true:RandomAccess),NumTraits<Scalar>::IsInteger> impl;
}; };
// all functors allow linear access, except scalar_identity_op. So we fix here a quick meta // all functors allow linear access, except scalar_identity_op. So we fix here a quick meta

View File

@ -41,7 +41,7 @@ struct functor_traits<scalar_opposite_op<Scalar> >
template<typename Scalar> struct scalar_abs_op { template<typename Scalar> struct scalar_abs_op {
EIGEN_EMPTY_STRUCT_CTOR(scalar_abs_op) EIGEN_EMPTY_STRUCT_CTOR(scalar_abs_op)
typedef typename NumTraits<Scalar>::Real result_type; typedef typename NumTraits<Scalar>::Real result_type;
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const result_type operator() (const Scalar& a) const { using std::abs; return abs(a); } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const result_type operator() (const Scalar& a) const { return numext::abs(a); }
template<typename Packet> template<typename Packet>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a) const EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a) const
{ return internal::pabs(a); } { return internal::pabs(a); }
@ -73,7 +73,7 @@ template<typename Scalar, typename=void> struct abs_knowing_score
EIGEN_EMPTY_STRUCT_CTOR(abs_knowing_score) EIGEN_EMPTY_STRUCT_CTOR(abs_knowing_score)
typedef typename NumTraits<Scalar>::Real result_type; typedef typename NumTraits<Scalar>::Real result_type;
template<typename Score> template<typename Score>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const result_type operator() (const Scalar& a, const Score&) const { using std::abs; return abs(a); } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const result_type operator() (const Scalar& a, const Score&) const { return numext::abs(a); }
}; };
template<typename Scalar> struct abs_knowing_score<Scalar, typename scalar_score_coeff_op<Scalar>::Score_is_abs> template<typename Scalar> struct abs_knowing_score<Scalar, typename scalar_score_coeff_op<Scalar>::Score_is_abs>
{ {
@ -230,7 +230,7 @@ struct functor_traits<scalar_imag_ref_op<Scalar> >
*/ */
template<typename Scalar> struct scalar_exp_op { template<typename Scalar> struct scalar_exp_op {
EIGEN_EMPTY_STRUCT_CTOR(scalar_exp_op) EIGEN_EMPTY_STRUCT_CTOR(scalar_exp_op)
EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const { using std::exp; return exp(a); } EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const { return numext::exp(a); }
template <typename Packet> template <typename Packet>
EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& a) const { return internal::pexp(a); } EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& a) const { return internal::pexp(a); }
}; };
@ -246,7 +246,7 @@ struct functor_traits<scalar_exp_op<Scalar> >
*/ */
template<typename Scalar> struct scalar_log_op { template<typename Scalar> struct scalar_log_op {
EIGEN_EMPTY_STRUCT_CTOR(scalar_log_op) EIGEN_EMPTY_STRUCT_CTOR(scalar_log_op)
EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const { using std::log; return log(a); } EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const { return numext::log(a); }
template <typename Packet> template <typename Packet>
EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& a) const { return internal::plog(a); } EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& a) const { return internal::plog(a); }
}; };
@ -276,7 +276,7 @@ struct functor_traits<scalar_log10_op<Scalar> >
*/ */
template<typename Scalar> struct scalar_sqrt_op { template<typename Scalar> struct scalar_sqrt_op {
EIGEN_EMPTY_STRUCT_CTOR(scalar_sqrt_op) EIGEN_EMPTY_STRUCT_CTOR(scalar_sqrt_op)
EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const { using std::sqrt; return sqrt(a); } EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const { return numext::sqrt(a); }
template <typename Packet> template <typename Packet>
EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& a) const { return internal::psqrt(a); } EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& a) const { return internal::psqrt(a); }
}; };
@ -294,7 +294,7 @@ struct functor_traits<scalar_sqrt_op<Scalar> >
*/ */
template<typename Scalar> struct scalar_rsqrt_op { template<typename Scalar> struct scalar_rsqrt_op {
EIGEN_EMPTY_STRUCT_CTOR(scalar_rsqrt_op) EIGEN_EMPTY_STRUCT_CTOR(scalar_rsqrt_op)
EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const { using std::sqrt; return Scalar(1)/sqrt(a); } EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const { return Scalar(1)/numext::sqrt(a); }
template <typename Packet> template <typename Packet>
EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& a) const { return internal::prsqrt(a); } EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& a) const { return internal::prsqrt(a); }
}; };
@ -403,6 +403,143 @@ struct functor_traits<scalar_asin_op<Scalar> >
}; };
}; };
/** \internal
* \brief Template functor to compute the natural log of the absolute
* value of Gamma of a scalar
* \sa class CwiseUnaryOp, Cwise::lgamma()
*/
template<typename Scalar> struct scalar_lgamma_op {
EIGEN_EMPTY_STRUCT_CTOR(scalar_lgamma_op)
EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const {
using numext::lgamma; return lgamma(a);
}
typedef typename packet_traits<Scalar>::type Packet;
EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& a) const { return internal::plgamma(a); }
};
template<typename Scalar>
struct functor_traits<scalar_lgamma_op<Scalar> >
{
enum {
// Guesstimate
Cost = 10 * NumTraits<Scalar>::MulCost + 5 * NumTraits<Scalar>::AddCost,
PacketAccess = packet_traits<Scalar>::HasLGamma
};
};
/** \internal
* \brief Template functor to compute psi, the derivative of lgamma of a scalar.
* \sa class CwiseUnaryOp, Cwise::digamma()
*/
template<typename Scalar> struct scalar_digamma_op {
EIGEN_EMPTY_STRUCT_CTOR(scalar_digamma_op)
EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const {
using numext::digamma; return digamma(a);
}
typedef typename packet_traits<Scalar>::type Packet;
EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& a) const { return internal::pdigamma(a); }
};
template<typename Scalar>
struct functor_traits<scalar_digamma_op<Scalar> >
{
enum {
// Guesstimate
Cost = 10 * NumTraits<Scalar>::MulCost + 5 * NumTraits<Scalar>::AddCost,
PacketAccess = packet_traits<Scalar>::HasDiGamma
};
};
/** \internal
* \brief Template functor to compute the Riemann Zeta function of two arguments.
* \sa class CwiseUnaryOp, Cwise::zeta()
*/
template<typename Scalar> struct scalar_zeta_op {
EIGEN_EMPTY_STRUCT_CTOR(scalar_zeta_op)
EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& x, const Scalar& q) const {
using numext::zeta; return zeta(x, q);
}
typedef typename packet_traits<Scalar>::type Packet;
EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& x, const Packet& q) const { return internal::pzeta(x, q); }
};
template<typename Scalar>
struct functor_traits<scalar_zeta_op<Scalar> >
{
enum {
// Guesstimate
Cost = 10 * NumTraits<Scalar>::MulCost + 5 * NumTraits<Scalar>::AddCost,
PacketAccess = packet_traits<Scalar>::HasZeta
};
};
/** \internal
* \brief Template functor to compute the polygamma function.
* \sa class CwiseUnaryOp, Cwise::polygamma()
*/
template<typename Scalar> struct scalar_polygamma_op {
EIGEN_EMPTY_STRUCT_CTOR(scalar_polygamma_op)
EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& n, const Scalar& x) const {
using numext::polygamma; return polygamma(n, x);
}
typedef typename packet_traits<Scalar>::type Packet;
EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& n, const Packet& x) const { return internal::ppolygamma(n, x); }
};
template<typename Scalar>
struct functor_traits<scalar_polygamma_op<Scalar> >
{
enum {
// Guesstimate
Cost = 10 * NumTraits<Scalar>::MulCost + 5 * NumTraits<Scalar>::AddCost,
PacketAccess = packet_traits<Scalar>::HasPolygamma
};
};
/** \internal
* \brief Template functor to compute the Gauss error function of a
* scalar
* \sa class CwiseUnaryOp, Cwise::erf()
*/
template<typename Scalar> struct scalar_erf_op {
EIGEN_EMPTY_STRUCT_CTOR(scalar_erf_op)
EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const {
using numext::erf; return erf(a);
}
typedef typename packet_traits<Scalar>::type Packet;
EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& a) const { return internal::perf(a); }
};
template<typename Scalar>
struct functor_traits<scalar_erf_op<Scalar> >
{
enum {
// Guesstimate
Cost = 10 * NumTraits<Scalar>::MulCost + 5 * NumTraits<Scalar>::AddCost,
PacketAccess = packet_traits<Scalar>::HasErf
};
};
/** \internal
* \brief Template functor to compute the Complementary Error Function
* of a scalar
* \sa class CwiseUnaryOp, Cwise::erfc()
*/
template<typename Scalar> struct scalar_erfc_op {
EIGEN_EMPTY_STRUCT_CTOR(scalar_erfc_op)
EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const {
using numext::erfc; return erfc(a);
}
typedef typename packet_traits<Scalar>::type Packet;
EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& a) const { return internal::perfc(a); }
};
template<typename Scalar>
struct functor_traits<scalar_erfc_op<Scalar> >
{
enum {
// Guesstimate
Cost = 10 * NumTraits<Scalar>::MulCost + 5 * NumTraits<Scalar>::AddCost,
PacketAccess = packet_traits<Scalar>::HasErfc
};
};
/** \internal /** \internal
* \brief Template functor to compute the atan of a scalar * \brief Template functor to compute the atan of a scalar
* \sa class CwiseUnaryOp, ArrayBase::atan() * \sa class CwiseUnaryOp, ArrayBase::atan()
@ -422,6 +559,7 @@ struct functor_traits<scalar_atan_op<Scalar> >
}; };
}; };
/** \internal /** \internal
* \brief Template functor to compute the tanh of a scalar * \brief Template functor to compute the tanh of a scalar
* \sa class CwiseUnaryOp, ArrayBase::tanh() * \sa class CwiseUnaryOp, ArrayBase::tanh()
@ -572,7 +710,7 @@ struct functor_traits<scalar_floor_op<Scalar> >
template<typename Scalar> struct scalar_ceil_op { template<typename Scalar> struct scalar_ceil_op {
EIGEN_EMPTY_STRUCT_CTOR(scalar_ceil_op) EIGEN_EMPTY_STRUCT_CTOR(scalar_ceil_op)
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& a) const { return numext::ceil(a); } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& a) const { return numext::ceil(a); }
typedef typename packet_traits<Scalar>::type Packet; template <typename Packet>
EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& a) const { return internal::pceil(a); } EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& a) const { return internal::pceil(a); }
}; };
template<typename Scalar> template<typename Scalar>
@ -660,10 +798,10 @@ struct functor_traits<scalar_boolean_not_op<Scalar> > {
* \sa class CwiseUnaryOp, Cwise::sign() * \sa class CwiseUnaryOp, Cwise::sign()
*/ */
template<typename Scalar,bool iscpx=(NumTraits<Scalar>::IsComplex!=0) > struct scalar_sign_op; template<typename Scalar,bool iscpx=(NumTraits<Scalar>::IsComplex!=0) > struct scalar_sign_op;
template<typename Scalar> template<typename Scalar>
struct scalar_sign_op<Scalar,false> { struct scalar_sign_op<Scalar,false> {
EIGEN_EMPTY_STRUCT_CTOR(scalar_sign_op) EIGEN_EMPTY_STRUCT_CTOR(scalar_sign_op)
EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const
{ {
return Scalar( (a>Scalar(0)) - (a<Scalar(0)) ); return Scalar( (a>Scalar(0)) - (a<Scalar(0)) );
} }
@ -671,17 +809,16 @@ struct scalar_sign_op<Scalar,false> {
//template <typename Packet> //template <typename Packet>
//EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& a) const { return internal::psign(a); } //EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& a) const { return internal::psign(a); }
}; };
template<typename Scalar> template<typename Scalar>
struct scalar_sign_op<Scalar,true> { struct scalar_sign_op<Scalar,true> {
EIGEN_EMPTY_STRUCT_CTOR(scalar_sign_op) EIGEN_EMPTY_STRUCT_CTOR(scalar_sign_op)
EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const
{ {
using std::abs;
typedef typename NumTraits<Scalar>::Real real_type; typedef typename NumTraits<Scalar>::Real real_type;
real_type aa = abs(a); real_type aa = numext::abs(a);
if (aa==0) if (aa==0)
return Scalar(0); return Scalar(0);
aa = 1./aa; aa = 1./aa;
return Scalar(real(a)*aa, imag(a)*aa ); return Scalar(real(a)*aa, imag(a)*aa );
} }
//TODO //TODO

View File

@ -178,7 +178,7 @@ void evaluateProductBlockingSizesHeuristic(Index& k, Index& m, Index& n, Index n
// We also include a register-level block of the result (mx x nr). // We also include a register-level block of the result (mx x nr).
// (In an ideal world only the lhs panel would stay in L1) // (In an ideal world only the lhs panel would stay in L1)
// Moreover, kc has to be a multiple of 8 to be compatible with loop peeling, leading to a maximum blocking size of: // Moreover, kc has to be a multiple of 8 to be compatible with loop peeling, leading to a maximum blocking size of:
const Index max_kc = ((l1-k_sub)/k_div) & (~(k_peeling-1)); const Index max_kc = std::max<Index>(((l1-k_sub)/k_div) & (~(k_peeling-1)),1);
const Index old_k = k; const Index old_k = k;
if(k>max_kc) if(k>max_kc)
{ {
@ -252,7 +252,7 @@ void evaluateProductBlockingSizesHeuristic(Index& k, Index& m, Index& n, Index n
// we have both L2 and L3, and problem is small enough to be kept in L2 // we have both L2 and L3, and problem is small enough to be kept in L2
// Let's choose m such that lhs's block fit in 1/3 of L2 // Let's choose m such that lhs's block fit in 1/3 of L2
actual_lm = l2; actual_lm = l2;
max_mc = 576; max_mc = (std::min<Index>)(576,max_mc);
} }
Index mc = (std::min<Index>)(actual_lm/(3*k*sizeof(LhsScalar)), max_mc); Index mc = (std::min<Index>)(actual_lm/(3*k*sizeof(LhsScalar)), max_mc);
if (mc > Traits::mr) mc -= mc % Traits::mr; if (mc > Traits::mr) mc -= mc % Traits::mr;

View File

@ -145,12 +145,9 @@ static void run(Index rows, Index cols, Index depth,
// Release all the sub blocks A'_i of A' for the current thread, // Release all the sub blocks A'_i of A' for the current thread,
// i.e., we simply decrement the number of users by 1 // i.e., we simply decrement the number of users by 1
#pragma omp critical
{
for(Index i=0; i<threads; ++i) for(Index i=0; i<threads; ++i)
#pragma omp atomic #pragma omp atomic
info[i].users -= 1; info[i].users -= 1;
}
} }
} }
else else
@ -355,9 +352,8 @@ class gemm_blocking_space<StorageOrder,_LhsScalar,_RhsScalar,MaxRows, MaxCols, M
} }
else // no l3 blocking else // no l3 blocking
{ {
Index m = this->m_mc;
Index n = this->m_nc; Index n = this->m_nc;
computeProductBlockingSizes<LhsScalar,RhsScalar,KcFactor>(this->m_kc, m, n, num_threads); computeProductBlockingSizes<LhsScalar,RhsScalar,KcFactor>(this->m_kc, this->m_mc, n, num_threads);
} }
m_sizeA = this->m_mc * this->m_kc; m_sizeA = this->m_mc * this->m_kc;

View File

@ -42,13 +42,14 @@ struct general_matrix_matrix_triangular_product<Index,LhsScalar,LhsStorageOrder,
{ {
typedef typename scalar_product_traits<LhsScalar, RhsScalar>::ReturnType ResScalar; typedef typename scalar_product_traits<LhsScalar, RhsScalar>::ReturnType ResScalar;
static EIGEN_STRONG_INLINE void run(Index size, Index depth,const LhsScalar* lhs, Index lhsStride, static EIGEN_STRONG_INLINE void run(Index size, Index depth,const LhsScalar* lhs, Index lhsStride,
const RhsScalar* rhs, Index rhsStride, ResScalar* res, Index resStride, const ResScalar& alpha) const RhsScalar* rhs, Index rhsStride, ResScalar* res, Index resStride,
const ResScalar& alpha, level3_blocking<LhsScalar,RhsScalar>& blocking)
{ {
general_matrix_matrix_triangular_product<Index, general_matrix_matrix_triangular_product<Index,
RhsScalar, RhsStorageOrder==RowMajor ? ColMajor : RowMajor, ConjugateRhs, RhsScalar, RhsStorageOrder==RowMajor ? ColMajor : RowMajor, ConjugateRhs,
LhsScalar, LhsStorageOrder==RowMajor ? ColMajor : RowMajor, ConjugateLhs, LhsScalar, LhsStorageOrder==RowMajor ? ColMajor : RowMajor, ConjugateLhs,
ColMajor, UpLo==Lower?Upper:Lower> ColMajor, UpLo==Lower?Upper:Lower>
::run(size,depth,rhs,rhsStride,lhs,lhsStride,res,resStride,alpha); ::run(size,depth,rhs,rhsStride,lhs,lhsStride,res,resStride,alpha,blocking);
} }
}; };
@ -58,7 +59,8 @@ struct general_matrix_matrix_triangular_product<Index,LhsScalar,LhsStorageOrder,
{ {
typedef typename scalar_product_traits<LhsScalar, RhsScalar>::ReturnType ResScalar; typedef typename scalar_product_traits<LhsScalar, RhsScalar>::ReturnType ResScalar;
static EIGEN_STRONG_INLINE void run(Index size, Index depth,const LhsScalar* _lhs, Index lhsStride, static EIGEN_STRONG_INLINE void run(Index size, Index depth,const LhsScalar* _lhs, Index lhsStride,
const RhsScalar* _rhs, Index rhsStride, ResScalar* _res, Index resStride, const ResScalar& alpha) const RhsScalar* _rhs, Index rhsStride, ResScalar* _res, Index resStride,
const ResScalar& alpha, level3_blocking<LhsScalar,RhsScalar>& blocking)
{ {
typedef gebp_traits<LhsScalar,RhsScalar> Traits; typedef gebp_traits<LhsScalar,RhsScalar> Traits;
@ -69,16 +71,18 @@ struct general_matrix_matrix_triangular_product<Index,LhsScalar,LhsStorageOrder,
RhsMapper rhs(_rhs,rhsStride); RhsMapper rhs(_rhs,rhsStride);
ResMapper res(_res, resStride); ResMapper res(_res, resStride);
Index kc = depth; // cache block size along the K direction Index kc = blocking.kc();
Index mc = size; // cache block size along the M direction Index mc = (std::min)(size,blocking.mc());
Index nc = size; // cache block size along the N direction
computeProductBlockingSizes<LhsScalar,RhsScalar>(kc, mc, nc, 1);
// !!! mc must be a multiple of nr: // !!! mc must be a multiple of nr:
if(mc > Traits::nr) if(mc > Traits::nr)
mc = (mc/Traits::nr)*Traits::nr; mc = (mc/Traits::nr)*Traits::nr;
ei_declare_aligned_stack_constructed_variable(LhsScalar, blockA, kc*mc, 0); std::size_t sizeA = kc*mc;
ei_declare_aligned_stack_constructed_variable(RhsScalar, blockB, kc*size, 0); std::size_t sizeB = kc*size;
ei_declare_aligned_stack_constructed_variable(LhsScalar, blockA, sizeA, blocking.blockA());
ei_declare_aligned_stack_constructed_variable(RhsScalar, blockB, sizeB, blocking.blockB());
gemm_pack_lhs<LhsScalar, Index, LhsMapper, Traits::mr, Traits::LhsProgress, LhsStorageOrder> pack_lhs; gemm_pack_lhs<LhsScalar, Index, LhsMapper, Traits::mr, Traits::LhsProgress, LhsStorageOrder> pack_lhs;
gemm_pack_rhs<RhsScalar, Index, RhsMapper, Traits::nr, RhsStorageOrder> pack_rhs; gemm_pack_rhs<RhsScalar, Index, RhsMapper, Traits::nr, RhsStorageOrder> pack_rhs;
@ -136,7 +140,7 @@ struct tribb_kernel
typedef typename Traits::ResScalar ResScalar; typedef typename Traits::ResScalar ResScalar;
enum { enum {
BlockSize = EIGEN_PLAIN_ENUM_MAX(mr,nr) BlockSize = meta_least_common_multiple<EIGEN_PLAIN_ENUM_MAX(mr,nr),EIGEN_PLAIN_ENUM_MIN(mr,nr)>::ret
}; };
void operator()(ResScalar* _res, Index resStride, const LhsScalar* blockA, const RhsScalar* blockB, Index size, Index depth, const ResScalar& alpha) void operator()(ResScalar* _res, Index resStride, const LhsScalar* blockA, const RhsScalar* blockB, Index size, Index depth, const ResScalar& alpha)
{ {
@ -256,13 +260,27 @@ struct general_product_to_triangular_selector<MatrixType,ProductType,UpLo,false>
typename ProductType::Scalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(prod.lhs().derived()) * RhsBlasTraits::extractScalarFactor(prod.rhs().derived()); typename ProductType::Scalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(prod.lhs().derived()) * RhsBlasTraits::extractScalarFactor(prod.rhs().derived());
enum {
IsRowMajor = (internal::traits<MatrixType>::Flags&RowMajorBit) ? 1 : 0,
LhsIsRowMajor = _ActualLhs::Flags&RowMajorBit ? 1 : 0,
RhsIsRowMajor = _ActualRhs::Flags&RowMajorBit ? 1 : 0
};
Index size = mat.cols();
Index depth = actualLhs.cols();
typedef internal::gemm_blocking_space<IsRowMajor ? RowMajor : ColMajor,typename Lhs::Scalar,typename Rhs::Scalar,
MatrixType::MaxColsAtCompileTime, MatrixType::MaxColsAtCompileTime, _ActualRhs::MaxColsAtCompileTime> BlockingType;
BlockingType blocking(size, size, depth, 1, false);
internal::general_matrix_matrix_triangular_product<Index, internal::general_matrix_matrix_triangular_product<Index,
typename Lhs::Scalar, _ActualLhs::Flags&RowMajorBit ? RowMajor : ColMajor, LhsBlasTraits::NeedToConjugate, typename Lhs::Scalar, LhsIsRowMajor ? RowMajor : ColMajor, LhsBlasTraits::NeedToConjugate,
typename Rhs::Scalar, _ActualRhs::Flags&RowMajorBit ? RowMajor : ColMajor, RhsBlasTraits::NeedToConjugate, typename Rhs::Scalar, RhsIsRowMajor ? RowMajor : ColMajor, RhsBlasTraits::NeedToConjugate,
MatrixType::Flags&RowMajorBit ? RowMajor : ColMajor, UpLo> IsRowMajor ? RowMajor : ColMajor, UpLo>
::run(mat.cols(), actualLhs.cols(), ::run(size, depth,
&actualLhs.coeffRef(0,0), actualLhs.outerStride(), &actualRhs.coeffRef(0,0), actualRhs.outerStride(), &actualLhs.coeffRef(0,0), actualLhs.outerStride(), &actualRhs.coeffRef(0,0), actualRhs.outerStride(),
mat.data(), mat.outerStride(), actualAlpha); mat.data(), mat.outerStride(), actualAlpha, blocking);
} }
}; };
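
The selector now sizes one gemm_blocking_space for the whole (size, size, depth) product and threads it down to the kernel, rather than letting the kernel recompute cache sizes per call. A user-level expression that reaches this path is a product assigned to a triangular view; a minimal sketch, assuming only the lower triangle is wanted (sizes illustrative):

#include <Eigen/Dense>

int main()
{
  Eigen::MatrixXd A = Eigen::MatrixXd::Random(6, 3);
  Eigen::MatrixXd B = Eigen::MatrixXd::Random(6, 3);
  Eigen::MatrixXd C = Eigen::MatrixXd::Zero(6, 6);
  // Only the lower triangle of C is computed and written; this is the
  // general_product_to_triangular_selector entry point patched above.
  C.triangularView<Eigen::Lower>() += 2.0 * A * B.transpose();
}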

View File

@ -25,13 +25,13 @@
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
******************************************************************************** ********************************************************************************
* Content : Eigen bindings to Intel(R) MKL * Content : Eigen bindings to BLAS F77
* Level 3 BLAS SYRK/HERK implementation. * Level 3 BLAS SYRK/HERK implementation.
******************************************************************************** ********************************************************************************
*/ */
#ifndef EIGEN_GENERAL_MATRIX_MATRIX_TRIANGULAR_MKL_H #ifndef EIGEN_GENERAL_MATRIX_MATRIX_TRIANGULAR_BLAS_H
#define EIGEN_GENERAL_MATRIX_MATRIX_TRIANGULAR_MKL_H #define EIGEN_GENERAL_MATRIX_MATRIX_TRIANGULAR_BLAS_H
namespace Eigen { namespace Eigen {
@ -44,34 +44,35 @@ struct general_matrix_matrix_rankupdate :
// try to go to BLAS specialization // try to go to BLAS specialization
#define EIGEN_MKL_RANKUPDATE_SPECIALIZE(Scalar) \ #define EIGEN_BLAS_RANKUPDATE_SPECIALIZE(Scalar) \
template <typename Index, int LhsStorageOrder, bool ConjugateLhs, \ template <typename Index, int LhsStorageOrder, bool ConjugateLhs, \
int RhsStorageOrder, bool ConjugateRhs, int UpLo> \ int RhsStorageOrder, bool ConjugateRhs, int UpLo> \
struct general_matrix_matrix_triangular_product<Index,Scalar,LhsStorageOrder,ConjugateLhs, \ struct general_matrix_matrix_triangular_product<Index,Scalar,LhsStorageOrder,ConjugateLhs, \
Scalar,RhsStorageOrder,ConjugateRhs,ColMajor,UpLo,Specialized> { \ Scalar,RhsStorageOrder,ConjugateRhs,ColMajor,UpLo,Specialized> { \
static EIGEN_STRONG_INLINE void run(Index size, Index depth,const Scalar* lhs, Index lhsStride, \ static EIGEN_STRONG_INLINE void run(Index size, Index depth,const Scalar* lhs, Index lhsStride, \
const Scalar* rhs, Index rhsStride, Scalar* res, Index resStride, Scalar alpha) \ const Scalar* rhs, Index rhsStride, Scalar* res, Index resStride, Scalar alpha, level3_blocking<Scalar, Scalar>& blocking) \
{ \ { \
if (lhs==rhs) { \ if (lhs==rhs) { \
general_matrix_matrix_rankupdate<Index,Scalar,LhsStorageOrder,ConjugateLhs,ColMajor,UpLo> \ general_matrix_matrix_rankupdate<Index,Scalar,LhsStorageOrder,ConjugateLhs,ColMajor,UpLo> \
::run(size,depth,lhs,lhsStride,rhs,rhsStride,res,resStride,alpha); \ ::run(size,depth,lhs,lhsStride,rhs,rhsStride,res,resStride,alpha,blocking); \
} else { \ } else { \
general_matrix_matrix_triangular_product<Index, \ general_matrix_matrix_triangular_product<Index, \
Scalar, LhsStorageOrder, ConjugateLhs, \ Scalar, LhsStorageOrder, ConjugateLhs, \
Scalar, RhsStorageOrder, ConjugateRhs, \ Scalar, RhsStorageOrder, ConjugateRhs, \
ColMajor, UpLo, BuiltIn> \ ColMajor, UpLo, BuiltIn> \
::run(size,depth,lhs,lhsStride,rhs,rhsStride,res,resStride,alpha); \ ::run(size,depth,lhs,lhsStride,rhs,rhsStride,res,resStride,alpha,blocking); \
} \ } \
} \ } \
}; };
EIGEN_MKL_RANKUPDATE_SPECIALIZE(double) EIGEN_BLAS_RANKUPDATE_SPECIALIZE(double)
//EIGEN_MKL_RANKUPDATE_SPECIALIZE(dcomplex) EIGEN_BLAS_RANKUPDATE_SPECIALIZE(float)
EIGEN_MKL_RANKUPDATE_SPECIALIZE(float) // TODO handle complex cases
//EIGEN_MKL_RANKUPDATE_SPECIALIZE(scomplex) // EIGEN_BLAS_RANKUPDATE_SPECIALIZE(dcomplex)
// EIGEN_BLAS_RANKUPDATE_SPECIALIZE(scomplex)
// SYRK for float/double // SYRK for float/double
#define EIGEN_MKL_RANKUPDATE_R(EIGTYPE, MKLTYPE, MKLFUNC) \ #define EIGEN_BLAS_RANKUPDATE_R(EIGTYPE, BLASTYPE, BLASFUNC) \
template <typename Index, int AStorageOrder, bool ConjugateA, int UpLo> \ template <typename Index, int AStorageOrder, bool ConjugateA, int UpLo> \
struct general_matrix_matrix_rankupdate<Index,EIGTYPE,AStorageOrder,ConjugateA,ColMajor,UpLo> { \ struct general_matrix_matrix_rankupdate<Index,EIGTYPE,AStorageOrder,ConjugateA,ColMajor,UpLo> { \
enum { \ enum { \
@ -80,23 +81,19 @@ struct general_matrix_matrix_rankupdate<Index,EIGTYPE,AStorageOrder,ConjugateA,C
conjA = ((AStorageOrder==ColMajor) && ConjugateA) ? 1 : 0 \ conjA = ((AStorageOrder==ColMajor) && ConjugateA) ? 1 : 0 \
}; \ }; \
static EIGEN_STRONG_INLINE void run(Index size, Index depth,const EIGTYPE* lhs, Index lhsStride, \ static EIGEN_STRONG_INLINE void run(Index size, Index depth,const EIGTYPE* lhs, Index lhsStride, \
const EIGTYPE* rhs, Index rhsStride, EIGTYPE* res, Index resStride, EIGTYPE alpha) \ const EIGTYPE* /*rhs*/, Index /*rhsStride*/, EIGTYPE* res, Index resStride, EIGTYPE alpha, level3_blocking<EIGTYPE, EIGTYPE>& /*blocking*/) \
{ \ { \
/* typedef Matrix<EIGTYPE, Dynamic, Dynamic, RhsStorageOrder> MatrixRhs;*/ \ /* typedef Matrix<EIGTYPE, Dynamic, Dynamic, RhsStorageOrder> MatrixRhs;*/ \
\ \
MKL_INT lda=lhsStride, ldc=resStride, n=size, k=depth; \ BlasIndex lda=convert_index<BlasIndex>(lhsStride), ldc=convert_index<BlasIndex>(resStride), n=convert_index<BlasIndex>(size), k=convert_index<BlasIndex>(depth); \
char uplo=(IsLower) ? 'L' : 'U', trans=(AStorageOrder==RowMajor) ? 'T':'N'; \ char uplo=(IsLower) ? 'L' : 'U', trans=(AStorageOrder==RowMajor) ? 'T':'N'; \
MKLTYPE alpha_, beta_; \ EIGTYPE beta(1); \
\ BLASFUNC(&uplo, &trans, &n, &k, &numext::real_ref(alpha), lhs, &lda, &numext::real_ref(beta), res, &ldc); \
/* Set alpha_ & beta_ */ \
assign_scalar_eig2mkl<MKLTYPE, EIGTYPE>(alpha_, alpha); \
assign_scalar_eig2mkl<MKLTYPE, EIGTYPE>(beta_, EIGTYPE(1)); \
MKLFUNC(&uplo, &trans, &n, &k, &alpha_, lhs, &lda, &beta_, res, &ldc); \
} \ } \
}; };
// HERK for complex data // HERK for complex data
#define EIGEN_MKL_RANKUPDATE_C(EIGTYPE, MKLTYPE, RTYPE, MKLFUNC) \ #define EIGEN_BLAS_RANKUPDATE_C(EIGTYPE, BLASTYPE, RTYPE, BLASFUNC) \
template <typename Index, int AStorageOrder, bool ConjugateA, int UpLo> \ template <typename Index, int AStorageOrder, bool ConjugateA, int UpLo> \
struct general_matrix_matrix_rankupdate<Index,EIGTYPE,AStorageOrder,ConjugateA,ColMajor,UpLo> { \ struct general_matrix_matrix_rankupdate<Index,EIGTYPE,AStorageOrder,ConjugateA,ColMajor,UpLo> { \
enum { \ enum { \
@ -105,18 +102,15 @@ struct general_matrix_matrix_rankupdate<Index,EIGTYPE,AStorageOrder,ConjugateA,C
conjA = (((AStorageOrder==ColMajor) && ConjugateA) || ((AStorageOrder==RowMajor) && !ConjugateA)) ? 1 : 0 \ conjA = (((AStorageOrder==ColMajor) && ConjugateA) || ((AStorageOrder==RowMajor) && !ConjugateA)) ? 1 : 0 \
}; \ }; \
static EIGEN_STRONG_INLINE void run(Index size, Index depth,const EIGTYPE* lhs, Index lhsStride, \ static EIGEN_STRONG_INLINE void run(Index size, Index depth,const EIGTYPE* lhs, Index lhsStride, \
const EIGTYPE* rhs, Index rhsStride, EIGTYPE* res, Index resStride, EIGTYPE alpha) \ const EIGTYPE* /*rhs*/, Index /*rhsStride*/, EIGTYPE* res, Index resStride, EIGTYPE alpha, level3_blocking<EIGTYPE, EIGTYPE>& /*blocking*/) \
{ \ { \
typedef Matrix<EIGTYPE, Dynamic, Dynamic, AStorageOrder> MatrixType; \ typedef Matrix<EIGTYPE, Dynamic, Dynamic, AStorageOrder> MatrixType; \
\ \
MKL_INT lda=lhsStride, ldc=resStride, n=size, k=depth; \ BlasIndex lda=convert_index<BlasIndex>(lhsStride), ldc=convert_index<BlasIndex>(resStride), n=convert_index<BlasIndex>(size), k=convert_index<BlasIndex>(depth); \
char uplo=(IsLower) ? 'L' : 'U', trans=(AStorageOrder==RowMajor) ? 'C':'N'; \ char uplo=(IsLower) ? 'L' : 'U', trans=(AStorageOrder==RowMajor) ? 'C':'N'; \
RTYPE alpha_, beta_; \ RTYPE alpha_, beta_; \
const EIGTYPE* a_ptr; \ const EIGTYPE* a_ptr; \
\ \
/* Set alpha_ & beta_ */ \
/* assign_scalar_eig2mkl<MKLTYPE, EIGTYPE>(alpha_, alpha); */\
/* assign_scalar_eig2mkl<MKLTYPE, EIGTYPE>(beta_, EIGTYPE(1));*/ \
alpha_ = alpha.real(); \ alpha_ = alpha.real(); \
beta_ = 1.0; \ beta_ = 1.0; \
/* Copy with conjugation in some cases*/ \ /* Copy with conjugation in some cases*/ \
@ -127,20 +121,21 @@ struct general_matrix_matrix_rankupdate<Index,EIGTYPE,AStorageOrder,ConjugateA,C
lda = a.outerStride(); \ lda = a.outerStride(); \
a_ptr = a.data(); \ a_ptr = a.data(); \
} else a_ptr=lhs; \ } else a_ptr=lhs; \
MKLFUNC(&uplo, &trans, &n, &k, &alpha_, (MKLTYPE*)a_ptr, &lda, &beta_, (MKLTYPE*)res, &ldc); \ BLASFUNC(&uplo, &trans, &n, &k, &alpha_, (BLASTYPE*)a_ptr, &lda, &beta_, (BLASTYPE*)res, &ldc); \
} \ } \
}; };
EIGEN_MKL_RANKUPDATE_R(double, double, dsyrk) EIGEN_BLAS_RANKUPDATE_R(double, double, dsyrk_)
EIGEN_MKL_RANKUPDATE_R(float, float, ssyrk) EIGEN_BLAS_RANKUPDATE_R(float, float, ssyrk_)
//EIGEN_MKL_RANKUPDATE_C(dcomplex, MKL_Complex16, double, zherk) // TODO handle complex cases
//EIGEN_MKL_RANKUPDATE_C(scomplex, MKL_Complex8, double, cherk) // EIGEN_BLAS_RANKUPDATE_C(dcomplex, double, double, zherk_)
// EIGEN_BLAS_RANKUPDATE_C(scomplex, float, float, cherk_)
} // end namespace internal } // end namespace internal
} // end namespace Eigen } // end namespace Eigen
#endif // EIGEN_GENERAL_MATRIX_MATRIX_TRIANGULAR_MKL_H #endif // EIGEN_GENERAL_MATRIX_MATRIX_TRIANGULAR_BLAS_H
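
For reference, the SYRK symbol the rewritten macros bind to follows the plain Fortran-77 convention: all arguments passed by pointer, trailing underscore on the name. A standalone sketch, assuming a Fortran BLAS such as OpenBLAS is linked; the matrices are illustrative:

extern "C" void dsyrk_(const char* uplo, const char* trans, const int* n, const int* k,
                       const double* alpha, const double* a, const int* lda,
                       const double* beta, double* c, const int* ldc);

int main()
{
  const int n = 3, k = 2;
  const double a[n * k] = {1, 2, 3, 4, 5, 6};  // n-by-k, column-major
  double c[n * n] = {0};                       // only the 'L' triangle is written
  const double alpha = 1.0, beta = 1.0;
  const char uplo = 'L', trans = 'N';
  const int lda = n, ldc = n;
  dsyrk_(&uplo, &trans, &n, &k, &alpha, a, &lda, &beta, c, &ldc);  // C := alpha*A*A^T + beta*C
}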

View File

@ -25,13 +25,13 @@
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
******************************************************************************** ********************************************************************************
* Content : Eigen bindings to Intel(R) MKL * Content : Eigen bindings to BLAS F77
* General matrix-matrix product functionality based on ?GEMM. * General matrix-matrix product functionality based on ?GEMM.
******************************************************************************** ********************************************************************************
*/ */
#ifndef EIGEN_GENERAL_MATRIX_MATRIX_MKL_H #ifndef EIGEN_GENERAL_MATRIX_MATRIX_BLAS_H
#define EIGEN_GENERAL_MATRIX_MATRIX_MKL_H #define EIGEN_GENERAL_MATRIX_MATRIX_BLAS_H
namespace Eigen { namespace Eigen {
@ -46,7 +46,7 @@ namespace internal {
// gemm specialization // gemm specialization
#define GEMM_SPECIALIZATION(EIGTYPE, EIGPREFIX, MKLTYPE, MKLPREFIX) \ #define GEMM_SPECIALIZATION(EIGTYPE, EIGPREFIX, BLASTYPE, BLASPREFIX) \
template< \ template< \
typename Index, \ typename Index, \
int LhsStorageOrder, bool ConjugateLhs, \ int LhsStorageOrder, bool ConjugateLhs, \
@ -66,55 +66,50 @@ static void run(Index rows, Index cols, Index depth, \
using std::conj; \ using std::conj; \
\ \
char transa, transb; \ char transa, transb; \
MKL_INT m, n, k, lda, ldb, ldc; \ BlasIndex m, n, k, lda, ldb, ldc; \
const EIGTYPE *a, *b; \ const EIGTYPE *a, *b; \
MKLTYPE alpha_, beta_; \ EIGTYPE beta(1); \
MatrixX##EIGPREFIX a_tmp, b_tmp; \ MatrixX##EIGPREFIX a_tmp, b_tmp; \
EIGTYPE myone(1);\
\ \
/* Set transpose options */ \ /* Set transpose options */ \
transa = (LhsStorageOrder==RowMajor) ? ((ConjugateLhs) ? 'C' : 'T') : 'N'; \ transa = (LhsStorageOrder==RowMajor) ? ((ConjugateLhs) ? 'C' : 'T') : 'N'; \
transb = (RhsStorageOrder==RowMajor) ? ((ConjugateRhs) ? 'C' : 'T') : 'N'; \ transb = (RhsStorageOrder==RowMajor) ? ((ConjugateRhs) ? 'C' : 'T') : 'N'; \
\ \
/* Set m, n, k */ \ /* Set m, n, k */ \
m = (MKL_INT)rows; \ m = convert_index<BlasIndex>(rows); \
n = (MKL_INT)cols; \ n = convert_index<BlasIndex>(cols); \
k = (MKL_INT)depth; \ k = convert_index<BlasIndex>(depth); \
\
/* Set alpha_ & beta_ */ \
assign_scalar_eig2mkl(alpha_, alpha); \
assign_scalar_eig2mkl(beta_, myone); \
\ \
/* Set lda, ldb, ldc */ \ /* Set lda, ldb, ldc */ \
lda = (MKL_INT)lhsStride; \ lda = convert_index<BlasIndex>(lhsStride); \
ldb = (MKL_INT)rhsStride; \ ldb = convert_index<BlasIndex>(rhsStride); \
ldc = (MKL_INT)resStride; \ ldc = convert_index<BlasIndex>(resStride); \
\ \
/* Set a, b, c */ \ /* Set a, b, c */ \
if ((LhsStorageOrder==ColMajor) && (ConjugateLhs)) { \ if ((LhsStorageOrder==ColMajor) && (ConjugateLhs)) { \
Map<const MatrixX##EIGPREFIX, 0, OuterStride<> > lhs(_lhs,m,k,OuterStride<>(lhsStride)); \ Map<const MatrixX##EIGPREFIX, 0, OuterStride<> > lhs(_lhs,m,k,OuterStride<>(lhsStride)); \
a_tmp = lhs.conjugate(); \ a_tmp = lhs.conjugate(); \
a = a_tmp.data(); \ a = a_tmp.data(); \
lda = a_tmp.outerStride(); \ lda = convert_index<BlasIndex>(a_tmp.outerStride()); \
} else a = _lhs; \ } else a = _lhs; \
\ \
if ((RhsStorageOrder==ColMajor) && (ConjugateRhs)) { \ if ((RhsStorageOrder==ColMajor) && (ConjugateRhs)) { \
Map<const MatrixX##EIGPREFIX, 0, OuterStride<> > rhs(_rhs,k,n,OuterStride<>(rhsStride)); \ Map<const MatrixX##EIGPREFIX, 0, OuterStride<> > rhs(_rhs,k,n,OuterStride<>(rhsStride)); \
b_tmp = rhs.conjugate(); \ b_tmp = rhs.conjugate(); \
b = b_tmp.data(); \ b = b_tmp.data(); \
ldb = b_tmp.outerStride(); \ ldb = convert_index<BlasIndex>(b_tmp.outerStride()); \
} else b = _rhs; \ } else b = _rhs; \
\ \
MKLPREFIX##gemm(&transa, &transb, &m, &n, &k, &alpha_, (const MKLTYPE*)a, &lda, (const MKLTYPE*)b, &ldb, &beta_, (MKLTYPE*)res, &ldc); \ BLASPREFIX##gemm_(&transa, &transb, &m, &n, &k, &numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (const BLASTYPE*)b, &ldb, &numext::real_ref(beta), (BLASTYPE*)res, &ldc); \
}}; }};
GEMM_SPECIALIZATION(double, d, double, d) GEMM_SPECIALIZATION(double, d, double, d)
GEMM_SPECIALIZATION(float, f, float, s) GEMM_SPECIALIZATION(float, f, float, s)
GEMM_SPECIALIZATION(dcomplex, cd, MKL_Complex16, z) GEMM_SPECIALIZATION(dcomplex, cd, double, z)
GEMM_SPECIALIZATION(scomplex, cf, MKL_Complex8, c) GEMM_SPECIALIZATION(scomplex, cf, float, c)
} // end namespace internal } // end namespace internal
} // end namespace Eigen } // end namespace Eigen
#endif // EIGEN_GENERAL_MATRIX_MATRIX_MKL_H #endif // EIGEN_GENERAL_MATRIX_MATRIX_BLAS_H
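
The convert_index<BlasIndex>(...) calls that replace the plain (MKL_INT) casts guard the narrowing from Eigen's pointer-sized Index to the usually 32-bit BLAS integer. A standalone sketch of the idea (checked_convert_index is a hypothetical stand-in; Eigen's own helper lives in the internal namespace and uses an internal assert):

#include <cassert>
#include <cstddef>
#include <limits>

typedef int BlasIndex;  // Eigen's default BLAS integer width

template <typename Dst, typename Src>
Dst checked_convert_index(Src idx)
{
  // Refuse to silently truncate a stride or dimension of a huge matrix.
  assert(idx >= Src(0) && idx <= static_cast<Src>((std::numeric_limits<Dst>::max)()));
  return static_cast<Dst>(idx);
}

int main()
{
  std::ptrdiff_t stride = 1024;
  BlasIndex lda = checked_convert_index<BlasIndex>(stride);
  return lda == 1024 ? 0 : 1;
}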

View File

@ -25,13 +25,13 @@
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
******************************************************************************** ********************************************************************************
* Content : Eigen bindings to Intel(R) MKL * Content : Eigen bindings to BLAS F77
* General matrix-vector product functionality based on ?GEMV. * General matrix-vector product functionality based on ?GEMV.
******************************************************************************** ********************************************************************************
*/ */
#ifndef EIGEN_GENERAL_MATRIX_VECTOR_MKL_H #ifndef EIGEN_GENERAL_MATRIX_VECTOR_BLAS_H
#define EIGEN_GENERAL_MATRIX_VECTOR_MKL_H #define EIGEN_GENERAL_MATRIX_VECTOR_BLAS_H
namespace Eigen { namespace Eigen {
@ -49,7 +49,7 @@ namespace internal {
template<typename Index, typename LhsScalar, int StorageOrder, bool ConjugateLhs, typename RhsScalar, bool ConjugateRhs> template<typename Index, typename LhsScalar, int StorageOrder, bool ConjugateLhs, typename RhsScalar, bool ConjugateRhs>
struct general_matrix_vector_product_gemv; struct general_matrix_vector_product_gemv;
#define EIGEN_MKL_GEMV_SPECIALIZE(Scalar) \ #define EIGEN_BLAS_GEMV_SPECIALIZE(Scalar) \
template<typename Index, bool ConjugateLhs, bool ConjugateRhs> \ template<typename Index, bool ConjugateLhs, bool ConjugateRhs> \
struct general_matrix_vector_product<Index,Scalar,const_blas_data_mapper<Scalar,Index,ColMajor>,ColMajor,ConjugateLhs,Scalar,const_blas_data_mapper<Scalar,Index,RowMajor>,ConjugateRhs,Specialized> { \ struct general_matrix_vector_product<Index,Scalar,const_blas_data_mapper<Scalar,Index,ColMajor>,ColMajor,ConjugateLhs,Scalar,const_blas_data_mapper<Scalar,Index,RowMajor>,ConjugateRhs,Specialized> { \
static void run( \ static void run( \
@ -80,12 +80,12 @@ static void run( \
} \ } \
}; \ }; \
EIGEN_MKL_GEMV_SPECIALIZE(double) EIGEN_BLAS_GEMV_SPECIALIZE(double)
EIGEN_MKL_GEMV_SPECIALIZE(float) EIGEN_BLAS_GEMV_SPECIALIZE(float)
EIGEN_MKL_GEMV_SPECIALIZE(dcomplex) EIGEN_BLAS_GEMV_SPECIALIZE(dcomplex)
EIGEN_MKL_GEMV_SPECIALIZE(scomplex) EIGEN_BLAS_GEMV_SPECIALIZE(scomplex)
#define EIGEN_MKL_GEMV_SPECIALIZATION(EIGTYPE,MKLTYPE,MKLPREFIX) \ #define EIGEN_BLAS_GEMV_SPECIALIZATION(EIGTYPE,BLASTYPE,BLASPREFIX) \
template<typename Index, int LhsStorageOrder, bool ConjugateLhs, bool ConjugateRhs> \ template<typename Index, int LhsStorageOrder, bool ConjugateLhs, bool ConjugateRhs> \
struct general_matrix_vector_product_gemv<Index,EIGTYPE,LhsStorageOrder,ConjugateLhs,EIGTYPE,ConjugateRhs> \ struct general_matrix_vector_product_gemv<Index,EIGTYPE,LhsStorageOrder,ConjugateLhs,EIGTYPE,ConjugateRhs> \
{ \ { \
@ -97,16 +97,15 @@ static void run( \
const EIGTYPE* rhs, Index rhsIncr, \ const EIGTYPE* rhs, Index rhsIncr, \
EIGTYPE* res, Index resIncr, EIGTYPE alpha) \ EIGTYPE* res, Index resIncr, EIGTYPE alpha) \
{ \ { \
MKL_INT m=rows, n=cols, lda=lhsStride, incx=rhsIncr, incy=resIncr; \ BlasIndex m=convert_index<BlasIndex>(rows), n=convert_index<BlasIndex>(cols), \
MKLTYPE alpha_, beta_; \ lda=convert_index<BlasIndex>(lhsStride), incx=convert_index<BlasIndex>(rhsIncr), incy=convert_index<BlasIndex>(resIncr); \
const EIGTYPE *x_ptr, myone(1); \ const EIGTYPE beta(1); \
const EIGTYPE *x_ptr; \
char trans=(LhsStorageOrder==ColMajor) ? 'N' : (ConjugateLhs) ? 'C' : 'T'; \ char trans=(LhsStorageOrder==ColMajor) ? 'N' : (ConjugateLhs) ? 'C' : 'T'; \
if (LhsStorageOrder==RowMajor) { \ if (LhsStorageOrder==RowMajor) { \
m=cols; \ m = convert_index<BlasIndex>(cols); \
n=rows; \ n = convert_index<BlasIndex>(rows); \
}\ }\
assign_scalar_eig2mkl(alpha_, alpha); \
assign_scalar_eig2mkl(beta_, myone); \
GEMVVector x_tmp; \ GEMVVector x_tmp; \
if (ConjugateRhs) { \ if (ConjugateRhs) { \
Map<const GEMVVector, 0, InnerStride<> > map_x(rhs,cols,1,InnerStride<>(incx)); \ Map<const GEMVVector, 0, InnerStride<> > map_x(rhs,cols,1,InnerStride<>(incx)); \
@ -114,17 +113,17 @@ static void run( \
x_ptr=x_tmp.data(); \ x_ptr=x_tmp.data(); \
incx=1; \ incx=1; \
} else x_ptr=rhs; \ } else x_ptr=rhs; \
MKLPREFIX##gemv(&trans, &m, &n, &alpha_, (const MKLTYPE*)lhs, &lda, (const MKLTYPE*)x_ptr, &incx, &beta_, (MKLTYPE*)res, &incy); \ BLASPREFIX##gemv_(&trans, &m, &n, &numext::real_ref(alpha), (const BLASTYPE*)lhs, &lda, (const BLASTYPE*)x_ptr, &incx, &numext::real_ref(beta), (BLASTYPE*)res, &incy); \
}\ }\
}; };
EIGEN_MKL_GEMV_SPECIALIZATION(double, double, d) EIGEN_BLAS_GEMV_SPECIALIZATION(double, double, d)
EIGEN_MKL_GEMV_SPECIALIZATION(float, float, s) EIGEN_BLAS_GEMV_SPECIALIZATION(float, float, s)
EIGEN_MKL_GEMV_SPECIALIZATION(dcomplex, MKL_Complex16, z) EIGEN_BLAS_GEMV_SPECIALIZATION(dcomplex, double, z)
EIGEN_MKL_GEMV_SPECIALIZATION(scomplex, MKL_Complex8, c) EIGEN_BLAS_GEMV_SPECIALIZATION(scomplex, float, c)
} // end namespace internal } // end namespace internal
} // end namespace Eigen } // end namespace Eigen
#endif // EIGEN_GENERAL_MATRIX_VECTOR_MKL_H #endif // EIGEN_GENERAL_MATRIX_VECTOR_BLAS_H
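
In the RowMajor branch above, swapping m and n works because a row-major rows-by-cols matrix is bit-for-bit a column-major cols-by-rows matrix; the 'T'/'C' transpose flag then undoes the reinterpretation. A worked sketch against the same Fortran calling convention (values illustrative):

extern "C" void dgemv_(const char* trans, const int* m, const int* n,
                       const double* alpha, const double* a, const int* lda,
                       const double* x, const int* incx,
                       const double* beta, double* y, const int* incy);

int main()
{
  const int rows = 2, cols = 3;
  const double a[rows * cols] = {1, 2, 3, 4, 5, 6};  // row-major [[1,2,3],[4,5,6]]
  const double x[cols] = {1, 1, 1};
  double y[rows] = {0, 0};
  const char trans = 'T';        // read the data as its column-major transpose
  const int m = cols, n = rows;  // swapped, as in the macro's RowMajor branch
  const int lda = cols, incx = 1, incy = 1;
  const double alpha = 1.0, beta = 1.0;
  dgemv_(&trans, &m, &n, &alpha, a, &lda, x, &incx, &beta, y, &incy);
  return (y[0] == 6.0 && y[1] == 15.0) ? 0 : 1;  // row sums of the 2x3 matrix
}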

View File

@ -291,7 +291,7 @@ struct product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,LhsSelfAdjoint,Co
const Scalar* lhs, Index lhsStride, const Scalar* lhs, Index lhsStride,
const Scalar* rhs, Index rhsStride, const Scalar* rhs, Index rhsStride,
Scalar* res, Index resStride, Scalar* res, Index resStride,
const Scalar& alpha) const Scalar& alpha, level3_blocking<Scalar,Scalar>& blocking)
{ {
product_selfadjoint_matrix<Scalar, Index, product_selfadjoint_matrix<Scalar, Index,
EIGEN_LOGICAL_XOR(RhsSelfAdjoint,RhsStorageOrder==RowMajor) ? ColMajor : RowMajor, EIGEN_LOGICAL_XOR(RhsSelfAdjoint,RhsStorageOrder==RowMajor) ? ColMajor : RowMajor,
@ -299,7 +299,7 @@ struct product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,LhsSelfAdjoint,Co
EIGEN_LOGICAL_XOR(LhsSelfAdjoint,LhsStorageOrder==RowMajor) ? ColMajor : RowMajor, EIGEN_LOGICAL_XOR(LhsSelfAdjoint,LhsStorageOrder==RowMajor) ? ColMajor : RowMajor,
LhsSelfAdjoint, NumTraits<Scalar>::IsComplex && EIGEN_LOGICAL_XOR(LhsSelfAdjoint,ConjugateLhs), LhsSelfAdjoint, NumTraits<Scalar>::IsComplex && EIGEN_LOGICAL_XOR(LhsSelfAdjoint,ConjugateLhs),
ColMajor> ColMajor>
::run(cols, rows, rhs, rhsStride, lhs, lhsStride, res, resStride, alpha); ::run(cols, rows, rhs, rhsStride, lhs, lhsStride, res, resStride, alpha, blocking);
} }
}; };
@ -314,7 +314,7 @@ struct product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,true,ConjugateLhs
const Scalar* _lhs, Index lhsStride, const Scalar* _lhs, Index lhsStride,
const Scalar* _rhs, Index rhsStride, const Scalar* _rhs, Index rhsStride,
Scalar* res, Index resStride, Scalar* res, Index resStride,
const Scalar& alpha); const Scalar& alpha, level3_blocking<Scalar,Scalar>& blocking);
}; };
template <typename Scalar, typename Index, template <typename Scalar, typename Index,
@ -325,7 +325,7 @@ EIGEN_DONT_INLINE void product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,t
const Scalar* _lhs, Index lhsStride, const Scalar* _lhs, Index lhsStride,
const Scalar* _rhs, Index rhsStride, const Scalar* _rhs, Index rhsStride,
Scalar* _res, Index resStride, Scalar* _res, Index resStride,
const Scalar& alpha) const Scalar& alpha, level3_blocking<Scalar,Scalar>& blocking)
{ {
Index size = rows; Index size = rows;
@ -340,17 +340,14 @@ EIGEN_DONT_INLINE void product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,t
RhsMapper rhs(_rhs,rhsStride); RhsMapper rhs(_rhs,rhsStride);
ResMapper res(_res, resStride); ResMapper res(_res, resStride);
Index kc = size; // cache block size along the K direction Index kc = blocking.kc(); // cache block size along the K direction
Index mc = rows; // cache block size along the M direction Index mc = (std::min)(rows,blocking.mc()); // cache block size along the M direction
Index nc = cols; // cache block size along the N direction // kc must be smaller than mc
computeProductBlockingSizes<Scalar,Scalar>(kc, mc, nc, 1);
// kc must smaller than mc
kc = (std::min)(kc,mc); kc = (std::min)(kc,mc);
std::size_t sizeA = kc*mc;
std::size_t sizeB = kc*cols; std::size_t sizeB = kc*cols;
ei_declare_aligned_stack_constructed_variable(Scalar, blockA, kc*mc, 0); ei_declare_aligned_stack_constructed_variable(Scalar, blockA, sizeA, blocking.blockA());
ei_declare_aligned_stack_constructed_variable(Scalar, allocatedBlockB, sizeB, 0); ei_declare_aligned_stack_constructed_variable(Scalar, blockB, sizeB, blocking.blockB());
Scalar* blockB = allocatedBlockB;
gebp_kernel<Scalar, Scalar, Index, ResMapper, Traits::mr, Traits::nr, ConjugateLhs, ConjugateRhs> gebp_kernel; gebp_kernel<Scalar, Scalar, Index, ResMapper, Traits::mr, Traits::nr, ConjugateLhs, ConjugateRhs> gebp_kernel;
symm_pack_lhs<Scalar, Index, Traits::mr, Traits::LhsProgress, LhsStorageOrder> pack_lhs; symm_pack_lhs<Scalar, Index, Traits::mr, Traits::LhsProgress, LhsStorageOrder> pack_lhs;
@ -410,7 +407,7 @@ struct product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,false,ConjugateLh
const Scalar* _lhs, Index lhsStride, const Scalar* _lhs, Index lhsStride,
const Scalar* _rhs, Index rhsStride, const Scalar* _rhs, Index rhsStride,
Scalar* res, Index resStride, Scalar* res, Index resStride,
const Scalar& alpha); const Scalar& alpha, level3_blocking<Scalar,Scalar>& blocking);
}; };
template <typename Scalar, typename Index, template <typename Scalar, typename Index,
@ -421,7 +418,7 @@ EIGEN_DONT_INLINE void product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,f
const Scalar* _lhs, Index lhsStride, const Scalar* _lhs, Index lhsStride,
const Scalar* _rhs, Index rhsStride, const Scalar* _rhs, Index rhsStride,
Scalar* _res, Index resStride, Scalar* _res, Index resStride,
const Scalar& alpha) const Scalar& alpha, level3_blocking<Scalar,Scalar>& blocking)
{ {
Index size = cols; Index size = cols;
@ -432,14 +429,12 @@ EIGEN_DONT_INLINE void product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,f
LhsMapper lhs(_lhs,lhsStride); LhsMapper lhs(_lhs,lhsStride);
ResMapper res(_res,resStride); ResMapper res(_res,resStride);
Index kc = size; // cache block size along the K direction Index kc = blocking.kc(); // cache block size along the K direction
Index mc = rows; // cache block size along the M direction Index mc = (std::min)(rows,blocking.mc()); // cache block size along the M direction
Index nc = cols; // cache block size along the N direction std::size_t sizeA = kc*mc;
computeProductBlockingSizes<Scalar,Scalar>(kc, mc, nc, 1);
std::size_t sizeB = kc*cols; std::size_t sizeB = kc*cols;
ei_declare_aligned_stack_constructed_variable(Scalar, blockA, kc*mc, 0); ei_declare_aligned_stack_constructed_variable(Scalar, blockA, sizeA, blocking.blockA());
ei_declare_aligned_stack_constructed_variable(Scalar, allocatedBlockB, sizeB, 0); ei_declare_aligned_stack_constructed_variable(Scalar, blockB, sizeB, blocking.blockB());
Scalar* blockB = allocatedBlockB;
gebp_kernel<Scalar, Scalar, Index, ResMapper, Traits::mr, Traits::nr, ConjugateLhs, ConjugateRhs> gebp_kernel; gebp_kernel<Scalar, Scalar, Index, ResMapper, Traits::mr, Traits::nr, ConjugateLhs, ConjugateRhs> gebp_kernel;
gemm_pack_lhs<Scalar, Index, LhsMapper, Traits::mr, Traits::LhsProgress, LhsStorageOrder> pack_lhs; gemm_pack_lhs<Scalar, Index, LhsMapper, Traits::mr, Traits::LhsProgress, LhsStorageOrder> pack_lhs;
@ -498,6 +493,11 @@ struct selfadjoint_product_impl<Lhs,LhsMode,false,Rhs,RhsMode,false>
Scalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(a_lhs) Scalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(a_lhs)
* RhsBlasTraits::extractScalarFactor(a_rhs); * RhsBlasTraits::extractScalarFactor(a_rhs);
typedef internal::gemm_blocking_space<(Dest::Flags&RowMajorBit) ? RowMajor : ColMajor,Scalar,Scalar,
Lhs::MaxRowsAtCompileTime, Rhs::MaxColsAtCompileTime, Lhs::MaxColsAtCompileTime,1> BlockingType;
BlockingType blocking(lhs.rows(), rhs.cols(), lhs.cols(), 1, false);
internal::product_selfadjoint_matrix<Scalar, Index, internal::product_selfadjoint_matrix<Scalar, Index,
EIGEN_LOGICAL_XOR(LhsIsUpper,internal::traits<Lhs>::Flags &RowMajorBit) ? RowMajor : ColMajor, LhsIsSelfAdjoint, EIGEN_LOGICAL_XOR(LhsIsUpper,internal::traits<Lhs>::Flags &RowMajorBit) ? RowMajor : ColMajor, LhsIsSelfAdjoint,
NumTraits<Scalar>::IsComplex && EIGEN_LOGICAL_XOR(LhsIsUpper,bool(LhsBlasTraits::NeedToConjugate)), NumTraits<Scalar>::IsComplex && EIGEN_LOGICAL_XOR(LhsIsUpper,bool(LhsBlasTraits::NeedToConjugate)),
@ -509,7 +509,7 @@ struct selfadjoint_product_impl<Lhs,LhsMode,false,Rhs,RhsMode,false>
&lhs.coeffRef(0,0), lhs.outerStride(), // lhs info &lhs.coeffRef(0,0), lhs.outerStride(), // lhs info
&rhs.coeffRef(0,0), rhs.outerStride(), // rhs info &rhs.coeffRef(0,0), rhs.outerStride(), // rhs info
&dst.coeffRef(0,0), dst.outerStride(), // result info &dst.coeffRef(0,0), dst.outerStride(), // result info
actualAlpha // alpha actualAlpha, blocking // alpha
); );
} }
}; };
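
The user-facing entry point is unchanged: the BlockingType is created inside selfadjoint_product_impl, and only the packing-buffer management moves out of the kernel. A minimal sketch of an expression that takes this path (sizes illustrative):

#include <Eigen/Dense>

int main()
{
  Eigen::MatrixXd S = Eigen::MatrixXd::Random(8, 8);
  Eigen::MatrixXd B = Eigen::MatrixXd::Random(8, 4);
  Eigen::MatrixXd C(8, 4);
  // Only the lower triangle of S is read; this routes into
  // product_selfadjoint_matrix with the blocking object built above.
  C.noalias() = S.selfadjointView<Eigen::Lower>() * B;
}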

View File

@ -25,13 +25,13 @@
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
// //
******************************************************************************** ********************************************************************************
* Content : Eigen bindings to Intel(R) MKL * Content : Eigen bindings to BLAS F77
* Self adjoint matrix * matrix product functionality based on ?SYMM/?HEMM. * Self adjoint matrix * matrix product functionality based on ?SYMM/?HEMM.
******************************************************************************** ********************************************************************************
*/ */
#ifndef EIGEN_SELFADJOINT_MATRIX_MATRIX_MKL_H #ifndef EIGEN_SELFADJOINT_MATRIX_MATRIX_BLAS_H
#define EIGEN_SELFADJOINT_MATRIX_MATRIX_MKL_H #define EIGEN_SELFADJOINT_MATRIX_MATRIX_BLAS_H
namespace Eigen { namespace Eigen {
@ -40,7 +40,7 @@ namespace internal {
/* Optimized selfadjoint matrix * matrix (?SYMM/?HEMM) product */ /* Optimized selfadjoint matrix * matrix (?SYMM/?HEMM) product */
#define EIGEN_MKL_SYMM_L(EIGTYPE, MKLTYPE, EIGPREFIX, MKLPREFIX) \ #define EIGEN_BLAS_SYMM_L(EIGTYPE, BLASTYPE, EIGPREFIX, BLASPREFIX) \
template <typename Index, \ template <typename Index, \
int LhsStorageOrder, bool ConjugateLhs, \ int LhsStorageOrder, bool ConjugateLhs, \
int RhsStorageOrder, bool ConjugateRhs> \ int RhsStorageOrder, bool ConjugateRhs> \
@ -52,28 +52,23 @@ struct product_selfadjoint_matrix<EIGTYPE,Index,LhsStorageOrder,true,ConjugateLh
const EIGTYPE* _lhs, Index lhsStride, \ const EIGTYPE* _lhs, Index lhsStride, \
const EIGTYPE* _rhs, Index rhsStride, \ const EIGTYPE* _rhs, Index rhsStride, \
EIGTYPE* res, Index resStride, \ EIGTYPE* res, Index resStride, \
EIGTYPE alpha) \ EIGTYPE alpha, level3_blocking<EIGTYPE, EIGTYPE>& /*blocking*/) \
{ \ { \
char side='L', uplo='L'; \ char side='L', uplo='L'; \
MKL_INT m, n, lda, ldb, ldc; \ BlasIndex m, n, lda, ldb, ldc; \
const EIGTYPE *a, *b; \ const EIGTYPE *a, *b; \
MKLTYPE alpha_, beta_; \ EIGTYPE beta(1); \
MatrixX##EIGPREFIX b_tmp; \ MatrixX##EIGPREFIX b_tmp; \
EIGTYPE myone(1);\
\ \
/* Set transpose options */ \ /* Set transpose options */ \
/* Set m, n, k */ \ /* Set m, n, k */ \
m = (MKL_INT)rows; \ m = convert_index<BlasIndex>(rows); \
n = (MKL_INT)cols; \ n = convert_index<BlasIndex>(cols); \
\
/* Set alpha_ & beta_ */ \
assign_scalar_eig2mkl(alpha_, alpha); \
assign_scalar_eig2mkl(beta_, myone); \
\ \
/* Set lda, ldb, ldc */ \ /* Set lda, ldb, ldc */ \
lda = (MKL_INT)lhsStride; \ lda = convert_index<BlasIndex>(lhsStride); \
ldb = (MKL_INT)rhsStride; \ ldb = convert_index<BlasIndex>(rhsStride); \
ldc = (MKL_INT)resStride; \ ldc = convert_index<BlasIndex>(resStride); \
\ \
/* Set a, b, c */ \ /* Set a, b, c */ \
if (LhsStorageOrder==RowMajor) uplo='U'; \ if (LhsStorageOrder==RowMajor) uplo='U'; \
@ -83,16 +78,16 @@ struct product_selfadjoint_matrix<EIGTYPE,Index,LhsStorageOrder,true,ConjugateLh
Map<const MatrixX##EIGPREFIX, 0, OuterStride<> > rhs(_rhs,n,m,OuterStride<>(rhsStride)); \ Map<const MatrixX##EIGPREFIX, 0, OuterStride<> > rhs(_rhs,n,m,OuterStride<>(rhsStride)); \
b_tmp = rhs.adjoint(); \ b_tmp = rhs.adjoint(); \
b = b_tmp.data(); \ b = b_tmp.data(); \
ldb = b_tmp.outerStride(); \ ldb = convert_index<BlasIndex>(b_tmp.outerStride()); \
} else b = _rhs; \ } else b = _rhs; \
\ \
MKLPREFIX##symm(&side, &uplo, &m, &n, &alpha_, (const MKLTYPE*)a, &lda, (const MKLTYPE*)b, &ldb, &beta_, (MKLTYPE*)res, &ldc); \ BLASPREFIX##symm_(&side, &uplo, &m, &n, &numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (const BLASTYPE*)b, &ldb, &numext::real_ref(beta), (BLASTYPE*)res, &ldc); \
\ \
} \ } \
}; };
#define EIGEN_MKL_HEMM_L(EIGTYPE, MKLTYPE, EIGPREFIX, MKLPREFIX) \ #define EIGEN_BLAS_HEMM_L(EIGTYPE, BLASTYPE, EIGPREFIX, BLASPREFIX) \
template <typename Index, \ template <typename Index, \
int LhsStorageOrder, bool ConjugateLhs, \ int LhsStorageOrder, bool ConjugateLhs, \
int RhsStorageOrder, bool ConjugateRhs> \ int RhsStorageOrder, bool ConjugateRhs> \
@ -103,29 +98,24 @@ struct product_selfadjoint_matrix<EIGTYPE,Index,LhsStorageOrder,true,ConjugateLh
const EIGTYPE* _lhs, Index lhsStride, \ const EIGTYPE* _lhs, Index lhsStride, \
const EIGTYPE* _rhs, Index rhsStride, \ const EIGTYPE* _rhs, Index rhsStride, \
EIGTYPE* res, Index resStride, \ EIGTYPE* res, Index resStride, \
EIGTYPE alpha) \ EIGTYPE alpha, level3_blocking<EIGTYPE, EIGTYPE>& /*blocking*/) \
{ \ { \
char side='L', uplo='L'; \ char side='L', uplo='L'; \
MKL_INT m, n, lda, ldb, ldc; \ BlasIndex m, n, lda, ldb, ldc; \
const EIGTYPE *a, *b; \ const EIGTYPE *a, *b; \
MKLTYPE alpha_, beta_; \ EIGTYPE beta(1); \
MatrixX##EIGPREFIX b_tmp; \ MatrixX##EIGPREFIX b_tmp; \
Matrix<EIGTYPE, Dynamic, Dynamic, LhsStorageOrder> a_tmp; \ Matrix<EIGTYPE, Dynamic, Dynamic, LhsStorageOrder> a_tmp; \
EIGTYPE myone(1); \
\ \
/* Set transpose options */ \ /* Set transpose options */ \
/* Set m, n, k */ \ /* Set m, n, k */ \
m = (MKL_INT)rows; \ m = convert_index<BlasIndex>(rows); \
n = (MKL_INT)cols; \ n = convert_index<BlasIndex>(cols); \
\
/* Set alpha_ & beta_ */ \
assign_scalar_eig2mkl(alpha_, alpha); \
assign_scalar_eig2mkl(beta_, myone); \
\ \
/* Set lda, ldb, ldc */ \ /* Set lda, ldb, ldc */ \
lda = (MKL_INT)lhsStride; \ lda = convert_index<BlasIndex>(lhsStride); \
ldb = (MKL_INT)rhsStride; \ ldb = convert_index<BlasIndex>(rhsStride); \
ldc = (MKL_INT)resStride; \ ldc = convert_index<BlasIndex>(resStride); \
\ \
/* Set a, b, c */ \ /* Set a, b, c */ \
if (((LhsStorageOrder==ColMajor) && ConjugateLhs) || ((LhsStorageOrder==RowMajor) && (!ConjugateLhs))) { \ if (((LhsStorageOrder==ColMajor) && ConjugateLhs) || ((LhsStorageOrder==RowMajor) && (!ConjugateLhs))) { \
@ -151,23 +141,23 @@ struct product_selfadjoint_matrix<EIGTYPE,Index,LhsStorageOrder,true,ConjugateLh
b_tmp = rhs.transpose(); \ b_tmp = rhs.transpose(); \
} \ } \
b = b_tmp.data(); \ b = b_tmp.data(); \
ldb = b_tmp.outerStride(); \ ldb = convert_index<BlasIndex>(b_tmp.outerStride()); \
} \ } \
\ \
MKLPREFIX##hemm(&side, &uplo, &m, &n, &alpha_, (const MKLTYPE*)a, &lda, (const MKLTYPE*)b, &ldb, &beta_, (MKLTYPE*)res, &ldc); \ BLASPREFIX##hemm_(&side, &uplo, &m, &n, &numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (const BLASTYPE*)b, &ldb, &numext::real_ref(beta), (BLASTYPE*)res, &ldc); \
\ \
} \ } \
}; };
EIGEN_MKL_SYMM_L(double, double, d, d) EIGEN_BLAS_SYMM_L(double, double, d, d)
EIGEN_MKL_SYMM_L(float, float, f, s) EIGEN_BLAS_SYMM_L(float, float, f, s)
EIGEN_MKL_HEMM_L(dcomplex, MKL_Complex16, cd, z) EIGEN_BLAS_HEMM_L(dcomplex, double, cd, z)
EIGEN_MKL_HEMM_L(scomplex, MKL_Complex8, cf, c) EIGEN_BLAS_HEMM_L(scomplex, float, cf, c)
/* Optimized matrix * selfadjoint matrix (?SYMM/?HEMM) product */ /* Optimized matrix * selfadjoint matrix (?SYMM/?HEMM) product */
#define EIGEN_MKL_SYMM_R(EIGTYPE, MKLTYPE, EIGPREFIX, MKLPREFIX) \ #define EIGEN_BLAS_SYMM_R(EIGTYPE, BLASTYPE, EIGPREFIX, BLASPREFIX) \
template <typename Index, \ template <typename Index, \
int LhsStorageOrder, bool ConjugateLhs, \ int LhsStorageOrder, bool ConjugateLhs, \
int RhsStorageOrder, bool ConjugateRhs> \ int RhsStorageOrder, bool ConjugateRhs> \
@ -179,27 +169,22 @@ struct product_selfadjoint_matrix<EIGTYPE,Index,LhsStorageOrder,false,ConjugateL
const EIGTYPE* _lhs, Index lhsStride, \ const EIGTYPE* _lhs, Index lhsStride, \
const EIGTYPE* _rhs, Index rhsStride, \ const EIGTYPE* _rhs, Index rhsStride, \
EIGTYPE* res, Index resStride, \ EIGTYPE* res, Index resStride, \
EIGTYPE alpha) \ EIGTYPE alpha, level3_blocking<EIGTYPE, EIGTYPE>& /*blocking*/) \
{ \ { \
char side='R', uplo='L'; \ char side='R', uplo='L'; \
MKL_INT m, n, lda, ldb, ldc; \ BlasIndex m, n, lda, ldb, ldc; \
const EIGTYPE *a, *b; \ const EIGTYPE *a, *b; \
MKLTYPE alpha_, beta_; \ EIGTYPE beta(1); \
MatrixX##EIGPREFIX b_tmp; \ MatrixX##EIGPREFIX b_tmp; \
EIGTYPE myone(1);\
\ \
/* Set m, n, k */ \ /* Set m, n, k */ \
m = (MKL_INT)rows; \ m = convert_index<BlasIndex>(rows); \
n = (MKL_INT)cols; \ n = convert_index<BlasIndex>(cols); \
\
/* Set alpha_ & beta_ */ \
assign_scalar_eig2mkl(alpha_, alpha); \
assign_scalar_eig2mkl(beta_, myone); \
\ \
/* Set lda, ldb, ldc */ \ /* Set lda, ldb, ldc */ \
lda = (MKL_INT)rhsStride; \ lda = convert_index<BlasIndex>(rhsStride); \
ldb = (MKL_INT)lhsStride; \ ldb = convert_index<BlasIndex>(lhsStride); \
ldc = (MKL_INT)resStride; \ ldc = convert_index<BlasIndex>(resStride); \
\ \
/* Set a, b, c */ \ /* Set a, b, c */ \
if (RhsStorageOrder==RowMajor) uplo='U'; \ if (RhsStorageOrder==RowMajor) uplo='U'; \
@ -209,16 +194,16 @@ struct product_selfadjoint_matrix<EIGTYPE,Index,LhsStorageOrder,false,ConjugateL
Map<const MatrixX##EIGPREFIX, 0, OuterStride<> > lhs(_lhs,n,m,OuterStride<>(rhsStride)); \ Map<const MatrixX##EIGPREFIX, 0, OuterStride<> > lhs(_lhs,n,m,OuterStride<>(rhsStride)); \
b_tmp = lhs.adjoint(); \ b_tmp = lhs.adjoint(); \
b = b_tmp.data(); \ b = b_tmp.data(); \
ldb = b_tmp.outerStride(); \ ldb = convert_index<BlasIndex>(b_tmp.outerStride()); \
} else b = _lhs; \ } else b = _lhs; \
\ \
MKLPREFIX##symm(&side, &uplo, &m, &n, &alpha_, (const MKLTYPE*)a, &lda, (const MKLTYPE*)b, &ldb, &beta_, (MKLTYPE*)res, &ldc); \ BLASPREFIX##symm_(&side, &uplo, &m, &n, &numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (const BLASTYPE*)b, &ldb, &numext::real_ref(beta), (BLASTYPE*)res, &ldc); \
\ \
} \ } \
}; };
#define EIGEN_MKL_HEMM_R(EIGTYPE, MKLTYPE, EIGPREFIX, MKLPREFIX) \ #define EIGEN_BLAS_HEMM_R(EIGTYPE, BLASTYPE, EIGPREFIX, BLASPREFIX) \
template <typename Index, \ template <typename Index, \
int LhsStorageOrder, bool ConjugateLhs, \ int LhsStorageOrder, bool ConjugateLhs, \
int RhsStorageOrder, bool ConjugateRhs> \ int RhsStorageOrder, bool ConjugateRhs> \
@ -229,35 +214,30 @@ struct product_selfadjoint_matrix<EIGTYPE,Index,LhsStorageOrder,false,ConjugateL
const EIGTYPE* _lhs, Index lhsStride, \ const EIGTYPE* _lhs, Index lhsStride, \
const EIGTYPE* _rhs, Index rhsStride, \ const EIGTYPE* _rhs, Index rhsStride, \
EIGTYPE* res, Index resStride, \ EIGTYPE* res, Index resStride, \
EIGTYPE alpha) \ EIGTYPE alpha, level3_blocking<EIGTYPE, EIGTYPE>& /*blocking*/) \
{ \ { \
char side='R', uplo='L'; \ char side='R', uplo='L'; \
MKL_INT m, n, lda, ldb, ldc; \ BlasIndex m, n, lda, ldb, ldc; \
const EIGTYPE *a, *b; \ const EIGTYPE *a, *b; \
MKLTYPE alpha_, beta_; \ EIGTYPE beta(1); \
MatrixX##EIGPREFIX b_tmp; \ MatrixX##EIGPREFIX b_tmp; \
Matrix<EIGTYPE, Dynamic, Dynamic, RhsStorageOrder> a_tmp; \ Matrix<EIGTYPE, Dynamic, Dynamic, RhsStorageOrder> a_tmp; \
EIGTYPE myone(1); \
\ \
/* Set m, n, k */ \ /* Set m, n, k */ \
m = (MKL_INT)rows; \ m = convert_index<BlasIndex>(rows); \
n = (MKL_INT)cols; \ n = convert_index<BlasIndex>(cols); \
\
/* Set alpha_ & beta_ */ \
assign_scalar_eig2mkl(alpha_, alpha); \
assign_scalar_eig2mkl(beta_, myone); \
\ \
/* Set lda, ldb, ldc */ \ /* Set lda, ldb, ldc */ \
lda = (MKL_INT)rhsStride; \ lda = convert_index<BlasIndex>(rhsStride); \
ldb = (MKL_INT)lhsStride; \ ldb = convert_index<BlasIndex>(lhsStride); \
ldc = (MKL_INT)resStride; \ ldc = convert_index<BlasIndex>(resStride); \
\ \
/* Set a, b, c */ \ /* Set a, b, c */ \
if (((RhsStorageOrder==ColMajor) && ConjugateRhs) || ((RhsStorageOrder==RowMajor) && (!ConjugateRhs))) { \ if (((RhsStorageOrder==ColMajor) && ConjugateRhs) || ((RhsStorageOrder==RowMajor) && (!ConjugateRhs))) { \
Map<const Matrix<EIGTYPE, Dynamic, Dynamic, RhsStorageOrder>, 0, OuterStride<> > rhs(_rhs,n,n,OuterStride<>(rhsStride)); \ Map<const Matrix<EIGTYPE, Dynamic, Dynamic, RhsStorageOrder>, 0, OuterStride<> > rhs(_rhs,n,n,OuterStride<>(rhsStride)); \
a_tmp = rhs.conjugate(); \ a_tmp = rhs.conjugate(); \
a = a_tmp.data(); \ a = a_tmp.data(); \
lda = a_tmp.outerStride(); \ lda = convert_index<BlasIndex>(a_tmp.outerStride()); \
} else a = _rhs; \ } else a = _rhs; \
if (RhsStorageOrder==RowMajor) uplo='U'; \ if (RhsStorageOrder==RowMajor) uplo='U'; \
\ \
@ -279,17 +259,17 @@ struct product_selfadjoint_matrix<EIGTYPE,Index,LhsStorageOrder,false,ConjugateL
ldb = b_tmp.outerStride(); \ ldb = b_tmp.outerStride(); \
} \ } \
\ \
MKLPREFIX##hemm(&side, &uplo, &m, &n, &alpha_, (const MKLTYPE*)a, &lda, (const MKLTYPE*)b, &ldb, &beta_, (MKLTYPE*)res, &ldc); \ BLASPREFIX##hemm_(&side, &uplo, &m, &n, &numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (const BLASTYPE*)b, &ldb, &numext::real_ref(beta), (BLASTYPE*)res, &ldc); \
} \ } \
}; };
EIGEN_MKL_SYMM_R(double, double, d, d) EIGEN_BLAS_SYMM_R(double, double, d, d)
EIGEN_MKL_SYMM_R(float, float, f, s) EIGEN_BLAS_SYMM_R(float, float, f, s)
EIGEN_MKL_HEMM_R(dcomplex, MKL_Complex16, cd, z) EIGEN_BLAS_HEMM_R(dcomplex, double, cd, z)
EIGEN_MKL_HEMM_R(scomplex, MKL_Complex8, cf, c) EIGEN_BLAS_HEMM_R(scomplex, float, cf, c)
} // end namespace internal } // end namespace internal
} // end namespace Eigen } // end namespace Eigen
#endif // EIGEN_SELFADJOINT_MATRIX_MATRIX_MKL_H #endif // EIGEN_SELFADJOINT_MATRIX_MATRIX_BLAS_H
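
Passing &numext::real_ref(alpha) straight to the BLAS call is trivially sound for real scalars, and sound for complex ones because std::complex<T> is guaranteed (since C++11) to be layout-compatible with T[2], so a pointer to the real part is also a valid pointer to the whole value. A small check of that layout assumption:

#include <cassert>
#include <complex>

int main()
{
  std::complex<double> alpha(2.0, -1.0);
  // Same reinterpretation the BLAS wrappers rely on when casting to BLASTYPE*.
  const double* p = reinterpret_cast<const double*>(&alpha);
  assert(p[0] == 2.0 && p[1] == -1.0);
  return 0;
}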

View File

@ -25,13 +25,13 @@
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
******************************************************************************** ********************************************************************************
* Content : Eigen bindings to Intel(R) MKL * Content : Eigen bindings to BLAS F77
* Selfadjoint matrix-vector product functionality based on ?SYMV/HEMV. * Selfadjoint matrix-vector product functionality based on ?SYMV/HEMV.
******************************************************************************** ********************************************************************************
*/ */
#ifndef EIGEN_SELFADJOINT_MATRIX_VECTOR_MKL_H #ifndef EIGEN_SELFADJOINT_MATRIX_VECTOR_BLAS_H
#define EIGEN_SELFADJOINT_MATRIX_VECTOR_MKL_H #define EIGEN_SELFADJOINT_MATRIX_VECTOR_BLAS_H
namespace Eigen { namespace Eigen {
@ -47,31 +47,31 @@ template<typename Scalar, typename Index, int StorageOrder, int UpLo, bool Conju
struct selfadjoint_matrix_vector_product_symv : struct selfadjoint_matrix_vector_product_symv :
selfadjoint_matrix_vector_product<Scalar,Index,StorageOrder,UpLo,ConjugateLhs,ConjugateRhs,BuiltIn> {}; selfadjoint_matrix_vector_product<Scalar,Index,StorageOrder,UpLo,ConjugateLhs,ConjugateRhs,BuiltIn> {};
#define EIGEN_MKL_SYMV_SPECIALIZE(Scalar) \ #define EIGEN_BLAS_SYMV_SPECIALIZE(Scalar) \
template<typename Index, int StorageOrder, int UpLo, bool ConjugateLhs, bool ConjugateRhs> \ template<typename Index, int StorageOrder, int UpLo, bool ConjugateLhs, bool ConjugateRhs> \
struct selfadjoint_matrix_vector_product<Scalar,Index,StorageOrder,UpLo,ConjugateLhs,ConjugateRhs,Specialized> { \ struct selfadjoint_matrix_vector_product<Scalar,Index,StorageOrder,UpLo,ConjugateLhs,ConjugateRhs,Specialized> { \
static void run( \ static void run( \
Index size, const Scalar* lhs, Index lhsStride, \ Index size, const Scalar* lhs, Index lhsStride, \
const Scalar* _rhs, Index rhsIncr, Scalar* res, Scalar alpha) { \ const Scalar* _rhs, Scalar* res, Scalar alpha) { \
enum {\ enum {\
IsColMajor = StorageOrder==ColMajor \ IsColMajor = StorageOrder==ColMajor \
}; \ }; \
if (IsColMajor == ConjugateLhs) {\ if (IsColMajor == ConjugateLhs) {\
selfadjoint_matrix_vector_product<Scalar,Index,StorageOrder,UpLo,ConjugateLhs,ConjugateRhs,BuiltIn>::run( \ selfadjoint_matrix_vector_product<Scalar,Index,StorageOrder,UpLo,ConjugateLhs,ConjugateRhs,BuiltIn>::run( \
size, lhs, lhsStride, _rhs, rhsIncr, res, alpha); \ size, lhs, lhsStride, _rhs, res, alpha); \
} else {\ } else {\
selfadjoint_matrix_vector_product_symv<Scalar,Index,StorageOrder,UpLo,ConjugateLhs,ConjugateRhs>::run( \ selfadjoint_matrix_vector_product_symv<Scalar,Index,StorageOrder,UpLo,ConjugateLhs,ConjugateRhs>::run( \
size, lhs, lhsStride, _rhs, rhsIncr, res, alpha); \ size, lhs, lhsStride, _rhs, res, alpha); \
}\ }\
} \ } \
}; \ }; \
EIGEN_MKL_SYMV_SPECIALIZE(double) EIGEN_BLAS_SYMV_SPECIALIZE(double)
EIGEN_MKL_SYMV_SPECIALIZE(float) EIGEN_BLAS_SYMV_SPECIALIZE(float)
EIGEN_MKL_SYMV_SPECIALIZE(dcomplex) EIGEN_BLAS_SYMV_SPECIALIZE(dcomplex)
EIGEN_MKL_SYMV_SPECIALIZE(scomplex) EIGEN_BLAS_SYMV_SPECIALIZE(scomplex)
#define EIGEN_MKL_SYMV_SPECIALIZATION(EIGTYPE,MKLTYPE,MKLFUNC) \ #define EIGEN_BLAS_SYMV_SPECIALIZATION(EIGTYPE,BLASTYPE,BLASFUNC) \
template<typename Index, int StorageOrder, int UpLo, bool ConjugateLhs, bool ConjugateRhs> \ template<typename Index, int StorageOrder, int UpLo, bool ConjugateLhs, bool ConjugateRhs> \
struct selfadjoint_matrix_vector_product_symv<EIGTYPE,Index,StorageOrder,UpLo,ConjugateLhs,ConjugateRhs> \ struct selfadjoint_matrix_vector_product_symv<EIGTYPE,Index,StorageOrder,UpLo,ConjugateLhs,ConjugateRhs> \
{ \ { \
@ -79,36 +79,33 @@ typedef Matrix<EIGTYPE,Dynamic,1,ColMajor> SYMVVector;\
\ \
static void run( \ static void run( \
Index size, const EIGTYPE* lhs, Index lhsStride, \ Index size, const EIGTYPE* lhs, Index lhsStride, \
const EIGTYPE* _rhs, Index rhsIncr, EIGTYPE* res, EIGTYPE alpha) \ const EIGTYPE* _rhs, EIGTYPE* res, EIGTYPE alpha) \
{ \ { \
enum {\ enum {\
IsRowMajor = StorageOrder==RowMajor ? 1 : 0, \ IsRowMajor = StorageOrder==RowMajor ? 1 : 0, \
IsLower = UpLo == Lower ? 1 : 0 \ IsLower = UpLo == Lower ? 1 : 0 \
}; \ }; \
MKL_INT n=size, lda=lhsStride, incx=rhsIncr, incy=1; \ BlasIndex n=convert_index<BlasIndex>(size), lda=convert_index<BlasIndex>(lhsStride), incx=1, incy=1; \
MKLTYPE alpha_, beta_; \ EIGTYPE beta(1); \
const EIGTYPE *x_ptr, myone(1); \ const EIGTYPE *x_ptr; \
char uplo=(IsRowMajor) ? (IsLower ? 'U' : 'L') : (IsLower ? 'L' : 'U'); \ char uplo=(IsRowMajor) ? (IsLower ? 'U' : 'L') : (IsLower ? 'L' : 'U'); \
assign_scalar_eig2mkl(alpha_, alpha); \
assign_scalar_eig2mkl(beta_, myone); \
SYMVVector x_tmp; \ SYMVVector x_tmp; \
if (ConjugateRhs) { \ if (ConjugateRhs) { \
Map<const SYMVVector, 0, InnerStride<> > map_x(_rhs,size,1,InnerStride<>(incx)); \ Map<const SYMVVector, 0 > map_x(_rhs,size,1); \
x_tmp=map_x.conjugate(); \ x_tmp=map_x.conjugate(); \
x_ptr=x_tmp.data(); \ x_ptr=x_tmp.data(); \
incx=1; \
} else x_ptr=_rhs; \ } else x_ptr=_rhs; \
MKLFUNC(&uplo, &n, &alpha_, (const MKLTYPE*)lhs, &lda, (const MKLTYPE*)x_ptr, &incx, &beta_, (MKLTYPE*)res, &incy); \ BLASFUNC(&uplo, &n, &numext::real_ref(alpha), (const BLASTYPE*)lhs, &lda, (const BLASTYPE*)x_ptr, &incx, &numext::real_ref(beta), (BLASTYPE*)res, &incy); \
}\ }\
}; };
EIGEN_MKL_SYMV_SPECIALIZATION(double, double, dsymv) EIGEN_BLAS_SYMV_SPECIALIZATION(double, double, dsymv_)
EIGEN_MKL_SYMV_SPECIALIZATION(float, float, ssymv) EIGEN_BLAS_SYMV_SPECIALIZATION(float, float, ssymv_)
EIGEN_MKL_SYMV_SPECIALIZATION(dcomplex, MKL_Complex16, zhemv) EIGEN_BLAS_SYMV_SPECIALIZATION(dcomplex, double, zhemv_)
EIGEN_MKL_SYMV_SPECIALIZATION(scomplex, MKL_Complex8, chemv) EIGEN_BLAS_SYMV_SPECIALIZATION(scomplex, float, chemv_)
} // end namespace internal } // end namespace internal
} // end namespace Eigen } // end namespace Eigen
#endif // EIGEN_SELFADJOINT_MATRIX_VECTOR_MKL_H #endif // EIGEN_SELFADJOINT_MATRIX_VECTOR_BLAS_H
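
The run() signature above loses its rhsIncr argument: the caller now guarantees a unit-stride right-hand side, so the macro can hardwire incx=1 and only copy when a conjugate is needed. A minimal expression that exercises this path (with EIGEN_USE_BLAS it lands in the zhemv_ specialization):

#include <Eigen/Dense>

int main()
{
  Eigen::MatrixXcd H = Eigen::MatrixXcd::Random(5, 5);
  Eigen::VectorXcd x = Eigen::VectorXcd::Random(5);
  Eigen::VectorXcd y = Eigen::VectorXcd::Zero(5);
  // Selfadjoint matrix * vector; only the lower triangle of H is referenced.
  y.noalias() += H.selfadjointView<Eigen::Lower>() * x;
}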

View File

@ -92,15 +92,27 @@ struct selfadjoint_product_selector<MatrixType,OtherType,UpLo,false>
Scalar actualAlpha = alpha * OtherBlasTraits::extractScalarFactor(other.derived()); Scalar actualAlpha = alpha * OtherBlasTraits::extractScalarFactor(other.derived());
enum { IsRowMajor = (internal::traits<MatrixType>::Flags&RowMajorBit) ? 1 : 0 }; enum {
IsRowMajor = (internal::traits<MatrixType>::Flags&RowMajorBit) ? 1 : 0,
OtherIsRowMajor = _ActualOtherType::Flags&RowMajorBit ? 1 : 0
};
Index size = mat.cols();
Index depth = actualOther.cols();
typedef internal::gemm_blocking_space<IsRowMajor ? RowMajor : ColMajor,Scalar,Scalar,
MatrixType::MaxColsAtCompileTime, MatrixType::MaxColsAtCompileTime, _ActualOtherType::MaxColsAtCompileTime> BlockingType;
BlockingType blocking(size, size, depth, 1, false);
internal::general_matrix_matrix_triangular_product<Index, internal::general_matrix_matrix_triangular_product<Index,
Scalar, _ActualOtherType::Flags&RowMajorBit ? RowMajor : ColMajor, OtherBlasTraits::NeedToConjugate && NumTraits<Scalar>::IsComplex, Scalar, OtherIsRowMajor ? RowMajor : ColMajor, OtherBlasTraits::NeedToConjugate && NumTraits<Scalar>::IsComplex,
Scalar, _ActualOtherType::Flags&RowMajorBit ? ColMajor : RowMajor, (!OtherBlasTraits::NeedToConjugate) && NumTraits<Scalar>::IsComplex, Scalar, OtherIsRowMajor ? ColMajor : RowMajor, (!OtherBlasTraits::NeedToConjugate) && NumTraits<Scalar>::IsComplex,
MatrixType::Flags&RowMajorBit ? RowMajor : ColMajor, UpLo> IsRowMajor ? RowMajor : ColMajor, UpLo>
::run(mat.cols(), actualOther.cols(), ::run(size, depth,
&actualOther.coeffRef(0,0), actualOther.outerStride(), &actualOther.coeffRef(0,0), actualOther.outerStride(), &actualOther.coeffRef(0,0), actualOther.outerStride(), &actualOther.coeffRef(0,0), actualOther.outerStride(),
mat.data(), mat.outerStride(), actualAlpha); mat.data(), mat.outerStride(), actualAlpha, blocking);
} }
}; };
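
The rank-update selector gets the same treatment: one blocking object sized (size, size, depth) for the whole update. The user-level API here is SelfAdjointView::rankUpdate; a minimal sketch (sizes illustrative):

#include <Eigen/Dense>

int main()
{
  Eigen::MatrixXd U = Eigen::MatrixXd::Random(6, 2);
  Eigen::MatrixXd S = Eigen::MatrixXd::Identity(6, 6);
  // S(lower) += 0.5 * U * U^T, via the selfadjoint_product_selector above.
  S.selfadjointView<Eigen::Lower>().rankUpdate(U, 0.5);
}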

View File

@ -126,6 +126,10 @@ EIGEN_DONT_INLINE void product_triangular_matrix_matrix<Scalar,Index,Mode,true,
Index kc = blocking.kc(); // cache block size along the K direction Index kc = blocking.kc(); // cache block size along the K direction
Index mc = (std::min)(rows,blocking.mc()); // cache block size along the M direction Index mc = (std::min)(rows,blocking.mc()); // cache block size along the M direction
// The small panel size must not be larger than blocking size.
// Usually this should never be the case because SmallPanelWidth^2 is very small
// compared to L2 cache size, but let's be safe:
Index panelWidth = (std::min)(Index(SmallPanelWidth),(std::min)(kc,mc));
std::size_t sizeA = kc*mc; std::size_t sizeA = kc*mc;
std::size_t sizeB = kc*cols; std::size_t sizeB = kc*cols;
@ -169,9 +173,9 @@ EIGEN_DONT_INLINE void product_triangular_matrix_matrix<Scalar,Index,Mode,true,
if(IsLower || actual_k2<rows) if(IsLower || actual_k2<rows)
{ {
// for each small vertical panels of lhs // for each small vertical panels of lhs
for (Index k1=0; k1<actual_kc; k1+=SmallPanelWidth) for (Index k1=0; k1<actual_kc; k1+=panelWidth)
{ {
Index actualPanelWidth = std::min<Index>(actual_kc-k1, SmallPanelWidth); Index actualPanelWidth = std::min<Index>(actual_kc-k1, panelWidth);
Index lengthTarget = IsLower ? actual_kc-k1-actualPanelWidth : k1; Index lengthTarget = IsLower ? actual_kc-k1-actualPanelWidth : k1;
Index startBlock = actual_k2+k1; Index startBlock = actual_k2+k1;
Index blockBOffset = k1; Index blockBOffset = k1;
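
The clamp only bites when the blocking sizes come out smaller than SmallPanelWidth (tiny caches, or an aggressively small user-provided blocking); without it, the k1 loop above could step past actual_kc in a single stride and pack out-of-range panels. A worked instance of the guard, with hypothetical sizes:

#include <algorithm>
#include <cassert>
#include <cstddef>

int main()
{
  typedef std::ptrdiff_t Index;
  const Index SmallPanelWidth = 8;  // illustrative; Eigen derives it from mr/nr
  Index kc = 4, mc = 4;             // hypothetical tiny blocking
  Index panelWidth = (std::min)(SmallPanelWidth, (std::min)(kc, mc));
  assert(panelWidth == 4);          // the panel never exceeds the cache block
  return 0;
}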

View File

@ -25,13 +25,13 @@
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
******************************************************************************** ********************************************************************************
* Content : Eigen bindings to Intel(R) MKL * Content : Eigen bindings to BLAS F77
* Triangular matrix * matrix product functionality based on ?TRMM. * Triangular matrix * matrix product functionality based on ?TRMM.
******************************************************************************** ********************************************************************************
*/ */
#ifndef EIGEN_TRIANGULAR_MATRIX_MATRIX_MKL_H #ifndef EIGEN_TRIANGULAR_MATRIX_MATRIX_BLAS_H
#define EIGEN_TRIANGULAR_MATRIX_MATRIX_MKL_H #define EIGEN_TRIANGULAR_MATRIX_MATRIX_BLAS_H
namespace Eigen { namespace Eigen {
@ -50,7 +50,7 @@ struct product_triangular_matrix_matrix_trmm :
// try to go to BLAS specialization // try to go to BLAS specialization
#define EIGEN_MKL_TRMM_SPECIALIZE(Scalar, LhsIsTriangular) \ #define EIGEN_BLAS_TRMM_SPECIALIZE(Scalar, LhsIsTriangular) \
template <typename Index, int Mode, \ template <typename Index, int Mode, \
int LhsStorageOrder, bool ConjugateLhs, \ int LhsStorageOrder, bool ConjugateLhs, \
int RhsStorageOrder, bool ConjugateRhs> \ int RhsStorageOrder, bool ConjugateRhs> \
@ -65,17 +65,17 @@ struct product_triangular_matrix_matrix<Scalar,Index, Mode, LhsIsTriangular, \
} \ } \
}; };
EIGEN_MKL_TRMM_SPECIALIZE(double, true) EIGEN_BLAS_TRMM_SPECIALIZE(double, true)
EIGEN_MKL_TRMM_SPECIALIZE(double, false) EIGEN_BLAS_TRMM_SPECIALIZE(double, false)
EIGEN_MKL_TRMM_SPECIALIZE(dcomplex, true) EIGEN_BLAS_TRMM_SPECIALIZE(dcomplex, true)
EIGEN_MKL_TRMM_SPECIALIZE(dcomplex, false) EIGEN_BLAS_TRMM_SPECIALIZE(dcomplex, false)
EIGEN_MKL_TRMM_SPECIALIZE(float, true) EIGEN_BLAS_TRMM_SPECIALIZE(float, true)
EIGEN_MKL_TRMM_SPECIALIZE(float, false) EIGEN_BLAS_TRMM_SPECIALIZE(float, false)
EIGEN_MKL_TRMM_SPECIALIZE(scomplex, true) EIGEN_BLAS_TRMM_SPECIALIZE(scomplex, true)
EIGEN_MKL_TRMM_SPECIALIZE(scomplex, false) EIGEN_BLAS_TRMM_SPECIALIZE(scomplex, false)
// implements col-major += alpha * op(triangular) * op(general) // implements col-major += alpha * op(triangular) * op(general)
#define EIGEN_MKL_TRMM_L(EIGTYPE, MKLTYPE, EIGPREFIX, MKLPREFIX) \ #define EIGEN_BLAS_TRMM_L(EIGTYPE, BLASTYPE, EIGPREFIX, BLASPREFIX) \
template <typename Index, int Mode, \ template <typename Index, int Mode, \
int LhsStorageOrder, bool ConjugateLhs, \ int LhsStorageOrder, bool ConjugateLhs, \
int RhsStorageOrder, bool ConjugateRhs> \ int RhsStorageOrder, bool ConjugateRhs> \
@ -106,13 +106,14 @@ struct product_triangular_matrix_matrix_trmm<EIGTYPE,Index,Mode,true, \
typedef Matrix<EIGTYPE, Dynamic, Dynamic, LhsStorageOrder> MatrixLhs; \ typedef Matrix<EIGTYPE, Dynamic, Dynamic, LhsStorageOrder> MatrixLhs; \
typedef Matrix<EIGTYPE, Dynamic, Dynamic, RhsStorageOrder> MatrixRhs; \ typedef Matrix<EIGTYPE, Dynamic, Dynamic, RhsStorageOrder> MatrixRhs; \
\ \
/* Non-square case - doesn't fit MKL ?TRMM. Fall back to the default triangular product or call MKL ?GEMM*/ \ /* Non-square case - doesn't fit BLAS ?TRMM. Fall back to the default triangular product or call BLAS ?GEMM*/ \
if (rows != depth) { \ if (rows != depth) { \
\ \
int nthr = mkl_domain_get_max_threads(EIGEN_MKL_DOMAIN_BLAS); \ /* FIXME handle mkl_domain_get_max_threads */ \
/*int nthr = mkl_domain_get_max_threads(EIGEN_BLAS_DOMAIN_BLAS);*/ int nthr = 1;\
\ \
if (((nthr==1) && (((std::max)(rows,depth)-diagSize)/(double)diagSize < 0.5))) { \ if (((nthr==1) && (((std::max)(rows,depth)-diagSize)/(double)diagSize < 0.5))) { \
/* Most likely no benefit to call TRMM or GEMM from MKL*/ \ /* Most likely no benefit to call TRMM or GEMM from BLAS */ \
product_triangular_matrix_matrix<EIGTYPE,Index,Mode,true, \ product_triangular_matrix_matrix<EIGTYPE,Index,Mode,true, \
LhsStorageOrder,ConjugateLhs, RhsStorageOrder, ConjugateRhs, ColMajor, BuiltIn>::run( \ LhsStorageOrder,ConjugateLhs, RhsStorageOrder, ConjugateRhs, ColMajor, BuiltIn>::run( \
_rows, _cols, _depth, _lhs, lhsStride, _rhs, rhsStride, res, resStride, alpha, blocking); \ _rows, _cols, _depth, _lhs, lhsStride, _rhs, rhsStride, res, resStride, alpha, blocking); \
@ -121,27 +122,23 @@ struct product_triangular_matrix_matrix_trmm<EIGTYPE,Index,Mode,true, \
/* Make sense to call GEMM */ \ /* Make sense to call GEMM */ \
Map<const MatrixLhs, 0, OuterStride<> > lhsMap(_lhs,rows,depth,OuterStride<>(lhsStride)); \ Map<const MatrixLhs, 0, OuterStride<> > lhsMap(_lhs,rows,depth,OuterStride<>(lhsStride)); \
MatrixLhs aa_tmp=lhsMap.template triangularView<Mode>(); \ MatrixLhs aa_tmp=lhsMap.template triangularView<Mode>(); \
MKL_INT aStride = aa_tmp.outerStride(); \ BlasIndex aStride = convert_index<BlasIndex>(aa_tmp.outerStride()); \
gemm_blocking_space<ColMajor,EIGTYPE,EIGTYPE,Dynamic,Dynamic,Dynamic> gemm_blocking(_rows,_cols,_depth, 1, true); \ gemm_blocking_space<ColMajor,EIGTYPE,EIGTYPE,Dynamic,Dynamic,Dynamic> gemm_blocking(_rows,_cols,_depth, 1, true); \
general_matrix_matrix_product<Index,EIGTYPE,LhsStorageOrder,ConjugateLhs,EIGTYPE,RhsStorageOrder,ConjugateRhs,ColMajor>::run( \ general_matrix_matrix_product<Index,EIGTYPE,LhsStorageOrder,ConjugateLhs,EIGTYPE,RhsStorageOrder,ConjugateRhs,ColMajor>::run( \
rows, cols, depth, aa_tmp.data(), aStride, _rhs, rhsStride, res, resStride, alpha, gemm_blocking, 0); \ rows, cols, depth, aa_tmp.data(), aStride, _rhs, rhsStride, res, resStride, alpha, gemm_blocking, 0); \
\ \
/*std::cout << "TRMM_L: A is not square! Go to MKL GEMM implementation! " << nthr<<" \n";*/ \ /*std::cout << "TRMM_L: A is not square! Go to BLAS GEMM implementation! " << nthr<<" \n";*/ \
} \ } \
return; \ return; \
} \ } \
char side = 'L', transa, uplo, diag = 'N'; \ char side = 'L', transa, uplo, diag = 'N'; \
EIGTYPE *b; \ EIGTYPE *b; \
const EIGTYPE *a; \ const EIGTYPE *a; \
MKL_INT m, n, lda, ldb; \ BlasIndex m, n, lda, ldb; \
MKLTYPE alpha_; \
\
/* Set alpha_*/ \
assign_scalar_eig2mkl<MKLTYPE, EIGTYPE>(alpha_, alpha); \
\ \
/* Set m, n */ \ /* Set m, n */ \
m = (MKL_INT)diagSize; \ m = convert_index<BlasIndex>(diagSize); \
n = (MKL_INT)cols; \ n = convert_index<BlasIndex>(cols); \
\ \
/* Set trans */ \ /* Set trans */ \
transa = (LhsStorageOrder==RowMajor) ? ((ConjugateLhs) ? 'C' : 'T') : 'N'; \ transa = (LhsStorageOrder==RowMajor) ? ((ConjugateLhs) ? 'C' : 'T') : 'N'; \
@@ -152,7 +149,7 @@ struct product_triangular_matrix_matrix_trmm<EIGTYPE,Index,Mode,true, \
\ \
if (ConjugateRhs) b_tmp = rhs.conjugate(); else b_tmp = rhs; \ if (ConjugateRhs) b_tmp = rhs.conjugate(); else b_tmp = rhs; \
b = b_tmp.data(); \ b = b_tmp.data(); \
ldb = b_tmp.outerStride(); \ ldb = convert_index<BlasIndex>(b_tmp.outerStride()); \
\ \
/* Set uplo */ \ /* Set uplo */ \
uplo = IsLower ? 'L' : 'U'; \ uplo = IsLower ? 'L' : 'U'; \
@@ -168,14 +165,14 @@ struct product_triangular_matrix_matrix_trmm<EIGTYPE,Index,Mode,true, \
else if (IsUnitDiag) \ else if (IsUnitDiag) \
a_tmp.diagonal().setOnes();\ a_tmp.diagonal().setOnes();\
a = a_tmp.data(); \ a = a_tmp.data(); \
lda = a_tmp.outerStride(); \ lda = convert_index<BlasIndex>(a_tmp.outerStride()); \
} else { \ } else { \
a = _lhs; \ a = _lhs; \
lda = lhsStride; \ lda = convert_index<BlasIndex>(lhsStride); \
} \ } \
/*std::cout << "TRMM_L: A is square! Go to MKL TRMM implementation! \n";*/ \ /*std::cout << "TRMM_L: A is square! Go to BLAS TRMM implementation! \n";*/ \
/* call ?trmm*/ \ /* call ?trmm*/ \
MKLPREFIX##trmm(&side, &uplo, &transa, &diag, &m, &n, &alpha_, (const MKLTYPE*)a, &lda, (MKLTYPE*)b, &ldb); \ BLASPREFIX##trmm_(&side, &uplo, &transa, &diag, &m, &n, &numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (BLASTYPE*)b, &ldb); \
\ \
/* Add op(a_triangular)*b into res*/ \ /* Add op(a_triangular)*b into res*/ \
Map<MatrixX##EIGPREFIX, 0, OuterStride<> > res_tmp(res,rows,cols,OuterStride<>(resStride)); \ Map<MatrixX##EIGPREFIX, 0, OuterStride<> > res_tmp(res,rows,cols,OuterStride<>(resStride)); \
@@ -183,13 +180,13 @@ struct product_triangular_matrix_matrix_trmm<EIGTYPE,Index,Mode,true, \
} \ } \
}; };
EIGEN_MKL_TRMM_L(double, double, d, d) EIGEN_BLAS_TRMM_L(double, double, d, d)
EIGEN_MKL_TRMM_L(dcomplex, MKL_Complex16, cd, z) EIGEN_BLAS_TRMM_L(dcomplex, double, cd, z)
EIGEN_MKL_TRMM_L(float, float, f, s) EIGEN_BLAS_TRMM_L(float, float, f, s)
EIGEN_MKL_TRMM_L(scomplex, MKL_Complex8, cf, c) EIGEN_BLAS_TRMM_L(scomplex, float, cf, c)
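For orientation, here is a minimal sketch of a product this left-side specialization intercepts. It assumes EIGEN_USE_BLAS is defined, a BLAS library is linked, and the Eigen 3.3-era API; the sizes are illustrative only.

#define EIGEN_USE_BLAS
#include <Eigen/Dense>

int main() {
  Eigen::MatrixXd A = Eigen::MatrixXd::Random(64, 64);  // square triangular factor
  Eigen::MatrixXd B = Eigen::MatrixXd::Random(64, 32);
  Eigen::MatrixXd C = Eigen::MatrixXd::Zero(64, 32);
  // col-major C += alpha * op(triangular) * op(general), with alpha == 1 here;
  // since the triangular factor is square, the macro above forwards to dtrmm_.
  C.noalias() += A.triangularView<Eigen::Lower>() * B;
  return 0;
}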
// implements col-major += alpha * op(general) * op(triangular) // implements col-major += alpha * op(general) * op(triangular)
#define EIGEN_MKL_TRMM_R(EIGTYPE, MKLTYPE, EIGPREFIX, MKLPREFIX) \ #define EIGEN_BLAS_TRMM_R(EIGTYPE, BLASTYPE, EIGPREFIX, BLASPREFIX) \
template <typename Index, int Mode, \ template <typename Index, int Mode, \
int LhsStorageOrder, bool ConjugateLhs, \ int LhsStorageOrder, bool ConjugateLhs, \
int RhsStorageOrder, bool ConjugateRhs> \ int RhsStorageOrder, bool ConjugateRhs> \
@@ -220,13 +217,13 @@ struct product_triangular_matrix_matrix_trmm<EIGTYPE,Index,Mode,false, \
typedef Matrix<EIGTYPE, Dynamic, Dynamic, LhsStorageOrder> MatrixLhs; \ typedef Matrix<EIGTYPE, Dynamic, Dynamic, LhsStorageOrder> MatrixLhs; \
typedef Matrix<EIGTYPE, Dynamic, Dynamic, RhsStorageOrder> MatrixRhs; \ typedef Matrix<EIGTYPE, Dynamic, Dynamic, RhsStorageOrder> MatrixRhs; \
\ \
/* Non-square case - doesn't fit MKL ?TRMM. Fall back to the default triangular product or call MKL ?GEMM */ \ /* Non-square case - doesn't fit BLAS ?TRMM. Fall back to the default triangular product or call BLAS ?GEMM */ \
if (cols != depth) { \ if (cols != depth) { \
\ \
int nthr = mkl_domain_get_max_threads(EIGEN_MKL_DOMAIN_BLAS); \ int nthr = 1 /*mkl_domain_get_max_threads(EIGEN_BLAS_DOMAIN_BLAS)*/; \
\ \
if ((nthr==1) && (((std::max)(cols,depth)-diagSize)/(double)diagSize < 0.5)) { \ if ((nthr==1) && (((std::max)(cols,depth)-diagSize)/(double)diagSize < 0.5)) { \
/* Most likely no benefit to call TRMM or GEMM from MKL*/ \ /* Most likely no benefit to call TRMM or GEMM from BLAS*/ \
product_triangular_matrix_matrix<EIGTYPE,Index,Mode,false, \ product_triangular_matrix_matrix<EIGTYPE,Index,Mode,false, \
LhsStorageOrder,ConjugateLhs, RhsStorageOrder, ConjugateRhs, ColMajor, BuiltIn>::run( \ LhsStorageOrder,ConjugateLhs, RhsStorageOrder, ConjugateRhs, ColMajor, BuiltIn>::run( \
_rows, _cols, _depth, _lhs, lhsStride, _rhs, rhsStride, res, resStride, alpha, blocking); \ _rows, _cols, _depth, _lhs, lhsStride, _rhs, rhsStride, res, resStride, alpha, blocking); \
@@ -235,27 +232,23 @@ struct product_triangular_matrix_matrix_trmm<EIGTYPE,Index,Mode,false, \
/* Makes sense to call GEMM */ \ /* Makes sense to call GEMM */ \
Map<const MatrixRhs, 0, OuterStride<> > rhsMap(_rhs,depth,cols, OuterStride<>(rhsStride)); \ Map<const MatrixRhs, 0, OuterStride<> > rhsMap(_rhs,depth,cols, OuterStride<>(rhsStride)); \
MatrixRhs aa_tmp=rhsMap.template triangularView<Mode>(); \ MatrixRhs aa_tmp=rhsMap.template triangularView<Mode>(); \
MKL_INT aStride = aa_tmp.outerStride(); \ BlasIndex aStride = convert_index<BlasIndex>(aa_tmp.outerStride()); \
gemm_blocking_space<ColMajor,EIGTYPE,EIGTYPE,Dynamic,Dynamic,Dynamic> gemm_blocking(_rows,_cols,_depth, 1, true); \ gemm_blocking_space<ColMajor,EIGTYPE,EIGTYPE,Dynamic,Dynamic,Dynamic> gemm_blocking(_rows,_cols,_depth, 1, true); \
general_matrix_matrix_product<Index,EIGTYPE,LhsStorageOrder,ConjugateLhs,EIGTYPE,RhsStorageOrder,ConjugateRhs,ColMajor>::run( \ general_matrix_matrix_product<Index,EIGTYPE,LhsStorageOrder,ConjugateLhs,EIGTYPE,RhsStorageOrder,ConjugateRhs,ColMajor>::run( \
rows, cols, depth, _lhs, lhsStride, aa_tmp.data(), aStride, res, resStride, alpha, gemm_blocking, 0); \ rows, cols, depth, _lhs, lhsStride, aa_tmp.data(), aStride, res, resStride, alpha, gemm_blocking, 0); \
\ \
/*std::cout << "TRMM_R: A is not square! Go to MKL GEMM implementation! " << nthr<<" \n";*/ \ /*std::cout << "TRMM_R: A is not square! Go to BLAS GEMM implementation! " << nthr<<" \n";*/ \
} \ } \
return; \ return; \
} \ } \
char side = 'R', transa, uplo, diag = 'N'; \ char side = 'R', transa, uplo, diag = 'N'; \
EIGTYPE *b; \ EIGTYPE *b; \
const EIGTYPE *a; \ const EIGTYPE *a; \
MKL_INT m, n, lda, ldb; \ BlasIndex m, n, lda, ldb; \
MKLTYPE alpha_; \
\
/* Set alpha_*/ \
assign_scalar_eig2mkl<MKLTYPE, EIGTYPE>(alpha_, alpha); \
\ \
/* Set m, n */ \ /* Set m, n */ \
m = (MKL_INT)rows; \ m = convert_index<BlasIndex>(rows); \
n = (MKL_INT)diagSize; \ n = convert_index<BlasIndex>(diagSize); \
\ \
/* Set trans */ \ /* Set trans */ \
transa = (RhsStorageOrder==RowMajor) ? ((ConjugateRhs) ? 'C' : 'T') : 'N'; \ transa = (RhsStorageOrder==RowMajor) ? ((ConjugateRhs) ? 'C' : 'T') : 'N'; \
@@ -266,7 +259,7 @@ struct product_triangular_matrix_matrix_trmm<EIGTYPE,Index,Mode,false, \
\ \
if (ConjugateLhs) b_tmp = lhs.conjugate(); else b_tmp = lhs; \ if (ConjugateLhs) b_tmp = lhs.conjugate(); else b_tmp = lhs; \
b = b_tmp.data(); \ b = b_tmp.data(); \
ldb = b_tmp.outerStride(); \ ldb = convert_index<BlasIndex>(b_tmp.outerStride()); \
\ \
/* Set uplo */ \ /* Set uplo */ \
uplo = IsLower ? 'L' : 'U'; \ uplo = IsLower ? 'L' : 'U'; \
@@ -282,14 +275,14 @@ struct product_triangular_matrix_matrix_trmm<EIGTYPE,Index,Mode,false, \
else if (IsUnitDiag) \ else if (IsUnitDiag) \
a_tmp.diagonal().setOnes();\ a_tmp.diagonal().setOnes();\
a = a_tmp.data(); \ a = a_tmp.data(); \
lda = a_tmp.outerStride(); \ lda = convert_index<BlasIndex>(a_tmp.outerStride()); \
} else { \ } else { \
a = _rhs; \ a = _rhs; \
lda = rhsStride; \ lda = convert_index<BlasIndex>(rhsStride); \
} \ } \
/*std::cout << "TRMM_R: A is square! Go to MKL TRMM implementation! \n";*/ \ /*std::cout << "TRMM_R: A is square! Go to BLAS TRMM implementation! \n";*/ \
/* call ?trmm*/ \ /* call ?trmm*/ \
MKLPREFIX##trmm(&side, &uplo, &transa, &diag, &m, &n, &alpha_, (const MKLTYPE*)a, &lda, (MKLTYPE*)b, &ldb); \ BLASPREFIX##trmm_(&side, &uplo, &transa, &diag, &m, &n, &numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (BLASTYPE*)b, &ldb); \
\ \
/* Add op(a_triangular)*b into res*/ \ /* Add op(a_triangular)*b into res*/ \
Map<MatrixX##EIGPREFIX, 0, OuterStride<> > res_tmp(res,rows,cols,OuterStride<>(resStride)); \ Map<MatrixX##EIGPREFIX, 0, OuterStride<> > res_tmp(res,rows,cols,OuterStride<>(resStride)); \
@@ -297,13 +290,13 @@ struct product_triangular_matrix_matrix_trmm<EIGTYPE,Index,Mode,false, \
} \ } \
}; };
EIGEN_MKL_TRMM_R(double, double, d, d) EIGEN_BLAS_TRMM_R(double, double, d, d)
EIGEN_MKL_TRMM_R(dcomplex, MKL_Complex16, cd, z) EIGEN_BLAS_TRMM_R(dcomplex, double, cd, z)
EIGEN_MKL_TRMM_R(float, float, f, s) EIGEN_BLAS_TRMM_R(float, float, f, s)
EIGEN_MKL_TRMM_R(scomplex, MKL_Complex8, cf, c) EIGEN_BLAS_TRMM_R(scomplex, float, cf, c)
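The right-side case is symmetric; continuing the sketch above (same assumptions), a product with the triangular factor on the right is routed to ?trmm with side='R':

// with A and B as in the previous sketch
Eigen::MatrixXd C2 = Eigen::MatrixXd::Zero(32, 64);
C2.noalias() += B.transpose() * A.triangularView<Eigen::Upper>();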
} // end namespace internal } // end namespace internal
} // end namespace Eigen } // end namespace Eigen
#endif // EIGEN_TRIANGULAR_MATRIX_MATRIX_MKL_H #endif // EIGEN_TRIANGULAR_MATRIX_MATRIX_BLAS_H
View File
@@ -25,13 +25,13 @@
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
******************************************************************************** ********************************************************************************
* Content : Eigen bindings to Intel(R) MKL * Content : Eigen bindings to BLAS F77
* Triangular matrix-vector product functionality based on ?TRMV. * Triangular matrix-vector product functionality based on ?TRMV.
******************************************************************************** ********************************************************************************
*/ */
#ifndef EIGEN_TRIANGULAR_MATRIX_VECTOR_MKL_H #ifndef EIGEN_TRIANGULAR_MATRIX_VECTOR_BLAS_H
#define EIGEN_TRIANGULAR_MATRIX_VECTOR_MKL_H #define EIGEN_TRIANGULAR_MATRIX_VECTOR_BLAS_H
namespace Eigen { namespace Eigen {
@@ -47,7 +47,7 @@ template<typename Index, int Mode, typename LhsScalar, bool ConjLhs, typename Rh
struct triangular_matrix_vector_product_trmv : struct triangular_matrix_vector_product_trmv :
triangular_matrix_vector_product<Index,Mode,LhsScalar,ConjLhs,RhsScalar,ConjRhs,StorageOrder,BuiltIn> {}; triangular_matrix_vector_product<Index,Mode,LhsScalar,ConjLhs,RhsScalar,ConjRhs,StorageOrder,BuiltIn> {};
#define EIGEN_MKL_TRMV_SPECIALIZE(Scalar) \ #define EIGEN_BLAS_TRMV_SPECIALIZE(Scalar) \
template<typename Index, int Mode, bool ConjLhs, bool ConjRhs> \ template<typename Index, int Mode, bool ConjLhs, bool ConjRhs> \
struct triangular_matrix_vector_product<Index,Mode,Scalar,ConjLhs,Scalar,ConjRhs,ColMajor,Specialized> { \ struct triangular_matrix_vector_product<Index,Mode,Scalar,ConjLhs,Scalar,ConjRhs,ColMajor,Specialized> { \
static void run(Index _rows, Index _cols, const Scalar* _lhs, Index lhsStride, \ static void run(Index _rows, Index _cols, const Scalar* _lhs, Index lhsStride, \
@@ -65,13 +65,13 @@ struct triangular_matrix_vector_product<Index,Mode,Scalar,ConjLhs,Scalar,ConjRhs
} \ } \
}; };
EIGEN_MKL_TRMV_SPECIALIZE(double) EIGEN_BLAS_TRMV_SPECIALIZE(double)
EIGEN_MKL_TRMV_SPECIALIZE(float) EIGEN_BLAS_TRMV_SPECIALIZE(float)
EIGEN_MKL_TRMV_SPECIALIZE(dcomplex) EIGEN_BLAS_TRMV_SPECIALIZE(dcomplex)
EIGEN_MKL_TRMV_SPECIALIZE(scomplex) EIGEN_BLAS_TRMV_SPECIALIZE(scomplex)
// implements col-major: res += alpha * op(triangular) * vector // implements col-major: res += alpha * op(triangular) * vector
#define EIGEN_MKL_TRMV_CM(EIGTYPE, MKLTYPE, EIGPREFIX, MKLPREFIX) \ #define EIGEN_BLAS_TRMV_CM(EIGTYPE, BLASTYPE, EIGPREFIX, BLASPREFIX) \
template<typename Index, int Mode, bool ConjLhs, bool ConjRhs> \ template<typename Index, int Mode, bool ConjLhs, bool ConjRhs> \
struct triangular_matrix_vector_product_trmv<Index,Mode,EIGTYPE,ConjLhs,EIGTYPE,ConjRhs,ColMajor> { \ struct triangular_matrix_vector_product_trmv<Index,Mode,EIGTYPE,ConjLhs,EIGTYPE,ConjRhs,ColMajor> { \
enum { \ enum { \
@@ -105,17 +105,15 @@ struct triangular_matrix_vector_product_trmv<Index,Mode,EIGTYPE,ConjLhs,EIGTYPE,
/* Square part handling */\ /* Square part handling */\
\ \
char trans, uplo, diag; \ char trans, uplo, diag; \
MKL_INT m, n, lda, incx, incy; \ BlasIndex m, n, lda, incx, incy; \
EIGTYPE const *a; \ EIGTYPE const *a; \
MKLTYPE alpha_, beta_; \ EIGTYPE beta(1); \
assign_scalar_eig2mkl<MKLTYPE, EIGTYPE>(alpha_, alpha); \
assign_scalar_eig2mkl<MKLTYPE, EIGTYPE>(beta_, EIGTYPE(1)); \
\ \
/* Set m, n */ \ /* Set m, n */ \
n = (MKL_INT)size; \ n = convert_index<BlasIndex>(size); \
lda = lhsStride; \ lda = convert_index<BlasIndex>(lhsStride); \
incx = 1; \ incx = 1; \
incy = resIncr; \ incy = convert_index<BlasIndex>(resIncr); \
\ \
/* Set uplo, trans and diag*/ \ /* Set uplo, trans and diag*/ \
trans = 'N'; \ trans = 'N'; \
@@ -123,39 +121,39 @@ struct triangular_matrix_vector_product_trmv<Index,Mode,EIGTYPE,ConjLhs,EIGTYPE,
diag = IsUnitDiag ? 'U' : 'N'; \ diag = IsUnitDiag ? 'U' : 'N'; \
\ \
/* call ?TRMV*/ \ /* call ?TRMV*/ \
MKLPREFIX##trmv(&uplo, &trans, &diag, &n, (const MKLTYPE*)_lhs, &lda, (MKLTYPE*)x, &incx); \ BLASPREFIX##trmv_(&uplo, &trans, &diag, &n, (const BLASTYPE*)_lhs, &lda, (BLASTYPE*)x, &incx); \
\ \
/* Add op(a_tr)rhs into res*/ \ /* Add op(a_tr)rhs into res*/ \
MKLPREFIX##axpy(&n, &alpha_,(const MKLTYPE*)x, &incx, (MKLTYPE*)_res, &incy); \ BLASPREFIX##axpy_(&n, &numext::real_ref(alpha),(const BLASTYPE*)x, &incx, (BLASTYPE*)_res, &incy); \
/* Non-square case - doesn't fit MKL ?TRMV. Fall back to the default triangular product */ \ /* Non-square case - doesn't fit BLAS ?TRMV. Fall back to the default triangular product */ \
if (size<(std::max)(rows,cols)) { \ if (size<(std::max)(rows,cols)) { \
if (ConjRhs) x_tmp = rhs.conjugate(); else x_tmp = rhs; \ if (ConjRhs) x_tmp = rhs.conjugate(); else x_tmp = rhs; \
x = x_tmp.data(); \ x = x_tmp.data(); \
if (size<rows) { \ if (size<rows) { \
y = _res + size*resIncr; \ y = _res + size*resIncr; \
a = _lhs + size; \ a = _lhs + size; \
m = rows-size; \ m = convert_index<BlasIndex>(rows-size); \
n = size; \ n = convert_index<BlasIndex>(size); \
} \ } \
else { \ else { \
x += size; \ x += size; \
y = _res; \ y = _res; \
a = _lhs + size*lda; \ a = _lhs + size*lda; \
m = size; \ m = convert_index<BlasIndex>(size); \
n = cols-size; \ n = convert_index<BlasIndex>(cols-size); \
} \ } \
MKLPREFIX##gemv(&trans, &m, &n, &alpha_, (const MKLTYPE*)a, &lda, (const MKLTYPE*)x, &incx, &beta_, (MKLTYPE*)y, &incy); \ BLASPREFIX##gemv_(&trans, &m, &n, &numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (const BLASTYPE*)x, &incx, &numext::real_ref(beta), (BLASTYPE*)y, &incy); \
} \ } \
} \ } \
}; };
EIGEN_MKL_TRMV_CM(double, double, d, d) EIGEN_BLAS_TRMV_CM(double, double, d, d)
EIGEN_MKL_TRMV_CM(dcomplex, MKL_Complex16, cd, z) EIGEN_BLAS_TRMV_CM(dcomplex, double, cd, z)
EIGEN_MKL_TRMV_CM(float, float, f, s) EIGEN_BLAS_TRMV_CM(float, float, f, s)
EIGEN_MKL_TRMV_CM(scomplex, MKL_Complex8, cf, c) EIGEN_BLAS_TRMV_CM(scomplex, float, cf, c)
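A corresponding sketch for the col-major vector path (same assumptions as before): the square part goes through ?trmv plus ?axpy, exactly as the macro above spells out.

// with A a 64x64 MatrixXd as in the earlier sketches
Eigen::VectorXd v = Eigen::VectorXd::Random(64);
Eigen::VectorXd r = Eigen::VectorXd::Zero(64);
r.noalias() += A.triangularView<Eigen::Upper>() * v;  // dtrmv_ followed by daxpy_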
// implements row-major: res += alpha * op(triangular) * vector // implements row-major: res += alpha * op(triangular) * vector
#define EIGEN_MKL_TRMV_RM(EIGTYPE, MKLTYPE, EIGPREFIX, MKLPREFIX) \ #define EIGEN_BLAS_TRMV_RM(EIGTYPE, BLASTYPE, EIGPREFIX, BLASPREFIX) \
template<typename Index, int Mode, bool ConjLhs, bool ConjRhs> \ template<typename Index, int Mode, bool ConjLhs, bool ConjRhs> \
struct triangular_matrix_vector_product_trmv<Index,Mode,EIGTYPE,ConjLhs,EIGTYPE,ConjRhs,RowMajor> { \ struct triangular_matrix_vector_product_trmv<Index,Mode,EIGTYPE,ConjLhs,EIGTYPE,ConjRhs,RowMajor> { \
enum { \ enum { \
@@ -189,17 +187,15 @@ struct triangular_matrix_vector_product_trmv<Index,Mode,EIGTYPE,ConjLhs,EIGTYPE,
/* Square part handling */\ /* Square part handling */\
\ \
char trans, uplo, diag; \ char trans, uplo, diag; \
MKL_INT m, n, lda, incx, incy; \ BlasIndex m, n, lda, incx, incy; \
EIGTYPE const *a; \ EIGTYPE const *a; \
MKLTYPE alpha_, beta_; \ EIGTYPE beta(1); \
assign_scalar_eig2mkl<MKLTYPE, EIGTYPE>(alpha_, alpha); \
assign_scalar_eig2mkl<MKLTYPE, EIGTYPE>(beta_, EIGTYPE(1)); \
\ \
/* Set m, n */ \ /* Set m, n */ \
n = (MKL_INT)size; \ n = convert_index<BlasIndex>(size); \
lda = lhsStride; \ lda = convert_index<BlasIndex>(lhsStride); \
incx = 1; \ incx = 1; \
incy = resIncr; \ incy = convert_index<BlasIndex>(resIncr); \
\ \
/* Set uplo, trans and diag*/ \ /* Set uplo, trans and diag*/ \
trans = ConjLhs ? 'C' : 'T'; \ trans = ConjLhs ? 'C' : 'T'; \
@@ -207,39 +203,39 @@ struct triangular_matrix_vector_product_trmv<Index,Mode,EIGTYPE,ConjLhs,EIGTYPE,
diag = IsUnitDiag ? 'U' : 'N'; \ diag = IsUnitDiag ? 'U' : 'N'; \
\ \
/* call ?TRMV*/ \ /* call ?TRMV*/ \
MKLPREFIX##trmv(&uplo, &trans, &diag, &n, (const MKLTYPE*)_lhs, &lda, (MKLTYPE*)x, &incx); \ BLASPREFIX##trmv_(&uplo, &trans, &diag, &n, (const BLASTYPE*)_lhs, &lda, (BLASTYPE*)x, &incx); \
\ \
/* Add op(a_tr)rhs into res*/ \ /* Add op(a_tr)rhs into res*/ \
MKLPREFIX##axpy(&n, &alpha_,(const MKLTYPE*)x, &incx, (MKLTYPE*)_res, &incy); \ BLASPREFIX##axpy_(&n, &numext::real_ref(alpha),(const BLASTYPE*)x, &incx, (BLASTYPE*)_res, &incy); \
/* Non-square case - doesn't fit MKL ?TRMV. Fall back to the default triangular product */ \ /* Non-square case - doesn't fit BLAS ?TRMV. Fall back to the default triangular product */ \
if (size<(std::max)(rows,cols)) { \ if (size<(std::max)(rows,cols)) { \
if (ConjRhs) x_tmp = rhs.conjugate(); else x_tmp = rhs; \ if (ConjRhs) x_tmp = rhs.conjugate(); else x_tmp = rhs; \
x = x_tmp.data(); \ x = x_tmp.data(); \
if (size<rows) { \ if (size<rows) { \
y = _res + size*resIncr; \ y = _res + size*resIncr; \
a = _lhs + size*lda; \ a = _lhs + size*lda; \
m = rows-size; \ m = convert_index<BlasIndex>(rows-size); \
n = size; \ n = convert_index<BlasIndex>(size); \
} \ } \
else { \ else { \
x += size; \ x += size; \
y = _res; \ y = _res; \
a = _lhs + size; \ a = _lhs + size; \
m = size; \ m = convert_index<BlasIndex>(size); \
n = cols-size; \ n = convert_index<BlasIndex>(cols-size); \
} \ } \
MKLPREFIX##gemv(&trans, &n, &m, &alpha_, (const MKLTYPE*)a, &lda, (const MKLTYPE*)x, &incx, &beta_, (MKLTYPE*)y, &incy); \ BLASPREFIX##gemv_(&trans, &n, &m, &numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (const BLASTYPE*)x, &incx, &numext::real_ref(beta), (BLASTYPE*)y, &incy); \
} \ } \
} \ } \
}; };
EIGEN_MKL_TRMV_RM(double, double, d, d) EIGEN_BLAS_TRMV_RM(double, double, d, d)
EIGEN_MKL_TRMV_RM(dcomplex, MKL_Complex16, cd, z) EIGEN_BLAS_TRMV_RM(dcomplex, double, cd, z)
EIGEN_MKL_TRMV_RM(float, float, f, s) EIGEN_BLAS_TRMV_RM(float, float, f, s)
EIGEN_MKL_TRMV_RM(scomplex, MKL_Complex8, cf, c) EIGEN_BLAS_TRMV_RM(scomplex, float, cf, c)
} // end namespace internal } // end namespace internal
} // end namespace Eigen } // end namespace Eigen
#endif // EIGEN_TRIANGULAR_MATRIX_VECTOR_MKL_H #endif // EIGEN_TRIANGULAR_MATRIX_VECTOR_BLAS_H
View File
@@ -25,20 +25,20 @@
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
******************************************************************************** ********************************************************************************
* Content : Eigen bindings to Intel(R) MKL * Content : Eigen bindings to BLAS F77
* Triangular matrix * matrix solve functionality based on ?TRSM. * Triangular matrix * matrix solve functionality based on ?TRSM.
******************************************************************************** ********************************************************************************
*/ */
#ifndef EIGEN_TRIANGULAR_SOLVER_MATRIX_MKL_H #ifndef EIGEN_TRIANGULAR_SOLVER_MATRIX_BLAS_H
#define EIGEN_TRIANGULAR_SOLVER_MATRIX_MKL_H #define EIGEN_TRIANGULAR_SOLVER_MATRIX_BLAS_H
namespace Eigen { namespace Eigen {
namespace internal { namespace internal {
// implements LeftSide op(triangular)^-1 * general // implements LeftSide op(triangular)^-1 * general
#define EIGEN_MKL_TRSM_L(EIGTYPE, MKLTYPE, MKLPREFIX) \ #define EIGEN_BLAS_TRSM_L(EIGTYPE, BLASTYPE, BLASPREFIX) \
template <typename Index, int Mode, bool Conjugate, int TriStorageOrder> \ template <typename Index, int Mode, bool Conjugate, int TriStorageOrder> \
struct triangular_solve_matrix<EIGTYPE,Index,OnTheLeft,Mode,Conjugate,TriStorageOrder,ColMajor> \ struct triangular_solve_matrix<EIGTYPE,Index,OnTheLeft,Mode,Conjugate,TriStorageOrder,ColMajor> \
{ \ { \
@@ -53,13 +53,11 @@ struct triangular_solve_matrix<EIGTYPE,Index,OnTheLeft,Mode,Conjugate,TriStorage
const EIGTYPE* _tri, Index triStride, \ const EIGTYPE* _tri, Index triStride, \
EIGTYPE* _other, Index otherStride, level3_blocking<EIGTYPE,EIGTYPE>& /*blocking*/) \ EIGTYPE* _other, Index otherStride, level3_blocking<EIGTYPE,EIGTYPE>& /*blocking*/) \
{ \ { \
MKL_INT m = size, n = otherSize, lda, ldb; \ BlasIndex m = convert_index<BlasIndex>(size), n = convert_index<BlasIndex>(otherSize), lda, ldb; \
char side = 'L', uplo, diag='N', transa; \ char side = 'L', uplo, diag='N', transa; \
/* Set alpha_ */ \ /* Set alpha_ */ \
MKLTYPE alpha; \ EIGTYPE alpha(1); \
EIGTYPE myone(1); \ ldb = convert_index<BlasIndex>(otherStride);\
assign_scalar_eig2mkl(alpha, myone); \
ldb = otherStride;\
\ \
const EIGTYPE *a; \ const EIGTYPE *a; \
/* Set trans */ \ /* Set trans */ \
@@ -75,25 +73,25 @@ struct triangular_solve_matrix<EIGTYPE,Index,OnTheLeft,Mode,Conjugate,TriStorage
if (conjA) { \ if (conjA) { \
a_tmp = tri.conjugate(); \ a_tmp = tri.conjugate(); \
a = a_tmp.data(); \ a = a_tmp.data(); \
lda = a_tmp.outerStride(); \ lda = convert_index<BlasIndex>(a_tmp.outerStride()); \
} else { \ } else { \
a = _tri; \ a = _tri; \
lda = triStride; \ lda = convert_index<BlasIndex>(triStride); \
} \ } \
if (IsUnitDiag) diag='U'; \ if (IsUnitDiag) diag='U'; \
/* call ?trsm*/ \ /* call ?trsm*/ \
MKLPREFIX##trsm(&side, &uplo, &transa, &diag, &m, &n, &alpha, (const MKLTYPE*)a, &lda, (MKLTYPE*)_other, &ldb); \ BLASPREFIX##trsm_(&side, &uplo, &transa, &diag, &m, &n, &numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (BLASTYPE*)_other, &ldb); \
} \ } \
}; };
EIGEN_MKL_TRSM_L(double, double, d) EIGEN_BLAS_TRSM_L(double, double, d)
EIGEN_MKL_TRSM_L(dcomplex, MKL_Complex16, z) EIGEN_BLAS_TRSM_L(dcomplex, double, z)
EIGEN_MKL_TRSM_L(float, float, s) EIGEN_BLAS_TRSM_L(float, float, s)
EIGEN_MKL_TRSM_L(scomplex, MKL_Complex8, c) EIGEN_BLAS_TRSM_L(scomplex, float, c)
// implements RightSide general * op(triangular)^-1 // implements RightSide general * op(triangular)^-1
#define EIGEN_MKL_TRSM_R(EIGTYPE, MKLTYPE, MKLPREFIX) \ #define EIGEN_BLAS_TRSM_R(EIGTYPE, BLASTYPE, BLASPREFIX) \
template <typename Index, int Mode, bool Conjugate, int TriStorageOrder> \ template <typename Index, int Mode, bool Conjugate, int TriStorageOrder> \
struct triangular_solve_matrix<EIGTYPE,Index,OnTheRight,Mode,Conjugate,TriStorageOrder,ColMajor> \ struct triangular_solve_matrix<EIGTYPE,Index,OnTheRight,Mode,Conjugate,TriStorageOrder,ColMajor> \
{ \ { \
@@ -108,13 +106,11 @@ struct triangular_solve_matrix<EIGTYPE,Index,OnTheRight,Mode,Conjugate,TriStorag
const EIGTYPE* _tri, Index triStride, \ const EIGTYPE* _tri, Index triStride, \
EIGTYPE* _other, Index otherStride, level3_blocking<EIGTYPE,EIGTYPE>& /*blocking*/) \ EIGTYPE* _other, Index otherStride, level3_blocking<EIGTYPE,EIGTYPE>& /*blocking*/) \
{ \ { \
MKL_INT m = otherSize, n = size, lda, ldb; \ BlasIndex m = convert_index<BlasIndex>(otherSize), n = convert_index<BlasIndex>(size), lda, ldb; \
char side = 'R', uplo, diag='N', transa; \ char side = 'R', uplo, diag='N', transa; \
/* Set alpha_ */ \ /* Set alpha_ */ \
MKLTYPE alpha; \ EIGTYPE alpha(1); \
EIGTYPE myone(1); \ ldb = convert_index<BlasIndex>(otherStride);\
assign_scalar_eig2mkl(alpha, myone); \
ldb = otherStride;\
\ \
const EIGTYPE *a; \ const EIGTYPE *a; \
/* Set trans */ \ /* Set trans */ \
@@ -130,26 +126,26 @@ struct triangular_solve_matrix<EIGTYPE,Index,OnTheRight,Mode,Conjugate,TriStorag
if (conjA) { \ if (conjA) { \
a_tmp = tri.conjugate(); \ a_tmp = tri.conjugate(); \
a = a_tmp.data(); \ a = a_tmp.data(); \
lda = a_tmp.outerStride(); \ lda = convert_index<BlasIndex>(a_tmp.outerStride()); \
} else { \ } else { \
a = _tri; \ a = _tri; \
lda = triStride; \ lda = convert_index<BlasIndex>(triStride); \
} \ } \
if (IsUnitDiag) diag='U'; \ if (IsUnitDiag) diag='U'; \
/* call ?trsm*/ \ /* call ?trsm*/ \
MKLPREFIX##trsm(&side, &uplo, &transa, &diag, &m, &n, &alpha, (const MKLTYPE*)a, &lda, (MKLTYPE*)_other, &ldb); \ BLASPREFIX##trsm_(&side, &uplo, &transa, &diag, &m, &n, &numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (BLASTYPE*)_other, &ldb); \
/*std::cout << "TRMS_L specialization!\n";*/ \ /*std::cout << "TRMS_L specialization!\n";*/ \
} \ } \
}; };
EIGEN_MKL_TRSM_R(double, double, d) EIGEN_BLAS_TRSM_R(double, double, d)
EIGEN_MKL_TRSM_R(dcomplex, MKL_Complex16, z) EIGEN_BLAS_TRSM_R(dcomplex, double, z)
EIGEN_MKL_TRSM_R(float, float, s) EIGEN_BLAS_TRSM_R(float, float, s)
EIGEN_MKL_TRSM_R(scomplex, MKL_Complex8, c) EIGEN_BLAS_TRSM_R(scomplex, float, c)
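For the solver bindings, a minimal self-contained sketch of code that reaches ?trsm on both sides (again assuming EIGEN_USE_BLAS and a linked BLAS; solveInPlace is the documented triangular-solve entry point):

#define EIGEN_USE_BLAS
#include <Eigen/Dense>

int main() {
  Eigen::MatrixXd A = Eigen::MatrixXd::Random(64, 64);
  Eigen::MatrixXd B = Eigen::MatrixXd::Random(64, 8);
  Eigen::MatrixXd C = Eigen::MatrixXd::Random(8, 64);
  // LeftSide:  B <- op(A_lower)^-1 * B, i.e. dtrsm_ with side='L'
  A.triangularView<Eigen::Lower>().solveInPlace(B);
  // RightSide: C <- C * op(A_upper)^-1, i.e. dtrsm_ with side='R'
  A.triangularView<Eigen::Upper>().solveInPlace<Eigen::OnTheRight>(C);
  return 0;
}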
} // end namespace internal } // end namespace internal
} // end namespace Eigen } // end namespace Eigen
#endif // EIGEN_TRIANGULAR_SOLVER_MATRIX_MKL_H #endif // EIGEN_TRIANGULAR_SOLVER_MATRIX_BLAS_H
View File
@@ -123,18 +123,18 @@ template<typename Scalar> struct get_factor<Scalar,typename NumTraits<Scalar>::R
template<typename Scalar, typename Index> template<typename Scalar, typename Index>
class BlasVectorMapper { class BlasVectorMapper {
public: public:
EIGEN_ALWAYS_INLINE BlasVectorMapper(Scalar *data) : m_data(data) {} EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE BlasVectorMapper(Scalar *data) : m_data(data) {}
EIGEN_ALWAYS_INLINE Scalar operator()(Index i) const { EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Scalar operator()(Index i) const {
return m_data[i]; return m_data[i];
} }
template <typename Packet, int AlignmentType> template <typename Packet, int AlignmentType>
EIGEN_ALWAYS_INLINE Packet load(Index i) const { EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet load(Index i) const {
return ploadt<Packet, AlignmentType>(m_data + i); return ploadt<Packet, AlignmentType>(m_data + i);
} }
template <typename Packet> template <typename Packet>
bool aligned(Index i) const { EIGEN_DEVICE_FUNC bool aligned(Index i) const {
return (size_t(m_data+i)%sizeof(Packet))==0; return (size_t(m_data+i)%sizeof(Packet))==0;
} }
@@ -148,25 +148,25 @@ class BlasLinearMapper {
typedef typename packet_traits<Scalar>::type Packet; typedef typename packet_traits<Scalar>::type Packet;
typedef typename packet_traits<Scalar>::half HalfPacket; typedef typename packet_traits<Scalar>::half HalfPacket;
EIGEN_ALWAYS_INLINE BlasLinearMapper(Scalar *data) : m_data(data) {} EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE BlasLinearMapper(Scalar *data) : m_data(data) {}
EIGEN_ALWAYS_INLINE void prefetch(int i) const { EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void prefetch(int i) const {
internal::prefetch(&operator()(i)); internal::prefetch(&operator()(i));
} }
EIGEN_ALWAYS_INLINE Scalar& operator()(Index i) const { EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Scalar& operator()(Index i) const {
return m_data[i]; return m_data[i];
} }
EIGEN_ALWAYS_INLINE Packet loadPacket(Index i) const { EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet loadPacket(Index i) const {
return ploadt<Packet, AlignmentType>(m_data + i); return ploadt<Packet, AlignmentType>(m_data + i);
} }
EIGEN_ALWAYS_INLINE HalfPacket loadHalfPacket(Index i) const { EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE HalfPacket loadHalfPacket(Index i) const {
return ploadt<HalfPacket, AlignmentType>(m_data + i); return ploadt<HalfPacket, AlignmentType>(m_data + i);
} }
EIGEN_ALWAYS_INLINE void storePacket(Index i, const Packet &p) const { EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void storePacket(Index i, const Packet &p) const {
pstoret<Scalar, Packet, AlignmentType>(m_data + i, p); pstoret<Scalar, Packet, AlignmentType>(m_data + i, p);
} }
@@ -184,18 +184,18 @@ class blas_data_mapper {
typedef BlasLinearMapper<Scalar, Index, AlignmentType> LinearMapper; typedef BlasLinearMapper<Scalar, Index, AlignmentType> LinearMapper;
typedef BlasVectorMapper<Scalar, Index> VectorMapper; typedef BlasVectorMapper<Scalar, Index> VectorMapper;
EIGEN_ALWAYS_INLINE blas_data_mapper(Scalar* data, Index stride) : m_data(data), m_stride(stride) {} EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE blas_data_mapper(Scalar* data, Index stride) : m_data(data), m_stride(stride) {}
EIGEN_ALWAYS_INLINE blas_data_mapper<Scalar, Index, StorageOrder, AlignmentType> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE blas_data_mapper<Scalar, Index, StorageOrder, AlignmentType>
getSubMapper(Index i, Index j) const { getSubMapper(Index i, Index j) const {
return blas_data_mapper<Scalar, Index, StorageOrder, AlignmentType>(&operator()(i, j), m_stride); return blas_data_mapper<Scalar, Index, StorageOrder, AlignmentType>(&operator()(i, j), m_stride);
} }
EIGEN_ALWAYS_INLINE LinearMapper getLinearMapper(Index i, Index j) const { EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE LinearMapper getLinearMapper(Index i, Index j) const {
return LinearMapper(&operator()(i, j)); return LinearMapper(&operator()(i, j));
} }
EIGEN_ALWAYS_INLINE VectorMapper getVectorMapper(Index i, Index j) const { EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE VectorMapper getVectorMapper(Index i, Index j) const {
return VectorMapper(&operator()(i, j)); return VectorMapper(&operator()(i, j));
} }
@@ -205,28 +205,28 @@ class blas_data_mapper {
return m_data[StorageOrder==RowMajor ? j + i*m_stride : i + j*m_stride]; return m_data[StorageOrder==RowMajor ? j + i*m_stride : i + j*m_stride];
} }
EIGEN_ALWAYS_INLINE Packet loadPacket(Index i, Index j) const { EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet loadPacket(Index i, Index j) const {
return ploadt<Packet, AlignmentType>(&operator()(i, j)); return ploadt<Packet, AlignmentType>(&operator()(i, j));
} }
EIGEN_ALWAYS_INLINE HalfPacket loadHalfPacket(Index i, Index j) const { EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE HalfPacket loadHalfPacket(Index i, Index j) const {
return ploadt<HalfPacket, AlignmentType>(&operator()(i, j)); return ploadt<HalfPacket, AlignmentType>(&operator()(i, j));
} }
template<typename SubPacket> template<typename SubPacket>
EIGEN_ALWAYS_INLINE void scatterPacket(Index i, Index j, const SubPacket &p) const { EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void scatterPacket(Index i, Index j, const SubPacket &p) const {
pscatter<Scalar, SubPacket>(&operator()(i, j), p, m_stride); pscatter<Scalar, SubPacket>(&operator()(i, j), p, m_stride);
} }
template<typename SubPacket> template<typename SubPacket>
EIGEN_ALWAYS_INLINE SubPacket gatherPacket(Index i, Index j) const { EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE SubPacket gatherPacket(Index i, Index j) const {
return pgather<Scalar, SubPacket>(&operator()(i, j), m_stride); return pgather<Scalar, SubPacket>(&operator()(i, j), m_stride);
} }
const Index stride() const { return m_stride; } EIGEN_DEVICE_FUNC const Index stride() const { return m_stride; }
const Scalar* data() const { return m_data; } EIGEN_DEVICE_FUNC const Scalar* data() const { return m_data; }
Index firstAligned(Index size) const { EIGEN_DEVICE_FUNC Index firstAligned(Index size) const {
if (size_t(m_data)%sizeof(Scalar)) { if (size_t(m_data)%sizeof(Scalar)) {
return -1; return -1;
} }
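blas_data_mapper is an internal helper, so the following is only an illustrative sketch of its indexing contract as inferred from the declarations above (the default Unaligned alignment template argument is an assumption):

#include <Eigen/Core>

int main() {
  double data[12] = {};  // viewed as a 3x4 column-major block with outer stride 3
  Eigen::internal::blas_data_mapper<double, int, Eigen::ColMajor> M(data, 3);
  M(1, 2) = 5.0;         // column-major: writes data[1 + 2*3], i.e. data[7]
  return data[7] == 5.0 ? 0 : 1;
}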
View File
@@ -56,8 +56,8 @@ const int HugeCost = 10000;
* for a matrix, this means that the storage order is row-major. * for a matrix, this means that the storage order is row-major.
* If this bit is not set, the storage order is column-major. * If this bit is not set, the storage order is column-major.
* For an expression, this determines the storage order of * For an expression, this determines the storage order of
* the matrix created by evaluation of that expression. * the matrix created by evaluation of that expression.
* \sa \ref TopicStorageOrders */ * \sa \blank \ref TopicStorageOrders */
const unsigned int RowMajorBit = 0x1; const unsigned int RowMajorBit = 0x1;
/** \ingroup flags /** \ingroup flags
@@ -67,6 +67,7 @@ const unsigned int EvalBeforeNestingBit = 0x2;
/** \ingroup flags /** \ingroup flags
* \deprecated * \deprecated
* means the expression should be evaluated before any assignment */ * means the expression should be evaluated before any assignment */
EIGEN_DEPRECATED
const unsigned int EvalBeforeAssigningBit = 0x4; // FIXME deprecated const unsigned int EvalBeforeAssigningBit = 0x4; // FIXME deprecated
/** \ingroup flags /** \ingroup flags
@@ -158,7 +159,7 @@ const unsigned int DirectAccessBit = 0x40;
* expression.packet<Aligned>(0); * expression.packet<Aligned>(0);
* \endcode * \endcode
*/ */
const unsigned int AlignedBit = 0x80; EIGEN_DEPRECATED const unsigned int AlignedBit = 0x80;
const unsigned int NestByRefBit = 0x100; const unsigned int NestByRefBit = 0x100;
@@ -168,7 +169,7 @@ const unsigned int NestByRefBit = 0x100;
* can be either row-major or column-major. * can be either row-major or column-major.
* The precise choice will be decided at evaluation time or when * The precise choice will be decided at evaluation time or when
* combined with other expressions. * combined with other expressions.
* \sa \ref RowMajorBit, \ref TopicStorageOrders */ * \sa \blank \ref RowMajorBit, \ref TopicStorageOrders */
const unsigned int NoPreferredStorageOrderBit = 0x200; const unsigned int NoPreferredStorageOrderBit = 0x200;
/** \ingroup flags /** \ingroup flags
@@ -187,8 +188,7 @@ const unsigned int CompressedAccessBit = 0x400;
// list of flags that are inherited by default // list of flags that are inherited by default
const unsigned int HereditaryBits = RowMajorBit const unsigned int HereditaryBits = RowMajorBit
| EvalBeforeNestingBit | EvalBeforeNestingBit;
| EvalBeforeAssigningBit;
/** \defgroup enums Enumerations /** \defgroup enums Enumerations
* \ingroup Core_Module * \ingroup Core_Module
@@ -224,7 +224,7 @@ enum {
/** \ingroup enums /** \ingroup enums
* Enum for indicating whether a buffer is aligned or not. */ * Enum for indicating whether a buffer is aligned or not. */
enum { enum {
Unaligned=0, /**< Data pointer has no specific alignment. */ Unaligned=0, /**< Data pointer has no specific alignment. */
Aligned8=8, /**< Data pointer is aligned on a 8 bytes boundary. */ Aligned8=8, /**< Data pointer is aligned on a 8 bytes boundary. */
Aligned16=16, /**< Data pointer is aligned on a 16 bytes boundary. */ Aligned16=16, /**< Data pointer is aligned on a 16 bytes boundary. */
Eigen/src/Core/util/DisableStupidWarnings.h Normal file → Executable file
View File
@@ -15,20 +15,25 @@
// 4522 - 'class' : multiple assignment operators specified // 4522 - 'class' : multiple assignment operators specified
// 4700 - uninitialized local variable 'xyz' used // 4700 - uninitialized local variable 'xyz' used
// 4717 - 'function' : recursive on all control paths, function will cause runtime stack overflow // 4717 - 'function' : recursive on all control paths, function will cause runtime stack overflow
// 4800 - 'type' : forcing value to bool 'true' or 'false' (performance warning)
#ifndef EIGEN_PERMANENTLY_DISABLE_STUPID_WARNINGS #ifndef EIGEN_PERMANENTLY_DISABLE_STUPID_WARNINGS
#pragma warning( push ) #pragma warning( push )
#endif #endif
#pragma warning( disable : 4100 4101 4127 4181 4211 4244 4273 4324 4503 4512 4522 4700 4717 ) #pragma warning( disable : 4100 4101 4127 4181 4211 4244 4273 4324 4503 4512 4522 4700 4717 4800)
#elif defined __INTEL_COMPILER #elif defined __INTEL_COMPILER
// 2196 - routine is both "inline" and "noinline" ("noinline" assumed) // 2196 - routine is both "inline" and "noinline" ("noinline" assumed)
// ICC 12 generates this warning even without any inline keyword, when defining class methods 'inline' i.e. inside of class body // ICC 12 generates this warning even without any inline keyword, when defining class methods 'inline' i.e. inside of class body
// typedef that may be a reference type. // typedef that may be a reference type.
// 279 - controlling expression is constant // 279 - controlling expression is constant
// ICC 12 generates this warning on assert(constant_expression_depending_on_template_params) and frankly this is a legitimate use case. // ICC 12 generates this warning on assert(constant_expression_depending_on_template_params) and frankly this is a legitimate use case.
// 1684 - conversion from pointer to same-sized integral type (potential portability problem)
// 2259 - non-pointer conversion from "Eigen::Index={ptrdiff_t={long}}" to "int" may lose significant bits
#ifndef EIGEN_PERMANENTLY_DISABLE_STUPID_WARNINGS #ifndef EIGEN_PERMANENTLY_DISABLE_STUPID_WARNINGS
#pragma warning push #pragma warning push
#endif #endif
#pragma warning disable 2196 279 #pragma warning disable 2196 279 1684 2259
#elif defined __clang__ #elif defined __clang__
// -Wconstant-logical-operand - warning: use of logical && with constant operand; switch to bitwise & or remove constant // -Wconstant-logical-operand - warning: use of logical && with constant operand; switch to bitwise & or remove constant
// this is really a stupid warning as it warns on compile-time expressions involving enums // this is really a stupid warning as it warns on compile-time expressions involving enums
@@ -38,4 +43,16 @@
#pragma clang diagnostic ignored "-Wconstant-logical-operand" #pragma clang diagnostic ignored "-Wconstant-logical-operand"
#endif #endif
#if defined __NVCC__
// Disable the "statement is unreachable" message
#pragma diag_suppress code_is_unreachable
// Disable the "dynamic initialization in unreachable code" message
#pragma diag_suppress initialization_not_reachable
// Disable the "calling a __host__ function from a __host__ __device__ function is not allowed" messages (yes, there are 4 of them)
#pragma diag_suppress 2651
#pragma diag_suppress 2653
#pragma diag_suppress 2668
#pragma diag_suppress 2670
#endif
#endif // not EIGEN_WARNINGS_DISABLED #endif // not EIGEN_WARNINGS_DISABLED
View File
@@ -94,12 +94,8 @@ template<typename BinaryOp, typename Lhs, typename Rhs> class CwiseBinaryOp;
template<typename Decomposition, typename Rhstype> class Solve; template<typename Decomposition, typename Rhstype> class Solve;
template<typename XprType> class Inverse; template<typename XprType> class Inverse;
namespace internal {
template<typename Lhs, typename Rhs> struct product_tag;
}
template<typename Lhs, typename Rhs, int Option = DefaultProduct> class Product; template<typename Lhs, typename Rhs, int Option = DefaultProduct> class Product;
template<typename Derived> class DiagonalBase; template<typename Derived> class DiagonalBase;
template<typename _DiagonalVectorType> class DiagonalWrapper; template<typename _DiagonalVectorType> class DiagonalWrapper;
template<typename _Scalar, int SizeAtCompileTime, int MaxSizeAtCompileTime=SizeAtCompileTime> class DiagonalMatrix; template<typename _Scalar, int SizeAtCompileTime, int MaxSizeAtCompileTime=SizeAtCompileTime> class DiagonalMatrix;
@@ -210,6 +206,8 @@ template<typename Scalar> struct scalar_add_op;
template<typename Scalar> struct scalar_constant_op; template<typename Scalar> struct scalar_constant_op;
template<typename Scalar> struct scalar_identity_op; template<typename Scalar> struct scalar_identity_op;
template<typename Scalar,bool iscpx> struct scalar_sign_op; template<typename Scalar,bool iscpx> struct scalar_sign_op;
template<typename Scalar> struct scalar_igamma_op;
template<typename Scalar> struct scalar_igammac_op;
template<typename LhsScalar,typename RhsScalar=LhsScalar> struct scalar_product_op; template<typename LhsScalar,typename RhsScalar=LhsScalar> struct scalar_product_op;
template<typename LhsScalar,typename RhsScalar> struct scalar_multiple2_op; template<typename LhsScalar,typename RhsScalar> struct scalar_multiple2_op;
@@ -252,6 +250,7 @@ template<typename MatrixType> struct inverse_impl;
template<typename MatrixType> class HouseholderQR; template<typename MatrixType> class HouseholderQR;
template<typename MatrixType> class ColPivHouseholderQR; template<typename MatrixType> class ColPivHouseholderQR;
template<typename MatrixType> class FullPivHouseholderQR; template<typename MatrixType> class FullPivHouseholderQR;
template<typename MatrixType> class CompleteOrthogonalDecomposition;
template<typename MatrixType, int QRPreconditioner = ColPivHouseholderQRPreconditioner> class JacobiSVD; template<typename MatrixType, int QRPreconditioner = ColPivHouseholderQRPreconditioner> class JacobiSVD;
template<typename MatrixType> class BDCSVD; template<typename MatrixType> class BDCSVD;
template<typename MatrixType, int UpLo = Lower> class LLT; template<typename MatrixType, int UpLo = Lower> class LLT;
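Among the new forward declarations, CompleteOrthogonalDecomposition is the notable addition. A hedged usage sketch (the class lives in the QR module and returns the minimum-norm least-squares solution, which makes it suitable for rank-deficient systems):

#include <Eigen/QR>

int main() {
  Eigen::MatrixXd A = Eigen::MatrixXd::Random(10, 4);
  Eigen::VectorXd b = Eigen::VectorXd::Random(10);
  Eigen::CompleteOrthogonalDecomposition<Eigen::MatrixXd> cod(A);
  Eigen::VectorXd x = cod.solve(b);  // minimum-norm least-squares solution
  return 0;
}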
View File
@@ -49,7 +49,7 @@
#define EIGEN_USE_LAPACKE #define EIGEN_USE_LAPACKE
#endif #endif
#if defined(EIGEN_USE_BLAS) || defined(EIGEN_USE_LAPACKE) || defined(EIGEN_USE_MKL_VML) #if defined(EIGEN_USE_LAPACKE) || defined(EIGEN_USE_MKL_VML)
#define EIGEN_USE_MKL #define EIGEN_USE_MKL
#endif #endif
@@ -64,7 +64,6 @@
# ifndef EIGEN_USE_MKL # ifndef EIGEN_USE_MKL
/*If the MKL version is too old, undef everything*/ /*If the MKL version is too old, undef everything*/
# undef EIGEN_USE_MKL_ALL # undef EIGEN_USE_MKL_ALL
# undef EIGEN_USE_BLAS
# undef EIGEN_USE_LAPACKE # undef EIGEN_USE_LAPACKE
# undef EIGEN_USE_MKL_VML # undef EIGEN_USE_MKL_VML
# undef EIGEN_USE_LAPACKE_STRICT # undef EIGEN_USE_LAPACKE_STRICT
@@ -107,52 +106,23 @@
#else #else
#define EIGEN_MKL_DOMAIN_PARDISO MKL_PARDISO #define EIGEN_MKL_DOMAIN_PARDISO MKL_PARDISO
#endif #endif
#endif
namespace Eigen { namespace Eigen {
typedef std::complex<double> dcomplex; typedef std::complex<double> dcomplex;
typedef std::complex<float> scomplex; typedef std::complex<float> scomplex;
namespace internal { #if defined(EIGEN_USE_MKL)
typedef MKL_INT BlasIndex;
template<typename MKLType, typename EigenType> #else
static inline void assign_scalar_eig2mkl(MKLType& mklScalar, const EigenType& eigenScalar) { typedef int BlasIndex;
mklScalar=eigenScalar; #endif
}
template<typename MKLType, typename EigenType>
static inline void assign_conj_scalar_eig2mkl(MKLType& mklScalar, const EigenType& eigenScalar) {
mklScalar=eigenScalar;
}
template <>
inline void assign_scalar_eig2mkl<MKL_Complex16,dcomplex>(MKL_Complex16& mklScalar, const dcomplex& eigenScalar) {
mklScalar.real=eigenScalar.real();
mklScalar.imag=eigenScalar.imag();
}
template <>
inline void assign_scalar_eig2mkl<MKL_Complex8,scomplex>(MKL_Complex8& mklScalar, const scomplex& eigenScalar) {
mklScalar.real=eigenScalar.real();
mklScalar.imag=eigenScalar.imag();
}
template <>
inline void assign_conj_scalar_eig2mkl<MKL_Complex16,dcomplex>(MKL_Complex16& mklScalar, const dcomplex& eigenScalar) {
mklScalar.real=eigenScalar.real();
mklScalar.imag=-eigenScalar.imag();
}
template <>
inline void assign_conj_scalar_eig2mkl<MKL_Complex8,scomplex>(MKL_Complex8& mklScalar, const scomplex& eigenScalar) {
mklScalar.real=eigenScalar.real();
mklScalar.imag=-eigenScalar.imag();
}
} // end namespace internal
} // end namespace Eigen } // end namespace Eigen
#if defined(EIGEN_USE_BLAS)
#include "../../misc/blas.h"
#endif #endif
#endif // EIGEN_MKL_SUPPORT_H #endif // EIGEN_MKL_SUPPORT_H
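The convert_index<BlasIndex>(...) calls introduced throughout this commit reduce to the sketch below: internal::convert_index narrows the index, with a debug-mode assertion that the value fits (assuming the Eigen 3.3 internal API).

#define EIGEN_USE_BLAS
#include <Eigen/Core>

int main() {
  Eigen::Index n = 1000;  // API-level index, typically std::ptrdiff_t
  Eigen::BlasIndex nb = Eigen::internal::convert_index<Eigen::BlasIndex>(n);  // int for F77 BLAS
  return int(nb);
}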
View File
@@ -13,7 +13,7 @@
#define EIGEN_WORLD_VERSION 3 #define EIGEN_WORLD_VERSION 3
#define EIGEN_MAJOR_VERSION 2 #define EIGEN_MAJOR_VERSION 2
#define EIGEN_MINOR_VERSION 91 #define EIGEN_MINOR_VERSION 92
#define EIGEN_VERSION_AT_LEAST(x,y,z) (EIGEN_WORLD_VERSION>x || (EIGEN_WORLD_VERSION>=x && \ #define EIGEN_VERSION_AT_LEAST(x,y,z) (EIGEN_WORLD_VERSION>x || (EIGEN_WORLD_VERSION>=x && \
(EIGEN_MAJOR_VERSION>y || (EIGEN_MAJOR_VERSION>=y && \ (EIGEN_MAJOR_VERSION>y || (EIGEN_MAJOR_VERSION>=y && \
@@ -99,9 +99,16 @@
#define EIGEN_COMP_ARM 0 #define EIGEN_COMP_ARM 0
#endif #endif
/// \internal EIGEN_COMP_EMSCRIPTEN set to 1 if compiling with Emscripten
#if defined(__EMSCRIPTEN__)
#define EIGEN_COMP_EMSCRIPTEN 1
#else
#define EIGEN_COMP_EMSCRIPTEN 0
#endif
/// \internal EIGEN_GNUC_STRICT set to 1 if the compiler is really GCC and not a compatible compiler (e.g., ICC, clang, mingw, etc.) /// \internal EIGEN_GNUC_STRICT set to 1 if the compiler is really GCC and not a compatible compiler (e.g., ICC, clang, mingw, etc.)
#if EIGEN_COMP_GNUC && !(EIGEN_COMP_CLANG || EIGEN_COMP_ICC || EIGEN_COMP_MINGW || EIGEN_COMP_PGI || EIGEN_COMP_IBM || EIGEN_COMP_ARM ) #if EIGEN_COMP_GNUC && !(EIGEN_COMP_CLANG || EIGEN_COMP_ICC || EIGEN_COMP_MINGW || EIGEN_COMP_PGI || EIGEN_COMP_IBM || EIGEN_COMP_ARM || EIGEN_COMP_EMSCRIPTEN)
#define EIGEN_COMP_GNUC_STRICT 1 #define EIGEN_COMP_GNUC_STRICT 1
#else #else
#define EIGEN_COMP_GNUC_STRICT 0 #define EIGEN_COMP_GNUC_STRICT 0
@@ -336,25 +343,35 @@
// Do we support r-value references? // Do we support r-value references?
#if (__has_feature(cxx_rvalue_references) || \ #if (__has_feature(cxx_rvalue_references) || \
(defined(__cplusplus) && __cplusplus >= 201103L) || \ (defined(__cplusplus) && __cplusplus >= 201103L) || \
defined(__GXX_EXPERIMENTAL_CXX0X__) || \
(EIGEN_COMP_MSVC >= 1600)) (EIGEN_COMP_MSVC >= 1600))
#define EIGEN_HAVE_RVALUE_REFERENCES #define EIGEN_HAVE_RVALUE_REFERENCES
#endif #endif
// Does the compiler support C99?
#if (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901)) \
|| (defined(__GNUC__) && defined(_GLIBCXX_USE_C99)) \
|| (defined(_LIBCPP_VERSION) && !defined(_MSC_VER))
#define EIGEN_HAS_C99_MATH 1
#endif
// Does the compiler support result_of? // Does the compiler support result_of?
#if (__has_feature(cxx_lambdas) || (defined(__cplusplus) && __cplusplus >= 201103L)) #if (__has_feature(cxx_lambdas) || (defined(__cplusplus) && __cplusplus >= 201103L))
#define EIGEN_HAS_STD_RESULT_OF 1 #define EIGEN_HAS_STD_RESULT_OF 1
#endif #endif
// Does the compiler support variadic templates? // Does the compiler support variadic templates?
#if __cplusplus > 199711L #if __cplusplus > 199711L || EIGEN_COMP_MSVC >= 1900
// Disable the use of variadic templates when compiling with nvcc on ARM devices:
// this prevents nvcc from crashing when compiling Eigen on Tegra X1
#if !defined(__NVCC__) || !EIGEN_ARCH_ARM_OR_ARM64
#define EIGEN_HAS_VARIADIC_TEMPLATES 1 #define EIGEN_HAS_VARIADIC_TEMPLATES 1
#endif #endif
#endif
// Does the compiler support const expressions? // Does the compiler support const expressions?
#ifdef __CUDACC__ #ifdef __CUDACC__
// Const expressions are supported provided that c++11 is enabled and we're using nvcc 7.5 or above // Const expressions are supported provided that c++11 is enabled and we're using either clang or nvcc 7.5 or above
#if defined(__CUDACC_VER__) && __CUDACC_VER__ >= 70500 && __cplusplus > 199711L #if __cplusplus > 199711L && defined(__CUDACC_VER__) && (defined(__clang__) || __CUDACC_VER__ >= 70500)
#define EIGEN_HAS_CONSTEXPR 1 #define EIGEN_HAS_CONSTEXPR 1
#endif #endif
#elif (defined(__cplusplus) && __cplusplus >= 201402L) || \ #elif (defined(__cplusplus) && __cplusplus >= 201402L) || \
View File
@@ -59,28 +59,6 @@
#endif #endif
#ifndef EIGEN_HAS_POSIX_MEMALIGN
// See bug 554 (http://eigen.tuxfamily.org/bz/show_bug.cgi?id=554)
// It seems to be unsafe to check _POSIX_ADVISORY_INFO without including unistd.h first.
// Currently, let's include it only on unix systems:
#if EIGEN_OS_UNIX && !(EIGEN_OS_SUN || EIGEN_OS_SOLARIS)
#include <unistd.h>
#if (EIGEN_OS_QNX || (defined _GNU_SOURCE) || EIGEN_COMP_PGI || ((defined _XOPEN_SOURCE) && (_XOPEN_SOURCE >= 600))) && (defined _POSIX_ADVISORY_INFO) && (_POSIX_ADVISORY_INFO > 0)
#define EIGEN_HAS_POSIX_MEMALIGN 1
#endif
#endif
#ifndef EIGEN_HAS_POSIX_MEMALIGN
#define EIGEN_HAS_POSIX_MEMALIGN 0
#endif
#endif
#if defined EIGEN_VECTORIZE_SSE || defined EIGEN_VECTORIZE_AVX || defined EIGEN_VECTORIZE_AVX512
#define EIGEN_HAS_MM_MALLOC 1
#else
#define EIGEN_HAS_MM_MALLOC 0
#endif
namespace Eigen { namespace Eigen {
namespace internal { namespace internal {
@@ -122,7 +100,7 @@ inline void handmade_aligned_free(void *ptr)
/** \internal /** \internal
* \brief Reallocates aligned memory. * \brief Reallocates aligned memory.
* Since we know that our handmade version is based on std::realloc * Since we know that our handmade version is based on std::malloc
* we can use std::realloc to implement efficient reallocation. * we can use std::realloc to implement efficient reallocation.
*/ */
inline void* handmade_aligned_realloc(void* ptr, std::size_t size, std::size_t = 0) inline void* handmade_aligned_realloc(void* ptr, std::size_t size, std::size_t = 0)
@@ -141,47 +119,6 @@ inline void* handmade_aligned_realloc(void* ptr, std::size_t size, std::size_t =
return aligned; return aligned;
} }
/*****************************************************************************
*** Implementation of generic aligned realloc (when no realloc can be used)***
*****************************************************************************/
EIGEN_DEVICE_FUNC void* aligned_malloc(std::size_t size);
EIGEN_DEVICE_FUNC void aligned_free(void *ptr);
/** \internal
* \brief Reallocates aligned memory.
* Allows reallocation with aligned ptr types. This implementation will
* always create a new memory chunk and copy the old data.
*/
inline void* generic_aligned_realloc(void* ptr, size_t size, size_t old_size)
{
if (ptr==0)
return aligned_malloc(size);
if (size==0)
{
aligned_free(ptr);
return 0;
}
void* newptr = aligned_malloc(size);
if (newptr == 0)
{
#ifdef EIGEN_HAS_ERRNO
errno = ENOMEM; // according to the standard
#endif
return 0;
}
if (ptr != 0)
{
std::memcpy(newptr, ptr, (std::min)(size,old_size));
aligned_free(ptr);
}
return newptr;
}
/***************************************************************************** /*****************************************************************************
*** Implementation of portable aligned versions of malloc/free/realloc *** *** Implementation of portable aligned versions of malloc/free/realloc ***
*****************************************************************************/ *****************************************************************************/
@@ -218,16 +155,11 @@ EIGEN_DEVICE_FUNC inline void* aligned_malloc(size_t size)
check_that_malloc_is_allowed(); check_that_malloc_is_allowed();
void *result; void *result;
#if EIGEN_DEFAULT_ALIGN_BYTES==0 #if (EIGEN_DEFAULT_ALIGN_BYTES==0) || EIGEN_MALLOC_ALREADY_ALIGNED
result = std::malloc(size); result = std::malloc(size);
#elif EIGEN_MALLOC_ALREADY_ALIGNED #if EIGEN_DEFAULT_ALIGN_BYTES==16
result = std::malloc(size); eigen_assert((size<16 || (std::size_t(result)%16)==0) && "System's malloc returned an unaligned pointer. Compile with EIGEN_MALLOC_ALREADY_ALIGNED=0 to fall back to the handmade aligned memory allocator.");
#elif EIGEN_HAS_POSIX_MEMALIGN #endif
if(posix_memalign(&result, EIGEN_DEFAULT_ALIGN_BYTES, size)) result = 0;
#elif EIGEN_HAS_MM_MALLOC
result = _mm_malloc(size, EIGEN_DEFAULT_ALIGN_BYTES);
#elif EIGEN_OS_WIN_STRICT
result = _aligned_malloc(size, EIGEN_DEFAULT_ALIGN_BYTES);
#else #else
result = handmade_aligned_malloc(size); result = handmade_aligned_malloc(size);
#endif #endif
@@ -241,48 +173,25 @@ EIGEN_DEVICE_FUNC inline void* aligned_malloc(size_t size)
/** \internal Frees memory allocated with aligned_malloc. */ /** \internal Frees memory allocated with aligned_malloc. */
EIGEN_DEVICE_FUNC inline void aligned_free(void *ptr) EIGEN_DEVICE_FUNC inline void aligned_free(void *ptr)
{ {
#if EIGEN_DEFAULT_ALIGN_BYTES==0 #if (EIGEN_DEFAULT_ALIGN_BYTES==0) || EIGEN_MALLOC_ALREADY_ALIGNED
std::free(ptr); std::free(ptr);
#elif EIGEN_MALLOC_ALREADY_ALIGNED
std::free(ptr);
#elif EIGEN_HAS_POSIX_MEMALIGN
free(ptr);
#elif EIGEN_HAS_MM_MALLOC
_mm_free(ptr);
#elif EIGEN_OS_WIN_STRICT
_aligned_free(ptr);
#else #else
handmade_aligned_free(ptr); handmade_aligned_free(ptr);
#endif #endif
} }
/** /**
* \internal * \internal
* \brief Reallocates an aligned block of memory. * \brief Reallocates an aligned block of memory.
* \throws std::bad_alloc on allocation failure * \throws std::bad_alloc on allocation failure
**/ */
inline void* aligned_realloc(void *ptr, size_t new_size, size_t old_size) inline void* aligned_realloc(void *ptr, size_t new_size, size_t old_size)
{ {
EIGEN_UNUSED_VARIABLE(old_size); EIGEN_UNUSED_VARIABLE(old_size);
void *result; void *result;
#if EIGEN_DEFAULT_ALIGN_BYTES==0 #if (EIGEN_DEFAULT_ALIGN_BYTES==0) || EIGEN_MALLOC_ALREADY_ALIGNED
result = std::realloc(ptr,new_size); result = std::realloc(ptr,new_size);
#elif EIGEN_MALLOC_ALREADY_ALIGNED
result = std::realloc(ptr,new_size);
#elif EIGEN_HAS_POSIX_MEMALIGN
result = generic_aligned_realloc(ptr,new_size,old_size);
#elif EIGEN_HAS_MM_MALLOC
// The defined(_mm_free) is just here to verify that this MSVC version
// implements _mm_malloc/_mm_free based on the corresponding _aligned_
// functions. This may not always be the case and we just try to be safe.
#if EIGEN_OS_WIN_STRICT && defined(_mm_free)
result = _aligned_realloc(ptr,new_size,EIGEN_DEFAULT_ALIGN_BYTES);
#else
result = generic_aligned_realloc(ptr,new_size,old_size);
#endif
#elif EIGEN_OS_WIN_STRICT
result = _aligned_realloc(ptr,new_size,EIGEN_DEFAULT_ALIGN_BYTES);
#else #else
result = handmade_aligned_realloc(ptr,new_size,old_size); result = handmade_aligned_realloc(ptr,new_size,old_size);
#endif #endif
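A brief usage sketch of the trio after this simplification; the internal API is unchanged, only the dispatch underneath was reduced (plain malloc, already-aligned malloc, or the handmade fallback):

#include <Eigen/Core>

int main() {
  void* p = Eigen::internal::aligned_malloc(256);     // EIGEN_DEFAULT_ALIGN_BYTES-aligned
  p = Eigen::internal::aligned_realloc(p, 512, 256);  // (ptr, new_size, old_size)
  Eigen::internal::aligned_free(p);
  return 0;
}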
@@ -524,11 +433,11 @@ template<typename T, bool Align> EIGEN_DEVICE_FUNC inline void conditional_align
* \sa first_default_aligned() * \sa first_default_aligned()
*/ */
template<int Alignment, typename Scalar, typename Index> template<int Alignment, typename Scalar, typename Index>
inline Index first_aligned(const Scalar* array, Index size) EIGEN_DEVICE_FUNC inline Index first_aligned(const Scalar* array, Index size)
{ {
static const Index ScalarSize = sizeof(Scalar); const Index ScalarSize = sizeof(Scalar);
static const Index AlignmentSize = Alignment / ScalarSize; const Index AlignmentSize = Alignment / ScalarSize;
static const Index AlignmentMask = AlignmentSize-1; const Index AlignmentMask = AlignmentSize-1;
if(AlignmentSize<=1) if(AlignmentSize<=1)
{ {
@@ -544,14 +453,15 @@ inline Index first_aligned(const Scalar* array, Index size)
} }
else else
{ {
return std::min<Index>( (AlignmentSize - (Index((std::size_t(array)/sizeof(Scalar))) & AlignmentMask)) & AlignmentMask, size); Index first = (AlignmentSize - (Index((std::size_t(array)/sizeof(Scalar))) & AlignmentMask)) & AlignmentMask;
return (first < size) ? first : size;
} }
} }
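Worked example of the rewritten expression: for float (4-byte scalars) and a 16-byte alignment request, AlignmentSize = 16/4 = 4 and AlignmentMask = 3; an array whose address is congruent to 4 mod 16 yields first = (4 - (addr/4 & 3)) & 3 = (4 - 1) & 3 = 3, i.e. three elements are skipped to reach the next 16-byte boundary, and the result is then clamped to size.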
/** \internal Returns the index of the first element of the array that is well aligned with respect the largest packet requirement. /** \internal Returns the index of the first element of the array that is well aligned with respect the largest packet requirement.
* \sa first_aligned(Scalar*,Index) and first_default_aligned(DenseBase<Derived>) */ * \sa first_aligned(Scalar*,Index) and first_default_aligned(DenseBase<Derived>) */
template<typename Scalar, typename Index> template<typename Scalar, typename Index>
inline Index first_default_aligned(const Scalar* array, Index size) EIGEN_DEVICE_FUNC inline Index first_default_aligned(const Scalar* array, Index size)
{ {
typedef typename packet_traits<Scalar>::type DefaultPacketType; typedef typename packet_traits<Scalar>::type DefaultPacketType;
return first_aligned<unpacket_traits<DefaultPacketType>::alignment>(array, size); return first_aligned<unpacket_traits<DefaultPacketType>::alignment>(array, size);
@@ -576,7 +486,12 @@ template<typename T> EIGEN_DEVICE_FUNC void smart_copy(const T* start, const T*
template<typename T> struct smart_copy_helper<T,true> { template<typename T> struct smart_copy_helper<T,true> {
EIGEN_DEVICE_FUNC static inline void run(const T* start, const T* end, T* target) EIGEN_DEVICE_FUNC static inline void run(const T* start, const T* end, T* target)
{ memcpy(target, start, std::ptrdiff_t(end)-std::ptrdiff_t(start)); } {
std::ptrdiff_t size = std::ptrdiff_t(end)-std::ptrdiff_t(start);
if(size==0) return;
eigen_internal_assert(start!=0 && end!=0 && target!=0);
memcpy(target, start, size);
}
}; };
template<typename T> struct smart_copy_helper<T,false> { template<typename T> struct smart_copy_helper<T,false> {
@@ -594,7 +509,12 @@ template<typename T> void smart_memmove(const T* start, const T* end, T* target)
template<typename T> struct smart_memmove_helper<T,true> { template<typename T> struct smart_memmove_helper<T,true> {
static inline void run(const T* start, const T* end, T* target) static inline void run(const T* start, const T* end, T* target)
{ std::memmove(target, start, std::ptrdiff_t(end)-std::ptrdiff_t(start)); } {
std::ptrdiff_t size = std::ptrdiff_t(end)-std::ptrdiff_t(start);
if(size==0) return;
eigen_internal_assert(start!=0 && end!=0 && target!=0);
std::memmove(target, start, size);
}
}; };
template<typename T> struct smart_memmove_helper<T,false> { template<typename T> struct smart_memmove_helper<T,false> {
@@ -784,7 +704,7 @@ template<typename T> void swap(scoped_array<T> &a,scoped_array<T> &b)
* std::map< int, Vector3f > my_map_vec3; * std::map< int, Vector3f > my_map_vec3;
* \endcode * \endcode
* *
* \sa \ref TopicStlContainers. * \sa \blank \ref TopicStlContainers.
*/ */
template<class T> template<class T>
class aligned_allocator : public std::allocator<T> class aligned_allocator : public std::allocator<T>
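For fixed-size vectorizable payloads the documented pattern is to pass this allocator explicitly; e.g. (Vector4f chosen for illustration):

#include <map>
#include <Eigen/Dense>

std::map<int, Eigen::Vector4f, std::less<int>,
         Eigen::aligned_allocator<std::pair<const int, Eigen::Vector4f> > > my_map_vec4;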
Some files were not shown because too many files have changed in this diff.