Merged latest updates from trunk

This commit is contained in:
Benoit Steiner 2016-10-05 18:48:55 -07:00
commit 78b569f685
474 changed files with 33986 additions and 6837 deletions

View File

@ -1,4 +1,4 @@
project(Eigen) project(Eigen3)
cmake_minimum_required(VERSION 2.8.5) cmake_minimum_required(VERSION 2.8.5)
@ -8,6 +8,11 @@ if(${CMAKE_SOURCE_DIR} STREQUAL ${CMAKE_BINARY_DIR})
message(FATAL_ERROR "In-source builds not allowed. Please make a new directory (called a build directory) and run CMake from there. You may need to remove CMakeCache.txt. ") message(FATAL_ERROR "In-source builds not allowed. Please make a new directory (called a build directory) and run CMake from there. You may need to remove CMakeCache.txt. ")
endif() endif()
# Alias Eigen_*_DIR to Eigen3_*_DIR:
set(Eigen_SOURCE_DIR ${Eigen3_SOURCE_DIR})
set(Eigen_BINARY_DIR ${Eigen3_BINARY_DIR})
# guard against bad build-type strings # guard against bad build-type strings
if (NOT CMAKE_BUILD_TYPE) if (NOT CMAKE_BUILD_TYPE)
@ -93,9 +98,11 @@ else()
endif() endif()
option(EIGEN_BUILD_BTL "Build benchmark suite" OFF) option(EIGEN_BUILD_BTL "Build benchmark suite" OFF)
if(NOT WIN32)
# Disable pkgconfig only for native Windows builds
if(NOT WIN32 OR NOT CMAKE_HOST_SYSTEM_NAME MATCHES Windows)
option(EIGEN_BUILD_PKGCONFIG "Build pkg-config .pc file for Eigen" ON) option(EIGEN_BUILD_PKGCONFIG "Build pkg-config .pc file for Eigen" ON)
endif(NOT WIN32) endif()
set(CMAKE_INCLUDE_CURRENT_DIR ON) set(CMAKE_INCLUDE_CURRENT_DIR ON)
@ -120,7 +127,7 @@ endmacro(ei_add_cxx_compiler_flag)
if(NOT MSVC) if(NOT MSVC)
# We assume that other compilers are partly compatible with GNUCC # We assume that other compilers are partly compatible with GNUCC
# clang outputs some warnings for unknwon flags that are not caught by check_cxx_compiler_flag # clang outputs some warnings for unknown flags that are not caught by check_cxx_compiler_flag
# adding -Werror turns such warnings into errors # adding -Werror turns such warnings into errors
check_cxx_compiler_flag("-Werror" COMPILER_SUPPORT_WERROR) check_cxx_compiler_flag("-Werror" COMPILER_SUPPORT_WERROR)
if(COMPILER_SUPPORT_WERROR) if(COMPILER_SUPPORT_WERROR)
@ -142,8 +149,11 @@ if(NOT MSVC)
ei_add_cxx_compiler_flag("-Wwrite-strings") ei_add_cxx_compiler_flag("-Wwrite-strings")
ei_add_cxx_compiler_flag("-Wformat-security") ei_add_cxx_compiler_flag("-Wformat-security")
ei_add_cxx_compiler_flag("-Wshorten-64-to-32") ei_add_cxx_compiler_flag("-Wshorten-64-to-32")
ei_add_cxx_compiler_flag("-Wlogical-op")
ei_add_cxx_compiler_flag("-Wenum-conversion") ei_add_cxx_compiler_flag("-Wenum-conversion")
ei_add_cxx_compiler_flag("-Wc++11-extensions") ei_add_cxx_compiler_flag("-Wc++11-extensions")
ei_add_cxx_compiler_flag("-Wdouble-promotion")
# ei_add_cxx_compiler_flag("-Wconversion")
# -Wshadow is insanely too strict with gcc, hopefully it will become usable with gcc 6 # -Wshadow is insanely too strict with gcc, hopefully it will become usable with gcc 6
# if(NOT CMAKE_COMPILER_IS_GNUCXX OR (CMAKE_CXX_COMPILER_VERSION VERSION_GREATER "5.0.0")) # if(NOT CMAKE_COMPILER_IS_GNUCXX OR (CMAKE_CXX_COMPILER_VERSION VERSION_GREATER "5.0.0"))
@ -159,7 +169,7 @@ if(NOT MSVC)
ei_add_cxx_compiler_flag("-fno-common") ei_add_cxx_compiler_flag("-fno-common")
ei_add_cxx_compiler_flag("-fstrict-aliasing") ei_add_cxx_compiler_flag("-fstrict-aliasing")
ei_add_cxx_compiler_flag("-wd981") # disable ICC's "operands are evaluated in unspecified order" remark ei_add_cxx_compiler_flag("-wd981") # disable ICC's "operands are evaluated in unspecified order" remark
ei_add_cxx_compiler_flag("-wd2304") # disbale ICC's "warning #2304: non-explicit constructor with single argument may cause implicit type conversion" produced by -Wnon-virtual-dtor ei_add_cxx_compiler_flag("-wd2304") # disable ICC's "warning #2304: non-explicit constructor with single argument may cause implicit type conversion" produced by -Wnon-virtual-dtor
# The -ansi flag must be added last, otherwise it is also used as a linker flag by check_cxx_compiler_flag making it fails # The -ansi flag must be added last, otherwise it is also used as a linker flag by check_cxx_compiler_flag making it fails
@ -402,7 +412,7 @@ if(EIGEN_BUILD_PKGCONFIG)
install(FILES ${CMAKE_CURRENT_BINARY_DIR}/eigen3.pc install(FILES ${CMAKE_CURRENT_BINARY_DIR}/eigen3.pc
DESTINATION ${PKGCONFIG_INSTALL_DIR} DESTINATION ${PKGCONFIG_INSTALL_DIR}
) )
endif(EIGEN_BUILD_PKGCONFIG) endif()
add_subdirectory(Eigen) add_subdirectory(Eigen)

View File

@ -16,4 +16,4 @@ install(FILES
DESTINATION ${INCLUDE_INSTALL_DIR}/Eigen COMPONENT Devel DESTINATION ${INCLUDE_INSTALL_DIR}/Eigen COMPONENT Devel
) )
add_subdirectory(src) install(DIRECTORY src DESTINATION ${INCLUDE_INSTALL_DIR}/Eigen COMPONENT Devel FILES_MATCHING PATTERN "*.h")

View File

@ -31,7 +31,8 @@
#include "src/Cholesky/LLT.h" #include "src/Cholesky/LLT.h"
#include "src/Cholesky/LDLT.h" #include "src/Cholesky/LDLT.h"
#ifdef EIGEN_USE_LAPACKE #ifdef EIGEN_USE_LAPACKE
#include "src/Cholesky/LLT_MKL.h" #include "src/misc/lapacke.h"
#include "src/Cholesky/LLT_LAPACKE.h"
#endif #endif
#include "src/Core/util/ReenableStupidWarnings.h" #include "src/Core/util/ReenableStupidWarnings.h"

View File

@ -164,6 +164,7 @@
#if EIGEN_COMP_ICC >= 1110 #if EIGEN_COMP_ICC >= 1110
#include <immintrin.h> #include <immintrin.h>
#else #else
#include <mmintrin.h>
#include <emmintrin.h> #include <emmintrin.h>
#include <xmmintrin.h> #include <xmmintrin.h>
#ifdef EIGEN_VECTORIZE_SSE3 #ifdef EIGEN_VECTORIZE_SSE3
@ -259,6 +260,11 @@
// for min/max: // for min/max:
#include <algorithm> #include <algorithm>
// for std::is_nothrow_move_assignable
#ifdef EIGEN_INCLUDE_TYPE_TRAITS
#include <type_traits>
#endif
// for outputting debug info // for outputting debug info
#ifdef EIGEN_DEBUG_ASSIGN #ifdef EIGEN_DEBUG_ASSIGN
#include <iostream> #include <iostream>
@ -332,8 +338,8 @@ using std::ptrdiff_t;
#include "src/Core/NumTraits.h" #include "src/Core/NumTraits.h"
#include "src/Core/MathFunctions.h" #include "src/Core/MathFunctions.h"
#include "src/Core/SpecialFunctions.h"
#include "src/Core/GenericPacketMath.h" #include "src/Core/GenericPacketMath.h"
#include "src/Core/MathFunctionsImpl.h"
#if defined EIGEN_VECTORIZE_AVX512 #if defined EIGEN_VECTORIZE_AVX512
#include "src/Core/arch/SSE/PacketMath.h" #include "src/Core/arch/SSE/PacketMath.h"
@ -368,23 +374,29 @@ using std::ptrdiff_t;
#include "src/Core/arch/ZVector/Complex.h" #include "src/Core/arch/ZVector/Complex.h"
#endif #endif
// Half float support
#include "src/Core/arch/CUDA/Half.h" #include "src/Core/arch/CUDA/Half.h"
#include "src/Core/arch/CUDA/PacketMathHalf.h"
#include "src/Core/arch/CUDA/TypeCasting.h"
#if defined EIGEN_VECTORIZE_CUDA #if defined EIGEN_VECTORIZE_CUDA
#include "src/Core/arch/CUDA/PacketMath.h" #include "src/Core/arch/CUDA/PacketMath.h"
#include "src/Core/arch/CUDA/PacketMathHalf.h"
#include "src/Core/arch/CUDA/MathFunctions.h" #include "src/Core/arch/CUDA/MathFunctions.h"
#include "src/Core/arch/CUDA/TypeCasting.h"
#endif #endif
#include "src/Core/arch/Default/Settings.h" #include "src/Core/arch/Default/Settings.h"
#include "src/Core/functors/TernaryFunctors.h"
#include "src/Core/functors/BinaryFunctors.h" #include "src/Core/functors/BinaryFunctors.h"
#include "src/Core/functors/UnaryFunctors.h" #include "src/Core/functors/UnaryFunctors.h"
#include "src/Core/functors/NullaryFunctors.h" #include "src/Core/functors/NullaryFunctors.h"
#include "src/Core/functors/StlFunctors.h" #include "src/Core/functors/StlFunctors.h"
#include "src/Core/functors/AssignmentFunctors.h" #include "src/Core/functors/AssignmentFunctors.h"
// Specialized functors to enable the processing of complex numbers
// on CUDA devices
#include "src/Core/arch/CUDA/Complex.h"
#include "src/Core/DenseCoeffsBase.h" #include "src/Core/DenseCoeffsBase.h"
#include "src/Core/DenseBase.h" #include "src/Core/DenseBase.h"
#include "src/Core/MatrixBase.h" #include "src/Core/MatrixBase.h"
@ -411,6 +423,7 @@ using std::ptrdiff_t;
#include "src/Core/PlainObjectBase.h" #include "src/Core/PlainObjectBase.h"
#include "src/Core/Matrix.h" #include "src/Core/Matrix.h"
#include "src/Core/Array.h" #include "src/Core/Array.h"
#include "src/Core/CwiseTernaryOp.h"
#include "src/Core/CwiseBinaryOp.h" #include "src/Core/CwiseBinaryOp.h"
#include "src/Core/CwiseUnaryOp.h" #include "src/Core/CwiseUnaryOp.h"
#include "src/Core/CwiseNullaryOp.h" #include "src/Core/CwiseNullaryOp.h"

View File

@ -32,6 +32,7 @@
* \endcode * \endcode
*/ */
#include "src/misc/RealSvd2x2.h"
#include "src/Eigenvalues/Tridiagonalization.h" #include "src/Eigenvalues/Tridiagonalization.h"
#include "src/Eigenvalues/RealSchur.h" #include "src/Eigenvalues/RealSchur.h"
#include "src/Eigenvalues/EigenSolver.h" #include "src/Eigenvalues/EigenSolver.h"
@ -44,9 +45,10 @@
#include "src/Eigenvalues/GeneralizedEigenSolver.h" #include "src/Eigenvalues/GeneralizedEigenSolver.h"
#include "src/Eigenvalues/MatrixBaseEigenvalues.h" #include "src/Eigenvalues/MatrixBaseEigenvalues.h"
#ifdef EIGEN_USE_LAPACKE #ifdef EIGEN_USE_LAPACKE
#include "src/Eigenvalues/RealSchur_MKL.h" #include "src/misc/lapacke.h"
#include "src/Eigenvalues/ComplexSchur_MKL.h" #include "src/Eigenvalues/RealSchur_LAPACKE.h"
#include "src/Eigenvalues/SelfAdjointEigenSolver_MKL.h" #include "src/Eigenvalues/ComplexSchur_LAPACKE.h"
#include "src/Eigenvalues/SelfAdjointEigenSolver_LAPACKE.h"
#endif #endif
#include "src/Core/util/ReenableStupidWarnings.h" #include "src/Core/util/ReenableStupidWarnings.h"

View File

@ -28,7 +28,8 @@
#include "src/LU/FullPivLU.h" #include "src/LU/FullPivLU.h"
#include "src/LU/PartialPivLU.h" #include "src/LU/PartialPivLU.h"
#ifdef EIGEN_USE_LAPACKE #ifdef EIGEN_USE_LAPACKE
#include "src/LU/PartialPivLU_MKL.h" #include "src/misc/lapacke.h"
#include "src/LU/PartialPivLU_LAPACKE.h"
#endif #endif
#include "src/LU/Determinant.h" #include "src/LU/Determinant.h"
#include "src/LU/InverseImpl.h" #include "src/LU/InverseImpl.h"

View File

@ -36,8 +36,9 @@
#include "src/QR/ColPivHouseholderQR.h" #include "src/QR/ColPivHouseholderQR.h"
#include "src/QR/CompleteOrthogonalDecomposition.h" #include "src/QR/CompleteOrthogonalDecomposition.h"
#ifdef EIGEN_USE_LAPACKE #ifdef EIGEN_USE_LAPACKE
#include "src/QR/HouseholderQR_MKL.h" #include "src/misc/lapacke.h"
#include "src/QR/ColPivHouseholderQR_MKL.h" #include "src/QR/HouseholderQR_LAPACKE.h"
#include "src/QR/ColPivHouseholderQR_LAPACKE.h"
#endif #endif
#include "src/Core/util/ReenableStupidWarnings.h" #include "src/Core/util/ReenableStupidWarnings.h"

View File

@ -31,12 +31,14 @@
* \endcode * \endcode
*/ */
#include "src/misc/RealSvd2x2.h"
#include "src/SVD/UpperBidiagonalization.h" #include "src/SVD/UpperBidiagonalization.h"
#include "src/SVD/SVDBase.h" #include "src/SVD/SVDBase.h"
#include "src/SVD/JacobiSVD.h" #include "src/SVD/JacobiSVD.h"
#include "src/SVD/BDCSVD.h" #include "src/SVD/BDCSVD.h"
#if defined(EIGEN_USE_LAPACKE) && !defined(EIGEN_USE_LAPACKE_STRICT) #if defined(EIGEN_USE_LAPACKE) && !defined(EIGEN_USE_LAPACKE_STRICT)
#include "src/SVD/JacobiSVD_MKL.h" #include "src/misc/lapacke.h"
#include "src/SVD/JacobiSVD_LAPACKE.h"
#endif #endif
#include "src/Core/util/ReenableStupidWarnings.h" #include "src/Core/util/ReenableStupidWarnings.h"

View File

@ -43,7 +43,7 @@ namespace Eigen { struct SluMatrix; }
* - class SuperLU: a supernodal sequential LU factorization. * - class SuperLU: a supernodal sequential LU factorization.
* - class SuperILU: a supernodal sequential incomplete LU factorization (to be used as a preconditioner for iterative methods). * - class SuperILU: a supernodal sequential incomplete LU factorization (to be used as a preconditioner for iterative methods).
* *
* \warning This wrapper is only for the 4.x versions of SuperLU. The 3.x and 5.x versions are not supported. * \warning This wrapper requires at least versions 4.0 of SuperLU. The 3.x versions are not supported.
* *
* \warning When including this module, you have to use SUPERLU_EMPTY instead of EMPTY which is no longer defined because it is too polluting. * \warning When including this module, you have to use SUPERLU_EMPTY instead of EMPTY which is no longer defined because it is too polluting.
* *

View File

@ -1,7 +0,0 @@
file(GLOB Eigen_src_subdirectories "*")
escape_string_as_regex(ESCAPED_CMAKE_CURRENT_SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}")
foreach(f ${Eigen_src_subdirectories})
if(NOT f MATCHES "\\.txt" AND NOT f MATCHES "${ESCAPED_CMAKE_CURRENT_SOURCE_DIR}/[.].+" )
add_subdirectory(${f})
endif()
endforeach()

View File

@ -1,6 +0,0 @@
FILE(GLOB Eigen_Cholesky_SRCS "*.h")
INSTALL(FILES
${Eigen_Cholesky_SRCS}
DESTINATION ${INCLUDE_INSTALL_DIR}/Eigen/src/Cholesky COMPONENT Devel
)

View File

@ -43,6 +43,8 @@ namespace internal {
* Remember that Cholesky decompositions are not rank-revealing. Also, do not use a Cholesky * Remember that Cholesky decompositions are not rank-revealing. Also, do not use a Cholesky
* decomposition to determine whether a system of equations has a solution. * decomposition to determine whether a system of equations has a solution.
* *
* This class supports the \link InplaceDecomposition inplace decomposition \endlink mechanism.
*
* \sa MatrixBase::ldlt(), SelfAdjointView::ldlt(), class LLT * \sa MatrixBase::ldlt(), SelfAdjointView::ldlt(), class LLT
*/ */
template<typename _MatrixType, int _UpLo> class LDLT template<typename _MatrixType, int _UpLo> class LDLT
@ -52,7 +54,6 @@ template<typename _MatrixType, int _UpLo> class LDLT
enum { enum {
RowsAtCompileTime = MatrixType::RowsAtCompileTime, RowsAtCompileTime = MatrixType::RowsAtCompileTime,
ColsAtCompileTime = MatrixType::ColsAtCompileTime, ColsAtCompileTime = MatrixType::ColsAtCompileTime,
Options = MatrixType::Options & ~RowMajorBit, // these are the options for the TmpMatrixType, we need a ColMajor matrix here!
MaxRowsAtCompileTime = MatrixType::MaxRowsAtCompileTime, MaxRowsAtCompileTime = MatrixType::MaxRowsAtCompileTime,
MaxColsAtCompileTime = MatrixType::MaxColsAtCompileTime, MaxColsAtCompileTime = MatrixType::MaxColsAtCompileTime,
UpLo = _UpLo UpLo = _UpLo
@ -61,7 +62,7 @@ template<typename _MatrixType, int _UpLo> class LDLT
typedef typename NumTraits<typename MatrixType::Scalar>::Real RealScalar; typedef typename NumTraits<typename MatrixType::Scalar>::Real RealScalar;
typedef Eigen::Index Index; ///< \deprecated since Eigen 3.3 typedef Eigen::Index Index; ///< \deprecated since Eigen 3.3
typedef typename MatrixType::StorageIndex StorageIndex; typedef typename MatrixType::StorageIndex StorageIndex;
typedef Matrix<Scalar, RowsAtCompileTime, 1, Options, MaxRowsAtCompileTime, 1> TmpMatrixType; typedef Matrix<Scalar, RowsAtCompileTime, 1, 0, MaxRowsAtCompileTime, 1> TmpMatrixType;
typedef Transpositions<RowsAtCompileTime, MaxRowsAtCompileTime> TranspositionType; typedef Transpositions<RowsAtCompileTime, MaxRowsAtCompileTime> TranspositionType;
typedef PermutationMatrix<RowsAtCompileTime, MaxRowsAtCompileTime> PermutationType; typedef PermutationMatrix<RowsAtCompileTime, MaxRowsAtCompileTime> PermutationType;
@ -97,6 +98,7 @@ template<typename _MatrixType, int _UpLo> class LDLT
/** \brief Constructor with decomposition /** \brief Constructor with decomposition
* *
* This calculates the decomposition for the input \a matrix. * This calculates the decomposition for the input \a matrix.
*
* \sa LDLT(Index size) * \sa LDLT(Index size)
*/ */
template<typename InputType> template<typename InputType>
@ -110,6 +112,23 @@ template<typename _MatrixType, int _UpLo> class LDLT
compute(matrix.derived()); compute(matrix.derived());
} }
/** \brief Constructs a LDLT factorization from a given matrix
*
* This overloaded constructor is provided for \link InplaceDecomposition inplace decomposition \endlink when \c MatrixType is a Eigen::Ref.
*
* \sa LDLT(const EigenBase&)
*/
template<typename InputType>
explicit LDLT(EigenBase<InputType>& matrix)
: m_matrix(matrix.derived()),
m_transpositions(matrix.rows()),
m_temporary(matrix.rows()),
m_sign(internal::ZeroSign),
m_isInitialized(false)
{
compute(matrix.derived());
}
/** Clear any existing decomposition /** Clear any existing decomposition
* \sa rankUpdate(w,sigma) * \sa rankUpdate(w,sigma)
*/ */
@ -234,7 +253,7 @@ template<typename _MatrixType, int _UpLo> class LDLT
ComputationInfo info() const ComputationInfo info() const
{ {
eigen_assert(m_isInitialized && "LDLT is not initialized."); eigen_assert(m_isInitialized && "LDLT is not initialized.");
return Success; return m_info;
} }
#ifndef EIGEN_PARSED_BY_DOXYGEN #ifndef EIGEN_PARSED_BY_DOXYGEN
@ -262,6 +281,7 @@ template<typename _MatrixType, int _UpLo> class LDLT
TmpMatrixType m_temporary; TmpMatrixType m_temporary;
internal::SignMatrix m_sign; internal::SignMatrix m_sign;
bool m_isInitialized; bool m_isInitialized;
ComputationInfo m_info;
}; };
namespace internal { namespace internal {
@ -279,6 +299,8 @@ template<> struct ldlt_inplace<Lower>
typedef typename TranspositionType::StorageIndex IndexType; typedef typename TranspositionType::StorageIndex IndexType;
eigen_assert(mat.rows()==mat.cols()); eigen_assert(mat.rows()==mat.cols());
const Index size = mat.rows(); const Index size = mat.rows();
bool found_zero_pivot = false;
bool ret = true;
if (size <= 1) if (size <= 1)
{ {
@ -337,9 +359,27 @@ template<> struct ldlt_inplace<Lower>
// we should only make sure that we do not introduce INF or NaN values. // we should only make sure that we do not introduce INF or NaN values.
// Remark that LAPACK also uses 0 as the cutoff value. // Remark that LAPACK also uses 0 as the cutoff value.
RealScalar realAkk = numext::real(mat.coeffRef(k,k)); RealScalar realAkk = numext::real(mat.coeffRef(k,k));
if((rs>0) && (abs(realAkk) > RealScalar(0))) bool pivot_is_valid = (abs(realAkk) > RealScalar(0));
if(k==0 && !pivot_is_valid)
{
// The entire diagonal is zero, there is nothing more to do
// except filling the transpositions, and checking whether the matrix is zero.
sign = ZeroSign;
for(Index j = 0; j<size; ++j)
{
transpositions.coeffRef(j) = IndexType(j);
ret = ret && (mat.col(j).tail(size-j-1).array()==Scalar(0)).all();
}
return ret;
}
if((rs>0) && pivot_is_valid)
A21 /= realAkk; A21 /= realAkk;
if(found_zero_pivot && pivot_is_valid) ret = false; // factorization failed
else if(!pivot_is_valid) found_zero_pivot = true;
if (sign == PositiveSemiDef) { if (sign == PositiveSemiDef) {
if (realAkk < static_cast<RealScalar>(0)) sign = Indefinite; if (realAkk < static_cast<RealScalar>(0)) sign = Indefinite;
} else if (sign == NegativeSemiDef) { } else if (sign == NegativeSemiDef) {
@ -350,7 +390,7 @@ template<> struct ldlt_inplace<Lower>
} }
} }
return true; return ret;
} }
// Reference for the algorithm: Davis and Hager, "Multiple Rank // Reference for the algorithm: Davis and Hager, "Multiple Rank
@ -474,7 +514,7 @@ LDLT<MatrixType,_UpLo>& LDLT<MatrixType,_UpLo>::compute(const EigenBase<InputTyp
m_temporary.resize(size); m_temporary.resize(size);
m_sign = internal::ZeroSign; m_sign = internal::ZeroSign;
internal::ldlt_inplace<UpLo>::unblocked(m_matrix, m_transpositions, m_temporary, m_sign); m_info = internal::ldlt_inplace<UpLo>::unblocked(m_matrix, m_transpositions, m_temporary, m_sign) ? Success : NumericalIssue;
m_isInitialized = true; m_isInitialized = true;
return *this; return *this;
@ -602,7 +642,6 @@ MatrixType LDLT<MatrixType,_UpLo>::reconstructedMatrix() const
return res; return res;
} }
#ifndef __CUDACC__
/** \cholesky_module /** \cholesky_module
* \returns the Cholesky decomposition with full pivoting without square root of \c *this * \returns the Cholesky decomposition with full pivoting without square root of \c *this
* \sa MatrixBase::ldlt() * \sa MatrixBase::ldlt()
@ -624,7 +663,6 @@ MatrixBase<Derived>::ldlt() const
{ {
return LDLT<PlainObject>(derived()); return LDLT<PlainObject>(derived());
} }
#endif // __CUDACC__
} // end namespace Eigen } // end namespace Eigen

View File

@ -41,6 +41,8 @@ template<typename MatrixType, int UpLo> struct LLT_Traits;
* Example: \include LLT_example.cpp * Example: \include LLT_example.cpp
* Output: \verbinclude LLT_example.out * Output: \verbinclude LLT_example.out
* *
* This class supports the \link InplaceDecomposition inplace decomposition \endlink mechanism.
*
* \sa MatrixBase::llt(), SelfAdjointView::llt(), class LDLT * \sa MatrixBase::llt(), SelfAdjointView::llt(), class LDLT
*/ */
/* HEY THIS DOX IS DISABLED BECAUSE THERE's A BUG EITHER HERE OR IN LDLT ABOUT THAT (OR BOTH) /* HEY THIS DOX IS DISABLED BECAUSE THERE's A BUG EITHER HERE OR IN LDLT ABOUT THAT (OR BOTH)
@ -54,7 +56,6 @@ template<typename _MatrixType, int _UpLo> class LLT
enum { enum {
RowsAtCompileTime = MatrixType::RowsAtCompileTime, RowsAtCompileTime = MatrixType::RowsAtCompileTime,
ColsAtCompileTime = MatrixType::ColsAtCompileTime, ColsAtCompileTime = MatrixType::ColsAtCompileTime,
Options = MatrixType::Options,
MaxColsAtCompileTime = MatrixType::MaxColsAtCompileTime MaxColsAtCompileTime = MatrixType::MaxColsAtCompileTime
}; };
typedef typename MatrixType::Scalar Scalar; typedef typename MatrixType::Scalar Scalar;
@ -95,6 +96,21 @@ template<typename _MatrixType, int _UpLo> class LLT
compute(matrix.derived()); compute(matrix.derived());
} }
/** \brief Constructs a LDLT factorization from a given matrix
*
* This overloaded constructor is provided for \link InplaceDecomposition inplace decomposition \endlink when
* \c MatrixType is a Eigen::Ref.
*
* \sa LLT(const EigenBase&)
*/
template<typename InputType>
explicit LLT(EigenBase<InputType>& matrix)
: m_matrix(matrix.derived()),
m_isInitialized(false)
{
compute(matrix.derived());
}
/** \returns a view of the upper triangular matrix U */ /** \returns a view of the upper triangular matrix U */
inline typename Traits::MatrixU matrixU() const inline typename Traits::MatrixU matrixU() const
{ {
@ -491,7 +507,6 @@ MatrixType LLT<MatrixType,_UpLo>::reconstructedMatrix() const
return matrixL() * matrixL().adjoint().toDenseMatrix(); return matrixL() * matrixL().adjoint().toDenseMatrix();
} }
#ifndef __CUDACC__
/** \cholesky_module /** \cholesky_module
* \returns the LLT decomposition of \c *this * \returns the LLT decomposition of \c *this
* \sa SelfAdjointView::llt() * \sa SelfAdjointView::llt()
@ -513,7 +528,6 @@ SelfAdjointView<MatrixType, UpLo>::llt() const
{ {
return LLT<PlainObject,UpLo>(m_matrix); return LLT<PlainObject,UpLo>(m_matrix);
} }
#endif // __CUDACC__
} // end namespace Eigen } // end namespace Eigen

View File

@ -25,25 +25,22 @@
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
******************************************************************************** ********************************************************************************
* Content : Eigen bindings to Intel(R) MKL * Content : Eigen bindings to LAPACKe
* LLt decomposition based on LAPACKE_?potrf function. * LLt decomposition based on LAPACKE_?potrf function.
******************************************************************************** ********************************************************************************
*/ */
#ifndef EIGEN_LLT_MKL_H #ifndef EIGEN_LLT_LAPACKE_H
#define EIGEN_LLT_MKL_H #define EIGEN_LLT_LAPACKE_H
#include "Eigen/src/Core/util/MKL_support.h"
#include <iostream>
namespace Eigen { namespace Eigen {
namespace internal { namespace internal {
template<typename Scalar> struct mkl_llt; template<typename Scalar> struct lapacke_llt;
#define EIGEN_MKL_LLT(EIGTYPE, MKLTYPE, MKLPREFIX) \ #define EIGEN_LAPACKE_LLT(EIGTYPE, BLASTYPE, LAPACKE_PREFIX) \
template<> struct mkl_llt<EIGTYPE> \ template<> struct lapacke_llt<EIGTYPE> \
{ \ { \
template<typename MatrixType> \ template<typename MatrixType> \
static inline Index potrf(MatrixType& m, char uplo) \ static inline Index potrf(MatrixType& m, char uplo) \
@ -53,13 +50,13 @@ template<> struct mkl_llt<EIGTYPE> \
EIGTYPE* a; \ EIGTYPE* a; \
eigen_assert(m.rows()==m.cols()); \ eigen_assert(m.rows()==m.cols()); \
/* Set up parameters for ?potrf */ \ /* Set up parameters for ?potrf */ \
size = m.rows(); \ size = convert_index<lapack_int>(m.rows()); \
StorageOrder = MatrixType::Flags&RowMajorBit?RowMajor:ColMajor; \ StorageOrder = MatrixType::Flags&RowMajorBit?RowMajor:ColMajor; \
matrix_order = StorageOrder==RowMajor ? LAPACK_ROW_MAJOR : LAPACK_COL_MAJOR; \ matrix_order = StorageOrder==RowMajor ? LAPACK_ROW_MAJOR : LAPACK_COL_MAJOR; \
a = &(m.coeffRef(0,0)); \ a = &(m.coeffRef(0,0)); \
lda = m.outerStride(); \ lda = convert_index<lapack_int>(m.outerStride()); \
\ \
info = LAPACKE_##MKLPREFIX##potrf( matrix_order, uplo, size, (MKLTYPE*)a, lda ); \ info = LAPACKE_##LAPACKE_PREFIX##potrf( matrix_order, uplo, size, (BLASTYPE*)a, lda ); \
info = (info==0) ? -1 : info>0 ? info-1 : size; \ info = (info==0) ? -1 : info>0 ? info-1 : size; \
return info; \ return info; \
} \ } \
@ -69,7 +66,7 @@ template<> struct llt_inplace<EIGTYPE, Lower> \
template<typename MatrixType> \ template<typename MatrixType> \
static Index blocked(MatrixType& m) \ static Index blocked(MatrixType& m) \
{ \ { \
return mkl_llt<EIGTYPE>::potrf(m, 'L'); \ return lapacke_llt<EIGTYPE>::potrf(m, 'L'); \
} \ } \
template<typename MatrixType, typename VectorType> \ template<typename MatrixType, typename VectorType> \
static Index rankUpdate(MatrixType& mat, const VectorType& vec, const typename MatrixType::RealScalar& sigma) \ static Index rankUpdate(MatrixType& mat, const VectorType& vec, const typename MatrixType::RealScalar& sigma) \
@ -80,7 +77,7 @@ template<> struct llt_inplace<EIGTYPE, Upper> \
template<typename MatrixType> \ template<typename MatrixType> \
static Index blocked(MatrixType& m) \ static Index blocked(MatrixType& m) \
{ \ { \
return mkl_llt<EIGTYPE>::potrf(m, 'U'); \ return lapacke_llt<EIGTYPE>::potrf(m, 'U'); \
} \ } \
template<typename MatrixType, typename VectorType> \ template<typename MatrixType, typename VectorType> \
static Index rankUpdate(MatrixType& mat, const VectorType& vec, const typename MatrixType::RealScalar& sigma) \ static Index rankUpdate(MatrixType& mat, const VectorType& vec, const typename MatrixType::RealScalar& sigma) \
@ -90,13 +87,13 @@ template<> struct llt_inplace<EIGTYPE, Upper> \
} \ } \
}; };
EIGEN_MKL_LLT(double, double, d) EIGEN_LAPACKE_LLT(double, double, d)
EIGEN_MKL_LLT(float, float, s) EIGEN_LAPACKE_LLT(float, float, s)
EIGEN_MKL_LLT(dcomplex, MKL_Complex16, z) EIGEN_LAPACKE_LLT(dcomplex, lapack_complex_double, z)
EIGEN_MKL_LLT(scomplex, MKL_Complex8, c) EIGEN_LAPACKE_LLT(scomplex, lapack_complex_float, c)
} // end namespace internal } // end namespace internal
} // end namespace Eigen } // end namespace Eigen
#endif // EIGEN_LLT_MKL_H #endif // EIGEN_LLT_LAPACKE_H

View File

@ -1,6 +0,0 @@
FILE(GLOB Eigen_CholmodSupport_SRCS "*.h")
INSTALL(FILES
${Eigen_CholmodSupport_SRCS}
DESTINATION ${INCLUDE_INSTALL_DIR}/Eigen/src/CholmodSupport COMPONENT Devel
)

View File

@ -37,7 +37,7 @@ struct traits<Array<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols> > : tra
* storage layout. * storage layout.
* *
* This class can be extended with the help of the plugin mechanism described on the page * This class can be extended with the help of the plugin mechanism described on the page
* \ref TopicCustomizingEigen by defining the preprocessor symbol \c EIGEN_ARRAY_PLUGIN. * \ref TopicCustomizing_Plugins by defining the preprocessor symbol \c EIGEN_ARRAY_PLUGIN.
* *
* \sa \blank \ref TutorialArrayClass, \ref TopicClassHierarchy * \sa \blank \ref TutorialArrayClass, \ref TopicClassHierarchy
*/ */
@ -147,9 +147,9 @@ class Array
} }
#endif #endif
#ifdef EIGEN_HAVE_RVALUE_REFERENCES #if EIGEN_HAS_RVALUE_REFERENCES
EIGEN_DEVICE_FUNC EIGEN_DEVICE_FUNC
Array(Array&& other) Array(Array&& other) EIGEN_NOEXCEPT_IF(std::is_nothrow_move_constructible<Scalar>::value)
: Base(std::move(other)) : Base(std::move(other))
{ {
Base::_check_template_params(); Base::_check_template_params();
@ -157,7 +157,7 @@ class Array
Base::_set_noalias(other); Base::_set_noalias(other);
} }
EIGEN_DEVICE_FUNC EIGEN_DEVICE_FUNC
Array& operator=(Array&& other) Array& operator=(Array&& other) EIGEN_NOEXCEPT_IF(std::is_nothrow_move_assignable<Scalar>::value)
{ {
other.swap(*this); other.swap(*this);
return *this; return *this;

View File

@ -32,7 +32,7 @@ template<typename ExpressionType> class MatrixWrapper;
* \tparam Derived is the derived type, e.g., an array or an expression type. * \tparam Derived is the derived type, e.g., an array or an expression type.
* *
* This class can be extended with the help of the plugin mechanism described on the page * This class can be extended with the help of the plugin mechanism described on the page
* \ref TopicCustomizingEigen by defining the preprocessor symbol \c EIGEN_ARRAYBASE_PLUGIN. * \ref TopicCustomizing_Plugins by defining the preprocessor symbol \c EIGEN_ARRAYBASE_PLUGIN.
* *
* \sa class MatrixBase, \ref TopicClassHierarchy * \sa class MatrixBase, \ref TopicClassHierarchy
*/ */
@ -52,8 +52,6 @@ template<typename Derived> class ArrayBase
typedef typename NumTraits<Scalar>::Real RealScalar; typedef typename NumTraits<Scalar>::Real RealScalar;
typedef DenseBase<Derived> Base; typedef DenseBase<Derived> Base;
using Base::operator*;
using Base::operator/;
using Base::RowsAtCompileTime; using Base::RowsAtCompileTime;
using Base::ColsAtCompileTime; using Base::ColsAtCompileTime;
using Base::SizeAtCompileTime; using Base::SizeAtCompileTime;
@ -89,6 +87,7 @@ template<typename Derived> class ArrayBase
#endif // not EIGEN_PARSED_BY_DOXYGEN #endif // not EIGEN_PARSED_BY_DOXYGEN
#define EIGEN_CURRENT_STORAGE_BASE_CLASS Eigen::ArrayBase #define EIGEN_CURRENT_STORAGE_BASE_CLASS Eigen::ArrayBase
#define EIGEN_DOC_UNARY_ADDONS(X,Y)
# include "../plugins/CommonCwiseUnaryOps.h" # include "../plugins/CommonCwiseUnaryOps.h"
# include "../plugins/MatrixCwiseUnaryOps.h" # include "../plugins/MatrixCwiseUnaryOps.h"
# include "../plugins/ArrayCwiseUnaryOps.h" # include "../plugins/ArrayCwiseUnaryOps.h"
@ -99,6 +98,7 @@ template<typename Derived> class ArrayBase
# include EIGEN_ARRAYBASE_PLUGIN # include EIGEN_ARRAYBASE_PLUGIN
# endif # endif
#undef EIGEN_CURRENT_STORAGE_BASE_CLASS #undef EIGEN_CURRENT_STORAGE_BASE_CLASS
#undef EIGEN_DOC_UNARY_ADDONS
/** Special case of the template operator=, in order to prevent the compiler /** Special case of the template operator=, in order to prevent the compiler
* from generating a default operator= (issue hit with g++ 4.1) * from generating a default operator= (issue hit with g++ 4.1)
@ -178,7 +178,7 @@ template<typename OtherDerived>
EIGEN_STRONG_INLINE Derived & EIGEN_STRONG_INLINE Derived &
ArrayBase<Derived>::operator-=(const ArrayBase<OtherDerived> &other) ArrayBase<Derived>::operator-=(const ArrayBase<OtherDerived> &other)
{ {
call_assignment(derived(), other.derived(), internal::sub_assign_op<Scalar>()); call_assignment(derived(), other.derived(), internal::sub_assign_op<Scalar,typename OtherDerived::Scalar>());
return derived(); return derived();
} }
@ -191,7 +191,7 @@ template<typename OtherDerived>
EIGEN_STRONG_INLINE Derived & EIGEN_STRONG_INLINE Derived &
ArrayBase<Derived>::operator+=(const ArrayBase<OtherDerived>& other) ArrayBase<Derived>::operator+=(const ArrayBase<OtherDerived>& other)
{ {
call_assignment(derived(), other.derived(), internal::add_assign_op<Scalar>()); call_assignment(derived(), other.derived(), internal::add_assign_op<Scalar,typename OtherDerived::Scalar>());
return derived(); return derived();
} }
@ -217,7 +217,7 @@ template<typename OtherDerived>
EIGEN_STRONG_INLINE Derived & EIGEN_STRONG_INLINE Derived &
ArrayBase<Derived>::operator/=(const ArrayBase<OtherDerived>& other) ArrayBase<Derived>::operator/=(const ArrayBase<OtherDerived>& other)
{ {
call_assignment(derived(), other.derived(), internal::div_assign_op<Scalar>()); call_assignment(derived(), other.derived(), internal::div_assign_op<Scalar,typename OtherDerived::Scalar>());
return derived(); return derived();
} }

View File

@ -75,23 +75,24 @@ private:
DstIsRowMajor = DstFlags&RowMajorBit, DstIsRowMajor = DstFlags&RowMajorBit,
SrcIsRowMajor = SrcFlags&RowMajorBit, SrcIsRowMajor = SrcFlags&RowMajorBit,
StorageOrdersAgree = (int(DstIsRowMajor) == int(SrcIsRowMajor)), StorageOrdersAgree = (int(DstIsRowMajor) == int(SrcIsRowMajor)),
MightVectorize = StorageOrdersAgree MightVectorize = bool(StorageOrdersAgree)
&& (int(DstFlags) & int(SrcFlags) & ActualPacketAccessBit) && (int(DstFlags) & int(SrcFlags) & ActualPacketAccessBit)
&& (functor_traits<AssignFunc>::PacketAccess), && bool(functor_traits<AssignFunc>::PacketAccess),
MayInnerVectorize = MightVectorize MayInnerVectorize = MightVectorize
&& int(InnerSize)!=Dynamic && int(InnerSize)%int(InnerPacketSize)==0 && int(InnerSize)!=Dynamic && int(InnerSize)%int(InnerPacketSize)==0
&& int(OuterStride)!=Dynamic && int(OuterStride)%int(InnerPacketSize)==0 && int(OuterStride)!=Dynamic && int(OuterStride)%int(InnerPacketSize)==0
&& int(JointAlignment)>=int(InnerRequiredAlignment), && (EIGEN_UNALIGNED_VECTORIZE || int(JointAlignment)>=int(InnerRequiredAlignment)),
MayLinearize = StorageOrdersAgree && (int(DstFlags) & int(SrcFlags) & LinearAccessBit), MayLinearize = bool(StorageOrdersAgree) && (int(DstFlags) & int(SrcFlags) & LinearAccessBit),
MayLinearVectorize = MightVectorize && MayLinearize && DstHasDirectAccess MayLinearVectorize = bool(MightVectorize) && MayLinearize && DstHasDirectAccess
&& ((int(DstAlignment)>=int(LinearRequiredAlignment)) || MaxSizeAtCompileTime == Dynamic), && (EIGEN_UNALIGNED_VECTORIZE || (int(DstAlignment)>=int(LinearRequiredAlignment)) || MaxSizeAtCompileTime == Dynamic),
/* If the destination isn't aligned, we have to do runtime checks and we don't unroll, /* If the destination isn't aligned, we have to do runtime checks and we don't unroll,
so it's only good for large enough sizes. */ so it's only good for large enough sizes. */
MaySliceVectorize = MightVectorize && DstHasDirectAccess MaySliceVectorize = bool(MightVectorize) && bool(DstHasDirectAccess)
&& (int(InnerMaxSize)==Dynamic || int(InnerMaxSize)>=3*InnerPacketSize) && (int(InnerMaxSize)==Dynamic || int(InnerMaxSize)>=(EIGEN_UNALIGNED_VECTORIZE?InnerPacketSize:(3*InnerPacketSize)))
/* slice vectorization can be slow, so we only want it if the slices are big, which is /* slice vectorization can be slow, so we only want it if the slices are big, which is
indicated by InnerMaxSize rather than InnerSize, think of the case of a dynamic block indicated by InnerMaxSize rather than InnerSize, think of the case of a dynamic block
in a fixed-size matrix */ in a fixed-size matrix
However, with EIGEN_UNALIGNED_VECTORIZE and unrolling, slice vectorization is still worth it */
}; };
public: public:
@ -116,9 +117,9 @@ private:
: 1, : 1,
UnrollingLimit = EIGEN_UNROLLING_LIMIT * ActualPacketSize, UnrollingLimit = EIGEN_UNROLLING_LIMIT * ActualPacketSize,
MayUnrollCompletely = int(Dst::SizeAtCompileTime) != Dynamic MayUnrollCompletely = int(Dst::SizeAtCompileTime) != Dynamic
&& int(Dst::SizeAtCompileTime) * int(SrcEvaluator::CoeffReadCost) <= int(UnrollingLimit), && int(Dst::SizeAtCompileTime) * (int(DstEvaluator::CoeffReadCost)+int(SrcEvaluator::CoeffReadCost)) <= int(UnrollingLimit),
MayUnrollInner = int(InnerSize) != Dynamic MayUnrollInner = int(InnerSize) != Dynamic
&& int(InnerSize) * int(SrcEvaluator::CoeffReadCost) <= int(UnrollingLimit) && int(InnerSize) * (int(DstEvaluator::CoeffReadCost)+int(SrcEvaluator::CoeffReadCost)) <= int(UnrollingLimit)
}; };
public: public:
@ -130,11 +131,17 @@ public:
: int(NoUnrolling) : int(NoUnrolling)
) )
: int(Traversal) == int(LinearVectorizedTraversal) : int(Traversal) == int(LinearVectorizedTraversal)
? ( bool(MayUnrollCompletely) && (int(DstAlignment)>=int(LinearRequiredAlignment)) ? int(CompleteUnrolling) ? ( bool(MayUnrollCompletely) && ( EIGEN_UNALIGNED_VECTORIZE || (int(DstAlignment)>=int(LinearRequiredAlignment)))
? int(CompleteUnrolling)
: int(NoUnrolling) ) : int(NoUnrolling) )
: int(Traversal) == int(LinearTraversal) : int(Traversal) == int(LinearTraversal)
? ( bool(MayUnrollCompletely) ? int(CompleteUnrolling) ? ( bool(MayUnrollCompletely) ? int(CompleteUnrolling)
: int(NoUnrolling) ) : int(NoUnrolling) )
#if EIGEN_UNALIGNED_VECTORIZE
: int(Traversal) == int(SliceVectorizedTraversal)
? ( bool(MayUnrollInner) ? int(InnerUnrolling)
: int(NoUnrolling) )
#endif
: int(NoUnrolling) : int(NoUnrolling)
}; };
@ -156,6 +163,7 @@ public:
EIGEN_DEBUG_VAR(InnerMaxSize) EIGEN_DEBUG_VAR(InnerMaxSize)
EIGEN_DEBUG_VAR(LinearPacketSize) EIGEN_DEBUG_VAR(LinearPacketSize)
EIGEN_DEBUG_VAR(InnerPacketSize) EIGEN_DEBUG_VAR(InnerPacketSize)
EIGEN_DEBUG_VAR(ActualPacketSize)
EIGEN_DEBUG_VAR(StorageOrdersAgree) EIGEN_DEBUG_VAR(StorageOrdersAgree)
EIGEN_DEBUG_VAR(MightVectorize) EIGEN_DEBUG_VAR(MightVectorize)
EIGEN_DEBUG_VAR(MayLinearize) EIGEN_DEBUG_VAR(MayLinearize)
@ -163,6 +171,7 @@ public:
EIGEN_DEBUG_VAR(MayLinearVectorize) EIGEN_DEBUG_VAR(MayLinearVectorize)
EIGEN_DEBUG_VAR(MaySliceVectorize) EIGEN_DEBUG_VAR(MaySliceVectorize)
std::cerr << "Traversal" << " = " << Traversal << " (" << demangle_traversal(Traversal) << ")" << std::endl; std::cerr << "Traversal" << " = " << Traversal << " (" << demangle_traversal(Traversal) << ")" << std::endl;
EIGEN_DEBUG_VAR(SrcEvaluator::CoeffReadCost)
EIGEN_DEBUG_VAR(UnrollingLimit) EIGEN_DEBUG_VAR(UnrollingLimit)
EIGEN_DEBUG_VAR(MayUnrollCompletely) EIGEN_DEBUG_VAR(MayUnrollCompletely)
EIGEN_DEBUG_VAR(MayUnrollInner) EIGEN_DEBUG_VAR(MayUnrollInner)
@ -256,13 +265,13 @@ struct copy_using_evaluator_innervec_CompleteUnrolling
enum { enum {
outer = Index / DstXprType::InnerSizeAtCompileTime, outer = Index / DstXprType::InnerSizeAtCompileTime,
inner = Index % DstXprType::InnerSizeAtCompileTime, inner = Index % DstXprType::InnerSizeAtCompileTime,
JointAlignment = Kernel::AssignmentTraits::JointAlignment, SrcAlignment = Kernel::AssignmentTraits::SrcAlignment,
DefaultAlignment = unpacket_traits<PacketType>::alignment DstAlignment = Kernel::AssignmentTraits::DstAlignment
}; };
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel) EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
{ {
kernel.template assignPacketByOuterInner<DefaultAlignment, JointAlignment, PacketType>(outer, inner); kernel.template assignPacketByOuterInner<DstAlignment, SrcAlignment, PacketType>(outer, inner);
enum { NextIndex = Index + unpacket_traits<PacketType>::size }; enum { NextIndex = Index + unpacket_traits<PacketType>::size };
copy_using_evaluator_innervec_CompleteUnrolling<Kernel, NextIndex, Stop>::run(kernel); copy_using_evaluator_innervec_CompleteUnrolling<Kernel, NextIndex, Stop>::run(kernel);
} }
@ -274,23 +283,20 @@ struct copy_using_evaluator_innervec_CompleteUnrolling<Kernel, Stop, Stop>
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel&) { } EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel&) { }
}; };
template<typename Kernel, int Index_, int Stop> template<typename Kernel, int Index_, int Stop, int SrcAlignment, int DstAlignment>
struct copy_using_evaluator_innervec_InnerUnrolling struct copy_using_evaluator_innervec_InnerUnrolling
{ {
typedef typename Kernel::PacketType PacketType; typedef typename Kernel::PacketType PacketType;
enum {
DefaultAlignment = unpacket_traits<PacketType>::alignment
};
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel, Index outer) EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel, Index outer)
{ {
kernel.template assignPacketByOuterInner<DefaultAlignment, DefaultAlignment, PacketType>(outer, Index_); kernel.template assignPacketByOuterInner<DstAlignment, SrcAlignment, PacketType>(outer, Index_);
enum { NextIndex = Index_ + unpacket_traits<PacketType>::size }; enum { NextIndex = Index_ + unpacket_traits<PacketType>::size };
copy_using_evaluator_innervec_InnerUnrolling<Kernel, NextIndex, Stop>::run(kernel, outer); copy_using_evaluator_innervec_InnerUnrolling<Kernel, NextIndex, Stop, SrcAlignment, DstAlignment>::run(kernel, outer);
} }
}; };
template<typename Kernel, int Stop> template<typename Kernel, int Stop, int SrcAlignment, int DstAlignment>
struct copy_using_evaluator_innervec_InnerUnrolling<Kernel, Stop, Stop> struct copy_using_evaluator_innervec_InnerUnrolling<Kernel, Stop, Stop, SrcAlignment, DstAlignment>
{ {
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &, Index) { } EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &, Index) { }
}; };
@ -419,9 +425,10 @@ struct dense_assignment_loop<Kernel, LinearVectorizedTraversal, CompleteUnrollin
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel) EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
{ {
typedef typename Kernel::DstEvaluatorType::XprType DstXprType; typedef typename Kernel::DstEvaluatorType::XprType DstXprType;
typedef typename Kernel::PacketType PacketType;
enum { size = DstXprType::SizeAtCompileTime, enum { size = DstXprType::SizeAtCompileTime,
packetSize = packet_traits<typename Kernel::Scalar>::size, packetSize =unpacket_traits<PacketType>::size,
alignedSize = (size/packetSize)*packetSize }; alignedSize = (size/packetSize)*packetSize };
copy_using_evaluator_innervec_CompleteUnrolling<Kernel, 0, alignedSize>::run(kernel); copy_using_evaluator_innervec_CompleteUnrolling<Kernel, 0, alignedSize>::run(kernel);
@ -438,7 +445,8 @@ struct dense_assignment_loop<Kernel, InnerVectorizedTraversal, NoUnrolling>
{ {
typedef typename Kernel::PacketType PacketType; typedef typename Kernel::PacketType PacketType;
enum { enum {
DefaultAlignment = unpacket_traits<PacketType>::alignment SrcAlignment = Kernel::AssignmentTraits::SrcAlignment,
DstAlignment = Kernel::AssignmentTraits::DstAlignment
}; };
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel) EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
{ {
@ -447,7 +455,7 @@ struct dense_assignment_loop<Kernel, InnerVectorizedTraversal, NoUnrolling>
const Index packetSize = unpacket_traits<PacketType>::size; const Index packetSize = unpacket_traits<PacketType>::size;
for(Index outer = 0; outer < outerSize; ++outer) for(Index outer = 0; outer < outerSize; ++outer)
for(Index inner = 0; inner < innerSize; inner+=packetSize) for(Index inner = 0; inner < innerSize; inner+=packetSize)
kernel.template assignPacketByOuterInner<DefaultAlignment, DefaultAlignment, PacketType>(outer, inner); kernel.template assignPacketByOuterInner<DstAlignment, SrcAlignment, PacketType>(outer, inner);
} }
}; };
@ -467,9 +475,11 @@ struct dense_assignment_loop<Kernel, InnerVectorizedTraversal, InnerUnrolling>
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel) EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
{ {
typedef typename Kernel::DstEvaluatorType::XprType DstXprType; typedef typename Kernel::DstEvaluatorType::XprType DstXprType;
typedef typename Kernel::AssignmentTraits Traits;
const Index outerSize = kernel.outerSize(); const Index outerSize = kernel.outerSize();
for(Index outer = 0; outer < outerSize; ++outer) for(Index outer = 0; outer < outerSize; ++outer)
copy_using_evaluator_innervec_InnerUnrolling<Kernel, 0, DstXprType::InnerSizeAtCompileTime>::run(kernel, outer); copy_using_evaluator_innervec_InnerUnrolling<Kernel, 0, DstXprType::InnerSizeAtCompileTime,
Traits::SrcAlignment, Traits::DstAlignment>::run(kernel, outer);
} }
}; };
@ -518,7 +528,7 @@ struct dense_assignment_loop<Kernel, SliceVectorizedTraversal, NoUnrolling>
: int(Kernel::AssignmentTraits::DstAlignment) : int(Kernel::AssignmentTraits::DstAlignment)
}; };
const Scalar *dst_ptr = &kernel.dstEvaluator().coeffRef(0,0); const Scalar *dst_ptr = &kernel.dstEvaluator().coeffRef(0,0);
if((!bool(dstIsAligned)) && (size_t(dst_ptr) % sizeof(Scalar))>0) if((!bool(dstIsAligned)) && (UIntPtr(dst_ptr) % sizeof(Scalar))>0)
{ {
// the pointer is not aligend-on scalar, so alignment is not possible // the pointer is not aligend-on scalar, so alignment is not possible
return dense_assignment_loop<Kernel,DefaultTraversal,NoUnrolling>::run(kernel); return dense_assignment_loop<Kernel,DefaultTraversal,NoUnrolling>::run(kernel);
@ -549,6 +559,29 @@ struct dense_assignment_loop<Kernel, SliceVectorizedTraversal, NoUnrolling>
} }
}; };
#if EIGEN_UNALIGNED_VECTORIZE
template<typename Kernel>
struct dense_assignment_loop<Kernel, SliceVectorizedTraversal, InnerUnrolling>
{
EIGEN_DEVICE_FUNC static inline void run(Kernel &kernel)
{
typedef typename Kernel::DstEvaluatorType::XprType DstXprType;
typedef typename Kernel::PacketType PacketType;
enum { size = DstXprType::InnerSizeAtCompileTime,
packetSize =unpacket_traits<PacketType>::size,
vectorizableSize = (size/packetSize)*packetSize };
for(Index outer = 0; outer < kernel.outerSize(); ++outer)
{
copy_using_evaluator_innervec_InnerUnrolling<Kernel, 0, vectorizableSize, 0, 0>::run(kernel, outer);
copy_using_evaluator_DefaultTraversal_InnerUnrolling<Kernel, vectorizableSize, size>::run(kernel, outer);
}
}
};
#endif
/*************************************************************************** /***************************************************************************
* Part 4 : Generic dense assignment kernel * Part 4 : Generic dense assignment kernel
***************************************************************************/ ***************************************************************************/
@ -683,7 +716,7 @@ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void call_dense_assignment_loop(const DstX
template<typename DstXprType, typename SrcXprType> template<typename DstXprType, typename SrcXprType>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void call_dense_assignment_loop(const DstXprType& dst, const SrcXprType& src) EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void call_dense_assignment_loop(const DstXprType& dst, const SrcXprType& src)
{ {
call_dense_assignment_loop(dst, src, internal::assign_op<typename DstXprType::Scalar>()); call_dense_assignment_loop(dst, src, internal::assign_op<typename DstXprType::Scalar,typename SrcXprType::Scalar>());
} }
/*************************************************************************** /***************************************************************************
@ -705,7 +738,7 @@ template<> struct AssignmentKind<DenseShape,DenseShape> { typedef Dense2Dense Ki
// This is the main assignment class // This is the main assignment class
template< typename DstXprType, typename SrcXprType, typename Functor, template< typename DstXprType, typename SrcXprType, typename Functor,
typename Kind = typename AssignmentKind< typename evaluator_traits<DstXprType>::Shape , typename evaluator_traits<SrcXprType>::Shape >::Kind, typename Kind = typename AssignmentKind< typename evaluator_traits<DstXprType>::Shape , typename evaluator_traits<SrcXprType>::Shape >::Kind,
typename Scalar = typename DstXprType::Scalar> typename EnableIf = void>
struct Assignment; struct Assignment;
@ -718,13 +751,13 @@ template<typename Dst, typename Src>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
void call_assignment(Dst& dst, const Src& src) void call_assignment(Dst& dst, const Src& src)
{ {
call_assignment(dst, src, internal::assign_op<typename Dst::Scalar>()); call_assignment(dst, src, internal::assign_op<typename Dst::Scalar,typename Src::Scalar>());
} }
template<typename Dst, typename Src> template<typename Dst, typename Src>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
void call_assignment(const Dst& dst, const Src& src) void call_assignment(const Dst& dst, const Src& src)
{ {
call_assignment(dst, src, internal::assign_op<typename Dst::Scalar>()); call_assignment(dst, src, internal::assign_op<typename Dst::Scalar,typename Src::Scalar>());
} }
// Deal with "assume-aliasing" // Deal with "assume-aliasing"
@ -783,7 +816,7 @@ template<typename Dst, typename Src>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
void call_assignment_no_alias(Dst& dst, const Src& src) void call_assignment_no_alias(Dst& dst, const Src& src)
{ {
call_assignment_no_alias(dst, src, internal::assign_op<typename Dst::Scalar>()); call_assignment_no_alias(dst, src, internal::assign_op<typename Dst::Scalar,typename Src::Scalar>());
} }
template<typename Dst, typename Src, typename Func> template<typename Dst, typename Src, typename Func>
@ -805,15 +838,17 @@ template<typename Dst, typename Src>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
void call_assignment_no_alias_no_transpose(Dst& dst, const Src& src) void call_assignment_no_alias_no_transpose(Dst& dst, const Src& src)
{ {
call_assignment_no_alias_no_transpose(dst, src, internal::assign_op<typename Dst::Scalar>()); call_assignment_no_alias_no_transpose(dst, src, internal::assign_op<typename Dst::Scalar,typename Src::Scalar>());
} }
// forward declaration // forward declaration
template<typename Dst, typename Src> void check_for_aliasing(const Dst &dst, const Src &src); template<typename Dst, typename Src> void check_for_aliasing(const Dst &dst, const Src &src);
// Generic Dense to Dense assignment // Generic Dense to Dense assignment
template< typename DstXprType, typename SrcXprType, typename Functor, typename Scalar> // Note that the last template argument "Weak" is needed to make it possible to perform
struct Assignment<DstXprType, SrcXprType, Functor, Dense2Dense, Scalar> // both partial specialization+SFINAE without ambiguous specialization
template< typename DstXprType, typename SrcXprType, typename Functor, typename Weak>
struct Assignment<DstXprType, SrcXprType, Functor, Dense2Dense, Weak>
{ {
EIGEN_DEVICE_FUNC EIGEN_DEVICE_FUNC
static EIGEN_STRONG_INLINE void run(DstXprType &dst, const SrcXprType &src, const Functor &func) static EIGEN_STRONG_INLINE void run(DstXprType &dst, const SrcXprType &src, const Functor &func)
@ -830,11 +865,13 @@ struct Assignment<DstXprType, SrcXprType, Functor, Dense2Dense, Scalar>
// Generic assignment through evalTo. // Generic assignment through evalTo.
// TODO: not sure we have to keep that one, but it helps porting current code to new evaluator mechanism. // TODO: not sure we have to keep that one, but it helps porting current code to new evaluator mechanism.
template< typename DstXprType, typename SrcXprType, typename Functor, typename Scalar> // Note that the last template argument "Weak" is needed to make it possible to perform
struct Assignment<DstXprType, SrcXprType, Functor, EigenBase2EigenBase, Scalar> // both partial specialization+SFINAE without ambiguous specialization
template< typename DstXprType, typename SrcXprType, typename Functor, typename Weak>
struct Assignment<DstXprType, SrcXprType, Functor, EigenBase2EigenBase, Weak>
{ {
EIGEN_DEVICE_FUNC EIGEN_DEVICE_FUNC
static EIGEN_STRONG_INLINE void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<typename DstXprType::Scalar> &/*func*/) static EIGEN_STRONG_INLINE void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<typename DstXprType::Scalar,typename SrcXprType::Scalar> &/*func*/)
{ {
eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols()); eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols());
src.evalTo(dst); src.evalTo(dst);

26
Eigen/src/Core/Assign_MKL.h Normal file → Executable file
View File

@ -81,10 +81,10 @@ class vml_assign_traits
#define EIGEN_MKL_VML_DECLARE_UNARY_CALL(EIGENOP, VMLOP, EIGENTYPE, VMLTYPE, VMLMODE) \ #define EIGEN_MKL_VML_DECLARE_UNARY_CALL(EIGENOP, VMLOP, EIGENTYPE, VMLTYPE, VMLMODE) \
template< typename DstXprType, typename SrcXprNested> \ template< typename DstXprType, typename SrcXprNested> \
struct Assignment<DstXprType, CwiseUnaryOp<scalar_##EIGENOP##_op<EIGENTYPE>, SrcXprNested>, assign_op<EIGENTYPE>, \ struct Assignment<DstXprType, CwiseUnaryOp<scalar_##EIGENOP##_op<EIGENTYPE>, SrcXprNested>, assign_op<EIGENTYPE,EIGENTYPE>, \
Dense2Dense, typename enable_if<vml_assign_traits<DstXprType,SrcXprNested>::EnableVml,EIGENTYPE>::type> { \ Dense2Dense, typename enable_if<vml_assign_traits<DstXprType,SrcXprNested>::EnableVml>::type> { \
typedef CwiseUnaryOp<scalar_##EIGENOP##_op<EIGENTYPE>, SrcXprNested> SrcXprType; \ typedef CwiseUnaryOp<scalar_##EIGENOP##_op<EIGENTYPE>, SrcXprNested> SrcXprType; \
static void run(DstXprType &dst, const SrcXprType &src, const assign_op<EIGENTYPE> &/*func*/) { \ static void run(DstXprType &dst, const SrcXprType &src, const assign_op<EIGENTYPE,EIGENTYPE> &/*func*/) { \
eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols()); \ eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols()); \
if(vml_assign_traits<DstXprType,SrcXprNested>::Traversal==LinearTraversal) { \ if(vml_assign_traits<DstXprType,SrcXprNested>::Traversal==LinearTraversal) { \
VMLOP(dst.size(), (const VMLTYPE*)src.nestedExpression().data(), \ VMLOP(dst.size(), (const VMLTYPE*)src.nestedExpression().data(), \
@ -138,22 +138,24 @@ EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL(floor, Floor, _)
EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL(ceil, Ceil, _) EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL(ceil, Ceil, _)
#define EIGEN_MKL_VML_DECLARE_POW_CALL(EIGENOP, VMLOP, EIGENTYPE, VMLTYPE, VMLMODE) \ #define EIGEN_MKL_VML_DECLARE_POW_CALL(EIGENOP, VMLOP, EIGENTYPE, VMLTYPE, VMLMODE) \
template< typename DstXprType, typename SrcXprNested> \ template< typename DstXprType, typename SrcXprNested, typename Plain> \
struct Assignment<DstXprType, CwiseUnaryOp<scalar_##EIGENOP##_op<EIGENTYPE>, SrcXprNested>, assign_op<EIGENTYPE>, \ struct Assignment<DstXprType, CwiseBinaryOp<scalar_##EIGENOP##_op<EIGENTYPE,EIGENTYPE>, SrcXprNested, \
Dense2Dense, typename enable_if<vml_assign_traits<DstXprType,SrcXprNested>::EnableVml,EIGENTYPE>::type> { \ const CwiseNullaryOp<internal::scalar_constant_op<EIGENTYPE>,Plain> >, assign_op<EIGENTYPE,EIGENTYPE>, \
typedef CwiseUnaryOp<scalar_##EIGENOP##_op<EIGENTYPE>, SrcXprNested> SrcXprType; \ Dense2Dense, typename enable_if<vml_assign_traits<DstXprType,SrcXprNested>::EnableVml>::type> { \
static void run(DstXprType &dst, const SrcXprType &src, const assign_op<EIGENTYPE> &/*func*/) { \ typedef CwiseBinaryOp<scalar_##EIGENOP##_op<EIGENTYPE,EIGENTYPE>, SrcXprNested, \
const CwiseNullaryOp<internal::scalar_constant_op<EIGENTYPE>,Plain> > SrcXprType; \
static void run(DstXprType &dst, const SrcXprType &src, const assign_op<EIGENTYPE,EIGENTYPE> &/*func*/) { \
eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols()); \ eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols()); \
VMLTYPE exponent = reinterpret_cast<const VMLTYPE&>(src.functor().m_exponent); \ VMLTYPE exponent = reinterpret_cast<const VMLTYPE&>(src.rhs().functor().m_other); \
if(vml_assign_traits<DstXprType,SrcXprNested>::Traversal==LinearTraversal) \ if(vml_assign_traits<DstXprType,SrcXprNested>::Traversal==LinearTraversal) \
{ \ { \
VMLOP( dst.size(), (const VMLTYPE*)src.nestedExpression().data(), exponent, \ VMLOP( dst.size(), (const VMLTYPE*)src.lhs().data(), exponent, \
(VMLTYPE*)dst.data() EIGEN_PP_EXPAND(EIGEN_VMLMODE_EXPAND_##VMLMODE) ); \ (VMLTYPE*)dst.data() EIGEN_PP_EXPAND(EIGEN_VMLMODE_EXPAND_##VMLMODE) ); \
} else { \ } else { \
const Index outerSize = dst.outerSize(); \ const Index outerSize = dst.outerSize(); \
for(Index outer = 0; outer < outerSize; ++outer) { \ for(Index outer = 0; outer < outerSize; ++outer) { \
const EIGENTYPE *src_ptr = src.IsRowMajor ? &(src.nestedExpression().coeffRef(outer,0)) : \ const EIGENTYPE *src_ptr = src.IsRowMajor ? &(src.lhs().coeffRef(outer,0)) : \
&(src.nestedExpression().coeffRef(0, outer)); \ &(src.lhs().coeffRef(0, outer)); \
EIGENTYPE *dst_ptr = dst.IsRowMajor ? &(dst.coeffRef(outer,0)) : &(dst.coeffRef(0, outer)); \ EIGENTYPE *dst_ptr = dst.IsRowMajor ? &(dst.coeffRef(outer,0)) : &(dst.coeffRef(0, outer)); \
VMLOP( dst.innerSize(), (const VMLTYPE*)src_ptr, exponent, \ VMLOP( dst.innerSize(), (const VMLTYPE*)src_ptr, exponent, \
(VMLTYPE*)dst_ptr EIGEN_PP_EXPAND(EIGEN_VMLMODE_EXPAND_##VMLMODE)); \ (VMLTYPE*)dst_ptr EIGEN_PP_EXPAND(EIGEN_VMLMODE_EXPAND_##VMLMODE)); \

View File

@ -1,11 +0,0 @@
FILE(GLOB Eigen_Core_SRCS "*.h")
INSTALL(FILES
${Eigen_Core_SRCS}
DESTINATION ${INCLUDE_INSTALL_DIR}/Eigen/src/Core COMPONENT Devel
)
ADD_SUBDIRECTORY(products)
ADD_SUBDIRECTORY(util)
ADD_SUBDIRECTORY(arch)
ADD_SUBDIRECTORY(functors)

View File

@ -80,9 +80,7 @@ struct CommaInitializer
EIGEN_DEVICE_FUNC EIGEN_DEVICE_FUNC
CommaInitializer& operator,(const DenseBase<OtherDerived>& other) CommaInitializer& operator,(const DenseBase<OtherDerived>& other)
{ {
if(other.cols()==0 || other.rows()==0) if (m_col==m_xpr.cols() && (other.cols()!=0 || other.rows()!=m_currentBlockRows))
return *this;
if (m_col==m_xpr.cols())
{ {
m_row+=m_currentBlockRows; m_row+=m_currentBlockRows;
m_col = 0; m_col = 0;
@ -90,15 +88,11 @@ struct CommaInitializer
eigen_assert(m_row+m_currentBlockRows<=m_xpr.rows() eigen_assert(m_row+m_currentBlockRows<=m_xpr.rows()
&& "Too many rows passed to comma initializer (operator<<)"); && "Too many rows passed to comma initializer (operator<<)");
} }
eigen_assert(m_col<m_xpr.cols() eigen_assert((m_col + other.cols() <= m_xpr.cols())
&& "Too many coefficients passed to comma initializer (operator<<)"); && "Too many coefficients passed to comma initializer (operator<<)");
eigen_assert(m_currentBlockRows==other.rows()); eigen_assert(m_currentBlockRows==other.rows());
if (OtherDerived::SizeAtCompileTime != Dynamic) m_xpr.template block<OtherDerived::RowsAtCompileTime, OtherDerived::ColsAtCompileTime>
m_xpr.template block<OtherDerived::RowsAtCompileTime != Dynamic ? OtherDerived::RowsAtCompileTime : 1, (m_row, m_col, other.rows(), other.cols()) = other;
OtherDerived::ColsAtCompileTime != Dynamic ? OtherDerived::ColsAtCompileTime : 1>
(m_row, m_col) = other;
else
m_xpr.block(m_row, m_col, other.rows(), other.cols()) = other;
m_col += other.cols(); m_col += other.cols();
return *this; return *this;
} }
@ -109,9 +103,7 @@ struct CommaInitializer
EIGEN_EXCEPTION_SPEC(Eigen::eigen_assert_exception) EIGEN_EXCEPTION_SPEC(Eigen::eigen_assert_exception)
#endif #endif
{ {
eigen_assert((m_row+m_currentBlockRows) == m_xpr.rows() finished();
&& m_col == m_xpr.cols()
&& "Too few coefficients passed to comma initializer (operator<<)");
} }
/** \returns the built matrix once all its coefficients have been set. /** \returns the built matrix once all its coefficients have been set.
@ -122,7 +114,12 @@ struct CommaInitializer
* \endcode * \endcode
*/ */
EIGEN_DEVICE_FUNC EIGEN_DEVICE_FUNC
inline XprType& finished() { return m_xpr; } inline XprType& finished() {
eigen_assert(((m_row+m_currentBlockRows) == m_xpr.rows() || m_xpr.cols() == 0)
&& m_col == m_xpr.cols()
&& "Too few coefficients passed to comma initializer (operator<<)");
return m_xpr;
}
XprType& m_xpr; // target expression XprType& m_xpr; // target expression
Index m_row; // current row id Index m_row; // current row id

View File

@ -32,33 +32,6 @@ struct rcond_compute_sign<Vector, Vector, false> {
} }
}; };
/** \brief Reciprocal condition number estimator.
*
* Computing a decomposition of a dense matrix takes O(n^3) operations, while
* this method estimates the condition number quickly and reliably in O(n^2)
* operations.
*
* \returns an estimate of the reciprocal condition number
* (1 / (||matrix||_1 * ||inv(matrix)||_1)) of matrix, given ||matrix||_1 and
* its decomposition. Supports the following decompositions: FullPivLU,
* PartialPivLU, LDLT, and LLT.
*
* \sa FullPivLU, PartialPivLU, LDLT, LLT.
*/
template <typename Decomposition>
typename Decomposition::RealScalar
rcond_estimate_helper(typename Decomposition::RealScalar matrix_norm, const Decomposition& dec)
{
typedef typename Decomposition::RealScalar RealScalar;
eigen_assert(dec.rows() == dec.cols());
if (dec.rows() == 0) return RealScalar(1);
if (matrix_norm == RealScalar(0)) return RealScalar(0);
if (dec.rows() == 1) return RealScalar(1);
const RealScalar inverse_matrix_norm = rcond_invmatrix_L1_norm_estimate(dec);
return (inverse_matrix_norm == RealScalar(0) ? RealScalar(0)
: (RealScalar(1) / inverse_matrix_norm) / matrix_norm);
}
/** /**
* \returns an estimate of ||inv(matrix)||_1 given a decomposition of * \returns an estimate of ||inv(matrix)||_1 given a decomposition of
* \a matrix that implements .solve() and .adjoint().solve() methods. * \a matrix that implements .solve() and .adjoint().solve() methods.
@ -94,7 +67,15 @@ typename Decomposition::RealScalar rcond_invmatrix_L1_norm_estimate(const Decomp
if (n == 0) if (n == 0)
return 0; return 0;
// Disable Index to float conversion warning
#ifdef __INTEL_COMPILER
#pragma warning push
#pragma warning ( disable : 2259 )
#endif
Vector v = dec.solve(Vector::Ones(n) / Scalar(n)); Vector v = dec.solve(Vector::Ones(n) / Scalar(n));
#ifdef __INTEL_COMPILER
#pragma warning pop
#endif
// lower_bound is a lower bound on // lower_bound is a lower bound on
// ||inv(matrix)||_1 = sup_v ||inv(matrix) v||_1 / ||v||_1 // ||inv(matrix)||_1 = sup_v ||inv(matrix) v||_1 / ||v||_1
@ -151,7 +132,8 @@ typename Decomposition::RealScalar rcond_invmatrix_L1_norm_estimate(const Decomp
// Hager's algorithm to vastly underestimate ||matrix||_1. // Hager's algorithm to vastly underestimate ||matrix||_1.
Scalar alternating_sign(RealScalar(1)); Scalar alternating_sign(RealScalar(1));
for (Index i = 0; i < n; ++i) { for (Index i = 0; i < n; ++i) {
v[i] = alternating_sign * (RealScalar(1) + (RealScalar(i) / (RealScalar(n - 1)))); // The static_cast is needed when Scalar is a complex and RealScalar implements expression templates
v[i] = alternating_sign * static_cast<RealScalar>(RealScalar(1) + (RealScalar(i) / (RealScalar(n - 1))));
alternating_sign = -alternating_sign; alternating_sign = -alternating_sign;
} }
v = dec.solve(v); v = dec.solve(v);
@ -159,6 +141,33 @@ typename Decomposition::RealScalar rcond_invmatrix_L1_norm_estimate(const Decomp
return numext::maxi(lower_bound, alternate_lower_bound); return numext::maxi(lower_bound, alternate_lower_bound);
} }
/** \brief Reciprocal condition number estimator.
*
* Computing a decomposition of a dense matrix takes O(n^3) operations, while
* this method estimates the condition number quickly and reliably in O(n^2)
* operations.
*
* \returns an estimate of the reciprocal condition number
* (1 / (||matrix||_1 * ||inv(matrix)||_1)) of matrix, given ||matrix||_1 and
* its decomposition. Supports the following decompositions: FullPivLU,
* PartialPivLU, LDLT, and LLT.
*
* \sa FullPivLU, PartialPivLU, LDLT, LLT.
*/
template <typename Decomposition>
typename Decomposition::RealScalar
rcond_estimate_helper(typename Decomposition::RealScalar matrix_norm, const Decomposition& dec)
{
typedef typename Decomposition::RealScalar RealScalar;
eigen_assert(dec.rows() == dec.cols());
if (dec.rows() == 0) return RealScalar(1);
if (matrix_norm == RealScalar(0)) return RealScalar(0);
if (dec.rows() == 1) return RealScalar(1);
const RealScalar inverse_matrix_norm = rcond_invmatrix_L1_norm_estimate(dec);
return (inverse_matrix_norm == RealScalar(0) ? RealScalar(0)
: (RealScalar(1) / inverse_matrix_norm) / matrix_norm);
}
} // namespace internal } // namespace internal
} // namespace Eigen } // namespace Eigen

View File

@ -41,10 +41,19 @@ template<> struct storage_kind_to_shape<TranspositionsStorage> { typedef Transp
// We currently distinguish the following kind of evaluators: // We currently distinguish the following kind of evaluators:
// - unary_evaluator for expressions taking only one arguments (CwiseUnaryOp, CwiseUnaryView, Transpose, MatrixWrapper, ArrayWrapper, Reverse, Replicate) // - unary_evaluator for expressions taking only one arguments (CwiseUnaryOp, CwiseUnaryView, Transpose, MatrixWrapper, ArrayWrapper, Reverse, Replicate)
// - binary_evaluator for expression taking two arguments (CwiseBinaryOp) // - binary_evaluator for expression taking two arguments (CwiseBinaryOp)
// - ternary_evaluator for expression taking three arguments (CwiseTernaryOp)
// - product_evaluator for linear algebra products (Product); special case of binary_evaluator because it requires additional tags for dispatching. // - product_evaluator for linear algebra products (Product); special case of binary_evaluator because it requires additional tags for dispatching.
// - mapbase_evaluator for Map, Block, Ref // - mapbase_evaluator for Map, Block, Ref
// - block_evaluator for Block (special dispatching to a mapbase_evaluator or unary_evaluator) // - block_evaluator for Block (special dispatching to a mapbase_evaluator or unary_evaluator)
template< typename T,
typename Arg1Kind = typename evaluator_traits<typename T::Arg1>::Kind,
typename Arg2Kind = typename evaluator_traits<typename T::Arg2>::Kind,
typename Arg3Kind = typename evaluator_traits<typename T::Arg3>::Kind,
typename Arg1Scalar = typename traits<typename T::Arg1>::Scalar,
typename Arg2Scalar = typename traits<typename T::Arg2>::Scalar,
typename Arg3Scalar = typename traits<typename T::Arg3>::Scalar> struct ternary_evaluator;
template< typename T, template< typename T,
typename LhsKind = typename evaluator_traits<typename T::Lhs>::Kind, typename LhsKind = typename evaluator_traits<typename T::Lhs>::Kind,
typename RhsKind = typename evaluator_traits<typename T::Rhs>::Kind, typename RhsKind = typename evaluator_traits<typename T::Rhs>::Kind,
@ -328,6 +337,120 @@ protected:
// Like Matrix and Array, this is not really a unary expression, so we directly specialize evaluator. // Like Matrix and Array, this is not really a unary expression, so we directly specialize evaluator.
// Likewise, there is not need to more sophisticated dispatching here. // Likewise, there is not need to more sophisticated dispatching here.
template<typename Scalar,typename NullaryOp,
bool has_nullary = has_nullary_operator<NullaryOp>::value,
bool has_unary = has_unary_operator<NullaryOp>::value,
bool has_binary = has_binary_operator<NullaryOp>::value>
struct nullary_wrapper
{
template <typename IndexType>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar operator()(const NullaryOp& op, IndexType i, IndexType j) const { return op(i,j); }
template <typename IndexType>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar operator()(const NullaryOp& op, IndexType i) const { return op(i); }
template <typename T, typename IndexType> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T packetOp(const NullaryOp& op, IndexType i, IndexType j) const { return op.template packetOp<T>(i,j); }
template <typename T, typename IndexType> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T packetOp(const NullaryOp& op, IndexType i) const { return op.template packetOp<T>(i); }
};
template<typename Scalar,typename NullaryOp>
struct nullary_wrapper<Scalar,NullaryOp,true,false,false>
{
template <typename IndexType>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar operator()(const NullaryOp& op, IndexType=0, IndexType=0) const { return op(); }
template <typename T, typename IndexType> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T packetOp(const NullaryOp& op, IndexType=0, IndexType=0) const { return op.template packetOp<T>(); }
};
template<typename Scalar,typename NullaryOp>
struct nullary_wrapper<Scalar,NullaryOp,false,false,true>
{
template <typename IndexType>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar operator()(const NullaryOp& op, IndexType i, IndexType j=0) const { return op(i,j); }
template <typename T, typename IndexType> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T packetOp(const NullaryOp& op, IndexType i, IndexType j=0) const { return op.template packetOp<T>(i,j); }
};
// We need the following specialization for vector-only functors assigned to a runtime vector,
// for instance, using linspace and assigning a RowVectorXd to a MatrixXd or even a row of a MatrixXd.
// In this case, i==0 and j is used for the actual iteration.
template<typename Scalar,typename NullaryOp>
struct nullary_wrapper<Scalar,NullaryOp,false,true,false>
{
template <typename IndexType>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar operator()(const NullaryOp& op, IndexType i, IndexType j) const {
eigen_assert(i==0 || j==0);
return op(i+j);
}
template <typename T, typename IndexType> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T packetOp(const NullaryOp& op, IndexType i, IndexType j) const {
eigen_assert(i==0 || j==0);
return op.template packetOp<T>(i+j);
}
template <typename IndexType>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar operator()(const NullaryOp& op, IndexType i) const { return op(i); }
template <typename T, typename IndexType>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T packetOp(const NullaryOp& op, IndexType i) const { return op.template packetOp<T>(i); }
};
template<typename Scalar,typename NullaryOp>
struct nullary_wrapper<Scalar,NullaryOp,false,false,false> {};
#if 0 && EIGEN_COMP_MSVC>0
// Disable this ugly workaround. This is now handled in traits<Ref>::match,
// but this piece of code might still become handly if some other weird compilation
// erros pop up again.
// MSVC exhibits a weird compilation error when
// compiling:
// Eigen::MatrixXf A = MatrixXf::Random(3,3);
// Ref<const MatrixXf> R = 2.f*A;
// and that has_*ary_operator<scalar_constant_op<float>> have not been instantiated yet.
// The "problem" is that evaluator<2.f*A> is instantiated by traits<Ref>::match<2.f*A>
// and at that time has_*ary_operator<T> returns true regardless of T.
// Then nullary_wrapper is badly instantiated as nullary_wrapper<.,.,true,true,true>.
// The trick is thus to defer the proper instantiation of nullary_wrapper when coeff(),
// and packet() are really instantiated as implemented below:
// This is a simple wrapper around Index to enforce the re-instantiation of
// has_*ary_operator when needed.
template<typename T> struct nullary_wrapper_workaround_msvc {
nullary_wrapper_workaround_msvc(const T&);
operator T()const;
};
template<typename Scalar,typename NullaryOp>
struct nullary_wrapper<Scalar,NullaryOp,true,true,true>
{
template <typename IndexType>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar operator()(const NullaryOp& op, IndexType i, IndexType j) const {
return nullary_wrapper<Scalar,NullaryOp,
has_nullary_operator<NullaryOp,nullary_wrapper_workaround_msvc<IndexType> >::value,
has_unary_operator<NullaryOp,nullary_wrapper_workaround_msvc<IndexType> >::value,
has_binary_operator<NullaryOp,nullary_wrapper_workaround_msvc<IndexType> >::value>().operator()(op,i,j);
}
template <typename IndexType>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar operator()(const NullaryOp& op, IndexType i) const {
return nullary_wrapper<Scalar,NullaryOp,
has_nullary_operator<NullaryOp,nullary_wrapper_workaround_msvc<IndexType> >::value,
has_unary_operator<NullaryOp,nullary_wrapper_workaround_msvc<IndexType> >::value,
has_binary_operator<NullaryOp,nullary_wrapper_workaround_msvc<IndexType> >::value>().operator()(op,i);
}
template <typename T, typename IndexType>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T packetOp(const NullaryOp& op, IndexType i, IndexType j) const {
return nullary_wrapper<Scalar,NullaryOp,
has_nullary_operator<NullaryOp,nullary_wrapper_workaround_msvc<IndexType> >::value,
has_unary_operator<NullaryOp,nullary_wrapper_workaround_msvc<IndexType> >::value,
has_binary_operator<NullaryOp,nullary_wrapper_workaround_msvc<IndexType> >::value>().template packetOp<T>(op,i,j);
}
template <typename T, typename IndexType>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T packetOp(const NullaryOp& op, IndexType i) const {
return nullary_wrapper<Scalar,NullaryOp,
has_nullary_operator<NullaryOp,nullary_wrapper_workaround_msvc<IndexType> >::value,
has_unary_operator<NullaryOp,nullary_wrapper_workaround_msvc<IndexType> >::value,
has_binary_operator<NullaryOp,nullary_wrapper_workaround_msvc<IndexType> >::value>().template packetOp<T>(op,i);
}
};
#endif // MSVC workaround
template<typename NullaryOp, typename PlainObjectType> template<typename NullaryOp, typename PlainObjectType>
struct evaluator<CwiseNullaryOp<NullaryOp,PlainObjectType> > struct evaluator<CwiseNullaryOp<NullaryOp,PlainObjectType> >
: evaluator_base<CwiseNullaryOp<NullaryOp,PlainObjectType> > : evaluator_base<CwiseNullaryOp<NullaryOp,PlainObjectType> >
@ -347,41 +470,44 @@ struct evaluator<CwiseNullaryOp<NullaryOp,PlainObjectType> >
}; };
EIGEN_DEVICE_FUNC explicit evaluator(const XprType& n) EIGEN_DEVICE_FUNC explicit evaluator(const XprType& n)
: m_functor(n.functor()) : m_functor(n.functor()), m_wrapper()
{ {
EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost); EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost);
} }
typedef typename XprType::CoeffReturnType CoeffReturnType; typedef typename XprType::CoeffReturnType CoeffReturnType;
template <typename IndexType>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
CoeffReturnType coeff(Index row, Index col) const CoeffReturnType coeff(IndexType row, IndexType col) const
{ {
return m_functor(row, col); return m_wrapper(m_functor, row, col);
} }
template <typename IndexType>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
CoeffReturnType coeff(Index index) const CoeffReturnType coeff(IndexType index) const
{ {
return m_functor(index); return m_wrapper(m_functor,index);
} }
template<int LoadMode, typename PacketType> template<int LoadMode, typename PacketType, typename IndexType>
EIGEN_STRONG_INLINE EIGEN_STRONG_INLINE
PacketType packet(Index row, Index col) const PacketType packet(IndexType row, IndexType col) const
{ {
return m_functor.template packetOp<Index,PacketType>(row, col); return m_wrapper.template packetOp<PacketType>(m_functor, row, col);
} }
template<int LoadMode, typename PacketType> template<int LoadMode, typename PacketType, typename IndexType>
EIGEN_STRONG_INLINE EIGEN_STRONG_INLINE
PacketType packet(Index index) const PacketType packet(IndexType index) const
{ {
return m_functor.template packetOp<Index,PacketType>(index); return m_wrapper.template packetOp<PacketType>(m_functor, index);
} }
protected: protected:
const NullaryOp m_functor; const NullaryOp m_functor;
const internal::nullary_wrapper<CoeffReturnType,NullaryOp> m_wrapper;
}; };
// -------------------- CwiseUnaryOp -------------------- // -------------------- CwiseUnaryOp --------------------
@ -442,6 +568,96 @@ protected:
evaluator<ArgType> m_argImpl; evaluator<ArgType> m_argImpl;
}; };
// -------------------- CwiseTernaryOp --------------------
// this is a ternary expression
template<typename TernaryOp, typename Arg1, typename Arg2, typename Arg3>
struct evaluator<CwiseTernaryOp<TernaryOp, Arg1, Arg2, Arg3> >
: public ternary_evaluator<CwiseTernaryOp<TernaryOp, Arg1, Arg2, Arg3> >
{
typedef CwiseTernaryOp<TernaryOp, Arg1, Arg2, Arg3> XprType;
typedef ternary_evaluator<CwiseTernaryOp<TernaryOp, Arg1, Arg2, Arg3> > Base;
EIGEN_DEVICE_FUNC explicit evaluator(const XprType& xpr) : Base(xpr) {}
};
template<typename TernaryOp, typename Arg1, typename Arg2, typename Arg3>
struct ternary_evaluator<CwiseTernaryOp<TernaryOp, Arg1, Arg2, Arg3>, IndexBased, IndexBased>
: evaluator_base<CwiseTernaryOp<TernaryOp, Arg1, Arg2, Arg3> >
{
typedef CwiseTernaryOp<TernaryOp, Arg1, Arg2, Arg3> XprType;
enum {
CoeffReadCost = evaluator<Arg1>::CoeffReadCost + evaluator<Arg2>::CoeffReadCost + evaluator<Arg3>::CoeffReadCost + functor_traits<TernaryOp>::Cost,
Arg1Flags = evaluator<Arg1>::Flags,
Arg2Flags = evaluator<Arg2>::Flags,
Arg3Flags = evaluator<Arg3>::Flags,
SameType = is_same<typename Arg1::Scalar,typename Arg2::Scalar>::value && is_same<typename Arg1::Scalar,typename Arg3::Scalar>::value,
StorageOrdersAgree = (int(Arg1Flags)&RowMajorBit)==(int(Arg2Flags)&RowMajorBit) && (int(Arg1Flags)&RowMajorBit)==(int(Arg3Flags)&RowMajorBit),
Flags0 = (int(Arg1Flags) | int(Arg2Flags) | int(Arg3Flags)) & (
HereditaryBits
| (int(Arg1Flags) & int(Arg2Flags) & int(Arg3Flags) &
( (StorageOrdersAgree ? LinearAccessBit : 0)
| (functor_traits<TernaryOp>::PacketAccess && StorageOrdersAgree && SameType ? PacketAccessBit : 0)
)
)
),
Flags = (Flags0 & ~RowMajorBit) | (Arg1Flags & RowMajorBit),
Alignment = EIGEN_PLAIN_ENUM_MIN(
EIGEN_PLAIN_ENUM_MIN(evaluator<Arg1>::Alignment, evaluator<Arg2>::Alignment),
evaluator<Arg3>::Alignment)
};
EIGEN_DEVICE_FUNC explicit ternary_evaluator(const XprType& xpr)
: m_functor(xpr.functor()),
m_arg1Impl(xpr.arg1()),
m_arg2Impl(xpr.arg2()),
m_arg3Impl(xpr.arg3())
{
EIGEN_INTERNAL_CHECK_COST_VALUE(functor_traits<TernaryOp>::Cost);
EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost);
}
typedef typename XprType::CoeffReturnType CoeffReturnType;
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
CoeffReturnType coeff(Index row, Index col) const
{
return m_functor(m_arg1Impl.coeff(row, col), m_arg2Impl.coeff(row, col), m_arg3Impl.coeff(row, col));
}
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
CoeffReturnType coeff(Index index) const
{
return m_functor(m_arg1Impl.coeff(index), m_arg2Impl.coeff(index), m_arg3Impl.coeff(index));
}
template<int LoadMode, typename PacketType>
EIGEN_STRONG_INLINE
PacketType packet(Index row, Index col) const
{
return m_functor.packetOp(m_arg1Impl.template packet<LoadMode,PacketType>(row, col),
m_arg2Impl.template packet<LoadMode,PacketType>(row, col),
m_arg3Impl.template packet<LoadMode,PacketType>(row, col));
}
template<int LoadMode, typename PacketType>
EIGEN_STRONG_INLINE
PacketType packet(Index index) const
{
return m_functor.packetOp(m_arg1Impl.template packet<LoadMode,PacketType>(index),
m_arg2Impl.template packet<LoadMode,PacketType>(index),
m_arg3Impl.template packet<LoadMode,PacketType>(index));
}
protected:
const TernaryOp m_functor;
evaluator<Arg1> m_arg1Impl;
evaluator<Arg2> m_arg2Impl;
evaluator<Arg3> m_arg3Impl;
};
// -------------------- CwiseBinaryOp -------------------- // -------------------- CwiseBinaryOp --------------------
// this is a binary expression // this is a binary expression
@ -604,7 +820,8 @@ struct mapbase_evaluator : evaluator_base<Derived>
EIGEN_DEVICE_FUNC explicit mapbase_evaluator(const XprType& map) EIGEN_DEVICE_FUNC explicit mapbase_evaluator(const XprType& map)
: m_data(const_cast<PointerType>(map.data())), : m_data(const_cast<PointerType>(map.data())),
m_xpr(map) m_innerStride(map.innerStride()),
m_outerStride(map.outerStride())
{ {
EIGEN_STATIC_ASSERT(EIGEN_IMPLIES(evaluator<Derived>::Flags&PacketAccessBit, internal::inner_stride_at_compile_time<Derived>::ret==1), EIGEN_STATIC_ASSERT(EIGEN_IMPLIES(evaluator<Derived>::Flags&PacketAccessBit, internal::inner_stride_at_compile_time<Derived>::ret==1),
PACKET_ACCESS_REQUIRES_TO_HAVE_INNER_STRIDE_FIXED_TO_1); PACKET_ACCESS_REQUIRES_TO_HAVE_INNER_STRIDE_FIXED_TO_1);
@ -614,32 +831,32 @@ struct mapbase_evaluator : evaluator_base<Derived>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
CoeffReturnType coeff(Index row, Index col) const CoeffReturnType coeff(Index row, Index col) const
{ {
return m_data[col * m_xpr.colStride() + row * m_xpr.rowStride()]; return m_data[col * colStride() + row * rowStride()];
} }
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
CoeffReturnType coeff(Index index) const CoeffReturnType coeff(Index index) const
{ {
return m_data[index * m_xpr.innerStride()]; return m_data[index * m_innerStride.value()];
} }
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
Scalar& coeffRef(Index row, Index col) Scalar& coeffRef(Index row, Index col)
{ {
return m_data[col * m_xpr.colStride() + row * m_xpr.rowStride()]; return m_data[col * colStride() + row * rowStride()];
} }
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
Scalar& coeffRef(Index index) Scalar& coeffRef(Index index)
{ {
return m_data[index * m_xpr.innerStride()]; return m_data[index * m_innerStride.value()];
} }
template<int LoadMode, typename PacketType> template<int LoadMode, typename PacketType>
EIGEN_STRONG_INLINE EIGEN_STRONG_INLINE
PacketType packet(Index row, Index col) const PacketType packet(Index row, Index col) const
{ {
PointerType ptr = m_data + row * m_xpr.rowStride() + col * m_xpr.colStride(); PointerType ptr = m_data + row * rowStride() + col * colStride();
return internal::ploadt<PacketType, LoadMode>(ptr); return internal::ploadt<PacketType, LoadMode>(ptr);
} }
@ -647,14 +864,14 @@ struct mapbase_evaluator : evaluator_base<Derived>
EIGEN_STRONG_INLINE EIGEN_STRONG_INLINE
PacketType packet(Index index) const PacketType packet(Index index) const
{ {
return internal::ploadt<PacketType, LoadMode>(m_data + index * m_xpr.innerStride()); return internal::ploadt<PacketType, LoadMode>(m_data + index * m_innerStride.value());
} }
template<int StoreMode, typename PacketType> template<int StoreMode, typename PacketType>
EIGEN_STRONG_INLINE EIGEN_STRONG_INLINE
void writePacket(Index row, Index col, const PacketType& x) void writePacket(Index row, Index col, const PacketType& x)
{ {
PointerType ptr = m_data + row * m_xpr.rowStride() + col * m_xpr.colStride(); PointerType ptr = m_data + row * rowStride() + col * colStride();
return internal::pstoret<Scalar, PacketType, StoreMode>(ptr, x); return internal::pstoret<Scalar, PacketType, StoreMode>(ptr, x);
} }
@ -662,12 +879,17 @@ struct mapbase_evaluator : evaluator_base<Derived>
EIGEN_STRONG_INLINE EIGEN_STRONG_INLINE
void writePacket(Index index, const PacketType& x) void writePacket(Index index, const PacketType& x)
{ {
internal::pstoret<Scalar, PacketType, StoreMode>(m_data + index * m_xpr.innerStride(), x); internal::pstoret<Scalar, PacketType, StoreMode>(m_data + index * m_innerStride.value(), x);
} }
protected: protected:
EIGEN_DEVICE_FUNC
inline Index rowStride() const { return XprType::IsRowMajor ? m_outerStride.value() : m_innerStride.value(); }
EIGEN_DEVICE_FUNC
inline Index colStride() const { return XprType::IsRowMajor ? m_innerStride.value() : m_outerStride.value(); }
PointerType m_data; PointerType m_data;
const XprType& m_xpr; const internal::variable_if_dynamic<Index, XprType::InnerStrideAtCompileTime> m_innerStride;
const internal::variable_if_dynamic<Index, XprType::OuterStrideAtCompileTime> m_outerStride;
}; };
template<typename PlainObjectType, int MapOptions, typename StrideType> template<typename PlainObjectType, int MapOptions, typename StrideType>
@ -755,9 +977,7 @@ struct evaluator<Block<ArgType, BlockRows, BlockCols, InnerPanel> >
OuterStrideAtCompileTime = HasSameStorageOrderAsArgType OuterStrideAtCompileTime = HasSameStorageOrderAsArgType
? int(outer_stride_at_compile_time<ArgType>::ret) ? int(outer_stride_at_compile_time<ArgType>::ret)
: int(inner_stride_at_compile_time<ArgType>::ret), : int(inner_stride_at_compile_time<ArgType>::ret),
MaskPacketAccessBit = (InnerSize == Dynamic || (InnerSize % packet_traits<Scalar>::size) == 0) MaskPacketAccessBit = (InnerStrideAtCompileTime == 1) ? PacketAccessBit : 0,
&& (InnerStrideAtCompileTime == 1)
? PacketAccessBit : 0,
FlagsLinearAccessBit = (RowsAtCompileTime == 1 || ColsAtCompileTime == 1 || (InnerPanel && (evaluator<ArgType>::Flags&LinearAccessBit))) ? LinearAccessBit : 0, FlagsLinearAccessBit = (RowsAtCompileTime == 1 || ColsAtCompileTime == 1 || (InnerPanel && (evaluator<ArgType>::Flags&LinearAccessBit))) ? LinearAccessBit : 0,
FlagsRowMajorBit = XprType::Flags&RowMajorBit, FlagsRowMajorBit = XprType::Flags&RowMajorBit,
@ -884,7 +1104,7 @@ struct block_evaluator<ArgType, BlockRows, BlockCols, InnerPanel, /* HasDirectAc
: mapbase_evaluator<XprType, typename XprType::PlainObject>(block) : mapbase_evaluator<XprType, typename XprType::PlainObject>(block)
{ {
// TODO: for the 3.3 release, this should be turned to an internal assertion, but let's keep it as is for the beta lifetime // TODO: for the 3.3 release, this should be turned to an internal assertion, but let's keep it as is for the beta lifetime
eigen_assert(((size_t(block.data()) % EIGEN_PLAIN_ENUM_MAX(1,evaluator<XprType>::Alignment)) == 0) && "data is not aligned"); eigen_assert(((internal::UIntPtr(block.data()) % EIGEN_PLAIN_ENUM_MAX(1,evaluator<XprType>::Alignment)) == 0) && "data is not aligned");
} }
}; };
@ -1325,7 +1545,7 @@ struct evaluator<Diagonal<ArgType, DiagIndex> >
enum { enum {
CoeffReadCost = evaluator<ArgType>::CoeffReadCost, CoeffReadCost = evaluator<ArgType>::CoeffReadCost,
Flags = (unsigned int)evaluator<ArgType>::Flags & (HereditaryBits | LinearAccessBit | DirectAccessBit) & ~RowMajorBit, Flags = (unsigned int)(evaluator<ArgType>::Flags & (HereditaryBits | DirectAccessBit) & ~RowMajorBit) | LinearAccessBit,
Alignment = 0 Alignment = 0
}; };

View File

@ -160,7 +160,7 @@ template<typename OtherDerived>
EIGEN_STRONG_INLINE Derived & EIGEN_STRONG_INLINE Derived &
MatrixBase<Derived>::operator-=(const MatrixBase<OtherDerived> &other) MatrixBase<Derived>::operator-=(const MatrixBase<OtherDerived> &other)
{ {
call_assignment(derived(), other.derived(), internal::sub_assign_op<Scalar>()); call_assignment(derived(), other.derived(), internal::sub_assign_op<Scalar,typename OtherDerived::Scalar>());
return derived(); return derived();
} }
@ -173,7 +173,7 @@ template<typename OtherDerived>
EIGEN_STRONG_INLINE Derived & EIGEN_STRONG_INLINE Derived &
MatrixBase<Derived>::operator+=(const MatrixBase<OtherDerived>& other) MatrixBase<Derived>::operator+=(const MatrixBase<OtherDerived>& other)
{ {
call_assignment(derived(), other.derived(), internal::add_assign_op<Scalar>()); call_assignment(derived(), other.derived(), internal::add_assign_op<Scalar,typename OtherDerived::Scalar>());
return derived(); return derived();
} }

View File

@ -20,7 +20,8 @@ struct traits<CwiseNullaryOp<NullaryOp, PlainObjectType> > : traits<PlainObjectT
Flags = traits<PlainObjectType>::Flags & RowMajorBit Flags = traits<PlainObjectType>::Flags & RowMajorBit
}; };
}; };
}
} // namespace internal
/** \class CwiseNullaryOp /** \class CwiseNullaryOp
* \ingroup Core_Module * \ingroup Core_Module
@ -37,7 +38,23 @@ struct traits<CwiseNullaryOp<NullaryOp, PlainObjectType> > : traits<PlainObjectT
* However, if you want to write a function returning such an expression, you * However, if you want to write a function returning such an expression, you
* will need to use this class. * will need to use this class.
* *
* \sa class CwiseUnaryOp, class CwiseBinaryOp, DenseBase::NullaryExpr() * The functor NullaryOp must expose one of the following method:
<table class="manual">
<tr ><td>\c operator()() </td><td>if the procedural generation does not depend on the coefficient entries (e.g., random numbers)</td></tr>
<tr class="alt"><td>\c operator()(Index i)</td><td>if the procedural generation makes sense for vectors only and that it depends on the coefficient index \c i (e.g., linspace) </td></tr>
<tr ><td>\c operator()(Index i,Index j)</td><td>if the procedural generation depends on the matrix coordinates \c i, \c j (e.g., to generate a checkerboard with 0 and 1)</td></tr>
</table>
* It is also possible to expose the last two operators if the generation makes sense for matrices but can be optimized for vectors.
*
* See DenseBase::NullaryExpr(Index,const CustomNullaryOp&) for an example binding
* C++11 random number generators.
*
* A nullary expression can also be used to implement custom sophisticated matrix manipulations
* that cannot be covered by the existing set of natively supported matrix manipulations.
* See this \ref TopicCustomizing_NullaryExpr "page" for some examples and additional explanations
* on the behavior of CwiseNullaryOp.
*
* \sa class CwiseUnaryOp, class CwiseBinaryOp, DenseBase::NullaryExpr
*/ */
template<typename NullaryOp, typename PlainObjectType> template<typename NullaryOp, typename PlainObjectType>
class CwiseNullaryOp : public internal::dense_xpr_base< CwiseNullaryOp<NullaryOp, PlainObjectType> >::type, internal::no_assignment_operator class CwiseNullaryOp : public internal::dense_xpr_base< CwiseNullaryOp<NullaryOp, PlainObjectType> >::type, internal::no_assignment_operator
@ -62,30 +79,6 @@ class CwiseNullaryOp : public internal::dense_xpr_base< CwiseNullaryOp<NullaryOp
EIGEN_DEVICE_FUNC EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE Index cols() const { return m_cols.value(); } EIGEN_STRONG_INLINE Index cols() const { return m_cols.value(); }
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE const Scalar coeff(Index rowId, Index colId) const
{
return m_functor(rowId, colId);
}
template<int LoadMode>
EIGEN_STRONG_INLINE PacketScalar packet(Index rowId, Index colId) const
{
return m_functor.packetOp(rowId, colId);
}
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE const Scalar coeff(Index index) const
{
return m_functor(index);
}
template<int LoadMode>
EIGEN_STRONG_INLINE PacketScalar packet(Index index) const
{
return m_functor.packetOp(index);
}
/** \returns the functor representing the nullary operation */ /** \returns the functor representing the nullary operation */
EIGEN_DEVICE_FUNC EIGEN_DEVICE_FUNC
const NullaryOp& functor() const { return m_functor; } const NullaryOp& functor() const { return m_functor; }
@ -227,7 +220,7 @@ DenseBase<Derived>::Constant(const Scalar& value)
* *
* The function generates 'size' equally spaced values in the closed interval [low,high]. * The function generates 'size' equally spaced values in the closed interval [low,high].
* This particular version of LinSpaced() uses sequential access, i.e. vector access is * This particular version of LinSpaced() uses sequential access, i.e. vector access is
* assumed to be a(0), a(1), ..., a(size). This assumption allows for better vectorization * assumed to be a(0), a(1), ..., a(size-1). This assumption allows for better vectorization
* and yields faster code than the random access version. * and yields faster code than the random access version.
* *
* When size is set to 1, a vector of length 1 containing 'high' is returned. * When size is set to 1, a vector of length 1 containing 'high' is returned.
@ -396,7 +389,7 @@ EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::setLinSpaced(Index newSize, con
/** /**
* \brief Sets a linearly spaced vector. * \brief Sets a linearly spaced vector.
* *
* The function fill *this with equally spaced values in the closed interval [low,high]. * The function fills *this with equally spaced values in the closed interval [low,high].
* When size is set to 1, a vector of length 1 containing 'high' is returned. * When size is set to 1, a vector of length 1 containing 'high' is returned.
* *
* \only_for_vectors * \only_for_vectors

View File

@ -0,0 +1,197 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2008-2014 Gael Guennebaud <gael.guennebaud@inria.fr>
// Copyright (C) 2006-2008 Benoit Jacob <jacob.benoit.1@gmail.com>
// Copyright (C) 2016 Eugene Brevdo <ebrevdo@gmail.com>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
#ifndef EIGEN_CWISE_TERNARY_OP_H
#define EIGEN_CWISE_TERNARY_OP_H
namespace Eigen {
namespace internal {
template <typename TernaryOp, typename Arg1, typename Arg2, typename Arg3>
struct traits<CwiseTernaryOp<TernaryOp, Arg1, Arg2, Arg3> > {
// we must not inherit from traits<Arg1> since it has
// the potential to cause problems with MSVC
typedef typename remove_all<Arg1>::type Ancestor;
typedef typename traits<Ancestor>::XprKind XprKind;
enum {
RowsAtCompileTime = traits<Ancestor>::RowsAtCompileTime,
ColsAtCompileTime = traits<Ancestor>::ColsAtCompileTime,
MaxRowsAtCompileTime = traits<Ancestor>::MaxRowsAtCompileTime,
MaxColsAtCompileTime = traits<Ancestor>::MaxColsAtCompileTime
};
// even though we require Arg1, Arg2, and Arg3 to have the same scalar type
// (see CwiseTernaryOp constructor),
// we still want to handle the case when the result type is different.
typedef typename result_of<TernaryOp(
const typename Arg1::Scalar&, const typename Arg2::Scalar&,
const typename Arg3::Scalar&)>::type Scalar;
typedef typename internal::traits<Arg1>::StorageKind StorageKind;
typedef typename internal::traits<Arg1>::StorageIndex StorageIndex;
typedef typename Arg1::Nested Arg1Nested;
typedef typename Arg2::Nested Arg2Nested;
typedef typename Arg3::Nested Arg3Nested;
typedef typename remove_reference<Arg1Nested>::type _Arg1Nested;
typedef typename remove_reference<Arg2Nested>::type _Arg2Nested;
typedef typename remove_reference<Arg3Nested>::type _Arg3Nested;
enum { Flags = _Arg1Nested::Flags & RowMajorBit };
};
} // end namespace internal
template <typename TernaryOp, typename Arg1, typename Arg2, typename Arg3,
typename StorageKind>
class CwiseTernaryOpImpl;
/** \class CwiseTernaryOp
* \ingroup Core_Module
*
* \brief Generic expression where a coefficient-wise ternary operator is
* applied to two expressions
*
* \tparam TernaryOp template functor implementing the operator
* \tparam Arg1Type the type of the first argument
* \tparam Arg2Type the type of the second argument
* \tparam Arg3Type the type of the third argument
*
* This class represents an expression where a coefficient-wise ternary
* operator is applied to three expressions.
* It is the return type of ternary operators, by which we mean only those
* ternary operators where
* all three arguments are Eigen expressions.
* For example, the return type of betainc(matrix1, matrix2, matrix3) is a
* CwiseTernaryOp.
*
* Most of the time, this is the only way that it is used, so you typically
* don't have to name
* CwiseTernaryOp types explicitly.
*
* \sa MatrixBase::ternaryExpr(const MatrixBase<Argument2> &, const
* MatrixBase<Argument3> &, const CustomTernaryOp &) const, class CwiseBinaryOp,
* class CwiseUnaryOp, class CwiseNullaryOp
*/
template <typename TernaryOp, typename Arg1Type, typename Arg2Type,
typename Arg3Type>
class CwiseTernaryOp : public CwiseTernaryOpImpl<
TernaryOp, Arg1Type, Arg2Type, Arg3Type,
typename internal::traits<Arg1Type>::StorageKind>,
internal::no_assignment_operator
{
public:
typedef typename internal::remove_all<Arg1Type>::type Arg1;
typedef typename internal::remove_all<Arg2Type>::type Arg2;
typedef typename internal::remove_all<Arg3Type>::type Arg3;
typedef typename CwiseTernaryOpImpl<
TernaryOp, Arg1Type, Arg2Type, Arg3Type,
typename internal::traits<Arg1Type>::StorageKind>::Base Base;
EIGEN_GENERIC_PUBLIC_INTERFACE(CwiseTernaryOp)
typedef typename internal::ref_selector<Arg1Type>::type Arg1Nested;
typedef typename internal::ref_selector<Arg2Type>::type Arg2Nested;
typedef typename internal::ref_selector<Arg3Type>::type Arg3Nested;
typedef typename internal::remove_reference<Arg1Nested>::type _Arg1Nested;
typedef typename internal::remove_reference<Arg2Nested>::type _Arg2Nested;
typedef typename internal::remove_reference<Arg3Nested>::type _Arg3Nested;
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE CwiseTernaryOp(const Arg1& a1, const Arg2& a2,
const Arg3& a3,
const TernaryOp& func = TernaryOp())
: m_arg1(a1), m_arg2(a2), m_arg3(a3), m_functor(func) {
// require the sizes to match
EIGEN_STATIC_ASSERT_SAME_MATRIX_SIZE(Arg1, Arg2)
EIGEN_STATIC_ASSERT_SAME_MATRIX_SIZE(Arg1, Arg3)
// The index types should match
EIGEN_STATIC_ASSERT((internal::is_same<
typename internal::traits<Arg1Type>::StorageKind,
typename internal::traits<Arg2Type>::StorageKind>::value),
STORAGE_KIND_MUST_MATCH)
EIGEN_STATIC_ASSERT((internal::is_same<
typename internal::traits<Arg1Type>::StorageKind,
typename internal::traits<Arg3Type>::StorageKind>::value),
STORAGE_KIND_MUST_MATCH)
eigen_assert(a1.rows() == a2.rows() && a1.cols() == a2.cols() &&
a1.rows() == a3.rows() && a1.cols() == a3.cols());
}
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE Index rows() const {
// return the fixed size type if available to enable compile time
// optimizations
if (internal::traits<typename internal::remove_all<Arg1Nested>::type>::
RowsAtCompileTime == Dynamic &&
internal::traits<typename internal::remove_all<Arg2Nested>::type>::
RowsAtCompileTime == Dynamic)
return m_arg3.rows();
else if (internal::traits<typename internal::remove_all<Arg1Nested>::type>::
RowsAtCompileTime == Dynamic &&
internal::traits<typename internal::remove_all<Arg3Nested>::type>::
RowsAtCompileTime == Dynamic)
return m_arg2.rows();
else
return m_arg1.rows();
}
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE Index cols() const {
// return the fixed size type if available to enable compile time
// optimizations
if (internal::traits<typename internal::remove_all<Arg1Nested>::type>::
ColsAtCompileTime == Dynamic &&
internal::traits<typename internal::remove_all<Arg2Nested>::type>::
ColsAtCompileTime == Dynamic)
return m_arg3.cols();
else if (internal::traits<typename internal::remove_all<Arg1Nested>::type>::
ColsAtCompileTime == Dynamic &&
internal::traits<typename internal::remove_all<Arg3Nested>::type>::
ColsAtCompileTime == Dynamic)
return m_arg2.cols();
else
return m_arg1.cols();
}
/** \returns the first argument nested expression */
EIGEN_DEVICE_FUNC
const _Arg1Nested& arg1() const { return m_arg1; }
/** \returns the first argument nested expression */
EIGEN_DEVICE_FUNC
const _Arg2Nested& arg2() const { return m_arg2; }
/** \returns the third argument nested expression */
EIGEN_DEVICE_FUNC
const _Arg3Nested& arg3() const { return m_arg3; }
/** \returns the functor representing the ternary operation */
EIGEN_DEVICE_FUNC
const TernaryOp& functor() const { return m_functor; }
protected:
Arg1Nested m_arg1;
Arg2Nested m_arg2;
Arg3Nested m_arg3;
const TernaryOp m_functor;
};
// Generic API dispatcher
template <typename TernaryOp, typename Arg1, typename Arg2, typename Arg3,
typename StorageKind>
class CwiseTernaryOpImpl
: public internal::generic_xpr_base<
CwiseTernaryOp<TernaryOp, Arg1, Arg2, Arg3> >::type {
public:
typedef typename internal::generic_xpr_base<
CwiseTernaryOp<TernaryOp, Arg1, Arg2, Arg3> >::type Base;
};
} // end namespace Eigen
#endif // EIGEN_CWISE_TERNARY_OP_H

View File

@ -34,17 +34,15 @@ static inline void check_DenseIndex_is_signed() {
* \tparam Derived is the derived type, e.g., a matrix type or an expression. * \tparam Derived is the derived type, e.g., a matrix type or an expression.
* *
* This class can be extended with the help of the plugin mechanism described on the page * This class can be extended with the help of the plugin mechanism described on the page
* \ref TopicCustomizingEigen by defining the preprocessor symbol \c EIGEN_DENSEBASE_PLUGIN. * \ref TopicCustomizing_Plugins by defining the preprocessor symbol \c EIGEN_DENSEBASE_PLUGIN.
* *
* \sa \blank \ref TopicClassHierarchy * \sa \blank \ref TopicClassHierarchy
*/ */
template<typename Derived> class DenseBase template<typename Derived> class DenseBase
#ifndef EIGEN_PARSED_BY_DOXYGEN #ifndef EIGEN_PARSED_BY_DOXYGEN
: public internal::special_scalar_op_base<Derived, typename internal::traits<Derived>::Scalar,
typename NumTraits<typename internal::traits<Derived>::Scalar>::Real,
DenseCoeffsBase<Derived> >
#else
: public DenseCoeffsBase<Derived> : public DenseCoeffsBase<Derived>
#else
: public DenseCoeffsBase<Derived,DirectWriteAccessors>
#endif // not EIGEN_PARSED_BY_DOXYGEN #endif // not EIGEN_PARSED_BY_DOXYGEN
{ {
public: public:
@ -73,10 +71,8 @@ template<typename Derived> class DenseBase
typedef Scalar value_type; typedef Scalar value_type;
typedef typename NumTraits<Scalar>::Real RealScalar; typedef typename NumTraits<Scalar>::Real RealScalar;
typedef internal::special_scalar_op_base<Derived,Scalar,RealScalar, DenseCoeffsBase<Derived> > Base; typedef DenseCoeffsBase<Derived> Base;
using Base::operator*;
using Base::operator/;
using Base::derived; using Base::derived;
using Base::const_cast_derived; using Base::const_cast_derived;
using Base::rows; using Base::rows;
@ -562,12 +558,15 @@ template<typename Derived> class DenseBase
EIGEN_DEVICE_FUNC void reverseInPlace(); EIGEN_DEVICE_FUNC void reverseInPlace();
#define EIGEN_CURRENT_STORAGE_BASE_CLASS Eigen::DenseBase #define EIGEN_CURRENT_STORAGE_BASE_CLASS Eigen::DenseBase
#define EIGEN_DOC_BLOCK_ADDONS_NOT_INNER_PANEL
#define EIGEN_DOC_BLOCK_ADDONS_INNER_PANEL_IF(COND)
# include "../plugins/BlockMethods.h" # include "../plugins/BlockMethods.h"
# ifdef EIGEN_DENSEBASE_PLUGIN # ifdef EIGEN_DENSEBASE_PLUGIN
# include EIGEN_DENSEBASE_PLUGIN # include EIGEN_DENSEBASE_PLUGIN
# endif # endif
#undef EIGEN_CURRENT_STORAGE_BASE_CLASS #undef EIGEN_CURRENT_STORAGE_BASE_CLASS
#undef EIGEN_DOC_BLOCK_ADDONS_NOT_INNER_PANEL
#undef EIGEN_DOC_BLOCK_ADDONS_INNER_PANEL_IF
// disable the use of evalTo for dense objects with a nice compilation error // disable the use of evalTo for dense objects with a nice compilation error
template<typename Dest> template<typename Dest>

View File

@ -67,13 +67,13 @@ struct plain_array
template<typename PtrType> template<typename PtrType>
EIGEN_ALWAYS_INLINE PtrType eigen_unaligned_array_assert_workaround_gcc47(PtrType array) { return array; } EIGEN_ALWAYS_INLINE PtrType eigen_unaligned_array_assert_workaround_gcc47(PtrType array) { return array; }
#define EIGEN_MAKE_UNALIGNED_ARRAY_ASSERT(sizemask) \ #define EIGEN_MAKE_UNALIGNED_ARRAY_ASSERT(sizemask) \
eigen_assert((reinterpret_cast<size_t>(eigen_unaligned_array_assert_workaround_gcc47(array)) & (sizemask)) == 0 \ eigen_assert((internal::UIntPtr(eigen_unaligned_array_assert_workaround_gcc47(array)) & (sizemask)) == 0 \
&& "this assertion is explained here: " \ && "this assertion is explained here: " \
"http://eigen.tuxfamily.org/dox-devel/group__TopicUnalignedArrayAssert.html" \ "http://eigen.tuxfamily.org/dox-devel/group__TopicUnalignedArrayAssert.html" \
" **** READ THIS WEB PAGE !!! ****"); " **** READ THIS WEB PAGE !!! ****");
#else #else
#define EIGEN_MAKE_UNALIGNED_ARRAY_ASSERT(sizemask) \ #define EIGEN_MAKE_UNALIGNED_ARRAY_ASSERT(sizemask) \
eigen_assert((reinterpret_cast<size_t>(array) & (sizemask)) == 0 \ eigen_assert((internal::UIntPtr(array) & (sizemask)) == 0 \
&& "this assertion is explained here: " \ && "this assertion is explained here: " \
"http://eigen.tuxfamily.org/dox-devel/group__TopicUnalignedArrayAssert.html" \ "http://eigen.tuxfamily.org/dox-devel/group__TopicUnalignedArrayAssert.html" \
" **** READ THIS WEB PAGE !!! ****"); " **** READ THIS WEB PAGE !!! ****");
@ -362,9 +362,9 @@ template<typename T, int _Options> class DenseStorage<T, Dynamic, Dynamic, Dynam
} }
return *this; return *this;
} }
#ifdef EIGEN_HAVE_RVALUE_REFERENCES #if EIGEN_HAS_RVALUE_REFERENCES
EIGEN_DEVICE_FUNC EIGEN_DEVICE_FUNC
DenseStorage(DenseStorage&& other) DenseStorage(DenseStorage&& other) EIGEN_NOEXCEPT
: m_data(std::move(other.m_data)) : m_data(std::move(other.m_data))
, m_rows(std::move(other.m_rows)) , m_rows(std::move(other.m_rows))
, m_cols(std::move(other.m_cols)) , m_cols(std::move(other.m_cols))
@ -374,7 +374,7 @@ template<typename T, int _Options> class DenseStorage<T, Dynamic, Dynamic, Dynam
other.m_cols = 0; other.m_cols = 0;
} }
EIGEN_DEVICE_FUNC EIGEN_DEVICE_FUNC
DenseStorage& operator=(DenseStorage&& other) DenseStorage& operator=(DenseStorage&& other) EIGEN_NOEXCEPT
{ {
using std::swap; using std::swap;
swap(m_data, other.m_data); swap(m_data, other.m_data);
@ -441,9 +441,9 @@ template<typename T, int _Rows, int _Options> class DenseStorage<T, Dynamic, _Ro
} }
return *this; return *this;
} }
#ifdef EIGEN_HAVE_RVALUE_REFERENCES #if EIGEN_HAS_RVALUE_REFERENCES
EIGEN_DEVICE_FUNC EIGEN_DEVICE_FUNC
DenseStorage(DenseStorage&& other) DenseStorage(DenseStorage&& other) EIGEN_NOEXCEPT
: m_data(std::move(other.m_data)) : m_data(std::move(other.m_data))
, m_cols(std::move(other.m_cols)) , m_cols(std::move(other.m_cols))
{ {
@ -451,7 +451,7 @@ template<typename T, int _Rows, int _Options> class DenseStorage<T, Dynamic, _Ro
other.m_cols = 0; other.m_cols = 0;
} }
EIGEN_DEVICE_FUNC EIGEN_DEVICE_FUNC
DenseStorage& operator=(DenseStorage&& other) DenseStorage& operator=(DenseStorage&& other) EIGEN_NOEXCEPT
{ {
using std::swap; using std::swap;
swap(m_data, other.m_data); swap(m_data, other.m_data);
@ -514,9 +514,9 @@ template<typename T, int _Cols, int _Options> class DenseStorage<T, Dynamic, Dyn
} }
return *this; return *this;
} }
#ifdef EIGEN_HAVE_RVALUE_REFERENCES #if EIGEN_HAS_RVALUE_REFERENCES
EIGEN_DEVICE_FUNC EIGEN_DEVICE_FUNC
DenseStorage(DenseStorage&& other) DenseStorage(DenseStorage&& other) EIGEN_NOEXCEPT
: m_data(std::move(other.m_data)) : m_data(std::move(other.m_data))
, m_rows(std::move(other.m_rows)) , m_rows(std::move(other.m_rows))
{ {
@ -524,7 +524,7 @@ template<typename T, int _Cols, int _Options> class DenseStorage<T, Dynamic, Dyn
other.m_rows = 0; other.m_rows = 0;
} }
EIGEN_DEVICE_FUNC EIGEN_DEVICE_FUNC
DenseStorage& operator=(DenseStorage&& other) DenseStorage& operator=(DenseStorage&& other) EIGEN_NOEXCEPT
{ {
using std::swap; using std::swap;
swap(m_data, other.m_data); swap(m_data, other.m_data);

View File

@ -71,18 +71,17 @@ class DiagonalBase : public EigenBase<Derived>
return InverseReturnType(diagonal().cwiseInverse()); return InverseReturnType(diagonal().cwiseInverse());
} }
typedef DiagonalWrapper<const CwiseUnaryOp<internal::scalar_multiple_op<Scalar>, const DiagonalVectorType> > ScalarMultipleReturnType;
EIGEN_DEVICE_FUNC EIGEN_DEVICE_FUNC
inline const ScalarMultipleReturnType inline const DiagonalWrapper<const EIGEN_EXPR_BINARYOP_SCALAR_RETURN_TYPE(DiagonalVectorType,Scalar,product) >
operator*(const Scalar& scalar) const operator*(const Scalar& scalar) const
{ {
return ScalarMultipleReturnType(diagonal() * scalar); return DiagonalWrapper<const EIGEN_EXPR_BINARYOP_SCALAR_RETURN_TYPE(DiagonalVectorType,Scalar,product) >(diagonal() * scalar);
} }
EIGEN_DEVICE_FUNC EIGEN_DEVICE_FUNC
friend inline const ScalarMultipleReturnType friend inline const DiagonalWrapper<const EIGEN_SCALAR_BINARYOP_EXPR_RETURN_TYPE(Scalar,DiagonalVectorType,product) >
operator*(const Scalar& scalar, const DiagonalBase& other) operator*(const Scalar& scalar, const DiagonalBase& other)
{ {
return ScalarMultipleReturnType(other.diagonal() * scalar); return DiagonalWrapper<const EIGEN_SCALAR_BINARYOP_EXPR_RETURN_TYPE(Scalar,DiagonalVectorType,product) >(scalar * other.diagonal());
} }
}; };
@ -317,19 +316,19 @@ struct Diagonal2Dense {};
template<> struct AssignmentKind<DenseShape,DiagonalShape> { typedef Diagonal2Dense Kind; }; template<> struct AssignmentKind<DenseShape,DiagonalShape> { typedef Diagonal2Dense Kind; };
// Diagonal matrix to Dense assignment // Diagonal matrix to Dense assignment
template< typename DstXprType, typename SrcXprType, typename Functor, typename Scalar> template< typename DstXprType, typename SrcXprType, typename Functor>
struct Assignment<DstXprType, SrcXprType, Functor, Diagonal2Dense, Scalar> struct Assignment<DstXprType, SrcXprType, Functor, Diagonal2Dense>
{ {
static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<typename DstXprType::Scalar> &/*func*/) static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<typename DstXprType::Scalar,typename SrcXprType::Scalar> &/*func*/)
{ {
dst.setZero(); dst.setZero();
dst.diagonal() = src.diagonal(); dst.diagonal() = src.diagonal();
} }
static void run(DstXprType &dst, const SrcXprType &src, const internal::add_assign_op<typename DstXprType::Scalar> &/*func*/) static void run(DstXprType &dst, const SrcXprType &src, const internal::add_assign_op<typename DstXprType::Scalar,typename SrcXprType::Scalar> &/*func*/)
{ dst.diagonal() += src.diagonal(); } { dst.diagonal() += src.diagonal(); }
static void run(DstXprType &dst, const SrcXprType &src, const internal::sub_assign_op<typename DstXprType::Scalar> &/*func*/) static void run(DstXprType &dst, const SrcXprType &src, const internal::sub_assign_op<typename DstXprType::Scalar,typename SrcXprType::Scalar> &/*func*/)
{ dst.diagonal() -= src.diagonal(); } { dst.diagonal() -= src.diagonal(); }
}; };

View File

@ -28,22 +28,24 @@ template<typename T, typename U,
> >
struct dot_nocheck struct dot_nocheck
{ {
typedef typename scalar_product_traits<typename traits<T>::Scalar,typename traits<U>::Scalar>::ReturnType ResScalar; typedef scalar_conj_product_op<typename traits<T>::Scalar,typename traits<U>::Scalar> conj_prod;
typedef typename conj_prod::result_type ResScalar;
EIGEN_DEVICE_FUNC EIGEN_DEVICE_FUNC
static inline ResScalar run(const MatrixBase<T>& a, const MatrixBase<U>& b) static inline ResScalar run(const MatrixBase<T>& a, const MatrixBase<U>& b)
{ {
return a.template binaryExpr<scalar_conj_product_op<typename traits<T>::Scalar,typename traits<U>::Scalar> >(b).sum(); return a.template binaryExpr<conj_prod>(b).sum();
} }
}; };
template<typename T, typename U> template<typename T, typename U>
struct dot_nocheck<T, U, true> struct dot_nocheck<T, U, true>
{ {
typedef typename scalar_product_traits<typename traits<T>::Scalar,typename traits<U>::Scalar>::ReturnType ResScalar; typedef scalar_conj_product_op<typename traits<T>::Scalar,typename traits<U>::Scalar> conj_prod;
typedef typename conj_prod::result_type ResScalar;
EIGEN_DEVICE_FUNC EIGEN_DEVICE_FUNC
static inline ResScalar run(const MatrixBase<T>& a, const MatrixBase<U>& b) static inline ResScalar run(const MatrixBase<T>& a, const MatrixBase<U>& b)
{ {
return a.transpose().template binaryExpr<scalar_conj_product_op<typename traits<T>::Scalar,typename traits<U>::Scalar> >(b).sum(); return a.transpose().template binaryExpr<conj_prod>(b).sum();
} }
}; };
@ -62,7 +64,7 @@ struct dot_nocheck<T, U, true>
template<typename Derived> template<typename Derived>
template<typename OtherDerived> template<typename OtherDerived>
EIGEN_DEVICE_FUNC EIGEN_DEVICE_FUNC
typename internal::scalar_product_traits<typename internal::traits<Derived>::Scalar,typename internal::traits<OtherDerived>::Scalar>::ReturnType typename ScalarBinaryOpTraits<typename internal::traits<Derived>::Scalar,typename internal::traits<OtherDerived>::Scalar>::ReturnType
MatrixBase<Derived>::dot(const MatrixBase<OtherDerived>& other) const MatrixBase<Derived>::dot(const MatrixBase<OtherDerived>& other) const
{ {
EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived) EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
@ -227,9 +229,12 @@ struct lpNorm_selector<Derived, 2>
template<typename Derived> template<typename Derived>
struct lpNorm_selector<Derived, Infinity> struct lpNorm_selector<Derived, Infinity>
{ {
typedef typename NumTraits<typename traits<Derived>::Scalar>::Real RealScalar;
EIGEN_DEVICE_FUNC EIGEN_DEVICE_FUNC
static inline typename NumTraits<typename traits<Derived>::Scalar>::Real run(const MatrixBase<Derived>& m) static inline RealScalar run(const MatrixBase<Derived>& m)
{ {
if(Derived::SizeAtCompileTime==0 || (Derived::SizeAtCompileTime==Dynamic && m.size()==0))
return RealScalar(0);
return m.cwiseAbs().maxCoeff(); return m.cwiseAbs().maxCoeff();
} }
}; };
@ -240,6 +245,8 @@ struct lpNorm_selector<Derived, Infinity>
* of the coefficients of \c *this. If \a p is the special value \a Eigen::Infinity, this function returns the \f$ \ell^\infty \f$ * of the coefficients of \c *this. If \a p is the special value \a Eigen::Infinity, this function returns the \f$ \ell^\infty \f$
* norm, that is the maximum of the absolute values of the coefficients of \c *this. * norm, that is the maximum of the absolute values of the coefficients of \c *this.
* *
* In all cases, if \c *this is empty, then the value 0 is returned.
*
* \note For matrices, this function does not compute the <a href="https://en.wikipedia.org/wiki/Operator_norm">operator-norm</a>. That is, if \c *this is a matrix, then its coefficients are interpreted as a 1D vector. Nonetheless, you can easily compute the 1-norm and \f$\infty\f$-norm matrix operator norms using \link TutorialReductionsVisitorsBroadcastingReductionsNorm partial reductions \endlink. * \note For matrices, this function does not compute the <a href="https://en.wikipedia.org/wiki/Operator_norm">operator-norm</a>. That is, if \c *this is a matrix, then its coefficients are interpreted as a 1D vector. Nonetheless, you can easily compute the 1-norm and \f$\infty\f$-norm matrix operator norms using \link TutorialReductionsVisitorsBroadcastingReductionsNorm partial reductions \endlink.
* *
* \sa norm() * \sa norm()

View File

@ -138,7 +138,7 @@ template<typename Derived>
template<typename OtherDerived> template<typename OtherDerived>
Derived& DenseBase<Derived>::operator+=(const EigenBase<OtherDerived> &other) Derived& DenseBase<Derived>::operator+=(const EigenBase<OtherDerived> &other)
{ {
call_assignment(derived(), other.derived(), internal::add_assign_op<Scalar>()); call_assignment(derived(), other.derived(), internal::add_assign_op<Scalar,typename OtherDerived::Scalar>());
return derived(); return derived();
} }
@ -146,7 +146,7 @@ template<typename Derived>
template<typename OtherDerived> template<typename OtherDerived>
Derived& DenseBase<Derived>::operator-=(const EigenBase<OtherDerived> &other) Derived& DenseBase<Derived>::operator-=(const EigenBase<OtherDerived> &other)
{ {
call_assignment(derived(), other.derived(), internal::sub_assign_op<Scalar>()); call_assignment(derived(), other.derived(), internal::sub_assign_op<Scalar,typename OtherDerived::Scalar>());
return derived(); return derived();
} }

View File

@ -159,20 +159,20 @@ struct gemv_static_vector_if<Scalar,Size,Dynamic,true>
template<typename Scalar,int Size,int MaxSize> template<typename Scalar,int Size,int MaxSize>
struct gemv_static_vector_if<Scalar,Size,MaxSize,true> struct gemv_static_vector_if<Scalar,Size,MaxSize,true>
{ {
#if EIGEN_MAX_STATIC_ALIGN_BYTES!=0
internal::plain_array<Scalar,EIGEN_SIZE_MIN_PREFER_FIXED(Size,MaxSize),0> m_data;
EIGEN_STRONG_INLINE Scalar* data() { return m_data.array; }
#else
// Some architectures cannot align on the stack,
// => let's manually enforce alignment by allocating more data and return the address of the first aligned element.
enum { enum {
ForceAlignment = internal::packet_traits<Scalar>::Vectorizable, ForceAlignment = internal::packet_traits<Scalar>::Vectorizable,
PacketSize = internal::packet_traits<Scalar>::size PacketSize = internal::packet_traits<Scalar>::size
}; };
internal::plain_array<Scalar,EIGEN_SIZE_MIN_PREFER_FIXED(Size,MaxSize)+(ForceAlignment?PacketSize:0),0> m_data; #if EIGEN_MAX_STATIC_ALIGN_BYTES!=0
internal::plain_array<Scalar,EIGEN_SIZE_MIN_PREFER_FIXED(Size,MaxSize),0,EIGEN_PLAIN_ENUM_MIN(AlignedMax,PacketSize)> m_data;
EIGEN_STRONG_INLINE Scalar* data() { return m_data.array; }
#else
// Some architectures cannot align on the stack,
// => let's manually enforce alignment by allocating more data and return the address of the first aligned element.
internal::plain_array<Scalar,EIGEN_SIZE_MIN_PREFER_FIXED(Size,MaxSize)+(ForceAlignment?EIGEN_MAX_ALIGN_BYTES:0),0> m_data;
EIGEN_STRONG_INLINE Scalar* data() { EIGEN_STRONG_INLINE Scalar* data() {
return ForceAlignment return ForceAlignment
? reinterpret_cast<Scalar*>((reinterpret_cast<size_t>(m_data.array) & ~(size_t(EIGEN_MAX_ALIGN_BYTES-1))) + EIGEN_MAX_ALIGN_BYTES) ? reinterpret_cast<Scalar*>((internal::UIntPtr(m_data.array) & ~(std::size_t(EIGEN_MAX_ALIGN_BYTES-1))) + EIGEN_MAX_ALIGN_BYTES)
: m_data.array; : m_data.array;
} }
#endif #endif
@ -207,7 +207,7 @@ template<> struct gemv_dense_selector<OnTheRight,ColMajor,true>
typedef internal::blas_traits<Rhs> RhsBlasTraits; typedef internal::blas_traits<Rhs> RhsBlasTraits;
typedef typename RhsBlasTraits::DirectLinearAccessType ActualRhsType; typedef typename RhsBlasTraits::DirectLinearAccessType ActualRhsType;
typedef Map<Matrix<ResScalar,Dynamic,1>, Aligned> MappedDest; typedef Map<Matrix<ResScalar,Dynamic,1>, EIGEN_PLAIN_ENUM_MIN(AlignedMax,internal::packet_traits<ResScalar>::size)> MappedDest;
ActualLhsType actualLhs = LhsBlasTraits::extract(lhs); ActualLhsType actualLhs = LhsBlasTraits::extract(lhs);
ActualRhsType actualRhs = RhsBlasTraits::extract(rhs); ActualRhsType actualRhs = RhsBlasTraits::extract(rhs);

View File

@ -62,6 +62,7 @@ struct default_packet_traits
HasRsqrt = 0, HasRsqrt = 0,
HasExp = 0, HasExp = 0,
HasLog = 0, HasLog = 0,
HasLog1p = 0,
HasLog10 = 0, HasLog10 = 0,
HasPow = 0, HasPow = 0,
@ -82,6 +83,7 @@ struct default_packet_traits
HasErfc = 0, HasErfc = 0,
HasIGamma = 0, HasIGamma = 0,
HasIGammac = 0, HasIGammac = 0,
HasBetaInc = 0,
HasRound = 0, HasRound = 0,
HasFloor = 0, HasFloor = 0,
@ -304,7 +306,7 @@ template<typename Scalar> EIGEN_DEVICE_FUNC inline void prefetch(const Scalar* a
// 32-bit pointer operand constraint for inlined asm // 32-bit pointer operand constraint for inlined asm
asm(" prefetch.L1 [ %1 ];" : "=r"(addr) : "r"(addr)); asm(" prefetch.L1 [ %1 ];" : "=r"(addr) : "r"(addr));
#endif #endif
#elif !EIGEN_COMP_MSVC #elif (!EIGEN_COMP_MSVC) && (EIGEN_COMP_GNUC || EIGEN_COMP_CLANG || EIGEN_COMP_ICC)
__builtin_prefetch(addr); __builtin_prefetch(addr);
#endif #endif
} }
@ -346,22 +348,6 @@ template<typename Packet> EIGEN_DEVICE_FUNC inline typename unpacket_traits<Pack
template<typename Packet> EIGEN_DEVICE_FUNC inline Packet preverse(const Packet& a) template<typename Packet> EIGEN_DEVICE_FUNC inline Packet preverse(const Packet& a)
{ return a; } { return a; }
template<size_t offset, typename Packet>
struct protate_impl
{
// Empty so attempts to use this unimplemented path will fail to compile.
// Only specializations of this template should be used.
};
/** \internal \returns a packet with the coefficients rotated to the right in little-endian convention,
* by the given offset, e.g. for offset == 1:
* (packet[3], packet[2], packet[1], packet[0]) becomes (packet[0], packet[3], packet[2], packet[1])
*/
template<size_t offset, typename Packet> EIGEN_DEVICE_FUNC inline Packet protate(const Packet& a)
{
return offset ? protate_impl<offset, Packet>::run(a) : a;
}
/** \internal \returns \a a with real and imaginary part flipped (for complex type only) */ /** \internal \returns \a a with real and imaginary part flipped (for complex type only) */
template<typename Packet> EIGEN_DEVICE_FUNC inline Packet pcplxflip(const Packet& a) template<typename Packet> EIGEN_DEVICE_FUNC inline Packet pcplxflip(const Packet& a)
{ {
@ -419,6 +405,10 @@ Packet pexp(const Packet& a) { using std::exp; return exp(a); }
template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
Packet plog(const Packet& a) { using std::log; return log(a); } Packet plog(const Packet& a) { using std::log; return log(a); }
/** \internal \returns the log1p of \a a (coeff-wise) */
template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
Packet plog1p(const Packet& a) { return numext::log1p(a); }
/** \internal \returns the log10 of \a a (coeff-wise) */ /** \internal \returns the log10 of \a a (coeff-wise) */
template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
Packet plog10(const Packet& a) { using std::log10; return log10(a); } Packet plog10(const Packet& a) { using std::log10; return log10(a); }
@ -445,38 +435,6 @@ Packet pfloor(const Packet& a) { using numext::floor; return floor(a); }
template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
Packet pceil(const Packet& a) { using numext::ceil; return ceil(a); } Packet pceil(const Packet& a) { using numext::ceil; return ceil(a); }
/** \internal \returns the ln(|gamma(\a a)|) (coeff-wise) */
template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
Packet plgamma(const Packet& a) { using numext::lgamma; return lgamma(a); }
/** \internal \returns the derivative of lgamma, psi(\a a) (coeff-wise) */
template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
Packet pdigamma(const Packet& a) { using numext::digamma; return digamma(a); }
/** \internal \returns the zeta function of two arguments (coeff-wise) */
template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
Packet pzeta(const Packet& x, const Packet& q) { using numext::zeta; return zeta(x, q); }
/** \internal \returns the polygamma function (coeff-wise) */
template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
Packet ppolygamma(const Packet& n, const Packet& x) { using numext::polygamma; return polygamma(n, x); }
/** \internal \returns the erf(\a a) (coeff-wise) */
template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
Packet perf(const Packet& a) { using numext::erf; return erf(a); }
/** \internal \returns the erfc(\a a) (coeff-wise) */
template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
Packet perfc(const Packet& a) { using numext::erfc; return erfc(a); }
/** \internal \returns the incomplete gamma function igamma(\a a, \a x) */
template<typename Packet> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
Packet pigamma(const Packet& a, const Packet& x) { using numext::igamma; return igamma(a, x); }
/** \internal \returns the complementary incomplete gamma function igammac(\a a, \a x) */
template<typename Packet> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
Packet pigammac(const Packet& a, const Packet& x) { using numext::igammac; return igammac(a, x); }
/*************************************************************************** /***************************************************************************
* The following functions might not have to be overwritten for vectorized types * The following functions might not have to be overwritten for vectorized types
***************************************************************************/ ***************************************************************************/

View File

@ -1,7 +1,7 @@
// This file is part of Eigen, a lightweight C++ template library // This file is part of Eigen, a lightweight C++ template library
// for linear algebra. // for linear algebra.
// //
// Copyright (C) 2010-2012 Gael Guennebaud <gael.guennebaud@inria.fr> // Copyright (C) 2010-2016 Gael Guennebaud <gael.guennebaud@inria.fr>
// Copyright (C) 2010 Benoit Jacob <jacob.benoit.1@gmail.com> // Copyright (C) 2010 Benoit Jacob <jacob.benoit.1@gmail.com>
// //
// This Source Code Form is subject to the terms of the Mozilla // This Source Code Form is subject to the terms of the Mozilla
@ -11,13 +11,30 @@
#ifndef EIGEN_GLOBAL_FUNCTIONS_H #ifndef EIGEN_GLOBAL_FUNCTIONS_H
#define EIGEN_GLOBAL_FUNCTIONS_H #define EIGEN_GLOBAL_FUNCTIONS_H
#define EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(NAME,FUNCTOR) \ #ifdef EIGEN_PARSED_BY_DOXYGEN
#define EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(NAME,FUNCTOR,DOC_OP,DOC_DETAILS) \
/** \returns an expression of the coefficient-wise DOC_OP of \a x
DOC_DETAILS
\sa <a href="group__CoeffwiseMathFunctions.html#cwisetable_##NAME">Math functions</a>, class CwiseUnaryOp
*/ \
template<typename Derived> \
inline const Eigen::CwiseUnaryOp<Eigen::internal::FUNCTOR<typename Derived::Scalar>, const Derived> \
NAME(const Eigen::ArrayBase<Derived>& x);
#else
#define EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(NAME,FUNCTOR,DOC_OP,DOC_DETAILS) \
template<typename Derived> \ template<typename Derived> \
inline const Eigen::CwiseUnaryOp<Eigen::internal::FUNCTOR<typename Derived::Scalar>, const Derived> \ inline const Eigen::CwiseUnaryOp<Eigen::internal::FUNCTOR<typename Derived::Scalar>, const Derived> \
(NAME)(const Eigen::ArrayBase<Derived>& x) { \ (NAME)(const Eigen::ArrayBase<Derived>& x) { \
return Eigen::CwiseUnaryOp<Eigen::internal::FUNCTOR<typename Derived::Scalar>, const Derived>(x.derived()); \ return Eigen::CwiseUnaryOp<Eigen::internal::FUNCTOR<typename Derived::Scalar>, const Derived>(x.derived()); \
} }
#endif // EIGEN_PARSED_BY_DOXYGEN
#define EIGEN_ARRAY_DECLARE_GLOBAL_EIGEN_UNARY(NAME,FUNCTOR) \ #define EIGEN_ARRAY_DECLARE_GLOBAL_EIGEN_UNARY(NAME,FUNCTOR) \
\ \
template<typename Derived> \ template<typename Derived> \
@ -36,47 +53,68 @@
namespace Eigen namespace Eigen
{ {
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(real,scalar_real_op) EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(real,scalar_real_op,real part,\sa ArrayBase::real)
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(imag,scalar_imag_op) EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(imag,scalar_imag_op,imaginary part,\sa ArrayBase::imag)
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(conj,scalar_conjugate_op) EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(conj,scalar_conjugate_op,complex conjugate,\sa ArrayBase::conjugate)
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(inverse,scalar_inverse_op) EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(inverse,scalar_inverse_op,inverse,\sa ArrayBase::inverse)
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(sin,scalar_sin_op) EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(sin,scalar_sin_op,sine,\sa ArrayBase::sin)
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(cos,scalar_cos_op) EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(cos,scalar_cos_op,cosine,\sa ArrayBase::cos)
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(tan,scalar_tan_op) EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(tan,scalar_tan_op,tangent,\sa ArrayBase::tan)
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(atan,scalar_atan_op) EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(atan,scalar_atan_op,arc-tangent,\sa ArrayBase::atan)
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(asin,scalar_asin_op) EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(asin,scalar_asin_op,arc-sine,\sa ArrayBase::asin)
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(acos,scalar_acos_op) EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(acos,scalar_acos_op,arc-consine,\sa ArrayBase::acos)
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(sinh,scalar_sinh_op) EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(sinh,scalar_sinh_op,hyperbolic sine,\sa ArrayBase::sinh)
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(cosh,scalar_cosh_op) EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(cosh,scalar_cosh_op,hyperbolic cosine,\sa ArrayBase::cosh)
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(tanh,scalar_tanh_op) EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(tanh,scalar_tanh_op,hyperbolic tangent,\sa ArrayBase::tanh)
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(lgamma,scalar_lgamma_op) EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(lgamma,scalar_lgamma_op,natural logarithm of the gamma function,\sa ArrayBase::lgamma)
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(digamma,scalar_digamma_op) EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(digamma,scalar_digamma_op,derivative of lgamma,\sa ArrayBase::digamma)
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(zeta,scalar_zeta_op) EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(erf,scalar_erf_op,error function,\sa ArrayBase::erf)
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(polygamma,scalar_polygamma_op) EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(erfc,scalar_erfc_op,complement error function,\sa ArrayBase::erfc)
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(erf,scalar_erf_op) EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(exp,scalar_exp_op,exponential,\sa ArrayBase::exp)
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(erfc,scalar_erfc_op) EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(log,scalar_log_op,natural logarithm,\sa Eigen::log10 DOXCOMMA ArrayBase::log)
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(exp,scalar_exp_op) EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(log1p,scalar_log1p_op,natural logarithm of 1 plus the value,\sa ArrayBase::log1p)
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(log,scalar_log_op) EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(log10,scalar_log10_op,base 10 logarithm,\sa Eigen::log DOXCOMMA ArrayBase::log)
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(log10,scalar_log10_op) EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(abs,scalar_abs_op,absolute value,\sa ArrayBase::abs DOXCOMMA MatrixBase::cwiseAbs)
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(abs,scalar_abs_op) EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(abs2,scalar_abs2_op,squared absolute value,\sa ArrayBase::abs2 DOXCOMMA MatrixBase::cwiseAbs2)
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(abs2,scalar_abs2_op) EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(arg,scalar_arg_op,complex argument,\sa ArrayBase::arg)
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(arg,scalar_arg_op) EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(sqrt,scalar_sqrt_op,square root,\sa ArrayBase::sqrt DOXCOMMA MatrixBase::cwiseSqrt)
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(sqrt,scalar_sqrt_op) EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(rsqrt,scalar_rsqrt_op,reciprocal square root,\sa ArrayBase::rsqrt)
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(square,scalar_square_op) EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(square,scalar_square_op,square (power 2),\sa Eigen::abs2 DOXCOMMA Eigen::pow DOXCOMMA ArrayBase::square)
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(cube,scalar_cube_op) EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(cube,scalar_cube_op,cube (power 3),\sa Eigen::pow DOXCOMMA ArrayBase::cube)
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(round,scalar_round_op) EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(round,scalar_round_op,nearest integer,\sa Eigen::floor DOXCOMMA Eigen::ceil DOXCOMMA ArrayBase::round)
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(floor,scalar_floor_op) EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(floor,scalar_floor_op,nearest integer not greater than the giben value,\sa Eigen::ceil DOXCOMMA ArrayBase::floor)
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(ceil,scalar_ceil_op) EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(ceil,scalar_ceil_op,nearest integer not less than the giben value,\sa Eigen::floor DOXCOMMA ArrayBase::ceil)
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(isnan,scalar_isnan_op) EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(isnan,scalar_isnan_op,not-a-number test,\sa Eigen::isinf DOXCOMMA Eigen::isfinite DOXCOMMA ArrayBase::isnan)
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(isinf,scalar_isinf_op) EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(isinf,scalar_isinf_op,infinite value test,\sa Eigen::isnan DOXCOMMA Eigen::isfinite DOXCOMMA ArrayBase::isinf)
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(isfinite,scalar_isfinite_op) EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(isfinite,scalar_isfinite_op,finite value test,\sa Eigen::isinf DOXCOMMA Eigen::isnan DOXCOMMA ArrayBase::isfinite)
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(sign,scalar_sign_op) EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(sign,scalar_sign_op,sign (or 0),\sa ArrayBase::sign)
/** \returns an expression of the coefficient-wise power of \a x to the given constant \a exponent.
*
* \tparam ScalarExponent is the scalar type of \a exponent. It must be compatible with the scalar type of the given expression (\c Derived::Scalar).
*
* \sa ArrayBase::pow()
*
* \relates ArrayBase
*/
#ifdef EIGEN_PARSED_BY_DOXYGEN
template<typename Derived,typename ScalarExponent>
inline const CwiseBinaryOp<internal::scalar_pow_op<Derived::Scalar,ScalarExponent>,Derived,Constant<ScalarExponent> >
pow(const Eigen::ArrayBase<Derived>& x, const ScalarExponent& exponent);
#else
template<typename Derived,typename ScalarExponent>
inline typename internal::enable_if< !(internal::is_same<typename Derived::Scalar,ScalarExponent>::value) && EIGEN_SCALAR_BINARY_SUPPORTED(pow,typename Derived::Scalar,ScalarExponent),
const EIGEN_EXPR_BINARYOP_SCALAR_RETURN_TYPE(Derived,ScalarExponent,pow) >::type
pow(const Eigen::ArrayBase<Derived>& x, const ScalarExponent& exponent) {
return x.derived().pow(exponent);
}
template<typename Derived> template<typename Derived>
inline const Eigen::CwiseUnaryOp<Eigen::internal::scalar_pow_op<typename Derived::Scalar>, const Derived> inline const EIGEN_EXPR_BINARYOP_SCALAR_RETURN_TYPE(Derived,typename Derived::Scalar,pow)
pow(const Eigen::ArrayBase<Derived>& x, const typename Derived::Scalar& exponent) { pow(const Eigen::ArrayBase<Derived>& x, const typename Derived::Scalar& exponent) {
return x.derived().pow(exponent); return x.derived().pow(exponent);
} }
#endif
/** \returns an expression of the coefficient-wise power of \a x to the given array of \a exponents. /** \returns an expression of the coefficient-wise power of \a x to the given array of \a exponents.
* *
@ -86,12 +124,14 @@ namespace Eigen
* Output: \verbinclude Cwise_array_power_array.out * Output: \verbinclude Cwise_array_power_array.out
* *
* \sa ArrayBase::pow() * \sa ArrayBase::pow()
*
* \relates ArrayBase
*/ */
template<typename Derived,typename ExponentDerived> template<typename Derived,typename ExponentDerived>
inline const Eigen::CwiseBinaryOp<Eigen::internal::scalar_binary_pow_op<typename Derived::Scalar, typename ExponentDerived::Scalar>, const Derived, const ExponentDerived> inline const Eigen::CwiseBinaryOp<Eigen::internal::scalar_pow_op<typename Derived::Scalar, typename ExponentDerived::Scalar>, const Derived, const ExponentDerived>
pow(const Eigen::ArrayBase<Derived>& x, const Eigen::ArrayBase<ExponentDerived>& exponents) pow(const Eigen::ArrayBase<Derived>& x, const Eigen::ArrayBase<ExponentDerived>& exponents)
{ {
return Eigen::CwiseBinaryOp<Eigen::internal::scalar_binary_pow_op<typename Derived::Scalar, typename ExponentDerived::Scalar>, const Derived, const ExponentDerived>( return Eigen::CwiseBinaryOp<Eigen::internal::scalar_pow_op<typename Derived::Scalar, typename ExponentDerived::Scalar>, const Derived, const ExponentDerived>(
x.derived(), x.derived(),
exponents.derived() exponents.derived()
); );
@ -100,66 +140,39 @@ namespace Eigen
/** \returns an expression of the coefficient-wise power of the scalar \a x to the given array of \a exponents. /** \returns an expression of the coefficient-wise power of the scalar \a x to the given array of \a exponents.
* *
* This function computes the coefficient-wise power between a scalar and an array of exponents. * This function computes the coefficient-wise power between a scalar and an array of exponents.
* Beaware that the scalar type of the input scalar \a x and the exponents \a exponents must be the same. *
* \tparam Scalar is the scalar type of \a x. It must be compatible with the scalar type of the given array expression (\c Derived::Scalar).
* *
* Example: \include Cwise_scalar_power_array.cpp * Example: \include Cwise_scalar_power_array.cpp
* Output: \verbinclude Cwise_scalar_power_array.out * Output: \verbinclude Cwise_scalar_power_array.out
* *
* \sa ArrayBase::pow() * \sa ArrayBase::pow()
*
* \relates ArrayBase
*/ */
#ifdef EIGEN_PARSED_BY_DOXYGEN
template<typename Scalar,typename Derived>
inline const CwiseBinaryOp<internal::scalar_pow_op<Scalar,Derived::Scalar>,Constant<Scalar>,Derived>
pow(const Scalar& x,const Eigen::ArrayBase<Derived>& x);
#else
template<typename Scalar, typename Derived>
inline typename internal::enable_if< !(internal::is_same<typename Derived::Scalar,Scalar>::value) && EIGEN_SCALAR_BINARY_SUPPORTED(pow,Scalar,typename Derived::Scalar),
const EIGEN_SCALAR_BINARYOP_EXPR_RETURN_TYPE(Scalar,Derived,pow) >::type
pow(const Scalar& x, const Eigen::ArrayBase<Derived>& exponents)
{
return EIGEN_SCALAR_BINARYOP_EXPR_RETURN_TYPE(Scalar,Derived,pow)(
typename internal::plain_constant_type<Derived,Scalar>::type(exponents.rows(), exponents.cols(), x), exponents.derived() );
}
template<typename Derived> template<typename Derived>
inline const Eigen::CwiseBinaryOp<Eigen::internal::scalar_binary_pow_op<typename Derived::Scalar, typename Derived::Scalar>, const typename Derived::ConstantReturnType, const Derived> inline const EIGEN_SCALAR_BINARYOP_EXPR_RETURN_TYPE(typename Derived::Scalar,Derived,pow)
pow(const typename Derived::Scalar& x, const Eigen::ArrayBase<Derived>& exponents) pow(const typename Derived::Scalar& x, const Eigen::ArrayBase<Derived>& exponents)
{ {
typename Derived::ConstantReturnType constant_x(exponents.rows(), exponents.cols(), x); return EIGEN_SCALAR_BINARYOP_EXPR_RETURN_TYPE(typename Derived::Scalar,Derived,pow)(
return Eigen::CwiseBinaryOp<Eigen::internal::scalar_binary_pow_op<typename Derived::Scalar, typename Derived::Scalar>, const typename Derived::ConstantReturnType, const Derived>( typename internal::plain_constant_type<Derived,typename Derived::Scalar>::type(exponents.rows(), exponents.cols(), x), exponents.derived() );
constant_x,
exponents.derived()
);
} }
#endif
/**
* \brief Component-wise division of a scalar by array elements.
**/
template <typename Derived>
inline const Eigen::CwiseUnaryOp<Eigen::internal::scalar_inverse_mult_op<typename Derived::Scalar>, const Derived>
operator/(const typename Derived::Scalar& s, const Eigen::ArrayBase<Derived>& a)
{
return Eigen::CwiseUnaryOp<Eigen::internal::scalar_inverse_mult_op<typename Derived::Scalar>, const Derived>(
a.derived(),
Eigen::internal::scalar_inverse_mult_op<typename Derived::Scalar>(s)
);
}
/** \returns an expression of the coefficient-wise igamma(\a a, \a x) to the given arrays.
*
* This function computes the coefficient-wise incomplete gamma function.
*
*/
template<typename Derived,typename ExponentDerived>
inline const Eigen::CwiseBinaryOp<Eigen::internal::scalar_igamma_op<typename Derived::Scalar>, const Derived, const ExponentDerived>
igamma(const Eigen::ArrayBase<Derived>& a, const Eigen::ArrayBase<ExponentDerived>& x)
{
return Eigen::CwiseBinaryOp<Eigen::internal::scalar_igamma_op<typename Derived::Scalar>, const Derived, const ExponentDerived>(
a.derived(),
x.derived()
);
}
/** \returns an expression of the coefficient-wise igammac(\a a, \a x) to the given arrays.
*
* This function computes the coefficient-wise complementary incomplete gamma function.
*
*/
template<typename Derived,typename ExponentDerived>
inline const Eigen::CwiseBinaryOp<Eigen::internal::scalar_igammac_op<typename Derived::Scalar>, const Derived, const ExponentDerived>
igammac(const Eigen::ArrayBase<Derived>& a, const Eigen::ArrayBase<ExponentDerived>& x)
{
return Eigen::CwiseBinaryOp<Eigen::internal::scalar_igammac_op<typename Derived::Scalar>, const Derived, const ExponentDerived>(
a.derived(),
x.derived()
);
}
namespace internal namespace internal
{ {

View File

@ -125,31 +125,17 @@ DenseBase<Derived>::format(const IOFormat& fmt) const
namespace internal { namespace internal {
template<typename Scalar, bool IsInteger> // NOTE: This helper is kept for backward compatibility with previous code specializing
struct significant_decimals_default_impl // this internal::significant_decimals_impl structure. In the future we should directly
{ // call digits10() which has been introduced in July 2016 in 3.3.
typedef typename NumTraits<Scalar>::Real RealScalar;
static inline int run()
{
using std::ceil;
using std::log;
return cast<RealScalar,int>(ceil(-log(NumTraits<RealScalar>::epsilon())/log(RealScalar(10))));
}
};
template<typename Scalar>
struct significant_decimals_default_impl<Scalar, true>
{
static inline int run()
{
return 0;
}
};
template<typename Scalar> template<typename Scalar>
struct significant_decimals_impl struct significant_decimals_impl
: significant_decimals_default_impl<Scalar, NumTraits<Scalar>::IsInteger> {
{}; static inline int run()
{
return NumTraits<Scalar>::digits10();
}
};
/** \internal /** \internal
* print the matrix \a _m to the output stream \a s using the output format \a fmt */ * print the matrix \a _m to the output stream \a s using the output format \a fmt */

View File

@ -50,7 +50,7 @@ public:
typedef typename internal::ref_selector<Inverse>::type Nested; typedef typename internal::ref_selector<Inverse>::type Nested;
typedef typename internal::remove_all<XprType>::type NestedExpression; typedef typename internal::remove_all<XprType>::type NestedExpression;
explicit Inverse(const XprType &xpr) explicit EIGEN_DEVICE_FUNC Inverse(const XprType &xpr)
: m_xpr(xpr) : m_xpr(xpr)
{} {}

View File

@ -17,10 +17,20 @@
namespace Eigen { namespace Eigen {
/** \class MapBase /** \ingroup Core_Module
* \ingroup Core_Module
* *
* \brief Base class for Map and Block expression with direct access * \brief Base class for dense Map and Block expression with direct access
*
* This base class provides the const low-level accessors (e.g. coeff, coeffRef) of dense
* Map and Block objects with direct access.
* Typical users do not have to directly deal with this class.
*
* This class can be extended by through the macro plugin \c EIGEN_MAPBASE_PLUGIN.
* See \link TopicCustomizing_Plugins customizing Eigen \endlink for details.
*
* The \c Derived class has to provide the following two methods describing the memory layout:
* \code Index innerStride() const; \endcode
* \code Index outerStride() const; \endcode
* *
* \sa class Map, class Block * \sa class Map, class Block
*/ */
@ -75,7 +85,9 @@ template<typename Derived> class MapBase<Derived, ReadOnlyAccessors>
typedef typename Base::CoeffReturnType CoeffReturnType; typedef typename Base::CoeffReturnType CoeffReturnType;
/** \copydoc DenseBase::rows() */
EIGEN_DEVICE_FUNC inline Index rows() const { return m_rows.value(); } EIGEN_DEVICE_FUNC inline Index rows() const { return m_rows.value(); }
/** \copydoc DenseBase::cols() */
EIGEN_DEVICE_FUNC inline Index cols() const { return m_cols.value(); } EIGEN_DEVICE_FUNC inline Index cols() const { return m_cols.value(); }
/** Returns a pointer to the first coefficient of the matrix or vector. /** Returns a pointer to the first coefficient of the matrix or vector.
@ -86,12 +98,14 @@ template<typename Derived> class MapBase<Derived, ReadOnlyAccessors>
*/ */
EIGEN_DEVICE_FUNC inline const Scalar* data() const { return m_data; } EIGEN_DEVICE_FUNC inline const Scalar* data() const { return m_data; }
/** \copydoc PlainObjectBase::coeff(Index,Index) const */
EIGEN_DEVICE_FUNC EIGEN_DEVICE_FUNC
inline const Scalar& coeff(Index rowId, Index colId) const inline const Scalar& coeff(Index rowId, Index colId) const
{ {
return m_data[colId * colStride() + rowId * rowStride()]; return m_data[colId * colStride() + rowId * rowStride()];
} }
/** \copydoc PlainObjectBase::coeff(Index) const */
EIGEN_DEVICE_FUNC EIGEN_DEVICE_FUNC
inline const Scalar& coeff(Index index) const inline const Scalar& coeff(Index index) const
{ {
@ -99,12 +113,14 @@ template<typename Derived> class MapBase<Derived, ReadOnlyAccessors>
return m_data[index * innerStride()]; return m_data[index * innerStride()];
} }
/** \copydoc PlainObjectBase::coeffRef(Index,Index) const */
EIGEN_DEVICE_FUNC EIGEN_DEVICE_FUNC
inline const Scalar& coeffRef(Index rowId, Index colId) const inline const Scalar& coeffRef(Index rowId, Index colId) const
{ {
return this->m_data[colId * colStride() + rowId * rowStride()]; return this->m_data[colId * colStride() + rowId * rowStride()];
} }
/** \copydoc PlainObjectBase::coeffRef(Index) const */
EIGEN_DEVICE_FUNC EIGEN_DEVICE_FUNC
inline const Scalar& coeffRef(Index index) const inline const Scalar& coeffRef(Index index) const
{ {
@ -112,6 +128,7 @@ template<typename Derived> class MapBase<Derived, ReadOnlyAccessors>
return this->m_data[index * innerStride()]; return this->m_data[index * innerStride()];
} }
/** \internal */
template<int LoadMode> template<int LoadMode>
inline PacketScalar packet(Index rowId, Index colId) const inline PacketScalar packet(Index rowId, Index colId) const
{ {
@ -119,6 +136,7 @@ template<typename Derived> class MapBase<Derived, ReadOnlyAccessors>
(m_data + (colId * colStride() + rowId * rowStride())); (m_data + (colId * colStride() + rowId * rowStride()));
} }
/** \internal */
template<int LoadMode> template<int LoadMode>
inline PacketScalar packet(Index index) const inline PacketScalar packet(Index index) const
{ {
@ -126,6 +144,7 @@ template<typename Derived> class MapBase<Derived, ReadOnlyAccessors>
return internal::ploadt<PacketScalar, LoadMode>(m_data + index * innerStride()); return internal::ploadt<PacketScalar, LoadMode>(m_data + index * innerStride());
} }
/** \internal Constructor for fixed size matrices or vectors */
EIGEN_DEVICE_FUNC EIGEN_DEVICE_FUNC
explicit inline MapBase(PointerType dataPtr) : m_data(dataPtr), m_rows(RowsAtCompileTime), m_cols(ColsAtCompileTime) explicit inline MapBase(PointerType dataPtr) : m_data(dataPtr), m_rows(RowsAtCompileTime), m_cols(ColsAtCompileTime)
{ {
@ -133,6 +152,7 @@ template<typename Derived> class MapBase<Derived, ReadOnlyAccessors>
checkSanity<Derived>(); checkSanity<Derived>();
} }
/** \internal Constructor for dynamically sized vectors */
EIGEN_DEVICE_FUNC EIGEN_DEVICE_FUNC
inline MapBase(PointerType dataPtr, Index vecSize) inline MapBase(PointerType dataPtr, Index vecSize)
: m_data(dataPtr), : m_data(dataPtr),
@ -145,6 +165,7 @@ template<typename Derived> class MapBase<Derived, ReadOnlyAccessors>
checkSanity<Derived>(); checkSanity<Derived>();
} }
/** \internal Constructor for dynamically sized matrices */
EIGEN_DEVICE_FUNC EIGEN_DEVICE_FUNC
inline MapBase(PointerType dataPtr, Index rows, Index cols) inline MapBase(PointerType dataPtr, Index rows, Index cols)
: m_data(dataPtr), m_rows(rows), m_cols(cols) : m_data(dataPtr), m_rows(rows), m_cols(cols)
@ -166,7 +187,7 @@ template<typename Derived> class MapBase<Derived, ReadOnlyAccessors>
void checkSanity(typename internal::enable_if<(internal::traits<T>::Alignment>0),void*>::type = 0) const void checkSanity(typename internal::enable_if<(internal::traits<T>::Alignment>0),void*>::type = 0) const
{ {
#if EIGEN_MAX_ALIGN_BYTES>0 #if EIGEN_MAX_ALIGN_BYTES>0
eigen_assert(( ((size_t(m_data) % internal::traits<Derived>::Alignment) == 0) eigen_assert(( ((internal::UIntPtr(m_data) % internal::traits<Derived>::Alignment) == 0)
|| (cols() * rows() * innerStride() * sizeof(Scalar)) < internal::traits<Derived>::Alignment ) && "data is not aligned"); || (cols() * rows() * innerStride() * sizeof(Scalar)) < internal::traits<Derived>::Alignment ) && "data is not aligned");
#endif #endif
} }
@ -181,6 +202,16 @@ template<typename Derived> class MapBase<Derived, ReadOnlyAccessors>
const internal::variable_if_dynamic<Index, ColsAtCompileTime> m_cols; const internal::variable_if_dynamic<Index, ColsAtCompileTime> m_cols;
}; };
/** \ingroup Core_Module
*
* \brief Base class for non-const dense Map and Block expression with direct access
*
* This base class provides the non-const low-level accessors (e.g. coeff and coeffRef) of
* dense Map and Block objects with direct access.
* It inherits MapBase<Derived, ReadOnlyAccessors> which defines the const variant for reading specific entries.
*
* \sa class Map, class Block
*/
template<typename Derived> class MapBase<Derived, WriteAccessors> template<typename Derived> class MapBase<Derived, WriteAccessors>
: public MapBase<Derived, ReadOnlyAccessors> : public MapBase<Derived, ReadOnlyAccessors>
{ {

View File

@ -11,7 +11,9 @@
#define EIGEN_MATHFUNCTIONS_H #define EIGEN_MATHFUNCTIONS_H
// source: http://www.geom.uiuc.edu/~huberty/math5337/groupe/digits.html // source: http://www.geom.uiuc.edu/~huberty/math5337/groupe/digits.html
#define EIGEN_PI 3.141592653589793238462643383279502884197169399375105820974944592307816406 // TODO this should better be moved to NumTraits
#define EIGEN_PI 3.141592653589793238462643383279502884197169399375105820974944592307816406L
namespace Eigen { namespace Eigen {
@ -95,6 +97,19 @@ struct real_default_impl<Scalar,true>
template<typename Scalar> struct real_impl : real_default_impl<Scalar> {}; template<typename Scalar> struct real_impl : real_default_impl<Scalar> {};
#ifdef __CUDA_ARCH__
template<typename T>
struct real_impl<std::complex<T> >
{
typedef T RealScalar;
EIGEN_DEVICE_FUNC
static inline T run(const std::complex<T>& x)
{
return x.real();
}
};
#endif
template<typename Scalar> template<typename Scalar>
struct real_retval struct real_retval
{ {
@ -130,6 +145,19 @@ struct imag_default_impl<Scalar,true>
template<typename Scalar> struct imag_impl : imag_default_impl<Scalar> {}; template<typename Scalar> struct imag_impl : imag_default_impl<Scalar> {};
#ifdef __CUDA_ARCH__
template<typename T>
struct imag_impl<std::complex<T> >
{
typedef T RealScalar;
EIGEN_DEVICE_FUNC
static inline T run(const std::complex<T>& x)
{
return x.imag();
}
};
#endif
template<typename Scalar> template<typename Scalar>
struct imag_retval struct imag_retval
{ {
@ -457,30 +485,33 @@ struct arg_retval
/**************************************************************************** /****************************************************************************
* Implementation of log1p * * Implementation of log1p *
****************************************************************************/ ****************************************************************************/
template<typename Scalar, bool isComplex = NumTraits<Scalar>::IsComplex >
struct log1p_impl namespace std_fallback {
{ // fallback log1p implementation in case there is no log1p(Scalar) function in namespace of Scalar,
static inline Scalar run(const Scalar& x) // or that there is no suitable std::log1p function available
{ template<typename Scalar>
EIGEN_DEVICE_FUNC inline Scalar log1p(const Scalar& x) {
EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar) EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar)
typedef typename NumTraits<Scalar>::Real RealScalar; typedef typename NumTraits<Scalar>::Real RealScalar;
EIGEN_USING_STD_MATH(log); EIGEN_USING_STD_MATH(log);
Scalar x1p = RealScalar(1) + x; Scalar x1p = RealScalar(1) + x;
return ( x1p == Scalar(1) ) ? x : x * ( log(x1p) / (x1p - RealScalar(1)) ); return ( x1p == Scalar(1) ) ? x : x * ( log(x1p) / (x1p - RealScalar(1)) );
} }
}; }
#if EIGEN_HAS_CXX11_MATH
template<typename Scalar> template<typename Scalar>
struct log1p_impl<Scalar, false> { struct log1p_impl {
static inline Scalar run(const Scalar& x) static inline Scalar run(const Scalar& x)
{ {
EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar) EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar)
#if EIGEN_HAS_CXX11_MATH
using std::log1p; using std::log1p;
#endif
using std_fallback::log1p;
return log1p(x); return log1p(x);
} }
}; };
#endif
template<typename Scalar> template<typename Scalar>
struct log1p_retval struct log1p_retval
@ -492,24 +523,26 @@ struct log1p_retval
* Implementation of pow * * Implementation of pow *
****************************************************************************/ ****************************************************************************/
template<typename Scalar, bool IsInteger> template<typename ScalarX,typename ScalarY, bool IsInteger = NumTraits<ScalarX>::IsInteger&&NumTraits<ScalarY>::IsInteger>
struct pow_default_impl struct pow_impl
{ {
typedef Scalar retval; //typedef Scalar retval;
static EIGEN_DEVICE_FUNC inline Scalar run(const Scalar& x, const Scalar& y) typedef typename ScalarBinaryOpTraits<ScalarX,ScalarY,internal::scalar_pow_op<ScalarX,ScalarY> >::ReturnType result_type;
static EIGEN_DEVICE_FUNC inline result_type run(const ScalarX& x, const ScalarY& y)
{ {
EIGEN_USING_STD_MATH(pow); EIGEN_USING_STD_MATH(pow);
return pow(x, y); return pow(x, y);
} }
}; };
template<typename Scalar> template<typename ScalarX,typename ScalarY>
struct pow_default_impl<Scalar, true> struct pow_impl<ScalarX,ScalarY, true>
{ {
static EIGEN_DEVICE_FUNC inline Scalar run(Scalar x, Scalar y) typedef ScalarX result_type;
static EIGEN_DEVICE_FUNC inline ScalarX run(ScalarX x, ScalarY y)
{ {
Scalar res(1); ScalarX res(1);
eigen_assert(!NumTraits<Scalar>::IsSigned || y >= 0); eigen_assert(!NumTraits<ScalarY>::IsSigned || y >= 0);
if(y & 1) res *= x; if(y & 1) res *= x;
y >>= 1; y >>= 1;
while(y) while(y)
@ -522,15 +555,6 @@ struct pow_default_impl<Scalar, true>
} }
}; };
template<typename Scalar>
struct pow_impl : pow_default_impl<Scalar, NumTraits<Scalar>::IsInteger> {};
template<typename Scalar>
struct pow_retval
{
typedef Scalar type;
};
/**************************************************************************** /****************************************************************************
* Implementation of random * * Implementation of random *
****************************************************************************/ ****************************************************************************/
@ -620,16 +644,18 @@ struct random_default_impl<Scalar, false, true>
typedef typename conditional<NumTraits<Scalar>::IsSigned,std::ptrdiff_t,std::size_t>::type ScalarX; typedef typename conditional<NumTraits<Scalar>::IsSigned,std::ptrdiff_t,std::size_t>::type ScalarX;
if(y<x) if(y<x)
return x; return x;
// the following difference might overflow on a 32 bits system,
// but since y>=x the result converted to an unsigned long is still correct.
std::size_t range = ScalarX(y)-ScalarX(x); std::size_t range = ScalarX(y)-ScalarX(x);
std::size_t offset = 0; std::size_t offset = 0;
// rejection sampling // rejection sampling
std::size_t divisor = (range+RAND_MAX-1)/(range+1); std::size_t divisor = 1;
std::size_t multiplier = (range+RAND_MAX-1)/std::size_t(RAND_MAX); std::size_t multiplier = 1;
if(range<RAND_MAX) divisor = (std::size_t(RAND_MAX)+1)/(range+1);
else multiplier = 1 + range/(std::size_t(RAND_MAX)+1);
do { do {
offset = ( (std::size_t(std::rand()) * multiplier) / divisor ); offset = (std::size_t(std::rand()) * multiplier) / divisor;
} while (offset > range); } while (offset > range);
return Scalar(ScalarX(x) + offset); return Scalar(ScalarX(x) + offset);
} }
@ -790,6 +816,8 @@ template<typename T> EIGEN_DEVICE_FUNC bool isfinite_impl(const std::complex<T>&
template<typename T> EIGEN_DEVICE_FUNC bool isnan_impl(const std::complex<T>& x); template<typename T> EIGEN_DEVICE_FUNC bool isnan_impl(const std::complex<T>& x);
template<typename T> EIGEN_DEVICE_FUNC bool isinf_impl(const std::complex<T>& x); template<typename T> EIGEN_DEVICE_FUNC bool isinf_impl(const std::complex<T>& x);
template<typename T> T generic_fast_tanh_float(const T& a_x);
} // end namespace internal } // end namespace internal
/**************************************************************************** /****************************************************************************
@ -825,7 +853,7 @@ template<>
EIGEN_DEVICE_FUNC EIGEN_DEVICE_FUNC
EIGEN_ALWAYS_INLINE float mini(const float& x, const float& y) EIGEN_ALWAYS_INLINE float mini(const float& x, const float& y)
{ {
return fmin(x, y); return fminf(x, y);
} }
template<typename T> template<typename T>
EIGEN_DEVICE_FUNC EIGEN_DEVICE_FUNC
@ -837,7 +865,7 @@ template<>
EIGEN_DEVICE_FUNC EIGEN_DEVICE_FUNC
EIGEN_ALWAYS_INLINE float maxi(const float& x, const float& y) EIGEN_ALWAYS_INLINE float maxi(const float& x, const float& y)
{ {
return fmax(x, y); return fmaxf(x, y);
} }
#endif #endif
@ -926,11 +954,19 @@ inline EIGEN_MATHFUNC_RETVAL(log1p, Scalar) log1p(const Scalar& x)
return EIGEN_MATHFUNC_IMPL(log1p, Scalar)::run(x); return EIGEN_MATHFUNC_IMPL(log1p, Scalar)::run(x);
} }
template<typename Scalar> #ifdef __CUDACC__
template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
float log1p(const float &x) { return ::log1pf(x); }
template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
double log1p(const double &x) { return ::log1p(x); }
#endif
template<typename ScalarX,typename ScalarY>
EIGEN_DEVICE_FUNC EIGEN_DEVICE_FUNC
inline EIGEN_MATHFUNC_RETVAL(pow, Scalar) pow(const Scalar& x, const Scalar& y) inline typename internal::pow_impl<ScalarX,ScalarY>::result_type pow(const ScalarX& x, const ScalarY& y)
{ {
return EIGEN_MATHFUNC_IMPL(pow, Scalar)::run(x, y); return internal::pow_impl<ScalarX,ScalarY>::run(x, y);
} }
template<typename T> EIGEN_DEVICE_FUNC bool (isnan) (const T &x) { return internal::isnan_impl(x); } template<typename T> EIGEN_DEVICE_FUNC bool (isnan) (const T &x) { return internal::isnan_impl(x); }
@ -1036,6 +1072,16 @@ float abs(const float &x) { return ::fabsf(x); }
template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
double abs(const double &x) { return ::fabs(x); } double abs(const double &x) { return ::fabs(x); }
template <> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
float abs(const std::complex<float>& x) {
return ::hypotf(x.real(), x.imag());
}
template <> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
double abs(const std::complex<double>& x) {
return ::hypot(x.real(), x.imag());
}
#endif #endif
template<typename T> template<typename T>
@ -1181,6 +1227,11 @@ T tanh(const T &x) {
return tanh(x); return tanh(x);
} }
#if (!defined(__CUDACC__)) && EIGEN_FAST_MATH
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
float tanh(float x) { return internal::generic_fast_tanh_float(x); }
#endif
#ifdef __CUDACC__ #ifdef __CUDACC__
template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
float tanh(const float &x) { return ::tanhf(x); } float tanh(const float &x) { return ::tanhf(x); }
@ -1192,7 +1243,7 @@ double tanh(const double &x) { return ::tanh(x); }
template <typename T> template <typename T>
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
T fmod(const T& a, const T& b) { T fmod(const T& a, const T& b) {
EIGEN_USING_STD_MATH(floor); EIGEN_USING_STD_MATH(fmod);
return fmod(a, b); return fmod(a, b);
} }
@ -1287,11 +1338,12 @@ template<typename Scalar>
struct scalar_fuzzy_default_impl<Scalar, true, false> struct scalar_fuzzy_default_impl<Scalar, true, false>
{ {
typedef typename NumTraits<Scalar>::Real RealScalar; typedef typename NumTraits<Scalar>::Real RealScalar;
template<typename OtherScalar> template<typename OtherScalar> EIGEN_DEVICE_FUNC
static inline bool isMuchSmallerThan(const Scalar& x, const OtherScalar& y, const RealScalar& prec) static inline bool isMuchSmallerThan(const Scalar& x, const OtherScalar& y, const RealScalar& prec)
{ {
return numext::abs2(x) <= numext::abs2(y) * prec * prec; return numext::abs2(x) <= numext::abs2(y) * prec * prec;
} }
EIGEN_DEVICE_FUNC
static inline bool isApprox(const Scalar& x, const Scalar& y, const RealScalar& prec) static inline bool isApprox(const Scalar& x, const Scalar& y, const RealScalar& prec)
{ {
return numext::abs2(x - y) <= numext::mini(numext::abs2(x), numext::abs2(y)) * prec * prec; return numext::abs2(x - y) <= numext::mini(numext::abs2(x), numext::abs2(y)) * prec * prec;

View File

@ -0,0 +1,78 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2014 Pedro Gonnet (pedro.gonnet@gmail.com)
// Copyright (C) 2016 Gael Guennebaud <gael.guennebaud@inria.fr>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
#ifndef EIGEN_MATHFUNCTIONSIMPL_H
#define EIGEN_MATHFUNCTIONSIMPL_H
namespace Eigen {
namespace internal {
/** \internal \returns the hyperbolic tan of \a a (coeff-wise)
Doesn't do anything fancy, just a 13/6-degree rational interpolant which
is accurate up to a couple of ulp in the range [-9, 9], outside of which
the tanh(x) = +/-1.
This implementation works on both scalars and packets.
*/
template<typename T>
T generic_fast_tanh_float(const T& a_x)
{
// Clamp the inputs to the range [-9, 9] since anything outside
// this range is +/-1.0f in single-precision.
const T plus_9 = pset1<T>(9.f);
const T minus_9 = pset1<T>(-9.f);
// NOTE GCC prior to 6.3 might improperly optimize this max/min
// step such that if a_x is nan, x will be either 9 or -9,
// and tanh will return 1 or -1 instead of nan.
// This is supposed to be fixed in gcc6.3,
// see: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=72867
const T x = pmax(minus_9,pmin(plus_9,a_x));
// The monomial coefficients of the numerator polynomial (odd).
const T alpha_1 = pset1<T>(4.89352455891786e-03f);
const T alpha_3 = pset1<T>(6.37261928875436e-04f);
const T alpha_5 = pset1<T>(1.48572235717979e-05f);
const T alpha_7 = pset1<T>(5.12229709037114e-08f);
const T alpha_9 = pset1<T>(-8.60467152213735e-11f);
const T alpha_11 = pset1<T>(2.00018790482477e-13f);
const T alpha_13 = pset1<T>(-2.76076847742355e-16f);
// The monomial coefficients of the denominator polynomial (even).
const T beta_0 = pset1<T>(4.89352518554385e-03f);
const T beta_2 = pset1<T>(2.26843463243900e-03f);
const T beta_4 = pset1<T>(1.18534705686654e-04f);
const T beta_6 = pset1<T>(1.19825839466702e-06f);
// Since the polynomials are odd/even, we need x^2.
const T x2 = pmul(x, x);
// Evaluate the numerator polynomial p.
T p = pmadd(x2, alpha_13, alpha_11);
p = pmadd(x2, p, alpha_9);
p = pmadd(x2, p, alpha_7);
p = pmadd(x2, p, alpha_5);
p = pmadd(x2, p, alpha_3);
p = pmadd(x2, p, alpha_1);
p = pmul(x, p);
// Evaluate the denominator polynomial p.
T q = pmadd(x2, beta_6, beta_4);
q = pmadd(x2, q, beta_2);
q = pmadd(x2, q, beta_0);
// Divide the numerator by the denominator.
return pdiv(p, q);
}
} // end namespace internal
} // end namespace Eigen
#endif // EIGEN_MATHFUNCTIONSIMPL_H

View File

@ -27,7 +27,7 @@ private:
default_alignment = compute_default_alignment<_Scalar,max_size>::value, default_alignment = compute_default_alignment<_Scalar,max_size>::value,
actual_alignment = ((_Options&DontAlign)==0) ? default_alignment : 0, actual_alignment = ((_Options&DontAlign)==0) ? default_alignment : 0,
required_alignment = unpacket_traits<PacketScalar>::alignment, required_alignment = unpacket_traits<PacketScalar>::alignment,
packet_access_bit = packet_traits<_Scalar>::Vectorizable && (actual_alignment>=required_alignment) ? PacketAccessBit : 0 packet_access_bit = (packet_traits<_Scalar>::Vectorizable && (EIGEN_UNALIGNED_VECTORIZE || (actual_alignment>=required_alignment))) ? PacketAccessBit : 0
}; };
public: public:
@ -106,7 +106,7 @@ public:
* \endcode * \endcode
* *
* This class can be extended with the help of the plugin mechanism described on the page * This class can be extended with the help of the plugin mechanism described on the page
* \ref TopicCustomizingEigen by defining the preprocessor symbol \c EIGEN_MATRIX_PLUGIN. * \ref TopicCustomizing_Plugins by defining the preprocessor symbol \c EIGEN_MATRIX_PLUGIN.
* *
* <i><b>Some notes:</b></i> * <i><b>Some notes:</b></i>
* *
@ -268,9 +268,9 @@ class Matrix
: Base(internal::constructor_without_unaligned_array_assert()) : Base(internal::constructor_without_unaligned_array_assert())
{ Base::_check_template_params(); EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED } { Base::_check_template_params(); EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED }
#ifdef EIGEN_HAVE_RVALUE_REFERENCES #if EIGEN_HAS_RVALUE_REFERENCES
EIGEN_DEVICE_FUNC EIGEN_DEVICE_FUNC
Matrix(Matrix&& other) Matrix(Matrix&& other) EIGEN_NOEXCEPT_IF(std::is_nothrow_move_constructible<Scalar>::value)
: Base(std::move(other)) : Base(std::move(other))
{ {
Base::_check_template_params(); Base::_check_template_params();
@ -278,7 +278,7 @@ class Matrix
Base::_set_noalias(other); Base::_set_noalias(other);
} }
EIGEN_DEVICE_FUNC EIGEN_DEVICE_FUNC
Matrix& operator=(Matrix&& other) Matrix& operator=(Matrix&& other) EIGEN_NOEXCEPT_IF(std::is_nothrow_move_assignable<Scalar>::value)
{ {
other.swap(*this); other.swap(*this);
return *this; return *this;

View File

@ -41,7 +41,7 @@ namespace Eigen {
* \endcode * \endcode
* *
* This class can be extended with the help of the plugin mechanism described on the page * This class can be extended with the help of the plugin mechanism described on the page
* \ref TopicCustomizingEigen by defining the preprocessor symbol \c EIGEN_MATRIXBASE_PLUGIN. * \ref TopicCustomizing_Plugins by defining the preprocessor symbol \c EIGEN_MATRIXBASE_PLUGIN.
* *
* \sa \blank \ref TopicClassHierarchy * \sa \blank \ref TopicClassHierarchy
*/ */
@ -80,8 +80,6 @@ template<typename Derived> class MatrixBase
using Base::operator-=; using Base::operator-=;
using Base::operator*=; using Base::operator*=;
using Base::operator/=; using Base::operator/=;
using Base::operator*;
using Base::operator/;
typedef typename Base::CoeffReturnType CoeffReturnType; typedef typename Base::CoeffReturnType CoeffReturnType;
typedef typename Base::ConstTransposeReturnType ConstTransposeReturnType; typedef typename Base::ConstTransposeReturnType ConstTransposeReturnType;
@ -100,7 +98,7 @@ template<typename Derived> class MatrixBase
/** \returns the size of the main diagonal, which is min(rows(),cols()). /** \returns the size of the main diagonal, which is min(rows(),cols()).
* \sa rows(), cols(), SizeAtCompileTime. */ * \sa rows(), cols(), SizeAtCompileTime. */
EIGEN_DEVICE_FUNC EIGEN_DEVICE_FUNC
inline Index diagonalSize() const { return (std::min)(rows(),cols()); } inline Index diagonalSize() const { return (numext::mini)(rows(),cols()); }
typedef typename Base::PlainObject PlainObject; typedef typename Base::PlainObject PlainObject;
@ -123,6 +121,7 @@ template<typename Derived> class MatrixBase
#endif // not EIGEN_PARSED_BY_DOXYGEN #endif // not EIGEN_PARSED_BY_DOXYGEN
#define EIGEN_CURRENT_STORAGE_BASE_CLASS Eigen::MatrixBase #define EIGEN_CURRENT_STORAGE_BASE_CLASS Eigen::MatrixBase
#define EIGEN_DOC_UNARY_ADDONS(X,Y)
# include "../plugins/CommonCwiseUnaryOps.h" # include "../plugins/CommonCwiseUnaryOps.h"
# include "../plugins/CommonCwiseBinaryOps.h" # include "../plugins/CommonCwiseBinaryOps.h"
# include "../plugins/MatrixCwiseUnaryOps.h" # include "../plugins/MatrixCwiseUnaryOps.h"
@ -131,6 +130,7 @@ template<typename Derived> class MatrixBase
# include EIGEN_MATRIXBASE_PLUGIN # include EIGEN_MATRIXBASE_PLUGIN
# endif # endif
#undef EIGEN_CURRENT_STORAGE_BASE_CLASS #undef EIGEN_CURRENT_STORAGE_BASE_CLASS
#undef EIGEN_DOC_UNARY_ADDONS
/** Special case of the template operator=, in order to prevent the compiler /** Special case of the template operator=, in order to prevent the compiler
* from generating a default operator= (issue hit with g++ 4.1) * from generating a default operator= (issue hit with g++ 4.1)
@ -195,7 +195,7 @@ template<typename Derived> class MatrixBase
template<typename OtherDerived> template<typename OtherDerived>
EIGEN_DEVICE_FUNC EIGEN_DEVICE_FUNC
typename internal::scalar_product_traits<typename internal::traits<Derived>::Scalar,typename internal::traits<OtherDerived>::Scalar>::ReturnType typename ScalarBinaryOpTraits<typename internal::traits<Derived>::Scalar,typename internal::traits<OtherDerived>::Scalar>::ReturnType
dot(const MatrixBase<OtherDerived>& other) const; dot(const MatrixBase<OtherDerived>& other) const;
EIGEN_DEVICE_FUNC RealScalar squaredNorm() const; EIGEN_DEVICE_FUNC RealScalar squaredNorm() const;
@ -330,15 +330,11 @@ template<typename Derived> class MatrixBase
/////////// LU module /////////// /////////// LU module ///////////
EIGEN_DEVICE_FUNC
inline const FullPivLU<PlainObject> fullPivLu() const; inline const FullPivLU<PlainObject> fullPivLu() const;
EIGEN_DEVICE_FUNC
inline const PartialPivLU<PlainObject> partialPivLu() const; inline const PartialPivLU<PlainObject> partialPivLu() const;
EIGEN_DEVICE_FUNC
inline const PartialPivLU<PlainObject> lu() const; inline const PartialPivLU<PlainObject> lu() const;
EIGEN_DEVICE_FUNC
inline const Inverse<Derived> inverse() const; inline const Inverse<Derived> inverse() const;
template<typename ResultType> template<typename ResultType>
@ -383,7 +379,7 @@ template<typename Derived> class MatrixBase
#ifndef EIGEN_PARSED_BY_DOXYGEN #ifndef EIGEN_PARSED_BY_DOXYGEN
/// \internal helper struct to form the return type of the cross product /// \internal helper struct to form the return type of the cross product
template<typename OtherDerived> struct cross_product_return_type { template<typename OtherDerived> struct cross_product_return_type {
typedef typename internal::scalar_product_traits<typename internal::traits<Derived>::Scalar,typename internal::traits<OtherDerived>::Scalar>::ReturnType Scalar; typedef typename ScalarBinaryOpTraits<typename internal::traits<Derived>::Scalar,typename internal::traits<OtherDerived>::Scalar>::ReturnType Scalar;
typedef Matrix<Scalar,MatrixBase::RowsAtCompileTime,MatrixBase::ColsAtCompileTime> type; typedef Matrix<Scalar,MatrixBase::RowsAtCompileTime,MatrixBase::ColsAtCompileTime> type;
}; };
#endif // EIGEN_PARSED_BY_DOXYGEN #endif // EIGEN_PARSED_BY_DOXYGEN
@ -405,7 +401,6 @@ template<typename Derived> class MatrixBase
inline Matrix<Scalar,3,1> eulerAngles(Index a0, Index a1, Index a2) const; inline Matrix<Scalar,3,1> eulerAngles(Index a0, Index a1, Index a2) const;
inline ScalarMultipleReturnType operator*(const UniformScaling<Scalar>& s) const;
// put this as separate enum value to work around possible GCC 4.3 bug (?) // put this as separate enum value to work around possible GCC 4.3 bug (?)
enum { HomogeneousReturnTypeDirection = ColsAtCompileTime==1&&RowsAtCompileTime==1 ? ((internal::traits<Derived>::Flags&RowMajorBit)==RowMajorBit ? Horizontal : Vertical) enum { HomogeneousReturnTypeDirection = ColsAtCompileTime==1&&RowsAtCompileTime==1 ? ((internal::traits<Derived>::Flags&RowMajorBit)==RowMajorBit ? Horizontal : Vertical)
: ColsAtCompileTime==1 ? Vertical : Horizontal }; : ColsAtCompileTime==1 ? Vertical : Horizontal };
@ -418,8 +413,7 @@ template<typename Derived> class MatrixBase
typedef Block<const Derived, typedef Block<const Derived,
internal::traits<Derived>::ColsAtCompileTime==1 ? SizeMinusOne : 1, internal::traits<Derived>::ColsAtCompileTime==1 ? SizeMinusOne : 1,
internal::traits<Derived>::ColsAtCompileTime==1 ? 1 : SizeMinusOne> ConstStartMinusOne; internal::traits<Derived>::ColsAtCompileTime==1 ? 1 : SizeMinusOne> ConstStartMinusOne;
typedef CwiseUnaryOp<internal::scalar_quotient1_op<typename internal::traits<Derived>::Scalar>, typedef EIGEN_EXPR_BINARYOP_SCALAR_RETURN_TYPE(ConstStartMinusOne,Scalar,quotient) HNormalizedReturnType;
const ConstStartMinusOne > HNormalizedReturnType;
inline const HNormalizedReturnType hnormalized() const; inline const HNormalizedReturnType hnormalized() const;

View File

@ -39,7 +39,7 @@ class NoAlias
EIGEN_DEVICE_FUNC EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE ExpressionType& operator=(const StorageBase<OtherDerived>& other) EIGEN_STRONG_INLINE ExpressionType& operator=(const StorageBase<OtherDerived>& other)
{ {
call_assignment_no_alias(m_expression, other.derived(), internal::assign_op<Scalar>()); call_assignment_no_alias(m_expression, other.derived(), internal::assign_op<Scalar,typename OtherDerived::Scalar>());
return m_expression; return m_expression;
} }
@ -47,7 +47,7 @@ class NoAlias
EIGEN_DEVICE_FUNC EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE ExpressionType& operator+=(const StorageBase<OtherDerived>& other) EIGEN_STRONG_INLINE ExpressionType& operator+=(const StorageBase<OtherDerived>& other)
{ {
call_assignment_no_alias(m_expression, other.derived(), internal::add_assign_op<Scalar>()); call_assignment_no_alias(m_expression, other.derived(), internal::add_assign_op<Scalar,typename OtherDerived::Scalar>());
return m_expression; return m_expression;
} }
@ -55,7 +55,7 @@ class NoAlias
EIGEN_DEVICE_FUNC EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE ExpressionType& operator-=(const StorageBase<OtherDerived>& other) EIGEN_STRONG_INLINE ExpressionType& operator-=(const StorageBase<OtherDerived>& other)
{ {
call_assignment_no_alias(m_expression, other.derived(), internal::sub_assign_op<Scalar>()); call_assignment_no_alias(m_expression, other.derived(), internal::sub_assign_op<Scalar,typename OtherDerived::Scalar>());
return m_expression; return m_expression;
} }

View File

@ -12,6 +12,37 @@
namespace Eigen { namespace Eigen {
namespace internal {
// default implementation of digits10(), based on numeric_limits if specialized,
// 0 for integer types, and log10(epsilon()) otherwise.
template< typename T,
bool use_numeric_limits = std::numeric_limits<T>::is_specialized,
bool is_integer = NumTraits<T>::IsInteger>
struct default_digits10_impl
{
static int run() { return std::numeric_limits<T>::digits10; }
};
template<typename T>
struct default_digits10_impl<T,false,false> // Floating point
{
static int run() {
using std::log10;
using std::ceil;
typedef typename NumTraits<T>::Real Real;
return int(ceil(-log10(NumTraits<Real>::epsilon())));
}
};
template<typename T>
struct default_digits10_impl<T,false,true> // Integer
{
static int run() { return 0; }
};
} // end namespace internal
/** \class NumTraits /** \class NumTraits
* \ingroup Core_Module * \ingroup Core_Module
* *
@ -22,14 +53,16 @@ namespace Eigen {
* This class stores enums, typedefs and static methods giving information about a numeric type. * This class stores enums, typedefs and static methods giving information about a numeric type.
* *
* The provided data consists of: * The provided data consists of:
* \li A typedef \a Real, giving the "real part" type of \a T. If \a T is already real, * \li A typedef \c Real, giving the "real part" type of \a T. If \a T is already real,
* then \a Real is just a typedef to \a T. If \a T is \c std::complex<U> then \a Real * then \c Real is just a typedef to \a T. If \a T is \c std::complex<U> then \c Real
* is a typedef to \a U. * is a typedef to \a U.
* \li A typedef \a NonInteger, giving the type that should be used for operations producing non-integral values, * \li A typedef \c NonInteger, giving the type that should be used for operations producing non-integral values,
* such as quotients, square roots, etc. If \a T is a floating-point type, then this typedef just gives * such as quotients, square roots, etc. If \a T is a floating-point type, then this typedef just gives
* \a T again. Note however that many Eigen functions such as internal::sqrt simply refuse to * \a T again. Note however that many Eigen functions such as internal::sqrt simply refuse to
* take integers. Outside of a few cases, Eigen doesn't do automatic type promotion. Thus, this typedef is * take integers. Outside of a few cases, Eigen doesn't do automatic type promotion. Thus, this typedef is
* only intended as a helper for code that needs to explicitly promote types. * only intended as a helper for code that needs to explicitly promote types.
* \li A typedef \c Literal giving the type to use for numeric literals such as "2" or "0.5". For instance, for \c std::complex<U>, Literal is defined as \c U.
* Of course, this type must be fully compatible with \a T. In doubt, just use \a T here.
* \li A typedef \a Nested giving the type to use to nest a value inside of the expression tree. If you don't know what * \li A typedef \a Nested giving the type to use to nest a value inside of the expression tree. If you don't know what
* this means, just use \a T here. * this means, just use \a T here.
* \li An enum value \a IsComplex. It is equal to 1 if \a T is a \c std::complex * \li An enum value \a IsComplex. It is equal to 1 if \a T is a \c std::complex
@ -42,10 +75,14 @@ namespace Eigen {
* \li An enum value \a IsSigned. It is equal to \c 1 if \a T is a signed type and to 0 if \a T is unsigned. * \li An enum value \a IsSigned. It is equal to \c 1 if \a T is a signed type and to 0 if \a T is unsigned.
* \li An enum value \a RequireInitialization. It is equal to \c 1 if the constructor of the numeric type \a T must * \li An enum value \a RequireInitialization. It is equal to \c 1 if the constructor of the numeric type \a T must
* be called, and to 0 if it is safe not to call it. Default is 0 if \a T is an arithmetic type, and 1 otherwise. * be called, and to 0 if it is safe not to call it. Default is 0 if \a T is an arithmetic type, and 1 otherwise.
* \li An epsilon() function which, unlike std::numeric_limits::epsilon(), returns a \a Real instead of a \a T. * \li An epsilon() function which, unlike <a href="http://en.cppreference.com/w/cpp/types/numeric_limits/epsilon">std::numeric_limits::epsilon()</a>,
* it returns a \a Real instead of a \a T.
* \li A dummy_precision() function returning a weak epsilon value. It is mainly used as a default * \li A dummy_precision() function returning a weak epsilon value. It is mainly used as a default
* value by the fuzzy comparison operators. * value by the fuzzy comparison operators.
* \li highest() and lowest() functions returning the highest and lowest possible values respectively. * \li highest() and lowest() functions returning the highest and lowest possible values respectively.
* \li digits10() function returning the number of decimal digits that can be represented without change. This is
* the analogue of <a href="http://en.cppreference.com/w/cpp/types/numeric_limits/digits10">std::numeric_limits<T>::digits10</a>
* which is used as the default implementation if specialized.
*/ */
template<typename T> struct GenericNumTraits template<typename T> struct GenericNumTraits
@ -60,23 +97,6 @@ template<typename T> struct GenericNumTraits
MulCost = 1 MulCost = 1
}; };
// Division is messy but important, because it is expensive and throughput
// varies significantly. The following numbers are based on min division
// throughput on Haswell.
template<bool Vectorized>
struct Div {
enum {
#ifdef EIGEN_VECTORIZE_AVX
AVX = true,
#else
AVX = false,
#endif
Cost = IsInteger ? (sizeof(T) == 8 ? (IsSigned ? 24 : 21) : (IsSigned ? 8 : 9)):
Vectorized ? (sizeof(T) == 8 ? (AVX ? 16 : 8) : (AVX ? 14 : 7)) : 8
};
};
typedef T Real; typedef T Real;
typedef typename internal::conditional< typedef typename internal::conditional<
IsInteger, IsInteger,
@ -84,12 +104,20 @@ template<typename T> struct GenericNumTraits
T T
>::type NonInteger; >::type NonInteger;
typedef T Nested; typedef T Nested;
typedef T Literal;
EIGEN_DEVICE_FUNC EIGEN_DEVICE_FUNC
static inline Real epsilon() static inline Real epsilon()
{ {
return numext::numeric_limits<T>::epsilon(); return numext::numeric_limits<T>::epsilon();
} }
EIGEN_DEVICE_FUNC
static inline int digits10()
{
return internal::default_digits10_impl<T>::run();
}
EIGEN_DEVICE_FUNC EIGEN_DEVICE_FUNC
static inline Real dummy_precision() static inline Real dummy_precision()
{ {
@ -145,6 +173,7 @@ template<typename _Real> struct NumTraits<std::complex<_Real> >
: GenericNumTraits<std::complex<_Real> > : GenericNumTraits<std::complex<_Real> >
{ {
typedef _Real Real; typedef _Real Real;
typedef typename NumTraits<_Real>::Literal Literal;
enum { enum {
IsComplex = 1, IsComplex = 1,
RequireInitialization = NumTraits<_Real>::RequireInitialization, RequireInitialization = NumTraits<_Real>::RequireInitialization,
@ -157,6 +186,8 @@ template<typename _Real> struct NumTraits<std::complex<_Real> >
static inline Real epsilon() { return NumTraits<Real>::epsilon(); } static inline Real epsilon() { return NumTraits<Real>::epsilon(); }
EIGEN_DEVICE_FUNC EIGEN_DEVICE_FUNC
static inline Real dummy_precision() { return NumTraits<Real>::dummy_precision(); } static inline Real dummy_precision() { return NumTraits<Real>::dummy_precision(); }
EIGEN_DEVICE_FUNC
static inline int digits10() { return NumTraits<Real>::digits10(); }
}; };
template<typename Scalar, int Rows, int Cols, int Options, int MaxRows, int MaxCols> template<typename Scalar, int Rows, int Cols, int Options, int MaxRows, int MaxCols>
@ -168,6 +199,7 @@ struct NumTraits<Array<Scalar, Rows, Cols, Options, MaxRows, MaxCols> >
typedef typename NumTraits<Scalar>::NonInteger NonIntegerScalar; typedef typename NumTraits<Scalar>::NonInteger NonIntegerScalar;
typedef Array<NonIntegerScalar, Rows, Cols, Options, MaxRows, MaxCols> NonInteger; typedef Array<NonIntegerScalar, Rows, Cols, Options, MaxRows, MaxCols> NonInteger;
typedef ArrayType & Nested; typedef ArrayType & Nested;
typedef typename NumTraits<Scalar>::Literal Literal;
enum { enum {
IsComplex = NumTraits<Scalar>::IsComplex, IsComplex = NumTraits<Scalar>::IsComplex,
@ -185,6 +217,30 @@ struct NumTraits<Array<Scalar, Rows, Cols, Options, MaxRows, MaxCols> >
static inline RealScalar dummy_precision() { return NumTraits<RealScalar>::dummy_precision(); } static inline RealScalar dummy_precision() { return NumTraits<RealScalar>::dummy_precision(); }
}; };
template<> struct NumTraits<std::string>
: GenericNumTraits<std::string>
{
enum {
RequireInitialization = 1,
ReadCost = HugeCost,
AddCost = HugeCost,
MulCost = HugeCost
};
static inline int digits10() { return 0; }
private:
static inline std::string epsilon();
static inline std::string dummy_precision();
static inline std::string lowest();
static inline std::string highest();
static inline std::string infinity();
static inline std::string quiet_NaN();
};
// Empty specialization for void to allow template specialization based on NumTraits<T>::Real with T==void and SFINAE.
template<> struct NumTraits<void> {};
} // end namespace Eigen } // end namespace Eigen
#endif // EIGEN_NUMTRAITS_H #endif // EIGEN_NUMTRAITS_H

View File

@ -59,33 +59,34 @@ template<typename MatrixTypeA, typename MatrixTypeB, bool SwapPointers> struct m
} // end namespace internal } // end namespace internal
/** \class PlainObjectBase /** \class PlainObjectBase
* \ingroup Core_Module
* \brief %Dense storage base class for matrices and arrays. * \brief %Dense storage base class for matrices and arrays.
* *
* This class can be extended with the help of the plugin mechanism described on the page * This class can be extended with the help of the plugin mechanism described on the page
* \ref TopicCustomizingEigen by defining the preprocessor symbol \c EIGEN_PLAINOBJECTBASE_PLUGIN. * \ref TopicCustomizing_Plugins by defining the preprocessor symbol \c EIGEN_PLAINOBJECTBASE_PLUGIN.
* *
* \sa \ref TopicClassHierarchy * \sa \ref TopicClassHierarchy
*/ */
#ifdef EIGEN_PARSED_BY_DOXYGEN #ifdef EIGEN_PARSED_BY_DOXYGEN
namespace internal { namespace doxygen {
// this is a workaround to doxygen not being able to understand the inheritance logic // this is a workaround to doxygen not being able to understand the inheritance logic
// when it is hidden by the dense_xpr_base helper struct. // when it is hidden by the dense_xpr_base helper struct.
/** This class is just a workaround for Doxygen and it does not not actually exist. */ /** This class is just a workaround for Doxygen and it does not not actually exist. */
template<typename Derived> struct dense_xpr_base_dispatcher_for_doxygen;// : public MatrixBase<Derived> {}; template<typename Derived> struct dense_xpr_base_dispatcher;
/** This class is just a workaround for Doxygen and it does not not actually exist. */ /** This class is just a workaround for Doxygen and it does not not actually exist. */
template<typename _Scalar, int _Rows, int _Cols, int _Options, int _MaxRows, int _MaxCols> template<typename _Scalar, int _Rows, int _Cols, int _Options, int _MaxRows, int _MaxCols>
struct dense_xpr_base_dispatcher_for_doxygen<Matrix<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols> > struct dense_xpr_base_dispatcher<Matrix<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols> >
: public MatrixBase<Matrix<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols> > {}; : public MatrixBase<Matrix<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols> > {};
/** This class is just a workaround for Doxygen and it does not not actually exist. */ /** This class is just a workaround for Doxygen and it does not not actually exist. */
template<typename _Scalar, int _Rows, int _Cols, int _Options, int _MaxRows, int _MaxCols> template<typename _Scalar, int _Rows, int _Cols, int _Options, int _MaxRows, int _MaxCols>
struct dense_xpr_base_dispatcher_for_doxygen<Array<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols> > struct dense_xpr_base_dispatcher<Array<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols> >
: public ArrayBase<Array<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols> > {}; : public ArrayBase<Array<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols> > {};
} // namespace internal } // namespace doxygen
template<typename Derived> template<typename Derived>
class PlainObjectBase : public internal::dense_xpr_base_dispatcher_for_doxygen<Derived> class PlainObjectBase : public doxygen::dense_xpr_base_dispatcher<Derived>
#else #else
template<typename Derived> template<typename Derived>
class PlainObjectBase : public internal::dense_xpr_base<Derived>::type class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
@ -145,6 +146,10 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
EIGEN_DEVICE_FUNC EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE Index cols() const { return m_storage.cols(); } EIGEN_STRONG_INLINE Index cols() const { return m_storage.cols(); }
/** This is an overloaded version of DenseCoeffsBase<Derived,ReadOnlyAccessors>::coeff(Index,Index) const
* provided to by-pass the creation of an evaluator of the expression, thus saving compilation efforts.
*
* See DenseCoeffsBase<Derived,ReadOnlyAccessors>::coeff(Index) const for details. */
EIGEN_DEVICE_FUNC EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE const Scalar& coeff(Index rowId, Index colId) const EIGEN_STRONG_INLINE const Scalar& coeff(Index rowId, Index colId) const
{ {
@ -154,12 +159,20 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
return m_storage.data()[rowId + colId * m_storage.rows()]; return m_storage.data()[rowId + colId * m_storage.rows()];
} }
/** This is an overloaded version of DenseCoeffsBase<Derived,ReadOnlyAccessors>::coeff(Index) const
* provided to by-pass the creation of an evaluator of the expression, thus saving compilation efforts.
*
* See DenseCoeffsBase<Derived,ReadOnlyAccessors>::coeff(Index) const for details. */
EIGEN_DEVICE_FUNC EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE const Scalar& coeff(Index index) const EIGEN_STRONG_INLINE const Scalar& coeff(Index index) const
{ {
return m_storage.data()[index]; return m_storage.data()[index];
} }
/** This is an overloaded version of DenseCoeffsBase<Derived,WriteAccessors>::coeffRef(Index,Index) const
* provided to by-pass the creation of an evaluator of the expression, thus saving compilation efforts.
*
* See DenseCoeffsBase<Derived,WriteAccessors>::coeffRef(Index,Index) const for details. */
EIGEN_DEVICE_FUNC EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE Scalar& coeffRef(Index rowId, Index colId) EIGEN_STRONG_INLINE Scalar& coeffRef(Index rowId, Index colId)
{ {
@ -169,12 +182,18 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
return m_storage.data()[rowId + colId * m_storage.rows()]; return m_storage.data()[rowId + colId * m_storage.rows()];
} }
/** This is an overloaded version of DenseCoeffsBase<Derived,WriteAccessors>::coeffRef(Index) const
* provided to by-pass the creation of an evaluator of the expression, thus saving compilation efforts.
*
* See DenseCoeffsBase<Derived,WriteAccessors>::coeffRef(Index) const for details. */
EIGEN_DEVICE_FUNC EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE Scalar& coeffRef(Index index) EIGEN_STRONG_INLINE Scalar& coeffRef(Index index)
{ {
return m_storage.data()[index]; return m_storage.data()[index];
} }
/** This is the const version of coeffRef(Index,Index) which is thus synonym of coeff(Index,Index).
* It is provided for convenience. */
EIGEN_DEVICE_FUNC EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE const Scalar& coeffRef(Index rowId, Index colId) const EIGEN_STRONG_INLINE const Scalar& coeffRef(Index rowId, Index colId) const
{ {
@ -184,6 +203,8 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
return m_storage.data()[rowId + colId * m_storage.rows()]; return m_storage.data()[rowId + colId * m_storage.rows()];
} }
/** This is the const version of coeffRef(Index) which is thus synonym of coeff(Index).
* It is provided for convenience. */
EIGEN_DEVICE_FUNC EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE const Scalar& coeffRef(Index index) const EIGEN_STRONG_INLINE const Scalar& coeffRef(Index index) const
{ {
@ -471,15 +492,15 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
} }
#endif #endif
#ifdef EIGEN_HAVE_RVALUE_REFERENCES #if EIGEN_HAS_RVALUE_REFERENCES
EIGEN_DEVICE_FUNC EIGEN_DEVICE_FUNC
PlainObjectBase(PlainObjectBase&& other) PlainObjectBase(PlainObjectBase&& other) EIGEN_NOEXCEPT
: m_storage( std::move(other.m_storage) ) : m_storage( std::move(other.m_storage) )
{ {
} }
EIGEN_DEVICE_FUNC EIGEN_DEVICE_FUNC
PlainObjectBase& operator=(PlainObjectBase&& other) PlainObjectBase& operator=(PlainObjectBase&& other) EIGEN_NOEXCEPT
{ {
using std::swap; using std::swap;
swap(m_storage, other.m_storage); swap(m_storage, other.m_storage);
@ -697,7 +718,7 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
//_resize_to_match(other); //_resize_to_match(other);
// the 'false' below means to enforce lazy evaluation. We don't use lazyAssign() because // the 'false' below means to enforce lazy evaluation. We don't use lazyAssign() because
// it wouldn't allow to copy a row-vector into a column-vector. // it wouldn't allow to copy a row-vector into a column-vector.
internal::call_assignment_no_alias(this->derived(), other.derived(), internal::assign_op<Scalar>()); internal::call_assignment_no_alias(this->derived(), other.derived(), internal::assign_op<Scalar,typename OtherDerived::Scalar>());
return this->derived(); return this->derived();
} }
@ -713,11 +734,11 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
template<typename T0, typename T1> template<typename T0, typename T1>
EIGEN_DEVICE_FUNC EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE void _init2(const Scalar& val0, const Scalar& val1, typename internal::enable_if<Base::SizeAtCompileTime==2,T0>::type* = 0) EIGEN_STRONG_INLINE void _init2(const T0& val0, const T1& val1, typename internal::enable_if<Base::SizeAtCompileTime==2,T0>::type* = 0)
{ {
EIGEN_STATIC_ASSERT_VECTOR_SPECIFIC_SIZE(PlainObjectBase, 2) EIGEN_STATIC_ASSERT_VECTOR_SPECIFIC_SIZE(PlainObjectBase, 2)
m_storage.data()[0] = val0; m_storage.data()[0] = Scalar(val0);
m_storage.data()[1] = val1; m_storage.data()[1] = Scalar(val1);
} }
template<typename T0, typename T1> template<typename T0, typename T1>

View File

@ -16,39 +16,6 @@ template<typename Lhs, typename Rhs, int Option, typename StorageKind> class Pro
namespace internal { namespace internal {
// Determine the scalar of Product<Lhs, Rhs>. This is normally the same as Lhs::Scalar times
// Rhs::Scalar, but product with permutation matrices inherit the scalar of the other factor.
template<typename Lhs, typename Rhs, typename LhsShape = typename evaluator_traits<Lhs>::Shape,
typename RhsShape = typename evaluator_traits<Rhs>::Shape >
struct product_result_scalar
{
typedef typename scalar_product_traits<typename Lhs::Scalar, typename Rhs::Scalar>::ReturnType Scalar;
};
template<typename Lhs, typename Rhs, typename RhsShape>
struct product_result_scalar<Lhs, Rhs, PermutationShape, RhsShape>
{
typedef typename Rhs::Scalar Scalar;
};
template<typename Lhs, typename Rhs, typename LhsShape>
struct product_result_scalar<Lhs, Rhs, LhsShape, PermutationShape>
{
typedef typename Lhs::Scalar Scalar;
};
template<typename Lhs, typename Rhs, typename RhsShape>
struct product_result_scalar<Lhs, Rhs, TranspositionsShape, RhsShape>
{
typedef typename Rhs::Scalar Scalar;
};
template<typename Lhs, typename Rhs, typename LhsShape>
struct product_result_scalar<Lhs, Rhs, LhsShape, TranspositionsShape>
{
typedef typename Lhs::Scalar Scalar;
};
template<typename Lhs, typename Rhs, int Option> template<typename Lhs, typename Rhs, int Option>
struct traits<Product<Lhs, Rhs, Option> > struct traits<Product<Lhs, Rhs, Option> >
{ {
@ -59,7 +26,7 @@ struct traits<Product<Lhs, Rhs, Option> >
typedef MatrixXpr XprKind; typedef MatrixXpr XprKind;
typedef typename product_result_scalar<LhsCleaned,RhsCleaned>::Scalar Scalar; typedef typename ScalarBinaryOpTraits<typename traits<LhsCleaned>::Scalar, typename traits<RhsCleaned>::Scalar>::ReturnType Scalar;
typedef typename product_promote_storage_type<typename LhsTraits::StorageKind, typedef typename product_promote_storage_type<typename LhsTraits::StorageKind,
typename RhsTraits::StorageKind, typename RhsTraits::StorageKind,
internal::product_type<Lhs,Rhs>::ret>::ret StorageKind; internal::product_type<Lhs,Rhs>::ret>::ret StorageKind;

View File

@ -35,22 +35,28 @@ struct evaluator<Product<Lhs, Rhs, Options> >
EIGEN_DEVICE_FUNC explicit evaluator(const XprType& xpr) : Base(xpr) {} EIGEN_DEVICE_FUNC explicit evaluator(const XprType& xpr) : Base(xpr) {}
}; };
// Catch scalar * ( A * B ) and transform it to (A*scalar) * B // Catch "scalar * ( A * B )" and transform it to "(A*scalar) * B"
// TODO we should apply that rule only if that's really helpful // TODO we should apply that rule only if that's really helpful
template<typename Lhs, typename Rhs, typename Scalar> template<typename Lhs, typename Rhs, typename Scalar1, typename Scalar2, typename Plain1>
struct evaluator_assume_aliasing<CwiseUnaryOp<internal::scalar_multiple_op<Scalar>, const Product<Lhs, Rhs, DefaultProduct> > > struct evaluator_assume_aliasing<CwiseBinaryOp<internal::scalar_product_op<Scalar1,Scalar2>,
const CwiseNullaryOp<internal::scalar_constant_op<Scalar1>, Plain1>,
const Product<Lhs, Rhs, DefaultProduct> > >
{ {
static const bool value = true; static const bool value = true;
}; };
template<typename Lhs, typename Rhs, typename Scalar> template<typename Lhs, typename Rhs, typename Scalar1, typename Scalar2, typename Plain1>
struct evaluator<CwiseUnaryOp<internal::scalar_multiple_op<Scalar>, const Product<Lhs, Rhs, DefaultProduct> > > struct evaluator<CwiseBinaryOp<internal::scalar_product_op<Scalar1,Scalar2>,
: public evaluator<Product<CwiseUnaryOp<internal::scalar_multiple_op<Scalar>,const Lhs>, Rhs, DefaultProduct> > const CwiseNullaryOp<internal::scalar_constant_op<Scalar1>, Plain1>,
const Product<Lhs, Rhs, DefaultProduct> > >
: public evaluator<Product<EIGEN_SCALAR_BINARYOP_EXPR_RETURN_TYPE(Scalar1,Lhs,product), Rhs, DefaultProduct> >
{ {
typedef CwiseUnaryOp<internal::scalar_multiple_op<Scalar>, const Product<Lhs, Rhs, DefaultProduct> > XprType; typedef CwiseBinaryOp<internal::scalar_product_op<Scalar1,Scalar2>,
typedef evaluator<Product<CwiseUnaryOp<internal::scalar_multiple_op<Scalar>,const Lhs>, Rhs, DefaultProduct> > Base; const CwiseNullaryOp<internal::scalar_constant_op<Scalar1>, Plain1>,
const Product<Lhs, Rhs, DefaultProduct> > XprType;
typedef evaluator<Product<EIGEN_SCALAR_BINARYOP_EXPR_RETURN_TYPE(Scalar1,Lhs,product), Rhs, DefaultProduct> > Base;
EIGEN_DEVICE_FUNC explicit evaluator(const XprType& xpr) EIGEN_DEVICE_FUNC explicit evaluator(const XprType& xpr)
: Base(xpr.functor().m_other * xpr.nestedExpression().lhs() * xpr.nestedExpression().rhs()) : Base(xpr.lhs().functor().m_other * xpr.rhs().lhs() * xpr.rhs().rhs())
{} {}
}; };
@ -122,13 +128,17 @@ protected:
PlainObject m_result; PlainObject m_result;
}; };
// The following three shortcuts are enabled only if the scalar types match excatly.
// TODO: we could enable them for different scalar types when the product is not vectorized.
// Dense = Product // Dense = Product
template< typename DstXprType, typename Lhs, typename Rhs, int Options, typename Scalar> template< typename DstXprType, typename Lhs, typename Rhs, int Options, typename Scalar>
struct Assignment<DstXprType, Product<Lhs,Rhs,Options>, internal::assign_op<Scalar>, Dense2Dense, struct Assignment<DstXprType, Product<Lhs,Rhs,Options>, internal::assign_op<Scalar,Scalar>, Dense2Dense,
typename enable_if<(Options==DefaultProduct || Options==AliasFreeProduct),Scalar>::type> typename enable_if<(Options==DefaultProduct || Options==AliasFreeProduct)>::type>
{ {
typedef Product<Lhs,Rhs,Options> SrcXprType; typedef Product<Lhs,Rhs,Options> SrcXprType;
static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<Scalar> &) static EIGEN_STRONG_INLINE
void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<Scalar,Scalar> &)
{ {
// FIXME shall we handle nested_eval here? // FIXME shall we handle nested_eval here?
generic_product_impl<Lhs, Rhs>::evalTo(dst, src.lhs(), src.rhs()); generic_product_impl<Lhs, Rhs>::evalTo(dst, src.lhs(), src.rhs());
@ -137,11 +147,12 @@ struct Assignment<DstXprType, Product<Lhs,Rhs,Options>, internal::assign_op<Scal
// Dense += Product // Dense += Product
template< typename DstXprType, typename Lhs, typename Rhs, int Options, typename Scalar> template< typename DstXprType, typename Lhs, typename Rhs, int Options, typename Scalar>
struct Assignment<DstXprType, Product<Lhs,Rhs,Options>, internal::add_assign_op<Scalar>, Dense2Dense, struct Assignment<DstXprType, Product<Lhs,Rhs,Options>, internal::add_assign_op<Scalar,Scalar>, Dense2Dense,
typename enable_if<(Options==DefaultProduct || Options==AliasFreeProduct),Scalar>::type> typename enable_if<(Options==DefaultProduct || Options==AliasFreeProduct)>::type>
{ {
typedef Product<Lhs,Rhs,Options> SrcXprType; typedef Product<Lhs,Rhs,Options> SrcXprType;
static void run(DstXprType &dst, const SrcXprType &src, const internal::add_assign_op<Scalar> &) static EIGEN_STRONG_INLINE
void run(DstXprType &dst, const SrcXprType &src, const internal::add_assign_op<Scalar,Scalar> &)
{ {
// FIXME shall we handle nested_eval here? // FIXME shall we handle nested_eval here?
generic_product_impl<Lhs, Rhs>::addTo(dst, src.lhs(), src.rhs()); generic_product_impl<Lhs, Rhs>::addTo(dst, src.lhs(), src.rhs());
@ -150,11 +161,12 @@ struct Assignment<DstXprType, Product<Lhs,Rhs,Options>, internal::add_assign_op<
// Dense -= Product // Dense -= Product
template< typename DstXprType, typename Lhs, typename Rhs, int Options, typename Scalar> template< typename DstXprType, typename Lhs, typename Rhs, int Options, typename Scalar>
struct Assignment<DstXprType, Product<Lhs,Rhs,Options>, internal::sub_assign_op<Scalar>, Dense2Dense, struct Assignment<DstXprType, Product<Lhs,Rhs,Options>, internal::sub_assign_op<Scalar,Scalar>, Dense2Dense,
typename enable_if<(Options==DefaultProduct || Options==AliasFreeProduct),Scalar>::type> typename enable_if<(Options==DefaultProduct || Options==AliasFreeProduct)>::type>
{ {
typedef Product<Lhs,Rhs,Options> SrcXprType; typedef Product<Lhs,Rhs,Options> SrcXprType;
static void run(DstXprType &dst, const SrcXprType &src, const internal::sub_assign_op<Scalar> &) static EIGEN_STRONG_INLINE
void run(DstXprType &dst, const SrcXprType &src, const internal::sub_assign_op<Scalar,Scalar> &)
{ {
// FIXME shall we handle nested_eval here? // FIXME shall we handle nested_eval here?
generic_product_impl<Lhs, Rhs>::subTo(dst, src.lhs(), src.rhs()); generic_product_impl<Lhs, Rhs>::subTo(dst, src.lhs(), src.rhs());
@ -165,55 +177,57 @@ struct Assignment<DstXprType, Product<Lhs,Rhs,Options>, internal::sub_assign_op<
// Dense ?= scalar * Product // Dense ?= scalar * Product
// TODO we should apply that rule if that's really helpful // TODO we should apply that rule if that's really helpful
// for instance, this is not good for inner products // for instance, this is not good for inner products
template< typename DstXprType, typename Lhs, typename Rhs, typename AssignFunc, typename Scalar, typename ScalarBis> template< typename DstXprType, typename Lhs, typename Rhs, typename AssignFunc, typename Scalar, typename ScalarBis, typename Plain>
struct Assignment<DstXprType, CwiseUnaryOp<internal::scalar_multiple_op<ScalarBis>, struct Assignment<DstXprType, CwiseBinaryOp<internal::scalar_product_op<ScalarBis,Scalar>, const CwiseNullaryOp<internal::scalar_constant_op<ScalarBis>,Plain>,
const Product<Lhs,Rhs,DefaultProduct> >, AssignFunc, Dense2Dense, Scalar> const Product<Lhs,Rhs,DefaultProduct> >, AssignFunc, Dense2Dense>
{ {
typedef CwiseUnaryOp<internal::scalar_multiple_op<ScalarBis>, typedef CwiseBinaryOp<internal::scalar_product_op<ScalarBis,Scalar>,
const CwiseNullaryOp<internal::scalar_constant_op<ScalarBis>,Plain>,
const Product<Lhs,Rhs,DefaultProduct> > SrcXprType; const Product<Lhs,Rhs,DefaultProduct> > SrcXprType;
static void run(DstXprType &dst, const SrcXprType &src, const AssignFunc& func) static EIGEN_STRONG_INLINE
void run(DstXprType &dst, const SrcXprType &src, const AssignFunc& func)
{ {
call_assignment_no_alias(dst, (src.functor().m_other * src.nestedExpression().lhs())*src.nestedExpression().rhs(), func); call_assignment_no_alias(dst, (src.lhs().functor().m_other * src.rhs().lhs())*src.rhs().rhs(), func);
} }
}; };
//---------------------------------------- //----------------------------------------
// Catch "Dense ?= xpr + Product<>" expression to save one temporary // Catch "Dense ?= xpr + Product<>" expression to save one temporary
// FIXME we could probably enable these rules for any product, i.e., not only Dense and DefaultProduct // FIXME we could probably enable these rules for any product, i.e., not only Dense and DefaultProduct
// TODO enable it for "Dense ?= xpr - Product<>" as well.
template<typename OtherXpr, typename Lhs, typename Rhs> template<typename OtherXpr, typename Lhs, typename Rhs>
struct evaluator_assume_aliasing<CwiseBinaryOp<internal::scalar_sum_op<typename OtherXpr::Scalar>, const OtherXpr, struct evaluator_assume_aliasing<CwiseBinaryOp<internal::scalar_sum_op<typename OtherXpr::Scalar,typename Product<Lhs,Rhs,DefaultProduct>::Scalar>, const OtherXpr,
const Product<Lhs,Rhs,DefaultProduct> >, DenseShape > { const Product<Lhs,Rhs,DefaultProduct> >, DenseShape > {
static const bool value = true; static const bool value = true;
}; };
template<typename DstXprType, typename OtherXpr, typename ProductType, typename Scalar, typename Func1, typename Func2> template<typename DstXprType, typename OtherXpr, typename ProductType, typename Func1, typename Func2>
struct assignment_from_xpr_plus_product struct assignment_from_xpr_op_product
{ {
typedef CwiseBinaryOp<internal::scalar_sum_op<Scalar>, const OtherXpr, const ProductType> SrcXprType; template<typename SrcXprType, typename InitialFunc>
static void run(DstXprType &dst, const SrcXprType &src, const Func1& func) static EIGEN_STRONG_INLINE
void run(DstXprType &dst, const SrcXprType &src, const InitialFunc& /*func*/)
{ {
call_assignment_no_alias(dst, src.lhs(), func); call_assignment_no_alias(dst, src.lhs(), Func1());
call_assignment_no_alias(dst, src.rhs(), Func2()); call_assignment_no_alias(dst, src.rhs(), Func2());
} }
}; };
template< typename DstXprType, typename OtherXpr, typename Lhs, typename Rhs, typename Scalar> #define EIGEN_CATCH_ASSIGN_XPR_OP_PRODUCT(ASSIGN_OP,BINOP,ASSIGN_OP2) \
struct Assignment<DstXprType, CwiseBinaryOp<internal::scalar_sum_op<Scalar>, const OtherXpr, template< typename DstXprType, typename OtherXpr, typename Lhs, typename Rhs, typename DstScalar, typename SrcScalar, typename OtherScalar,typename ProdScalar> \
const Product<Lhs,Rhs,DefaultProduct> >, internal::assign_op<Scalar>, Dense2Dense> struct Assignment<DstXprType, CwiseBinaryOp<internal::BINOP<OtherScalar,ProdScalar>, const OtherXpr, \
: assignment_from_xpr_plus_product<DstXprType, OtherXpr, Product<Lhs,Rhs,DefaultProduct>, Scalar, internal::assign_op<Scalar>, internal::add_assign_op<Scalar> > const Product<Lhs,Rhs,DefaultProduct> >, internal::ASSIGN_OP<DstScalar,SrcScalar>, Dense2Dense> \
{}; : assignment_from_xpr_op_product<DstXprType, OtherXpr, Product<Lhs,Rhs,DefaultProduct>, internal::ASSIGN_OP<DstScalar,OtherScalar>, internal::ASSIGN_OP2<DstScalar,ProdScalar> > \
template< typename DstXprType, typename OtherXpr, typename Lhs, typename Rhs, typename Scalar> {}
struct Assignment<DstXprType, CwiseBinaryOp<internal::scalar_sum_op<Scalar>, const OtherXpr,
const Product<Lhs,Rhs,DefaultProduct> >, internal::add_assign_op<Scalar>, Dense2Dense> EIGEN_CATCH_ASSIGN_XPR_OP_PRODUCT(assign_op, scalar_sum_op,add_assign_op);
: assignment_from_xpr_plus_product<DstXprType, OtherXpr, Product<Lhs,Rhs,DefaultProduct>, Scalar, internal::add_assign_op<Scalar>, internal::add_assign_op<Scalar> > EIGEN_CATCH_ASSIGN_XPR_OP_PRODUCT(add_assign_op,scalar_sum_op,add_assign_op);
{}; EIGEN_CATCH_ASSIGN_XPR_OP_PRODUCT(sub_assign_op,scalar_sum_op,sub_assign_op);
template< typename DstXprType, typename OtherXpr, typename Lhs, typename Rhs, typename Scalar>
struct Assignment<DstXprType, CwiseBinaryOp<internal::scalar_sum_op<Scalar>, const OtherXpr, EIGEN_CATCH_ASSIGN_XPR_OP_PRODUCT(assign_op, scalar_difference_op,sub_assign_op);
const Product<Lhs,Rhs,DefaultProduct> >, internal::sub_assign_op<Scalar>, Dense2Dense> EIGEN_CATCH_ASSIGN_XPR_OP_PRODUCT(add_assign_op,scalar_difference_op,sub_assign_op);
: assignment_from_xpr_plus_product<DstXprType, OtherXpr, Product<Lhs,Rhs,DefaultProduct>, Scalar, internal::sub_assign_op<Scalar>, internal::sub_assign_op<Scalar> > EIGEN_CATCH_ASSIGN_XPR_OP_PRODUCT(sub_assign_op,scalar_difference_op,add_assign_op);
{};
//---------------------------------------- //----------------------------------------
template<typename Lhs, typename Rhs> template<typename Lhs, typename Rhs>
@ -243,7 +257,7 @@ struct generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,InnerProduct>
// Column major result // Column major result
template<typename Dst, typename Lhs, typename Rhs, typename Func> template<typename Dst, typename Lhs, typename Rhs, typename Func>
EIGEN_DONT_INLINE void outer_product_selector_run(Dst& dst, const Lhs &lhs, const Rhs &rhs, const Func& func, const false_type&) void outer_product_selector_run(Dst& dst, const Lhs &lhs, const Rhs &rhs, const Func& func, const false_type&)
{ {
evaluator<Rhs> rhsEval(rhs); evaluator<Rhs> rhsEval(rhs);
typename nested_eval<Lhs,Rhs::SizeAtCompileTime>::type actual_lhs(lhs); typename nested_eval<Lhs,Rhs::SizeAtCompileTime>::type actual_lhs(lhs);
@ -251,12 +265,12 @@ EIGEN_DONT_INLINE void outer_product_selector_run(Dst& dst, const Lhs &lhs, cons
// FIXME not very good if rhs is real and lhs complex while alpha is real too // FIXME not very good if rhs is real and lhs complex while alpha is real too
const Index cols = dst.cols(); const Index cols = dst.cols();
for (Index j=0; j<cols; ++j) for (Index j=0; j<cols; ++j)
func(dst.col(j), rhsEval.coeff(0,j) * actual_lhs); func(dst.col(j), rhsEval.coeff(Index(0),j) * actual_lhs);
} }
// Row major result // Row major result
template<typename Dst, typename Lhs, typename Rhs, typename Func> template<typename Dst, typename Lhs, typename Rhs, typename Func>
EIGEN_DONT_INLINE void outer_product_selector_run(Dst& dst, const Lhs &lhs, const Rhs &rhs, const Func& func, const true_type&) void outer_product_selector_run(Dst& dst, const Lhs &lhs, const Rhs &rhs, const Func& func, const true_type&)
{ {
evaluator<Lhs> lhsEval(lhs); evaluator<Lhs> lhsEval(lhs);
typename nested_eval<Rhs,Lhs::SizeAtCompileTime>::type actual_rhs(rhs); typename nested_eval<Rhs,Lhs::SizeAtCompileTime>::type actual_rhs(rhs);
@ -264,7 +278,7 @@ EIGEN_DONT_INLINE void outer_product_selector_run(Dst& dst, const Lhs &lhs, cons
// FIXME not very good if lhs is real and rhs complex while alpha is real too // FIXME not very good if lhs is real and rhs complex while alpha is real too
const Index rows = dst.rows(); const Index rows = dst.rows();
for (Index i=0; i<rows; ++i) for (Index i=0; i<rows; ++i)
func(dst.row(i), lhsEval.coeff(i,0) * actual_rhs); func(dst.row(i), lhsEval.coeff(i,Index(0)) * actual_rhs);
} }
template<typename Lhs, typename Rhs> template<typename Lhs, typename Rhs>
@ -319,19 +333,19 @@ struct generic_product_impl_base
typedef typename Product<Lhs,Rhs>::Scalar Scalar; typedef typename Product<Lhs,Rhs>::Scalar Scalar;
template<typename Dst> template<typename Dst>
static void evalTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) static EIGEN_STRONG_INLINE void evalTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
{ dst.setZero(); scaleAndAddTo(dst, lhs, rhs, Scalar(1)); } { dst.setZero(); scaleAndAddTo(dst, lhs, rhs, Scalar(1)); }
template<typename Dst> template<typename Dst>
static void addTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) static EIGEN_STRONG_INLINE void addTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
{ scaleAndAddTo(dst,lhs, rhs, Scalar(1)); } { scaleAndAddTo(dst,lhs, rhs, Scalar(1)); }
template<typename Dst> template<typename Dst>
static void subTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) static EIGEN_STRONG_INLINE void subTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
{ scaleAndAddTo(dst, lhs, rhs, Scalar(-1)); } { scaleAndAddTo(dst, lhs, rhs, Scalar(-1)); }
template<typename Dst> template<typename Dst>
static void scaleAndAddTo(Dst& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha) static EIGEN_STRONG_INLINE void scaleAndAddTo(Dst& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha)
{ Derived::scaleAndAddTo(dst,lhs,rhs,alpha); } { Derived::scaleAndAddTo(dst,lhs,rhs,alpha); }
}; };
@ -345,7 +359,7 @@ struct generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,GemvProduct>
typedef typename internal::conditional<int(Side)==OnTheRight,Lhs,Rhs>::type MatrixType; typedef typename internal::conditional<int(Side)==OnTheRight,Lhs,Rhs>::type MatrixType;
template<typename Dest> template<typename Dest>
static void scaleAndAddTo(Dest& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha) static EIGEN_STRONG_INLINE void scaleAndAddTo(Dest& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha)
{ {
internal::gemv_dense_selector<Side, internal::gemv_dense_selector<Side,
(int(MatrixType::Flags)&RowMajorBit) ? RowMajor : ColMajor, (int(MatrixType::Flags)&RowMajorBit) ? RowMajor : ColMajor,
@ -360,25 +374,25 @@ struct generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,CoeffBasedProductMode>
typedef typename Product<Lhs,Rhs>::Scalar Scalar; typedef typename Product<Lhs,Rhs>::Scalar Scalar;
template<typename Dst> template<typename Dst>
static inline void evalTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) static EIGEN_STRONG_INLINE void evalTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
{ {
// Same as: dst.noalias() = lhs.lazyProduct(rhs); // Same as: dst.noalias() = lhs.lazyProduct(rhs);
// but easier on the compiler side // but easier on the compiler side
call_assignment_no_alias(dst, lhs.lazyProduct(rhs), internal::assign_op<Scalar>()); call_assignment_no_alias(dst, lhs.lazyProduct(rhs), internal::assign_op<typename Dst::Scalar,Scalar>());
} }
template<typename Dst> template<typename Dst>
static inline void addTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) static EIGEN_STRONG_INLINE void addTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
{ {
// dst.noalias() += lhs.lazyProduct(rhs); // dst.noalias() += lhs.lazyProduct(rhs);
call_assignment_no_alias(dst, lhs.lazyProduct(rhs), internal::add_assign_op<Scalar>()); call_assignment_no_alias(dst, lhs.lazyProduct(rhs), internal::add_assign_op<typename Dst::Scalar,Scalar>());
} }
template<typename Dst> template<typename Dst>
static inline void subTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) static EIGEN_STRONG_INLINE void subTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
{ {
// dst.noalias() -= lhs.lazyProduct(rhs); // dst.noalias() -= lhs.lazyProduct(rhs);
call_assignment_no_alias(dst, lhs.lazyProduct(rhs), internal::sub_assign_op<Scalar>()); call_assignment_no_alias(dst, lhs.lazyProduct(rhs), internal::sub_assign_op<typename Dst::Scalar,Scalar>());
} }
// template<typename Dst> // template<typename Dst>
@ -423,6 +437,18 @@ struct product_evaluator<Product<Lhs, Rhs, LazyProduct>, ProductTag, DenseShape,
EIGEN_INTERNAL_CHECK_COST_VALUE(NumTraits<Scalar>::MulCost); EIGEN_INTERNAL_CHECK_COST_VALUE(NumTraits<Scalar>::MulCost);
EIGEN_INTERNAL_CHECK_COST_VALUE(NumTraits<Scalar>::AddCost); EIGEN_INTERNAL_CHECK_COST_VALUE(NumTraits<Scalar>::AddCost);
EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost); EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost);
#if 0
std::cerr << "LhsOuterStrideBytes= " << LhsOuterStrideBytes << "\n";
std::cerr << "RhsOuterStrideBytes= " << RhsOuterStrideBytes << "\n";
std::cerr << "LhsAlignment= " << LhsAlignment << "\n";
std::cerr << "RhsAlignment= " << RhsAlignment << "\n";
std::cerr << "CanVectorizeLhs= " << CanVectorizeLhs << "\n";
std::cerr << "CanVectorizeRhs= " << CanVectorizeRhs << "\n";
std::cerr << "CanVectorizeInner= " << CanVectorizeInner << "\n";
std::cerr << "EvalToRowMajor= " << EvalToRowMajor << "\n";
std::cerr << "Alignment= " << Alignment << "\n";
std::cerr << "Flags= " << Flags << "\n";
#endif
} }
// Everything below here is taken from CoeffBasedProduct.h // Everything below here is taken from CoeffBasedProduct.h
@ -473,15 +499,12 @@ struct product_evaluator<Product<Lhs, Rhs, LazyProduct>, ProductTag, DenseShape,
SameType = is_same<typename LhsNestedCleaned::Scalar,typename RhsNestedCleaned::Scalar>::value, SameType = is_same<typename LhsNestedCleaned::Scalar,typename RhsNestedCleaned::Scalar>::value,
CanVectorizeRhs = RhsRowMajor && (RhsFlags & PacketAccessBit) CanVectorizeRhs = bool(RhsRowMajor) && (RhsFlags & PacketAccessBit) && (ColsAtCompileTime!=1),
&& (ColsAtCompileTime == Dynamic || ((ColsAtCompileTime % RhsVecPacketSize) == 0) ), CanVectorizeLhs = (!LhsRowMajor) && (LhsFlags & PacketAccessBit) && (RowsAtCompileTime!=1),
CanVectorizeLhs = (!LhsRowMajor) && (LhsFlags & PacketAccessBit)
&& (RowsAtCompileTime == Dynamic || ((RowsAtCompileTime % LhsVecPacketSize) == 0) ),
EvalToRowMajor = (MaxRowsAtCompileTime==1&&MaxColsAtCompileTime!=1) ? 1 EvalToRowMajor = (MaxRowsAtCompileTime==1&&MaxColsAtCompileTime!=1) ? 1
: (MaxColsAtCompileTime==1&&MaxRowsAtCompileTime!=1) ? 0 : (MaxColsAtCompileTime==1&&MaxRowsAtCompileTime!=1) ? 0
: (RhsRowMajor && !CanVectorizeLhs), : (bool(RhsRowMajor) && !CanVectorizeLhs),
Flags = ((unsigned int)(LhsFlags | RhsFlags) & HereditaryBits & ~RowMajorBit) Flags = ((unsigned int)(LhsFlags | RhsFlags) & HereditaryBits & ~RowMajorBit)
| (EvalToRowMajor ? RowMajorBit : 0) | (EvalToRowMajor ? RowMajorBit : 0)
@ -492,8 +515,8 @@ struct product_evaluator<Product<Lhs, Rhs, LazyProduct>, ProductTag, DenseShape,
LhsOuterStrideBytes = int(LhsNestedCleaned::OuterStrideAtCompileTime) * int(sizeof(typename LhsNestedCleaned::Scalar)), LhsOuterStrideBytes = int(LhsNestedCleaned::OuterStrideAtCompileTime) * int(sizeof(typename LhsNestedCleaned::Scalar)),
RhsOuterStrideBytes = int(RhsNestedCleaned::OuterStrideAtCompileTime) * int(sizeof(typename RhsNestedCleaned::Scalar)), RhsOuterStrideBytes = int(RhsNestedCleaned::OuterStrideAtCompileTime) * int(sizeof(typename RhsNestedCleaned::Scalar)),
Alignment = CanVectorizeLhs ? (LhsOuterStrideBytes<0 || (int(LhsOuterStrideBytes) % EIGEN_PLAIN_ENUM_MAX(1,LhsAlignment))!=0 ? 0 : LhsAlignment) Alignment = bool(CanVectorizeLhs) ? (LhsOuterStrideBytes<=0 || (int(LhsOuterStrideBytes) % EIGEN_PLAIN_ENUM_MAX(1,LhsAlignment))!=0 ? 0 : LhsAlignment)
: CanVectorizeRhs ? (RhsOuterStrideBytes<0 || (int(RhsOuterStrideBytes) % EIGEN_PLAIN_ENUM_MAX(1,RhsAlignment))!=0 ? 0 : RhsAlignment) : bool(CanVectorizeRhs) ? (RhsOuterStrideBytes<=0 || (int(RhsOuterStrideBytes) % EIGEN_PLAIN_ENUM_MAX(1,RhsAlignment))!=0 ? 0 : RhsAlignment)
: 0, : 0,
/* CanVectorizeInner deserves special explanation. It does not affect the product flags. It is not used outside /* CanVectorizeInner deserves special explanation. It does not affect the product flags. It is not used outside
@ -519,8 +542,8 @@ struct product_evaluator<Product<Lhs, Rhs, LazyProduct>, ProductTag, DenseShape,
*/ */
EIGEN_DEVICE_FUNC const CoeffReturnType coeff(Index index) const EIGEN_DEVICE_FUNC const CoeffReturnType coeff(Index index) const
{ {
const Index row = RowsAtCompileTime == 1 ? 0 : index; const Index row = (RowsAtCompileTime == 1 || MaxRowsAtCompileTime==1) ? 0 : index;
const Index col = RowsAtCompileTime == 1 ? index : 0; const Index col = (RowsAtCompileTime == 1 || MaxRowsAtCompileTime==1) ? index : 0;
return (m_lhs.row(row).transpose().cwiseProduct( m_rhs.col(col) )).sum(); return (m_lhs.row(row).transpose().cwiseProduct( m_rhs.col(col) )).sum();
} }
@ -538,8 +561,8 @@ struct product_evaluator<Product<Lhs, Rhs, LazyProduct>, ProductTag, DenseShape,
template<int LoadMode, typename PacketType> template<int LoadMode, typename PacketType>
const PacketType packet(Index index) const const PacketType packet(Index index) const
{ {
const Index row = RowsAtCompileTime == 1 ? 0 : index; const Index row = (RowsAtCompileTime == 1 || MaxRowsAtCompileTime==1) ? 0 : index;
const Index col = RowsAtCompileTime == 1 ? index : 0; const Index col = (RowsAtCompileTime == 1 || MaxRowsAtCompileTime==1) ? index : 0;
return packet<LoadMode,PacketType>(row,col); return packet<LoadMode,PacketType>(row,col);
} }
@ -579,7 +602,7 @@ struct etor_product_packet_impl<RowMajor, UnrollingIndex, Lhs, Rhs, Packet, Load
static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, Packet &res) static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, Packet &res)
{ {
etor_product_packet_impl<RowMajor, UnrollingIndex-1, Lhs, Rhs, Packet, LoadMode>::run(row, col, lhs, rhs, innerDim, res); etor_product_packet_impl<RowMajor, UnrollingIndex-1, Lhs, Rhs, Packet, LoadMode>::run(row, col, lhs, rhs, innerDim, res);
res = pmadd(pset1<Packet>(lhs.coeff(row, UnrollingIndex-1)), rhs.template packet<LoadMode,Packet>(UnrollingIndex-1, col), res); res = pmadd(pset1<Packet>(lhs.coeff(row, Index(UnrollingIndex-1))), rhs.template packet<LoadMode,Packet>(Index(UnrollingIndex-1), col), res);
} }
}; };
@ -589,7 +612,7 @@ struct etor_product_packet_impl<ColMajor, UnrollingIndex, Lhs, Rhs, Packet, Load
static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, Packet &res) static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, Packet &res)
{ {
etor_product_packet_impl<ColMajor, UnrollingIndex-1, Lhs, Rhs, Packet, LoadMode>::run(row, col, lhs, rhs, innerDim, res); etor_product_packet_impl<ColMajor, UnrollingIndex-1, Lhs, Rhs, Packet, LoadMode>::run(row, col, lhs, rhs, innerDim, res);
res = pmadd(lhs.template packet<LoadMode,Packet>(row, UnrollingIndex-1), pset1<Packet>(rhs.coeff(UnrollingIndex-1, col)), res); res = pmadd(lhs.template packet<LoadMode,Packet>(row, Index(UnrollingIndex-1)), pset1<Packet>(rhs.coeff(Index(UnrollingIndex-1), col)), res);
} }
}; };
@ -598,7 +621,7 @@ struct etor_product_packet_impl<RowMajor, 1, Lhs, Rhs, Packet, LoadMode>
{ {
static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index /*innerDim*/, Packet &res) static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index /*innerDim*/, Packet &res)
{ {
res = pmul(pset1<Packet>(lhs.coeff(row, 0)),rhs.template packet<LoadMode,Packet>(0, col)); res = pmul(pset1<Packet>(lhs.coeff(row, Index(0))),rhs.template packet<LoadMode,Packet>(Index(0), col));
} }
}; };
@ -607,7 +630,7 @@ struct etor_product_packet_impl<ColMajor, 1, Lhs, Rhs, Packet, LoadMode>
{ {
static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index /*innerDim*/, Packet &res) static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index /*innerDim*/, Packet &res)
{ {
res = pmul(lhs.template packet<LoadMode,Packet>(row, 0), pset1<Packet>(rhs.coeff(0, col))); res = pmul(lhs.template packet<LoadMode,Packet>(row, Index(0)), pset1<Packet>(rhs.coeff(Index(0), col)));
} }
}; };
@ -616,7 +639,7 @@ struct etor_product_packet_impl<RowMajor, 0, Lhs, Rhs, Packet, LoadMode>
{ {
static EIGEN_STRONG_INLINE void run(Index /*row*/, Index /*col*/, const Lhs& /*lhs*/, const Rhs& /*rhs*/, Index /*innerDim*/, Packet &res) static EIGEN_STRONG_INLINE void run(Index /*row*/, Index /*col*/, const Lhs& /*lhs*/, const Rhs& /*rhs*/, Index /*innerDim*/, Packet &res)
{ {
res = pset1<Packet>(0); res = pset1<Packet>(typename unpacket_traits<Packet>::type(0));
} }
}; };
@ -625,7 +648,7 @@ struct etor_product_packet_impl<ColMajor, 0, Lhs, Rhs, Packet, LoadMode>
{ {
static EIGEN_STRONG_INLINE void run(Index /*row*/, Index /*col*/, const Lhs& /*lhs*/, const Rhs& /*rhs*/, Index /*innerDim*/, Packet &res) static EIGEN_STRONG_INLINE void run(Index /*row*/, Index /*col*/, const Lhs& /*lhs*/, const Rhs& /*rhs*/, Index /*innerDim*/, Packet &res)
{ {
res = pset1<Packet>(0); res = pset1<Packet>(typename unpacket_traits<Packet>::type(0));
} }
}; };
@ -634,7 +657,7 @@ struct etor_product_packet_impl<RowMajor, Dynamic, Lhs, Rhs, Packet, LoadMode>
{ {
static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, Packet& res) static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, Packet& res)
{ {
res = pset1<Packet>(0); res = pset1<Packet>(typename unpacket_traits<Packet>::type(0));
for(Index i = 0; i < innerDim; ++i) for(Index i = 0; i < innerDim; ++i)
res = pmadd(pset1<Packet>(lhs.coeff(row, i)), rhs.template packet<LoadMode,Packet>(i, col), res); res = pmadd(pset1<Packet>(lhs.coeff(row, i)), rhs.template packet<LoadMode,Packet>(i, col), res);
} }
@ -645,7 +668,7 @@ struct etor_product_packet_impl<ColMajor, Dynamic, Lhs, Rhs, Packet, LoadMode>
{ {
static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, Packet& res) static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, Packet& res)
{ {
res = pset1<Packet>(0); res = pset1<Packet>(typename unpacket_traits<Packet>::type(0));
for(Index i = 0; i < innerDim; ++i) for(Index i = 0; i < innerDim; ++i)
res = pmadd(lhs.template packet<LoadMode,Packet>(row, i), pset1<Packet>(rhs.coeff(i, col)), res); res = pmadd(lhs.template packet<LoadMode,Packet>(row, i), pset1<Packet>(rhs.coeff(i, col)), res);
} }
@ -730,7 +753,7 @@ template<typename MatrixType, typename DiagonalType, typename Derived, int Produ
struct diagonal_product_evaluator_base struct diagonal_product_evaluator_base
: evaluator_base<Derived> : evaluator_base<Derived>
{ {
typedef typename scalar_product_traits<typename MatrixType::Scalar, typename DiagonalType::Scalar>::ReturnType Scalar; typedef typename ScalarBinaryOpTraits<typename MatrixType::Scalar, typename DiagonalType::Scalar>::ReturnType Scalar;
public: public:
enum { enum {
CoeffReadCost = NumTraits<Scalar>::MulCost + evaluator<MatrixType>::CoeffReadCost + evaluator<DiagonalType>::CoeffReadCost, CoeffReadCost = NumTraits<Scalar>::MulCost + evaluator<MatrixType>::CoeffReadCost + evaluator<DiagonalType>::CoeffReadCost,

View File

@ -16,8 +16,7 @@ namespace internal {
template<typename Scalar> struct scalar_random_op { template<typename Scalar> struct scalar_random_op {
EIGEN_EMPTY_STRUCT_CTOR(scalar_random_op) EIGEN_EMPTY_STRUCT_CTOR(scalar_random_op)
template<typename Index> inline const Scalar operator() () const { return random<Scalar>(); }
inline const Scalar operator() (Index, Index = 0) const { return random<Scalar>(); }
}; };
template<typename Scalar> template<typename Scalar>

View File

@ -38,8 +38,8 @@ public:
enum { enum {
MightVectorize = (int(Derived::Flags)&ActualPacketAccessBit) MightVectorize = (int(Derived::Flags)&ActualPacketAccessBit)
&& (functor_traits<Func>::PacketAccess), && (functor_traits<Func>::PacketAccess),
MayLinearVectorize = MightVectorize && (int(Derived::Flags)&LinearAccessBit), MayLinearVectorize = bool(MightVectorize) && (int(Derived::Flags)&LinearAccessBit),
MaySliceVectorize = MightVectorize && int(InnerMaxSize)>=3*PacketSize MaySliceVectorize = bool(MightVectorize) && int(InnerMaxSize)>=3*PacketSize
}; };
public: public:
@ -425,7 +425,7 @@ template<typename Derived>
EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar
DenseBase<Derived>::minCoeff() const DenseBase<Derived>::minCoeff() const
{ {
return derived().redux(Eigen::internal::scalar_min_op<Scalar>()); return derived().redux(Eigen::internal::scalar_min_op<Scalar,Scalar>());
} }
/** \returns the maximum of all coefficients of \c *this. /** \returns the maximum of all coefficients of \c *this.
@ -435,10 +435,12 @@ template<typename Derived>
EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar
DenseBase<Derived>::maxCoeff() const DenseBase<Derived>::maxCoeff() const
{ {
return derived().redux(Eigen::internal::scalar_max_op<Scalar>()); return derived().redux(Eigen::internal::scalar_max_op<Scalar,Scalar>());
} }
/** \returns the sum of all coefficients of *this /** \returns the sum of all coefficients of \c *this
*
* If \c *this is empty, then the value 0 is returned.
* *
* \sa trace(), prod(), mean() * \sa trace(), prod(), mean()
*/ */
@ -448,7 +450,7 @@ DenseBase<Derived>::sum() const
{ {
if(SizeAtCompileTime==0 || (SizeAtCompileTime==Dynamic && size()==0)) if(SizeAtCompileTime==0 || (SizeAtCompileTime==Dynamic && size()==0))
return Scalar(0); return Scalar(0);
return derived().redux(Eigen::internal::scalar_sum_op<Scalar>()); return derived().redux(Eigen::internal::scalar_sum_op<Scalar,Scalar>());
} }
/** \returns the mean of all coefficients of *this /** \returns the mean of all coefficients of *this
@ -459,7 +461,14 @@ template<typename Derived>
EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar
DenseBase<Derived>::mean() const DenseBase<Derived>::mean() const
{ {
return Scalar(derived().redux(Eigen::internal::scalar_sum_op<Scalar>())) / Scalar(this->size()); #ifdef __INTEL_COMPILER
#pragma warning push
#pragma warning ( disable : 2259 )
#endif
return Scalar(derived().redux(Eigen::internal::scalar_sum_op<Scalar,Scalar>())) / Scalar(this->size());
#ifdef __INTEL_COMPILER
#pragma warning pop
#endif
} }
/** \returns the product of all coefficients of *this /** \returns the product of all coefficients of *this

View File

@ -35,7 +35,13 @@ struct traits<Ref<_PlainObjectType, _Options, _StrideType> >
|| (int(StrideType::InnerStrideAtCompileTime)==0 && int(Derived::InnerStrideAtCompileTime)==1), || (int(StrideType::InnerStrideAtCompileTime)==0 && int(Derived::InnerStrideAtCompileTime)==1),
OuterStrideMatch = Derived::IsVectorAtCompileTime OuterStrideMatch = Derived::IsVectorAtCompileTime
|| int(StrideType::OuterStrideAtCompileTime)==int(Dynamic) || int(StrideType::OuterStrideAtCompileTime)==int(Derived::OuterStrideAtCompileTime), || int(StrideType::OuterStrideAtCompileTime)==int(Dynamic) || int(StrideType::OuterStrideAtCompileTime)==int(Derived::OuterStrideAtCompileTime),
AlignmentMatch = (int(traits<PlainObjectType>::Alignment)==int(Unaligned)) || (int(evaluator<Derived>::Alignment) >= int(Alignment)), // FIXME the first condition is not very clear, it should be replaced by the required alignment // NOTE, this indirection of evaluator<Derived>::Alignment is needed
// to workaround a very strange bug in MSVC related to the instantiation
// of has_*ary_operator in evaluator<CwiseNullaryOp>.
// This line is surprisingly very sensitive. For instance, simply adding parenthesis
// as "DerivedAlignment = (int(evaluator<Derived>::Alignment))," will make MSVC fail...
DerivedAlignment = int(evaluator<Derived>::Alignment),
AlignmentMatch = (int(traits<PlainObjectType>::Alignment)==int(Unaligned)) || (DerivedAlignment >= int(Alignment)), // FIXME the first condition is not very clear, it should be replaced by the required alignment
ScalarTypeMatch = internal::is_same<typename PlainObjectType::Scalar, typename Derived::Scalar>::value, ScalarTypeMatch = internal::is_same<typename PlainObjectType::Scalar, typename Derived::Scalar>::value,
MatchAtCompileTime = HasDirectAccess && StorageOrderMatch && InnerStrideMatch && OuterStrideMatch && AlignmentMatch && ScalarTypeMatch MatchAtCompileTime = HasDirectAccess && StorageOrderMatch && InnerStrideMatch && OuterStrideMatch && AlignmentMatch && ScalarTypeMatch
}; };
@ -262,7 +268,7 @@ template<typename TPlainObjectType, int Options, typename StrideType> class Ref<
template<typename Expression> template<typename Expression>
EIGEN_DEVICE_FUNC void construct(const Expression& expr, internal::false_type) EIGEN_DEVICE_FUNC void construct(const Expression& expr, internal::false_type)
{ {
internal::call_assignment_no_alias(m_object,expr,internal::assign_op<Scalar>()); internal::call_assignment_no_alias(m_object,expr,internal::assign_op<Scalar,Scalar>());
Base::construct(m_object); Base::construct(m_object);
} }

View File

@ -55,6 +55,7 @@ template<typename _MatrixType, unsigned int UpLo> class SelfAdjointView
typedef TriangularBase<SelfAdjointView> Base; typedef TriangularBase<SelfAdjointView> Base;
typedef typename internal::traits<SelfAdjointView>::MatrixTypeNested MatrixTypeNested; typedef typename internal::traits<SelfAdjointView>::MatrixTypeNested MatrixTypeNested;
typedef typename internal::traits<SelfAdjointView>::MatrixTypeNestedCleaned MatrixTypeNestedCleaned; typedef typename internal::traits<SelfAdjointView>::MatrixTypeNestedCleaned MatrixTypeNestedCleaned;
typedef MatrixTypeNestedCleaned NestedExpression;
/** \brief The type of coefficients in this matrix */ /** \brief The type of coefficients in this matrix */
typedef typename internal::traits<SelfAdjointView>::Scalar Scalar; typedef typename internal::traits<SelfAdjointView>::Scalar Scalar;
@ -128,7 +129,7 @@ template<typename _MatrixType, unsigned int UpLo> class SelfAdjointView
} }
friend EIGEN_DEVICE_FUNC friend EIGEN_DEVICE_FUNC
const SelfAdjointView<const CwiseUnaryOp<internal::scalar_multiple_op<Scalar>,MatrixType>,UpLo> const SelfAdjointView<const EIGEN_SCALAR_BINARYOP_EXPR_RETURN_TYPE(Scalar,MatrixType,product),UpLo>
operator*(const Scalar& s, const SelfAdjointView& mat) operator*(const Scalar& s, const SelfAdjointView& mat)
{ {
return (s*mat.nestedExpression()).template selfadjointView<UpLo>(); return (s*mat.nestedExpression()).template selfadjointView<UpLo>();
@ -162,6 +163,41 @@ template<typename _MatrixType, unsigned int UpLo> class SelfAdjointView
EIGEN_DEVICE_FUNC EIGEN_DEVICE_FUNC
SelfAdjointView& rankUpdate(const MatrixBase<DerivedU>& u, const Scalar& alpha = Scalar(1)); SelfAdjointView& rankUpdate(const MatrixBase<DerivedU>& u, const Scalar& alpha = Scalar(1));
/** \returns an expression of a triangular view extracted from the current selfadjoint view of a given triangular part
*
* The parameter \a TriMode can have the following values: \c #Upper, \c #StrictlyUpper, \c #UnitUpper,
* \c #Lower, \c #StrictlyLower, \c #UnitLower.
*
* If \c TriMode references the same triangular part than \c *this, then this method simply return a \c TriangularView of the nested expression,
* otherwise, the nested expression is first transposed, thus returning a \c TriangularView<Transpose<MatrixType>> object.
*
* \sa MatrixBase::triangularView(), class TriangularView
*/
template<unsigned int TriMode>
EIGEN_DEVICE_FUNC
typename internal::conditional<(TriMode&(Upper|Lower))==(UpLo&(Upper|Lower)),
TriangularView<MatrixType,TriMode>,
TriangularView<typename MatrixType::AdjointReturnType,TriMode> >::type
triangularView() const
{
typename internal::conditional<(TriMode&(Upper|Lower))==(UpLo&(Upper|Lower)), MatrixType&, typename MatrixType::ConstTransposeReturnType>::type tmp1(m_matrix);
typename internal::conditional<(TriMode&(Upper|Lower))==(UpLo&(Upper|Lower)), MatrixType&, typename MatrixType::AdjointReturnType>::type tmp2(tmp1);
return typename internal::conditional<(TriMode&(Upper|Lower))==(UpLo&(Upper|Lower)),
TriangularView<MatrixType,TriMode>,
TriangularView<typename MatrixType::AdjointReturnType,TriMode> >::type(tmp2);
}
/** \returns a const expression of the main diagonal of the matrix \c *this
*
* This method simply returns the diagonal of the nested expression, thus by-passing the SelfAdjointView decorator.
*
* \sa MatrixBase::diagonal(), class Diagonal */
EIGEN_DEVICE_FUNC
typename MatrixType::ConstDiagonalReturnType diagonal() const
{
return typename MatrixType::ConstDiagonalReturnType(m_matrix);
}
/////////// Cholesky module /////////// /////////// Cholesky module ///////////
const LLT<PlainObject, UpLo> llt() const; const LLT<PlainObject, UpLo> llt() const;

View File

@ -12,11 +12,13 @@
namespace Eigen { namespace Eigen {
// TODO generalize the scalar type of 'other'
template<typename Derived> template<typename Derived>
EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::operator*=(const Scalar& other) EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::operator*=(const Scalar& other)
{ {
typedef typename Derived::PlainObject PlainObject; typedef typename Derived::PlainObject PlainObject;
internal::call_assignment(this->derived(), PlainObject::Constant(rows(),cols(),other), internal::mul_assign_op<Scalar>()); internal::call_assignment(this->derived(), PlainObject::Constant(rows(),cols(),other), internal::mul_assign_op<Scalar,Scalar>());
return derived(); return derived();
} }
@ -24,7 +26,7 @@ template<typename Derived>
EIGEN_STRONG_INLINE Derived& ArrayBase<Derived>::operator+=(const Scalar& other) EIGEN_STRONG_INLINE Derived& ArrayBase<Derived>::operator+=(const Scalar& other)
{ {
typedef typename Derived::PlainObject PlainObject; typedef typename Derived::PlainObject PlainObject;
internal::call_assignment(this->derived(), PlainObject::Constant(rows(),cols(),other), internal::add_assign_op<Scalar>()); internal::call_assignment(this->derived(), PlainObject::Constant(rows(),cols(),other), internal::add_assign_op<Scalar,Scalar>());
return derived(); return derived();
} }
@ -32,7 +34,7 @@ template<typename Derived>
EIGEN_STRONG_INLINE Derived& ArrayBase<Derived>::operator-=(const Scalar& other) EIGEN_STRONG_INLINE Derived& ArrayBase<Derived>::operator-=(const Scalar& other)
{ {
typedef typename Derived::PlainObject PlainObject; typedef typename Derived::PlainObject PlainObject;
internal::call_assignment(this->derived(), PlainObject::Constant(rows(),cols(),other), internal::sub_assign_op<Scalar>()); internal::call_assignment(this->derived(), PlainObject::Constant(rows(),cols(),other), internal::sub_assign_op<Scalar,Scalar>());
return derived(); return derived();
} }
@ -40,7 +42,7 @@ template<typename Derived>
EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::operator/=(const Scalar& other) EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::operator/=(const Scalar& other)
{ {
typedef typename Derived::PlainObject PlainObject; typedef typename Derived::PlainObject PlainObject;
internal::call_assignment(this->derived(), PlainObject::Constant(rows(),cols(),other), internal::div_assign_op<Scalar>()); internal::call_assignment(this->derived(), PlainObject::Constant(rows(),cols(),other), internal::div_assign_op<Scalar,Scalar>());
return derived(); return derived();
} }

View File

@ -134,10 +134,10 @@ protected:
// Specialization for "dst = dec.solve(rhs)" // Specialization for "dst = dec.solve(rhs)"
// NOTE we need to specialize it for Dense2Dense to avoid ambiguous specialization error and a Sparse2Sparse specialization must exist somewhere // NOTE we need to specialize it for Dense2Dense to avoid ambiguous specialization error and a Sparse2Sparse specialization must exist somewhere
template<typename DstXprType, typename DecType, typename RhsType, typename Scalar> template<typename DstXprType, typename DecType, typename RhsType, typename Scalar>
struct Assignment<DstXprType, Solve<DecType,RhsType>, internal::assign_op<Scalar>, Dense2Dense, Scalar> struct Assignment<DstXprType, Solve<DecType,RhsType>, internal::assign_op<Scalar,Scalar>, Dense2Dense>
{ {
typedef Solve<DecType,RhsType> SrcXprType; typedef Solve<DecType,RhsType> SrcXprType;
static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<Scalar> &) static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<Scalar,Scalar> &)
{ {
// FIXME shall we resize dst here? // FIXME shall we resize dst here?
src.dec()._solve_impl(src.rhs(), dst); src.dec()._solve_impl(src.rhs(), dst);
@ -146,10 +146,10 @@ struct Assignment<DstXprType, Solve<DecType,RhsType>, internal::assign_op<Scalar
// Specialization for "dst = dec.transpose().solve(rhs)" // Specialization for "dst = dec.transpose().solve(rhs)"
template<typename DstXprType, typename DecType, typename RhsType, typename Scalar> template<typename DstXprType, typename DecType, typename RhsType, typename Scalar>
struct Assignment<DstXprType, Solve<Transpose<const DecType>,RhsType>, internal::assign_op<Scalar>, Dense2Dense, Scalar> struct Assignment<DstXprType, Solve<Transpose<const DecType>,RhsType>, internal::assign_op<Scalar,Scalar>, Dense2Dense>
{ {
typedef Solve<Transpose<const DecType>,RhsType> SrcXprType; typedef Solve<Transpose<const DecType>,RhsType> SrcXprType;
static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<Scalar> &) static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<Scalar,Scalar> &)
{ {
src.dec().nestedExpression().template _solve_impl_transposed<false>(src.rhs(), dst); src.dec().nestedExpression().template _solve_impl_transposed<false>(src.rhs(), dst);
} }
@ -157,10 +157,11 @@ struct Assignment<DstXprType, Solve<Transpose<const DecType>,RhsType>, internal:
// Specialization for "dst = dec.adjoint().solve(rhs)" // Specialization for "dst = dec.adjoint().solve(rhs)"
template<typename DstXprType, typename DecType, typename RhsType, typename Scalar> template<typename DstXprType, typename DecType, typename RhsType, typename Scalar>
struct Assignment<DstXprType, Solve<CwiseUnaryOp<internal::scalar_conjugate_op<typename DecType::Scalar>, const Transpose<const DecType> >,RhsType>, internal::assign_op<Scalar>, Dense2Dense, Scalar> struct Assignment<DstXprType, Solve<CwiseUnaryOp<internal::scalar_conjugate_op<typename DecType::Scalar>, const Transpose<const DecType> >,RhsType>,
internal::assign_op<Scalar,Scalar>, Dense2Dense>
{ {
typedef Solve<CwiseUnaryOp<internal::scalar_conjugate_op<typename DecType::Scalar>, const Transpose<const DecType> >,RhsType> SrcXprType; typedef Solve<CwiseUnaryOp<internal::scalar_conjugate_op<typename DecType::Scalar>, const Transpose<const DecType> >,RhsType> SrcXprType;
static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<Scalar> &) static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<Scalar,Scalar> &)
{ {
src.dec().nestedExpression().nestedExpression().template _solve_impl_transposed<true>(src.rhs(), dst); src.dec().nestedExpression().nestedExpression().template _solve_impl_transposed<true>(src.rhs(), dst);
} }

View File

@ -169,7 +169,7 @@ void TriangularViewImpl<MatrixType,Mode,Dense>::solveInPlace(const MatrixBase<Ot
eigen_assert( derived().cols() == derived().rows() && ((Side==OnTheLeft && derived().cols() == other.rows()) || (Side==OnTheRight && derived().cols() == other.cols())) ); eigen_assert( derived().cols() == derived().rows() && ((Side==OnTheLeft && derived().cols() == other.rows()) || (Side==OnTheRight && derived().cols() == other.cols())) );
eigen_assert((!(Mode & ZeroDiag)) && bool(Mode & (Upper|Lower))); eigen_assert((!(Mode & ZeroDiag)) && bool(Mode & (Upper|Lower)));
enum { copy = internal::traits<OtherDerived>::Flags & RowMajorBit && OtherDerived::IsVectorAtCompileTime }; enum { copy = (internal::traits<OtherDerived>::Flags & RowMajorBit) && OtherDerived::IsVectorAtCompileTime && OtherDerived::SizeAtCompileTime!=1};
typedef typename internal::conditional<copy, typedef typename internal::conditional<copy,
typename internal::plain_matrix_type_column_major<OtherDerived>::type, OtherDerived&>::type OtherCopy; typename internal::plain_matrix_type_column_major<OtherDerived>::type, OtherDerived&>::type OtherCopy;
OtherCopy otherCopy(other); OtherCopy otherCopy(other);

View File

@ -367,14 +367,14 @@ template<typename _MatrixType, unsigned int _Mode> class TriangularViewImpl<_Mat
template<typename Other> template<typename Other>
EIGEN_DEVICE_FUNC EIGEN_DEVICE_FUNC
TriangularViewType& operator+=(const DenseBase<Other>& other) { TriangularViewType& operator+=(const DenseBase<Other>& other) {
internal::call_assignment_no_alias(derived(), other.derived(), internal::add_assign_op<Scalar>()); internal::call_assignment_no_alias(derived(), other.derived(), internal::add_assign_op<Scalar,typename Other::Scalar>());
return derived(); return derived();
} }
/** \sa MatrixBase::operator-=() */ /** \sa MatrixBase::operator-=() */
template<typename Other> template<typename Other>
EIGEN_DEVICE_FUNC EIGEN_DEVICE_FUNC
TriangularViewType& operator-=(const DenseBase<Other>& other) { TriangularViewType& operator-=(const DenseBase<Other>& other) {
internal::call_assignment_no_alias(derived(), other.derived(), internal::sub_assign_op<Scalar>()); internal::call_assignment_no_alias(derived(), other.derived(), internal::sub_assign_op<Scalar,typename Other::Scalar>());
return derived(); return derived();
} }
@ -552,7 +552,7 @@ template<typename OtherDerived>
inline TriangularView<MatrixType, Mode>& inline TriangularView<MatrixType, Mode>&
TriangularViewImpl<MatrixType, Mode, Dense>::operator=(const MatrixBase<OtherDerived>& other) TriangularViewImpl<MatrixType, Mode, Dense>::operator=(const MatrixBase<OtherDerived>& other)
{ {
internal::call_assignment_no_alias(derived(), other.derived(), internal::assign_op<Scalar>()); internal::call_assignment_no_alias(derived(), other.derived(), internal::assign_op<Scalar,typename OtherDerived::Scalar>());
return derived(); return derived();
} }
@ -794,7 +794,7 @@ void call_triangular_assignment_loop(const DstXprType& dst, const SrcXprType& sr
enum { enum {
unroll = DstXprType::SizeAtCompileTime != Dynamic unroll = DstXprType::SizeAtCompileTime != Dynamic
&& SrcEvaluatorType::CoeffReadCost < HugeCost && SrcEvaluatorType::CoeffReadCost < HugeCost
&& DstXprType::SizeAtCompileTime * SrcEvaluatorType::CoeffReadCost / 2 <= EIGEN_UNROLLING_LIMIT && DstXprType::SizeAtCompileTime * (DstEvaluatorType::CoeffReadCost+SrcEvaluatorType::CoeffReadCost) / 2 <= EIGEN_UNROLLING_LIMIT
}; };
triangular_assignment_loop<Kernel, Mode, unroll ? int(DstXprType::SizeAtCompileTime) : Dynamic, SetOpposite>::run(kernel); triangular_assignment_loop<Kernel, Mode, unroll ? int(DstXprType::SizeAtCompileTime) : Dynamic, SetOpposite>::run(kernel);
@ -804,7 +804,7 @@ template<int Mode, bool SetOpposite, typename DstXprType, typename SrcXprType>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
void call_triangular_assignment_loop(const DstXprType& dst, const SrcXprType& src) void call_triangular_assignment_loop(const DstXprType& dst, const SrcXprType& src)
{ {
call_triangular_assignment_loop<Mode,SetOpposite>(dst, src, internal::assign_op<typename DstXprType::Scalar>()); call_triangular_assignment_loop<Mode,SetOpposite>(dst, src, internal::assign_op<typename DstXprType::Scalar,typename SrcXprType::Scalar>());
} }
template<> struct AssignmentKind<TriangularShape,TriangularShape> { typedef Triangular2Triangular Kind; }; template<> struct AssignmentKind<TriangularShape,TriangularShape> { typedef Triangular2Triangular Kind; };
@ -812,8 +812,8 @@ template<> struct AssignmentKind<DenseShape,TriangularShape> { typedef Tria
template<> struct AssignmentKind<TriangularShape,DenseShape> { typedef Dense2Triangular Kind; }; template<> struct AssignmentKind<TriangularShape,DenseShape> { typedef Dense2Triangular Kind; };
template< typename DstXprType, typename SrcXprType, typename Functor, typename Scalar> template< typename DstXprType, typename SrcXprType, typename Functor>
struct Assignment<DstXprType, SrcXprType, Functor, Triangular2Triangular, Scalar> struct Assignment<DstXprType, SrcXprType, Functor, Triangular2Triangular>
{ {
EIGEN_DEVICE_FUNC static void run(DstXprType &dst, const SrcXprType &src, const Functor &func) EIGEN_DEVICE_FUNC static void run(DstXprType &dst, const SrcXprType &src, const Functor &func)
{ {
@ -823,8 +823,8 @@ struct Assignment<DstXprType, SrcXprType, Functor, Triangular2Triangular, Scalar
} }
}; };
template< typename DstXprType, typename SrcXprType, typename Functor, typename Scalar> template< typename DstXprType, typename SrcXprType, typename Functor>
struct Assignment<DstXprType, SrcXprType, Functor, Triangular2Dense, Scalar> struct Assignment<DstXprType, SrcXprType, Functor, Triangular2Dense>
{ {
EIGEN_DEVICE_FUNC static void run(DstXprType &dst, const SrcXprType &src, const Functor &func) EIGEN_DEVICE_FUNC static void run(DstXprType &dst, const SrcXprType &src, const Functor &func)
{ {
@ -832,8 +832,8 @@ struct Assignment<DstXprType, SrcXprType, Functor, Triangular2Dense, Scalar>
} }
}; };
template< typename DstXprType, typename SrcXprType, typename Functor, typename Scalar> template< typename DstXprType, typename SrcXprType, typename Functor>
struct Assignment<DstXprType, SrcXprType, Functor, Dense2Triangular, Scalar> struct Assignment<DstXprType, SrcXprType, Functor, Dense2Triangular>
{ {
EIGEN_DEVICE_FUNC static void run(DstXprType &dst, const SrcXprType &src, const Functor &func) EIGEN_DEVICE_FUNC static void run(DstXprType &dst, const SrcXprType &src, const Functor &func)
{ {
@ -933,10 +933,10 @@ namespace internal {
// Triangular = Product // Triangular = Product
template< typename DstXprType, typename Lhs, typename Rhs, typename Scalar> template< typename DstXprType, typename Lhs, typename Rhs, typename Scalar>
struct Assignment<DstXprType, Product<Lhs,Rhs,DefaultProduct>, internal::assign_op<Scalar>, Dense2Triangular, Scalar> struct Assignment<DstXprType, Product<Lhs,Rhs,DefaultProduct>, internal::assign_op<Scalar,typename Product<Lhs,Rhs,DefaultProduct>::Scalar>, Dense2Triangular>
{ {
typedef Product<Lhs,Rhs,DefaultProduct> SrcXprType; typedef Product<Lhs,Rhs,DefaultProduct> SrcXprType;
static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<Scalar> &) static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<Scalar,typename SrcXprType::Scalar> &)
{ {
dst.setZero(); dst.setZero();
dst._assignProduct(src, 1); dst._assignProduct(src, 1);
@ -945,10 +945,10 @@ struct Assignment<DstXprType, Product<Lhs,Rhs,DefaultProduct>, internal::assign_
// Triangular += Product // Triangular += Product
template< typename DstXprType, typename Lhs, typename Rhs, typename Scalar> template< typename DstXprType, typename Lhs, typename Rhs, typename Scalar>
struct Assignment<DstXprType, Product<Lhs,Rhs,DefaultProduct>, internal::add_assign_op<Scalar>, Dense2Triangular, Scalar> struct Assignment<DstXprType, Product<Lhs,Rhs,DefaultProduct>, internal::add_assign_op<Scalar,typename Product<Lhs,Rhs,DefaultProduct>::Scalar>, Dense2Triangular>
{ {
typedef Product<Lhs,Rhs,DefaultProduct> SrcXprType; typedef Product<Lhs,Rhs,DefaultProduct> SrcXprType;
static void run(DstXprType &dst, const SrcXprType &src, const internal::add_assign_op<Scalar> &) static void run(DstXprType &dst, const SrcXprType &src, const internal::add_assign_op<Scalar,typename SrcXprType::Scalar> &)
{ {
dst._assignProduct(src, 1); dst._assignProduct(src, 1);
} }
@ -956,10 +956,10 @@ struct Assignment<DstXprType, Product<Lhs,Rhs,DefaultProduct>, internal::add_ass
// Triangular -= Product // Triangular -= Product
template< typename DstXprType, typename Lhs, typename Rhs, typename Scalar> template< typename DstXprType, typename Lhs, typename Rhs, typename Scalar>
struct Assignment<DstXprType, Product<Lhs,Rhs,DefaultProduct>, internal::sub_assign_op<Scalar>, Dense2Triangular, Scalar> struct Assignment<DstXprType, Product<Lhs,Rhs,DefaultProduct>, internal::sub_assign_op<Scalar,typename Product<Lhs,Rhs,DefaultProduct>::Scalar>, Dense2Triangular>
{ {
typedef Product<Lhs,Rhs,DefaultProduct> SrcXprType; typedef Product<Lhs,Rhs,DefaultProduct> SrcXprType;
static void run(DstXprType &dst, const SrcXprType &src, const internal::sub_assign_op<Scalar> &) static void run(DstXprType &dst, const SrcXprType &src, const internal::sub_assign_op<Scalar,typename SrcXprType::Scalar> &)
{ {
dst._assignProduct(src, -1); dst._assignProduct(src, -1);
} }

View File

@ -284,6 +284,7 @@ template<typename ExpressionType, int Direction> class VectorwiseOp
typedef typename ReturnType<internal::member_any>::Type AnyReturnType; typedef typename ReturnType<internal::member_any>::Type AnyReturnType;
typedef PartialReduxExpr<ExpressionType, internal::member_count<Index>, Direction> CountReturnType; typedef PartialReduxExpr<ExpressionType, internal::member_count<Index>, Direction> CountReturnType;
typedef typename ReturnType<internal::member_prod>::Type ProdReturnType; typedef typename ReturnType<internal::member_prod>::Type ProdReturnType;
typedef Reverse<const ExpressionType, Direction> ConstReverseReturnType;
typedef Reverse<ExpressionType, Direction> ReverseReturnType; typedef Reverse<ExpressionType, Direction> ReverseReturnType;
template<int p> struct LpNormReturnType { template<int p> struct LpNormReturnType {
@ -456,7 +457,15 @@ template<typename ExpressionType, int Direction> class VectorwiseOp
* *
* \sa DenseBase::reverse() */ * \sa DenseBase::reverse() */
EIGEN_DEVICE_FUNC EIGEN_DEVICE_FUNC
const ReverseReturnType reverse() const const ConstReverseReturnType reverse() const
{ return ConstReverseReturnType( _expression() ); }
/** \returns a writable matrix expression
* where each column (or row) are reversed.
*
* \sa reverse() const */
EIGEN_DEVICE_FUNC
ReverseReturnType reverse()
{ return ReverseReturnType( _expression() ); } { return ReverseReturnType( _expression() ); }
typedef Replicate<ExpressionType,(isVertical?Dynamic:1),(isHorizontal?Dynamic:1)> ReplicateReturnType; typedef Replicate<ExpressionType,(isVertical?Dynamic:1),(isHorizontal?Dynamic:1)> ReplicateReturnType;
@ -540,7 +549,7 @@ template<typename ExpressionType, int Direction> class VectorwiseOp
/** Returns the expression of the sum of the vector \a other to each subvector of \c *this */ /** Returns the expression of the sum of the vector \a other to each subvector of \c *this */
template<typename OtherDerived> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC template<typename OtherDerived> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC
CwiseBinaryOp<internal::scalar_sum_op<Scalar>, CwiseBinaryOp<internal::scalar_sum_op<Scalar,typename OtherDerived::Scalar>,
const ExpressionTypeNestedCleaned, const ExpressionTypeNestedCleaned,
const typename ExtendedType<OtherDerived>::Type> const typename ExtendedType<OtherDerived>::Type>
operator+(const DenseBase<OtherDerived>& other) const operator+(const DenseBase<OtherDerived>& other) const
@ -553,7 +562,7 @@ template<typename ExpressionType, int Direction> class VectorwiseOp
/** Returns the expression of the difference between each subvector of \c *this and the vector \a other */ /** Returns the expression of the difference between each subvector of \c *this and the vector \a other */
template<typename OtherDerived> template<typename OtherDerived>
EIGEN_DEVICE_FUNC EIGEN_DEVICE_FUNC
CwiseBinaryOp<internal::scalar_difference_op<Scalar>, CwiseBinaryOp<internal::scalar_difference_op<Scalar,typename OtherDerived::Scalar>,
const ExpressionTypeNestedCleaned, const ExpressionTypeNestedCleaned,
const typename ExtendedType<OtherDerived>::Type> const typename ExtendedType<OtherDerived>::Type>
operator-(const DenseBase<OtherDerived>& other) const operator-(const DenseBase<OtherDerived>& other) const

View File

@ -1,6 +0,0 @@
FILE(GLOB Eigen_Core_arch_AVX_SRCS "*.h")
INSTALL(FILES
${Eigen_Core_arch_AVX_SRCS}
DESTINATION ${INCLUDE_INSTALL_DIR}/Eigen/src/Core/arch/AVX COMPONENT Devel
)

View File

@ -266,52 +266,10 @@ pexp<Packet8f>(const Packet8f& _x) {
} }
// Hyperbolic Tangent function. // Hyperbolic Tangent function.
// Doesn't do anything fancy, just a 13/6-degree rational interpolant which
// is accurate up to a couple of ulp in the range [-9, 9], outside of which the
// fl(tanh(x)) = +/-1.
template <> template <>
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet8f EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet8f
ptanh<Packet8f>(const Packet8f& _x) { ptanh<Packet8f>(const Packet8f& x) {
// Clamp the inputs to the range [-9, 9] since anything outside return internal::generic_fast_tanh_float(x);
// this range is +/-1.0f in single-precision.
_EIGEN_DECLARE_CONST_Packet8f(plus_9, 9.0f);
_EIGEN_DECLARE_CONST_Packet8f(minus_9, -9.0f);
const Packet8f x = pmax(p8f_minus_9, pmin(p8f_plus_9, _x));
// The monomial coefficients of the numerator polynomial (odd).
_EIGEN_DECLARE_CONST_Packet8f(alpha_1, 4.89352455891786e-03f);
_EIGEN_DECLARE_CONST_Packet8f(alpha_3, 6.37261928875436e-04f);
_EIGEN_DECLARE_CONST_Packet8f(alpha_5, 1.48572235717979e-05f);
_EIGEN_DECLARE_CONST_Packet8f(alpha_7, 5.12229709037114e-08f);
_EIGEN_DECLARE_CONST_Packet8f(alpha_9, -8.60467152213735e-11f);
_EIGEN_DECLARE_CONST_Packet8f(alpha_11, 2.00018790482477e-13f);
_EIGEN_DECLARE_CONST_Packet8f(alpha_13, -2.76076847742355e-16f);
// The monomial coefficients of the denominator polynomial (even).
_EIGEN_DECLARE_CONST_Packet8f(beta_0, 4.89352518554385e-03f);
_EIGEN_DECLARE_CONST_Packet8f(beta_2, 2.26843463243900e-03f);
_EIGEN_DECLARE_CONST_Packet8f(beta_4, 1.18534705686654e-04f);
_EIGEN_DECLARE_CONST_Packet8f(beta_6, 1.19825839466702e-06f);
// Since the polynomials are odd/even, we need x^2.
const Packet8f x2 = pmul(x, x);
// Evaluate the numerator polynomial p.
Packet8f p = pmadd(x2, p8f_alpha_13, p8f_alpha_11);
p = pmadd(x2, p, p8f_alpha_9);
p = pmadd(x2, p, p8f_alpha_7);
p = pmadd(x2, p, p8f_alpha_5);
p = pmadd(x2, p, p8f_alpha_3);
p = pmadd(x2, p, p8f_alpha_1);
p = pmul(x, p);
// Evaluate the denominator polynomial p.
Packet8f q = pmadd(x2, p8f_beta_6, p8f_beta_4);
q = pmadd(x2, q, p8f_beta_2);
q = pmadd(x2, q, p8f_beta_0);
// Divide the numerator by the denominator.
return pdiv(p, q);
} }
template <> template <>

View File

@ -97,6 +97,9 @@ template<> struct packet_traits<double> : default_packet_traits
}; };
#endif #endif
template<> struct scalar_div_cost<float,true> { enum { value = 14 }; };
template<> struct scalar_div_cost<double,true> { enum { value = 16 }; };
/* Proper support for integers is only provided by AVX2. In the meantime, we'll /* Proper support for integers is only provided by AVX2. In the meantime, we'll
use SSE instructions and packets to deal with integers. use SSE instructions and packets to deal with integers.
template<> struct packet_traits<int> : default_packet_traits template<> struct packet_traits<int> : default_packet_traits
@ -156,7 +159,7 @@ template<> EIGEN_STRONG_INLINE Packet8i pdiv<Packet8i>(const Packet8i& /*a*/, co
#ifdef __FMA__ #ifdef __FMA__
template<> EIGEN_STRONG_INLINE Packet8f pmadd(const Packet8f& a, const Packet8f& b, const Packet8f& c) { template<> EIGEN_STRONG_INLINE Packet8f pmadd(const Packet8f& a, const Packet8f& b, const Packet8f& c) {
#if EIGEN_COMP_GNUC || EIGEN_COMP_CLANG #if ( EIGEN_COMP_GNUC_STRICT || (EIGEN_COMP_CLANG && (EIGEN_COMP_CLANG<308)) )
// clang stupidly generates a vfmadd213ps instruction plus some vmovaps on registers, // clang stupidly generates a vfmadd213ps instruction plus some vmovaps on registers,
// and gcc stupidly generates a vfmadd132ps instruction, // and gcc stupidly generates a vfmadd132ps instruction,
// so let's enforce it to generate a vfmadd231ps instruction since the most common use case is to accumulate // so let's enforce it to generate a vfmadd231ps instruction since the most common use case is to accumulate
@ -169,7 +172,7 @@ template<> EIGEN_STRONG_INLINE Packet8f pmadd(const Packet8f& a, const Packet8f&
#endif #endif
} }
template<> EIGEN_STRONG_INLINE Packet4d pmadd(const Packet4d& a, const Packet4d& b, const Packet4d& c) { template<> EIGEN_STRONG_INLINE Packet4d pmadd(const Packet4d& a, const Packet4d& b, const Packet4d& c) {
#if EIGEN_COMP_GNUC || EIGEN_COMP_CLANG #if ( EIGEN_COMP_GNUC_STRICT || (EIGEN_COMP_CLANG && (EIGEN_COMP_CLANG<308)) )
// see above // see above
Packet4d res = c; Packet4d res = c;
__asm__("vfmadd231pd %[a], %[b], %[c]" : [c] "+x" (res) : [a] "x" (a), [b] "x" (b)); __asm__("vfmadd231pd %[a], %[b], %[c]" : [c] "+x" (res) : [a] "x" (a), [b] "x" (b));

View File

@ -1,6 +0,0 @@
FILE(GLOB Eigen_Core_arch_AltiVec_SRCS "*.h")
INSTALL(FILES
${Eigen_Core_arch_AltiVec_SRCS}
DESTINATION ${INCLUDE_INSTALL_DIR}/Eigen/src/Core/arch/AltiVec COMPONENT Devel
)

View File

@ -2,6 +2,7 @@
// for linear algebra. // for linear algebra.
// //
// Copyright (C) 2010 Gael Guennebaud <gael.guennebaud@inria.fr> // Copyright (C) 2010 Gael Guennebaud <gael.guennebaud@inria.fr>
// Copyright (C) 2010-2016 Konstantinos Margaritis <markos@freevec.org>
// //
// This Source Code Form is subject to the terms of the Mozilla // This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed // Public License v. 2.0. If a copy of the MPL was not distributed
@ -15,18 +16,20 @@ namespace Eigen {
namespace internal { namespace internal {
static Packet4ui p4ui_CONJ_XOR = vec_mergeh((Packet4ui)p4i_ZERO, (Packet4ui)p4f_ZERO_);//{ 0x00000000, 0x80000000, 0x00000000, 0x80000000 }; static Packet4ui p4ui_CONJ_XOR = vec_mergeh((Packet4ui)p4i_ZERO, (Packet4ui)p4f_ZERO_);//{ 0x00000000, 0x80000000, 0x00000000, 0x80000000 };
#ifdef _BIG_ENDIAN #ifdef __VSX__
#if defined(_BIG_ENDIAN)
static Packet2ul p2ul_CONJ_XOR1 = (Packet2ul) vec_sld((Packet4ui) p2d_ZERO_, (Packet4ui) p2l_ZERO, 8);//{ 0x8000000000000000, 0x0000000000000000 }; static Packet2ul p2ul_CONJ_XOR1 = (Packet2ul) vec_sld((Packet4ui) p2d_ZERO_, (Packet4ui) p2l_ZERO, 8);//{ 0x8000000000000000, 0x0000000000000000 };
static Packet2ul p2ul_CONJ_XOR2 = (Packet2ul) vec_sld((Packet4ui) p2l_ZERO, (Packet4ui) p2d_ZERO_, 8);//{ 0x8000000000000000, 0x0000000000000000 }; static Packet2ul p2ul_CONJ_XOR2 = (Packet2ul) vec_sld((Packet4ui) p2l_ZERO, (Packet4ui) p2d_ZERO_, 8);//{ 0x8000000000000000, 0x0000000000000000 };
#else #else
static Packet2ul p2ul_CONJ_XOR1 = (Packet2ul) vec_sld((Packet4ui) p2l_ZERO, (Packet4ui) p2d_ZERO_, 8);//{ 0x8000000000000000, 0x0000000000000000 }; static Packet2ul p2ul_CONJ_XOR1 = (Packet2ul) vec_sld((Packet4ui) p2l_ZERO, (Packet4ui) p2d_ZERO_, 8);//{ 0x8000000000000000, 0x0000000000000000 };
static Packet2ul p2ul_CONJ_XOR2 = (Packet2ul) vec_sld((Packet4ui) p2d_ZERO_, (Packet4ui) p2l_ZERO, 8);//{ 0x8000000000000000, 0x0000000000000000 }; static Packet2ul p2ul_CONJ_XOR2 = (Packet2ul) vec_sld((Packet4ui) p2d_ZERO_, (Packet4ui) p2l_ZERO, 8);//{ 0x8000000000000000, 0x0000000000000000 };
#endif #endif
#endif
//---------- float ---------- //---------- float ----------
struct Packet2cf struct Packet2cf
{ {
EIGEN_STRONG_INLINE Packet2cf() {} EIGEN_STRONG_INLINE explicit Packet2cf() : v(p4f_ZERO) {}
EIGEN_STRONG_INLINE explicit Packet2cf(const Packet4f& a) : v(a) {} EIGEN_STRONG_INLINE explicit Packet2cf(const Packet4f& a) : v(a) {}
Packet4f v; Packet4f v;
}; };
@ -39,6 +42,7 @@ template<> struct packet_traits<std::complex<float> > : default_packet_traits
Vectorizable = 1, Vectorizable = 1,
AlignedOnScalar = 1, AlignedOnScalar = 1,
size = 2, size = 2,
HasHalfPacket = 0,
HasAdd = 1, HasAdd = 1,
HasSub = 1, HasSub = 1,
@ -49,6 +53,9 @@ template<> struct packet_traits<std::complex<float> > : default_packet_traits
HasAbs2 = 0, HasAbs2 = 0,
HasMin = 0, HasMin = 0,
HasMax = 0, HasMax = 0,
#ifdef __VSX__
HasBlend = 1,
#endif
HasSetLinear = 0 HasSetLinear = 0
}; };
}; };
@ -58,7 +65,6 @@ template<> struct unpacket_traits<Packet2cf> { typedef std::complex<float> type;
template<> EIGEN_STRONG_INLINE Packet2cf pset1<Packet2cf>(const std::complex<float>& from) template<> EIGEN_STRONG_INLINE Packet2cf pset1<Packet2cf>(const std::complex<float>& from)
{ {
Packet2cf res; Packet2cf res;
/* On AltiVec we cannot load 64-bit registers, so wa have to take care of alignment */
if((ptrdiff_t(&from) % 16) == 0) if((ptrdiff_t(&from) % 16) == 0)
res.v = pload<Packet4f>((const float *)&from); res.v = pload<Packet4f>((const float *)&from);
else else
@ -67,26 +73,32 @@ template<> EIGEN_STRONG_INLINE Packet2cf pset1<Packet2cf>(const std::complex<flo
return res; return res;
} }
template<> EIGEN_STRONG_INLINE Packet2cf pload<Packet2cf>(const std::complex<float>* from) { return Packet2cf(pload<Packet4f>((const float *) from)); }
template<> EIGEN_STRONG_INLINE Packet2cf ploadu<Packet2cf>(const std::complex<float>* from) { return Packet2cf(ploadu<Packet4f>((const float*) from)); }
template<> EIGEN_STRONG_INLINE Packet2cf ploaddup<Packet2cf>(const std::complex<float>* from) { return pset1<Packet2cf>(*from); }
template<> EIGEN_STRONG_INLINE void pstore <std::complex<float> >(std::complex<float> * to, const Packet2cf& from) { pstore((float*)to, from.v); }
template<> EIGEN_STRONG_INLINE void pstoreu<std::complex<float> >(std::complex<float> * to, const Packet2cf& from) { pstoreu((float*)to, from.v); }
template<> EIGEN_DEVICE_FUNC inline Packet2cf pgather<std::complex<float>, Packet2cf>(const std::complex<float>* from, Index stride) template<> EIGEN_DEVICE_FUNC inline Packet2cf pgather<std::complex<float>, Packet2cf>(const std::complex<float>* from, Index stride)
{ {
std::complex<float> EIGEN_ALIGN16 af[2]; std::complex<float> EIGEN_ALIGN16 af[2];
af[0] = from[0*stride]; af[0] = from[0*stride];
af[1] = from[1*stride]; af[1] = from[1*stride];
return Packet2cf(vec_ld(0, (const float*)af)); return pload<Packet2cf>(af);
} }
template<> EIGEN_DEVICE_FUNC inline void pscatter<std::complex<float>, Packet2cf>(std::complex<float>* to, const Packet2cf& from, Index stride) template<> EIGEN_DEVICE_FUNC inline void pscatter<std::complex<float>, Packet2cf>(std::complex<float>* to, const Packet2cf& from, Index stride)
{ {
std::complex<float> EIGEN_ALIGN16 af[2]; std::complex<float> EIGEN_ALIGN16 af[2];
vec_st(from.v, 0, (float*)af); pstore<std::complex<float> >((std::complex<float> *) af, from);
to[0*stride] = af[0]; to[0*stride] = af[0];
to[1*stride] = af[1]; to[1*stride] = af[1];
} }
template<> EIGEN_STRONG_INLINE Packet2cf padd<Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(a.v + b.v); }
template<> EIGEN_STRONG_INLINE Packet2cf padd<Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(vec_add(a.v,b.v)); } template<> EIGEN_STRONG_INLINE Packet2cf psub<Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(a.v - b.v); }
template<> EIGEN_STRONG_INLINE Packet2cf psub<Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(vec_sub(a.v,b.v)); }
template<> EIGEN_STRONG_INLINE Packet2cf pnegate(const Packet2cf& a) { return Packet2cf(pnegate(a.v)); } template<> EIGEN_STRONG_INLINE Packet2cf pnegate(const Packet2cf& a) { return Packet2cf(pnegate(a.v)); }
template<> EIGEN_STRONG_INLINE Packet2cf pconj(const Packet2cf& a) { return Packet2cf((Packet4f)vec_xor((Packet4ui)a.v, p4ui_CONJ_XOR)); } template<> EIGEN_STRONG_INLINE Packet2cf pconj(const Packet2cf& a) { return Packet2cf(pxor<Packet4f>(a.v, reinterpret_cast<Packet4f>(p4ui_CONJ_XOR))); }
template<> EIGEN_STRONG_INLINE Packet2cf pmul<Packet2cf>(const Packet2cf& a, const Packet2cf& b) template<> EIGEN_STRONG_INLINE Packet2cf pmul<Packet2cf>(const Packet2cf& a, const Packet2cf& b)
{ {
@ -100,30 +112,19 @@ template<> EIGEN_STRONG_INLINE Packet2cf pmul<Packet2cf>(const Packet2cf& a, con
v1 = vec_madd(v1, b.v, p4f_ZERO); v1 = vec_madd(v1, b.v, p4f_ZERO);
// multiply a_im * b and get the conjugate result // multiply a_im * b and get the conjugate result
v2 = vec_madd(v2, b.v, p4f_ZERO); v2 = vec_madd(v2, b.v, p4f_ZERO);
v2 = (Packet4f) vec_xor((Packet4ui)v2, p4ui_CONJ_XOR); v2 = reinterpret_cast<Packet4f>(pxor(v2, reinterpret_cast<Packet4f>(p4ui_CONJ_XOR)));
// permute back to a proper order // permute back to a proper order
v2 = vec_perm(v2, v2, p16uc_COMPLEX32_REV); v2 = vec_perm(v2, v2, p16uc_COMPLEX32_REV);
return Packet2cf(vec_add(v1, v2)); return Packet2cf(padd<Packet4f>(v1, v2));
} }
template<> EIGEN_STRONG_INLINE Packet2cf pand <Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(vec_and(a.v,b.v)); } template<> EIGEN_STRONG_INLINE Packet2cf pand <Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(pand<Packet4f>(a.v, b.v)); }
template<> EIGEN_STRONG_INLINE Packet2cf por <Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(vec_or(a.v,b.v)); } template<> EIGEN_STRONG_INLINE Packet2cf por <Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(por<Packet4f>(a.v, b.v)); }
template<> EIGEN_STRONG_INLINE Packet2cf pxor <Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(vec_xor(a.v,b.v)); } template<> EIGEN_STRONG_INLINE Packet2cf pxor <Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(pxor<Packet4f>(a.v, b.v)); }
template<> EIGEN_STRONG_INLINE Packet2cf pandnot<Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(vec_and(a.v, vec_nor(b.v,b.v))); } template<> EIGEN_STRONG_INLINE Packet2cf pandnot<Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(pandnot<Packet4f>(a.v, b.v)); }
template<> EIGEN_STRONG_INLINE Packet2cf pload <Packet2cf>(const std::complex<float>* from) { EIGEN_DEBUG_ALIGNED_LOAD return Packet2cf(pload<Packet4f>((const float*)from)); } template<> EIGEN_STRONG_INLINE void prefetch<std::complex<float> >(const std::complex<float> * addr) { EIGEN_PPC_PREFETCH(addr); }
template<> EIGEN_STRONG_INLINE Packet2cf ploadu<Packet2cf>(const std::complex<float>* from) { EIGEN_DEBUG_UNALIGNED_LOAD return Packet2cf(ploadu<Packet4f>((const float*)from)); }
template<> EIGEN_STRONG_INLINE Packet2cf ploaddup<Packet2cf>(const std::complex<float>* from)
{
return pset1<Packet2cf>(*from);
}
template<> EIGEN_STRONG_INLINE void pstore <std::complex<float> >(std::complex<float> * to, const Packet2cf& from) { EIGEN_DEBUG_ALIGNED_STORE pstore((float*)to, from.v); }
template<> EIGEN_STRONG_INLINE void pstoreu<std::complex<float> >(std::complex<float> * to, const Packet2cf& from) { EIGEN_DEBUG_UNALIGNED_STORE pstoreu((float*)to, from.v); }
template<> EIGEN_STRONG_INLINE void prefetch<std::complex<float> >(const std::complex<float> * addr) { vec_dstt((float *)addr, DST_CTRL(2,2,32), DST_CHAN); }
template<> EIGEN_STRONG_INLINE std::complex<float> pfirst<Packet2cf>(const Packet2cf& a) template<> EIGEN_STRONG_INLINE std::complex<float> pfirst<Packet2cf>(const Packet2cf& a)
{ {
@ -143,23 +144,23 @@ template<> EIGEN_STRONG_INLINE Packet2cf preverse(const Packet2cf& a)
template<> EIGEN_STRONG_INLINE std::complex<float> predux<Packet2cf>(const Packet2cf& a) template<> EIGEN_STRONG_INLINE std::complex<float> predux<Packet2cf>(const Packet2cf& a)
{ {
Packet4f b; Packet4f b;
b = (Packet4f) vec_sld(a.v, a.v, 8); b = vec_sld(a.v, a.v, 8);
b = padd(a.v, b); b = padd<Packet4f>(a.v, b);
return pfirst(Packet2cf(b)); return pfirst<Packet2cf>(Packet2cf(b));
} }
template<> EIGEN_STRONG_INLINE Packet2cf preduxp<Packet2cf>(const Packet2cf* vecs) template<> EIGEN_STRONG_INLINE Packet2cf preduxp<Packet2cf>(const Packet2cf* vecs)
{ {
Packet4f b1, b2; Packet4f b1, b2;
#ifdef _BIG_ENDIAN #ifdef _BIG_ENDIAN
b1 = (Packet4f) vec_sld(vecs[0].v, vecs[1].v, 8); b1 = vec_sld(vecs[0].v, vecs[1].v, 8);
b2 = (Packet4f) vec_sld(vecs[1].v, vecs[0].v, 8); b2 = vec_sld(vecs[1].v, vecs[0].v, 8);
#else #else
b1 = (Packet4f) vec_sld(vecs[1].v, vecs[0].v, 8); b1 = vec_sld(vecs[1].v, vecs[0].v, 8);
b2 = (Packet4f) vec_sld(vecs[0].v, vecs[1].v, 8); b2 = vec_sld(vecs[0].v, vecs[1].v, 8);
#endif #endif
b2 = (Packet4f) vec_sld(b2, b2, 8); b2 = vec_sld(b2, b2, 8);
b2 = padd(b1, b2); b2 = padd<Packet4f>(b1, b2);
return Packet2cf(b2); return Packet2cf(b2);
} }
@ -168,10 +169,10 @@ template<> EIGEN_STRONG_INLINE std::complex<float> predux_mul<Packet2cf>(const P
{ {
Packet4f b; Packet4f b;
Packet2cf prod; Packet2cf prod;
b = (Packet4f) vec_sld(a.v, a.v, 8); b = vec_sld(a.v, a.v, 8);
prod = pmul(a, Packet2cf(b)); prod = pmul<Packet2cf>(a, Packet2cf(b));
return pfirst(prod); return pfirst<Packet2cf>(prod);
} }
template<int Offset> template<int Offset>
@ -223,12 +224,30 @@ template<> struct conj_helper<Packet2cf, Packet2cf, true,true>
} }
}; };
template<> struct conj_helper<Packet4f, Packet2cf, false,false>
{
EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet4f& x, const Packet2cf& y, const Packet2cf& c) const
{ return padd(c, pmul(x,y)); }
EIGEN_STRONG_INLINE Packet2cf pmul(const Packet4f& x, const Packet2cf& y) const
{ return Packet2cf(internal::pmul<Packet4f>(x, y.v)); }
};
template<> struct conj_helper<Packet2cf, Packet4f, false,false>
{
EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet2cf& x, const Packet4f& y, const Packet2cf& c) const
{ return padd(c, pmul(x,y)); }
EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& x, const Packet4f& y) const
{ return Packet2cf(internal::pmul<Packet4f>(x.v, y)); }
};
template<> EIGEN_STRONG_INLINE Packet2cf pdiv<Packet2cf>(const Packet2cf& a, const Packet2cf& b) template<> EIGEN_STRONG_INLINE Packet2cf pdiv<Packet2cf>(const Packet2cf& a, const Packet2cf& b)
{ {
// TODO optimize it for AltiVec // TODO optimize it for AltiVec
Packet2cf res = conj_helper<Packet2cf,Packet2cf,false,true>().pmul(a, b); Packet2cf res = conj_helper<Packet2cf,Packet2cf,false,true>().pmul(a, b);
Packet4f s = vec_madd(b.v, b.v, p4f_ZERO); Packet4f s = pmul<Packet4f>(b.v, b.v);
return Packet2cf(pdiv(res.v, vec_add(s,vec_perm(s, s, p16uc_COMPLEX32_REV)))); return Packet2cf(pdiv(res.v, padd<Packet4f>(s, vec_perm(s, s, p16uc_COMPLEX32_REV))));
} }
template<> EIGEN_STRONG_INLINE Packet2cf pcplxflip<Packet2cf>(const Packet2cf& x) template<> EIGEN_STRONG_INLINE Packet2cf pcplxflip<Packet2cf>(const Packet2cf& x)
@ -243,6 +262,14 @@ EIGEN_STRONG_INLINE void ptranspose(PacketBlock<Packet2cf,2>& kernel)
kernel.packet[0].v = tmp; kernel.packet[0].v = tmp;
} }
#ifdef __VSX__
template<> EIGEN_STRONG_INLINE Packet2cf pblend(const Selector<2>& ifPacket, const Packet2cf& thenPacket, const Packet2cf& elsePacket) {
Packet2cf result;
result.v = reinterpret_cast<Packet4f>(pblend<Packet2d>(ifPacket, reinterpret_cast<Packet2d>(thenPacket.v), reinterpret_cast<Packet2d>(elsePacket.v)));
return result;
}
#endif
//---------- double ---------- //---------- double ----------
#ifdef __VSX__ #ifdef __VSX__
struct Packet1cd struct Packet1cd
@ -277,10 +304,10 @@ template<> struct packet_traits<std::complex<double> > : default_packet_traits
template<> struct unpacket_traits<Packet1cd> { typedef std::complex<double> type; enum {size=1, alignment=Aligned16}; typedef Packet1cd half; }; template<> struct unpacket_traits<Packet1cd> { typedef std::complex<double> type; enum {size=1, alignment=Aligned16}; typedef Packet1cd half; };
template<> EIGEN_STRONG_INLINE Packet1cd pload <Packet1cd>(const std::complex<double>* from) { EIGEN_DEBUG_ALIGNED_LOAD return Packet1cd(pload<Packet2d>((const double*)from)); } template<> EIGEN_STRONG_INLINE Packet1cd pload <Packet1cd>(const std::complex<double>* from) { return Packet1cd(pload<Packet2d>((const double*)from)); }
template<> EIGEN_STRONG_INLINE Packet1cd ploadu<Packet1cd>(const std::complex<double>* from) { EIGEN_DEBUG_UNALIGNED_LOAD return Packet1cd(ploadu<Packet2d>((const double*)from)); } template<> EIGEN_STRONG_INLINE Packet1cd ploadu<Packet1cd>(const std::complex<double>* from) { return Packet1cd(ploadu<Packet2d>((const double*)from)); }
template<> EIGEN_STRONG_INLINE void pstore <std::complex<double> >(std::complex<double> * to, const Packet1cd& from) { EIGEN_DEBUG_ALIGNED_STORE pstore((double*)to, from.v); } template<> EIGEN_STRONG_INLINE void pstore <std::complex<double> >(std::complex<double> * to, const Packet1cd& from) { pstore((double*)to, from.v); }
template<> EIGEN_STRONG_INLINE void pstoreu<std::complex<double> >(std::complex<double> * to, const Packet1cd& from) { EIGEN_DEBUG_UNALIGNED_STORE pstoreu((double*)to, from.v); } template<> EIGEN_STRONG_INLINE void pstoreu<std::complex<double> >(std::complex<double> * to, const Packet1cd& from) { pstoreu((double*)to, from.v); }
template<> EIGEN_STRONG_INLINE Packet1cd pset1<Packet1cd>(const std::complex<double>& from) template<> EIGEN_STRONG_INLINE Packet1cd pset1<Packet1cd>(const std::complex<double>& from)
{ /* here we really have to use unaligned loads :( */ return ploadu<Packet1cd>(&from); } { /* here we really have to use unaligned loads :( */ return ploadu<Packet1cd>(&from); }
@ -300,10 +327,10 @@ template<> EIGEN_DEVICE_FUNC inline void pscatter<std::complex<double>, Packet1c
to[1*stride] = af[1]; to[1*stride] = af[1];
} }
template<> EIGEN_STRONG_INLINE Packet1cd padd<Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(vec_add(a.v,b.v)); } template<> EIGEN_STRONG_INLINE Packet1cd padd<Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(a.v + b.v); }
template<> EIGEN_STRONG_INLINE Packet1cd psub<Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(vec_sub(a.v,b.v)); } template<> EIGEN_STRONG_INLINE Packet1cd psub<Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(a.v - b.v); }
template<> EIGEN_STRONG_INLINE Packet1cd pnegate(const Packet1cd& a) { return Packet1cd(pnegate(Packet2d(a.v))); } template<> EIGEN_STRONG_INLINE Packet1cd pnegate(const Packet1cd& a) { return Packet1cd(pnegate(Packet2d(a.v))); }
template<> EIGEN_STRONG_INLINE Packet1cd pconj(const Packet1cd& a) { return Packet1cd((Packet2d)vec_xor((Packet2d)a.v, (Packet2d)p2ul_CONJ_XOR2)); } template<> EIGEN_STRONG_INLINE Packet1cd pconj(const Packet1cd& a) { return Packet1cd(pxor(a.v, reinterpret_cast<Packet2d>(p2ul_CONJ_XOR2))); }
template<> EIGEN_STRONG_INLINE Packet1cd pmul<Packet1cd>(const Packet1cd& a, const Packet1cd& b) template<> EIGEN_STRONG_INLINE Packet1cd pmul<Packet1cd>(const Packet1cd& a, const Packet1cd& b)
{ {
@ -317,23 +344,20 @@ template<> EIGEN_STRONG_INLINE Packet1cd pmul<Packet1cd>(const Packet1cd& a, con
v1 = vec_madd(a_re, b.v, p2d_ZERO); v1 = vec_madd(a_re, b.v, p2d_ZERO);
// multiply a_im * b and get the conjugate result // multiply a_im * b and get the conjugate result
v2 = vec_madd(a_im, b.v, p2d_ZERO); v2 = vec_madd(a_im, b.v, p2d_ZERO);
v2 = (Packet2d) vec_sld((Packet4ui)v2, (Packet4ui)v2, 8); v2 = reinterpret_cast<Packet2d>(vec_sld(reinterpret_cast<Packet4ui>(v2), reinterpret_cast<Packet4ui>(v2), 8));
v2 = (Packet2d) vec_xor((Packet2d)v2, (Packet2d) p2ul_CONJ_XOR1); v2 = pxor(v2, reinterpret_cast<Packet2d>(p2ul_CONJ_XOR1));
return Packet1cd(vec_add(v1, v2)); return Packet1cd(padd<Packet2d>(v1, v2));
} }
template<> EIGEN_STRONG_INLINE Packet1cd pand <Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(vec_and(a.v,b.v)); } template<> EIGEN_STRONG_INLINE Packet1cd pand <Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(pand(a.v,b.v)); }
template<> EIGEN_STRONG_INLINE Packet1cd por <Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(vec_or(a.v,b.v)); } template<> EIGEN_STRONG_INLINE Packet1cd por <Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(por(a.v,b.v)); }
template<> EIGEN_STRONG_INLINE Packet1cd pxor <Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(vec_xor(a.v,b.v)); } template<> EIGEN_STRONG_INLINE Packet1cd pxor <Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(pxor(a.v,b.v)); }
template<> EIGEN_STRONG_INLINE Packet1cd pandnot<Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(vec_and(a.v, vec_nor(b.v,b.v))); } template<> EIGEN_STRONG_INLINE Packet1cd pandnot<Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(pandnot(a.v, b.v)); }
template<> EIGEN_STRONG_INLINE Packet1cd ploaddup<Packet1cd>(const std::complex<double>* from) template<> EIGEN_STRONG_INLINE Packet1cd ploaddup<Packet1cd>(const std::complex<double>* from) { return pset1<Packet1cd>(*from); }
{
return pset1<Packet1cd>(*from);
}
template<> EIGEN_STRONG_INLINE void prefetch<std::complex<double> >(const std::complex<double> * addr) { vec_dstt((long *)addr, DST_CTRL(2,2,32), DST_CHAN); } template<> EIGEN_STRONG_INLINE void prefetch<std::complex<double> >(const std::complex<double> * addr) { EIGEN_PPC_PREFETCH(addr); }
template<> EIGEN_STRONG_INLINE std::complex<double> pfirst<Packet1cd>(const Packet1cd& a) template<> EIGEN_STRONG_INLINE std::complex<double> pfirst<Packet1cd>(const Packet1cd& a)
{ {
@ -345,20 +369,10 @@ template<> EIGEN_STRONG_INLINE std::complex<double> pfirst<Packet1cd>(const Pac
template<> EIGEN_STRONG_INLINE Packet1cd preverse(const Packet1cd& a) { return a; } template<> EIGEN_STRONG_INLINE Packet1cd preverse(const Packet1cd& a) { return a; }
template<> EIGEN_STRONG_INLINE std::complex<double> predux<Packet1cd>(const Packet1cd& a) template<> EIGEN_STRONG_INLINE std::complex<double> predux<Packet1cd>(const Packet1cd& a) { return pfirst(a); }
{ template<> EIGEN_STRONG_INLINE Packet1cd preduxp<Packet1cd>(const Packet1cd* vecs) { return vecs[0]; }
return pfirst(a);
}
template<> EIGEN_STRONG_INLINE Packet1cd preduxp<Packet1cd>(const Packet1cd* vecs) template<> EIGEN_STRONG_INLINE std::complex<double> predux_mul<Packet1cd>(const Packet1cd& a) { return pfirst(a); }
{
return vecs[0];
}
template<> EIGEN_STRONG_INLINE std::complex<double> predux_mul<Packet1cd>(const Packet1cd& a)
{
return pfirst(a);
}
template<int Offset> template<int Offset>
struct palign_impl<Offset,Packet1cd> struct palign_impl<Offset,Packet1cd>
@ -402,13 +416,30 @@ template<> struct conj_helper<Packet1cd, Packet1cd, true,true>
return pconj(internal::pmul(a, b)); return pconj(internal::pmul(a, b));
} }
}; };
template<> struct conj_helper<Packet2d, Packet1cd, false,false>
{
EIGEN_STRONG_INLINE Packet1cd pmadd(const Packet2d& x, const Packet1cd& y, const Packet1cd& c) const
{ return padd(c, pmul(x,y)); }
EIGEN_STRONG_INLINE Packet1cd pmul(const Packet2d& x, const Packet1cd& y) const
{ return Packet1cd(internal::pmul<Packet2d>(x, y.v)); }
};
template<> struct conj_helper<Packet1cd, Packet2d, false,false>
{
EIGEN_STRONG_INLINE Packet1cd pmadd(const Packet1cd& x, const Packet2d& y, const Packet1cd& c) const
{ return padd(c, pmul(x,y)); }
EIGEN_STRONG_INLINE Packet1cd pmul(const Packet1cd& x, const Packet2d& y) const
{ return Packet1cd(internal::pmul<Packet2d>(x.v, y)); }
};
template<> EIGEN_STRONG_INLINE Packet1cd pdiv<Packet1cd>(const Packet1cd& a, const Packet1cd& b) template<> EIGEN_STRONG_INLINE Packet1cd pdiv<Packet1cd>(const Packet1cd& a, const Packet1cd& b)
{ {
// TODO optimize it for AltiVec // TODO optimize it for AltiVec
Packet1cd res = conj_helper<Packet1cd,Packet1cd,false,true>().pmul(a,b); Packet1cd res = conj_helper<Packet1cd,Packet1cd,false,true>().pmul(a,b);
Packet2d s = vec_madd(b.v, b.v, p2d_ZERO_); Packet2d s = pmul<Packet2d>(b.v, b.v);
return Packet1cd(pdiv(res.v, vec_add(s,vec_perm(s, s, p16uc_REVERSE64)))); return Packet1cd(pdiv(res.v, padd<Packet2d>(s, vec_perm(s, s, p16uc_REVERSE64))));
} }
EIGEN_STRONG_INLINE Packet1cd pcplxflip/*<Packet1cd>*/(const Packet1cd& x) EIGEN_STRONG_INLINE Packet1cd pcplxflip/*<Packet1cd>*/(const Packet1cd& x)

View File

@ -3,6 +3,7 @@
// //
// Copyright (C) 2007 Julien Pommier // Copyright (C) 2007 Julien Pommier
// Copyright (C) 2009 Gael Guennebaud <gael.guennebaud@inria.fr> // Copyright (C) 2009 Gael Guennebaud <gael.guennebaud@inria.fr>
// Copyright (C) 2016 Konstantinos Margaritis <markos@freevec.org>
// //
// This Source Code Form is subject to the terms of the Mozilla // This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed // Public License v. 2.0. If a copy of the MPL was not distributed
@ -19,38 +20,79 @@ namespace Eigen {
namespace internal { namespace internal {
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED static _EIGEN_DECLARE_CONST_Packet4f(1 , 1.0f);
Packet4f plog<Packet4f>(const Packet4f& _x) static _EIGEN_DECLARE_CONST_Packet4f(half, 0.5f);
{ static _EIGEN_DECLARE_CONST_Packet4i(0x7f, 0x7f);
Packet4f x = _x; static _EIGEN_DECLARE_CONST_Packet4i(23, 23);
_EIGEN_DECLARE_CONST_Packet4f(1 , 1.0f);
_EIGEN_DECLARE_CONST_Packet4f(half, 0.5f);
_EIGEN_DECLARE_CONST_Packet4i(0x7f, 0x7f);
_EIGEN_DECLARE_CONST_Packet4i(23, 23);
_EIGEN_DECLARE_CONST_Packet4f_FROM_INT(inv_mant_mask, ~0x7f800000); static _EIGEN_DECLARE_CONST_Packet4f_FROM_INT(inv_mant_mask, ~0x7f800000);
/* the smallest non denormalized float number */ /* the smallest non denormalized float number */
_EIGEN_DECLARE_CONST_Packet4f_FROM_INT(min_norm_pos, 0x00800000); static _EIGEN_DECLARE_CONST_Packet4f_FROM_INT(min_norm_pos, 0x00800000);
_EIGEN_DECLARE_CONST_Packet4f_FROM_INT(minus_inf, 0xff800000); // -1.f/0.f static _EIGEN_DECLARE_CONST_Packet4f_FROM_INT(minus_inf, 0xff800000); // -1.f/0.f
_EIGEN_DECLARE_CONST_Packet4f_FROM_INT(minus_nan, 0xffffffff); static _EIGEN_DECLARE_CONST_Packet4f_FROM_INT(minus_nan, 0xffffffff);
/* natural logarithm computed for 4 simultaneous float /* natural logarithm computed for 4 simultaneous float
return NaN for x <= 0 return NaN for x <= 0
*/ */
_EIGEN_DECLARE_CONST_Packet4f(cephes_SQRTHF, 0.707106781186547524f); static _EIGEN_DECLARE_CONST_Packet4f(cephes_SQRTHF, 0.707106781186547524f);
_EIGEN_DECLARE_CONST_Packet4f(cephes_log_p0, 7.0376836292E-2f); static _EIGEN_DECLARE_CONST_Packet4f(cephes_log_p0, 7.0376836292E-2f);
_EIGEN_DECLARE_CONST_Packet4f(cephes_log_p1, - 1.1514610310E-1f); static _EIGEN_DECLARE_CONST_Packet4f(cephes_log_p1, - 1.1514610310E-1f);
_EIGEN_DECLARE_CONST_Packet4f(cephes_log_p2, 1.1676998740E-1f); static _EIGEN_DECLARE_CONST_Packet4f(cephes_log_p2, 1.1676998740E-1f);
_EIGEN_DECLARE_CONST_Packet4f(cephes_log_p3, - 1.2420140846E-1f); static _EIGEN_DECLARE_CONST_Packet4f(cephes_log_p3, - 1.2420140846E-1f);
_EIGEN_DECLARE_CONST_Packet4f(cephes_log_p4, + 1.4249322787E-1f); static _EIGEN_DECLARE_CONST_Packet4f(cephes_log_p4, + 1.4249322787E-1f);
_EIGEN_DECLARE_CONST_Packet4f(cephes_log_p5, - 1.6668057665E-1f); static _EIGEN_DECLARE_CONST_Packet4f(cephes_log_p5, - 1.6668057665E-1f);
_EIGEN_DECLARE_CONST_Packet4f(cephes_log_p6, + 2.0000714765E-1f); static _EIGEN_DECLARE_CONST_Packet4f(cephes_log_p6, + 2.0000714765E-1f);
_EIGEN_DECLARE_CONST_Packet4f(cephes_log_p7, - 2.4999993993E-1f); static _EIGEN_DECLARE_CONST_Packet4f(cephes_log_p7, - 2.4999993993E-1f);
_EIGEN_DECLARE_CONST_Packet4f(cephes_log_p8, + 3.3333331174E-1f); static _EIGEN_DECLARE_CONST_Packet4f(cephes_log_p8, + 3.3333331174E-1f);
_EIGEN_DECLARE_CONST_Packet4f(cephes_log_q1, -2.12194440e-4f); static _EIGEN_DECLARE_CONST_Packet4f(cephes_log_q1, -2.12194440e-4f);
_EIGEN_DECLARE_CONST_Packet4f(cephes_log_q2, 0.693359375f); static _EIGEN_DECLARE_CONST_Packet4f(cephes_log_q2, 0.693359375f);
static _EIGEN_DECLARE_CONST_Packet4f(exp_hi, 88.3762626647950f);
static _EIGEN_DECLARE_CONST_Packet4f(exp_lo, -88.3762626647949f);
static _EIGEN_DECLARE_CONST_Packet4f(cephes_LOG2EF, 1.44269504088896341f);
static _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_C1, 0.693359375f);
static _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_C2, -2.12194440e-4f);
static _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p0, 1.9875691500E-4f);
static _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p1, 1.3981999507E-3f);
static _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p2, 8.3334519073E-3f);
static _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p3, 4.1665795894E-2f);
static _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p4, 1.6666665459E-1f);
static _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p5, 5.0000001201E-1f);
#ifdef __VSX__
static _EIGEN_DECLARE_CONST_Packet2d(1 , 1.0);
static _EIGEN_DECLARE_CONST_Packet2d(2 , 2.0);
static _EIGEN_DECLARE_CONST_Packet2d(half, 0.5);
static _EIGEN_DECLARE_CONST_Packet2d(exp_hi, 709.437);
static _EIGEN_DECLARE_CONST_Packet2d(exp_lo, -709.436139303);
static _EIGEN_DECLARE_CONST_Packet2d(cephes_LOG2EF, 1.4426950408889634073599);
static _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_p0, 1.26177193074810590878e-4);
static _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_p1, 3.02994407707441961300e-2);
static _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_p2, 9.99999999999999999910e-1);
static _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_q0, 3.00198505138664455042e-6);
static _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_q1, 2.52448340349684104192e-3);
static _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_q2, 2.27265548208155028766e-1);
static _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_q3, 2.00000000000000000009e0);
static _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_C1, 0.693145751953125);
static _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_C2, 1.42860682030941723212e-6);
static Packet2l p2l_1023 = { 1023, 1023 };
static Packet2ul p2ul_52 = { 52, 52 };
#endif
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
Packet4f plog<Packet4f>(const Packet4f& _x)
{
Packet4f x = _x;
Packet4i emm0; Packet4i emm0;
@ -112,36 +154,17 @@ template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
Packet4f pexp<Packet4f>(const Packet4f& _x) Packet4f pexp<Packet4f>(const Packet4f& _x)
{ {
Packet4f x = _x; Packet4f x = _x;
_EIGEN_DECLARE_CONST_Packet4f(1 , 1.0f);
_EIGEN_DECLARE_CONST_Packet4f(half, 0.5f);
_EIGEN_DECLARE_CONST_Packet4i(0x7f, 0x7f);
_EIGEN_DECLARE_CONST_Packet4i(23, 23);
_EIGEN_DECLARE_CONST_Packet4f(exp_hi, 88.3762626647950f);
_EIGEN_DECLARE_CONST_Packet4f(exp_lo, -88.3762626647949f);
_EIGEN_DECLARE_CONST_Packet4f(cephes_LOG2EF, 1.44269504088896341f);
_EIGEN_DECLARE_CONST_Packet4f(cephes_exp_C1, 0.693359375f);
_EIGEN_DECLARE_CONST_Packet4f(cephes_exp_C2, -2.12194440e-4f);
_EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p0, 1.9875691500E-4f);
_EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p1, 1.3981999507E-3f);
_EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p2, 8.3334519073E-3f);
_EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p3, 4.1665795894E-2f);
_EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p4, 1.6666665459E-1f);
_EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p5, 5.0000001201E-1f);
Packet4f tmp, fx; Packet4f tmp, fx;
Packet4i emm0; Packet4i emm0;
// clamp x // clamp x
x = vec_max(vec_min(x, p4f_exp_hi), p4f_exp_lo); x = pmax(pmin(x, p4f_exp_hi), p4f_exp_lo);
/* express exp(x) as exp(g + n*log(2)) */ // express exp(x) as exp(g + n*log(2))
fx = pmadd(x, p4f_cephes_LOG2EF, p4f_half); fx = pmadd(x, p4f_cephes_LOG2EF, p4f_half);
fx = vec_floor(fx); fx = pfloor(fx);
tmp = pmul(fx, p4f_cephes_exp_C1); tmp = pmul(fx, p4f_cephes_exp_C1);
Packet4f z = pmul(fx, p4f_cephes_exp_C2); Packet4f z = pmul(fx, p4f_cephes_exp_C2);
@ -171,14 +194,44 @@ Packet4f pexp<Packet4f>(const Packet4f& _x)
isnumber_mask); isnumber_mask);
} }
#ifndef EIGEN_COMP_CLANG
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
Packet4f prsqrt<Packet4f>(const Packet4f& x)
{
return vec_rsqrt(x);
}
#endif
#ifdef __VSX__ #ifdef __VSX__
#ifndef EIGEN_COMP_CLANG
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
Packet2d prsqrt<Packet2d>(const Packet2d& x)
{
return vec_rsqrt(x);
}
#endif
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
Packet4f psqrt<Packet4f>(const Packet4f& x)
{
return vec_sqrt(x);
}
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
Packet2d psqrt<Packet2d>(const Packet2d& x)
{
return vec_sqrt(x);
}
// VSX support varies between different compilers and even different // VSX support varies between different compilers and even different
// versions of the same compiler. For gcc version >= 4.9.3, we can use // versions of the same compiler. For gcc version >= 4.9.3, we can use
// vec_cts to efficiently convert Packet2d to Packet2l. Otherwise, use // vec_cts to efficiently convert Packet2d to Packet2l. Otherwise, use
// a slow version that works with older compilers. // a slow version that works with older compilers.
// Update: apparently vec_cts/vec_ctf intrinsics for 64-bit doubles
// are buggy, https://gcc.gnu.org/bugzilla/show_bug.cgi?id=70963
static inline Packet2l ConvertToPacket2l(const Packet2d& x) { static inline Packet2l ConvertToPacket2l(const Packet2d& x) {
#if EIGEN_GNUC_AT_LEAST(5, 0) || \ #if EIGEN_GNUC_AT_LEAST(5, 4) || \
(EIGEN_GNUC_AT(4, 9) && __GNUC_PATCHLEVEL__ >= 3) (EIGEN_GNUC_AT(6, 1) && __GNUC_PATCHLEVEL__ >= 1)
return vec_cts(x, 0); // TODO: check clang version. return vec_cts(x, 0); // TODO: check clang version.
#else #else
double tmp[2]; double tmp[2];
@ -194,36 +247,16 @@ Packet2d pexp<Packet2d>(const Packet2d& _x)
{ {
Packet2d x = _x; Packet2d x = _x;
_EIGEN_DECLARE_CONST_Packet2d(1 , 1.0);
_EIGEN_DECLARE_CONST_Packet2d(2 , 2.0);
_EIGEN_DECLARE_CONST_Packet2d(half, 0.5);
_EIGEN_DECLARE_CONST_Packet2d(exp_hi, 709.437);
_EIGEN_DECLARE_CONST_Packet2d(exp_lo, -709.436139303);
_EIGEN_DECLARE_CONST_Packet2d(cephes_LOG2EF, 1.4426950408889634073599);
_EIGEN_DECLARE_CONST_Packet2d(cephes_exp_p0, 1.26177193074810590878e-4);
_EIGEN_DECLARE_CONST_Packet2d(cephes_exp_p1, 3.02994407707441961300e-2);
_EIGEN_DECLARE_CONST_Packet2d(cephes_exp_p2, 9.99999999999999999910e-1);
_EIGEN_DECLARE_CONST_Packet2d(cephes_exp_q0, 3.00198505138664455042e-6);
_EIGEN_DECLARE_CONST_Packet2d(cephes_exp_q1, 2.52448340349684104192e-3);
_EIGEN_DECLARE_CONST_Packet2d(cephes_exp_q2, 2.27265548208155028766e-1);
_EIGEN_DECLARE_CONST_Packet2d(cephes_exp_q3, 2.00000000000000000009e0);
_EIGEN_DECLARE_CONST_Packet2d(cephes_exp_C1, 0.693145751953125);
_EIGEN_DECLARE_CONST_Packet2d(cephes_exp_C2, 1.42860682030941723212e-6);
Packet2d tmp, fx; Packet2d tmp, fx;
Packet2l emm0; Packet2l emm0;
// clamp x // clamp x
x = pmax(pmin(x, p2d_exp_hi), p2d_exp_lo); x = pmax(pmin(x, p2d_exp_hi), p2d_exp_lo);
/* express exp(x) as exp(g + n*log(2)) */
fx = pmadd(p2d_cephes_LOG2EF, x, p2d_half);
fx = vec_floor(fx); /* express exp(x) as exp(g + n*log(2)) */
fx = pmadd(x, p2d_cephes_LOG2EF, p2d_half);
fx = pfloor(fx);
tmp = pmul(fx, p2d_cephes_exp_C1); tmp = pmul(fx, p2d_cephes_exp_C1);
Packet2d z = pmul(fx, p2d_cephes_exp_C2); Packet2d z = pmul(fx, p2d_cephes_exp_C2);
@ -249,9 +282,6 @@ Packet2d pexp<Packet2d>(const Packet2d& _x)
emm0 = ConvertToPacket2l(fx); emm0 = ConvertToPacket2l(fx);
#ifdef __POWER8_VECTOR__ #ifdef __POWER8_VECTOR__
static const Packet2l p2l_1023 = { 1023, 1023 };
static const Packet2ul p2ul_52 = { 52, 52 };
emm0 = vec_add(emm0, p2l_1023); emm0 = vec_add(emm0, p2l_1023);
emm0 = vec_sl(emm0, p2ul_52); emm0 = vec_sl(emm0, p2ul_52);
#else #else

View File

@ -1,7 +1,7 @@
// This file is part of Eigen, a lightweight C++ template library // This file is part of Eigen, a lightweight C++ template library
// for linear algebra. // for linear algebra.
// //
// Copyright (C) 2008-2014 Konstantinos Margaritis <markos@freevec.org> // Copyright (C) 2008-2016 Konstantinos Margaritis <markos@freevec.org>
// //
// This Source Code Form is subject to the terms of the Mozilla // This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed // Public License v. 2.0. If a copy of the MPL was not distributed
@ -42,7 +42,7 @@ typedef __vector unsigned char Packet16uc;
// and it doesn't really work to declare them global, so we define macros instead // and it doesn't really work to declare them global, so we define macros instead
#define _EIGEN_DECLARE_CONST_FAST_Packet4f(NAME,X) \ #define _EIGEN_DECLARE_CONST_FAST_Packet4f(NAME,X) \
Packet4f p4f_##NAME = (Packet4f) vec_splat_s32(X) Packet4f p4f_##NAME = reinterpret_cast<Packet4f>(vec_splat_s32(X))
#define _EIGEN_DECLARE_CONST_FAST_Packet4i(NAME,X) \ #define _EIGEN_DECLARE_CONST_FAST_Packet4i(NAME,X) \
Packet4i p4i_##NAME = vec_splat_s32(X) Packet4i p4i_##NAME = vec_splat_s32(X)
@ -69,13 +69,13 @@ typedef __vector unsigned char Packet16uc;
// These constants are endian-agnostic // These constants are endian-agnostic
static _EIGEN_DECLARE_CONST_FAST_Packet4f(ZERO, 0); //{ 0.0, 0.0, 0.0, 0.0} static _EIGEN_DECLARE_CONST_FAST_Packet4f(ZERO, 0); //{ 0.0, 0.0, 0.0, 0.0}
static _EIGEN_DECLARE_CONST_FAST_Packet4i(ZERO, 0); //{ 0, 0, 0, 0,} static _EIGEN_DECLARE_CONST_FAST_Packet4i(ZERO, 0); //{ 0, 0, 0, 0,}
#ifndef __VSX__
static _EIGEN_DECLARE_CONST_FAST_Packet4i(ONE,1); //{ 1, 1, 1, 1} static _EIGEN_DECLARE_CONST_FAST_Packet4i(ONE,1); //{ 1, 1, 1, 1}
static Packet4f p4f_ONE = vec_ctf(p4i_ONE, 0); //{ 1.0, 1.0, 1.0, 1.0}
#endif
static _EIGEN_DECLARE_CONST_FAST_Packet4i(MINUS16,-16); //{ -16, -16, -16, -16} static _EIGEN_DECLARE_CONST_FAST_Packet4i(MINUS16,-16); //{ -16, -16, -16, -16}
static _EIGEN_DECLARE_CONST_FAST_Packet4i(MINUS1,-1); //{ -1, -1, -1, -1} static _EIGEN_DECLARE_CONST_FAST_Packet4i(MINUS1,-1); //{ -1, -1, -1, -1}
static Packet4f p4f_ZERO_ = (Packet4f) vec_sl((Packet4ui)p4i_MINUS1, (Packet4ui)p4i_MINUS1); //{ 0x80000000, 0x80000000, 0x80000000, 0x80000000} static Packet4f p4f_ZERO_ = (Packet4f) vec_sl((Packet4ui)p4i_MINUS1, (Packet4ui)p4i_MINUS1); //{ 0x80000000, 0x80000000, 0x80000000, 0x80000000}
#ifndef __VSX__
static Packet4f p4f_ONE = vec_ctf(p4i_ONE, 0); //{ 1.0, 1.0, 1.0, 1.0}
#endif
static Packet4f p4f_COUNTDOWN = { 0.0, 1.0, 2.0, 3.0 }; static Packet4f p4f_COUNTDOWN = { 0.0, 1.0, 2.0, 3.0 };
static Packet4i p4i_COUNTDOWN = { 0, 1, 2, 3 }; static Packet4i p4i_COUNTDOWN = { 0, 1, 2, 3 };
@ -96,7 +96,9 @@ static Packet16uc p16uc_DUPLICATE32_HI = { 0,1,2,3, 0,1,2,3, 4,5,6,7, 4,5,6,7 };
// Define global static constants: // Define global static constants:
#ifdef _BIG_ENDIAN #ifdef _BIG_ENDIAN
static Packet16uc p16uc_FORWARD = vec_lvsl(0, (float*)0); static Packet16uc p16uc_FORWARD = vec_lvsl(0, (float*)0);
#ifdef __VSX__
static Packet16uc p16uc_REVERSE64 = { 8,9,10,11, 12,13,14,15, 0,1,2,3, 4,5,6,7 }; static Packet16uc p16uc_REVERSE64 = { 8,9,10,11, 12,13,14,15, 0,1,2,3, 4,5,6,7 };
#endif
static Packet16uc p16uc_PSET32_WODD = vec_sld((Packet16uc) vec_splat((Packet4ui)p16uc_FORWARD, 0), (Packet16uc) vec_splat((Packet4ui)p16uc_FORWARD, 2), 8);//{ 0,1,2,3, 0,1,2,3, 8,9,10,11, 8,9,10,11 }; static Packet16uc p16uc_PSET32_WODD = vec_sld((Packet16uc) vec_splat((Packet4ui)p16uc_FORWARD, 0), (Packet16uc) vec_splat((Packet4ui)p16uc_FORWARD, 2), 8);//{ 0,1,2,3, 0,1,2,3, 8,9,10,11, 8,9,10,11 };
static Packet16uc p16uc_PSET32_WEVEN = vec_sld(p16uc_DUPLICATE32_HI, (Packet16uc) vec_splat((Packet4ui)p16uc_FORWARD, 3), 8);//{ 4,5,6,7, 4,5,6,7, 12,13,14,15, 12,13,14,15 }; static Packet16uc p16uc_PSET32_WEVEN = vec_sld(p16uc_DUPLICATE32_HI, (Packet16uc) vec_splat((Packet4ui)p16uc_FORWARD, 3), 8);//{ 4,5,6,7, 4,5,6,7, 12,13,14,15, 12,13,14,15 };
static Packet16uc p16uc_HALF64_0_16 = vec_sld((Packet16uc)p4i_ZERO, vec_splat((Packet16uc) vec_abs(p4i_MINUS16), 3), 8); //{ 0,0,0,0, 0,0,0,0, 16,16,16,16, 16,16,16,16}; static Packet16uc p16uc_HALF64_0_16 = vec_sld((Packet16uc)p4i_ZERO, vec_splat((Packet16uc) vec_abs(p4i_MINUS16), 3), 8); //{ 0,0,0,0, 0,0,0,0, 16,16,16,16, 16,16,16,16};
@ -110,8 +112,8 @@ static Packet16uc p16uc_HALF64_0_16 = vec_sld(vec_splat((Packet16uc) vec_abs(p4i
static Packet16uc p16uc_PSET64_HI = (Packet16uc) vec_mergeh((Packet4ui)p16uc_PSET32_WODD, (Packet4ui)p16uc_PSET32_WEVEN); //{ 0,1,2,3, 4,5,6,7, 0,1,2,3, 4,5,6,7 }; static Packet16uc p16uc_PSET64_HI = (Packet16uc) vec_mergeh((Packet4ui)p16uc_PSET32_WODD, (Packet4ui)p16uc_PSET32_WEVEN); //{ 0,1,2,3, 4,5,6,7, 0,1,2,3, 4,5,6,7 };
static Packet16uc p16uc_PSET64_LO = (Packet16uc) vec_mergel((Packet4ui)p16uc_PSET32_WODD, (Packet4ui)p16uc_PSET32_WEVEN); //{ 8,9,10,11, 12,13,14,15, 8,9,10,11, 12,13,14,15 }; static Packet16uc p16uc_PSET64_LO = (Packet16uc) vec_mergel((Packet4ui)p16uc_PSET32_WODD, (Packet4ui)p16uc_PSET32_WEVEN); //{ 8,9,10,11, 12,13,14,15, 8,9,10,11, 12,13,14,15 };
static Packet16uc p16uc_TRANSPOSE64_HI = vec_add(p16uc_PSET64_HI, p16uc_HALF64_0_16); //{ 0,1,2,3, 4,5,6,7, 16,17,18,19, 20,21,22,23}; static Packet16uc p16uc_TRANSPOSE64_HI = p16uc_PSET64_HI + p16uc_HALF64_0_16; //{ 0,1,2,3, 4,5,6,7, 16,17,18,19, 20,21,22,23};
static Packet16uc p16uc_TRANSPOSE64_LO = vec_add(p16uc_PSET64_LO, p16uc_HALF64_0_16); //{ 8,9,10,11, 12,13,14,15, 24,25,26,27, 28,29,30,31}; static Packet16uc p16uc_TRANSPOSE64_LO = p16uc_PSET64_LO + p16uc_HALF64_0_16; //{ 8,9,10,11, 12,13,14,15, 24,25,26,27, 28,29,30,31};
static Packet16uc p16uc_COMPLEX32_REV = vec_sld(p16uc_REVERSE32, p16uc_REVERSE32, 8); //{ 4,5,6,7, 0,1,2,3, 12,13,14,15, 8,9,10,11 }; static Packet16uc p16uc_COMPLEX32_REV = vec_sld(p16uc_REVERSE32, p16uc_REVERSE32, 8); //{ 4,5,6,7, 0,1,2,3, 12,13,14,15, 8,9,10,11 };
@ -121,6 +123,12 @@ static Packet16uc p16uc_COMPLEX32_REV2 = vec_sld(p16uc_FORWARD, p16uc_FORWARD, 8
static Packet16uc p16uc_COMPLEX32_REV2 = vec_sld(p16uc_PSET64_HI, p16uc_PSET64_LO, 8); //{ 8,9,10,11, 12,13,14,15, 0,1,2,3, 4,5,6,7 }; static Packet16uc p16uc_COMPLEX32_REV2 = vec_sld(p16uc_PSET64_HI, p16uc_PSET64_LO, 8); //{ 8,9,10,11, 12,13,14,15, 0,1,2,3, 4,5,6,7 };
#endif // _BIG_ENDIAN #endif // _BIG_ENDIAN
#if EIGEN_HAS_BUILTIN(__builtin_prefetch) || EIGEN_COMP_GNUC
#define EIGEN_PPC_PREFETCH(ADDR) __builtin_prefetch(ADDR);
#else
#define EIGEN_PPC_PREFETCH(ADDR) asm( " dcbt [%[addr]]\n" :: [addr] "r" (ADDR) : "cc" );
#endif
template<> struct packet_traits<float> : default_packet_traits template<> struct packet_traits<float> : default_packet_traits
{ {
typedef Packet4f type; typedef Packet4f type;
@ -129,15 +137,35 @@ template<> struct packet_traits<float> : default_packet_traits
Vectorizable = 1, Vectorizable = 1,
AlignedOnScalar = 1, AlignedOnScalar = 1,
size=4, size=4,
HasHalfPacket=0, HasHalfPacket = 1,
// FIXME check the Has* HasAdd = 1,
HasSub = 1,
HasMul = 1,
HasDiv = 1, HasDiv = 1,
HasMin = 1,
HasMax = 1,
HasAbs = 1,
HasSin = 0, HasSin = 0,
HasCos = 0, HasCos = 0,
HasLog = 1, HasLog = 0,
HasExp = 1, HasExp = 1,
HasSqrt = 0 #ifdef __VSX__
HasSqrt = 1,
#if !EIGEN_COMP_CLANG
HasRsqrt = 1,
#else
HasRsqrt = 0,
#endif
#else
HasSqrt = 0,
HasRsqrt = 0,
#endif
HasRound = 1,
HasFloor = 1,
HasCeil = 1,
HasNegate = 1,
HasBlend = 1
}; };
}; };
template<> struct packet_traits<int> : default_packet_traits template<> struct packet_traits<int> : default_packet_traits
@ -145,10 +173,16 @@ template<> struct packet_traits<int> : default_packet_traits
typedef Packet4i type; typedef Packet4i type;
typedef Packet4i half; typedef Packet4i half;
enum { enum {
// FIXME check the Has*
Vectorizable = 1, Vectorizable = 1,
AlignedOnScalar = 1, AlignedOnScalar = 1,
size=4 size = 4,
HasHalfPacket = 0,
HasAdd = 1,
HasSub = 1,
HasMul = 1,
HasDiv = 0,
HasBlend = 1
}; };
}; };
@ -200,41 +234,56 @@ inline std::ostream & operator <<(std::ostream & s, const Packet4ui & v)
s << vt.n[0] << ", " << vt.n[1] << ", " << vt.n[2] << ", " << vt.n[3]; s << vt.n[0] << ", " << vt.n[1] << ", " << vt.n[2] << ", " << vt.n[3];
return s; return s;
} }
/*
inline std::ostream & operator <<(std::ostream & s, const Packetbi & v)
{
union {
Packet4bi v;
unsigned int n[4];
} vt;
vt.v = v;
s << vt.n[0] << ", " << vt.n[1] << ", " << vt.n[2] << ", " << vt.n[3];
return s;
}*/
// Need to define them first or we get specialization after instantiation errors // Need to define them first or we get specialization after instantiation errors
template<> EIGEN_STRONG_INLINE Packet4f pload<Packet4f>(const float* from) { EIGEN_DEBUG_ALIGNED_LOAD return vec_ld(0, from); } template<> EIGEN_STRONG_INLINE Packet4f pload<Packet4f>(const float* from)
template<> EIGEN_STRONG_INLINE Packet4i pload<Packet4i>(const int* from) { EIGEN_DEBUG_ALIGNED_LOAD return vec_ld(0, from); } {
EIGEN_DEBUG_ALIGNED_LOAD
#ifdef __VSX__
return vec_vsx_ld(0, from);
#else
return vec_ld(0, from);
#endif
}
template<> EIGEN_STRONG_INLINE void pstore<float>(float* to, const Packet4f& from) { EIGEN_DEBUG_ALIGNED_STORE vec_st(from, 0, to); } template<> EIGEN_STRONG_INLINE Packet4i pload<Packet4i>(const int* from)
template<> EIGEN_STRONG_INLINE void pstore<int>(int* to, const Packet4i& from) { EIGEN_DEBUG_ALIGNED_STORE vec_st(from, 0, to); } {
EIGEN_DEBUG_ALIGNED_LOAD
#ifdef __VSX__
return vec_vsx_ld(0, from);
#else
return vec_ld(0, from);
#endif
}
template<> EIGEN_STRONG_INLINE void pstore<float>(float* to, const Packet4f& from)
{
EIGEN_DEBUG_ALIGNED_STORE
#ifdef __VSX__
vec_vsx_st(from, 0, to);
#else
vec_st(from, 0, to);
#endif
}
template<> EIGEN_STRONG_INLINE void pstore<int>(int* to, const Packet4i& from)
{
EIGEN_DEBUG_ALIGNED_STORE
#ifdef __VSX__
vec_vsx_st(from, 0, to);
#else
vec_st(from, 0, to);
#endif
}
template<> EIGEN_STRONG_INLINE Packet4f pset1<Packet4f>(const float& from) { template<> EIGEN_STRONG_INLINE Packet4f pset1<Packet4f>(const float& from) {
// Taken from http://developer.apple.com/hardwaredrivers/ve/alignment.html Packet4f v = {from, from, from, from};
float EIGEN_ALIGN16 af[4]; return v;
af[0] = from;
Packet4f vc = pload<Packet4f>(af);
vc = vec_splat(vc, 0);
return vc;
} }
template<> EIGEN_STRONG_INLINE Packet4i pset1<Packet4i>(const int& from) { template<> EIGEN_STRONG_INLINE Packet4i pset1<Packet4i>(const int& from) {
int EIGEN_ALIGN16 ai[4]; Packet4i v = {from, from, from, from};
ai[0] = from; return v;
Packet4i vc = pload<Packet4i>(ai);
vc = vec_splat(vc, 0);
return vc;
} }
template<> EIGEN_STRONG_INLINE void template<> EIGEN_STRONG_INLINE void
pbroadcast4<Packet4f>(const float *a, pbroadcast4<Packet4f>(const float *a,
@ -294,58 +343,24 @@ template<> EIGEN_DEVICE_FUNC inline void pscatter<int, Packet4i>(int* to, const
to[3*stride] = ai[3]; to[3*stride] = ai[3];
} }
template<> EIGEN_STRONG_INLINE Packet4f plset<Packet4f>(const float& a) { return vec_add(pset1<Packet4f>(a), p4f_COUNTDOWN); } template<> EIGEN_STRONG_INLINE Packet4f plset<Packet4f>(const float& a) { return pset1<Packet4f>(a) + p4f_COUNTDOWN; }
template<> EIGEN_STRONG_INLINE Packet4i plset<Packet4i>(const int& a) { return vec_add(pset1<Packet4i>(a), p4i_COUNTDOWN); } template<> EIGEN_STRONG_INLINE Packet4i plset<Packet4i>(const int& a) { return pset1<Packet4i>(a) + p4i_COUNTDOWN; }
template<> EIGEN_STRONG_INLINE Packet4f padd<Packet4f>(const Packet4f& a, const Packet4f& b) { return vec_add(a,b); } template<> EIGEN_STRONG_INLINE Packet4f padd<Packet4f>(const Packet4f& a, const Packet4f& b) { return a + b; }
template<> EIGEN_STRONG_INLINE Packet4i padd<Packet4i>(const Packet4i& a, const Packet4i& b) { return vec_add(a,b); } template<> EIGEN_STRONG_INLINE Packet4i padd<Packet4i>(const Packet4i& a, const Packet4i& b) { return a + b; }
template<> EIGEN_STRONG_INLINE Packet4f psub<Packet4f>(const Packet4f& a, const Packet4f& b) { return vec_sub(a,b); } template<> EIGEN_STRONG_INLINE Packet4f psub<Packet4f>(const Packet4f& a, const Packet4f& b) { return a - b; }
template<> EIGEN_STRONG_INLINE Packet4i psub<Packet4i>(const Packet4i& a, const Packet4i& b) { return vec_sub(a,b); } template<> EIGEN_STRONG_INLINE Packet4i psub<Packet4i>(const Packet4i& a, const Packet4i& b) { return a - b; }
template<> EIGEN_STRONG_INLINE Packet4f pnegate(const Packet4f& a) { return psub<Packet4f>(p4f_ZERO, a); } template<> EIGEN_STRONG_INLINE Packet4f pnegate(const Packet4f& a) { return p4f_ZERO - a; }
template<> EIGEN_STRONG_INLINE Packet4i pnegate(const Packet4i& a) { return psub<Packet4i>(p4i_ZERO, a); } template<> EIGEN_STRONG_INLINE Packet4i pnegate(const Packet4i& a) { return p4i_ZERO - a; }
template<> EIGEN_STRONG_INLINE Packet4f pconj(const Packet4f& a) { return a; } template<> EIGEN_STRONG_INLINE Packet4f pconj(const Packet4f& a) { return a; }
template<> EIGEN_STRONG_INLINE Packet4i pconj(const Packet4i& a) { return a; } template<> EIGEN_STRONG_INLINE Packet4i pconj(const Packet4i& a) { return a; }
template<> EIGEN_STRONG_INLINE Packet4f pmul<Packet4f>(const Packet4f& a, const Packet4f& b) { return vec_madd(a,b, p4f_ZERO); } template<> EIGEN_STRONG_INLINE Packet4f pmul<Packet4f>(const Packet4f& a, const Packet4f& b) { return vec_madd(a,b, p4f_ZERO); }
/* Commented out: it's actually slower than processing it scalar template<> EIGEN_STRONG_INLINE Packet4i pmul<Packet4i>(const Packet4i& a, const Packet4i& b) { return a * b; }
*
template<> EIGEN_STRONG_INLINE Packet4i pmul<Packet4i>(const Packet4i& a, const Packet4i& b)
{
// Detailed in: http://freevec.org/content/32bit_signed_integer_multiplication_altivec
//Set up constants, variables
Packet4i a1, b1, bswap, low_prod, high_prod, prod, prod_, v1sel;
// Get the absolute values
a1 = vec_abs(a);
b1 = vec_abs(b);
// Get the signs using xor
Packet4bi sgn = (Packet4bi) vec_cmplt(vec_xor(a, b), p4i_ZERO);
// Do the multiplication for the asbolute values.
bswap = (Packet4i) vec_rl((Packet4ui) b1, (Packet4ui) p4i_MINUS16 );
low_prod = vec_mulo((Packet8i) a1, (Packet8i)b1);
high_prod = vec_msum((Packet8i) a1, (Packet8i) bswap, p4i_ZERO);
high_prod = (Packet4i) vec_sl((Packet4ui) high_prod, (Packet4ui) p4i_MINUS16);
prod = vec_add( low_prod, high_prod );
// NOR the product and select only the negative elements according to the sign mask
prod_ = vec_nor(prod, prod);
prod_ = vec_sel(p4i_ZERO, prod_, sgn);
// Add 1 to the result to get the negative numbers
v1sel = vec_sel(p4i_ZERO, p4i_ONE, sgn);
prod_ = vec_add(prod_, v1sel);
// Merge the results back to the final vector.
prod = vec_sel(prod, prod_, sgn);
return prod;
}
*/
template<> EIGEN_STRONG_INLINE Packet4f pdiv<Packet4f>(const Packet4f& a, const Packet4f& b) template<> EIGEN_STRONG_INLINE Packet4f pdiv<Packet4f>(const Packet4f& a, const Packet4f& b)
{ {
#ifndef __VSX__ // VSX actually provides a div instruction #ifndef __VSX__ // VSX actually provides a div instruction
@ -371,7 +386,7 @@ template<> EIGEN_STRONG_INLINE Packet4i pdiv<Packet4i>(const Packet4i& /*a*/, co
// for some weird raisons, it has to be overloaded for packet of integers // for some weird raisons, it has to be overloaded for packet of integers
template<> EIGEN_STRONG_INLINE Packet4f pmadd(const Packet4f& a, const Packet4f& b, const Packet4f& c) { return vec_madd(a,b,c); } template<> EIGEN_STRONG_INLINE Packet4f pmadd(const Packet4f& a, const Packet4f& b, const Packet4f& c) { return vec_madd(a,b,c); }
template<> EIGEN_STRONG_INLINE Packet4i pmadd(const Packet4i& a, const Packet4i& b, const Packet4i& c) { return padd(pmul(a,b), c); } template<> EIGEN_STRONG_INLINE Packet4i pmadd(const Packet4i& a, const Packet4i& b, const Packet4i& c) { return a*b + c; }
template<> EIGEN_STRONG_INLINE Packet4f pmin<Packet4f>(const Packet4f& a, const Packet4f& b) { return vec_min(a, b); } template<> EIGEN_STRONG_INLINE Packet4f pmin<Packet4f>(const Packet4f& a, const Packet4f& b) { return vec_min(a, b); }
template<> EIGEN_STRONG_INLINE Packet4i pmin<Packet4i>(const Packet4i& a, const Packet4i& b) { return vec_min(a, b); } template<> EIGEN_STRONG_INLINE Packet4i pmin<Packet4i>(const Packet4i& a, const Packet4i& b) { return vec_min(a, b); }
@ -391,6 +406,10 @@ template<> EIGEN_STRONG_INLINE Packet4i pxor<Packet4i>(const Packet4i& a, const
template<> EIGEN_STRONG_INLINE Packet4f pandnot<Packet4f>(const Packet4f& a, const Packet4f& b) { return vec_and(a, vec_nor(b, b)); } template<> EIGEN_STRONG_INLINE Packet4f pandnot<Packet4f>(const Packet4f& a, const Packet4f& b) { return vec_and(a, vec_nor(b, b)); }
template<> EIGEN_STRONG_INLINE Packet4i pandnot<Packet4i>(const Packet4i& a, const Packet4i& b) { return vec_and(a, vec_nor(b, b)); } template<> EIGEN_STRONG_INLINE Packet4i pandnot<Packet4i>(const Packet4i& a, const Packet4i& b) { return vec_and(a, vec_nor(b, b)); }
template<> EIGEN_STRONG_INLINE Packet4f pround<Packet4f>(const Packet4f& a) { return vec_round(a); }
template<> EIGEN_STRONG_INLINE Packet4f pceil<Packet4f>(const Packet4f& a) { return vec_ceil(a); }
template<> EIGEN_STRONG_INLINE Packet4f pfloor<Packet4f>(const Packet4f& a) { return vec_floor(a); }
#ifdef _BIG_ENDIAN #ifdef _BIG_ENDIAN
template<> EIGEN_STRONG_INLINE Packet4f ploadu<Packet4f>(const float* from) template<> EIGEN_STRONG_INLINE Packet4f ploadu<Packet4f>(const float* from)
{ {
@ -418,12 +437,12 @@ template<> EIGEN_STRONG_INLINE Packet4i ploadu<Packet4i>(const int* from)
// We also need ot redefine little endian loading of Packet4i/Packet4f using VSX // We also need ot redefine little endian loading of Packet4i/Packet4f using VSX
template<> EIGEN_STRONG_INLINE Packet4i ploadu<Packet4i>(const int* from) template<> EIGEN_STRONG_INLINE Packet4i ploadu<Packet4i>(const int* from)
{ {
EIGEN_DEBUG_ALIGNED_LOAD EIGEN_DEBUG_UNALIGNED_LOAD
return (Packet4i) vec_vsx_ld((long)from & 15, (const int*) _EIGEN_ALIGNED_PTR(from)); return (Packet4i) vec_vsx_ld((long)from & 15, (const int*) _EIGEN_ALIGNED_PTR(from));
} }
template<> EIGEN_STRONG_INLINE Packet4f ploadu<Packet4f>(const float* from) template<> EIGEN_STRONG_INLINE Packet4f ploadu<Packet4f>(const float* from)
{ {
EIGEN_DEBUG_ALIGNED_LOAD EIGEN_DEBUG_UNALIGNED_LOAD
return (Packet4f) vec_vsx_ld((long)from & 15, (const float*) _EIGEN_ALIGNED_PTR(from)); return (Packet4f) vec_vsx_ld((long)from & 15, (const float*) _EIGEN_ALIGNED_PTR(from));
} }
#endif #endif
@ -494,16 +513,19 @@ template<> EIGEN_STRONG_INLINE void pstoreu<float>(float* to, const Packet4f&
} }
#endif #endif
#ifndef __VSX__ template<> EIGEN_STRONG_INLINE void prefetch<float>(const float* addr) { EIGEN_PPC_PREFETCH(addr); }
template<> EIGEN_STRONG_INLINE void prefetch<float>(const float* addr) { vec_dstt(addr, DST_CTRL(2,2,32), DST_CHAN); } template<> EIGEN_STRONG_INLINE void prefetch<int>(const int* addr) { EIGEN_PPC_PREFETCH(addr); }
template<> EIGEN_STRONG_INLINE void prefetch<int>(const int* addr) { vec_dstt(addr, DST_CTRL(2,2,32), DST_CHAN); }
#endif
template<> EIGEN_STRONG_INLINE float pfirst<Packet4f>(const Packet4f& a) { float EIGEN_ALIGN16 x[4]; vec_st(a, 0, x); return x[0]; } template<> EIGEN_STRONG_INLINE float pfirst<Packet4f>(const Packet4f& a) { float EIGEN_ALIGN16 x; vec_ste(a, 0, &x); return x; }
template<> EIGEN_STRONG_INLINE int pfirst<Packet4i>(const Packet4i& a) { int EIGEN_ALIGN16 x[4]; vec_st(a, 0, x); return x[0]; } template<> EIGEN_STRONG_INLINE int pfirst<Packet4i>(const Packet4i& a) { int EIGEN_ALIGN16 x; vec_ste(a, 0, &x); return x; }
template<> EIGEN_STRONG_INLINE Packet4f preverse(const Packet4f& a) { return (Packet4f)vec_perm((Packet16uc)a,(Packet16uc)a, p16uc_REVERSE32); } template<> EIGEN_STRONG_INLINE Packet4f preverse(const Packet4f& a)
template<> EIGEN_STRONG_INLINE Packet4i preverse(const Packet4i& a) { return (Packet4i)vec_perm((Packet16uc)a,(Packet16uc)a, p16uc_REVERSE32); } {
return reinterpret_cast<Packet4f>(vec_perm(reinterpret_cast<Packet16uc>(a), reinterpret_cast<Packet16uc>(a), p16uc_REVERSE32));
}
template<> EIGEN_STRONG_INLINE Packet4i preverse(const Packet4i& a)
{
return reinterpret_cast<Packet4i>(vec_perm(reinterpret_cast<Packet16uc>(a), reinterpret_cast<Packet16uc>(a), p16uc_REVERSE32)); }
template<> EIGEN_STRONG_INLINE Packet4f pabs(const Packet4f& a) { return vec_abs(a); } template<> EIGEN_STRONG_INLINE Packet4f pabs(const Packet4f& a) { return vec_abs(a); }
template<> EIGEN_STRONG_INLINE Packet4i pabs(const Packet4i& a) { return vec_abs(a); } template<> EIGEN_STRONG_INLINE Packet4i pabs(const Packet4i& a) { return vec_abs(a); }
@ -511,10 +533,10 @@ template<> EIGEN_STRONG_INLINE Packet4i pabs(const Packet4i& a) { return vec_abs
template<> EIGEN_STRONG_INLINE float predux<Packet4f>(const Packet4f& a) template<> EIGEN_STRONG_INLINE float predux<Packet4f>(const Packet4f& a)
{ {
Packet4f b, sum; Packet4f b, sum;
b = (Packet4f) vec_sld(a, a, 8); b = vec_sld(a, a, 8);
sum = vec_add(a, b); sum = a + b;
b = (Packet4f) vec_sld(sum, sum, 4); b = vec_sld(sum, sum, 4);
sum = vec_add(sum, b); sum += b;
return pfirst(sum); return pfirst(sum);
} }
@ -537,11 +559,11 @@ template<> EIGEN_STRONG_INLINE Packet4f preduxp<Packet4f>(const Packet4f* vecs)
// Now do the summation: // Now do the summation:
// Lines 0+1 // Lines 0+1
sum[0] = vec_add(sum[0], sum[1]); sum[0] = sum[0] + sum[1];
// Lines 2+3 // Lines 2+3
sum[1] = vec_add(sum[2], sum[3]); sum[1] = sum[2] + sum[3];
// Add the results // Add the results
sum[0] = vec_add(sum[0], sum[1]); sum[0] = sum[0] + sum[1];
return sum[0]; return sum[0];
} }
@ -577,11 +599,11 @@ template<> EIGEN_STRONG_INLINE Packet4i preduxp<Packet4i>(const Packet4i* vecs)
// Now do the summation: // Now do the summation:
// Lines 0+1 // Lines 0+1
sum[0] = vec_add(sum[0], sum[1]); sum[0] = sum[0] + sum[1];
// Lines 2+3 // Lines 2+3
sum[1] = vec_add(sum[2], sum[3]); sum[1] = sum[2] + sum[3];
// Add the results // Add the results
sum[0] = vec_add(sum[0], sum[1]); sum[0] = sum[0] + sum[1];
return sum[0]; return sum[0];
} }
@ -591,8 +613,8 @@ template<> EIGEN_STRONG_INLINE Packet4i preduxp<Packet4i>(const Packet4i* vecs)
template<> EIGEN_STRONG_INLINE float predux_mul<Packet4f>(const Packet4f& a) template<> EIGEN_STRONG_INLINE float predux_mul<Packet4f>(const Packet4f& a)
{ {
Packet4f prod; Packet4f prod;
prod = pmul(a, (Packet4f)vec_sld(a, a, 8)); prod = pmul(a, vec_sld(a, a, 8));
return pfirst(pmul(prod, (Packet4f)vec_sld(prod, prod, 4))); return pfirst(pmul(prod, vec_sld(prod, prod, 4)));
} }
template<> EIGEN_STRONG_INLINE int predux_mul<Packet4i>(const Packet4i& a) template<> EIGEN_STRONG_INLINE int predux_mul<Packet4i>(const Packet4i& a)
@ -716,33 +738,52 @@ ptranspose(PacketBlock<Packet4i,4>& kernel) {
kernel.packet[3] = vec_mergel(t1, t3); kernel.packet[3] = vec_mergel(t1, t3);
} }
template<> EIGEN_STRONG_INLINE Packet4i pblend(const Selector<4>& ifPacket, const Packet4i& thenPacket, const Packet4i& elsePacket) {
Packet4ui select = { ifPacket.select[0], ifPacket.select[1], ifPacket.select[2], ifPacket.select[3] };
Packet4ui mask = reinterpret_cast<Packet4ui>(vec_cmpeq(reinterpret_cast<Packet4ui>(select), reinterpret_cast<Packet4ui>(p4i_ONE)));
return vec_sel(elsePacket, thenPacket, mask);
}
template<> EIGEN_STRONG_INLINE Packet4f pblend(const Selector<4>& ifPacket, const Packet4f& thenPacket, const Packet4f& elsePacket) {
Packet4ui select = { ifPacket.select[0], ifPacket.select[1], ifPacket.select[2], ifPacket.select[3] };
Packet4ui mask = reinterpret_cast<Packet4ui>(vec_cmpeq(reinterpret_cast<Packet4ui>(select), reinterpret_cast<Packet4ui>(p4i_ONE)));
return vec_sel(elsePacket, thenPacket, mask);
}
//---------- double ---------- //---------- double ----------
#ifdef __VSX__ #ifdef __VSX__
typedef __vector double Packet2d; typedef __vector double Packet2d;
typedef __vector unsigned long long Packet2ul; typedef __vector unsigned long long Packet2ul;
typedef __vector long long Packet2l; typedef __vector long long Packet2l;
#if EIGEN_COMP_CLANG
typedef Packet2ul Packet2bl;
#else
typedef __vector __bool long Packet2bl;
#endif
static Packet2l p2l_ZERO = (Packet2l) p4i_ZERO; static Packet2l p2l_ONE = { 1, 1 };
static Packet2l p2l_ZERO = reinterpret_cast<Packet2l>(p4i_ZERO);
static Packet2d p2d_ONE = { 1.0, 1.0 }; static Packet2d p2d_ONE = { 1.0, 1.0 };
static Packet2d p2d_ZERO = (Packet2d) p4f_ZERO; static Packet2d p2d_ZERO = reinterpret_cast<Packet2d>(p4f_ZERO);
static Packet2d p2d_ZERO_ = { -0.0, -0.0 }; static Packet2d p2d_ZERO_ = { -0.0, -0.0 };
#ifdef _BIG_ENDIAN #ifdef _BIG_ENDIAN
static Packet2d p2d_COUNTDOWN = (Packet2d) vec_sld((Packet16uc) p2d_ZERO, (Packet16uc) p2d_ONE, 8); static Packet2d p2d_COUNTDOWN = reinterpret_cast<Packet2d>(vec_sld(reinterpret_cast<Packet4f>(p2d_ZERO), reinterpret_cast<Packet4f>(p2d_ONE), 8));
#else #else
static Packet2d p2d_COUNTDOWN = (Packet2d) vec_sld((Packet16uc) p2d_ONE, (Packet16uc) p2d_ZERO, 8); static Packet2d p2d_COUNTDOWN = reinterpret_cast<Packet2d>(vec_sld(reinterpret_cast<Packet4f>(p2d_ONE), reinterpret_cast<Packet4f>(p2d_ZERO), 8));
#endif #endif
static EIGEN_STRONG_INLINE Packet2d vec_splat_dbl(Packet2d& a, int index) template<int index> Packet2d vec_splat_dbl(Packet2d& a);
template<> EIGEN_STRONG_INLINE Packet2d vec_splat_dbl<0>(Packet2d& a)
{ {
switch (index) { return reinterpret_cast<Packet2d>(vec_perm(a, a, p16uc_PSET64_HI));
case 0:
return (Packet2d) vec_perm(a, a, p16uc_PSET64_HI);
case 1:
return (Packet2d) vec_perm(a, a, p16uc_PSET64_LO);
} }
return a;
template<> EIGEN_STRONG_INLINE Packet2d vec_splat_dbl<1>(Packet2d& a)
{
return reinterpret_cast<Packet2d>(vec_perm(a, a, p16uc_PSET64_LO));
} }
template<> struct packet_traits<double> : default_packet_traits template<> struct packet_traits<double> : default_packet_traits
@ -753,16 +794,41 @@ template<> struct packet_traits<double> : default_packet_traits
Vectorizable = 1, Vectorizable = 1,
AlignedOnScalar = 1, AlignedOnScalar = 1,
size=2, size=2,
HasHalfPacket = 0, HasHalfPacket = 1,
HasAdd = 1,
HasSub = 1,
HasMul = 1,
HasDiv = 1, HasDiv = 1,
HasMin = 1,
HasMax = 1,
HasAbs = 1,
HasSin = 0,
HasCos = 0,
HasLog = 0,
HasExp = 1, HasExp = 1,
HasSqrt = 0 HasSqrt = 1,
HasRsqrt = 1,
HasRound = 1,
HasFloor = 1,
HasCeil = 1,
HasNegate = 1,
HasBlend = 1
}; };
}; };
template<> struct unpacket_traits<Packet2d> { typedef double type; enum {size=2, alignment=Aligned16}; typedef Packet2d half; }; template<> struct unpacket_traits<Packet2d> { typedef double type; enum {size=2, alignment=Aligned16}; typedef Packet2d half; };
inline std::ostream & operator <<(std::ostream & s, const Packet2l & v)
{
union {
Packet2l v;
int64_t n[2];
} vt;
vt.v = v;
s << vt.n[0] << ", " << vt.n[1];
return s;
}
inline std::ostream & operator <<(std::ostream & s, const Packet2d & v) inline std::ostream & operator <<(std::ostream & s, const Packet2d & v)
{ {
@ -776,28 +842,43 @@ inline std::ostream & operator <<(std::ostream & s, const Packet2d & v)
} }
// Need to define them first or we get specialization after instantiation errors // Need to define them first or we get specialization after instantiation errors
template<> EIGEN_STRONG_INLINE Packet2d pload<Packet2d>(const double* from) { EIGEN_DEBUG_ALIGNED_LOAD return (Packet2d) vec_ld(0, (const float *) from); } //FIXME template<> EIGEN_STRONG_INLINE Packet2d pload<Packet2d>(const double* from)
{
EIGEN_DEBUG_ALIGNED_LOAD
#ifdef __VSX__
return vec_vsx_ld(0, from);
#else
return vec_ld(0, from);
#endif
}
template<> EIGEN_STRONG_INLINE void pstore<double>(double* to, const Packet2d& from) { EIGEN_DEBUG_ALIGNED_STORE vec_st((Packet4f)from, 0, (float *)to); } template<> EIGEN_STRONG_INLINE void pstore<double>(double* to, const Packet2d& from)
{
EIGEN_DEBUG_ALIGNED_STORE
#ifdef __VSX__
vec_vsx_st(from, 0, to);
#else
vec_st(from, 0, to);
#endif
}
template<> EIGEN_STRONG_INLINE Packet2d pset1<Packet2d>(const double& from) { template<> EIGEN_STRONG_INLINE Packet2d pset1<Packet2d>(const double& from) {
double EIGEN_ALIGN16 af[2]; Packet2d v = {from, from};
af[0] = from; return v;
Packet2d vc = pload<Packet2d>(af);
vc = vec_splat_dbl(vc, 0);
return vc;
} }
template<> EIGEN_STRONG_INLINE void template<> EIGEN_STRONG_INLINE void
pbroadcast4<Packet2d>(const double *a, pbroadcast4<Packet2d>(const double *a,
Packet2d& a0, Packet2d& a1, Packet2d& a2, Packet2d& a3) Packet2d& a0, Packet2d& a1, Packet2d& a2, Packet2d& a3)
{ {
a1 = pload<Packet2d>(a); a1 = pload<Packet2d>(a);
a0 = vec_splat_dbl(a1, 0); a0 = vec_splat_dbl<0>(a1);
a1 = vec_splat_dbl(a1, 1); a1 = vec_splat_dbl<1>(a1);
a3 = pload<Packet2d>(a+2); a3 = pload<Packet2d>(a+2);
a2 = vec_splat_dbl(a3, 0); a2 = vec_splat_dbl<0>(a3);
a3 = vec_splat_dbl(a3, 1); a3 = vec_splat_dbl<1>(a3);
} }
template<> EIGEN_DEVICE_FUNC inline Packet2d pgather<double, Packet2d>(const double* from, Index stride) template<> EIGEN_DEVICE_FUNC inline Packet2d pgather<double, Packet2d>(const double* from, Index stride)
{ {
double EIGEN_ALIGN16 af[2]; double EIGEN_ALIGN16 af[2];
@ -812,13 +893,14 @@ template<> EIGEN_DEVICE_FUNC inline void pscatter<double, Packet2d>(double* to,
to[0*stride] = af[0]; to[0*stride] = af[0];
to[1*stride] = af[1]; to[1*stride] = af[1];
} }
template<> EIGEN_STRONG_INLINE Packet2d plset<Packet2d>(const double& a) { return vec_add(pset1<Packet2d>(a), p2d_COUNTDOWN); }
template<> EIGEN_STRONG_INLINE Packet2d padd<Packet2d>(const Packet2d& a, const Packet2d& b) { return vec_add(a,b); } template<> EIGEN_STRONG_INLINE Packet2d plset<Packet2d>(const double& a) { return pset1<Packet2d>(a) + p2d_COUNTDOWN; }
template<> EIGEN_STRONG_INLINE Packet2d psub<Packet2d>(const Packet2d& a, const Packet2d& b) { return vec_sub(a,b); } template<> EIGEN_STRONG_INLINE Packet2d padd<Packet2d>(const Packet2d& a, const Packet2d& b) { return a + b; }
template<> EIGEN_STRONG_INLINE Packet2d pnegate(const Packet2d& a) { return psub<Packet2d>(p2d_ZERO, a); } template<> EIGEN_STRONG_INLINE Packet2d psub<Packet2d>(const Packet2d& a, const Packet2d& b) { return a - b; }
template<> EIGEN_STRONG_INLINE Packet2d pnegate(const Packet2d& a) { return p2d_ZERO - a; }
template<> EIGEN_STRONG_INLINE Packet2d pconj(const Packet2d& a) { return a; } template<> EIGEN_STRONG_INLINE Packet2d pconj(const Packet2d& a) { return a; }
@ -840,17 +922,22 @@ template<> EIGEN_STRONG_INLINE Packet2d pxor<Packet2d>(const Packet2d& a, const
template<> EIGEN_STRONG_INLINE Packet2d pandnot<Packet2d>(const Packet2d& a, const Packet2d& b) { return vec_and(a, vec_nor(b, b)); } template<> EIGEN_STRONG_INLINE Packet2d pandnot<Packet2d>(const Packet2d& a, const Packet2d& b) { return vec_and(a, vec_nor(b, b)); }
template<> EIGEN_STRONG_INLINE Packet2d pround<Packet2d>(const Packet2d& a) { return vec_round(a); }
template<> EIGEN_STRONG_INLINE Packet2d pceil<Packet2d>(const Packet2d& a) { return vec_ceil(a); }
template<> EIGEN_STRONG_INLINE Packet2d pfloor<Packet2d>(const Packet2d& a) { return vec_floor(a); }
template<> EIGEN_STRONG_INLINE Packet2d ploadu<Packet2d>(const double* from) template<> EIGEN_STRONG_INLINE Packet2d ploadu<Packet2d>(const double* from)
{ {
EIGEN_DEBUG_ALIGNED_LOAD EIGEN_DEBUG_ALIGNED_LOAD
return (Packet2d) vec_vsx_ld((long)from & 15, (const float*) _EIGEN_ALIGNED_PTR(from)); return (Packet2d) vec_vsx_ld((long)from & 15, (const double*) _EIGEN_ALIGNED_PTR(from));
} }
template<> EIGEN_STRONG_INLINE Packet2d ploaddup<Packet2d>(const double* from) template<> EIGEN_STRONG_INLINE Packet2d ploaddup<Packet2d>(const double* from)
{ {
Packet2d p; Packet2d p;
if((ptrdiff_t(from) % 16) == 0) p = pload<Packet2d>(from); if((ptrdiff_t(from) % 16) == 0) p = pload<Packet2d>(from);
else p = ploadu<Packet2d>(from); else p = ploadu<Packet2d>(from);
return vec_perm(p, p, p16uc_PSET64_HI); return vec_splat_dbl<0>(p);
} }
template<> EIGEN_STRONG_INLINE void pstoreu<double>(double* to, const Packet2d& from) template<> EIGEN_STRONG_INLINE void pstoreu<double>(double* to, const Packet2d& from)
@ -859,32 +946,34 @@ template<> EIGEN_STRONG_INLINE void pstoreu<double>(double* to, const Packet2d&
vec_vsx_st((Packet4f)from, (long)to & 15, (float*) _EIGEN_ALIGNED_PTR(to)); vec_vsx_st((Packet4f)from, (long)to & 15, (float*) _EIGEN_ALIGNED_PTR(to));
} }
template<> EIGEN_STRONG_INLINE void prefetch<double>(const double* addr) { vec_dstt((const float *) addr, DST_CTRL(2,2,32), DST_CHAN); } template<> EIGEN_STRONG_INLINE void prefetch<double>(const double* addr) { EIGEN_PPC_PREFETCH(addr); }
template<> EIGEN_STRONG_INLINE double pfirst<Packet2d>(const Packet2d& a) { double EIGEN_ALIGN16 x[2]; pstore(x, a); return x[0]; } template<> EIGEN_STRONG_INLINE double pfirst<Packet2d>(const Packet2d& a) { double EIGEN_ALIGN16 x[2]; pstore<double>(x, a); return x[0]; }
template<> EIGEN_STRONG_INLINE Packet2d preverse(const Packet2d& a) { return (Packet2d)vec_perm((Packet16uc)a,(Packet16uc)a, p16uc_REVERSE64); }
template<> EIGEN_STRONG_INLINE Packet2d preverse(const Packet2d& a)
{
return reinterpret_cast<Packet2d>(vec_perm(reinterpret_cast<Packet16uc>(a), reinterpret_cast<Packet16uc>(a), p16uc_REVERSE64));
}
template<> EIGEN_STRONG_INLINE Packet2d pabs(const Packet2d& a) { return vec_abs(a); } template<> EIGEN_STRONG_INLINE Packet2d pabs(const Packet2d& a) { return vec_abs(a); }
template<> EIGEN_STRONG_INLINE double predux<Packet2d>(const Packet2d& a) template<> EIGEN_STRONG_INLINE double predux<Packet2d>(const Packet2d& a)
{ {
Packet2d b, sum; Packet2d b, sum;
b = (Packet2d) vec_sld((Packet4ui) a, (Packet4ui)a, 8); b = reinterpret_cast<Packet2d>(vec_sld(reinterpret_cast<Packet4f>(a), reinterpret_cast<Packet4f>(a), 8));
sum = vec_add(a, b); sum = a + b;
return pfirst(sum); return pfirst<Packet2d>(sum);
} }
template<> EIGEN_STRONG_INLINE Packet2d preduxp<Packet2d>(const Packet2d* vecs) template<> EIGEN_STRONG_INLINE Packet2d preduxp<Packet2d>(const Packet2d* vecs)
{ {
Packet2d v[2], sum; Packet2d v[2], sum;
v[0] = vec_add(vecs[0], (Packet2d) vec_sld((Packet4ui) vecs[0], (Packet4ui) vecs[0], 8)); v[0] = vecs[0] + reinterpret_cast<Packet2d>(vec_sld(reinterpret_cast<Packet4f>(vecs[0]), reinterpret_cast<Packet4f>(vecs[0]), 8));
v[1] = vec_add(vecs[1], (Packet2d) vec_sld((Packet4ui) vecs[1], (Packet4ui) vecs[1], 8)); v[1] = vecs[1] + reinterpret_cast<Packet2d>(vec_sld(reinterpret_cast<Packet4f>(vecs[1]), reinterpret_cast<Packet4f>(vecs[1]), 8));
#ifdef _BIG_ENDIAN #ifdef _BIG_ENDIAN
sum = (Packet2d) vec_sld((Packet4ui) v[0], (Packet4ui) v[1], 8); sum = reinterpret_cast<Packet2d>(vec_sld(reinterpret_cast<Packet4f>(v[0]), reinterpret_cast<Packet4f>(v[1]), 8));
#else #else
sum = (Packet2d) vec_sld((Packet4ui) v[1], (Packet4ui) v[0], 8); sum = reinterpret_cast<Packet2d>(vec_sld(reinterpret_cast<Packet4f>(v[1]), reinterpret_cast<Packet4f>(v[0]), 8));
#endif #endif
return sum; return sum;
@ -893,19 +982,19 @@ template<> EIGEN_STRONG_INLINE Packet2d preduxp<Packet2d>(const Packet2d* vecs)
// mul // mul
template<> EIGEN_STRONG_INLINE double predux_mul<Packet2d>(const Packet2d& a) template<> EIGEN_STRONG_INLINE double predux_mul<Packet2d>(const Packet2d& a)
{ {
return pfirst(pmul(a, (Packet2d)vec_sld((Packet4ui) a, (Packet4ui) a, 8))); return pfirst(pmul(a, reinterpret_cast<Packet2d>(vec_sld(reinterpret_cast<Packet4ui>(a), reinterpret_cast<Packet4ui>(a), 8))));
} }
// min // min
template<> EIGEN_STRONG_INLINE double predux_min<Packet2d>(const Packet2d& a) template<> EIGEN_STRONG_INLINE double predux_min<Packet2d>(const Packet2d& a)
{ {
return pfirst(vec_min(a, (Packet2d) vec_sld((Packet4ui) a, (Packet4ui) a, 8))); return pfirst(pmin(a, reinterpret_cast<Packet2d>(vec_sld(reinterpret_cast<Packet4ui>(a), reinterpret_cast<Packet4ui>(a), 8))));
} }
// max // max
template<> EIGEN_STRONG_INLINE double predux_max<Packet2d>(const Packet2d& a) template<> EIGEN_STRONG_INLINE double predux_max<Packet2d>(const Packet2d& a)
{ {
return pfirst(vec_max(a, (Packet2d) vec_sld((Packet4ui) a, (Packet4ui) a, 8))); return pfirst(pmax(a, reinterpret_cast<Packet2d>(vec_sld(reinterpret_cast<Packet4ui>(a), reinterpret_cast<Packet4ui>(a), 8))));
} }
template<int Offset> template<int Offset>
@ -915,9 +1004,9 @@ struct palign_impl<Offset,Packet2d>
{ {
if (Offset == 1) if (Offset == 1)
#ifdef _BIG_ENDIAN #ifdef _BIG_ENDIAN
first = (Packet2d) vec_sld((Packet4ui) first, (Packet4ui) second, 8); first = reinterpret_cast<Packet2d>(vec_sld(reinterpret_cast<Packet4ui>(first), reinterpret_cast<Packet4ui>(second), 8));
#else #else
first = (Packet2d) vec_sld((Packet4ui) second, (Packet4ui) first, 8); first = reinterpret_cast<Packet2d>(vec_sld(reinterpret_cast<Packet4ui>(second), reinterpret_cast<Packet4ui>(first), 8));
#endif #endif
} }
}; };
@ -931,6 +1020,11 @@ ptranspose(PacketBlock<Packet2d,2>& kernel) {
kernel.packet[1] = t1; kernel.packet[1] = t1;
} }
template<> EIGEN_STRONG_INLINE Packet2d pblend(const Selector<2>& ifPacket, const Packet2d& thenPacket, const Packet2d& elsePacket) {
Packet2l select = { ifPacket.select[0], ifPacket.select[1] };
Packet2bl mask = vec_cmpeq(reinterpret_cast<Packet2d>(select), reinterpret_cast<Packet2d>(p2l_ONE));
return vec_sel(elsePacket, thenPacket, mask);
}
#endif // __VSX__ #endif // __VSX__
} // end namespace internal } // end namespace internal

View File

@ -1,10 +0,0 @@
ADD_SUBDIRECTORY(AltiVec)
ADD_SUBDIRECTORY(AVX)
ADD_SUBDIRECTORY(AVX512)
ADD_SUBDIRECTORY(CUDA)
ADD_SUBDIRECTORY(Default)
ADD_SUBDIRECTORY(NEON)
ADD_SUBDIRECTORY(SSE)

View File

@ -1,6 +0,0 @@
FILE(GLOB Eigen_Core_arch_CUDA_SRCS "*.h")
INSTALL(FILES
${Eigen_Core_arch_CUDA_SRCS}
DESTINATION ${INCLUDE_INSTALL_DIR}/Eigen/src/Core/arch/CUDA COMPONENT Devel
)

View File

@ -0,0 +1,103 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
#ifndef EIGEN_COMPLEX_CUDA_H
#define EIGEN_COMPLEX_CUDA_H
// clang-format off
namespace Eigen {
namespace internal {
#if defined(__CUDACC__) && defined(EIGEN_USE_GPU)
// Many std::complex methods such as operator+, operator-, operator* and
// operator/ are not constexpr. Due to this, clang does not treat them as device
// functions and thus Eigen functors making use of these operators fail to
// compile. Here, we manually specialize these functors for complex types when
// building for CUDA to avoid non-constexpr methods.
// Sum
template<typename T> struct scalar_sum_op<const std::complex<T>, const std::complex<T> > : binary_op_base<const std::complex<T>, const std::complex<T> > {
typedef typename std::complex<T> result_type;
EIGEN_EMPTY_STRUCT_CTOR(scalar_sum_op)
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE std::complex<T> operator() (const std::complex<T>& a, const std::complex<T>& b) const {
return std::complex<T>(numext::real(a) + numext::real(b),
numext::imag(a) + numext::imag(b));
}
};
template<typename T> struct scalar_sum_op<std::complex<T>, std::complex<T> > : scalar_sum_op<const std::complex<T>, const std::complex<T> > {};
// Difference
template<typename T> struct scalar_difference_op<const std::complex<T>, const std::complex<T> > : binary_op_base<const std::complex<T>, const std::complex<T> > {
typedef typename std::complex<T> result_type;
EIGEN_EMPTY_STRUCT_CTOR(scalar_difference_op)
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE std::complex<T> operator() (const std::complex<T>& a, const std::complex<T>& b) const {
return std::complex<T>(numext::real(a) - numext::real(b),
numext::imag(a) - numext::imag(b));
}
};
template<typename T> struct scalar_difference_op<std::complex<T>, std::complex<T> > : scalar_difference_op<const std::complex<T>, const std::complex<T> > {};
// Product
template<typename T> struct scalar_product_op<const std::complex<T>, const std::complex<T> > : binary_op_base<const std::complex<T>, const std::complex<T> > {
enum {
Vectorizable = packet_traits<std::complex<T>>::HasMul
};
typedef typename std::complex<T> result_type;
EIGEN_EMPTY_STRUCT_CTOR(scalar_product_op)
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE std::complex<T> operator() (const std::complex<T>& a, const std::complex<T>& b) const {
const T a_real = numext::real(a);
const T a_imag = numext::imag(a);
const T b_real = numext::real(b);
const T b_imag = numext::imag(b);
return std::complex<T>(a_real * b_real - a_imag * b_imag,
a_real * b_imag + a_imag * b_real);
}
};
template<typename T> struct scalar_product_op<std::complex<T>, std::complex<T> > : scalar_product_op<const std::complex<T>, const std::complex<T> > {};
// Quotient
template<typename T> struct scalar_quotient_op<const std::complex<T>, const std::complex<T> > : binary_op_base<const std::complex<T>, const std::complex<T> > {
enum {
Vectorizable = packet_traits<std::complex<T>>::HasDiv
};
typedef typename std::complex<T> result_type;
EIGEN_EMPTY_STRUCT_CTOR(scalar_quotient_op)
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE std::complex<T> operator() (const std::complex<T>& a, const std::complex<T>& b) const {
const T a_real = numext::real(a);
const T a_imag = numext::imag(a);
const T b_real = numext::real(b);
const T b_imag = numext::imag(b);
const T norm = T(1) / (b_real * b_real + b_imag * b_imag);
return std::complex<T>((a_real * b_real + a_imag * b_imag) * norm,
(a_imag * b_real - a_real * b_imag) * norm);
}
};
template<typename T> struct scalar_quotient_op<std::complex<T>, std::complex<T> > : scalar_quotient_op<const std::complex<T>, const std::complex<T> > {};
#endif
} // end namespace internal
} // end namespace Eigen
#endif // EIGEN_COMPLEX_CUDA_H

View File

@ -1,11 +1,3 @@
// Standard 16-bit float type, mostly useful for GPUs. Defines a new
// class Eigen::half (inheriting from CUDA's __half struct) with
// operator overloads such that it behaves basically as an arithmetic
// type. It will be quite slow on CPUs (so it is recommended to stay
// in fp32 for CPUs, except for simple parameter conversions, I/O
// to disk and the likes), but fast on GPUs.
//
//
// This file is part of Eigen, a lightweight C++ template library // This file is part of Eigen, a lightweight C++ template library
// for linear algebra. // for linear algebra.
// //
@ -32,6 +24,15 @@
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
// Standard 16-bit float type, mostly useful for GPUs. Defines a new
// type Eigen::half (inheriting from CUDA's __half struct) with
// operator overloads such that it behaves basically as an arithmetic
// type. It will be quite slow on CPUs (so it is recommended to stay
// in fp32 for CPUs, except for simple parameter conversions, I/O
// to disk and the likes), but fast on GPUs.
#ifndef EIGEN_HALF_CUDA_H #ifndef EIGEN_HALF_CUDA_H
#define EIGEN_HALF_CUDA_H #define EIGEN_HALF_CUDA_H
@ -42,92 +43,93 @@
#endif #endif
namespace Eigen {
struct half;
namespace half_impl {
#if !defined(EIGEN_HAS_CUDA_FP16) #if !defined(EIGEN_HAS_CUDA_FP16)
// Make our own __half definition that is similar to CUDA's. // Make our own __half definition that is similar to CUDA's.
struct __half { struct __half {
__half() {} EIGEN_DEVICE_FUNC __half() {}
explicit __half(unsigned short raw) : x(raw) {} explicit EIGEN_DEVICE_FUNC __half(unsigned short raw) : x(raw) {}
unsigned short x; unsigned short x;
}; };
#endif #endif
namespace Eigen { EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC __half raw_uint16_to_half(unsigned short x);
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC __half float_to_half_rtne(float ff);
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC float half_to_float(__half h);
namespace internal { struct half_base : public __half {
EIGEN_DEVICE_FUNC half_base() {}
EIGEN_DEVICE_FUNC half_base(const half_base& h) : __half(h) {}
EIGEN_DEVICE_FUNC half_base(const __half& h) : __half(h) {}
};
static EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC __half raw_uint16_to_half(unsigned short x); } // namespace half_impl
static EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC __half float_to_half_rtne(float ff);
static EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC float half_to_float(__half h);
} // end namespace internal
// Class definition. // Class definition.
struct half : public __half { struct half : public half_impl::half_base {
#if !defined(EIGEN_HAS_CUDA_FP16)
typedef half_impl::__half __half;
#endif
EIGEN_DEVICE_FUNC half() {} EIGEN_DEVICE_FUNC half() {}
EIGEN_DEVICE_FUNC half(const __half& h) : __half(h) {} EIGEN_DEVICE_FUNC half(const __half& h) : half_impl::half_base(h) {}
EIGEN_DEVICE_FUNC half(const half& h) : __half(h) {} EIGEN_DEVICE_FUNC half(const half& h) : half_impl::half_base(h) {}
explicit EIGEN_DEVICE_FUNC half(bool b) explicit EIGEN_DEVICE_FUNC half(bool b)
: __half(internal::raw_uint16_to_half(b ? 0x3c00 : 0)) {} : half_impl::half_base(half_impl::raw_uint16_to_half(b ? 0x3c00 : 0)) {}
explicit EIGEN_DEVICE_FUNC half(unsigned int ui) template<class T>
: __half(internal::float_to_half_rtne(static_cast<float>(ui))) {} explicit EIGEN_DEVICE_FUNC half(const T& val)
explicit EIGEN_DEVICE_FUNC half(int i) : half_impl::half_base(half_impl::float_to_half_rtne(static_cast<float>(val))) {}
: __half(internal::float_to_half_rtne(static_cast<float>(i))) {}
explicit EIGEN_DEVICE_FUNC half(unsigned long ul)
: __half(internal::float_to_half_rtne(static_cast<float>(ul))) {}
explicit EIGEN_DEVICE_FUNC half(long l)
: __half(internal::float_to_half_rtne(static_cast<float>(l))) {}
explicit EIGEN_DEVICE_FUNC half(long long ll)
: __half(internal::float_to_half_rtne(static_cast<float>(ll))) {}
explicit EIGEN_DEVICE_FUNC half(unsigned long long ull)
: __half(internal::float_to_half_rtne(static_cast<float>(ull))) {}
explicit EIGEN_DEVICE_FUNC half(float f) explicit EIGEN_DEVICE_FUNC half(float f)
: __half(internal::float_to_half_rtne(f)) {} : half_impl::half_base(half_impl::float_to_half_rtne(f)) {}
explicit EIGEN_DEVICE_FUNC half(double d)
: __half(internal::float_to_half_rtne(static_cast<float>(d))) {}
EIGEN_DEVICE_FUNC EIGEN_EXPLICIT_CAST(bool) const { EIGEN_DEVICE_FUNC EIGEN_EXPLICIT_CAST(bool) const {
// +0.0 and -0.0 become false, everything else becomes true. // +0.0 and -0.0 become false, everything else becomes true.
return (x & 0x7fff) != 0; return (x & 0x7fff) != 0;
} }
EIGEN_DEVICE_FUNC EIGEN_EXPLICIT_CAST(signed char) const { EIGEN_DEVICE_FUNC EIGEN_EXPLICIT_CAST(signed char) const {
return static_cast<signed char>(internal::half_to_float(*this)); return static_cast<signed char>(half_impl::half_to_float(*this));
} }
EIGEN_DEVICE_FUNC EIGEN_EXPLICIT_CAST(unsigned char) const { EIGEN_DEVICE_FUNC EIGEN_EXPLICIT_CAST(unsigned char) const {
return static_cast<unsigned char>(internal::half_to_float(*this)); return static_cast<unsigned char>(half_impl::half_to_float(*this));
} }
EIGEN_DEVICE_FUNC EIGEN_EXPLICIT_CAST(short) const { EIGEN_DEVICE_FUNC EIGEN_EXPLICIT_CAST(short) const {
return static_cast<short>(internal::half_to_float(*this)); return static_cast<short>(half_impl::half_to_float(*this));
} }
EIGEN_DEVICE_FUNC EIGEN_EXPLICIT_CAST(unsigned short) const { EIGEN_DEVICE_FUNC EIGEN_EXPLICIT_CAST(unsigned short) const {
return static_cast<unsigned short>(internal::half_to_float(*this)); return static_cast<unsigned short>(half_impl::half_to_float(*this));
} }
EIGEN_DEVICE_FUNC EIGEN_EXPLICIT_CAST(int) const { EIGEN_DEVICE_FUNC EIGEN_EXPLICIT_CAST(int) const {
return static_cast<int>(internal::half_to_float(*this)); return static_cast<int>(half_impl::half_to_float(*this));
} }
EIGEN_DEVICE_FUNC EIGEN_EXPLICIT_CAST(unsigned int) const { EIGEN_DEVICE_FUNC EIGEN_EXPLICIT_CAST(unsigned int) const {
return static_cast<unsigned int>(internal::half_to_float(*this)); return static_cast<unsigned int>(half_impl::half_to_float(*this));
} }
EIGEN_DEVICE_FUNC EIGEN_EXPLICIT_CAST(long) const { EIGEN_DEVICE_FUNC EIGEN_EXPLICIT_CAST(long) const {
return static_cast<long>(internal::half_to_float(*this)); return static_cast<long>(half_impl::half_to_float(*this));
} }
EIGEN_DEVICE_FUNC EIGEN_EXPLICIT_CAST(unsigned long) const { EIGEN_DEVICE_FUNC EIGEN_EXPLICIT_CAST(unsigned long) const {
return static_cast<unsigned long>(internal::half_to_float(*this)); return static_cast<unsigned long>(half_impl::half_to_float(*this));
} }
EIGEN_DEVICE_FUNC EIGEN_EXPLICIT_CAST(long long) const { EIGEN_DEVICE_FUNC EIGEN_EXPLICIT_CAST(long long) const {
return static_cast<long long>(internal::half_to_float(*this)); return static_cast<long long>(half_impl::half_to_float(*this));
} }
EIGEN_DEVICE_FUNC EIGEN_EXPLICIT_CAST(unsigned long long) const { EIGEN_DEVICE_FUNC EIGEN_EXPLICIT_CAST(unsigned long long) const {
return static_cast<unsigned long long>(internal::half_to_float(*this)); return static_cast<unsigned long long>(half_to_float(*this));
} }
EIGEN_DEVICE_FUNC EIGEN_EXPLICIT_CAST(float) const { EIGEN_DEVICE_FUNC EIGEN_EXPLICIT_CAST(float) const {
return internal::half_to_float(*this); return half_impl::half_to_float(*this);
} }
EIGEN_DEVICE_FUNC EIGEN_EXPLICIT_CAST(double) const { EIGEN_DEVICE_FUNC EIGEN_EXPLICIT_CAST(double) const {
return static_cast<double>(internal::half_to_float(*this)); return static_cast<double>(half_impl::half_to_float(*this));
} }
EIGEN_DEVICE_FUNC half& operator=(const half& other) { EIGEN_DEVICE_FUNC half& operator=(const half& other) {
@ -136,6 +138,8 @@ struct half : public __half {
} }
}; };
namespace half_impl {
#if defined(EIGEN_HAS_CUDA_FP16) && defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530 #if defined(EIGEN_HAS_CUDA_FP16) && defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530
// Intrinsics for native fp16 support. Note that on current hardware, // Intrinsics for native fp16 support. Note that on current hardware,
@ -200,55 +204,55 @@ __device__ bool operator >= (const half& a, const half& b) {
// Definitions for CPUs and older CUDA, mostly working through conversion // Definitions for CPUs and older CUDA, mostly working through conversion
// to/from fp32. // to/from fp32.
static EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half operator + (const half& a, const half& b) { EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half operator + (const half& a, const half& b) {
return half(float(a) + float(b)); return half(float(a) + float(b));
} }
static EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half operator * (const half& a, const half& b) { EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half operator * (const half& a, const half& b) {
return half(float(a) * float(b)); return half(float(a) * float(b));
} }
static EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half operator - (const half& a, const half& b) { EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half operator - (const half& a, const half& b) {
return half(float(a) - float(b)); return half(float(a) - float(b));
} }
static EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half operator / (const half& a, const half& b) { EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half operator / (const half& a, const half& b) {
return half(float(a) / float(b)); return half(float(a) / float(b));
} }
static EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half operator - (const half& a) { EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half operator - (const half& a) {
half result; half result;
result.x = a.x ^ 0x8000; result.x = a.x ^ 0x8000;
return result; return result;
} }
static EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half& operator += (half& a, const half& b) { EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half& operator += (half& a, const half& b) {
a = half(float(a) + float(b)); a = half(float(a) + float(b));
return a; return a;
} }
static EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half& operator *= (half& a, const half& b) { EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half& operator *= (half& a, const half& b) {
a = half(float(a) * float(b)); a = half(float(a) * float(b));
return a; return a;
} }
static EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half& operator -= (half& a, const half& b) { EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half& operator -= (half& a, const half& b) {
a = half(float(a) - float(b)); a = half(float(a) - float(b));
return a; return a;
} }
static EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half& operator /= (half& a, const half& b) { EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half& operator /= (half& a, const half& b) {
a = half(float(a) / float(b)); a = half(float(a) / float(b));
return a; return a;
} }
static EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool operator == (const half& a, const half& b) { EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool operator == (const half& a, const half& b) {
return float(a) == float(b); return float(a) == float(b);
} }
static EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool operator != (const half& a, const half& b) { EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool operator != (const half& a, const half& b) {
return float(a) != float(b); return float(a) != float(b);
} }
static EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool operator < (const half& a, const half& b) { EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool operator < (const half& a, const half& b) {
return float(a) < float(b); return float(a) < float(b);
} }
static EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool operator <= (const half& a, const half& b) { EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool operator <= (const half& a, const half& b) {
return float(a) <= float(b); return float(a) <= float(b);
} }
static EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool operator > (const half& a, const half& b) { EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool operator > (const half& a, const half& b) {
return float(a) > float(b); return float(a) > float(b);
} }
static EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool operator >= (const half& a, const half& b) { EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool operator >= (const half& a, const half& b) {
return float(a) >= float(b); return float(a) >= float(b);
} }
@ -256,8 +260,8 @@ static EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool operator >= (const half& a, co
// Division by an index. Do it in full float precision to avoid accuracy // Division by an index. Do it in full float precision to avoid accuracy
// issues in converting the denominator to half. // issues in converting the denominator to half.
static EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half operator / (const half& a, Index b) { EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half operator / (const half& a, Index b) {
return Eigen::half(static_cast<float>(a) / static_cast<float>(b)); return half(static_cast<float>(a) / static_cast<float>(b));
} }
// Conversion routines, including fallbacks for the host or older CUDA. // Conversion routines, including fallbacks for the host or older CUDA.
@ -265,9 +269,7 @@ static EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half operator / (const half& a, Ind
// these in hardware. If we need more performance on older/other CPUs, they are // these in hardware. If we need more performance on older/other CPUs, they are
// also possible to vectorize directly. // also possible to vectorize directly.
namespace internal { EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC __half raw_uint16_to_half(unsigned short x) {
static EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC __half raw_uint16_to_half(unsigned short x) {
__half h; __half h;
h.x = x; h.x = x;
return h; return h;
@ -278,7 +280,7 @@ union FP32 {
float f; float f;
}; };
static EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC __half float_to_half_rtne(float ff) { EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC __half float_to_half_rtne(float ff) {
#if defined(EIGEN_HAS_CUDA_FP16) && defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 300 #if defined(EIGEN_HAS_CUDA_FP16) && defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 300
return __float2half(ff); return __float2half(ff);
@ -333,7 +335,7 @@ static EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC __half float_to_half_rtne(float ff)
#endif #endif
} }
static EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC float half_to_float(__half h) { EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC float half_to_float(__half h) {
#if defined(EIGEN_HAS_CUDA_FP16) && defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 300 #if defined(EIGEN_HAS_CUDA_FP16) && defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 300
return __half2float(h); return __half2float(h);
@ -362,92 +364,69 @@ static EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC float half_to_float(__half h) {
#endif #endif
} }
} // end namespace internal // --- standard functions ---
// Traits. EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool (isinf)(const half& a) {
namespace internal {
template<> struct is_arithmetic<half> { enum { value = true }; };
} // end namespace internal
template<> struct NumTraits<Eigen::half>
: GenericNumTraits<Eigen::half>
{
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Eigen::half epsilon() {
return internal::raw_uint16_to_half(0x0800);
}
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Eigen::half dummy_precision() { return half(1e-3f); }
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Eigen::half highest() {
return internal::raw_uint16_to_half(0x7bff);
}
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Eigen::half lowest() {
return internal::raw_uint16_to_half(0xfbff);
}
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Eigen::half infinity() {
return internal::raw_uint16_to_half(0x7c00);
}
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Eigen::half quiet_NaN() {
return internal::raw_uint16_to_half(0x7c01);
}
};
// Infinity/NaN checks.
namespace numext {
static EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool (isinf)(const Eigen::half& a) {
return (a.x & 0x7fff) == 0x7c00; return (a.x & 0x7fff) == 0x7c00;
} }
static EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool (isnan)(const Eigen::half& a) { EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool (isnan)(const half& a) {
#if defined(EIGEN_HAS_CUDA_FP16) && defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530 #if defined(EIGEN_HAS_CUDA_FP16) && defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530
return __hisnan(a); return __hisnan(a);
#else #else
return (a.x & 0x7fff) > 0x7c00; return (a.x & 0x7fff) > 0x7c00;
#endif #endif
} }
static EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool (isfinite)(const Eigen::half& a) { EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool (isfinite)(const half& a) {
return !(Eigen::numext::isinf)(a) && !(Eigen::numext::isnan)(a); return !(isinf EIGEN_NOT_A_MACRO (a)) && !(isnan EIGEN_NOT_A_MACRO (a));
} }
template<> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half abs(const Eigen::half& a) { EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half abs(const half& a) {
Eigen::half result; half result;
result.x = a.x & 0x7FFF; result.x = a.x & 0x7FFF;
return result; return result;
} }
template<> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half exp(const Eigen::half& a) { EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half exp(const half& a) {
return Eigen::half(::expf(float(a))); return half(::expf(float(a)));
} }
template<> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half log(const Eigen::half& a) { EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half log(const half& a) {
return Eigen::half(::logf(float(a))); #if defined(EIGEN_HAS_CUDA_FP16) && defined __CUDACC_VER__ && __CUDACC_VER__ >= 80000 && defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530
return Eigen::half(::hlog(a));
#else
return half(::logf(float(a)));
#endif
} }
template<> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half sqrt(const Eigen::half& a) { EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half log1p(const half& a) {
return Eigen::half(::sqrtf(float(a))); return half(numext::log1p(float(a)));
} }
template<> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half pow(const Eigen::half& a, const Eigen::half& b) { EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half log10(const half& a) {
return Eigen::half(::powf(float(a), float(b))); return half(::log10f(float(a)));
} }
template<> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half sin(const Eigen::half& a) { EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half sqrt(const half& a) {
return Eigen::half(::sinf(float(a))); return half(::sqrtf(float(a)));
} }
template<> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half cos(const Eigen::half& a) { EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half pow(const half& a, const half& b) {
return Eigen::half(::cosf(float(a))); return half(::powf(float(a), float(b)));
} }
template<> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half tan(const Eigen::half& a) { EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half sin(const half& a) {
return Eigen::half(::tanf(float(a))); return half(::sinf(float(a)));
} }
template<> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half tanh(const Eigen::half& a) { EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half cos(const half& a) {
return Eigen::half(::tanhf(float(a))); return half(::cosf(float(a)));
} }
template<> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half floor(const Eigen::half& a) { EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half tan(const half& a) {
return Eigen::half(::floorf(float(a))); return half(::tanf(float(a)));
} }
template<> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half ceil(const Eigen::half& a) { EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half tanh(const half& a) {
return Eigen::half(::ceilf(float(a))); return half(::tanhf(float(a)));
}
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half floor(const half& a) {
return half(::floorf(float(a)));
}
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half ceil(const half& a) {
return half(::ceilf(float(a)));
} }
template <> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half mini(const Eigen::half& a, const Eigen::half& b) { EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half (min)(const half& a, const half& b) {
#if defined(EIGEN_HAS_CUDA_FP16) && defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530 #if defined(EIGEN_HAS_CUDA_FP16) && defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530
return __hlt(b, a) ? b : a; return __hlt(b, a) ? b : a;
#else #else
@ -456,7 +435,7 @@ template <> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half mini(const Eigen::
return f2 < f1 ? b : a; return f2 < f1 ? b : a;
#endif #endif
} }
template <> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half maxi(const Eigen::half& a, const Eigen::half& b) { EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half (max)(const half& a, const half& b) {
#if defined(EIGEN_HAS_CUDA_FP16) && defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530 #if defined(EIGEN_HAS_CUDA_FP16) && defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530
return __hlt(a, b) ? b : a; return __hlt(a, b) ? b : a;
#else #else
@ -466,78 +445,89 @@ template <> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half maxi(const Eigen::
#endif #endif
} }
#ifdef EIGEN_HAS_C99_MATH EIGEN_ALWAYS_INLINE std::ostream& operator << (std::ostream& os, const half& v) {
template<> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half lgamma(const Eigen::half& a) { os << static_cast<float>(v);
return Eigen::half(Eigen::numext::lgamma(static_cast<float>(a))); return os;
} }
template<> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half digamma(const Eigen::half& a) {
return Eigen::half(Eigen::numext::digamma(static_cast<float>(a))); } // end namespace half_impl
// import Eigen::half_impl::half into Eigen namespace
// using half_impl::half;
namespace internal {
template<>
struct random_default_impl<half, false, false>
{
static inline half run(const half& x, const half& y)
{
return x + (y-x) * half(float(std::rand()) / float(RAND_MAX));
} }
template<> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half zeta(const Eigen::half& x, const Eigen::half& q) { static inline half run()
return Eigen::half(Eigen::numext::zeta(static_cast<float>(x), static_cast<float>(q))); {
return run(half(-1.f), half(1.f));
} }
template<> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half polygamma(const Eigen::half& n, const Eigen::half& x) { };
return Eigen::half(Eigen::numext::polygamma(static_cast<float>(n), static_cast<float>(x)));
template<> struct is_arithmetic<half> { enum { value = true }; };
} // end namespace internal
template<> struct NumTraits<Eigen::half>
: GenericNumTraits<Eigen::half>
{
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Eigen::half epsilon() {
return half_impl::raw_uint16_to_half(0x0800);
} }
template<> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half erf(const Eigen::half& a) { EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Eigen::half dummy_precision() { return Eigen::half(1e-2f); }
return Eigen::half(Eigen::numext::erf(static_cast<float>(a))); EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Eigen::half highest() {
return half_impl::raw_uint16_to_half(0x7bff);
} }
template<> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half erfc(const Eigen::half& a) { EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Eigen::half lowest() {
return Eigen::half(Eigen::numext::erfc(static_cast<float>(a))); return half_impl::raw_uint16_to_half(0xfbff);
} }
template<> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half igamma(const Eigen::half& a, const Eigen::half& x) { EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Eigen::half infinity() {
return Eigen::half(Eigen::numext::igamma(static_cast<float>(a), static_cast<float>(x))); return half_impl::raw_uint16_to_half(0x7c00);
} }
template<> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half igammac(const Eigen::half& a, const Eigen::half& x) { EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Eigen::half quiet_NaN() {
return Eigen::half(Eigen::numext::igammac(static_cast<float>(a), static_cast<float>(x))); return half_impl::raw_uint16_to_half(0x7c01);
} }
#endif };
} // end namespace numext
} // end namespace Eigen } // end namespace Eigen
// Standard mathematical functions and trancendentals. // C-like standard mathematical functions and trancendentals.
static EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half fabsh(const Eigen::half& a) { EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half fabsh(const Eigen::half& a) {
Eigen::half result; Eigen::half result;
result.x = a.x & 0x7FFF; result.x = a.x & 0x7FFF;
return result; return result;
} }
static EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half exph(const Eigen::half& a) { EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half exph(const Eigen::half& a) {
return Eigen::half(::expf(float(a))); return Eigen::half(::expf(float(a)));
} }
static EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half logh(const Eigen::half& a) { EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half logh(const Eigen::half& a) {
#if defined __CUDACC_VER__ && __CUDACC_VER__ >= 80000 && defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530
return Eigen::half(::hlog(a));
#else
return Eigen::half(::logf(float(a))); return Eigen::half(::logf(float(a)));
#endif
} }
static EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half sqrth(const Eigen::half& a) { EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half sqrth(const Eigen::half& a) {
return Eigen::half(::sqrtf(float(a))); return Eigen::half(::sqrtf(float(a)));
} }
static EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half powh(const Eigen::half& a, const Eigen::half& b) { EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half powh(const Eigen::half& a, const Eigen::half& b) {
return Eigen::half(::powf(float(a), float(b))); return Eigen::half(::powf(float(a), float(b)));
} }
static EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half floorh(const Eigen::half& a) { EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half floorh(const Eigen::half& a) {
return Eigen::half(::floorf(float(a))); return Eigen::half(::floorf(float(a)));
} }
static EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half ceilh(const Eigen::half& a) { EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half ceilh(const Eigen::half& a) {
return Eigen::half(::ceilf(float(a))); return Eigen::half(::ceilf(float(a)));
} }
static EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC int (isnan)(const Eigen::half& a) {
return (Eigen::numext::isnan)(a);
}
static EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC int (isinf)(const Eigen::half& a) {
return (Eigen::numext::isinf)(a);
}
static EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC int (isfinite)(const Eigen::half& a) {
return !(Eigen::numext::isinf)(a) && !(Eigen::numext::isnan)(a);
}
namespace std { namespace std {
EIGEN_ALWAYS_INLINE ostream& operator << (ostream& os, const Eigen::half& v) {
os << static_cast<float>(v);
return os;
}
#if __cplusplus > 199711L #if __cplusplus > 199711L
template <> template <>
struct hash<Eigen::half> { struct hash<Eigen::half> {
@ -551,19 +541,45 @@ struct hash<Eigen::half> {
// Add the missing shfl_xor intrinsic // Add the missing shfl_xor intrinsic
#if defined(EIGEN_HAS_CUDA_FP16) && defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 300 #if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 300
__device__ EIGEN_STRONG_INLINE Eigen::half __shfl_xor(Eigen::half var, int laneMask, int width=warpSize) { __device__ EIGEN_STRONG_INLINE Eigen::half __shfl_xor(Eigen::half var, int laneMask, int width=warpSize) {
return static_cast<Eigen::half>(__shfl_xor(static_cast<float>(var), laneMask, width)); return static_cast<Eigen::half>(__shfl_xor(static_cast<float>(var), laneMask, width));
} }
#endif #endif
// ldg() has an overload for __half, but we also need one for Eigen::half. // ldg() has an overload for __half, but we also need one for Eigen::half.
#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 320 #if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 350
static EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half __ldg(const Eigen::half* ptr) { EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half __ldg(const Eigen::half* ptr) {
return Eigen::internal::raw_uint16_to_half( return Eigen::half_impl::raw_uint16_to_half(
__ldg(reinterpret_cast<const unsigned short*>(ptr))); __ldg(reinterpret_cast<const unsigned short*>(ptr)));
} }
#endif #endif
#if defined(__CUDA_ARCH__)
namespace Eigen {
namespace numext {
template<>
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
bool (isnan)(const Eigen::half& h) {
return (half_impl::isnan)(h);
}
template<>
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
bool (isinf)(const Eigen::half& h) {
return (half_impl::isinf)(h);
}
template<>
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
bool (isfinite)(const Eigen::half& h) {
return (half_impl::isfinite)(h);
}
} // namespace Eigen
} // namespace numext
#endif
#endif // EIGEN_HALF_CUDA_H #endif // EIGEN_HALF_CUDA_H

View File

@ -27,9 +27,22 @@ float4 plog<float4>(const float4& a)
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
double2 plog<double2>(const double2& a) double2 plog<double2>(const double2& a)
{ {
using ::log;
return make_double2(log(a.x), log(a.y)); return make_double2(log(a.x), log(a.y));
} }
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
float4 plog1p<float4>(const float4& a)
{
return make_float4(log1pf(a.x), log1pf(a.y), log1pf(a.z), log1pf(a.w));
}
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
double2 plog1p<double2>(const double2& a)
{
return make_double2(log1p(a.x), log1p(a.y));
}
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
float4 pexp<float4>(const float4& a) float4 pexp<float4>(const float4& a)
{ {
@ -39,6 +52,7 @@ float4 pexp<float4>(const float4& a)
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
double2 pexp<double2>(const double2& a) double2 pexp<double2>(const double2& a)
{ {
using ::exp;
return make_double2(exp(a.x), exp(a.y)); return make_double2(exp(a.x), exp(a.y));
} }
@ -51,6 +65,7 @@ float4 psqrt<float4>(const float4& a)
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
double2 psqrt<double2>(const double2& a) double2 psqrt<double2>(const double2& a)
{ {
using ::sqrt;
return make_double2(sqrt(a.x), sqrt(a.y)); return make_double2(sqrt(a.x), sqrt(a.y));
} }
@ -66,120 +81,6 @@ double2 prsqrt<double2>(const double2& a)
return make_double2(rsqrt(a.x), rsqrt(a.y)); return make_double2(rsqrt(a.x), rsqrt(a.y));
} }
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
float4 plgamma<float4>(const float4& a)
{
return make_float4(lgammaf(a.x), lgammaf(a.y), lgammaf(a.z), lgammaf(a.w));
}
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
double2 plgamma<double2>(const double2& a)
{
return make_double2(lgamma(a.x), lgamma(a.y));
}
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
float4 pdigamma<float4>(const float4& a)
{
using numext::digamma;
return make_float4(digamma(a.x), digamma(a.y), digamma(a.z), digamma(a.w));
}
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
double2 pdigamma<double2>(const double2& a)
{
using numext::digamma;
return make_double2(digamma(a.x), digamma(a.y));
}
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
float4 pzeta<float4>(const float4& x, const float4& q)
{
using numext::zeta;
return make_float4(zeta(x.x, q.x), zeta(x.y, q.y), zeta(x.z, q.z), zeta(x.w, q.w));
}
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
double2 pzeta<double2>(const double2& x, const double2& q)
{
using numext::zeta;
return make_double2(zeta(x.x, q.x), zeta(x.y, q.y));
}
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
float4 ppolygamma<float4>(const float4& n, const float4& x)
{
using numext::polygamma;
return make_float4(polygamma(n.x, x.x), polygamma(n.y, x.y), polygamma(n.z, x.z), polygamma(n.w, x.w));
}
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
double2 ppolygamma<double2>(const double2& n, const double2& x)
{
using numext::polygamma;
return make_double2(polygamma(n.x, x.x), polygamma(n.y, x.y));
}
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
float4 perf<float4>(const float4& a)
{
return make_float4(erf(a.x), erf(a.y), erf(a.z), erf(a.w));
}
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
double2 perf<double2>(const double2& a)
{
return make_double2(erf(a.x), erf(a.y));
}
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
float4 perfc<float4>(const float4& a)
{
return make_float4(erfc(a.x), erfc(a.y), erfc(a.z), erfc(a.w));
}
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
double2 perfc<double2>(const double2& a)
{
return make_double2(erfc(a.x), erfc(a.y));
}
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
float4 pigamma<float4>(const float4& a, const float4& x)
{
using numext::igamma;
return make_float4(
igamma(a.x, x.x),
igamma(a.y, x.y),
igamma(a.z, x.z),
igamma(a.w, x.w));
}
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
double2 pigamma<double2>(const double2& a, const double2& x)
{
using numext::igamma;
return make_double2(igamma(a.x, x.x), igamma(a.y, x.y));
}
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
float4 pigammac<float4>(const float4& a, const float4& x)
{
using numext::igammac;
return make_float4(
igammac(a.x, x.x),
igammac(a.y, x.y),
igammac(a.z, x.z),
igammac(a.w, x.w));
}
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
double2 pigammac<double2>(const double2& a, const double2& x)
{
using numext::igammac;
return make_double2(igammac(a.x, x.x), igammac(a.y, x.y));
}
#endif #endif

View File

@ -44,8 +44,9 @@ template<> struct packet_traits<float> : default_packet_traits
HasPolygamma = 1, HasPolygamma = 1,
HasErf = 1, HasErf = 1,
HasErfc = 1, HasErfc = 1,
HasIgamma = 1, HasIGamma = 1,
HasIGammac = 1, HasIGammac = 1,
HasBetaInc = 1,
HasBlend = 0, HasBlend = 0,
}; };
@ -68,10 +69,13 @@ template<> struct packet_traits<double> : default_packet_traits
HasRsqrt = 1, HasRsqrt = 1,
HasLGamma = 1, HasLGamma = 1,
HasDiGamma = 1, HasDiGamma = 1,
HasZeta = 1,
HasPolygamma = 1,
HasErf = 1, HasErf = 1,
HasErfc = 1, HasErfc = 1,
HasIGamma = 1, HasIGamma = 1,
HasIGammac = 1, HasIGammac = 1,
HasBetaInc = 1,
HasBlend = 0, HasBlend = 0,
}; };
@ -278,35 +282,6 @@ template<> EIGEN_DEVICE_FUNC inline double predux_mul<double2>(const double2& a)
return a.x * a.y; return a.x * a.y;
} }
template<size_t offset>
struct protate_impl<offset, float4>
{
static float4 run(const float4& a) {
if (offset == 0) {
return make_float4(a.x, a.y, a.z, a.w);
}
if (offset == 1) {
return make_float4(a.w, a.x, a.y, a.z);
}
if (offset == 2) {
return make_float4(a.z, a.w, a.x, a.y);
}
return make_float4(a.y, a.z, a.w, a.x);
}
};
template<size_t offset>
struct protate_impl<offset, double2>
{
static double2 run(const double2& a) {
if (offset == 0) {
return make_double2(a.x, a.y);
}
return make_double2(a.y, a.x);
}
};
template<> EIGEN_DEVICE_FUNC inline float4 pabs<float4>(const float4& a) { template<> EIGEN_DEVICE_FUNC inline float4 pabs<float4>(const float4& a) {
return make_float4(fabsf(a.x), fabsf(a.y), fabsf(a.z), fabsf(a.w)); return make_float4(fabsf(a.x), fabsf(a.y), fabsf(a.z), fabsf(a.w));
} }

View File

@ -10,22 +10,16 @@
#ifndef EIGEN_PACKET_MATH_HALF_CUDA_H #ifndef EIGEN_PACKET_MATH_HALF_CUDA_H
#define EIGEN_PACKET_MATH_HALF_CUDA_H #define EIGEN_PACKET_MATH_HALF_CUDA_H
#if defined(EIGEN_HAS_CUDA_FP16)
// Make sure this is only available when targeting a GPU: we don't want to
// introduce conflicts between these packet_traits definitions and the ones
// we'll use on the host side (SSE, AVX, ...)
#if defined(__CUDACC__) && defined(EIGEN_USE_GPU)
// Most of the following operations require arch >= 5.3
#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530
namespace Eigen { namespace Eigen {
namespace internal { namespace internal {
// Most of the following operations require arch >= 3.0
#if defined(EIGEN_HAS_CUDA_FP16) && defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 300
template<> struct is_arithmetic<half2> { enum { value = true }; }; template<> struct is_arithmetic<half2> { enum { value = true }; };
template<> struct packet_traits<half> : default_packet_traits template<> struct packet_traits<Eigen::half> : default_packet_traits
{ {
typedef half2 type; typedef half2 type;
typedef half2 half; typedef half2 half;
@ -34,105 +28,172 @@ template<> struct packet_traits<half> : default_packet_traits
AlignedOnScalar = 1, AlignedOnScalar = 1,
size=2, size=2,
HasHalfPacket = 0, HasHalfPacket = 0,
HasDiv = 1 HasAdd = 1,
HasMul = 1,
HasDiv = 1,
HasSqrt = 1,
HasRsqrt = 1,
HasExp = 1,
HasLog = 1,
HasLog1p = 1
}; };
}; };
template<> struct unpacket_traits<half2> { typedef Eigen::half type; enum {size=2, alignment=Aligned16}; typedef half2 half; };
template<> struct unpacket_traits<half2> { typedef half type; enum {size=2, alignment=Aligned16}; typedef half2 half; }; template<> __device__ EIGEN_STRONG_INLINE half2 pset1<half2>(const Eigen::half& from) {
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pset1<half2>(const half& from) {
return __half2half2(from); return __half2half2(from);
} }
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pload<half2>(const half* from) { template<> __device__ EIGEN_STRONG_INLINE half2 pload<half2>(const Eigen::half* from) {
return *reinterpret_cast<const half2*>(from); return *reinterpret_cast<const half2*>(from);
} }
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 ploadu<half2>(const half* from) { template<> __device__ EIGEN_STRONG_INLINE half2 ploadu<half2>(const Eigen::half* from) {
return __halves2half2(from[0], from[1]); return __halves2half2(from[0], from[1]);
} }
template<> EIGEN_STRONG_INLINE half2 ploaddup<half2>(const half* from) { template<> EIGEN_STRONG_INLINE half2 ploaddup<half2>(const Eigen::half* from) {
return __halves2half2(from[0], from[0]); return __halves2half2(from[0], from[0]);
} }
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void pstore<half>(half* to, const half2& from) { template<> __device__ EIGEN_STRONG_INLINE void pstore<Eigen::half>(Eigen::half* to, const half2& from) {
*reinterpret_cast<half2*>(to) = from; *reinterpret_cast<half2*>(to) = from;
} }
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void pstoreu<half>(half* to, const half2& from) { template<> __device__ EIGEN_STRONG_INLINE void pstoreu<Eigen::half>(Eigen::half* to, const half2& from) {
to[0] = __low2half(from); to[0] = __low2half(from);
to[1] = __high2half(from); to[1] = __high2half(from);
} }
template<> template<>
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE half2 ploadt_ro<half2, Aligned>(const half* from) { __device__ EIGEN_ALWAYS_INLINE half2 ploadt_ro<half2, Aligned>(const Eigen::half* from) {
#if __CUDA_ARCH__ >= 350
return __ldg((const half2*)from); return __ldg((const half2*)from);
#else
return __halves2half2(*(from+0), *(from+1));
#endif
} }
template<> template<>
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE half2 ploadt_ro<half2, Unaligned>(const half* from) { __device__ EIGEN_ALWAYS_INLINE half2 ploadt_ro<half2, Unaligned>(const Eigen::half* from) {
#if __CUDA_ARCH__ >= 350
return __halves2half2(__ldg(from+0), __ldg(from+1)); return __halves2half2(__ldg(from+0), __ldg(from+1));
#else
return __halves2half2(*(from+0), *(from+1));
#endif
} }
template<> EIGEN_DEVICE_FUNC inline half2 pgather<half, half2>(const half* from, Index stride) { template<> __device__ EIGEN_STRONG_INLINE half2 pgather<Eigen::half, half2>(const Eigen::half* from, Index stride) {
return __halves2half2(from[0*stride], from[1*stride]); return __halves2half2(from[0*stride], from[1*stride]);
} }
template<> EIGEN_DEVICE_FUNC inline void pscatter<half, half2>(half* to, const half2& from, Index stride) { template<> __device__ EIGEN_STRONG_INLINE void pscatter<Eigen::half, half2>(Eigen::half* to, const half2& from, Index stride) {
to[stride*0] = __low2half(from); to[stride*0] = __low2half(from);
to[stride*1] = __high2half(from); to[stride*1] = __high2half(from);
} }
template<> EIGEN_DEVICE_FUNC inline half pfirst<half2>(const half2& a) { template<> __device__ EIGEN_STRONG_INLINE Eigen::half pfirst<half2>(const half2& a) {
return __low2half(a); return __low2half(a);
} }
template<> EIGEN_DEVICE_FUNC inline half2 pabs<half2>(const half2& a) { template<> __device__ EIGEN_STRONG_INLINE half2 pabs<half2>(const half2& a) {
half2 result; half2 result;
result.x = a.x & 0x7FFF7FFF; result.x = a.x & 0x7FFF7FFF;
return result; return result;
} }
EIGEN_DEVICE_FUNC inline void __device__ EIGEN_STRONG_INLINE void
ptranspose(PacketBlock<half2,2>& kernel) { ptranspose(PacketBlock<half2,2>& kernel) {
half a1 = __low2half(kernel.packet[0]); __half a1 = __low2half(kernel.packet[0]);
half a2 = __high2half(kernel.packet[0]); __half a2 = __high2half(kernel.packet[0]);
half b1 = __low2half(kernel.packet[1]); __half b1 = __low2half(kernel.packet[1]);
half b2 = __high2half(kernel.packet[1]); __half b2 = __high2half(kernel.packet[1]);
kernel.packet[0] = __halves2half2(a1, b1); kernel.packet[0] = __halves2half2(a1, b1);
kernel.packet[1] = __halves2half2(a2, b2); kernel.packet[1] = __halves2half2(a2, b2);
} }
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 plset<half2>(const half& a) { template<> __device__ EIGEN_STRONG_INLINE half2 plset<half2>(const Eigen::half& a) {
#if __CUDA_ARCH__ >= 530
return __halves2half2(a, __hadd(a, __float2half(1.0f))); return __halves2half2(a, __hadd(a, __float2half(1.0f)));
#else
float f = __half2float(a) + 1.0f;
return __halves2half2(a, __float2half(f));
#endif
} }
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 padd<half2>(const half2& a, const half2& b) { template<> __device__ EIGEN_STRONG_INLINE half2 padd<half2>(const half2& a, const half2& b) {
#if __CUDA_ARCH__ >= 530
return __hadd2(a, b); return __hadd2(a, b);
#else
float a1 = __low2float(a);
float a2 = __high2float(a);
float b1 = __low2float(b);
float b2 = __high2float(b);
float r1 = a1 + b1;
float r2 = a2 + b2;
return __floats2half2_rn(r1, r2);
#endif
} }
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 psub<half2>(const half2& a, const half2& b) { template<> __device__ EIGEN_STRONG_INLINE half2 psub<half2>(const half2& a, const half2& b) {
#if __CUDA_ARCH__ >= 530
return __hsub2(a, b); return __hsub2(a, b);
#else
float a1 = __low2float(a);
float a2 = __high2float(a);
float b1 = __low2float(b);
float b2 = __high2float(b);
float r1 = a1 - b1;
float r2 = a2 - b2;
return __floats2half2_rn(r1, r2);
#endif
} }
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pnegate(const half2& a) { template<> __device__ EIGEN_STRONG_INLINE half2 pnegate(const half2& a) {
#if __CUDA_ARCH__ >= 530
return __hneg2(a); return __hneg2(a);
#else
float a1 = __low2float(a);
float a2 = __high2float(a);
return __floats2half2_rn(-a1, -a2);
#endif
} }
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pconj(const half2& a) { return a; } template<> __device__ EIGEN_STRONG_INLINE half2 pconj(const half2& a) { return a; }
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pmul<half2>(const half2& a, const half2& b) { template<> __device__ EIGEN_STRONG_INLINE half2 pmul<half2>(const half2& a, const half2& b) {
#if __CUDA_ARCH__ >= 530
return __hmul2(a, b); return __hmul2(a, b);
#else
float a1 = __low2float(a);
float a2 = __high2float(a);
float b1 = __low2float(b);
float b2 = __high2float(b);
float r1 = a1 * b1;
float r2 = a2 * b2;
return __floats2half2_rn(r1, r2);
#endif
} }
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pmadd<half2>(const half2& a, const half2& b, const half2& c) { template<> __device__ EIGEN_STRONG_INLINE half2 pmadd<half2>(const half2& a, const half2& b, const half2& c) {
#if __CUDA_ARCH__ >= 530
return __hfma2(a, b, c); return __hfma2(a, b, c);
#else
float a1 = __low2float(a);
float a2 = __high2float(a);
float b1 = __low2float(b);
float b2 = __high2float(b);
float c1 = __low2float(c);
float c2 = __high2float(c);
float r1 = a1 * b1 + c1;
float r2 = a2 * b2 + c2;
return __floats2half2_rn(r1, r2);
#endif
} }
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pdiv<half2>(const half2& a, const half2& b) { template<> __device__ EIGEN_STRONG_INLINE half2 pdiv<half2>(const half2& a, const half2& b) {
float a1 = __low2float(a); float a1 = __low2float(a);
float a2 = __high2float(a); float a2 = __high2float(a);
float b1 = __low2float(b); float b1 = __low2float(b);
@ -142,51 +203,529 @@ template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pdiv<half2>(const half2&
return __floats2half2_rn(r1, r2); return __floats2half2_rn(r1, r2);
} }
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pmin<half2>(const half2& a, const half2& b) { template<> __device__ EIGEN_STRONG_INLINE half2 pmin<half2>(const half2& a, const half2& b) {
float a1 = __low2float(a); float a1 = __low2float(a);
float a2 = __high2float(a); float a2 = __high2float(a);
float b1 = __low2float(b); float b1 = __low2float(b);
float b2 = __high2float(b); float b2 = __high2float(b);
half r1 = a1 < b1 ? __low2half(a) : __low2half(b); __half r1 = a1 < b1 ? __low2half(a) : __low2half(b);
half r2 = a2 < b2 ? __high2half(a) : __high2half(b); __half r2 = a2 < b2 ? __high2half(a) : __high2half(b);
return __halves2half2(r1, r2); return __halves2half2(r1, r2);
} }
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pmax<half2>(const half2& a, const half2& b) { template<> __device__ EIGEN_STRONG_INLINE half2 pmax<half2>(const half2& a, const half2& b) {
float a1 = __low2float(a); float a1 = __low2float(a);
float a2 = __high2float(a); float a2 = __high2float(a);
float b1 = __low2float(b); float b1 = __low2float(b);
float b2 = __high2float(b); float b2 = __high2float(b);
half r1 = a1 > b1 ? __low2half(a) : __low2half(b); __half r1 = a1 > b1 ? __low2half(a) : __low2half(b);
half r2 = a2 > b2 ? __high2half(a) : __high2half(b); __half r2 = a2 > b2 ? __high2half(a) : __high2half(b);
return __halves2half2(r1, r2); return __halves2half2(r1, r2);
} }
template<> EIGEN_DEVICE_FUNC inline half predux<half2>(const half2& a) { template<> __device__ EIGEN_STRONG_INLINE Eigen::half predux<half2>(const half2& a) {
#if __CUDA_ARCH__ >= 530
return __hadd(__low2half(a), __high2half(a)); return __hadd(__low2half(a), __high2half(a));
#else
float a1 = __low2float(a);
float a2 = __high2float(a);
return Eigen::half(half_impl::raw_uint16_to_half(__float2half_rn(a1 + a2)));
#endif
} }
template<> EIGEN_DEVICE_FUNC inline half predux_max<half2>(const half2& a) { template<> __device__ EIGEN_STRONG_INLINE Eigen::half predux_max<half2>(const half2& a) {
half first = __low2half(a); #if __CUDA_ARCH__ >= 530
half second = __high2half(a); __half first = __low2half(a);
__half second = __high2half(a);
return __hgt(first, second) ? first : second; return __hgt(first, second) ? first : second;
#else
float a1 = __low2float(a);
float a2 = __high2float(a);
return a1 > a2 ? __low2half(a) : __high2half(a);
#endif
} }
template<> EIGEN_DEVICE_FUNC inline half predux_min<half2>(const half2& a) { template<> __device__ EIGEN_STRONG_INLINE Eigen::half predux_min<half2>(const half2& a) {
half first = __low2half(a); #if __CUDA_ARCH__ >= 530
half second = __high2half(a); __half first = __low2half(a);
__half second = __high2half(a);
return __hlt(first, second) ? first : second; return __hlt(first, second) ? first : second;
#else
float a1 = __low2float(a);
float a2 = __high2float(a);
return a1 < a2 ? __low2half(a) : __high2half(a);
#endif
} }
template<> EIGEN_DEVICE_FUNC inline half predux_mul<half2>(const half2& a) { template<> __device__ EIGEN_STRONG_INLINE Eigen::half predux_mul<half2>(const half2& a) {
#if __CUDA_ARCH__ >= 530
return __hmul(__low2half(a), __high2half(a)); return __hmul(__low2half(a), __high2half(a));
#else
float a1 = __low2float(a);
float a2 = __high2float(a);
return Eigen::half(half_impl::raw_uint16_to_half(__float2half_rn(a1 * a2)));
#endif
} }
} // end namespace internal template<> __device__ EIGEN_STRONG_INLINE half2 plog1p<half2>(const half2& a) {
float a1 = __low2float(a);
float a2 = __high2float(a);
float r1 = log1pf(a1);
float r2 = log1pf(a2);
return __floats2half2_rn(r1, r2);
}
} // end namespace Eigen #if defined __CUDACC_VER__ && __CUDACC_VER__ >= 80000 && defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 530
template<> __device__ EIGEN_STRONG_INLINE
half2 plog<half2>(const half2& a) {
return h2log(a);
}
template<> __device__ EIGEN_STRONG_INLINE
half2 pexp<half2>(const half2& a) {
return h2exp(a);
}
template<> __device__ EIGEN_STRONG_INLINE
half2 psqrt<half2>(const half2& a) {
return h2sqrt(a);
}
template<> __device__ EIGEN_STRONG_INLINE
half2 prsqrt<half2>(const half2& a) {
return h2rsqrt(a);
}
#else
template<> __device__ EIGEN_STRONG_INLINE half2 plog<half2>(const half2& a) {
float a1 = __low2float(a);
float a2 = __high2float(a);
float r1 = logf(a1);
float r2 = logf(a2);
return __floats2half2_rn(r1, r2);
}
template<> __device__ EIGEN_STRONG_INLINE half2 pexp<half2>(const half2& a) {
float a1 = __low2float(a);
float a2 = __high2float(a);
float r1 = expf(a1);
float r2 = expf(a2);
return __floats2half2_rn(r1, r2);
}
template<> __device__ EIGEN_STRONG_INLINE half2 psqrt<half2>(const half2& a) {
float a1 = __low2float(a);
float a2 = __high2float(a);
float r1 = sqrtf(a1);
float r2 = sqrtf(a2);
return __floats2half2_rn(r1, r2);
}
template<> __device__ EIGEN_STRONG_INLINE half2 prsqrt<half2>(const half2& a) {
float a1 = __low2float(a);
float a2 = __high2float(a);
float r1 = rsqrtf(a1);
float r2 = rsqrtf(a2);
return __floats2half2_rn(r1, r2);
}
#endif #endif
#elif defined EIGEN_VECTORIZE_AVX
typedef struct {
__m128i x;
} Packet8h;
template<> struct is_arithmetic<Packet8h> { enum { value = true }; };
template <>
struct packet_traits<Eigen::half> : default_packet_traits {
typedef Packet8h type;
// There is no half-size packet for Packet8h.
typedef Packet8h half;
enum {
Vectorizable = 1,
AlignedOnScalar = 1,
size = 8,
HasHalfPacket = 0,
HasAdd = 0,
HasSub = 0,
HasMul = 0,
HasNegate = 0,
HasAbs = 0,
HasAbs2 = 0,
HasMin = 0,
HasMax = 0,
HasConj = 0,
HasSetLinear = 0,
HasDiv = 0,
HasSqrt = 0,
HasRsqrt = 0,
HasExp = 0,
HasLog = 0,
HasBlend = 0
};
};
template<> struct unpacket_traits<Packet8h> { typedef Eigen::half type; enum {size=8, alignment=Aligned16}; typedef Packet8h half; };
template<> EIGEN_STRONG_INLINE Packet8h pset1<Packet8h>(const Eigen::half& from) {
Packet8h result;
result.x = _mm_set1_epi16(from.x);
return result;
}
template<> EIGEN_STRONG_INLINE Eigen::half pfirst<Packet8h>(const Packet8h& from) {
return half_impl::raw_uint16_to_half(static_cast<unsigned short>(_mm_extract_epi16(from.x, 0)));
}
template<> EIGEN_STRONG_INLINE Packet8h pload<Packet8h>(const Eigen::half* from) {
Packet8h result;
result.x = _mm_load_si128(reinterpret_cast<const __m128i*>(from));
return result;
}
template<> EIGEN_STRONG_INLINE Packet8h ploadu<Packet8h>(const Eigen::half* from) {
Packet8h result;
result.x = _mm_loadu_si128(reinterpret_cast<const __m128i*>(from));
return result;
}
template<> EIGEN_STRONG_INLINE void pstore<Eigen::half>(Eigen::half* to, const Packet8h& from) {
_mm_store_si128(reinterpret_cast<__m128i*>(to), from.x);
}
template<> EIGEN_STRONG_INLINE void pstoreu<Eigen::half>(Eigen::half* to, const Packet8h& from) {
_mm_storeu_si128(reinterpret_cast<__m128i*>(to), from.x);
}
template<> EIGEN_STRONG_INLINE Packet8h
ploadquad<Packet8h>(const Eigen::half* from) {
Packet8h result;
unsigned short a = from[0].x;
unsigned short b = from[1].x;
result.x = _mm_set_epi16(b, b, b, b, a, a, a, a);
return result;
}
EIGEN_STRONG_INLINE Packet8f half2float(const Packet8h& a) {
#ifdef EIGEN_HAS_FP16_C
return _mm256_cvtph_ps(a.x);
#else
EIGEN_ALIGN32 Eigen::half aux[8];
pstore(aux, a);
float f0(aux[0]);
float f1(aux[1]);
float f2(aux[2]);
float f3(aux[3]);
float f4(aux[4]);
float f5(aux[5]);
float f6(aux[6]);
float f7(aux[7]);
return _mm256_set_ps(f7, f6, f5, f4, f3, f2, f1, f0);
#endif #endif
}
EIGEN_STRONG_INLINE Packet8h float2half(const Packet8f& a) {
#ifdef EIGEN_HAS_FP16_C
Packet8h result;
result.x = _mm256_cvtps_ph(a, _MM_FROUND_TO_NEAREST_INT|_MM_FROUND_NO_EXC);
return result;
#else
EIGEN_ALIGN32 float aux[8];
pstore(aux, a);
Eigen::half h0(aux[0]);
Eigen::half h1(aux[1]);
Eigen::half h2(aux[2]);
Eigen::half h3(aux[3]);
Eigen::half h4(aux[4]);
Eigen::half h5(aux[5]);
Eigen::half h6(aux[6]);
Eigen::half h7(aux[7]);
Packet8h result;
result.x = _mm_set_epi16(h7.x, h6.x, h5.x, h4.x, h3.x, h2.x, h1.x, h0.x);
return result;
#endif #endif
}
template<> EIGEN_STRONG_INLINE Packet8h pconj(const Packet8h& a) { return a; }
template<> EIGEN_STRONG_INLINE Packet8h padd<Packet8h>(const Packet8h& a, const Packet8h& b) {
Packet8f af = half2float(a);
Packet8f bf = half2float(b);
Packet8f rf = padd(af, bf);
return float2half(rf);
}
template<> EIGEN_STRONG_INLINE Packet8h pmul<Packet8h>(const Packet8h& a, const Packet8h& b) {
Packet8f af = half2float(a);
Packet8f bf = half2float(b);
Packet8f rf = pmul(af, bf);
return float2half(rf);
}
template<> EIGEN_STRONG_INLINE Packet8h pgather<Eigen::half, Packet8h>(const Eigen::half* from, Index stride)
{
Packet8h result;
result.x = _mm_set_epi16(from[7*stride].x, from[6*stride].x, from[5*stride].x, from[4*stride].x, from[3*stride].x, from[2*stride].x, from[1*stride].x, from[0*stride].x);
return result;
}
template<> EIGEN_STRONG_INLINE void pscatter<Eigen::half, Packet8h>(Eigen::half* to, const Packet8h& from, Index stride)
{
EIGEN_ALIGN32 Eigen::half aux[8];
pstore(aux, from);
to[stride*0].x = aux[0].x;
to[stride*1].x = aux[1].x;
to[stride*2].x = aux[2].x;
to[stride*3].x = aux[3].x;
to[stride*4].x = aux[4].x;
to[stride*5].x = aux[5].x;
to[stride*6].x = aux[6].x;
to[stride*7].x = aux[7].x;
}
EIGEN_STRONG_INLINE void
ptranspose(PacketBlock<Packet8h,8>& kernel) {
__m128i a = kernel.packet[0].x;
__m128i b = kernel.packet[1].x;
__m128i c = kernel.packet[2].x;
__m128i d = kernel.packet[3].x;
__m128i e = kernel.packet[4].x;
__m128i f = kernel.packet[5].x;
__m128i g = kernel.packet[6].x;
__m128i h = kernel.packet[7].x;
__m128i a03b03 = _mm_unpacklo_epi16(a, b);
__m128i c03d03 = _mm_unpacklo_epi16(c, d);
__m128i e03f03 = _mm_unpacklo_epi16(e, f);
__m128i g03h03 = _mm_unpacklo_epi16(g, h);
__m128i a47b47 = _mm_unpackhi_epi16(a, b);
__m128i c47d47 = _mm_unpackhi_epi16(c, d);
__m128i e47f47 = _mm_unpackhi_epi16(e, f);
__m128i g47h47 = _mm_unpackhi_epi16(g, h);
__m128i a01b01c01d01 = _mm_unpacklo_epi32(a03b03, c03d03);
__m128i a23b23c23d23 = _mm_unpackhi_epi32(a03b03, c03d03);
__m128i e01f01g01h01 = _mm_unpacklo_epi32(e03f03, g03h03);
__m128i e23f23g23h23 = _mm_unpackhi_epi32(e03f03, g03h03);
__m128i a45b45c45d45 = _mm_unpacklo_epi32(a47b47, c47d47);
__m128i a67b67c67d67 = _mm_unpackhi_epi32(a47b47, c47d47);
__m128i e45f45g45h45 = _mm_unpacklo_epi32(e47f47, g47h47);
__m128i e67f67g67h67 = _mm_unpackhi_epi32(e47f47, g47h47);
__m128i a0b0c0d0e0f0g0h0 = _mm_unpacklo_epi64(a01b01c01d01, e01f01g01h01);
__m128i a1b1c1d1e1f1g1h1 = _mm_unpackhi_epi64(a01b01c01d01, e01f01g01h01);
__m128i a2b2c2d2e2f2g2h2 = _mm_unpacklo_epi64(a23b23c23d23, e23f23g23h23);
__m128i a3b3c3d3e3f3g3h3 = _mm_unpackhi_epi64(a23b23c23d23, e23f23g23h23);
__m128i a4b4c4d4e4f4g4h4 = _mm_unpacklo_epi64(a45b45c45d45, e45f45g45h45);
__m128i a5b5c5d5e5f5g5h5 = _mm_unpackhi_epi64(a45b45c45d45, e45f45g45h45);
__m128i a6b6c6d6e6f6g6h6 = _mm_unpacklo_epi64(a67b67c67d67, e67f67g67h67);
__m128i a7b7c7d7e7f7g7h7 = _mm_unpackhi_epi64(a67b67c67d67, e67f67g67h67);
kernel.packet[0].x = a0b0c0d0e0f0g0h0;
kernel.packet[1].x = a1b1c1d1e1f1g1h1;
kernel.packet[2].x = a2b2c2d2e2f2g2h2;
kernel.packet[3].x = a3b3c3d3e3f3g3h3;
kernel.packet[4].x = a4b4c4d4e4f4g4h4;
kernel.packet[5].x = a5b5c5d5e5f5g5h5;
kernel.packet[6].x = a6b6c6d6e6f6g6h6;
kernel.packet[7].x = a7b7c7d7e7f7g7h7;
}
EIGEN_STRONG_INLINE void
ptranspose(PacketBlock<Packet8h,4>& kernel) {
EIGEN_ALIGN32 Eigen::half in[4][8];
pstore<Eigen::half>(in[0], kernel.packet[0]);
pstore<Eigen::half>(in[1], kernel.packet[1]);
pstore<Eigen::half>(in[2], kernel.packet[2]);
pstore<Eigen::half>(in[3], kernel.packet[3]);
EIGEN_ALIGN32 Eigen::half out[4][8];
for (int i = 0; i < 4; ++i) {
for (int j = 0; j < 4; ++j) {
out[i][j] = in[j][2*i];
}
for (int j = 0; j < 4; ++j) {
out[i][j+4] = in[j][2*i+1];
}
}
kernel.packet[0] = pload<Packet8h>(out[0]);
kernel.packet[1] = pload<Packet8h>(out[1]);
kernel.packet[2] = pload<Packet8h>(out[2]);
kernel.packet[3] = pload<Packet8h>(out[3]);
}
// Disable the following code since it's broken on too many platforms / compilers.
//#elif defined(EIGEN_VECTORIZE_SSE) && (!EIGEN_ARCH_x86_64) && (!EIGEN_COMP_MSVC)
#elif 0
typedef struct {
__m64 x;
} Packet4h;
template<> struct is_arithmetic<Packet4h> { enum { value = true }; };
template <>
struct packet_traits<Eigen::half> : default_packet_traits {
typedef Packet4h type;
// There is no half-size packet for Packet4h.
typedef Packet4h half;
enum {
Vectorizable = 1,
AlignedOnScalar = 1,
size = 4,
HasHalfPacket = 0,
HasAdd = 0,
HasSub = 0,
HasMul = 0,
HasNegate = 0,
HasAbs = 0,
HasAbs2 = 0,
HasMin = 0,
HasMax = 0,
HasConj = 0,
HasSetLinear = 0,
HasDiv = 0,
HasSqrt = 0,
HasRsqrt = 0,
HasExp = 0,
HasLog = 0,
HasBlend = 0
};
};
template<> struct unpacket_traits<Packet4h> { typedef Eigen::half type; enum {size=4, alignment=Aligned16}; typedef Packet4h half; };
template<> EIGEN_STRONG_INLINE Packet4h pset1<Packet4h>(const Eigen::half& from) {
Packet4h result;
result.x = _mm_set1_pi16(from.x);
return result;
}
template<> EIGEN_STRONG_INLINE Eigen::half pfirst<Packet4h>(const Packet4h& from) {
return half_impl::raw_uint16_to_half(static_cast<unsigned short>(_mm_cvtsi64_si32(from.x)));
}
template<> EIGEN_STRONG_INLINE Packet4h pconj(const Packet4h& a) { return a; }
template<> EIGEN_STRONG_INLINE Packet4h padd<Packet4h>(const Packet4h& a, const Packet4h& b) {
__int64_t a64 = _mm_cvtm64_si64(a.x);
__int64_t b64 = _mm_cvtm64_si64(b.x);
Eigen::half h[4];
Eigen::half ha = half_impl::raw_uint16_to_half(static_cast<unsigned short>(a64));
Eigen::half hb = half_impl::raw_uint16_to_half(static_cast<unsigned short>(b64));
h[0] = ha + hb;
ha = half_impl::raw_uint16_to_half(static_cast<unsigned short>(a64 >> 16));
hb = half_impl::raw_uint16_to_half(static_cast<unsigned short>(b64 >> 16));
h[1] = ha + hb;
ha = half_impl::raw_uint16_to_half(static_cast<unsigned short>(a64 >> 32));
hb = half_impl::raw_uint16_to_half(static_cast<unsigned short>(b64 >> 32));
h[2] = ha + hb;
ha = half_impl::raw_uint16_to_half(static_cast<unsigned short>(a64 >> 48));
hb = half_impl::raw_uint16_to_half(static_cast<unsigned short>(b64 >> 48));
h[3] = ha + hb;
Packet4h result;
result.x = _mm_set_pi16(h[3].x, h[2].x, h[1].x, h[0].x);
return result;
}
template<> EIGEN_STRONG_INLINE Packet4h pmul<Packet4h>(const Packet4h& a, const Packet4h& b) {
__int64_t a64 = _mm_cvtm64_si64(a.x);
__int64_t b64 = _mm_cvtm64_si64(b.x);
Eigen::half h[4];
Eigen::half ha = half_impl::raw_uint16_to_half(static_cast<unsigned short>(a64));
Eigen::half hb = half_impl::raw_uint16_to_half(static_cast<unsigned short>(b64));
h[0] = ha * hb;
ha = half_impl::raw_uint16_to_half(static_cast<unsigned short>(a64 >> 16));
hb = half_impl::raw_uint16_to_half(static_cast<unsigned short>(b64 >> 16));
h[1] = ha * hb;
ha = half_impl::raw_uint16_to_half(static_cast<unsigned short>(a64 >> 32));
hb = half_impl::raw_uint16_to_half(static_cast<unsigned short>(b64 >> 32));
h[2] = ha * hb;
ha = half_impl::raw_uint16_to_half(static_cast<unsigned short>(a64 >> 48));
hb = half_impl::raw_uint16_to_half(static_cast<unsigned short>(b64 >> 48));
h[3] = ha * hb;
Packet4h result;
result.x = _mm_set_pi16(h[3].x, h[2].x, h[1].x, h[0].x);
return result;
}
template<> EIGEN_STRONG_INLINE Packet4h pload<Packet4h>(const Eigen::half* from) {
Packet4h result;
result.x = _mm_cvtsi64_m64(*reinterpret_cast<const __int64_t*>(from));
return result;
}
template<> EIGEN_STRONG_INLINE Packet4h ploadu<Packet4h>(const Eigen::half* from) {
Packet4h result;
result.x = _mm_cvtsi64_m64(*reinterpret_cast<const __int64_t*>(from));
return result;
}
template<> EIGEN_STRONG_INLINE void pstore<Eigen::half>(Eigen::half* to, const Packet4h& from) {
__int64_t r = _mm_cvtm64_si64(from.x);
*(reinterpret_cast<__int64_t*>(to)) = r;
}
template<> EIGEN_STRONG_INLINE void pstoreu<Eigen::half>(Eigen::half* to, const Packet4h& from) {
__int64_t r = _mm_cvtm64_si64(from.x);
*(reinterpret_cast<__int64_t*>(to)) = r;
}
template<> EIGEN_STRONG_INLINE Packet4h
ploadquad<Packet4h>(const Eigen::half* from) {
return pset1<Packet4h>(*from);
}
template<> EIGEN_STRONG_INLINE Packet4h pgather<Eigen::half, Packet4h>(const Eigen::half* from, Index stride)
{
Packet4h result;
result.x = _mm_set_pi16(from[3*stride].x, from[2*stride].x, from[1*stride].x, from[0*stride].x);
return result;
}
template<> EIGEN_STRONG_INLINE void pscatter<Eigen::half, Packet4h>(Eigen::half* to, const Packet4h& from, Index stride)
{
__int64_t a = _mm_cvtm64_si64(from.x);
to[stride*0].x = static_cast<unsigned short>(a);
to[stride*1].x = static_cast<unsigned short>(a >> 16);
to[stride*2].x = static_cast<unsigned short>(a >> 32);
to[stride*3].x = static_cast<unsigned short>(a >> 48);
}
EIGEN_STRONG_INLINE void
ptranspose(PacketBlock<Packet4h,4>& kernel) {
__m64 T0 = _mm_unpacklo_pi16(kernel.packet[0].x, kernel.packet[1].x);
__m64 T1 = _mm_unpacklo_pi16(kernel.packet[2].x, kernel.packet[3].x);
__m64 T2 = _mm_unpackhi_pi16(kernel.packet[0].x, kernel.packet[1].x);
__m64 T3 = _mm_unpackhi_pi16(kernel.packet[2].x, kernel.packet[3].x);
kernel.packet[0].x = _mm_unpacklo_pi32(T0, T1);
kernel.packet[1].x = _mm_unpackhi_pi32(T0, T1);
kernel.packet[2].x = _mm_unpacklo_pi32(T2, T3);
kernel.packet[3].x = _mm_unpackhi_pi32(T2, T3);
}
#endif
}
}
#endif // EIGEN_PACKET_MATH_HALF_CUDA_H #endif // EIGEN_PACKET_MATH_HALF_CUDA_H

View File

@ -14,50 +14,48 @@ namespace Eigen {
namespace internal { namespace internal {
#if defined(EIGEN_HAS_CUDA_FP16)
template<> template<>
struct scalar_cast_op<float, half> { struct scalar_cast_op<float, Eigen::half> {
EIGEN_EMPTY_STRUCT_CTOR(scalar_cast_op) EIGEN_EMPTY_STRUCT_CTOR(scalar_cast_op)
typedef half result_type; typedef Eigen::half result_type;
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half operator() (const float& a) const { EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Eigen::half operator() (const float& a) const {
#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 300 #if defined(EIGEN_HAS_CUDA_FP16) && defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 300
return __float2half(a); return __float2half(a);
#else #else
return half(a); return Eigen::half(a);
#endif #endif
} }
}; };
template<> template<>
struct functor_traits<scalar_cast_op<float, half> > struct functor_traits<scalar_cast_op<float, Eigen::half> >
{ enum { Cost = NumTraits<float>::AddCost, PacketAccess = false }; }; { enum { Cost = NumTraits<float>::AddCost, PacketAccess = false }; };
template<> template<>
struct scalar_cast_op<int, half> { struct scalar_cast_op<int, Eigen::half> {
EIGEN_EMPTY_STRUCT_CTOR(scalar_cast_op) EIGEN_EMPTY_STRUCT_CTOR(scalar_cast_op)
typedef half result_type; typedef Eigen::half result_type;
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half operator() (const int& a) const { EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Eigen::half operator() (const int& a) const {
#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 300 #if defined(EIGEN_HAS_CUDA_FP16) && defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 300
return __float2half(static_cast<float>(a)); return __float2half(static_cast<float>(a));
#else #else
return half(static_cast<float>(a)); return Eigen::half(static_cast<float>(a));
#endif #endif
} }
}; };
template<> template<>
struct functor_traits<scalar_cast_op<int, half> > struct functor_traits<scalar_cast_op<int, Eigen::half> >
{ enum { Cost = NumTraits<float>::AddCost, PacketAccess = false }; }; { enum { Cost = NumTraits<float>::AddCost, PacketAccess = false }; };
template<> template<>
struct scalar_cast_op<half, float> { struct scalar_cast_op<Eigen::half, float> {
EIGEN_EMPTY_STRUCT_CTOR(scalar_cast_op) EIGEN_EMPTY_STRUCT_CTOR(scalar_cast_op)
typedef float result_type; typedef float result_type;
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float operator() (const half& a) const { EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float operator() (const Eigen::half& a) const {
#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 300 #if defined(EIGEN_HAS_CUDA_FP16) && defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 300
return __half2float(a); return __half2float(a);
#else #else
return static_cast<float>(a); return static_cast<float>(a);
@ -66,15 +64,15 @@ struct scalar_cast_op<half, float> {
}; };
template<> template<>
struct functor_traits<scalar_cast_op<half, float> > struct functor_traits<scalar_cast_op<Eigen::half, float> >
{ enum { Cost = NumTraits<float>::AddCost, PacketAccess = false }; }; { enum { Cost = NumTraits<float>::AddCost, PacketAccess = false }; };
#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530 #if defined(EIGEN_HAS_CUDA_FP16) && defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 300
template <> template <>
struct type_casting_traits<half, float> { struct type_casting_traits<Eigen::half, float> {
enum { enum {
VectorizedCast = 1, VectorizedCast = 1,
SrcCoeffRatio = 2, SrcCoeffRatio = 2,
@ -89,7 +87,7 @@ template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 pcast<half2, float4>(con
} }
template <> template <>
struct type_casting_traits<float, half> { struct type_casting_traits<float, Eigen::half> {
enum { enum {
VectorizedCast = 1, VectorizedCast = 1,
SrcCoeffRatio = 1, SrcCoeffRatio = 1,
@ -97,12 +95,87 @@ struct type_casting_traits<float, half> {
}; };
}; };
template<> EIGEN_STRONG_INLINE half2 pcast<float4, half2>(const float4& a) { template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pcast<float4, half2>(const float4& a) {
// Simply discard the second half of the input // Simply discard the second half of the input
return __float22half2_rn(make_float2(a.x, a.y)); return __floats2half2_rn(a.x, a.y);
}
#elif defined EIGEN_VECTORIZE_AVX
template <>
struct type_casting_traits<Eigen::half, float> {
enum {
VectorizedCast = 1,
SrcCoeffRatio = 1,
TgtCoeffRatio = 1
};
};
template<> EIGEN_STRONG_INLINE Packet8f pcast<Packet8h, Packet8f>(const Packet8h& a) {
return half2float(a);
}
template <>
struct type_casting_traits<float, Eigen::half> {
enum {
VectorizedCast = 1,
SrcCoeffRatio = 1,
TgtCoeffRatio = 1
};
};
template<> EIGEN_STRONG_INLINE Packet8h pcast<Packet8f, Packet8h>(const Packet8f& a) {
return float2half(a);
}
// Disable the following code since it's broken on too many platforms / compilers.
//#elif defined(EIGEN_VECTORIZE_SSE) && (!EIGEN_ARCH_x86_64) && (!EIGEN_COMP_MSVC)
#elif 0
template <>
struct type_casting_traits<Eigen::half, float> {
enum {
VectorizedCast = 1,
SrcCoeffRatio = 1,
TgtCoeffRatio = 1
};
};
template<> EIGEN_STRONG_INLINE Packet4f pcast<Packet4h, Packet4f>(const Packet4h& a) {
__int64_t a64 = _mm_cvtm64_si64(a.x);
Eigen::half h = raw_uint16_to_half(static_cast<unsigned short>(a64));
float f1 = static_cast<float>(h);
h = raw_uint16_to_half(static_cast<unsigned short>(a64 >> 16));
float f2 = static_cast<float>(h);
h = raw_uint16_to_half(static_cast<unsigned short>(a64 >> 32));
float f3 = static_cast<float>(h);
h = raw_uint16_to_half(static_cast<unsigned short>(a64 >> 48));
float f4 = static_cast<float>(h);
return _mm_set_ps(f4, f3, f2, f1);
}
template <>
struct type_casting_traits<float, Eigen::half> {
enum {
VectorizedCast = 1,
SrcCoeffRatio = 1,
TgtCoeffRatio = 1
};
};
template<> EIGEN_STRONG_INLINE Packet4h pcast<Packet4f, Packet4h>(const Packet4f& a) {
EIGEN_ALIGN16 float aux[4];
pstore(aux, a);
Eigen::half h0(aux[0]);
Eigen::half h1(aux[1]);
Eigen::half h2(aux[2]);
Eigen::half h3(aux[3]);
Packet4h result;
result.x = _mm_set_pi16(h3.x, h2.x, h1.x, h0.x);
return result;
} }
#endif
#endif #endif
} // end namespace internal } // end namespace internal

View File

@ -1,6 +0,0 @@
FILE(GLOB Eigen_Core_arch_Default_SRCS "*.h")
INSTALL(FILES
${Eigen_Core_arch_Default_SRCS}
DESTINATION ${INCLUDE_INSTALL_DIR}/Eigen/src/Core/arch/Default COMPONENT Devel
)

View File

@ -1,6 +0,0 @@
FILE(GLOB Eigen_Core_arch_NEON_SRCS "*.h")
INSTALL(FILES
${Eigen_Core_arch_NEON_SRCS}
DESTINATION ${INCLUDE_INSTALL_DIR}/Eigen/src/Core/arch/NEON COMPONENT Devel
)

View File

@ -2,6 +2,7 @@
// for linear algebra. // for linear algebra.
// //
// Copyright (C) 2010 Gael Guennebaud <gael.guennebaud@inria.fr> // Copyright (C) 2010 Gael Guennebaud <gael.guennebaud@inria.fr>
// Copyright (C) 2010 Konstantinos Margaritis <markos@freevec.org>
// //
// This Source Code Form is subject to the terms of the Mozilla // This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed // Public License v. 2.0. If a copy of the MPL was not distributed
@ -14,8 +15,15 @@ namespace Eigen {
namespace internal { namespace internal {
static uint32x4_t p4ui_CONJ_XOR = EIGEN_INIT_NEON_PACKET4(0x00000000, 0x80000000, 0x00000000, 0x80000000); inline uint32x4_t p4ui_CONJ_XOR() {
static uint32x2_t p2ui_CONJ_XOR = EIGEN_INIT_NEON_PACKET2(0x00000000, 0x80000000); static const uint32_t conj_XOR_DATA[] = { 0x00000000, 0x80000000, 0x00000000, 0x80000000 };
return vld1q_u32( conj_XOR_DATA );
}
inline uint32x2_t p2ui_CONJ_XOR() {
static const uint32_t conj_XOR_DATA[] = { 0x00000000, 0x80000000 };
return vld1_u32( conj_XOR_DATA );
}
//---------- float ---------- //---------- float ----------
struct Packet2cf struct Packet2cf
@ -64,7 +72,7 @@ template<> EIGEN_STRONG_INLINE Packet2cf pnegate(const Packet2cf& a) { return Pa
template<> EIGEN_STRONG_INLINE Packet2cf pconj(const Packet2cf& a) template<> EIGEN_STRONG_INLINE Packet2cf pconj(const Packet2cf& a)
{ {
Packet4ui b = vreinterpretq_u32_f32(a.v); Packet4ui b = vreinterpretq_u32_f32(a.v);
return Packet2cf(vreinterpretq_f32_u32(veorq_u32(b, p4ui_CONJ_XOR))); return Packet2cf(vreinterpretq_f32_u32(veorq_u32(b, p4ui_CONJ_XOR())));
} }
template<> EIGEN_STRONG_INLINE Packet2cf pmul<Packet2cf>(const Packet2cf& a, const Packet2cf& b) template<> EIGEN_STRONG_INLINE Packet2cf pmul<Packet2cf>(const Packet2cf& a, const Packet2cf& b)
@ -80,7 +88,7 @@ template<> EIGEN_STRONG_INLINE Packet2cf pmul<Packet2cf>(const Packet2cf& a, con
// Multiply the imag a with b // Multiply the imag a with b
v2 = vmulq_f32(v2, b.v); v2 = vmulq_f32(v2, b.v);
// Conjugate v2 // Conjugate v2
v2 = vreinterpretq_f32_u32(veorq_u32(vreinterpretq_u32_f32(v2), p4ui_CONJ_XOR)); v2 = vreinterpretq_f32_u32(veorq_u32(vreinterpretq_u32_f32(v2), p4ui_CONJ_XOR()));
// Swap real/imag elements in v2. // Swap real/imag elements in v2.
v2 = vrev64q_f32(v2); v2 = vrev64q_f32(v2);
// Add and return the result // Add and return the result
@ -195,7 +203,7 @@ template<> EIGEN_STRONG_INLINE std::complex<float> predux_mul<Packet2cf>(const P
// Multiply the imag a with b // Multiply the imag a with b
v2 = vmul_f32(v2, a2); v2 = vmul_f32(v2, a2);
// Conjugate v2 // Conjugate v2
v2 = vreinterpret_f32_u32(veor_u32(vreinterpret_u32_f32(v2), p2ui_CONJ_XOR)); v2 = vreinterpret_f32_u32(veor_u32(vreinterpret_u32_f32(v2), p2ui_CONJ_XOR()));
// Swap real/imag elements in v2. // Swap real/imag elements in v2.
v2 = vrev64_f32(v2); v2 = vrev64_f32(v2);
// Add v1, v2 // Add v1, v2
@ -274,7 +282,8 @@ ptranspose(PacketBlock<Packet2cf,2>& kernel) {
//---------- double ---------- //---------- double ----------
#if EIGEN_ARCH_ARM64 && !EIGEN_APPLE_DOUBLE_NEON_BUG #if EIGEN_ARCH_ARM64 && !EIGEN_APPLE_DOUBLE_NEON_BUG
static uint64x2_t p2ul_CONJ_XOR = EIGEN_INIT_NEON_PACKET2(0x0, 0x8000000000000000); const uint64_t p2ul_conj_XOR_DATA[] = { 0x0, 0x8000000000000000 };
static uint64x2_t p2ul_CONJ_XOR = vld1q_u64( p2ul_conj_XOR_DATA );
struct Packet1cd struct Packet1cd
{ {

View File

@ -2,7 +2,7 @@
// for linear algebra. // for linear algebra.
// //
// Copyright (C) 2008-2009 Gael Guennebaud <gael.guennebaud@inria.fr> // Copyright (C) 2008-2009 Gael Guennebaud <gael.guennebaud@inria.fr>
// Copyright (C) 2010 Konstantinos Margaritis <markos@codex.gr> // Copyright (C) 2010 Konstantinos Margaritis <markos@freevec.org>
// Heavily based on Gael's SSE version. // Heavily based on Gael's SSE version.
// //
// This Source Code Form is subject to the terms of the Mozilla // This Source Code Form is subject to the terms of the Mozilla
@ -49,17 +49,6 @@ typedef uint32x4_t Packet4ui;
#define _EIGEN_DECLARE_CONST_Packet4i(NAME,X) \ #define _EIGEN_DECLARE_CONST_Packet4i(NAME,X) \
const Packet4i p4i_##NAME = pset1<Packet4i>(X) const Packet4i p4i_##NAME = pset1<Packet4i>(X)
#if EIGEN_COMP_LLVM && !EIGEN_COMP_CLANG
//Special treatment for Apple's llvm-gcc, its NEON packet types are unions
#define EIGEN_INIT_NEON_PACKET2(X, Y) {{X, Y}}
#define EIGEN_INIT_NEON_PACKET4(X, Y, Z, W) {{X, Y, Z, W}}
#else
//Default initializer for packets
#define EIGEN_INIT_NEON_PACKET2(X, Y) {X, Y}
#define EIGEN_INIT_NEON_PACKET4(X, Y, Z, W) {X, Y, Z, W}
#endif
// arm64 does have the pld instruction. If available, let's trust the __builtin_prefetch built-in function // arm64 does have the pld instruction. If available, let's trust the __builtin_prefetch built-in function
// which available on LLVM and GCC (at least) // which available on LLVM and GCC (at least)
#if EIGEN_HAS_BUILTIN(__builtin_prefetch) || EIGEN_COMP_GNUC #if EIGEN_HAS_BUILTIN(__builtin_prefetch) || EIGEN_COMP_GNUC
@ -122,12 +111,14 @@ template<> EIGEN_STRONG_INLINE Packet4i pset1<Packet4i>(const int& from) {
template<> EIGEN_STRONG_INLINE Packet4f plset<Packet4f>(const float& a) template<> EIGEN_STRONG_INLINE Packet4f plset<Packet4f>(const float& a)
{ {
Packet4f countdown = EIGEN_INIT_NEON_PACKET4(0, 1, 2, 3); const float32_t f[] = {0, 1, 2, 3};
Packet4f countdown = vld1q_f32(f);
return vaddq_f32(pset1<Packet4f>(a), countdown); return vaddq_f32(pset1<Packet4f>(a), countdown);
} }
template<> EIGEN_STRONG_INLINE Packet4i plset<Packet4i>(const int& a) template<> EIGEN_STRONG_INLINE Packet4i plset<Packet4i>(const int& a)
{ {
Packet4i countdown = EIGEN_INIT_NEON_PACKET4(0, 1, 2, 3); const int32_t i[] = {0, 1, 2, 3};
Packet4i countdown = vld1q_s32(i);
return vaddq_s32(pset1<Packet4i>(a), countdown); return vaddq_s32(pset1<Packet4i>(a), countdown);
} }
@ -334,22 +325,6 @@ template<> EIGEN_STRONG_INLINE Packet4i preverse(const Packet4i& a) {
return vcombine_s32(a_hi, a_lo); return vcombine_s32(a_hi, a_lo);
} }
template<size_t offset>
struct protate_impl<offset, Packet4f>
{
static Packet4f run(const Packet4f& a) {
return vextq_f32(a, a, offset);
}
};
template<size_t offset>
struct protate_impl<offset, Packet4i>
{
static Packet4i run(const Packet4i& a) {
return vextq_s32(a, a, offset);
}
};
template<> EIGEN_STRONG_INLINE Packet4f pabs(const Packet4f& a) { return vabsq_f32(a); } template<> EIGEN_STRONG_INLINE Packet4f pabs(const Packet4f& a) { return vabsq_f32(a); }
template<> EIGEN_STRONG_INLINE Packet4i pabs(const Packet4i& a) { return vabsq_s32(a); } template<> EIGEN_STRONG_INLINE Packet4i pabs(const Packet4i& a) { return vabsq_s32(a); }
@ -601,7 +576,8 @@ template<> EIGEN_STRONG_INLINE Packet2d pset1<Packet2d>(const double& from) { r
template<> EIGEN_STRONG_INLINE Packet2d plset<Packet2d>(const double& a) template<> EIGEN_STRONG_INLINE Packet2d plset<Packet2d>(const double& a)
{ {
Packet2d countdown = EIGEN_INIT_NEON_PACKET2(0, 1); const double countdown_raw[] = {0.0,1.0};
const Packet2d countdown = vld1q_f64(countdown_raw);
return vaddq_f64(pset1<Packet2d>(a), countdown); return vaddq_f64(pset1<Packet2d>(a), countdown);
} }
template<> EIGEN_STRONG_INLINE Packet2d padd<Packet2d>(const Packet2d& a, const Packet2d& b) { return vaddq_f64(a,b); } template<> EIGEN_STRONG_INLINE Packet2d padd<Packet2d>(const Packet2d& a, const Packet2d& b) { return vaddq_f64(a,b); }
@ -679,14 +655,6 @@ template<> EIGEN_STRONG_INLINE double pfirst<Packet2d>(const Packet2d& a) { retu
template<> EIGEN_STRONG_INLINE Packet2d preverse(const Packet2d& a) { return vcombine_f64(vget_high_f64(a), vget_low_f64(a)); } template<> EIGEN_STRONG_INLINE Packet2d preverse(const Packet2d& a) { return vcombine_f64(vget_high_f64(a), vget_low_f64(a)); }
template<size_t offset>
struct protate_impl<offset, Packet2d>
{
static Packet2d run(const Packet2d& a) {
return vextq_f64(a, a, offset);
}
};
template<> EIGEN_STRONG_INLINE Packet2d pabs(const Packet2d& a) { return vabsq_f64(a); } template<> EIGEN_STRONG_INLINE Packet2d pabs(const Packet2d& a) { return vabsq_f64(a); }
#if EIGEN_COMP_CLANG && defined(__apple_build_version__) #if EIGEN_COMP_CLANG && defined(__apple_build_version__)

View File

@ -1,6 +0,0 @@
FILE(GLOB Eigen_Core_arch_SSE_SRCS "*.h")
INSTALL(FILES
${Eigen_Core_arch_SSE_SRCS}
DESTINATION ${INCLUDE_INSTALL_DIR}/Eigen/src/Core/arch/SSE COMPONENT Devel
)

View File

@ -517,52 +517,10 @@ Packet2d prsqrt<Packet2d>(const Packet2d& x) {
} }
// Hyperbolic Tangent function. // Hyperbolic Tangent function.
// Doesn't do anything fancy, just a 13/6-degree rational interpolant which
// is accurate up to a couple of ulp in the range [-9, 9], outside of which the
// fl(tanh(x)) = +/-1.
template <> template <>
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet4f EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet4f
ptanh<Packet4f>(const Packet4f& _x) { ptanh<Packet4f>(const Packet4f& x) {
// Clamp the inputs to the range [-9, 9] since anything outside return internal::generic_fast_tanh_float(x);
// this range is +/-1.0f in single-precision.
_EIGEN_DECLARE_CONST_Packet4f(plus_9, 9.0f);
_EIGEN_DECLARE_CONST_Packet4f(minus_9, -9.0f);
const Packet4f x = pmax(p4f_minus_9, pmin(p4f_plus_9, _x));
// The monomial coefficients of the numerator polynomial (odd).
_EIGEN_DECLARE_CONST_Packet4f(alpha_1, 4.89352455891786e-03f);
_EIGEN_DECLARE_CONST_Packet4f(alpha_3, 6.37261928875436e-04f);
_EIGEN_DECLARE_CONST_Packet4f(alpha_5, 1.48572235717979e-05f);
_EIGEN_DECLARE_CONST_Packet4f(alpha_7, 5.12229709037114e-08f);
_EIGEN_DECLARE_CONST_Packet4f(alpha_9, -8.60467152213735e-11f);
_EIGEN_DECLARE_CONST_Packet4f(alpha_11, 2.00018790482477e-13f);
_EIGEN_DECLARE_CONST_Packet4f(alpha_13, -2.76076847742355e-16f);
// The monomial coefficients of the denominator polynomial (even).
_EIGEN_DECLARE_CONST_Packet4f(beta_0, 4.89352518554385e-03f);
_EIGEN_DECLARE_CONST_Packet4f(beta_2, 2.26843463243900e-03f);
_EIGEN_DECLARE_CONST_Packet4f(beta_4, 1.18534705686654e-04f);
_EIGEN_DECLARE_CONST_Packet4f(beta_6, 1.19825839466702e-06f);
// Since the polynomials are odd/even, we need x^2.
const Packet4f x2 = pmul(x, x);
// Evaluate the numerator polynomial p.
Packet4f p = pmadd(x2, p4f_alpha_13, p4f_alpha_11);
p = pmadd(x2, p, p4f_alpha_9);
p = pmadd(x2, p, p4f_alpha_7);
p = pmadd(x2, p, p4f_alpha_5);
p = pmadd(x2, p, p4f_alpha_3);
p = pmadd(x2, p, p4f_alpha_1);
p = pmul(x, p);
// Evaluate the denominator polynomial p.
Packet4f q = pmadd(x2, p4f_beta_6, p4f_beta_4);
q = pmadd(x2, q, p4f_beta_2);
q = pmadd(x2, q, p4f_beta_0);
// Divide the numerator by the denominator.
return pdiv(p, q);
} }
} // end namespace internal } // end namespace internal

View File

@ -162,6 +162,11 @@ template<> struct unpacket_traits<Packet4f> { typedef float type; enum {size=4,
template<> struct unpacket_traits<Packet2d> { typedef double type; enum {size=2, alignment=Aligned16}; typedef Packet2d half; }; template<> struct unpacket_traits<Packet2d> { typedef double type; enum {size=2, alignment=Aligned16}; typedef Packet2d half; };
template<> struct unpacket_traits<Packet4i> { typedef int type; enum {size=4, alignment=Aligned16}; typedef Packet4i half; }; template<> struct unpacket_traits<Packet4i> { typedef int type; enum {size=4, alignment=Aligned16}; typedef Packet4i half; };
#ifndef EIGEN_VECTORIZE_AVX
template<> struct scalar_div_cost<float,true> { enum { value = 7 }; };
template<> struct scalar_div_cost<double,true> { enum { value = 8 }; };
#endif
#if EIGEN_COMP_MSVC==1500 #if EIGEN_COMP_MSVC==1500
// Workaround MSVC 9 internal compiler error. // Workaround MSVC 9 internal compiler error.
// TODO: It has been detected with win64 builds (amd64), so let's check whether it also happens in 32bits+SSE mode // TODO: It has been detected with win64 builds (amd64), so let's check whether it also happens in 32bits+SSE mode
@ -434,30 +439,6 @@ template<> EIGEN_STRONG_INLINE Packet2d preverse(const Packet2d& a)
template<> EIGEN_STRONG_INLINE Packet4i preverse(const Packet4i& a) template<> EIGEN_STRONG_INLINE Packet4i preverse(const Packet4i& a)
{ return _mm_shuffle_epi32(a,0x1B); } { return _mm_shuffle_epi32(a,0x1B); }
template<size_t offset>
struct protate_impl<offset, Packet4f>
{
static Packet4f run(const Packet4f& a) {
return vec4f_swizzle1(a, offset, (offset + 1) % 4, (offset + 2) % 4, (offset + 3) % 4);
}
};
template<size_t offset>
struct protate_impl<offset, Packet4i>
{
static Packet4i run(const Packet4i& a) {
return vec4i_swizzle1(a, offset, (offset + 1) % 4, (offset + 2) % 4, (offset + 3) % 4);
}
};
template<size_t offset>
struct protate_impl<offset, Packet2d>
{
static Packet2d run(const Packet2d& a) {
return vec2d_swizzle1(a, offset, (offset + 1) % 2);
}
};
template<> EIGEN_STRONG_INLINE Packet4f pabs(const Packet4f& a) template<> EIGEN_STRONG_INLINE Packet4f pabs(const Packet4f& a)
{ {
const Packet4f mask = _mm_castsi128_ps(_mm_setr_epi32(0x7FFFFFFF,0x7FFFFFFF,0x7FFFFFFF,0x7FFFFFFF)); const Packet4f mask = _mm_castsi128_ps(_mm_setr_epi32(0x7FFFFFFF,0x7FFFFFFF,0x7FFFFFFF,0x7FFFFFFF));
@ -837,6 +818,16 @@ template<> EIGEN_STRONG_INLINE Packet2d pblend(const Selector<2>& ifPacket, cons
#endif #endif
} }
// Scalar path for pmadd with FMA to ensure consistency with vectorized path.
#ifdef __FMA__
template<> EIGEN_STRONG_INLINE float pmadd(const float& a, const float& b, const float& c) {
return ::fmaf(a,b,c);
}
template<> EIGEN_STRONG_INLINE double pmadd(const double& a, const double& b, const double& c) {
return ::fma(a,b,c);
}
#endif
} // end namespace internal } // end namespace internal
} // end namespace Eigen } // end namespace Eigen

View File

@ -1,6 +0,0 @@
FILE(GLOB Eigen_Core_arch_ZVector_SRCS "*.h")
INSTALL(FILES
${Eigen_Core_arch_ZVector_SRCS}
DESTINATION ${INCLUDE_INSTALL_DIR}/Eigen/src/Core/arch/ZVector COMPONENT Devel
)

View File

@ -57,21 +57,6 @@ template<> EIGEN_STRONG_INLINE void pstoreu<std::complex<double> >(std::complex<
template<> EIGEN_STRONG_INLINE Packet1cd pset1<Packet1cd>(const std::complex<double>& from) template<> EIGEN_STRONG_INLINE Packet1cd pset1<Packet1cd>(const std::complex<double>& from)
{ /* here we really have to use unaligned loads :( */ return ploadu<Packet1cd>(&from); } { /* here we really have to use unaligned loads :( */ return ploadu<Packet1cd>(&from); }
template<> EIGEN_DEVICE_FUNC inline Packet1cd pgather<std::complex<double>, Packet1cd>(const std::complex<double>* from, Index stride)
{
std::complex<double> EIGEN_ALIGN16 af[2];
af[0] = from[0*stride];
af[1] = from[1*stride];
return pload<Packet1cd>(af);
}
template<> EIGEN_DEVICE_FUNC inline void pscatter<std::complex<double>, Packet1cd>(std::complex<double>* to, const Packet1cd& from, Index stride)
{
std::complex<double> EIGEN_ALIGN16 af[2];
pstore<std::complex<double> >(af, from);
to[0*stride] = af[0];
to[1*stride] = af[1];
}
template<> EIGEN_STRONG_INLINE Packet1cd padd<Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(a.v + b.v); } template<> EIGEN_STRONG_INLINE Packet1cd padd<Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(a.v + b.v); }
template<> EIGEN_STRONG_INLINE Packet1cd psub<Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(a.v - b.v); } template<> EIGEN_STRONG_INLINE Packet1cd psub<Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(a.v - b.v); }
template<> EIGEN_STRONG_INLINE Packet1cd pnegate(const Packet1cd& a) { return Packet1cd(pnegate(Packet2d(a.v))); } template<> EIGEN_STRONG_INLINE Packet1cd pnegate(const Packet1cd& a) { return Packet1cd(pnegate(Packet2d(a.v))); }

View File

@ -18,20 +18,24 @@ namespace internal {
* \brief Template functor for scalar/packet assignment * \brief Template functor for scalar/packet assignment
* *
*/ */
template<typename Scalar> struct assign_op { template<typename DstScalar,typename SrcScalar> struct assign_op {
EIGEN_EMPTY_STRUCT_CTOR(assign_op) EIGEN_EMPTY_STRUCT_CTOR(assign_op)
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignCoeff(Scalar& a, const Scalar& b) const { a = b; } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignCoeff(DstScalar& a, const SrcScalar& b) const { a = b; }
template<int Alignment, typename Packet> template<int Alignment, typename Packet>
EIGEN_STRONG_INLINE void assignPacket(Scalar* a, const Packet& b) const EIGEN_STRONG_INLINE void assignPacket(DstScalar* a, const Packet& b) const
{ internal::pstoret<Scalar,Packet,Alignment>(a,b); } { internal::pstoret<DstScalar,Packet,Alignment>(a,b); }
}; };
template<typename Scalar>
struct functor_traits<assign_op<Scalar> > { // Empty overload for void type (used by PermutationMatrix
template<typename DstScalar> struct assign_op<DstScalar,void> {};
template<typename DstScalar,typename SrcScalar>
struct functor_traits<assign_op<DstScalar,SrcScalar> > {
enum { enum {
Cost = NumTraits<Scalar>::ReadCost, Cost = NumTraits<DstScalar>::ReadCost,
PacketAccess = packet_traits<Scalar>::Vectorizable PacketAccess = is_same<DstScalar,SrcScalar>::value && packet_traits<DstScalar>::Vectorizable && packet_traits<SrcScalar>::Vectorizable
}; };
}; };
@ -39,20 +43,20 @@ struct functor_traits<assign_op<Scalar> > {
* \brief Template functor for scalar/packet assignment with addition * \brief Template functor for scalar/packet assignment with addition
* *
*/ */
template<typename Scalar> struct add_assign_op { template<typename DstScalar,typename SrcScalar> struct add_assign_op {
EIGEN_EMPTY_STRUCT_CTOR(add_assign_op) EIGEN_EMPTY_STRUCT_CTOR(add_assign_op)
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignCoeff(Scalar& a, const Scalar& b) const { a += b; } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignCoeff(DstScalar& a, const SrcScalar& b) const { a += b; }
template<int Alignment, typename Packet> template<int Alignment, typename Packet>
EIGEN_STRONG_INLINE void assignPacket(Scalar* a, const Packet& b) const EIGEN_STRONG_INLINE void assignPacket(DstScalar* a, const Packet& b) const
{ internal::pstoret<Scalar,Packet,Alignment>(a,internal::padd(internal::ploadt<Packet,Alignment>(a),b)); } { internal::pstoret<DstScalar,Packet,Alignment>(a,internal::padd(internal::ploadt<Packet,Alignment>(a),b)); }
}; };
template<typename Scalar> template<typename DstScalar,typename SrcScalar>
struct functor_traits<add_assign_op<Scalar> > { struct functor_traits<add_assign_op<DstScalar,SrcScalar> > {
enum { enum {
Cost = NumTraits<Scalar>::ReadCost + NumTraits<Scalar>::AddCost, Cost = NumTraits<DstScalar>::ReadCost + NumTraits<DstScalar>::AddCost,
PacketAccess = packet_traits<Scalar>::HasAdd PacketAccess = is_same<DstScalar,SrcScalar>::value && packet_traits<DstScalar>::HasAdd
}; };
}; };
@ -60,20 +64,20 @@ struct functor_traits<add_assign_op<Scalar> > {
* \brief Template functor for scalar/packet assignment with subtraction * \brief Template functor for scalar/packet assignment with subtraction
* *
*/ */
template<typename Scalar> struct sub_assign_op { template<typename DstScalar,typename SrcScalar> struct sub_assign_op {
EIGEN_EMPTY_STRUCT_CTOR(sub_assign_op) EIGEN_EMPTY_STRUCT_CTOR(sub_assign_op)
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignCoeff(Scalar& a, const Scalar& b) const { a -= b; } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignCoeff(DstScalar& a, const SrcScalar& b) const { a -= b; }
template<int Alignment, typename Packet> template<int Alignment, typename Packet>
EIGEN_STRONG_INLINE void assignPacket(Scalar* a, const Packet& b) const EIGEN_STRONG_INLINE void assignPacket(DstScalar* a, const Packet& b) const
{ internal::pstoret<Scalar,Packet,Alignment>(a,internal::psub(internal::ploadt<Packet,Alignment>(a),b)); } { internal::pstoret<DstScalar,Packet,Alignment>(a,internal::psub(internal::ploadt<Packet,Alignment>(a),b)); }
}; };
template<typename Scalar> template<typename DstScalar,typename SrcScalar>
struct functor_traits<sub_assign_op<Scalar> > { struct functor_traits<sub_assign_op<DstScalar,SrcScalar> > {
enum { enum {
Cost = NumTraits<Scalar>::ReadCost + NumTraits<Scalar>::AddCost, Cost = NumTraits<DstScalar>::ReadCost + NumTraits<DstScalar>::AddCost,
PacketAccess = packet_traits<Scalar>::HasSub PacketAccess = is_same<DstScalar,SrcScalar>::value && packet_traits<DstScalar>::HasSub
}; };
}; };
@ -98,30 +102,28 @@ struct functor_traits<mul_assign_op<DstScalar,SrcScalar> > {
PacketAccess = is_same<DstScalar,SrcScalar>::value && packet_traits<DstScalar>::HasMul PacketAccess = is_same<DstScalar,SrcScalar>::value && packet_traits<DstScalar>::HasMul
}; };
}; };
template<typename DstScalar,typename SrcScalar> struct functor_is_product_like<mul_assign_op<DstScalar,SrcScalar> > { enum { ret = 1 }; };
/** \internal /** \internal
* \brief Template functor for scalar/packet assignment with diviving * \brief Template functor for scalar/packet assignment with diviving
* *
*/ */
template<typename Scalar> struct div_assign_op { template<typename DstScalar, typename SrcScalar=DstScalar> struct div_assign_op {
EIGEN_EMPTY_STRUCT_CTOR(div_assign_op) EIGEN_EMPTY_STRUCT_CTOR(div_assign_op)
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignCoeff(Scalar& a, const Scalar& b) const { a /= b; } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignCoeff(DstScalar& a, const SrcScalar& b) const { a /= b; }
template<int Alignment, typename Packet> template<int Alignment, typename Packet>
EIGEN_STRONG_INLINE void assignPacket(Scalar* a, const Packet& b) const EIGEN_STRONG_INLINE void assignPacket(DstScalar* a, const Packet& b) const
{ internal::pstoret<Scalar,Packet,Alignment>(a,internal::pdiv(internal::ploadt<Packet,Alignment>(a),b)); } { internal::pstoret<DstScalar,Packet,Alignment>(a,internal::pdiv(internal::ploadt<Packet,Alignment>(a),b)); }
}; };
template<typename Scalar> template<typename DstScalar, typename SrcScalar>
struct functor_traits<div_assign_op<Scalar> > { struct functor_traits<div_assign_op<DstScalar,SrcScalar> > {
enum { enum {
Cost = NumTraits<Scalar>::ReadCost + NumTraits<Scalar>::MulCost, Cost = NumTraits<DstScalar>::ReadCost + NumTraits<DstScalar>::MulCost,
PacketAccess = packet_traits<Scalar>::HasDiv PacketAccess = is_same<DstScalar,SrcScalar>::value && packet_traits<DstScalar>::HasDiv
}; };
}; };
/** \internal /** \internal
* \brief Template functor for scalar/packet assignment with swapping * \brief Template functor for scalar/packet assignment with swapping
* *

View File

@ -16,27 +16,43 @@ namespace internal {
//---------- associative binary functors ---------- //---------- associative binary functors ----------
template<typename Arg1, typename Arg2>
struct binary_op_base
{
typedef Arg1 first_argument_type;
typedef Arg2 second_argument_type;
};
/** \internal /** \internal
* \brief Template functor to compute the sum of two scalars * \brief Template functor to compute the sum of two scalars
* *
* \sa class CwiseBinaryOp, MatrixBase::operator+, class VectorwiseOp, DenseBase::sum() * \sa class CwiseBinaryOp, MatrixBase::operator+, class VectorwiseOp, DenseBase::sum()
*/ */
template<typename Scalar> struct scalar_sum_op { template<typename LhsScalar,typename RhsScalar>
// typedef Scalar result_type; struct scalar_sum_op : binary_op_base<LhsScalar,RhsScalar>
{
typedef typename ScalarBinaryOpTraits<LhsScalar,RhsScalar,scalar_sum_op>::ReturnType result_type;
#ifndef EIGEN_SCALAR_BINARY_OP_PLUGIN
EIGEN_EMPTY_STRUCT_CTOR(scalar_sum_op) EIGEN_EMPTY_STRUCT_CTOR(scalar_sum_op)
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& a, const Scalar& b) const { return a + b; } #else
scalar_sum_op() {
EIGEN_SCALAR_BINARY_OP_PLUGIN
}
#endif
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const result_type operator() (const LhsScalar& a, const RhsScalar& b) const { return a + b; }
template<typename Packet> template<typename Packet>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a, const Packet& b) const EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a, const Packet& b) const
{ return internal::padd(a,b); } { return internal::padd(a,b); }
template<typename Packet> template<typename Packet>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar predux(const Packet& a) const EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const result_type predux(const Packet& a) const
{ return internal::predux(a); } { return internal::predux(a); }
}; };
template<typename Scalar> template<typename LhsScalar,typename RhsScalar>
struct functor_traits<scalar_sum_op<Scalar> > { struct functor_traits<scalar_sum_op<LhsScalar,RhsScalar> > {
enum { enum {
Cost = NumTraits<Scalar>::AddCost, Cost = (NumTraits<LhsScalar>::AddCost+NumTraits<RhsScalar>::AddCost)/2, // rough estimate!
PacketAccess = packet_traits<Scalar>::HasAdd PacketAccess = is_same<LhsScalar,RhsScalar>::value && packet_traits<LhsScalar>::HasAdd && packet_traits<RhsScalar>::HasAdd
// TODO vectorize mixed sum
}; };
}; };
@ -45,7 +61,7 @@ struct functor_traits<scalar_sum_op<Scalar> > {
* This is required to solve Bug 426. * This is required to solve Bug 426.
* \sa DenseBase::count(), DenseBase::any(), ArrayBase::cast(), MatrixBase::cast() * \sa DenseBase::count(), DenseBase::any(), ArrayBase::cast(), MatrixBase::cast()
*/ */
template<> struct scalar_sum_op<bool> : scalar_sum_op<int> { template<> struct scalar_sum_op<bool,bool> : scalar_sum_op<int,int> {
EIGEN_DEPRECATED EIGEN_DEPRECATED
scalar_sum_op() {} scalar_sum_op() {}
}; };
@ -56,13 +72,17 @@ template<> struct scalar_sum_op<bool> : scalar_sum_op<int> {
* *
* \sa class CwiseBinaryOp, Cwise::operator*(), class VectorwiseOp, MatrixBase::redux() * \sa class CwiseBinaryOp, Cwise::operator*(), class VectorwiseOp, MatrixBase::redux()
*/ */
template<typename LhsScalar,typename RhsScalar> struct scalar_product_op { template<typename LhsScalar,typename RhsScalar>
enum { struct scalar_product_op : binary_op_base<LhsScalar,RhsScalar>
// TODO vectorize mixed product {
Vectorizable = is_same<LhsScalar,RhsScalar>::value && packet_traits<LhsScalar>::HasMul && packet_traits<RhsScalar>::HasMul typedef typename ScalarBinaryOpTraits<LhsScalar,RhsScalar,scalar_product_op>::ReturnType result_type;
}; #ifndef EIGEN_SCALAR_BINARY_OP_PLUGIN
typedef typename scalar_product_traits<LhsScalar,RhsScalar>::ReturnType result_type;
EIGEN_EMPTY_STRUCT_CTOR(scalar_product_op) EIGEN_EMPTY_STRUCT_CTOR(scalar_product_op)
#else
scalar_product_op() {
EIGEN_SCALAR_BINARY_OP_PLUGIN
}
#endif
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const result_type operator() (const LhsScalar& a, const RhsScalar& b) const { return a * b; } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const result_type operator() (const LhsScalar& a, const RhsScalar& b) const { return a * b; }
template<typename Packet> template<typename Packet>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a, const Packet& b) const EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a, const Packet& b) const
@ -75,7 +95,8 @@ template<typename LhsScalar,typename RhsScalar>
struct functor_traits<scalar_product_op<LhsScalar,RhsScalar> > { struct functor_traits<scalar_product_op<LhsScalar,RhsScalar> > {
enum { enum {
Cost = (NumTraits<LhsScalar>::MulCost + NumTraits<RhsScalar>::MulCost)/2, // rough estimate! Cost = (NumTraits<LhsScalar>::MulCost + NumTraits<RhsScalar>::MulCost)/2, // rough estimate!
PacketAccess = scalar_product_op<LhsScalar,RhsScalar>::Vectorizable PacketAccess = is_same<LhsScalar,RhsScalar>::value && packet_traits<LhsScalar>::HasMul && packet_traits<RhsScalar>::HasMul
// TODO vectorize mixed product
}; };
}; };
@ -84,13 +105,15 @@ struct functor_traits<scalar_product_op<LhsScalar,RhsScalar> > {
* *
* This is a short cut for conj(x) * y which is needed for optimization purpose; in Eigen2 support mode, this becomes x * conj(y) * This is a short cut for conj(x) * y which is needed for optimization purpose; in Eigen2 support mode, this becomes x * conj(y)
*/ */
template<typename LhsScalar,typename RhsScalar> struct scalar_conj_product_op { template<typename LhsScalar,typename RhsScalar>
struct scalar_conj_product_op : binary_op_base<LhsScalar,RhsScalar>
{
enum { enum {
Conj = NumTraits<LhsScalar>::IsComplex Conj = NumTraits<LhsScalar>::IsComplex
}; };
typedef typename scalar_product_traits<LhsScalar,RhsScalar>::ReturnType result_type; typedef typename ScalarBinaryOpTraits<LhsScalar,RhsScalar,scalar_conj_product_op>::ReturnType result_type;
EIGEN_EMPTY_STRUCT_CTOR(scalar_conj_product_op) EIGEN_EMPTY_STRUCT_CTOR(scalar_conj_product_op)
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const result_type operator() (const LhsScalar& a, const RhsScalar& b) const EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const result_type operator() (const LhsScalar& a, const RhsScalar& b) const
@ -113,21 +136,24 @@ struct functor_traits<scalar_conj_product_op<LhsScalar,RhsScalar> > {
* *
* \sa class CwiseBinaryOp, MatrixBase::cwiseMin, class VectorwiseOp, MatrixBase::minCoeff() * \sa class CwiseBinaryOp, MatrixBase::cwiseMin, class VectorwiseOp, MatrixBase::minCoeff()
*/ */
template<typename Scalar> struct scalar_min_op { template<typename LhsScalar,typename RhsScalar>
struct scalar_min_op : binary_op_base<LhsScalar,RhsScalar>
{
typedef typename ScalarBinaryOpTraits<LhsScalar,RhsScalar,scalar_min_op>::ReturnType result_type;
EIGEN_EMPTY_STRUCT_CTOR(scalar_min_op) EIGEN_EMPTY_STRUCT_CTOR(scalar_min_op)
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& a, const Scalar& b) const { return numext::mini(a, b); } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const result_type operator() (const LhsScalar& a, const RhsScalar& b) const { return numext::mini(a, b); }
template<typename Packet> template<typename Packet>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a, const Packet& b) const EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a, const Packet& b) const
{ return internal::pmin(a,b); } { return internal::pmin(a,b); }
template<typename Packet> template<typename Packet>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar predux(const Packet& a) const EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const result_type predux(const Packet& a) const
{ return internal::predux_min(a); } { return internal::predux_min(a); }
}; };
template<typename Scalar> template<typename LhsScalar,typename RhsScalar>
struct functor_traits<scalar_min_op<Scalar> > { struct functor_traits<scalar_min_op<LhsScalar,RhsScalar> > {
enum { enum {
Cost = NumTraits<Scalar>::AddCost, Cost = (NumTraits<LhsScalar>::AddCost+NumTraits<RhsScalar>::AddCost)/2,
PacketAccess = packet_traits<Scalar>::HasMin PacketAccess = internal::is_same<LhsScalar, RhsScalar>::value && packet_traits<LhsScalar>::HasMin
}; };
}; };
@ -136,21 +162,24 @@ struct functor_traits<scalar_min_op<Scalar> > {
* *
* \sa class CwiseBinaryOp, MatrixBase::cwiseMax, class VectorwiseOp, MatrixBase::maxCoeff() * \sa class CwiseBinaryOp, MatrixBase::cwiseMax, class VectorwiseOp, MatrixBase::maxCoeff()
*/ */
template<typename Scalar> struct scalar_max_op { template<typename LhsScalar,typename RhsScalar>
struct scalar_max_op : binary_op_base<LhsScalar,RhsScalar>
{
typedef typename ScalarBinaryOpTraits<LhsScalar,RhsScalar,scalar_max_op>::ReturnType result_type;
EIGEN_EMPTY_STRUCT_CTOR(scalar_max_op) EIGEN_EMPTY_STRUCT_CTOR(scalar_max_op)
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& a, const Scalar& b) const { return numext::maxi(a, b); } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const result_type operator() (const LhsScalar& a, const RhsScalar& b) const { return numext::maxi(a, b); }
template<typename Packet> template<typename Packet>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a, const Packet& b) const EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a, const Packet& b) const
{ return internal::pmax(a,b); } { return internal::pmax(a,b); }
template<typename Packet> template<typename Packet>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar predux(const Packet& a) const EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const result_type predux(const Packet& a) const
{ return internal::predux_max(a); } { return internal::predux_max(a); }
}; };
template<typename Scalar> template<typename LhsScalar,typename RhsScalar>
struct functor_traits<scalar_max_op<Scalar> > { struct functor_traits<scalar_max_op<LhsScalar,RhsScalar> > {
enum { enum {
Cost = NumTraits<Scalar>::AddCost, Cost = (NumTraits<LhsScalar>::AddCost+NumTraits<RhsScalar>::AddCost)/2,
PacketAccess = packet_traits<Scalar>::HasMax PacketAccess = internal::is_same<LhsScalar, RhsScalar>::value && packet_traits<LhsScalar>::HasMax
}; };
}; };
@ -158,56 +187,70 @@ struct functor_traits<scalar_max_op<Scalar> > {
* \brief Template functors for comparison of two scalars * \brief Template functors for comparison of two scalars
* \todo Implement packet-comparisons * \todo Implement packet-comparisons
*/ */
template<typename Scalar, ComparisonName cmp> struct scalar_cmp_op; template<typename LhsScalar, typename RhsScalar, ComparisonName cmp> struct scalar_cmp_op;
template<typename Scalar, ComparisonName cmp> template<typename LhsScalar, typename RhsScalar, ComparisonName cmp>
struct functor_traits<scalar_cmp_op<Scalar, cmp> > { struct functor_traits<scalar_cmp_op<LhsScalar,RhsScalar, cmp> > {
enum { enum {
Cost = NumTraits<Scalar>::AddCost, Cost = (NumTraits<LhsScalar>::AddCost+NumTraits<RhsScalar>::AddCost)/2,
PacketAccess = false PacketAccess = false
}; };
}; };
template<ComparisonName Cmp, typename Scalar> template<ComparisonName Cmp, typename LhsScalar, typename RhsScalar>
struct result_of<scalar_cmp_op<Scalar, Cmp>(Scalar,Scalar)> { struct result_of<scalar_cmp_op<LhsScalar, RhsScalar, Cmp>(LhsScalar,RhsScalar)> {
typedef bool type; typedef bool type;
}; };
template<typename Scalar> struct scalar_cmp_op<Scalar, cmp_EQ> { template<typename LhsScalar, typename RhsScalar>
struct scalar_cmp_op<LhsScalar,RhsScalar, cmp_EQ> : binary_op_base<LhsScalar,RhsScalar>
{
typedef bool result_type; typedef bool result_type;
EIGEN_EMPTY_STRUCT_CTOR(scalar_cmp_op) EIGEN_EMPTY_STRUCT_CTOR(scalar_cmp_op)
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool operator()(const Scalar& a, const Scalar& b) const {return a==b;} EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool operator()(const LhsScalar& a, const RhsScalar& b) const {return a==b;}
}; };
template<typename Scalar> struct scalar_cmp_op<Scalar, cmp_LT> { template<typename LhsScalar, typename RhsScalar>
struct scalar_cmp_op<LhsScalar,RhsScalar, cmp_LT> : binary_op_base<LhsScalar,RhsScalar>
{
typedef bool result_type; typedef bool result_type;
EIGEN_EMPTY_STRUCT_CTOR(scalar_cmp_op) EIGEN_EMPTY_STRUCT_CTOR(scalar_cmp_op)
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool operator()(const Scalar& a, const Scalar& b) const {return a<b;} EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool operator()(const LhsScalar& a, const RhsScalar& b) const {return a<b;}
}; };
template<typename Scalar> struct scalar_cmp_op<Scalar, cmp_LE> { template<typename LhsScalar, typename RhsScalar>
struct scalar_cmp_op<LhsScalar,RhsScalar, cmp_LE> : binary_op_base<LhsScalar,RhsScalar>
{
typedef bool result_type; typedef bool result_type;
EIGEN_EMPTY_STRUCT_CTOR(scalar_cmp_op) EIGEN_EMPTY_STRUCT_CTOR(scalar_cmp_op)
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool operator()(const Scalar& a, const Scalar& b) const {return a<=b;} EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool operator()(const LhsScalar& a, const RhsScalar& b) const {return a<=b;}
}; };
template<typename Scalar> struct scalar_cmp_op<Scalar, cmp_GT> { template<typename LhsScalar, typename RhsScalar>
struct scalar_cmp_op<LhsScalar,RhsScalar, cmp_GT> : binary_op_base<LhsScalar,RhsScalar>
{
typedef bool result_type; typedef bool result_type;
EIGEN_EMPTY_STRUCT_CTOR(scalar_cmp_op) EIGEN_EMPTY_STRUCT_CTOR(scalar_cmp_op)
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool operator()(const Scalar& a, const Scalar& b) const {return a>b;} EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool operator()(const LhsScalar& a, const RhsScalar& b) const {return a>b;}
}; };
template<typename Scalar> struct scalar_cmp_op<Scalar, cmp_GE> { template<typename LhsScalar, typename RhsScalar>
struct scalar_cmp_op<LhsScalar,RhsScalar, cmp_GE> : binary_op_base<LhsScalar,RhsScalar>
{
typedef bool result_type; typedef bool result_type;
EIGEN_EMPTY_STRUCT_CTOR(scalar_cmp_op) EIGEN_EMPTY_STRUCT_CTOR(scalar_cmp_op)
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool operator()(const Scalar& a, const Scalar& b) const {return a>=b;} EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool operator()(const LhsScalar& a, const RhsScalar& b) const {return a>=b;}
}; };
template<typename Scalar> struct scalar_cmp_op<Scalar, cmp_UNORD> { template<typename LhsScalar, typename RhsScalar>
struct scalar_cmp_op<LhsScalar,RhsScalar, cmp_UNORD> : binary_op_base<LhsScalar,RhsScalar>
{
typedef bool result_type; typedef bool result_type;
EIGEN_EMPTY_STRUCT_CTOR(scalar_cmp_op) EIGEN_EMPTY_STRUCT_CTOR(scalar_cmp_op)
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool operator()(const Scalar& a, const Scalar& b) const {return !(a<=b || b<=a);} EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool operator()(const LhsScalar& a, const RhsScalar& b) const {return !(a<=b || b<=a);}
}; };
template<typename Scalar> struct scalar_cmp_op<Scalar, cmp_NEQ> { template<typename LhsScalar, typename RhsScalar>
struct scalar_cmp_op<LhsScalar,RhsScalar, cmp_NEQ> : binary_op_base<LhsScalar,RhsScalar>
{
typedef bool result_type; typedef bool result_type;
EIGEN_EMPTY_STRUCT_CTOR(scalar_cmp_op) EIGEN_EMPTY_STRUCT_CTOR(scalar_cmp_op)
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool operator()(const Scalar& a, const Scalar& b) const {return a!=b;} EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool operator()(const LhsScalar& a, const RhsScalar& b) const {return a!=b;}
}; };
@ -216,7 +259,9 @@ template<typename Scalar> struct scalar_cmp_op<Scalar, cmp_NEQ> {
* *
* \sa MatrixBase::stableNorm(), class Redux * \sa MatrixBase::stableNorm(), class Redux
*/ */
template<typename Scalar> struct scalar_hypot_op { template<typename Scalar>
struct scalar_hypot_op<Scalar,Scalar> : binary_op_base<Scalar,Scalar>
{
EIGEN_EMPTY_STRUCT_CTOR(scalar_hypot_op) EIGEN_EMPTY_STRUCT_CTOR(scalar_hypot_op)
// typedef typename NumTraits<Scalar>::Real result_type; // typedef typename NumTraits<Scalar>::Real result_type;
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& _x, const Scalar& _y) const EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& _x, const Scalar& _y) const
@ -237,12 +282,12 @@ template<typename Scalar> struct scalar_hypot_op {
} }
}; };
template<typename Scalar> template<typename Scalar>
struct functor_traits<scalar_hypot_op<Scalar> > { struct functor_traits<scalar_hypot_op<Scalar,Scalar> > {
enum enum
{ {
Cost = 3 * NumTraits<Scalar>::AddCost + Cost = 3 * NumTraits<Scalar>::AddCost +
2 * NumTraits<Scalar>::MulCost + 2 * NumTraits<Scalar>::MulCost +
2 * NumTraits<Scalar>::template Div<false>::Cost, 2 * scalar_div_cost<Scalar,false>::value,
PacketAccess = false PacketAccess = false
}; };
}; };
@ -250,13 +295,24 @@ struct functor_traits<scalar_hypot_op<Scalar> > {
/** \internal /** \internal
* \brief Template functor to compute the pow of two scalars * \brief Template functor to compute the pow of two scalars
*/ */
template<typename Scalar, typename OtherScalar> struct scalar_binary_pow_op { template<typename Scalar, typename Exponent>
EIGEN_EMPTY_STRUCT_CTOR(scalar_binary_pow_op) struct scalar_pow_op : binary_op_base<Scalar,Exponent>
{
typedef typename ScalarBinaryOpTraits<Scalar,Exponent,scalar_pow_op>::ReturnType result_type;
#ifndef EIGEN_SCALAR_BINARY_OP_PLUGIN
EIGEN_EMPTY_STRUCT_CTOR(scalar_pow_op)
#else
scalar_pow_op() {
typedef Scalar LhsScalar;
typedef Exponent RhsScalar;
EIGEN_SCALAR_BINARY_OP_PLUGIN
}
#endif
EIGEN_DEVICE_FUNC EIGEN_DEVICE_FUNC
inline Scalar operator() (const Scalar& a, const OtherScalar& b) const { return numext::pow(a, b); } inline result_type operator() (const Scalar& a, const Exponent& b) const { return numext::pow(a, b); }
}; };
template<typename Scalar, typename OtherScalar> template<typename Scalar, typename Exponent>
struct functor_traits<scalar_binary_pow_op<Scalar,OtherScalar> > { struct functor_traits<scalar_pow_op<Scalar,Exponent> > {
enum { Cost = 5 * NumTraits<Scalar>::MulCost, PacketAccess = false }; enum { Cost = 5 * NumTraits<Scalar>::MulCost, PacketAccess = false };
}; };
@ -269,18 +325,27 @@ struct functor_traits<scalar_binary_pow_op<Scalar,OtherScalar> > {
* *
* \sa class CwiseBinaryOp, MatrixBase::operator- * \sa class CwiseBinaryOp, MatrixBase::operator-
*/ */
template<typename Scalar> struct scalar_difference_op { template<typename LhsScalar,typename RhsScalar>
struct scalar_difference_op : binary_op_base<LhsScalar,RhsScalar>
{
typedef typename ScalarBinaryOpTraits<LhsScalar,RhsScalar,scalar_difference_op>::ReturnType result_type;
#ifndef EIGEN_SCALAR_BINARY_OP_PLUGIN
EIGEN_EMPTY_STRUCT_CTOR(scalar_difference_op) EIGEN_EMPTY_STRUCT_CTOR(scalar_difference_op)
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& a, const Scalar& b) const { return a - b; } #else
scalar_difference_op() {
EIGEN_SCALAR_BINARY_OP_PLUGIN
}
#endif
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const result_type operator() (const LhsScalar& a, const RhsScalar& b) const { return a - b; }
template<typename Packet> template<typename Packet>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a, const Packet& b) const EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a, const Packet& b) const
{ return internal::psub(a,b); } { return internal::psub(a,b); }
}; };
template<typename Scalar> template<typename LhsScalar,typename RhsScalar>
struct functor_traits<scalar_difference_op<Scalar> > { struct functor_traits<scalar_difference_op<LhsScalar,RhsScalar> > {
enum { enum {
Cost = NumTraits<Scalar>::AddCost, Cost = (NumTraits<LhsScalar>::AddCost+NumTraits<RhsScalar>::AddCost)/2,
PacketAccess = packet_traits<Scalar>::HasSub PacketAccess = is_same<LhsScalar,RhsScalar>::value && packet_traits<LhsScalar>::HasSub && packet_traits<RhsScalar>::HasSub
}; };
}; };
@ -289,13 +354,17 @@ struct functor_traits<scalar_difference_op<Scalar> > {
* *
* \sa class CwiseBinaryOp, Cwise::operator/() * \sa class CwiseBinaryOp, Cwise::operator/()
*/ */
template<typename LhsScalar,typename RhsScalar> struct scalar_quotient_op { template<typename LhsScalar,typename RhsScalar>
enum { struct scalar_quotient_op : binary_op_base<LhsScalar,RhsScalar>
// TODO vectorize mixed product {
Vectorizable = is_same<LhsScalar,RhsScalar>::value && packet_traits<LhsScalar>::HasDiv && packet_traits<RhsScalar>::HasDiv typedef typename ScalarBinaryOpTraits<LhsScalar,RhsScalar,scalar_quotient_op>::ReturnType result_type;
}; #ifndef EIGEN_SCALAR_BINARY_OP_PLUGIN
typedef typename scalar_product_traits<LhsScalar,RhsScalar>::ReturnType result_type;
EIGEN_EMPTY_STRUCT_CTOR(scalar_quotient_op) EIGEN_EMPTY_STRUCT_CTOR(scalar_quotient_op)
#else
scalar_quotient_op() {
EIGEN_SCALAR_BINARY_OP_PLUGIN
}
#endif
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const result_type operator() (const LhsScalar& a, const RhsScalar& b) const { return a / b; } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const result_type operator() (const LhsScalar& a, const RhsScalar& b) const { return a / b; }
template<typename Packet> template<typename Packet>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a, const Packet& b) const EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a, const Packet& b) const
@ -305,8 +374,8 @@ template<typename LhsScalar,typename RhsScalar>
struct functor_traits<scalar_quotient_op<LhsScalar,RhsScalar> > { struct functor_traits<scalar_quotient_op<LhsScalar,RhsScalar> > {
typedef typename scalar_quotient_op<LhsScalar,RhsScalar>::result_type result_type; typedef typename scalar_quotient_op<LhsScalar,RhsScalar>::result_type result_type;
enum { enum {
PacketAccess = scalar_quotient_op<LhsScalar,RhsScalar>::Vectorizable, PacketAccess = is_same<LhsScalar,RhsScalar>::value && packet_traits<LhsScalar>::HasDiv && packet_traits<RhsScalar>::HasDiv,
Cost = NumTraits<result_type>::template Div<PacketAccess>::Cost Cost = scalar_div_cost<result_type,PacketAccess>::value
}; };
}; };
@ -360,236 +429,50 @@ template<> struct functor_traits<scalar_boolean_xor_op> {
}; };
}; };
/** \internal
* \brief Template functor to compute the incomplete gamma function igamma(a, x)
*
* \sa class CwiseBinaryOp, Cwise::igamma
*/
template<typename Scalar> struct scalar_igamma_op {
EIGEN_EMPTY_STRUCT_CTOR(scalar_igamma_op)
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& a, const Scalar& x) const {
using numext::igamma; return igamma(a, x);
}
template<typename Packet>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a, const Packet& x) const {
return internal::pigammac(a, x);
}
};
template<typename Scalar>
struct functor_traits<scalar_igamma_op<Scalar> > {
enum {
// Guesstimate
Cost = 20 * NumTraits<Scalar>::MulCost + 10 * NumTraits<Scalar>::AddCost,
PacketAccess = packet_traits<Scalar>::HasIGamma
};
};
/** \internal
* \brief Template functor to compute the complementary incomplete gamma function igammac(a, x)
*
* \sa class CwiseBinaryOp, Cwise::igammac
*/
template<typename Scalar> struct scalar_igammac_op {
EIGEN_EMPTY_STRUCT_CTOR(scalar_igammac_op)
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& a, const Scalar& x) const {
using numext::igammac; return igammac(a, x);
}
template<typename Packet>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a, const Packet& x) const
{
return internal::pigammac(a, x);
}
};
template<typename Scalar>
struct functor_traits<scalar_igammac_op<Scalar> > {
enum {
// Guesstimate
Cost = 20 * NumTraits<Scalar>::MulCost + 10 * NumTraits<Scalar>::AddCost,
PacketAccess = packet_traits<Scalar>::HasIGammac
};
};
//---------- binary functors bound to a constant, thus appearing as a unary functor ---------- //---------- binary functors bound to a constant, thus appearing as a unary functor ----------
/** \internal // The following two classes permits to turn any binary functor into a unary one with one argument bound to a constant value.
* \brief Template functor to multiply a scalar by a fixed other one // They are analogues to std::binder1st/binder2nd but with the following differences:
* // - they are compatible with packetOp
* \sa class CwiseUnaryOp, MatrixBase::operator*, MatrixBase::operator/ // - they are portable across C++ versions (the std::binder* are deprecated in C++11)
*/ template<typename BinaryOp> struct bind1st_op : BinaryOp {
/* NOTE why doing the pset1() in packetOp *is* an optimization ?
* indeed it seems better to declare m_other as a Packet and do the pset1() once typedef typename BinaryOp::first_argument_type first_argument_type;
* in the constructor. However, in practice: typedef typename BinaryOp::second_argument_type second_argument_type;
* - GCC does not like m_other as a Packet and generate a load every time it needs it typedef typename BinaryOp::result_type result_type;
* - on the other hand GCC is able to moves the pset1() outside the loop :)
* - simpler code ;) bind1st_op(const first_argument_type &val) : m_value(val) {}
* (ICC and gcc 4.4 seems to perform well in both cases, the issue is visible with y = a*x + b*y)
*/ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const result_type operator() (const second_argument_type& b) const { return BinaryOp::operator()(m_value,b); }
template<typename Scalar>
struct scalar_multiple_op { template<typename Packet>
// FIXME default copy constructors seems bugged with std::complex<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(const Packet& b) const
EIGEN_DEVICE_FUNC { return BinaryOp::packetOp(internal::pset1<Packet>(m_value), b); }
EIGEN_STRONG_INLINE scalar_multiple_op(const scalar_multiple_op& other) : m_other(other.m_other) { }
EIGEN_DEVICE_FUNC first_argument_type m_value;
EIGEN_STRONG_INLINE scalar_multiple_op(const Scalar& other) : m_other(other) { } };
EIGEN_DEVICE_FUNC template<typename BinaryOp> struct functor_traits<bind1st_op<BinaryOp> > : functor_traits<BinaryOp> {};
EIGEN_STRONG_INLINE Scalar operator() (const Scalar& a) const { return a * m_other; }
template<typename BinaryOp> struct bind2nd_op : BinaryOp {
typedef typename BinaryOp::first_argument_type first_argument_type;
typedef typename BinaryOp::second_argument_type second_argument_type;
typedef typename BinaryOp::result_type result_type;
bind2nd_op(const second_argument_type &val) : m_value(val) {}
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const result_type operator() (const first_argument_type& a) const { return BinaryOp::operator()(a,m_value); }
template<typename Packet> template<typename Packet>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a) const EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a) const
{ return internal::pmul(a, pset1<Packet>(m_other)); } { return BinaryOp::packetOp(a,internal::pset1<Packet>(m_value)); }
typename add_const_on_value_type<typename NumTraits<Scalar>::Nested>::type m_other;
second_argument_type m_value;
}; };
template<typename Scalar> template<typename BinaryOp> struct functor_traits<bind2nd_op<BinaryOp> > : functor_traits<BinaryOp> {};
struct functor_traits<scalar_multiple_op<Scalar> >
{ enum { Cost = NumTraits<Scalar>::MulCost, PacketAccess = packet_traits<Scalar>::HasMul }; };
template<typename Scalar1, typename Scalar2>
struct scalar_multiple2_op {
typedef typename scalar_product_traits<Scalar1,Scalar2>::ReturnType result_type;
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE scalar_multiple2_op(const scalar_multiple2_op& other) : m_other(other.m_other) { }
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE scalar_multiple2_op(const Scalar2& other) : m_other(other) { }
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE result_type operator() (const Scalar1& a) const { return a * m_other; }
typename add_const_on_value_type<typename NumTraits<Scalar2>::Nested>::type m_other;
};
template<typename Scalar1,typename Scalar2>
struct functor_traits<scalar_multiple2_op<Scalar1,Scalar2> >
{ enum { Cost = NumTraits<Scalar1>::MulCost, PacketAccess = false }; };
/** \internal
* \brief Template functor to divide a scalar by a fixed other one
*
* This functor is used to implement the quotient of a matrix by
* a scalar where the scalar type is not necessarily a floating point type.
*
* \sa class CwiseUnaryOp, MatrixBase::operator/
*/
template<typename Scalar>
struct scalar_quotient1_op {
// FIXME default copy constructors seems bugged with std::complex<>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE scalar_quotient1_op(const scalar_quotient1_op& other) : m_other(other.m_other) { }
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE scalar_quotient1_op(const Scalar& other) : m_other(other) {}
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar operator() (const Scalar& a) const { return a / m_other; }
template <typename Packet>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a) const
{ return internal::pdiv(a, pset1<Packet>(m_other)); }
typename add_const_on_value_type<typename NumTraits<Scalar>::Nested>::type m_other;
};
template<typename Scalar>
struct functor_traits<scalar_quotient1_op<Scalar> >
{ enum { Cost = 2 * NumTraits<Scalar>::MulCost, PacketAccess = packet_traits<Scalar>::HasDiv }; };
template<typename Scalar1, typename Scalar2>
struct scalar_quotient2_op {
typedef typename scalar_product_traits<Scalar1,Scalar2>::ReturnType result_type;
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE scalar_quotient2_op(const scalar_quotient2_op& other) : m_other(other.m_other) { }
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE scalar_quotient2_op(const Scalar2& other) : m_other(other) { }
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE result_type operator() (const Scalar1& a) const { return a / m_other; }
typename add_const_on_value_type<typename NumTraits<Scalar2>::Nested>::type m_other;
};
template<typename Scalar1,typename Scalar2>
struct functor_traits<scalar_quotient2_op<Scalar1,Scalar2> >
{ enum { Cost = 2 * NumTraits<Scalar1>::MulCost, PacketAccess = false }; };
// In Eigen, any binary op (Product, CwiseBinaryOp) require the Lhs and Rhs to have the same scalar type, except for multiplication
// where the mixing of different types is handled by scalar_product_traits
// In particular, real * complex<real> is allowed.
// FIXME move this to functor_traits adding a functor_default
template<typename Functor> struct functor_is_product_like { enum { ret = 0 }; };
template<typename LhsScalar,typename RhsScalar> struct functor_is_product_like<scalar_product_op<LhsScalar,RhsScalar> > { enum { ret = 1 }; };
template<typename LhsScalar,typename RhsScalar> struct functor_is_product_like<scalar_conj_product_op<LhsScalar,RhsScalar> > { enum { ret = 1 }; };
template<typename LhsScalar,typename RhsScalar> struct functor_is_product_like<scalar_quotient_op<LhsScalar,RhsScalar> > { enum { ret = 1 }; };
/** \internal
* \brief Template functor to add a scalar to a fixed other one
* \sa class CwiseUnaryOp, Array::operator+
*/
/* If you wonder why doing the pset1() in packetOp() is an optimization check scalar_multiple_op */
template<typename Scalar>
struct scalar_add_op {
// FIXME default copy constructors seems bugged with std::complex<>
EIGEN_DEVICE_FUNC inline scalar_add_op(const scalar_add_op& other) : m_other(other.m_other) { }
EIGEN_DEVICE_FUNC inline scalar_add_op(const Scalar& other) : m_other(other) { }
EIGEN_DEVICE_FUNC inline Scalar operator() (const Scalar& a) const { return a + m_other; }
template <typename Packet>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a) const
{ return internal::padd(a, pset1<Packet>(m_other)); }
const Scalar m_other;
};
template<typename Scalar>
struct functor_traits<scalar_add_op<Scalar> >
{ enum { Cost = NumTraits<Scalar>::AddCost, PacketAccess = packet_traits<Scalar>::HasAdd }; };
/** \internal
* \brief Template functor to subtract a fixed scalar to another one
* \sa class CwiseUnaryOp, Array::operator-, struct scalar_add_op, struct scalar_rsub_op
*/
template<typename Scalar>
struct scalar_sub_op {
EIGEN_DEVICE_FUNC inline scalar_sub_op(const scalar_sub_op& other) : m_other(other.m_other) { }
EIGEN_DEVICE_FUNC inline scalar_sub_op(const Scalar& other) : m_other(other) { }
EIGEN_DEVICE_FUNC inline Scalar operator() (const Scalar& a) const { return a - m_other; }
template <typename Packet>
EIGEN_DEVICE_FUNC inline const Packet packetOp(const Packet& a) const
{ return internal::psub(a, pset1<Packet>(m_other)); }
const Scalar m_other;
};
template<typename Scalar>
struct functor_traits<scalar_sub_op<Scalar> >
{ enum { Cost = NumTraits<Scalar>::AddCost, PacketAccess = packet_traits<Scalar>::HasAdd }; };
/** \internal
* \brief Template functor to subtract a scalar to fixed another one
* \sa class CwiseUnaryOp, Array::operator-, struct scalar_add_op, struct scalar_sub_op
*/
template<typename Scalar>
struct scalar_rsub_op {
EIGEN_DEVICE_FUNC inline scalar_rsub_op(const scalar_rsub_op& other) : m_other(other.m_other) { }
EIGEN_DEVICE_FUNC inline scalar_rsub_op(const Scalar& other) : m_other(other) { }
EIGEN_DEVICE_FUNC inline Scalar operator() (const Scalar& a) const { return m_other - a; }
template <typename Packet>
EIGEN_DEVICE_FUNC inline const Packet packetOp(const Packet& a) const
{ return internal::psub(pset1<Packet>(m_other), a); }
const Scalar m_other;
};
template<typename Scalar>
struct functor_traits<scalar_rsub_op<Scalar> >
{ enum { Cost = NumTraits<Scalar>::AddCost, PacketAccess = packet_traits<Scalar>::HasAdd }; };
/** \internal
* \brief Template functor to raise a scalar to a power
* \sa class CwiseUnaryOp, Cwise::pow
*/
template<typename Scalar>
struct scalar_pow_op {
// FIXME default copy constructors seems bugged with std::complex<>
EIGEN_DEVICE_FUNC inline scalar_pow_op(const scalar_pow_op& other) : m_exponent(other.m_exponent) { }
EIGEN_DEVICE_FUNC inline scalar_pow_op(const Scalar& exponent) : m_exponent(exponent) {}
EIGEN_DEVICE_FUNC
inline Scalar operator() (const Scalar& a) const { return numext::pow(a, m_exponent); }
const Scalar m_exponent;
};
template<typename Scalar>
struct functor_traits<scalar_pow_op<Scalar> >
{ enum { Cost = 5 * NumTraits<Scalar>::MulCost, PacketAccess = false }; };
/** \internal
* \brief Template functor to compute the quotient between a scalar and array entries.
* \sa class CwiseUnaryOp, Cwise::inverse()
*/
template<typename Scalar>
struct scalar_inverse_mult_op {
EIGEN_DEVICE_FUNC scalar_inverse_mult_op(const Scalar& other) : m_other(other) {}
EIGEN_DEVICE_FUNC inline Scalar operator() (const Scalar& a) const { return m_other / a; }
template<typename Packet>
EIGEN_DEVICE_FUNC inline const Packet packetOp(const Packet& a) const
{ return internal::pdiv(pset1<Packet>(m_other),a); }
Scalar m_other;
};
template<typename Scalar>
struct functor_traits<scalar_inverse_mult_op<Scalar> >
{ enum { PacketAccess = packet_traits<Scalar>::HasDiv, Cost = NumTraits<Scalar>::template Div<PacketAccess>::Cost }; };
} // end namespace internal } // end namespace internal

View File

@ -1,6 +0,0 @@
FILE(GLOB Eigen_Core_Functor_SRCS "*.h")
INSTALL(FILES
${Eigen_Core_Functor_SRCS}
DESTINATION ${INCLUDE_INSTALL_DIR}/Eigen/src/Core/functors COMPONENT Devel
)

View File

@ -18,20 +18,20 @@ template<typename Scalar>
struct scalar_constant_op { struct scalar_constant_op {
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE scalar_constant_op(const scalar_constant_op& other) : m_other(other.m_other) { } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE scalar_constant_op(const scalar_constant_op& other) : m_other(other.m_other) { }
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE scalar_constant_op(const Scalar& other) : m_other(other) { } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE scalar_constant_op(const Scalar& other) : m_other(other) { }
template<typename Index> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() () const { return m_other; }
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (Index, Index = 0) const { return m_other; } template<typename PacketType>
template<typename Index, typename PacketType> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const PacketType packetOp() const { return internal::pset1<PacketType>(m_other); }
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const PacketType packetOp(Index, Index = 0) const { return internal::pset1<PacketType>(m_other); }
const Scalar m_other; const Scalar m_other;
}; };
template<typename Scalar> template<typename Scalar>
struct functor_traits<scalar_constant_op<Scalar> > struct functor_traits<scalar_constant_op<Scalar> >
{ enum { Cost = 1, PacketAccess = packet_traits<Scalar>::Vectorizable, IsRepeatable = true }; }; { enum { Cost = 0 /* as the constant value should be loaded in register only once for the whole expression */,
PacketAccess = packet_traits<Scalar>::Vectorizable, IsRepeatable = true }; };
template<typename Scalar> struct scalar_identity_op { template<typename Scalar> struct scalar_identity_op {
EIGEN_EMPTY_STRUCT_CTOR(scalar_identity_op) EIGEN_EMPTY_STRUCT_CTOR(scalar_identity_op)
template<typename Index> template<typename IndexType>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (Index row, Index col) const { return row==col ? Scalar(1) : Scalar(0); } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (IndexType row, IndexType col) const { return row==col ? Scalar(1) : Scalar(0); }
}; };
template<typename Scalar> template<typename Scalar>
struct functor_traits<scalar_identity_op<Scalar> > struct functor_traits<scalar_identity_op<Scalar> >
@ -55,15 +55,15 @@ struct linspaced_op_impl<Scalar,Packet,/*RandomAccess*/false,/*IsInteger*/false>
m_packetStep(pset1<Packet>(unpacket_traits<Packet>::size*m_step)), m_packetStep(pset1<Packet>(unpacket_traits<Packet>::size*m_step)),
m_base(padd(pset1<Packet>(low), pmul(pset1<Packet>(m_step),plset<Packet>(-unpacket_traits<Packet>::size)))) {} m_base(padd(pset1<Packet>(low), pmul(pset1<Packet>(m_step),plset<Packet>(-unpacket_traits<Packet>::size)))) {}
template<typename Index> template<typename IndexType>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (Index i) const EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (IndexType i) const
{ {
m_base = padd(m_base, pset1<Packet>(m_step)); m_base = padd(m_base, pset1<Packet>(m_step));
return m_low+Scalar(i)*m_step; return m_low+Scalar(i)*m_step;
} }
template<typename Index> template<typename IndexType>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(Index) const { return m_base = padd(m_base,m_packetStep); } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(IndexType) const { return m_base = padd(m_base,m_packetStep); }
const Scalar m_low; const Scalar m_low;
const Scalar m_step; const Scalar m_step;
@ -81,11 +81,11 @@ struct linspaced_op_impl<Scalar,Packet,/*RandomAccess*/true,/*IsInteger*/false>
m_low(low), m_step(num_steps==1 ? Scalar() : (high-low)/Scalar(num_steps-1)), m_low(low), m_step(num_steps==1 ? Scalar() : (high-low)/Scalar(num_steps-1)),
m_lowPacket(pset1<Packet>(m_low)), m_stepPacket(pset1<Packet>(m_step)), m_interPacket(plset<Packet>(0)) {} m_lowPacket(pset1<Packet>(m_low)), m_stepPacket(pset1<Packet>(m_step)), m_interPacket(plset<Packet>(0)) {}
template<typename Index> template<typename IndexType>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (Index i) const { return m_low+i*m_step; } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (IndexType i) const { return m_low+i*m_step; }
template<typename Index> template<typename IndexType>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(Index i) const EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(IndexType i) const
{ return internal::padd(m_lowPacket, pmul(m_stepPacket, padd(pset1<Packet>(Scalar(i)),m_interPacket))); } { return internal::padd(m_lowPacket, pmul(m_stepPacket, padd(pset1<Packet>(Scalar(i)),m_interPacket))); }
const Scalar m_low; const Scalar m_low;
@ -99,24 +99,24 @@ template <typename Scalar, typename Packet>
struct linspaced_op_impl<Scalar,Packet,/*RandomAccess*/true,/*IsInteger*/true> struct linspaced_op_impl<Scalar,Packet,/*RandomAccess*/true,/*IsInteger*/true>
{ {
linspaced_op_impl(const Scalar& low, const Scalar& high, Index num_steps) : linspaced_op_impl(const Scalar& low, const Scalar& high, Index num_steps) :
m_low(low), m_length(high-low), m_divisor(num_steps==1?1:num_steps-1), m_interPacket(plset<Packet>(0)) m_low(low), m_length(high-low), m_divisor(convert_index<Scalar>(num_steps==1?1:num_steps-1)), m_interPacket(plset<Packet>(0))
{} {}
template<typename Index> template<typename IndexType>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
const Scalar operator() (Index i) const { const Scalar operator() (IndexType i) const {
return m_low + (m_length*Scalar(i))/m_divisor; return m_low + (m_length*Scalar(i))/m_divisor;
} }
template<typename Index> template<typename IndexType>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
const Packet packetOp(Index i) const { const Packet packetOp(IndexType i) const {
return internal::padd(pset1<Packet>(m_low), pdiv(pmul(pset1<Packet>(m_length), padd(pset1<Packet>(Scalar(i)),m_interPacket)), return internal::padd(pset1<Packet>(m_low), pdiv(pmul(pset1<Packet>(m_length), padd(pset1<Packet>(Scalar(i)),m_interPacket)),
pset1<Packet>(m_divisor))); } pset1<Packet>(m_divisor))); }
const Scalar m_low; const Scalar m_low;
const Scalar m_length; const Scalar m_length;
const Index m_divisor; const Scalar m_divisor;
const Packet m_interPacket; const Packet m_interPacket;
}; };
@ -142,29 +142,11 @@ template <typename Scalar, typename PacketType, bool RandomAccess> struct linspa
: impl((num_steps==1 ? high : low),high,num_steps) : impl((num_steps==1 ? high : low),high,num_steps)
{} {}
template<typename Index> template<typename IndexType>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (Index i) const { return impl(i); } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (IndexType i) const { return impl(i); }
// We need this function when assigning e.g. a RowVectorXd to a MatrixXd since template<typename Packet,typename IndexType>
// there row==0 and col is used for the actual iteration. EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(IndexType i) const { return impl.packetOp(i); }
template<typename Index>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (Index row, Index col) const
{
eigen_assert(col==0 || row==0);
return impl(col + row);
}
template<typename Index, typename Packet>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(Index i) const { return impl.packetOp(i); }
// We need this function when assigning e.g. a RowVectorXd to a MatrixXd since
// there row==0 and col is used for the actual iteration.
template<typename Index, typename Packet>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(Index row, Index col) const
{
eigen_assert(col==0 || row==0);
return impl.packetOp(col + row);
}
// This proxy object handles the actual required temporaries, the different // This proxy object handles the actual required temporaries, the different
// implementations (random vs. sequential access) as well as the // implementations (random vs. sequential access) as well as the
@ -174,11 +156,11 @@ template <typename Scalar, typename PacketType, bool RandomAccess> struct linspa
const linspaced_op_impl<Scalar,PacketType,(NumTraits<Scalar>::IsInteger?true:RandomAccess),NumTraits<Scalar>::IsInteger> impl; const linspaced_op_impl<Scalar,PacketType,(NumTraits<Scalar>::IsInteger?true:RandomAccess),NumTraits<Scalar>::IsInteger> impl;
}; };
// all functors allow linear access, except scalar_identity_op. So we fix here a quick meta // Linear access is automatically determined from the operator() prototypes available for the given functor.
// to indicate whether a functor allows linear access, just always answering 'yes' except for // If it exposes an operator()(i,j), then we assume the i and j coefficients are required independently
// scalar_identity_op. // and linear access is not possible. In all other cases, linear access is enabled.
template<typename Functor> struct functor_has_linear_access { enum { ret = 1 }; }; // Users should not have to deal with this struture.
template<typename Scalar> struct functor_has_linear_access<scalar_identity_op<Scalar> > { enum { ret = 0 }; }; template<typename Functor> struct functor_has_linear_access { enum { ret = !has_binary_operator<Functor>::value }; };
} // end namespace internal } // end namespace internal

View File

@ -0,0 +1,25 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2016 Eugene Brevdo <ebrevdo@gmail.com>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
#ifndef EIGEN_TERNARY_FUNCTORS_H
#define EIGEN_TERNARY_FUNCTORS_H
namespace Eigen {
namespace internal {
//---------- associative ternary functors ----------
} // end namespace internal
} // end namespace Eigen
#endif // EIGEN_TERNARY_FUNCTORS_H

View File

@ -1,7 +1,7 @@
// This file is part of Eigen, a lightweight C++ template library // This file is part of Eigen, a lightweight C++ template library
// for linear algebra. // for linear algebra.
// //
// Copyright (C) 2008-2010 Gael Guennebaud <gael.guennebaud@inria.fr> // Copyright (C) 2008-2016 Gael Guennebaud <gael.guennebaud@inria.fr>
// //
// This Source Code Form is subject to the terms of the Mozilla // This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed // Public License v. 2.0. If a copy of the MPL was not distributed
@ -248,7 +248,7 @@ struct functor_traits<scalar_exp_op<Scalar> > {
// double: 7 pmadd, 5 pmul, 3 padd/psub, 1 div, 13 other // double: 7 pmadd, 5 pmul, 3 padd/psub, 1 div, 13 other
: (14 * NumTraits<Scalar>::AddCost + : (14 * NumTraits<Scalar>::AddCost +
6 * NumTraits<Scalar>::MulCost + 6 * NumTraits<Scalar>::MulCost +
NumTraits<Scalar>::template Div<packet_traits<Scalar>::HasDiv>::Cost)) scalar_div_cost<Scalar,packet_traits<Scalar>::HasDiv>::value))
#else #else
Cost = Cost =
(sizeof(Scalar) == 4 (sizeof(Scalar) == 4
@ -257,7 +257,7 @@ struct functor_traits<scalar_exp_op<Scalar> > {
// double: 7 pmadd, 5 pmul, 3 padd/psub, 1 div, 13 other // double: 7 pmadd, 5 pmul, 3 padd/psub, 1 div, 13 other
: (23 * NumTraits<Scalar>::AddCost + : (23 * NumTraits<Scalar>::AddCost +
12 * NumTraits<Scalar>::MulCost + 12 * NumTraits<Scalar>::MulCost +
NumTraits<Scalar>::template Div<packet_traits<Scalar>::HasDiv>::Cost)) scalar_div_cost<Scalar,packet_traits<Scalar>::HasDiv>::value))
#endif #endif
}; };
}; };
@ -266,7 +266,7 @@ struct functor_traits<scalar_exp_op<Scalar> > {
* *
* \brief Template functor to compute the logarithm of a scalar * \brief Template functor to compute the logarithm of a scalar
* *
* \sa class CwiseUnaryOp, Cwise::log() * \sa class CwiseUnaryOp, ArrayBase::log()
*/ */
template<typename Scalar> struct scalar_log_op { template<typename Scalar> struct scalar_log_op {
EIGEN_EMPTY_STRUCT_CTOR(scalar_log_op) EIGEN_EMPTY_STRUCT_CTOR(scalar_log_op)
@ -293,6 +293,26 @@ struct functor_traits<scalar_log_op<Scalar> > {
}; };
}; };
/** \internal
*
* \brief Template functor to compute the logarithm of 1 plus a scalar value
*
* \sa class CwiseUnaryOp, ArrayBase::log1p()
*/
template<typename Scalar> struct scalar_log1p_op {
EIGEN_EMPTY_STRUCT_CTOR(scalar_log1p_op)
EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const { return numext::log1p(a); }
template <typename Packet>
EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& a) const { return internal::plog1p(a); }
};
template <typename Scalar>
struct functor_traits<scalar_log1p_op<Scalar> > {
enum {
PacketAccess = packet_traits<Scalar>::HasLog1p,
Cost = functor_traits<scalar_log_op<Scalar> >::Cost // TODO measure cost of log1p
};
};
/** \internal /** \internal
* *
* \brief Template functor to compute the base-10 logarithm of a scalar * \brief Template functor to compute the base-10 logarithm of a scalar
@ -452,142 +472,6 @@ struct functor_traits<scalar_asin_op<Scalar> >
}; };
/** \internal
* \brief Template functor to compute the natural log of the absolute
* value of Gamma of a scalar
* \sa class CwiseUnaryOp, Cwise::lgamma()
*/
template<typename Scalar> struct scalar_lgamma_op {
EIGEN_EMPTY_STRUCT_CTOR(scalar_lgamma_op)
EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const {
using numext::lgamma; return lgamma(a);
}
typedef typename packet_traits<Scalar>::type Packet;
EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& a) const { return internal::plgamma(a); }
};
template<typename Scalar>
struct functor_traits<scalar_lgamma_op<Scalar> >
{
enum {
// Guesstimate
Cost = 10 * NumTraits<Scalar>::MulCost + 5 * NumTraits<Scalar>::AddCost,
PacketAccess = packet_traits<Scalar>::HasLGamma
};
};
/** \internal
* \brief Template functor to compute psi, the derivative of lgamma of a scalar.
* \sa class CwiseUnaryOp, Cwise::digamma()
*/
template<typename Scalar> struct scalar_digamma_op {
EIGEN_EMPTY_STRUCT_CTOR(scalar_digamma_op)
EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const {
using numext::digamma; return digamma(a);
}
typedef typename packet_traits<Scalar>::type Packet;
EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& a) const { return internal::pdigamma(a); }
};
template<typename Scalar>
struct functor_traits<scalar_digamma_op<Scalar> >
{
enum {
// Guesstimate
Cost = 10 * NumTraits<Scalar>::MulCost + 5 * NumTraits<Scalar>::AddCost,
PacketAccess = packet_traits<Scalar>::HasDiGamma
};
};
/** \internal
* \brief Template functor to compute the Riemann Zeta function of two arguments.
* \sa class CwiseUnaryOp, Cwise::zeta()
*/
template<typename Scalar> struct scalar_zeta_op {
EIGEN_EMPTY_STRUCT_CTOR(scalar_zeta_op)
EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& x, const Scalar& q) const {
using numext::zeta; return zeta(x, q);
}
typedef typename packet_traits<Scalar>::type Packet;
EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& x, const Packet& q) const { return internal::pzeta(x, q); }
};
template<typename Scalar>
struct functor_traits<scalar_zeta_op<Scalar> >
{
enum {
// Guesstimate
Cost = 10 * NumTraits<Scalar>::MulCost + 5 * NumTraits<Scalar>::AddCost,
PacketAccess = packet_traits<Scalar>::HasZeta
};
};
/** \internal
* \brief Template functor to compute the polygamma function.
* \sa class CwiseUnaryOp, Cwise::polygamma()
*/
template<typename Scalar> struct scalar_polygamma_op {
EIGEN_EMPTY_STRUCT_CTOR(scalar_polygamma_op)
EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& n, const Scalar& x) const {
using numext::polygamma; return polygamma(n, x);
}
typedef typename packet_traits<Scalar>::type Packet;
EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& n, const Packet& x) const { return internal::ppolygamma(n, x); }
};
template<typename Scalar>
struct functor_traits<scalar_polygamma_op<Scalar> >
{
enum {
// Guesstimate
Cost = 10 * NumTraits<Scalar>::MulCost + 5 * NumTraits<Scalar>::AddCost,
PacketAccess = packet_traits<Scalar>::HasPolygamma
};
};
/** \internal
* \brief Template functor to compute the Gauss error function of a
* scalar
* \sa class CwiseUnaryOp, Cwise::erf()
*/
template<typename Scalar> struct scalar_erf_op {
EIGEN_EMPTY_STRUCT_CTOR(scalar_erf_op)
EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const {
using numext::erf; return erf(a);
}
typedef typename packet_traits<Scalar>::type Packet;
EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& a) const { return internal::perf(a); }
};
template<typename Scalar>
struct functor_traits<scalar_erf_op<Scalar> >
{
enum {
// Guesstimate
Cost = 10 * NumTraits<Scalar>::MulCost + 5 * NumTraits<Scalar>::AddCost,
PacketAccess = packet_traits<Scalar>::HasErf
};
};
/** \internal
* \brief Template functor to compute the Complementary Error Function
* of a scalar
* \sa class CwiseUnaryOp, Cwise::erfc()
*/
template<typename Scalar> struct scalar_erfc_op {
EIGEN_EMPTY_STRUCT_CTOR(scalar_erfc_op)
EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const {
using numext::erfc; return erfc(a);
}
typedef typename packet_traits<Scalar>::type Packet;
EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& a) const { return internal::perfc(a); }
};
template<typename Scalar>
struct functor_traits<scalar_erfc_op<Scalar> >
{
enum {
// Guesstimate
Cost = 10 * NumTraits<Scalar>::MulCost + 5 * NumTraits<Scalar>::AddCost,
PacketAccess = packet_traits<Scalar>::HasErfc
};
};
/** \internal /** \internal
* \brief Template functor to compute the atan of a scalar * \brief Template functor to compute the atan of a scalar
* \sa class CwiseUnaryOp, ArrayBase::atan() * \sa class CwiseUnaryOp, ArrayBase::atan()
@ -607,38 +491,39 @@ struct functor_traits<scalar_atan_op<Scalar> >
}; };
}; };
/** \internal /** \internal
* \brief Template functor to compute the tanh of a scalar * \brief Template functor to compute the tanh of a scalar
* \sa class CwiseUnaryOp, ArrayBase::tanh() * \sa class CwiseUnaryOp, ArrayBase::tanh()
*/ */
template<typename Scalar> struct scalar_tanh_op { template <typename Scalar>
struct scalar_tanh_op {
EIGEN_EMPTY_STRUCT_CTOR(scalar_tanh_op) EIGEN_EMPTY_STRUCT_CTOR(scalar_tanh_op)
EIGEN_DEVICE_FUNC inline const Scalar operator()(const Scalar& a) const { return numext::tanh(a); } EIGEN_DEVICE_FUNC inline const Scalar operator()(const Scalar& a) const { return numext::tanh(a); }
template <typename Packet> template <typename Packet>
EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& a) const { return internal::ptanh(a); } EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& x) const { return ptanh(x); }
}; };
template <typename Scalar> template <typename Scalar>
struct functor_traits<scalar_tanh_op<Scalar> > struct functor_traits<scalar_tanh_op<Scalar> > {
{
enum { enum {
PacketAccess = packet_traits<Scalar>::HasTanh, PacketAccess = packet_traits<Scalar>::HasTanh,
Cost = Cost = ( (EIGEN_FAST_MATH && is_same<Scalar,float>::value)
(PacketAccess
// The following numbers are based on the AVX implementation, // The following numbers are based on the AVX implementation,
#ifdef EIGEN_VECTORIZE_FMA #ifdef EIGEN_VECTORIZE_FMA
// Haswell can issue 2 add/mul/madd per cycle. // Haswell can issue 2 add/mul/madd per cycle.
// 9 pmadd, 2 pmul, 1 div, 2 other // 9 pmadd, 2 pmul, 1 div, 2 other
? (2 * NumTraits<Scalar>::AddCost + 6 * NumTraits<Scalar>::MulCost + ? (2 * NumTraits<Scalar>::AddCost +
NumTraits<Scalar>::template Div<packet_traits<Scalar>::HasDiv>::Cost) 6 * NumTraits<Scalar>::MulCost +
scalar_div_cost<Scalar,packet_traits<Scalar>::HasDiv>::value)
#else #else
? (11 * NumTraits<Scalar>::AddCost + ? (11 * NumTraits<Scalar>::AddCost +
11 * NumTraits<Scalar>::MulCost + 11 * NumTraits<Scalar>::MulCost +
NumTraits<Scalar>::template Div<packet_traits<Scalar>::HasDiv>::Cost) scalar_div_cost<Scalar,packet_traits<Scalar>::HasDiv>::value)
#endif #endif
// This number assumes a naive implementation of tanh // This number assumes a naive implementation of tanh
: (6 * NumTraits<Scalar>::AddCost + 3 * NumTraits<Scalar>::MulCost + : (6 * NumTraits<Scalar>::AddCost +
2 * NumTraits<Scalar>::template Div<packet_traits<Scalar>::HasDiv>::Cost + 3 * NumTraits<Scalar>::MulCost +
2 * scalar_div_cost<Scalar,packet_traits<Scalar>::HasDiv>::value +
functor_traits<scalar_exp_op<Scalar> >::Cost)) functor_traits<scalar_exp_op<Scalar> >::Cost))
}; };
}; };
@ -880,9 +765,9 @@ struct scalar_sign_op<Scalar,true> {
{ {
typedef typename NumTraits<Scalar>::Real real_type; typedef typename NumTraits<Scalar>::Real real_type;
real_type aa = numext::abs(a); real_type aa = numext::abs(a);
if (aa==0) if (aa==real_type(0))
return Scalar(0); return Scalar(0);
aa = 1./aa; aa = real_type(1)/aa;
return Scalar(real(a)*aa, imag(a)*aa ); return Scalar(real(a)*aa, imag(a)*aa );
} }
//TODO //TODO

View File

@ -1,6 +0,0 @@
FILE(GLOB Eigen_Core_Product_SRCS "*.h")
INSTALL(FILES
${Eigen_Core_Product_SRCS}
DESTINATION ${INCLUDE_INSTALL_DIR}/Eigen/src/Core/products COMPONENT Devel
)

View File

@ -299,16 +299,6 @@ void computeProductBlockingSizes(Index& k, Index& m, Index& n, Index num_threads
if (!useSpecificBlockingSizes(k, m, n)) { if (!useSpecificBlockingSizes(k, m, n)) {
evaluateProductBlockingSizesHeuristic<LhsScalar, RhsScalar, KcFactor, Index>(k, m, n, num_threads); evaluateProductBlockingSizesHeuristic<LhsScalar, RhsScalar, KcFactor, Index>(k, m, n, num_threads);
} }
typedef gebp_traits<LhsScalar,RhsScalar> Traits;
enum {
kr = 8,
mr = Traits::mr,
nr = Traits::nr
};
if (k > kr) k -= k % kr;
if (m > mr) m -= m % mr;
if (n > nr) n -= n % nr;
} }
template<typename LhsScalar, typename RhsScalar, typename Index> template<typename LhsScalar, typename RhsScalar, typename Index>
@ -363,7 +353,7 @@ class gebp_traits
public: public:
typedef _LhsScalar LhsScalar; typedef _LhsScalar LhsScalar;
typedef _RhsScalar RhsScalar; typedef _RhsScalar RhsScalar;
typedef typename scalar_product_traits<LhsScalar, RhsScalar>::ReturnType ResScalar; typedef typename ScalarBinaryOpTraits<LhsScalar, RhsScalar>::ReturnType ResScalar;
enum { enum {
ConjLhs = _ConjLhs, ConjLhs = _ConjLhs,
@ -444,15 +434,16 @@ public:
template<typename LhsPacketType, typename RhsPacketType, typename AccPacketType> template<typename LhsPacketType, typename RhsPacketType, typename AccPacketType>
EIGEN_STRONG_INLINE void madd(const LhsPacketType& a, const RhsPacketType& b, AccPacketType& c, AccPacketType& tmp) const EIGEN_STRONG_INLINE void madd(const LhsPacketType& a, const RhsPacketType& b, AccPacketType& c, AccPacketType& tmp) const
{ {
conj_helper<LhsPacketType,RhsPacketType,ConjLhs,ConjRhs> cj;
// It would be a lot cleaner to call pmadd all the time. Unfortunately if we // It would be a lot cleaner to call pmadd all the time. Unfortunately if we
// let gcc allocate the register in which to store the result of the pmul // let gcc allocate the register in which to store the result of the pmul
// (in the case where there is no FMA) gcc fails to figure out how to avoid // (in the case where there is no FMA) gcc fails to figure out how to avoid
// spilling register. // spilling register.
#ifdef EIGEN_HAS_SINGLE_INSTRUCTION_MADD #ifdef EIGEN_HAS_SINGLE_INSTRUCTION_MADD
EIGEN_UNUSED_VARIABLE(tmp); EIGEN_UNUSED_VARIABLE(tmp);
c = pmadd(a,b,c); c = cj.pmadd(a,b,c);
#else #else
tmp = b; tmp = pmul(a,tmp); c = padd(c,tmp); tmp = b; tmp = cj.pmul(a,tmp); c = padd(c,tmp);
#endif #endif
} }
@ -467,9 +458,6 @@ public:
r = pmadd(c,alpha,r); r = pmadd(c,alpha,r);
} }
protected:
// conj_helper<LhsScalar,RhsScalar,ConjLhs,ConjRhs> cj;
// conj_helper<LhsPacket,RhsPacket,ConjLhs,ConjRhs> pcj;
}; };
template<typename RealScalar, bool _ConjLhs> template<typename RealScalar, bool _ConjLhs>
@ -478,7 +466,7 @@ class gebp_traits<std::complex<RealScalar>, RealScalar, _ConjLhs, false>
public: public:
typedef std::complex<RealScalar> LhsScalar; typedef std::complex<RealScalar> LhsScalar;
typedef RealScalar RhsScalar; typedef RealScalar RhsScalar;
typedef typename scalar_product_traits<LhsScalar, RhsScalar>::ReturnType ResScalar; typedef typename ScalarBinaryOpTraits<LhsScalar, RhsScalar>::ReturnType ResScalar;
enum { enum {
ConjLhs = _ConjLhs, ConjLhs = _ConjLhs,
@ -860,80 +848,6 @@ protected:
conj_helper<ResPacket,ResPacket,false,ConjRhs> cj; conj_helper<ResPacket,ResPacket,false,ConjRhs> cj;
}; };
// helper for the rotating kernel below
template <typename GebpKernel, bool UseRotatingKernel = GebpKernel::UseRotatingKernel>
struct PossiblyRotatingKernelHelper
{
// default implementation, not rotating
typedef typename GebpKernel::Traits Traits;
typedef typename Traits::RhsScalar RhsScalar;
typedef typename Traits::RhsPacket RhsPacket;
typedef typename Traits::AccPacket AccPacket;
const Traits& traits;
PossiblyRotatingKernelHelper(const Traits& t) : traits(t) {}
template <size_t K, size_t Index>
void loadOrRotateRhs(RhsPacket& to, const RhsScalar* from) const
{
traits.loadRhs(from + (Index+4*K)*Traits::RhsProgress, to);
}
void unrotateResult(AccPacket&,
AccPacket&,
AccPacket&,
AccPacket&)
{
}
};
// rotating implementation
template <typename GebpKernel>
struct PossiblyRotatingKernelHelper<GebpKernel, true>
{
typedef typename GebpKernel::Traits Traits;
typedef typename Traits::RhsScalar RhsScalar;
typedef typename Traits::RhsPacket RhsPacket;
typedef typename Traits::AccPacket AccPacket;
const Traits& traits;
PossiblyRotatingKernelHelper(const Traits& t) : traits(t) {}
template <size_t K, size_t Index>
void loadOrRotateRhs(RhsPacket& to, const RhsScalar* from) const
{
if (Index == 0) {
to = pload<RhsPacket>(from + 4*K*Traits::RhsProgress);
} else {
EIGEN_ASM_COMMENT("Do not reorder code, we're very tight on registers");
to = protate<1>(to);
}
}
void unrotateResult(AccPacket& res0,
AccPacket& res1,
AccPacket& res2,
AccPacket& res3)
{
PacketBlock<AccPacket> resblock;
resblock.packet[0] = res0;
resblock.packet[1] = res1;
resblock.packet[2] = res2;
resblock.packet[3] = res3;
ptranspose(resblock);
resblock.packet[3] = protate<1>(resblock.packet[3]);
resblock.packet[2] = protate<2>(resblock.packet[2]);
resblock.packet[1] = protate<3>(resblock.packet[1]);
ptranspose(resblock);
res0 = resblock.packet[0];
res1 = resblock.packet[1];
res2 = resblock.packet[2];
res3 = resblock.packet[3];
}
};
/* optimized GEneral packed Block * packed Panel product kernel /* optimized GEneral packed Block * packed Panel product kernel
* *
* Mixing type logic: C += A * B * Mixing type logic: C += A * B
@ -967,16 +881,6 @@ struct gebp_kernel
ResPacketSize = Traits::ResPacketSize ResPacketSize = Traits::ResPacketSize
}; };
static const bool UseRotatingKernel =
EIGEN_ARCH_ARM &&
internal::is_same<LhsScalar, float>::value &&
internal::is_same<RhsScalar, float>::value &&
internal::is_same<ResScalar, float>::value &&
Traits::LhsPacketSize == 4 &&
Traits::RhsPacketSize == 4 &&
Traits::ResPacketSize == 4;
EIGEN_DONT_INLINE EIGEN_DONT_INLINE
void operator()(const DataMapper& res, const LhsScalar* blockA, const RhsScalar* blockB, void operator()(const DataMapper& res, const LhsScalar* blockA, const RhsScalar* blockB,
Index rows, Index depth, Index cols, ResScalar alpha, Index rows, Index depth, Index cols, ResScalar alpha,
@ -1010,8 +914,6 @@ void gebp_kernel<LhsScalar,RhsScalar,Index,DataMapper,mr,nr,ConjugateLhs,Conjuga
// Usually, make sense only with FMA // Usually, make sense only with FMA
if(mr>=3*Traits::LhsProgress) if(mr>=3*Traits::LhsProgress)
{ {
PossiblyRotatingKernelHelper<gebp_kernel> possiblyRotatingKernelHelper(traits);
// Here, the general idea is to loop on each largest micro horizontal panel of the lhs (3*Traits::LhsProgress x depth) // Here, the general idea is to loop on each largest micro horizontal panel of the lhs (3*Traits::LhsProgress x depth)
// and on each largest micro vertical panel of the rhs (depth * nr). // and on each largest micro vertical panel of the rhs (depth * nr).
// Blocking sizes, i.e., 'depth' has been computed so that the micro horizontal panel of the lhs fit in L1. // Blocking sizes, i.e., 'depth' has been computed so that the micro horizontal panel of the lhs fit in L1.
@ -1074,19 +976,19 @@ void gebp_kernel<LhsScalar,RhsScalar,Index,DataMapper,mr,nr,ConjugateLhs,Conjuga
traits.loadLhs(&blA[(0+3*K)*LhsProgress], A0); \ traits.loadLhs(&blA[(0+3*K)*LhsProgress], A0); \
traits.loadLhs(&blA[(1+3*K)*LhsProgress], A1); \ traits.loadLhs(&blA[(1+3*K)*LhsProgress], A1); \
traits.loadLhs(&blA[(2+3*K)*LhsProgress], A2); \ traits.loadLhs(&blA[(2+3*K)*LhsProgress], A2); \
possiblyRotatingKernelHelper.template loadOrRotateRhs<K, 0>(B_0, blB); \ traits.loadRhs(blB + (0+4*K)*Traits::RhsProgress, B_0); \
traits.madd(A0, B_0, C0, T0); \ traits.madd(A0, B_0, C0, T0); \
traits.madd(A1, B_0, C4, T0); \ traits.madd(A1, B_0, C4, T0); \
traits.madd(A2, B_0, C8, B_0); \ traits.madd(A2, B_0, C8, B_0); \
possiblyRotatingKernelHelper.template loadOrRotateRhs<K, 1>(B_0, blB); \ traits.loadRhs(blB + (1+4*K)*Traits::RhsProgress, B_0); \
traits.madd(A0, B_0, C1, T0); \ traits.madd(A0, B_0, C1, T0); \
traits.madd(A1, B_0, C5, T0); \ traits.madd(A1, B_0, C5, T0); \
traits.madd(A2, B_0, C9, B_0); \ traits.madd(A2, B_0, C9, B_0); \
possiblyRotatingKernelHelper.template loadOrRotateRhs<K, 2>(B_0, blB); \ traits.loadRhs(blB + (2+4*K)*Traits::RhsProgress, B_0); \
traits.madd(A0, B_0, C2, T0); \ traits.madd(A0, B_0, C2, T0); \
traits.madd(A1, B_0, C6, T0); \ traits.madd(A1, B_0, C6, T0); \
traits.madd(A2, B_0, C10, B_0); \ traits.madd(A2, B_0, C10, B_0); \
possiblyRotatingKernelHelper.template loadOrRotateRhs<K, 3>(B_0, blB); \ traits.loadRhs(blB + (3+4*K)*Traits::RhsProgress, B_0); \
traits.madd(A0, B_0, C3 , T0); \ traits.madd(A0, B_0, C3 , T0); \
traits.madd(A1, B_0, C7, T0); \ traits.madd(A1, B_0, C7, T0); \
traits.madd(A2, B_0, C11, B_0); \ traits.madd(A2, B_0, C11, B_0); \
@ -1120,10 +1022,6 @@ void gebp_kernel<LhsScalar,RhsScalar,Index,DataMapper,mr,nr,ConjugateLhs,Conjuga
#undef EIGEN_GEBP_ONESTEP #undef EIGEN_GEBP_ONESTEP
possiblyRotatingKernelHelper.unrotateResult(C0, C1, C2, C3);
possiblyRotatingKernelHelper.unrotateResult(C4, C5, C6, C7);
possiblyRotatingKernelHelper.unrotateResult(C8, C9, C10, C11);
ResPacket R0, R1, R2; ResPacket R0, R1, R2;
ResPacket alphav = pset1<ResPacket>(alpha); ResPacket alphav = pset1<ResPacket>(alpha);
@ -1625,9 +1523,13 @@ void gebp_kernel<LhsScalar,RhsScalar,Index,DataMapper,mr,nr,ConjugateLhs,Conjuga
prefetch(&blA[0]); prefetch(&blA[0]);
const RhsScalar* blB = &blockB[j2*strideB+offsetB*nr]; const RhsScalar* blB = &blockB[j2*strideB+offsetB*nr];
// NOTE The following piece of code doesn't work for 512 bit registers, // The following piece of code wont work for 512 bit registers
// so we don't call it for registers that contain more than 8 values. // Moreover, if LhsProgress==8 it assumes that there is a half packet of the same size
if( ((SwappedTraits::LhsProgress % 4)==0) && (SwappedTraits::LhsProgress <= 8)) // as nr (which is currently 4) for the return type.
typedef typename unpacket_traits<SResPacket>::half SResPacketHalf;
if ((SwappedTraits::LhsProgress % 4) == 0 &&
(SwappedTraits::LhsProgress <= 8) &&
(SwappedTraits::LhsProgress!=8 || unpacket_traits<SResPacketHalf>::size==nr))
{ {
SAccPacket C0, C1, C2, C3; SAccPacket C0, C1, C2, C3;
straits.initAcc(C0); straits.initAcc(C0);

View File

@ -25,7 +25,7 @@ struct general_matrix_matrix_product<Index,LhsScalar,LhsStorageOrder,ConjugateLh
{ {
typedef gebp_traits<RhsScalar,LhsScalar> Traits; typedef gebp_traits<RhsScalar,LhsScalar> Traits;
typedef typename scalar_product_traits<LhsScalar, RhsScalar>::ReturnType ResScalar; typedef typename ScalarBinaryOpTraits<LhsScalar, RhsScalar>::ReturnType ResScalar;
static EIGEN_STRONG_INLINE void run( static EIGEN_STRONG_INLINE void run(
Index rows, Index cols, Index depth, Index rows, Index cols, Index depth,
const LhsScalar* lhs, Index lhsStride, const LhsScalar* lhs, Index lhsStride,
@ -55,7 +55,7 @@ struct general_matrix_matrix_product<Index,LhsScalar,LhsStorageOrder,ConjugateLh
typedef gebp_traits<LhsScalar,RhsScalar> Traits; typedef gebp_traits<LhsScalar,RhsScalar> Traits;
typedef typename scalar_product_traits<LhsScalar, RhsScalar>::ReturnType ResScalar; typedef typename ScalarBinaryOpTraits<LhsScalar, RhsScalar>::ReturnType ResScalar;
static void run(Index rows, Index cols, Index depth, static void run(Index rows, Index cols, Index depth,
const LhsScalar* _lhs, Index lhsStride, const LhsScalar* _lhs, Index lhsStride,
const RhsScalar* _rhs, Index rhsStride, const RhsScalar* _rhs, Index rhsStride,
@ -309,8 +309,8 @@ class gemm_blocking_space<StorageOrder,_LhsScalar,_RhsScalar,MaxRows, MaxCols, M
this->m_blockA = m_staticA; this->m_blockA = m_staticA;
this->m_blockB = m_staticB; this->m_blockB = m_staticB;
#else #else
this->m_blockA = reinterpret_cast<LhsScalar*>((std::size_t(m_staticA) + (EIGEN_DEFAULT_ALIGN_BYTES-1)) & ~std::size_t(EIGEN_DEFAULT_ALIGN_BYTES-1)); this->m_blockA = reinterpret_cast<LhsScalar*>((internal::UIntPtr(m_staticA) + (EIGEN_DEFAULT_ALIGN_BYTES-1)) & ~std::size_t(EIGEN_DEFAULT_ALIGN_BYTES-1));
this->m_blockB = reinterpret_cast<RhsScalar*>((std::size_t(m_staticB) + (EIGEN_DEFAULT_ALIGN_BYTES-1)) & ~std::size_t(EIGEN_DEFAULT_ALIGN_BYTES-1)); this->m_blockB = reinterpret_cast<RhsScalar*>((internal::UIntPtr(m_staticB) + (EIGEN_DEFAULT_ALIGN_BYTES-1)) & ~std::size_t(EIGEN_DEFAULT_ALIGN_BYTES-1));
#endif #endif
} }

View File

@ -40,7 +40,7 @@ template <typename Index, typename LhsScalar, int LhsStorageOrder, bool Conjugat
typename RhsScalar, int RhsStorageOrder, bool ConjugateRhs, int UpLo, int Version> typename RhsScalar, int RhsStorageOrder, bool ConjugateRhs, int UpLo, int Version>
struct general_matrix_matrix_triangular_product<Index,LhsScalar,LhsStorageOrder,ConjugateLhs,RhsScalar,RhsStorageOrder,ConjugateRhs,RowMajor,UpLo,Version> struct general_matrix_matrix_triangular_product<Index,LhsScalar,LhsStorageOrder,ConjugateLhs,RhsScalar,RhsStorageOrder,ConjugateRhs,RowMajor,UpLo,Version>
{ {
typedef typename scalar_product_traits<LhsScalar, RhsScalar>::ReturnType ResScalar; typedef typename ScalarBinaryOpTraits<LhsScalar, RhsScalar>::ReturnType ResScalar;
static EIGEN_STRONG_INLINE void run(Index size, Index depth,const LhsScalar* lhs, Index lhsStride, static EIGEN_STRONG_INLINE void run(Index size, Index depth,const LhsScalar* lhs, Index lhsStride,
const RhsScalar* rhs, Index rhsStride, ResScalar* res, Index resStride, const RhsScalar* rhs, Index rhsStride, ResScalar* res, Index resStride,
const ResScalar& alpha, level3_blocking<RhsScalar,LhsScalar>& blocking) const ResScalar& alpha, level3_blocking<RhsScalar,LhsScalar>& blocking)
@ -57,7 +57,7 @@ template <typename Index, typename LhsScalar, int LhsStorageOrder, bool Conjugat
typename RhsScalar, int RhsStorageOrder, bool ConjugateRhs, int UpLo, int Version> typename RhsScalar, int RhsStorageOrder, bool ConjugateRhs, int UpLo, int Version>
struct general_matrix_matrix_triangular_product<Index,LhsScalar,LhsStorageOrder,ConjugateLhs,RhsScalar,RhsStorageOrder,ConjugateRhs,ColMajor,UpLo,Version> struct general_matrix_matrix_triangular_product<Index,LhsScalar,LhsStorageOrder,ConjugateLhs,RhsScalar,RhsStorageOrder,ConjugateRhs,ColMajor,UpLo,Version>
{ {
typedef typename scalar_product_traits<LhsScalar, RhsScalar>::ReturnType ResScalar; typedef typename ScalarBinaryOpTraits<LhsScalar, RhsScalar>::ReturnType ResScalar;
static EIGEN_STRONG_INLINE void run(Index size, Index depth,const LhsScalar* _lhs, Index lhsStride, static EIGEN_STRONG_INLINE void run(Index size, Index depth,const LhsScalar* _lhs, Index lhsStride,
const RhsScalar* _rhs, Index rhsStride, ResScalar* _res, Index resStride, const RhsScalar* _rhs, Index rhsStride, ResScalar* _res, Index resStride,
const ResScalar& alpha, level3_blocking<LhsScalar,RhsScalar>& blocking) const ResScalar& alpha, level3_blocking<LhsScalar,RhsScalar>& blocking)

View File

@ -58,7 +58,7 @@ namespace internal {
template<typename Index, typename LhsScalar, typename LhsMapper, bool ConjugateLhs, typename RhsScalar, typename RhsMapper, bool ConjugateRhs, int Version> template<typename Index, typename LhsScalar, typename LhsMapper, bool ConjugateLhs, typename RhsScalar, typename RhsMapper, bool ConjugateRhs, int Version>
struct general_matrix_vector_product<Index,LhsScalar,LhsMapper,ColMajor,ConjugateLhs,RhsScalar,RhsMapper,ConjugateRhs,Version> struct general_matrix_vector_product<Index,LhsScalar,LhsMapper,ColMajor,ConjugateLhs,RhsScalar,RhsMapper,ConjugateRhs,Version>
{ {
typedef typename scalar_product_traits<LhsScalar, RhsScalar>::ReturnType ResScalar; typedef typename ScalarBinaryOpTraits<LhsScalar, RhsScalar>::ReturnType ResScalar;
enum { enum {
Vectorizable = packet_traits<LhsScalar>::Vectorizable && packet_traits<RhsScalar>::Vectorizable Vectorizable = packet_traits<LhsScalar>::Vectorizable && packet_traits<RhsScalar>::Vectorizable
@ -140,7 +140,7 @@ EIGEN_DONT_INLINE void general_matrix_vector_product<Index,LhsScalar,LhsMapper,C
// find how many columns do we have to skip to be aligned with the result (if possible) // find how many columns do we have to skip to be aligned with the result (if possible)
Index skipColumns = 0; Index skipColumns = 0;
// if the data cannot be aligned (TODO add some compile time tests when possible, e.g. for floats) // if the data cannot be aligned (TODO add some compile time tests when possible, e.g. for floats)
if( (lhsAlignmentOffset < 0) || (lhsAlignmentOffset == size) || (size_t(res)%sizeof(ResScalar)) ) if( (lhsAlignmentOffset < 0) || (lhsAlignmentOffset == size) || (UIntPtr(res)%sizeof(ResScalar)) )
{ {
alignedSize = 0; alignedSize = 0;
alignedStart = 0; alignedStart = 0;
@ -183,8 +183,8 @@ EIGEN_DONT_INLINE void general_matrix_vector_product<Index,LhsScalar,LhsMapper,C
alignmentPattern = AllAligned; alignmentPattern = AllAligned;
} }
const Index offset1 = (FirstAligned && alignmentStep==1?3:1); const Index offset1 = (FirstAligned && alignmentStep==1)?3:1;
const Index offset3 = (FirstAligned && alignmentStep==1?1:3); const Index offset3 = (FirstAligned && alignmentStep==1)?1:3;
Index columnBound = ((cols-skipColumns)/columnsAtOnce)*columnsAtOnce + skipColumns; Index columnBound = ((cols-skipColumns)/columnsAtOnce)*columnsAtOnce + skipColumns;
for (Index i=skipColumns; i<columnBound; i+=columnsAtOnce) for (Index i=skipColumns; i<columnBound; i+=columnsAtOnce)
@ -334,7 +334,7 @@ EIGEN_DONT_INLINE void general_matrix_vector_product<Index,LhsScalar,LhsMapper,C
template<typename Index, typename LhsScalar, typename LhsMapper, bool ConjugateLhs, typename RhsScalar, typename RhsMapper, bool ConjugateRhs, int Version> template<typename Index, typename LhsScalar, typename LhsMapper, bool ConjugateLhs, typename RhsScalar, typename RhsMapper, bool ConjugateRhs, int Version>
struct general_matrix_vector_product<Index,LhsScalar,LhsMapper,RowMajor,ConjugateLhs,RhsScalar,RhsMapper,ConjugateRhs,Version> struct general_matrix_vector_product<Index,LhsScalar,LhsMapper,RowMajor,ConjugateLhs,RhsScalar,RhsMapper,ConjugateRhs,Version>
{ {
typedef typename scalar_product_traits<LhsScalar, RhsScalar>::ReturnType ResScalar; typedef typename ScalarBinaryOpTraits<LhsScalar, RhsScalar>::ReturnType ResScalar;
enum { enum {
Vectorizable = packet_traits<LhsScalar>::Vectorizable && packet_traits<RhsScalar>::Vectorizable Vectorizable = packet_traits<LhsScalar>::Vectorizable && packet_traits<RhsScalar>::Vectorizable
@ -457,8 +457,8 @@ EIGEN_DONT_INLINE void general_matrix_vector_product<Index,LhsScalar,LhsMapper,R
alignmentPattern = AllAligned; alignmentPattern = AllAligned;
} }
const Index offset1 = (FirstAligned && alignmentStep==1?3:1); const Index offset1 = (FirstAligned && alignmentStep==1)?3:1;
const Index offset3 = (FirstAligned && alignmentStep==1?1:3); const Index offset3 = (FirstAligned && alignmentStep==1)?1:3;
Index rowBound = ((rows-skipRows)/rowsAtOnce)*rowsAtOnce + skipRows; Index rowBound = ((rows-skipRows)/rowsAtOnce)*rowsAtOnce + skipRows;
for (Index i=skipRows; i<rowBound; i+=rowsAtOnce) for (Index i=skipRows; i<rowBound; i+=rowsAtOnce)

View File

@ -122,7 +122,7 @@ struct product_selfadjoint_matrix<EIGTYPE,Index,LhsStorageOrder,true,ConjugateLh
Map<const Matrix<EIGTYPE, Dynamic, Dynamic, LhsStorageOrder>, 0, OuterStride<> > lhs(_lhs,m,m,OuterStride<>(lhsStride)); \ Map<const Matrix<EIGTYPE, Dynamic, Dynamic, LhsStorageOrder>, 0, OuterStride<> > lhs(_lhs,m,m,OuterStride<>(lhsStride)); \
a_tmp = lhs.conjugate(); \ a_tmp = lhs.conjugate(); \
a = a_tmp.data(); \ a = a_tmp.data(); \
lda = a_tmp.outerStride(); \ lda = convert_index<BlasIndex>(a_tmp.outerStride()); \
} else a = _lhs; \ } else a = _lhs; \
if (LhsStorageOrder==RowMajor) uplo='U'; \ if (LhsStorageOrder==RowMajor) uplo='U'; \
\ \
@ -256,7 +256,7 @@ struct product_selfadjoint_matrix<EIGTYPE,Index,LhsStorageOrder,false,ConjugateL
b_tmp = lhs.transpose(); \ b_tmp = lhs.transpose(); \
} \ } \
b = b_tmp.data(); \ b = b_tmp.data(); \
ldb = b_tmp.outerStride(); \ ldb = convert_index<BlasIndex>(b_tmp.outerStride()); \
} \ } \
\ \
BLASPREFIX##hemm_(&side, &uplo, &m, &n, &numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (const BLASTYPE*)b, &ldb, &numext::real_ref(beta), (BLASTYPE*)res, &ldc); \ BLASPREFIX##hemm_(&side, &uplo, &m, &n, &numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (const BLASTYPE*)b, &ldb, &numext::real_ref(beta), (BLASTYPE*)res, &ldc); \

View File

@ -179,7 +179,7 @@ struct selfadjoint_product_impl<Lhs,LhsMode,false,Rhs,0,true>
{ {
typedef typename Dest::Scalar ResScalar; typedef typename Dest::Scalar ResScalar;
typedef typename Rhs::Scalar RhsScalar; typedef typename Rhs::Scalar RhsScalar;
typedef Map<Matrix<ResScalar,Dynamic,1>, Aligned> MappedDest; typedef Map<Matrix<ResScalar,Dynamic,1>, EIGEN_PLAIN_ENUM_MIN(AlignedMax,internal::packet_traits<ResScalar>::size)> MappedDest;
eigen_assert(dest.rows()==a_lhs.rows() && dest.cols()==a_rhs.cols()); eigen_assert(dest.rows()==a_lhs.rows() && dest.cols()==a_rhs.cols());

View File

@ -20,7 +20,7 @@ struct triangular_matrix_vector_product;
template<typename Index, int Mode, typename LhsScalar, bool ConjLhs, typename RhsScalar, bool ConjRhs, int Version> template<typename Index, int Mode, typename LhsScalar, bool ConjLhs, typename RhsScalar, bool ConjRhs, int Version>
struct triangular_matrix_vector_product<Index,Mode,LhsScalar,ConjLhs,RhsScalar,ConjRhs,ColMajor,Version> struct triangular_matrix_vector_product<Index,Mode,LhsScalar,ConjLhs,RhsScalar,ConjRhs,ColMajor,Version>
{ {
typedef typename scalar_product_traits<LhsScalar, RhsScalar>::ReturnType ResScalar; typedef typename ScalarBinaryOpTraits<LhsScalar, RhsScalar>::ReturnType ResScalar;
enum { enum {
IsLower = ((Mode&Lower)==Lower), IsLower = ((Mode&Lower)==Lower),
HasUnitDiag = (Mode & UnitDiag)==UnitDiag, HasUnitDiag = (Mode & UnitDiag)==UnitDiag,
@ -91,7 +91,7 @@ EIGEN_DONT_INLINE void triangular_matrix_vector_product<Index,Mode,LhsScalar,Con
template<typename Index, int Mode, typename LhsScalar, bool ConjLhs, typename RhsScalar, bool ConjRhs,int Version> template<typename Index, int Mode, typename LhsScalar, bool ConjLhs, typename RhsScalar, bool ConjRhs,int Version>
struct triangular_matrix_vector_product<Index,Mode,LhsScalar,ConjLhs,RhsScalar,ConjRhs,RowMajor,Version> struct triangular_matrix_vector_product<Index,Mode,LhsScalar,ConjLhs,RhsScalar,ConjRhs,RowMajor,Version>
{ {
typedef typename scalar_product_traits<LhsScalar, RhsScalar>::ReturnType ResScalar; typedef typename ScalarBinaryOpTraits<LhsScalar, RhsScalar>::ReturnType ResScalar;
enum { enum {
IsLower = ((Mode&Lower)==Lower), IsLower = ((Mode&Lower)==Lower),
HasUnitDiag = (Mode & UnitDiag)==UnitDiag, HasUnitDiag = (Mode & UnitDiag)==UnitDiag,
@ -216,7 +216,7 @@ template<int Mode> struct trmv_selector<Mode,ColMajor>
typedef internal::blas_traits<Rhs> RhsBlasTraits; typedef internal::blas_traits<Rhs> RhsBlasTraits;
typedef typename RhsBlasTraits::DirectLinearAccessType ActualRhsType; typedef typename RhsBlasTraits::DirectLinearAccessType ActualRhsType;
typedef Map<Matrix<ResScalar,Dynamic,1>, Aligned> MappedDest; typedef Map<Matrix<ResScalar,Dynamic,1>, EIGEN_PLAIN_ENUM_MIN(AlignedMax,internal::packet_traits<ResScalar>::size)> MappedDest;
typename internal::add_const_on_value_type<ActualLhsType>::type actualLhs = LhsBlasTraits::extract(lhs); typename internal::add_const_on_value_type<ActualLhsType>::type actualLhs = LhsBlasTraits::extract(lhs);
typename internal::add_const_on_value_type<ActualRhsType>::type actualRhs = RhsBlasTraits::extract(rhs); typename internal::add_const_on_value_type<ActualRhsType>::type actualRhs = RhsBlasTraits::extract(rhs);

View File

@ -44,16 +44,29 @@ template<bool Conjugate> struct conj_if;
template<> struct conj_if<true> { template<> struct conj_if<true> {
template<typename T> template<typename T>
inline T operator()(const T& x) { return numext::conj(x); } inline T operator()(const T& x) const { return numext::conj(x); }
template<typename T> template<typename T>
inline T pconj(const T& x) { return internal::pconj(x); } inline T pconj(const T& x) const { return internal::pconj(x); }
}; };
template<> struct conj_if<false> { template<> struct conj_if<false> {
template<typename T> template<typename T>
inline const T& operator()(const T& x) { return x; } inline const T& operator()(const T& x) const { return x; }
template<typename T> template<typename T>
inline const T& pconj(const T& x) { return x; } inline const T& pconj(const T& x) const { return x; }
};
// Generic implementation for custom complex types.
template<typename LhsScalar, typename RhsScalar, bool ConjLhs, bool ConjRhs>
struct conj_helper
{
typedef typename ScalarBinaryOpTraits<LhsScalar,RhsScalar>::ReturnType Scalar;
EIGEN_STRONG_INLINE Scalar pmadd(const LhsScalar& x, const RhsScalar& y, const Scalar& c) const
{ return padd(c, pmul(x,y)); }
EIGEN_STRONG_INLINE Scalar pmul(const LhsScalar& x, const RhsScalar& y) const
{ return conj_if<ConjLhs>()(x) * conj_if<ConjRhs>()(y); }
}; };
template<typename Scalar> struct conj_helper<Scalar,Scalar,false,false> template<typename Scalar> struct conj_helper<Scalar,Scalar,false,false>
@ -111,7 +124,7 @@ template<typename RealScalar,bool Conj> struct conj_helper<RealScalar, std::comp
}; };
template<typename From,typename To> struct get_factor { template<typename From,typename To> struct get_factor {
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE To run(const From& x) { return x; } EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE To run(const From& x) { return To(x); }
}; };
template<typename Scalar> struct get_factor<Scalar,typename NumTraits<Scalar>::Real> { template<typename Scalar> struct get_factor<Scalar,typename NumTraits<Scalar>::Real> {
@ -135,7 +148,7 @@ class BlasVectorMapper {
template <typename Packet> template <typename Packet>
EIGEN_DEVICE_FUNC bool aligned(Index i) const { EIGEN_DEVICE_FUNC bool aligned(Index i) const {
return (size_t(m_data+i)%sizeof(Packet))==0; return (UIntPtr(m_data+i)%sizeof(Packet))==0;
} }
protected: protected:
@ -227,7 +240,7 @@ class blas_data_mapper {
EIGEN_DEVICE_FUNC const Scalar* data() const { return m_data; } EIGEN_DEVICE_FUNC const Scalar* data() const { return m_data; }
EIGEN_DEVICE_FUNC Index firstAligned(Index size) const { EIGEN_DEVICE_FUNC Index firstAligned(Index size) const {
if (size_t(m_data)%sizeof(Scalar)) { if (UIntPtr(m_data)%sizeof(Scalar)) {
return -1; return -1;
} }
return internal::first_default_aligned(m_data, size); return internal::first_default_aligned(m_data, size);
@ -293,17 +306,33 @@ struct blas_traits<CwiseUnaryOp<scalar_conjugate_op<Scalar>, NestedXpr> >
}; };
// pop scalar multiple // pop scalar multiple
template<typename Scalar, typename NestedXpr> template<typename Scalar, typename NestedXpr, typename Plain>
struct blas_traits<CwiseUnaryOp<scalar_multiple_op<Scalar>, NestedXpr> > struct blas_traits<CwiseBinaryOp<scalar_product_op<Scalar>, const CwiseNullaryOp<scalar_constant_op<Scalar>,Plain>, NestedXpr> >
: blas_traits<NestedXpr> : blas_traits<NestedXpr>
{ {
typedef blas_traits<NestedXpr> Base; typedef blas_traits<NestedXpr> Base;
typedef CwiseUnaryOp<scalar_multiple_op<Scalar>, NestedXpr> XprType; typedef CwiseBinaryOp<scalar_product_op<Scalar>, const CwiseNullaryOp<scalar_constant_op<Scalar>,Plain>, NestedXpr> XprType;
typedef typename Base::ExtractType ExtractType; typedef typename Base::ExtractType ExtractType;
static inline ExtractType extract(const XprType& x) { return Base::extract(x.nestedExpression()); } static inline ExtractType extract(const XprType& x) { return Base::extract(x.rhs()); }
static inline Scalar extractScalarFactor(const XprType& x) static inline Scalar extractScalarFactor(const XprType& x)
{ return x.functor().m_other * Base::extractScalarFactor(x.nestedExpression()); } { return x.lhs().functor().m_other * Base::extractScalarFactor(x.rhs()); }
}; };
template<typename Scalar, typename NestedXpr, typename Plain>
struct blas_traits<CwiseBinaryOp<scalar_product_op<Scalar>, NestedXpr, const CwiseNullaryOp<scalar_constant_op<Scalar>,Plain> > >
: blas_traits<NestedXpr>
{
typedef blas_traits<NestedXpr> Base;
typedef CwiseBinaryOp<scalar_product_op<Scalar>, NestedXpr, const CwiseNullaryOp<scalar_constant_op<Scalar>,Plain> > XprType;
typedef typename Base::ExtractType ExtractType;
static inline ExtractType extract(const XprType& x) { return Base::extract(x.lhs()); }
static inline Scalar extractScalarFactor(const XprType& x)
{ return Base::extractScalarFactor(x.lhs()) * x.rhs().functor().m_other; }
};
template<typename Scalar, typename Plain1, typename Plain2>
struct blas_traits<CwiseBinaryOp<scalar_product_op<Scalar>, const CwiseNullaryOp<scalar_constant_op<Scalar>,Plain1>,
const CwiseNullaryOp<scalar_constant_op<Scalar>,Plain2> > >
: blas_traits<CwiseNullaryOp<scalar_constant_op<Scalar>,Plain1> >
{};
// pop opposite // pop opposite
template<typename Scalar, typename NestedXpr> template<typename Scalar, typename NestedXpr>

View File

@ -1,6 +0,0 @@
FILE(GLOB Eigen_Core_util_SRCS "*.h")
INSTALL(FILES
${Eigen_Core_util_SRCS}
DESTINATION ${INCLUDE_INSTALL_DIR}/Eigen/src/Core/util COMPONENT Devel
)

View File

@ -199,7 +199,7 @@ const unsigned int HereditaryBits = RowMajorBit
/** \ingroup enums /** \ingroup enums
* Enum containing possible values for the \c Mode or \c UpLo parameter of * Enum containing possible values for the \c Mode or \c UpLo parameter of
* MatrixBase::selfadjointView() and MatrixBase::triangularView(), and selfadjoint solvers. */ * MatrixBase::selfadjointView() and MatrixBase::triangularView(), and selfadjoint solvers. */
enum { enum UpLoType {
/** View matrix as a lower triangular matrix. */ /** View matrix as a lower triangular matrix. */
Lower=0x1, Lower=0x1,
/** View matrix as an upper triangular matrix. */ /** View matrix as an upper triangular matrix. */
@ -224,7 +224,7 @@ enum {
/** \ingroup enums /** \ingroup enums
* Enum for indicating whether a buffer is aligned or not. */ * Enum for indicating whether a buffer is aligned or not. */
enum { enum AlignmentType {
Unaligned=0, /**< Data pointer has no specific alignment. */ Unaligned=0, /**< Data pointer has no specific alignment. */
Aligned8=8, /**< Data pointer is aligned on a 8 bytes boundary. */ Aligned8=8, /**< Data pointer is aligned on a 8 bytes boundary. */
Aligned16=16, /**< Data pointer is aligned on a 16 bytes boundary. */ Aligned16=16, /**< Data pointer is aligned on a 16 bytes boundary. */
@ -273,7 +273,7 @@ enum DirectionType {
/** \internal \ingroup enums /** \internal \ingroup enums
* Enum to specify how to traverse the entries of a matrix. */ * Enum to specify how to traverse the entries of a matrix. */
enum { enum TraversalType {
/** \internal Default traversal, no vectorization, no index-based access */ /** \internal Default traversal, no vectorization, no index-based access */
DefaultTraversal, DefaultTraversal,
/** \internal No vectorization, use index-based access to have only one for loop instead of 2 nested loops */ /** \internal No vectorization, use index-based access to have only one for loop instead of 2 nested loops */
@ -295,7 +295,7 @@ enum {
/** \internal \ingroup enums /** \internal \ingroup enums
* Enum to specify whether to unroll loops when traversing over the entries of a matrix. */ * Enum to specify whether to unroll loops when traversing over the entries of a matrix. */
enum { enum UnrollingType {
/** \internal Do not unroll loops. */ /** \internal Do not unroll loops. */
NoUnrolling, NoUnrolling,
/** \internal Unroll only the inner loop, but not the outer loop. */ /** \internal Unroll only the inner loop, but not the outer loop. */
@ -307,7 +307,7 @@ enum {
/** \internal \ingroup enums /** \internal \ingroup enums
* Enum to specify whether to use the default (built-in) implementation or the specialization. */ * Enum to specify whether to use the default (built-in) implementation or the specialization. */
enum { enum SpecializedType {
Specialized, Specialized,
BuiltIn BuiltIn
}; };
@ -315,7 +315,7 @@ enum {
/** \ingroup enums /** \ingroup enums
* Enum containing possible values for the \p _Options template parameter of * Enum containing possible values for the \p _Options template parameter of
* Matrix, Array and BandMatrix. */ * Matrix, Array and BandMatrix. */
enum { enum StorageOptions {
/** Storage order is column major (see \ref TopicStorageOrders). */ /** Storage order is column major (see \ref TopicStorageOrders). */
ColMajor = 0, ColMajor = 0,
/** Storage order is row major (see \ref TopicStorageOrders). */ /** Storage order is row major (see \ref TopicStorageOrders). */
@ -328,7 +328,7 @@ enum {
/** \ingroup enums /** \ingroup enums
* Enum for specifying whether to apply or solve on the left or right. */ * Enum for specifying whether to apply or solve on the left or right. */
enum { enum SideType {
/** Apply transformation on the left. */ /** Apply transformation on the left. */
OnTheLeft = 1, OnTheLeft = 1,
/** Apply transformation on the right. */ /** Apply transformation on the right. */
@ -353,7 +353,7 @@ enum Default_t { Default };
/** \internal \ingroup enums /** \internal \ingroup enums
* Used in AmbiVector. */ * Used in AmbiVector. */
enum { enum AmbiVectorMode {
IsDense = 0, IsDense = 0,
IsSparse IsSparse
}; };
@ -479,8 +479,9 @@ namespace Architecture
} }
/** \internal \ingroup enums /** \internal \ingroup enums
* Enum used as template parameter in Product and product evalautors. */ * Enum used as template parameter in Product and product evaluators. */
enum { DefaultProduct=0, LazyProduct, AliasFreeProduct, CoeffBasedProductMode, LazyCoeffBasedProductMode, OuterProduct, InnerProduct, GemvProduct, GemmProduct }; enum ProductImplType
{ DefaultProduct=0, LazyProduct, AliasFreeProduct, CoeffBasedProductMode, LazyCoeffBasedProductMode, OuterProduct, InnerProduct, GemvProduct, GemmProduct };
/** \internal \ingroup enums /** \internal \ingroup enums
* Enum used in experimental parallel implementation. */ * Enum used in experimental parallel implementation. */
@ -492,7 +493,7 @@ struct Dense {};
/** The type used to identify a general sparse storage. */ /** The type used to identify a general sparse storage. */
struct Sparse {}; struct Sparse {};
/** The type used to identify a general solver (foctored) storage. */ /** The type used to identify a general solver (factored) storage. */
struct SolverStorage {}; struct SolverStorage {};
/** The type used to identify a permutation storage. */ /** The type used to identify a permutation storage. */

View File

@ -14,12 +14,13 @@
// 4512 - assignment operator could not be generated // 4512 - assignment operator could not be generated
// 4522 - 'class' : multiple assignment operators specified // 4522 - 'class' : multiple assignment operators specified
// 4700 - uninitialized local variable 'xyz' used // 4700 - uninitialized local variable 'xyz' used
// 4714 - function marked as __forceinline not inlined
// 4717 - 'function' : recursive on all control paths, function will cause runtime stack overflow // 4717 - 'function' : recursive on all control paths, function will cause runtime stack overflow
// 4800 - 'type' : forcing value to bool 'true' or 'false' (performance warning) // 4800 - 'type' : forcing value to bool 'true' or 'false' (performance warning)
#ifndef EIGEN_PERMANENTLY_DISABLE_STUPID_WARNINGS #ifndef EIGEN_PERMANENTLY_DISABLE_STUPID_WARNINGS
#pragma warning( push ) #pragma warning( push )
#endif #endif
#pragma warning( disable : 4100 4101 4127 4181 4211 4244 4273 4324 4503 4512 4522 4700 4717 4800) #pragma warning( disable : 4100 4101 4127 4181 4211 4244 4273 4324 4503 4512 4522 4700 4714 4717 4800)
#elif defined __INTEL_COMPILER #elif defined __INTEL_COMPILER
// 2196 - routine is both "inline" and "noinline" ("noinline" assumed) // 2196 - routine is both "inline" and "noinline" ("noinline" assumed)
@ -41,6 +42,14 @@
#pragma clang diagnostic push #pragma clang diagnostic push
#endif #endif
#pragma clang diagnostic ignored "-Wconstant-logical-operand" #pragma clang diagnostic ignored "-Wconstant-logical-operand"
#elif defined __GNUC__ && __GNUC__>=6
#ifndef EIGEN_PERMANENTLY_DISABLE_STUPID_WARNINGS
#pragma GCC diagnostic push
#endif
#pragma GCC diagnostic ignored "-Wignored-attributes"
#endif #endif
#if defined __NVCC__ #if defined __NVCC__
@ -48,11 +57,19 @@
#pragma diag_suppress code_is_unreachable #pragma diag_suppress code_is_unreachable
// Disable the "dynamic initialization in unreachable code" message // Disable the "dynamic initialization in unreachable code" message
#pragma diag_suppress initialization_not_reachable #pragma diag_suppress initialization_not_reachable
// Disable the "calling a __host__ function from a __host__ __device__ function is not allowed" messages (yes, there are 4 of them) // Disable the "invalid error number" message that we get with older versions of nvcc
#pragma diag_suppress 1222
// Disable the "calling a __host__ function from a __host__ __device__ function is not allowed" messages (yes, there are many of them and they seem to change with every version of the compiler)
#pragma diag_suppress 2527
#pragma diag_suppress 2529
#pragma diag_suppress 2651 #pragma diag_suppress 2651
#pragma diag_suppress 2653 #pragma diag_suppress 2653
#pragma diag_suppress 2668 #pragma diag_suppress 2668
#pragma diag_suppress 2669
#pragma diag_suppress 2670 #pragma diag_suppress 2670
#pragma diag_suppress 2671
#pragma diag_suppress 2735
#pragma diag_suppress 2737
#endif #endif
#endif // not EIGEN_WARNINGS_DISABLED #endif // not EIGEN_WARNINGS_DISABLED

View File

@ -91,6 +91,7 @@ template<typename NullaryOp, typename MatrixType> class CwiseNullaryOp;
template<typename UnaryOp, typename MatrixType> class CwiseUnaryOp; template<typename UnaryOp, typename MatrixType> class CwiseUnaryOp;
template<typename ViewOp, typename MatrixType> class CwiseUnaryView; template<typename ViewOp, typename MatrixType> class CwiseUnaryView;
template<typename BinaryOp, typename Lhs, typename Rhs> class CwiseBinaryOp; template<typename BinaryOp, typename Lhs, typename Rhs> class CwiseBinaryOp;
template<typename TernaryOp, typename Arg1, typename Arg2, typename Arg3> class CwiseTernaryOp;
template<typename Decomposition, typename Rhstype> class Solve; template<typename Decomposition, typename Rhstype> class Solve;
template<typename XprType> class Inverse; template<typename XprType> class Inverse;
@ -174,9 +175,11 @@ namespace internal {
// with optional conjugation of the arguments. // with optional conjugation of the arguments.
template<typename LhsScalar, typename RhsScalar, bool ConjLhs=false, bool ConjRhs=false> struct conj_helper; template<typename LhsScalar, typename RhsScalar, bool ConjLhs=false, bool ConjRhs=false> struct conj_helper;
template<typename Scalar> struct scalar_sum_op; template<typename LhsScalar,typename RhsScalar=LhsScalar> struct scalar_sum_op;
template<typename Scalar> struct scalar_difference_op; template<typename LhsScalar,typename RhsScalar=LhsScalar> struct scalar_difference_op;
template<typename LhsScalar,typename RhsScalar> struct scalar_conj_product_op; template<typename LhsScalar,typename RhsScalar=LhsScalar> struct scalar_conj_product_op;
template<typename LhsScalar,typename RhsScalar=LhsScalar> struct scalar_min_op;
template<typename LhsScalar,typename RhsScalar=LhsScalar> struct scalar_max_op;
template<typename Scalar> struct scalar_opposite_op; template<typename Scalar> struct scalar_opposite_op;
template<typename Scalar> struct scalar_conjugate_op; template<typename Scalar> struct scalar_conjugate_op;
template<typename Scalar> struct scalar_real_op; template<typename Scalar> struct scalar_real_op;
@ -192,27 +195,28 @@ template<typename Scalar> struct scalar_sin_op;
template<typename Scalar> struct scalar_acos_op; template<typename Scalar> struct scalar_acos_op;
template<typename Scalar> struct scalar_asin_op; template<typename Scalar> struct scalar_asin_op;
template<typename Scalar> struct scalar_tan_op; template<typename Scalar> struct scalar_tan_op;
template<typename Scalar> struct scalar_pow_op;
template<typename Scalar> struct scalar_inverse_op; template<typename Scalar> struct scalar_inverse_op;
template<typename Scalar> struct scalar_square_op; template<typename Scalar> struct scalar_square_op;
template<typename Scalar> struct scalar_cube_op; template<typename Scalar> struct scalar_cube_op;
template<typename Scalar, typename NewType> struct scalar_cast_op; template<typename Scalar, typename NewType> struct scalar_cast_op;
template<typename Scalar> struct scalar_multiple_op;
template<typename Scalar> struct scalar_quotient1_op;
template<typename Scalar> struct scalar_min_op;
template<typename Scalar> struct scalar_max_op;
template<typename Scalar> struct scalar_random_op; template<typename Scalar> struct scalar_random_op;
template<typename Scalar> struct scalar_add_op;
template<typename Scalar> struct scalar_constant_op; template<typename Scalar> struct scalar_constant_op;
template<typename Scalar> struct scalar_identity_op; template<typename Scalar> struct scalar_identity_op;
template<typename Scalar,bool iscpx> struct scalar_sign_op; template<typename Scalar,bool iscpx> struct scalar_sign_op;
template<typename Scalar,typename ScalarExponent> struct scalar_pow_op;
template<typename LhsScalar,typename RhsScalar=LhsScalar> struct scalar_hypot_op;
template<typename LhsScalar,typename RhsScalar=LhsScalar> struct scalar_product_op;
template<typename LhsScalar,typename RhsScalar=LhsScalar> struct scalar_quotient_op;
// SpecialFunctions module
template<typename Scalar> struct scalar_lgamma_op;
template<typename Scalar> struct scalar_digamma_op;
template<typename Scalar> struct scalar_erf_op;
template<typename Scalar> struct scalar_erfc_op;
template<typename Scalar> struct scalar_igamma_op; template<typename Scalar> struct scalar_igamma_op;
template<typename Scalar> struct scalar_igammac_op; template<typename Scalar> struct scalar_igammac_op;
template<typename Scalar> struct scalar_zeta_op;
template<typename LhsScalar,typename RhsScalar=LhsScalar> struct scalar_product_op; template<typename Scalar> struct scalar_betainc_op;
template<typename LhsScalar,typename RhsScalar> struct scalar_multiple2_op;
template<typename LhsScalar,typename RhsScalar=LhsScalar> struct scalar_quotient_op;
template<typename LhsScalar,typename RhsScalar> struct scalar_quotient2_op;
} // end namespace internal } // end namespace internal

4
Eigen/src/Core/util/MKL_support.h Normal file → Executable file
View File

@ -49,7 +49,7 @@
#define EIGEN_USE_LAPACKE #define EIGEN_USE_LAPACKE
#endif #endif
#if defined(EIGEN_USE_LAPACKE) || defined(EIGEN_USE_MKL_VML) #if defined(EIGEN_USE_MKL_VML)
#define EIGEN_USE_MKL #define EIGEN_USE_MKL
#endif #endif
@ -72,7 +72,7 @@
#endif #endif
#if defined EIGEN_USE_MKL #if defined EIGEN_USE_MKL
#include <mkl_lapacke.h>
#define EIGEN_MKL_VML_THRESHOLD 128 #define EIGEN_MKL_VML_THRESHOLD 128
/* MKL_DOMAIN_BLAS, etc are defined only in 10.3 update 7 */ /* MKL_DOMAIN_BLAS, etc are defined only in 10.3 update 7 */

View File

@ -13,7 +13,7 @@
#define EIGEN_WORLD_VERSION 3 #define EIGEN_WORLD_VERSION 3
#define EIGEN_MAJOR_VERSION 2 #define EIGEN_MAJOR_VERSION 2
#define EIGEN_MINOR_VERSION 92 #define EIGEN_MINOR_VERSION 94
#define EIGEN_VERSION_AT_LEAST(x,y,z) (EIGEN_WORLD_VERSION>x || (EIGEN_WORLD_VERSION>=x && \ #define EIGEN_VERSION_AT_LEAST(x,y,z) (EIGEN_WORLD_VERSION>x || (EIGEN_WORLD_VERSION>=x && \
(EIGEN_MAJOR_VERSION>y || (EIGEN_MAJOR_VERSION>=y && \ (EIGEN_MAJOR_VERSION>y || (EIGEN_MAJOR_VERSION>=y && \
@ -28,9 +28,9 @@
#define EIGEN_COMP_GNUC 0 #define EIGEN_COMP_GNUC 0
#endif #endif
/// \internal EIGEN_COMP_CLANG set to 1 if the compiler is clang (alias for __clang__) /// \internal EIGEN_COMP_CLANG set to major+minor version (e.g., 307 for clang 3.7) if the compiler is clang
#if defined(__clang__) #if defined(__clang__)
#define EIGEN_COMP_CLANG 1 #define EIGEN_COMP_CLANG (__clang_major__*100+__clang_minor__)
#else #else
#define EIGEN_COMP_CLANG 0 #define EIGEN_COMP_CLANG 0
#endif #endif
@ -71,6 +71,15 @@
#define EIGEN_COMP_MSVC 0 #define EIGEN_COMP_MSVC 0
#endif #endif
// For the record, here is a table summarizing the possible values for EIGEN_COMP_MSVC:
// name ver MSC_VER
// 2008 9 1500
// 2010 10 1600
// 2012 11 1700
// 2013 12 1800
// 2015 14 1900
// "15" 15 1900
/// \internal EIGEN_COMP_MSVC_STRICT set to 1 if the compiler is really Microsoft Visual C++ and not ,e.g., ICC /// \internal EIGEN_COMP_MSVC_STRICT set to 1 if the compiler is really Microsoft Visual C++ and not ,e.g., ICC
#if EIGEN_COMP_MSVC && !(EIGEN_COMP_ICC) #if EIGEN_COMP_MSVC && !(EIGEN_COMP_ICC)
#define EIGEN_COMP_MSVC_STRICT _MSC_VER #define EIGEN_COMP_MSVC_STRICT _MSC_VER
@ -340,50 +349,82 @@
# define __has_feature(x) 0 # define __has_feature(x) 0
#endif #endif
// Upperbound on the C++ version to use.
// Expected values are 03, 11, 14, 17, etc.
// By default, let's use an arbitrarily large C++ version.
#ifndef EIGEN_MAX_CPP_VER
#define EIGEN_MAX_CPP_VER 99
#endif
// Do we support r-value references? // Do we support r-value references?
#if (__has_feature(cxx_rvalue_references) || \ #ifndef EIGEN_HAS_RVALUE_REFERENCES
#if EIGEN_MAX_CPP_VER>=11 && \
(__has_feature(cxx_rvalue_references) || \
(defined(__cplusplus) && __cplusplus >= 201103L) || \ (defined(__cplusplus) && __cplusplus >= 201103L) || \
(EIGEN_COMP_MSVC >= 1600)) (EIGEN_COMP_MSVC >= 1600))
#define EIGEN_HAVE_RVALUE_REFERENCES #define EIGEN_HAS_RVALUE_REFERENCES 1
#else
#define EIGEN_HAS_RVALUE_REFERENCES 0
#endif
#endif #endif
// Does the compiler support C99? // Does the compiler support C99?
#if (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901)) \ #ifndef EIGEN_HAS_C99_MATH
#if EIGEN_MAX_CPP_VER>=11 && \
((defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901)) \
|| (defined(__GNUC__) && defined(_GLIBCXX_USE_C99)) \ || (defined(__GNUC__) && defined(_GLIBCXX_USE_C99)) \
|| (defined(_LIBCPP_VERSION) && !defined(_MSC_VER)) || (defined(_LIBCPP_VERSION) && !defined(_MSC_VER)))
#define EIGEN_HAS_C99_MATH 1 #define EIGEN_HAS_C99_MATH 1
#else
#define EIGEN_HAS_C99_MATH 0
#endif
#endif #endif
// Does the compiler support result_of? // Does the compiler support result_of?
#if (__has_feature(cxx_lambdas) || (defined(__cplusplus) && __cplusplus >= 201103L)) #ifndef EIGEN_HAS_STD_RESULT_OF
#if EIGEN_MAX_CPP_VER>=11 && ((__has_feature(cxx_lambdas) || (defined(__cplusplus) && __cplusplus >= 201103L)))
#define EIGEN_HAS_STD_RESULT_OF 1 #define EIGEN_HAS_STD_RESULT_OF 1
#else
#define EIGEN_HAS_STD_RESULT_OF 0
#endif
#endif #endif
// Does the compiler support variadic templates? // Does the compiler support variadic templates?
#if __cplusplus > 199711L || EIGEN_COMP_MSVC >= 1900 #ifndef EIGEN_HAS_VARIADIC_TEMPLATES
// Disable the use of variadic templates when compiling with nvcc on ARM devices: #if EIGEN_MAX_CPP_VER>=11 && (__cplusplus > 199711L || EIGEN_COMP_MSVC >= 1900) \
&& ( !defined(__NVCC__) || !EIGEN_ARCH_ARM_OR_ARM64 )
// ^^ Disable the use of variadic templates when compiling with nvcc on ARM devices:
// this prevents nvcc from crashing when compiling Eigen on Tegra X1 // this prevents nvcc from crashing when compiling Eigen on Tegra X1
#if !defined(__NVCC__) || !EIGEN_ARCH_ARM_OR_ARM64
#define EIGEN_HAS_VARIADIC_TEMPLATES 1 #define EIGEN_HAS_VARIADIC_TEMPLATES 1
#else
#define EIGEN_HAS_VARIADIC_TEMPLATES 0
#endif #endif
#endif #endif
// Does the compiler support const expressions? // Does the compiler fully support const expressions? (as in c++14)
#ifndef EIGEN_HAS_CONSTEXPR
#ifdef __CUDACC__ #ifdef __CUDACC__
// Const expressions are supported provided that c++11 is enabled and we're using either clang or nvcc 7.5 or above // Const expressions are supported provided that c++11 is enabled and we're using either clang or nvcc 7.5 or above
#if __cplusplus > 199711L && defined(__CUDACC_VER__) && (EIGEN_COMP_CLANG || __CUDACC_VER__ >= 70500) #if EIGEN_MAX_CPP_VER>=14 && (__cplusplus > 199711L && defined(__CUDACC_VER__) && (EIGEN_COMP_CLANG || __CUDACC_VER__ >= 70500))
#define EIGEN_HAS_CONSTEXPR 1 #define EIGEN_HAS_CONSTEXPR 1
#endif #endif
#elif __has_feature(cxx_relaxed_constexpr) || (defined(__cplusplus) && __cplusplus >= 201402L) || \ #elif EIGEN_MAX_CPP_VER>=14 && (__has_feature(cxx_relaxed_constexpr) || (defined(__cplusplus) && __cplusplus >= 201402L) || \
(EIGEN_GNUC_AT_LEAST(4,8) && (__cplusplus > 199711L)) (EIGEN_GNUC_AT_LEAST(4,8) && (__cplusplus > 199711L)))
#define EIGEN_HAS_CONSTEXPR 1 #define EIGEN_HAS_CONSTEXPR 1
#endif #endif
#ifndef EIGEN_HAS_CONSTEXPR
#define EIGEN_HAS_CONSTEXPR 0
#endif
#endif
// Does the compiler support C++11 math? // Does the compiler support C++11 math?
// Let's be conservative and enable the default C++11 implementation only if we are sure it exists // Let's be conservative and enable the default C++11 implementation only if we are sure it exists
#ifndef EIGEN_HAS_CXX11_MATH #ifndef EIGEN_HAS_CXX11_MATH
#if (__cplusplus > 201103L) || (__cplusplus >= 201103L) && (EIGEN_COMP_GNUC_STRICT || EIGEN_COMP_CLANG || EIGEN_COMP_MSVC || EIGEN_COMP_ICC) \ #if EIGEN_MAX_CPP_VER>=11 && ((__cplusplus > 201103L) || (__cplusplus >= 201103L) && (EIGEN_COMP_GNUC_STRICT || EIGEN_COMP_CLANG || EIGEN_COMP_MSVC || EIGEN_COMP_ICC) \
&& (EIGEN_ARCH_i386_OR_x86_64) && (EIGEN_OS_GNULINUX || EIGEN_OS_WIN_STRICT || EIGEN_OS_MAC) && (EIGEN_ARCH_i386_OR_x86_64) && (EIGEN_OS_GNULINUX || EIGEN_OS_WIN_STRICT || EIGEN_OS_MAC))
#define EIGEN_HAS_CXX11_MATH 1 #define EIGEN_HAS_CXX11_MATH 1
#else #else
#define EIGEN_HAS_CXX11_MATH 0 #define EIGEN_HAS_CXX11_MATH 0
@ -392,9 +433,10 @@
// Does the compiler support proper C++11 containers? // Does the compiler support proper C++11 containers?
#ifndef EIGEN_HAS_CXX11_CONTAINERS #ifndef EIGEN_HAS_CXX11_CONTAINERS
#if (__cplusplus > 201103L) \ #if EIGEN_MAX_CPP_VER>=11 && \
((__cplusplus > 201103L) \
|| ((__cplusplus >= 201103L) && (EIGEN_COMP_GNUC_STRICT || EIGEN_COMP_CLANG || EIGEN_COMP_ICC>=1400)) \ || ((__cplusplus >= 201103L) && (EIGEN_COMP_GNUC_STRICT || EIGEN_COMP_CLANG || EIGEN_COMP_ICC>=1400)) \
|| EIGEN_COMP_MSVC >= 1900 || EIGEN_COMP_MSVC >= 1900)
#define EIGEN_HAS_CXX11_CONTAINERS 1 #define EIGEN_HAS_CXX11_CONTAINERS 1
#else #else
#define EIGEN_HAS_CXX11_CONTAINERS 0 #define EIGEN_HAS_CXX11_CONTAINERS 0
@ -403,9 +445,11 @@
// Does the compiler support C++11 noexcept? // Does the compiler support C++11 noexcept?
#ifndef EIGEN_HAS_CXX11_NOEXCEPT #ifndef EIGEN_HAS_CXX11_NOEXCEPT
#if (__cplusplus > 201103L) \ #if EIGEN_MAX_CPP_VER>=11 && \
(__has_feature(cxx_noexcept) \
|| (__cplusplus > 201103L) \
|| ((__cplusplus >= 201103L) && (EIGEN_COMP_GNUC_STRICT || EIGEN_COMP_CLANG || EIGEN_COMP_ICC>=1400)) \ || ((__cplusplus >= 201103L) && (EIGEN_COMP_GNUC_STRICT || EIGEN_COMP_CLANG || EIGEN_COMP_ICC>=1400)) \
|| EIGEN_COMP_MSVC >= 1900 || EIGEN_COMP_MSVC >= 1900)
#define EIGEN_HAS_CXX11_NOEXCEPT 1 #define EIGEN_HAS_CXX11_NOEXCEPT 1
#else #else
#define EIGEN_HAS_CXX11_NOEXCEPT 0 #define EIGEN_HAS_CXX11_NOEXCEPT 0
@ -427,6 +471,8 @@
#define EIGEN_CAT2(a,b) a ## b #define EIGEN_CAT2(a,b) a ## b
#define EIGEN_CAT(a,b) EIGEN_CAT2(a,b) #define EIGEN_CAT(a,b) EIGEN_CAT2(a,b)
#define EIGEN_COMMA ,
// convert a token to a string // convert a token to a string
#define EIGEN_MAKESTRING2(a) #a #define EIGEN_MAKESTRING2(a) #a
#define EIGEN_MAKESTRING(a) EIGEN_MAKESTRING2(a) #define EIGEN_MAKESTRING(a) EIGEN_MAKESTRING2(a)
@ -725,6 +771,11 @@ namespace Eigen {
#define EIGEN_DEFAULT_ALIGN_BYTES EIGEN_MAX_ALIGN_BYTES #define EIGEN_DEFAULT_ALIGN_BYTES EIGEN_MAX_ALIGN_BYTES
#endif #endif
#ifndef EIGEN_UNALIGNED_VECTORIZE
#define EIGEN_UNALIGNED_VECTORIZE 1
#endif
//---------------------------------------------------------------------- //----------------------------------------------------------------------
@ -839,18 +890,10 @@ namespace Eigen {
#define EIGEN_IMPLIES(a,b) (!(a) || (b)) #define EIGEN_IMPLIES(a,b) (!(a) || (b))
#define EIGEN_MAKE_CWISE_BINARY_OP(METHOD,FUNCTOR) \ // the expression type of a standard coefficient wise binary operation
template<typename OtherDerived> \ #define EIGEN_CWISE_BINARY_RETURN_TYPE(LHS,RHS,OPNAME) \
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const CwiseBinaryOp<FUNCTOR<Scalar>, const Derived, const OtherDerived> \
(METHOD)(const EIGEN_CURRENT_STORAGE_BASE_CLASS<OtherDerived> &other) const \
{ \
return CwiseBinaryOp<FUNCTOR<Scalar>, const Derived, const OtherDerived>(derived(), other.derived()); \
}
// the expression type of a cwise product
#define EIGEN_CWISE_PRODUCT_RETURN_TYPE(LHS,RHS) \
CwiseBinaryOp< \ CwiseBinaryOp< \
internal::scalar_product_op< \ EIGEN_CAT(EIGEN_CAT(internal::scalar_,OPNAME),_op)< \
typename internal::traits<LHS>::Scalar, \ typename internal::traits<LHS>::Scalar, \
typename internal::traits<RHS>::Scalar \ typename internal::traits<RHS>::Scalar \
>, \ >, \
@ -858,6 +901,55 @@ namespace Eigen {
const RHS \ const RHS \
> >
#define EIGEN_MAKE_CWISE_BINARY_OP(METHOD,OPNAME) \
template<typename OtherDerived> \
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const EIGEN_CWISE_BINARY_RETURN_TYPE(Derived,OtherDerived,OPNAME) \
(METHOD)(const EIGEN_CURRENT_STORAGE_BASE_CLASS<OtherDerived> &other) const \
{ \
return EIGEN_CWISE_BINARY_RETURN_TYPE(Derived,OtherDerived,OPNAME)(derived(), other.derived()); \
}
#define EIGEN_SCALAR_BINARY_SUPPORTED(OPNAME,TYPEA,TYPEB) \
(Eigen::internal::has_ReturnType<Eigen::ScalarBinaryOpTraits<TYPEA,TYPEB,EIGEN_CAT(EIGEN_CAT(Eigen::internal::scalar_,OPNAME),_op)<TYPEA,TYPEB> > >::value)
#define EIGEN_EXPR_BINARYOP_SCALAR_RETURN_TYPE(EXPR,SCALAR,OPNAME) \
CwiseBinaryOp<EIGEN_CAT(EIGEN_CAT(internal::scalar_,OPNAME),_op)<typename internal::traits<EXPR>::Scalar,SCALAR>, const EXPR, \
const typename internal::plain_constant_type<EXPR,SCALAR>::type>
#define EIGEN_SCALAR_BINARYOP_EXPR_RETURN_TYPE(SCALAR,EXPR,OPNAME) \
CwiseBinaryOp<EIGEN_CAT(EIGEN_CAT(internal::scalar_,OPNAME),_op)<SCALAR,typename internal::traits<EXPR>::Scalar>, \
const typename internal::plain_constant_type<EXPR,SCALAR>::type, const EXPR>
// Workaround for MSVC 2010 (see ML thread "patch with compile for for MSVC 2010")
#if EIGEN_COMP_MSVC_STRICT<=1600
#define EIGEN_MSVC10_WORKAROUND_BINARYOP_RETURN_TYPE(X) typename internal::enable_if<true,X>::type
#else
#define EIGEN_MSVC10_WORKAROUND_BINARYOP_RETURN_TYPE(X) X
#endif
#define EIGEN_MAKE_SCALAR_BINARY_OP_ONTHERIGHT(METHOD,OPNAME) \
template <typename T> EIGEN_DEVICE_FUNC inline \
EIGEN_MSVC10_WORKAROUND_BINARYOP_RETURN_TYPE(const EIGEN_EXPR_BINARYOP_SCALAR_RETURN_TYPE(Derived,typename internal::promote_scalar_arg<Scalar EIGEN_COMMA T EIGEN_COMMA EIGEN_SCALAR_BINARY_SUPPORTED(OPNAME,Scalar,T)>::type,OPNAME))\
(METHOD)(const T& scalar) const { \
typedef typename internal::promote_scalar_arg<Scalar,T,EIGEN_SCALAR_BINARY_SUPPORTED(OPNAME,Scalar,T)>::type PromotedT; \
return EIGEN_EXPR_BINARYOP_SCALAR_RETURN_TYPE(Derived,PromotedT,OPNAME)(derived(), \
typename internal::plain_constant_type<Derived,PromotedT>::type(derived().rows(), derived().cols(), internal::scalar_constant_op<PromotedT>(scalar))); \
}
#define EIGEN_MAKE_SCALAR_BINARY_OP_ONTHELEFT(METHOD,OPNAME) \
template <typename T> EIGEN_DEVICE_FUNC inline friend \
EIGEN_MSVC10_WORKAROUND_BINARYOP_RETURN_TYPE(const EIGEN_SCALAR_BINARYOP_EXPR_RETURN_TYPE(typename internal::promote_scalar_arg<Scalar EIGEN_COMMA T EIGEN_COMMA EIGEN_SCALAR_BINARY_SUPPORTED(OPNAME,T,Scalar)>::type,Derived,OPNAME)) \
(METHOD)(const T& scalar, const StorageBaseType& matrix) { \
typedef typename internal::promote_scalar_arg<Scalar,T,EIGEN_SCALAR_BINARY_SUPPORTED(OPNAME,T,Scalar)>::type PromotedT; \
return EIGEN_SCALAR_BINARYOP_EXPR_RETURN_TYPE(PromotedT,Derived,OPNAME)( \
typename internal::plain_constant_type<Derived,PromotedT>::type(matrix.derived().rows(), matrix.derived().cols(), internal::scalar_constant_op<PromotedT>(scalar)), matrix.derived()); \
}
#define EIGEN_MAKE_SCALAR_BINARY_OP(METHOD,OPNAME) \
EIGEN_MAKE_SCALAR_BINARY_OP_ONTHELEFT(METHOD,OPNAME) \
EIGEN_MAKE_SCALAR_BINARY_OP_ONTHERIGHT(METHOD,OPNAME)
#ifdef EIGEN_EXCEPTIONS #ifdef EIGEN_EXCEPTIONS
# define EIGEN_THROW_X(X) throw X # define EIGEN_THROW_X(X) throw X
# define EIGEN_THROW throw # define EIGEN_THROW throw
@ -865,8 +957,8 @@ namespace Eigen {
# define EIGEN_CATCH(X) catch (X) # define EIGEN_CATCH(X) catch (X)
#else #else
# ifdef __CUDA_ARCH__ # ifdef __CUDA_ARCH__
# define EIGEN_THROW_X(X) asm("trap;") return {} # define EIGEN_THROW_X(X) asm("trap;")
# define EIGEN_THROW asm("trap;"); return {} # define EIGEN_THROW asm("trap;")
# else # else
# define EIGEN_THROW_X(X) std::abort() # define EIGEN_THROW_X(X) std::abort()
# define EIGEN_THROW std::abort() # define EIGEN_THROW std::abort()
@ -875,10 +967,16 @@ namespace Eigen {
# define EIGEN_CATCH(X) else # define EIGEN_CATCH(X) else
#endif #endif
#if EIGEN_HAS_CXX11_NOEXCEPT #if EIGEN_HAS_CXX11_NOEXCEPT
# define EIGEN_INCLUDE_TYPE_TRAITS
# define EIGEN_NOEXCEPT noexcept
# define EIGEN_NOEXCEPT_IF(x) noexcept(x)
# define EIGEN_NO_THROW noexcept(true) # define EIGEN_NO_THROW noexcept(true)
# define EIGEN_EXCEPTION_SPEC(X) noexcept(false) # define EIGEN_EXCEPTION_SPEC(X) noexcept(false)
#else #else
# define EIGEN_NOEXCEPT
# define EIGEN_NOEXCEPT_IF(x)
# define EIGEN_NO_THROW throw() # define EIGEN_NO_THROW throw()
# define EIGEN_EXCEPTION_SPEC(X) throw(X) # define EIGEN_EXCEPTION_SPEC(X) throw(X)
#endif #endif

Some files were not shown because too many files have changed in this diff Show More