diff --git a/CMakeLists.txt b/CMakeLists.txt index 1d37f94f7..9a95b684b 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,4 +1,4 @@ -project(Eigen) +project(Eigen3) cmake_minimum_required(VERSION 2.8.5) @@ -8,6 +8,11 @@ if(${CMAKE_SOURCE_DIR} STREQUAL ${CMAKE_BINARY_DIR}) message(FATAL_ERROR "In-source builds not allowed. Please make a new directory (called a build directory) and run CMake from there. You may need to remove CMakeCache.txt. ") endif() +# Alias Eigen_*_DIR to Eigen3_*_DIR: + +set(Eigen_SOURCE_DIR ${Eigen3_SOURCE_DIR}) +set(Eigen_BINARY_DIR ${Eigen3_BINARY_DIR}) + # guard against bad build-type strings if (NOT CMAKE_BUILD_TYPE) @@ -93,9 +98,11 @@ else() endif() option(EIGEN_BUILD_BTL "Build benchmark suite" OFF) -if(NOT WIN32) + +# Disable pkgconfig only for native Windows builds +if(NOT WIN32 OR NOT CMAKE_HOST_SYSTEM_NAME MATCHES Windows) option(EIGEN_BUILD_PKGCONFIG "Build pkg-config .pc file for Eigen" ON) -endif(NOT WIN32) +endif() set(CMAKE_INCLUDE_CURRENT_DIR ON) @@ -120,7 +127,7 @@ endmacro(ei_add_cxx_compiler_flag) if(NOT MSVC) # We assume that other compilers are partly compatible with GNUCC - # clang outputs some warnings for unknwon flags that are not caught by check_cxx_compiler_flag + # clang outputs some warnings for unknown flags that are not caught by check_cxx_compiler_flag # adding -Werror turns such warnings into errors check_cxx_compiler_flag("-Werror" COMPILER_SUPPORT_WERROR) if(COMPILER_SUPPORT_WERROR) @@ -142,8 +149,11 @@ if(NOT MSVC) ei_add_cxx_compiler_flag("-Wwrite-strings") ei_add_cxx_compiler_flag("-Wformat-security") ei_add_cxx_compiler_flag("-Wshorten-64-to-32") + ei_add_cxx_compiler_flag("-Wlogical-op") ei_add_cxx_compiler_flag("-Wenum-conversion") ei_add_cxx_compiler_flag("-Wc++11-extensions") + ei_add_cxx_compiler_flag("-Wdouble-promotion") +# ei_add_cxx_compiler_flag("-Wconversion") # -Wshadow is insanely too strict with gcc, hopefully it will become usable with gcc 6 # if(NOT CMAKE_COMPILER_IS_GNUCXX OR 
(CMAKE_CXX_COMPILER_VERSION VERSION_GREATER "5.0.0")) @@ -159,7 +169,7 @@ if(NOT MSVC) ei_add_cxx_compiler_flag("-fno-common") ei_add_cxx_compiler_flag("-fstrict-aliasing") ei_add_cxx_compiler_flag("-wd981") # disable ICC's "operands are evaluated in unspecified order" remark - ei_add_cxx_compiler_flag("-wd2304") # disbale ICC's "warning #2304: non-explicit constructor with single argument may cause implicit type conversion" produced by -Wnon-virtual-dtor + ei_add_cxx_compiler_flag("-wd2304") # disable ICC's "warning #2304: non-explicit constructor with single argument may cause implicit type conversion" produced by -Wnon-virtual-dtor # The -ansi flag must be added last, otherwise it is also used as a linker flag by check_cxx_compiler_flag making it fails @@ -402,7 +412,7 @@ if(EIGEN_BUILD_PKGCONFIG) install(FILES ${CMAKE_CURRENT_BINARY_DIR}/eigen3.pc DESTINATION ${PKGCONFIG_INSTALL_DIR} ) -endif(EIGEN_BUILD_PKGCONFIG) +endif() add_subdirectory(Eigen) diff --git a/Eigen/CMakeLists.txt b/Eigen/CMakeLists.txt index a92dd6f6c..9eb502b79 100644 --- a/Eigen/CMakeLists.txt +++ b/Eigen/CMakeLists.txt @@ -16,4 +16,4 @@ install(FILES DESTINATION ${INCLUDE_INSTALL_DIR}/Eigen COMPONENT Devel ) -add_subdirectory(src) +install(DIRECTORY src DESTINATION ${INCLUDE_INSTALL_DIR}/Eigen COMPONENT Devel FILES_MATCHING PATTERN "*.h") diff --git a/Eigen/Cholesky b/Eigen/Cholesky index 705a04cc4..369d1f5ec 100644 --- a/Eigen/Cholesky +++ b/Eigen/Cholesky @@ -31,7 +31,8 @@ #include "src/Cholesky/LLT.h" #include "src/Cholesky/LDLT.h" #ifdef EIGEN_USE_LAPACKE -#include "src/Cholesky/LLT_MKL.h" +#include "src/misc/lapacke.h" +#include "src/Cholesky/LLT_LAPACKE.h" #endif #include "src/Core/util/ReenableStupidWarnings.h" diff --git a/Eigen/Core b/Eigen/Core index d67cb67af..3fabc5a43 100644 --- a/Eigen/Core +++ b/Eigen/Core @@ -164,6 +164,7 @@ #if EIGEN_COMP_ICC >= 1110 #include #else + #include #include #include #ifdef EIGEN_VECTORIZE_SSE3 @@ -259,6 +260,11 @@ // for min/max: #include +// for 
std::is_nothrow_move_assignable +#ifdef EIGEN_INCLUDE_TYPE_TRAITS +#include +#endif + // for outputting debug info #ifdef EIGEN_DEBUG_ASSIGN #include @@ -332,8 +338,8 @@ using std::ptrdiff_t; #include "src/Core/NumTraits.h" #include "src/Core/MathFunctions.h" -#include "src/Core/SpecialFunctions.h" #include "src/Core/GenericPacketMath.h" +#include "src/Core/MathFunctionsImpl.h" #if defined EIGEN_VECTORIZE_AVX512 #include "src/Core/arch/SSE/PacketMath.h" @@ -368,23 +374,29 @@ using std::ptrdiff_t; #include "src/Core/arch/ZVector/Complex.h" #endif +// Half float support #include "src/Core/arch/CUDA/Half.h" +#include "src/Core/arch/CUDA/PacketMathHalf.h" +#include "src/Core/arch/CUDA/TypeCasting.h" #if defined EIGEN_VECTORIZE_CUDA #include "src/Core/arch/CUDA/PacketMath.h" - #include "src/Core/arch/CUDA/PacketMathHalf.h" #include "src/Core/arch/CUDA/MathFunctions.h" - #include "src/Core/arch/CUDA/TypeCasting.h" #endif #include "src/Core/arch/Default/Settings.h" +#include "src/Core/functors/TernaryFunctors.h" #include "src/Core/functors/BinaryFunctors.h" #include "src/Core/functors/UnaryFunctors.h" #include "src/Core/functors/NullaryFunctors.h" #include "src/Core/functors/StlFunctors.h" #include "src/Core/functors/AssignmentFunctors.h" +// Specialized functors to enable the processing of complex numbers +// on CUDA devices +#include "src/Core/arch/CUDA/Complex.h" + #include "src/Core/DenseCoeffsBase.h" #include "src/Core/DenseBase.h" #include "src/Core/MatrixBase.h" @@ -411,6 +423,7 @@ using std::ptrdiff_t; #include "src/Core/PlainObjectBase.h" #include "src/Core/Matrix.h" #include "src/Core/Array.h" +#include "src/Core/CwiseTernaryOp.h" #include "src/Core/CwiseBinaryOp.h" #include "src/Core/CwiseUnaryOp.h" #include "src/Core/CwiseNullaryOp.h" diff --git a/Eigen/Eigenvalues b/Eigen/Eigenvalues index ea93eb303..009e529e1 100644 --- a/Eigen/Eigenvalues +++ b/Eigen/Eigenvalues @@ -32,6 +32,7 @@ * \endcode */ +#include "src/misc/RealSvd2x2.h" #include 
"src/Eigenvalues/Tridiagonalization.h" #include "src/Eigenvalues/RealSchur.h" #include "src/Eigenvalues/EigenSolver.h" @@ -44,9 +45,10 @@ #include "src/Eigenvalues/GeneralizedEigenSolver.h" #include "src/Eigenvalues/MatrixBaseEigenvalues.h" #ifdef EIGEN_USE_LAPACKE -#include "src/Eigenvalues/RealSchur_MKL.h" -#include "src/Eigenvalues/ComplexSchur_MKL.h" -#include "src/Eigenvalues/SelfAdjointEigenSolver_MKL.h" +#include "src/misc/lapacke.h" +#include "src/Eigenvalues/RealSchur_LAPACKE.h" +#include "src/Eigenvalues/ComplexSchur_LAPACKE.h" +#include "src/Eigenvalues/SelfAdjointEigenSolver_LAPACKE.h" #endif #include "src/Core/util/ReenableStupidWarnings.h" diff --git a/Eigen/LU b/Eigen/LU index 2d70c92de..6f6c55629 100644 --- a/Eigen/LU +++ b/Eigen/LU @@ -28,7 +28,8 @@ #include "src/LU/FullPivLU.h" #include "src/LU/PartialPivLU.h" #ifdef EIGEN_USE_LAPACKE -#include "src/LU/PartialPivLU_MKL.h" +#include "src/misc/lapacke.h" +#include "src/LU/PartialPivLU_LAPACKE.h" #endif #include "src/LU/Determinant.h" #include "src/LU/InverseImpl.h" diff --git a/Eigen/QR b/Eigen/QR index 25c781cc1..80838e3bd 100644 --- a/Eigen/QR +++ b/Eigen/QR @@ -36,8 +36,9 @@ #include "src/QR/ColPivHouseholderQR.h" #include "src/QR/CompleteOrthogonalDecomposition.h" #ifdef EIGEN_USE_LAPACKE -#include "src/QR/HouseholderQR_MKL.h" -#include "src/QR/ColPivHouseholderQR_MKL.h" +#include "src/misc/lapacke.h" +#include "src/QR/HouseholderQR_LAPACKE.h" +#include "src/QR/ColPivHouseholderQR_LAPACKE.h" #endif #include "src/Core/util/ReenableStupidWarnings.h" diff --git a/Eigen/SVD b/Eigen/SVD index b353f3f54..86143c23d 100644 --- a/Eigen/SVD +++ b/Eigen/SVD @@ -31,12 +31,14 @@ * \endcode */ +#include "src/misc/RealSvd2x2.h" #include "src/SVD/UpperBidiagonalization.h" #include "src/SVD/SVDBase.h" #include "src/SVD/JacobiSVD.h" #include "src/SVD/BDCSVD.h" #if defined(EIGEN_USE_LAPACKE) && !defined(EIGEN_USE_LAPACKE_STRICT) -#include "src/SVD/JacobiSVD_MKL.h" +#include "src/misc/lapacke.h" +#include 
"src/SVD/JacobiSVD_LAPACKE.h" #endif #include "src/Core/util/ReenableStupidWarnings.h" diff --git a/Eigen/SuperLUSupport b/Eigen/SuperLUSupport index 113f58ee5..59312a82d 100644 --- a/Eigen/SuperLUSupport +++ b/Eigen/SuperLUSupport @@ -43,7 +43,7 @@ namespace Eigen { struct SluMatrix; } * - class SuperLU: a supernodal sequential LU factorization. * - class SuperILU: a supernodal sequential incomplete LU factorization (to be used as a preconditioner for iterative methods). * - * \warning This wrapper is only for the 4.x versions of SuperLU. The 3.x and 5.x versions are not supported. + * \warning This wrapper requires at least versions 4.0 of SuperLU. The 3.x versions are not supported. * * \warning When including this module, you have to use SUPERLU_EMPTY instead of EMPTY which is no longer defined because it is too polluting. * diff --git a/Eigen/src/CMakeLists.txt b/Eigen/src/CMakeLists.txt deleted file mode 100644 index c326f374d..000000000 --- a/Eigen/src/CMakeLists.txt +++ /dev/null @@ -1,7 +0,0 @@ -file(GLOB Eigen_src_subdirectories "*") -escape_string_as_regex(ESCAPED_CMAKE_CURRENT_SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}") -foreach(f ${Eigen_src_subdirectories}) - if(NOT f MATCHES "\\.txt" AND NOT f MATCHES "${ESCAPED_CMAKE_CURRENT_SOURCE_DIR}/[.].+" ) - add_subdirectory(${f}) - endif() -endforeach() diff --git a/Eigen/src/Cholesky/CMakeLists.txt b/Eigen/src/Cholesky/CMakeLists.txt deleted file mode 100644 index d01488b41..000000000 --- a/Eigen/src/Cholesky/CMakeLists.txt +++ /dev/null @@ -1,6 +0,0 @@ -FILE(GLOB Eigen_Cholesky_SRCS "*.h") - -INSTALL(FILES - ${Eigen_Cholesky_SRCS} - DESTINATION ${INCLUDE_INSTALL_DIR}/Eigen/src/Cholesky COMPONENT Devel - ) diff --git a/Eigen/src/Cholesky/LDLT.h b/Eigen/src/Cholesky/LDLT.h index 538aff956..fcee7b2e3 100644 --- a/Eigen/src/Cholesky/LDLT.h +++ b/Eigen/src/Cholesky/LDLT.h @@ -43,6 +43,8 @@ namespace internal { * Remember that Cholesky decompositions are not rank-revealing. 
Also, do not use a Cholesky * decomposition to determine whether a system of equations has a solution. * + * This class supports the \link InplaceDecomposition inplace decomposition \endlink mechanism. + * * \sa MatrixBase::ldlt(), SelfAdjointView::ldlt(), class LLT */ template class LDLT @@ -52,7 +54,6 @@ template class LDLT enum { RowsAtCompileTime = MatrixType::RowsAtCompileTime, ColsAtCompileTime = MatrixType::ColsAtCompileTime, - Options = MatrixType::Options & ~RowMajorBit, // these are the options for the TmpMatrixType, we need a ColMajor matrix here! MaxRowsAtCompileTime = MatrixType::MaxRowsAtCompileTime, MaxColsAtCompileTime = MatrixType::MaxColsAtCompileTime, UpLo = _UpLo @@ -61,7 +62,7 @@ template class LDLT typedef typename NumTraits::Real RealScalar; typedef Eigen::Index Index; ///< \deprecated since Eigen 3.3 typedef typename MatrixType::StorageIndex StorageIndex; - typedef Matrix TmpMatrixType; + typedef Matrix TmpMatrixType; typedef Transpositions TranspositionType; typedef PermutationMatrix PermutationType; @@ -97,6 +98,7 @@ template class LDLT /** \brief Constructor with decomposition * * This calculates the decomposition for the input \a matrix. + * * \sa LDLT(Index size) */ template @@ -110,6 +112,23 @@ template class LDLT compute(matrix.derived()); } + /** \brief Constructs a LDLT factorization from a given matrix + * + * This overloaded constructor is provided for \link InplaceDecomposition inplace decomposition \endlink when \c MatrixType is a Eigen::Ref. 
+ * + * \sa LDLT(const EigenBase&) + */ + template + explicit LDLT(EigenBase& matrix) + : m_matrix(matrix.derived()), + m_transpositions(matrix.rows()), + m_temporary(matrix.rows()), + m_sign(internal::ZeroSign), + m_isInitialized(false) + { + compute(matrix.derived()); + } + /** Clear any existing decomposition * \sa rankUpdate(w,sigma) */ @@ -234,7 +253,7 @@ template class LDLT ComputationInfo info() const { eigen_assert(m_isInitialized && "LDLT is not initialized."); - return Success; + return m_info; } #ifndef EIGEN_PARSED_BY_DOXYGEN @@ -262,6 +281,7 @@ template class LDLT TmpMatrixType m_temporary; internal::SignMatrix m_sign; bool m_isInitialized; + ComputationInfo m_info; }; namespace internal { @@ -279,6 +299,8 @@ template<> struct ldlt_inplace typedef typename TranspositionType::StorageIndex IndexType; eigen_assert(mat.rows()==mat.cols()); const Index size = mat.rows(); + bool found_zero_pivot = false; + bool ret = true; if (size <= 1) { @@ -337,9 +359,27 @@ template<> struct ldlt_inplace // we should only make sure that we do not introduce INF or NaN values. // Remark that LAPACK also uses 0 as the cutoff value. RealScalar realAkk = numext::real(mat.coeffRef(k,k)); - if((rs>0) && (abs(realAkk) > RealScalar(0))) + bool pivot_is_valid = (abs(realAkk) > RealScalar(0)); + + if(k==0 && !pivot_is_valid) + { + // The entire diagonal is zero, there is nothing more to do + // except filling the transpositions, and checking whether the matrix is zero. 
+ sign = ZeroSign; + for(Index j = 0; j0) && pivot_is_valid) A21 /= realAkk; + if(found_zero_pivot && pivot_is_valid) ret = false; // factorization failed + else if(!pivot_is_valid) found_zero_pivot = true; + if (sign == PositiveSemiDef) { if (realAkk < static_cast(0)) sign = Indefinite; } else if (sign == NegativeSemiDef) { @@ -350,7 +390,7 @@ template<> struct ldlt_inplace } } - return true; + return ret; } // Reference for the algorithm: Davis and Hager, "Multiple Rank @@ -474,7 +514,7 @@ LDLT& LDLT::compute(const EigenBase::unblocked(m_matrix, m_transpositions, m_temporary, m_sign); + m_info = internal::ldlt_inplace::unblocked(m_matrix, m_transpositions, m_temporary, m_sign) ? Success : NumericalIssue; m_isInitialized = true; return *this; @@ -602,7 +642,6 @@ MatrixType LDLT::reconstructedMatrix() const return res; } -#ifndef __CUDACC__ /** \cholesky_module * \returns the Cholesky decomposition with full pivoting without square root of \c *this * \sa MatrixBase::ldlt() @@ -624,7 +663,6 @@ MatrixBase::ldlt() const { return LDLT(derived()); } -#endif // __CUDACC__ } // end namespace Eigen diff --git a/Eigen/src/Cholesky/LLT.h b/Eigen/src/Cholesky/LLT.h index 19578b216..ddf4875ab 100644 --- a/Eigen/src/Cholesky/LLT.h +++ b/Eigen/src/Cholesky/LLT.h @@ -41,6 +41,8 @@ template struct LLT_Traits; * Example: \include LLT_example.cpp * Output: \verbinclude LLT_example.out * + * This class supports the \link InplaceDecomposition inplace decomposition \endlink mechanism. 
+ * \sa MatrixBase::llt(), SelfAdjointView::llt(), class LDLT */ /* HEY THIS DOX IS DISABLED BECAUSE THERE's A BUG EITHER HERE OR IN LDLT ABOUT THAT (OR BOTH) @@ -54,7 +56,6 @@ template class LLT enum { RowsAtCompileTime = MatrixType::RowsAtCompileTime, ColsAtCompileTime = MatrixType::ColsAtCompileTime, - Options = MatrixType::Options, MaxColsAtCompileTime = MatrixType::MaxColsAtCompileTime }; typedef typename MatrixType::Scalar Scalar; @@ -95,6 +96,21 @@ template class LLT compute(matrix.derived()); } + /** \brief Constructs an LLT factorization from a given matrix + * + * This overloaded constructor is provided for \link InplaceDecomposition inplace decomposition \endlink when + * \c MatrixType is an Eigen::Ref. + * + * \sa LLT(const EigenBase&) + */ + template + explicit LLT(EigenBase& matrix) + : m_matrix(matrix.derived()), + m_isInitialized(false) + { + compute(matrix.derived()); + } + /** \returns a view of the upper triangular matrix U */ inline typename Traits::MatrixU matrixU() const { @@ -491,7 +507,6 @@ MatrixType LLT::reconstructedMatrix() const return matrixL() * matrixL().adjoint().toDenseMatrix(); } -#ifndef __CUDACC__ /** \cholesky_module * \returns the LLT decomposition of \c *this * \sa SelfAdjointView::llt() @@ -513,7 +528,6 @@ SelfAdjointView::llt() const { return LLT(m_matrix); } -#endif // __CUDACC__ } // end namespace Eigen diff --git a/Eigen/src/Cholesky/LLT_MKL.h b/Eigen/src/Cholesky/LLT_LAPACKE.h similarity index 81% rename from Eigen/src/Cholesky/LLT_MKL.h rename to Eigen/src/Cholesky/LLT_LAPACKE.h index 0d42cb5bc..bc6489e69 100644 --- a/Eigen/src/Cholesky/LLT_MKL.h +++ b/Eigen/src/Cholesky/LLT_LAPACKE.h @@ -25,25 +25,22 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ******************************************************************************** - * Content : Eigen bindings to Intel(R) MKL + * Content : Eigen bindings to LAPACKe * LLt decomposition based on LAPACKE_?potrf function. 
******************************************************************************** */ -#ifndef EIGEN_LLT_MKL_H -#define EIGEN_LLT_MKL_H - -#include "Eigen/src/Core/util/MKL_support.h" -#include +#ifndef EIGEN_LLT_LAPACKE_H +#define EIGEN_LLT_LAPACKE_H namespace Eigen { namespace internal { -template struct mkl_llt; +template struct lapacke_llt; -#define EIGEN_MKL_LLT(EIGTYPE, MKLTYPE, MKLPREFIX) \ -template<> struct mkl_llt \ +#define EIGEN_LAPACKE_LLT(EIGTYPE, BLASTYPE, LAPACKE_PREFIX) \ +template<> struct lapacke_llt \ { \ template \ static inline Index potrf(MatrixType& m, char uplo) \ @@ -53,13 +50,13 @@ template<> struct mkl_llt \ EIGTYPE* a; \ eigen_assert(m.rows()==m.cols()); \ /* Set up parameters for ?potrf */ \ - size = m.rows(); \ + size = convert_index(m.rows()); \ StorageOrder = MatrixType::Flags&RowMajorBit?RowMajor:ColMajor; \ matrix_order = StorageOrder==RowMajor ? LAPACK_ROW_MAJOR : LAPACK_COL_MAJOR; \ a = &(m.coeffRef(0,0)); \ - lda = m.outerStride(); \ + lda = convert_index(m.outerStride()); \ \ - info = LAPACKE_##MKLPREFIX##potrf( matrix_order, uplo, size, (MKLTYPE*)a, lda ); \ + info = LAPACKE_##LAPACKE_PREFIX##potrf( matrix_order, uplo, size, (BLASTYPE*)a, lda ); \ info = (info==0) ? -1 : info>0 ? 
info-1 : size; \ return info; \ } \ @@ -69,7 +66,7 @@ template<> struct llt_inplace \ template \ static Index blocked(MatrixType& m) \ { \ - return mkl_llt::potrf(m, 'L'); \ + return lapacke_llt::potrf(m, 'L'); \ } \ template \ static Index rankUpdate(MatrixType& mat, const VectorType& vec, const typename MatrixType::RealScalar& sigma) \ @@ -80,7 +77,7 @@ template<> struct llt_inplace \ template \ static Index blocked(MatrixType& m) \ { \ - return mkl_llt::potrf(m, 'U'); \ + return lapacke_llt::potrf(m, 'U'); \ } \ template \ static Index rankUpdate(MatrixType& mat, const VectorType& vec, const typename MatrixType::RealScalar& sigma) \ @@ -90,13 +87,13 @@ template<> struct llt_inplace \ } \ }; -EIGEN_MKL_LLT(double, double, d) -EIGEN_MKL_LLT(float, float, s) -EIGEN_MKL_LLT(dcomplex, MKL_Complex16, z) -EIGEN_MKL_LLT(scomplex, MKL_Complex8, c) +EIGEN_LAPACKE_LLT(double, double, d) +EIGEN_LAPACKE_LLT(float, float, s) +EIGEN_LAPACKE_LLT(dcomplex, lapack_complex_double, z) +EIGEN_LAPACKE_LLT(scomplex, lapack_complex_float, c) } // end namespace internal } // end namespace Eigen -#endif // EIGEN_LLT_MKL_H +#endif // EIGEN_LLT_LAPACKE_H diff --git a/Eigen/src/CholmodSupport/CMakeLists.txt b/Eigen/src/CholmodSupport/CMakeLists.txt deleted file mode 100644 index 814dfa613..000000000 --- a/Eigen/src/CholmodSupport/CMakeLists.txt +++ /dev/null @@ -1,6 +0,0 @@ -FILE(GLOB Eigen_CholmodSupport_SRCS "*.h") - -INSTALL(FILES - ${Eigen_CholmodSupport_SRCS} - DESTINATION ${INCLUDE_INSTALL_DIR}/Eigen/src/CholmodSupport COMPONENT Devel - ) diff --git a/Eigen/src/Core/Array.h b/Eigen/src/Core/Array.h index 7480d1e24..0d34269fd 100644 --- a/Eigen/src/Core/Array.h +++ b/Eigen/src/Core/Array.h @@ -37,7 +37,7 @@ struct traits > : tra * storage layout. * * This class can be extended with the help of the plugin mechanism described on the page - * \ref TopicCustomizingEigen by defining the preprocessor symbol \c EIGEN_ARRAY_PLUGIN. 
+ * \ref TopicCustomizing_Plugins by defining the preprocessor symbol \c EIGEN_ARRAY_PLUGIN. * * \sa \blank \ref TutorialArrayClass, \ref TopicClassHierarchy */ @@ -147,9 +147,9 @@ class Array } #endif -#ifdef EIGEN_HAVE_RVALUE_REFERENCES +#if EIGEN_HAS_RVALUE_REFERENCES EIGEN_DEVICE_FUNC - Array(Array&& other) + Array(Array&& other) EIGEN_NOEXCEPT_IF(std::is_nothrow_move_constructible::value) : Base(std::move(other)) { Base::_check_template_params(); @@ -157,7 +157,7 @@ class Array Base::_set_noalias(other); } EIGEN_DEVICE_FUNC - Array& operator=(Array&& other) + Array& operator=(Array&& other) EIGEN_NOEXCEPT_IF(std::is_nothrow_move_assignable::value) { other.swap(*this); return *this; diff --git a/Eigen/src/Core/ArrayBase.h b/Eigen/src/Core/ArrayBase.h index 0443e3032..f0232f65e 100644 --- a/Eigen/src/Core/ArrayBase.h +++ b/Eigen/src/Core/ArrayBase.h @@ -32,7 +32,7 @@ template class MatrixWrapper; * \tparam Derived is the derived type, e.g., an array or an expression type. * * This class can be extended with the help of the plugin mechanism described on the page - * \ref TopicCustomizingEigen by defining the preprocessor symbol \c EIGEN_ARRAYBASE_PLUGIN. + * \ref TopicCustomizing_Plugins by defining the preprocessor symbol \c EIGEN_ARRAYBASE_PLUGIN. 
* * \sa class MatrixBase, \ref TopicClassHierarchy */ @@ -52,8 +52,6 @@ template class ArrayBase typedef typename NumTraits::Real RealScalar; typedef DenseBase Base; - using Base::operator*; - using Base::operator/; using Base::RowsAtCompileTime; using Base::ColsAtCompileTime; using Base::SizeAtCompileTime; @@ -89,6 +87,7 @@ template class ArrayBase #endif // not EIGEN_PARSED_BY_DOXYGEN #define EIGEN_CURRENT_STORAGE_BASE_CLASS Eigen::ArrayBase +#define EIGEN_DOC_UNARY_ADDONS(X,Y) # include "../plugins/CommonCwiseUnaryOps.h" # include "../plugins/MatrixCwiseUnaryOps.h" # include "../plugins/ArrayCwiseUnaryOps.h" @@ -99,6 +98,7 @@ template class ArrayBase # include EIGEN_ARRAYBASE_PLUGIN # endif #undef EIGEN_CURRENT_STORAGE_BASE_CLASS +#undef EIGEN_DOC_UNARY_ADDONS /** Special case of the template operator=, in order to prevent the compiler * from generating a default operator= (issue hit with g++ 4.1) @@ -178,7 +178,7 @@ template EIGEN_STRONG_INLINE Derived & ArrayBase::operator-=(const ArrayBase &other) { - call_assignment(derived(), other.derived(), internal::sub_assign_op()); + call_assignment(derived(), other.derived(), internal::sub_assign_op()); return derived(); } @@ -191,7 +191,7 @@ template EIGEN_STRONG_INLINE Derived & ArrayBase::operator+=(const ArrayBase& other) { - call_assignment(derived(), other.derived(), internal::add_assign_op()); + call_assignment(derived(), other.derived(), internal::add_assign_op()); return derived(); } @@ -217,7 +217,7 @@ template EIGEN_STRONG_INLINE Derived & ArrayBase::operator/=(const ArrayBase& other) { - call_assignment(derived(), other.derived(), internal::div_assign_op()); + call_assignment(derived(), other.derived(), internal::div_assign_op()); return derived(); } diff --git a/Eigen/src/Core/AssignEvaluator.h b/Eigen/src/Core/AssignEvaluator.h index b1193e421..b7cc7c0e9 100644 --- a/Eigen/src/Core/AssignEvaluator.h +++ b/Eigen/src/Core/AssignEvaluator.h @@ -75,23 +75,24 @@ private: DstIsRowMajor = DstFlags&RowMajorBit, 
SrcIsRowMajor = SrcFlags&RowMajorBit, StorageOrdersAgree = (int(DstIsRowMajor) == int(SrcIsRowMajor)), - MightVectorize = StorageOrdersAgree + MightVectorize = bool(StorageOrdersAgree) && (int(DstFlags) & int(SrcFlags) & ActualPacketAccessBit) - && (functor_traits::PacketAccess), + && bool(functor_traits::PacketAccess), MayInnerVectorize = MightVectorize && int(InnerSize)!=Dynamic && int(InnerSize)%int(InnerPacketSize)==0 && int(OuterStride)!=Dynamic && int(OuterStride)%int(InnerPacketSize)==0 - && int(JointAlignment)>=int(InnerRequiredAlignment), - MayLinearize = StorageOrdersAgree && (int(DstFlags) & int(SrcFlags) & LinearAccessBit), - MayLinearVectorize = MightVectorize && MayLinearize && DstHasDirectAccess - && ((int(DstAlignment)>=int(LinearRequiredAlignment)) || MaxSizeAtCompileTime == Dynamic), + && (EIGEN_UNALIGNED_VECTORIZE || int(JointAlignment)>=int(InnerRequiredAlignment)), + MayLinearize = bool(StorageOrdersAgree) && (int(DstFlags) & int(SrcFlags) & LinearAccessBit), + MayLinearVectorize = bool(MightVectorize) && MayLinearize && DstHasDirectAccess + && (EIGEN_UNALIGNED_VECTORIZE || (int(DstAlignment)>=int(LinearRequiredAlignment)) || MaxSizeAtCompileTime == Dynamic), /* If the destination isn't aligned, we have to do runtime checks and we don't unroll, so it's only good for large enough sizes. 
*/ - MaySliceVectorize = MightVectorize && DstHasDirectAccess - && (int(InnerMaxSize)==Dynamic || int(InnerMaxSize)>=3*InnerPacketSize) + MaySliceVectorize = bool(MightVectorize) && bool(DstHasDirectAccess) + && (int(InnerMaxSize)==Dynamic || int(InnerMaxSize)>=(EIGEN_UNALIGNED_VECTORIZE?InnerPacketSize:(3*InnerPacketSize))) /* slice vectorization can be slow, so we only want it if the slices are big, which is indicated by InnerMaxSize rather than InnerSize, think of the case of a dynamic block - in a fixed-size matrix */ + in a fixed-size matrix + However, with EIGEN_UNALIGNED_VECTORIZE and unrolling, slice vectorization is still worth it */ }; public: @@ -116,9 +117,9 @@ private: : 1, UnrollingLimit = EIGEN_UNROLLING_LIMIT * ActualPacketSize, MayUnrollCompletely = int(Dst::SizeAtCompileTime) != Dynamic - && int(Dst::SizeAtCompileTime) * int(SrcEvaluator::CoeffReadCost) <= int(UnrollingLimit), + && int(Dst::SizeAtCompileTime) * (int(DstEvaluator::CoeffReadCost)+int(SrcEvaluator::CoeffReadCost)) <= int(UnrollingLimit), MayUnrollInner = int(InnerSize) != Dynamic - && int(InnerSize) * int(SrcEvaluator::CoeffReadCost) <= int(UnrollingLimit) + && int(InnerSize) * (int(DstEvaluator::CoeffReadCost)+int(SrcEvaluator::CoeffReadCost)) <= int(UnrollingLimit) }; public: @@ -130,11 +131,17 @@ public: : int(NoUnrolling) ) : int(Traversal) == int(LinearVectorizedTraversal) - ? ( bool(MayUnrollCompletely) && (int(DstAlignment)>=int(LinearRequiredAlignment)) ? int(CompleteUnrolling) - : int(NoUnrolling) ) + ? ( bool(MayUnrollCompletely) && ( EIGEN_UNALIGNED_VECTORIZE || (int(DstAlignment)>=int(LinearRequiredAlignment))) + ? int(CompleteUnrolling) + : int(NoUnrolling) ) : int(Traversal) == int(LinearTraversal) ? ( bool(MayUnrollCompletely) ? int(CompleteUnrolling) : int(NoUnrolling) ) +#if EIGEN_UNALIGNED_VECTORIZE + : int(Traversal) == int(SliceVectorizedTraversal) + ? ( bool(MayUnrollInner) ? 
int(InnerUnrolling) + : int(NoUnrolling) ) +#endif : int(NoUnrolling) }; @@ -156,6 +163,7 @@ public: EIGEN_DEBUG_VAR(InnerMaxSize) EIGEN_DEBUG_VAR(LinearPacketSize) EIGEN_DEBUG_VAR(InnerPacketSize) + EIGEN_DEBUG_VAR(ActualPacketSize) EIGEN_DEBUG_VAR(StorageOrdersAgree) EIGEN_DEBUG_VAR(MightVectorize) EIGEN_DEBUG_VAR(MayLinearize) @@ -163,6 +171,7 @@ public: EIGEN_DEBUG_VAR(MayLinearVectorize) EIGEN_DEBUG_VAR(MaySliceVectorize) std::cerr << "Traversal" << " = " << Traversal << " (" << demangle_traversal(Traversal) << ")" << std::endl; + EIGEN_DEBUG_VAR(SrcEvaluator::CoeffReadCost) EIGEN_DEBUG_VAR(UnrollingLimit) EIGEN_DEBUG_VAR(MayUnrollCompletely) EIGEN_DEBUG_VAR(MayUnrollInner) @@ -256,13 +265,13 @@ struct copy_using_evaluator_innervec_CompleteUnrolling enum { outer = Index / DstXprType::InnerSizeAtCompileTime, inner = Index % DstXprType::InnerSizeAtCompileTime, - JointAlignment = Kernel::AssignmentTraits::JointAlignment, - DefaultAlignment = unpacket_traits::alignment + SrcAlignment = Kernel::AssignmentTraits::SrcAlignment, + DstAlignment = Kernel::AssignmentTraits::DstAlignment }; EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel) { - kernel.template assignPacketByOuterInner(outer, inner); + kernel.template assignPacketByOuterInner(outer, inner); enum { NextIndex = Index + unpacket_traits::size }; copy_using_evaluator_innervec_CompleteUnrolling::run(kernel); } @@ -274,23 +283,20 @@ struct copy_using_evaluator_innervec_CompleteUnrolling EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel&) { } }; -template +template struct copy_using_evaluator_innervec_InnerUnrolling { typedef typename Kernel::PacketType PacketType; - enum { - DefaultAlignment = unpacket_traits::alignment - }; EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel, Index outer) { - kernel.template assignPacketByOuterInner(outer, Index_); + kernel.template assignPacketByOuterInner(outer, Index_); enum { NextIndex = Index_ + unpacket_traits::size }; - 
copy_using_evaluator_innervec_InnerUnrolling::run(kernel, outer); + copy_using_evaluator_innervec_InnerUnrolling::run(kernel, outer); } }; -template -struct copy_using_evaluator_innervec_InnerUnrolling +template +struct copy_using_evaluator_innervec_InnerUnrolling { EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &, Index) { } }; @@ -419,9 +425,10 @@ struct dense_assignment_loop::size, + packetSize =unpacket_traits::size, alignedSize = (size/packetSize)*packetSize }; copy_using_evaluator_innervec_CompleteUnrolling::run(kernel); @@ -438,7 +445,8 @@ struct dense_assignment_loop { typedef typename Kernel::PacketType PacketType; enum { - DefaultAlignment = unpacket_traits::alignment + SrcAlignment = Kernel::AssignmentTraits::SrcAlignment, + DstAlignment = Kernel::AssignmentTraits::DstAlignment }; EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel) { @@ -447,7 +455,7 @@ struct dense_assignment_loop const Index packetSize = unpacket_traits::size; for(Index outer = 0; outer < outerSize; ++outer) for(Index inner = 0; inner < innerSize; inner+=packetSize) - kernel.template assignPacketByOuterInner(outer, inner); + kernel.template assignPacketByOuterInner(outer, inner); } }; @@ -467,9 +475,11 @@ struct dense_assignment_loop EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel) { typedef typename Kernel::DstEvaluatorType::XprType DstXprType; + typedef typename Kernel::AssignmentTraits Traits; const Index outerSize = kernel.outerSize(); for(Index outer = 0; outer < outerSize; ++outer) - copy_using_evaluator_innervec_InnerUnrolling::run(kernel, outer); + copy_using_evaluator_innervec_InnerUnrolling::run(kernel, outer); } }; @@ -518,7 +528,7 @@ struct dense_assignment_loop : int(Kernel::AssignmentTraits::DstAlignment) }; const Scalar *dst_ptr = &kernel.dstEvaluator().coeffRef(0,0); - if((!bool(dstIsAligned)) && (size_t(dst_ptr) % sizeof(Scalar))>0) + if((!bool(dstIsAligned)) && (UIntPtr(dst_ptr) % sizeof(Scalar))>0) { // the pointer 
is not aligend-on scalar, so alignment is not possible return dense_assignment_loop::run(kernel); @@ -549,6 +559,29 @@ struct dense_assignment_loop } }; +#if EIGEN_UNALIGNED_VECTORIZE +template +struct dense_assignment_loop +{ + EIGEN_DEVICE_FUNC static inline void run(Kernel &kernel) + { + typedef typename Kernel::DstEvaluatorType::XprType DstXprType; + typedef typename Kernel::PacketType PacketType; + + enum { size = DstXprType::InnerSizeAtCompileTime, + packetSize =unpacket_traits::size, + vectorizableSize = (size/packetSize)*packetSize }; + + for(Index outer = 0; outer < kernel.outerSize(); ++outer) + { + copy_using_evaluator_innervec_InnerUnrolling::run(kernel, outer); + copy_using_evaluator_DefaultTraversal_InnerUnrolling::run(kernel, outer); + } + } +}; +#endif + + /*************************************************************************** * Part 4 : Generic dense assignment kernel ***************************************************************************/ @@ -676,14 +709,14 @@ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void call_dense_assignment_loop(const DstX typedef generic_dense_assignment_kernel Kernel; Kernel kernel(dstEvaluator, srcEvaluator, func, dst.const_cast_derived()); - + dense_assignment_loop::run(kernel); } template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void call_dense_assignment_loop(const DstXprType& dst, const SrcXprType& src) { - call_dense_assignment_loop(dst, src, internal::assign_op()); + call_dense_assignment_loop(dst, src, internal::assign_op()); } /*************************************************************************** @@ -705,7 +738,7 @@ template<> struct AssignmentKind { typedef Dense2Dense Ki // This is the main assignment class template< typename DstXprType, typename SrcXprType, typename Functor, typename Kind = typename AssignmentKind< typename evaluator_traits::Shape , typename evaluator_traits::Shape >::Kind, - typename Scalar = typename DstXprType::Scalar> + typename EnableIf = void> struct Assignment; @@ -718,13 +751,13 
@@ template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void call_assignment(Dst& dst, const Src& src) { - call_assignment(dst, src, internal::assign_op()); + call_assignment(dst, src, internal::assign_op()); } template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void call_assignment(const Dst& dst, const Src& src) { - call_assignment(dst, src, internal::assign_op()); + call_assignment(dst, src, internal::assign_op()); } // Deal with "assume-aliasing" @@ -783,7 +816,7 @@ template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void call_assignment_no_alias(Dst& dst, const Src& src) { - call_assignment_no_alias(dst, src, internal::assign_op()); + call_assignment_no_alias(dst, src, internal::assign_op()); } template @@ -805,15 +838,17 @@ template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void call_assignment_no_alias_no_transpose(Dst& dst, const Src& src) { - call_assignment_no_alias_no_transpose(dst, src, internal::assign_op()); + call_assignment_no_alias_no_transpose(dst, src, internal::assign_op()); } // forward declaration template void check_for_aliasing(const Dst &dst, const Src &src); // Generic Dense to Dense assignment -template< typename DstXprType, typename SrcXprType, typename Functor, typename Scalar> -struct Assignment +// Note that the last template argument "Weak" is needed to make it possible to perform +// both partial specialization+SFINAE without ambiguous specialization +template< typename DstXprType, typename SrcXprType, typename Functor, typename Weak> +struct Assignment { EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(DstXprType &dst, const SrcXprType &src, const Functor &func) @@ -830,11 +865,13 @@ struct Assignment // Generic assignment through evalTo. // TODO: not sure we have to keep that one, but it helps porting current code to new evaluator mechanism. 
-template< typename DstXprType, typename SrcXprType, typename Functor, typename Scalar> -struct Assignment +// Note that the last template argument "Weak" is needed to make it possible to perform +// both partial specialization+SFINAE without ambiguous specialization +template< typename DstXprType, typename SrcXprType, typename Functor, typename Weak> +struct Assignment { EIGEN_DEVICE_FUNC - static EIGEN_STRONG_INLINE void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op &/*func*/) + static EIGEN_STRONG_INLINE void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op &/*func*/) { eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols()); src.evalTo(dst); diff --git a/Eigen/src/Core/Assign_MKL.h b/Eigen/src/Core/Assign_MKL.h old mode 100644 new mode 100755 index 897187a30..6c2ab9264 --- a/Eigen/src/Core/Assign_MKL.h +++ b/Eigen/src/Core/Assign_MKL.h @@ -81,10 +81,10 @@ class vml_assign_traits #define EIGEN_MKL_VML_DECLARE_UNARY_CALL(EIGENOP, VMLOP, EIGENTYPE, VMLTYPE, VMLMODE) \ template< typename DstXprType, typename SrcXprNested> \ - struct Assignment, SrcXprNested>, assign_op, \ - Dense2Dense, typename enable_if::EnableVml,EIGENTYPE>::type> { \ + struct Assignment, SrcXprNested>, assign_op, \ + Dense2Dense, typename enable_if::EnableVml>::type> { \ typedef CwiseUnaryOp, SrcXprNested> SrcXprType; \ - static void run(DstXprType &dst, const SrcXprType &src, const assign_op &/*func*/) { \ + static void run(DstXprType &dst, const SrcXprType &src, const assign_op &/*func*/) { \ eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols()); \ if(vml_assign_traits::Traversal==LinearTraversal) { \ VMLOP(dst.size(), (const VMLTYPE*)src.nestedExpression().data(), \ @@ -138,22 +138,24 @@ EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL(floor, Floor, _) EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL(ceil, Ceil, _) #define EIGEN_MKL_VML_DECLARE_POW_CALL(EIGENOP, VMLOP, EIGENTYPE, VMLTYPE, VMLMODE) \ - template< typename DstXprType, typename 
SrcXprNested> \ - struct Assignment, SrcXprNested>, assign_op, \ - Dense2Dense, typename enable_if::EnableVml,EIGENTYPE>::type> { \ - typedef CwiseUnaryOp, SrcXprNested> SrcXprType; \ - static void run(DstXprType &dst, const SrcXprType &src, const assign_op &/*func*/) { \ + template< typename DstXprType, typename SrcXprNested, typename Plain> \ + struct Assignment, SrcXprNested, \ + const CwiseNullaryOp,Plain> >, assign_op, \ + Dense2Dense, typename enable_if::EnableVml>::type> { \ + typedef CwiseBinaryOp, SrcXprNested, \ + const CwiseNullaryOp,Plain> > SrcXprType; \ + static void run(DstXprType &dst, const SrcXprType &src, const assign_op &/*func*/) { \ eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols()); \ - VMLTYPE exponent = reinterpret_cast(src.functor().m_exponent); \ + VMLTYPE exponent = reinterpret_cast(src.rhs().functor().m_other); \ if(vml_assign_traits::Traversal==LinearTraversal) \ { \ - VMLOP( dst.size(), (const VMLTYPE*)src.nestedExpression().data(), exponent, \ + VMLOP( dst.size(), (const VMLTYPE*)src.lhs().data(), exponent, \ (VMLTYPE*)dst.data() EIGEN_PP_EXPAND(EIGEN_VMLMODE_EXPAND_##VMLMODE) ); \ } else { \ const Index outerSize = dst.outerSize(); \ for(Index outer = 0; outer < outerSize; ++outer) { \ - const EIGENTYPE *src_ptr = src.IsRowMajor ? &(src.nestedExpression().coeffRef(outer,0)) : \ - &(src.nestedExpression().coeffRef(0, outer)); \ + const EIGENTYPE *src_ptr = src.IsRowMajor ? &(src.lhs().coeffRef(outer,0)) : \ + &(src.lhs().coeffRef(0, outer)); \ EIGENTYPE *dst_ptr = dst.IsRowMajor ? 
&(dst.coeffRef(outer,0)) : &(dst.coeffRef(0, outer)); \ VMLOP( dst.innerSize(), (const VMLTYPE*)src_ptr, exponent, \ (VMLTYPE*)dst_ptr EIGEN_PP_EXPAND(EIGEN_VMLMODE_EXPAND_##VMLMODE)); \ diff --git a/Eigen/src/Core/CMakeLists.txt b/Eigen/src/Core/CMakeLists.txt deleted file mode 100644 index 38c3afde9..000000000 --- a/Eigen/src/Core/CMakeLists.txt +++ /dev/null @@ -1,11 +0,0 @@ -FILE(GLOB Eigen_Core_SRCS "*.h") - -INSTALL(FILES - ${Eigen_Core_SRCS} - DESTINATION ${INCLUDE_INSTALL_DIR}/Eigen/src/Core COMPONENT Devel - ) - -ADD_SUBDIRECTORY(products) -ADD_SUBDIRECTORY(util) -ADD_SUBDIRECTORY(arch) -ADD_SUBDIRECTORY(functors) diff --git a/Eigen/src/Core/CommaInitializer.h b/Eigen/src/Core/CommaInitializer.h index 2abc6605c..d218e9814 100644 --- a/Eigen/src/Core/CommaInitializer.h +++ b/Eigen/src/Core/CommaInitializer.h @@ -80,9 +80,7 @@ struct CommaInitializer EIGEN_DEVICE_FUNC CommaInitializer& operator,(const DenseBase& other) { - if(other.cols()==0 || other.rows()==0) - return *this; - if (m_col==m_xpr.cols()) + if (m_col==m_xpr.cols() && (other.cols()!=0 || other.rows()!=m_currentBlockRows)) { m_row+=m_currentBlockRows; m_col = 0; @@ -90,15 +88,11 @@ struct CommaInitializer eigen_assert(m_row+m_currentBlockRows<=m_xpr.rows() && "Too many rows passed to comma initializer (operator<<)"); } - eigen_assert(m_col - (m_row, m_col) = other; - else - m_xpr.block(m_row, m_col, other.rows(), other.cols()) = other; + m_xpr.template block + (m_row, m_col, other.rows(), other.cols()) = other; m_col += other.cols(); return *this; } @@ -109,9 +103,7 @@ struct CommaInitializer EIGEN_EXCEPTION_SPEC(Eigen::eigen_assert_exception) #endif { - eigen_assert((m_row+m_currentBlockRows) == m_xpr.rows() - && m_col == m_xpr.cols() - && "Too few coefficients passed to comma initializer (operator<<)"); + finished(); } /** \returns the built matrix once all its coefficients have been set. 
@@ -122,7 +114,12 @@ struct CommaInitializer * \endcode */ EIGEN_DEVICE_FUNC - inline XprType& finished() { return m_xpr; } + inline XprType& finished() { + eigen_assert(((m_row+m_currentBlockRows) == m_xpr.rows() || m_xpr.cols() == 0) + && m_col == m_xpr.cols() + && "Too few coefficients passed to comma initializer (operator<<)"); + return m_xpr; + } XprType& m_xpr; // target expression Index m_row; // current row id diff --git a/Eigen/src/Core/ConditionEstimator.h b/Eigen/src/Core/ConditionEstimator.h index 68c5e918e..aa7efdc76 100644 --- a/Eigen/src/Core/ConditionEstimator.h +++ b/Eigen/src/Core/ConditionEstimator.h @@ -32,33 +32,6 @@ struct rcond_compute_sign { } }; -/** \brief Reciprocal condition number estimator. - * - * Computing a decomposition of a dense matrix takes O(n^3) operations, while - * this method estimates the condition number quickly and reliably in O(n^2) - * operations. - * - * \returns an estimate of the reciprocal condition number - * (1 / (||matrix||_1 * ||inv(matrix)||_1)) of matrix, given ||matrix||_1 and - * its decomposition. Supports the following decompositions: FullPivLU, - * PartialPivLU, LDLT, and LLT. - * - * \sa FullPivLU, PartialPivLU, LDLT, LLT. - */ -template -typename Decomposition::RealScalar -rcond_estimate_helper(typename Decomposition::RealScalar matrix_norm, const Decomposition& dec) -{ - typedef typename Decomposition::RealScalar RealScalar; - eigen_assert(dec.rows() == dec.cols()); - if (dec.rows() == 0) return RealScalar(1); - if (matrix_norm == RealScalar(0)) return RealScalar(0); - if (dec.rows() == 1) return RealScalar(1); - const RealScalar inverse_matrix_norm = rcond_invmatrix_L1_norm_estimate(dec); - return (inverse_matrix_norm == RealScalar(0) ? RealScalar(0) - : (RealScalar(1) / inverse_matrix_norm) / matrix_norm); -} - /** * \returns an estimate of ||inv(matrix)||_1 given a decomposition of * \a matrix that implements .solve() and .adjoint().solve() methods. 
@@ -94,7 +67,15 @@ typename Decomposition::RealScalar rcond_invmatrix_L1_norm_estimate(const Decomp if (n == 0) return 0; + // Disable Index to float conversion warning +#ifdef __INTEL_COMPILER + #pragma warning push + #pragma warning ( disable : 2259 ) +#endif Vector v = dec.solve(Vector::Ones(n) / Scalar(n)); +#ifdef __INTEL_COMPILER + #pragma warning pop +#endif // lower_bound is a lower bound on // ||inv(matrix)||_1 = sup_v ||inv(matrix) v||_1 / ||v||_1 @@ -151,7 +132,8 @@ typename Decomposition::RealScalar rcond_invmatrix_L1_norm_estimate(const Decomp // Hager's algorithm to vastly underestimate ||matrix||_1. Scalar alternating_sign(RealScalar(1)); for (Index i = 0; i < n; ++i) { - v[i] = alternating_sign * (RealScalar(1) + (RealScalar(i) / (RealScalar(n - 1)))); + // The static_cast is needed when Scalar is a complex and RealScalar implements expression templates + v[i] = alternating_sign * static_cast(RealScalar(1) + (RealScalar(i) / (RealScalar(n - 1)))); alternating_sign = -alternating_sign; } v = dec.solve(v); @@ -159,6 +141,33 @@ typename Decomposition::RealScalar rcond_invmatrix_L1_norm_estimate(const Decomp return numext::maxi(lower_bound, alternate_lower_bound); } +/** \brief Reciprocal condition number estimator. + * + * Computing a decomposition of a dense matrix takes O(n^3) operations, while + * this method estimates the condition number quickly and reliably in O(n^2) + * operations. + * + * \returns an estimate of the reciprocal condition number + * (1 / (||matrix||_1 * ||inv(matrix)||_1)) of matrix, given ||matrix||_1 and + * its decomposition. Supports the following decompositions: FullPivLU, + * PartialPivLU, LDLT, and LLT. + * + * \sa FullPivLU, PartialPivLU, LDLT, LLT. 
+ */ +template +typename Decomposition::RealScalar +rcond_estimate_helper(typename Decomposition::RealScalar matrix_norm, const Decomposition& dec) +{ + typedef typename Decomposition::RealScalar RealScalar; + eigen_assert(dec.rows() == dec.cols()); + if (dec.rows() == 0) return RealScalar(1); + if (matrix_norm == RealScalar(0)) return RealScalar(0); + if (dec.rows() == 1) return RealScalar(1); + const RealScalar inverse_matrix_norm = rcond_invmatrix_L1_norm_estimate(dec); + return (inverse_matrix_norm == RealScalar(0) ? RealScalar(0) + : (RealScalar(1) / inverse_matrix_norm) / matrix_norm); +} + } // namespace internal } // namespace Eigen diff --git a/Eigen/src/Core/CoreEvaluators.h b/Eigen/src/Core/CoreEvaluators.h index 932178f53..00c079bd8 100644 --- a/Eigen/src/Core/CoreEvaluators.h +++ b/Eigen/src/Core/CoreEvaluators.h @@ -41,10 +41,19 @@ template<> struct storage_kind_to_shape { typedef Transp // We currently distinguish the following kind of evaluators: // - unary_evaluator for expressions taking only one arguments (CwiseUnaryOp, CwiseUnaryView, Transpose, MatrixWrapper, ArrayWrapper, Reverse, Replicate) // - binary_evaluator for expression taking two arguments (CwiseBinaryOp) +// - ternary_evaluator for expression taking three arguments (CwiseTernaryOp) // - product_evaluator for linear algebra products (Product); special case of binary_evaluator because it requires additional tags for dispatching. 
// - mapbase_evaluator for Map, Block, Ref // - block_evaluator for Block (special dispatching to a mapbase_evaluator or unary_evaluator) +template< typename T, + typename Arg1Kind = typename evaluator_traits::Kind, + typename Arg2Kind = typename evaluator_traits::Kind, + typename Arg3Kind = typename evaluator_traits::Kind, + typename Arg1Scalar = typename traits::Scalar, + typename Arg2Scalar = typename traits::Scalar, + typename Arg3Scalar = typename traits::Scalar> struct ternary_evaluator; + template< typename T, typename LhsKind = typename evaluator_traits::Kind, typename RhsKind = typename evaluator_traits::Kind, @@ -328,6 +337,120 @@ protected: // Like Matrix and Array, this is not really a unary expression, so we directly specialize evaluator. // Likewise, there is not need to more sophisticated dispatching here. +template::value, + bool has_unary = has_unary_operator::value, + bool has_binary = has_binary_operator::value> +struct nullary_wrapper +{ + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar operator()(const NullaryOp& op, IndexType i, IndexType j) const { return op(i,j); } + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar operator()(const NullaryOp& op, IndexType i) const { return op(i); } + + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T packetOp(const NullaryOp& op, IndexType i, IndexType j) const { return op.template packetOp(i,j); } + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T packetOp(const NullaryOp& op, IndexType i) const { return op.template packetOp(i); } +}; + +template +struct nullary_wrapper +{ + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar operator()(const NullaryOp& op, IndexType=0, IndexType=0) const { return op(); } + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T packetOp(const NullaryOp& op, IndexType=0, IndexType=0) const { return op.template packetOp(); } +}; + +template +struct nullary_wrapper +{ + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar operator()(const NullaryOp& op, 
IndexType i, IndexType j=0) const { return op(i,j); } + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T packetOp(const NullaryOp& op, IndexType i, IndexType j=0) const { return op.template packetOp(i,j); } +}; + +// We need the following specialization for vector-only functors assigned to a runtime vector, +// for instance, using linspace and assigning a RowVectorXd to a MatrixXd or even a row of a MatrixXd. +// In this case, i==0 and j is used for the actual iteration. +template +struct nullary_wrapper +{ + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar operator()(const NullaryOp& op, IndexType i, IndexType j) const { + eigen_assert(i==0 || j==0); + return op(i+j); + } + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T packetOp(const NullaryOp& op, IndexType i, IndexType j) const { + eigen_assert(i==0 || j==0); + return op.template packetOp(i+j); + } + + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar operator()(const NullaryOp& op, IndexType i) const { return op(i); } + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T packetOp(const NullaryOp& op, IndexType i) const { return op.template packetOp(i); } +}; + +template +struct nullary_wrapper {}; + +#if 0 && EIGEN_COMP_MSVC>0 +// Disable this ugly workaround. This is now handled in traits::match, +// but this piece of code might still become handly if some other weird compilation +// erros pop up again. + +// MSVC exhibits a weird compilation error when +// compiling: +// Eigen::MatrixXf A = MatrixXf::Random(3,3); +// Ref R = 2.f*A; +// and that has_*ary_operator> have not been instantiated yet. +// The "problem" is that evaluator<2.f*A> is instantiated by traits::match<2.f*A> +// and at that time has_*ary_operator returns true regardless of T. +// Then nullary_wrapper is badly instantiated as nullary_wrapper<.,.,true,true,true>. 
+// The trick is thus to defer the proper instantiation of nullary_wrapper when coeff(), +// and packet() are really instantiated as implemented below: + +// This is a simple wrapper around Index to enforce the re-instantiation of +// has_*ary_operator when needed. +template struct nullary_wrapper_workaround_msvc { + nullary_wrapper_workaround_msvc(const T&); + operator T()const; +}; + +template +struct nullary_wrapper +{ + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar operator()(const NullaryOp& op, IndexType i, IndexType j) const { + return nullary_wrapper >::value, + has_unary_operator >::value, + has_binary_operator >::value>().operator()(op,i,j); + } + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar operator()(const NullaryOp& op, IndexType i) const { + return nullary_wrapper >::value, + has_unary_operator >::value, + has_binary_operator >::value>().operator()(op,i); + } + + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T packetOp(const NullaryOp& op, IndexType i, IndexType j) const { + return nullary_wrapper >::value, + has_unary_operator >::value, + has_binary_operator >::value>().template packetOp(op,i,j); + } + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T packetOp(const NullaryOp& op, IndexType i) const { + return nullary_wrapper >::value, + has_unary_operator >::value, + has_binary_operator >::value>().template packetOp(op,i); + } +}; +#endif // MSVC workaround + template struct evaluator > : evaluator_base > @@ -347,41 +470,44 @@ struct evaluator > }; EIGEN_DEVICE_FUNC explicit evaluator(const XprType& n) - : m_functor(n.functor()) + : m_functor(n.functor()), m_wrapper() { EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost); } typedef typename XprType::CoeffReturnType CoeffReturnType; + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - CoeffReturnType coeff(Index row, Index col) const + CoeffReturnType coeff(IndexType row, IndexType col) const { - return m_functor(row, col); + return m_wrapper(m_functor, row, col); } + template 
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - CoeffReturnType coeff(Index index) const + CoeffReturnType coeff(IndexType index) const { - return m_functor(index); + return m_wrapper(m_functor,index); } - template + template EIGEN_STRONG_INLINE - PacketType packet(Index row, Index col) const + PacketType packet(IndexType row, IndexType col) const { - return m_functor.template packetOp(row, col); + return m_wrapper.template packetOp(m_functor, row, col); } - template + template EIGEN_STRONG_INLINE - PacketType packet(Index index) const + PacketType packet(IndexType index) const { - return m_functor.template packetOp(index); + return m_wrapper.template packetOp(m_functor, index); } protected: const NullaryOp m_functor; + const internal::nullary_wrapper m_wrapper; }; // -------------------- CwiseUnaryOp -------------------- @@ -442,6 +568,96 @@ protected: evaluator m_argImpl; }; +// -------------------- CwiseTernaryOp -------------------- + +// this is a ternary expression +template +struct evaluator > + : public ternary_evaluator > +{ + typedef CwiseTernaryOp XprType; + typedef ternary_evaluator > Base; + + EIGEN_DEVICE_FUNC explicit evaluator(const XprType& xpr) : Base(xpr) {} +}; + +template +struct ternary_evaluator, IndexBased, IndexBased> + : evaluator_base > +{ + typedef CwiseTernaryOp XprType; + + enum { + CoeffReadCost = evaluator::CoeffReadCost + evaluator::CoeffReadCost + evaluator::CoeffReadCost + functor_traits::Cost, + + Arg1Flags = evaluator::Flags, + Arg2Flags = evaluator::Flags, + Arg3Flags = evaluator::Flags, + SameType = is_same::value && is_same::value, + StorageOrdersAgree = (int(Arg1Flags)&RowMajorBit)==(int(Arg2Flags)&RowMajorBit) && (int(Arg1Flags)&RowMajorBit)==(int(Arg3Flags)&RowMajorBit), + Flags0 = (int(Arg1Flags) | int(Arg2Flags) | int(Arg3Flags)) & ( + HereditaryBits + | (int(Arg1Flags) & int(Arg2Flags) & int(Arg3Flags) & + ( (StorageOrdersAgree ? LinearAccessBit : 0) + | (functor_traits::PacketAccess && StorageOrdersAgree && SameType ? 
PacketAccessBit : 0) + ) + ) + ), + Flags = (Flags0 & ~RowMajorBit) | (Arg1Flags & RowMajorBit), + Alignment = EIGEN_PLAIN_ENUM_MIN( + EIGEN_PLAIN_ENUM_MIN(evaluator::Alignment, evaluator::Alignment), + evaluator::Alignment) + }; + + EIGEN_DEVICE_FUNC explicit ternary_evaluator(const XprType& xpr) + : m_functor(xpr.functor()), + m_arg1Impl(xpr.arg1()), + m_arg2Impl(xpr.arg2()), + m_arg3Impl(xpr.arg3()) + { + EIGEN_INTERNAL_CHECK_COST_VALUE(functor_traits::Cost); + EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost); + } + + typedef typename XprType::CoeffReturnType CoeffReturnType; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + CoeffReturnType coeff(Index row, Index col) const + { + return m_functor(m_arg1Impl.coeff(row, col), m_arg2Impl.coeff(row, col), m_arg3Impl.coeff(row, col)); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + CoeffReturnType coeff(Index index) const + { + return m_functor(m_arg1Impl.coeff(index), m_arg2Impl.coeff(index), m_arg3Impl.coeff(index)); + } + + template + EIGEN_STRONG_INLINE + PacketType packet(Index row, Index col) const + { + return m_functor.packetOp(m_arg1Impl.template packet(row, col), + m_arg2Impl.template packet(row, col), + m_arg3Impl.template packet(row, col)); + } + + template + EIGEN_STRONG_INLINE + PacketType packet(Index index) const + { + return m_functor.packetOp(m_arg1Impl.template packet(index), + m_arg2Impl.template packet(index), + m_arg3Impl.template packet(index)); + } + +protected: + const TernaryOp m_functor; + evaluator m_arg1Impl; + evaluator m_arg2Impl; + evaluator m_arg3Impl; +}; + // -------------------- CwiseBinaryOp -------------------- // this is a binary expression @@ -601,73 +817,79 @@ struct mapbase_evaluator : evaluator_base ColsAtCompileTime = XprType::ColsAtCompileTime, CoeffReadCost = NumTraits::ReadCost }; - + EIGEN_DEVICE_FUNC explicit mapbase_evaluator(const XprType& map) - : m_data(const_cast(map.data())), - m_xpr(map) + : m_data(const_cast(map.data())), + m_innerStride(map.innerStride()), + 
m_outerStride(map.outerStride()) { EIGEN_STATIC_ASSERT(EIGEN_IMPLIES(evaluator::Flags&PacketAccessBit, internal::inner_stride_at_compile_time::ret==1), PACKET_ACCESS_REQUIRES_TO_HAVE_INNER_STRIDE_FIXED_TO_1); EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost); } - + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index row, Index col) const { - return m_data[col * m_xpr.colStride() + row * m_xpr.rowStride()]; + return m_data[col * colStride() + row * rowStride()]; } - + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const { - return m_data[index * m_xpr.innerStride()]; + return m_data[index * m_innerStride.value()]; } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& coeffRef(Index row, Index col) { - return m_data[col * m_xpr.colStride() + row * m_xpr.rowStride()]; + return m_data[col * colStride() + row * rowStride()]; } - + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& coeffRef(Index index) { - return m_data[index * m_xpr.innerStride()]; + return m_data[index * m_innerStride.value()]; } - + template EIGEN_STRONG_INLINE - PacketType packet(Index row, Index col) const + PacketType packet(Index row, Index col) const { - PointerType ptr = m_data + row * m_xpr.rowStride() + col * m_xpr.colStride(); + PointerType ptr = m_data + row * rowStride() + col * colStride(); return internal::ploadt(ptr); } template EIGEN_STRONG_INLINE - PacketType packet(Index index) const + PacketType packet(Index index) const { - return internal::ploadt(m_data + index * m_xpr.innerStride()); + return internal::ploadt(m_data + index * m_innerStride.value()); } - + template EIGEN_STRONG_INLINE - void writePacket(Index row, Index col, const PacketType& x) + void writePacket(Index row, Index col, const PacketType& x) { - PointerType ptr = m_data + row * m_xpr.rowStride() + col * m_xpr.colStride(); + PointerType ptr = m_data + row * rowStride() + col * colStride(); return internal::pstoret(ptr, x); } - + template EIGEN_STRONG_INLINE - void writePacket(Index index, 
const PacketType& x) + void writePacket(Index index, const PacketType& x) { - internal::pstoret(m_data + index * m_xpr.innerStride(), x); + internal::pstoret(m_data + index * m_innerStride.value(), x); } - protected: + EIGEN_DEVICE_FUNC + inline Index rowStride() const { return XprType::IsRowMajor ? m_outerStride.value() : m_innerStride.value(); } + EIGEN_DEVICE_FUNC + inline Index colStride() const { return XprType::IsRowMajor ? m_innerStride.value() : m_outerStride.value(); } + PointerType m_data; - const XprType& m_xpr; + const internal::variable_if_dynamic m_innerStride; + const internal::variable_if_dynamic m_outerStride; }; template @@ -755,9 +977,7 @@ struct evaluator > OuterStrideAtCompileTime = HasSameStorageOrderAsArgType ? int(outer_stride_at_compile_time::ret) : int(inner_stride_at_compile_time::ret), - MaskPacketAccessBit = (InnerSize == Dynamic || (InnerSize % packet_traits::size) == 0) - && (InnerStrideAtCompileTime == 1) - ? PacketAccessBit : 0, + MaskPacketAccessBit = (InnerStrideAtCompileTime == 1) ? PacketAccessBit : 0, FlagsLinearAccessBit = (RowsAtCompileTime == 1 || ColsAtCompileTime == 1 || (InnerPanel && (evaluator::Flags&LinearAccessBit))) ? 
LinearAccessBit : 0, FlagsRowMajorBit = XprType::Flags&RowMajorBit, @@ -884,7 +1104,7 @@ struct block_evaluator(block) { // TODO: for the 3.3 release, this should be turned to an internal assertion, but let's keep it as is for the beta lifetime - eigen_assert(((size_t(block.data()) % EIGEN_PLAIN_ENUM_MAX(1,evaluator::Alignment)) == 0) && "data is not aligned"); + eigen_assert(((internal::UIntPtr(block.data()) % EIGEN_PLAIN_ENUM_MAX(1,evaluator::Alignment)) == 0) && "data is not aligned"); } }; @@ -1325,7 +1545,7 @@ struct evaluator > enum { CoeffReadCost = evaluator::CoeffReadCost, - Flags = (unsigned int)evaluator::Flags & (HereditaryBits | LinearAccessBit | DirectAccessBit) & ~RowMajorBit, + Flags = (unsigned int)(evaluator::Flags & (HereditaryBits | DirectAccessBit) & ~RowMajorBit) | LinearAccessBit, Alignment = 0 }; diff --git a/Eigen/src/Core/CwiseBinaryOp.h b/Eigen/src/Core/CwiseBinaryOp.h index 39820fd7d..aa3297354 100644 --- a/Eigen/src/Core/CwiseBinaryOp.h +++ b/Eigen/src/Core/CwiseBinaryOp.h @@ -160,7 +160,7 @@ template EIGEN_STRONG_INLINE Derived & MatrixBase::operator-=(const MatrixBase &other) { - call_assignment(derived(), other.derived(), internal::sub_assign_op()); + call_assignment(derived(), other.derived(), internal::sub_assign_op()); return derived(); } @@ -173,7 +173,7 @@ template EIGEN_STRONG_INLINE Derived & MatrixBase::operator+=(const MatrixBase& other) { - call_assignment(derived(), other.derived(), internal::add_assign_op()); + call_assignment(derived(), other.derived(), internal::add_assign_op()); return derived(); } diff --git a/Eigen/src/Core/CwiseNullaryOp.h b/Eigen/src/Core/CwiseNullaryOp.h index 3c6508cd0..25c3ef3d7 100644 --- a/Eigen/src/Core/CwiseNullaryOp.h +++ b/Eigen/src/Core/CwiseNullaryOp.h @@ -20,7 +20,8 @@ struct traits > : traits::Flags & RowMajorBit }; }; -} + +} // namespace internal /** \class CwiseNullaryOp * \ingroup Core_Module @@ -37,7 +38,23 @@ struct traits > : traits + \c operator()() if the procedural generation 
does not depend on the coefficient entries (e.g., random numbers) + \c operator()(Index i)if the procedural generation makes sense for vectors only and that it depends on the coefficient index \c i (e.g., linspace) + \c operator()(Index i,Index j)if the procedural generation depends on the matrix coordinates \c i, \c j (e.g., to generate a checkerboard with 0 and 1) + + * It is also possible to expose the last two operators if the generation makes sense for matrices but can be optimized for vectors. + * + * See DenseBase::NullaryExpr(Index,const CustomNullaryOp&) for an example binding + * C++11 random number generators. + * + * A nullary expression can also be used to implement custom sophisticated matrix manipulations + * that cannot be covered by the existing set of natively supported matrix manipulations. + * See this \ref TopicCustomizing_NullaryExpr "page" for some examples and additional explanations + * on the behavior of CwiseNullaryOp. + * + * \sa class CwiseUnaryOp, class CwiseBinaryOp, DenseBase::NullaryExpr */ template class CwiseNullaryOp : public internal::dense_xpr_base< CwiseNullaryOp >::type, internal::no_assignment_operator @@ -62,30 +79,6 @@ class CwiseNullaryOp : public internal::dense_xpr_base< CwiseNullaryOp - EIGEN_STRONG_INLINE PacketScalar packet(Index rowId, Index colId) const - { - return m_functor.packetOp(rowId, colId); - } - - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE const Scalar coeff(Index index) const - { - return m_functor(index); - } - - template - EIGEN_STRONG_INLINE PacketScalar packet(Index index) const - { - return m_functor.packetOp(index); - } - /** \returns the functor representing the nullary operation */ EIGEN_DEVICE_FUNC const NullaryOp& functor() const { return m_functor; } @@ -227,7 +220,7 @@ DenseBase::Constant(const Scalar& value) * * The function generates 'size' equally spaced values in the closed interval [low,high]. * This particular version of LinSpaced() uses sequential access, i.e. 
vector access is - * assumed to be a(0), a(1), ..., a(size). This assumption allows for better vectorization + * assumed to be a(0), a(1), ..., a(size-1). This assumption allows for better vectorization * and yields faster code than the random access version. * * When size is set to 1, a vector of length 1 containing 'high' is returned. @@ -396,7 +389,7 @@ EIGEN_STRONG_INLINE Derived& DenseBase::setLinSpaced(Index newSize, con /** * \brief Sets a linearly spaced vector. * - * The function fill *this with equally spaced values in the closed interval [low,high]. + * The function fills *this with equally spaced values in the closed interval [low,high]. * When size is set to 1, a vector of length 1 containing 'high' is returned. * * \only_for_vectors diff --git a/Eigen/src/Core/CwiseTernaryOp.h b/Eigen/src/Core/CwiseTernaryOp.h new file mode 100644 index 000000000..9f3576fec --- /dev/null +++ b/Eigen/src/Core/CwiseTernaryOp.h @@ -0,0 +1,197 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2008-2014 Gael Guennebaud +// Copyright (C) 2006-2008 Benoit Jacob +// Copyright (C) 2016 Eugene Brevdo +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ +#ifndef EIGEN_CWISE_TERNARY_OP_H +#define EIGEN_CWISE_TERNARY_OP_H + +namespace Eigen { + +namespace internal { +template +struct traits > { + // we must not inherit from traits since it has + // the potential to cause problems with MSVC + typedef typename remove_all::type Ancestor; + typedef typename traits::XprKind XprKind; + enum { + RowsAtCompileTime = traits::RowsAtCompileTime, + ColsAtCompileTime = traits::ColsAtCompileTime, + MaxRowsAtCompileTime = traits::MaxRowsAtCompileTime, + MaxColsAtCompileTime = traits::MaxColsAtCompileTime + }; + + // even though we require Arg1, Arg2, and Arg3 to have the same scalar type + // (see CwiseTernaryOp constructor), + // we still want to handle the case when the result type is different. + typedef typename result_of::type Scalar; + + typedef typename internal::traits::StorageKind StorageKind; + typedef typename internal::traits::StorageIndex StorageIndex; + + typedef typename Arg1::Nested Arg1Nested; + typedef typename Arg2::Nested Arg2Nested; + typedef typename Arg3::Nested Arg3Nested; + typedef typename remove_reference::type _Arg1Nested; + typedef typename remove_reference::type _Arg2Nested; + typedef typename remove_reference::type _Arg3Nested; + enum { Flags = _Arg1Nested::Flags & RowMajorBit }; +}; +} // end namespace internal + +template +class CwiseTernaryOpImpl; + +/** \class CwiseTernaryOp + * \ingroup Core_Module + * + * \brief Generic expression where a coefficient-wise ternary operator is + * applied to two expressions + * + * \tparam TernaryOp template functor implementing the operator + * \tparam Arg1Type the type of the first argument + * \tparam Arg2Type the type of the second argument + * \tparam Arg3Type the type of the third argument + * + * This class represents an expression where a coefficient-wise ternary + * operator is applied to three expressions. 
+ * It is the return type of ternary operators, by which we mean only those + * ternary operators where + * all three arguments are Eigen expressions. + * For example, the return type of betainc(matrix1, matrix2, matrix3) is a + * CwiseTernaryOp. + * + * Most of the time, this is the only way that it is used, so you typically + * don't have to name + * CwiseTernaryOp types explicitly. + * + * \sa MatrixBase::ternaryExpr(const MatrixBase &, const + * MatrixBase &, const CustomTernaryOp &) const, class CwiseBinaryOp, + * class CwiseUnaryOp, class CwiseNullaryOp + */ +template +class CwiseTernaryOp : public CwiseTernaryOpImpl< + TernaryOp, Arg1Type, Arg2Type, Arg3Type, + typename internal::traits::StorageKind>, + internal::no_assignment_operator +{ + public: + typedef typename internal::remove_all::type Arg1; + typedef typename internal::remove_all::type Arg2; + typedef typename internal::remove_all::type Arg3; + + typedef typename CwiseTernaryOpImpl< + TernaryOp, Arg1Type, Arg2Type, Arg3Type, + typename internal::traits::StorageKind>::Base Base; + EIGEN_GENERIC_PUBLIC_INTERFACE(CwiseTernaryOp) + + typedef typename internal::ref_selector::type Arg1Nested; + typedef typename internal::ref_selector::type Arg2Nested; + typedef typename internal::ref_selector::type Arg3Nested; + typedef typename internal::remove_reference::type _Arg1Nested; + typedef typename internal::remove_reference::type _Arg2Nested; + typedef typename internal::remove_reference::type _Arg3Nested; + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE CwiseTernaryOp(const Arg1& a1, const Arg2& a2, + const Arg3& a3, + const TernaryOp& func = TernaryOp()) + : m_arg1(a1), m_arg2(a2), m_arg3(a3), m_functor(func) { + // require the sizes to match + EIGEN_STATIC_ASSERT_SAME_MATRIX_SIZE(Arg1, Arg2) + EIGEN_STATIC_ASSERT_SAME_MATRIX_SIZE(Arg1, Arg3) + + // The storage kinds should match + EIGEN_STATIC_ASSERT((internal::is_same< + typename internal::traits::StorageKind, + typename internal::traits::StorageKind>::value), + 
STORAGE_KIND_MUST_MATCH) + EIGEN_STATIC_ASSERT((internal::is_same< + typename internal::traits::StorageKind, + typename internal::traits::StorageKind>::value), + STORAGE_KIND_MUST_MATCH) + + eigen_assert(a1.rows() == a2.rows() && a1.cols() == a2.cols() && + a1.rows() == a3.rows() && a1.cols() == a3.cols()); + } + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE Index rows() const { + // return the fixed size type if available to enable compile time + // optimizations + if (internal::traits::type>:: + RowsAtCompileTime == Dynamic && + internal::traits::type>:: + RowsAtCompileTime == Dynamic) + return m_arg3.rows(); + else if (internal::traits::type>:: + RowsAtCompileTime == Dynamic && + internal::traits::type>:: + RowsAtCompileTime == Dynamic) + return m_arg2.rows(); + else + return m_arg1.rows(); + } + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE Index cols() const { + // return the fixed size type if available to enable compile time + // optimizations + if (internal::traits::type>:: + ColsAtCompileTime == Dynamic && + internal::traits::type>:: + ColsAtCompileTime == Dynamic) + return m_arg3.cols(); + else if (internal::traits::type>:: + ColsAtCompileTime == Dynamic && + internal::traits::type>:: + ColsAtCompileTime == Dynamic) + return m_arg2.cols(); + else + return m_arg1.cols(); + } + + /** \returns the first argument nested expression */ + EIGEN_DEVICE_FUNC + const _Arg1Nested& arg1() const { return m_arg1; } + /** \returns the second argument nested expression */ + EIGEN_DEVICE_FUNC + const _Arg2Nested& arg2() const { return m_arg2; } + /** \returns the third argument nested expression */ + EIGEN_DEVICE_FUNC + const _Arg3Nested& arg3() const { return m_arg3; } + /** \returns the functor representing the ternary operation */ + EIGEN_DEVICE_FUNC + const TernaryOp& functor() const { return m_functor; } + + protected: + Arg1Nested m_arg1; + Arg2Nested m_arg2; + Arg3Nested m_arg3; + const TernaryOp m_functor; +}; + +// Generic API dispatcher +template +class CwiseTernaryOpImpl 
+ : public internal::generic_xpr_base< + CwiseTernaryOp >::type { + public: + typedef typename internal::generic_xpr_base< + CwiseTernaryOp >::type Base; +}; + +} // end namespace Eigen + +#endif // EIGEN_CWISE_TERNARY_OP_H diff --git a/Eigen/src/Core/DenseBase.h b/Eigen/src/Core/DenseBase.h index 5a38e5f22..c110bbf11 100644 --- a/Eigen/src/Core/DenseBase.h +++ b/Eigen/src/Core/DenseBase.h @@ -34,17 +34,15 @@ static inline void check_DenseIndex_is_signed() { * \tparam Derived is the derived type, e.g., a matrix type or an expression. * * This class can be extended with the help of the plugin mechanism described on the page - * \ref TopicCustomizingEigen by defining the preprocessor symbol \c EIGEN_DENSEBASE_PLUGIN. + * \ref TopicCustomizing_Plugins by defining the preprocessor symbol \c EIGEN_DENSEBASE_PLUGIN. * * \sa \blank \ref TopicClassHierarchy */ template class DenseBase #ifndef EIGEN_PARSED_BY_DOXYGEN - : public internal::special_scalar_op_base::Scalar, - typename NumTraits::Scalar>::Real, - DenseCoeffsBase > -#else : public DenseCoeffsBase +#else + : public DenseCoeffsBase #endif // not EIGEN_PARSED_BY_DOXYGEN { public: @@ -73,10 +71,8 @@ template class DenseBase typedef Scalar value_type; typedef typename NumTraits::Real RealScalar; - typedef internal::special_scalar_op_base > Base; + typedef DenseCoeffsBase Base; - using Base::operator*; - using Base::operator/; using Base::derived; using Base::const_cast_derived; using Base::rows; @@ -562,12 +558,15 @@ template class DenseBase EIGEN_DEVICE_FUNC void reverseInPlace(); #define EIGEN_CURRENT_STORAGE_BASE_CLASS Eigen::DenseBase +#define EIGEN_DOC_BLOCK_ADDONS_NOT_INNER_PANEL +#define EIGEN_DOC_BLOCK_ADDONS_INNER_PANEL_IF(COND) # include "../plugins/BlockMethods.h" # ifdef EIGEN_DENSEBASE_PLUGIN # include EIGEN_DENSEBASE_PLUGIN # endif #undef EIGEN_CURRENT_STORAGE_BASE_CLASS - +#undef EIGEN_DOC_BLOCK_ADDONS_NOT_INNER_PANEL +#undef EIGEN_DOC_BLOCK_ADDONS_INNER_PANEL_IF // disable the use of evalTo for dense 
objects with a nice compilation error template diff --git a/Eigen/src/Core/DenseStorage.h b/Eigen/src/Core/DenseStorage.h index 340484610..82201d96a 100644 --- a/Eigen/src/Core/DenseStorage.h +++ b/Eigen/src/Core/DenseStorage.h @@ -67,13 +67,13 @@ struct plain_array template EIGEN_ALWAYS_INLINE PtrType eigen_unaligned_array_assert_workaround_gcc47(PtrType array) { return array; } #define EIGEN_MAKE_UNALIGNED_ARRAY_ASSERT(sizemask) \ - eigen_assert((reinterpret_cast(eigen_unaligned_array_assert_workaround_gcc47(array)) & (sizemask)) == 0 \ + eigen_assert((internal::UIntPtr(eigen_unaligned_array_assert_workaround_gcc47(array)) & (sizemask)) == 0 \ && "this assertion is explained here: " \ "http://eigen.tuxfamily.org/dox-devel/group__TopicUnalignedArrayAssert.html" \ " **** READ THIS WEB PAGE !!! ****"); #else #define EIGEN_MAKE_UNALIGNED_ARRAY_ASSERT(sizemask) \ - eigen_assert((reinterpret_cast(array) & (sizemask)) == 0 \ + eigen_assert((internal::UIntPtr(array) & (sizemask)) == 0 \ && "this assertion is explained here: " \ "http://eigen.tuxfamily.org/dox-devel/group__TopicUnalignedArrayAssert.html" \ " **** READ THIS WEB PAGE !!! 
****"); @@ -362,9 +362,9 @@ template class DenseStorage class DenseStorage class DenseStorage class DenseStorage class DenseStorage class DenseStorage return InverseReturnType(diagonal().cwiseInverse()); } - typedef DiagonalWrapper, const DiagonalVectorType> > ScalarMultipleReturnType; EIGEN_DEVICE_FUNC - inline const ScalarMultipleReturnType + inline const DiagonalWrapper operator*(const Scalar& scalar) const { - return ScalarMultipleReturnType(diagonal() * scalar); + return DiagonalWrapper(diagonal() * scalar); } EIGEN_DEVICE_FUNC - friend inline const ScalarMultipleReturnType + friend inline const DiagonalWrapper operator*(const Scalar& scalar, const DiagonalBase& other) { - return ScalarMultipleReturnType(other.diagonal() * scalar); + return DiagonalWrapper(scalar * other.diagonal()); } }; @@ -317,19 +316,19 @@ struct Diagonal2Dense {}; template<> struct AssignmentKind { typedef Diagonal2Dense Kind; }; // Diagonal matrix to Dense assignment -template< typename DstXprType, typename SrcXprType, typename Functor, typename Scalar> -struct Assignment +template< typename DstXprType, typename SrcXprType, typename Functor> +struct Assignment { - static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op &/*func*/) + static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op &/*func*/) { dst.setZero(); dst.diagonal() = src.diagonal(); } - static void run(DstXprType &dst, const SrcXprType &src, const internal::add_assign_op &/*func*/) + static void run(DstXprType &dst, const SrcXprType &src, const internal::add_assign_op &/*func*/) { dst.diagonal() += src.diagonal(); } - static void run(DstXprType &dst, const SrcXprType &src, const internal::sub_assign_op &/*func*/) + static void run(DstXprType &dst, const SrcXprType &src, const internal::sub_assign_op &/*func*/) { dst.diagonal() -= src.diagonal(); } }; diff --git a/Eigen/src/Core/Dot.h b/Eigen/src/Core/Dot.h index 82d58fc0b..1d7f2262e 100644 --- a/Eigen/src/Core/Dot.h +++ 
b/Eigen/src/Core/Dot.h @@ -28,22 +28,24 @@ template struct dot_nocheck { - typedef typename scalar_product_traits::Scalar,typename traits::Scalar>::ReturnType ResScalar; + typedef scalar_conj_product_op::Scalar,typename traits::Scalar> conj_prod; + typedef typename conj_prod::result_type ResScalar; EIGEN_DEVICE_FUNC static inline ResScalar run(const MatrixBase& a, const MatrixBase& b) { - return a.template binaryExpr::Scalar,typename traits::Scalar> >(b).sum(); + return a.template binaryExpr(b).sum(); } }; template struct dot_nocheck { - typedef typename scalar_product_traits::Scalar,typename traits::Scalar>::ReturnType ResScalar; + typedef scalar_conj_product_op::Scalar,typename traits::Scalar> conj_prod; + typedef typename conj_prod::result_type ResScalar; EIGEN_DEVICE_FUNC static inline ResScalar run(const MatrixBase& a, const MatrixBase& b) { - return a.transpose().template binaryExpr::Scalar,typename traits::Scalar> >(b).sum(); + return a.transpose().template binaryExpr(b).sum(); } }; @@ -62,7 +64,7 @@ struct dot_nocheck template template EIGEN_DEVICE_FUNC -typename internal::scalar_product_traits::Scalar,typename internal::traits::Scalar>::ReturnType +typename ScalarBinaryOpTraits::Scalar,typename internal::traits::Scalar>::ReturnType MatrixBase::dot(const MatrixBase& other) const { EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived) @@ -227,9 +229,12 @@ struct lpNorm_selector template struct lpNorm_selector { + typedef typename NumTraits::Scalar>::Real RealScalar; EIGEN_DEVICE_FUNC - static inline typename NumTraits::Scalar>::Real run(const MatrixBase& m) + static inline RealScalar run(const MatrixBase& m) { + if(Derived::SizeAtCompileTime==0 || (Derived::SizeAtCompileTime==Dynamic && m.size()==0)) + return RealScalar(0); return m.cwiseAbs().maxCoeff(); } }; @@ -240,6 +245,8 @@ struct lpNorm_selector * of the coefficients of \c *this. 
If \a p is the special value \a Eigen::Infinity, this function returns the \f$ \ell^\infty \f$ * norm, that is the maximum of the absolute values of the coefficients of \c *this. * + * In all cases, if \c *this is empty, then the value 0 is returned. + * * \note For matrices, this function does not compute the operator-norm. That is, if \c *this is a matrix, then its coefficients are interpreted as a 1D vector. Nonetheless, you can easily compute the 1-norm and \f$\infty\f$-norm matrix operator norms using \link TutorialReductionsVisitorsBroadcastingReductionsNorm partial reductions \endlink. * * \sa norm() diff --git a/Eigen/src/Core/EigenBase.h b/Eigen/src/Core/EigenBase.h index ba8e09674..f76995af9 100644 --- a/Eigen/src/Core/EigenBase.h +++ b/Eigen/src/Core/EigenBase.h @@ -138,7 +138,7 @@ template template Derived& DenseBase::operator+=(const EigenBase &other) { - call_assignment(derived(), other.derived(), internal::add_assign_op()); + call_assignment(derived(), other.derived(), internal::add_assign_op()); return derived(); } @@ -146,7 +146,7 @@ template template Derived& DenseBase::operator-=(const EigenBase &other) { - call_assignment(derived(), other.derived(), internal::sub_assign_op()); + call_assignment(derived(), other.derived(), internal::sub_assign_op()); return derived(); } diff --git a/Eigen/src/Core/GeneralProduct.h b/Eigen/src/Core/GeneralProduct.h index f7c5f4276..a8c83f168 100644 --- a/Eigen/src/Core/GeneralProduct.h +++ b/Eigen/src/Core/GeneralProduct.h @@ -159,20 +159,20 @@ struct gemv_static_vector_if template struct gemv_static_vector_if { - #if EIGEN_MAX_STATIC_ALIGN_BYTES!=0 - internal::plain_array m_data; - EIGEN_STRONG_INLINE Scalar* data() { return m_data.array; } - #else - // Some architectures cannot align on the stack, - // => let's manually enforce alignment by allocating more data and return the address of the first aligned element. 
enum { ForceAlignment = internal::packet_traits::Vectorizable, PacketSize = internal::packet_traits::size }; - internal::plain_array m_data; + #if EIGEN_MAX_STATIC_ALIGN_BYTES!=0 + internal::plain_array m_data; + EIGEN_STRONG_INLINE Scalar* data() { return m_data.array; } + #else + // Some architectures cannot align on the stack, + // => let's manually enforce alignment by allocating more data and return the address of the first aligned element. + internal::plain_array m_data; EIGEN_STRONG_INLINE Scalar* data() { return ForceAlignment - ? reinterpret_cast((reinterpret_cast(m_data.array) & ~(size_t(EIGEN_MAX_ALIGN_BYTES-1))) + EIGEN_MAX_ALIGN_BYTES) + ? reinterpret_cast((internal::UIntPtr(m_data.array) & ~(std::size_t(EIGEN_MAX_ALIGN_BYTES-1))) + EIGEN_MAX_ALIGN_BYTES) : m_data.array; } #endif @@ -207,7 +207,7 @@ template<> struct gemv_dense_selector typedef internal::blas_traits RhsBlasTraits; typedef typename RhsBlasTraits::DirectLinearAccessType ActualRhsType; - typedef Map, Aligned> MappedDest; + typedef Map, EIGEN_PLAIN_ENUM_MIN(AlignedMax,internal::packet_traits::size)> MappedDest; ActualLhsType actualLhs = LhsBlasTraits::extract(lhs); ActualRhsType actualRhs = RhsBlasTraits::extract(rhs); diff --git a/Eigen/src/Core/GenericPacketMath.h b/Eigen/src/Core/GenericPacketMath.h index 679b22f53..07fe0f005 100644 --- a/Eigen/src/Core/GenericPacketMath.h +++ b/Eigen/src/Core/GenericPacketMath.h @@ -62,6 +62,7 @@ struct default_packet_traits HasRsqrt = 0, HasExp = 0, HasLog = 0, + HasLog1p = 0, HasLog10 = 0, HasPow = 0, @@ -82,6 +83,7 @@ struct default_packet_traits HasErfc = 0, HasIGamma = 0, HasIGammac = 0, + HasBetaInc = 0, HasRound = 0, HasFloor = 0, @@ -304,7 +306,7 @@ template EIGEN_DEVICE_FUNC inline void prefetch(const Scalar* a // 32-bit pointer operand constraint for inlined asm asm(" prefetch.L1 [ %1 ];" : "=r"(addr) : "r"(addr)); #endif -#elif !EIGEN_COMP_MSVC +#elif (!EIGEN_COMP_MSVC) && (EIGEN_COMP_GNUC || EIGEN_COMP_CLANG || EIGEN_COMP_ICC) 
__builtin_prefetch(addr); #endif } @@ -346,22 +348,6 @@ template EIGEN_DEVICE_FUNC inline typename unpacket_traits EIGEN_DEVICE_FUNC inline Packet preverse(const Packet& a) { return a; } -template -struct protate_impl -{ - // Empty so attempts to use this unimplemented path will fail to compile. - // Only specializations of this template should be used. -}; - -/** \internal \returns a packet with the coefficients rotated to the right in little-endian convention, - * by the given offset, e.g. for offset == 1: - * (packet[3], packet[2], packet[1], packet[0]) becomes (packet[0], packet[3], packet[2], packet[1]) - */ -template EIGEN_DEVICE_FUNC inline Packet protate(const Packet& a) -{ - return offset ? protate_impl::run(a) : a; -} - /** \internal \returns \a a with real and imaginary part flipped (for complex type only) */ template EIGEN_DEVICE_FUNC inline Packet pcplxflip(const Packet& a) { @@ -419,6 +405,10 @@ Packet pexp(const Packet& a) { using std::exp; return exp(a); } template EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet plog(const Packet& a) { using std::log; return log(a); } +/** \internal \returns the log1p of \a a (coeff-wise) */ +template EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS +Packet plog1p(const Packet& a) { return numext::log1p(a); } + /** \internal \returns the log10 of \a a (coeff-wise) */ template EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet plog10(const Packet& a) { using std::log10; return log10(a); } @@ -445,38 +435,6 @@ Packet pfloor(const Packet& a) { using numext::floor; return floor(a); } template EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet pceil(const Packet& a) { using numext::ceil; return ceil(a); } -/** \internal \returns the ln(|gamma(\a a)|) (coeff-wise) */ -template EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS -Packet plgamma(const Packet& a) { using numext::lgamma; return lgamma(a); } - -/** \internal \returns the derivative of lgamma, psi(\a a) (coeff-wise) */ 
-template EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS -Packet pdigamma(const Packet& a) { using numext::digamma; return digamma(a); } - -/** \internal \returns the zeta function of two arguments (coeff-wise) */ -template EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS -Packet pzeta(const Packet& x, const Packet& q) { using numext::zeta; return zeta(x, q); } - -/** \internal \returns the polygamma function (coeff-wise) */ -template EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS -Packet ppolygamma(const Packet& n, const Packet& x) { using numext::polygamma; return polygamma(n, x); } - -/** \internal \returns the erf(\a a) (coeff-wise) */ -template EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS -Packet perf(const Packet& a) { using numext::erf; return erf(a); } - -/** \internal \returns the erfc(\a a) (coeff-wise) */ -template EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS -Packet perfc(const Packet& a) { using numext::erfc; return erfc(a); } - -/** \internal \returns the incomplete gamma function igamma(\a a, \a x) */ -template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE -Packet pigamma(const Packet& a, const Packet& x) { using numext::igamma; return igamma(a, x); } - -/** \internal \returns the complementary incomplete gamma function igammac(\a a, \a x) */ -template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE -Packet pigammac(const Packet& a, const Packet& x) { using numext::igammac; return igammac(a, x); } - /*************************************************************************** * The following functions might not have to be overwritten for vectorized types ***************************************************************************/ diff --git a/Eigen/src/Core/GlobalFunctions.h b/Eigen/src/Core/GlobalFunctions.h index 05ba6ddb4..769dc255c 100644 --- a/Eigen/src/Core/GlobalFunctions.h +++ b/Eigen/src/Core/GlobalFunctions.h @@ -1,7 +1,7 @@ // This file is part of Eigen, a lightweight C++ template library // for linear algebra. 
// -// Copyright (C) 2010-2012 Gael Guennebaud +// Copyright (C) 2010-2016 Gael Guennebaud // Copyright (C) 2010 Benoit Jacob // // This Source Code Form is subject to the terms of the Mozilla @@ -11,13 +11,30 @@ #ifndef EIGEN_GLOBAL_FUNCTIONS_H #define EIGEN_GLOBAL_FUNCTIONS_H -#define EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(NAME,FUNCTOR) \ +#ifdef EIGEN_PARSED_BY_DOXYGEN + +#define EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(NAME,FUNCTOR,DOC_OP,DOC_DETAILS) \ + /** \returns an expression of the coefficient-wise DOC_OP of \a x + + DOC_DETAILS + + \sa Math functions, class CwiseUnaryOp + */ \ + template \ + inline const Eigen::CwiseUnaryOp, const Derived> \ + NAME(const Eigen::ArrayBase& x); + +#else + +#define EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(NAME,FUNCTOR,DOC_OP,DOC_DETAILS) \ template \ inline const Eigen::CwiseUnaryOp, const Derived> \ (NAME)(const Eigen::ArrayBase& x) { \ return Eigen::CwiseUnaryOp, const Derived>(x.derived()); \ } +#endif // EIGEN_PARSED_BY_DOXYGEN + #define EIGEN_ARRAY_DECLARE_GLOBAL_EIGEN_UNARY(NAME,FUNCTOR) \ \ template \ @@ -36,47 +53,68 @@ namespace Eigen { - EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(real,scalar_real_op) - EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(imag,scalar_imag_op) - EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(conj,scalar_conjugate_op) - EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(inverse,scalar_inverse_op) - EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(sin,scalar_sin_op) - EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(cos,scalar_cos_op) - EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(tan,scalar_tan_op) - EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(atan,scalar_atan_op) - EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(asin,scalar_asin_op) - EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(acos,scalar_acos_op) - EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(sinh,scalar_sinh_op) - EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(cosh,scalar_cosh_op) - EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(tanh,scalar_tanh_op) - EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(lgamma,scalar_lgamma_op) - EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(digamma,scalar_digamma_op) - EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(zeta,scalar_zeta_op) - 
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(polygamma,scalar_polygamma_op) - EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(erf,scalar_erf_op) - EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(erfc,scalar_erfc_op) - EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(exp,scalar_exp_op) - EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(log,scalar_log_op) - EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(log10,scalar_log10_op) - EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(abs,scalar_abs_op) - EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(abs2,scalar_abs2_op) - EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(arg,scalar_arg_op) - EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(sqrt,scalar_sqrt_op) - EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(square,scalar_square_op) - EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(cube,scalar_cube_op) - EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(round,scalar_round_op) - EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(floor,scalar_floor_op) - EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(ceil,scalar_ceil_op) - EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(isnan,scalar_isnan_op) - EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(isinf,scalar_isinf_op) - EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(isfinite,scalar_isfinite_op) - EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(sign,scalar_sign_op) + EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(real,scalar_real_op,real part,\sa ArrayBase::real) + EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(imag,scalar_imag_op,imaginary part,\sa ArrayBase::imag) + EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(conj,scalar_conjugate_op,complex conjugate,\sa ArrayBase::conjugate) + EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(inverse,scalar_inverse_op,inverse,\sa ArrayBase::inverse) + EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(sin,scalar_sin_op,sine,\sa ArrayBase::sin) + EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(cos,scalar_cos_op,cosine,\sa ArrayBase::cos) + EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(tan,scalar_tan_op,tangent,\sa ArrayBase::tan) + EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(atan,scalar_atan_op,arc-tangent,\sa ArrayBase::atan) + EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(asin,scalar_asin_op,arc-sine,\sa ArrayBase::asin) + EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(acos,scalar_acos_op,arc-cosine,\sa ArrayBase::acos) + 
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(sinh,scalar_sinh_op,hyperbolic sine,\sa ArrayBase::sinh) + EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(cosh,scalar_cosh_op,hyperbolic cosine,\sa ArrayBase::cosh) + EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(tanh,scalar_tanh_op,hyperbolic tangent,\sa ArrayBase::tanh) + EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(lgamma,scalar_lgamma_op,natural logarithm of the gamma function,\sa ArrayBase::lgamma) + EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(digamma,scalar_digamma_op,derivative of lgamma,\sa ArrayBase::digamma) + EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(erf,scalar_erf_op,error function,\sa ArrayBase::erf) + EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(erfc,scalar_erfc_op,complementary error function,\sa ArrayBase::erfc) + EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(exp,scalar_exp_op,exponential,\sa ArrayBase::exp) + EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(log,scalar_log_op,natural logarithm,\sa Eigen::log10 DOXCOMMA ArrayBase::log) + EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(log1p,scalar_log1p_op,natural logarithm of 1 plus the value,\sa ArrayBase::log1p) + EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(log10,scalar_log10_op,base 10 logarithm,\sa Eigen::log DOXCOMMA ArrayBase::log10) + EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(abs,scalar_abs_op,absolute value,\sa ArrayBase::abs DOXCOMMA MatrixBase::cwiseAbs) + EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(abs2,scalar_abs2_op,squared absolute value,\sa ArrayBase::abs2 DOXCOMMA MatrixBase::cwiseAbs2) + EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(arg,scalar_arg_op,complex argument,\sa ArrayBase::arg) + EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(sqrt,scalar_sqrt_op,square root,\sa ArrayBase::sqrt DOXCOMMA MatrixBase::cwiseSqrt) + EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(rsqrt,scalar_rsqrt_op,reciprocal square root,\sa ArrayBase::rsqrt) + EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(square,scalar_square_op,square (power 2),\sa Eigen::abs2 DOXCOMMA Eigen::pow DOXCOMMA ArrayBase::square) + EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(cube,scalar_cube_op,cube (power 3),\sa Eigen::pow DOXCOMMA ArrayBase::cube) + 
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(round,scalar_round_op,nearest integer,\sa Eigen::floor DOXCOMMA Eigen::ceil DOXCOMMA ArrayBase::round) + EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(floor,scalar_floor_op,nearest integer not greater than the given value,\sa Eigen::ceil DOXCOMMA ArrayBase::floor) + EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(ceil,scalar_ceil_op,nearest integer not less than the given value,\sa Eigen::floor DOXCOMMA ArrayBase::ceil) + EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(isnan,scalar_isnan_op,not-a-number test,\sa Eigen::isinf DOXCOMMA Eigen::isfinite DOXCOMMA ArrayBase::isnan) + EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(isinf,scalar_isinf_op,infinite value test,\sa Eigen::isnan DOXCOMMA Eigen::isfinite DOXCOMMA ArrayBase::isinf) + EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(isfinite,scalar_isfinite_op,finite value test,\sa Eigen::isinf DOXCOMMA Eigen::isnan DOXCOMMA ArrayBase::isfinite) + EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(sign,scalar_sign_op,sign (or 0),\sa ArrayBase::sign) + /** \returns an expression of the coefficient-wise power of \a x to the given constant \a exponent. + * + * \tparam ScalarExponent is the scalar type of \a exponent. It must be compatible with the scalar type of the given expression (\c Derived::Scalar). 
+ * + * \sa ArrayBase::pow() + * + * \relates ArrayBase + */ +#ifdef EIGEN_PARSED_BY_DOXYGEN + template + inline const CwiseBinaryOp,Derived,Constant > + pow(const Eigen::ArrayBase& x, const ScalarExponent& exponent); +#else + template + inline typename internal::enable_if< !(internal::is_same::value) && EIGEN_SCALAR_BINARY_SUPPORTED(pow,typename Derived::Scalar,ScalarExponent), + const EIGEN_EXPR_BINARYOP_SCALAR_RETURN_TYPE(Derived,ScalarExponent,pow) >::type + pow(const Eigen::ArrayBase& x, const ScalarExponent& exponent) { + return x.derived().pow(exponent); + } + template - inline const Eigen::CwiseUnaryOp, const Derived> + inline const EIGEN_EXPR_BINARYOP_SCALAR_RETURN_TYPE(Derived,typename Derived::Scalar,pow) pow(const Eigen::ArrayBase& x, const typename Derived::Scalar& exponent) { return x.derived().pow(exponent); } +#endif /** \returns an expression of the coefficient-wise power of \a x to the given array of \a exponents. * @@ -86,12 +124,14 @@ namespace Eigen * Output: \verbinclude Cwise_array_power_array.out * * \sa ArrayBase::pow() + * + * \relates ArrayBase */ template - inline const Eigen::CwiseBinaryOp, const Derived, const ExponentDerived> + inline const Eigen::CwiseBinaryOp, const Derived, const ExponentDerived> pow(const Eigen::ArrayBase& x, const Eigen::ArrayBase& exponents) { - return Eigen::CwiseBinaryOp, const Derived, const ExponentDerived>( + return Eigen::CwiseBinaryOp, const Derived, const ExponentDerived>( x.derived(), exponents.derived() ); @@ -100,66 +140,39 @@ namespace Eigen /** \returns an expression of the coefficient-wise power of the scalar \a x to the given array of \a exponents. * * This function computes the coefficient-wise power between a scalar and an array of exponents. - * Beaware that the scalar type of the input scalar \a x and the exponents \a exponents must be the same. + * + * \tparam Scalar is the scalar type of \a x. It must be compatible with the scalar type of the given array expression (\c Derived::Scalar). 
* * Example: \include Cwise_scalar_power_array.cpp * Output: \verbinclude Cwise_scalar_power_array.out * * \sa ArrayBase::pow() + * + * \relates ArrayBase */ +#ifdef EIGEN_PARSED_BY_DOXYGEN + template + inline const CwiseBinaryOp,Constant,Derived> + pow(const Scalar& x,const Eigen::ArrayBase& x); +#else + template + inline typename internal::enable_if< !(internal::is_same::value) && EIGEN_SCALAR_BINARY_SUPPORTED(pow,Scalar,typename Derived::Scalar), + const EIGEN_SCALAR_BINARYOP_EXPR_RETURN_TYPE(Scalar,Derived,pow) >::type + pow(const Scalar& x, const Eigen::ArrayBase& exponents) + { + return EIGEN_SCALAR_BINARYOP_EXPR_RETURN_TYPE(Scalar,Derived,pow)( + typename internal::plain_constant_type::type(exponents.rows(), exponents.cols(), x), exponents.derived() ); + } + template - inline const Eigen::CwiseBinaryOp, const typename Derived::ConstantReturnType, const Derived> - pow(const typename Derived::Scalar& x, const Eigen::ArrayBase& exponents) + inline const EIGEN_SCALAR_BINARYOP_EXPR_RETURN_TYPE(typename Derived::Scalar,Derived,pow) + pow(const typename Derived::Scalar& x, const Eigen::ArrayBase& exponents) { - typename Derived::ConstantReturnType constant_x(exponents.rows(), exponents.cols(), x); - return Eigen::CwiseBinaryOp, const typename Derived::ConstantReturnType, const Derived>( - constant_x, - exponents.derived() - ); - } - - /** - * \brief Component-wise division of a scalar by array elements. - **/ - template - inline const Eigen::CwiseUnaryOp, const Derived> - operator/(const typename Derived::Scalar& s, const Eigen::ArrayBase& a) - { - return Eigen::CwiseUnaryOp, const Derived>( - a.derived(), - Eigen::internal::scalar_inverse_mult_op(s) - ); + return EIGEN_SCALAR_BINARYOP_EXPR_RETURN_TYPE(typename Derived::Scalar,Derived,pow)( + typename internal::plain_constant_type::type(exponents.rows(), exponents.cols(), x), exponents.derived() ); } +#endif - /** \returns an expression of the coefficient-wise igamma(\a a, \a x) to the given arrays. 
- * - * This function computes the coefficient-wise incomplete gamma function. - * - */ - template - inline const Eigen::CwiseBinaryOp, const Derived, const ExponentDerived> - igamma(const Eigen::ArrayBase& a, const Eigen::ArrayBase& x) - { - return Eigen::CwiseBinaryOp, const Derived, const ExponentDerived>( - a.derived(), - x.derived() - ); - } - - /** \returns an expression of the coefficient-wise igammac(\a a, \a x) to the given arrays. - * - * This function computes the coefficient-wise complementary incomplete gamma function. - * - */ - template - inline const Eigen::CwiseBinaryOp, const Derived, const ExponentDerived> - igammac(const Eigen::ArrayBase& a, const Eigen::ArrayBase& x) - { - return Eigen::CwiseBinaryOp, const Derived, const ExponentDerived>( - a.derived(), - x.derived() - ); - } namespace internal { diff --git a/Eigen/src/Core/IO.h b/Eigen/src/Core/IO.h index dfd9097cc..94e00f58b 100644 --- a/Eigen/src/Core/IO.h +++ b/Eigen/src/Core/IO.h @@ -125,31 +125,17 @@ DenseBase::format(const IOFormat& fmt) const namespace internal { -template -struct significant_decimals_default_impl -{ - typedef typename NumTraits::Real RealScalar; - static inline int run() - { - using std::ceil; - using std::log; - return cast(ceil(-log(NumTraits::epsilon())/log(RealScalar(10)))); - } -}; - -template -struct significant_decimals_default_impl -{ - static inline int run() - { - return 0; - } -}; - +// NOTE: This helper is kept for backward compatibility with previous code specializing +// this internal::significant_decimals_impl structure. In the future we should directly +// call digits10() which has been introduced in July 2016 in 3.3. 
template struct significant_decimals_impl - : significant_decimals_default_impl::IsInteger> -{}; +{ + static inline int run() + { + return NumTraits::digits10(); + } +}; /** \internal * print the matrix \a _m to the output stream \a s using the output format \a fmt */ diff --git a/Eigen/src/Core/Inverse.h b/Eigen/src/Core/Inverse.h index f3ec84990..f303aebf9 100644 --- a/Eigen/src/Core/Inverse.h +++ b/Eigen/src/Core/Inverse.h @@ -50,7 +50,7 @@ public: typedef typename internal::ref_selector::type Nested; typedef typename internal::remove_all::type NestedExpression; - explicit Inverse(const XprType &xpr) + explicit EIGEN_DEVICE_FUNC Inverse(const XprType &xpr) : m_xpr(xpr) {} diff --git a/Eigen/src/Core/MapBase.h b/Eigen/src/Core/MapBase.h index 12c464a5a..020f939ad 100644 --- a/Eigen/src/Core/MapBase.h +++ b/Eigen/src/Core/MapBase.h @@ -17,10 +17,20 @@ namespace Eigen { -/** \class MapBase - * \ingroup Core_Module +/** \ingroup Core_Module * - * \brief Base class for Map and Block expression with direct access + * \brief Base class for dense Map and Block expression with direct access + * + * This base class provides the const low-level accessors (e.g. coeff, coeffRef) of dense + * Map and Block objects with direct access. + * Typical users do not have to directly deal with this class. + * + * This class can be extended by through the macro plugin \c EIGEN_MAPBASE_PLUGIN. + * See \link TopicCustomizing_Plugins customizing Eigen \endlink for details. 
+ * + * The \c Derived class has to provide the following two methods describing the memory layout: + * \code Index innerStride() const; \endcode + * \code Index outerStride() const; \endcode * * \sa class Map, class Block */ @@ -75,7 +85,9 @@ template class MapBase typedef typename Base::CoeffReturnType CoeffReturnType; + /** \copydoc DenseBase::rows() */ EIGEN_DEVICE_FUNC inline Index rows() const { return m_rows.value(); } + /** \copydoc DenseBase::cols() */ EIGEN_DEVICE_FUNC inline Index cols() const { return m_cols.value(); } /** Returns a pointer to the first coefficient of the matrix or vector. @@ -86,12 +98,14 @@ template class MapBase */ EIGEN_DEVICE_FUNC inline const Scalar* data() const { return m_data; } + /** \copydoc PlainObjectBase::coeff(Index,Index) const */ EIGEN_DEVICE_FUNC inline const Scalar& coeff(Index rowId, Index colId) const { return m_data[colId * colStride() + rowId * rowStride()]; } + /** \copydoc PlainObjectBase::coeff(Index) const */ EIGEN_DEVICE_FUNC inline const Scalar& coeff(Index index) const { @@ -99,12 +113,14 @@ template class MapBase return m_data[index * innerStride()]; } + /** \copydoc PlainObjectBase::coeffRef(Index,Index) const */ EIGEN_DEVICE_FUNC inline const Scalar& coeffRef(Index rowId, Index colId) const { return this->m_data[colId * colStride() + rowId * rowStride()]; } + /** \copydoc PlainObjectBase::coeffRef(Index) const */ EIGEN_DEVICE_FUNC inline const Scalar& coeffRef(Index index) const { @@ -112,6 +128,7 @@ template class MapBase return this->m_data[index * innerStride()]; } + /** \internal */ template inline PacketScalar packet(Index rowId, Index colId) const { @@ -119,6 +136,7 @@ template class MapBase (m_data + (colId * colStride() + rowId * rowStride())); } + /** \internal */ template inline PacketScalar packet(Index index) const { @@ -126,6 +144,7 @@ template class MapBase return internal::ploadt(m_data + index * innerStride()); } + /** \internal Constructor for fixed size matrices or vectors */ 
EIGEN_DEVICE_FUNC explicit inline MapBase(PointerType dataPtr) : m_data(dataPtr), m_rows(RowsAtCompileTime), m_cols(ColsAtCompileTime) { @@ -133,6 +152,7 @@ template class MapBase checkSanity(); } + /** \internal Constructor for dynamically sized vectors */ EIGEN_DEVICE_FUNC inline MapBase(PointerType dataPtr, Index vecSize) : m_data(dataPtr), @@ -145,6 +165,7 @@ template class MapBase checkSanity(); } + /** \internal Constructor for dynamically sized matrices */ EIGEN_DEVICE_FUNC inline MapBase(PointerType dataPtr, Index rows, Index cols) : m_data(dataPtr), m_rows(rows), m_cols(cols) @@ -166,7 +187,7 @@ template class MapBase void checkSanity(typename internal::enable_if<(internal::traits::Alignment>0),void*>::type = 0) const { #if EIGEN_MAX_ALIGN_BYTES>0 - eigen_assert(( ((size_t(m_data) % internal::traits::Alignment) == 0) + eigen_assert(( ((internal::UIntPtr(m_data) % internal::traits::Alignment) == 0) || (cols() * rows() * innerStride() * sizeof(Scalar)) < internal::traits::Alignment ) && "data is not aligned"); #endif } @@ -181,6 +202,16 @@ template class MapBase const internal::variable_if_dynamic m_cols; }; +/** \ingroup Core_Module + * + * \brief Base class for non-const dense Map and Block expression with direct access + * + * This base class provides the non-const low-level accessors (e.g. coeff and coeffRef) of + * dense Map and Block objects with direct access. + * It inherits MapBase which defines the const variant for reading specific entries. 
+ * + * \sa class Map, class Block + */ template class MapBase : public MapBase { diff --git a/Eigen/src/Core/MathFunctions.h b/Eigen/src/Core/MathFunctions.h index 5771abf7d..8d47fb8a4 100644 --- a/Eigen/src/Core/MathFunctions.h +++ b/Eigen/src/Core/MathFunctions.h @@ -11,7 +11,9 @@ #define EIGEN_MATHFUNCTIONS_H // source: http://www.geom.uiuc.edu/~huberty/math5337/groupe/digits.html -#define EIGEN_PI 3.141592653589793238462643383279502884197169399375105820974944592307816406 +// TODO this should better be moved to NumTraits +#define EIGEN_PI 3.141592653589793238462643383279502884197169399375105820974944592307816406L + namespace Eigen { @@ -95,6 +97,19 @@ struct real_default_impl template struct real_impl : real_default_impl {}; +#ifdef __CUDA_ARCH__ +template +struct real_impl > +{ + typedef T RealScalar; + EIGEN_DEVICE_FUNC + static inline T run(const std::complex& x) + { + return x.real(); + } +}; +#endif + template struct real_retval { @@ -130,6 +145,19 @@ struct imag_default_impl template struct imag_impl : imag_default_impl {}; +#ifdef __CUDA_ARCH__ +template +struct imag_impl > +{ + typedef T RealScalar; + EIGEN_DEVICE_FUNC + static inline T run(const std::complex& x) + { + return x.imag(); + } +}; +#endif + template struct imag_retval { @@ -457,30 +485,33 @@ struct arg_retval /**************************************************************************** * Implementation of log1p * ****************************************************************************/ -template::IsComplex > -struct log1p_impl -{ - static inline Scalar run(const Scalar& x) - { + +namespace std_fallback { + // fallback log1p implementation in case there is no log1p(Scalar) function in namespace of Scalar, + // or that there is no suitable std::log1p function available + template + EIGEN_DEVICE_FUNC inline Scalar log1p(const Scalar& x) { EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar) typedef typename NumTraits::Real RealScalar; EIGEN_USING_STD_MATH(log); Scalar x1p = RealScalar(1) + x; return ( 
x1p == Scalar(1) ) ? x : x * ( log(x1p) / (x1p - RealScalar(1)) ); } -}; +} -#if EIGEN_HAS_CXX11_MATH template -struct log1p_impl { +struct log1p_impl { static inline Scalar run(const Scalar& x) { EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar) + #if EIGEN_HAS_CXX11_MATH using std::log1p; + #endif + using std_fallback::log1p; return log1p(x); } }; -#endif + template struct log1p_retval @@ -492,24 +523,26 @@ struct log1p_retval * Implementation of pow * ****************************************************************************/ -template -struct pow_default_impl +template::IsInteger&&NumTraits::IsInteger> +struct pow_impl { - typedef Scalar retval; - static EIGEN_DEVICE_FUNC inline Scalar run(const Scalar& x, const Scalar& y) + //typedef Scalar retval; + typedef typename ScalarBinaryOpTraits >::ReturnType result_type; + static EIGEN_DEVICE_FUNC inline result_type run(const ScalarX& x, const ScalarY& y) { EIGEN_USING_STD_MATH(pow); return pow(x, y); } }; -template -struct pow_default_impl +template +struct pow_impl { - static EIGEN_DEVICE_FUNC inline Scalar run(Scalar x, Scalar y) + typedef ScalarX result_type; + static EIGEN_DEVICE_FUNC inline ScalarX run(ScalarX x, ScalarY y) { - Scalar res(1); - eigen_assert(!NumTraits::IsSigned || y >= 0); + ScalarX res(1); + eigen_assert(!NumTraits::IsSigned || y >= 0); if(y & 1) res *= x; y >>= 1; while(y) @@ -522,15 +555,6 @@ struct pow_default_impl } }; -template -struct pow_impl : pow_default_impl::IsInteger> {}; - -template -struct pow_retval -{ - typedef Scalar type; -}; - /**************************************************************************** * Implementation of random * ****************************************************************************/ @@ -620,16 +644,18 @@ struct random_default_impl typedef typename conditional::IsSigned,std::ptrdiff_t,std::size_t>::type ScalarX; if(y=x the result converted to an unsigned long is still correct. 
std::size_t range = ScalarX(y)-ScalarX(x); std::size_t offset = 0; // rejection sampling - std::size_t divisor = (range+RAND_MAX-1)/(range+1); - std::size_t multiplier = (range+RAND_MAX-1)/std::size_t(RAND_MAX); - + std::size_t divisor = 1; + std::size_t multiplier = 1; + if(range range); - return Scalar(ScalarX(x) + offset); } @@ -790,6 +816,8 @@ template EIGEN_DEVICE_FUNC bool isfinite_impl(const std::complex& template EIGEN_DEVICE_FUNC bool isnan_impl(const std::complex& x); template EIGEN_DEVICE_FUNC bool isinf_impl(const std::complex& x); +template T generic_fast_tanh_float(const T& a_x); + } // end namespace internal /**************************************************************************** @@ -825,7 +853,7 @@ template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float mini(const float& x, const float& y) { - return fmin(x, y); + return fminf(x, y); } template EIGEN_DEVICE_FUNC @@ -837,7 +865,7 @@ template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float maxi(const float& x, const float& y) { - return fmax(x, y); + return fmaxf(x, y); } #endif @@ -847,7 +875,7 @@ EIGEN_DEVICE_FUNC inline EIGEN_MATHFUNC_RETVAL(real, Scalar) real(const Scalar& x) { return EIGEN_MATHFUNC_IMPL(real, Scalar)::run(x); -} +} template EIGEN_DEVICE_FUNC @@ -926,11 +954,19 @@ inline EIGEN_MATHFUNC_RETVAL(log1p, Scalar) log1p(const Scalar& x) return EIGEN_MATHFUNC_IMPL(log1p, Scalar)::run(x); } -template +#ifdef __CUDACC__ +template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE +float log1p(const float &x) { return ::log1pf(x); } + +template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE +double log1p(const double &x) { return ::log1p(x); } +#endif + +template EIGEN_DEVICE_FUNC -inline EIGEN_MATHFUNC_RETVAL(pow, Scalar) pow(const Scalar& x, const Scalar& y) +inline typename internal::pow_impl::result_type pow(const ScalarX& x, const ScalarY& y) { - return EIGEN_MATHFUNC_IMPL(pow, Scalar)::run(x, y); + return internal::pow_impl::run(x, y); } template EIGEN_DEVICE_FUNC bool (isnan) (const T &x) { return 
internal::isnan_impl(x); } @@ -1036,6 +1072,16 @@ float abs(const float &x) { return ::fabsf(x); } template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE double abs(const double &x) { return ::fabs(x); } + +template <> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE +float abs(const std::complex& x) { + return ::hypotf(x.real(), x.imag()); +} + +template <> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE +double abs(const std::complex& x) { + return ::hypot(x.real(), x.imag()); +} #endif template @@ -1181,6 +1227,11 @@ T tanh(const T &x) { return tanh(x); } +#if (!defined(__CUDACC__)) && EIGEN_FAST_MATH +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE +float tanh(float x) { return internal::generic_fast_tanh_float(x); } +#endif + #ifdef __CUDACC__ template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float tanh(const float &x) { return ::tanhf(x); } @@ -1192,7 +1243,7 @@ double tanh(const double &x) { return ::tanh(x); } template EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE T fmod(const T& a, const T& b) { - EIGEN_USING_STD_MATH(floor); + EIGEN_USING_STD_MATH(fmod); return fmod(a, b); } @@ -1287,11 +1338,12 @@ template struct scalar_fuzzy_default_impl { typedef typename NumTraits::Real RealScalar; - template + template EIGEN_DEVICE_FUNC static inline bool isMuchSmallerThan(const Scalar& x, const OtherScalar& y, const RealScalar& prec) { return numext::abs2(x) <= numext::abs2(y) * prec * prec; } + EIGEN_DEVICE_FUNC static inline bool isApprox(const Scalar& x, const Scalar& y, const RealScalar& prec) { return numext::abs2(x - y) <= numext::mini(numext::abs2(x), numext::abs2(y)) * prec * prec; diff --git a/Eigen/src/Core/MathFunctionsImpl.h b/Eigen/src/Core/MathFunctionsImpl.h new file mode 100644 index 000000000..3c9ef22fa --- /dev/null +++ b/Eigen/src/Core/MathFunctionsImpl.h @@ -0,0 +1,78 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. 
+// +// Copyright (C) 2014 Pedro Gonnet (pedro.gonnet@gmail.com) +// Copyright (C) 2016 Gael Guennebaud +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_MATHFUNCTIONSIMPL_H +#define EIGEN_MATHFUNCTIONSIMPL_H + +namespace Eigen { + +namespace internal { + +/** \internal \returns the hyperbolic tan of \a a (coeff-wise) + Doesn't do anything fancy, just a 13/6-degree rational interpolant which + is accurate up to a couple of ulp in the range [-9, 9], outside of which + the tanh(x) = +/-1. + + This implementation works on both scalars and packets. +*/ +template +T generic_fast_tanh_float(const T& a_x) +{ + // Clamp the inputs to the range [-9, 9] since anything outside + // this range is +/-1.0f in single-precision. + const T plus_9 = pset1(9.f); + const T minus_9 = pset1(-9.f); + // NOTE GCC prior to 6.3 might improperly optimize this max/min + // step such that if a_x is nan, x will be either 9 or -9, + // and tanh will return 1 or -1 instead of nan. + // This is supposed to be fixed in gcc6.3, + // see: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=72867 + const T x = pmax(minus_9,pmin(plus_9,a_x)); + // The monomial coefficients of the numerator polynomial (odd). + const T alpha_1 = pset1(4.89352455891786e-03f); + const T alpha_3 = pset1(6.37261928875436e-04f); + const T alpha_5 = pset1(1.48572235717979e-05f); + const T alpha_7 = pset1(5.12229709037114e-08f); + const T alpha_9 = pset1(-8.60467152213735e-11f); + const T alpha_11 = pset1(2.00018790482477e-13f); + const T alpha_13 = pset1(-2.76076847742355e-16f); + + // The monomial coefficients of the denominator polynomial (even). 
+ const T beta_0 = pset1(4.89352518554385e-03f); + const T beta_2 = pset1(2.26843463243900e-03f); + const T beta_4 = pset1(1.18534705686654e-04f); + const T beta_6 = pset1(1.19825839466702e-06f); + + // Since the polynomials are odd/even, we need x^2. + const T x2 = pmul(x, x); + + // Evaluate the numerator polynomial p. + T p = pmadd(x2, alpha_13, alpha_11); + p = pmadd(x2, p, alpha_9); + p = pmadd(x2, p, alpha_7); + p = pmadd(x2, p, alpha_5); + p = pmadd(x2, p, alpha_3); + p = pmadd(x2, p, alpha_1); + p = pmul(x, p); + + // Evaluate the denominator polynomial p. + T q = pmadd(x2, beta_6, beta_4); + q = pmadd(x2, q, beta_2); + q = pmadd(x2, q, beta_0); + + // Divide the numerator by the denominator. + return pdiv(p, q); +} + +} // end namespace internal + +} // end namespace Eigen + +#endif // EIGEN_MATHFUNCTIONSIMPL_H diff --git a/Eigen/src/Core/Matrix.h b/Eigen/src/Core/Matrix.h index bcbbbf9ae..90c336d8c 100644 --- a/Eigen/src/Core/Matrix.h +++ b/Eigen/src/Core/Matrix.h @@ -27,7 +27,7 @@ private: default_alignment = compute_default_alignment<_Scalar,max_size>::value, actual_alignment = ((_Options&DontAlign)==0) ? default_alignment : 0, required_alignment = unpacket_traits::alignment, - packet_access_bit = packet_traits<_Scalar>::Vectorizable && (actual_alignment>=required_alignment) ? PacketAccessBit : 0 + packet_access_bit = (packet_traits<_Scalar>::Vectorizable && (EIGEN_UNALIGNED_VECTORIZE || (actual_alignment>=required_alignment))) ? PacketAccessBit : 0 }; public: @@ -106,7 +106,7 @@ public: * \endcode * * This class can be extended with the help of the plugin mechanism described on the page - * \ref TopicCustomizingEigen by defining the preprocessor symbol \c EIGEN_MATRIX_PLUGIN. + * \ref TopicCustomizing_Plugins by defining the preprocessor symbol \c EIGEN_MATRIX_PLUGIN. 
* * Some notes: * @@ -268,9 +268,9 @@ class Matrix : Base(internal::constructor_without_unaligned_array_assert()) { Base::_check_template_params(); EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED } -#ifdef EIGEN_HAVE_RVALUE_REFERENCES +#if EIGEN_HAS_RVALUE_REFERENCES EIGEN_DEVICE_FUNC - Matrix(Matrix&& other) + Matrix(Matrix&& other) EIGEN_NOEXCEPT_IF(std::is_nothrow_move_constructible::value) : Base(std::move(other)) { Base::_check_template_params(); @@ -278,7 +278,7 @@ class Matrix Base::_set_noalias(other); } EIGEN_DEVICE_FUNC - Matrix& operator=(Matrix&& other) + Matrix& operator=(Matrix&& other) EIGEN_NOEXCEPT_IF(std::is_nothrow_move_assignable::value) { other.swap(*this); return *this; diff --git a/Eigen/src/Core/MatrixBase.h b/Eigen/src/Core/MatrixBase.h index 1e66b4e1b..d56df8249 100644 --- a/Eigen/src/Core/MatrixBase.h +++ b/Eigen/src/Core/MatrixBase.h @@ -41,7 +41,7 @@ namespace Eigen { * \endcode * * This class can be extended with the help of the plugin mechanism described on the page - * \ref TopicCustomizingEigen by defining the preprocessor symbol \c EIGEN_MATRIXBASE_PLUGIN. + * \ref TopicCustomizing_Plugins by defining the preprocessor symbol \c EIGEN_MATRIXBASE_PLUGIN. * * \sa \blank \ref TopicClassHierarchy */ @@ -80,8 +80,6 @@ template class MatrixBase using Base::operator-=; using Base::operator*=; using Base::operator/=; - using Base::operator*; - using Base::operator/; typedef typename Base::CoeffReturnType CoeffReturnType; typedef typename Base::ConstTransposeReturnType ConstTransposeReturnType; @@ -100,7 +98,7 @@ template class MatrixBase /** \returns the size of the main diagonal, which is min(rows(),cols()). * \sa rows(), cols(), SizeAtCompileTime. 
*/ EIGEN_DEVICE_FUNC - inline Index diagonalSize() const { return (std::min)(rows(),cols()); } + inline Index diagonalSize() const { return (numext::mini)(rows(),cols()); } typedef typename Base::PlainObject PlainObject; @@ -123,6 +121,7 @@ template class MatrixBase #endif // not EIGEN_PARSED_BY_DOXYGEN #define EIGEN_CURRENT_STORAGE_BASE_CLASS Eigen::MatrixBase +#define EIGEN_DOC_UNARY_ADDONS(X,Y) # include "../plugins/CommonCwiseUnaryOps.h" # include "../plugins/CommonCwiseBinaryOps.h" # include "../plugins/MatrixCwiseUnaryOps.h" @@ -131,6 +130,7 @@ template class MatrixBase # include EIGEN_MATRIXBASE_PLUGIN # endif #undef EIGEN_CURRENT_STORAGE_BASE_CLASS +#undef EIGEN_DOC_UNARY_ADDONS /** Special case of the template operator=, in order to prevent the compiler * from generating a default operator= (issue hit with g++ 4.1) @@ -195,7 +195,7 @@ template class MatrixBase template EIGEN_DEVICE_FUNC - typename internal::scalar_product_traits::Scalar,typename internal::traits::Scalar>::ReturnType + typename ScalarBinaryOpTraits::Scalar,typename internal::traits::Scalar>::ReturnType dot(const MatrixBase& other) const; EIGEN_DEVICE_FUNC RealScalar squaredNorm() const; @@ -330,15 +330,11 @@ template class MatrixBase /////////// LU module /////////// - EIGEN_DEVICE_FUNC inline const FullPivLU fullPivLu() const; - EIGEN_DEVICE_FUNC inline const PartialPivLU partialPivLu() const; - EIGEN_DEVICE_FUNC inline const PartialPivLU lu() const; - EIGEN_DEVICE_FUNC inline const Inverse inverse() const; template @@ -383,7 +379,7 @@ template class MatrixBase #ifndef EIGEN_PARSED_BY_DOXYGEN /// \internal helper struct to form the return type of the cross product template struct cross_product_return_type { - typedef typename internal::scalar_product_traits::Scalar,typename internal::traits::Scalar>::ReturnType Scalar; + typedef typename ScalarBinaryOpTraits::Scalar,typename internal::traits::Scalar>::ReturnType Scalar; typedef Matrix type; }; #endif // EIGEN_PARSED_BY_DOXYGEN @@ -405,7 
+401,6 @@ template class MatrixBase inline Matrix eulerAngles(Index a0, Index a1, Index a2) const; - inline ScalarMultipleReturnType operator*(const UniformScaling& s) const; // put this as separate enum value to work around possible GCC 4.3 bug (?) enum { HomogeneousReturnTypeDirection = ColsAtCompileTime==1&&RowsAtCompileTime==1 ? ((internal::traits::Flags&RowMajorBit)==RowMajorBit ? Horizontal : Vertical) : ColsAtCompileTime==1 ? Vertical : Horizontal }; @@ -418,8 +413,7 @@ template class MatrixBase typedef Block::ColsAtCompileTime==1 ? SizeMinusOne : 1, internal::traits::ColsAtCompileTime==1 ? 1 : SizeMinusOne> ConstStartMinusOne; - typedef CwiseUnaryOp::Scalar>, - const ConstStartMinusOne > HNormalizedReturnType; + typedef EIGEN_EXPR_BINARYOP_SCALAR_RETURN_TYPE(ConstStartMinusOne,Scalar,quotient) HNormalizedReturnType; inline const HNormalizedReturnType hnormalized() const; diff --git a/Eigen/src/Core/NoAlias.h b/Eigen/src/Core/NoAlias.h index ffb673cee..33908010b 100644 --- a/Eigen/src/Core/NoAlias.h +++ b/Eigen/src/Core/NoAlias.h @@ -39,7 +39,7 @@ class NoAlias EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE ExpressionType& operator=(const StorageBase& other) { - call_assignment_no_alias(m_expression, other.derived(), internal::assign_op()); + call_assignment_no_alias(m_expression, other.derived(), internal::assign_op()); return m_expression; } @@ -47,7 +47,7 @@ class NoAlias EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE ExpressionType& operator+=(const StorageBase& other) { - call_assignment_no_alias(m_expression, other.derived(), internal::add_assign_op()); + call_assignment_no_alias(m_expression, other.derived(), internal::add_assign_op()); return m_expression; } @@ -55,7 +55,7 @@ class NoAlias EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE ExpressionType& operator-=(const StorageBase& other) { - call_assignment_no_alias(m_expression, other.derived(), internal::sub_assign_op()); + call_assignment_no_alias(m_expression, other.derived(), internal::sub_assign_op()); return 
m_expression; } diff --git a/Eigen/src/Core/NumTraits.h b/Eigen/src/Core/NumTraits.h index e065fa714..dd61195bc 100644 --- a/Eigen/src/Core/NumTraits.h +++ b/Eigen/src/Core/NumTraits.h @@ -12,6 +12,37 @@ namespace Eigen { +namespace internal { + +// default implementation of digits10(), based on numeric_limits if specialized, +// 0 for integer types, and log10(epsilon()) otherwise. +template< typename T, + bool use_numeric_limits = std::numeric_limits::is_specialized, + bool is_integer = NumTraits::IsInteger> +struct default_digits10_impl +{ + static int run() { return std::numeric_limits::digits10; } +}; + +template +struct default_digits10_impl // Floating point +{ + static int run() { + using std::log10; + using std::ceil; + typedef typename NumTraits::Real Real; + return int(ceil(-log10(NumTraits::epsilon()))); + } +}; + +template +struct default_digits10_impl // Integer +{ + static int run() { return 0; } +}; + +} // end namespace internal + /** \class NumTraits * \ingroup Core_Module * @@ -22,14 +53,16 @@ namespace Eigen { * This class stores enums, typedefs and static methods giving information about a numeric type. * * The provided data consists of: - * \li A typedef \a Real, giving the "real part" type of \a T. If \a T is already real, - * then \a Real is just a typedef to \a T. If \a T is \c std::complex then \a Real + * \li A typedef \c Real, giving the "real part" type of \a T. If \a T is already real, + * then \c Real is just a typedef to \a T. If \a T is \c std::complex then \c Real * is a typedef to \a U. - * \li A typedef \a NonInteger, giving the type that should be used for operations producing non-integral values, + * \li A typedef \c NonInteger, giving the type that should be used for operations producing non-integral values, * such as quotients, square roots, etc. If \a T is a floating-point type, then this typedef just gives * \a T again. Note however that many Eigen functions such as internal::sqrt simply refuse to * take integers. 
Outside of a few cases, Eigen doesn't do automatic type promotion. Thus, this typedef is * only intended as a helper for code that needs to explicitly promote types. + * \li A typedef \c Literal giving the type to use for numeric literals such as "2" or "0.5". For instance, for \c std::complex, Literal is defined as \c U. + * Of course, this type must be fully compatible with \a T. In doubt, just use \a T here. * \li A typedef \a Nested giving the type to use to nest a value inside of the expression tree. If you don't know what * this means, just use \a T here. * \li An enum value \a IsComplex. It is equal to 1 if \a T is a \c std::complex @@ -42,10 +75,14 @@ namespace Eigen { * \li An enum value \a IsSigned. It is equal to \c 1 if \a T is a signed type and to 0 if \a T is unsigned. * \li An enum value \a RequireInitialization. It is equal to \c 1 if the constructor of the numeric type \a T must * be called, and to 0 if it is safe not to call it. Default is 0 if \a T is an arithmetic type, and 1 otherwise. - * \li An epsilon() function which, unlike std::numeric_limits::epsilon(), returns a \a Real instead of a \a T. + * \li An epsilon() function which, unlike std::numeric_limits::epsilon(), + * it returns a \a Real instead of a \a T. * \li A dummy_precision() function returning a weak epsilon value. It is mainly used as a default * value by the fuzzy comparison operators. * \li highest() and lowest() functions returning the highest and lowest possible values respectively. + * \li digits10() function returning the number of decimal digits that can be represented without change. This is + * the analogue of std::numeric_limits::digits10 + * which is used as the default implementation if specialized. */ template struct GenericNumTraits @@ -60,23 +97,6 @@ template struct GenericNumTraits MulCost = 1 }; - // Division is messy but important, because it is expensive and throughput - // varies significantly. 
The following numbers are based on min division - // throughput on Haswell. - template - struct Div { - enum { -#ifdef EIGEN_VECTORIZE_AVX - AVX = true, -#else - AVX = false, -#endif - Cost = IsInteger ? (sizeof(T) == 8 ? (IsSigned ? 24 : 21) : (IsSigned ? 8 : 9)): - Vectorized ? (sizeof(T) == 8 ? (AVX ? 16 : 8) : (AVX ? 14 : 7)) : 8 - }; - }; - - typedef T Real; typedef typename internal::conditional< IsInteger, @@ -84,12 +104,20 @@ template struct GenericNumTraits T >::type NonInteger; typedef T Nested; + typedef T Literal; EIGEN_DEVICE_FUNC static inline Real epsilon() { return numext::numeric_limits::epsilon(); } + + EIGEN_DEVICE_FUNC + static inline int digits10() + { + return internal::default_digits10_impl::run(); + } + EIGEN_DEVICE_FUNC static inline Real dummy_precision() { @@ -145,6 +173,7 @@ template struct NumTraits > : GenericNumTraits > { typedef _Real Real; + typedef typename NumTraits<_Real>::Literal Literal; enum { IsComplex = 1, RequireInitialization = NumTraits<_Real>::RequireInitialization, @@ -157,6 +186,8 @@ template struct NumTraits > static inline Real epsilon() { return NumTraits::epsilon(); } EIGEN_DEVICE_FUNC static inline Real dummy_precision() { return NumTraits::dummy_precision(); } + EIGEN_DEVICE_FUNC + static inline int digits10() { return NumTraits::digits10(); } }; template @@ -168,6 +199,7 @@ struct NumTraits > typedef typename NumTraits::NonInteger NonIntegerScalar; typedef Array NonInteger; typedef ArrayType & Nested; + typedef typename NumTraits::Literal Literal; enum { IsComplex = NumTraits::IsComplex, @@ -185,6 +217,30 @@ struct NumTraits > static inline RealScalar dummy_precision() { return NumTraits::dummy_precision(); } }; +template<> struct NumTraits + : GenericNumTraits +{ + enum { + RequireInitialization = 1, + ReadCost = HugeCost, + AddCost = HugeCost, + MulCost = HugeCost + }; + + static inline int digits10() { return 0; } + +private: + static inline std::string epsilon(); + static inline std::string 
dummy_precision(); + static inline std::string lowest(); + static inline std::string highest(); + static inline std::string infinity(); + static inline std::string quiet_NaN(); +}; + +// Empty specialization for void to allow template specialization based on NumTraits::Real with T==void and SFINAE. +template<> struct NumTraits {}; + } // end namespace Eigen #endif // EIGEN_NUMTRAITS_H diff --git a/Eigen/src/Core/PlainObjectBase.h b/Eigen/src/Core/PlainObjectBase.h index b7a4fcea8..55b4ac057 100644 --- a/Eigen/src/Core/PlainObjectBase.h +++ b/Eigen/src/Core/PlainObjectBase.h @@ -59,33 +59,34 @@ template struct m } // end namespace internal /** \class PlainObjectBase + * \ingroup Core_Module * \brief %Dense storage base class for matrices and arrays. * * This class can be extended with the help of the plugin mechanism described on the page - * \ref TopicCustomizingEigen by defining the preprocessor symbol \c EIGEN_PLAINOBJECTBASE_PLUGIN. + * \ref TopicCustomizing_Plugins by defining the preprocessor symbol \c EIGEN_PLAINOBJECTBASE_PLUGIN. * * \sa \ref TopicClassHierarchy */ #ifdef EIGEN_PARSED_BY_DOXYGEN -namespace internal { +namespace doxygen { // this is a workaround to doxygen not being able to understand the inheritance logic // when it is hidden by the dense_xpr_base helper struct. /** This class is just a workaround for Doxygen and it does not not actually exist. */ -template struct dense_xpr_base_dispatcher_for_doxygen;// : public MatrixBase {}; +template struct dense_xpr_base_dispatcher; /** This class is just a workaround for Doxygen and it does not not actually exist. */ template -struct dense_xpr_base_dispatcher_for_doxygen > +struct dense_xpr_base_dispatcher > : public MatrixBase > {}; /** This class is just a workaround for Doxygen and it does not not actually exist. 
*/ template -struct dense_xpr_base_dispatcher_for_doxygen > +struct dense_xpr_base_dispatcher > : public ArrayBase > {}; -} // namespace internal +} // namespace doxygen template -class PlainObjectBase : public internal::dense_xpr_base_dispatcher_for_doxygen +class PlainObjectBase : public doxygen::dense_xpr_base_dispatcher #else template class PlainObjectBase : public internal::dense_xpr_base::type @@ -145,6 +146,10 @@ class PlainObjectBase : public internal::dense_xpr_base::type EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index cols() const { return m_storage.cols(); } + /** This is an overloaded version of DenseCoeffsBase::coeff(Index,Index) const + * provided to by-pass the creation of an evaluator of the expression, thus saving compilation efforts. + * + * See DenseCoeffsBase::coeff(Index) const for details. */ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar& coeff(Index rowId, Index colId) const { @@ -154,12 +159,20 @@ class PlainObjectBase : public internal::dense_xpr_base::type return m_storage.data()[rowId + colId * m_storage.rows()]; } + /** This is an overloaded version of DenseCoeffsBase::coeff(Index) const + * provided to by-pass the creation of an evaluator of the expression, thus saving compilation efforts. + * + * See DenseCoeffsBase::coeff(Index) const for details. */ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar& coeff(Index index) const { return m_storage.data()[index]; } + /** This is an overloaded version of DenseCoeffsBase::coeffRef(Index,Index) const + * provided to by-pass the creation of an evaluator of the expression, thus saving compilation efforts. + * + * See DenseCoeffsBase::coeffRef(Index,Index) const for details. 
*/ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& coeffRef(Index rowId, Index colId) { @@ -169,12 +182,18 @@ class PlainObjectBase : public internal::dense_xpr_base::type return m_storage.data()[rowId + colId * m_storage.rows()]; } + /** This is an overloaded version of DenseCoeffsBase::coeffRef(Index) const + * provided to by-pass the creation of an evaluator of the expression, thus saving compilation efforts. + * + * See DenseCoeffsBase::coeffRef(Index) const for details. */ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& coeffRef(Index index) { return m_storage.data()[index]; } + /** This is the const version of coeffRef(Index,Index) which is thus synonym of coeff(Index,Index). + * It is provided for convenience. */ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar& coeffRef(Index rowId, Index colId) const { @@ -184,6 +203,8 @@ class PlainObjectBase : public internal::dense_xpr_base::type return m_storage.data()[rowId + colId * m_storage.rows()]; } + /** This is the const version of coeffRef(Index) which is thus synonym of coeff(Index). + * It is provided for convenience. */ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar& coeffRef(Index index) const { @@ -471,15 +492,15 @@ class PlainObjectBase : public internal::dense_xpr_base::type } #endif -#ifdef EIGEN_HAVE_RVALUE_REFERENCES +#if EIGEN_HAS_RVALUE_REFERENCES EIGEN_DEVICE_FUNC - PlainObjectBase(PlainObjectBase&& other) + PlainObjectBase(PlainObjectBase&& other) EIGEN_NOEXCEPT : m_storage( std::move(other.m_storage) ) { } EIGEN_DEVICE_FUNC - PlainObjectBase& operator=(PlainObjectBase&& other) + PlainObjectBase& operator=(PlainObjectBase&& other) EIGEN_NOEXCEPT { using std::swap; swap(m_storage, other.m_storage); @@ -697,7 +718,7 @@ class PlainObjectBase : public internal::dense_xpr_base::type //_resize_to_match(other); // the 'false' below means to enforce lazy evaluation. We don't use lazyAssign() because // it wouldn't allow to copy a row-vector into a column-vector. 
- internal::call_assignment_no_alias(this->derived(), other.derived(), internal::assign_op()); + internal::call_assignment_no_alias(this->derived(), other.derived(), internal::assign_op()); return this->derived(); } @@ -713,11 +734,11 @@ class PlainObjectBase : public internal::dense_xpr_base::type template EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE void _init2(const Scalar& val0, const Scalar& val1, typename internal::enable_if::type* = 0) + EIGEN_STRONG_INLINE void _init2(const T0& val0, const T1& val1, typename internal::enable_if::type* = 0) { EIGEN_STATIC_ASSERT_VECTOR_SPECIFIC_SIZE(PlainObjectBase, 2) - m_storage.data()[0] = val0; - m_storage.data()[1] = val1; + m_storage.data()[0] = Scalar(val0); + m_storage.data()[1] = Scalar(val1); } template diff --git a/Eigen/src/Core/Product.h b/Eigen/src/Core/Product.h index 8aa1de081..ae0c94b38 100644 --- a/Eigen/src/Core/Product.h +++ b/Eigen/src/Core/Product.h @@ -16,39 +16,6 @@ template class Pro namespace internal { -// Determine the scalar of Product. This is normally the same as Lhs::Scalar times -// Rhs::Scalar, but product with permutation matrices inherit the scalar of the other factor. 
-template::Shape, - typename RhsShape = typename evaluator_traits::Shape > -struct product_result_scalar -{ - typedef typename scalar_product_traits::ReturnType Scalar; -}; - -template -struct product_result_scalar -{ - typedef typename Rhs::Scalar Scalar; -}; - -template - struct product_result_scalar -{ - typedef typename Lhs::Scalar Scalar; -}; - -template -struct product_result_scalar -{ - typedef typename Rhs::Scalar Scalar; -}; - -template - struct product_result_scalar -{ - typedef typename Lhs::Scalar Scalar; -}; - template struct traits > { @@ -59,7 +26,7 @@ struct traits > typedef MatrixXpr XprKind; - typedef typename product_result_scalar::Scalar Scalar; + typedef typename ScalarBinaryOpTraits::Scalar, typename traits::Scalar>::ReturnType Scalar; typedef typename product_promote_storage_type::ret>::ret StorageKind; diff --git a/Eigen/src/Core/ProductEvaluators.h b/Eigen/src/Core/ProductEvaluators.h index d9fd888cf..63faca822 100644 --- a/Eigen/src/Core/ProductEvaluators.h +++ b/Eigen/src/Core/ProductEvaluators.h @@ -35,22 +35,28 @@ struct evaluator > EIGEN_DEVICE_FUNC explicit evaluator(const XprType& xpr) : Base(xpr) {} }; -// Catch scalar * ( A * B ) and transform it to (A*scalar) * B +// Catch "scalar * ( A * B )" and transform it to "(A*scalar) * B" // TODO we should apply that rule only if that's really helpful -template -struct evaluator_assume_aliasing, const Product > > +template +struct evaluator_assume_aliasing, + const CwiseNullaryOp, Plain1>, + const Product > > { static const bool value = true; }; -template -struct evaluator, const Product > > - : public evaluator,const Lhs>, Rhs, DefaultProduct> > +template +struct evaluator, + const CwiseNullaryOp, Plain1>, + const Product > > + : public evaluator > { - typedef CwiseUnaryOp, const Product > XprType; - typedef evaluator,const Lhs>, Rhs, DefaultProduct> > Base; - + typedef CwiseBinaryOp, + const CwiseNullaryOp, Plain1>, + const Product > XprType; + typedef evaluator > Base; + 
EIGEN_DEVICE_FUNC explicit evaluator(const XprType& xpr) - : Base(xpr.functor().m_other * xpr.nestedExpression().lhs() * xpr.nestedExpression().rhs()) + : Base(xpr.lhs().functor().m_other * xpr.rhs().lhs() * xpr.rhs().rhs()) {} }; @@ -122,13 +128,17 @@ protected: PlainObject m_result; }; +// The following three shortcuts are enabled only if the scalar types match excatly. +// TODO: we could enable them for different scalar types when the product is not vectorized. + // Dense = Product template< typename DstXprType, typename Lhs, typename Rhs, int Options, typename Scalar> -struct Assignment, internal::assign_op, Dense2Dense, - typename enable_if<(Options==DefaultProduct || Options==AliasFreeProduct),Scalar>::type> +struct Assignment, internal::assign_op, Dense2Dense, + typename enable_if<(Options==DefaultProduct || Options==AliasFreeProduct)>::type> { typedef Product SrcXprType; - static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op &) + static EIGEN_STRONG_INLINE + void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op &) { // FIXME shall we handle nested_eval here? generic_product_impl::evalTo(dst, src.lhs(), src.rhs()); @@ -137,11 +147,12 @@ struct Assignment, internal::assign_op -struct Assignment, internal::add_assign_op, Dense2Dense, - typename enable_if<(Options==DefaultProduct || Options==AliasFreeProduct),Scalar>::type> +struct Assignment, internal::add_assign_op, Dense2Dense, + typename enable_if<(Options==DefaultProduct || Options==AliasFreeProduct)>::type> { typedef Product SrcXprType; - static void run(DstXprType &dst, const SrcXprType &src, const internal::add_assign_op &) + static EIGEN_STRONG_INLINE + void run(DstXprType &dst, const SrcXprType &src, const internal::add_assign_op &) { // FIXME shall we handle nested_eval here? 
generic_product_impl::addTo(dst, src.lhs(), src.rhs()); @@ -150,11 +161,12 @@ struct Assignment, internal::add_assign_op< // Dense -= Product template< typename DstXprType, typename Lhs, typename Rhs, int Options, typename Scalar> -struct Assignment, internal::sub_assign_op, Dense2Dense, - typename enable_if<(Options==DefaultProduct || Options==AliasFreeProduct),Scalar>::type> +struct Assignment, internal::sub_assign_op, Dense2Dense, + typename enable_if<(Options==DefaultProduct || Options==AliasFreeProduct)>::type> { typedef Product SrcXprType; - static void run(DstXprType &dst, const SrcXprType &src, const internal::sub_assign_op &) + static EIGEN_STRONG_INLINE + void run(DstXprType &dst, const SrcXprType &src, const internal::sub_assign_op &) { // FIXME shall we handle nested_eval here? generic_product_impl::subTo(dst, src.lhs(), src.rhs()); @@ -165,55 +177,57 @@ struct Assignment, internal::sub_assign_op< // Dense ?= scalar * Product // TODO we should apply that rule if that's really helpful // for instance, this is not good for inner products -template< typename DstXprType, typename Lhs, typename Rhs, typename AssignFunc, typename Scalar, typename ScalarBis> -struct Assignment, - const Product >, AssignFunc, Dense2Dense, Scalar> +template< typename DstXprType, typename Lhs, typename Rhs, typename AssignFunc, typename Scalar, typename ScalarBis, typename Plain> +struct Assignment, const CwiseNullaryOp,Plain>, + const Product >, AssignFunc, Dense2Dense> { - typedef CwiseUnaryOp, - const Product > SrcXprType; - static void run(DstXprType &dst, const SrcXprType &src, const AssignFunc& func) + typedef CwiseBinaryOp, + const CwiseNullaryOp,Plain>, + const Product > SrcXprType; + static EIGEN_STRONG_INLINE + void run(DstXprType &dst, const SrcXprType &src, const AssignFunc& func) { - call_assignment_no_alias(dst, (src.functor().m_other * src.nestedExpression().lhs())*src.nestedExpression().rhs(), func); + call_assignment_no_alias(dst, (src.lhs().functor().m_other * 
src.rhs().lhs())*src.rhs().rhs(), func); } }; //---------------------------------------- // Catch "Dense ?= xpr + Product<>" expression to save one temporary // FIXME we could probably enable these rules for any product, i.e., not only Dense and DefaultProduct -// TODO enable it for "Dense ?= xpr - Product<>" as well. template -struct evaluator_assume_aliasing, const OtherXpr, +struct evaluator_assume_aliasing::Scalar>, const OtherXpr, const Product >, DenseShape > { static const bool value = true; }; -template -struct assignment_from_xpr_plus_product +template +struct assignment_from_xpr_op_product { - typedef CwiseBinaryOp, const OtherXpr, const ProductType> SrcXprType; - static void run(DstXprType &dst, const SrcXprType &src, const Func1& func) + template + static EIGEN_STRONG_INLINE + void run(DstXprType &dst, const SrcXprType &src, const InitialFunc& /*func*/) { - call_assignment_no_alias(dst, src.lhs(), func); + call_assignment_no_alias(dst, src.lhs(), Func1()); call_assignment_no_alias(dst, src.rhs(), Func2()); } }; -template< typename DstXprType, typename OtherXpr, typename Lhs, typename Rhs, typename Scalar> -struct Assignment, const OtherXpr, - const Product >, internal::assign_op, Dense2Dense> - : assignment_from_xpr_plus_product, Scalar, internal::assign_op, internal::add_assign_op > -{}; -template< typename DstXprType, typename OtherXpr, typename Lhs, typename Rhs, typename Scalar> -struct Assignment, const OtherXpr, - const Product >, internal::add_assign_op, Dense2Dense> - : assignment_from_xpr_plus_product, Scalar, internal::add_assign_op, internal::add_assign_op > -{}; -template< typename DstXprType, typename OtherXpr, typename Lhs, typename Rhs, typename Scalar> -struct Assignment, const OtherXpr, - const Product >, internal::sub_assign_op, Dense2Dense> - : assignment_from_xpr_plus_product, Scalar, internal::sub_assign_op, internal::sub_assign_op > -{}; +#define EIGEN_CATCH_ASSIGN_XPR_OP_PRODUCT(ASSIGN_OP,BINOP,ASSIGN_OP2) \ + template< typename 
DstXprType, typename OtherXpr, typename Lhs, typename Rhs, typename DstScalar, typename SrcScalar, typename OtherScalar,typename ProdScalar> \ + struct Assignment, const OtherXpr, \ + const Product >, internal::ASSIGN_OP, Dense2Dense> \ + : assignment_from_xpr_op_product, internal::ASSIGN_OP, internal::ASSIGN_OP2 > \ + {} + +EIGEN_CATCH_ASSIGN_XPR_OP_PRODUCT(assign_op, scalar_sum_op,add_assign_op); +EIGEN_CATCH_ASSIGN_XPR_OP_PRODUCT(add_assign_op,scalar_sum_op,add_assign_op); +EIGEN_CATCH_ASSIGN_XPR_OP_PRODUCT(sub_assign_op,scalar_sum_op,sub_assign_op); + +EIGEN_CATCH_ASSIGN_XPR_OP_PRODUCT(assign_op, scalar_difference_op,sub_assign_op); +EIGEN_CATCH_ASSIGN_XPR_OP_PRODUCT(add_assign_op,scalar_difference_op,sub_assign_op); +EIGEN_CATCH_ASSIGN_XPR_OP_PRODUCT(sub_assign_op,scalar_difference_op,add_assign_op); + //---------------------------------------- template @@ -243,7 +257,7 @@ struct generic_product_impl // Column major result template -EIGEN_DONT_INLINE void outer_product_selector_run(Dst& dst, const Lhs &lhs, const Rhs &rhs, const Func& func, const false_type&) +void outer_product_selector_run(Dst& dst, const Lhs &lhs, const Rhs &rhs, const Func& func, const false_type&) { evaluator rhsEval(rhs); typename nested_eval::type actual_lhs(lhs); @@ -251,12 +265,12 @@ EIGEN_DONT_INLINE void outer_product_selector_run(Dst& dst, const Lhs &lhs, cons // FIXME not very good if rhs is real and lhs complex while alpha is real too const Index cols = dst.cols(); for (Index j=0; j -EIGEN_DONT_INLINE void outer_product_selector_run(Dst& dst, const Lhs &lhs, const Rhs &rhs, const Func& func, const true_type&) +void outer_product_selector_run(Dst& dst, const Lhs &lhs, const Rhs &rhs, const Func& func, const true_type&) { evaluator lhsEval(lhs); typename nested_eval::type actual_rhs(rhs); @@ -264,7 +278,7 @@ EIGEN_DONT_INLINE void outer_product_selector_run(Dst& dst, const Lhs &lhs, cons // FIXME not very good if lhs is real and rhs complex while alpha is real too const Index rows 
= dst.rows(); for (Index i=0; i @@ -319,19 +333,19 @@ struct generic_product_impl_base typedef typename Product::Scalar Scalar; template - static void evalTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) + static EIGEN_STRONG_INLINE void evalTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) { dst.setZero(); scaleAndAddTo(dst, lhs, rhs, Scalar(1)); } template - static void addTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) + static EIGEN_STRONG_INLINE void addTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) { scaleAndAddTo(dst,lhs, rhs, Scalar(1)); } template - static void subTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) + static EIGEN_STRONG_INLINE void subTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) { scaleAndAddTo(dst, lhs, rhs, Scalar(-1)); } template - static void scaleAndAddTo(Dst& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha) + static EIGEN_STRONG_INLINE void scaleAndAddTo(Dst& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha) { Derived::scaleAndAddTo(dst,lhs,rhs,alpha); } }; @@ -345,7 +359,7 @@ struct generic_product_impl typedef typename internal::conditional::type MatrixType; template - static void scaleAndAddTo(Dest& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha) + static EIGEN_STRONG_INLINE void scaleAndAddTo(Dest& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha) { internal::gemv_dense_selector typedef typename Product::Scalar Scalar; template - static inline void evalTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) + static EIGEN_STRONG_INLINE void evalTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) { // Same as: dst.noalias() = lhs.lazyProduct(rhs); // but easier on the compiler side - call_assignment_no_alias(dst, lhs.lazyProduct(rhs), internal::assign_op()); + call_assignment_no_alias(dst, lhs.lazyProduct(rhs), internal::assign_op()); } template - static inline void addTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) + static EIGEN_STRONG_INLINE void addTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) { // dst.noalias() += 
lhs.lazyProduct(rhs); - call_assignment_no_alias(dst, lhs.lazyProduct(rhs), internal::add_assign_op()); + call_assignment_no_alias(dst, lhs.lazyProduct(rhs), internal::add_assign_op()); } template - static inline void subTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) + static EIGEN_STRONG_INLINE void subTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) { // dst.noalias() -= lhs.lazyProduct(rhs); - call_assignment_no_alias(dst, lhs.lazyProduct(rhs), internal::sub_assign_op()); + call_assignment_no_alias(dst, lhs.lazyProduct(rhs), internal::sub_assign_op()); } // template @@ -423,6 +437,18 @@ struct product_evaluator, ProductTag, DenseShape, EIGEN_INTERNAL_CHECK_COST_VALUE(NumTraits::MulCost); EIGEN_INTERNAL_CHECK_COST_VALUE(NumTraits::AddCost); EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost); +#if 0 + std::cerr << "LhsOuterStrideBytes= " << LhsOuterStrideBytes << "\n"; + std::cerr << "RhsOuterStrideBytes= " << RhsOuterStrideBytes << "\n"; + std::cerr << "LhsAlignment= " << LhsAlignment << "\n"; + std::cerr << "RhsAlignment= " << RhsAlignment << "\n"; + std::cerr << "CanVectorizeLhs= " << CanVectorizeLhs << "\n"; + std::cerr << "CanVectorizeRhs= " << CanVectorizeRhs << "\n"; + std::cerr << "CanVectorizeInner= " << CanVectorizeInner << "\n"; + std::cerr << "EvalToRowMajor= " << EvalToRowMajor << "\n"; + std::cerr << "Alignment= " << Alignment << "\n"; + std::cerr << "Flags= " << Flags << "\n"; +#endif } // Everything below here is taken from CoeffBasedProduct.h @@ -473,15 +499,12 @@ struct product_evaluator, ProductTag, DenseShape, SameType = is_same::value, - CanVectorizeRhs = RhsRowMajor && (RhsFlags & PacketAccessBit) - && (ColsAtCompileTime == Dynamic || ((ColsAtCompileTime % RhsVecPacketSize) == 0) ), - - CanVectorizeLhs = (!LhsRowMajor) && (LhsFlags & PacketAccessBit) - && (RowsAtCompileTime == Dynamic || ((RowsAtCompileTime % LhsVecPacketSize) == 0) ), + CanVectorizeRhs = bool(RhsRowMajor) && (RhsFlags & PacketAccessBit) && (ColsAtCompileTime!=1), + CanVectorizeLhs = 
(!LhsRowMajor) && (LhsFlags & PacketAccessBit) && (RowsAtCompileTime!=1), EvalToRowMajor = (MaxRowsAtCompileTime==1&&MaxColsAtCompileTime!=1) ? 1 : (MaxColsAtCompileTime==1&&MaxRowsAtCompileTime!=1) ? 0 - : (RhsRowMajor && !CanVectorizeLhs), + : (bool(RhsRowMajor) && !CanVectorizeLhs), Flags = ((unsigned int)(LhsFlags | RhsFlags) & HereditaryBits & ~RowMajorBit) | (EvalToRowMajor ? RowMajorBit : 0) @@ -492,8 +515,8 @@ struct product_evaluator, ProductTag, DenseShape, LhsOuterStrideBytes = int(LhsNestedCleaned::OuterStrideAtCompileTime) * int(sizeof(typename LhsNestedCleaned::Scalar)), RhsOuterStrideBytes = int(RhsNestedCleaned::OuterStrideAtCompileTime) * int(sizeof(typename RhsNestedCleaned::Scalar)), - Alignment = CanVectorizeLhs ? (LhsOuterStrideBytes<0 || (int(LhsOuterStrideBytes) % EIGEN_PLAIN_ENUM_MAX(1,LhsAlignment))!=0 ? 0 : LhsAlignment) - : CanVectorizeRhs ? (RhsOuterStrideBytes<0 || (int(RhsOuterStrideBytes) % EIGEN_PLAIN_ENUM_MAX(1,RhsAlignment))!=0 ? 0 : RhsAlignment) + Alignment = bool(CanVectorizeLhs) ? (LhsOuterStrideBytes<=0 || (int(LhsOuterStrideBytes) % EIGEN_PLAIN_ENUM_MAX(1,LhsAlignment))!=0 ? 0 : LhsAlignment) + : bool(CanVectorizeRhs) ? (RhsOuterStrideBytes<=0 || (int(RhsOuterStrideBytes) % EIGEN_PLAIN_ENUM_MAX(1,RhsAlignment))!=0 ? 0 : RhsAlignment) : 0, /* CanVectorizeInner deserves special explanation. It does not affect the product flags. It is not used outside @@ -519,8 +542,8 @@ struct product_evaluator, ProductTag, DenseShape, */ EIGEN_DEVICE_FUNC const CoeffReturnType coeff(Index index) const { - const Index row = RowsAtCompileTime == 1 ? 0 : index; - const Index col = RowsAtCompileTime == 1 ? index : 0; + const Index row = (RowsAtCompileTime == 1 || MaxRowsAtCompileTime==1) ? 0 : index; + const Index col = (RowsAtCompileTime == 1 || MaxRowsAtCompileTime==1) ? 
index : 0; return (m_lhs.row(row).transpose().cwiseProduct( m_rhs.col(col) )).sum(); } @@ -538,8 +561,8 @@ struct product_evaluator, ProductTag, DenseShape, template const PacketType packet(Index index) const { - const Index row = RowsAtCompileTime == 1 ? 0 : index; - const Index col = RowsAtCompileTime == 1 ? index : 0; + const Index row = (RowsAtCompileTime == 1 || MaxRowsAtCompileTime==1) ? 0 : index; + const Index col = (RowsAtCompileTime == 1 || MaxRowsAtCompileTime==1) ? index : 0; return packet(row,col); } @@ -579,7 +602,7 @@ struct etor_product_packet_impl::run(row, col, lhs, rhs, innerDim, res); - res = pmadd(pset1(lhs.coeff(row, UnrollingIndex-1)), rhs.template packet(UnrollingIndex-1, col), res); + res = pmadd(pset1(lhs.coeff(row, Index(UnrollingIndex-1))), rhs.template packet(Index(UnrollingIndex-1), col), res); } }; @@ -589,7 +612,7 @@ struct etor_product_packet_impl::run(row, col, lhs, rhs, innerDim, res); - res = pmadd(lhs.template packet(row, UnrollingIndex-1), pset1(rhs.coeff(UnrollingIndex-1, col)), res); + res = pmadd(lhs.template packet(row, Index(UnrollingIndex-1)), pset1(rhs.coeff(Index(UnrollingIndex-1), col)), res); } }; @@ -598,7 +621,7 @@ struct etor_product_packet_impl { static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index /*innerDim*/, Packet &res) { - res = pmul(pset1(lhs.coeff(row, 0)),rhs.template packet(0, col)); + res = pmul(pset1(lhs.coeff(row, Index(0))),rhs.template packet(Index(0), col)); } }; @@ -607,7 +630,7 @@ struct etor_product_packet_impl { static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index /*innerDim*/, Packet &res) { - res = pmul(lhs.template packet(row, 0), pset1(rhs.coeff(0, col))); + res = pmul(lhs.template packet(row, Index(0)), pset1(rhs.coeff(Index(0), col))); } }; @@ -616,7 +639,7 @@ struct etor_product_packet_impl { static EIGEN_STRONG_INLINE void run(Index /*row*/, Index /*col*/, const Lhs& /*lhs*/, const Rhs& /*rhs*/, Index 
/*innerDim*/, Packet &res) { - res = pset1(0); + res = pset1(typename unpacket_traits::type(0)); } }; @@ -625,7 +648,7 @@ struct etor_product_packet_impl { static EIGEN_STRONG_INLINE void run(Index /*row*/, Index /*col*/, const Lhs& /*lhs*/, const Rhs& /*rhs*/, Index /*innerDim*/, Packet &res) { - res = pset1(0); + res = pset1(typename unpacket_traits::type(0)); } }; @@ -634,7 +657,7 @@ struct etor_product_packet_impl { static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, Packet& res) { - res = pset1(0); + res = pset1(typename unpacket_traits::type(0)); for(Index i = 0; i < innerDim; ++i) res = pmadd(pset1(lhs.coeff(row, i)), rhs.template packet(i, col), res); } @@ -645,7 +668,7 @@ struct etor_product_packet_impl { static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, Packet& res) { - res = pset1(0); + res = pset1(typename unpacket_traits::type(0)); for(Index i = 0; i < innerDim; ++i) res = pmadd(lhs.template packet(row, i), pset1(rhs.coeff(i, col)), res); } @@ -730,7 +753,7 @@ template { - typedef typename scalar_product_traits::ReturnType Scalar; + typedef typename ScalarBinaryOpTraits::ReturnType Scalar; public: enum { CoeffReadCost = NumTraits::MulCost + evaluator::CoeffReadCost + evaluator::CoeffReadCost, diff --git a/Eigen/src/Core/Random.h b/Eigen/src/Core/Random.h index 02038e9e3..6faf789c7 100644 --- a/Eigen/src/Core/Random.h +++ b/Eigen/src/Core/Random.h @@ -16,8 +16,7 @@ namespace internal { template struct scalar_random_op { EIGEN_EMPTY_STRUCT_CTOR(scalar_random_op) - template - inline const Scalar operator() (Index, Index = 0) const { return random(); } + inline const Scalar operator() () const { return random(); } }; template diff --git a/Eigen/src/Core/Redux.h b/Eigen/src/Core/Redux.h index 98b2fd868..b6e8f8887 100644 --- a/Eigen/src/Core/Redux.h +++ b/Eigen/src/Core/Redux.h @@ -38,8 +38,8 @@ public: enum { MightVectorize = 
(int(Derived::Flags)&ActualPacketAccessBit) && (functor_traits::PacketAccess), - MayLinearVectorize = MightVectorize && (int(Derived::Flags)&LinearAccessBit), - MaySliceVectorize = MightVectorize && int(InnerMaxSize)>=3*PacketSize + MayLinearVectorize = bool(MightVectorize) && (int(Derived::Flags)&LinearAccessBit), + MaySliceVectorize = bool(MightVectorize) && int(InnerMaxSize)>=3*PacketSize }; public: @@ -425,7 +425,7 @@ template EIGEN_STRONG_INLINE typename internal::traits::Scalar DenseBase::minCoeff() const { - return derived().redux(Eigen::internal::scalar_min_op()); + return derived().redux(Eigen::internal::scalar_min_op()); } /** \returns the maximum of all coefficients of \c *this. @@ -435,10 +435,12 @@ template EIGEN_STRONG_INLINE typename internal::traits::Scalar DenseBase::maxCoeff() const { - return derived().redux(Eigen::internal::scalar_max_op()); + return derived().redux(Eigen::internal::scalar_max_op()); } -/** \returns the sum of all coefficients of *this +/** \returns the sum of all coefficients of \c *this + * + * If \c *this is empty, then the value 0 is returned. 
* * \sa trace(), prod(), mean() */ @@ -448,7 +450,7 @@ DenseBase::sum() const { if(SizeAtCompileTime==0 || (SizeAtCompileTime==Dynamic && size()==0)) return Scalar(0); - return derived().redux(Eigen::internal::scalar_sum_op()); + return derived().redux(Eigen::internal::scalar_sum_op()); } /** \returns the mean of all coefficients of *this @@ -459,7 +461,14 @@ template EIGEN_STRONG_INLINE typename internal::traits::Scalar DenseBase::mean() const { - return Scalar(derived().redux(Eigen::internal::scalar_sum_op())) / Scalar(this->size()); +#ifdef __INTEL_COMPILER + #pragma warning push + #pragma warning ( disable : 2259 ) +#endif + return Scalar(derived().redux(Eigen::internal::scalar_sum_op())) / Scalar(this->size()); +#ifdef __INTEL_COMPILER + #pragma warning pop +#endif } /** \returns the product of all coefficients of *this diff --git a/Eigen/src/Core/Ref.h b/Eigen/src/Core/Ref.h index 6e94181f3..bdf24f52a 100644 --- a/Eigen/src/Core/Ref.h +++ b/Eigen/src/Core/Ref.h @@ -35,7 +35,13 @@ struct traits > || (int(StrideType::InnerStrideAtCompileTime)==0 && int(Derived::InnerStrideAtCompileTime)==1), OuterStrideMatch = Derived::IsVectorAtCompileTime || int(StrideType::OuterStrideAtCompileTime)==int(Dynamic) || int(StrideType::OuterStrideAtCompileTime)==int(Derived::OuterStrideAtCompileTime), - AlignmentMatch = (int(traits::Alignment)==int(Unaligned)) || (int(evaluator::Alignment) >= int(Alignment)), // FIXME the first condition is not very clear, it should be replaced by the required alignment + // NOTE, this indirection of evaluator::Alignment is needed + // to workaround a very strange bug in MSVC related to the instantiation + // of has_*ary_operator in evaluator. + // This line is surprisingly very sensitive. For instance, simply adding parenthesis + // as "DerivedAlignment = (int(evaluator::Alignment))," will make MSVC fail... 
+ DerivedAlignment = int(evaluator::Alignment), + AlignmentMatch = (int(traits::Alignment)==int(Unaligned)) || (DerivedAlignment >= int(Alignment)), // FIXME the first condition is not very clear, it should be replaced by the required alignment ScalarTypeMatch = internal::is_same::value, MatchAtCompileTime = HasDirectAccess && StorageOrderMatch && InnerStrideMatch && OuterStrideMatch && AlignmentMatch && ScalarTypeMatch }; @@ -262,7 +268,7 @@ template class Ref< template EIGEN_DEVICE_FUNC void construct(const Expression& expr, internal::false_type) { - internal::call_assignment_no_alias(m_object,expr,internal::assign_op()); + internal::call_assignment_no_alias(m_object,expr,internal::assign_op()); Base::construct(m_object); } diff --git a/Eigen/src/Core/SelfAdjointView.h b/Eigen/src/Core/SelfAdjointView.h index 9fda02691..62d4180da 100644 --- a/Eigen/src/Core/SelfAdjointView.h +++ b/Eigen/src/Core/SelfAdjointView.h @@ -55,6 +55,7 @@ template class SelfAdjointView typedef TriangularBase Base; typedef typename internal::traits::MatrixTypeNested MatrixTypeNested; typedef typename internal::traits::MatrixTypeNestedCleaned MatrixTypeNestedCleaned; + typedef MatrixTypeNestedCleaned NestedExpression; /** \brief The type of coefficients in this matrix */ typedef typename internal::traits::Scalar Scalar; @@ -128,7 +129,7 @@ template class SelfAdjointView } friend EIGEN_DEVICE_FUNC - const SelfAdjointView,MatrixType>,UpLo> + const SelfAdjointView operator*(const Scalar& s, const SelfAdjointView& mat) { return (s*mat.nestedExpression()).template selfadjointView(); @@ -162,6 +163,41 @@ template class SelfAdjointView EIGEN_DEVICE_FUNC SelfAdjointView& rankUpdate(const MatrixBase& u, const Scalar& alpha = Scalar(1)); + /** \returns an expression of a triangular view extracted from the current selfadjoint view of a given triangular part + * + * The parameter \a TriMode can have the following values: \c #Upper, \c #StrictlyUpper, \c #UnitUpper, + * \c #Lower, \c #StrictlyLower, \c 
#UnitLower. + * + * If \c TriMode references the same triangular part than \c *this, then this method simply return a \c TriangularView of the nested expression, + * otherwise, the nested expression is first transposed, thus returning a \c TriangularView> object. + * + * \sa MatrixBase::triangularView(), class TriangularView + */ + template + EIGEN_DEVICE_FUNC + typename internal::conditional<(TriMode&(Upper|Lower))==(UpLo&(Upper|Lower)), + TriangularView, + TriangularView >::type + triangularView() const + { + typename internal::conditional<(TriMode&(Upper|Lower))==(UpLo&(Upper|Lower)), MatrixType&, typename MatrixType::ConstTransposeReturnType>::type tmp1(m_matrix); + typename internal::conditional<(TriMode&(Upper|Lower))==(UpLo&(Upper|Lower)), MatrixType&, typename MatrixType::AdjointReturnType>::type tmp2(tmp1); + return typename internal::conditional<(TriMode&(Upper|Lower))==(UpLo&(Upper|Lower)), + TriangularView, + TriangularView >::type(tmp2); + } + + /** \returns a const expression of the main diagonal of the matrix \c *this + * + * This method simply returns the diagonal of the nested expression, thus by-passing the SelfAdjointView decorator. 
+ * + * \sa MatrixBase::diagonal(), class Diagonal */ + EIGEN_DEVICE_FUNC + typename MatrixType::ConstDiagonalReturnType diagonal() const + { + return typename MatrixType::ConstDiagonalReturnType(m_matrix); + } + /////////// Cholesky module /////////// const LLT llt() const; diff --git a/Eigen/src/Core/SelfCwiseBinaryOp.h b/Eigen/src/Core/SelfCwiseBinaryOp.h index 78fff1549..719ed72a5 100644 --- a/Eigen/src/Core/SelfCwiseBinaryOp.h +++ b/Eigen/src/Core/SelfCwiseBinaryOp.h @@ -12,11 +12,13 @@ namespace Eigen { +// TODO generalize the scalar type of 'other' + template EIGEN_STRONG_INLINE Derived& DenseBase::operator*=(const Scalar& other) { typedef typename Derived::PlainObject PlainObject; - internal::call_assignment(this->derived(), PlainObject::Constant(rows(),cols(),other), internal::mul_assign_op()); + internal::call_assignment(this->derived(), PlainObject::Constant(rows(),cols(),other), internal::mul_assign_op()); return derived(); } @@ -24,7 +26,7 @@ template EIGEN_STRONG_INLINE Derived& ArrayBase::operator+=(const Scalar& other) { typedef typename Derived::PlainObject PlainObject; - internal::call_assignment(this->derived(), PlainObject::Constant(rows(),cols(),other), internal::add_assign_op()); + internal::call_assignment(this->derived(), PlainObject::Constant(rows(),cols(),other), internal::add_assign_op()); return derived(); } @@ -32,7 +34,7 @@ template EIGEN_STRONG_INLINE Derived& ArrayBase::operator-=(const Scalar& other) { typedef typename Derived::PlainObject PlainObject; - internal::call_assignment(this->derived(), PlainObject::Constant(rows(),cols(),other), internal::sub_assign_op()); + internal::call_assignment(this->derived(), PlainObject::Constant(rows(),cols(),other), internal::sub_assign_op()); return derived(); } @@ -40,7 +42,7 @@ template EIGEN_STRONG_INLINE Derived& DenseBase::operator/=(const Scalar& other) { typedef typename Derived::PlainObject PlainObject; - internal::call_assignment(this->derived(), 
PlainObject::Constant(rows(),cols(),other), internal::div_assign_op()); + internal::call_assignment(this->derived(), PlainObject::Constant(rows(),cols(),other), internal::div_assign_op()); return derived(); } diff --git a/Eigen/src/Core/Solve.h b/Eigen/src/Core/Solve.h index ba2ee53b8..8fc69c4b8 100644 --- a/Eigen/src/Core/Solve.h +++ b/Eigen/src/Core/Solve.h @@ -134,10 +134,10 @@ protected: // Specialization for "dst = dec.solve(rhs)" // NOTE we need to specialize it for Dense2Dense to avoid ambiguous specialization error and a Sparse2Sparse specialization must exist somewhere template -struct Assignment, internal::assign_op, Dense2Dense, Scalar> +struct Assignment, internal::assign_op, Dense2Dense> { typedef Solve SrcXprType; - static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op &) + static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op &) { // FIXME shall we resize dst here? src.dec()._solve_impl(src.rhs(), dst); @@ -146,10 +146,10 @@ struct Assignment, internal::assign_op -struct Assignment,RhsType>, internal::assign_op, Dense2Dense, Scalar> +struct Assignment,RhsType>, internal::assign_op, Dense2Dense> { typedef Solve,RhsType> SrcXprType; - static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op &) + static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op &) { src.dec().nestedExpression().template _solve_impl_transposed(src.rhs(), dst); } @@ -157,10 +157,11 @@ struct Assignment,RhsType>, internal: // Specialization for "dst = dec.adjoint().solve(rhs)" template -struct Assignment, const Transpose >,RhsType>, internal::assign_op, Dense2Dense, Scalar> +struct Assignment, const Transpose >,RhsType>, + internal::assign_op, Dense2Dense> { typedef Solve, const Transpose >,RhsType> SrcXprType; - static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op &) + static void run(DstXprType &dst, const SrcXprType &src, const 
internal::assign_op &) { src.dec().nestedExpression().nestedExpression().template _solve_impl_transposed(src.rhs(), dst); } diff --git a/Eigen/src/Core/SolveTriangular.h b/Eigen/src/Core/SolveTriangular.h index a33356423..96d3dde50 100644 --- a/Eigen/src/Core/SolveTriangular.h +++ b/Eigen/src/Core/SolveTriangular.h @@ -169,7 +169,7 @@ void TriangularViewImpl::solveInPlace(const MatrixBase::Flags & RowMajorBit && OtherDerived::IsVectorAtCompileTime }; + enum { copy = (internal::traits::Flags & RowMajorBit) && OtherDerived::IsVectorAtCompileTime && OtherDerived::SizeAtCompileTime!=1}; typedef typename internal::conditional::type, OtherDerived&>::type OtherCopy; OtherCopy otherCopy(other); diff --git a/Eigen/src/Core/TriangularMatrix.h b/Eigen/src/Core/TriangularMatrix.h index 5c5e5028e..e9606ec33 100644 --- a/Eigen/src/Core/TriangularMatrix.h +++ b/Eigen/src/Core/TriangularMatrix.h @@ -367,14 +367,14 @@ template class TriangularViewImpl<_Mat template EIGEN_DEVICE_FUNC TriangularViewType& operator+=(const DenseBase& other) { - internal::call_assignment_no_alias(derived(), other.derived(), internal::add_assign_op()); + internal::call_assignment_no_alias(derived(), other.derived(), internal::add_assign_op()); return derived(); } /** \sa MatrixBase::operator-=() */ template EIGEN_DEVICE_FUNC TriangularViewType& operator-=(const DenseBase& other) { - internal::call_assignment_no_alias(derived(), other.derived(), internal::sub_assign_op()); + internal::call_assignment_no_alias(derived(), other.derived(), internal::sub_assign_op()); return derived(); } @@ -552,7 +552,7 @@ template inline TriangularView& TriangularViewImpl::operator=(const MatrixBase& other) { - internal::call_assignment_no_alias(derived(), other.derived(), internal::assign_op()); + internal::call_assignment_no_alias(derived(), other.derived(), internal::assign_op()); return derived(); } @@ -794,7 +794,7 @@ void call_triangular_assignment_loop(const DstXprType& dst, const SrcXprType& sr enum { unroll = 
DstXprType::SizeAtCompileTime != Dynamic && SrcEvaluatorType::CoeffReadCost < HugeCost - && DstXprType::SizeAtCompileTime * SrcEvaluatorType::CoeffReadCost / 2 <= EIGEN_UNROLLING_LIMIT + && DstXprType::SizeAtCompileTime * (DstEvaluatorType::CoeffReadCost+SrcEvaluatorType::CoeffReadCost) / 2 <= EIGEN_UNROLLING_LIMIT }; triangular_assignment_loop::run(kernel); @@ -804,7 +804,7 @@ template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void call_triangular_assignment_loop(const DstXprType& dst, const SrcXprType& src) { - call_triangular_assignment_loop(dst, src, internal::assign_op()); + call_triangular_assignment_loop(dst, src, internal::assign_op()); } template<> struct AssignmentKind { typedef Triangular2Triangular Kind; }; @@ -812,8 +812,8 @@ template<> struct AssignmentKind { typedef Tria template<> struct AssignmentKind { typedef Dense2Triangular Kind; }; -template< typename DstXprType, typename SrcXprType, typename Functor, typename Scalar> -struct Assignment +template< typename DstXprType, typename SrcXprType, typename Functor> +struct Assignment { EIGEN_DEVICE_FUNC static void run(DstXprType &dst, const SrcXprType &src, const Functor &func) { @@ -823,8 +823,8 @@ struct Assignment -struct Assignment +template< typename DstXprType, typename SrcXprType, typename Functor> +struct Assignment { EIGEN_DEVICE_FUNC static void run(DstXprType &dst, const SrcXprType &src, const Functor &func) { @@ -832,8 +832,8 @@ struct Assignment } }; -template< typename DstXprType, typename SrcXprType, typename Functor, typename Scalar> -struct Assignment +template< typename DstXprType, typename SrcXprType, typename Functor> +struct Assignment { EIGEN_DEVICE_FUNC static void run(DstXprType &dst, const SrcXprType &src, const Functor &func) { @@ -933,10 +933,10 @@ namespace internal { // Triangular = Product template< typename DstXprType, typename Lhs, typename Rhs, typename Scalar> -struct Assignment, internal::assign_op, Dense2Triangular, Scalar> +struct Assignment, 
internal::assign_op::Scalar>, Dense2Triangular> { typedef Product SrcXprType; - static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op &) + static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op &) { dst.setZero(); dst._assignProduct(src, 1); @@ -945,10 +945,10 @@ struct Assignment, internal::assign_ // Triangular += Product template< typename DstXprType, typename Lhs, typename Rhs, typename Scalar> -struct Assignment, internal::add_assign_op, Dense2Triangular, Scalar> +struct Assignment, internal::add_assign_op::Scalar>, Dense2Triangular> { typedef Product SrcXprType; - static void run(DstXprType &dst, const SrcXprType &src, const internal::add_assign_op &) + static void run(DstXprType &dst, const SrcXprType &src, const internal::add_assign_op &) { dst._assignProduct(src, 1); } @@ -956,10 +956,10 @@ struct Assignment, internal::add_ass // Triangular -= Product template< typename DstXprType, typename Lhs, typename Rhs, typename Scalar> -struct Assignment, internal::sub_assign_op, Dense2Triangular, Scalar> +struct Assignment, internal::sub_assign_op::Scalar>, Dense2Triangular> { typedef Product SrcXprType; - static void run(DstXprType &dst, const SrcXprType &src, const internal::sub_assign_op &) + static void run(DstXprType &dst, const SrcXprType &src, const internal::sub_assign_op &) { dst._assignProduct(src, -1); } diff --git a/Eigen/src/Core/VectorwiseOp.h b/Eigen/src/Core/VectorwiseOp.h index 193891189..dd382e990 100644 --- a/Eigen/src/Core/VectorwiseOp.h +++ b/Eigen/src/Core/VectorwiseOp.h @@ -284,6 +284,7 @@ template class VectorwiseOp typedef typename ReturnType::Type AnyReturnType; typedef PartialReduxExpr, Direction> CountReturnType; typedef typename ReturnType::Type ProdReturnType; + typedef Reverse ConstReverseReturnType; typedef Reverse ReverseReturnType; template struct LpNormReturnType { @@ -456,7 +457,15 @@ template class VectorwiseOp * * \sa DenseBase::reverse() */ EIGEN_DEVICE_FUNC - const 
ReverseReturnType reverse() const + const ConstReverseReturnType reverse() const + { return ConstReverseReturnType( _expression() ); } + + /** \returns a writable matrix expression + * where each column (or row) are reversed. + * + * \sa reverse() const */ + EIGEN_DEVICE_FUNC + ReverseReturnType reverse() { return ReverseReturnType( _expression() ); } typedef Replicate ReplicateReturnType; @@ -540,7 +549,7 @@ template class VectorwiseOp /** Returns the expression of the sum of the vector \a other to each subvector of \c *this */ template EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC - CwiseBinaryOp, + CwiseBinaryOp, const ExpressionTypeNestedCleaned, const typename ExtendedType::Type> operator+(const DenseBase& other) const @@ -553,7 +562,7 @@ template class VectorwiseOp /** Returns the expression of the difference between each subvector of \c *this and the vector \a other */ template EIGEN_DEVICE_FUNC - CwiseBinaryOp, + CwiseBinaryOp, const ExpressionTypeNestedCleaned, const typename ExtendedType::Type> operator-(const DenseBase& other) const diff --git a/Eigen/src/Core/arch/AVX/CMakeLists.txt b/Eigen/src/Core/arch/AVX/CMakeLists.txt deleted file mode 100644 index bdb71ab99..000000000 --- a/Eigen/src/Core/arch/AVX/CMakeLists.txt +++ /dev/null @@ -1,6 +0,0 @@ -FILE(GLOB Eigen_Core_arch_AVX_SRCS "*.h") - -INSTALL(FILES - ${Eigen_Core_arch_AVX_SRCS} - DESTINATION ${INCLUDE_INSTALL_DIR}/Eigen/src/Core/arch/AVX COMPONENT Devel -) diff --git a/Eigen/src/Core/arch/AVX/MathFunctions.h b/Eigen/src/Core/arch/AVX/MathFunctions.h index 98d8e029f..d21ec39dd 100644 --- a/Eigen/src/Core/arch/AVX/MathFunctions.h +++ b/Eigen/src/Core/arch/AVX/MathFunctions.h @@ -266,52 +266,10 @@ pexp(const Packet8f& _x) { } // Hyperbolic Tangent function. -// Doesn't do anything fancy, just a 13/6-degree rational interpolant which -// is accurate up to a couple of ulp in the range [-9, 9], outside of which the -// fl(tanh(x)) = +/-1. 
template <> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet8f -ptanh(const Packet8f& _x) { - // Clamp the inputs to the range [-9, 9] since anything outside - // this range is +/-1.0f in single-precision. - _EIGEN_DECLARE_CONST_Packet8f(plus_9, 9.0f); - _EIGEN_DECLARE_CONST_Packet8f(minus_9, -9.0f); - const Packet8f x = pmax(p8f_minus_9, pmin(p8f_plus_9, _x)); - - // The monomial coefficients of the numerator polynomial (odd). - _EIGEN_DECLARE_CONST_Packet8f(alpha_1, 4.89352455891786e-03f); - _EIGEN_DECLARE_CONST_Packet8f(alpha_3, 6.37261928875436e-04f); - _EIGEN_DECLARE_CONST_Packet8f(alpha_5, 1.48572235717979e-05f); - _EIGEN_DECLARE_CONST_Packet8f(alpha_7, 5.12229709037114e-08f); - _EIGEN_DECLARE_CONST_Packet8f(alpha_9, -8.60467152213735e-11f); - _EIGEN_DECLARE_CONST_Packet8f(alpha_11, 2.00018790482477e-13f); - _EIGEN_DECLARE_CONST_Packet8f(alpha_13, -2.76076847742355e-16f); - - // The monomial coefficients of the denominator polynomial (even). - _EIGEN_DECLARE_CONST_Packet8f(beta_0, 4.89352518554385e-03f); - _EIGEN_DECLARE_CONST_Packet8f(beta_2, 2.26843463243900e-03f); - _EIGEN_DECLARE_CONST_Packet8f(beta_4, 1.18534705686654e-04f); - _EIGEN_DECLARE_CONST_Packet8f(beta_6, 1.19825839466702e-06f); - - // Since the polynomials are odd/even, we need x^2. - const Packet8f x2 = pmul(x, x); - - // Evaluate the numerator polynomial p. - Packet8f p = pmadd(x2, p8f_alpha_13, p8f_alpha_11); - p = pmadd(x2, p, p8f_alpha_9); - p = pmadd(x2, p, p8f_alpha_7); - p = pmadd(x2, p, p8f_alpha_5); - p = pmadd(x2, p, p8f_alpha_3); - p = pmadd(x2, p, p8f_alpha_1); - p = pmul(x, p); - - // Evaluate the denominator polynomial p. - Packet8f q = pmadd(x2, p8f_beta_6, p8f_beta_4); - q = pmadd(x2, q, p8f_beta_2); - q = pmadd(x2, q, p8f_beta_0); - - // Divide the numerator by the denominator. 
- return pdiv(p, q); +ptanh(const Packet8f& x) { + return internal::generic_fast_tanh_float(x); } template <> diff --git a/Eigen/src/Core/arch/AVX/PacketMath.h b/Eigen/src/Core/arch/AVX/PacketMath.h index ba2a6c1e1..beb3e577d 100644 --- a/Eigen/src/Core/arch/AVX/PacketMath.h +++ b/Eigen/src/Core/arch/AVX/PacketMath.h @@ -97,6 +97,9 @@ template<> struct packet_traits : default_packet_traits }; #endif +template<> struct scalar_div_cost { enum { value = 14 }; }; +template<> struct scalar_div_cost { enum { value = 16 }; }; + /* Proper support for integers is only provided by AVX2. In the meantime, we'll use SSE instructions and packets to deal with integers. template<> struct packet_traits : default_packet_traits @@ -156,7 +159,7 @@ template<> EIGEN_STRONG_INLINE Packet8i pdiv(const Packet8i& /*a*/, co #ifdef __FMA__ template<> EIGEN_STRONG_INLINE Packet8f pmadd(const Packet8f& a, const Packet8f& b, const Packet8f& c) { -#if EIGEN_COMP_GNUC || EIGEN_COMP_CLANG +#if ( EIGEN_COMP_GNUC_STRICT || (EIGEN_COMP_CLANG && (EIGEN_COMP_CLANG<308)) ) // clang stupidly generates a vfmadd213ps instruction plus some vmovaps on registers, // and gcc stupidly generates a vfmadd132ps instruction, // so let's enforce it to generate a vfmadd231ps instruction since the most common use case is to accumulate @@ -169,7 +172,7 @@ template<> EIGEN_STRONG_INLINE Packet8f pmadd(const Packet8f& a, const Packet8f& #endif } template<> EIGEN_STRONG_INLINE Packet4d pmadd(const Packet4d& a, const Packet4d& b, const Packet4d& c) { -#if EIGEN_COMP_GNUC || EIGEN_COMP_CLANG +#if ( EIGEN_COMP_GNUC_STRICT || (EIGEN_COMP_CLANG && (EIGEN_COMP_CLANG<308)) ) // see above Packet4d res = c; __asm__("vfmadd231pd %[a], %[b], %[c]" : [c] "+x" (res) : [a] "x" (a), [b] "x" (b)); diff --git a/Eigen/src/Core/arch/AltiVec/CMakeLists.txt b/Eigen/src/Core/arch/AltiVec/CMakeLists.txt deleted file mode 100644 index 9f8d2e9c4..000000000 --- a/Eigen/src/Core/arch/AltiVec/CMakeLists.txt +++ /dev/null @@ -1,6 +0,0 @@ -FILE(GLOB 
Eigen_Core_arch_AltiVec_SRCS "*.h") - -INSTALL(FILES - ${Eigen_Core_arch_AltiVec_SRCS} - DESTINATION ${INCLUDE_INSTALL_DIR}/Eigen/src/Core/arch/AltiVec COMPONENT Devel -) diff --git a/Eigen/src/Core/arch/AltiVec/Complex.h b/Eigen/src/Core/arch/AltiVec/Complex.h index 58c296171..45213f791 100644 --- a/Eigen/src/Core/arch/AltiVec/Complex.h +++ b/Eigen/src/Core/arch/AltiVec/Complex.h @@ -2,6 +2,7 @@ // for linear algebra. // // Copyright (C) 2010 Gael Guennebaud +// Copyright (C) 2010-2016 Konstantinos Margaritis // // This Source Code Form is subject to the terms of the Mozilla // Public License v. 2.0. If a copy of the MPL was not distributed @@ -15,18 +16,20 @@ namespace Eigen { namespace internal { static Packet4ui p4ui_CONJ_XOR = vec_mergeh((Packet4ui)p4i_ZERO, (Packet4ui)p4f_ZERO_);//{ 0x00000000, 0x80000000, 0x00000000, 0x80000000 }; -#ifdef _BIG_ENDIAN +#ifdef __VSX__ +#if defined(_BIG_ENDIAN) static Packet2ul p2ul_CONJ_XOR1 = (Packet2ul) vec_sld((Packet4ui) p2d_ZERO_, (Packet4ui) p2l_ZERO, 8);//{ 0x8000000000000000, 0x0000000000000000 }; static Packet2ul p2ul_CONJ_XOR2 = (Packet2ul) vec_sld((Packet4ui) p2l_ZERO, (Packet4ui) p2d_ZERO_, 8);//{ 0x8000000000000000, 0x0000000000000000 }; #else static Packet2ul p2ul_CONJ_XOR1 = (Packet2ul) vec_sld((Packet4ui) p2l_ZERO, (Packet4ui) p2d_ZERO_, 8);//{ 0x8000000000000000, 0x0000000000000000 }; static Packet2ul p2ul_CONJ_XOR2 = (Packet2ul) vec_sld((Packet4ui) p2d_ZERO_, (Packet4ui) p2l_ZERO, 8);//{ 0x8000000000000000, 0x0000000000000000 }; #endif +#endif //---------- float ---------- struct Packet2cf { - EIGEN_STRONG_INLINE Packet2cf() {} + EIGEN_STRONG_INLINE explicit Packet2cf() : v(p4f_ZERO) {} EIGEN_STRONG_INLINE explicit Packet2cf(const Packet4f& a) : v(a) {} Packet4f v; }; @@ -39,6 +42,7 @@ template<> struct packet_traits > : default_packet_traits Vectorizable = 1, AlignedOnScalar = 1, size = 2, + HasHalfPacket = 0, HasAdd = 1, HasSub = 1, @@ -49,6 +53,9 @@ template<> struct packet_traits > : default_packet_traits 
HasAbs2 = 0, HasMin = 0, HasMax = 0, +#ifdef __VSX__ + HasBlend = 1, +#endif HasSetLinear = 0 }; }; @@ -58,7 +65,6 @@ template<> struct unpacket_traits { typedef std::complex type; template<> EIGEN_STRONG_INLINE Packet2cf pset1(const std::complex& from) { Packet2cf res; - /* On AltiVec we cannot load 64-bit registers, so wa have to take care of alignment */ if((ptrdiff_t(&from) % 16) == 0) res.v = pload((const float *)&from); else @@ -67,26 +73,32 @@ template<> EIGEN_STRONG_INLINE Packet2cf pset1(const std::complex EIGEN_STRONG_INLINE Packet2cf pload(const std::complex* from) { return Packet2cf(pload((const float *) from)); } +template<> EIGEN_STRONG_INLINE Packet2cf ploadu(const std::complex* from) { return Packet2cf(ploadu((const float*) from)); } +template<> EIGEN_STRONG_INLINE Packet2cf ploaddup(const std::complex* from) { return pset1(*from); } + +template<> EIGEN_STRONG_INLINE void pstore >(std::complex * to, const Packet2cf& from) { pstore((float*)to, from.v); } +template<> EIGEN_STRONG_INLINE void pstoreu >(std::complex * to, const Packet2cf& from) { pstoreu((float*)to, from.v); } + template<> EIGEN_DEVICE_FUNC inline Packet2cf pgather, Packet2cf>(const std::complex* from, Index stride) { std::complex EIGEN_ALIGN16 af[2]; af[0] = from[0*stride]; af[1] = from[1*stride]; - return Packet2cf(vec_ld(0, (const float*)af)); + return pload(af); } template<> EIGEN_DEVICE_FUNC inline void pscatter, Packet2cf>(std::complex* to, const Packet2cf& from, Index stride) { std::complex EIGEN_ALIGN16 af[2]; - vec_st(from.v, 0, (float*)af); + pstore >((std::complex *) af, from); to[0*stride] = af[0]; to[1*stride] = af[1]; } - -template<> EIGEN_STRONG_INLINE Packet2cf padd(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(vec_add(a.v,b.v)); } -template<> EIGEN_STRONG_INLINE Packet2cf psub(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(vec_sub(a.v,b.v)); } +template<> EIGEN_STRONG_INLINE Packet2cf padd(const Packet2cf& a, const Packet2cf& b) { return 
Packet2cf(a.v + b.v); } +template<> EIGEN_STRONG_INLINE Packet2cf psub(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(a.v - b.v); } template<> EIGEN_STRONG_INLINE Packet2cf pnegate(const Packet2cf& a) { return Packet2cf(pnegate(a.v)); } -template<> EIGEN_STRONG_INLINE Packet2cf pconj(const Packet2cf& a) { return Packet2cf((Packet4f)vec_xor((Packet4ui)a.v, p4ui_CONJ_XOR)); } +template<> EIGEN_STRONG_INLINE Packet2cf pconj(const Packet2cf& a) { return Packet2cf(pxor(a.v, reinterpret_cast(p4ui_CONJ_XOR))); } template<> EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& a, const Packet2cf& b) { @@ -100,30 +112,19 @@ template<> EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& a, con v1 = vec_madd(v1, b.v, p4f_ZERO); // multiply a_im * b and get the conjugate result v2 = vec_madd(v2, b.v, p4f_ZERO); - v2 = (Packet4f) vec_xor((Packet4ui)v2, p4ui_CONJ_XOR); + v2 = reinterpret_cast(pxor(v2, reinterpret_cast(p4ui_CONJ_XOR))); // permute back to a proper order v2 = vec_perm(v2, v2, p16uc_COMPLEX32_REV); - return Packet2cf(vec_add(v1, v2)); + return Packet2cf(padd(v1, v2)); } -template<> EIGEN_STRONG_INLINE Packet2cf pand (const Packet2cf& a, const Packet2cf& b) { return Packet2cf(vec_and(a.v,b.v)); } -template<> EIGEN_STRONG_INLINE Packet2cf por (const Packet2cf& a, const Packet2cf& b) { return Packet2cf(vec_or(a.v,b.v)); } -template<> EIGEN_STRONG_INLINE Packet2cf pxor (const Packet2cf& a, const Packet2cf& b) { return Packet2cf(vec_xor(a.v,b.v)); } -template<> EIGEN_STRONG_INLINE Packet2cf pandnot(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(vec_and(a.v, vec_nor(b.v,b.v))); } +template<> EIGEN_STRONG_INLINE Packet2cf pand (const Packet2cf& a, const Packet2cf& b) { return Packet2cf(pand(a.v, b.v)); } +template<> EIGEN_STRONG_INLINE Packet2cf por (const Packet2cf& a, const Packet2cf& b) { return Packet2cf(por(a.v, b.v)); } +template<> EIGEN_STRONG_INLINE Packet2cf pxor (const Packet2cf& a, const Packet2cf& b) { return Packet2cf(pxor(a.v, b.v)); } 
+template<> EIGEN_STRONG_INLINE Packet2cf pandnot(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(pandnot(a.v, b.v)); } -template<> EIGEN_STRONG_INLINE Packet2cf pload (const std::complex* from) { EIGEN_DEBUG_ALIGNED_LOAD return Packet2cf(pload((const float*)from)); } -template<> EIGEN_STRONG_INLINE Packet2cf ploadu(const std::complex* from) { EIGEN_DEBUG_UNALIGNED_LOAD return Packet2cf(ploadu((const float*)from)); } - -template<> EIGEN_STRONG_INLINE Packet2cf ploaddup(const std::complex* from) -{ - return pset1(*from); -} - -template<> EIGEN_STRONG_INLINE void pstore >(std::complex * to, const Packet2cf& from) { EIGEN_DEBUG_ALIGNED_STORE pstore((float*)to, from.v); } -template<> EIGEN_STRONG_INLINE void pstoreu >(std::complex * to, const Packet2cf& from) { EIGEN_DEBUG_UNALIGNED_STORE pstoreu((float*)to, from.v); } - -template<> EIGEN_STRONG_INLINE void prefetch >(const std::complex * addr) { vec_dstt((float *)addr, DST_CTRL(2,2,32), DST_CHAN); } +template<> EIGEN_STRONG_INLINE void prefetch >(const std::complex * addr) { EIGEN_PPC_PREFETCH(addr); } template<> EIGEN_STRONG_INLINE std::complex pfirst(const Packet2cf& a) { @@ -143,23 +144,23 @@ template<> EIGEN_STRONG_INLINE Packet2cf preverse(const Packet2cf& a) template<> EIGEN_STRONG_INLINE std::complex predux(const Packet2cf& a) { Packet4f b; - b = (Packet4f) vec_sld(a.v, a.v, 8); - b = padd(a.v, b); - return pfirst(Packet2cf(b)); + b = vec_sld(a.v, a.v, 8); + b = padd(a.v, b); + return pfirst(Packet2cf(b)); } template<> EIGEN_STRONG_INLINE Packet2cf preduxp(const Packet2cf* vecs) { Packet4f b1, b2; #ifdef _BIG_ENDIAN - b1 = (Packet4f) vec_sld(vecs[0].v, vecs[1].v, 8); - b2 = (Packet4f) vec_sld(vecs[1].v, vecs[0].v, 8); + b1 = vec_sld(vecs[0].v, vecs[1].v, 8); + b2 = vec_sld(vecs[1].v, vecs[0].v, 8); #else - b1 = (Packet4f) vec_sld(vecs[1].v, vecs[0].v, 8); - b2 = (Packet4f) vec_sld(vecs[0].v, vecs[1].v, 8); + b1 = vec_sld(vecs[1].v, vecs[0].v, 8); + b2 = vec_sld(vecs[0].v, vecs[1].v, 8); #endif - b2 = 
(Packet4f) vec_sld(b2, b2, 8); - b2 = padd(b1, b2); + b2 = vec_sld(b2, b2, 8); + b2 = padd(b1, b2); return Packet2cf(b2); } @@ -168,10 +169,10 @@ template<> EIGEN_STRONG_INLINE std::complex predux_mul(const P { Packet4f b; Packet2cf prod; - b = (Packet4f) vec_sld(a.v, a.v, 8); - prod = pmul(a, Packet2cf(b)); + b = vec_sld(a.v, a.v, 8); + prod = pmul(a, Packet2cf(b)); - return pfirst(prod); + return pfirst(prod); } template @@ -223,12 +224,30 @@ template<> struct conj_helper } }; +template<> struct conj_helper +{ + EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet4f& x, const Packet2cf& y, const Packet2cf& c) const + { return padd(c, pmul(x,y)); } + + EIGEN_STRONG_INLINE Packet2cf pmul(const Packet4f& x, const Packet2cf& y) const + { return Packet2cf(internal::pmul(x, y.v)); } +}; + +template<> struct conj_helper +{ + EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet2cf& x, const Packet4f& y, const Packet2cf& c) const + { return padd(c, pmul(x,y)); } + + EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& x, const Packet4f& y) const + { return Packet2cf(internal::pmul(x.v, y)); } +}; + template<> EIGEN_STRONG_INLINE Packet2cf pdiv(const Packet2cf& a, const Packet2cf& b) { // TODO optimize it for AltiVec - Packet2cf res = conj_helper().pmul(a,b); - Packet4f s = vec_madd(b.v, b.v, p4f_ZERO); - return Packet2cf(pdiv(res.v, vec_add(s,vec_perm(s, s, p16uc_COMPLEX32_REV)))); + Packet2cf res = conj_helper().pmul(a, b); + Packet4f s = pmul(b.v, b.v); + return Packet2cf(pdiv(res.v, padd(s, vec_perm(s, s, p16uc_COMPLEX32_REV)))); } template<> EIGEN_STRONG_INLINE Packet2cf pcplxflip(const Packet2cf& x) @@ -243,6 +262,14 @@ EIGEN_STRONG_INLINE void ptranspose(PacketBlock& kernel) kernel.packet[0].v = tmp; } +#ifdef __VSX__ +template<> EIGEN_STRONG_INLINE Packet2cf pblend(const Selector<2>& ifPacket, const Packet2cf& thenPacket, const Packet2cf& elsePacket) { + Packet2cf result; + result.v = reinterpret_cast(pblend(ifPacket, reinterpret_cast(thenPacket.v), 
reinterpret_cast(elsePacket.v))); + return result; +} +#endif + //---------- double ---------- #ifdef __VSX__ struct Packet1cd @@ -277,10 +304,10 @@ template<> struct packet_traits > : default_packet_traits template<> struct unpacket_traits { typedef std::complex type; enum {size=1, alignment=Aligned16}; typedef Packet1cd half; }; -template<> EIGEN_STRONG_INLINE Packet1cd pload (const std::complex* from) { EIGEN_DEBUG_ALIGNED_LOAD return Packet1cd(pload((const double*)from)); } -template<> EIGEN_STRONG_INLINE Packet1cd ploadu(const std::complex* from) { EIGEN_DEBUG_UNALIGNED_LOAD return Packet1cd(ploadu((const double*)from)); } -template<> EIGEN_STRONG_INLINE void pstore >(std::complex * to, const Packet1cd& from) { EIGEN_DEBUG_ALIGNED_STORE pstore((double*)to, from.v); } -template<> EIGEN_STRONG_INLINE void pstoreu >(std::complex * to, const Packet1cd& from) { EIGEN_DEBUG_UNALIGNED_STORE pstoreu((double*)to, from.v); } +template<> EIGEN_STRONG_INLINE Packet1cd pload (const std::complex* from) { return Packet1cd(pload((const double*)from)); } +template<> EIGEN_STRONG_INLINE Packet1cd ploadu(const std::complex* from) { return Packet1cd(ploadu((const double*)from)); } +template<> EIGEN_STRONG_INLINE void pstore >(std::complex * to, const Packet1cd& from) { pstore((double*)to, from.v); } +template<> EIGEN_STRONG_INLINE void pstoreu >(std::complex * to, const Packet1cd& from) { pstoreu((double*)to, from.v); } template<> EIGEN_STRONG_INLINE Packet1cd pset1(const std::complex& from) { /* here we really have to use unaligned loads :( */ return ploadu(&from); } @@ -300,10 +327,10 @@ template<> EIGEN_DEVICE_FUNC inline void pscatter, Packet1c to[1*stride] = af[1]; } -template<> EIGEN_STRONG_INLINE Packet1cd padd(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(vec_add(a.v,b.v)); } -template<> EIGEN_STRONG_INLINE Packet1cd psub(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(vec_sub(a.v,b.v)); } +template<> EIGEN_STRONG_INLINE Packet1cd padd(const 
Packet1cd& a, const Packet1cd& b) { return Packet1cd(a.v + b.v); } +template<> EIGEN_STRONG_INLINE Packet1cd psub(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(a.v - b.v); } template<> EIGEN_STRONG_INLINE Packet1cd pnegate(const Packet1cd& a) { return Packet1cd(pnegate(Packet2d(a.v))); } -template<> EIGEN_STRONG_INLINE Packet1cd pconj(const Packet1cd& a) { return Packet1cd((Packet2d)vec_xor((Packet2d)a.v, (Packet2d)p2ul_CONJ_XOR2)); } +template<> EIGEN_STRONG_INLINE Packet1cd pconj(const Packet1cd& a) { return Packet1cd(pxor(a.v, reinterpret_cast(p2ul_CONJ_XOR2))); } template<> EIGEN_STRONG_INLINE Packet1cd pmul(const Packet1cd& a, const Packet1cd& b) { @@ -317,23 +344,20 @@ template<> EIGEN_STRONG_INLINE Packet1cd pmul(const Packet1cd& a, con v1 = vec_madd(a_re, b.v, p2d_ZERO); // multiply a_im * b and get the conjugate result v2 = vec_madd(a_im, b.v, p2d_ZERO); - v2 = (Packet2d) vec_sld((Packet4ui)v2, (Packet4ui)v2, 8); - v2 = (Packet2d) vec_xor((Packet2d)v2, (Packet2d) p2ul_CONJ_XOR1); + v2 = reinterpret_cast(vec_sld(reinterpret_cast(v2), reinterpret_cast(v2), 8)); + v2 = pxor(v2, reinterpret_cast(p2ul_CONJ_XOR1)); - return Packet1cd(vec_add(v1, v2)); + return Packet1cd(padd(v1, v2)); } -template<> EIGEN_STRONG_INLINE Packet1cd pand (const Packet1cd& a, const Packet1cd& b) { return Packet1cd(vec_and(a.v,b.v)); } -template<> EIGEN_STRONG_INLINE Packet1cd por (const Packet1cd& a, const Packet1cd& b) { return Packet1cd(vec_or(a.v,b.v)); } -template<> EIGEN_STRONG_INLINE Packet1cd pxor (const Packet1cd& a, const Packet1cd& b) { return Packet1cd(vec_xor(a.v,b.v)); } -template<> EIGEN_STRONG_INLINE Packet1cd pandnot(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(vec_and(a.v, vec_nor(b.v,b.v))); } +template<> EIGEN_STRONG_INLINE Packet1cd pand (const Packet1cd& a, const Packet1cd& b) { return Packet1cd(pand(a.v,b.v)); } +template<> EIGEN_STRONG_INLINE Packet1cd por (const Packet1cd& a, const Packet1cd& b) { return Packet1cd(por(a.v,b.v)); } 
+template<> EIGEN_STRONG_INLINE Packet1cd pxor (const Packet1cd& a, const Packet1cd& b) { return Packet1cd(pxor(a.v,b.v)); } +template<> EIGEN_STRONG_INLINE Packet1cd pandnot(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(pandnot(a.v, b.v)); } -template<> EIGEN_STRONG_INLINE Packet1cd ploaddup(const std::complex* from) -{ - return pset1(*from); -} +template<> EIGEN_STRONG_INLINE Packet1cd ploaddup(const std::complex* from) { return pset1(*from); } -template<> EIGEN_STRONG_INLINE void prefetch >(const std::complex * addr) { vec_dstt((long *)addr, DST_CTRL(2,2,32), DST_CHAN); } +template<> EIGEN_STRONG_INLINE void prefetch >(const std::complex * addr) { EIGEN_PPC_PREFETCH(addr); } template<> EIGEN_STRONG_INLINE std::complex pfirst(const Packet1cd& a) { @@ -345,20 +369,10 @@ template<> EIGEN_STRONG_INLINE std::complex pfirst(const Pac template<> EIGEN_STRONG_INLINE Packet1cd preverse(const Packet1cd& a) { return a; } -template<> EIGEN_STRONG_INLINE std::complex predux(const Packet1cd& a) -{ - return pfirst(a); -} +template<> EIGEN_STRONG_INLINE std::complex predux(const Packet1cd& a) { return pfirst(a); } +template<> EIGEN_STRONG_INLINE Packet1cd preduxp(const Packet1cd* vecs) { return vecs[0]; } -template<> EIGEN_STRONG_INLINE Packet1cd preduxp(const Packet1cd* vecs) -{ - return vecs[0]; -} - -template<> EIGEN_STRONG_INLINE std::complex predux_mul(const Packet1cd& a) -{ - return pfirst(a); -} +template<> EIGEN_STRONG_INLINE std::complex predux_mul(const Packet1cd& a) { return pfirst(a); } template struct palign_impl @@ -402,13 +416,30 @@ template<> struct conj_helper return pconj(internal::pmul(a, b)); } }; +template<> struct conj_helper +{ + EIGEN_STRONG_INLINE Packet1cd pmadd(const Packet2d& x, const Packet1cd& y, const Packet1cd& c) const + { return padd(c, pmul(x,y)); } + + EIGEN_STRONG_INLINE Packet1cd pmul(const Packet2d& x, const Packet1cd& y) const + { return Packet1cd(internal::pmul(x, y.v)); } +}; + +template<> struct conj_helper +{ + 
EIGEN_STRONG_INLINE Packet1cd pmadd(const Packet1cd& x, const Packet2d& y, const Packet1cd& c) const + { return padd(c, pmul(x,y)); } + + EIGEN_STRONG_INLINE Packet1cd pmul(const Packet1cd& x, const Packet2d& y) const + { return Packet1cd(internal::pmul(x.v, y)); } +}; template<> EIGEN_STRONG_INLINE Packet1cd pdiv(const Packet1cd& a, const Packet1cd& b) { // TODO optimize it for AltiVec Packet1cd res = conj_helper().pmul(a,b); - Packet2d s = vec_madd(b.v, b.v, p2d_ZERO_); - return Packet1cd(pdiv(res.v, vec_add(s,vec_perm(s, s, p16uc_REVERSE64)))); + Packet2d s = pmul(b.v, b.v); + return Packet1cd(pdiv(res.v, padd(s, vec_perm(s, s, p16uc_REVERSE64)))); } EIGEN_STRONG_INLINE Packet1cd pcplxflip/**/(const Packet1cd& x) diff --git a/Eigen/src/Core/arch/AltiVec/MathFunctions.h b/Eigen/src/Core/arch/AltiVec/MathFunctions.h index 9e37e93f8..5511245dd 100644 --- a/Eigen/src/Core/arch/AltiVec/MathFunctions.h +++ b/Eigen/src/Core/arch/AltiVec/MathFunctions.h @@ -3,6 +3,7 @@ // // Copyright (C) 2007 Julien Pommier // Copyright (C) 2009 Gael Guennebaud +// Copyright (C) 2016 Konstantinos Margaritis // // This Source Code Form is subject to the terms of the Mozilla // Public License v. 2.0. 
If a copy of the MPL was not distributed @@ -19,38 +20,79 @@ namespace Eigen { namespace internal { +static _EIGEN_DECLARE_CONST_Packet4f(1 , 1.0f); +static _EIGEN_DECLARE_CONST_Packet4f(half, 0.5f); +static _EIGEN_DECLARE_CONST_Packet4i(0x7f, 0x7f); +static _EIGEN_DECLARE_CONST_Packet4i(23, 23); + +static _EIGEN_DECLARE_CONST_Packet4f_FROM_INT(inv_mant_mask, ~0x7f800000); + +/* the smallest non denormalized float number */ +static _EIGEN_DECLARE_CONST_Packet4f_FROM_INT(min_norm_pos, 0x00800000); +static _EIGEN_DECLARE_CONST_Packet4f_FROM_INT(minus_inf, 0xff800000); // -1.f/0.f +static _EIGEN_DECLARE_CONST_Packet4f_FROM_INT(minus_nan, 0xffffffff); + +/* natural logarithm computed for 4 simultaneous float + return NaN for x <= 0 +*/ +static _EIGEN_DECLARE_CONST_Packet4f(cephes_SQRTHF, 0.707106781186547524f); +static _EIGEN_DECLARE_CONST_Packet4f(cephes_log_p0, 7.0376836292E-2f); +static _EIGEN_DECLARE_CONST_Packet4f(cephes_log_p1, - 1.1514610310E-1f); +static _EIGEN_DECLARE_CONST_Packet4f(cephes_log_p2, 1.1676998740E-1f); +static _EIGEN_DECLARE_CONST_Packet4f(cephes_log_p3, - 1.2420140846E-1f); +static _EIGEN_DECLARE_CONST_Packet4f(cephes_log_p4, + 1.4249322787E-1f); +static _EIGEN_DECLARE_CONST_Packet4f(cephes_log_p5, - 1.6668057665E-1f); +static _EIGEN_DECLARE_CONST_Packet4f(cephes_log_p6, + 2.0000714765E-1f); +static _EIGEN_DECLARE_CONST_Packet4f(cephes_log_p7, - 2.4999993993E-1f); +static _EIGEN_DECLARE_CONST_Packet4f(cephes_log_p8, + 3.3333331174E-1f); +static _EIGEN_DECLARE_CONST_Packet4f(cephes_log_q1, -2.12194440e-4f); +static _EIGEN_DECLARE_CONST_Packet4f(cephes_log_q2, 0.693359375f); + +static _EIGEN_DECLARE_CONST_Packet4f(exp_hi, 88.3762626647950f); +static _EIGEN_DECLARE_CONST_Packet4f(exp_lo, -88.3762626647949f); + +static _EIGEN_DECLARE_CONST_Packet4f(cephes_LOG2EF, 1.44269504088896341f); +static _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_C1, 0.693359375f); +static _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_C2, -2.12194440e-4f); + +static 
_EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p0, 1.9875691500E-4f); +static _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p1, 1.3981999507E-3f); +static _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p2, 8.3334519073E-3f); +static _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p3, 4.1665795894E-2f); +static _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p4, 1.6666665459E-1f); +static _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p5, 5.0000001201E-1f); + +#ifdef __VSX__ +static _EIGEN_DECLARE_CONST_Packet2d(1 , 1.0); +static _EIGEN_DECLARE_CONST_Packet2d(2 , 2.0); +static _EIGEN_DECLARE_CONST_Packet2d(half, 0.5); + +static _EIGEN_DECLARE_CONST_Packet2d(exp_hi, 709.437); +static _EIGEN_DECLARE_CONST_Packet2d(exp_lo, -709.436139303); + +static _EIGEN_DECLARE_CONST_Packet2d(cephes_LOG2EF, 1.4426950408889634073599); + +static _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_p0, 1.26177193074810590878e-4); +static _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_p1, 3.02994407707441961300e-2); +static _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_p2, 9.99999999999999999910e-1); + +static _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_q0, 3.00198505138664455042e-6); +static _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_q1, 2.52448340349684104192e-3); +static _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_q2, 2.27265548208155028766e-1); +static _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_q3, 2.00000000000000000009e0); + +static _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_C1, 0.693145751953125); +static _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_C2, 1.42860682030941723212e-6); + +static Packet2l p2l_1023 = { 1023, 1023 }; +static Packet2ul p2ul_52 = { 52, 52 }; + +#endif + template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet4f plog(const Packet4f& _x) { Packet4f x = _x; - _EIGEN_DECLARE_CONST_Packet4f(1 , 1.0f); - _EIGEN_DECLARE_CONST_Packet4f(half, 0.5f); - _EIGEN_DECLARE_CONST_Packet4i(0x7f, 0x7f); - _EIGEN_DECLARE_CONST_Packet4i(23, 23); - - _EIGEN_DECLARE_CONST_Packet4f_FROM_INT(inv_mant_mask, ~0x7f800000); - - /* 
the smallest non denormalized float number */ - _EIGEN_DECLARE_CONST_Packet4f_FROM_INT(min_norm_pos, 0x00800000); - _EIGEN_DECLARE_CONST_Packet4f_FROM_INT(minus_inf, 0xff800000); // -1.f/0.f - _EIGEN_DECLARE_CONST_Packet4f_FROM_INT(minus_nan, 0xffffffff); - - /* natural logarithm computed for 4 simultaneous float - return NaN for x <= 0 - */ - _EIGEN_DECLARE_CONST_Packet4f(cephes_SQRTHF, 0.707106781186547524f); - _EIGEN_DECLARE_CONST_Packet4f(cephes_log_p0, 7.0376836292E-2f); - _EIGEN_DECLARE_CONST_Packet4f(cephes_log_p1, - 1.1514610310E-1f); - _EIGEN_DECLARE_CONST_Packet4f(cephes_log_p2, 1.1676998740E-1f); - _EIGEN_DECLARE_CONST_Packet4f(cephes_log_p3, - 1.2420140846E-1f); - _EIGEN_DECLARE_CONST_Packet4f(cephes_log_p4, + 1.4249322787E-1f); - _EIGEN_DECLARE_CONST_Packet4f(cephes_log_p5, - 1.6668057665E-1f); - _EIGEN_DECLARE_CONST_Packet4f(cephes_log_p6, + 2.0000714765E-1f); - _EIGEN_DECLARE_CONST_Packet4f(cephes_log_p7, - 2.4999993993E-1f); - _EIGEN_DECLARE_CONST_Packet4f(cephes_log_p8, + 3.3333331174E-1f); - _EIGEN_DECLARE_CONST_Packet4f(cephes_log_q1, -2.12194440e-4f); - _EIGEN_DECLARE_CONST_Packet4f(cephes_log_q2, 0.693359375f); - Packet4i emm0; @@ -112,36 +154,17 @@ template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet4f pexp(const Packet4f& _x) { Packet4f x = _x; - _EIGEN_DECLARE_CONST_Packet4f(1 , 1.0f); - _EIGEN_DECLARE_CONST_Packet4f(half, 0.5f); - _EIGEN_DECLARE_CONST_Packet4i(0x7f, 0x7f); - _EIGEN_DECLARE_CONST_Packet4i(23, 23); - - - _EIGEN_DECLARE_CONST_Packet4f(exp_hi, 88.3762626647950f); - _EIGEN_DECLARE_CONST_Packet4f(exp_lo, -88.3762626647949f); - - _EIGEN_DECLARE_CONST_Packet4f(cephes_LOG2EF, 1.44269504088896341f); - _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_C1, 0.693359375f); - _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_C2, -2.12194440e-4f); - - _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p0, 1.9875691500E-4f); - _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p1, 1.3981999507E-3f); - _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p2, 
8.3334519073E-3f); - _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p3, 4.1665795894E-2f); - _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p4, 1.6666665459E-1f); - _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p5, 5.0000001201E-1f); Packet4f tmp, fx; Packet4i emm0; // clamp x - x = vec_max(vec_min(x, p4f_exp_hi), p4f_exp_lo); + x = pmax(pmin(x, p4f_exp_hi), p4f_exp_lo); - /* express exp(x) as exp(g + n*log(2)) */ + // express exp(x) as exp(g + n*log(2)) fx = pmadd(x, p4f_cephes_LOG2EF, p4f_half); - fx = vec_floor(fx); + fx = pfloor(fx); tmp = pmul(fx, p4f_cephes_exp_C1); Packet4f z = pmul(fx, p4f_cephes_exp_C2); @@ -171,14 +194,44 @@ Packet4f pexp(const Packet4f& _x) isnumber_mask); } +#ifndef EIGEN_COMP_CLANG +template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED +Packet4f prsqrt(const Packet4f& x) +{ + return vec_rsqrt(x); +} +#endif + #ifdef __VSX__ +#ifndef EIGEN_COMP_CLANG +template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED +Packet2d prsqrt(const Packet2d& x) +{ + return vec_rsqrt(x); +} +#endif + +template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED +Packet4f psqrt(const Packet4f& x) +{ + return vec_sqrt(x); +} + +template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED +Packet2d psqrt(const Packet2d& x) +{ + return vec_sqrt(x); +} + // VSX support varies between different compilers and even different // versions of the same compiler. For gcc version >= 4.9.3, we can use // vec_cts to efficiently convert Packet2d to Packet2l. Otherwise, use // a slow version that works with older compilers. 
+// Update: apparently vec_cts/vec_ctf intrinsics for 64-bit doubles +// are buggy, https://gcc.gnu.org/bugzilla/show_bug.cgi?id=70963 static inline Packet2l ConvertToPacket2l(const Packet2d& x) { -#if EIGEN_GNUC_AT_LEAST(5, 0) || \ - (EIGEN_GNUC_AT(4, 9) && __GNUC_PATCHLEVEL__ >= 3) +#if EIGEN_GNUC_AT_LEAST(5, 4) || \ + (EIGEN_GNUC_AT(6, 1) && __GNUC_PATCHLEVEL__ >= 1) return vec_cts(x, 0); // TODO: check clang version. #else double tmp[2]; @@ -194,36 +247,16 @@ Packet2d pexp(const Packet2d& _x) { Packet2d x = _x; - _EIGEN_DECLARE_CONST_Packet2d(1 , 1.0); - _EIGEN_DECLARE_CONST_Packet2d(2 , 2.0); - _EIGEN_DECLARE_CONST_Packet2d(half, 0.5); - - _EIGEN_DECLARE_CONST_Packet2d(exp_hi, 709.437); - _EIGEN_DECLARE_CONST_Packet2d(exp_lo, -709.436139303); - - _EIGEN_DECLARE_CONST_Packet2d(cephes_LOG2EF, 1.4426950408889634073599); - - _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_p0, 1.26177193074810590878e-4); - _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_p1, 3.02994407707441961300e-2); - _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_p2, 9.99999999999999999910e-1); - - _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_q0, 3.00198505138664455042e-6); - _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_q1, 2.52448340349684104192e-3); - _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_q2, 2.27265548208155028766e-1); - _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_q3, 2.00000000000000000009e0); - - _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_C1, 0.693145751953125); - _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_C2, 1.42860682030941723212e-6); - Packet2d tmp, fx; Packet2l emm0; // clamp x x = pmax(pmin(x, p2d_exp_hi), p2d_exp_lo); - /* express exp(x) as exp(g + n*log(2)) */ - fx = pmadd(p2d_cephes_LOG2EF, x, p2d_half); - fx = vec_floor(fx); + /* express exp(x) as exp(g + n*log(2)) */ + fx = pmadd(x, p2d_cephes_LOG2EF, p2d_half); + + fx = pfloor(fx); tmp = pmul(fx, p2d_cephes_exp_C1); Packet2d z = pmul(fx, p2d_cephes_exp_C2); @@ -249,9 +282,6 @@ Packet2d pexp(const Packet2d& _x) emm0 = ConvertToPacket2l(fx); #ifdef 
__POWER8_VECTOR__ - static const Packet2l p2l_1023 = { 1023, 1023 }; - static const Packet2ul p2ul_52 = { 52, 52 }; - emm0 = vec_add(emm0, p2l_1023); emm0 = vec_sl(emm0, p2ul_52); #else diff --git a/Eigen/src/Core/arch/AltiVec/PacketMath.h b/Eigen/src/Core/arch/AltiVec/PacketMath.h index 0dbbc2e42..cbfef3503 100755 --- a/Eigen/src/Core/arch/AltiVec/PacketMath.h +++ b/Eigen/src/Core/arch/AltiVec/PacketMath.h @@ -1,7 +1,7 @@ // This file is part of Eigen, a lightweight C++ template library // for linear algebra. // -// Copyright (C) 2008-2014 Konstantinos Margaritis +// Copyright (C) 2008-2016 Konstantinos Margaritis // // This Source Code Form is subject to the terms of the Mozilla // Public License v. 2.0. If a copy of the MPL was not distributed @@ -42,7 +42,7 @@ typedef __vector unsigned char Packet16uc; // and it doesn't really work to declare them global, so we define macros instead #define _EIGEN_DECLARE_CONST_FAST_Packet4f(NAME,X) \ - Packet4f p4f_##NAME = (Packet4f) vec_splat_s32(X) + Packet4f p4f_##NAME = reinterpret_cast(vec_splat_s32(X)) #define _EIGEN_DECLARE_CONST_FAST_Packet4i(NAME,X) \ Packet4i p4i_##NAME = vec_splat_s32(X) @@ -69,13 +69,13 @@ typedef __vector unsigned char Packet16uc; // These constants are endian-agnostic static _EIGEN_DECLARE_CONST_FAST_Packet4f(ZERO, 0); //{ 0.0, 0.0, 0.0, 0.0} static _EIGEN_DECLARE_CONST_FAST_Packet4i(ZERO, 0); //{ 0, 0, 0, 0,} -#ifndef __VSX__ static _EIGEN_DECLARE_CONST_FAST_Packet4i(ONE,1); //{ 1, 1, 1, 1} -static Packet4f p4f_ONE = vec_ctf(p4i_ONE, 0); //{ 1.0, 1.0, 1.0, 1.0} -#endif static _EIGEN_DECLARE_CONST_FAST_Packet4i(MINUS16,-16); //{ -16, -16, -16, -16} static _EIGEN_DECLARE_CONST_FAST_Packet4i(MINUS1,-1); //{ -1, -1, -1, -1} static Packet4f p4f_ZERO_ = (Packet4f) vec_sl((Packet4ui)p4i_MINUS1, (Packet4ui)p4i_MINUS1); //{ 0x80000000, 0x80000000, 0x80000000, 0x80000000} +#ifndef __VSX__ +static Packet4f p4f_ONE = vec_ctf(p4i_ONE, 0); //{ 1.0, 1.0, 1.0, 1.0} +#endif static Packet4f p4f_COUNTDOWN = { 
0.0, 1.0, 2.0, 3.0 }; static Packet4i p4i_COUNTDOWN = { 0, 1, 2, 3 }; @@ -95,8 +95,10 @@ static Packet16uc p16uc_DUPLICATE32_HI = { 0,1,2,3, 0,1,2,3, 4,5,6,7, 4,5,6,7 }; // Handle endianness properly while loading constants // Define global static constants: #ifdef _BIG_ENDIAN -static Packet16uc p16uc_FORWARD = vec_lvsl(0, (float*)0); +static Packet16uc p16uc_FORWARD = vec_lvsl(0, (float*)0); +#ifdef __VSX__ static Packet16uc p16uc_REVERSE64 = { 8,9,10,11, 12,13,14,15, 0,1,2,3, 4,5,6,7 }; +#endif static Packet16uc p16uc_PSET32_WODD = vec_sld((Packet16uc) vec_splat((Packet4ui)p16uc_FORWARD, 0), (Packet16uc) vec_splat((Packet4ui)p16uc_FORWARD, 2), 8);//{ 0,1,2,3, 0,1,2,3, 8,9,10,11, 8,9,10,11 }; static Packet16uc p16uc_PSET32_WEVEN = vec_sld(p16uc_DUPLICATE32_HI, (Packet16uc) vec_splat((Packet4ui)p16uc_FORWARD, 3), 8);//{ 4,5,6,7, 4,5,6,7, 12,13,14,15, 12,13,14,15 }; static Packet16uc p16uc_HALF64_0_16 = vec_sld((Packet16uc)p4i_ZERO, vec_splat((Packet16uc) vec_abs(p4i_MINUS16), 3), 8); //{ 0,0,0,0, 0,0,0,0, 16,16,16,16, 16,16,16,16}; @@ -110,8 +112,8 @@ static Packet16uc p16uc_HALF64_0_16 = vec_sld(vec_splat((Packet16uc) vec_abs(p4i static Packet16uc p16uc_PSET64_HI = (Packet16uc) vec_mergeh((Packet4ui)p16uc_PSET32_WODD, (Packet4ui)p16uc_PSET32_WEVEN); //{ 0,1,2,3, 4,5,6,7, 0,1,2,3, 4,5,6,7 }; static Packet16uc p16uc_PSET64_LO = (Packet16uc) vec_mergel((Packet4ui)p16uc_PSET32_WODD, (Packet4ui)p16uc_PSET32_WEVEN); //{ 8,9,10,11, 12,13,14,15, 8,9,10,11, 12,13,14,15 }; -static Packet16uc p16uc_TRANSPOSE64_HI = vec_add(p16uc_PSET64_HI, p16uc_HALF64_0_16); //{ 0,1,2,3, 4,5,6,7, 16,17,18,19, 20,21,22,23}; -static Packet16uc p16uc_TRANSPOSE64_LO = vec_add(p16uc_PSET64_LO, p16uc_HALF64_0_16); //{ 8,9,10,11, 12,13,14,15, 24,25,26,27, 28,29,30,31}; +static Packet16uc p16uc_TRANSPOSE64_HI = p16uc_PSET64_HI + p16uc_HALF64_0_16; //{ 0,1,2,3, 4,5,6,7, 16,17,18,19, 20,21,22,23}; +static Packet16uc p16uc_TRANSPOSE64_LO = p16uc_PSET64_LO + p16uc_HALF64_0_16; //{ 8,9,10,11, 
12,13,14,15, 24,25,26,27, 28,29,30,31}; static Packet16uc p16uc_COMPLEX32_REV = vec_sld(p16uc_REVERSE32, p16uc_REVERSE32, 8); //{ 4,5,6,7, 0,1,2,3, 12,13,14,15, 8,9,10,11 }; @@ -121,6 +123,12 @@ static Packet16uc p16uc_COMPLEX32_REV2 = vec_sld(p16uc_FORWARD, p16uc_FORWARD, 8 static Packet16uc p16uc_COMPLEX32_REV2 = vec_sld(p16uc_PSET64_HI, p16uc_PSET64_LO, 8); //{ 8,9,10,11, 12,13,14,15, 0,1,2,3, 4,5,6,7 }; #endif // _BIG_ENDIAN +#if EIGEN_HAS_BUILTIN(__builtin_prefetch) || EIGEN_COMP_GNUC + #define EIGEN_PPC_PREFETCH(ADDR) __builtin_prefetch(ADDR); +#else + #define EIGEN_PPC_PREFETCH(ADDR) asm( " dcbt [%[addr]]\n" :: [addr] "r" (ADDR) : "cc" ); +#endif + template<> struct packet_traits : default_packet_traits { typedef Packet4f type; @@ -129,15 +137,35 @@ template<> struct packet_traits : default_packet_traits Vectorizable = 1, AlignedOnScalar = 1, size=4, - HasHalfPacket=0, + HasHalfPacket = 1, - // FIXME check the Has* + HasAdd = 1, + HasSub = 1, + HasMul = 1, HasDiv = 1, + HasMin = 1, + HasMax = 1, + HasAbs = 1, HasSin = 0, HasCos = 0, - HasLog = 1, + HasLog = 0, HasExp = 1, - HasSqrt = 0 +#ifdef __VSX__ + HasSqrt = 1, +#if !EIGEN_COMP_CLANG + HasRsqrt = 1, +#else + HasRsqrt = 0, +#endif +#else + HasSqrt = 0, + HasRsqrt = 0, +#endif + HasRound = 1, + HasFloor = 1, + HasCeil = 1, + HasNegate = 1, + HasBlend = 1 }; }; template<> struct packet_traits : default_packet_traits @@ -145,10 +173,16 @@ template<> struct packet_traits : default_packet_traits typedef Packet4i type; typedef Packet4i half; enum { - // FIXME check the Has* Vectorizable = 1, AlignedOnScalar = 1, - size=4 + size = 4, + HasHalfPacket = 0, + + HasAdd = 1, + HasSub = 1, + HasMul = 1, + HasDiv = 0, + HasBlend = 1 }; }; @@ -200,41 +234,56 @@ inline std::ostream & operator <<(std::ostream & s, const Packet4ui & v) s << vt.n[0] << ", " << vt.n[1] << ", " << vt.n[2] << ", " << vt.n[3]; return s; } -/* -inline std::ostream & operator <<(std::ostream & s, const Packetbi & v) -{ - union { - Packet4bi v; - 
unsigned int n[4]; - } vt; - vt.v = v; - s << vt.n[0] << ", " << vt.n[1] << ", " << vt.n[2] << ", " << vt.n[3]; - return s; -}*/ - // Need to define them first or we get specialization after instantiation errors -template<> EIGEN_STRONG_INLINE Packet4f pload(const float* from) { EIGEN_DEBUG_ALIGNED_LOAD return vec_ld(0, from); } -template<> EIGEN_STRONG_INLINE Packet4i pload(const int* from) { EIGEN_DEBUG_ALIGNED_LOAD return vec_ld(0, from); } +template<> EIGEN_STRONG_INLINE Packet4f pload(const float* from) +{ + EIGEN_DEBUG_ALIGNED_LOAD +#ifdef __VSX__ + return vec_vsx_ld(0, from); +#else + return vec_ld(0, from); +#endif +} -template<> EIGEN_STRONG_INLINE void pstore(float* to, const Packet4f& from) { EIGEN_DEBUG_ALIGNED_STORE vec_st(from, 0, to); } -template<> EIGEN_STRONG_INLINE void pstore(int* to, const Packet4i& from) { EIGEN_DEBUG_ALIGNED_STORE vec_st(from, 0, to); } +template<> EIGEN_STRONG_INLINE Packet4i pload(const int* from) +{ + EIGEN_DEBUG_ALIGNED_LOAD +#ifdef __VSX__ + return vec_vsx_ld(0, from); +#else + return vec_ld(0, from); +#endif +} + +template<> EIGEN_STRONG_INLINE void pstore(float* to, const Packet4f& from) +{ + EIGEN_DEBUG_ALIGNED_STORE +#ifdef __VSX__ + vec_vsx_st(from, 0, to); +#else + vec_st(from, 0, to); +#endif +} + +template<> EIGEN_STRONG_INLINE void pstore(int* to, const Packet4i& from) +{ + EIGEN_DEBUG_ALIGNED_STORE +#ifdef __VSX__ + vec_vsx_st(from, 0, to); +#else + vec_st(from, 0, to); +#endif +} template<> EIGEN_STRONG_INLINE Packet4f pset1(const float& from) { - // Taken from http://developer.apple.com/hardwaredrivers/ve/alignment.html - float EIGEN_ALIGN16 af[4]; - af[0] = from; - Packet4f vc = pload(af); - vc = vec_splat(vc, 0); - return vc; + Packet4f v = {from, from, from, from}; + return v; } template<> EIGEN_STRONG_INLINE Packet4i pset1(const int& from) { - int EIGEN_ALIGN16 ai[4]; - ai[0] = from; - Packet4i vc = pload(ai); - vc = vec_splat(vc, 0); - return vc; + Packet4i v = {from, from, from, from}; + return v; } 
template<> EIGEN_STRONG_INLINE void pbroadcast4(const float *a, @@ -294,58 +343,24 @@ template<> EIGEN_DEVICE_FUNC inline void pscatter(int* to, const to[3*stride] = ai[3]; } -template<> EIGEN_STRONG_INLINE Packet4f plset(const float& a) { return vec_add(pset1(a), p4f_COUNTDOWN); } -template<> EIGEN_STRONG_INLINE Packet4i plset(const int& a) { return vec_add(pset1(a), p4i_COUNTDOWN); } +template<> EIGEN_STRONG_INLINE Packet4f plset(const float& a) { return pset1(a) + p4f_COUNTDOWN; } +template<> EIGEN_STRONG_INLINE Packet4i plset(const int& a) { return pset1(a) + p4i_COUNTDOWN; } -template<> EIGEN_STRONG_INLINE Packet4f padd(const Packet4f& a, const Packet4f& b) { return vec_add(a,b); } -template<> EIGEN_STRONG_INLINE Packet4i padd(const Packet4i& a, const Packet4i& b) { return vec_add(a,b); } +template<> EIGEN_STRONG_INLINE Packet4f padd(const Packet4f& a, const Packet4f& b) { return a + b; } +template<> EIGEN_STRONG_INLINE Packet4i padd(const Packet4i& a, const Packet4i& b) { return a + b; } -template<> EIGEN_STRONG_INLINE Packet4f psub(const Packet4f& a, const Packet4f& b) { return vec_sub(a,b); } -template<> EIGEN_STRONG_INLINE Packet4i psub(const Packet4i& a, const Packet4i& b) { return vec_sub(a,b); } +template<> EIGEN_STRONG_INLINE Packet4f psub(const Packet4f& a, const Packet4f& b) { return a - b; } +template<> EIGEN_STRONG_INLINE Packet4i psub(const Packet4i& a, const Packet4i& b) { return a - b; } -template<> EIGEN_STRONG_INLINE Packet4f pnegate(const Packet4f& a) { return psub(p4f_ZERO, a); } -template<> EIGEN_STRONG_INLINE Packet4i pnegate(const Packet4i& a) { return psub(p4i_ZERO, a); } +template<> EIGEN_STRONG_INLINE Packet4f pnegate(const Packet4f& a) { return p4f_ZERO - a; } +template<> EIGEN_STRONG_INLINE Packet4i pnegate(const Packet4i& a) { return p4i_ZERO - a; } template<> EIGEN_STRONG_INLINE Packet4f pconj(const Packet4f& a) { return a; } template<> EIGEN_STRONG_INLINE Packet4i pconj(const Packet4i& a) { return a; } -template<> 
EIGEN_STRONG_INLINE Packet4f pmul(const Packet4f& a, const Packet4f& b) { return vec_madd(a,b,p4f_ZERO); } -/* Commented out: it's actually slower than processing it scalar - * -template<> EIGEN_STRONG_INLINE Packet4i pmul(const Packet4i& a, const Packet4i& b) -{ - // Detailed in: http://freevec.org/content/32bit_signed_integer_multiplication_altivec - //Set up constants, variables - Packet4i a1, b1, bswap, low_prod, high_prod, prod, prod_, v1sel; +template<> EIGEN_STRONG_INLINE Packet4f pmul(const Packet4f& a, const Packet4f& b) { return vec_madd(a,b, p4f_ZERO); } +template<> EIGEN_STRONG_INLINE Packet4i pmul(const Packet4i& a, const Packet4i& b) { return a * b; } - // Get the absolute values - a1 = vec_abs(a); - b1 = vec_abs(b); - - // Get the signs using xor - Packet4bi sgn = (Packet4bi) vec_cmplt(vec_xor(a, b), p4i_ZERO); - - // Do the multiplication for the asbolute values. - bswap = (Packet4i) vec_rl((Packet4ui) b1, (Packet4ui) p4i_MINUS16 ); - low_prod = vec_mulo((Packet8i) a1, (Packet8i)b1); - high_prod = vec_msum((Packet8i) a1, (Packet8i) bswap, p4i_ZERO); - high_prod = (Packet4i) vec_sl((Packet4ui) high_prod, (Packet4ui) p4i_MINUS16); - prod = vec_add( low_prod, high_prod ); - - // NOR the product and select only the negative elements according to the sign mask - prod_ = vec_nor(prod, prod); - prod_ = vec_sel(p4i_ZERO, prod_, sgn); - - // Add 1 to the result to get the negative numbers - v1sel = vec_sel(p4i_ZERO, p4i_ONE, sgn); - prod_ = vec_add(prod_, v1sel); - - // Merge the results back to the final vector. 
- prod = vec_sel(prod, prod_, sgn); - - return prod; -} -*/ template<> EIGEN_STRONG_INLINE Packet4f pdiv(const Packet4f& a, const Packet4f& b) { #ifndef __VSX__ // VSX actually provides a div instruction @@ -370,8 +385,8 @@ template<> EIGEN_STRONG_INLINE Packet4i pdiv(const Packet4i& /*a*/, co } // for some weird raisons, it has to be overloaded for packet of integers -template<> EIGEN_STRONG_INLINE Packet4f pmadd(const Packet4f& a, const Packet4f& b, const Packet4f& c) { return vec_madd(a, b, c); } -template<> EIGEN_STRONG_INLINE Packet4i pmadd(const Packet4i& a, const Packet4i& b, const Packet4i& c) { return padd(pmul(a,b), c); } +template<> EIGEN_STRONG_INLINE Packet4f pmadd(const Packet4f& a, const Packet4f& b, const Packet4f& c) { return vec_madd(a,b,c); } +template<> EIGEN_STRONG_INLINE Packet4i pmadd(const Packet4i& a, const Packet4i& b, const Packet4i& c) { return a*b + c; } template<> EIGEN_STRONG_INLINE Packet4f pmin(const Packet4f& a, const Packet4f& b) { return vec_min(a, b); } template<> EIGEN_STRONG_INLINE Packet4i pmin(const Packet4i& a, const Packet4i& b) { return vec_min(a, b); } @@ -391,6 +406,10 @@ template<> EIGEN_STRONG_INLINE Packet4i pxor(const Packet4i& a, const template<> EIGEN_STRONG_INLINE Packet4f pandnot(const Packet4f& a, const Packet4f& b) { return vec_and(a, vec_nor(b, b)); } template<> EIGEN_STRONG_INLINE Packet4i pandnot(const Packet4i& a, const Packet4i& b) { return vec_and(a, vec_nor(b, b)); } +template<> EIGEN_STRONG_INLINE Packet4f pround(const Packet4f& a) { return vec_round(a); } +template<> EIGEN_STRONG_INLINE Packet4f pceil(const Packet4f& a) { return vec_ceil(a); } +template<> EIGEN_STRONG_INLINE Packet4f pfloor(const Packet4f& a) { return vec_floor(a); } + #ifdef _BIG_ENDIAN template<> EIGEN_STRONG_INLINE Packet4f ploadu(const float* from) { @@ -418,12 +437,12 @@ template<> EIGEN_STRONG_INLINE Packet4i ploadu(const int* from) // We also need ot redefine little endian loading of Packet4i/Packet4f using VSX template<> 
EIGEN_STRONG_INLINE Packet4i ploadu(const int* from) { - EIGEN_DEBUG_ALIGNED_LOAD + EIGEN_DEBUG_UNALIGNED_LOAD return (Packet4i) vec_vsx_ld((long)from & 15, (const int*) _EIGEN_ALIGNED_PTR(from)); } template<> EIGEN_STRONG_INLINE Packet4f ploadu(const float* from) { - EIGEN_DEBUG_ALIGNED_LOAD + EIGEN_DEBUG_UNALIGNED_LOAD return (Packet4f) vec_vsx_ld((long)from & 15, (const float*) _EIGEN_ALIGNED_PTR(from)); } #endif @@ -494,16 +513,19 @@ template<> EIGEN_STRONG_INLINE void pstoreu(float* to, const Packet4f& } #endif -#ifndef __VSX__ -template<> EIGEN_STRONG_INLINE void prefetch(const float* addr) { vec_dstt(addr, DST_CTRL(2,2,32), DST_CHAN); } -template<> EIGEN_STRONG_INLINE void prefetch(const int* addr) { vec_dstt(addr, DST_CTRL(2,2,32), DST_CHAN); } -#endif +template<> EIGEN_STRONG_INLINE void prefetch(const float* addr) { EIGEN_PPC_PREFETCH(addr); } +template<> EIGEN_STRONG_INLINE void prefetch(const int* addr) { EIGEN_PPC_PREFETCH(addr); } -template<> EIGEN_STRONG_INLINE float pfirst(const Packet4f& a) { float EIGEN_ALIGN16 x[4]; vec_st(a, 0, x); return x[0]; } -template<> EIGEN_STRONG_INLINE int pfirst(const Packet4i& a) { int EIGEN_ALIGN16 x[4]; vec_st(a, 0, x); return x[0]; } +template<> EIGEN_STRONG_INLINE float pfirst(const Packet4f& a) { float EIGEN_ALIGN16 x; vec_ste(a, 0, &x); return x; } +template<> EIGEN_STRONG_INLINE int pfirst(const Packet4i& a) { int EIGEN_ALIGN16 x; vec_ste(a, 0, &x); return x; } -template<> EIGEN_STRONG_INLINE Packet4f preverse(const Packet4f& a) { return (Packet4f)vec_perm((Packet16uc)a,(Packet16uc)a, p16uc_REVERSE32); } -template<> EIGEN_STRONG_INLINE Packet4i preverse(const Packet4i& a) { return (Packet4i)vec_perm((Packet16uc)a,(Packet16uc)a, p16uc_REVERSE32); } +template<> EIGEN_STRONG_INLINE Packet4f preverse(const Packet4f& a) +{ + return reinterpret_cast(vec_perm(reinterpret_cast(a), reinterpret_cast(a), p16uc_REVERSE32)); +} +template<> EIGEN_STRONG_INLINE Packet4i preverse(const Packet4i& a) +{ + return 
reinterpret_cast(vec_perm(reinterpret_cast(a), reinterpret_cast(a), p16uc_REVERSE32)); } template<> EIGEN_STRONG_INLINE Packet4f pabs(const Packet4f& a) { return vec_abs(a); } template<> EIGEN_STRONG_INLINE Packet4i pabs(const Packet4i& a) { return vec_abs(a); } @@ -511,10 +533,10 @@ template<> EIGEN_STRONG_INLINE Packet4i pabs(const Packet4i& a) { return vec_abs template<> EIGEN_STRONG_INLINE float predux(const Packet4f& a) { Packet4f b, sum; - b = (Packet4f) vec_sld(a, a, 8); - sum = vec_add(a, b); - b = (Packet4f) vec_sld(sum, sum, 4); - sum = vec_add(sum, b); + b = vec_sld(a, a, 8); + sum = a + b; + b = vec_sld(sum, sum, 4); + sum += b; return pfirst(sum); } @@ -537,11 +559,11 @@ template<> EIGEN_STRONG_INLINE Packet4f preduxp(const Packet4f* vecs) // Now do the summation: // Lines 0+1 - sum[0] = vec_add(sum[0], sum[1]); + sum[0] = sum[0] + sum[1]; // Lines 2+3 - sum[1] = vec_add(sum[2], sum[3]); + sum[1] = sum[2] + sum[3]; // Add the results - sum[0] = vec_add(sum[0], sum[1]); + sum[0] = sum[0] + sum[1]; return sum[0]; } @@ -577,11 +599,11 @@ template<> EIGEN_STRONG_INLINE Packet4i preduxp(const Packet4i* vecs) // Now do the summation: // Lines 0+1 - sum[0] = vec_add(sum[0], sum[1]); + sum[0] = sum[0] + sum[1]; // Lines 2+3 - sum[1] = vec_add(sum[2], sum[3]); + sum[1] = sum[2] + sum[3]; // Add the results - sum[0] = vec_add(sum[0], sum[1]); + sum[0] = sum[0] + sum[1]; return sum[0]; } @@ -591,8 +613,8 @@ template<> EIGEN_STRONG_INLINE Packet4i preduxp(const Packet4i* vecs) template<> EIGEN_STRONG_INLINE float predux_mul(const Packet4f& a) { Packet4f prod; - prod = pmul(a, (Packet4f)vec_sld(a, a, 8)); - return pfirst(pmul(prod, (Packet4f)vec_sld(prod, prod, 4))); + prod = pmul(a, vec_sld(a, a, 8)); + return pfirst(pmul(prod, vec_sld(prod, prod, 4))); } template<> EIGEN_STRONG_INLINE int predux_mul(const Packet4i& a) @@ -716,33 +738,52 @@ ptranspose(PacketBlock& kernel) { kernel.packet[3] = vec_mergel(t1, t3); } +template<> EIGEN_STRONG_INLINE Packet4i 
pblend(const Selector<4>& ifPacket, const Packet4i& thenPacket, const Packet4i& elsePacket) { + Packet4ui select = { ifPacket.select[0], ifPacket.select[1], ifPacket.select[2], ifPacket.select[3] }; + Packet4ui mask = reinterpret_cast(vec_cmpeq(reinterpret_cast(select), reinterpret_cast(p4i_ONE))); + return vec_sel(elsePacket, thenPacket, mask); +} + +template<> EIGEN_STRONG_INLINE Packet4f pblend(const Selector<4>& ifPacket, const Packet4f& thenPacket, const Packet4f& elsePacket) { + Packet4ui select = { ifPacket.select[0], ifPacket.select[1], ifPacket.select[2], ifPacket.select[3] }; + Packet4ui mask = reinterpret_cast(vec_cmpeq(reinterpret_cast(select), reinterpret_cast(p4i_ONE))); + return vec_sel(elsePacket, thenPacket, mask); +} + //---------- double ---------- #ifdef __VSX__ typedef __vector double Packet2d; typedef __vector unsigned long long Packet2ul; typedef __vector long long Packet2l; - -static Packet2l p2l_ZERO = (Packet2l) p4i_ZERO; -static Packet2d p2d_ONE = { 1.0, 1.0 }; -static Packet2d p2d_ZERO = (Packet2d) p4f_ZERO; -static Packet2d p2d_ZERO_ = { -0.0, -0.0 }; - -#ifdef _BIG_ENDIAN -static Packet2d p2d_COUNTDOWN = (Packet2d) vec_sld((Packet16uc) p2d_ZERO, (Packet16uc) p2d_ONE, 8); +#if EIGEN_COMP_CLANG +typedef Packet2ul Packet2bl; #else -static Packet2d p2d_COUNTDOWN = (Packet2d) vec_sld((Packet16uc) p2d_ONE, (Packet16uc) p2d_ZERO, 8); +typedef __vector __bool long Packet2bl; #endif -static EIGEN_STRONG_INLINE Packet2d vec_splat_dbl(Packet2d& a, int index) +static Packet2l p2l_ONE = { 1, 1 }; +static Packet2l p2l_ZERO = reinterpret_cast(p4i_ZERO); +static Packet2d p2d_ONE = { 1.0, 1.0 }; +static Packet2d p2d_ZERO = reinterpret_cast(p4f_ZERO); +static Packet2d p2d_ZERO_ = { -0.0, -0.0 }; + +#ifdef _BIG_ENDIAN +static Packet2d p2d_COUNTDOWN = reinterpret_cast(vec_sld(reinterpret_cast(p2d_ZERO), reinterpret_cast(p2d_ONE), 8)); +#else +static Packet2d p2d_COUNTDOWN = reinterpret_cast(vec_sld(reinterpret_cast(p2d_ONE), reinterpret_cast(p2d_ZERO), 
8)); +#endif + +template Packet2d vec_splat_dbl(Packet2d& a); + +template<> EIGEN_STRONG_INLINE Packet2d vec_splat_dbl<0>(Packet2d& a) { - switch (index) { - case 0: - return (Packet2d) vec_perm(a, a, p16uc_PSET64_HI); - case 1: - return (Packet2d) vec_perm(a, a, p16uc_PSET64_LO); - } - return a; + return reinterpret_cast(vec_perm(a, a, p16uc_PSET64_HI)); +} + +template<> EIGEN_STRONG_INLINE Packet2d vec_splat_dbl<1>(Packet2d& a) +{ + return reinterpret_cast(vec_perm(a, a, p16uc_PSET64_LO)); } template<> struct packet_traits : default_packet_traits @@ -753,16 +794,41 @@ template<> struct packet_traits : default_packet_traits Vectorizable = 1, AlignedOnScalar = 1, size=2, - HasHalfPacket = 0, + HasHalfPacket = 1, + HasAdd = 1, + HasSub = 1, + HasMul = 1, HasDiv = 1, + HasMin = 1, + HasMax = 1, + HasAbs = 1, + HasSin = 0, + HasCos = 0, + HasLog = 0, HasExp = 1, - HasSqrt = 0 + HasSqrt = 1, + HasRsqrt = 1, + HasRound = 1, + HasFloor = 1, + HasCeil = 1, + HasNegate = 1, + HasBlend = 1 }; }; template<> struct unpacket_traits { typedef double type; enum {size=2, alignment=Aligned16}; typedef Packet2d half; }; +inline std::ostream & operator <<(std::ostream & s, const Packet2l & v) +{ + union { + Packet2l v; + int64_t n[2]; + } vt; + vt.v = v; + s << vt.n[0] << ", " << vt.n[1]; + return s; +} inline std::ostream & operator <<(std::ostream & s, const Packet2d & v) { @@ -776,28 +842,43 @@ inline std::ostream & operator <<(std::ostream & s, const Packet2d & v) } // Need to define them first or we get specialization after instantiation errors -template<> EIGEN_STRONG_INLINE Packet2d pload(const double* from) { EIGEN_DEBUG_ALIGNED_LOAD return (Packet2d) vec_ld(0, (const float *) from); } //FIXME +template<> EIGEN_STRONG_INLINE Packet2d pload(const double* from) +{ + EIGEN_DEBUG_ALIGNED_LOAD +#ifdef __VSX__ + return vec_vsx_ld(0, from); +#else + return vec_ld(0, from); +#endif +} -template<> EIGEN_STRONG_INLINE void pstore(double* to, const Packet2d& from) { 
EIGEN_DEBUG_ALIGNED_STORE vec_st((Packet4f)from, 0, (float *)to); } +template<> EIGEN_STRONG_INLINE void pstore(double* to, const Packet2d& from) +{ + EIGEN_DEBUG_ALIGNED_STORE +#ifdef __VSX__ + vec_vsx_st(from, 0, to); +#else + vec_st(from, 0, to); +#endif +} template<> EIGEN_STRONG_INLINE Packet2d pset1(const double& from) { - double EIGEN_ALIGN16 af[2]; - af[0] = from; - Packet2d vc = pload(af); - vc = vec_splat_dbl(vc, 0); - return vc; + Packet2d v = {from, from}; + return v; } + template<> EIGEN_STRONG_INLINE void pbroadcast4(const double *a, Packet2d& a0, Packet2d& a1, Packet2d& a2, Packet2d& a3) { a1 = pload(a); - a0 = vec_splat_dbl(a1, 0); - a1 = vec_splat_dbl(a1, 1); + a0 = vec_splat_dbl<0>(a1); + a1 = vec_splat_dbl<1>(a1); a3 = pload(a+2); - a2 = vec_splat_dbl(a3, 0); - a3 = vec_splat_dbl(a3, 1); + a2 = vec_splat_dbl<0>(a3); + a3 = vec_splat_dbl<1>(a3); } + template<> EIGEN_DEVICE_FUNC inline Packet2d pgather(const double* from, Index stride) { double EIGEN_ALIGN16 af[2]; @@ -812,13 +893,14 @@ template<> EIGEN_DEVICE_FUNC inline void pscatter(double* to, to[0*stride] = af[0]; to[1*stride] = af[1]; } -template<> EIGEN_STRONG_INLINE Packet2d plset(const double& a) { return vec_add(pset1(a), p2d_COUNTDOWN); } -template<> EIGEN_STRONG_INLINE Packet2d padd(const Packet2d& a, const Packet2d& b) { return vec_add(a,b); } +template<> EIGEN_STRONG_INLINE Packet2d plset(const double& a) { return pset1(a) + p2d_COUNTDOWN; } -template<> EIGEN_STRONG_INLINE Packet2d psub(const Packet2d& a, const Packet2d& b) { return vec_sub(a,b); } +template<> EIGEN_STRONG_INLINE Packet2d padd(const Packet2d& a, const Packet2d& b) { return a + b; } -template<> EIGEN_STRONG_INLINE Packet2d pnegate(const Packet2d& a) { return psub(p2d_ZERO, a); } +template<> EIGEN_STRONG_INLINE Packet2d psub(const Packet2d& a, const Packet2d& b) { return a - b; } + +template<> EIGEN_STRONG_INLINE Packet2d pnegate(const Packet2d& a) { return p2d_ZERO - a; } template<> EIGEN_STRONG_INLINE Packet2d 
pconj(const Packet2d& a) { return a; } @@ -840,17 +922,22 @@ template<> EIGEN_STRONG_INLINE Packet2d pxor(const Packet2d& a, const template<> EIGEN_STRONG_INLINE Packet2d pandnot(const Packet2d& a, const Packet2d& b) { return vec_and(a, vec_nor(b, b)); } +template<> EIGEN_STRONG_INLINE Packet2d pround(const Packet2d& a) { return vec_round(a); } +template<> EIGEN_STRONG_INLINE Packet2d pceil(const Packet2d& a) { return vec_ceil(a); } +template<> EIGEN_STRONG_INLINE Packet2d pfloor(const Packet2d& a) { return vec_floor(a); } + template<> EIGEN_STRONG_INLINE Packet2d ploadu(const double* from) { EIGEN_DEBUG_ALIGNED_LOAD - return (Packet2d) vec_vsx_ld((long)from & 15, (const float*) _EIGEN_ALIGNED_PTR(from)); + return (Packet2d) vec_vsx_ld((long)from & 15, (const double*) _EIGEN_ALIGNED_PTR(from)); } + template<> EIGEN_STRONG_INLINE Packet2d ploaddup(const double* from) { Packet2d p; if((ptrdiff_t(from) % 16) == 0) p = pload(from); else p = ploadu(from); - return vec_perm(p, p, p16uc_PSET64_HI); + return vec_splat_dbl<0>(p); } template<> EIGEN_STRONG_INLINE void pstoreu(double* to, const Packet2d& from) @@ -859,32 +946,34 @@ template<> EIGEN_STRONG_INLINE void pstoreu(double* to, const Packet2d& vec_vsx_st((Packet4f)from, (long)to & 15, (float*) _EIGEN_ALIGNED_PTR(to)); } -template<> EIGEN_STRONG_INLINE void prefetch(const double* addr) { vec_dstt((const float *) addr, DST_CTRL(2,2,32), DST_CHAN); } +template<> EIGEN_STRONG_INLINE void prefetch(const double* addr) { EIGEN_PPC_PREFETCH(addr); } -template<> EIGEN_STRONG_INLINE double pfirst(const Packet2d& a) { double EIGEN_ALIGN16 x[2]; pstore(x, a); return x[0]; } - -template<> EIGEN_STRONG_INLINE Packet2d preverse(const Packet2d& a) { return (Packet2d)vec_perm((Packet16uc)a,(Packet16uc)a, p16uc_REVERSE64); } +template<> EIGEN_STRONG_INLINE double pfirst(const Packet2d& a) { double EIGEN_ALIGN16 x[2]; pstore(x, a); return x[0]; } +template<> EIGEN_STRONG_INLINE Packet2d preverse(const Packet2d& a) +{ + return 
reinterpret_cast(vec_perm(reinterpret_cast(a), reinterpret_cast(a), p16uc_REVERSE64)); +} template<> EIGEN_STRONG_INLINE Packet2d pabs(const Packet2d& a) { return vec_abs(a); } template<> EIGEN_STRONG_INLINE double predux(const Packet2d& a) { Packet2d b, sum; - b = (Packet2d) vec_sld((Packet4ui) a, (Packet4ui)a, 8); - sum = vec_add(a, b); - return pfirst(sum); + b = reinterpret_cast(vec_sld(reinterpret_cast(a), reinterpret_cast(a), 8)); + sum = a + b; + return pfirst(sum); } template<> EIGEN_STRONG_INLINE Packet2d preduxp(const Packet2d* vecs) { Packet2d v[2], sum; - v[0] = vec_add(vecs[0], (Packet2d) vec_sld((Packet4ui) vecs[0], (Packet4ui) vecs[0], 8)); - v[1] = vec_add(vecs[1], (Packet2d) vec_sld((Packet4ui) vecs[1], (Packet4ui) vecs[1], 8)); + v[0] = vecs[0] + reinterpret_cast(vec_sld(reinterpret_cast(vecs[0]), reinterpret_cast(vecs[0]), 8)); + v[1] = vecs[1] + reinterpret_cast(vec_sld(reinterpret_cast(vecs[1]), reinterpret_cast(vecs[1]), 8)); #ifdef _BIG_ENDIAN - sum = (Packet2d) vec_sld((Packet4ui) v[0], (Packet4ui) v[1], 8); + sum = reinterpret_cast(vec_sld(reinterpret_cast(v[0]), reinterpret_cast(v[1]), 8)); #else - sum = (Packet2d) vec_sld((Packet4ui) v[1], (Packet4ui) v[0], 8); + sum = reinterpret_cast(vec_sld(reinterpret_cast(v[1]), reinterpret_cast(v[0]), 8)); #endif return sum; @@ -893,19 +982,19 @@ template<> EIGEN_STRONG_INLINE Packet2d preduxp(const Packet2d* vecs) // mul template<> EIGEN_STRONG_INLINE double predux_mul(const Packet2d& a) { - return pfirst(pmul(a, (Packet2d)vec_sld((Packet4ui) a, (Packet4ui) a, 8))); + return pfirst(pmul(a, reinterpret_cast(vec_sld(reinterpret_cast(a), reinterpret_cast(a), 8)))); } // min template<> EIGEN_STRONG_INLINE double predux_min(const Packet2d& a) { - return pfirst(vec_min(a, (Packet2d) vec_sld((Packet4ui) a, (Packet4ui) a, 8))); + return pfirst(pmin(a, reinterpret_cast(vec_sld(reinterpret_cast(a), reinterpret_cast(a), 8)))); } // max template<> EIGEN_STRONG_INLINE double predux_max(const Packet2d& a) { - 
return pfirst(vec_max(a, (Packet2d) vec_sld((Packet4ui) a, (Packet4ui) a, 8))); + return pfirst(pmax(a, reinterpret_cast(vec_sld(reinterpret_cast(a), reinterpret_cast(a), 8)))); } template @@ -915,9 +1004,9 @@ struct palign_impl { if (Offset == 1) #ifdef _BIG_ENDIAN - first = (Packet2d) vec_sld((Packet4ui) first, (Packet4ui) second, 8); + first = reinterpret_cast(vec_sld(reinterpret_cast(first), reinterpret_cast(second), 8)); #else - first = (Packet2d) vec_sld((Packet4ui) second, (Packet4ui) first, 8); + first = reinterpret_cast(vec_sld(reinterpret_cast(second), reinterpret_cast(first), 8)); #endif } }; @@ -931,6 +1020,11 @@ ptranspose(PacketBlock& kernel) { kernel.packet[1] = t1; } +template<> EIGEN_STRONG_INLINE Packet2d pblend(const Selector<2>& ifPacket, const Packet2d& thenPacket, const Packet2d& elsePacket) { + Packet2l select = { ifPacket.select[0], ifPacket.select[1] }; + Packet2bl mask = vec_cmpeq(reinterpret_cast(select), reinterpret_cast(p2l_ONE)); + return vec_sel(elsePacket, thenPacket, mask); +} #endif // __VSX__ } // end namespace internal diff --git a/Eigen/src/Core/arch/CMakeLists.txt b/Eigen/src/Core/arch/CMakeLists.txt deleted file mode 100644 index da9793eca..000000000 --- a/Eigen/src/Core/arch/CMakeLists.txt +++ /dev/null @@ -1,10 +0,0 @@ -ADD_SUBDIRECTORY(AltiVec) -ADD_SUBDIRECTORY(AVX) -ADD_SUBDIRECTORY(AVX512) -ADD_SUBDIRECTORY(CUDA) -ADD_SUBDIRECTORY(Default) -ADD_SUBDIRECTORY(NEON) -ADD_SUBDIRECTORY(SSE) - - - diff --git a/Eigen/src/Core/arch/CUDA/CMakeLists.txt b/Eigen/src/Core/arch/CUDA/CMakeLists.txt deleted file mode 100644 index 7ba28da7c..000000000 --- a/Eigen/src/Core/arch/CUDA/CMakeLists.txt +++ /dev/null @@ -1,6 +0,0 @@ -FILE(GLOB Eigen_Core_arch_CUDA_SRCS "*.h") - -INSTALL(FILES - ${Eigen_Core_arch_CUDA_SRCS} - DESTINATION ${INCLUDE_INSTALL_DIR}/Eigen/src/Core/arch/CUDA COMPONENT Devel -) diff --git a/Eigen/src/Core/arch/CUDA/Complex.h b/Eigen/src/Core/arch/CUDA/Complex.h new file mode 100644 index 000000000..9c2536509 --- 
/dev/null +++ b/Eigen/src/Core/arch/CUDA/Complex.h @@ -0,0 +1,103 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2014 Benoit Steiner +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_COMPLEX_CUDA_H +#define EIGEN_COMPLEX_CUDA_H + +// clang-format off + +namespace Eigen { + +namespace internal { + +#if defined(__CUDACC__) && defined(EIGEN_USE_GPU) + +// Many std::complex methods such as operator+, operator-, operator* and +// operator/ are not constexpr. Due to this, clang does not treat them as device +// functions and thus Eigen functors making use of these operators fail to +// compile. Here, we manually specialize these functors for complex types when +// building for CUDA to avoid non-constexpr methods. + +// Sum +template struct scalar_sum_op, const std::complex > : binary_op_base, const std::complex > { + typedef typename std::complex result_type; + + EIGEN_EMPTY_STRUCT_CTOR(scalar_sum_op) + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE std::complex operator() (const std::complex& a, const std::complex& b) const { + return std::complex(numext::real(a) + numext::real(b), + numext::imag(a) + numext::imag(b)); + } +}; + +template struct scalar_sum_op, std::complex > : scalar_sum_op, const std::complex > {}; + + +// Difference +template struct scalar_difference_op, const std::complex > : binary_op_base, const std::complex > { + typedef typename std::complex result_type; + + EIGEN_EMPTY_STRUCT_CTOR(scalar_difference_op) + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE std::complex operator() (const std::complex& a, const std::complex& b) const { + return std::complex(numext::real(a) - numext::real(b), + numext::imag(a) - numext::imag(b)); + } +}; + +template struct scalar_difference_op, std::complex > : scalar_difference_op, const 
std::complex > {}; + + +// Product +template struct scalar_product_op, const std::complex > : binary_op_base, const std::complex > { + enum { + Vectorizable = packet_traits>::HasMul + }; + typedef typename std::complex result_type; + + EIGEN_EMPTY_STRUCT_CTOR(scalar_product_op) + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE std::complex operator() (const std::complex& a, const std::complex& b) const { + const T a_real = numext::real(a); + const T a_imag = numext::imag(a); + const T b_real = numext::real(b); + const T b_imag = numext::imag(b); + return std::complex(a_real * b_real - a_imag * b_imag, + a_real * b_imag + a_imag * b_real); + } +}; + +template struct scalar_product_op, std::complex > : scalar_product_op, const std::complex > {}; + + +// Quotient +template struct scalar_quotient_op, const std::complex > : binary_op_base, const std::complex > { + enum { + Vectorizable = packet_traits>::HasDiv + }; + typedef typename std::complex result_type; + + EIGEN_EMPTY_STRUCT_CTOR(scalar_quotient_op) + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE std::complex operator() (const std::complex& a, const std::complex& b) const { + const T a_real = numext::real(a); + const T a_imag = numext::imag(a); + const T b_real = numext::real(b); + const T b_imag = numext::imag(b); + const T norm = T(1) / (b_real * b_real + b_imag * b_imag); + return std::complex((a_real * b_real + a_imag * b_imag) * norm, + (a_imag * b_real - a_real * b_imag) * norm); + } +}; + +template struct scalar_quotient_op, std::complex > : scalar_quotient_op, const std::complex > {}; + +#endif + +} // end namespace internal + +} // end namespace Eigen + +#endif // EIGEN_COMPLEX_CUDA_H diff --git a/Eigen/src/Core/arch/CUDA/Half.h b/Eigen/src/Core/arch/CUDA/Half.h index 060c2c805..52892db38 100644 --- a/Eigen/src/Core/arch/CUDA/Half.h +++ b/Eigen/src/Core/arch/CUDA/Half.h @@ -1,11 +1,3 @@ -// Standard 16-bit float type, mostly useful for GPUs. 
Defines a new -// class Eigen::half (inheriting from CUDA's __half struct) with -// operator overloads such that it behaves basically as an arithmetic -// type. It will be quite slow on CPUs (so it is recommended to stay -// in fp32 for CPUs, except for simple parameter conversions, I/O -// to disk and the likes), but fast on GPUs. -// -// // This file is part of Eigen, a lightweight C++ template library // for linear algebra. // @@ -32,6 +24,15 @@ // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// Standard 16-bit float type, mostly useful for GPUs. Defines a new +// type Eigen::half (inheriting from CUDA's __half struct) with +// operator overloads such that it behaves basically as an arithmetic +// type. It will be quite slow on CPUs (so it is recommended to stay +// in fp32 for CPUs, except for simple parameter conversions, I/O +// to disk and the likes), but fast on GPUs. + + #ifndef EIGEN_HALF_CUDA_H #define EIGEN_HALF_CUDA_H @@ -42,92 +43,93 @@ #endif +namespace Eigen { + +struct half; + +namespace half_impl { + #if !defined(EIGEN_HAS_CUDA_FP16) // Make our own __half definition that is similar to CUDA's. 
struct __half { - __half() {} - explicit __half(unsigned short raw) : x(raw) {} + EIGEN_DEVICE_FUNC __half() {} + explicit EIGEN_DEVICE_FUNC __half(unsigned short raw) : x(raw) {} unsigned short x; }; #endif -namespace Eigen { +EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC __half raw_uint16_to_half(unsigned short x); +EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC __half float_to_half_rtne(float ff); +EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC float half_to_float(__half h); -namespace internal { +struct half_base : public __half { + EIGEN_DEVICE_FUNC half_base() {} + EIGEN_DEVICE_FUNC half_base(const half_base& h) : __half(h) {} + EIGEN_DEVICE_FUNC half_base(const __half& h) : __half(h) {} +}; -static EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC __half raw_uint16_to_half(unsigned short x); -static EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC __half float_to_half_rtne(float ff); -static EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC float half_to_float(__half h); - -} // end namespace internal +} // namespace half_impl // Class definition. -struct half : public __half { +struct half : public half_impl::half_base { + #if !defined(EIGEN_HAS_CUDA_FP16) + typedef half_impl::__half __half; + #endif + EIGEN_DEVICE_FUNC half() {} - EIGEN_DEVICE_FUNC half(const __half& h) : __half(h) {} - EIGEN_DEVICE_FUNC half(const half& h) : __half(h) {} + EIGEN_DEVICE_FUNC half(const __half& h) : half_impl::half_base(h) {} + EIGEN_DEVICE_FUNC half(const half& h) : half_impl::half_base(h) {} explicit EIGEN_DEVICE_FUNC half(bool b) - : __half(internal::raw_uint16_to_half(b ? 
0x3c00 : 0)) {} - explicit EIGEN_DEVICE_FUNC half(unsigned int ui) - : __half(internal::float_to_half_rtne(static_cast(ui))) {} - explicit EIGEN_DEVICE_FUNC half(int i) - : __half(internal::float_to_half_rtne(static_cast(i))) {} - explicit EIGEN_DEVICE_FUNC half(unsigned long ul) - : __half(internal::float_to_half_rtne(static_cast(ul))) {} - explicit EIGEN_DEVICE_FUNC half(long l) - : __half(internal::float_to_half_rtne(static_cast(l))) {} - explicit EIGEN_DEVICE_FUNC half(long long ll) - : __half(internal::float_to_half_rtne(static_cast(ll))) {} - explicit EIGEN_DEVICE_FUNC half(unsigned long long ull) - : __half(internal::float_to_half_rtne(static_cast(ull))) {} + : half_impl::half_base(half_impl::raw_uint16_to_half(b ? 0x3c00 : 0)) {} + template + explicit EIGEN_DEVICE_FUNC half(const T& val) + : half_impl::half_base(half_impl::float_to_half_rtne(static_cast(val))) {} explicit EIGEN_DEVICE_FUNC half(float f) - : __half(internal::float_to_half_rtne(f)) {} - explicit EIGEN_DEVICE_FUNC half(double d) - : __half(internal::float_to_half_rtne(static_cast(d))) {} + : half_impl::half_base(half_impl::float_to_half_rtne(f)) {} EIGEN_DEVICE_FUNC EIGEN_EXPLICIT_CAST(bool) const { // +0.0 and -0.0 become false, everything else becomes true. 
return (x & 0x7fff) != 0; } EIGEN_DEVICE_FUNC EIGEN_EXPLICIT_CAST(signed char) const { - return static_cast(internal::half_to_float(*this)); + return static_cast(half_impl::half_to_float(*this)); } EIGEN_DEVICE_FUNC EIGEN_EXPLICIT_CAST(unsigned char) const { - return static_cast(internal::half_to_float(*this)); + return static_cast(half_impl::half_to_float(*this)); } EIGEN_DEVICE_FUNC EIGEN_EXPLICIT_CAST(short) const { - return static_cast(internal::half_to_float(*this)); + return static_cast(half_impl::half_to_float(*this)); } EIGEN_DEVICE_FUNC EIGEN_EXPLICIT_CAST(unsigned short) const { - return static_cast(internal::half_to_float(*this)); + return static_cast(half_impl::half_to_float(*this)); } EIGEN_DEVICE_FUNC EIGEN_EXPLICIT_CAST(int) const { - return static_cast(internal::half_to_float(*this)); + return static_cast(half_impl::half_to_float(*this)); } EIGEN_DEVICE_FUNC EIGEN_EXPLICIT_CAST(unsigned int) const { - return static_cast(internal::half_to_float(*this)); + return static_cast(half_impl::half_to_float(*this)); } EIGEN_DEVICE_FUNC EIGEN_EXPLICIT_CAST(long) const { - return static_cast(internal::half_to_float(*this)); + return static_cast(half_impl::half_to_float(*this)); } EIGEN_DEVICE_FUNC EIGEN_EXPLICIT_CAST(unsigned long) const { - return static_cast(internal::half_to_float(*this)); + return static_cast(half_impl::half_to_float(*this)); } EIGEN_DEVICE_FUNC EIGEN_EXPLICIT_CAST(long long) const { - return static_cast(internal::half_to_float(*this)); + return static_cast(half_impl::half_to_float(*this)); } EIGEN_DEVICE_FUNC EIGEN_EXPLICIT_CAST(unsigned long long) const { - return static_cast(internal::half_to_float(*this)); + return static_cast(half_to_float(*this)); } EIGEN_DEVICE_FUNC EIGEN_EXPLICIT_CAST(float) const { - return internal::half_to_float(*this); + return half_impl::half_to_float(*this); } EIGEN_DEVICE_FUNC EIGEN_EXPLICIT_CAST(double) const { - return static_cast(internal::half_to_float(*this)); + return 
static_cast(half_impl::half_to_float(*this)); } EIGEN_DEVICE_FUNC half& operator=(const half& other) { @@ -136,6 +138,8 @@ struct half : public __half { } }; +namespace half_impl { + #if defined(EIGEN_HAS_CUDA_FP16) && defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530 // Intrinsics for native fp16 support. Note that on current hardware, @@ -200,55 +204,55 @@ __device__ bool operator >= (const half& a, const half& b) { // Definitions for CPUs and older CUDA, mostly working through conversion // to/from fp32. -static EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half operator + (const half& a, const half& b) { +EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half operator + (const half& a, const half& b) { return half(float(a) + float(b)); } -static EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half operator * (const half& a, const half& b) { +EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half operator * (const half& a, const half& b) { return half(float(a) * float(b)); } -static EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half operator - (const half& a, const half& b) { +EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half operator - (const half& a, const half& b) { return half(float(a) - float(b)); } -static EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half operator / (const half& a, const half& b) { +EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half operator / (const half& a, const half& b) { return half(float(a) / float(b)); } -static EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half operator - (const half& a) { +EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half operator - (const half& a) { half result; result.x = a.x ^ 0x8000; return result; } -static EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half& operator += (half& a, const half& b) { +EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half& operator += (half& a, const half& b) { a = half(float(a) + float(b)); return a; } -static EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half& operator *= (half& a, const half& b) { +EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half& operator *= (half& a, const half& b) { a = 
half(float(a) * float(b)); return a; } -static EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half& operator -= (half& a, const half& b) { +EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half& operator -= (half& a, const half& b) { a = half(float(a) - float(b)); return a; } -static EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half& operator /= (half& a, const half& b) { +EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half& operator /= (half& a, const half& b) { a = half(float(a) / float(b)); return a; } -static EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool operator == (const half& a, const half& b) { +EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool operator == (const half& a, const half& b) { return float(a) == float(b); } -static EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool operator != (const half& a, const half& b) { +EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool operator != (const half& a, const half& b) { return float(a) != float(b); } -static EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool operator < (const half& a, const half& b) { +EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool operator < (const half& a, const half& b) { return float(a) < float(b); } -static EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool operator <= (const half& a, const half& b) { +EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool operator <= (const half& a, const half& b) { return float(a) <= float(b); } -static EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool operator > (const half& a, const half& b) { +EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool operator > (const half& a, const half& b) { return float(a) > float(b); } -static EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool operator >= (const half& a, const half& b) { +EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool operator >= (const half& a, const half& b) { return float(a) >= float(b); } @@ -256,8 +260,8 @@ static EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool operator >= (const half& a, co // Division by an index. Do it in full float precision to avoid accuracy // issues in converting the denominator to half. 
-static EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half operator / (const half& a, Index b) { - return Eigen::half(static_cast(a) / static_cast(b)); +EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half operator / (const half& a, Index b) { + return half(static_cast(a) / static_cast(b)); } // Conversion routines, including fallbacks for the host or older CUDA. @@ -265,9 +269,7 @@ static EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half operator / (const half& a, Ind // these in hardware. If we need more performance on older/other CPUs, they are // also possible to vectorize directly. -namespace internal { - -static EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC __half raw_uint16_to_half(unsigned short x) { +EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC __half raw_uint16_to_half(unsigned short x) { __half h; h.x = x; return h; @@ -278,7 +280,7 @@ union FP32 { float f; }; -static EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC __half float_to_half_rtne(float ff) { +EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC __half float_to_half_rtne(float ff) { #if defined(EIGEN_HAS_CUDA_FP16) && defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 300 return __float2half(ff); @@ -333,7 +335,7 @@ static EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC __half float_to_half_rtne(float ff) #endif } -static EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC float half_to_float(__half h) { +EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC float half_to_float(__half h) { #if defined(EIGEN_HAS_CUDA_FP16) && defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 300 return __half2float(h); @@ -362,92 +364,69 @@ static EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC float half_to_float(__half h) { #endif } -} // end namespace internal +// --- standard functions --- -// Traits. 
- -namespace internal { - -template<> struct is_arithmetic { enum { value = true }; }; - -} // end namespace internal - -template<> struct NumTraits - : GenericNumTraits -{ - EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Eigen::half epsilon() { - return internal::raw_uint16_to_half(0x0800); - } - EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Eigen::half dummy_precision() { return half(1e-3f); } - EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Eigen::half highest() { - return internal::raw_uint16_to_half(0x7bff); - } - EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Eigen::half lowest() { - return internal::raw_uint16_to_half(0xfbff); - } - EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Eigen::half infinity() { - return internal::raw_uint16_to_half(0x7c00); - } - EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Eigen::half quiet_NaN() { - return internal::raw_uint16_to_half(0x7c01); - } -}; - -// Infinity/NaN checks. - -namespace numext { - -static EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool (isinf)(const Eigen::half& a) { +EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool (isinf)(const half& a) { return (a.x & 0x7fff) == 0x7c00; } -static EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool (isnan)(const Eigen::half& a) { +EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool (isnan)(const half& a) { #if defined(EIGEN_HAS_CUDA_FP16) && defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530 return __hisnan(a); #else return (a.x & 0x7fff) > 0x7c00; #endif } -static EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool (isfinite)(const Eigen::half& a) { - return !(Eigen::numext::isinf)(a) && !(Eigen::numext::isnan)(a); +EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool (isfinite)(const half& a) { + return !(isinf EIGEN_NOT_A_MACRO (a)) && !(isnan EIGEN_NOT_A_MACRO (a)); } -template<> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half abs(const Eigen::half& a) { - Eigen::half result; +EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half abs(const half& a) { + half result; result.x = a.x & 0x7FFF; return result; } -template<> EIGEN_STRONG_INLINE 
EIGEN_DEVICE_FUNC Eigen::half exp(const Eigen::half& a) { - return Eigen::half(::expf(float(a))); +EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half exp(const half& a) { + return half(::expf(float(a))); } -template<> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half log(const Eigen::half& a) { - return Eigen::half(::logf(float(a))); +EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half log(const half& a) { +#if defined(EIGEN_HAS_CUDA_FP16) && defined __CUDACC_VER__ && __CUDACC_VER__ >= 80000 && defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530 + return Eigen::half(::hlog(a)); +#else + return half(::logf(float(a))); +#endif } -template<> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half sqrt(const Eigen::half& a) { - return Eigen::half(::sqrtf(float(a))); +EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half log1p(const half& a) { + return half(numext::log1p(float(a))); } -template<> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half pow(const Eigen::half& a, const Eigen::half& b) { - return Eigen::half(::powf(float(a), float(b))); +EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half log10(const half& a) { + return half(::log10f(float(a))); } -template<> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half sin(const Eigen::half& a) { - return Eigen::half(::sinf(float(a))); +EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half sqrt(const half& a) { + return half(::sqrtf(float(a))); } -template<> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half cos(const Eigen::half& a) { - return Eigen::half(::cosf(float(a))); +EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half pow(const half& a, const half& b) { + return half(::powf(float(a), float(b))); } -template<> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half tan(const Eigen::half& a) { - return Eigen::half(::tanf(float(a))); +EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half sin(const half& a) { + return half(::sinf(float(a))); } -template<> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half tanh(const Eigen::half& a) { - return Eigen::half(::tanhf(float(a))); +EIGEN_STRONG_INLINE 
EIGEN_DEVICE_FUNC half cos(const half& a) { + return half(::cosf(float(a))); } -template<> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half floor(const Eigen::half& a) { - return Eigen::half(::floorf(float(a))); +EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half tan(const half& a) { + return half(::tanf(float(a))); } -template<> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half ceil(const Eigen::half& a) { - return Eigen::half(::ceilf(float(a))); +EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half tanh(const half& a) { + return half(::tanhf(float(a))); +} +EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half floor(const half& a) { + return half(::floorf(float(a))); +} +EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half ceil(const half& a) { + return half(::ceilf(float(a))); } -template <> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half mini(const Eigen::half& a, const Eigen::half& b) { +EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half (min)(const half& a, const half& b) { #if defined(EIGEN_HAS_CUDA_FP16) && defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530 return __hlt(b, a) ? b : a; #else @@ -456,7 +435,7 @@ template <> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half mini(const Eigen:: return f2 < f1 ? b : a; #endif } -template <> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half maxi(const Eigen::half& a, const Eigen::half& b) { +EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half (max)(const half& a, const half& b) { #if defined(EIGEN_HAS_CUDA_FP16) && defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530 return __hlt(a, b) ? 
b : a; #else @@ -466,78 +445,89 @@ template <> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half maxi(const Eigen:: #endif } -#ifdef EIGEN_HAS_C99_MATH -template<> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half lgamma(const Eigen::half& a) { - return Eigen::half(Eigen::numext::lgamma(static_cast(a))); +EIGEN_ALWAYS_INLINE std::ostream& operator << (std::ostream& os, const half& v) { + os << static_cast(v); + return os; } -template<> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half digamma(const Eigen::half& a) { - return Eigen::half(Eigen::numext::digamma(static_cast(a))); -} -template<> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half zeta(const Eigen::half& x, const Eigen::half& q) { - return Eigen::half(Eigen::numext::zeta(static_cast(x), static_cast(q))); -} -template<> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half polygamma(const Eigen::half& n, const Eigen::half& x) { - return Eigen::half(Eigen::numext::polygamma(static_cast(n), static_cast(x))); -} -template<> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half erf(const Eigen::half& a) { - return Eigen::half(Eigen::numext::erf(static_cast(a))); -} -template<> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half erfc(const Eigen::half& a) { - return Eigen::half(Eigen::numext::erfc(static_cast(a))); -} -template<> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half igamma(const Eigen::half& a, const Eigen::half& x) { - return Eigen::half(Eigen::numext::igamma(static_cast(a), static_cast(x))); -} -template<> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half igammac(const Eigen::half& a, const Eigen::half& x) { - return Eigen::half(Eigen::numext::igammac(static_cast(a), static_cast(x))); -} -#endif -} // end namespace numext + +} // end namespace half_impl + +// import Eigen::half_impl::half into Eigen namespace +// using half_impl::half; + +namespace internal { + +template<> +struct random_default_impl +{ + static inline half run(const half& x, const half& y) + { + return x + (y-x) * half(float(std::rand()) 
/ float(RAND_MAX)); + } + static inline half run() + { + return run(half(-1.f), half(1.f)); + } +}; + +template<> struct is_arithmetic { enum { value = true }; }; + +} // end namespace internal + +template<> struct NumTraits + : GenericNumTraits +{ + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Eigen::half epsilon() { + return half_impl::raw_uint16_to_half(0x0800); + } + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Eigen::half dummy_precision() { return Eigen::half(1e-2f); } + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Eigen::half highest() { + return half_impl::raw_uint16_to_half(0x7bff); + } + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Eigen::half lowest() { + return half_impl::raw_uint16_to_half(0xfbff); + } + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Eigen::half infinity() { + return half_impl::raw_uint16_to_half(0x7c00); + } + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Eigen::half quiet_NaN() { + return half_impl::raw_uint16_to_half(0x7c01); + } +}; } // end namespace Eigen -// Standard mathematical functions and trancendentals. -static EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half fabsh(const Eigen::half& a) { +// C-like standard mathematical functions and trancendentals. 
+EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half fabsh(const Eigen::half& a) { Eigen::half result; result.x = a.x & 0x7FFF; return result; } -static EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half exph(const Eigen::half& a) { +EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half exph(const Eigen::half& a) { return Eigen::half(::expf(float(a))); } -static EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half logh(const Eigen::half& a) { +EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half logh(const Eigen::half& a) { +#if defined __CUDACC_VER__ && __CUDACC_VER__ >= 80000 && defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530 + return Eigen::half(::hlog(a)); +#else return Eigen::half(::logf(float(a))); +#endif } -static EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half sqrth(const Eigen::half& a) { +EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half sqrth(const Eigen::half& a) { return Eigen::half(::sqrtf(float(a))); } -static EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half powh(const Eigen::half& a, const Eigen::half& b) { +EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half powh(const Eigen::half& a, const Eigen::half& b) { return Eigen::half(::powf(float(a), float(b))); } -static EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half floorh(const Eigen::half& a) { +EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half floorh(const Eigen::half& a) { return Eigen::half(::floorf(float(a))); } -static EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half ceilh(const Eigen::half& a) { +EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half ceilh(const Eigen::half& a) { return Eigen::half(::ceilf(float(a))); } -static EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC int (isnan)(const Eigen::half& a) { - return (Eigen::numext::isnan)(a); -} -static EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC int (isinf)(const Eigen::half& a) { - return (Eigen::numext::isinf)(a); -} -static EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC int (isfinite)(const Eigen::half& a) { - return !(Eigen::numext::isinf)(a) && !(Eigen::numext::isnan)(a); 
-} - namespace std { -EIGEN_ALWAYS_INLINE ostream& operator << (ostream& os, const Eigen::half& v) { - os << static_cast(v); - return os; -} - #if __cplusplus > 199711L template <> struct hash { @@ -551,19 +541,45 @@ struct hash { // Add the missing shfl_xor intrinsic -#if defined(EIGEN_HAS_CUDA_FP16) && defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 300 +#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 300 __device__ EIGEN_STRONG_INLINE Eigen::half __shfl_xor(Eigen::half var, int laneMask, int width=warpSize) { return static_cast(__shfl_xor(static_cast(var), laneMask, width)); } #endif // ldg() has an overload for __half, but we also need one for Eigen::half. -#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 320 -static EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half __ldg(const Eigen::half* ptr) { - return Eigen::internal::raw_uint16_to_half( +#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 350 +EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half __ldg(const Eigen::half* ptr) { + return Eigen::half_impl::raw_uint16_to_half( __ldg(reinterpret_cast(ptr))); } #endif +#if defined(__CUDA_ARCH__) +namespace Eigen { +namespace numext { + +template<> +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE +bool (isnan)(const Eigen::half& h) { + return (half_impl::isnan)(h); +} + +template<> +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE +bool (isinf)(const Eigen::half& h) { + return (half_impl::isinf)(h); +} + +template<> +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE +bool (isfinite)(const Eigen::half& h) { + return (half_impl::isfinite)(h); +} + +} // namespace Eigen +} // namespace numext +#endif + #endif // EIGEN_HALF_CUDA_H diff --git a/Eigen/src/Core/arch/CUDA/MathFunctions.h b/Eigen/src/Core/arch/CUDA/MathFunctions.h index 317499b29..0348b41db 100644 --- a/Eigen/src/Core/arch/CUDA/MathFunctions.h +++ b/Eigen/src/Core/arch/CUDA/MathFunctions.h @@ -27,9 +27,22 @@ float4 plog(const float4& a) template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2 plog(const double2& a) { + using ::log; return 
make_double2(log(a.x), log(a.y)); } +template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE +float4 plog1p(const float4& a) +{ + return make_float4(log1pf(a.x), log1pf(a.y), log1pf(a.z), log1pf(a.w)); +} + +template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE +double2 plog1p(const double2& a) +{ + return make_double2(log1p(a.x), log1p(a.y)); +} + template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 pexp(const float4& a) { @@ -39,6 +52,7 @@ float4 pexp(const float4& a) template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2 pexp(const double2& a) { + using ::exp; return make_double2(exp(a.x), exp(a.y)); } @@ -51,6 +65,7 @@ float4 psqrt(const float4& a) template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2 psqrt(const double2& a) { + using ::sqrt; return make_double2(sqrt(a.x), sqrt(a.y)); } @@ -66,120 +81,6 @@ double2 prsqrt(const double2& a) return make_double2(rsqrt(a.x), rsqrt(a.y)); } -template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE -float4 plgamma(const float4& a) -{ - return make_float4(lgammaf(a.x), lgammaf(a.y), lgammaf(a.z), lgammaf(a.w)); -} - -template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE -double2 plgamma(const double2& a) -{ - return make_double2(lgamma(a.x), lgamma(a.y)); -} - -template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE -float4 pdigamma(const float4& a) -{ - using numext::digamma; - return make_float4(digamma(a.x), digamma(a.y), digamma(a.z), digamma(a.w)); -} - -template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE -double2 pdigamma(const double2& a) -{ - using numext::digamma; - return make_double2(digamma(a.x), digamma(a.y)); -} - -template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE -float4 pzeta(const float4& x, const float4& q) -{ - using numext::zeta; - return make_float4(zeta(x.x, q.x), zeta(x.y, q.y), zeta(x.z, q.z), zeta(x.w, q.w)); -} - -template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE -double2 pzeta(const double2& x, const double2& q) -{ - using numext::zeta; - return make_double2(zeta(x.x, q.x), zeta(x.y, q.y)); -} - -template<> 
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE -float4 ppolygamma(const float4& n, const float4& x) -{ - using numext::polygamma; - return make_float4(polygamma(n.x, x.x), polygamma(n.y, x.y), polygamma(n.z, x.z), polygamma(n.w, x.w)); -} - -template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE -double2 ppolygamma(const double2& n, const double2& x) -{ - using numext::polygamma; - return make_double2(polygamma(n.x, x.x), polygamma(n.y, x.y)); -} - -template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE -float4 perf(const float4& a) -{ - return make_float4(erf(a.x), erf(a.y), erf(a.z), erf(a.w)); -} - -template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE -double2 perf(const double2& a) -{ - return make_double2(erf(a.x), erf(a.y)); -} - -template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE -float4 perfc(const float4& a) -{ - return make_float4(erfc(a.x), erfc(a.y), erfc(a.z), erfc(a.w)); -} - -template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE -double2 perfc(const double2& a) -{ - return make_double2(erfc(a.x), erfc(a.y)); -} - - -template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE -float4 pigamma(const float4& a, const float4& x) -{ - using numext::igamma; - return make_float4( - igamma(a.x, x.x), - igamma(a.y, x.y), - igamma(a.z, x.z), - igamma(a.w, x.w)); -} - -template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE -double2 pigamma(const double2& a, const double2& x) -{ - using numext::igamma; - return make_double2(igamma(a.x, x.x), igamma(a.y, x.y)); -} - -template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE -float4 pigammac(const float4& a, const float4& x) -{ - using numext::igammac; - return make_float4( - igammac(a.x, x.x), - igammac(a.y, x.y), - igammac(a.z, x.z), - igammac(a.w, x.w)); -} - -template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE -double2 pigammac(const double2& a, const double2& x) -{ - using numext::igammac; - return make_double2(igammac(a.x, x.x), igammac(a.y, x.y)); -} #endif diff --git a/Eigen/src/Core/arch/CUDA/PacketMath.h b/Eigen/src/Core/arch/CUDA/PacketMath.h index 
932df1092..ad66399e0 100644 --- a/Eigen/src/Core/arch/CUDA/PacketMath.h +++ b/Eigen/src/Core/arch/CUDA/PacketMath.h @@ -44,8 +44,9 @@ template<> struct packet_traits : default_packet_traits HasPolygamma = 1, HasErf = 1, HasErfc = 1, - HasIgamma = 1, + HasIGamma = 1, HasIGammac = 1, + HasBetaInc = 1, HasBlend = 0, }; @@ -68,10 +69,13 @@ template<> struct packet_traits : default_packet_traits HasRsqrt = 1, HasLGamma = 1, HasDiGamma = 1, + HasZeta = 1, + HasPolygamma = 1, HasErf = 1, HasErfc = 1, HasIGamma = 1, HasIGammac = 1, + HasBetaInc = 1, HasBlend = 0, }; @@ -278,35 +282,6 @@ template<> EIGEN_DEVICE_FUNC inline double predux_mul(const double2& a) return a.x * a.y; } -template -struct protate_impl -{ - static float4 run(const float4& a) { - if (offset == 0) { - return make_float4(a.x, a.y, a.z, a.w); - } - if (offset == 1) { - return make_float4(a.w, a.x, a.y, a.z); - } - if (offset == 2) { - return make_float4(a.z, a.w, a.x, a.y); - } - return make_float4(a.y, a.z, a.w, a.x); - } -}; - -template -struct protate_impl -{ - static double2 run(const double2& a) { - if (offset == 0) { - return make_double2(a.x, a.y); - } - return make_double2(a.y, a.x); - } -}; - - template<> EIGEN_DEVICE_FUNC inline float4 pabs(const float4& a) { return make_float4(fabsf(a.x), fabsf(a.y), fabsf(a.z), fabsf(a.w)); } diff --git a/Eigen/src/Core/arch/CUDA/PacketMathHalf.h b/Eigen/src/Core/arch/CUDA/PacketMathHalf.h index 61d532e4d..82dfc12c9 100644 --- a/Eigen/src/Core/arch/CUDA/PacketMathHalf.h +++ b/Eigen/src/Core/arch/CUDA/PacketMathHalf.h @@ -10,22 +10,16 @@ #ifndef EIGEN_PACKET_MATH_HALF_CUDA_H #define EIGEN_PACKET_MATH_HALF_CUDA_H -#if defined(EIGEN_HAS_CUDA_FP16) - -// Make sure this is only available when targeting a GPU: we don't want to -// introduce conflicts between these packet_traits definitions and the ones -// we'll use on the host side (SSE, AVX, ...) 
-#if defined(__CUDACC__) && defined(EIGEN_USE_GPU) - -// Most of the following operations require arch >= 5.3 -#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530 namespace Eigen { namespace internal { +// Most of the following operations require arch >= 3.0 +#if defined(EIGEN_HAS_CUDA_FP16) && defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 300 + template<> struct is_arithmetic { enum { value = true }; }; -template<> struct packet_traits : default_packet_traits +template<> struct packet_traits : default_packet_traits { typedef half2 type; typedef half2 half; @@ -34,105 +28,172 @@ template<> struct packet_traits : default_packet_traits AlignedOnScalar = 1, size=2, HasHalfPacket = 0, - HasDiv = 1 + HasAdd = 1, + HasMul = 1, + HasDiv = 1, + HasSqrt = 1, + HasRsqrt = 1, + HasExp = 1, + HasLog = 1, + HasLog1p = 1 }; }; +template<> struct unpacket_traits { typedef Eigen::half type; enum {size=2, alignment=Aligned16}; typedef half2 half; }; -template<> struct unpacket_traits { typedef half type; enum {size=2, alignment=Aligned16}; typedef half2 half; }; - -template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pset1(const half& from) { +template<> __device__ EIGEN_STRONG_INLINE half2 pset1(const Eigen::half& from) { return __half2half2(from); } -template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pload(const half* from) { +template<> __device__ EIGEN_STRONG_INLINE half2 pload(const Eigen::half* from) { return *reinterpret_cast(from); } -template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 ploadu(const half* from) { +template<> __device__ EIGEN_STRONG_INLINE half2 ploadu(const Eigen::half* from) { return __halves2half2(from[0], from[1]); } -template<> EIGEN_STRONG_INLINE half2 ploaddup(const half* from) { +template<> EIGEN_STRONG_INLINE half2 ploaddup(const Eigen::half* from) { return __halves2half2(from[0], from[0]); } -template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void pstore(half* to, const half2& from) { +template<> __device__ EIGEN_STRONG_INLINE void 
pstore(Eigen::half* to, const half2& from) { *reinterpret_cast(to) = from; } -template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void pstoreu(half* to, const half2& from) { +template<> __device__ EIGEN_STRONG_INLINE void pstoreu(Eigen::half* to, const half2& from) { to[0] = __low2half(from); to[1] = __high2half(from); } template<> -EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE half2 ploadt_ro(const half* from) { - return __ldg((const half2*)from); + __device__ EIGEN_ALWAYS_INLINE half2 ploadt_ro(const Eigen::half* from) { +#if __CUDA_ARCH__ >= 350 + return __ldg((const half2*)from); +#else + return __halves2half2(*(from+0), *(from+1)); +#endif } template<> -EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE half2 ploadt_ro(const half* from) { - return __halves2half2(__ldg(from+0), __ldg(from+1)); +__device__ EIGEN_ALWAYS_INLINE half2 ploadt_ro(const Eigen::half* from) { +#if __CUDA_ARCH__ >= 350 + return __halves2half2(__ldg(from+0), __ldg(from+1)); +#else + return __halves2half2(*(from+0), *(from+1)); +#endif } -template<> EIGEN_DEVICE_FUNC inline half2 pgather(const half* from, Index stride) { +template<> __device__ EIGEN_STRONG_INLINE half2 pgather(const Eigen::half* from, Index stride) { return __halves2half2(from[0*stride], from[1*stride]); } -template<> EIGEN_DEVICE_FUNC inline void pscatter(half* to, const half2& from, Index stride) { +template<> __device__ EIGEN_STRONG_INLINE void pscatter(Eigen::half* to, const half2& from, Index stride) { to[stride*0] = __low2half(from); to[stride*1] = __high2half(from); } -template<> EIGEN_DEVICE_FUNC inline half pfirst(const half2& a) { +template<> __device__ EIGEN_STRONG_INLINE Eigen::half pfirst(const half2& a) { return __low2half(a); } -template<> EIGEN_DEVICE_FUNC inline half2 pabs(const half2& a) { +template<> __device__ EIGEN_STRONG_INLINE half2 pabs(const half2& a) { half2 result; result.x = a.x & 0x7FFF7FFF; return result; } -EIGEN_DEVICE_FUNC inline void +__device__ EIGEN_STRONG_INLINE void ptranspose(PacketBlock& kernel) { - half a1 
= __low2half(kernel.packet[0]); - half a2 = __high2half(kernel.packet[0]); - half b1 = __low2half(kernel.packet[1]); - half b2 = __high2half(kernel.packet[1]); + __half a1 = __low2half(kernel.packet[0]); + __half a2 = __high2half(kernel.packet[0]); + __half b1 = __low2half(kernel.packet[1]); + __half b2 = __high2half(kernel.packet[1]); kernel.packet[0] = __halves2half2(a1, b1); kernel.packet[1] = __halves2half2(a2, b2); } -template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 plset(const half& a) { +template<> __device__ EIGEN_STRONG_INLINE half2 plset(const Eigen::half& a) { +#if __CUDA_ARCH__ >= 530 return __halves2half2(a, __hadd(a, __float2half(1.0f))); +#else + float f = __half2float(a) + 1.0f; + return __halves2half2(a, __float2half(f)); +#endif } -template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 padd(const half2& a, const half2& b) { +template<> __device__ EIGEN_STRONG_INLINE half2 padd(const half2& a, const half2& b) { +#if __CUDA_ARCH__ >= 530 return __hadd2(a, b); +#else + float a1 = __low2float(a); + float a2 = __high2float(a); + float b1 = __low2float(b); + float b2 = __high2float(b); + float r1 = a1 + b1; + float r2 = a2 + b2; + return __floats2half2_rn(r1, r2); +#endif } -template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 psub(const half2& a, const half2& b) { +template<> __device__ EIGEN_STRONG_INLINE half2 psub(const half2& a, const half2& b) { +#if __CUDA_ARCH__ >= 530 return __hsub2(a, b); +#else + float a1 = __low2float(a); + float a2 = __high2float(a); + float b1 = __low2float(b); + float b2 = __high2float(b); + float r1 = a1 - b1; + float r2 = a2 - b2; + return __floats2half2_rn(r1, r2); +#endif } -template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pnegate(const half2& a) { +template<> __device__ EIGEN_STRONG_INLINE half2 pnegate(const half2& a) { +#if __CUDA_ARCH__ >= 530 return __hneg2(a); +#else + float a1 = __low2float(a); + float a2 = __high2float(a); + return __floats2half2_rn(-a1, -a2); +#endif } -template<> 
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pconj(const half2& a) { return a; } +template<> __device__ EIGEN_STRONG_INLINE half2 pconj(const half2& a) { return a; } -template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pmul(const half2& a, const half2& b) { +template<> __device__ EIGEN_STRONG_INLINE half2 pmul(const half2& a, const half2& b) { +#if __CUDA_ARCH__ >= 530 return __hmul2(a, b); +#else + float a1 = __low2float(a); + float a2 = __high2float(a); + float b1 = __low2float(b); + float b2 = __high2float(b); + float r1 = a1 * b1; + float r2 = a2 * b2; + return __floats2half2_rn(r1, r2); +#endif } - template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pmadd(const half2& a, const half2& b, const half2& c) { +template<> __device__ EIGEN_STRONG_INLINE half2 pmadd(const half2& a, const half2& b, const half2& c) { +#if __CUDA_ARCH__ >= 530 return __hfma2(a, b, c); - } +#else + float a1 = __low2float(a); + float a2 = __high2float(a); + float b1 = __low2float(b); + float b2 = __high2float(b); + float c1 = __low2float(c); + float c2 = __high2float(c); + float r1 = a1 * b1 + c1; + float r2 = a2 * b2 + c2; + return __floats2half2_rn(r1, r2); +#endif +} -template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pdiv(const half2& a, const half2& b) { +template<> __device__ EIGEN_STRONG_INLINE half2 pdiv(const half2& a, const half2& b) { float a1 = __low2float(a); float a2 = __high2float(a); float b1 = __low2float(b); @@ -142,51 +203,529 @@ template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pdiv(const half2& return __floats2half2_rn(r1, r2); } -template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pmin(const half2& a, const half2& b) { +template<> __device__ EIGEN_STRONG_INLINE half2 pmin(const half2& a, const half2& b) { float a1 = __low2float(a); float a2 = __high2float(a); float b1 = __low2float(b); float b2 = __high2float(b); - half r1 = a1 < b1 ? __low2half(a) : __low2half(b); - half r2 = a2 < b2 ? __high2half(a) : __high2half(b); + __half r1 = a1 < b1 ? 
__low2half(a) : __low2half(b); + __half r2 = a2 < b2 ? __high2half(a) : __high2half(b); return __halves2half2(r1, r2); } -template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pmax(const half2& a, const half2& b) { +template<> __device__ EIGEN_STRONG_INLINE half2 pmax(const half2& a, const half2& b) { float a1 = __low2float(a); float a2 = __high2float(a); float b1 = __low2float(b); float b2 = __high2float(b); - half r1 = a1 > b1 ? __low2half(a) : __low2half(b); - half r2 = a2 > b2 ? __high2half(a) : __high2half(b); + __half r1 = a1 > b1 ? __low2half(a) : __low2half(b); + __half r2 = a2 > b2 ? __high2half(a) : __high2half(b); return __halves2half2(r1, r2); } -template<> EIGEN_DEVICE_FUNC inline half predux(const half2& a) { +template<> __device__ EIGEN_STRONG_INLINE Eigen::half predux(const half2& a) { +#if __CUDA_ARCH__ >= 530 return __hadd(__low2half(a), __high2half(a)); +#else + float a1 = __low2float(a); + float a2 = __high2float(a); + return Eigen::half(half_impl::raw_uint16_to_half(__float2half_rn(a1 + a2))); +#endif } -template<> EIGEN_DEVICE_FUNC inline half predux_max(const half2& a) { - half first = __low2half(a); - half second = __high2half(a); +template<> __device__ EIGEN_STRONG_INLINE Eigen::half predux_max(const half2& a) { +#if __CUDA_ARCH__ >= 530 + __half first = __low2half(a); + __half second = __high2half(a); return __hgt(first, second) ? first : second; +#else + float a1 = __low2float(a); + float a2 = __high2float(a); + return a1 > a2 ? __low2half(a) : __high2half(a); +#endif } -template<> EIGEN_DEVICE_FUNC inline half predux_min(const half2& a) { - half first = __low2half(a); - half second = __high2half(a); +template<> __device__ EIGEN_STRONG_INLINE Eigen::half predux_min(const half2& a) { +#if __CUDA_ARCH__ >= 530 + __half first = __low2half(a); + __half second = __high2half(a); return __hlt(first, second) ? first : second; +#else + float a1 = __low2float(a); + float a2 = __high2float(a); + return a1 < a2 ? 
__low2half(a) : __high2half(a); +#endif } -template<> EIGEN_DEVICE_FUNC inline half predux_mul(const half2& a) { +template<> __device__ EIGEN_STRONG_INLINE Eigen::half predux_mul(const half2& a) { +#if __CUDA_ARCH__ >= 530 return __hmul(__low2half(a), __high2half(a)); +#else + float a1 = __low2float(a); + float a2 = __high2float(a); + return Eigen::half(half_impl::raw_uint16_to_half(__float2half_rn(a1 * a2))); +#endif } -} // end namespace internal +template<> __device__ EIGEN_STRONG_INLINE half2 plog1p(const half2& a) { + float a1 = __low2float(a); + float a2 = __high2float(a); + float r1 = log1pf(a1); + float r2 = log1pf(a2); + return __floats2half2_rn(r1, r2); +} -} // end namespace Eigen +#if defined __CUDACC_VER__ && __CUDACC_VER__ >= 80000 && defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 530 + +template<> __device__ EIGEN_STRONG_INLINE +half2 plog(const half2& a) { + return h2log(a); +} + +template<> __device__ EIGEN_STRONG_INLINE +half2 pexp(const half2& a) { + return h2exp(a); +} + +template<> __device__ EIGEN_STRONG_INLINE +half2 psqrt(const half2& a) { + return h2sqrt(a); +} + +template<> __device__ EIGEN_STRONG_INLINE +half2 prsqrt(const half2& a) { + return h2rsqrt(a); +} + +#else + +template<> __device__ EIGEN_STRONG_INLINE half2 plog(const half2& a) { + float a1 = __low2float(a); + float a2 = __high2float(a); + float r1 = logf(a1); + float r2 = logf(a2); + return __floats2half2_rn(r1, r2); +} + +template<> __device__ EIGEN_STRONG_INLINE half2 pexp(const half2& a) { + float a1 = __low2float(a); + float a2 = __high2float(a); + float r1 = expf(a1); + float r2 = expf(a2); + return __floats2half2_rn(r1, r2); +} + +template<> __device__ EIGEN_STRONG_INLINE half2 psqrt(const half2& a) { + float a1 = __low2float(a); + float a2 = __high2float(a); + float r1 = sqrtf(a1); + float r2 = sqrtf(a2); + return __floats2half2_rn(r1, r2); +} + +template<> __device__ EIGEN_STRONG_INLINE half2 prsqrt(const half2& a) { + float a1 = __low2float(a); + float a2 = __high2float(a); 
+ float r1 = rsqrtf(a1); + float r2 = rsqrtf(a2); + return __floats2half2_rn(r1, r2); +} #endif + +#elif defined EIGEN_VECTORIZE_AVX + +typedef struct { + __m128i x; +} Packet8h; + + +template<> struct is_arithmetic { enum { value = true }; }; + +template <> +struct packet_traits : default_packet_traits { + typedef Packet8h type; + // There is no half-size packet for Packet8h. + typedef Packet8h half; + enum { + Vectorizable = 1, + AlignedOnScalar = 1, + size = 8, + HasHalfPacket = 0, + HasAdd = 0, + HasSub = 0, + HasMul = 0, + HasNegate = 0, + HasAbs = 0, + HasAbs2 = 0, + HasMin = 0, + HasMax = 0, + HasConj = 0, + HasSetLinear = 0, + HasDiv = 0, + HasSqrt = 0, + HasRsqrt = 0, + HasExp = 0, + HasLog = 0, + HasBlend = 0 + }; +}; + + +template<> struct unpacket_traits { typedef Eigen::half type; enum {size=8, alignment=Aligned16}; typedef Packet8h half; }; + +template<> EIGEN_STRONG_INLINE Packet8h pset1(const Eigen::half& from) { + Packet8h result; + result.x = _mm_set1_epi16(from.x); + return result; +} + +template<> EIGEN_STRONG_INLINE Eigen::half pfirst(const Packet8h& from) { + return half_impl::raw_uint16_to_half(static_cast(_mm_extract_epi16(from.x, 0))); +} + +template<> EIGEN_STRONG_INLINE Packet8h pload(const Eigen::half* from) { + Packet8h result; + result.x = _mm_load_si128(reinterpret_cast(from)); + return result; +} + +template<> EIGEN_STRONG_INLINE Packet8h ploadu(const Eigen::half* from) { + Packet8h result; + result.x = _mm_loadu_si128(reinterpret_cast(from)); + return result; +} + +template<> EIGEN_STRONG_INLINE void pstore(Eigen::half* to, const Packet8h& from) { + _mm_store_si128(reinterpret_cast<__m128i*>(to), from.x); +} + +template<> EIGEN_STRONG_INLINE void pstoreu(Eigen::half* to, const Packet8h& from) { + _mm_storeu_si128(reinterpret_cast<__m128i*>(to), from.x); +} + +template<> EIGEN_STRONG_INLINE Packet8h +ploadquad(const Eigen::half* from) { + Packet8h result; + unsigned short a = from[0].x; + unsigned short b = from[1].x; + result.x = 
_mm_set_epi16(b, b, b, b, a, a, a, a); + return result; +} + +EIGEN_STRONG_INLINE Packet8f half2float(const Packet8h& a) { +#ifdef EIGEN_HAS_FP16_C + return _mm256_cvtph_ps(a.x); +#else + EIGEN_ALIGN32 Eigen::half aux[8]; + pstore(aux, a); + float f0(aux[0]); + float f1(aux[1]); + float f2(aux[2]); + float f3(aux[3]); + float f4(aux[4]); + float f5(aux[5]); + float f6(aux[6]); + float f7(aux[7]); + + return _mm256_set_ps(f7, f6, f5, f4, f3, f2, f1, f0); #endif +} + +EIGEN_STRONG_INLINE Packet8h float2half(const Packet8f& a) { +#ifdef EIGEN_HAS_FP16_C + Packet8h result; + result.x = _mm256_cvtps_ph(a, _MM_FROUND_TO_NEAREST_INT|_MM_FROUND_NO_EXC); + return result; +#else + EIGEN_ALIGN32 float aux[8]; + pstore(aux, a); + Eigen::half h0(aux[0]); + Eigen::half h1(aux[1]); + Eigen::half h2(aux[2]); + Eigen::half h3(aux[3]); + Eigen::half h4(aux[4]); + Eigen::half h5(aux[5]); + Eigen::half h6(aux[6]); + Eigen::half h7(aux[7]); + + Packet8h result; + result.x = _mm_set_epi16(h7.x, h6.x, h5.x, h4.x, h3.x, h2.x, h1.x, h0.x); + return result; #endif +} + +template<> EIGEN_STRONG_INLINE Packet8h pconj(const Packet8h& a) { return a; } + +template<> EIGEN_STRONG_INLINE Packet8h padd(const Packet8h& a, const Packet8h& b) { + Packet8f af = half2float(a); + Packet8f bf = half2float(b); + Packet8f rf = padd(af, bf); + return float2half(rf); +} + +template<> EIGEN_STRONG_INLINE Packet8h pmul(const Packet8h& a, const Packet8h& b) { + Packet8f af = half2float(a); + Packet8f bf = half2float(b); + Packet8f rf = pmul(af, bf); + return float2half(rf); +} + +template<> EIGEN_STRONG_INLINE Packet8h pgather(const Eigen::half* from, Index stride) +{ + Packet8h result; + result.x = _mm_set_epi16(from[7*stride].x, from[6*stride].x, from[5*stride].x, from[4*stride].x, from[3*stride].x, from[2*stride].x, from[1*stride].x, from[0*stride].x); + return result; +} + +template<> EIGEN_STRONG_INLINE void pscatter(Eigen::half* to, const Packet8h& from, Index stride) +{ + EIGEN_ALIGN32 Eigen::half aux[8]; 
+ pstore(aux, from); + to[stride*0].x = aux[0].x; + to[stride*1].x = aux[1].x; + to[stride*2].x = aux[2].x; + to[stride*3].x = aux[3].x; + to[stride*4].x = aux[4].x; + to[stride*5].x = aux[5].x; + to[stride*6].x = aux[6].x; + to[stride*7].x = aux[7].x; +} + +EIGEN_STRONG_INLINE void +ptranspose(PacketBlock& kernel) { + __m128i a = kernel.packet[0].x; + __m128i b = kernel.packet[1].x; + __m128i c = kernel.packet[2].x; + __m128i d = kernel.packet[3].x; + __m128i e = kernel.packet[4].x; + __m128i f = kernel.packet[5].x; + __m128i g = kernel.packet[6].x; + __m128i h = kernel.packet[7].x; + + __m128i a03b03 = _mm_unpacklo_epi16(a, b); + __m128i c03d03 = _mm_unpacklo_epi16(c, d); + __m128i e03f03 = _mm_unpacklo_epi16(e, f); + __m128i g03h03 = _mm_unpacklo_epi16(g, h); + __m128i a47b47 = _mm_unpackhi_epi16(a, b); + __m128i c47d47 = _mm_unpackhi_epi16(c, d); + __m128i e47f47 = _mm_unpackhi_epi16(e, f); + __m128i g47h47 = _mm_unpackhi_epi16(g, h); + + __m128i a01b01c01d01 = _mm_unpacklo_epi32(a03b03, c03d03); + __m128i a23b23c23d23 = _mm_unpackhi_epi32(a03b03, c03d03); + __m128i e01f01g01h01 = _mm_unpacklo_epi32(e03f03, g03h03); + __m128i e23f23g23h23 = _mm_unpackhi_epi32(e03f03, g03h03); + __m128i a45b45c45d45 = _mm_unpacklo_epi32(a47b47, c47d47); + __m128i a67b67c67d67 = _mm_unpackhi_epi32(a47b47, c47d47); + __m128i e45f45g45h45 = _mm_unpacklo_epi32(e47f47, g47h47); + __m128i e67f67g67h67 = _mm_unpackhi_epi32(e47f47, g47h47); + + __m128i a0b0c0d0e0f0g0h0 = _mm_unpacklo_epi64(a01b01c01d01, e01f01g01h01); + __m128i a1b1c1d1e1f1g1h1 = _mm_unpackhi_epi64(a01b01c01d01, e01f01g01h01); + __m128i a2b2c2d2e2f2g2h2 = _mm_unpacklo_epi64(a23b23c23d23, e23f23g23h23); + __m128i a3b3c3d3e3f3g3h3 = _mm_unpackhi_epi64(a23b23c23d23, e23f23g23h23); + __m128i a4b4c4d4e4f4g4h4 = _mm_unpacklo_epi64(a45b45c45d45, e45f45g45h45); + __m128i a5b5c5d5e5f5g5h5 = _mm_unpackhi_epi64(a45b45c45d45, e45f45g45h45); + __m128i a6b6c6d6e6f6g6h6 = _mm_unpacklo_epi64(a67b67c67d67, e67f67g67h67); + __m128i 
a7b7c7d7e7f7g7h7 = _mm_unpackhi_epi64(a67b67c67d67, e67f67g67h67); + + kernel.packet[0].x = a0b0c0d0e0f0g0h0; + kernel.packet[1].x = a1b1c1d1e1f1g1h1; + kernel.packet[2].x = a2b2c2d2e2f2g2h2; + kernel.packet[3].x = a3b3c3d3e3f3g3h3; + kernel.packet[4].x = a4b4c4d4e4f4g4h4; + kernel.packet[5].x = a5b5c5d5e5f5g5h5; + kernel.packet[6].x = a6b6c6d6e6f6g6h6; + kernel.packet[7].x = a7b7c7d7e7f7g7h7; +} + +EIGEN_STRONG_INLINE void +ptranspose(PacketBlock& kernel) { + EIGEN_ALIGN32 Eigen::half in[4][8]; + pstore(in[0], kernel.packet[0]); + pstore(in[1], kernel.packet[1]); + pstore(in[2], kernel.packet[2]); + pstore(in[3], kernel.packet[3]); + + EIGEN_ALIGN32 Eigen::half out[4][8]; + + for (int i = 0; i < 4; ++i) { + for (int j = 0; j < 4; ++j) { + out[i][j] = in[j][2*i]; + } + for (int j = 0; j < 4; ++j) { + out[i][j+4] = in[j][2*i+1]; + } + } + + kernel.packet[0] = pload(out[0]); + kernel.packet[1] = pload(out[1]); + kernel.packet[2] = pload(out[2]); + kernel.packet[3] = pload(out[3]); +} + + +// Disable the following code since it's broken on too many platforms / compilers. +//#elif defined(EIGEN_VECTORIZE_SSE) && (!EIGEN_ARCH_x86_64) && (!EIGEN_COMP_MSVC) +#elif 0 + +typedef struct { + __m64 x; +} Packet4h; + + +template<> struct is_arithmetic { enum { value = true }; }; + +template <> +struct packet_traits : default_packet_traits { + typedef Packet4h type; + // There is no half-size packet for Packet4h. 
+ typedef Packet4h half; + enum { + Vectorizable = 1, + AlignedOnScalar = 1, + size = 4, + HasHalfPacket = 0, + HasAdd = 0, + HasSub = 0, + HasMul = 0, + HasNegate = 0, + HasAbs = 0, + HasAbs2 = 0, + HasMin = 0, + HasMax = 0, + HasConj = 0, + HasSetLinear = 0, + HasDiv = 0, + HasSqrt = 0, + HasRsqrt = 0, + HasExp = 0, + HasLog = 0, + HasBlend = 0 + }; +}; + + +template<> struct unpacket_traits { typedef Eigen::half type; enum {size=4, alignment=Aligned16}; typedef Packet4h half; }; + +template<> EIGEN_STRONG_INLINE Packet4h pset1(const Eigen::half& from) { + Packet4h result; + result.x = _mm_set1_pi16(from.x); + return result; +} + +template<> EIGEN_STRONG_INLINE Eigen::half pfirst(const Packet4h& from) { + return half_impl::raw_uint16_to_half(static_cast(_mm_cvtsi64_si32(from.x))); +} + +template<> EIGEN_STRONG_INLINE Packet4h pconj(const Packet4h& a) { return a; } + +template<> EIGEN_STRONG_INLINE Packet4h padd(const Packet4h& a, const Packet4h& b) { + __int64_t a64 = _mm_cvtm64_si64(a.x); + __int64_t b64 = _mm_cvtm64_si64(b.x); + + Eigen::half h[4]; + + Eigen::half ha = half_impl::raw_uint16_to_half(static_cast(a64)); + Eigen::half hb = half_impl::raw_uint16_to_half(static_cast(b64)); + h[0] = ha + hb; + ha = half_impl::raw_uint16_to_half(static_cast(a64 >> 16)); + hb = half_impl::raw_uint16_to_half(static_cast(b64 >> 16)); + h[1] = ha + hb; + ha = half_impl::raw_uint16_to_half(static_cast(a64 >> 32)); + hb = half_impl::raw_uint16_to_half(static_cast(b64 >> 32)); + h[2] = ha + hb; + ha = half_impl::raw_uint16_to_half(static_cast(a64 >> 48)); + hb = half_impl::raw_uint16_to_half(static_cast(b64 >> 48)); + h[3] = ha + hb; + Packet4h result; + result.x = _mm_set_pi16(h[3].x, h[2].x, h[1].x, h[0].x); + return result; +} + +template<> EIGEN_STRONG_INLINE Packet4h pmul(const Packet4h& a, const Packet4h& b) { + __int64_t a64 = _mm_cvtm64_si64(a.x); + __int64_t b64 = _mm_cvtm64_si64(b.x); + + Eigen::half h[4]; + + Eigen::half ha = 
half_impl::raw_uint16_to_half(static_cast(a64)); + Eigen::half hb = half_impl::raw_uint16_to_half(static_cast(b64)); + h[0] = ha * hb; + ha = half_impl::raw_uint16_to_half(static_cast(a64 >> 16)); + hb = half_impl::raw_uint16_to_half(static_cast(b64 >> 16)); + h[1] = ha * hb; + ha = half_impl::raw_uint16_to_half(static_cast(a64 >> 32)); + hb = half_impl::raw_uint16_to_half(static_cast(b64 >> 32)); + h[2] = ha * hb; + ha = half_impl::raw_uint16_to_half(static_cast(a64 >> 48)); + hb = half_impl::raw_uint16_to_half(static_cast(b64 >> 48)); + h[3] = ha * hb; + Packet4h result; + result.x = _mm_set_pi16(h[3].x, h[2].x, h[1].x, h[0].x); + return result; +} + +template<> EIGEN_STRONG_INLINE Packet4h pload(const Eigen::half* from) { + Packet4h result; + result.x = _mm_cvtsi64_m64(*reinterpret_cast(from)); + return result; +} + +template<> EIGEN_STRONG_INLINE Packet4h ploadu(const Eigen::half* from) { + Packet4h result; + result.x = _mm_cvtsi64_m64(*reinterpret_cast(from)); + return result; +} + +template<> EIGEN_STRONG_INLINE void pstore(Eigen::half* to, const Packet4h& from) { + __int64_t r = _mm_cvtm64_si64(from.x); + *(reinterpret_cast<__int64_t*>(to)) = r; +} + +template<> EIGEN_STRONG_INLINE void pstoreu(Eigen::half* to, const Packet4h& from) { + __int64_t r = _mm_cvtm64_si64(from.x); + *(reinterpret_cast<__int64_t*>(to)) = r; +} + +template<> EIGEN_STRONG_INLINE Packet4h +ploadquad(const Eigen::half* from) { + return pset1(*from); +} + +template<> EIGEN_STRONG_INLINE Packet4h pgather(const Eigen::half* from, Index stride) +{ + Packet4h result; + result.x = _mm_set_pi16(from[3*stride].x, from[2*stride].x, from[1*stride].x, from[0*stride].x); + return result; +} + +template<> EIGEN_STRONG_INLINE void pscatter(Eigen::half* to, const Packet4h& from, Index stride) +{ + __int64_t a = _mm_cvtm64_si64(from.x); + to[stride*0].x = static_cast(a); + to[stride*1].x = static_cast(a >> 16); + to[stride*2].x = static_cast(a >> 32); + to[stride*3].x = static_cast(a >> 48); +} + 
+EIGEN_STRONG_INLINE void +ptranspose(PacketBlock& kernel) { + __m64 T0 = _mm_unpacklo_pi16(kernel.packet[0].x, kernel.packet[1].x); + __m64 T1 = _mm_unpacklo_pi16(kernel.packet[2].x, kernel.packet[3].x); + __m64 T2 = _mm_unpackhi_pi16(kernel.packet[0].x, kernel.packet[1].x); + __m64 T3 = _mm_unpackhi_pi16(kernel.packet[2].x, kernel.packet[3].x); + + kernel.packet[0].x = _mm_unpacklo_pi32(T0, T1); + kernel.packet[1].x = _mm_unpackhi_pi32(T0, T1); + kernel.packet[2].x = _mm_unpacklo_pi32(T2, T3); + kernel.packet[3].x = _mm_unpackhi_pi32(T2, T3); +} + +#endif + +} +} + #endif // EIGEN_PACKET_MATH_HALF_CUDA_H diff --git a/Eigen/src/Core/arch/CUDA/TypeCasting.h b/Eigen/src/Core/arch/CUDA/TypeCasting.h index 396b38eaf..31f1c523a 100644 --- a/Eigen/src/Core/arch/CUDA/TypeCasting.h +++ b/Eigen/src/Core/arch/CUDA/TypeCasting.h @@ -14,50 +14,48 @@ namespace Eigen { namespace internal { -#if defined(EIGEN_HAS_CUDA_FP16) - template<> -struct scalar_cast_op { +struct scalar_cast_op { EIGEN_EMPTY_STRUCT_CTOR(scalar_cast_op) - typedef half result_type; - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half operator() (const float& a) const { - #if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 300 + typedef Eigen::half result_type; + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Eigen::half operator() (const float& a) const { + #if defined(EIGEN_HAS_CUDA_FP16) && defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 300 return __float2half(a); #else - return half(a); + return Eigen::half(a); #endif } }; template<> -struct functor_traits > +struct functor_traits > { enum { Cost = NumTraits::AddCost, PacketAccess = false }; }; template<> -struct scalar_cast_op { +struct scalar_cast_op { EIGEN_EMPTY_STRUCT_CTOR(scalar_cast_op) - typedef half result_type; - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half operator() (const int& a) const { - #if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 300 + typedef Eigen::half result_type; + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Eigen::half operator() (const int& a) const { + #if 
defined(EIGEN_HAS_CUDA_FP16) && defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 300 return __float2half(static_cast(a)); #else - return half(static_cast(a)); + return Eigen::half(static_cast(a)); #endif } }; template<> -struct functor_traits > +struct functor_traits > { enum { Cost = NumTraits::AddCost, PacketAccess = false }; }; template<> -struct scalar_cast_op { +struct scalar_cast_op { EIGEN_EMPTY_STRUCT_CTOR(scalar_cast_op) typedef float result_type; - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float operator() (const half& a) const { - #if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 300 + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float operator() (const Eigen::half& a) const { + #if defined(EIGEN_HAS_CUDA_FP16) && defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 300 return __half2float(a); #else return static_cast(a); @@ -66,15 +64,15 @@ struct scalar_cast_op { }; template<> -struct functor_traits > +struct functor_traits > { enum { Cost = NumTraits::AddCost, PacketAccess = false }; }; -#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530 +#if defined(EIGEN_HAS_CUDA_FP16) && defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 300 template <> -struct type_casting_traits { +struct type_casting_traits { enum { VectorizedCast = 1, SrcCoeffRatio = 2, @@ -89,7 +87,7 @@ template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 pcast(con } template <> -struct type_casting_traits { +struct type_casting_traits { enum { VectorizedCast = 1, SrcCoeffRatio = 1, @@ -97,12 +95,87 @@ struct type_casting_traits { }; }; -template<> EIGEN_STRONG_INLINE half2 pcast(const float4& a) { +template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pcast(const float4& a) { // Simply discard the second half of the input - return __float22half2_rn(make_float2(a.x, a.y)); + return __floats2half2_rn(a.x, a.y); +} + +#elif defined EIGEN_VECTORIZE_AVX + +template <> +struct type_casting_traits { + enum { + VectorizedCast = 1, + SrcCoeffRatio = 1, + TgtCoeffRatio = 1 + }; +}; + +template<> EIGEN_STRONG_INLINE Packet8f pcast(const 
Packet8h& a) { + return half2float(a); +} + +template <> +struct type_casting_traits { + enum { + VectorizedCast = 1, + SrcCoeffRatio = 1, + TgtCoeffRatio = 1 + }; +}; + +template<> EIGEN_STRONG_INLINE Packet8h pcast(const Packet8f& a) { + return float2half(a); +} + +// Disable the following code since it's broken on too many platforms / compilers. +//#elif defined(EIGEN_VECTORIZE_SSE) && (!EIGEN_ARCH_x86_64) && (!EIGEN_COMP_MSVC) +#elif 0 + +template <> +struct type_casting_traits { + enum { + VectorizedCast = 1, + SrcCoeffRatio = 1, + TgtCoeffRatio = 1 + }; +}; + +template<> EIGEN_STRONG_INLINE Packet4f pcast(const Packet4h& a) { + __int64_t a64 = _mm_cvtm64_si64(a.x); + Eigen::half h = raw_uint16_to_half(static_cast(a64)); + float f1 = static_cast(h); + h = raw_uint16_to_half(static_cast(a64 >> 16)); + float f2 = static_cast(h); + h = raw_uint16_to_half(static_cast(a64 >> 32)); + float f3 = static_cast(h); + h = raw_uint16_to_half(static_cast(a64 >> 48)); + float f4 = static_cast(h); + return _mm_set_ps(f4, f3, f2, f1); +} + +template <> +struct type_casting_traits { + enum { + VectorizedCast = 1, + SrcCoeffRatio = 1, + TgtCoeffRatio = 1 + }; +}; + +template<> EIGEN_STRONG_INLINE Packet4h pcast(const Packet4f& a) { + EIGEN_ALIGN16 float aux[4]; + pstore(aux, a); + Eigen::half h0(aux[0]); + Eigen::half h1(aux[1]); + Eigen::half h2(aux[2]); + Eigen::half h3(aux[3]); + + Packet4h result; + result.x = _mm_set_pi16(h3.x, h2.x, h1.x, h0.x); + return result; } -#endif #endif } // end namespace internal diff --git a/Eigen/src/Core/arch/Default/CMakeLists.txt b/Eigen/src/Core/arch/Default/CMakeLists.txt deleted file mode 100644 index 339c091d1..000000000 --- a/Eigen/src/Core/arch/Default/CMakeLists.txt +++ /dev/null @@ -1,6 +0,0 @@ -FILE(GLOB Eigen_Core_arch_Default_SRCS "*.h") - -INSTALL(FILES - ${Eigen_Core_arch_Default_SRCS} - DESTINATION ${INCLUDE_INSTALL_DIR}/Eigen/src/Core/arch/Default COMPONENT Devel -) diff --git a/Eigen/src/Core/arch/NEON/CMakeLists.txt 
b/Eigen/src/Core/arch/NEON/CMakeLists.txt deleted file mode 100644 index fd4d4af50..000000000 --- a/Eigen/src/Core/arch/NEON/CMakeLists.txt +++ /dev/null @@ -1,6 +0,0 @@ -FILE(GLOB Eigen_Core_arch_NEON_SRCS "*.h") - -INSTALL(FILES - ${Eigen_Core_arch_NEON_SRCS} - DESTINATION ${INCLUDE_INSTALL_DIR}/Eigen/src/Core/arch/NEON COMPONENT Devel -) diff --git a/Eigen/src/Core/arch/NEON/Complex.h b/Eigen/src/Core/arch/NEON/Complex.h index d2d467936..3e121dce5 100644 --- a/Eigen/src/Core/arch/NEON/Complex.h +++ b/Eigen/src/Core/arch/NEON/Complex.h @@ -2,6 +2,7 @@ // for linear algebra. // // Copyright (C) 2010 Gael Guennebaud +// Copyright (C) 2010 Konstantinos Margaritis // // This Source Code Form is subject to the terms of the Mozilla // Public License v. 2.0. If a copy of the MPL was not distributed @@ -14,8 +15,15 @@ namespace Eigen { namespace internal { -static uint32x4_t p4ui_CONJ_XOR = EIGEN_INIT_NEON_PACKET4(0x00000000, 0x80000000, 0x00000000, 0x80000000); -static uint32x2_t p2ui_CONJ_XOR = EIGEN_INIT_NEON_PACKET2(0x00000000, 0x80000000); +inline uint32x4_t p4ui_CONJ_XOR() { + static const uint32_t conj_XOR_DATA[] = { 0x00000000, 0x80000000, 0x00000000, 0x80000000 }; + return vld1q_u32( conj_XOR_DATA ); +} + +inline uint32x2_t p2ui_CONJ_XOR() { + static const uint32_t conj_XOR_DATA[] = { 0x00000000, 0x80000000 }; + return vld1_u32( conj_XOR_DATA ); +} //---------- float ---------- struct Packet2cf @@ -64,7 +72,7 @@ template<> EIGEN_STRONG_INLINE Packet2cf pnegate(const Packet2cf& a) { return Pa template<> EIGEN_STRONG_INLINE Packet2cf pconj(const Packet2cf& a) { Packet4ui b = vreinterpretq_u32_f32(a.v); - return Packet2cf(vreinterpretq_f32_u32(veorq_u32(b, p4ui_CONJ_XOR))); + return Packet2cf(vreinterpretq_f32_u32(veorq_u32(b, p4ui_CONJ_XOR()))); } template<> EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& a, const Packet2cf& b) @@ -80,7 +88,7 @@ template<> EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& a, con // Multiply the imag a with b v2 = 
vmulq_f32(v2, b.v); // Conjugate v2 - v2 = vreinterpretq_f32_u32(veorq_u32(vreinterpretq_u32_f32(v2), p4ui_CONJ_XOR)); + v2 = vreinterpretq_f32_u32(veorq_u32(vreinterpretq_u32_f32(v2), p4ui_CONJ_XOR())); // Swap real/imag elements in v2. v2 = vrev64q_f32(v2); // Add and return the result @@ -195,7 +203,7 @@ template<> EIGEN_STRONG_INLINE std::complex predux_mul(const P // Multiply the imag a with b v2 = vmul_f32(v2, a2); // Conjugate v2 - v2 = vreinterpret_f32_u32(veor_u32(vreinterpret_u32_f32(v2), p2ui_CONJ_XOR)); + v2 = vreinterpret_f32_u32(veor_u32(vreinterpret_u32_f32(v2), p2ui_CONJ_XOR())); // Swap real/imag elements in v2. v2 = vrev64_f32(v2); // Add v1, v2 @@ -274,7 +282,8 @@ ptranspose(PacketBlock& kernel) { //---------- double ---------- #if EIGEN_ARCH_ARM64 && !EIGEN_APPLE_DOUBLE_NEON_BUG -static uint64x2_t p2ul_CONJ_XOR = EIGEN_INIT_NEON_PACKET2(0x0, 0x8000000000000000); +const uint64_t p2ul_conj_XOR_DATA[] = { 0x0, 0x8000000000000000 }; +static uint64x2_t p2ul_CONJ_XOR = vld1q_u64( p2ul_conj_XOR_DATA ); struct Packet1cd { diff --git a/Eigen/src/Core/arch/NEON/PacketMath.h b/Eigen/src/Core/arch/NEON/PacketMath.h index 3224c36bd..2a8f58d74 100644 --- a/Eigen/src/Core/arch/NEON/PacketMath.h +++ b/Eigen/src/Core/arch/NEON/PacketMath.h @@ -2,7 +2,7 @@ // for linear algebra. // // Copyright (C) 2008-2009 Gael Guennebaud -// Copyright (C) 2010 Konstantinos Margaritis +// Copyright (C) 2010 Konstantinos Margaritis // Heavily based on Gael's SSE version. 
// // This Source Code Form is subject to the terms of the Mozilla @@ -49,17 +49,6 @@ typedef uint32x4_t Packet4ui; #define _EIGEN_DECLARE_CONST_Packet4i(NAME,X) \ const Packet4i p4i_##NAME = pset1(X) -#if EIGEN_COMP_LLVM && !EIGEN_COMP_CLANG - //Special treatment for Apple's llvm-gcc, its NEON packet types are unions - #define EIGEN_INIT_NEON_PACKET2(X, Y) {{X, Y}} - #define EIGEN_INIT_NEON_PACKET4(X, Y, Z, W) {{X, Y, Z, W}} -#else - //Default initializer for packets - #define EIGEN_INIT_NEON_PACKET2(X, Y) {X, Y} - #define EIGEN_INIT_NEON_PACKET4(X, Y, Z, W) {X, Y, Z, W} -#endif - - // arm64 does have the pld instruction. If available, let's trust the __builtin_prefetch built-in function // which available on LLVM and GCC (at least) #if EIGEN_HAS_BUILTIN(__builtin_prefetch) || EIGEN_COMP_GNUC @@ -122,12 +111,14 @@ template<> EIGEN_STRONG_INLINE Packet4i pset1(const int& from) { template<> EIGEN_STRONG_INLINE Packet4f plset(const float& a) { - Packet4f countdown = EIGEN_INIT_NEON_PACKET4(0, 1, 2, 3); + const float32_t f[] = {0, 1, 2, 3}; + Packet4f countdown = vld1q_f32(f); return vaddq_f32(pset1(a), countdown); } template<> EIGEN_STRONG_INLINE Packet4i plset(const int& a) { - Packet4i countdown = EIGEN_INIT_NEON_PACKET4(0, 1, 2, 3); + const int32_t i[] = {0, 1, 2, 3}; + Packet4i countdown = vld1q_s32(i); return vaddq_s32(pset1(a), countdown); } @@ -334,22 +325,6 @@ template<> EIGEN_STRONG_INLINE Packet4i preverse(const Packet4i& a) { return vcombine_s32(a_hi, a_lo); } -template -struct protate_impl -{ - static Packet4f run(const Packet4f& a) { - return vextq_f32(a, a, offset); - } -}; - -template -struct protate_impl -{ - static Packet4i run(const Packet4i& a) { - return vextq_s32(a, a, offset); - } -}; - template<> EIGEN_STRONG_INLINE Packet4f pabs(const Packet4f& a) { return vabsq_f32(a); } template<> EIGEN_STRONG_INLINE Packet4i pabs(const Packet4i& a) { return vabsq_s32(a); } @@ -601,7 +576,8 @@ template<> EIGEN_STRONG_INLINE Packet2d pset1(const double& from) 
{ r template<> EIGEN_STRONG_INLINE Packet2d plset(const double& a) { - Packet2d countdown = EIGEN_INIT_NEON_PACKET2(0, 1); + const double countdown_raw[] = {0.0,1.0}; + const Packet2d countdown = vld1q_f64(countdown_raw); return vaddq_f64(pset1(a), countdown); } template<> EIGEN_STRONG_INLINE Packet2d padd(const Packet2d& a, const Packet2d& b) { return vaddq_f64(a,b); } @@ -679,14 +655,6 @@ template<> EIGEN_STRONG_INLINE double pfirst(const Packet2d& a) { retu template<> EIGEN_STRONG_INLINE Packet2d preverse(const Packet2d& a) { return vcombine_f64(vget_high_f64(a), vget_low_f64(a)); } -template -struct protate_impl -{ - static Packet2d run(const Packet2d& a) { - return vextq_f64(a, a, offset); - } -}; - template<> EIGEN_STRONG_INLINE Packet2d pabs(const Packet2d& a) { return vabsq_f64(a); } #if EIGEN_COMP_CLANG && defined(__apple_build_version__) diff --git a/Eigen/src/Core/arch/SSE/CMakeLists.txt b/Eigen/src/Core/arch/SSE/CMakeLists.txt deleted file mode 100644 index 46ea7cc62..000000000 --- a/Eigen/src/Core/arch/SSE/CMakeLists.txt +++ /dev/null @@ -1,6 +0,0 @@ -FILE(GLOB Eigen_Core_arch_SSE_SRCS "*.h") - -INSTALL(FILES - ${Eigen_Core_arch_SSE_SRCS} - DESTINATION ${INCLUDE_INSTALL_DIR}/Eigen/src/Core/arch/SSE COMPONENT Devel -) diff --git a/Eigen/src/Core/arch/SSE/MathFunctions.h b/Eigen/src/Core/arch/SSE/MathFunctions.h index 28f103eeb..ac2fd8103 100644 --- a/Eigen/src/Core/arch/SSE/MathFunctions.h +++ b/Eigen/src/Core/arch/SSE/MathFunctions.h @@ -517,52 +517,10 @@ Packet2d prsqrt(const Packet2d& x) { } // Hyperbolic Tangent function. -// Doesn't do anything fancy, just a 13/6-degree rational interpolant which -// is accurate up to a couple of ulp in the range [-9, 9], outside of which the -// fl(tanh(x)) = +/-1. template <> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet4f -ptanh(const Packet4f& _x) { - // Clamp the inputs to the range [-9, 9] since anything outside - // this range is +/-1.0f in single-precision. 
- _EIGEN_DECLARE_CONST_Packet4f(plus_9, 9.0f); - _EIGEN_DECLARE_CONST_Packet4f(minus_9, -9.0f); - const Packet4f x = pmax(p4f_minus_9, pmin(p4f_plus_9, _x)); - - // The monomial coefficients of the numerator polynomial (odd). - _EIGEN_DECLARE_CONST_Packet4f(alpha_1, 4.89352455891786e-03f); - _EIGEN_DECLARE_CONST_Packet4f(alpha_3, 6.37261928875436e-04f); - _EIGEN_DECLARE_CONST_Packet4f(alpha_5, 1.48572235717979e-05f); - _EIGEN_DECLARE_CONST_Packet4f(alpha_7, 5.12229709037114e-08f); - _EIGEN_DECLARE_CONST_Packet4f(alpha_9, -8.60467152213735e-11f); - _EIGEN_DECLARE_CONST_Packet4f(alpha_11, 2.00018790482477e-13f); - _EIGEN_DECLARE_CONST_Packet4f(alpha_13, -2.76076847742355e-16f); - - // The monomial coefficients of the denominator polynomial (even). - _EIGEN_DECLARE_CONST_Packet4f(beta_0, 4.89352518554385e-03f); - _EIGEN_DECLARE_CONST_Packet4f(beta_2, 2.26843463243900e-03f); - _EIGEN_DECLARE_CONST_Packet4f(beta_4, 1.18534705686654e-04f); - _EIGEN_DECLARE_CONST_Packet4f(beta_6, 1.19825839466702e-06f); - - // Since the polynomials are odd/even, we need x^2. - const Packet4f x2 = pmul(x, x); - - // Evaluate the numerator polynomial p. - Packet4f p = pmadd(x2, p4f_alpha_13, p4f_alpha_11); - p = pmadd(x2, p, p4f_alpha_9); - p = pmadd(x2, p, p4f_alpha_7); - p = pmadd(x2, p, p4f_alpha_5); - p = pmadd(x2, p, p4f_alpha_3); - p = pmadd(x2, p, p4f_alpha_1); - p = pmul(x, p); - - // Evaluate the denominator polynomial p. - Packet4f q = pmadd(x2, p4f_beta_6, p4f_beta_4); - q = pmadd(x2, q, p4f_beta_2); - q = pmadd(x2, q, p4f_beta_0); - - // Divide the numerator by the denominator. 
- return pdiv(p, q); +ptanh(const Packet4f& x) { + return internal::generic_fast_tanh_float(x); } } // end namespace internal diff --git a/Eigen/src/Core/arch/SSE/PacketMath.h b/Eigen/src/Core/arch/SSE/PacketMath.h index 451034560..baad692e3 100755 --- a/Eigen/src/Core/arch/SSE/PacketMath.h +++ b/Eigen/src/Core/arch/SSE/PacketMath.h @@ -162,6 +162,11 @@ template<> struct unpacket_traits { typedef float type; enum {size=4, template<> struct unpacket_traits { typedef double type; enum {size=2, alignment=Aligned16}; typedef Packet2d half; }; template<> struct unpacket_traits { typedef int type; enum {size=4, alignment=Aligned16}; typedef Packet4i half; }; +#ifndef EIGEN_VECTORIZE_AVX +template<> struct scalar_div_cost { enum { value = 7 }; }; +template<> struct scalar_div_cost { enum { value = 8 }; }; +#endif + #if EIGEN_COMP_MSVC==1500 // Workaround MSVC 9 internal compiler error. // TODO: It has been detected with win64 builds (amd64), so let's check whether it also happens in 32bits+SSE mode @@ -434,30 +439,6 @@ template<> EIGEN_STRONG_INLINE Packet2d preverse(const Packet2d& a) template<> EIGEN_STRONG_INLINE Packet4i preverse(const Packet4i& a) { return _mm_shuffle_epi32(a,0x1B); } -template -struct protate_impl -{ - static Packet4f run(const Packet4f& a) { - return vec4f_swizzle1(a, offset, (offset + 1) % 4, (offset + 2) % 4, (offset + 3) % 4); - } -}; - -template -struct protate_impl -{ - static Packet4i run(const Packet4i& a) { - return vec4i_swizzle1(a, offset, (offset + 1) % 4, (offset + 2) % 4, (offset + 3) % 4); - } -}; - -template -struct protate_impl -{ - static Packet2d run(const Packet2d& a) { - return vec2d_swizzle1(a, offset, (offset + 1) % 2); - } -}; - template<> EIGEN_STRONG_INLINE Packet4f pabs(const Packet4f& a) { const Packet4f mask = _mm_castsi128_ps(_mm_setr_epi32(0x7FFFFFFF,0x7FFFFFFF,0x7FFFFFFF,0x7FFFFFFF)); @@ -837,6 +818,16 @@ template<> EIGEN_STRONG_INLINE Packet2d pblend(const Selector<2>& ifPacket, cons #endif } +// Scalar path for 
pmadd with FMA to ensure consistency with vectorized path. +#ifdef __FMA__ +template<> EIGEN_STRONG_INLINE float pmadd(const float& a, const float& b, const float& c) { + return ::fmaf(a,b,c); +} +template<> EIGEN_STRONG_INLINE double pmadd(const double& a, const double& b, const double& c) { + return ::fma(a,b,c); +} +#endif + } // end namespace internal } // end namespace Eigen diff --git a/Eigen/src/Core/arch/ZVector/CMakeLists.txt b/Eigen/src/Core/arch/ZVector/CMakeLists.txt deleted file mode 100644 index 5eb0957eb..000000000 --- a/Eigen/src/Core/arch/ZVector/CMakeLists.txt +++ /dev/null @@ -1,6 +0,0 @@ -FILE(GLOB Eigen_Core_arch_ZVector_SRCS "*.h") - -INSTALL(FILES - ${Eigen_Core_arch_ZVector_SRCS} - DESTINATION ${INCLUDE_INSTALL_DIR}/Eigen/src/Core/arch/ZVector COMPONENT Devel -) diff --git a/Eigen/src/Core/arch/ZVector/Complex.h b/Eigen/src/Core/arch/ZVector/Complex.h index 9a8735ac1..e9d83eca6 100644 --- a/Eigen/src/Core/arch/ZVector/Complex.h +++ b/Eigen/src/Core/arch/ZVector/Complex.h @@ -57,21 +57,6 @@ template<> EIGEN_STRONG_INLINE void pstoreu >(std::complex< template<> EIGEN_STRONG_INLINE Packet1cd pset1(const std::complex& from) { /* here we really have to use unaligned loads :( */ return ploadu(&from); } -template<> EIGEN_DEVICE_FUNC inline Packet1cd pgather, Packet1cd>(const std::complex* from, Index stride) -{ - std::complex EIGEN_ALIGN16 af[2]; - af[0] = from[0*stride]; - af[1] = from[1*stride]; - return pload(af); -} -template<> EIGEN_DEVICE_FUNC inline void pscatter, Packet1cd>(std::complex* to, const Packet1cd& from, Index stride) -{ - std::complex EIGEN_ALIGN16 af[2]; - pstore >(af, from); - to[0*stride] = af[0]; - to[1*stride] = af[1]; -} - template<> EIGEN_STRONG_INLINE Packet1cd padd(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(a.v + b.v); } template<> EIGEN_STRONG_INLINE Packet1cd psub(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(a.v - b.v); } template<> EIGEN_STRONG_INLINE Packet1cd pnegate(const Packet1cd& a) 
{ return Packet1cd(pnegate(Packet2d(a.v))); } diff --git a/Eigen/src/Core/functors/AssignmentFunctors.h b/Eigen/src/Core/functors/AssignmentFunctors.h index d55ae6096..9b373c783 100644 --- a/Eigen/src/Core/functors/AssignmentFunctors.h +++ b/Eigen/src/Core/functors/AssignmentFunctors.h @@ -18,20 +18,24 @@ namespace internal { * \brief Template functor for scalar/packet assignment * */ -template struct assign_op { +template struct assign_op { EIGEN_EMPTY_STRUCT_CTOR(assign_op) - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignCoeff(Scalar& a, const Scalar& b) const { a = b; } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignCoeff(DstScalar& a, const SrcScalar& b) const { a = b; } template - EIGEN_STRONG_INLINE void assignPacket(Scalar* a, const Packet& b) const - { internal::pstoret(a,b); } + EIGEN_STRONG_INLINE void assignPacket(DstScalar* a, const Packet& b) const + { internal::pstoret(a,b); } }; -template -struct functor_traits > { + +// Empty overload for void type (used by PermutationMatrix +template struct assign_op {}; + +template +struct functor_traits > { enum { - Cost = NumTraits::ReadCost, - PacketAccess = packet_traits::Vectorizable + Cost = NumTraits::ReadCost, + PacketAccess = is_same::value && packet_traits::Vectorizable && packet_traits::Vectorizable }; }; @@ -39,20 +43,20 @@ struct functor_traits > { * \brief Template functor for scalar/packet assignment with addition * */ -template struct add_assign_op { +template struct add_assign_op { EIGEN_EMPTY_STRUCT_CTOR(add_assign_op) - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignCoeff(Scalar& a, const Scalar& b) const { a += b; } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignCoeff(DstScalar& a, const SrcScalar& b) const { a += b; } template - EIGEN_STRONG_INLINE void assignPacket(Scalar* a, const Packet& b) const - { internal::pstoret(a,internal::padd(internal::ploadt(a),b)); } + EIGEN_STRONG_INLINE void assignPacket(DstScalar* a, const Packet& b) const + { 
internal::pstoret(a,internal::padd(internal::ploadt(a),b)); } }; -template -struct functor_traits > { +template +struct functor_traits > { enum { - Cost = NumTraits::ReadCost + NumTraits::AddCost, - PacketAccess = packet_traits::HasAdd + Cost = NumTraits::ReadCost + NumTraits::AddCost, + PacketAccess = is_same::value && packet_traits::HasAdd }; }; @@ -60,20 +64,20 @@ struct functor_traits > { * \brief Template functor for scalar/packet assignment with subtraction * */ -template struct sub_assign_op { +template struct sub_assign_op { EIGEN_EMPTY_STRUCT_CTOR(sub_assign_op) - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignCoeff(Scalar& a, const Scalar& b) const { a -= b; } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignCoeff(DstScalar& a, const SrcScalar& b) const { a -= b; } template - EIGEN_STRONG_INLINE void assignPacket(Scalar* a, const Packet& b) const - { internal::pstoret(a,internal::psub(internal::ploadt(a),b)); } + EIGEN_STRONG_INLINE void assignPacket(DstScalar* a, const Packet& b) const + { internal::pstoret(a,internal::psub(internal::ploadt(a),b)); } }; -template -struct functor_traits > { +template +struct functor_traits > { enum { - Cost = NumTraits::ReadCost + NumTraits::AddCost, - PacketAccess = packet_traits::HasSub + Cost = NumTraits::ReadCost + NumTraits::AddCost, + PacketAccess = is_same::value && packet_traits::HasSub }; }; @@ -98,30 +102,28 @@ struct functor_traits > { PacketAccess = is_same::value && packet_traits::HasMul }; }; -template struct functor_is_product_like > { enum { ret = 1 }; }; /** \internal * \brief Template functor for scalar/packet assignment with diviving * */ -template struct div_assign_op { +template struct div_assign_op { EIGEN_EMPTY_STRUCT_CTOR(div_assign_op) - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignCoeff(Scalar& a, const Scalar& b) const { a /= b; } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignCoeff(DstScalar& a, const SrcScalar& b) const { a /= b; } template - EIGEN_STRONG_INLINE void 
assignPacket(Scalar* a, const Packet& b) const - { internal::pstoret(a,internal::pdiv(internal::ploadt(a),b)); } + EIGEN_STRONG_INLINE void assignPacket(DstScalar* a, const Packet& b) const + { internal::pstoret(a,internal::pdiv(internal::ploadt(a),b)); } }; -template -struct functor_traits > { +template +struct functor_traits > { enum { - Cost = NumTraits::ReadCost + NumTraits::MulCost, - PacketAccess = packet_traits::HasDiv + Cost = NumTraits::ReadCost + NumTraits::MulCost, + PacketAccess = is_same::value && packet_traits::HasDiv }; }; - /** \internal * \brief Template functor for scalar/packet assignment with swapping * diff --git a/Eigen/src/Core/functors/BinaryFunctors.h b/Eigen/src/Core/functors/BinaryFunctors.h index 5cd8ca950..d82ffed02 100644 --- a/Eigen/src/Core/functors/BinaryFunctors.h +++ b/Eigen/src/Core/functors/BinaryFunctors.h @@ -16,27 +16,43 @@ namespace internal { //---------- associative binary functors ---------- +template +struct binary_op_base +{ + typedef Arg1 first_argument_type; + typedef Arg2 second_argument_type; +}; + /** \internal * \brief Template functor to compute the sum of two scalars * * \sa class CwiseBinaryOp, MatrixBase::operator+, class VectorwiseOp, DenseBase::sum() */ -template struct scalar_sum_op { -// typedef Scalar result_type; +template +struct scalar_sum_op : binary_op_base +{ + typedef typename ScalarBinaryOpTraits::ReturnType result_type; +#ifndef EIGEN_SCALAR_BINARY_OP_PLUGIN EIGEN_EMPTY_STRUCT_CTOR(scalar_sum_op) - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& a, const Scalar& b) const { return a + b; } +#else + scalar_sum_op() { + EIGEN_SCALAR_BINARY_OP_PLUGIN + } +#endif + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const result_type operator() (const LhsScalar& a, const RhsScalar& b) const { return a + b; } template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a, const Packet& b) const { return internal::padd(a,b); } template - EIGEN_DEVICE_FUNC 
EIGEN_STRONG_INLINE const Scalar predux(const Packet& a) const + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const result_type predux(const Packet& a) const { return internal::predux(a); } }; -template -struct functor_traits > { +template +struct functor_traits > { enum { - Cost = NumTraits::AddCost, - PacketAccess = packet_traits::HasAdd + Cost = (NumTraits::AddCost+NumTraits::AddCost)/2, // rough estimate! + PacketAccess = is_same::value && packet_traits::HasAdd && packet_traits::HasAdd + // TODO vectorize mixed sum }; }; @@ -45,7 +61,7 @@ struct functor_traits > { * This is required to solve Bug 426. * \sa DenseBase::count(), DenseBase::any(), ArrayBase::cast(), MatrixBase::cast() */ -template<> struct scalar_sum_op : scalar_sum_op { +template<> struct scalar_sum_op : scalar_sum_op { EIGEN_DEPRECATED scalar_sum_op() {} }; @@ -56,13 +72,17 @@ template<> struct scalar_sum_op : scalar_sum_op { * * \sa class CwiseBinaryOp, Cwise::operator*(), class VectorwiseOp, MatrixBase::redux() */ -template struct scalar_product_op { - enum { - // TODO vectorize mixed product - Vectorizable = is_same::value && packet_traits::HasMul && packet_traits::HasMul - }; - typedef typename scalar_product_traits::ReturnType result_type; +template +struct scalar_product_op : binary_op_base +{ + typedef typename ScalarBinaryOpTraits::ReturnType result_type; +#ifndef EIGEN_SCALAR_BINARY_OP_PLUGIN EIGEN_EMPTY_STRUCT_CTOR(scalar_product_op) +#else + scalar_product_op() { + EIGEN_SCALAR_BINARY_OP_PLUGIN + } +#endif EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const result_type operator() (const LhsScalar& a, const RhsScalar& b) const { return a * b; } template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a, const Packet& b) const @@ -75,7 +95,8 @@ template struct functor_traits > { enum { Cost = (NumTraits::MulCost + NumTraits::MulCost)/2, // rough estimate! 
- PacketAccess = scalar_product_op::Vectorizable + PacketAccess = is_same::value && packet_traits::HasMul && packet_traits::HasMul + // TODO vectorize mixed product }; }; @@ -84,13 +105,15 @@ struct functor_traits > { * * This is a short cut for conj(x) * y which is needed for optimization purpose; in Eigen2 support mode, this becomes x * conj(y) */ -template struct scalar_conj_product_op { +template +struct scalar_conj_product_op : binary_op_base +{ enum { Conj = NumTraits::IsComplex }; - typedef typename scalar_product_traits::ReturnType result_type; + typedef typename ScalarBinaryOpTraits::ReturnType result_type; EIGEN_EMPTY_STRUCT_CTOR(scalar_conj_product_op) EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const result_type operator() (const LhsScalar& a, const RhsScalar& b) const @@ -113,21 +136,24 @@ struct functor_traits > { * * \sa class CwiseBinaryOp, MatrixBase::cwiseMin, class VectorwiseOp, MatrixBase::minCoeff() */ -template struct scalar_min_op { +template +struct scalar_min_op : binary_op_base +{ + typedef typename ScalarBinaryOpTraits::ReturnType result_type; EIGEN_EMPTY_STRUCT_CTOR(scalar_min_op) - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& a, const Scalar& b) const { return numext::mini(a, b); } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const result_type operator() (const LhsScalar& a, const RhsScalar& b) const { return numext::mini(a, b); } template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a, const Packet& b) const { return internal::pmin(a,b); } template - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar predux(const Packet& a) const + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const result_type predux(const Packet& a) const { return internal::predux_min(a); } }; -template -struct functor_traits > { +template +struct functor_traits > { enum { - Cost = NumTraits::AddCost, - PacketAccess = packet_traits::HasMin + Cost = (NumTraits::AddCost+NumTraits::AddCost)/2, + PacketAccess = 
internal::is_same::value && packet_traits::HasMin }; }; @@ -136,21 +162,24 @@ struct functor_traits > { * * \sa class CwiseBinaryOp, MatrixBase::cwiseMax, class VectorwiseOp, MatrixBase::maxCoeff() */ -template struct scalar_max_op { +template +struct scalar_max_op : binary_op_base +{ + typedef typename ScalarBinaryOpTraits::ReturnType result_type; EIGEN_EMPTY_STRUCT_CTOR(scalar_max_op) - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& a, const Scalar& b) const { return numext::maxi(a, b); } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const result_type operator() (const LhsScalar& a, const RhsScalar& b) const { return numext::maxi(a, b); } template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a, const Packet& b) const { return internal::pmax(a,b); } template - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar predux(const Packet& a) const + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const result_type predux(const Packet& a) const { return internal::predux_max(a); } }; -template -struct functor_traits > { +template +struct functor_traits > { enum { - Cost = NumTraits::AddCost, - PacketAccess = packet_traits::HasMax + Cost = (NumTraits::AddCost+NumTraits::AddCost)/2, + PacketAccess = internal::is_same::value && packet_traits::HasMax }; }; @@ -158,56 +187,70 @@ struct functor_traits > { * \brief Template functors for comparison of two scalars * \todo Implement packet-comparisons */ -template struct scalar_cmp_op; +template struct scalar_cmp_op; -template -struct functor_traits > { +template +struct functor_traits > { enum { - Cost = NumTraits::AddCost, + Cost = (NumTraits::AddCost+NumTraits::AddCost)/2, PacketAccess = false }; }; -template -struct result_of(Scalar,Scalar)> { +template +struct result_of(LhsScalar,RhsScalar)> { typedef bool type; }; -template struct scalar_cmp_op { +template +struct scalar_cmp_op : binary_op_base +{ typedef bool result_type; EIGEN_EMPTY_STRUCT_CTOR(scalar_cmp_op) - EIGEN_DEVICE_FUNC 
EIGEN_STRONG_INLINE bool operator()(const Scalar& a, const Scalar& b) const {return a==b;} + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool operator()(const LhsScalar& a, const RhsScalar& b) const {return a==b;} }; -template struct scalar_cmp_op { +template +struct scalar_cmp_op : binary_op_base +{ typedef bool result_type; EIGEN_EMPTY_STRUCT_CTOR(scalar_cmp_op) - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool operator()(const Scalar& a, const Scalar& b) const {return a struct scalar_cmp_op { +template +struct scalar_cmp_op : binary_op_base +{ typedef bool result_type; EIGEN_EMPTY_STRUCT_CTOR(scalar_cmp_op) - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool operator()(const Scalar& a, const Scalar& b) const {return a<=b;} + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool operator()(const LhsScalar& a, const RhsScalar& b) const {return a<=b;} }; -template struct scalar_cmp_op { +template +struct scalar_cmp_op : binary_op_base +{ typedef bool result_type; EIGEN_EMPTY_STRUCT_CTOR(scalar_cmp_op) - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool operator()(const Scalar& a, const Scalar& b) const {return a>b;} + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool operator()(const LhsScalar& a, const RhsScalar& b) const {return a>b;} }; -template struct scalar_cmp_op { +template +struct scalar_cmp_op : binary_op_base +{ typedef bool result_type; EIGEN_EMPTY_STRUCT_CTOR(scalar_cmp_op) - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool operator()(const Scalar& a, const Scalar& b) const {return a>=b;} + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool operator()(const LhsScalar& a, const RhsScalar& b) const {return a>=b;} }; -template struct scalar_cmp_op { +template +struct scalar_cmp_op : binary_op_base +{ typedef bool result_type; EIGEN_EMPTY_STRUCT_CTOR(scalar_cmp_op) - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool operator()(const Scalar& a, const Scalar& b) const {return !(a<=b || b<=a);} + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool operator()(const LhsScalar& a, const RhsScalar& b) const {return !(a<=b || b<=a);} 
}; -template struct scalar_cmp_op { +template +struct scalar_cmp_op : binary_op_base +{ typedef bool result_type; EIGEN_EMPTY_STRUCT_CTOR(scalar_cmp_op) - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool operator()(const Scalar& a, const Scalar& b) const {return a!=b;} + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool operator()(const LhsScalar& a, const RhsScalar& b) const {return a!=b;} }; @@ -216,7 +259,9 @@ template struct scalar_cmp_op { * * \sa MatrixBase::stableNorm(), class Redux */ -template struct scalar_hypot_op { +template +struct scalar_hypot_op : binary_op_base +{ EIGEN_EMPTY_STRUCT_CTOR(scalar_hypot_op) // typedef typename NumTraits::Real result_type; EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& _x, const Scalar& _y) const @@ -237,12 +282,12 @@ template struct scalar_hypot_op { } }; template -struct functor_traits > { +struct functor_traits > { enum { Cost = 3 * NumTraits::AddCost + 2 * NumTraits::MulCost + - 2 * NumTraits::template Div::Cost, + 2 * scalar_div_cost::value, PacketAccess = false }; }; @@ -250,13 +295,24 @@ struct functor_traits > { /** \internal * \brief Template functor to compute the pow of two scalars */ -template struct scalar_binary_pow_op { - EIGEN_EMPTY_STRUCT_CTOR(scalar_binary_pow_op) +template +struct scalar_pow_op : binary_op_base +{ + typedef typename ScalarBinaryOpTraits::ReturnType result_type; +#ifndef EIGEN_SCALAR_BINARY_OP_PLUGIN + EIGEN_EMPTY_STRUCT_CTOR(scalar_pow_op) +#else + scalar_pow_op() { + typedef Scalar LhsScalar; + typedef Exponent RhsScalar; + EIGEN_SCALAR_BINARY_OP_PLUGIN + } +#endif EIGEN_DEVICE_FUNC - inline Scalar operator() (const Scalar& a, const OtherScalar& b) const { return numext::pow(a, b); } + inline result_type operator() (const Scalar& a, const Exponent& b) const { return numext::pow(a, b); } }; -template -struct functor_traits > { +template +struct functor_traits > { enum { Cost = 5 * NumTraits::MulCost, PacketAccess = false }; }; @@ -269,18 +325,27 @@ struct 
functor_traits > { * * \sa class CwiseBinaryOp, MatrixBase::operator- */ -template struct scalar_difference_op { +template +struct scalar_difference_op : binary_op_base +{ + typedef typename ScalarBinaryOpTraits::ReturnType result_type; +#ifndef EIGEN_SCALAR_BINARY_OP_PLUGIN EIGEN_EMPTY_STRUCT_CTOR(scalar_difference_op) - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& a, const Scalar& b) const { return a - b; } +#else + scalar_difference_op() { + EIGEN_SCALAR_BINARY_OP_PLUGIN + } +#endif + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const result_type operator() (const LhsScalar& a, const RhsScalar& b) const { return a - b; } template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a, const Packet& b) const { return internal::psub(a,b); } }; -template -struct functor_traits > { +template +struct functor_traits > { enum { - Cost = NumTraits::AddCost, - PacketAccess = packet_traits::HasSub + Cost = (NumTraits::AddCost+NumTraits::AddCost)/2, + PacketAccess = is_same::value && packet_traits::HasSub && packet_traits::HasSub }; }; @@ -289,13 +354,17 @@ struct functor_traits > { * * \sa class CwiseBinaryOp, Cwise::operator/() */ -template struct scalar_quotient_op { - enum { - // TODO vectorize mixed product - Vectorizable = is_same::value && packet_traits::HasDiv && packet_traits::HasDiv - }; - typedef typename scalar_product_traits::ReturnType result_type; +template +struct scalar_quotient_op : binary_op_base +{ + typedef typename ScalarBinaryOpTraits::ReturnType result_type; +#ifndef EIGEN_SCALAR_BINARY_OP_PLUGIN EIGEN_EMPTY_STRUCT_CTOR(scalar_quotient_op) +#else + scalar_quotient_op() { + EIGEN_SCALAR_BINARY_OP_PLUGIN + } +#endif EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const result_type operator() (const LhsScalar& a, const RhsScalar& b) const { return a / b; } template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a, const Packet& b) const @@ -305,8 +374,8 @@ template struct functor_traits > { 
typedef typename scalar_quotient_op::result_type result_type; enum { - PacketAccess = scalar_quotient_op::Vectorizable, - Cost = NumTraits::template Div::Cost + PacketAccess = is_same::value && packet_traits::HasDiv && packet_traits::HasDiv, + Cost = scalar_div_cost::value }; }; @@ -360,236 +429,50 @@ template<> struct functor_traits { }; }; -/** \internal - * \brief Template functor to compute the incomplete gamma function igamma(a, x) - * - * \sa class CwiseBinaryOp, Cwise::igamma - */ -template struct scalar_igamma_op { - EIGEN_EMPTY_STRUCT_CTOR(scalar_igamma_op) - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& a, const Scalar& x) const { - using numext::igamma; return igamma(a, x); - } - template - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a, const Packet& x) const { - return internal::pigammac(a, x); - } -}; -template -struct functor_traits > { - enum { - // Guesstimate - Cost = 20 * NumTraits::MulCost + 10 * NumTraits::AddCost, - PacketAccess = packet_traits::HasIGamma - }; -}; - - -/** \internal - * \brief Template functor to compute the complementary incomplete gamma function igammac(a, x) - * - * \sa class CwiseBinaryOp, Cwise::igammac - */ -template struct scalar_igammac_op { - EIGEN_EMPTY_STRUCT_CTOR(scalar_igammac_op) - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& a, const Scalar& x) const { - using numext::igammac; return igammac(a, x); - } - template - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a, const Packet& x) const - { - return internal::pigammac(a, x); - } -}; -template -struct functor_traits > { - enum { - // Guesstimate - Cost = 20 * NumTraits::MulCost + 10 * NumTraits::AddCost, - PacketAccess = packet_traits::HasIGammac - }; -}; //---------- binary functors bound to a constant, thus appearing as a unary functor ---------- -/** \internal - * \brief Template functor to multiply a scalar by a fixed other one - * - * \sa 
class CwiseUnaryOp, MatrixBase::operator*, MatrixBase::operator/ - */ -/* NOTE why doing the pset1() in packetOp *is* an optimization ? - * indeed it seems better to declare m_other as a Packet and do the pset1() once - * in the constructor. However, in practice: - * - GCC does not like m_other as a Packet and generate a load every time it needs it - * - on the other hand GCC is able to moves the pset1() outside the loop :) - * - simpler code ;) - * (ICC and gcc 4.4 seems to perform well in both cases, the issue is visible with y = a*x + b*y) - */ -template -struct scalar_multiple_op { - // FIXME default copy constructors seems bugged with std::complex<> - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE scalar_multiple_op(const scalar_multiple_op& other) : m_other(other.m_other) { } - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE scalar_multiple_op(const Scalar& other) : m_other(other) { } - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE Scalar operator() (const Scalar& a) const { return a * m_other; } - template - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a) const - { return internal::pmul(a, pset1(m_other)); } - typename add_const_on_value_type::Nested>::type m_other; -}; -template -struct functor_traits > -{ enum { Cost = NumTraits::MulCost, PacketAccess = packet_traits::HasMul }; }; +// The following two classes permits to turn any binary functor into a unary one with one argument bound to a constant value. 
+// They are analogues to std::binder1st/binder2nd but with the following differences: +// - they are compatible with packetOp +// - they are portable across C++ versions (the std::binder* are deprecated in C++11) +template struct bind1st_op : BinaryOp { -template -struct scalar_multiple2_op { - typedef typename scalar_product_traits::ReturnType result_type; - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE scalar_multiple2_op(const scalar_multiple2_op& other) : m_other(other.m_other) { } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE scalar_multiple2_op(const Scalar2& other) : m_other(other) { } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE result_type operator() (const Scalar1& a) const { return a * m_other; } - typename add_const_on_value_type::Nested>::type m_other; -}; -template -struct functor_traits > -{ enum { Cost = NumTraits::MulCost, PacketAccess = false }; }; + typedef typename BinaryOp::first_argument_type first_argument_type; + typedef typename BinaryOp::second_argument_type second_argument_type; + typedef typename BinaryOp::result_type result_type; -/** \internal - * \brief Template functor to divide a scalar by a fixed other one - * - * This functor is used to implement the quotient of a matrix by - * a scalar where the scalar type is not necessarily a floating point type. 
- * - * \sa class CwiseUnaryOp, MatrixBase::operator/ - */ -template -struct scalar_quotient1_op { - // FIXME default copy constructors seems bugged with std::complex<> - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE scalar_quotient1_op(const scalar_quotient1_op& other) : m_other(other.m_other) { } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE scalar_quotient1_op(const Scalar& other) : m_other(other) {} - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar operator() (const Scalar& a) const { return a / m_other; } - template - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a) const - { return internal::pdiv(a, pset1(m_other)); } - typename add_const_on_value_type::Nested>::type m_other; -}; -template -struct functor_traits > -{ enum { Cost = 2 * NumTraits::MulCost, PacketAccess = packet_traits::HasDiv }; }; + bind1st_op(const first_argument_type &val) : m_value(val) {} -template -struct scalar_quotient2_op { - typedef typename scalar_product_traits::ReturnType result_type; - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE scalar_quotient2_op(const scalar_quotient2_op& other) : m_other(other.m_other) { } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE scalar_quotient2_op(const Scalar2& other) : m_other(other) { } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE result_type operator() (const Scalar1& a) const { return a / m_other; } - typename add_const_on_value_type::Nested>::type m_other; -}; -template -struct functor_traits > -{ enum { Cost = 2 * NumTraits::MulCost, PacketAccess = false }; }; + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const result_type operator() (const second_argument_type& b) const { return BinaryOp::operator()(m_value,b); } -// In Eigen, any binary op (Product, CwiseBinaryOp) require the Lhs and Rhs to have the same scalar type, except for multiplication -// where the mixing of different types is handled by scalar_product_traits -// In particular, real * complex is allowed. 
-// FIXME move this to functor_traits adding a functor_default -template struct functor_is_product_like { enum { ret = 0 }; }; -template struct functor_is_product_like > { enum { ret = 1 }; }; -template struct functor_is_product_like > { enum { ret = 1 }; }; -template struct functor_is_product_like > { enum { ret = 1 }; }; - - -/** \internal - * \brief Template functor to add a scalar to a fixed other one - * \sa class CwiseUnaryOp, Array::operator+ - */ -/* If you wonder why doing the pset1() in packetOp() is an optimization check scalar_multiple_op */ -template -struct scalar_add_op { - // FIXME default copy constructors seems bugged with std::complex<> - EIGEN_DEVICE_FUNC inline scalar_add_op(const scalar_add_op& other) : m_other(other.m_other) { } - EIGEN_DEVICE_FUNC inline scalar_add_op(const Scalar& other) : m_other(other) { } - EIGEN_DEVICE_FUNC inline Scalar operator() (const Scalar& a) const { return a + m_other; } - template - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a) const - { return internal::padd(a, pset1(m_other)); } - const Scalar m_other; -}; -template -struct functor_traits > -{ enum { Cost = NumTraits::AddCost, PacketAccess = packet_traits::HasAdd }; }; - -/** \internal - * \brief Template functor to subtract a fixed scalar to another one - * \sa class CwiseUnaryOp, Array::operator-, struct scalar_add_op, struct scalar_rsub_op - */ -template -struct scalar_sub_op { - EIGEN_DEVICE_FUNC inline scalar_sub_op(const scalar_sub_op& other) : m_other(other.m_other) { } - EIGEN_DEVICE_FUNC inline scalar_sub_op(const Scalar& other) : m_other(other) { } - EIGEN_DEVICE_FUNC inline Scalar operator() (const Scalar& a) const { return a - m_other; } - template - EIGEN_DEVICE_FUNC inline const Packet packetOp(const Packet& a) const - { return internal::psub(a, pset1(m_other)); } - const Scalar m_other; -}; -template -struct functor_traits > -{ enum { Cost = NumTraits::AddCost, PacketAccess = packet_traits::HasAdd }; }; - -/** 
\internal - * \brief Template functor to subtract a scalar to fixed another one - * \sa class CwiseUnaryOp, Array::operator-, struct scalar_add_op, struct scalar_sub_op - */ -template -struct scalar_rsub_op { - EIGEN_DEVICE_FUNC inline scalar_rsub_op(const scalar_rsub_op& other) : m_other(other.m_other) { } - EIGEN_DEVICE_FUNC inline scalar_rsub_op(const Scalar& other) : m_other(other) { } - EIGEN_DEVICE_FUNC inline Scalar operator() (const Scalar& a) const { return m_other - a; } - template - EIGEN_DEVICE_FUNC inline const Packet packetOp(const Packet& a) const - { return internal::psub(pset1(m_other), a); } - const Scalar m_other; -}; -template -struct functor_traits > -{ enum { Cost = NumTraits::AddCost, PacketAccess = packet_traits::HasAdd }; }; - -/** \internal - * \brief Template functor to raise a scalar to a power - * \sa class CwiseUnaryOp, Cwise::pow - */ -template -struct scalar_pow_op { - // FIXME default copy constructors seems bugged with std::complex<> - EIGEN_DEVICE_FUNC inline scalar_pow_op(const scalar_pow_op& other) : m_exponent(other.m_exponent) { } - EIGEN_DEVICE_FUNC inline scalar_pow_op(const Scalar& exponent) : m_exponent(exponent) {} - EIGEN_DEVICE_FUNC - inline Scalar operator() (const Scalar& a) const { return numext::pow(a, m_exponent); } - const Scalar m_exponent; -}; -template -struct functor_traits > -{ enum { Cost = 5 * NumTraits::MulCost, PacketAccess = false }; }; - -/** \internal - * \brief Template functor to compute the quotient between a scalar and array entries. 
- * \sa class CwiseUnaryOp, Cwise::inverse() - */ -template -struct scalar_inverse_mult_op { - EIGEN_DEVICE_FUNC scalar_inverse_mult_op(const Scalar& other) : m_other(other) {} - EIGEN_DEVICE_FUNC inline Scalar operator() (const Scalar& a) const { return m_other / a; } template - EIGEN_DEVICE_FUNC inline const Packet packetOp(const Packet& a) const - { return internal::pdiv(pset1(m_other),a); } - Scalar m_other; + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(const Packet& b) const + { return BinaryOp::packetOp(internal::pset1(m_value), b); } + + first_argument_type m_value; }; -template -struct functor_traits > -{ enum { PacketAccess = packet_traits::HasDiv, Cost = NumTraits::template Div::Cost }; }; +template struct functor_traits > : functor_traits {}; + + +template struct bind2nd_op : BinaryOp { + + typedef typename BinaryOp::first_argument_type first_argument_type; + typedef typename BinaryOp::second_argument_type second_argument_type; + typedef typename BinaryOp::result_type result_type; + + bind2nd_op(const second_argument_type &val) : m_value(val) {} + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const result_type operator() (const first_argument_type& a) const { return BinaryOp::operator()(a,m_value); } + + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a) const + { return BinaryOp::packetOp(a,internal::pset1(m_value)); } + + second_argument_type m_value; +}; +template struct functor_traits > : functor_traits {}; } // end namespace internal diff --git a/Eigen/src/Core/functors/CMakeLists.txt b/Eigen/src/Core/functors/CMakeLists.txt deleted file mode 100644 index f4b99a9c3..000000000 --- a/Eigen/src/Core/functors/CMakeLists.txt +++ /dev/null @@ -1,6 +0,0 @@ -FILE(GLOB Eigen_Core_Functor_SRCS "*.h") - -INSTALL(FILES - ${Eigen_Core_Functor_SRCS} - DESTINATION ${INCLUDE_INSTALL_DIR}/Eigen/src/Core/functors COMPONENT Devel - ) diff --git a/Eigen/src/Core/functors/NullaryFunctors.h 
b/Eigen/src/Core/functors/NullaryFunctors.h index c5836d048..a2154d3b5 100644 --- a/Eigen/src/Core/functors/NullaryFunctors.h +++ b/Eigen/src/Core/functors/NullaryFunctors.h @@ -18,20 +18,20 @@ template struct scalar_constant_op { EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE scalar_constant_op(const scalar_constant_op& other) : m_other(other.m_other) { } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE scalar_constant_op(const Scalar& other) : m_other(other) { } - template - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (Index, Index = 0) const { return m_other; } - template - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const PacketType packetOp(Index, Index = 0) const { return internal::pset1(m_other); } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() () const { return m_other; } + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const PacketType packetOp() const { return internal::pset1(m_other); } const Scalar m_other; }; template struct functor_traits > -{ enum { Cost = 1, PacketAccess = packet_traits::Vectorizable, IsRepeatable = true }; }; +{ enum { Cost = 0 /* as the constant value should be loaded in register only once for the whole expression */, + PacketAccess = packet_traits::Vectorizable, IsRepeatable = true }; }; template struct scalar_identity_op { EIGEN_EMPTY_STRUCT_CTOR(scalar_identity_op) - template - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (Index row, Index col) const { return row==col ? Scalar(1) : Scalar(0); } + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (IndexType row, IndexType col) const { return row==col ? 
Scalar(1) : Scalar(0); } }; template struct functor_traits > @@ -55,15 +55,15 @@ struct linspaced_op_impl m_packetStep(pset1(unpacket_traits::size*m_step)), m_base(padd(pset1(low), pmul(pset1(m_step),plset(-unpacket_traits::size)))) {} - template - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (Index i) const + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (IndexType i) const { m_base = padd(m_base, pset1(m_step)); return m_low+Scalar(i)*m_step; } - template - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(Index) const { return m_base = padd(m_base,m_packetStep); } + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(IndexType) const { return m_base = padd(m_base,m_packetStep); } const Scalar m_low; const Scalar m_step; @@ -81,11 +81,11 @@ struct linspaced_op_impl m_low(low), m_step(num_steps==1 ? Scalar() : (high-low)/Scalar(num_steps-1)), m_lowPacket(pset1(m_low)), m_stepPacket(pset1(m_step)), m_interPacket(plset(0)) {} - template - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (Index i) const { return m_low+i*m_step; } + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (IndexType i) const { return m_low+i*m_step; } - template - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(Index i) const + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(IndexType i) const { return internal::padd(m_lowPacket, pmul(m_stepPacket, padd(pset1(Scalar(i)),m_interPacket))); } const Scalar m_low; @@ -99,24 +99,24 @@ template struct linspaced_op_impl { linspaced_op_impl(const Scalar& low, const Scalar& high, Index num_steps) : - m_low(low), m_length(high-low), m_divisor(num_steps==1?1:num_steps-1), m_interPacket(plset(0)) + m_low(low), m_length(high-low), m_divisor(convert_index(num_steps==1?1:num_steps-1)), m_interPacket(plset(0)) {} - template + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - const Scalar operator() (Index i) const { + 
const Scalar operator() (IndexType i) const { return m_low + (m_length*Scalar(i))/m_divisor; } - template + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - const Packet packetOp(Index i) const { + const Packet packetOp(IndexType i) const { return internal::padd(pset1(m_low), pdiv(pmul(pset1(m_length), padd(pset1(Scalar(i)),m_interPacket)), pset1(m_divisor))); } const Scalar m_low; const Scalar m_length; - const Index m_divisor; + const Scalar m_divisor; const Packet m_interPacket; }; @@ -142,29 +142,11 @@ template struct linspa : impl((num_steps==1 ? high : low),high,num_steps) {} - template - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (Index i) const { return impl(i); } + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (IndexType i) const { return impl(i); } - // We need this function when assigning e.g. a RowVectorXd to a MatrixXd since - // there row==0 and col is used for the actual iteration. - template - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (Index row, Index col) const - { - eigen_assert(col==0 || row==0); - return impl(col + row); - } - - template - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(Index i) const { return impl.packetOp(i); } - - // We need this function when assigning e.g. a RowVectorXd to a MatrixXd since - // there row==0 and col is used for the actual iteration. - template - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(Index row, Index col) const - { - eigen_assert(col==0 || row==0); - return impl.packetOp(col + row); - } + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(IndexType i) const { return impl.packetOp(i); } // This proxy object handles the actual required temporaries, the different // implementations (random vs. 
sequential access) as well as the @@ -174,11 +156,11 @@ template struct linspa const linspaced_op_impl::IsInteger?true:RandomAccess),NumTraits::IsInteger> impl; }; -// all functors allow linear access, except scalar_identity_op. So we fix here a quick meta -// to indicate whether a functor allows linear access, just always answering 'yes' except for -// scalar_identity_op. -template struct functor_has_linear_access { enum { ret = 1 }; }; -template struct functor_has_linear_access > { enum { ret = 0 }; }; +// Linear access is automatically determined from the operator() prototypes available for the given functor. +// If it exposes an operator()(i,j), then we assume the i and j coefficients are required independently +// and linear access is not possible. In all other cases, linear access is enabled. +// Users should not have to deal with this structure. +template struct functor_has_linear_access { enum { ret = !has_binary_operator::value }; }; } // end namespace internal diff --git a/Eigen/src/Core/functors/TernaryFunctors.h b/Eigen/src/Core/functors/TernaryFunctors.h new file mode 100644 index 000000000..b254e96c6 --- /dev/null +++ b/Eigen/src/Core/functors/TernaryFunctors.h @@ -0,0 +1,25 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2016 Eugene Brevdo +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ +#ifndef EIGEN_TERNARY_FUNCTORS_H +#define EIGEN_TERNARY_FUNCTORS_H + +namespace Eigen { + +namespace internal { + +//---------- associative ternary functors ---------- + + + +} // end namespace internal + +} // end namespace Eigen + +#endif // EIGEN_TERNARY_FUNCTORS_H diff --git a/Eigen/src/Core/functors/UnaryFunctors.h b/Eigen/src/Core/functors/UnaryFunctors.h index 5baba1494..2009f8e57 100644 --- a/Eigen/src/Core/functors/UnaryFunctors.h +++ b/Eigen/src/Core/functors/UnaryFunctors.h @@ -1,7 +1,7 @@ // This file is part of Eigen, a lightweight C++ template library // for linear algebra. // -// Copyright (C) 2008-2010 Gael Guennebaud +// Copyright (C) 2008-2016 Gael Guennebaud // // This Source Code Form is subject to the terms of the Mozilla // Public License v. 2.0. If a copy of the MPL was not distributed @@ -248,7 +248,7 @@ struct functor_traits > { // double: 7 pmadd, 5 pmul, 3 padd/psub, 1 div, 13 other : (14 * NumTraits::AddCost + 6 * NumTraits::MulCost + - NumTraits::template Div::HasDiv>::Cost)) + scalar_div_cost::HasDiv>::value)) #else Cost = (sizeof(Scalar) == 4 @@ -257,7 +257,7 @@ struct functor_traits > { // double: 7 pmadd, 5 pmul, 3 padd/psub, 1 div, 13 other : (23 * NumTraits::AddCost + 12 * NumTraits::MulCost + - NumTraits::template Div::HasDiv>::Cost)) + scalar_div_cost::HasDiv>::value)) #endif }; }; @@ -266,7 +266,7 @@ struct functor_traits > { * * \brief Template functor to compute the logarithm of a scalar * - * \sa class CwiseUnaryOp, Cwise::log() + * \sa class CwiseUnaryOp, ArrayBase::log() */ template struct scalar_log_op { EIGEN_EMPTY_STRUCT_CTOR(scalar_log_op) @@ -293,6 +293,26 @@ struct functor_traits > { }; }; +/** \internal + * + * \brief Template functor to compute the logarithm of 1 plus a scalar value + * + * \sa class CwiseUnaryOp, ArrayBase::log1p() + */ +template struct scalar_log1p_op { + EIGEN_EMPTY_STRUCT_CTOR(scalar_log1p_op) + EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const { return 
numext::log1p(a); } + template + EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& a) const { return internal::plog1p(a); } +}; +template +struct functor_traits > { + enum { + PacketAccess = packet_traits::HasLog1p, + Cost = functor_traits >::Cost // TODO measure cost of log1p + }; +}; + /** \internal * * \brief Template functor to compute the base-10 logarithm of a scalar @@ -452,142 +472,6 @@ struct functor_traits > }; -/** \internal - * \brief Template functor to compute the natural log of the absolute - * value of Gamma of a scalar - * \sa class CwiseUnaryOp, Cwise::lgamma() - */ -template struct scalar_lgamma_op { - EIGEN_EMPTY_STRUCT_CTOR(scalar_lgamma_op) - EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const { - using numext::lgamma; return lgamma(a); - } - typedef typename packet_traits::type Packet; - EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& a) const { return internal::plgamma(a); } -}; -template -struct functor_traits > -{ - enum { - // Guesstimate - Cost = 10 * NumTraits::MulCost + 5 * NumTraits::AddCost, - PacketAccess = packet_traits::HasLGamma - }; -}; - -/** \internal - * \brief Template functor to compute psi, the derivative of lgamma of a scalar. - * \sa class CwiseUnaryOp, Cwise::digamma() - */ -template struct scalar_digamma_op { - EIGEN_EMPTY_STRUCT_CTOR(scalar_digamma_op) - EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const { - using numext::digamma; return digamma(a); - } - typedef typename packet_traits::type Packet; - EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& a) const { return internal::pdigamma(a); } -}; -template -struct functor_traits > -{ - enum { - // Guesstimate - Cost = 10 * NumTraits::MulCost + 5 * NumTraits::AddCost, - PacketAccess = packet_traits::HasDiGamma - }; -}; - -/** \internal - * \brief Template functor to compute the Riemann Zeta function of two arguments. 
- * \sa class CwiseUnaryOp, Cwise::zeta() - */ -template struct scalar_zeta_op { - EIGEN_EMPTY_STRUCT_CTOR(scalar_zeta_op) - EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& x, const Scalar& q) const { - using numext::zeta; return zeta(x, q); - } - typedef typename packet_traits::type Packet; - EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& x, const Packet& q) const { return internal::pzeta(x, q); } -}; -template -struct functor_traits > -{ - enum { - // Guesstimate - Cost = 10 * NumTraits::MulCost + 5 * NumTraits::AddCost, - PacketAccess = packet_traits::HasZeta - }; -}; - -/** \internal - * \brief Template functor to compute the polygamma function. - * \sa class CwiseUnaryOp, Cwise::polygamma() - */ -template struct scalar_polygamma_op { - EIGEN_EMPTY_STRUCT_CTOR(scalar_polygamma_op) - EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& n, const Scalar& x) const { - using numext::polygamma; return polygamma(n, x); - } - typedef typename packet_traits::type Packet; - EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& n, const Packet& x) const { return internal::ppolygamma(n, x); } -}; -template -struct functor_traits > -{ - enum { - // Guesstimate - Cost = 10 * NumTraits::MulCost + 5 * NumTraits::AddCost, - PacketAccess = packet_traits::HasPolygamma - }; -}; - -/** \internal - * \brief Template functor to compute the Gauss error function of a - * scalar - * \sa class CwiseUnaryOp, Cwise::erf() - */ -template struct scalar_erf_op { - EIGEN_EMPTY_STRUCT_CTOR(scalar_erf_op) - EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const { - using numext::erf; return erf(a); - } - typedef typename packet_traits::type Packet; - EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& a) const { return internal::perf(a); } -}; -template -struct functor_traits > -{ - enum { - // Guesstimate - Cost = 10 * NumTraits::MulCost + 5 * NumTraits::AddCost, - PacketAccess = packet_traits::HasErf - }; -}; - -/** \internal - * \brief 
Template functor to compute the Complementary Error Function - * of a scalar - * \sa class CwiseUnaryOp, Cwise::erfc() - */ -template struct scalar_erfc_op { - EIGEN_EMPTY_STRUCT_CTOR(scalar_erfc_op) - EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const { - using numext::erfc; return erfc(a); - } - typedef typename packet_traits::type Packet; - EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& a) const { return internal::perfc(a); } -}; -template -struct functor_traits > -{ - enum { - // Guesstimate - Cost = 10 * NumTraits::MulCost + 5 * NumTraits::AddCost, - PacketAccess = packet_traits::HasErfc - }; -}; - - /** \internal * \brief Template functor to compute the atan of a scalar * \sa class CwiseUnaryOp, ArrayBase::atan() @@ -607,39 +491,40 @@ struct functor_traits > }; }; - /** \internal * \brief Template functor to compute the tanh of a scalar * \sa class CwiseUnaryOp, ArrayBase::tanh() */ -template struct scalar_tanh_op { +template +struct scalar_tanh_op { EIGEN_EMPTY_STRUCT_CTOR(scalar_tanh_op) - EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const { return numext::tanh(a); } + EIGEN_DEVICE_FUNC inline const Scalar operator()(const Scalar& a) const { return numext::tanh(a); } template - EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& a) const { return internal::ptanh(a); } + EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& x) const { return ptanh(x); } }; -template -struct functor_traits > -{ + +template +struct functor_traits > { enum { PacketAccess = packet_traits::HasTanh, - Cost = - (PacketAccess - // The following numbers are based on the AVX implementation, + Cost = ( (EIGEN_FAST_MATH && is_same::value) +// The following numbers are based on the AVX implementation, #ifdef EIGEN_VECTORIZE_FMA - // Haswell can issue 2 add/mul/madd per cycle. - // 9 pmadd, 2 pmul, 1 div, 2 other - ? 
(2 * NumTraits::AddCost + 6 * NumTraits::MulCost + - NumTraits::template Div::HasDiv>::Cost) + // Haswell can issue 2 add/mul/madd per cycle. + // 9 pmadd, 2 pmul, 1 div, 2 other + ? (2 * NumTraits::AddCost + + 6 * NumTraits::MulCost + + scalar_div_cost::HasDiv>::value) #else - ? (11 * NumTraits::AddCost + - 11 * NumTraits::MulCost + - NumTraits::template Div::HasDiv>::Cost) + ? (11 * NumTraits::AddCost + + 11 * NumTraits::MulCost + + scalar_div_cost::HasDiv>::value) #endif - // This number assumes a naive implementation of tanh - : (6 * NumTraits::AddCost + 3 * NumTraits::MulCost + - 2 * NumTraits::template Div::HasDiv>::Cost + - functor_traits >::Cost)) + // This number assumes a naive implementation of tanh + : (6 * NumTraits::AddCost + + 3 * NumTraits::MulCost + + 2 * scalar_div_cost::HasDiv>::value + + functor_traits >::Cost)) }; }; @@ -880,9 +765,9 @@ struct scalar_sign_op { { typedef typename NumTraits::Real real_type; real_type aa = numext::abs(a); - if (aa==0) + if (aa==real_type(0)) return Scalar(0); - aa = 1./aa; + aa = real_type(1)/aa; return Scalar(real(a)*aa, imag(a)*aa ); } //TODO diff --git a/Eigen/src/Core/products/CMakeLists.txt b/Eigen/src/Core/products/CMakeLists.txt deleted file mode 100644 index 21fc94ae3..000000000 --- a/Eigen/src/Core/products/CMakeLists.txt +++ /dev/null @@ -1,6 +0,0 @@ -FILE(GLOB Eigen_Core_Product_SRCS "*.h") - -INSTALL(FILES - ${Eigen_Core_Product_SRCS} - DESTINATION ${INCLUDE_INSTALL_DIR}/Eigen/src/Core/products COMPONENT Devel - ) diff --git a/Eigen/src/Core/products/GeneralBlockPanelKernel.h b/Eigen/src/Core/products/GeneralBlockPanelKernel.h index a96c7bfd4..10d132957 100644 --- a/Eigen/src/Core/products/GeneralBlockPanelKernel.h +++ b/Eigen/src/Core/products/GeneralBlockPanelKernel.h @@ -299,16 +299,6 @@ void computeProductBlockingSizes(Index& k, Index& m, Index& n, Index num_threads if (!useSpecificBlockingSizes(k, m, n)) { evaluateProductBlockingSizesHeuristic(k, m, n, num_threads); } - - typedef gebp_traits 
Traits; - enum { - kr = 8, - mr = Traits::mr, - nr = Traits::nr - }; - if (k > kr) k -= k % kr; - if (m > mr) m -= m % mr; - if (n > nr) n -= n % nr; } template @@ -363,7 +353,7 @@ class gebp_traits public: typedef _LhsScalar LhsScalar; typedef _RhsScalar RhsScalar; - typedef typename scalar_product_traits::ReturnType ResScalar; + typedef typename ScalarBinaryOpTraits::ReturnType ResScalar; enum { ConjLhs = _ConjLhs, @@ -444,15 +434,16 @@ public: template EIGEN_STRONG_INLINE void madd(const LhsPacketType& a, const RhsPacketType& b, AccPacketType& c, AccPacketType& tmp) const { + conj_helper cj; // It would be a lot cleaner to call pmadd all the time. Unfortunately if we // let gcc allocate the register in which to store the result of the pmul // (in the case where there is no FMA) gcc fails to figure out how to avoid // spilling register. #ifdef EIGEN_HAS_SINGLE_INSTRUCTION_MADD EIGEN_UNUSED_VARIABLE(tmp); - c = pmadd(a,b,c); + c = cj.pmadd(a,b,c); #else - tmp = b; tmp = pmul(a,tmp); c = padd(c,tmp); + tmp = b; tmp = cj.pmul(a,tmp); c = padd(c,tmp); #endif } @@ -467,9 +458,6 @@ public: r = pmadd(c,alpha,r); } -protected: -// conj_helper cj; -// conj_helper pcj; }; template @@ -478,7 +466,7 @@ class gebp_traits, RealScalar, _ConjLhs, false> public: typedef std::complex LhsScalar; typedef RealScalar RhsScalar; - typedef typename scalar_product_traits::ReturnType ResScalar; + typedef typename ScalarBinaryOpTraits::ReturnType ResScalar; enum { ConjLhs = _ConjLhs, @@ -860,80 +848,6 @@ protected: conj_helper cj; }; -// helper for the rotating kernel below -template -struct PossiblyRotatingKernelHelper -{ - // default implementation, not rotating - - typedef typename GebpKernel::Traits Traits; - typedef typename Traits::RhsScalar RhsScalar; - typedef typename Traits::RhsPacket RhsPacket; - typedef typename Traits::AccPacket AccPacket; - - const Traits& traits; - PossiblyRotatingKernelHelper(const Traits& t) : traits(t) {} - - - template - void loadOrRotateRhs(RhsPacket& 
to, const RhsScalar* from) const - { - traits.loadRhs(from + (Index+4*K)*Traits::RhsProgress, to); - } - - void unrotateResult(AccPacket&, - AccPacket&, - AccPacket&, - AccPacket&) - { - } -}; - -// rotating implementation -template -struct PossiblyRotatingKernelHelper -{ - typedef typename GebpKernel::Traits Traits; - typedef typename Traits::RhsScalar RhsScalar; - typedef typename Traits::RhsPacket RhsPacket; - typedef typename Traits::AccPacket AccPacket; - - const Traits& traits; - PossiblyRotatingKernelHelper(const Traits& t) : traits(t) {} - - template - void loadOrRotateRhs(RhsPacket& to, const RhsScalar* from) const - { - if (Index == 0) { - to = pload(from + 4*K*Traits::RhsProgress); - } else { - EIGEN_ASM_COMMENT("Do not reorder code, we're very tight on registers"); - to = protate<1>(to); - } - } - - void unrotateResult(AccPacket& res0, - AccPacket& res1, - AccPacket& res2, - AccPacket& res3) - { - PacketBlock resblock; - resblock.packet[0] = res0; - resblock.packet[1] = res1; - resblock.packet[2] = res2; - resblock.packet[3] = res3; - ptranspose(resblock); - resblock.packet[3] = protate<1>(resblock.packet[3]); - resblock.packet[2] = protate<2>(resblock.packet[2]); - resblock.packet[1] = protate<3>(resblock.packet[1]); - ptranspose(resblock); - res0 = resblock.packet[0]; - res1 = resblock.packet[1]; - res2 = resblock.packet[2]; - res3 = resblock.packet[3]; - } -}; - /* optimized GEneral packed Block * packed Panel product kernel * * Mixing type logic: C += A * B @@ -967,16 +881,6 @@ struct gebp_kernel ResPacketSize = Traits::ResPacketSize }; - - static const bool UseRotatingKernel = - EIGEN_ARCH_ARM && - internal::is_same::value && - internal::is_same::value && - internal::is_same::value && - Traits::LhsPacketSize == 4 && - Traits::RhsPacketSize == 4 && - Traits::ResPacketSize == 4; - EIGEN_DONT_INLINE void operator()(const DataMapper& res, const LhsScalar* blockA, const RhsScalar* blockB, Index rows, Index depth, Index cols, ResScalar alpha, @@ -1009,9 
+913,7 @@ void gebp_kernel=3*Traits::LhsProgress) - { - PossiblyRotatingKernelHelper possiblyRotatingKernelHelper(traits); - + { // Here, the general idea is to loop on each largest micro horizontal panel of the lhs (3*Traits::LhsProgress x depth) // and on each largest micro vertical panel of the rhs (depth * nr). // Blocking sizes, i.e., 'depth' has been computed so that the micro horizontal panel of the lhs fit in L1. @@ -1074,19 +976,19 @@ void gebp_kernel(B_0, blB); \ + traits.loadRhs(blB + (0+4*K)*Traits::RhsProgress, B_0); \ traits.madd(A0, B_0, C0, T0); \ traits.madd(A1, B_0, C4, T0); \ traits.madd(A2, B_0, C8, B_0); \ - possiblyRotatingKernelHelper.template loadOrRotateRhs(B_0, blB); \ + traits.loadRhs(blB + (1+4*K)*Traits::RhsProgress, B_0); \ traits.madd(A0, B_0, C1, T0); \ traits.madd(A1, B_0, C5, T0); \ traits.madd(A2, B_0, C9, B_0); \ - possiblyRotatingKernelHelper.template loadOrRotateRhs(B_0, blB); \ + traits.loadRhs(blB + (2+4*K)*Traits::RhsProgress, B_0); \ traits.madd(A0, B_0, C2, T0); \ traits.madd(A1, B_0, C6, T0); \ traits.madd(A2, B_0, C10, B_0); \ - possiblyRotatingKernelHelper.template loadOrRotateRhs(B_0, blB); \ + traits.loadRhs(blB + (3+4*K)*Traits::RhsProgress, B_0); \ traits.madd(A0, B_0, C3 , T0); \ traits.madd(A1, B_0, C7, T0); \ traits.madd(A2, B_0, C11, B_0); \ @@ -1120,10 +1022,6 @@ void gebp_kernel(alpha); @@ -1625,9 +1523,13 @@ void gebp_kernel::half SResPacketHalf; + if ((SwappedTraits::LhsProgress % 4) == 0 && + (SwappedTraits::LhsProgress <= 8) && + (SwappedTraits::LhsProgress!=8 || unpacket_traits::size==nr)) { SAccPacket C0, C1, C2, C3; straits.initAcc(C0); diff --git a/Eigen/src/Core/products/GeneralMatrixMatrix.h b/Eigen/src/Core/products/GeneralMatrixMatrix.h index a39c7808c..b1465c3b5 100644 --- a/Eigen/src/Core/products/GeneralMatrixMatrix.h +++ b/Eigen/src/Core/products/GeneralMatrixMatrix.h @@ -25,7 +25,7 @@ struct general_matrix_matrix_product Traits; - typedef typename scalar_product_traits::ReturnType ResScalar; + 
typedef typename ScalarBinaryOpTraits::ReturnType ResScalar; static EIGEN_STRONG_INLINE void run( Index rows, Index cols, Index depth, const LhsScalar* lhs, Index lhsStride, @@ -55,7 +55,7 @@ struct general_matrix_matrix_product Traits; -typedef typename scalar_product_traits::ReturnType ResScalar; +typedef typename ScalarBinaryOpTraits::ReturnType ResScalar; static void run(Index rows, Index cols, Index depth, const LhsScalar* _lhs, Index lhsStride, const RhsScalar* _rhs, Index rhsStride, @@ -309,8 +309,8 @@ class gemm_blocking_spacem_blockA = m_staticA; this->m_blockB = m_staticB; #else - this->m_blockA = reinterpret_cast((std::size_t(m_staticA) + (EIGEN_DEFAULT_ALIGN_BYTES-1)) & ~std::size_t(EIGEN_DEFAULT_ALIGN_BYTES-1)); - this->m_blockB = reinterpret_cast((std::size_t(m_staticB) + (EIGEN_DEFAULT_ALIGN_BYTES-1)) & ~std::size_t(EIGEN_DEFAULT_ALIGN_BYTES-1)); + this->m_blockA = reinterpret_cast((internal::UIntPtr(m_staticA) + (EIGEN_DEFAULT_ALIGN_BYTES-1)) & ~std::size_t(EIGEN_DEFAULT_ALIGN_BYTES-1)); + this->m_blockB = reinterpret_cast((internal::UIntPtr(m_staticB) + (EIGEN_DEFAULT_ALIGN_BYTES-1)) & ~std::size_t(EIGEN_DEFAULT_ALIGN_BYTES-1)); #endif } diff --git a/Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h b/Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h index 80ba89465..29d6dc721 100644 --- a/Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h +++ b/Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h @@ -40,7 +40,7 @@ template struct general_matrix_matrix_triangular_product { - typedef typename scalar_product_traits::ReturnType ResScalar; + typedef typename ScalarBinaryOpTraits::ReturnType ResScalar; static EIGEN_STRONG_INLINE void run(Index size, Index depth,const LhsScalar* lhs, Index lhsStride, const RhsScalar* rhs, Index rhsStride, ResScalar* res, Index resStride, const ResScalar& alpha, level3_blocking& blocking) @@ -57,7 +57,7 @@ template struct general_matrix_matrix_triangular_product { - typedef typename 
scalar_product_traits::ReturnType ResScalar; + typedef typename ScalarBinaryOpTraits::ReturnType ResScalar; static EIGEN_STRONG_INLINE void run(Index size, Index depth,const LhsScalar* _lhs, Index lhsStride, const RhsScalar* _rhs, Index rhsStride, ResScalar* _res, Index resStride, const ResScalar& alpha, level3_blocking& blocking) diff --git a/Eigen/src/Core/products/GeneralMatrixVector.h b/Eigen/src/Core/products/GeneralMatrixVector.h index 8b7dca45f..3c1a7fc40 100644 --- a/Eigen/src/Core/products/GeneralMatrixVector.h +++ b/Eigen/src/Core/products/GeneralMatrixVector.h @@ -58,7 +58,7 @@ namespace internal { template struct general_matrix_vector_product { - typedef typename scalar_product_traits::ReturnType ResScalar; + typedef typename ScalarBinaryOpTraits::ReturnType ResScalar; enum { Vectorizable = packet_traits::Vectorizable && packet_traits::Vectorizable @@ -140,7 +140,7 @@ EIGEN_DONT_INLINE void general_matrix_vector_product struct general_matrix_vector_product { -typedef typename scalar_product_traits::ReturnType ResScalar; +typedef typename ScalarBinaryOpTraits::ReturnType ResScalar; enum { Vectorizable = packet_traits::Vectorizable && packet_traits::Vectorizable @@ -457,8 +457,8 @@ EIGEN_DONT_INLINE void general_matrix_vector_product, 0, OuterStride<> > lhs(_lhs,m,m,OuterStride<>(lhsStride)); \ a_tmp = lhs.conjugate(); \ a = a_tmp.data(); \ - lda = a_tmp.outerStride(); \ + lda = convert_index(a_tmp.outerStride()); \ } else a = _lhs; \ if (LhsStorageOrder==RowMajor) uplo='U'; \ \ @@ -256,7 +256,7 @@ struct product_selfadjoint_matrix(b_tmp.outerStride()); \ } \ \ BLASPREFIX##hemm_(&side, &uplo, &m, &n, &numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (const BLASTYPE*)b, &ldb, &numext::real_ref(beta), (BLASTYPE*)res, &ldc); \ diff --git a/Eigen/src/Core/products/SelfadjointMatrixVector.h b/Eigen/src/Core/products/SelfadjointMatrixVector.h index d8d30267e..d97f8caa7 100644 --- a/Eigen/src/Core/products/SelfadjointMatrixVector.h +++ 
b/Eigen/src/Core/products/SelfadjointMatrixVector.h @@ -179,7 +179,7 @@ struct selfadjoint_product_impl { typedef typename Dest::Scalar ResScalar; typedef typename Rhs::Scalar RhsScalar; - typedef Map, Aligned> MappedDest; + typedef Map, EIGEN_PLAIN_ENUM_MIN(AlignedMax,internal::packet_traits::size)> MappedDest; eigen_assert(dest.rows()==a_lhs.rows() && dest.cols()==a_rhs.cols()); diff --git a/Eigen/src/Core/products/TriangularMatrixVector.h b/Eigen/src/Core/products/TriangularMatrixVector.h index f79840aa7..4b292e74d 100644 --- a/Eigen/src/Core/products/TriangularMatrixVector.h +++ b/Eigen/src/Core/products/TriangularMatrixVector.h @@ -20,7 +20,7 @@ struct triangular_matrix_vector_product; template struct triangular_matrix_vector_product { - typedef typename scalar_product_traits::ReturnType ResScalar; + typedef typename ScalarBinaryOpTraits::ReturnType ResScalar; enum { IsLower = ((Mode&Lower)==Lower), HasUnitDiag = (Mode & UnitDiag)==UnitDiag, @@ -91,7 +91,7 @@ EIGEN_DONT_INLINE void triangular_matrix_vector_product struct triangular_matrix_vector_product { - typedef typename scalar_product_traits::ReturnType ResScalar; + typedef typename ScalarBinaryOpTraits::ReturnType ResScalar; enum { IsLower = ((Mode&Lower)==Lower), HasUnitDiag = (Mode & UnitDiag)==UnitDiag, @@ -216,7 +216,7 @@ template struct trmv_selector typedef internal::blas_traits RhsBlasTraits; typedef typename RhsBlasTraits::DirectLinearAccessType ActualRhsType; - typedef Map, Aligned> MappedDest; + typedef Map, EIGEN_PLAIN_ENUM_MIN(AlignedMax,internal::packet_traits::size)> MappedDest; typename internal::add_const_on_value_type::type actualLhs = LhsBlasTraits::extract(lhs); typename internal::add_const_on_value_type::type actualRhs = RhsBlasTraits::extract(rhs); diff --git a/Eigen/src/Core/util/BlasUtil.h b/Eigen/src/Core/util/BlasUtil.h index 498db3a70..6e6ee119b 100755 --- a/Eigen/src/Core/util/BlasUtil.h +++ b/Eigen/src/Core/util/BlasUtil.h @@ -44,16 +44,29 @@ template struct conj_if; template<> 
struct conj_if { template - inline T operator()(const T& x) { return numext::conj(x); } + inline T operator()(const T& x) const { return numext::conj(x); } template - inline T pconj(const T& x) { return internal::pconj(x); } + inline T pconj(const T& x) const { return internal::pconj(x); } }; template<> struct conj_if { template - inline const T& operator()(const T& x) { return x; } + inline const T& operator()(const T& x) const { return x; } template - inline const T& pconj(const T& x) { return x; } + inline const T& pconj(const T& x) const { return x; } +}; + +// Generic implementation for custom complex types. +template +struct conj_helper +{ + typedef typename ScalarBinaryOpTraits::ReturnType Scalar; + + EIGEN_STRONG_INLINE Scalar pmadd(const LhsScalar& x, const RhsScalar& y, const Scalar& c) const + { return padd(c, pmul(x,y)); } + + EIGEN_STRONG_INLINE Scalar pmul(const LhsScalar& x, const RhsScalar& y) const + { return conj_if()(x) * conj_if()(y); } }; template struct conj_helper @@ -111,7 +124,7 @@ template struct conj_helper struct get_factor { - EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE To run(const From& x) { return x; } + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE To run(const From& x) { return To(x); } }; template struct get_factor::Real> { @@ -135,7 +148,7 @@ class BlasVectorMapper { template EIGEN_DEVICE_FUNC bool aligned(Index i) const { - return (size_t(m_data+i)%sizeof(Packet))==0; + return (UIntPtr(m_data+i)%sizeof(Packet))==0; } protected: @@ -227,7 +240,7 @@ class blas_data_mapper { EIGEN_DEVICE_FUNC const Scalar* data() const { return m_data; } EIGEN_DEVICE_FUNC Index firstAligned(Index size) const { - if (size_t(m_data)%sizeof(Scalar)) { + if (UIntPtr(m_data)%sizeof(Scalar)) { return -1; } return internal::first_default_aligned(m_data, size); @@ -293,17 +306,33 @@ struct blas_traits, NestedXpr> > }; // pop scalar multiple -template -struct blas_traits, NestedXpr> > +template +struct blas_traits, const CwiseNullaryOp,Plain>, NestedXpr> > 
: blas_traits { typedef blas_traits Base; - typedef CwiseUnaryOp, NestedXpr> XprType; + typedef CwiseBinaryOp, const CwiseNullaryOp,Plain>, NestedXpr> XprType; typedef typename Base::ExtractType ExtractType; - static inline ExtractType extract(const XprType& x) { return Base::extract(x.nestedExpression()); } + static inline ExtractType extract(const XprType& x) { return Base::extract(x.rhs()); } static inline Scalar extractScalarFactor(const XprType& x) - { return x.functor().m_other * Base::extractScalarFactor(x.nestedExpression()); } + { return x.lhs().functor().m_other * Base::extractScalarFactor(x.rhs()); } }; +template +struct blas_traits, NestedXpr, const CwiseNullaryOp,Plain> > > + : blas_traits +{ + typedef blas_traits Base; + typedef CwiseBinaryOp, NestedXpr, const CwiseNullaryOp,Plain> > XprType; + typedef typename Base::ExtractType ExtractType; + static inline ExtractType extract(const XprType& x) { return Base::extract(x.lhs()); } + static inline Scalar extractScalarFactor(const XprType& x) + { return Base::extractScalarFactor(x.lhs()) * x.rhs().functor().m_other; } +}; +template +struct blas_traits, const CwiseNullaryOp,Plain1>, + const CwiseNullaryOp,Plain2> > > + : blas_traits,Plain1> > +{}; // pop opposite template diff --git a/Eigen/src/Core/util/CMakeLists.txt b/Eigen/src/Core/util/CMakeLists.txt deleted file mode 100644 index a1e2e521f..000000000 --- a/Eigen/src/Core/util/CMakeLists.txt +++ /dev/null @@ -1,6 +0,0 @@ -FILE(GLOB Eigen_Core_util_SRCS "*.h") - -INSTALL(FILES - ${Eigen_Core_util_SRCS} - DESTINATION ${INCLUDE_INSTALL_DIR}/Eigen/src/Core/util COMPONENT Devel - ) diff --git a/Eigen/src/Core/util/Constants.h b/Eigen/src/Core/util/Constants.h index 5f71ba3df..7587d6842 100644 --- a/Eigen/src/Core/util/Constants.h +++ b/Eigen/src/Core/util/Constants.h @@ -199,7 +199,7 @@ const unsigned int HereditaryBits = RowMajorBit /** \ingroup enums * Enum containing possible values for the \c Mode or \c UpLo parameter of * MatrixBase::selfadjointView() 
and MatrixBase::triangularView(), and selfadjoint solvers. */ -enum { +enum UpLoType { /** View matrix as a lower triangular matrix. */ Lower=0x1, /** View matrix as an upper triangular matrix. */ @@ -224,7 +224,7 @@ enum { /** \ingroup enums * Enum for indicating whether a buffer is aligned or not. */ -enum { +enum AlignmentType { Unaligned=0, /**< Data pointer has no specific alignment. */ Aligned8=8, /**< Data pointer is aligned on a 8 bytes boundary. */ Aligned16=16, /**< Data pointer is aligned on a 16 bytes boundary. */ @@ -273,7 +273,7 @@ enum DirectionType { /** \internal \ingroup enums * Enum to specify how to traverse the entries of a matrix. */ -enum { +enum TraversalType { /** \internal Default traversal, no vectorization, no index-based access */ DefaultTraversal, /** \internal No vectorization, use index-based access to have only one for loop instead of 2 nested loops */ @@ -295,7 +295,7 @@ enum { /** \internal \ingroup enums * Enum to specify whether to unroll loops when traversing over the entries of a matrix. */ -enum { +enum UnrollingType { /** \internal Do not unroll loops. */ NoUnrolling, /** \internal Unroll only the inner loop, but not the outer loop. */ @@ -307,7 +307,7 @@ enum { /** \internal \ingroup enums * Enum to specify whether to use the default (built-in) implementation or the specialization. */ -enum { +enum SpecializedType { Specialized, BuiltIn }; @@ -315,7 +315,7 @@ enum { /** \ingroup enums * Enum containing possible values for the \p _Options template parameter of * Matrix, Array and BandMatrix. */ -enum { +enum StorageOptions { /** Storage order is column major (see \ref TopicStorageOrders). */ ColMajor = 0, /** Storage order is row major (see \ref TopicStorageOrders). */ @@ -328,7 +328,7 @@ enum { /** \ingroup enums * Enum for specifying whether to apply or solve on the left or right. */ -enum { +enum SideType { /** Apply transformation on the left. */ OnTheLeft = 1, /** Apply transformation on the right. 
*/ @@ -353,7 +353,7 @@ enum Default_t { Default }; /** \internal \ingroup enums * Used in AmbiVector. */ -enum { +enum AmbiVectorMode { IsDense = 0, IsSparse }; @@ -479,8 +479,9 @@ namespace Architecture } /** \internal \ingroup enums - * Enum used as template parameter in Product and product evalautors. */ -enum { DefaultProduct=0, LazyProduct, AliasFreeProduct, CoeffBasedProductMode, LazyCoeffBasedProductMode, OuterProduct, InnerProduct, GemvProduct, GemmProduct }; + * Enum used as template parameter in Product and product evaluators. */ +enum ProductImplType +{ DefaultProduct=0, LazyProduct, AliasFreeProduct, CoeffBasedProductMode, LazyCoeffBasedProductMode, OuterProduct, InnerProduct, GemvProduct, GemmProduct }; /** \internal \ingroup enums * Enum used in experimental parallel implementation. */ @@ -492,7 +493,7 @@ struct Dense {}; /** The type used to identify a general sparse storage. */ struct Sparse {}; -/** The type used to identify a general solver (foctored) storage. */ +/** The type used to identify a general solver (factored) storage. */ struct SolverStorage {}; /** The type used to identify a permutation storage. 
*/ diff --git a/Eigen/src/Core/util/DisableStupidWarnings.h b/Eigen/src/Core/util/DisableStupidWarnings.h index cb27acff7..7559e129c 100755 --- a/Eigen/src/Core/util/DisableStupidWarnings.h +++ b/Eigen/src/Core/util/DisableStupidWarnings.h @@ -14,12 +14,13 @@ // 4512 - assignment operator could not be generated // 4522 - 'class' : multiple assignment operators specified // 4700 - uninitialized local variable 'xyz' used + // 4714 - function marked as __forceinline not inlined // 4717 - 'function' : recursive on all control paths, function will cause runtime stack overflow // 4800 - 'type' : forcing value to bool 'true' or 'false' (performance warning) #ifndef EIGEN_PERMANENTLY_DISABLE_STUPID_WARNINGS #pragma warning( push ) #endif - #pragma warning( disable : 4100 4101 4127 4181 4211 4244 4273 4324 4503 4512 4522 4700 4717 4800) + #pragma warning( disable : 4100 4101 4127 4181 4211 4244 4273 4324 4503 4512 4522 4700 4714 4717 4800) #elif defined __INTEL_COMPILER // 2196 - routine is both "inline" and "noinline" ("noinline" assumed) @@ -41,6 +42,14 @@ #pragma clang diagnostic push #endif #pragma clang diagnostic ignored "-Wconstant-logical-operand" + +#elif defined __GNUC__ && __GNUC__>=6 + + #ifndef EIGEN_PERMANENTLY_DISABLE_STUPID_WARNINGS + #pragma GCC diagnostic push + #endif + #pragma GCC diagnostic ignored "-Wignored-attributes" + #endif #if defined __NVCC__ @@ -48,11 +57,19 @@ #pragma diag_suppress code_is_unreachable // Disable the "dynamic initialization in unreachable code" message #pragma diag_suppress initialization_not_reachable - // Disable the "calling a __host__ function from a __host__ __device__ function is not allowed" messages (yes, there are 4 of them) + // Disable the "invalid error number" message that we get with older versions of nvcc + #pragma diag_suppress 1222 + // Disable the "calling a __host__ function from a __host__ __device__ function is not allowed" messages (yes, there are many of them and they seem to change with every version of 
the compiler) + #pragma diag_suppress 2527 + #pragma diag_suppress 2529 #pragma diag_suppress 2651 #pragma diag_suppress 2653 #pragma diag_suppress 2668 + #pragma diag_suppress 2669 #pragma diag_suppress 2670 + #pragma diag_suppress 2671 + #pragma diag_suppress 2735 + #pragma diag_suppress 2737 #endif #endif // not EIGEN_WARNINGS_DISABLED diff --git a/Eigen/src/Core/util/ForwardDeclarations.h b/Eigen/src/Core/util/ForwardDeclarations.h index a102e5457..ea107393a 100644 --- a/Eigen/src/Core/util/ForwardDeclarations.h +++ b/Eigen/src/Core/util/ForwardDeclarations.h @@ -91,6 +91,7 @@ template class CwiseNullaryOp; template class CwiseUnaryOp; template class CwiseUnaryView; template class CwiseBinaryOp; +template class CwiseTernaryOp; template class Solve; template class Inverse; @@ -174,9 +175,11 @@ namespace internal { // with optional conjugation of the arguments. template struct conj_helper; -template struct scalar_sum_op; -template struct scalar_difference_op; -template struct scalar_conj_product_op; +template struct scalar_sum_op; +template struct scalar_difference_op; +template struct scalar_conj_product_op; +template struct scalar_min_op; +template struct scalar_max_op; template struct scalar_opposite_op; template struct scalar_conjugate_op; template struct scalar_real_op; @@ -192,27 +195,28 @@ template struct scalar_sin_op; template struct scalar_acos_op; template struct scalar_asin_op; template struct scalar_tan_op; -template struct scalar_pow_op; template struct scalar_inverse_op; template struct scalar_square_op; template struct scalar_cube_op; template struct scalar_cast_op; -template struct scalar_multiple_op; -template struct scalar_quotient1_op; -template struct scalar_min_op; -template struct scalar_max_op; template struct scalar_random_op; -template struct scalar_add_op; template struct scalar_constant_op; template struct scalar_identity_op; template struct scalar_sign_op; +template struct scalar_pow_op; +template struct scalar_hypot_op; +template 
struct scalar_product_op; +template struct scalar_quotient_op; + +// SpecialFunctions module +template struct scalar_lgamma_op; +template struct scalar_digamma_op; +template struct scalar_erf_op; +template struct scalar_erfc_op; template struct scalar_igamma_op; template struct scalar_igammac_op; - -template struct scalar_product_op; -template struct scalar_multiple2_op; -template struct scalar_quotient_op; -template struct scalar_quotient2_op; +template struct scalar_zeta_op; +template struct scalar_betainc_op; } // end namespace internal diff --git a/Eigen/src/Core/util/MKL_support.h b/Eigen/src/Core/util/MKL_support.h old mode 100644 new mode 100755 index 8c9239b1d..26b59669e --- a/Eigen/src/Core/util/MKL_support.h +++ b/Eigen/src/Core/util/MKL_support.h @@ -49,7 +49,7 @@ #define EIGEN_USE_LAPACKE #endif -#if defined(EIGEN_USE_LAPACKE) || defined(EIGEN_USE_MKL_VML) +#if defined(EIGEN_USE_MKL_VML) #define EIGEN_USE_MKL #endif @@ -72,7 +72,7 @@ #endif #if defined EIGEN_USE_MKL -#include + #define EIGEN_MKL_VML_THRESHOLD 128 /* MKL_DOMAIN_BLAS, etc are defined only in 10.3 update 7 */ diff --git a/Eigen/src/Core/util/Macros.h b/Eigen/src/Core/util/Macros.h index acb936ebe..9069d8e6b 100644 --- a/Eigen/src/Core/util/Macros.h +++ b/Eigen/src/Core/util/Macros.h @@ -13,7 +13,7 @@ #define EIGEN_WORLD_VERSION 3 #define EIGEN_MAJOR_VERSION 2 -#define EIGEN_MINOR_VERSION 92 +#define EIGEN_MINOR_VERSION 94 #define EIGEN_VERSION_AT_LEAST(x,y,z) (EIGEN_WORLD_VERSION>x || (EIGEN_WORLD_VERSION>=x && \ (EIGEN_MAJOR_VERSION>y || (EIGEN_MAJOR_VERSION>=y && \ @@ -28,9 +28,9 @@ #define EIGEN_COMP_GNUC 0 #endif -/// \internal EIGEN_COMP_CLANG set to 1 if the compiler is clang (alias for __clang__) +/// \internal EIGEN_COMP_CLANG set to major+minor version (e.g., 307 for clang 3.7) if the compiler is clang #if defined(__clang__) - #define EIGEN_COMP_CLANG 1 + #define EIGEN_COMP_CLANG (__clang_major__*100+__clang_minor__) #else #define EIGEN_COMP_CLANG 0 #endif @@ -71,6 +71,15 @@ 
#define EIGEN_COMP_MSVC 0 #endif +// For the record, here is a table summarizing the possible values for EIGEN_COMP_MSVC: +// name ver MSC_VER +// 2008 9 1500 +// 2010 10 1600 +// 2012 11 1700 +// 2013 12 1800 +// 2015 14 1900 +// "15" 15 1900 + /// \internal EIGEN_COMP_MSVC_STRICT set to 1 if the compiler is really Microsoft Visual C++ and not ,e.g., ICC #if EIGEN_COMP_MSVC && !(EIGEN_COMP_ICC) #define EIGEN_COMP_MSVC_STRICT _MSC_VER @@ -340,50 +349,82 @@ # define __has_feature(x) 0 #endif +// Upperbound on the C++ version to use. +// Expected values are 03, 11, 14, 17, etc. +// By default, let's use an arbitrarily large C++ version. +#ifndef EIGEN_MAX_CPP_VER +#define EIGEN_MAX_CPP_VER 99 +#endif + // Do we support r-value references? -#if (__has_feature(cxx_rvalue_references) || \ +#ifndef EIGEN_HAS_RVALUE_REFERENCES +#if EIGEN_MAX_CPP_VER>=11 && \ + (__has_feature(cxx_rvalue_references) || \ (defined(__cplusplus) && __cplusplus >= 201103L) || \ (EIGEN_COMP_MSVC >= 1600)) - #define EIGEN_HAVE_RVALUE_REFERENCES + #define EIGEN_HAS_RVALUE_REFERENCES 1 +#else + #define EIGEN_HAS_RVALUE_REFERENCES 0 +#endif #endif // Does the compiler support C99? -#if (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901)) \ +#ifndef EIGEN_HAS_C99_MATH +#if EIGEN_MAX_CPP_VER>=11 && \ + ((defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901)) \ || (defined(__GNUC__) && defined(_GLIBCXX_USE_C99)) \ - || (defined(_LIBCPP_VERSION) && !defined(_MSC_VER)) -#define EIGEN_HAS_C99_MATH 1 + || (defined(_LIBCPP_VERSION) && !defined(_MSC_VER))) + #define EIGEN_HAS_C99_MATH 1 +#else + #define EIGEN_HAS_C99_MATH 0 +#endif #endif // Does the compiler support result_of? 
-#if (__has_feature(cxx_lambdas) || (defined(__cplusplus) && __cplusplus >= 201103L)) +#ifndef EIGEN_HAS_STD_RESULT_OF +#if EIGEN_MAX_CPP_VER>=11 && ((__has_feature(cxx_lambdas) || (defined(__cplusplus) && __cplusplus >= 201103L))) #define EIGEN_HAS_STD_RESULT_OF 1 +#else +#define EIGEN_HAS_STD_RESULT_OF 0 +#endif #endif // Does the compiler support variadic templates? -#if __cplusplus > 199711L || EIGEN_COMP_MSVC >= 1900 -// Disable the use of variadic templates when compiling with nvcc on ARM devices: -// this prevents nvcc from crashing when compiling Eigen on Tegra X1 -#if !defined(__NVCC__) || !EIGEN_ARCH_ARM_OR_ARM64 +#ifndef EIGEN_HAS_VARIADIC_TEMPLATES +#if EIGEN_MAX_CPP_VER>=11 && (__cplusplus > 199711L || EIGEN_COMP_MSVC >= 1900) \ + && ( !defined(__NVCC__) || !EIGEN_ARCH_ARM_OR_ARM64 ) + // ^^ Disable the use of variadic templates when compiling with nvcc on ARM devices: + // this prevents nvcc from crashing when compiling Eigen on Tegra X1 #define EIGEN_HAS_VARIADIC_TEMPLATES 1 +#else +#define EIGEN_HAS_VARIADIC_TEMPLATES 0 #endif #endif -// Does the compiler support const expressions? +// Does the compiler fully support const expressions? 
(as in c++14) +#ifndef EIGEN_HAS_CONSTEXPR + #ifdef __CUDACC__ // Const expressions are supported provided that c++11 is enabled and we're using either clang or nvcc 7.5 or above -#if __cplusplus > 199711L && defined(__CUDACC_VER__) && (EIGEN_COMP_CLANG || __CUDACC_VER__ >= 70500) +#if EIGEN_MAX_CPP_VER>=14 && (__cplusplus > 199711L && defined(__CUDACC_VER__) && (EIGEN_COMP_CLANG || __CUDACC_VER__ >= 70500)) #define EIGEN_HAS_CONSTEXPR 1 #endif -#elif __has_feature(cxx_relaxed_constexpr) || (defined(__cplusplus) && __cplusplus >= 201402L) || \ - (EIGEN_GNUC_AT_LEAST(4,8) && (__cplusplus > 199711L)) +#elif EIGEN_MAX_CPP_VER>=14 && (__has_feature(cxx_relaxed_constexpr) || (defined(__cplusplus) && __cplusplus >= 201402L) || \ + (EIGEN_GNUC_AT_LEAST(4,8) && (__cplusplus > 199711L))) #define EIGEN_HAS_CONSTEXPR 1 #endif +#ifndef EIGEN_HAS_CONSTEXPR +#define EIGEN_HAS_CONSTEXPR 0 +#endif + +#endif + // Does the compiler support C++11 math? // Let's be conservative and enable the default C++11 implementation only if we are sure it exists #ifndef EIGEN_HAS_CXX11_MATH - #if (__cplusplus > 201103L) || (__cplusplus >= 201103L) && (EIGEN_COMP_GNUC_STRICT || EIGEN_COMP_CLANG || EIGEN_COMP_MSVC || EIGEN_COMP_ICC) \ - && (EIGEN_ARCH_i386_OR_x86_64) && (EIGEN_OS_GNULINUX || EIGEN_OS_WIN_STRICT || EIGEN_OS_MAC) + #if EIGEN_MAX_CPP_VER>=11 && ((__cplusplus > 201103L) || (__cplusplus >= 201103L) && (EIGEN_COMP_GNUC_STRICT || EIGEN_COMP_CLANG || EIGEN_COMP_MSVC || EIGEN_COMP_ICC) \ + && (EIGEN_ARCH_i386_OR_x86_64) && (EIGEN_OS_GNULINUX || EIGEN_OS_WIN_STRICT || EIGEN_OS_MAC)) #define EIGEN_HAS_CXX11_MATH 1 #else #define EIGEN_HAS_CXX11_MATH 0 @@ -392,9 +433,10 @@ // Does the compiler support proper C++11 containers? 
#ifndef EIGEN_HAS_CXX11_CONTAINERS - #if (__cplusplus > 201103L) \ + #if EIGEN_MAX_CPP_VER>=11 && \ + ((__cplusplus > 201103L) \ || ((__cplusplus >= 201103L) && (EIGEN_COMP_GNUC_STRICT || EIGEN_COMP_CLANG || EIGEN_COMP_ICC>=1400)) \ - || EIGEN_COMP_MSVC >= 1900 + || EIGEN_COMP_MSVC >= 1900) #define EIGEN_HAS_CXX11_CONTAINERS 1 #else #define EIGEN_HAS_CXX11_CONTAINERS 0 @@ -403,9 +445,11 @@ // Does the compiler support C++11 noexcept? #ifndef EIGEN_HAS_CXX11_NOEXCEPT - #if (__cplusplus > 201103L) \ + #if EIGEN_MAX_CPP_VER>=11 && \ + (__has_feature(cxx_noexcept) \ + || (__cplusplus > 201103L) \ || ((__cplusplus >= 201103L) && (EIGEN_COMP_GNUC_STRICT || EIGEN_COMP_CLANG || EIGEN_COMP_ICC>=1400)) \ - || EIGEN_COMP_MSVC >= 1900 + || EIGEN_COMP_MSVC >= 1900) #define EIGEN_HAS_CXX11_NOEXCEPT 1 #else #define EIGEN_HAS_CXX11_NOEXCEPT 0 @@ -427,6 +471,8 @@ #define EIGEN_CAT2(a,b) a ## b #define EIGEN_CAT(a,b) EIGEN_CAT2(a,b) +#define EIGEN_COMMA , + // convert a token to a string #define EIGEN_MAKESTRING2(a) #a #define EIGEN_MAKESTRING(a) EIGEN_MAKESTRING2(a) @@ -725,6 +771,11 @@ namespace Eigen { #define EIGEN_DEFAULT_ALIGN_BYTES EIGEN_MAX_ALIGN_BYTES #endif + +#ifndef EIGEN_UNALIGNED_VECTORIZE +#define EIGEN_UNALIGNED_VECTORIZE 1 +#endif + //---------------------------------------------------------------------- @@ -839,18 +890,10 @@ namespace Eigen { #define EIGEN_IMPLIES(a,b) (!(a) || (b)) -#define EIGEN_MAKE_CWISE_BINARY_OP(METHOD,FUNCTOR) \ - template \ - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const CwiseBinaryOp, const Derived, const OtherDerived> \ - (METHOD)(const EIGEN_CURRENT_STORAGE_BASE_CLASS &other) const \ - { \ - return CwiseBinaryOp, const Derived, const OtherDerived>(derived(), other.derived()); \ - } - -// the expression type of a cwise product -#define EIGEN_CWISE_PRODUCT_RETURN_TYPE(LHS,RHS) \ +// the expression type of a standard coefficient wise binary operation +#define EIGEN_CWISE_BINARY_RETURN_TYPE(LHS,RHS,OPNAME) \ CwiseBinaryOp< \ - 
internal::scalar_product_op< \ + EIGEN_CAT(EIGEN_CAT(internal::scalar_,OPNAME),_op)< \ typename internal::traits::Scalar, \ typename internal::traits::Scalar \ >, \ @@ -858,6 +901,55 @@ namespace Eigen { const RHS \ > +#define EIGEN_MAKE_CWISE_BINARY_OP(METHOD,OPNAME) \ + template \ + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const EIGEN_CWISE_BINARY_RETURN_TYPE(Derived,OtherDerived,OPNAME) \ + (METHOD)(const EIGEN_CURRENT_STORAGE_BASE_CLASS &other) const \ + { \ + return EIGEN_CWISE_BINARY_RETURN_TYPE(Derived,OtherDerived,OPNAME)(derived(), other.derived()); \ + } + +#define EIGEN_SCALAR_BINARY_SUPPORTED(OPNAME,TYPEA,TYPEB) \ + (Eigen::internal::has_ReturnType > >::value) + +#define EIGEN_EXPR_BINARYOP_SCALAR_RETURN_TYPE(EXPR,SCALAR,OPNAME) \ + CwiseBinaryOp::Scalar,SCALAR>, const EXPR, \ + const typename internal::plain_constant_type::type> + +#define EIGEN_SCALAR_BINARYOP_EXPR_RETURN_TYPE(SCALAR,EXPR,OPNAME) \ + CwiseBinaryOp::Scalar>, \ + const typename internal::plain_constant_type::type, const EXPR> + +// Workaround for MSVC 2010 (see ML thread "patch with compile for for MSVC 2010") +#if EIGEN_COMP_MSVC_STRICT<=1600 +#define EIGEN_MSVC10_WORKAROUND_BINARYOP_RETURN_TYPE(X) typename internal::enable_if::type +#else +#define EIGEN_MSVC10_WORKAROUND_BINARYOP_RETURN_TYPE(X) X +#endif + +#define EIGEN_MAKE_SCALAR_BINARY_OP_ONTHERIGHT(METHOD,OPNAME) \ + template EIGEN_DEVICE_FUNC inline \ + EIGEN_MSVC10_WORKAROUND_BINARYOP_RETURN_TYPE(const EIGEN_EXPR_BINARYOP_SCALAR_RETURN_TYPE(Derived,typename internal::promote_scalar_arg::type,OPNAME))\ + (METHOD)(const T& scalar) const { \ + typedef typename internal::promote_scalar_arg::type PromotedT; \ + return EIGEN_EXPR_BINARYOP_SCALAR_RETURN_TYPE(Derived,PromotedT,OPNAME)(derived(), \ + typename internal::plain_constant_type::type(derived().rows(), derived().cols(), internal::scalar_constant_op(scalar))); \ + } + +#define EIGEN_MAKE_SCALAR_BINARY_OP_ONTHELEFT(METHOD,OPNAME) \ + template EIGEN_DEVICE_FUNC inline friend \ + 
EIGEN_MSVC10_WORKAROUND_BINARYOP_RETURN_TYPE(const EIGEN_SCALAR_BINARYOP_EXPR_RETURN_TYPE(typename internal::promote_scalar_arg::type,Derived,OPNAME)) \ + (METHOD)(const T& scalar, const StorageBaseType& matrix) { \ + typedef typename internal::promote_scalar_arg::type PromotedT; \ + return EIGEN_SCALAR_BINARYOP_EXPR_RETURN_TYPE(PromotedT,Derived,OPNAME)( \ + typename internal::plain_constant_type::type(matrix.derived().rows(), matrix.derived().cols(), internal::scalar_constant_op(scalar)), matrix.derived()); \ + } + +#define EIGEN_MAKE_SCALAR_BINARY_OP(METHOD,OPNAME) \ + EIGEN_MAKE_SCALAR_BINARY_OP_ONTHELEFT(METHOD,OPNAME) \ + EIGEN_MAKE_SCALAR_BINARY_OP_ONTHERIGHT(METHOD,OPNAME) + + #ifdef EIGEN_EXCEPTIONS # define EIGEN_THROW_X(X) throw X # define EIGEN_THROW throw @@ -865,8 +957,8 @@ namespace Eigen { # define EIGEN_CATCH(X) catch (X) #else # ifdef __CUDA_ARCH__ -# define EIGEN_THROW_X(X) asm("trap;") return {} -# define EIGEN_THROW asm("trap;"); return {} +# define EIGEN_THROW_X(X) asm("trap;") +# define EIGEN_THROW asm("trap;") # else # define EIGEN_THROW_X(X) std::abort() # define EIGEN_THROW std::abort() @@ -875,10 +967,16 @@ namespace Eigen { # define EIGEN_CATCH(X) else #endif + #if EIGEN_HAS_CXX11_NOEXCEPT +# define EIGEN_INCLUDE_TYPE_TRAITS +# define EIGEN_NOEXCEPT noexcept +# define EIGEN_NOEXCEPT_IF(x) noexcept(x) # define EIGEN_NO_THROW noexcept(true) # define EIGEN_EXCEPTION_SPEC(X) noexcept(false) #else +# define EIGEN_NOEXCEPT +# define EIGEN_NOEXCEPT_IF(x) # define EIGEN_NO_THROW throw() # define EIGEN_EXCEPTION_SPEC(X) throw(X) #endif diff --git a/Eigen/src/Core/util/Memory.h b/Eigen/src/Core/util/Memory.h index 5f8bf15b2..0439655ca 100644 --- a/Eigen/src/Core/util/Memory.h +++ b/Eigen/src/Core/util/Memory.h @@ -275,6 +275,7 @@ template EIGEN_DEVICE_FUNC inline T* construct_elements_of_array(T * destruct_elements_of_array(ptr, i); EIGEN_THROW; } + return NULL; } /***************************************************************************** @@ 
-305,6 +306,7 @@ template EIGEN_DEVICE_FUNC inline T* aligned_new(size_t size) aligned_free(result); EIGEN_THROW; } + return result; } template EIGEN_DEVICE_FUNC inline T* conditional_aligned_new(size_t size) @@ -320,6 +322,7 @@ template EIGEN_DEVICE_FUNC inline T* conditional_aligned conditional_aligned_free(result); EIGEN_THROW; } + return result; } /** \internal Deletes objects constructed with aligned_new @@ -445,7 +448,7 @@ EIGEN_DEVICE_FUNC inline Index first_aligned(const Scalar* array, Index size) // so that all elements of the array have the same alignment. return 0; } - else if( (std::size_t(array) & (sizeof(Scalar)-1)) || (Alignment%ScalarSize)!=0) + else if( (UIntPtr(array) & (sizeof(Scalar)-1)) || (Alignment%ScalarSize)!=0) { // The array is not aligned to the size of a single scalar, or the requested alignment is not a multiple of the scalar size. // Consequently, no element of the array is well aligned. @@ -453,7 +456,7 @@ EIGEN_DEVICE_FUNC inline Index first_aligned(const Scalar* array, Index size) } else { - Index first = (AlignmentSize - (Index((std::size_t(array)/sizeof(Scalar))) & AlignmentMask)) & AlignmentMask; + Index first = (AlignmentSize - (Index((UIntPtr(array)/sizeof(Scalar))) & AlignmentMask)) & AlignmentMask; return (first < size) ? 
first : size; } } @@ -487,7 +490,7 @@ template EIGEN_DEVICE_FUNC void smart_copy(const T* start, const T* template struct smart_copy_helper { EIGEN_DEVICE_FUNC static inline void run(const T* start, const T* end, T* target) { - std::ptrdiff_t size = std::ptrdiff_t(end)-std::ptrdiff_t(start); + IntPtr size = IntPtr(end)-IntPtr(start); if(size==0) return; eigen_internal_assert(start!=0 && end!=0 && target!=0); memcpy(target, start, size); @@ -510,7 +513,7 @@ template void smart_memmove(const T* start, const T* end, T* target) template struct smart_memmove_helper { static inline void run(const T* start, const T* end, T* target) { - std::ptrdiff_t size = std::ptrdiff_t(end)-std::ptrdiff_t(start); + IntPtr size = IntPtr(end)-IntPtr(start); if(size==0) return; eigen_internal_assert(start!=0 && end!=0 && target!=0); std::memmove(target, start, size); @@ -623,7 +626,7 @@ template void swap(scoped_array &a,scoped_array &b) #if EIGEN_DEFAULT_ALIGN_BYTES>0 // We always manually re-align the result of EIGEN_ALLOCA. // If alloca is already aligned, the compiler should be smart enough to optimize away the re-alignment. 
- #define EIGEN_ALIGNED_ALLOCA(SIZE) reinterpret_cast((reinterpret_cast(EIGEN_ALLOCA(SIZE+EIGEN_DEFAULT_ALIGN_BYTES-1)) + EIGEN_DEFAULT_ALIGN_BYTES-1) & ~(std::size_t(EIGEN_DEFAULT_ALIGN_BYTES-1))) + #define EIGEN_ALIGNED_ALLOCA(SIZE) reinterpret_cast((internal::UIntPtr(EIGEN_ALLOCA(SIZE+EIGEN_DEFAULT_ALIGN_BYTES-1)) + EIGEN_DEFAULT_ALIGN_BYTES-1) & ~(std::size_t(EIGEN_DEFAULT_ALIGN_BYTES-1))) #else #define EIGEN_ALIGNED_ALLOCA(SIZE) EIGEN_ALLOCA(SIZE) #endif diff --git a/Eigen/src/Core/util/Meta.h b/Eigen/src/Core/util/Meta.h old mode 100644 new mode 100755 index 24e8a6d8a..d4460bb77 --- a/Eigen/src/Core/util/Meta.h +++ b/Eigen/src/Core/util/Meta.h @@ -16,8 +16,22 @@ #include #endif +#if EIGEN_COMP_ICC>=1600 && __cplusplus >= 201103L +#include +#endif + namespace Eigen { +typedef EIGEN_DEFAULT_DENSE_INDEX_TYPE DenseIndex; + +/** + * \brief The Index type as used for the API. + * \details To change this, \c \#define the preprocessor symbol \c EIGEN_DEFAULT_DENSE_INDEX_TYPE. + * \sa \blank \ref TopicPreprocessorDirectives, StorageIndex. + */ + +typedef EIGEN_DEFAULT_DENSE_INDEX_TYPE Index; + namespace internal { /** \internal @@ -27,6 +41,16 @@ namespace internal { * we however don't want to add a dependency to Boost. */ +// Only recent versions of ICC complain about using ptrdiff_t to hold pointers, +// and older versions do not provide *intptr_t types. 
+#if EIGEN_COMP_ICC>=1600 && __cplusplus >= 201103L +typedef std::intptr_t IntPtr; +typedef std::uintptr_t UIntPtr; +#else +typedef std::ptrdiff_t IntPtr; +typedef std::size_t UIntPtr; +#endif + struct true_type { enum { value = 1 }; }; struct false_type { enum { value = 0 }; }; @@ -115,7 +139,14 @@ private: public: static From ms_from; +#ifdef __INTEL_COMPILER + #pragma warning push + #pragma warning ( disable : 2259 ) +#endif enum { value = sizeof(test(ms_from, 0))==sizeof(yes) }; +#ifdef __INTEL_COMPILER + #pragma warning pop +#endif }; template @@ -128,7 +159,7 @@ struct is_convertible /** \internal Allows to enable/disable an overload * according to a compile time condition. */ -template struct enable_if; +template struct enable_if; template struct enable_if { typedef T type; }; @@ -254,7 +285,7 @@ protected: * upcoming next STL generation (using a templated result member). * If none of these members is provided, then the type of the first argument is returned. FIXME, that behavior is a pretty bad hack. 
*/ -#ifdef EIGEN_HAS_STD_RESULT_OF +#if EIGEN_HAS_STD_RESULT_OF template struct result_of { typedef typename std::result_of::type type1; typedef typename remove_all::type type; @@ -311,8 +342,74 @@ struct result_of { enum {FunctorType = sizeof(testFunctor(static_cast(0)))}; typedef typename binary_result_of_select::type type; }; + +template +struct ternary_result_of_select {typedef typename internal::remove_all::type type;}; + +template +struct ternary_result_of_select +{typedef typename Func::result_type type;}; + +template +struct ternary_result_of_select +{typedef typename Func::template result::type type;}; + +template +struct result_of { + template + static has_std_result_type testFunctor(T const *, typename T::result_type const * = 0); + template + static has_tr1_result testFunctor(T const *, typename T::template result::type const * = 0); + static has_none testFunctor(...); + + // note that the following indirection is needed for gcc-3.3 + enum {FunctorType = sizeof(testFunctor(static_cast(0)))}; + typedef typename ternary_result_of_select::type type; +}; #endif +struct meta_yes { char a[1]; }; +struct meta_no { char a[2]; }; + +// Check whether T::ReturnType does exist +template +struct has_ReturnType +{ + template static meta_yes testFunctor(typename C::ReturnType const *); + template static meta_no testFunctor(...); + + enum { value = sizeof(testFunctor(0)) == sizeof(meta_yes) }; +}; + +template const T& return_ref(); + +template +struct has_nullary_operator +{ + template static meta_yes testFunctor(C const *,typename enable_if<(sizeof(return_ref().operator()())>0)>::type * = 0); + static meta_no testFunctor(...); + + enum { value = sizeof(testFunctor(static_cast(0))) == sizeof(meta_yes) }; +}; + +template +struct has_unary_operator +{ + template static meta_yes testFunctor(C const *,typename enable_if<(sizeof(return_ref().operator()(IndexType(0)))>0)>::type * = 0); + static meta_no testFunctor(...); + + enum { value = sizeof(testFunctor(static_cast(0))) 
== sizeof(meta_yes) }; +}; + +template +struct has_binary_operator +{ + template static meta_yes testFunctor(C const *,typename enable_if<(sizeof(return_ref().operator()(IndexType(0),IndexType(0)))>0)>::type * = 0); + static meta_no testFunctor(...); + + enum { value = sizeof(testFunctor(static_cast(0))) == sizeof(meta_yes) }; +}; + /** \internal In short, it computes int(sqrt(\a Y)) with \a Y an integer. * Usage example: \code meta_sqrt<1023>::ret \endcode */ @@ -358,33 +455,6 @@ template struct scalar_product_traits enum { Defined = 0 }; }; -template struct scalar_product_traits -{ - enum { - // Cost = NumTraits::MulCost, - Defined = 1 - }; - typedef T ReturnType; -}; - -template struct scalar_product_traits > -{ - enum { - // Cost = 2*NumTraits::MulCost, - Defined = 1 - }; - typedef std::complex ReturnType; -}; - -template struct scalar_product_traits, T> -{ - enum { - // Cost = 2*NumTraits::MulCost, - Defined = 1 - }; - typedef std::complex ReturnType; -}; - // FIXME quick workaround around current limitation of result_of // template // struct result_of(ArgType0,ArgType1)> { diff --git a/Eigen/src/Core/util/ReenableStupidWarnings.h b/Eigen/src/Core/util/ReenableStupidWarnings.h index a23fab198..86b60f52f 100644 --- a/Eigen/src/Core/util/ReenableStupidWarnings.h +++ b/Eigen/src/Core/util/ReenableStupidWarnings.h @@ -8,17 +8,20 @@ #pragma warning pop #elif defined __clang__ #pragma clang diagnostic pop + #elif defined __GNUC__ && __GNUC__>=6 + #pragma GCC diagnostic pop #endif #if defined __NVCC__ // Don't reenable the diagnostic messages, as it turns out these messages need // to be disabled at the point of the template instantiation (i.e the user code) -// otherwise they'll be triggeredby nvcc. +// otherwise they'll be triggered by nvcc. 
// #pragma diag_default code_is_unreachable // #pragma diag_default initialization_not_reachable // #pragma diag_default 2651 // #pragma diag_default 2653 #endif + #endif #endif // EIGEN_WARNINGS_DISABLED diff --git a/Eigen/src/Core/util/StaticAssert.h b/Eigen/src/Core/util/StaticAssert.h index afae2e51e..4fd8891c6 100644 --- a/Eigen/src/Core/util/StaticAssert.h +++ b/Eigen/src/Core/util/StaticAssert.h @@ -26,7 +26,7 @@ #ifndef EIGEN_NO_STATIC_ASSERT - #if __has_feature(cxx_static_assert) || (defined(__cplusplus) && __cplusplus >= 201103L) || (EIGEN_COMP_MSVC >= 1600) + #if EIGEN_MAX_CPP_VER>=11 && (__has_feature(cxx_static_assert) || (defined(__cplusplus) && __cplusplus >= 201103L) || (EIGEN_COMP_MSVC >= 1600)) // if native static_assert is enabled, let's use it #define EIGEN_STATIC_ASSERT(X,MSG) static_assert(X,#MSG); @@ -98,7 +98,9 @@ EIGEN_INTERNAL_ERROR_PLEASE_FILE_A_BUG_REPORT__INVALID_COST_VALUE, THIS_COEFFICIENT_ACCESSOR_TAKING_ONE_ACCESS_IS_ONLY_FOR_EXPRESSIONS_ALLOWING_LINEAR_ACCESS, MATRIX_FREE_CONJUGATE_GRADIENT_IS_COMPATIBLE_WITH_UPPER_UNION_LOWER_MODE_ONLY, - THIS_TYPE_IS_NOT_SUPPORTED + THIS_TYPE_IS_NOT_SUPPORTED, + STORAGE_KIND_MUST_MATCH, + STORAGE_INDEX_MUST_MATCH }; }; @@ -165,7 +167,7 @@ #define EIGEN_PREDICATE_SAME_MATRIX_SIZE(TYPE0,TYPE1) \ ( \ - (int(internal::size_of_xpr_at_compile_time::ret)==0 && int(internal::size_of_xpr_at_compile_time::ret)==0) \ + (int(Eigen::internal::size_of_xpr_at_compile_time::ret)==0 && int(Eigen::internal::size_of_xpr_at_compile_time::ret)==0) \ || (\ (int(TYPE0::RowsAtCompileTime)==Eigen::Dynamic \ || int(TYPE1::RowsAtCompileTime)==Eigen::Dynamic \ @@ -192,16 +194,16 @@ THIS_METHOD_IS_ONLY_FOR_1x1_EXPRESSIONS) #define EIGEN_STATIC_ASSERT_LVALUE(Derived) \ - EIGEN_STATIC_ASSERT(internal::is_lvalue::value, \ + EIGEN_STATIC_ASSERT(Eigen::internal::is_lvalue::value, \ THIS_EXPRESSION_IS_NOT_A_LVALUE__IT_IS_READ_ONLY) #define EIGEN_STATIC_ASSERT_ARRAYXPR(Derived) \ - EIGEN_STATIC_ASSERT((internal::is_same::XprKind, 
ArrayXpr>::value), \ + EIGEN_STATIC_ASSERT((Eigen::internal::is_same::XprKind, ArrayXpr>::value), \ THIS_METHOD_IS_ONLY_FOR_ARRAYS_NOT_MATRICES) #define EIGEN_STATIC_ASSERT_SAME_XPR_KIND(Derived1, Derived2) \ - EIGEN_STATIC_ASSERT((internal::is_same::XprKind, \ - typename internal::traits::XprKind \ + EIGEN_STATIC_ASSERT((Eigen::internal::is_same::XprKind, \ + typename Eigen::internal::traits::XprKind \ >::value), \ YOU_CANNOT_MIX_ARRAYS_AND_MATRICES) diff --git a/Eigen/src/Core/util/XprHelper.h b/Eigen/src/Core/util/XprHelper.h index a001c473a..088a65240 100644 --- a/Eigen/src/Core/util/XprHelper.h +++ b/Eigen/src/Core/util/XprHelper.h @@ -24,16 +24,6 @@ namespace Eigen { -typedef EIGEN_DEFAULT_DENSE_INDEX_TYPE DenseIndex; - -/** - * \brief The Index type as used for the API. - * \details To change this, \c \#define the preprocessor symbol \c EIGEN_DEFAULT_DENSE_INDEX_TYPE. - * \sa \blank \ref TopicPreprocessorDirectives, StorageIndex. - */ - -typedef EIGEN_DEFAULT_DENSE_INDEX_TYPE Index; - namespace internal { template @@ -45,6 +35,56 @@ inline IndexDest convert_index(const IndexSrc& idx) { } +// promote_scalar_arg is an helper used in operation between an expression and a scalar, like: +// expression * scalar +// Its role is to determine how the type T of the scalar operand should be promoted given the scalar type ExprScalar of the given expression. +// The IsSupported template parameter must be provided by the caller as: internal::has_ReturnType >::value using the proper order for ExprScalar and T. +// Then the logic is as follows: +// - if the operation is natively supported as defined by IsSupported, then the scalar type is not promoted, and T is returned. +// - otherwise, NumTraits::Literal is returned if T is implicitly convertible to NumTraits::Literal AND that this does not imply a float to integer conversion. +// - otherwise, ExprScalar is returned if T is implicitly convertible to ExprScalar AND that this does not imply a float to integer conversion. 
+// - In all other cases, the promoted type is not defined, and the respective operation is thus invalid and not available (SFINAE). +template +struct promote_scalar_arg; + +template +struct promote_scalar_arg +{ + typedef T type; +}; + +// Recursively check safe conversion to PromotedType, and then ExprScalar if they are different. +template::value, + bool IsSafe = NumTraits::IsInteger || !NumTraits::IsInteger> +struct promote_scalar_arg_unsupported; + +// Start recursion with NumTraits::Literal +template +struct promote_scalar_arg : promote_scalar_arg_unsupported::Literal> {}; + +// We found a match! +template +struct promote_scalar_arg_unsupported +{ + typedef PromotedType type; +}; + +// No match, but no real-to-integer issues, and ExprScalar and current PromotedType are different, +// so let's try to promote to ExprScalar +template +struct promote_scalar_arg_unsupported + : promote_scalar_arg_unsupported +{}; + +// Unsafe real-to-integer, let's stop. +template +struct promote_scalar_arg_unsupported {}; + +// T is not even convertible to ExprScalar, let's stop. +template +struct promote_scalar_arg_unsupported {}; + //classes inheriting no_assignment_operator don't generate a default operator=. 
class no_assignment_operator { @@ -67,9 +107,9 @@ template class variable_if_dynamic { public: EIGEN_EMPTY_STRUCT_CTOR(variable_if_dynamic) - EIGEN_DEVICE_FUNC explicit variable_if_dynamic(T v) { EIGEN_ONLY_USED_FOR_DEBUG(v); eigen_assert(v == T(Value)); } - EIGEN_DEVICE_FUNC static T value() { return T(Value); } - EIGEN_DEVICE_FUNC void setValue(T) {} + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE explicit variable_if_dynamic(T v) { EIGEN_ONLY_USED_FOR_DEBUG(v); eigen_assert(v == T(Value)); } + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE T value() { return T(Value); } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void setValue(T) {} }; template class variable_if_dynamic @@ -77,9 +117,9 @@ template class variable_if_dynamic T m_value; EIGEN_DEVICE_FUNC variable_if_dynamic() { eigen_assert(false); } public: - EIGEN_DEVICE_FUNC explicit variable_if_dynamic(T value) : m_value(value) {} - EIGEN_DEVICE_FUNC T value() const { return m_value; } - EIGEN_DEVICE_FUNC void setValue(T value) { m_value = value; } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE explicit variable_if_dynamic(T value) : m_value(value) {} + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T value() const { return m_value; } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void setValue(T value) { m_value = value; } }; /** \internal like variable_if_dynamic but for DynamicIndex @@ -88,9 +128,9 @@ template class variable_if_dynamicindex { public: EIGEN_EMPTY_STRUCT_CTOR(variable_if_dynamicindex) - EIGEN_DEVICE_FUNC explicit variable_if_dynamicindex(T v) { EIGEN_ONLY_USED_FOR_DEBUG(v); eigen_assert(v == T(Value)); } - EIGEN_DEVICE_FUNC static T value() { return T(Value); } - EIGEN_DEVICE_FUNC void setValue(T) {} + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE explicit variable_if_dynamicindex(T v) { EIGEN_ONLY_USED_FOR_DEBUG(v); eigen_assert(v == T(Value)); } + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE T value() { return T(Value); } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void setValue(T) {} }; template class variable_if_dynamicindex @@ -98,9 
+138,9 @@ template class variable_if_dynamicindex T m_value; EIGEN_DEVICE_FUNC variable_if_dynamicindex() { eigen_assert(false); } public: - EIGEN_DEVICE_FUNC explicit variable_if_dynamicindex(T value) : m_value(value) {} - EIGEN_DEVICE_FUNC T value() const { return m_value; } - EIGEN_DEVICE_FUNC void setValue(T value) { m_value = value; } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE explicit variable_if_dynamicindex(T value) : m_value(value) {} + EIGEN_DEVICE_FUNC T EIGEN_STRONG_INLINE value() const { return m_value; } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void setValue(T value) { m_value = value; } }; template struct functor_traits @@ -450,52 +490,6 @@ struct generic_xpr_base typedef typename dense_xpr_base::type type; }; -/** \internal Helper base class to add a scalar multiple operator - * overloads for complex types */ -template::value > -struct special_scalar_op_base : public BaseType -{ - // dummy operator* so that the - // "using special_scalar_op_base::operator*" compiles - struct dummy {}; - void operator*(dummy) const; - void operator/(dummy) const; -}; - -template -struct special_scalar_op_base : public BaseType -{ - const CwiseUnaryOp, const Derived> - operator*(const OtherScalar& scalar) const - { -#ifdef EIGEN_SPECIAL_SCALAR_MULTIPLE_PLUGIN - EIGEN_SPECIAL_SCALAR_MULTIPLE_PLUGIN -#endif - return CwiseUnaryOp, const Derived> - (*static_cast(this), scalar_multiple2_op(scalar)); - } - - inline friend const CwiseUnaryOp, const Derived> - operator*(const OtherScalar& scalar, const Derived& matrix) - { -#ifdef EIGEN_SPECIAL_SCALAR_MULTIPLE_PLUGIN - EIGEN_SPECIAL_SCALAR_MULTIPLE_PLUGIN -#endif - return static_cast(matrix).operator*(scalar); - } - - const CwiseUnaryOp, const Derived> - operator/(const OtherScalar& scalar) const - { -#ifdef EIGEN_SPECIAL_SCALAR_MULTIPLE_PLUGIN - EIGEN_SPECIAL_SCALAR_MULTIPLE_PLUGIN -#endif - return CwiseUnaryOp, const Derived> - (*static_cast(this), scalar_quotient2_op(scalar)); - } -}; - template struct cast_return_type { 
typedef typename XprType::Scalar CurrentScalarType; @@ -622,6 +616,20 @@ struct plain_diag_type >::type type; }; +template +struct plain_constant_type +{ + enum { Options = (traits::Flags&RowMajorBit)?RowMajor:0 }; + + typedef Array::RowsAtCompileTime, traits::ColsAtCompileTime, + Options, traits::MaxRowsAtCompileTime,traits::MaxColsAtCompileTime> array_type; + + typedef Matrix::RowsAtCompileTime, traits::ColsAtCompileTime, + Options, traits::MaxRowsAtCompileTime,traits::MaxColsAtCompileTime> matrix_type; + + typedef CwiseNullaryOp, const typename conditional::XprKind, MatrixXpr >::value, matrix_type, array_type>::type > type; +}; + template struct is_lvalue { @@ -656,10 +664,27 @@ bool is_same_dense(const T1 &, const T2 &, typename enable_if struct is_same_or_void { enum { value = is_same::value }; }; -template struct is_same_or_void { enum { value = 1 }; }; -template struct is_same_or_void { enum { value = 1 }; }; -template<> struct is_same_or_void { enum { value = 1 }; }; +// Internal helper defining the cost of a scalar division for the type T. +// The default heuristic can be specialized for each scalar type and architecture. +template +struct scalar_div_cost { + enum { value = 8*NumTraits::MulCost }; +}; + +template +struct scalar_div_cost, Vectorized> { + enum { value = 2*scalar_div_cost::value + + 6*NumTraits::MulCost + + 3*NumTraits::AddCost + }; +}; + + +template +struct scalar_div_cost::type> { enum { value = 24 }; }; +template +struct scalar_div_cost::type> { enum { value = 21 }; }; + #ifdef EIGEN_DEBUG_ASSIGN std::string demangle_traversal(int t) @@ -695,17 +720,95 @@ std::string demangle_flags(int f) } // end namespace internal -// we require Lhs and Rhs to have the same scalar type. Currently there is no example of a binary functor -// that would take two operands of different types. If there were such an example, then this check should be -// moved to the BinaryOp functors, on a per-case basis. 
This would however require a change in the BinaryOp functors, as -// currently they take only one typename Scalar template parameter. + +/** \class ScalarBinaryOpTraits + * \ingroup Core_Module + * + * \brief Determines whether the given binary operation of two numeric types is allowed and what the scalar return type is. + * + * This class permits to control the scalar return type of any binary operation performed on two different scalar types through (partial) template specializations. + * + * For instance, let \c U1, \c U2 and \c U3 be three user defined scalar types for which most operations between instances of \c U1 and \c U2 returns an \c U3. + * You can let %Eigen knows that by defining: + \code + template + struct ScalarBinaryOpTraits { typedef U3 ReturnType; }; + template + struct ScalarBinaryOpTraits { typedef U3 ReturnType; }; + \endcode + * You can then explicitly disable some particular operations to get more explicit error messages: + \code + template<> + struct ScalarBinaryOpTraits > {}; + \endcode + * Or customize the return type for individual operation: + \code + template<> + struct ScalarBinaryOpTraits > { typedef U1 ReturnType; }; + \endcode + * + * By default, the following generic combinations are supported: + + + + + +
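<table>
<tr><th>ScalarA</th><th>ScalarB</th><th>BinaryOp</th><th>ReturnType</th><th>Note</th></tr>
<tr><td>\c T</td><td>\c T</td><td>\c *</td><td>\c T</td><td></td></tr>
<tr><td>\c NumTraits<T>::Real</td><td>\c T</td><td>\c *</td><td>\c T</td><td>Only if \c NumTraits<T>::IsComplex</td></tr>
<tr><td>\c T</td><td>\c NumTraits<T>::Real</td><td>\c *</td><td>\c T</td><td>Only if \c NumTraits<T>::IsComplex</td></tr>
</table>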
+ * + * \sa CwiseBinaryOp + */ +template > +struct ScalarBinaryOpTraits +#ifndef EIGEN_PARSED_BY_DOXYGEN + // for backward compatibility, use the hints given by the (deprecated) internal::scalar_product_traits class. + : internal::scalar_product_traits +#endif // EIGEN_PARSED_BY_DOXYGEN +{}; + +template +struct ScalarBinaryOpTraits +{ + typedef T ReturnType; +}; + +template +struct ScalarBinaryOpTraits::IsComplex,T>::type>::Real, BinaryOp> +{ + typedef T ReturnType; +}; +template +struct ScalarBinaryOpTraits::IsComplex,T>::type>::Real, T, BinaryOp> +{ + typedef T ReturnType; +}; + +// For Matrix * Permutation +template +struct ScalarBinaryOpTraits +{ + typedef T ReturnType; +}; + +// For Permutation * Matrix +template +struct ScalarBinaryOpTraits +{ + typedef T ReturnType; +}; + +// for Permutation*Permutation +template +struct ScalarBinaryOpTraits +{ + typedef void ReturnType; +}; + +// We require Lhs and Rhs to have "compatible" scalar types. // It is tempting to always allow mixing different types but remember that this is often impossible in the vectorized paths. // So allowing mixing different types gives very unexpected errors when enabling vectorization, when the user tries to // add together a float matrix and a double matrix. #define EIGEN_CHECK_BINARY_COMPATIBILIY(BINOP,LHS,RHS) \ - EIGEN_STATIC_ASSERT((internal::functor_is_product_like::ret \ - ? 
int(internal::scalar_product_traits::Defined) \ - : int(internal::is_same_or_void::value)), \ + EIGEN_STATIC_ASSERT((Eigen::internal::has_ReturnType >::value), \ YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY) } // end namespace Eigen diff --git a/Eigen/src/Eigenvalues/CMakeLists.txt b/Eigen/src/Eigenvalues/CMakeLists.txt deleted file mode 100644 index 193e02685..000000000 --- a/Eigen/src/Eigenvalues/CMakeLists.txt +++ /dev/null @@ -1,6 +0,0 @@ -FILE(GLOB Eigen_EIGENVALUES_SRCS "*.h") - -INSTALL(FILES - ${Eigen_EIGENVALUES_SRCS} - DESTINATION ${INCLUDE_INSTALL_DIR}/Eigen/src/Eigenvalues COMPONENT Devel - ) diff --git a/Eigen/src/Eigenvalues/ComplexSchur_MKL.h b/Eigen/src/Eigenvalues/ComplexSchur_LAPACKE.h similarity index 69% rename from Eigen/src/Eigenvalues/ComplexSchur_MKL.h rename to Eigen/src/Eigenvalues/ComplexSchur_LAPACKE.h index e20c3725b..4980a3ede 100644 --- a/Eigen/src/Eigenvalues/ComplexSchur_MKL.h +++ b/Eigen/src/Eigenvalues/ComplexSchur_LAPACKE.h @@ -25,21 +25,19 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ******************************************************************************** - * Content : Eigen bindings to Intel(R) MKL + * Content : Eigen bindings to LAPACKe * Complex Schur needed to complex unsymmetrical eigenvalues/eigenvectors. 
******************************************************************************** */ -#ifndef EIGEN_COMPLEX_SCHUR_MKL_H -#define EIGEN_COMPLEX_SCHUR_MKL_H - -#include "Eigen/src/Core/util/MKL_support.h" +#ifndef EIGEN_COMPLEX_SCHUR_LAPACKE_H +#define EIGEN_COMPLEX_SCHUR_LAPACKE_H namespace Eigen { -/** \internal Specialization for the data types supported by MKL */ +/** \internal Specialization for the data types supported by LAPACKe */ -#define EIGEN_MKL_SCHUR_COMPLEX(EIGTYPE, MKLTYPE, MKLPREFIX, MKLPREFIX_U, EIGCOLROW, MKLCOLROW) \ +#define EIGEN_LAPACKE_SCHUR_COMPLEX(EIGTYPE, LAPACKE_TYPE, LAPACKE_PREFIX, LAPACKE_PREFIX_U, EIGCOLROW, LAPACKE_COLROW) \ template<> template inline \ ComplexSchur >& \ ComplexSchur >::compute(const EigenBase& matrix, bool computeU) \ @@ -60,18 +58,18 @@ ComplexSchur >::compute(const Eigen m_matUisUptodate = computeU; \ return *this; \ } \ - lapack_int n = matrix.cols(), sdim, info; \ - lapack_int matrix_order = MKLCOLROW; \ + lapack_int n = internal::convert_index(matrix.cols()), sdim, info; \ + lapack_int matrix_order = LAPACKE_COLROW; \ char jobvs, sort='N'; \ - LAPACK_##MKLPREFIX_U##_SELECT1 select = 0; \ + LAPACK_##LAPACKE_PREFIX_U##_SELECT1 select = 0; \ jobvs = (computeU) ? 
'V' : 'N'; \ m_matU.resize(n, n); \ - lapack_int ldvs = m_matU.outerStride(); \ + lapack_int ldvs = internal::convert_index(m_matU.outerStride()); \ m_matT = matrix; \ - lapack_int lda = m_matT.outerStride(); \ + lapack_int lda = internal::convert_index(m_matT.outerStride()); \ Matrix w; \ w.resize(n, 1);\ - info = LAPACKE_##MKLPREFIX##gees( matrix_order, jobvs, sort, select, n, (MKLTYPE*)m_matT.data(), lda, &sdim, (MKLTYPE*)w.data(), (MKLTYPE*)m_matU.data(), ldvs ); \ + info = LAPACKE_##LAPACKE_PREFIX##gees( matrix_order, jobvs, sort, select, n, (LAPACKE_TYPE*)m_matT.data(), lda, &sdim, (LAPACKE_TYPE*)w.data(), (LAPACKE_TYPE*)m_matU.data(), ldvs ); \ if(info == 0) \ m_info = Success; \ else \ @@ -83,11 +81,11 @@ ComplexSchur >::compute(const Eigen \ } -EIGEN_MKL_SCHUR_COMPLEX(dcomplex, MKL_Complex16, z, Z, ColMajor, LAPACK_COL_MAJOR) -EIGEN_MKL_SCHUR_COMPLEX(scomplex, MKL_Complex8, c, C, ColMajor, LAPACK_COL_MAJOR) -EIGEN_MKL_SCHUR_COMPLEX(dcomplex, MKL_Complex16, z, Z, RowMajor, LAPACK_ROW_MAJOR) -EIGEN_MKL_SCHUR_COMPLEX(scomplex, MKL_Complex8, c, C, RowMajor, LAPACK_ROW_MAJOR) +EIGEN_LAPACKE_SCHUR_COMPLEX(dcomplex, lapack_complex_double, z, Z, ColMajor, LAPACK_COL_MAJOR) +EIGEN_LAPACKE_SCHUR_COMPLEX(scomplex, lapack_complex_float, c, C, ColMajor, LAPACK_COL_MAJOR) +EIGEN_LAPACKE_SCHUR_COMPLEX(dcomplex, lapack_complex_double, z, Z, RowMajor, LAPACK_ROW_MAJOR) +EIGEN_LAPACKE_SCHUR_COMPLEX(scomplex, lapack_complex_float, c, C, RowMajor, LAPACK_ROW_MAJOR) } // end namespace Eigen -#endif // EIGEN_COMPLEX_SCHUR_MKL_H +#endif // EIGEN_COMPLEX_SCHUR_LAPACKE_H diff --git a/Eigen/src/Eigenvalues/EigenSolver.h b/Eigen/src/Eigenvalues/EigenSolver.h index 532ca7d63..f205b185d 100644 --- a/Eigen/src/Eigenvalues/EigenSolver.h +++ b/Eigen/src/Eigenvalues/EigenSolver.h @@ -324,11 +324,12 @@ template MatrixType EigenSolver::pseudoEigenvalueMatrix() const { eigen_assert(m_isInitialized && "EigenSolver is not initialized."); + const RealScalar precision = 
RealScalar(2)*NumTraits::epsilon(); Index n = m_eivalues.rows(); MatrixType matD = MatrixType::Zero(n,n); for (Index i=0; i::EigenvectorsType EigenSolver::eige { eigen_assert(m_isInitialized && "EigenSolver is not initialized."); eigen_assert(m_eigenvectorsOk && "The eigenvectors have not been computed together with the eigenvalues."); + const RealScalar precision = RealScalar(2)*NumTraits::epsilon(); Index n = m_eivec.cols(); EigenvectorsType matV(n,n); for (Index j=0; j(); @@ -451,26 +453,6 @@ EigenSolver::compute(const EigenBase& matrix, bool comput return *this; } -// Complex scalar division. -template -std::complex cdiv(const Scalar& xr, const Scalar& xi, const Scalar& yr, const Scalar& yi) -{ - using std::abs; - Scalar r,d; - if (abs(yr) > abs(yi)) - { - r = yi/yr; - d = yr + r*yi; - return std::complex((xr + r*xi)/d, (xi - r*xr)/d); - } - else - { - r = yr/yi; - d = yi + r*yr; - return std::complex((r*xr + xi)/d, (r*xi - xr)/d); - } -} - template void EigenSolver::doComputeEigenvectors() @@ -503,7 +485,7 @@ void EigenSolver::doComputeEigenvectors() Scalar lastr(0), lastw(0); Index l = n; - m_matT.coeffRef(n,n) = 1.0; + m_matT.coeffRef(n,n) = Scalar(1); for (Index i = n-1; i >= 0; i--) { Scalar w = m_matT.coeff(i,i) - p; @@ -557,7 +539,7 @@ void EigenSolver::doComputeEigenvectors() } else { - std::complex cc = cdiv(Scalar(0),-m_matT.coeff(n-1,n),m_matT.coeff(n-1,n-1)-p,q); + ComplexScalar cc = ComplexScalar(Scalar(0),-m_matT.coeff(n-1,n)) / ComplexScalar(m_matT.coeff(n-1,n-1)-p,q); m_matT.coeffRef(n-1,n-1) = numext::real(cc); m_matT.coeffRef(n-1,n) = numext::imag(cc); } @@ -580,7 +562,7 @@ void EigenSolver::doComputeEigenvectors() l = i; if (m_eivalues.coeff(i).imag() == RealScalar(0)) { - std::complex cc = cdiv(-ra,-sa,w,q); + ComplexScalar cc = ComplexScalar(-ra,-sa) / ComplexScalar(w,q); m_matT.coeffRef(i,n-1) = numext::real(cc); m_matT.coeffRef(i,n) = numext::imag(cc); } @@ -594,7 +576,7 @@ void EigenSolver::doComputeEigenvectors() if ((vr == Scalar(0)) 
&& (vi == Scalar(0))) vr = eps * norm * (abs(w) + abs(q) + abs(x) + abs(y) + abs(lastw)); - std::complex cc = cdiv(x*lastra-lastw*ra+q*sa,x*lastsa-lastw*sa-q*ra,vr,vi); + ComplexScalar cc = ComplexScalar(x*lastra-lastw*ra+q*sa,x*lastsa-lastw*sa-q*ra) / ComplexScalar(vr,vi); m_matT.coeffRef(i,n-1) = numext::real(cc); m_matT.coeffRef(i,n) = numext::imag(cc); if (abs(x) > (abs(lastw) + abs(q))) @@ -604,7 +586,7 @@ void EigenSolver::doComputeEigenvectors() } else { - cc = cdiv(-lastra-y*m_matT.coeff(i,n-1),-lastsa-y*m_matT.coeff(i,n),lastw,q); + cc = ComplexScalar(-lastra-y*m_matT.coeff(i,n-1),-lastsa-y*m_matT.coeff(i,n)) / ComplexScalar(lastw,q); m_matT.coeffRef(i+1,n-1) = numext::real(cc); m_matT.coeffRef(i+1,n) = numext::imag(cc); } diff --git a/Eigen/src/Eigenvalues/GeneralizedEigenSolver.h b/Eigen/src/Eigenvalues/GeneralizedEigenSolver.h index a9d6790d5..36a91dffc 100644 --- a/Eigen/src/Eigenvalues/GeneralizedEigenSolver.h +++ b/Eigen/src/Eigenvalues/GeneralizedEigenSolver.h @@ -1,8 +1,9 @@ // This file is part of Eigen, a lightweight C++ template library // for linear algebra. // -// Copyright (C) 2012 Gael Guennebaud +// Copyright (C) 2012-2016 Gael Guennebaud // Copyright (C) 2010,2012 Jitse Niesen +// Copyright (C) 2016 Tobias Wood // // This Source Code Form is subject to the terms of the Mozilla // Public License v. 2.0. If a copy of the MPL was not distributed @@ -89,7 +90,7 @@ template class GeneralizedEigenSolver */ typedef Matrix VectorType; - /** \brief Type for vector of complex scalar values eigenvalues as returned by betas(). + /** \brief Type for vector of complex scalar values eigenvalues as returned by alphas(). * * This is a column vector with entries of type #ComplexScalar. * The length of the vector is the size of #MatrixType. @@ -114,7 +115,14 @@ template class GeneralizedEigenSolver * * \sa compute() for an example. 
*/ - GeneralizedEigenSolver() : m_eivec(), m_alphas(), m_betas(), m_isInitialized(false), m_realQZ(), m_matS(), m_tmp() {} + GeneralizedEigenSolver() + : m_eivec(), + m_alphas(), + m_betas(), + m_valuesOkay(false), + m_vectorsOkay(false), + m_realQZ() + {} /** \brief Default constructor with memory preallocation * @@ -126,10 +134,9 @@ template class GeneralizedEigenSolver : m_eivec(size, size), m_alphas(size), m_betas(size), - m_isInitialized(false), - m_eigenvectorsOk(false), + m_valuesOkay(false), + m_vectorsOkay(false), m_realQZ(size), - m_matS(size, size), m_tmp(size) {} @@ -149,10 +156,9 @@ template class GeneralizedEigenSolver : m_eivec(A.rows(), A.cols()), m_alphas(A.cols()), m_betas(A.cols()), - m_isInitialized(false), - m_eigenvectorsOk(false), + m_valuesOkay(false), + m_vectorsOkay(false), m_realQZ(A.cols()), - m_matS(A.rows(), A.cols()), m_tmp(A.cols()) { compute(A, B, computeEigenvectors); @@ -160,22 +166,20 @@ template class GeneralizedEigenSolver /* \brief Returns the computed generalized eigenvectors. * - * \returns %Matrix whose columns are the (possibly complex) eigenvectors. + * \returns %Matrix whose columns are the (possibly complex) right eigenvectors. + * i.e. the eigenvectors that solve (A - l*B)x = 0. The ordering matches the eigenvalues. * * \pre Either the constructor * GeneralizedEigenSolver(const MatrixType&,const MatrixType&, bool) or the member function * compute(const MatrixType&, const MatrixType& bool) has been called before, and * \p computeEigenvectors was set to true (the default). * - * Column \f$ k \f$ of the returned matrix is an eigenvector corresponding - * to eigenvalue number \f$ k \f$ as returned by eigenvalues(). The - * eigenvectors are normalized to have (Euclidean) norm equal to one. The - * matrix returned by this function is the matrix \f$ V \f$ in the - * generalized eigendecomposition \f$ A = B V D V^{-1} \f$, if it exists. 
- * * \sa eigenvalues() */ -// EigenvectorsType eigenvectors() const; + EigenvectorsType eigenvectors() const { + eigen_assert(m_vectorsOkay && "Eigenvectors for GeneralizedEigenSolver were not calculated."); + return m_eivec; + } /** \brief Returns an expression of the computed generalized eigenvalues. * @@ -197,7 +201,7 @@ template class GeneralizedEigenSolver */ EigenvalueType eigenvalues() const { - eigen_assert(m_isInitialized && "GeneralizedEigenSolver is not initialized."); + eigen_assert(m_valuesOkay && "GeneralizedEigenSolver is not initialized."); return EigenvalueType(m_alphas,m_betas); } @@ -208,7 +212,7 @@ template class GeneralizedEigenSolver * \sa betas(), eigenvalues() */ ComplexVectorType alphas() const { - eigen_assert(m_isInitialized && "GeneralizedEigenSolver is not initialized."); + eigen_assert(m_valuesOkay && "GeneralizedEigenSolver is not initialized."); return m_alphas; } @@ -219,7 +223,7 @@ template class GeneralizedEigenSolver * \sa alphas(), eigenvalues() */ VectorType betas() const { - eigen_assert(m_isInitialized && "GeneralizedEigenSolver is not initialized."); + eigen_assert(m_valuesOkay && "GeneralizedEigenSolver is not initialized."); return m_betas; } @@ -250,7 +254,7 @@ template class GeneralizedEigenSolver ComputationInfo info() const { - eigen_assert(m_isInitialized && "EigenSolver is not initialized."); + eigen_assert(m_valuesOkay && "EigenSolver is not initialized."); return m_realQZ.info(); } @@ -270,29 +274,14 @@ template class GeneralizedEigenSolver EIGEN_STATIC_ASSERT(!NumTraits::IsComplex, NUMERIC_TYPE_MUST_BE_REAL); } - MatrixType m_eivec; + EigenvectorsType m_eivec; ComplexVectorType m_alphas; VectorType m_betas; - bool m_isInitialized; - bool m_eigenvectorsOk; + bool m_valuesOkay, m_vectorsOkay; RealQZ m_realQZ; - MatrixType m_matS; - - typedef Matrix ColumnVectorType; - ColumnVectorType m_tmp; + ComplexVectorType m_tmp; }; -//template -//typename GeneralizedEigenSolver::EigenvectorsType 
GeneralizedEigenSolver::eigenvectors() const -//{ -// eigen_assert(m_isInitialized && "EigenSolver is not initialized."); -// eigen_assert(m_eigenvectorsOk && "The eigenvectors have not been computed together with the eigenvalues."); -// Index n = m_eivec.cols(); -// EigenvectorsType matV(n,n); -// // TODO -// return matV; -//} - template GeneralizedEigenSolver& GeneralizedEigenSolver::compute(const MatrixType& A, const MatrixType& B, bool computeEigenvectors) @@ -302,46 +291,126 @@ GeneralizedEigenSolver::compute(const MatrixType& A, const MatrixTyp using std::sqrt; using std::abs; eigen_assert(A.cols() == A.rows() && B.cols() == A.rows() && B.cols() == B.rows()); - + Index size = A.cols(); + m_valuesOkay = false; + m_vectorsOkay = false; // Reduce to generalized real Schur form: // A = Q S Z and B = Q T Z m_realQZ.compute(A, B, computeEigenvectors); - if (m_realQZ.info() == Success) { - m_matS = m_realQZ.matrixS(); + // Resize storage + m_alphas.resize(size); + m_betas.resize(size); if (computeEigenvectors) - m_eivec = m_realQZ.matrixZ().transpose(); - - // Compute eigenvalues from matS - m_alphas.resize(A.cols()); - m_betas.resize(A.cols()); - Index i = 0; - while (i < A.cols()) { - if (i == A.cols() - 1 || m_matS.coeff(i+1, i) == Scalar(0)) + m_eivec.resize(size,size); + m_tmp.resize(size); + } + + // Aliases: + Map v(reinterpret_cast(m_tmp.data()), size); + ComplexVectorType &cv = m_tmp; + const MatrixType &mZ = m_realQZ.matrixZ(); + const MatrixType &mS = m_realQZ.matrixS(); + const MatrixType &mT = m_realQZ.matrixT(); + + Index i = 0; + while (i < size) + { + if (i == size - 1 || mS.coeff(i+1, i) == Scalar(0)) { - m_alphas.coeffRef(i) = m_matS.coeff(i, i); - m_betas.coeffRef(i) = m_realQZ.matrixT().coeff(i,i); + // Real eigenvalue + m_alphas.coeffRef(i) = mS.diagonal().coeff(i); + m_betas.coeffRef(i) = mT.diagonal().coeff(i); + if (computeEigenvectors) + { + v.setConstant(Scalar(0.0)); + v.coeffRef(i) = Scalar(1.0); + // For singular eigenvalues do nothing 
more + if(abs(m_betas.coeffRef(i)) >= (std::numeric_limits::min)()) + { + // Non-singular eigenvalue + const Scalar alpha = real(m_alphas.coeffRef(i)); + const Scalar beta = m_betas.coeffRef(i); + for (Index j = i-1; j >= 0; j--) + { + const Index st = j+1; + const Index sz = i-j; + if (j > 0 && mS.coeff(j, j-1) != Scalar(0)) + { + // 2x2 block + Matrix rhs = (alpha*mT.template block<2,Dynamic>(j-1,st,2,sz) - beta*mS.template block<2,Dynamic>(j-1,st,2,sz)) .lazyProduct( v.segment(st,sz) ); + Matrix lhs = beta * mS.template block<2,2>(j-1,j-1) - alpha * mT.template block<2,2>(j-1,j-1); + v.template segment<2>(j-1) = lhs.partialPivLu().solve(rhs); + j--; + } + else + { + v.coeffRef(j) = -v.segment(st,sz).transpose().cwiseProduct(beta*mS.block(j,st,1,sz) - alpha*mT.block(j,st,1,sz)).sum() / (beta*mS.coeffRef(j,j) - alpha*mT.coeffRef(j,j)); + } + } + } + m_eivec.col(i).real().noalias() = mZ.transpose() * v; + m_eivec.col(i).real().normalize(); + m_eivec.col(i).imag().setConstant(0); + } ++i; } else { - Scalar p = Scalar(0.5) * (m_matS.coeff(i, i) - m_matS.coeff(i+1, i+1)); - Scalar z = sqrt(abs(p * p + m_matS.coeff(i+1, i) * m_matS.coeff(i, i+1))); - m_alphas.coeffRef(i) = ComplexScalar(m_matS.coeff(i+1, i+1) + p, z); - m_alphas.coeffRef(i+1) = ComplexScalar(m_matS.coeff(i+1, i+1) + p, -z); + // We need to extract the generalized eigenvalues of the pair of a general 2x2 block S and a positive diagonal 2x2 block T + // Then taking beta=T_00*T_11, we can avoid any division, and alpha is the eigenvalues of A = (U^-1 * S * U) * diag(T_11,T_00): - m_betas.coeffRef(i) = m_realQZ.matrixT().coeff(i,i); - m_betas.coeffRef(i+1) = m_realQZ.matrixT().coeff(i,i); + // T = [a 0] + // [0 b] + RealScalar a = mT.diagonal().coeff(i), + b = mT.diagonal().coeff(i+1); + const RealScalar beta = m_betas.coeffRef(i) = m_betas.coeffRef(i+1) = a*b; + + // ^^ NOTE: using diagonal()(i) instead of coeff(i,i) workarounds a MSVC bug. 
+ Matrix S2 = mS.template block<2,2>(i,i) * Matrix(b,a).asDiagonal(); + + Scalar p = Scalar(0.5) * (S2.coeff(0,0) - S2.coeff(1,1)); + Scalar z = sqrt(abs(p * p + S2.coeff(1,0) * S2.coeff(0,1))); + const ComplexScalar alpha = ComplexScalar(S2.coeff(1,1) + p, (beta > 0) ? z : -z); + m_alphas.coeffRef(i) = conj(alpha); + m_alphas.coeffRef(i+1) = alpha; + + if (computeEigenvectors) { + // Compute eigenvector in position (i+1) and then position (i) is just the conjugate + cv.setZero(); + cv.coeffRef(i+1) = Scalar(1.0); + // here, the "static_cast" workaound expression template issues. + cv.coeffRef(i) = -(static_cast(beta*mS.coeffRef(i,i+1)) - alpha*mT.coeffRef(i,i+1)) + / (static_cast(beta*mS.coeffRef(i,i)) - alpha*mT.coeffRef(i,i)); + for (Index j = i-1; j >= 0; j--) + { + const Index st = j+1; + const Index sz = i+1-j; + if (j > 0 && mS.coeff(j, j-1) != Scalar(0)) + { + // 2x2 block + Matrix rhs = (alpha*mT.template block<2,Dynamic>(j-1,st,2,sz) - beta*mS.template block<2,Dynamic>(j-1,st,2,sz)) .lazyProduct( cv.segment(st,sz) ); + Matrix lhs = beta * mS.template block<2,2>(j-1,j-1) - alpha * mT.template block<2,2>(j-1,j-1); + cv.template segment<2>(j-1) = lhs.partialPivLu().solve(rhs); + j--; + } else { + cv.coeffRef(j) = cv.segment(st,sz).transpose().cwiseProduct(beta*mS.block(j,st,1,sz) - alpha*mT.block(j,st,1,sz)).sum() + / (alpha*mT.coeffRef(j,j) - static_cast(beta*mS.coeffRef(j,j))); + } + } + m_eivec.col(i+1).noalias() = (mZ.transpose() * cv); + m_eivec.col(i+1).normalize(); + m_eivec.col(i) = m_eivec.col(i+1).conjugate(); + } i += 2; } } + + m_valuesOkay = true; + m_vectorsOkay = computeEigenvectors; } - - m_isInitialized = true; - m_eigenvectorsOk = false;//computeEigenvectors; - return *this; } diff --git a/Eigen/src/Eigenvalues/RealQZ.h b/Eigen/src/Eigenvalues/RealQZ.h index a62071d42..b3a910dd9 100644 --- a/Eigen/src/Eigenvalues/RealQZ.h +++ b/Eigen/src/Eigenvalues/RealQZ.h @@ -552,7 +552,6 @@ namespace Eigen { m_T.coeffRef(l,l-1) = Scalar(0.0); } - 
template RealQZ& RealQZ::compute(const MatrixType& A_in, const MatrixType& B_in, bool computeQZ) { @@ -616,6 +615,37 @@ namespace Eigen { } // check if we converged before reaching iterations limit m_info = (local_iter j_left, j_right; + internal::real_2x2_jacobi_svd(m_T, i, i+1, &j_left, &j_right); + + // Apply resulting Jacobi rotations + m_S.applyOnTheLeft(i,i+1,j_left); + m_S.applyOnTheRight(i,i+1,j_right); + m_T.applyOnTheLeft(i,i+1,j_left); + m_T.applyOnTheRight(i,i+1,j_right); + m_T(i+1,i) = m_T(i,i+1) = Scalar(0); + + if(m_computeQZ) { + m_Q.applyOnTheRight(i,i+1,j_left.transpose()); + m_Z.applyOnTheLeft(i,i+1,j_right.transpose()); + } + + i++; + } + } + } + return *this; } // end compute diff --git a/Eigen/src/Eigenvalues/RealSchur.h b/Eigen/src/Eigenvalues/RealSchur.h index f4ded69b6..d6a339f07 100644 --- a/Eigen/src/Eigenvalues/RealSchur.h +++ b/Eigen/src/Eigenvalues/RealSchur.h @@ -253,19 +253,25 @@ RealSchur& RealSchur::compute(const EigenBase if (maxIters == -1) maxIters = m_maxIterationsPerRow * matrix.rows(); + Scalar scale = matrix.derived().cwiseAbs().maxCoeff(); + // Step 1. Reduce to Hessenberg form - m_hess.compute(matrix.derived()); + m_hess.compute(matrix.derived()/scale); // Step 2. 
Reduce to real Schur form computeFromHessenberg(m_hess.matrixH(), m_hess.matrixQ(), computeU); + + m_matT *= scale; return *this; } template template RealSchur& RealSchur::computeFromHessenberg(const HessMatrixType& matrixH, const OrthMatrixType& matrixQ, bool computeU) -{ - m_matT = matrixH; +{ + using std::abs; + + m_matT = matrixH; if(computeU) m_matU = matrixQ; diff --git a/Eigen/src/Eigenvalues/RealSchur_MKL.h b/Eigen/src/Eigenvalues/RealSchur_LAPACKE.h similarity index 67% rename from Eigen/src/Eigenvalues/RealSchur_MKL.h rename to Eigen/src/Eigenvalues/RealSchur_LAPACKE.h index e80926400..2c2251715 100644 --- a/Eigen/src/Eigenvalues/RealSchur_MKL.h +++ b/Eigen/src/Eigenvalues/RealSchur_LAPACKE.h @@ -25,39 +25,37 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ******************************************************************************** - * Content : Eigen bindings to Intel(R) MKL + * Content : Eigen bindings to LAPACKe * Real Schur needed to real unsymmetrical eigenvalues/eigenvectors. 
******************************************************************************** */ -#ifndef EIGEN_REAL_SCHUR_MKL_H -#define EIGEN_REAL_SCHUR_MKL_H - -#include "Eigen/src/Core/util/MKL_support.h" +#ifndef EIGEN_REAL_SCHUR_LAPACKE_H +#define EIGEN_REAL_SCHUR_LAPACKE_H namespace Eigen { -/** \internal Specialization for the data types supported by MKL */ +/** \internal Specialization for the data types supported by LAPACKe */ -#define EIGEN_MKL_SCHUR_REAL(EIGTYPE, MKLTYPE, MKLPREFIX, MKLPREFIX_U, EIGCOLROW, MKLCOLROW) \ +#define EIGEN_LAPACKE_SCHUR_REAL(EIGTYPE, LAPACKE_TYPE, LAPACKE_PREFIX, LAPACKE_PREFIX_U, EIGCOLROW, LAPACKE_COLROW) \ template<> template inline \ RealSchur >& \ RealSchur >::compute(const EigenBase& matrix, bool computeU) \ { \ eigen_assert(matrix.cols() == matrix.rows()); \ \ - lapack_int n = matrix.cols(), sdim, info; \ - lapack_int matrix_order = MKLCOLROW; \ + lapack_int n = internal::convert_index(matrix.cols()), sdim, info; \ + lapack_int matrix_order = LAPACKE_COLROW; \ char jobvs, sort='N'; \ - LAPACK_##MKLPREFIX_U##_SELECT2 select = 0; \ + LAPACK_##LAPACKE_PREFIX_U##_SELECT2 select = 0; \ jobvs = (computeU) ? 
'V' : 'N'; \ m_matU.resize(n, n); \ - lapack_int ldvs = m_matU.outerStride(); \ + lapack_int ldvs = internal::convert_index(m_matU.outerStride()); \ m_matT = matrix; \ - lapack_int lda = m_matT.outerStride(); \ + lapack_int lda = internal::convert_index(m_matT.outerStride()); \ Matrix wr, wi; \ wr.resize(n, 1); wi.resize(n, 1); \ - info = LAPACKE_##MKLPREFIX##gees( matrix_order, jobvs, sort, select, n, (MKLTYPE*)m_matT.data(), lda, &sdim, (MKLTYPE*)wr.data(), (MKLTYPE*)wi.data(), (MKLTYPE*)m_matU.data(), ldvs ); \ + info = LAPACKE_##LAPACKE_PREFIX##gees( matrix_order, jobvs, sort, select, n, (LAPACKE_TYPE*)m_matT.data(), lda, &sdim, (LAPACKE_TYPE*)wr.data(), (LAPACKE_TYPE*)wi.data(), (LAPACKE_TYPE*)m_matU.data(), ldvs ); \ if(info == 0) \ m_info = Success; \ else \ @@ -69,11 +67,11 @@ RealSchur >::compute(const EigenBas \ } -EIGEN_MKL_SCHUR_REAL(double, double, d, D, ColMajor, LAPACK_COL_MAJOR) -EIGEN_MKL_SCHUR_REAL(float, float, s, S, ColMajor, LAPACK_COL_MAJOR) -EIGEN_MKL_SCHUR_REAL(double, double, d, D, RowMajor, LAPACK_ROW_MAJOR) -EIGEN_MKL_SCHUR_REAL(float, float, s, S, RowMajor, LAPACK_ROW_MAJOR) +EIGEN_LAPACKE_SCHUR_REAL(double, double, d, D, ColMajor, LAPACK_COL_MAJOR) +EIGEN_LAPACKE_SCHUR_REAL(float, float, s, S, ColMajor, LAPACK_COL_MAJOR) +EIGEN_LAPACKE_SCHUR_REAL(double, double, d, D, RowMajor, LAPACK_ROW_MAJOR) +EIGEN_LAPACKE_SCHUR_REAL(float, float, s, S, RowMajor, LAPACK_ROW_MAJOR) } // end namespace Eigen -#endif // EIGEN_REAL_SCHUR_MKL_H +#endif // EIGEN_REAL_SCHUR_LAPACKE_H diff --git a/Eigen/src/Eigenvalues/SelfAdjointEigenSolver.h b/Eigen/src/Eigenvalues/SelfAdjointEigenSolver.h index 469ea5e4e..a9f56c4f5 100644 --- a/Eigen/src/Eigenvalues/SelfAdjointEigenSolver.h +++ b/Eigen/src/Eigenvalues/SelfAdjointEigenSolver.h @@ -414,7 +414,7 @@ SelfAdjointEigenSolver& SelfAdjointEigenSolver if(n==1) { - m_eivalues.coeffRef(0,0) = numext::real(matrix(0,0)); + m_eivalues.coeffRef(0,0) = numext::real(matrix.diagonal()[0]); if(computeEigenvectors) 
m_eivec.setOnes(n,n); m_info = Success; @@ -458,7 +458,7 @@ SelfAdjointEigenSolver& SelfAdjointEigenSolver { m_eivec.setIdentity(diag.size(), diag.size()); } - m_info = computeFromTridiagonal_impl(m_eivalues, m_subdiag, m_maxIterations, computeEigenvectors, m_eivec); + m_info = internal::computeFromTridiagonal_impl(m_eivalues, m_subdiag, m_maxIterations, computeEigenvectors, m_eivec); m_isInitialized = true; m_eigenvectorsOk = computeEigenvectors; @@ -492,15 +492,16 @@ ComputationInfo computeFromTridiagonal_impl(DiagType& diag, SubDiagType& subdiag typedef typename DiagType::RealScalar RealScalar; const RealScalar considerAsZero = (std::numeric_limits::min)(); + const RealScalar precision = RealScalar(2)*NumTraits::epsilon(); while (end>0) { for (Index i = start; i0 && subdiag[end-1]==0) + while (end>0 && subdiag[end-1]==RealScalar(0)) { end--; } @@ -568,8 +569,8 @@ template struct direct_selfadjoint_eigenvalues EigenvectorsType& eivecs = solver.m_eivec; VectorType& eivals = solver.m_eivalues; - // map the matrix coefficients to [-1:1] to avoid over- and underflow. - Scalar scale = mat.cwiseAbs().maxCoeff(); - scale = numext::maxi(scale,Scalar(1)); - MatrixType scaledMat = mat / scale; - + // Shift the matrix to the mean eigenvalue and map the matrix coefficients to [-1:1] to avoid over- and underflow. + Scalar shift = mat.trace() / Scalar(2); + MatrixType scaledMat = mat; + scaledMat.coeffRef(0,1) = mat.coeff(1,0); + scaledMat.diagonal().array() -= shift; + Scalar scale = scaledMat.cwiseAbs().maxCoeff(); + if(scale > Scalar(0)) + scaledMat /= scale; + // Compute the eigenvalues computeRoots(scaledMat,eivals); - + // compute the eigen vectors if(computeEigenvectors) { @@ -774,10 +779,11 @@ struct direct_selfadjoint_eigenvalues eivecs.col(0) << eivecs.col(1).unitOrthogonal(); } } - + // Rescale back to the original size. 
eivals *= scale; - + eivals.array() += shift; + solver.m_info = Success; solver.m_isInitialized = true; solver.m_eigenvectorsOk = computeEigenvectors; @@ -809,14 +815,14 @@ static void tridiagonal_qr_step(RealScalar* diag, RealScalar* subdiag, Index sta // RealScalar mu = diag[end] - e2 / (td + (td>0 ? 1 : -1) * sqrt(td*td + e2)); // This explain the following, somewhat more complicated, version: RealScalar mu = diag[end]; - if(td==0) + if(td==RealScalar(0)) mu -= abs(e); else { RealScalar e2 = numext::abs2(subdiag[end-1]); RealScalar h = numext::hypot(td,e); - if(e2==0) mu -= (e / (td + (td>0 ? 1 : -1))) * (e / h); - else mu -= e2 / (td + (td>0 ? h : -h)); + if(e2==RealScalar(0)) mu -= (e / (td + (td>RealScalar(0) ? RealScalar(1) : RealScalar(-1)))) * (e / h); + else mu -= e2 / (td + (td>RealScalar(0) ? h : -h)); } RealScalar x = diag[start] - mu; diff --git a/Eigen/src/Eigenvalues/SelfAdjointEigenSolver_MKL.h b/Eigen/src/Eigenvalues/SelfAdjointEigenSolver_LAPACKE.h similarity index 66% rename from Eigen/src/Eigenvalues/SelfAdjointEigenSolver_MKL.h rename to Eigen/src/Eigenvalues/SelfAdjointEigenSolver_LAPACKE.h index 3499dc78a..3891cf883 100644 --- a/Eigen/src/Eigenvalues/SelfAdjointEigenSolver_MKL.h +++ b/Eigen/src/Eigenvalues/SelfAdjointEigenSolver_LAPACKE.h @@ -25,21 +25,19 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ******************************************************************************** - * Content : Eigen bindings to Intel(R) MKL + * Content : Eigen bindings to LAPACKe * Self-adjoint eigenvalues/eigenvectors. 
******************************************************************************** */ -#ifndef EIGEN_SAEIGENSOLVER_MKL_H -#define EIGEN_SAEIGENSOLVER_MKL_H - -#include "Eigen/src/Core/util/MKL_support.h" +#ifndef EIGEN_SAEIGENSOLVER_LAPACKE_H +#define EIGEN_SAEIGENSOLVER_LAPACKE_H namespace Eigen { -/** \internal Specialization for the data types supported by MKL */ +/** \internal Specialization for the data types supported by LAPACKe */ -#define EIGEN_MKL_EIG_SELFADJ(EIGTYPE, MKLTYPE, MKLRTYPE, MKLNAME, EIGCOLROW, MKLCOLROW ) \ +#define EIGEN_LAPACKE_EIG_SELFADJ(EIGTYPE, LAPACKE_TYPE, LAPACKE_RTYPE, LAPACKE_NAME, EIGCOLROW, LAPACKE_COLROW ) \ template<> template inline \ SelfAdjointEigenSolver >& \ SelfAdjointEigenSolver >::compute(const EigenBase& matrix, int options) \ @@ -49,7 +47,7 @@ SelfAdjointEigenSolver >::compute(c && (options&EigVecMask)!=EigVecMask \ && "invalid option parameter"); \ bool computeEigenvectors = (options&ComputeEigenvectors)==ComputeEigenvectors; \ - lapack_int n = matrix.cols(), lda, matrix_order, info; \ + lapack_int n = internal::convert_index(matrix.cols()), lda, matrix_order, info; \ m_eivalues.resize(n,1); \ m_subdiag.resize(n-1); \ m_eivec = matrix; \ @@ -64,12 +62,12 @@ SelfAdjointEigenSolver >::compute(c return *this; \ } \ \ - lda = m_eivec.outerStride(); \ - matrix_order=MKLCOLROW; \ + lda = internal::convert_index(m_eivec.outerStride()); \ + matrix_order=LAPACKE_COLROW; \ char jobz, uplo='L'/*, range='A'*/; \ jobz = computeEigenvectors ? 'V' : 'N'; \ \ - info = LAPACKE_##MKLNAME( matrix_order, jobz, uplo, n, (MKLTYPE*)m_eivec.data(), lda, (MKLRTYPE*)m_eivalues.data() ); \ + info = LAPACKE_##LAPACKE_NAME( matrix_order, jobz, uplo, n, (LAPACKE_TYPE*)m_eivec.data(), lda, (LAPACKE_RTYPE*)m_eivalues.data() ); \ m_info = (info==0) ? 
Success : NoConvergence; \ m_isInitialized = true; \ m_eigenvectorsOk = computeEigenvectors; \ @@ -77,15 +75,15 @@ SelfAdjointEigenSolver >::compute(c } -EIGEN_MKL_EIG_SELFADJ(double, double, double, dsyev, ColMajor, LAPACK_COL_MAJOR) -EIGEN_MKL_EIG_SELFADJ(float, float, float, ssyev, ColMajor, LAPACK_COL_MAJOR) -EIGEN_MKL_EIG_SELFADJ(dcomplex, MKL_Complex16, double, zheev, ColMajor, LAPACK_COL_MAJOR) -EIGEN_MKL_EIG_SELFADJ(scomplex, MKL_Complex8, float, cheev, ColMajor, LAPACK_COL_MAJOR) +EIGEN_LAPACKE_EIG_SELFADJ(double, double, double, dsyev, ColMajor, LAPACK_COL_MAJOR) +EIGEN_LAPACKE_EIG_SELFADJ(float, float, float, ssyev, ColMajor, LAPACK_COL_MAJOR) +EIGEN_LAPACKE_EIG_SELFADJ(dcomplex, lapack_complex_double, double, zheev, ColMajor, LAPACK_COL_MAJOR) +EIGEN_LAPACKE_EIG_SELFADJ(scomplex, lapack_complex_float, float, cheev, ColMajor, LAPACK_COL_MAJOR) -EIGEN_MKL_EIG_SELFADJ(double, double, double, dsyev, RowMajor, LAPACK_ROW_MAJOR) -EIGEN_MKL_EIG_SELFADJ(float, float, float, ssyev, RowMajor, LAPACK_ROW_MAJOR) -EIGEN_MKL_EIG_SELFADJ(dcomplex, MKL_Complex16, double, zheev, RowMajor, LAPACK_ROW_MAJOR) -EIGEN_MKL_EIG_SELFADJ(scomplex, MKL_Complex8, float, cheev, RowMajor, LAPACK_ROW_MAJOR) +EIGEN_LAPACKE_EIG_SELFADJ(double, double, double, dsyev, RowMajor, LAPACK_ROW_MAJOR) +EIGEN_LAPACKE_EIG_SELFADJ(float, float, float, ssyev, RowMajor, LAPACK_ROW_MAJOR) +EIGEN_LAPACKE_EIG_SELFADJ(dcomplex, lapack_complex_double, double, zheev, RowMajor, LAPACK_ROW_MAJOR) +EIGEN_LAPACKE_EIG_SELFADJ(scomplex, lapack_complex_float, float, cheev, RowMajor, LAPACK_ROW_MAJOR) } // end namespace Eigen diff --git a/Eigen/src/Eigenvalues/Tridiagonalization.h b/Eigen/src/Eigenvalues/Tridiagonalization.h index 2030b5be1..1d102c17b 100644 --- a/Eigen/src/Eigenvalues/Tridiagonalization.h +++ b/Eigen/src/Eigenvalues/Tridiagonalization.h @@ -367,10 +367,10 @@ void tridiagonalization_inplace(MatrixType& matA, CoeffVectorType& hCoeffs) hCoeffs.tail(n-i-1).noalias() = 
(matA.bottomRightCorner(remainingSize,remainingSize).template selfadjointView() * (conj(h) * matA.col(i).tail(remainingSize))); - hCoeffs.tail(n-i-1) += (conj(h)*Scalar(-0.5)*(hCoeffs.tail(remainingSize).dot(matA.col(i).tail(remainingSize)))) * matA.col(i).tail(n-i-1); + hCoeffs.tail(n-i-1) += (conj(h)*RealScalar(-0.5)*(hCoeffs.tail(remainingSize).dot(matA.col(i).tail(remainingSize)))) * matA.col(i).tail(n-i-1); matA.bottomRightCorner(remainingSize, remainingSize).template selfadjointView() - .rankUpdate(matA.col(i).tail(remainingSize), hCoeffs.tail(remainingSize), -1); + .rankUpdate(matA.col(i).tail(remainingSize), hCoeffs.tail(remainingSize), Scalar(-1)); matA.col(i).coeffRef(i+1) = beta; hCoeffs.coeffRef(i) = h; diff --git a/Eigen/src/Geometry/AlignedBox.h b/Eigen/src/Geometry/AlignedBox.h index 03f1a11f8..d20d17492 100644 --- a/Eigen/src/Geometry/AlignedBox.h +++ b/Eigen/src/Geometry/AlignedBox.h @@ -36,8 +36,9 @@ EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF_VECTORIZABLE_FIXED_SIZE(_Scalar,_AmbientDim) typedef NumTraits ScalarTraits; typedef Eigen::Index Index; ///< \deprecated since Eigen 3.3 typedef typename ScalarTraits::Real RealScalar; - typedef typename ScalarTraits::NonInteger NonInteger; + typedef typename ScalarTraits::NonInteger NonInteger; typedef Matrix VectorType; + typedef CwiseBinaryOp, const VectorType, const VectorType> VectorTypeSum; /** Define constants to name the corners of a 1D, 2D or 3D axis aligned bounding box */ enum CornerType @@ -111,16 +112,15 @@ EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF_VECTORIZABLE_FIXED_SIZE(_Scalar,_AmbientDim) inline VectorType& (max)() { return m_max; } /** \returns the center of the box */ - inline const CwiseUnaryOp, - const CwiseBinaryOp, const VectorType, const VectorType> > + inline const EIGEN_EXPR_BINARYOP_SCALAR_RETURN_TYPE(VectorTypeSum, RealScalar, quotient) center() const - { return (m_min+m_max)/2; } + { return (m_min+m_max)/RealScalar(2); } /** \returns the lengths of the sides of the bounding box. 
* Note that this function does not get the same * result for integral or floating scalar types: see */ - inline const CwiseBinaryOp< internal::scalar_difference_op, const VectorType, const VectorType> sizes() const + inline const CwiseBinaryOp< internal::scalar_difference_op, const VectorType, const VectorType> sizes() const { return m_max - m_min; } /** \returns the volume of the bounding box */ @@ -131,7 +131,7 @@ EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF_VECTORIZABLE_FIXED_SIZE(_Scalar,_AmbientDim) * if the length of the diagonal is needed: diagonal().norm() * will provide it. */ - inline CwiseBinaryOp< internal::scalar_difference_op, const VectorType, const VectorType> diagonal() const + inline CwiseBinaryOp< internal::scalar_difference_op, const VectorType, const VectorType> diagonal() const { return sizes(); } /** \returns the vertex of the bounding box at the corner defined by diff --git a/Eigen/src/Geometry/AngleAxis.h b/Eigen/src/Geometry/AngleAxis.h index 7fdb8ae83..571062d00 100644 --- a/Eigen/src/Geometry/AngleAxis.h +++ b/Eigen/src/Geometry/AngleAxis.h @@ -158,7 +158,8 @@ typedef AngleAxis AngleAxisf; typedef AngleAxis AngleAxisd; /** Set \c *this from a \b unit quaternion. - * The resulting axis is normalized. + * + * The resulting axis is normalized, and the computed angle is in the [0,pi] range. * * This function implicitly normalizes the quaternion \a q. 
*/ @@ -167,12 +168,16 @@ template AngleAxis& AngleAxis::operator=(const QuaternionBase& q) { using std::atan2; + using std::abs; Scalar n = q.vec().norm(); if(n::epsilon()) n = q.vec().stableNorm(); - if (n > Scalar(0)) + + if (n != Scalar(0)) { - m_angle = Scalar(2)*atan2(n, q.w()); + m_angle = Scalar(2)*atan2(n, abs(q.w())); + if(q.w() < 0) + n = -n; m_axis = q.vec() / n; } else diff --git a/Eigen/src/Geometry/CMakeLists.txt b/Eigen/src/Geometry/CMakeLists.txt deleted file mode 100644 index f8f728b84..000000000 --- a/Eigen/src/Geometry/CMakeLists.txt +++ /dev/null @@ -1,8 +0,0 @@ -FILE(GLOB Eigen_Geometry_SRCS "*.h") - -INSTALL(FILES - ${Eigen_Geometry_SRCS} - DESTINATION ${INCLUDE_INSTALL_DIR}/Eigen/src/Geometry COMPONENT Devel - ) - -ADD_SUBDIRECTORY(arch) diff --git a/Eigen/src/Geometry/EulerAngles.h b/Eigen/src/Geometry/EulerAngles.h index b875b7a13..4865e58aa 100644 --- a/Eigen/src/Geometry/EulerAngles.h +++ b/Eigen/src/Geometry/EulerAngles.h @@ -55,7 +55,12 @@ MatrixBase::eulerAngles(Index a0, Index a1, Index a2) const res[0] = atan2(coeff(j,i), coeff(k,i)); if((odd && res[0]Scalar(0))) { - res[0] = (res[0] > Scalar(0)) ? res[0] - Scalar(EIGEN_PI) : res[0] + Scalar(EIGEN_PI); + if(res[0] > Scalar(0)) { + res[0] -= Scalar(EIGEN_PI); + } + else { + res[0] += Scalar(EIGEN_PI); + } Scalar s2 = Vector2(coeff(j,i), coeff(k,i)).norm(); res[1] = -atan2(s2, coeff(i,i)); } @@ -84,7 +89,12 @@ MatrixBase::eulerAngles(Index a0, Index a1, Index a2) const res[0] = atan2(coeff(j,k), coeff(k,k)); Scalar c2 = Vector2(coeff(i,i), coeff(i,j)).norm(); if((odd && res[0]Scalar(0))) { - res[0] = (res[0] > Scalar(0)) ? 
res[0] - Scalar(EIGEN_PI) : res[0] + Scalar(EIGEN_PI); + if(res[0] > Scalar(0)) { + res[0] -= Scalar(EIGEN_PI); + } + else { + res[0] += Scalar(EIGEN_PI); + } res[1] = atan2(-coeff(i,k), -c2); } else diff --git a/Eigen/src/Geometry/Homogeneous.h b/Eigen/src/Geometry/Homogeneous.h index cd52b5470..a23068c8d 100644 --- a/Eigen/src/Geometry/Homogeneous.h +++ b/Eigen/src/Geometry/Homogeneous.h @@ -329,10 +329,10 @@ protected: // dense = homogeneous template< typename DstXprType, typename ArgType, typename Scalar> -struct Assignment, internal::assign_op, Dense2Dense, Scalar> +struct Assignment, internal::assign_op, Dense2Dense> { typedef Homogeneous SrcXprType; - static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op &) + static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op &) { dst.template topRows(src.nestedExpression().rows()) = src.nestedExpression(); dst.row(dst.rows()-1).setOnes(); @@ -341,10 +341,10 @@ struct Assignment, internal::assign_op // dense = homogeneous template< typename DstXprType, typename ArgType, typename Scalar> -struct Assignment, internal::assign_op, Dense2Dense, Scalar> +struct Assignment, internal::assign_op, Dense2Dense> { typedef Homogeneous SrcXprType; - static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op &) + static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op &) { dst.template leftCols(src.nestedExpression().cols()) = src.nestedExpression(); dst.col(dst.cols()-1).setOnes(); @@ -373,7 +373,7 @@ struct homogeneous_right_product_refactoring_helper typedef typename Rhs::ConstRowXpr ConstantColumn; typedef Replicate ConstantBlock; typedef Product LinearProduct; - typedef CwiseBinaryOp, const LinearProduct, const ConstantBlock> Xpr; + typedef CwiseBinaryOp, const LinearProduct, const ConstantBlock> Xpr; }; template @@ -402,6 +402,18 @@ struct generic_product_impl, DenseShape, Homog } }; +// TODO: the following specialization is to 
address a regression from 3.2 to 3.3 +// In the future, this path should be optimized. +template +struct generic_product_impl, TriangularShape, HomogeneousShape, ProductTag> +{ + template + static void evalTo(Dest& dst, const Lhs& lhs, const Homogeneous& rhs) + { + dst.noalias() = lhs * rhs.eval(); + } +}; + template struct homogeneous_left_product_refactoring_helper { @@ -414,7 +426,7 @@ struct homogeneous_left_product_refactoring_helper typedef typename Lhs::ConstColXpr ConstantColumn; typedef Replicate ConstantBlock; typedef Product LinearProduct; - typedef CwiseBinaryOp, const LinearProduct, const ConstantBlock> Xpr; + typedef CwiseBinaryOp, const LinearProduct, const ConstantBlock> Xpr; }; template diff --git a/Eigen/src/Geometry/Quaternion.h b/Eigen/src/Geometry/Quaternion.h index 32d1499c6..c4a0eabb5 100644 --- a/Eigen/src/Geometry/Quaternion.h +++ b/Eigen/src/Geometry/Quaternion.h @@ -271,6 +271,8 @@ public: explicit inline Quaternion(const Quaternion& other) { m_coeffs = other.coeffs().template cast(); } + static Quaternion UnitRandom(); + template static Quaternion FromTwoVectors(const MatrixBase& a, const MatrixBase& b); @@ -609,6 +611,24 @@ inline Derived& QuaternionBase::setFromTwoVectors(const MatrixBase +Quaternion Quaternion::UnitRandom() +{ + using std::sqrt; + using std::sin; + using std::cos; + const Scalar u1 = internal::random(0, 1), + u2 = internal::random(0, 2*EIGEN_PI), + u3 = internal::random(0, 2*EIGEN_PI); + const Scalar a = sqrt(1 - u1), + b = sqrt(u1); + return Quaternion (a * sin(u2), a * cos(u2), b * sin(u3), b * cos(u3)); +} + /** Returns a quaternion representing a rotation between * the two arbitrary vectors \a a and \a b. 
In other words, the built @@ -706,7 +726,7 @@ QuaternionBase::slerp(const Scalar& t, const QuaternionBase::epsilon(); + const Scalar one = Scalar(1) - NumTraits::epsilon(); Scalar d = this->dot(other); Scalar absD = abs(d); diff --git a/Eigen/src/Geometry/Rotation2D.h b/Eigen/src/Geometry/Rotation2D.h index 5ab0d5920..b42a7df70 100644 --- a/Eigen/src/Geometry/Rotation2D.h +++ b/Eigen/src/Geometry/Rotation2D.h @@ -82,15 +82,15 @@ public: /** \returns the rotation angle in [0,2pi] */ inline Scalar smallestPositiveAngle() const { - Scalar tmp = fmod(m_angle,Scalar(2)*EIGEN_PI); - return tmpScalar(EIGEN_PI)) tmp -= Scalar(2)*Scalar(EIGEN_PI); - else if(tmp<-Scalar(EIGEN_PI)) tmp += Scalar(2)*Scalar(EIGEN_PI); + Scalar tmp = numext::fmod(m_angle,Scalar(2*EIGEN_PI)); + if(tmp>Scalar(EIGEN_PI)) tmp -= Scalar(2*EIGEN_PI); + else if(tmp<-Scalar(EIGEN_PI)) tmp += Scalar(2*EIGEN_PI); return tmp; } diff --git a/Eigen/src/Geometry/Scaling.h b/Eigen/src/Geometry/Scaling.h index 643138199..3e12681b0 100644 --- a/Eigen/src/Geometry/Scaling.h +++ b/Eigen/src/Geometry/Scaling.h @@ -107,12 +107,15 @@ public: /** \addtogroup Geometry_Module */ //@{ -/** Concatenates a linear transformation matrix and a uniform scaling */ +/** Concatenates a linear transformation matrix and a uniform scaling + * \relates UniformScaling + */ // NOTE this operator is defiend in MatrixBase and not as a friend function // of UniformScaling to fix an internal crash of Intel's ICC -template typename MatrixBase::ScalarMultipleReturnType -MatrixBase::operator*(const UniformScaling& s) const -{ return derived() * s.factor(); } +template +EIGEN_EXPR_BINARYOP_SCALAR_RETURN_TYPE(Derived,Scalar,product) +operator*(const MatrixBase& matrix, const UniformScaling& s) +{ return matrix.derived() * s.factor(); } /** Constructs a uniform scaling from scale factor \a s */ static inline UniformScaling Scaling(float s) { return UniformScaling(s); } diff --git a/Eigen/src/Geometry/Transform.h b/Eigen/src/Geometry/Transform.h 
index 75f20bda6..8f6c62d63 100644 --- a/Eigen/src/Geometry/Transform.h +++ b/Eigen/src/Geometry/Transform.h @@ -32,7 +32,8 @@ template< typename TransformType, typename MatrixType, int Case = transform_traits::IsProjective ? 0 : int(MatrixType::RowsAtCompileTime) == int(transform_traits::HDim) ? 1 - : 2> + : 2, + int RhsCols = MatrixType::ColsAtCompileTime> struct transform_right_product_impl; template< typename Other, @@ -192,7 +193,7 @@ template struct transform_make_affine; * preprocessor token EIGEN_QT_SUPPORT is defined. * * This class can be extended with the help of the plugin mechanism described on the page - * \ref TopicCustomizingEigen by defining the preprocessor symbol \c EIGEN_TRANSFORM_PLUGIN. + * \ref TopicCustomizing_Plugins by defining the preprocessor symbol \c EIGEN_TRANSFORM_PLUGIN. * * \sa class Matrix, class Quaternion */ @@ -436,7 +437,7 @@ public: */ // note: this function is defined here because some compilers cannot find the respective declaration template - EIGEN_STRONG_INLINE const typename OtherDerived::PlainObject + EIGEN_STRONG_INLINE const typename internal::transform_right_product_impl::ResultType operator * (const EigenBase &other) const { return internal::transform_right_product_impl::run(*this,other.derived()); } @@ -463,7 +464,7 @@ public: operator * (const DiagonalBase &b) const { TransformTimeDiagonalReturnType res(*this); - res.linear() *= b; + res.linearExt() *= b; return res; } @@ -577,7 +578,7 @@ public: return res; } - inline Transform& operator*=(const DiagonalMatrix& s) { linear() *= s; return *this; } + inline Transform& operator*=(const DiagonalMatrix& s) { linearExt() *= s; return *this; } template inline Transform& operator=(const RotationBase& r); @@ -852,7 +853,7 @@ Transform::prescale(const MatrixBase &oth { EIGEN_STATIC_ASSERT_VECTOR_SPECIFIC_SIZE(OtherDerived,int(Dim)) EIGEN_STATIC_ASSERT(Mode!=int(Isometry), THIS_METHOD_IS_ONLY_FOR_SPECIFIC_TRANSFORMATIONS) - m_matrix.template block(0,0).noalias() = 
(other.asDiagonal() * m_matrix.template block(0,0)); + affine().noalias() = (other.asDiagonal() * affine()); return *this; } @@ -1072,7 +1073,7 @@ void Transform::computeRotationScaling(RotationMatrixTy } } -/** decomposes the linear part of the transformation as a product rotation x scaling, the scaling being +/** decomposes the linear part of the transformation as a product scaling x rotation, the scaling being * not necessarily positive. * * If either pointer is zero, the corresponding computation is skipped. @@ -1287,8 +1288,8 @@ struct transform_product_result }; }; -template< typename TransformType, typename MatrixType > -struct transform_right_product_impl< TransformType, MatrixType, 0 > +template< typename TransformType, typename MatrixType, int RhsCols> +struct transform_right_product_impl< TransformType, MatrixType, 0, RhsCols> { typedef typename MatrixType::PlainObject ResultType; @@ -1298,8 +1299,8 @@ struct transform_right_product_impl< TransformType, MatrixType, 0 > } }; -template< typename TransformType, typename MatrixType > -struct transform_right_product_impl< TransformType, MatrixType, 1 > +template< typename TransformType, typename MatrixType, int RhsCols> +struct transform_right_product_impl< TransformType, MatrixType, 1, RhsCols> { enum { Dim = TransformType::Dim, @@ -1324,8 +1325,8 @@ struct transform_right_product_impl< TransformType, MatrixType, 1 > } }; -template< typename TransformType, typename MatrixType > -struct transform_right_product_impl< TransformType, MatrixType, 2 > +template< typename TransformType, typename MatrixType, int RhsCols> +struct transform_right_product_impl< TransformType, MatrixType, 2, RhsCols> { enum { Dim = TransformType::Dim, @@ -1348,6 +1349,30 @@ struct transform_right_product_impl< TransformType, MatrixType, 2 > } }; +template< typename TransformType, typename MatrixType > +struct transform_right_product_impl< TransformType, MatrixType, 2, 1> // rhs is a vector of size Dim +{ + typedef typename 
TransformType::MatrixType TransformMatrix; + enum { + Dim = TransformType::Dim, + HDim = TransformType::HDim, + OtherRows = MatrixType::RowsAtCompileTime, + WorkingRows = EIGEN_PLAIN_ENUM_MIN(TransformMatrix::RowsAtCompileTime,HDim) + }; + + typedef typename MatrixType::PlainObject ResultType; + + static EIGEN_STRONG_INLINE ResultType run(const TransformType& T, const MatrixType& other) + { + EIGEN_STATIC_ASSERT(OtherRows==Dim, YOU_MIXED_MATRICES_OF_DIFFERENT_SIZES); + + Matrix rhs; + rhs.template head() = other; rhs[Dim] = typename ResultType::Scalar(1); + Matrix res(T.matrix() * rhs); + return res.template head(); + } +}; + /********************************************************** *** Specializations of operator* with lhs EigenBase *** **********************************************************/ diff --git a/Eigen/src/Geometry/Translation.h b/Eigen/src/Geometry/Translation.h index 82d7777f0..b9b9a590c 100644 --- a/Eigen/src/Geometry/Translation.h +++ b/Eigen/src/Geometry/Translation.h @@ -130,8 +130,10 @@ public: } /** Applies translation to vector */ - inline VectorType operator* (const VectorType& other) const - { return m_coeffs + other; } + template + inline typename internal::enable_if::type + operator* (const MatrixBase& vec) const + { return m_coeffs + vec.derived(); } /** \returns the inverse translation (opposite) */ Translation inverse() const { return Translation(-m_coeffs); } diff --git a/Eigen/src/Geometry/arch/CMakeLists.txt b/Eigen/src/Geometry/arch/CMakeLists.txt deleted file mode 100644 index 1267a79c7..000000000 --- a/Eigen/src/Geometry/arch/CMakeLists.txt +++ /dev/null @@ -1,6 +0,0 @@ -FILE(GLOB Eigen_Geometry_arch_SRCS "*.h") - -INSTALL(FILES - ${Eigen_Geometry_arch_SRCS} - DESTINATION ${INCLUDE_INSTALL_DIR}/Eigen/src/Geometry/arch COMPONENT Devel - ) diff --git a/Eigen/src/Householder/CMakeLists.txt b/Eigen/src/Householder/CMakeLists.txt deleted file mode 100644 index ce4937db0..000000000 --- a/Eigen/src/Householder/CMakeLists.txt +++ 
/dev/null @@ -1,6 +0,0 @@ -FILE(GLOB Eigen_Householder_SRCS "*.h") - -INSTALL(FILES - ${Eigen_Householder_SRCS} - DESTINATION ${INCLUDE_INSTALL_DIR}/Eigen/src/Householder COMPONENT Devel - ) diff --git a/Eigen/src/Householder/Householder.h b/Eigen/src/Householder/Householder.h index 4c1f499a1..80de2c305 100644 --- a/Eigen/src/Householder/Householder.h +++ b/Eigen/src/Householder/Householder.h @@ -119,7 +119,7 @@ void MatrixBase::applyHouseholderOnTheLeft( { *this *= Scalar(1)-tau; } - else + else if(tau!=Scalar(0)) { Map::type> tmp(workspace,cols()); Block bottom(derived(), 1, 0, rows()-1, cols()); @@ -156,7 +156,7 @@ void MatrixBase::applyHouseholderOnTheRight( { *this *= Scalar(1)-tau; } - else + else if(tau!=Scalar(0)) { Map::type> tmp(workspace,rows()); Block right(derived(), 0, 1, rows(), cols()-1); diff --git a/Eigen/src/Householder/HouseholderSequence.h b/Eigen/src/Householder/HouseholderSequence.h index e9f3ebf88..3ce0a693d 100644 --- a/Eigen/src/Householder/HouseholderSequence.h +++ b/Eigen/src/Householder/HouseholderSequence.h @@ -108,7 +108,7 @@ struct hseq_side_dependent_impl template struct matrix_type_times_scalar_type { - typedef typename scalar_product_traits::ReturnType + typedef typename ScalarBinaryOpTraits::ReturnType ResultScalar; typedef Matrix Type; @@ -243,7 +243,7 @@ template class HouseholderS { workspace.resize(rows()); Index vecs = m_length; - if(is_same_dense(dst,m_vectors)) + if(internal::is_same_dense(dst,m_vectors)) { // in-place dst.diagonal().setOnes(); @@ -304,7 +304,7 @@ template class HouseholderS /** \internal */ template inline void applyThisOnTheLeft(Dest& dst) const { - Matrix workspace(dst.cols()); + Matrix workspace; applyThisOnTheLeft(dst, workspace); } diff --git a/Eigen/src/IterativeLinearSolvers/CMakeLists.txt b/Eigen/src/IterativeLinearSolvers/CMakeLists.txt deleted file mode 100644 index 59ccc0072..000000000 --- a/Eigen/src/IterativeLinearSolvers/CMakeLists.txt +++ /dev/null @@ -1,6 +0,0 @@ -FILE(GLOB 
Eigen_IterativeLinearSolvers_SRCS "*.h") - -INSTALL(FILES - ${Eigen_IterativeLinearSolvers_SRCS} - DESTINATION ${INCLUDE_INSTALL_DIR}/Eigen/src/IterativeLinearSolvers COMPONENT Devel - ) diff --git a/Eigen/src/IterativeLinearSolvers/SolveWithGuess.h b/Eigen/src/IterativeLinearSolvers/SolveWithGuess.h index 35923be3d..0498db396 100644 --- a/Eigen/src/IterativeLinearSolvers/SolveWithGuess.h +++ b/Eigen/src/IterativeLinearSolvers/SolveWithGuess.h @@ -44,6 +44,7 @@ public: typedef typename internal::traits::Scalar Scalar; typedef typename internal::traits::PlainObject PlainObject; typedef typename internal::generic_xpr_base, MatrixXpr, typename internal::traits::StorageKind>::type Base; + typedef typename internal::ref_selector::type Nested; SolveWithGuess(const Decomposition &dec, const RhsType &rhs, const GuessType &guess) : m_dec(dec), m_rhs(rhs), m_guess(guess) @@ -81,7 +82,8 @@ struct evaluator > : m_result(solve.rows(), solve.cols()) { ::new (static_cast(this)) Base(m_result); - solve.dec()._solve_with_guess_impl(solve.rhs(), m_result, solve().guess()); + m_result = solve.guess(); + solve.dec()._solve_with_guess_impl(solve.rhs(), m_result); } protected: @@ -91,10 +93,10 @@ protected: // Specialization for "dst = dec.solveWithGuess(rhs)" // NOTE we need to specialize it for Dense2Dense to avoid ambiguous specialization error and a Sparse2Sparse specialization must exist somewhere template -struct Assignment, internal::assign_op, Dense2Dense, Scalar> +struct Assignment, internal::assign_op, Dense2Dense> { typedef SolveWithGuess SrcXprType; - static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op &) + static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op &) { // FIXME shall we resize dst here? 
dst = src.guess(); diff --git a/Eigen/src/Jacobi/CMakeLists.txt b/Eigen/src/Jacobi/CMakeLists.txt deleted file mode 100644 index 490dac626..000000000 --- a/Eigen/src/Jacobi/CMakeLists.txt +++ /dev/null @@ -1,6 +0,0 @@ -FILE(GLOB Eigen_Jacobi_SRCS "*.h") - -INSTALL(FILES - ${Eigen_Jacobi_SRCS} - DESTINATION ${INCLUDE_INSTALL_DIR}/Eigen/src/Jacobi COMPONENT Devel - ) diff --git a/Eigen/src/Jacobi/Jacobi.h b/Eigen/src/Jacobi/Jacobi.h index 55de15e87..d25af8e90 100644 --- a/Eigen/src/Jacobi/Jacobi.h +++ b/Eigen/src/Jacobi/Jacobi.h @@ -85,7 +85,8 @@ bool JacobiRotation::makeJacobi(const RealScalar& x, const Scalar& y, co using std::sqrt; using std::abs; typedef typename NumTraits::Real RealScalar; - if(y == Scalar(0)) + RealScalar deno = RealScalar(2)*abs(y); + if(deno < (std::numeric_limits::min)()) { m_c = Scalar(1); m_s = Scalar(0); @@ -93,7 +94,7 @@ bool JacobiRotation::makeJacobi(const RealScalar& x, const Scalar& y, co } else { - RealScalar tau = (x-z)/(RealScalar(2)*abs(y)); + RealScalar tau = (x-z)/deno; RealScalar w = sqrt(numext::abs2(tau) + RealScalar(1)); RealScalar t; if(tau>RealScalar(0)) diff --git a/Eigen/src/LU/CMakeLists.txt b/Eigen/src/LU/CMakeLists.txt deleted file mode 100644 index e0d8d78c1..000000000 --- a/Eigen/src/LU/CMakeLists.txt +++ /dev/null @@ -1,8 +0,0 @@ -FILE(GLOB Eigen_LU_SRCS "*.h") - -INSTALL(FILES - ${Eigen_LU_SRCS} - DESTINATION ${INCLUDE_INSTALL_DIR}/Eigen/src/LU COMPONENT Devel - ) - -ADD_SUBDIRECTORY(arch) diff --git a/Eigen/src/LU/FullPivLU.h b/Eigen/src/LU/FullPivLU.h index 64b9eb7f1..03b6af706 100644 --- a/Eigen/src/LU/FullPivLU.h +++ b/Eigen/src/LU/FullPivLU.h @@ -52,6 +52,8 @@ template struct traits > * \include class_FullPivLU.cpp * Output: \verbinclude class_FullPivLU.out * + * This class supports the \link InplaceDecomposition inplace decomposition \endlink mechanism. 
+ * * \sa MatrixBase::fullPivLu(), MatrixBase::determinant(), MatrixBase::inverse() */ template class FullPivLU @@ -97,6 +99,15 @@ template class FullPivLU template explicit FullPivLU(const EigenBase& matrix); + /** \brief Constructs a LU factorization from a given matrix + * + * This overloaded constructor is provided for \link InplaceDecomposition inplace decomposition \endlink when \c MatrixType is a Eigen::Ref. + * + * \sa FullPivLU(const EigenBase&) + */ + template + explicit FullPivLU(EigenBase& matrix); + /** Computes the LU decomposition of the given matrix. * * \param matrix the matrix of which to compute the LU decomposition. @@ -105,7 +116,11 @@ template class FullPivLU * \returns a reference to *this */ template - FullPivLU& compute(const EigenBase& matrix); + FullPivLU& compute(const EigenBase& matrix) { + m_lu = matrix.derived(); + computeInPlace(); + return *this; + } /** \returns the LU decomposition matrix: the upper-triangular part is U, the * unit-lower-triangular part is L (at least for square matrices; in the non-square @@ -141,7 +156,7 @@ template class FullPivLU * * \sa permutationQ() */ - inline const PermutationPType& permutationP() const + EIGEN_DEVICE_FUNC inline const PermutationPType& permutationP() const { eigen_assert(m_isInitialized && "LU is not initialized."); return m_p; @@ -391,8 +406,8 @@ template class FullPivLU MatrixType reconstructedMatrix() const; - inline Index rows() const { return m_lu.rows(); } - inline Index cols() const { return m_lu.cols(); } + EIGEN_DEVICE_FUNC inline Index rows() const { return m_lu.rows(); } + EIGEN_DEVICE_FUNC inline Index cols() const { return m_lu.cols(); } #ifndef EIGEN_PARSED_BY_DOXYGEN template @@ -418,9 +433,10 @@ template class FullPivLU PermutationQType m_q; IntColVectorType m_rowsTranspositions; IntRowVectorType m_colsTranspositions; - Index m_det_pq, m_nonzero_pivots; + Index m_nonzero_pivots; RealScalar m_l1_norm; RealScalar m_maxpivot, m_prescribedThreshold; + signed char m_det_pq; 
bool m_isInitialized, m_usePrescribedThreshold; }; @@ -458,25 +474,28 @@ FullPivLU::FullPivLU(const EigenBase& matrix) template template -FullPivLU& FullPivLU::compute(const EigenBase& matrix) +FullPivLU::FullPivLU(EigenBase& matrix) + : m_lu(matrix.derived()), + m_p(matrix.rows()), + m_q(matrix.cols()), + m_rowsTranspositions(matrix.rows()), + m_colsTranspositions(matrix.cols()), + m_isInitialized(false), + m_usePrescribedThreshold(false) { - check_template_parameters(); - - // the permutations are stored as int indices, so just to be sure: - eigen_assert(matrix.rows()<=NumTraits::highest() && matrix.cols()<=NumTraits::highest()); - - m_lu = matrix.derived(); - m_l1_norm = m_lu.cwiseAbs().colwise().sum().maxCoeff(); - computeInPlace(); - - m_isInitialized = true; - return *this; } template void FullPivLU::computeInPlace() { + check_template_parameters(); + + // the permutations are stored as int indices, so just to be sure: + eigen_assert(m_lu.rows()<=NumTraits::highest() && m_lu.cols()<=NumTraits::highest()); + + m_l1_norm = m_lu.cwiseAbs().colwise().sum().maxCoeff(); + const Index size = m_lu.diagonalSize(); const Index rows = m_lu.rows(); const Index cols = m_lu.cols(); @@ -556,6 +575,8 @@ void FullPivLU::computeInPlace() m_q.applyTranspositionOnTheRight(k, m_colsTranspositions.coeff(k)); m_det_pq = (number_of_transpositions%2) ? 
-1 : 1; + + m_isInitialized = true; } template @@ -838,12 +859,12 @@ namespace internal { /***** Implementation of inverse() *****************************************************/ -template -struct Assignment >, internal::assign_op, Dense2Dense, Scalar> +template +struct Assignment >, internal::assign_op::Scalar>, Dense2Dense> { typedef FullPivLU LuType; typedef Inverse SrcXprType; - static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op &) + static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op &) { dst = src.nestedExpression().solve(MatrixType::Identity(src.rows(), src.cols())); } @@ -858,14 +879,12 @@ struct Assignment >, internal::assign_ * * \sa class FullPivLU */ -#ifndef __CUDACC__ template inline const FullPivLU::PlainObject> MatrixBase::fullPivLu() const { return FullPivLU(eval()); } -#endif } // end namespace Eigen diff --git a/Eigen/src/LU/InverseImpl.h b/Eigen/src/LU/InverseImpl.h index e202a55cb..3134632e1 100644 --- a/Eigen/src/LU/InverseImpl.h +++ b/Eigen/src/LU/InverseImpl.h @@ -286,11 +286,11 @@ struct compute_inverse_and_det_with_check namespace internal { // Specialization for "dense = dense_xpr.inverse()" -template -struct Assignment, internal::assign_op, Dense2Dense, Scalar> +template +struct Assignment, internal::assign_op, Dense2Dense> { typedef Inverse SrcXprType; - static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op &) + static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op &) { // FIXME shall we resize dst here? 
const int Size = EIGEN_PLAIN_ENUM_MIN(XprType::ColsAtCompileTime,DstXprType::ColsAtCompileTime); diff --git a/Eigen/src/LU/PartialPivLU.h b/Eigen/src/LU/PartialPivLU.h index 2e6d91939..d43961887 100644 --- a/Eigen/src/LU/PartialPivLU.h +++ b/Eigen/src/LU/PartialPivLU.h @@ -26,6 +26,17 @@ template struct traits > }; }; +template +struct enable_if_ref; +// { +// typedef Derived type; +// }; + +template +struct enable_if_ref,Derived> { + typedef Derived type; +}; + } // end namespace internal /** \ingroup LU_Module @@ -57,6 +68,8 @@ template struct traits > * * The data of the LU decomposition can be directly accessed through the methods matrixLU(), permutationP(). * + * This class supports the \link InplaceDecomposition inplace decomposition \endlink mechanism. + * * \sa MatrixBase::partialPivLu(), MatrixBase::determinant(), MatrixBase::inverse(), MatrixBase::computeInverse(), class FullPivLU */ template class PartialPivLU @@ -102,8 +115,22 @@ template class PartialPivLU template explicit PartialPivLU(const EigenBase& matrix); + /** Constructor for \link InplaceDecomposition inplace decomposition \endlink + * + * \param matrix the matrix of which to compute the LU decomposition. + * + * \warning The matrix should have full rank (e.g. if it's square, it should be invertible). + * If you need to deal with non-full rank, use class FullPivLU instead. 
+ */ template - PartialPivLU& compute(const EigenBase& matrix); + explicit PartialPivLU(EigenBase& matrix); + + template + PartialPivLU& compute(const EigenBase& matrix) { + m_lu = matrix.derived(); + compute(); + return *this; + } /** \returns the LU decomposition matrix: the upper-triangular part is U, the * unit-lower-triangular part is L (at least for square matrices; in the non-square @@ -251,11 +278,13 @@ template class PartialPivLU EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar); } + void compute(); + MatrixType m_lu; PermutationType m_p; TranspositionType m_rowsTranspositions; - Index m_det_p; RealScalar m_l1_norm; + signed char m_det_p; bool m_isInitialized; }; @@ -264,8 +293,8 @@ PartialPivLU::PartialPivLU() : m_lu(), m_p(), m_rowsTranspositions(), - m_det_p(0), m_l1_norm(0), + m_det_p(0), m_isInitialized(false) { } @@ -275,8 +304,8 @@ PartialPivLU::PartialPivLU(Index size) : m_lu(size, size), m_p(size), m_rowsTranspositions(size), - m_det_p(0), m_l1_norm(0), + m_det_p(0), m_isInitialized(false) { } @@ -284,16 +313,29 @@ PartialPivLU::PartialPivLU(Index size) template template PartialPivLU::PartialPivLU(const EigenBase& matrix) - : m_lu(matrix.rows(), matrix.rows()), + : m_lu(matrix.rows(),matrix.cols()), m_p(matrix.rows()), m_rowsTranspositions(matrix.rows()), - m_det_p(0), m_l1_norm(0), + m_det_p(0), m_isInitialized(false) { compute(matrix.derived()); } +template +template +PartialPivLU::PartialPivLU(EigenBase& matrix) + : m_lu(matrix.derived()), + m_p(matrix.rows()), + m_rowsTranspositions(matrix.rows()), + m_l1_norm(0), + m_det_p(0), + m_isInitialized(false) +{ + compute(); +} + namespace internal { /** \internal This is the blocked version of fullpivlu_unblocked() */ @@ -434,7 +476,7 @@ struct partial_lu_impl // update permutations and apply them to A_0 for(Index i=k; i(k)); A_0.row(i).swap(A_0.row(piv)); } @@ -470,19 +512,17 @@ void partial_lu_inplace(MatrixType& lu, TranspositionType& row_transpositions, t } // end namespace internal template -template 
-PartialPivLU& PartialPivLU::compute(const EigenBase& matrix) +void PartialPivLU::compute() { check_template_parameters(); // the row permutation is stored as int indices, so just to be sure: - eigen_assert(matrix.rows()::highest()); + eigen_assert(m_lu.rows()::highest()); - m_lu = matrix.derived(); m_l1_norm = m_lu.cwiseAbs().colwise().sum().maxCoeff(); - eigen_assert(matrix.rows() == matrix.cols() && "PartialPivLU is only for square (and moreover invertible) matrices"); - const Index size = matrix.rows(); + eigen_assert(m_lu.rows() == m_lu.cols() && "PartialPivLU is only for square (and moreover invertible) matrices"); + const Index size = m_lu.rows(); m_rowsTranspositions.resize(size); @@ -493,7 +533,6 @@ PartialPivLU& PartialPivLU::compute(const EigenBase @@ -525,12 +564,12 @@ MatrixType PartialPivLU::reconstructedMatrix() const namespace internal { /***** Implementation of inverse() *****************************************************/ -template -struct Assignment >, internal::assign_op, Dense2Dense, Scalar> +template +struct Assignment >, internal::assign_op::Scalar>, Dense2Dense> { typedef PartialPivLU LuType; typedef Inverse SrcXprType; - static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op &) + static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op &) { dst = src.nestedExpression().solve(MatrixType::Identity(src.rows(), src.cols())); } @@ -545,14 +584,12 @@ struct Assignment >, internal::assi * * \sa class PartialPivLU */ -#ifndef __CUDACC__ template inline const PartialPivLU::PlainObject> MatrixBase::partialPivLu() const { return PartialPivLU(eval()); } -#endif /** \lu_module * @@ -562,14 +599,12 @@ MatrixBase::partialPivLu() const * * \sa class PartialPivLU */ -#ifndef __CUDACC__ template inline const PartialPivLU::PlainObject> MatrixBase::lu() const { return PartialPivLU(eval()); } -#endif } // end namespace Eigen diff --git a/Eigen/src/LU/PartialPivLU_MKL.h b/Eigen/src/LU/PartialPivLU_LAPACKE.h 
similarity index 77% rename from Eigen/src/LU/PartialPivLU_MKL.h rename to Eigen/src/LU/PartialPivLU_LAPACKE.h index 9035953c8..755168a94 100644 --- a/Eigen/src/LU/PartialPivLU_MKL.h +++ b/Eigen/src/LU/PartialPivLU_LAPACKE.h @@ -25,7 +25,7 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ******************************************************************************** - * Content : Eigen bindings to Intel(R) MKL + * Content : Eigen bindings to LAPACKe * LU decomposition with partial pivoting based on LAPACKE_?getrf function. ******************************************************************************** */ @@ -33,20 +33,18 @@ #ifndef EIGEN_PARTIALLU_LAPACK_H #define EIGEN_PARTIALLU_LAPACK_H -#include "Eigen/src/Core/util/MKL_support.h" - namespace Eigen { namespace internal { -/** \internal Specialization for the data types supported by MKL */ +/** \internal Specialization for the data types supported by LAPACKe */ -#define EIGEN_MKL_LU_PARTPIV(EIGTYPE, MKLTYPE, MKLPREFIX) \ +#define EIGEN_LAPACKE_LU_PARTPIV(EIGTYPE, LAPACKE_TYPE, LAPACKE_PREFIX) \ template \ struct partial_lu_impl \ { \ /* \internal performs the LU decomposition in-place of the matrix represented */ \ - static lapack_int blocked_lu(lapack_int rows, lapack_int cols, EIGTYPE* lu_data, lapack_int luStride, lapack_int* row_transpositions, lapack_int& nb_transpositions, lapack_int maxBlockSize=256) \ + static lapack_int blocked_lu(Index rows, Index cols, EIGTYPE* lu_data, Index luStride, lapack_int* row_transpositions, lapack_int& nb_transpositions, lapack_int maxBlockSize=256) \ { \ EIGEN_UNUSED_VARIABLE(maxBlockSize);\ lapack_int matrix_order, first_zero_pivot; \ @@ -54,14 +52,14 @@ struct partial_lu_impl \ EIGTYPE* a; \ /* Set up parameters for ?getrf */ \ matrix_order = StorageOrder==RowMajor ? 
LAPACK_ROW_MAJOR : LAPACK_COL_MAJOR; \ - lda = luStride; \ + lda = convert_index(luStride); \ a = lu_data; \ ipiv = row_transpositions; \ - m = rows; \ - n = cols; \ + m = convert_index(rows); \ + n = convert_index(cols); \ nb_transpositions = 0; \ \ - info = LAPACKE_##MKLPREFIX##getrf( matrix_order, m, n, (MKLTYPE*)a, lda, ipiv ); \ + info = LAPACKE_##LAPACKE_PREFIX##getrf( matrix_order, m, n, (LAPACKE_TYPE*)a, lda, ipiv ); \ \ for(int i=0;i \ } \ }; -EIGEN_MKL_LU_PARTPIV(double, double, d) -EIGEN_MKL_LU_PARTPIV(float, float, s) -EIGEN_MKL_LU_PARTPIV(dcomplex, MKL_Complex16, z) -EIGEN_MKL_LU_PARTPIV(scomplex, MKL_Complex8, c) +EIGEN_LAPACKE_LU_PARTPIV(double, double, d) +EIGEN_LAPACKE_LU_PARTPIV(float, float, s) +EIGEN_LAPACKE_LU_PARTPIV(dcomplex, lapack_complex_double, z) +EIGEN_LAPACKE_LU_PARTPIV(scomplex, lapack_complex_float, c) } // end namespace internal diff --git a/Eigen/src/LU/arch/CMakeLists.txt b/Eigen/src/LU/arch/CMakeLists.txt deleted file mode 100644 index f6b7ed9ec..000000000 --- a/Eigen/src/LU/arch/CMakeLists.txt +++ /dev/null @@ -1,6 +0,0 @@ -FILE(GLOB Eigen_LU_arch_SRCS "*.h") - -INSTALL(FILES - ${Eigen_LU_arch_SRCS} - DESTINATION ${INCLUDE_INSTALL_DIR}/Eigen/src/LU/arch COMPONENT Devel - ) diff --git a/Eigen/src/LU/arch/Inverse_SSE.h b/Eigen/src/LU/arch/Inverse_SSE.h index e1470c664..ebb64a62b 100644 --- a/Eigen/src/LU/arch/Inverse_SSE.h +++ b/Eigen/src/LU/arch/Inverse_SSE.h @@ -153,10 +153,12 @@ struct compute_inverse_size4 iC = _mm_mul_ps(rd,iC); iD = _mm_mul_ps(rd,iD); - result.template writePacket( 0, _mm_shuffle_ps(iA,iB,0x77)); - result.template writePacket( 4, _mm_shuffle_ps(iA,iB,0x22)); - result.template writePacket( 8, _mm_shuffle_ps(iC,iD,0x77)); - result.template writePacket(12, _mm_shuffle_ps(iC,iD,0x22)); + Index res_stride = result.outerStride(); + float* res = result.data(); + pstoret(res+0, _mm_shuffle_ps(iA,iB,0x77)); + pstoret(res+res_stride, _mm_shuffle_ps(iA,iB,0x22)); + pstoret(res+2*res_stride, _mm_shuffle_ps(iC,iD,0x77)); 
+ pstoret(res+3*res_stride, _mm_shuffle_ps(iC,iD,0x22)); } }; @@ -316,14 +318,16 @@ struct compute_inverse_size4 iC1 = _mm_sub_pd(_mm_mul_pd(B1, dC), iC1); iC2 = _mm_sub_pd(_mm_mul_pd(B2, dC), iC2); - result.template writePacket( 0, _mm_mul_pd(_mm_shuffle_pd(iA2, iA1, 3), d1)); // iA# / det - result.template writePacket( 4, _mm_mul_pd(_mm_shuffle_pd(iA2, iA1, 0), d2)); - result.template writePacket( 2, _mm_mul_pd(_mm_shuffle_pd(iB2, iB1, 3), d1)); // iB# / det - result.template writePacket( 6, _mm_mul_pd(_mm_shuffle_pd(iB2, iB1, 0), d2)); - result.template writePacket( 8, _mm_mul_pd(_mm_shuffle_pd(iC2, iC1, 3), d1)); // iC# / det - result.template writePacket(12, _mm_mul_pd(_mm_shuffle_pd(iC2, iC1, 0), d2)); - result.template writePacket(10, _mm_mul_pd(_mm_shuffle_pd(iD2, iD1, 3), d1)); // iD# / det - result.template writePacket(14, _mm_mul_pd(_mm_shuffle_pd(iD2, iD1, 0), d2)); + Index res_stride = result.outerStride(); + double* res = result.data(); + pstoret(res+0, _mm_mul_pd(_mm_shuffle_pd(iA2, iA1, 3), d1)); + pstoret(res+res_stride, _mm_mul_pd(_mm_shuffle_pd(iA2, iA1, 0), d2)); + pstoret(res+2, _mm_mul_pd(_mm_shuffle_pd(iB2, iB1, 3), d1)); + pstoret(res+res_stride+2, _mm_mul_pd(_mm_shuffle_pd(iB2, iB1, 0), d2)); + pstoret(res+2*res_stride, _mm_mul_pd(_mm_shuffle_pd(iC2, iC1, 3), d1)); + pstoret(res+3*res_stride, _mm_mul_pd(_mm_shuffle_pd(iC2, iC1, 0), d2)); + pstoret(res+2*res_stride+2,_mm_mul_pd(_mm_shuffle_pd(iD2, iD1, 3), d1)); + pstoret(res+3*res_stride+2,_mm_mul_pd(_mm_shuffle_pd(iD2, iD1, 0), d2)); } }; diff --git a/Eigen/src/MetisSupport/CMakeLists.txt b/Eigen/src/MetisSupport/CMakeLists.txt deleted file mode 100644 index 2bad31416..000000000 --- a/Eigen/src/MetisSupport/CMakeLists.txt +++ /dev/null @@ -1,6 +0,0 @@ -FILE(GLOB Eigen_MetisSupport_SRCS "*.h") - -INSTALL(FILES - ${Eigen_MetisSupport_SRCS} - DESTINATION ${INCLUDE_INSTALL_DIR}/Eigen/src/MetisSupport COMPONENT Devel - ) diff --git a/Eigen/src/OrderingMethods/CMakeLists.txt 
b/Eigen/src/OrderingMethods/CMakeLists.txt deleted file mode 100644 index 9f4bb2758..000000000 --- a/Eigen/src/OrderingMethods/CMakeLists.txt +++ /dev/null @@ -1,6 +0,0 @@ -FILE(GLOB Eigen_OrderingMethods_SRCS "*.h") - -INSTALL(FILES - ${Eigen_OrderingMethods_SRCS} - DESTINATION ${INCLUDE_INSTALL_DIR}/Eigen/src/OrderingMethods COMPONENT Devel - ) diff --git a/Eigen/src/PaStiXSupport/CMakeLists.txt b/Eigen/src/PaStiXSupport/CMakeLists.txt deleted file mode 100644 index 28c657e9b..000000000 --- a/Eigen/src/PaStiXSupport/CMakeLists.txt +++ /dev/null @@ -1,6 +0,0 @@ -FILE(GLOB Eigen_PastixSupport_SRCS "*.h") - -INSTALL(FILES - ${Eigen_PastixSupport_SRCS} - DESTINATION ${INCLUDE_INSTALL_DIR}/Eigen/src/PaStiXSupport COMPONENT Devel - ) diff --git a/Eigen/src/PardisoSupport/CMakeLists.txt b/Eigen/src/PardisoSupport/CMakeLists.txt deleted file mode 100644 index a097ab401..000000000 --- a/Eigen/src/PardisoSupport/CMakeLists.txt +++ /dev/null @@ -1,6 +0,0 @@ -FILE(GLOB Eigen_PardisoSupport_SRCS "*.h") - -INSTALL(FILES - ${Eigen_PardisoSupport_SRCS} - DESTINATION ${INCLUDE_INSTALL_DIR}/Eigen/src/PardisoSupport COMPONENT Devel - ) diff --git a/Eigen/src/PardisoSupport/PardisoSupport.h b/Eigen/src/PardisoSupport/PardisoSupport.h index 80d914f25..091c3970e 100644 --- a/Eigen/src/PardisoSupport/PardisoSupport.h +++ b/Eigen/src/PardisoSupport/PardisoSupport.h @@ -183,7 +183,7 @@ class PardisoImpl : public SparseSolverBase { if(m_isInitialized) // Factorization ran at least once { - internal::pardiso_run_selector::run(m_pt, 1, 1, m_type, -1, m_size,0, 0, 0, m_perm.data(), 0, + internal::pardiso_run_selector::run(m_pt, 1, 1, m_type, -1, internal::convert_index(m_size),0, 0, 0, m_perm.data(), 0, m_iparm.data(), m_msglvl, NULL, NULL); m_isInitialized = false; } @@ -194,11 +194,11 @@ class PardisoImpl : public SparseSolverBase m_type = type; bool symmetric = std::abs(m_type) < 10; m_iparm[0] = 1; // No solver default - m_iparm[1] = 3; // use Metis for the ordering - m_iparm[2] = 1; // 
Numbers of processors, value of OMP_NUM_THREADS + m_iparm[1] = 2; // use Metis for the ordering + m_iparm[2] = 0; // Reserved. Set to zero. (??Numbers of processors, value of OMP_NUM_THREADS??) m_iparm[3] = 0; // No iterative-direct algorithm m_iparm[4] = 0; // No user fill-in reducing permutation - m_iparm[5] = 0; // Write solution into x + m_iparm[5] = 0; // Write solution into x, b is left unchanged m_iparm[6] = 0; // Not in use m_iparm[7] = 2; // Max numbers of iterative refinement steps m_iparm[8] = 0; // Not in use @@ -219,7 +219,8 @@ class PardisoImpl : public SparseSolverBase m_iparm[26] = 0; // No matrix checker m_iparm[27] = (sizeof(RealScalar) == 4) ? 1 : 0; m_iparm[34] = 1; // C indexing - m_iparm[59] = 1; // Automatic switch between In-Core and Out-of-Core modes + m_iparm[36] = 0; // CSR + m_iparm[59] = 0; // 0 - In-Core ; 1 - Automatic switch between In-Core and Out-of-Core modes ; 2 - Out-of-Core memset(m_pt, 0, sizeof(m_pt)); } @@ -246,7 +247,7 @@ class PardisoImpl : public SparseSolverBase mutable SparseMatrixType m_matrix; mutable ComputationInfo m_info; bool m_analysisIsOk, m_factorizationIsOk; - Index m_type, m_msglvl; + StorageIndex m_type, m_msglvl; mutable void *m_pt[64]; mutable ParameterType m_iparm; mutable IntColVectorType m_perm; @@ -265,10 +266,9 @@ Derived& PardisoImpl::compute(const MatrixType& a) derived().getMatrix(a); Index error; - error = internal::pardiso_run_selector::run(m_pt, 1, 1, m_type, 12, m_size, + error = internal::pardiso_run_selector::run(m_pt, 1, 1, m_type, 12, internal::convert_index(m_size), m_matrix.valuePtr(), m_matrix.outerIndexPtr(), m_matrix.innerIndexPtr(), m_perm.data(), 0, m_iparm.data(), m_msglvl, NULL, NULL); - manageErrorCode(error); m_analysisIsOk = true; m_factorizationIsOk = true; @@ -287,7 +287,7 @@ Derived& PardisoImpl::analyzePattern(const MatrixType& a) derived().getMatrix(a); Index error; - error = internal::pardiso_run_selector::run(m_pt, 1, 1, m_type, 11, m_size, + error = 
internal::pardiso_run_selector::run(m_pt, 1, 1, m_type, 11, internal::convert_index(m_size), m_matrix.valuePtr(), m_matrix.outerIndexPtr(), m_matrix.innerIndexPtr(), m_perm.data(), 0, m_iparm.data(), m_msglvl, NULL, NULL); @@ -306,8 +306,8 @@ Derived& PardisoImpl::factorize(const MatrixType& a) derived().getMatrix(a); - Index error; - error = internal::pardiso_run_selector::run(m_pt, 1, 1, m_type, 22, m_size, + Index error; + error = internal::pardiso_run_selector::run(m_pt, 1, 1, m_type, 22, internal::convert_index(m_size), m_matrix.valuePtr(), m_matrix.outerIndexPtr(), m_matrix.innerIndexPtr(), m_perm.data(), 0, m_iparm.data(), m_msglvl, NULL, NULL); @@ -354,9 +354,9 @@ void PardisoImpl::_solve_impl(const MatrixBase &b, MatrixBase } Index error; - error = internal::pardiso_run_selector::run(m_pt, 1, 1, m_type, 33, m_size, + error = internal::pardiso_run_selector::run(m_pt, 1, 1, m_type, 33, internal::convert_index(m_size), m_matrix.valuePtr(), m_matrix.outerIndexPtr(), m_matrix.innerIndexPtr(), - m_perm.data(), nrhs, m_iparm.data(), m_msglvl, + m_perm.data(), internal::convert_index(nrhs), m_iparm.data(), m_msglvl, rhs_ptr, x.derived().data()); manageErrorCode(error); @@ -371,6 +371,9 @@ void PardisoImpl::_solve_impl(const MatrixBase &b, MatrixBase * using the Intel MKL PARDISO library. The sparse matrix A must be squared and invertible. * The vectors or matrices X and B can be either dense or sparse. * + * By default, it runs in in-core mode. To enable PARDISO's out-of-core feature, set: + * \code solver.pardisoParameterArray()[59] = 1; \endcode + * * \tparam _MatrixType the type of the sparse matrix A, it must be a SparseMatrix<> * * \implsparsesolverconcept @@ -421,6 +424,9 @@ class PardisoLU : public PardisoImpl< PardisoLU > * using the Intel MKL PARDISO library. The sparse matrix A must be selfajoint and positive definite. * The vectors or matrices X and B can be either dense or sparse. * + * By default, it runs in in-core mode. 
To enable PARDISO's out-of-core feature, set: + * \code solver.pardisoParameterArray()[59] = 1; \endcode + * * \tparam MatrixType the type of the sparse matrix A, it must be a SparseMatrix<> * \tparam UpLo can be any bitwise combination of Upper, Lower. The default is Upper, meaning only the upper triangular part has to be used. * Upper|Lower can be used to tell both triangular parts can be used as input. @@ -480,6 +486,9 @@ class PardisoLLT : public PardisoImpl< PardisoLLT > * For complex matrices, A can also be symmetric only, see the \a Options template parameter. * The vectors or matrices X and B can be either dense or sparse. * + * By default, it runs in in-core mode. To enable PARDISO's out-of-core feature, set: + * \code solver.pardisoParameterArray()[59] = 1; \endcode + * * \tparam MatrixType the type of the sparse matrix A, it must be a SparseMatrix<> * \tparam Options can be any bitwise combination of Upper, Lower, and Symmetric. The default is Upper, meaning only the upper triangular part has to be used. * Symmetric can be used for symmetric, non-selfadjoint complex matrices, the default being to assume a selfadjoint matrix. diff --git a/Eigen/src/QR/CMakeLists.txt b/Eigen/src/QR/CMakeLists.txt deleted file mode 100644 index 96f43d7f5..000000000 --- a/Eigen/src/QR/CMakeLists.txt +++ /dev/null @@ -1,6 +0,0 @@ -FILE(GLOB Eigen_QR_SRCS "*.h") - -INSTALL(FILES - ${Eigen_QR_SRCS} - DESTINATION ${INCLUDE_INSTALL_DIR}/Eigen/src/QR COMPONENT Devel - ) diff --git a/Eigen/src/QR/ColPivHouseholderQR.h b/Eigen/src/QR/ColPivHouseholderQR.h index 7c559f952..9650781d6 100644 --- a/Eigen/src/QR/ColPivHouseholderQR.h +++ b/Eigen/src/QR/ColPivHouseholderQR.h @@ -41,6 +41,8 @@ template struct traits > * This decomposition performs column pivoting in order to be rank-revealing and improve * numerical stability. It is slower than HouseholderQR, and faster than FullPivHouseholderQR. 
* + * This class supports the \link InplaceDecomposition inplace decomposition \endlink mechanism. + * * \sa MatrixBase::colPivHouseholderQr() */ template class ColPivHouseholderQR @@ -51,7 +53,6 @@ template class ColPivHouseholderQR enum { RowsAtCompileTime = MatrixType::RowsAtCompileTime, ColsAtCompileTime = MatrixType::ColsAtCompileTime, - Options = MatrixType::Options, MaxRowsAtCompileTime = MatrixType::MaxRowsAtCompileTime, MaxColsAtCompileTime = MatrixType::MaxColsAtCompileTime }; @@ -59,7 +60,6 @@ template class ColPivHouseholderQR typedef typename MatrixType::RealScalar RealScalar; // FIXME should be int typedef typename MatrixType::StorageIndex StorageIndex; - typedef Matrix MatrixQType; typedef typename internal::plain_diag_type::type HCoeffsType; typedef PermutationMatrix PermutationType; typedef typename internal::plain_row_type::type IntRowVectorType; @@ -135,6 +135,27 @@ template class ColPivHouseholderQR compute(matrix.derived()); } + /** \brief Constructs a QR factorization from a given matrix + * + * This overloaded constructor is provided for \link InplaceDecomposition inplace decomposition \endlink when \c MatrixType is a Eigen::Ref. + * + * \sa ColPivHouseholderQR(const EigenBase&) + */ + template + explicit ColPivHouseholderQR(EigenBase& matrix) + : m_qr(matrix.derived()), + m_hCoeffs((std::min)(matrix.rows(),matrix.cols())), + m_colsPermutation(PermIndexType(matrix.cols())), + m_colsTranspositions(matrix.cols()), + m_temp(matrix.cols()), + m_colNormsUpdated(matrix.cols()), + m_colNormsDirect(matrix.cols()), + m_isInitialized(false), + m_usePrescribedThreshold(false) + { + computeInPlace(); + } + /** This method finds a solution x to the equation Ax=b, where A is the matrix of which * *this is the QR decomposition, if any exists. * @@ -142,9 +163,6 @@ template class ColPivHouseholderQR * * \returns a solution. * - * \note The case where b is a matrix is not yet implemented. Also, this - * code is space inefficient. 
- * * \note_about_checking_solutions * * \note_about_arbitrary_choice_of_solution @@ -453,21 +471,19 @@ template template ColPivHouseholderQR& ColPivHouseholderQR::compute(const EigenBase& matrix) { - check_template_parameters(); - - // the column permutation is stored as int indices, so just to be sure: - eigen_assert(matrix.cols()<=NumTraits::highest()); - - m_qr = matrix; - + m_qr = matrix.derived(); computeInPlace(); - return *this; } template void ColPivHouseholderQR::computeInPlace() { + check_template_parameters(); + + // the column permutation is stored as int indices, so just to be sure: + eigen_assert(m_qr.cols()<=NumTraits::highest()); + using std::abs; Index rows = m_qr.rows(); @@ -598,11 +614,11 @@ void ColPivHouseholderQR<_MatrixType>::_solve_impl(const RhsType &rhs, DstType & namespace internal { template -struct Assignment >, internal::assign_op, Dense2Dense, Scalar> +struct Assignment >, internal::assign_op, Dense2Dense> { typedef ColPivHouseholderQR QrType; typedef Inverse SrcXprType; - static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op &) + static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op &) { dst = src.nestedExpression().solve(MatrixType::Identity(src.rows(), src.cols())); } @@ -621,7 +637,6 @@ typename ColPivHouseholderQR::HouseholderSequenceType ColPivHousehol return HouseholderSequenceType(m_qr, m_hCoeffs.conjugate()); } -#ifndef __CUDACC__ /** \return the column-pivoting Householder QR decomposition of \c *this. 
* * \sa class ColPivHouseholderQR @@ -632,7 +647,6 @@ MatrixBase::colPivHouseholderQr() const { return ColPivHouseholderQR(eval()); } -#endif // __CUDACC__ } // end namespace Eigen diff --git a/Eigen/src/QR/ColPivHouseholderQR_MKL.h b/Eigen/src/QR/ColPivHouseholderQR_LAPACKE.h similarity index 67% rename from Eigen/src/QR/ColPivHouseholderQR_MKL.h rename to Eigen/src/QR/ColPivHouseholderQR_LAPACKE.h index 1203d0d36..4e9651f83 100644 --- a/Eigen/src/QR/ColPivHouseholderQR_MKL.h +++ b/Eigen/src/QR/ColPivHouseholderQR_LAPACKE.h @@ -25,22 +25,20 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ******************************************************************************** - * Content : Eigen bindings to Intel(R) MKL + * Content : Eigen bindings to LAPACKe * Householder QR decomposition of a matrix with column pivoting based on * LAPACKE_?geqp3 function. ******************************************************************************** */ -#ifndef EIGEN_COLPIVOTINGHOUSEHOLDERQR_MKL_H -#define EIGEN_COLPIVOTINGHOUSEHOLDERQR_MKL_H - -#include "Eigen/src/Core/util/MKL_support.h" +#ifndef EIGEN_COLPIVOTINGHOUSEHOLDERQR_LAPACKE_H +#define EIGEN_COLPIVOTINGHOUSEHOLDERQR_LAPACKE_H namespace Eigen { -/** \internal Specialization for the data types supported by MKL */ +/** \internal Specialization for the data types supported by LAPACKe */ -#define EIGEN_MKL_QR_COLPIV(EIGTYPE, MKLTYPE, MKLPREFIX, EIGCOLROW, MKLCOLROW) \ +#define EIGEN_LAPACKE_QR_COLPIV(EIGTYPE, LAPACKE_TYPE, LAPACKE_PREFIX, EIGCOLROW, LAPACKE_COLROW) \ template<> template inline \ ColPivHouseholderQR >& \ ColPivHouseholderQR >::compute( \ @@ -65,34 +63,35 @@ ColPivHouseholderQR(m_qr.outerStride()); \ + lapack_int matrix_order = LAPACKE_COLROW; \ + LAPACKE_##LAPACKE_PREFIX##geqp3( matrix_order, internal::convert_index(rows), internal::convert_index(cols), \ + (LAPACKE_TYPE*)m_qr.data(), lda, (lapack_int*)m_colsPermutation.indices().data(), (LAPACKE_TYPE*)m_hCoeffs.data()); \ m_isInitialized = true; 
\ m_maxpivot=m_qr.diagonal().cwiseAbs().maxCoeff(); \ m_hCoeffs.adjointInPlace(); \ RealScalar premultiplied_threshold = abs(m_maxpivot) * threshold(); \ lapack_int *perm = m_colsPermutation.indices().data(); \ - for(i=0;i premultiplied_threshold);\ } \ - for(i=0;i > * * \param MatrixType the type of the matrix of which we are computing the COD. * - * This class performs a rank-revealing complete ortogonal decomposition of a + * This class performs a rank-revealing complete orthogonal decomposition of a * matrix \b A into matrices \b P, \b Q, \b T, and \b Z such that * \f[ - * \mathbf{A} \, \mathbf{P} = \mathbf{Q} \, \begin{matrix} \mathbf{T} & - * \mathbf{0} \\ \mathbf{0} & \mathbf{0} \end{matrix} \, \mathbf{Z} + * \mathbf{A} \, \mathbf{P} = \mathbf{Q} \, + * \begin{bmatrix} \mathbf{T} & \mathbf{0} \\ + * \mathbf{0} & \mathbf{0} \end{bmatrix} \, \mathbf{Z} * \f] * by using Householder transformations. Here, \b P is a permutation matrix, * \b Q and \b Z are unitary matrices and \b T an upper triangular matrix of * size rank-by-rank. \b A may be rank deficient. * + * This class supports the \link InplaceDecomposition inplace decomposition \endlink mechanism. 
+ * * \sa MatrixBase::completeOrthogonalDecomposition() */ template @@ -48,16 +51,12 @@ class CompleteOrthogonalDecomposition { enum { RowsAtCompileTime = MatrixType::RowsAtCompileTime, ColsAtCompileTime = MatrixType::ColsAtCompileTime, - Options = MatrixType::Options, MaxRowsAtCompileTime = MatrixType::MaxRowsAtCompileTime, MaxColsAtCompileTime = MatrixType::MaxColsAtCompileTime }; typedef typename MatrixType::Scalar Scalar; typedef typename MatrixType::RealScalar RealScalar; typedef typename MatrixType::StorageIndex StorageIndex; - typedef Matrix - MatrixQType; typedef typename internal::plain_diag_type::type HCoeffsType; typedef PermutationMatrix PermutationType; @@ -114,12 +113,29 @@ class CompleteOrthogonalDecomposition { explicit CompleteOrthogonalDecomposition(const EigenBase& matrix) : m_cpqr(matrix.rows(), matrix.cols()), m_zCoeffs((std::min)(matrix.rows(), matrix.cols())), - m_temp(matrix.cols()) { + m_temp(matrix.cols()) + { compute(matrix.derived()); } + /** \brief Constructs a complete orthogonal decomposition from a given matrix + * + * This overloaded constructor is provided for \link InplaceDecomposition inplace decomposition \endlink when \c MatrixType is a Eigen::Ref. + * + * \sa CompleteOrthogonalDecomposition(const EigenBase&) + */ + template + explicit CompleteOrthogonalDecomposition(EigenBase& matrix) + : m_cpqr(matrix.derived()), + m_zCoeffs((std::min)(matrix.rows(), matrix.cols())), + m_temp(matrix.cols()) + { + computeInPlace(); + } + + /** This method computes the minimum-norm solution X to a least squares - * problem \f[\mathrm{minimize} ||A X - B|| \f], where \b A is the matrix of + * problem \f[\mathrm{minimize} \|A X - B\|, \f] where \b A is the matrix of * which \c *this is the complete orthogonal decomposition. * * \param B the right-hand sides of the problem to solve. 
@@ -165,7 +181,12 @@ class CompleteOrthogonalDecomposition { const MatrixType& matrixT() const { return m_cpqr.matrixQR(); } template - CompleteOrthogonalDecomposition& compute(const EigenBase& matrix); + CompleteOrthogonalDecomposition& compute(const EigenBase& matrix) { + // Compute the column pivoted QR factorization A P = Q R. + m_cpqr.compute(matrix); + computeInPlace(); + return *this; + } /** \returns a const reference to the column permutation matrix */ const PermutationType& colsPermutation() const { @@ -354,6 +375,8 @@ class CompleteOrthogonalDecomposition { EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar); } + void computeInPlace(); + /** Overwrites \b rhs with \f$ \mathbf{Z}^* * \mathbf{rhs} \f$. */ template @@ -384,20 +407,16 @@ CompleteOrthogonalDecomposition::logAbsDeterminant() const { * CompleteOrthogonalDecomposition(const MatrixType&) */ template -template -CompleteOrthogonalDecomposition& CompleteOrthogonalDecomposition< - MatrixType>::compute(const EigenBase& matrix) { +void CompleteOrthogonalDecomposition::computeInPlace() +{ check_template_parameters(); // the column permutation is stored as int indices, so just to be sure: - eigen_assert(matrix.cols() <= NumTraits::highest()); - - // Compute the column pivoted QR factorization A P = Q R. 
- m_cpqr.compute(matrix); + eigen_assert(m_cpqr.cols() <= NumTraits::highest()); const Index rank = m_cpqr.rank(); - const Index cols = matrix.cols(); - const Index rows = matrix.rows(); + const Index cols = m_cpqr.cols(); + const Index rows = m_cpqr.rows(); m_zCoeffs.resize((std::min)(rows, cols)); m_temp.resize(cols); @@ -443,7 +462,6 @@ CompleteOrthogonalDecomposition& CompleteOrthogonalDecomposition< } } } - return *this; } template @@ -509,12 +527,12 @@ void CompleteOrthogonalDecomposition<_MatrixType>::_solve_impl( namespace internal { -template -struct Assignment >, internal::assign_op, Dense2Dense, Scalar> +template +struct Assignment >, internal::assign_op::Scalar>, Dense2Dense> { typedef CompleteOrthogonalDecomposition CodType; typedef Inverse SrcXprType; - static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op &) + static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op &) { dst = src.nestedExpression().solve(MatrixType::Identity(src.rows(), src.rows())); } @@ -529,7 +547,6 @@ CompleteOrthogonalDecomposition::householderQ() const { return m_cpqr.householderQ(); } -#ifndef __CUDACC__ /** \return the complete orthogonal decomposition of \c *this. * * \sa class CompleteOrthogonalDecomposition @@ -539,7 +556,6 @@ const CompleteOrthogonalDecomposition::PlainObject> MatrixBase::completeOrthogonalDecomposition() const { return CompleteOrthogonalDecomposition(eval()); } -#endif // __CUDACC__ } // end namespace Eigen diff --git a/Eigen/src/QR/FullPivHouseholderQR.h b/Eigen/src/QR/FullPivHouseholderQR.h index 32a10f3fe..e0e15100d 100644 --- a/Eigen/src/QR/FullPivHouseholderQR.h +++ b/Eigen/src/QR/FullPivHouseholderQR.h @@ -50,6 +50,8 @@ struct traits > * This decomposition performs a very prudent full pivoting in order to be rank-revealing and achieve optimal * numerical stability. The trade-off is that it is slower than HouseholderQR and ColPivHouseholderQR. 
* + * This class supports the \link InplaceDecomposition inplace decomposition \endlink mechanism. + * * \sa MatrixBase::fullPivHouseholderQr() */ template class FullPivHouseholderQR @@ -60,7 +62,6 @@ template class FullPivHouseholderQR enum { RowsAtCompileTime = MatrixType::RowsAtCompileTime, ColsAtCompileTime = MatrixType::ColsAtCompileTime, - Options = MatrixType::Options, MaxRowsAtCompileTime = MatrixType::MaxRowsAtCompileTime, MaxColsAtCompileTime = MatrixType::MaxColsAtCompileTime }; @@ -135,6 +136,26 @@ template class FullPivHouseholderQR compute(matrix.derived()); } + /** \brief Constructs a QR factorization from a given matrix + * + * This overloaded constructor is provided for \link InplaceDecomposition inplace decomposition \endlink when \c MatrixType is a Eigen::Ref. + * + * \sa FullPivHouseholderQR(const EigenBase&) + */ + template + explicit FullPivHouseholderQR(EigenBase& matrix) + : m_qr(matrix.derived()), + m_hCoeffs((std::min)(matrix.rows(), matrix.cols())), + m_rows_transpositions((std::min)(matrix.rows(), matrix.cols())), + m_cols_transpositions((std::min)(matrix.rows(), matrix.cols())), + m_cols_permutation(matrix.cols()), + m_temp(matrix.cols()), + m_isInitialized(false), + m_usePrescribedThreshold(false) + { + computeInPlace(); + } + /** This method finds a solution x to the equation Ax=b, where A is the matrix of which * \c *this is the QR decomposition. * @@ -143,9 +164,6 @@ template class FullPivHouseholderQR * \returns the exact or least-square solution if the rank is greater or equal to the number of columns of A, * and an arbitrary solution otherwise. * - * \note The case where b is a matrix is not yet implemented. Also, this - * code is space inefficient. 
- * * \note_about_checking_solutions * * \note_about_arbitrary_choice_of_solution @@ -430,18 +448,16 @@ template template FullPivHouseholderQR& FullPivHouseholderQR::compute(const EigenBase& matrix) { - check_template_parameters(); - m_qr = matrix.derived(); - computeInPlace(); - return *this; } template void FullPivHouseholderQR::computeInPlace() { + check_template_parameters(); + using std::abs; Index rows = m_qr.rows(); Index cols = m_qr.cols(); @@ -560,11 +576,11 @@ void FullPivHouseholderQR<_MatrixType>::_solve_impl(const RhsType &rhs, DstType namespace internal { template -struct Assignment >, internal::assign_op, Dense2Dense, Scalar> +struct Assignment >, internal::assign_op, Dense2Dense> { typedef FullPivHouseholderQR QrType; typedef Inverse SrcXprType; - static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op &) + static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op &) { dst = src.nestedExpression().solve(MatrixType::Identity(src.rows(), src.cols())); } @@ -644,7 +660,6 @@ inline typename FullPivHouseholderQR::MatrixQReturnType FullPivHouse return MatrixQReturnType(m_qr, m_hCoeffs, m_rows_transpositions); } -#ifndef __CUDACC__ /** \return the full-pivoting Householder QR decomposition of \c *this. * * \sa class FullPivHouseholderQR @@ -655,7 +670,6 @@ MatrixBase::fullPivHouseholderQr() const { return FullPivHouseholderQR(eval()); } -#endif // __CUDACC__ } // end namespace Eigen diff --git a/Eigen/src/QR/HouseholderQR.h b/Eigen/src/QR/HouseholderQR.h index 03bc8e6cd..3513d995c 100644 --- a/Eigen/src/QR/HouseholderQR.h +++ b/Eigen/src/QR/HouseholderQR.h @@ -37,6 +37,8 @@ namespace Eigen { * This Householder QR decomposition is faster, but less numerically stable and less feature-full than * FullPivHouseholderQR or ColPivHouseholderQR. * + * This class supports the \link InplaceDecomposition inplace decomposition \endlink mechanism. 
+ * * \sa MatrixBase::householderQr() */ template class HouseholderQR @@ -47,7 +49,6 @@ template class HouseholderQR enum { RowsAtCompileTime = MatrixType::RowsAtCompileTime, ColsAtCompileTime = MatrixType::ColsAtCompileTime, - Options = MatrixType::Options, MaxRowsAtCompileTime = MatrixType::MaxRowsAtCompileTime, MaxColsAtCompileTime = MatrixType::MaxColsAtCompileTime }; @@ -102,6 +103,24 @@ template class HouseholderQR compute(matrix.derived()); } + + /** \brief Constructs a QR factorization from a given matrix + * + * This overloaded constructor is provided for \link InplaceDecomposition inplace decomposition \endlink when + * \c MatrixType is a Eigen::Ref. + * + * \sa HouseholderQR(const EigenBase&) + */ + template + explicit HouseholderQR(EigenBase& matrix) + : m_qr(matrix.derived()), + m_hCoeffs((std::min)(matrix.rows(),matrix.cols())), + m_temp(matrix.cols()), + m_isInitialized(false) + { + computeInPlace(); + } + /** This method finds a solution x to the equation Ax=b, where A is the matrix of which * *this is the QR decomposition, if any exists. * @@ -109,9 +128,6 @@ template class HouseholderQR * * \returns a solution. * - * \note The case where b is a matrix is not yet implemented. Also, this - * code is space inefficient. - * * \note_about_checking_solutions * * \note_about_arbitrary_choice_of_solution @@ -151,7 +167,11 @@ template class HouseholderQR } template - HouseholderQR& compute(const EigenBase& matrix); + HouseholderQR& compute(const EigenBase& matrix) { + m_qr = matrix.derived(); + computeInPlace(); + return *this; + } /** \returns the absolute value of the determinant of the matrix of which * *this is the QR decomposition. 
It has only linear complexity @@ -203,6 +223,8 @@ template class HouseholderQR { EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar); } + + void computeInPlace(); MatrixType m_qr; HCoeffsType m_hCoeffs; @@ -354,16 +376,14 @@ void HouseholderQR<_MatrixType>::_solve_impl(const RhsType &rhs, DstType &dst) c * \sa class HouseholderQR, HouseholderQR(const MatrixType&) */ template -template -HouseholderQR& HouseholderQR::compute(const EigenBase& matrix) +void HouseholderQR::computeInPlace() { check_template_parameters(); - Index rows = matrix.rows(); - Index cols = matrix.cols(); + Index rows = m_qr.rows(); + Index cols = m_qr.cols(); Index size = (std::min)(rows,cols); - m_qr = matrix.derived(); m_hCoeffs.resize(size); m_temp.resize(cols); @@ -371,10 +391,8 @@ HouseholderQR& HouseholderQR::compute(const EigenBase::run(m_qr, m_hCoeffs, 48, m_temp.data()); m_isInitialized = true; - return *this; } -#ifndef __CUDACC__ /** \return the Householder QR decomposition of \c *this. * * \sa class HouseholderQR @@ -385,7 +403,6 @@ MatrixBase::householderQr() const { return HouseholderQR(eval()); } -#endif // __CUDACC__ } // end namespace Eigen diff --git a/Eigen/src/QR/HouseholderQR_MKL.h b/Eigen/src/QR/HouseholderQR_LAPACKE.h similarity index 80% rename from Eigen/src/QR/HouseholderQR_MKL.h rename to Eigen/src/QR/HouseholderQR_LAPACKE.h index 84ab640a1..1dc7d5363 100644 --- a/Eigen/src/QR/HouseholderQR_MKL.h +++ b/Eigen/src/QR/HouseholderQR_LAPACKE.h @@ -25,24 +25,22 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ******************************************************************************** - * Content : Eigen bindings to Intel(R) MKL + * Content : Eigen bindings to LAPACKe * Householder QR decomposition of a matrix w/o pivoting based on * LAPACKE_?geqrf function. 
******************************************************************************** */ -#ifndef EIGEN_QR_MKL_H -#define EIGEN_QR_MKL_H - -#include "../Core/util/MKL_support.h" +#ifndef EIGEN_QR_LAPACKE_H +#define EIGEN_QR_LAPACKE_H namespace Eigen { namespace internal { -/** \internal Specialization for the data types supported by MKL */ +/** \internal Specialization for the data types supported by LAPACKe */ -#define EIGEN_MKL_QR_NOPIV(EIGTYPE, MKLTYPE, MKLPREFIX) \ +#define EIGEN_LAPACKE_QR_NOPIV(EIGTYPE, LAPACKE_TYPE, LAPACKE_PREFIX) \ template \ struct householder_qr_inplace_blocked \ { \ @@ -53,18 +51,18 @@ struct householder_qr_inplace_blocked \ lapack_int n = (lapack_int) mat.cols(); \ lapack_int lda = (lapack_int) mat.outerStride(); \ lapack_int matrix_order = (MatrixQR::IsRowMajor) ? LAPACK_ROW_MAJOR : LAPACK_COL_MAJOR; \ - LAPACKE_##MKLPREFIX##geqrf( matrix_order, m, n, (MKLTYPE*)mat.data(), lda, (MKLTYPE*)hCoeffs.data()); \ + LAPACKE_##LAPACKE_PREFIX##geqrf( matrix_order, m, n, (LAPACKE_TYPE*)mat.data(), lda, (LAPACKE_TYPE*)hCoeffs.data()); \ hCoeffs.adjointInPlace(); \ } \ }; -EIGEN_MKL_QR_NOPIV(double, double, d) -EIGEN_MKL_QR_NOPIV(float, float, s) -EIGEN_MKL_QR_NOPIV(dcomplex, MKL_Complex16, z) -EIGEN_MKL_QR_NOPIV(scomplex, MKL_Complex8, c) +EIGEN_LAPACKE_QR_NOPIV(double, double, d) +EIGEN_LAPACKE_QR_NOPIV(float, float, s) +EIGEN_LAPACKE_QR_NOPIV(dcomplex, lapack_complex_double, z) +EIGEN_LAPACKE_QR_NOPIV(scomplex, lapack_complex_float, c) } // end namespace internal } // end namespace Eigen -#endif // EIGEN_QR_MKL_H +#endif // EIGEN_QR_LAPACKE_H diff --git a/Eigen/src/SPQRSupport/CMakeLists.txt b/Eigen/src/SPQRSupport/CMakeLists.txt deleted file mode 100644 index 4968beaf2..000000000 --- a/Eigen/src/SPQRSupport/CMakeLists.txt +++ /dev/null @@ -1,6 +0,0 @@ -FILE(GLOB Eigen_SPQRSupport_SRCS "*.h") - -INSTALL(FILES - ${Eigen_SPQRSupport_SRCS} - DESTINATION ${INCLUDE_INSTALL_DIR}/Eigen/src/SPQRSupport/ COMPONENT Devel - ) diff --git 
a/Eigen/src/SVD/BDCSVD.h b/Eigen/src/SVD/BDCSVD.h index 3552c87bf..25fca6f4d 100644 --- a/Eigen/src/SVD/BDCSVD.h +++ b/Eigen/src/SVD/BDCSVD.h @@ -11,7 +11,7 @@ // Copyright (C) 2013 Jean Ceccato // Copyright (C) 2013 Pierre Zoppitelli // Copyright (C) 2013 Jitse Niesen -// Copyright (C) 2014 Gael Guennebaud +// Copyright (C) 2014-2016 Gael Guennebaud // // Source Code Form is subject to the terms of the Mozilla // Public License v. 2.0. If a copy of the MPL was not distributed @@ -21,6 +21,7 @@ #define EIGEN_BDCSVD_H // #define EIGEN_BDCSVD_DEBUG_VERBOSE // #define EIGEN_BDCSVD_SANITY_CHECKS + namespace Eigen { #ifdef EIGEN_BDCSVD_DEBUG_VERBOSE @@ -49,6 +50,18 @@ struct traits > * * \tparam _MatrixType the type of the matrix of which we are computing the SVD decomposition * + * This class first reduces the input matrix to bi-diagonal form using class UpperBidiagonalization, + * and then performs a divide-and-conquer diagonalization. Small blocks are diagonalized using class JacobiSVD. + * You can control the switching size with the setSwitchSize() method, default is 16. + * For small matrices (<16), it is thus preferable to directly use JacobiSVD. For larger ones, BDCSVD is highly + * recommended and can be several orders of magnitude faster. + * + * \warning this algorithm is unlikely to provide accurate results when compiled with unsafe math optimizations. + * For instance, this concerns Intel's compiler (ICC), which performs such optimizations by default unless + * you compile with the \c -fp-model \c precise option. Likewise, the \c -ffast-math option of GCC or clang will + * significantly degrade the accuracy.
+ * + * \sa class JacobiSVD */ template class BDCSVD : public SVDBase > @@ -228,6 +241,8 @@ BDCSVD& BDCSVD::compute(const MatrixType& matrix, unsign #endif allocate(matrix.rows(), matrix.cols(), computationOptions); using std::abs; + + const RealScalar considerZero = (std::numeric_limits::min)(); //**** step -1 - If the problem is too small, directly falls back to JacobiSVD and return if(matrix.cols() < m_algoswap) @@ -266,7 +281,7 @@ BDCSVD& BDCSVD::compute(const MatrixType& matrix, unsign { RealScalar a = abs(m_computed.coeff(i, i)); m_singularValues.coeffRef(i) = a * scale; - if (a == 0) + if (a::divide (Index firstCol, Index lastCol, Index firstRowW, using std::abs; const Index n = lastCol - firstCol + 1; const Index k = n/2; + const RealScalar considerZero = (std::numeric_limits::min)(); RealScalar alphaK; RealScalar betaK; RealScalar r0; @@ -434,7 +450,7 @@ void BDCSVD::divide (Index firstCol, Index lastCol, Index firstRowW, f = m_naiveU.row(0).segment(firstCol + k + 1, n - k - 1); } if (m_compV) m_naiveV(firstRowW+k, firstColW) = 1; - if (r0 == 0) + if (r0::divide (Index firstCol, Index lastCol, Index firstRowW, template void BDCSVD::computeSVDofM(Index firstCol, Index n, MatrixXr& U, VectorType& singVals, MatrixXr& V) { + const RealScalar considerZero = (std::numeric_limits::min)(); + using std::abs; ArrayRef col0 = m_computed.col(firstCol).segment(firstCol, n); m_workspace.head(n) = m_computed.block(firstCol, firstCol, n, n).diagonal(); ArrayRef diag = m_workspace.head(n); @@ -575,7 +593,7 @@ void BDCSVD::computeSVDofM(Index firstCol, Index n, MatrixXr& U, Vec while(actual_n>1 && diag(actual_n-1)==0) --actual_n; Index m = 0; // size of the deflated problem for(Index k=0;kconsiderZero) m_workspaceI(m++) = k; Map perm(m_workspaceI.data(),m); @@ -600,7 +618,7 @@ void BDCSVD::computeSVDofM(Index firstCol, Index n, MatrixXr& U, Vec { Index actual_n = n; - while(actual_n>1 && col0(actual_n-1)==0) --actual_n; + while(actual_n>1 && abs(col0(actual_n-1))0) : " << 
((singVals.array()-diag) / singVals.array()).head(actual_n).transpose() << "\n\n"; @@ -680,6 +698,7 @@ typename BDCSVD::RealScalar BDCSVD::secularEq(RealScalar res += numext::abs2(col0(j)) / ((diagShifted(j) - mu) * (diag(j) + shift + mu)); } return res; + } template @@ -746,14 +765,14 @@ void BDCSVD::computeSingVals(const ArrayRef& col0, const ArrayRef& d RealScalar muPrev, muCur; if (shift == left) { - muPrev = (right - left) * 0.1; + muPrev = (right - left) * RealScalar(0.1); if (k == actual_n-1) muCur = right - left; - else muCur = (right - left) * 0.5; + else muCur = (right - left) * RealScalar(0.5); } else { - muPrev = -(right - left) * 0.1; - muCur = -(right - left) * 0.5; + muPrev = -(right - left) * RealScalar(0.1); + muCur = -(right - left) * RealScalar(0.5); } RealScalar fPrev = secularEq(muPrev, col0, diag, perm, diagShifted, shift); @@ -798,15 +817,15 @@ void BDCSVD::computeSingVals(const ArrayRef& col0, const ArrayRef& d RealScalar leftShifted, rightShifted; if (shift == left) { - leftShifted = RealScalar(1)/NumTraits::highest(); + leftShifted = (std::numeric_limits::min)(); // I don't understand why the case k==0 would be special there: // if (k == 0) rightShifted = right - left; else - rightShifted = (k==actual_n-1) ? right : ((right - left) * 0.6); // theoretically we can take 0.5, but let's be safe + rightShifted = (k==actual_n-1) ? 
right : ((right - left) * RealScalar(0.6)); // theoretically we can take 0.5, but let's be safe } else { - leftShifted = -(right - left) * 0.6; - rightShifted = -RealScalar(1)/NumTraits::highest(); + leftShifted = -(right - left) * RealScalar(0.6); + rightShifted = -(std::numeric_limits::min)(); } RealScalar fLeft = secularEq(leftShifted, col0, diag, perm, diagShifted, shift); @@ -817,7 +836,10 @@ void BDCSVD::computeSingVals(const ArrayRef& col0, const ArrayRef& d #ifdef EIGEN_BDCSVD_DEBUG_VERBOSE if(!(fLeft * fRight<0)) + { + std::cout << "fLeft: " << leftShifted << " - " << diagShifted.head(10).transpose() << "\n ; " << bool(left==shift) << " " << (left-shift) << "\n"; std::cout << k << " : " << fLeft << " * " << fRight << " == " << fLeft * fRight << " ; " << left << " - " << right << " -> " << leftShifted << " " << rightShifted << " shift=" << shift << "\n"; + } #endif eigen_internal_assert(fLeft * fRight < 0); @@ -1028,8 +1050,9 @@ void BDCSVD::deflation(Index firstCol, Index lastCol, Index k, Index Diagonal fulldiag(m_computed); VectorBlock,Dynamic> diag(fulldiag, firstCol+shift, length); + const RealScalar considerZero = (std::numeric_limits::min)(); RealScalar maxDiag = diag.tail((std::max)(Index(1),length-1)).cwiseAbs().maxCoeff(); - RealScalar epsilon_strict = NumTraits::epsilon() * maxDiag; + RealScalar epsilon_strict = numext::maxi(considerZero,NumTraits::epsilon() * maxDiag); RealScalar epsilon_coarse = 8 * NumTraits::epsilon() * numext::maxi(col0.cwiseAbs().maxCoeff(), maxDiag); #ifdef EIGEN_BDCSVD_SANITY_CHECKS @@ -1082,7 +1105,7 @@ void BDCSVD::deflation(Index firstCol, Index lastCol, Index k, Index { // Check for total deflation // If we have a total deflation, then we have to consider col0(0)==diag(0) as a singular value during sorting - bool total_deflation = (col0.tail(length-1).array()==RealScalar(0)).all(); + bool total_deflation = (col0.tail(length-1).array()::deflation(Index firstCol, Index lastCol, Index k, Index // Move deflated diagonal 
entries at the end. for(Index i=1; i::deflation(Index firstCol, Index lastCol, Index k, Index for(Index i=1; i::deflation(Index firstCol, Index lastCol, Index k, Index //condition 4.4 { Index i = length-1; - while(i>0 && (diag(i)==0 || col0(i)==0)) --i; + while(i>0 && (abs(diag(i))1;--i) if( (diag(i) - diag(i-1)) < NumTraits::epsilon()*maxDiag ) { @@ -1177,7 +1200,7 @@ void BDCSVD::deflation(Index firstCol, Index lastCol, Index k, Index #ifdef EIGEN_BDCSVD_SANITY_CHECKS for(Index j=2;j } }; -template -void real_2x2_jacobi_svd(const MatrixType& matrix, Index p, Index q, - JacobiRotation *j_left, - JacobiRotation *j_right) -{ - using std::sqrt; - using std::abs; - Matrix m; - m << numext::real(matrix.coeff(p,p)), numext::real(matrix.coeff(p,q)), - numext::real(matrix.coeff(q,p)), numext::real(matrix.coeff(q,q)); - JacobiRotation rot1; - RealScalar t = m.coeff(0,0) + m.coeff(1,1); - RealScalar d = m.coeff(1,0) - m.coeff(0,1); - if(d == RealScalar(0)) - { - rot1.s() = RealScalar(0); - rot1.c() = RealScalar(1); - } - else - { - // If d!=0, then t/d cannot overflow because the magnitude of the - // entries forming d are not too small compared to the ones forming t. 
- RealScalar u = t / d; - RealScalar tmp = sqrt(RealScalar(1) + numext::abs2(u)); - rot1.s() = RealScalar(1) / tmp; - rot1.c() = u / tmp; - } - m.applyOnTheLeft(0,1,rot1); - j_right->makeJacobi(m,0,1); - *j_left = rot1 * j_right->transpose(); -} - template struct traits > { @@ -697,10 +665,8 @@ JacobiSVD::compute(const MatrixType& matrix, unsig // only worsening the precision of U and V as we accumulate more rotations const RealScalar precision = RealScalar(2) * NumTraits::epsilon(); - // limit for very small denormal numbers to be considered zero in order to avoid infinite loops (see bug 286) - // FIXME What about considerering any denormal numbers as zero, using: - // const RealScalar considerAsZero = (std::numeric_limits::min)(); - const RealScalar considerAsZero = RealScalar(2) * std::numeric_limits::denorm_min(); + // limit for denormal numbers to be considered zero in order to avoid infinite loops (see bug 286) + const RealScalar considerAsZero = (std::numeric_limits::min)(); // Scaling factor to reduce over/under-flows RealScalar scale = matrix.cwiseAbs().maxCoeff(); @@ -745,7 +711,7 @@ JacobiSVD::compute(const MatrixType& matrix, unsig { finished = false; // perform SVD decomposition of 2x2 sub-matrix corresponding to indices p,q to make it diagonal - // the complex to real operation returns true is the updated 2x2 block is not already diagonal + // the complex to real operation returns true if the updated 2x2 block is not already diagonal if(internal::svd_precondition_2x2_block_to_be_real::run(m_workMatrix, *this, p, q, maxDiagEntry)) { JacobiRotation j_left, j_right; @@ -759,7 +725,7 @@ JacobiSVD::compute(const MatrixType& matrix, unsig if(computeV()) m_matrixV.applyOnTheRight(p,q,j_right); // keep track of the largest diagonal coefficient - maxDiagEntry = numext::maxi(maxDiagEntry,numext::maxi(abs(m_workMatrix.coeff(p,p)), abs(m_workMatrix.coeff(q,q)))); + maxDiagEntry = numext::maxi(maxDiagEntry,numext::maxi(abs(m_workMatrix.coeff(p,p)), 
abs(m_workMatrix.coeff(q,q)))); } } } @@ -770,9 +736,22 @@ JacobiSVD::compute(const MatrixType& matrix, unsig for(Index i = 0; i < m_diagSize; ++i) { - RealScalar a = abs(m_workMatrix.coeff(i,i)); - m_singularValues.coeffRef(i) = a; - if(computeU() && (a!=RealScalar(0))) m_matrixU.col(i) *= m_workMatrix.coeff(i,i)/a; + // For a complex matrix, some diagonal coefficients might not have been + // treated by svd_precondition_2x2_block_to_be_real, and the imaginary part + // of some diagonal entry might not be null. + if(NumTraits::IsComplex && abs(numext::imag(m_workMatrix.coeff(i,i)))>considerAsZero) + { + RealScalar a = abs(m_workMatrix.coeff(i,i)); + m_singularValues.coeffRef(i) = abs(a); + if(computeU()) m_matrixU.col(i) *= m_workMatrix.coeff(i,i)/a; + } + else + { + // m_workMatrix.coeff(i,i) is already real, no difficulty: + RealScalar a = numext::real(m_workMatrix.coeff(i,i)); + m_singularValues.coeffRef(i) = abs(a); + if(computeU() && (a::compute(const MatrixType& matrix, unsig return *this; } -#ifndef __CUDACC__ /** \svd_module * * \return the singular value decomposition of \c *this computed by two-sided @@ -816,7 +794,6 @@ MatrixBase::jacobiSvd(unsigned int computationOptions) const { return JacobiSVD(*this, computationOptions); } -#endif // __CUDACC__ } // end namespace Eigen diff --git a/Eigen/src/SVD/JacobiSVD_MKL.h b/Eigen/src/SVD/JacobiSVD_LAPACKE.h similarity index 62% rename from Eigen/src/SVD/JacobiSVD_MKL.h rename to Eigen/src/SVD/JacobiSVD_LAPACKE.h index 14e461c4e..50272154f 100644 --- a/Eigen/src/SVD/JacobiSVD_MKL.h +++ b/Eigen/src/SVD/JacobiSVD_LAPACKE.h @@ -25,21 +25,19 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ******************************************************************************** - * Content : Eigen bindings to Intel(R) MKL + * Content : Eigen bindings to LAPACKe * Singular Value Decomposition - SVD.
******************************************************************************** */ -#ifndef EIGEN_JACOBISVD_MKL_H -#define EIGEN_JACOBISVD_MKL_H - -#include "Eigen/src/Core/util/MKL_support.h" +#ifndef EIGEN_JACOBISVD_LAPACKE_H +#define EIGEN_JACOBISVD_LAPACKE_H namespace Eigen { -/** \internal Specialization for the data types supported by MKL */ +/** \internal Specialization for the data types supported by LAPACKe */ -#define EIGEN_MKL_SVD(EIGTYPE, MKLTYPE, MKLRTYPE, MKLPREFIX, EIGCOLROW, MKLCOLROW) \ +#define EIGEN_LAPACKE_SVD(EIGTYPE, LAPACKE_TYPE, LAPACKE_RTYPE, LAPACKE_PREFIX, EIGCOLROW, LAPACKE_COLROW) \ template<> inline \ JacobiSVD, ColPivHouseholderQRPreconditioner>& \ JacobiSVD, ColPivHouseholderQRPreconditioner>::compute(const Matrix& matrix, unsigned int computationOptions) \ @@ -52,41 +50,41 @@ JacobiSVD, ColPiv /*const RealScalar precision = RealScalar(2) * NumTraits::epsilon();*/ \ m_nonzeroSingularValues = m_diagSize; \ \ - lapack_int lda = matrix.outerStride(), ldu, ldvt; \ - lapack_int matrix_order = MKLCOLROW; \ + lapack_int lda = internal::convert_index(matrix.outerStride()), ldu, ldvt; \ + lapack_int matrix_order = LAPACKE_COLROW; \ char jobu, jobvt; \ - MKLTYPE *u, *vt, dummy; \ + LAPACKE_TYPE *u, *vt, dummy; \ jobu = (m_computeFullU) ? 'A' : (m_computeThinU) ? 'S' : 'N'; \ jobvt = (m_computeFullV) ? 'A' : (m_computeThinV) ? 'S' : 'N'; \ if (computeU()) { \ - ldu = m_matrixU.outerStride(); \ - u = (MKLTYPE*)m_matrixU.data(); \ + ldu = internal::convert_index(m_matrixU.outerStride()); \ + u = (LAPACKE_TYPE*)m_matrixU.data(); \ } else { ldu=1; u=&dummy; }\ MatrixType localV; \ - ldvt = (m_computeFullV) ? m_cols : (m_computeThinV) ? m_diagSize : 1; \ + ldvt = (m_computeFullV) ? internal::convert_index(m_cols) : (m_computeThinV) ? 
internal::convert_index(m_diagSize) : 1; \ if (computeV()) { \ localV.resize(ldvt, m_cols); \ - vt = (MKLTYPE*)localV.data(); \ + vt = (LAPACKE_TYPE*)localV.data(); \ } else { ldvt=1; vt=&dummy; }\ - Matrix superb; superb.resize(m_diagSize, 1); \ + Matrix superb; superb.resize(m_diagSize, 1); \ MatrixType m_temp; m_temp = matrix; \ - LAPACKE_##MKLPREFIX##gesvd( matrix_order, jobu, jobvt, m_rows, m_cols, (MKLTYPE*)m_temp.data(), lda, (MKLRTYPE*)m_singularValues.data(), u, ldu, vt, ldvt, superb.data()); \ + LAPACKE_##LAPACKE_PREFIX##gesvd( matrix_order, jobu, jobvt, internal::convert_index(m_rows), internal::convert_index(m_cols), (LAPACKE_TYPE*)m_temp.data(), lda, (LAPACKE_RTYPE*)m_singularValues.data(), u, ldu, vt, ldvt, superb.data()); \ if (computeV()) m_matrixV = localV.adjoint(); \ /* for(int i=0;i::min)()); + RealScalar premultiplied_threshold = numext::maxi(m_singularValues.coeff(0) * threshold(), (std::numeric_limits::min)()); Index i = m_nonzeroSingularValues-1; while(i>=0 && m_singularValues.coeff(i) < premultiplied_threshold) --i; return i+1; diff --git a/Eigen/src/SparseCholesky/CMakeLists.txt b/Eigen/src/SparseCholesky/CMakeLists.txt deleted file mode 100644 index 375a59d7a..000000000 --- a/Eigen/src/SparseCholesky/CMakeLists.txt +++ /dev/null @@ -1,6 +0,0 @@ -FILE(GLOB Eigen_SparseCholesky_SRCS "*.h") - -INSTALL(FILES - ${Eigen_SparseCholesky_SRCS} - DESTINATION ${INCLUDE_INSTALL_DIR}/Eigen/src/SparseCholesky COMPONENT Devel - ) diff --git a/Eigen/src/SparseCore/CMakeLists.txt b/Eigen/src/SparseCore/CMakeLists.txt deleted file mode 100644 index d860452a6..000000000 --- a/Eigen/src/SparseCore/CMakeLists.txt +++ /dev/null @@ -1,6 +0,0 @@ -FILE(GLOB Eigen_SparseCore_SRCS "*.h") - -INSTALL(FILES - ${Eigen_SparseCore_SRCS} - DESTINATION ${INCLUDE_INSTALL_DIR}/Eigen/src/SparseCore COMPONENT Devel - ) diff --git a/Eigen/src/SparseCore/ConservativeSparseSparseProduct.h b/Eigen/src/SparseCore/ConservativeSparseSparseProduct.h index 0f6835846..492eb0a29 100644 
--- a/Eigen/src/SparseCore/ConservativeSparseSparseProduct.h +++ b/Eigen/src/SparseCore/ConservativeSparseSparseProduct.h @@ -143,7 +143,7 @@ struct conservative_sparse_sparse_product_selector=rhs.cols()) + if(lhs.rows()>rhs.cols()) { ColMajorMatrix resCol(lhs.rows(),rhs.cols()); // perform sorted insertion diff --git a/Eigen/src/SparseCore/SparseAssign.h b/Eigen/src/SparseCore/SparseAssign.h index 4a8dd12e4..fa5386599 100644 --- a/Eigen/src/SparseCore/SparseAssign.h +++ b/Eigen/src/SparseCore/SparseAssign.h @@ -34,8 +34,8 @@ template inline Derived& SparseMatrixBase::operator=(const SparseMatrixBase& other) { // by default sparse evaluation do not alias, so we can safely bypass the generic call_assignment routine - internal::Assignment > - ::run(derived(), other.derived(), internal::assign_op()); + internal::Assignment > + ::run(derived(), other.derived(), internal::assign_op()); return derived(); } @@ -124,24 +124,24 @@ void assign_sparse_to_sparse(DstXprType &dst, const SrcXprType &src) } // Generic Sparse to Sparse assignment -template< typename DstXprType, typename SrcXprType, typename Functor, typename Scalar> -struct Assignment +template< typename DstXprType, typename SrcXprType, typename Functor> +struct Assignment { - static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op &/*func*/) + static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op &/*func*/) { assign_sparse_to_sparse(dst.derived(), src.derived()); } }; // Generic Sparse to Dense assignment -template< typename DstXprType, typename SrcXprType, typename Functor, typename Scalar> -struct Assignment +template< typename DstXprType, typename SrcXprType, typename Functor> +struct Assignment { static void run(DstXprType &dst, const SrcXprType &src, const Functor &func) { eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols()); - if(internal::is_same >::value) + if(internal::is_same >::value) dst.setZero(); internal::evaluator srcEval(src); @@ 
-156,10 +156,10 @@ struct Assignment // Specialization for "dst = dec.solve(rhs)" // NOTE we need to specialize it for Sparse2Sparse to avoid ambiguous specialization error template -struct Assignment, internal::assign_op, Sparse2Sparse, Scalar> +struct Assignment, internal::assign_op, Sparse2Sparse> { typedef Solve SrcXprType; - static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op &) + static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op &) { src.dec()._solve_impl(src.rhs(), dst); } @@ -169,14 +169,15 @@ struct Diagonal2Sparse {}; template<> struct AssignmentKind { typedef Diagonal2Sparse Kind; }; -template< typename DstXprType, typename SrcXprType, typename Functor, typename Scalar> -struct Assignment +template< typename DstXprType, typename SrcXprType, typename Functor> +struct Assignment { typedef typename DstXprType::StorageIndex StorageIndex; + typedef typename DstXprType::Scalar Scalar; typedef Array ArrayXI; typedef Array ArrayXS; template - static void run(SparseMatrix &dst, const SrcXprType &src, const internal::assign_op &/*func*/) + static void run(SparseMatrix &dst, const SrcXprType &src, const internal::assign_op &/*func*/) { Index size = src.diagonal().size(); dst.makeCompressed(); @@ -187,15 +188,15 @@ struct Assignment } template - static void run(SparseMatrixBase &dst, const SrcXprType &src, const internal::assign_op &/*func*/) + static void run(SparseMatrixBase &dst, const SrcXprType &src, const internal::assign_op &/*func*/) { dst.diagonal() = src.diagonal(); } - static void run(DstXprType &dst, const SrcXprType &src, const internal::add_assign_op &/*func*/) + static void run(DstXprType &dst, const SrcXprType &src, const internal::add_assign_op &/*func*/) { dst.diagonal() += src.diagonal(); } - static void run(DstXprType &dst, const SrcXprType &src, const internal::sub_assign_op &/*func*/) + static void run(DstXprType &dst, const SrcXprType &src, const internal::sub_assign_op &/*func*/) { 
dst.diagonal() -= src.diagonal(); } }; } // end namespace internal diff --git a/Eigen/src/SparseCore/SparseBlock.h b/Eigen/src/SparseCore/SparseBlock.h index 82fae8c4b..13e8b0bf1 100644 --- a/Eigen/src/SparseCore/SparseBlock.h +++ b/Eigen/src/SparseCore/SparseBlock.h @@ -189,9 +189,9 @@ public: StorageIndex p = StorageIndex(start); for(Index k=0; k(tmp.innerVector(k).nonZeros()); if(!m_matrix.isCompressed()) - matrix.innerNonZeroPtr()[m_outerStart+k] = StorageIndex(nnz_k); + matrix.innerNonZeroPtr()[m_outerStart+k] = nnz_k; matrix.outerIndexPtr()[m_outerStart+k] = p; p += nnz_k; } @@ -504,6 +504,7 @@ template class unary_evaluator, IteratorBased>::InnerVectorInnerIterator : public EvalIterator { + enum { IsRowMajor = unary_evaluator::IsRowMajor }; const XprType& m_block; Index m_end; public: @@ -528,6 +529,7 @@ public: template class unary_evaluator, IteratorBased>::OuterVectorInnerIterator { + enum { IsRowMajor = unary_evaluator::IsRowMajor }; const unary_evaluator& m_eval; Index m_outerPos; Index m_innerIndex; diff --git a/Eigen/src/SparseCore/SparseCompressedBase.h b/Eigen/src/SparseCore/SparseCompressedBase.h index 15854a73b..55ad91f46 100644 --- a/Eigen/src/SparseCore/SparseCompressedBase.h +++ b/Eigen/src/SparseCore/SparseCompressedBase.h @@ -106,6 +106,25 @@ class SparseCompressedBase /** \returns whether \c *this is in compressed form. */ inline bool isCompressed() const { return innerNonZeroPtr()==0; } + /** \returns a read-only view of the stored coefficients as a 1D array expression. + * + * \warning this method is for \b compressed \b storage \b only, and it will trigger an assertion otherwise. + * + * \sa valuePtr(), isCompressed() */ + const Map > coeffs() const { eigen_assert(isCompressed()); return Array::Map(valuePtr(),nonZeros()); } + + /** \returns a read-write view of the stored coefficients as a 1D array expression + * + * \warning this method is for \b compressed \b storage \b only, and it will trigger an assertion otherwise. 
+ * + * Here is an example: + * \include SparseMatrix_coeffs.cpp + * and the output is: + * \include SparseMatrix_coeffs.out + * + * \sa valuePtr(), isCompressed() */ + Map > coeffs() { eigen_assert(isCompressed()); return Array::Map(valuePtr(),nonZeros()); } + protected: /** Default constructor. Do nothing. */ SparseCompressedBase() {} diff --git a/Eigen/src/SparseCore/SparseCwiseBinaryOp.h b/Eigen/src/SparseCore/SparseCwiseBinaryOp.h index c57d9ac59..aad7b7d79 100644 --- a/Eigen/src/SparseCore/SparseCwiseBinaryOp.h +++ b/Eigen/src/SparseCore/SparseCwiseBinaryOp.h @@ -28,6 +28,9 @@ namespace Eigen { // generic sparse // 4 - dense op dense product dense // generic dense +// +// TODO to ease compiler job, we could specialize product/quotient with a scalar +// and fallback to cwise-unary evaluator using bind1st_op and bind2nd_op. template class CwiseBinaryOpImpl @@ -165,7 +168,7 @@ public: public: EIGEN_STRONG_INLINE InnerIterator(const binary_evaluator& aEval, Index outer) - : m_lhsEval(aEval.m_lhsImpl), m_rhsIter(aEval.m_rhsImpl,outer), m_functor(aEval.m_functor), m_id(-1), m_innerSize(aEval.m_expr.rhs().innerSize()) + : m_lhsEval(aEval.m_lhsImpl), m_rhsIter(aEval.m_rhsImpl,outer), m_functor(aEval.m_functor), m_value(0), m_id(-1), m_innerSize(aEval.m_expr.rhs().innerSize()) { this->operator++(); } @@ -189,7 +192,7 @@ public: return *this; } - EIGEN_STRONG_INLINE Scalar value() const { return m_value; } + EIGEN_STRONG_INLINE Scalar value() const { eigen_internal_assert(m_idoperator++(); } @@ -277,7 +280,7 @@ public: return *this; } - EIGEN_STRONG_INLINE Scalar value() const { return m_value; } + EIGEN_STRONG_INLINE Scalar value() const { eigen_internal_assert(m_id -struct binary_evaluator, Lhs, Rhs>, IteratorBased, IteratorBased> - : evaluator_base, Lhs, Rhs> > +template +struct binary_evaluator, Lhs, Rhs>, IteratorBased, IteratorBased> + : evaluator_base, Lhs, Rhs> > { protected: - typedef scalar_product_op BinaryOp; + typedef scalar_product_op BinaryOp; typedef 
typename evaluator::InnerIterator LhsIterator; typedef typename evaluator::InnerIterator RhsIterator; typedef CwiseBinaryOp XprType; @@ -407,12 +410,12 @@ protected: }; // "dense .* sparse" -template -struct binary_evaluator, Lhs, Rhs>, IndexBased, IteratorBased> - : evaluator_base, Lhs, Rhs> > +template +struct binary_evaluator, Lhs, Rhs>, IndexBased, IteratorBased> + : evaluator_base, Lhs, Rhs> > { protected: - typedef scalar_product_op BinaryOp; + typedef scalar_product_op BinaryOp; typedef evaluator LhsEvaluator; typedef typename evaluator::InnerIterator RhsIterator; typedef CwiseBinaryOp XprType; @@ -480,12 +483,12 @@ protected: }; // "sparse .* dense" -template -struct binary_evaluator, Lhs, Rhs>, IteratorBased, IndexBased> - : evaluator_base, Lhs, Rhs> > +template +struct binary_evaluator, Lhs, Rhs>, IteratorBased, IndexBased> + : evaluator_base, Lhs, Rhs> > { protected: - typedef scalar_product_op BinaryOp; + typedef scalar_product_op BinaryOp; typedef typename evaluator::InnerIterator LhsIterator; typedef evaluator RhsEvaluator; typedef CwiseBinaryOp XprType; @@ -579,7 +582,7 @@ template template Derived& SparseMatrixBase::operator+=(const DiagonalBase& other) { - call_assignment_no_alias(derived(), other.derived(), internal::add_assign_op()); + call_assignment_no_alias(derived(), other.derived(), internal::add_assign_op()); return derived(); } @@ -587,7 +590,7 @@ template template Derived& SparseMatrixBase::operator-=(const DiagonalBase& other) { - call_assignment_no_alias(derived(), other.derived(), internal::sub_assign_op()); + call_assignment_no_alias(derived(), other.derived(), internal::sub_assign_op()); return derived(); } @@ -600,31 +603,31 @@ SparseMatrixBase::cwiseProduct(const MatrixBase &other) c } template -EIGEN_STRONG_INLINE const CwiseBinaryOp, const DenseDerived, const SparseDerived> +EIGEN_STRONG_INLINE const CwiseBinaryOp, const DenseDerived, const SparseDerived> operator+(const MatrixBase &a, const SparseMatrixBase &b) { - return 
CwiseBinaryOp, const DenseDerived, const SparseDerived>(a.derived(), b.derived()); + return CwiseBinaryOp, const DenseDerived, const SparseDerived>(a.derived(), b.derived()); } template -EIGEN_STRONG_INLINE const CwiseBinaryOp, const SparseDerived, const DenseDerived> +EIGEN_STRONG_INLINE const CwiseBinaryOp, const SparseDerived, const DenseDerived> operator+(const SparseMatrixBase &a, const MatrixBase &b) { - return CwiseBinaryOp, const SparseDerived, const DenseDerived>(a.derived(), b.derived()); + return CwiseBinaryOp, const SparseDerived, const DenseDerived>(a.derived(), b.derived()); } template -EIGEN_STRONG_INLINE const CwiseBinaryOp, const DenseDerived, const SparseDerived> +EIGEN_STRONG_INLINE const CwiseBinaryOp, const DenseDerived, const SparseDerived> operator-(const MatrixBase &a, const SparseMatrixBase &b) { - return CwiseBinaryOp, const DenseDerived, const SparseDerived>(a.derived(), b.derived()); + return CwiseBinaryOp, const DenseDerived, const SparseDerived>(a.derived(), b.derived()); } template -EIGEN_STRONG_INLINE const CwiseBinaryOp, const SparseDerived, const DenseDerived> +EIGEN_STRONG_INLINE const CwiseBinaryOp, const SparseDerived, const DenseDerived> operator-(const SparseMatrixBase &a, const MatrixBase &b) { - return CwiseBinaryOp, const SparseDerived, const DenseDerived>(a.derived(), b.derived()); + return CwiseBinaryOp, const SparseDerived, const DenseDerived>(a.derived(), b.derived()); } } // end namespace Eigen diff --git a/Eigen/src/SparseCore/SparseDenseProduct.h b/Eigen/src/SparseCore/SparseDenseProduct.h index c9da8a2bb..0547db596 100644 --- a/Eigen/src/SparseCore/SparseDenseProduct.h +++ b/Eigen/src/SparseCore/SparseDenseProduct.h @@ -72,14 +72,16 @@ struct sparse_time_dense_product_impl -struct scalar_product_traits > -{ - enum { - Defined = 1 - }; - typedef typename CwiseUnaryOp, T2>::PlainObject ReturnType; -}; +// -> let's disable it for now as it is conflicting with generic scalar*matrix and matrix*scalar operators +// 
template +// struct ScalarBinaryOpTraits > +// { +// enum { +// Defined = 1 +// }; +// typedef typename CwiseUnaryOp, T2>::PlainObject ReturnType; +// }; + template struct sparse_time_dense_product_impl { @@ -95,7 +97,7 @@ struct sparse_time_dense_product_impl::ReturnType rhs_j(alpha * rhs.coeff(j,c)); + typename ScalarBinaryOpTraits::ReturnType rhs_j(alpha * rhs.coeff(j,c)); for(LhsInnerIterator it(lhsEval,j); it ;++it) res.coeffRef(it.index(),c) += it.value() * rhs_j; } diff --git a/Eigen/src/SparseCore/SparseMap.h b/Eigen/src/SparseCore/SparseMap.h index eb241c3e2..f99be3379 100644 --- a/Eigen/src/SparseCore/SparseMap.h +++ b/Eigen/src/SparseCore/SparseMap.h @@ -166,12 +166,17 @@ class SparseMapBase using Base::innerIndexPtr; using Base::outerIndexPtr; using Base::innerNonZeroPtr; - inline Scalar* valuePtr() { return Base::m_values; } + /** \copydoc SparseMatrix::valuePtr */ + inline Scalar* valuePtr() { return Base::m_values; } + /** \copydoc SparseMatrix::innerIndexPtr */ inline StorageIndex* innerIndexPtr() { return Base::m_innerIndices; } + /** \copydoc SparseMatrix::outerIndexPtr */ inline StorageIndex* outerIndexPtr() { return Base::m_outerIndex; } + /** \copydoc SparseMatrix::innerNonZeroPtr */ inline StorageIndex* innerNonZeroPtr() { return Base::m_innerNonZeros; } //---------------------------------------- + /** \copydoc SparseMatrix::coeffRef */ inline Scalar& coeffRef(Index row, Index col) { const Index outer = IsRowMajor ? row : col; @@ -181,14 +186,14 @@ class SparseMapBase Index end = Base::isCompressed() ? 
Base::m_outerIndex[outer+1] : start + Base::m_innerNonZeros[outer]; eigen_assert(end>=start && "you probably called coeffRef on a non finalized matrix"); eigen_assert(end>start && "coeffRef cannot be called on a zero coefficient"); - Index* r = std::lower_bound(&Base::m_innerIndices[start],&Base::m_innerIndices[end],inner); + StorageIndex* r = std::lower_bound(&Base::m_innerIndices[start],&Base::m_innerIndices[end],inner); const Index id = r - &Base::m_innerIndices[0]; eigen_assert((*r==inner) && (id(Base::m_values)[id]; } inline SparseMapBase(Index rows, Index cols, Index nnz, StorageIndex* outerIndexPtr, StorageIndex* innerIndexPtr, - Scalar* valuePtr, StorageIndex* innerNonZerosPtr = 0) + Scalar* valuePtr, StorageIndex* innerNonZerosPtr = 0) : Base(rows, cols, nnz, outerIndexPtr, innerIndexPtr, valuePtr, innerNonZerosPtr) {} @@ -233,13 +238,15 @@ class Map * stored as a sparse format as defined by the pointers \a outerIndexPtr, \a innerIndexPtr, and \a valuePtr. * If the optional parameter \a innerNonZerosPtr is the null pointer, then a standard compressed format is assumed. * + * This constructor is available only if \c SparseMatrixType is non-const. + * * More details on the expected storage schemes are given in the \ref TutorialSparse "manual pages". */ inline Map(Index rows, Index cols, Index nnz, StorageIndex* outerIndexPtr, StorageIndex* innerIndexPtr, Scalar* valuePtr, StorageIndex* innerNonZerosPtr = 0) : Base(rows, cols, nnz, outerIndexPtr, innerIndexPtr, valuePtr, innerNonZerosPtr) {} - +#ifndef EIGEN_PARSED_BY_DOXYGEN /** Empty destructor */ inline ~Map() {} }; @@ -254,7 +261,12 @@ class Map, Options, StrideType enum { IsRowMajor = Base::IsRowMajor }; public: - +#endif + /** This is the const version of the above constructor. 
+ * + * This constructor is available only if \c SparseMatrixType is const, e.g.: + * \code Map > \endcode + */ inline Map(Index rows, Index cols, Index nnz, const StorageIndex* outerIndexPtr, const StorageIndex* innerIndexPtr, const Scalar* valuePtr, const StorageIndex* innerNonZerosPtr = 0) : Base(rows, cols, nnz, outerIndexPtr, innerIndexPtr, valuePtr, innerNonZerosPtr) diff --git a/Eigen/src/SparseCore/SparseMatrix.h b/Eigen/src/SparseCore/SparseMatrix.h index 760e151eb..64ca5fc44 100644 --- a/Eigen/src/SparseCore/SparseMatrix.h +++ b/Eigen/src/SparseCore/SparseMatrix.h @@ -35,7 +35,7 @@ namespace Eigen { * \tparam _Index the type of the indices. It has to be a \b signed type (e.g., short, int, std::ptrdiff_t). Default is \c int. * * This class can be extended with the help of the plugin mechanism described on the page - * \ref TopicCustomizingEigen by defining the preprocessor symbol \c EIGEN_SPARSEMATRIX_PLUGIN. + * \ref TopicCustomizing_Plugins by defining the preprocessor symbol \c EIGEN_SPARSEMATRIX_PLUGIN. 
*/ namespace internal { @@ -440,7 +440,7 @@ class SparseMatrix template void setFromTriplets(const InputIterators& begin, const InputIterators& end, DupFunctor dup_func); - void sumupDuplicates() { collapseDuplicates(internal::scalar_sum_op()); } + void sumupDuplicates() { collapseDuplicates(internal::scalar_sum_op()); } template void collapseDuplicates(DupFunctor dup_func = DupFunctor()); @@ -979,7 +979,7 @@ template template void SparseMatrix::setFromTriplets(const InputIterators& begin, const InputIterators& end) { - internal::set_from_triplets >(begin, end, *this, internal::scalar_sum_op()); + internal::set_from_triplets >(begin, end, *this, internal::scalar_sum_op()); } /** The same as setFromTriplets but when duplicates are met the functor \a dup_func is applied: @@ -1080,7 +1080,7 @@ EIGEN_DONT_INLINE SparseMatrix& SparseMatrix class SparseMatrixBase -#ifndef EIGEN_PARSED_BY_DOXYGEN - : public internal::special_scalar_op_base::Scalar, - typename NumTraits::Scalar>::Real, - EigenBase > -#else : public EigenBase -#endif // not EIGEN_PARSED_BY_DOXYGEN { public: @@ -142,12 +136,20 @@ template class SparseMatrixBase inline Derived& const_cast_derived() const { return *static_cast(const_cast(this)); } - typedef internal::special_scalar_op_base > Base; - using Base::operator*; - using Base::operator/; + typedef EigenBase Base; + #endif // not EIGEN_PARSED_BY_DOXYGEN #define EIGEN_CURRENT_STORAGE_BASE_CLASS Eigen::SparseMatrixBase +#ifdef EIGEN_PARSED_BY_DOXYGEN +#define EIGEN_DOC_UNARY_ADDONS(METHOD,OP) /**

This method does not change the sparsity of \c *this: the OP is applied to explicitly stored coefficients only. \sa SparseCompressedBase::coeffs()

*/ +#define EIGEN_DOC_BLOCK_ADDONS_NOT_INNER_PANEL /**

\warning This method returns a read-only expression for any sparse matrices. \sa \ref TutorialSparse_SubMatrices "Sparse block operations"

*/ +#define EIGEN_DOC_BLOCK_ADDONS_INNER_PANEL_IF(COND) /**

\warning This method returns a read-write expression for COND sparse matrices only. Otherwise, the returned expression is read-only. \sa \ref TutorialSparse_SubMatrices "Sparse block operations"

*/ +#else +#define EIGEN_DOC_UNARY_ADDONS(X,Y) +#define EIGEN_DOC_BLOCK_ADDONS_NOT_INNER_PANEL +#define EIGEN_DOC_BLOCK_ADDONS_INNER_PANEL_IF(COND) +#endif # include "../plugins/CommonCwiseUnaryOps.h" # include "../plugins/CommonCwiseBinaryOps.h" # include "../plugins/MatrixCwiseUnaryOps.h" @@ -156,8 +158,10 @@ template class SparseMatrixBase # ifdef EIGEN_SPARSEMATRIXBASE_PLUGIN # include EIGEN_SPARSEMATRIXBASE_PLUGIN # endif -# undef EIGEN_CURRENT_STORAGE_BASE_CLASS #undef EIGEN_CURRENT_STORAGE_BASE_CLASS +#undef EIGEN_DOC_UNARY_ADDONS +#undef EIGEN_DOC_BLOCK_ADDONS_NOT_INNER_PANEL +#undef EIGEN_DOC_BLOCK_ADDONS_INNER_PANEL_IF /** \returns the number of rows. \sa cols() */ inline Index rows() const { return derived().rows(); } @@ -263,7 +267,7 @@ template class SparseMatrixBase Derived& operator/=(const Scalar& other); template struct CwiseProductDenseReturnType { - typedef CwiseBinaryOp::Scalar, typename internal::traits::Scalar >::ReturnType>, diff --git a/Eigen/src/SparseCore/SparseProduct.h b/Eigen/src/SparseCore/SparseProduct.h index cbd0db71b..7a5ad0635 100644 --- a/Eigen/src/SparseCore/SparseProduct.h +++ b/Eigen/src/SparseCore/SparseProduct.h @@ -45,7 +45,7 @@ struct generic_product_impl // dense += sparse * sparse template - static void addTo(Dest& dst, const ActualLhs& lhs, const Rhs& rhs, int* = typename enable_if::Shape,DenseShape>::value,int*>::type(0) ) + static void addTo(Dest& dst, const ActualLhs& lhs, const Rhs& rhs, typename enable_if::Shape,DenseShape>::value,int*>::type* = 0) { typedef typename nested_eval::type LhsNested; typedef typename nested_eval::type RhsNested; @@ -57,7 +57,7 @@ struct generic_product_impl // dense -= sparse * sparse template - static void subTo(Dest& dst, const Lhs& lhs, const Rhs& rhs, int* = typename enable_if::Shape,DenseShape>::value,int*>::type(0) ) + static void subTo(Dest& dst, const Lhs& lhs, const Rhs& rhs, typename enable_if::Shape,DenseShape>::value,int*>::type* = 0) { addTo(dst, -lhs, rhs); } @@ -99,10 
+99,10 @@ struct generic_product_impl -struct Assignment, internal::assign_op, Sparse2Dense> +struct Assignment, internal::assign_op::Scalar>, Sparse2Dense> { typedef Product SrcXprType; - static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op &) + static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op &) { generic_product_impl::evalTo(dst,src.lhs(),src.rhs()); } @@ -110,10 +110,10 @@ struct Assignment, internal::assig // dense += sparse-product (can be sparse*sparse, sparse*perm, etc.) template< typename DstXprType, typename Lhs, typename Rhs> -struct Assignment, internal::add_assign_op, Sparse2Dense> +struct Assignment, internal::add_assign_op::Scalar>, Sparse2Dense> { typedef Product SrcXprType; - static void run(DstXprType &dst, const SrcXprType &src, const internal::add_assign_op &) + static void run(DstXprType &dst, const SrcXprType &src, const internal::add_assign_op &) { generic_product_impl::addTo(dst,src.lhs(),src.rhs()); } @@ -121,24 +121,24 @@ struct Assignment, internal::add_a // dense -= sparse-product (can be sparse*sparse, sparse*perm, etc.) 
template< typename DstXprType, typename Lhs, typename Rhs> -struct Assignment, internal::sub_assign_op, Sparse2Dense> +struct Assignment, internal::sub_assign_op::Scalar>, Sparse2Dense> { typedef Product SrcXprType; - static void run(DstXprType &dst, const SrcXprType &src, const internal::sub_assign_op &) + static void run(DstXprType &dst, const SrcXprType &src, const internal::sub_assign_op &) { generic_product_impl::subTo(dst,src.lhs(),src.rhs()); } }; template -struct evaluator > > +struct unary_evaluator >, IteratorBased> : public evaluator::PlainObject> { typedef SparseView > XprType; typedef typename XprType::PlainObject PlainObject; typedef evaluator Base; - - explicit evaluator(const XprType& xpr) + + explicit unary_evaluator(const XprType& xpr) : m_result(xpr.rows(), xpr.cols()) { using std::abs; @@ -147,13 +147,13 @@ struct evaluator > > typedef typename nested_eval::type RhsNested; LhsNested lhsNested(xpr.nestedExpression().lhs()); RhsNested rhsNested(xpr.nestedExpression().rhs()); - + internal::sparse_sparse_product_with_pruning_selector::type, typename remove_all::type, PlainObject>::run(lhsNested,rhsNested,m_result, abs(xpr.reference())*xpr.epsilon()); } - -protected: + +protected: PlainObject m_result; }; diff --git a/Eigen/src/SparseCore/SparseRedux.h b/Eigen/src/SparseCore/SparseRedux.h index 2a9718cfb..458774962 100644 --- a/Eigen/src/SparseCore/SparseRedux.h +++ b/Eigen/src/SparseCore/SparseRedux.h @@ -30,7 +30,10 @@ typename internal::traits >::Scalar SparseMatrix<_Scalar,_Options,_Index>::sum() const { eigen_assert(rows()>0 && cols()>0 && "you are using a non initialized matrix"); - return Matrix::Map(m_data.valuePtr(), m_data.size()).sum(); + if(this->isCompressed()) + return Matrix::Map(m_data.valuePtr(), m_data.size()).sum(); + else + return Base::sum(); } template diff --git a/Eigen/src/SparseCore/SparseSelfAdjointView.h b/Eigen/src/SparseCore/SparseSelfAdjointView.h index b92bb17e2..d31d9babf 100644 --- 
a/Eigen/src/SparseCore/SparseSelfAdjointView.h +++ b/Eigen/src/SparseCore/SparseSelfAdjointView.h @@ -218,18 +218,18 @@ struct SparseSelfAdjoint2Sparse {}; template<> struct AssignmentKind { typedef SparseSelfAdjoint2Sparse Kind; }; template<> struct AssignmentKind { typedef Sparse2Sparse Kind; }; -template< typename DstXprType, typename SrcXprType, typename Functor, typename Scalar> -struct Assignment +template< typename DstXprType, typename SrcXprType, typename Functor> +struct Assignment { typedef typename DstXprType::StorageIndex StorageIndex; template - static void run(SparseMatrix &dst, const SrcXprType &src, const internal::assign_op &/*func*/) + static void run(SparseMatrix &dst, const SrcXprType &src, const internal::assign_op &/*func*/) { internal::permute_symm_to_fullsymm(src.matrix(), dst); } template - static void run(DynamicSparseMatrix& dst, const SrcXprType &src, const internal::assign_op &/*func*/) + static void run(DynamicSparseMatrix& dst, const SrcXprType &src, const internal::assign_op &/*func*/) { // TODO directly evaluate into dst; SparseMatrix tmp(dst.rows(),dst.cols()); @@ -250,11 +250,11 @@ template LhsEval; - typedef typename evaluator::InnerIterator LhsIterator; + typedef typename internal::nested_eval::type SparseLhsTypeNested; + typedef typename internal::remove_all::type SparseLhsTypeNestedCleaned; + typedef evaluator LhsEval; + typedef typename LhsEval::InnerIterator LhsIterator; typedef typename SparseLhsType::Scalar LhsScalar; enum { @@ -266,39 +266,53 @@ inline void sparse_selfadjoint_time_dense_product(const SparseLhsType& lhs, cons ProcessSecondHalf = !ProcessFirstHalf }; - LhsEval lhsEval(lhs); - - for (Index j=0; j::ReturnType rhs_j(alpha*rhs(j,k)); + // accumulator for partial scalar product + typename DenseResType::Scalar res_j(0); + for(; (ProcessFirstHalf ? 
i && i.index() < j : i) ; ++i) + { + LhsScalar lhs_ij = i.value(); + if(!LhsIsRowMajor) lhs_ij = numext::conj(lhs_ij); + res_j += lhs_ij * rhs(i.index(),k); + res(i.index(),k) += numext::conj(lhs_ij) * rhs_j; + } + res(j,k) += alpha * res_j; + + // handle diagonal coeff + if (ProcessFirstHalf && i && (i.index()==j)) + res(j,k) += alpha * i.value() * rhs(j,k); } - for(; (ProcessFirstHalf ? i && i.index() < j : i) ; ++i) - { - Index a = LhsIsRowMajor ? j : i.index(); - Index b = LhsIsRowMajor ? i.index() : j; - LhsScalar v = i.value(); - res.row(a) += (v) * rhs.row(b); - res.row(b) += numext::conj(v) * rhs.row(a); - } - if (ProcessFirstHalf && i && (i.index()==j)) - res.row(j) += i.value() * rhs.row(j); } } template struct generic_product_impl +: generic_product_impl_base > { template - static void evalTo(Dest& dst, const LhsView& lhsView, const Rhs& rhs) + static void scaleAndAddTo(Dest& dst, const LhsView& lhsView, const Rhs& rhs, const typename Dest::Scalar& alpha) { typedef typename LhsView::_MatrixTypeNested Lhs; typedef typename nested_eval::type LhsNested; @@ -306,16 +320,16 @@ struct generic_product_impl(lhsNested, rhsNested, dst, typename Dest::Scalar(1)); + internal::sparse_selfadjoint_time_dense_product(lhsNested, rhsNested, dst, alpha); } }; template struct generic_product_impl +: generic_product_impl_base > { template - static void evalTo(Dest& dst, const Lhs& lhs, const RhsView& rhsView) + static void scaleAndAddTo(Dest& dst, const Lhs& lhs, const RhsView& rhsView, const typename Dest::Scalar& alpha) { typedef typename RhsView::_MatrixTypeNested Rhs; typedef typename nested_eval::type LhsNested; @@ -323,10 +337,9 @@ struct generic_product_impl dstT(dst); - internal::sparse_selfadjoint_time_dense_product(rhsNested.transpose(), lhsNested.transpose(), dstT, typename Dest::Scalar(1)); + internal::sparse_selfadjoint_time_dense_product(rhsNested.transpose(), lhsNested.transpose(), dstT, alpha); } }; @@ -586,12 +599,12 @@ class 
SparseSymmetricPermutationProduct namespace internal { template -struct Assignment, internal::assign_op, Sparse2Sparse> +struct Assignment, internal::assign_op, Sparse2Sparse> { typedef SparseSymmetricPermutationProduct SrcXprType; typedef typename DstXprType::StorageIndex DstIndex; template - static void run(SparseMatrix &dst, const SrcXprType &src, const internal::assign_op &) + static void run(SparseMatrix &dst, const SrcXprType &src, const internal::assign_op &) { // internal::permute_symm_to_fullsymm(m_matrix,_dest,m_perm.indices().data()); SparseMatrix tmp; @@ -600,7 +613,7 @@ struct Assignment } template - static void run(SparseSelfAdjointView& dst, const SrcXprType &src, const internal::assign_op &) + static void run(SparseSelfAdjointView& dst, const SrcXprType &src, const internal::assign_op &) { internal::permute_symm_to_symm(src.matrix(),dst.matrix(),src.perm().indices().data()); } diff --git a/Eigen/src/SparseCore/SparseSparseProductWithPruning.h b/Eigen/src/SparseCore/SparseSparseProductWithPruning.h index 20078f72c..21c419002 100644 --- a/Eigen/src/SparseCore/SparseSparseProductWithPruning.h +++ b/Eigen/src/SparseCore/SparseSparseProductWithPruning.h @@ -51,7 +51,7 @@ static void sparse_sparse_product_with_pruning_impl(const Lhs& lhs, const Rhs& r Index estimated_nnz_prod = lhsEval.nonZerosEstimate() + rhsEval.nonZerosEstimate(); res.reserve(estimated_nnz_prod); - double ratioColRes = double(estimated_nnz_prod)/double(lhs.rows()*rhs.cols()); + double ratioColRes = double(estimated_nnz_prod)/(double(lhs.rows())*double(rhs.cols())); for (Index j=0; j(Bc0[0]); - b10 = pset1(Bc0[1]); - if(RK==4) b20 = pset1(Bc0[2]); - if(RK==4) b30 = pset1(Bc0[3]); - b01 = pset1(Bc1[0]); - b11 = pset1(Bc1[1]); - if(RK==4) b21 = pset1(Bc1[2]); - if(RK==4) b31 = pset1(Bc1[3]); + { b00 = pset1(Bc0[0]); } + { b10 = pset1(Bc0[1]); } + if(RK==4) { b20 = pset1(Bc0[2]); } + if(RK==4) { b30 = pset1(Bc0[3]); } + { b01 = pset1(Bc1[0]); } + { b11 = pset1(Bc1[1]); } + if(RK==4) { b21 
= pset1(Bc1[2]); } + if(RK==4) { b31 = pset1(Bc1[3]); } Packet a0, a1, a2, a3, c0, c1, t0, t1; diff --git a/Eigen/src/SparseQR/CMakeLists.txt b/Eigen/src/SparseQR/CMakeLists.txt deleted file mode 100644 index f9ddf2bdb..000000000 --- a/Eigen/src/SparseQR/CMakeLists.txt +++ /dev/null @@ -1,6 +0,0 @@ -FILE(GLOB Eigen_SparseQR_SRCS "*.h") - -INSTALL(FILES - ${Eigen_SparseQR_SRCS} - DESTINATION ${INCLUDE_INSTALL_DIR}/Eigen/src/SparseQR/ COMPONENT Devel - ) diff --git a/Eigen/src/SparseQR/SparseQR.h b/Eigen/src/SparseQR/SparseQR.h index acd7f7e10..2d4498b03 100644 --- a/Eigen/src/SparseQR/SparseQR.h +++ b/Eigen/src/SparseQR/SparseQR.h @@ -705,12 +705,12 @@ struct evaluator_traits > }; template< typename DstXprType, typename SparseQRType> -struct Assignment, internal::assign_op, Sparse2Sparse> +struct Assignment, internal::assign_op, Sparse2Sparse> { typedef SparseQRMatrixQReturnType SrcXprType; typedef typename DstXprType::Scalar Scalar; typedef typename DstXprType::StorageIndex StorageIndex; - static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op &/*func*/) + static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op &/*func*/) { typename DstXprType::PlainObject idMat(src.m_qr.rows(), src.m_qr.rows()); idMat.setIdentity(); @@ -721,12 +721,12 @@ struct Assignment, internal: }; template< typename DstXprType, typename SparseQRType> -struct Assignment, internal::assign_op, Sparse2Dense> +struct Assignment, internal::assign_op, Sparse2Dense> { typedef SparseQRMatrixQReturnType SrcXprType; typedef typename DstXprType::Scalar Scalar; typedef typename DstXprType::StorageIndex StorageIndex; - static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op &/*func*/) + static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op &/*func*/) { dst = src.m_qr.matrixQ() * DstXprType::Identity(src.m_qr.rows(), src.m_qr.rows()); } diff --git a/Eigen/src/StlSupport/CMakeLists.txt 
b/Eigen/src/StlSupport/CMakeLists.txt deleted file mode 100644 index 0f094f637..000000000 --- a/Eigen/src/StlSupport/CMakeLists.txt +++ /dev/null @@ -1,6 +0,0 @@ -FILE(GLOB Eigen_StlSupport_SRCS "*.h") - -INSTALL(FILES - ${Eigen_StlSupport_SRCS} - DESTINATION ${INCLUDE_INSTALL_DIR}/Eigen/src/StlSupport COMPONENT Devel - ) diff --git a/Eigen/src/SuperLUSupport/CMakeLists.txt b/Eigen/src/SuperLUSupport/CMakeLists.txt deleted file mode 100644 index b28ebe583..000000000 --- a/Eigen/src/SuperLUSupport/CMakeLists.txt +++ /dev/null @@ -1,6 +0,0 @@ -FILE(GLOB Eigen_SuperLUSupport_SRCS "*.h") - -INSTALL(FILES - ${Eigen_SuperLUSupport_SRCS} - DESTINATION ${INCLUDE_INSTALL_DIR}/Eigen/src/SuperLUSupport COMPONENT Devel - ) diff --git a/Eigen/src/SuperLUSupport/SuperLUSupport.h b/Eigen/src/SuperLUSupport/SuperLUSupport.h index 7e2efd452..88c44bcd0 100644 --- a/Eigen/src/SuperLUSupport/SuperLUSupport.h +++ b/Eigen/src/SuperLUSupport/SuperLUSupport.h @@ -10,15 +10,16 @@ #ifndef EIGEN_SUPERLUSUPPORT_H #define EIGEN_SUPERLUSUPPORT_H -namespace Eigen { +namespace Eigen { +#if defined(SUPERLU_MAJOR_VERSION) && (SUPERLU_MAJOR_VERSION >= 5) #define DECL_GSSVX(PREFIX,FLOATTYPE,KEYTYPE) \ extern "C" { \ extern void PREFIX##gssvx(superlu_options_t *, SuperMatrix *, int *, int *, int *, \ char *, FLOATTYPE *, FLOATTYPE *, SuperMatrix *, SuperMatrix *, \ void *, int, SuperMatrix *, SuperMatrix *, \ FLOATTYPE *, FLOATTYPE *, FLOATTYPE *, FLOATTYPE *, \ - mem_usage_t *, SuperLUStat_t *, int *); \ + GlobalLU_t *, mem_usage_t *, SuperLUStat_t *, int *); \ } \ inline float SuperLU_gssvx(superlu_options_t *options, SuperMatrix *A, \ int *perm_c, int *perm_r, int *etree, char *equed, \ @@ -28,12 +29,37 @@ namespace Eigen { FLOATTYPE *recip_pivot_growth, \ FLOATTYPE *rcond, FLOATTYPE *ferr, FLOATTYPE *berr, \ SuperLUStat_t *stats, int *info, KEYTYPE) { \ - mem_usage_t mem_usage; \ + mem_usage_t mem_usage; \ + GlobalLU_t gLU; \ + PREFIX##gssvx(options, A, perm_c, perm_r, etree, equed, R, C, L, \ + 
U, work, lwork, B, X, recip_pivot_growth, rcond, \ + ferr, berr, &gLU, &mem_usage, stats, info); \ + return mem_usage.for_lu; /* bytes used by the factor storage */ \ + } +#else // version < 5.0 +#define DECL_GSSVX(PREFIX,FLOATTYPE,KEYTYPE) \ + extern "C" { \ + extern void PREFIX##gssvx(superlu_options_t *, SuperMatrix *, int *, int *, int *, \ + char *, FLOATTYPE *, FLOATTYPE *, SuperMatrix *, SuperMatrix *, \ + void *, int, SuperMatrix *, SuperMatrix *, \ + FLOATTYPE *, FLOATTYPE *, FLOATTYPE *, FLOATTYPE *, \ + mem_usage_t *, SuperLUStat_t *, int *); \ + } \ + inline float SuperLU_gssvx(superlu_options_t *options, SuperMatrix *A, \ + int *perm_c, int *perm_r, int *etree, char *equed, \ + FLOATTYPE *R, FLOATTYPE *C, SuperMatrix *L, \ + SuperMatrix *U, void *work, int lwork, \ + SuperMatrix *B, SuperMatrix *X, \ + FLOATTYPE *recip_pivot_growth, \ + FLOATTYPE *rcond, FLOATTYPE *ferr, FLOATTYPE *berr, \ + SuperLUStat_t *stats, int *info, KEYTYPE) { \ + mem_usage_t mem_usage; \ PREFIX##gssvx(options, A, perm_c, perm_r, etree, equed, R, C, L, \ U, work, lwork, B, X, recip_pivot_growth, rcond, \ ferr, berr, &mem_usage, stats, info); \ return mem_usage.for_lu; /* bytes used by the factor storage */ \ } +#endif DECL_GSSVX(s,float,float) DECL_GSSVX(c,float,std::complex) diff --git a/Eigen/src/UmfPackSupport/CMakeLists.txt b/Eigen/src/UmfPackSupport/CMakeLists.txt deleted file mode 100644 index a57de0020..000000000 --- a/Eigen/src/UmfPackSupport/CMakeLists.txt +++ /dev/null @@ -1,6 +0,0 @@ -FILE(GLOB Eigen_UmfPackSupport_SRCS "*.h") - -INSTALL(FILES - ${Eigen_UmfPackSupport_SRCS} - DESTINATION ${INCLUDE_INSTALL_DIR}/Eigen/src/UmfPackSupport COMPONENT Devel - ) diff --git a/Eigen/src/UmfPackSupport/UmfPackSupport.h b/Eigen/src/UmfPackSupport/UmfPackSupport.h index 929a01acb..dc74de935 100644 --- a/Eigen/src/UmfPackSupport/UmfPackSupport.h +++ b/Eigen/src/UmfPackSupport/UmfPackSupport.h @@ -379,7 +379,7 @@ class UmfPackLU : public SparseSolverBase > mutable bool 
m_extractedDataAreDirty; private: - UmfPackLU(UmfPackLU& ) { } + UmfPackLU(const UmfPackLU& ) { } }; diff --git a/Eigen/src/misc/CMakeLists.txt b/Eigen/src/misc/CMakeLists.txt deleted file mode 100644 index a58ffb745..000000000 --- a/Eigen/src/misc/CMakeLists.txt +++ /dev/null @@ -1,6 +0,0 @@ -FILE(GLOB Eigen_misc_SRCS "*.h") - -INSTALL(FILES - ${Eigen_misc_SRCS} - DESTINATION ${INCLUDE_INSTALL_DIR}/Eigen/src/misc COMPONENT Devel - ) diff --git a/Eigen/src/misc/RealSvd2x2.h b/Eigen/src/misc/RealSvd2x2.h new file mode 100644 index 000000000..abb4d3c2f --- /dev/null +++ b/Eigen/src/misc/RealSvd2x2.h @@ -0,0 +1,55 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2009-2010 Benoit Jacob +// Copyright (C) 2013-2016 Gael Guennebaud +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_REALSVD2X2_H +#define EIGEN_REALSVD2X2_H + +namespace Eigen { + +namespace internal { + +template +void real_2x2_jacobi_svd(const MatrixType& matrix, Index p, Index q, + JacobiRotation *j_left, + JacobiRotation *j_right) +{ + using std::sqrt; + using std::abs; + Matrix m; + m << numext::real(matrix.coeff(p,p)), numext::real(matrix.coeff(p,q)), + numext::real(matrix.coeff(q,p)), numext::real(matrix.coeff(q,q)); + JacobiRotation rot1; + RealScalar t = m.coeff(0,0) + m.coeff(1,1); + RealScalar d = m.coeff(1,0) - m.coeff(0,1); + + if(abs(d) < (std::numeric_limits::min)()) + { + rot1.s() = RealScalar(0); + rot1.c() = RealScalar(1); + } + else + { + // If d!=0, then t/d cannot overflow because the magnitude of the + // entries forming d are not too small compared to the ones forming t. 
+ RealScalar u = t / d; + RealScalar tmp = sqrt(RealScalar(1) + numext::abs2(u)); + rot1.s() = RealScalar(1) / tmp; + rot1.c() = u / tmp; + } + m.applyOnTheLeft(0,1,rot1); + j_right->makeJacobi(m,0,1); + *j_left = rot1 * j_right->transpose(); +} + +} // end namespace internal + +} // end namespace Eigen + +#endif // EIGEN_REALSVD2X2_H diff --git a/Eigen/src/misc/lapacke.h b/Eigen/src/misc/lapacke.h new file mode 100755 index 000000000..8c7e79b03 --- /dev/null +++ b/Eigen/src/misc/lapacke.h @@ -0,0 +1,16291 @@ +/***************************************************************************** + Copyright (c) 2010, Intel Corp. + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of Intel Corporation nor the names of its contributors + may be used to endorse or promote products derived from this software + without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + THE POSSIBILITY OF SUCH DAMAGE. +****************************************************************************** +* Contents: Native C interface to LAPACK +* Author: Intel Corporation +* Generated November, 2011 +*****************************************************************************/ + +#ifndef _MKL_LAPACKE_H_ + +#ifndef _LAPACKE_H_ +#define _LAPACKE_H_ + +/* +* Turn on HAVE_LAPACK_CONFIG_H to redefine C-LAPACK datatypes +*/ +#ifdef HAVE_LAPACK_CONFIG_H +#include "lapacke_config.h" +#endif + +#ifdef __cplusplus +extern "C" { +#endif /* __cplusplus */ + +#include + +#ifndef lapack_int +#define lapack_int int +#endif + +#ifndef lapack_logical +#define lapack_logical lapack_int +#endif + +/* Complex types are structures equivalent to the +* Fortran complex types COMPLEX(4) and COMPLEX(8). 
+* +* One can also redefine the types with his own types +* for example by including in the code definitions like +* +* #define lapack_complex_float std::complex +* #define lapack_complex_double std::complex +* +* or define these types in the command line: +* +* -Dlapack_complex_float="std::complex" +* -Dlapack_complex_double="std::complex" +*/ + +#ifndef LAPACK_COMPLEX_CUSTOM + +/* Complex type (single precision) */ +#ifndef lapack_complex_float +#include +#define lapack_complex_float float _Complex +#endif + +#ifndef lapack_complex_float_real +#define lapack_complex_float_real(z) (creal(z)) +#endif + +#ifndef lapack_complex_float_imag +#define lapack_complex_float_imag(z) (cimag(z)) +#endif + +lapack_complex_float lapack_make_complex_float( float re, float im ); + +/* Complex type (double precision) */ +#ifndef lapack_complex_double +#include +#define lapack_complex_double double _Complex +#endif + +#ifndef lapack_complex_double_real +#define lapack_complex_double_real(z) (creal(z)) +#endif + +#ifndef lapack_complex_double_imag +#define lapack_complex_double_imag(z) (cimag(z)) +#endif + +lapack_complex_double lapack_make_complex_double( double re, double im ); + +#endif + +#ifndef LAPACKE_malloc +#define LAPACKE_malloc( size ) malloc( size ) +#endif +#ifndef LAPACKE_free +#define LAPACKE_free( p ) free( p ) +#endif + +#define LAPACK_C2INT( x ) (lapack_int)(*((float*)&x )) +#define LAPACK_Z2INT( x ) (lapack_int)(*((double*)&x )) + +#define LAPACK_ROW_MAJOR 101 +#define LAPACK_COL_MAJOR 102 + +#define LAPACK_WORK_MEMORY_ERROR -1010 +#define LAPACK_TRANSPOSE_MEMORY_ERROR -1011 + +/* Callback logical functions of one, two, or three arguments are used +* to select eigenvalues to sort to the top left of the Schur form. +* The value is selected if function returns TRUE (non-zero). 
*/ + +typedef lapack_logical (*LAPACK_S_SELECT2) ( const float*, const float* ); +typedef lapack_logical (*LAPACK_S_SELECT3) + ( const float*, const float*, const float* ); +typedef lapack_logical (*LAPACK_D_SELECT2) ( const double*, const double* ); +typedef lapack_logical (*LAPACK_D_SELECT3) + ( const double*, const double*, const double* ); + +typedef lapack_logical (*LAPACK_C_SELECT1) ( const lapack_complex_float* ); +typedef lapack_logical (*LAPACK_C_SELECT2) + ( const lapack_complex_float*, const lapack_complex_float* ); +typedef lapack_logical (*LAPACK_Z_SELECT1) ( const lapack_complex_double* ); +typedef lapack_logical (*LAPACK_Z_SELECT2) + ( const lapack_complex_double*, const lapack_complex_double* ); + +#include "lapacke_mangling.h" + +#define LAPACK_lsame LAPACK_GLOBAL(lsame,LSAME) +lapack_logical LAPACK_lsame( char* ca, char* cb, + lapack_int lca, lapack_int lcb ); + +/* C-LAPACK function prototypes */ + +lapack_int LAPACKE_sbdsdc( int matrix_order, char uplo, char compq, + lapack_int n, float* d, float* e, float* u, + lapack_int ldu, float* vt, lapack_int ldvt, float* q, + lapack_int* iq ); +lapack_int LAPACKE_dbdsdc( int matrix_order, char uplo, char compq, + lapack_int n, double* d, double* e, double* u, + lapack_int ldu, double* vt, lapack_int ldvt, + double* q, lapack_int* iq ); + +lapack_int LAPACKE_sbdsqr( int matrix_order, char uplo, lapack_int n, + lapack_int ncvt, lapack_int nru, lapack_int ncc, + float* d, float* e, float* vt, lapack_int ldvt, + float* u, lapack_int ldu, float* c, lapack_int ldc ); +lapack_int LAPACKE_dbdsqr( int matrix_order, char uplo, lapack_int n, + lapack_int ncvt, lapack_int nru, lapack_int ncc, + double* d, double* e, double* vt, lapack_int ldvt, + double* u, lapack_int ldu, double* c, + lapack_int ldc ); +lapack_int LAPACKE_cbdsqr( int matrix_order, char uplo, lapack_int n, + lapack_int ncvt, lapack_int nru, lapack_int ncc, + float* d, float* e, lapack_complex_float* vt, + lapack_int ldvt, lapack_complex_float* u, + 
lapack_int ldu, lapack_complex_float* c, + lapack_int ldc ); +lapack_int LAPACKE_zbdsqr( int matrix_order, char uplo, lapack_int n, + lapack_int ncvt, lapack_int nru, lapack_int ncc, + double* d, double* e, lapack_complex_double* vt, + lapack_int ldvt, lapack_complex_double* u, + lapack_int ldu, lapack_complex_double* c, + lapack_int ldc ); + +lapack_int LAPACKE_sdisna( char job, lapack_int m, lapack_int n, const float* d, + float* sep ); +lapack_int LAPACKE_ddisna( char job, lapack_int m, lapack_int n, + const double* d, double* sep ); + +lapack_int LAPACKE_sgbbrd( int matrix_order, char vect, lapack_int m, + lapack_int n, lapack_int ncc, lapack_int kl, + lapack_int ku, float* ab, lapack_int ldab, float* d, + float* e, float* q, lapack_int ldq, float* pt, + lapack_int ldpt, float* c, lapack_int ldc ); +lapack_int LAPACKE_dgbbrd( int matrix_order, char vect, lapack_int m, + lapack_int n, lapack_int ncc, lapack_int kl, + lapack_int ku, double* ab, lapack_int ldab, + double* d, double* e, double* q, lapack_int ldq, + double* pt, lapack_int ldpt, double* c, + lapack_int ldc ); +lapack_int LAPACKE_cgbbrd( int matrix_order, char vect, lapack_int m, + lapack_int n, lapack_int ncc, lapack_int kl, + lapack_int ku, lapack_complex_float* ab, + lapack_int ldab, float* d, float* e, + lapack_complex_float* q, lapack_int ldq, + lapack_complex_float* pt, lapack_int ldpt, + lapack_complex_float* c, lapack_int ldc ); +lapack_int LAPACKE_zgbbrd( int matrix_order, char vect, lapack_int m, + lapack_int n, lapack_int ncc, lapack_int kl, + lapack_int ku, lapack_complex_double* ab, + lapack_int ldab, double* d, double* e, + lapack_complex_double* q, lapack_int ldq, + lapack_complex_double* pt, lapack_int ldpt, + lapack_complex_double* c, lapack_int ldc ); + +lapack_int LAPACKE_sgbcon( int matrix_order, char norm, lapack_int n, + lapack_int kl, lapack_int ku, const float* ab, + lapack_int ldab, const lapack_int* ipiv, float anorm, + float* rcond ); +lapack_int LAPACKE_dgbcon( int 
matrix_order, char norm, lapack_int n, + lapack_int kl, lapack_int ku, const double* ab, + lapack_int ldab, const lapack_int* ipiv, + double anorm, double* rcond ); +lapack_int LAPACKE_cgbcon( int matrix_order, char norm, lapack_int n, + lapack_int kl, lapack_int ku, + const lapack_complex_float* ab, lapack_int ldab, + const lapack_int* ipiv, float anorm, float* rcond ); +lapack_int LAPACKE_zgbcon( int matrix_order, char norm, lapack_int n, + lapack_int kl, lapack_int ku, + const lapack_complex_double* ab, lapack_int ldab, + const lapack_int* ipiv, double anorm, + double* rcond ); + +lapack_int LAPACKE_sgbequ( int matrix_order, lapack_int m, lapack_int n, + lapack_int kl, lapack_int ku, const float* ab, + lapack_int ldab, float* r, float* c, float* rowcnd, + float* colcnd, float* amax ); +lapack_int LAPACKE_dgbequ( int matrix_order, lapack_int m, lapack_int n, + lapack_int kl, lapack_int ku, const double* ab, + lapack_int ldab, double* r, double* c, + double* rowcnd, double* colcnd, double* amax ); +lapack_int LAPACKE_cgbequ( int matrix_order, lapack_int m, lapack_int n, + lapack_int kl, lapack_int ku, + const lapack_complex_float* ab, lapack_int ldab, + float* r, float* c, float* rowcnd, float* colcnd, + float* amax ); +lapack_int LAPACKE_zgbequ( int matrix_order, lapack_int m, lapack_int n, + lapack_int kl, lapack_int ku, + const lapack_complex_double* ab, lapack_int ldab, + double* r, double* c, double* rowcnd, double* colcnd, + double* amax ); + +lapack_int LAPACKE_sgbequb( int matrix_order, lapack_int m, lapack_int n, + lapack_int kl, lapack_int ku, const float* ab, + lapack_int ldab, float* r, float* c, float* rowcnd, + float* colcnd, float* amax ); +lapack_int LAPACKE_dgbequb( int matrix_order, lapack_int m, lapack_int n, + lapack_int kl, lapack_int ku, const double* ab, + lapack_int ldab, double* r, double* c, + double* rowcnd, double* colcnd, double* amax ); +lapack_int LAPACKE_cgbequb( int matrix_order, lapack_int m, lapack_int n, + lapack_int kl, 
lapack_int ku, + const lapack_complex_float* ab, lapack_int ldab, + float* r, float* c, float* rowcnd, float* colcnd, + float* amax ); +lapack_int LAPACKE_zgbequb( int matrix_order, lapack_int m, lapack_int n, + lapack_int kl, lapack_int ku, + const lapack_complex_double* ab, lapack_int ldab, + double* r, double* c, double* rowcnd, + double* colcnd, double* amax ); + +lapack_int LAPACKE_sgbrfs( int matrix_order, char trans, lapack_int n, + lapack_int kl, lapack_int ku, lapack_int nrhs, + const float* ab, lapack_int ldab, const float* afb, + lapack_int ldafb, const lapack_int* ipiv, + const float* b, lapack_int ldb, float* x, + lapack_int ldx, float* ferr, float* berr ); +lapack_int LAPACKE_dgbrfs( int matrix_order, char trans, lapack_int n, + lapack_int kl, lapack_int ku, lapack_int nrhs, + const double* ab, lapack_int ldab, const double* afb, + lapack_int ldafb, const lapack_int* ipiv, + const double* b, lapack_int ldb, double* x, + lapack_int ldx, double* ferr, double* berr ); +lapack_int LAPACKE_cgbrfs( int matrix_order, char trans, lapack_int n, + lapack_int kl, lapack_int ku, lapack_int nrhs, + const lapack_complex_float* ab, lapack_int ldab, + const lapack_complex_float* afb, lapack_int ldafb, + const lapack_int* ipiv, + const lapack_complex_float* b, lapack_int ldb, + lapack_complex_float* x, lapack_int ldx, float* ferr, + float* berr ); +lapack_int LAPACKE_zgbrfs( int matrix_order, char trans, lapack_int n, + lapack_int kl, lapack_int ku, lapack_int nrhs, + const lapack_complex_double* ab, lapack_int ldab, + const lapack_complex_double* afb, lapack_int ldafb, + const lapack_int* ipiv, + const lapack_complex_double* b, lapack_int ldb, + lapack_complex_double* x, lapack_int ldx, + double* ferr, double* berr ); + +lapack_int LAPACKE_sgbrfsx( int matrix_order, char trans, char equed, + lapack_int n, lapack_int kl, lapack_int ku, + lapack_int nrhs, const float* ab, lapack_int ldab, + const float* afb, lapack_int ldafb, + const lapack_int* ipiv, const float* r, + 
const float* c, const float* b, lapack_int ldb, + float* x, lapack_int ldx, float* rcond, float* berr, + lapack_int n_err_bnds, float* err_bnds_norm, + float* err_bnds_comp, lapack_int nparams, + float* params ); +lapack_int LAPACKE_dgbrfsx( int matrix_order, char trans, char equed, + lapack_int n, lapack_int kl, lapack_int ku, + lapack_int nrhs, const double* ab, lapack_int ldab, + const double* afb, lapack_int ldafb, + const lapack_int* ipiv, const double* r, + const double* c, const double* b, lapack_int ldb, + double* x, lapack_int ldx, double* rcond, + double* berr, lapack_int n_err_bnds, + double* err_bnds_norm, double* err_bnds_comp, + lapack_int nparams, double* params ); +lapack_int LAPACKE_cgbrfsx( int matrix_order, char trans, char equed, + lapack_int n, lapack_int kl, lapack_int ku, + lapack_int nrhs, const lapack_complex_float* ab, + lapack_int ldab, const lapack_complex_float* afb, + lapack_int ldafb, const lapack_int* ipiv, + const float* r, const float* c, + const lapack_complex_float* b, lapack_int ldb, + lapack_complex_float* x, lapack_int ldx, + float* rcond, float* berr, lapack_int n_err_bnds, + float* err_bnds_norm, float* err_bnds_comp, + lapack_int nparams, float* params ); +lapack_int LAPACKE_zgbrfsx( int matrix_order, char trans, char equed, + lapack_int n, lapack_int kl, lapack_int ku, + lapack_int nrhs, const lapack_complex_double* ab, + lapack_int ldab, const lapack_complex_double* afb, + lapack_int ldafb, const lapack_int* ipiv, + const double* r, const double* c, + const lapack_complex_double* b, lapack_int ldb, + lapack_complex_double* x, lapack_int ldx, + double* rcond, double* berr, lapack_int n_err_bnds, + double* err_bnds_norm, double* err_bnds_comp, + lapack_int nparams, double* params ); + +lapack_int LAPACKE_sgbsv( int matrix_order, lapack_int n, lapack_int kl, + lapack_int ku, lapack_int nrhs, float* ab, + lapack_int ldab, lapack_int* ipiv, float* b, + lapack_int ldb ); +lapack_int LAPACKE_dgbsv( int matrix_order, lapack_int 
n, lapack_int kl, + lapack_int ku, lapack_int nrhs, double* ab, + lapack_int ldab, lapack_int* ipiv, double* b, + lapack_int ldb ); +lapack_int LAPACKE_cgbsv( int matrix_order, lapack_int n, lapack_int kl, + lapack_int ku, lapack_int nrhs, + lapack_complex_float* ab, lapack_int ldab, + lapack_int* ipiv, lapack_complex_float* b, + lapack_int ldb ); +lapack_int LAPACKE_zgbsv( int matrix_order, lapack_int n, lapack_int kl, + lapack_int ku, lapack_int nrhs, + lapack_complex_double* ab, lapack_int ldab, + lapack_int* ipiv, lapack_complex_double* b, + lapack_int ldb ); + +lapack_int LAPACKE_sgbsvx( int matrix_order, char fact, char trans, + lapack_int n, lapack_int kl, lapack_int ku, + lapack_int nrhs, float* ab, lapack_int ldab, + float* afb, lapack_int ldafb, lapack_int* ipiv, + char* equed, float* r, float* c, float* b, + lapack_int ldb, float* x, lapack_int ldx, + float* rcond, float* ferr, float* berr, + float* rpivot ); +lapack_int LAPACKE_dgbsvx( int matrix_order, char fact, char trans, + lapack_int n, lapack_int kl, lapack_int ku, + lapack_int nrhs, double* ab, lapack_int ldab, + double* afb, lapack_int ldafb, lapack_int* ipiv, + char* equed, double* r, double* c, double* b, + lapack_int ldb, double* x, lapack_int ldx, + double* rcond, double* ferr, double* berr, + double* rpivot ); +lapack_int LAPACKE_cgbsvx( int matrix_order, char fact, char trans, + lapack_int n, lapack_int kl, lapack_int ku, + lapack_int nrhs, lapack_complex_float* ab, + lapack_int ldab, lapack_complex_float* afb, + lapack_int ldafb, lapack_int* ipiv, char* equed, + float* r, float* c, lapack_complex_float* b, + lapack_int ldb, lapack_complex_float* x, + lapack_int ldx, float* rcond, float* ferr, + float* berr, float* rpivot ); +lapack_int LAPACKE_zgbsvx( int matrix_order, char fact, char trans, + lapack_int n, lapack_int kl, lapack_int ku, + lapack_int nrhs, lapack_complex_double* ab, + lapack_int ldab, lapack_complex_double* afb, + lapack_int ldafb, lapack_int* ipiv, char* equed, + double* 
r, double* c, lapack_complex_double* b, + lapack_int ldb, lapack_complex_double* x, + lapack_int ldx, double* rcond, double* ferr, + double* berr, double* rpivot ); + +lapack_int LAPACKE_sgbsvxx( int matrix_order, char fact, char trans, + lapack_int n, lapack_int kl, lapack_int ku, + lapack_int nrhs, float* ab, lapack_int ldab, + float* afb, lapack_int ldafb, lapack_int* ipiv, + char* equed, float* r, float* c, float* b, + lapack_int ldb, float* x, lapack_int ldx, + float* rcond, float* rpvgrw, float* berr, + lapack_int n_err_bnds, float* err_bnds_norm, + float* err_bnds_comp, lapack_int nparams, + float* params ); +lapack_int LAPACKE_dgbsvxx( int matrix_order, char fact, char trans, + lapack_int n, lapack_int kl, lapack_int ku, + lapack_int nrhs, double* ab, lapack_int ldab, + double* afb, lapack_int ldafb, lapack_int* ipiv, + char* equed, double* r, double* c, double* b, + lapack_int ldb, double* x, lapack_int ldx, + double* rcond, double* rpvgrw, double* berr, + lapack_int n_err_bnds, double* err_bnds_norm, + double* err_bnds_comp, lapack_int nparams, + double* params ); +lapack_int LAPACKE_cgbsvxx( int matrix_order, char fact, char trans, + lapack_int n, lapack_int kl, lapack_int ku, + lapack_int nrhs, lapack_complex_float* ab, + lapack_int ldab, lapack_complex_float* afb, + lapack_int ldafb, lapack_int* ipiv, char* equed, + float* r, float* c, lapack_complex_float* b, + lapack_int ldb, lapack_complex_float* x, + lapack_int ldx, float* rcond, float* rpvgrw, + float* berr, lapack_int n_err_bnds, + float* err_bnds_norm, float* err_bnds_comp, + lapack_int nparams, float* params ); +lapack_int LAPACKE_zgbsvxx( int matrix_order, char fact, char trans, + lapack_int n, lapack_int kl, lapack_int ku, + lapack_int nrhs, lapack_complex_double* ab, + lapack_int ldab, lapack_complex_double* afb, + lapack_int ldafb, lapack_int* ipiv, char* equed, + double* r, double* c, lapack_complex_double* b, + lapack_int ldb, lapack_complex_double* x, + lapack_int ldx, double* rcond, 
double* rpvgrw, + double* berr, lapack_int n_err_bnds, + double* err_bnds_norm, double* err_bnds_comp, + lapack_int nparams, double* params ); + +lapack_int LAPACKE_sgbtrf( int matrix_order, lapack_int m, lapack_int n, + lapack_int kl, lapack_int ku, float* ab, + lapack_int ldab, lapack_int* ipiv ); +lapack_int LAPACKE_dgbtrf( int matrix_order, lapack_int m, lapack_int n, + lapack_int kl, lapack_int ku, double* ab, + lapack_int ldab, lapack_int* ipiv ); +lapack_int LAPACKE_cgbtrf( int matrix_order, lapack_int m, lapack_int n, + lapack_int kl, lapack_int ku, + lapack_complex_float* ab, lapack_int ldab, + lapack_int* ipiv ); +lapack_int LAPACKE_zgbtrf( int matrix_order, lapack_int m, lapack_int n, + lapack_int kl, lapack_int ku, + lapack_complex_double* ab, lapack_int ldab, + lapack_int* ipiv ); + +lapack_int LAPACKE_sgbtrs( int matrix_order, char trans, lapack_int n, + lapack_int kl, lapack_int ku, lapack_int nrhs, + const float* ab, lapack_int ldab, + const lapack_int* ipiv, float* b, lapack_int ldb ); +lapack_int LAPACKE_dgbtrs( int matrix_order, char trans, lapack_int n, + lapack_int kl, lapack_int ku, lapack_int nrhs, + const double* ab, lapack_int ldab, + const lapack_int* ipiv, double* b, lapack_int ldb ); +lapack_int LAPACKE_cgbtrs( int matrix_order, char trans, lapack_int n, + lapack_int kl, lapack_int ku, lapack_int nrhs, + const lapack_complex_float* ab, lapack_int ldab, + const lapack_int* ipiv, lapack_complex_float* b, + lapack_int ldb ); +lapack_int LAPACKE_zgbtrs( int matrix_order, char trans, lapack_int n, + lapack_int kl, lapack_int ku, lapack_int nrhs, + const lapack_complex_double* ab, lapack_int ldab, + const lapack_int* ipiv, lapack_complex_double* b, + lapack_int ldb ); + +lapack_int LAPACKE_sgebak( int matrix_order, char job, char side, lapack_int n, + lapack_int ilo, lapack_int ihi, const float* scale, + lapack_int m, float* v, lapack_int ldv ); +lapack_int LAPACKE_dgebak( int matrix_order, char job, char side, lapack_int n, + lapack_int ilo, 
lapack_int ihi, const double* scale, + lapack_int m, double* v, lapack_int ldv ); +lapack_int LAPACKE_cgebak( int matrix_order, char job, char side, lapack_int n, + lapack_int ilo, lapack_int ihi, const float* scale, + lapack_int m, lapack_complex_float* v, + lapack_int ldv ); +lapack_int LAPACKE_zgebak( int matrix_order, char job, char side, lapack_int n, + lapack_int ilo, lapack_int ihi, const double* scale, + lapack_int m, lapack_complex_double* v, + lapack_int ldv ); + +lapack_int LAPACKE_sgebal( int matrix_order, char job, lapack_int n, float* a, + lapack_int lda, lapack_int* ilo, lapack_int* ihi, + float* scale ); +lapack_int LAPACKE_dgebal( int matrix_order, char job, lapack_int n, double* a, + lapack_int lda, lapack_int* ilo, lapack_int* ihi, + double* scale ); +lapack_int LAPACKE_cgebal( int matrix_order, char job, lapack_int n, + lapack_complex_float* a, lapack_int lda, + lapack_int* ilo, lapack_int* ihi, float* scale ); +lapack_int LAPACKE_zgebal( int matrix_order, char job, lapack_int n, + lapack_complex_double* a, lapack_int lda, + lapack_int* ilo, lapack_int* ihi, double* scale ); + +lapack_int LAPACKE_sgebrd( int matrix_order, lapack_int m, lapack_int n, + float* a, lapack_int lda, float* d, float* e, + float* tauq, float* taup ); +lapack_int LAPACKE_dgebrd( int matrix_order, lapack_int m, lapack_int n, + double* a, lapack_int lda, double* d, double* e, + double* tauq, double* taup ); +lapack_int LAPACKE_cgebrd( int matrix_order, lapack_int m, lapack_int n, + lapack_complex_float* a, lapack_int lda, float* d, + float* e, lapack_complex_float* tauq, + lapack_complex_float* taup ); +lapack_int LAPACKE_zgebrd( int matrix_order, lapack_int m, lapack_int n, + lapack_complex_double* a, lapack_int lda, double* d, + double* e, lapack_complex_double* tauq, + lapack_complex_double* taup ); + +lapack_int LAPACKE_sgecon( int matrix_order, char norm, lapack_int n, + const float* a, lapack_int lda, float anorm, + float* rcond ); +lapack_int LAPACKE_dgecon( int 
matrix_order, char norm, lapack_int n, + const double* a, lapack_int lda, double anorm, + double* rcond ); +lapack_int LAPACKE_cgecon( int matrix_order, char norm, lapack_int n, + const lapack_complex_float* a, lapack_int lda, + float anorm, float* rcond ); +lapack_int LAPACKE_zgecon( int matrix_order, char norm, lapack_int n, + const lapack_complex_double* a, lapack_int lda, + double anorm, double* rcond ); + +lapack_int LAPACKE_sgeequ( int matrix_order, lapack_int m, lapack_int n, + const float* a, lapack_int lda, float* r, float* c, + float* rowcnd, float* colcnd, float* amax ); +lapack_int LAPACKE_dgeequ( int matrix_order, lapack_int m, lapack_int n, + const double* a, lapack_int lda, double* r, + double* c, double* rowcnd, double* colcnd, + double* amax ); +lapack_int LAPACKE_cgeequ( int matrix_order, lapack_int m, lapack_int n, + const lapack_complex_float* a, lapack_int lda, + float* r, float* c, float* rowcnd, float* colcnd, + float* amax ); +lapack_int LAPACKE_zgeequ( int matrix_order, lapack_int m, lapack_int n, + const lapack_complex_double* a, lapack_int lda, + double* r, double* c, double* rowcnd, double* colcnd, + double* amax ); + +lapack_int LAPACKE_sgeequb( int matrix_order, lapack_int m, lapack_int n, + const float* a, lapack_int lda, float* r, float* c, + float* rowcnd, float* colcnd, float* amax ); +lapack_int LAPACKE_dgeequb( int matrix_order, lapack_int m, lapack_int n, + const double* a, lapack_int lda, double* r, + double* c, double* rowcnd, double* colcnd, + double* amax ); +lapack_int LAPACKE_cgeequb( int matrix_order, lapack_int m, lapack_int n, + const lapack_complex_float* a, lapack_int lda, + float* r, float* c, float* rowcnd, float* colcnd, + float* amax ); +lapack_int LAPACKE_zgeequb( int matrix_order, lapack_int m, lapack_int n, + const lapack_complex_double* a, lapack_int lda, + double* r, double* c, double* rowcnd, + double* colcnd, double* amax ); + +lapack_int LAPACKE_sgees( int matrix_order, char jobvs, char sort, + 
LAPACK_S_SELECT2 select, lapack_int n, float* a, + lapack_int lda, lapack_int* sdim, float* wr, + float* wi, float* vs, lapack_int ldvs ); +lapack_int LAPACKE_dgees( int matrix_order, char jobvs, char sort, + LAPACK_D_SELECT2 select, lapack_int n, double* a, + lapack_int lda, lapack_int* sdim, double* wr, + double* wi, double* vs, lapack_int ldvs ); +lapack_int LAPACKE_cgees( int matrix_order, char jobvs, char sort, + LAPACK_C_SELECT1 select, lapack_int n, + lapack_complex_float* a, lapack_int lda, + lapack_int* sdim, lapack_complex_float* w, + lapack_complex_float* vs, lapack_int ldvs ); +lapack_int LAPACKE_zgees( int matrix_order, char jobvs, char sort, + LAPACK_Z_SELECT1 select, lapack_int n, + lapack_complex_double* a, lapack_int lda, + lapack_int* sdim, lapack_complex_double* w, + lapack_complex_double* vs, lapack_int ldvs ); + +lapack_int LAPACKE_sgeesx( int matrix_order, char jobvs, char sort, + LAPACK_S_SELECT2 select, char sense, lapack_int n, + float* a, lapack_int lda, lapack_int* sdim, + float* wr, float* wi, float* vs, lapack_int ldvs, + float* rconde, float* rcondv ); +lapack_int LAPACKE_dgeesx( int matrix_order, char jobvs, char sort, + LAPACK_D_SELECT2 select, char sense, lapack_int n, + double* a, lapack_int lda, lapack_int* sdim, + double* wr, double* wi, double* vs, lapack_int ldvs, + double* rconde, double* rcondv ); +lapack_int LAPACKE_cgeesx( int matrix_order, char jobvs, char sort, + LAPACK_C_SELECT1 select, char sense, lapack_int n, + lapack_complex_float* a, lapack_int lda, + lapack_int* sdim, lapack_complex_float* w, + lapack_complex_float* vs, lapack_int ldvs, + float* rconde, float* rcondv ); +lapack_int LAPACKE_zgeesx( int matrix_order, char jobvs, char sort, + LAPACK_Z_SELECT1 select, char sense, lapack_int n, + lapack_complex_double* a, lapack_int lda, + lapack_int* sdim, lapack_complex_double* w, + lapack_complex_double* vs, lapack_int ldvs, + double* rconde, double* rcondv ); + +lapack_int LAPACKE_sgeev( int matrix_order, char 
jobvl, char jobvr, + lapack_int n, float* a, lapack_int lda, float* wr, + float* wi, float* vl, lapack_int ldvl, float* vr, + lapack_int ldvr ); +lapack_int LAPACKE_dgeev( int matrix_order, char jobvl, char jobvr, + lapack_int n, double* a, lapack_int lda, double* wr, + double* wi, double* vl, lapack_int ldvl, double* vr, + lapack_int ldvr ); +lapack_int LAPACKE_cgeev( int matrix_order, char jobvl, char jobvr, + lapack_int n, lapack_complex_float* a, lapack_int lda, + lapack_complex_float* w, lapack_complex_float* vl, + lapack_int ldvl, lapack_complex_float* vr, + lapack_int ldvr ); +lapack_int LAPACKE_zgeev( int matrix_order, char jobvl, char jobvr, + lapack_int n, lapack_complex_double* a, + lapack_int lda, lapack_complex_double* w, + lapack_complex_double* vl, lapack_int ldvl, + lapack_complex_double* vr, lapack_int ldvr ); + +lapack_int LAPACKE_sgeevx( int matrix_order, char balanc, char jobvl, + char jobvr, char sense, lapack_int n, float* a, + lapack_int lda, float* wr, float* wi, float* vl, + lapack_int ldvl, float* vr, lapack_int ldvr, + lapack_int* ilo, lapack_int* ihi, float* scale, + float* abnrm, float* rconde, float* rcondv ); +lapack_int LAPACKE_dgeevx( int matrix_order, char balanc, char jobvl, + char jobvr, char sense, lapack_int n, double* a, + lapack_int lda, double* wr, double* wi, double* vl, + lapack_int ldvl, double* vr, lapack_int ldvr, + lapack_int* ilo, lapack_int* ihi, double* scale, + double* abnrm, double* rconde, double* rcondv ); +lapack_int LAPACKE_cgeevx( int matrix_order, char balanc, char jobvl, + char jobvr, char sense, lapack_int n, + lapack_complex_float* a, lapack_int lda, + lapack_complex_float* w, lapack_complex_float* vl, + lapack_int ldvl, lapack_complex_float* vr, + lapack_int ldvr, lapack_int* ilo, lapack_int* ihi, + float* scale, float* abnrm, float* rconde, + float* rcondv ); +lapack_int LAPACKE_zgeevx( int matrix_order, char balanc, char jobvl, + char jobvr, char sense, lapack_int n, + lapack_complex_double* a, 
lapack_int lda, + lapack_complex_double* w, lapack_complex_double* vl, + lapack_int ldvl, lapack_complex_double* vr, + lapack_int ldvr, lapack_int* ilo, lapack_int* ihi, + double* scale, double* abnrm, double* rconde, + double* rcondv ); + +lapack_int LAPACKE_sgehrd( int matrix_order, lapack_int n, lapack_int ilo, + lapack_int ihi, float* a, lapack_int lda, + float* tau ); +lapack_int LAPACKE_dgehrd( int matrix_order, lapack_int n, lapack_int ilo, + lapack_int ihi, double* a, lapack_int lda, + double* tau ); +lapack_int LAPACKE_cgehrd( int matrix_order, lapack_int n, lapack_int ilo, + lapack_int ihi, lapack_complex_float* a, + lapack_int lda, lapack_complex_float* tau ); +lapack_int LAPACKE_zgehrd( int matrix_order, lapack_int n, lapack_int ilo, + lapack_int ihi, lapack_complex_double* a, + lapack_int lda, lapack_complex_double* tau ); + +lapack_int LAPACKE_sgejsv( int matrix_order, char joba, char jobu, char jobv, + char jobr, char jobt, char jobp, lapack_int m, + lapack_int n, float* a, lapack_int lda, float* sva, + float* u, lapack_int ldu, float* v, lapack_int ldv, + float* stat, lapack_int* istat ); +lapack_int LAPACKE_dgejsv( int matrix_order, char joba, char jobu, char jobv, + char jobr, char jobt, char jobp, lapack_int m, + lapack_int n, double* a, lapack_int lda, double* sva, + double* u, lapack_int ldu, double* v, lapack_int ldv, + double* stat, lapack_int* istat ); + +lapack_int LAPACKE_sgelq2( int matrix_order, lapack_int m, lapack_int n, + float* a, lapack_int lda, float* tau ); +lapack_int LAPACKE_dgelq2( int matrix_order, lapack_int m, lapack_int n, + double* a, lapack_int lda, double* tau ); +lapack_int LAPACKE_cgelq2( int matrix_order, lapack_int m, lapack_int n, + lapack_complex_float* a, lapack_int lda, + lapack_complex_float* tau ); +lapack_int LAPACKE_zgelq2( int matrix_order, lapack_int m, lapack_int n, + lapack_complex_double* a, lapack_int lda, + lapack_complex_double* tau ); + +lapack_int LAPACKE_sgelqf( int matrix_order, lapack_int m, 
lapack_int n, + float* a, lapack_int lda, float* tau ); +lapack_int LAPACKE_dgelqf( int matrix_order, lapack_int m, lapack_int n, + double* a, lapack_int lda, double* tau ); +lapack_int LAPACKE_cgelqf( int matrix_order, lapack_int m, lapack_int n, + lapack_complex_float* a, lapack_int lda, + lapack_complex_float* tau ); +lapack_int LAPACKE_zgelqf( int matrix_order, lapack_int m, lapack_int n, + lapack_complex_double* a, lapack_int lda, + lapack_complex_double* tau ); + +lapack_int LAPACKE_sgels( int matrix_order, char trans, lapack_int m, + lapack_int n, lapack_int nrhs, float* a, + lapack_int lda, float* b, lapack_int ldb ); +lapack_int LAPACKE_dgels( int matrix_order, char trans, lapack_int m, + lapack_int n, lapack_int nrhs, double* a, + lapack_int lda, double* b, lapack_int ldb ); +lapack_int LAPACKE_cgels( int matrix_order, char trans, lapack_int m, + lapack_int n, lapack_int nrhs, + lapack_complex_float* a, lapack_int lda, + lapack_complex_float* b, lapack_int ldb ); +lapack_int LAPACKE_zgels( int matrix_order, char trans, lapack_int m, + lapack_int n, lapack_int nrhs, + lapack_complex_double* a, lapack_int lda, + lapack_complex_double* b, lapack_int ldb ); + +lapack_int LAPACKE_sgelsd( int matrix_order, lapack_int m, lapack_int n, + lapack_int nrhs, float* a, lapack_int lda, float* b, + lapack_int ldb, float* s, float rcond, + lapack_int* rank ); +lapack_int LAPACKE_dgelsd( int matrix_order, lapack_int m, lapack_int n, + lapack_int nrhs, double* a, lapack_int lda, + double* b, lapack_int ldb, double* s, double rcond, + lapack_int* rank ); +lapack_int LAPACKE_cgelsd( int matrix_order, lapack_int m, lapack_int n, + lapack_int nrhs, lapack_complex_float* a, + lapack_int lda, lapack_complex_float* b, + lapack_int ldb, float* s, float rcond, + lapack_int* rank ); +lapack_int LAPACKE_zgelsd( int matrix_order, lapack_int m, lapack_int n, + lapack_int nrhs, lapack_complex_double* a, + lapack_int lda, lapack_complex_double* b, + lapack_int ldb, double* s, double 
rcond, + lapack_int* rank ); + +lapack_int LAPACKE_sgelss( int matrix_order, lapack_int m, lapack_int n, + lapack_int nrhs, float* a, lapack_int lda, float* b, + lapack_int ldb, float* s, float rcond, + lapack_int* rank ); +lapack_int LAPACKE_dgelss( int matrix_order, lapack_int m, lapack_int n, + lapack_int nrhs, double* a, lapack_int lda, + double* b, lapack_int ldb, double* s, double rcond, + lapack_int* rank ); +lapack_int LAPACKE_cgelss( int matrix_order, lapack_int m, lapack_int n, + lapack_int nrhs, lapack_complex_float* a, + lapack_int lda, lapack_complex_float* b, + lapack_int ldb, float* s, float rcond, + lapack_int* rank ); +lapack_int LAPACKE_zgelss( int matrix_order, lapack_int m, lapack_int n, + lapack_int nrhs, lapack_complex_double* a, + lapack_int lda, lapack_complex_double* b, + lapack_int ldb, double* s, double rcond, + lapack_int* rank ); + +lapack_int LAPACKE_sgelsy( int matrix_order, lapack_int m, lapack_int n, + lapack_int nrhs, float* a, lapack_int lda, float* b, + lapack_int ldb, lapack_int* jpvt, float rcond, + lapack_int* rank ); +lapack_int LAPACKE_dgelsy( int matrix_order, lapack_int m, lapack_int n, + lapack_int nrhs, double* a, lapack_int lda, + double* b, lapack_int ldb, lapack_int* jpvt, + double rcond, lapack_int* rank ); +lapack_int LAPACKE_cgelsy( int matrix_order, lapack_int m, lapack_int n, + lapack_int nrhs, lapack_complex_float* a, + lapack_int lda, lapack_complex_float* b, + lapack_int ldb, lapack_int* jpvt, float rcond, + lapack_int* rank ); +lapack_int LAPACKE_zgelsy( int matrix_order, lapack_int m, lapack_int n, + lapack_int nrhs, lapack_complex_double* a, + lapack_int lda, lapack_complex_double* b, + lapack_int ldb, lapack_int* jpvt, double rcond, + lapack_int* rank ); + +lapack_int LAPACKE_sgeqlf( int matrix_order, lapack_int m, lapack_int n, + float* a, lapack_int lda, float* tau ); +lapack_int LAPACKE_dgeqlf( int matrix_order, lapack_int m, lapack_int n, + double* a, lapack_int lda, double* tau ); +lapack_int 
LAPACKE_cgeqlf( int matrix_order, lapack_int m, lapack_int n, + lapack_complex_float* a, lapack_int lda, + lapack_complex_float* tau ); +lapack_int LAPACKE_zgeqlf( int matrix_order, lapack_int m, lapack_int n, + lapack_complex_double* a, lapack_int lda, + lapack_complex_double* tau ); + +lapack_int LAPACKE_sgeqp3( int matrix_order, lapack_int m, lapack_int n, + float* a, lapack_int lda, lapack_int* jpvt, + float* tau ); +lapack_int LAPACKE_dgeqp3( int matrix_order, lapack_int m, lapack_int n, + double* a, lapack_int lda, lapack_int* jpvt, + double* tau ); +lapack_int LAPACKE_cgeqp3( int matrix_order, lapack_int m, lapack_int n, + lapack_complex_float* a, lapack_int lda, + lapack_int* jpvt, lapack_complex_float* tau ); +lapack_int LAPACKE_zgeqp3( int matrix_order, lapack_int m, lapack_int n, + lapack_complex_double* a, lapack_int lda, + lapack_int* jpvt, lapack_complex_double* tau ); + +lapack_int LAPACKE_sgeqpf( int matrix_order, lapack_int m, lapack_int n, + float* a, lapack_int lda, lapack_int* jpvt, + float* tau ); +lapack_int LAPACKE_dgeqpf( int matrix_order, lapack_int m, lapack_int n, + double* a, lapack_int lda, lapack_int* jpvt, + double* tau ); +lapack_int LAPACKE_cgeqpf( int matrix_order, lapack_int m, lapack_int n, + lapack_complex_float* a, lapack_int lda, + lapack_int* jpvt, lapack_complex_float* tau ); +lapack_int LAPACKE_zgeqpf( int matrix_order, lapack_int m, lapack_int n, + lapack_complex_double* a, lapack_int lda, + lapack_int* jpvt, lapack_complex_double* tau ); + +lapack_int LAPACKE_sgeqr2( int matrix_order, lapack_int m, lapack_int n, + float* a, lapack_int lda, float* tau ); +lapack_int LAPACKE_dgeqr2( int matrix_order, lapack_int m, lapack_int n, + double* a, lapack_int lda, double* tau ); +lapack_int LAPACKE_cgeqr2( int matrix_order, lapack_int m, lapack_int n, + lapack_complex_float* a, lapack_int lda, + lapack_complex_float* tau ); +lapack_int LAPACKE_zgeqr2( int matrix_order, lapack_int m, lapack_int n, + lapack_complex_double* a, 
lapack_int lda, + lapack_complex_double* tau ); + +lapack_int LAPACKE_sgeqrf( int matrix_order, lapack_int m, lapack_int n, + float* a, lapack_int lda, float* tau ); +lapack_int LAPACKE_dgeqrf( int matrix_order, lapack_int m, lapack_int n, + double* a, lapack_int lda, double* tau ); +lapack_int LAPACKE_cgeqrf( int matrix_order, lapack_int m, lapack_int n, + lapack_complex_float* a, lapack_int lda, + lapack_complex_float* tau ); +lapack_int LAPACKE_zgeqrf( int matrix_order, lapack_int m, lapack_int n, + lapack_complex_double* a, lapack_int lda, + lapack_complex_double* tau ); + +lapack_int LAPACKE_sgeqrfp( int matrix_order, lapack_int m, lapack_int n, + float* a, lapack_int lda, float* tau ); +lapack_int LAPACKE_dgeqrfp( int matrix_order, lapack_int m, lapack_int n, + double* a, lapack_int lda, double* tau ); +lapack_int LAPACKE_cgeqrfp( int matrix_order, lapack_int m, lapack_int n, + lapack_complex_float* a, lapack_int lda, + lapack_complex_float* tau ); +lapack_int LAPACKE_zgeqrfp( int matrix_order, lapack_int m, lapack_int n, + lapack_complex_double* a, lapack_int lda, + lapack_complex_double* tau ); + +lapack_int LAPACKE_sgerfs( int matrix_order, char trans, lapack_int n, + lapack_int nrhs, const float* a, lapack_int lda, + const float* af, lapack_int ldaf, + const lapack_int* ipiv, const float* b, + lapack_int ldb, float* x, lapack_int ldx, + float* ferr, float* berr ); +lapack_int LAPACKE_dgerfs( int matrix_order, char trans, lapack_int n, + lapack_int nrhs, const double* a, lapack_int lda, + const double* af, lapack_int ldaf, + const lapack_int* ipiv, const double* b, + lapack_int ldb, double* x, lapack_int ldx, + double* ferr, double* berr ); +lapack_int LAPACKE_cgerfs( int matrix_order, char trans, lapack_int n, + lapack_int nrhs, const lapack_complex_float* a, + lapack_int lda, const lapack_complex_float* af, + lapack_int ldaf, const lapack_int* ipiv, + const lapack_complex_float* b, lapack_int ldb, + lapack_complex_float* x, lapack_int ldx, float* ferr, + 
float* berr ); +lapack_int LAPACKE_zgerfs( int matrix_order, char trans, lapack_int n, + lapack_int nrhs, const lapack_complex_double* a, + lapack_int lda, const lapack_complex_double* af, + lapack_int ldaf, const lapack_int* ipiv, + const lapack_complex_double* b, lapack_int ldb, + lapack_complex_double* x, lapack_int ldx, + double* ferr, double* berr ); + +lapack_int LAPACKE_sgerfsx( int matrix_order, char trans, char equed, + lapack_int n, lapack_int nrhs, const float* a, + lapack_int lda, const float* af, lapack_int ldaf, + const lapack_int* ipiv, const float* r, + const float* c, const float* b, lapack_int ldb, + float* x, lapack_int ldx, float* rcond, float* berr, + lapack_int n_err_bnds, float* err_bnds_norm, + float* err_bnds_comp, lapack_int nparams, + float* params ); +lapack_int LAPACKE_dgerfsx( int matrix_order, char trans, char equed, + lapack_int n, lapack_int nrhs, const double* a, + lapack_int lda, const double* af, lapack_int ldaf, + const lapack_int* ipiv, const double* r, + const double* c, const double* b, lapack_int ldb, + double* x, lapack_int ldx, double* rcond, + double* berr, lapack_int n_err_bnds, + double* err_bnds_norm, double* err_bnds_comp, + lapack_int nparams, double* params ); +lapack_int LAPACKE_cgerfsx( int matrix_order, char trans, char equed, + lapack_int n, lapack_int nrhs, + const lapack_complex_float* a, lapack_int lda, + const lapack_complex_float* af, lapack_int ldaf, + const lapack_int* ipiv, const float* r, + const float* c, const lapack_complex_float* b, + lapack_int ldb, lapack_complex_float* x, + lapack_int ldx, float* rcond, float* berr, + lapack_int n_err_bnds, float* err_bnds_norm, + float* err_bnds_comp, lapack_int nparams, + float* params ); +lapack_int LAPACKE_zgerfsx( int matrix_order, char trans, char equed, + lapack_int n, lapack_int nrhs, + const lapack_complex_double* a, lapack_int lda, + const lapack_complex_double* af, lapack_int ldaf, + const lapack_int* ipiv, const double* r, + const double* c, const 
lapack_complex_double* b, + lapack_int ldb, lapack_complex_double* x, + lapack_int ldx, double* rcond, double* berr, + lapack_int n_err_bnds, double* err_bnds_norm, + double* err_bnds_comp, lapack_int nparams, + double* params ); + +lapack_int LAPACKE_sgerqf( int matrix_order, lapack_int m, lapack_int n, + float* a, lapack_int lda, float* tau ); +lapack_int LAPACKE_dgerqf( int matrix_order, lapack_int m, lapack_int n, + double* a, lapack_int lda, double* tau ); +lapack_int LAPACKE_cgerqf( int matrix_order, lapack_int m, lapack_int n, + lapack_complex_float* a, lapack_int lda, + lapack_complex_float* tau ); +lapack_int LAPACKE_zgerqf( int matrix_order, lapack_int m, lapack_int n, + lapack_complex_double* a, lapack_int lda, + lapack_complex_double* tau ); + +lapack_int LAPACKE_sgesdd( int matrix_order, char jobz, lapack_int m, + lapack_int n, float* a, lapack_int lda, float* s, + float* u, lapack_int ldu, float* vt, + lapack_int ldvt ); +lapack_int LAPACKE_dgesdd( int matrix_order, char jobz, lapack_int m, + lapack_int n, double* a, lapack_int lda, double* s, + double* u, lapack_int ldu, double* vt, + lapack_int ldvt ); +lapack_int LAPACKE_cgesdd( int matrix_order, char jobz, lapack_int m, + lapack_int n, lapack_complex_float* a, + lapack_int lda, float* s, lapack_complex_float* u, + lapack_int ldu, lapack_complex_float* vt, + lapack_int ldvt ); +lapack_int LAPACKE_zgesdd( int matrix_order, char jobz, lapack_int m, + lapack_int n, lapack_complex_double* a, + lapack_int lda, double* s, lapack_complex_double* u, + lapack_int ldu, lapack_complex_double* vt, + lapack_int ldvt ); + +lapack_int LAPACKE_sgesv( int matrix_order, lapack_int n, lapack_int nrhs, + float* a, lapack_int lda, lapack_int* ipiv, float* b, + lapack_int ldb ); +lapack_int LAPACKE_dgesv( int matrix_order, lapack_int n, lapack_int nrhs, + double* a, lapack_int lda, lapack_int* ipiv, + double* b, lapack_int ldb ); +lapack_int LAPACKE_cgesv( int matrix_order, lapack_int n, lapack_int nrhs, + 
lapack_complex_float* a, lapack_int lda, + lapack_int* ipiv, lapack_complex_float* b, + lapack_int ldb ); +lapack_int LAPACKE_zgesv( int matrix_order, lapack_int n, lapack_int nrhs, + lapack_complex_double* a, lapack_int lda, + lapack_int* ipiv, lapack_complex_double* b, + lapack_int ldb ); +lapack_int LAPACKE_dsgesv( int matrix_order, lapack_int n, lapack_int nrhs, + double* a, lapack_int lda, lapack_int* ipiv, + double* b, lapack_int ldb, double* x, lapack_int ldx, + lapack_int* iter ); +lapack_int LAPACKE_zcgesv( int matrix_order, lapack_int n, lapack_int nrhs, + lapack_complex_double* a, lapack_int lda, + lapack_int* ipiv, lapack_complex_double* b, + lapack_int ldb, lapack_complex_double* x, + lapack_int ldx, lapack_int* iter ); + +lapack_int LAPACKE_sgesvd( int matrix_order, char jobu, char jobvt, + lapack_int m, lapack_int n, float* a, lapack_int lda, + float* s, float* u, lapack_int ldu, float* vt, + lapack_int ldvt, float* superb ); +lapack_int LAPACKE_dgesvd( int matrix_order, char jobu, char jobvt, + lapack_int m, lapack_int n, double* a, + lapack_int lda, double* s, double* u, lapack_int ldu, + double* vt, lapack_int ldvt, double* superb ); +lapack_int LAPACKE_cgesvd( int matrix_order, char jobu, char jobvt, + lapack_int m, lapack_int n, lapack_complex_float* a, + lapack_int lda, float* s, lapack_complex_float* u, + lapack_int ldu, lapack_complex_float* vt, + lapack_int ldvt, float* superb ); +lapack_int LAPACKE_zgesvd( int matrix_order, char jobu, char jobvt, + lapack_int m, lapack_int n, lapack_complex_double* a, + lapack_int lda, double* s, lapack_complex_double* u, + lapack_int ldu, lapack_complex_double* vt, + lapack_int ldvt, double* superb ); + +lapack_int LAPACKE_sgesvj( int matrix_order, char joba, char jobu, char jobv, + lapack_int m, lapack_int n, float* a, lapack_int lda, + float* sva, lapack_int mv, float* v, lapack_int ldv, + float* stat ); +lapack_int LAPACKE_dgesvj( int matrix_order, char joba, char jobu, char jobv, + lapack_int m, 
lapack_int n, double* a, + lapack_int lda, double* sva, lapack_int mv, + double* v, lapack_int ldv, double* stat ); + +lapack_int LAPACKE_sgesvx( int matrix_order, char fact, char trans, + lapack_int n, lapack_int nrhs, float* a, + lapack_int lda, float* af, lapack_int ldaf, + lapack_int* ipiv, char* equed, float* r, float* c, + float* b, lapack_int ldb, float* x, lapack_int ldx, + float* rcond, float* ferr, float* berr, + float* rpivot ); +lapack_int LAPACKE_dgesvx( int matrix_order, char fact, char trans, + lapack_int n, lapack_int nrhs, double* a, + lapack_int lda, double* af, lapack_int ldaf, + lapack_int* ipiv, char* equed, double* r, double* c, + double* b, lapack_int ldb, double* x, lapack_int ldx, + double* rcond, double* ferr, double* berr, + double* rpivot ); +lapack_int LAPACKE_cgesvx( int matrix_order, char fact, char trans, + lapack_int n, lapack_int nrhs, + lapack_complex_float* a, lapack_int lda, + lapack_complex_float* af, lapack_int ldaf, + lapack_int* ipiv, char* equed, float* r, float* c, + lapack_complex_float* b, lapack_int ldb, + lapack_complex_float* x, lapack_int ldx, + float* rcond, float* ferr, float* berr, + float* rpivot ); +lapack_int LAPACKE_zgesvx( int matrix_order, char fact, char trans, + lapack_int n, lapack_int nrhs, + lapack_complex_double* a, lapack_int lda, + lapack_complex_double* af, lapack_int ldaf, + lapack_int* ipiv, char* equed, double* r, double* c, + lapack_complex_double* b, lapack_int ldb, + lapack_complex_double* x, lapack_int ldx, + double* rcond, double* ferr, double* berr, + double* rpivot ); + +lapack_int LAPACKE_sgesvxx( int matrix_order, char fact, char trans, + lapack_int n, lapack_int nrhs, float* a, + lapack_int lda, float* af, lapack_int ldaf, + lapack_int* ipiv, char* equed, float* r, float* c, + float* b, lapack_int ldb, float* x, lapack_int ldx, + float* rcond, float* rpvgrw, float* berr, + lapack_int n_err_bnds, float* err_bnds_norm, + float* err_bnds_comp, lapack_int nparams, + float* params ); 
+lapack_int LAPACKE_dgesvxx( int matrix_order, char fact, char trans, + lapack_int n, lapack_int nrhs, double* a, + lapack_int lda, double* af, lapack_int ldaf, + lapack_int* ipiv, char* equed, double* r, double* c, + double* b, lapack_int ldb, double* x, + lapack_int ldx, double* rcond, double* rpvgrw, + double* berr, lapack_int n_err_bnds, + double* err_bnds_norm, double* err_bnds_comp, + lapack_int nparams, double* params ); +lapack_int LAPACKE_cgesvxx( int matrix_order, char fact, char trans, + lapack_int n, lapack_int nrhs, + lapack_complex_float* a, lapack_int lda, + lapack_complex_float* af, lapack_int ldaf, + lapack_int* ipiv, char* equed, float* r, float* c, + lapack_complex_float* b, lapack_int ldb, + lapack_complex_float* x, lapack_int ldx, + float* rcond, float* rpvgrw, float* berr, + lapack_int n_err_bnds, float* err_bnds_norm, + float* err_bnds_comp, lapack_int nparams, + float* params ); +lapack_int LAPACKE_zgesvxx( int matrix_order, char fact, char trans, + lapack_int n, lapack_int nrhs, + lapack_complex_double* a, lapack_int lda, + lapack_complex_double* af, lapack_int ldaf, + lapack_int* ipiv, char* equed, double* r, double* c, + lapack_complex_double* b, lapack_int ldb, + lapack_complex_double* x, lapack_int ldx, + double* rcond, double* rpvgrw, double* berr, + lapack_int n_err_bnds, double* err_bnds_norm, + double* err_bnds_comp, lapack_int nparams, + double* params ); + +lapack_int LAPACKE_sgetf2( int matrix_order, lapack_int m, lapack_int n, + float* a, lapack_int lda, lapack_int* ipiv ); +lapack_int LAPACKE_dgetf2( int matrix_order, lapack_int m, lapack_int n, + double* a, lapack_int lda, lapack_int* ipiv ); +lapack_int LAPACKE_cgetf2( int matrix_order, lapack_int m, lapack_int n, + lapack_complex_float* a, lapack_int lda, + lapack_int* ipiv ); +lapack_int LAPACKE_zgetf2( int matrix_order, lapack_int m, lapack_int n, + lapack_complex_double* a, lapack_int lda, + lapack_int* ipiv ); + +lapack_int LAPACKE_sgetrf( int matrix_order, lapack_int m, 
lapack_int n, + float* a, lapack_int lda, lapack_int* ipiv ); +lapack_int LAPACKE_dgetrf( int matrix_order, lapack_int m, lapack_int n, + double* a, lapack_int lda, lapack_int* ipiv ); +lapack_int LAPACKE_cgetrf( int matrix_order, lapack_int m, lapack_int n, + lapack_complex_float* a, lapack_int lda, + lapack_int* ipiv ); +lapack_int LAPACKE_zgetrf( int matrix_order, lapack_int m, lapack_int n, + lapack_complex_double* a, lapack_int lda, + lapack_int* ipiv ); + +lapack_int LAPACKE_sgetri( int matrix_order, lapack_int n, float* a, + lapack_int lda, const lapack_int* ipiv ); +lapack_int LAPACKE_dgetri( int matrix_order, lapack_int n, double* a, + lapack_int lda, const lapack_int* ipiv ); +lapack_int LAPACKE_cgetri( int matrix_order, lapack_int n, + lapack_complex_float* a, lapack_int lda, + const lapack_int* ipiv ); +lapack_int LAPACKE_zgetri( int matrix_order, lapack_int n, + lapack_complex_double* a, lapack_int lda, + const lapack_int* ipiv ); + +lapack_int LAPACKE_sgetrs( int matrix_order, char trans, lapack_int n, + lapack_int nrhs, const float* a, lapack_int lda, + const lapack_int* ipiv, float* b, lapack_int ldb ); +lapack_int LAPACKE_dgetrs( int matrix_order, char trans, lapack_int n, + lapack_int nrhs, const double* a, lapack_int lda, + const lapack_int* ipiv, double* b, lapack_int ldb ); +lapack_int LAPACKE_cgetrs( int matrix_order, char trans, lapack_int n, + lapack_int nrhs, const lapack_complex_float* a, + lapack_int lda, const lapack_int* ipiv, + lapack_complex_float* b, lapack_int ldb ); +lapack_int LAPACKE_zgetrs( int matrix_order, char trans, lapack_int n, + lapack_int nrhs, const lapack_complex_double* a, + lapack_int lda, const lapack_int* ipiv, + lapack_complex_double* b, lapack_int ldb ); + +lapack_int LAPACKE_sggbak( int matrix_order, char job, char side, lapack_int n, + lapack_int ilo, lapack_int ihi, const float* lscale, + const float* rscale, lapack_int m, float* v, + lapack_int ldv ); +lapack_int LAPACKE_dggbak( int matrix_order, char job, 
char side, lapack_int n, + lapack_int ilo, lapack_int ihi, const double* lscale, + const double* rscale, lapack_int m, double* v, + lapack_int ldv ); +lapack_int LAPACKE_cggbak( int matrix_order, char job, char side, lapack_int n, + lapack_int ilo, lapack_int ihi, const float* lscale, + const float* rscale, lapack_int m, + lapack_complex_float* v, lapack_int ldv ); +lapack_int LAPACKE_zggbak( int matrix_order, char job, char side, lapack_int n, + lapack_int ilo, lapack_int ihi, const double* lscale, + const double* rscale, lapack_int m, + lapack_complex_double* v, lapack_int ldv ); + +lapack_int LAPACKE_sggbal( int matrix_order, char job, lapack_int n, float* a, + lapack_int lda, float* b, lapack_int ldb, + lapack_int* ilo, lapack_int* ihi, float* lscale, + float* rscale ); +lapack_int LAPACKE_dggbal( int matrix_order, char job, lapack_int n, double* a, + lapack_int lda, double* b, lapack_int ldb, + lapack_int* ilo, lapack_int* ihi, double* lscale, + double* rscale ); +lapack_int LAPACKE_cggbal( int matrix_order, char job, lapack_int n, + lapack_complex_float* a, lapack_int lda, + lapack_complex_float* b, lapack_int ldb, + lapack_int* ilo, lapack_int* ihi, float* lscale, + float* rscale ); +lapack_int LAPACKE_zggbal( int matrix_order, char job, lapack_int n, + lapack_complex_double* a, lapack_int lda, + lapack_complex_double* b, lapack_int ldb, + lapack_int* ilo, lapack_int* ihi, double* lscale, + double* rscale ); + +lapack_int LAPACKE_sgges( int matrix_order, char jobvsl, char jobvsr, char sort, + LAPACK_S_SELECT3 selctg, lapack_int n, float* a, + lapack_int lda, float* b, lapack_int ldb, + lapack_int* sdim, float* alphar, float* alphai, + float* beta, float* vsl, lapack_int ldvsl, float* vsr, + lapack_int ldvsr ); +lapack_int LAPACKE_dgges( int matrix_order, char jobvsl, char jobvsr, char sort, + LAPACK_D_SELECT3 selctg, lapack_int n, double* a, + lapack_int lda, double* b, lapack_int ldb, + lapack_int* sdim, double* alphar, double* alphai, + double* beta, 
double* vsl, lapack_int ldvsl, + double* vsr, lapack_int ldvsr ); +lapack_int LAPACKE_cgges( int matrix_order, char jobvsl, char jobvsr, char sort, + LAPACK_C_SELECT2 selctg, lapack_int n, + lapack_complex_float* a, lapack_int lda, + lapack_complex_float* b, lapack_int ldb, + lapack_int* sdim, lapack_complex_float* alpha, + lapack_complex_float* beta, lapack_complex_float* vsl, + lapack_int ldvsl, lapack_complex_float* vsr, + lapack_int ldvsr ); +lapack_int LAPACKE_zgges( int matrix_order, char jobvsl, char jobvsr, char sort, + LAPACK_Z_SELECT2 selctg, lapack_int n, + lapack_complex_double* a, lapack_int lda, + lapack_complex_double* b, lapack_int ldb, + lapack_int* sdim, lapack_complex_double* alpha, + lapack_complex_double* beta, + lapack_complex_double* vsl, lapack_int ldvsl, + lapack_complex_double* vsr, lapack_int ldvsr ); + +lapack_int LAPACKE_sggesx( int matrix_order, char jobvsl, char jobvsr, + char sort, LAPACK_S_SELECT3 selctg, char sense, + lapack_int n, float* a, lapack_int lda, float* b, + lapack_int ldb, lapack_int* sdim, float* alphar, + float* alphai, float* beta, float* vsl, + lapack_int ldvsl, float* vsr, lapack_int ldvsr, + float* rconde, float* rcondv ); +lapack_int LAPACKE_dggesx( int matrix_order, char jobvsl, char jobvsr, + char sort, LAPACK_D_SELECT3 selctg, char sense, + lapack_int n, double* a, lapack_int lda, double* b, + lapack_int ldb, lapack_int* sdim, double* alphar, + double* alphai, double* beta, double* vsl, + lapack_int ldvsl, double* vsr, lapack_int ldvsr, + double* rconde, double* rcondv ); +lapack_int LAPACKE_cggesx( int matrix_order, char jobvsl, char jobvsr, + char sort, LAPACK_C_SELECT2 selctg, char sense, + lapack_int n, lapack_complex_float* a, + lapack_int lda, lapack_complex_float* b, + lapack_int ldb, lapack_int* sdim, + lapack_complex_float* alpha, + lapack_complex_float* beta, + lapack_complex_float* vsl, lapack_int ldvsl, + lapack_complex_float* vsr, lapack_int ldvsr, + float* rconde, float* rcondv ); +lapack_int 
LAPACKE_zggesx( int matrix_order, char jobvsl, char jobvsr, + char sort, LAPACK_Z_SELECT2 selctg, char sense, + lapack_int n, lapack_complex_double* a, + lapack_int lda, lapack_complex_double* b, + lapack_int ldb, lapack_int* sdim, + lapack_complex_double* alpha, + lapack_complex_double* beta, + lapack_complex_double* vsl, lapack_int ldvsl, + lapack_complex_double* vsr, lapack_int ldvsr, + double* rconde, double* rcondv ); + +lapack_int LAPACKE_sggev( int matrix_order, char jobvl, char jobvr, + lapack_int n, float* a, lapack_int lda, float* b, + lapack_int ldb, float* alphar, float* alphai, + float* beta, float* vl, lapack_int ldvl, float* vr, + lapack_int ldvr ); +lapack_int LAPACKE_dggev( int matrix_order, char jobvl, char jobvr, + lapack_int n, double* a, lapack_int lda, double* b, + lapack_int ldb, double* alphar, double* alphai, + double* beta, double* vl, lapack_int ldvl, double* vr, + lapack_int ldvr ); +lapack_int LAPACKE_cggev( int matrix_order, char jobvl, char jobvr, + lapack_int n, lapack_complex_float* a, lapack_int lda, + lapack_complex_float* b, lapack_int ldb, + lapack_complex_float* alpha, + lapack_complex_float* beta, lapack_complex_float* vl, + lapack_int ldvl, lapack_complex_float* vr, + lapack_int ldvr ); +lapack_int LAPACKE_zggev( int matrix_order, char jobvl, char jobvr, + lapack_int n, lapack_complex_double* a, + lapack_int lda, lapack_complex_double* b, + lapack_int ldb, lapack_complex_double* alpha, + lapack_complex_double* beta, + lapack_complex_double* vl, lapack_int ldvl, + lapack_complex_double* vr, lapack_int ldvr ); + +lapack_int LAPACKE_sggevx( int matrix_order, char balanc, char jobvl, + char jobvr, char sense, lapack_int n, float* a, + lapack_int lda, float* b, lapack_int ldb, + float* alphar, float* alphai, float* beta, float* vl, + lapack_int ldvl, float* vr, lapack_int ldvr, + lapack_int* ilo, lapack_int* ihi, float* lscale, + float* rscale, float* abnrm, float* bbnrm, + float* rconde, float* rcondv ); +lapack_int 
LAPACKE_dggevx( int matrix_order, char balanc, char jobvl, + char jobvr, char sense, lapack_int n, double* a, + lapack_int lda, double* b, lapack_int ldb, + double* alphar, double* alphai, double* beta, + double* vl, lapack_int ldvl, double* vr, + lapack_int ldvr, lapack_int* ilo, lapack_int* ihi, + double* lscale, double* rscale, double* abnrm, + double* bbnrm, double* rconde, double* rcondv ); +lapack_int LAPACKE_cggevx( int matrix_order, char balanc, char jobvl, + char jobvr, char sense, lapack_int n, + lapack_complex_float* a, lapack_int lda, + lapack_complex_float* b, lapack_int ldb, + lapack_complex_float* alpha, + lapack_complex_float* beta, lapack_complex_float* vl, + lapack_int ldvl, lapack_complex_float* vr, + lapack_int ldvr, lapack_int* ilo, lapack_int* ihi, + float* lscale, float* rscale, float* abnrm, + float* bbnrm, float* rconde, float* rcondv ); +lapack_int LAPACKE_zggevx( int matrix_order, char balanc, char jobvl, + char jobvr, char sense, lapack_int n, + lapack_complex_double* a, lapack_int lda, + lapack_complex_double* b, lapack_int ldb, + lapack_complex_double* alpha, + lapack_complex_double* beta, + lapack_complex_double* vl, lapack_int ldvl, + lapack_complex_double* vr, lapack_int ldvr, + lapack_int* ilo, lapack_int* ihi, double* lscale, + double* rscale, double* abnrm, double* bbnrm, + double* rconde, double* rcondv ); + +lapack_int LAPACKE_sggglm( int matrix_order, lapack_int n, lapack_int m, + lapack_int p, float* a, lapack_int lda, float* b, + lapack_int ldb, float* d, float* x, float* y ); +lapack_int LAPACKE_dggglm( int matrix_order, lapack_int n, lapack_int m, + lapack_int p, double* a, lapack_int lda, double* b, + lapack_int ldb, double* d, double* x, double* y ); +lapack_int LAPACKE_cggglm( int matrix_order, lapack_int n, lapack_int m, + lapack_int p, lapack_complex_float* a, + lapack_int lda, lapack_complex_float* b, + lapack_int ldb, lapack_complex_float* d, + lapack_complex_float* x, lapack_complex_float* y ); +lapack_int 
LAPACKE_zggglm( int matrix_order, lapack_int n, lapack_int m, + lapack_int p, lapack_complex_double* a, + lapack_int lda, lapack_complex_double* b, + lapack_int ldb, lapack_complex_double* d, + lapack_complex_double* x, lapack_complex_double* y ); + +lapack_int LAPACKE_sgghrd( int matrix_order, char compq, char compz, + lapack_int n, lapack_int ilo, lapack_int ihi, + float* a, lapack_int lda, float* b, lapack_int ldb, + float* q, lapack_int ldq, float* z, lapack_int ldz ); +lapack_int LAPACKE_dgghrd( int matrix_order, char compq, char compz, + lapack_int n, lapack_int ilo, lapack_int ihi, + double* a, lapack_int lda, double* b, lapack_int ldb, + double* q, lapack_int ldq, double* z, + lapack_int ldz ); +lapack_int LAPACKE_cgghrd( int matrix_order, char compq, char compz, + lapack_int n, lapack_int ilo, lapack_int ihi, + lapack_complex_float* a, lapack_int lda, + lapack_complex_float* b, lapack_int ldb, + lapack_complex_float* q, lapack_int ldq, + lapack_complex_float* z, lapack_int ldz ); +lapack_int LAPACKE_zgghrd( int matrix_order, char compq, char compz, + lapack_int n, lapack_int ilo, lapack_int ihi, + lapack_complex_double* a, lapack_int lda, + lapack_complex_double* b, lapack_int ldb, + lapack_complex_double* q, lapack_int ldq, + lapack_complex_double* z, lapack_int ldz ); + +lapack_int LAPACKE_sgglse( int matrix_order, lapack_int m, lapack_int n, + lapack_int p, float* a, lapack_int lda, float* b, + lapack_int ldb, float* c, float* d, float* x ); +lapack_int LAPACKE_dgglse( int matrix_order, lapack_int m, lapack_int n, + lapack_int p, double* a, lapack_int lda, double* b, + lapack_int ldb, double* c, double* d, double* x ); +lapack_int LAPACKE_cgglse( int matrix_order, lapack_int m, lapack_int n, + lapack_int p, lapack_complex_float* a, + lapack_int lda, lapack_complex_float* b, + lapack_int ldb, lapack_complex_float* c, + lapack_complex_float* d, lapack_complex_float* x ); +lapack_int LAPACKE_zgglse( int matrix_order, lapack_int m, lapack_int n, + 
lapack_int p, lapack_complex_double* a, + lapack_int lda, lapack_complex_double* b, + lapack_int ldb, lapack_complex_double* c, + lapack_complex_double* d, lapack_complex_double* x ); + +lapack_int LAPACKE_sggqrf( int matrix_order, lapack_int n, lapack_int m, + lapack_int p, float* a, lapack_int lda, float* taua, + float* b, lapack_int ldb, float* taub ); +lapack_int LAPACKE_dggqrf( int matrix_order, lapack_int n, lapack_int m, + lapack_int p, double* a, lapack_int lda, + double* taua, double* b, lapack_int ldb, + double* taub ); +lapack_int LAPACKE_cggqrf( int matrix_order, lapack_int n, lapack_int m, + lapack_int p, lapack_complex_float* a, + lapack_int lda, lapack_complex_float* taua, + lapack_complex_float* b, lapack_int ldb, + lapack_complex_float* taub ); +lapack_int LAPACKE_zggqrf( int matrix_order, lapack_int n, lapack_int m, + lapack_int p, lapack_complex_double* a, + lapack_int lda, lapack_complex_double* taua, + lapack_complex_double* b, lapack_int ldb, + lapack_complex_double* taub ); + +lapack_int LAPACKE_sggrqf( int matrix_order, lapack_int m, lapack_int p, + lapack_int n, float* a, lapack_int lda, float* taua, + float* b, lapack_int ldb, float* taub ); +lapack_int LAPACKE_dggrqf( int matrix_order, lapack_int m, lapack_int p, + lapack_int n, double* a, lapack_int lda, + double* taua, double* b, lapack_int ldb, + double* taub ); +lapack_int LAPACKE_cggrqf( int matrix_order, lapack_int m, lapack_int p, + lapack_int n, lapack_complex_float* a, + lapack_int lda, lapack_complex_float* taua, + lapack_complex_float* b, lapack_int ldb, + lapack_complex_float* taub ); +lapack_int LAPACKE_zggrqf( int matrix_order, lapack_int m, lapack_int p, + lapack_int n, lapack_complex_double* a, + lapack_int lda, lapack_complex_double* taua, + lapack_complex_double* b, lapack_int ldb, + lapack_complex_double* taub ); + +lapack_int LAPACKE_sggsvd( int matrix_order, char jobu, char jobv, char jobq, + lapack_int m, lapack_int n, lapack_int p, + lapack_int* k, lapack_int* l, 
float* a, + lapack_int lda, float* b, lapack_int ldb, + float* alpha, float* beta, float* u, lapack_int ldu, + float* v, lapack_int ldv, float* q, lapack_int ldq, + lapack_int* iwork ); +lapack_int LAPACKE_dggsvd( int matrix_order, char jobu, char jobv, char jobq, + lapack_int m, lapack_int n, lapack_int p, + lapack_int* k, lapack_int* l, double* a, + lapack_int lda, double* b, lapack_int ldb, + double* alpha, double* beta, double* u, + lapack_int ldu, double* v, lapack_int ldv, double* q, + lapack_int ldq, lapack_int* iwork ); +lapack_int LAPACKE_cggsvd( int matrix_order, char jobu, char jobv, char jobq, + lapack_int m, lapack_int n, lapack_int p, + lapack_int* k, lapack_int* l, + lapack_complex_float* a, lapack_int lda, + lapack_complex_float* b, lapack_int ldb, + float* alpha, float* beta, lapack_complex_float* u, + lapack_int ldu, lapack_complex_float* v, + lapack_int ldv, lapack_complex_float* q, + lapack_int ldq, lapack_int* iwork ); +lapack_int LAPACKE_zggsvd( int matrix_order, char jobu, char jobv, char jobq, + lapack_int m, lapack_int n, lapack_int p, + lapack_int* k, lapack_int* l, + lapack_complex_double* a, lapack_int lda, + lapack_complex_double* b, lapack_int ldb, + double* alpha, double* beta, + lapack_complex_double* u, lapack_int ldu, + lapack_complex_double* v, lapack_int ldv, + lapack_complex_double* q, lapack_int ldq, + lapack_int* iwork ); + +lapack_int LAPACKE_sggsvp( int matrix_order, char jobu, char jobv, char jobq, + lapack_int m, lapack_int p, lapack_int n, float* a, + lapack_int lda, float* b, lapack_int ldb, float tola, + float tolb, lapack_int* k, lapack_int* l, float* u, + lapack_int ldu, float* v, lapack_int ldv, float* q, + lapack_int ldq ); +lapack_int LAPACKE_dggsvp( int matrix_order, char jobu, char jobv, char jobq, + lapack_int m, lapack_int p, lapack_int n, double* a, + lapack_int lda, double* b, lapack_int ldb, + double tola, double tolb, lapack_int* k, + lapack_int* l, double* u, lapack_int ldu, double* v, + lapack_int ldv, 
double* q, lapack_int ldq ); +lapack_int LAPACKE_cggsvp( int matrix_order, char jobu, char jobv, char jobq, + lapack_int m, lapack_int p, lapack_int n, + lapack_complex_float* a, lapack_int lda, + lapack_complex_float* b, lapack_int ldb, float tola, + float tolb, lapack_int* k, lapack_int* l, + lapack_complex_float* u, lapack_int ldu, + lapack_complex_float* v, lapack_int ldv, + lapack_complex_float* q, lapack_int ldq ); +lapack_int LAPACKE_zggsvp( int matrix_order, char jobu, char jobv, char jobq, + lapack_int m, lapack_int p, lapack_int n, + lapack_complex_double* a, lapack_int lda, + lapack_complex_double* b, lapack_int ldb, + double tola, double tolb, lapack_int* k, + lapack_int* l, lapack_complex_double* u, + lapack_int ldu, lapack_complex_double* v, + lapack_int ldv, lapack_complex_double* q, + lapack_int ldq ); + +lapack_int LAPACKE_sgtcon( char norm, lapack_int n, const float* dl, + const float* d, const float* du, const float* du2, + const lapack_int* ipiv, float anorm, float* rcond ); +lapack_int LAPACKE_dgtcon( char norm, lapack_int n, const double* dl, + const double* d, const double* du, const double* du2, + const lapack_int* ipiv, double anorm, + double* rcond ); +lapack_int LAPACKE_cgtcon( char norm, lapack_int n, + const lapack_complex_float* dl, + const lapack_complex_float* d, + const lapack_complex_float* du, + const lapack_complex_float* du2, + const lapack_int* ipiv, float anorm, float* rcond ); +lapack_int LAPACKE_zgtcon( char norm, lapack_int n, + const lapack_complex_double* dl, + const lapack_complex_double* d, + const lapack_complex_double* du, + const lapack_complex_double* du2, + const lapack_int* ipiv, double anorm, + double* rcond ); + +lapack_int LAPACKE_sgtrfs( int matrix_order, char trans, lapack_int n, + lapack_int nrhs, const float* dl, const float* d, + const float* du, const float* dlf, const float* df, + const float* duf, const float* du2, + const lapack_int* ipiv, const float* b, + lapack_int ldb, float* x, lapack_int ldx, + 
float* ferr, float* berr ); +lapack_int LAPACKE_dgtrfs( int matrix_order, char trans, lapack_int n, + lapack_int nrhs, const double* dl, const double* d, + const double* du, const double* dlf, + const double* df, const double* duf, + const double* du2, const lapack_int* ipiv, + const double* b, lapack_int ldb, double* x, + lapack_int ldx, double* ferr, double* berr ); +lapack_int LAPACKE_cgtrfs( int matrix_order, char trans, lapack_int n, + lapack_int nrhs, const lapack_complex_float* dl, + const lapack_complex_float* d, + const lapack_complex_float* du, + const lapack_complex_float* dlf, + const lapack_complex_float* df, + const lapack_complex_float* duf, + const lapack_complex_float* du2, + const lapack_int* ipiv, + const lapack_complex_float* b, lapack_int ldb, + lapack_complex_float* x, lapack_int ldx, float* ferr, + float* berr ); +lapack_int LAPACKE_zgtrfs( int matrix_order, char trans, lapack_int n, + lapack_int nrhs, const lapack_complex_double* dl, + const lapack_complex_double* d, + const lapack_complex_double* du, + const lapack_complex_double* dlf, + const lapack_complex_double* df, + const lapack_complex_double* duf, + const lapack_complex_double* du2, + const lapack_int* ipiv, + const lapack_complex_double* b, lapack_int ldb, + lapack_complex_double* x, lapack_int ldx, + double* ferr, double* berr ); + +lapack_int LAPACKE_sgtsv( int matrix_order, lapack_int n, lapack_int nrhs, + float* dl, float* d, float* du, float* b, + lapack_int ldb ); +lapack_int LAPACKE_dgtsv( int matrix_order, lapack_int n, lapack_int nrhs, + double* dl, double* d, double* du, double* b, + lapack_int ldb ); +lapack_int LAPACKE_cgtsv( int matrix_order, lapack_int n, lapack_int nrhs, + lapack_complex_float* dl, lapack_complex_float* d, + lapack_complex_float* du, lapack_complex_float* b, + lapack_int ldb ); +lapack_int LAPACKE_zgtsv( int matrix_order, lapack_int n, lapack_int nrhs, + lapack_complex_double* dl, lapack_complex_double* d, + lapack_complex_double* du, 
lapack_complex_double* b, + lapack_int ldb ); + +lapack_int LAPACKE_sgtsvx( int matrix_order, char fact, char trans, + lapack_int n, lapack_int nrhs, const float* dl, + const float* d, const float* du, float* dlf, + float* df, float* duf, float* du2, lapack_int* ipiv, + const float* b, lapack_int ldb, float* x, + lapack_int ldx, float* rcond, float* ferr, + float* berr ); +lapack_int LAPACKE_dgtsvx( int matrix_order, char fact, char trans, + lapack_int n, lapack_int nrhs, const double* dl, + const double* d, const double* du, double* dlf, + double* df, double* duf, double* du2, + lapack_int* ipiv, const double* b, lapack_int ldb, + double* x, lapack_int ldx, double* rcond, + double* ferr, double* berr ); +lapack_int LAPACKE_cgtsvx( int matrix_order, char fact, char trans, + lapack_int n, lapack_int nrhs, + const lapack_complex_float* dl, + const lapack_complex_float* d, + const lapack_complex_float* du, + lapack_complex_float* dlf, lapack_complex_float* df, + lapack_complex_float* duf, lapack_complex_float* du2, + lapack_int* ipiv, const lapack_complex_float* b, + lapack_int ldb, lapack_complex_float* x, + lapack_int ldx, float* rcond, float* ferr, + float* berr ); +lapack_int LAPACKE_zgtsvx( int matrix_order, char fact, char trans, + lapack_int n, lapack_int nrhs, + const lapack_complex_double* dl, + const lapack_complex_double* d, + const lapack_complex_double* du, + lapack_complex_double* dlf, + lapack_complex_double* df, + lapack_complex_double* duf, + lapack_complex_double* du2, lapack_int* ipiv, + const lapack_complex_double* b, lapack_int ldb, + lapack_complex_double* x, lapack_int ldx, + double* rcond, double* ferr, double* berr ); + +lapack_int LAPACKE_sgttrf( lapack_int n, float* dl, float* d, float* du, + float* du2, lapack_int* ipiv ); +lapack_int LAPACKE_dgttrf( lapack_int n, double* dl, double* d, double* du, + double* du2, lapack_int* ipiv ); +lapack_int LAPACKE_cgttrf( lapack_int n, lapack_complex_float* dl, + lapack_complex_float* d, 
lapack_complex_float* du, + lapack_complex_float* du2, lapack_int* ipiv ); +lapack_int LAPACKE_zgttrf( lapack_int n, lapack_complex_double* dl, + lapack_complex_double* d, lapack_complex_double* du, + lapack_complex_double* du2, lapack_int* ipiv ); + +lapack_int LAPACKE_sgttrs( int matrix_order, char trans, lapack_int n, + lapack_int nrhs, const float* dl, const float* d, + const float* du, const float* du2, + const lapack_int* ipiv, float* b, lapack_int ldb ); +lapack_int LAPACKE_dgttrs( int matrix_order, char trans, lapack_int n, + lapack_int nrhs, const double* dl, const double* d, + const double* du, const double* du2, + const lapack_int* ipiv, double* b, lapack_int ldb ); +lapack_int LAPACKE_cgttrs( int matrix_order, char trans, lapack_int n, + lapack_int nrhs, const lapack_complex_float* dl, + const lapack_complex_float* d, + const lapack_complex_float* du, + const lapack_complex_float* du2, + const lapack_int* ipiv, lapack_complex_float* b, + lapack_int ldb ); +lapack_int LAPACKE_zgttrs( int matrix_order, char trans, lapack_int n, + lapack_int nrhs, const lapack_complex_double* dl, + const lapack_complex_double* d, + const lapack_complex_double* du, + const lapack_complex_double* du2, + const lapack_int* ipiv, lapack_complex_double* b, + lapack_int ldb ); + +lapack_int LAPACKE_chbev( int matrix_order, char jobz, char uplo, lapack_int n, + lapack_int kd, lapack_complex_float* ab, + lapack_int ldab, float* w, lapack_complex_float* z, + lapack_int ldz ); +lapack_int LAPACKE_zhbev( int matrix_order, char jobz, char uplo, lapack_int n, + lapack_int kd, lapack_complex_double* ab, + lapack_int ldab, double* w, lapack_complex_double* z, + lapack_int ldz ); + +lapack_int LAPACKE_chbevd( int matrix_order, char jobz, char uplo, lapack_int n, + lapack_int kd, lapack_complex_float* ab, + lapack_int ldab, float* w, lapack_complex_float* z, + lapack_int ldz ); +lapack_int LAPACKE_zhbevd( int matrix_order, char jobz, char uplo, lapack_int n, + lapack_int kd, 
lapack_complex_double* ab, + lapack_int ldab, double* w, lapack_complex_double* z, + lapack_int ldz ); + +lapack_int LAPACKE_chbevx( int matrix_order, char jobz, char range, char uplo, + lapack_int n, lapack_int kd, + lapack_complex_float* ab, lapack_int ldab, + lapack_complex_float* q, lapack_int ldq, float vl, + float vu, lapack_int il, lapack_int iu, float abstol, + lapack_int* m, float* w, lapack_complex_float* z, + lapack_int ldz, lapack_int* ifail ); +lapack_int LAPACKE_zhbevx( int matrix_order, char jobz, char range, char uplo, + lapack_int n, lapack_int kd, + lapack_complex_double* ab, lapack_int ldab, + lapack_complex_double* q, lapack_int ldq, double vl, + double vu, lapack_int il, lapack_int iu, + double abstol, lapack_int* m, double* w, + lapack_complex_double* z, lapack_int ldz, + lapack_int* ifail ); + +lapack_int LAPACKE_chbgst( int matrix_order, char vect, char uplo, lapack_int n, + lapack_int ka, lapack_int kb, + lapack_complex_float* ab, lapack_int ldab, + const lapack_complex_float* bb, lapack_int ldbb, + lapack_complex_float* x, lapack_int ldx ); +lapack_int LAPACKE_zhbgst( int matrix_order, char vect, char uplo, lapack_int n, + lapack_int ka, lapack_int kb, + lapack_complex_double* ab, lapack_int ldab, + const lapack_complex_double* bb, lapack_int ldbb, + lapack_complex_double* x, lapack_int ldx ); + +lapack_int LAPACKE_chbgv( int matrix_order, char jobz, char uplo, lapack_int n, + lapack_int ka, lapack_int kb, + lapack_complex_float* ab, lapack_int ldab, + lapack_complex_float* bb, lapack_int ldbb, float* w, + lapack_complex_float* z, lapack_int ldz ); +lapack_int LAPACKE_zhbgv( int matrix_order, char jobz, char uplo, lapack_int n, + lapack_int ka, lapack_int kb, + lapack_complex_double* ab, lapack_int ldab, + lapack_complex_double* bb, lapack_int ldbb, double* w, + lapack_complex_double* z, lapack_int ldz ); + +lapack_int LAPACKE_chbgvd( int matrix_order, char jobz, char uplo, lapack_int n, + lapack_int ka, lapack_int kb, + 
lapack_complex_float* ab, lapack_int ldab, + lapack_complex_float* bb, lapack_int ldbb, float* w, + lapack_complex_float* z, lapack_int ldz ); +lapack_int LAPACKE_zhbgvd( int matrix_order, char jobz, char uplo, lapack_int n, + lapack_int ka, lapack_int kb, + lapack_complex_double* ab, lapack_int ldab, + lapack_complex_double* bb, lapack_int ldbb, + double* w, lapack_complex_double* z, + lapack_int ldz ); + +lapack_int LAPACKE_chbgvx( int matrix_order, char jobz, char range, char uplo, + lapack_int n, lapack_int ka, lapack_int kb, + lapack_complex_float* ab, lapack_int ldab, + lapack_complex_float* bb, lapack_int ldbb, + lapack_complex_float* q, lapack_int ldq, float vl, + float vu, lapack_int il, lapack_int iu, float abstol, + lapack_int* m, float* w, lapack_complex_float* z, + lapack_int ldz, lapack_int* ifail ); +lapack_int LAPACKE_zhbgvx( int matrix_order, char jobz, char range, char uplo, + lapack_int n, lapack_int ka, lapack_int kb, + lapack_complex_double* ab, lapack_int ldab, + lapack_complex_double* bb, lapack_int ldbb, + lapack_complex_double* q, lapack_int ldq, double vl, + double vu, lapack_int il, lapack_int iu, + double abstol, lapack_int* m, double* w, + lapack_complex_double* z, lapack_int ldz, + lapack_int* ifail ); + +lapack_int LAPACKE_chbtrd( int matrix_order, char vect, char uplo, lapack_int n, + lapack_int kd, lapack_complex_float* ab, + lapack_int ldab, float* d, float* e, + lapack_complex_float* q, lapack_int ldq ); +lapack_int LAPACKE_zhbtrd( int matrix_order, char vect, char uplo, lapack_int n, + lapack_int kd, lapack_complex_double* ab, + lapack_int ldab, double* d, double* e, + lapack_complex_double* q, lapack_int ldq ); + +lapack_int LAPACKE_checon( int matrix_order, char uplo, lapack_int n, + const lapack_complex_float* a, lapack_int lda, + const lapack_int* ipiv, float anorm, float* rcond ); +lapack_int LAPACKE_zhecon( int matrix_order, char uplo, lapack_int n, + const lapack_complex_double* a, lapack_int lda, + const lapack_int* ipiv, 
double anorm, + double* rcond ); + +lapack_int LAPACKE_cheequb( int matrix_order, char uplo, lapack_int n, + const lapack_complex_float* a, lapack_int lda, + float* s, float* scond, float* amax ); +lapack_int LAPACKE_zheequb( int matrix_order, char uplo, lapack_int n, + const lapack_complex_double* a, lapack_int lda, + double* s, double* scond, double* amax ); + +lapack_int LAPACKE_cheev( int matrix_order, char jobz, char uplo, lapack_int n, + lapack_complex_float* a, lapack_int lda, float* w ); +lapack_int LAPACKE_zheev( int matrix_order, char jobz, char uplo, lapack_int n, + lapack_complex_double* a, lapack_int lda, double* w ); + +lapack_int LAPACKE_cheevd( int matrix_order, char jobz, char uplo, lapack_int n, + lapack_complex_float* a, lapack_int lda, float* w ); +lapack_int LAPACKE_zheevd( int matrix_order, char jobz, char uplo, lapack_int n, + lapack_complex_double* a, lapack_int lda, + double* w ); + +lapack_int LAPACKE_cheevr( int matrix_order, char jobz, char range, char uplo, + lapack_int n, lapack_complex_float* a, + lapack_int lda, float vl, float vu, lapack_int il, + lapack_int iu, float abstol, lapack_int* m, float* w, + lapack_complex_float* z, lapack_int ldz, + lapack_int* isuppz ); +lapack_int LAPACKE_zheevr( int matrix_order, char jobz, char range, char uplo, + lapack_int n, lapack_complex_double* a, + lapack_int lda, double vl, double vu, lapack_int il, + lapack_int iu, double abstol, lapack_int* m, + double* w, lapack_complex_double* z, lapack_int ldz, + lapack_int* isuppz ); + +lapack_int LAPACKE_cheevx( int matrix_order, char jobz, char range, char uplo, + lapack_int n, lapack_complex_float* a, + lapack_int lda, float vl, float vu, lapack_int il, + lapack_int iu, float abstol, lapack_int* m, float* w, + lapack_complex_float* z, lapack_int ldz, + lapack_int* ifail ); +lapack_int LAPACKE_zheevx( int matrix_order, char jobz, char range, char uplo, + lapack_int n, lapack_complex_double* a, + lapack_int lda, double vl, double vu, lapack_int il, + 
lapack_int iu, double abstol, lapack_int* m, + double* w, lapack_complex_double* z, lapack_int ldz, + lapack_int* ifail ); + +lapack_int LAPACKE_chegst( int matrix_order, lapack_int itype, char uplo, + lapack_int n, lapack_complex_float* a, + lapack_int lda, const lapack_complex_float* b, + lapack_int ldb ); +lapack_int LAPACKE_zhegst( int matrix_order, lapack_int itype, char uplo, + lapack_int n, lapack_complex_double* a, + lapack_int lda, const lapack_complex_double* b, + lapack_int ldb ); + +lapack_int LAPACKE_chegv( int matrix_order, lapack_int itype, char jobz, + char uplo, lapack_int n, lapack_complex_float* a, + lapack_int lda, lapack_complex_float* b, + lapack_int ldb, float* w ); +lapack_int LAPACKE_zhegv( int matrix_order, lapack_int itype, char jobz, + char uplo, lapack_int n, lapack_complex_double* a, + lapack_int lda, lapack_complex_double* b, + lapack_int ldb, double* w ); + +lapack_int LAPACKE_chegvd( int matrix_order, lapack_int itype, char jobz, + char uplo, lapack_int n, lapack_complex_float* a, + lapack_int lda, lapack_complex_float* b, + lapack_int ldb, float* w ); +lapack_int LAPACKE_zhegvd( int matrix_order, lapack_int itype, char jobz, + char uplo, lapack_int n, lapack_complex_double* a, + lapack_int lda, lapack_complex_double* b, + lapack_int ldb, double* w ); + +lapack_int LAPACKE_chegvx( int matrix_order, lapack_int itype, char jobz, + char range, char uplo, lapack_int n, + lapack_complex_float* a, lapack_int lda, + lapack_complex_float* b, lapack_int ldb, float vl, + float vu, lapack_int il, lapack_int iu, float abstol, + lapack_int* m, float* w, lapack_complex_float* z, + lapack_int ldz, lapack_int* ifail ); +lapack_int LAPACKE_zhegvx( int matrix_order, lapack_int itype, char jobz, + char range, char uplo, lapack_int n, + lapack_complex_double* a, lapack_int lda, + lapack_complex_double* b, lapack_int ldb, double vl, + double vu, lapack_int il, lapack_int iu, + double abstol, lapack_int* m, double* w, + lapack_complex_double* z, 
lapack_int ldz, + lapack_int* ifail ); + +lapack_int LAPACKE_cherfs( int matrix_order, char uplo, lapack_int n, + lapack_int nrhs, const lapack_complex_float* a, + lapack_int lda, const lapack_complex_float* af, + lapack_int ldaf, const lapack_int* ipiv, + const lapack_complex_float* b, lapack_int ldb, + lapack_complex_float* x, lapack_int ldx, float* ferr, + float* berr ); +lapack_int LAPACKE_zherfs( int matrix_order, char uplo, lapack_int n, + lapack_int nrhs, const lapack_complex_double* a, + lapack_int lda, const lapack_complex_double* af, + lapack_int ldaf, const lapack_int* ipiv, + const lapack_complex_double* b, lapack_int ldb, + lapack_complex_double* x, lapack_int ldx, + double* ferr, double* berr ); + +lapack_int LAPACKE_cherfsx( int matrix_order, char uplo, char equed, + lapack_int n, lapack_int nrhs, + const lapack_complex_float* a, lapack_int lda, + const lapack_complex_float* af, lapack_int ldaf, + const lapack_int* ipiv, const float* s, + const lapack_complex_float* b, lapack_int ldb, + lapack_complex_float* x, lapack_int ldx, + float* rcond, float* berr, lapack_int n_err_bnds, + float* err_bnds_norm, float* err_bnds_comp, + lapack_int nparams, float* params ); +lapack_int LAPACKE_zherfsx( int matrix_order, char uplo, char equed, + lapack_int n, lapack_int nrhs, + const lapack_complex_double* a, lapack_int lda, + const lapack_complex_double* af, lapack_int ldaf, + const lapack_int* ipiv, const double* s, + const lapack_complex_double* b, lapack_int ldb, + lapack_complex_double* x, lapack_int ldx, + double* rcond, double* berr, lapack_int n_err_bnds, + double* err_bnds_norm, double* err_bnds_comp, + lapack_int nparams, double* params ); + +lapack_int LAPACKE_chesv( int matrix_order, char uplo, lapack_int n, + lapack_int nrhs, lapack_complex_float* a, + lapack_int lda, lapack_int* ipiv, + lapack_complex_float* b, lapack_int ldb ); +lapack_int LAPACKE_zhesv( int matrix_order, char uplo, lapack_int n, + lapack_int nrhs, lapack_complex_double* a, + 
lapack_int lda, lapack_int* ipiv, + lapack_complex_double* b, lapack_int ldb ); + +lapack_int LAPACKE_chesvx( int matrix_order, char fact, char uplo, lapack_int n, + lapack_int nrhs, const lapack_complex_float* a, + lapack_int lda, lapack_complex_float* af, + lapack_int ldaf, lapack_int* ipiv, + const lapack_complex_float* b, lapack_int ldb, + lapack_complex_float* x, lapack_int ldx, + float* rcond, float* ferr, float* berr ); +lapack_int LAPACKE_zhesvx( int matrix_order, char fact, char uplo, lapack_int n, + lapack_int nrhs, const lapack_complex_double* a, + lapack_int lda, lapack_complex_double* af, + lapack_int ldaf, lapack_int* ipiv, + const lapack_complex_double* b, lapack_int ldb, + lapack_complex_double* x, lapack_int ldx, + double* rcond, double* ferr, double* berr ); + +lapack_int LAPACKE_chesvxx( int matrix_order, char fact, char uplo, + lapack_int n, lapack_int nrhs, + lapack_complex_float* a, lapack_int lda, + lapack_complex_float* af, lapack_int ldaf, + lapack_int* ipiv, char* equed, float* s, + lapack_complex_float* b, lapack_int ldb, + lapack_complex_float* x, lapack_int ldx, + float* rcond, float* rpvgrw, float* berr, + lapack_int n_err_bnds, float* err_bnds_norm, + float* err_bnds_comp, lapack_int nparams, + float* params ); +lapack_int LAPACKE_zhesvxx( int matrix_order, char fact, char uplo, + lapack_int n, lapack_int nrhs, + lapack_complex_double* a, lapack_int lda, + lapack_complex_double* af, lapack_int ldaf, + lapack_int* ipiv, char* equed, double* s, + lapack_complex_double* b, lapack_int ldb, + lapack_complex_double* x, lapack_int ldx, + double* rcond, double* rpvgrw, double* berr, + lapack_int n_err_bnds, double* err_bnds_norm, + double* err_bnds_comp, lapack_int nparams, + double* params ); + +lapack_int LAPACKE_chetrd( int matrix_order, char uplo, lapack_int n, + lapack_complex_float* a, lapack_int lda, float* d, + float* e, lapack_complex_float* tau ); +lapack_int LAPACKE_zhetrd( int matrix_order, char uplo, lapack_int n, + 
lapack_complex_double* a, lapack_int lda, double* d, + double* e, lapack_complex_double* tau ); + +lapack_int LAPACKE_chetrf( int matrix_order, char uplo, lapack_int n, + lapack_complex_float* a, lapack_int lda, + lapack_int* ipiv ); +lapack_int LAPACKE_zhetrf( int matrix_order, char uplo, lapack_int n, + lapack_complex_double* a, lapack_int lda, + lapack_int* ipiv ); + +lapack_int LAPACKE_chetri( int matrix_order, char uplo, lapack_int n, + lapack_complex_float* a, lapack_int lda, + const lapack_int* ipiv ); +lapack_int LAPACKE_zhetri( int matrix_order, char uplo, lapack_int n, + lapack_complex_double* a, lapack_int lda, + const lapack_int* ipiv ); + +lapack_int LAPACKE_chetrs( int matrix_order, char uplo, lapack_int n, + lapack_int nrhs, const lapack_complex_float* a, + lapack_int lda, const lapack_int* ipiv, + lapack_complex_float* b, lapack_int ldb ); +lapack_int LAPACKE_zhetrs( int matrix_order, char uplo, lapack_int n, + lapack_int nrhs, const lapack_complex_double* a, + lapack_int lda, const lapack_int* ipiv, + lapack_complex_double* b, lapack_int ldb ); + +lapack_int LAPACKE_chfrk( int matrix_order, char transr, char uplo, char trans, + lapack_int n, lapack_int k, float alpha, + const lapack_complex_float* a, lapack_int lda, + float beta, lapack_complex_float* c ); +lapack_int LAPACKE_zhfrk( int matrix_order, char transr, char uplo, char trans, + lapack_int n, lapack_int k, double alpha, + const lapack_complex_double* a, lapack_int lda, + double beta, lapack_complex_double* c ); + +lapack_int LAPACKE_shgeqz( int matrix_order, char job, char compq, char compz, + lapack_int n, lapack_int ilo, lapack_int ihi, + float* h, lapack_int ldh, float* t, lapack_int ldt, + float* alphar, float* alphai, float* beta, float* q, + lapack_int ldq, float* z, lapack_int ldz ); +lapack_int LAPACKE_dhgeqz( int matrix_order, char job, char compq, char compz, + lapack_int n, lapack_int ilo, lapack_int ihi, + double* h, lapack_int ldh, double* t, lapack_int ldt, + double* alphar, 
double* alphai, double* beta, + double* q, lapack_int ldq, double* z, + lapack_int ldz ); +lapack_int LAPACKE_chgeqz( int matrix_order, char job, char compq, char compz, + lapack_int n, lapack_int ilo, lapack_int ihi, + lapack_complex_float* h, lapack_int ldh, + lapack_complex_float* t, lapack_int ldt, + lapack_complex_float* alpha, + lapack_complex_float* beta, lapack_complex_float* q, + lapack_int ldq, lapack_complex_float* z, + lapack_int ldz ); +lapack_int LAPACKE_zhgeqz( int matrix_order, char job, char compq, char compz, + lapack_int n, lapack_int ilo, lapack_int ihi, + lapack_complex_double* h, lapack_int ldh, + lapack_complex_double* t, lapack_int ldt, + lapack_complex_double* alpha, + lapack_complex_double* beta, + lapack_complex_double* q, lapack_int ldq, + lapack_complex_double* z, lapack_int ldz ); + +lapack_int LAPACKE_chpcon( int matrix_order, char uplo, lapack_int n, + const lapack_complex_float* ap, + const lapack_int* ipiv, float anorm, float* rcond ); +lapack_int LAPACKE_zhpcon( int matrix_order, char uplo, lapack_int n, + const lapack_complex_double* ap, + const lapack_int* ipiv, double anorm, + double* rcond ); + +lapack_int LAPACKE_chpev( int matrix_order, char jobz, char uplo, lapack_int n, + lapack_complex_float* ap, float* w, + lapack_complex_float* z, lapack_int ldz ); +lapack_int LAPACKE_zhpev( int matrix_order, char jobz, char uplo, lapack_int n, + lapack_complex_double* ap, double* w, + lapack_complex_double* z, lapack_int ldz ); + +lapack_int LAPACKE_chpevd( int matrix_order, char jobz, char uplo, lapack_int n, + lapack_complex_float* ap, float* w, + lapack_complex_float* z, lapack_int ldz ); +lapack_int LAPACKE_zhpevd( int matrix_order, char jobz, char uplo, lapack_int n, + lapack_complex_double* ap, double* w, + lapack_complex_double* z, lapack_int ldz ); + +lapack_int LAPACKE_chpevx( int matrix_order, char jobz, char range, char uplo, + lapack_int n, lapack_complex_float* ap, float vl, + float vu, lapack_int il, lapack_int iu, float 
abstol, + lapack_int* m, float* w, lapack_complex_float* z, + lapack_int ldz, lapack_int* ifail ); +lapack_int LAPACKE_zhpevx( int matrix_order, char jobz, char range, char uplo, + lapack_int n, lapack_complex_double* ap, double vl, + double vu, lapack_int il, lapack_int iu, + double abstol, lapack_int* m, double* w, + lapack_complex_double* z, lapack_int ldz, + lapack_int* ifail ); + +lapack_int LAPACKE_chpgst( int matrix_order, lapack_int itype, char uplo, + lapack_int n, lapack_complex_float* ap, + const lapack_complex_float* bp ); +lapack_int LAPACKE_zhpgst( int matrix_order, lapack_int itype, char uplo, + lapack_int n, lapack_complex_double* ap, + const lapack_complex_double* bp ); + +lapack_int LAPACKE_chpgv( int matrix_order, lapack_int itype, char jobz, + char uplo, lapack_int n, lapack_complex_float* ap, + lapack_complex_float* bp, float* w, + lapack_complex_float* z, lapack_int ldz ); +lapack_int LAPACKE_zhpgv( int matrix_order, lapack_int itype, char jobz, + char uplo, lapack_int n, lapack_complex_double* ap, + lapack_complex_double* bp, double* w, + lapack_complex_double* z, lapack_int ldz ); + +lapack_int LAPACKE_chpgvd( int matrix_order, lapack_int itype, char jobz, + char uplo, lapack_int n, lapack_complex_float* ap, + lapack_complex_float* bp, float* w, + lapack_complex_float* z, lapack_int ldz ); +lapack_int LAPACKE_zhpgvd( int matrix_order, lapack_int itype, char jobz, + char uplo, lapack_int n, lapack_complex_double* ap, + lapack_complex_double* bp, double* w, + lapack_complex_double* z, lapack_int ldz ); + +lapack_int LAPACKE_chpgvx( int matrix_order, lapack_int itype, char jobz, + char range, char uplo, lapack_int n, + lapack_complex_float* ap, lapack_complex_float* bp, + float vl, float vu, lapack_int il, lapack_int iu, + float abstol, lapack_int* m, float* w, + lapack_complex_float* z, lapack_int ldz, + lapack_int* ifail ); +lapack_int LAPACKE_zhpgvx( int matrix_order, lapack_int itype, char jobz, + char range, char uplo, lapack_int n, + 
lapack_complex_double* ap, lapack_complex_double* bp, + double vl, double vu, lapack_int il, lapack_int iu, + double abstol, lapack_int* m, double* w, + lapack_complex_double* z, lapack_int ldz, + lapack_int* ifail ); + +lapack_int LAPACKE_chprfs( int matrix_order, char uplo, lapack_int n, + lapack_int nrhs, const lapack_complex_float* ap, + const lapack_complex_float* afp, + const lapack_int* ipiv, + const lapack_complex_float* b, lapack_int ldb, + lapack_complex_float* x, lapack_int ldx, float* ferr, + float* berr ); +lapack_int LAPACKE_zhprfs( int matrix_order, char uplo, lapack_int n, + lapack_int nrhs, const lapack_complex_double* ap, + const lapack_complex_double* afp, + const lapack_int* ipiv, + const lapack_complex_double* b, lapack_int ldb, + lapack_complex_double* x, lapack_int ldx, + double* ferr, double* berr ); + +lapack_int LAPACKE_chpsv( int matrix_order, char uplo, lapack_int n, + lapack_int nrhs, lapack_complex_float* ap, + lapack_int* ipiv, lapack_complex_float* b, + lapack_int ldb ); +lapack_int LAPACKE_zhpsv( int matrix_order, char uplo, lapack_int n, + lapack_int nrhs, lapack_complex_double* ap, + lapack_int* ipiv, lapack_complex_double* b, + lapack_int ldb ); + +lapack_int LAPACKE_chpsvx( int matrix_order, char fact, char uplo, lapack_int n, + lapack_int nrhs, const lapack_complex_float* ap, + lapack_complex_float* afp, lapack_int* ipiv, + const lapack_complex_float* b, lapack_int ldb, + lapack_complex_float* x, lapack_int ldx, + float* rcond, float* ferr, float* berr ); +lapack_int LAPACKE_zhpsvx( int matrix_order, char fact, char uplo, lapack_int n, + lapack_int nrhs, const lapack_complex_double* ap, + lapack_complex_double* afp, lapack_int* ipiv, + const lapack_complex_double* b, lapack_int ldb, + lapack_complex_double* x, lapack_int ldx, + double* rcond, double* ferr, double* berr ); + +lapack_int LAPACKE_chptrd( int matrix_order, char uplo, lapack_int n, + lapack_complex_float* ap, float* d, float* e, + lapack_complex_float* tau ); 
+lapack_int LAPACKE_zhptrd( int matrix_order, char uplo, lapack_int n, + lapack_complex_double* ap, double* d, double* e, + lapack_complex_double* tau ); + +lapack_int LAPACKE_chptrf( int matrix_order, char uplo, lapack_int n, + lapack_complex_float* ap, lapack_int* ipiv ); +lapack_int LAPACKE_zhptrf( int matrix_order, char uplo, lapack_int n, + lapack_complex_double* ap, lapack_int* ipiv ); + +lapack_int LAPACKE_chptri( int matrix_order, char uplo, lapack_int n, + lapack_complex_float* ap, const lapack_int* ipiv ); +lapack_int LAPACKE_zhptri( int matrix_order, char uplo, lapack_int n, + lapack_complex_double* ap, const lapack_int* ipiv ); + +lapack_int LAPACKE_chptrs( int matrix_order, char uplo, lapack_int n, + lapack_int nrhs, const lapack_complex_float* ap, + const lapack_int* ipiv, lapack_complex_float* b, + lapack_int ldb ); +lapack_int LAPACKE_zhptrs( int matrix_order, char uplo, lapack_int n, + lapack_int nrhs, const lapack_complex_double* ap, + const lapack_int* ipiv, lapack_complex_double* b, + lapack_int ldb ); + +lapack_int LAPACKE_shsein( int matrix_order, char job, char eigsrc, char initv, + lapack_logical* select, lapack_int n, const float* h, + lapack_int ldh, float* wr, const float* wi, + float* vl, lapack_int ldvl, float* vr, + lapack_int ldvr, lapack_int mm, lapack_int* m, + lapack_int* ifaill, lapack_int* ifailr ); +lapack_int LAPACKE_dhsein( int matrix_order, char job, char eigsrc, char initv, + lapack_logical* select, lapack_int n, + const double* h, lapack_int ldh, double* wr, + const double* wi, double* vl, lapack_int ldvl, + double* vr, lapack_int ldvr, lapack_int mm, + lapack_int* m, lapack_int* ifaill, + lapack_int* ifailr ); +lapack_int LAPACKE_chsein( int matrix_order, char job, char eigsrc, char initv, + const lapack_logical* select, lapack_int n, + const lapack_complex_float* h, lapack_int ldh, + lapack_complex_float* w, lapack_complex_float* vl, + lapack_int ldvl, lapack_complex_float* vr, + lapack_int ldvr, lapack_int mm, 
lapack_int* m, + lapack_int* ifaill, lapack_int* ifailr ); +lapack_int LAPACKE_zhsein( int matrix_order, char job, char eigsrc, char initv, + const lapack_logical* select, lapack_int n, + const lapack_complex_double* h, lapack_int ldh, + lapack_complex_double* w, lapack_complex_double* vl, + lapack_int ldvl, lapack_complex_double* vr, + lapack_int ldvr, lapack_int mm, lapack_int* m, + lapack_int* ifaill, lapack_int* ifailr ); + +lapack_int LAPACKE_shseqr( int matrix_order, char job, char compz, lapack_int n, + lapack_int ilo, lapack_int ihi, float* h, + lapack_int ldh, float* wr, float* wi, float* z, + lapack_int ldz ); +lapack_int LAPACKE_dhseqr( int matrix_order, char job, char compz, lapack_int n, + lapack_int ilo, lapack_int ihi, double* h, + lapack_int ldh, double* wr, double* wi, double* z, + lapack_int ldz ); +lapack_int LAPACKE_chseqr( int matrix_order, char job, char compz, lapack_int n, + lapack_int ilo, lapack_int ihi, + lapack_complex_float* h, lapack_int ldh, + lapack_complex_float* w, lapack_complex_float* z, + lapack_int ldz ); +lapack_int LAPACKE_zhseqr( int matrix_order, char job, char compz, lapack_int n, + lapack_int ilo, lapack_int ihi, + lapack_complex_double* h, lapack_int ldh, + lapack_complex_double* w, lapack_complex_double* z, + lapack_int ldz ); + +lapack_int LAPACKE_clacgv( lapack_int n, lapack_complex_float* x, + lapack_int incx ); +lapack_int LAPACKE_zlacgv( lapack_int n, lapack_complex_double* x, + lapack_int incx ); + +lapack_int LAPACKE_slacpy( int matrix_order, char uplo, lapack_int m, + lapack_int n, const float* a, lapack_int lda, float* b, + lapack_int ldb ); +lapack_int LAPACKE_dlacpy( int matrix_order, char uplo, lapack_int m, + lapack_int n, const double* a, lapack_int lda, double* b, + lapack_int ldb ); +lapack_int LAPACKE_clacpy( int matrix_order, char uplo, lapack_int m, + lapack_int n, const lapack_complex_float* a, + lapack_int lda, lapack_complex_float* b, + lapack_int ldb ); +lapack_int LAPACKE_zlacpy( int 
matrix_order, char uplo, lapack_int m, + lapack_int n, const lapack_complex_double* a, + lapack_int lda, lapack_complex_double* b, + lapack_int ldb ); + +lapack_int LAPACKE_zlag2c( int matrix_order, lapack_int m, lapack_int n, + const lapack_complex_double* a, lapack_int lda, + lapack_complex_float* sa, lapack_int ldsa ); + +lapack_int LAPACKE_slag2d( int matrix_order, lapack_int m, lapack_int n, + const float* sa, lapack_int ldsa, double* a, + lapack_int lda ); + +lapack_int LAPACKE_dlag2s( int matrix_order, lapack_int m, lapack_int n, + const double* a, lapack_int lda, float* sa, + lapack_int ldsa ); + +lapack_int LAPACKE_clag2z( int matrix_order, lapack_int m, lapack_int n, + const lapack_complex_float* sa, lapack_int ldsa, + lapack_complex_double* a, lapack_int lda ); + +lapack_int LAPACKE_slagge( int matrix_order, lapack_int m, lapack_int n, + lapack_int kl, lapack_int ku, const float* d, + float* a, lapack_int lda, lapack_int* iseed ); +lapack_int LAPACKE_dlagge( int matrix_order, lapack_int m, lapack_int n, + lapack_int kl, lapack_int ku, const double* d, + double* a, lapack_int lda, lapack_int* iseed ); +lapack_int LAPACKE_clagge( int matrix_order, lapack_int m, lapack_int n, + lapack_int kl, lapack_int ku, const float* d, + lapack_complex_float* a, lapack_int lda, + lapack_int* iseed ); +lapack_int LAPACKE_zlagge( int matrix_order, lapack_int m, lapack_int n, + lapack_int kl, lapack_int ku, const double* d, + lapack_complex_double* a, lapack_int lda, + lapack_int* iseed ); + +float LAPACKE_slamch( char cmach ); +double LAPACKE_dlamch( char cmach ); + +float LAPACKE_slange( int matrix_order, char norm, lapack_int m, + lapack_int n, const float* a, lapack_int lda ); +double LAPACKE_dlange( int matrix_order, char norm, lapack_int m, + lapack_int n, const double* a, lapack_int lda ); +float LAPACKE_clange( int matrix_order, char norm, lapack_int m, + lapack_int n, const lapack_complex_float* a, + lapack_int lda ); +double LAPACKE_zlange( int matrix_order, char 
norm, lapack_int m, + lapack_int n, const lapack_complex_double* a, + lapack_int lda ); + +float LAPACKE_clanhe( int matrix_order, char norm, char uplo, lapack_int n, + const lapack_complex_float* a, lapack_int lda ); +double LAPACKE_zlanhe( int matrix_order, char norm, char uplo, lapack_int n, + const lapack_complex_double* a, lapack_int lda ); + +float LAPACKE_slansy( int matrix_order, char norm, char uplo, lapack_int n, + const float* a, lapack_int lda ); +double LAPACKE_dlansy( int matrix_order, char norm, char uplo, lapack_int n, + const double* a, lapack_int lda ); +float LAPACKE_clansy( int matrix_order, char norm, char uplo, lapack_int n, + const lapack_complex_float* a, lapack_int lda ); +double LAPACKE_zlansy( int matrix_order, char norm, char uplo, lapack_int n, + const lapack_complex_double* a, lapack_int lda ); + +float LAPACKE_slantr( int matrix_order, char norm, char uplo, char diag, + lapack_int m, lapack_int n, const float* a, + lapack_int lda ); +double LAPACKE_dlantr( int matrix_order, char norm, char uplo, char diag, + lapack_int m, lapack_int n, const double* a, + lapack_int lda ); +float LAPACKE_clantr( int matrix_order, char norm, char uplo, char diag, + lapack_int m, lapack_int n, const lapack_complex_float* a, + lapack_int lda ); +double LAPACKE_zlantr( int matrix_order, char norm, char uplo, char diag, + lapack_int m, lapack_int n, const lapack_complex_double* a, + lapack_int lda ); + + +lapack_int LAPACKE_slarfb( int matrix_order, char side, char trans, char direct, + char storev, lapack_int m, lapack_int n, + lapack_int k, const float* v, lapack_int ldv, + const float* t, lapack_int ldt, float* c, + lapack_int ldc ); +lapack_int LAPACKE_dlarfb( int matrix_order, char side, char trans, char direct, + char storev, lapack_int m, lapack_int n, + lapack_int k, const double* v, lapack_int ldv, + const double* t, lapack_int ldt, double* c, + lapack_int ldc ); +lapack_int LAPACKE_clarfb( int matrix_order, char side, char trans, char direct, + 
char storev, lapack_int m, lapack_int n, + lapack_int k, const lapack_complex_float* v, + lapack_int ldv, const lapack_complex_float* t, + lapack_int ldt, lapack_complex_float* c, + lapack_int ldc ); +lapack_int LAPACKE_zlarfb( int matrix_order, char side, char trans, char direct, + char storev, lapack_int m, lapack_int n, + lapack_int k, const lapack_complex_double* v, + lapack_int ldv, const lapack_complex_double* t, + lapack_int ldt, lapack_complex_double* c, + lapack_int ldc ); + +lapack_int LAPACKE_slarfg( lapack_int n, float* alpha, float* x, + lapack_int incx, float* tau ); +lapack_int LAPACKE_dlarfg( lapack_int n, double* alpha, double* x, + lapack_int incx, double* tau ); +lapack_int LAPACKE_clarfg( lapack_int n, lapack_complex_float* alpha, + lapack_complex_float* x, lapack_int incx, + lapack_complex_float* tau ); +lapack_int LAPACKE_zlarfg( lapack_int n, lapack_complex_double* alpha, + lapack_complex_double* x, lapack_int incx, + lapack_complex_double* tau ); + +lapack_int LAPACKE_slarft( int matrix_order, char direct, char storev, + lapack_int n, lapack_int k, const float* v, + lapack_int ldv, const float* tau, float* t, + lapack_int ldt ); +lapack_int LAPACKE_dlarft( int matrix_order, char direct, char storev, + lapack_int n, lapack_int k, const double* v, + lapack_int ldv, const double* tau, double* t, + lapack_int ldt ); +lapack_int LAPACKE_clarft( int matrix_order, char direct, char storev, + lapack_int n, lapack_int k, + const lapack_complex_float* v, lapack_int ldv, + const lapack_complex_float* tau, + lapack_complex_float* t, lapack_int ldt ); +lapack_int LAPACKE_zlarft( int matrix_order, char direct, char storev, + lapack_int n, lapack_int k, + const lapack_complex_double* v, lapack_int ldv, + const lapack_complex_double* tau, + lapack_complex_double* t, lapack_int ldt ); + +lapack_int LAPACKE_slarfx( int matrix_order, char side, lapack_int m, + lapack_int n, const float* v, float tau, float* c, + lapack_int ldc, float* work ); +lapack_int 
LAPACKE_dlarfx( int matrix_order, char side, lapack_int m, + lapack_int n, const double* v, double tau, double* c, + lapack_int ldc, double* work ); +lapack_int LAPACKE_clarfx( int matrix_order, char side, lapack_int m, + lapack_int n, const lapack_complex_float* v, + lapack_complex_float tau, lapack_complex_float* c, + lapack_int ldc, lapack_complex_float* work ); +lapack_int LAPACKE_zlarfx( int matrix_order, char side, lapack_int m, + lapack_int n, const lapack_complex_double* v, + lapack_complex_double tau, lapack_complex_double* c, + lapack_int ldc, lapack_complex_double* work ); + +lapack_int LAPACKE_slarnv( lapack_int idist, lapack_int* iseed, lapack_int n, + float* x ); +lapack_int LAPACKE_dlarnv( lapack_int idist, lapack_int* iseed, lapack_int n, + double* x ); +lapack_int LAPACKE_clarnv( lapack_int idist, lapack_int* iseed, lapack_int n, + lapack_complex_float* x ); +lapack_int LAPACKE_zlarnv( lapack_int idist, lapack_int* iseed, lapack_int n, + lapack_complex_double* x ); + +lapack_int LAPACKE_slaset( int matrix_order, char uplo, lapack_int m, + lapack_int n, float alpha, float beta, float* a, + lapack_int lda ); +lapack_int LAPACKE_dlaset( int matrix_order, char uplo, lapack_int m, + lapack_int n, double alpha, double beta, double* a, + lapack_int lda ); +lapack_int LAPACKE_claset( int matrix_order, char uplo, lapack_int m, + lapack_int n, lapack_complex_float alpha, + lapack_complex_float beta, lapack_complex_float* a, + lapack_int lda ); +lapack_int LAPACKE_zlaset( int matrix_order, char uplo, lapack_int m, + lapack_int n, lapack_complex_double alpha, + lapack_complex_double beta, lapack_complex_double* a, + lapack_int lda ); + +lapack_int LAPACKE_slasrt( char id, lapack_int n, float* d ); +lapack_int LAPACKE_dlasrt( char id, lapack_int n, double* d ); + +lapack_int LAPACKE_slaswp( int matrix_order, lapack_int n, float* a, + lapack_int lda, lapack_int k1, lapack_int k2, + const lapack_int* ipiv, lapack_int incx ); +lapack_int LAPACKE_dlaswp( int 
matrix_order, lapack_int n, double* a, + lapack_int lda, lapack_int k1, lapack_int k2, + const lapack_int* ipiv, lapack_int incx ); +lapack_int LAPACKE_claswp( int matrix_order, lapack_int n, + lapack_complex_float* a, lapack_int lda, + lapack_int k1, lapack_int k2, const lapack_int* ipiv, + lapack_int incx ); +lapack_int LAPACKE_zlaswp( int matrix_order, lapack_int n, + lapack_complex_double* a, lapack_int lda, + lapack_int k1, lapack_int k2, const lapack_int* ipiv, + lapack_int incx ); + +lapack_int LAPACKE_slatms( int matrix_order, lapack_int m, lapack_int n, + char dist, lapack_int* iseed, char sym, float* d, + lapack_int mode, float cond, float dmax, + lapack_int kl, lapack_int ku, char pack, float* a, + lapack_int lda ); +lapack_int LAPACKE_dlatms( int matrix_order, lapack_int m, lapack_int n, + char dist, lapack_int* iseed, char sym, double* d, + lapack_int mode, double cond, double dmax, + lapack_int kl, lapack_int ku, char pack, double* a, + lapack_int lda ); +lapack_int LAPACKE_clatms( int matrix_order, lapack_int m, lapack_int n, + char dist, lapack_int* iseed, char sym, float* d, + lapack_int mode, float cond, float dmax, + lapack_int kl, lapack_int ku, char pack, + lapack_complex_float* a, lapack_int lda ); +lapack_int LAPACKE_zlatms( int matrix_order, lapack_int m, lapack_int n, + char dist, lapack_int* iseed, char sym, double* d, + lapack_int mode, double cond, double dmax, + lapack_int kl, lapack_int ku, char pack, + lapack_complex_double* a, lapack_int lda ); + +lapack_int LAPACKE_slauum( int matrix_order, char uplo, lapack_int n, float* a, + lapack_int lda ); +lapack_int LAPACKE_dlauum( int matrix_order, char uplo, lapack_int n, double* a, + lapack_int lda ); +lapack_int LAPACKE_clauum( int matrix_order, char uplo, lapack_int n, + lapack_complex_float* a, lapack_int lda ); +lapack_int LAPACKE_zlauum( int matrix_order, char uplo, lapack_int n, + lapack_complex_double* a, lapack_int lda ); + +lapack_int LAPACKE_sopgtr( int matrix_order, char uplo, 
lapack_int n, + const float* ap, const float* tau, float* q, + lapack_int ldq ); +lapack_int LAPACKE_dopgtr( int matrix_order, char uplo, lapack_int n, + const double* ap, const double* tau, double* q, + lapack_int ldq ); + +lapack_int LAPACKE_sopmtr( int matrix_order, char side, char uplo, char trans, + lapack_int m, lapack_int n, const float* ap, + const float* tau, float* c, lapack_int ldc ); +lapack_int LAPACKE_dopmtr( int matrix_order, char side, char uplo, char trans, + lapack_int m, lapack_int n, const double* ap, + const double* tau, double* c, lapack_int ldc ); + +lapack_int LAPACKE_sorgbr( int matrix_order, char vect, lapack_int m, + lapack_int n, lapack_int k, float* a, lapack_int lda, + const float* tau ); +lapack_int LAPACKE_dorgbr( int matrix_order, char vect, lapack_int m, + lapack_int n, lapack_int k, double* a, + lapack_int lda, const double* tau ); + +lapack_int LAPACKE_sorghr( int matrix_order, lapack_int n, lapack_int ilo, + lapack_int ihi, float* a, lapack_int lda, + const float* tau ); +lapack_int LAPACKE_dorghr( int matrix_order, lapack_int n, lapack_int ilo, + lapack_int ihi, double* a, lapack_int lda, + const double* tau ); + +lapack_int LAPACKE_sorglq( int matrix_order, lapack_int m, lapack_int n, + lapack_int k, float* a, lapack_int lda, + const float* tau ); +lapack_int LAPACKE_dorglq( int matrix_order, lapack_int m, lapack_int n, + lapack_int k, double* a, lapack_int lda, + const double* tau ); + +lapack_int LAPACKE_sorgql( int matrix_order, lapack_int m, lapack_int n, + lapack_int k, float* a, lapack_int lda, + const float* tau ); +lapack_int LAPACKE_dorgql( int matrix_order, lapack_int m, lapack_int n, + lapack_int k, double* a, lapack_int lda, + const double* tau ); + +lapack_int LAPACKE_sorgqr( int matrix_order, lapack_int m, lapack_int n, + lapack_int k, float* a, lapack_int lda, + const float* tau ); +lapack_int LAPACKE_dorgqr( int matrix_order, lapack_int m, lapack_int n, + lapack_int k, double* a, lapack_int lda, + const double* 
tau ); + +lapack_int LAPACKE_sorgrq( int matrix_order, lapack_int m, lapack_int n, + lapack_int k, float* a, lapack_int lda, + const float* tau ); +lapack_int LAPACKE_dorgrq( int matrix_order, lapack_int m, lapack_int n, + lapack_int k, double* a, lapack_int lda, + const double* tau ); + +lapack_int LAPACKE_sorgtr( int matrix_order, char uplo, lapack_int n, float* a, + lapack_int lda, const float* tau ); +lapack_int LAPACKE_dorgtr( int matrix_order, char uplo, lapack_int n, double* a, + lapack_int lda, const double* tau ); + +lapack_int LAPACKE_sormbr( int matrix_order, char vect, char side, char trans, + lapack_int m, lapack_int n, lapack_int k, + const float* a, lapack_int lda, const float* tau, + float* c, lapack_int ldc ); +lapack_int LAPACKE_dormbr( int matrix_order, char vect, char side, char trans, + lapack_int m, lapack_int n, lapack_int k, + const double* a, lapack_int lda, const double* tau, + double* c, lapack_int ldc ); + +lapack_int LAPACKE_sormhr( int matrix_order, char side, char trans, + lapack_int m, lapack_int n, lapack_int ilo, + lapack_int ihi, const float* a, lapack_int lda, + const float* tau, float* c, lapack_int ldc ); +lapack_int LAPACKE_dormhr( int matrix_order, char side, char trans, + lapack_int m, lapack_int n, lapack_int ilo, + lapack_int ihi, const double* a, lapack_int lda, + const double* tau, double* c, lapack_int ldc ); + +lapack_int LAPACKE_sormlq( int matrix_order, char side, char trans, + lapack_int m, lapack_int n, lapack_int k, + const float* a, lapack_int lda, const float* tau, + float* c, lapack_int ldc ); +lapack_int LAPACKE_dormlq( int matrix_order, char side, char trans, + lapack_int m, lapack_int n, lapack_int k, + const double* a, lapack_int lda, const double* tau, + double* c, lapack_int ldc ); + +lapack_int LAPACKE_sormql( int matrix_order, char side, char trans, + lapack_int m, lapack_int n, lapack_int k, + const float* a, lapack_int lda, const float* tau, + float* c, lapack_int ldc ); +lapack_int LAPACKE_dormql( 
int matrix_order, char side, char trans, + lapack_int m, lapack_int n, lapack_int k, + const double* a, lapack_int lda, const double* tau, + double* c, lapack_int ldc ); + +lapack_int LAPACKE_sormqr( int matrix_order, char side, char trans, + lapack_int m, lapack_int n, lapack_int k, + const float* a, lapack_int lda, const float* tau, + float* c, lapack_int ldc ); +lapack_int LAPACKE_dormqr( int matrix_order, char side, char trans, + lapack_int m, lapack_int n, lapack_int k, + const double* a, lapack_int lda, const double* tau, + double* c, lapack_int ldc ); + +lapack_int LAPACKE_sormrq( int matrix_order, char side, char trans, + lapack_int m, lapack_int n, lapack_int k, + const float* a, lapack_int lda, const float* tau, + float* c, lapack_int ldc ); +lapack_int LAPACKE_dormrq( int matrix_order, char side, char trans, + lapack_int m, lapack_int n, lapack_int k, + const double* a, lapack_int lda, const double* tau, + double* c, lapack_int ldc ); + +lapack_int LAPACKE_sormrz( int matrix_order, char side, char trans, + lapack_int m, lapack_int n, lapack_int k, + lapack_int l, const float* a, lapack_int lda, + const float* tau, float* c, lapack_int ldc ); +lapack_int LAPACKE_dormrz( int matrix_order, char side, char trans, + lapack_int m, lapack_int n, lapack_int k, + lapack_int l, const double* a, lapack_int lda, + const double* tau, double* c, lapack_int ldc ); + +lapack_int LAPACKE_sormtr( int matrix_order, char side, char uplo, char trans, + lapack_int m, lapack_int n, const float* a, + lapack_int lda, const float* tau, float* c, + lapack_int ldc ); +lapack_int LAPACKE_dormtr( int matrix_order, char side, char uplo, char trans, + lapack_int m, lapack_int n, const double* a, + lapack_int lda, const double* tau, double* c, + lapack_int ldc ); + +lapack_int LAPACKE_spbcon( int matrix_order, char uplo, lapack_int n, + lapack_int kd, const float* ab, lapack_int ldab, + float anorm, float* rcond ); +lapack_int LAPACKE_dpbcon( int matrix_order, char uplo, lapack_int n, + 
lapack_int kd, const double* ab, lapack_int ldab, + double anorm, double* rcond ); +lapack_int LAPACKE_cpbcon( int matrix_order, char uplo, lapack_int n, + lapack_int kd, const lapack_complex_float* ab, + lapack_int ldab, float anorm, float* rcond ); +lapack_int LAPACKE_zpbcon( int matrix_order, char uplo, lapack_int n, + lapack_int kd, const lapack_complex_double* ab, + lapack_int ldab, double anorm, double* rcond ); + +lapack_int LAPACKE_spbequ( int matrix_order, char uplo, lapack_int n, + lapack_int kd, const float* ab, lapack_int ldab, + float* s, float* scond, float* amax ); +lapack_int LAPACKE_dpbequ( int matrix_order, char uplo, lapack_int n, + lapack_int kd, const double* ab, lapack_int ldab, + double* s, double* scond, double* amax ); +lapack_int LAPACKE_cpbequ( int matrix_order, char uplo, lapack_int n, + lapack_int kd, const lapack_complex_float* ab, + lapack_int ldab, float* s, float* scond, + float* amax ); +lapack_int LAPACKE_zpbequ( int matrix_order, char uplo, lapack_int n, + lapack_int kd, const lapack_complex_double* ab, + lapack_int ldab, double* s, double* scond, + double* amax ); + +lapack_int LAPACKE_spbrfs( int matrix_order, char uplo, lapack_int n, + lapack_int kd, lapack_int nrhs, const float* ab, + lapack_int ldab, const float* afb, lapack_int ldafb, + const float* b, lapack_int ldb, float* x, + lapack_int ldx, float* ferr, float* berr ); +lapack_int LAPACKE_dpbrfs( int matrix_order, char uplo, lapack_int n, + lapack_int kd, lapack_int nrhs, const double* ab, + lapack_int ldab, const double* afb, lapack_int ldafb, + const double* b, lapack_int ldb, double* x, + lapack_int ldx, double* ferr, double* berr ); +lapack_int LAPACKE_cpbrfs( int matrix_order, char uplo, lapack_int n, + lapack_int kd, lapack_int nrhs, + const lapack_complex_float* ab, lapack_int ldab, + const lapack_complex_float* afb, lapack_int ldafb, + const lapack_complex_float* b, lapack_int ldb, + lapack_complex_float* x, lapack_int ldx, float* ferr, + float* berr ); 
+lapack_int LAPACKE_zpbrfs( int matrix_order, char uplo, lapack_int n, + lapack_int kd, lapack_int nrhs, + const lapack_complex_double* ab, lapack_int ldab, + const lapack_complex_double* afb, lapack_int ldafb, + const lapack_complex_double* b, lapack_int ldb, + lapack_complex_double* x, lapack_int ldx, + double* ferr, double* berr ); + +lapack_int LAPACKE_spbstf( int matrix_order, char uplo, lapack_int n, + lapack_int kb, float* bb, lapack_int ldbb ); +lapack_int LAPACKE_dpbstf( int matrix_order, char uplo, lapack_int n, + lapack_int kb, double* bb, lapack_int ldbb ); +lapack_int LAPACKE_cpbstf( int matrix_order, char uplo, lapack_int n, + lapack_int kb, lapack_complex_float* bb, + lapack_int ldbb ); +lapack_int LAPACKE_zpbstf( int matrix_order, char uplo, lapack_int n, + lapack_int kb, lapack_complex_double* bb, + lapack_int ldbb ); + +lapack_int LAPACKE_spbsv( int matrix_order, char uplo, lapack_int n, + lapack_int kd, lapack_int nrhs, float* ab, + lapack_int ldab, float* b, lapack_int ldb ); +lapack_int LAPACKE_dpbsv( int matrix_order, char uplo, lapack_int n, + lapack_int kd, lapack_int nrhs, double* ab, + lapack_int ldab, double* b, lapack_int ldb ); +lapack_int LAPACKE_cpbsv( int matrix_order, char uplo, lapack_int n, + lapack_int kd, lapack_int nrhs, + lapack_complex_float* ab, lapack_int ldab, + lapack_complex_float* b, lapack_int ldb ); +lapack_int LAPACKE_zpbsv( int matrix_order, char uplo, lapack_int n, + lapack_int kd, lapack_int nrhs, + lapack_complex_double* ab, lapack_int ldab, + lapack_complex_double* b, lapack_int ldb ); + +lapack_int LAPACKE_spbsvx( int matrix_order, char fact, char uplo, lapack_int n, + lapack_int kd, lapack_int nrhs, float* ab, + lapack_int ldab, float* afb, lapack_int ldafb, + char* equed, float* s, float* b, lapack_int ldb, + float* x, lapack_int ldx, float* rcond, float* ferr, + float* berr ); +lapack_int LAPACKE_dpbsvx( int matrix_order, char fact, char uplo, lapack_int n, + lapack_int kd, lapack_int nrhs, double* ab, + 
lapack_int ldab, double* afb, lapack_int ldafb, + char* equed, double* s, double* b, lapack_int ldb, + double* x, lapack_int ldx, double* rcond, + double* ferr, double* berr ); +lapack_int LAPACKE_cpbsvx( int matrix_order, char fact, char uplo, lapack_int n, + lapack_int kd, lapack_int nrhs, + lapack_complex_float* ab, lapack_int ldab, + lapack_complex_float* afb, lapack_int ldafb, + char* equed, float* s, lapack_complex_float* b, + lapack_int ldb, lapack_complex_float* x, + lapack_int ldx, float* rcond, float* ferr, + float* berr ); +lapack_int LAPACKE_zpbsvx( int matrix_order, char fact, char uplo, lapack_int n, + lapack_int kd, lapack_int nrhs, + lapack_complex_double* ab, lapack_int ldab, + lapack_complex_double* afb, lapack_int ldafb, + char* equed, double* s, lapack_complex_double* b, + lapack_int ldb, lapack_complex_double* x, + lapack_int ldx, double* rcond, double* ferr, + double* berr ); + +lapack_int LAPACKE_spbtrf( int matrix_order, char uplo, lapack_int n, + lapack_int kd, float* ab, lapack_int ldab ); +lapack_int LAPACKE_dpbtrf( int matrix_order, char uplo, lapack_int n, + lapack_int kd, double* ab, lapack_int ldab ); +lapack_int LAPACKE_cpbtrf( int matrix_order, char uplo, lapack_int n, + lapack_int kd, lapack_complex_float* ab, + lapack_int ldab ); +lapack_int LAPACKE_zpbtrf( int matrix_order, char uplo, lapack_int n, + lapack_int kd, lapack_complex_double* ab, + lapack_int ldab ); + +lapack_int LAPACKE_spbtrs( int matrix_order, char uplo, lapack_int n, + lapack_int kd, lapack_int nrhs, const float* ab, + lapack_int ldab, float* b, lapack_int ldb ); +lapack_int LAPACKE_dpbtrs( int matrix_order, char uplo, lapack_int n, + lapack_int kd, lapack_int nrhs, const double* ab, + lapack_int ldab, double* b, lapack_int ldb ); +lapack_int LAPACKE_cpbtrs( int matrix_order, char uplo, lapack_int n, + lapack_int kd, lapack_int nrhs, + const lapack_complex_float* ab, lapack_int ldab, + lapack_complex_float* b, lapack_int ldb ); +lapack_int LAPACKE_zpbtrs( int 
matrix_order, char uplo, lapack_int n, + lapack_int kd, lapack_int nrhs, + const lapack_complex_double* ab, lapack_int ldab, + lapack_complex_double* b, lapack_int ldb ); + +lapack_int LAPACKE_spftrf( int matrix_order, char transr, char uplo, + lapack_int n, float* a ); +lapack_int LAPACKE_dpftrf( int matrix_order, char transr, char uplo, + lapack_int n, double* a ); +lapack_int LAPACKE_cpftrf( int matrix_order, char transr, char uplo, + lapack_int n, lapack_complex_float* a ); +lapack_int LAPACKE_zpftrf( int matrix_order, char transr, char uplo, + lapack_int n, lapack_complex_double* a ); + +lapack_int LAPACKE_spftri( int matrix_order, char transr, char uplo, + lapack_int n, float* a ); +lapack_int LAPACKE_dpftri( int matrix_order, char transr, char uplo, + lapack_int n, double* a ); +lapack_int LAPACKE_cpftri( int matrix_order, char transr, char uplo, + lapack_int n, lapack_complex_float* a ); +lapack_int LAPACKE_zpftri( int matrix_order, char transr, char uplo, + lapack_int n, lapack_complex_double* a ); + +lapack_int LAPACKE_spftrs( int matrix_order, char transr, char uplo, + lapack_int n, lapack_int nrhs, const float* a, + float* b, lapack_int ldb ); +lapack_int LAPACKE_dpftrs( int matrix_order, char transr, char uplo, + lapack_int n, lapack_int nrhs, const double* a, + double* b, lapack_int ldb ); +lapack_int LAPACKE_cpftrs( int matrix_order, char transr, char uplo, + lapack_int n, lapack_int nrhs, + const lapack_complex_float* a, + lapack_complex_float* b, lapack_int ldb ); +lapack_int LAPACKE_zpftrs( int matrix_order, char transr, char uplo, + lapack_int n, lapack_int nrhs, + const lapack_complex_double* a, + lapack_complex_double* b, lapack_int ldb ); + +lapack_int LAPACKE_spocon( int matrix_order, char uplo, lapack_int n, + const float* a, lapack_int lda, float anorm, + float* rcond ); +lapack_int LAPACKE_dpocon( int matrix_order, char uplo, lapack_int n, + const double* a, lapack_int lda, double anorm, + double* rcond ); +lapack_int LAPACKE_cpocon( int 
matrix_order, char uplo, lapack_int n, + const lapack_complex_float* a, lapack_int lda, + float anorm, float* rcond ); +lapack_int LAPACKE_zpocon( int matrix_order, char uplo, lapack_int n, + const lapack_complex_double* a, lapack_int lda, + double anorm, double* rcond ); + +lapack_int LAPACKE_spoequ( int matrix_order, lapack_int n, const float* a, + lapack_int lda, float* s, float* scond, + float* amax ); +lapack_int LAPACKE_dpoequ( int matrix_order, lapack_int n, const double* a, + lapack_int lda, double* s, double* scond, + double* amax ); +lapack_int LAPACKE_cpoequ( int matrix_order, lapack_int n, + const lapack_complex_float* a, lapack_int lda, + float* s, float* scond, float* amax ); +lapack_int LAPACKE_zpoequ( int matrix_order, lapack_int n, + const lapack_complex_double* a, lapack_int lda, + double* s, double* scond, double* amax ); + +lapack_int LAPACKE_spoequb( int matrix_order, lapack_int n, const float* a, + lapack_int lda, float* s, float* scond, + float* amax ); +lapack_int LAPACKE_dpoequb( int matrix_order, lapack_int n, const double* a, + lapack_int lda, double* s, double* scond, + double* amax ); +lapack_int LAPACKE_cpoequb( int matrix_order, lapack_int n, + const lapack_complex_float* a, lapack_int lda, + float* s, float* scond, float* amax ); +lapack_int LAPACKE_zpoequb( int matrix_order, lapack_int n, + const lapack_complex_double* a, lapack_int lda, + double* s, double* scond, double* amax ); + +lapack_int LAPACKE_sporfs( int matrix_order, char uplo, lapack_int n, + lapack_int nrhs, const float* a, lapack_int lda, + const float* af, lapack_int ldaf, const float* b, + lapack_int ldb, float* x, lapack_int ldx, + float* ferr, float* berr ); +lapack_int LAPACKE_dporfs( int matrix_order, char uplo, lapack_int n, + lapack_int nrhs, const double* a, lapack_int lda, + const double* af, lapack_int ldaf, const double* b, + lapack_int ldb, double* x, lapack_int ldx, + double* ferr, double* berr ); +lapack_int LAPACKE_cporfs( int matrix_order, char uplo, 
lapack_int n, + lapack_int nrhs, const lapack_complex_float* a, + lapack_int lda, const lapack_complex_float* af, + lapack_int ldaf, const lapack_complex_float* b, + lapack_int ldb, lapack_complex_float* x, + lapack_int ldx, float* ferr, float* berr ); +lapack_int LAPACKE_zporfs( int matrix_order, char uplo, lapack_int n, + lapack_int nrhs, const lapack_complex_double* a, + lapack_int lda, const lapack_complex_double* af, + lapack_int ldaf, const lapack_complex_double* b, + lapack_int ldb, lapack_complex_double* x, + lapack_int ldx, double* ferr, double* berr ); + +lapack_int LAPACKE_sporfsx( int matrix_order, char uplo, char equed, + lapack_int n, lapack_int nrhs, const float* a, + lapack_int lda, const float* af, lapack_int ldaf, + const float* s, const float* b, lapack_int ldb, + float* x, lapack_int ldx, float* rcond, float* berr, + lapack_int n_err_bnds, float* err_bnds_norm, + float* err_bnds_comp, lapack_int nparams, + float* params ); +lapack_int LAPACKE_dporfsx( int matrix_order, char uplo, char equed, + lapack_int n, lapack_int nrhs, const double* a, + lapack_int lda, const double* af, lapack_int ldaf, + const double* s, const double* b, lapack_int ldb, + double* x, lapack_int ldx, double* rcond, + double* berr, lapack_int n_err_bnds, + double* err_bnds_norm, double* err_bnds_comp, + lapack_int nparams, double* params ); +lapack_int LAPACKE_cporfsx( int matrix_order, char uplo, char equed, + lapack_int n, lapack_int nrhs, + const lapack_complex_float* a, lapack_int lda, + const lapack_complex_float* af, lapack_int ldaf, + const float* s, const lapack_complex_float* b, + lapack_int ldb, lapack_complex_float* x, + lapack_int ldx, float* rcond, float* berr, + lapack_int n_err_bnds, float* err_bnds_norm, + float* err_bnds_comp, lapack_int nparams, + float* params ); +lapack_int LAPACKE_zporfsx( int matrix_order, char uplo, char equed, + lapack_int n, lapack_int nrhs, + const lapack_complex_double* a, lapack_int lda, + const lapack_complex_double* af, 
lapack_int ldaf, + const double* s, const lapack_complex_double* b, + lapack_int ldb, lapack_complex_double* x, + lapack_int ldx, double* rcond, double* berr, + lapack_int n_err_bnds, double* err_bnds_norm, + double* err_bnds_comp, lapack_int nparams, + double* params ); + +lapack_int LAPACKE_sposv( int matrix_order, char uplo, lapack_int n, + lapack_int nrhs, float* a, lapack_int lda, float* b, + lapack_int ldb ); +lapack_int LAPACKE_dposv( int matrix_order, char uplo, lapack_int n, + lapack_int nrhs, double* a, lapack_int lda, double* b, + lapack_int ldb ); +lapack_int LAPACKE_cposv( int matrix_order, char uplo, lapack_int n, + lapack_int nrhs, lapack_complex_float* a, + lapack_int lda, lapack_complex_float* b, + lapack_int ldb ); +lapack_int LAPACKE_zposv( int matrix_order, char uplo, lapack_int n, + lapack_int nrhs, lapack_complex_double* a, + lapack_int lda, lapack_complex_double* b, + lapack_int ldb ); +lapack_int LAPACKE_dsposv( int matrix_order, char uplo, lapack_int n, + lapack_int nrhs, double* a, lapack_int lda, + double* b, lapack_int ldb, double* x, lapack_int ldx, + lapack_int* iter ); +lapack_int LAPACKE_zcposv( int matrix_order, char uplo, lapack_int n, + lapack_int nrhs, lapack_complex_double* a, + lapack_int lda, lapack_complex_double* b, + lapack_int ldb, lapack_complex_double* x, + lapack_int ldx, lapack_int* iter ); + +lapack_int LAPACKE_sposvx( int matrix_order, char fact, char uplo, lapack_int n, + lapack_int nrhs, float* a, lapack_int lda, float* af, + lapack_int ldaf, char* equed, float* s, float* b, + lapack_int ldb, float* x, lapack_int ldx, + float* rcond, float* ferr, float* berr ); +lapack_int LAPACKE_dposvx( int matrix_order, char fact, char uplo, lapack_int n, + lapack_int nrhs, double* a, lapack_int lda, + double* af, lapack_int ldaf, char* equed, double* s, + double* b, lapack_int ldb, double* x, lapack_int ldx, + double* rcond, double* ferr, double* berr ); +lapack_int LAPACKE_cposvx( int matrix_order, char fact, char uplo, 
lapack_int n, + lapack_int nrhs, lapack_complex_float* a, + lapack_int lda, lapack_complex_float* af, + lapack_int ldaf, char* equed, float* s, + lapack_complex_float* b, lapack_int ldb, + lapack_complex_float* x, lapack_int ldx, + float* rcond, float* ferr, float* berr ); +lapack_int LAPACKE_zposvx( int matrix_order, char fact, char uplo, lapack_int n, + lapack_int nrhs, lapack_complex_double* a, + lapack_int lda, lapack_complex_double* af, + lapack_int ldaf, char* equed, double* s, + lapack_complex_double* b, lapack_int ldb, + lapack_complex_double* x, lapack_int ldx, + double* rcond, double* ferr, double* berr ); + +lapack_int LAPACKE_sposvxx( int matrix_order, char fact, char uplo, + lapack_int n, lapack_int nrhs, float* a, + lapack_int lda, float* af, lapack_int ldaf, + char* equed, float* s, float* b, lapack_int ldb, + float* x, lapack_int ldx, float* rcond, + float* rpvgrw, float* berr, lapack_int n_err_bnds, + float* err_bnds_norm, float* err_bnds_comp, + lapack_int nparams, float* params ); +lapack_int LAPACKE_dposvxx( int matrix_order, char fact, char uplo, + lapack_int n, lapack_int nrhs, double* a, + lapack_int lda, double* af, lapack_int ldaf, + char* equed, double* s, double* b, lapack_int ldb, + double* x, lapack_int ldx, double* rcond, + double* rpvgrw, double* berr, lapack_int n_err_bnds, + double* err_bnds_norm, double* err_bnds_comp, + lapack_int nparams, double* params ); +lapack_int LAPACKE_cposvxx( int matrix_order, char fact, char uplo, + lapack_int n, lapack_int nrhs, + lapack_complex_float* a, lapack_int lda, + lapack_complex_float* af, lapack_int ldaf, + char* equed, float* s, lapack_complex_float* b, + lapack_int ldb, lapack_complex_float* x, + lapack_int ldx, float* rcond, float* rpvgrw, + float* berr, lapack_int n_err_bnds, + float* err_bnds_norm, float* err_bnds_comp, + lapack_int nparams, float* params ); +lapack_int LAPACKE_zposvxx( int matrix_order, char fact, char uplo, + lapack_int n, lapack_int nrhs, + lapack_complex_double* a, 
lapack_int lda, + lapack_complex_double* af, lapack_int ldaf, + char* equed, double* s, lapack_complex_double* b, + lapack_int ldb, lapack_complex_double* x, + lapack_int ldx, double* rcond, double* rpvgrw, + double* berr, lapack_int n_err_bnds, + double* err_bnds_norm, double* err_bnds_comp, + lapack_int nparams, double* params ); + +lapack_int LAPACKE_spotrf( int matrix_order, char uplo, lapack_int n, float* a, + lapack_int lda ); +lapack_int LAPACKE_dpotrf( int matrix_order, char uplo, lapack_int n, double* a, + lapack_int lda ); +lapack_int LAPACKE_cpotrf( int matrix_order, char uplo, lapack_int n, + lapack_complex_float* a, lapack_int lda ); +lapack_int LAPACKE_zpotrf( int matrix_order, char uplo, lapack_int n, + lapack_complex_double* a, lapack_int lda ); + +lapack_int LAPACKE_spotri( int matrix_order, char uplo, lapack_int n, float* a, + lapack_int lda ); +lapack_int LAPACKE_dpotri( int matrix_order, char uplo, lapack_int n, double* a, + lapack_int lda ); +lapack_int LAPACKE_cpotri( int matrix_order, char uplo, lapack_int n, + lapack_complex_float* a, lapack_int lda ); +lapack_int LAPACKE_zpotri( int matrix_order, char uplo, lapack_int n, + lapack_complex_double* a, lapack_int lda ); + +lapack_int LAPACKE_spotrs( int matrix_order, char uplo, lapack_int n, + lapack_int nrhs, const float* a, lapack_int lda, + float* b, lapack_int ldb ); +lapack_int LAPACKE_dpotrs( int matrix_order, char uplo, lapack_int n, + lapack_int nrhs, const double* a, lapack_int lda, + double* b, lapack_int ldb ); +lapack_int LAPACKE_cpotrs( int matrix_order, char uplo, lapack_int n, + lapack_int nrhs, const lapack_complex_float* a, + lapack_int lda, lapack_complex_float* b, + lapack_int ldb ); +lapack_int LAPACKE_zpotrs( int matrix_order, char uplo, lapack_int n, + lapack_int nrhs, const lapack_complex_double* a, + lapack_int lda, lapack_complex_double* b, + lapack_int ldb ); + +lapack_int LAPACKE_sppcon( int matrix_order, char uplo, lapack_int n, + const float* ap, float anorm, float* 
rcond ); +lapack_int LAPACKE_dppcon( int matrix_order, char uplo, lapack_int n, + const double* ap, double anorm, double* rcond ); +lapack_int LAPACKE_cppcon( int matrix_order, char uplo, lapack_int n, + const lapack_complex_float* ap, float anorm, + float* rcond ); +lapack_int LAPACKE_zppcon( int matrix_order, char uplo, lapack_int n, + const lapack_complex_double* ap, double anorm, + double* rcond ); + +lapack_int LAPACKE_sppequ( int matrix_order, char uplo, lapack_int n, + const float* ap, float* s, float* scond, + float* amax ); +lapack_int LAPACKE_dppequ( int matrix_order, char uplo, lapack_int n, + const double* ap, double* s, double* scond, + double* amax ); +lapack_int LAPACKE_cppequ( int matrix_order, char uplo, lapack_int n, + const lapack_complex_float* ap, float* s, + float* scond, float* amax ); +lapack_int LAPACKE_zppequ( int matrix_order, char uplo, lapack_int n, + const lapack_complex_double* ap, double* s, + double* scond, double* amax ); + +lapack_int LAPACKE_spprfs( int matrix_order, char uplo, lapack_int n, + lapack_int nrhs, const float* ap, const float* afp, + const float* b, lapack_int ldb, float* x, + lapack_int ldx, float* ferr, float* berr ); +lapack_int LAPACKE_dpprfs( int matrix_order, char uplo, lapack_int n, + lapack_int nrhs, const double* ap, const double* afp, + const double* b, lapack_int ldb, double* x, + lapack_int ldx, double* ferr, double* berr ); +lapack_int LAPACKE_cpprfs( int matrix_order, char uplo, lapack_int n, + lapack_int nrhs, const lapack_complex_float* ap, + const lapack_complex_float* afp, + const lapack_complex_float* b, lapack_int ldb, + lapack_complex_float* x, lapack_int ldx, float* ferr, + float* berr ); +lapack_int LAPACKE_zpprfs( int matrix_order, char uplo, lapack_int n, + lapack_int nrhs, const lapack_complex_double* ap, + const lapack_complex_double* afp, + const lapack_complex_double* b, lapack_int ldb, + lapack_complex_double* x, lapack_int ldx, + double* ferr, double* berr ); + +lapack_int 
LAPACKE_sppsv( int matrix_order, char uplo, lapack_int n, + lapack_int nrhs, float* ap, float* b, + lapack_int ldb ); +lapack_int LAPACKE_dppsv( int matrix_order, char uplo, lapack_int n, + lapack_int nrhs, double* ap, double* b, + lapack_int ldb ); +lapack_int LAPACKE_cppsv( int matrix_order, char uplo, lapack_int n, + lapack_int nrhs, lapack_complex_float* ap, + lapack_complex_float* b, lapack_int ldb ); +lapack_int LAPACKE_zppsv( int matrix_order, char uplo, lapack_int n, + lapack_int nrhs, lapack_complex_double* ap, + lapack_complex_double* b, lapack_int ldb ); + +lapack_int LAPACKE_sppsvx( int matrix_order, char fact, char uplo, lapack_int n, + lapack_int nrhs, float* ap, float* afp, char* equed, + float* s, float* b, lapack_int ldb, float* x, + lapack_int ldx, float* rcond, float* ferr, + float* berr ); +lapack_int LAPACKE_dppsvx( int matrix_order, char fact, char uplo, lapack_int n, + lapack_int nrhs, double* ap, double* afp, + char* equed, double* s, double* b, lapack_int ldb, + double* x, lapack_int ldx, double* rcond, + double* ferr, double* berr ); +lapack_int LAPACKE_cppsvx( int matrix_order, char fact, char uplo, lapack_int n, + lapack_int nrhs, lapack_complex_float* ap, + lapack_complex_float* afp, char* equed, float* s, + lapack_complex_float* b, lapack_int ldb, + lapack_complex_float* x, lapack_int ldx, + float* rcond, float* ferr, float* berr ); +lapack_int LAPACKE_zppsvx( int matrix_order, char fact, char uplo, lapack_int n, + lapack_int nrhs, lapack_complex_double* ap, + lapack_complex_double* afp, char* equed, double* s, + lapack_complex_double* b, lapack_int ldb, + lapack_complex_double* x, lapack_int ldx, + double* rcond, double* ferr, double* berr ); + +lapack_int LAPACKE_spptrf( int matrix_order, char uplo, lapack_int n, + float* ap ); +lapack_int LAPACKE_dpptrf( int matrix_order, char uplo, lapack_int n, + double* ap ); +lapack_int LAPACKE_cpptrf( int matrix_order, char uplo, lapack_int n, + lapack_complex_float* ap ); +lapack_int 
LAPACKE_zpptrf( int matrix_order, char uplo, lapack_int n, + lapack_complex_double* ap ); + +lapack_int LAPACKE_spptri( int matrix_order, char uplo, lapack_int n, + float* ap ); +lapack_int LAPACKE_dpptri( int matrix_order, char uplo, lapack_int n, + double* ap ); +lapack_int LAPACKE_cpptri( int matrix_order, char uplo, lapack_int n, + lapack_complex_float* ap ); +lapack_int LAPACKE_zpptri( int matrix_order, char uplo, lapack_int n, + lapack_complex_double* ap ); + +lapack_int LAPACKE_spptrs( int matrix_order, char uplo, lapack_int n, + lapack_int nrhs, const float* ap, float* b, + lapack_int ldb ); +lapack_int LAPACKE_dpptrs( int matrix_order, char uplo, lapack_int n, + lapack_int nrhs, const double* ap, double* b, + lapack_int ldb ); +lapack_int LAPACKE_cpptrs( int matrix_order, char uplo, lapack_int n, + lapack_int nrhs, const lapack_complex_float* ap, + lapack_complex_float* b, lapack_int ldb ); +lapack_int LAPACKE_zpptrs( int matrix_order, char uplo, lapack_int n, + lapack_int nrhs, const lapack_complex_double* ap, + lapack_complex_double* b, lapack_int ldb ); + +lapack_int LAPACKE_spstrf( int matrix_order, char uplo, lapack_int n, float* a, + lapack_int lda, lapack_int* piv, lapack_int* rank, + float tol ); +lapack_int LAPACKE_dpstrf( int matrix_order, char uplo, lapack_int n, double* a, + lapack_int lda, lapack_int* piv, lapack_int* rank, + double tol ); +lapack_int LAPACKE_cpstrf( int matrix_order, char uplo, lapack_int n, + lapack_complex_float* a, lapack_int lda, + lapack_int* piv, lapack_int* rank, float tol ); +lapack_int LAPACKE_zpstrf( int matrix_order, char uplo, lapack_int n, + lapack_complex_double* a, lapack_int lda, + lapack_int* piv, lapack_int* rank, double tol ); + +lapack_int LAPACKE_sptcon( lapack_int n, const float* d, const float* e, + float anorm, float* rcond ); +lapack_int LAPACKE_dptcon( lapack_int n, const double* d, const double* e, + double anorm, double* rcond ); +lapack_int LAPACKE_cptcon( lapack_int n, const float* d, + const 
lapack_complex_float* e, float anorm, + float* rcond ); +lapack_int LAPACKE_zptcon( lapack_int n, const double* d, + const lapack_complex_double* e, double anorm, + double* rcond ); + +lapack_int LAPACKE_spteqr( int matrix_order, char compz, lapack_int n, float* d, + float* e, float* z, lapack_int ldz ); +lapack_int LAPACKE_dpteqr( int matrix_order, char compz, lapack_int n, + double* d, double* e, double* z, lapack_int ldz ); +lapack_int LAPACKE_cpteqr( int matrix_order, char compz, lapack_int n, float* d, + float* e, lapack_complex_float* z, lapack_int ldz ); +lapack_int LAPACKE_zpteqr( int matrix_order, char compz, lapack_int n, + double* d, double* e, lapack_complex_double* z, + lapack_int ldz ); + +lapack_int LAPACKE_sptrfs( int matrix_order, lapack_int n, lapack_int nrhs, + const float* d, const float* e, const float* df, + const float* ef, const float* b, lapack_int ldb, + float* x, lapack_int ldx, float* ferr, float* berr ); +lapack_int LAPACKE_dptrfs( int matrix_order, lapack_int n, lapack_int nrhs, + const double* d, const double* e, const double* df, + const double* ef, const double* b, lapack_int ldb, + double* x, lapack_int ldx, double* ferr, + double* berr ); +lapack_int LAPACKE_cptrfs( int matrix_order, char uplo, lapack_int n, + lapack_int nrhs, const float* d, + const lapack_complex_float* e, const float* df, + const lapack_complex_float* ef, + const lapack_complex_float* b, lapack_int ldb, + lapack_complex_float* x, lapack_int ldx, float* ferr, + float* berr ); +lapack_int LAPACKE_zptrfs( int matrix_order, char uplo, lapack_int n, + lapack_int nrhs, const double* d, + const lapack_complex_double* e, const double* df, + const lapack_complex_double* ef, + const lapack_complex_double* b, lapack_int ldb, + lapack_complex_double* x, lapack_int ldx, + double* ferr, double* berr ); + +lapack_int LAPACKE_sptsv( int matrix_order, lapack_int n, lapack_int nrhs, + float* d, float* e, float* b, lapack_int ldb ); +lapack_int LAPACKE_dptsv( int matrix_order, 
lapack_int n, lapack_int nrhs, + double* d, double* e, double* b, lapack_int ldb ); +lapack_int LAPACKE_cptsv( int matrix_order, lapack_int n, lapack_int nrhs, + float* d, lapack_complex_float* e, + lapack_complex_float* b, lapack_int ldb ); +lapack_int LAPACKE_zptsv( int matrix_order, lapack_int n, lapack_int nrhs, + double* d, lapack_complex_double* e, + lapack_complex_double* b, lapack_int ldb ); + +lapack_int LAPACKE_sptsvx( int matrix_order, char fact, lapack_int n, + lapack_int nrhs, const float* d, const float* e, + float* df, float* ef, const float* b, lapack_int ldb, + float* x, lapack_int ldx, float* rcond, float* ferr, + float* berr ); +lapack_int LAPACKE_dptsvx( int matrix_order, char fact, lapack_int n, + lapack_int nrhs, const double* d, const double* e, + double* df, double* ef, const double* b, + lapack_int ldb, double* x, lapack_int ldx, + double* rcond, double* ferr, double* berr ); +lapack_int LAPACKE_cptsvx( int matrix_order, char fact, lapack_int n, + lapack_int nrhs, const float* d, + const lapack_complex_float* e, float* df, + lapack_complex_float* ef, + const lapack_complex_float* b, lapack_int ldb, + lapack_complex_float* x, lapack_int ldx, + float* rcond, float* ferr, float* berr ); +lapack_int LAPACKE_zptsvx( int matrix_order, char fact, lapack_int n, + lapack_int nrhs, const double* d, + const lapack_complex_double* e, double* df, + lapack_complex_double* ef, + const lapack_complex_double* b, lapack_int ldb, + lapack_complex_double* x, lapack_int ldx, + double* rcond, double* ferr, double* berr ); + +lapack_int LAPACKE_spttrf( lapack_int n, float* d, float* e ); +lapack_int LAPACKE_dpttrf( lapack_int n, double* d, double* e ); +lapack_int LAPACKE_cpttrf( lapack_int n, float* d, lapack_complex_float* e ); +lapack_int LAPACKE_zpttrf( lapack_int n, double* d, lapack_complex_double* e ); + +lapack_int LAPACKE_spttrs( int matrix_order, lapack_int n, lapack_int nrhs, + const float* d, const float* e, float* b, + lapack_int ldb ); +lapack_int 
LAPACKE_dpttrs( int matrix_order, lapack_int n, lapack_int nrhs, + const double* d, const double* e, double* b, + lapack_int ldb ); +lapack_int LAPACKE_cpttrs( int matrix_order, char uplo, lapack_int n, + lapack_int nrhs, const float* d, + const lapack_complex_float* e, + lapack_complex_float* b, lapack_int ldb ); +lapack_int LAPACKE_zpttrs( int matrix_order, char uplo, lapack_int n, + lapack_int nrhs, const double* d, + const lapack_complex_double* e, + lapack_complex_double* b, lapack_int ldb ); + +lapack_int LAPACKE_ssbev( int matrix_order, char jobz, char uplo, lapack_int n, + lapack_int kd, float* ab, lapack_int ldab, float* w, + float* z, lapack_int ldz ); +lapack_int LAPACKE_dsbev( int matrix_order, char jobz, char uplo, lapack_int n, + lapack_int kd, double* ab, lapack_int ldab, double* w, + double* z, lapack_int ldz ); + +lapack_int LAPACKE_ssbevd( int matrix_order, char jobz, char uplo, lapack_int n, + lapack_int kd, float* ab, lapack_int ldab, float* w, + float* z, lapack_int ldz ); +lapack_int LAPACKE_dsbevd( int matrix_order, char jobz, char uplo, lapack_int n, + lapack_int kd, double* ab, lapack_int ldab, + double* w, double* z, lapack_int ldz ); + +lapack_int LAPACKE_ssbevx( int matrix_order, char jobz, char range, char uplo, + lapack_int n, lapack_int kd, float* ab, + lapack_int ldab, float* q, lapack_int ldq, float vl, + float vu, lapack_int il, lapack_int iu, float abstol, + lapack_int* m, float* w, float* z, lapack_int ldz, + lapack_int* ifail ); +lapack_int LAPACKE_dsbevx( int matrix_order, char jobz, char range, char uplo, + lapack_int n, lapack_int kd, double* ab, + lapack_int ldab, double* q, lapack_int ldq, + double vl, double vu, lapack_int il, lapack_int iu, + double abstol, lapack_int* m, double* w, double* z, + lapack_int ldz, lapack_int* ifail ); + +lapack_int LAPACKE_ssbgst( int matrix_order, char vect, char uplo, lapack_int n, + lapack_int ka, lapack_int kb, float* ab, + lapack_int ldab, const float* bb, lapack_int ldbb, + float* x, 
lapack_int ldx ); +lapack_int LAPACKE_dsbgst( int matrix_order, char vect, char uplo, lapack_int n, + lapack_int ka, lapack_int kb, double* ab, + lapack_int ldab, const double* bb, lapack_int ldbb, + double* x, lapack_int ldx ); + +lapack_int LAPACKE_ssbgv( int matrix_order, char jobz, char uplo, lapack_int n, + lapack_int ka, lapack_int kb, float* ab, + lapack_int ldab, float* bb, lapack_int ldbb, float* w, + float* z, lapack_int ldz ); +lapack_int LAPACKE_dsbgv( int matrix_order, char jobz, char uplo, lapack_int n, + lapack_int ka, lapack_int kb, double* ab, + lapack_int ldab, double* bb, lapack_int ldbb, + double* w, double* z, lapack_int ldz ); + +lapack_int LAPACKE_ssbgvd( int matrix_order, char jobz, char uplo, lapack_int n, + lapack_int ka, lapack_int kb, float* ab, + lapack_int ldab, float* bb, lapack_int ldbb, + float* w, float* z, lapack_int ldz ); +lapack_int LAPACKE_dsbgvd( int matrix_order, char jobz, char uplo, lapack_int n, + lapack_int ka, lapack_int kb, double* ab, + lapack_int ldab, double* bb, lapack_int ldbb, + double* w, double* z, lapack_int ldz ); + +lapack_int LAPACKE_ssbgvx( int matrix_order, char jobz, char range, char uplo, + lapack_int n, lapack_int ka, lapack_int kb, + float* ab, lapack_int ldab, float* bb, + lapack_int ldbb, float* q, lapack_int ldq, float vl, + float vu, lapack_int il, lapack_int iu, float abstol, + lapack_int* m, float* w, float* z, lapack_int ldz, + lapack_int* ifail ); +lapack_int LAPACKE_dsbgvx( int matrix_order, char jobz, char range, char uplo, + lapack_int n, lapack_int ka, lapack_int kb, + double* ab, lapack_int ldab, double* bb, + lapack_int ldbb, double* q, lapack_int ldq, + double vl, double vu, lapack_int il, lapack_int iu, + double abstol, lapack_int* m, double* w, double* z, + lapack_int ldz, lapack_int* ifail ); + +lapack_int LAPACKE_ssbtrd( int matrix_order, char vect, char uplo, lapack_int n, + lapack_int kd, float* ab, lapack_int ldab, float* d, + float* e, float* q, lapack_int ldq ); +lapack_int 
LAPACKE_dsbtrd( int matrix_order, char vect, char uplo, lapack_int n, + lapack_int kd, double* ab, lapack_int ldab, + double* d, double* e, double* q, lapack_int ldq ); + +lapack_int LAPACKE_ssfrk( int matrix_order, char transr, char uplo, char trans, + lapack_int n, lapack_int k, float alpha, + const float* a, lapack_int lda, float beta, + float* c ); +lapack_int LAPACKE_dsfrk( int matrix_order, char transr, char uplo, char trans, + lapack_int n, lapack_int k, double alpha, + const double* a, lapack_int lda, double beta, + double* c ); + +lapack_int LAPACKE_sspcon( int matrix_order, char uplo, lapack_int n, + const float* ap, const lapack_int* ipiv, float anorm, + float* rcond ); +lapack_int LAPACKE_dspcon( int matrix_order, char uplo, lapack_int n, + const double* ap, const lapack_int* ipiv, + double anorm, double* rcond ); +lapack_int LAPACKE_cspcon( int matrix_order, char uplo, lapack_int n, + const lapack_complex_float* ap, + const lapack_int* ipiv, float anorm, float* rcond ); +lapack_int LAPACKE_zspcon( int matrix_order, char uplo, lapack_int n, + const lapack_complex_double* ap, + const lapack_int* ipiv, double anorm, + double* rcond ); + +lapack_int LAPACKE_sspev( int matrix_order, char jobz, char uplo, lapack_int n, + float* ap, float* w, float* z, lapack_int ldz ); +lapack_int LAPACKE_dspev( int matrix_order, char jobz, char uplo, lapack_int n, + double* ap, double* w, double* z, lapack_int ldz ); + +lapack_int LAPACKE_sspevd( int matrix_order, char jobz, char uplo, lapack_int n, + float* ap, float* w, float* z, lapack_int ldz ); +lapack_int LAPACKE_dspevd( int matrix_order, char jobz, char uplo, lapack_int n, + double* ap, double* w, double* z, lapack_int ldz ); + +lapack_int LAPACKE_sspevx( int matrix_order, char jobz, char range, char uplo, + lapack_int n, float* ap, float vl, float vu, + lapack_int il, lapack_int iu, float abstol, + lapack_int* m, float* w, float* z, lapack_int ldz, + lapack_int* ifail ); +lapack_int LAPACKE_dspevx( int matrix_order, 
char jobz, char range, char uplo, + lapack_int n, double* ap, double vl, double vu, + lapack_int il, lapack_int iu, double abstol, + lapack_int* m, double* w, double* z, lapack_int ldz, + lapack_int* ifail ); + +lapack_int LAPACKE_sspgst( int matrix_order, lapack_int itype, char uplo, + lapack_int n, float* ap, const float* bp ); +lapack_int LAPACKE_dspgst( int matrix_order, lapack_int itype, char uplo, + lapack_int n, double* ap, const double* bp ); + +lapack_int LAPACKE_sspgv( int matrix_order, lapack_int itype, char jobz, + char uplo, lapack_int n, float* ap, float* bp, + float* w, float* z, lapack_int ldz ); +lapack_int LAPACKE_dspgv( int matrix_order, lapack_int itype, char jobz, + char uplo, lapack_int n, double* ap, double* bp, + double* w, double* z, lapack_int ldz ); + +lapack_int LAPACKE_sspgvd( int matrix_order, lapack_int itype, char jobz, + char uplo, lapack_int n, float* ap, float* bp, + float* w, float* z, lapack_int ldz ); +lapack_int LAPACKE_dspgvd( int matrix_order, lapack_int itype, char jobz, + char uplo, lapack_int n, double* ap, double* bp, + double* w, double* z, lapack_int ldz ); + +lapack_int LAPACKE_sspgvx( int matrix_order, lapack_int itype, char jobz, + char range, char uplo, lapack_int n, float* ap, + float* bp, float vl, float vu, lapack_int il, + lapack_int iu, float abstol, lapack_int* m, float* w, + float* z, lapack_int ldz, lapack_int* ifail ); +lapack_int LAPACKE_dspgvx( int matrix_order, lapack_int itype, char jobz, + char range, char uplo, lapack_int n, double* ap, + double* bp, double vl, double vu, lapack_int il, + lapack_int iu, double abstol, lapack_int* m, + double* w, double* z, lapack_int ldz, + lapack_int* ifail ); + +lapack_int LAPACKE_ssprfs( int matrix_order, char uplo, lapack_int n, + lapack_int nrhs, const float* ap, const float* afp, + const lapack_int* ipiv, const float* b, + lapack_int ldb, float* x, lapack_int ldx, + float* ferr, float* berr ); +lapack_int LAPACKE_dsprfs( int matrix_order, char uplo, lapack_int 
n, + lapack_int nrhs, const double* ap, const double* afp, + const lapack_int* ipiv, const double* b, + lapack_int ldb, double* x, lapack_int ldx, + double* ferr, double* berr ); +lapack_int LAPACKE_csprfs( int matrix_order, char uplo, lapack_int n, + lapack_int nrhs, const lapack_complex_float* ap, + const lapack_complex_float* afp, + const lapack_int* ipiv, + const lapack_complex_float* b, lapack_int ldb, + lapack_complex_float* x, lapack_int ldx, float* ferr, + float* berr ); +lapack_int LAPACKE_zsprfs( int matrix_order, char uplo, lapack_int n, + lapack_int nrhs, const lapack_complex_double* ap, + const lapack_complex_double* afp, + const lapack_int* ipiv, + const lapack_complex_double* b, lapack_int ldb, + lapack_complex_double* x, lapack_int ldx, + double* ferr, double* berr ); + +lapack_int LAPACKE_sspsv( int matrix_order, char uplo, lapack_int n, + lapack_int nrhs, float* ap, lapack_int* ipiv, + float* b, lapack_int ldb ); +lapack_int LAPACKE_dspsv( int matrix_order, char uplo, lapack_int n, + lapack_int nrhs, double* ap, lapack_int* ipiv, + double* b, lapack_int ldb ); +lapack_int LAPACKE_cspsv( int matrix_order, char uplo, lapack_int n, + lapack_int nrhs, lapack_complex_float* ap, + lapack_int* ipiv, lapack_complex_float* b, + lapack_int ldb ); +lapack_int LAPACKE_zspsv( int matrix_order, char uplo, lapack_int n, + lapack_int nrhs, lapack_complex_double* ap, + lapack_int* ipiv, lapack_complex_double* b, + lapack_int ldb ); + +lapack_int LAPACKE_sspsvx( int matrix_order, char fact, char uplo, lapack_int n, + lapack_int nrhs, const float* ap, float* afp, + lapack_int* ipiv, const float* b, lapack_int ldb, + float* x, lapack_int ldx, float* rcond, float* ferr, + float* berr ); +lapack_int LAPACKE_dspsvx( int matrix_order, char fact, char uplo, lapack_int n, + lapack_int nrhs, const double* ap, double* afp, + lapack_int* ipiv, const double* b, lapack_int ldb, + double* x, lapack_int ldx, double* rcond, + double* ferr, double* berr ); +lapack_int 
LAPACKE_cspsvx( int matrix_order, char fact, char uplo, lapack_int n, + lapack_int nrhs, const lapack_complex_float* ap, + lapack_complex_float* afp, lapack_int* ipiv, + const lapack_complex_float* b, lapack_int ldb, + lapack_complex_float* x, lapack_int ldx, + float* rcond, float* ferr, float* berr ); +lapack_int LAPACKE_zspsvx( int matrix_order, char fact, char uplo, lapack_int n, + lapack_int nrhs, const lapack_complex_double* ap, + lapack_complex_double* afp, lapack_int* ipiv, + const lapack_complex_double* b, lapack_int ldb, + lapack_complex_double* x, lapack_int ldx, + double* rcond, double* ferr, double* berr ); + +lapack_int LAPACKE_ssptrd( int matrix_order, char uplo, lapack_int n, float* ap, + float* d, float* e, float* tau ); +lapack_int LAPACKE_dsptrd( int matrix_order, char uplo, lapack_int n, + double* ap, double* d, double* e, double* tau ); + +lapack_int LAPACKE_ssptrf( int matrix_order, char uplo, lapack_int n, float* ap, + lapack_int* ipiv ); +lapack_int LAPACKE_dsptrf( int matrix_order, char uplo, lapack_int n, + double* ap, lapack_int* ipiv ); +lapack_int LAPACKE_csptrf( int matrix_order, char uplo, lapack_int n, + lapack_complex_float* ap, lapack_int* ipiv ); +lapack_int LAPACKE_zsptrf( int matrix_order, char uplo, lapack_int n, + lapack_complex_double* ap, lapack_int* ipiv ); + +lapack_int LAPACKE_ssptri( int matrix_order, char uplo, lapack_int n, float* ap, + const lapack_int* ipiv ); +lapack_int LAPACKE_dsptri( int matrix_order, char uplo, lapack_int n, + double* ap, const lapack_int* ipiv ); +lapack_int LAPACKE_csptri( int matrix_order, char uplo, lapack_int n, + lapack_complex_float* ap, const lapack_int* ipiv ); +lapack_int LAPACKE_zsptri( int matrix_order, char uplo, lapack_int n, + lapack_complex_double* ap, const lapack_int* ipiv ); + +lapack_int LAPACKE_ssptrs( int matrix_order, char uplo, lapack_int n, + lapack_int nrhs, const float* ap, + const lapack_int* ipiv, float* b, lapack_int ldb ); +lapack_int LAPACKE_dsptrs( int 
matrix_order, char uplo, lapack_int n, + lapack_int nrhs, const double* ap, + const lapack_int* ipiv, double* b, lapack_int ldb ); +lapack_int LAPACKE_csptrs( int matrix_order, char uplo, lapack_int n, + lapack_int nrhs, const lapack_complex_float* ap, + const lapack_int* ipiv, lapack_complex_float* b, + lapack_int ldb ); +lapack_int LAPACKE_zsptrs( int matrix_order, char uplo, lapack_int n, + lapack_int nrhs, const lapack_complex_double* ap, + const lapack_int* ipiv, lapack_complex_double* b, + lapack_int ldb ); + +lapack_int LAPACKE_sstebz( char range, char order, lapack_int n, float vl, + float vu, lapack_int il, lapack_int iu, float abstol, + const float* d, const float* e, lapack_int* m, + lapack_int* nsplit, float* w, lapack_int* iblock, + lapack_int* isplit ); +lapack_int LAPACKE_dstebz( char range, char order, lapack_int n, double vl, + double vu, lapack_int il, lapack_int iu, + double abstol, const double* d, const double* e, + lapack_int* m, lapack_int* nsplit, double* w, + lapack_int* iblock, lapack_int* isplit ); + +lapack_int LAPACKE_sstedc( int matrix_order, char compz, lapack_int n, float* d, + float* e, float* z, lapack_int ldz ); +lapack_int LAPACKE_dstedc( int matrix_order, char compz, lapack_int n, + double* d, double* e, double* z, lapack_int ldz ); +lapack_int LAPACKE_cstedc( int matrix_order, char compz, lapack_int n, float* d, + float* e, lapack_complex_float* z, lapack_int ldz ); +lapack_int LAPACKE_zstedc( int matrix_order, char compz, lapack_int n, + double* d, double* e, lapack_complex_double* z, + lapack_int ldz ); + +lapack_int LAPACKE_sstegr( int matrix_order, char jobz, char range, + lapack_int n, float* d, float* e, float vl, float vu, + lapack_int il, lapack_int iu, float abstol, + lapack_int* m, float* w, float* z, lapack_int ldz, + lapack_int* isuppz ); +lapack_int LAPACKE_dstegr( int matrix_order, char jobz, char range, + lapack_int n, double* d, double* e, double vl, + double vu, lapack_int il, lapack_int iu, + double abstol, 
lapack_int* m, double* w, double* z, + lapack_int ldz, lapack_int* isuppz ); +lapack_int LAPACKE_cstegr( int matrix_order, char jobz, char range, + lapack_int n, float* d, float* e, float vl, float vu, + lapack_int il, lapack_int iu, float abstol, + lapack_int* m, float* w, lapack_complex_float* z, + lapack_int ldz, lapack_int* isuppz ); +lapack_int LAPACKE_zstegr( int matrix_order, char jobz, char range, + lapack_int n, double* d, double* e, double vl, + double vu, lapack_int il, lapack_int iu, + double abstol, lapack_int* m, double* w, + lapack_complex_double* z, lapack_int ldz, + lapack_int* isuppz ); + +lapack_int LAPACKE_sstein( int matrix_order, lapack_int n, const float* d, + const float* e, lapack_int m, const float* w, + const lapack_int* iblock, const lapack_int* isplit, + float* z, lapack_int ldz, lapack_int* ifailv ); +lapack_int LAPACKE_dstein( int matrix_order, lapack_int n, const double* d, + const double* e, lapack_int m, const double* w, + const lapack_int* iblock, const lapack_int* isplit, + double* z, lapack_int ldz, lapack_int* ifailv ); +lapack_int LAPACKE_cstein( int matrix_order, lapack_int n, const float* d, + const float* e, lapack_int m, const float* w, + const lapack_int* iblock, const lapack_int* isplit, + lapack_complex_float* z, lapack_int ldz, + lapack_int* ifailv ); +lapack_int LAPACKE_zstein( int matrix_order, lapack_int n, const double* d, + const double* e, lapack_int m, const double* w, + const lapack_int* iblock, const lapack_int* isplit, + lapack_complex_double* z, lapack_int ldz, + lapack_int* ifailv ); + +lapack_int LAPACKE_sstemr( int matrix_order, char jobz, char range, + lapack_int n, float* d, float* e, float vl, float vu, + lapack_int il, lapack_int iu, lapack_int* m, + float* w, float* z, lapack_int ldz, lapack_int nzc, + lapack_int* isuppz, lapack_logical* tryrac ); +lapack_int LAPACKE_dstemr( int matrix_order, char jobz, char range, + lapack_int n, double* d, double* e, double vl, + double vu, lapack_int il, 
lapack_int iu, + lapack_int* m, double* w, double* z, lapack_int ldz, + lapack_int nzc, lapack_int* isuppz, + lapack_logical* tryrac ); +lapack_int LAPACKE_cstemr( int matrix_order, char jobz, char range, + lapack_int n, float* d, float* e, float vl, float vu, + lapack_int il, lapack_int iu, lapack_int* m, + float* w, lapack_complex_float* z, lapack_int ldz, + lapack_int nzc, lapack_int* isuppz, + lapack_logical* tryrac ); +lapack_int LAPACKE_zstemr( int matrix_order, char jobz, char range, + lapack_int n, double* d, double* e, double vl, + double vu, lapack_int il, lapack_int iu, + lapack_int* m, double* w, lapack_complex_double* z, + lapack_int ldz, lapack_int nzc, lapack_int* isuppz, + lapack_logical* tryrac ); + +lapack_int LAPACKE_ssteqr( int matrix_order, char compz, lapack_int n, float* d, + float* e, float* z, lapack_int ldz ); +lapack_int LAPACKE_dsteqr( int matrix_order, char compz, lapack_int n, + double* d, double* e, double* z, lapack_int ldz ); +lapack_int LAPACKE_csteqr( int matrix_order, char compz, lapack_int n, float* d, + float* e, lapack_complex_float* z, lapack_int ldz ); +lapack_int LAPACKE_zsteqr( int matrix_order, char compz, lapack_int n, + double* d, double* e, lapack_complex_double* z, + lapack_int ldz ); + +lapack_int LAPACKE_ssterf( lapack_int n, float* d, float* e ); +lapack_int LAPACKE_dsterf( lapack_int n, double* d, double* e ); + +lapack_int LAPACKE_sstev( int matrix_order, char jobz, lapack_int n, float* d, + float* e, float* z, lapack_int ldz ); +lapack_int LAPACKE_dstev( int matrix_order, char jobz, lapack_int n, double* d, + double* e, double* z, lapack_int ldz ); + +lapack_int LAPACKE_sstevd( int matrix_order, char jobz, lapack_int n, float* d, + float* e, float* z, lapack_int ldz ); +lapack_int LAPACKE_dstevd( int matrix_order, char jobz, lapack_int n, double* d, + double* e, double* z, lapack_int ldz ); + +lapack_int LAPACKE_sstevr( int matrix_order, char jobz, char range, + lapack_int n, float* d, float* e, float vl, float 
vu, + lapack_int il, lapack_int iu, float abstol, + lapack_int* m, float* w, float* z, lapack_int ldz, + lapack_int* isuppz ); +lapack_int LAPACKE_dstevr( int matrix_order, char jobz, char range, + lapack_int n, double* d, double* e, double vl, + double vu, lapack_int il, lapack_int iu, + double abstol, lapack_int* m, double* w, double* z, + lapack_int ldz, lapack_int* isuppz ); + +lapack_int LAPACKE_sstevx( int matrix_order, char jobz, char range, + lapack_int n, float* d, float* e, float vl, float vu, + lapack_int il, lapack_int iu, float abstol, + lapack_int* m, float* w, float* z, lapack_int ldz, + lapack_int* ifail ); +lapack_int LAPACKE_dstevx( int matrix_order, char jobz, char range, + lapack_int n, double* d, double* e, double vl, + double vu, lapack_int il, lapack_int iu, + double abstol, lapack_int* m, double* w, double* z, + lapack_int ldz, lapack_int* ifail ); + +lapack_int LAPACKE_ssycon( int matrix_order, char uplo, lapack_int n, + const float* a, lapack_int lda, + const lapack_int* ipiv, float anorm, float* rcond ); +lapack_int LAPACKE_dsycon( int matrix_order, char uplo, lapack_int n, + const double* a, lapack_int lda, + const lapack_int* ipiv, double anorm, + double* rcond ); +lapack_int LAPACKE_csycon( int matrix_order, char uplo, lapack_int n, + const lapack_complex_float* a, lapack_int lda, + const lapack_int* ipiv, float anorm, float* rcond ); +lapack_int LAPACKE_zsycon( int matrix_order, char uplo, lapack_int n, + const lapack_complex_double* a, lapack_int lda, + const lapack_int* ipiv, double anorm, + double* rcond ); + +lapack_int LAPACKE_ssyequb( int matrix_order, char uplo, lapack_int n, + const float* a, lapack_int lda, float* s, + float* scond, float* amax ); +lapack_int LAPACKE_dsyequb( int matrix_order, char uplo, lapack_int n, + const double* a, lapack_int lda, double* s, + double* scond, double* amax ); +lapack_int LAPACKE_csyequb( int matrix_order, char uplo, lapack_int n, + const lapack_complex_float* a, lapack_int lda, + float* s, 
float* scond, float* amax ); +lapack_int LAPACKE_zsyequb( int matrix_order, char uplo, lapack_int n, + const lapack_complex_double* a, lapack_int lda, + double* s, double* scond, double* amax ); + +lapack_int LAPACKE_ssyev( int matrix_order, char jobz, char uplo, lapack_int n, + float* a, lapack_int lda, float* w ); +lapack_int LAPACKE_dsyev( int matrix_order, char jobz, char uplo, lapack_int n, + double* a, lapack_int lda, double* w ); + +lapack_int LAPACKE_ssyevd( int matrix_order, char jobz, char uplo, lapack_int n, + float* a, lapack_int lda, float* w ); +lapack_int LAPACKE_dsyevd( int matrix_order, char jobz, char uplo, lapack_int n, + double* a, lapack_int lda, double* w ); + +lapack_int LAPACKE_ssyevr( int matrix_order, char jobz, char range, char uplo, + lapack_int n, float* a, lapack_int lda, float vl, + float vu, lapack_int il, lapack_int iu, float abstol, + lapack_int* m, float* w, float* z, lapack_int ldz, + lapack_int* isuppz ); +lapack_int LAPACKE_dsyevr( int matrix_order, char jobz, char range, char uplo, + lapack_int n, double* a, lapack_int lda, double vl, + double vu, lapack_int il, lapack_int iu, + double abstol, lapack_int* m, double* w, double* z, + lapack_int ldz, lapack_int* isuppz ); + +lapack_int LAPACKE_ssyevx( int matrix_order, char jobz, char range, char uplo, + lapack_int n, float* a, lapack_int lda, float vl, + float vu, lapack_int il, lapack_int iu, float abstol, + lapack_int* m, float* w, float* z, lapack_int ldz, + lapack_int* ifail ); +lapack_int LAPACKE_dsyevx( int matrix_order, char jobz, char range, char uplo, + lapack_int n, double* a, lapack_int lda, double vl, + double vu, lapack_int il, lapack_int iu, + double abstol, lapack_int* m, double* w, double* z, + lapack_int ldz, lapack_int* ifail ); + +lapack_int LAPACKE_ssygst( int matrix_order, lapack_int itype, char uplo, + lapack_int n, float* a, lapack_int lda, + const float* b, lapack_int ldb ); +lapack_int LAPACKE_dsygst( int matrix_order, lapack_int itype, char uplo, + 
lapack_int n, double* a, lapack_int lda, + const double* b, lapack_int ldb ); +/* LAPACKE_?sygv: float (s) and double (d) prototypes; a, b and w are all non-const pointers. */ +lapack_int LAPACKE_ssygv( int matrix_order, lapack_int itype, char jobz, + char uplo, lapack_int n, float* a, lapack_int lda, + float* b, lapack_int ldb, float* w ); +lapack_int LAPACKE_dsygv( int matrix_order, lapack_int itype, char jobz, + char uplo, lapack_int n, double* a, lapack_int lda, + double* b, lapack_int ldb, double* w ); +/* LAPACKE_?sygvd: float and double prototypes; the parameter list is the same as LAPACKE_?sygv. */ +lapack_int LAPACKE_ssygvd( int matrix_order, lapack_int itype, char jobz, + char uplo, lapack_int n, float* a, lapack_int lda, + float* b, lapack_int ldb, float* w ); +lapack_int LAPACKE_dsygvd( int matrix_order, lapack_int itype, char jobz, + char uplo, lapack_int n, double* a, lapack_int lda, + double* b, lapack_int ldb, double* w ); +/* LAPACKE_?sygvx: float and double prototypes; adds range, vl/vu, il/iu, abstol, m, z/ldz and ifail to the LAPACKE_?sygv parameters. */ +lapack_int LAPACKE_ssygvx( int matrix_order, lapack_int itype, char jobz, + char range, char uplo, lapack_int n, float* a, + lapack_int lda, float* b, lapack_int ldb, float vl, + float vu, lapack_int il, lapack_int iu, float abstol, + lapack_int* m, float* w, float* z, lapack_int ldz, + lapack_int* ifail ); +lapack_int LAPACKE_dsygvx( int matrix_order, lapack_int itype, char jobz, + char range, char uplo, lapack_int n, double* a, + lapack_int lda, double* b, lapack_int ldb, double vl, + double vu, lapack_int il, lapack_int iu, + double abstol, lapack_int* m, double* w, double* z, + lapack_int ldz, lapack_int* ifail ); +/* LAPACKE_?syrfs: s/d/c/z prototypes; a, af, ipiv and b are const-qualified, x is not; ferr and berr are real-typed in every variant. */ +lapack_int LAPACKE_ssyrfs( int matrix_order, char uplo, lapack_int n, + lapack_int nrhs, const float* a, lapack_int lda, + const float* af, lapack_int ldaf, + const lapack_int* ipiv, const float* b, + lapack_int ldb, float* x, lapack_int ldx, + float* ferr, float* berr ); +lapack_int LAPACKE_dsyrfs( int matrix_order, char uplo, lapack_int n, + lapack_int nrhs, const double* a, lapack_int lda, + const double* af, lapack_int ldaf, + const lapack_int* ipiv, const double* b, + lapack_int ldb, double* x, lapack_int ldx, + double* ferr, double* berr ); +lapack_int LAPACKE_csyrfs( int matrix_order, char uplo, 
lapack_int n, + lapack_int nrhs, const lapack_complex_float* a, + lapack_int lda, const lapack_complex_float* af, + lapack_int ldaf, const lapack_int* ipiv, + const lapack_complex_float* b, lapack_int ldb, + lapack_complex_float* x, lapack_int ldx, float* ferr, + float* berr ); +lapack_int LAPACKE_zsyrfs( int matrix_order, char uplo, lapack_int n, + lapack_int nrhs, const lapack_complex_double* a, + lapack_int lda, const lapack_complex_double* af, + lapack_int ldaf, const lapack_int* ipiv, + const lapack_complex_double* b, lapack_int ldb, + lapack_complex_double* x, lapack_int ldx, + double* ferr, double* berr ); +/* LAPACKE_?syrfsx: s/d/c/z prototypes; a, af, ipiv, s and b are const-qualified, x is not. s, rcond, berr, err_bnds_norm, err_bnds_comp and params are real-typed in every variant. */ +lapack_int LAPACKE_ssyrfsx( int matrix_order, char uplo, char equed, + lapack_int n, lapack_int nrhs, const float* a, + lapack_int lda, const float* af, lapack_int ldaf, + const lapack_int* ipiv, const float* s, + const float* b, lapack_int ldb, float* x, + lapack_int ldx, float* rcond, float* berr, + lapack_int n_err_bnds, float* err_bnds_norm, + float* err_bnds_comp, lapack_int nparams, + float* params ); +lapack_int LAPACKE_dsyrfsx( int matrix_order, char uplo, char equed, + lapack_int n, lapack_int nrhs, const double* a, + lapack_int lda, const double* af, lapack_int ldaf, + const lapack_int* ipiv, const double* s, + const double* b, lapack_int ldb, double* x, + lapack_int ldx, double* rcond, double* berr, + lapack_int n_err_bnds, double* err_bnds_norm, + double* err_bnds_comp, lapack_int nparams, + double* params ); +lapack_int LAPACKE_csyrfsx( int matrix_order, char uplo, char equed, + lapack_int n, lapack_int nrhs, + const lapack_complex_float* a, lapack_int lda, + const lapack_complex_float* af, lapack_int ldaf, + const lapack_int* ipiv, const float* s, + const lapack_complex_float* b, lapack_int ldb, + lapack_complex_float* x, lapack_int ldx, + float* rcond, float* berr, lapack_int n_err_bnds, + float* err_bnds_norm, float* err_bnds_comp, + lapack_int nparams, float* params ); +lapack_int LAPACKE_zsyrfsx( int matrix_order, char uplo, char 
equed, + lapack_int n, lapack_int nrhs, + const lapack_complex_double* a, lapack_int lda, + const lapack_complex_double* af, lapack_int ldaf, + const lapack_int* ipiv, const double* s, + const lapack_complex_double* b, lapack_int ldb, + lapack_complex_double* x, lapack_int ldx, + double* rcond, double* berr, lapack_int n_err_bnds, + double* err_bnds_norm, double* err_bnds_comp, + lapack_int nparams, double* params ); +/* LAPACKE_?sysv: s/d/c/z prototypes; a, ipiv and b are all non-const pointers. */ +lapack_int LAPACKE_ssysv( int matrix_order, char uplo, lapack_int n, + lapack_int nrhs, float* a, lapack_int lda, + lapack_int* ipiv, float* b, lapack_int ldb ); +lapack_int LAPACKE_dsysv( int matrix_order, char uplo, lapack_int n, + lapack_int nrhs, double* a, lapack_int lda, + lapack_int* ipiv, double* b, lapack_int ldb ); +lapack_int LAPACKE_csysv( int matrix_order, char uplo, lapack_int n, + lapack_int nrhs, lapack_complex_float* a, + lapack_int lda, lapack_int* ipiv, + lapack_complex_float* b, lapack_int ldb ); +lapack_int LAPACKE_zsysv( int matrix_order, char uplo, lapack_int n, + lapack_int nrhs, lapack_complex_double* a, + lapack_int lda, lapack_int* ipiv, + lapack_complex_double* b, lapack_int ldb ); +/* LAPACKE_?sysvx: s/d/c/z prototypes; a and b are const-qualified, while af, ipiv and x are not. rcond, ferr and berr are real-typed in every variant. */ +lapack_int LAPACKE_ssysvx( int matrix_order, char fact, char uplo, lapack_int n, + lapack_int nrhs, const float* a, lapack_int lda, + float* af, lapack_int ldaf, lapack_int* ipiv, + const float* b, lapack_int ldb, float* x, + lapack_int ldx, float* rcond, float* ferr, + float* berr ); +lapack_int LAPACKE_dsysvx( int matrix_order, char fact, char uplo, lapack_int n, + lapack_int nrhs, const double* a, lapack_int lda, + double* af, lapack_int ldaf, lapack_int* ipiv, + const double* b, lapack_int ldb, double* x, + lapack_int ldx, double* rcond, double* ferr, + double* berr ); +lapack_int LAPACKE_csysvx( int matrix_order, char fact, char uplo, lapack_int n, + lapack_int nrhs, const lapack_complex_float* a, + lapack_int lda, lapack_complex_float* af, + lapack_int ldaf, lapack_int* ipiv, + const lapack_complex_float* b, lapack_int ldb, + 
lapack_complex_float* x, lapack_int ldx, + float* rcond, float* ferr, float* berr ); +lapack_int LAPACKE_zsysvx( int matrix_order, char fact, char uplo, lapack_int n, + lapack_int nrhs, const lapack_complex_double* a, + lapack_int lda, lapack_complex_double* af, + lapack_int ldaf, lapack_int* ipiv, + const lapack_complex_double* b, lapack_int ldb, + lapack_complex_double* x, lapack_int ldx, + double* rcond, double* ferr, double* berr ); +/* LAPACKE_?sysvxx: s/d/c/z prototypes; here a, af, ipiv, s, b and x are all non-const, and equed is passed as char*. s, rcond, rpvgrw, berr, the err_bnds_* arrays and params are real-typed in every variant. */ +lapack_int LAPACKE_ssysvxx( int matrix_order, char fact, char uplo, + lapack_int n, lapack_int nrhs, float* a, + lapack_int lda, float* af, lapack_int ldaf, + lapack_int* ipiv, char* equed, float* s, float* b, + lapack_int ldb, float* x, lapack_int ldx, + float* rcond, float* rpvgrw, float* berr, + lapack_int n_err_bnds, float* err_bnds_norm, + float* err_bnds_comp, lapack_int nparams, + float* params ); +lapack_int LAPACKE_dsysvxx( int matrix_order, char fact, char uplo, + lapack_int n, lapack_int nrhs, double* a, + lapack_int lda, double* af, lapack_int ldaf, + lapack_int* ipiv, char* equed, double* s, double* b, + lapack_int ldb, double* x, lapack_int ldx, + double* rcond, double* rpvgrw, double* berr, + lapack_int n_err_bnds, double* err_bnds_norm, + double* err_bnds_comp, lapack_int nparams, + double* params ); +lapack_int LAPACKE_csysvxx( int matrix_order, char fact, char uplo, + lapack_int n, lapack_int nrhs, + lapack_complex_float* a, lapack_int lda, + lapack_complex_float* af, lapack_int ldaf, + lapack_int* ipiv, char* equed, float* s, + lapack_complex_float* b, lapack_int ldb, + lapack_complex_float* x, lapack_int ldx, + float* rcond, float* rpvgrw, float* berr, + lapack_int n_err_bnds, float* err_bnds_norm, + float* err_bnds_comp, lapack_int nparams, + float* params ); +lapack_int LAPACKE_zsysvxx( int matrix_order, char fact, char uplo, + lapack_int n, lapack_int nrhs, + lapack_complex_double* a, lapack_int lda, + lapack_complex_double* af, lapack_int ldaf, + lapack_int* ipiv, char* equed, double* s, + 
lapack_complex_double* b, lapack_int ldb, + lapack_complex_double* x, lapack_int ldx, + double* rcond, double* rpvgrw, double* berr, + lapack_int n_err_bnds, double* err_bnds_norm, + double* err_bnds_comp, lapack_int nparams, + double* params ); +/* LAPACKE_?sytrd: float (s) and double (d) prototypes; a, d, e and tau are all non-const pointers. */ +lapack_int LAPACKE_ssytrd( int matrix_order, char uplo, lapack_int n, float* a, + lapack_int lda, float* d, float* e, float* tau ); +lapack_int LAPACKE_dsytrd( int matrix_order, char uplo, lapack_int n, double* a, + lapack_int lda, double* d, double* e, double* tau ); +/* LAPACKE_?sytrf: s/d/c/z prototypes; ipiv is a non-const lapack_int*. */ +lapack_int LAPACKE_ssytrf( int matrix_order, char uplo, lapack_int n, float* a, + lapack_int lda, lapack_int* ipiv ); +lapack_int LAPACKE_dsytrf( int matrix_order, char uplo, lapack_int n, double* a, + lapack_int lda, lapack_int* ipiv ); +lapack_int LAPACKE_csytrf( int matrix_order, char uplo, lapack_int n, + lapack_complex_float* a, lapack_int lda, + lapack_int* ipiv ); +lapack_int LAPACKE_zsytrf( int matrix_order, char uplo, lapack_int n, + lapack_complex_double* a, lapack_int lda, + lapack_int* ipiv ); +/* LAPACKE_?sytri: s/d/c/z prototypes; same shape as LAPACKE_?sytrf except that ipiv is const-qualified. */ +lapack_int LAPACKE_ssytri( int matrix_order, char uplo, lapack_int n, float* a, + lapack_int lda, const lapack_int* ipiv ); +lapack_int LAPACKE_dsytri( int matrix_order, char uplo, lapack_int n, double* a, + lapack_int lda, const lapack_int* ipiv ); +lapack_int LAPACKE_csytri( int matrix_order, char uplo, lapack_int n, + lapack_complex_float* a, lapack_int lda, + const lapack_int* ipiv ); +lapack_int LAPACKE_zsytri( int matrix_order, char uplo, lapack_int n, + lapack_complex_double* a, lapack_int lda, + const lapack_int* ipiv ); +/* LAPACKE_?sytrs: s/d/c/z prototypes; a and ipiv are const-qualified, b is not. */ +lapack_int LAPACKE_ssytrs( int matrix_order, char uplo, lapack_int n, + lapack_int nrhs, const float* a, lapack_int lda, + const lapack_int* ipiv, float* b, lapack_int ldb ); +lapack_int LAPACKE_dsytrs( int matrix_order, char uplo, lapack_int n, + lapack_int nrhs, const double* a, lapack_int lda, + const lapack_int* ipiv, double* b, lapack_int ldb ); +lapack_int LAPACKE_csytrs( int matrix_order, char uplo, lapack_int n, + 
lapack_int nrhs, const lapack_complex_float* a, + lapack_int lda, const lapack_int* ipiv, + lapack_complex_float* b, lapack_int ldb ); +lapack_int LAPACKE_zsytrs( int matrix_order, char uplo, lapack_int n, + lapack_int nrhs, const lapack_complex_double* a, + lapack_int lda, const lapack_int* ipiv, + lapack_complex_double* b, lapack_int ldb ); +/* LAPACKE_?tbcon: s/d/c/z prototypes; ab is const-qualified and rcond is real-typed in every variant. */ +lapack_int LAPACKE_stbcon( int matrix_order, char norm, char uplo, char diag, + lapack_int n, lapack_int kd, const float* ab, + lapack_int ldab, float* rcond ); +lapack_int LAPACKE_dtbcon( int matrix_order, char norm, char uplo, char diag, + lapack_int n, lapack_int kd, const double* ab, + lapack_int ldab, double* rcond ); +lapack_int LAPACKE_ctbcon( int matrix_order, char norm, char uplo, char diag, + lapack_int n, lapack_int kd, + const lapack_complex_float* ab, lapack_int ldab, + float* rcond ); +lapack_int LAPACKE_ztbcon( int matrix_order, char norm, char uplo, char diag, + lapack_int n, lapack_int kd, + const lapack_complex_double* ab, lapack_int ldab, + double* rcond ); +/* LAPACKE_?tbrfs: s/d/c/z prototypes; ab, b and x are all const-qualified; ferr and berr are real-typed in every variant. */ +lapack_int LAPACKE_stbrfs( int matrix_order, char uplo, char trans, char diag, + lapack_int n, lapack_int kd, lapack_int nrhs, + const float* ab, lapack_int ldab, const float* b, + lapack_int ldb, const float* x, lapack_int ldx, + float* ferr, float* berr ); +lapack_int LAPACKE_dtbrfs( int matrix_order, char uplo, char trans, char diag, + lapack_int n, lapack_int kd, lapack_int nrhs, + const double* ab, lapack_int ldab, const double* b, + lapack_int ldb, const double* x, lapack_int ldx, + double* ferr, double* berr ); +lapack_int LAPACKE_ctbrfs( int matrix_order, char uplo, char trans, char diag, + lapack_int n, lapack_int kd, lapack_int nrhs, + const lapack_complex_float* ab, lapack_int ldab, + const lapack_complex_float* b, lapack_int ldb, + const lapack_complex_float* x, lapack_int ldx, + float* ferr, float* berr ); +lapack_int LAPACKE_ztbrfs( int matrix_order, char uplo, char trans, char diag, + lapack_int n, lapack_int kd, lapack_int nrhs, + 
const lapack_complex_double* ab, lapack_int ldab, + const lapack_complex_double* b, lapack_int ldb, + const lapack_complex_double* x, lapack_int ldx, + double* ferr, double* berr ); +/* LAPACKE_?tbtrs: s/d/c/z prototypes; ab is const-qualified, b is not. */ +lapack_int LAPACKE_stbtrs( int matrix_order, char uplo, char trans, char diag, + lapack_int n, lapack_int kd, lapack_int nrhs, + const float* ab, lapack_int ldab, float* b, + lapack_int ldb ); +lapack_int LAPACKE_dtbtrs( int matrix_order, char uplo, char trans, char diag, + lapack_int n, lapack_int kd, lapack_int nrhs, + const double* ab, lapack_int ldab, double* b, + lapack_int ldb ); +lapack_int LAPACKE_ctbtrs( int matrix_order, char uplo, char trans, char diag, + lapack_int n, lapack_int kd, lapack_int nrhs, + const lapack_complex_float* ab, lapack_int ldab, + lapack_complex_float* b, lapack_int ldb ); +lapack_int LAPACKE_ztbtrs( int matrix_order, char uplo, char trans, char diag, + lapack_int n, lapack_int kd, lapack_int nrhs, + const lapack_complex_double* ab, lapack_int ldab, + lapack_complex_double* b, lapack_int ldb ); +/* LAPACKE_?tfsm: s/d/c/z prototypes; alpha is passed by value in the variant's own scalar type (complex for c/z). a is const-qualified and has no lda parameter; b is not const. */ +lapack_int LAPACKE_stfsm( int matrix_order, char transr, char side, char uplo, + char trans, char diag, lapack_int m, lapack_int n, + float alpha, const float* a, float* b, + lapack_int ldb ); +lapack_int LAPACKE_dtfsm( int matrix_order, char transr, char side, char uplo, + char trans, char diag, lapack_int m, lapack_int n, + double alpha, const double* a, double* b, + lapack_int ldb ); +lapack_int LAPACKE_ctfsm( int matrix_order, char transr, char side, char uplo, + char trans, char diag, lapack_int m, lapack_int n, + lapack_complex_float alpha, + const lapack_complex_float* a, + lapack_complex_float* b, lapack_int ldb ); +lapack_int LAPACKE_ztfsm( int matrix_order, char transr, char side, char uplo, + char trans, char diag, lapack_int m, lapack_int n, + lapack_complex_double alpha, + const lapack_complex_double* a, + lapack_complex_double* b, lapack_int ldb ); +/* LAPACKE_?tftri: s/d/c/z prototypes; a is a non-const pointer passed without an accompanying lda parameter. */ +lapack_int LAPACKE_stftri( int matrix_order, char transr, char uplo, char diag, + lapack_int 
n, float* a ); +lapack_int LAPACKE_dtftri( int matrix_order, char transr, char uplo, char diag, + lapack_int n, double* a ); +lapack_int LAPACKE_ctftri( int matrix_order, char transr, char uplo, char diag, + lapack_int n, lapack_complex_float* a ); +lapack_int LAPACKE_ztftri( int matrix_order, char transr, char uplo, char diag, + lapack_int n, lapack_complex_double* a ); +/* LAPACKE_?tfttp: s/d/c/z prototypes; arf is const-qualified, ap is not. */ +lapack_int LAPACKE_stfttp( int matrix_order, char transr, char uplo, + lapack_int n, const float* arf, float* ap ); +lapack_int LAPACKE_dtfttp( int matrix_order, char transr, char uplo, + lapack_int n, const double* arf, double* ap ); +lapack_int LAPACKE_ctfttp( int matrix_order, char transr, char uplo, + lapack_int n, const lapack_complex_float* arf, + lapack_complex_float* ap ); +lapack_int LAPACKE_ztfttp( int matrix_order, char transr, char uplo, + lapack_int n, const lapack_complex_double* arf, + lapack_complex_double* ap ); +/* LAPACKE_?tfttr: s/d/c/z prototypes; arf is const-qualified, a (with its lda) is not. */ +lapack_int LAPACKE_stfttr( int matrix_order, char transr, char uplo, + lapack_int n, const float* arf, float* a, + lapack_int lda ); +lapack_int LAPACKE_dtfttr( int matrix_order, char transr, char uplo, + lapack_int n, const double* arf, double* a, + lapack_int lda ); +lapack_int LAPACKE_ctfttr( int matrix_order, char transr, char uplo, + lapack_int n, const lapack_complex_float* arf, + lapack_complex_float* a, lapack_int lda ); +lapack_int LAPACKE_ztfttr( int matrix_order, char transr, char uplo, + lapack_int n, const lapack_complex_double* arf, + lapack_complex_double* a, lapack_int lda ); +/* LAPACKE_?tgevc: s/d/c/z prototypes; select, s and p are const-qualified in every variant, vl and vr are not. */ +lapack_int LAPACKE_stgevc( int matrix_order, char side, char howmny, + const lapack_logical* select, lapack_int n, + const float* s, lapack_int lds, const float* p, + lapack_int ldp, float* vl, lapack_int ldvl, + float* vr, lapack_int ldvr, lapack_int mm, + lapack_int* m ); +lapack_int LAPACKE_dtgevc( int matrix_order, char side, char howmny, + const lapack_logical* select, lapack_int n, + const double* s, lapack_int lds, const double* p, + lapack_int ldp, double* vl, 
lapack_int ldvl, + double* vr, lapack_int ldvr, lapack_int mm, + lapack_int* m ); +lapack_int LAPACKE_ctgevc( int matrix_order, char side, char howmny, + const lapack_logical* select, lapack_int n, + const lapack_complex_float* s, lapack_int lds, + const lapack_complex_float* p, lapack_int ldp, + lapack_complex_float* vl, lapack_int ldvl, + lapack_complex_float* vr, lapack_int ldvr, + lapack_int mm, lapack_int* m ); +lapack_int LAPACKE_ztgevc( int matrix_order, char side, char howmny, + const lapack_logical* select, lapack_int n, + const lapack_complex_double* s, lapack_int lds, + const lapack_complex_double* p, lapack_int ldp, + lapack_complex_double* vl, lapack_int ldvl, + lapack_complex_double* vr, lapack_int ldvr, + lapack_int mm, lapack_int* m ); +/* LAPACKE_?tgexc: s/d/c/z prototypes. NOTE: the s/d variants take ifst and ilst as lapack_int* whereas the c/z variants take them by value, so the four signatures are not interchangeable. */ +lapack_int LAPACKE_stgexc( int matrix_order, lapack_logical wantq, + lapack_logical wantz, lapack_int n, float* a, + lapack_int lda, float* b, lapack_int ldb, float* q, + lapack_int ldq, float* z, lapack_int ldz, + lapack_int* ifst, lapack_int* ilst ); +lapack_int LAPACKE_dtgexc( int matrix_order, lapack_logical wantq, + lapack_logical wantz, lapack_int n, double* a, + lapack_int lda, double* b, lapack_int ldb, double* q, + lapack_int ldq, double* z, lapack_int ldz, + lapack_int* ifst, lapack_int* ilst ); +lapack_int LAPACKE_ctgexc( int matrix_order, lapack_logical wantq, + lapack_logical wantz, lapack_int n, + lapack_complex_float* a, lapack_int lda, + lapack_complex_float* b, lapack_int ldb, + lapack_complex_float* q, lapack_int ldq, + lapack_complex_float* z, lapack_int ldz, + lapack_int ifst, lapack_int ilst ); +lapack_int LAPACKE_ztgexc( int matrix_order, lapack_logical wantq, + lapack_logical wantz, lapack_int n, + lapack_complex_double* a, lapack_int lda, + lapack_complex_double* b, lapack_int ldb, + lapack_complex_double* q, lapack_int ldq, + lapack_complex_double* z, lapack_int ldz, + lapack_int ifst, lapack_int ilst ); +/* LAPACKE_?tgsen: s/d/c/z prototypes; the s/d variants declare separate alphar and alphai arrays, the c/z variants a single complex alpha. pl, pr and dif are real-typed in every variant. */ +lapack_int LAPACKE_stgsen( int matrix_order, lapack_int ijob, + lapack_logical 
wantq, lapack_logical wantz, + const lapack_logical* select, lapack_int n, float* a, + lapack_int lda, float* b, lapack_int ldb, + float* alphar, float* alphai, float* beta, float* q, + lapack_int ldq, float* z, lapack_int ldz, + lapack_int* m, float* pl, float* pr, float* dif ); +lapack_int LAPACKE_dtgsen( int matrix_order, lapack_int ijob, + lapack_logical wantq, lapack_logical wantz, + const lapack_logical* select, lapack_int n, + double* a, lapack_int lda, double* b, lapack_int ldb, + double* alphar, double* alphai, double* beta, + double* q, lapack_int ldq, double* z, lapack_int ldz, + lapack_int* m, double* pl, double* pr, double* dif ); +lapack_int LAPACKE_ctgsen( int matrix_order, lapack_int ijob, + lapack_logical wantq, lapack_logical wantz, + const lapack_logical* select, lapack_int n, + lapack_complex_float* a, lapack_int lda, + lapack_complex_float* b, lapack_int ldb, + lapack_complex_float* alpha, + lapack_complex_float* beta, lapack_complex_float* q, + lapack_int ldq, lapack_complex_float* z, + lapack_int ldz, lapack_int* m, float* pl, float* pr, + float* dif ); +lapack_int LAPACKE_ztgsen( int matrix_order, lapack_int ijob, + lapack_logical wantq, lapack_logical wantz, + const lapack_logical* select, lapack_int n, + lapack_complex_double* a, lapack_int lda, + lapack_complex_double* b, lapack_int ldb, + lapack_complex_double* alpha, + lapack_complex_double* beta, + lapack_complex_double* q, lapack_int ldq, + lapack_complex_double* z, lapack_int ldz, + lapack_int* m, double* pl, double* pr, double* dif ); +/* LAPACKE_?tgsja: s/d/c/z prototypes; k and l are passed by value; tola, tolb, alpha and beta are real-typed in every variant; the last parameter is lapack_int* ncycle. */ +lapack_int LAPACKE_stgsja( int matrix_order, char jobu, char jobv, char jobq, + lapack_int m, lapack_int p, lapack_int n, + lapack_int k, lapack_int l, float* a, lapack_int lda, + float* b, lapack_int ldb, float tola, float tolb, + float* alpha, float* beta, float* u, lapack_int ldu, + float* v, lapack_int ldv, float* q, lapack_int ldq, + lapack_int* ncycle ); +lapack_int LAPACKE_dtgsja( int matrix_order, char jobu, char jobv, char jobq, + 
lapack_int m, lapack_int p, lapack_int n, + lapack_int k, lapack_int l, double* a, + lapack_int lda, double* b, lapack_int ldb, + double tola, double tolb, double* alpha, + double* beta, double* u, lapack_int ldu, double* v, + lapack_int ldv, double* q, lapack_int ldq, + lapack_int* ncycle ); +lapack_int LAPACKE_ctgsja( int matrix_order, char jobu, char jobv, char jobq, + lapack_int m, lapack_int p, lapack_int n, + lapack_int k, lapack_int l, lapack_complex_float* a, + lapack_int lda, lapack_complex_float* b, + lapack_int ldb, float tola, float tolb, float* alpha, + float* beta, lapack_complex_float* u, lapack_int ldu, + lapack_complex_float* v, lapack_int ldv, + lapack_complex_float* q, lapack_int ldq, + lapack_int* ncycle ); +lapack_int LAPACKE_ztgsja( int matrix_order, char jobu, char jobv, char jobq, + lapack_int m, lapack_int p, lapack_int n, + lapack_int k, lapack_int l, lapack_complex_double* a, + lapack_int lda, lapack_complex_double* b, + lapack_int ldb, double tola, double tolb, + double* alpha, double* beta, + lapack_complex_double* u, lapack_int ldu, + lapack_complex_double* v, lapack_int ldv, + lapack_complex_double* q, lapack_int ldq, + lapack_int* ncycle ); +/* LAPACKE_?tgsna: s/d/c/z prototypes; select, a, b, vl and vr are const-qualified; s and dif are real-typed in every variant. */ +lapack_int LAPACKE_stgsna( int matrix_order, char job, char howmny, + const lapack_logical* select, lapack_int n, + const float* a, lapack_int lda, const float* b, + lapack_int ldb, const float* vl, lapack_int ldvl, + const float* vr, lapack_int ldvr, float* s, + float* dif, lapack_int mm, lapack_int* m ); +lapack_int LAPACKE_dtgsna( int matrix_order, char job, char howmny, + const lapack_logical* select, lapack_int n, + const double* a, lapack_int lda, const double* b, + lapack_int ldb, const double* vl, lapack_int ldvl, + const double* vr, lapack_int ldvr, double* s, + double* dif, lapack_int mm, lapack_int* m ); +lapack_int LAPACKE_ctgsna( int matrix_order, char job, char howmny, + const lapack_logical* select, lapack_int n, + const lapack_complex_float* a, lapack_int lda, + const 
lapack_complex_float* b, lapack_int ldb, + const lapack_complex_float* vl, lapack_int ldvl, + const lapack_complex_float* vr, lapack_int ldvr, + float* s, float* dif, lapack_int mm, lapack_int* m ); +lapack_int LAPACKE_ztgsna( int matrix_order, char job, char howmny, + const lapack_logical* select, lapack_int n, + const lapack_complex_double* a, lapack_int lda, + const lapack_complex_double* b, lapack_int ldb, + const lapack_complex_double* vl, lapack_int ldvl, + const lapack_complex_double* vr, lapack_int ldvr, + double* s, double* dif, lapack_int mm, + lapack_int* m ); +/* LAPACKE_?tgsyl: s/d/c/z prototypes; a, b, d and e are const-qualified, c and f are not; scale and dif are real-typed in every variant. */ +lapack_int LAPACKE_stgsyl( int matrix_order, char trans, lapack_int ijob, + lapack_int m, lapack_int n, const float* a, + lapack_int lda, const float* b, lapack_int ldb, + float* c, lapack_int ldc, const float* d, + lapack_int ldd, const float* e, lapack_int lde, + float* f, lapack_int ldf, float* scale, float* dif ); +lapack_int LAPACKE_dtgsyl( int matrix_order, char trans, lapack_int ijob, + lapack_int m, lapack_int n, const double* a, + lapack_int lda, const double* b, lapack_int ldb, + double* c, lapack_int ldc, const double* d, + lapack_int ldd, const double* e, lapack_int lde, + double* f, lapack_int ldf, double* scale, + double* dif ); +lapack_int LAPACKE_ctgsyl( int matrix_order, char trans, lapack_int ijob, + lapack_int m, lapack_int n, + const lapack_complex_float* a, lapack_int lda, + const lapack_complex_float* b, lapack_int ldb, + lapack_complex_float* c, lapack_int ldc, + const lapack_complex_float* d, lapack_int ldd, + const lapack_complex_float* e, lapack_int lde, + lapack_complex_float* f, lapack_int ldf, + float* scale, float* dif ); +lapack_int LAPACKE_ztgsyl( int matrix_order, char trans, lapack_int ijob, + lapack_int m, lapack_int n, + const lapack_complex_double* a, lapack_int lda, + const lapack_complex_double* b, lapack_int ldb, + lapack_complex_double* c, lapack_int ldc, + const lapack_complex_double* d, lapack_int ldd, + const lapack_complex_double* e, lapack_int lde, + 
lapack_complex_double* f, lapack_int ldf, + double* scale, double* dif ); +/* LAPACKE_?tpcon: s/d/c/z prototypes; ap is const-qualified and rcond is real-typed in every variant. */ +lapack_int LAPACKE_stpcon( int matrix_order, char norm, char uplo, char diag, + lapack_int n, const float* ap, float* rcond ); +lapack_int LAPACKE_dtpcon( int matrix_order, char norm, char uplo, char diag, + lapack_int n, const double* ap, double* rcond ); +lapack_int LAPACKE_ctpcon( int matrix_order, char norm, char uplo, char diag, + lapack_int n, const lapack_complex_float* ap, + float* rcond ); +lapack_int LAPACKE_ztpcon( int matrix_order, char norm, char uplo, char diag, + lapack_int n, const lapack_complex_double* ap, + double* rcond ); +/* LAPACKE_?tprfs: s/d/c/z prototypes; ap, b and x are all const-qualified; ferr and berr are real-typed in every variant. */ +lapack_int LAPACKE_stprfs( int matrix_order, char uplo, char trans, char diag, + lapack_int n, lapack_int nrhs, const float* ap, + const float* b, lapack_int ldb, const float* x, + lapack_int ldx, float* ferr, float* berr ); +lapack_int LAPACKE_dtprfs( int matrix_order, char uplo, char trans, char diag, + lapack_int n, lapack_int nrhs, const double* ap, + const double* b, lapack_int ldb, const double* x, + lapack_int ldx, double* ferr, double* berr ); +lapack_int LAPACKE_ctprfs( int matrix_order, char uplo, char trans, char diag, + lapack_int n, lapack_int nrhs, + const lapack_complex_float* ap, + const lapack_complex_float* b, lapack_int ldb, + const lapack_complex_float* x, lapack_int ldx, + float* ferr, float* berr ); +lapack_int LAPACKE_ztprfs( int matrix_order, char uplo, char trans, char diag, + lapack_int n, lapack_int nrhs, + const lapack_complex_double* ap, + const lapack_complex_double* b, lapack_int ldb, + const lapack_complex_double* x, lapack_int ldx, + double* ferr, double* berr ); +/* LAPACKE_?tptri: s/d/c/z prototypes; ap is a non-const pointer. */ +lapack_int LAPACKE_stptri( int matrix_order, char uplo, char diag, lapack_int n, + float* ap ); +lapack_int LAPACKE_dtptri( int matrix_order, char uplo, char diag, lapack_int n, + double* ap ); +lapack_int LAPACKE_ctptri( int matrix_order, char uplo, char diag, lapack_int n, + lapack_complex_float* ap ); +lapack_int LAPACKE_ztptri( int matrix_order, char 
uplo, char diag, lapack_int n, + lapack_complex_double* ap ); +/* LAPACKE_?tptrs: s/d/c/z prototypes; ap is const-qualified, b is not. */ +lapack_int LAPACKE_stptrs( int matrix_order, char uplo, char trans, char diag, + lapack_int n, lapack_int nrhs, const float* ap, + float* b, lapack_int ldb ); +lapack_int LAPACKE_dtptrs( int matrix_order, char uplo, char trans, char diag, + lapack_int n, lapack_int nrhs, const double* ap, + double* b, lapack_int ldb ); +lapack_int LAPACKE_ctptrs( int matrix_order, char uplo, char trans, char diag, + lapack_int n, lapack_int nrhs, + const lapack_complex_float* ap, + lapack_complex_float* b, lapack_int ldb ); +lapack_int LAPACKE_ztptrs( int matrix_order, char uplo, char trans, char diag, + lapack_int n, lapack_int nrhs, + const lapack_complex_double* ap, + lapack_complex_double* b, lapack_int ldb ); +/* LAPACKE_?tpttf: s/d/c/z prototypes; ap is const-qualified, arf is not. */ +lapack_int LAPACKE_stpttf( int matrix_order, char transr, char uplo, + lapack_int n, const float* ap, float* arf ); +lapack_int LAPACKE_dtpttf( int matrix_order, char transr, char uplo, + lapack_int n, const double* ap, double* arf ); +lapack_int LAPACKE_ctpttf( int matrix_order, char transr, char uplo, + lapack_int n, const lapack_complex_float* ap, + lapack_complex_float* arf ); +lapack_int LAPACKE_ztpttf( int matrix_order, char transr, char uplo, + lapack_int n, const lapack_complex_double* ap, + lapack_complex_double* arf ); +/* LAPACKE_?tpttr: s/d/c/z prototypes; ap is const-qualified, a (with its lda) is not. Unlike LAPACKE_?tpttf, there is no transr parameter. */ +lapack_int LAPACKE_stpttr( int matrix_order, char uplo, lapack_int n, + const float* ap, float* a, lapack_int lda ); +lapack_int LAPACKE_dtpttr( int matrix_order, char uplo, lapack_int n, + const double* ap, double* a, lapack_int lda ); +lapack_int LAPACKE_ctpttr( int matrix_order, char uplo, lapack_int n, + const lapack_complex_float* ap, + lapack_complex_float* a, lapack_int lda ); +lapack_int LAPACKE_ztpttr( int matrix_order, char uplo, lapack_int n, + const lapack_complex_double* ap, + lapack_complex_double* a, lapack_int lda ); +/* LAPACKE_?trcon: s/d/c/z prototypes; a is const-qualified and rcond is real-typed in every variant. */ +lapack_int LAPACKE_strcon( int matrix_order, char norm, char uplo, char diag, + lapack_int n, const float* a, lapack_int lda, + float* rcond ); 
+lapack_int LAPACKE_dtrcon( int matrix_order, char norm, char uplo, char diag, + lapack_int n, const double* a, lapack_int lda, + double* rcond ); +lapack_int LAPACKE_ctrcon( int matrix_order, char norm, char uplo, char diag, + lapack_int n, const lapack_complex_float* a, + lapack_int lda, float* rcond ); +lapack_int LAPACKE_ztrcon( int matrix_order, char norm, char uplo, char diag, + lapack_int n, const lapack_complex_double* a, + lapack_int lda, double* rcond ); +/* LAPACKE_?trevc: s/d/c/z prototypes. NOTE: const-ness differs by precision: the s/d variants take a non-const select and a const t, whereas the c/z variants take a const select and a non-const t. */ +lapack_int LAPACKE_strevc( int matrix_order, char side, char howmny, + lapack_logical* select, lapack_int n, const float* t, + lapack_int ldt, float* vl, lapack_int ldvl, + float* vr, lapack_int ldvr, lapack_int mm, + lapack_int* m ); +lapack_int LAPACKE_dtrevc( int matrix_order, char side, char howmny, + lapack_logical* select, lapack_int n, + const double* t, lapack_int ldt, double* vl, + lapack_int ldvl, double* vr, lapack_int ldvr, + lapack_int mm, lapack_int* m ); +lapack_int LAPACKE_ctrevc( int matrix_order, char side, char howmny, + const lapack_logical* select, lapack_int n, + lapack_complex_float* t, lapack_int ldt, + lapack_complex_float* vl, lapack_int ldvl, + lapack_complex_float* vr, lapack_int ldvr, + lapack_int mm, lapack_int* m ); +lapack_int LAPACKE_ztrevc( int matrix_order, char side, char howmny, + const lapack_logical* select, lapack_int n, + lapack_complex_double* t, lapack_int ldt, + lapack_complex_double* vl, lapack_int ldvl, + lapack_complex_double* vr, lapack_int ldvr, + lapack_int mm, lapack_int* m ); +/* LAPACKE_?trexc: s/d/c/z prototypes. NOTE: the s/d variants take ifst and ilst as lapack_int* whereas the c/z variants take them by value. */ +lapack_int LAPACKE_strexc( int matrix_order, char compq, lapack_int n, float* t, + lapack_int ldt, float* q, lapack_int ldq, + lapack_int* ifst, lapack_int* ilst ); +lapack_int LAPACKE_dtrexc( int matrix_order, char compq, lapack_int n, + double* t, lapack_int ldt, double* q, lapack_int ldq, + lapack_int* ifst, lapack_int* ilst ); +lapack_int LAPACKE_ctrexc( int matrix_order, char compq, lapack_int n, + lapack_complex_float* t, lapack_int ldt, + lapack_complex_float* q, 
lapack_int ldq, + lapack_int ifst, lapack_int ilst ); +lapack_int LAPACKE_ztrexc( int matrix_order, char compq, lapack_int n, + lapack_complex_double* t, lapack_int ldt, + lapack_complex_double* q, lapack_int ldq, + lapack_int ifst, lapack_int ilst ); +/* LAPACKE_?trrfs: s/d/c/z prototypes; a, b and x are all const-qualified; ferr and berr are real-typed in every variant. */ +lapack_int LAPACKE_strrfs( int matrix_order, char uplo, char trans, char diag, + lapack_int n, lapack_int nrhs, const float* a, + lapack_int lda, const float* b, lapack_int ldb, + const float* x, lapack_int ldx, float* ferr, + float* berr ); +lapack_int LAPACKE_dtrrfs( int matrix_order, char uplo, char trans, char diag, + lapack_int n, lapack_int nrhs, const double* a, + lapack_int lda, const double* b, lapack_int ldb, + const double* x, lapack_int ldx, double* ferr, + double* berr ); +lapack_int LAPACKE_ctrrfs( int matrix_order, char uplo, char trans, char diag, + lapack_int n, lapack_int nrhs, + const lapack_complex_float* a, lapack_int lda, + const lapack_complex_float* b, lapack_int ldb, + const lapack_complex_float* x, lapack_int ldx, + float* ferr, float* berr ); +lapack_int LAPACKE_ztrrfs( int matrix_order, char uplo, char trans, char diag, + lapack_int n, lapack_int nrhs, + const lapack_complex_double* a, lapack_int lda, + const lapack_complex_double* b, lapack_int ldb, + const lapack_complex_double* x, lapack_int ldx, + double* ferr, double* berr ); +/* LAPACKE_?trsen: s/d/c/z prototypes; the s/d variants declare separate wr and wi arrays, the c/z variants a single complex w. s and sep are real-typed in every variant. */ +lapack_int LAPACKE_strsen( int matrix_order, char job, char compq, + const lapack_logical* select, lapack_int n, float* t, + lapack_int ldt, float* q, lapack_int ldq, float* wr, + float* wi, lapack_int* m, float* s, float* sep ); +lapack_int LAPACKE_dtrsen( int matrix_order, char job, char compq, + const lapack_logical* select, lapack_int n, + double* t, lapack_int ldt, double* q, lapack_int ldq, + double* wr, double* wi, lapack_int* m, double* s, + double* sep ); +lapack_int LAPACKE_ctrsen( int matrix_order, char job, char compq, + const lapack_logical* select, lapack_int n, + lapack_complex_float* t, lapack_int ldt, + lapack_complex_float* q, lapack_int 
ldq, + lapack_complex_float* w, lapack_int* m, float* s, + float* sep ); +lapack_int LAPACKE_ztrsen( int matrix_order, char job, char compq, + const lapack_logical* select, lapack_int n, + lapack_complex_double* t, lapack_int ldt, + lapack_complex_double* q, lapack_int ldq, + lapack_complex_double* w, lapack_int* m, double* s, + double* sep ); +/* LAPACKE_?trsna: s/d/c/z prototypes; select, t, vl and vr are const-qualified; s and sep are real-typed in every variant. */ +lapack_int LAPACKE_strsna( int matrix_order, char job, char howmny, + const lapack_logical* select, lapack_int n, + const float* t, lapack_int ldt, const float* vl, + lapack_int ldvl, const float* vr, lapack_int ldvr, + float* s, float* sep, lapack_int mm, lapack_int* m ); +lapack_int LAPACKE_dtrsna( int matrix_order, char job, char howmny, + const lapack_logical* select, lapack_int n, + const double* t, lapack_int ldt, const double* vl, + lapack_int ldvl, const double* vr, lapack_int ldvr, + double* s, double* sep, lapack_int mm, + lapack_int* m ); +lapack_int LAPACKE_ctrsna( int matrix_order, char job, char howmny, + const lapack_logical* select, lapack_int n, + const lapack_complex_float* t, lapack_int ldt, + const lapack_complex_float* vl, lapack_int ldvl, + const lapack_complex_float* vr, lapack_int ldvr, + float* s, float* sep, lapack_int mm, lapack_int* m ); +lapack_int LAPACKE_ztrsna( int matrix_order, char job, char howmny, + const lapack_logical* select, lapack_int n, + const lapack_complex_double* t, lapack_int ldt, + const lapack_complex_double* vl, lapack_int ldvl, + const lapack_complex_double* vr, lapack_int ldvr, + double* s, double* sep, lapack_int mm, + lapack_int* m ); +/* LAPACKE_?trsyl: s/d/c/z prototypes; a and b are const-qualified, c is not; scale is real-typed in every variant. */ +lapack_int LAPACKE_strsyl( int matrix_order, char trana, char tranb, + lapack_int isgn, lapack_int m, lapack_int n, + const float* a, lapack_int lda, const float* b, + lapack_int ldb, float* c, lapack_int ldc, + float* scale ); +lapack_int LAPACKE_dtrsyl( int matrix_order, char trana, char tranb, + lapack_int isgn, lapack_int m, lapack_int n, + const double* a, lapack_int lda, const double* b, + lapack_int ldb, double* c, lapack_int 
ldc, + double* scale ); +lapack_int LAPACKE_ctrsyl( int matrix_order, char trana, char tranb, + lapack_int isgn, lapack_int m, lapack_int n, + const lapack_complex_float* a, lapack_int lda, + const lapack_complex_float* b, lapack_int ldb, + lapack_complex_float* c, lapack_int ldc, + float* scale ); +lapack_int LAPACKE_ztrsyl( int matrix_order, char trana, char tranb, + lapack_int isgn, lapack_int m, lapack_int n, + const lapack_complex_double* a, lapack_int lda, + const lapack_complex_double* b, lapack_int ldb, + lapack_complex_double* c, lapack_int ldc, + double* scale ); + +lapack_int LAPACKE_strtri( int matrix_order, char uplo, char diag, lapack_int n, + float* a, lapack_int lda ); +lapack_int LAPACKE_dtrtri( int matrix_order, char uplo, char diag, lapack_int n, + double* a, lapack_int lda ); +lapack_int LAPACKE_ctrtri( int matrix_order, char uplo, char diag, lapack_int n, + lapack_complex_float* a, lapack_int lda ); +lapack_int LAPACKE_ztrtri( int matrix_order, char uplo, char diag, lapack_int n, + lapack_complex_double* a, lapack_int lda ); + +lapack_int LAPACKE_strtrs( int matrix_order, char uplo, char trans, char diag, + lapack_int n, lapack_int nrhs, const float* a, + lapack_int lda, float* b, lapack_int ldb ); +lapack_int LAPACKE_dtrtrs( int matrix_order, char uplo, char trans, char diag, + lapack_int n, lapack_int nrhs, const double* a, + lapack_int lda, double* b, lapack_int ldb ); +lapack_int LAPACKE_ctrtrs( int matrix_order, char uplo, char trans, char diag, + lapack_int n, lapack_int nrhs, + const lapack_complex_float* a, lapack_int lda, + lapack_complex_float* b, lapack_int ldb ); +lapack_int LAPACKE_ztrtrs( int matrix_order, char uplo, char trans, char diag, + lapack_int n, lapack_int nrhs, + const lapack_complex_double* a, lapack_int lda, + lapack_complex_double* b, lapack_int ldb ); + +lapack_int LAPACKE_strttf( int matrix_order, char transr, char uplo, + lapack_int n, const float* a, lapack_int lda, + float* arf ); +lapack_int LAPACKE_dtrttf( int 
matrix_order, char transr, char uplo, + lapack_int n, const double* a, lapack_int lda, + double* arf ); +lapack_int LAPACKE_ctrttf( int matrix_order, char transr, char uplo, + lapack_int n, const lapack_complex_float* a, + lapack_int lda, lapack_complex_float* arf ); +lapack_int LAPACKE_ztrttf( int matrix_order, char transr, char uplo, + lapack_int n, const lapack_complex_double* a, + lapack_int lda, lapack_complex_double* arf ); + +lapack_int LAPACKE_strttp( int matrix_order, char uplo, lapack_int n, + const float* a, lapack_int lda, float* ap ); +lapack_int LAPACKE_dtrttp( int matrix_order, char uplo, lapack_int n, + const double* a, lapack_int lda, double* ap ); +lapack_int LAPACKE_ctrttp( int matrix_order, char uplo, lapack_int n, + const lapack_complex_float* a, lapack_int lda, + lapack_complex_float* ap ); +lapack_int LAPACKE_ztrttp( int matrix_order, char uplo, lapack_int n, + const lapack_complex_double* a, lapack_int lda, + lapack_complex_double* ap ); + +lapack_int LAPACKE_stzrzf( int matrix_order, lapack_int m, lapack_int n, + float* a, lapack_int lda, float* tau ); +lapack_int LAPACKE_dtzrzf( int matrix_order, lapack_int m, lapack_int n, + double* a, lapack_int lda, double* tau ); +lapack_int LAPACKE_ctzrzf( int matrix_order, lapack_int m, lapack_int n, + lapack_complex_float* a, lapack_int lda, + lapack_complex_float* tau ); +lapack_int LAPACKE_ztzrzf( int matrix_order, lapack_int m, lapack_int n, + lapack_complex_double* a, lapack_int lda, + lapack_complex_double* tau ); + +lapack_int LAPACKE_cungbr( int matrix_order, char vect, lapack_int m, + lapack_int n, lapack_int k, lapack_complex_float* a, + lapack_int lda, const lapack_complex_float* tau ); +lapack_int LAPACKE_zungbr( int matrix_order, char vect, lapack_int m, + lapack_int n, lapack_int k, lapack_complex_double* a, + lapack_int lda, const lapack_complex_double* tau ); + +lapack_int LAPACKE_cunghr( int matrix_order, lapack_int n, lapack_int ilo, + lapack_int ihi, lapack_complex_float* a, + 
lapack_int lda, const lapack_complex_float* tau ); +lapack_int LAPACKE_zunghr( int matrix_order, lapack_int n, lapack_int ilo, + lapack_int ihi, lapack_complex_double* a, + lapack_int lda, const lapack_complex_double* tau ); + +lapack_int LAPACKE_cunglq( int matrix_order, lapack_int m, lapack_int n, + lapack_int k, lapack_complex_float* a, + lapack_int lda, const lapack_complex_float* tau ); +lapack_int LAPACKE_zunglq( int matrix_order, lapack_int m, lapack_int n, + lapack_int k, lapack_complex_double* a, + lapack_int lda, const lapack_complex_double* tau ); + +lapack_int LAPACKE_cungql( int matrix_order, lapack_int m, lapack_int n, + lapack_int k, lapack_complex_float* a, + lapack_int lda, const lapack_complex_float* tau ); +lapack_int LAPACKE_zungql( int matrix_order, lapack_int m, lapack_int n, + lapack_int k, lapack_complex_double* a, + lapack_int lda, const lapack_complex_double* tau ); + +lapack_int LAPACKE_cungqr( int matrix_order, lapack_int m, lapack_int n, + lapack_int k, lapack_complex_float* a, + lapack_int lda, const lapack_complex_float* tau ); +lapack_int LAPACKE_zungqr( int matrix_order, lapack_int m, lapack_int n, + lapack_int k, lapack_complex_double* a, + lapack_int lda, const lapack_complex_double* tau ); + +lapack_int LAPACKE_cungrq( int matrix_order, lapack_int m, lapack_int n, + lapack_int k, lapack_complex_float* a, + lapack_int lda, const lapack_complex_float* tau ); +lapack_int LAPACKE_zungrq( int matrix_order, lapack_int m, lapack_int n, + lapack_int k, lapack_complex_double* a, + lapack_int lda, const lapack_complex_double* tau ); + +lapack_int LAPACKE_cungtr( int matrix_order, char uplo, lapack_int n, + lapack_complex_float* a, lapack_int lda, + const lapack_complex_float* tau ); +lapack_int LAPACKE_zungtr( int matrix_order, char uplo, lapack_int n, + lapack_complex_double* a, lapack_int lda, + const lapack_complex_double* tau ); + +lapack_int LAPACKE_cunmbr( int matrix_order, char vect, char side, char trans, + lapack_int m, lapack_int 
n, lapack_int k, + const lapack_complex_float* a, lapack_int lda, + const lapack_complex_float* tau, + lapack_complex_float* c, lapack_int ldc ); +lapack_int LAPACKE_zunmbr( int matrix_order, char vect, char side, char trans, + lapack_int m, lapack_int n, lapack_int k, + const lapack_complex_double* a, lapack_int lda, + const lapack_complex_double* tau, + lapack_complex_double* c, lapack_int ldc ); + +lapack_int LAPACKE_cunmhr( int matrix_order, char side, char trans, + lapack_int m, lapack_int n, lapack_int ilo, + lapack_int ihi, const lapack_complex_float* a, + lapack_int lda, const lapack_complex_float* tau, + lapack_complex_float* c, lapack_int ldc ); +lapack_int LAPACKE_zunmhr( int matrix_order, char side, char trans, + lapack_int m, lapack_int n, lapack_int ilo, + lapack_int ihi, const lapack_complex_double* a, + lapack_int lda, const lapack_complex_double* tau, + lapack_complex_double* c, lapack_int ldc ); + +lapack_int LAPACKE_cunmlq( int matrix_order, char side, char trans, + lapack_int m, lapack_int n, lapack_int k, + const lapack_complex_float* a, lapack_int lda, + const lapack_complex_float* tau, + lapack_complex_float* c, lapack_int ldc ); +lapack_int LAPACKE_zunmlq( int matrix_order, char side, char trans, + lapack_int m, lapack_int n, lapack_int k, + const lapack_complex_double* a, lapack_int lda, + const lapack_complex_double* tau, + lapack_complex_double* c, lapack_int ldc ); + +lapack_int LAPACKE_cunmql( int matrix_order, char side, char trans, + lapack_int m, lapack_int n, lapack_int k, + const lapack_complex_float* a, lapack_int lda, + const lapack_complex_float* tau, + lapack_complex_float* c, lapack_int ldc ); +lapack_int LAPACKE_zunmql( int matrix_order, char side, char trans, + lapack_int m, lapack_int n, lapack_int k, + const lapack_complex_double* a, lapack_int lda, + const lapack_complex_double* tau, + lapack_complex_double* c, lapack_int ldc ); + +lapack_int LAPACKE_cunmqr( int matrix_order, char side, char trans, + lapack_int m, 
lapack_int n, lapack_int k, + const lapack_complex_float* a, lapack_int lda, + const lapack_complex_float* tau, + lapack_complex_float* c, lapack_int ldc ); +lapack_int LAPACKE_zunmqr( int matrix_order, char side, char trans, + lapack_int m, lapack_int n, lapack_int k, + const lapack_complex_double* a, lapack_int lda, + const lapack_complex_double* tau, + lapack_complex_double* c, lapack_int ldc ); + +lapack_int LAPACKE_cunmrq( int matrix_order, char side, char trans, + lapack_int m, lapack_int n, lapack_int k, + const lapack_complex_float* a, lapack_int lda, + const lapack_complex_float* tau, + lapack_complex_float* c, lapack_int ldc ); +lapack_int LAPACKE_zunmrq( int matrix_order, char side, char trans, + lapack_int m, lapack_int n, lapack_int k, + const lapack_complex_double* a, lapack_int lda, + const lapack_complex_double* tau, + lapack_complex_double* c, lapack_int ldc ); + +lapack_int LAPACKE_cunmrz( int matrix_order, char side, char trans, + lapack_int m, lapack_int n, lapack_int k, + lapack_int l, const lapack_complex_float* a, + lapack_int lda, const lapack_complex_float* tau, + lapack_complex_float* c, lapack_int ldc ); +lapack_int LAPACKE_zunmrz( int matrix_order, char side, char trans, + lapack_int m, lapack_int n, lapack_int k, + lapack_int l, const lapack_complex_double* a, + lapack_int lda, const lapack_complex_double* tau, + lapack_complex_double* c, lapack_int ldc ); + +lapack_int LAPACKE_cunmtr( int matrix_order, char side, char uplo, char trans, + lapack_int m, lapack_int n, + const lapack_complex_float* a, lapack_int lda, + const lapack_complex_float* tau, + lapack_complex_float* c, lapack_int ldc ); +lapack_int LAPACKE_zunmtr( int matrix_order, char side, char uplo, char trans, + lapack_int m, lapack_int n, + const lapack_complex_double* a, lapack_int lda, + const lapack_complex_double* tau, + lapack_complex_double* c, lapack_int ldc ); + +lapack_int LAPACKE_cupgtr( int matrix_order, char uplo, lapack_int n, + const lapack_complex_float* ap, + 
const lapack_complex_float* tau, + lapack_complex_float* q, lapack_int ldq ); +lapack_int LAPACKE_zupgtr( int matrix_order, char uplo, lapack_int n, + const lapack_complex_double* ap, + const lapack_complex_double* tau, + lapack_complex_double* q, lapack_int ldq ); + +lapack_int LAPACKE_cupmtr( int matrix_order, char side, char uplo, char trans, + lapack_int m, lapack_int n, + const lapack_complex_float* ap, + const lapack_complex_float* tau, + lapack_complex_float* c, lapack_int ldc ); +lapack_int LAPACKE_zupmtr( int matrix_order, char side, char uplo, char trans, + lapack_int m, lapack_int n, + const lapack_complex_double* ap, + const lapack_complex_double* tau, + lapack_complex_double* c, lapack_int ldc ); + +lapack_int LAPACKE_sbdsdc_work( int matrix_order, char uplo, char compq, + lapack_int n, float* d, float* e, float* u, + lapack_int ldu, float* vt, lapack_int ldvt, + float* q, lapack_int* iq, float* work, + lapack_int* iwork ); +lapack_int LAPACKE_dbdsdc_work( int matrix_order, char uplo, char compq, + lapack_int n, double* d, double* e, double* u, + lapack_int ldu, double* vt, lapack_int ldvt, + double* q, lapack_int* iq, double* work, + lapack_int* iwork ); + +lapack_int LAPACKE_sbdsqr_work( int matrix_order, char uplo, lapack_int n, + lapack_int ncvt, lapack_int nru, lapack_int ncc, + float* d, float* e, float* vt, lapack_int ldvt, + float* u, lapack_int ldu, float* c, + lapack_int ldc, float* work ); +lapack_int LAPACKE_dbdsqr_work( int matrix_order, char uplo, lapack_int n, + lapack_int ncvt, lapack_int nru, lapack_int ncc, + double* d, double* e, double* vt, + lapack_int ldvt, double* u, lapack_int ldu, + double* c, lapack_int ldc, double* work ); +lapack_int LAPACKE_cbdsqr_work( int matrix_order, char uplo, lapack_int n, + lapack_int ncvt, lapack_int nru, lapack_int ncc, + float* d, float* e, lapack_complex_float* vt, + lapack_int ldvt, lapack_complex_float* u, + lapack_int ldu, lapack_complex_float* c, + lapack_int ldc, float* work ); +lapack_int 
LAPACKE_zbdsqr_work( int matrix_order, char uplo, lapack_int n, + lapack_int ncvt, lapack_int nru, lapack_int ncc, + double* d, double* e, lapack_complex_double* vt, + lapack_int ldvt, lapack_complex_double* u, + lapack_int ldu, lapack_complex_double* c, + lapack_int ldc, double* work ); + +lapack_int LAPACKE_sdisna_work( char job, lapack_int m, lapack_int n, + const float* d, float* sep ); +lapack_int LAPACKE_ddisna_work( char job, lapack_int m, lapack_int n, + const double* d, double* sep ); + +lapack_int LAPACKE_sgbbrd_work( int matrix_order, char vect, lapack_int m, + lapack_int n, lapack_int ncc, lapack_int kl, + lapack_int ku, float* ab, lapack_int ldab, + float* d, float* e, float* q, lapack_int ldq, + float* pt, lapack_int ldpt, float* c, + lapack_int ldc, float* work ); +lapack_int LAPACKE_dgbbrd_work( int matrix_order, char vect, lapack_int m, + lapack_int n, lapack_int ncc, lapack_int kl, + lapack_int ku, double* ab, lapack_int ldab, + double* d, double* e, double* q, lapack_int ldq, + double* pt, lapack_int ldpt, double* c, + lapack_int ldc, double* work ); +lapack_int LAPACKE_cgbbrd_work( int matrix_order, char vect, lapack_int m, + lapack_int n, lapack_int ncc, lapack_int kl, + lapack_int ku, lapack_complex_float* ab, + lapack_int ldab, float* d, float* e, + lapack_complex_float* q, lapack_int ldq, + lapack_complex_float* pt, lapack_int ldpt, + lapack_complex_float* c, lapack_int ldc, + lapack_complex_float* work, float* rwork ); +lapack_int LAPACKE_zgbbrd_work( int matrix_order, char vect, lapack_int m, + lapack_int n, lapack_int ncc, lapack_int kl, + lapack_int ku, lapack_complex_double* ab, + lapack_int ldab, double* d, double* e, + lapack_complex_double* q, lapack_int ldq, + lapack_complex_double* pt, lapack_int ldpt, + lapack_complex_double* c, lapack_int ldc, + lapack_complex_double* work, double* rwork ); + +lapack_int LAPACKE_sgbcon_work( int matrix_order, char norm, lapack_int n, + lapack_int kl, lapack_int ku, const float* ab, + lapack_int 
ldab, const lapack_int* ipiv, + float anorm, float* rcond, float* work, + lapack_int* iwork ); +lapack_int LAPACKE_dgbcon_work( int matrix_order, char norm, lapack_int n, + lapack_int kl, lapack_int ku, const double* ab, + lapack_int ldab, const lapack_int* ipiv, + double anorm, double* rcond, double* work, + lapack_int* iwork ); +lapack_int LAPACKE_cgbcon_work( int matrix_order, char norm, lapack_int n, + lapack_int kl, lapack_int ku, + const lapack_complex_float* ab, lapack_int ldab, + const lapack_int* ipiv, float anorm, + float* rcond, lapack_complex_float* work, + float* rwork ); +lapack_int LAPACKE_zgbcon_work( int matrix_order, char norm, lapack_int n, + lapack_int kl, lapack_int ku, + const lapack_complex_double* ab, + lapack_int ldab, const lapack_int* ipiv, + double anorm, double* rcond, + lapack_complex_double* work, double* rwork ); + +lapack_int LAPACKE_sgbequ_work( int matrix_order, lapack_int m, lapack_int n, + lapack_int kl, lapack_int ku, const float* ab, + lapack_int ldab, float* r, float* c, + float* rowcnd, float* colcnd, float* amax ); +lapack_int LAPACKE_dgbequ_work( int matrix_order, lapack_int m, lapack_int n, + lapack_int kl, lapack_int ku, const double* ab, + lapack_int ldab, double* r, double* c, + double* rowcnd, double* colcnd, double* amax ); +lapack_int LAPACKE_cgbequ_work( int matrix_order, lapack_int m, lapack_int n, + lapack_int kl, lapack_int ku, + const lapack_complex_float* ab, lapack_int ldab, + float* r, float* c, float* rowcnd, + float* colcnd, float* amax ); +lapack_int LAPACKE_zgbequ_work( int matrix_order, lapack_int m, lapack_int n, + lapack_int kl, lapack_int ku, + const lapack_complex_double* ab, + lapack_int ldab, double* r, double* c, + double* rowcnd, double* colcnd, double* amax ); + +lapack_int LAPACKE_sgbequb_work( int matrix_order, lapack_int m, lapack_int n, + lapack_int kl, lapack_int ku, const float* ab, + lapack_int ldab, float* r, float* c, + float* rowcnd, float* colcnd, float* amax ); +lapack_int 
LAPACKE_dgbequb_work( int matrix_order, lapack_int m, lapack_int n, + lapack_int kl, lapack_int ku, const double* ab, + lapack_int ldab, double* r, double* c, + double* rowcnd, double* colcnd, double* amax ); +lapack_int LAPACKE_cgbequb_work( int matrix_order, lapack_int m, lapack_int n, + lapack_int kl, lapack_int ku, + const lapack_complex_float* ab, + lapack_int ldab, float* r, float* c, + float* rowcnd, float* colcnd, float* amax ); +lapack_int LAPACKE_zgbequb_work( int matrix_order, lapack_int m, lapack_int n, + lapack_int kl, lapack_int ku, + const lapack_complex_double* ab, + lapack_int ldab, double* r, double* c, + double* rowcnd, double* colcnd, double* amax ); + +lapack_int LAPACKE_sgbrfs_work( int matrix_order, char trans, lapack_int n, + lapack_int kl, lapack_int ku, lapack_int nrhs, + const float* ab, lapack_int ldab, + const float* afb, lapack_int ldafb, + const lapack_int* ipiv, const float* b, + lapack_int ldb, float* x, lapack_int ldx, + float* ferr, float* berr, float* work, + lapack_int* iwork ); +lapack_int LAPACKE_dgbrfs_work( int matrix_order, char trans, lapack_int n, + lapack_int kl, lapack_int ku, lapack_int nrhs, + const double* ab, lapack_int ldab, + const double* afb, lapack_int ldafb, + const lapack_int* ipiv, const double* b, + lapack_int ldb, double* x, lapack_int ldx, + double* ferr, double* berr, double* work, + lapack_int* iwork ); +lapack_int LAPACKE_cgbrfs_work( int matrix_order, char trans, lapack_int n, + lapack_int kl, lapack_int ku, lapack_int nrhs, + const lapack_complex_float* ab, lapack_int ldab, + const lapack_complex_float* afb, + lapack_int ldafb, const lapack_int* ipiv, + const lapack_complex_float* b, lapack_int ldb, + lapack_complex_float* x, lapack_int ldx, + float* ferr, float* berr, + lapack_complex_float* work, float* rwork ); +lapack_int LAPACKE_zgbrfs_work( int matrix_order, char trans, lapack_int n, + lapack_int kl, lapack_int ku, lapack_int nrhs, + const lapack_complex_double* ab, + lapack_int ldab, + const 
lapack_complex_double* afb, + lapack_int ldafb, const lapack_int* ipiv, + const lapack_complex_double* b, lapack_int ldb, + lapack_complex_double* x, lapack_int ldx, + double* ferr, double* berr, + lapack_complex_double* work, double* rwork ); + +lapack_int LAPACKE_sgbrfsx_work( int matrix_order, char trans, char equed, + lapack_int n, lapack_int kl, lapack_int ku, + lapack_int nrhs, const float* ab, + lapack_int ldab, const float* afb, + lapack_int ldafb, const lapack_int* ipiv, + const float* r, const float* c, const float* b, + lapack_int ldb, float* x, lapack_int ldx, + float* rcond, float* berr, + lapack_int n_err_bnds, float* err_bnds_norm, + float* err_bnds_comp, lapack_int nparams, + float* params, float* work, + lapack_int* iwork ); +lapack_int LAPACKE_dgbrfsx_work( int matrix_order, char trans, char equed, + lapack_int n, lapack_int kl, lapack_int ku, + lapack_int nrhs, const double* ab, + lapack_int ldab, const double* afb, + lapack_int ldafb, const lapack_int* ipiv, + const double* r, const double* c, + const double* b, lapack_int ldb, double* x, + lapack_int ldx, double* rcond, double* berr, + lapack_int n_err_bnds, double* err_bnds_norm, + double* err_bnds_comp, lapack_int nparams, + double* params, double* work, + lapack_int* iwork ); +lapack_int LAPACKE_cgbrfsx_work( int matrix_order, char trans, char equed, + lapack_int n, lapack_int kl, lapack_int ku, + lapack_int nrhs, + const lapack_complex_float* ab, + lapack_int ldab, + const lapack_complex_float* afb, + lapack_int ldafb, const lapack_int* ipiv, + const float* r, const float* c, + const lapack_complex_float* b, lapack_int ldb, + lapack_complex_float* x, lapack_int ldx, + float* rcond, float* berr, + lapack_int n_err_bnds, float* err_bnds_norm, + float* err_bnds_comp, lapack_int nparams, + float* params, lapack_complex_float* work, + float* rwork ); +lapack_int LAPACKE_zgbrfsx_work( int matrix_order, char trans, char equed, + lapack_int n, lapack_int kl, lapack_int ku, + lapack_int nrhs, + 
const lapack_complex_double* ab, + lapack_int ldab, + const lapack_complex_double* afb, + lapack_int ldafb, const lapack_int* ipiv, + const double* r, const double* c, + const lapack_complex_double* b, lapack_int ldb, + lapack_complex_double* x, lapack_int ldx, + double* rcond, double* berr, + lapack_int n_err_bnds, double* err_bnds_norm, + double* err_bnds_comp, lapack_int nparams, + double* params, lapack_complex_double* work, + double* rwork ); + +lapack_int LAPACKE_sgbsv_work( int matrix_order, lapack_int n, lapack_int kl, + lapack_int ku, lapack_int nrhs, float* ab, + lapack_int ldab, lapack_int* ipiv, float* b, + lapack_int ldb ); +lapack_int LAPACKE_dgbsv_work( int matrix_order, lapack_int n, lapack_int kl, + lapack_int ku, lapack_int nrhs, double* ab, + lapack_int ldab, lapack_int* ipiv, double* b, + lapack_int ldb ); +lapack_int LAPACKE_cgbsv_work( int matrix_order, lapack_int n, lapack_int kl, + lapack_int ku, lapack_int nrhs, + lapack_complex_float* ab, lapack_int ldab, + lapack_int* ipiv, lapack_complex_float* b, + lapack_int ldb ); +lapack_int LAPACKE_zgbsv_work( int matrix_order, lapack_int n, lapack_int kl, + lapack_int ku, lapack_int nrhs, + lapack_complex_double* ab, lapack_int ldab, + lapack_int* ipiv, lapack_complex_double* b, + lapack_int ldb ); + +lapack_int LAPACKE_sgbsvx_work( int matrix_order, char fact, char trans, + lapack_int n, lapack_int kl, lapack_int ku, + lapack_int nrhs, float* ab, lapack_int ldab, + float* afb, lapack_int ldafb, lapack_int* ipiv, + char* equed, float* r, float* c, float* b, + lapack_int ldb, float* x, lapack_int ldx, + float* rcond, float* ferr, float* berr, + float* work, lapack_int* iwork ); +lapack_int LAPACKE_dgbsvx_work( int matrix_order, char fact, char trans, + lapack_int n, lapack_int kl, lapack_int ku, + lapack_int nrhs, double* ab, lapack_int ldab, + double* afb, lapack_int ldafb, lapack_int* ipiv, + char* equed, double* r, double* c, double* b, + lapack_int ldb, double* x, lapack_int ldx, + double* 
rcond, double* ferr, double* berr, + double* work, lapack_int* iwork ); +lapack_int LAPACKE_cgbsvx_work( int matrix_order, char fact, char trans, + lapack_int n, lapack_int kl, lapack_int ku, + lapack_int nrhs, lapack_complex_float* ab, + lapack_int ldab, lapack_complex_float* afb, + lapack_int ldafb, lapack_int* ipiv, char* equed, + float* r, float* c, lapack_complex_float* b, + lapack_int ldb, lapack_complex_float* x, + lapack_int ldx, float* rcond, float* ferr, + float* berr, lapack_complex_float* work, + float* rwork ); +lapack_int LAPACKE_zgbsvx_work( int matrix_order, char fact, char trans, + lapack_int n, lapack_int kl, lapack_int ku, + lapack_int nrhs, lapack_complex_double* ab, + lapack_int ldab, lapack_complex_double* afb, + lapack_int ldafb, lapack_int* ipiv, char* equed, + double* r, double* c, lapack_complex_double* b, + lapack_int ldb, lapack_complex_double* x, + lapack_int ldx, double* rcond, double* ferr, + double* berr, lapack_complex_double* work, + double* rwork ); + +lapack_int LAPACKE_sgbsvxx_work( int matrix_order, char fact, char trans, + lapack_int n, lapack_int kl, lapack_int ku, + lapack_int nrhs, float* ab, lapack_int ldab, + float* afb, lapack_int ldafb, lapack_int* ipiv, + char* equed, float* r, float* c, float* b, + lapack_int ldb, float* x, lapack_int ldx, + float* rcond, float* rpvgrw, float* berr, + lapack_int n_err_bnds, float* err_bnds_norm, + float* err_bnds_comp, lapack_int nparams, + float* params, float* work, + lapack_int* iwork ); +lapack_int LAPACKE_dgbsvxx_work( int matrix_order, char fact, char trans, + lapack_int n, lapack_int kl, lapack_int ku, + lapack_int nrhs, double* ab, lapack_int ldab, + double* afb, lapack_int ldafb, + lapack_int* ipiv, char* equed, double* r, + double* c, double* b, lapack_int ldb, + double* x, lapack_int ldx, double* rcond, + double* rpvgrw, double* berr, + lapack_int n_err_bnds, double* err_bnds_norm, + double* err_bnds_comp, lapack_int nparams, + double* params, double* work, + lapack_int* 
iwork ); +lapack_int LAPACKE_cgbsvxx_work( int matrix_order, char fact, char trans, + lapack_int n, lapack_int kl, lapack_int ku, + lapack_int nrhs, lapack_complex_float* ab, + lapack_int ldab, lapack_complex_float* afb, + lapack_int ldafb, lapack_int* ipiv, + char* equed, float* r, float* c, + lapack_complex_float* b, lapack_int ldb, + lapack_complex_float* x, lapack_int ldx, + float* rcond, float* rpvgrw, float* berr, + lapack_int n_err_bnds, float* err_bnds_norm, + float* err_bnds_comp, lapack_int nparams, + float* params, lapack_complex_float* work, + float* rwork ); +lapack_int LAPACKE_zgbsvxx_work( int matrix_order, char fact, char trans, + lapack_int n, lapack_int kl, lapack_int ku, + lapack_int nrhs, lapack_complex_double* ab, + lapack_int ldab, lapack_complex_double* afb, + lapack_int ldafb, lapack_int* ipiv, + char* equed, double* r, double* c, + lapack_complex_double* b, lapack_int ldb, + lapack_complex_double* x, lapack_int ldx, + double* rcond, double* rpvgrw, double* berr, + lapack_int n_err_bnds, double* err_bnds_norm, + double* err_bnds_comp, lapack_int nparams, + double* params, lapack_complex_double* work, + double* rwork ); + +lapack_int LAPACKE_sgbtrf_work( int matrix_order, lapack_int m, lapack_int n, + lapack_int kl, lapack_int ku, float* ab, + lapack_int ldab, lapack_int* ipiv ); +lapack_int LAPACKE_dgbtrf_work( int matrix_order, lapack_int m, lapack_int n, + lapack_int kl, lapack_int ku, double* ab, + lapack_int ldab, lapack_int* ipiv ); +lapack_int LAPACKE_cgbtrf_work( int matrix_order, lapack_int m, lapack_int n, + lapack_int kl, lapack_int ku, + lapack_complex_float* ab, lapack_int ldab, + lapack_int* ipiv ); +lapack_int LAPACKE_zgbtrf_work( int matrix_order, lapack_int m, lapack_int n, + lapack_int kl, lapack_int ku, + lapack_complex_double* ab, lapack_int ldab, + lapack_int* ipiv ); + +lapack_int LAPACKE_sgbtrs_work( int matrix_order, char trans, lapack_int n, + lapack_int kl, lapack_int ku, lapack_int nrhs, + const float* ab, 
lapack_int ldab, + const lapack_int* ipiv, float* b, + lapack_int ldb ); +lapack_int LAPACKE_dgbtrs_work( int matrix_order, char trans, lapack_int n, + lapack_int kl, lapack_int ku, lapack_int nrhs, + const double* ab, lapack_int ldab, + const lapack_int* ipiv, double* b, + lapack_int ldb ); +lapack_int LAPACKE_cgbtrs_work( int matrix_order, char trans, lapack_int n, + lapack_int kl, lapack_int ku, lapack_int nrhs, + const lapack_complex_float* ab, lapack_int ldab, + const lapack_int* ipiv, lapack_complex_float* b, + lapack_int ldb ); +lapack_int LAPACKE_zgbtrs_work( int matrix_order, char trans, lapack_int n, + lapack_int kl, lapack_int ku, lapack_int nrhs, + const lapack_complex_double* ab, + lapack_int ldab, const lapack_int* ipiv, + lapack_complex_double* b, lapack_int ldb ); + +lapack_int LAPACKE_sgebak_work( int matrix_order, char job, char side, + lapack_int n, lapack_int ilo, lapack_int ihi, + const float* scale, lapack_int m, float* v, + lapack_int ldv ); +lapack_int LAPACKE_dgebak_work( int matrix_order, char job, char side, + lapack_int n, lapack_int ilo, lapack_int ihi, + const double* scale, lapack_int m, double* v, + lapack_int ldv ); +lapack_int LAPACKE_cgebak_work( int matrix_order, char job, char side, + lapack_int n, lapack_int ilo, lapack_int ihi, + const float* scale, lapack_int m, + lapack_complex_float* v, lapack_int ldv ); +lapack_int LAPACKE_zgebak_work( int matrix_order, char job, char side, + lapack_int n, lapack_int ilo, lapack_int ihi, + const double* scale, lapack_int m, + lapack_complex_double* v, lapack_int ldv ); + +lapack_int LAPACKE_sgebal_work( int matrix_order, char job, lapack_int n, + float* a, lapack_int lda, lapack_int* ilo, + lapack_int* ihi, float* scale ); +lapack_int LAPACKE_dgebal_work( int matrix_order, char job, lapack_int n, + double* a, lapack_int lda, lapack_int* ilo, + lapack_int* ihi, double* scale ); +lapack_int LAPACKE_cgebal_work( int matrix_order, char job, lapack_int n, + lapack_complex_float* a, lapack_int 
lda, + lapack_int* ilo, lapack_int* ihi, + float* scale ); +lapack_int LAPACKE_zgebal_work( int matrix_order, char job, lapack_int n, + lapack_complex_double* a, lapack_int lda, + lapack_int* ilo, lapack_int* ihi, + double* scale ); + +lapack_int LAPACKE_sgebrd_work( int matrix_order, lapack_int m, lapack_int n, + float* a, lapack_int lda, float* d, float* e, + float* tauq, float* taup, float* work, + lapack_int lwork ); +lapack_int LAPACKE_dgebrd_work( int matrix_order, lapack_int m, lapack_int n, + double* a, lapack_int lda, double* d, double* e, + double* tauq, double* taup, double* work, + lapack_int lwork ); +lapack_int LAPACKE_cgebrd_work( int matrix_order, lapack_int m, lapack_int n, + lapack_complex_float* a, lapack_int lda, + float* d, float* e, lapack_complex_float* tauq, + lapack_complex_float* taup, + lapack_complex_float* work, lapack_int lwork ); +lapack_int LAPACKE_zgebrd_work( int matrix_order, lapack_int m, lapack_int n, + lapack_complex_double* a, lapack_int lda, + double* d, double* e, + lapack_complex_double* tauq, + lapack_complex_double* taup, + lapack_complex_double* work, lapack_int lwork ); + +lapack_int LAPACKE_sgecon_work( int matrix_order, char norm, lapack_int n, + const float* a, lapack_int lda, float anorm, + float* rcond, float* work, lapack_int* iwork ); +lapack_int LAPACKE_dgecon_work( int matrix_order, char norm, lapack_int n, + const double* a, lapack_int lda, double anorm, + double* rcond, double* work, + lapack_int* iwork ); +lapack_int LAPACKE_cgecon_work( int matrix_order, char norm, lapack_int n, + const lapack_complex_float* a, lapack_int lda, + float anorm, float* rcond, + lapack_complex_float* work, float* rwork ); +lapack_int LAPACKE_zgecon_work( int matrix_order, char norm, lapack_int n, + const lapack_complex_double* a, lapack_int lda, + double anorm, double* rcond, + lapack_complex_double* work, double* rwork ); + +lapack_int LAPACKE_sgeequ_work( int matrix_order, lapack_int m, lapack_int n, + const float* a, 
lapack_int lda, float* r, + float* c, float* rowcnd, float* colcnd, + float* amax ); +lapack_int LAPACKE_dgeequ_work( int matrix_order, lapack_int m, lapack_int n, + const double* a, lapack_int lda, double* r, + double* c, double* rowcnd, double* colcnd, + double* amax ); +lapack_int LAPACKE_cgeequ_work( int matrix_order, lapack_int m, lapack_int n, + const lapack_complex_float* a, lapack_int lda, + float* r, float* c, float* rowcnd, + float* colcnd, float* amax ); +lapack_int LAPACKE_zgeequ_work( int matrix_order, lapack_int m, lapack_int n, + const lapack_complex_double* a, lapack_int lda, + double* r, double* c, double* rowcnd, + double* colcnd, double* amax ); + +lapack_int LAPACKE_sgeequb_work( int matrix_order, lapack_int m, lapack_int n, + const float* a, lapack_int lda, float* r, + float* c, float* rowcnd, float* colcnd, + float* amax ); +lapack_int LAPACKE_dgeequb_work( int matrix_order, lapack_int m, lapack_int n, + const double* a, lapack_int lda, double* r, + double* c, double* rowcnd, double* colcnd, + double* amax ); +lapack_int LAPACKE_cgeequb_work( int matrix_order, lapack_int m, lapack_int n, + const lapack_complex_float* a, lapack_int lda, + float* r, float* c, float* rowcnd, + float* colcnd, float* amax ); +lapack_int LAPACKE_zgeequb_work( int matrix_order, lapack_int m, lapack_int n, + const lapack_complex_double* a, lapack_int lda, + double* r, double* c, double* rowcnd, + double* colcnd, double* amax ); + +lapack_int LAPACKE_sgees_work( int matrix_order, char jobvs, char sort, + LAPACK_S_SELECT2 select, lapack_int n, float* a, + lapack_int lda, lapack_int* sdim, float* wr, + float* wi, float* vs, lapack_int ldvs, + float* work, lapack_int lwork, + lapack_logical* bwork ); +lapack_int LAPACKE_dgees_work( int matrix_order, char jobvs, char sort, + LAPACK_D_SELECT2 select, lapack_int n, double* a, + lapack_int lda, lapack_int* sdim, double* wr, + double* wi, double* vs, lapack_int ldvs, + double* work, lapack_int lwork, + lapack_logical* bwork ); 
+lapack_int LAPACKE_cgees_work( int matrix_order, char jobvs, char sort, + LAPACK_C_SELECT1 select, lapack_int n, + lapack_complex_float* a, lapack_int lda, + lapack_int* sdim, lapack_complex_float* w, + lapack_complex_float* vs, lapack_int ldvs, + lapack_complex_float* work, lapack_int lwork, + float* rwork, lapack_logical* bwork ); +lapack_int LAPACKE_zgees_work( int matrix_order, char jobvs, char sort, + LAPACK_Z_SELECT1 select, lapack_int n, + lapack_complex_double* a, lapack_int lda, + lapack_int* sdim, lapack_complex_double* w, + lapack_complex_double* vs, lapack_int ldvs, + lapack_complex_double* work, lapack_int lwork, + double* rwork, lapack_logical* bwork ); + +lapack_int LAPACKE_sgeesx_work( int matrix_order, char jobvs, char sort, + LAPACK_S_SELECT2 select, char sense, + lapack_int n, float* a, lapack_int lda, + lapack_int* sdim, float* wr, float* wi, + float* vs, lapack_int ldvs, float* rconde, + float* rcondv, float* work, lapack_int lwork, + lapack_int* iwork, lapack_int liwork, + lapack_logical* bwork ); +lapack_int LAPACKE_dgeesx_work( int matrix_order, char jobvs, char sort, + LAPACK_D_SELECT2 select, char sense, + lapack_int n, double* a, lapack_int lda, + lapack_int* sdim, double* wr, double* wi, + double* vs, lapack_int ldvs, double* rconde, + double* rcondv, double* work, lapack_int lwork, + lapack_int* iwork, lapack_int liwork, + lapack_logical* bwork ); +lapack_int LAPACKE_cgeesx_work( int matrix_order, char jobvs, char sort, + LAPACK_C_SELECT1 select, char sense, + lapack_int n, lapack_complex_float* a, + lapack_int lda, lapack_int* sdim, + lapack_complex_float* w, + lapack_complex_float* vs, lapack_int ldvs, + float* rconde, float* rcondv, + lapack_complex_float* work, lapack_int lwork, + float* rwork, lapack_logical* bwork ); +lapack_int LAPACKE_zgeesx_work( int matrix_order, char jobvs, char sort, + LAPACK_Z_SELECT1 select, char sense, + lapack_int n, lapack_complex_double* a, + lapack_int lda, lapack_int* sdim, + lapack_complex_double* 
w, + lapack_complex_double* vs, lapack_int ldvs, + double* rconde, double* rcondv, + lapack_complex_double* work, lapack_int lwork, + double* rwork, lapack_logical* bwork ); + +lapack_int LAPACKE_sgeev_work( int matrix_order, char jobvl, char jobvr, + lapack_int n, float* a, lapack_int lda, + float* wr, float* wi, float* vl, lapack_int ldvl, + float* vr, lapack_int ldvr, float* work, + lapack_int lwork ); +lapack_int LAPACKE_dgeev_work( int matrix_order, char jobvl, char jobvr, + lapack_int n, double* a, lapack_int lda, + double* wr, double* wi, double* vl, + lapack_int ldvl, double* vr, lapack_int ldvr, + double* work, lapack_int lwork ); +lapack_int LAPACKE_cgeev_work( int matrix_order, char jobvl, char jobvr, + lapack_int n, lapack_complex_float* a, + lapack_int lda, lapack_complex_float* w, + lapack_complex_float* vl, lapack_int ldvl, + lapack_complex_float* vr, lapack_int ldvr, + lapack_complex_float* work, lapack_int lwork, + float* rwork ); +lapack_int LAPACKE_zgeev_work( int matrix_order, char jobvl, char jobvr, + lapack_int n, lapack_complex_double* a, + lapack_int lda, lapack_complex_double* w, + lapack_complex_double* vl, lapack_int ldvl, + lapack_complex_double* vr, lapack_int ldvr, + lapack_complex_double* work, lapack_int lwork, + double* rwork ); + +lapack_int LAPACKE_sgeevx_work( int matrix_order, char balanc, char jobvl, + char jobvr, char sense, lapack_int n, float* a, + lapack_int lda, float* wr, float* wi, float* vl, + lapack_int ldvl, float* vr, lapack_int ldvr, + lapack_int* ilo, lapack_int* ihi, float* scale, + float* abnrm, float* rconde, float* rcondv, + float* work, lapack_int lwork, + lapack_int* iwork ); +lapack_int LAPACKE_dgeevx_work( int matrix_order, char balanc, char jobvl, + char jobvr, char sense, lapack_int n, double* a, + lapack_int lda, double* wr, double* wi, + double* vl, lapack_int ldvl, double* vr, + lapack_int ldvr, lapack_int* ilo, + lapack_int* ihi, double* scale, double* abnrm, + double* rconde, double* rcondv, double* 
work, + lapack_int lwork, lapack_int* iwork ); +lapack_int LAPACKE_cgeevx_work( int matrix_order, char balanc, char jobvl, + char jobvr, char sense, lapack_int n, + lapack_complex_float* a, lapack_int lda, + lapack_complex_float* w, + lapack_complex_float* vl, lapack_int ldvl, + lapack_complex_float* vr, lapack_int ldvr, + lapack_int* ilo, lapack_int* ihi, float* scale, + float* abnrm, float* rconde, float* rcondv, + lapack_complex_float* work, lapack_int lwork, + float* rwork ); +lapack_int LAPACKE_zgeevx_work( int matrix_order, char balanc, char jobvl, + char jobvr, char sense, lapack_int n, + lapack_complex_double* a, lapack_int lda, + lapack_complex_double* w, + lapack_complex_double* vl, lapack_int ldvl, + lapack_complex_double* vr, lapack_int ldvr, + lapack_int* ilo, lapack_int* ihi, double* scale, + double* abnrm, double* rconde, double* rcondv, + lapack_complex_double* work, lapack_int lwork, + double* rwork ); + +lapack_int LAPACKE_sgehrd_work( int matrix_order, lapack_int n, lapack_int ilo, + lapack_int ihi, float* a, lapack_int lda, + float* tau, float* work, lapack_int lwork ); +lapack_int LAPACKE_dgehrd_work( int matrix_order, lapack_int n, lapack_int ilo, + lapack_int ihi, double* a, lapack_int lda, + double* tau, double* work, lapack_int lwork ); +lapack_int LAPACKE_cgehrd_work( int matrix_order, lapack_int n, lapack_int ilo, + lapack_int ihi, lapack_complex_float* a, + lapack_int lda, lapack_complex_float* tau, + lapack_complex_float* work, lapack_int lwork ); +lapack_int LAPACKE_zgehrd_work( int matrix_order, lapack_int n, lapack_int ilo, + lapack_int ihi, lapack_complex_double* a, + lapack_int lda, lapack_complex_double* tau, + lapack_complex_double* work, lapack_int lwork ); + +lapack_int LAPACKE_sgejsv_work( int matrix_order, char joba, char jobu, + char jobv, char jobr, char jobt, char jobp, + lapack_int m, lapack_int n, float* a, + lapack_int lda, float* sva, float* u, + lapack_int ldu, float* v, lapack_int ldv, + float* work, lapack_int 
lwork, + lapack_int* iwork ); +lapack_int LAPACKE_dgejsv_work( int matrix_order, char joba, char jobu, + char jobv, char jobr, char jobt, char jobp, + lapack_int m, lapack_int n, double* a, + lapack_int lda, double* sva, double* u, + lapack_int ldu, double* v, lapack_int ldv, + double* work, lapack_int lwork, + lapack_int* iwork ); + +lapack_int LAPACKE_sgelq2_work( int matrix_order, lapack_int m, lapack_int n, + float* a, lapack_int lda, float* tau, + float* work ); +lapack_int LAPACKE_dgelq2_work( int matrix_order, lapack_int m, lapack_int n, + double* a, lapack_int lda, double* tau, + double* work ); +lapack_int LAPACKE_cgelq2_work( int matrix_order, lapack_int m, lapack_int n, + lapack_complex_float* a, lapack_int lda, + lapack_complex_float* tau, + lapack_complex_float* work ); +lapack_int LAPACKE_zgelq2_work( int matrix_order, lapack_int m, lapack_int n, + lapack_complex_double* a, lapack_int lda, + lapack_complex_double* tau, + lapack_complex_double* work ); + +lapack_int LAPACKE_sgelqf_work( int matrix_order, lapack_int m, lapack_int n, + float* a, lapack_int lda, float* tau, + float* work, lapack_int lwork ); +lapack_int LAPACKE_dgelqf_work( int matrix_order, lapack_int m, lapack_int n, + double* a, lapack_int lda, double* tau, + double* work, lapack_int lwork ); +lapack_int LAPACKE_cgelqf_work( int matrix_order, lapack_int m, lapack_int n, + lapack_complex_float* a, lapack_int lda, + lapack_complex_float* tau, + lapack_complex_float* work, lapack_int lwork ); +lapack_int LAPACKE_zgelqf_work( int matrix_order, lapack_int m, lapack_int n, + lapack_complex_double* a, lapack_int lda, + lapack_complex_double* tau, + lapack_complex_double* work, lapack_int lwork ); + +lapack_int LAPACKE_sgels_work( int matrix_order, char trans, lapack_int m, + lapack_int n, lapack_int nrhs, float* a, + lapack_int lda, float* b, lapack_int ldb, + float* work, lapack_int lwork ); +lapack_int LAPACKE_dgels_work( int matrix_order, char trans, lapack_int m, + lapack_int n, 
lapack_int nrhs, double* a, + lapack_int lda, double* b, lapack_int ldb, + double* work, lapack_int lwork ); +lapack_int LAPACKE_cgels_work( int matrix_order, char trans, lapack_int m, + lapack_int n, lapack_int nrhs, + lapack_complex_float* a, lapack_int lda, + lapack_complex_float* b, lapack_int ldb, + lapack_complex_float* work, lapack_int lwork ); +lapack_int LAPACKE_zgels_work( int matrix_order, char trans, lapack_int m, + lapack_int n, lapack_int nrhs, + lapack_complex_double* a, lapack_int lda, + lapack_complex_double* b, lapack_int ldb, + lapack_complex_double* work, lapack_int lwork ); + +lapack_int LAPACKE_sgelsd_work( int matrix_order, lapack_int m, lapack_int n, + lapack_int nrhs, float* a, lapack_int lda, + float* b, lapack_int ldb, float* s, float rcond, + lapack_int* rank, float* work, lapack_int lwork, + lapack_int* iwork ); +lapack_int LAPACKE_dgelsd_work( int matrix_order, lapack_int m, lapack_int n, + lapack_int nrhs, double* a, lapack_int lda, + double* b, lapack_int ldb, double* s, + double rcond, lapack_int* rank, double* work, + lapack_int lwork, lapack_int* iwork ); +lapack_int LAPACKE_cgelsd_work( int matrix_order, lapack_int m, lapack_int n, + lapack_int nrhs, lapack_complex_float* a, + lapack_int lda, lapack_complex_float* b, + lapack_int ldb, float* s, float rcond, + lapack_int* rank, lapack_complex_float* work, + lapack_int lwork, float* rwork, + lapack_int* iwork ); +lapack_int LAPACKE_zgelsd_work( int matrix_order, lapack_int m, lapack_int n, + lapack_int nrhs, lapack_complex_double* a, + lapack_int lda, lapack_complex_double* b, + lapack_int ldb, double* s, double rcond, + lapack_int* rank, lapack_complex_double* work, + lapack_int lwork, double* rwork, + lapack_int* iwork ); + +lapack_int LAPACKE_sgelss_work( int matrix_order, lapack_int m, lapack_int n, + lapack_int nrhs, float* a, lapack_int lda, + float* b, lapack_int ldb, float* s, float rcond, + lapack_int* rank, float* work, + lapack_int lwork ); +lapack_int 
LAPACKE_dgelss_work( int matrix_order, lapack_int m, lapack_int n, + lapack_int nrhs, double* a, lapack_int lda, + double* b, lapack_int ldb, double* s, + double rcond, lapack_int* rank, double* work, + lapack_int lwork ); +lapack_int LAPACKE_cgelss_work( int matrix_order, lapack_int m, lapack_int n, + lapack_int nrhs, lapack_complex_float* a, + lapack_int lda, lapack_complex_float* b, + lapack_int ldb, float* s, float rcond, + lapack_int* rank, lapack_complex_float* work, + lapack_int lwork, float* rwork ); +lapack_int LAPACKE_zgelss_work( int matrix_order, lapack_int m, lapack_int n, + lapack_int nrhs, lapack_complex_double* a, + lapack_int lda, lapack_complex_double* b, + lapack_int ldb, double* s, double rcond, + lapack_int* rank, lapack_complex_double* work, + lapack_int lwork, double* rwork ); + +lapack_int LAPACKE_sgelsy_work( int matrix_order, lapack_int m, lapack_int n, + lapack_int nrhs, float* a, lapack_int lda, + float* b, lapack_int ldb, lapack_int* jpvt, + float rcond, lapack_int* rank, float* work, + lapack_int lwork ); +lapack_int LAPACKE_dgelsy_work( int matrix_order, lapack_int m, lapack_int n, + lapack_int nrhs, double* a, lapack_int lda, + double* b, lapack_int ldb, lapack_int* jpvt, + double rcond, lapack_int* rank, double* work, + lapack_int lwork ); +lapack_int LAPACKE_cgelsy_work( int matrix_order, lapack_int m, lapack_int n, + lapack_int nrhs, lapack_complex_float* a, + lapack_int lda, lapack_complex_float* b, + lapack_int ldb, lapack_int* jpvt, float rcond, + lapack_int* rank, lapack_complex_float* work, + lapack_int lwork, float* rwork ); +lapack_int LAPACKE_zgelsy_work( int matrix_order, lapack_int m, lapack_int n, + lapack_int nrhs, lapack_complex_double* a, + lapack_int lda, lapack_complex_double* b, + lapack_int ldb, lapack_int* jpvt, double rcond, + lapack_int* rank, lapack_complex_double* work, + lapack_int lwork, double* rwork ); + +lapack_int LAPACKE_sgeqlf_work( int matrix_order, lapack_int m, lapack_int n, + float* a, lapack_int 
lda, float* tau, + float* work, lapack_int lwork ); +lapack_int LAPACKE_dgeqlf_work( int matrix_order, lapack_int m, lapack_int n, + double* a, lapack_int lda, double* tau, + double* work, lapack_int lwork ); +lapack_int LAPACKE_cgeqlf_work( int matrix_order, lapack_int m, lapack_int n, + lapack_complex_float* a, lapack_int lda, + lapack_complex_float* tau, + lapack_complex_float* work, lapack_int lwork ); +lapack_int LAPACKE_zgeqlf_work( int matrix_order, lapack_int m, lapack_int n, + lapack_complex_double* a, lapack_int lda, + lapack_complex_double* tau, + lapack_complex_double* work, lapack_int lwork ); + +lapack_int LAPACKE_sgeqp3_work( int matrix_order, lapack_int m, lapack_int n, + float* a, lapack_int lda, lapack_int* jpvt, + float* tau, float* work, lapack_int lwork ); +lapack_int LAPACKE_dgeqp3_work( int matrix_order, lapack_int m, lapack_int n, + double* a, lapack_int lda, lapack_int* jpvt, + double* tau, double* work, lapack_int lwork ); +lapack_int LAPACKE_cgeqp3_work( int matrix_order, lapack_int m, lapack_int n, + lapack_complex_float* a, lapack_int lda, + lapack_int* jpvt, lapack_complex_float* tau, + lapack_complex_float* work, lapack_int lwork, + float* rwork ); +lapack_int LAPACKE_zgeqp3_work( int matrix_order, lapack_int m, lapack_int n, + lapack_complex_double* a, lapack_int lda, + lapack_int* jpvt, lapack_complex_double* tau, + lapack_complex_double* work, lapack_int lwork, + double* rwork ); + +lapack_int LAPACKE_sgeqpf_work( int matrix_order, lapack_int m, lapack_int n, + float* a, lapack_int lda, lapack_int* jpvt, + float* tau, float* work ); +lapack_int LAPACKE_dgeqpf_work( int matrix_order, lapack_int m, lapack_int n, + double* a, lapack_int lda, lapack_int* jpvt, + double* tau, double* work ); +lapack_int LAPACKE_cgeqpf_work( int matrix_order, lapack_int m, lapack_int n, + lapack_complex_float* a, lapack_int lda, + lapack_int* jpvt, lapack_complex_float* tau, + lapack_complex_float* work, float* rwork ); +lapack_int LAPACKE_zgeqpf_work( 
int matrix_order, lapack_int m, lapack_int n, + lapack_complex_double* a, lapack_int lda, + lapack_int* jpvt, lapack_complex_double* tau, + lapack_complex_double* work, double* rwork ); + +lapack_int LAPACKE_sgeqr2_work( int matrix_order, lapack_int m, lapack_int n, + float* a, lapack_int lda, float* tau, + float* work ); +lapack_int LAPACKE_dgeqr2_work( int matrix_order, lapack_int m, lapack_int n, + double* a, lapack_int lda, double* tau, + double* work ); +lapack_int LAPACKE_cgeqr2_work( int matrix_order, lapack_int m, lapack_int n, + lapack_complex_float* a, lapack_int lda, + lapack_complex_float* tau, + lapack_complex_float* work ); +lapack_int LAPACKE_zgeqr2_work( int matrix_order, lapack_int m, lapack_int n, + lapack_complex_double* a, lapack_int lda, + lapack_complex_double* tau, + lapack_complex_double* work ); + +lapack_int LAPACKE_sgeqrf_work( int matrix_order, lapack_int m, lapack_int n, + float* a, lapack_int lda, float* tau, + float* work, lapack_int lwork ); +lapack_int LAPACKE_dgeqrf_work( int matrix_order, lapack_int m, lapack_int n, + double* a, lapack_int lda, double* tau, + double* work, lapack_int lwork ); +lapack_int LAPACKE_cgeqrf_work( int matrix_order, lapack_int m, lapack_int n, + lapack_complex_float* a, lapack_int lda, + lapack_complex_float* tau, + lapack_complex_float* work, lapack_int lwork ); +lapack_int LAPACKE_zgeqrf_work( int matrix_order, lapack_int m, lapack_int n, + lapack_complex_double* a, lapack_int lda, + lapack_complex_double* tau, + lapack_complex_double* work, lapack_int lwork ); + +lapack_int LAPACKE_sgeqrfp_work( int matrix_order, lapack_int m, lapack_int n, + float* a, lapack_int lda, float* tau, + float* work, lapack_int lwork ); +lapack_int LAPACKE_dgeqrfp_work( int matrix_order, lapack_int m, lapack_int n, + double* a, lapack_int lda, double* tau, + double* work, lapack_int lwork ); +lapack_int LAPACKE_cgeqrfp_work( int matrix_order, lapack_int m, lapack_int n, + lapack_complex_float* a, lapack_int lda, + 
lapack_complex_float* tau, + lapack_complex_float* work, lapack_int lwork ); +lapack_int LAPACKE_zgeqrfp_work( int matrix_order, lapack_int m, lapack_int n, + lapack_complex_double* a, lapack_int lda, + lapack_complex_double* tau, + lapack_complex_double* work, + lapack_int lwork ); + +lapack_int LAPACKE_sgerfs_work( int matrix_order, char trans, lapack_int n, + lapack_int nrhs, const float* a, lapack_int lda, + const float* af, lapack_int ldaf, + const lapack_int* ipiv, const float* b, + lapack_int ldb, float* x, lapack_int ldx, + float* ferr, float* berr, float* work, + lapack_int* iwork ); +lapack_int LAPACKE_dgerfs_work( int matrix_order, char trans, lapack_int n, + lapack_int nrhs, const double* a, + lapack_int lda, const double* af, + lapack_int ldaf, const lapack_int* ipiv, + const double* b, lapack_int ldb, double* x, + lapack_int ldx, double* ferr, double* berr, + double* work, lapack_int* iwork ); +lapack_int LAPACKE_cgerfs_work( int matrix_order, char trans, lapack_int n, + lapack_int nrhs, const lapack_complex_float* a, + lapack_int lda, const lapack_complex_float* af, + lapack_int ldaf, const lapack_int* ipiv, + const lapack_complex_float* b, lapack_int ldb, + lapack_complex_float* x, lapack_int ldx, + float* ferr, float* berr, + lapack_complex_float* work, float* rwork ); +lapack_int LAPACKE_zgerfs_work( int matrix_order, char trans, lapack_int n, + lapack_int nrhs, const lapack_complex_double* a, + lapack_int lda, const lapack_complex_double* af, + lapack_int ldaf, const lapack_int* ipiv, + const lapack_complex_double* b, lapack_int ldb, + lapack_complex_double* x, lapack_int ldx, + double* ferr, double* berr, + lapack_complex_double* work, double* rwork ); + +lapack_int LAPACKE_sgerfsx_work( int matrix_order, char trans, char equed, + lapack_int n, lapack_int nrhs, const float* a, + lapack_int lda, const float* af, + lapack_int ldaf, const lapack_int* ipiv, + const float* r, const float* c, const float* b, + lapack_int ldb, float* x, lapack_int ldx, 
+ float* rcond, float* berr, + lapack_int n_err_bnds, float* err_bnds_norm, + float* err_bnds_comp, lapack_int nparams, + float* params, float* work, + lapack_int* iwork ); +lapack_int LAPACKE_dgerfsx_work( int matrix_order, char trans, char equed, + lapack_int n, lapack_int nrhs, const double* a, + lapack_int lda, const double* af, + lapack_int ldaf, const lapack_int* ipiv, + const double* r, const double* c, + const double* b, lapack_int ldb, double* x, + lapack_int ldx, double* rcond, double* berr, + lapack_int n_err_bnds, double* err_bnds_norm, + double* err_bnds_comp, lapack_int nparams, + double* params, double* work, + lapack_int* iwork ); +lapack_int LAPACKE_cgerfsx_work( int matrix_order, char trans, char equed, + lapack_int n, lapack_int nrhs, + const lapack_complex_float* a, lapack_int lda, + const lapack_complex_float* af, + lapack_int ldaf, const lapack_int* ipiv, + const float* r, const float* c, + const lapack_complex_float* b, lapack_int ldb, + lapack_complex_float* x, lapack_int ldx, + float* rcond, float* berr, + lapack_int n_err_bnds, float* err_bnds_norm, + float* err_bnds_comp, lapack_int nparams, + float* params, lapack_complex_float* work, + float* rwork ); +lapack_int LAPACKE_zgerfsx_work( int matrix_order, char trans, char equed, + lapack_int n, lapack_int nrhs, + const lapack_complex_double* a, lapack_int lda, + const lapack_complex_double* af, + lapack_int ldaf, const lapack_int* ipiv, + const double* r, const double* c, + const lapack_complex_double* b, lapack_int ldb, + lapack_complex_double* x, lapack_int ldx, + double* rcond, double* berr, + lapack_int n_err_bnds, double* err_bnds_norm, + double* err_bnds_comp, lapack_int nparams, + double* params, lapack_complex_double* work, + double* rwork ); + +lapack_int LAPACKE_sgerqf_work( int matrix_order, lapack_int m, lapack_int n, + float* a, lapack_int lda, float* tau, + float* work, lapack_int lwork ); +lapack_int LAPACKE_dgerqf_work( int matrix_order, lapack_int m, lapack_int n, + 
double* a, lapack_int lda, double* tau, + double* work, lapack_int lwork ); +lapack_int LAPACKE_cgerqf_work( int matrix_order, lapack_int m, lapack_int n, + lapack_complex_float* a, lapack_int lda, + lapack_complex_float* tau, + lapack_complex_float* work, lapack_int lwork ); +lapack_int LAPACKE_zgerqf_work( int matrix_order, lapack_int m, lapack_int n, + lapack_complex_double* a, lapack_int lda, + lapack_complex_double* tau, + lapack_complex_double* work, lapack_int lwork ); + +lapack_int LAPACKE_sgesdd_work( int matrix_order, char jobz, lapack_int m, + lapack_int n, float* a, lapack_int lda, + float* s, float* u, lapack_int ldu, float* vt, + lapack_int ldvt, float* work, lapack_int lwork, + lapack_int* iwork ); +lapack_int LAPACKE_dgesdd_work( int matrix_order, char jobz, lapack_int m, + lapack_int n, double* a, lapack_int lda, + double* s, double* u, lapack_int ldu, + double* vt, lapack_int ldvt, double* work, + lapack_int lwork, lapack_int* iwork ); +lapack_int LAPACKE_cgesdd_work( int matrix_order, char jobz, lapack_int m, + lapack_int n, lapack_complex_float* a, + lapack_int lda, float* s, + lapack_complex_float* u, lapack_int ldu, + lapack_complex_float* vt, lapack_int ldvt, + lapack_complex_float* work, lapack_int lwork, + float* rwork, lapack_int* iwork ); +lapack_int LAPACKE_zgesdd_work( int matrix_order, char jobz, lapack_int m, + lapack_int n, lapack_complex_double* a, + lapack_int lda, double* s, + lapack_complex_double* u, lapack_int ldu, + lapack_complex_double* vt, lapack_int ldvt, + lapack_complex_double* work, lapack_int lwork, + double* rwork, lapack_int* iwork ); + +lapack_int LAPACKE_sgesv_work( int matrix_order, lapack_int n, lapack_int nrhs, + float* a, lapack_int lda, lapack_int* ipiv, + float* b, lapack_int ldb ); +lapack_int LAPACKE_dgesv_work( int matrix_order, lapack_int n, lapack_int nrhs, + double* a, lapack_int lda, lapack_int* ipiv, + double* b, lapack_int ldb ); +lapack_int LAPACKE_cgesv_work( int matrix_order, lapack_int n, 
lapack_int nrhs, + lapack_complex_float* a, lapack_int lda, + lapack_int* ipiv, lapack_complex_float* b, + lapack_int ldb ); +lapack_int LAPACKE_zgesv_work( int matrix_order, lapack_int n, lapack_int nrhs, + lapack_complex_double* a, lapack_int lda, + lapack_int* ipiv, lapack_complex_double* b, + lapack_int ldb ); +lapack_int LAPACKE_dsgesv_work( int matrix_order, lapack_int n, lapack_int nrhs, + double* a, lapack_int lda, lapack_int* ipiv, + double* b, lapack_int ldb, double* x, + lapack_int ldx, double* work, float* swork, + lapack_int* iter ); +lapack_int LAPACKE_zcgesv_work( int matrix_order, lapack_int n, lapack_int nrhs, + lapack_complex_double* a, lapack_int lda, + lapack_int* ipiv, lapack_complex_double* b, + lapack_int ldb, lapack_complex_double* x, + lapack_int ldx, lapack_complex_double* work, + lapack_complex_float* swork, double* rwork, + lapack_int* iter ); + +lapack_int LAPACKE_sgesvd_work( int matrix_order, char jobu, char jobvt, + lapack_int m, lapack_int n, float* a, + lapack_int lda, float* s, float* u, + lapack_int ldu, float* vt, lapack_int ldvt, + float* work, lapack_int lwork ); +lapack_int LAPACKE_dgesvd_work( int matrix_order, char jobu, char jobvt, + lapack_int m, lapack_int n, double* a, + lapack_int lda, double* s, double* u, + lapack_int ldu, double* vt, lapack_int ldvt, + double* work, lapack_int lwork ); +lapack_int LAPACKE_cgesvd_work( int matrix_order, char jobu, char jobvt, + lapack_int m, lapack_int n, + lapack_complex_float* a, lapack_int lda, + float* s, lapack_complex_float* u, + lapack_int ldu, lapack_complex_float* vt, + lapack_int ldvt, lapack_complex_float* work, + lapack_int lwork, float* rwork ); +lapack_int LAPACKE_zgesvd_work( int matrix_order, char jobu, char jobvt, + lapack_int m, lapack_int n, + lapack_complex_double* a, lapack_int lda, + double* s, lapack_complex_double* u, + lapack_int ldu, lapack_complex_double* vt, + lapack_int ldvt, lapack_complex_double* work, + lapack_int lwork, double* rwork ); + +lapack_int 
LAPACKE_sgesvj_work( int matrix_order, char joba, char jobu, + char jobv, lapack_int m, lapack_int n, float* a, + lapack_int lda, float* sva, lapack_int mv, + float* v, lapack_int ldv, float* work, + lapack_int lwork ); +lapack_int LAPACKE_dgesvj_work( int matrix_order, char joba, char jobu, + char jobv, lapack_int m, lapack_int n, + double* a, lapack_int lda, double* sva, + lapack_int mv, double* v, lapack_int ldv, + double* work, lapack_int lwork ); + +lapack_int LAPACKE_sgesvx_work( int matrix_order, char fact, char trans, + lapack_int n, lapack_int nrhs, float* a, + lapack_int lda, float* af, lapack_int ldaf, + lapack_int* ipiv, char* equed, float* r, + float* c, float* b, lapack_int ldb, float* x, + lapack_int ldx, float* rcond, float* ferr, + float* berr, float* work, lapack_int* iwork ); +lapack_int LAPACKE_dgesvx_work( int matrix_order, char fact, char trans, + lapack_int n, lapack_int nrhs, double* a, + lapack_int lda, double* af, lapack_int ldaf, + lapack_int* ipiv, char* equed, double* r, + double* c, double* b, lapack_int ldb, double* x, + lapack_int ldx, double* rcond, double* ferr, + double* berr, double* work, lapack_int* iwork ); +lapack_int LAPACKE_cgesvx_work( int matrix_order, char fact, char trans, + lapack_int n, lapack_int nrhs, + lapack_complex_float* a, lapack_int lda, + lapack_complex_float* af, lapack_int ldaf, + lapack_int* ipiv, char* equed, float* r, + float* c, lapack_complex_float* b, + lapack_int ldb, lapack_complex_float* x, + lapack_int ldx, float* rcond, float* ferr, + float* berr, lapack_complex_float* work, + float* rwork ); +lapack_int LAPACKE_zgesvx_work( int matrix_order, char fact, char trans, + lapack_int n, lapack_int nrhs, + lapack_complex_double* a, lapack_int lda, + lapack_complex_double* af, lapack_int ldaf, + lapack_int* ipiv, char* equed, double* r, + double* c, lapack_complex_double* b, + lapack_int ldb, lapack_complex_double* x, + lapack_int ldx, double* rcond, double* ferr, + double* berr, lapack_complex_double* 
work, + double* rwork ); + +lapack_int LAPACKE_sgesvxx_work( int matrix_order, char fact, char trans, + lapack_int n, lapack_int nrhs, float* a, + lapack_int lda, float* af, lapack_int ldaf, + lapack_int* ipiv, char* equed, float* r, + float* c, float* b, lapack_int ldb, float* x, + lapack_int ldx, float* rcond, float* rpvgrw, + float* berr, lapack_int n_err_bnds, + float* err_bnds_norm, float* err_bnds_comp, + lapack_int nparams, float* params, float* work, + lapack_int* iwork ); +lapack_int LAPACKE_dgesvxx_work( int matrix_order, char fact, char trans, + lapack_int n, lapack_int nrhs, double* a, + lapack_int lda, double* af, lapack_int ldaf, + lapack_int* ipiv, char* equed, double* r, + double* c, double* b, lapack_int ldb, + double* x, lapack_int ldx, double* rcond, + double* rpvgrw, double* berr, + lapack_int n_err_bnds, double* err_bnds_norm, + double* err_bnds_comp, lapack_int nparams, + double* params, double* work, + lapack_int* iwork ); +lapack_int LAPACKE_cgesvxx_work( int matrix_order, char fact, char trans, + lapack_int n, lapack_int nrhs, + lapack_complex_float* a, lapack_int lda, + lapack_complex_float* af, lapack_int ldaf, + lapack_int* ipiv, char* equed, float* r, + float* c, lapack_complex_float* b, + lapack_int ldb, lapack_complex_float* x, + lapack_int ldx, float* rcond, float* rpvgrw, + float* berr, lapack_int n_err_bnds, + float* err_bnds_norm, float* err_bnds_comp, + lapack_int nparams, float* params, + lapack_complex_float* work, float* rwork ); +lapack_int LAPACKE_zgesvxx_work( int matrix_order, char fact, char trans, + lapack_int n, lapack_int nrhs, + lapack_complex_double* a, lapack_int lda, + lapack_complex_double* af, lapack_int ldaf, + lapack_int* ipiv, char* equed, double* r, + double* c, lapack_complex_double* b, + lapack_int ldb, lapack_complex_double* x, + lapack_int ldx, double* rcond, double* rpvgrw, + double* berr, lapack_int n_err_bnds, + double* err_bnds_norm, double* err_bnds_comp, + lapack_int nparams, double* params, + 
lapack_complex_double* work, double* rwork ); + +lapack_int LAPACKE_sgetf2_work( int matrix_order, lapack_int m, lapack_int n, + float* a, lapack_int lda, lapack_int* ipiv ); +lapack_int LAPACKE_dgetf2_work( int matrix_order, lapack_int m, lapack_int n, + double* a, lapack_int lda, lapack_int* ipiv ); +lapack_int LAPACKE_cgetf2_work( int matrix_order, lapack_int m, lapack_int n, + lapack_complex_float* a, lapack_int lda, + lapack_int* ipiv ); +lapack_int LAPACKE_zgetf2_work( int matrix_order, lapack_int m, lapack_int n, + lapack_complex_double* a, lapack_int lda, + lapack_int* ipiv ); + +lapack_int LAPACKE_sgetrf_work( int matrix_order, lapack_int m, lapack_int n, + float* a, lapack_int lda, lapack_int* ipiv ); +lapack_int LAPACKE_dgetrf_work( int matrix_order, lapack_int m, lapack_int n, + double* a, lapack_int lda, lapack_int* ipiv ); +lapack_int LAPACKE_cgetrf_work( int matrix_order, lapack_int m, lapack_int n, + lapack_complex_float* a, lapack_int lda, + lapack_int* ipiv ); +lapack_int LAPACKE_zgetrf_work( int matrix_order, lapack_int m, lapack_int n, + lapack_complex_double* a, lapack_int lda, + lapack_int* ipiv ); + +lapack_int LAPACKE_sgetri_work( int matrix_order, lapack_int n, float* a, + lapack_int lda, const lapack_int* ipiv, + float* work, lapack_int lwork ); +lapack_int LAPACKE_dgetri_work( int matrix_order, lapack_int n, double* a, + lapack_int lda, const lapack_int* ipiv, + double* work, lapack_int lwork ); +lapack_int LAPACKE_cgetri_work( int matrix_order, lapack_int n, + lapack_complex_float* a, lapack_int lda, + const lapack_int* ipiv, + lapack_complex_float* work, lapack_int lwork ); +lapack_int LAPACKE_zgetri_work( int matrix_order, lapack_int n, + lapack_complex_double* a, lapack_int lda, + const lapack_int* ipiv, + lapack_complex_double* work, lapack_int lwork ); + +lapack_int LAPACKE_sgetrs_work( int matrix_order, char trans, lapack_int n, + lapack_int nrhs, const float* a, lapack_int lda, + const lapack_int* ipiv, float* b, + lapack_int ldb 
); +lapack_int LAPACKE_dgetrs_work( int matrix_order, char trans, lapack_int n, + lapack_int nrhs, const double* a, + lapack_int lda, const lapack_int* ipiv, + double* b, lapack_int ldb ); +lapack_int LAPACKE_cgetrs_work( int matrix_order, char trans, lapack_int n, + lapack_int nrhs, const lapack_complex_float* a, + lapack_int lda, const lapack_int* ipiv, + lapack_complex_float* b, lapack_int ldb ); +lapack_int LAPACKE_zgetrs_work( int matrix_order, char trans, lapack_int n, + lapack_int nrhs, const lapack_complex_double* a, + lapack_int lda, const lapack_int* ipiv, + lapack_complex_double* b, lapack_int ldb ); + +lapack_int LAPACKE_sggbak_work( int matrix_order, char job, char side, + lapack_int n, lapack_int ilo, lapack_int ihi, + const float* lscale, const float* rscale, + lapack_int m, float* v, lapack_int ldv ); +lapack_int LAPACKE_dggbak_work( int matrix_order, char job, char side, + lapack_int n, lapack_int ilo, lapack_int ihi, + const double* lscale, const double* rscale, + lapack_int m, double* v, lapack_int ldv ); +lapack_int LAPACKE_cggbak_work( int matrix_order, char job, char side, + lapack_int n, lapack_int ilo, lapack_int ihi, + const float* lscale, const float* rscale, + lapack_int m, lapack_complex_float* v, + lapack_int ldv ); +lapack_int LAPACKE_zggbak_work( int matrix_order, char job, char side, + lapack_int n, lapack_int ilo, lapack_int ihi, + const double* lscale, const double* rscale, + lapack_int m, lapack_complex_double* v, + lapack_int ldv ); + +lapack_int LAPACKE_sggbal_work( int matrix_order, char job, lapack_int n, + float* a, lapack_int lda, float* b, + lapack_int ldb, lapack_int* ilo, + lapack_int* ihi, float* lscale, float* rscale, + float* work ); +lapack_int LAPACKE_dggbal_work( int matrix_order, char job, lapack_int n, + double* a, lapack_int lda, double* b, + lapack_int ldb, lapack_int* ilo, + lapack_int* ihi, double* lscale, double* rscale, + double* work ); +lapack_int LAPACKE_cggbal_work( int matrix_order, char job, lapack_int 
n, + lapack_complex_float* a, lapack_int lda, + lapack_complex_float* b, lapack_int ldb, + lapack_int* ilo, lapack_int* ihi, float* lscale, + float* rscale, float* work ); +lapack_int LAPACKE_zggbal_work( int matrix_order, char job, lapack_int n, + lapack_complex_double* a, lapack_int lda, + lapack_complex_double* b, lapack_int ldb, + lapack_int* ilo, lapack_int* ihi, + double* lscale, double* rscale, double* work ); + +lapack_int LAPACKE_sgges_work( int matrix_order, char jobvsl, char jobvsr, + char sort, LAPACK_S_SELECT3 selctg, lapack_int n, + float* a, lapack_int lda, float* b, + lapack_int ldb, lapack_int* sdim, float* alphar, + float* alphai, float* beta, float* vsl, + lapack_int ldvsl, float* vsr, lapack_int ldvsr, + float* work, lapack_int lwork, + lapack_logical* bwork ); +lapack_int LAPACKE_dgges_work( int matrix_order, char jobvsl, char jobvsr, + char sort, LAPACK_D_SELECT3 selctg, lapack_int n, + double* a, lapack_int lda, double* b, + lapack_int ldb, lapack_int* sdim, double* alphar, + double* alphai, double* beta, double* vsl, + lapack_int ldvsl, double* vsr, lapack_int ldvsr, + double* work, lapack_int lwork, + lapack_logical* bwork ); +lapack_int LAPACKE_cgges_work( int matrix_order, char jobvsl, char jobvsr, + char sort, LAPACK_C_SELECT2 selctg, lapack_int n, + lapack_complex_float* a, lapack_int lda, + lapack_complex_float* b, lapack_int ldb, + lapack_int* sdim, lapack_complex_float* alpha, + lapack_complex_float* beta, + lapack_complex_float* vsl, lapack_int ldvsl, + lapack_complex_float* vsr, lapack_int ldvsr, + lapack_complex_float* work, lapack_int lwork, + float* rwork, lapack_logical* bwork ); +lapack_int LAPACKE_zgges_work( int matrix_order, char jobvsl, char jobvsr, + char sort, LAPACK_Z_SELECT2 selctg, lapack_int n, + lapack_complex_double* a, lapack_int lda, + lapack_complex_double* b, lapack_int ldb, + lapack_int* sdim, lapack_complex_double* alpha, + lapack_complex_double* beta, + lapack_complex_double* vsl, lapack_int ldvsl, + 
lapack_complex_double* vsr, lapack_int ldvsr, + lapack_complex_double* work, lapack_int lwork, + double* rwork, lapack_logical* bwork ); + +lapack_int LAPACKE_sggesx_work( int matrix_order, char jobvsl, char jobvsr, + char sort, LAPACK_S_SELECT3 selctg, char sense, + lapack_int n, float* a, lapack_int lda, + float* b, lapack_int ldb, lapack_int* sdim, + float* alphar, float* alphai, float* beta, + float* vsl, lapack_int ldvsl, float* vsr, + lapack_int ldvsr, float* rconde, float* rcondv, + float* work, lapack_int lwork, + lapack_int* iwork, lapack_int liwork, + lapack_logical* bwork ); +lapack_int LAPACKE_dggesx_work( int matrix_order, char jobvsl, char jobvsr, + char sort, LAPACK_D_SELECT3 selctg, char sense, + lapack_int n, double* a, lapack_int lda, + double* b, lapack_int ldb, lapack_int* sdim, + double* alphar, double* alphai, double* beta, + double* vsl, lapack_int ldvsl, double* vsr, + lapack_int ldvsr, double* rconde, + double* rcondv, double* work, lapack_int lwork, + lapack_int* iwork, lapack_int liwork, + lapack_logical* bwork ); +lapack_int LAPACKE_cggesx_work( int matrix_order, char jobvsl, char jobvsr, + char sort, LAPACK_C_SELECT2 selctg, char sense, + lapack_int n, lapack_complex_float* a, + lapack_int lda, lapack_complex_float* b, + lapack_int ldb, lapack_int* sdim, + lapack_complex_float* alpha, + lapack_complex_float* beta, + lapack_complex_float* vsl, lapack_int ldvsl, + lapack_complex_float* vsr, lapack_int ldvsr, + float* rconde, float* rcondv, + lapack_complex_float* work, lapack_int lwork, + float* rwork, lapack_int* iwork, + lapack_int liwork, lapack_logical* bwork ); +lapack_int LAPACKE_zggesx_work( int matrix_order, char jobvsl, char jobvsr, + char sort, LAPACK_Z_SELECT2 selctg, char sense, + lapack_int n, lapack_complex_double* a, + lapack_int lda, lapack_complex_double* b, + lapack_int ldb, lapack_int* sdim, + lapack_complex_double* alpha, + lapack_complex_double* beta, + lapack_complex_double* vsl, lapack_int ldvsl, + 
lapack_complex_double* vsr, lapack_int ldvsr, + double* rconde, double* rcondv, + lapack_complex_double* work, lapack_int lwork, + double* rwork, lapack_int* iwork, + lapack_int liwork, lapack_logical* bwork ); + +lapack_int LAPACKE_sggev_work( int matrix_order, char jobvl, char jobvr, + lapack_int n, float* a, lapack_int lda, float* b, + lapack_int ldb, float* alphar, float* alphai, + float* beta, float* vl, lapack_int ldvl, + float* vr, lapack_int ldvr, float* work, + lapack_int lwork ); +lapack_int LAPACKE_dggev_work( int matrix_order, char jobvl, char jobvr, + lapack_int n, double* a, lapack_int lda, + double* b, lapack_int ldb, double* alphar, + double* alphai, double* beta, double* vl, + lapack_int ldvl, double* vr, lapack_int ldvr, + double* work, lapack_int lwork ); +lapack_int LAPACKE_cggev_work( int matrix_order, char jobvl, char jobvr, + lapack_int n, lapack_complex_float* a, + lapack_int lda, lapack_complex_float* b, + lapack_int ldb, lapack_complex_float* alpha, + lapack_complex_float* beta, + lapack_complex_float* vl, lapack_int ldvl, + lapack_complex_float* vr, lapack_int ldvr, + lapack_complex_float* work, lapack_int lwork, + float* rwork ); +lapack_int LAPACKE_zggev_work( int matrix_order, char jobvl, char jobvr, + lapack_int n, lapack_complex_double* a, + lapack_int lda, lapack_complex_double* b, + lapack_int ldb, lapack_complex_double* alpha, + lapack_complex_double* beta, + lapack_complex_double* vl, lapack_int ldvl, + lapack_complex_double* vr, lapack_int ldvr, + lapack_complex_double* work, lapack_int lwork, + double* rwork ); + +lapack_int LAPACKE_sggevx_work( int matrix_order, char balanc, char jobvl, + char jobvr, char sense, lapack_int n, float* a, + lapack_int lda, float* b, lapack_int ldb, + float* alphar, float* alphai, float* beta, + float* vl, lapack_int ldvl, float* vr, + lapack_int ldvr, lapack_int* ilo, + lapack_int* ihi, float* lscale, float* rscale, + float* abnrm, float* bbnrm, float* rconde, + float* rcondv, float* work, 
lapack_int lwork, + lapack_int* iwork, lapack_logical* bwork ); +lapack_int LAPACKE_dggevx_work( int matrix_order, char balanc, char jobvl, + char jobvr, char sense, lapack_int n, double* a, + lapack_int lda, double* b, lapack_int ldb, + double* alphar, double* alphai, double* beta, + double* vl, lapack_int ldvl, double* vr, + lapack_int ldvr, lapack_int* ilo, + lapack_int* ihi, double* lscale, double* rscale, + double* abnrm, double* bbnrm, double* rconde, + double* rcondv, double* work, lapack_int lwork, + lapack_int* iwork, lapack_logical* bwork ); +lapack_int LAPACKE_cggevx_work( int matrix_order, char balanc, char jobvl, + char jobvr, char sense, lapack_int n, + lapack_complex_float* a, lapack_int lda, + lapack_complex_float* b, lapack_int ldb, + lapack_complex_float* alpha, + lapack_complex_float* beta, + lapack_complex_float* vl, lapack_int ldvl, + lapack_complex_float* vr, lapack_int ldvr, + lapack_int* ilo, lapack_int* ihi, float* lscale, + float* rscale, float* abnrm, float* bbnrm, + float* rconde, float* rcondv, + lapack_complex_float* work, lapack_int lwork, + float* rwork, lapack_int* iwork, + lapack_logical* bwork ); +lapack_int LAPACKE_zggevx_work( int matrix_order, char balanc, char jobvl, + char jobvr, char sense, lapack_int n, + lapack_complex_double* a, lapack_int lda, + lapack_complex_double* b, lapack_int ldb, + lapack_complex_double* alpha, + lapack_complex_double* beta, + lapack_complex_double* vl, lapack_int ldvl, + lapack_complex_double* vr, lapack_int ldvr, + lapack_int* ilo, lapack_int* ihi, + double* lscale, double* rscale, double* abnrm, + double* bbnrm, double* rconde, double* rcondv, + lapack_complex_double* work, lapack_int lwork, + double* rwork, lapack_int* iwork, + lapack_logical* bwork ); + +lapack_int LAPACKE_sggglm_work( int matrix_order, lapack_int n, lapack_int m, + lapack_int p, float* a, lapack_int lda, + float* b, lapack_int ldb, float* d, float* x, + float* y, float* work, lapack_int lwork ); +lapack_int 
LAPACKE_dggglm_work( int matrix_order, lapack_int n, lapack_int m, + lapack_int p, double* a, lapack_int lda, + double* b, lapack_int ldb, double* d, double* x, + double* y, double* work, lapack_int lwork ); +lapack_int LAPACKE_cggglm_work( int matrix_order, lapack_int n, lapack_int m, + lapack_int p, lapack_complex_float* a, + lapack_int lda, lapack_complex_float* b, + lapack_int ldb, lapack_complex_float* d, + lapack_complex_float* x, + lapack_complex_float* y, + lapack_complex_float* work, lapack_int lwork ); +lapack_int LAPACKE_zggglm_work( int matrix_order, lapack_int n, lapack_int m, + lapack_int p, lapack_complex_double* a, + lapack_int lda, lapack_complex_double* b, + lapack_int ldb, lapack_complex_double* d, + lapack_complex_double* x, + lapack_complex_double* y, + lapack_complex_double* work, lapack_int lwork ); + +lapack_int LAPACKE_sgghrd_work( int matrix_order, char compq, char compz, + lapack_int n, lapack_int ilo, lapack_int ihi, + float* a, lapack_int lda, float* b, + lapack_int ldb, float* q, lapack_int ldq, + float* z, lapack_int ldz ); +lapack_int LAPACKE_dgghrd_work( int matrix_order, char compq, char compz, + lapack_int n, lapack_int ilo, lapack_int ihi, + double* a, lapack_int lda, double* b, + lapack_int ldb, double* q, lapack_int ldq, + double* z, lapack_int ldz ); +lapack_int LAPACKE_cgghrd_work( int matrix_order, char compq, char compz, + lapack_int n, lapack_int ilo, lapack_int ihi, + lapack_complex_float* a, lapack_int lda, + lapack_complex_float* b, lapack_int ldb, + lapack_complex_float* q, lapack_int ldq, + lapack_complex_float* z, lapack_int ldz ); +lapack_int LAPACKE_zgghrd_work( int matrix_order, char compq, char compz, + lapack_int n, lapack_int ilo, lapack_int ihi, + lapack_complex_double* a, lapack_int lda, + lapack_complex_double* b, lapack_int ldb, + lapack_complex_double* q, lapack_int ldq, + lapack_complex_double* z, lapack_int ldz ); + +lapack_int LAPACKE_sgglse_work( int matrix_order, lapack_int m, lapack_int n, + 
lapack_int p, float* a, lapack_int lda, + float* b, lapack_int ldb, float* c, float* d, + float* x, float* work, lapack_int lwork ); +lapack_int LAPACKE_dgglse_work( int matrix_order, lapack_int m, lapack_int n, + lapack_int p, double* a, lapack_int lda, + double* b, lapack_int ldb, double* c, double* d, + double* x, double* work, lapack_int lwork ); +lapack_int LAPACKE_cgglse_work( int matrix_order, lapack_int m, lapack_int n, + lapack_int p, lapack_complex_float* a, + lapack_int lda, lapack_complex_float* b, + lapack_int ldb, lapack_complex_float* c, + lapack_complex_float* d, + lapack_complex_float* x, + lapack_complex_float* work, lapack_int lwork ); +lapack_int LAPACKE_zgglse_work( int matrix_order, lapack_int m, lapack_int n, + lapack_int p, lapack_complex_double* a, + lapack_int lda, lapack_complex_double* b, + lapack_int ldb, lapack_complex_double* c, + lapack_complex_double* d, + lapack_complex_double* x, + lapack_complex_double* work, lapack_int lwork ); + +lapack_int LAPACKE_sggqrf_work( int matrix_order, lapack_int n, lapack_int m, + lapack_int p, float* a, lapack_int lda, + float* taua, float* b, lapack_int ldb, + float* taub, float* work, lapack_int lwork ); +lapack_int LAPACKE_dggqrf_work( int matrix_order, lapack_int n, lapack_int m, + lapack_int p, double* a, lapack_int lda, + double* taua, double* b, lapack_int ldb, + double* taub, double* work, lapack_int lwork ); +lapack_int LAPACKE_cggqrf_work( int matrix_order, lapack_int n, lapack_int m, + lapack_int p, lapack_complex_float* a, + lapack_int lda, lapack_complex_float* taua, + lapack_complex_float* b, lapack_int ldb, + lapack_complex_float* taub, + lapack_complex_float* work, lapack_int lwork ); +lapack_int LAPACKE_zggqrf_work( int matrix_order, lapack_int n, lapack_int m, + lapack_int p, lapack_complex_double* a, + lapack_int lda, lapack_complex_double* taua, + lapack_complex_double* b, lapack_int ldb, + lapack_complex_double* taub, + lapack_complex_double* work, lapack_int lwork ); + 
+lapack_int LAPACKE_sggrqf_work( int matrix_order, lapack_int m, lapack_int p, + lapack_int n, float* a, lapack_int lda, + float* taua, float* b, lapack_int ldb, + float* taub, float* work, lapack_int lwork ); +lapack_int LAPACKE_dggrqf_work( int matrix_order, lapack_int m, lapack_int p, + lapack_int n, double* a, lapack_int lda, + double* taua, double* b, lapack_int ldb, + double* taub, double* work, lapack_int lwork ); +lapack_int LAPACKE_cggrqf_work( int matrix_order, lapack_int m, lapack_int p, + lapack_int n, lapack_complex_float* a, + lapack_int lda, lapack_complex_float* taua, + lapack_complex_float* b, lapack_int ldb, + lapack_complex_float* taub, + lapack_complex_float* work, lapack_int lwork ); +lapack_int LAPACKE_zggrqf_work( int matrix_order, lapack_int m, lapack_int p, + lapack_int n, lapack_complex_double* a, + lapack_int lda, lapack_complex_double* taua, + lapack_complex_double* b, lapack_int ldb, + lapack_complex_double* taub, + lapack_complex_double* work, lapack_int lwork ); + +lapack_int LAPACKE_sggsvd_work( int matrix_order, char jobu, char jobv, + char jobq, lapack_int m, lapack_int n, + lapack_int p, lapack_int* k, lapack_int* l, + float* a, lapack_int lda, float* b, + lapack_int ldb, float* alpha, float* beta, + float* u, lapack_int ldu, float* v, + lapack_int ldv, float* q, lapack_int ldq, + float* work, lapack_int* iwork ); +lapack_int LAPACKE_dggsvd_work( int matrix_order, char jobu, char jobv, + char jobq, lapack_int m, lapack_int n, + lapack_int p, lapack_int* k, lapack_int* l, + double* a, lapack_int lda, double* b, + lapack_int ldb, double* alpha, double* beta, + double* u, lapack_int ldu, double* v, + lapack_int ldv, double* q, lapack_int ldq, + double* work, lapack_int* iwork ); +lapack_int LAPACKE_cggsvd_work( int matrix_order, char jobu, char jobv, + char jobq, lapack_int m, lapack_int n, + lapack_int p, lapack_int* k, lapack_int* l, + lapack_complex_float* a, lapack_int lda, + lapack_complex_float* b, lapack_int ldb, + float* 
alpha, float* beta, + lapack_complex_float* u, lapack_int ldu, + lapack_complex_float* v, lapack_int ldv, + lapack_complex_float* q, lapack_int ldq, + lapack_complex_float* work, float* rwork, + lapack_int* iwork ); +lapack_int LAPACKE_zggsvd_work( int matrix_order, char jobu, char jobv, + char jobq, lapack_int m, lapack_int n, + lapack_int p, lapack_int* k, lapack_int* l, + lapack_complex_double* a, lapack_int lda, + lapack_complex_double* b, lapack_int ldb, + double* alpha, double* beta, + lapack_complex_double* u, lapack_int ldu, + lapack_complex_double* v, lapack_int ldv, + lapack_complex_double* q, lapack_int ldq, + lapack_complex_double* work, double* rwork, + lapack_int* iwork ); + +lapack_int LAPACKE_sggsvp_work( int matrix_order, char jobu, char jobv, + char jobq, lapack_int m, lapack_int p, + lapack_int n, float* a, lapack_int lda, + float* b, lapack_int ldb, float tola, + float tolb, lapack_int* k, lapack_int* l, + float* u, lapack_int ldu, float* v, + lapack_int ldv, float* q, lapack_int ldq, + lapack_int* iwork, float* tau, float* work ); +lapack_int LAPACKE_dggsvp_work( int matrix_order, char jobu, char jobv, + char jobq, lapack_int m, lapack_int p, + lapack_int n, double* a, lapack_int lda, + double* b, lapack_int ldb, double tola, + double tolb, lapack_int* k, lapack_int* l, + double* u, lapack_int ldu, double* v, + lapack_int ldv, double* q, lapack_int ldq, + lapack_int* iwork, double* tau, double* work ); +lapack_int LAPACKE_cggsvp_work( int matrix_order, char jobu, char jobv, + char jobq, lapack_int m, lapack_int p, + lapack_int n, lapack_complex_float* a, + lapack_int lda, lapack_complex_float* b, + lapack_int ldb, float tola, float tolb, + lapack_int* k, lapack_int* l, + lapack_complex_float* u, lapack_int ldu, + lapack_complex_float* v, lapack_int ldv, + lapack_complex_float* q, lapack_int ldq, + lapack_int* iwork, float* rwork, + lapack_complex_float* tau, + lapack_complex_float* work ); +lapack_int LAPACKE_zggsvp_work( int matrix_order, char 
jobu, char jobv, + char jobq, lapack_int m, lapack_int p, + lapack_int n, lapack_complex_double* a, + lapack_int lda, lapack_complex_double* b, + lapack_int ldb, double tola, double tolb, + lapack_int* k, lapack_int* l, + lapack_complex_double* u, lapack_int ldu, + lapack_complex_double* v, lapack_int ldv, + lapack_complex_double* q, lapack_int ldq, + lapack_int* iwork, double* rwork, + lapack_complex_double* tau, + lapack_complex_double* work ); + +lapack_int LAPACKE_sgtcon_work( char norm, lapack_int n, const float* dl, + const float* d, const float* du, + const float* du2, const lapack_int* ipiv, + float anorm, float* rcond, float* work, + lapack_int* iwork ); +lapack_int LAPACKE_dgtcon_work( char norm, lapack_int n, const double* dl, + const double* d, const double* du, + const double* du2, const lapack_int* ipiv, + double anorm, double* rcond, double* work, + lapack_int* iwork ); +lapack_int LAPACKE_cgtcon_work( char norm, lapack_int n, + const lapack_complex_float* dl, + const lapack_complex_float* d, + const lapack_complex_float* du, + const lapack_complex_float* du2, + const lapack_int* ipiv, float anorm, + float* rcond, lapack_complex_float* work ); +lapack_int LAPACKE_zgtcon_work( char norm, lapack_int n, + const lapack_complex_double* dl, + const lapack_complex_double* d, + const lapack_complex_double* du, + const lapack_complex_double* du2, + const lapack_int* ipiv, double anorm, + double* rcond, lapack_complex_double* work ); + +lapack_int LAPACKE_sgtrfs_work( int matrix_order, char trans, lapack_int n, + lapack_int nrhs, const float* dl, + const float* d, const float* du, + const float* dlf, const float* df, + const float* duf, const float* du2, + const lapack_int* ipiv, const float* b, + lapack_int ldb, float* x, lapack_int ldx, + float* ferr, float* berr, float* work, + lapack_int* iwork ); +lapack_int LAPACKE_dgtrfs_work( int matrix_order, char trans, lapack_int n, + lapack_int nrhs, const double* dl, + const double* d, const double* du, + const 
double* dlf, const double* df, + const double* duf, const double* du2, + const lapack_int* ipiv, const double* b, + lapack_int ldb, double* x, lapack_int ldx, + double* ferr, double* berr, double* work, + lapack_int* iwork ); +lapack_int LAPACKE_cgtrfs_work( int matrix_order, char trans, lapack_int n, + lapack_int nrhs, const lapack_complex_float* dl, + const lapack_complex_float* d, + const lapack_complex_float* du, + const lapack_complex_float* dlf, + const lapack_complex_float* df, + const lapack_complex_float* duf, + const lapack_complex_float* du2, + const lapack_int* ipiv, + const lapack_complex_float* b, lapack_int ldb, + lapack_complex_float* x, lapack_int ldx, + float* ferr, float* berr, + lapack_complex_float* work, float* rwork ); +lapack_int LAPACKE_zgtrfs_work( int matrix_order, char trans, lapack_int n, + lapack_int nrhs, + const lapack_complex_double* dl, + const lapack_complex_double* d, + const lapack_complex_double* du, + const lapack_complex_double* dlf, + const lapack_complex_double* df, + const lapack_complex_double* duf, + const lapack_complex_double* du2, + const lapack_int* ipiv, + const lapack_complex_double* b, lapack_int ldb, + lapack_complex_double* x, lapack_int ldx, + double* ferr, double* berr, + lapack_complex_double* work, double* rwork ); + +lapack_int LAPACKE_sgtsv_work( int matrix_order, lapack_int n, lapack_int nrhs, + float* dl, float* d, float* du, float* b, + lapack_int ldb ); +lapack_int LAPACKE_dgtsv_work( int matrix_order, lapack_int n, lapack_int nrhs, + double* dl, double* d, double* du, double* b, + lapack_int ldb ); +lapack_int LAPACKE_cgtsv_work( int matrix_order, lapack_int n, lapack_int nrhs, + lapack_complex_float* dl, + lapack_complex_float* d, + lapack_complex_float* du, + lapack_complex_float* b, lapack_int ldb ); +lapack_int LAPACKE_zgtsv_work( int matrix_order, lapack_int n, lapack_int nrhs, + lapack_complex_double* dl, + lapack_complex_double* d, + lapack_complex_double* du, + lapack_complex_double* b, 
lapack_int ldb ); + +lapack_int LAPACKE_sgtsvx_work( int matrix_order, char fact, char trans, + lapack_int n, lapack_int nrhs, const float* dl, + const float* d, const float* du, float* dlf, + float* df, float* duf, float* du2, + lapack_int* ipiv, const float* b, + lapack_int ldb, float* x, lapack_int ldx, + float* rcond, float* ferr, float* berr, + float* work, lapack_int* iwork ); +lapack_int LAPACKE_dgtsvx_work( int matrix_order, char fact, char trans, + lapack_int n, lapack_int nrhs, const double* dl, + const double* d, const double* du, double* dlf, + double* df, double* duf, double* du2, + lapack_int* ipiv, const double* b, + lapack_int ldb, double* x, lapack_int ldx, + double* rcond, double* ferr, double* berr, + double* work, lapack_int* iwork ); +lapack_int LAPACKE_cgtsvx_work( int matrix_order, char fact, char trans, + lapack_int n, lapack_int nrhs, + const lapack_complex_float* dl, + const lapack_complex_float* d, + const lapack_complex_float* du, + lapack_complex_float* dlf, + lapack_complex_float* df, + lapack_complex_float* duf, + lapack_complex_float* du2, lapack_int* ipiv, + const lapack_complex_float* b, lapack_int ldb, + lapack_complex_float* x, lapack_int ldx, + float* rcond, float* ferr, float* berr, + lapack_complex_float* work, float* rwork ); +lapack_int LAPACKE_zgtsvx_work( int matrix_order, char fact, char trans, + lapack_int n, lapack_int nrhs, + const lapack_complex_double* dl, + const lapack_complex_double* d, + const lapack_complex_double* du, + lapack_complex_double* dlf, + lapack_complex_double* df, + lapack_complex_double* duf, + lapack_complex_double* du2, lapack_int* ipiv, + const lapack_complex_double* b, lapack_int ldb, + lapack_complex_double* x, lapack_int ldx, + double* rcond, double* ferr, double* berr, + lapack_complex_double* work, double* rwork ); + +lapack_int LAPACKE_sgttrf_work( lapack_int n, float* dl, float* d, float* du, + float* du2, lapack_int* ipiv ); +lapack_int LAPACKE_dgttrf_work( lapack_int n, double* dl, 
double* d, double* du, + double* du2, lapack_int* ipiv ); +lapack_int LAPACKE_cgttrf_work( lapack_int n, lapack_complex_float* dl, + lapack_complex_float* d, + lapack_complex_float* du, + lapack_complex_float* du2, lapack_int* ipiv ); +lapack_int LAPACKE_zgttrf_work( lapack_int n, lapack_complex_double* dl, + lapack_complex_double* d, + lapack_complex_double* du, + lapack_complex_double* du2, lapack_int* ipiv ); + +lapack_int LAPACKE_sgttrs_work( int matrix_order, char trans, lapack_int n, + lapack_int nrhs, const float* dl, + const float* d, const float* du, + const float* du2, const lapack_int* ipiv, + float* b, lapack_int ldb ); +lapack_int LAPACKE_dgttrs_work( int matrix_order, char trans, lapack_int n, + lapack_int nrhs, const double* dl, + const double* d, const double* du, + const double* du2, const lapack_int* ipiv, + double* b, lapack_int ldb ); +lapack_int LAPACKE_cgttrs_work( int matrix_order, char trans, lapack_int n, + lapack_int nrhs, const lapack_complex_float* dl, + const lapack_complex_float* d, + const lapack_complex_float* du, + const lapack_complex_float* du2, + const lapack_int* ipiv, lapack_complex_float* b, + lapack_int ldb ); +lapack_int LAPACKE_zgttrs_work( int matrix_order, char trans, lapack_int n, + lapack_int nrhs, + const lapack_complex_double* dl, + const lapack_complex_double* d, + const lapack_complex_double* du, + const lapack_complex_double* du2, + const lapack_int* ipiv, + lapack_complex_double* b, lapack_int ldb ); + +lapack_int LAPACKE_chbev_work( int matrix_order, char jobz, char uplo, + lapack_int n, lapack_int kd, + lapack_complex_float* ab, lapack_int ldab, + float* w, lapack_complex_float* z, + lapack_int ldz, lapack_complex_float* work, + float* rwork ); +lapack_int LAPACKE_zhbev_work( int matrix_order, char jobz, char uplo, + lapack_int n, lapack_int kd, + lapack_complex_double* ab, lapack_int ldab, + double* w, lapack_complex_double* z, + lapack_int ldz, lapack_complex_double* work, + double* rwork ); + +lapack_int 
LAPACKE_chbevd_work( int matrix_order, char jobz, char uplo, + lapack_int n, lapack_int kd, + lapack_complex_float* ab, lapack_int ldab, + float* w, lapack_complex_float* z, + lapack_int ldz, lapack_complex_float* work, + lapack_int lwork, float* rwork, + lapack_int lrwork, lapack_int* iwork, + lapack_int liwork ); +lapack_int LAPACKE_zhbevd_work( int matrix_order, char jobz, char uplo, + lapack_int n, lapack_int kd, + lapack_complex_double* ab, lapack_int ldab, + double* w, lapack_complex_double* z, + lapack_int ldz, lapack_complex_double* work, + lapack_int lwork, double* rwork, + lapack_int lrwork, lapack_int* iwork, + lapack_int liwork ); + +lapack_int LAPACKE_chbevx_work( int matrix_order, char jobz, char range, + char uplo, lapack_int n, lapack_int kd, + lapack_complex_float* ab, lapack_int ldab, + lapack_complex_float* q, lapack_int ldq, + float vl, float vu, lapack_int il, + lapack_int iu, float abstol, lapack_int* m, + float* w, lapack_complex_float* z, + lapack_int ldz, lapack_complex_float* work, + float* rwork, lapack_int* iwork, + lapack_int* ifail ); +lapack_int LAPACKE_zhbevx_work( int matrix_order, char jobz, char range, + char uplo, lapack_int n, lapack_int kd, + lapack_complex_double* ab, lapack_int ldab, + lapack_complex_double* q, lapack_int ldq, + double vl, double vu, lapack_int il, + lapack_int iu, double abstol, lapack_int* m, + double* w, lapack_complex_double* z, + lapack_int ldz, lapack_complex_double* work, + double* rwork, lapack_int* iwork, + lapack_int* ifail ); + +lapack_int LAPACKE_chbgst_work( int matrix_order, char vect, char uplo, + lapack_int n, lapack_int ka, lapack_int kb, + lapack_complex_float* ab, lapack_int ldab, + const lapack_complex_float* bb, lapack_int ldbb, + lapack_complex_float* x, lapack_int ldx, + lapack_complex_float* work, float* rwork ); +lapack_int LAPACKE_zhbgst_work( int matrix_order, char vect, char uplo, + lapack_int n, lapack_int ka, lapack_int kb, + lapack_complex_double* ab, lapack_int ldab, + const 
lapack_complex_double* bb, + lapack_int ldbb, lapack_complex_double* x, + lapack_int ldx, lapack_complex_double* work, + double* rwork ); + +lapack_int LAPACKE_chbgv_work( int matrix_order, char jobz, char uplo, + lapack_int n, lapack_int ka, lapack_int kb, + lapack_complex_float* ab, lapack_int ldab, + lapack_complex_float* bb, lapack_int ldbb, + float* w, lapack_complex_float* z, + lapack_int ldz, lapack_complex_float* work, + float* rwork ); +lapack_int LAPACKE_zhbgv_work( int matrix_order, char jobz, char uplo, + lapack_int n, lapack_int ka, lapack_int kb, + lapack_complex_double* ab, lapack_int ldab, + lapack_complex_double* bb, lapack_int ldbb, + double* w, lapack_complex_double* z, + lapack_int ldz, lapack_complex_double* work, + double* rwork ); + +lapack_int LAPACKE_chbgvd_work( int matrix_order, char jobz, char uplo, + lapack_int n, lapack_int ka, lapack_int kb, + lapack_complex_float* ab, lapack_int ldab, + lapack_complex_float* bb, lapack_int ldbb, + float* w, lapack_complex_float* z, + lapack_int ldz, lapack_complex_float* work, + lapack_int lwork, float* rwork, + lapack_int lrwork, lapack_int* iwork, + lapack_int liwork ); +lapack_int LAPACKE_zhbgvd_work( int matrix_order, char jobz, char uplo, + lapack_int n, lapack_int ka, lapack_int kb, + lapack_complex_double* ab, lapack_int ldab, + lapack_complex_double* bb, lapack_int ldbb, + double* w, lapack_complex_double* z, + lapack_int ldz, lapack_complex_double* work, + lapack_int lwork, double* rwork, + lapack_int lrwork, lapack_int* iwork, + lapack_int liwork ); + +lapack_int LAPACKE_chbgvx_work( int matrix_order, char jobz, char range, + char uplo, lapack_int n, lapack_int ka, + lapack_int kb, lapack_complex_float* ab, + lapack_int ldab, lapack_complex_float* bb, + lapack_int ldbb, lapack_complex_float* q, + lapack_int ldq, float vl, float vu, + lapack_int il, lapack_int iu, float abstol, + lapack_int* m, float* w, + lapack_complex_float* z, lapack_int ldz, + lapack_complex_float* work, float* rwork, + 
lapack_int* iwork, lapack_int* ifail ); +lapack_int LAPACKE_zhbgvx_work( int matrix_order, char jobz, char range, + char uplo, lapack_int n, lapack_int ka, + lapack_int kb, lapack_complex_double* ab, + lapack_int ldab, lapack_complex_double* bb, + lapack_int ldbb, lapack_complex_double* q, + lapack_int ldq, double vl, double vu, + lapack_int il, lapack_int iu, double abstol, + lapack_int* m, double* w, + lapack_complex_double* z, lapack_int ldz, + lapack_complex_double* work, double* rwork, + lapack_int* iwork, lapack_int* ifail ); + +lapack_int LAPACKE_chbtrd_work( int matrix_order, char vect, char uplo, + lapack_int n, lapack_int kd, + lapack_complex_float* ab, lapack_int ldab, + float* d, float* e, lapack_complex_float* q, + lapack_int ldq, lapack_complex_float* work ); +lapack_int LAPACKE_zhbtrd_work( int matrix_order, char vect, char uplo, + lapack_int n, lapack_int kd, + lapack_complex_double* ab, lapack_int ldab, + double* d, double* e, lapack_complex_double* q, + lapack_int ldq, lapack_complex_double* work ); + +lapack_int LAPACKE_checon_work( int matrix_order, char uplo, lapack_int n, + const lapack_complex_float* a, lapack_int lda, + const lapack_int* ipiv, float anorm, + float* rcond, lapack_complex_float* work ); +lapack_int LAPACKE_zhecon_work( int matrix_order, char uplo, lapack_int n, + const lapack_complex_double* a, lapack_int lda, + const lapack_int* ipiv, double anorm, + double* rcond, lapack_complex_double* work ); + +lapack_int LAPACKE_cheequb_work( int matrix_order, char uplo, lapack_int n, + const lapack_complex_float* a, lapack_int lda, + float* s, float* scond, float* amax, + lapack_complex_float* work ); +lapack_int LAPACKE_zheequb_work( int matrix_order, char uplo, lapack_int n, + const lapack_complex_double* a, lapack_int lda, + double* s, double* scond, double* amax, + lapack_complex_double* work ); + +lapack_int LAPACKE_cheev_work( int matrix_order, char jobz, char uplo, + lapack_int n, lapack_complex_float* a, + lapack_int lda, float* 
w, + lapack_complex_float* work, lapack_int lwork, + float* rwork ); +lapack_int LAPACKE_zheev_work( int matrix_order, char jobz, char uplo, + lapack_int n, lapack_complex_double* a, + lapack_int lda, double* w, + lapack_complex_double* work, lapack_int lwork, + double* rwork ); + +lapack_int LAPACKE_cheevd_work( int matrix_order, char jobz, char uplo, + lapack_int n, lapack_complex_float* a, + lapack_int lda, float* w, + lapack_complex_float* work, lapack_int lwork, + float* rwork, lapack_int lrwork, + lapack_int* iwork, lapack_int liwork ); +lapack_int LAPACKE_zheevd_work( int matrix_order, char jobz, char uplo, + lapack_int n, lapack_complex_double* a, + lapack_int lda, double* w, + lapack_complex_double* work, lapack_int lwork, + double* rwork, lapack_int lrwork, + lapack_int* iwork, lapack_int liwork ); + +lapack_int LAPACKE_cheevr_work( int matrix_order, char jobz, char range, + char uplo, lapack_int n, + lapack_complex_float* a, lapack_int lda, + float vl, float vu, lapack_int il, + lapack_int iu, float abstol, lapack_int* m, + float* w, lapack_complex_float* z, + lapack_int ldz, lapack_int* isuppz, + lapack_complex_float* work, lapack_int lwork, + float* rwork, lapack_int lrwork, + lapack_int* iwork, lapack_int liwork ); +lapack_int LAPACKE_zheevr_work( int matrix_order, char jobz, char range, + char uplo, lapack_int n, + lapack_complex_double* a, lapack_int lda, + double vl, double vu, lapack_int il, + lapack_int iu, double abstol, lapack_int* m, + double* w, lapack_complex_double* z, + lapack_int ldz, lapack_int* isuppz, + lapack_complex_double* work, lapack_int lwork, + double* rwork, lapack_int lrwork, + lapack_int* iwork, lapack_int liwork ); + +lapack_int LAPACKE_cheevx_work( int matrix_order, char jobz, char range, + char uplo, lapack_int n, + lapack_complex_float* a, lapack_int lda, + float vl, float vu, lapack_int il, + lapack_int iu, float abstol, lapack_int* m, + float* w, lapack_complex_float* z, + lapack_int ldz, lapack_complex_float* work, + 
lapack_int lwork, float* rwork, + lapack_int* iwork, lapack_int* ifail ); +lapack_int LAPACKE_zheevx_work( int matrix_order, char jobz, char range, + char uplo, lapack_int n, + lapack_complex_double* a, lapack_int lda, + double vl, double vu, lapack_int il, + lapack_int iu, double abstol, lapack_int* m, + double* w, lapack_complex_double* z, + lapack_int ldz, lapack_complex_double* work, + lapack_int lwork, double* rwork, + lapack_int* iwork, lapack_int* ifail ); + +lapack_int LAPACKE_chegst_work( int matrix_order, lapack_int itype, char uplo, + lapack_int n, lapack_complex_float* a, + lapack_int lda, const lapack_complex_float* b, + lapack_int ldb ); +lapack_int LAPACKE_zhegst_work( int matrix_order, lapack_int itype, char uplo, + lapack_int n, lapack_complex_double* a, + lapack_int lda, const lapack_complex_double* b, + lapack_int ldb ); + +lapack_int LAPACKE_chegv_work( int matrix_order, lapack_int itype, char jobz, + char uplo, lapack_int n, lapack_complex_float* a, + lapack_int lda, lapack_complex_float* b, + lapack_int ldb, float* w, + lapack_complex_float* work, lapack_int lwork, + float* rwork ); +lapack_int LAPACKE_zhegv_work( int matrix_order, lapack_int itype, char jobz, + char uplo, lapack_int n, + lapack_complex_double* a, lapack_int lda, + lapack_complex_double* b, lapack_int ldb, + double* w, lapack_complex_double* work, + lapack_int lwork, double* rwork ); + +lapack_int LAPACKE_chegvd_work( int matrix_order, lapack_int itype, char jobz, + char uplo, lapack_int n, + lapack_complex_float* a, lapack_int lda, + lapack_complex_float* b, lapack_int ldb, + float* w, lapack_complex_float* work, + lapack_int lwork, float* rwork, + lapack_int lrwork, lapack_int* iwork, + lapack_int liwork ); +lapack_int LAPACKE_zhegvd_work( int matrix_order, lapack_int itype, char jobz, + char uplo, lapack_int n, + lapack_complex_double* a, lapack_int lda, + lapack_complex_double* b, lapack_int ldb, + double* w, lapack_complex_double* work, + lapack_int lwork, double* rwork, 
+ lapack_int lrwork, lapack_int* iwork, + lapack_int liwork ); + +lapack_int LAPACKE_chegvx_work( int matrix_order, lapack_int itype, char jobz, + char range, char uplo, lapack_int n, + lapack_complex_float* a, lapack_int lda, + lapack_complex_float* b, lapack_int ldb, + float vl, float vu, lapack_int il, + lapack_int iu, float abstol, lapack_int* m, + float* w, lapack_complex_float* z, + lapack_int ldz, lapack_complex_float* work, + lapack_int lwork, float* rwork, + lapack_int* iwork, lapack_int* ifail ); +lapack_int LAPACKE_zhegvx_work( int matrix_order, lapack_int itype, char jobz, + char range, char uplo, lapack_int n, + lapack_complex_double* a, lapack_int lda, + lapack_complex_double* b, lapack_int ldb, + double vl, double vu, lapack_int il, + lapack_int iu, double abstol, lapack_int* m, + double* w, lapack_complex_double* z, + lapack_int ldz, lapack_complex_double* work, + lapack_int lwork, double* rwork, + lapack_int* iwork, lapack_int* ifail ); + +lapack_int LAPACKE_cherfs_work( int matrix_order, char uplo, lapack_int n, + lapack_int nrhs, const lapack_complex_float* a, + lapack_int lda, const lapack_complex_float* af, + lapack_int ldaf, const lapack_int* ipiv, + const lapack_complex_float* b, lapack_int ldb, + lapack_complex_float* x, lapack_int ldx, + float* ferr, float* berr, + lapack_complex_float* work, float* rwork ); +lapack_int LAPACKE_zherfs_work( int matrix_order, char uplo, lapack_int n, + lapack_int nrhs, const lapack_complex_double* a, + lapack_int lda, const lapack_complex_double* af, + lapack_int ldaf, const lapack_int* ipiv, + const lapack_complex_double* b, lapack_int ldb, + lapack_complex_double* x, lapack_int ldx, + double* ferr, double* berr, + lapack_complex_double* work, double* rwork ); + +lapack_int LAPACKE_cherfsx_work( int matrix_order, char uplo, char equed, + lapack_int n, lapack_int nrhs, + const lapack_complex_float* a, lapack_int lda, + const lapack_complex_float* af, + lapack_int ldaf, const lapack_int* ipiv, + const float* 
s, const lapack_complex_float* b, + lapack_int ldb, lapack_complex_float* x, + lapack_int ldx, float* rcond, float* berr, + lapack_int n_err_bnds, float* err_bnds_norm, + float* err_bnds_comp, lapack_int nparams, + float* params, lapack_complex_float* work, + float* rwork ); +lapack_int LAPACKE_zherfsx_work( int matrix_order, char uplo, char equed, + lapack_int n, lapack_int nrhs, + const lapack_complex_double* a, lapack_int lda, + const lapack_complex_double* af, + lapack_int ldaf, const lapack_int* ipiv, + const double* s, + const lapack_complex_double* b, lapack_int ldb, + lapack_complex_double* x, lapack_int ldx, + double* rcond, double* berr, + lapack_int n_err_bnds, double* err_bnds_norm, + double* err_bnds_comp, lapack_int nparams, + double* params, lapack_complex_double* work, + double* rwork ); + +lapack_int LAPACKE_chesv_work( int matrix_order, char uplo, lapack_int n, + lapack_int nrhs, lapack_complex_float* a, + lapack_int lda, lapack_int* ipiv, + lapack_complex_float* b, lapack_int ldb, + lapack_complex_float* work, lapack_int lwork ); +lapack_int LAPACKE_zhesv_work( int matrix_order, char uplo, lapack_int n, + lapack_int nrhs, lapack_complex_double* a, + lapack_int lda, lapack_int* ipiv, + lapack_complex_double* b, lapack_int ldb, + lapack_complex_double* work, lapack_int lwork ); + +lapack_int LAPACKE_chesvx_work( int matrix_order, char fact, char uplo, + lapack_int n, lapack_int nrhs, + const lapack_complex_float* a, lapack_int lda, + lapack_complex_float* af, lapack_int ldaf, + lapack_int* ipiv, const lapack_complex_float* b, + lapack_int ldb, lapack_complex_float* x, + lapack_int ldx, float* rcond, float* ferr, + float* berr, lapack_complex_float* work, + lapack_int lwork, float* rwork ); +lapack_int LAPACKE_zhesvx_work( int matrix_order, char fact, char uplo, + lapack_int n, lapack_int nrhs, + const lapack_complex_double* a, lapack_int lda, + lapack_complex_double* af, lapack_int ldaf, + lapack_int* ipiv, + const lapack_complex_double* b, 
lapack_int ldb, + lapack_complex_double* x, lapack_int ldx, + double* rcond, double* ferr, double* berr, + lapack_complex_double* work, lapack_int lwork, + double* rwork ); + +lapack_int LAPACKE_chesvxx_work( int matrix_order, char fact, char uplo, + lapack_int n, lapack_int nrhs, + lapack_complex_float* a, lapack_int lda, + lapack_complex_float* af, lapack_int ldaf, + lapack_int* ipiv, char* equed, float* s, + lapack_complex_float* b, lapack_int ldb, + lapack_complex_float* x, lapack_int ldx, + float* rcond, float* rpvgrw, float* berr, + lapack_int n_err_bnds, float* err_bnds_norm, + float* err_bnds_comp, lapack_int nparams, + float* params, lapack_complex_float* work, + float* rwork ); +lapack_int LAPACKE_zhesvxx_work( int matrix_order, char fact, char uplo, + lapack_int n, lapack_int nrhs, + lapack_complex_double* a, lapack_int lda, + lapack_complex_double* af, lapack_int ldaf, + lapack_int* ipiv, char* equed, double* s, + lapack_complex_double* b, lapack_int ldb, + lapack_complex_double* x, lapack_int ldx, + double* rcond, double* rpvgrw, double* berr, + lapack_int n_err_bnds, double* err_bnds_norm, + double* err_bnds_comp, lapack_int nparams, + double* params, lapack_complex_double* work, + double* rwork ); + +lapack_int LAPACKE_chetrd_work( int matrix_order, char uplo, lapack_int n, + lapack_complex_float* a, lapack_int lda, + float* d, float* e, lapack_complex_float* tau, + lapack_complex_float* work, lapack_int lwork ); +lapack_int LAPACKE_zhetrd_work( int matrix_order, char uplo, lapack_int n, + lapack_complex_double* a, lapack_int lda, + double* d, double* e, + lapack_complex_double* tau, + lapack_complex_double* work, lapack_int lwork ); + +lapack_int LAPACKE_chetrf_work( int matrix_order, char uplo, lapack_int n, + lapack_complex_float* a, lapack_int lda, + lapack_int* ipiv, lapack_complex_float* work, + lapack_int lwork ); +lapack_int LAPACKE_zhetrf_work( int matrix_order, char uplo, lapack_int n, + lapack_complex_double* a, lapack_int lda, + 
lapack_int* ipiv, lapack_complex_double* work, + lapack_int lwork ); + +lapack_int LAPACKE_chetri_work( int matrix_order, char uplo, lapack_int n, + lapack_complex_float* a, lapack_int lda, + const lapack_int* ipiv, + lapack_complex_float* work ); +lapack_int LAPACKE_zhetri_work( int matrix_order, char uplo, lapack_int n, + lapack_complex_double* a, lapack_int lda, + const lapack_int* ipiv, + lapack_complex_double* work ); + +lapack_int LAPACKE_chetrs_work( int matrix_order, char uplo, lapack_int n, + lapack_int nrhs, const lapack_complex_float* a, + lapack_int lda, const lapack_int* ipiv, + lapack_complex_float* b, lapack_int ldb ); +lapack_int LAPACKE_zhetrs_work( int matrix_order, char uplo, lapack_int n, + lapack_int nrhs, const lapack_complex_double* a, + lapack_int lda, const lapack_int* ipiv, + lapack_complex_double* b, lapack_int ldb ); + +lapack_int LAPACKE_chfrk_work( int matrix_order, char transr, char uplo, + char trans, lapack_int n, lapack_int k, + float alpha, const lapack_complex_float* a, + lapack_int lda, float beta, + lapack_complex_float* c ); +lapack_int LAPACKE_zhfrk_work( int matrix_order, char transr, char uplo, + char trans, lapack_int n, lapack_int k, + double alpha, const lapack_complex_double* a, + lapack_int lda, double beta, + lapack_complex_double* c ); + +lapack_int LAPACKE_shgeqz_work( int matrix_order, char job, char compq, + char compz, lapack_int n, lapack_int ilo, + lapack_int ihi, float* h, lapack_int ldh, + float* t, lapack_int ldt, float* alphar, + float* alphai, float* beta, float* q, + lapack_int ldq, float* z, lapack_int ldz, + float* work, lapack_int lwork ); +lapack_int LAPACKE_dhgeqz_work( int matrix_order, char job, char compq, + char compz, lapack_int n, lapack_int ilo, + lapack_int ihi, double* h, lapack_int ldh, + double* t, lapack_int ldt, double* alphar, + double* alphai, double* beta, double* q, + lapack_int ldq, double* z, lapack_int ldz, + double* work, lapack_int lwork ); +lapack_int LAPACKE_chgeqz_work( int 
matrix_order, char job, char compq, + char compz, lapack_int n, lapack_int ilo, + lapack_int ihi, lapack_complex_float* h, + lapack_int ldh, lapack_complex_float* t, + lapack_int ldt, lapack_complex_float* alpha, + lapack_complex_float* beta, + lapack_complex_float* q, lapack_int ldq, + lapack_complex_float* z, lapack_int ldz, + lapack_complex_float* work, lapack_int lwork, + float* rwork ); +lapack_int LAPACKE_zhgeqz_work( int matrix_order, char job, char compq, + char compz, lapack_int n, lapack_int ilo, + lapack_int ihi, lapack_complex_double* h, + lapack_int ldh, lapack_complex_double* t, + lapack_int ldt, lapack_complex_double* alpha, + lapack_complex_double* beta, + lapack_complex_double* q, lapack_int ldq, + lapack_complex_double* z, lapack_int ldz, + lapack_complex_double* work, lapack_int lwork, + double* rwork ); + +lapack_int LAPACKE_chpcon_work( int matrix_order, char uplo, lapack_int n, + const lapack_complex_float* ap, + const lapack_int* ipiv, float anorm, + float* rcond, lapack_complex_float* work ); +lapack_int LAPACKE_zhpcon_work( int matrix_order, char uplo, lapack_int n, + const lapack_complex_double* ap, + const lapack_int* ipiv, double anorm, + double* rcond, lapack_complex_double* work ); + +lapack_int LAPACKE_chpev_work( int matrix_order, char jobz, char uplo, + lapack_int n, lapack_complex_float* ap, float* w, + lapack_complex_float* z, lapack_int ldz, + lapack_complex_float* work, float* rwork ); +lapack_int LAPACKE_zhpev_work( int matrix_order, char jobz, char uplo, + lapack_int n, lapack_complex_double* ap, + double* w, lapack_complex_double* z, + lapack_int ldz, lapack_complex_double* work, + double* rwork ); + +lapack_int LAPACKE_chpevd_work( int matrix_order, char jobz, char uplo, + lapack_int n, lapack_complex_float* ap, + float* w, lapack_complex_float* z, + lapack_int ldz, lapack_complex_float* work, + lapack_int lwork, float* rwork, + lapack_int lrwork, lapack_int* iwork, + lapack_int liwork ); +lapack_int LAPACKE_zhpevd_work( int 
matrix_order, char jobz, char uplo, + lapack_int n, lapack_complex_double* ap, + double* w, lapack_complex_double* z, + lapack_int ldz, lapack_complex_double* work, + lapack_int lwork, double* rwork, + lapack_int lrwork, lapack_int* iwork, + lapack_int liwork ); + +lapack_int LAPACKE_chpevx_work( int matrix_order, char jobz, char range, + char uplo, lapack_int n, + lapack_complex_float* ap, float vl, float vu, + lapack_int il, lapack_int iu, float abstol, + lapack_int* m, float* w, + lapack_complex_float* z, lapack_int ldz, + lapack_complex_float* work, float* rwork, + lapack_int* iwork, lapack_int* ifail ); +lapack_int LAPACKE_zhpevx_work( int matrix_order, char jobz, char range, + char uplo, lapack_int n, + lapack_complex_double* ap, double vl, double vu, + lapack_int il, lapack_int iu, double abstol, + lapack_int* m, double* w, + lapack_complex_double* z, lapack_int ldz, + lapack_complex_double* work, double* rwork, + lapack_int* iwork, lapack_int* ifail ); + +lapack_int LAPACKE_chpgst_work( int matrix_order, lapack_int itype, char uplo, + lapack_int n, lapack_complex_float* ap, + const lapack_complex_float* bp ); +lapack_int LAPACKE_zhpgst_work( int matrix_order, lapack_int itype, char uplo, + lapack_int n, lapack_complex_double* ap, + const lapack_complex_double* bp ); + +lapack_int LAPACKE_chpgv_work( int matrix_order, lapack_int itype, char jobz, + char uplo, lapack_int n, + lapack_complex_float* ap, + lapack_complex_float* bp, float* w, + lapack_complex_float* z, lapack_int ldz, + lapack_complex_float* work, float* rwork ); +lapack_int LAPACKE_zhpgv_work( int matrix_order, lapack_int itype, char jobz, + char uplo, lapack_int n, + lapack_complex_double* ap, + lapack_complex_double* bp, double* w, + lapack_complex_double* z, lapack_int ldz, + lapack_complex_double* work, double* rwork ); + +lapack_int LAPACKE_chpgvd_work( int matrix_order, lapack_int itype, char jobz, + char uplo, lapack_int n, + lapack_complex_float* ap, + lapack_complex_float* bp, float* w, 
+ lapack_complex_float* z, lapack_int ldz, + lapack_complex_float* work, lapack_int lwork, + float* rwork, lapack_int lrwork, + lapack_int* iwork, lapack_int liwork ); +lapack_int LAPACKE_zhpgvd_work( int matrix_order, lapack_int itype, char jobz, + char uplo, lapack_int n, + lapack_complex_double* ap, + lapack_complex_double* bp, double* w, + lapack_complex_double* z, lapack_int ldz, + lapack_complex_double* work, lapack_int lwork, + double* rwork, lapack_int lrwork, + lapack_int* iwork, lapack_int liwork ); + +lapack_int LAPACKE_chpgvx_work( int matrix_order, lapack_int itype, char jobz, + char range, char uplo, lapack_int n, + lapack_complex_float* ap, + lapack_complex_float* bp, float vl, float vu, + lapack_int il, lapack_int iu, float abstol, + lapack_int* m, float* w, + lapack_complex_float* z, lapack_int ldz, + lapack_complex_float* work, float* rwork, + lapack_int* iwork, lapack_int* ifail ); +lapack_int LAPACKE_zhpgvx_work( int matrix_order, lapack_int itype, char jobz, + char range, char uplo, lapack_int n, + lapack_complex_double* ap, + lapack_complex_double* bp, double vl, double vu, + lapack_int il, lapack_int iu, double abstol, + lapack_int* m, double* w, + lapack_complex_double* z, lapack_int ldz, + lapack_complex_double* work, double* rwork, + lapack_int* iwork, lapack_int* ifail ); + +lapack_int LAPACKE_chprfs_work( int matrix_order, char uplo, lapack_int n, + lapack_int nrhs, const lapack_complex_float* ap, + const lapack_complex_float* afp, + const lapack_int* ipiv, + const lapack_complex_float* b, lapack_int ldb, + lapack_complex_float* x, lapack_int ldx, + float* ferr, float* berr, + lapack_complex_float* work, float* rwork ); +lapack_int LAPACKE_zhprfs_work( int matrix_order, char uplo, lapack_int n, + lapack_int nrhs, + const lapack_complex_double* ap, + const lapack_complex_double* afp, + const lapack_int* ipiv, + const lapack_complex_double* b, lapack_int ldb, + lapack_complex_double* x, lapack_int ldx, + double* ferr, double* berr, + 
lapack_complex_double* work, double* rwork ); + +lapack_int LAPACKE_chpsv_work( int matrix_order, char uplo, lapack_int n, + lapack_int nrhs, lapack_complex_float* ap, + lapack_int* ipiv, lapack_complex_float* b, + lapack_int ldb ); +lapack_int LAPACKE_zhpsv_work( int matrix_order, char uplo, lapack_int n, + lapack_int nrhs, lapack_complex_double* ap, + lapack_int* ipiv, lapack_complex_double* b, + lapack_int ldb ); + +lapack_int LAPACKE_chpsvx_work( int matrix_order, char fact, char uplo, + lapack_int n, lapack_int nrhs, + const lapack_complex_float* ap, + lapack_complex_float* afp, lapack_int* ipiv, + const lapack_complex_float* b, lapack_int ldb, + lapack_complex_float* x, lapack_int ldx, + float* rcond, float* ferr, float* berr, + lapack_complex_float* work, float* rwork ); +lapack_int LAPACKE_zhpsvx_work( int matrix_order, char fact, char uplo, + lapack_int n, lapack_int nrhs, + const lapack_complex_double* ap, + lapack_complex_double* afp, lapack_int* ipiv, + const lapack_complex_double* b, lapack_int ldb, + lapack_complex_double* x, lapack_int ldx, + double* rcond, double* ferr, double* berr, + lapack_complex_double* work, double* rwork ); + +lapack_int LAPACKE_chptrd_work( int matrix_order, char uplo, lapack_int n, + lapack_complex_float* ap, float* d, float* e, + lapack_complex_float* tau ); +lapack_int LAPACKE_zhptrd_work( int matrix_order, char uplo, lapack_int n, + lapack_complex_double* ap, double* d, double* e, + lapack_complex_double* tau ); + +lapack_int LAPACKE_chptrf_work( int matrix_order, char uplo, lapack_int n, + lapack_complex_float* ap, lapack_int* ipiv ); +lapack_int LAPACKE_zhptrf_work( int matrix_order, char uplo, lapack_int n, + lapack_complex_double* ap, lapack_int* ipiv ); + +lapack_int LAPACKE_chptri_work( int matrix_order, char uplo, lapack_int n, + lapack_complex_float* ap, + const lapack_int* ipiv, + lapack_complex_float* work ); +lapack_int LAPACKE_zhptri_work( int matrix_order, char uplo, lapack_int n, + lapack_complex_double* 
ap, + const lapack_int* ipiv, + lapack_complex_double* work ); + +lapack_int LAPACKE_chptrs_work( int matrix_order, char uplo, lapack_int n, + lapack_int nrhs, const lapack_complex_float* ap, + const lapack_int* ipiv, lapack_complex_float* b, + lapack_int ldb ); +lapack_int LAPACKE_zhptrs_work( int matrix_order, char uplo, lapack_int n, + lapack_int nrhs, + const lapack_complex_double* ap, + const lapack_int* ipiv, + lapack_complex_double* b, lapack_int ldb ); + +lapack_int LAPACKE_shsein_work( int matrix_order, char job, char eigsrc, + char initv, lapack_logical* select, + lapack_int n, const float* h, lapack_int ldh, + float* wr, const float* wi, float* vl, + lapack_int ldvl, float* vr, lapack_int ldvr, + lapack_int mm, lapack_int* m, float* work, + lapack_int* ifaill, lapack_int* ifailr ); +lapack_int LAPACKE_dhsein_work( int matrix_order, char job, char eigsrc, + char initv, lapack_logical* select, + lapack_int n, const double* h, lapack_int ldh, + double* wr, const double* wi, double* vl, + lapack_int ldvl, double* vr, lapack_int ldvr, + lapack_int mm, lapack_int* m, double* work, + lapack_int* ifaill, lapack_int* ifailr ); +lapack_int LAPACKE_chsein_work( int matrix_order, char job, char eigsrc, + char initv, const lapack_logical* select, + lapack_int n, const lapack_complex_float* h, + lapack_int ldh, lapack_complex_float* w, + lapack_complex_float* vl, lapack_int ldvl, + lapack_complex_float* vr, lapack_int ldvr, + lapack_int mm, lapack_int* m, + lapack_complex_float* work, float* rwork, + lapack_int* ifaill, lapack_int* ifailr ); +lapack_int LAPACKE_zhsein_work( int matrix_order, char job, char eigsrc, + char initv, const lapack_logical* select, + lapack_int n, const lapack_complex_double* h, + lapack_int ldh, lapack_complex_double* w, + lapack_complex_double* vl, lapack_int ldvl, + lapack_complex_double* vr, lapack_int ldvr, + lapack_int mm, lapack_int* m, + lapack_complex_double* work, double* rwork, + lapack_int* ifaill, lapack_int* ifailr ); + 
+lapack_int LAPACKE_shseqr_work( int matrix_order, char job, char compz, + lapack_int n, lapack_int ilo, lapack_int ihi, + float* h, lapack_int ldh, float* wr, float* wi, + float* z, lapack_int ldz, float* work, + lapack_int lwork ); +lapack_int LAPACKE_dhseqr_work( int matrix_order, char job, char compz, + lapack_int n, lapack_int ilo, lapack_int ihi, + double* h, lapack_int ldh, double* wr, + double* wi, double* z, lapack_int ldz, + double* work, lapack_int lwork ); +lapack_int LAPACKE_chseqr_work( int matrix_order, char job, char compz, + lapack_int n, lapack_int ilo, lapack_int ihi, + lapack_complex_float* h, lapack_int ldh, + lapack_complex_float* w, + lapack_complex_float* z, lapack_int ldz, + lapack_complex_float* work, lapack_int lwork ); +lapack_int LAPACKE_zhseqr_work( int matrix_order, char job, char compz, + lapack_int n, lapack_int ilo, lapack_int ihi, + lapack_complex_double* h, lapack_int ldh, + lapack_complex_double* w, + lapack_complex_double* z, lapack_int ldz, + lapack_complex_double* work, lapack_int lwork ); + +lapack_int LAPACKE_clacgv_work( lapack_int n, lapack_complex_float* x, + lapack_int incx ); +lapack_int LAPACKE_zlacgv_work( lapack_int n, lapack_complex_double* x, + lapack_int incx ); + +lapack_int LAPACKE_slacpy_work( int matrix_order, char uplo, lapack_int m, + lapack_int n, const float* a, lapack_int lda, + float* b, lapack_int ldb ); +lapack_int LAPACKE_dlacpy_work( int matrix_order, char uplo, lapack_int m, + lapack_int n, const double* a, lapack_int lda, + double* b, lapack_int ldb ); +lapack_int LAPACKE_clacpy_work( int matrix_order, char uplo, lapack_int m, + lapack_int n, const lapack_complex_float* a, + lapack_int lda, lapack_complex_float* b, + lapack_int ldb ); +lapack_int LAPACKE_zlacpy_work( int matrix_order, char uplo, lapack_int m, + lapack_int n, const lapack_complex_double* a, + lapack_int lda, lapack_complex_double* b, + lapack_int ldb ); + +lapack_int LAPACKE_zlag2c_work( int matrix_order, lapack_int m, lapack_int 
n, + const lapack_complex_double* a, lapack_int lda, + lapack_complex_float* sa, lapack_int ldsa ); + +lapack_int LAPACKE_slag2d_work( int matrix_order, lapack_int m, lapack_int n, + const float* sa, lapack_int ldsa, double* a, + lapack_int lda ); + +lapack_int LAPACKE_dlag2s_work( int matrix_order, lapack_int m, lapack_int n, + const double* a, lapack_int lda, float* sa, + lapack_int ldsa ); + +lapack_int LAPACKE_clag2z_work( int matrix_order, lapack_int m, lapack_int n, + const lapack_complex_float* sa, lapack_int ldsa, + lapack_complex_double* a, lapack_int lda ); + +lapack_int LAPACKE_slagge_work( int matrix_order, lapack_int m, lapack_int n, + lapack_int kl, lapack_int ku, const float* d, + float* a, lapack_int lda, lapack_int* iseed, + float* work ); +lapack_int LAPACKE_dlagge_work( int matrix_order, lapack_int m, lapack_int n, + lapack_int kl, lapack_int ku, const double* d, + double* a, lapack_int lda, lapack_int* iseed, + double* work ); +lapack_int LAPACKE_clagge_work( int matrix_order, lapack_int m, lapack_int n, + lapack_int kl, lapack_int ku, const float* d, + lapack_complex_float* a, lapack_int lda, + lapack_int* iseed, lapack_complex_float* work ); +lapack_int LAPACKE_zlagge_work( int matrix_order, lapack_int m, lapack_int n, + lapack_int kl, lapack_int ku, const double* d, + lapack_complex_double* a, lapack_int lda, + lapack_int* iseed, + lapack_complex_double* work ); + +lapack_int LAPACKE_claghe_work( int matrix_order, lapack_int n, lapack_int k, + const float* d, lapack_complex_float* a, + lapack_int lda, lapack_int* iseed, + lapack_complex_float* work ); +lapack_int LAPACKE_zlaghe_work( int matrix_order, lapack_int n, lapack_int k, + const double* d, lapack_complex_double* a, + lapack_int lda, lapack_int* iseed, + lapack_complex_double* work ); + +lapack_int LAPACKE_slagsy_work( int matrix_order, lapack_int n, lapack_int k, + const float* d, float* a, lapack_int lda, + lapack_int* iseed, float* work ); +lapack_int LAPACKE_dlagsy_work( int 
matrix_order, lapack_int n, lapack_int k, + const double* d, double* a, lapack_int lda, + lapack_int* iseed, double* work ); +lapack_int LAPACKE_clagsy_work( int matrix_order, lapack_int n, lapack_int k, + const float* d, lapack_complex_float* a, + lapack_int lda, lapack_int* iseed, + lapack_complex_float* work ); +lapack_int LAPACKE_zlagsy_work( int matrix_order, lapack_int n, lapack_int k, + const double* d, lapack_complex_double* a, + lapack_int lda, lapack_int* iseed, + lapack_complex_double* work ); + +lapack_int LAPACKE_slapmr_work( int matrix_order, lapack_logical forwrd, + lapack_int m, lapack_int n, float* x, + lapack_int ldx, lapack_int* k ); +lapack_int LAPACKE_dlapmr_work( int matrix_order, lapack_logical forwrd, + lapack_int m, lapack_int n, double* x, + lapack_int ldx, lapack_int* k ); +lapack_int LAPACKE_clapmr_work( int matrix_order, lapack_logical forwrd, + lapack_int m, lapack_int n, + lapack_complex_float* x, lapack_int ldx, + lapack_int* k ); +lapack_int LAPACKE_zlapmr_work( int matrix_order, lapack_logical forwrd, + lapack_int m, lapack_int n, + lapack_complex_double* x, lapack_int ldx, + lapack_int* k ); + +lapack_int LAPACKE_slartgp_work( float f, float g, float* cs, float* sn, + float* r ); +lapack_int LAPACKE_dlartgp_work( double f, double g, double* cs, double* sn, + double* r ); + +lapack_int LAPACKE_slartgs_work( float x, float y, float sigma, float* cs, + float* sn ); +lapack_int LAPACKE_dlartgs_work( double x, double y, double sigma, double* cs, + double* sn ); + +float LAPACKE_slapy2_work( float x, float y ); +double LAPACKE_dlapy2_work( double x, double y ); + +float LAPACKE_slapy3_work( float x, float y, float z ); +double LAPACKE_dlapy3_work( double x, double y, double z ); + +float LAPACKE_slamch_work( char cmach ); +double LAPACKE_dlamch_work( char cmach ); + +float LAPACKE_slange_work( int matrix_order, char norm, lapack_int m, + lapack_int n, const float* a, lapack_int lda, + float* work ); +double LAPACKE_dlange_work( int 
matrix_order, char norm, lapack_int m, + lapack_int n, const double* a, lapack_int lda, + double* work ); +float LAPACKE_clange_work( int matrix_order, char norm, lapack_int m, + lapack_int n, const lapack_complex_float* a, + lapack_int lda, float* work ); +double LAPACKE_zlange_work( int matrix_order, char norm, lapack_int m, + lapack_int n, const lapack_complex_double* a, + lapack_int lda, double* work ); + +float LAPACKE_clanhe_work( int matrix_order, char norm, char uplo, + lapack_int n, const lapack_complex_float* a, + lapack_int lda, float* work ); +double LAPACKE_zlanhe_work( int matrix_order, char norm, char uplo, + lapack_int n, const lapack_complex_double* a, + lapack_int lda, double* work ); + +float LAPACKE_slansy_work( int matrix_order, char norm, char uplo, + lapack_int n, const float* a, lapack_int lda, + float* work ); +double LAPACKE_dlansy_work( int matrix_order, char norm, char uplo, + lapack_int n, const double* a, lapack_int lda, + double* work ); +float LAPACKE_clansy_work( int matrix_order, char norm, char uplo, + lapack_int n, const lapack_complex_float* a, + lapack_int lda, float* work ); +double LAPACKE_zlansy_work( int matrix_order, char norm, char uplo, + lapack_int n, const lapack_complex_double* a, + lapack_int lda, double* work ); + +float LAPACKE_slantr_work( int matrix_order, char norm, char uplo, + char diag, lapack_int m, lapack_int n, const float* a, + lapack_int lda, float* work ); +double LAPACKE_dlantr_work( int matrix_order, char norm, char uplo, + char diag, lapack_int m, lapack_int n, + const double* a, lapack_int lda, double* work ); +float LAPACKE_clantr_work( int matrix_order, char norm, char uplo, + char diag, lapack_int m, lapack_int n, + const lapack_complex_float* a, lapack_int lda, + float* work ); +double LAPACKE_zlantr_work( int matrix_order, char norm, char uplo, + char diag, lapack_int m, lapack_int n, + const lapack_complex_double* a, lapack_int lda, + double* work ); + +lapack_int LAPACKE_slarfb_work( int 
matrix_order, char side, char trans, + char direct, char storev, lapack_int m, + lapack_int n, lapack_int k, const float* v, + lapack_int ldv, const float* t, lapack_int ldt, + float* c, lapack_int ldc, float* work, + lapack_int ldwork ); +lapack_int LAPACKE_dlarfb_work( int matrix_order, char side, char trans, + char direct, char storev, lapack_int m, + lapack_int n, lapack_int k, const double* v, + lapack_int ldv, const double* t, lapack_int ldt, + double* c, lapack_int ldc, double* work, + lapack_int ldwork ); +lapack_int LAPACKE_clarfb_work( int matrix_order, char side, char trans, + char direct, char storev, lapack_int m, + lapack_int n, lapack_int k, + const lapack_complex_float* v, lapack_int ldv, + const lapack_complex_float* t, lapack_int ldt, + lapack_complex_float* c, lapack_int ldc, + lapack_complex_float* work, lapack_int ldwork ); +lapack_int LAPACKE_zlarfb_work( int matrix_order, char side, char trans, + char direct, char storev, lapack_int m, + lapack_int n, lapack_int k, + const lapack_complex_double* v, lapack_int ldv, + const lapack_complex_double* t, lapack_int ldt, + lapack_complex_double* c, lapack_int ldc, + lapack_complex_double* work, + lapack_int ldwork ); + +lapack_int LAPACKE_slarfg_work( lapack_int n, float* alpha, float* x, + lapack_int incx, float* tau ); +lapack_int LAPACKE_dlarfg_work( lapack_int n, double* alpha, double* x, + lapack_int incx, double* tau ); +lapack_int LAPACKE_clarfg_work( lapack_int n, lapack_complex_float* alpha, + lapack_complex_float* x, lapack_int incx, + lapack_complex_float* tau ); +lapack_int LAPACKE_zlarfg_work( lapack_int n, lapack_complex_double* alpha, + lapack_complex_double* x, lapack_int incx, + lapack_complex_double* tau ); + +lapack_int LAPACKE_slarft_work( int matrix_order, char direct, char storev, + lapack_int n, lapack_int k, const float* v, + lapack_int ldv, const float* tau, float* t, + lapack_int ldt ); +lapack_int LAPACKE_dlarft_work( int matrix_order, char direct, char storev, + lapack_int 
n, lapack_int k, const double* v, + lapack_int ldv, const double* tau, double* t, + lapack_int ldt ); +lapack_int LAPACKE_clarft_work( int matrix_order, char direct, char storev, + lapack_int n, lapack_int k, + const lapack_complex_float* v, lapack_int ldv, + const lapack_complex_float* tau, + lapack_complex_float* t, lapack_int ldt ); +lapack_int LAPACKE_zlarft_work( int matrix_order, char direct, char storev, + lapack_int n, lapack_int k, + const lapack_complex_double* v, lapack_int ldv, + const lapack_complex_double* tau, + lapack_complex_double* t, lapack_int ldt ); + +lapack_int LAPACKE_slarfx_work( int matrix_order, char side, lapack_int m, + lapack_int n, const float* v, float tau, + float* c, lapack_int ldc, float* work ); +lapack_int LAPACKE_dlarfx_work( int matrix_order, char side, lapack_int m, + lapack_int n, const double* v, double tau, + double* c, lapack_int ldc, double* work ); +lapack_int LAPACKE_clarfx_work( int matrix_order, char side, lapack_int m, + lapack_int n, const lapack_complex_float* v, + lapack_complex_float tau, + lapack_complex_float* c, lapack_int ldc, + lapack_complex_float* work ); +lapack_int LAPACKE_zlarfx_work( int matrix_order, char side, lapack_int m, + lapack_int n, const lapack_complex_double* v, + lapack_complex_double tau, + lapack_complex_double* c, lapack_int ldc, + lapack_complex_double* work ); + +lapack_int LAPACKE_slarnv_work( lapack_int idist, lapack_int* iseed, + lapack_int n, float* x ); +lapack_int LAPACKE_dlarnv_work( lapack_int idist, lapack_int* iseed, + lapack_int n, double* x ); +lapack_int LAPACKE_clarnv_work( lapack_int idist, lapack_int* iseed, + lapack_int n, lapack_complex_float* x ); +lapack_int LAPACKE_zlarnv_work( lapack_int idist, lapack_int* iseed, + lapack_int n, lapack_complex_double* x ); + +lapack_int LAPACKE_slaset_work( int matrix_order, char uplo, lapack_int m, + lapack_int n, float alpha, float beta, float* a, + lapack_int lda ); +lapack_int LAPACKE_dlaset_work( int matrix_order, char uplo, 
lapack_int m, + lapack_int n, double alpha, double beta, + double* a, lapack_int lda ); +lapack_int LAPACKE_claset_work( int matrix_order, char uplo, lapack_int m, + lapack_int n, lapack_complex_float alpha, + lapack_complex_float beta, + lapack_complex_float* a, lapack_int lda ); +lapack_int LAPACKE_zlaset_work( int matrix_order, char uplo, lapack_int m, + lapack_int n, lapack_complex_double alpha, + lapack_complex_double beta, + lapack_complex_double* a, lapack_int lda ); + +lapack_int LAPACKE_slasrt_work( char id, lapack_int n, float* d ); +lapack_int LAPACKE_dlasrt_work( char id, lapack_int n, double* d ); + +lapack_int LAPACKE_slaswp_work( int matrix_order, lapack_int n, float* a, + lapack_int lda, lapack_int k1, lapack_int k2, + const lapack_int* ipiv, lapack_int incx ); +lapack_int LAPACKE_dlaswp_work( int matrix_order, lapack_int n, double* a, + lapack_int lda, lapack_int k1, lapack_int k2, + const lapack_int* ipiv, lapack_int incx ); +lapack_int LAPACKE_claswp_work( int matrix_order, lapack_int n, + lapack_complex_float* a, lapack_int lda, + lapack_int k1, lapack_int k2, + const lapack_int* ipiv, lapack_int incx ); +lapack_int LAPACKE_zlaswp_work( int matrix_order, lapack_int n, + lapack_complex_double* a, lapack_int lda, + lapack_int k1, lapack_int k2, + const lapack_int* ipiv, lapack_int incx ); + +lapack_int LAPACKE_slatms_work( int matrix_order, lapack_int m, lapack_int n, + char dist, lapack_int* iseed, char sym, + float* d, lapack_int mode, float cond, + float dmax, lapack_int kl, lapack_int ku, + char pack, float* a, lapack_int lda, + float* work ); +lapack_int LAPACKE_dlatms_work( int matrix_order, lapack_int m, lapack_int n, + char dist, lapack_int* iseed, char sym, + double* d, lapack_int mode, double cond, + double dmax, lapack_int kl, lapack_int ku, + char pack, double* a, lapack_int lda, + double* work ); +lapack_int LAPACKE_clatms_work( int matrix_order, lapack_int m, lapack_int n, + char dist, lapack_int* iseed, char sym, + float* d, 
lapack_int mode, float cond, + float dmax, lapack_int kl, lapack_int ku, + char pack, lapack_complex_float* a, + lapack_int lda, lapack_complex_float* work ); +lapack_int LAPACKE_zlatms_work( int matrix_order, lapack_int m, lapack_int n, + char dist, lapack_int* iseed, char sym, + double* d, lapack_int mode, double cond, + double dmax, lapack_int kl, lapack_int ku, + char pack, lapack_complex_double* a, + lapack_int lda, lapack_complex_double* work ); + +lapack_int LAPACKE_slauum_work( int matrix_order, char uplo, lapack_int n, + float* a, lapack_int lda ); +lapack_int LAPACKE_dlauum_work( int matrix_order, char uplo, lapack_int n, + double* a, lapack_int lda ); +lapack_int LAPACKE_clauum_work( int matrix_order, char uplo, lapack_int n, + lapack_complex_float* a, lapack_int lda ); +lapack_int LAPACKE_zlauum_work( int matrix_order, char uplo, lapack_int n, + lapack_complex_double* a, lapack_int lda ); + +lapack_int LAPACKE_sopgtr_work( int matrix_order, char uplo, lapack_int n, + const float* ap, const float* tau, float* q, + lapack_int ldq, float* work ); +lapack_int LAPACKE_dopgtr_work( int matrix_order, char uplo, lapack_int n, + const double* ap, const double* tau, double* q, + lapack_int ldq, double* work ); + +lapack_int LAPACKE_sopmtr_work( int matrix_order, char side, char uplo, + char trans, lapack_int m, lapack_int n, + const float* ap, const float* tau, float* c, + lapack_int ldc, float* work ); +lapack_int LAPACKE_dopmtr_work( int matrix_order, char side, char uplo, + char trans, lapack_int m, lapack_int n, + const double* ap, const double* tau, double* c, + lapack_int ldc, double* work ); + +lapack_int LAPACKE_sorgbr_work( int matrix_order, char vect, lapack_int m, + lapack_int n, lapack_int k, float* a, + lapack_int lda, const float* tau, float* work, + lapack_int lwork ); +lapack_int LAPACKE_dorgbr_work( int matrix_order, char vect, lapack_int m, + lapack_int n, lapack_int k, double* a, + lapack_int lda, const double* tau, double* work, + lapack_int 
lwork ); + +lapack_int LAPACKE_sorghr_work( int matrix_order, lapack_int n, lapack_int ilo, + lapack_int ihi, float* a, lapack_int lda, + const float* tau, float* work, + lapack_int lwork ); +lapack_int LAPACKE_dorghr_work( int matrix_order, lapack_int n, lapack_int ilo, + lapack_int ihi, double* a, lapack_int lda, + const double* tau, double* work, + lapack_int lwork ); + +lapack_int LAPACKE_sorglq_work( int matrix_order, lapack_int m, lapack_int n, + lapack_int k, float* a, lapack_int lda, + const float* tau, float* work, + lapack_int lwork ); +lapack_int LAPACKE_dorglq_work( int matrix_order, lapack_int m, lapack_int n, + lapack_int k, double* a, lapack_int lda, + const double* tau, double* work, + lapack_int lwork ); + +lapack_int LAPACKE_sorgql_work( int matrix_order, lapack_int m, lapack_int n, + lapack_int k, float* a, lapack_int lda, + const float* tau, float* work, + lapack_int lwork ); +lapack_int LAPACKE_dorgql_work( int matrix_order, lapack_int m, lapack_int n, + lapack_int k, double* a, lapack_int lda, + const double* tau, double* work, + lapack_int lwork ); + +lapack_int LAPACKE_sorgqr_work( int matrix_order, lapack_int m, lapack_int n, + lapack_int k, float* a, lapack_int lda, + const float* tau, float* work, + lapack_int lwork ); +lapack_int LAPACKE_dorgqr_work( int matrix_order, lapack_int m, lapack_int n, + lapack_int k, double* a, lapack_int lda, + const double* tau, double* work, + lapack_int lwork ); + +lapack_int LAPACKE_sorgrq_work( int matrix_order, lapack_int m, lapack_int n, + lapack_int k, float* a, lapack_int lda, + const float* tau, float* work, + lapack_int lwork ); +lapack_int LAPACKE_dorgrq_work( int matrix_order, lapack_int m, lapack_int n, + lapack_int k, double* a, lapack_int lda, + const double* tau, double* work, + lapack_int lwork ); + +lapack_int LAPACKE_sorgtr_work( int matrix_order, char uplo, lapack_int n, + float* a, lapack_int lda, const float* tau, + float* work, lapack_int lwork ); +lapack_int LAPACKE_dorgtr_work( int 
matrix_order, char uplo, lapack_int n, + double* a, lapack_int lda, const double* tau, + double* work, lapack_int lwork ); + +lapack_int LAPACKE_sormbr_work( int matrix_order, char vect, char side, + char trans, lapack_int m, lapack_int n, + lapack_int k, const float* a, lapack_int lda, + const float* tau, float* c, lapack_int ldc, + float* work, lapack_int lwork ); +lapack_int LAPACKE_dormbr_work( int matrix_order, char vect, char side, + char trans, lapack_int m, lapack_int n, + lapack_int k, const double* a, lapack_int lda, + const double* tau, double* c, lapack_int ldc, + double* work, lapack_int lwork ); + +lapack_int LAPACKE_sormhr_work( int matrix_order, char side, char trans, + lapack_int m, lapack_int n, lapack_int ilo, + lapack_int ihi, const float* a, lapack_int lda, + const float* tau, float* c, lapack_int ldc, + float* work, lapack_int lwork ); +lapack_int LAPACKE_dormhr_work( int matrix_order, char side, char trans, + lapack_int m, lapack_int n, lapack_int ilo, + lapack_int ihi, const double* a, lapack_int lda, + const double* tau, double* c, lapack_int ldc, + double* work, lapack_int lwork ); + +lapack_int LAPACKE_sormlq_work( int matrix_order, char side, char trans, + lapack_int m, lapack_int n, lapack_int k, + const float* a, lapack_int lda, + const float* tau, float* c, lapack_int ldc, + float* work, lapack_int lwork ); +lapack_int LAPACKE_dormlq_work( int matrix_order, char side, char trans, + lapack_int m, lapack_int n, lapack_int k, + const double* a, lapack_int lda, + const double* tau, double* c, lapack_int ldc, + double* work, lapack_int lwork ); + +lapack_int LAPACKE_sormql_work( int matrix_order, char side, char trans, + lapack_int m, lapack_int n, lapack_int k, + const float* a, lapack_int lda, + const float* tau, float* c, lapack_int ldc, + float* work, lapack_int lwork ); +lapack_int LAPACKE_dormql_work( int matrix_order, char side, char trans, + lapack_int m, lapack_int n, lapack_int k, + const double* a, lapack_int lda, + const 
double* tau, double* c, lapack_int ldc, + double* work, lapack_int lwork ); + +lapack_int LAPACKE_sormqr_work( int matrix_order, char side, char trans, + lapack_int m, lapack_int n, lapack_int k, + const float* a, lapack_int lda, + const float* tau, float* c, lapack_int ldc, + float* work, lapack_int lwork ); +lapack_int LAPACKE_dormqr_work( int matrix_order, char side, char trans, + lapack_int m, lapack_int n, lapack_int k, + const double* a, lapack_int lda, + const double* tau, double* c, lapack_int ldc, + double* work, lapack_int lwork ); + +lapack_int LAPACKE_sormrq_work( int matrix_order, char side, char trans, + lapack_int m, lapack_int n, lapack_int k, + const float* a, lapack_int lda, + const float* tau, float* c, lapack_int ldc, + float* work, lapack_int lwork ); +lapack_int LAPACKE_dormrq_work( int matrix_order, char side, char trans, + lapack_int m, lapack_int n, lapack_int k, + const double* a, lapack_int lda, + const double* tau, double* c, lapack_int ldc, + double* work, lapack_int lwork ); + +lapack_int LAPACKE_sormrz_work( int matrix_order, char side, char trans, + lapack_int m, lapack_int n, lapack_int k, + lapack_int l, const float* a, lapack_int lda, + const float* tau, float* c, lapack_int ldc, + float* work, lapack_int lwork ); +lapack_int LAPACKE_dormrz_work( int matrix_order, char side, char trans, + lapack_int m, lapack_int n, lapack_int k, + lapack_int l, const double* a, lapack_int lda, + const double* tau, double* c, lapack_int ldc, + double* work, lapack_int lwork ); + +lapack_int LAPACKE_sormtr_work( int matrix_order, char side, char uplo, + char trans, lapack_int m, lapack_int n, + const float* a, lapack_int lda, + const float* tau, float* c, lapack_int ldc, + float* work, lapack_int lwork ); +lapack_int LAPACKE_dormtr_work( int matrix_order, char side, char uplo, + char trans, lapack_int m, lapack_int n, + const double* a, lapack_int lda, + const double* tau, double* c, lapack_int ldc, + double* work, lapack_int lwork ); + +lapack_int 
LAPACKE_spbcon_work( int matrix_order, char uplo, lapack_int n, + lapack_int kd, const float* ab, lapack_int ldab, + float anorm, float* rcond, float* work, + lapack_int* iwork ); +lapack_int LAPACKE_dpbcon_work( int matrix_order, char uplo, lapack_int n, + lapack_int kd, const double* ab, + lapack_int ldab, double anorm, double* rcond, + double* work, lapack_int* iwork ); +lapack_int LAPACKE_cpbcon_work( int matrix_order, char uplo, lapack_int n, + lapack_int kd, const lapack_complex_float* ab, + lapack_int ldab, float anorm, float* rcond, + lapack_complex_float* work, float* rwork ); +lapack_int LAPACKE_zpbcon_work( int matrix_order, char uplo, lapack_int n, + lapack_int kd, const lapack_complex_double* ab, + lapack_int ldab, double anorm, double* rcond, + lapack_complex_double* work, double* rwork ); + +lapack_int LAPACKE_spbequ_work( int matrix_order, char uplo, lapack_int n, + lapack_int kd, const float* ab, lapack_int ldab, + float* s, float* scond, float* amax ); +lapack_int LAPACKE_dpbequ_work( int matrix_order, char uplo, lapack_int n, + lapack_int kd, const double* ab, + lapack_int ldab, double* s, double* scond, + double* amax ); +lapack_int LAPACKE_cpbequ_work( int matrix_order, char uplo, lapack_int n, + lapack_int kd, const lapack_complex_float* ab, + lapack_int ldab, float* s, float* scond, + float* amax ); +lapack_int LAPACKE_zpbequ_work( int matrix_order, char uplo, lapack_int n, + lapack_int kd, const lapack_complex_double* ab, + lapack_int ldab, double* s, double* scond, + double* amax ); + +lapack_int LAPACKE_spbrfs_work( int matrix_order, char uplo, lapack_int n, + lapack_int kd, lapack_int nrhs, const float* ab, + lapack_int ldab, const float* afb, + lapack_int ldafb, const float* b, + lapack_int ldb, float* x, lapack_int ldx, + float* ferr, float* berr, float* work, + lapack_int* iwork ); +lapack_int LAPACKE_dpbrfs_work( int matrix_order, char uplo, lapack_int n, + lapack_int kd, lapack_int nrhs, + const double* ab, lapack_int ldab, + const 
double* afb, lapack_int ldafb, + const double* b, lapack_int ldb, double* x, + lapack_int ldx, double* ferr, double* berr, + double* work, lapack_int* iwork ); +lapack_int LAPACKE_cpbrfs_work( int matrix_order, char uplo, lapack_int n, + lapack_int kd, lapack_int nrhs, + const lapack_complex_float* ab, lapack_int ldab, + const lapack_complex_float* afb, + lapack_int ldafb, const lapack_complex_float* b, + lapack_int ldb, lapack_complex_float* x, + lapack_int ldx, float* ferr, float* berr, + lapack_complex_float* work, float* rwork ); +lapack_int LAPACKE_zpbrfs_work( int matrix_order, char uplo, lapack_int n, + lapack_int kd, lapack_int nrhs, + const lapack_complex_double* ab, + lapack_int ldab, + const lapack_complex_double* afb, + lapack_int ldafb, + const lapack_complex_double* b, lapack_int ldb, + lapack_complex_double* x, lapack_int ldx, + double* ferr, double* berr, + lapack_complex_double* work, double* rwork ); + +lapack_int LAPACKE_spbstf_work( int matrix_order, char uplo, lapack_int n, + lapack_int kb, float* bb, lapack_int ldbb ); +lapack_int LAPACKE_dpbstf_work( int matrix_order, char uplo, lapack_int n, + lapack_int kb, double* bb, lapack_int ldbb ); +lapack_int LAPACKE_cpbstf_work( int matrix_order, char uplo, lapack_int n, + lapack_int kb, lapack_complex_float* bb, + lapack_int ldbb ); +lapack_int LAPACKE_zpbstf_work( int matrix_order, char uplo, lapack_int n, + lapack_int kb, lapack_complex_double* bb, + lapack_int ldbb ); + +lapack_int LAPACKE_spbsv_work( int matrix_order, char uplo, lapack_int n, + lapack_int kd, lapack_int nrhs, float* ab, + lapack_int ldab, float* b, lapack_int ldb ); +lapack_int LAPACKE_dpbsv_work( int matrix_order, char uplo, lapack_int n, + lapack_int kd, lapack_int nrhs, double* ab, + lapack_int ldab, double* b, lapack_int ldb ); +lapack_int LAPACKE_cpbsv_work( int matrix_order, char uplo, lapack_int n, + lapack_int kd, lapack_int nrhs, + lapack_complex_float* ab, lapack_int ldab, + lapack_complex_float* b, lapack_int ldb ); 
+lapack_int LAPACKE_zpbsv_work( int matrix_order, char uplo, lapack_int n, + lapack_int kd, lapack_int nrhs, + lapack_complex_double* ab, lapack_int ldab, + lapack_complex_double* b, lapack_int ldb ); + +lapack_int LAPACKE_spbsvx_work( int matrix_order, char fact, char uplo, + lapack_int n, lapack_int kd, lapack_int nrhs, + float* ab, lapack_int ldab, float* afb, + lapack_int ldafb, char* equed, float* s, + float* b, lapack_int ldb, float* x, + lapack_int ldx, float* rcond, float* ferr, + float* berr, float* work, lapack_int* iwork ); +lapack_int LAPACKE_dpbsvx_work( int matrix_order, char fact, char uplo, + lapack_int n, lapack_int kd, lapack_int nrhs, + double* ab, lapack_int ldab, double* afb, + lapack_int ldafb, char* equed, double* s, + double* b, lapack_int ldb, double* x, + lapack_int ldx, double* rcond, double* ferr, + double* berr, double* work, lapack_int* iwork ); +lapack_int LAPACKE_cpbsvx_work( int matrix_order, char fact, char uplo, + lapack_int n, lapack_int kd, lapack_int nrhs, + lapack_complex_float* ab, lapack_int ldab, + lapack_complex_float* afb, lapack_int ldafb, + char* equed, float* s, lapack_complex_float* b, + lapack_int ldb, lapack_complex_float* x, + lapack_int ldx, float* rcond, float* ferr, + float* berr, lapack_complex_float* work, + float* rwork ); +lapack_int LAPACKE_zpbsvx_work( int matrix_order, char fact, char uplo, + lapack_int n, lapack_int kd, lapack_int nrhs, + lapack_complex_double* ab, lapack_int ldab, + lapack_complex_double* afb, lapack_int ldafb, + char* equed, double* s, + lapack_complex_double* b, lapack_int ldb, + lapack_complex_double* x, lapack_int ldx, + double* rcond, double* ferr, double* berr, + lapack_complex_double* work, double* rwork ); + +lapack_int LAPACKE_spbtrf_work( int matrix_order, char uplo, lapack_int n, + lapack_int kd, float* ab, lapack_int ldab ); +lapack_int LAPACKE_dpbtrf_work( int matrix_order, char uplo, lapack_int n, + lapack_int kd, double* ab, lapack_int ldab ); +lapack_int 
LAPACKE_cpbtrf_work( int matrix_order, char uplo, lapack_int n, + lapack_int kd, lapack_complex_float* ab, + lapack_int ldab ); +lapack_int LAPACKE_zpbtrf_work( int matrix_order, char uplo, lapack_int n, + lapack_int kd, lapack_complex_double* ab, + lapack_int ldab ); + +lapack_int LAPACKE_spbtrs_work( int matrix_order, char uplo, lapack_int n, + lapack_int kd, lapack_int nrhs, const float* ab, + lapack_int ldab, float* b, lapack_int ldb ); +lapack_int LAPACKE_dpbtrs_work( int matrix_order, char uplo, lapack_int n, + lapack_int kd, lapack_int nrhs, + const double* ab, lapack_int ldab, double* b, + lapack_int ldb ); +lapack_int LAPACKE_cpbtrs_work( int matrix_order, char uplo, lapack_int n, + lapack_int kd, lapack_int nrhs, + const lapack_complex_float* ab, lapack_int ldab, + lapack_complex_float* b, lapack_int ldb ); +lapack_int LAPACKE_zpbtrs_work( int matrix_order, char uplo, lapack_int n, + lapack_int kd, lapack_int nrhs, + const lapack_complex_double* ab, + lapack_int ldab, lapack_complex_double* b, + lapack_int ldb ); + +lapack_int LAPACKE_spftrf_work( int matrix_order, char transr, char uplo, + lapack_int n, float* a ); +lapack_int LAPACKE_dpftrf_work( int matrix_order, char transr, char uplo, + lapack_int n, double* a ); +lapack_int LAPACKE_cpftrf_work( int matrix_order, char transr, char uplo, + lapack_int n, lapack_complex_float* a ); +lapack_int LAPACKE_zpftrf_work( int matrix_order, char transr, char uplo, + lapack_int n, lapack_complex_double* a ); + +lapack_int LAPACKE_spftri_work( int matrix_order, char transr, char uplo, + lapack_int n, float* a ); +lapack_int LAPACKE_dpftri_work( int matrix_order, char transr, char uplo, + lapack_int n, double* a ); +lapack_int LAPACKE_cpftri_work( int matrix_order, char transr, char uplo, + lapack_int n, lapack_complex_float* a ); +lapack_int LAPACKE_zpftri_work( int matrix_order, char transr, char uplo, + lapack_int n, lapack_complex_double* a ); + +lapack_int LAPACKE_spftrs_work( int matrix_order, char transr, 
char uplo, + lapack_int n, lapack_int nrhs, const float* a, + float* b, lapack_int ldb ); +lapack_int LAPACKE_dpftrs_work( int matrix_order, char transr, char uplo, + lapack_int n, lapack_int nrhs, const double* a, + double* b, lapack_int ldb ); +lapack_int LAPACKE_cpftrs_work( int matrix_order, char transr, char uplo, + lapack_int n, lapack_int nrhs, + const lapack_complex_float* a, + lapack_complex_float* b, lapack_int ldb ); +lapack_int LAPACKE_zpftrs_work( int matrix_order, char transr, char uplo, + lapack_int n, lapack_int nrhs, + const lapack_complex_double* a, + lapack_complex_double* b, lapack_int ldb ); + +lapack_int LAPACKE_spocon_work( int matrix_order, char uplo, lapack_int n, + const float* a, lapack_int lda, float anorm, + float* rcond, float* work, lapack_int* iwork ); +lapack_int LAPACKE_dpocon_work( int matrix_order, char uplo, lapack_int n, + const double* a, lapack_int lda, double anorm, + double* rcond, double* work, + lapack_int* iwork ); +lapack_int LAPACKE_cpocon_work( int matrix_order, char uplo, lapack_int n, + const lapack_complex_float* a, lapack_int lda, + float anorm, float* rcond, + lapack_complex_float* work, float* rwork ); +lapack_int LAPACKE_zpocon_work( int matrix_order, char uplo, lapack_int n, + const lapack_complex_double* a, lapack_int lda, + double anorm, double* rcond, + lapack_complex_double* work, double* rwork ); + +lapack_int LAPACKE_spoequ_work( int matrix_order, lapack_int n, const float* a, + lapack_int lda, float* s, float* scond, + float* amax ); +lapack_int LAPACKE_dpoequ_work( int matrix_order, lapack_int n, const double* a, + lapack_int lda, double* s, double* scond, + double* amax ); +lapack_int LAPACKE_cpoequ_work( int matrix_order, lapack_int n, + const lapack_complex_float* a, lapack_int lda, + float* s, float* scond, float* amax ); +lapack_int LAPACKE_zpoequ_work( int matrix_order, lapack_int n, + const lapack_complex_double* a, lapack_int lda, + double* s, double* scond, double* amax ); + +lapack_int 
LAPACKE_spoequb_work( int matrix_order, lapack_int n, const float* a, + lapack_int lda, float* s, float* scond, + float* amax ); +lapack_int LAPACKE_dpoequb_work( int matrix_order, lapack_int n, + const double* a, lapack_int lda, double* s, + double* scond, double* amax ); +lapack_int LAPACKE_cpoequb_work( int matrix_order, lapack_int n, + const lapack_complex_float* a, lapack_int lda, + float* s, float* scond, float* amax ); +lapack_int LAPACKE_zpoequb_work( int matrix_order, lapack_int n, + const lapack_complex_double* a, lapack_int lda, + double* s, double* scond, double* amax ); + +lapack_int LAPACKE_sporfs_work( int matrix_order, char uplo, lapack_int n, + lapack_int nrhs, const float* a, lapack_int lda, + const float* af, lapack_int ldaf, + const float* b, lapack_int ldb, float* x, + lapack_int ldx, float* ferr, float* berr, + float* work, lapack_int* iwork ); +lapack_int LAPACKE_dporfs_work( int matrix_order, char uplo, lapack_int n, + lapack_int nrhs, const double* a, + lapack_int lda, const double* af, + lapack_int ldaf, const double* b, + lapack_int ldb, double* x, lapack_int ldx, + double* ferr, double* berr, double* work, + lapack_int* iwork ); +lapack_int LAPACKE_cporfs_work( int matrix_order, char uplo, lapack_int n, + lapack_int nrhs, const lapack_complex_float* a, + lapack_int lda, const lapack_complex_float* af, + lapack_int ldaf, const lapack_complex_float* b, + lapack_int ldb, lapack_complex_float* x, + lapack_int ldx, float* ferr, float* berr, + lapack_complex_float* work, float* rwork ); +lapack_int LAPACKE_zporfs_work( int matrix_order, char uplo, lapack_int n, + lapack_int nrhs, const lapack_complex_double* a, + lapack_int lda, const lapack_complex_double* af, + lapack_int ldaf, const lapack_complex_double* b, + lapack_int ldb, lapack_complex_double* x, + lapack_int ldx, double* ferr, double* berr, + lapack_complex_double* work, double* rwork ); + +lapack_int LAPACKE_sporfsx_work( int matrix_order, char uplo, char equed, + lapack_int n, 
lapack_int nrhs, const float* a, + lapack_int lda, const float* af, + lapack_int ldaf, const float* s, + const float* b, lapack_int ldb, float* x, + lapack_int ldx, float* rcond, float* berr, + lapack_int n_err_bnds, float* err_bnds_norm, + float* err_bnds_comp, lapack_int nparams, + float* params, float* work, + lapack_int* iwork ); +lapack_int LAPACKE_dporfsx_work( int matrix_order, char uplo, char equed, + lapack_int n, lapack_int nrhs, const double* a, + lapack_int lda, const double* af, + lapack_int ldaf, const double* s, + const double* b, lapack_int ldb, double* x, + lapack_int ldx, double* rcond, double* berr, + lapack_int n_err_bnds, double* err_bnds_norm, + double* err_bnds_comp, lapack_int nparams, + double* params, double* work, + lapack_int* iwork ); +lapack_int LAPACKE_cporfsx_work( int matrix_order, char uplo, char equed, + lapack_int n, lapack_int nrhs, + const lapack_complex_float* a, lapack_int lda, + const lapack_complex_float* af, + lapack_int ldaf, const float* s, + const lapack_complex_float* b, lapack_int ldb, + lapack_complex_float* x, lapack_int ldx, + float* rcond, float* berr, + lapack_int n_err_bnds, float* err_bnds_norm, + float* err_bnds_comp, lapack_int nparams, + float* params, lapack_complex_float* work, + float* rwork ); +lapack_int LAPACKE_zporfsx_work( int matrix_order, char uplo, char equed, + lapack_int n, lapack_int nrhs, + const lapack_complex_double* a, lapack_int lda, + const lapack_complex_double* af, + lapack_int ldaf, const double* s, + const lapack_complex_double* b, lapack_int ldb, + lapack_complex_double* x, lapack_int ldx, + double* rcond, double* berr, + lapack_int n_err_bnds, double* err_bnds_norm, + double* err_bnds_comp, lapack_int nparams, + double* params, lapack_complex_double* work, + double* rwork ); + +lapack_int LAPACKE_sposv_work( int matrix_order, char uplo, lapack_int n, + lapack_int nrhs, float* a, lapack_int lda, + float* b, lapack_int ldb ); +lapack_int LAPACKE_dposv_work( int matrix_order, char 
uplo, lapack_int n, + lapack_int nrhs, double* a, lapack_int lda, + double* b, lapack_int ldb ); +lapack_int LAPACKE_cposv_work( int matrix_order, char uplo, lapack_int n, + lapack_int nrhs, lapack_complex_float* a, + lapack_int lda, lapack_complex_float* b, + lapack_int ldb ); +lapack_int LAPACKE_zposv_work( int matrix_order, char uplo, lapack_int n, + lapack_int nrhs, lapack_complex_double* a, + lapack_int lda, lapack_complex_double* b, + lapack_int ldb ); +lapack_int LAPACKE_dsposv_work( int matrix_order, char uplo, lapack_int n, + lapack_int nrhs, double* a, lapack_int lda, + double* b, lapack_int ldb, double* x, + lapack_int ldx, double* work, float* swork, + lapack_int* iter ); +lapack_int LAPACKE_zcposv_work( int matrix_order, char uplo, lapack_int n, + lapack_int nrhs, lapack_complex_double* a, + lapack_int lda, lapack_complex_double* b, + lapack_int ldb, lapack_complex_double* x, + lapack_int ldx, lapack_complex_double* work, + lapack_complex_float* swork, double* rwork, + lapack_int* iter ); + +lapack_int LAPACKE_sposvx_work( int matrix_order, char fact, char uplo, + lapack_int n, lapack_int nrhs, float* a, + lapack_int lda, float* af, lapack_int ldaf, + char* equed, float* s, float* b, lapack_int ldb, + float* x, lapack_int ldx, float* rcond, + float* ferr, float* berr, float* work, + lapack_int* iwork ); +lapack_int LAPACKE_dposvx_work( int matrix_order, char fact, char uplo, + lapack_int n, lapack_int nrhs, double* a, + lapack_int lda, double* af, lapack_int ldaf, + char* equed, double* s, double* b, + lapack_int ldb, double* x, lapack_int ldx, + double* rcond, double* ferr, double* berr, + double* work, lapack_int* iwork ); +lapack_int LAPACKE_cposvx_work( int matrix_order, char fact, char uplo, + lapack_int n, lapack_int nrhs, + lapack_complex_float* a, lapack_int lda, + lapack_complex_float* af, lapack_int ldaf, + char* equed, float* s, lapack_complex_float* b, + lapack_int ldb, lapack_complex_float* x, + lapack_int ldx, float* rcond, float* ferr, + 
float* berr, lapack_complex_float* work, + float* rwork ); +lapack_int LAPACKE_zposvx_work( int matrix_order, char fact, char uplo, + lapack_int n, lapack_int nrhs, + lapack_complex_double* a, lapack_int lda, + lapack_complex_double* af, lapack_int ldaf, + char* equed, double* s, + lapack_complex_double* b, lapack_int ldb, + lapack_complex_double* x, lapack_int ldx, + double* rcond, double* ferr, double* berr, + lapack_complex_double* work, double* rwork ); + +lapack_int LAPACKE_sposvxx_work( int matrix_order, char fact, char uplo, + lapack_int n, lapack_int nrhs, float* a, + lapack_int lda, float* af, lapack_int ldaf, + char* equed, float* s, float* b, + lapack_int ldb, float* x, lapack_int ldx, + float* rcond, float* rpvgrw, float* berr, + lapack_int n_err_bnds, float* err_bnds_norm, + float* err_bnds_comp, lapack_int nparams, + float* params, float* work, + lapack_int* iwork ); +lapack_int LAPACKE_dposvxx_work( int matrix_order, char fact, char uplo, + lapack_int n, lapack_int nrhs, double* a, + lapack_int lda, double* af, lapack_int ldaf, + char* equed, double* s, double* b, + lapack_int ldb, double* x, lapack_int ldx, + double* rcond, double* rpvgrw, double* berr, + lapack_int n_err_bnds, double* err_bnds_norm, + double* err_bnds_comp, lapack_int nparams, + double* params, double* work, + lapack_int* iwork ); +lapack_int LAPACKE_cposvxx_work( int matrix_order, char fact, char uplo, + lapack_int n, lapack_int nrhs, + lapack_complex_float* a, lapack_int lda, + lapack_complex_float* af, lapack_int ldaf, + char* equed, float* s, lapack_complex_float* b, + lapack_int ldb, lapack_complex_float* x, + lapack_int ldx, float* rcond, float* rpvgrw, + float* berr, lapack_int n_err_bnds, + float* err_bnds_norm, float* err_bnds_comp, + lapack_int nparams, float* params, + lapack_complex_float* work, float* rwork ); +lapack_int LAPACKE_zposvxx_work( int matrix_order, char fact, char uplo, + lapack_int n, lapack_int nrhs, + lapack_complex_double* a, lapack_int lda, + 
lapack_complex_double* af, lapack_int ldaf, + char* equed, double* s, + lapack_complex_double* b, lapack_int ldb, + lapack_complex_double* x, lapack_int ldx, + double* rcond, double* rpvgrw, double* berr, + lapack_int n_err_bnds, double* err_bnds_norm, + double* err_bnds_comp, lapack_int nparams, + double* params, lapack_complex_double* work, + double* rwork ); + +lapack_int LAPACKE_spotrf_work( int matrix_order, char uplo, lapack_int n, + float* a, lapack_int lda ); +lapack_int LAPACKE_dpotrf_work( int matrix_order, char uplo, lapack_int n, + double* a, lapack_int lda ); +lapack_int LAPACKE_cpotrf_work( int matrix_order, char uplo, lapack_int n, + lapack_complex_float* a, lapack_int lda ); +lapack_int LAPACKE_zpotrf_work( int matrix_order, char uplo, lapack_int n, + lapack_complex_double* a, lapack_int lda ); + +lapack_int LAPACKE_spotri_work( int matrix_order, char uplo, lapack_int n, + float* a, lapack_int lda ); +lapack_int LAPACKE_dpotri_work( int matrix_order, char uplo, lapack_int n, + double* a, lapack_int lda ); +lapack_int LAPACKE_cpotri_work( int matrix_order, char uplo, lapack_int n, + lapack_complex_float* a, lapack_int lda ); +lapack_int LAPACKE_zpotri_work( int matrix_order, char uplo, lapack_int n, + lapack_complex_double* a, lapack_int lda ); + +lapack_int LAPACKE_spotrs_work( int matrix_order, char uplo, lapack_int n, + lapack_int nrhs, const float* a, lapack_int lda, + float* b, lapack_int ldb ); +lapack_int LAPACKE_dpotrs_work( int matrix_order, char uplo, lapack_int n, + lapack_int nrhs, const double* a, + lapack_int lda, double* b, lapack_int ldb ); +lapack_int LAPACKE_cpotrs_work( int matrix_order, char uplo, lapack_int n, + lapack_int nrhs, const lapack_complex_float* a, + lapack_int lda, lapack_complex_float* b, + lapack_int ldb ); +lapack_int LAPACKE_zpotrs_work( int matrix_order, char uplo, lapack_int n, + lapack_int nrhs, const lapack_complex_double* a, + lapack_int lda, lapack_complex_double* b, + lapack_int ldb ); + +lapack_int 
LAPACKE_sppcon_work( int matrix_order, char uplo, lapack_int n, + const float* ap, float anorm, float* rcond, + float* work, lapack_int* iwork ); +lapack_int LAPACKE_dppcon_work( int matrix_order, char uplo, lapack_int n, + const double* ap, double anorm, double* rcond, + double* work, lapack_int* iwork ); +lapack_int LAPACKE_cppcon_work( int matrix_order, char uplo, lapack_int n, + const lapack_complex_float* ap, float anorm, + float* rcond, lapack_complex_float* work, + float* rwork ); +lapack_int LAPACKE_zppcon_work( int matrix_order, char uplo, lapack_int n, + const lapack_complex_double* ap, double anorm, + double* rcond, lapack_complex_double* work, + double* rwork ); + +lapack_int LAPACKE_sppequ_work( int matrix_order, char uplo, lapack_int n, + const float* ap, float* s, float* scond, + float* amax ); +lapack_int LAPACKE_dppequ_work( int matrix_order, char uplo, lapack_int n, + const double* ap, double* s, double* scond, + double* amax ); +lapack_int LAPACKE_cppequ_work( int matrix_order, char uplo, lapack_int n, + const lapack_complex_float* ap, float* s, + float* scond, float* amax ); +lapack_int LAPACKE_zppequ_work( int matrix_order, char uplo, lapack_int n, + const lapack_complex_double* ap, double* s, + double* scond, double* amax ); + +lapack_int LAPACKE_spprfs_work( int matrix_order, char uplo, lapack_int n, + lapack_int nrhs, const float* ap, + const float* afp, const float* b, + lapack_int ldb, float* x, lapack_int ldx, + float* ferr, float* berr, float* work, + lapack_int* iwork ); +lapack_int LAPACKE_dpprfs_work( int matrix_order, char uplo, lapack_int n, + lapack_int nrhs, const double* ap, + const double* afp, const double* b, + lapack_int ldb, double* x, lapack_int ldx, + double* ferr, double* berr, double* work, + lapack_int* iwork ); +lapack_int LAPACKE_cpprfs_work( int matrix_order, char uplo, lapack_int n, + lapack_int nrhs, const lapack_complex_float* ap, + const lapack_complex_float* afp, + const lapack_complex_float* b, lapack_int ldb, 
+ lapack_complex_float* x, lapack_int ldx, + float* ferr, float* berr, + lapack_complex_float* work, float* rwork ); +lapack_int LAPACKE_zpprfs_work( int matrix_order, char uplo, lapack_int n, + lapack_int nrhs, + const lapack_complex_double* ap, + const lapack_complex_double* afp, + const lapack_complex_double* b, lapack_int ldb, + lapack_complex_double* x, lapack_int ldx, + double* ferr, double* berr, + lapack_complex_double* work, double* rwork ); + +lapack_int LAPACKE_sppsv_work( int matrix_order, char uplo, lapack_int n, + lapack_int nrhs, float* ap, float* b, + lapack_int ldb ); +lapack_int LAPACKE_dppsv_work( int matrix_order, char uplo, lapack_int n, + lapack_int nrhs, double* ap, double* b, + lapack_int ldb ); +lapack_int LAPACKE_cppsv_work( int matrix_order, char uplo, lapack_int n, + lapack_int nrhs, lapack_complex_float* ap, + lapack_complex_float* b, lapack_int ldb ); +lapack_int LAPACKE_zppsv_work( int matrix_order, char uplo, lapack_int n, + lapack_int nrhs, lapack_complex_double* ap, + lapack_complex_double* b, lapack_int ldb ); + +lapack_int LAPACKE_sppsvx_work( int matrix_order, char fact, char uplo, + lapack_int n, lapack_int nrhs, float* ap, + float* afp, char* equed, float* s, float* b, + lapack_int ldb, float* x, lapack_int ldx, + float* rcond, float* ferr, float* berr, + float* work, lapack_int* iwork ); +lapack_int LAPACKE_dppsvx_work( int matrix_order, char fact, char uplo, + lapack_int n, lapack_int nrhs, double* ap, + double* afp, char* equed, double* s, double* b, + lapack_int ldb, double* x, lapack_int ldx, + double* rcond, double* ferr, double* berr, + double* work, lapack_int* iwork ); +lapack_int LAPACKE_cppsvx_work( int matrix_order, char fact, char uplo, + lapack_int n, lapack_int nrhs, + lapack_complex_float* ap, + lapack_complex_float* afp, char* equed, + float* s, lapack_complex_float* b, + lapack_int ldb, lapack_complex_float* x, + lapack_int ldx, float* rcond, float* ferr, + float* berr, lapack_complex_float* work, + float* 
rwork ); +lapack_int LAPACKE_zppsvx_work( int matrix_order, char fact, char uplo, + lapack_int n, lapack_int nrhs, + lapack_complex_double* ap, + lapack_complex_double* afp, char* equed, + double* s, lapack_complex_double* b, + lapack_int ldb, lapack_complex_double* x, + lapack_int ldx, double* rcond, double* ferr, + double* berr, lapack_complex_double* work, + double* rwork ); + +lapack_int LAPACKE_spptrf_work( int matrix_order, char uplo, lapack_int n, + float* ap ); +lapack_int LAPACKE_dpptrf_work( int matrix_order, char uplo, lapack_int n, + double* ap ); +lapack_int LAPACKE_cpptrf_work( int matrix_order, char uplo, lapack_int n, + lapack_complex_float* ap ); +lapack_int LAPACKE_zpptrf_work( int matrix_order, char uplo, lapack_int n, + lapack_complex_double* ap ); + +lapack_int LAPACKE_spptri_work( int matrix_order, char uplo, lapack_int n, + float* ap ); +lapack_int LAPACKE_dpptri_work( int matrix_order, char uplo, lapack_int n, + double* ap ); +lapack_int LAPACKE_cpptri_work( int matrix_order, char uplo, lapack_int n, + lapack_complex_float* ap ); +lapack_int LAPACKE_zpptri_work( int matrix_order, char uplo, lapack_int n, + lapack_complex_double* ap ); + +lapack_int LAPACKE_spptrs_work( int matrix_order, char uplo, lapack_int n, + lapack_int nrhs, const float* ap, float* b, + lapack_int ldb ); +lapack_int LAPACKE_dpptrs_work( int matrix_order, char uplo, lapack_int n, + lapack_int nrhs, const double* ap, double* b, + lapack_int ldb ); +lapack_int LAPACKE_cpptrs_work( int matrix_order, char uplo, lapack_int n, + lapack_int nrhs, const lapack_complex_float* ap, + lapack_complex_float* b, lapack_int ldb ); +lapack_int LAPACKE_zpptrs_work( int matrix_order, char uplo, lapack_int n, + lapack_int nrhs, + const lapack_complex_double* ap, + lapack_complex_double* b, lapack_int ldb ); + +lapack_int LAPACKE_spstrf_work( int matrix_order, char uplo, lapack_int n, + float* a, lapack_int lda, lapack_int* piv, + lapack_int* rank, float tol, float* work ); +lapack_int 
LAPACKE_dpstrf_work( int matrix_order, char uplo, lapack_int n, + double* a, lapack_int lda, lapack_int* piv, + lapack_int* rank, double tol, double* work ); +lapack_int LAPACKE_cpstrf_work( int matrix_order, char uplo, lapack_int n, + lapack_complex_float* a, lapack_int lda, + lapack_int* piv, lapack_int* rank, float tol, + float* work ); +lapack_int LAPACKE_zpstrf_work( int matrix_order, char uplo, lapack_int n, + lapack_complex_double* a, lapack_int lda, + lapack_int* piv, lapack_int* rank, double tol, + double* work ); + +lapack_int LAPACKE_sptcon_work( lapack_int n, const float* d, const float* e, + float anorm, float* rcond, float* work ); +lapack_int LAPACKE_dptcon_work( lapack_int n, const double* d, const double* e, + double anorm, double* rcond, double* work ); +lapack_int LAPACKE_cptcon_work( lapack_int n, const float* d, + const lapack_complex_float* e, float anorm, + float* rcond, float* work ); +lapack_int LAPACKE_zptcon_work( lapack_int n, const double* d, + const lapack_complex_double* e, double anorm, + double* rcond, double* work ); + +lapack_int LAPACKE_spteqr_work( int matrix_order, char compz, lapack_int n, + float* d, float* e, float* z, lapack_int ldz, + float* work ); +lapack_int LAPACKE_dpteqr_work( int matrix_order, char compz, lapack_int n, + double* d, double* e, double* z, lapack_int ldz, + double* work ); +lapack_int LAPACKE_cpteqr_work( int matrix_order, char compz, lapack_int n, + float* d, float* e, lapack_complex_float* z, + lapack_int ldz, float* work ); +lapack_int LAPACKE_zpteqr_work( int matrix_order, char compz, lapack_int n, + double* d, double* e, lapack_complex_double* z, + lapack_int ldz, double* work ); + +lapack_int LAPACKE_sptrfs_work( int matrix_order, lapack_int n, lapack_int nrhs, + const float* d, const float* e, const float* df, + const float* ef, const float* b, lapack_int ldb, + float* x, lapack_int ldx, float* ferr, + float* berr, float* work ); +lapack_int LAPACKE_dptrfs_work( int matrix_order, lapack_int n, 
lapack_int nrhs, + const double* d, const double* e, + const double* df, const double* ef, + const double* b, lapack_int ldb, double* x, + lapack_int ldx, double* ferr, double* berr, + double* work ); +lapack_int LAPACKE_cptrfs_work( int matrix_order, char uplo, lapack_int n, + lapack_int nrhs, const float* d, + const lapack_complex_float* e, const float* df, + const lapack_complex_float* ef, + const lapack_complex_float* b, lapack_int ldb, + lapack_complex_float* x, lapack_int ldx, + float* ferr, float* berr, + lapack_complex_float* work, float* rwork ); +lapack_int LAPACKE_zptrfs_work( int matrix_order, char uplo, lapack_int n, + lapack_int nrhs, const double* d, + const lapack_complex_double* e, + const double* df, + const lapack_complex_double* ef, + const lapack_complex_double* b, lapack_int ldb, + lapack_complex_double* x, lapack_int ldx, + double* ferr, double* berr, + lapack_complex_double* work, double* rwork ); + +lapack_int LAPACKE_sptsv_work( int matrix_order, lapack_int n, lapack_int nrhs, + float* d, float* e, float* b, lapack_int ldb ); +lapack_int LAPACKE_dptsv_work( int matrix_order, lapack_int n, lapack_int nrhs, + double* d, double* e, double* b, + lapack_int ldb ); +lapack_int LAPACKE_cptsv_work( int matrix_order, lapack_int n, lapack_int nrhs, + float* d, lapack_complex_float* e, + lapack_complex_float* b, lapack_int ldb ); +lapack_int LAPACKE_zptsv_work( int matrix_order, lapack_int n, lapack_int nrhs, + double* d, lapack_complex_double* e, + lapack_complex_double* b, lapack_int ldb ); + +lapack_int LAPACKE_sptsvx_work( int matrix_order, char fact, lapack_int n, + lapack_int nrhs, const float* d, const float* e, + float* df, float* ef, const float* b, + lapack_int ldb, float* x, lapack_int ldx, + float* rcond, float* ferr, float* berr, + float* work ); +lapack_int LAPACKE_dptsvx_work( int matrix_order, char fact, lapack_int n, + lapack_int nrhs, const double* d, + const double* e, double* df, double* ef, + const double* b, lapack_int ldb, 
double* x, + lapack_int ldx, double* rcond, double* ferr, + double* berr, double* work ); +lapack_int LAPACKE_cptsvx_work( int matrix_order, char fact, lapack_int n, + lapack_int nrhs, const float* d, + const lapack_complex_float* e, float* df, + lapack_complex_float* ef, + const lapack_complex_float* b, lapack_int ldb, + lapack_complex_float* x, lapack_int ldx, + float* rcond, float* ferr, float* berr, + lapack_complex_float* work, float* rwork ); +lapack_int LAPACKE_zptsvx_work( int matrix_order, char fact, lapack_int n, + lapack_int nrhs, const double* d, + const lapack_complex_double* e, double* df, + lapack_complex_double* ef, + const lapack_complex_double* b, lapack_int ldb, + lapack_complex_double* x, lapack_int ldx, + double* rcond, double* ferr, double* berr, + lapack_complex_double* work, double* rwork ); + +lapack_int LAPACKE_spttrf_work( lapack_int n, float* d, float* e ); +lapack_int LAPACKE_dpttrf_work( lapack_int n, double* d, double* e ); +lapack_int LAPACKE_cpttrf_work( lapack_int n, float* d, + lapack_complex_float* e ); +lapack_int LAPACKE_zpttrf_work( lapack_int n, double* d, + lapack_complex_double* e ); + +lapack_int LAPACKE_spttrs_work( int matrix_order, lapack_int n, lapack_int nrhs, + const float* d, const float* e, float* b, + lapack_int ldb ); +lapack_int LAPACKE_dpttrs_work( int matrix_order, lapack_int n, lapack_int nrhs, + const double* d, const double* e, double* b, + lapack_int ldb ); +lapack_int LAPACKE_cpttrs_work( int matrix_order, char uplo, lapack_int n, + lapack_int nrhs, const float* d, + const lapack_complex_float* e, + lapack_complex_float* b, lapack_int ldb ); +lapack_int LAPACKE_zpttrs_work( int matrix_order, char uplo, lapack_int n, + lapack_int nrhs, const double* d, + const lapack_complex_double* e, + lapack_complex_double* b, lapack_int ldb ); + +lapack_int LAPACKE_ssbev_work( int matrix_order, char jobz, char uplo, + lapack_int n, lapack_int kd, float* ab, + lapack_int ldab, float* w, float* z, + lapack_int ldz, 
float* work ); +lapack_int LAPACKE_dsbev_work( int matrix_order, char jobz, char uplo, + lapack_int n, lapack_int kd, double* ab, + lapack_int ldab, double* w, double* z, + lapack_int ldz, double* work ); + +lapack_int LAPACKE_ssbevd_work( int matrix_order, char jobz, char uplo, + lapack_int n, lapack_int kd, float* ab, + lapack_int ldab, float* w, float* z, + lapack_int ldz, float* work, lapack_int lwork, + lapack_int* iwork, lapack_int liwork ); +lapack_int LAPACKE_dsbevd_work( int matrix_order, char jobz, char uplo, + lapack_int n, lapack_int kd, double* ab, + lapack_int ldab, double* w, double* z, + lapack_int ldz, double* work, lapack_int lwork, + lapack_int* iwork, lapack_int liwork ); + +lapack_int LAPACKE_ssbevx_work( int matrix_order, char jobz, char range, + char uplo, lapack_int n, lapack_int kd, + float* ab, lapack_int ldab, float* q, + lapack_int ldq, float vl, float vu, + lapack_int il, lapack_int iu, float abstol, + lapack_int* m, float* w, float* z, + lapack_int ldz, float* work, lapack_int* iwork, + lapack_int* ifail ); +lapack_int LAPACKE_dsbevx_work( int matrix_order, char jobz, char range, + char uplo, lapack_int n, lapack_int kd, + double* ab, lapack_int ldab, double* q, + lapack_int ldq, double vl, double vu, + lapack_int il, lapack_int iu, double abstol, + lapack_int* m, double* w, double* z, + lapack_int ldz, double* work, lapack_int* iwork, + lapack_int* ifail ); + +lapack_int LAPACKE_ssbgst_work( int matrix_order, char vect, char uplo, + lapack_int n, lapack_int ka, lapack_int kb, + float* ab, lapack_int ldab, const float* bb, + lapack_int ldbb, float* x, lapack_int ldx, + float* work ); +lapack_int LAPACKE_dsbgst_work( int matrix_order, char vect, char uplo, + lapack_int n, lapack_int ka, lapack_int kb, + double* ab, lapack_int ldab, const double* bb, + lapack_int ldbb, double* x, lapack_int ldx, + double* work ); + +lapack_int LAPACKE_ssbgv_work( int matrix_order, char jobz, char uplo, + lapack_int n, lapack_int ka, lapack_int kb, + 
float* ab, lapack_int ldab, float* bb, + lapack_int ldbb, float* w, float* z, + lapack_int ldz, float* work ); +lapack_int LAPACKE_dsbgv_work( int matrix_order, char jobz, char uplo, + lapack_int n, lapack_int ka, lapack_int kb, + double* ab, lapack_int ldab, double* bb, + lapack_int ldbb, double* w, double* z, + lapack_int ldz, double* work ); + +lapack_int LAPACKE_ssbgvd_work( int matrix_order, char jobz, char uplo, + lapack_int n, lapack_int ka, lapack_int kb, + float* ab, lapack_int ldab, float* bb, + lapack_int ldbb, float* w, float* z, + lapack_int ldz, float* work, lapack_int lwork, + lapack_int* iwork, lapack_int liwork ); +lapack_int LAPACKE_dsbgvd_work( int matrix_order, char jobz, char uplo, + lapack_int n, lapack_int ka, lapack_int kb, + double* ab, lapack_int ldab, double* bb, + lapack_int ldbb, double* w, double* z, + lapack_int ldz, double* work, lapack_int lwork, + lapack_int* iwork, lapack_int liwork ); + +lapack_int LAPACKE_ssbgvx_work( int matrix_order, char jobz, char range, + char uplo, lapack_int n, lapack_int ka, + lapack_int kb, float* ab, lapack_int ldab, + float* bb, lapack_int ldbb, float* q, + lapack_int ldq, float vl, float vu, + lapack_int il, lapack_int iu, float abstol, + lapack_int* m, float* w, float* z, + lapack_int ldz, float* work, lapack_int* iwork, + lapack_int* ifail ); +lapack_int LAPACKE_dsbgvx_work( int matrix_order, char jobz, char range, + char uplo, lapack_int n, lapack_int ka, + lapack_int kb, double* ab, lapack_int ldab, + double* bb, lapack_int ldbb, double* q, + lapack_int ldq, double vl, double vu, + lapack_int il, lapack_int iu, double abstol, + lapack_int* m, double* w, double* z, + lapack_int ldz, double* work, lapack_int* iwork, + lapack_int* ifail ); + +lapack_int LAPACKE_ssbtrd_work( int matrix_order, char vect, char uplo, + lapack_int n, lapack_int kd, float* ab, + lapack_int ldab, float* d, float* e, float* q, + lapack_int ldq, float* work ); +lapack_int LAPACKE_dsbtrd_work( int matrix_order, char vect, 
char uplo, + lapack_int n, lapack_int kd, double* ab, + lapack_int ldab, double* d, double* e, + double* q, lapack_int ldq, double* work ); + +lapack_int LAPACKE_ssfrk_work( int matrix_order, char transr, char uplo, + char trans, lapack_int n, lapack_int k, + float alpha, const float* a, lapack_int lda, + float beta, float* c ); +lapack_int LAPACKE_dsfrk_work( int matrix_order, char transr, char uplo, + char trans, lapack_int n, lapack_int k, + double alpha, const double* a, lapack_int lda, + double beta, double* c ); + +lapack_int LAPACKE_sspcon_work( int matrix_order, char uplo, lapack_int n, + const float* ap, const lapack_int* ipiv, + float anorm, float* rcond, float* work, + lapack_int* iwork ); +lapack_int LAPACKE_dspcon_work( int matrix_order, char uplo, lapack_int n, + const double* ap, const lapack_int* ipiv, + double anorm, double* rcond, double* work, + lapack_int* iwork ); +lapack_int LAPACKE_cspcon_work( int matrix_order, char uplo, lapack_int n, + const lapack_complex_float* ap, + const lapack_int* ipiv, float anorm, + float* rcond, lapack_complex_float* work ); +lapack_int LAPACKE_zspcon_work( int matrix_order, char uplo, lapack_int n, + const lapack_complex_double* ap, + const lapack_int* ipiv, double anorm, + double* rcond, lapack_complex_double* work ); + +lapack_int LAPACKE_sspev_work( int matrix_order, char jobz, char uplo, + lapack_int n, float* ap, float* w, float* z, + lapack_int ldz, float* work ); +lapack_int LAPACKE_dspev_work( int matrix_order, char jobz, char uplo, + lapack_int n, double* ap, double* w, double* z, + lapack_int ldz, double* work ); + +lapack_int LAPACKE_sspevd_work( int matrix_order, char jobz, char uplo, + lapack_int n, float* ap, float* w, float* z, + lapack_int ldz, float* work, lapack_int lwork, + lapack_int* iwork, lapack_int liwork ); +lapack_int LAPACKE_dspevd_work( int matrix_order, char jobz, char uplo, + lapack_int n, double* ap, double* w, double* z, + lapack_int ldz, double* work, lapack_int lwork, + 
lapack_int* iwork, lapack_int liwork ); + +lapack_int LAPACKE_sspevx_work( int matrix_order, char jobz, char range, + char uplo, lapack_int n, float* ap, float vl, + float vu, lapack_int il, lapack_int iu, + float abstol, lapack_int* m, float* w, float* z, + lapack_int ldz, float* work, lapack_int* iwork, + lapack_int* ifail ); +lapack_int LAPACKE_dspevx_work( int matrix_order, char jobz, char range, + char uplo, lapack_int n, double* ap, double vl, + double vu, lapack_int il, lapack_int iu, + double abstol, lapack_int* m, double* w, + double* z, lapack_int ldz, double* work, + lapack_int* iwork, lapack_int* ifail ); + +lapack_int LAPACKE_sspgst_work( int matrix_order, lapack_int itype, char uplo, + lapack_int n, float* ap, const float* bp ); +lapack_int LAPACKE_dspgst_work( int matrix_order, lapack_int itype, char uplo, + lapack_int n, double* ap, const double* bp ); + +lapack_int LAPACKE_sspgv_work( int matrix_order, lapack_int itype, char jobz, + char uplo, lapack_int n, float* ap, float* bp, + float* w, float* z, lapack_int ldz, + float* work ); +lapack_int LAPACKE_dspgv_work( int matrix_order, lapack_int itype, char jobz, + char uplo, lapack_int n, double* ap, double* bp, + double* w, double* z, lapack_int ldz, + double* work ); + +lapack_int LAPACKE_sspgvd_work( int matrix_order, lapack_int itype, char jobz, + char uplo, lapack_int n, float* ap, float* bp, + float* w, float* z, lapack_int ldz, float* work, + lapack_int lwork, lapack_int* iwork, + lapack_int liwork ); +lapack_int LAPACKE_dspgvd_work( int matrix_order, lapack_int itype, char jobz, + char uplo, lapack_int n, double* ap, double* bp, + double* w, double* z, lapack_int ldz, + double* work, lapack_int lwork, + lapack_int* iwork, lapack_int liwork ); + +lapack_int LAPACKE_sspgvx_work( int matrix_order, lapack_int itype, char jobz, + char range, char uplo, lapack_int n, float* ap, + float* bp, float vl, float vu, lapack_int il, + lapack_int iu, float abstol, lapack_int* m, + float* w, float* z, 
lapack_int ldz, float* work, + lapack_int* iwork, lapack_int* ifail ); +lapack_int LAPACKE_dspgvx_work( int matrix_order, lapack_int itype, char jobz, + char range, char uplo, lapack_int n, double* ap, + double* bp, double vl, double vu, lapack_int il, + lapack_int iu, double abstol, lapack_int* m, + double* w, double* z, lapack_int ldz, + double* work, lapack_int* iwork, + lapack_int* ifail ); + +lapack_int LAPACKE_ssprfs_work( int matrix_order, char uplo, lapack_int n, + lapack_int nrhs, const float* ap, + const float* afp, const lapack_int* ipiv, + const float* b, lapack_int ldb, float* x, + lapack_int ldx, float* ferr, float* berr, + float* work, lapack_int* iwork ); +lapack_int LAPACKE_dsprfs_work( int matrix_order, char uplo, lapack_int n, + lapack_int nrhs, const double* ap, + const double* afp, const lapack_int* ipiv, + const double* b, lapack_int ldb, double* x, + lapack_int ldx, double* ferr, double* berr, + double* work, lapack_int* iwork ); +lapack_int LAPACKE_csprfs_work( int matrix_order, char uplo, lapack_int n, + lapack_int nrhs, const lapack_complex_float* ap, + const lapack_complex_float* afp, + const lapack_int* ipiv, + const lapack_complex_float* b, lapack_int ldb, + lapack_complex_float* x, lapack_int ldx, + float* ferr, float* berr, + lapack_complex_float* work, float* rwork ); +lapack_int LAPACKE_zsprfs_work( int matrix_order, char uplo, lapack_int n, + lapack_int nrhs, + const lapack_complex_double* ap, + const lapack_complex_double* afp, + const lapack_int* ipiv, + const lapack_complex_double* b, lapack_int ldb, + lapack_complex_double* x, lapack_int ldx, + double* ferr, double* berr, + lapack_complex_double* work, double* rwork ); + +lapack_int LAPACKE_sspsv_work( int matrix_order, char uplo, lapack_int n, + lapack_int nrhs, float* ap, lapack_int* ipiv, + float* b, lapack_int ldb ); +lapack_int LAPACKE_dspsv_work( int matrix_order, char uplo, lapack_int n, + lapack_int nrhs, double* ap, lapack_int* ipiv, + double* b, lapack_int ldb ); 
+lapack_int LAPACKE_cspsv_work( int matrix_order, char uplo, lapack_int n, + lapack_int nrhs, lapack_complex_float* ap, + lapack_int* ipiv, lapack_complex_float* b, + lapack_int ldb ); +lapack_int LAPACKE_zspsv_work( int matrix_order, char uplo, lapack_int n, + lapack_int nrhs, lapack_complex_double* ap, + lapack_int* ipiv, lapack_complex_double* b, + lapack_int ldb ); + +lapack_int LAPACKE_sspsvx_work( int matrix_order, char fact, char uplo, + lapack_int n, lapack_int nrhs, const float* ap, + float* afp, lapack_int* ipiv, const float* b, + lapack_int ldb, float* x, lapack_int ldx, + float* rcond, float* ferr, float* berr, + float* work, lapack_int* iwork ); +lapack_int LAPACKE_dspsvx_work( int matrix_order, char fact, char uplo, + lapack_int n, lapack_int nrhs, const double* ap, + double* afp, lapack_int* ipiv, const double* b, + lapack_int ldb, double* x, lapack_int ldx, + double* rcond, double* ferr, double* berr, + double* work, lapack_int* iwork ); +lapack_int LAPACKE_cspsvx_work( int matrix_order, char fact, char uplo, + lapack_int n, lapack_int nrhs, + const lapack_complex_float* ap, + lapack_complex_float* afp, lapack_int* ipiv, + const lapack_complex_float* b, lapack_int ldb, + lapack_complex_float* x, lapack_int ldx, + float* rcond, float* ferr, float* berr, + lapack_complex_float* work, float* rwork ); +lapack_int LAPACKE_zspsvx_work( int matrix_order, char fact, char uplo, + lapack_int n, lapack_int nrhs, + const lapack_complex_double* ap, + lapack_complex_double* afp, lapack_int* ipiv, + const lapack_complex_double* b, lapack_int ldb, + lapack_complex_double* x, lapack_int ldx, + double* rcond, double* ferr, double* berr, + lapack_complex_double* work, double* rwork ); + +lapack_int LAPACKE_ssptrd_work( int matrix_order, char uplo, lapack_int n, + float* ap, float* d, float* e, float* tau ); +lapack_int LAPACKE_dsptrd_work( int matrix_order, char uplo, lapack_int n, + double* ap, double* d, double* e, double* tau ); + +lapack_int LAPACKE_ssptrf_work( 
int matrix_order, char uplo, lapack_int n, + float* ap, lapack_int* ipiv ); +lapack_int LAPACKE_dsptrf_work( int matrix_order, char uplo, lapack_int n, + double* ap, lapack_int* ipiv ); +lapack_int LAPACKE_csptrf_work( int matrix_order, char uplo, lapack_int n, + lapack_complex_float* ap, lapack_int* ipiv ); +lapack_int LAPACKE_zsptrf_work( int matrix_order, char uplo, lapack_int n, + lapack_complex_double* ap, lapack_int* ipiv ); + +lapack_int LAPACKE_ssptri_work( int matrix_order, char uplo, lapack_int n, + float* ap, const lapack_int* ipiv, + float* work ); +lapack_int LAPACKE_dsptri_work( int matrix_order, char uplo, lapack_int n, + double* ap, const lapack_int* ipiv, + double* work ); +lapack_int LAPACKE_csptri_work( int matrix_order, char uplo, lapack_int n, + lapack_complex_float* ap, + const lapack_int* ipiv, + lapack_complex_float* work ); +lapack_int LAPACKE_zsptri_work( int matrix_order, char uplo, lapack_int n, + lapack_complex_double* ap, + const lapack_int* ipiv, + lapack_complex_double* work ); + +lapack_int LAPACKE_ssptrs_work( int matrix_order, char uplo, lapack_int n, + lapack_int nrhs, const float* ap, + const lapack_int* ipiv, float* b, + lapack_int ldb ); +lapack_int LAPACKE_dsptrs_work( int matrix_order, char uplo, lapack_int n, + lapack_int nrhs, const double* ap, + const lapack_int* ipiv, double* b, + lapack_int ldb ); +lapack_int LAPACKE_csptrs_work( int matrix_order, char uplo, lapack_int n, + lapack_int nrhs, const lapack_complex_float* ap, + const lapack_int* ipiv, lapack_complex_float* b, + lapack_int ldb ); +lapack_int LAPACKE_zsptrs_work( int matrix_order, char uplo, lapack_int n, + lapack_int nrhs, + const lapack_complex_double* ap, + const lapack_int* ipiv, + lapack_complex_double* b, lapack_int ldb ); + +lapack_int LAPACKE_sstebz_work( char range, char order, lapack_int n, float vl, + float vu, lapack_int il, lapack_int iu, + float abstol, const float* d, const float* e, + lapack_int* m, lapack_int* nsplit, float* w, + lapack_int* 
iblock, lapack_int* isplit, + float* work, lapack_int* iwork ); +lapack_int LAPACKE_dstebz_work( char range, char order, lapack_int n, double vl, + double vu, lapack_int il, lapack_int iu, + double abstol, const double* d, const double* e, + lapack_int* m, lapack_int* nsplit, double* w, + lapack_int* iblock, lapack_int* isplit, + double* work, lapack_int* iwork ); + +lapack_int LAPACKE_sstedc_work( int matrix_order, char compz, lapack_int n, + float* d, float* e, float* z, lapack_int ldz, + float* work, lapack_int lwork, + lapack_int* iwork, lapack_int liwork ); +lapack_int LAPACKE_dstedc_work( int matrix_order, char compz, lapack_int n, + double* d, double* e, double* z, lapack_int ldz, + double* work, lapack_int lwork, + lapack_int* iwork, lapack_int liwork ); +lapack_int LAPACKE_cstedc_work( int matrix_order, char compz, lapack_int n, + float* d, float* e, lapack_complex_float* z, + lapack_int ldz, lapack_complex_float* work, + lapack_int lwork, float* rwork, + lapack_int lrwork, lapack_int* iwork, + lapack_int liwork ); +lapack_int LAPACKE_zstedc_work( int matrix_order, char compz, lapack_int n, + double* d, double* e, lapack_complex_double* z, + lapack_int ldz, lapack_complex_double* work, + lapack_int lwork, double* rwork, + lapack_int lrwork, lapack_int* iwork, + lapack_int liwork ); + +lapack_int LAPACKE_sstegr_work( int matrix_order, char jobz, char range, + lapack_int n, float* d, float* e, float vl, + float vu, lapack_int il, lapack_int iu, + float abstol, lapack_int* m, float* w, float* z, + lapack_int ldz, lapack_int* isuppz, float* work, + lapack_int lwork, lapack_int* iwork, + lapack_int liwork ); +lapack_int LAPACKE_dstegr_work( int matrix_order, char jobz, char range, + lapack_int n, double* d, double* e, double vl, + double vu, lapack_int il, lapack_int iu, + double abstol, lapack_int* m, double* w, + double* z, lapack_int ldz, lapack_int* isuppz, + double* work, lapack_int lwork, + lapack_int* iwork, lapack_int liwork ); +lapack_int 
LAPACKE_cstegr_work( int matrix_order, char jobz, char range, + lapack_int n, float* d, float* e, float vl, + float vu, lapack_int il, lapack_int iu, + float abstol, lapack_int* m, float* w, + lapack_complex_float* z, lapack_int ldz, + lapack_int* isuppz, float* work, + lapack_int lwork, lapack_int* iwork, + lapack_int liwork ); +lapack_int LAPACKE_zstegr_work( int matrix_order, char jobz, char range, + lapack_int n, double* d, double* e, double vl, + double vu, lapack_int il, lapack_int iu, + double abstol, lapack_int* m, double* w, + lapack_complex_double* z, lapack_int ldz, + lapack_int* isuppz, double* work, + lapack_int lwork, lapack_int* iwork, + lapack_int liwork ); + +lapack_int LAPACKE_sstein_work( int matrix_order, lapack_int n, const float* d, + const float* e, lapack_int m, const float* w, + const lapack_int* iblock, + const lapack_int* isplit, float* z, + lapack_int ldz, float* work, lapack_int* iwork, + lapack_int* ifailv ); +lapack_int LAPACKE_dstein_work( int matrix_order, lapack_int n, const double* d, + const double* e, lapack_int m, const double* w, + const lapack_int* iblock, + const lapack_int* isplit, double* z, + lapack_int ldz, double* work, lapack_int* iwork, + lapack_int* ifailv ); +lapack_int LAPACKE_cstein_work( int matrix_order, lapack_int n, const float* d, + const float* e, lapack_int m, const float* w, + const lapack_int* iblock, + const lapack_int* isplit, + lapack_complex_float* z, lapack_int ldz, + float* work, lapack_int* iwork, + lapack_int* ifailv ); +lapack_int LAPACKE_zstein_work( int matrix_order, lapack_int n, const double* d, + const double* e, lapack_int m, const double* w, + const lapack_int* iblock, + const lapack_int* isplit, + lapack_complex_double* z, lapack_int ldz, + double* work, lapack_int* iwork, + lapack_int* ifailv ); + +lapack_int LAPACKE_sstemr_work( int matrix_order, char jobz, char range, + lapack_int n, float* d, float* e, float vl, + float vu, lapack_int il, lapack_int iu, + lapack_int* m, float* w, 
float* z, + lapack_int ldz, lapack_int nzc, + lapack_int* isuppz, lapack_logical* tryrac, + float* work, lapack_int lwork, + lapack_int* iwork, lapack_int liwork ); +lapack_int LAPACKE_dstemr_work( int matrix_order, char jobz, char range, + lapack_int n, double* d, double* e, double vl, + double vu, lapack_int il, lapack_int iu, + lapack_int* m, double* w, double* z, + lapack_int ldz, lapack_int nzc, + lapack_int* isuppz, lapack_logical* tryrac, + double* work, lapack_int lwork, + lapack_int* iwork, lapack_int liwork ); +lapack_int LAPACKE_cstemr_work( int matrix_order, char jobz, char range, + lapack_int n, float* d, float* e, float vl, + float vu, lapack_int il, lapack_int iu, + lapack_int* m, float* w, + lapack_complex_float* z, lapack_int ldz, + lapack_int nzc, lapack_int* isuppz, + lapack_logical* tryrac, float* work, + lapack_int lwork, lapack_int* iwork, + lapack_int liwork ); +lapack_int LAPACKE_zstemr_work( int matrix_order, char jobz, char range, + lapack_int n, double* d, double* e, double vl, + double vu, lapack_int il, lapack_int iu, + lapack_int* m, double* w, + lapack_complex_double* z, lapack_int ldz, + lapack_int nzc, lapack_int* isuppz, + lapack_logical* tryrac, double* work, + lapack_int lwork, lapack_int* iwork, + lapack_int liwork ); + +lapack_int LAPACKE_ssteqr_work( int matrix_order, char compz, lapack_int n, + float* d, float* e, float* z, lapack_int ldz, + float* work ); +lapack_int LAPACKE_dsteqr_work( int matrix_order, char compz, lapack_int n, + double* d, double* e, double* z, lapack_int ldz, + double* work ); +lapack_int LAPACKE_csteqr_work( int matrix_order, char compz, lapack_int n, + float* d, float* e, lapack_complex_float* z, + lapack_int ldz, float* work ); +lapack_int LAPACKE_zsteqr_work( int matrix_order, char compz, lapack_int n, + double* d, double* e, lapack_complex_double* z, + lapack_int ldz, double* work ); + +lapack_int LAPACKE_ssterf_work( lapack_int n, float* d, float* e ); +lapack_int LAPACKE_dsterf_work( lapack_int 
n, double* d, double* e ); + +lapack_int LAPACKE_sstev_work( int matrix_order, char jobz, lapack_int n, + float* d, float* e, float* z, lapack_int ldz, + float* work ); +lapack_int LAPACKE_dstev_work( int matrix_order, char jobz, lapack_int n, + double* d, double* e, double* z, lapack_int ldz, + double* work ); + +lapack_int LAPACKE_sstevd_work( int matrix_order, char jobz, lapack_int n, + float* d, float* e, float* z, lapack_int ldz, + float* work, lapack_int lwork, + lapack_int* iwork, lapack_int liwork ); +lapack_int LAPACKE_dstevd_work( int matrix_order, char jobz, lapack_int n, + double* d, double* e, double* z, lapack_int ldz, + double* work, lapack_int lwork, + lapack_int* iwork, lapack_int liwork ); + +lapack_int LAPACKE_sstevr_work( int matrix_order, char jobz, char range, + lapack_int n, float* d, float* e, float vl, + float vu, lapack_int il, lapack_int iu, + float abstol, lapack_int* m, float* w, float* z, + lapack_int ldz, lapack_int* isuppz, float* work, + lapack_int lwork, lapack_int* iwork, + lapack_int liwork ); +lapack_int LAPACKE_dstevr_work( int matrix_order, char jobz, char range, + lapack_int n, double* d, double* e, double vl, + double vu, lapack_int il, lapack_int iu, + double abstol, lapack_int* m, double* w, + double* z, lapack_int ldz, lapack_int* isuppz, + double* work, lapack_int lwork, + lapack_int* iwork, lapack_int liwork ); + +lapack_int LAPACKE_sstevx_work( int matrix_order, char jobz, char range, + lapack_int n, float* d, float* e, float vl, + float vu, lapack_int il, lapack_int iu, + float abstol, lapack_int* m, float* w, float* z, + lapack_int ldz, float* work, lapack_int* iwork, + lapack_int* ifail ); +lapack_int LAPACKE_dstevx_work( int matrix_order, char jobz, char range, + lapack_int n, double* d, double* e, double vl, + double vu, lapack_int il, lapack_int iu, + double abstol, lapack_int* m, double* w, + double* z, lapack_int ldz, double* work, + lapack_int* iwork, lapack_int* ifail ); + +lapack_int LAPACKE_ssycon_work( int 
matrix_order, char uplo, lapack_int n, + const float* a, lapack_int lda, + const lapack_int* ipiv, float anorm, + float* rcond, float* work, lapack_int* iwork ); +lapack_int LAPACKE_dsycon_work( int matrix_order, char uplo, lapack_int n, + const double* a, lapack_int lda, + const lapack_int* ipiv, double anorm, + double* rcond, double* work, + lapack_int* iwork ); +lapack_int LAPACKE_csycon_work( int matrix_order, char uplo, lapack_int n, + const lapack_complex_float* a, lapack_int lda, + const lapack_int* ipiv, float anorm, + float* rcond, lapack_complex_float* work ); +lapack_int LAPACKE_zsycon_work( int matrix_order, char uplo, lapack_int n, + const lapack_complex_double* a, lapack_int lda, + const lapack_int* ipiv, double anorm, + double* rcond, lapack_complex_double* work ); + +lapack_int LAPACKE_ssyequb_work( int matrix_order, char uplo, lapack_int n, + const float* a, lapack_int lda, float* s, + float* scond, float* amax, float* work ); +lapack_int LAPACKE_dsyequb_work( int matrix_order, char uplo, lapack_int n, + const double* a, lapack_int lda, double* s, + double* scond, double* amax, double* work ); +lapack_int LAPACKE_csyequb_work( int matrix_order, char uplo, lapack_int n, + const lapack_complex_float* a, lapack_int lda, + float* s, float* scond, float* amax, + lapack_complex_float* work ); +lapack_int LAPACKE_zsyequb_work( int matrix_order, char uplo, lapack_int n, + const lapack_complex_double* a, lapack_int lda, + double* s, double* scond, double* amax, + lapack_complex_double* work ); + +lapack_int LAPACKE_ssyev_work( int matrix_order, char jobz, char uplo, + lapack_int n, float* a, lapack_int lda, float* w, + float* work, lapack_int lwork ); +lapack_int LAPACKE_dsyev_work( int matrix_order, char jobz, char uplo, + lapack_int n, double* a, lapack_int lda, + double* w, double* work, lapack_int lwork ); + +lapack_int LAPACKE_ssyevd_work( int matrix_order, char jobz, char uplo, + lapack_int n, float* a, lapack_int lda, + float* w, float* work, 
lapack_int lwork, + lapack_int* iwork, lapack_int liwork ); +lapack_int LAPACKE_dsyevd_work( int matrix_order, char jobz, char uplo, + lapack_int n, double* a, lapack_int lda, + double* w, double* work, lapack_int lwork, + lapack_int* iwork, lapack_int liwork ); + +lapack_int LAPACKE_ssyevr_work( int matrix_order, char jobz, char range, + char uplo, lapack_int n, float* a, + lapack_int lda, float vl, float vu, + lapack_int il, lapack_int iu, float abstol, + lapack_int* m, float* w, float* z, + lapack_int ldz, lapack_int* isuppz, float* work, + lapack_int lwork, lapack_int* iwork, + lapack_int liwork ); +lapack_int LAPACKE_dsyevr_work( int matrix_order, char jobz, char range, + char uplo, lapack_int n, double* a, + lapack_int lda, double vl, double vu, + lapack_int il, lapack_int iu, double abstol, + lapack_int* m, double* w, double* z, + lapack_int ldz, lapack_int* isuppz, + double* work, lapack_int lwork, + lapack_int* iwork, lapack_int liwork ); + +lapack_int LAPACKE_ssyevx_work( int matrix_order, char jobz, char range, + char uplo, lapack_int n, float* a, + lapack_int lda, float vl, float vu, + lapack_int il, lapack_int iu, float abstol, + lapack_int* m, float* w, float* z, + lapack_int ldz, float* work, lapack_int lwork, + lapack_int* iwork, lapack_int* ifail ); +lapack_int LAPACKE_dsyevx_work( int matrix_order, char jobz, char range, + char uplo, lapack_int n, double* a, + lapack_int lda, double vl, double vu, + lapack_int il, lapack_int iu, double abstol, + lapack_int* m, double* w, double* z, + lapack_int ldz, double* work, lapack_int lwork, + lapack_int* iwork, lapack_int* ifail ); + +lapack_int LAPACKE_ssygst_work( int matrix_order, lapack_int itype, char uplo, + lapack_int n, float* a, lapack_int lda, + const float* b, lapack_int ldb ); +lapack_int LAPACKE_dsygst_work( int matrix_order, lapack_int itype, char uplo, + lapack_int n, double* a, lapack_int lda, + const double* b, lapack_int ldb ); + +lapack_int LAPACKE_ssygv_work( int matrix_order, lapack_int 
itype, char jobz, + char uplo, lapack_int n, float* a, + lapack_int lda, float* b, lapack_int ldb, + float* w, float* work, lapack_int lwork ); +lapack_int LAPACKE_dsygv_work( int matrix_order, lapack_int itype, char jobz, + char uplo, lapack_int n, double* a, + lapack_int lda, double* b, lapack_int ldb, + double* w, double* work, lapack_int lwork ); + +lapack_int LAPACKE_ssygvd_work( int matrix_order, lapack_int itype, char jobz, + char uplo, lapack_int n, float* a, + lapack_int lda, float* b, lapack_int ldb, + float* w, float* work, lapack_int lwork, + lapack_int* iwork, lapack_int liwork ); +lapack_int LAPACKE_dsygvd_work( int matrix_order, lapack_int itype, char jobz, + char uplo, lapack_int n, double* a, + lapack_int lda, double* b, lapack_int ldb, + double* w, double* work, lapack_int lwork, + lapack_int* iwork, lapack_int liwork ); + +lapack_int LAPACKE_ssygvx_work( int matrix_order, lapack_int itype, char jobz, + char range, char uplo, lapack_int n, float* a, + lapack_int lda, float* b, lapack_int ldb, + float vl, float vu, lapack_int il, + lapack_int iu, float abstol, lapack_int* m, + float* w, float* z, lapack_int ldz, float* work, + lapack_int lwork, lapack_int* iwork, + lapack_int* ifail ); +lapack_int LAPACKE_dsygvx_work( int matrix_order, lapack_int itype, char jobz, + char range, char uplo, lapack_int n, double* a, + lapack_int lda, double* b, lapack_int ldb, + double vl, double vu, lapack_int il, + lapack_int iu, double abstol, lapack_int* m, + double* w, double* z, lapack_int ldz, + double* work, lapack_int lwork, + lapack_int* iwork, lapack_int* ifail ); + +lapack_int LAPACKE_ssyrfs_work( int matrix_order, char uplo, lapack_int n, + lapack_int nrhs, const float* a, lapack_int lda, + const float* af, lapack_int ldaf, + const lapack_int* ipiv, const float* b, + lapack_int ldb, float* x, lapack_int ldx, + float* ferr, float* berr, float* work, + lapack_int* iwork ); +lapack_int LAPACKE_dsyrfs_work( int matrix_order, char uplo, lapack_int n, + 
lapack_int nrhs, const double* a, + lapack_int lda, const double* af, + lapack_int ldaf, const lapack_int* ipiv, + const double* b, lapack_int ldb, double* x, + lapack_int ldx, double* ferr, double* berr, + double* work, lapack_int* iwork ); +lapack_int LAPACKE_csyrfs_work( int matrix_order, char uplo, lapack_int n, + lapack_int nrhs, const lapack_complex_float* a, + lapack_int lda, const lapack_complex_float* af, + lapack_int ldaf, const lapack_int* ipiv, + const lapack_complex_float* b, lapack_int ldb, + lapack_complex_float* x, lapack_int ldx, + float* ferr, float* berr, + lapack_complex_float* work, float* rwork ); +lapack_int LAPACKE_zsyrfs_work( int matrix_order, char uplo, lapack_int n, + lapack_int nrhs, const lapack_complex_double* a, + lapack_int lda, const lapack_complex_double* af, + lapack_int ldaf, const lapack_int* ipiv, + const lapack_complex_double* b, lapack_int ldb, + lapack_complex_double* x, lapack_int ldx, + double* ferr, double* berr, + lapack_complex_double* work, double* rwork ); + +lapack_int LAPACKE_ssyrfsx_work( int matrix_order, char uplo, char equed, + lapack_int n, lapack_int nrhs, const float* a, + lapack_int lda, const float* af, + lapack_int ldaf, const lapack_int* ipiv, + const float* s, const float* b, lapack_int ldb, + float* x, lapack_int ldx, float* rcond, + float* berr, lapack_int n_err_bnds, + float* err_bnds_norm, float* err_bnds_comp, + lapack_int nparams, float* params, float* work, + lapack_int* iwork ); +lapack_int LAPACKE_dsyrfsx_work( int matrix_order, char uplo, char equed, + lapack_int n, lapack_int nrhs, const double* a, + lapack_int lda, const double* af, + lapack_int ldaf, const lapack_int* ipiv, + const double* s, const double* b, + lapack_int ldb, double* x, lapack_int ldx, + double* rcond, double* berr, + lapack_int n_err_bnds, double* err_bnds_norm, + double* err_bnds_comp, lapack_int nparams, + double* params, double* work, + lapack_int* iwork ); +lapack_int LAPACKE_csyrfsx_work( int matrix_order, char uplo, 
char equed, + lapack_int n, lapack_int nrhs, + const lapack_complex_float* a, lapack_int lda, + const lapack_complex_float* af, + lapack_int ldaf, const lapack_int* ipiv, + const float* s, const lapack_complex_float* b, + lapack_int ldb, lapack_complex_float* x, + lapack_int ldx, float* rcond, float* berr, + lapack_int n_err_bnds, float* err_bnds_norm, + float* err_bnds_comp, lapack_int nparams, + float* params, lapack_complex_float* work, + float* rwork ); +lapack_int LAPACKE_zsyrfsx_work( int matrix_order, char uplo, char equed, + lapack_int n, lapack_int nrhs, + const lapack_complex_double* a, lapack_int lda, + const lapack_complex_double* af, + lapack_int ldaf, const lapack_int* ipiv, + const double* s, + const lapack_complex_double* b, lapack_int ldb, + lapack_complex_double* x, lapack_int ldx, + double* rcond, double* berr, + lapack_int n_err_bnds, double* err_bnds_norm, + double* err_bnds_comp, lapack_int nparams, + double* params, lapack_complex_double* work, + double* rwork ); + +lapack_int LAPACKE_ssysv_work( int matrix_order, char uplo, lapack_int n, + lapack_int nrhs, float* a, lapack_int lda, + lapack_int* ipiv, float* b, lapack_int ldb, + float* work, lapack_int lwork ); +lapack_int LAPACKE_dsysv_work( int matrix_order, char uplo, lapack_int n, + lapack_int nrhs, double* a, lapack_int lda, + lapack_int* ipiv, double* b, lapack_int ldb, + double* work, lapack_int lwork ); +lapack_int LAPACKE_csysv_work( int matrix_order, char uplo, lapack_int n, + lapack_int nrhs, lapack_complex_float* a, + lapack_int lda, lapack_int* ipiv, + lapack_complex_float* b, lapack_int ldb, + lapack_complex_float* work, lapack_int lwork ); +lapack_int LAPACKE_zsysv_work( int matrix_order, char uplo, lapack_int n, + lapack_int nrhs, lapack_complex_double* a, + lapack_int lda, lapack_int* ipiv, + lapack_complex_double* b, lapack_int ldb, + lapack_complex_double* work, lapack_int lwork ); + +lapack_int LAPACKE_ssysvx_work( int matrix_order, char fact, char uplo, + lapack_int n, 
lapack_int nrhs, const float* a, + lapack_int lda, float* af, lapack_int ldaf, + lapack_int* ipiv, const float* b, + lapack_int ldb, float* x, lapack_int ldx, + float* rcond, float* ferr, float* berr, + float* work, lapack_int lwork, + lapack_int* iwork ); +lapack_int LAPACKE_dsysvx_work( int matrix_order, char fact, char uplo, + lapack_int n, lapack_int nrhs, const double* a, + lapack_int lda, double* af, lapack_int ldaf, + lapack_int* ipiv, const double* b, + lapack_int ldb, double* x, lapack_int ldx, + double* rcond, double* ferr, double* berr, + double* work, lapack_int lwork, + lapack_int* iwork ); +lapack_int LAPACKE_csysvx_work( int matrix_order, char fact, char uplo, + lapack_int n, lapack_int nrhs, + const lapack_complex_float* a, lapack_int lda, + lapack_complex_float* af, lapack_int ldaf, + lapack_int* ipiv, const lapack_complex_float* b, + lapack_int ldb, lapack_complex_float* x, + lapack_int ldx, float* rcond, float* ferr, + float* berr, lapack_complex_float* work, + lapack_int lwork, float* rwork ); +lapack_int LAPACKE_zsysvx_work( int matrix_order, char fact, char uplo, + lapack_int n, lapack_int nrhs, + const lapack_complex_double* a, lapack_int lda, + lapack_complex_double* af, lapack_int ldaf, + lapack_int* ipiv, + const lapack_complex_double* b, lapack_int ldb, + lapack_complex_double* x, lapack_int ldx, + double* rcond, double* ferr, double* berr, + lapack_complex_double* work, lapack_int lwork, + double* rwork ); + +lapack_int LAPACKE_ssysvxx_work( int matrix_order, char fact, char uplo, + lapack_int n, lapack_int nrhs, float* a, + lapack_int lda, float* af, lapack_int ldaf, + lapack_int* ipiv, char* equed, float* s, + float* b, lapack_int ldb, float* x, + lapack_int ldx, float* rcond, float* rpvgrw, + float* berr, lapack_int n_err_bnds, + float* err_bnds_norm, float* err_bnds_comp, + lapack_int nparams, float* params, float* work, + lapack_int* iwork ); +lapack_int LAPACKE_dsysvxx_work( int matrix_order, char fact, char uplo, + lapack_int n, 
lapack_int nrhs, double* a, + lapack_int lda, double* af, lapack_int ldaf, + lapack_int* ipiv, char* equed, double* s, + double* b, lapack_int ldb, double* x, + lapack_int ldx, double* rcond, double* rpvgrw, + double* berr, lapack_int n_err_bnds, + double* err_bnds_norm, double* err_bnds_comp, + lapack_int nparams, double* params, + double* work, lapack_int* iwork ); +lapack_int LAPACKE_csysvxx_work( int matrix_order, char fact, char uplo, + lapack_int n, lapack_int nrhs, + lapack_complex_float* a, lapack_int lda, + lapack_complex_float* af, lapack_int ldaf, + lapack_int* ipiv, char* equed, float* s, + lapack_complex_float* b, lapack_int ldb, + lapack_complex_float* x, lapack_int ldx, + float* rcond, float* rpvgrw, float* berr, + lapack_int n_err_bnds, float* err_bnds_norm, + float* err_bnds_comp, lapack_int nparams, + float* params, lapack_complex_float* work, + float* rwork ); +lapack_int LAPACKE_zsysvxx_work( int matrix_order, char fact, char uplo, + lapack_int n, lapack_int nrhs, + lapack_complex_double* a, lapack_int lda, + lapack_complex_double* af, lapack_int ldaf, + lapack_int* ipiv, char* equed, double* s, + lapack_complex_double* b, lapack_int ldb, + lapack_complex_double* x, lapack_int ldx, + double* rcond, double* rpvgrw, double* berr, + lapack_int n_err_bnds, double* err_bnds_norm, + double* err_bnds_comp, lapack_int nparams, + double* params, lapack_complex_double* work, + double* rwork ); + +lapack_int LAPACKE_ssytrd_work( int matrix_order, char uplo, lapack_int n, + float* a, lapack_int lda, float* d, float* e, + float* tau, float* work, lapack_int lwork ); +lapack_int LAPACKE_dsytrd_work( int matrix_order, char uplo, lapack_int n, + double* a, lapack_int lda, double* d, double* e, + double* tau, double* work, lapack_int lwork ); + +lapack_int LAPACKE_ssytrf_work( int matrix_order, char uplo, lapack_int n, + float* a, lapack_int lda, lapack_int* ipiv, + float* work, lapack_int lwork ); +lapack_int LAPACKE_dsytrf_work( int matrix_order, char uplo, 
lapack_int n, + double* a, lapack_int lda, lapack_int* ipiv, + double* work, lapack_int lwork ); +lapack_int LAPACKE_csytrf_work( int matrix_order, char uplo, lapack_int n, + lapack_complex_float* a, lapack_int lda, + lapack_int* ipiv, lapack_complex_float* work, + lapack_int lwork ); +lapack_int LAPACKE_zsytrf_work( int matrix_order, char uplo, lapack_int n, + lapack_complex_double* a, lapack_int lda, + lapack_int* ipiv, lapack_complex_double* work, + lapack_int lwork ); + +lapack_int LAPACKE_ssytri_work( int matrix_order, char uplo, lapack_int n, + float* a, lapack_int lda, + const lapack_int* ipiv, float* work ); +lapack_int LAPACKE_dsytri_work( int matrix_order, char uplo, lapack_int n, + double* a, lapack_int lda, + const lapack_int* ipiv, double* work ); +lapack_int LAPACKE_csytri_work( int matrix_order, char uplo, lapack_int n, + lapack_complex_float* a, lapack_int lda, + const lapack_int* ipiv, + lapack_complex_float* work ); +lapack_int LAPACKE_zsytri_work( int matrix_order, char uplo, lapack_int n, + lapack_complex_double* a, lapack_int lda, + const lapack_int* ipiv, + lapack_complex_double* work ); + +lapack_int LAPACKE_ssytrs_work( int matrix_order, char uplo, lapack_int n, + lapack_int nrhs, const float* a, lapack_int lda, + const lapack_int* ipiv, float* b, + lapack_int ldb ); +lapack_int LAPACKE_dsytrs_work( int matrix_order, char uplo, lapack_int n, + lapack_int nrhs, const double* a, + lapack_int lda, const lapack_int* ipiv, + double* b, lapack_int ldb ); +lapack_int LAPACKE_csytrs_work( int matrix_order, char uplo, lapack_int n, + lapack_int nrhs, const lapack_complex_float* a, + lapack_int lda, const lapack_int* ipiv, + lapack_complex_float* b, lapack_int ldb ); +lapack_int LAPACKE_zsytrs_work( int matrix_order, char uplo, lapack_int n, + lapack_int nrhs, const lapack_complex_double* a, + lapack_int lda, const lapack_int* ipiv, + lapack_complex_double* b, lapack_int ldb ); + +lapack_int LAPACKE_stbcon_work( int matrix_order, char norm, char uplo, 
+ char diag, lapack_int n, lapack_int kd, + const float* ab, lapack_int ldab, float* rcond, + float* work, lapack_int* iwork ); +lapack_int LAPACKE_dtbcon_work( int matrix_order, char norm, char uplo, + char diag, lapack_int n, lapack_int kd, + const double* ab, lapack_int ldab, + double* rcond, double* work, + lapack_int* iwork ); +lapack_int LAPACKE_ctbcon_work( int matrix_order, char norm, char uplo, + char diag, lapack_int n, lapack_int kd, + const lapack_complex_float* ab, lapack_int ldab, + float* rcond, lapack_complex_float* work, + float* rwork ); +lapack_int LAPACKE_ztbcon_work( int matrix_order, char norm, char uplo, + char diag, lapack_int n, lapack_int kd, + const lapack_complex_double* ab, + lapack_int ldab, double* rcond, + lapack_complex_double* work, double* rwork ); + +lapack_int LAPACKE_stbrfs_work( int matrix_order, char uplo, char trans, + char diag, lapack_int n, lapack_int kd, + lapack_int nrhs, const float* ab, + lapack_int ldab, const float* b, lapack_int ldb, + const float* x, lapack_int ldx, float* ferr, + float* berr, float* work, lapack_int* iwork ); +lapack_int LAPACKE_dtbrfs_work( int matrix_order, char uplo, char trans, + char diag, lapack_int n, lapack_int kd, + lapack_int nrhs, const double* ab, + lapack_int ldab, const double* b, + lapack_int ldb, const double* x, lapack_int ldx, + double* ferr, double* berr, double* work, + lapack_int* iwork ); +lapack_int LAPACKE_ctbrfs_work( int matrix_order, char uplo, char trans, + char diag, lapack_int n, lapack_int kd, + lapack_int nrhs, const lapack_complex_float* ab, + lapack_int ldab, const lapack_complex_float* b, + lapack_int ldb, const lapack_complex_float* x, + lapack_int ldx, float* ferr, float* berr, + lapack_complex_float* work, float* rwork ); +lapack_int LAPACKE_ztbrfs_work( int matrix_order, char uplo, char trans, + char diag, lapack_int n, lapack_int kd, + lapack_int nrhs, + const lapack_complex_double* ab, + lapack_int ldab, const lapack_complex_double* b, + lapack_int ldb, 
const lapack_complex_double* x, + lapack_int ldx, double* ferr, double* berr, + lapack_complex_double* work, double* rwork ); + +lapack_int LAPACKE_stbtrs_work( int matrix_order, char uplo, char trans, + char diag, lapack_int n, lapack_int kd, + lapack_int nrhs, const float* ab, + lapack_int ldab, float* b, lapack_int ldb ); +lapack_int LAPACKE_dtbtrs_work( int matrix_order, char uplo, char trans, + char diag, lapack_int n, lapack_int kd, + lapack_int nrhs, const double* ab, + lapack_int ldab, double* b, lapack_int ldb ); +lapack_int LAPACKE_ctbtrs_work( int matrix_order, char uplo, char trans, + char diag, lapack_int n, lapack_int kd, + lapack_int nrhs, const lapack_complex_float* ab, + lapack_int ldab, lapack_complex_float* b, + lapack_int ldb ); +lapack_int LAPACKE_ztbtrs_work( int matrix_order, char uplo, char trans, + char diag, lapack_int n, lapack_int kd, + lapack_int nrhs, + const lapack_complex_double* ab, + lapack_int ldab, lapack_complex_double* b, + lapack_int ldb ); + +lapack_int LAPACKE_stfsm_work( int matrix_order, char transr, char side, + char uplo, char trans, char diag, lapack_int m, + lapack_int n, float alpha, const float* a, + float* b, lapack_int ldb ); +lapack_int LAPACKE_dtfsm_work( int matrix_order, char transr, char side, + char uplo, char trans, char diag, lapack_int m, + lapack_int n, double alpha, const double* a, + double* b, lapack_int ldb ); +lapack_int LAPACKE_ctfsm_work( int matrix_order, char transr, char side, + char uplo, char trans, char diag, lapack_int m, + lapack_int n, lapack_complex_float alpha, + const lapack_complex_float* a, + lapack_complex_float* b, lapack_int ldb ); +lapack_int LAPACKE_ztfsm_work( int matrix_order, char transr, char side, + char uplo, char trans, char diag, lapack_int m, + lapack_int n, lapack_complex_double alpha, + const lapack_complex_double* a, + lapack_complex_double* b, lapack_int ldb ); + +lapack_int LAPACKE_stftri_work( int matrix_order, char transr, char uplo, + char diag, lapack_int n, 
float* a ); +lapack_int LAPACKE_dtftri_work( int matrix_order, char transr, char uplo, + char diag, lapack_int n, double* a ); +lapack_int LAPACKE_ctftri_work( int matrix_order, char transr, char uplo, + char diag, lapack_int n, + lapack_complex_float* a ); +lapack_int LAPACKE_ztftri_work( int matrix_order, char transr, char uplo, + char diag, lapack_int n, + lapack_complex_double* a ); + +lapack_int LAPACKE_stfttp_work( int matrix_order, char transr, char uplo, + lapack_int n, const float* arf, float* ap ); +lapack_int LAPACKE_dtfttp_work( int matrix_order, char transr, char uplo, + lapack_int n, const double* arf, double* ap ); +lapack_int LAPACKE_ctfttp_work( int matrix_order, char transr, char uplo, + lapack_int n, const lapack_complex_float* arf, + lapack_complex_float* ap ); +lapack_int LAPACKE_ztfttp_work( int matrix_order, char transr, char uplo, + lapack_int n, const lapack_complex_double* arf, + lapack_complex_double* ap ); + +lapack_int LAPACKE_stfttr_work( int matrix_order, char transr, char uplo, + lapack_int n, const float* arf, float* a, + lapack_int lda ); +lapack_int LAPACKE_dtfttr_work( int matrix_order, char transr, char uplo, + lapack_int n, const double* arf, double* a, + lapack_int lda ); +lapack_int LAPACKE_ctfttr_work( int matrix_order, char transr, char uplo, + lapack_int n, const lapack_complex_float* arf, + lapack_complex_float* a, lapack_int lda ); +lapack_int LAPACKE_ztfttr_work( int matrix_order, char transr, char uplo, + lapack_int n, const lapack_complex_double* arf, + lapack_complex_double* a, lapack_int lda ); + +lapack_int LAPACKE_stgevc_work( int matrix_order, char side, char howmny, + const lapack_logical* select, lapack_int n, + const float* s, lapack_int lds, const float* p, + lapack_int ldp, float* vl, lapack_int ldvl, + float* vr, lapack_int ldvr, lapack_int mm, + lapack_int* m, float* work ); +lapack_int LAPACKE_dtgevc_work( int matrix_order, char side, char howmny, + const lapack_logical* select, lapack_int n, + const 
double* s, lapack_int lds, + const double* p, lapack_int ldp, double* vl, + lapack_int ldvl, double* vr, lapack_int ldvr, + lapack_int mm, lapack_int* m, double* work ); +lapack_int LAPACKE_ctgevc_work( int matrix_order, char side, char howmny, + const lapack_logical* select, lapack_int n, + const lapack_complex_float* s, lapack_int lds, + const lapack_complex_float* p, lapack_int ldp, + lapack_complex_float* vl, lapack_int ldvl, + lapack_complex_float* vr, lapack_int ldvr, + lapack_int mm, lapack_int* m, + lapack_complex_float* work, float* rwork ); +lapack_int LAPACKE_ztgevc_work( int matrix_order, char side, char howmny, + const lapack_logical* select, lapack_int n, + const lapack_complex_double* s, lapack_int lds, + const lapack_complex_double* p, lapack_int ldp, + lapack_complex_double* vl, lapack_int ldvl, + lapack_complex_double* vr, lapack_int ldvr, + lapack_int mm, lapack_int* m, + lapack_complex_double* work, double* rwork ); + +lapack_int LAPACKE_stgexc_work( int matrix_order, lapack_logical wantq, + lapack_logical wantz, lapack_int n, float* a, + lapack_int lda, float* b, lapack_int ldb, + float* q, lapack_int ldq, float* z, + lapack_int ldz, lapack_int* ifst, + lapack_int* ilst, float* work, + lapack_int lwork ); +lapack_int LAPACKE_dtgexc_work( int matrix_order, lapack_logical wantq, + lapack_logical wantz, lapack_int n, double* a, + lapack_int lda, double* b, lapack_int ldb, + double* q, lapack_int ldq, double* z, + lapack_int ldz, lapack_int* ifst, + lapack_int* ilst, double* work, + lapack_int lwork ); +lapack_int LAPACKE_ctgexc_work( int matrix_order, lapack_logical wantq, + lapack_logical wantz, lapack_int n, + lapack_complex_float* a, lapack_int lda, + lapack_complex_float* b, lapack_int ldb, + lapack_complex_float* q, lapack_int ldq, + lapack_complex_float* z, lapack_int ldz, + lapack_int ifst, lapack_int ilst ); +lapack_int LAPACKE_ztgexc_work( int matrix_order, lapack_logical wantq, + lapack_logical wantz, lapack_int n, + 
lapack_complex_double* a, lapack_int lda, + lapack_complex_double* b, lapack_int ldb, + lapack_complex_double* q, lapack_int ldq, + lapack_complex_double* z, lapack_int ldz, + lapack_int ifst, lapack_int ilst ); + +lapack_int LAPACKE_stgsen_work( int matrix_order, lapack_int ijob, + lapack_logical wantq, lapack_logical wantz, + const lapack_logical* select, lapack_int n, + float* a, lapack_int lda, float* b, + lapack_int ldb, float* alphar, float* alphai, + float* beta, float* q, lapack_int ldq, float* z, + lapack_int ldz, lapack_int* m, float* pl, + float* pr, float* dif, float* work, + lapack_int lwork, lapack_int* iwork, + lapack_int liwork ); +lapack_int LAPACKE_dtgsen_work( int matrix_order, lapack_int ijob, + lapack_logical wantq, lapack_logical wantz, + const lapack_logical* select, lapack_int n, + double* a, lapack_int lda, double* b, + lapack_int ldb, double* alphar, double* alphai, + double* beta, double* q, lapack_int ldq, + double* z, lapack_int ldz, lapack_int* m, + double* pl, double* pr, double* dif, + double* work, lapack_int lwork, + lapack_int* iwork, lapack_int liwork ); +lapack_int LAPACKE_ctgsen_work( int matrix_order, lapack_int ijob, + lapack_logical wantq, lapack_logical wantz, + const lapack_logical* select, lapack_int n, + lapack_complex_float* a, lapack_int lda, + lapack_complex_float* b, lapack_int ldb, + lapack_complex_float* alpha, + lapack_complex_float* beta, + lapack_complex_float* q, lapack_int ldq, + lapack_complex_float* z, lapack_int ldz, + lapack_int* m, float* pl, float* pr, float* dif, + lapack_complex_float* work, lapack_int lwork, + lapack_int* iwork, lapack_int liwork ); +lapack_int LAPACKE_ztgsen_work( int matrix_order, lapack_int ijob, + lapack_logical wantq, lapack_logical wantz, + const lapack_logical* select, lapack_int n, + lapack_complex_double* a, lapack_int lda, + lapack_complex_double* b, lapack_int ldb, + lapack_complex_double* alpha, + lapack_complex_double* beta, + lapack_complex_double* q, lapack_int ldq, + 
lapack_complex_double* z, lapack_int ldz, + lapack_int* m, double* pl, double* pr, + double* dif, lapack_complex_double* work, + lapack_int lwork, lapack_int* iwork, + lapack_int liwork ); + +lapack_int LAPACKE_stgsja_work( int matrix_order, char jobu, char jobv, + char jobq, lapack_int m, lapack_int p, + lapack_int n, lapack_int k, lapack_int l, + float* a, lapack_int lda, float* b, + lapack_int ldb, float tola, float tolb, + float* alpha, float* beta, float* u, + lapack_int ldu, float* v, lapack_int ldv, + float* q, lapack_int ldq, float* work, + lapack_int* ncycle ); +lapack_int LAPACKE_dtgsja_work( int matrix_order, char jobu, char jobv, + char jobq, lapack_int m, lapack_int p, + lapack_int n, lapack_int k, lapack_int l, + double* a, lapack_int lda, double* b, + lapack_int ldb, double tola, double tolb, + double* alpha, double* beta, double* u, + lapack_int ldu, double* v, lapack_int ldv, + double* q, lapack_int ldq, double* work, + lapack_int* ncycle ); +lapack_int LAPACKE_ctgsja_work( int matrix_order, char jobu, char jobv, + char jobq, lapack_int m, lapack_int p, + lapack_int n, lapack_int k, lapack_int l, + lapack_complex_float* a, lapack_int lda, + lapack_complex_float* b, lapack_int ldb, + float tola, float tolb, float* alpha, + float* beta, lapack_complex_float* u, + lapack_int ldu, lapack_complex_float* v, + lapack_int ldv, lapack_complex_float* q, + lapack_int ldq, lapack_complex_float* work, + lapack_int* ncycle ); +lapack_int LAPACKE_ztgsja_work( int matrix_order, char jobu, char jobv, + char jobq, lapack_int m, lapack_int p, + lapack_int n, lapack_int k, lapack_int l, + lapack_complex_double* a, lapack_int lda, + lapack_complex_double* b, lapack_int ldb, + double tola, double tolb, double* alpha, + double* beta, lapack_complex_double* u, + lapack_int ldu, lapack_complex_double* v, + lapack_int ldv, lapack_complex_double* q, + lapack_int ldq, lapack_complex_double* work, + lapack_int* ncycle ); + +lapack_int LAPACKE_stgsna_work( int matrix_order, 
char job, char howmny, + const lapack_logical* select, lapack_int n, + const float* a, lapack_int lda, const float* b, + lapack_int ldb, const float* vl, + lapack_int ldvl, const float* vr, + lapack_int ldvr, float* s, float* dif, + lapack_int mm, lapack_int* m, float* work, + lapack_int lwork, lapack_int* iwork ); +lapack_int LAPACKE_dtgsna_work( int matrix_order, char job, char howmny, + const lapack_logical* select, lapack_int n, + const double* a, lapack_int lda, + const double* b, lapack_int ldb, + const double* vl, lapack_int ldvl, + const double* vr, lapack_int ldvr, double* s, + double* dif, lapack_int mm, lapack_int* m, + double* work, lapack_int lwork, + lapack_int* iwork ); +lapack_int LAPACKE_ctgsna_work( int matrix_order, char job, char howmny, + const lapack_logical* select, lapack_int n, + const lapack_complex_float* a, lapack_int lda, + const lapack_complex_float* b, lapack_int ldb, + const lapack_complex_float* vl, lapack_int ldvl, + const lapack_complex_float* vr, lapack_int ldvr, + float* s, float* dif, lapack_int mm, + lapack_int* m, lapack_complex_float* work, + lapack_int lwork, lapack_int* iwork ); +lapack_int LAPACKE_ztgsna_work( int matrix_order, char job, char howmny, + const lapack_logical* select, lapack_int n, + const lapack_complex_double* a, lapack_int lda, + const lapack_complex_double* b, lapack_int ldb, + const lapack_complex_double* vl, + lapack_int ldvl, + const lapack_complex_double* vr, + lapack_int ldvr, double* s, double* dif, + lapack_int mm, lapack_int* m, + lapack_complex_double* work, lapack_int lwork, + lapack_int* iwork ); + +lapack_int LAPACKE_stgsyl_work( int matrix_order, char trans, lapack_int ijob, + lapack_int m, lapack_int n, const float* a, + lapack_int lda, const float* b, lapack_int ldb, + float* c, lapack_int ldc, const float* d, + lapack_int ldd, const float* e, lapack_int lde, + float* f, lapack_int ldf, float* scale, + float* dif, float* work, lapack_int lwork, + lapack_int* iwork ); +lapack_int 
LAPACKE_dtgsyl_work( int matrix_order, char trans, lapack_int ijob, + lapack_int m, lapack_int n, const double* a, + lapack_int lda, const double* b, lapack_int ldb, + double* c, lapack_int ldc, const double* d, + lapack_int ldd, const double* e, lapack_int lde, + double* f, lapack_int ldf, double* scale, + double* dif, double* work, lapack_int lwork, + lapack_int* iwork ); +lapack_int LAPACKE_ctgsyl_work( int matrix_order, char trans, lapack_int ijob, + lapack_int m, lapack_int n, + const lapack_complex_float* a, lapack_int lda, + const lapack_complex_float* b, lapack_int ldb, + lapack_complex_float* c, lapack_int ldc, + const lapack_complex_float* d, lapack_int ldd, + const lapack_complex_float* e, lapack_int lde, + lapack_complex_float* f, lapack_int ldf, + float* scale, float* dif, + lapack_complex_float* work, lapack_int lwork, + lapack_int* iwork ); +lapack_int LAPACKE_ztgsyl_work( int matrix_order, char trans, lapack_int ijob, + lapack_int m, lapack_int n, + const lapack_complex_double* a, lapack_int lda, + const lapack_complex_double* b, lapack_int ldb, + lapack_complex_double* c, lapack_int ldc, + const lapack_complex_double* d, lapack_int ldd, + const lapack_complex_double* e, lapack_int lde, + lapack_complex_double* f, lapack_int ldf, + double* scale, double* dif, + lapack_complex_double* work, lapack_int lwork, + lapack_int* iwork ); + +lapack_int LAPACKE_stpcon_work( int matrix_order, char norm, char uplo, + char diag, lapack_int n, const float* ap, + float* rcond, float* work, lapack_int* iwork ); +lapack_int LAPACKE_dtpcon_work( int matrix_order, char norm, char uplo, + char diag, lapack_int n, const double* ap, + double* rcond, double* work, + lapack_int* iwork ); +lapack_int LAPACKE_ctpcon_work( int matrix_order, char norm, char uplo, + char diag, lapack_int n, + const lapack_complex_float* ap, float* rcond, + lapack_complex_float* work, float* rwork ); +lapack_int LAPACKE_ztpcon_work( int matrix_order, char norm, char uplo, + char diag, lapack_int 
n, + const lapack_complex_double* ap, double* rcond, + lapack_complex_double* work, double* rwork ); + +lapack_int LAPACKE_stprfs_work( int matrix_order, char uplo, char trans, + char diag, lapack_int n, lapack_int nrhs, + const float* ap, const float* b, lapack_int ldb, + const float* x, lapack_int ldx, float* ferr, + float* berr, float* work, lapack_int* iwork ); +lapack_int LAPACKE_dtprfs_work( int matrix_order, char uplo, char trans, + char diag, lapack_int n, lapack_int nrhs, + const double* ap, const double* b, + lapack_int ldb, const double* x, lapack_int ldx, + double* ferr, double* berr, double* work, + lapack_int* iwork ); +lapack_int LAPACKE_ctprfs_work( int matrix_order, char uplo, char trans, + char diag, lapack_int n, lapack_int nrhs, + const lapack_complex_float* ap, + const lapack_complex_float* b, lapack_int ldb, + const lapack_complex_float* x, lapack_int ldx, + float* ferr, float* berr, + lapack_complex_float* work, float* rwork ); +lapack_int LAPACKE_ztprfs_work( int matrix_order, char uplo, char trans, + char diag, lapack_int n, lapack_int nrhs, + const lapack_complex_double* ap, + const lapack_complex_double* b, lapack_int ldb, + const lapack_complex_double* x, lapack_int ldx, + double* ferr, double* berr, + lapack_complex_double* work, double* rwork ); + +lapack_int LAPACKE_stptri_work( int matrix_order, char uplo, char diag, + lapack_int n, float* ap ); +lapack_int LAPACKE_dtptri_work( int matrix_order, char uplo, char diag, + lapack_int n, double* ap ); +lapack_int LAPACKE_ctptri_work( int matrix_order, char uplo, char diag, + lapack_int n, lapack_complex_float* ap ); +lapack_int LAPACKE_ztptri_work( int matrix_order, char uplo, char diag, + lapack_int n, lapack_complex_double* ap ); + +lapack_int LAPACKE_stptrs_work( int matrix_order, char uplo, char trans, + char diag, lapack_int n, lapack_int nrhs, + const float* ap, float* b, lapack_int ldb ); +lapack_int LAPACKE_dtptrs_work( int matrix_order, char uplo, char trans, + char diag, 
lapack_int n, lapack_int nrhs, + const double* ap, double* b, lapack_int ldb ); +lapack_int LAPACKE_ctptrs_work( int matrix_order, char uplo, char trans, + char diag, lapack_int n, lapack_int nrhs, + const lapack_complex_float* ap, + lapack_complex_float* b, lapack_int ldb ); +lapack_int LAPACKE_ztptrs_work( int matrix_order, char uplo, char trans, + char diag, lapack_int n, lapack_int nrhs, + const lapack_complex_double* ap, + lapack_complex_double* b, lapack_int ldb ); + +lapack_int LAPACKE_stpttf_work( int matrix_order, char transr, char uplo, + lapack_int n, const float* ap, float* arf ); +lapack_int LAPACKE_dtpttf_work( int matrix_order, char transr, char uplo, + lapack_int n, const double* ap, double* arf ); +lapack_int LAPACKE_ctpttf_work( int matrix_order, char transr, char uplo, + lapack_int n, const lapack_complex_float* ap, + lapack_complex_float* arf ); +lapack_int LAPACKE_ztpttf_work( int matrix_order, char transr, char uplo, + lapack_int n, const lapack_complex_double* ap, + lapack_complex_double* arf ); + +lapack_int LAPACKE_stpttr_work( int matrix_order, char uplo, lapack_int n, + const float* ap, float* a, lapack_int lda ); +lapack_int LAPACKE_dtpttr_work( int matrix_order, char uplo, lapack_int n, + const double* ap, double* a, lapack_int lda ); +lapack_int LAPACKE_ctpttr_work( int matrix_order, char uplo, lapack_int n, + const lapack_complex_float* ap, + lapack_complex_float* a, lapack_int lda ); +lapack_int LAPACKE_ztpttr_work( int matrix_order, char uplo, lapack_int n, + const lapack_complex_double* ap, + lapack_complex_double* a, lapack_int lda ); + +lapack_int LAPACKE_strcon_work( int matrix_order, char norm, char uplo, + char diag, lapack_int n, const float* a, + lapack_int lda, float* rcond, float* work, + lapack_int* iwork ); +lapack_int LAPACKE_dtrcon_work( int matrix_order, char norm, char uplo, + char diag, lapack_int n, const double* a, + lapack_int lda, double* rcond, double* work, + lapack_int* iwork ); +lapack_int 
LAPACKE_ctrcon_work( int matrix_order, char norm, char uplo, + char diag, lapack_int n, + const lapack_complex_float* a, lapack_int lda, + float* rcond, lapack_complex_float* work, + float* rwork ); +lapack_int LAPACKE_ztrcon_work( int matrix_order, char norm, char uplo, + char diag, lapack_int n, + const lapack_complex_double* a, lapack_int lda, + double* rcond, lapack_complex_double* work, + double* rwork ); + +lapack_int LAPACKE_strevc_work( int matrix_order, char side, char howmny, + lapack_logical* select, lapack_int n, + const float* t, lapack_int ldt, float* vl, + lapack_int ldvl, float* vr, lapack_int ldvr, + lapack_int mm, lapack_int* m, float* work ); +lapack_int LAPACKE_dtrevc_work( int matrix_order, char side, char howmny, + lapack_logical* select, lapack_int n, + const double* t, lapack_int ldt, double* vl, + lapack_int ldvl, double* vr, lapack_int ldvr, + lapack_int mm, lapack_int* m, double* work ); +lapack_int LAPACKE_ctrevc_work( int matrix_order, char side, char howmny, + const lapack_logical* select, lapack_int n, + lapack_complex_float* t, lapack_int ldt, + lapack_complex_float* vl, lapack_int ldvl, + lapack_complex_float* vr, lapack_int ldvr, + lapack_int mm, lapack_int* m, + lapack_complex_float* work, float* rwork ); +lapack_int LAPACKE_ztrevc_work( int matrix_order, char side, char howmny, + const lapack_logical* select, lapack_int n, + lapack_complex_double* t, lapack_int ldt, + lapack_complex_double* vl, lapack_int ldvl, + lapack_complex_double* vr, lapack_int ldvr, + lapack_int mm, lapack_int* m, + lapack_complex_double* work, double* rwork ); + +lapack_int LAPACKE_strexc_work( int matrix_order, char compq, lapack_int n, + float* t, lapack_int ldt, float* q, + lapack_int ldq, lapack_int* ifst, + lapack_int* ilst, float* work ); +lapack_int LAPACKE_dtrexc_work( int matrix_order, char compq, lapack_int n, + double* t, lapack_int ldt, double* q, + lapack_int ldq, lapack_int* ifst, + lapack_int* ilst, double* work ); +lapack_int 
LAPACKE_ctrexc_work( int matrix_order, char compq, lapack_int n, + lapack_complex_float* t, lapack_int ldt, + lapack_complex_float* q, lapack_int ldq, + lapack_int ifst, lapack_int ilst ); +lapack_int LAPACKE_ztrexc_work( int matrix_order, char compq, lapack_int n, + lapack_complex_double* t, lapack_int ldt, + lapack_complex_double* q, lapack_int ldq, + lapack_int ifst, lapack_int ilst ); + +lapack_int LAPACKE_strrfs_work( int matrix_order, char uplo, char trans, + char diag, lapack_int n, lapack_int nrhs, + const float* a, lapack_int lda, const float* b, + lapack_int ldb, const float* x, lapack_int ldx, + float* ferr, float* berr, float* work, + lapack_int* iwork ); +lapack_int LAPACKE_dtrrfs_work( int matrix_order, char uplo, char trans, + char diag, lapack_int n, lapack_int nrhs, + const double* a, lapack_int lda, + const double* b, lapack_int ldb, + const double* x, lapack_int ldx, double* ferr, + double* berr, double* work, lapack_int* iwork ); +lapack_int LAPACKE_ctrrfs_work( int matrix_order, char uplo, char trans, + char diag, lapack_int n, lapack_int nrhs, + const lapack_complex_float* a, lapack_int lda, + const lapack_complex_float* b, lapack_int ldb, + const lapack_complex_float* x, lapack_int ldx, + float* ferr, float* berr, + lapack_complex_float* work, float* rwork ); +lapack_int LAPACKE_ztrrfs_work( int matrix_order, char uplo, char trans, + char diag, lapack_int n, lapack_int nrhs, + const lapack_complex_double* a, lapack_int lda, + const lapack_complex_double* b, lapack_int ldb, + const lapack_complex_double* x, lapack_int ldx, + double* ferr, double* berr, + lapack_complex_double* work, double* rwork ); + +lapack_int LAPACKE_strsen_work( int matrix_order, char job, char compq, + const lapack_logical* select, lapack_int n, + float* t, lapack_int ldt, float* q, + lapack_int ldq, float* wr, float* wi, + lapack_int* m, float* s, float* sep, + float* work, lapack_int lwork, + lapack_int* iwork, lapack_int liwork ); +lapack_int LAPACKE_dtrsen_work( int 
matrix_order, char job, char compq, + const lapack_logical* select, lapack_int n, + double* t, lapack_int ldt, double* q, + lapack_int ldq, double* wr, double* wi, + lapack_int* m, double* s, double* sep, + double* work, lapack_int lwork, + lapack_int* iwork, lapack_int liwork ); +lapack_int LAPACKE_ctrsen_work( int matrix_order, char job, char compq, + const lapack_logical* select, lapack_int n, + lapack_complex_float* t, lapack_int ldt, + lapack_complex_float* q, lapack_int ldq, + lapack_complex_float* w, lapack_int* m, + float* s, float* sep, + lapack_complex_float* work, lapack_int lwork ); +lapack_int LAPACKE_ztrsen_work( int matrix_order, char job, char compq, + const lapack_logical* select, lapack_int n, + lapack_complex_double* t, lapack_int ldt, + lapack_complex_double* q, lapack_int ldq, + lapack_complex_double* w, lapack_int* m, + double* s, double* sep, + lapack_complex_double* work, lapack_int lwork ); + +lapack_int LAPACKE_strsna_work( int matrix_order, char job, char howmny, + const lapack_logical* select, lapack_int n, + const float* t, lapack_int ldt, const float* vl, + lapack_int ldvl, const float* vr, + lapack_int ldvr, float* s, float* sep, + lapack_int mm, lapack_int* m, float* work, + lapack_int ldwork, lapack_int* iwork ); +lapack_int LAPACKE_dtrsna_work( int matrix_order, char job, char howmny, + const lapack_logical* select, lapack_int n, + const double* t, lapack_int ldt, + const double* vl, lapack_int ldvl, + const double* vr, lapack_int ldvr, double* s, + double* sep, lapack_int mm, lapack_int* m, + double* work, lapack_int ldwork, + lapack_int* iwork ); +lapack_int LAPACKE_ctrsna_work( int matrix_order, char job, char howmny, + const lapack_logical* select, lapack_int n, + const lapack_complex_float* t, lapack_int ldt, + const lapack_complex_float* vl, lapack_int ldvl, + const lapack_complex_float* vr, lapack_int ldvr, + float* s, float* sep, lapack_int mm, + lapack_int* m, lapack_complex_float* work, + lapack_int ldwork, float* rwork 
); +lapack_int LAPACKE_ztrsna_work( int matrix_order, char job, char howmny, + const lapack_logical* select, lapack_int n, + const lapack_complex_double* t, lapack_int ldt, + const lapack_complex_double* vl, + lapack_int ldvl, + const lapack_complex_double* vr, + lapack_int ldvr, double* s, double* sep, + lapack_int mm, lapack_int* m, + lapack_complex_double* work, lapack_int ldwork, + double* rwork ); + +lapack_int LAPACKE_strsyl_work( int matrix_order, char trana, char tranb, + lapack_int isgn, lapack_int m, lapack_int n, + const float* a, lapack_int lda, const float* b, + lapack_int ldb, float* c, lapack_int ldc, + float* scale ); +lapack_int LAPACKE_dtrsyl_work( int matrix_order, char trana, char tranb, + lapack_int isgn, lapack_int m, lapack_int n, + const double* a, lapack_int lda, + const double* b, lapack_int ldb, double* c, + lapack_int ldc, double* scale ); +lapack_int LAPACKE_ctrsyl_work( int matrix_order, char trana, char tranb, + lapack_int isgn, lapack_int m, lapack_int n, + const lapack_complex_float* a, lapack_int lda, + const lapack_complex_float* b, lapack_int ldb, + lapack_complex_float* c, lapack_int ldc, + float* scale ); +lapack_int LAPACKE_ztrsyl_work( int matrix_order, char trana, char tranb, + lapack_int isgn, lapack_int m, lapack_int n, + const lapack_complex_double* a, lapack_int lda, + const lapack_complex_double* b, lapack_int ldb, + lapack_complex_double* c, lapack_int ldc, + double* scale ); + +lapack_int LAPACKE_strtri_work( int matrix_order, char uplo, char diag, + lapack_int n, float* a, lapack_int lda ); +lapack_int LAPACKE_dtrtri_work( int matrix_order, char uplo, char diag, + lapack_int n, double* a, lapack_int lda ); +lapack_int LAPACKE_ctrtri_work( int matrix_order, char uplo, char diag, + lapack_int n, lapack_complex_float* a, + lapack_int lda ); +lapack_int LAPACKE_ztrtri_work( int matrix_order, char uplo, char diag, + lapack_int n, lapack_complex_double* a, + lapack_int lda ); + +lapack_int LAPACKE_strtrs_work( int 
matrix_order, char uplo, char trans, + char diag, lapack_int n, lapack_int nrhs, + const float* a, lapack_int lda, float* b, + lapack_int ldb ); +lapack_int LAPACKE_dtrtrs_work( int matrix_order, char uplo, char trans, + char diag, lapack_int n, lapack_int nrhs, + const double* a, lapack_int lda, double* b, + lapack_int ldb ); +lapack_int LAPACKE_ctrtrs_work( int matrix_order, char uplo, char trans, + char diag, lapack_int n, lapack_int nrhs, + const lapack_complex_float* a, lapack_int lda, + lapack_complex_float* b, lapack_int ldb ); +lapack_int LAPACKE_ztrtrs_work( int matrix_order, char uplo, char trans, + char diag, lapack_int n, lapack_int nrhs, + const lapack_complex_double* a, lapack_int lda, + lapack_complex_double* b, lapack_int ldb ); + +lapack_int LAPACKE_strttf_work( int matrix_order, char transr, char uplo, + lapack_int n, const float* a, lapack_int lda, + float* arf ); +lapack_int LAPACKE_dtrttf_work( int matrix_order, char transr, char uplo, + lapack_int n, const double* a, lapack_int lda, + double* arf ); +lapack_int LAPACKE_ctrttf_work( int matrix_order, char transr, char uplo, + lapack_int n, const lapack_complex_float* a, + lapack_int lda, lapack_complex_float* arf ); +lapack_int LAPACKE_ztrttf_work( int matrix_order, char transr, char uplo, + lapack_int n, const lapack_complex_double* a, + lapack_int lda, lapack_complex_double* arf ); + +lapack_int LAPACKE_strttp_work( int matrix_order, char uplo, lapack_int n, + const float* a, lapack_int lda, float* ap ); +lapack_int LAPACKE_dtrttp_work( int matrix_order, char uplo, lapack_int n, + const double* a, lapack_int lda, double* ap ); +lapack_int LAPACKE_ctrttp_work( int matrix_order, char uplo, lapack_int n, + const lapack_complex_float* a, lapack_int lda, + lapack_complex_float* ap ); +lapack_int LAPACKE_ztrttp_work( int matrix_order, char uplo, lapack_int n, + const lapack_complex_double* a, lapack_int lda, + lapack_complex_double* ap ); + +lapack_int LAPACKE_stzrzf_work( int matrix_order, 
lapack_int m, lapack_int n, + float* a, lapack_int lda, float* tau, + float* work, lapack_int lwork ); +lapack_int LAPACKE_dtzrzf_work( int matrix_order, lapack_int m, lapack_int n, + double* a, lapack_int lda, double* tau, + double* work, lapack_int lwork ); +lapack_int LAPACKE_ctzrzf_work( int matrix_order, lapack_int m, lapack_int n, + lapack_complex_float* a, lapack_int lda, + lapack_complex_float* tau, + lapack_complex_float* work, lapack_int lwork ); +lapack_int LAPACKE_ztzrzf_work( int matrix_order, lapack_int m, lapack_int n, + lapack_complex_double* a, lapack_int lda, + lapack_complex_double* tau, + lapack_complex_double* work, lapack_int lwork ); + +lapack_int LAPACKE_cungbr_work( int matrix_order, char vect, lapack_int m, + lapack_int n, lapack_int k, + lapack_complex_float* a, lapack_int lda, + const lapack_complex_float* tau, + lapack_complex_float* work, lapack_int lwork ); +lapack_int LAPACKE_zungbr_work( int matrix_order, char vect, lapack_int m, + lapack_int n, lapack_int k, + lapack_complex_double* a, lapack_int lda, + const lapack_complex_double* tau, + lapack_complex_double* work, lapack_int lwork ); + +lapack_int LAPACKE_cunghr_work( int matrix_order, lapack_int n, lapack_int ilo, + lapack_int ihi, lapack_complex_float* a, + lapack_int lda, const lapack_complex_float* tau, + lapack_complex_float* work, lapack_int lwork ); +lapack_int LAPACKE_zunghr_work( int matrix_order, lapack_int n, lapack_int ilo, + lapack_int ihi, lapack_complex_double* a, + lapack_int lda, + const lapack_complex_double* tau, + lapack_complex_double* work, lapack_int lwork ); + +lapack_int LAPACKE_cunglq_work( int matrix_order, lapack_int m, lapack_int n, + lapack_int k, lapack_complex_float* a, + lapack_int lda, const lapack_complex_float* tau, + lapack_complex_float* work, lapack_int lwork ); +lapack_int LAPACKE_zunglq_work( int matrix_order, lapack_int m, lapack_int n, + lapack_int k, lapack_complex_double* a, + lapack_int lda, + const lapack_complex_double* tau, + 
lapack_complex_double* work, lapack_int lwork ); + +lapack_int LAPACKE_cungql_work( int matrix_order, lapack_int m, lapack_int n, + lapack_int k, lapack_complex_float* a, + lapack_int lda, const lapack_complex_float* tau, + lapack_complex_float* work, lapack_int lwork ); +lapack_int LAPACKE_zungql_work( int matrix_order, lapack_int m, lapack_int n, + lapack_int k, lapack_complex_double* a, + lapack_int lda, + const lapack_complex_double* tau, + lapack_complex_double* work, lapack_int lwork ); + +lapack_int LAPACKE_cungqr_work( int matrix_order, lapack_int m, lapack_int n, + lapack_int k, lapack_complex_float* a, + lapack_int lda, const lapack_complex_float* tau, + lapack_complex_float* work, lapack_int lwork ); +lapack_int LAPACKE_zungqr_work( int matrix_order, lapack_int m, lapack_int n, + lapack_int k, lapack_complex_double* a, + lapack_int lda, + const lapack_complex_double* tau, + lapack_complex_double* work, lapack_int lwork ); + +lapack_int LAPACKE_cungrq_work( int matrix_order, lapack_int m, lapack_int n, + lapack_int k, lapack_complex_float* a, + lapack_int lda, const lapack_complex_float* tau, + lapack_complex_float* work, lapack_int lwork ); +lapack_int LAPACKE_zungrq_work( int matrix_order, lapack_int m, lapack_int n, + lapack_int k, lapack_complex_double* a, + lapack_int lda, + const lapack_complex_double* tau, + lapack_complex_double* work, lapack_int lwork ); + +lapack_int LAPACKE_cungtr_work( int matrix_order, char uplo, lapack_int n, + lapack_complex_float* a, lapack_int lda, + const lapack_complex_float* tau, + lapack_complex_float* work, lapack_int lwork ); +lapack_int LAPACKE_zungtr_work( int matrix_order, char uplo, lapack_int n, + lapack_complex_double* a, lapack_int lda, + const lapack_complex_double* tau, + lapack_complex_double* work, lapack_int lwork ); + +lapack_int LAPACKE_cunmbr_work( int matrix_order, char vect, char side, + char trans, lapack_int m, lapack_int n, + lapack_int k, const lapack_complex_float* a, + lapack_int lda, const 
lapack_complex_float* tau, + lapack_complex_float* c, lapack_int ldc, + lapack_complex_float* work, lapack_int lwork ); +lapack_int LAPACKE_zunmbr_work( int matrix_order, char vect, char side, + char trans, lapack_int m, lapack_int n, + lapack_int k, const lapack_complex_double* a, + lapack_int lda, + const lapack_complex_double* tau, + lapack_complex_double* c, lapack_int ldc, + lapack_complex_double* work, lapack_int lwork ); + +lapack_int LAPACKE_cunmhr_work( int matrix_order, char side, char trans, + lapack_int m, lapack_int n, lapack_int ilo, + lapack_int ihi, const lapack_complex_float* a, + lapack_int lda, const lapack_complex_float* tau, + lapack_complex_float* c, lapack_int ldc, + lapack_complex_float* work, lapack_int lwork ); +lapack_int LAPACKE_zunmhr_work( int matrix_order, char side, char trans, + lapack_int m, lapack_int n, lapack_int ilo, + lapack_int ihi, const lapack_complex_double* a, + lapack_int lda, + const lapack_complex_double* tau, + lapack_complex_double* c, lapack_int ldc, + lapack_complex_double* work, lapack_int lwork ); + +lapack_int LAPACKE_cunmlq_work( int matrix_order, char side, char trans, + lapack_int m, lapack_int n, lapack_int k, + const lapack_complex_float* a, lapack_int lda, + const lapack_complex_float* tau, + lapack_complex_float* c, lapack_int ldc, + lapack_complex_float* work, lapack_int lwork ); +lapack_int LAPACKE_zunmlq_work( int matrix_order, char side, char trans, + lapack_int m, lapack_int n, lapack_int k, + const lapack_complex_double* a, lapack_int lda, + const lapack_complex_double* tau, + lapack_complex_double* c, lapack_int ldc, + lapack_complex_double* work, lapack_int lwork ); + +lapack_int LAPACKE_cunmql_work( int matrix_order, char side, char trans, + lapack_int m, lapack_int n, lapack_int k, + const lapack_complex_float* a, lapack_int lda, + const lapack_complex_float* tau, + lapack_complex_float* c, lapack_int ldc, + lapack_complex_float* work, lapack_int lwork ); +lapack_int LAPACKE_zunmql_work( int 
matrix_order, char side, char trans, + lapack_int m, lapack_int n, lapack_int k, + const lapack_complex_double* a, lapack_int lda, + const lapack_complex_double* tau, + lapack_complex_double* c, lapack_int ldc, + lapack_complex_double* work, lapack_int lwork ); + +lapack_int LAPACKE_cunmqr_work( int matrix_order, char side, char trans, + lapack_int m, lapack_int n, lapack_int k, + const lapack_complex_float* a, lapack_int lda, + const lapack_complex_float* tau, + lapack_complex_float* c, lapack_int ldc, + lapack_complex_float* work, lapack_int lwork ); +lapack_int LAPACKE_zunmqr_work( int matrix_order, char side, char trans, + lapack_int m, lapack_int n, lapack_int k, + const lapack_complex_double* a, lapack_int lda, + const lapack_complex_double* tau, + lapack_complex_double* c, lapack_int ldc, + lapack_complex_double* work, lapack_int lwork ); + +lapack_int LAPACKE_cunmrq_work( int matrix_order, char side, char trans, + lapack_int m, lapack_int n, lapack_int k, + const lapack_complex_float* a, lapack_int lda, + const lapack_complex_float* tau, + lapack_complex_float* c, lapack_int ldc, + lapack_complex_float* work, lapack_int lwork ); +lapack_int LAPACKE_zunmrq_work( int matrix_order, char side, char trans, + lapack_int m, lapack_int n, lapack_int k, + const lapack_complex_double* a, lapack_int lda, + const lapack_complex_double* tau, + lapack_complex_double* c, lapack_int ldc, + lapack_complex_double* work, lapack_int lwork ); + +lapack_int LAPACKE_cunmrz_work( int matrix_order, char side, char trans, + lapack_int m, lapack_int n, lapack_int k, + lapack_int l, const lapack_complex_float* a, + lapack_int lda, const lapack_complex_float* tau, + lapack_complex_float* c, lapack_int ldc, + lapack_complex_float* work, lapack_int lwork ); +lapack_int LAPACKE_zunmrz_work( int matrix_order, char side, char trans, + lapack_int m, lapack_int n, lapack_int k, + lapack_int l, const lapack_complex_double* a, + lapack_int lda, + const lapack_complex_double* tau, + 
lapack_complex_double* c, lapack_int ldc, + lapack_complex_double* work, lapack_int lwork ); + +lapack_int LAPACKE_cunmtr_work( int matrix_order, char side, char uplo, + char trans, lapack_int m, lapack_int n, + const lapack_complex_float* a, lapack_int lda, + const lapack_complex_float* tau, + lapack_complex_float* c, lapack_int ldc, + lapack_complex_float* work, lapack_int lwork ); +lapack_int LAPACKE_zunmtr_work( int matrix_order, char side, char uplo, + char trans, lapack_int m, lapack_int n, + const lapack_complex_double* a, lapack_int lda, + const lapack_complex_double* tau, + lapack_complex_double* c, lapack_int ldc, + lapack_complex_double* work, lapack_int lwork ); + +lapack_int LAPACKE_cupgtr_work( int matrix_order, char uplo, lapack_int n, + const lapack_complex_float* ap, + const lapack_complex_float* tau, + lapack_complex_float* q, lapack_int ldq, + lapack_complex_float* work ); +lapack_int LAPACKE_zupgtr_work( int matrix_order, char uplo, lapack_int n, + const lapack_complex_double* ap, + const lapack_complex_double* tau, + lapack_complex_double* q, lapack_int ldq, + lapack_complex_double* work ); + +lapack_int LAPACKE_cupmtr_work( int matrix_order, char side, char uplo, + char trans, lapack_int m, lapack_int n, + const lapack_complex_float* ap, + const lapack_complex_float* tau, + lapack_complex_float* c, lapack_int ldc, + lapack_complex_float* work ); +lapack_int LAPACKE_zupmtr_work( int matrix_order, char side, char uplo, + char trans, lapack_int m, lapack_int n, + const lapack_complex_double* ap, + const lapack_complex_double* tau, + lapack_complex_double* c, lapack_int ldc, + lapack_complex_double* work ); + +lapack_int LAPACKE_claghe( int matrix_order, lapack_int n, lapack_int k, + const float* d, lapack_complex_float* a, + lapack_int lda, lapack_int* iseed ); +lapack_int LAPACKE_zlaghe( int matrix_order, lapack_int n, lapack_int k, + const double* d, lapack_complex_double* a, + lapack_int lda, lapack_int* iseed ); + +lapack_int LAPACKE_slagsy( 
int matrix_order, lapack_int n, lapack_int k, + const float* d, float* a, lapack_int lda, + lapack_int* iseed ); +lapack_int LAPACKE_dlagsy( int matrix_order, lapack_int n, lapack_int k, + const double* d, double* a, lapack_int lda, + lapack_int* iseed ); +lapack_int LAPACKE_clagsy( int matrix_order, lapack_int n, lapack_int k, + const float* d, lapack_complex_float* a, + lapack_int lda, lapack_int* iseed ); +lapack_int LAPACKE_zlagsy( int matrix_order, lapack_int n, lapack_int k, + const double* d, lapack_complex_double* a, + lapack_int lda, lapack_int* iseed ); + +lapack_int LAPACKE_slapmr( int matrix_order, lapack_logical forwrd, + lapack_int m, lapack_int n, float* x, lapack_int ldx, + lapack_int* k ); +lapack_int LAPACKE_dlapmr( int matrix_order, lapack_logical forwrd, + lapack_int m, lapack_int n, double* x, + lapack_int ldx, lapack_int* k ); +lapack_int LAPACKE_clapmr( int matrix_order, lapack_logical forwrd, + lapack_int m, lapack_int n, lapack_complex_float* x, + lapack_int ldx, lapack_int* k ); +lapack_int LAPACKE_zlapmr( int matrix_order, lapack_logical forwrd, + lapack_int m, lapack_int n, lapack_complex_double* x, + lapack_int ldx, lapack_int* k ); + + +float LAPACKE_slapy2( float x, float y ); +double LAPACKE_dlapy2( double x, double y ); + +float LAPACKE_slapy3( float x, float y, float z ); +double LAPACKE_dlapy3( double x, double y, double z ); + +lapack_int LAPACKE_slartgp( float f, float g, float* cs, float* sn, float* r ); +lapack_int LAPACKE_dlartgp( double f, double g, double* cs, double* sn, + double* r ); + +lapack_int LAPACKE_slartgs( float x, float y, float sigma, float* cs, + float* sn ); +lapack_int LAPACKE_dlartgs( double x, double y, double sigma, double* cs, + double* sn ); + + +//LAPACK 3.3.0 +lapack_int LAPACKE_cbbcsd( int matrix_order, char jobu1, char jobu2, + char jobv1t, char jobv2t, char trans, lapack_int m, + lapack_int p, lapack_int q, float* theta, float* phi, + lapack_complex_float* u1, lapack_int ldu1, + 
lapack_complex_float* u2, lapack_int ldu2, + lapack_complex_float* v1t, lapack_int ldv1t, + lapack_complex_float* v2t, lapack_int ldv2t, + float* b11d, float* b11e, float* b12d, float* b12e, + float* b21d, float* b21e, float* b22d, float* b22e ); +lapack_int LAPACKE_cbbcsd_work( int matrix_order, char jobu1, char jobu2, + char jobv1t, char jobv2t, char trans, + lapack_int m, lapack_int p, lapack_int q, + float* theta, float* phi, + lapack_complex_float* u1, lapack_int ldu1, + lapack_complex_float* u2, lapack_int ldu2, + lapack_complex_float* v1t, lapack_int ldv1t, + lapack_complex_float* v2t, lapack_int ldv2t, + float* b11d, float* b11e, float* b12d, + float* b12e, float* b21d, float* b21e, + float* b22d, float* b22e, float* rwork, + lapack_int lrwork ); +lapack_int LAPACKE_cheswapr( int matrix_order, char uplo, lapack_int n, + lapack_complex_float* a, lapack_int i1, + lapack_int i2 ); +lapack_int LAPACKE_cheswapr_work( int matrix_order, char uplo, lapack_int n, + lapack_complex_float* a, lapack_int i1, + lapack_int i2 ); +lapack_int LAPACKE_chetri2( int matrix_order, char uplo, lapack_int n, + lapack_complex_float* a, lapack_int lda, + const lapack_int* ipiv ); +lapack_int LAPACKE_chetri2_work( int matrix_order, char uplo, lapack_int n, + lapack_complex_float* a, lapack_int lda, + const lapack_int* ipiv, + lapack_complex_float* work, lapack_int lwork ); +lapack_int LAPACKE_chetri2x( int matrix_order, char uplo, lapack_int n, + lapack_complex_float* a, lapack_int lda, + const lapack_int* ipiv, lapack_int nb ); +lapack_int LAPACKE_chetri2x_work( int matrix_order, char uplo, lapack_int n, + lapack_complex_float* a, lapack_int lda, + const lapack_int* ipiv, + lapack_complex_float* work, lapack_int nb ); +lapack_int LAPACKE_chetrs2( int matrix_order, char uplo, lapack_int n, + lapack_int nrhs, const lapack_complex_float* a, + lapack_int lda, const lapack_int* ipiv, + lapack_complex_float* b, lapack_int ldb ); +lapack_int LAPACKE_chetrs2_work( int matrix_order, char 
uplo, lapack_int n, + lapack_int nrhs, const lapack_complex_float* a, + lapack_int lda, const lapack_int* ipiv, + lapack_complex_float* b, lapack_int ldb, + lapack_complex_float* work ); +lapack_int LAPACKE_csyconv( int matrix_order, char uplo, char way, lapack_int n, + lapack_complex_float* a, lapack_int lda, + const lapack_int* ipiv ); +lapack_int LAPACKE_csyconv_work( int matrix_order, char uplo, char way, + lapack_int n, lapack_complex_float* a, + lapack_int lda, const lapack_int* ipiv, + lapack_complex_float* work ); +lapack_int LAPACKE_csyswapr( int matrix_order, char uplo, lapack_int n, + lapack_complex_float* a, lapack_int i1, + lapack_int i2 ); +lapack_int LAPACKE_csyswapr_work( int matrix_order, char uplo, lapack_int n, + lapack_complex_float* a, lapack_int i1, + lapack_int i2 ); +lapack_int LAPACKE_csytri2( int matrix_order, char uplo, lapack_int n, + lapack_complex_float* a, lapack_int lda, + const lapack_int* ipiv ); +lapack_int LAPACKE_csytri2_work( int matrix_order, char uplo, lapack_int n, + lapack_complex_float* a, lapack_int lda, + const lapack_int* ipiv, + lapack_complex_float* work, lapack_int lwork ); +lapack_int LAPACKE_csytri2x( int matrix_order, char uplo, lapack_int n, + lapack_complex_float* a, lapack_int lda, + const lapack_int* ipiv, lapack_int nb ); +lapack_int LAPACKE_csytri2x_work( int matrix_order, char uplo, lapack_int n, + lapack_complex_float* a, lapack_int lda, + const lapack_int* ipiv, + lapack_complex_float* work, lapack_int nb ); +lapack_int LAPACKE_csytrs2( int matrix_order, char uplo, lapack_int n, + lapack_int nrhs, const lapack_complex_float* a, + lapack_int lda, const lapack_int* ipiv, + lapack_complex_float* b, lapack_int ldb ); +lapack_int LAPACKE_csytrs2_work( int matrix_order, char uplo, lapack_int n, + lapack_int nrhs, const lapack_complex_float* a, + lapack_int lda, const lapack_int* ipiv, + lapack_complex_float* b, lapack_int ldb, + lapack_complex_float* work ); +lapack_int LAPACKE_cunbdb( int matrix_order, char 
trans, char signs, + lapack_int m, lapack_int p, lapack_int q, + lapack_complex_float* x11, lapack_int ldx11, + lapack_complex_float* x12, lapack_int ldx12, + lapack_complex_float* x21, lapack_int ldx21, + lapack_complex_float* x22, lapack_int ldx22, + float* theta, float* phi, + lapack_complex_float* taup1, + lapack_complex_float* taup2, + lapack_complex_float* tauq1, + lapack_complex_float* tauq2 ); +lapack_int LAPACKE_cunbdb_work( int matrix_order, char trans, char signs, + lapack_int m, lapack_int p, lapack_int q, + lapack_complex_float* x11, lapack_int ldx11, + lapack_complex_float* x12, lapack_int ldx12, + lapack_complex_float* x21, lapack_int ldx21, + lapack_complex_float* x22, lapack_int ldx22, + float* theta, float* phi, + lapack_complex_float* taup1, + lapack_complex_float* taup2, + lapack_complex_float* tauq1, + lapack_complex_float* tauq2, + lapack_complex_float* work, lapack_int lwork ); +lapack_int LAPACKE_cuncsd( int matrix_order, char jobu1, char jobu2, + char jobv1t, char jobv2t, char trans, char signs, + lapack_int m, lapack_int p, lapack_int q, + lapack_complex_float* x11, lapack_int ldx11, + lapack_complex_float* x12, lapack_int ldx12, + lapack_complex_float* x21, lapack_int ldx21, + lapack_complex_float* x22, lapack_int ldx22, + float* theta, lapack_complex_float* u1, + lapack_int ldu1, lapack_complex_float* u2, + lapack_int ldu2, lapack_complex_float* v1t, + lapack_int ldv1t, lapack_complex_float* v2t, + lapack_int ldv2t ); +lapack_int LAPACKE_cuncsd_work( int matrix_order, char jobu1, char jobu2, + char jobv1t, char jobv2t, char trans, + char signs, lapack_int m, lapack_int p, + lapack_int q, lapack_complex_float* x11, + lapack_int ldx11, lapack_complex_float* x12, + lapack_int ldx12, lapack_complex_float* x21, + lapack_int ldx21, lapack_complex_float* x22, + lapack_int ldx22, float* theta, + lapack_complex_float* u1, lapack_int ldu1, + lapack_complex_float* u2, lapack_int ldu2, + lapack_complex_float* v1t, lapack_int ldv1t, + 
lapack_complex_float* v2t, lapack_int ldv2t, + lapack_complex_float* work, lapack_int lwork, + float* rwork, lapack_int lrwork, + lapack_int* iwork ); +lapack_int LAPACKE_dbbcsd( int matrix_order, char jobu1, char jobu2, + char jobv1t, char jobv2t, char trans, lapack_int m, + lapack_int p, lapack_int q, double* theta, + double* phi, double* u1, lapack_int ldu1, double* u2, + lapack_int ldu2, double* v1t, lapack_int ldv1t, + double* v2t, lapack_int ldv2t, double* b11d, + double* b11e, double* b12d, double* b12e, + double* b21d, double* b21e, double* b22d, + double* b22e ); +lapack_int LAPACKE_dbbcsd_work( int matrix_order, char jobu1, char jobu2, + char jobv1t, char jobv2t, char trans, + lapack_int m, lapack_int p, lapack_int q, + double* theta, double* phi, double* u1, + lapack_int ldu1, double* u2, lapack_int ldu2, + double* v1t, lapack_int ldv1t, double* v2t, + lapack_int ldv2t, double* b11d, double* b11e, + double* b12d, double* b12e, double* b21d, + double* b21e, double* b22d, double* b22e, + double* work, lapack_int lwork ); +lapack_int LAPACKE_dorbdb( int matrix_order, char trans, char signs, + lapack_int m, lapack_int p, lapack_int q, + double* x11, lapack_int ldx11, double* x12, + lapack_int ldx12, double* x21, lapack_int ldx21, + double* x22, lapack_int ldx22, double* theta, + double* phi, double* taup1, double* taup2, + double* tauq1, double* tauq2 ); +lapack_int LAPACKE_dorbdb_work( int matrix_order, char trans, char signs, + lapack_int m, lapack_int p, lapack_int q, + double* x11, lapack_int ldx11, double* x12, + lapack_int ldx12, double* x21, lapack_int ldx21, + double* x22, lapack_int ldx22, double* theta, + double* phi, double* taup1, double* taup2, + double* tauq1, double* tauq2, double* work, + lapack_int lwork ); +lapack_int LAPACKE_dorcsd( int matrix_order, char jobu1, char jobu2, + char jobv1t, char jobv2t, char trans, char signs, + lapack_int m, lapack_int p, lapack_int q, + double* x11, lapack_int ldx11, double* x12, + lapack_int ldx12, 
double* x21, lapack_int ldx21, + double* x22, lapack_int ldx22, double* theta, + double* u1, lapack_int ldu1, double* u2, + lapack_int ldu2, double* v1t, lapack_int ldv1t, + double* v2t, lapack_int ldv2t ); +lapack_int LAPACKE_dorcsd_work( int matrix_order, char jobu1, char jobu2, + char jobv1t, char jobv2t, char trans, + char signs, lapack_int m, lapack_int p, + lapack_int q, double* x11, lapack_int ldx11, + double* x12, lapack_int ldx12, double* x21, + lapack_int ldx21, double* x22, lapack_int ldx22, + double* theta, double* u1, lapack_int ldu1, + double* u2, lapack_int ldu2, double* v1t, + lapack_int ldv1t, double* v2t, lapack_int ldv2t, + double* work, lapack_int lwork, + lapack_int* iwork ); +lapack_int LAPACKE_dsyconv( int matrix_order, char uplo, char way, lapack_int n, + double* a, lapack_int lda, const lapack_int* ipiv ); +lapack_int LAPACKE_dsyconv_work( int matrix_order, char uplo, char way, + lapack_int n, double* a, lapack_int lda, + const lapack_int* ipiv, double* work ); +lapack_int LAPACKE_dsyswapr( int matrix_order, char uplo, lapack_int n, + double* a, lapack_int i1, lapack_int i2 ); +lapack_int LAPACKE_dsyswapr_work( int matrix_order, char uplo, lapack_int n, + double* a, lapack_int i1, lapack_int i2 ); +lapack_int LAPACKE_dsytri2( int matrix_order, char uplo, lapack_int n, + double* a, lapack_int lda, const lapack_int* ipiv ); +lapack_int LAPACKE_dsytri2_work( int matrix_order, char uplo, lapack_int n, + double* a, lapack_int lda, + const lapack_int* ipiv, + lapack_complex_double* work, lapack_int lwork ); +lapack_int LAPACKE_dsytri2x( int matrix_order, char uplo, lapack_int n, + double* a, lapack_int lda, const lapack_int* ipiv, + lapack_int nb ); +lapack_int LAPACKE_dsytri2x_work( int matrix_order, char uplo, lapack_int n, + double* a, lapack_int lda, + const lapack_int* ipiv, double* work, + lapack_int nb ); +lapack_int LAPACKE_dsytrs2( int matrix_order, char uplo, lapack_int n, + lapack_int nrhs, const double* a, lapack_int lda, + const 
lapack_int* ipiv, double* b, lapack_int ldb ); +lapack_int LAPACKE_dsytrs2_work( int matrix_order, char uplo, lapack_int n, + lapack_int nrhs, const double* a, + lapack_int lda, const lapack_int* ipiv, + double* b, lapack_int ldb, double* work ); +lapack_int LAPACKE_sbbcsd( int matrix_order, char jobu1, char jobu2, + char jobv1t, char jobv2t, char trans, lapack_int m, + lapack_int p, lapack_int q, float* theta, float* phi, + float* u1, lapack_int ldu1, float* u2, + lapack_int ldu2, float* v1t, lapack_int ldv1t, + float* v2t, lapack_int ldv2t, float* b11d, + float* b11e, float* b12d, float* b12e, float* b21d, + float* b21e, float* b22d, float* b22e ); +lapack_int LAPACKE_sbbcsd_work( int matrix_order, char jobu1, char jobu2, + char jobv1t, char jobv2t, char trans, + lapack_int m, lapack_int p, lapack_int q, + float* theta, float* phi, float* u1, + lapack_int ldu1, float* u2, lapack_int ldu2, + float* v1t, lapack_int ldv1t, float* v2t, + lapack_int ldv2t, float* b11d, float* b11e, + float* b12d, float* b12e, float* b21d, + float* b21e, float* b22d, float* b22e, + float* work, lapack_int lwork ); +lapack_int LAPACKE_sorbdb( int matrix_order, char trans, char signs, + lapack_int m, lapack_int p, lapack_int q, float* x11, + lapack_int ldx11, float* x12, lapack_int ldx12, + float* x21, lapack_int ldx21, float* x22, + lapack_int ldx22, float* theta, float* phi, + float* taup1, float* taup2, float* tauq1, + float* tauq2 ); +lapack_int LAPACKE_sorbdb_work( int matrix_order, char trans, char signs, + lapack_int m, lapack_int p, lapack_int q, + float* x11, lapack_int ldx11, float* x12, + lapack_int ldx12, float* x21, lapack_int ldx21, + float* x22, lapack_int ldx22, float* theta, + float* phi, float* taup1, float* taup2, + float* tauq1, float* tauq2, float* work, + lapack_int lwork ); +lapack_int LAPACKE_sorcsd( int matrix_order, char jobu1, char jobu2, + char jobv1t, char jobv2t, char trans, char signs, + lapack_int m, lapack_int p, lapack_int q, float* x11, + lapack_int 
ldx11, float* x12, lapack_int ldx12, + float* x21, lapack_int ldx21, float* x22, + lapack_int ldx22, float* theta, float* u1, + lapack_int ldu1, float* u2, lapack_int ldu2, + float* v1t, lapack_int ldv1t, float* v2t, + lapack_int ldv2t ); +lapack_int LAPACKE_sorcsd_work( int matrix_order, char jobu1, char jobu2, + char jobv1t, char jobv2t, char trans, + char signs, lapack_int m, lapack_int p, + lapack_int q, float* x11, lapack_int ldx11, + float* x12, lapack_int ldx12, float* x21, + lapack_int ldx21, float* x22, lapack_int ldx22, + float* theta, float* u1, lapack_int ldu1, + float* u2, lapack_int ldu2, float* v1t, + lapack_int ldv1t, float* v2t, lapack_int ldv2t, + float* work, lapack_int lwork, + lapack_int* iwork ); +lapack_int LAPACKE_ssyconv( int matrix_order, char uplo, char way, lapack_int n, + float* a, lapack_int lda, const lapack_int* ipiv ); +lapack_int LAPACKE_ssyconv_work( int matrix_order, char uplo, char way, + lapack_int n, float* a, lapack_int lda, + const lapack_int* ipiv, float* work ); +lapack_int LAPACKE_ssyswapr( int matrix_order, char uplo, lapack_int n, + float* a, lapack_int i1, lapack_int i2 ); +lapack_int LAPACKE_ssyswapr_work( int matrix_order, char uplo, lapack_int n, + float* a, lapack_int i1, lapack_int i2 ); +lapack_int LAPACKE_ssytri2( int matrix_order, char uplo, lapack_int n, float* a, + lapack_int lda, const lapack_int* ipiv ); +lapack_int LAPACKE_ssytri2_work( int matrix_order, char uplo, lapack_int n, + float* a, lapack_int lda, + const lapack_int* ipiv, + lapack_complex_float* work, lapack_int lwork ); +lapack_int LAPACKE_ssytri2x( int matrix_order, char uplo, lapack_int n, + float* a, lapack_int lda, const lapack_int* ipiv, + lapack_int nb ); +lapack_int LAPACKE_ssytri2x_work( int matrix_order, char uplo, lapack_int n, + float* a, lapack_int lda, + const lapack_int* ipiv, float* work, + lapack_int nb ); +lapack_int LAPACKE_ssytrs2( int matrix_order, char uplo, lapack_int n, + lapack_int nrhs, const float* a, lapack_int lda, + 
const lapack_int* ipiv, float* b, lapack_int ldb ); +lapack_int LAPACKE_ssytrs2_work( int matrix_order, char uplo, lapack_int n, + lapack_int nrhs, const float* a, + lapack_int lda, const lapack_int* ipiv, + float* b, lapack_int ldb, float* work ); +lapack_int LAPACKE_zbbcsd( int matrix_order, char jobu1, char jobu2, + char jobv1t, char jobv2t, char trans, lapack_int m, + lapack_int p, lapack_int q, double* theta, + double* phi, lapack_complex_double* u1, + lapack_int ldu1, lapack_complex_double* u2, + lapack_int ldu2, lapack_complex_double* v1t, + lapack_int ldv1t, lapack_complex_double* v2t, + lapack_int ldv2t, double* b11d, double* b11e, + double* b12d, double* b12e, double* b21d, + double* b21e, double* b22d, double* b22e ); +lapack_int LAPACKE_zbbcsd_work( int matrix_order, char jobu1, char jobu2, + char jobv1t, char jobv2t, char trans, + lapack_int m, lapack_int p, lapack_int q, + double* theta, double* phi, + lapack_complex_double* u1, lapack_int ldu1, + lapack_complex_double* u2, lapack_int ldu2, + lapack_complex_double* v1t, lapack_int ldv1t, + lapack_complex_double* v2t, lapack_int ldv2t, + double* b11d, double* b11e, double* b12d, + double* b12e, double* b21d, double* b21e, + double* b22d, double* b22e, double* rwork, + lapack_int lrwork ); +lapack_int LAPACKE_zheswapr( int matrix_order, char uplo, lapack_int n, + lapack_complex_double* a, lapack_int i1, + lapack_int i2 ); +lapack_int LAPACKE_zheswapr_work( int matrix_order, char uplo, lapack_int n, + lapack_complex_double* a, lapack_int i1, + lapack_int i2 ); +lapack_int LAPACKE_zhetri2( int matrix_order, char uplo, lapack_int n, + lapack_complex_double* a, lapack_int lda, + const lapack_int* ipiv ); +lapack_int LAPACKE_zhetri2_work( int matrix_order, char uplo, lapack_int n, + lapack_complex_double* a, lapack_int lda, + const lapack_int* ipiv, + lapack_complex_double* work, lapack_int lwork ); +lapack_int LAPACKE_zhetri2x( int matrix_order, char uplo, lapack_int n, + lapack_complex_double* a, 
lapack_int lda, + const lapack_int* ipiv, lapack_int nb ); +lapack_int LAPACKE_zhetri2x_work( int matrix_order, char uplo, lapack_int n, + lapack_complex_double* a, lapack_int lda, + const lapack_int* ipiv, + lapack_complex_double* work, lapack_int nb ); +lapack_int LAPACKE_zhetrs2( int matrix_order, char uplo, lapack_int n, + lapack_int nrhs, const lapack_complex_double* a, + lapack_int lda, const lapack_int* ipiv, + lapack_complex_double* b, lapack_int ldb ); +lapack_int LAPACKE_zhetrs2_work( int matrix_order, char uplo, lapack_int n, + lapack_int nrhs, const lapack_complex_double* a, + lapack_int lda, const lapack_int* ipiv, + lapack_complex_double* b, lapack_int ldb, + lapack_complex_double* work ); +lapack_int LAPACKE_zsyconv( int matrix_order, char uplo, char way, lapack_int n, + lapack_complex_double* a, lapack_int lda, + const lapack_int* ipiv ); +lapack_int LAPACKE_zsyconv_work( int matrix_order, char uplo, char way, + lapack_int n, lapack_complex_double* a, + lapack_int lda, const lapack_int* ipiv, + lapack_complex_double* work ); +lapack_int LAPACKE_zsyswapr( int matrix_order, char uplo, lapack_int n, + lapack_complex_double* a, lapack_int i1, + lapack_int i2 ); +lapack_int LAPACKE_zsyswapr_work( int matrix_order, char uplo, lapack_int n, + lapack_complex_double* a, lapack_int i1, + lapack_int i2 ); +lapack_int LAPACKE_zsytri2( int matrix_order, char uplo, lapack_int n, + lapack_complex_double* a, lapack_int lda, + const lapack_int* ipiv ); +lapack_int LAPACKE_zsytri2_work( int matrix_order, char uplo, lapack_int n, + lapack_complex_double* a, lapack_int lda, + const lapack_int* ipiv, + lapack_complex_double* work, lapack_int lwork ); +lapack_int LAPACKE_zsytri2x( int matrix_order, char uplo, lapack_int n, + lapack_complex_double* a, lapack_int lda, + const lapack_int* ipiv, lapack_int nb ); +lapack_int LAPACKE_zsytri2x_work( int matrix_order, char uplo, lapack_int n, + lapack_complex_double* a, lapack_int lda, + const lapack_int* ipiv, + 
lapack_complex_double* work, lapack_int nb ); +lapack_int LAPACKE_zsytrs2( int matrix_order, char uplo, lapack_int n, + lapack_int nrhs, const lapack_complex_double* a, + lapack_int lda, const lapack_int* ipiv, + lapack_complex_double* b, lapack_int ldb ); +lapack_int LAPACKE_zsytrs2_work( int matrix_order, char uplo, lapack_int n, + lapack_int nrhs, const lapack_complex_double* a, + lapack_int lda, const lapack_int* ipiv, + lapack_complex_double* b, lapack_int ldb, + lapack_complex_double* work ); +lapack_int LAPACKE_zunbdb( int matrix_order, char trans, char signs, + lapack_int m, lapack_int p, lapack_int q, + lapack_complex_double* x11, lapack_int ldx11, + lapack_complex_double* x12, lapack_int ldx12, + lapack_complex_double* x21, lapack_int ldx21, + lapack_complex_double* x22, lapack_int ldx22, + double* theta, double* phi, + lapack_complex_double* taup1, + lapack_complex_double* taup2, + lapack_complex_double* tauq1, + lapack_complex_double* tauq2 ); +lapack_int LAPACKE_zunbdb_work( int matrix_order, char trans, char signs, + lapack_int m, lapack_int p, lapack_int q, + lapack_complex_double* x11, lapack_int ldx11, + lapack_complex_double* x12, lapack_int ldx12, + lapack_complex_double* x21, lapack_int ldx21, + lapack_complex_double* x22, lapack_int ldx22, + double* theta, double* phi, + lapack_complex_double* taup1, + lapack_complex_double* taup2, + lapack_complex_double* tauq1, + lapack_complex_double* tauq2, + lapack_complex_double* work, lapack_int lwork ); +lapack_int LAPACKE_zuncsd( int matrix_order, char jobu1, char jobu2, + char jobv1t, char jobv2t, char trans, char signs, + lapack_int m, lapack_int p, lapack_int q, + lapack_complex_double* x11, lapack_int ldx11, + lapack_complex_double* x12, lapack_int ldx12, + lapack_complex_double* x21, lapack_int ldx21, + lapack_complex_double* x22, lapack_int ldx22, + double* theta, lapack_complex_double* u1, + lapack_int ldu1, lapack_complex_double* u2, + lapack_int ldu2, lapack_complex_double* v1t, + lapack_int 
ldv1t, lapack_complex_double* v2t, + lapack_int ldv2t ); +lapack_int LAPACKE_zuncsd_work( int matrix_order, char jobu1, char jobu2, + char jobv1t, char jobv2t, char trans, + char signs, lapack_int m, lapack_int p, + lapack_int q, lapack_complex_double* x11, + lapack_int ldx11, lapack_complex_double* x12, + lapack_int ldx12, lapack_complex_double* x21, + lapack_int ldx21, lapack_complex_double* x22, + lapack_int ldx22, double* theta, + lapack_complex_double* u1, lapack_int ldu1, + lapack_complex_double* u2, lapack_int ldu2, + lapack_complex_double* v1t, lapack_int ldv1t, + lapack_complex_double* v2t, lapack_int ldv2t, + lapack_complex_double* work, lapack_int lwork, + double* rwork, lapack_int lrwork, + lapack_int* iwork ); +//LAPACK 3.4.0 +lapack_int LAPACKE_sgemqrt( int matrix_order, char side, char trans, + lapack_int m, lapack_int n, lapack_int k, + lapack_int nb, const float* v, lapack_int ldv, + const float* t, lapack_int ldt, float* c, + lapack_int ldc ); +lapack_int LAPACKE_dgemqrt( int matrix_order, char side, char trans, + lapack_int m, lapack_int n, lapack_int k, + lapack_int nb, const double* v, lapack_int ldv, + const double* t, lapack_int ldt, double* c, + lapack_int ldc ); +lapack_int LAPACKE_cgemqrt( int matrix_order, char side, char trans, + lapack_int m, lapack_int n, lapack_int k, + lapack_int nb, const lapack_complex_float* v, + lapack_int ldv, const lapack_complex_float* t, + lapack_int ldt, lapack_complex_float* c, + lapack_int ldc ); +lapack_int LAPACKE_zgemqrt( int matrix_order, char side, char trans, + lapack_int m, lapack_int n, lapack_int k, + lapack_int nb, const lapack_complex_double* v, + lapack_int ldv, const lapack_complex_double* t, + lapack_int ldt, lapack_complex_double* c, + lapack_int ldc ); + +lapack_int LAPACKE_sgeqrt( int matrix_order, lapack_int m, lapack_int n, + lapack_int nb, float* a, lapack_int lda, float* t, + lapack_int ldt ); +lapack_int LAPACKE_dgeqrt( int matrix_order, lapack_int m, lapack_int n, + lapack_int nb, 
double* a, lapack_int lda, double* t, + lapack_int ldt ); +lapack_int LAPACKE_cgeqrt( int matrix_order, lapack_int m, lapack_int n, + lapack_int nb, lapack_complex_float* a, + lapack_int lda, lapack_complex_float* t, + lapack_int ldt ); +lapack_int LAPACKE_zgeqrt( int matrix_order, lapack_int m, lapack_int n, + lapack_int nb, lapack_complex_double* a, + lapack_int lda, lapack_complex_double* t, + lapack_int ldt ); + +lapack_int LAPACKE_sgeqrt2( int matrix_order, lapack_int m, lapack_int n, + float* a, lapack_int lda, float* t, + lapack_int ldt ); +lapack_int LAPACKE_dgeqrt2( int matrix_order, lapack_int m, lapack_int n, + double* a, lapack_int lda, double* t, + lapack_int ldt ); +lapack_int LAPACKE_cgeqrt2( int matrix_order, lapack_int m, lapack_int n, + lapack_complex_float* a, lapack_int lda, + lapack_complex_float* t, lapack_int ldt ); +lapack_int LAPACKE_zgeqrt2( int matrix_order, lapack_int m, lapack_int n, + lapack_complex_double* a, lapack_int lda, + lapack_complex_double* t, lapack_int ldt ); + +lapack_int LAPACKE_sgeqrt3( int matrix_order, lapack_int m, lapack_int n, + float* a, lapack_int lda, float* t, + lapack_int ldt ); +lapack_int LAPACKE_dgeqrt3( int matrix_order, lapack_int m, lapack_int n, + double* a, lapack_int lda, double* t, + lapack_int ldt ); +lapack_int LAPACKE_cgeqrt3( int matrix_order, lapack_int m, lapack_int n, + lapack_complex_float* a, lapack_int lda, + lapack_complex_float* t, lapack_int ldt ); +lapack_int LAPACKE_zgeqrt3( int matrix_order, lapack_int m, lapack_int n, + lapack_complex_double* a, lapack_int lda, + lapack_complex_double* t, lapack_int ldt ); + +lapack_int LAPACKE_stpmqrt( int matrix_order, char side, char trans, + lapack_int m, lapack_int n, lapack_int k, + lapack_int l, lapack_int nb, const float* v, + lapack_int ldv, const float* t, lapack_int ldt, + float* a, lapack_int lda, float* b, + lapack_int ldb ); +lapack_int LAPACKE_dtpmqrt( int matrix_order, char side, char trans, + lapack_int m, lapack_int n, lapack_int k, 
+ lapack_int l, lapack_int nb, const double* v, + lapack_int ldv, const double* t, lapack_int ldt, + double* a, lapack_int lda, double* b, + lapack_int ldb ); +lapack_int LAPACKE_ctpmqrt( int matrix_order, char side, char trans, + lapack_int m, lapack_int n, lapack_int k, + lapack_int l, lapack_int nb, + const lapack_complex_float* v, lapack_int ldv, + const lapack_complex_float* t, lapack_int ldt, + lapack_complex_float* a, lapack_int lda, + lapack_complex_float* b, lapack_int ldb ); +lapack_int LAPACKE_ztpmqrt( int matrix_order, char side, char trans, + lapack_int m, lapack_int n, lapack_int k, + lapack_int l, lapack_int nb, + const lapack_complex_double* v, lapack_int ldv, + const lapack_complex_double* t, lapack_int ldt, + lapack_complex_double* a, lapack_int lda, + lapack_complex_double* b, lapack_int ldb ); + +lapack_int LAPACKE_dtpqrt( int matrix_order, lapack_int m, lapack_int n, + lapack_int l, lapack_int nb, double* a, + lapack_int lda, double* b, lapack_int ldb, double* t, + lapack_int ldt ); +lapack_int LAPACKE_ctpqrt( int matrix_order, lapack_int m, lapack_int n, + lapack_int l, lapack_int nb, lapack_complex_float* a, + lapack_int lda, lapack_complex_float* t, + lapack_complex_float* b, lapack_int ldb, + lapack_int ldt ); +lapack_int LAPACKE_ztpqrt( int matrix_order, lapack_int m, lapack_int n, + lapack_int l, lapack_int nb, + lapack_complex_double* a, lapack_int lda, + lapack_complex_double* b, lapack_int ldb, + lapack_complex_double* t, lapack_int ldt ); + +lapack_int LAPACKE_stpqrt2( int matrix_order, lapack_int m, lapack_int n, + float* a, lapack_int lda, float* b, lapack_int ldb, + float* t, lapack_int ldt ); +lapack_int LAPACKE_dtpqrt2( int matrix_order, lapack_int m, lapack_int n, + double* a, lapack_int lda, double* b, + lapack_int ldb, double* t, lapack_int ldt ); +lapack_int LAPACKE_ctpqrt2( int matrix_order, lapack_int m, lapack_int n, + lapack_complex_float* a, lapack_int lda, + lapack_complex_float* b, lapack_int ldb, + 
lapack_complex_float* t, lapack_int ldt ); +lapack_int LAPACKE_ztpqrt2( int matrix_order, lapack_int m, lapack_int n, + lapack_complex_double* a, lapack_int lda, + lapack_complex_double* b, lapack_int ldb, + lapack_complex_double* t, lapack_int ldt ); + +lapack_int LAPACKE_stprfb( int matrix_order, char side, char trans, char direct, + char storev, lapack_int m, lapack_int n, + lapack_int k, lapack_int l, const float* v, + lapack_int ldv, const float* t, lapack_int ldt, + float* a, lapack_int lda, float* b, lapack_int ldb, + lapack_int myldwork ); +lapack_int LAPACKE_dtprfb( int matrix_order, char side, char trans, char direct, + char storev, lapack_int m, lapack_int n, + lapack_int k, lapack_int l, const double* v, + lapack_int ldv, const double* t, lapack_int ldt, + double* a, lapack_int lda, double* b, lapack_int ldb, + lapack_int myldwork ); +lapack_int LAPACKE_ctprfb( int matrix_order, char side, char trans, char direct, + char storev, lapack_int m, lapack_int n, + lapack_int k, lapack_int l, + const lapack_complex_float* v, lapack_int ldv, + const lapack_complex_float* t, lapack_int ldt, + lapack_complex_float* a, lapack_int lda, + lapack_complex_float* b, lapack_int ldb, + lapack_int myldwork ); +lapack_int LAPACKE_ztprfb( int matrix_order, char side, char trans, char direct, + char storev, lapack_int m, lapack_int n, + lapack_int k, lapack_int l, + const lapack_complex_double* v, lapack_int ldv, + const lapack_complex_double* t, lapack_int ldt, + lapack_complex_double* a, lapack_int lda, + lapack_complex_double* b, lapack_int ldb, + lapack_int myldwork ); + +lapack_int LAPACKE_sgemqrt_work( int matrix_order, char side, char trans, + lapack_int m, lapack_int n, lapack_int k, + lapack_int nb, const float* v, lapack_int ldv, + const float* t, lapack_int ldt, float* c, + lapack_int ldc, float* work ); +lapack_int LAPACKE_dgemqrt_work( int matrix_order, char side, char trans, + lapack_int m, lapack_int n, lapack_int k, + lapack_int nb, const double* v, 
lapack_int ldv, + const double* t, lapack_int ldt, double* c, + lapack_int ldc, double* work ); +lapack_int LAPACKE_cgemqrt_work( int matrix_order, char side, char trans, + lapack_int m, lapack_int n, lapack_int k, + lapack_int nb, const lapack_complex_float* v, + lapack_int ldv, const lapack_complex_float* t, + lapack_int ldt, lapack_complex_float* c, + lapack_int ldc, lapack_complex_float* work ); +lapack_int LAPACKE_zgemqrt_work( int matrix_order, char side, char trans, + lapack_int m, lapack_int n, lapack_int k, + lapack_int nb, const lapack_complex_double* v, + lapack_int ldv, const lapack_complex_double* t, + lapack_int ldt, lapack_complex_double* c, + lapack_int ldc, lapack_complex_double* work ); + +lapack_int LAPACKE_sgeqrt_work( int matrix_order, lapack_int m, lapack_int n, + lapack_int nb, float* a, lapack_int lda, + float* t, lapack_int ldt, float* work ); +lapack_int LAPACKE_dgeqrt_work( int matrix_order, lapack_int m, lapack_int n, + lapack_int nb, double* a, lapack_int lda, + double* t, lapack_int ldt, double* work ); +lapack_int LAPACKE_cgeqrt_work( int matrix_order, lapack_int m, lapack_int n, + lapack_int nb, lapack_complex_float* a, + lapack_int lda, lapack_complex_float* t, + lapack_int ldt, lapack_complex_float* work ); +lapack_int LAPACKE_zgeqrt_work( int matrix_order, lapack_int m, lapack_int n, + lapack_int nb, lapack_complex_double* a, + lapack_int lda, lapack_complex_double* t, + lapack_int ldt, lapack_complex_double* work ); + +lapack_int LAPACKE_sgeqrt2_work( int matrix_order, lapack_int m, lapack_int n, + float* a, lapack_int lda, float* t, + lapack_int ldt ); +lapack_int LAPACKE_dgeqrt2_work( int matrix_order, lapack_int m, lapack_int n, + double* a, lapack_int lda, double* t, + lapack_int ldt ); +lapack_int LAPACKE_cgeqrt2_work( int matrix_order, lapack_int m, lapack_int n, + lapack_complex_float* a, lapack_int lda, + lapack_complex_float* t, lapack_int ldt ); +lapack_int LAPACKE_zgeqrt2_work( int matrix_order, lapack_int m, 
lapack_int n, + lapack_complex_double* a, lapack_int lda, + lapack_complex_double* t, lapack_int ldt ); + +lapack_int LAPACKE_sgeqrt3_work( int matrix_order, lapack_int m, lapack_int n, + float* a, lapack_int lda, float* t, + lapack_int ldt ); +lapack_int LAPACKE_dgeqrt3_work( int matrix_order, lapack_int m, lapack_int n, + double* a, lapack_int lda, double* t, + lapack_int ldt ); +lapack_int LAPACKE_cgeqrt3_work( int matrix_order, lapack_int m, lapack_int n, + lapack_complex_float* a, lapack_int lda, + lapack_complex_float* t, lapack_int ldt ); +lapack_int LAPACKE_zgeqrt3_work( int matrix_order, lapack_int m, lapack_int n, + lapack_complex_double* a, lapack_int lda, + lapack_complex_double* t, lapack_int ldt ); + +lapack_int LAPACKE_stpmqrt_work( int matrix_order, char side, char trans, + lapack_int m, lapack_int n, lapack_int k, + lapack_int l, lapack_int nb, const float* v, + lapack_int ldv, const float* t, lapack_int ldt, + float* a, lapack_int lda, float* b, + lapack_int ldb, float* work ); +lapack_int LAPACKE_dtpmqrt_work( int matrix_order, char side, char trans, + lapack_int m, lapack_int n, lapack_int k, + lapack_int l, lapack_int nb, const double* v, + lapack_int ldv, const double* t, + lapack_int ldt, double* a, lapack_int lda, + double* b, lapack_int ldb, double* work ); +lapack_int LAPACKE_ctpmqrt_work( int matrix_order, char side, char trans, + lapack_int m, lapack_int n, lapack_int k, + lapack_int l, lapack_int nb, + const lapack_complex_float* v, lapack_int ldv, + const lapack_complex_float* t, lapack_int ldt, + lapack_complex_float* a, lapack_int lda, + lapack_complex_float* b, lapack_int ldb, + lapack_complex_float* work ); +lapack_int LAPACKE_ztpmqrt_work( int matrix_order, char side, char trans, + lapack_int m, lapack_int n, lapack_int k, + lapack_int l, lapack_int nb, + const lapack_complex_double* v, lapack_int ldv, + const lapack_complex_double* t, lapack_int ldt, + lapack_complex_double* a, lapack_int lda, + lapack_complex_double* b, 
lapack_int ldb, + lapack_complex_double* work ); + +lapack_int LAPACKE_dtpqrt_work( int matrix_order, lapack_int m, lapack_int n, + lapack_int l, lapack_int nb, double* a, + lapack_int lda, double* b, lapack_int ldb, + double* t, lapack_int ldt, double* work ); +lapack_int LAPACKE_ctpqrt_work( int matrix_order, lapack_int m, lapack_int n, + lapack_int l, lapack_int nb, + lapack_complex_float* a, lapack_int lda, + lapack_complex_float* t, + lapack_complex_float* b, lapack_int ldb, + lapack_int ldt, lapack_complex_float* work ); +lapack_int LAPACKE_ztpqrt_work( int matrix_order, lapack_int m, lapack_int n, + lapack_int l, lapack_int nb, + lapack_complex_double* a, lapack_int lda, + lapack_complex_double* b, lapack_int ldb, + lapack_complex_double* t, lapack_int ldt, + lapack_complex_double* work ); + +lapack_int LAPACKE_stpqrt2_work( int matrix_order, lapack_int m, lapack_int n, + float* a, lapack_int lda, float* b, + lapack_int ldb, float* t, lapack_int ldt ); +lapack_int LAPACKE_dtpqrt2_work( int matrix_order, lapack_int m, lapack_int n, + double* a, lapack_int lda, double* b, + lapack_int ldb, double* t, lapack_int ldt ); +lapack_int LAPACKE_ctpqrt2_work( int matrix_order, lapack_int m, lapack_int n, + lapack_complex_float* a, lapack_int lda, + lapack_complex_float* b, lapack_int ldb, + lapack_complex_float* t, lapack_int ldt ); +lapack_int LAPACKE_ztpqrt2_work( int matrix_order, lapack_int m, lapack_int n, + lapack_complex_double* a, lapack_int lda, + lapack_complex_double* b, lapack_int ldb, + lapack_complex_double* t, lapack_int ldt ); + +lapack_int LAPACKE_stprfb_work( int matrix_order, char side, char trans, + char direct, char storev, lapack_int m, + lapack_int n, lapack_int k, lapack_int l, + const float* v, lapack_int ldv, const float* t, + lapack_int ldt, float* a, lapack_int lda, + float* b, lapack_int ldb, const float* mywork, + lapack_int myldwork ); +lapack_int LAPACKE_dtprfb_work( int matrix_order, char side, char trans, + char direct, char storev, 
lapack_int m, + lapack_int n, lapack_int k, lapack_int l, + const double* v, lapack_int ldv, + const double* t, lapack_int ldt, double* a, + lapack_int lda, double* b, lapack_int ldb, + const double* mywork, lapack_int myldwork ); +lapack_int LAPACKE_ctprfb_work( int matrix_order, char side, char trans, + char direct, char storev, lapack_int m, + lapack_int n, lapack_int k, lapack_int l, + const lapack_complex_float* v, lapack_int ldv, + const lapack_complex_float* t, lapack_int ldt, + lapack_complex_float* a, lapack_int lda, + lapack_complex_float* b, lapack_int ldb, + const float* mywork, lapack_int myldwork ); +lapack_int LAPACKE_ztprfb_work( int matrix_order, char side, char trans, + char direct, char storev, lapack_int m, + lapack_int n, lapack_int k, lapack_int l, + const lapack_complex_double* v, lapack_int ldv, + const lapack_complex_double* t, lapack_int ldt, + lapack_complex_double* a, lapack_int lda, + lapack_complex_double* b, lapack_int ldb, + const double* mywork, lapack_int myldwork ); +//LAPACK 3.X.X +lapack_int LAPACKE_csyr( int matrix_order, char uplo, lapack_int n, + lapack_complex_float alpha, + const lapack_complex_float* x, lapack_int incx, + lapack_complex_float* a, lapack_int lda ); +lapack_int LAPACKE_zsyr( int matrix_order, char uplo, lapack_int n, + lapack_complex_double alpha, + const lapack_complex_double* x, lapack_int incx, + lapack_complex_double* a, lapack_int lda ); + +lapack_int LAPACKE_csyr_work( int matrix_order, char uplo, lapack_int n, + lapack_complex_float alpha, + const lapack_complex_float* x, + lapack_int incx, lapack_complex_float* a, + lapack_int lda ); +lapack_int LAPACKE_zsyr_work( int matrix_order, char uplo, lapack_int n, + lapack_complex_double alpha, + const lapack_complex_double* x, + lapack_int incx, lapack_complex_double* a, + lapack_int lda ); + + + +#define LAPACK_sgetrf LAPACK_GLOBAL(sgetrf,SGETRF) +#define LAPACK_dgetrf LAPACK_GLOBAL(dgetrf,DGETRF) +#define LAPACK_cgetrf LAPACK_GLOBAL(cgetrf,CGETRF) 
+#define LAPACK_zgetrf LAPACK_GLOBAL(zgetrf,ZGETRF) +#define LAPACK_sgbtrf LAPACK_GLOBAL(sgbtrf,SGBTRF) +#define LAPACK_dgbtrf LAPACK_GLOBAL(dgbtrf,DGBTRF) +#define LAPACK_cgbtrf LAPACK_GLOBAL(cgbtrf,CGBTRF) +#define LAPACK_zgbtrf LAPACK_GLOBAL(zgbtrf,ZGBTRF) +#define LAPACK_sgttrf LAPACK_GLOBAL(sgttrf,SGTTRF) +#define LAPACK_dgttrf LAPACK_GLOBAL(dgttrf,DGTTRF) +#define LAPACK_cgttrf LAPACK_GLOBAL(cgttrf,CGTTRF) +#define LAPACK_zgttrf LAPACK_GLOBAL(zgttrf,ZGTTRF) +#define LAPACK_spotrf LAPACK_GLOBAL(spotrf,SPOTRF) +#define LAPACK_dpotrf LAPACK_GLOBAL(dpotrf,DPOTRF) +#define LAPACK_cpotrf LAPACK_GLOBAL(cpotrf,CPOTRF) +#define LAPACK_zpotrf LAPACK_GLOBAL(zpotrf,ZPOTRF) +#define LAPACK_dpstrf LAPACK_GLOBAL(dpstrf,DPSTRF) +#define LAPACK_spstrf LAPACK_GLOBAL(spstrf,SPSTRF) +#define LAPACK_zpstrf LAPACK_GLOBAL(zpstrf,ZPSTRF) +#define LAPACK_cpstrf LAPACK_GLOBAL(cpstrf,CPSTRF) +#define LAPACK_dpftrf LAPACK_GLOBAL(dpftrf,DPFTRF) +#define LAPACK_spftrf LAPACK_GLOBAL(spftrf,SPFTRF) +#define LAPACK_zpftrf LAPACK_GLOBAL(zpftrf,ZPFTRF) +#define LAPACK_cpftrf LAPACK_GLOBAL(cpftrf,CPFTRF) +#define LAPACK_spptrf LAPACK_GLOBAL(spptrf,SPPTRF) +#define LAPACK_dpptrf LAPACK_GLOBAL(dpptrf,DPPTRF) +#define LAPACK_cpptrf LAPACK_GLOBAL(cpptrf,CPPTRF) +#define LAPACK_zpptrf LAPACK_GLOBAL(zpptrf,ZPPTRF) +#define LAPACK_spbtrf LAPACK_GLOBAL(spbtrf,SPBTRF) +#define LAPACK_dpbtrf LAPACK_GLOBAL(dpbtrf,DPBTRF) +#define LAPACK_cpbtrf LAPACK_GLOBAL(cpbtrf,CPBTRF) +#define LAPACK_zpbtrf LAPACK_GLOBAL(zpbtrf,ZPBTRF) +#define LAPACK_spttrf LAPACK_GLOBAL(spttrf,SPTTRF) +#define LAPACK_dpttrf LAPACK_GLOBAL(dpttrf,DPTTRF) +#define LAPACK_cpttrf LAPACK_GLOBAL(cpttrf,CPTTRF) +#define LAPACK_zpttrf LAPACK_GLOBAL(zpttrf,ZPTTRF) +#define LAPACK_ssytrf LAPACK_GLOBAL(ssytrf,SSYTRF) +#define LAPACK_dsytrf LAPACK_GLOBAL(dsytrf,DSYTRF) +#define LAPACK_csytrf LAPACK_GLOBAL(csytrf,CSYTRF) +#define LAPACK_zsytrf LAPACK_GLOBAL(zsytrf,ZSYTRF) +#define LAPACK_chetrf LAPACK_GLOBAL(chetrf,CHETRF) +#define LAPACK_zhetrf 
LAPACK_GLOBAL(zhetrf,ZHETRF) +#define LAPACK_ssptrf LAPACK_GLOBAL(ssptrf,SSPTRF) +#define LAPACK_dsptrf LAPACK_GLOBAL(dsptrf,DSPTRF) +#define LAPACK_csptrf LAPACK_GLOBAL(csptrf,CSPTRF) +#define LAPACK_zsptrf LAPACK_GLOBAL(zsptrf,ZSPTRF) +#define LAPACK_chptrf LAPACK_GLOBAL(chptrf,CHPTRF) +#define LAPACK_zhptrf LAPACK_GLOBAL(zhptrf,ZHPTRF) +#define LAPACK_sgetrs LAPACK_GLOBAL(sgetrs,SGETRS) +#define LAPACK_dgetrs LAPACK_GLOBAL(dgetrs,DGETRS) +#define LAPACK_cgetrs LAPACK_GLOBAL(cgetrs,CGETRS) +#define LAPACK_zgetrs LAPACK_GLOBAL(zgetrs,ZGETRS) +#define LAPACK_sgbtrs LAPACK_GLOBAL(sgbtrs,SGBTRS) +#define LAPACK_dgbtrs LAPACK_GLOBAL(dgbtrs,DGBTRS) +#define LAPACK_cgbtrs LAPACK_GLOBAL(cgbtrs,CGBTRS) +#define LAPACK_zgbtrs LAPACK_GLOBAL(zgbtrs,ZGBTRS) +#define LAPACK_sgttrs LAPACK_GLOBAL(sgttrs,SGTTRS) +#define LAPACK_dgttrs LAPACK_GLOBAL(dgttrs,DGTTRS) +#define LAPACK_cgttrs LAPACK_GLOBAL(cgttrs,CGTTRS) +#define LAPACK_zgttrs LAPACK_GLOBAL(zgttrs,ZGTTRS) +#define LAPACK_spotrs LAPACK_GLOBAL(spotrs,SPOTRS) +#define LAPACK_dpotrs LAPACK_GLOBAL(dpotrs,DPOTRS) +#define LAPACK_cpotrs LAPACK_GLOBAL(cpotrs,CPOTRS) +#define LAPACK_zpotrs LAPACK_GLOBAL(zpotrs,ZPOTRS) +#define LAPACK_dpftrs LAPACK_GLOBAL(dpftrs,DPFTRS) +#define LAPACK_spftrs LAPACK_GLOBAL(spftrs,SPFTRS) +#define LAPACK_zpftrs LAPACK_GLOBAL(zpftrs,ZPFTRS) +#define LAPACK_cpftrs LAPACK_GLOBAL(cpftrs,CPFTRS) +#define LAPACK_spptrs LAPACK_GLOBAL(spptrs,SPPTRS) +#define LAPACK_dpptrs LAPACK_GLOBAL(dpptrs,DPPTRS) +#define LAPACK_cpptrs LAPACK_GLOBAL(cpptrs,CPPTRS) +#define LAPACK_zpptrs LAPACK_GLOBAL(zpptrs,ZPPTRS) +#define LAPACK_spbtrs LAPACK_GLOBAL(spbtrs,SPBTRS) +#define LAPACK_dpbtrs LAPACK_GLOBAL(dpbtrs,DPBTRS) +#define LAPACK_cpbtrs LAPACK_GLOBAL(cpbtrs,CPBTRS) +#define LAPACK_zpbtrs LAPACK_GLOBAL(zpbtrs,ZPBTRS) +#define LAPACK_spttrs LAPACK_GLOBAL(spttrs,SPTTRS) +#define LAPACK_dpttrs LAPACK_GLOBAL(dpttrs,DPTTRS) +#define LAPACK_cpttrs LAPACK_GLOBAL(cpttrs,CPTTRS) +#define LAPACK_zpttrs 
LAPACK_GLOBAL(zpttrs,ZPTTRS) +#define LAPACK_ssytrs LAPACK_GLOBAL(ssytrs,SSYTRS) +#define LAPACK_dsytrs LAPACK_GLOBAL(dsytrs,DSYTRS) +#define LAPACK_csytrs LAPACK_GLOBAL(csytrs,CSYTRS) +#define LAPACK_zsytrs LAPACK_GLOBAL(zsytrs,ZSYTRS) +#define LAPACK_chetrs LAPACK_GLOBAL(chetrs,CHETRS) +#define LAPACK_zhetrs LAPACK_GLOBAL(zhetrs,ZHETRS) +#define LAPACK_ssptrs LAPACK_GLOBAL(ssptrs,SSPTRS) +#define LAPACK_dsptrs LAPACK_GLOBAL(dsptrs,DSPTRS) +#define LAPACK_csptrs LAPACK_GLOBAL(csptrs,CSPTRS) +#define LAPACK_zsptrs LAPACK_GLOBAL(zsptrs,ZSPTRS) +#define LAPACK_chptrs LAPACK_GLOBAL(chptrs,CHPTRS) +#define LAPACK_zhptrs LAPACK_GLOBAL(zhptrs,ZHPTRS) +#define LAPACK_strtrs LAPACK_GLOBAL(strtrs,STRTRS) +#define LAPACK_dtrtrs LAPACK_GLOBAL(dtrtrs,DTRTRS) +#define LAPACK_ctrtrs LAPACK_GLOBAL(ctrtrs,CTRTRS) +#define LAPACK_ztrtrs LAPACK_GLOBAL(ztrtrs,ZTRTRS) +#define LAPACK_stptrs LAPACK_GLOBAL(stptrs,STPTRS) +#define LAPACK_dtptrs LAPACK_GLOBAL(dtptrs,DTPTRS) +#define LAPACK_ctptrs LAPACK_GLOBAL(ctptrs,CTPTRS) +#define LAPACK_ztptrs LAPACK_GLOBAL(ztptrs,ZTPTRS) +#define LAPACK_stbtrs LAPACK_GLOBAL(stbtrs,STBTRS) +#define LAPACK_dtbtrs LAPACK_GLOBAL(dtbtrs,DTBTRS) +#define LAPACK_ctbtrs LAPACK_GLOBAL(ctbtrs,CTBTRS) +#define LAPACK_ztbtrs LAPACK_GLOBAL(ztbtrs,ZTBTRS) +#define LAPACK_sgecon LAPACK_GLOBAL(sgecon,SGECON) +#define LAPACK_dgecon LAPACK_GLOBAL(dgecon,DGECON) +#define LAPACK_cgecon LAPACK_GLOBAL(cgecon,CGECON) +#define LAPACK_zgecon LAPACK_GLOBAL(zgecon,ZGECON) +#define LAPACK_sgbcon LAPACK_GLOBAL(sgbcon,SGBCON) +#define LAPACK_dgbcon LAPACK_GLOBAL(dgbcon,DGBCON) +#define LAPACK_cgbcon LAPACK_GLOBAL(cgbcon,CGBCON) +#define LAPACK_zgbcon LAPACK_GLOBAL(zgbcon,ZGBCON) +#define LAPACK_sgtcon LAPACK_GLOBAL(sgtcon,SGTCON) +#define LAPACK_dgtcon LAPACK_GLOBAL(dgtcon,DGTCON) +#define LAPACK_cgtcon LAPACK_GLOBAL(cgtcon,CGTCON) +#define LAPACK_zgtcon LAPACK_GLOBAL(zgtcon,ZGTCON) +#define LAPACK_spocon LAPACK_GLOBAL(spocon,SPOCON) +#define LAPACK_dpocon 
LAPACK_GLOBAL(dpocon,DPOCON) +#define LAPACK_cpocon LAPACK_GLOBAL(cpocon,CPOCON) +#define LAPACK_zpocon LAPACK_GLOBAL(zpocon,ZPOCON) +#define LAPACK_sppcon LAPACK_GLOBAL(sppcon,SPPCON) +#define LAPACK_dppcon LAPACK_GLOBAL(dppcon,DPPCON) +#define LAPACK_cppcon LAPACK_GLOBAL(cppcon,CPPCON) +#define LAPACK_zppcon LAPACK_GLOBAL(zppcon,ZPPCON) +#define LAPACK_spbcon LAPACK_GLOBAL(spbcon,SPBCON) +#define LAPACK_dpbcon LAPACK_GLOBAL(dpbcon,DPBCON) +#define LAPACK_cpbcon LAPACK_GLOBAL(cpbcon,CPBCON) +#define LAPACK_zpbcon LAPACK_GLOBAL(zpbcon,ZPBCON) +#define LAPACK_sptcon LAPACK_GLOBAL(sptcon,SPTCON) +#define LAPACK_dptcon LAPACK_GLOBAL(dptcon,DPTCON) +#define LAPACK_cptcon LAPACK_GLOBAL(cptcon,CPTCON) +#define LAPACK_zptcon LAPACK_GLOBAL(zptcon,ZPTCON) +#define LAPACK_ssycon LAPACK_GLOBAL(ssycon,SSYCON) +#define LAPACK_dsycon LAPACK_GLOBAL(dsycon,DSYCON) +#define LAPACK_csycon LAPACK_GLOBAL(csycon,CSYCON) +#define LAPACK_zsycon LAPACK_GLOBAL(zsycon,ZSYCON) +#define LAPACK_checon LAPACK_GLOBAL(checon,CHECON) +#define LAPACK_zhecon LAPACK_GLOBAL(zhecon,ZHECON) +#define LAPACK_sspcon LAPACK_GLOBAL(sspcon,SSPCON) +#define LAPACK_dspcon LAPACK_GLOBAL(dspcon,DSPCON) +#define LAPACK_cspcon LAPACK_GLOBAL(cspcon,CSPCON) +#define LAPACK_zspcon LAPACK_GLOBAL(zspcon,ZSPCON) +#define LAPACK_chpcon LAPACK_GLOBAL(chpcon,CHPCON) +#define LAPACK_zhpcon LAPACK_GLOBAL(zhpcon,ZHPCON) +#define LAPACK_strcon LAPACK_GLOBAL(strcon,STRCON) +#define LAPACK_dtrcon LAPACK_GLOBAL(dtrcon,DTRCON) +#define LAPACK_ctrcon LAPACK_GLOBAL(ctrcon,CTRCON) +#define LAPACK_ztrcon LAPACK_GLOBAL(ztrcon,ZTRCON) +#define LAPACK_stpcon LAPACK_GLOBAL(stpcon,STPCON) +#define LAPACK_dtpcon LAPACK_GLOBAL(dtpcon,DTPCON) +#define LAPACK_ctpcon LAPACK_GLOBAL(ctpcon,CTPCON) +#define LAPACK_ztpcon LAPACK_GLOBAL(ztpcon,ZTPCON) +#define LAPACK_stbcon LAPACK_GLOBAL(stbcon,STBCON) +#define LAPACK_dtbcon LAPACK_GLOBAL(dtbcon,DTBCON) +#define LAPACK_ctbcon LAPACK_GLOBAL(ctbcon,CTBCON) +#define LAPACK_ztbcon 
LAPACK_GLOBAL(ztbcon,ZTBCON) +#define LAPACK_sgerfs LAPACK_GLOBAL(sgerfs,SGERFS) +#define LAPACK_dgerfs LAPACK_GLOBAL(dgerfs,DGERFS) +#define LAPACK_cgerfs LAPACK_GLOBAL(cgerfs,CGERFS) +#define LAPACK_zgerfs LAPACK_GLOBAL(zgerfs,ZGERFS) +#define LAPACK_dgerfsx LAPACK_GLOBAL(dgerfsx,DGERFSX) +#define LAPACK_sgerfsx LAPACK_GLOBAL(sgerfsx,SGERFSX) +#define LAPACK_zgerfsx LAPACK_GLOBAL(zgerfsx,ZGERFSX) +#define LAPACK_cgerfsx LAPACK_GLOBAL(cgerfsx,CGERFSX) +#define LAPACK_sgbrfs LAPACK_GLOBAL(sgbrfs,SGBRFS) +#define LAPACK_dgbrfs LAPACK_GLOBAL(dgbrfs,DGBRFS) +#define LAPACK_cgbrfs LAPACK_GLOBAL(cgbrfs,CGBRFS) +#define LAPACK_zgbrfs LAPACK_GLOBAL(zgbrfs,ZGBRFS) +#define LAPACK_dgbrfsx LAPACK_GLOBAL(dgbrfsx,DGBRFSX) +#define LAPACK_sgbrfsx LAPACK_GLOBAL(sgbrfsx,SGBRFSX) +#define LAPACK_zgbrfsx LAPACK_GLOBAL(zgbrfsx,ZGBRFSX) +#define LAPACK_cgbrfsx LAPACK_GLOBAL(cgbrfsx,CGBRFSX) +#define LAPACK_sgtrfs LAPACK_GLOBAL(sgtrfs,SGTRFS) +#define LAPACK_dgtrfs LAPACK_GLOBAL(dgtrfs,DGTRFS) +#define LAPACK_cgtrfs LAPACK_GLOBAL(cgtrfs,CGTRFS) +#define LAPACK_zgtrfs LAPACK_GLOBAL(zgtrfs,ZGTRFS) +#define LAPACK_sporfs LAPACK_GLOBAL(sporfs,SPORFS) +#define LAPACK_dporfs LAPACK_GLOBAL(dporfs,DPORFS) +#define LAPACK_cporfs LAPACK_GLOBAL(cporfs,CPORFS) +#define LAPACK_zporfs LAPACK_GLOBAL(zporfs,ZPORFS) +#define LAPACK_dporfsx LAPACK_GLOBAL(dporfsx,DPORFSX) +#define LAPACK_sporfsx LAPACK_GLOBAL(sporfsx,SPORFSX) +#define LAPACK_zporfsx LAPACK_GLOBAL(zporfsx,ZPORFSX) +#define LAPACK_cporfsx LAPACK_GLOBAL(cporfsx,CPORFSX) +#define LAPACK_spprfs LAPACK_GLOBAL(spprfs,SPPRFS) +#define LAPACK_dpprfs LAPACK_GLOBAL(dpprfs,DPPRFS) +#define LAPACK_cpprfs LAPACK_GLOBAL(cpprfs,CPPRFS) +#define LAPACK_zpprfs LAPACK_GLOBAL(zpprfs,ZPPRFS) +#define LAPACK_spbrfs LAPACK_GLOBAL(spbrfs,SPBRFS) +#define LAPACK_dpbrfs LAPACK_GLOBAL(dpbrfs,DPBRFS) +#define LAPACK_cpbrfs LAPACK_GLOBAL(cpbrfs,CPBRFS) +#define LAPACK_zpbrfs LAPACK_GLOBAL(zpbrfs,ZPBRFS) +#define LAPACK_sptrfs LAPACK_GLOBAL(sptrfs,SPTRFS) +#define 
LAPACK_dptrfs LAPACK_GLOBAL(dptrfs,DPTRFS) +#define LAPACK_cptrfs LAPACK_GLOBAL(cptrfs,CPTRFS) +#define LAPACK_zptrfs LAPACK_GLOBAL(zptrfs,ZPTRFS) +#define LAPACK_ssyrfs LAPACK_GLOBAL(ssyrfs,SSYRFS) +#define LAPACK_dsyrfs LAPACK_GLOBAL(dsyrfs,DSYRFS) +#define LAPACK_csyrfs LAPACK_GLOBAL(csyrfs,CSYRFS) +#define LAPACK_zsyrfs LAPACK_GLOBAL(zsyrfs,ZSYRFS) +#define LAPACK_dsyrfsx LAPACK_GLOBAL(dsyrfsx,DSYRFSX) +#define LAPACK_ssyrfsx LAPACK_GLOBAL(ssyrfsx,SSYRFSX) +#define LAPACK_zsyrfsx LAPACK_GLOBAL(zsyrfsx,ZSYRFSX) +#define LAPACK_csyrfsx LAPACK_GLOBAL(csyrfsx,CSYRFSX) +#define LAPACK_cherfs LAPACK_GLOBAL(cherfs,CHERFS) +#define LAPACK_zherfs LAPACK_GLOBAL(zherfs,ZHERFS) +#define LAPACK_zherfsx LAPACK_GLOBAL(zherfsx,ZHERFSX) +#define LAPACK_cherfsx LAPACK_GLOBAL(cherfsx,CHERFSX) +#define LAPACK_ssprfs LAPACK_GLOBAL(ssprfs,SSPRFS) +#define LAPACK_dsprfs LAPACK_GLOBAL(dsprfs,DSPRFS) +#define LAPACK_csprfs LAPACK_GLOBAL(csprfs,CSPRFS) +#define LAPACK_zsprfs LAPACK_GLOBAL(zsprfs,ZSPRFS) +#define LAPACK_chprfs LAPACK_GLOBAL(chprfs,CHPRFS) +#define LAPACK_zhprfs LAPACK_GLOBAL(zhprfs,ZHPRFS) +#define LAPACK_strrfs LAPACK_GLOBAL(strrfs,STRRFS) +#define LAPACK_dtrrfs LAPACK_GLOBAL(dtrrfs,DTRRFS) +#define LAPACK_ctrrfs LAPACK_GLOBAL(ctrrfs,CTRRFS) +#define LAPACK_ztrrfs LAPACK_GLOBAL(ztrrfs,ZTRRFS) +#define LAPACK_stprfs LAPACK_GLOBAL(stprfs,STPRFS) +#define LAPACK_dtprfs LAPACK_GLOBAL(dtprfs,DTPRFS) +#define LAPACK_ctprfs LAPACK_GLOBAL(ctprfs,CTPRFS) +#define LAPACK_ztprfs LAPACK_GLOBAL(ztprfs,ZTPRFS) +#define LAPACK_stbrfs LAPACK_GLOBAL(stbrfs,STBRFS) +#define LAPACK_dtbrfs LAPACK_GLOBAL(dtbrfs,DTBRFS) +#define LAPACK_ctbrfs LAPACK_GLOBAL(ctbrfs,CTBRFS) +#define LAPACK_ztbrfs LAPACK_GLOBAL(ztbrfs,ZTBRFS) +#define LAPACK_sgetri LAPACK_GLOBAL(sgetri,SGETRI) +#define LAPACK_dgetri LAPACK_GLOBAL(dgetri,DGETRI) +#define LAPACK_cgetri LAPACK_GLOBAL(cgetri,CGETRI) +#define LAPACK_zgetri LAPACK_GLOBAL(zgetri,ZGETRI) +#define LAPACK_spotri LAPACK_GLOBAL(spotri,SPOTRI) +#define 
LAPACK_dpotri LAPACK_GLOBAL(dpotri,DPOTRI) +#define LAPACK_cpotri LAPACK_GLOBAL(cpotri,CPOTRI) +#define LAPACK_zpotri LAPACK_GLOBAL(zpotri,ZPOTRI) +#define LAPACK_dpftri LAPACK_GLOBAL(dpftri,DPFTRI) +#define LAPACK_spftri LAPACK_GLOBAL(spftri,SPFTRI) +#define LAPACK_zpftri LAPACK_GLOBAL(zpftri,ZPFTRI) +#define LAPACK_cpftri LAPACK_GLOBAL(cpftri,CPFTRI) +#define LAPACK_spptri LAPACK_GLOBAL(spptri,SPPTRI) +#define LAPACK_dpptri LAPACK_GLOBAL(dpptri,DPPTRI) +#define LAPACK_cpptri LAPACK_GLOBAL(cpptri,CPPTRI) +#define LAPACK_zpptri LAPACK_GLOBAL(zpptri,ZPPTRI) +#define LAPACK_ssytri LAPACK_GLOBAL(ssytri,SSYTRI) +#define LAPACK_dsytri LAPACK_GLOBAL(dsytri,DSYTRI) +#define LAPACK_csytri LAPACK_GLOBAL(csytri,CSYTRI) +#define LAPACK_zsytri LAPACK_GLOBAL(zsytri,ZSYTRI) +#define LAPACK_chetri LAPACK_GLOBAL(chetri,CHETRI) +#define LAPACK_zhetri LAPACK_GLOBAL(zhetri,ZHETRI) +#define LAPACK_ssptri LAPACK_GLOBAL(ssptri,SSPTRI) +#define LAPACK_dsptri LAPACK_GLOBAL(dsptri,DSPTRI) +#define LAPACK_csptri LAPACK_GLOBAL(csptri,CSPTRI) +#define LAPACK_zsptri LAPACK_GLOBAL(zsptri,ZSPTRI) +#define LAPACK_chptri LAPACK_GLOBAL(chptri,CHPTRI) +#define LAPACK_zhptri LAPACK_GLOBAL(zhptri,ZHPTRI) +#define LAPACK_strtri LAPACK_GLOBAL(strtri,STRTRI) +#define LAPACK_dtrtri LAPACK_GLOBAL(dtrtri,DTRTRI) +#define LAPACK_ctrtri LAPACK_GLOBAL(ctrtri,CTRTRI) +#define LAPACK_ztrtri LAPACK_GLOBAL(ztrtri,ZTRTRI) +#define LAPACK_dtftri LAPACK_GLOBAL(dtftri,DTFTRI) +#define LAPACK_stftri LAPACK_GLOBAL(stftri,STFTRI) +#define LAPACK_ztftri LAPACK_GLOBAL(ztftri,ZTFTRI) +#define LAPACK_ctftri LAPACK_GLOBAL(ctftri,CTFTRI) +#define LAPACK_stptri LAPACK_GLOBAL(stptri,STPTRI) +#define LAPACK_dtptri LAPACK_GLOBAL(dtptri,DTPTRI) +#define LAPACK_ctptri LAPACK_GLOBAL(ctptri,CTPTRI) +#define LAPACK_ztptri LAPACK_GLOBAL(ztptri,ZTPTRI) +#define LAPACK_sgeequ LAPACK_GLOBAL(sgeequ,SGEEQU) +#define LAPACK_dgeequ LAPACK_GLOBAL(dgeequ,DGEEQU) +#define LAPACK_cgeequ LAPACK_GLOBAL(cgeequ,CGEEQU) +#define LAPACK_zgeequ 
LAPACK_GLOBAL(zgeequ,ZGEEQU) +#define LAPACK_dgeequb LAPACK_GLOBAL(dgeequb,DGEEQUB) +#define LAPACK_sgeequb LAPACK_GLOBAL(sgeequb,SGEEQUB) +#define LAPACK_zgeequb LAPACK_GLOBAL(zgeequb,ZGEEQUB) +#define LAPACK_cgeequb LAPACK_GLOBAL(cgeequb,CGEEQUB) +#define LAPACK_sgbequ LAPACK_GLOBAL(sgbequ,SGBEQU) +#define LAPACK_dgbequ LAPACK_GLOBAL(dgbequ,DGBEQU) +#define LAPACK_cgbequ LAPACK_GLOBAL(cgbequ,CGBEQU) +#define LAPACK_zgbequ LAPACK_GLOBAL(zgbequ,ZGBEQU) +#define LAPACK_dgbequb LAPACK_GLOBAL(dgbequb,DGBEQUB) +#define LAPACK_sgbequb LAPACK_GLOBAL(sgbequb,SGBEQUB) +#define LAPACK_zgbequb LAPACK_GLOBAL(zgbequb,ZGBEQUB) +#define LAPACK_cgbequb LAPACK_GLOBAL(cgbequb,CGBEQUB) +#define LAPACK_spoequ LAPACK_GLOBAL(spoequ,SPOEQU) +#define LAPACK_dpoequ LAPACK_GLOBAL(dpoequ,DPOEQU) +#define LAPACK_cpoequ LAPACK_GLOBAL(cpoequ,CPOEQU) +#define LAPACK_zpoequ LAPACK_GLOBAL(zpoequ,ZPOEQU) +#define LAPACK_dpoequb LAPACK_GLOBAL(dpoequb,DPOEQUB) +#define LAPACK_spoequb LAPACK_GLOBAL(spoequb,SPOEQUB) +#define LAPACK_zpoequb LAPACK_GLOBAL(zpoequb,ZPOEQUB) +#define LAPACK_cpoequb LAPACK_GLOBAL(cpoequb,CPOEQUB) +#define LAPACK_sppequ LAPACK_GLOBAL(sppequ,SPPEQU) +#define LAPACK_dppequ LAPACK_GLOBAL(dppequ,DPPEQU) +#define LAPACK_cppequ LAPACK_GLOBAL(cppequ,CPPEQU) +#define LAPACK_zppequ LAPACK_GLOBAL(zppequ,ZPPEQU) +#define LAPACK_spbequ LAPACK_GLOBAL(spbequ,SPBEQU) +#define LAPACK_dpbequ LAPACK_GLOBAL(dpbequ,DPBEQU) +#define LAPACK_cpbequ LAPACK_GLOBAL(cpbequ,CPBEQU) +#define LAPACK_zpbequ LAPACK_GLOBAL(zpbequ,ZPBEQU) +#define LAPACK_dsyequb LAPACK_GLOBAL(dsyequb,DSYEQUB) +#define LAPACK_ssyequb LAPACK_GLOBAL(ssyequb,SSYEQUB) +#define LAPACK_zsyequb LAPACK_GLOBAL(zsyequb,ZSYEQUB) +#define LAPACK_csyequb LAPACK_GLOBAL(csyequb,CSYEQUB) +#define LAPACK_zheequb LAPACK_GLOBAL(zheequb,ZHEEQUB) +#define LAPACK_cheequb LAPACK_GLOBAL(cheequb,CHEEQUB) +#define LAPACK_sgesv LAPACK_GLOBAL(sgesv,SGESV) +#define LAPACK_dgesv LAPACK_GLOBAL(dgesv,DGESV) +#define LAPACK_cgesv LAPACK_GLOBAL(cgesv,CGESV) 
+#define LAPACK_zgesv LAPACK_GLOBAL(zgesv,ZGESV) +#define LAPACK_dsgesv LAPACK_GLOBAL(dsgesv,DSGESV) +#define LAPACK_zcgesv LAPACK_GLOBAL(zcgesv,ZCGESV) +#define LAPACK_sgesvx LAPACK_GLOBAL(sgesvx,SGESVX) +#define LAPACK_dgesvx LAPACK_GLOBAL(dgesvx,DGESVX) +#define LAPACK_cgesvx LAPACK_GLOBAL(cgesvx,CGESVX) +#define LAPACK_zgesvx LAPACK_GLOBAL(zgesvx,ZGESVX) +#define LAPACK_dgesvxx LAPACK_GLOBAL(dgesvxx,DGESVXX) +#define LAPACK_sgesvxx LAPACK_GLOBAL(sgesvxx,SGESVXX) +#define LAPACK_zgesvxx LAPACK_GLOBAL(zgesvxx,ZGESVXX) +#define LAPACK_cgesvxx LAPACK_GLOBAL(cgesvxx,CGESVXX) +#define LAPACK_sgbsv LAPACK_GLOBAL(sgbsv,SGBSV) +#define LAPACK_dgbsv LAPACK_GLOBAL(dgbsv,DGBSV) +#define LAPACK_cgbsv LAPACK_GLOBAL(cgbsv,CGBSV) +#define LAPACK_zgbsv LAPACK_GLOBAL(zgbsv,ZGBSV) +#define LAPACK_sgbsvx LAPACK_GLOBAL(sgbsvx,SGBSVX) +#define LAPACK_dgbsvx LAPACK_GLOBAL(dgbsvx,DGBSVX) +#define LAPACK_cgbsvx LAPACK_GLOBAL(cgbsvx,CGBSVX) +#define LAPACK_zgbsvx LAPACK_GLOBAL(zgbsvx,ZGBSVX) +#define LAPACK_dgbsvxx LAPACK_GLOBAL(dgbsvxx,DGBSVXX) +#define LAPACK_sgbsvxx LAPACK_GLOBAL(sgbsvxx,SGBSVXX) +#define LAPACK_zgbsvxx LAPACK_GLOBAL(zgbsvxx,ZGBSVXX) +#define LAPACK_cgbsvxx LAPACK_GLOBAL(cgbsvxx,CGBSVXX) +#define LAPACK_sgtsv LAPACK_GLOBAL(sgtsv,SGTSV) +#define LAPACK_dgtsv LAPACK_GLOBAL(dgtsv,DGTSV) +#define LAPACK_cgtsv LAPACK_GLOBAL(cgtsv,CGTSV) +#define LAPACK_zgtsv LAPACK_GLOBAL(zgtsv,ZGTSV) +#define LAPACK_sgtsvx LAPACK_GLOBAL(sgtsvx,SGTSVX) +#define LAPACK_dgtsvx LAPACK_GLOBAL(dgtsvx,DGTSVX) +#define LAPACK_cgtsvx LAPACK_GLOBAL(cgtsvx,CGTSVX) +#define LAPACK_zgtsvx LAPACK_GLOBAL(zgtsvx,ZGTSVX) +#define LAPACK_sposv LAPACK_GLOBAL(sposv,SPOSV) +#define LAPACK_dposv LAPACK_GLOBAL(dposv,DPOSV) +#define LAPACK_cposv LAPACK_GLOBAL(cposv,CPOSV) +#define LAPACK_zposv LAPACK_GLOBAL(zposv,ZPOSV) +#define LAPACK_dsposv LAPACK_GLOBAL(dsposv,DSPOSV) +#define LAPACK_zcposv LAPACK_GLOBAL(zcposv,ZCPOSV) +#define LAPACK_sposvx LAPACK_GLOBAL(sposvx,SPOSVX) +#define LAPACK_dposvx 
LAPACK_GLOBAL(dposvx,DPOSVX) +#define LAPACK_cposvx LAPACK_GLOBAL(cposvx,CPOSVX) +#define LAPACK_zposvx LAPACK_GLOBAL(zposvx,ZPOSVX) +#define LAPACK_dposvxx LAPACK_GLOBAL(dposvxx,DPOSVXX) +#define LAPACK_sposvxx LAPACK_GLOBAL(sposvxx,SPOSVXX) +#define LAPACK_zposvxx LAPACK_GLOBAL(zposvxx,ZPOSVXX) +#define LAPACK_cposvxx LAPACK_GLOBAL(cposvxx,CPOSVXX) +#define LAPACK_sppsv LAPACK_GLOBAL(sppsv,SPPSV) +#define LAPACK_dppsv LAPACK_GLOBAL(dppsv,DPPSV) +#define LAPACK_cppsv LAPACK_GLOBAL(cppsv,CPPSV) +#define LAPACK_zppsv LAPACK_GLOBAL(zppsv,ZPPSV) +#define LAPACK_sppsvx LAPACK_GLOBAL(sppsvx,SPPSVX) +#define LAPACK_dppsvx LAPACK_GLOBAL(dppsvx,DPPSVX) +#define LAPACK_cppsvx LAPACK_GLOBAL(cppsvx,CPPSVX) +#define LAPACK_zppsvx LAPACK_GLOBAL(zppsvx,ZPPSVX) +#define LAPACK_spbsv LAPACK_GLOBAL(spbsv,SPBSV) +#define LAPACK_dpbsv LAPACK_GLOBAL(dpbsv,DPBSV) +#define LAPACK_cpbsv LAPACK_GLOBAL(cpbsv,CPBSV) +#define LAPACK_zpbsv LAPACK_GLOBAL(zpbsv,ZPBSV) +#define LAPACK_spbsvx LAPACK_GLOBAL(spbsvx,SPBSVX) +#define LAPACK_dpbsvx LAPACK_GLOBAL(dpbsvx,DPBSVX) +#define LAPACK_cpbsvx LAPACK_GLOBAL(cpbsvx,CPBSVX) +#define LAPACK_zpbsvx LAPACK_GLOBAL(zpbsvx,ZPBSVX) +#define LAPACK_sptsv LAPACK_GLOBAL(sptsv,SPTSV) +#define LAPACK_dptsv LAPACK_GLOBAL(dptsv,DPTSV) +#define LAPACK_cptsv LAPACK_GLOBAL(cptsv,CPTSV) +#define LAPACK_zptsv LAPACK_GLOBAL(zptsv,ZPTSV) +#define LAPACK_sptsvx LAPACK_GLOBAL(sptsvx,SPTSVX) +#define LAPACK_dptsvx LAPACK_GLOBAL(dptsvx,DPTSVX) +#define LAPACK_cptsvx LAPACK_GLOBAL(cptsvx,CPTSVX) +#define LAPACK_zptsvx LAPACK_GLOBAL(zptsvx,ZPTSVX) +#define LAPACK_ssysv LAPACK_GLOBAL(ssysv,SSYSV) +#define LAPACK_dsysv LAPACK_GLOBAL(dsysv,DSYSV) +#define LAPACK_csysv LAPACK_GLOBAL(csysv,CSYSV) +#define LAPACK_zsysv LAPACK_GLOBAL(zsysv,ZSYSV) +#define LAPACK_ssysvx LAPACK_GLOBAL(ssysvx,SSYSVX) +#define LAPACK_dsysvx LAPACK_GLOBAL(dsysvx,DSYSVX) +#define LAPACK_csysvx LAPACK_GLOBAL(csysvx,CSYSVX) +#define LAPACK_zsysvx LAPACK_GLOBAL(zsysvx,ZSYSVX) +#define LAPACK_dsysvxx 
LAPACK_GLOBAL(dsysvxx,DSYSVXX) +#define LAPACK_ssysvxx LAPACK_GLOBAL(ssysvxx,SSYSVXX) +#define LAPACK_zsysvxx LAPACK_GLOBAL(zsysvxx,ZSYSVXX) +#define LAPACK_csysvxx LAPACK_GLOBAL(csysvxx,CSYSVXX) +#define LAPACK_chesv LAPACK_GLOBAL(chesv,CHESV) +#define LAPACK_zhesv LAPACK_GLOBAL(zhesv,ZHESV) +#define LAPACK_chesvx LAPACK_GLOBAL(chesvx,CHESVX) +#define LAPACK_zhesvx LAPACK_GLOBAL(zhesvx,ZHESVX) +#define LAPACK_zhesvxx LAPACK_GLOBAL(zhesvxx,ZHESVXX) +#define LAPACK_chesvxx LAPACK_GLOBAL(chesvxx,CHESVXX) +#define LAPACK_sspsv LAPACK_GLOBAL(sspsv,SSPSV) +#define LAPACK_dspsv LAPACK_GLOBAL(dspsv,DSPSV) +#define LAPACK_cspsv LAPACK_GLOBAL(cspsv,CSPSV) +#define LAPACK_zspsv LAPACK_GLOBAL(zspsv,ZSPSV) +#define LAPACK_sspsvx LAPACK_GLOBAL(sspsvx,SSPSVX) +#define LAPACK_dspsvx LAPACK_GLOBAL(dspsvx,DSPSVX) +#define LAPACK_cspsvx LAPACK_GLOBAL(cspsvx,CSPSVX) +#define LAPACK_zspsvx LAPACK_GLOBAL(zspsvx,ZSPSVX) +#define LAPACK_chpsv LAPACK_GLOBAL(chpsv,CHPSV) +#define LAPACK_zhpsv LAPACK_GLOBAL(zhpsv,ZHPSV) +#define LAPACK_chpsvx LAPACK_GLOBAL(chpsvx,CHPSVX) +#define LAPACK_zhpsvx LAPACK_GLOBAL(zhpsvx,ZHPSVX) +#define LAPACK_sgeqrf LAPACK_GLOBAL(sgeqrf,SGEQRF) +#define LAPACK_dgeqrf LAPACK_GLOBAL(dgeqrf,DGEQRF) +#define LAPACK_cgeqrf LAPACK_GLOBAL(cgeqrf,CGEQRF) +#define LAPACK_zgeqrf LAPACK_GLOBAL(zgeqrf,ZGEQRF) +#define LAPACK_sgeqpf LAPACK_GLOBAL(sgeqpf,SGEQPF) +#define LAPACK_dgeqpf LAPACK_GLOBAL(dgeqpf,DGEQPF) +#define LAPACK_cgeqpf LAPACK_GLOBAL(cgeqpf,CGEQPF) +#define LAPACK_zgeqpf LAPACK_GLOBAL(zgeqpf,ZGEQPF) +#define LAPACK_sgeqp3 LAPACK_GLOBAL(sgeqp3,SGEQP3) +#define LAPACK_dgeqp3 LAPACK_GLOBAL(dgeqp3,DGEQP3) +#define LAPACK_cgeqp3 LAPACK_GLOBAL(cgeqp3,CGEQP3) +#define LAPACK_zgeqp3 LAPACK_GLOBAL(zgeqp3,ZGEQP3) +#define LAPACK_sorgqr LAPACK_GLOBAL(sorgqr,SORGQR) +#define LAPACK_dorgqr LAPACK_GLOBAL(dorgqr,DORGQR) +#define LAPACK_sormqr LAPACK_GLOBAL(sormqr,SORMQR) +#define LAPACK_dormqr LAPACK_GLOBAL(dormqr,DORMQR) +#define LAPACK_cungqr LAPACK_GLOBAL(cungqr,CUNGQR) 
+#define LAPACK_zungqr LAPACK_GLOBAL(zungqr,ZUNGQR) +#define LAPACK_cunmqr LAPACK_GLOBAL(cunmqr,CUNMQR) +#define LAPACK_zunmqr LAPACK_GLOBAL(zunmqr,ZUNMQR) +#define LAPACK_sgelqf LAPACK_GLOBAL(sgelqf,SGELQF) +#define LAPACK_dgelqf LAPACK_GLOBAL(dgelqf,DGELQF) +#define LAPACK_cgelqf LAPACK_GLOBAL(cgelqf,CGELQF) +#define LAPACK_zgelqf LAPACK_GLOBAL(zgelqf,ZGELQF) +#define LAPACK_sorglq LAPACK_GLOBAL(sorglq,SORGLQ) +#define LAPACK_dorglq LAPACK_GLOBAL(dorglq,DORGLQ) +#define LAPACK_sormlq LAPACK_GLOBAL(sormlq,SORMLQ) +#define LAPACK_dormlq LAPACK_GLOBAL(dormlq,DORMLQ) +#define LAPACK_cunglq LAPACK_GLOBAL(cunglq,CUNGLQ) +#define LAPACK_zunglq LAPACK_GLOBAL(zunglq,ZUNGLQ) +#define LAPACK_cunmlq LAPACK_GLOBAL(cunmlq,CUNMLQ) +#define LAPACK_zunmlq LAPACK_GLOBAL(zunmlq,ZUNMLQ) +#define LAPACK_sgeqlf LAPACK_GLOBAL(sgeqlf,SGEQLF) +#define LAPACK_dgeqlf LAPACK_GLOBAL(dgeqlf,DGEQLF) +#define LAPACK_cgeqlf LAPACK_GLOBAL(cgeqlf,CGEQLF) +#define LAPACK_zgeqlf LAPACK_GLOBAL(zgeqlf,ZGEQLF) +#define LAPACK_sorgql LAPACK_GLOBAL(sorgql,SORGQL) +#define LAPACK_dorgql LAPACK_GLOBAL(dorgql,DORGQL) +#define LAPACK_cungql LAPACK_GLOBAL(cungql,CUNGQL) +#define LAPACK_zungql LAPACK_GLOBAL(zungql,ZUNGQL) +#define LAPACK_sormql LAPACK_GLOBAL(sormql,SORMQL) +#define LAPACK_dormql LAPACK_GLOBAL(dormql,DORMQL) +#define LAPACK_cunmql LAPACK_GLOBAL(cunmql,CUNMQL) +#define LAPACK_zunmql LAPACK_GLOBAL(zunmql,ZUNMQL) +#define LAPACK_sgerqf LAPACK_GLOBAL(sgerqf,SGERQF) +#define LAPACK_dgerqf LAPACK_GLOBAL(dgerqf,DGERQF) +#define LAPACK_cgerqf LAPACK_GLOBAL(cgerqf,CGERQF) +#define LAPACK_zgerqf LAPACK_GLOBAL(zgerqf,ZGERQF) +#define LAPACK_sorgrq LAPACK_GLOBAL(sorgrq,SORGRQ) +#define LAPACK_dorgrq LAPACK_GLOBAL(dorgrq,DORGRQ) +#define LAPACK_cungrq LAPACK_GLOBAL(cungrq,CUNGRQ) +#define LAPACK_zungrq LAPACK_GLOBAL(zungrq,ZUNGRQ) +#define LAPACK_sormrq LAPACK_GLOBAL(sormrq,SORMRQ) +#define LAPACK_dormrq LAPACK_GLOBAL(dormrq,DORMRQ) +#define LAPACK_cunmrq LAPACK_GLOBAL(cunmrq,CUNMRQ) +#define LAPACK_zunmrq 
LAPACK_GLOBAL(zunmrq,ZUNMRQ) +#define LAPACK_stzrzf LAPACK_GLOBAL(stzrzf,STZRZF) +#define LAPACK_dtzrzf LAPACK_GLOBAL(dtzrzf,DTZRZF) +#define LAPACK_ctzrzf LAPACK_GLOBAL(ctzrzf,CTZRZF) +#define LAPACK_ztzrzf LAPACK_GLOBAL(ztzrzf,ZTZRZF) +#define LAPACK_sormrz LAPACK_GLOBAL(sormrz,SORMRZ) +#define LAPACK_dormrz LAPACK_GLOBAL(dormrz,DORMRZ) +#define LAPACK_cunmrz LAPACK_GLOBAL(cunmrz,CUNMRZ) +#define LAPACK_zunmrz LAPACK_GLOBAL(zunmrz,ZUNMRZ) +#define LAPACK_sggqrf LAPACK_GLOBAL(sggqrf,SGGQRF) +#define LAPACK_dggqrf LAPACK_GLOBAL(dggqrf,DGGQRF) +#define LAPACK_cggqrf LAPACK_GLOBAL(cggqrf,CGGQRF) +#define LAPACK_zggqrf LAPACK_GLOBAL(zggqrf,ZGGQRF) +#define LAPACK_sggrqf LAPACK_GLOBAL(sggrqf,SGGRQF) +#define LAPACK_dggrqf LAPACK_GLOBAL(dggrqf,DGGRQF) +#define LAPACK_cggrqf LAPACK_GLOBAL(cggrqf,CGGRQF) +#define LAPACK_zggrqf LAPACK_GLOBAL(zggrqf,ZGGRQF) +#define LAPACK_sgebrd LAPACK_GLOBAL(sgebrd,SGEBRD) +#define LAPACK_dgebrd LAPACK_GLOBAL(dgebrd,DGEBRD) +#define LAPACK_cgebrd LAPACK_GLOBAL(cgebrd,CGEBRD) +#define LAPACK_zgebrd LAPACK_GLOBAL(zgebrd,ZGEBRD) +#define LAPACK_sgbbrd LAPACK_GLOBAL(sgbbrd,SGBBRD) +#define LAPACK_dgbbrd LAPACK_GLOBAL(dgbbrd,DGBBRD) +#define LAPACK_cgbbrd LAPACK_GLOBAL(cgbbrd,CGBBRD) +#define LAPACK_zgbbrd LAPACK_GLOBAL(zgbbrd,ZGBBRD) +#define LAPACK_sorgbr LAPACK_GLOBAL(sorgbr,SORGBR) +#define LAPACK_dorgbr LAPACK_GLOBAL(dorgbr,DORGBR) +#define LAPACK_sormbr LAPACK_GLOBAL(sormbr,SORMBR) +#define LAPACK_dormbr LAPACK_GLOBAL(dormbr,DORMBR) +#define LAPACK_cungbr LAPACK_GLOBAL(cungbr,CUNGBR) +#define LAPACK_zungbr LAPACK_GLOBAL(zungbr,ZUNGBR) +#define LAPACK_cunmbr LAPACK_GLOBAL(cunmbr,CUNMBR) +#define LAPACK_zunmbr LAPACK_GLOBAL(zunmbr,ZUNMBR) +#define LAPACK_sbdsqr LAPACK_GLOBAL(sbdsqr,SBDSQR) +#define LAPACK_dbdsqr LAPACK_GLOBAL(dbdsqr,DBDSQR) +#define LAPACK_cbdsqr LAPACK_GLOBAL(cbdsqr,CBDSQR) +#define LAPACK_zbdsqr LAPACK_GLOBAL(zbdsqr,ZBDSQR) +#define LAPACK_sbdsdc LAPACK_GLOBAL(sbdsdc,SBDSDC) +#define LAPACK_dbdsdc 
LAPACK_GLOBAL(dbdsdc,DBDSDC) +#define LAPACK_ssytrd LAPACK_GLOBAL(ssytrd,SSYTRD) +#define LAPACK_dsytrd LAPACK_GLOBAL(dsytrd,DSYTRD) +#define LAPACK_sorgtr LAPACK_GLOBAL(sorgtr,SORGTR) +#define LAPACK_dorgtr LAPACK_GLOBAL(dorgtr,DORGTR) +#define LAPACK_sormtr LAPACK_GLOBAL(sormtr,SORMTR) +#define LAPACK_dormtr LAPACK_GLOBAL(dormtr,DORMTR) +#define LAPACK_chetrd LAPACK_GLOBAL(chetrd,CHETRD) +#define LAPACK_zhetrd LAPACK_GLOBAL(zhetrd,ZHETRD) +#define LAPACK_cungtr LAPACK_GLOBAL(cungtr,CUNGTR) +#define LAPACK_zungtr LAPACK_GLOBAL(zungtr,ZUNGTR) +#define LAPACK_cunmtr LAPACK_GLOBAL(cunmtr,CUNMTR) +#define LAPACK_zunmtr LAPACK_GLOBAL(zunmtr,ZUNMTR) +#define LAPACK_ssptrd LAPACK_GLOBAL(ssptrd,SSPTRD) +#define LAPACK_dsptrd LAPACK_GLOBAL(dsptrd,DSPTRD) +#define LAPACK_sopgtr LAPACK_GLOBAL(sopgtr,SOPGTR) +#define LAPACK_dopgtr LAPACK_GLOBAL(dopgtr,DOPGTR) +#define LAPACK_sopmtr LAPACK_GLOBAL(sopmtr,SOPMTR) +#define LAPACK_dopmtr LAPACK_GLOBAL(dopmtr,DOPMTR) +#define LAPACK_chptrd LAPACK_GLOBAL(chptrd,CHPTRD) +#define LAPACK_zhptrd LAPACK_GLOBAL(zhptrd,ZHPTRD) +#define LAPACK_cupgtr LAPACK_GLOBAL(cupgtr,CUPGTR) +#define LAPACK_zupgtr LAPACK_GLOBAL(zupgtr,ZUPGTR) +#define LAPACK_cupmtr LAPACK_GLOBAL(cupmtr,CUPMTR) +#define LAPACK_zupmtr LAPACK_GLOBAL(zupmtr,ZUPMTR) +#define LAPACK_ssbtrd LAPACK_GLOBAL(ssbtrd,SSBTRD) +#define LAPACK_dsbtrd LAPACK_GLOBAL(dsbtrd,DSBTRD) +#define LAPACK_chbtrd LAPACK_GLOBAL(chbtrd,CHBTRD) +#define LAPACK_zhbtrd LAPACK_GLOBAL(zhbtrd,ZHBTRD) +#define LAPACK_ssterf LAPACK_GLOBAL(ssterf,SSTERF) +#define LAPACK_dsterf LAPACK_GLOBAL(dsterf,DSTERF) +#define LAPACK_ssteqr LAPACK_GLOBAL(ssteqr,SSTEQR) +#define LAPACK_dsteqr LAPACK_GLOBAL(dsteqr,DSTEQR) +#define LAPACK_csteqr LAPACK_GLOBAL(csteqr,CSTEQR) +#define LAPACK_zsteqr LAPACK_GLOBAL(zsteqr,ZSTEQR) +#define LAPACK_sstemr LAPACK_GLOBAL(sstemr,SSTEMR) +#define LAPACK_dstemr LAPACK_GLOBAL(dstemr,DSTEMR) +#define LAPACK_cstemr LAPACK_GLOBAL(cstemr,CSTEMR) +#define LAPACK_zstemr 
LAPACK_GLOBAL(zstemr,ZSTEMR) +#define LAPACK_sstedc LAPACK_GLOBAL(sstedc,SSTEDC) +#define LAPACK_dstedc LAPACK_GLOBAL(dstedc,DSTEDC) +#define LAPACK_cstedc LAPACK_GLOBAL(cstedc,CSTEDC) +#define LAPACK_zstedc LAPACK_GLOBAL(zstedc,ZSTEDC) +#define LAPACK_sstegr LAPACK_GLOBAL(sstegr,SSTEGR) +#define LAPACK_dstegr LAPACK_GLOBAL(dstegr,DSTEGR) +#define LAPACK_cstegr LAPACK_GLOBAL(cstegr,CSTEGR) +#define LAPACK_zstegr LAPACK_GLOBAL(zstegr,ZSTEGR) +#define LAPACK_spteqr LAPACK_GLOBAL(spteqr,SPTEQR) +#define LAPACK_dpteqr LAPACK_GLOBAL(dpteqr,DPTEQR) +#define LAPACK_cpteqr LAPACK_GLOBAL(cpteqr,CPTEQR) +#define LAPACK_zpteqr LAPACK_GLOBAL(zpteqr,ZPTEQR) +#define LAPACK_sstebz LAPACK_GLOBAL(sstebz,SSTEBZ) +#define LAPACK_dstebz LAPACK_GLOBAL(dstebz,DSTEBZ) +#define LAPACK_sstein LAPACK_GLOBAL(sstein,SSTEIN) +#define LAPACK_dstein LAPACK_GLOBAL(dstein,DSTEIN) +#define LAPACK_cstein LAPACK_GLOBAL(cstein,CSTEIN) +#define LAPACK_zstein LAPACK_GLOBAL(zstein,ZSTEIN) +#define LAPACK_sdisna LAPACK_GLOBAL(sdisna,SDISNA) +#define LAPACK_ddisna LAPACK_GLOBAL(ddisna,DDISNA) +#define LAPACK_ssygst LAPACK_GLOBAL(ssygst,SSYGST) +#define LAPACK_dsygst LAPACK_GLOBAL(dsygst,DSYGST) +#define LAPACK_chegst LAPACK_GLOBAL(chegst,CHEGST) +#define LAPACK_zhegst LAPACK_GLOBAL(zhegst,ZHEGST) +#define LAPACK_sspgst LAPACK_GLOBAL(sspgst,SSPGST) +#define LAPACK_dspgst LAPACK_GLOBAL(dspgst,DSPGST) +#define LAPACK_chpgst LAPACK_GLOBAL(chpgst,CHPGST) +#define LAPACK_zhpgst LAPACK_GLOBAL(zhpgst,ZHPGST) +#define LAPACK_ssbgst LAPACK_GLOBAL(ssbgst,SSBGST) +#define LAPACK_dsbgst LAPACK_GLOBAL(dsbgst,DSBGST) +#define LAPACK_chbgst LAPACK_GLOBAL(chbgst,CHBGST) +#define LAPACK_zhbgst LAPACK_GLOBAL(zhbgst,ZHBGST) +#define LAPACK_spbstf LAPACK_GLOBAL(spbstf,SPBSTF) +#define LAPACK_dpbstf LAPACK_GLOBAL(dpbstf,DPBSTF) +#define LAPACK_cpbstf LAPACK_GLOBAL(cpbstf,CPBSTF) +#define LAPACK_zpbstf LAPACK_GLOBAL(zpbstf,ZPBSTF) +#define LAPACK_sgehrd LAPACK_GLOBAL(sgehrd,SGEHRD) +#define LAPACK_dgehrd 
LAPACK_GLOBAL(dgehrd,DGEHRD) +#define LAPACK_cgehrd LAPACK_GLOBAL(cgehrd,CGEHRD) +#define LAPACK_zgehrd LAPACK_GLOBAL(zgehrd,ZGEHRD) +#define LAPACK_sorghr LAPACK_GLOBAL(sorghr,SORGHR) +#define LAPACK_dorghr LAPACK_GLOBAL(dorghr,DORGHR) +#define LAPACK_sormhr LAPACK_GLOBAL(sormhr,SORMHR) +#define LAPACK_dormhr LAPACK_GLOBAL(dormhr,DORMHR) +#define LAPACK_cunghr LAPACK_GLOBAL(cunghr,CUNGHR) +#define LAPACK_zunghr LAPACK_GLOBAL(zunghr,ZUNGHR) +#define LAPACK_cunmhr LAPACK_GLOBAL(cunmhr,CUNMHR) +#define LAPACK_zunmhr LAPACK_GLOBAL(zunmhr,ZUNMHR) +#define LAPACK_sgebal LAPACK_GLOBAL(sgebal,SGEBAL) +#define LAPACK_dgebal LAPACK_GLOBAL(dgebal,DGEBAL) +#define LAPACK_cgebal LAPACK_GLOBAL(cgebal,CGEBAL) +#define LAPACK_zgebal LAPACK_GLOBAL(zgebal,ZGEBAL) +#define LAPACK_sgebak LAPACK_GLOBAL(sgebak,SGEBAK) +#define LAPACK_dgebak LAPACK_GLOBAL(dgebak,DGEBAK) +#define LAPACK_cgebak LAPACK_GLOBAL(cgebak,CGEBAK) +#define LAPACK_zgebak LAPACK_GLOBAL(zgebak,ZGEBAK) +#define LAPACK_shseqr LAPACK_GLOBAL(shseqr,SHSEQR) +#define LAPACK_dhseqr LAPACK_GLOBAL(dhseqr,DHSEQR) +#define LAPACK_chseqr LAPACK_GLOBAL(chseqr,CHSEQR) +#define LAPACK_zhseqr LAPACK_GLOBAL(zhseqr,ZHSEQR) +#define LAPACK_shsein LAPACK_GLOBAL(shsein,SHSEIN) +#define LAPACK_dhsein LAPACK_GLOBAL(dhsein,DHSEIN) +#define LAPACK_chsein LAPACK_GLOBAL(chsein,CHSEIN) +#define LAPACK_zhsein LAPACK_GLOBAL(zhsein,ZHSEIN) +#define LAPACK_strevc LAPACK_GLOBAL(strevc,STREVC) +#define LAPACK_dtrevc LAPACK_GLOBAL(dtrevc,DTREVC) +#define LAPACK_ctrevc LAPACK_GLOBAL(ctrevc,CTREVC) +#define LAPACK_ztrevc LAPACK_GLOBAL(ztrevc,ZTREVC) +#define LAPACK_strsna LAPACK_GLOBAL(strsna,STRSNA) +#define LAPACK_dtrsna LAPACK_GLOBAL(dtrsna,DTRSNA) +#define LAPACK_ctrsna LAPACK_GLOBAL(ctrsna,CTRSNA) +#define LAPACK_ztrsna LAPACK_GLOBAL(ztrsna,ZTRSNA) +#define LAPACK_strexc LAPACK_GLOBAL(strexc,STREXC) +#define LAPACK_dtrexc LAPACK_GLOBAL(dtrexc,DTREXC) +#define LAPACK_ctrexc LAPACK_GLOBAL(ctrexc,CTREXC) +#define LAPACK_ztrexc 
LAPACK_GLOBAL(ztrexc,ZTREXC) +#define LAPACK_strsen LAPACK_GLOBAL(strsen,STRSEN) +#define LAPACK_dtrsen LAPACK_GLOBAL(dtrsen,DTRSEN) +#define LAPACK_ctrsen LAPACK_GLOBAL(ctrsen,CTRSEN) +#define LAPACK_ztrsen LAPACK_GLOBAL(ztrsen,ZTRSEN) +#define LAPACK_strsyl LAPACK_GLOBAL(strsyl,STRSYL) +#define LAPACK_dtrsyl LAPACK_GLOBAL(dtrsyl,DTRSYL) +#define LAPACK_ctrsyl LAPACK_GLOBAL(ctrsyl,CTRSYL) +#define LAPACK_ztrsyl LAPACK_GLOBAL(ztrsyl,ZTRSYL) +#define LAPACK_sgghrd LAPACK_GLOBAL(sgghrd,SGGHRD) +#define LAPACK_dgghrd LAPACK_GLOBAL(dgghrd,DGGHRD) +#define LAPACK_cgghrd LAPACK_GLOBAL(cgghrd,CGGHRD) +#define LAPACK_zgghrd LAPACK_GLOBAL(zgghrd,ZGGHRD) +#define LAPACK_sggbal LAPACK_GLOBAL(sggbal,SGGBAL) +#define LAPACK_dggbal LAPACK_GLOBAL(dggbal,DGGBAL) +#define LAPACK_cggbal LAPACK_GLOBAL(cggbal,CGGBAL) +#define LAPACK_zggbal LAPACK_GLOBAL(zggbal,ZGGBAL) +#define LAPACK_sggbak LAPACK_GLOBAL(sggbak,SGGBAK) +#define LAPACK_dggbak LAPACK_GLOBAL(dggbak,DGGBAK) +#define LAPACK_cggbak LAPACK_GLOBAL(cggbak,CGGBAK) +#define LAPACK_zggbak LAPACK_GLOBAL(zggbak,ZGGBAK) +#define LAPACK_shgeqz LAPACK_GLOBAL(shgeqz,SHGEQZ) +#define LAPACK_dhgeqz LAPACK_GLOBAL(dhgeqz,DHGEQZ) +#define LAPACK_chgeqz LAPACK_GLOBAL(chgeqz,CHGEQZ) +#define LAPACK_zhgeqz LAPACK_GLOBAL(zhgeqz,ZHGEQZ) +#define LAPACK_stgevc LAPACK_GLOBAL(stgevc,STGEVC) +#define LAPACK_dtgevc LAPACK_GLOBAL(dtgevc,DTGEVC) +#define LAPACK_ctgevc LAPACK_GLOBAL(ctgevc,CTGEVC) +#define LAPACK_ztgevc LAPACK_GLOBAL(ztgevc,ZTGEVC) +#define LAPACK_stgexc LAPACK_GLOBAL(stgexc,STGEXC) +#define LAPACK_dtgexc LAPACK_GLOBAL(dtgexc,DTGEXC) +#define LAPACK_ctgexc LAPACK_GLOBAL(ctgexc,CTGEXC) +#define LAPACK_ztgexc LAPACK_GLOBAL(ztgexc,ZTGEXC) +#define LAPACK_stgsen LAPACK_GLOBAL(stgsen,STGSEN) +#define LAPACK_dtgsen LAPACK_GLOBAL(dtgsen,DTGSEN) +#define LAPACK_ctgsen LAPACK_GLOBAL(ctgsen,CTGSEN) +#define LAPACK_ztgsen LAPACK_GLOBAL(ztgsen,ZTGSEN) +#define LAPACK_stgsyl LAPACK_GLOBAL(stgsyl,STGSYL) +#define LAPACK_dtgsyl 
LAPACK_GLOBAL(dtgsyl,DTGSYL) +#define LAPACK_ctgsyl LAPACK_GLOBAL(ctgsyl,CTGSYL) +#define LAPACK_ztgsyl LAPACK_GLOBAL(ztgsyl,ZTGSYL) +#define LAPACK_stgsna LAPACK_GLOBAL(stgsna,STGSNA) +#define LAPACK_dtgsna LAPACK_GLOBAL(dtgsna,DTGSNA) +#define LAPACK_ctgsna LAPACK_GLOBAL(ctgsna,CTGSNA) +#define LAPACK_ztgsna LAPACK_GLOBAL(ztgsna,ZTGSNA) +#define LAPACK_sggsvp LAPACK_GLOBAL(sggsvp,SGGSVP) +#define LAPACK_dggsvp LAPACK_GLOBAL(dggsvp,DGGSVP) +#define LAPACK_cggsvp LAPACK_GLOBAL(cggsvp,CGGSVP) +#define LAPACK_zggsvp LAPACK_GLOBAL(zggsvp,ZGGSVP) +#define LAPACK_stgsja LAPACK_GLOBAL(stgsja,STGSJA) +#define LAPACK_dtgsja LAPACK_GLOBAL(dtgsja,DTGSJA) +#define LAPACK_ctgsja LAPACK_GLOBAL(ctgsja,CTGSJA) +#define LAPACK_ztgsja LAPACK_GLOBAL(ztgsja,ZTGSJA) +#define LAPACK_sgels LAPACK_GLOBAL(sgels,SGELS) +#define LAPACK_dgels LAPACK_GLOBAL(dgels,DGELS) +#define LAPACK_cgels LAPACK_GLOBAL(cgels,CGELS) +#define LAPACK_zgels LAPACK_GLOBAL(zgels,ZGELS) +#define LAPACK_sgelsy LAPACK_GLOBAL(sgelsy,SGELSY) +#define LAPACK_dgelsy LAPACK_GLOBAL(dgelsy,DGELSY) +#define LAPACK_cgelsy LAPACK_GLOBAL(cgelsy,CGELSY) +#define LAPACK_zgelsy LAPACK_GLOBAL(zgelsy,ZGELSY) +#define LAPACK_sgelss LAPACK_GLOBAL(sgelss,SGELSS) +#define LAPACK_dgelss LAPACK_GLOBAL(dgelss,DGELSS) +#define LAPACK_cgelss LAPACK_GLOBAL(cgelss,CGELSS) +#define LAPACK_zgelss LAPACK_GLOBAL(zgelss,ZGELSS) +#define LAPACK_sgelsd LAPACK_GLOBAL(sgelsd,SGELSD) +#define LAPACK_dgelsd LAPACK_GLOBAL(dgelsd,DGELSD) +#define LAPACK_cgelsd LAPACK_GLOBAL(cgelsd,CGELSD) +#define LAPACK_zgelsd LAPACK_GLOBAL(zgelsd,ZGELSD) +#define LAPACK_sgglse LAPACK_GLOBAL(sgglse,SGGLSE) +#define LAPACK_dgglse LAPACK_GLOBAL(dgglse,DGGLSE) +#define LAPACK_cgglse LAPACK_GLOBAL(cgglse,CGGLSE) +#define LAPACK_zgglse LAPACK_GLOBAL(zgglse,ZGGLSE) +#define LAPACK_sggglm LAPACK_GLOBAL(sggglm,SGGGLM) +#define LAPACK_dggglm LAPACK_GLOBAL(dggglm,DGGGLM) +#define LAPACK_cggglm LAPACK_GLOBAL(cggglm,CGGGLM) +#define LAPACK_zggglm LAPACK_GLOBAL(zggglm,ZGGGLM) 
+#define LAPACK_ssyev LAPACK_GLOBAL(ssyev,SSYEV) +#define LAPACK_dsyev LAPACK_GLOBAL(dsyev,DSYEV) +#define LAPACK_cheev LAPACK_GLOBAL(cheev,CHEEV) +#define LAPACK_zheev LAPACK_GLOBAL(zheev,ZHEEV) +#define LAPACK_ssyevd LAPACK_GLOBAL(ssyevd,SSYEVD) +#define LAPACK_dsyevd LAPACK_GLOBAL(dsyevd,DSYEVD) +#define LAPACK_cheevd LAPACK_GLOBAL(cheevd,CHEEVD) +#define LAPACK_zheevd LAPACK_GLOBAL(zheevd,ZHEEVD) +#define LAPACK_ssyevx LAPACK_GLOBAL(ssyevx,SSYEVX) +#define LAPACK_dsyevx LAPACK_GLOBAL(dsyevx,DSYEVX) +#define LAPACK_cheevx LAPACK_GLOBAL(cheevx,CHEEVX) +#define LAPACK_zheevx LAPACK_GLOBAL(zheevx,ZHEEVX) +#define LAPACK_ssyevr LAPACK_GLOBAL(ssyevr,SSYEVR) +#define LAPACK_dsyevr LAPACK_GLOBAL(dsyevr,DSYEVR) +#define LAPACK_cheevr LAPACK_GLOBAL(cheevr,CHEEVR) +#define LAPACK_zheevr LAPACK_GLOBAL(zheevr,ZHEEVR) +#define LAPACK_sspev LAPACK_GLOBAL(sspev,SSPEV) +#define LAPACK_dspev LAPACK_GLOBAL(dspev,DSPEV) +#define LAPACK_chpev LAPACK_GLOBAL(chpev,CHPEV) +#define LAPACK_zhpev LAPACK_GLOBAL(zhpev,ZHPEV) +#define LAPACK_sspevd LAPACK_GLOBAL(sspevd,SSPEVD) +#define LAPACK_dspevd LAPACK_GLOBAL(dspevd,DSPEVD) +#define LAPACK_chpevd LAPACK_GLOBAL(chpevd,CHPEVD) +#define LAPACK_zhpevd LAPACK_GLOBAL(zhpevd,ZHPEVD) +#define LAPACK_sspevx LAPACK_GLOBAL(sspevx,SSPEVX) +#define LAPACK_dspevx LAPACK_GLOBAL(dspevx,DSPEVX) +#define LAPACK_chpevx LAPACK_GLOBAL(chpevx,CHPEVX) +#define LAPACK_zhpevx LAPACK_GLOBAL(zhpevx,ZHPEVX) +#define LAPACK_ssbev LAPACK_GLOBAL(ssbev,SSBEV) +#define LAPACK_dsbev LAPACK_GLOBAL(dsbev,DSBEV) +#define LAPACK_chbev LAPACK_GLOBAL(chbev,CHBEV) +#define LAPACK_zhbev LAPACK_GLOBAL(zhbev,ZHBEV) +#define LAPACK_ssbevd LAPACK_GLOBAL(ssbevd,SSBEVD) +#define LAPACK_dsbevd LAPACK_GLOBAL(dsbevd,DSBEVD) +#define LAPACK_chbevd LAPACK_GLOBAL(chbevd,CHBEVD) +#define LAPACK_zhbevd LAPACK_GLOBAL(zhbevd,ZHBEVD) +#define LAPACK_ssbevx LAPACK_GLOBAL(ssbevx,SSBEVX) +#define LAPACK_dsbevx LAPACK_GLOBAL(dsbevx,DSBEVX) +#define LAPACK_chbevx LAPACK_GLOBAL(chbevx,CHBEVX) 
+#define LAPACK_zhbevx LAPACK_GLOBAL(zhbevx,ZHBEVX) +#define LAPACK_sstev LAPACK_GLOBAL(sstev,SSTEV) +#define LAPACK_dstev LAPACK_GLOBAL(dstev,DSTEV) +#define LAPACK_sstevd LAPACK_GLOBAL(sstevd,SSTEVD) +#define LAPACK_dstevd LAPACK_GLOBAL(dstevd,DSTEVD) +#define LAPACK_sstevx LAPACK_GLOBAL(sstevx,SSTEVX) +#define LAPACK_dstevx LAPACK_GLOBAL(dstevx,DSTEVX) +#define LAPACK_sstevr LAPACK_GLOBAL(sstevr,SSTEVR) +#define LAPACK_dstevr LAPACK_GLOBAL(dstevr,DSTEVR) +#define LAPACK_sgees LAPACK_GLOBAL(sgees,SGEES) +#define LAPACK_dgees LAPACK_GLOBAL(dgees,DGEES) +#define LAPACK_cgees LAPACK_GLOBAL(cgees,CGEES) +#define LAPACK_zgees LAPACK_GLOBAL(zgees,ZGEES) +#define LAPACK_sgeesx LAPACK_GLOBAL(sgeesx,SGEESX) +#define LAPACK_dgeesx LAPACK_GLOBAL(dgeesx,DGEESX) +#define LAPACK_cgeesx LAPACK_GLOBAL(cgeesx,CGEESX) +#define LAPACK_zgeesx LAPACK_GLOBAL(zgeesx,ZGEESX) +#define LAPACK_sgeev LAPACK_GLOBAL(sgeev,SGEEV) +#define LAPACK_dgeev LAPACK_GLOBAL(dgeev,DGEEV) +#define LAPACK_cgeev LAPACK_GLOBAL(cgeev,CGEEV) +#define LAPACK_zgeev LAPACK_GLOBAL(zgeev,ZGEEV) +#define LAPACK_sgeevx LAPACK_GLOBAL(sgeevx,SGEEVX) +#define LAPACK_dgeevx LAPACK_GLOBAL(dgeevx,DGEEVX) +#define LAPACK_cgeevx LAPACK_GLOBAL(cgeevx,CGEEVX) +#define LAPACK_zgeevx LAPACK_GLOBAL(zgeevx,ZGEEVX) +#define LAPACK_sgesvd LAPACK_GLOBAL(sgesvd,SGESVD) +#define LAPACK_dgesvd LAPACK_GLOBAL(dgesvd,DGESVD) +#define LAPACK_cgesvd LAPACK_GLOBAL(cgesvd,CGESVD) +#define LAPACK_zgesvd LAPACK_GLOBAL(zgesvd,ZGESVD) +#define LAPACK_sgesdd LAPACK_GLOBAL(sgesdd,SGESDD) +#define LAPACK_dgesdd LAPACK_GLOBAL(dgesdd,DGESDD) +#define LAPACK_cgesdd LAPACK_GLOBAL(cgesdd,CGESDD) +#define LAPACK_zgesdd LAPACK_GLOBAL(zgesdd,ZGESDD) +#define LAPACK_dgejsv LAPACK_GLOBAL(dgejsv,DGEJSV) +#define LAPACK_sgejsv LAPACK_GLOBAL(sgejsv,SGEJSV) +#define LAPACK_dgesvj LAPACK_GLOBAL(dgesvj,DGESVJ) +#define LAPACK_sgesvj LAPACK_GLOBAL(sgesvj,SGESVJ) +#define LAPACK_sggsvd LAPACK_GLOBAL(sggsvd,SGGSVD) +#define LAPACK_dggsvd LAPACK_GLOBAL(dggsvd,DGGSVD) 
+#define LAPACK_cggsvd LAPACK_GLOBAL(cggsvd,CGGSVD) +#define LAPACK_zggsvd LAPACK_GLOBAL(zggsvd,ZGGSVD) +#define LAPACK_ssygv LAPACK_GLOBAL(ssygv,SSYGV) +#define LAPACK_dsygv LAPACK_GLOBAL(dsygv,DSYGV) +#define LAPACK_chegv LAPACK_GLOBAL(chegv,CHEGV) +#define LAPACK_zhegv LAPACK_GLOBAL(zhegv,ZHEGV) +#define LAPACK_ssygvd LAPACK_GLOBAL(ssygvd,SSYGVD) +#define LAPACK_dsygvd LAPACK_GLOBAL(dsygvd,DSYGVD) +#define LAPACK_chegvd LAPACK_GLOBAL(chegvd,CHEGVD) +#define LAPACK_zhegvd LAPACK_GLOBAL(zhegvd,ZHEGVD) +#define LAPACK_ssygvx LAPACK_GLOBAL(ssygvx,SSYGVX) +#define LAPACK_dsygvx LAPACK_GLOBAL(dsygvx,DSYGVX) +#define LAPACK_chegvx LAPACK_GLOBAL(chegvx,CHEGVX) +#define LAPACK_zhegvx LAPACK_GLOBAL(zhegvx,ZHEGVX) +#define LAPACK_sspgv LAPACK_GLOBAL(sspgv,SSPGV) +#define LAPACK_dspgv LAPACK_GLOBAL(dspgv,DSPGV) +#define LAPACK_chpgv LAPACK_GLOBAL(chpgv,CHPGV) +#define LAPACK_zhpgv LAPACK_GLOBAL(zhpgv,ZHPGV) +#define LAPACK_sspgvd LAPACK_GLOBAL(sspgvd,SSPGVD) +#define LAPACK_dspgvd LAPACK_GLOBAL(dspgvd,DSPGVD) +#define LAPACK_chpgvd LAPACK_GLOBAL(chpgvd,CHPGVD) +#define LAPACK_zhpgvd LAPACK_GLOBAL(zhpgvd,ZHPGVD) +#define LAPACK_sspgvx LAPACK_GLOBAL(sspgvx,SSPGVX) +#define LAPACK_dspgvx LAPACK_GLOBAL(dspgvx,DSPGVX) +#define LAPACK_chpgvx LAPACK_GLOBAL(chpgvx,CHPGVX) +#define LAPACK_zhpgvx LAPACK_GLOBAL(zhpgvx,ZHPGVX) +#define LAPACK_ssbgv LAPACK_GLOBAL(ssbgv,SSBGV) +#define LAPACK_dsbgv LAPACK_GLOBAL(dsbgv,DSBGV) +#define LAPACK_chbgv LAPACK_GLOBAL(chbgv,CHBGV) +#define LAPACK_zhbgv LAPACK_GLOBAL(zhbgv,ZHBGV) +#define LAPACK_ssbgvd LAPACK_GLOBAL(ssbgvd,SSBGVD) +#define LAPACK_dsbgvd LAPACK_GLOBAL(dsbgvd,DSBGVD) +#define LAPACK_chbgvd LAPACK_GLOBAL(chbgvd,CHBGVD) +#define LAPACK_zhbgvd LAPACK_GLOBAL(zhbgvd,ZHBGVD) +#define LAPACK_ssbgvx LAPACK_GLOBAL(ssbgvx,SSBGVX) +#define LAPACK_dsbgvx LAPACK_GLOBAL(dsbgvx,DSBGVX) +#define LAPACK_chbgvx LAPACK_GLOBAL(chbgvx,CHBGVX) +#define LAPACK_zhbgvx LAPACK_GLOBAL(zhbgvx,ZHBGVX) +#define LAPACK_sgges LAPACK_GLOBAL(sgges,SGGES) +#define 
LAPACK_dgges LAPACK_GLOBAL(dgges,DGGES) +#define LAPACK_cgges LAPACK_GLOBAL(cgges,CGGES) +#define LAPACK_zgges LAPACK_GLOBAL(zgges,ZGGES) +#define LAPACK_sggesx LAPACK_GLOBAL(sggesx,SGGESX) +#define LAPACK_dggesx LAPACK_GLOBAL(dggesx,DGGESX) +#define LAPACK_cggesx LAPACK_GLOBAL(cggesx,CGGESX) +#define LAPACK_zggesx LAPACK_GLOBAL(zggesx,ZGGESX) +#define LAPACK_sggev LAPACK_GLOBAL(sggev,SGGEV) +#define LAPACK_dggev LAPACK_GLOBAL(dggev,DGGEV) +#define LAPACK_cggev LAPACK_GLOBAL(cggev,CGGEV) +#define LAPACK_zggev LAPACK_GLOBAL(zggev,ZGGEV) +#define LAPACK_sggevx LAPACK_GLOBAL(sggevx,SGGEVX) +#define LAPACK_dggevx LAPACK_GLOBAL(dggevx,DGGEVX) +#define LAPACK_cggevx LAPACK_GLOBAL(cggevx,CGGEVX) +#define LAPACK_zggevx LAPACK_GLOBAL(zggevx,ZGGEVX) +#define LAPACK_dsfrk LAPACK_GLOBAL(dsfrk,DSFRK) +#define LAPACK_ssfrk LAPACK_GLOBAL(ssfrk,SSFRK) +#define LAPACK_zhfrk LAPACK_GLOBAL(zhfrk,ZHFRK) +#define LAPACK_chfrk LAPACK_GLOBAL(chfrk,CHFRK) +#define LAPACK_dtfsm LAPACK_GLOBAL(dtfsm,DTFSM) +#define LAPACK_stfsm LAPACK_GLOBAL(stfsm,STFSM) +#define LAPACK_ztfsm LAPACK_GLOBAL(ztfsm,ZTFSM) +#define LAPACK_ctfsm LAPACK_GLOBAL(ctfsm,CTFSM) +#define LAPACK_dtfttp LAPACK_GLOBAL(dtfttp,DTFTTP) +#define LAPACK_stfttp LAPACK_GLOBAL(stfttp,STFTTP) +#define LAPACK_ztfttp LAPACK_GLOBAL(ztfttp,ZTFTTP) +#define LAPACK_ctfttp LAPACK_GLOBAL(ctfttp,CTFTTP) +#define LAPACK_dtfttr LAPACK_GLOBAL(dtfttr,DTFTTR) +#define LAPACK_stfttr LAPACK_GLOBAL(stfttr,STFTTR) +#define LAPACK_ztfttr LAPACK_GLOBAL(ztfttr,ZTFTTR) +#define LAPACK_ctfttr LAPACK_GLOBAL(ctfttr,CTFTTR) +#define LAPACK_dtpttf LAPACK_GLOBAL(dtpttf,DTPTTF) +#define LAPACK_stpttf LAPACK_GLOBAL(stpttf,STPTTF) +#define LAPACK_ztpttf LAPACK_GLOBAL(ztpttf,ZTPTTF) +#define LAPACK_ctpttf LAPACK_GLOBAL(ctpttf,CTPTTF) +#define LAPACK_dtpttr LAPACK_GLOBAL(dtpttr,DTPTTR) +#define LAPACK_stpttr LAPACK_GLOBAL(stpttr,STPTTR) +#define LAPACK_ztpttr LAPACK_GLOBAL(ztpttr,ZTPTTR) +#define LAPACK_ctpttr LAPACK_GLOBAL(ctpttr,CTPTTR) +#define LAPACK_dtrttf 
LAPACK_GLOBAL(dtrttf,DTRTTF) +#define LAPACK_strttf LAPACK_GLOBAL(strttf,STRTTF) +#define LAPACK_ztrttf LAPACK_GLOBAL(ztrttf,ZTRTTF) +#define LAPACK_ctrttf LAPACK_GLOBAL(ctrttf,CTRTTF) +#define LAPACK_dtrttp LAPACK_GLOBAL(dtrttp,DTRTTP) +#define LAPACK_strttp LAPACK_GLOBAL(strttp,STRTTP) +#define LAPACK_ztrttp LAPACK_GLOBAL(ztrttp,ZTRTTP) +#define LAPACK_ctrttp LAPACK_GLOBAL(ctrttp,CTRTTP) +#define LAPACK_sgeqrfp LAPACK_GLOBAL(sgeqrfp,SGEQRFP) +#define LAPACK_dgeqrfp LAPACK_GLOBAL(dgeqrfp,DGEQRFP) +#define LAPACK_cgeqrfp LAPACK_GLOBAL(cgeqrfp,CGEQRFP) +#define LAPACK_zgeqrfp LAPACK_GLOBAL(zgeqrfp,ZGEQRFP) +#define LAPACK_clacgv LAPACK_GLOBAL(clacgv,CLACGV) +#define LAPACK_zlacgv LAPACK_GLOBAL(zlacgv,ZLACGV) +#define LAPACK_slarnv LAPACK_GLOBAL(slarnv,SLARNV) +#define LAPACK_dlarnv LAPACK_GLOBAL(dlarnv,DLARNV) +#define LAPACK_clarnv LAPACK_GLOBAL(clarnv,CLARNV) +#define LAPACK_zlarnv LAPACK_GLOBAL(zlarnv,ZLARNV) +#define LAPACK_sgeqr2 LAPACK_GLOBAL(sgeqr2,SGEQR2) +#define LAPACK_dgeqr2 LAPACK_GLOBAL(dgeqr2,DGEQR2) +#define LAPACK_cgeqr2 LAPACK_GLOBAL(cgeqr2,CGEQR2) +#define LAPACK_zgeqr2 LAPACK_GLOBAL(zgeqr2,ZGEQR2) +#define LAPACK_slacpy LAPACK_GLOBAL(slacpy,SLACPY) +#define LAPACK_dlacpy LAPACK_GLOBAL(dlacpy,DLACPY) +#define LAPACK_clacpy LAPACK_GLOBAL(clacpy,CLACPY) +#define LAPACK_zlacpy LAPACK_GLOBAL(zlacpy,ZLACPY) +#define LAPACK_sgetf2 LAPACK_GLOBAL(sgetf2,SGETF2) +#define LAPACK_dgetf2 LAPACK_GLOBAL(dgetf2,DGETF2) +#define LAPACK_cgetf2 LAPACK_GLOBAL(cgetf2,CGETF2) +#define LAPACK_zgetf2 LAPACK_GLOBAL(zgetf2,ZGETF2) +#define LAPACK_slaswp LAPACK_GLOBAL(slaswp,SLASWP) +#define LAPACK_dlaswp LAPACK_GLOBAL(dlaswp,DLASWP) +#define LAPACK_claswp LAPACK_GLOBAL(claswp,CLASWP) +#define LAPACK_zlaswp LAPACK_GLOBAL(zlaswp,ZLASWP) +#define LAPACK_slange LAPACK_GLOBAL(slange,SLANGE) +#define LAPACK_dlange LAPACK_GLOBAL(dlange,DLANGE) +#define LAPACK_clange LAPACK_GLOBAL(clange,CLANGE) +#define LAPACK_zlange LAPACK_GLOBAL(zlange,ZLANGE) +#define LAPACK_clanhe 
LAPACK_GLOBAL(clanhe,CLANHE) +#define LAPACK_zlanhe LAPACK_GLOBAL(zlanhe,ZLANHE) +#define LAPACK_slansy LAPACK_GLOBAL(slansy,SLANSY) +#define LAPACK_dlansy LAPACK_GLOBAL(dlansy,DLANSY) +#define LAPACK_clansy LAPACK_GLOBAL(clansy,CLANSY) +#define LAPACK_zlansy LAPACK_GLOBAL(zlansy,ZLANSY) +#define LAPACK_slantr LAPACK_GLOBAL(slantr,SLANTR) +#define LAPACK_dlantr LAPACK_GLOBAL(dlantr,DLANTR) +#define LAPACK_clantr LAPACK_GLOBAL(clantr,CLANTR) +#define LAPACK_zlantr LAPACK_GLOBAL(zlantr,ZLANTR) +#define LAPACK_slamch LAPACK_GLOBAL(slamch,SLAMCH) +#define LAPACK_dlamch LAPACK_GLOBAL(dlamch,DLAMCH) +#define LAPACK_sgelq2 LAPACK_GLOBAL(sgelq2,SGELQ2) +#define LAPACK_dgelq2 LAPACK_GLOBAL(dgelq2,DGELQ2) +#define LAPACK_cgelq2 LAPACK_GLOBAL(cgelq2,CGELQ2) +#define LAPACK_zgelq2 LAPACK_GLOBAL(zgelq2,ZGELQ2) +#define LAPACK_slarfb LAPACK_GLOBAL(slarfb,SLARFB) +#define LAPACK_dlarfb LAPACK_GLOBAL(dlarfb,DLARFB) +#define LAPACK_clarfb LAPACK_GLOBAL(clarfb,CLARFB) +#define LAPACK_zlarfb LAPACK_GLOBAL(zlarfb,ZLARFB) +#define LAPACK_slarfg LAPACK_GLOBAL(slarfg,SLARFG) +#define LAPACK_dlarfg LAPACK_GLOBAL(dlarfg,DLARFG) +#define LAPACK_clarfg LAPACK_GLOBAL(clarfg,CLARFG) +#define LAPACK_zlarfg LAPACK_GLOBAL(zlarfg,ZLARFG) +#define LAPACK_slarft LAPACK_GLOBAL(slarft,SLARFT) +#define LAPACK_dlarft LAPACK_GLOBAL(dlarft,DLARFT) +#define LAPACK_clarft LAPACK_GLOBAL(clarft,CLARFT) +#define LAPACK_zlarft LAPACK_GLOBAL(zlarft,ZLARFT) +#define LAPACK_slarfx LAPACK_GLOBAL(slarfx,SLARFX) +#define LAPACK_dlarfx LAPACK_GLOBAL(dlarfx,DLARFX) +#define LAPACK_clarfx LAPACK_GLOBAL(clarfx,CLARFX) +#define LAPACK_zlarfx LAPACK_GLOBAL(zlarfx,ZLARFX) +#define LAPACK_slatms LAPACK_GLOBAL(slatms,SLATMS) +#define LAPACK_dlatms LAPACK_GLOBAL(dlatms,DLATMS) +#define LAPACK_clatms LAPACK_GLOBAL(clatms,CLATMS) +#define LAPACK_zlatms LAPACK_GLOBAL(zlatms,ZLATMS) +#define LAPACK_slag2d LAPACK_GLOBAL(slag2d,SLAG2D) +#define LAPACK_dlag2s LAPACK_GLOBAL(dlag2s,DLAG2S) +#define LAPACK_clag2z 
LAPACK_GLOBAL(clag2z,CLAG2Z) +#define LAPACK_zlag2c LAPACK_GLOBAL(zlag2c,ZLAG2C) +#define LAPACK_slauum LAPACK_GLOBAL(slauum,SLAUUM) +#define LAPACK_dlauum LAPACK_GLOBAL(dlauum,DLAUUM) +#define LAPACK_clauum LAPACK_GLOBAL(clauum,CLAUUM) +#define LAPACK_zlauum LAPACK_GLOBAL(zlauum,ZLAUUM) +#define LAPACK_slagge LAPACK_GLOBAL(slagge,SLAGGE) +#define LAPACK_dlagge LAPACK_GLOBAL(dlagge,DLAGGE) +#define LAPACK_clagge LAPACK_GLOBAL(clagge,CLAGGE) +#define LAPACK_zlagge LAPACK_GLOBAL(zlagge,ZLAGGE) +#define LAPACK_slaset LAPACK_GLOBAL(slaset,SLASET) +#define LAPACK_dlaset LAPACK_GLOBAL(dlaset,DLASET) +#define LAPACK_claset LAPACK_GLOBAL(claset,CLASET) +#define LAPACK_zlaset LAPACK_GLOBAL(zlaset,ZLASET) +#define LAPACK_slasrt LAPACK_GLOBAL(slasrt,SLASRT) +#define LAPACK_dlasrt LAPACK_GLOBAL(dlasrt,DLASRT) +#define LAPACK_slagsy LAPACK_GLOBAL(slagsy,SLAGSY) +#define LAPACK_dlagsy LAPACK_GLOBAL(dlagsy,DLAGSY) +#define LAPACK_clagsy LAPACK_GLOBAL(clagsy,CLAGSY) +#define LAPACK_zlagsy LAPACK_GLOBAL(zlagsy,ZLAGSY) +#define LAPACK_claghe LAPACK_GLOBAL(claghe,CLAGHE) +#define LAPACK_zlaghe LAPACK_GLOBAL(zlaghe,ZLAGHE) +#define LAPACK_slapmr LAPACK_GLOBAL(slapmr,SLAPMR) +#define LAPACK_dlapmr LAPACK_GLOBAL(dlapmr,DLAPMR) +#define LAPACK_clapmr LAPACK_GLOBAL(clapmr,CLAPMR) +#define LAPACK_zlapmr LAPACK_GLOBAL(zlapmr,ZLAPMR) +#define LAPACK_slapy2 LAPACK_GLOBAL(slapy2,SLAPY2) +#define LAPACK_dlapy2 LAPACK_GLOBAL(dlapy2,DLAPY2) +#define LAPACK_slapy3 LAPACK_GLOBAL(slapy3,SLAPY3) +#define LAPACK_dlapy3 LAPACK_GLOBAL(dlapy3,DLAPY3) +#define LAPACK_slartgp LAPACK_GLOBAL(slartgp,SLARTGP) +#define LAPACK_dlartgp LAPACK_GLOBAL(dlartgp,DLARTGP) +#define LAPACK_slartgs LAPACK_GLOBAL(slartgs,SLARTGS) +#define LAPACK_dlartgs LAPACK_GLOBAL(dlartgs,DLARTGS) +// LAPACK 3.3.0 +#define LAPACK_cbbcsd LAPACK_GLOBAL(cbbcsd,CBBCSD) +#define LAPACK_cheswapr LAPACK_GLOBAL(cheswapr,CHESWAPR) +#define LAPACK_chetri2 LAPACK_GLOBAL(chetri2,CHETRI2) +#define LAPACK_chetri2x LAPACK_GLOBAL(chetri2x,CHETRI2X) 
+#define LAPACK_chetrs2 LAPACK_GLOBAL(chetrs2,CHETRS2) +#define LAPACK_csyconv LAPACK_GLOBAL(csyconv,CSYCONV) +#define LAPACK_csyswapr LAPACK_GLOBAL(csyswapr,CSYSWAPR) +#define LAPACK_csytri2 LAPACK_GLOBAL(csytri2,CSYTRI2) +#define LAPACK_csytri2x LAPACK_GLOBAL(csytri2x,CSYTRI2X) +#define LAPACK_csytrs2 LAPACK_GLOBAL(csytrs2,CSYTRS2) +#define LAPACK_cunbdb LAPACK_GLOBAL(cunbdb,CUNBDB) +#define LAPACK_cuncsd LAPACK_GLOBAL(cuncsd,CUNCSD) +#define LAPACK_dbbcsd LAPACK_GLOBAL(dbbcsd,DBBCSD) +#define LAPACK_dorbdb LAPACK_GLOBAL(dorbdb,DORBDB) +#define LAPACK_dorcsd LAPACK_GLOBAL(dorcsd,DORCSD) +#define LAPACK_dsyconv LAPACK_GLOBAL(dsyconv,DSYCONV) +#define LAPACK_dsyswapr LAPACK_GLOBAL(dsyswapr,DSYSWAPR) +#define LAPACK_dsytri2 LAPACK_GLOBAL(dsytri2,DSYTRI2) +#define LAPACK_dsytri2x LAPACK_GLOBAL(dsytri2x,DSYTRI2X) +#define LAPACK_dsytrs2 LAPACK_GLOBAL(dsytrs2,DSYTRS2) +#define LAPACK_sbbcsd LAPACK_GLOBAL(sbbcsd,SBBCSD) +#define LAPACK_sorbdb LAPACK_GLOBAL(sorbdb,SORBDB) +#define LAPACK_sorcsd LAPACK_GLOBAL(sorcsd,SORCSD) +#define LAPACK_ssyconv LAPACK_GLOBAL(ssyconv,SSYCONV) +#define LAPACK_ssyswapr LAPACK_GLOBAL(ssyswapr,SSYSWAPR) +#define LAPACK_ssytri2 LAPACK_GLOBAL(ssytri2,SSYTRI2) +#define LAPACK_ssytri2x LAPACK_GLOBAL(ssytri2x,SSYTRI2X) +#define LAPACK_ssytrs2 LAPACK_GLOBAL(ssytrs2,SSYTRS2) +#define LAPACK_zbbcsd LAPACK_GLOBAL(zbbcsd,ZBBCSD) +#define LAPACK_zheswapr LAPACK_GLOBAL(zheswapr,ZHESWAPR) +#define LAPACK_zhetri2 LAPACK_GLOBAL(zhetri2,ZHETRI2) +#define LAPACK_zhetri2x LAPACK_GLOBAL(zhetri2x,ZHETRI2X) +#define LAPACK_zhetrs2 LAPACK_GLOBAL(zhetrs2,ZHETRS2) +#define LAPACK_zsyconv LAPACK_GLOBAL(zsyconv,ZSYCONV) +#define LAPACK_zsyswapr LAPACK_GLOBAL(zsyswapr,ZSYSWAPR) +#define LAPACK_zsytri2 LAPACK_GLOBAL(zsytri2,ZSYTRI2) +#define LAPACK_zsytri2x LAPACK_GLOBAL(zsytri2x,ZSYTRI2X) +#define LAPACK_zsytrs2 LAPACK_GLOBAL(zsytrs2,ZSYTRS2) +#define LAPACK_zunbdb LAPACK_GLOBAL(zunbdb,ZUNBDB) +#define LAPACK_zuncsd LAPACK_GLOBAL(zuncsd,ZUNCSD) +// LAPACK 3.4.0 
+#define LAPACK_sgemqrt LAPACK_GLOBAL(sgemqrt,SGEMQRT)
+#define LAPACK_dgemqrt LAPACK_GLOBAL(dgemqrt,DGEMQRT)
+#define LAPACK_cgemqrt LAPACK_GLOBAL(cgemqrt,CGEMQRT)
+#define LAPACK_zgemqrt LAPACK_GLOBAL(zgemqrt,ZGEMQRT)
+#define LAPACK_sgeqrt LAPACK_GLOBAL(sgeqrt,SGEQRT)
+#define LAPACK_dgeqrt LAPACK_GLOBAL(dgeqrt,DGEQRT)
+#define LAPACK_cgeqrt LAPACK_GLOBAL(cgeqrt,CGEQRT)
+#define LAPACK_zgeqrt LAPACK_GLOBAL(zgeqrt,ZGEQRT)
+#define LAPACK_sgeqrt2 LAPACK_GLOBAL(sgeqrt2,SGEQRT2)
+#define LAPACK_dgeqrt2 LAPACK_GLOBAL(dgeqrt2,DGEQRT2)
+#define LAPACK_cgeqrt2 LAPACK_GLOBAL(cgeqrt2,CGEQRT2)
+#define LAPACK_zgeqrt2 LAPACK_GLOBAL(zgeqrt2,ZGEQRT2)
+#define LAPACK_sgeqrt3 LAPACK_GLOBAL(sgeqrt3,SGEQRT3)
+#define LAPACK_dgeqrt3 LAPACK_GLOBAL(dgeqrt3,DGEQRT3)
+#define LAPACK_cgeqrt3 LAPACK_GLOBAL(cgeqrt3,CGEQRT3)
+#define LAPACK_zgeqrt3 LAPACK_GLOBAL(zgeqrt3,ZGEQRT3)
+#define LAPACK_stpmqrt LAPACK_GLOBAL(stpmqrt,STPMQRT)
+#define LAPACK_dtpmqrt LAPACK_GLOBAL(dtpmqrt,DTPMQRT)
+#define LAPACK_ctpmqrt LAPACK_GLOBAL(ctpmqrt,CTPMQRT)
+#define LAPACK_ztpmqrt LAPACK_GLOBAL(ztpmqrt,ZTPMQRT)
+// The tpqrt family previously mapped only the d/c/z variants; the
+// single-precision entry is added so that LAPACK_stpqrt goes through
+// LAPACK_GLOBAL like its siblings (stpmqrt, stpqrt2, stprfb).
+#define LAPACK_stpqrt LAPACK_GLOBAL(stpqrt,STPQRT)
+#define LAPACK_dtpqrt LAPACK_GLOBAL(dtpqrt,DTPQRT)
+#define LAPACK_ctpqrt LAPACK_GLOBAL(ctpqrt,CTPQRT)
+#define LAPACK_ztpqrt LAPACK_GLOBAL(ztpqrt,ZTPQRT)
+#define LAPACK_stpqrt2 LAPACK_GLOBAL(stpqrt2,STPQRT2)
+#define LAPACK_dtpqrt2 LAPACK_GLOBAL(dtpqrt2,DTPQRT2)
+#define LAPACK_ctpqrt2 LAPACK_GLOBAL(ctpqrt2,CTPQRT2)
+#define LAPACK_ztpqrt2 LAPACK_GLOBAL(ztpqrt2,ZTPQRT2)
+#define LAPACK_stprfb LAPACK_GLOBAL(stprfb,STPRFB)
+#define LAPACK_dtprfb LAPACK_GLOBAL(dtprfb,DTPRFB)
+#define LAPACK_ctprfb LAPACK_GLOBAL(ctprfb,CTPRFB)
+#define LAPACK_ztprfb LAPACK_GLOBAL(ztprfb,ZTPRFB)
+// LAPACK 3.X.X
+#define LAPACK_csyr LAPACK_GLOBAL(csyr,CSYR)
+#define LAPACK_zsyr LAPACK_GLOBAL(zsyr,ZSYR)
+
+
+void LAPACK_sgetrf( lapack_int* m, lapack_int* n, float* a, lapack_int* lda,
+                    lapack_int* ipiv, lapack_int *info );
+void LAPACK_dgetrf( lapack_int* m, lapack_int* n, double* a, lapack_int* lda,
+                    lapack_int*
ipiv, lapack_int *info ); +void LAPACK_cgetrf( lapack_int* m, lapack_int* n, lapack_complex_float* a, + lapack_int* lda, lapack_int* ipiv, lapack_int *info ); +void LAPACK_zgetrf( lapack_int* m, lapack_int* n, lapack_complex_double* a, + lapack_int* lda, lapack_int* ipiv, lapack_int *info ); +void LAPACK_sgbtrf( lapack_int* m, lapack_int* n, lapack_int* kl, + lapack_int* ku, float* ab, lapack_int* ldab, + lapack_int* ipiv, lapack_int *info ); +void LAPACK_dgbtrf( lapack_int* m, lapack_int* n, lapack_int* kl, + lapack_int* ku, double* ab, lapack_int* ldab, + lapack_int* ipiv, lapack_int *info ); +void LAPACK_cgbtrf( lapack_int* m, lapack_int* n, lapack_int* kl, + lapack_int* ku, lapack_complex_float* ab, lapack_int* ldab, + lapack_int* ipiv, lapack_int *info ); +void LAPACK_zgbtrf( lapack_int* m, lapack_int* n, lapack_int* kl, + lapack_int* ku, lapack_complex_double* ab, lapack_int* ldab, + lapack_int* ipiv, lapack_int *info ); +void LAPACK_sgttrf( lapack_int* n, float* dl, float* d, float* du, float* du2, + lapack_int* ipiv, lapack_int *info ); +void LAPACK_dgttrf( lapack_int* n, double* dl, double* d, double* du, + double* du2, lapack_int* ipiv, lapack_int *info ); +void LAPACK_cgttrf( lapack_int* n, lapack_complex_float* dl, + lapack_complex_float* d, lapack_complex_float* du, + lapack_complex_float* du2, lapack_int* ipiv, + lapack_int *info ); +void LAPACK_zgttrf( lapack_int* n, lapack_complex_double* dl, + lapack_complex_double* d, lapack_complex_double* du, + lapack_complex_double* du2, lapack_int* ipiv, + lapack_int *info ); +void LAPACK_spotrf( char* uplo, lapack_int* n, float* a, lapack_int* lda, + lapack_int *info ); +void LAPACK_dpotrf( char* uplo, lapack_int* n, double* a, lapack_int* lda, + lapack_int *info ); +void LAPACK_cpotrf( char* uplo, lapack_int* n, lapack_complex_float* a, + lapack_int* lda, lapack_int *info ); +void LAPACK_zpotrf( char* uplo, lapack_int* n, lapack_complex_double* a, + lapack_int* lda, lapack_int *info ); +void LAPACK_dpstrf( 
char* uplo, lapack_int* n, double* a, lapack_int* lda, + lapack_int* piv, lapack_int* rank, double* tol, + double* work, lapack_int *info ); +void LAPACK_spstrf( char* uplo, lapack_int* n, float* a, lapack_int* lda, + lapack_int* piv, lapack_int* rank, float* tol, float* work, + lapack_int *info ); +void LAPACK_zpstrf( char* uplo, lapack_int* n, lapack_complex_double* a, + lapack_int* lda, lapack_int* piv, lapack_int* rank, + double* tol, double* work, lapack_int *info ); +void LAPACK_cpstrf( char* uplo, lapack_int* n, lapack_complex_float* a, + lapack_int* lda, lapack_int* piv, lapack_int* rank, + float* tol, float* work, lapack_int *info ); +void LAPACK_dpftrf( char* transr, char* uplo, lapack_int* n, double* a, + lapack_int *info ); +void LAPACK_spftrf( char* transr, char* uplo, lapack_int* n, float* a, + lapack_int *info ); +void LAPACK_zpftrf( char* transr, char* uplo, lapack_int* n, + lapack_complex_double* a, lapack_int *info ); +void LAPACK_cpftrf( char* transr, char* uplo, lapack_int* n, + lapack_complex_float* a, lapack_int *info ); +void LAPACK_spptrf( char* uplo, lapack_int* n, float* ap, lapack_int *info ); +void LAPACK_dpptrf( char* uplo, lapack_int* n, double* ap, lapack_int *info ); +void LAPACK_cpptrf( char* uplo, lapack_int* n, lapack_complex_float* ap, + lapack_int *info ); +void LAPACK_zpptrf( char* uplo, lapack_int* n, lapack_complex_double* ap, + lapack_int *info ); +void LAPACK_spbtrf( char* uplo, lapack_int* n, lapack_int* kd, float* ab, + lapack_int* ldab, lapack_int *info ); +void LAPACK_dpbtrf( char* uplo, lapack_int* n, lapack_int* kd, double* ab, + lapack_int* ldab, lapack_int *info ); +void LAPACK_cpbtrf( char* uplo, lapack_int* n, lapack_int* kd, + lapack_complex_float* ab, lapack_int* ldab, + lapack_int *info ); +void LAPACK_zpbtrf( char* uplo, lapack_int* n, lapack_int* kd, + lapack_complex_double* ab, lapack_int* ldab, + lapack_int *info ); +void LAPACK_spttrf( lapack_int* n, float* d, float* e, lapack_int *info ); +void 
LAPACK_dpttrf( lapack_int* n, double* d, double* e, lapack_int *info ); +void LAPACK_cpttrf( lapack_int* n, float* d, lapack_complex_float* e, + lapack_int *info ); +void LAPACK_zpttrf( lapack_int* n, double* d, lapack_complex_double* e, + lapack_int *info ); +void LAPACK_ssytrf( char* uplo, lapack_int* n, float* a, lapack_int* lda, + lapack_int* ipiv, float* work, lapack_int* lwork, + lapack_int *info ); +void LAPACK_dsytrf( char* uplo, lapack_int* n, double* a, lapack_int* lda, + lapack_int* ipiv, double* work, lapack_int* lwork, + lapack_int *info ); +void LAPACK_csytrf( char* uplo, lapack_int* n, lapack_complex_float* a, + lapack_int* lda, lapack_int* ipiv, + lapack_complex_float* work, lapack_int* lwork, + lapack_int *info ); +void LAPACK_zsytrf( char* uplo, lapack_int* n, lapack_complex_double* a, + lapack_int* lda, lapack_int* ipiv, + lapack_complex_double* work, lapack_int* lwork, + lapack_int *info ); +void LAPACK_chetrf( char* uplo, lapack_int* n, lapack_complex_float* a, + lapack_int* lda, lapack_int* ipiv, + lapack_complex_float* work, lapack_int* lwork, + lapack_int *info ); +void LAPACK_zhetrf( char* uplo, lapack_int* n, lapack_complex_double* a, + lapack_int* lda, lapack_int* ipiv, + lapack_complex_double* work, lapack_int* lwork, + lapack_int *info ); +void LAPACK_ssptrf( char* uplo, lapack_int* n, float* ap, lapack_int* ipiv, + lapack_int *info ); +void LAPACK_dsptrf( char* uplo, lapack_int* n, double* ap, lapack_int* ipiv, + lapack_int *info ); +void LAPACK_csptrf( char* uplo, lapack_int* n, lapack_complex_float* ap, + lapack_int* ipiv, lapack_int *info ); +void LAPACK_zsptrf( char* uplo, lapack_int* n, lapack_complex_double* ap, + lapack_int* ipiv, lapack_int *info ); +void LAPACK_chptrf( char* uplo, lapack_int* n, lapack_complex_float* ap, + lapack_int* ipiv, lapack_int *info ); +void LAPACK_zhptrf( char* uplo, lapack_int* n, lapack_complex_double* ap, + lapack_int* ipiv, lapack_int *info ); +void LAPACK_sgetrs( char* trans, lapack_int* n, 
lapack_int* nrhs, + const float* a, lapack_int* lda, const lapack_int* ipiv, + float* b, lapack_int* ldb, lapack_int *info ); +void LAPACK_dgetrs( char* trans, lapack_int* n, lapack_int* nrhs, + const double* a, lapack_int* lda, const lapack_int* ipiv, + double* b, lapack_int* ldb, lapack_int *info ); +void LAPACK_cgetrs( char* trans, lapack_int* n, lapack_int* nrhs, + const lapack_complex_float* a, lapack_int* lda, + const lapack_int* ipiv, lapack_complex_float* b, + lapack_int* ldb, lapack_int *info ); +void LAPACK_zgetrs( char* trans, lapack_int* n, lapack_int* nrhs, + const lapack_complex_double* a, lapack_int* lda, + const lapack_int* ipiv, lapack_complex_double* b, + lapack_int* ldb, lapack_int *info ); +void LAPACK_sgbtrs( char* trans, lapack_int* n, lapack_int* kl, lapack_int* ku, + lapack_int* nrhs, const float* ab, lapack_int* ldab, + const lapack_int* ipiv, float* b, lapack_int* ldb, + lapack_int *info ); +void LAPACK_dgbtrs( char* trans, lapack_int* n, lapack_int* kl, lapack_int* ku, + lapack_int* nrhs, const double* ab, lapack_int* ldab, + const lapack_int* ipiv, double* b, lapack_int* ldb, + lapack_int *info ); +void LAPACK_cgbtrs( char* trans, lapack_int* n, lapack_int* kl, lapack_int* ku, + lapack_int* nrhs, const lapack_complex_float* ab, + lapack_int* ldab, const lapack_int* ipiv, + lapack_complex_float* b, lapack_int* ldb, + lapack_int *info ); +void LAPACK_zgbtrs( char* trans, lapack_int* n, lapack_int* kl, lapack_int* ku, + lapack_int* nrhs, const lapack_complex_double* ab, + lapack_int* ldab, const lapack_int* ipiv, + lapack_complex_double* b, lapack_int* ldb, + lapack_int *info ); +void LAPACK_sgttrs( char* trans, lapack_int* n, lapack_int* nrhs, + const float* dl, const float* d, const float* du, + const float* du2, const lapack_int* ipiv, float* b, + lapack_int* ldb, lapack_int *info ); +void LAPACK_dgttrs( char* trans, lapack_int* n, lapack_int* nrhs, + const double* dl, const double* d, const double* du, + const double* du2, const 
lapack_int* ipiv, double* b, + lapack_int* ldb, lapack_int *info ); +void LAPACK_cgttrs( char* trans, lapack_int* n, lapack_int* nrhs, + const lapack_complex_float* dl, + const lapack_complex_float* d, + const lapack_complex_float* du, + const lapack_complex_float* du2, const lapack_int* ipiv, + lapack_complex_float* b, lapack_int* ldb, + lapack_int *info ); +void LAPACK_zgttrs( char* trans, lapack_int* n, lapack_int* nrhs, + const lapack_complex_double* dl, + const lapack_complex_double* d, + const lapack_complex_double* du, + const lapack_complex_double* du2, const lapack_int* ipiv, + lapack_complex_double* b, lapack_int* ldb, + lapack_int *info ); +void LAPACK_spotrs( char* uplo, lapack_int* n, lapack_int* nrhs, const float* a, + lapack_int* lda, float* b, lapack_int* ldb, + lapack_int *info ); +void LAPACK_dpotrs( char* uplo, lapack_int* n, lapack_int* nrhs, + const double* a, lapack_int* lda, double* b, + lapack_int* ldb, lapack_int *info ); +void LAPACK_cpotrs( char* uplo, lapack_int* n, lapack_int* nrhs, + const lapack_complex_float* a, lapack_int* lda, + lapack_complex_float* b, lapack_int* ldb, + lapack_int *info ); +void LAPACK_zpotrs( char* uplo, lapack_int* n, lapack_int* nrhs, + const lapack_complex_double* a, lapack_int* lda, + lapack_complex_double* b, lapack_int* ldb, + lapack_int *info ); +void LAPACK_dpftrs( char* transr, char* uplo, lapack_int* n, lapack_int* nrhs, + const double* a, double* b, lapack_int* ldb, + lapack_int *info ); +void LAPACK_spftrs( char* transr, char* uplo, lapack_int* n, lapack_int* nrhs, + const float* a, float* b, lapack_int* ldb, + lapack_int *info ); +void LAPACK_zpftrs( char* transr, char* uplo, lapack_int* n, lapack_int* nrhs, + const lapack_complex_double* a, lapack_complex_double* b, + lapack_int* ldb, lapack_int *info ); +void LAPACK_cpftrs( char* transr, char* uplo, lapack_int* n, lapack_int* nrhs, + const lapack_complex_float* a, lapack_complex_float* b, + lapack_int* ldb, lapack_int *info ); +void LAPACK_spptrs( 
char* uplo, lapack_int* n, lapack_int* nrhs, + const float* ap, float* b, lapack_int* ldb, + lapack_int *info ); +void LAPACK_dpptrs( char* uplo, lapack_int* n, lapack_int* nrhs, + const double* ap, double* b, lapack_int* ldb, + lapack_int *info ); +void LAPACK_cpptrs( char* uplo, lapack_int* n, lapack_int* nrhs, + const lapack_complex_float* ap, lapack_complex_float* b, + lapack_int* ldb, lapack_int *info ); +void LAPACK_zpptrs( char* uplo, lapack_int* n, lapack_int* nrhs, + const lapack_complex_double* ap, lapack_complex_double* b, + lapack_int* ldb, lapack_int *info ); +void LAPACK_spbtrs( char* uplo, lapack_int* n, lapack_int* kd, lapack_int* nrhs, + const float* ab, lapack_int* ldab, float* b, + lapack_int* ldb, lapack_int *info ); +void LAPACK_dpbtrs( char* uplo, lapack_int* n, lapack_int* kd, lapack_int* nrhs, + const double* ab, lapack_int* ldab, double* b, + lapack_int* ldb, lapack_int *info ); +void LAPACK_cpbtrs( char* uplo, lapack_int* n, lapack_int* kd, lapack_int* nrhs, + const lapack_complex_float* ab, lapack_int* ldab, + lapack_complex_float* b, lapack_int* ldb, + lapack_int *info ); +void LAPACK_zpbtrs( char* uplo, lapack_int* n, lapack_int* kd, lapack_int* nrhs, + const lapack_complex_double* ab, lapack_int* ldab, + lapack_complex_double* b, lapack_int* ldb, + lapack_int *info ); +void LAPACK_spttrs( lapack_int* n, lapack_int* nrhs, const float* d, + const float* e, float* b, lapack_int* ldb, + lapack_int *info ); +void LAPACK_dpttrs( lapack_int* n, lapack_int* nrhs, const double* d, + const double* e, double* b, lapack_int* ldb, + lapack_int *info ); +void LAPACK_cpttrs( char* uplo, lapack_int* n, lapack_int* nrhs, const float* d, + const lapack_complex_float* e, lapack_complex_float* b, + lapack_int* ldb, lapack_int *info ); +void LAPACK_zpttrs( char* uplo, lapack_int* n, lapack_int* nrhs, + const double* d, const lapack_complex_double* e, + lapack_complex_double* b, lapack_int* ldb, + lapack_int *info ); +void LAPACK_ssytrs( char* uplo, 
lapack_int* n, lapack_int* nrhs, const float* a, + lapack_int* lda, const lapack_int* ipiv, float* b, + lapack_int* ldb, lapack_int *info ); +void LAPACK_dsytrs( char* uplo, lapack_int* n, lapack_int* nrhs, + const double* a, lapack_int* lda, const lapack_int* ipiv, + double* b, lapack_int* ldb, lapack_int *info ); +void LAPACK_csytrs( char* uplo, lapack_int* n, lapack_int* nrhs, + const lapack_complex_float* a, lapack_int* lda, + const lapack_int* ipiv, lapack_complex_float* b, + lapack_int* ldb, lapack_int *info ); +void LAPACK_zsytrs( char* uplo, lapack_int* n, lapack_int* nrhs, + const lapack_complex_double* a, lapack_int* lda, + const lapack_int* ipiv, lapack_complex_double* b, + lapack_int* ldb, lapack_int *info ); +void LAPACK_chetrs( char* uplo, lapack_int* n, lapack_int* nrhs, + const lapack_complex_float* a, lapack_int* lda, + const lapack_int* ipiv, lapack_complex_float* b, + lapack_int* ldb, lapack_int *info ); +void LAPACK_zhetrs( char* uplo, lapack_int* n, lapack_int* nrhs, + const lapack_complex_double* a, lapack_int* lda, + const lapack_int* ipiv, lapack_complex_double* b, + lapack_int* ldb, lapack_int *info ); +void LAPACK_ssptrs( char* uplo, lapack_int* n, lapack_int* nrhs, + const float* ap, const lapack_int* ipiv, float* b, + lapack_int* ldb, lapack_int *info ); +void LAPACK_dsptrs( char* uplo, lapack_int* n, lapack_int* nrhs, + const double* ap, const lapack_int* ipiv, double* b, + lapack_int* ldb, lapack_int *info ); +void LAPACK_csptrs( char* uplo, lapack_int* n, lapack_int* nrhs, + const lapack_complex_float* ap, const lapack_int* ipiv, + lapack_complex_float* b, lapack_int* ldb, + lapack_int *info ); +void LAPACK_zsptrs( char* uplo, lapack_int* n, lapack_int* nrhs, + const lapack_complex_double* ap, const lapack_int* ipiv, + lapack_complex_double* b, lapack_int* ldb, + lapack_int *info ); +void LAPACK_chptrs( char* uplo, lapack_int* n, lapack_int* nrhs, + const lapack_complex_float* ap, const lapack_int* ipiv, + lapack_complex_float* b, 
lapack_int* ldb, + lapack_int *info ); +void LAPACK_zhptrs( char* uplo, lapack_int* n, lapack_int* nrhs, + const lapack_complex_double* ap, const lapack_int* ipiv, + lapack_complex_double* b, lapack_int* ldb, + lapack_int *info ); +void LAPACK_strtrs( char* uplo, char* trans, char* diag, lapack_int* n, + lapack_int* nrhs, const float* a, lapack_int* lda, float* b, + lapack_int* ldb, lapack_int *info ); +void LAPACK_dtrtrs( char* uplo, char* trans, char* diag, lapack_int* n, + lapack_int* nrhs, const double* a, lapack_int* lda, + double* b, lapack_int* ldb, lapack_int *info ); +void LAPACK_ctrtrs( char* uplo, char* trans, char* diag, lapack_int* n, + lapack_int* nrhs, const lapack_complex_float* a, + lapack_int* lda, lapack_complex_float* b, lapack_int* ldb, + lapack_int *info ); +void LAPACK_ztrtrs( char* uplo, char* trans, char* diag, lapack_int* n, + lapack_int* nrhs, const lapack_complex_double* a, + lapack_int* lda, lapack_complex_double* b, lapack_int* ldb, + lapack_int *info ); +void LAPACK_stptrs( char* uplo, char* trans, char* diag, lapack_int* n, + lapack_int* nrhs, const float* ap, float* b, + lapack_int* ldb, lapack_int *info ); +void LAPACK_dtptrs( char* uplo, char* trans, char* diag, lapack_int* n, + lapack_int* nrhs, const double* ap, double* b, + lapack_int* ldb, lapack_int *info ); +void LAPACK_ctptrs( char* uplo, char* trans, char* diag, lapack_int* n, + lapack_int* nrhs, const lapack_complex_float* ap, + lapack_complex_float* b, lapack_int* ldb, + lapack_int *info ); +void LAPACK_ztptrs( char* uplo, char* trans, char* diag, lapack_int* n, + lapack_int* nrhs, const lapack_complex_double* ap, + lapack_complex_double* b, lapack_int* ldb, + lapack_int *info ); +void LAPACK_stbtrs( char* uplo, char* trans, char* diag, lapack_int* n, + lapack_int* kd, lapack_int* nrhs, const float* ab, + lapack_int* ldab, float* b, lapack_int* ldb, + lapack_int *info ); +void LAPACK_dtbtrs( char* uplo, char* trans, char* diag, lapack_int* n, + lapack_int* kd, 
lapack_int* nrhs, const double* ab, + lapack_int* ldab, double* b, lapack_int* ldb, + lapack_int *info ); +void LAPACK_ctbtrs( char* uplo, char* trans, char* diag, lapack_int* n, + lapack_int* kd, lapack_int* nrhs, + const lapack_complex_float* ab, lapack_int* ldab, + lapack_complex_float* b, lapack_int* ldb, + lapack_int *info ); +void LAPACK_ztbtrs( char* uplo, char* trans, char* diag, lapack_int* n, + lapack_int* kd, lapack_int* nrhs, + const lapack_complex_double* ab, lapack_int* ldab, + lapack_complex_double* b, lapack_int* ldb, + lapack_int *info ); +void LAPACK_sgecon( char* norm, lapack_int* n, const float* a, lapack_int* lda, + float* anorm, float* rcond, float* work, lapack_int* iwork, + lapack_int *info ); +void LAPACK_dgecon( char* norm, lapack_int* n, const double* a, lapack_int* lda, + double* anorm, double* rcond, double* work, + lapack_int* iwork, lapack_int *info ); +void LAPACK_cgecon( char* norm, lapack_int* n, const lapack_complex_float* a, + lapack_int* lda, float* anorm, float* rcond, + lapack_complex_float* work, float* rwork, + lapack_int *info ); +void LAPACK_zgecon( char* norm, lapack_int* n, const lapack_complex_double* a, + lapack_int* lda, double* anorm, double* rcond, + lapack_complex_double* work, double* rwork, + lapack_int *info ); +void LAPACK_sgbcon( char* norm, lapack_int* n, lapack_int* kl, lapack_int* ku, + const float* ab, lapack_int* ldab, const lapack_int* ipiv, + float* anorm, float* rcond, float* work, lapack_int* iwork, + lapack_int *info ); +void LAPACK_dgbcon( char* norm, lapack_int* n, lapack_int* kl, lapack_int* ku, + const double* ab, lapack_int* ldab, const lapack_int* ipiv, + double* anorm, double* rcond, double* work, + lapack_int* iwork, lapack_int *info ); +void LAPACK_cgbcon( char* norm, lapack_int* n, lapack_int* kl, lapack_int* ku, + const lapack_complex_float* ab, lapack_int* ldab, + const lapack_int* ipiv, float* anorm, float* rcond, + lapack_complex_float* work, float* rwork, + lapack_int *info ); +void 
LAPACK_zgbcon( char* norm, lapack_int* n, lapack_int* kl, lapack_int* ku, + const lapack_complex_double* ab, lapack_int* ldab, + const lapack_int* ipiv, double* anorm, double* rcond, + lapack_complex_double* work, double* rwork, + lapack_int *info ); +void LAPACK_sgtcon( char* norm, lapack_int* n, const float* dl, const float* d, + const float* du, const float* du2, const lapack_int* ipiv, + float* anorm, float* rcond, float* work, lapack_int* iwork, + lapack_int *info ); +void LAPACK_dgtcon( char* norm, lapack_int* n, const double* dl, + const double* d, const double* du, const double* du2, + const lapack_int* ipiv, double* anorm, double* rcond, + double* work, lapack_int* iwork, lapack_int *info ); +void LAPACK_cgtcon( char* norm, lapack_int* n, const lapack_complex_float* dl, + const lapack_complex_float* d, + const lapack_complex_float* du, + const lapack_complex_float* du2, const lapack_int* ipiv, + float* anorm, float* rcond, lapack_complex_float* work, + lapack_int *info ); +void LAPACK_zgtcon( char* norm, lapack_int* n, const lapack_complex_double* dl, + const lapack_complex_double* d, + const lapack_complex_double* du, + const lapack_complex_double* du2, const lapack_int* ipiv, + double* anorm, double* rcond, lapack_complex_double* work, + lapack_int *info ); +void LAPACK_spocon( char* uplo, lapack_int* n, const float* a, lapack_int* lda, + float* anorm, float* rcond, float* work, lapack_int* iwork, + lapack_int *info ); +void LAPACK_dpocon( char* uplo, lapack_int* n, const double* a, lapack_int* lda, + double* anorm, double* rcond, double* work, + lapack_int* iwork, lapack_int *info ); +void LAPACK_cpocon( char* uplo, lapack_int* n, const lapack_complex_float* a, + lapack_int* lda, float* anorm, float* rcond, + lapack_complex_float* work, float* rwork, + lapack_int *info ); +void LAPACK_zpocon( char* uplo, lapack_int* n, const lapack_complex_double* a, + lapack_int* lda, double* anorm, double* rcond, + lapack_complex_double* work, double* rwork, + 
lapack_int *info ); +void LAPACK_sppcon( char* uplo, lapack_int* n, const float* ap, float* anorm, + float* rcond, float* work, lapack_int* iwork, + lapack_int *info ); +void LAPACK_dppcon( char* uplo, lapack_int* n, const double* ap, double* anorm, + double* rcond, double* work, lapack_int* iwork, + lapack_int *info ); +void LAPACK_cppcon( char* uplo, lapack_int* n, const lapack_complex_float* ap, + float* anorm, float* rcond, lapack_complex_float* work, + float* rwork, lapack_int *info ); +void LAPACK_zppcon( char* uplo, lapack_int* n, const lapack_complex_double* ap, + double* anorm, double* rcond, lapack_complex_double* work, + double* rwork, lapack_int *info ); +void LAPACK_spbcon( char* uplo, lapack_int* n, lapack_int* kd, const float* ab, + lapack_int* ldab, float* anorm, float* rcond, float* work, + lapack_int* iwork, lapack_int *info ); +void LAPACK_dpbcon( char* uplo, lapack_int* n, lapack_int* kd, const double* ab, + lapack_int* ldab, double* anorm, double* rcond, + double* work, lapack_int* iwork, lapack_int *info ); +void LAPACK_cpbcon( char* uplo, lapack_int* n, lapack_int* kd, + const lapack_complex_float* ab, lapack_int* ldab, + float* anorm, float* rcond, lapack_complex_float* work, + float* rwork, lapack_int *info ); +void LAPACK_zpbcon( char* uplo, lapack_int* n, lapack_int* kd, + const lapack_complex_double* ab, lapack_int* ldab, + double* anorm, double* rcond, lapack_complex_double* work, + double* rwork, lapack_int *info ); +void LAPACK_sptcon( lapack_int* n, const float* d, const float* e, float* anorm, + float* rcond, float* work, lapack_int *info ); +void LAPACK_dptcon( lapack_int* n, const double* d, const double* e, + double* anorm, double* rcond, double* work, + lapack_int *info ); +void LAPACK_cptcon( lapack_int* n, const float* d, + const lapack_complex_float* e, float* anorm, float* rcond, + float* work, lapack_int *info ); +void LAPACK_zptcon( lapack_int* n, const double* d, + const lapack_complex_double* e, double* anorm, + double* 
rcond, double* work, lapack_int *info ); +void LAPACK_ssycon( char* uplo, lapack_int* n, const float* a, lapack_int* lda, + const lapack_int* ipiv, float* anorm, float* rcond, + float* work, lapack_int* iwork, lapack_int *info ); +void LAPACK_dsycon( char* uplo, lapack_int* n, const double* a, lapack_int* lda, + const lapack_int* ipiv, double* anorm, double* rcond, + double* work, lapack_int* iwork, lapack_int *info ); +void LAPACK_csycon( char* uplo, lapack_int* n, const lapack_complex_float* a, + lapack_int* lda, const lapack_int* ipiv, float* anorm, + float* rcond, lapack_complex_float* work, + lapack_int *info ); +void LAPACK_zsycon( char* uplo, lapack_int* n, const lapack_complex_double* a, + lapack_int* lda, const lapack_int* ipiv, double* anorm, + double* rcond, lapack_complex_double* work, + lapack_int *info ); +void LAPACK_checon( char* uplo, lapack_int* n, const lapack_complex_float* a, + lapack_int* lda, const lapack_int* ipiv, float* anorm, + float* rcond, lapack_complex_float* work, + lapack_int *info ); +void LAPACK_zhecon( char* uplo, lapack_int* n, const lapack_complex_double* a, + lapack_int* lda, const lapack_int* ipiv, double* anorm, + double* rcond, lapack_complex_double* work, + lapack_int *info ); +void LAPACK_sspcon( char* uplo, lapack_int* n, const float* ap, + const lapack_int* ipiv, float* anorm, float* rcond, + float* work, lapack_int* iwork, lapack_int *info ); +void LAPACK_dspcon( char* uplo, lapack_int* n, const double* ap, + const lapack_int* ipiv, double* anorm, double* rcond, + double* work, lapack_int* iwork, lapack_int *info ); +void LAPACK_cspcon( char* uplo, lapack_int* n, const lapack_complex_float* ap, + const lapack_int* ipiv, float* anorm, float* rcond, + lapack_complex_float* work, lapack_int *info ); +void LAPACK_zspcon( char* uplo, lapack_int* n, const lapack_complex_double* ap, + const lapack_int* ipiv, double* anorm, double* rcond, + lapack_complex_double* work, lapack_int *info ); +void LAPACK_chpcon( char* uplo, 
lapack_int* n, const lapack_complex_float* ap, + const lapack_int* ipiv, float* anorm, float* rcond, + lapack_complex_float* work, lapack_int *info ); +void LAPACK_zhpcon( char* uplo, lapack_int* n, const lapack_complex_double* ap, + const lapack_int* ipiv, double* anorm, double* rcond, + lapack_complex_double* work, lapack_int *info ); +void LAPACK_strcon( char* norm, char* uplo, char* diag, lapack_int* n, + const float* a, lapack_int* lda, float* rcond, float* work, + lapack_int* iwork, lapack_int *info ); +void LAPACK_dtrcon( char* norm, char* uplo, char* diag, lapack_int* n, + const double* a, lapack_int* lda, double* rcond, + double* work, lapack_int* iwork, lapack_int *info ); +void LAPACK_ctrcon( char* norm, char* uplo, char* diag, lapack_int* n, + const lapack_complex_float* a, lapack_int* lda, + float* rcond, lapack_complex_float* work, float* rwork, + lapack_int *info ); +void LAPACK_ztrcon( char* norm, char* uplo, char* diag, lapack_int* n, + const lapack_complex_double* a, lapack_int* lda, + double* rcond, lapack_complex_double* work, double* rwork, + lapack_int *info ); +void LAPACK_stpcon( char* norm, char* uplo, char* diag, lapack_int* n, + const float* ap, float* rcond, float* work, + lapack_int* iwork, lapack_int *info ); +void LAPACK_dtpcon( char* norm, char* uplo, char* diag, lapack_int* n, + const double* ap, double* rcond, double* work, + lapack_int* iwork, lapack_int *info ); +void LAPACK_ctpcon( char* norm, char* uplo, char* diag, lapack_int* n, + const lapack_complex_float* ap, float* rcond, + lapack_complex_float* work, float* rwork, + lapack_int *info ); +void LAPACK_ztpcon( char* norm, char* uplo, char* diag, lapack_int* n, + const lapack_complex_double* ap, double* rcond, + lapack_complex_double* work, double* rwork, + lapack_int *info ); +void LAPACK_stbcon( char* norm, char* uplo, char* diag, lapack_int* n, + lapack_int* kd, const float* ab, lapack_int* ldab, + float* rcond, float* work, lapack_int* iwork, + lapack_int *info ); +void 
LAPACK_dtbcon( char* norm, char* uplo, char* diag, lapack_int* n, + lapack_int* kd, const double* ab, lapack_int* ldab, + double* rcond, double* work, lapack_int* iwork, + lapack_int *info ); +void LAPACK_ctbcon( char* norm, char* uplo, char* diag, lapack_int* n, + lapack_int* kd, const lapack_complex_float* ab, + lapack_int* ldab, float* rcond, lapack_complex_float* work, + float* rwork, lapack_int *info ); +void LAPACK_ztbcon( char* norm, char* uplo, char* diag, lapack_int* n, + lapack_int* kd, const lapack_complex_double* ab, + lapack_int* ldab, double* rcond, + lapack_complex_double* work, double* rwork, + lapack_int *info ); +void LAPACK_sgerfs( char* trans, lapack_int* n, lapack_int* nrhs, + const float* a, lapack_int* lda, const float* af, + lapack_int* ldaf, const lapack_int* ipiv, const float* b, + lapack_int* ldb, float* x, lapack_int* ldx, float* ferr, + float* berr, float* work, lapack_int* iwork, + lapack_int *info ); +void LAPACK_dgerfs( char* trans, lapack_int* n, lapack_int* nrhs, + const double* a, lapack_int* lda, const double* af, + lapack_int* ldaf, const lapack_int* ipiv, const double* b, + lapack_int* ldb, double* x, lapack_int* ldx, double* ferr, + double* berr, double* work, lapack_int* iwork, + lapack_int *info ); +void LAPACK_cgerfs( char* trans, lapack_int* n, lapack_int* nrhs, + const lapack_complex_float* a, lapack_int* lda, + const lapack_complex_float* af, lapack_int* ldaf, + const lapack_int* ipiv, const lapack_complex_float* b, + lapack_int* ldb, lapack_complex_float* x, lapack_int* ldx, + float* ferr, float* berr, lapack_complex_float* work, + float* rwork, lapack_int *info ); +void LAPACK_zgerfs( char* trans, lapack_int* n, lapack_int* nrhs, + const lapack_complex_double* a, lapack_int* lda, + const lapack_complex_double* af, lapack_int* ldaf, + const lapack_int* ipiv, const lapack_complex_double* b, + lapack_int* ldb, lapack_complex_double* x, lapack_int* ldx, + double* ferr, double* berr, lapack_complex_double* work, + double* 
rwork, lapack_int *info ); +void LAPACK_dgerfsx( char* trans, char* equed, lapack_int* n, lapack_int* nrhs, + const double* a, lapack_int* lda, const double* af, + lapack_int* ldaf, const lapack_int* ipiv, const double* r, + const double* c, const double* b, lapack_int* ldb, + double* x, lapack_int* ldx, double* rcond, double* berr, + lapack_int* n_err_bnds, double* err_bnds_norm, + double* err_bnds_comp, lapack_int* nparams, double* params, + double* work, lapack_int* iwork, lapack_int *info ); +void LAPACK_sgerfsx( char* trans, char* equed, lapack_int* n, lapack_int* nrhs, + const float* a, lapack_int* lda, const float* af, + lapack_int* ldaf, const lapack_int* ipiv, const float* r, + const float* c, const float* b, lapack_int* ldb, float* x, + lapack_int* ldx, float* rcond, float* berr, + lapack_int* n_err_bnds, float* err_bnds_norm, + float* err_bnds_comp, lapack_int* nparams, float* params, + float* work, lapack_int* iwork, lapack_int *info ); +void LAPACK_zgerfsx( char* trans, char* equed, lapack_int* n, lapack_int* nrhs, + const lapack_complex_double* a, lapack_int* lda, + const lapack_complex_double* af, lapack_int* ldaf, + const lapack_int* ipiv, const double* r, const double* c, + const lapack_complex_double* b, lapack_int* ldb, + lapack_complex_double* x, lapack_int* ldx, double* rcond, + double* berr, lapack_int* n_err_bnds, + double* err_bnds_norm, double* err_bnds_comp, + lapack_int* nparams, double* params, + lapack_complex_double* work, double* rwork, + lapack_int *info ); +void LAPACK_cgerfsx( char* trans, char* equed, lapack_int* n, lapack_int* nrhs, + const lapack_complex_float* a, lapack_int* lda, + const lapack_complex_float* af, lapack_int* ldaf, + const lapack_int* ipiv, const float* r, const float* c, + const lapack_complex_float* b, lapack_int* ldb, + lapack_complex_float* x, lapack_int* ldx, float* rcond, + float* berr, lapack_int* n_err_bnds, float* err_bnds_norm, + float* err_bnds_comp, lapack_int* nparams, float* params, + 
lapack_complex_float* work, float* rwork, + lapack_int *info ); +void LAPACK_sgbrfs( char* trans, lapack_int* n, lapack_int* kl, lapack_int* ku, + lapack_int* nrhs, const float* ab, lapack_int* ldab, + const float* afb, lapack_int* ldafb, const lapack_int* ipiv, + const float* b, lapack_int* ldb, float* x, lapack_int* ldx, + float* ferr, float* berr, float* work, lapack_int* iwork, + lapack_int *info ); +void LAPACK_dgbrfs( char* trans, lapack_int* n, lapack_int* kl, lapack_int* ku, + lapack_int* nrhs, const double* ab, lapack_int* ldab, + const double* afb, lapack_int* ldafb, + const lapack_int* ipiv, const double* b, lapack_int* ldb, + double* x, lapack_int* ldx, double* ferr, double* berr, + double* work, lapack_int* iwork, lapack_int *info ); +void LAPACK_cgbrfs( char* trans, lapack_int* n, lapack_int* kl, lapack_int* ku, + lapack_int* nrhs, const lapack_complex_float* ab, + lapack_int* ldab, const lapack_complex_float* afb, + lapack_int* ldafb, const lapack_int* ipiv, + const lapack_complex_float* b, lapack_int* ldb, + lapack_complex_float* x, lapack_int* ldx, float* ferr, + float* berr, lapack_complex_float* work, float* rwork, + lapack_int *info ); +void LAPACK_zgbrfs( char* trans, lapack_int* n, lapack_int* kl, lapack_int* ku, + lapack_int* nrhs, const lapack_complex_double* ab, + lapack_int* ldab, const lapack_complex_double* afb, + lapack_int* ldafb, const lapack_int* ipiv, + const lapack_complex_double* b, lapack_int* ldb, + lapack_complex_double* x, lapack_int* ldx, double* ferr, + double* berr, lapack_complex_double* work, double* rwork, + lapack_int *info ); +void LAPACK_dgbrfsx( char* trans, char* equed, lapack_int* n, lapack_int* kl, + lapack_int* ku, lapack_int* nrhs, const double* ab, + lapack_int* ldab, const double* afb, lapack_int* ldafb, + const lapack_int* ipiv, const double* r, const double* c, + const double* b, lapack_int* ldb, double* x, + lapack_int* ldx, double* rcond, double* berr, + lapack_int* n_err_bnds, double* err_bnds_norm, + 
double* err_bnds_comp, lapack_int* nparams, double* params, + double* work, lapack_int* iwork, lapack_int *info ); +void LAPACK_sgbrfsx( char* trans, char* equed, lapack_int* n, lapack_int* kl, + lapack_int* ku, lapack_int* nrhs, const float* ab, + lapack_int* ldab, const float* afb, lapack_int* ldafb, + const lapack_int* ipiv, const float* r, const float* c, + const float* b, lapack_int* ldb, float* x, lapack_int* ldx, + float* rcond, float* berr, lapack_int* n_err_bnds, + float* err_bnds_norm, float* err_bnds_comp, + lapack_int* nparams, float* params, float* work, + lapack_int* iwork, lapack_int *info ); +void LAPACK_zgbrfsx( char* trans, char* equed, lapack_int* n, lapack_int* kl, + lapack_int* ku, lapack_int* nrhs, + const lapack_complex_double* ab, lapack_int* ldab, + const lapack_complex_double* afb, lapack_int* ldafb, + const lapack_int* ipiv, const double* r, const double* c, + const lapack_complex_double* b, lapack_int* ldb, + lapack_complex_double* x, lapack_int* ldx, double* rcond, + double* berr, lapack_int* n_err_bnds, + double* err_bnds_norm, double* err_bnds_comp, + lapack_int* nparams, double* params, + lapack_complex_double* work, double* rwork, + lapack_int *info ); +void LAPACK_cgbrfsx( char* trans, char* equed, lapack_int* n, lapack_int* kl, + lapack_int* ku, lapack_int* nrhs, + const lapack_complex_float* ab, lapack_int* ldab, + const lapack_complex_float* afb, lapack_int* ldafb, + const lapack_int* ipiv, const float* r, const float* c, + const lapack_complex_float* b, lapack_int* ldb, + lapack_complex_float* x, lapack_int* ldx, float* rcond, + float* berr, lapack_int* n_err_bnds, float* err_bnds_norm, + float* err_bnds_comp, lapack_int* nparams, float* params, + lapack_complex_float* work, float* rwork, + lapack_int *info ); +void LAPACK_sgtrfs( char* trans, lapack_int* n, lapack_int* nrhs, + const float* dl, const float* d, const float* du, + const float* dlf, const float* df, const float* duf, + const float* du2, const lapack_int* ipiv, 
const float* b, + lapack_int* ldb, float* x, lapack_int* ldx, float* ferr, + float* berr, float* work, lapack_int* iwork, + lapack_int *info ); +void LAPACK_dgtrfs( char* trans, lapack_int* n, lapack_int* nrhs, + const double* dl, const double* d, const double* du, + const double* dlf, const double* df, const double* duf, + const double* du2, const lapack_int* ipiv, const double* b, + lapack_int* ldb, double* x, lapack_int* ldx, double* ferr, + double* berr, double* work, lapack_int* iwork, + lapack_int *info ); +void LAPACK_cgtrfs( char* trans, lapack_int* n, lapack_int* nrhs, + const lapack_complex_float* dl, + const lapack_complex_float* d, + const lapack_complex_float* du, + const lapack_complex_float* dlf, + const lapack_complex_float* df, + const lapack_complex_float* duf, + const lapack_complex_float* du2, const lapack_int* ipiv, + const lapack_complex_float* b, lapack_int* ldb, + lapack_complex_float* x, lapack_int* ldx, float* ferr, + float* berr, lapack_complex_float* work, float* rwork, + lapack_int *info ); +void LAPACK_zgtrfs( char* trans, lapack_int* n, lapack_int* nrhs, + const lapack_complex_double* dl, + const lapack_complex_double* d, + const lapack_complex_double* du, + const lapack_complex_double* dlf, + const lapack_complex_double* df, + const lapack_complex_double* duf, + const lapack_complex_double* du2, const lapack_int* ipiv, + const lapack_complex_double* b, lapack_int* ldb, + lapack_complex_double* x, lapack_int* ldx, double* ferr, + double* berr, lapack_complex_double* work, double* rwork, + lapack_int *info ); +void LAPACK_sporfs( char* uplo, lapack_int* n, lapack_int* nrhs, const float* a, + lapack_int* lda, const float* af, lapack_int* ldaf, + const float* b, lapack_int* ldb, float* x, lapack_int* ldx, + float* ferr, float* berr, float* work, lapack_int* iwork, + lapack_int *info ); +void LAPACK_dporfs( char* uplo, lapack_int* n, lapack_int* nrhs, + const double* a, lapack_int* lda, const double* af, + lapack_int* ldaf, const double* 
b, lapack_int* ldb, + double* x, lapack_int* ldx, double* ferr, double* berr, + double* work, lapack_int* iwork, lapack_int *info ); +void LAPACK_cporfs( char* uplo, lapack_int* n, lapack_int* nrhs, + const lapack_complex_float* a, lapack_int* lda, + const lapack_complex_float* af, lapack_int* ldaf, + const lapack_complex_float* b, lapack_int* ldb, + lapack_complex_float* x, lapack_int* ldx, float* ferr, + float* berr, lapack_complex_float* work, float* rwork, + lapack_int *info ); +void LAPACK_zporfs( char* uplo, lapack_int* n, lapack_int* nrhs, + const lapack_complex_double* a, lapack_int* lda, + const lapack_complex_double* af, lapack_int* ldaf, + const lapack_complex_double* b, lapack_int* ldb, + lapack_complex_double* x, lapack_int* ldx, double* ferr, + double* berr, lapack_complex_double* work, double* rwork, + lapack_int *info ); +void LAPACK_dporfsx( char* uplo, char* equed, lapack_int* n, lapack_int* nrhs, + const double* a, lapack_int* lda, const double* af, + lapack_int* ldaf, const double* s, const double* b, + lapack_int* ldb, double* x, lapack_int* ldx, double* rcond, + double* berr, lapack_int* n_err_bnds, + double* err_bnds_norm, double* err_bnds_comp, + lapack_int* nparams, double* params, double* work, + lapack_int* iwork, lapack_int *info ); +void LAPACK_sporfsx( char* uplo, char* equed, lapack_int* n, lapack_int* nrhs, + const float* a, lapack_int* lda, const float* af, + lapack_int* ldaf, const float* s, const float* b, + lapack_int* ldb, float* x, lapack_int* ldx, float* rcond, + float* berr, lapack_int* n_err_bnds, float* err_bnds_norm, + float* err_bnds_comp, lapack_int* nparams, float* params, + float* work, lapack_int* iwork, lapack_int *info ); +void LAPACK_zporfsx( char* uplo, char* equed, lapack_int* n, lapack_int* nrhs, + const lapack_complex_double* a, lapack_int* lda, + const lapack_complex_double* af, lapack_int* ldaf, + const double* s, const lapack_complex_double* b, + lapack_int* ldb, lapack_complex_double* x, lapack_int* ldx, + 
double* rcond, double* berr, lapack_int* n_err_bnds, + double* err_bnds_norm, double* err_bnds_comp, + lapack_int* nparams, double* params, + lapack_complex_double* work, double* rwork, + lapack_int *info ); +void LAPACK_cporfsx( char* uplo, char* equed, lapack_int* n, lapack_int* nrhs, + const lapack_complex_float* a, lapack_int* lda, + const lapack_complex_float* af, lapack_int* ldaf, + const float* s, const lapack_complex_float* b, + lapack_int* ldb, lapack_complex_float* x, lapack_int* ldx, + float* rcond, float* berr, lapack_int* n_err_bnds, + float* err_bnds_norm, float* err_bnds_comp, + lapack_int* nparams, float* params, + lapack_complex_float* work, float* rwork, + lapack_int *info ); +void LAPACK_spprfs( char* uplo, lapack_int* n, lapack_int* nrhs, + const float* ap, const float* afp, const float* b, + lapack_int* ldb, float* x, lapack_int* ldx, float* ferr, + float* berr, float* work, lapack_int* iwork, + lapack_int *info ); +void LAPACK_dpprfs( char* uplo, lapack_int* n, lapack_int* nrhs, + const double* ap, const double* afp, const double* b, + lapack_int* ldb, double* x, lapack_int* ldx, double* ferr, + double* berr, double* work, lapack_int* iwork, + lapack_int *info ); +void LAPACK_cpprfs( char* uplo, lapack_int* n, lapack_int* nrhs, + const lapack_complex_float* ap, + const lapack_complex_float* afp, + const lapack_complex_float* b, lapack_int* ldb, + lapack_complex_float* x, lapack_int* ldx, float* ferr, + float* berr, lapack_complex_float* work, float* rwork, + lapack_int *info ); +void LAPACK_zpprfs( char* uplo, lapack_int* n, lapack_int* nrhs, + const lapack_complex_double* ap, + const lapack_complex_double* afp, + const lapack_complex_double* b, lapack_int* ldb, + lapack_complex_double* x, lapack_int* ldx, double* ferr, + double* berr, lapack_complex_double* work, double* rwork, + lapack_int *info ); +void LAPACK_spbrfs( char* uplo, lapack_int* n, lapack_int* kd, lapack_int* nrhs, + const float* ab, lapack_int* ldab, const float* afb, + 
lapack_int* ldafb, const float* b, lapack_int* ldb, + float* x, lapack_int* ldx, float* ferr, float* berr, + float* work, lapack_int* iwork, lapack_int *info ); +void LAPACK_dpbrfs( char* uplo, lapack_int* n, lapack_int* kd, lapack_int* nrhs, + const double* ab, lapack_int* ldab, const double* afb, + lapack_int* ldafb, const double* b, lapack_int* ldb, + double* x, lapack_int* ldx, double* ferr, double* berr, + double* work, lapack_int* iwork, lapack_int *info ); +void LAPACK_cpbrfs( char* uplo, lapack_int* n, lapack_int* kd, lapack_int* nrhs, + const lapack_complex_float* ab, lapack_int* ldab, + const lapack_complex_float* afb, lapack_int* ldafb, + const lapack_complex_float* b, lapack_int* ldb, + lapack_complex_float* x, lapack_int* ldx, float* ferr, + float* berr, lapack_complex_float* work, float* rwork, + lapack_int *info ); +void LAPACK_zpbrfs( char* uplo, lapack_int* n, lapack_int* kd, lapack_int* nrhs, + const lapack_complex_double* ab, lapack_int* ldab, + const lapack_complex_double* afb, lapack_int* ldafb, + const lapack_complex_double* b, lapack_int* ldb, + lapack_complex_double* x, lapack_int* ldx, double* ferr, + double* berr, lapack_complex_double* work, double* rwork, + lapack_int *info ); +void LAPACK_sptrfs( lapack_int* n, lapack_int* nrhs, const float* d, + const float* e, const float* df, const float* ef, + const float* b, lapack_int* ldb, float* x, lapack_int* ldx, + float* ferr, float* berr, float* work, lapack_int *info ); +void LAPACK_dptrfs( lapack_int* n, lapack_int* nrhs, const double* d, + const double* e, const double* df, const double* ef, + const double* b, lapack_int* ldb, double* x, + lapack_int* ldx, double* ferr, double* berr, double* work, + lapack_int *info ); +void LAPACK_cptrfs( char* uplo, lapack_int* n, lapack_int* nrhs, const float* d, + const lapack_complex_float* e, const float* df, + const lapack_complex_float* ef, + const lapack_complex_float* b, lapack_int* ldb, + lapack_complex_float* x, lapack_int* ldx, float* ferr, + 
float* berr, lapack_complex_float* work, float* rwork, + lapack_int *info ); +void LAPACK_zptrfs( char* uplo, lapack_int* n, lapack_int* nrhs, + const double* d, const lapack_complex_double* e, + const double* df, const lapack_complex_double* ef, + const lapack_complex_double* b, lapack_int* ldb, + lapack_complex_double* x, lapack_int* ldx, double* ferr, + double* berr, lapack_complex_double* work, double* rwork, + lapack_int *info ); +void LAPACK_ssyrfs( char* uplo, lapack_int* n, lapack_int* nrhs, const float* a, + lapack_int* lda, const float* af, lapack_int* ldaf, + const lapack_int* ipiv, const float* b, lapack_int* ldb, + float* x, lapack_int* ldx, float* ferr, float* berr, + float* work, lapack_int* iwork, lapack_int *info ); +void LAPACK_dsyrfs( char* uplo, lapack_int* n, lapack_int* nrhs, + const double* a, lapack_int* lda, const double* af, + lapack_int* ldaf, const lapack_int* ipiv, const double* b, + lapack_int* ldb, double* x, lapack_int* ldx, double* ferr, + double* berr, double* work, lapack_int* iwork, + lapack_int *info ); +void LAPACK_csyrfs( char* uplo, lapack_int* n, lapack_int* nrhs, + const lapack_complex_float* a, lapack_int* lda, + const lapack_complex_float* af, lapack_int* ldaf, + const lapack_int* ipiv, const lapack_complex_float* b, + lapack_int* ldb, lapack_complex_float* x, lapack_int* ldx, + float* ferr, float* berr, lapack_complex_float* work, + float* rwork, lapack_int *info ); +void LAPACK_zsyrfs( char* uplo, lapack_int* n, lapack_int* nrhs, + const lapack_complex_double* a, lapack_int* lda, + const lapack_complex_double* af, lapack_int* ldaf, + const lapack_int* ipiv, const lapack_complex_double* b, + lapack_int* ldb, lapack_complex_double* x, lapack_int* ldx, + double* ferr, double* berr, lapack_complex_double* work, + double* rwork, lapack_int *info ); +void LAPACK_dsyrfsx( char* uplo, char* equed, lapack_int* n, lapack_int* nrhs, + const double* a, lapack_int* lda, const double* af, + lapack_int* ldaf, const lapack_int* ipiv, 
const double* s, + const double* b, lapack_int* ldb, double* x, + lapack_int* ldx, double* rcond, double* berr, + lapack_int* n_err_bnds, double* err_bnds_norm, + double* err_bnds_comp, lapack_int* nparams, double* params, + double* work, lapack_int* iwork, lapack_int *info ); +void LAPACK_ssyrfsx( char* uplo, char* equed, lapack_int* n, lapack_int* nrhs, + const float* a, lapack_int* lda, const float* af, + lapack_int* ldaf, const lapack_int* ipiv, const float* s, + const float* b, lapack_int* ldb, float* x, lapack_int* ldx, + float* rcond, float* berr, lapack_int* n_err_bnds, + float* err_bnds_norm, float* err_bnds_comp, + lapack_int* nparams, float* params, float* work, + lapack_int* iwork, lapack_int *info ); +void LAPACK_zsyrfsx( char* uplo, char* equed, lapack_int* n, lapack_int* nrhs, + const lapack_complex_double* a, lapack_int* lda, + const lapack_complex_double* af, lapack_int* ldaf, + const lapack_int* ipiv, const double* s, + const lapack_complex_double* b, lapack_int* ldb, + lapack_complex_double* x, lapack_int* ldx, double* rcond, + double* berr, lapack_int* n_err_bnds, + double* err_bnds_norm, double* err_bnds_comp, + lapack_int* nparams, double* params, + lapack_complex_double* work, double* rwork, + lapack_int *info ); +void LAPACK_csyrfsx( char* uplo, char* equed, lapack_int* n, lapack_int* nrhs, + const lapack_complex_float* a, lapack_int* lda, + const lapack_complex_float* af, lapack_int* ldaf, + const lapack_int* ipiv, const float* s, + const lapack_complex_float* b, lapack_int* ldb, + lapack_complex_float* x, lapack_int* ldx, float* rcond, + float* berr, lapack_int* n_err_bnds, float* err_bnds_norm, + float* err_bnds_comp, lapack_int* nparams, float* params, + lapack_complex_float* work, float* rwork, + lapack_int *info ); +void LAPACK_cherfs( char* uplo, lapack_int* n, lapack_int* nrhs, + const lapack_complex_float* a, lapack_int* lda, + const lapack_complex_float* af, lapack_int* ldaf, + const lapack_int* ipiv, const lapack_complex_float* b, 
+ lapack_int* ldb, lapack_complex_float* x, lapack_int* ldx, + float* ferr, float* berr, lapack_complex_float* work, + float* rwork, lapack_int *info ); +void LAPACK_zherfs( char* uplo, lapack_int* n, lapack_int* nrhs, + const lapack_complex_double* a, lapack_int* lda, + const lapack_complex_double* af, lapack_int* ldaf, + const lapack_int* ipiv, const lapack_complex_double* b, + lapack_int* ldb, lapack_complex_double* x, lapack_int* ldx, + double* ferr, double* berr, lapack_complex_double* work, + double* rwork, lapack_int *info ); +void LAPACK_zherfsx( char* uplo, char* equed, lapack_int* n, lapack_int* nrhs, + const lapack_complex_double* a, lapack_int* lda, + const lapack_complex_double* af, lapack_int* ldaf, + const lapack_int* ipiv, const double* s, + const lapack_complex_double* b, lapack_int* ldb, + lapack_complex_double* x, lapack_int* ldx, double* rcond, + double* berr, lapack_int* n_err_bnds, + double* err_bnds_norm, double* err_bnds_comp, + lapack_int* nparams, double* params, + lapack_complex_double* work, double* rwork, + lapack_int *info ); +void LAPACK_cherfsx( char* uplo, char* equed, lapack_int* n, lapack_int* nrhs, + const lapack_complex_float* a, lapack_int* lda, + const lapack_complex_float* af, lapack_int* ldaf, + const lapack_int* ipiv, const float* s, + const lapack_complex_float* b, lapack_int* ldb, + lapack_complex_float* x, lapack_int* ldx, float* rcond, + float* berr, lapack_int* n_err_bnds, float* err_bnds_norm, + float* err_bnds_comp, lapack_int* nparams, float* params, + lapack_complex_float* work, float* rwork, + lapack_int *info ); +void LAPACK_ssprfs( char* uplo, lapack_int* n, lapack_int* nrhs, + const float* ap, const float* afp, const lapack_int* ipiv, + const float* b, lapack_int* ldb, float* x, lapack_int* ldx, + float* ferr, float* berr, float* work, lapack_int* iwork, + lapack_int *info ); +void LAPACK_dsprfs( char* uplo, lapack_int* n, lapack_int* nrhs, + const double* ap, const double* afp, const lapack_int* ipiv, + const 
double* b, lapack_int* ldb, double* x, + lapack_int* ldx, double* ferr, double* berr, double* work, + lapack_int* iwork, lapack_int *info ); +void LAPACK_csprfs( char* uplo, lapack_int* n, lapack_int* nrhs, + const lapack_complex_float* ap, + const lapack_complex_float* afp, const lapack_int* ipiv, + const lapack_complex_float* b, lapack_int* ldb, + lapack_complex_float* x, lapack_int* ldx, float* ferr, + float* berr, lapack_complex_float* work, float* rwork, + lapack_int *info ); +void LAPACK_zsprfs( char* uplo, lapack_int* n, lapack_int* nrhs, + const lapack_complex_double* ap, + const lapack_complex_double* afp, const lapack_int* ipiv, + const lapack_complex_double* b, lapack_int* ldb, + lapack_complex_double* x, lapack_int* ldx, double* ferr, + double* berr, lapack_complex_double* work, double* rwork, + lapack_int *info ); +void LAPACK_chprfs( char* uplo, lapack_int* n, lapack_int* nrhs, + const lapack_complex_float* ap, + const lapack_complex_float* afp, const lapack_int* ipiv, + const lapack_complex_float* b, lapack_int* ldb, + lapack_complex_float* x, lapack_int* ldx, float* ferr, + float* berr, lapack_complex_float* work, float* rwork, + lapack_int *info ); +void LAPACK_zhprfs( char* uplo, lapack_int* n, lapack_int* nrhs, + const lapack_complex_double* ap, + const lapack_complex_double* afp, const lapack_int* ipiv, + const lapack_complex_double* b, lapack_int* ldb, + lapack_complex_double* x, lapack_int* ldx, double* ferr, + double* berr, lapack_complex_double* work, double* rwork, + lapack_int *info ); +void LAPACK_strrfs( char* uplo, char* trans, char* diag, lapack_int* n, + lapack_int* nrhs, const float* a, lapack_int* lda, + const float* b, lapack_int* ldb, const float* x, + lapack_int* ldx, float* ferr, float* berr, float* work, + lapack_int* iwork, lapack_int *info ); +void LAPACK_dtrrfs( char* uplo, char* trans, char* diag, lapack_int* n, + lapack_int* nrhs, const double* a, lapack_int* lda, + const double* b, lapack_int* ldb, const double* x, + 
lapack_int* ldx, double* ferr, double* berr, double* work, + lapack_int* iwork, lapack_int *info ); +void LAPACK_ctrrfs( char* uplo, char* trans, char* diag, lapack_int* n, + lapack_int* nrhs, const lapack_complex_float* a, + lapack_int* lda, const lapack_complex_float* b, + lapack_int* ldb, const lapack_complex_float* x, + lapack_int* ldx, float* ferr, float* berr, + lapack_complex_float* work, float* rwork, + lapack_int *info ); +void LAPACK_ztrrfs( char* uplo, char* trans, char* diag, lapack_int* n, + lapack_int* nrhs, const lapack_complex_double* a, + lapack_int* lda, const lapack_complex_double* b, + lapack_int* ldb, const lapack_complex_double* x, + lapack_int* ldx, double* ferr, double* berr, + lapack_complex_double* work, double* rwork, + lapack_int *info ); +void LAPACK_stprfs( char* uplo, char* trans, char* diag, lapack_int* n, + lapack_int* nrhs, const float* ap, const float* b, + lapack_int* ldb, const float* x, lapack_int* ldx, + float* ferr, float* berr, float* work, lapack_int* iwork, + lapack_int *info ); +void LAPACK_dtprfs( char* uplo, char* trans, char* diag, lapack_int* n, + lapack_int* nrhs, const double* ap, const double* b, + lapack_int* ldb, const double* x, lapack_int* ldx, + double* ferr, double* berr, double* work, lapack_int* iwork, + lapack_int *info ); +void LAPACK_ctprfs( char* uplo, char* trans, char* diag, lapack_int* n, + lapack_int* nrhs, const lapack_complex_float* ap, + const lapack_complex_float* b, lapack_int* ldb, + const lapack_complex_float* x, lapack_int* ldx, float* ferr, + float* berr, lapack_complex_float* work, float* rwork, + lapack_int *info ); +void LAPACK_ztprfs( char* uplo, char* trans, char* diag, lapack_int* n, + lapack_int* nrhs, const lapack_complex_double* ap, + const lapack_complex_double* b, lapack_int* ldb, + const lapack_complex_double* x, lapack_int* ldx, + double* ferr, double* berr, lapack_complex_double* work, + double* rwork, lapack_int *info ); +void LAPACK_stbrfs( char* uplo, char* trans, char* 
diag, lapack_int* n, + lapack_int* kd, lapack_int* nrhs, const float* ab, + lapack_int* ldab, const float* b, lapack_int* ldb, + const float* x, lapack_int* ldx, float* ferr, float* berr, + float* work, lapack_int* iwork, lapack_int *info ); +void LAPACK_dtbrfs( char* uplo, char* trans, char* diag, lapack_int* n, + lapack_int* kd, lapack_int* nrhs, const double* ab, + lapack_int* ldab, const double* b, lapack_int* ldb, + const double* x, lapack_int* ldx, double* ferr, + double* berr, double* work, lapack_int* iwork, + lapack_int *info ); +void LAPACK_ctbrfs( char* uplo, char* trans, char* diag, lapack_int* n, + lapack_int* kd, lapack_int* nrhs, + const lapack_complex_float* ab, lapack_int* ldab, + const lapack_complex_float* b, lapack_int* ldb, + const lapack_complex_float* x, lapack_int* ldx, float* ferr, + float* berr, lapack_complex_float* work, float* rwork, + lapack_int *info ); +void LAPACK_ztbrfs( char* uplo, char* trans, char* diag, lapack_int* n, + lapack_int* kd, lapack_int* nrhs, + const lapack_complex_double* ab, lapack_int* ldab, + const lapack_complex_double* b, lapack_int* ldb, + const lapack_complex_double* x, lapack_int* ldx, + double* ferr, double* berr, lapack_complex_double* work, + double* rwork, lapack_int *info ); +void LAPACK_sgetri( lapack_int* n, float* a, lapack_int* lda, + const lapack_int* ipiv, float* work, lapack_int* lwork, + lapack_int *info ); +void LAPACK_dgetri( lapack_int* n, double* a, lapack_int* lda, + const lapack_int* ipiv, double* work, lapack_int* lwork, + lapack_int *info ); +void LAPACK_cgetri( lapack_int* n, lapack_complex_float* a, lapack_int* lda, + const lapack_int* ipiv, lapack_complex_float* work, + lapack_int* lwork, lapack_int *info ); +void LAPACK_zgetri( lapack_int* n, lapack_complex_double* a, lapack_int* lda, + const lapack_int* ipiv, lapack_complex_double* work, + lapack_int* lwork, lapack_int *info ); +void LAPACK_spotri( char* uplo, lapack_int* n, float* a, lapack_int* lda, + lapack_int *info ); +void 
LAPACK_dpotri( char* uplo, lapack_int* n, double* a, lapack_int* lda, + lapack_int *info ); +void LAPACK_cpotri( char* uplo, lapack_int* n, lapack_complex_float* a, + lapack_int* lda, lapack_int *info ); +void LAPACK_zpotri( char* uplo, lapack_int* n, lapack_complex_double* a, + lapack_int* lda, lapack_int *info ); +void LAPACK_dpftri( char* transr, char* uplo, lapack_int* n, double* a, + lapack_int *info ); +void LAPACK_spftri( char* transr, char* uplo, lapack_int* n, float* a, + lapack_int *info ); +void LAPACK_zpftri( char* transr, char* uplo, lapack_int* n, + lapack_complex_double* a, lapack_int *info ); +void LAPACK_cpftri( char* transr, char* uplo, lapack_int* n, + lapack_complex_float* a, lapack_int *info ); +void LAPACK_spptri( char* uplo, lapack_int* n, float* ap, lapack_int *info ); +void LAPACK_dpptri( char* uplo, lapack_int* n, double* ap, lapack_int *info ); +void LAPACK_cpptri( char* uplo, lapack_int* n, lapack_complex_float* ap, + lapack_int *info ); +void LAPACK_zpptri( char* uplo, lapack_int* n, lapack_complex_double* ap, + lapack_int *info ); +void LAPACK_ssytri( char* uplo, lapack_int* n, float* a, lapack_int* lda, + const lapack_int* ipiv, float* work, lapack_int *info ); +void LAPACK_dsytri( char* uplo, lapack_int* n, double* a, lapack_int* lda, + const lapack_int* ipiv, double* work, lapack_int *info ); +void LAPACK_csytri( char* uplo, lapack_int* n, lapack_complex_float* a, + lapack_int* lda, const lapack_int* ipiv, + lapack_complex_float* work, lapack_int *info ); +void LAPACK_zsytri( char* uplo, lapack_int* n, lapack_complex_double* a, + lapack_int* lda, const lapack_int* ipiv, + lapack_complex_double* work, lapack_int *info ); +void LAPACK_chetri( char* uplo, lapack_int* n, lapack_complex_float* a, + lapack_int* lda, const lapack_int* ipiv, + lapack_complex_float* work, lapack_int *info ); +void LAPACK_zhetri( char* uplo, lapack_int* n, lapack_complex_double* a, + lapack_int* lda, const lapack_int* ipiv, + lapack_complex_double* work, 
lapack_int *info ); +void LAPACK_ssptri( char* uplo, lapack_int* n, float* ap, + const lapack_int* ipiv, float* work, lapack_int *info ); +void LAPACK_dsptri( char* uplo, lapack_int* n, double* ap, + const lapack_int* ipiv, double* work, lapack_int *info ); +void LAPACK_csptri( char* uplo, lapack_int* n, lapack_complex_float* ap, + const lapack_int* ipiv, lapack_complex_float* work, + lapack_int *info ); +void LAPACK_zsptri( char* uplo, lapack_int* n, lapack_complex_double* ap, + const lapack_int* ipiv, lapack_complex_double* work, + lapack_int *info ); +void LAPACK_chptri( char* uplo, lapack_int* n, lapack_complex_float* ap, + const lapack_int* ipiv, lapack_complex_float* work, + lapack_int *info ); +void LAPACK_zhptri( char* uplo, lapack_int* n, lapack_complex_double* ap, + const lapack_int* ipiv, lapack_complex_double* work, + lapack_int *info ); +void LAPACK_strtri( char* uplo, char* diag, lapack_int* n, float* a, + lapack_int* lda, lapack_int *info ); +void LAPACK_dtrtri( char* uplo, char* diag, lapack_int* n, double* a, + lapack_int* lda, lapack_int *info ); +void LAPACK_ctrtri( char* uplo, char* diag, lapack_int* n, + lapack_complex_float* a, lapack_int* lda, + lapack_int *info ); +void LAPACK_ztrtri( char* uplo, char* diag, lapack_int* n, + lapack_complex_double* a, lapack_int* lda, + lapack_int *info ); +void LAPACK_dtftri( char* transr, char* uplo, char* diag, lapack_int* n, + double* a, lapack_int *info ); +void LAPACK_stftri( char* transr, char* uplo, char* diag, lapack_int* n, + float* a, lapack_int *info ); +void LAPACK_ztftri( char* transr, char* uplo, char* diag, lapack_int* n, + lapack_complex_double* a, lapack_int *info ); +void LAPACK_ctftri( char* transr, char* uplo, char* diag, lapack_int* n, + lapack_complex_float* a, lapack_int *info ); +void LAPACK_stptri( char* uplo, char* diag, lapack_int* n, float* ap, + lapack_int *info ); +void LAPACK_dtptri( char* uplo, char* diag, lapack_int* n, double* ap, + lapack_int *info ); +void LAPACK_ctptri( 
char* uplo, char* diag, lapack_int* n, + lapack_complex_float* ap, lapack_int *info ); +void LAPACK_ztptri( char* uplo, char* diag, lapack_int* n, + lapack_complex_double* ap, lapack_int *info ); +void LAPACK_sgeequ( lapack_int* m, lapack_int* n, const float* a, + lapack_int* lda, float* r, float* c, float* rowcnd, + float* colcnd, float* amax, lapack_int *info ); +void LAPACK_dgeequ( lapack_int* m, lapack_int* n, const double* a, + lapack_int* lda, double* r, double* c, double* rowcnd, + double* colcnd, double* amax, lapack_int *info ); +void LAPACK_cgeequ( lapack_int* m, lapack_int* n, const lapack_complex_float* a, + lapack_int* lda, float* r, float* c, float* rowcnd, + float* colcnd, float* amax, lapack_int *info ); +void LAPACK_zgeequ( lapack_int* m, lapack_int* n, + const lapack_complex_double* a, lapack_int* lda, double* r, + double* c, double* rowcnd, double* colcnd, double* amax, + lapack_int *info ); +void LAPACK_dgeequb( lapack_int* m, lapack_int* n, const double* a, + lapack_int* lda, double* r, double* c, double* rowcnd, + double* colcnd, double* amax, lapack_int *info ); +void LAPACK_sgeequb( lapack_int* m, lapack_int* n, const float* a, + lapack_int* lda, float* r, float* c, float* rowcnd, + float* colcnd, float* amax, lapack_int *info ); +void LAPACK_zgeequb( lapack_int* m, lapack_int* n, + const lapack_complex_double* a, lapack_int* lda, double* r, + double* c, double* rowcnd, double* colcnd, double* amax, + lapack_int *info ); +void LAPACK_cgeequb( lapack_int* m, lapack_int* n, + const lapack_complex_float* a, lapack_int* lda, float* r, + float* c, float* rowcnd, float* colcnd, float* amax, + lapack_int *info ); +void LAPACK_sgbequ( lapack_int* m, lapack_int* n, lapack_int* kl, + lapack_int* ku, const float* ab, lapack_int* ldab, float* r, + float* c, float* rowcnd, float* colcnd, float* amax, + lapack_int *info ); +void LAPACK_dgbequ( lapack_int* m, lapack_int* n, lapack_int* kl, + lapack_int* ku, const double* ab, lapack_int* ldab, + double* r, 
double* c, double* rowcnd, double* colcnd, + double* amax, lapack_int *info ); +void LAPACK_cgbequ( lapack_int* m, lapack_int* n, lapack_int* kl, + lapack_int* ku, const lapack_complex_float* ab, + lapack_int* ldab, float* r, float* c, float* rowcnd, + float* colcnd, float* amax, lapack_int *info ); +void LAPACK_zgbequ( lapack_int* m, lapack_int* n, lapack_int* kl, + lapack_int* ku, const lapack_complex_double* ab, + lapack_int* ldab, double* r, double* c, double* rowcnd, + double* colcnd, double* amax, lapack_int *info ); +void LAPACK_dgbequb( lapack_int* m, lapack_int* n, lapack_int* kl, + lapack_int* ku, const double* ab, lapack_int* ldab, + double* r, double* c, double* rowcnd, double* colcnd, + double* amax, lapack_int *info ); +void LAPACK_sgbequb( lapack_int* m, lapack_int* n, lapack_int* kl, + lapack_int* ku, const float* ab, lapack_int* ldab, + float* r, float* c, float* rowcnd, float* colcnd, + float* amax, lapack_int *info ); +void LAPACK_zgbequb( lapack_int* m, lapack_int* n, lapack_int* kl, + lapack_int* ku, const lapack_complex_double* ab, + lapack_int* ldab, double* r, double* c, double* rowcnd, + double* colcnd, double* amax, lapack_int *info ); +void LAPACK_cgbequb( lapack_int* m, lapack_int* n, lapack_int* kl, + lapack_int* ku, const lapack_complex_float* ab, + lapack_int* ldab, float* r, float* c, float* rowcnd, + float* colcnd, float* amax, lapack_int *info ); +void LAPACK_spoequ( lapack_int* n, const float* a, lapack_int* lda, float* s, + float* scond, float* amax, lapack_int *info ); +void LAPACK_dpoequ( lapack_int* n, const double* a, lapack_int* lda, double* s, + double* scond, double* amax, lapack_int *info ); +void LAPACK_cpoequ( lapack_int* n, const lapack_complex_float* a, + lapack_int* lda, float* s, float* scond, float* amax, + lapack_int *info ); +void LAPACK_zpoequ( lapack_int* n, const lapack_complex_double* a, + lapack_int* lda, double* s, double* scond, double* amax, + lapack_int *info ); +void LAPACK_dpoequb( lapack_int* n, const 
double* a, lapack_int* lda, double* s, + double* scond, double* amax, lapack_int *info ); +void LAPACK_spoequb( lapack_int* n, const float* a, lapack_int* lda, float* s, + float* scond, float* amax, lapack_int *info ); +void LAPACK_zpoequb( lapack_int* n, const lapack_complex_double* a, + lapack_int* lda, double* s, double* scond, double* amax, + lapack_int *info ); +void LAPACK_cpoequb( lapack_int* n, const lapack_complex_float* a, + lapack_int* lda, float* s, float* scond, float* amax, + lapack_int *info ); +void LAPACK_sppequ( char* uplo, lapack_int* n, const float* ap, float* s, + float* scond, float* amax, lapack_int *info ); +void LAPACK_dppequ( char* uplo, lapack_int* n, const double* ap, double* s, + double* scond, double* amax, lapack_int *info ); +void LAPACK_cppequ( char* uplo, lapack_int* n, const lapack_complex_float* ap, + float* s, float* scond, float* amax, lapack_int *info ); +void LAPACK_zppequ( char* uplo, lapack_int* n, const lapack_complex_double* ap, + double* s, double* scond, double* amax, lapack_int *info ); +void LAPACK_spbequ( char* uplo, lapack_int* n, lapack_int* kd, const float* ab, + lapack_int* ldab, float* s, float* scond, float* amax, + lapack_int *info ); +void LAPACK_dpbequ( char* uplo, lapack_int* n, lapack_int* kd, const double* ab, + lapack_int* ldab, double* s, double* scond, double* amax, + lapack_int *info ); +void LAPACK_cpbequ( char* uplo, lapack_int* n, lapack_int* kd, + const lapack_complex_float* ab, lapack_int* ldab, float* s, + float* scond, float* amax, lapack_int *info ); +void LAPACK_zpbequ( char* uplo, lapack_int* n, lapack_int* kd, + const lapack_complex_double* ab, lapack_int* ldab, + double* s, double* scond, double* amax, lapack_int *info ); +void LAPACK_dsyequb( char* uplo, lapack_int* n, const double* a, + lapack_int* lda, double* s, double* scond, double* amax, + double* work, lapack_int *info ); +void LAPACK_ssyequb( char* uplo, lapack_int* n, const float* a, lapack_int* lda, + float* s, float* scond, 
float* amax, float* work, + lapack_int *info ); +void LAPACK_zsyequb( char* uplo, lapack_int* n, const lapack_complex_double* a, + lapack_int* lda, double* s, double* scond, double* amax, + lapack_complex_double* work, lapack_int *info ); +void LAPACK_csyequb( char* uplo, lapack_int* n, const lapack_complex_float* a, + lapack_int* lda, float* s, float* scond, float* amax, + lapack_complex_float* work, lapack_int *info ); +void LAPACK_zheequb( char* uplo, lapack_int* n, const lapack_complex_double* a, + lapack_int* lda, double* s, double* scond, double* amax, + lapack_complex_double* work, lapack_int *info ); +void LAPACK_cheequb( char* uplo, lapack_int* n, const lapack_complex_float* a, + lapack_int* lda, float* s, float* scond, float* amax, + lapack_complex_float* work, lapack_int *info ); +void LAPACK_sgesv( lapack_int* n, lapack_int* nrhs, float* a, lapack_int* lda, + lapack_int* ipiv, float* b, lapack_int* ldb, + lapack_int *info ); +void LAPACK_dgesv( lapack_int* n, lapack_int* nrhs, double* a, lapack_int* lda, + lapack_int* ipiv, double* b, lapack_int* ldb, + lapack_int *info ); +void LAPACK_cgesv( lapack_int* n, lapack_int* nrhs, lapack_complex_float* a, + lapack_int* lda, lapack_int* ipiv, lapack_complex_float* b, + lapack_int* ldb, lapack_int *info ); +void LAPACK_zgesv( lapack_int* n, lapack_int* nrhs, lapack_complex_double* a, + lapack_int* lda, lapack_int* ipiv, lapack_complex_double* b, + lapack_int* ldb, lapack_int *info ); +void LAPACK_dsgesv( lapack_int* n, lapack_int* nrhs, double* a, lapack_int* lda, + lapack_int* ipiv, double* b, lapack_int* ldb, double* x, + lapack_int* ldx, double* work, float* swork, + lapack_int* iter, lapack_int *info ); +void LAPACK_zcgesv( lapack_int* n, lapack_int* nrhs, lapack_complex_double* a, + lapack_int* lda, lapack_int* ipiv, lapack_complex_double* b, + lapack_int* ldb, lapack_complex_double* x, lapack_int* ldx, + lapack_complex_double* work, lapack_complex_float* swork, + double* rwork, lapack_int* iter, 
lapack_int *info ); +void LAPACK_sgesvx( char* fact, char* trans, lapack_int* n, lapack_int* nrhs, + float* a, lapack_int* lda, float* af, lapack_int* ldaf, + lapack_int* ipiv, char* equed, float* r, float* c, float* b, + lapack_int* ldb, float* x, lapack_int* ldx, float* rcond, + float* ferr, float* berr, float* work, lapack_int* iwork, + lapack_int *info ); +void LAPACK_dgesvx( char* fact, char* trans, lapack_int* n, lapack_int* nrhs, + double* a, lapack_int* lda, double* af, lapack_int* ldaf, + lapack_int* ipiv, char* equed, double* r, double* c, + double* b, lapack_int* ldb, double* x, lapack_int* ldx, + double* rcond, double* ferr, double* berr, double* work, + lapack_int* iwork, lapack_int *info ); +void LAPACK_cgesvx( char* fact, char* trans, lapack_int* n, lapack_int* nrhs, + lapack_complex_float* a, lapack_int* lda, + lapack_complex_float* af, lapack_int* ldaf, + lapack_int* ipiv, char* equed, float* r, float* c, + lapack_complex_float* b, lapack_int* ldb, + lapack_complex_float* x, lapack_int* ldx, float* rcond, + float* ferr, float* berr, lapack_complex_float* work, + float* rwork, lapack_int *info ); +void LAPACK_zgesvx( char* fact, char* trans, lapack_int* n, lapack_int* nrhs, + lapack_complex_double* a, lapack_int* lda, + lapack_complex_double* af, lapack_int* ldaf, + lapack_int* ipiv, char* equed, double* r, double* c, + lapack_complex_double* b, lapack_int* ldb, + lapack_complex_double* x, lapack_int* ldx, double* rcond, + double* ferr, double* berr, lapack_complex_double* work, + double* rwork, lapack_int *info ); +void LAPACK_dgesvxx( char* fact, char* trans, lapack_int* n, lapack_int* nrhs, + double* a, lapack_int* lda, double* af, lapack_int* ldaf, + lapack_int* ipiv, char* equed, double* r, double* c, + double* b, lapack_int* ldb, double* x, lapack_int* ldx, + double* rcond, double* rpvgrw, double* berr, + lapack_int* n_err_bnds, double* err_bnds_norm, + double* err_bnds_comp, lapack_int* nparams, double* params, + double* work, lapack_int* 
iwork, lapack_int *info ); +void LAPACK_sgesvxx( char* fact, char* trans, lapack_int* n, lapack_int* nrhs, + float* a, lapack_int* lda, float* af, lapack_int* ldaf, + lapack_int* ipiv, char* equed, float* r, float* c, + float* b, lapack_int* ldb, float* x, lapack_int* ldx, + float* rcond, float* rpvgrw, float* berr, + lapack_int* n_err_bnds, float* err_bnds_norm, + float* err_bnds_comp, lapack_int* nparams, float* params, + float* work, lapack_int* iwork, lapack_int *info ); +void LAPACK_zgesvxx( char* fact, char* trans, lapack_int* n, lapack_int* nrhs, + lapack_complex_double* a, lapack_int* lda, + lapack_complex_double* af, lapack_int* ldaf, + lapack_int* ipiv, char* equed, double* r, double* c, + lapack_complex_double* b, lapack_int* ldb, + lapack_complex_double* x, lapack_int* ldx, double* rcond, + double* rpvgrw, double* berr, lapack_int* n_err_bnds, + double* err_bnds_norm, double* err_bnds_comp, + lapack_int* nparams, double* params, + lapack_complex_double* work, double* rwork, + lapack_int *info ); +void LAPACK_cgesvxx( char* fact, char* trans, lapack_int* n, lapack_int* nrhs, + lapack_complex_float* a, lapack_int* lda, + lapack_complex_float* af, lapack_int* ldaf, + lapack_int* ipiv, char* equed, float* r, float* c, + lapack_complex_float* b, lapack_int* ldb, + lapack_complex_float* x, lapack_int* ldx, float* rcond, + float* rpvgrw, float* berr, lapack_int* n_err_bnds, + float* err_bnds_norm, float* err_bnds_comp, + lapack_int* nparams, float* params, + lapack_complex_float* work, float* rwork, + lapack_int *info ); +void LAPACK_sgbsv( lapack_int* n, lapack_int* kl, lapack_int* ku, + lapack_int* nrhs, float* ab, lapack_int* ldab, + lapack_int* ipiv, float* b, lapack_int* ldb, + lapack_int *info ); +void LAPACK_dgbsv( lapack_int* n, lapack_int* kl, lapack_int* ku, + lapack_int* nrhs, double* ab, lapack_int* ldab, + lapack_int* ipiv, double* b, lapack_int* ldb, + lapack_int *info ); +void LAPACK_cgbsv( lapack_int* n, lapack_int* kl, lapack_int* ku, + 
lapack_int* nrhs, lapack_complex_float* ab, lapack_int* ldab, + lapack_int* ipiv, lapack_complex_float* b, lapack_int* ldb, + lapack_int *info ); +void LAPACK_zgbsv( lapack_int* n, lapack_int* kl, lapack_int* ku, + lapack_int* nrhs, lapack_complex_double* ab, + lapack_int* ldab, lapack_int* ipiv, lapack_complex_double* b, + lapack_int* ldb, lapack_int *info ); +void LAPACK_sgbsvx( char* fact, char* trans, lapack_int* n, lapack_int* kl, + lapack_int* ku, lapack_int* nrhs, float* ab, + lapack_int* ldab, float* afb, lapack_int* ldafb, + lapack_int* ipiv, char* equed, float* r, float* c, float* b, + lapack_int* ldb, float* x, lapack_int* ldx, float* rcond, + float* ferr, float* berr, float* work, lapack_int* iwork, + lapack_int *info ); +void LAPACK_dgbsvx( char* fact, char* trans, lapack_int* n, lapack_int* kl, + lapack_int* ku, lapack_int* nrhs, double* ab, + lapack_int* ldab, double* afb, lapack_int* ldafb, + lapack_int* ipiv, char* equed, double* r, double* c, + double* b, lapack_int* ldb, double* x, lapack_int* ldx, + double* rcond, double* ferr, double* berr, double* work, + lapack_int* iwork, lapack_int *info ); +void LAPACK_cgbsvx( char* fact, char* trans, lapack_int* n, lapack_int* kl, + lapack_int* ku, lapack_int* nrhs, lapack_complex_float* ab, + lapack_int* ldab, lapack_complex_float* afb, + lapack_int* ldafb, lapack_int* ipiv, char* equed, float* r, + float* c, lapack_complex_float* b, lapack_int* ldb, + lapack_complex_float* x, lapack_int* ldx, float* rcond, + float* ferr, float* berr, lapack_complex_float* work, + float* rwork, lapack_int *info ); +void LAPACK_zgbsvx( char* fact, char* trans, lapack_int* n, lapack_int* kl, + lapack_int* ku, lapack_int* nrhs, lapack_complex_double* ab, + lapack_int* ldab, lapack_complex_double* afb, + lapack_int* ldafb, lapack_int* ipiv, char* equed, double* r, + double* c, lapack_complex_double* b, lapack_int* ldb, + lapack_complex_double* x, lapack_int* ldx, double* rcond, + double* ferr, double* berr, 
lapack_complex_double* work, + double* rwork, lapack_int *info ); +void LAPACK_dgbsvxx( char* fact, char* trans, lapack_int* n, lapack_int* kl, + lapack_int* ku, lapack_int* nrhs, double* ab, + lapack_int* ldab, double* afb, lapack_int* ldafb, + lapack_int* ipiv, char* equed, double* r, double* c, + double* b, lapack_int* ldb, double* x, lapack_int* ldx, + double* rcond, double* rpvgrw, double* berr, + lapack_int* n_err_bnds, double* err_bnds_norm, + double* err_bnds_comp, lapack_int* nparams, double* params, + double* work, lapack_int* iwork, lapack_int *info ); +void LAPACK_sgbsvxx( char* fact, char* trans, lapack_int* n, lapack_int* kl, + lapack_int* ku, lapack_int* nrhs, float* ab, + lapack_int* ldab, float* afb, lapack_int* ldafb, + lapack_int* ipiv, char* equed, float* r, float* c, + float* b, lapack_int* ldb, float* x, lapack_int* ldx, + float* rcond, float* rpvgrw, float* berr, + lapack_int* n_err_bnds, float* err_bnds_norm, + float* err_bnds_comp, lapack_int* nparams, float* params, + float* work, lapack_int* iwork, lapack_int *info ); +void LAPACK_zgbsvxx( char* fact, char* trans, lapack_int* n, lapack_int* kl, + lapack_int* ku, lapack_int* nrhs, + lapack_complex_double* ab, lapack_int* ldab, + lapack_complex_double* afb, lapack_int* ldafb, + lapack_int* ipiv, char* equed, double* r, double* c, + lapack_complex_double* b, lapack_int* ldb, + lapack_complex_double* x, lapack_int* ldx, double* rcond, + double* rpvgrw, double* berr, lapack_int* n_err_bnds, + double* err_bnds_norm, double* err_bnds_comp, + lapack_int* nparams, double* params, + lapack_complex_double* work, double* rwork, + lapack_int *info ); +void LAPACK_cgbsvxx( char* fact, char* trans, lapack_int* n, lapack_int* kl, + lapack_int* ku, lapack_int* nrhs, lapack_complex_float* ab, + lapack_int* ldab, lapack_complex_float* afb, + lapack_int* ldafb, lapack_int* ipiv, char* equed, float* r, + float* c, lapack_complex_float* b, lapack_int* ldb, + lapack_complex_float* x, lapack_int* ldx, float* 
rcond, + float* rpvgrw, float* berr, lapack_int* n_err_bnds, + float* err_bnds_norm, float* err_bnds_comp, + lapack_int* nparams, float* params, + lapack_complex_float* work, float* rwork, + lapack_int *info ); +void LAPACK_sgtsv( lapack_int* n, lapack_int* nrhs, float* dl, float* d, + float* du, float* b, lapack_int* ldb, lapack_int *info ); +void LAPACK_dgtsv( lapack_int* n, lapack_int* nrhs, double* dl, double* d, + double* du, double* b, lapack_int* ldb, lapack_int *info ); +void LAPACK_cgtsv( lapack_int* n, lapack_int* nrhs, lapack_complex_float* dl, + lapack_complex_float* d, lapack_complex_float* du, + lapack_complex_float* b, lapack_int* ldb, lapack_int *info ); +void LAPACK_zgtsv( lapack_int* n, lapack_int* nrhs, lapack_complex_double* dl, + lapack_complex_double* d, lapack_complex_double* du, + lapack_complex_double* b, lapack_int* ldb, + lapack_int *info ); +void LAPACK_sgtsvx( char* fact, char* trans, lapack_int* n, lapack_int* nrhs, + const float* dl, const float* d, const float* du, + float* dlf, float* df, float* duf, float* du2, + lapack_int* ipiv, const float* b, lapack_int* ldb, float* x, + lapack_int* ldx, float* rcond, float* ferr, float* berr, + float* work, lapack_int* iwork, lapack_int *info ); +void LAPACK_dgtsvx( char* fact, char* trans, lapack_int* n, lapack_int* nrhs, + const double* dl, const double* d, const double* du, + double* dlf, double* df, double* duf, double* du2, + lapack_int* ipiv, const double* b, lapack_int* ldb, + double* x, lapack_int* ldx, double* rcond, double* ferr, + double* berr, double* work, lapack_int* iwork, + lapack_int *info ); +void LAPACK_cgtsvx( char* fact, char* trans, lapack_int* n, lapack_int* nrhs, + const lapack_complex_float* dl, + const lapack_complex_float* d, + const lapack_complex_float* du, lapack_complex_float* dlf, + lapack_complex_float* df, lapack_complex_float* duf, + lapack_complex_float* du2, lapack_int* ipiv, + const lapack_complex_float* b, lapack_int* ldb, + lapack_complex_float* x, 
lapack_int* ldx, float* rcond, + float* ferr, float* berr, lapack_complex_float* work, + float* rwork, lapack_int *info ); +void LAPACK_zgtsvx( char* fact, char* trans, lapack_int* n, lapack_int* nrhs, + const lapack_complex_double* dl, + const lapack_complex_double* d, + const lapack_complex_double* du, lapack_complex_double* dlf, + lapack_complex_double* df, lapack_complex_double* duf, + lapack_complex_double* du2, lapack_int* ipiv, + const lapack_complex_double* b, lapack_int* ldb, + lapack_complex_double* x, lapack_int* ldx, double* rcond, + double* ferr, double* berr, lapack_complex_double* work, + double* rwork, lapack_int *info ); +void LAPACK_sposv( char* uplo, lapack_int* n, lapack_int* nrhs, float* a, + lapack_int* lda, float* b, lapack_int* ldb, + lapack_int *info ); +void LAPACK_dposv( char* uplo, lapack_int* n, lapack_int* nrhs, double* a, + lapack_int* lda, double* b, lapack_int* ldb, + lapack_int *info ); +void LAPACK_cposv( char* uplo, lapack_int* n, lapack_int* nrhs, + lapack_complex_float* a, lapack_int* lda, + lapack_complex_float* b, lapack_int* ldb, lapack_int *info ); +void LAPACK_zposv( char* uplo, lapack_int* n, lapack_int* nrhs, + lapack_complex_double* a, lapack_int* lda, + lapack_complex_double* b, lapack_int* ldb, + lapack_int *info ); +void LAPACK_dsposv( char* uplo, lapack_int* n, lapack_int* nrhs, double* a, + lapack_int* lda, double* b, lapack_int* ldb, double* x, + lapack_int* ldx, double* work, float* swork, + lapack_int* iter, lapack_int *info ); +void LAPACK_zcposv( char* uplo, lapack_int* n, lapack_int* nrhs, + lapack_complex_double* a, lapack_int* lda, + lapack_complex_double* b, lapack_int* ldb, + lapack_complex_double* x, lapack_int* ldx, + lapack_complex_double* work, lapack_complex_float* swork, + double* rwork, lapack_int* iter, lapack_int *info ); +void LAPACK_sposvx( char* fact, char* uplo, lapack_int* n, lapack_int* nrhs, + float* a, lapack_int* lda, float* af, lapack_int* ldaf, + char* equed, float* s, float* b, 
lapack_int* ldb, float* x, + lapack_int* ldx, float* rcond, float* ferr, float* berr, + float* work, lapack_int* iwork, lapack_int *info ); +void LAPACK_dposvx( char* fact, char* uplo, lapack_int* n, lapack_int* nrhs, + double* a, lapack_int* lda, double* af, lapack_int* ldaf, + char* equed, double* s, double* b, lapack_int* ldb, + double* x, lapack_int* ldx, double* rcond, double* ferr, + double* berr, double* work, lapack_int* iwork, + lapack_int *info ); +void LAPACK_cposvx( char* fact, char* uplo, lapack_int* n, lapack_int* nrhs, + lapack_complex_float* a, lapack_int* lda, + lapack_complex_float* af, lapack_int* ldaf, char* equed, + float* s, lapack_complex_float* b, lapack_int* ldb, + lapack_complex_float* x, lapack_int* ldx, float* rcond, + float* ferr, float* berr, lapack_complex_float* work, + float* rwork, lapack_int *info ); +void LAPACK_zposvx( char* fact, char* uplo, lapack_int* n, lapack_int* nrhs, + lapack_complex_double* a, lapack_int* lda, + lapack_complex_double* af, lapack_int* ldaf, char* equed, + double* s, lapack_complex_double* b, lapack_int* ldb, + lapack_complex_double* x, lapack_int* ldx, double* rcond, + double* ferr, double* berr, lapack_complex_double* work, + double* rwork, lapack_int *info ); +void LAPACK_dposvxx( char* fact, char* uplo, lapack_int* n, lapack_int* nrhs, + double* a, lapack_int* lda, double* af, lapack_int* ldaf, + char* equed, double* s, double* b, lapack_int* ldb, + double* x, lapack_int* ldx, double* rcond, double* rpvgrw, + double* berr, lapack_int* n_err_bnds, + double* err_bnds_norm, double* err_bnds_comp, + lapack_int* nparams, double* params, double* work, + lapack_int* iwork, lapack_int *info ); +void LAPACK_sposvxx( char* fact, char* uplo, lapack_int* n, lapack_int* nrhs, + float* a, lapack_int* lda, float* af, lapack_int* ldaf, + char* equed, float* s, float* b, lapack_int* ldb, float* x, + lapack_int* ldx, float* rcond, float* rpvgrw, float* berr, + lapack_int* n_err_bnds, float* err_bnds_norm, + float* 
err_bnds_comp, lapack_int* nparams, float* params, + float* work, lapack_int* iwork, lapack_int *info ); +void LAPACK_zposvxx( char* fact, char* uplo, lapack_int* n, lapack_int* nrhs, + lapack_complex_double* a, lapack_int* lda, + lapack_complex_double* af, lapack_int* ldaf, char* equed, + double* s, lapack_complex_double* b, lapack_int* ldb, + lapack_complex_double* x, lapack_int* ldx, double* rcond, + double* rpvgrw, double* berr, lapack_int* n_err_bnds, + double* err_bnds_norm, double* err_bnds_comp, + lapack_int* nparams, double* params, + lapack_complex_double* work, double* rwork, + lapack_int *info ); +void LAPACK_cposvxx( char* fact, char* uplo, lapack_int* n, lapack_int* nrhs, + lapack_complex_float* a, lapack_int* lda, + lapack_complex_float* af, lapack_int* ldaf, char* equed, + float* s, lapack_complex_float* b, lapack_int* ldb, + lapack_complex_float* x, lapack_int* ldx, float* rcond, + float* rpvgrw, float* berr, lapack_int* n_err_bnds, + float* err_bnds_norm, float* err_bnds_comp, + lapack_int* nparams, float* params, + lapack_complex_float* work, float* rwork, + lapack_int *info ); +void LAPACK_sppsv( char* uplo, lapack_int* n, lapack_int* nrhs, float* ap, + float* b, lapack_int* ldb, lapack_int *info ); +void LAPACK_dppsv( char* uplo, lapack_int* n, lapack_int* nrhs, double* ap, + double* b, lapack_int* ldb, lapack_int *info ); +void LAPACK_cppsv( char* uplo, lapack_int* n, lapack_int* nrhs, + lapack_complex_float* ap, lapack_complex_float* b, + lapack_int* ldb, lapack_int *info ); +void LAPACK_zppsv( char* uplo, lapack_int* n, lapack_int* nrhs, + lapack_complex_double* ap, lapack_complex_double* b, + lapack_int* ldb, lapack_int *info ); +void LAPACK_sppsvx( char* fact, char* uplo, lapack_int* n, lapack_int* nrhs, + float* ap, float* afp, char* equed, float* s, float* b, + lapack_int* ldb, float* x, lapack_int* ldx, float* rcond, + float* ferr, float* berr, float* work, lapack_int* iwork, + lapack_int *info ); +void LAPACK_dppsvx( char* fact, char* 
uplo, lapack_int* n, lapack_int* nrhs, + double* ap, double* afp, char* equed, double* s, double* b, + lapack_int* ldb, double* x, lapack_int* ldx, double* rcond, + double* ferr, double* berr, double* work, lapack_int* iwork, + lapack_int *info ); +void LAPACK_cppsvx( char* fact, char* uplo, lapack_int* n, lapack_int* nrhs, + lapack_complex_float* ap, lapack_complex_float* afp, + char* equed, float* s, lapack_complex_float* b, + lapack_int* ldb, lapack_complex_float* x, lapack_int* ldx, + float* rcond, float* ferr, float* berr, + lapack_complex_float* work, float* rwork, + lapack_int *info ); +void LAPACK_zppsvx( char* fact, char* uplo, lapack_int* n, lapack_int* nrhs, + lapack_complex_double* ap, lapack_complex_double* afp, + char* equed, double* s, lapack_complex_double* b, + lapack_int* ldb, lapack_complex_double* x, lapack_int* ldx, + double* rcond, double* ferr, double* berr, + lapack_complex_double* work, double* rwork, + lapack_int *info ); +void LAPACK_spbsv( char* uplo, lapack_int* n, lapack_int* kd, lapack_int* nrhs, + float* ab, lapack_int* ldab, float* b, lapack_int* ldb, + lapack_int *info ); +void LAPACK_dpbsv( char* uplo, lapack_int* n, lapack_int* kd, lapack_int* nrhs, + double* ab, lapack_int* ldab, double* b, lapack_int* ldb, + lapack_int *info ); +void LAPACK_cpbsv( char* uplo, lapack_int* n, lapack_int* kd, lapack_int* nrhs, + lapack_complex_float* ab, lapack_int* ldab, + lapack_complex_float* b, lapack_int* ldb, lapack_int *info ); +void LAPACK_zpbsv( char* uplo, lapack_int* n, lapack_int* kd, lapack_int* nrhs, + lapack_complex_double* ab, lapack_int* ldab, + lapack_complex_double* b, lapack_int* ldb, + lapack_int *info ); +void LAPACK_spbsvx( char* fact, char* uplo, lapack_int* n, lapack_int* kd, + lapack_int* nrhs, float* ab, lapack_int* ldab, float* afb, + lapack_int* ldafb, char* equed, float* s, float* b, + lapack_int* ldb, float* x, lapack_int* ldx, float* rcond, + float* ferr, float* berr, float* work, lapack_int* iwork, + lapack_int 
*info ); +void LAPACK_dpbsvx( char* fact, char* uplo, lapack_int* n, lapack_int* kd, + lapack_int* nrhs, double* ab, lapack_int* ldab, double* afb, + lapack_int* ldafb, char* equed, double* s, double* b, + lapack_int* ldb, double* x, lapack_int* ldx, double* rcond, + double* ferr, double* berr, double* work, lapack_int* iwork, + lapack_int *info ); +void LAPACK_cpbsvx( char* fact, char* uplo, lapack_int* n, lapack_int* kd, + lapack_int* nrhs, lapack_complex_float* ab, + lapack_int* ldab, lapack_complex_float* afb, + lapack_int* ldafb, char* equed, float* s, + lapack_complex_float* b, lapack_int* ldb, + lapack_complex_float* x, lapack_int* ldx, float* rcond, + float* ferr, float* berr, lapack_complex_float* work, + float* rwork, lapack_int *info ); +void LAPACK_zpbsvx( char* fact, char* uplo, lapack_int* n, lapack_int* kd, + lapack_int* nrhs, lapack_complex_double* ab, + lapack_int* ldab, lapack_complex_double* afb, + lapack_int* ldafb, char* equed, double* s, + lapack_complex_double* b, lapack_int* ldb, + lapack_complex_double* x, lapack_int* ldx, double* rcond, + double* ferr, double* berr, lapack_complex_double* work, + double* rwork, lapack_int *info ); +void LAPACK_sptsv( lapack_int* n, lapack_int* nrhs, float* d, float* e, + float* b, lapack_int* ldb, lapack_int *info ); +void LAPACK_dptsv( lapack_int* n, lapack_int* nrhs, double* d, double* e, + double* b, lapack_int* ldb, lapack_int *info ); +void LAPACK_cptsv( lapack_int* n, lapack_int* nrhs, float* d, + lapack_complex_float* e, lapack_complex_float* b, + lapack_int* ldb, lapack_int *info ); +void LAPACK_zptsv( lapack_int* n, lapack_int* nrhs, double* d, + lapack_complex_double* e, lapack_complex_double* b, + lapack_int* ldb, lapack_int *info ); +void LAPACK_sptsvx( char* fact, lapack_int* n, lapack_int* nrhs, const float* d, + const float* e, float* df, float* ef, const float* b, + lapack_int* ldb, float* x, lapack_int* ldx, float* rcond, + float* ferr, float* berr, float* work, lapack_int *info ); +void 
LAPACK_dptsvx( char* fact, lapack_int* n, lapack_int* nrhs, + const double* d, const double* e, double* df, double* ef, + const double* b, lapack_int* ldb, double* x, + lapack_int* ldx, double* rcond, double* ferr, double* berr, + double* work, lapack_int *info ); +void LAPACK_cptsvx( char* fact, lapack_int* n, lapack_int* nrhs, const float* d, + const lapack_complex_float* e, float* df, + lapack_complex_float* ef, const lapack_complex_float* b, + lapack_int* ldb, lapack_complex_float* x, lapack_int* ldx, + float* rcond, float* ferr, float* berr, + lapack_complex_float* work, float* rwork, + lapack_int *info ); +void LAPACK_zptsvx( char* fact, lapack_int* n, lapack_int* nrhs, + const double* d, const lapack_complex_double* e, double* df, + lapack_complex_double* ef, const lapack_complex_double* b, + lapack_int* ldb, lapack_complex_double* x, lapack_int* ldx, + double* rcond, double* ferr, double* berr, + lapack_complex_double* work, double* rwork, + lapack_int *info ); +void LAPACK_ssysv( char* uplo, lapack_int* n, lapack_int* nrhs, float* a, + lapack_int* lda, lapack_int* ipiv, float* b, lapack_int* ldb, + float* work, lapack_int* lwork, lapack_int *info ); +void LAPACK_dsysv( char* uplo, lapack_int* n, lapack_int* nrhs, double* a, + lapack_int* lda, lapack_int* ipiv, double* b, + lapack_int* ldb, double* work, lapack_int* lwork, + lapack_int *info ); +void LAPACK_csysv( char* uplo, lapack_int* n, lapack_int* nrhs, + lapack_complex_float* a, lapack_int* lda, lapack_int* ipiv, + lapack_complex_float* b, lapack_int* ldb, + lapack_complex_float* work, lapack_int* lwork, + lapack_int *info ); +void LAPACK_zsysv( char* uplo, lapack_int* n, lapack_int* nrhs, + lapack_complex_double* a, lapack_int* lda, lapack_int* ipiv, + lapack_complex_double* b, lapack_int* ldb, + lapack_complex_double* work, lapack_int* lwork, + lapack_int *info ); +void LAPACK_ssysvx( char* fact, char* uplo, lapack_int* n, lapack_int* nrhs, + const float* a, lapack_int* lda, float* af, + lapack_int* 
ldaf, lapack_int* ipiv, const float* b, + lapack_int* ldb, float* x, lapack_int* ldx, float* rcond, + float* ferr, float* berr, float* work, lapack_int* lwork, + lapack_int* iwork, lapack_int *info ); +void LAPACK_dsysvx( char* fact, char* uplo, lapack_int* n, lapack_int* nrhs, + const double* a, lapack_int* lda, double* af, + lapack_int* ldaf, lapack_int* ipiv, const double* b, + lapack_int* ldb, double* x, lapack_int* ldx, double* rcond, + double* ferr, double* berr, double* work, lapack_int* lwork, + lapack_int* iwork, lapack_int *info ); +void LAPACK_csysvx( char* fact, char* uplo, lapack_int* n, lapack_int* nrhs, + const lapack_complex_float* a, lapack_int* lda, + lapack_complex_float* af, lapack_int* ldaf, + lapack_int* ipiv, const lapack_complex_float* b, + lapack_int* ldb, lapack_complex_float* x, lapack_int* ldx, + float* rcond, float* ferr, float* berr, + lapack_complex_float* work, lapack_int* lwork, float* rwork, + lapack_int *info ); +void LAPACK_zsysvx( char* fact, char* uplo, lapack_int* n, lapack_int* nrhs, + const lapack_complex_double* a, lapack_int* lda, + lapack_complex_double* af, lapack_int* ldaf, + lapack_int* ipiv, const lapack_complex_double* b, + lapack_int* ldb, lapack_complex_double* x, lapack_int* ldx, + double* rcond, double* ferr, double* berr, + lapack_complex_double* work, lapack_int* lwork, + double* rwork, lapack_int *info ); +void LAPACK_dsysvxx( char* fact, char* uplo, lapack_int* n, lapack_int* nrhs, + double* a, lapack_int* lda, double* af, lapack_int* ldaf, + lapack_int* ipiv, char* equed, double* s, double* b, + lapack_int* ldb, double* x, lapack_int* ldx, double* rcond, + double* rpvgrw, double* berr, lapack_int* n_err_bnds, + double* err_bnds_norm, double* err_bnds_comp, + lapack_int* nparams, double* params, double* work, + lapack_int* iwork, lapack_int *info ); +void LAPACK_ssysvxx( char* fact, char* uplo, lapack_int* n, lapack_int* nrhs, + float* a, lapack_int* lda, float* af, lapack_int* ldaf, + lapack_int* ipiv, char* 
equed, float* s, float* b, + lapack_int* ldb, float* x, lapack_int* ldx, float* rcond, + float* rpvgrw, float* berr, lapack_int* n_err_bnds, + float* err_bnds_norm, float* err_bnds_comp, + lapack_int* nparams, float* params, float* work, + lapack_int* iwork, lapack_int *info ); +void LAPACK_zsysvxx( char* fact, char* uplo, lapack_int* n, lapack_int* nrhs, + lapack_complex_double* a, lapack_int* lda, + lapack_complex_double* af, lapack_int* ldaf, + lapack_int* ipiv, char* equed, double* s, + lapack_complex_double* b, lapack_int* ldb, + lapack_complex_double* x, lapack_int* ldx, double* rcond, + double* rpvgrw, double* berr, lapack_int* n_err_bnds, + double* err_bnds_norm, double* err_bnds_comp, + lapack_int* nparams, double* params, + lapack_complex_double* work, double* rwork, + lapack_int *info ); +void LAPACK_csysvxx( char* fact, char* uplo, lapack_int* n, lapack_int* nrhs, + lapack_complex_float* a, lapack_int* lda, + lapack_complex_float* af, lapack_int* ldaf, + lapack_int* ipiv, char* equed, float* s, + lapack_complex_float* b, lapack_int* ldb, + lapack_complex_float* x, lapack_int* ldx, float* rcond, + float* rpvgrw, float* berr, lapack_int* n_err_bnds, + float* err_bnds_norm, float* err_bnds_comp, + lapack_int* nparams, float* params, + lapack_complex_float* work, float* rwork, + lapack_int *info ); +void LAPACK_chesv( char* uplo, lapack_int* n, lapack_int* nrhs, + lapack_complex_float* a, lapack_int* lda, lapack_int* ipiv, + lapack_complex_float* b, lapack_int* ldb, + lapack_complex_float* work, lapack_int* lwork, + lapack_int *info ); +void LAPACK_zhesv( char* uplo, lapack_int* n, lapack_int* nrhs, + lapack_complex_double* a, lapack_int* lda, lapack_int* ipiv, + lapack_complex_double* b, lapack_int* ldb, + lapack_complex_double* work, lapack_int* lwork, + lapack_int *info ); +void LAPACK_chesvx( char* fact, char* uplo, lapack_int* n, lapack_int* nrhs, + const lapack_complex_float* a, lapack_int* lda, + lapack_complex_float* af, lapack_int* ldaf, + 
lapack_int* ipiv, const lapack_complex_float* b, + lapack_int* ldb, lapack_complex_float* x, lapack_int* ldx, + float* rcond, float* ferr, float* berr, + lapack_complex_float* work, lapack_int* lwork, float* rwork, + lapack_int *info ); +void LAPACK_zhesvx( char* fact, char* uplo, lapack_int* n, lapack_int* nrhs, + const lapack_complex_double* a, lapack_int* lda, + lapack_complex_double* af, lapack_int* ldaf, + lapack_int* ipiv, const lapack_complex_double* b, + lapack_int* ldb, lapack_complex_double* x, lapack_int* ldx, + double* rcond, double* ferr, double* berr, + lapack_complex_double* work, lapack_int* lwork, + double* rwork, lapack_int *info ); +void LAPACK_zhesvxx( char* fact, char* uplo, lapack_int* n, lapack_int* nrhs, + lapack_complex_double* a, lapack_int* lda, + lapack_complex_double* af, lapack_int* ldaf, + lapack_int* ipiv, char* equed, double* s, + lapack_complex_double* b, lapack_int* ldb, + lapack_complex_double* x, lapack_int* ldx, double* rcond, + double* rpvgrw, double* berr, lapack_int* n_err_bnds, + double* err_bnds_norm, double* err_bnds_comp, + lapack_int* nparams, double* params, + lapack_complex_double* work, double* rwork, + lapack_int *info ); +void LAPACK_chesvxx( char* fact, char* uplo, lapack_int* n, lapack_int* nrhs, + lapack_complex_float* a, lapack_int* lda, + lapack_complex_float* af, lapack_int* ldaf, + lapack_int* ipiv, char* equed, float* s, + lapack_complex_float* b, lapack_int* ldb, + lapack_complex_float* x, lapack_int* ldx, float* rcond, + float* rpvgrw, float* berr, lapack_int* n_err_bnds, + float* err_bnds_norm, float* err_bnds_comp, + lapack_int* nparams, float* params, + lapack_complex_float* work, float* rwork, + lapack_int *info ); +void LAPACK_sspsv( char* uplo, lapack_int* n, lapack_int* nrhs, float* ap, + lapack_int* ipiv, float* b, lapack_int* ldb, + lapack_int *info ); +void LAPACK_dspsv( char* uplo, lapack_int* n, lapack_int* nrhs, double* ap, + lapack_int* ipiv, double* b, lapack_int* ldb, + lapack_int *info ); 
+void LAPACK_cspsv( char* uplo, lapack_int* n, lapack_int* nrhs, + lapack_complex_float* ap, lapack_int* ipiv, + lapack_complex_float* b, lapack_int* ldb, lapack_int *info ); +void LAPACK_zspsv( char* uplo, lapack_int* n, lapack_int* nrhs, + lapack_complex_double* ap, lapack_int* ipiv, + lapack_complex_double* b, lapack_int* ldb, + lapack_int *info ); +void LAPACK_sspsvx( char* fact, char* uplo, lapack_int* n, lapack_int* nrhs, + const float* ap, float* afp, lapack_int* ipiv, + const float* b, lapack_int* ldb, float* x, lapack_int* ldx, + float* rcond, float* ferr, float* berr, float* work, + lapack_int* iwork, lapack_int *info ); +void LAPACK_dspsvx( char* fact, char* uplo, lapack_int* n, lapack_int* nrhs, + const double* ap, double* afp, lapack_int* ipiv, + const double* b, lapack_int* ldb, double* x, + lapack_int* ldx, double* rcond, double* ferr, double* berr, + double* work, lapack_int* iwork, lapack_int *info ); +void LAPACK_cspsvx( char* fact, char* uplo, lapack_int* n, lapack_int* nrhs, + const lapack_complex_float* ap, lapack_complex_float* afp, + lapack_int* ipiv, const lapack_complex_float* b, + lapack_int* ldb, lapack_complex_float* x, lapack_int* ldx, + float* rcond, float* ferr, float* berr, + lapack_complex_float* work, float* rwork, + lapack_int *info ); +void LAPACK_zspsvx( char* fact, char* uplo, lapack_int* n, lapack_int* nrhs, + const lapack_complex_double* ap, lapack_complex_double* afp, + lapack_int* ipiv, const lapack_complex_double* b, + lapack_int* ldb, lapack_complex_double* x, lapack_int* ldx, + double* rcond, double* ferr, double* berr, + lapack_complex_double* work, double* rwork, + lapack_int *info ); +void LAPACK_chpsv( char* uplo, lapack_int* n, lapack_int* nrhs, + lapack_complex_float* ap, lapack_int* ipiv, + lapack_complex_float* b, lapack_int* ldb, lapack_int *info ); +void LAPACK_zhpsv( char* uplo, lapack_int* n, lapack_int* nrhs, + lapack_complex_double* ap, lapack_int* ipiv, + lapack_complex_double* b, lapack_int* ldb, + 
lapack_int *info ); +void LAPACK_chpsvx( char* fact, char* uplo, lapack_int* n, lapack_int* nrhs, + const lapack_complex_float* ap, lapack_complex_float* afp, + lapack_int* ipiv, const lapack_complex_float* b, + lapack_int* ldb, lapack_complex_float* x, lapack_int* ldx, + float* rcond, float* ferr, float* berr, + lapack_complex_float* work, float* rwork, + lapack_int *info ); +void LAPACK_zhpsvx( char* fact, char* uplo, lapack_int* n, lapack_int* nrhs, + const lapack_complex_double* ap, lapack_complex_double* afp, + lapack_int* ipiv, const lapack_complex_double* b, + lapack_int* ldb, lapack_complex_double* x, lapack_int* ldx, + double* rcond, double* ferr, double* berr, + lapack_complex_double* work, double* rwork, + lapack_int *info ); +void LAPACK_sgeqrf( lapack_int* m, lapack_int* n, float* a, lapack_int* lda, + float* tau, float* work, lapack_int* lwork, + lapack_int *info ); +void LAPACK_dgeqrf( lapack_int* m, lapack_int* n, double* a, lapack_int* lda, + double* tau, double* work, lapack_int* lwork, + lapack_int *info ); +void LAPACK_cgeqrf( lapack_int* m, lapack_int* n, lapack_complex_float* a, + lapack_int* lda, lapack_complex_float* tau, + lapack_complex_float* work, lapack_int* lwork, + lapack_int *info ); +void LAPACK_zgeqrf( lapack_int* m, lapack_int* n, lapack_complex_double* a, + lapack_int* lda, lapack_complex_double* tau, + lapack_complex_double* work, lapack_int* lwork, + lapack_int *info ); +void LAPACK_sgeqpf( lapack_int* m, lapack_int* n, float* a, lapack_int* lda, + lapack_int* jpvt, float* tau, float* work, + lapack_int *info ); +void LAPACK_dgeqpf( lapack_int* m, lapack_int* n, double* a, lapack_int* lda, + lapack_int* jpvt, double* tau, double* work, + lapack_int *info ); +void LAPACK_cgeqpf( lapack_int* m, lapack_int* n, lapack_complex_float* a, + lapack_int* lda, lapack_int* jpvt, + lapack_complex_float* tau, lapack_complex_float* work, + float* rwork, lapack_int *info ); +void LAPACK_zgeqpf( lapack_int* m, lapack_int* n, 
lapack_complex_double* a, + lapack_int* lda, lapack_int* jpvt, + lapack_complex_double* tau, lapack_complex_double* work, + double* rwork, lapack_int *info ); +void LAPACK_sgeqp3( lapack_int* m, lapack_int* n, float* a, lapack_int* lda, + lapack_int* jpvt, float* tau, float* work, + lapack_int* lwork, lapack_int *info ); +void LAPACK_dgeqp3( lapack_int* m, lapack_int* n, double* a, lapack_int* lda, + lapack_int* jpvt, double* tau, double* work, + lapack_int* lwork, lapack_int *info ); +void LAPACK_cgeqp3( lapack_int* m, lapack_int* n, lapack_complex_float* a, + lapack_int* lda, lapack_int* jpvt, + lapack_complex_float* tau, lapack_complex_float* work, + lapack_int* lwork, float* rwork, lapack_int *info ); +void LAPACK_zgeqp3( lapack_int* m, lapack_int* n, lapack_complex_double* a, + lapack_int* lda, lapack_int* jpvt, + lapack_complex_double* tau, lapack_complex_double* work, + lapack_int* lwork, double* rwork, lapack_int *info ); +void LAPACK_sorgqr( lapack_int* m, lapack_int* n, lapack_int* k, float* a, + lapack_int* lda, const float* tau, float* work, + lapack_int* lwork, lapack_int *info ); +void LAPACK_dorgqr( lapack_int* m, lapack_int* n, lapack_int* k, double* a, + lapack_int* lda, const double* tau, double* work, + lapack_int* lwork, lapack_int *info ); +void LAPACK_sormqr( char* side, char* trans, lapack_int* m, lapack_int* n, + lapack_int* k, const float* a, lapack_int* lda, + const float* tau, float* c, lapack_int* ldc, float* work, + lapack_int* lwork, lapack_int *info ); +void LAPACK_dormqr( char* side, char* trans, lapack_int* m, lapack_int* n, + lapack_int* k, const double* a, lapack_int* lda, + const double* tau, double* c, lapack_int* ldc, double* work, + lapack_int* lwork, lapack_int *info ); +void LAPACK_cungqr( lapack_int* m, lapack_int* n, lapack_int* k, + lapack_complex_float* a, lapack_int* lda, + const lapack_complex_float* tau, lapack_complex_float* work, + lapack_int* lwork, lapack_int *info ); +void LAPACK_zungqr( lapack_int* m, 
lapack_int* n, lapack_int* k, + lapack_complex_double* a, lapack_int* lda, + const lapack_complex_double* tau, + lapack_complex_double* work, lapack_int* lwork, + lapack_int *info ); +void LAPACK_cunmqr( char* side, char* trans, lapack_int* m, lapack_int* n, + lapack_int* k, const lapack_complex_float* a, + lapack_int* lda, const lapack_complex_float* tau, + lapack_complex_float* c, lapack_int* ldc, + lapack_complex_float* work, lapack_int* lwork, + lapack_int *info ); +void LAPACK_zunmqr( char* side, char* trans, lapack_int* m, lapack_int* n, + lapack_int* k, const lapack_complex_double* a, + lapack_int* lda, const lapack_complex_double* tau, + lapack_complex_double* c, lapack_int* ldc, + lapack_complex_double* work, lapack_int* lwork, + lapack_int *info ); +void LAPACK_sgelqf( lapack_int* m, lapack_int* n, float* a, lapack_int* lda, + float* tau, float* work, lapack_int* lwork, + lapack_int *info ); +void LAPACK_dgelqf( lapack_int* m, lapack_int* n, double* a, lapack_int* lda, + double* tau, double* work, lapack_int* lwork, + lapack_int *info ); +void LAPACK_cgelqf( lapack_int* m, lapack_int* n, lapack_complex_float* a, + lapack_int* lda, lapack_complex_float* tau, + lapack_complex_float* work, lapack_int* lwork, + lapack_int *info ); +void LAPACK_zgelqf( lapack_int* m, lapack_int* n, lapack_complex_double* a, + lapack_int* lda, lapack_complex_double* tau, + lapack_complex_double* work, lapack_int* lwork, + lapack_int *info ); +void LAPACK_sorglq( lapack_int* m, lapack_int* n, lapack_int* k, float* a, + lapack_int* lda, const float* tau, float* work, + lapack_int* lwork, lapack_int *info ); +void LAPACK_dorglq( lapack_int* m, lapack_int* n, lapack_int* k, double* a, + lapack_int* lda, const double* tau, double* work, + lapack_int* lwork, lapack_int *info ); +void LAPACK_sormlq( char* side, char* trans, lapack_int* m, lapack_int* n, + lapack_int* k, const float* a, lapack_int* lda, + const float* tau, float* c, lapack_int* ldc, float* work, + lapack_int* lwork, 
lapack_int *info ); +void LAPACK_dormlq( char* side, char* trans, lapack_int* m, lapack_int* n, + lapack_int* k, const double* a, lapack_int* lda, + const double* tau, double* c, lapack_int* ldc, double* work, + lapack_int* lwork, lapack_int *info ); +void LAPACK_cunglq( lapack_int* m, lapack_int* n, lapack_int* k, + lapack_complex_float* a, lapack_int* lda, + const lapack_complex_float* tau, lapack_complex_float* work, + lapack_int* lwork, lapack_int *info ); +void LAPACK_zunglq( lapack_int* m, lapack_int* n, lapack_int* k, + lapack_complex_double* a, lapack_int* lda, + const lapack_complex_double* tau, + lapack_complex_double* work, lapack_int* lwork, + lapack_int *info ); +void LAPACK_cunmlq( char* side, char* trans, lapack_int* m, lapack_int* n, + lapack_int* k, const lapack_complex_float* a, + lapack_int* lda, const lapack_complex_float* tau, + lapack_complex_float* c, lapack_int* ldc, + lapack_complex_float* work, lapack_int* lwork, + lapack_int *info ); +void LAPACK_zunmlq( char* side, char* trans, lapack_int* m, lapack_int* n, + lapack_int* k, const lapack_complex_double* a, + lapack_int* lda, const lapack_complex_double* tau, + lapack_complex_double* c, lapack_int* ldc, + lapack_complex_double* work, lapack_int* lwork, + lapack_int *info ); +void LAPACK_sgeqlf( lapack_int* m, lapack_int* n, float* a, lapack_int* lda, + float* tau, float* work, lapack_int* lwork, + lapack_int *info ); +void LAPACK_dgeqlf( lapack_int* m, lapack_int* n, double* a, lapack_int* lda, + double* tau, double* work, lapack_int* lwork, + lapack_int *info ); +void LAPACK_cgeqlf( lapack_int* m, lapack_int* n, lapack_complex_float* a, + lapack_int* lda, lapack_complex_float* tau, + lapack_complex_float* work, lapack_int* lwork, + lapack_int *info ); +void LAPACK_zgeqlf( lapack_int* m, lapack_int* n, lapack_complex_double* a, + lapack_int* lda, lapack_complex_double* tau, + lapack_complex_double* work, lapack_int* lwork, + lapack_int *info ); +void LAPACK_sorgql( lapack_int* m, 
lapack_int* n, lapack_int* k, float* a, + lapack_int* lda, const float* tau, float* work, + lapack_int* lwork, lapack_int *info ); +void LAPACK_dorgql( lapack_int* m, lapack_int* n, lapack_int* k, double* a, + lapack_int* lda, const double* tau, double* work, + lapack_int* lwork, lapack_int *info ); +void LAPACK_cungql( lapack_int* m, lapack_int* n, lapack_int* k, + lapack_complex_float* a, lapack_int* lda, + const lapack_complex_float* tau, lapack_complex_float* work, + lapack_int* lwork, lapack_int *info ); +void LAPACK_zungql( lapack_int* m, lapack_int* n, lapack_int* k, + lapack_complex_double* a, lapack_int* lda, + const lapack_complex_double* tau, + lapack_complex_double* work, lapack_int* lwork, + lapack_int *info ); +void LAPACK_sormql( char* side, char* trans, lapack_int* m, lapack_int* n, + lapack_int* k, const float* a, lapack_int* lda, + const float* tau, float* c, lapack_int* ldc, float* work, + lapack_int* lwork, lapack_int *info ); +void LAPACK_dormql( char* side, char* trans, lapack_int* m, lapack_int* n, + lapack_int* k, const double* a, lapack_int* lda, + const double* tau, double* c, lapack_int* ldc, double* work, + lapack_int* lwork, lapack_int *info ); +void LAPACK_cunmql( char* side, char* trans, lapack_int* m, lapack_int* n, + lapack_int* k, const lapack_complex_float* a, + lapack_int* lda, const lapack_complex_float* tau, + lapack_complex_float* c, lapack_int* ldc, + lapack_complex_float* work, lapack_int* lwork, + lapack_int *info ); +void LAPACK_zunmql( char* side, char* trans, lapack_int* m, lapack_int* n, + lapack_int* k, const lapack_complex_double* a, + lapack_int* lda, const lapack_complex_double* tau, + lapack_complex_double* c, lapack_int* ldc, + lapack_complex_double* work, lapack_int* lwork, + lapack_int *info ); +void LAPACK_sgerqf( lapack_int* m, lapack_int* n, float* a, lapack_int* lda, + float* tau, float* work, lapack_int* lwork, + lapack_int *info ); +void LAPACK_dgerqf( lapack_int* m, lapack_int* n, double* a, lapack_int* 
lda, + double* tau, double* work, lapack_int* lwork, + lapack_int *info ); +void LAPACK_cgerqf( lapack_int* m, lapack_int* n, lapack_complex_float* a, + lapack_int* lda, lapack_complex_float* tau, + lapack_complex_float* work, lapack_int* lwork, + lapack_int *info ); +void LAPACK_zgerqf( lapack_int* m, lapack_int* n, lapack_complex_double* a, + lapack_int* lda, lapack_complex_double* tau, + lapack_complex_double* work, lapack_int* lwork, + lapack_int *info ); +void LAPACK_sorgrq( lapack_int* m, lapack_int* n, lapack_int* k, float* a, + lapack_int* lda, const float* tau, float* work, + lapack_int* lwork, lapack_int *info ); +void LAPACK_dorgrq( lapack_int* m, lapack_int* n, lapack_int* k, double* a, + lapack_int* lda, const double* tau, double* work, + lapack_int* lwork, lapack_int *info ); +void LAPACK_cungrq( lapack_int* m, lapack_int* n, lapack_int* k, + lapack_complex_float* a, lapack_int* lda, + const lapack_complex_float* tau, lapack_complex_float* work, + lapack_int* lwork, lapack_int *info ); +void LAPACK_zungrq( lapack_int* m, lapack_int* n, lapack_int* k, + lapack_complex_double* a, lapack_int* lda, + const lapack_complex_double* tau, + lapack_complex_double* work, lapack_int* lwork, + lapack_int *info ); +void LAPACK_sormrq( char* side, char* trans, lapack_int* m, lapack_int* n, + lapack_int* k, const float* a, lapack_int* lda, + const float* tau, float* c, lapack_int* ldc, float* work, + lapack_int* lwork, lapack_int *info ); +void LAPACK_dormrq( char* side, char* trans, lapack_int* m, lapack_int* n, + lapack_int* k, const double* a, lapack_int* lda, + const double* tau, double* c, lapack_int* ldc, double* work, + lapack_int* lwork, lapack_int *info ); +void LAPACK_cunmrq( char* side, char* trans, lapack_int* m, lapack_int* n, + lapack_int* k, const lapack_complex_float* a, + lapack_int* lda, const lapack_complex_float* tau, + lapack_complex_float* c, lapack_int* ldc, + lapack_complex_float* work, lapack_int* lwork, + lapack_int *info ); +void 
LAPACK_zunmrq( char* side, char* trans, lapack_int* m, lapack_int* n, + lapack_int* k, const lapack_complex_double* a, + lapack_int* lda, const lapack_complex_double* tau, + lapack_complex_double* c, lapack_int* ldc, + lapack_complex_double* work, lapack_int* lwork, + lapack_int *info ); +void LAPACK_stzrzf( lapack_int* m, lapack_int* n, float* a, lapack_int* lda, + float* tau, float* work, lapack_int* lwork, + lapack_int *info ); +void LAPACK_dtzrzf( lapack_int* m, lapack_int* n, double* a, lapack_int* lda, + double* tau, double* work, lapack_int* lwork, + lapack_int *info ); +void LAPACK_ctzrzf( lapack_int* m, lapack_int* n, lapack_complex_float* a, + lapack_int* lda, lapack_complex_float* tau, + lapack_complex_float* work, lapack_int* lwork, + lapack_int *info ); +void LAPACK_ztzrzf( lapack_int* m, lapack_int* n, lapack_complex_double* a, + lapack_int* lda, lapack_complex_double* tau, + lapack_complex_double* work, lapack_int* lwork, + lapack_int *info ); +void LAPACK_sormrz( char* side, char* trans, lapack_int* m, lapack_int* n, + lapack_int* k, lapack_int* l, const float* a, + lapack_int* lda, const float* tau, float* c, + lapack_int* ldc, float* work, lapack_int* lwork, + lapack_int *info ); +void LAPACK_dormrz( char* side, char* trans, lapack_int* m, lapack_int* n, + lapack_int* k, lapack_int* l, const double* a, + lapack_int* lda, const double* tau, double* c, + lapack_int* ldc, double* work, lapack_int* lwork, + lapack_int *info ); +void LAPACK_cunmrz( char* side, char* trans, lapack_int* m, lapack_int* n, + lapack_int* k, lapack_int* l, const lapack_complex_float* a, + lapack_int* lda, const lapack_complex_float* tau, + lapack_complex_float* c, lapack_int* ldc, + lapack_complex_float* work, lapack_int* lwork, + lapack_int *info ); +void LAPACK_zunmrz( char* side, char* trans, lapack_int* m, lapack_int* n, + lapack_int* k, lapack_int* l, + const lapack_complex_double* a, lapack_int* lda, + const lapack_complex_double* tau, lapack_complex_double* c, + 
lapack_int* ldc, lapack_complex_double* work, + lapack_int* lwork, lapack_int *info ); +void LAPACK_sggqrf( lapack_int* n, lapack_int* m, lapack_int* p, float* a, + lapack_int* lda, float* taua, float* b, lapack_int* ldb, + float* taub, float* work, lapack_int* lwork, + lapack_int *info ); +void LAPACK_dggqrf( lapack_int* n, lapack_int* m, lapack_int* p, double* a, + lapack_int* lda, double* taua, double* b, lapack_int* ldb, + double* taub, double* work, lapack_int* lwork, + lapack_int *info ); +void LAPACK_cggqrf( lapack_int* n, lapack_int* m, lapack_int* p, + lapack_complex_float* a, lapack_int* lda, + lapack_complex_float* taua, lapack_complex_float* b, + lapack_int* ldb, lapack_complex_float* taub, + lapack_complex_float* work, lapack_int* lwork, + lapack_int *info ); +void LAPACK_zggqrf( lapack_int* n, lapack_int* m, lapack_int* p, + lapack_complex_double* a, lapack_int* lda, + lapack_complex_double* taua, lapack_complex_double* b, + lapack_int* ldb, lapack_complex_double* taub, + lapack_complex_double* work, lapack_int* lwork, + lapack_int *info ); +void LAPACK_sggrqf( lapack_int* m, lapack_int* p, lapack_int* n, float* a, + lapack_int* lda, float* taua, float* b, lapack_int* ldb, + float* taub, float* work, lapack_int* lwork, + lapack_int *info ); +void LAPACK_dggrqf( lapack_int* m, lapack_int* p, lapack_int* n, double* a, + lapack_int* lda, double* taua, double* b, lapack_int* ldb, + double* taub, double* work, lapack_int* lwork, + lapack_int *info ); +void LAPACK_cggrqf( lapack_int* m, lapack_int* p, lapack_int* n, + lapack_complex_float* a, lapack_int* lda, + lapack_complex_float* taua, lapack_complex_float* b, + lapack_int* ldb, lapack_complex_float* taub, + lapack_complex_float* work, lapack_int* lwork, + lapack_int *info ); +void LAPACK_zggrqf( lapack_int* m, lapack_int* p, lapack_int* n, + lapack_complex_double* a, lapack_int* lda, + lapack_complex_double* taua, lapack_complex_double* b, + lapack_int* ldb, lapack_complex_double* taub, + 
lapack_complex_double* work, lapack_int* lwork, + lapack_int *info ); +void LAPACK_sgebrd( lapack_int* m, lapack_int* n, float* a, lapack_int* lda, + float* d, float* e, float* tauq, float* taup, float* work, + lapack_int* lwork, lapack_int *info ); +void LAPACK_dgebrd( lapack_int* m, lapack_int* n, double* a, lapack_int* lda, + double* d, double* e, double* tauq, double* taup, + double* work, lapack_int* lwork, lapack_int *info ); +void LAPACK_cgebrd( lapack_int* m, lapack_int* n, lapack_complex_float* a, + lapack_int* lda, float* d, float* e, + lapack_complex_float* tauq, lapack_complex_float* taup, + lapack_complex_float* work, lapack_int* lwork, + lapack_int *info ); +void LAPACK_zgebrd( lapack_int* m, lapack_int* n, lapack_complex_double* a, + lapack_int* lda, double* d, double* e, + lapack_complex_double* tauq, lapack_complex_double* taup, + lapack_complex_double* work, lapack_int* lwork, + lapack_int *info ); +void LAPACK_sgbbrd( char* vect, lapack_int* m, lapack_int* n, lapack_int* ncc, + lapack_int* kl, lapack_int* ku, float* ab, lapack_int* ldab, + float* d, float* e, float* q, lapack_int* ldq, float* pt, + lapack_int* ldpt, float* c, lapack_int* ldc, float* work, + lapack_int *info ); +void LAPACK_dgbbrd( char* vect, lapack_int* m, lapack_int* n, lapack_int* ncc, + lapack_int* kl, lapack_int* ku, double* ab, + lapack_int* ldab, double* d, double* e, double* q, + lapack_int* ldq, double* pt, lapack_int* ldpt, double* c, + lapack_int* ldc, double* work, lapack_int *info ); +void LAPACK_cgbbrd( char* vect, lapack_int* m, lapack_int* n, lapack_int* ncc, + lapack_int* kl, lapack_int* ku, lapack_complex_float* ab, + lapack_int* ldab, float* d, float* e, + lapack_complex_float* q, lapack_int* ldq, + lapack_complex_float* pt, lapack_int* ldpt, + lapack_complex_float* c, lapack_int* ldc, + lapack_complex_float* work, float* rwork, + lapack_int *info ); +void LAPACK_zgbbrd( char* vect, lapack_int* m, lapack_int* n, lapack_int* ncc, + lapack_int* kl, lapack_int* 
ku, lapack_complex_double* ab, + lapack_int* ldab, double* d, double* e, + lapack_complex_double* q, lapack_int* ldq, + lapack_complex_double* pt, lapack_int* ldpt, + lapack_complex_double* c, lapack_int* ldc, + lapack_complex_double* work, double* rwork, + lapack_int *info ); +void LAPACK_sorgbr( char* vect, lapack_int* m, lapack_int* n, lapack_int* k, + float* a, lapack_int* lda, const float* tau, float* work, + lapack_int* lwork, lapack_int *info ); +void LAPACK_dorgbr( char* vect, lapack_int* m, lapack_int* n, lapack_int* k, + double* a, lapack_int* lda, const double* tau, double* work, + lapack_int* lwork, lapack_int *info ); +void LAPACK_sormbr( char* vect, char* side, char* trans, lapack_int* m, + lapack_int* n, lapack_int* k, const float* a, + lapack_int* lda, const float* tau, float* c, + lapack_int* ldc, float* work, lapack_int* lwork, + lapack_int *info ); +void LAPACK_dormbr( char* vect, char* side, char* trans, lapack_int* m, + lapack_int* n, lapack_int* k, const double* a, + lapack_int* lda, const double* tau, double* c, + lapack_int* ldc, double* work, lapack_int* lwork, + lapack_int *info ); +void LAPACK_cungbr( char* vect, lapack_int* m, lapack_int* n, lapack_int* k, + lapack_complex_float* a, lapack_int* lda, + const lapack_complex_float* tau, lapack_complex_float* work, + lapack_int* lwork, lapack_int *info ); +void LAPACK_zungbr( char* vect, lapack_int* m, lapack_int* n, lapack_int* k, + lapack_complex_double* a, lapack_int* lda, + const lapack_complex_double* tau, + lapack_complex_double* work, lapack_int* lwork, + lapack_int *info ); +void LAPACK_cunmbr( char* vect, char* side, char* trans, lapack_int* m, + lapack_int* n, lapack_int* k, const lapack_complex_float* a, + lapack_int* lda, const lapack_complex_float* tau, + lapack_complex_float* c, lapack_int* ldc, + lapack_complex_float* work, lapack_int* lwork, + lapack_int *info ); +void LAPACK_zunmbr( char* vect, char* side, char* trans, lapack_int* m, + lapack_int* n, lapack_int* k, + const 
lapack_complex_double* a, lapack_int* lda, + const lapack_complex_double* tau, lapack_complex_double* c, + lapack_int* ldc, lapack_complex_double* work, + lapack_int* lwork, lapack_int *info ); +void LAPACK_sbdsqr( char* uplo, lapack_int* n, lapack_int* ncvt, + lapack_int* nru, lapack_int* ncc, float* d, float* e, + float* vt, lapack_int* ldvt, float* u, lapack_int* ldu, + float* c, lapack_int* ldc, float* work, lapack_int *info ); +void LAPACK_dbdsqr( char* uplo, lapack_int* n, lapack_int* ncvt, + lapack_int* nru, lapack_int* ncc, double* d, double* e, + double* vt, lapack_int* ldvt, double* u, lapack_int* ldu, + double* c, lapack_int* ldc, double* work, + lapack_int *info ); +void LAPACK_cbdsqr( char* uplo, lapack_int* n, lapack_int* ncvt, + lapack_int* nru, lapack_int* ncc, float* d, float* e, + lapack_complex_float* vt, lapack_int* ldvt, + lapack_complex_float* u, lapack_int* ldu, + lapack_complex_float* c, lapack_int* ldc, float* work, + lapack_int *info ); +void LAPACK_zbdsqr( char* uplo, lapack_int* n, lapack_int* ncvt, + lapack_int* nru, lapack_int* ncc, double* d, double* e, + lapack_complex_double* vt, lapack_int* ldvt, + lapack_complex_double* u, lapack_int* ldu, + lapack_complex_double* c, lapack_int* ldc, double* work, + lapack_int *info ); +void LAPACK_sbdsdc( char* uplo, char* compq, lapack_int* n, float* d, float* e, + float* u, lapack_int* ldu, float* vt, lapack_int* ldvt, + float* q, lapack_int* iq, float* work, lapack_int* iwork, + lapack_int *info ); +void LAPACK_dbdsdc( char* uplo, char* compq, lapack_int* n, double* d, + double* e, double* u, lapack_int* ldu, double* vt, + lapack_int* ldvt, double* q, lapack_int* iq, double* work, + lapack_int* iwork, lapack_int *info ); +void LAPACK_ssytrd( char* uplo, lapack_int* n, float* a, lapack_int* lda, + float* d, float* e, float* tau, float* work, + lapack_int* lwork, lapack_int *info ); +void LAPACK_dsytrd( char* uplo, lapack_int* n, double* a, lapack_int* lda, + double* d, double* e, double* tau, 
double* work, + lapack_int* lwork, lapack_int *info ); +void LAPACK_sorgtr( char* uplo, lapack_int* n, float* a, lapack_int* lda, + const float* tau, float* work, lapack_int* lwork, + lapack_int *info ); +void LAPACK_dorgtr( char* uplo, lapack_int* n, double* a, lapack_int* lda, + const double* tau, double* work, lapack_int* lwork, + lapack_int *info ); +void LAPACK_sormtr( char* side, char* uplo, char* trans, lapack_int* m, + lapack_int* n, const float* a, lapack_int* lda, + const float* tau, float* c, lapack_int* ldc, float* work, + lapack_int* lwork, lapack_int *info ); +void LAPACK_dormtr( char* side, char* uplo, char* trans, lapack_int* m, + lapack_int* n, const double* a, lapack_int* lda, + const double* tau, double* c, lapack_int* ldc, double* work, + lapack_int* lwork, lapack_int *info ); +void LAPACK_chetrd( char* uplo, lapack_int* n, lapack_complex_float* a, + lapack_int* lda, float* d, float* e, + lapack_complex_float* tau, lapack_complex_float* work, + lapack_int* lwork, lapack_int *info ); +void LAPACK_zhetrd( char* uplo, lapack_int* n, lapack_complex_double* a, + lapack_int* lda, double* d, double* e, + lapack_complex_double* tau, lapack_complex_double* work, + lapack_int* lwork, lapack_int *info ); +void LAPACK_cungtr( char* uplo, lapack_int* n, lapack_complex_float* a, + lapack_int* lda, const lapack_complex_float* tau, + lapack_complex_float* work, lapack_int* lwork, + lapack_int *info ); +void LAPACK_zungtr( char* uplo, lapack_int* n, lapack_complex_double* a, + lapack_int* lda, const lapack_complex_double* tau, + lapack_complex_double* work, lapack_int* lwork, + lapack_int *info ); +void LAPACK_cunmtr( char* side, char* uplo, char* trans, lapack_int* m, + lapack_int* n, const lapack_complex_float* a, + lapack_int* lda, const lapack_complex_float* tau, + lapack_complex_float* c, lapack_int* ldc, + lapack_complex_float* work, lapack_int* lwork, + lapack_int *info ); +void LAPACK_zunmtr( char* side, char* uplo, char* trans, lapack_int* m, + 
lapack_int* n, const lapack_complex_double* a, + lapack_int* lda, const lapack_complex_double* tau, + lapack_complex_double* c, lapack_int* ldc, + lapack_complex_double* work, lapack_int* lwork, + lapack_int *info ); +void LAPACK_ssptrd( char* uplo, lapack_int* n, float* ap, float* d, float* e, + float* tau, lapack_int *info ); +void LAPACK_dsptrd( char* uplo, lapack_int* n, double* ap, double* d, double* e, + double* tau, lapack_int *info ); +void LAPACK_sopgtr( char* uplo, lapack_int* n, const float* ap, + const float* tau, float* q, lapack_int* ldq, float* work, + lapack_int *info ); +void LAPACK_dopgtr( char* uplo, lapack_int* n, const double* ap, + const double* tau, double* q, lapack_int* ldq, double* work, + lapack_int *info ); +void LAPACK_sopmtr( char* side, char* uplo, char* trans, lapack_int* m, + lapack_int* n, const float* ap, const float* tau, float* c, + lapack_int* ldc, float* work, lapack_int *info ); +void LAPACK_dopmtr( char* side, char* uplo, char* trans, lapack_int* m, + lapack_int* n, const double* ap, const double* tau, + double* c, lapack_int* ldc, double* work, + lapack_int *info ); +void LAPACK_chptrd( char* uplo, lapack_int* n, lapack_complex_float* ap, + float* d, float* e, lapack_complex_float* tau, + lapack_int *info ); +void LAPACK_zhptrd( char* uplo, lapack_int* n, lapack_complex_double* ap, + double* d, double* e, lapack_complex_double* tau, + lapack_int *info ); +void LAPACK_cupgtr( char* uplo, lapack_int* n, const lapack_complex_float* ap, + const lapack_complex_float* tau, lapack_complex_float* q, + lapack_int* ldq, lapack_complex_float* work, + lapack_int *info ); +void LAPACK_zupgtr( char* uplo, lapack_int* n, const lapack_complex_double* ap, + const lapack_complex_double* tau, lapack_complex_double* q, + lapack_int* ldq, lapack_complex_double* work, + lapack_int *info ); +void LAPACK_cupmtr( char* side, char* uplo, char* trans, lapack_int* m, + lapack_int* n, const lapack_complex_float* ap, + const lapack_complex_float* tau, 
lapack_complex_float* c, + lapack_int* ldc, lapack_complex_float* work, + lapack_int *info ); +void LAPACK_zupmtr( char* side, char* uplo, char* trans, lapack_int* m, + lapack_int* n, const lapack_complex_double* ap, + const lapack_complex_double* tau, lapack_complex_double* c, + lapack_int* ldc, lapack_complex_double* work, + lapack_int *info ); +void LAPACK_ssbtrd( char* vect, char* uplo, lapack_int* n, lapack_int* kd, + float* ab, lapack_int* ldab, float* d, float* e, float* q, + lapack_int* ldq, float* work, lapack_int *info ); +void LAPACK_dsbtrd( char* vect, char* uplo, lapack_int* n, lapack_int* kd, + double* ab, lapack_int* ldab, double* d, double* e, + double* q, lapack_int* ldq, double* work, + lapack_int *info ); +void LAPACK_chbtrd( char* vect, char* uplo, lapack_int* n, lapack_int* kd, + lapack_complex_float* ab, lapack_int* ldab, float* d, + float* e, lapack_complex_float* q, lapack_int* ldq, + lapack_complex_float* work, lapack_int *info ); +void LAPACK_zhbtrd( char* vect, char* uplo, lapack_int* n, lapack_int* kd, + lapack_complex_double* ab, lapack_int* ldab, double* d, + double* e, lapack_complex_double* q, lapack_int* ldq, + lapack_complex_double* work, lapack_int *info ); +void LAPACK_ssterf( lapack_int* n, float* d, float* e, lapack_int *info ); +void LAPACK_dsterf( lapack_int* n, double* d, double* e, lapack_int *info ); +void LAPACK_ssteqr( char* compz, lapack_int* n, float* d, float* e, float* z, + lapack_int* ldz, float* work, lapack_int *info ); +void LAPACK_dsteqr( char* compz, lapack_int* n, double* d, double* e, double* z, + lapack_int* ldz, double* work, lapack_int *info ); +void LAPACK_csteqr( char* compz, lapack_int* n, float* d, float* e, + lapack_complex_float* z, lapack_int* ldz, float* work, + lapack_int *info ); +void LAPACK_zsteqr( char* compz, lapack_int* n, double* d, double* e, + lapack_complex_double* z, lapack_int* ldz, double* work, + lapack_int *info ); +void LAPACK_sstemr( char* jobz, char* range, lapack_int* n, float* 
d, float* e, + float* vl, float* vu, lapack_int* il, lapack_int* iu, + lapack_int* m, float* w, float* z, lapack_int* ldz, + lapack_int* nzc, lapack_int* isuppz, lapack_logical* tryrac, + float* work, lapack_int* lwork, lapack_int* iwork, + lapack_int* liwork, lapack_int *info ); +void LAPACK_dstemr( char* jobz, char* range, lapack_int* n, double* d, + double* e, double* vl, double* vu, lapack_int* il, + lapack_int* iu, lapack_int* m, double* w, double* z, + lapack_int* ldz, lapack_int* nzc, lapack_int* isuppz, + lapack_logical* tryrac, double* work, lapack_int* lwork, + lapack_int* iwork, lapack_int* liwork, lapack_int *info ); +void LAPACK_cstemr( char* jobz, char* range, lapack_int* n, float* d, float* e, + float* vl, float* vu, lapack_int* il, lapack_int* iu, + lapack_int* m, float* w, lapack_complex_float* z, + lapack_int* ldz, lapack_int* nzc, lapack_int* isuppz, + lapack_logical* tryrac, float* work, lapack_int* lwork, + lapack_int* iwork, lapack_int* liwork, lapack_int *info ); +void LAPACK_zstemr( char* jobz, char* range, lapack_int* n, double* d, + double* e, double* vl, double* vu, lapack_int* il, + lapack_int* iu, lapack_int* m, double* w, + lapack_complex_double* z, lapack_int* ldz, lapack_int* nzc, + lapack_int* isuppz, lapack_logical* tryrac, double* work, + lapack_int* lwork, lapack_int* iwork, lapack_int* liwork, + lapack_int *info ); +void LAPACK_sstedc( char* compz, lapack_int* n, float* d, float* e, float* z, + lapack_int* ldz, float* work, lapack_int* lwork, + lapack_int* iwork, lapack_int* liwork, lapack_int *info ); +void LAPACK_dstedc( char* compz, lapack_int* n, double* d, double* e, double* z, + lapack_int* ldz, double* work, lapack_int* lwork, + lapack_int* iwork, lapack_int* liwork, lapack_int *info ); +void LAPACK_cstedc( char* compz, lapack_int* n, float* d, float* e, + lapack_complex_float* z, lapack_int* ldz, + lapack_complex_float* work, lapack_int* lwork, float* rwork, + lapack_int* lrwork, lapack_int* iwork, lapack_int* liwork, + 
lapack_int *info ); +void LAPACK_zstedc( char* compz, lapack_int* n, double* d, double* e, + lapack_complex_double* z, lapack_int* ldz, + lapack_complex_double* work, lapack_int* lwork, + double* rwork, lapack_int* lrwork, lapack_int* iwork, + lapack_int* liwork, lapack_int *info ); +void LAPACK_sstegr( char* jobz, char* range, lapack_int* n, float* d, float* e, + float* vl, float* vu, lapack_int* il, lapack_int* iu, + float* abstol, lapack_int* m, float* w, float* z, + lapack_int* ldz, lapack_int* isuppz, float* work, + lapack_int* lwork, lapack_int* iwork, lapack_int* liwork, + lapack_int *info ); +void LAPACK_dstegr( char* jobz, char* range, lapack_int* n, double* d, + double* e, double* vl, double* vu, lapack_int* il, + lapack_int* iu, double* abstol, lapack_int* m, double* w, + double* z, lapack_int* ldz, lapack_int* isuppz, + double* work, lapack_int* lwork, lapack_int* iwork, + lapack_int* liwork, lapack_int *info ); +void LAPACK_cstegr( char* jobz, char* range, lapack_int* n, float* d, float* e, + float* vl, float* vu, lapack_int* il, lapack_int* iu, + float* abstol, lapack_int* m, float* w, + lapack_complex_float* z, lapack_int* ldz, + lapack_int* isuppz, float* work, lapack_int* lwork, + lapack_int* iwork, lapack_int* liwork, lapack_int *info ); +void LAPACK_zstegr( char* jobz, char* range, lapack_int* n, double* d, + double* e, double* vl, double* vu, lapack_int* il, + lapack_int* iu, double* abstol, lapack_int* m, double* w, + lapack_complex_double* z, lapack_int* ldz, + lapack_int* isuppz, double* work, lapack_int* lwork, + lapack_int* iwork, lapack_int* liwork, lapack_int *info ); +void LAPACK_spteqr( char* compz, lapack_int* n, float* d, float* e, float* z, + lapack_int* ldz, float* work, lapack_int *info ); +void LAPACK_dpteqr( char* compz, lapack_int* n, double* d, double* e, double* z, + lapack_int* ldz, double* work, lapack_int *info ); +void LAPACK_cpteqr( char* compz, lapack_int* n, float* d, float* e, + lapack_complex_float* z, lapack_int* 
ldz, float* work, + lapack_int *info ); +void LAPACK_zpteqr( char* compz, lapack_int* n, double* d, double* e, + lapack_complex_double* z, lapack_int* ldz, double* work, + lapack_int *info ); +void LAPACK_sstebz( char* range, char* order, lapack_int* n, float* vl, + float* vu, lapack_int* il, lapack_int* iu, float* abstol, + const float* d, const float* e, lapack_int* m, + lapack_int* nsplit, float* w, lapack_int* iblock, + lapack_int* isplit, float* work, lapack_int* iwork, + lapack_int *info ); +void LAPACK_dstebz( char* range, char* order, lapack_int* n, double* vl, + double* vu, lapack_int* il, lapack_int* iu, double* abstol, + const double* d, const double* e, lapack_int* m, + lapack_int* nsplit, double* w, lapack_int* iblock, + lapack_int* isplit, double* work, lapack_int* iwork, + lapack_int *info ); +void LAPACK_sstein( lapack_int* n, const float* d, const float* e, + lapack_int* m, const float* w, const lapack_int* iblock, + const lapack_int* isplit, float* z, lapack_int* ldz, + float* work, lapack_int* iwork, lapack_int* ifailv, + lapack_int *info ); +void LAPACK_dstein( lapack_int* n, const double* d, const double* e, + lapack_int* m, const double* w, const lapack_int* iblock, + const lapack_int* isplit, double* z, lapack_int* ldz, + double* work, lapack_int* iwork, lapack_int* ifailv, + lapack_int *info ); +void LAPACK_cstein( lapack_int* n, const float* d, const float* e, + lapack_int* m, const float* w, const lapack_int* iblock, + const lapack_int* isplit, lapack_complex_float* z, + lapack_int* ldz, float* work, lapack_int* iwork, + lapack_int* ifailv, lapack_int *info ); +void LAPACK_zstein( lapack_int* n, const double* d, const double* e, + lapack_int* m, const double* w, const lapack_int* iblock, + const lapack_int* isplit, lapack_complex_double* z, + lapack_int* ldz, double* work, lapack_int* iwork, + lapack_int* ifailv, lapack_int *info ); +void LAPACK_sdisna( char* job, lapack_int* m, lapack_int* n, const float* d, + float* sep, lapack_int *info 
); +void LAPACK_ddisna( char* job, lapack_int* m, lapack_int* n, const double* d, + double* sep, lapack_int *info ); +void LAPACK_ssygst( lapack_int* itype, char* uplo, lapack_int* n, float* a, + lapack_int* lda, const float* b, lapack_int* ldb, + lapack_int *info ); +void LAPACK_dsygst( lapack_int* itype, char* uplo, lapack_int* n, double* a, + lapack_int* lda, const double* b, lapack_int* ldb, + lapack_int *info ); +void LAPACK_chegst( lapack_int* itype, char* uplo, lapack_int* n, + lapack_complex_float* a, lapack_int* lda, + const lapack_complex_float* b, lapack_int* ldb, + lapack_int *info ); +void LAPACK_zhegst( lapack_int* itype, char* uplo, lapack_int* n, + lapack_complex_double* a, lapack_int* lda, + const lapack_complex_double* b, lapack_int* ldb, + lapack_int *info ); +void LAPACK_sspgst( lapack_int* itype, char* uplo, lapack_int* n, float* ap, + const float* bp, lapack_int *info ); +void LAPACK_dspgst( lapack_int* itype, char* uplo, lapack_int* n, double* ap, + const double* bp, lapack_int *info ); +void LAPACK_chpgst( lapack_int* itype, char* uplo, lapack_int* n, + lapack_complex_float* ap, const lapack_complex_float* bp, + lapack_int *info ); +void LAPACK_zhpgst( lapack_int* itype, char* uplo, lapack_int* n, + lapack_complex_double* ap, const lapack_complex_double* bp, + lapack_int *info ); +void LAPACK_ssbgst( char* vect, char* uplo, lapack_int* n, lapack_int* ka, + lapack_int* kb, float* ab, lapack_int* ldab, + const float* bb, lapack_int* ldbb, float* x, + lapack_int* ldx, float* work, lapack_int *info ); +void LAPACK_dsbgst( char* vect, char* uplo, lapack_int* n, lapack_int* ka, + lapack_int* kb, double* ab, lapack_int* ldab, + const double* bb, lapack_int* ldbb, double* x, + lapack_int* ldx, double* work, lapack_int *info ); +void LAPACK_chbgst( char* vect, char* uplo, lapack_int* n, lapack_int* ka, + lapack_int* kb, lapack_complex_float* ab, lapack_int* ldab, + const lapack_complex_float* bb, lapack_int* ldbb, + lapack_complex_float* x, 
lapack_int* ldx, + lapack_complex_float* work, float* rwork, + lapack_int *info ); +void LAPACK_zhbgst( char* vect, char* uplo, lapack_int* n, lapack_int* ka, + lapack_int* kb, lapack_complex_double* ab, lapack_int* ldab, + const lapack_complex_double* bb, lapack_int* ldbb, + lapack_complex_double* x, lapack_int* ldx, + lapack_complex_double* work, double* rwork, + lapack_int *info ); +void LAPACK_spbstf( char* uplo, lapack_int* n, lapack_int* kb, float* bb, + lapack_int* ldbb, lapack_int *info ); +void LAPACK_dpbstf( char* uplo, lapack_int* n, lapack_int* kb, double* bb, + lapack_int* ldbb, lapack_int *info ); +void LAPACK_cpbstf( char* uplo, lapack_int* n, lapack_int* kb, + lapack_complex_float* bb, lapack_int* ldbb, + lapack_int *info ); +void LAPACK_zpbstf( char* uplo, lapack_int* n, lapack_int* kb, + lapack_complex_double* bb, lapack_int* ldbb, + lapack_int *info ); +void LAPACK_sgehrd( lapack_int* n, lapack_int* ilo, lapack_int* ihi, float* a, + lapack_int* lda, float* tau, float* work, lapack_int* lwork, + lapack_int *info ); +void LAPACK_dgehrd( lapack_int* n, lapack_int* ilo, lapack_int* ihi, double* a, + lapack_int* lda, double* tau, double* work, + lapack_int* lwork, lapack_int *info ); +void LAPACK_cgehrd( lapack_int* n, lapack_int* ilo, lapack_int* ihi, + lapack_complex_float* a, lapack_int* lda, + lapack_complex_float* tau, lapack_complex_float* work, + lapack_int* lwork, lapack_int *info ); +void LAPACK_zgehrd( lapack_int* n, lapack_int* ilo, lapack_int* ihi, + lapack_complex_double* a, lapack_int* lda, + lapack_complex_double* tau, lapack_complex_double* work, + lapack_int* lwork, lapack_int *info ); +void LAPACK_sorghr( lapack_int* n, lapack_int* ilo, lapack_int* ihi, float* a, + lapack_int* lda, const float* tau, float* work, + lapack_int* lwork, lapack_int *info ); +void LAPACK_dorghr( lapack_int* n, lapack_int* ilo, lapack_int* ihi, double* a, + lapack_int* lda, const double* tau, double* work, + lapack_int* lwork, lapack_int *info ); +void 
LAPACK_sormhr( char* side, char* trans, lapack_int* m, lapack_int* n, + lapack_int* ilo, lapack_int* ihi, const float* a, + lapack_int* lda, const float* tau, float* c, + lapack_int* ldc, float* work, lapack_int* lwork, + lapack_int *info ); +void LAPACK_dormhr( char* side, char* trans, lapack_int* m, lapack_int* n, + lapack_int* ilo, lapack_int* ihi, const double* a, + lapack_int* lda, const double* tau, double* c, + lapack_int* ldc, double* work, lapack_int* lwork, + lapack_int *info ); +void LAPACK_cunghr( lapack_int* n, lapack_int* ilo, lapack_int* ihi, + lapack_complex_float* a, lapack_int* lda, + const lapack_complex_float* tau, lapack_complex_float* work, + lapack_int* lwork, lapack_int *info ); +void LAPACK_zunghr( lapack_int* n, lapack_int* ilo, lapack_int* ihi, + lapack_complex_double* a, lapack_int* lda, + const lapack_complex_double* tau, + lapack_complex_double* work, lapack_int* lwork, + lapack_int *info ); +void LAPACK_cunmhr( char* side, char* trans, lapack_int* m, lapack_int* n, + lapack_int* ilo, lapack_int* ihi, + const lapack_complex_float* a, lapack_int* lda, + const lapack_complex_float* tau, lapack_complex_float* c, + lapack_int* ldc, lapack_complex_float* work, + lapack_int* lwork, lapack_int *info ); +void LAPACK_zunmhr( char* side, char* trans, lapack_int* m, lapack_int* n, + lapack_int* ilo, lapack_int* ihi, + const lapack_complex_double* a, lapack_int* lda, + const lapack_complex_double* tau, lapack_complex_double* c, + lapack_int* ldc, lapack_complex_double* work, + lapack_int* lwork, lapack_int *info ); +void LAPACK_sgebal( char* job, lapack_int* n, float* a, lapack_int* lda, + lapack_int* ilo, lapack_int* ihi, float* scale, + lapack_int *info ); +void LAPACK_dgebal( char* job, lapack_int* n, double* a, lapack_int* lda, + lapack_int* ilo, lapack_int* ihi, double* scale, + lapack_int *info ); +void LAPACK_cgebal( char* job, lapack_int* n, lapack_complex_float* a, + lapack_int* lda, lapack_int* ilo, lapack_int* ihi, + float* scale, 
lapack_int *info ); +void LAPACK_zgebal( char* job, lapack_int* n, lapack_complex_double* a, + lapack_int* lda, lapack_int* ilo, lapack_int* ihi, + double* scale, lapack_int *info ); +void LAPACK_sgebak( char* job, char* side, lapack_int* n, lapack_int* ilo, + lapack_int* ihi, const float* scale, lapack_int* m, + float* v, lapack_int* ldv, lapack_int *info ); +void LAPACK_dgebak( char* job, char* side, lapack_int* n, lapack_int* ilo, + lapack_int* ihi, const double* scale, lapack_int* m, + double* v, lapack_int* ldv, lapack_int *info ); +void LAPACK_cgebak( char* job, char* side, lapack_int* n, lapack_int* ilo, + lapack_int* ihi, const float* scale, lapack_int* m, + lapack_complex_float* v, lapack_int* ldv, + lapack_int *info ); +void LAPACK_zgebak( char* job, char* side, lapack_int* n, lapack_int* ilo, + lapack_int* ihi, const double* scale, lapack_int* m, + lapack_complex_double* v, lapack_int* ldv, + lapack_int *info ); +void LAPACK_shseqr( char* job, char* compz, lapack_int* n, lapack_int* ilo, + lapack_int* ihi, float* h, lapack_int* ldh, float* wr, + float* wi, float* z, lapack_int* ldz, float* work, + lapack_int* lwork, lapack_int *info ); +void LAPACK_dhseqr( char* job, char* compz, lapack_int* n, lapack_int* ilo, + lapack_int* ihi, double* h, lapack_int* ldh, double* wr, + double* wi, double* z, lapack_int* ldz, double* work, + lapack_int* lwork, lapack_int *info ); +void LAPACK_chseqr( char* job, char* compz, lapack_int* n, lapack_int* ilo, + lapack_int* ihi, lapack_complex_float* h, lapack_int* ldh, + lapack_complex_float* w, lapack_complex_float* z, + lapack_int* ldz, lapack_complex_float* work, + lapack_int* lwork, lapack_int *info ); +void LAPACK_zhseqr( char* job, char* compz, lapack_int* n, lapack_int* ilo, + lapack_int* ihi, lapack_complex_double* h, lapack_int* ldh, + lapack_complex_double* w, lapack_complex_double* z, + lapack_int* ldz, lapack_complex_double* work, + lapack_int* lwork, lapack_int *info ); +void LAPACK_shsein( char* job, char* 
eigsrc, char* initv, + lapack_logical* select, lapack_int* n, const float* h, + lapack_int* ldh, float* wr, const float* wi, float* vl, + lapack_int* ldvl, float* vr, lapack_int* ldvr, + lapack_int* mm, lapack_int* m, float* work, + lapack_int* ifaill, lapack_int* ifailr, lapack_int *info ); +void LAPACK_dhsein( char* job, char* eigsrc, char* initv, + lapack_logical* select, lapack_int* n, const double* h, + lapack_int* ldh, double* wr, const double* wi, double* vl, + lapack_int* ldvl, double* vr, lapack_int* ldvr, + lapack_int* mm, lapack_int* m, double* work, + lapack_int* ifaill, lapack_int* ifailr, lapack_int *info ); +void LAPACK_chsein( char* job, char* eigsrc, char* initv, + const lapack_logical* select, lapack_int* n, + const lapack_complex_float* h, lapack_int* ldh, + lapack_complex_float* w, lapack_complex_float* vl, + lapack_int* ldvl, lapack_complex_float* vr, + lapack_int* ldvr, lapack_int* mm, lapack_int* m, + lapack_complex_float* work, float* rwork, + lapack_int* ifaill, lapack_int* ifailr, lapack_int *info ); +void LAPACK_zhsein( char* job, char* eigsrc, char* initv, + const lapack_logical* select, lapack_int* n, + const lapack_complex_double* h, lapack_int* ldh, + lapack_complex_double* w, lapack_complex_double* vl, + lapack_int* ldvl, lapack_complex_double* vr, + lapack_int* ldvr, lapack_int* mm, lapack_int* m, + lapack_complex_double* work, double* rwork, + lapack_int* ifaill, lapack_int* ifailr, lapack_int *info ); +void LAPACK_strevc( char* side, char* howmny, lapack_logical* select, + lapack_int* n, const float* t, lapack_int* ldt, float* vl, + lapack_int* ldvl, float* vr, lapack_int* ldvr, + lapack_int* mm, lapack_int* m, float* work, + lapack_int *info ); +void LAPACK_dtrevc( char* side, char* howmny, lapack_logical* select, + lapack_int* n, const double* t, lapack_int* ldt, double* vl, + lapack_int* ldvl, double* vr, lapack_int* ldvr, + lapack_int* mm, lapack_int* m, double* work, + lapack_int *info ); +void LAPACK_ctrevc( char* side, 
char* howmny, const lapack_logical* select, + lapack_int* n, lapack_complex_float* t, lapack_int* ldt, + lapack_complex_float* vl, lapack_int* ldvl, + lapack_complex_float* vr, lapack_int* ldvr, lapack_int* mm, + lapack_int* m, lapack_complex_float* work, float* rwork, + lapack_int *info ); +void LAPACK_ztrevc( char* side, char* howmny, const lapack_logical* select, + lapack_int* n, lapack_complex_double* t, lapack_int* ldt, + lapack_complex_double* vl, lapack_int* ldvl, + lapack_complex_double* vr, lapack_int* ldvr, lapack_int* mm, + lapack_int* m, lapack_complex_double* work, double* rwork, + lapack_int *info ); +void LAPACK_strsna( char* job, char* howmny, const lapack_logical* select, + lapack_int* n, const float* t, lapack_int* ldt, + const float* vl, lapack_int* ldvl, const float* vr, + lapack_int* ldvr, float* s, float* sep, lapack_int* mm, + lapack_int* m, float* work, lapack_int* ldwork, + lapack_int* iwork, lapack_int *info ); +void LAPACK_dtrsna( char* job, char* howmny, const lapack_logical* select, + lapack_int* n, const double* t, lapack_int* ldt, + const double* vl, lapack_int* ldvl, const double* vr, + lapack_int* ldvr, double* s, double* sep, lapack_int* mm, + lapack_int* m, double* work, lapack_int* ldwork, + lapack_int* iwork, lapack_int *info ); +void LAPACK_ctrsna( char* job, char* howmny, const lapack_logical* select, + lapack_int* n, const lapack_complex_float* t, + lapack_int* ldt, const lapack_complex_float* vl, + lapack_int* ldvl, const lapack_complex_float* vr, + lapack_int* ldvr, float* s, float* sep, lapack_int* mm, + lapack_int* m, lapack_complex_float* work, + lapack_int* ldwork, float* rwork, lapack_int *info ); +void LAPACK_ztrsna( char* job, char* howmny, const lapack_logical* select, + lapack_int* n, const lapack_complex_double* t, + lapack_int* ldt, const lapack_complex_double* vl, + lapack_int* ldvl, const lapack_complex_double* vr, + lapack_int* ldvr, double* s, double* sep, lapack_int* mm, + lapack_int* m, 
lapack_complex_double* work, + lapack_int* ldwork, double* rwork, lapack_int *info ); +void LAPACK_strexc( char* compq, lapack_int* n, float* t, lapack_int* ldt, + float* q, lapack_int* ldq, lapack_int* ifst, + lapack_int* ilst, float* work, lapack_int *info ); +void LAPACK_dtrexc( char* compq, lapack_int* n, double* t, lapack_int* ldt, + double* q, lapack_int* ldq, lapack_int* ifst, + lapack_int* ilst, double* work, lapack_int *info ); +void LAPACK_ctrexc( char* compq, lapack_int* n, lapack_complex_float* t, + lapack_int* ldt, lapack_complex_float* q, lapack_int* ldq, + lapack_int* ifst, lapack_int* ilst, lapack_int *info ); +void LAPACK_ztrexc( char* compq, lapack_int* n, lapack_complex_double* t, + lapack_int* ldt, lapack_complex_double* q, lapack_int* ldq, + lapack_int* ifst, lapack_int* ilst, lapack_int *info ); +void LAPACK_strsen( char* job, char* compq, const lapack_logical* select, + lapack_int* n, float* t, lapack_int* ldt, float* q, + lapack_int* ldq, float* wr, float* wi, lapack_int* m, + float* s, float* sep, float* work, lapack_int* lwork, + lapack_int* iwork, lapack_int* liwork, lapack_int *info ); +void LAPACK_dtrsen( char* job, char* compq, const lapack_logical* select, + lapack_int* n, double* t, lapack_int* ldt, double* q, + lapack_int* ldq, double* wr, double* wi, lapack_int* m, + double* s, double* sep, double* work, lapack_int* lwork, + lapack_int* iwork, lapack_int* liwork, lapack_int *info ); +void LAPACK_ctrsen( char* job, char* compq, const lapack_logical* select, + lapack_int* n, lapack_complex_float* t, lapack_int* ldt, + lapack_complex_float* q, lapack_int* ldq, + lapack_complex_float* w, lapack_int* m, float* s, + float* sep, lapack_complex_float* work, lapack_int* lwork, + lapack_int *info ); +void LAPACK_ztrsen( char* job, char* compq, const lapack_logical* select, + lapack_int* n, lapack_complex_double* t, lapack_int* ldt, + lapack_complex_double* q, lapack_int* ldq, + lapack_complex_double* w, lapack_int* m, double* s, + double* 
sep, lapack_complex_double* work, lapack_int* lwork, + lapack_int *info ); +void LAPACK_strsyl( char* trana, char* tranb, lapack_int* isgn, lapack_int* m, + lapack_int* n, const float* a, lapack_int* lda, + const float* b, lapack_int* ldb, float* c, lapack_int* ldc, + float* scale, lapack_int *info ); +void LAPACK_dtrsyl( char* trana, char* tranb, lapack_int* isgn, lapack_int* m, + lapack_int* n, const double* a, lapack_int* lda, + const double* b, lapack_int* ldb, double* c, + lapack_int* ldc, double* scale, lapack_int *info ); +void LAPACK_ctrsyl( char* trana, char* tranb, lapack_int* isgn, lapack_int* m, + lapack_int* n, const lapack_complex_float* a, + lapack_int* lda, const lapack_complex_float* b, + lapack_int* ldb, lapack_complex_float* c, lapack_int* ldc, + float* scale, lapack_int *info ); +void LAPACK_ztrsyl( char* trana, char* tranb, lapack_int* isgn, lapack_int* m, + lapack_int* n, const lapack_complex_double* a, + lapack_int* lda, const lapack_complex_double* b, + lapack_int* ldb, lapack_complex_double* c, lapack_int* ldc, + double* scale, lapack_int *info ); +void LAPACK_sgghrd( char* compq, char* compz, lapack_int* n, lapack_int* ilo, + lapack_int* ihi, float* a, lapack_int* lda, float* b, + lapack_int* ldb, float* q, lapack_int* ldq, float* z, + lapack_int* ldz, lapack_int *info ); +void LAPACK_dgghrd( char* compq, char* compz, lapack_int* n, lapack_int* ilo, + lapack_int* ihi, double* a, lapack_int* lda, double* b, + lapack_int* ldb, double* q, lapack_int* ldq, double* z, + lapack_int* ldz, lapack_int *info ); +void LAPACK_cgghrd( char* compq, char* compz, lapack_int* n, lapack_int* ilo, + lapack_int* ihi, lapack_complex_float* a, lapack_int* lda, + lapack_complex_float* b, lapack_int* ldb, + lapack_complex_float* q, lapack_int* ldq, + lapack_complex_float* z, lapack_int* ldz, + lapack_int *info ); +void LAPACK_zgghrd( char* compq, char* compz, lapack_int* n, lapack_int* ilo, + lapack_int* ihi, lapack_complex_double* a, lapack_int* lda, + 
lapack_complex_double* b, lapack_int* ldb, + lapack_complex_double* q, lapack_int* ldq, + lapack_complex_double* z, lapack_int* ldz, + lapack_int *info ); +void LAPACK_sggbal( char* job, lapack_int* n, float* a, lapack_int* lda, + float* b, lapack_int* ldb, lapack_int* ilo, lapack_int* ihi, + float* lscale, float* rscale, float* work, + lapack_int *info ); +void LAPACK_dggbal( char* job, lapack_int* n, double* a, lapack_int* lda, + double* b, lapack_int* ldb, lapack_int* ilo, + lapack_int* ihi, double* lscale, double* rscale, + double* work, lapack_int *info ); +void LAPACK_cggbal( char* job, lapack_int* n, lapack_complex_float* a, + lapack_int* lda, lapack_complex_float* b, lapack_int* ldb, + lapack_int* ilo, lapack_int* ihi, float* lscale, + float* rscale, float* work, lapack_int *info ); +void LAPACK_zggbal( char* job, lapack_int* n, lapack_complex_double* a, + lapack_int* lda, lapack_complex_double* b, lapack_int* ldb, + lapack_int* ilo, lapack_int* ihi, double* lscale, + double* rscale, double* work, lapack_int *info ); +void LAPACK_sggbak( char* job, char* side, lapack_int* n, lapack_int* ilo, + lapack_int* ihi, const float* lscale, const float* rscale, + lapack_int* m, float* v, lapack_int* ldv, + lapack_int *info ); +void LAPACK_dggbak( char* job, char* side, lapack_int* n, lapack_int* ilo, + lapack_int* ihi, const double* lscale, const double* rscale, + lapack_int* m, double* v, lapack_int* ldv, + lapack_int *info ); +void LAPACK_cggbak( char* job, char* side, lapack_int* n, lapack_int* ilo, + lapack_int* ihi, const float* lscale, const float* rscale, + lapack_int* m, lapack_complex_float* v, lapack_int* ldv, + lapack_int *info ); +void LAPACK_zggbak( char* job, char* side, lapack_int* n, lapack_int* ilo, + lapack_int* ihi, const double* lscale, const double* rscale, + lapack_int* m, lapack_complex_double* v, lapack_int* ldv, + lapack_int *info ); +void LAPACK_shgeqz( char* job, char* compq, char* compz, lapack_int* n, + lapack_int* ilo, lapack_int* ihi, 
float* h, lapack_int* ldh, + float* t, lapack_int* ldt, float* alphar, float* alphai, + float* beta, float* q, lapack_int* ldq, float* z, + lapack_int* ldz, float* work, lapack_int* lwork, + lapack_int *info ); +void LAPACK_dhgeqz( char* job, char* compq, char* compz, lapack_int* n, + lapack_int* ilo, lapack_int* ihi, double* h, + lapack_int* ldh, double* t, lapack_int* ldt, double* alphar, + double* alphai, double* beta, double* q, lapack_int* ldq, + double* z, lapack_int* ldz, double* work, lapack_int* lwork, + lapack_int *info ); +void LAPACK_chgeqz( char* job, char* compq, char* compz, lapack_int* n, + lapack_int* ilo, lapack_int* ihi, lapack_complex_float* h, + lapack_int* ldh, lapack_complex_float* t, lapack_int* ldt, + lapack_complex_float* alpha, lapack_complex_float* beta, + lapack_complex_float* q, lapack_int* ldq, + lapack_complex_float* z, lapack_int* ldz, + lapack_complex_float* work, lapack_int* lwork, float* rwork, + lapack_int *info ); +void LAPACK_zhgeqz( char* job, char* compq, char* compz, lapack_int* n, + lapack_int* ilo, lapack_int* ihi, lapack_complex_double* h, + lapack_int* ldh, lapack_complex_double* t, lapack_int* ldt, + lapack_complex_double* alpha, lapack_complex_double* beta, + lapack_complex_double* q, lapack_int* ldq, + lapack_complex_double* z, lapack_int* ldz, + lapack_complex_double* work, lapack_int* lwork, + double* rwork, lapack_int *info ); +void LAPACK_stgevc( char* side, char* howmny, const lapack_logical* select, + lapack_int* n, const float* s, lapack_int* lds, + const float* p, lapack_int* ldp, float* vl, + lapack_int* ldvl, float* vr, lapack_int* ldvr, + lapack_int* mm, lapack_int* m, float* work, + lapack_int *info ); +void LAPACK_dtgevc( char* side, char* howmny, const lapack_logical* select, + lapack_int* n, const double* s, lapack_int* lds, + const double* p, lapack_int* ldp, double* vl, + lapack_int* ldvl, double* vr, lapack_int* ldvr, + lapack_int* mm, lapack_int* m, double* work, + lapack_int *info ); +void 
LAPACK_ctgevc( char* side, char* howmny, const lapack_logical* select, + lapack_int* n, const lapack_complex_float* s, + lapack_int* lds, const lapack_complex_float* p, + lapack_int* ldp, lapack_complex_float* vl, lapack_int* ldvl, + lapack_complex_float* vr, lapack_int* ldvr, lapack_int* mm, + lapack_int* m, lapack_complex_float* work, float* rwork, + lapack_int *info ); +void LAPACK_ztgevc( char* side, char* howmny, const lapack_logical* select, + lapack_int* n, const lapack_complex_double* s, + lapack_int* lds, const lapack_complex_double* p, + lapack_int* ldp, lapack_complex_double* vl, + lapack_int* ldvl, lapack_complex_double* vr, + lapack_int* ldvr, lapack_int* mm, lapack_int* m, + lapack_complex_double* work, double* rwork, + lapack_int *info ); +void LAPACK_stgexc( lapack_logical* wantq, lapack_logical* wantz, lapack_int* n, + float* a, lapack_int* lda, float* b, lapack_int* ldb, + float* q, lapack_int* ldq, float* z, lapack_int* ldz, + lapack_int* ifst, lapack_int* ilst, float* work, + lapack_int* lwork, lapack_int *info ); +void LAPACK_dtgexc( lapack_logical* wantq, lapack_logical* wantz, lapack_int* n, + double* a, lapack_int* lda, double* b, lapack_int* ldb, + double* q, lapack_int* ldq, double* z, lapack_int* ldz, + lapack_int* ifst, lapack_int* ilst, double* work, + lapack_int* lwork, lapack_int *info ); +void LAPACK_ctgexc( lapack_logical* wantq, lapack_logical* wantz, lapack_int* n, + lapack_complex_float* a, lapack_int* lda, + lapack_complex_float* b, lapack_int* ldb, + lapack_complex_float* q, lapack_int* ldq, + lapack_complex_float* z, lapack_int* ldz, lapack_int* ifst, + lapack_int* ilst, lapack_int *info ); +void LAPACK_ztgexc( lapack_logical* wantq, lapack_logical* wantz, lapack_int* n, + lapack_complex_double* a, lapack_int* lda, + lapack_complex_double* b, lapack_int* ldb, + lapack_complex_double* q, lapack_int* ldq, + lapack_complex_double* z, lapack_int* ldz, lapack_int* ifst, + lapack_int* ilst, lapack_int *info ); +void LAPACK_stgsen( 
lapack_int* ijob, lapack_logical* wantq, + lapack_logical* wantz, const lapack_logical* select, + lapack_int* n, float* a, lapack_int* lda, float* b, + lapack_int* ldb, float* alphar, float* alphai, float* beta, + float* q, lapack_int* ldq, float* z, lapack_int* ldz, + lapack_int* m, float* pl, float* pr, float* dif, + float* work, lapack_int* lwork, lapack_int* iwork, + lapack_int* liwork, lapack_int *info ); +void LAPACK_dtgsen( lapack_int* ijob, lapack_logical* wantq, + lapack_logical* wantz, const lapack_logical* select, + lapack_int* n, double* a, lapack_int* lda, double* b, + lapack_int* ldb, double* alphar, double* alphai, + double* beta, double* q, lapack_int* ldq, double* z, + lapack_int* ldz, lapack_int* m, double* pl, double* pr, + double* dif, double* work, lapack_int* lwork, + lapack_int* iwork, lapack_int* liwork, lapack_int *info ); +void LAPACK_ctgsen( lapack_int* ijob, lapack_logical* wantq, + lapack_logical* wantz, const lapack_logical* select, + lapack_int* n, lapack_complex_float* a, lapack_int* lda, + lapack_complex_float* b, lapack_int* ldb, + lapack_complex_float* alpha, lapack_complex_float* beta, + lapack_complex_float* q, lapack_int* ldq, + lapack_complex_float* z, lapack_int* ldz, lapack_int* m, + float* pl, float* pr, float* dif, + lapack_complex_float* work, lapack_int* lwork, + lapack_int* iwork, lapack_int* liwork, lapack_int *info ); +void LAPACK_ztgsen( lapack_int* ijob, lapack_logical* wantq, + lapack_logical* wantz, const lapack_logical* select, + lapack_int* n, lapack_complex_double* a, lapack_int* lda, + lapack_complex_double* b, lapack_int* ldb, + lapack_complex_double* alpha, lapack_complex_double* beta, + lapack_complex_double* q, lapack_int* ldq, + lapack_complex_double* z, lapack_int* ldz, lapack_int* m, + double* pl, double* pr, double* dif, + lapack_complex_double* work, lapack_int* lwork, + lapack_int* iwork, lapack_int* liwork, lapack_int *info ); +void LAPACK_stgsyl( char* trans, lapack_int* ijob, lapack_int* m, 
lapack_int* n, + const float* a, lapack_int* lda, const float* b, + lapack_int* ldb, float* c, lapack_int* ldc, const float* d, + lapack_int* ldd, const float* e, lapack_int* lde, float* f, + lapack_int* ldf, float* scale, float* dif, float* work, + lapack_int* lwork, lapack_int* iwork, lapack_int *info ); +void LAPACK_dtgsyl( char* trans, lapack_int* ijob, lapack_int* m, lapack_int* n, + const double* a, lapack_int* lda, const double* b, + lapack_int* ldb, double* c, lapack_int* ldc, + const double* d, lapack_int* ldd, const double* e, + lapack_int* lde, double* f, lapack_int* ldf, double* scale, + double* dif, double* work, lapack_int* lwork, + lapack_int* iwork, lapack_int *info ); +void LAPACK_ctgsyl( char* trans, lapack_int* ijob, lapack_int* m, lapack_int* n, + const lapack_complex_float* a, lapack_int* lda, + const lapack_complex_float* b, lapack_int* ldb, + lapack_complex_float* c, lapack_int* ldc, + const lapack_complex_float* d, lapack_int* ldd, + const lapack_complex_float* e, lapack_int* lde, + lapack_complex_float* f, lapack_int* ldf, float* scale, + float* dif, lapack_complex_float* work, lapack_int* lwork, + lapack_int* iwork, lapack_int *info ); +void LAPACK_ztgsyl( char* trans, lapack_int* ijob, lapack_int* m, lapack_int* n, + const lapack_complex_double* a, lapack_int* lda, + const lapack_complex_double* b, lapack_int* ldb, + lapack_complex_double* c, lapack_int* ldc, + const lapack_complex_double* d, lapack_int* ldd, + const lapack_complex_double* e, lapack_int* lde, + lapack_complex_double* f, lapack_int* ldf, double* scale, + double* dif, lapack_complex_double* work, lapack_int* lwork, + lapack_int* iwork, lapack_int *info ); +void LAPACK_stgsna( char* job, char* howmny, const lapack_logical* select, + lapack_int* n, const float* a, lapack_int* lda, + const float* b, lapack_int* ldb, const float* vl, + lapack_int* ldvl, const float* vr, lapack_int* ldvr, + float* s, float* dif, lapack_int* mm, lapack_int* m, + float* work, lapack_int* lwork, 
lapack_int* iwork, + lapack_int *info ); +void LAPACK_dtgsna( char* job, char* howmny, const lapack_logical* select, + lapack_int* n, const double* a, lapack_int* lda, + const double* b, lapack_int* ldb, const double* vl, + lapack_int* ldvl, const double* vr, lapack_int* ldvr, + double* s, double* dif, lapack_int* mm, lapack_int* m, + double* work, lapack_int* lwork, lapack_int* iwork, + lapack_int *info ); +void LAPACK_ctgsna( char* job, char* howmny, const lapack_logical* select, + lapack_int* n, const lapack_complex_float* a, + lapack_int* lda, const lapack_complex_float* b, + lapack_int* ldb, const lapack_complex_float* vl, + lapack_int* ldvl, const lapack_complex_float* vr, + lapack_int* ldvr, float* s, float* dif, lapack_int* mm, + lapack_int* m, lapack_complex_float* work, + lapack_int* lwork, lapack_int* iwork, lapack_int *info ); +void LAPACK_ztgsna( char* job, char* howmny, const lapack_logical* select, + lapack_int* n, const lapack_complex_double* a, + lapack_int* lda, const lapack_complex_double* b, + lapack_int* ldb, const lapack_complex_double* vl, + lapack_int* ldvl, const lapack_complex_double* vr, + lapack_int* ldvr, double* s, double* dif, lapack_int* mm, + lapack_int* m, lapack_complex_double* work, + lapack_int* lwork, lapack_int* iwork, lapack_int *info ); +void LAPACK_sggsvp( char* jobu, char* jobv, char* jobq, lapack_int* m, + lapack_int* p, lapack_int* n, float* a, lapack_int* lda, + float* b, lapack_int* ldb, float* tola, float* tolb, + lapack_int* k, lapack_int* l, float* u, lapack_int* ldu, + float* v, lapack_int* ldv, float* q, lapack_int* ldq, + lapack_int* iwork, float* tau, float* work, + lapack_int *info ); +void LAPACK_dggsvp( char* jobu, char* jobv, char* jobq, lapack_int* m, + lapack_int* p, lapack_int* n, double* a, lapack_int* lda, + double* b, lapack_int* ldb, double* tola, double* tolb, + lapack_int* k, lapack_int* l, double* u, lapack_int* ldu, + double* v, lapack_int* ldv, double* q, lapack_int* ldq, + lapack_int* iwork, 
double* tau, double* work, + lapack_int *info ); +void LAPACK_cggsvp( char* jobu, char* jobv, char* jobq, lapack_int* m, + lapack_int* p, lapack_int* n, lapack_complex_float* a, + lapack_int* lda, lapack_complex_float* b, lapack_int* ldb, + float* tola, float* tolb, lapack_int* k, lapack_int* l, + lapack_complex_float* u, lapack_int* ldu, + lapack_complex_float* v, lapack_int* ldv, + lapack_complex_float* q, lapack_int* ldq, lapack_int* iwork, + float* rwork, lapack_complex_float* tau, + lapack_complex_float* work, lapack_int *info ); +void LAPACK_zggsvp( char* jobu, char* jobv, char* jobq, lapack_int* m, + lapack_int* p, lapack_int* n, lapack_complex_double* a, + lapack_int* lda, lapack_complex_double* b, lapack_int* ldb, + double* tola, double* tolb, lapack_int* k, lapack_int* l, + lapack_complex_double* u, lapack_int* ldu, + lapack_complex_double* v, lapack_int* ldv, + lapack_complex_double* q, lapack_int* ldq, + lapack_int* iwork, double* rwork, + lapack_complex_double* tau, lapack_complex_double* work, + lapack_int *info ); +void LAPACK_stgsja( char* jobu, char* jobv, char* jobq, lapack_int* m, + lapack_int* p, lapack_int* n, lapack_int* k, lapack_int* l, + float* a, lapack_int* lda, float* b, lapack_int* ldb, + float* tola, float* tolb, float* alpha, float* beta, + float* u, lapack_int* ldu, float* v, lapack_int* ldv, + float* q, lapack_int* ldq, float* work, lapack_int* ncycle, + lapack_int *info ); +void LAPACK_dtgsja( char* jobu, char* jobv, char* jobq, lapack_int* m, + lapack_int* p, lapack_int* n, lapack_int* k, lapack_int* l, + double* a, lapack_int* lda, double* b, lapack_int* ldb, + double* tola, double* tolb, double* alpha, double* beta, + double* u, lapack_int* ldu, double* v, lapack_int* ldv, + double* q, lapack_int* ldq, double* work, + lapack_int* ncycle, lapack_int *info ); +void LAPACK_ctgsja( char* jobu, char* jobv, char* jobq, lapack_int* m, + lapack_int* p, lapack_int* n, lapack_int* k, lapack_int* l, + lapack_complex_float* a, lapack_int* 
lda, + lapack_complex_float* b, lapack_int* ldb, float* tola, + float* tolb, float* alpha, float* beta, + lapack_complex_float* u, lapack_int* ldu, + lapack_complex_float* v, lapack_int* ldv, + lapack_complex_float* q, lapack_int* ldq, + lapack_complex_float* work, lapack_int* ncycle, + lapack_int *info ); +void LAPACK_ztgsja( char* jobu, char* jobv, char* jobq, lapack_int* m, + lapack_int* p, lapack_int* n, lapack_int* k, lapack_int* l, + lapack_complex_double* a, lapack_int* lda, + lapack_complex_double* b, lapack_int* ldb, double* tola, + double* tolb, double* alpha, double* beta, + lapack_complex_double* u, lapack_int* ldu, + lapack_complex_double* v, lapack_int* ldv, + lapack_complex_double* q, lapack_int* ldq, + lapack_complex_double* work, lapack_int* ncycle, + lapack_int *info ); +void LAPACK_sgels( char* trans, lapack_int* m, lapack_int* n, lapack_int* nrhs, + float* a, lapack_int* lda, float* b, lapack_int* ldb, + float* work, lapack_int* lwork, lapack_int *info ); +void LAPACK_dgels( char* trans, lapack_int* m, lapack_int* n, lapack_int* nrhs, + double* a, lapack_int* lda, double* b, lapack_int* ldb, + double* work, lapack_int* lwork, lapack_int *info ); +void LAPACK_cgels( char* trans, lapack_int* m, lapack_int* n, lapack_int* nrhs, + lapack_complex_float* a, lapack_int* lda, + lapack_complex_float* b, lapack_int* ldb, + lapack_complex_float* work, lapack_int* lwork, + lapack_int *info ); +void LAPACK_zgels( char* trans, lapack_int* m, lapack_int* n, lapack_int* nrhs, + lapack_complex_double* a, lapack_int* lda, + lapack_complex_double* b, lapack_int* ldb, + lapack_complex_double* work, lapack_int* lwork, + lapack_int *info ); +void LAPACK_sgelsy( lapack_int* m, lapack_int* n, lapack_int* nrhs, float* a, + lapack_int* lda, float* b, lapack_int* ldb, + lapack_int* jpvt, float* rcond, lapack_int* rank, + float* work, lapack_int* lwork, lapack_int *info ); +void LAPACK_dgelsy( lapack_int* m, lapack_int* n, lapack_int* nrhs, double* a, + lapack_int* lda, 
double* b, lapack_int* ldb, + lapack_int* jpvt, double* rcond, lapack_int* rank, + double* work, lapack_int* lwork, lapack_int *info ); +void LAPACK_cgelsy( lapack_int* m, lapack_int* n, lapack_int* nrhs, + lapack_complex_float* a, lapack_int* lda, + lapack_complex_float* b, lapack_int* ldb, lapack_int* jpvt, + float* rcond, lapack_int* rank, lapack_complex_float* work, + lapack_int* lwork, float* rwork, lapack_int *info ); +void LAPACK_zgelsy( lapack_int* m, lapack_int* n, lapack_int* nrhs, + lapack_complex_double* a, lapack_int* lda, + lapack_complex_double* b, lapack_int* ldb, lapack_int* jpvt, + double* rcond, lapack_int* rank, + lapack_complex_double* work, lapack_int* lwork, + double* rwork, lapack_int *info ); +void LAPACK_sgelss( lapack_int* m, lapack_int* n, lapack_int* nrhs, float* a, + lapack_int* lda, float* b, lapack_int* ldb, float* s, + float* rcond, lapack_int* rank, float* work, + lapack_int* lwork, lapack_int *info ); +void LAPACK_dgelss( lapack_int* m, lapack_int* n, lapack_int* nrhs, double* a, + lapack_int* lda, double* b, lapack_int* ldb, double* s, + double* rcond, lapack_int* rank, double* work, + lapack_int* lwork, lapack_int *info ); +void LAPACK_cgelss( lapack_int* m, lapack_int* n, lapack_int* nrhs, + lapack_complex_float* a, lapack_int* lda, + lapack_complex_float* b, lapack_int* ldb, float* s, + float* rcond, lapack_int* rank, lapack_complex_float* work, + lapack_int* lwork, float* rwork, lapack_int *info ); +void LAPACK_zgelss( lapack_int* m, lapack_int* n, lapack_int* nrhs, + lapack_complex_double* a, lapack_int* lda, + lapack_complex_double* b, lapack_int* ldb, double* s, + double* rcond, lapack_int* rank, + lapack_complex_double* work, lapack_int* lwork, + double* rwork, lapack_int *info ); +void LAPACK_sgelsd( lapack_int* m, lapack_int* n, lapack_int* nrhs, float* a, + lapack_int* lda, float* b, lapack_int* ldb, float* s, + float* rcond, lapack_int* rank, float* work, + lapack_int* lwork, lapack_int* iwork, lapack_int *info ); 
+void LAPACK_dgelsd( lapack_int* m, lapack_int* n, lapack_int* nrhs, double* a, + lapack_int* lda, double* b, lapack_int* ldb, double* s, + double* rcond, lapack_int* rank, double* work, + lapack_int* lwork, lapack_int* iwork, lapack_int *info ); +void LAPACK_cgelsd( lapack_int* m, lapack_int* n, lapack_int* nrhs, + lapack_complex_float* a, lapack_int* lda, + lapack_complex_float* b, lapack_int* ldb, float* s, + float* rcond, lapack_int* rank, lapack_complex_float* work, + lapack_int* lwork, float* rwork, lapack_int* iwork, + lapack_int *info ); +void LAPACK_zgelsd( lapack_int* m, lapack_int* n, lapack_int* nrhs, + lapack_complex_double* a, lapack_int* lda, + lapack_complex_double* b, lapack_int* ldb, double* s, + double* rcond, lapack_int* rank, + lapack_complex_double* work, lapack_int* lwork, + double* rwork, lapack_int* iwork, lapack_int *info ); +void LAPACK_sgglse( lapack_int* m, lapack_int* n, lapack_int* p, float* a, + lapack_int* lda, float* b, lapack_int* ldb, float* c, + float* d, float* x, float* work, lapack_int* lwork, + lapack_int *info ); +void LAPACK_dgglse( lapack_int* m, lapack_int* n, lapack_int* p, double* a, + lapack_int* lda, double* b, lapack_int* ldb, double* c, + double* d, double* x, double* work, lapack_int* lwork, + lapack_int *info ); +void LAPACK_cgglse( lapack_int* m, lapack_int* n, lapack_int* p, + lapack_complex_float* a, lapack_int* lda, + lapack_complex_float* b, lapack_int* ldb, + lapack_complex_float* c, lapack_complex_float* d, + lapack_complex_float* x, lapack_complex_float* work, + lapack_int* lwork, lapack_int *info ); +void LAPACK_zgglse( lapack_int* m, lapack_int* n, lapack_int* p, + lapack_complex_double* a, lapack_int* lda, + lapack_complex_double* b, lapack_int* ldb, + lapack_complex_double* c, lapack_complex_double* d, + lapack_complex_double* x, lapack_complex_double* work, + lapack_int* lwork, lapack_int *info ); +void LAPACK_sggglm( lapack_int* n, lapack_int* m, lapack_int* p, float* a, + lapack_int* lda, float* b, 
lapack_int* ldb, float* d, + float* x, float* y, float* work, lapack_int* lwork, + lapack_int *info ); +void LAPACK_dggglm( lapack_int* n, lapack_int* m, lapack_int* p, double* a, + lapack_int* lda, double* b, lapack_int* ldb, double* d, + double* x, double* y, double* work, lapack_int* lwork, + lapack_int *info ); +void LAPACK_cggglm( lapack_int* n, lapack_int* m, lapack_int* p, + lapack_complex_float* a, lapack_int* lda, + lapack_complex_float* b, lapack_int* ldb, + lapack_complex_float* d, lapack_complex_float* x, + lapack_complex_float* y, lapack_complex_float* work, + lapack_int* lwork, lapack_int *info ); +void LAPACK_zggglm( lapack_int* n, lapack_int* m, lapack_int* p, + lapack_complex_double* a, lapack_int* lda, + lapack_complex_double* b, lapack_int* ldb, + lapack_complex_double* d, lapack_complex_double* x, + lapack_complex_double* y, lapack_complex_double* work, + lapack_int* lwork, lapack_int *info ); +void LAPACK_ssyev( char* jobz, char* uplo, lapack_int* n, float* a, + lapack_int* lda, float* w, float* work, lapack_int* lwork, + lapack_int *info ); +void LAPACK_dsyev( char* jobz, char* uplo, lapack_int* n, double* a, + lapack_int* lda, double* w, double* work, lapack_int* lwork, + lapack_int *info ); +void LAPACK_cheev( char* jobz, char* uplo, lapack_int* n, + lapack_complex_float* a, lapack_int* lda, float* w, + lapack_complex_float* work, lapack_int* lwork, float* rwork, + lapack_int *info ); +void LAPACK_zheev( char* jobz, char* uplo, lapack_int* n, + lapack_complex_double* a, lapack_int* lda, double* w, + lapack_complex_double* work, lapack_int* lwork, + double* rwork, lapack_int *info ); +void LAPACK_ssyevd( char* jobz, char* uplo, lapack_int* n, float* a, + lapack_int* lda, float* w, float* work, lapack_int* lwork, + lapack_int* iwork, lapack_int* liwork, lapack_int *info ); +void LAPACK_dsyevd( char* jobz, char* uplo, lapack_int* n, double* a, + lapack_int* lda, double* w, double* work, lapack_int* lwork, + lapack_int* iwork, lapack_int* 
liwork, lapack_int *info ); +void LAPACK_cheevd( char* jobz, char* uplo, lapack_int* n, + lapack_complex_float* a, lapack_int* lda, float* w, + lapack_complex_float* work, lapack_int* lwork, float* rwork, + lapack_int* lrwork, lapack_int* iwork, lapack_int* liwork, + lapack_int *info ); +void LAPACK_zheevd( char* jobz, char* uplo, lapack_int* n, + lapack_complex_double* a, lapack_int* lda, double* w, + lapack_complex_double* work, lapack_int* lwork, + double* rwork, lapack_int* lrwork, lapack_int* iwork, + lapack_int* liwork, lapack_int *info ); +void LAPACK_ssyevx( char* jobz, char* range, char* uplo, lapack_int* n, + float* a, lapack_int* lda, float* vl, float* vu, + lapack_int* il, lapack_int* iu, float* abstol, + lapack_int* m, float* w, float* z, lapack_int* ldz, + float* work, lapack_int* lwork, lapack_int* iwork, + lapack_int* ifail, lapack_int *info ); +void LAPACK_dsyevx( char* jobz, char* range, char* uplo, lapack_int* n, + double* a, lapack_int* lda, double* vl, double* vu, + lapack_int* il, lapack_int* iu, double* abstol, + lapack_int* m, double* w, double* z, lapack_int* ldz, + double* work, lapack_int* lwork, lapack_int* iwork, + lapack_int* ifail, lapack_int *info ); +void LAPACK_cheevx( char* jobz, char* range, char* uplo, lapack_int* n, + lapack_complex_float* a, lapack_int* lda, float* vl, + float* vu, lapack_int* il, lapack_int* iu, float* abstol, + lapack_int* m, float* w, lapack_complex_float* z, + lapack_int* ldz, lapack_complex_float* work, + lapack_int* lwork, float* rwork, lapack_int* iwork, + lapack_int* ifail, lapack_int *info ); +void LAPACK_zheevx( char* jobz, char* range, char* uplo, lapack_int* n, + lapack_complex_double* a, lapack_int* lda, double* vl, + double* vu, lapack_int* il, lapack_int* iu, double* abstol, + lapack_int* m, double* w, lapack_complex_double* z, + lapack_int* ldz, lapack_complex_double* work, + lapack_int* lwork, double* rwork, lapack_int* iwork, + lapack_int* ifail, lapack_int *info ); +void LAPACK_ssyevr( char* 
jobz, char* range, char* uplo, lapack_int* n, + float* a, lapack_int* lda, float* vl, float* vu, + lapack_int* il, lapack_int* iu, float* abstol, + lapack_int* m, float* w, float* z, lapack_int* ldz, + lapack_int* isuppz, float* work, lapack_int* lwork, + lapack_int* iwork, lapack_int* liwork, lapack_int *info ); +void LAPACK_dsyevr( char* jobz, char* range, char* uplo, lapack_int* n, + double* a, lapack_int* lda, double* vl, double* vu, + lapack_int* il, lapack_int* iu, double* abstol, + lapack_int* m, double* w, double* z, lapack_int* ldz, + lapack_int* isuppz, double* work, lapack_int* lwork, + lapack_int* iwork, lapack_int* liwork, lapack_int *info ); +void LAPACK_cheevr( char* jobz, char* range, char* uplo, lapack_int* n, + lapack_complex_float* a, lapack_int* lda, float* vl, + float* vu, lapack_int* il, lapack_int* iu, float* abstol, + lapack_int* m, float* w, lapack_complex_float* z, + lapack_int* ldz, lapack_int* isuppz, + lapack_complex_float* work, lapack_int* lwork, float* rwork, + lapack_int* lrwork, lapack_int* iwork, lapack_int* liwork, + lapack_int *info ); +void LAPACK_zheevr( char* jobz, char* range, char* uplo, lapack_int* n, + lapack_complex_double* a, lapack_int* lda, double* vl, + double* vu, lapack_int* il, lapack_int* iu, double* abstol, + lapack_int* m, double* w, lapack_complex_double* z, + lapack_int* ldz, lapack_int* isuppz, + lapack_complex_double* work, lapack_int* lwork, + double* rwork, lapack_int* lrwork, lapack_int* iwork, + lapack_int* liwork, lapack_int *info ); +void LAPACK_sspev( char* jobz, char* uplo, lapack_int* n, float* ap, float* w, + float* z, lapack_int* ldz, float* work, lapack_int *info ); +void LAPACK_dspev( char* jobz, char* uplo, lapack_int* n, double* ap, double* w, + double* z, lapack_int* ldz, double* work, lapack_int *info ); +void LAPACK_chpev( char* jobz, char* uplo, lapack_int* n, + lapack_complex_float* ap, float* w, lapack_complex_float* z, + lapack_int* ldz, lapack_complex_float* work, float* rwork, + 
lapack_int *info ); +void LAPACK_zhpev( char* jobz, char* uplo, lapack_int* n, + lapack_complex_double* ap, double* w, + lapack_complex_double* z, lapack_int* ldz, + lapack_complex_double* work, double* rwork, + lapack_int *info ); +void LAPACK_sspevd( char* jobz, char* uplo, lapack_int* n, float* ap, float* w, + float* z, lapack_int* ldz, float* work, lapack_int* lwork, + lapack_int* iwork, lapack_int* liwork, lapack_int *info ); +void LAPACK_dspevd( char* jobz, char* uplo, lapack_int* n, double* ap, + double* w, double* z, lapack_int* ldz, double* work, + lapack_int* lwork, lapack_int* iwork, lapack_int* liwork, + lapack_int *info ); +void LAPACK_chpevd( char* jobz, char* uplo, lapack_int* n, + lapack_complex_float* ap, float* w, lapack_complex_float* z, + lapack_int* ldz, lapack_complex_float* work, + lapack_int* lwork, float* rwork, lapack_int* lrwork, + lapack_int* iwork, lapack_int* liwork, lapack_int *info ); +void LAPACK_zhpevd( char* jobz, char* uplo, lapack_int* n, + lapack_complex_double* ap, double* w, + lapack_complex_double* z, lapack_int* ldz, + lapack_complex_double* work, lapack_int* lwork, + double* rwork, lapack_int* lrwork, lapack_int* iwork, + lapack_int* liwork, lapack_int *info ); +void LAPACK_sspevx( char* jobz, char* range, char* uplo, lapack_int* n, + float* ap, float* vl, float* vu, lapack_int* il, + lapack_int* iu, float* abstol, lapack_int* m, float* w, + float* z, lapack_int* ldz, float* work, lapack_int* iwork, + lapack_int* ifail, lapack_int *info ); +void LAPACK_dspevx( char* jobz, char* range, char* uplo, lapack_int* n, + double* ap, double* vl, double* vu, lapack_int* il, + lapack_int* iu, double* abstol, lapack_int* m, double* w, + double* z, lapack_int* ldz, double* work, lapack_int* iwork, + lapack_int* ifail, lapack_int *info ); +void LAPACK_chpevx( char* jobz, char* range, char* uplo, lapack_int* n, + lapack_complex_float* ap, float* vl, float* vu, + lapack_int* il, lapack_int* iu, float* abstol, + lapack_int* m, float* w, 
lapack_complex_float* z, + lapack_int* ldz, lapack_complex_float* work, float* rwork, + lapack_int* iwork, lapack_int* ifail, lapack_int *info ); +void LAPACK_zhpevx( char* jobz, char* range, char* uplo, lapack_int* n, + lapack_complex_double* ap, double* vl, double* vu, + lapack_int* il, lapack_int* iu, double* abstol, + lapack_int* m, double* w, lapack_complex_double* z, + lapack_int* ldz, lapack_complex_double* work, double* rwork, + lapack_int* iwork, lapack_int* ifail, lapack_int *info ); +void LAPACK_ssbev( char* jobz, char* uplo, lapack_int* n, lapack_int* kd, + float* ab, lapack_int* ldab, float* w, float* z, + lapack_int* ldz, float* work, lapack_int *info ); +void LAPACK_dsbev( char* jobz, char* uplo, lapack_int* n, lapack_int* kd, + double* ab, lapack_int* ldab, double* w, double* z, + lapack_int* ldz, double* work, lapack_int *info ); +void LAPACK_chbev( char* jobz, char* uplo, lapack_int* n, lapack_int* kd, + lapack_complex_float* ab, lapack_int* ldab, float* w, + lapack_complex_float* z, lapack_int* ldz, + lapack_complex_float* work, float* rwork, lapack_int *info ); +void LAPACK_zhbev( char* jobz, char* uplo, lapack_int* n, lapack_int* kd, + lapack_complex_double* ab, lapack_int* ldab, double* w, + lapack_complex_double* z, lapack_int* ldz, + lapack_complex_double* work, double* rwork, + lapack_int *info ); +void LAPACK_ssbevd( char* jobz, char* uplo, lapack_int* n, lapack_int* kd, + float* ab, lapack_int* ldab, float* w, float* z, + lapack_int* ldz, float* work, lapack_int* lwork, + lapack_int* iwork, lapack_int* liwork, lapack_int *info ); +void LAPACK_dsbevd( char* jobz, char* uplo, lapack_int* n, lapack_int* kd, + double* ab, lapack_int* ldab, double* w, double* z, + lapack_int* ldz, double* work, lapack_int* lwork, + lapack_int* iwork, lapack_int* liwork, lapack_int *info ); +void LAPACK_chbevd( char* jobz, char* uplo, lapack_int* n, lapack_int* kd, + lapack_complex_float* ab, lapack_int* ldab, float* w, + lapack_complex_float* z, lapack_int* 
ldz, + lapack_complex_float* work, lapack_int* lwork, float* rwork, + lapack_int* lrwork, lapack_int* iwork, lapack_int* liwork, + lapack_int *info ); +void LAPACK_zhbevd( char* jobz, char* uplo, lapack_int* n, lapack_int* kd, + lapack_complex_double* ab, lapack_int* ldab, double* w, + lapack_complex_double* z, lapack_int* ldz, + lapack_complex_double* work, lapack_int* lwork, + double* rwork, lapack_int* lrwork, lapack_int* iwork, + lapack_int* liwork, lapack_int *info ); +void LAPACK_ssbevx( char* jobz, char* range, char* uplo, lapack_int* n, + lapack_int* kd, float* ab, lapack_int* ldab, float* q, + lapack_int* ldq, float* vl, float* vu, lapack_int* il, + lapack_int* iu, float* abstol, lapack_int* m, float* w, + float* z, lapack_int* ldz, float* work, lapack_int* iwork, + lapack_int* ifail, lapack_int *info ); +void LAPACK_dsbevx( char* jobz, char* range, char* uplo, lapack_int* n, + lapack_int* kd, double* ab, lapack_int* ldab, double* q, + lapack_int* ldq, double* vl, double* vu, lapack_int* il, + lapack_int* iu, double* abstol, lapack_int* m, double* w, + double* z, lapack_int* ldz, double* work, lapack_int* iwork, + lapack_int* ifail, lapack_int *info ); +void LAPACK_chbevx( char* jobz, char* range, char* uplo, lapack_int* n, + lapack_int* kd, lapack_complex_float* ab, lapack_int* ldab, + lapack_complex_float* q, lapack_int* ldq, float* vl, + float* vu, lapack_int* il, lapack_int* iu, float* abstol, + lapack_int* m, float* w, lapack_complex_float* z, + lapack_int* ldz, lapack_complex_float* work, float* rwork, + lapack_int* iwork, lapack_int* ifail, lapack_int *info ); +void LAPACK_zhbevx( char* jobz, char* range, char* uplo, lapack_int* n, + lapack_int* kd, lapack_complex_double* ab, lapack_int* ldab, + lapack_complex_double* q, lapack_int* ldq, double* vl, + double* vu, lapack_int* il, lapack_int* iu, double* abstol, + lapack_int* m, double* w, lapack_complex_double* z, + lapack_int* ldz, lapack_complex_double* work, double* rwork, + lapack_int* iwork, 
lapack_int* ifail, lapack_int *info ); +void LAPACK_sstev( char* jobz, lapack_int* n, float* d, float* e, float* z, + lapack_int* ldz, float* work, lapack_int *info ); +void LAPACK_dstev( char* jobz, lapack_int* n, double* d, double* e, double* z, + lapack_int* ldz, double* work, lapack_int *info ); +void LAPACK_sstevd( char* jobz, lapack_int* n, float* d, float* e, float* z, + lapack_int* ldz, float* work, lapack_int* lwork, + lapack_int* iwork, lapack_int* liwork, lapack_int *info ); +void LAPACK_dstevd( char* jobz, lapack_int* n, double* d, double* e, double* z, + lapack_int* ldz, double* work, lapack_int* lwork, + lapack_int* iwork, lapack_int* liwork, lapack_int *info ); +void LAPACK_sstevx( char* jobz, char* range, lapack_int* n, float* d, float* e, + float* vl, float* vu, lapack_int* il, lapack_int* iu, + float* abstol, lapack_int* m, float* w, float* z, + lapack_int* ldz, float* work, lapack_int* iwork, + lapack_int* ifail, lapack_int *info ); +void LAPACK_dstevx( char* jobz, char* range, lapack_int* n, double* d, + double* e, double* vl, double* vu, lapack_int* il, + lapack_int* iu, double* abstol, lapack_int* m, double* w, + double* z, lapack_int* ldz, double* work, lapack_int* iwork, + lapack_int* ifail, lapack_int *info ); +void LAPACK_sstevr( char* jobz, char* range, lapack_int* n, float* d, float* e, + float* vl, float* vu, lapack_int* il, lapack_int* iu, + float* abstol, lapack_int* m, float* w, float* z, + lapack_int* ldz, lapack_int* isuppz, float* work, + lapack_int* lwork, lapack_int* iwork, lapack_int* liwork, + lapack_int *info ); +void LAPACK_dstevr( char* jobz, char* range, lapack_int* n, double* d, + double* e, double* vl, double* vu, lapack_int* il, + lapack_int* iu, double* abstol, lapack_int* m, double* w, + double* z, lapack_int* ldz, lapack_int* isuppz, + double* work, lapack_int* lwork, lapack_int* iwork, + lapack_int* liwork, lapack_int *info ); +void LAPACK_sgees( char* jobvs, char* sort, LAPACK_S_SELECT2 select, + lapack_int* n, 
float* a, lapack_int* lda, lapack_int* sdim, + float* wr, float* wi, float* vs, lapack_int* ldvs, + float* work, lapack_int* lwork, lapack_logical* bwork, + lapack_int *info ); +void LAPACK_dgees( char* jobvs, char* sort, LAPACK_D_SELECT2 select, + lapack_int* n, double* a, lapack_int* lda, lapack_int* sdim, + double* wr, double* wi, double* vs, lapack_int* ldvs, + double* work, lapack_int* lwork, lapack_logical* bwork, + lapack_int *info ); +void LAPACK_cgees( char* jobvs, char* sort, LAPACK_C_SELECT1 select, + lapack_int* n, lapack_complex_float* a, lapack_int* lda, + lapack_int* sdim, lapack_complex_float* w, + lapack_complex_float* vs, lapack_int* ldvs, + lapack_complex_float* work, lapack_int* lwork, float* rwork, + lapack_logical* bwork, lapack_int *info ); +void LAPACK_zgees( char* jobvs, char* sort, LAPACK_Z_SELECT1 select, + lapack_int* n, lapack_complex_double* a, lapack_int* lda, + lapack_int* sdim, lapack_complex_double* w, + lapack_complex_double* vs, lapack_int* ldvs, + lapack_complex_double* work, lapack_int* lwork, + double* rwork, lapack_logical* bwork, lapack_int *info ); +void LAPACK_sgeesx( char* jobvs, char* sort, LAPACK_S_SELECT2 select, + char* sense, lapack_int* n, float* a, lapack_int* lda, + lapack_int* sdim, float* wr, float* wi, float* vs, + lapack_int* ldvs, float* rconde, float* rcondv, float* work, + lapack_int* lwork, lapack_int* iwork, lapack_int* liwork, + lapack_logical* bwork, lapack_int *info ); +void LAPACK_dgeesx( char* jobvs, char* sort, LAPACK_D_SELECT2 select, + char* sense, lapack_int* n, double* a, lapack_int* lda, + lapack_int* sdim, double* wr, double* wi, double* vs, + lapack_int* ldvs, double* rconde, double* rcondv, + double* work, lapack_int* lwork, lapack_int* iwork, + lapack_int* liwork, lapack_logical* bwork, + lapack_int *info ); +void LAPACK_cgeesx( char* jobvs, char* sort, LAPACK_C_SELECT1 select, + char* sense, lapack_int* n, lapack_complex_float* a, + lapack_int* lda, lapack_int* sdim, lapack_complex_float* 
w, + lapack_complex_float* vs, lapack_int* ldvs, float* rconde, + float* rcondv, lapack_complex_float* work, + lapack_int* lwork, float* rwork, lapack_logical* bwork, + lapack_int *info ); +void LAPACK_zgeesx( char* jobvs, char* sort, LAPACK_Z_SELECT1 select, + char* sense, lapack_int* n, lapack_complex_double* a, + lapack_int* lda, lapack_int* sdim, lapack_complex_double* w, + lapack_complex_double* vs, lapack_int* ldvs, double* rconde, + double* rcondv, lapack_complex_double* work, + lapack_int* lwork, double* rwork, lapack_logical* bwork, + lapack_int *info ); +void LAPACK_sgeev( char* jobvl, char* jobvr, lapack_int* n, float* a, + lapack_int* lda, float* wr, float* wi, float* vl, + lapack_int* ldvl, float* vr, lapack_int* ldvr, float* work, + lapack_int* lwork, lapack_int *info ); +void LAPACK_dgeev( char* jobvl, char* jobvr, lapack_int* n, double* a, + lapack_int* lda, double* wr, double* wi, double* vl, + lapack_int* ldvl, double* vr, lapack_int* ldvr, double* work, + lapack_int* lwork, lapack_int *info ); +void LAPACK_cgeev( char* jobvl, char* jobvr, lapack_int* n, + lapack_complex_float* a, lapack_int* lda, + lapack_complex_float* w, lapack_complex_float* vl, + lapack_int* ldvl, lapack_complex_float* vr, lapack_int* ldvr, + lapack_complex_float* work, lapack_int* lwork, float* rwork, + lapack_int *info ); +void LAPACK_zgeev( char* jobvl, char* jobvr, lapack_int* n, + lapack_complex_double* a, lapack_int* lda, + lapack_complex_double* w, lapack_complex_double* vl, + lapack_int* ldvl, lapack_complex_double* vr, + lapack_int* ldvr, lapack_complex_double* work, + lapack_int* lwork, double* rwork, lapack_int *info ); +void LAPACK_sgeevx( char* balanc, char* jobvl, char* jobvr, char* sense, + lapack_int* n, float* a, lapack_int* lda, float* wr, + float* wi, float* vl, lapack_int* ldvl, float* vr, + lapack_int* ldvr, lapack_int* ilo, lapack_int* ihi, + float* scale, float* abnrm, float* rconde, float* rcondv, + float* work, lapack_int* lwork, lapack_int* iwork, + 
lapack_int *info ); +void LAPACK_dgeevx( char* balanc, char* jobvl, char* jobvr, char* sense, + lapack_int* n, double* a, lapack_int* lda, double* wr, + double* wi, double* vl, lapack_int* ldvl, double* vr, + lapack_int* ldvr, lapack_int* ilo, lapack_int* ihi, + double* scale, double* abnrm, double* rconde, + double* rcondv, double* work, lapack_int* lwork, + lapack_int* iwork, lapack_int *info ); +void LAPACK_cgeevx( char* balanc, char* jobvl, char* jobvr, char* sense, + lapack_int* n, lapack_complex_float* a, lapack_int* lda, + lapack_complex_float* w, lapack_complex_float* vl, + lapack_int* ldvl, lapack_complex_float* vr, + lapack_int* ldvr, lapack_int* ilo, lapack_int* ihi, + float* scale, float* abnrm, float* rconde, float* rcondv, + lapack_complex_float* work, lapack_int* lwork, float* rwork, + lapack_int *info ); +void LAPACK_zgeevx( char* balanc, char* jobvl, char* jobvr, char* sense, + lapack_int* n, lapack_complex_double* a, lapack_int* lda, + lapack_complex_double* w, lapack_complex_double* vl, + lapack_int* ldvl, lapack_complex_double* vr, + lapack_int* ldvr, lapack_int* ilo, lapack_int* ihi, + double* scale, double* abnrm, double* rconde, + double* rcondv, lapack_complex_double* work, + lapack_int* lwork, double* rwork, lapack_int *info ); +void LAPACK_sgesvd( char* jobu, char* jobvt, lapack_int* m, lapack_int* n, + float* a, lapack_int* lda, float* s, float* u, + lapack_int* ldu, float* vt, lapack_int* ldvt, float* work, + lapack_int* lwork, lapack_int *info ); +void LAPACK_dgesvd( char* jobu, char* jobvt, lapack_int* m, lapack_int* n, + double* a, lapack_int* lda, double* s, double* u, + lapack_int* ldu, double* vt, lapack_int* ldvt, double* work, + lapack_int* lwork, lapack_int *info ); +void LAPACK_cgesvd( char* jobu, char* jobvt, lapack_int* m, lapack_int* n, + lapack_complex_float* a, lapack_int* lda, float* s, + lapack_complex_float* u, lapack_int* ldu, + lapack_complex_float* vt, lapack_int* ldvt, + lapack_complex_float* work, lapack_int* 
lwork, float* rwork, + lapack_int *info ); +void LAPACK_zgesvd( char* jobu, char* jobvt, lapack_int* m, lapack_int* n, + lapack_complex_double* a, lapack_int* lda, double* s, + lapack_complex_double* u, lapack_int* ldu, + lapack_complex_double* vt, lapack_int* ldvt, + lapack_complex_double* work, lapack_int* lwork, + double* rwork, lapack_int *info ); +void LAPACK_sgesdd( char* jobz, lapack_int* m, lapack_int* n, float* a, + lapack_int* lda, float* s, float* u, lapack_int* ldu, + float* vt, lapack_int* ldvt, float* work, lapack_int* lwork, + lapack_int* iwork, lapack_int *info ); +void LAPACK_dgesdd( char* jobz, lapack_int* m, lapack_int* n, double* a, + lapack_int* lda, double* s, double* u, lapack_int* ldu, + double* vt, lapack_int* ldvt, double* work, + lapack_int* lwork, lapack_int* iwork, lapack_int *info ); +void LAPACK_cgesdd( char* jobz, lapack_int* m, lapack_int* n, + lapack_complex_float* a, lapack_int* lda, float* s, + lapack_complex_float* u, lapack_int* ldu, + lapack_complex_float* vt, lapack_int* ldvt, + lapack_complex_float* work, lapack_int* lwork, float* rwork, + lapack_int* iwork, lapack_int *info ); +void LAPACK_zgesdd( char* jobz, lapack_int* m, lapack_int* n, + lapack_complex_double* a, lapack_int* lda, double* s, + lapack_complex_double* u, lapack_int* ldu, + lapack_complex_double* vt, lapack_int* ldvt, + lapack_complex_double* work, lapack_int* lwork, + double* rwork, lapack_int* iwork, lapack_int *info ); +void LAPACK_dgejsv( char* joba, char* jobu, char* jobv, char* jobr, char* jobt, + char* jobp, lapack_int* m, lapack_int* n, double* a, + lapack_int* lda, double* sva, double* u, lapack_int* ldu, + double* v, lapack_int* ldv, double* work, lapack_int* lwork, + lapack_int* iwork, lapack_int *info ); +void LAPACK_sgejsv( char* joba, char* jobu, char* jobv, char* jobr, char* jobt, + char* jobp, lapack_int* m, lapack_int* n, float* a, + lapack_int* lda, float* sva, float* u, lapack_int* ldu, + float* v, lapack_int* ldv, float* work, lapack_int* 
lwork, + lapack_int* iwork, lapack_int *info ); +void LAPACK_dgesvj( char* joba, char* jobu, char* jobv, lapack_int* m, + lapack_int* n, double* a, lapack_int* lda, double* sva, + lapack_int* mv, double* v, lapack_int* ldv, double* work, + lapack_int* lwork, lapack_int *info ); +void LAPACK_sgesvj( char* joba, char* jobu, char* jobv, lapack_int* m, + lapack_int* n, float* a, lapack_int* lda, float* sva, + lapack_int* mv, float* v, lapack_int* ldv, float* work, + lapack_int* lwork, lapack_int *info ); +void LAPACK_sggsvd( char* jobu, char* jobv, char* jobq, lapack_int* m, + lapack_int* n, lapack_int* p, lapack_int* k, lapack_int* l, + float* a, lapack_int* lda, float* b, lapack_int* ldb, + float* alpha, float* beta, float* u, lapack_int* ldu, + float* v, lapack_int* ldv, float* q, lapack_int* ldq, + float* work, lapack_int* iwork, lapack_int *info ); +void LAPACK_dggsvd( char* jobu, char* jobv, char* jobq, lapack_int* m, + lapack_int* n, lapack_int* p, lapack_int* k, lapack_int* l, + double* a, lapack_int* lda, double* b, lapack_int* ldb, + double* alpha, double* beta, double* u, lapack_int* ldu, + double* v, lapack_int* ldv, double* q, lapack_int* ldq, + double* work, lapack_int* iwork, lapack_int *info ); +void LAPACK_cggsvd( char* jobu, char* jobv, char* jobq, lapack_int* m, + lapack_int* n, lapack_int* p, lapack_int* k, lapack_int* l, + lapack_complex_float* a, lapack_int* lda, + lapack_complex_float* b, lapack_int* ldb, float* alpha, + float* beta, lapack_complex_float* u, lapack_int* ldu, + lapack_complex_float* v, lapack_int* ldv, + lapack_complex_float* q, lapack_int* ldq, + lapack_complex_float* work, float* rwork, lapack_int* iwork, + lapack_int *info ); +void LAPACK_zggsvd( char* jobu, char* jobv, char* jobq, lapack_int* m, + lapack_int* n, lapack_int* p, lapack_int* k, lapack_int* l, + lapack_complex_double* a, lapack_int* lda, + lapack_complex_double* b, lapack_int* ldb, double* alpha, + double* beta, lapack_complex_double* u, lapack_int* ldu, + 
lapack_complex_double* v, lapack_int* ldv, + lapack_complex_double* q, lapack_int* ldq, + lapack_complex_double* work, double* rwork, + lapack_int* iwork, lapack_int *info ); +void LAPACK_ssygv( lapack_int* itype, char* jobz, char* uplo, lapack_int* n, + float* a, lapack_int* lda, float* b, lapack_int* ldb, + float* w, float* work, lapack_int* lwork, lapack_int *info ); +void LAPACK_dsygv( lapack_int* itype, char* jobz, char* uplo, lapack_int* n, + double* a, lapack_int* lda, double* b, lapack_int* ldb, + double* w, double* work, lapack_int* lwork, + lapack_int *info ); +void LAPACK_chegv( lapack_int* itype, char* jobz, char* uplo, lapack_int* n, + lapack_complex_float* a, lapack_int* lda, + lapack_complex_float* b, lapack_int* ldb, float* w, + lapack_complex_float* work, lapack_int* lwork, float* rwork, + lapack_int *info ); +void LAPACK_zhegv( lapack_int* itype, char* jobz, char* uplo, lapack_int* n, + lapack_complex_double* a, lapack_int* lda, + lapack_complex_double* b, lapack_int* ldb, double* w, + lapack_complex_double* work, lapack_int* lwork, + double* rwork, lapack_int *info ); +void LAPACK_ssygvd( lapack_int* itype, char* jobz, char* uplo, lapack_int* n, + float* a, lapack_int* lda, float* b, lapack_int* ldb, + float* w, float* work, lapack_int* lwork, lapack_int* iwork, + lapack_int* liwork, lapack_int *info ); +void LAPACK_dsygvd( lapack_int* itype, char* jobz, char* uplo, lapack_int* n, + double* a, lapack_int* lda, double* b, lapack_int* ldb, + double* w, double* work, lapack_int* lwork, + lapack_int* iwork, lapack_int* liwork, lapack_int *info ); +void LAPACK_chegvd( lapack_int* itype, char* jobz, char* uplo, lapack_int* n, + lapack_complex_float* a, lapack_int* lda, + lapack_complex_float* b, lapack_int* ldb, float* w, + lapack_complex_float* work, lapack_int* lwork, float* rwork, + lapack_int* lrwork, lapack_int* iwork, lapack_int* liwork, + lapack_int *info ); +void LAPACK_zhegvd( lapack_int* itype, char* jobz, char* uplo, lapack_int* n, + 
lapack_complex_double* a, lapack_int* lda, + lapack_complex_double* b, lapack_int* ldb, double* w, + lapack_complex_double* work, lapack_int* lwork, + double* rwork, lapack_int* lrwork, lapack_int* iwork, + lapack_int* liwork, lapack_int *info ); +void LAPACK_ssygvx( lapack_int* itype, char* jobz, char* range, char* uplo, + lapack_int* n, float* a, lapack_int* lda, float* b, + lapack_int* ldb, float* vl, float* vu, lapack_int* il, + lapack_int* iu, float* abstol, lapack_int* m, float* w, + float* z, lapack_int* ldz, float* work, lapack_int* lwork, + lapack_int* iwork, lapack_int* ifail, lapack_int *info ); +void LAPACK_dsygvx( lapack_int* itype, char* jobz, char* range, char* uplo, + lapack_int* n, double* a, lapack_int* lda, double* b, + lapack_int* ldb, double* vl, double* vu, lapack_int* il, + lapack_int* iu, double* abstol, lapack_int* m, double* w, + double* z, lapack_int* ldz, double* work, lapack_int* lwork, + lapack_int* iwork, lapack_int* ifail, lapack_int *info ); +void LAPACK_chegvx( lapack_int* itype, char* jobz, char* range, char* uplo, + lapack_int* n, lapack_complex_float* a, lapack_int* lda, + lapack_complex_float* b, lapack_int* ldb, float* vl, + float* vu, lapack_int* il, lapack_int* iu, float* abstol, + lapack_int* m, float* w, lapack_complex_float* z, + lapack_int* ldz, lapack_complex_float* work, + lapack_int* lwork, float* rwork, lapack_int* iwork, + lapack_int* ifail, lapack_int *info ); +void LAPACK_zhegvx( lapack_int* itype, char* jobz, char* range, char* uplo, + lapack_int* n, lapack_complex_double* a, lapack_int* lda, + lapack_complex_double* b, lapack_int* ldb, double* vl, + double* vu, lapack_int* il, lapack_int* iu, double* abstol, + lapack_int* m, double* w, lapack_complex_double* z, + lapack_int* ldz, lapack_complex_double* work, + lapack_int* lwork, double* rwork, lapack_int* iwork, + lapack_int* ifail, lapack_int *info ); +void LAPACK_sspgv( lapack_int* itype, char* jobz, char* uplo, lapack_int* n, + float* ap, float* bp, float* w, 
float* z, lapack_int* ldz, + float* work, lapack_int *info ); +void LAPACK_dspgv( lapack_int* itype, char* jobz, char* uplo, lapack_int* n, + double* ap, double* bp, double* w, double* z, + lapack_int* ldz, double* work, lapack_int *info ); +void LAPACK_chpgv( lapack_int* itype, char* jobz, char* uplo, lapack_int* n, + lapack_complex_float* ap, lapack_complex_float* bp, float* w, + lapack_complex_float* z, lapack_int* ldz, + lapack_complex_float* work, float* rwork, lapack_int *info ); +void LAPACK_zhpgv( lapack_int* itype, char* jobz, char* uplo, lapack_int* n, + lapack_complex_double* ap, lapack_complex_double* bp, + double* w, lapack_complex_double* z, lapack_int* ldz, + lapack_complex_double* work, double* rwork, + lapack_int *info ); +void LAPACK_sspgvd( lapack_int* itype, char* jobz, char* uplo, lapack_int* n, + float* ap, float* bp, float* w, float* z, lapack_int* ldz, + float* work, lapack_int* lwork, lapack_int* iwork, + lapack_int* liwork, lapack_int *info ); +void LAPACK_dspgvd( lapack_int* itype, char* jobz, char* uplo, lapack_int* n, + double* ap, double* bp, double* w, double* z, + lapack_int* ldz, double* work, lapack_int* lwork, + lapack_int* iwork, lapack_int* liwork, lapack_int *info ); +void LAPACK_chpgvd( lapack_int* itype, char* jobz, char* uplo, lapack_int* n, + lapack_complex_float* ap, lapack_complex_float* bp, + float* w, lapack_complex_float* z, lapack_int* ldz, + lapack_complex_float* work, lapack_int* lwork, float* rwork, + lapack_int* lrwork, lapack_int* iwork, lapack_int* liwork, + lapack_int *info ); +void LAPACK_zhpgvd( lapack_int* itype, char* jobz, char* uplo, lapack_int* n, + lapack_complex_double* ap, lapack_complex_double* bp, + double* w, lapack_complex_double* z, lapack_int* ldz, + lapack_complex_double* work, lapack_int* lwork, + double* rwork, lapack_int* lrwork, lapack_int* iwork, + lapack_int* liwork, lapack_int *info ); +void LAPACK_sspgvx( lapack_int* itype, char* jobz, char* range, char* uplo, + lapack_int* n, float* 
ap, float* bp, float* vl, float* vu, + lapack_int* il, lapack_int* iu, float* abstol, + lapack_int* m, float* w, float* z, lapack_int* ldz, + float* work, lapack_int* iwork, lapack_int* ifail, + lapack_int *info ); +void LAPACK_dspgvx( lapack_int* itype, char* jobz, char* range, char* uplo, + lapack_int* n, double* ap, double* bp, double* vl, + double* vu, lapack_int* il, lapack_int* iu, double* abstol, + lapack_int* m, double* w, double* z, lapack_int* ldz, + double* work, lapack_int* iwork, lapack_int* ifail, + lapack_int *info ); +void LAPACK_chpgvx( lapack_int* itype, char* jobz, char* range, char* uplo, + lapack_int* n, lapack_complex_float* ap, + lapack_complex_float* bp, float* vl, float* vu, + lapack_int* il, lapack_int* iu, float* abstol, + lapack_int* m, float* w, lapack_complex_float* z, + lapack_int* ldz, lapack_complex_float* work, float* rwork, + lapack_int* iwork, lapack_int* ifail, lapack_int *info ); +void LAPACK_zhpgvx( lapack_int* itype, char* jobz, char* range, char* uplo, + lapack_int* n, lapack_complex_double* ap, + lapack_complex_double* bp, double* vl, double* vu, + lapack_int* il, lapack_int* iu, double* abstol, + lapack_int* m, double* w, lapack_complex_double* z, + lapack_int* ldz, lapack_complex_double* work, double* rwork, + lapack_int* iwork, lapack_int* ifail, lapack_int *info ); +void LAPACK_ssbgv( char* jobz, char* uplo, lapack_int* n, lapack_int* ka, + lapack_int* kb, float* ab, lapack_int* ldab, float* bb, + lapack_int* ldbb, float* w, float* z, lapack_int* ldz, + float* work, lapack_int *info ); +void LAPACK_dsbgv( char* jobz, char* uplo, lapack_int* n, lapack_int* ka, + lapack_int* kb, double* ab, lapack_int* ldab, double* bb, + lapack_int* ldbb, double* w, double* z, lapack_int* ldz, + double* work, lapack_int *info ); +void LAPACK_chbgv( char* jobz, char* uplo, lapack_int* n, lapack_int* ka, + lapack_int* kb, lapack_complex_float* ab, lapack_int* ldab, + lapack_complex_float* bb, lapack_int* ldbb, float* w, + 
lapack_complex_float* z, lapack_int* ldz, + lapack_complex_float* work, float* rwork, lapack_int *info ); +void LAPACK_zhbgv( char* jobz, char* uplo, lapack_int* n, lapack_int* ka, + lapack_int* kb, lapack_complex_double* ab, lapack_int* ldab, + lapack_complex_double* bb, lapack_int* ldbb, double* w, + lapack_complex_double* z, lapack_int* ldz, + lapack_complex_double* work, double* rwork, + lapack_int *info ); +void LAPACK_ssbgvd( char* jobz, char* uplo, lapack_int* n, lapack_int* ka, + lapack_int* kb, float* ab, lapack_int* ldab, float* bb, + lapack_int* ldbb, float* w, float* z, lapack_int* ldz, + float* work, lapack_int* lwork, lapack_int* iwork, + lapack_int* liwork, lapack_int *info ); +void LAPACK_dsbgvd( char* jobz, char* uplo, lapack_int* n, lapack_int* ka, + lapack_int* kb, double* ab, lapack_int* ldab, double* bb, + lapack_int* ldbb, double* w, double* z, lapack_int* ldz, + double* work, lapack_int* lwork, lapack_int* iwork, + lapack_int* liwork, lapack_int *info ); +void LAPACK_chbgvd( char* jobz, char* uplo, lapack_int* n, lapack_int* ka, + lapack_int* kb, lapack_complex_float* ab, lapack_int* ldab, + lapack_complex_float* bb, lapack_int* ldbb, float* w, + lapack_complex_float* z, lapack_int* ldz, + lapack_complex_float* work, lapack_int* lwork, float* rwork, + lapack_int* lrwork, lapack_int* iwork, lapack_int* liwork, + lapack_int *info ); +void LAPACK_zhbgvd( char* jobz, char* uplo, lapack_int* n, lapack_int* ka, + lapack_int* kb, lapack_complex_double* ab, lapack_int* ldab, + lapack_complex_double* bb, lapack_int* ldbb, double* w, + lapack_complex_double* z, lapack_int* ldz, + lapack_complex_double* work, lapack_int* lwork, + double* rwork, lapack_int* lrwork, lapack_int* iwork, + lapack_int* liwork, lapack_int *info ); +void LAPACK_ssbgvx( char* jobz, char* range, char* uplo, lapack_int* n, + lapack_int* ka, lapack_int* kb, float* ab, lapack_int* ldab, + float* bb, lapack_int* ldbb, float* q, lapack_int* ldq, + float* vl, float* vu, lapack_int* il, 
lapack_int* iu, + float* abstol, lapack_int* m, float* w, float* z, + lapack_int* ldz, float* work, lapack_int* iwork, + lapack_int* ifail, lapack_int *info ); +void LAPACK_dsbgvx( char* jobz, char* range, char* uplo, lapack_int* n, + lapack_int* ka, lapack_int* kb, double* ab, + lapack_int* ldab, double* bb, lapack_int* ldbb, double* q, + lapack_int* ldq, double* vl, double* vu, lapack_int* il, + lapack_int* iu, double* abstol, lapack_int* m, double* w, + double* z, lapack_int* ldz, double* work, lapack_int* iwork, + lapack_int* ifail, lapack_int *info ); +void LAPACK_chbgvx( char* jobz, char* range, char* uplo, lapack_int* n, + lapack_int* ka, lapack_int* kb, lapack_complex_float* ab, + lapack_int* ldab, lapack_complex_float* bb, + lapack_int* ldbb, lapack_complex_float* q, lapack_int* ldq, + float* vl, float* vu, lapack_int* il, lapack_int* iu, + float* abstol, lapack_int* m, float* w, + lapack_complex_float* z, lapack_int* ldz, + lapack_complex_float* work, float* rwork, lapack_int* iwork, + lapack_int* ifail, lapack_int *info ); +void LAPACK_zhbgvx( char* jobz, char* range, char* uplo, lapack_int* n, + lapack_int* ka, lapack_int* kb, lapack_complex_double* ab, + lapack_int* ldab, lapack_complex_double* bb, + lapack_int* ldbb, lapack_complex_double* q, lapack_int* ldq, + double* vl, double* vu, lapack_int* il, lapack_int* iu, + double* abstol, lapack_int* m, double* w, + lapack_complex_double* z, lapack_int* ldz, + lapack_complex_double* work, double* rwork, + lapack_int* iwork, lapack_int* ifail, lapack_int *info ); +void LAPACK_sgges( char* jobvsl, char* jobvsr, char* sort, + LAPACK_S_SELECT3 selctg, lapack_int* n, float* a, + lapack_int* lda, float* b, lapack_int* ldb, lapack_int* sdim, + float* alphar, float* alphai, float* beta, float* vsl, + lapack_int* ldvsl, float* vsr, lapack_int* ldvsr, + float* work, lapack_int* lwork, lapack_logical* bwork, + lapack_int *info ); +void LAPACK_dgges( char* jobvsl, char* jobvsr, char* sort, + LAPACK_D_SELECT3 selctg, 
lapack_int* n, double* a, + lapack_int* lda, double* b, lapack_int* ldb, + lapack_int* sdim, double* alphar, double* alphai, + double* beta, double* vsl, lapack_int* ldvsl, double* vsr, + lapack_int* ldvsr, double* work, lapack_int* lwork, + lapack_logical* bwork, lapack_int *info ); +void LAPACK_cgges( char* jobvsl, char* jobvsr, char* sort, + LAPACK_C_SELECT2 selctg, lapack_int* n, + lapack_complex_float* a, lapack_int* lda, + lapack_complex_float* b, lapack_int* ldb, lapack_int* sdim, + lapack_complex_float* alpha, lapack_complex_float* beta, + lapack_complex_float* vsl, lapack_int* ldvsl, + lapack_complex_float* vsr, lapack_int* ldvsr, + lapack_complex_float* work, lapack_int* lwork, float* rwork, + lapack_logical* bwork, lapack_int *info ); +void LAPACK_zgges( char* jobvsl, char* jobvsr, char* sort, + LAPACK_Z_SELECT2 selctg, lapack_int* n, + lapack_complex_double* a, lapack_int* lda, + lapack_complex_double* b, lapack_int* ldb, lapack_int* sdim, + lapack_complex_double* alpha, lapack_complex_double* beta, + lapack_complex_double* vsl, lapack_int* ldvsl, + lapack_complex_double* vsr, lapack_int* ldvsr, + lapack_complex_double* work, lapack_int* lwork, + double* rwork, lapack_logical* bwork, lapack_int *info ); +void LAPACK_sggesx( char* jobvsl, char* jobvsr, char* sort, + LAPACK_S_SELECT3 selctg, char* sense, lapack_int* n, + float* a, lapack_int* lda, float* b, lapack_int* ldb, + lapack_int* sdim, float* alphar, float* alphai, float* beta, + float* vsl, lapack_int* ldvsl, float* vsr, + lapack_int* ldvsr, float* rconde, float* rcondv, + float* work, lapack_int* lwork, lapack_int* iwork, + lapack_int* liwork, lapack_logical* bwork, + lapack_int *info ); +void LAPACK_dggesx( char* jobvsl, char* jobvsr, char* sort, + LAPACK_D_SELECT3 selctg, char* sense, lapack_int* n, + double* a, lapack_int* lda, double* b, lapack_int* ldb, + lapack_int* sdim, double* alphar, double* alphai, + double* beta, double* vsl, lapack_int* ldvsl, double* vsr, + lapack_int* ldvsr, 
double* rconde, double* rcondv, + double* work, lapack_int* lwork, lapack_int* iwork, + lapack_int* liwork, lapack_logical* bwork, + lapack_int *info ); +void LAPACK_cggesx( char* jobvsl, char* jobvsr, char* sort, + LAPACK_C_SELECT2 selctg, char* sense, lapack_int* n, + lapack_complex_float* a, lapack_int* lda, + lapack_complex_float* b, lapack_int* ldb, lapack_int* sdim, + lapack_complex_float* alpha, lapack_complex_float* beta, + lapack_complex_float* vsl, lapack_int* ldvsl, + lapack_complex_float* vsr, lapack_int* ldvsr, float* rconde, + float* rcondv, lapack_complex_float* work, + lapack_int* lwork, float* rwork, lapack_int* iwork, + lapack_int* liwork, lapack_logical* bwork, + lapack_int *info ); +void LAPACK_zggesx( char* jobvsl, char* jobvsr, char* sort, + LAPACK_Z_SELECT2 selctg, char* sense, lapack_int* n, + lapack_complex_double* a, lapack_int* lda, + lapack_complex_double* b, lapack_int* ldb, lapack_int* sdim, + lapack_complex_double* alpha, lapack_complex_double* beta, + lapack_complex_double* vsl, lapack_int* ldvsl, + lapack_complex_double* vsr, lapack_int* ldvsr, + double* rconde, double* rcondv, lapack_complex_double* work, + lapack_int* lwork, double* rwork, lapack_int* iwork, + lapack_int* liwork, lapack_logical* bwork, + lapack_int *info ); +void LAPACK_sggev( char* jobvl, char* jobvr, lapack_int* n, float* a, + lapack_int* lda, float* b, lapack_int* ldb, float* alphar, + float* alphai, float* beta, float* vl, lapack_int* ldvl, + float* vr, lapack_int* ldvr, float* work, lapack_int* lwork, + lapack_int *info ); +void LAPACK_dggev( char* jobvl, char* jobvr, lapack_int* n, double* a, + lapack_int* lda, double* b, lapack_int* ldb, double* alphar, + double* alphai, double* beta, double* vl, lapack_int* ldvl, + double* vr, lapack_int* ldvr, double* work, + lapack_int* lwork, lapack_int *info ); +void LAPACK_cggev( char* jobvl, char* jobvr, lapack_int* n, + lapack_complex_float* a, lapack_int* lda, + lapack_complex_float* b, lapack_int* ldb, + 
lapack_complex_float* alpha, lapack_complex_float* beta, + lapack_complex_float* vl, lapack_int* ldvl, + lapack_complex_float* vr, lapack_int* ldvr, + lapack_complex_float* work, lapack_int* lwork, float* rwork, + lapack_int *info ); +void LAPACK_zggev( char* jobvl, char* jobvr, lapack_int* n, + lapack_complex_double* a, lapack_int* lda, + lapack_complex_double* b, lapack_int* ldb, + lapack_complex_double* alpha, lapack_complex_double* beta, + lapack_complex_double* vl, lapack_int* ldvl, + lapack_complex_double* vr, lapack_int* ldvr, + lapack_complex_double* work, lapack_int* lwork, + double* rwork, lapack_int *info ); +void LAPACK_sggevx( char* balanc, char* jobvl, char* jobvr, char* sense, + lapack_int* n, float* a, lapack_int* lda, float* b, + lapack_int* ldb, float* alphar, float* alphai, float* beta, + float* vl, lapack_int* ldvl, float* vr, lapack_int* ldvr, + lapack_int* ilo, lapack_int* ihi, float* lscale, + float* rscale, float* abnrm, float* bbnrm, float* rconde, + float* rcondv, float* work, lapack_int* lwork, + lapack_int* iwork, lapack_logical* bwork, + lapack_int *info ); +void LAPACK_dggevx( char* balanc, char* jobvl, char* jobvr, char* sense, + lapack_int* n, double* a, lapack_int* lda, double* b, + lapack_int* ldb, double* alphar, double* alphai, + double* beta, double* vl, lapack_int* ldvl, double* vr, + lapack_int* ldvr, lapack_int* ilo, lapack_int* ihi, + double* lscale, double* rscale, double* abnrm, + double* bbnrm, double* rconde, double* rcondv, double* work, + lapack_int* lwork, lapack_int* iwork, lapack_logical* bwork, + lapack_int *info ); +void LAPACK_cggevx( char* balanc, char* jobvl, char* jobvr, char* sense, + lapack_int* n, lapack_complex_float* a, lapack_int* lda, + lapack_complex_float* b, lapack_int* ldb, + lapack_complex_float* alpha, lapack_complex_float* beta, + lapack_complex_float* vl, lapack_int* ldvl, + lapack_complex_float* vr, lapack_int* ldvr, lapack_int* ilo, + lapack_int* ihi, float* lscale, float* rscale, float* 
abnrm, + float* bbnrm, float* rconde, float* rcondv, + lapack_complex_float* work, lapack_int* lwork, float* rwork, + lapack_int* iwork, lapack_logical* bwork, + lapack_int *info ); +void LAPACK_zggevx( char* balanc, char* jobvl, char* jobvr, char* sense, + lapack_int* n, lapack_complex_double* a, lapack_int* lda, + lapack_complex_double* b, lapack_int* ldb, + lapack_complex_double* alpha, lapack_complex_double* beta, + lapack_complex_double* vl, lapack_int* ldvl, + lapack_complex_double* vr, lapack_int* ldvr, + lapack_int* ilo, lapack_int* ihi, double* lscale, + double* rscale, double* abnrm, double* bbnrm, + double* rconde, double* rcondv, lapack_complex_double* work, + lapack_int* lwork, double* rwork, lapack_int* iwork, + lapack_logical* bwork, lapack_int *info ); +void LAPACK_dsfrk( char* transr, char* uplo, char* trans, lapack_int* n, + lapack_int* k, double* alpha, const double* a, + lapack_int* lda, double* beta, double* c ); +void LAPACK_ssfrk( char* transr, char* uplo, char* trans, lapack_int* n, + lapack_int* k, float* alpha, const float* a, lapack_int* lda, + float* beta, float* c ); +void LAPACK_zhfrk( char* transr, char* uplo, char* trans, lapack_int* n, + lapack_int* k, double* alpha, const lapack_complex_double* a, + lapack_int* lda, double* beta, lapack_complex_double* c ); +void LAPACK_chfrk( char* transr, char* uplo, char* trans, lapack_int* n, + lapack_int* k, float* alpha, const lapack_complex_float* a, + lapack_int* lda, float* beta, lapack_complex_float* c ); +void LAPACK_dtfsm( char* transr, char* side, char* uplo, char* trans, + char* diag, lapack_int* m, lapack_int* n, double* alpha, + const double* a, double* b, lapack_int* ldb ); +void LAPACK_stfsm( char* transr, char* side, char* uplo, char* trans, + char* diag, lapack_int* m, lapack_int* n, float* alpha, + const float* a, float* b, lapack_int* ldb ); +void LAPACK_ztfsm( char* transr, char* side, char* uplo, char* trans, + char* diag, lapack_int* m, lapack_int* n, + 
lapack_complex_double* alpha, const lapack_complex_double* a, + lapack_complex_double* b, lapack_int* ldb ); +void LAPACK_ctfsm( char* transr, char* side, char* uplo, char* trans, + char* diag, lapack_int* m, lapack_int* n, + lapack_complex_float* alpha, const lapack_complex_float* a, + lapack_complex_float* b, lapack_int* ldb ); +void LAPACK_dtfttp( char* transr, char* uplo, lapack_int* n, const double* arf, + double* ap, lapack_int *info ); +void LAPACK_stfttp( char* transr, char* uplo, lapack_int* n, const float* arf, + float* ap, lapack_int *info ); +void LAPACK_ztfttp( char* transr, char* uplo, lapack_int* n, + const lapack_complex_double* arf, lapack_complex_double* ap, + lapack_int *info ); +void LAPACK_ctfttp( char* transr, char* uplo, lapack_int* n, + const lapack_complex_float* arf, lapack_complex_float* ap, + lapack_int *info ); +void LAPACK_dtfttr( char* transr, char* uplo, lapack_int* n, const double* arf, + double* a, lapack_int* lda, lapack_int *info ); +void LAPACK_stfttr( char* transr, char* uplo, lapack_int* n, const float* arf, + float* a, lapack_int* lda, lapack_int *info ); +void LAPACK_ztfttr( char* transr, char* uplo, lapack_int* n, + const lapack_complex_double* arf, lapack_complex_double* a, + lapack_int* lda, lapack_int *info ); +void LAPACK_ctfttr( char* transr, char* uplo, lapack_int* n, + const lapack_complex_float* arf, lapack_complex_float* a, + lapack_int* lda, lapack_int *info ); +void LAPACK_dtpttf( char* transr, char* uplo, lapack_int* n, const double* ap, + double* arf, lapack_int *info ); +void LAPACK_stpttf( char* transr, char* uplo, lapack_int* n, const float* ap, + float* arf, lapack_int *info ); +void LAPACK_ztpttf( char* transr, char* uplo, lapack_int* n, + const lapack_complex_double* ap, lapack_complex_double* arf, + lapack_int *info ); +void LAPACK_ctpttf( char* transr, char* uplo, lapack_int* n, + const lapack_complex_float* ap, lapack_complex_float* arf, + lapack_int *info ); +void LAPACK_dtpttr( char* uplo, 
lapack_int* n, const double* ap, double* a, + lapack_int* lda, lapack_int *info ); +void LAPACK_stpttr( char* uplo, lapack_int* n, const float* ap, float* a, + lapack_int* lda, lapack_int *info ); +void LAPACK_ztpttr( char* uplo, lapack_int* n, const lapack_complex_double* ap, + lapack_complex_double* a, lapack_int* lda, + lapack_int *info ); +void LAPACK_ctpttr( char* uplo, lapack_int* n, const lapack_complex_float* ap, + lapack_complex_float* a, lapack_int* lda, + lapack_int *info ); +void LAPACK_dtrttf( char* transr, char* uplo, lapack_int* n, const double* a, + lapack_int* lda, double* arf, lapack_int *info ); +void LAPACK_strttf( char* transr, char* uplo, lapack_int* n, const float* a, + lapack_int* lda, float* arf, lapack_int *info ); +void LAPACK_ztrttf( char* transr, char* uplo, lapack_int* n, + const lapack_complex_double* a, lapack_int* lda, + lapack_complex_double* arf, lapack_int *info ); +void LAPACK_ctrttf( char* transr, char* uplo, lapack_int* n, + const lapack_complex_float* a, lapack_int* lda, + lapack_complex_float* arf, lapack_int *info ); +void LAPACK_dtrttp( char* uplo, lapack_int* n, const double* a, lapack_int* lda, + double* ap, lapack_int *info ); +void LAPACK_strttp( char* uplo, lapack_int* n, const float* a, lapack_int* lda, + float* ap, lapack_int *info ); +void LAPACK_ztrttp( char* uplo, lapack_int* n, const lapack_complex_double* a, + lapack_int* lda, lapack_complex_double* ap, + lapack_int *info ); +void LAPACK_ctrttp( char* uplo, lapack_int* n, const lapack_complex_float* a, + lapack_int* lda, lapack_complex_float* ap, + lapack_int *info ); +void LAPACK_sgeqrfp( lapack_int* m, lapack_int* n, float* a, lapack_int* lda, + float* tau, float* work, lapack_int* lwork, + lapack_int *info ); +void LAPACK_dgeqrfp( lapack_int* m, lapack_int* n, double* a, lapack_int* lda, + double* tau, double* work, lapack_int* lwork, + lapack_int *info ); +void LAPACK_cgeqrfp( lapack_int* m, lapack_int* n, lapack_complex_float* a, + lapack_int* lda, 
lapack_complex_float* tau, + lapack_complex_float* work, lapack_int* lwork, + lapack_int *info ); +void LAPACK_zgeqrfp( lapack_int* m, lapack_int* n, lapack_complex_double* a, + lapack_int* lda, lapack_complex_double* tau, + lapack_complex_double* work, lapack_int* lwork, + lapack_int *info ); +void LAPACK_clacgv( lapack_int* n, lapack_complex_float* x, lapack_int* incx ); +void LAPACK_zlacgv( lapack_int* n, lapack_complex_double* x, lapack_int* incx ); +void LAPACK_slarnv( lapack_int* idist, lapack_int* iseed, lapack_int* n, + float* x ); +void LAPACK_dlarnv( lapack_int* idist, lapack_int* iseed, lapack_int* n, + double* x ); +void LAPACK_clarnv( lapack_int* idist, lapack_int* iseed, lapack_int* n, + lapack_complex_float* x ); +void LAPACK_zlarnv( lapack_int* idist, lapack_int* iseed, lapack_int* n, + lapack_complex_double* x ); +void LAPACK_sgeqr2( lapack_int* m, lapack_int* n, float* a, lapack_int* lda, + float* tau, float* work, lapack_int *info ); +void LAPACK_dgeqr2( lapack_int* m, lapack_int* n, double* a, lapack_int* lda, + double* tau, double* work, lapack_int *info ); +void LAPACK_cgeqr2( lapack_int* m, lapack_int* n, lapack_complex_float* a, + lapack_int* lda, lapack_complex_float* tau, + lapack_complex_float* work, lapack_int *info ); +void LAPACK_zgeqr2( lapack_int* m, lapack_int* n, lapack_complex_double* a, + lapack_int* lda, lapack_complex_double* tau, + lapack_complex_double* work, lapack_int *info ); +void LAPACK_slacpy( char* uplo, lapack_int* m, lapack_int* n, const float* a, + lapack_int* lda, float* b, lapack_int* ldb ); +void LAPACK_dlacpy( char* uplo, lapack_int* m, lapack_int* n, const double* a, + lapack_int* lda, double* b, lapack_int* ldb ); +void LAPACK_clacpy( char* uplo, lapack_int* m, lapack_int* n, + const lapack_complex_float* a, lapack_int* lda, + lapack_complex_float* b, lapack_int* ldb ); +void LAPACK_zlacpy( char* uplo, lapack_int* m, lapack_int* n, + const lapack_complex_double* a, lapack_int* lda, + lapack_complex_double* b, 
lapack_int* ldb ); +void LAPACK_sgetf2( lapack_int* m, lapack_int* n, float* a, lapack_int* lda, + lapack_int* ipiv, lapack_int *info ); +void LAPACK_dgetf2( lapack_int* m, lapack_int* n, double* a, lapack_int* lda, + lapack_int* ipiv, lapack_int *info ); +void LAPACK_cgetf2( lapack_int* m, lapack_int* n, lapack_complex_float* a, + lapack_int* lda, lapack_int* ipiv, lapack_int *info ); +void LAPACK_zgetf2( lapack_int* m, lapack_int* n, lapack_complex_double* a, + lapack_int* lda, lapack_int* ipiv, lapack_int *info ); +void LAPACK_slaswp( lapack_int* n, float* a, lapack_int* lda, lapack_int* k1, + lapack_int* k2, const lapack_int* ipiv, lapack_int* incx ); +void LAPACK_dlaswp( lapack_int* n, double* a, lapack_int* lda, lapack_int* k1, + lapack_int* k2, const lapack_int* ipiv, lapack_int* incx ); +void LAPACK_claswp( lapack_int* n, lapack_complex_float* a, lapack_int* lda, + lapack_int* k1, lapack_int* k2, const lapack_int* ipiv, + lapack_int* incx ); +void LAPACK_zlaswp( lapack_int* n, lapack_complex_double* a, lapack_int* lda, + lapack_int* k1, lapack_int* k2, const lapack_int* ipiv, + lapack_int* incx ); +float LAPACK_slange( char* norm, lapack_int* m, lapack_int* n, const float* a, + lapack_int* lda, float* work ); +double LAPACK_dlange( char* norm, lapack_int* m, lapack_int* n, const double* a, + lapack_int* lda, double* work ); +float LAPACK_clange( char* norm, lapack_int* m, lapack_int* n, + const lapack_complex_float* a, lapack_int* lda, float* work ); +double LAPACK_zlange( char* norm, lapack_int* m, lapack_int* n, + const lapack_complex_double* a, lapack_int* lda, double* work ); +float LAPACK_clanhe( char* norm, char* uplo, lapack_int* n, + const lapack_complex_float* a, lapack_int* lda, float* work ); +double LAPACK_zlanhe( char* norm, char* uplo, lapack_int* n, + const lapack_complex_double* a, lapack_int* lda, double* work ); +float LAPACK_slansy( char* norm, char* uplo, lapack_int* n, const float* a, + lapack_int* lda, float* work ); +double 
LAPACK_dlansy( char* norm, char* uplo, lapack_int* n, const double* a, + lapack_int* lda, double* work ); +float LAPACK_clansy( char* norm, char* uplo, lapack_int* n, + const lapack_complex_float* a, lapack_int* lda, float* work ); +double LAPACK_zlansy( char* norm, char* uplo, lapack_int* n, + const lapack_complex_double* a, lapack_int* lda, double* work ); +float LAPACK_slantr( char* norm, char* uplo, char* diag, lapack_int* m, + lapack_int* n, const float* a, lapack_int* lda, float* work ); +double LAPACK_dlantr( char* norm, char* uplo, char* diag, lapack_int* m, + lapack_int* n, const double* a, lapack_int* lda, double* work ); +float LAPACK_clantr( char* norm, char* uplo, char* diag, lapack_int* m, + lapack_int* n, const lapack_complex_float* a, lapack_int* lda, + float* work ); +double LAPACK_zlantr( char* norm, char* uplo, char* diag, lapack_int* m, + lapack_int* n, const lapack_complex_double* a, lapack_int* lda, + double* work ); +float LAPACK_slamch( char* cmach ); +double LAPACK_dlamch( char* cmach ); +void LAPACK_sgelq2( lapack_int* m, lapack_int* n, float* a, lapack_int* lda, + float* tau, float* work, lapack_int *info ); +void LAPACK_dgelq2( lapack_int* m, lapack_int* n, double* a, lapack_int* lda, + double* tau, double* work, lapack_int *info ); +void LAPACK_cgelq2( lapack_int* m, lapack_int* n, lapack_complex_float* a, + lapack_int* lda, lapack_complex_float* tau, + lapack_complex_float* work, lapack_int *info ); +void LAPACK_zgelq2( lapack_int* m, lapack_int* n, lapack_complex_double* a, + lapack_int* lda, lapack_complex_double* tau, + lapack_complex_double* work, lapack_int *info ); +void LAPACK_slarfb( char* side, char* trans, char* direct, char* storev, + lapack_int* m, lapack_int* n, lapack_int* k, const float* v, + lapack_int* ldv, const float* t, lapack_int* ldt, float* c, + lapack_int* ldc, float* work, lapack_int* ldwork ); +void LAPACK_dlarfb( char* side, char* trans, char* direct, char* storev, + lapack_int* m, lapack_int* n, lapack_int* 
k, + const double* v, lapack_int* ldv, const double* t, + lapack_int* ldt, double* c, lapack_int* ldc, double* work, + lapack_int* ldwork ); +void LAPACK_clarfb( char* side, char* trans, char* direct, char* storev, + lapack_int* m, lapack_int* n, lapack_int* k, + const lapack_complex_float* v, lapack_int* ldv, + const lapack_complex_float* t, lapack_int* ldt, + lapack_complex_float* c, lapack_int* ldc, + lapack_complex_float* work, lapack_int* ldwork ); +void LAPACK_zlarfb( char* side, char* trans, char* direct, char* storev, + lapack_int* m, lapack_int* n, lapack_int* k, + const lapack_complex_double* v, lapack_int* ldv, + const lapack_complex_double* t, lapack_int* ldt, + lapack_complex_double* c, lapack_int* ldc, + lapack_complex_double* work, lapack_int* ldwork ); +void LAPACK_slarfg( lapack_int* n, float* alpha, float* x, lapack_int* incx, + float* tau ); +void LAPACK_dlarfg( lapack_int* n, double* alpha, double* x, lapack_int* incx, + double* tau ); +void LAPACK_clarfg( lapack_int* n, lapack_complex_float* alpha, + lapack_complex_float* x, lapack_int* incx, + lapack_complex_float* tau ); +void LAPACK_zlarfg( lapack_int* n, lapack_complex_double* alpha, + lapack_complex_double* x, lapack_int* incx, + lapack_complex_double* tau ); +void LAPACK_slarft( char* direct, char* storev, lapack_int* n, lapack_int* k, + const float* v, lapack_int* ldv, const float* tau, float* t, + lapack_int* ldt ); +void LAPACK_dlarft( char* direct, char* storev, lapack_int* n, lapack_int* k, + const double* v, lapack_int* ldv, const double* tau, + double* t, lapack_int* ldt ); +void LAPACK_clarft( char* direct, char* storev, lapack_int* n, lapack_int* k, + const lapack_complex_float* v, lapack_int* ldv, + const lapack_complex_float* tau, lapack_complex_float* t, + lapack_int* ldt ); +void LAPACK_zlarft( char* direct, char* storev, lapack_int* n, lapack_int* k, + const lapack_complex_double* v, lapack_int* ldv, + const lapack_complex_double* tau, lapack_complex_double* t, + lapack_int* 
ldt ); +void LAPACK_slarfx( char* side, lapack_int* m, lapack_int* n, const float* v, + float* tau, float* c, lapack_int* ldc, float* work ); +void LAPACK_dlarfx( char* side, lapack_int* m, lapack_int* n, const double* v, + double* tau, double* c, lapack_int* ldc, double* work ); +void LAPACK_clarfx( char* side, lapack_int* m, lapack_int* n, + const lapack_complex_float* v, lapack_complex_float* tau, + lapack_complex_float* c, lapack_int* ldc, + lapack_complex_float* work ); +void LAPACK_zlarfx( char* side, lapack_int* m, lapack_int* n, + const lapack_complex_double* v, lapack_complex_double* tau, + lapack_complex_double* c, lapack_int* ldc, + lapack_complex_double* work ); +void LAPACK_slatms( lapack_int* m, lapack_int* n, char* dist, lapack_int* iseed, + char* sym, float* d, lapack_int* mode, float* cond, + float* dmax, lapack_int* kl, lapack_int* ku, char* pack, + float* a, lapack_int* lda, float* work, lapack_int *info ); +void LAPACK_dlatms( lapack_int* m, lapack_int* n, char* dist, lapack_int* iseed, + char* sym, double* d, lapack_int* mode, double* cond, + double* dmax, lapack_int* kl, lapack_int* ku, char* pack, + double* a, lapack_int* lda, double* work, + lapack_int *info ); +void LAPACK_clatms( lapack_int* m, lapack_int* n, char* dist, lapack_int* iseed, + char* sym, float* d, lapack_int* mode, float* cond, + float* dmax, lapack_int* kl, lapack_int* ku, char* pack, + lapack_complex_float* a, lapack_int* lda, + lapack_complex_float* work, lapack_int *info ); +void LAPACK_zlatms( lapack_int* m, lapack_int* n, char* dist, lapack_int* iseed, + char* sym, double* d, lapack_int* mode, double* cond, + double* dmax, lapack_int* kl, lapack_int* ku, char* pack, + lapack_complex_double* a, lapack_int* lda, + lapack_complex_double* work, lapack_int *info ); +void LAPACK_slag2d( lapack_int* m, lapack_int* n, const float* sa, + lapack_int* ldsa, double* a, lapack_int* lda, + lapack_int *info ); +void LAPACK_dlag2s( lapack_int* m, lapack_int* n, const double* a, + 
lapack_int* lda, float* sa, lapack_int* ldsa, + lapack_int *info ); +void LAPACK_clag2z( lapack_int* m, lapack_int* n, + const lapack_complex_float* sa, lapack_int* ldsa, + lapack_complex_double* a, lapack_int* lda, + lapack_int *info ); +void LAPACK_zlag2c( lapack_int* m, lapack_int* n, + const lapack_complex_double* a, lapack_int* lda, + lapack_complex_float* sa, lapack_int* ldsa, + lapack_int *info ); +void LAPACK_slauum( char* uplo, lapack_int* n, float* a, lapack_int* lda, + lapack_int *info ); +void LAPACK_dlauum( char* uplo, lapack_int* n, double* a, lapack_int* lda, + lapack_int *info ); +void LAPACK_clauum( char* uplo, lapack_int* n, lapack_complex_float* a, + lapack_int* lda, lapack_int *info ); +void LAPACK_zlauum( char* uplo, lapack_int* n, lapack_complex_double* a, + lapack_int* lda, lapack_int *info ); +void LAPACK_slagge( lapack_int* m, lapack_int* n, lapack_int* kl, + lapack_int* ku, const float* d, float* a, lapack_int* lda, + lapack_int* iseed, float* work, lapack_int *info ); +void LAPACK_dlagge( lapack_int* m, lapack_int* n, lapack_int* kl, + lapack_int* ku, const double* d, double* a, lapack_int* lda, + lapack_int* iseed, double* work, lapack_int *info ); +void LAPACK_clagge( lapack_int* m, lapack_int* n, lapack_int* kl, + lapack_int* ku, const float* d, lapack_complex_float* a, + lapack_int* lda, lapack_int* iseed, + lapack_complex_float* work, lapack_int *info ); +void LAPACK_zlagge( lapack_int* m, lapack_int* n, lapack_int* kl, + lapack_int* ku, const double* d, lapack_complex_double* a, + lapack_int* lda, lapack_int* iseed, + lapack_complex_double* work, lapack_int *info ); +void LAPACK_slaset( char* uplo, lapack_int* m, lapack_int* n, float* alpha, + float* beta, float* a, lapack_int* lda ); +void LAPACK_dlaset( char* uplo, lapack_int* m, lapack_int* n, double* alpha, + double* beta, double* a, lapack_int* lda ); +void LAPACK_claset( char* uplo, lapack_int* m, lapack_int* n, + lapack_complex_float* alpha, lapack_complex_float* beta, + 
lapack_complex_float* a, lapack_int* lda ); +void LAPACK_zlaset( char* uplo, lapack_int* m, lapack_int* n, + lapack_complex_double* alpha, lapack_complex_double* beta, + lapack_complex_double* a, lapack_int* lda ); +void LAPACK_slasrt( char* id, lapack_int* n, float* d, lapack_int *info ); +void LAPACK_dlasrt( char* id, lapack_int* n, double* d, lapack_int *info ); +void LAPACK_claghe( lapack_int* n, lapack_int* k, const float* d, + lapack_complex_float* a, lapack_int* lda, lapack_int* iseed, + lapack_complex_float* work, lapack_int *info ); +void LAPACK_zlaghe( lapack_int* n, lapack_int* k, const double* d, + lapack_complex_double* a, lapack_int* lda, + lapack_int* iseed, lapack_complex_double* work, + lapack_int *info ); +void LAPACK_slagsy( lapack_int* n, lapack_int* k, const float* d, float* a, + lapack_int* lda, lapack_int* iseed, float* work, + lapack_int *info ); +void LAPACK_dlagsy( lapack_int* n, lapack_int* k, const double* d, double* a, + lapack_int* lda, lapack_int* iseed, double* work, + lapack_int *info ); +void LAPACK_clagsy( lapack_int* n, lapack_int* k, const float* d, + lapack_complex_float* a, lapack_int* lda, lapack_int* iseed, + lapack_complex_float* work, lapack_int *info ); +void LAPACK_zlagsy( lapack_int* n, lapack_int* k, const double* d, + lapack_complex_double* a, lapack_int* lda, + lapack_int* iseed, lapack_complex_double* work, + lapack_int *info ); +void LAPACK_slapmr( lapack_logical* forwrd, lapack_int* m, lapack_int* n, + float* x, lapack_int* ldx, lapack_int* k ); +void LAPACK_dlapmr( lapack_logical* forwrd, lapack_int* m, lapack_int* n, + double* x, lapack_int* ldx, lapack_int* k ); +void LAPACK_clapmr( lapack_logical* forwrd, lapack_int* m, lapack_int* n, + lapack_complex_float* x, lapack_int* ldx, lapack_int* k ); +void LAPACK_zlapmr( lapack_logical* forwrd, lapack_int* m, lapack_int* n, + lapack_complex_double* x, lapack_int* ldx, lapack_int* k ); +float LAPACK_slapy2( float* x, float* y ); +double LAPACK_dlapy2( double* x, 
double* y ); +float LAPACK_slapy3( float* x, float* y, float* z ); +double LAPACK_dlapy3( double* x, double* y, double* z ); +void LAPACK_slartgp( float* f, float* g, float* cs, float* sn, float* r ); +void LAPACK_dlartgp( double* f, double* g, double* cs, double* sn, double* r ); +void LAPACK_slartgs( float* x, float* y, float* sigma, float* cs, float* sn ); +void LAPACK_dlartgs( double* x, double* y, double* sigma, double* cs, + double* sn ); +// LAPACK 3.3.0 +void LAPACK_cbbcsd( char* jobu1, char* jobu2, + char* jobv1t, char* jobv2t, char* trans, + lapack_int* m, lapack_int* p, lapack_int* q, + float* theta, float* phi, + lapack_complex_float* u1, lapack_int* ldu1, + lapack_complex_float* u2, lapack_int* ldu2, + lapack_complex_float* v1t, lapack_int* ldv1t, + lapack_complex_float* v2t, lapack_int* ldv2t, + float* b11d, float* b11e, float* b12d, + float* b12e, float* b21d, float* b21e, + float* b22d, float* b22e, float* rwork, + lapack_int* lrwork , lapack_int *info ); +void LAPACK_cheswapr( char* uplo, lapack_int* n, + lapack_complex_float* a, lapack_int* i1, + lapack_int* i2 ); +void LAPACK_chetri2( char* uplo, lapack_int* n, + lapack_complex_float* a, lapack_int* lda, + const lapack_int* ipiv, + lapack_complex_float* work, lapack_int* lwork , lapack_int *info ); +void LAPACK_chetri2x( char* uplo, lapack_int* n, + lapack_complex_float* a, lapack_int* lda, + const lapack_int* ipiv, + lapack_complex_float* work, lapack_int* nb , lapack_int *info ); +void LAPACK_chetrs2( char* uplo, lapack_int* n, + lapack_int* nrhs, const lapack_complex_float* a, + lapack_int* lda, const lapack_int* ipiv, + lapack_complex_float* b, lapack_int* ldb, + lapack_complex_float* work , lapack_int *info ); +void LAPACK_csyconv( char* uplo, char* way, + lapack_int* n, lapack_complex_float* a, + lapack_int* lda, const lapack_int* ipiv, + lapack_complex_float* work , lapack_int *info ); +void LAPACK_csyswapr( char* uplo, lapack_int* n, + lapack_complex_float* a, lapack_int* i1, + 
lapack_int* i2 ); +void LAPACK_csytri2( char* uplo, lapack_int* n, + lapack_complex_float* a, lapack_int* lda, + const lapack_int* ipiv, + lapack_complex_float* work, lapack_int* lwork , lapack_int *info ); +void LAPACK_csytri2x( char* uplo, lapack_int* n, + lapack_complex_float* a, lapack_int* lda, + const lapack_int* ipiv, + lapack_complex_float* work, lapack_int* nb , lapack_int *info ); +void LAPACK_csytrs2( char* uplo, lapack_int* n, + lapack_int* nrhs, const lapack_complex_float* a, + lapack_int* lda, const lapack_int* ipiv, + lapack_complex_float* b, lapack_int* ldb, + lapack_complex_float* work , lapack_int *info ); +void LAPACK_cunbdb( char* trans, char* signs, + lapack_int* m, lapack_int* p, lapack_int* q, + lapack_complex_float* x11, lapack_int* ldx11, + lapack_complex_float* x12, lapack_int* ldx12, + lapack_complex_float* x21, lapack_int* ldx21, + lapack_complex_float* x22, lapack_int* ldx22, + float* theta, float* phi, + lapack_complex_float* taup1, + lapack_complex_float* taup2, + lapack_complex_float* tauq1, + lapack_complex_float* tauq2, + lapack_complex_float* work, lapack_int* lwork , lapack_int *info ); +void LAPACK_cuncsd( char* jobu1, char* jobu2, + char* jobv1t, char* jobv2t, char* trans, + char* signs, lapack_int* m, lapack_int* p, + lapack_int* q, lapack_complex_float* x11, + lapack_int* ldx11, lapack_complex_float* x12, + lapack_int* ldx12, lapack_complex_float* x21, + lapack_int* ldx21, lapack_complex_float* x22, + lapack_int* ldx22, float* theta, + lapack_complex_float* u1, lapack_int* ldu1, + lapack_complex_float* u2, lapack_int* ldu2, + lapack_complex_float* v1t, lapack_int* ldv1t, + lapack_complex_float* v2t, lapack_int* ldv2t, + lapack_complex_float* work, lapack_int* lwork, + float* rwork, lapack_int* lrwork, + lapack_int* iwork , lapack_int *info ); +void LAPACK_dbbcsd( char* jobu1, char* jobu2, + char* jobv1t, char* jobv2t, char* trans, + lapack_int* m, lapack_int* p, lapack_int* q, + double* theta, double* phi, double* u1, + 
lapack_int* ldu1, double* u2, lapack_int* ldu2, + double* v1t, lapack_int* ldv1t, double* v2t, + lapack_int* ldv2t, double* b11d, double* b11e, + double* b12d, double* b12e, double* b21d, + double* b21e, double* b22d, double* b22e, + double* work, lapack_int* lwork , lapack_int *info ); +void LAPACK_dorbdb( char* trans, char* signs, + lapack_int* m, lapack_int* p, lapack_int* q, + double* x11, lapack_int* ldx11, double* x12, + lapack_int* ldx12, double* x21, lapack_int* ldx21, + double* x22, lapack_int* ldx22, double* theta, + double* phi, double* taup1, double* taup2, + double* tauq1, double* tauq2, double* work, + lapack_int* lwork , lapack_int *info ); +void LAPACK_dorcsd( char* jobu1, char* jobu2, + char* jobv1t, char* jobv2t, char* trans, + char* signs, lapack_int* m, lapack_int* p, + lapack_int* q, double* x11, lapack_int* ldx11, + double* x12, lapack_int* ldx12, double* x21, + lapack_int* ldx21, double* x22, lapack_int* ldx22, + double* theta, double* u1, lapack_int* ldu1, + double* u2, lapack_int* ldu2, double* v1t, + lapack_int* ldv1t, double* v2t, lapack_int* ldv2t, + double* work, lapack_int* lwork, + lapack_int* iwork , lapack_int *info ); +void LAPACK_dsyconv( char* uplo, char* way, + lapack_int* n, double* a, lapack_int* lda, + const lapack_int* ipiv, double* work , lapack_int *info ); +void LAPACK_dsyswapr( char* uplo, lapack_int* n, + double* a, lapack_int* i1, lapack_int* i2 ); +void LAPACK_dsytri2( char* uplo, lapack_int* n, + double* a, lapack_int* lda, + const lapack_int* ipiv, + lapack_complex_double* work, lapack_int* lwork , lapack_int *info ); +void LAPACK_dsytri2x( char* uplo, lapack_int* n, + double* a, lapack_int* lda, + const lapack_int* ipiv, double* work, + lapack_int* nb , lapack_int *info ); +void LAPACK_dsytrs2( char* uplo, lapack_int* n, + lapack_int* nrhs, const double* a, + lapack_int* lda, const lapack_int* ipiv, + double* b, lapack_int* ldb, double* work , lapack_int *info ); +void LAPACK_sbbcsd( char* jobu1, char* jobu2, + 
char* jobv1t, char* jobv2t, char* trans, + lapack_int* m, lapack_int* p, lapack_int* q, + float* theta, float* phi, float* u1, + lapack_int* ldu1, float* u2, lapack_int* ldu2, + float* v1t, lapack_int* ldv1t, float* v2t, + lapack_int* ldv2t, float* b11d, float* b11e, + float* b12d, float* b12e, float* b21d, + float* b21e, float* b22d, float* b22e, + float* work, lapack_int* lwork , lapack_int *info ); +void LAPACK_sorbdb( char* trans, char* signs, + lapack_int* m, lapack_int* p, lapack_int* q, + float* x11, lapack_int* ldx11, float* x12, + lapack_int* ldx12, float* x21, lapack_int* ldx21, + float* x22, lapack_int* ldx22, float* theta, + float* phi, float* taup1, float* taup2, + float* tauq1, float* tauq2, float* work, + lapack_int* lwork , lapack_int *info ); +void LAPACK_sorcsd( char* jobu1, char* jobu2, + char* jobv1t, char* jobv2t, char* trans, + char* signs, lapack_int* m, lapack_int* p, + lapack_int* q, float* x11, lapack_int* ldx11, + float* x12, lapack_int* ldx12, float* x21, + lapack_int* ldx21, float* x22, lapack_int* ldx22, + float* theta, float* u1, lapack_int* ldu1, + float* u2, lapack_int* ldu2, float* v1t, + lapack_int* ldv1t, float* v2t, lapack_int* ldv2t, + float* work, lapack_int* lwork, + lapack_int* iwork , lapack_int *info ); +void LAPACK_ssyconv( char* uplo, char* way, + lapack_int* n, float* a, lapack_int* lda, + const lapack_int* ipiv, float* work , lapack_int *info ); +void LAPACK_ssyswapr( char* uplo, lapack_int* n, + float* a, lapack_int* i1, lapack_int* i2 ); +void LAPACK_ssytri2( char* uplo, lapack_int* n, + float* a, lapack_int* lda, + const lapack_int* ipiv, + lapack_complex_float* work, lapack_int* lwork , lapack_int *info ); +void LAPACK_ssytri2x( char* uplo, lapack_int* n, + float* a, lapack_int* lda, + const lapack_int* ipiv, float* work, + lapack_int* nb , lapack_int *info ); +void LAPACK_ssytrs2( char* uplo, lapack_int* n, + lapack_int* nrhs, const float* a, + lapack_int* lda, const lapack_int* ipiv, + float* b, lapack_int* ldb, 
float* work , lapack_int *info ); +void LAPACK_zbbcsd( char* jobu1, char* jobu2, + char* jobv1t, char* jobv2t, char* trans, + lapack_int* m, lapack_int* p, lapack_int* q, + double* theta, double* phi, + lapack_complex_double* u1, lapack_int* ldu1, + lapack_complex_double* u2, lapack_int* ldu2, + lapack_complex_double* v1t, lapack_int* ldv1t, + lapack_complex_double* v2t, lapack_int* ldv2t, + double* b11d, double* b11e, double* b12d, + double* b12e, double* b21d, double* b21e, + double* b22d, double* b22e, double* rwork, + lapack_int* lrwork , lapack_int *info ); +void LAPACK_zheswapr( char* uplo, lapack_int* n, + lapack_complex_double* a, lapack_int* i1, + lapack_int* i2 ); +void LAPACK_zhetri2( char* uplo, lapack_int* n, + lapack_complex_double* a, lapack_int* lda, + const lapack_int* ipiv, + lapack_complex_double* work, lapack_int* lwork , lapack_int *info ); +void LAPACK_zhetri2x( char* uplo, lapack_int* n, + lapack_complex_double* a, lapack_int* lda, + const lapack_int* ipiv, + lapack_complex_double* work, lapack_int* nb , lapack_int *info ); +void LAPACK_zhetrs2( char* uplo, lapack_int* n, + lapack_int* nrhs, + const lapack_complex_double* a, lapack_int* lda, + const lapack_int* ipiv, + lapack_complex_double* b, lapack_int* ldb, + lapack_complex_double* work , lapack_int *info ); +void LAPACK_zsyconv( char* uplo, char* way, + lapack_int* n, lapack_complex_double* a, + lapack_int* lda, const lapack_int* ipiv, + lapack_complex_double* work , lapack_int *info ); +void LAPACK_zsyswapr( char* uplo, lapack_int* n, + lapack_complex_double* a, lapack_int* i1, + lapack_int* i2 ); +void LAPACK_zsytri2( char* uplo, lapack_int* n, + lapack_complex_double* a, lapack_int* lda, + const lapack_int* ipiv, + lapack_complex_double* work, lapack_int* lwork , lapack_int *info ); +void LAPACK_zsytri2x( char* uplo, lapack_int* n, + lapack_complex_double* a, lapack_int* lda, + const lapack_int* ipiv, + lapack_complex_double* work, lapack_int* nb , lapack_int *info ); +void 
LAPACK_zsytrs2( char* uplo, lapack_int* n, + lapack_int* nrhs, + const lapack_complex_double* a, lapack_int* lda, + const lapack_int* ipiv, + lapack_complex_double* b, lapack_int* ldb, + lapack_complex_double* work , lapack_int *info ); +void LAPACK_zunbdb( char* trans, char* signs, + lapack_int* m, lapack_int* p, lapack_int* q, + lapack_complex_double* x11, lapack_int* ldx11, + lapack_complex_double* x12, lapack_int* ldx12, + lapack_complex_double* x21, lapack_int* ldx21, + lapack_complex_double* x22, lapack_int* ldx22, + double* theta, double* phi, + lapack_complex_double* taup1, + lapack_complex_double* taup2, + lapack_complex_double* tauq1, + lapack_complex_double* tauq2, + lapack_complex_double* work, lapack_int* lwork , lapack_int *info ); +void LAPACK_zuncsd( char* jobu1, char* jobu2, + char* jobv1t, char* jobv2t, char* trans, + char* signs, lapack_int* m, lapack_int* p, + lapack_int* q, lapack_complex_double* x11, + lapack_int* ldx11, lapack_complex_double* x12, + lapack_int* ldx12, lapack_complex_double* x21, + lapack_int* ldx21, lapack_complex_double* x22, + lapack_int* ldx22, double* theta, + lapack_complex_double* u1, lapack_int* ldu1, + lapack_complex_double* u2, lapack_int* ldu2, + lapack_complex_double* v1t, lapack_int* ldv1t, + lapack_complex_double* v2t, lapack_int* ldv2t, + lapack_complex_double* work, lapack_int* lwork, + double* rwork, lapack_int* lrwork, + lapack_int* iwork , lapack_int *info ); +// LAPACK 3.4.0 +void LAPACK_sgemqrt( char* side, char* trans, lapack_int* m, lapack_int* n, + lapack_int* k, lapack_int* nb, const float* v, + lapack_int* ldv, const float* t, lapack_int* ldt, float* c, + lapack_int* ldc, float* work, lapack_int *info ); +void LAPACK_dgemqrt( char* side, char* trans, lapack_int* m, lapack_int* n, + lapack_int* k, lapack_int* nb, const double* v, + lapack_int* ldv, const double* t, lapack_int* ldt, + double* c, lapack_int* ldc, double* work, + lapack_int *info ); +void LAPACK_cgemqrt( char* side, char* trans, 
lapack_int* m, lapack_int* n, + lapack_int* k, lapack_int* nb, + const lapack_complex_float* v, lapack_int* ldv, + const lapack_complex_float* t, lapack_int* ldt, + lapack_complex_float* c, lapack_int* ldc, + lapack_complex_float* work, lapack_int *info ); +void LAPACK_zgemqrt( char* side, char* trans, lapack_int* m, lapack_int* n, + lapack_int* k, lapack_int* nb, + const lapack_complex_double* v, lapack_int* ldv, + const lapack_complex_double* t, lapack_int* ldt, + lapack_complex_double* c, lapack_int* ldc, + lapack_complex_double* work, lapack_int *info ); +void LAPACK_sgeqrt( lapack_int* m, lapack_int* n, lapack_int* nb, float* a, + lapack_int* lda, float* t, lapack_int* ldt, float* work, + lapack_int *info ); +void LAPACK_dgeqrt( lapack_int* m, lapack_int* n, lapack_int* nb, double* a, + lapack_int* lda, double* t, lapack_int* ldt, double* work, + lapack_int *info ); +void LAPACK_cgeqrt( lapack_int* m, lapack_int* n, lapack_int* nb, + lapack_complex_float* a, lapack_int* lda, + lapack_complex_float* t, lapack_int* ldt, + lapack_complex_float* work, lapack_int *info ); +void LAPACK_zgeqrt( lapack_int* m, lapack_int* n, lapack_int* nb, + lapack_complex_double* a, lapack_int* lda, + lapack_complex_double* t, lapack_int* ldt, + lapack_complex_double* work, lapack_int *info ); +void LAPACK_sgeqrt2( lapack_int* m, lapack_int* n, float* a, lapack_int* lda, + float* t, lapack_int* ldt, lapack_int *info ); +void LAPACK_dgeqrt2( lapack_int* m, lapack_int* n, double* a, lapack_int* lda, + double* t, lapack_int* ldt, lapack_int *info ); +void LAPACK_cgeqrt2( lapack_int* m, lapack_int* n, lapack_complex_float* a, + lapack_int* lda, lapack_complex_float* t, lapack_int* ldt, + lapack_int *info ); +void LAPACK_zgeqrt2( lapack_int* m, lapack_int* n, lapack_complex_double* a, + lapack_int* lda, lapack_complex_double* t, lapack_int* ldt, + lapack_int *info ); +void LAPACK_sgeqrt3( lapack_int* m, lapack_int* n, float* a, lapack_int* lda, + float* t, lapack_int* ldt, lapack_int 
*info ); +void LAPACK_dgeqrt3( lapack_int* m, lapack_int* n, double* a, lapack_int* lda, + double* t, lapack_int* ldt, lapack_int *info ); +void LAPACK_cgeqrt3( lapack_int* m, lapack_int* n, lapack_complex_float* a, + lapack_int* lda, lapack_complex_float* t, lapack_int* ldt, + lapack_int *info ); +void LAPACK_zgeqrt3( lapack_int* m, lapack_int* n, lapack_complex_double* a, + lapack_int* lda, lapack_complex_double* t, lapack_int* ldt, + lapack_int *info ); +void LAPACK_stpmqrt( char* side, char* trans, lapack_int* m, lapack_int* n, + lapack_int* k, lapack_int* l, lapack_int* nb, + const float* v, lapack_int* ldv, const float* t, + lapack_int* ldt, float* a, lapack_int* lda, float* b, + lapack_int* ldb, float* work, lapack_int *info ); +void LAPACK_dtpmqrt( char* side, char* trans, lapack_int* m, lapack_int* n, + lapack_int* k, lapack_int* l, lapack_int* nb, + const double* v, lapack_int* ldv, const double* t, + lapack_int* ldt, double* a, lapack_int* lda, double* b, + lapack_int* ldb, double* work, lapack_int *info ); +void LAPACK_ctpmqrt( char* side, char* trans, lapack_int* m, lapack_int* n, + lapack_int* k, lapack_int* l, lapack_int* nb, + const lapack_complex_float* v, lapack_int* ldv, + const lapack_complex_float* t, lapack_int* ldt, + lapack_complex_float* a, lapack_int* lda, + lapack_complex_float* b, lapack_int* ldb, + lapack_complex_float* work, lapack_int *info ); +void LAPACK_ztpmqrt( char* side, char* trans, lapack_int* m, lapack_int* n, + lapack_int* k, lapack_int* l, lapack_int* nb, + const lapack_complex_double* v, lapack_int* ldv, + const lapack_complex_double* t, lapack_int* ldt, + lapack_complex_double* a, lapack_int* lda, + lapack_complex_double* b, lapack_int* ldb, + lapack_complex_double* work, lapack_int *info ); +void LAPACK_dtpqrt( lapack_int* m, lapack_int* n, lapack_int* l, lapack_int* nb, + double* a, lapack_int* lda, double* b, lapack_int* ldb, + double* t, lapack_int* ldt, double* work, + lapack_int *info ); +void LAPACK_ctpqrt( 
lapack_int* m, lapack_int* n, lapack_int* l, lapack_int* nb, + lapack_complex_float* a, lapack_int* lda, + lapack_complex_float* t, lapack_complex_float* b, + lapack_int* ldb, lapack_int* ldt, + lapack_complex_float* work, lapack_int *info ); +void LAPACK_ztpqrt( lapack_int* m, lapack_int* n, lapack_int* l, lapack_int* nb, + lapack_complex_double* a, lapack_int* lda, + lapack_complex_double* b, lapack_int* ldb, + lapack_complex_double* t, lapack_int* ldt, + lapack_complex_double* work, lapack_int *info ); +void LAPACK_stpqrt2( lapack_int* m, lapack_int* n, float* a, lapack_int* lda, + float* b, lapack_int* ldb, float* t, lapack_int* ldt, + lapack_int *info ); +void LAPACK_dtpqrt2( lapack_int* m, lapack_int* n, double* a, lapack_int* lda, + double* b, lapack_int* ldb, double* t, lapack_int* ldt, + lapack_int *info ); +void LAPACK_ctpqrt2( lapack_int* m, lapack_int* n, lapack_complex_float* a, + lapack_int* lda, lapack_complex_float* b, lapack_int* ldb, + lapack_complex_float* t, lapack_int* ldt, + lapack_int *info ); +void LAPACK_ztpqrt2( lapack_int* m, lapack_int* n, lapack_complex_double* a, + lapack_int* lda, lapack_complex_double* b, lapack_int* ldb, + lapack_complex_double* t, lapack_int* ldt, + lapack_int *info ); +void LAPACK_stprfb( char* side, char* trans, char* direct, char* storev, + lapack_int* m, lapack_int* n, lapack_int* k, lapack_int* l, + const float* v, lapack_int* ldv, const float* t, + lapack_int* ldt, float* a, lapack_int* lda, float* b, + lapack_int* ldb, const float* mywork, + lapack_int* myldwork ); +void LAPACK_dtprfb( char* side, char* trans, char* direct, char* storev, + lapack_int* m, lapack_int* n, lapack_int* k, lapack_int* l, + const double* v, lapack_int* ldv, const double* t, + lapack_int* ldt, double* a, lapack_int* lda, double* b, + lapack_int* ldb, const double* mywork, + lapack_int* myldwork ); +void LAPACK_ctprfb( char* side, char* trans, char* direct, char* storev, + lapack_int* m, lapack_int* n, lapack_int* k, lapack_int* l, + 
const lapack_complex_float* v, lapack_int* ldv, + const lapack_complex_float* t, lapack_int* ldt, + lapack_complex_float* a, lapack_int* lda, + lapack_complex_float* b, lapack_int* ldb, + const float* mywork, lapack_int* myldwork ); +void LAPACK_ztprfb( char* side, char* trans, char* direct, char* storev, + lapack_int* m, lapack_int* n, lapack_int* k, lapack_int* l, + const lapack_complex_double* v, lapack_int* ldv, + const lapack_complex_double* t, lapack_int* ldt, + lapack_complex_double* a, lapack_int* lda, + lapack_complex_double* b, lapack_int* ldb, + const double* mywork, lapack_int* myldwork ); +// LAPACK 3.X.X +void LAPACK_csyr( char* uplo, lapack_int* n, lapack_complex_float* alpha, + const lapack_complex_float* x, lapack_int* incx, + lapack_complex_float* a, lapack_int* lda ); +void LAPACK_zsyr( char* uplo, lapack_int* n, lapack_complex_double* alpha, + const lapack_complex_double* x, lapack_int* incx, + lapack_complex_double* a, lapack_int* lda ); + +#ifdef __cplusplus +} +#endif /* __cplusplus */ + +#endif /* _LAPACKE_H_ */ + +#endif /* _MKL_LAPACKE_H_ */ diff --git a/Eigen/src/misc/lapacke_mangling.h b/Eigen/src/misc/lapacke_mangling.h new file mode 100644 index 000000000..6211fd144 --- /dev/null +++ b/Eigen/src/misc/lapacke_mangling.h @@ -0,0 +1,17 @@ +#ifndef LAPACK_HEADER_INCLUDED +#define LAPACK_HEADER_INCLUDED + +#ifndef LAPACK_GLOBAL +#if defined(LAPACK_GLOBAL_PATTERN_LC) || defined(ADD_) +#define LAPACK_GLOBAL(lcname,UCNAME) lcname##_ +#elif defined(LAPACK_GLOBAL_PATTERN_UC) || defined(UPPER) +#define LAPACK_GLOBAL(lcname,UCNAME) UCNAME +#elif defined(LAPACK_GLOBAL_PATTERN_MC) || defined(NOCHANGE) +#define LAPACK_GLOBAL(lcname,UCNAME) lcname +#else +#define LAPACK_GLOBAL(lcname,UCNAME) lcname##_ +#endif +#endif + +#endif + diff --git a/Eigen/src/plugins/ArrayCwiseBinaryOps.h b/Eigen/src/plugins/ArrayCwiseBinaryOps.h index 5694592d6..62fb303d9 100644 --- a/Eigen/src/plugins/ArrayCwiseBinaryOps.h +++ b/Eigen/src/plugins/ArrayCwiseBinaryOps.h @@ 
-1,13 +1,14 @@ + /** \returns an expression of the coefficient wise product of \c *this and \a other * * \sa MatrixBase::cwiseProduct */ template EIGEN_DEVICE_FUNC -EIGEN_STRONG_INLINE const EIGEN_CWISE_PRODUCT_RETURN_TYPE(Derived,OtherDerived) +EIGEN_STRONG_INLINE const EIGEN_CWISE_BINARY_RETURN_TYPE(Derived,OtherDerived,product) operator*(const EIGEN_CURRENT_STORAGE_BASE_CLASS &other) const { - return EIGEN_CWISE_PRODUCT_RETURN_TYPE(Derived,OtherDerived)(derived(), other.derived()); + return EIGEN_CWISE_BINARY_RETURN_TYPE(Derived,OtherDerived,product)(derived(), other.derived()); } /** \returns an expression of the coefficient wise quotient of \c *this and \a other @@ -16,10 +17,10 @@ operator*(const EIGEN_CURRENT_STORAGE_BASE_CLASS &other) const */ template EIGEN_DEVICE_FUNC -EIGEN_STRONG_INLINE const CwiseBinaryOp, const Derived, const OtherDerived> +EIGEN_STRONG_INLINE const CwiseBinaryOp, const Derived, const OtherDerived> operator/(const EIGEN_CURRENT_STORAGE_BASE_CLASS &other) const { - return CwiseBinaryOp, const Derived, const OtherDerived>(derived(), other.derived()); + return CwiseBinaryOp, const Derived, const OtherDerived>(derived(), other.derived()); } /** \returns an expression of the coefficient-wise min of \c *this and \a other @@ -29,14 +30,14 @@ operator/(const EIGEN_CURRENT_STORAGE_BASE_CLASS &other) const * * \sa max() */ -EIGEN_MAKE_CWISE_BINARY_OP(min,internal::scalar_min_op) +EIGEN_MAKE_CWISE_BINARY_OP(min,min) /** \returns an expression of the coefficient-wise min of \c *this and scalar \a other * * \sa max() */ EIGEN_DEVICE_FUNC -EIGEN_STRONG_INLINE const CwiseBinaryOp, const Derived, +EIGEN_STRONG_INLINE const CwiseBinaryOp, const Derived, const CwiseNullaryOp, PlainObject> > #ifdef EIGEN_PARSED_BY_DOXYGEN min @@ -55,14 +56,14 @@ min * * \sa min() */ -EIGEN_MAKE_CWISE_BINARY_OP(max,internal::scalar_max_op) +EIGEN_MAKE_CWISE_BINARY_OP(max,max) /** \returns an expression of the coefficient-wise max of \c *this and scalar \a other * * \sa 
min() */ EIGEN_DEVICE_FUNC -EIGEN_STRONG_INLINE const CwiseBinaryOp, const Derived, +EIGEN_STRONG_INLINE const CwiseBinaryOp, const Derived, const CwiseNullaryOp, PlainObject> > #ifdef EIGEN_PARSED_BY_DOXYGEN max @@ -81,27 +82,38 @@ max * Example: \include Cwise_array_power_array.cpp * Output: \verbinclude Cwise_array_power_array.out */ -template -EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE -const CwiseBinaryOp, const Derived, const ExponentDerived> -pow(const ArrayBase& exponents) const -{ - return CwiseBinaryOp, const Derived, const ExponentDerived>( - this->derived(), - exponents.derived() - ); -} +EIGEN_MAKE_CWISE_BINARY_OP(pow,pow) + +#ifndef EIGEN_PARSED_BY_DOXYGEN +EIGEN_MAKE_SCALAR_BINARY_OP_ONTHERIGHT(pow,pow) +#else +/** \returns an expression of the coefficients of \c *this raised to the constant power \a exponent + * + * \tparam T is the scalar type of \a exponent. It must be compatible with the scalar type of the given expression. + * + * This function computes the coefficient-wise power. The function MatrixBase::pow() in the + * unsupported module MatrixFunctions computes the matrix power.
+ * + * Example: \include Cwise_pow.cpp + * Output: \verbinclude Cwise_pow.out + * + * \sa ArrayBase::pow(ArrayBase), square(), cube(), exp(), log() + */ +template +const CwiseBinaryOp,Derived,Constant > pow(const T& exponent) const; +#endif + // TODO code generating macros could be moved to Macros.h and could include generation of documentation #define EIGEN_MAKE_CWISE_COMP_OP(OP, COMPARATOR) \ template \ -EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const CwiseBinaryOp, const Derived, const OtherDerived> \ +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const CwiseBinaryOp, const Derived, const OtherDerived> \ OP(const EIGEN_CURRENT_STORAGE_BASE_CLASS &other) const \ { \ - return CwiseBinaryOp, const Derived, const OtherDerived>(derived(), other.derived()); \ + return CwiseBinaryOp, const Derived, const OtherDerived>(derived(), other.derived()); \ }\ -typedef CwiseBinaryOp, const Derived, const CwiseNullaryOp, PlainObject> > Cmp ## COMPARATOR ## ReturnType; \ -typedef CwiseBinaryOp, const CwiseNullaryOp, PlainObject>, const Derived > RCmp ## COMPARATOR ## ReturnType; \ +typedef CwiseBinaryOp, const Derived, const CwiseNullaryOp, PlainObject> > Cmp ## COMPARATOR ## ReturnType; \ +typedef CwiseBinaryOp, const CwiseNullaryOp, PlainObject>, const Derived > RCmp ## COMPARATOR ## ReturnType; \ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Cmp ## COMPARATOR ## ReturnType \ OP(const Scalar& s) const { \ return this->OP(Derived::PlainObject::Constant(rows(), cols(), s)); \ @@ -113,10 +125,10 @@ OP(const Scalar& s, const Derived& d) { \ #define EIGEN_MAKE_CWISE_COMP_R_OP(OP, R_OP, RCOMPARATOR) \ template \ -EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const CwiseBinaryOp, const OtherDerived, const Derived> \ +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const CwiseBinaryOp, const OtherDerived, const Derived> \ OP(const EIGEN_CURRENT_STORAGE_BASE_CLASS &other) const \ { \ - return CwiseBinaryOp, const OtherDerived, const Derived>(other.derived(), derived()); \ + return CwiseBinaryOp, const OtherDerived, 
const Derived>(other.derived(), derived()); \ } \ EIGEN_DEVICE_FUNC \ inline const RCmp ## RCOMPARATOR ## ReturnType \ @@ -199,48 +211,63 @@ EIGEN_MAKE_CWISE_COMP_OP(operator!=, NEQ) #undef EIGEN_MAKE_CWISE_COMP_R_OP // scalar addition - +#ifndef EIGEN_PARSED_BY_DOXYGEN +EIGEN_MAKE_SCALAR_BINARY_OP(operator+,sum) +#else /** \returns an expression of \c *this with each coeff incremented by the constant \a scalar + * + * \tparam T is the scalar type of \a scalar. It must be compatible with the scalar type of the given expression. * * Example: \include Cwise_plus.cpp * Output: \verbinclude Cwise_plus.out * * \sa operator+=(), operator-() */ -EIGEN_DEVICE_FUNC -inline const CwiseUnaryOp, const Derived> -operator+(const Scalar& scalar) const -{ - return CwiseUnaryOp, const Derived>(derived(), internal::scalar_add_op(scalar)); -} - -EIGEN_DEVICE_FUNC -friend inline const CwiseUnaryOp, const Derived> -operator+(const Scalar& scalar,const EIGEN_CURRENT_STORAGE_BASE_CLASS& other) -{ - return other + scalar; -} +template +const CwiseBinaryOp,Derived,Constant > operator+(const T& scalar) const; +/** \returns an expression of \a expr with each coeff incremented by the constant \a scalar + * + * \tparam T is the scalar type of \a scalar. It must be compatible with the scalar type of the given expression. + */ +template friend +const CwiseBinaryOp,Constant,Derived> operator+(const T& scalar, const StorageBaseType& expr); +#endif +#ifndef EIGEN_PARSED_BY_DOXYGEN +EIGEN_MAKE_SCALAR_BINARY_OP(operator-,difference) +#else /** \returns an expression of \c *this with each coeff decremented by the constant \a scalar + * + * \tparam T is the scalar type of \a scalar. It must be compatible with the scalar type of the given expression. 
* * Example: \include Cwise_minus.cpp * Output: \verbinclude Cwise_minus.out * - * \sa operator+(), operator-=() + * \sa operator+=(), operator-() */ -EIGEN_DEVICE_FUNC -inline const CwiseUnaryOp, const Derived> -operator-(const Scalar& scalar) const -{ - return CwiseUnaryOp, const Derived>(derived(), internal::scalar_sub_op(scalar));; -} +template +const CwiseBinaryOp,Derived,Constant > operator-(const T& scalar) const; +/** \returns an expression of the constant matrix of value \a scalar decremented by the coefficients of \a expr + * + * \tparam T is the scalar type of \a scalar. It must be compatible with the scalar type of the given expression. + */ +template friend +const CwiseBinaryOp,Constant,Derived> operator-(const T& scalar, const StorageBaseType& expr); +#endif -EIGEN_DEVICE_FUNC -friend inline const CwiseUnaryOp, const Derived> -operator-(const Scalar& scalar,const EIGEN_CURRENT_STORAGE_BASE_CLASS& other) -{ - return CwiseUnaryOp, const Derived>(other.derived(), internal::scalar_rsub_op(scalar));; -} + +#ifndef EIGEN_PARSED_BY_DOXYGEN + EIGEN_MAKE_SCALAR_BINARY_OP_ONTHELEFT(operator/,quotient) +#else + /** + * \brief Component-wise division of the scalar \a s by array elements of \a a. + * + * \tparam T is the scalar type of \a s. It must be compatible with the scalar type of the given array expression (\c Derived::Scalar). + */ + template friend + inline const CwiseBinaryOp,Constant,Derived> + operator/(const T& s,const StorageBaseType& a); +#endif /** \returns an expression of the coefficient-wise && operator of *this and \a other * @@ -298,3 +325,46 @@ operator^(const EIGEN_CURRENT_STORAGE_BASE_CLASS &other) const THIS_METHOD_IS_ONLY_FOR_EXPRESSIONS_OF_BOOL); return CwiseBinaryOp(derived(),other.derived()); } + +// NOTE disabled until we agree on argument order +#if 0 +/** \cpp11 \returns an expression of the coefficient-wise polygamma function.
+ * + * \specialfunctions_module + * + * It returns the \a n -th derivative of the digamma(psi) evaluated at \c *this. + * + * \warning Be careful with the order of the parameters: x.polygamma(n) is equivalent to polygamma(n,x) + * + * \sa Eigen::polygamma() + */ +template +inline const CwiseBinaryOp, const DerivedN, const Derived> +polygamma(const EIGEN_CURRENT_STORAGE_BASE_CLASS &n) const +{ + return CwiseBinaryOp, const DerivedN, const Derived>(n.derived(), this->derived()); +} +#endif + +/** \returns an expression of the coefficient-wise zeta function. + * + * \specialfunctions_module + * + * It returns the Riemann zeta function of two arguments \c *this and \a q: + * + * \param *this is the exponent, it must be > 1 + * \param q is the shift, it must be > 0 + * + * \note This function supports only float and double scalar types. To support other scalar types, the user has + * to provide implementations of zeta(T,T) for any scalar type T to be supported. + * + * This method is an alias for zeta(*this,q); + * + * \sa Eigen::zeta() + */ +template +inline const CwiseBinaryOp, const Derived, const DerivedQ> +zeta(const EIGEN_CURRENT_STORAGE_BASE_CLASS &q) const +{ + return CwiseBinaryOp, const Derived, const DerivedQ>(this->derived(), q.derived()); +} diff --git a/Eigen/src/plugins/ArrayCwiseUnaryOps.h b/Eigen/src/plugins/ArrayCwiseUnaryOps.h index 56c71172c..ebaa3f192 100644 --- a/Eigen/src/plugins/ArrayCwiseUnaryOps.h +++ b/Eigen/src/plugins/ArrayCwiseUnaryOps.h @@ -11,6 +11,7 @@ typedef CwiseUnaryOp, const Derived> Boo typedef CwiseUnaryOp, const Derived> ExpReturnType; typedef CwiseUnaryOp, const Derived> LogReturnType; +typedef CwiseUnaryOp, const Derived> Log1pReturnType; typedef CwiseUnaryOp, const Derived> Log10ReturnType; typedef CwiseUnaryOp, const Derived> CosReturnType; typedef CwiseUnaryOp, const Derived> SinReturnType; @@ -21,13 +22,6 @@ typedef CwiseUnaryOp, const Derived> AtanReturn typedef CwiseUnaryOp, const Derived> TanhReturnType; typedef
CwiseUnaryOp, const Derived> SinhReturnType; typedef CwiseUnaryOp, const Derived> CoshReturnType; -typedef CwiseUnaryOp, const Derived> LgammaReturnType; -typedef CwiseUnaryOp, const Derived> DigammaReturnType; -typedef CwiseUnaryOp, const Derived> ZetaReturnType; -typedef CwiseUnaryOp, const Derived> PolygammaReturnType; -typedef CwiseUnaryOp, const Derived> ErfReturnType; -typedef CwiseUnaryOp, const Derived> ErfcReturnType; -typedef CwiseUnaryOp, const Derived> PowReturnType; typedef CwiseUnaryOp, const Derived> SquareReturnType; typedef CwiseUnaryOp, const Derived> CubeReturnType; typedef CwiseUnaryOp, const Derived> RoundReturnType; @@ -42,7 +36,7 @@ typedef CwiseUnaryOp, const Derived> IsFini * Example: \include Cwise_abs.cpp * Output: \verbinclude Cwise_abs.out * - * \sa abs2() + * \sa Math functions, abs2() */ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const AbsReturnType @@ -70,7 +64,7 @@ arg() const * Example: \include Cwise_abs2.cpp * Output: \verbinclude Cwise_abs2.out * - * \sa abs(), square() + * \sa Math functions, abs(), square() */ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Abs2ReturnType @@ -87,7 +81,7 @@ abs2() const * Example: \include Cwise_exp.cpp * Output: \verbinclude Cwise_exp.out * - * \sa pow(), log(), sin(), cos() + * \sa Math functions, pow(), log(), sin(), cos() */ EIGEN_DEVICE_FUNC inline const ExpReturnType @@ -104,7 +98,7 @@ exp() const * Example: \include Cwise_log.cpp * Output: \verbinclude Cwise_log.out * - * \sa exp() + * \sa Math functions, exp() */ EIGEN_DEVICE_FUNC inline const LogReturnType @@ -113,6 +107,20 @@ log() const return LogReturnType(derived()); } +/** \returns an expression of the coefficient-wise logarithm of 1 plus \c *this. + * + * In exact arithmetic, \c x.log1p() is equivalent to \c (x+1).log(), + * however, with finite precision, this function is much more accurate when \c x is close to zero.
+ * + * \sa Math functions, log() + */ +EIGEN_DEVICE_FUNC +inline const Log1pReturnType +log1p() const +{ + return Log1pReturnType(derived()); +} + /** \returns an expression of the coefficient-wise base-10 logarithm of *this. * * This function computes the coefficient-wise base-10 logarithm. @@ -120,7 +128,7 @@ log() const * Example: \include Cwise_log10.cpp * Output: \verbinclude Cwise_log10.out * - * \sa log() + * \sa Math functions, log() */ EIGEN_DEVICE_FUNC inline const Log10ReturnType @@ -137,7 +145,7 @@ log10() const * Example: \include Cwise_sqrt.cpp * Output: \verbinclude Cwise_sqrt.out * - * \sa pow(), square() + * \sa Math functions, pow(), square() */ EIGEN_DEVICE_FUNC inline const SqrtReturnType @@ -187,7 +195,7 @@ sign() const * Example: \include Cwise_cos.cpp * Output: \verbinclude Cwise_cos.out * - * \sa sin(), acos() + * \sa Math functions, sin(), acos() */ EIGEN_DEVICE_FUNC inline const CosReturnType @@ -205,7 +213,7 @@ cos() const * Example: \include Cwise_sin.cpp * Output: \verbinclude Cwise_sin.out * - * \sa cos(), asin() + * \sa Math functions, cos(), asin() */ EIGEN_DEVICE_FUNC inline const SinReturnType @@ -219,7 +227,7 @@ sin() const * Example: \include Cwise_tan.cpp * Output: \verbinclude Cwise_tan.out * - * \sa cos(), sin() + * \sa Math functions, cos(), sin() */ EIGEN_DEVICE_FUNC inline const TanReturnType @@ -233,8 +241,9 @@ tan() const * Example: \include Cwise_atan.cpp * Output: \verbinclude Cwise_atan.out * - * \sa tan(), asin(), acos() + * \sa Math functions, tan(), asin(), acos() */ +EIGEN_DEVICE_FUNC inline const AtanReturnType atan() const { @@ -246,7 +255,7 @@ atan() const * Example: \include Cwise_acos.cpp * Output: \verbinclude Cwise_acos.out * - * \sa cos(), asin() + * \sa Math functions, cos(), asin() */ EIGEN_DEVICE_FUNC inline const AcosReturnType @@ -260,7 +269,7 @@ acos() const * Example: \include Cwise_asin.cpp * Output: \verbinclude Cwise_asin.out * - * \sa sin(), acos() + * \sa Math functions, sin(), acos() */ 
EIGEN_DEVICE_FUNC inline const AsinReturnType @@ -274,8 +283,9 @@ asin() const * Example: \include Cwise_tanh.cpp * Output: \verbinclude Cwise_tanh.out * - * \sa tan(), sinh(), cosh() + * \sa Math functions, tan(), sinh(), cosh() */ +EIGEN_DEVICE_FUNC inline const TanhReturnType tanh() const { @@ -287,8 +297,9 @@ tanh() const * Example: \include Cwise_sinh.cpp * Output: \verbinclude Cwise_sinh.out * - * \sa sin(), tanh(), cosh() + * \sa Math functions, sin(), tanh(), cosh() */ +EIGEN_DEVICE_FUNC inline const SinhReturnType sinh() const { @@ -300,99 +311,15 @@ sinh() const * Example: \include Cwise_cosh.cpp * Output: \verbinclude Cwise_cosh.out * - * \sa tan(), sinh(), cosh() + * \sa Math functions, tan(), sinh(), cosh() */ +EIGEN_DEVICE_FUNC inline const CoshReturnType cosh() const { return CoshReturnType(derived()); } -/** \returns an expression of the coefficient-wise ln(|gamma(*this)|). - * - * Example: \include Cwise_lgamma.cpp - * Output: \verbinclude Cwise_lgamma.out - * - * \sa cos(), sin(), tan() - */ -inline const LgammaReturnType -lgamma() const -{ - return LgammaReturnType(derived()); -} - -/** \returns an expression of the coefficient-wise digamma (psi, derivative of lgamma). - * - * \sa cos(), sin(), tan() - */ -inline const DigammaReturnType -digamma() const -{ - return DigammaReturnType(derived()); -} - -/** \returns an expression of the coefficient-wise zeta function. - */ -inline const ZetaReturnType -zeta() const -{ - return ZetaReturnType(derived()); -} - -/** \returns an expression of the coefficient-wise polygamma function. - */ -inline const PolygammaReturnType -polygamma() const -{ - return PolygammaReturnType(derived()); -} - -/** \returns an expression of the coefficient-wise Gauss error - * function of *this. 
- * - * Example: \include Cwise_erf.cpp - * Output: \verbinclude Cwise_erf.out - * - * \sa cos(), sin(), tan() - */ -inline const ErfReturnType -erf() const -{ - return ErfReturnType(derived()); -} - -/** \returns an expression of the coefficient-wise Complementary error - * function of *this. - * - * Example: \include Cwise_erfc.cpp - * Output: \verbinclude Cwise_erfc.out - * - * \sa cos(), sin(), tan() - */ -inline const ErfcReturnType -erfc() const -{ - return ErfcReturnType(derived()); -} - -/** \returns an expression of the coefficient-wise power of *this to the given exponent. - * - * This function computes the coefficient-wise power. The function MatrixBase::pow() in the - * unsupported module MatrixFunctions computes the matrix power. - * - * Example: \include Cwise_pow.cpp - * Output: \verbinclude Cwise_pow.out - * - * \sa exp(), log() - */ -EIGEN_DEVICE_FUNC -inline const PowReturnType -pow(const Scalar& exponent) const -{ - return PowReturnType(derived(), internal::scalar_pow_op(exponent)); -} - - /** \returns an expression of the coefficient-wise inverse of *this. 
* * Example: \include Cwise_inverse.cpp @@ -412,7 +339,7 @@ inverse() const * Example: \include Cwise_square.cpp * Output: \verbinclude Cwise_square.out * - * \sa operator/(), operator*(), abs2() + * \sa Math functions, abs2(), cube(), pow() */ EIGEN_DEVICE_FUNC inline const SquareReturnType @@ -426,7 +353,7 @@ square() const * Example: \include Cwise_cube.cpp * Output: \verbinclude Cwise_cube.out * - * \sa square(), pow() + * \sa Math functions, square(), pow() */ EIGEN_DEVICE_FUNC inline const CubeReturnType @@ -440,8 +367,9 @@ cube() const * Example: \include Cwise_round.cpp * Output: \verbinclude Cwise_round.out * - * \sa ceil(), floor() + * \sa Math functions, ceil(), floor() */ +EIGEN_DEVICE_FUNC inline const RoundReturnType round() const { @@ -453,8 +381,9 @@ round() const * Example: \include Cwise_floor.cpp * Output: \verbinclude Cwise_floor.out * - * \sa ceil(), round() + * \sa Math functions, ceil(), round() */ +EIGEN_DEVICE_FUNC inline const FloorReturnType floor() const { @@ -466,8 +395,9 @@ floor() const * Example: \include Cwise_ceil.cpp * Output: \verbinclude Cwise_ceil.out * - * \sa floor(), round() + * \sa Math functions, floor(), round() */ +EIGEN_DEVICE_FUNC inline const CeilReturnType ceil() const { @@ -481,6 +411,7 @@ ceil() const * * \sa isfinite(), isinf() */ +EIGEN_DEVICE_FUNC inline const IsNaNReturnType isNaN() const { @@ -494,6 +425,7 @@ isNaN() const * * \sa isnan(), isfinite() */ +EIGEN_DEVICE_FUNC inline const IsInfReturnType isInf() const { @@ -507,6 +439,7 @@ isInf() const * * \sa isnan(), isinf() */ +EIGEN_DEVICE_FUNC inline const IsFiniteReturnType isFinite() const { @@ -530,3 +463,90 @@ operator!() const THIS_METHOD_IS_ONLY_FOR_EXPRESSIONS_OF_BOOL); return BooleanNotReturnType(derived()); } + + +// --- SpecialFunctions module --- + +typedef CwiseUnaryOp, const Derived> LgammaReturnType; +typedef CwiseUnaryOp, const Derived> DigammaReturnType; +typedef CwiseUnaryOp, const Derived> ErfReturnType; +typedef CwiseUnaryOp, const 
Derived> ErfcReturnType; + +/** \cpp11 \returns an expression of the coefficient-wise ln(|gamma(*this)|). + * + * \specialfunctions_module + * + * Example: \include Cwise_lgamma.cpp + * Output: \verbinclude Cwise_lgamma.out + * + * \note This function supports only float and double scalar types in c++11 mode. To support other scalar types, + * or float/double in non c++11 mode, the user has to provide implementations of lgamma(T) for any scalar + * type T to be supported. + * + * \sa Math functions, digamma() + */ +EIGEN_DEVICE_FUNC +inline const LgammaReturnType +lgamma() const +{ + return LgammaReturnType(derived()); +} + +/** \returns an expression of the coefficient-wise digamma (psi, derivative of lgamma). + * + * \specialfunctions_module + * + * \note This function supports only float and double scalar types. To support other scalar types, + * the user has to provide implementations of digamma(T) for any scalar + * type T to be supported. + * + * \sa Math functions, Eigen::digamma(), Eigen::polygamma(), lgamma() + */ +EIGEN_DEVICE_FUNC +inline const DigammaReturnType +digamma() const +{ + return DigammaReturnType(derived()); +} + +/** \cpp11 \returns an expression of the coefficient-wise Gauss error + * function of *this. + * + * \specialfunctions_module + * + * Example: \include Cwise_erf.cpp + * Output: \verbinclude Cwise_erf.out + * + * \note This function supports only float and double scalar types in c++11 mode. To support other scalar types, + * or float/double in non c++11 mode, the user has to provide implementations of erf(T) for any scalar + * type T to be supported. + * + * \sa Math functions, erfc() + */ +EIGEN_DEVICE_FUNC +inline const ErfReturnType +erf() const +{ + return ErfReturnType(derived()); +} + +/** \cpp11 \returns an expression of the coefficient-wise Complementary error + * function of *this. 
+ * + * \specialfunctions_module + * + * Example: \include Cwise_erfc.cpp + * Output: \verbinclude Cwise_erfc.out + * + * \note This function supports only float and double scalar types in c++11 mode. To support other scalar types, + * or float/double in non c++11 mode, the user has to provide implementations of erfc(T) for any scalar + * type T to be supported. + * + * \sa Math functions, erf() + */ +EIGEN_DEVICE_FUNC +inline const ErfcReturnType +erfc() const +{ + return ErfcReturnType(derived()); +} diff --git a/Eigen/src/plugins/BlockMethods.h b/Eigen/src/plugins/BlockMethods.h index 632094e15..b76973613 100644 --- a/Eigen/src/plugins/BlockMethods.h +++ b/Eigen/src/plugins/BlockMethods.h @@ -10,28 +10,28 @@ #ifndef EIGEN_PARSED_BY_DOXYGEN -/** \internal expression type of a column */ +/// \internal expression type of a column typedef Block::RowsAtCompileTime, 1, !IsRowMajor> ColXpr; typedef const Block::RowsAtCompileTime, 1, !IsRowMajor> ConstColXpr; -/** \internal expression type of a row */ +/// \internal expression type of a row typedef Block::ColsAtCompileTime, IsRowMajor> RowXpr; typedef const Block::ColsAtCompileTime, IsRowMajor> ConstRowXpr; -/** \internal expression type of a block of whole columns */ +/// \internal expression type of a block of whole columns typedef Block::RowsAtCompileTime, Dynamic, !IsRowMajor> ColsBlockXpr; typedef const Block::RowsAtCompileTime, Dynamic, !IsRowMajor> ConstColsBlockXpr; -/** \internal expression type of a block of whole rows */ +/// \internal expression type of a block of whole rows typedef Block::ColsAtCompileTime, IsRowMajor> RowsBlockXpr; typedef const Block::ColsAtCompileTime, IsRowMajor> ConstRowsBlockXpr; -/** \internal expression type of a block of whole columns */ +/// \internal expression type of a block of whole columns template struct NColsBlockXpr { typedef Block::RowsAtCompileTime, N, !IsRowMajor> Type; }; template struct ConstNColsBlockXpr { typedef const Block::RowsAtCompileTime, N, 
!IsRowMajor> Type; }; -/** \internal expression type of a block of whole rows */ +/// \internal expression type of a block of whole rows template struct NRowsBlockXpr { typedef Block::ColsAtCompileTime, IsRowMajor> Type; }; template struct ConstNRowsBlockXpr { typedef const Block::ColsAtCompileTime, IsRowMajor> Type; }; -/** \internal expression of a block */ +/// \internal expression of a block typedef Block BlockXpr; typedef const Block ConstBlockXpr; -/** \internal expression of a block of fixed sizes */ +/// \internal expression of a block of fixed sizes template struct FixedBlockXpr { typedef Block Type; }; template struct ConstFixedBlockXpr { typedef Block Type; }; @@ -42,29 +42,31 @@ template struct ConstFixedSegmentReturnType { typedef const VectorBloc #endif // not EIGEN_PARSED_BY_DOXYGEN -/** \returns a dynamic-size expression of a block in *this. - * - * \param startRow the first row in the block - * \param startCol the first column in the block - * \param blockRows the number of rows in the block - * \param blockCols the number of columns in the block - * - * Example: \include MatrixBase_block_int_int_int_int.cpp - * Output: \verbinclude MatrixBase_block_int_int_int_int.out - * - * \note Even though the returned expression has dynamic size, in the case - * when it is applied to a fixed-size matrix, it inherits a fixed maximal size, - * which means that evaluating it does not cause a dynamic memory allocation. - * - * \sa class Block, block(Index,Index) - */ +/// \returns a dynamic-size expression of a block in *this. 
+/// +/// \param startRow the first row in the block +/// \param startCol the first column in the block +/// \param blockRows the number of rows in the block +/// \param blockCols the number of columns in the block +/// +/// Example: \include MatrixBase_block_int_int_int_int.cpp +/// Output: \verbinclude MatrixBase_block_int_int_int_int.out +/// +/// \note Even though the returned expression has dynamic size, in the case +/// when it is applied to a fixed-size matrix, it inherits a fixed maximal size, +/// which means that evaluating it does not cause a dynamic memory allocation. +/// +EIGEN_DOC_BLOCK_ADDONS_NOT_INNER_PANEL +/// +/// \sa class Block, block(Index,Index) +/// EIGEN_DEVICE_FUNC inline BlockXpr block(Index startRow, Index startCol, Index blockRows, Index blockCols) { return BlockXpr(derived(), startRow, startCol, blockRows, blockCols); } -/** This is the const version of block(Index,Index,Index,Index). */ +/// This is the const version of block(Index,Index,Index,Index). EIGEN_DEVICE_FUNC inline const ConstBlockXpr block(Index startRow, Index startCol, Index blockRows, Index blockCols) const { @@ -74,39 +76,43 @@ inline const ConstBlockXpr block(Index startRow, Index startCol, Index blockRows -/** \returns a dynamic-size expression of a top-right corner of *this. - * - * \param cRows the number of rows in the corner - * \param cCols the number of columns in the corner - * - * Example: \include MatrixBase_topRightCorner_int_int.cpp - * Output: \verbinclude MatrixBase_topRightCorner_int_int.out - * - * \sa class Block, block(Index,Index,Index,Index) - */ +/// \returns a dynamic-size expression of a top-right corner of *this. 
+/// +/// \param cRows the number of rows in the corner +/// \param cCols the number of columns in the corner +/// +/// Example: \include MatrixBase_topRightCorner_int_int.cpp +/// Output: \verbinclude MatrixBase_topRightCorner_int_int.out +/// +EIGEN_DOC_BLOCK_ADDONS_NOT_INNER_PANEL +/// +/// \sa class Block, block(Index,Index,Index,Index) +/// EIGEN_DEVICE_FUNC inline BlockXpr topRightCorner(Index cRows, Index cCols) { return BlockXpr(derived(), 0, cols() - cCols, cRows, cCols); } -/** This is the const version of topRightCorner(Index, Index).*/ +/// This is the const version of topRightCorner(Index, Index). EIGEN_DEVICE_FUNC inline const ConstBlockXpr topRightCorner(Index cRows, Index cCols) const { return ConstBlockXpr(derived(), 0, cols() - cCols, cRows, cCols); } -/** \returns an expression of a fixed-size top-right corner of *this. - * - * \tparam CRows the number of rows in the corner - * \tparam CCols the number of columns in the corner - * - * Example: \include MatrixBase_template_int_int_topRightCorner.cpp - * Output: \verbinclude MatrixBase_template_int_int_topRightCorner.out - * - * \sa class Block, block(Index,Index) - */ +/// \returns an expression of a fixed-size top-right corner of *this. +/// +/// \tparam CRows the number of rows in the corner +/// \tparam CCols the number of columns in the corner +/// +/// Example: \include MatrixBase_template_int_int_topRightCorner.cpp +/// Output: \verbinclude MatrixBase_template_int_int_topRightCorner.out +/// +EIGEN_DOC_BLOCK_ADDONS_NOT_INNER_PANEL +/// +/// \sa class Block, block(Index,Index) +/// template EIGEN_DEVICE_FUNC inline typename FixedBlockXpr::Type topRightCorner() @@ -114,7 +120,7 @@ inline typename FixedBlockXpr::Type topRightCorner() return typename FixedBlockXpr::Type(derived(), 0, cols() - CCols); } -/** This is the const version of topRightCorner().*/ +/// This is the const version of topRightCorner(). 
template EIGEN_DEVICE_FUNC inline const typename ConstFixedBlockXpr::Type topRightCorner() const @@ -122,30 +128,32 @@ inline const typename ConstFixedBlockXpr::Type topRightCorner() con return typename ConstFixedBlockXpr::Type(derived(), 0, cols() - CCols); } -/** \returns an expression of a top-right corner of *this. - * - * \tparam CRows number of rows in corner as specified at compile-time - * \tparam CCols number of columns in corner as specified at compile-time - * \param cRows number of rows in corner as specified at run-time - * \param cCols number of columns in corner as specified at run-time - * - * This function is mainly useful for corners where the number of rows is specified at compile-time - * and the number of columns is specified at run-time, or vice versa. The compile-time and run-time - * information should not contradict. In other words, \a cRows should equal \a CRows unless - * \a CRows is \a Dynamic, and the same for the number of columns. - * - * Example: \include MatrixBase_template_int_int_topRightCorner_int_int.cpp - * Output: \verbinclude MatrixBase_template_int_int_topRightCorner_int_int.out - * - * \sa class Block - */ +/// \returns an expression of a top-right corner of *this. +/// +/// \tparam CRows number of rows in corner as specified at compile-time +/// \tparam CCols number of columns in corner as specified at compile-time +/// \param cRows number of rows in corner as specified at run-time +/// \param cCols number of columns in corner as specified at run-time +/// +/// This function is mainly useful for corners where the number of rows is specified at compile-time +/// and the number of columns is specified at run-time, or vice versa. The compile-time and run-time +/// information should not contradict. In other words, \a cRows should equal \a CRows unless +/// \a CRows is \a Dynamic, and the same for the number of columns. 
+/// +/// Example: \include MatrixBase_template_int_int_topRightCorner_int_int.cpp +/// Output: \verbinclude MatrixBase_template_int_int_topRightCorner_int_int.out +/// +EIGEN_DOC_BLOCK_ADDONS_NOT_INNER_PANEL +/// +/// \sa class Block +/// template inline typename FixedBlockXpr::Type topRightCorner(Index cRows, Index cCols) { return typename FixedBlockXpr::Type(derived(), 0, cols() - cCols, cRows, cCols); } -/** This is the const version of topRightCorner(Index, Index).*/ +/// This is the const version of topRightCorner(Index, Index). template inline const typename ConstFixedBlockXpr::Type topRightCorner(Index cRows, Index cCols) const { @@ -154,38 +162,42 @@ inline const typename ConstFixedBlockXpr::Type topRightCorner(Index -/** \returns a dynamic-size expression of a top-left corner of *this. - * - * \param cRows the number of rows in the corner - * \param cCols the number of columns in the corner - * - * Example: \include MatrixBase_topLeftCorner_int_int.cpp - * Output: \verbinclude MatrixBase_topLeftCorner_int_int.out - * - * \sa class Block, block(Index,Index,Index,Index) - */ +/// \returns a dynamic-size expression of a top-left corner of *this. +/// +/// \param cRows the number of rows in the corner +/// \param cCols the number of columns in the corner +/// +/// Example: \include MatrixBase_topLeftCorner_int_int.cpp +/// Output: \verbinclude MatrixBase_topLeftCorner_int_int.out +/// +EIGEN_DOC_BLOCK_ADDONS_NOT_INNER_PANEL +/// +/// \sa class Block, block(Index,Index,Index,Index) +/// EIGEN_DEVICE_FUNC inline BlockXpr topLeftCorner(Index cRows, Index cCols) { return BlockXpr(derived(), 0, 0, cRows, cCols); } -/** This is the const version of topLeftCorner(Index, Index).*/ +/// This is the const version of topLeftCorner(Index, Index). EIGEN_DEVICE_FUNC inline const ConstBlockXpr topLeftCorner(Index cRows, Index cCols) const { return ConstBlockXpr(derived(), 0, 0, cRows, cCols); } -/** \returns an expression of a fixed-size top-left corner of *this. 
- * - * The template parameters CRows and CCols are the number of rows and columns in the corner. - * - * Example: \include MatrixBase_template_int_int_topLeftCorner.cpp - * Output: \verbinclude MatrixBase_template_int_int_topLeftCorner.out - * - * \sa class Block, block(Index,Index,Index,Index) - */ +/// \returns an expression of a fixed-size top-left corner of *this. +/// +/// The template parameters CRows and CCols are the number of rows and columns in the corner. +/// +/// Example: \include MatrixBase_template_int_int_topLeftCorner.cpp +/// Output: \verbinclude MatrixBase_template_int_int_topLeftCorner.out +/// +EIGEN_DOC_BLOCK_ADDONS_NOT_INNER_PANEL +/// +/// \sa class Block, block(Index,Index,Index,Index) +/// template EIGEN_DEVICE_FUNC inline typename FixedBlockXpr::Type topLeftCorner() @@ -193,7 +205,7 @@ inline typename FixedBlockXpr::Type topLeftCorner() return typename FixedBlockXpr::Type(derived(), 0, 0); } -/** This is the const version of topLeftCorner().*/ +/// This is the const version of topLeftCorner(). template EIGEN_DEVICE_FUNC inline const typename ConstFixedBlockXpr::Type topLeftCorner() const @@ -201,30 +213,32 @@ inline const typename ConstFixedBlockXpr::Type topLeftCorner() cons return typename ConstFixedBlockXpr::Type(derived(), 0, 0); } -/** \returns an expression of a top-left corner of *this. - * - * \tparam CRows number of rows in corner as specified at compile-time - * \tparam CCols number of columns in corner as specified at compile-time - * \param cRows number of rows in corner as specified at run-time - * \param cCols number of columns in corner as specified at run-time - * - * This function is mainly useful for corners where the number of rows is specified at compile-time - * and the number of columns is specified at run-time, or vice versa. The compile-time and run-time - * information should not contradict. In other words, \a cRows should equal \a CRows unless - * \a CRows is \a Dynamic, and the same for the number of columns. 
- * - * Example: \include MatrixBase_template_int_int_topLeftCorner_int_int.cpp - * Output: \verbinclude MatrixBase_template_int_int_topLeftCorner_int_int.out - * - * \sa class Block - */ +/// \returns an expression of a top-left corner of *this. +/// +/// \tparam CRows number of rows in corner as specified at compile-time +/// \tparam CCols number of columns in corner as specified at compile-time +/// \param cRows number of rows in corner as specified at run-time +/// \param cCols number of columns in corner as specified at run-time +/// +/// This function is mainly useful for corners where the number of rows is specified at compile-time +/// and the number of columns is specified at run-time, or vice versa. The compile-time and run-time +/// information should not contradict. In other words, \a cRows should equal \a CRows unless +/// \a CRows is \a Dynamic, and the same for the number of columns. +/// +/// Example: \include MatrixBase_template_int_int_topLeftCorner_int_int.cpp +/// Output: \verbinclude MatrixBase_template_int_int_topLeftCorner_int_int.out +/// +EIGEN_DOC_BLOCK_ADDONS_NOT_INNER_PANEL +/// +/// \sa class Block +/// template inline typename FixedBlockXpr::Type topLeftCorner(Index cRows, Index cCols) { return typename FixedBlockXpr::Type(derived(), 0, 0, cRows, cCols); } -/** This is the const version of topLeftCorner(Index, Index).*/ +/// This is the const version of topLeftCorner(Index, Index). template inline const typename ConstFixedBlockXpr::Type topLeftCorner(Index cRows, Index cCols) const { @@ -233,38 +247,42 @@ inline const typename ConstFixedBlockXpr::Type topLeftCorner(Index -/** \returns a dynamic-size expression of a bottom-right corner of *this. 
- * - * \param cRows the number of rows in the corner - * \param cCols the number of columns in the corner - * - * Example: \include MatrixBase_bottomRightCorner_int_int.cpp - * Output: \verbinclude MatrixBase_bottomRightCorner_int_int.out - * - * \sa class Block, block(Index,Index,Index,Index) - */ +/// \returns a dynamic-size expression of a bottom-right corner of *this. +/// +/// \param cRows the number of rows in the corner +/// \param cCols the number of columns in the corner +/// +/// Example: \include MatrixBase_bottomRightCorner_int_int.cpp +/// Output: \verbinclude MatrixBase_bottomRightCorner_int_int.out +/// +EIGEN_DOC_BLOCK_ADDONS_NOT_INNER_PANEL +/// +/// \sa class Block, block(Index,Index,Index,Index) +/// EIGEN_DEVICE_FUNC inline BlockXpr bottomRightCorner(Index cRows, Index cCols) { return BlockXpr(derived(), rows() - cRows, cols() - cCols, cRows, cCols); } -/** This is the const version of bottomRightCorner(Index, Index).*/ +/// This is the const version of bottomRightCorner(Index, Index). EIGEN_DEVICE_FUNC inline const ConstBlockXpr bottomRightCorner(Index cRows, Index cCols) const { return ConstBlockXpr(derived(), rows() - cRows, cols() - cCols, cRows, cCols); } -/** \returns an expression of a fixed-size bottom-right corner of *this. - * - * The template parameters CRows and CCols are the number of rows and columns in the corner. - * - * Example: \include MatrixBase_template_int_int_bottomRightCorner.cpp - * Output: \verbinclude MatrixBase_template_int_int_bottomRightCorner.out - * - * \sa class Block, block(Index,Index,Index,Index) - */ +/// \returns an expression of a fixed-size bottom-right corner of *this. +/// +/// The template parameters CRows and CCols are the number of rows and columns in the corner. 
+/// +/// Example: \include MatrixBase_template_int_int_bottomRightCorner.cpp +/// Output: \verbinclude MatrixBase_template_int_int_bottomRightCorner.out +/// +EIGEN_DOC_BLOCK_ADDONS_NOT_INNER_PANEL +/// +/// \sa class Block, block(Index,Index,Index,Index) +/// template EIGEN_DEVICE_FUNC inline typename FixedBlockXpr::Type bottomRightCorner() @@ -272,7 +290,7 @@ inline typename FixedBlockXpr::Type bottomRightCorner() return typename FixedBlockXpr::Type(derived(), rows() - CRows, cols() - CCols); } -/** This is the const version of bottomRightCorner().*/ +/// This is the const version of bottomRightCorner(). template EIGEN_DEVICE_FUNC inline const typename ConstFixedBlockXpr::Type bottomRightCorner() const @@ -280,30 +298,32 @@ inline const typename ConstFixedBlockXpr::Type bottomRightCorner() return typename ConstFixedBlockXpr::Type(derived(), rows() - CRows, cols() - CCols); } -/** \returns an expression of a bottom-right corner of *this. - * - * \tparam CRows number of rows in corner as specified at compile-time - * \tparam CCols number of columns in corner as specified at compile-time - * \param cRows number of rows in corner as specified at run-time - * \param cCols number of columns in corner as specified at run-time - * - * This function is mainly useful for corners where the number of rows is specified at compile-time - * and the number of columns is specified at run-time, or vice versa. The compile-time and run-time - * information should not contradict. In other words, \a cRows should equal \a CRows unless - * \a CRows is \a Dynamic, and the same for the number of columns. - * - * Example: \include MatrixBase_template_int_int_bottomRightCorner_int_int.cpp - * Output: \verbinclude MatrixBase_template_int_int_bottomRightCorner_int_int.out - * - * \sa class Block - */ +/// \returns an expression of a bottom-right corner of *this. 
+/// +/// \tparam CRows number of rows in corner as specified at compile-time +/// \tparam CCols number of columns in corner as specified at compile-time +/// \param cRows number of rows in corner as specified at run-time +/// \param cCols number of columns in corner as specified at run-time +/// +/// This function is mainly useful for corners where the number of rows is specified at compile-time +/// and the number of columns is specified at run-time, or vice versa. The compile-time and run-time +/// information should not contradict. In other words, \a cRows should equal \a CRows unless +/// \a CRows is \a Dynamic, and the same for the number of columns. +/// +/// Example: \include MatrixBase_template_int_int_bottomRightCorner_int_int.cpp +/// Output: \verbinclude MatrixBase_template_int_int_bottomRightCorner_int_int.out +/// +EIGEN_DOC_BLOCK_ADDONS_NOT_INNER_PANEL +/// +/// \sa class Block +/// template inline typename FixedBlockXpr::Type bottomRightCorner(Index cRows, Index cCols) { return typename FixedBlockXpr::Type(derived(), rows() - cRows, cols() - cCols, cRows, cCols); } -/** This is the const version of bottomRightCorner(Index, Index).*/ +/// This is the const version of bottomRightCorner(Index, Index). template inline const typename ConstFixedBlockXpr::Type bottomRightCorner(Index cRows, Index cCols) const { @@ -312,38 +332,42 @@ inline const typename ConstFixedBlockXpr::Type bottomRightCorner(In -/** \returns a dynamic-size expression of a bottom-left corner of *this. - * - * \param cRows the number of rows in the corner - * \param cCols the number of columns in the corner - * - * Example: \include MatrixBase_bottomLeftCorner_int_int.cpp - * Output: \verbinclude MatrixBase_bottomLeftCorner_int_int.out - * - * \sa class Block, block(Index,Index,Index,Index) - */ +/// \returns a dynamic-size expression of a bottom-left corner of *this. 
+/// +/// \param cRows the number of rows in the corner +/// \param cCols the number of columns in the corner +/// +/// Example: \include MatrixBase_bottomLeftCorner_int_int.cpp +/// Output: \verbinclude MatrixBase_bottomLeftCorner_int_int.out +/// +EIGEN_DOC_BLOCK_ADDONS_NOT_INNER_PANEL +/// +/// \sa class Block, block(Index,Index,Index,Index) +/// EIGEN_DEVICE_FUNC inline BlockXpr bottomLeftCorner(Index cRows, Index cCols) { return BlockXpr(derived(), rows() - cRows, 0, cRows, cCols); } -/** This is the const version of bottomLeftCorner(Index, Index).*/ +/// This is the const version of bottomLeftCorner(Index, Index). EIGEN_DEVICE_FUNC inline const ConstBlockXpr bottomLeftCorner(Index cRows, Index cCols) const { return ConstBlockXpr(derived(), rows() - cRows, 0, cRows, cCols); } -/** \returns an expression of a fixed-size bottom-left corner of *this. - * - * The template parameters CRows and CCols are the number of rows and columns in the corner. - * - * Example: \include MatrixBase_template_int_int_bottomLeftCorner.cpp - * Output: \verbinclude MatrixBase_template_int_int_bottomLeftCorner.out - * - * \sa class Block, block(Index,Index,Index,Index) - */ +/// \returns an expression of a fixed-size bottom-left corner of *this. +/// +/// The template parameters CRows and CCols are the number of rows and columns in the corner. +/// +/// Example: \include MatrixBase_template_int_int_bottomLeftCorner.cpp +/// Output: \verbinclude MatrixBase_template_int_int_bottomLeftCorner.out +/// +EIGEN_DOC_BLOCK_ADDONS_NOT_INNER_PANEL +/// +/// \sa class Block, block(Index,Index,Index,Index) +/// template EIGEN_DEVICE_FUNC inline typename FixedBlockXpr::Type bottomLeftCorner() @@ -351,7 +375,7 @@ inline typename FixedBlockXpr::Type bottomLeftCorner() return typename FixedBlockXpr::Type(derived(), rows() - CRows, 0); } -/** This is the const version of bottomLeftCorner().*/ +/// This is the const version of bottomLeftCorner(). 
template EIGEN_DEVICE_FUNC inline const typename ConstFixedBlockXpr::Type bottomLeftCorner() const @@ -359,30 +383,32 @@ inline const typename ConstFixedBlockXpr::Type bottomLeftCorner() c return typename ConstFixedBlockXpr::Type(derived(), rows() - CRows, 0); } -/** \returns an expression of a bottom-left corner of *this. - * - * \tparam CRows number of rows in corner as specified at compile-time - * \tparam CCols number of columns in corner as specified at compile-time - * \param cRows number of rows in corner as specified at run-time - * \param cCols number of columns in corner as specified at run-time - * - * This function is mainly useful for corners where the number of rows is specified at compile-time - * and the number of columns is specified at run-time, or vice versa. The compile-time and run-time - * information should not contradict. In other words, \a cRows should equal \a CRows unless - * \a CRows is \a Dynamic, and the same for the number of columns. - * - * Example: \include MatrixBase_template_int_int_bottomLeftCorner_int_int.cpp - * Output: \verbinclude MatrixBase_template_int_int_bottomLeftCorner_int_int.out - * - * \sa class Block - */ +/// \returns an expression of a bottom-left corner of *this. +/// +/// \tparam CRows number of rows in corner as specified at compile-time +/// \tparam CCols number of columns in corner as specified at compile-time +/// \param cRows number of rows in corner as specified at run-time +/// \param cCols number of columns in corner as specified at run-time +/// +/// This function is mainly useful for corners where the number of rows is specified at compile-time +/// and the number of columns is specified at run-time, or vice versa. The compile-time and run-time +/// information should not contradict. In other words, \a cRows should equal \a CRows unless +/// \a CRows is \a Dynamic, and the same for the number of columns. 
+/// +/// Example: \include MatrixBase_template_int_int_bottomLeftCorner_int_int.cpp +/// Output: \verbinclude MatrixBase_template_int_int_bottomLeftCorner_int_int.out +/// +EIGEN_DOC_BLOCK_ADDONS_NOT_INNER_PANEL +/// +/// \sa class Block +/// template inline typename FixedBlockXpr::Type bottomLeftCorner(Index cRows, Index cCols) { return typename FixedBlockXpr::Type(derived(), rows() - cRows, 0, cRows, cCols); } -/** This is the const version of bottomLeftCorner(Index, Index).*/ +/// This is the const version of bottomLeftCorner(Index, Index). template inline const typename ConstFixedBlockXpr::Type bottomLeftCorner(Index cRows, Index cCols) const { @@ -391,41 +417,45 @@ inline const typename ConstFixedBlockXpr::Type bottomLeftCorner(Ind -/** \returns a block consisting of the top rows of *this. - * - * \param n the number of rows in the block - * - * Example: \include MatrixBase_topRows_int.cpp - * Output: \verbinclude MatrixBase_topRows_int.out - * - * \sa class Block, block(Index,Index,Index,Index) - */ +/// \returns a block consisting of the top rows of *this. +/// +/// \param n the number of rows in the block +/// +/// Example: \include MatrixBase_topRows_int.cpp +/// Output: \verbinclude MatrixBase_topRows_int.out +/// +EIGEN_DOC_BLOCK_ADDONS_INNER_PANEL_IF(row-major) +/// +/// \sa class Block, block(Index,Index,Index,Index) +/// EIGEN_DEVICE_FUNC inline RowsBlockXpr topRows(Index n) { return RowsBlockXpr(derived(), 0, 0, n, cols()); } -/** This is the const version of topRows(Index).*/ +/// This is the const version of topRows(Index). EIGEN_DEVICE_FUNC inline ConstRowsBlockXpr topRows(Index n) const { return ConstRowsBlockXpr(derived(), 0, 0, n, cols()); } -/** \returns a block consisting of the top rows of *this. - * - * \tparam N the number of rows in the block as specified at compile-time - * \param n the number of rows in the block as specified at run-time - * - * The compile-time and run-time information should not contradict. 
In other words, - * \a n should equal \a N unless \a N is \a Dynamic. - * - * Example: \include MatrixBase_template_int_topRows.cpp - * Output: \verbinclude MatrixBase_template_int_topRows.out - * - * \sa class Block, block(Index,Index,Index,Index) - */ +/// \returns a block consisting of the top rows of *this. +/// +/// \tparam N the number of rows in the block as specified at compile-time +/// \param n the number of rows in the block as specified at run-time +/// +/// The compile-time and run-time information should not contradict. In other words, +/// \a n should equal \a N unless \a N is \a Dynamic. +/// +/// Example: \include MatrixBase_template_int_topRows.cpp +/// Output: \verbinclude MatrixBase_template_int_topRows.out +/// +EIGEN_DOC_BLOCK_ADDONS_INNER_PANEL_IF(row-major) +/// +/// \sa class Block, block(Index,Index,Index,Index) +/// template EIGEN_DEVICE_FUNC inline typename NRowsBlockXpr::Type topRows(Index n = N) @@ -433,7 +463,7 @@ inline typename NRowsBlockXpr::Type topRows(Index n = N) return typename NRowsBlockXpr::Type(derived(), 0, 0, n, cols()); } -/** This is the const version of topRows().*/ +/// This is the const version of topRows(). template EIGEN_DEVICE_FUNC inline typename ConstNRowsBlockXpr::Type topRows(Index n = N) const @@ -443,41 +473,45 @@ inline typename ConstNRowsBlockXpr::Type topRows(Index n = N) const -/** \returns a block consisting of the bottom rows of *this. - * - * \param n the number of rows in the block - * - * Example: \include MatrixBase_bottomRows_int.cpp - * Output: \verbinclude MatrixBase_bottomRows_int.out - * - * \sa class Block, block(Index,Index,Index,Index) - */ +/// \returns a block consisting of the bottom rows of *this. 
+/// +/// \param n the number of rows in the block +/// +/// Example: \include MatrixBase_bottomRows_int.cpp +/// Output: \verbinclude MatrixBase_bottomRows_int.out +/// +EIGEN_DOC_BLOCK_ADDONS_INNER_PANEL_IF(row-major) +/// +/// \sa class Block, block(Index,Index,Index,Index) +/// EIGEN_DEVICE_FUNC inline RowsBlockXpr bottomRows(Index n) { return RowsBlockXpr(derived(), rows() - n, 0, n, cols()); } -/** This is the const version of bottomRows(Index).*/ +/// This is the const version of bottomRows(Index). EIGEN_DEVICE_FUNC inline ConstRowsBlockXpr bottomRows(Index n) const { return ConstRowsBlockXpr(derived(), rows() - n, 0, n, cols()); } -/** \returns a block consisting of the bottom rows of *this. - * - * \tparam N the number of rows in the block as specified at compile-time - * \param n the number of rows in the block as specified at run-time - * - * The compile-time and run-time information should not contradict. In other words, - * \a n should equal \a N unless \a N is \a Dynamic. - * - * Example: \include MatrixBase_template_int_bottomRows.cpp - * Output: \verbinclude MatrixBase_template_int_bottomRows.out - * - * \sa class Block, block(Index,Index,Index,Index) - */ +/// \returns a block consisting of the bottom rows of *this. +/// +/// \tparam N the number of rows in the block as specified at compile-time +/// \param n the number of rows in the block as specified at run-time +/// +/// The compile-time and run-time information should not contradict. In other words, +/// \a n should equal \a N unless \a N is \a Dynamic. 
+/// +/// Example: \include MatrixBase_template_int_bottomRows.cpp +/// Output: \verbinclude MatrixBase_template_int_bottomRows.out +/// +EIGEN_DOC_BLOCK_ADDONS_INNER_PANEL_IF(row-major) +/// +/// \sa class Block, block(Index,Index,Index,Index) +/// template EIGEN_DEVICE_FUNC inline typename NRowsBlockXpr::Type bottomRows(Index n = N) @@ -485,7 +519,7 @@ inline typename NRowsBlockXpr::Type bottomRows(Index n = N) return typename NRowsBlockXpr::Type(derived(), rows() - n, 0, n, cols()); } -/** This is the const version of bottomRows().*/ +/// This is the const version of bottomRows(). template EIGEN_DEVICE_FUNC inline typename ConstNRowsBlockXpr::Type bottomRows(Index n = N) const @@ -495,43 +529,47 @@ inline typename ConstNRowsBlockXpr::Type bottomRows(Index n = N) const -/** \returns a block consisting of a range of rows of *this. - * - * \param startRow the index of the first row in the block - * \param n the number of rows in the block - * - * Example: \include DenseBase_middleRows_int.cpp - * Output: \verbinclude DenseBase_middleRows_int.out - * - * \sa class Block, block(Index,Index,Index,Index) - */ +/// \returns a block consisting of a range of rows of *this. +/// +/// \param startRow the index of the first row in the block +/// \param n the number of rows in the block +/// +/// Example: \include DenseBase_middleRows_int.cpp +/// Output: \verbinclude DenseBase_middleRows_int.out +/// +EIGEN_DOC_BLOCK_ADDONS_INNER_PANEL_IF(row-major) +/// +/// \sa class Block, block(Index,Index,Index,Index) +/// EIGEN_DEVICE_FUNC inline RowsBlockXpr middleRows(Index startRow, Index n) { return RowsBlockXpr(derived(), startRow, 0, n, cols()); } -/** This is the const version of middleRows(Index,Index).*/ +/// This is the const version of middleRows(Index,Index). EIGEN_DEVICE_FUNC inline ConstRowsBlockXpr middleRows(Index startRow, Index n) const { return ConstRowsBlockXpr(derived(), startRow, 0, n, cols()); } -/** \returns a block consisting of a range of rows of *this. 
- * - * \tparam N the number of rows in the block as specified at compile-time - * \param startRow the index of the first row in the block - * \param n the number of rows in the block as specified at run-time - * - * The compile-time and run-time information should not contradict. In other words, - * \a n should equal \a N unless \a N is \a Dynamic. - * - * Example: \include DenseBase_template_int_middleRows.cpp - * Output: \verbinclude DenseBase_template_int_middleRows.out - * - * \sa class Block, block(Index,Index,Index,Index) - */ +/// \returns a block consisting of a range of rows of *this. +/// +/// \tparam N the number of rows in the block as specified at compile-time +/// \param startRow the index of the first row in the block +/// \param n the number of rows in the block as specified at run-time +/// +/// The compile-time and run-time information should not contradict. In other words, +/// \a n should equal \a N unless \a N is \a Dynamic. +/// +/// Example: \include DenseBase_template_int_middleRows.cpp +/// Output: \verbinclude DenseBase_template_int_middleRows.out +/// +EIGEN_DOC_BLOCK_ADDONS_INNER_PANEL_IF(row-major) +/// +/// \sa class Block, block(Index,Index,Index,Index) +/// template EIGEN_DEVICE_FUNC inline typename NRowsBlockXpr::Type middleRows(Index startRow, Index n = N) @@ -539,7 +577,7 @@ inline typename NRowsBlockXpr::Type middleRows(Index startRow, Index n = N) return typename NRowsBlockXpr::Type(derived(), startRow, 0, n, cols()); } -/** This is the const version of middleRows().*/ +/// This is the const version of middleRows(). template EIGEN_DEVICE_FUNC inline typename ConstNRowsBlockXpr::Type middleRows(Index startRow, Index n = N) const @@ -549,41 +587,45 @@ inline typename ConstNRowsBlockXpr::Type middleRows(Index startRow, Index n = -/** \returns a block consisting of the left columns of *this. 
- * - * \param n the number of columns in the block - * - * Example: \include MatrixBase_leftCols_int.cpp - * Output: \verbinclude MatrixBase_leftCols_int.out - * - * \sa class Block, block(Index,Index,Index,Index) - */ +/// \returns a block consisting of the left columns of *this. +/// +/// \param n the number of columns in the block +/// +/// Example: \include MatrixBase_leftCols_int.cpp +/// Output: \verbinclude MatrixBase_leftCols_int.out +/// +EIGEN_DOC_BLOCK_ADDONS_INNER_PANEL_IF(column-major) +/// +/// \sa class Block, block(Index,Index,Index,Index) +/// EIGEN_DEVICE_FUNC inline ColsBlockXpr leftCols(Index n) { return ColsBlockXpr(derived(), 0, 0, rows(), n); } -/** This is the const version of leftCols(Index).*/ +/// This is the const version of leftCols(Index). EIGEN_DEVICE_FUNC inline ConstColsBlockXpr leftCols(Index n) const { return ConstColsBlockXpr(derived(), 0, 0, rows(), n); } -/** \returns a block consisting of the left columns of *this. - * - * \tparam N the number of columns in the block as specified at compile-time - * \param n the number of columns in the block as specified at run-time - * - * The compile-time and run-time information should not contradict. In other words, - * \a n should equal \a N unless \a N is \a Dynamic. - * - * Example: \include MatrixBase_template_int_leftCols.cpp - * Output: \verbinclude MatrixBase_template_int_leftCols.out - * - * \sa class Block, block(Index,Index,Index,Index) - */ +/// \returns a block consisting of the left columns of *this. +/// +/// \tparam N the number of columns in the block as specified at compile-time +/// \param n the number of columns in the block as specified at run-time +/// +/// The compile-time and run-time information should not contradict. In other words, +/// \a n should equal \a N unless \a N is \a Dynamic. 
+/// +/// Example: \include MatrixBase_template_int_leftCols.cpp +/// Output: \verbinclude MatrixBase_template_int_leftCols.out +/// +EIGEN_DOC_BLOCK_ADDONS_INNER_PANEL_IF(column-major) +/// +/// \sa class Block, block(Index,Index,Index,Index) +/// template EIGEN_DEVICE_FUNC inline typename NColsBlockXpr::Type leftCols(Index n = N) @@ -591,7 +633,7 @@ inline typename NColsBlockXpr::Type leftCols(Index n = N) return typename NColsBlockXpr::Type(derived(), 0, 0, rows(), n); } -/** This is the const version of leftCols().*/ +/// This is the const version of leftCols(). template EIGEN_DEVICE_FUNC inline typename ConstNColsBlockXpr::Type leftCols(Index n = N) const @@ -601,41 +643,45 @@ inline typename ConstNColsBlockXpr::Type leftCols(Index n = N) const -/** \returns a block consisting of the right columns of *this. - * - * \param n the number of columns in the block - * - * Example: \include MatrixBase_rightCols_int.cpp - * Output: \verbinclude MatrixBase_rightCols_int.out - * - * \sa class Block, block(Index,Index,Index,Index) - */ +/// \returns a block consisting of the right columns of *this. +/// +/// \param n the number of columns in the block +/// +/// Example: \include MatrixBase_rightCols_int.cpp +/// Output: \verbinclude MatrixBase_rightCols_int.out +/// +EIGEN_DOC_BLOCK_ADDONS_INNER_PANEL_IF(column-major) +/// +/// \sa class Block, block(Index,Index,Index,Index) +/// EIGEN_DEVICE_FUNC inline ColsBlockXpr rightCols(Index n) { return ColsBlockXpr(derived(), 0, cols() - n, rows(), n); } -/** This is the const version of rightCols(Index).*/ +/// This is the const version of rightCols(Index). EIGEN_DEVICE_FUNC inline ConstColsBlockXpr rightCols(Index n) const { return ConstColsBlockXpr(derived(), 0, cols() - n, rows(), n); } -/** \returns a block consisting of the right columns of *this. 
- * - * \tparam N the number of columns in the block as specified at compile-time - * \param n the number of columns in the block as specified at run-time - * - * The compile-time and run-time information should not contradict. In other words, - * \a n should equal \a N unless \a N is \a Dynamic. - * - * Example: \include MatrixBase_template_int_rightCols.cpp - * Output: \verbinclude MatrixBase_template_int_rightCols.out - * - * \sa class Block, block(Index,Index,Index,Index) - */ +/// \returns a block consisting of the right columns of *this. +/// +/// \tparam N the number of columns in the block as specified at compile-time +/// \param n the number of columns in the block as specified at run-time +/// +/// The compile-time and run-time information should not contradict. In other words, +/// \a n should equal \a N unless \a N is \a Dynamic. +/// +/// Example: \include MatrixBase_template_int_rightCols.cpp +/// Output: \verbinclude MatrixBase_template_int_rightCols.out +/// +EIGEN_DOC_BLOCK_ADDONS_INNER_PANEL_IF(column-major) +/// +/// \sa class Block, block(Index,Index,Index,Index) +/// template EIGEN_DEVICE_FUNC inline typename NColsBlockXpr::Type rightCols(Index n = N) @@ -643,7 +689,7 @@ inline typename NColsBlockXpr::Type rightCols(Index n = N) return typename NColsBlockXpr::Type(derived(), 0, cols() - n, rows(), n); } -/** This is the const version of rightCols().*/ +/// This is the const version of rightCols(). template EIGEN_DEVICE_FUNC inline typename ConstNColsBlockXpr::Type rightCols(Index n = N) const @@ -653,43 +699,47 @@ inline typename ConstNColsBlockXpr::Type rightCols(Index n = N) const -/** \returns a block consisting of a range of columns of *this. 
- * - * \param startCol the index of the first column in the block - * \param numCols the number of columns in the block - * - * Example: \include DenseBase_middleCols_int.cpp - * Output: \verbinclude DenseBase_middleCols_int.out - * - * \sa class Block, block(Index,Index,Index,Index) - */ +/// \returns a block consisting of a range of columns of *this. +/// +/// \param startCol the index of the first column in the block +/// \param numCols the number of columns in the block +/// +/// Example: \include DenseBase_middleCols_int.cpp +/// Output: \verbinclude DenseBase_middleCols_int.out +/// +EIGEN_DOC_BLOCK_ADDONS_INNER_PANEL_IF(column-major) +/// +/// \sa class Block, block(Index,Index,Index,Index) +/// EIGEN_DEVICE_FUNC inline ColsBlockXpr middleCols(Index startCol, Index numCols) { return ColsBlockXpr(derived(), 0, startCol, rows(), numCols); } -/** This is the const version of middleCols(Index,Index).*/ +/// This is the const version of middleCols(Index,Index). EIGEN_DEVICE_FUNC inline ConstColsBlockXpr middleCols(Index startCol, Index numCols) const { return ConstColsBlockXpr(derived(), 0, startCol, rows(), numCols); } -/** \returns a block consisting of a range of columns of *this. - * - * \tparam N the number of columns in the block as specified at compile-time - * \param startCol the index of the first column in the block - * \param n the number of columns in the block as specified at run-time - * - * The compile-time and run-time information should not contradict. In other words, - * \a n should equal \a N unless \a N is \a Dynamic. - * - * Example: \include DenseBase_template_int_middleCols.cpp - * Output: \verbinclude DenseBase_template_int_middleCols.out - * - * \sa class Block, block(Index,Index,Index,Index) - */ +/// \returns a block consisting of a range of columns of *this. 
+/// +/// \tparam N the number of columns in the block as specified at compile-time +/// \param startCol the index of the first column in the block +/// \param n the number of columns in the block as specified at run-time +/// +/// The compile-time and run-time information should not contradict. In other words, +/// \a n should equal \a N unless \a N is \a Dynamic. +/// +/// Example: \include DenseBase_template_int_middleCols.cpp +/// Output: \verbinclude DenseBase_template_int_middleCols.out +/// +EIGEN_DOC_BLOCK_ADDONS_INNER_PANEL_IF(column-major) +/// +/// \sa class Block, block(Index,Index,Index,Index) +/// template EIGEN_DEVICE_FUNC inline typename NColsBlockXpr::Type middleCols(Index startCol, Index n = N) @@ -697,7 +747,7 @@ inline typename NColsBlockXpr::Type middleCols(Index startCol, Index n = N) return typename NColsBlockXpr::Type(derived(), 0, startCol, rows(), n); } -/** This is the const version of middleCols().*/ +/// This is the const version of middleCols(). template EIGEN_DEVICE_FUNC inline typename ConstNColsBlockXpr::Type middleCols(Index startCol, Index n = N) const @@ -707,22 +757,24 @@ inline typename ConstNColsBlockXpr::Type middleCols(Index startCol, Index n = -/** \returns a fixed-size expression of a block in *this. - * - * The template parameters \a NRows and \a NCols are the number of - * rows and columns in the block. - * - * \param startRow the first row in the block - * \param startCol the first column in the block - * - * Example: \include MatrixBase_block_int_int.cpp - * Output: \verbinclude MatrixBase_block_int_int.out - * - * \note since block is a templated member, the keyword template has to be used - * if the matrix type is also a template parameter: \code m.template block<3,3>(1,1); \endcode - * - * \sa class Block, block(Index,Index,Index,Index) - */ +/// \returns a fixed-size expression of a block in *this. +/// +/// The template parameters \a NRows and \a NCols are the number of +/// rows and columns in the block. 
+/// +/// \param startRow the first row in the block +/// \param startCol the first column in the block +/// +/// Example: \include MatrixBase_block_int_int.cpp +/// Output: \verbinclude MatrixBase_block_int_int.out +/// +/// \note since block is a templated member, the keyword template has to be used +/// if the matrix type is also a template parameter: \code m.template block<3,3>(1,1); \endcode +/// +EIGEN_DOC_BLOCK_ADDONS_NOT_INNER_PANEL +/// +/// \sa class Block, block(Index,Index,Index,Index) +/// template EIGEN_DEVICE_FUNC inline typename FixedBlockXpr::Type block(Index startRow, Index startCol) @@ -730,7 +782,7 @@ inline typename FixedBlockXpr::Type block(Index startRow, Index sta return typename FixedBlockXpr::Type(derived(), startRow, startCol); } -/** This is the const version of block<>(Index, Index). */ +/// This is the const version of block<>(Index, Index). */ template EIGEN_DEVICE_FUNC inline const typename ConstFixedBlockXpr::Type block(Index startRow, Index startCol) const @@ -738,25 +790,27 @@ inline const typename ConstFixedBlockXpr::Type block(Index startRow return typename ConstFixedBlockXpr::Type(derived(), startRow, startCol); } -/** \returns an expression of a block in *this. - * - * \tparam NRows number of rows in block as specified at compile-time - * \tparam NCols number of columns in block as specified at compile-time - * \param startRow the first row in the block - * \param startCol the first column in the block - * \param blockRows number of rows in block as specified at run-time - * \param blockCols number of columns in block as specified at run-time - * - * This function is mainly useful for blocks where the number of rows is specified at compile-time - * and the number of columns is specified at run-time, or vice versa. The compile-time and run-time - * information should not contradict. In other words, \a blockRows should equal \a NRows unless - * \a NRows is \a Dynamic, and the same for the number of columns. 
- * - * Example: \include MatrixBase_template_int_int_block_int_int_int_int.cpp - * Output: \verbinclude MatrixBase_template_int_int_block_int_int_int_int.cpp - * - * \sa class Block, block(Index,Index,Index,Index) - */ +/// \returns an expression of a block in *this. +/// +/// \tparam NRows number of rows in block as specified at compile-time +/// \tparam NCols number of columns in block as specified at compile-time +/// \param startRow the first row in the block +/// \param startCol the first column in the block +/// \param blockRows number of rows in block as specified at run-time +/// \param blockCols number of columns in block as specified at run-time +/// +/// This function is mainly useful for blocks where the number of rows is specified at compile-time +/// and the number of columns is specified at run-time, or vice versa. The compile-time and run-time +/// information should not contradict. In other words, \a blockRows should equal \a NRows unless +/// \a NRows is \a Dynamic, and the same for the number of columns. +/// +/// Example: \include MatrixBase_template_int_int_block_int_int_int_int.cpp +/// Output: \verbinclude MatrixBase_template_int_int_block_int_int_int_int.cpp +/// +EIGEN_DOC_BLOCK_ADDONS_NOT_INNER_PANEL +/// +/// \sa class Block, block(Index,Index,Index,Index) +/// template inline typename FixedBlockXpr::Type block(Index startRow, Index startCol, Index blockRows, Index blockCols) @@ -764,7 +818,7 @@ inline typename FixedBlockXpr::Type block(Index startRow, Index sta return typename FixedBlockXpr::Type(derived(), startRow, startCol, blockRows, blockCols); } -/** This is the const version of block<>(Index, Index, Index, Index). */ +/// This is the const version of block<>(Index, Index, Index, Index). 
*/ template inline const typename ConstFixedBlockXpr::Type block(Index startRow, Index startCol, Index blockRows, Index blockCols) const @@ -772,60 +826,64 @@ inline const typename ConstFixedBlockXpr::Type block(Index startRow return typename ConstFixedBlockXpr::Type(derived(), startRow, startCol, blockRows, blockCols); } -/** \returns an expression of the \a i-th column of *this. Note that the numbering starts at 0. - * - * Example: \include MatrixBase_col.cpp - * Output: \verbinclude MatrixBase_col.out - * - * \sa row(), class Block */ +/// \returns an expression of the \a i-th column of *this. Note that the numbering starts at 0. +/// +/// Example: \include MatrixBase_col.cpp +/// Output: \verbinclude MatrixBase_col.out +/// +EIGEN_DOC_BLOCK_ADDONS_INNER_PANEL_IF(column-major) +/// +/// \sa row(), class Block */ EIGEN_DEVICE_FUNC inline ColXpr col(Index i) { return ColXpr(derived(), i); } -/** This is the const version of col(). */ +/// This is the const version of col(). */ EIGEN_DEVICE_FUNC inline ConstColXpr col(Index i) const { return ConstColXpr(derived(), i); } -/** \returns an expression of the \a i-th row of *this. Note that the numbering starts at 0. - * - * Example: \include MatrixBase_row.cpp - * Output: \verbinclude MatrixBase_row.out - * - * \sa col(), class Block */ +/// \returns an expression of the \a i-th row of *this. Note that the numbering starts at 0. +/// +/// Example: \include MatrixBase_row.cpp +/// Output: \verbinclude MatrixBase_row.out +/// +EIGEN_DOC_BLOCK_ADDONS_INNER_PANEL_IF(row-major) +/// +/// \sa col(), class Block */ EIGEN_DEVICE_FUNC inline RowXpr row(Index i) { return RowXpr(derived(), i); } -/** This is the const version of row(). */ +/// This is the const version of row(). */ EIGEN_DEVICE_FUNC inline ConstRowXpr row(Index i) const { return ConstRowXpr(derived(), i); } -/** \returns a dynamic-size expression of a segment (i.e. a vector block) in *this. 
- * - * \only_for_vectors - * - * \param start the first coefficient in the segment - * \param n the number of coefficients in the segment - * - * Example: \include MatrixBase_segment_int_int.cpp - * Output: \verbinclude MatrixBase_segment_int_int.out - * - * \note Even though the returned expression has dynamic size, in the case - * when it is applied to a fixed-size vector, it inherits a fixed maximal size, - * which means that evaluating it does not cause a dynamic memory allocation. - * - * \sa class Block, segment(Index) - */ +/// \returns a dynamic-size expression of a segment (i.e. a vector block) in *this. +/// +/// \only_for_vectors +/// +/// \param start the first coefficient in the segment +/// \param n the number of coefficients in the segment +/// +/// Example: \include MatrixBase_segment_int_int.cpp +/// Output: \verbinclude MatrixBase_segment_int_int.out +/// +/// \note Even though the returned expression has dynamic size, in the case +/// when it is applied to a fixed-size vector, it inherits a fixed maximal size, +/// which means that evaluating it does not cause a dynamic memory allocation. +/// +/// \sa class Block, segment(Index) +/// EIGEN_DEVICE_FUNC inline SegmentReturnType segment(Index start, Index n) { @@ -834,7 +892,7 @@ inline SegmentReturnType segment(Index start, Index n) } -/** This is the const version of segment(Index,Index).*/ +/// This is the const version of segment(Index,Index). EIGEN_DEVICE_FUNC inline ConstSegmentReturnType segment(Index start, Index n) const { @@ -842,21 +900,21 @@ inline ConstSegmentReturnType segment(Index start, Index n) const return ConstSegmentReturnType(derived(), start, n); } -/** \returns a dynamic-size expression of the first coefficients of *this. 
- * - * \only_for_vectors - * - * \param n the number of coefficients in the segment - * - * Example: \include MatrixBase_start_int.cpp - * Output: \verbinclude MatrixBase_start_int.out - * - * \note Even though the returned expression has dynamic size, in the case - * when it is applied to a fixed-size vector, it inherits a fixed maximal size, - * which means that evaluating it does not cause a dynamic memory allocation. - * - * \sa class Block, block(Index,Index) - */ +/// \returns a dynamic-size expression of the first coefficients of *this. +/// +/// \only_for_vectors +/// +/// \param n the number of coefficients in the segment +/// +/// Example: \include MatrixBase_start_int.cpp +/// Output: \verbinclude MatrixBase_start_int.out +/// +/// \note Even though the returned expression has dynamic size, in the case +/// when it is applied to a fixed-size vector, it inherits a fixed maximal size, +/// which means that evaluating it does not cause a dynamic memory allocation. +/// +/// \sa class Block, block(Index,Index) +/// EIGEN_DEVICE_FUNC inline SegmentReturnType head(Index n) { @@ -864,7 +922,7 @@ inline SegmentReturnType head(Index n) return SegmentReturnType(derived(), 0, n); } -/** This is the const version of head(Index).*/ +/// This is the const version of head(Index). EIGEN_DEVICE_FUNC inline ConstSegmentReturnType head(Index n) const { @@ -872,21 +930,21 @@ inline ConstSegmentReturnType head(Index n) const return ConstSegmentReturnType(derived(), 0, n); } -/** \returns a dynamic-size expression of the last coefficients of *this. - * - * \only_for_vectors - * - * \param n the number of coefficients in the segment - * - * Example: \include MatrixBase_end_int.cpp - * Output: \verbinclude MatrixBase_end_int.out - * - * \note Even though the returned expression has dynamic size, in the case - * when it is applied to a fixed-size vector, it inherits a fixed maximal size, - * which means that evaluating it does not cause a dynamic memory allocation. 
- * - * \sa class Block, block(Index,Index) - */ +/// \returns a dynamic-size expression of the last coefficients of *this. +/// +/// \only_for_vectors +/// +/// \param n the number of coefficients in the segment +/// +/// Example: \include MatrixBase_end_int.cpp +/// Output: \verbinclude MatrixBase_end_int.out +/// +/// \note Even though the returned expression has dynamic size, in the case +/// when it is applied to a fixed-size vector, it inherits a fixed maximal size, +/// which means that evaluating it does not cause a dynamic memory allocation. +/// +/// \sa class Block, block(Index,Index) +/// EIGEN_DEVICE_FUNC inline SegmentReturnType tail(Index n) { @@ -894,7 +952,7 @@ inline SegmentReturnType tail(Index n) return SegmentReturnType(derived(), this->size() - n, n); } -/** This is the const version of tail(Index).*/ +/// This is the const version of tail(Index). EIGEN_DEVICE_FUNC inline ConstSegmentReturnType tail(Index n) const { @@ -902,22 +960,22 @@ inline ConstSegmentReturnType tail(Index n) const return ConstSegmentReturnType(derived(), this->size() - n, n); } -/** \returns a fixed-size expression of a segment (i.e. a vector block) in \c *this - * - * \only_for_vectors - * - * \tparam N the number of coefficients in the segment as specified at compile-time - * \param start the index of the first element in the segment - * \param n the number of coefficients in the segment as specified at compile-time - * - * The compile-time and run-time information should not contradict. In other words, - * \a n should equal \a N unless \a N is \a Dynamic. - * - * Example: \include MatrixBase_template_int_segment.cpp - * Output: \verbinclude MatrixBase_template_int_segment.out - * - * \sa class Block - */ +/// \returns a fixed-size expression of a segment (i.e. 
a vector block) in \c *this +/// +/// \only_for_vectors +/// +/// \tparam N the number of coefficients in the segment as specified at compile-time +/// \param start the index of the first element in the segment +/// \param n the number of coefficients in the segment as specified at compile-time +/// +/// The compile-time and run-time information should not contradict. In other words, +/// \a n should equal \a N unless \a N is \a Dynamic. +/// +/// Example: \include MatrixBase_template_int_segment.cpp +/// Output: \verbinclude MatrixBase_template_int_segment.out +/// +/// \sa class Block +/// template EIGEN_DEVICE_FUNC inline typename FixedSegmentReturnType::Type segment(Index start, Index n = N) @@ -926,7 +984,7 @@ inline typename FixedSegmentReturnType::Type segment(Index start, Index n = N return typename FixedSegmentReturnType::Type(derived(), start, n); } -/** This is the const version of segment(Index).*/ +/// This is the const version of segment(Index). template EIGEN_DEVICE_FUNC inline typename ConstFixedSegmentReturnType::Type segment(Index start, Index n = N) const @@ -935,21 +993,21 @@ inline typename ConstFixedSegmentReturnType::Type segment(Index start, Index return typename ConstFixedSegmentReturnType::Type(derived(), start, n); } -/** \returns a fixed-size expression of the first coefficients of *this. - * - * \only_for_vectors - * - * \tparam N the number of coefficients in the segment as specified at compile-time - * \param n the number of coefficients in the segment as specified at run-time - * - * The compile-time and run-time information should not contradict. In other words, - * \a n should equal \a N unless \a N is \a Dynamic. - * - * Example: \include MatrixBase_template_int_start.cpp - * Output: \verbinclude MatrixBase_template_int_start.out - * - * \sa class Block - */ +/// \returns a fixed-size expression of the first coefficients of *this. 
+/// +/// \only_for_vectors +/// +/// \tparam N the number of coefficients in the segment as specified at compile-time +/// \param n the number of coefficients in the segment as specified at run-time +/// +/// The compile-time and run-time information should not contradict. In other words, +/// \a n should equal \a N unless \a N is \a Dynamic. +/// +/// Example: \include MatrixBase_template_int_start.cpp +/// Output: \verbinclude MatrixBase_template_int_start.out +/// +/// \sa class Block +/// template EIGEN_DEVICE_FUNC inline typename FixedSegmentReturnType::Type head(Index n = N) @@ -958,7 +1016,7 @@ inline typename FixedSegmentReturnType::Type head(Index n = N) return typename FixedSegmentReturnType::Type(derived(), 0, n); } -/** This is the const version of head().*/ +/// This is the const version of head(). template EIGEN_DEVICE_FUNC inline typename ConstFixedSegmentReturnType::Type head(Index n = N) const @@ -967,21 +1025,21 @@ inline typename ConstFixedSegmentReturnType::Type head(Index n = N) const return typename ConstFixedSegmentReturnType::Type(derived(), 0, n); } -/** \returns a fixed-size expression of the last coefficients of *this. - * - * \only_for_vectors - * - * \tparam N the number of coefficients in the segment as specified at compile-time - * \param n the number of coefficients in the segment as specified at run-time - * - * The compile-time and run-time information should not contradict. In other words, - * \a n should equal \a N unless \a N is \a Dynamic. - * - * Example: \include MatrixBase_template_int_end.cpp - * Output: \verbinclude MatrixBase_template_int_end.out - * - * \sa class Block - */ +/// \returns a fixed-size expression of the last coefficients of *this. +/// +/// \only_for_vectors +/// +/// \tparam N the number of coefficients in the segment as specified at compile-time +/// \param n the number of coefficients in the segment as specified at run-time +/// +/// The compile-time and run-time information should not contradict. 
In other words, +/// \a n should equal \a N unless \a N is \a Dynamic. +/// +/// Example: \include MatrixBase_template_int_end.cpp +/// Output: \verbinclude MatrixBase_template_int_end.out +/// +/// \sa class Block +/// template EIGEN_DEVICE_FUNC inline typename FixedSegmentReturnType::Type tail(Index n = N) @@ -990,7 +1048,7 @@ inline typename FixedSegmentReturnType::Type tail(Index n = N) return typename FixedSegmentReturnType::Type(derived(), size() - n); } -/** This is the const version of tail.*/ +/// This is the const version of tail. template EIGEN_DEVICE_FUNC inline typename ConstFixedSegmentReturnType::Type tail(Index n = N) const diff --git a/Eigen/src/plugins/CMakeLists.txt b/Eigen/src/plugins/CMakeLists.txt deleted file mode 100644 index 1a1d3ffbd..000000000 --- a/Eigen/src/plugins/CMakeLists.txt +++ /dev/null @@ -1,6 +0,0 @@ -FILE(GLOB Eigen_plugins_SRCS "*.h") - -INSTALL(FILES - ${Eigen_plugins_SRCS} - DESTINATION ${INCLUDE_INSTALL_DIR}/Eigen/src/plugins COMPONENT Devel - ) diff --git a/Eigen/src/plugins/CommonCwiseBinaryOps.h b/Eigen/src/plugins/CommonCwiseBinaryOps.h index a8fa287c9..b51ee9e4c 100644 --- a/Eigen/src/plugins/CommonCwiseBinaryOps.h +++ b/Eigen/src/plugins/CommonCwiseBinaryOps.h @@ -1,7 +1,7 @@ // This file is part of Eigen, a lightweight C++ template library // for linear algebra. 
// -// Copyright (C) 2008-2009 Gael Guennebaud +// Copyright (C) 2008-2016 Gael Guennebaud // Copyright (C) 2006-2008 Benoit Jacob // // This Source Code Form is subject to the terms of the Mozilla @@ -16,7 +16,7 @@ * * \sa class CwiseBinaryOp, operator-=() */ -EIGEN_MAKE_CWISE_BINARY_OP(operator-,internal::scalar_difference_op) +EIGEN_MAKE_CWISE_BINARY_OP(operator-,difference) /** \returns an expression of the sum of \c *this and \a other * @@ -24,7 +24,7 @@ EIGEN_MAKE_CWISE_BINARY_OP(operator-,internal::scalar_difference_op) * * \sa class CwiseBinaryOp, operator+=() */ -EIGEN_MAKE_CWISE_BINARY_OP(operator+,internal::scalar_sum_op) +EIGEN_MAKE_CWISE_BINARY_OP(operator+,sum) /** \returns an expression of a custom coefficient-wise operator \a func of *this and \a other * @@ -45,3 +45,33 @@ binaryExpr(const EIGEN_CURRENT_STORAGE_BASE_CLASS &other, const Cu return CwiseBinaryOp(derived(), other.derived(), func); } + +#ifndef EIGEN_PARSED_BY_DOXYGEN +EIGEN_MAKE_SCALAR_BINARY_OP(operator*,product) +#else +/** \returns an expression of \c *this scaled by the scalar factor \a scalar + * + * \tparam T is the scalar type of \a scalar. It must be compatible with the scalar type of the given expression. + */ +template +const CwiseBinaryOp,Derived,Constant > operator*(const T& scalar) const; +/** \returns an expression of \a expr scaled by the scalar factor \a scalar + * + * \tparam T is the scalar type of \a scalar. It must be compatible with the scalar type of the given expression. + */ +template friend +const CwiseBinaryOp,Constant,Derived> operator*(const T& scalar, const StorageBaseType& expr); +#endif + + + +#ifndef EIGEN_PARSED_BY_DOXYGEN +EIGEN_MAKE_SCALAR_BINARY_OP_ONTHERIGHT(operator/,quotient) +#else +/** \returns an expression of \c *this divided by the scalar value \a scalar + * + * \tparam T is the scalar type of \a scalar. It must be compatible with the scalar type of the given expression. 
+ */ +template +const CwiseBinaryOp,Derived,Constant > operator/(const T& scalar) const; +#endif diff --git a/Eigen/src/plugins/CommonCwiseUnaryOps.h b/Eigen/src/plugins/CommonCwiseUnaryOps.h index 050bce03c..89f4faaac 100644 --- a/Eigen/src/plugins/CommonCwiseUnaryOps.h +++ b/Eigen/src/plugins/CommonCwiseUnaryOps.h @@ -12,12 +12,6 @@ #ifndef EIGEN_PARSED_BY_DOXYGEN -/** \internal Represents a scalar multiple of an expression */ -typedef CwiseUnaryOp, const Derived> ScalarMultipleReturnType; -typedef CwiseUnaryOp >, const Derived> ScalarComplexMultipleReturnType; - -/** \internal Represents a quotient of an expression by a scalar*/ -typedef CwiseUnaryOp, const Derived> ScalarQuotient1ReturnType; /** \internal the return type of conjugate() */ typedef typename internal::conditional::IsComplex, const CwiseUnaryOp, const Derived>, @@ -39,65 +33,29 @@ typedef CwiseUnaryOp, const Derived> ImagReturn typedef CwiseUnaryView, Derived> NonConstImagReturnType; typedef CwiseUnaryOp, const Derived> NegativeReturnType; -//typedef CwiseUnaryOp, const Derived> #endif // not EIGEN_PARSED_BY_DOXYGEN -/** \returns an expression of the opposite of \c *this - */ +/// \returns an expression of the opposite of \c *this +/// +EIGEN_DOC_UNARY_ADDONS(operator-,opposite) +/// EIGEN_DEVICE_FUNC inline const NegativeReturnType operator-() const { return NegativeReturnType(derived()); } -/** \returns an expression of \c *this scaled by the scalar factor \a scalar */ -EIGEN_DEVICE_FUNC -inline const ScalarMultipleReturnType -operator*(const Scalar& scalar) const -{ - return ScalarMultipleReturnType(derived(), internal::scalar_multiple_op(scalar)); -} - -#ifdef EIGEN_PARSED_BY_DOXYGEN -const ScalarMultipleReturnType operator*(const RealScalar& scalar) const; -#endif - -/** \returns an expression of \c *this divided by the scalar value \a scalar */ -EIGEN_DEVICE_FUNC -inline const ScalarQuotient1ReturnType -operator/(const Scalar& scalar) const -{ - return ScalarQuotient1ReturnType(derived(), 
internal::scalar_quotient1_op(scalar)); -} - -/** Overloaded for efficient real matrix times complex scalar value */ -EIGEN_DEVICE_FUNC -inline const ScalarComplexMultipleReturnType -operator*(const std::complex& scalar) const -{ - return ScalarComplexMultipleReturnType(derived(), internal::scalar_multiple2_op >(scalar)); -} - -EIGEN_DEVICE_FUNC -inline friend const ScalarMultipleReturnType -operator*(const Scalar& scalar, const StorageBaseType& matrix) -{ return matrix*scalar; } - -EIGEN_DEVICE_FUNC -inline friend const CwiseUnaryOp >, const Derived> -operator*(const std::complex& scalar, const StorageBaseType& matrix) -{ return matrix*scalar; } - - template struct CastXpr { typedef typename internal::cast_return_type, const Derived> >::type Type; }; -/** \returns an expression of *this with the \a Scalar type casted to - * \a NewScalar. - * - * The template parameter \a NewScalar is the type we are casting the scalars to. - * - * \sa class CwiseUnaryOp - */ +/// \returns an expression of \c *this with the \a Scalar type casted to +/// \a NewScalar. +/// +/// The template parameter \a NewScalar is the type we are casting the scalars to. +/// +EIGEN_DOC_UNARY_ADDONS(cast,conversion function) +/// +/// \sa class CwiseUnaryOp +/// template EIGEN_DEVICE_FUNC typename CastXpr::Type @@ -106,9 +64,11 @@ cast() const return typename CastXpr::Type(derived()); } -/** \returns an expression of the complex conjugate of \c *this. - * - * \sa adjoint() */ +/// \returns an expression of the complex conjugate of \c *this. +/// +EIGEN_DOC_UNARY_ADDONS(conjugate,complex conjugate) +/// +/// \sa Math functions, MatrixBase::adjoint() EIGEN_DEVICE_FUNC inline ConjugateReturnType conjugate() const @@ -116,39 +76,45 @@ conjugate() const return ConjugateReturnType(derived()); } -/** \returns a read-only expression of the real part of \c *this. - * - * \sa imag() */ +/// \returns a read-only expression of the real part of \c *this. 
+/// +EIGEN_DOC_UNARY_ADDONS(real,real part function) +/// +/// \sa imag() EIGEN_DEVICE_FUNC inline RealReturnType real() const { return RealReturnType(derived()); } -/** \returns an read-only expression of the imaginary part of \c *this. - * - * \sa real() */ +/// \returns a read-only expression of the imaginary part of \c *this. +/// +EIGEN_DOC_UNARY_ADDONS(imag,imaginary part function) +/// +/// \sa real() EIGEN_DEVICE_FUNC inline const ImagReturnType imag() const { return ImagReturnType(derived()); } -/** \brief Apply a unary operator coefficient-wise - * \param[in] func Functor implementing the unary operator - * \tparam CustomUnaryOp Type of \a func - * \returns An expression of a custom coefficient-wise unary operator \a func of *this - * - * The function \c ptr_fun() from the C++ standard library can be used to make functors out of normal functions. - * - * Example: - * \include class_CwiseUnaryOp_ptrfun.cpp - * Output: \verbinclude class_CwiseUnaryOp_ptrfun.out - * - * Genuine functors allow for more possibilities, for instance it may contain a state. - * - * Example: - * \include class_CwiseUnaryOp.cpp - * Output: \verbinclude class_CwiseUnaryOp.out - * - * \sa class CwiseUnaryOp, class CwiseBinaryOp - */ +/// \brief Apply a unary operator coefficient-wise +/// \param[in] func Functor implementing the unary operator +/// \tparam CustomUnaryOp Type of \a func +/// \returns An expression of a custom coefficient-wise unary operator \a func of *this +/// +/// The function \c ptr_fun() from the C++ standard library can be used to make functors out of normal functions. +/// +/// Example: +/// \include class_CwiseUnaryOp_ptrfun.cpp +/// Output: \verbinclude class_CwiseUnaryOp_ptrfun.out +/// +/// Genuine functors allow for more possibilities, for instance it may contain a state.
+/// +/// Example: +/// \include class_CwiseUnaryOp.cpp +/// Output: \verbinclude class_CwiseUnaryOp.out +/// +EIGEN_DOC_UNARY_ADDONS(unaryExpr,unary function) +/// +/// \sa unaryViewExpr, binaryExpr, class CwiseUnaryOp +/// template EIGEN_DEVICE_FUNC inline const CwiseUnaryOp @@ -157,17 +123,19 @@ unaryExpr(const CustomUnaryOp& func = CustomUnaryOp()) const return CwiseUnaryOp(derived(), func); } -/** \returns an expression of a custom coefficient-wise unary operator \a func of *this - * - * The template parameter \a CustomUnaryOp is the type of the functor - * of the custom unary operator. - * - * Example: - * \include class_CwiseUnaryOp.cpp - * Output: \verbinclude class_CwiseUnaryOp.out - * - * \sa class CwiseUnaryOp, class CwiseBinaryOp - */ +/// \returns an expression of a custom coefficient-wise unary operator \a func of *this +/// +/// The template parameter \a CustomUnaryOp is the type of the functor +/// of the custom unary operator. +/// +/// Example: +/// \include class_CwiseUnaryOp.cpp +/// Output: \verbinclude class_CwiseUnaryOp.out +/// +EIGEN_DOC_UNARY_ADDONS(unaryViewExpr,unary function) +/// +/// \sa unaryExpr, binaryExpr, class CwiseUnaryOp +/// template EIGEN_DEVICE_FUNC inline const CwiseUnaryView @@ -176,16 +144,20 @@ unaryViewExpr(const CustomViewOp& func = CustomViewOp()) const return CwiseUnaryView(derived(), func); } -/** \returns a non const expression of the real part of \c *this. - * - * \sa imag() */ +/// \returns a non const expression of the real part of \c *this. +/// +EIGEN_DOC_UNARY_ADDONS(real,real part function) +/// +/// \sa imag() EIGEN_DEVICE_FUNC inline NonConstRealReturnType real() { return NonConstRealReturnType(derived()); } -/** \returns a non const expression of the imaginary part of \c *this. - * - * \sa real() */ +/// \returns a non const expression of the imaginary part of \c *this.
+/// +EIGEN_DOC_UNARY_ADDONS(imag,imaginary part function) +/// +/// \sa real() EIGEN_DEVICE_FUNC inline NonConstImagReturnType imag() { return NonConstImagReturnType(derived()); } diff --git a/Eigen/src/plugins/MatrixCwiseBinaryOps.h b/Eigen/src/plugins/MatrixCwiseBinaryOps.h index 6dd2e1192..f1084abef 100644 --- a/Eigen/src/plugins/MatrixCwiseBinaryOps.h +++ b/Eigen/src/plugins/MatrixCwiseBinaryOps.h @@ -19,10 +19,10 @@ */ template EIGEN_DEVICE_FUNC -EIGEN_STRONG_INLINE const EIGEN_CWISE_PRODUCT_RETURN_TYPE(Derived,OtherDerived) +EIGEN_STRONG_INLINE const EIGEN_CWISE_BINARY_RETURN_TYPE(Derived,OtherDerived,product) cwiseProduct(const EIGEN_CURRENT_STORAGE_BASE_CLASS &other) const { - return EIGEN_CWISE_PRODUCT_RETURN_TYPE(Derived,OtherDerived)(derived(), other.derived()); + return EIGEN_CWISE_BINARY_RETURN_TYPE(Derived,OtherDerived,product)(derived(), other.derived()); } /** \returns an expression of the coefficient-wise == operator of *this and \a other @@ -74,10 +74,10 @@ cwiseNotEqual(const EIGEN_CURRENT_STORAGE_BASE_CLASS &other) const */ template EIGEN_DEVICE_FUNC -EIGEN_STRONG_INLINE const CwiseBinaryOp, const Derived, const OtherDerived> +EIGEN_STRONG_INLINE const CwiseBinaryOp, const Derived, const OtherDerived> cwiseMin(const EIGEN_CURRENT_STORAGE_BASE_CLASS &other) const { - return CwiseBinaryOp, const Derived, const OtherDerived>(derived(), other.derived()); + return CwiseBinaryOp, const Derived, const OtherDerived>(derived(), other.derived()); } /** \returns an expression of the coefficient-wise min of *this and scalar \a other @@ -85,7 +85,7 @@ cwiseMin(const EIGEN_CURRENT_STORAGE_BASE_CLASS &other) const * \sa class CwiseBinaryOp, min() */ EIGEN_DEVICE_FUNC -EIGEN_STRONG_INLINE const CwiseBinaryOp, const Derived, const ConstantReturnType> +EIGEN_STRONG_INLINE const CwiseBinaryOp, const Derived, const ConstantReturnType> cwiseMin(const Scalar &other) const { return cwiseMin(Derived::Constant(rows(), cols(), other)); @@ -100,10 +100,10 @@ 
cwiseMin(const Scalar &other) const */ template EIGEN_DEVICE_FUNC -EIGEN_STRONG_INLINE const CwiseBinaryOp, const Derived, const OtherDerived> +EIGEN_STRONG_INLINE const CwiseBinaryOp, const Derived, const OtherDerived> cwiseMax(const EIGEN_CURRENT_STORAGE_BASE_CLASS &other) const { - return CwiseBinaryOp, const Derived, const OtherDerived>(derived(), other.derived()); + return CwiseBinaryOp, const Derived, const OtherDerived>(derived(), other.derived()); } /** \returns an expression of the coefficient-wise max of *this and scalar \a other @@ -111,7 +111,7 @@ cwiseMax(const EIGEN_CURRENT_STORAGE_BASE_CLASS &other) const * \sa class CwiseBinaryOp, min() */ EIGEN_DEVICE_FUNC -EIGEN_STRONG_INLINE const CwiseBinaryOp, const Derived, const ConstantReturnType> +EIGEN_STRONG_INLINE const CwiseBinaryOp, const Derived, const ConstantReturnType> cwiseMax(const Scalar &other) const { return cwiseMax(Derived::Constant(rows(), cols(), other)); @@ -133,7 +133,7 @@ cwiseQuotient(const EIGEN_CURRENT_STORAGE_BASE_CLASS &other) const return CwiseBinaryOp, const Derived, const OtherDerived>(derived(), other.derived()); } -typedef CwiseBinaryOp, const Derived, const ConstantReturnType> CwiseScalarEqualReturnType; +typedef CwiseBinaryOp, const Derived, const ConstantReturnType> CwiseScalarEqualReturnType; /** \returns an expression of the coefficient-wise == operator of \c *this and a scalar \a s * @@ -148,5 +148,5 @@ EIGEN_DEVICE_FUNC inline const CwiseScalarEqualReturnType cwiseEqual(const Scalar& s) const { - return CwiseScalarEqualReturnType(derived(), Derived::Constant(rows(), cols(), s), internal::scalar_cmp_op()); + return CwiseScalarEqualReturnType(derived(), Derived::Constant(rows(), cols(), s), internal::scalar_cmp_op()); } diff --git a/Eigen/src/plugins/MatrixCwiseUnaryOps.h b/Eigen/src/plugins/MatrixCwiseUnaryOps.h index e16bb374b..b1be3d566 100644 --- a/Eigen/src/plugins/MatrixCwiseUnaryOps.h +++ b/Eigen/src/plugins/MatrixCwiseUnaryOps.h @@ -11,63 +11,75 @@ // This file is 
included into the body of the base classes supporting matrix specific coefficient-wise functions. // This include MatrixBase and SparseMatrixBase. + typedef CwiseUnaryOp, const Derived> CwiseAbsReturnType; typedef CwiseUnaryOp, const Derived> CwiseAbs2ReturnType; typedef CwiseUnaryOp, const Derived> CwiseSqrtReturnType; typedef CwiseUnaryOp, const Derived> CwiseSignReturnType; typedef CwiseUnaryOp, const Derived> CwiseInverseReturnType; -/** \returns an expression of the coefficient-wise absolute value of \c *this - * - * Example: \include MatrixBase_cwiseAbs.cpp - * Output: \verbinclude MatrixBase_cwiseAbs.out - * - * \sa cwiseAbs2() - */ +/// \returns an expression of the coefficient-wise absolute value of \c *this +/// +/// Example: \include MatrixBase_cwiseAbs.cpp +/// Output: \verbinclude MatrixBase_cwiseAbs.out +/// +EIGEN_DOC_UNARY_ADDONS(cwiseAbs,absolute value) +/// +/// \sa cwiseAbs2() +/// EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const CwiseAbsReturnType cwiseAbs() const { return CwiseAbsReturnType(derived()); } -/** \returns an expression of the coefficient-wise squared absolute value of \c *this - * - * Example: \include MatrixBase_cwiseAbs2.cpp - * Output: \verbinclude MatrixBase_cwiseAbs2.out - * - * \sa cwiseAbs() - */ +/// \returns an expression of the coefficient-wise squared absolute value of \c *this +/// +/// Example: \include MatrixBase_cwiseAbs2.cpp +/// Output: \verbinclude MatrixBase_cwiseAbs2.out +/// +EIGEN_DOC_UNARY_ADDONS(cwiseAbs2,squared absolute value) +/// +/// \sa cwiseAbs() +/// EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const CwiseAbs2ReturnType cwiseAbs2() const { return CwiseAbs2ReturnType(derived()); } -/** \returns an expression of the coefficient-wise square root of *this. - * - * Example: \include MatrixBase_cwiseSqrt.cpp - * Output: \verbinclude MatrixBase_cwiseSqrt.out - * - * \sa cwisePow(), cwiseSquare() - */ +/// \returns an expression of the coefficient-wise square root of *this. 
+/// +/// Example: \include MatrixBase_cwiseSqrt.cpp +/// Output: \verbinclude MatrixBase_cwiseSqrt.out +/// +EIGEN_DOC_UNARY_ADDONS(cwiseSqrt,square-root) +/// +/// \sa cwisePow(), cwiseSquare() +/// EIGEN_DEVICE_FUNC inline const CwiseSqrtReturnType cwiseSqrt() const { return CwiseSqrtReturnType(derived()); } -/** \returns an expression of the coefficient-wise signum of *this. - * - * Example: \include MatrixBase_cwiseSign.cpp - * Output: \verbinclude MatrixBase_cwiseSign.out - * - */ +/// \returns an expression of the coefficient-wise signum of *this. +/// +/// Example: \include MatrixBase_cwiseSign.cpp +/// Output: \verbinclude MatrixBase_cwiseSign.out +/// +EIGEN_DOC_UNARY_ADDONS(cwiseSign,sign function) +/// EIGEN_DEVICE_FUNC inline const CwiseSignReturnType cwiseSign() const { return CwiseSignReturnType(derived()); } -/** \returns an expression of the coefficient-wise inverse of *this. - * - * Example: \include MatrixBase_cwiseInverse.cpp - * Output: \verbinclude MatrixBase_cwiseInverse.out - * - * \sa cwiseProduct() - */ +/// \returns an expression of the coefficient-wise inverse of *this. 
+/// +/// Example: \include MatrixBase_cwiseInverse.cpp +/// Output: \verbinclude MatrixBase_cwiseInverse.out +/// +EIGEN_DOC_UNARY_ADDONS(cwiseInverse,inverse) +/// +/// \sa cwiseProduct() +/// EIGEN_DEVICE_FUNC inline const CwiseInverseReturnType cwiseInverse() const { return CwiseInverseReturnType(derived()); } + + diff --git a/bench/benchCholesky.cpp b/bench/benchCholesky.cpp index 42b3e1285..9a8e7cf63 100644 --- a/bench/benchCholesky.cpp +++ b/bench/benchCholesky.cpp @@ -31,7 +31,7 @@ __attribute__ ((noinline)) void benchLLT(const MatrixType& m) int rows = m.rows(); int cols = m.cols(); - int cost = 0; + double cost = 0; for (int j=0; j0; ++i) + for (int i=0; dynsizes[i]>0; ++i) benchLLT(Matrix(dynsizes[i],dynsizes[i])); benchLLT(Matrix()); diff --git a/bench/btl/libs/blaze/CMakeLists.txt b/bench/btl/libs/blaze/CMakeLists.txt index f8b1b2ec3..e99a0855c 100644 --- a/bench/btl/libs/blaze/CMakeLists.txt +++ b/bench/btl/libs/blaze/CMakeLists.txt @@ -1,10 +1,13 @@ find_package(BLAZE) -find_package(Boost) +find_package(Boost COMPONENTS system) if (BLAZE_FOUND AND Boost_FOUND) include_directories(${BLAZE_INCLUDE_DIR} ${Boost_INCLUDE_DIRS}) btl_add_bench(btl_blaze main.cpp) + # Note: The newest blaze version requires C++14. 
+ # Ideally, we should set this depending on the version of Blaze we found + set_property(TARGET btl_blaze PROPERTY CXX_STANDARD 14) if(BUILD_btl_blaze) - target_link_libraries(btl_blaze ${Boost_LIBRARIES} ${Boost_system_LIBRARY} /opt/local/lib/libboost_system-mt.a ) + target_link_libraries(btl_blaze ${Boost_LIBRARIES}) endif() endif () diff --git a/bench/dense_solvers.cpp b/bench/dense_solvers.cpp index aa4ff011f..24343dcd8 100644 --- a/bench/dense_solvers.cpp +++ b/bench/dense_solvers.cpp @@ -2,47 +2,74 @@ #include "BenchTimer.h" #include #include +#include #include +#include using namespace Eigen; -std::map > results; +std::map > results; +std::vector labels; +std::vector sizes; + +template +EIGEN_DONT_INLINE +void compute_norm_equation(Solver &solver, const MatrixType &A) { + if(A.rows()!=A.cols()) + solver.compute(A.transpose()*A); + else + solver.compute(A); +} + +template +EIGEN_DONT_INLINE +void compute(Solver &solver, const MatrixType &A) { + solver.compute(A); +} template -void bench(int id, int size = Size) +void bench(int id, int rows, int size = Size) { - typedef Matrix Mat; - Mat A(size,size); + typedef Matrix Mat; + typedef Matrix MatDyn; + typedef Matrix MatSquare; + Mat A(rows,size); A.setRandom(); - A = A*A.adjoint(); + if(rows==size) + A = A*A.adjoint(); BenchTimer t_llt, t_ldlt, t_lu, t_fplu, t_qr, t_cpqr, t_cod, t_fpqr, t_jsvd, t_bdcsvd; + + int svd_opt = ComputeThinU|ComputeThinV; - int tries = 3; + int tries = 5; int rep = 1000/size; if(rep==0) rep = 1; // rep = rep*rep; - LLT llt(A); - LDLT ldlt(A); - PartialPivLU lu(A); - FullPivLU fplu(A); - HouseholderQR qr(A); - ColPivHouseholderQR cpqr(A); - CompleteOrthogonalDecomposition cod(A); - FullPivHouseholderQR fpqr(A); - JacobiSVD jsvd(A.rows(),A.cols()); - BDCSVD bdcsvd(A.rows(),A.cols()); + LLT llt(size); + LDLT ldlt(size); + PartialPivLU lu(size); + FullPivLU fplu(size,size); + HouseholderQR qr(A.rows(),A.cols()); + ColPivHouseholderQR cpqr(A.rows(),A.cols()); + 
CompleteOrthogonalDecomposition cod(A.rows(),A.cols()); + FullPivHouseholderQR fpqr(A.rows(),A.cols()); + JacobiSVD jsvd(A.rows(),A.cols()); + BDCSVD bdcsvd(A.rows(),A.cols()); - BENCH(t_llt, tries, rep, llt.compute(A)); - BENCH(t_ldlt, tries, rep, ldlt.compute(A)); - BENCH(t_lu, tries, rep, lu.compute(A)); - BENCH(t_fplu, tries, rep, fplu.compute(A)); - BENCH(t_qr, tries, rep, qr.compute(A)); - BENCH(t_cpqr, tries, rep, cpqr.compute(A)); - BENCH(t_cod, tries, rep, cod.compute(A)); - BENCH(t_fpqr, tries, rep, fpqr.compute(A)); + BENCH(t_llt, tries, rep, compute_norm_equation(llt,A)); + BENCH(t_ldlt, tries, rep, compute_norm_equation(ldlt,A)); + BENCH(t_lu, tries, rep, compute_norm_equation(lu,A)); + if(size<=1000) + BENCH(t_fplu, tries, rep, compute_norm_equation(fplu,A)); + BENCH(t_qr, tries, rep, compute(qr,A)); + BENCH(t_cpqr, tries, rep, compute(cpqr,A)); + BENCH(t_cod, tries, rep, compute(cod,A)); + if(size*rows<=10000000) + BENCH(t_fpqr, tries, rep, compute(fpqr,A)); if(size<500) // JacobiSVD is really too slow for too large matrices - BENCH(t_jsvd, tries, rep, jsvd.compute(A,ComputeFullU|ComputeFullV)); - BENCH(t_bdcsvd, tries, rep, bdcsvd.compute(A,ComputeFullU|ComputeFullV)); + BENCH(t_jsvd, tries, rep, jsvd.compute(A,svd_opt)); +// if(size*rows<=20000000) + BENCH(t_bdcsvd, tries, rep, bdcsvd.compute(A,svd_opt)); results["LLT"][id] = t_llt.best(); results["LDLT"][id] = t_ldlt.best(); @@ -52,33 +79,108 @@ void bench(int id, int size = Size) results["ColPivHouseholderQR"][id] = t_cpqr.best(); results["CompleteOrthogonalDecomposition"][id] = t_cod.best(); results["FullPivHouseholderQR"][id] = t_fpqr.best(); - results["JacobiSVD"][id] = size<500 ? 
t_jsvd.best() : 0; + results["JacobiSVD"][id] = t_jsvd.best(); results["BDCSVD"][id] = t_bdcsvd.best(); } + int main() { + labels.push_back("LLT"); + labels.push_back("LDLT"); + labels.push_back("PartialPivLU"); + labels.push_back("FullPivLU"); + labels.push_back("HouseholderQR"); + labels.push_back("ColPivHouseholderQR"); + labels.push_back("CompleteOrthogonalDecomposition"); + labels.push_back("FullPivHouseholderQR"); + labels.push_back("JacobiSVD"); + labels.push_back("BDCSVD"); + + for(int i=0; i(0); - bench(1,medium); - bench(2,large); - bench(3,xl); - - IOFormat fmt(3, 0, " \t", "\n", "", ""); - - std::cout << "solver/size " << small << "\t" << medium << "\t" << large << "\t" << xl << "\n"; - std::cout << "LLT (ms) " << (results["LLT"]/1000.).format(fmt) << "\n"; - std::cout << "LDLT (%) " << (results["LDLT"]/results["LLT"]).format(fmt) << "\n"; - std::cout << "PartialPivLU (%) " << (results["PartialPivLU"]/results["LLT"]).format(fmt) << "\n"; - std::cout << "FullPivLU (%) " << (results["FullPivLU"]/results["LLT"]).format(fmt) << "\n"; - std::cout << "HouseholderQR (%) " << (results["HouseholderQR"]/results["LLT"]).format(fmt) << "\n"; - std::cout << "ColPivHouseholderQR (%) " << (results["ColPivHouseholderQR"]/results["LLT"]).format(fmt) << "\n"; - std::cout << "CompleteOrthogonalDecomposition (%) " << (results["CompleteOrthogonalDecomposition"]/results["LLT"]).format(fmt) << "\n"; - std::cout << "FullPivHouseholderQR (%) " << (results["FullPivHouseholderQR"]/results["LLT"]).format(fmt) << "\n"; - std::cout << "JacobiSVD (%) " << (results["JacobiSVD"]/results["LLT"]).format(fmt) << "\n"; - std::cout << "BDCSVD (%) " << (results["BDCSVD"]/results["LLT"]).format(fmt) << "\n"; + sizes.push_back(Array2i(small,small)); + sizes.push_back(Array2i(100,100)); + sizes.push_back(Array2i(1000,1000)); + sizes.push_back(Array2i(4000,4000)); + sizes.push_back(Array2i(10000,small)); + sizes.push_back(Array2i(10000,100)); + sizes.push_back(Array2i(10000,1000)); + 
sizes.push_back(Array2i(10000,4000)); + + using namespace std; + + for(int k=0; k(k,sizes[k](0),sizes[k](1)); + } + + cout.width(32); + cout << "solver/size"; + cout << " "; + for(int k=0; k=1e6) cout << "-"; + else cout << r(k); + cout << " "; + } + cout << endl; + } + + // HTML output + cout << "" << endl; + cout << "" << endl; + for(int k=0; k" << sizes[k](0) << "x" << sizes[k](1) << ""; + cout << "" << endl; + for(int i=0; i"; + ArrayXf r = (results[labels[i]]*100000.f).floor()/100.f; + for(int k=0; k=1e6) cout << ""; + else + { + cout << ""; + } + } + cout << "" << endl; + } + cout << "
solver/size
" << labels[i] << "-" << r(k); + if(i>0) + cout << " (x" << numext::round(10.f*results[labels[i]](k)/results["LLT"](k))/10.f << ")"; + if(i<4 && sizes[k](0)!=sizes[k](1)) + cout << " *"; + cout << "
" << endl; + +// cout << "LLT (ms) " << (results["LLT"]*1000.).format(fmt) << "\n"; +// cout << "LDLT (%) " << (results["LDLT"]/results["LLT"]).format(fmt) << "\n"; +// cout << "PartialPivLU (%) " << (results["PartialPivLU"]/results["LLT"]).format(fmt) << "\n"; +// cout << "FullPivLU (%) " << (results["FullPivLU"]/results["LLT"]).format(fmt) << "\n"; +// cout << "HouseholderQR (%) " << (results["HouseholderQR"]/results["LLT"]).format(fmt) << "\n"; +// cout << "ColPivHouseholderQR (%) " << (results["ColPivHouseholderQR"]/results["LLT"]).format(fmt) << "\n"; +// cout << "CompleteOrthogonalDecomposition (%) " << (results["CompleteOrthogonalDecomposition"]/results["LLT"]).format(fmt) << "\n"; +// cout << "FullPivHouseholderQR (%) " << (results["FullPivHouseholderQR"]/results["LLT"]).format(fmt) << "\n"; +// cout << "JacobiSVD (%) " << (results["JacobiSVD"]/results["LLT"]).format(fmt) << "\n"; +// cout << "BDCSVD (%) " << (results["BDCSVD"]/results["LLT"]).format(fmt) << "\n"; } diff --git a/bench/perf_monitoring/gemm/changesets.txt b/bench/perf_monitoring/gemm/changesets.txt index fb3e48e99..af8eb9b8f 100644 --- a/bench/perf_monitoring/gemm/changesets.txt +++ b/bench/perf_monitoring/gemm/changesets.txt @@ -42,6 +42,20 @@ before-evaluators 6984:45f26866c091 # rm dynamic loop swapping, adjust lhs's micro panel height to fully exploit L1 cache 6986:a675d05b6f8f # blocking heuristic: block on the rhs in L1 if the lhs fit in L1. 7013:f875e75f07e5 # organize a little our default cache sizes, and use a saner default L1 outside of x86 (10% faster on Nexus 5) +7015:8aad8f35c955 # Refactor computeProductBlockingSizes to make room for the possibility of using lookup tables +7016:a58d253e8c91 # Polish lookup tables generation +7018:9b27294a8186 # actual_panel_rows computation should always be resilient to parameters not consistent with the known L1 cache size, see comment +7019:c758b1e2c073 # Provide a empirical lookup table for blocking sizes measured on a Nexus 5. 
Only for float, only for Android on ARM 32bit for now. +7085:627e039fba68 # Bug 986: add support for coefficient-based product with 0 depth. +7098:b6f1db9cf9ec # Bug 992: don't select a 3p GEMM path with non-vectorizable scalar types, this hits unsupported paths in symm/triangular products code 7591:09a8e2186610 # 3.3-alpha1 7650:b0f3c8f43025 # help clang inlining - +#8744:74b789ada92a # Improved the matrix multiplication blocking in the case where mr is not a power of 2 (e.g on Haswell CPUs) +8789:efcb912e4356 # Made the index type a template parameter to evaluateProductBlockingSizes. Use numext::mini and numext::maxi instead of std::min/std::max to compute blocking sizes +8972:81d53c711775 # Don't optimize the processing of the last rows of a matrix matrix product in cases that violate the assumptions made by the optimized code path +8985:d935df21a082 # Remove the rotating kernel. +8988:6c2dc56e73b3 # Bug 256: enable vectorization with unaligned loads/stores. +9148:b8b8c421e36c # Relax mixing-type constraints for binary coefficient-wise operators +9174:d228bc282ac9 # merge +9212:c90098affa7b # Fix performance regression introduced in changeset 8aad8f35c955 +9213:9f1c14e4694b # Fix performance regression in dgemm introduced by changeset 81d53c711775 diff --git a/bench/perf_monitoring/gemm/lazy_gemm.cpp b/bench/perf_monitoring/gemm/lazy_gemm.cpp index b443218d7..6dc370155 100644 --- a/bench/perf_monitoring/gemm/lazy_gemm.cpp +++ b/bench/perf_monitoring/gemm/lazy_gemm.cpp @@ -12,12 +12,13 @@ using namespace Eigen; typedef SCALAR Scalar; template -inline void lazy_gemm(const MatA &A, const MatB &B, MatC &C) +EIGEN_DONT_INLINE +void lazy_gemm(const MatA &A, const MatB &B, MatC &C) { - escape((void*)A.data()); - escape((void*)B.data()); +// escape((void*)A.data()); +// escape((void*)B.data()); C.noalias() += A.lazyProduct(B); - escape((void*)C.data()); +// escape((void*)C.data()); } template diff --git a/bench/perf_monitoring/gemm/make_plot.sh 
b/bench/perf_monitoring/gemm/make_plot.sh index 4d6053501..cd3214ac9 100755 --- a/bench/perf_monitoring/gemm/make_plot.sh +++ b/bench/perf_monitoring/gemm/make_plot.sh @@ -25,7 +25,7 @@ echo "set xtics rotate 1" >> $WHAT.gnuplot echo "set term pdf color rounded enhanced fontscale 0.35 size 7in,5in" >> $WHAT.gnuplot echo set output "'"$WHAT.pdf"'" >> $WHAT.gnuplot -col=`cat settings.txt | wc -l` +col=`cat $bench"_settings.txt" | wc -l` echo "plot for [col=2:$col+1] '$WHAT.out.header' using 0:col:xticlabels(1) with lines" >> $WHAT.gnuplot echo " " >> $WHAT.gnuplot diff --git a/bench/perf_monitoring/gemm/run.sh b/bench/perf_monitoring/gemm/run.sh index bfb4ecfac..9d6ee40bc 100755 --- a/bench/perf_monitoring/gemm/run.sh +++ b/bench/perf_monitoring/gemm/run.sh @@ -138,15 +138,15 @@ do done echo "Float:" -cat $PREFIX"s"$bench.out" -echo "" +cat $PREFIX"s""$bench.out" +echo " " echo "Double:" -cat $PREFIX"d"$bench.out" +cat $PREFIX"d""$bench.out" echo "" echo "Complex:" -cat $PREFIX"c"$bench.out" +cat $PREFIX"c""$bench.out" echo "" ./make_plot.sh $PREFIX"s"$bench $bench diff --git a/bench/tensors/README b/bench/tensors/README index 4398aa81b..803cb8ef8 100644 --- a/bench/tensors/README +++ b/bench/tensors/README @@ -1,12 +1,15 @@ -Each benchmark comes in 2 flavors: one that runs on CPU, and one that runs on GPU. +The tensor benchmark suite is made of several parts. + +The first part is a generic suite, in which each benchmark comes in 2 flavors: one that runs on CPU, and one that runs on GPU. 
To compile the floating point CPU benchmarks, simply call: g++ tensor_benchmarks_cpu.cc benchmark_main.cc -I ../../ -std=c++11 -O3 -DNDEBUG -pthread -mavx -o benchmarks_cpu To compile the floating point GPU benchmarks, simply call: -nvcc tensor_benchmarks_gpu.cu benchmark_main.cc -I ../../ -std=c++11 -O2 -DNDEBUG -arch compute_35 -o benchmarks_gpu +nvcc tensor_benchmarks_gpu.cu benchmark_main.cc -I ../../ -std=c++11 -O2 -DNDEBUG -use_fast_math -ftz=true -arch compute_35 -o benchmarks_gpu +We also provide a version of the generic GPU tensor benchmarks that uses half floats (aka fp16) instead of regular floats. To compile these benchmarks, simply call the command line below. You'll need a recent GPU that supports compute capability 5.3 or higher to run them and nvcc 7.5 or higher to compile the code. +nvcc tensor_benchmarks_fp16_gpu.cu benchmark_main.cc -I ../../ -std=c++11 -O2 -DNDEBUG -use_fast_math -ftz=true -arch compute_53 -o benchmarks_fp16_gpu -To compile the half float GPU benchmarks, simply call the command line below. You'll need a recent GPU that supports compute capability 5.3 or higher to run them and nvcc 7.5 or higher to compile the code. -nvcc tensor_benchmarks_fp16_gpu.cu benchmark_main.cc -I ../../ -std=c++11 -O2 -DNDEBUG -arch compute_53 -o benchmarks_fp16_gpu - +Last but not least, we also provide a suite of benchmarks to measure the scalability of the contraction code on CPU.
To compile these benchmarks, call +g++ contraction_benchmarks_cpu.cc benchmark_main.cc -I ../../ -std=c++11 -O3 -DNDEBUG -pthread -mavx -o benchmarks_cpu diff --git a/bench/tensors/contraction_benchmarks_cpu.cc b/bench/tensors/contraction_benchmarks_cpu.cc new file mode 100644 index 000000000..f9e57ad47 --- /dev/null +++ b/bench/tensors/contraction_benchmarks_cpu.cc @@ -0,0 +1,39 @@ +#define EIGEN_USE_THREADS + +#include + +#include "tensor_benchmarks.h" + +#define CREATE_THREAD_POOL(threads) \ +Eigen::ThreadPool pool(threads); \ +Eigen::ThreadPoolDevice device(&pool, threads); + + +// Contractions for number of threads ranging from 1 to 32 +// Dimensions are Rows, Cols, Depth +#define BM_ContractionCPU(D1, D2, D3) \ + static void BM_##Contraction##_##D1##x##D2##x##D3(int iters, int Threads) { \ + StopBenchmarkTiming(); \ + CREATE_THREAD_POOL(Threads); \ + BenchmarkSuite suite(device, D1, D2, D3); \ + suite.contraction(iters); \ + } \ + BENCHMARK_RANGE(BM_##Contraction##_##D1##x##D2##x##D3, 1, 32); + + +// Vector Matrix and Matrix Vector products +BM_ContractionCPU(1, 2000, 500); +BM_ContractionCPU(2000, 1, 500); + +// Various skinny matrices +BM_ContractionCPU(250, 3, 512); +BM_ContractionCPU(1500, 3, 512); + +BM_ContractionCPU(512, 800, 4); +BM_ContractionCPU(512, 80, 800); +BM_ContractionCPU(512, 80, 13522); +BM_ContractionCPU(1, 80, 13522); + +BM_ContractionCPU(3200, 512, 4); +BM_ContractionCPU(3200, 512, 80); +BM_ContractionCPU(3200, 80, 512); diff --git a/bench/tensors/tensor_benchmarks.h b/bench/tensors/tensor_benchmarks.h index 62533a608..c2fb3dede 100644 --- a/bench/tensors/tensor_benchmarks.h +++ b/bench/tensors/tensor_benchmarks.h @@ -178,9 +178,14 @@ template class BenchmarkSuite { size_b[1] = m_; TensorMap, Eigen::Aligned> B(b_, size_b); +#if defined(EIGEN_HAS_INDEX_LIST) + Eigen::IndexPairList, + Eigen::type2indexpair<2, 1> > paddings; +#else Eigen::array, 2> paddings; paddings[0] = Eigen::IndexPair(0, 0); paddings[1] = Eigen::IndexPair(2, 1); +#endif 
StartBenchmarkTiming(); for (int iter = 0; iter < num_iters; ++iter) { @@ -368,7 +373,7 @@ template class BenchmarkSuite { const TensorMap, Eigen::Aligned> B( b_, input_size); Eigen::array output_size; - TensorMap, Eigen::Aligned> C( + TensorMap, Eigen::Aligned> C( c_, output_size); StartBenchmarkTiming(); diff --git a/bench/tensors/tensor_benchmarks_fp16_gpu.cu b/bench/tensors/tensor_benchmarks_fp16_gpu.cu index 14876556e..65784d0d6 100644 --- a/bench/tensors/tensor_benchmarks_fp16_gpu.cu +++ b/bench/tensors/tensor_benchmarks_fp16_gpu.cu @@ -33,6 +33,7 @@ BM_FuncGPU(algebraicFunc); BM_FuncGPU(transcendentalFunc); BM_FuncGPU(rowReduction); BM_FuncGPU(colReduction); +BM_FuncGPU(fullReduction); // Contractions diff --git a/blas/PackedTriangularMatrixVector.h b/blas/PackedTriangularMatrixVector.h index e9886d56f..0039536a8 100644 --- a/blas/PackedTriangularMatrixVector.h +++ b/blas/PackedTriangularMatrixVector.h @@ -18,7 +18,7 @@ struct packed_triangular_matrix_vector_product; template struct packed_triangular_matrix_vector_product { - typedef typename scalar_product_traits::ReturnType ResScalar; + typedef typename ScalarBinaryOpTraits::ReturnType ResScalar; enum { IsLower = (Mode & Lower) ==Lower, HasUnitDiag = (Mode & UnitDiag)==UnitDiag, @@ -47,7 +47,7 @@ struct packed_triangular_matrix_vector_product struct packed_triangular_matrix_vector_product { - typedef typename scalar_product_traits::ReturnType ResScalar; + typedef typename ScalarBinaryOpTraits::ReturnType ResScalar; enum { IsLower = (Mode & Lower) ==Lower, HasUnitDiag = (Mode & UnitDiag)==UnitDiag, diff --git a/blas/single.cpp b/blas/single.cpp index 836e3eee2..20ea57d5c 100644 --- a/blas/single.cpp +++ b/blas/single.cpp @@ -19,4 +19,4 @@ #include "level3_impl.h" float BLASFUNC(sdsdot)(int* n, float* alpha, float* x, int* incx, float* y, int* incy) -{ return *alpha + BLASFUNC(dsdot)(n, x, incx, y, incy); } +{ return double(*alpha) + BLASFUNC(dsdot)(n, x, incx, y, incy); } diff --git 
a/cmake/EigenTesting.cmake b/cmake/EigenTesting.cmake index 206f2d93d..f53f46087 100644 --- a/cmake/EigenTesting.cmake +++ b/cmake/EigenTesting.cmake @@ -440,6 +440,8 @@ macro(ei_get_compilerver_from_cxx_version_string VERSTRING CNAME CVER) set(${CNAME} "llvm-g++") elseif((ei_has_llvm) AND (ei_has_clang)) set(${CNAME} "llvm-clang++") + elseif(ei_has_clang) + set(${CNAME} "clang++") elseif(ei_has_icpc) set(${CNAME} "icpc") elseif(ei_has_gpp OR ei_has_gcc) diff --git a/cmake/FindEigen3.cmake b/cmake/FindEigen3.cmake index cea1afeab..9e9697860 100644 --- a/cmake/FindEigen3.cmake +++ b/cmake/FindEigen3.cmake @@ -66,16 +66,23 @@ if (EIGEN3_INCLUDE_DIR) set(EIGEN3_FOUND ${EIGEN3_VERSION_OK}) else (EIGEN3_INCLUDE_DIR) + + # search first if an Eigen3Config.cmake is available in the system, + # if successful this would set EIGEN3_INCLUDE_DIR and the rest of + # the script will work as usual + find_package(Eigen3 ${Eigen3_FIND_VERSION} NO_MODULE QUIET) - find_path(EIGEN3_INCLUDE_DIR NAMES signature_of_eigen3_matrix_library - HINTS - ENV EIGEN3_ROOT - ENV EIGEN3_ROOT_DIR - PATHS - ${CMAKE_INSTALL_PREFIX}/include - ${KDE4_INCLUDE_DIR} - PATH_SUFFIXES eigen3 eigen - ) + if(NOT EIGEN3_INCLUDE_DIR) + find_path(EIGEN3_INCLUDE_DIR NAMES signature_of_eigen3_matrix_library + HINTS + ENV EIGEN3_ROOT + ENV EIGEN3_ROOT_DIR + PATHS + ${CMAKE_INSTALL_PREFIX}/include + ${KDE4_INCLUDE_DIR} + PATH_SUFFIXES eigen3 eigen + ) + endif(NOT EIGEN3_INCLUDE_DIR) if(EIGEN3_INCLUDE_DIR) _eigen3_check_version() diff --git a/cmake/FindSuperLU.cmake b/cmake/FindSuperLU.cmake index e4142fe4d..f38146e06 100644 --- a/cmake/FindSuperLU.cmake +++ b/cmake/FindSuperLU.cmake @@ -17,7 +17,10 @@ find_path(SUPERLU_INCLUDES SRC ) -find_library(SUPERLU_LIBRARIES NAMES "superlu_4.3" "superlu_4.2" "superlu_4.1" "superlu_4.0" "superlu_3.1" "superlu_3.0" "superlu" PATHS $ENV{SUPERLUDIR} ${LIB_INSTALL_DIR} PATH_SUFFIXES lib) +find_library(SUPERLU_LIBRARIES + NAMES "superlu_5.2.1" "superlu_5.2" "superlu_5.1.1" 
"superlu_5.1" "superlu_5.0" "superlu_4.3" "superlu_4.2" "superlu_4.1" "superlu_4.0" "superlu_3.1" "superlu_3.0" "superlu" + PATHS $ENV{SUPERLUDIR} ${LIB_INSTALL_DIR} + PATH_SUFFIXES lib) if(SUPERLU_INCLUDES AND SUPERLU_LIBRARIES) @@ -48,11 +51,25 @@ int main() { }" SUPERLU_HAS_CLEAN_ENUMS) -if(SUPERLU_HAS_CLEAN_ENUMS) +check_cxx_source_compiles(" +typedef int int_t; +#include +#include +int main(void) +{ + GlobalLU_t glu; + return 0; +}" +SUPERLU_HAS_GLOBALLU_T) + +if(SUPERLU_HAS_GLOBALLU_T) + # at least 5.0 + set(SUPERLU_VERSION_VAR "5.0") +elseif(SUPERLU_HAS_CLEAN_ENUMS) # at least 4.3 set(SUPERLU_VERSION_VAR "4.3") elseif(SUPERLU_HAS_GLOBAL_MEM_USAGE_T) - # at least 4.3 + # at least 4.0 set(SUPERLU_VERSION_VAR "4.0") else() set(SUPERLU_VERSION_VAR "3.0") diff --git a/doc/A05_PortingFrom2To3.dox b/doc/A05_PortingFrom2To3.dox index 0dbddb976..51555f996 100644 --- a/doc/A05_PortingFrom2To3.dox +++ b/doc/A05_PortingFrom2To3.dox @@ -261,7 +261,7 @@ use it unless you are sure of what you are doing, i.e., you have rigourosly meas The EIGEN_ALIGN_128 macro has been renamed to EIGEN_ALIGN16. Don't be surprised, it's just that we switched to counting in bytes ;-) -The EIGEN_DONT_ALIGN option still exists in Eigen 3, but it has a new cousin: EIGEN_DONT_ALIGN_STATICALLY. It allows to get rid of all static alignment issues while keeping alignment of dynamic-size heap-allocated arrays, thus keeping vectorization for dynamic-size objects. +The \link TopicPreprocessorDirectivesPerformance EIGEN_DONT_ALIGN \endlink option still exists in Eigen 3, but it has a new cousin: \link TopicPreprocessorDirectivesPerformance EIGEN_DONT_ALIGN_STATICALLY.\endlink It allows to get rid of all static alignment issues while keeping alignment of dynamic-size heap-allocated arrays. Vectorization of statically allocated arrays is still preserved (unless you define \link TopicPreprocessorDirectivesPerformance EIGEN_UNALIGNED_VECTORIZE \endlink =0), at the cost of unaligned memory stores. 
\section AlignedMap Aligned Map objects diff --git a/doc/CMakeLists.txt b/doc/CMakeLists.txt index 4d01a0424..db413bc65 100644 --- a/doc/CMakeLists.txt +++ b/doc/CMakeLists.txt @@ -78,6 +78,8 @@ add_custom_target( COMMAND ${CMAKE_COMMAND} -E make_directory ${CMAKE_CURRENT_BINARY_DIR}/html/ COMMAND ${CMAKE_COMMAND} -E copy ${CMAKE_CURRENT_SOURCE_DIR}/eigen_navtree_hacks.js ${CMAKE_CURRENT_BINARY_DIR}/html/ COMMAND ${CMAKE_COMMAND} -E copy ${CMAKE_CURRENT_SOURCE_DIR}/Eigen_Silly_Professor_64x64.png ${CMAKE_CURRENT_BINARY_DIR}/html/ + COMMAND ${CMAKE_COMMAND} -E copy ${CMAKE_CURRENT_SOURCE_DIR}/ftv2pnode.png ${CMAKE_CURRENT_BINARY_DIR}/html/ + COMMAND ${CMAKE_COMMAND} -E copy ${CMAKE_CURRENT_SOURCE_DIR}/ftv2node.png ${CMAKE_CURRENT_BINARY_DIR}/html/ COMMAND ${CMAKE_COMMAND} -E copy ${CMAKE_CURRENT_SOURCE_DIR}/AsciiQuickReference.txt ${CMAKE_CURRENT_BINARY_DIR}/html/ WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} ) @@ -88,6 +90,8 @@ add_custom_target( COMMAND ${CMAKE_COMMAND} -E make_directory ${Eigen_BINARY_DIR}/doc/html/unsupported COMMAND ${CMAKE_COMMAND} -E copy ${CMAKE_CURRENT_SOURCE_DIR}/eigen_navtree_hacks.js ${CMAKE_CURRENT_BINARY_DIR}/html/unsupported/ COMMAND ${CMAKE_COMMAND} -E copy ${CMAKE_CURRENT_SOURCE_DIR}/Eigen_Silly_Professor_64x64.png ${CMAKE_CURRENT_BINARY_DIR}/html/unsupported/ + COMMAND ${CMAKE_COMMAND} -E copy ${CMAKE_CURRENT_SOURCE_DIR}/ftv2pnode.png ${CMAKE_CURRENT_BINARY_DIR}/html/unsupported/ + COMMAND ${CMAKE_COMMAND} -E copy ${CMAKE_CURRENT_SOURCE_DIR}/ftv2node.png ${CMAKE_CURRENT_BINARY_DIR}/html/unsupported/ WORKING_DIRECTORY ${Eigen_BINARY_DIR}/doc ) diff --git a/doc/CoeffwiseMathFunctionsTable.dox b/doc/CoeffwiseMathFunctionsTable.dox new file mode 100644 index 000000000..ac6e0bd31 --- /dev/null +++ b/doc/CoeffwiseMathFunctionsTable.dox @@ -0,0 +1,525 @@ +namespace Eigen { + +/** \eigenManualPage CoeffwiseMathFunctions Catalog of coefficient-wise math functions + + + + +This table presents a catalog of the coefficient-wise math functions 
supported by %Eigen. +In this table, \c a, \c b, refer to Array objects or expressions, and \c m refers to a linear algebra Matrix/Vector object. Standard scalar types are abbreviated as follows: + - \c int: \c i32 + - \c float: \c f + - \c double: \c d + - \c std::complex<float>: \c cf + - \c std::complex<double>: \c cd + +For each row, the first column lists the equivalent calls for arrays, and matrices when supported. Of course, all functions are available for matrices by first casting them as an array: \c m.array(). + +The third column gives some hints on the underlying scalar implementation. In most cases, %Eigen does not implement the math function itself but relies on the STL for standard scalar types, or user-provided functions for custom scalar types. +For instance, some entries simply call the respective function of the STL while preserving argument-dependent lookup for custom types. +The following: +\code +using std::foo; +foo(a[i]); +\endcode +means that the STL's function \c std::foo will be potentially called if it is compatible with the underlying scalar type. If not, then the user must ensure that an overload of the function foo is available for the given scalar type (usually defined in the same namespace as the given scalar type). +This also means that, unless specified, if the function \c std::foo is available only in some recent c++ versions (e.g., c++11), then the respective %Eigen's function/method will be usable on standard types only if the compiler supports the required c++ version.
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
APIDescriptionDefault scalar implementationSIMD
Basic operations
+ \anchor cwisetable_abs + a.\link ArrayBase::abs abs\endlink(); \n + \link Eigen::abs abs\endlink(a); \n + m.\link MatrixBase::cwiseAbs cwiseAbs\endlink(); + absolute value (\f$ |a_i| \f$) + using std::abs; \n + abs(a[i]); + SSE2, AVX (i32,f,d)
+ \anchor cwisetable_inverse + a.\link ArrayBase::inverse inverse\endlink(); \n + \link Eigen::inverse inverse\endlink(a); \n + m.\link MatrixBase::cwiseInverse cwiseInverse\endlink(); + inverse value (\f$ 1/a_i \f$) + 1/a[i]; + All engines (f,d,cf,cd)
+ \anchor cwisetable_conj + a.\link ArrayBase::conjugate conjugate\endlink(); \n + \link Eigen::conj conj\endlink(a); \n + m.\link MatrixBase::conjugate conjugate\endlink(); + complex conjugate (\f$ \bar{a_i} \f$),\n + no-op for real + using std::conj; \n + conj(a[i]); + All engines (cf,cd)
Exponential functions
+ \anchor cwisetable_exp + a.\link ArrayBase::exp exp\endlink(); \n + \link Eigen::exp exp\endlink(a); + \f$ e \f$ raised to the given power (\f$ e^{a_i} \f$) + using std::exp; \n + exp(a[i]); + SSE2, AVX (f,d)
+ \anchor cwisetable_log + a.\link ArrayBase::log log\endlink(); \n + \link Eigen::log log\endlink(a); + natural (base \f$ e \f$) logarithm (\f$ \ln({a_i}) \f$) + using std::log; \n + log(a[i]); + SSE2, AVX (f)
+ \anchor cwisetable_log1p + a.\link ArrayBase::log1p log1p\endlink(); \n + \link Eigen::log1p log1p\endlink(a); + natural (base \f$ e \f$) logarithm of 1 plus \n the given number (\f$ \ln({1+a_i}) \f$)built-in generic implementation based on \c log,\n + plus \c using \c std::log1p ; \cpp11
+ \anchor cwisetable_log10 + a.\link ArrayBase::log10 log10\endlink(); \n + \link Eigen::log10 log10\endlink(a); + base 10 logarithm (\f$ \log_{10}({a_i}) \f$) + using std::log10; \n + log10(a[i]); +
Power functions
+ \anchor cwisetable_pow + a.\link ArrayBase::pow pow\endlink(b); \n + \link Eigen::pow pow\endlink(a,b); + raises a number to the given power (\f$ a_i ^ {b_i} \f$) \n \c a and \c b can be either an array or scalar. + using std::pow; \n + pow(a[i],b[i]);\n + (plus builtin for integer types)
+ \anchor cwisetable_sqrt + a.\link ArrayBase::sqrt sqrt\endlink(); \n + \link Eigen::sqrt sqrt\endlink(a);\n + m.\link MatrixBase::cwiseSqrt cwiseSqrt\endlink(); + computes square root (\f$ \sqrt a_i \f$) + using std::sqrt; \n + sqrt(a[i]);SSE2, AVX (f,d)
+ \anchor cwisetable_rsqrt + a.\link ArrayBase::rsqrt rsqrt\endlink(); \n + \link Eigen::rsqrt rsqrt\endlink(a); + reciprocal square root (\f$ 1/{\sqrt a_i} \f$) + using std::sqrt; \n + 1/sqrt(a[i]); \n + SSE2, AVX, AltiVec, ZVector (f,d)\n + (approx + 1 Newton iteration)
+ \anchor cwisetable_square + a.\link ArrayBase::square square\endlink(); \n + \link Eigen::square square\endlink(a); + computes square power (\f$ a_i^2 \f$) + a[i]*a[i]All (i32,f,d,cf,cd)
+ \anchor cwisetable_cube + a.\link ArrayBase::cube cube\endlink(); \n + \link Eigen::cube cube\endlink(a); + computes cubic power (\f$ a_i^3 \f$) + a[i]*a[i]*a[i]All (i32,f,d,cf,cd)
+ \anchor cwisetable_abs2 + a.\link ArrayBase::abs2 abs2\endlink(); \n + \link Eigen::abs2 abs2\endlink(a);\n + m.\link MatrixBase::cwiseAbs2 cwiseAbs2\endlink(); + computes the squared absolute value (\f$ |a_i|^2 \f$) + real: a[i]*a[i] \n + complex: real(a[i])*real(a[i]) \n +        + imag(a[i])*imag(a[i])All (i32,f,d)
Trigonometric functions
+ \anchor cwisetable_sin + a.\link ArrayBase::sin sin\endlink(); \n + \link Eigen::sin sin\endlink(a); + computes sine + using std::sin; \n + sin(a[i]);SSE2, AVX (f)
+ \anchor cwisetable_cos + a.\link ArrayBase::cos cos\endlink(); \n + \link Eigen::cos cos\endlink(a); + computes cosine + using std::cos; \n + cos(a[i]);SSE2, AVX (f)
+ \anchor cwisetable_tan + a.\link ArrayBase::tan tan\endlink(); \n + \link Eigen::tan tan\endlink(a); + computes tangent + using std::tan; \n + tan(a[i]);
+ \anchor cwisetable_asin + a.\link ArrayBase::asin asin\endlink(); \n + \link Eigen::asin asin\endlink(a); + computes arc sine (\f$ \sin^{-1} a_i \f$) + using std::asin; \n + asin(a[i]);
+ \anchor cwisetable_acos + a.\link ArrayBase::acos acos\endlink(); \n + \link Eigen::acos acos\endlink(a); + computes arc cosine (\f$ \cos^{-1} a_i \f$) + using std::acos; \n + acos(a[i]);
+ \anchor cwisetable_atan + a.\link ArrayBase::atan atan\endlink(); \n + \link Eigen::atan atan\endlink(a); + computes arc tangent (\f$ \tan^{-1} a_i \f$) + using std::atan; \n + atan(a[i]);
Hyperbolic functions
+ \anchor cwisetable_sinh + a.\link ArrayBase::sinh sinh\endlink(); \n + \link Eigen::sinh sinh\endlink(a); + computes hyperbolic sine + using std::sinh; \n + sinh(a[i]);
+ \anchor cwisetable_cosh + a.\link ArrayBase::cosh cosh\endlink(); \n + \link Eigen::cosh cosh\endlink(a); + computes hyperbolic cosine + using std::cosh; \n + cosh(a[i]);
+ \anchor cwisetable_tanh + a.\link ArrayBase::tanh tanh\endlink(); \n + \link Eigen::tanh tanh\endlink(a); + computes hyperbolic tangent + using std::tanh; \n + tanh(a[i]);
Nearest integer floating point operations
+ \anchor cwisetable_ceil + a.\link ArrayBase::ceil ceil\endlink(); \n + \link Eigen::ceil ceil\endlink(a); + nearest integer not less than the given value + using std::ceil; \n + ceil(a[i]);SSE4,AVX,ZVector (f,d)
+ \anchor cwisetable_floor + a.\link ArrayBase::floor floor\endlink(); \n + \link Eigen::floor floor\endlink(a); + nearest integer not greater than the given value + using std::floor; \n + floor(a[i]);SSE4,AVX,ZVector (f,d)
+ \anchor cwisetable_round + a.\link ArrayBase::round round\endlink(); \n + \link Eigen::round round\endlink(a); + nearest integer, \n rounding away from zero in halfway casesbuilt-in generic implementation \n based on \c floor and \c ceil,\n + plus \c using \c std::round ; \cpp11SSE4,AVX,ZVector (f,d)
Floating point manipulation functions
Classification and comparison
+ \anchor cwisetable_isfinite + a.\link ArrayBase::isfinite isfinite\endlink(); \n + \link Eigen::isfinite isfinite\endlink(a); + checks if the given number has finite valuebuilt-in generic implementation,\n + plus \c using \c std::isfinite ; \cpp11
+ \anchor cwisetable_isinf + a.\link ArrayBase::isinf isinf\endlink(); \n + \link Eigen::isinf isinf\endlink(a); + checks if the given number is infinitebuilt-in generic implementation,\n + plus \c using \c std::isinf ; \cpp11
+ \anchor cwisetable_isnan + a.\link ArrayBase::isnan isnan\endlink(); \n + \link Eigen::isnan isnan\endlink(a); + checks if the given number is not a numberbuilt-in generic implementation,\n + plus \c using \c std::isnan ; \cpp11
Error and gamma functions
Require \c #include \c <unsupported/Eigen/SpecialFunctions>
+ \anchor cwisetable_erf + a.\link ArrayBase::erf erf\endlink(); \n + \link Eigen::erf erf\endlink(a); + error function + using std::erf; \cpp11 \n + erf(a[i]); +
+ \anchor cwisetable_erfc + a.\link ArrayBase::erfc erfc\endlink(); \n + \link Eigen::erfc erfc\endlink(a); + complementary error function + using std::erfc; \cpp11 \n + erfc(a[i]); +
+ \anchor cwisetable_lgamma + a.\link ArrayBase::lgamma lgamma\endlink(); \n + \link Eigen::lgamma lgamma\endlink(a); + natural logarithm of the gamma function + using std::lgamma; \cpp11 \n + lgamma(a[i]); +
+ \anchor cwisetable_digamma + a.\link ArrayBase::digamma digamma\endlink(); \n + \link Eigen::digamma digamma\endlink(a); + logarithmic derivative of the gamma function + built-in for float and double +
+ \anchor cwisetable_igamma + \link Eigen::igamma igamma\endlink(a,x); + lower incomplete gamma integral + \n \f$ \gamma(a_i,x_i)= \frac{1}{|a_i|} \int_{0}^{x_i}e^{\text{-}t} t^{a_i-1} \mathrm{d} t \f$ + built-in for float and double,\n but requires \cpp11 +
+ \anchor cwisetable_igammac + \link Eigen::igammac igammac\endlink(a,x); + upper incomplete gamma integral + \n \f$ \Gamma(a_i,x_i) = \frac{1}{|a_i|} \int_{x_i}^{\infty}e^{\text{-}t} t^{a_i-1} \mathrm{d} t \f$ + built-in for float and double,\n but requires \cpp11 +
Special functions
Require \c #include \c <unsupported/Eigen/SpecialFunctions>
+ \anchor cwisetable_polygamma + \link Eigen::polygamma polygamma\endlink(n,x); + n-th derivative of digamma at x + built-in generic based on\n \c lgamma , + \c digamma + and \c zeta . +
+ \anchor cwisetable_betainc + \link Eigen::betainc betainc\endlink(a,b,x); + Incomplete beta function + built-in for float and double,\n but requires \cpp11 +
+ \anchor cwisetable_zeta + \link Eigen::zeta zeta\endlink(a,b); + Hurwitz zeta function + \n \f$ \zeta(a_i,b_i)=\sum_{k=0}^{\infty}(b_i+k)^{\text{-}a_i} \f$ + built-in for float and double +
+ +\n + +*/ + +} \ No newline at end of file diff --git a/doc/CustomizingEigen.dox b/doc/CustomizingEigen.dox deleted file mode 100644 index cb25f4ec9..000000000 --- a/doc/CustomizingEigen.dox +++ /dev/null @@ -1,226 +0,0 @@ -namespace Eigen { - -/** \page TopicCustomizingEigen Customizing/Extending Eigen - -Eigen can be extended in several ways, for instance, by defining global methods, \ref ExtendingMatrixBase "by adding custom methods to MatrixBase", adding support to \ref CustomScalarType "custom types" etc. - -\eigenAutoToc - -\section ExtendingMatrixBase Extending MatrixBase (and other classes) - -In this section we will see how to add custom methods to MatrixBase. Since all expressions and matrix types inherit MatrixBase, adding a method to MatrixBase make it immediately available to all expressions ! A typical use case is, for instance, to make Eigen compatible with another API. - -You certainly know that in C++ it is not possible to add methods to an existing class. So how that's possible ? Here the trick is to include in the declaration of MatrixBase a file defined by the preprocessor token \c EIGEN_MATRIXBASE_PLUGIN: -\code -class MatrixBase { - // ... - #ifdef EIGEN_MATRIXBASE_PLUGIN - #include EIGEN_MATRIXBASE_PLUGIN - #endif -}; -\endcode -Therefore to extend MatrixBase with your own methods you just have to create a file with your method declaration and define EIGEN_MATRIXBASE_PLUGIN before you include any Eigen's header file. - -You can extend many of the other classes used in Eigen by defining similarly named preprocessor symbols. For instance, define \c EIGEN_ARRAYBASE_PLUGIN if you want to extend the ArrayBase class. A full list of classes that can be extended in this way and the corresponding preprocessor symbols can be found on our page \ref TopicPreprocessorDirectives. 
- -Here is an example of an extension file for adding methods to MatrixBase: \n -\b MatrixBaseAddons.h -\code -inline Scalar at(uint i, uint j) const { return this->operator()(i,j); } -inline Scalar& at(uint i, uint j) { return this->operator()(i,j); } -inline Scalar at(uint i) const { return this->operator[](i); } -inline Scalar& at(uint i) { return this->operator[](i); } - -inline RealScalar squaredLength() const { return squaredNorm(); } -inline RealScalar length() const { return norm(); } -inline RealScalar invLength(void) const { return fast_inv_sqrt(squaredNorm()); } - -template -inline Scalar squaredDistanceTo(const MatrixBase& other) const -{ return (derived() - other.derived()).squaredNorm(); } - -template -inline RealScalar distanceTo(const MatrixBase& other) const -{ return internal::sqrt(derived().squaredDistanceTo(other)); } - -inline void scaleTo(RealScalar l) { RealScalar vl = norm(); if (vl>1e-9) derived() *= (l/vl); } - -inline Transpose transposed() {return this->transpose();} -inline const Transpose transposed() const {return this->transpose();} - -inline uint minComponentId(void) const { int i; this->minCoeff(&i); return i; } -inline uint maxComponentId(void) const { int i; this->maxCoeff(&i); return i; } - -template -void makeFloor(const MatrixBase& other) { derived() = derived().cwiseMin(other.derived()); } -template -void makeCeil(const MatrixBase& other) { derived() = derived().cwiseMax(other.derived()); } - -const CwiseUnaryOp, Derived> -operator+(const Scalar& scalar) const -{ return CwiseUnaryOp, Derived>(derived(), internal::scalar_add_op(scalar)); } - -friend const CwiseUnaryOp, Derived> -operator+(const Scalar& scalar, const MatrixBase& mat) -{ return CwiseUnaryOp, Derived>(mat.derived(), internal::scalar_add_op(scalar)); } -\endcode - -Then one can the following declaration in the config.h or whatever prerequisites header file of his project: -\code -#define EIGEN_MATRIXBASE_PLUGIN "MatrixBaseAddons.h" -\endcode - -\section 
InheritingFromMatrix Inheriting from Matrix - -Before inheriting from Matrix, be really, I mean REALLY, sure that using -EIGEN_MATRIX_PLUGIN is not what you really want (see previous section). -If you just need to add few members to Matrix, this is the way to go. - -An example of when you actually need to inherit Matrix, is when you -have several layers of heritage such as -MyVerySpecificVector1, MyVerySpecificVector2 -> MyVector1 -> Matrix and -MyVerySpecificVector3, MyVerySpecificVector4 -> MyVector2 -> Matrix. - -In order for your object to work within the %Eigen framework, you need to -define a few members in your inherited class. - -Here is a minimalistic example: - -\include CustomizingEigen_Inheritance.cpp - -Output: \verbinclude CustomizingEigen_Inheritance.out - -This is the kind of error you can get if you don't provide those methods -\verbatim -error: no match for ‘operator=’ in ‘v = Eigen::operator*( -const Eigen::MatrixBase >::Scalar&, -const Eigen::MatrixBase >::StorageBaseType&) -(((const Eigen::MatrixBase >::StorageBaseType&) -((const Eigen::MatrixBase >::StorageBaseType*)(& v))))’ -\endverbatim - -\anchor user_defined_scalars \section CustomScalarType Using custom scalar types - -By default, Eigen currently supports standard floating-point types (\c float, \c double, \c std::complex, \c std::complex, \c long \c double), as well as all native integer types (e.g., \c int, \c unsigned \c int, \c short, etc.), and \c bool. -On x86-64 systems, \c long \c double permits to locally enforces the use of x87 registers with extended accuracy (in comparison to SSE). - -In order to add support for a custom type \c T you need: --# make sure the common operator (+,-,*,/,etc.) are supported by the type \c T --# add a specialization of struct Eigen::NumTraits (see \ref NumTraits) --# define the math functions that makes sense for your type. This includes standard ones like sqrt, pow, sin, tan, conj, real, imag, etc, as well as abs2 which is Eigen specific. 
- (see the file Eigen/src/Core/MathFunctions.h) - -The math function should be defined in the same namespace than \c T, or in the \c std namespace though that second approach is not recommended. - -Here is a concrete example adding support for the Adolc's \c adouble type. Adolc is an automatic differentiation library. The type \c adouble is basically a real value tracking the values of any number of partial derivatives. - -\code -#ifndef ADOLCSUPPORT_H -#define ADOLCSUPPORT_H - -#define ADOLC_TAPELESS -#include -#include - -namespace Eigen { - -template<> struct NumTraits - : NumTraits // permits to get the epsilon, dummy_precision, lowest, highest functions -{ - typedef adtl::adouble Real; - typedef adtl::adouble NonInteger; - typedef adtl::adouble Nested; - - enum { - IsComplex = 0, - IsInteger = 0, - IsSigned = 1, - RequireInitialization = 1, - ReadCost = 1, - AddCost = 3, - MulCost = 3 - }; -}; - -} - -namespace adtl { - -inline const adouble& conj(const adouble& x) { return x; } -inline const adouble& real(const adouble& x) { return x; } -inline adouble imag(const adouble&) { return 0.; } -inline adouble abs(const adouble& x) { return fabs(x); } -inline adouble abs2(const adouble& x) { return x*x; } - -} - -#endif // ADOLCSUPPORT_H -\endcode - -This other example adds support for the \c mpq_class type from GMP. It shows in particular how to change the way Eigen picks the best pivot during LU factorization. It selects the coefficient with the highest score, where the score is by default the absolute value of a number, but we can define a different score, for instance to prefer pivots with a more compact representation (this is an example, not a recommendation). Note that the scores should always be non-negative and only zero is allowed to have a score of zero. Also, this can interact badly with thresholds for inexact scalar types. 
- -\code -#include -#include -#include - -namespace Eigen { - template struct NumTraits; - template<> struct NumTraits - { - typedef mpq_class Real; - typedef mpq_class NonInteger; - typedef mpq_class Nested; - - static inline Real epsilon() { return 0; } - static inline Real dummy_precision() { return 0; } - - enum { - IsInteger = 0, - IsSigned = 1, - IsComplex = 0, - RequireInitialization = 1, - ReadCost = 6, - AddCost = 150, - MulCost = 100 - }; - }; - - namespace internal { - template<> - struct significant_decimals_impl - { - // Infinite precision when printing - static inline int run() { return 0; } - }; - - template<> struct scalar_score_coeff_op { - struct result_type : boost::totally_ordered1 { - std::size_t len; - result_type(int i = 0) : len(i) {} // Eigen uses Score(0) and Score() - result_type(mpq_class const& q) : - len(mpz_size(q.get_num_mpz_t())+ - mpz_size(q.get_den_mpz_t())-1) {} - friend bool operator<(result_type x, result_type y) { - // 0 is the worst possible pivot - if (x.len == 0) return y.len > 0; - if (y.len == 0) return false; - // Prefer a pivot with a small representation - return x.len > y.len; - } - friend bool operator==(result_type x, result_type y) { - // Only used to test if the score is 0 - return x.len == y.len; - } - }; - result_type operator()(mpq_class const& x) const { return x; } - }; - } -} -\endcode - -\sa \ref TopicPreprocessorDirectives - -*/ - -} diff --git a/doc/CustomizingEigen_CustomScalar.dox b/doc/CustomizingEigen_CustomScalar.dox new file mode 100644 index 000000000..1ee78cbe5 --- /dev/null +++ b/doc/CustomizingEigen_CustomScalar.dox @@ -0,0 +1,120 @@ +namespace Eigen { + +/** \page TopicCustomizing_CustomScalar Using custom scalar types +\anchor user_defined_scalars + +By default, Eigen currently supports standard floating-point types (\c float, \c double, \c std::complex, \c std::complex, \c long \c double), as well as all native integer types (e.g., \c int, \c unsigned \c int, \c short, etc.), and \c bool. 
+On x86-64 systems, \c long \c double allows locally enforcing the use of x87 registers with extended accuracy (in comparison to SSE). + +In order to add support for a custom type \c T you need to: +-# make sure the common operators (+,-,*,/,etc.) are supported by the type \c T +-# add a specialization of struct Eigen::NumTraits<T> (see \ref NumTraits) +-# define the math functions that make sense for your type. This includes standard ones like sqrt, pow, sin, tan, conj, real, imag, etc, as well as abs2 which is Eigen specific. + (see the file Eigen/src/Core/MathFunctions.h) + +The math functions should be defined in the same namespace as \c T, or in the \c std namespace, though that second approach is not recommended. + +Here is a concrete example adding support for the Adolc's \c adouble type. Adolc is an automatic differentiation library. The type \c adouble is basically a real value tracking the values of any number of partial derivatives. + +\code +#ifndef ADOLCSUPPORT_H +#define ADOLCSUPPORT_H + +#define ADOLC_TAPELESS +#include +#include + +namespace Eigen { + +template<> struct NumTraits + : NumTraits // permits to get the epsilon, dummy_precision, lowest, highest functions +{ + typedef adtl::adouble Real; + typedef adtl::adouble NonInteger; + typedef adtl::adouble Nested; + + enum { + IsComplex = 0, + IsInteger = 0, + IsSigned = 1, + RequireInitialization = 1, + ReadCost = 1, + AddCost = 3, + MulCost = 3 + }; +}; + +} + +namespace adtl { + +inline const adouble& conj(const adouble& x) { return x; } +inline const adouble& real(const adouble& x) { return x; } +inline adouble imag(const adouble&) { return 0.; } +inline adouble abs(const adouble& x) { return fabs(x); } +inline adouble abs2(const adouble& x) { return x*x; } + +} + +#endif // ADOLCSUPPORT_H +\endcode + +This other example adds support for the \c mpq_class type from GMP. It shows in particular how to change the way Eigen picks the best pivot during LU factorization.
It selects the coefficient with the highest score, where the score is by default the absolute value of a number, but we can define a different score, for instance to prefer pivots with a more compact representation (this is an example, not a recommendation). Note that the scores should always be non-negative and only zero is allowed to have a score of zero. Also, this can interact badly with thresholds for inexact scalar types. + +\code +#include +#include +#include + +namespace Eigen { + template<> struct NumTraits : GenericNumTraits + { + typedef mpq_class Real; + typedef mpq_class NonInteger; + typedef mpq_class Nested; + + static inline Real epsilon() { return 0; } + static inline Real dummy_precision() { return 0; } + static inline Real digits10() { return 0; } + + enum { + IsInteger = 0, + IsSigned = 1, + IsComplex = 0, + RequireInitialization = 1, + ReadCost = 6, + AddCost = 150, + MulCost = 100 + }; + }; + + namespace internal { + + template<> struct scalar_score_coeff_op { + struct result_type : boost::totally_ordered1 { + std::size_t len; + result_type(int i = 0) : len(i) {} // Eigen uses Score(0) and Score() + result_type(mpq_class const& q) : + len(mpz_size(q.get_num_mpz_t())+ + mpz_size(q.get_den_mpz_t())-1) {} + friend bool operator<(result_type x, result_type y) { + // 0 is the worst possible pivot + if (x.len == 0) return y.len > 0; + if (y.len == 0) return false; + // Prefer a pivot with a small representation + return x.len > y.len; + } + friend bool operator==(result_type x, result_type y) { + // Only used to test if the score is 0 + return x.len == y.len; + } + }; + result_type operator()(mpq_class const& x) const { return x; } + }; + } +} +\endcode + +*/ + +} diff --git a/doc/CustomizingEigen_InheritingMatrix.dox b/doc/CustomizingEigen_InheritingMatrix.dox new file mode 100644 index 000000000..b21e55433 --- /dev/null +++ b/doc/CustomizingEigen_InheritingMatrix.dox @@ -0,0 +1,34 @@ +namespace Eigen { + +/** \page 
TopicCustomizing_InheritingMatrix Inheriting from Matrix + +Before inheriting from Matrix, be really, I mean REALLY, sure that using +EIGEN_MATRIX_PLUGIN is not what you really want (see previous section). +If you just need to add few members to Matrix, this is the way to go. + +An example of when you actually need to inherit Matrix, is when you +have several layers of heritage such as +MyVerySpecificVector1, MyVerySpecificVector2 -> MyVector1 -> Matrix and +MyVerySpecificVector3, MyVerySpecificVector4 -> MyVector2 -> Matrix. + +In order for your object to work within the %Eigen framework, you need to +define a few members in your inherited class. + +Here is a minimalistic example: + +\include CustomizingEigen_Inheritance.cpp + +Output: \verbinclude CustomizingEigen_Inheritance.out + +This is the kind of error you can get if you don't provide those methods +\verbatim +error: no match for ‘operator=’ in ‘v = Eigen::operator*( +const Eigen::MatrixBase >::Scalar&, +const Eigen::MatrixBase >::StorageBaseType&) +(((const Eigen::MatrixBase >::StorageBaseType&) +((const Eigen::MatrixBase >::StorageBaseType*)(& v))))’ +\endverbatim + +*/ + +} diff --git a/doc/CustomizingEigen_NullaryExpr.dox b/doc/CustomizingEigen_NullaryExpr.dox new file mode 100644 index 000000000..37c8dcd89 --- /dev/null +++ b/doc/CustomizingEigen_NullaryExpr.dox @@ -0,0 +1,86 @@ +namespace Eigen { + +/** \page TopicCustomizing_NullaryExpr Matrix manipulation via nullary-expressions + + +The main purpose of the class CwiseNullaryOp is to define \em procedural matrices such as constant or random matrices as returned by the Ones(), Zero(), Constant(), Identity() and Random() methods. +Nevertheless, with some imagination it is possible to accomplish very sophisticated matrix manipulation with minimal efforts such that \ref TopicNewExpressionType "implementing new expression" is rarely needed. 
+ +\section NullaryExpr_Circulant Example 1: circulant matrix + +To explore these possibilities let us start with the \em circulant example of the \ref TopicNewExpressionType "implementing new expression" topic. +Let us recall that a circulant matrix is a matrix where each column is the same as the +column to the left, except that it is cyclically shifted downwards. +For example, here is a 4-by-4 circulant matrix: +\f[ \begin{bmatrix} + 1 & 8 & 4 & 2 \\ + 2 & 1 & 8 & 4 \\ + 4 & 2 & 1 & 8 \\ + 8 & 4 & 2 & 1 +\end{bmatrix} \f] +A circulant matrix is uniquely determined by its first column. We wish +to write a function \c makeCirculant which, given the first column, +returns an expression representing the circulant matrix. + +For this exercise, the return type of \c makeCirculant will be a CwiseNullaryOp that we need to instantiate with: +1 - a proper \c circulant_functor storing the input vector and implementing the adequate coefficient accessor \c operator(i,j) +2 - a template instantiation of class Matrix conveying compile-time information such as the scalar type, sizes, and preferred storage layout. + +Calling \c ArgType the type of the input vector, we can construct the equivalent squared Matrix type as follows: + +\snippet make_circulant2.cpp square + +This little helper structure will help us to implement our \c makeCirculant function as follows: + +\snippet make_circulant2.cpp makeCirculant + +As usual, our function takes as argument a \c MatrixBase (see this \ref TopicFunctionTakingEigenTypes "page" for more details). +Then, the CwiseNullaryOp object is constructed through the DenseBase::NullaryExpr static method with the adequate runtime sizes. 
+ +Then, we need to implement our \c circulant_functor, which is a straightforward exercise: + +\snippet make_circulant2.cpp circulant_func + +We are now all set to try our new feature: + +\snippet make_circulant2.cpp main + + +If all the fragments are combined, the following output is produced, +showing that the program works as expected: + +\include make_circulant2.out + +This implementation of \c makeCirculant is much simpler than \ref TopicNewExpressionType "defining a new expression" from scratch. + + +\section NullaryExpr_Indexing Example 2: indexing rows and columns + +The goal here is to mimic MatLab's ability to index a matrix through two vectors of indices referencing the rows and columns to be picked respectively, like this: + +\snippet nullary_indexing.out main1 + +To this end, let us first write a nullary-functor storing references to the input matrix and to the two arrays of indices, and implementing the required \c operator()(i,j): + +\snippet nullary_indexing.cpp functor + +Then, let's create an \c indexing(A,rows,cols) function creating the nullary expression: + +\snippet nullary_indexing.cpp function + +Finally, here is an example of how this function can be used: + +\snippet nullary_indexing.cpp main1 + +This straightforward implementation is already quite powerful as the row or column index arrays can also be expressions to perform offsetting, modulo, striding, reverse, etc. + +\snippet nullary_indexing.cpp main2 + +and the output is: + +\snippet nullary_indexing.out main2 + +*/ + +} + diff --git a/doc/CustomizingEigen_Plugins.dox b/doc/CustomizingEigen_Plugins.dox new file mode 100644 index 000000000..d88f2409b --- /dev/null +++ b/doc/CustomizingEigen_Plugins.dox @@ -0,0 +1,69 @@ +namespace Eigen { + +/** \page TopicCustomizing_Plugins Extending MatrixBase (and other classes) + +In this section we will see how to add custom methods to MatrixBase. 
Since all expressions and matrix types inherit MatrixBase, adding a method to MatrixBase makes it immediately available to all expressions! A typical use case is, for instance, to make Eigen compatible with another API. + +You certainly know that in C++ it is not possible to add methods to an existing class. So how is that possible? Here the trick is to include in the declaration of MatrixBase a file defined by the preprocessor token \c EIGEN_MATRIXBASE_PLUGIN: +\code +class MatrixBase { + // ... + #ifdef EIGEN_MATRIXBASE_PLUGIN + #include EIGEN_MATRIXBASE_PLUGIN + #endif +}; +\endcode +Therefore to extend MatrixBase with your own methods you just have to create a file with your method declaration and define EIGEN_MATRIXBASE_PLUGIN before you include any Eigen header file. + +You can extend many of the other classes used in Eigen by defining similarly named preprocessor symbols. For instance, define \c EIGEN_ARRAYBASE_PLUGIN if you want to extend the ArrayBase class. A full list of classes that can be extended in this way and the corresponding preprocessor symbols can be found on our page \ref TopicPreprocessorDirectives.
+ +Here is an example of an extension file for adding methods to MatrixBase: \n +\b MatrixBaseAddons.h +\code +inline Scalar at(uint i, uint j) const { return this->operator()(i,j); } +inline Scalar& at(uint i, uint j) { return this->operator()(i,j); } +inline Scalar at(uint i) const { return this->operator[](i); } +inline Scalar& at(uint i) { return this->operator[](i); } + +inline RealScalar squaredLength() const { return squaredNorm(); } +inline RealScalar length() const { return norm(); } +inline RealScalar invLength(void) const { return fast_inv_sqrt(squaredNorm()); } + +template +inline Scalar squaredDistanceTo(const MatrixBase& other) const +{ return (derived() - other.derived()).squaredNorm(); } + +template +inline RealScalar distanceTo(const MatrixBase& other) const +{ return internal::sqrt(derived().squaredDistanceTo(other)); } + +inline void scaleTo(RealScalar l) { RealScalar vl = norm(); if (vl>1e-9) derived() *= (l/vl); } + +inline Transpose transposed() {return this->transpose();} +inline const Transpose transposed() const {return this->transpose();} + +inline uint minComponentId(void) const { int i; this->minCoeff(&i); return i; } +inline uint maxComponentId(void) const { int i; this->maxCoeff(&i); return i; } + +template +void makeFloor(const MatrixBase& other) { derived() = derived().cwiseMin(other.derived()); } +template +void makeCeil(const MatrixBase& other) { derived() = derived().cwiseMax(other.derived()); } + +const CwiseBinaryOp, const Derived, const ConstantReturnType> +operator+(const Scalar& scalar) const +{ return CwiseBinaryOp, const Derived, const ConstantReturnType>(derived(), Constant(rows(),cols(),scalar)); } + +friend const CwiseBinaryOp, const ConstantReturnType, Derived> +operator+(const Scalar& scalar, const MatrixBase& mat) +{ return CwiseBinaryOp, const ConstantReturnType, Derived>(Constant(rows(),cols(),scalar), mat.derived()); } +\endcode + +Then one can the following declaration in the config.h or whatever prerequisites 
header file of his project: +\code +#define EIGEN_MATRIXBASE_PLUGIN "MatrixBaseAddons.h" +\endcode + +*/ + +} diff --git a/doc/DenseDecompositionBenchmark.dox b/doc/DenseDecompositionBenchmark.dox new file mode 100644 index 000000000..7be9c70cd --- /dev/null +++ b/doc/DenseDecompositionBenchmark.dox @@ -0,0 +1,42 @@ +namespace Eigen { + +/** \eigenManualPage DenseDecompositionBenchmark Benchmark of dense decompositions + +This page presents a speed comparison of the dense matrix decompositions offered by %Eigen for a wide range of square matrices and overconstrained problems. + +For a more general overview on the features and numerical robustness of linear solvers and decompositions, check this \link TopicLinearAlgebraDecompositions table \endlink. + +This benchmark has been run on a laptop equipped with an Intel core i7 \@ 2,6 GHz, and compiled with clang with \b AVX and \b FMA instruction sets enabled but without multi-threading. +It uses \b single \b precision \b float numbers. For double, you can get a good estimate by multiplying the timings by a factor 2. + +The square matrices are symmetric, and for the overconstrained matrices, the reported timmings include the cost to compute the symmetric covariance matrix \f$ A^T A \f$ for the first four solvers based on Cholesky and LU, as denoted by the \b * symbol (top-right corner part of the table). +Timings are in \b milliseconds, and factors are relative to the LLT decomposition which is the fastest but also the least general and robust. + + + + + + + + + + + + + + +
solver/size8x8 100x100 1000x1000 4000x4000 10000x8 10000x100 10000x1000 10000x4000
LLT0.050.425.83374.556.79 *30.15 *236.34 *3847.17 *
LDLT0.07 (x1.3)0.65 (x1.5)26.86 (x4.6)2361.18 (x6.3)6.81 (x1) *31.91 (x1.1) *252.61 (x1.1) *5807.66 (x1.5) *
PartialPivLU0.08 (x1.5)0.69 (x1.6)15.63 (x2.7)709.32 (x1.9)6.81 (x1) *31.32 (x1) *241.68 (x1) *4270.48 (x1.1) *
FullPivLU0.1 (x1.9)4.48 (x10.6)281.33 (x48.2)-6.83 (x1) *32.67 (x1.1) *498.25 (x2.1) *-
HouseholderQR0.19 (x3.5)2.18 (x5.2)23.42 (x4)1337.52 (x3.6)34.26 (x5)129.01 (x4.3)377.37 (x1.6)4839.1 (x1.3)
ColPivHouseholderQR0.23 (x4.3)2.23 (x5.3)103.34 (x17.7)9987.16 (x26.7)36.05 (x5.3)163.18 (x5.4)2354.08 (x10)37860.5 (x9.8)
CompleteOrthogonalDecomposition0.23 (x4.3)2.22 (x5.2)99.44 (x17.1)10555.3 (x28.2)35.75 (x5.3)169.39 (x5.6)2150.56 (x9.1)36981.8 (x9.6)
FullPivHouseholderQR0.23 (x4.3)4.64 (x11)289.1 (x49.6)-69.38 (x10.2)446.73 (x14.8)4852.12 (x20.5)-
JacobiSVD1.01 (x18.6)71.43 (x168.4)--113.81 (x16.7)1179.66 (x39.1)--
BDCSVD1.07 (x19.7)21.83 (x51.5)331.77 (x56.9)18587.9 (x49.6)110.53 (x16.3)397.67 (x13.2)2975 (x12.6)48593.2 (x12.6)
+ +\b *: This decomposition does not support direct least-square solving for over-constrained problems, and the reported timings include the cost to form the symmetric covariance matrix \f$ A^T A \f$. + +\b Observations: + + LLT is always the fastest solver. + + For largely over-constrained problems, the cost of Cholesky/LU decompositions is dominated by the computation of the symmetric covariance matrix. + + For large problem sizes, only the decompositions implementing a cache-friendly blocking strategy scale well. Those include LLT, PartialPivLU, HouseholderQR, and BDCSVD. This explains why for a 4k x 4k matrix, HouseholderQR is faster than LDLT. In the future, LDLT and ColPivHouseholderQR will also implement blocking strategies. + + CompleteOrthogonalDecomposition is based on ColPivHouseholderQR and they thus achieve the same level of performance. + +The above table has been generated by the bench/dense_solvers.cpp file, feel free to hack it to generate a table matching your hardware, compiler, and favorite problem sizes. + +*/ + +} diff --git a/doc/Doxyfile.in b/doc/Doxyfile.in index 0a43c7c4e..e9b116d28 100644 --- a/doc/Doxyfile.in +++ b/doc/Doxyfile.in @@ -125,7 +125,7 @@ ALWAYS_DETAILED_SEC = NO # members were ordinary class members. Constructors, destructors and assignment # operators of the base classes will not be shown. -INLINE_INHERITED_MEMB = YES +INLINE_INHERITED_MEMB = NO # If the FULL_PATH_NAMES tag is set to YES then Doxygen will prepend the full # path before files name in the file list and in the header files. If set @@ -216,6 +216,7 @@ ALIASES = "only_for_vectors=This is only for vectors (either row- "lu_module=This is defined in the %LU module. \code #include \endcode" \ "qr_module=This is defined in the %QR module. \code #include \endcode" \ "svd_module=This is defined in the %SVD module. \code #include \endcode" \ + "specialfunctions_module=This is defined in the \b unsupported SpecialFunctions module.
\code #include \endcode" \ "label=\bug" \ "matrixworld=*" \ "arrayworld=*" \ @@ -225,7 +226,10 @@ ALIASES = "only_for_vectors=This is only for vectors (either row- "note_try_to_help_rvo=This function returns the result by value. In order to make that efficient, it is implemented as just a return statement using a special constructor, hopefully allowing the compiler to perform a RVO (return value optimization)." \ "nonstableyet=\warning This is not considered to be part of the stable public API yet. Changes may happen in future releases. See \ref Experimental \"Experimental parts of Eigen\"" \ "implsparsesolverconcept=This class follows the \link TutorialSparseSolverConcept sparse solver concept \endlink." \ - "blank= " + "blank= " \ + "cpp11=[c++11]" \ + "cpp14=[c++14]" \ + "cpp17=[c++17]" ALIASES += "eigenAutoToc= " @@ -1587,7 +1591,8 @@ PREDEFINED = EIGEN_EMPTY_STRUCT \ EIGEN_STRONG_INLINE=inline \ EIGEN_DEVICE_FUNC= \ "EIGEN_MAKE_CWISE_BINARY_OP(METHOD,FUNCTOR)=template const CwiseBinaryOp, const Derived, const OtherDerived> METHOD(const EIGEN_CURRENT_STORAGE_BASE_CLASS &other) const;" \ - "EIGEN_CWISE_PRODUCT_RETURN_TYPE(LHS,RHS)=CwiseBinaryOp, const LHS, const RHS>" + "EIGEN_CWISE_PRODUCT_RETURN_TYPE(LHS,RHS)=CwiseBinaryOp, const LHS, const RHS>"\ + DOXCOMMA=, # If the MACRO_EXPANSION and EXPAND_ONLY_PREDEF tags are set to YES then # this tag can be used to specify a list of macro names that should be expanded. 
@@ -1602,7 +1607,15 @@ EXPAND_AS_DEFINED = EIGEN_MAKE_TYPEDEFS \ EIGEN_CWISE_BINOP_RETURN_TYPE \ EIGEN_CURRENT_STORAGE_BASE_CLASS \ EIGEN_MATHFUNC_IMPL \ - _EIGEN_GENERIC_PUBLIC_INTERFACE + _EIGEN_GENERIC_PUBLIC_INTERFACE \ + EIGEN_ARRAY_DECLARE_GLOBAL_UNARY \ + EIGEN_EMPTY \ + EIGEN_EULER_ANGLES_TYPEDEFS \ + EIGEN_EULER_ANGLES_SINGLE_TYPEDEF \ + EIGEN_EULER_SYSTEM_TYPEDEF \ + EIGEN_DOC_UNARY_ADDONS \ + EIGEN_DOC_BLOCK_ADDONS_NOT_INNER_PANEL \ + EIGEN_DOC_BLOCK_ADDONS_INNER_PANEL_IF # If the SKIP_FUNCTION_MACROS tag is set to YES (the default) then # doxygen's preprocessor will remove all references to function-like macros diff --git a/doc/InplaceDecomposition.dox b/doc/InplaceDecomposition.dox new file mode 100644 index 000000000..cb1c6d413 --- /dev/null +++ b/doc/InplaceDecomposition.dox @@ -0,0 +1,115 @@ +namespace Eigen { + +/** \eigenManualPage InplaceDecomposition Inplace matrix decompositions + +Starting from %Eigen 3.3, the LU, Cholesky, and QR decompositions can operate \em inplace, that is, directly within the given input matrix. +This feature is especially useful when dealing with huge matrices, and or when the available memory is very limited (embedded systems). + +To this end, the respective decomposition class must be instantiated with a Ref<> matrix type, and the decomposition object must be constructed with the input matrix as argument. As an example, let us consider an inplace LU decomposition with partial pivoting. + +Let's start with the basic inclusions, and declaration of a 2x2 matrix \c A: + + + + + + + +
codeoutput
\snippet TutorialInplaceLU.cpp init + \snippet TutorialInplaceLU.out init +
+ +No surprise here! Then, let's declare our inplace LU object \c lu, and check the content of the matrix \c A: + + + + + + +
\snippet TutorialInplaceLU.cpp declaration + \snippet TutorialInplaceLU.out declaration +
+ +Here, the \c lu object computes and stores the \c L and \c U factors within the memory held by the matrix \c A. +The coefficients of \c A have thus been destroyed during the factorization, and replaced by the L and U factors as one can verify: + + + + + + +
\snippet TutorialInplaceLU.cpp matrixLU + \snippet TutorialInplaceLU.out matrixLU +
+ +Then, one can use the \c lu object as usual, for instance to solve the Ax=b problem: + + + + + +
\snippet TutorialInplaceLU.cpp solve + \snippet TutorialInplaceLU.out solve +
+ +Here, since the content of the original matrix \c A has been lost, we had to declare a new matrix \c A0 to verify the result. + +Since the memory is shared between \c A and \c lu, modifying the matrix \c A will make \c lu invalid. +This can easily be verified by modifying the content of \c A and trying to solve the initial problem again: + + + + + + +
\snippet TutorialInplaceLU.cpp modifyA + \snippet TutorialInplaceLU.out modifyA +
+ +Note that there is no shared pointer under the hood: it is the \b responsibility \b of \b the \b user to keep the input matrix \c A alive for as long as \c lu is in use. + +If one wants to update the factorization with the modified A, one has to call the compute method as usual: + + + + + +
\snippet TutorialInplaceLU.cpp recompute + \snippet TutorialInplaceLU.out recompute +
+ +Note that calling compute does not change the memory which is referenced by the \c lu object. Therefore, if the compute method is called with another matrix \c A1 different from \c A, then the content of \c A1 won't be modified. It is still the memory of \c A that will be used to store the L and U factors of the matrix \c A1. +This can easily be verified as follows: + + + + + +
\snippet TutorialInplaceLU.cpp recompute_bis0 + \snippet TutorialInplaceLU.out recompute_bis0 +
+The matrix \c A1 is unchanged, and one can thus solve A1*x=b, and directly check the residual without any copy of \c A1: + + + + + +
\snippet TutorialInplaceLU.cpp recompute_bis1 + \snippet TutorialInplaceLU.out recompute_bis1 +
+ + +Here is the list of matrix decompositions supporting this inplace mechanism: + +- class LLT +- class LDLT +- class PartialPivLU +- class FullPivLU +- class HouseholderQR +- class ColPivHouseholderQR +- class FullPivHouseholderQR +- class CompleteOrthogonalDecomposition + +*/ + +} \ No newline at end of file diff --git a/doc/Manual.dox b/doc/Manual.dox index 70aaa9a42..a08609ad7 100644 --- a/doc/Manual.dox +++ b/doc/Manual.dox @@ -3,21 +3,31 @@ namespace Eigen { +/** \page UserManual_CustomizingEigen Extending/Customizing Eigen + %Eigen can be extended in several ways, for instance, by defining global methods, by inserting custom methods within main %Eigen's classes through the \ref TopicCustomizing_Plugins "plugin" mechanism, by adding support to \ref TopicCustomizing_CustomScalar "custom scalar types" etc. See below for the respective sub-topics. + - \subpage TopicCustomizing_Plugins + - \subpage TopicCustomizing_InheritingMatrix + - \subpage TopicCustomizing_CustomScalar + - \subpage TopicCustomizing_NullaryExpr + - \subpage TopicNewExpressionType + \sa \ref TopicPreprocessorDirectives +*/ + + /** \page UserManual_Generalities General topics - \subpage Eigen2ToEigen3 - \subpage TopicFunctionTakingEigenTypes - \subpage TopicPreprocessorDirectives - \subpage TopicAssertions - - \subpage TopicCustomizingEigen - \subpage TopicMultiThreading + - \subpage TopicUsingBlasLapack - \subpage TopicUsingIntelMKL - \subpage TopicCUDA - \subpage TopicPitfalls - \subpage TopicTemplateKeyword - - \subpage TopicNewExpressionType - \subpage UserManual_UnderstandingEigen */ - + /** \page UserManual_UnderstandingEigen Understanding Eigen - \subpage TopicInsideEigenExample - \subpage TopicClassHierarchy @@ -90,6 +100,9 @@ namespace Eigen { /** \addtogroup Householder_Module \ingroup DenseMatrixManipulation_Reference */ +/** \addtogroup CoeffwiseMathFunctions + \ingroup DenseMatrixManipulation_chapter */ + /** \addtogroup QuickRefPage \ingroup DenseMatrixManipulation_chapter */ @@ 
-103,6 +116,10 @@ namespace Eigen { \ingroup DenseLinearSolvers_chapter */ /** \addtogroup LeastSquares \ingroup DenseLinearSolvers_chapter */ +/** \addtogroup InplaceDecomposition + \ingroup DenseLinearSolvers_chapter */ +/** \addtogroup DenseDecompositionBenchmark + \ingroup DenseLinearSolvers_chapter */ /** \addtogroup DenseLinearSolvers_Reference \ingroup DenseLinearSolvers_chapter */ diff --git a/doc/MatrixfreeSolverExample.dox b/doc/MatrixfreeSolverExample.dox index 000cb0bbe..3efa292b5 100644 --- a/doc/MatrixfreeSolverExample.dox +++ b/doc/MatrixfreeSolverExample.dox @@ -6,12 +6,12 @@ namespace Eigen { \eigenManualPage MatrixfreeSolverExample Matrix-free solvers Iterative solvers such as ConjugateGradient and BiCGSTAB can be used in a matrix free context. To this end, user must provide a wrapper class inheriting EigenBase<> and implementing the following methods: - - Index rows() and Index cols(): returns number of rows and columns respectively - - operator* with and %Eigen dense column vector (its actual implementation goes in a specialization of the internal::generic_product_impl class) + - \c Index \c rows() and \c Index \c cols(): returns number of rows and columns respectively + - \c operator* with your type and an %Eigen dense column vector (its actual implementation goes in a specialization of the internal::generic_product_impl class) -Eigen::internal::traits<> must also be specialized for the wrapper type. +\c Eigen::internal::traits<> must also be specialized for the wrapper type. 
-Here is a complete example wrapping a Eigen::SparseMatrix: +Here is a complete example wrapping an Eigen::SparseMatrix: \include matrixfree_cg.cpp Output: \verbinclude matrixfree_cg.out diff --git a/doc/NewExpressionType.dox b/doc/NewExpressionType.dox index ad8b7f86b..c2f243312 100644 --- a/doc/NewExpressionType.dox +++ b/doc/NewExpressionType.dox @@ -2,6 +2,12 @@ namespace Eigen { /** \page TopicNewExpressionType Adding a new expression type + +\warning +Disclaimer: this page is tailored to very advanced users who are not afraid of dealing with some %Eigen's internal aspects. +In most cases, a custom expression can be avoided by either using custom \ref MatrixBase::unaryExpr "unary" or \ref MatrixBase::binaryExpr "binary" functors, +while extremely complex matrix manipulations can be achieved by a nullary functors as described in the \ref TopicCustomizing_NullaryExpr "previous page". + This page describes with the help of an example how to implement a new light-weight expression type in %Eigen. This consists of three parts: the expression type itself, a traits class containing compile-time @@ -130,7 +136,7 @@ function can be called. If all the fragments are combined, the following output is produced, showing that the program works as expected: -\verbinclude make_circulant.out +\include make_circulant.out */ } diff --git a/doc/Overview.dox b/doc/Overview.dox index 9ab96233a..dbb49bd21 100644 --- a/doc/Overview.dox +++ b/doc/Overview.dox @@ -17,7 +17,9 @@ You're a MatLab user? There is also a short AS The \b main \b documentation is organized into \em chapters covering different domains of features. They are themselves composed of \em user \em manual pages describing the different features in a comprehensive way, and \em reference pages that gives you access to the API documentation through the related Eigen's \em modules and \em classes. 
-Under the \subpage UserManual_Generalities section, you will find documentation on more general topics such as preprocessor directives, controlling assertions, multi-threading, MKL support, some Eigen's internal insights, and much more... +Under the \subpage UserManual_CustomizingEigen section, you will find discussions and examples on extending %Eigen's features and supporting custom scalar types. + +Under the \subpage UserManual_Generalities section, you will find documentation on more general topics such as preprocessor directives, controlling assertions, multi-threading, MKL support, some Eigen's internal insights, and much more... Finally, do not miss the search engine, useful to quickly get to the documentation of a given class or function. diff --git a/doc/PreprocessorDirectives.dox b/doc/PreprocessorDirectives.dox index 14e84bc20..2f9c4c370 100644 --- a/doc/PreprocessorDirectives.dox +++ b/doc/PreprocessorDirectives.dox @@ -49,6 +49,36 @@ are doing. the correct size. Not defined by default. +\section TopicPreprocessorDirectivesCppVersion C++ standard features + +By default, %Eigen strives to automatically detect and enable language features at compile-time based on +the information provided by the compiler. + + - \b EIGEN_MAX_CPP_VER - disables usage of C++ features requiring a version greater than EIGEN_MAX_CPP_VER. + Possible values are: 03, 11, 14, 17, etc. If not defined (the default), %Eigen enables all features supported + by the compiler. + +Individual features can be explicitly enabled or disabled by defining the following tokens to 1 or 0 respectively. +For instance, one might limit the C++ version to C++03 by defining EIGEN_MAX_CPP_VER=03, but still enable C99 math +functions by defining EIGEN_HAS_C99_MATH=1. + + - \b EIGEN_HAS_C99_MATH - controls the usage of C99 math functions such as erf, erfc, lgamma, etc. + Automatic detection disabled if EIGEN_MAX_CPP_VER<11.
+ - \b EIGEN_HAS_CXX11_MATH - controls the implementation of some functions such as round, log1p, isinf, isnan, etc. + Automatic detection disabled if EIGEN_MAX_CPP_VER<11. + - \b EIGEN_HAS_RVALUE_REFERENCES - defines whether rvalue references are supported + Automatic detection disabled if EIGEN_MAX_CPP_VER<11. + - \b EIGEN_HAS_STD_RESULT_OF - defines whether std::result_of is supported + Automatic detection disabled if EIGEN_MAX_CPP_VER<11. + - \b EIGEN_HAS_VARIADIC_TEMPLATES - defines whether variadic templates are supported + Automatic detection disabled if EIGEN_MAX_CPP_VER<11. + - \b EIGEN_HAS_CONSTEXPR - defines whether relaxed constant expressions are supported + Automatic detection disabled if EIGEN_MAX_CPP_VER<14. + - \b EIGEN_HAS_CXX11_CONTAINERS - defines whether STL's containers follow C++11 specifications + Automatic detection disabled if EIGEN_MAX_CPP_VER<11. + - \b EIGEN_HAS_CXX11_NOEXCEPT - defines whether noexcept is supported + Automatic detection disabled if EIGEN_MAX_CPP_VER<11. + \section TopicPreprocessorDirectivesAssertions Assertions The %Eigen library contains many assertions to guard against programming errors, both at compile time and at @@ -78,6 +108,9 @@ run time. However, these assertions do cost time and can thus be turned off. See \ref TopicMultiThreading for details. - \b EIGEN_DONT_VECTORIZE - disables explicit vectorization when defined. Not defined by default, unless alignment is disabled by %Eigen's platform test or the user defining \c EIGEN_DONT_ALIGN. + - \b EIGEN_UNALIGNED_VECTORIZE - disables/enables vectorization with unaligned stores. Default is 1 (enabled). + If set to 0 (disabled), then expressions for which the destination cannot be aligned are not vectorized (e.g., unaligned + small fixed size vectors or matrices) - \b EIGEN_FAST_MATH - enables some optimizations which might affect the accuracy of the result. This currently enables the SSE vectorization of sin() and cos(), and speedups sqrt() for single precision.
Defined to 1 by default. Define it to 0 to disable. diff --git a/doc/SparseLinearSystems.dox b/doc/SparseLinearSystems.dox index ee4f53a4e..fc33b93e7 100644 --- a/doc/SparseLinearSystems.dox +++ b/doc/SparseLinearSystems.dox @@ -76,6 +76,9 @@ They are summarized in the following tables: SPQR\link SPQRSupport_Module SPQRSupport \endlink QR factorization Any, rectangularfill-in reducing, multithreaded, fast dense algebra requires the SuiteSparse package, \b GPL recommended for linear least-squares problems, has a rank-revealing feature +PardisoLLT \n PardisoLDLT \n PardisoLU\link PardisoSupport_Module PardisoSupport \endlinkDirect LLt, LDLt, LU factorizationsSPD \n SPD \n SquareFill-in reducing, Leverage fast dense algebra, Multithreading + Requires the Intel MKL package, \b Proprietary + optimized for tough problems patterns, see also \link TopicUsingIntelMKL using MKL with Eigen \endlink Here \c SPD means symmetric positive definite. diff --git a/doc/SparseQuickReference.dox b/doc/SparseQuickReference.dox index e0a30edcc..a25622e80 100644 --- a/doc/SparseQuickReference.dox +++ b/doc/SparseQuickReference.dox @@ -206,7 +206,7 @@ See \ref TutorialSparse_SubMatrices and below for read-write sub-matrices. 
sm1.innerVectors(start, size); // RW sm1.leftCols(size); // RW sm2.rightCols(size); // RO because sm2 is row-major - sm1.middleRows(start, numRows); // RO becasue sm1 is column-major + sm1.middleRows(start, numRows); // RO because sm1 is column-major sm1.middleCols(start, numCols); // RW sm1.col(j); // RW \endcode @@ -253,6 +253,20 @@ If the matrix is not in compressed form, makeCompressed() should be called befor Note that these functions are mostly provided for interoperability purposes with external libraries.\n A better access to the values of the matrix is done by using the InnerIterator class as described in \link TutorialSparse the Tutorial Sparse \endlink section +Mapping external buffers + + +\code +int outerIndexPtr[cols+1]; +int innerIndices[nnz]; +double values[nnz]; +Map > sm1(rows,cols,nnz,outerIndexPtr, // read-write + innerIndices,values); +Map > sm2(...); // read-only +\endcode + +As for dense matrices, class Map can be used to see external buffers as an %Eigen's SparseMatrix object. + */ } diff --git a/doc/TopicAssertions.dox b/doc/TopicAssertions.dox index 4ead40174..c8b4d84f2 100644 --- a/doc/TopicAssertions.dox +++ b/doc/TopicAssertions.dox @@ -16,7 +16,7 @@ Both eigen_assert and eigen_plain_assert are defined in Macros.h. Defining eigen #include #undef eigen_assert #define eigen_assert(x) \ - if (!x) { throw (std::runtime_error("Put your message here")); } + if (!(x)) { throw (std::runtime_error("Put your message here")); } \endcode \subsection DisableAssert Disabling assertions diff --git a/doc/TopicLinearAlgebraDecompositions.dox b/doc/TopicLinearAlgebraDecompositions.dox index 5bcff2c96..491470627 100644 --- a/doc/TopicLinearAlgebraDecompositions.dox +++ b/doc/TopicLinearAlgebraDecompositions.dox @@ -4,6 +4,7 @@ namespace Eigen { This page presents a catalogue of the dense matrix decompositions offered by Eigen. For an introduction on linear solvers and decompositions, check this \link TutorialLinearAlgebra page \endlink. 
+To get an overview of the true relative speed of the different decompositions, check this \link DenseDecompositionBenchmark benchmark \endlink. \section TopicLinAlgBigTable Catalogue of decompositions offered by Eigen @@ -256,6 +257,7 @@ For an introduction on linear solvers and decompositions, check this \link Tutor
+ */ } diff --git a/doc/UnalignedArrayAssert.dox b/doc/UnalignedArrayAssert.dox index f0f84d25f..95d95a2d5 100644 --- a/doc/UnalignedArrayAssert.dox +++ b/doc/UnalignedArrayAssert.dox @@ -92,27 +92,28 @@ Note that here, Eigen::Quaternionf is only used as an example, more generally th \section explanation General explanation of this assertion -\ref TopicFixedSizeVectorizable "fixed-size vectorizable Eigen objects" must absolutely be created at 16-byte-aligned locations, otherwise SIMD instructions adressing them will crash. +\ref TopicFixedSizeVectorizable "fixed-size vectorizable Eigen objects" must absolutely be created at 16-byte-aligned locations, otherwise SIMD instructions addressing them will crash. Eigen normally takes care of these alignment issues for you, by setting an alignment attribute on them and by overloading their "operator new". However there are a few corner cases where these alignment settings get overridden: they are the possible causes for this assertion. -\section getrid I don't care about vectorization, how do I get rid of that stuff? +\section getrid I don't care about optimal vectorization, how do I get rid of that stuff? -Two possibilities: +Three possibilities:
    -
  • Define EIGEN_DONT_ALIGN_STATICALLY. That disables all 128-bit static alignment code, while keeping 128-bit heap alignment. This has the effect of - disabling vectorization for fixed-size objects (like Matrix4d) while keeping vectorization of dynamic-size objects - (like MatrixXd). But do note that this breaks ABI compatibility with the default behavior of 128-bit static alignment.
  • -
  • Or define both EIGEN_DONT_VECTORIZE and EIGEN_DISABLE_UNALIGNED_ARRAY_ASSERT. This keeps the - 128-bit alignment code and thus preserves ABI compatibility, but completely disables vectorization.
  • +
  • Use the \c DontAlign option to Matrix, Array, Quaternion, etc. objects that give you trouble. This way Eigen won't try to align them, and thus won't assume any special alignment. On the down side, you will pay the cost of unaligned loads/stores for them, but on modern CPUs, the overhead is either null or marginal. See \link StructHavingEigenMembers_othersolutions here \endlink for an example.
  • +
  • Define \link TopicPreprocessorDirectivesPerformance EIGEN_DONT_ALIGN_STATICALLY \endlink. That disables all 16-byte (and above) static alignment code, while keeping 16-byte (or above) heap alignment. This has the effect of + vectorizing fixed-size objects (like Matrix4d) through unaligned stores (as controlled by \link TopicPreprocessorDirectivesPerformance EIGEN_UNALIGNED_VECTORIZE \endlink), while keeping unchanged the vectorization of dynamic-size objects + (like MatrixXd). But do note that this breaks ABI compatibility with the default behavior of static alignment.
  • +
  • Or define both \link TopicPreprocessorDirectivesPerformance EIGEN_DONT_VECTORIZE \endlink and EIGEN_DISABLE_UNALIGNED_ARRAY_ASSERT. This keeps the + 16-byte alignment code and thus preserves ABI compatibility, but completely disables vectorization.
-If you want to know why defining EIGEN_DONT_VECTORIZE does not by itself disable 128-bit alignment and the assertion, here's the explanation: +If you want to know why defining EIGEN_DONT_VECTORIZE does not by itself disable 16-byte alignment and the assertion, here's the explanation: It doesn't disable the assertion, because otherwise code that runs fine without vectorization would suddenly crash when enabling vectorization. -It doesn't disable 128bit alignment, because that would mean that vectorized and non-vectorized code are not mutually ABI-compatible. This ABI compatibility is very important, even for people who develop only an in-house application, as for instance one may want to have in the same application a vectorized path and a non-vectorized path. +It doesn't disable 16-byte alignment, because that would mean that vectorized and non-vectorized code are not mutually ABI-compatible. This ABI compatibility is very important, even for people who develop only an in-house application, as for instance one may want to have in the same application a vectorized path and a non-vectorized path. */ diff --git a/doc/UsingBlasLapackBackends.dox b/doc/UsingBlasLapackBackends.dox new file mode 100644 index 000000000..caa597122 --- /dev/null +++ b/doc/UsingBlasLapackBackends.dox @@ -0,0 +1,133 @@ +/* + Copyright (c) 2011, Intel Corporation. All rights reserved. + Copyright (C) 2011-2016 Gael Guennebaud + + Redistribution and use in source and binary forms, with or without modification, + are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. 
+ * Neither the name of Intel Corporation nor the names of its contributors may + be used to endorse or promote products derived from this software without + specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR + ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON + ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + ******************************************************************************** + * Content : Documentation on the use of BLAS/LAPACK libraries through Eigen + ******************************************************************************** +*/ + +namespace Eigen { + +/** \page TopicUsingBlasLapack Using BLAS/LAPACK from %Eigen + + +Since %Eigen version 3.3 and later, any F77 compatible BLAS or LAPACK libraries can be used as backends for dense matrix products and dense matrix decompositions. +For instance, one can use Intel® MKL, Apple's Accelerate framework on OSX, OpenBLAS, Netlib LAPACK, etc. + +Do not miss this \link TopicUsingIntelMKL page \endlink for further discussions on the specific use of Intel® MKL (also includes VML, PARDISO, etc.) + +In order to use an external BLAS and/or LAPACK library, you must link your own application to the respective libraries and their dependencies.
+For LAPACK, you must also link to the standard Lapacke library, which is used as a convenient thin layer between %Eigen's C++ code and LAPACK F77 interface. Then you must activate their usage by defining one or multiple of the following macros (\b before including any %Eigen's header): + +\note For Mac users, in order to use the lapack version shipped with the Accelerate framework, you also need the lapacke library. +Using MacPorts, this is as easy as: +\code +sudo port install lapack +\endcode +and then use the following link flags: \c -framework \c Accelerate \c /opt/local/lib/lapack/liblapacke.dylib + + + + + +
\c EIGEN_USE_BLAS Enables the use of external BLAS level 2 and 3 routines (compatible with any F77 BLAS interface)
\c EIGEN_USE_LAPACKE Enables the use of external Lapack routines via the Lapacke C interface to Lapack (compatible with any F77 LAPACK interface)
\c EIGEN_USE_LAPACKE_STRICT Same as \c EIGEN_USE_LAPACKE but algorithms of lower numerical robustness are disabled. \n This currently concerns only JacobiSVD which otherwise would be replaced by \c gesvd that is less robust than Jacobi rotations.
+ +When doing so, a number of %Eigen's algorithms are silently substituted with calls to BLAS or LAPACK routines. +These substitutions apply only for \b Dynamic \b or \b large enough objects with one of the following four standard scalar types: \c float, \c double, \c complex, and \c complex. +Operations on other scalar types or mixing reals and complexes will continue to use the built-in algorithms. + +The breadth of %Eigen functionality that can be substituted is listed in the table below. + + + + + + + + + + +
Functional domainCode exampleBLAS/LAPACK routines
Matrix-matrix operations \n \c EIGEN_USE_BLAS \code +m1*m2.transpose(); +m1.selfadjointView()*m2; +m1*m2.triangularView(); +m1.selfadjointView().rankUpdate(m2,1.0); +\endcode\code +?gemm +?symm/?hemm +?trmm +dsyrk/ssyrk +\endcode
Matrix-vector operations \n \c EIGEN_USE_BLAS \code +m1.adjoint()*b; +m1.selfadjointView()*b; +m1.triangularView()*b; +\endcode\code +?gemv +?symv/?hemv +?trmv +\endcode
LU decomposition \n \c EIGEN_USE_LAPACKE \n \c EIGEN_USE_LAPACKE_STRICT \code +v1 = m1.lu().solve(v2); +\endcode\code +?getrf +\endcode
Cholesky decomposition \n \c EIGEN_USE_LAPACKE \n \c EIGEN_USE_LAPACKE_STRICT \code +v1 = m2.selfadjointView().llt().solve(v2); +\endcode\code +?potrf +\endcode
QR decomposition \n \c EIGEN_USE_LAPACKE \n \c EIGEN_USE_LAPACKE_STRICT \code +m1.householderQr(); +m1.colPivHouseholderQr(); +\endcode\code +?geqrf +?geqp3 +\endcode
Singular value decomposition \n \c EIGEN_USE_LAPACKE \code +JacobiSVD svd; +svd.compute(m1, ComputeThinV); +\endcode\code +?gesvd +\endcode
Eigen-value decompositions \n \c EIGEN_USE_LAPACKE \n \c EIGEN_USE_LAPACKE_STRICT \code +EigenSolver es(m1); +ComplexEigenSolver ces(m1); +SelfAdjointEigenSolver saes(m1+m1.transpose()); +GeneralizedSelfAdjointEigenSolver + gsaes(m1+m1.transpose(),m2+m2.transpose()); +\endcode\code +?gees +?gees +?syev/?heev +?syev/?heev, +?potrf +\endcode
Schur decomposition \n \c EIGEN_USE_LAPACKE \n \c EIGEN_USE_LAPACKE_STRICT \code +RealSchur schurR(m1); +ComplexSchur schurC(m1); +\endcode\code +?gees +\endcode
+In the examples, m1 and m2 are dense matrices and v1 and v2 are dense vectors. + +*/ + +} diff --git a/doc/UsingIntelMKL.dox b/doc/UsingIntelMKL.dox index dbe559e53..a1a3a18f2 100644 --- a/doc/UsingIntelMKL.dox +++ b/doc/UsingIntelMKL.dox @@ -32,107 +32,45 @@ namespace Eigen { -/** \page TopicUsingIntelMKL Using Intel® Math Kernel Library from Eigen +/** \page TopicUsingIntelMKL Using Intel® MKL from %Eigen -\section TopicUsingIntelMKL_Intro Eigen and Intel® Math Kernel Library (Intel® MKL) + + +Since %Eigen version 3.1 and later, users can benefit from built-in Intel® Math Kernel Library (MKL) optimizations with an installed copy of Intel MKL 10.3 (or later). -Since Eigen version 3.1 and later, users can benefit from built-in Intel MKL optimizations with an installed copy of Intel MKL 10.3 (or later). Intel MKL provides highly optimized multi-threaded mathematical routines for x86-compatible architectures. Intel MKL is available on Linux, Mac and Windows for both Intel64 and IA32 architectures. \note Intel® MKL is a proprietary software and it is the responsibility of users to buy or register for community (free) Intel MKL licenses for their products. Moreover, the license of the user product has to allow linking to proprietary software that excludes any unmodified versions of the GPL. -Using Intel MKL through Eigen is easy: --# define the \c EIGEN_USE_MKL_ALL macro before including any Eigen's header +Using Intel MKL through %Eigen is easy: +-# define the \c EIGEN_USE_MKL_ALL macro before including any %Eigen's header -# link your program to MKL libraries (see the MKL linking advisor) -# on a 64bits system, you must use the LP64 interface (not the ILP64 one) -When doing so, a number of Eigen's algorithms are silently substituted with calls to Intel MKL routines. +When doing so, a number of %Eigen's algorithms are silently substituted with calls to Intel MKL routines. 
These substitutions apply only for \b Dynamic \b or \b large enough objects with one of the following four standard scalar types: \c float, \c double, \c complex, and \c complex. Operations on other scalar types or mixing reals and complexes will continue to use the built-in algorithms. In addition you can choose which parts will be substituted by defining one or multiple of the following macros: - - - + + +
\c EIGEN_USE_BLAS Enables the use of external BLAS level 2 and 3 routines (compatible with any F77 BLAS interface, not only Intel MKL)
\c EIGEN_USE_LAPACKE Enables the use of external Lapack routines via the Intel Lapacke C interface to Lapack (currently works with Intel MKL only)
\c EIGEN_USE_LAPACKE_STRICT Same as \c EIGEN_USE_LAPACKE but algorithm of lower robustness are disabled. This currently concerns only JacobiSVD which otherwise would be replaced by \c gesvd that is less robust than Jacobi rotations.
\c EIGEN_USE_BLAS Enables the use of external BLAS level 2 and 3 routines
\c EIGEN_USE_LAPACKE Enables the use of external Lapack routines via the Lapacke C interface to Lapack
\c EIGEN_USE_LAPACKE_STRICT Same as \c EIGEN_USE_LAPACKE but algorithms of lower robustness are disabled. \n This currently concerns only JacobiSVD which otherwise would be replaced by \c gesvd that is less robust than Jacobi rotations.
\c EIGEN_USE_MKL_VML Enables the use of Intel VML (vector operations)
\c EIGEN_USE_MKL_ALL Defines \c EIGEN_USE_BLAS, \c EIGEN_USE_LAPACKE, and \c EIGEN_USE_MKL_VML
+Note that the BLAS and LAPACKE backends can be enabled for any F77 compatible BLAS and LAPACK libraries. See this \link TopicUsingBlasLapack page \endlink for the details. + Finally, the PARDISO sparse solver shipped with Intel MKL can be used through the \ref PardisoLU, \ref PardisoLLT and \ref PardisoLDLT classes of the \ref PardisoSupport_Module. - -\section TopicUsingIntelMKL_SupportedFeatures List of supported features - -The breadth of Eigen functionality covered by Intel MKL is listed in the table below. +The following table summarizes the list of functions covered by \c EIGEN_USE_MKL_VML: - - - - - - - - - - +
Functional domainCode exampleMKL routines
Matrix-matrix operations \n \c EIGEN_USE_BLAS \code -m1*m2.transpose(); -m1.selfadjointView()*m2; -m1*m2.triangularView(); -m1.selfadjointView().rankUpdate(m2,1.0); -\endcode\code -?gemm -?symm/?hemm -?trmm -dsyrk/ssyrk -\endcode
Matrix-vector operations \n \c EIGEN_USE_BLAS \code -m1.adjoint()*b; -m1.selfadjointView()*b; -m1.triangularView()*b; -\endcode\code -?gemv -?symv/?hemv -?trmv -\endcode
LU decomposition \n \c EIGEN_USE_LAPACKE \n \c EIGEN_USE_LAPACKE_STRICT \code -v1 = m1.lu().solve(v2); -\endcode\code -?getrf -\endcode
Cholesky decomposition \n \c EIGEN_USE_LAPACKE \n \c EIGEN_USE_LAPACKE_STRICT \code -v1 = m2.selfadjointView().llt().solve(v2); -\endcode\code -?potrf -\endcode
QR decomposition \n \c EIGEN_USE_LAPACKE \n \c EIGEN_USE_LAPACKE_STRICT \code -m1.householderQr(); -m1.colPivHouseholderQr(); -\endcode\code -?geqrf -?geqp3 -\endcode
Singular value decomposition \n \c EIGEN_USE_LAPACKE \code -JacobiSVD svd; -svd.compute(m1, ComputeThinV); -\endcode\code -?gesvd -\endcode
Eigen-value decompositions \n \c EIGEN_USE_LAPACKE \n \c EIGEN_USE_LAPACKE_STRICT \code -EigenSolver es(m1); -ComplexEigenSolver ces(m1); -SelfAdjointEigenSolver saes(m1+m1.transpose()); -GeneralizedSelfAdjointEigenSolver - gsaes(m1+m1.transpose(),m2+m2.transpose()); -\endcode\code -?gees -?gees -?syev/?heev -?syev/?heev, -?potrf -\endcode
Schur decomposition \n \c EIGEN_USE_LAPACKE \n \c EIGEN_USE_LAPACKE_STRICT \code -RealSchur schurR(m1); -ComplexSchur schurC(m1); -\endcode\code -?gees -\endcode
Vector Math \n \c EIGEN_USE_MKL_VML \code +
Code exampleMKL routines
\code v2=v1.array().sin(); v2=v1.array().asin(); v2=v1.array().cos(); @@ -156,7 +94,7 @@ v?Sqr v?Powx \endcode
-In the examples, m1 and m2 are dense matrices and v1 and v2 are dense vectors. +In the examples, v1 and v2 are dense vectors. \section TopicUsingIntelMKL_Links Links diff --git a/doc/eigendoxy.css b/doc/eigendoxy.css index 60243d870..6274e6c70 100644 --- a/doc/eigendoxy.css +++ b/doc/eigendoxy.css @@ -45,7 +45,7 @@ pre.fragment { /* Common style for all Eigen's tables */ -table.example, table.manual, table.manual-vl { +table.example, table.manual, table.manual-vl, table.manual-hl { max-width:100%; border-collapse: collapse; border-style: solid; @@ -58,7 +58,7 @@ table.example, table.manual, table.manual-vl { -webkit-box-shadow: 5px 5px 5px rgba(0, 0, 0, 0.15); } -table.example th, table.manual th, table.manual-vl th { +table.example th, table.manual th, table.manual-vl th, table.manual-hl th { padding: 0.5em 0.5em 0.5em 0.5em; text-align: left; padding-right: 1em; @@ -70,7 +70,7 @@ table.example th, table.manual th, table.manual-vl th { filter: progid:DXImageTransform.Microsoft.gradient(startColorstr='#FFFFFF', endColorstr='#F4F4E5'); } -table.example td, table.manual td, table.manual-vl td { +table.example td, table.manual td, table.manual-vl td, table.manual-hl td { vertical-align:top; border-width: 1px; border-color: #cccccc; @@ -108,15 +108,15 @@ table.example td { /* standard class for the manual */ -table.manual, table.manual-vl { +table.manual, table.manual-vl, table.manual-hl { padding: 0.2em 0em 0.5em 0em; } -table.manual th, table.manual-vl th { +table.manual th, table.manual-vl th, table.manual-hl th { margin: 0em 0em 0.3em 0em; } -table.manual td, table.manual-vl td { +table.manual td, table.manual-vl td, table.manual-hl td { padding: 0.3em 0.5em 0.3em 0.5em; vertical-align:top; border-width: 1px; @@ -136,6 +136,16 @@ table.manual-vl th.inter { border-style: solid solid solid solid; } +table.manual-hl td { + border-color: #cccccc; + border-width: 1px; + border-style: solid none solid none; +} + +table td.code { + font-family: monospace; +} + h2 { 
margin-top:2em; border-style: none none solid none; @@ -166,6 +176,11 @@ div.toc ul { margin: 0.2em 0 0.4em 0.5em; } +span.cpp11,span.cpp14,span.cpp17 { + color: #119911; + font-weight: bold; +} + /**** old Eigen's styles ****/ @@ -177,8 +192,8 @@ table.tutorial_code td { /* Whenever doxygen meets a '\n' or a '
', it will put - * the text containing the characted into a

. - * This little hack togehter with table.tutorial_code td.note + * the text containing the character into a

. + * This little hack together with table.tutorial_code td.note * aims at fixing this issue. */ table.tutorial_code td.note p.starttd { margin: 0px; diff --git a/doc/examples/CMakeLists.txt b/doc/examples/CMakeLists.txt index 08cf8efd7..f7a19055f 100644 --- a/doc/examples/CMakeLists.txt +++ b/doc/examples/CMakeLists.txt @@ -14,3 +14,8 @@ foreach(example_src ${examples_SRCS}) ) add_dependencies(all_examples ${example}) endforeach(example_src) + +check_cxx_compiler_flag("-std=c++11" EIGEN_COMPILER_SUPPORT_CPP11) +if(EIGEN_COMPILER_SUPPORT_CPP11) +ei_add_target_property(nullary_indexing COMPILE_FLAGS "-std=c++11") +endif() \ No newline at end of file diff --git a/doc/examples/Cwise_erf.cpp b/doc/examples/Cwise_erf.cpp new file mode 100644 index 000000000..e7cd2c1c0 --- /dev/null +++ b/doc/examples/Cwise_erf.cpp @@ -0,0 +1,9 @@ +#include +#include +#include +using namespace Eigen; +int main() +{ + Array4d v(-0.5,2,0,-7); + std::cout << v.erf() << std::endl; +} diff --git a/doc/examples/Cwise_erfc.cpp b/doc/examples/Cwise_erfc.cpp new file mode 100644 index 000000000..d8bb04c30 --- /dev/null +++ b/doc/examples/Cwise_erfc.cpp @@ -0,0 +1,9 @@ +#include +#include +#include +using namespace Eigen; +int main() +{ + Array4d v(-0.5,2,0,-7); + std::cout << v.erfc() << std::endl; +} diff --git a/doc/examples/Cwise_lgamma.cpp b/doc/examples/Cwise_lgamma.cpp new file mode 100644 index 000000000..f1c4f503e --- /dev/null +++ b/doc/examples/Cwise_lgamma.cpp @@ -0,0 +1,9 @@ +#include +#include +#include +using namespace Eigen; +int main() +{ + Array4d v(0.5,10,0,-1); + std::cout << v.lgamma() << std::endl; +} \ No newline at end of file diff --git a/doc/examples/TutorialInplaceLU.cpp b/doc/examples/TutorialInplaceLU.cpp new file mode 100644 index 000000000..cb9c59b60 --- /dev/null +++ b/doc/examples/TutorialInplaceLU.cpp @@ -0,0 +1,61 @@ +#include +struct init { + init() { std::cout << "[" << "init" << "]" << std::endl; } +}; +init init_obj; +// [init] +#include +#include + +using 
namespace std; +using namespace Eigen; + +int main() +{ + MatrixXd A(2,2); + A << 2, -1, 1, 3; + cout << "Here is the input matrix A before decomposition:\n" << A << endl; +cout << "[init]" << endl; + +cout << "[declaration]" << endl; + PartialPivLU > lu(A); + cout << "Here is the input matrix A after decomposition:\n" << A << endl; +cout << "[declaration]" << endl; + +cout << "[matrixLU]" << endl; + cout << "Here is the matrix storing the L and U factors:\n" << lu.matrixLU() << endl; +cout << "[matrixLU]" << endl; + +cout << "[solve]" << endl; + MatrixXd A0(2,2); A0 << 2, -1, 1, 3; + VectorXd b(2); b << 1, 2; + VectorXd x = lu.solve(b); + cout << "Residual: " << (A0 * x - b).norm() << endl; +cout << "[solve]" << endl; + +cout << "[modifyA]" << endl; + A << 3, 4, -2, 1; + x = lu.solve(b); + cout << "Residual: " << (A0 * x - b).norm() << endl; +cout << "[modifyA]" << endl; + +cout << "[recompute]" << endl; + A0 = A; // save A + lu.compute(A); + x = lu.solve(b); + cout << "Residual: " << (A0 * x - b).norm() << endl; +cout << "[recompute]" << endl; + +cout << "[recompute_bis0]" << endl; + MatrixXd A1(2,2); + A1 << 5,-2,3,4; + lu.compute(A1); + cout << "Here is the input matrix A1 after decomposition:\n" << A1 << endl; +cout << "[recompute_bis0]" << endl; + +cout << "[recompute_bis1]" << endl; + x = lu.solve(b); + cout << "Residual: " << (A1 * x - b).norm() << endl; +cout << "[recompute_bis1]" << endl; + +} diff --git a/doc/examples/make_circulant2.cpp b/doc/examples/make_circulant2.cpp new file mode 100644 index 000000000..95d3dd31a --- /dev/null +++ b/doc/examples/make_circulant2.cpp @@ -0,0 +1,52 @@ +#include +#include + +using namespace Eigen; + +// [circulant_func] +template +class circulant_functor { + const ArgType &m_vec; +public: + circulant_functor(const ArgType& arg) : m_vec(arg) {} + + const typename ArgType::Scalar& operator() (Index row, Index col) const { + Index index = row - col; + if (index < 0) index += m_vec.size(); + return m_vec(index); + } +}; 
+// [circulant_func] + +// [square] +template +struct circulant_helper { + typedef Matrix MatrixType; +}; +// [square] + +// [makeCirculant] +template +CwiseNullaryOp, typename circulant_helper::MatrixType> +makeCirculant(const Eigen::MatrixBase& arg) +{ + typedef typename circulant_helper::MatrixType MatrixType; + return MatrixType::NullaryExpr(arg.size(), arg.size(), circulant_functor(arg.derived())); +} +// [makeCirculant] + +// [main] +int main() +{ + Eigen::VectorXd vec(4); + vec << 1, 2, 4, 8; + Eigen::MatrixXd mat; + mat = makeCirculant(vec); + std::cout << mat << std::endl; +} +// [main] diff --git a/doc/examples/nullary_indexing.cpp b/doc/examples/nullary_indexing.cpp new file mode 100644 index 000000000..e27c3585a --- /dev/null +++ b/doc/examples/nullary_indexing.cpp @@ -0,0 +1,66 @@ +#include +#include + +using namespace Eigen; + +// [functor] +template +class indexing_functor { + const ArgType &m_arg; + const RowIndexType &m_rowIndices; + const ColIndexType &m_colIndices; +public: + typedef Matrix MatrixType; + + indexing_functor(const ArgType& arg, const RowIndexType& row_indices, const ColIndexType& col_indices) + : m_arg(arg), m_rowIndices(row_indices), m_colIndices(col_indices) + {} + + const typename ArgType::Scalar& operator() (Index row, Index col) const { + return m_arg(m_rowIndices[row], m_colIndices[col]); + } +}; +// [functor] + +// [function] +template +CwiseNullaryOp, typename indexing_functor::MatrixType> +indexing(const Eigen::MatrixBase& arg, const RowIndexType& row_indices, const ColIndexType& col_indices) +{ + typedef indexing_functor Func; + typedef typename Func::MatrixType MatrixType; + return MatrixType::NullaryExpr(row_indices.size(), col_indices.size(), Func(arg.derived(), row_indices, col_indices)); +} +// [function] + + +int main() +{ + std::cout << "[main1]\n"; + Eigen::MatrixXi A = Eigen::MatrixXi::Random(4,4); + Array3i ri(1,2,1); + ArrayXi ci(6); ci << 3,2,1,0,0,2; + Eigen::MatrixXi B = indexing(A, ri, ci); + std::cout << 
"A =" << std::endl; + std::cout << A << std::endl << std::endl; + std::cout << "A([" << ri.transpose() << "], [" << ci.transpose() << "]) =" << std::endl; + std::cout << B << std::endl; + std::cout << "[main1]\n"; + + std::cout << "[main2]\n"; + B = indexing(A, ri+1, ci); + std::cout << "A(ri+1,ci) =" << std::endl; + std::cout << B << std::endl << std::endl; +#if __cplusplus >= 201103L + B = indexing(A, ArrayXi::LinSpaced(13,0,12).unaryExpr([](int x){return x%4;}), ArrayXi::LinSpaced(4,0,3)); + std::cout << "A(ArrayXi::LinSpaced(13,0,12).unaryExpr([](int x){return x%4;}), ArrayXi::LinSpaced(4,0,3)) =" << std::endl; + std::cout << B << std::endl << std::endl; +#endif + std::cout << "[main2]\n"; +} + diff --git a/doc/ftv2node.png b/doc/ftv2node.png new file mode 100644 index 000000000..63c605bb4 Binary files /dev/null and b/doc/ftv2node.png differ diff --git a/doc/ftv2pnode.png b/doc/ftv2pnode.png new file mode 100644 index 000000000..c6ee22f93 Binary files /dev/null and b/doc/ftv2pnode.png differ diff --git a/doc/snippets/CMakeLists.txt b/doc/snippets/CMakeLists.txt index 1135900cf..1baf32fba 100644 --- a/doc/snippets/CMakeLists.txt +++ b/doc/snippets/CMakeLists.txt @@ -24,5 +24,3 @@ foreach(snippet_src ${snippets_SRCS}) set_source_files_properties(${CMAKE_CURRENT_BINARY_DIR}/${compile_snippet_src} PROPERTIES OBJECT_DEPENDS ${snippet_src}) endforeach(snippet_src) - -ei_add_target_property(compile_tut_arithmetic_transpose_aliasing COMPILE_FLAGS -DEIGEN_NO_DEBUG) diff --git a/doc/snippets/Cwise_erf.cpp b/doc/snippets/Cwise_erf.cpp deleted file mode 100644 index 7f51c1b6a..000000000 --- a/doc/snippets/Cwise_erf.cpp +++ /dev/null @@ -1,2 +0,0 @@ -Array4d v(-0.5,2,0,-7); -cout << v.erf() << endl; diff --git a/doc/snippets/Cwise_erfc.cpp b/doc/snippets/Cwise_erfc.cpp deleted file mode 100644 index f0453d4b1..000000000 --- a/doc/snippets/Cwise_erfc.cpp +++ /dev/null @@ -1,2 +0,0 @@ -Array4d v(-0.5,2,0,-7); -cout << v.erfc() << endl; diff --git 
a/doc/snippets/Cwise_lgamma.cpp b/doc/snippets/Cwise_lgamma.cpp deleted file mode 100644 index cbc69b989..000000000 --- a/doc/snippets/Cwise_lgamma.cpp +++ /dev/null @@ -1,2 +0,0 @@ -Array4d v(0.5,10,0,-1); -cout << v.lgamma() << endl; \ No newline at end of file diff --git a/doc/snippets/SparseMatrix_coeffs.cpp b/doc/snippets/SparseMatrix_coeffs.cpp new file mode 100644 index 000000000..f71a69b07 --- /dev/null +++ b/doc/snippets/SparseMatrix_coeffs.cpp @@ -0,0 +1,9 @@ +SparseMatrix A(3,3); +A.insert(1,2) = 0; +A.insert(0,1) = 1; +A.insert(2,0) = 2; +A.makeCompressed(); +cout << "The matrix A is:" << endl << MatrixXd(A) << endl; +cout << "it has " << A.nonZeros() << " stored non zero coefficients that are: " << A.coeffs().transpose() << endl; +A.coeffs() += 10; +cout << "After adding 10 to every stored non zero coefficient, the matrix A is:" << endl << MatrixXd(A) << endl; diff --git a/doc/snippets/compile_snippet.cpp.in b/doc/snippets/compile_snippet.cpp.in index fdae39bcf..d63f371a3 100644 --- a/doc/snippets/compile_snippet.cpp.in +++ b/doc/snippets/compile_snippet.cpp.in @@ -1,5 +1,8 @@ -#include +static bool eigen_did_assert = false; +#define eigen_assert(X) if(!eigen_did_assert && !(X)){ std::cout << "### Assertion raised in " << __FILE__ << ":" << __LINE__ << ":\n" #X << "\n### The following would happen without assertions:\n"; eigen_did_assert = true;} + #include +#include #ifndef M_PI #define M_PI 3.1415926535897932384626433832795 diff --git a/doc/special_examples/random_cpp11.cpp b/doc/special_examples/random_cpp11.cpp index adc3c110c..33744c051 100644 --- a/doc/special_examples/random_cpp11.cpp +++ b/doc/special_examples/random_cpp11.cpp @@ -7,7 +7,7 @@ using namespace Eigen; int main() { std::default_random_engine generator; std::poisson_distribution distribution(4.1); - auto poisson = [&] (Eigen::Index) {return distribution(generator);}; + auto poisson = [&] () {return distribution(generator);}; RowVectorXi v = RowVectorXi::NullaryExpr(10, poisson ); 
std::cout << v << "\n"; diff --git a/lapack/svd.cpp b/lapack/svd.cpp index df77a371c..77b302b6b 100644 --- a/lapack/svd.cpp +++ b/lapack/svd.cpp @@ -124,14 +124,15 @@ EIGEN_LAPACK_FUNC(gesvd,(char *jobu, char *jobv, int *m, int* n, Scalar* a, int JacobiSVD svd(mat,option); make_vector(s,diag_size) = svd.singularValues().head(diag_size); - + { if(*jobu=='A') matrix(u,*m,*m,*ldu) = svd.matrixU(); else if(*jobu=='S') matrix(u,*m,diag_size,*ldu) = svd.matrixU(); - else if(*jobu=='O') matrix(a,*m,diag_size,*lda) = svd.matrixU(); - + else if(*jobu=='O') matrix(a,*m,diag_size,*lda) = svd.matrixU(); + } + { if(*jobv=='A') matrix(vt,*n,*n,*ldvt) = svd.matrixV().adjoint(); else if(*jobv=='S') matrix(vt,diag_size,*n,*ldvt) = svd.matrixV().adjoint(); else if(*jobv=='O') matrix(a,diag_size,*n,*lda) = svd.matrixV().adjoint(); - + } return 0; } diff --git a/scripts/buildtests.in b/scripts/buildtests.in index d2fd10276..526d5b74b 100755 --- a/scripts/buildtests.in +++ b/scripts/buildtests.in @@ -2,7 +2,7 @@ if [[ $# != 1 || $1 == *help ]] then - echo "usage: ./check regexp" + echo "usage: $0 regexp" echo " Builds tests matching the regexp." echo " The EIGEN_MAKE_ARGS environment variable allows to pass args to 'make'." echo " For example, to launch 5 concurrent builds, use EIGEN_MAKE_ARGS='-j5'" diff --git a/scripts/check.in b/scripts/check.in index a90061a57..7717e2d93 100755 --- a/scripts/check.in +++ b/scripts/check.in @@ -3,7 +3,7 @@ if [[ $# != 1 || $1 == *help ]] then - echo "usage: ./check regexp" + echo "usage: $0 regexp" echo " Builds and runs tests matching the regexp." echo " The EIGEN_MAKE_ARGS environment variable allows to pass args to 'make'." 
echo " For example, to launch 5 concurrent builds, use EIGEN_MAKE_ARGS='-j5'" diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 7bed6a45c..e17985107 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -147,7 +147,7 @@ ei_add_test(nomalloc) ei_add_test(first_aligned) ei_add_test(nullary) ei_add_test(mixingtypes) -ei_add_test(packetmath) +ei_add_test(packetmath "-DEIGEN_FAST_MATH=1") ei_add_test(unalignedassert) ei_add_test(vectorization_logic) ei_add_test(basicstuff) @@ -258,6 +258,11 @@ ei_add_test(rvalue_types) ei_add_test(dense_storage) ei_add_test(ctorleak) ei_add_test(mpl2only) +ei_add_test(inplace_decomposition) +ei_add_test(half_float) +ei_add_test(array_of_string) + +add_executable(bug1213 bug1213.cpp bug1213_main.cpp) check_cxx_compiler_flag("-ffast-math" COMPILER_SUPPORT_FASTMATH) if(COMPILER_SUPPORT_FASTMATH) @@ -324,6 +329,16 @@ if(EIGEN_TEST_EIGEN2) message(WARNING "The Eigen2 test suite has been removed") endif() +# boost MP unit test +find_package(Boost) +if(Boost_FOUND) + include_directories(${Boost_INCLUDE_DIRS}) + ei_add_test(boostmultiprec "" "${Boost_LIBRARIES}") + ei_add_property(EIGEN_TESTED_BACKENDS "Boost.Multiprecision, ") +else() + ei_add_property(EIGEN_MISSING_BACKENDS "Boost.Multiprecision, ") +endif() + # CUDA unit tests option(EIGEN_TEST_CUDA "Enable CUDA support in unit tests" OFF) @@ -340,7 +355,7 @@ if(CUDA_FOUND) set(CUDA_PROPAGATE_HOST_FLAGS OFF) if("${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang") - set(CUDA_NVCC_FLAGS "-ccbin /usr/bin/clang" CACHE STRING "nvcc flags" FORCE) + set(CUDA_NVCC_FLAGS "-ccbin ${CMAKE_C_COMPILER}" CACHE STRING "nvcc flags" FORCE) endif() if(EIGEN_TEST_CUDA_CLANG) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11 --cuda-gpu-arch=sm_30") diff --git a/test/adjoint.cpp b/test/adjoint.cpp index 9c895e0ac..bdea51c10 100644 --- a/test/adjoint.cpp +++ b/test/adjoint.cpp @@ -169,7 +169,7 @@ void test_adjoint() // test a large static matrix only once CALL_SUBTEST_7( adjoint(Matrix()) ); -#ifdef 
EIGEN_TEST_PART_4 +#ifdef EIGEN_TEST_PART_13 { MatrixXcf a(10,10), b(10,10); VERIFY_RAISES_ASSERT(a = a.transpose()); @@ -187,6 +187,13 @@ void test_adjoint() a.transpose() = a.adjoint(); a.transpose() += a.adjoint(); a.transpose() += a.adjoint() + b; + + // regression tests for check_for_aliasing + MatrixXd c(10,10); + c = 1.0 * MatrixXd::Ones(10,10) + c; + c = MatrixXd::Ones(10,10) * 1.0 + c; + c = c + MatrixXd::Ones(10,10) .cwiseProduct( MatrixXd::Zero(10,10) ); + c = MatrixXd::Ones(10,10) * MatrixXd::Zero(10,10); } #endif } diff --git a/test/array.cpp b/test/array.cpp index beaa62221..15c3266a9 100644 --- a/test/array.cpp +++ b/test/array.cpp @@ -13,6 +13,7 @@ template void array(const ArrayType& m) { typedef typename ArrayType::Index Index; typedef typename ArrayType::Scalar Scalar; + typedef typename ArrayType::RealScalar RealScalar; typedef Array ColVectorType; typedef Array RowVectorType; @@ -72,7 +73,7 @@ template void array(const ArrayType& m) VERIFY_IS_MUCH_SMALLER_THAN(abs(m1.rowwise().sum().sum() - m1.sum()), m1.abs().sum()); if (!internal::isMuchSmallerThan(abs(m1.sum() - (m1+m2).sum()), m1.abs().sum(), test_precision())) VERIFY_IS_NOT_APPROX(((m1+m2).rowwise().sum()).sum(), m1.sum()); - VERIFY_IS_APPROX(m1.colwise().sum(), m1.colwise().redux(internal::scalar_sum_op())); + VERIFY_IS_APPROX(m1.colwise().sum(), m1.colwise().redux(internal::scalar_sum_op())); // vector-wise ops m3 = m1; @@ -102,6 +103,22 @@ template void array(const ArrayType& m) FixedArrayType f4(f1.data()); VERIFY_IS_APPROX(f4, f1); + // pow + VERIFY_IS_APPROX(m1.pow(2), m1.square()); + VERIFY_IS_APPROX(pow(m1,2), m1.square()); + VERIFY_IS_APPROX(m1.pow(3), m1.cube()); + VERIFY_IS_APPROX(pow(m1,3), m1.cube()); + VERIFY_IS_APPROX((-m1).pow(3), -m1.cube()); + VERIFY_IS_APPROX(pow(2*m1,3), 8*m1.cube()); + ArrayType exponents = ArrayType::Constant(rows, cols, RealScalar(2)); + VERIFY_IS_APPROX(Eigen::pow(m1,exponents), m1.square()); + VERIFY_IS_APPROX(m1.pow(exponents), m1.square()); + 
VERIFY_IS_APPROX(Eigen::pow(2*m1,exponents), 4*m1.square()); + VERIFY_IS_APPROX((2*m1).pow(exponents), 4*m1.square()); + VERIFY_IS_APPROX(Eigen::pow(m1,2*exponents), m1.square().square()); + VERIFY_IS_APPROX(m1.pow(2*exponents), m1.square().square()); + VERIFY_IS_APPROX(Eigen::pow(m1(0,0), exponents), ArrayType::Constant(rows,cols,m1(0,0)*m1(0,0))); + // Check possible conflicts with 1D ctor typedef Array OneDArrayType; OneDArrayType o1(rows); @@ -217,12 +234,7 @@ template void array_real(const ArrayType& m) VERIFY_IS_APPROX(m1.sinh(), sinh(m1)); VERIFY_IS_APPROX(m1.cosh(), cosh(m1)); VERIFY_IS_APPROX(m1.tanh(), tanh(m1)); -#ifdef EIGEN_HAS_C99_MATH - VERIFY_IS_APPROX(m1.lgamma(), lgamma(m1)); - VERIFY_IS_APPROX(m1.digamma(), digamma(m1)); - VERIFY_IS_APPROX(m1.erf(), erf(m1)); - VERIFY_IS_APPROX(m1.erfc(), erfc(m1)); -#endif // EIGEN_HAS_C99_MATH + VERIFY_IS_APPROX(m1.arg(), arg(m1)); VERIFY_IS_APPROX(m1.round(), round(m1)); VERIFY_IS_APPROX(m1.floor(), floor(m1)); @@ -243,7 +255,9 @@ template void array_real(const ArrayType& m) m3 = m1.abs(); VERIFY_IS_APPROX(m3.sqrt(), sqrt(abs(m1))); VERIFY_IS_APPROX(m3.rsqrt(), Scalar(1)/sqrt(abs(m1))); + VERIFY_IS_APPROX(rsqrt(m3), Scalar(1)/sqrt(abs(m1))); VERIFY_IS_APPROX(m3.log(), log(m3)); + VERIFY_IS_APPROX(m3.log1p(), log1p(m3)); VERIFY_IS_APPROX(m3.log10(), log10(m3)); @@ -275,27 +289,12 @@ template void array_real(const ArrayType& m) // shift argument of logarithm so that it is not zero Scalar smallNumber = NumTraits::dummy_precision(); VERIFY_IS_APPROX((m3 + smallNumber).log() , log(abs(m1) + smallNumber)); + VERIFY_IS_APPROX((m3 + smallNumber + 1).log() , log1p(abs(m1) + smallNumber)); VERIFY_IS_APPROX(m1.exp() * m2.exp(), exp(m1+m2)); VERIFY_IS_APPROX(m1.exp(), exp(m1)); VERIFY_IS_APPROX(m1.exp() / m2.exp(),(m1-m2).exp()); - VERIFY_IS_APPROX(m1.pow(2), m1.square()); - VERIFY_IS_APPROX(pow(m1,2), m1.square()); - VERIFY_IS_APPROX(m1.pow(3), m1.cube()); - VERIFY_IS_APPROX(pow(m1,3), m1.cube()); - 
VERIFY_IS_APPROX((-m1).pow(3), -m1.cube()); - VERIFY_IS_APPROX(pow(2*m1,3), 8*m1.cube()); - - ArrayType exponents = ArrayType::Constant(rows, cols, RealScalar(2)); - VERIFY_IS_APPROX(Eigen::pow(m1,exponents), m1.square()); - VERIFY_IS_APPROX(m1.pow(exponents), m1.square()); - VERIFY_IS_APPROX(Eigen::pow(2*m1,exponents), 4*m1.square()); - VERIFY_IS_APPROX((2*m1).pow(exponents), 4*m1.square()); - VERIFY_IS_APPROX(Eigen::pow(m1,2*exponents), m1.square().square()); - VERIFY_IS_APPROX(m1.pow(2*exponents), m1.square().square()); - VERIFY_IS_APPROX(pow(m1(0,0), exponents), ArrayType::Constant(rows,cols,m1(0,0)*m1(0,0))); - VERIFY_IS_APPROX(m3.pow(RealScalar(0.5)), m3.sqrt()); VERIFY_IS_APPROX(pow(m3,RealScalar(0.5)), m3.sqrt()); @@ -310,122 +309,6 @@ template void array_real(const ArrayType& m) m1 += ArrayType::Constant(rows,cols,Scalar(tiny)); VERIFY_IS_APPROX(s1/m1, s1 * m1.inverse()); -#ifdef EIGEN_HAS_C99_MATH - // check special functions (comparing against numpy implementation) - if (!NumTraits::IsComplex) { - VERIFY_IS_APPROX(numext::digamma(Scalar(1)), RealScalar(-0.5772156649015329)); - VERIFY_IS_APPROX(numext::digamma(Scalar(1.5)), RealScalar(0.03648997397857645)); - VERIFY_IS_APPROX(numext::digamma(Scalar(4)), RealScalar(1.2561176684318)); - VERIFY_IS_APPROX(numext::digamma(Scalar(-10.5)), RealScalar(2.398239129535781)); - VERIFY_IS_APPROX(numext::digamma(Scalar(10000.5)), RealScalar(9.210340372392849)); - VERIFY_IS_EQUAL(numext::digamma(Scalar(0)), - std::numeric_limits::infinity()); - VERIFY_IS_EQUAL(numext::digamma(Scalar(-1)), - std::numeric_limits::infinity()); - - // Check the zeta function against scipy.special.zeta - VERIFY_IS_APPROX(numext::zeta(Scalar(1.5), Scalar(2)), RealScalar(1.61237534869)); - VERIFY_IS_APPROX(numext::zeta(Scalar(4), Scalar(1.5)), RealScalar(0.234848505667)); - VERIFY_IS_APPROX(numext::zeta(Scalar(10.5), Scalar(3)), RealScalar(1.03086757337e-5)); - VERIFY_IS_APPROX(numext::zeta(Scalar(10000.5), Scalar(1.0001)), 
RealScalar(0.367879440865)); - VERIFY_IS_APPROX(numext::zeta(Scalar(3), Scalar(-2.5)), RealScalar(0.054102025820864097)); - VERIFY_IS_EQUAL(numext::zeta(Scalar(1), Scalar(1.2345)), // The second scalar does not matter - std::numeric_limits::infinity()); - VERIFY((numext::isnan)(numext::zeta(Scalar(0.9), Scalar(1.2345)))); // The second scalar does not matter - - // Check the polygamma against scipy.special.polygamma examples - VERIFY_IS_APPROX(numext::polygamma(Scalar(1), Scalar(2)), RealScalar(0.644934066848)); - VERIFY_IS_APPROX(numext::polygamma(Scalar(1), Scalar(3)), RealScalar(0.394934066848)); - VERIFY_IS_APPROX(numext::polygamma(Scalar(1), Scalar(25.5)), RealScalar(0.0399946696496)); - VERIFY((numext::isnan)(numext::polygamma(Scalar(1.5), Scalar(1.2345)))); // The second scalar does not matter - - // Check the polygamma function over a larger range of values - VERIFY_IS_APPROX(numext::polygamma(Scalar(17), Scalar(4.7)), RealScalar(293.334565435)); - VERIFY_IS_APPROX(numext::polygamma(Scalar(31), Scalar(11.8)), RealScalar(0.445487887616)); - VERIFY_IS_APPROX(numext::polygamma(Scalar(28), Scalar(17.7)), RealScalar(-2.47810300902e-07)); - VERIFY_IS_APPROX(numext::polygamma(Scalar(8), Scalar(30.2)), RealScalar(-8.29668781082e-09)); - /* The following tests only pass for doubles because floats cannot handle the large values of - the gamma function. - VERIFY_IS_APPROX(numext::polygamma(Scalar(42), Scalar(15.8)), RealScalar(-0.434562276666)); - VERIFY_IS_APPROX(numext::polygamma(Scalar(147), Scalar(54.1)), RealScalar(0.567742190178)); - VERIFY_IS_APPROX(numext::polygamma(Scalar(170), Scalar(64)), RealScalar(-0.0108615497927)); - */ - - { - // Test various propreties of igamma & igammac. These are normalized - // gamma integrals where - // igammac(a, x) = Gamma(a, x) / Gamma(a) - // igamma(a, x) = gamma(a, x) / Gamma(a) - // where Gamma and gamma are considered the standard unnormalized - // upper and lower incomplete gamma functions, respectively. 
- ArrayType a = m1.abs() + 2; - ArrayType x = m2.abs() + 2; - ArrayType zero = ArrayType::Zero(rows, cols); - ArrayType one = ArrayType::Constant(rows, cols, Scalar(1.0)); - ArrayType a_m1 = a - one; - ArrayType Gamma_a_x = Eigen::igammac(a, x) * a.lgamma().exp(); - ArrayType Gamma_a_m1_x = Eigen::igammac(a_m1, x) * a_m1.lgamma().exp(); - ArrayType gamma_a_x = Eigen::igamma(a, x) * a.lgamma().exp(); - ArrayType gamma_a_m1_x = Eigen::igamma(a_m1, x) * a_m1.lgamma().exp(); - - // Gamma(a, 0) == Gamma(a) - VERIFY_IS_APPROX(Eigen::igammac(a, zero), one); - - // Gamma(a, x) + gamma(a, x) == Gamma(a) - VERIFY_IS_APPROX(Gamma_a_x + gamma_a_x, a.lgamma().exp()); - - // Gamma(a, x) == (a - 1) * Gamma(a-1, x) + x^(a-1) * exp(-x) - VERIFY_IS_APPROX(Gamma_a_x, (a - 1) * Gamma_a_m1_x + x.pow(a-1) * (-x).exp()); - - // gamma(a, x) == (a - 1) * gamma(a-1, x) - x^(a-1) * exp(-x) - VERIFY_IS_APPROX(gamma_a_x, (a - 1) * gamma_a_m1_x - x.pow(a-1) * (-x).exp()); - } - - // Check exact values of igamma and igammac against a third party calculation. - Scalar a_s[] = {Scalar(0), Scalar(1), Scalar(1.5), Scalar(4), Scalar(0.0001), Scalar(1000.5)}; - Scalar x_s[] = {Scalar(0), Scalar(1), Scalar(1.5), Scalar(4), Scalar(0.0001), Scalar(1000.5)}; - - // location i*6+j corresponds to a_s[i], x_s[j]. 
- Scalar nan = std::numeric_limits::quiet_NaN(); - Scalar igamma_s[][6] = {{0.0, nan, nan, nan, nan, nan}, - {0.0, 0.6321205588285578, 0.7768698398515702, - 0.9816843611112658, 9.999500016666262e-05, 1.0}, - {0.0, 0.4275932955291202, 0.608374823728911, - 0.9539882943107686, 7.522076445089201e-07, 1.0}, - {0.0, 0.01898815687615381, 0.06564245437845008, - 0.5665298796332909, 4.166333347221828e-18, 1.0}, - {0.0, 0.9999780593618628, 0.9999899967080838, - 0.9999996219837988, 0.9991370418689945, 1.0}, - {0.0, 0.0, 0.0, 0.0, 0.0, 0.5042041932513908}}; - Scalar igammac_s[][6] = {{nan, nan, nan, nan, nan, nan}, - {1.0, 0.36787944117144233, 0.22313016014842982, - 0.018315638888734182, 0.9999000049998333, 0.0}, - {1.0, 0.5724067044708798, 0.3916251762710878, - 0.04601170568923136, 0.9999992477923555, 0.0}, - {1.0, 0.9810118431238462, 0.9343575456215499, - 0.4334701203667089, 1.0, 0.0}, - {1.0, 2.1940638138146658e-05, 1.0003291916285e-05, - 3.7801620118431334e-07, 0.0008629581310054535, - 0.0}, - {1.0, 1.0, 1.0, 1.0, 1.0, 0.49579580674813944}}; - for (int i = 0; i < 6; ++i) { - for (int j = 0; j < 6; ++j) { - if ((std::isnan)(igamma_s[i][j])) { - VERIFY((std::isnan)(numext::igamma(a_s[i], x_s[j]))); - } else { - VERIFY_IS_APPROX(numext::igamma(a_s[i], x_s[j]), igamma_s[i][j]); - } - - if ((std::isnan)(igammac_s[i][j])) { - VERIFY((std::isnan)(numext::igammac(a_s[i], x_s[j]))); - } else { - VERIFY_IS_APPROX(numext::igammac(a_s[i], x_s[j]), igammac_s[i][j]); - } - } - } - } -#endif // EIGEN_HAS_C99_MATH - // check inplace transpose m3 = m1; m3.transposeInPlace(); @@ -525,7 +408,7 @@ template void array_complex(const ArrayType& m) // scalar by array division Scalar s1 = internal::random(); - const RealScalar tiny = sqrt(std::numeric_limits::epsilon()); + const RealScalar tiny = std::sqrt(std::numeric_limits::epsilon()); s1 += Scalar(tiny); m1 += ArrayType::Constant(rows,cols,Scalar(tiny)); VERIFY_IS_APPROX(s1/m1, s1 * m1.inverse()); @@ -605,7 +488,7 @@ void test_array() 
VERIFY((internal::is_same< internal::global_math_functions_filtering_base::type, int >::value)); VERIFY((internal::is_same< internal::global_math_functions_filtering_base::type, float >::value)); VERIFY((internal::is_same< internal::global_math_functions_filtering_base::type, ArrayBase >::value)); - typedef CwiseUnaryOp, ArrayXd > Xpr; + typedef CwiseUnaryOp, ArrayXd > Xpr; VERIFY((internal::is_same< internal::global_math_functions_filtering_base::type, ArrayBase >::value)); diff --git a/test/array_for_matrix.cpp b/test/array_for_matrix.cpp index db5f3b34a..97e03be83 100644 --- a/test/array_for_matrix.cpp +++ b/test/array_for_matrix.cpp @@ -45,7 +45,7 @@ template void array_for_matrix(const MatrixType& m) VERIFY_IS_MUCH_SMALLER_THAN(m1.rowwise().sum().sum() - m1.sum(), m1.squaredNorm()); VERIFY_IS_MUCH_SMALLER_THAN(m1.colwise().sum() + m2.colwise().sum() - (m1+m2).colwise().sum(), (m1+m2).squaredNorm()); VERIFY_IS_MUCH_SMALLER_THAN(m1.rowwise().sum() - m2.rowwise().sum() - (m1-m2).rowwise().sum(), (m1-m2).squaredNorm()); - VERIFY_IS_APPROX(m1.colwise().sum(), m1.colwise().redux(internal::scalar_sum_op())); + VERIFY_IS_APPROX(m1.colwise().sum(), m1.colwise().redux(internal::scalar_sum_op())); // vector-wise ops m3 = m1; @@ -144,9 +144,21 @@ template void comparisons(const MatrixType& m) template void lpNorm(const VectorType& v) { using std::sqrt; + typedef typename VectorType::RealScalar RealScalar; VectorType u = VectorType::Random(v.size()); - VERIFY_IS_APPROX(u.template lpNorm(), u.cwiseAbs().maxCoeff()); + if(v.size()==0) + { + VERIFY_IS_APPROX(u.template lpNorm(), RealScalar(0)); + VERIFY_IS_APPROX(u.template lpNorm<1>(), RealScalar(0)); + VERIFY_IS_APPROX(u.template lpNorm<2>(), RealScalar(0)); + VERIFY_IS_APPROX(u.template lpNorm<5>(), RealScalar(0)); + } + else + { + VERIFY_IS_APPROX(u.template lpNorm(), u.cwiseAbs().maxCoeff()); + } + VERIFY_IS_APPROX(u.template lpNorm<1>(), u.cwiseAbs().sum()); VERIFY_IS_APPROX(u.template lpNorm<2>(), 
sqrt(u.array().abs().square().sum())); VERIFY_IS_APPROX(numext::pow(u.template lpNorm<5>(), typename VectorType::RealScalar(5)), u.array().abs().pow(5).sum()); @@ -255,6 +267,8 @@ void test_array_for_matrix() CALL_SUBTEST_5( lpNorm(VectorXf(internal::random(1,EIGEN_TEST_MAX_SIZE))) ); CALL_SUBTEST_4( lpNorm(VectorXcf(internal::random(1,EIGEN_TEST_MAX_SIZE))) ); } + CALL_SUBTEST_5( lpNorm(VectorXf(0)) ); + CALL_SUBTEST_4( lpNorm(VectorXcf(0)) ); for(int i = 0; i < g_repeat; i++) { CALL_SUBTEST_4( resize(MatrixXcf(internal::random(1,EIGEN_TEST_MAX_SIZE), internal::random(1,EIGEN_TEST_MAX_SIZE))) ); CALL_SUBTEST_5( resize(MatrixXf(internal::random(1,EIGEN_TEST_MAX_SIZE), internal::random(1,EIGEN_TEST_MAX_SIZE))) ); diff --git a/test/array_of_string.cpp b/test/array_of_string.cpp new file mode 100644 index 000000000..e23b7c59e --- /dev/null +++ b/test/array_of_string.cpp @@ -0,0 +1,32 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2016 Gael Guennebaud +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ +#include "main.h" + +void test_array_of_string() +{ + typedef Array ArrayXs; + ArrayXs a1(3), a2(3), a3(3), a3ref(3); + a1 << "one", "two", "three"; + a2 << "1", "2", "3"; + a3ref << "one (1)", "two (2)", "three (3)"; + std::stringstream s1; + s1 << a1; + VERIFY_IS_EQUAL(s1.str(), std::string(" one two three")); + a3 = a1 + std::string(" (") + a2 + std::string(")"); + VERIFY((a3==a3ref).all()); + + a3 = a1; + a3 += std::string(" (") + a2 + std::string(")"); + VERIFY((a3==a3ref).all()); + + a1.swap(a3); + VERIFY((a1==a3ref).all()); + VERIFY((a3!=a3ref).all()); +} diff --git a/test/array_reverse.cpp b/test/array_reverse.cpp index a5c0d37f9..c9d9f90c3 100644 --- a/test/array_reverse.cpp +++ b/test/array_reverse.cpp @@ -117,13 +117,11 @@ template void reverse(const MatrixType& m) m2.colwise().reverseInPlace(); VERIFY_IS_APPROX(m2,m1.colwise().reverse().eval()); - /* m1.colwise().reverse()(r, c) = x; VERIFY_IS_APPROX(x, m1(rows - 1 - r, c)); m1.rowwise().reverse()(r, c) = x; VERIFY_IS_APPROX(x, m1(r, cols - 1 - c)); - */ } void test_array_reverse() diff --git a/test/boostmultiprec.cpp b/test/boostmultiprec.cpp new file mode 100644 index 000000000..e06e9bdaf --- /dev/null +++ b/test/boostmultiprec.cpp @@ -0,0 +1,201 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2016 Gael Guennebaud +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ +#include + +#ifdef EIGEN_TEST_MAX_SIZE +#undef EIGEN_TEST_MAX_SIZE +#endif + +#define EIGEN_TEST_MAX_SIZE 50 + +#ifdef EIGEN_TEST_PART_1 +#include "cholesky.cpp" +#endif + +#ifdef EIGEN_TEST_PART_2 +#include "lu.cpp" +#endif + +#ifdef EIGEN_TEST_PART_3 +#include "qr.cpp" +#endif + +#ifdef EIGEN_TEST_PART_4 +#include "qr_colpivoting.cpp" +#endif + +#ifdef EIGEN_TEST_PART_5 +#include "qr_fullpivoting.cpp" +#endif + +#ifdef EIGEN_TEST_PART_6 +#include "eigensolver_selfadjoint.cpp" +#endif + +#ifdef EIGEN_TEST_PART_7 +#include "eigensolver_generic.cpp" +#endif + +#ifdef EIGEN_TEST_PART_8 +#include "eigensolver_generalized_real.cpp" +#endif + +#ifdef EIGEN_TEST_PART_9 +#include "jacobisvd.cpp" +#endif + +#ifdef EIGEN_TEST_PART_10 +#include "bdcsvd.cpp" +#endif + +#include + +#undef min +#undef max +#undef isnan +#undef isinf +#undef isfinite + +#include +#include +#include +#include + +namespace mp = boost::multiprecision; +typedef mp::number, mp::et_on> Real; + +namespace Eigen { + template<> struct NumTraits : GenericNumTraits { + static inline Real dummy_precision() { return 1e-50; } + }; + + template + struct NumTraits > : NumTraits {}; + + template<> + Real test_precision() { return 1e-50; } + + // needed in C++93 mode where number does not support explicit cast. 
+ namespace internal { + template + struct cast_impl { + static inline NewType run(const Real& x) { + return x.template convert_to(); + } + }; + + template<> + struct cast_impl > { + static inline std::complex run(const Real& x) { + return std::complex(x); + } + }; + } +} + +namespace boost { +namespace multiprecision { + // to make ADL works as expected: + using boost::math::isfinite; + using boost::math::isnan; + using boost::math::isinf; + using boost::math::copysign; + using boost::math::hypot; + + // The following is needed for std::complex: + Real fabs(const Real& a) { return abs EIGEN_NOT_A_MACRO (a); } + Real fmax(const Real& a, const Real& b) { using std::max; return max(a,b); } + + // some specialization for the unit tests: + inline bool test_isMuchSmallerThan(const Real& a, const Real& b) { + return internal::isMuchSmallerThan(a, b, test_precision()); + } + + inline bool test_isApprox(const Real& a, const Real& b) { + return internal::isApprox(a, b, test_precision()); + } + + inline bool test_isApproxOrLessThan(const Real& a, const Real& b) { + return internal::isApproxOrLessThan(a, b, test_precision()); + } + + Real get_test_precision(const Real&) { + return test_precision(); + } + + Real test_relative_error(const Real &a, const Real &b) { + using Eigen::numext::abs2; + return sqrt(abs2(a-b)/Eigen::numext::mini(abs2(a),abs2(b))); + } +} +} + +namespace Eigen { + +} + +void test_boostmultiprec() +{ + typedef Matrix Mat; + typedef Matrix,Dynamic,Dynamic> MatC; + + std::cout << "NumTraits::epsilon() = " << NumTraits::epsilon() << std::endl; + std::cout << "NumTraits::dummy_precision() = " << NumTraits::dummy_precision() << std::endl; + std::cout << "NumTraits::lowest() = " << NumTraits::lowest() << std::endl; + std::cout << "NumTraits::highest() = " << NumTraits::highest() << std::endl; + std::cout << "NumTraits::digits10() = " << NumTraits::digits10() << std::endl; + + // chekc stream output + { + Mat A(10,10); + A.setRandom(); + std::stringstream ss; + 
ss << A; + } + { + MatC A(10,10); + A.setRandom(); + std::stringstream ss; + ss << A; + } + + for(int i = 0; i < g_repeat; i++) { + int s = internal::random(1,EIGEN_TEST_MAX_SIZE); + + CALL_SUBTEST_1( cholesky(Mat(s,s)) ); + + CALL_SUBTEST_2( lu_non_invertible() ); + CALL_SUBTEST_2( lu_invertible() ); + CALL_SUBTEST_2( lu_non_invertible() ); + CALL_SUBTEST_2( lu_invertible() ); + + CALL_SUBTEST_3( qr(Mat(internal::random(1,EIGEN_TEST_MAX_SIZE),internal::random(1,EIGEN_TEST_MAX_SIZE))) ); + CALL_SUBTEST_3( qr_invertible() ); + + CALL_SUBTEST_4( qr() ); + CALL_SUBTEST_4( cod() ); + CALL_SUBTEST_4( qr_invertible() ); + + CALL_SUBTEST_5( qr() ); + CALL_SUBTEST_5( qr_invertible() ); + + CALL_SUBTEST_6( selfadjointeigensolver(Mat(s,s)) ); + + CALL_SUBTEST_7( eigensolver(Mat(s,s)) ); + + CALL_SUBTEST_8( generalized_eigensolver_real(Mat(s,s)) ); + + TEST_SET_BUT_UNUSED_VARIABLE(s) + } + + CALL_SUBTEST_9(( jacobisvd(Mat(internal::random(EIGEN_TEST_MAX_SIZE/4, EIGEN_TEST_MAX_SIZE), internal::random(EIGEN_TEST_MAX_SIZE/4, EIGEN_TEST_MAX_SIZE/2))) )); + CALL_SUBTEST_10(( bdcsvd(Mat(internal::random(EIGEN_TEST_MAX_SIZE/4, EIGEN_TEST_MAX_SIZE), internal::random(EIGEN_TEST_MAX_SIZE/4, EIGEN_TEST_MAX_SIZE/2))) )); +} + diff --git a/test/bug1213.cpp b/test/bug1213.cpp new file mode 100644 index 000000000..581760c1a --- /dev/null +++ b/test/bug1213.cpp @@ -0,0 +1,13 @@ + +// This anonymous enum is essential to trigger the linking issue +enum { + Foo +}; + +#include "bug1213.h" + +bool bug1213_1(const Eigen::Vector3f& x) +{ + return bug1213_2(x); +} + diff --git a/test/bug1213.h b/test/bug1213.h new file mode 100644 index 000000000..040e5a470 --- /dev/null +++ b/test/bug1213.h @@ -0,0 +1,8 @@ + +#include + +template +bool bug1213_2(const Eigen::Matrix& x); + +bool bug1213_1(const Eigen::Vector3f& x); + diff --git a/test/bug1213_main.cpp b/test/bug1213_main.cpp new file mode 100644 index 000000000..4802c0003 --- /dev/null +++ b/test/bug1213_main.cpp @@ -0,0 +1,18 @@ + +// This is a 
regression unit regarding a weird linking issue with gcc. + +#include "bug1213.h" + +int main() +{ + return 0; +} + + +template +bool bug1213_2(const Eigen::Matrix& ) +{ + return true; +} + +template bool bug1213_2(const Eigen::Vector3f&); diff --git a/test/cholesky.cpp b/test/cholesky.cpp index b7abc230b..8ad5ac639 100644 --- a/test/cholesky.cpp +++ b/test/cholesky.cpp @@ -154,6 +154,7 @@ template void cholesky(const MatrixType& m) SquareMatrixType symmLo = symm.template triangularView(); LDLT ldltlo(symmLo); + VERIFY(ldltlo.info()==Success); VERIFY_IS_APPROX(symm, ldltlo.reconstructedMatrix()); vecX = ldltlo.solve(vecB); VERIFY_IS_APPROX(symm * vecX, vecB); @@ -170,6 +171,7 @@ template void cholesky(const MatrixType& m) LDLT ldltup(symmUp); + VERIFY(ldltup.info()==Success); VERIFY_IS_APPROX(symm, ldltup.reconstructedMatrix()); vecX = ldltup.solve(vecB); VERIFY_IS_APPROX(symm * vecX, vecB); @@ -243,11 +245,13 @@ template void cholesky(const MatrixType& m) // check matrices with a wide spectrum if(rows>=3) { + using std::pow; + using std::sqrt; RealScalar s = (std::min)(16,std::numeric_limits::max_exponent10/8); Matrix a = Matrix::Random(rows,rows); Matrix d = Matrix::Random(rows); for(Index k=0; k(-s,s)); + d(k) = d(k)*pow(RealScalar(10),internal::random(-s,s)); SquareMatrixType A = a * d.asDiagonal() * a.adjoint(); // Make sure a solution exists: vecX.setRandom(); @@ -263,7 +267,7 @@ template void cholesky(const MatrixType& m) } else { - RealScalar large_tol = std::sqrt(test_precision()); + RealScalar large_tol = sqrt(test_precision()); VERIFY((A * vecX).isApprox(vecB, large_tol)); ++g_test_level; @@ -329,6 +333,7 @@ template void cholesky_cplx(const MatrixType& m) RealMatrixType symmLo = symm.template triangularView(); LDLT ldltlo(symmLo); + VERIFY(ldltlo.info()==Success); VERIFY_IS_APPROX(symm, ldltlo.reconstructedMatrix()); vecX = ldltlo.solve(vecB); VERIFY_IS_APPROX(symm * vecX, vecB); @@ -365,35 +370,90 @@ template void cholesky_definiteness(const 
MatrixType& m) { mat << 1, 0, 0, -1; ldlt.compute(mat); + VERIFY(ldlt.info()==Success); VERIFY(!ldlt.isNegative()); VERIFY(!ldlt.isPositive()); } { mat << 1, 2, 2, 1; ldlt.compute(mat); + VERIFY(ldlt.info()==Success); VERIFY(!ldlt.isNegative()); VERIFY(!ldlt.isPositive()); } { mat << 0, 0, 0, 0; ldlt.compute(mat); + VERIFY(ldlt.info()==Success); VERIFY(ldlt.isNegative()); VERIFY(ldlt.isPositive()); } { mat << 0, 0, 0, 1; ldlt.compute(mat); + VERIFY(ldlt.info()==Success); VERIFY(!ldlt.isNegative()); VERIFY(ldlt.isPositive()); } { mat << -1, 0, 0, 0; ldlt.compute(mat); + VERIFY(ldlt.info()==Success); VERIFY(ldlt.isNegative()); VERIFY(!ldlt.isPositive()); } } +template +void cholesky_faillure_cases() +{ + MatrixXd mat; + LDLT ldlt; + + { + mat.resize(2,2); + mat << 0, 1, 1, 0; + ldlt.compute(mat); + VERIFY_IS_NOT_APPROX(mat,ldlt.reconstructedMatrix()); + VERIFY(ldlt.info()==NumericalIssue); + } +#if (!EIGEN_ARCH_i386) || defined(EIGEN_VECTORIZE_SSE2) + { + mat.resize(3,3); + mat << -1, -3, 3, + -3, -8.9999999999999999999, 1, + 3, 1, 0; + ldlt.compute(mat); + VERIFY(ldlt.info()==NumericalIssue); + VERIFY_IS_NOT_APPROX(mat,ldlt.reconstructedMatrix()); + } +#endif + { + mat.resize(3,3); + mat << 1, 2, 3, + 2, 4, 1, + 3, 1, 0; + ldlt.compute(mat); + VERIFY(ldlt.info()==NumericalIssue); + VERIFY_IS_NOT_APPROX(mat,ldlt.reconstructedMatrix()); + } + + { + mat.resize(8,8); + mat << 0.1, 0, -0.1, 0, 0, 0, 1, 0, + 0, 4.24667, 0, 2.00333, 0, 0, 0, 0, + -0.1, 0, 0.2, 0, -0.1, 0, 0, 0, + 0, 2.00333, 0, 8.49333, 0, 2.00333, 0, 0, + 0, 0, -0.1, 0, 0.1, 0, 0, 1, + 0, 0, 0, 2.00333, 0, 4.24667, 0, 0, + 1, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 1, 0, 0, 0; + ldlt.compute(mat); + VERIFY(ldlt.info()==NumericalIssue); + VERIFY_IS_NOT_APPROX(mat,ldlt.reconstructedMatrix()); + } +} + template void cholesky_verify_assert() { MatrixType tmp; @@ -443,5 +503,7 @@ void test_cholesky() CALL_SUBTEST_9( LLT(10) ); CALL_SUBTEST_9( LDLT(10) ); + CALL_SUBTEST_2( cholesky_faillure_cases() ); + 
TEST_SET_BUT_UNUSED_VARIABLE(nb_temporaries) } diff --git a/test/commainitializer.cpp b/test/commainitializer.cpp index 99102b966..9844adbd2 100644 --- a/test/commainitializer.cpp +++ b/test/commainitializer.cpp @@ -9,6 +9,62 @@ #include "main.h" + +template +void test_blocks() +{ + Matrix m_fixed; + MatrixXi m_dynamic(M1+M2, N1+N2); + + Matrix mat11; mat11.setRandom(); + Matrix mat12; mat12.setRandom(); + Matrix mat21; mat21.setRandom(); + Matrix mat22; mat22.setRandom(); + + MatrixXi matx11 = mat11, matx12 = mat12, matx21 = mat21, matx22 = mat22; + + { + VERIFY_IS_EQUAL((m_fixed << mat11, mat12, mat21, matx22).finished(), (m_dynamic << mat11, matx12, mat21, matx22).finished()); + VERIFY_IS_EQUAL((m_fixed.template topLeftCorner()), mat11); + VERIFY_IS_EQUAL((m_fixed.template topRightCorner()), mat12); + VERIFY_IS_EQUAL((m_fixed.template bottomLeftCorner()), mat21); + VERIFY_IS_EQUAL((m_fixed.template bottomRightCorner()), mat22); + VERIFY_IS_EQUAL((m_fixed << mat12, mat11, matx21, mat22).finished(), (m_dynamic << mat12, matx11, matx21, mat22).finished()); + } + + if(N1 > 0) + { + VERIFY_RAISES_ASSERT((m_fixed << mat11, mat12, mat11, mat21, mat22)); + VERIFY_RAISES_ASSERT((m_fixed << mat11, mat12, mat21, mat21, mat22)); + } + else + { + // allow insertion of zero-column blocks: + VERIFY_IS_EQUAL((m_fixed << mat11, mat12, mat11, mat11, mat21, mat21, mat22).finished(), (m_dynamic << mat12, mat22).finished()); + } + if(M1 != M2) + { + VERIFY_RAISES_ASSERT((m_fixed << mat11, mat21, mat12, mat22)); + } +} + + +template +struct test_block_recursion +{ + static void run() + { + test_blocks<(N>>6)&3, (N>>4)&3, (N>>2)&3, N & 3>(); + test_block_recursion::run(); + } +}; + +template<> +struct test_block_recursion<-1> +{ + static void run() { } +}; + void test_commainitializer() { Matrix3d m3; @@ -43,4 +99,8 @@ void test_commainitializer() 4, 5, 6, vec[2].transpose(); VERIFY_IS_APPROX(m3, ref); + + + // recursively test all block-sizes from 0 to 3: + 
test_block_recursion<(1<<8) - 1>(); } diff --git a/test/cuda_basic.cu b/test/cuda_basic.cu index b36ed888d..cb2e4167a 100644 --- a/test/cuda_basic.cu +++ b/test/cuda_basic.cu @@ -1,4 +1,11 @@ - +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2015-2016 Gael Guennebaud +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. // workaround issue between gcc >= 4.7 and cuda 5.5 #if (defined __GNUC__) && (__GNUC__>4 || __GNUC_MINOR__>=7) @@ -12,10 +19,15 @@ #define EIGEN_DEFAULT_DENSE_INDEX_TYPE int #include +#include +#if defined __CUDACC_VER__ && __CUDACC_VER__ >= 70500 +#include +#endif #include "main.h" #include "cuda_common.h" -#include +// Check that dense modules can be properly parsed by nvcc +#include // struct Foo{ // EIGEN_DEVICE_FUNC diff --git a/test/dynalloc.cpp b/test/dynalloc.cpp index 5f587007c..f1cc70bee 100644 --- a/test/dynalloc.cpp +++ b/test/dynalloc.cpp @@ -22,7 +22,7 @@ void check_handmade_aligned_malloc() for(int i = 1; i < 1000; i++) { char *p = (char*)internal::handmade_aligned_malloc(i); - VERIFY(size_t(p)%ALIGNMENT==0); + VERIFY(internal::UIntPtr(p)%ALIGNMENT==0); // if the buffer is wrongly allocated this will give a bad write --> check with valgrind for(int j = 0; j < i; j++) p[j]=0; internal::handmade_aligned_free(p); @@ -34,7 +34,7 @@ void check_aligned_malloc() for(int i = ALIGNMENT; i < 1000; i++) { char *p = (char*)internal::aligned_malloc(i); - VERIFY(size_t(p)%ALIGNMENT==0); + VERIFY(internal::UIntPtr(p)%ALIGNMENT==0); // if the buffer is wrongly allocated this will give a bad write --> check with valgrind for(int j = 0; j < i; j++) p[j]=0; internal::aligned_free(p); @@ -46,7 +46,7 @@ void check_aligned_new() for(int i = ALIGNMENT; i < 1000; i++) { float *p = internal::aligned_new(i); - VERIFY(size_t(p)%ALIGNMENT==0); + 
VERIFY(internal::UIntPtr(p)%ALIGNMENT==0); // if the buffer is wrongly allocated this will give a bad write --> check with valgrind for(int j = 0; j < i; j++) p[j]=0; internal::aligned_delete(p,i); @@ -58,7 +58,7 @@ void check_aligned_stack_alloc() for(int i = ALIGNMENT; i < 400; i++) { ei_declare_aligned_stack_constructed_variable(float,p,i,0); - VERIFY(size_t(p)%ALIGNMENT==0); + VERIFY(internal::UIntPtr(p)%ALIGNMENT==0); // if the buffer is wrongly allocated this will give a bad write --> check with valgrind for(int j = 0; j < i; j++) p[j]=0; } @@ -88,7 +88,7 @@ template void check_dynaligned() { T* obj = new T; VERIFY(T::NeedsToAlign==1); - VERIFY(size_t(obj)%ALIGNMENT==0); + VERIFY(internal::UIntPtr(obj)%ALIGNMENT==0); delete obj; } } @@ -148,15 +148,15 @@ void test_dynalloc() } { - MyStruct foo0; VERIFY(size_t(foo0.avec.data())%ALIGNMENT==0); - MyClassA fooA; VERIFY(size_t(fooA.avec.data())%ALIGNMENT==0); + MyStruct foo0; VERIFY(internal::UIntPtr(foo0.avec.data())%ALIGNMENT==0); + MyClassA fooA; VERIFY(internal::UIntPtr(fooA.avec.data())%ALIGNMENT==0); } // dynamic allocation, single object for (int i=0; iavec.data())%ALIGNMENT==0); - MyClassA *fooA = new MyClassA(); VERIFY(size_t(fooA->avec.data())%ALIGNMENT==0); + MyStruct *foo0 = new MyStruct(); VERIFY(internal::UIntPtr(foo0->avec.data())%ALIGNMENT==0); + MyClassA *fooA = new MyClassA(); VERIFY(internal::UIntPtr(fooA->avec.data())%ALIGNMENT==0); delete foo0; delete fooA; } @@ -165,8 +165,8 @@ void test_dynalloc() const int N = 10; for (int i=0; iavec.data())%ALIGNMENT==0); - MyClassA *fooA = new MyClassA[N]; VERIFY(size_t(fooA->avec.data())%ALIGNMENT==0); + MyStruct *foo0 = new MyStruct[N]; VERIFY(internal::UIntPtr(foo0->avec.data())%ALIGNMENT==0); + MyClassA *fooA = new MyClassA[N]; VERIFY(internal::UIntPtr(fooA->avec.data())%ALIGNMENT==0); delete[] foo0; delete[] fooA; } diff --git a/test/eigensolver_generalized_real.cpp b/test/eigensolver_generalized_real.cpp index a46a2e50e..9c0838ba4 100644 --- 
a/test/eigensolver_generalized_real.cpp +++ b/test/eigensolver_generalized_real.cpp @@ -1,15 +1,17 @@ // This file is part of Eigen, a lightweight C++ template library // for linear algebra. // -// Copyright (C) 2012 Gael Guennebaud +// Copyright (C) 2012-2016 Gael Guennebaud // // This Source Code Form is subject to the terms of the Mozilla // Public License v. 2.0. If a copy of the MPL was not distributed // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. +#define EIGEN_RUNTIME_NO_MALLOC #include "main.h" #include #include +#include template void generalized_eigensolver_real(const MatrixType& m) { @@ -21,6 +23,7 @@ template void generalized_eigensolver_real(const MatrixType Index cols = m.cols(); typedef typename MatrixType::Scalar Scalar; + typedef std::complex ComplexScalar; typedef Matrix VectorType; MatrixType a = MatrixType::Random(rows,cols); @@ -31,14 +34,41 @@ template void generalized_eigensolver_real(const MatrixType MatrixType spdB = b.adjoint() * b + b1.adjoint() * b1; // lets compare to GeneralizedSelfAdjointEigenSolver - GeneralizedSelfAdjointEigenSolver symmEig(spdA, spdB); - GeneralizedEigenSolver eig(spdA, spdB); + { + GeneralizedSelfAdjointEigenSolver symmEig(spdA, spdB); + GeneralizedEigenSolver eig(spdA, spdB); - VERIFY_IS_EQUAL(eig.eigenvalues().imag().cwiseAbs().maxCoeff(), 0); + VERIFY_IS_EQUAL(eig.eigenvalues().imag().cwiseAbs().maxCoeff(), 0); - VectorType realEigenvalues = eig.eigenvalues().real(); - std::sort(realEigenvalues.data(), realEigenvalues.data()+realEigenvalues.size()); - VERIFY_IS_APPROX(realEigenvalues, symmEig.eigenvalues()); + VectorType realEigenvalues = eig.eigenvalues().real(); + std::sort(realEigenvalues.data(), realEigenvalues.data()+realEigenvalues.size()); + VERIFY_IS_APPROX(realEigenvalues, symmEig.eigenvalues()); + + // check eigenvectors + typename GeneralizedEigenSolver::EigenvectorsType D = eig.eigenvalues().asDiagonal(); + typename GeneralizedEigenSolver::EigenvectorsType V = 
eig.eigenvectors(); + VERIFY_IS_APPROX(spdA*V, spdB*V*D); + } + + // non symmetric case: + { + GeneralizedEigenSolver eig(rows); + // TODO enable full-prealocation of required memory, this probably requires an in-place mode for HessenbergDecomposition + //Eigen::internal::set_is_malloc_allowed(false); + eig.compute(a,b); + //Eigen::internal::set_is_malloc_allowed(true); + for(Index k=0; k tmp = (eig.betas()(k)*a).template cast() - eig.alphas()(k)*b; + if(tmp.size()>1 && tmp.norm()>(std::numeric_limits::min)()) + tmp /= tmp.norm(); + VERIFY_IS_MUCH_SMALLER_THAN( std::abs(tmp.determinant()), Scalar(1) ); + } + // check eigenvectors + typename GeneralizedEigenSolver::EigenvectorsType D = eig.eigenvalues().asDiagonal(); + typename GeneralizedEigenSolver::EigenvectorsType V = eig.eigenvectors(); + VERIFY_IS_APPROX(a*V, b*V*D); + } // regression test for bug 1098 { @@ -57,7 +87,7 @@ void test_eigensolver_generalized_real() s = internal::random(1,EIGEN_TEST_MAX_SIZE/4); CALL_SUBTEST_2( generalized_eigensolver_real(MatrixXd(s,s)) ); - // some trivial but implementation-wise tricky cases + // some trivial but implementation-wise special cases CALL_SUBTEST_2( generalized_eigensolver_real(MatrixXd(1,1)) ); CALL_SUBTEST_2( generalized_eigensolver_real(MatrixXd(2,2)) ); CALL_SUBTEST_3( generalized_eigensolver_real(Matrix()) ); diff --git a/test/eigensolver_generic.cpp b/test/eigensolver_generic.cpp index 566546310..e18fbf687 100644 --- a/test/eigensolver_generic.cpp +++ b/test/eigensolver_generic.cpp @@ -127,16 +127,29 @@ void test_eigensolver_generic() } ); - // regression test for bug 793 #ifdef EIGEN_TEST_PART_2 { - MatrixXd a(3,3); - a << 0, 0, 1, - 1, 1, 1, - 1, 1e+200, 1; - Eigen::EigenSolver eig(a); - VERIFY_IS_APPROX(a * eig.pseudoEigenvectors(), eig.pseudoEigenvectors() * eig.pseudoEigenvalueMatrix()); - VERIFY_IS_APPROX(a * eig.eigenvectors(), eig.eigenvectors() * eig.eigenvalues().asDiagonal()); + // regression test for bug 793 + MatrixXd a(3,3); + a << 0, 0, 1, + 1, 
1, 1, + 1, 1e+200, 1; + Eigen::EigenSolver eig(a); + double scale = 1e-200; // scale to avoid overflow during the comparisons + VERIFY_IS_APPROX(a * eig.pseudoEigenvectors()*scale, eig.pseudoEigenvectors() * eig.pseudoEigenvalueMatrix()*scale); + VERIFY_IS_APPROX(a * eig.eigenvectors()*scale, eig.eigenvectors() * eig.eigenvalues().asDiagonal()*scale); + } + { + // check a case where all eigenvalues are null. + MatrixXd a(2,2); + a << 1, 1, + -1, -1; + Eigen::EigenSolver eig(a); + VERIFY_IS_APPROX(eig.pseudoEigenvectors().squaredNorm(), 2.); + VERIFY_IS_APPROX((a * eig.pseudoEigenvectors()).norm()+1., 1.); + VERIFY_IS_APPROX((eig.pseudoEigenvectors() * eig.pseudoEigenvalueMatrix()).norm()+1., 1.); + VERIFY_IS_APPROX((a * eig.eigenvectors()).norm()+1., 1.); + VERIFY_IS_APPROX((eig.eigenvectors() * eig.eigenvalues().asDiagonal()).norm()+1., 1.); } #endif diff --git a/test/eigensolver_selfadjoint.cpp b/test/eigensolver_selfadjoint.cpp index f909761a1..4ed126116 100644 --- a/test/eigensolver_selfadjoint.cpp +++ b/test/eigensolver_selfadjoint.cpp @@ -12,18 +12,29 @@ #include "svd_fill.h" #include #include +#include template void selfadjointeigensolver_essential_check(const MatrixType& m) { typedef typename MatrixType::Scalar Scalar; typedef typename NumTraits::Real RealScalar; - RealScalar eival_eps = (std::min)(test_precision(), NumTraits::dummy_precision()*20000); + RealScalar eival_eps = numext::mini(test_precision(), NumTraits::dummy_precision()*20000); SelfAdjointEigenSolver eiSymm(m); VERIFY_IS_EQUAL(eiSymm.info(), Success); - VERIFY_IS_APPROX(m.template selfadjointView() * eiSymm.eigenvectors(), - eiSymm.eigenvectors() * eiSymm.eigenvalues().asDiagonal()); + + RealScalar scaling = m.cwiseAbs().maxCoeff(); + + if(scaling<(std::numeric_limits::min)()) + { + VERIFY(eiSymm.eigenvalues().cwiseAbs().maxCoeff() <= (std::numeric_limits::min)()); + } + else + { + VERIFY_IS_APPROX((m.template selfadjointView() * eiSymm.eigenvectors())/scaling, + (eiSymm.eigenvectors() * 
eiSymm.eigenvalues().asDiagonal())/scaling); + } VERIFY_IS_APPROX(m.template selfadjointView().eigenvalues(), eiSymm.eigenvalues()); VERIFY_IS_UNITARY(eiSymm.eigenvectors()); @@ -32,7 +43,6 @@ template void selfadjointeigensolver_essential_check(const SelfAdjointEigenSolver eiDirect; eiDirect.computeDirect(m); VERIFY_IS_EQUAL(eiDirect.info(), Success); - VERIFY_IS_APPROX(eiSymm.eigenvalues(), eiDirect.eigenvalues()); if(! eiSymm.eigenvalues().isApprox(eiDirect.eigenvalues(), eival_eps) ) { std::cerr << "reference eigenvalues: " << eiSymm.eigenvalues().transpose() << "\n" @@ -40,10 +50,18 @@ template void selfadjointeigensolver_essential_check(const << "diff: " << (eiSymm.eigenvalues()-eiDirect.eigenvalues()).transpose() << "\n" << "error (eps): " << (eiSymm.eigenvalues()-eiDirect.eigenvalues()).norm() / eiSymm.eigenvalues().norm() << " (" << eival_eps << ")\n"; } - VERIFY(eiSymm.eigenvalues().isApprox(eiDirect.eigenvalues(), eival_eps)); - VERIFY_IS_APPROX(m.template selfadjointView() * eiDirect.eigenvectors(), - eiDirect.eigenvectors() * eiDirect.eigenvalues().asDiagonal()); - VERIFY_IS_APPROX(m.template selfadjointView().eigenvalues(), eiDirect.eigenvalues()); + if(scaling<(std::numeric_limits::min)()) + { + VERIFY(eiDirect.eigenvalues().cwiseAbs().maxCoeff() <= (std::numeric_limits::min)()); + } + else + { + VERIFY_IS_APPROX(eiSymm.eigenvalues()/scaling, eiDirect.eigenvalues()/scaling); + VERIFY_IS_APPROX((m.template selfadjointView() * eiDirect.eigenvectors())/scaling, + (eiDirect.eigenvectors() * eiDirect.eigenvalues().asDiagonal())/scaling); + VERIFY_IS_APPROX(m.template selfadjointView().eigenvalues()/scaling, eiDirect.eigenvalues()/scaling); + } + VERIFY_IS_UNITARY(eiDirect.eigenvectors()); } } @@ -164,6 +182,7 @@ template void selfadjointeigensolver(const MatrixType& m) } } +template void bug_854() { Matrix3d m; @@ -173,6 +192,7 @@ void bug_854() selfadjointeigensolver_essential_check(m); } +template void bug_1014() { Matrix3d m; @@ -182,6 +202,26 @@ void 
bug_1014() selfadjointeigensolver_essential_check(m); } +template +void bug_1225() +{ + Matrix3d m1, m2; + m1.setRandom(); + m1 = m1*m1.transpose(); + m2 = m1.triangularView(); + SelfAdjointEigenSolver eig1(m1); + SelfAdjointEigenSolver eig2(m2.selfadjointView()); + VERIFY_IS_APPROX(eig1.eigenvalues(), eig2.eigenvalues()); +} + +template +void bug_1204() +{ + SparseMatrix A(2,2); + A.setIdentity(); + SelfAdjointEigenSolver > eig(A); +} + void test_eigensolver_selfadjoint() { int s = 0; @@ -210,8 +250,10 @@ void test_eigensolver_selfadjoint() CALL_SUBTEST_7( selfadjointeigensolver(Matrix()) ); } - CALL_SUBTEST_13( bug_854() ); - CALL_SUBTEST_13( bug_1014() ); + CALL_SUBTEST_13( bug_854<0>() ); + CALL_SUBTEST_13( bug_1014<0>() ); + CALL_SUBTEST_13( bug_1204<0>() ); + CALL_SUBTEST_13( bug_1225<0>() ); // Test problem size constructors s = internal::random(1,EIGEN_TEST_MAX_SIZE/4); diff --git a/test/evaluators.cpp b/test/evaluators.cpp index 876dffe22..aed5a05a7 100644 --- a/test/evaluators.cpp +++ b/test/evaluators.cpp @@ -21,7 +21,7 @@ namespace Eigen { EIGEN_STRONG_INLINE DstXprType& copy_using_evaluator(const EigenBase &dst, const SrcXprType &src) { - call_assignment(dst.const_cast_derived(), src.derived(), internal::assign_op()); + call_assignment(dst.const_cast_derived(), src.derived(), internal::assign_op()); return dst.const_cast_derived(); } @@ -29,7 +29,7 @@ namespace Eigen { EIGEN_STRONG_INLINE const DstXprType& copy_using_evaluator(const NoAlias& dst, const SrcXprType &src) { - call_assignment(dst, src.derived(), internal::assign_op()); + call_assignment(dst, src.derived(), internal::assign_op()); return dst.expression(); } @@ -45,7 +45,7 @@ namespace Eigen { dst.const_cast_derived().resizeLike(src.derived()); #endif - call_assignment(dst.const_cast_derived(), src.derived(), internal::assign_op()); + call_assignment(dst.const_cast_derived(), src.derived(), internal::assign_op()); return dst.const_cast_derived(); } @@ -53,28 +53,28 @@ namespace Eigen { void 
add_assign_using_evaluator(const DstXprType& dst, const SrcXprType& src) { typedef typename DstXprType::Scalar Scalar; - call_assignment(const_cast(dst), src.derived(), internal::add_assign_op()); + call_assignment(const_cast(dst), src.derived(), internal::add_assign_op()); } template void subtract_assign_using_evaluator(const DstXprType& dst, const SrcXprType& src) { typedef typename DstXprType::Scalar Scalar; - call_assignment(const_cast(dst), src.derived(), internal::sub_assign_op()); + call_assignment(const_cast(dst), src.derived(), internal::sub_assign_op()); } template void multiply_assign_using_evaluator(const DstXprType& dst, const SrcXprType& src) { typedef typename DstXprType::Scalar Scalar; - call_assignment(dst.const_cast_derived(), src.derived(), internal::mul_assign_op()); + call_assignment(dst.const_cast_derived(), src.derived(), internal::mul_assign_op()); } template void divide_assign_using_evaluator(const DstXprType& dst, const SrcXprType& src) { typedef typename DstXprType::Scalar Scalar; - call_assignment(dst.const_cast_derived(), src.derived(), internal::div_assign_op()); + call_assignment(dst.const_cast_derived(), src.derived(), internal::div_assign_op()); } template diff --git a/test/fastmath.cpp b/test/fastmath.cpp index efdd5b313..cc5db0746 100644 --- a/test/fastmath.cpp +++ b/test/fastmath.cpp @@ -49,7 +49,8 @@ void check_inf_nan(bool dryrun) { VERIFY( !m.allFinite() ); VERIFY( m.hasNaN() ); } - m(4) /= 0.0; + T hidden_zero = (std::numeric_limits::min)()*(std::numeric_limits::min)(); + m(4) /= hidden_zero; if(dryrun) { std::cout << "std::isfinite(" << m(4) << ") = "; check((std::isfinite)(m(4)),false); std::cout << " ; numext::isfinite = "; check((numext::isfinite)(m(4)), false); std::cout << "\n"; diff --git a/test/first_aligned.cpp b/test/first_aligned.cpp index bf22f6b97..ae2d4bc42 100644 --- a/test/first_aligned.cpp +++ b/test/first_aligned.cpp @@ -41,7 +41,7 @@ void test_first_aligned() test_first_aligned_helper(array_double+1, 50); 
test_first_aligned_helper(array_double+2, 50); - double *array_double_plus_4_bytes = (double*)(size_t(array_double)+4); + double *array_double_plus_4_bytes = (double*)(internal::UIntPtr(array_double)+4); test_none_aligned_helper(array_double_plus_4_bytes, 50); test_none_aligned_helper(array_double_plus_4_bytes+1, 50); diff --git a/test/geo_alignedbox.cpp b/test/geo_alignedbox.cpp index 2bdb4b7f2..d2339a651 100644 --- a/test/geo_alignedbox.cpp +++ b/test/geo_alignedbox.cpp @@ -48,6 +48,8 @@ template void alignedbox(const BoxType& _box) b0.extend(p0); b0.extend(p1); VERIFY(b0.contains(p0*s1+(Scalar(1)-s1)*p1)); + VERIFY(b0.contains(b0.center())); + VERIFY_IS_APPROX(b0.center(),(p0+p1)/Scalar(2)); (b2 = b0).extend(b1); VERIFY(b2.contains(b0)); diff --git a/test/geo_homogeneous.cpp b/test/geo_homogeneous.cpp index bf63c69ec..2187c7bf9 100644 --- a/test/geo_homogeneous.cpp +++ b/test/geo_homogeneous.cpp @@ -58,6 +58,8 @@ template void homogeneous(void) T2MatrixType t2 = T2MatrixType::Random(); VERIFY_IS_APPROX(t2 * (v0.homogeneous().eval()), t2 * v0.homogeneous()); VERIFY_IS_APPROX(t2 * (m0.colwise().homogeneous().eval()), t2 * m0.colwise().homogeneous()); + VERIFY_IS_APPROX(t2 * (v0.homogeneous().asDiagonal()), t2 * hv0.asDiagonal()); + VERIFY_IS_APPROX((v0.homogeneous().asDiagonal()) * t2, hv0.asDiagonal() * t2); VERIFY_IS_APPROX((v0.transpose().rowwise().homogeneous().eval()) * t2, v0.transpose().rowwise().homogeneous() * t2); @@ -109,6 +111,8 @@ template void homogeneous(void) VERIFY_IS_APPROX( (v0.transpose().homogeneous() .lazyProduct( t2 )).hnormalized(), (v0.transpose().homogeneous()*t2).hnormalized() ); VERIFY_IS_APPROX( (pts.transpose().rowwise().homogeneous() .lazyProduct( t2 )).rowwise().hnormalized(), (pts1.transpose()*t2).rowwise().hnormalized() ); + + VERIFY_IS_APPROX( (t2.template triangularView() * v0.homogeneous()).eval(), (t2.template triangularView()*hv0) ); } void test_geo_homogeneous() diff --git a/test/geo_hyperplane.cpp b/test/geo_hyperplane.cpp 
index c1cc691c9..e77702bc7 100644 --- a/test/geo_hyperplane.cpp +++ b/test/geo_hyperplane.cpp @@ -97,9 +97,9 @@ template void lines() Vector u = Vector::Random(); Vector v = Vector::Random(); Scalar a = internal::random(); - while (abs(a-1) < 1e-4) a = internal::random(); - while (u.norm() < 1e-4) u = Vector::Random(); - while (v.norm() < 1e-4) v = Vector::Random(); + while (abs(a-1) < Scalar(1e-4)) a = internal::random(); + while (u.norm() < Scalar(1e-4)) u = Vector::Random(); + while (v.norm() < Scalar(1e-4)) v = Vector::Random(); HLine line_u = HLine::Through(center + u, center + a*u); HLine line_v = HLine::Through(center + v, center + a*v); @@ -111,14 +111,14 @@ template void lines() Vector result = line_u.intersection(line_v); // the lines should intersect at the point we called "center" - if(abs(a-1) > 1e-2 && abs(v.normalized().dot(u.normalized()))<0.9) + if(abs(a-1) > Scalar(1e-2) && abs(v.normalized().dot(u.normalized())) void check_slerp(const QuatType& q0, const QuatType& Scalar largeEps = test_precision(); Scalar theta_tot = AA(q1*q0.inverse()).angle(); - if(theta_tot>EIGEN_PI) - theta_tot = Scalar(2.*EIGEN_PI)-theta_tot; + if(theta_tot>Scalar(EIGEN_PI)) + theta_tot = Scalar(2.)*Scalar(EIGEN_PI)-theta_tot; for(Scalar t=0; t<=Scalar(1.001); t+=Scalar(0.1)) { QuatType q = q0.slerp(t,q1); @@ -50,13 +50,12 @@ template void quaternion(void) using std::abs; typedef Matrix Vector3; typedef Matrix Matrix3; - typedef Matrix Vector4; typedef Quaternion Quaternionx; typedef AngleAxis AngleAxisx; Scalar largeEps = test_precision(); if (internal::is_same::value) - largeEps = 1e-3f; + largeEps = Scalar(1e-3); Scalar eps = internal::random() * Scalar(1e-2); @@ -115,8 +114,8 @@ template void quaternion(void) // Do not execute the test if the rotation angle is almost zero, or // the rotation axis and v1 are almost parallel. 
if (abs(aa.angle()) > 5*test_precision() - && (aa.axis() - v1.normalized()).norm() < 1.99 - && (aa.axis() + v1.normalized()).norm() < 1.99) + && (aa.axis() - v1.normalized()).norm() < Scalar(1.99) + && (aa.axis() + v1.normalized()).norm() < Scalar(1.99)) { VERIFY_IS_NOT_APPROX(q1 * v1, Quaternionx(AngleAxisx(aa.angle()*2,aa.axis())) * v1); } @@ -157,8 +156,8 @@ template void quaternion(void) Quaternionx *q = new Quaternionx; delete q; - q1 = AngleAxisx(a, v0.normalized()); - q2 = AngleAxisx(b, v1.normalized()); + q1 = Quaternionx::UnitRandom(); + q2 = Quaternionx::UnitRandom(); check_slerp(q1,q2); q1 = AngleAxisx(b, v1.normalized()); @@ -169,7 +168,7 @@ template void quaternion(void) q2 = AngleAxisx(-b, -v1.normalized()); check_slerp(q1,q2); - q1.coeffs() = Vector4::Random().normalized(); + q1 = Quaternionx::UnitRandom(); q2.coeffs() = -q1.coeffs(); check_slerp(q1,q2); } diff --git a/test/geo_transformations.cpp b/test/geo_transformations.cpp index 51f90036d..278e527c2 100644 --- a/test/geo_transformations.cpp +++ b/test/geo_transformations.cpp @@ -18,6 +18,11 @@ Matrix angleToVec(T a) return Matrix(std::cos(a), std::sin(a)); } +// This permits to workaround a bug in clang/llvm code generation. 
+template +EIGEN_DONT_INLINE +void dont_over_optimize(T& x) { volatile typename T::Scalar tmp = x(0); x(0) = tmp; } + template void non_projective_only() { /* this test covers the following files: @@ -224,12 +229,13 @@ template void transformations() do { v3 = Vector3::Random(); + dont_over_optimize(v3); } while (v3.cwiseAbs().minCoeff()::epsilon()); Translation3 tv3(v3); Transform3 t5(tv3); t4 = tv3; VERIFY_IS_APPROX(t5.matrix(), t4.matrix()); - t4.translate(-v3); + t4.translate((-v3).eval()); VERIFY_IS_APPROX(t4.matrix(), MatrixType::Identity()); t4 *= tv3; VERIFY_IS_APPROX(t5.matrix(), t4.matrix()); @@ -328,6 +334,9 @@ template void transformations() t0.scale(v0); t1 *= AlignedScaling3(v0); VERIFY_IS_APPROX(t0.matrix(), t1.matrix()); + t1 = AlignedScaling3(v0) * (Translation3(v0) * Transform3(q1)); + t1 = t1 * v0.asDiagonal(); + VERIFY_IS_APPROX(t0.matrix(), t1.matrix()); // transformation * translation t0.translate(v0); t1 = t1 * Translation3(v0); @@ -466,7 +475,7 @@ template void transformations() Scalar a2 = R0.slerp(Scalar(k+1)/Scalar(path_steps), R1).angle(); l += std::abs(a2-a1); } - VERIFY(l<=EIGEN_PI*(Scalar(1)+NumTraits::epsilon()*Scalar(path_steps/2))); + VERIFY(l<=Scalar(EIGEN_PI)*(Scalar(1)+NumTraits::epsilon()*Scalar(path_steps/2))); // check basic features { @@ -476,6 +485,79 @@ template void transformations() Rotation2D r2(r1); // copy ctor VERIFY_IS_APPROX(r2.angle(),s0); } + + { + Transform3 t32(Matrix4::Random()), t33, t34; + t34 = t33 = t32; + t32.scale(v0); + t33*=AlignedScaling3(v0); + VERIFY_IS_APPROX(t32.matrix(), t33.matrix()); + t33 = t34 * AlignedScaling3(v0); + VERIFY_IS_APPROX(t32.matrix(), t33.matrix()); + } + +} + +template +void transform_associativity_left(const A1& a1, const A2& a2, const P& p, const Q& q, const V& v, const H& h) +{ + VERIFY_IS_APPROX( q*(a1*v), (q*a1)*v ); + VERIFY_IS_APPROX( q*(a2*v), (q*a2)*v ); + VERIFY_IS_APPROX( q*(p*h).hnormalized(), ((q*p)*h).hnormalized() ); +} + +template +void 
transform_associativity2(const A1& a1, const A2& a2, const P& p, const Q& q, const V& v, const H& h) +{ + VERIFY_IS_APPROX( a1*(q*v), (a1*q)*v ); + VERIFY_IS_APPROX( a2*(q*v), (a2*q)*v ); + VERIFY_IS_APPROX( p *(q*v).homogeneous(), (p *q)*v.homogeneous() ); + + transform_associativity_left(a1, a2,p, q, v, h); +} + +template +void transform_associativity(const RotationType& R) +{ + typedef Matrix VectorType; + typedef Matrix HVectorType; + typedef Matrix LinearType; + typedef Matrix MatrixType; + typedef Transform AffineCompactType; + typedef Transform AffineType; + typedef Transform ProjectiveType; + typedef DiagonalMatrix ScalingType; + typedef Translation TranslationType; + + AffineCompactType A1c; A1c.matrix().setRandom(); + AffineCompactType A2c; A2c.matrix().setRandom(); + AffineType A1(A1c); + AffineType A2(A2c); + ProjectiveType P1; P1.matrix().setRandom(); + VectorType v1 = VectorType::Random(); + VectorType v2 = VectorType::Random(); + HVectorType h1 = HVectorType::Random(); + Scalar s1 = internal::random(); + LinearType L = LinearType::Random(); + MatrixType M = MatrixType::Random(); + + CALL_SUBTEST( transform_associativity2(A1c, A1, P1, A2, v2, h1) ); + CALL_SUBTEST( transform_associativity2(A1c, A1, P1, A2c, v2, h1) ); + CALL_SUBTEST( transform_associativity2(A1c, A1, P1, v1.asDiagonal(), v2, h1) ); + CALL_SUBTEST( transform_associativity2(A1c, A1, P1, ScalingType(v1), v2, h1) ); + CALL_SUBTEST( transform_associativity2(A1c, A1, P1, Scaling(v1), v2, h1) ); + CALL_SUBTEST( transform_associativity2(A1c, A1, P1, Scaling(s1), v2, h1) ); + CALL_SUBTEST( transform_associativity2(A1c, A1, P1, TranslationType(v1), v2, h1) ); + CALL_SUBTEST( transform_associativity_left(A1c, A1, P1, L, v2, h1) ); + CALL_SUBTEST( transform_associativity2(A1c, A1, P1, R, v2, h1) ); + + VERIFY_IS_APPROX( A1*(M*h1), (A1*M)*h1 ); + VERIFY_IS_APPROX( A1c*(M*h1), (A1c*M)*h1 ); + VERIFY_IS_APPROX( P1*(M*h1), (P1*M)*h1 ); + + VERIFY_IS_APPROX( M*(A1*h1), (M*A1)*h1 ); + VERIFY_IS_APPROX( 
M*(A1c*h1), (M*A1c)*h1 ); + VERIFY_IS_APPROX( M*(P1*h1), ((M*P1)*h1) ); } template void transform_alignment() @@ -556,5 +638,8 @@ void test_geo_transformations() CALL_SUBTEST_7(( transform_products() )); CALL_SUBTEST_7(( transform_products() )); + + CALL_SUBTEST_8(( transform_associativity(Rotation2D(internal::random()*double(EIGEN_PI))) )); + CALL_SUBTEST_8(( transform_associativity(Quaterniond::UnitRandom()) )); } } diff --git a/unsupported/test/cxx11_float16.cpp b/test/half_float.cpp similarity index 72% rename from unsupported/test/cxx11_float16.cpp rename to test/half_float.cpp index 9a813653c..f8d438e2f 100644 --- a/unsupported/test/cxx11_float16.cpp +++ b/test/half_float.cpp @@ -5,17 +5,23 @@ // Public License v. 2.0. If a copy of the MPL was not distributed // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. -#define EIGEN_TEST_NO_LONGDOUBLE -#define EIGEN_TEST_NO_COMPLEX -#define EIGEN_TEST_FUNC cxx11_float16 +#include #include "main.h" + #include +// Make sure it's possible to forward declare Eigen::half +namespace Eigen { +struct half; +} + using Eigen::half; void test_conversion() { + using Eigen::half_impl::__half; + // Conversion from float. VERIFY_IS_EQUAL(half(1.0f).x, 0x3c00); VERIFY_IS_EQUAL(half(0.5f).x, 0x3800); @@ -34,8 +40,8 @@ void test_conversion() float val1 = float(half(__half(0x3c00))); float val2 = float(half(__half(0x3c01))); float val3 = float(half(__half(0x3c02))); - VERIFY_IS_EQUAL(half(0.5 * (val1 + val2)).x, 0x3c00); - VERIFY_IS_EQUAL(half(0.5 * (val2 + val3)).x, 0x3c02); + VERIFY_IS_EQUAL(half(0.5f * (val1 + val2)).x, 0x3c00); + VERIFY_IS_EQUAL(half(0.5f * (val2 + val3)).x, 0x3c02); // Conversion from int. 
VERIFY_IS_EQUAL(half(-1).x, 0xbc00); @@ -88,6 +94,16 @@ void test_conversion() #endif } +void test_numtraits() +{ + std::cout << "epsilon = " << NumTraits::epsilon() << std::endl; + std::cout << "highest = " << NumTraits::highest() << std::endl; + std::cout << "lowest = " << NumTraits::lowest() << std::endl; + std::cout << "inifinty = " << NumTraits::infinity() << std::endl; + std::cout << "nan = " << NumTraits::quiet_NaN() << std::endl; + +} + void test_arithmetic() { VERIFY_IS_EQUAL(float(half(2) + half(2)), 4); @@ -140,53 +156,97 @@ void test_comparison() void test_basic_functions() { VERIFY_IS_EQUAL(float(numext::abs(half(3.5f))), 3.5f); + VERIFY_IS_EQUAL(float(abs(half(3.5f))), 3.5f); VERIFY_IS_EQUAL(float(numext::abs(half(-3.5f))), 3.5f); + VERIFY_IS_EQUAL(float(abs(half(-3.5f))), 3.5f); VERIFY_IS_EQUAL(float(numext::floor(half(3.5f))), 3.0f); + VERIFY_IS_EQUAL(float(floor(half(3.5f))), 3.0f); VERIFY_IS_EQUAL(float(numext::floor(half(-3.5f))), -4.0f); + VERIFY_IS_EQUAL(float(floor(half(-3.5f))), -4.0f); VERIFY_IS_EQUAL(float(numext::ceil(half(3.5f))), 4.0f); + VERIFY_IS_EQUAL(float(ceil(half(3.5f))), 4.0f); VERIFY_IS_EQUAL(float(numext::ceil(half(-3.5f))), -3.0f); + VERIFY_IS_EQUAL(float(ceil(half(-3.5f))), -3.0f); VERIFY_IS_APPROX(float(numext::sqrt(half(0.0f))), 0.0f); + VERIFY_IS_APPROX(float(sqrt(half(0.0f))), 0.0f); VERIFY_IS_APPROX(float(numext::sqrt(half(4.0f))), 2.0f); + VERIFY_IS_APPROX(float(sqrt(half(4.0f))), 2.0f); VERIFY_IS_APPROX(float(numext::pow(half(0.0f), half(1.0f))), 0.0f); + VERIFY_IS_APPROX(float(pow(half(0.0f), half(1.0f))), 0.0f); VERIFY_IS_APPROX(float(numext::pow(half(2.0f), half(2.0f))), 4.0f); + VERIFY_IS_APPROX(float(pow(half(2.0f), half(2.0f))), 4.0f); VERIFY_IS_EQUAL(float(numext::exp(half(0.0f))), 1.0f); - VERIFY_IS_APPROX(float(numext::exp(half(EIGEN_PI))), float(20.0 + EIGEN_PI)); + VERIFY_IS_EQUAL(float(exp(half(0.0f))), 1.0f); + VERIFY_IS_APPROX(float(numext::exp(half(EIGEN_PI))), 20.f + float(EIGEN_PI)); + 
VERIFY_IS_APPROX(float(exp(half(EIGEN_PI))), 20.f + float(EIGEN_PI)); VERIFY_IS_EQUAL(float(numext::log(half(1.0f))), 0.0f); + VERIFY_IS_EQUAL(float(log(half(1.0f))), 0.0f); VERIFY_IS_APPROX(float(numext::log(half(10.0f))), 2.30273f); + VERIFY_IS_APPROX(float(log(half(10.0f))), 2.30273f); + + VERIFY_IS_EQUAL(float(numext::log1p(half(0.0f))), 0.0f); + VERIFY_IS_EQUAL(float(log1p(half(0.0f))), 0.0f); + VERIFY_IS_APPROX(float(numext::log1p(half(10.0f))), 2.3978953f); + VERIFY_IS_APPROX(float(log1p(half(10.0f))), 2.3978953f); } void test_trigonometric_functions() { VERIFY_IS_APPROX(numext::cos(half(0.0f)), half(cosf(0.0f))); + VERIFY_IS_APPROX(cos(half(0.0f)), half(cosf(0.0f))); VERIFY_IS_APPROX(numext::cos(half(EIGEN_PI)), half(cosf(EIGEN_PI))); //VERIFY_IS_APPROX(numext::cos(half(EIGEN_PI/2)), half(cosf(EIGEN_PI/2))); //VERIFY_IS_APPROX(numext::cos(half(3*EIGEN_PI/2)), half(cosf(3*EIGEN_PI/2))); VERIFY_IS_APPROX(numext::cos(half(3.5f)), half(cosf(3.5f))); VERIFY_IS_APPROX(numext::sin(half(0.0f)), half(sinf(0.0f))); + VERIFY_IS_APPROX(sin(half(0.0f)), half(sinf(0.0f))); // VERIFY_IS_APPROX(numext::sin(half(EIGEN_PI)), half(sinf(EIGEN_PI))); VERIFY_IS_APPROX(numext::sin(half(EIGEN_PI/2)), half(sinf(EIGEN_PI/2))); VERIFY_IS_APPROX(numext::sin(half(3*EIGEN_PI/2)), half(sinf(3*EIGEN_PI/2))); VERIFY_IS_APPROX(numext::sin(half(3.5f)), half(sinf(3.5f))); VERIFY_IS_APPROX(numext::tan(half(0.0f)), half(tanf(0.0f))); + VERIFY_IS_APPROX(tan(half(0.0f)), half(tanf(0.0f))); // VERIFY_IS_APPROX(numext::tan(half(EIGEN_PI)), half(tanf(EIGEN_PI))); // VERIFY_IS_APPROX(numext::tan(half(EIGEN_PI/2)), half(tanf(EIGEN_PI/2))); //VERIFY_IS_APPROX(numext::tan(half(3*EIGEN_PI/2)), half(tanf(3*EIGEN_PI/2))); VERIFY_IS_APPROX(numext::tan(half(3.5f)), half(tanf(3.5f))); } -void test_cxx11_float16() +void test_array() +{ + typedef Array ArrayXh; + Index size = internal::random(1,10); + Index i = internal::random(0,size-1); + ArrayXh a1 = ArrayXh::Random(size), a2 = ArrayXh::Random(size); + 
VERIFY_IS_APPROX( a1+a1, half(2)*a1 ); + VERIFY( (a1.abs() >= half(0)).all() ); + VERIFY_IS_APPROX( (a1*a1).sqrt(), a1.abs() ); + + VERIFY( ((a1.min)(a2) <= (a1.max)(a2)).all() ); + a1(i) = half(-10.); + VERIFY_IS_EQUAL( a1.minCoeff(), half(-10.) ); + a1(i) = half(10.); + VERIFY_IS_EQUAL( a1.maxCoeff(), half(10.) ); + + std::stringstream ss; + ss << a1; +} + +void test_half_float() { CALL_SUBTEST(test_conversion()); + CALL_SUBTEST(test_numtraits()); CALL_SUBTEST(test_arithmetic()); CALL_SUBTEST(test_comparison()); CALL_SUBTEST(test_basic_functions()); CALL_SUBTEST(test_trigonometric_functions()); + CALL_SUBTEST(test_array()); } diff --git a/test/inplace_decomposition.cpp b/test/inplace_decomposition.cpp new file mode 100644 index 000000000..92d0d91b6 --- /dev/null +++ b/test/inplace_decomposition.cpp @@ -0,0 +1,110 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2016 Gael Guennebaud +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#include "main.h" +#include +#include +#include + +// This file test inplace decomposition through Ref<>, as supported by Cholesky, LU, and QR decompositions. + +template void inplace(bool square = false, bool SPD = false) +{ + typedef typename MatrixType::Scalar Scalar; + typedef Matrix RhsType; + typedef Matrix ResType; + + Index rows = MatrixType::RowsAtCompileTime==Dynamic ? internal::random(2,EIGEN_TEST_MAX_SIZE/2) : Index(MatrixType::RowsAtCompileTime); + Index cols = MatrixType::ColsAtCompileTime==Dynamic ? 
(square?rows:internal::random(2,rows)) : Index(MatrixType::ColsAtCompileTime); + + MatrixType A = MatrixType::Random(rows,cols); + RhsType b = RhsType::Random(rows); + ResType x(cols); + + if(SPD) + { + assert(square); + A.topRows(cols) = A.topRows(cols).adjoint() * A.topRows(cols); + A.diagonal().array() += 1e-3; + } + + MatrixType A0 = A; + MatrixType A1 = A; + + DecType dec(A); + + // Check that the content of A has been modified + VERIFY_IS_NOT_APPROX( A, A0 ); + + // Check that the decomposition is correct: + if(rows==cols) + { + VERIFY_IS_APPROX( A0 * (x = dec.solve(b)), b ); + } + else + { + VERIFY_IS_APPROX( A0.transpose() * A0 * (x = dec.solve(b)), A0.transpose() * b ); + } + + // Check that modifying A breaks the current dec: + A.setRandom(); + if(rows==cols) + { + VERIFY_IS_NOT_APPROX( A0 * (x = dec.solve(b)), b ); + } + else + { + VERIFY_IS_NOT_APPROX( A0.transpose() * A0 * (x = dec.solve(b)), A0.transpose() * b ); + } + + // Check that calling compute(A1) does not modify A1: + A = A0; + dec.compute(A1); + VERIFY_IS_EQUAL(A0,A1); + VERIFY_IS_NOT_APPROX( A, A0 ); + if(rows==cols) + { + VERIFY_IS_APPROX( A0 * (x = dec.solve(b)), b ); + } + else + { + VERIFY_IS_APPROX( A0.transpose() * A0 * (x = dec.solve(b)), A0.transpose() * b ); + } +} + + +void test_inplace_decomposition() +{ + EIGEN_UNUSED typedef Matrix Matrix43d; + for(int i = 0; i < g_repeat; i++) { + CALL_SUBTEST_1(( inplace >, MatrixXd>(true,true) )); + CALL_SUBTEST_1(( inplace >, Matrix4d>(true,true) )); + + CALL_SUBTEST_2(( inplace >, MatrixXd>(true,true) )); + CALL_SUBTEST_2(( inplace >, Matrix4d>(true,true) )); + + CALL_SUBTEST_3(( inplace >, MatrixXd>(true,false) )); + CALL_SUBTEST_3(( inplace >, Matrix4d>(true,false) )); + + CALL_SUBTEST_4(( inplace >, MatrixXd>(true,false) )); + CALL_SUBTEST_4(( inplace >, Matrix4d>(true,false) )); + + CALL_SUBTEST_5(( inplace >, MatrixXd>(false,false) )); + CALL_SUBTEST_5(( inplace >, Matrix43d>(false,false) )); + + CALL_SUBTEST_6(( inplace >, 
MatrixXd>(false,false) )); + CALL_SUBTEST_6(( inplace >, Matrix43d>(false,false) )); + + CALL_SUBTEST_7(( inplace >, MatrixXd>(false,false) )); + CALL_SUBTEST_7(( inplace >, Matrix43d>(false,false) )); + + CALL_SUBTEST_8(( inplace >, MatrixXd>(false,false) )); + CALL_SUBTEST_8(( inplace >, Matrix43d>(false,false) )); + } +} diff --git a/test/integer_types.cpp b/test/integer_types.cpp index 950f8e9be..a21f73a81 100644 --- a/test/integer_types.cpp +++ b/test/integer_types.cpp @@ -158,4 +158,12 @@ void test_integer_types() CALL_SUBTEST_8( integer_type_tests(Matrix(1, 5)) ); } +#ifdef EIGEN_TEST_PART_9 + VERIFY_IS_EQUAL(internal::scalar_div_cost::value, 8); + VERIFY_IS_EQUAL(internal::scalar_div_cost::value, 8); + if(sizeof(long)>sizeof(int)) { + VERIFY(internal::scalar_div_cost::value > internal::scalar_div_cost::value); + VERIFY(internal::scalar_div_cost::value > internal::scalar_div_cost::value); + } +#endif } diff --git a/test/is_same_dense.cpp b/test/is_same_dense.cpp index 6d7904bac..2c7838ce9 100644 --- a/test/is_same_dense.cpp +++ b/test/is_same_dense.cpp @@ -9,6 +9,8 @@ #include "main.h" +using internal::is_same_dense; + void test_is_same_dense() { typedef Matrix ColMatrixXd; diff --git a/test/linearstructure.cpp b/test/linearstructure.cpp index 292f33969..17474af10 100644 --- a/test/linearstructure.cpp +++ b/test/linearstructure.cpp @@ -9,7 +9,7 @@ // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
static bool g_called; -#define EIGEN_SPECIAL_SCALAR_MULTIPLE_PLUGIN { g_called = true; } +#define EIGEN_SCALAR_BINARY_OP_PLUGIN { g_called |= (!internal::is_same::value); } #include "main.h" @@ -21,6 +21,7 @@ template void linearStructure(const MatrixType& m) */ typedef typename MatrixType::Index Index; typedef typename MatrixType::Scalar Scalar; + typedef typename MatrixType::RealScalar RealScalar; Index rows = m.rows(); Index cols = m.cols(); @@ -32,7 +33,7 @@ template void linearStructure(const MatrixType& m) m3(rows, cols); Scalar s1 = internal::random(); - while (abs(s1)<1e-3) s1 = internal::random(); + while (abs(s1)(); Index r = internal::random(0, rows-1), c = internal::random(0, cols-1); @@ -92,6 +93,22 @@ template void real_complex(DenseIndex rows = MatrixType::Ro g_called = false; VERIFY_IS_APPROX(m1/s, m1/Scalar(s)); VERIFY(g_called && "matrix / real not properly optimized"); + + g_called = false; + VERIFY_IS_APPROX(s+m1.array(), Scalar(s)+m1.array()); + VERIFY(g_called && "real + matrix not properly optimized"); + + g_called = false; + VERIFY_IS_APPROX(m1.array()+s, m1.array()+Scalar(s)); + VERIFY(g_called && "matrix + real not properly optimized"); + + g_called = false; + VERIFY_IS_APPROX(s-m1.array(), Scalar(s)-m1.array()); + VERIFY(g_called && "real - matrix not properly optimized"); + + g_called = false; + VERIFY_IS_APPROX(m1.array()-s, m1.array()-Scalar(s)); + VERIFY(g_called && "matrix - real not properly optimized"); } void test_linearstructure() diff --git a/test/main.h b/test/main.h index b0e3b7818..74ff96a23 100644 --- a/test/main.h +++ b/test/main.h @@ -279,8 +279,8 @@ inline void verify_impl(bool condition, const char *testname, const char *file, #define VERIFY_LE(a, b) ::verify_impl(a <= b, g_test_stack.back().c_str(), __FILE__, __LINE__, EI_PP_MAKE_STRING(a <= b)) -#define VERIFY_IS_EQUAL(a, b) VERIFY(test_is_equal(a, b)) -#define VERIFY_IS_NOT_EQUAL(a, b) VERIFY(!test_is_equal(a, b)) +#define VERIFY_IS_EQUAL(a, b) VERIFY(test_is_equal(a, 
b, true)) +#define VERIFY_IS_NOT_EQUAL(a, b) VERIFY(test_is_equal(a, b, false)) #define VERIFY_IS_APPROX(a, b) VERIFY(verifyIsApprox(a, b)) #define VERIFY_IS_NOT_APPROX(a, b) VERIFY(!test_isApprox(a, b)) #define VERIFY_IS_MUCH_SMALLER_THAN(a, b) VERIFY(test_isMuchSmallerThan(a, b)) @@ -302,7 +302,7 @@ namespace Eigen { template inline typename NumTraits::Real test_precision() { return NumTraits::dummy_precision(); } template<> inline float test_precision() { return 1e-3f; } template<> inline double test_precision() { return 1e-6; } -template<> inline long double test_precision() { return 1e-6; } +template<> inline long double test_precision() { return 1e-6l; } template<> inline float test_precision >() { return test_precision(); } template<> inline double test_precision >() { return test_precision(); } template<> inline long double test_precision >() { return test_precision(); } @@ -452,20 +452,20 @@ T test_relative_error(const AngleAxis &a, const AngleAxis &b) } template -inline bool test_isApprox(const Type1& a, const Type2& b) +inline bool test_isApprox(const Type1& a, const Type2& b, typename Type1::Scalar* = 0) // Enabled for Eigen's type only { return a.isApprox(b, test_precision()); } // get_test_precision is a small wrapper to test_precision allowing to return the scalar precision for either scalars or expressions template -typename NumTraits::Real get_test_precision(const typename T::Scalar* = 0) +typename NumTraits::Real get_test_precision(const T&, const typename T::Scalar* = 0) { return test_precision::Real>(); } template -typename NumTraits::Real get_test_precision(typename internal::enable_if::Real>::value, T>::type* = 0) +typename NumTraits::Real get_test_precision(const T&,typename internal::enable_if::Real>::value, T>::type* = 0) { return test_precision::Real>(); } @@ -477,7 +477,7 @@ inline bool verifyIsApprox(const Type1& a, const Type2& b) bool ret = test_isApprox(a,b); if(!ret) { - std::cerr << "Difference too large wrt tolerance " << 
get_test_precision() << ", relative error is: " << test_relative_error(a,b) << std::endl; + std::cerr << "Difference too large wrt tolerance " << get_test_precision(a) << ", relative error is: " << test_relative_error(a,b) << std::endl; } return ret; } @@ -517,17 +517,17 @@ inline bool test_isUnitary(const MatrixBase& m) // Forward declaration to avoid ICC warning template -bool test_is_equal(const T& actual, const U& expected); +bool test_is_equal(const T& actual, const U& expected, bool expect_equal=true); template -bool test_is_equal(const T& actual, const U& expected) +bool test_is_equal(const T& actual, const U& expected, bool expect_equal) { - if (actual==expected) + if ((actual==expected) == expect_equal) return true; // false: std::cerr - << std::endl << " actual = " << actual - << std::endl << " expected = " << expected << std::endl << std::endl; + << "\n actual = " << actual + << "\n expected " << (expect_equal ? "= " : "!=") << expected << "\n\n"; return false; } @@ -736,3 +736,8 @@ int main(int argc, char *argv[]) // remark #1572: floating-point equality and inequality comparisons are unreliable #pragma warning disable 279 383 1418 1572 #endif + +#ifdef _MSC_VER + // 4503 - decorated name length exceeded, name was truncated + #pragma warning( disable : 4503) +#endif diff --git a/test/mapped_matrix.cpp b/test/mapped_matrix.cpp index 88653e887..6a84c5897 100644 --- a/test/mapped_matrix.cpp +++ b/test/mapped_matrix.cpp @@ -25,7 +25,7 @@ template void map_class_vector(const VectorType& m) Scalar* array1 = internal::aligned_new(size); Scalar* array2 = internal::aligned_new(size); Scalar* array3 = new Scalar[size+1]; - Scalar* array3unaligned = (std::size_t(array3)%EIGEN_MAX_ALIGN_BYTES) == 0 ? array3+1 : array3; + Scalar* array3unaligned = (internal::UIntPtr(array3)%EIGEN_MAX_ALIGN_BYTES) == 0 ? 
array3+1 : array3; Scalar array4[EIGEN_TESTMAP_MAX_SIZE]; Map(array1, size) = VectorType::Random(size); @@ -65,7 +65,7 @@ template void map_class_matrix(const MatrixType& m) // array3unaligned -> unaligned pointer to heap Scalar* array3 = new Scalar[size+1]; for(int i = 0; i < size+1; i++) array3[i] = Scalar(1); - Scalar* array3unaligned = size_t(array3)%EIGEN_MAX_ALIGN_BYTES == 0 ? array3+1 : array3; + Scalar* array3unaligned = internal::UIntPtr(array3)%EIGEN_MAX_ALIGN_BYTES == 0 ? array3+1 : array3; Scalar array4[256]; if(size<=256) for(int i = 0; i < size; i++) array4[i] = Scalar(1); @@ -129,7 +129,7 @@ template void map_static_methods(const VectorType& m) Scalar* array1 = internal::aligned_new(size); Scalar* array2 = internal::aligned_new(size); Scalar* array3 = new Scalar[size+1]; - Scalar* array3unaligned = size_t(array3)%EIGEN_MAX_ALIGN_BYTES == 0 ? array3+1 : array3; + Scalar* array3unaligned = internal::UIntPtr(array3)%EIGEN_MAX_ALIGN_BYTES == 0 ? array3+1 : array3; VectorType::MapAligned(array1, size) = VectorType::Random(size); VectorType::Map(array2, size) = VectorType::Map(array1, size); diff --git a/test/mapstride.cpp b/test/mapstride.cpp index ee2414248..4858f8fea 100644 --- a/test/mapstride.cpp +++ b/test/mapstride.cpp @@ -23,7 +23,7 @@ template void map_class_vector(const VectorTy Scalar* a_array = internal::aligned_new(arraysize+1); Scalar* array = a_array; if(Alignment!=Aligned) - array = (Scalar*)(ptrdiff_t(a_array) + (internal::packet_traits::AlignedOnScalar?sizeof(Scalar):sizeof(typename NumTraits::Real))); + array = (Scalar*)(internal::IntPtr(a_array) + (internal::packet_traits::AlignedOnScalar?sizeof(Scalar):sizeof(typename NumTraits::Real))); { Map > map(array, size); @@ -63,14 +63,14 @@ template void map_class_matrix(const MatrixTy Scalar* a_array1 = internal::aligned_new(arraysize+1); Scalar* array1 = a_array1; if(Alignment!=Aligned) - array1 = (Scalar*)(std::ptrdiff_t(a_array1) + 
(internal::packet_traits::AlignedOnScalar?sizeof(Scalar):sizeof(typename NumTraits::Real))); + array1 = (Scalar*)(internal::IntPtr(a_array1) + (internal::packet_traits::AlignedOnScalar?sizeof(Scalar):sizeof(typename NumTraits::Real))); Scalar a_array2[256]; Scalar* array2 = a_array2; if(Alignment!=Aligned) - array2 = (Scalar*)(std::ptrdiff_t(a_array2) + (internal::packet_traits::AlignedOnScalar?sizeof(Scalar):sizeof(typename NumTraits::Real))); + array2 = (Scalar*)(internal::IntPtr(a_array2) + (internal::packet_traits::AlignedOnScalar?sizeof(Scalar):sizeof(typename NumTraits::Real))); else - array2 = (Scalar*)(((std::size_t(a_array2)+EIGEN_MAX_ALIGN_BYTES-1)/EIGEN_MAX_ALIGN_BYTES)*EIGEN_MAX_ALIGN_BYTES); + array2 = (Scalar*)(((internal::UIntPtr(a_array2)+EIGEN_MAX_ALIGN_BYTES-1)/EIGEN_MAX_ALIGN_BYTES)*EIGEN_MAX_ALIGN_BYTES); Index maxsize2 = a_array2 - array2 + 256; // test no inner stride and some dynamic outer stride diff --git a/test/mixingtypes.cpp b/test/mixingtypes.cpp index 0b381ec6c..ad9c2c652 100644 --- a/test/mixingtypes.cpp +++ b/test/mixingtypes.cpp @@ -23,10 +23,18 @@ #endif +static bool g_called; +#define EIGEN_SCALAR_BINARY_OP_PLUGIN { g_called |= (!internal::is_same::value); } + #include "main.h" using namespace std; +#define VERIFY_MIX_SCALAR(XPR,REF) \ + g_called = false; \ + VERIFY_IS_APPROX(XPR,REF); \ + VERIFY( g_called && #XPR" not properly optimized"); + template void mixingtypes(int size = SizeAtCompileType) { typedef std::complex CF; @@ -42,6 +50,7 @@ template void mixingtypes(int size = SizeAtCompileType) Mat_f mf = Mat_f::Random(size,size); Mat_d md = mf.template cast(); + //Mat_d rd = md; Mat_cf mcf = Mat_cf::Random(size,size); Mat_cd mcd = mcf.template cast >(); Mat_cd rcd = mcd; @@ -54,25 +63,59 @@ template void mixingtypes(int size = SizeAtCompileType) complex scf = internal::random >(); complex scd = internal::random >(); - mf+mf; - VERIFY_RAISES_ASSERT(mf+md); -#ifndef EIGEN_HAS_STD_RESULT_OF - // this one does not even compile with 
C++11 - VERIFY_RAISES_ASSERT(mf+mcf); -#endif + + float epsf = std::sqrt(std::numeric_limits ::min EIGEN_EMPTY ()); + double epsd = std::sqrt(std::numeric_limits::min EIGEN_EMPTY ()); + + while(std::abs(sf )(); + while(std::abs(sd )(); + while(std::abs(scf)(); + while(std::abs(scd)(); + +// VERIFY_RAISES_ASSERT(mf+md); // does not even compile #ifdef EIGEN_DONT_VECTORIZE VERIFY_RAISES_ASSERT(vf=vd); VERIFY_RAISES_ASSERT(vf+=vd); - VERIFY_RAISES_ASSERT(mcd=md); #endif // check scalar products - VERIFY_IS_APPROX(vcf * sf , vcf * complex(sf)); - VERIFY_IS_APPROX(sd * vcd, complex(sd) * vcd); - VERIFY_IS_APPROX(vf * scf , vf.template cast >() * scf); - VERIFY_IS_APPROX(scd * vd, scd * vd.template cast >()); + VERIFY_MIX_SCALAR(vcf * sf , vcf * complex(sf)); + VERIFY_MIX_SCALAR(sd * vcd , complex(sd) * vcd); + VERIFY_MIX_SCALAR(vf * scf , vf.template cast >() * scf); + VERIFY_MIX_SCALAR(scd * vd , scd * vd.template cast >()); + + VERIFY_MIX_SCALAR(vcf * 2 , vcf * complex(2)); + VERIFY_MIX_SCALAR(vcf * 2.1 , vcf * complex(2.1)); + VERIFY_MIX_SCALAR(2 * vcf, vcf * complex(2)); + VERIFY_MIX_SCALAR(2.1 * vcf , vcf * complex(2.1)); + + // check scalar quotients + VERIFY_MIX_SCALAR(vcf / sf , vcf / complex(sf)); + VERIFY_MIX_SCALAR(vf / scf , vf.template cast >() / scf); + VERIFY_MIX_SCALAR(vf.array() / scf, vf.template cast >().array() / scf); + VERIFY_MIX_SCALAR(scd / vd.array() , scd / vd.template cast >().array()); + + // check scalar increment + VERIFY_MIX_SCALAR(vcf.array() + sf , vcf.array() + complex(sf)); + VERIFY_MIX_SCALAR(sd + vcd.array(), complex(sd) + vcd.array()); + VERIFY_MIX_SCALAR(vf.array() + scf, vf.template cast >().array() + scf); + VERIFY_MIX_SCALAR(scd + vd.array() , scd + vd.template cast >().array()); + + // check scalar subtractions + VERIFY_MIX_SCALAR(vcf.array() - sf , vcf.array() - complex(sf)); + VERIFY_MIX_SCALAR(sd - vcd.array(), complex(sd) - vcd.array()); + VERIFY_MIX_SCALAR(vf.array() - scf, vf.template cast >().array() - scf); + 
VERIFY_MIX_SCALAR(scd - vd.array() , scd - vd.template cast >().array()); + + // check scalar powers + VERIFY_MIX_SCALAR( pow(vcf.array(), sf), Eigen::pow(vcf.array(), complex(sf)) ); + VERIFY_MIX_SCALAR( vcf.array().pow(sf) , Eigen::pow(vcf.array(), complex(sf)) ); + VERIFY_MIX_SCALAR( pow(sd, vcd.array()), Eigen::pow(complex(sd), vcd.array()) ); + VERIFY_MIX_SCALAR( Eigen::pow(vf.array(), scf), Eigen::pow(vf.template cast >().array(), scf) ); + VERIFY_MIX_SCALAR( vf.array().pow(scf) , Eigen::pow(vf.template cast >().array(), scf) ); + VERIFY_MIX_SCALAR( Eigen::pow(scd, vd.array()), Eigen::pow(scd, vd.template cast >().array()) ); // check dot product vf.dot(vf); @@ -184,6 +227,63 @@ template void mixingtypes(int size = SizeAtCompileType) Mat_cd((scd * mcd * md.template cast().eval()).template triangularView())); VERIFY_IS_APPROX(Mat_cd(rcd.template triangularView() = scd * md * mcd), Mat_cd((scd * md.template cast().eval() * mcd).template triangularView())); + + + VERIFY_IS_APPROX( md.array() * mcd.array(), md.template cast().eval().array() * mcd.array() ); + VERIFY_IS_APPROX( mcd.array() * md.array(), mcd.array() * md.template cast().eval().array() ); + + VERIFY_IS_APPROX( md.array() + mcd.array(), md.template cast().eval().array() + mcd.array() ); + VERIFY_IS_APPROX( mcd.array() + md.array(), mcd.array() + md.template cast().eval().array() ); + + VERIFY_IS_APPROX( md.array() - mcd.array(), md.template cast().eval().array() - mcd.array() ); + VERIFY_IS_APPROX( mcd.array() - md.array(), mcd.array() - md.template cast().eval().array() ); + + if(mcd.array().abs().minCoeff()>epsd) + { + VERIFY_IS_APPROX( md.array() / mcd.array(), md.template cast().eval().array() / mcd.array() ); + } + if(md.array().abs().minCoeff()>epsd) + { + VERIFY_IS_APPROX( mcd.array() / md.array(), mcd.array() / md.template cast().eval().array() ); + } + + if(md.array().abs().minCoeff()>epsd || mcd.array().abs().minCoeff()>epsd) + { + VERIFY_IS_APPROX( md.array().pow(mcd.array()), md.template 
cast().eval().array().pow(mcd.array()) ); + VERIFY_IS_APPROX( mcd.array().pow(md.array()), mcd.array().pow(md.template cast().eval().array()) ); + + VERIFY_IS_APPROX( pow(md.array(),mcd.array()), md.template cast().eval().array().pow(mcd.array()) ); + VERIFY_IS_APPROX( pow(mcd.array(),md.array()), mcd.array().pow(md.template cast().eval().array()) ); + } + + rcd = mcd; + VERIFY_IS_APPROX( rcd = md, md.template cast().eval() ); + rcd = mcd; + VERIFY_IS_APPROX( rcd += md, mcd + md.template cast().eval() ); + rcd = mcd; + VERIFY_IS_APPROX( rcd -= md, mcd - md.template cast().eval() ); + rcd = mcd; + VERIFY_IS_APPROX( rcd.array() *= md.array(), mcd.array() * md.template cast().eval().array() ); + rcd = mcd; + if(md.array().abs().minCoeff()>epsd) + { + VERIFY_IS_APPROX( rcd.array() /= md.array(), mcd.array() / md.template cast().eval().array() ); + } + + rcd = mcd; + VERIFY_IS_APPROX( rcd.noalias() += md + mcd*md, mcd + (md.template cast().eval()) + mcd*(md.template cast().eval())); + + VERIFY_IS_APPROX( rcd.noalias() = md*md, ((md*md).eval().template cast()) ); + rcd = mcd; + VERIFY_IS_APPROX( rcd.noalias() += md*md, mcd + ((md*md).eval().template cast()) ); + rcd = mcd; + VERIFY_IS_APPROX( rcd.noalias() -= md*md, mcd - ((md*md).eval().template cast()) ); + + VERIFY_IS_APPROX( rcd.noalias() = mcd + md*md, mcd + ((md*md).eval().template cast()) ); + rcd = mcd; + VERIFY_IS_APPROX( rcd.noalias() += mcd + md*md, mcd + mcd + ((md*md).eval().template cast()) ); + rcd = mcd; + VERIFY_IS_APPROX( rcd.noalias() -= mcd + md*md, - ((md*md).eval().template cast()) ); } void test_mixingtypes() diff --git a/test/nesting_ops.cpp b/test/nesting_ops.cpp index 2f5025305..a419b0e44 100644 --- a/test/nesting_ops.cpp +++ b/test/nesting_ops.cpp @@ -75,8 +75,8 @@ template void run_nesting_ops_2(const MatrixType& _m) } else { - VERIFY( verify_eval_type<1>(2*m1, 2*m1) ); - VERIFY( verify_eval_type<2>(2*m1, m1) ); + VERIFY( verify_eval_type<2>(2*m1, 2*m1) ); + VERIFY( verify_eval_type<3>(2*m1, 
m1) ); } VERIFY( verify_eval_type<2>(m1+m1, m1+m1) ); VERIFY( verify_eval_type<3>(m1+m1, m1) ); diff --git a/test/nullary.cpp b/test/nullary.cpp index cb87695ee..9063c6de8 100644 --- a/test/nullary.cpp +++ b/test/nullary.cpp @@ -104,13 +104,29 @@ void testVectorType(const VectorType& base) template void testMatrixType(const MatrixType& m) { + using std::abs; const Index rows = m.rows(); const Index cols = m.cols(); + typedef typename MatrixType::Scalar Scalar; + typedef typename MatrixType::RealScalar RealScalar; + + Scalar s1; + do { + s1 = internal::random(); + } while(abs(s1)::IsInteger)); MatrixType A; A.setIdentity(rows, cols); VERIFY(equalsIdentity(A)); VERIFY(equalsIdentity(MatrixType::Identity(rows, cols))); + + + A = MatrixType::Constant(rows,cols,s1); + Index i = internal::random(0,rows-1); + Index j = internal::random(0,cols-1); + VERIFY_IS_APPROX( MatrixType::Constant(rows,cols,s1)(i,j), s1 ); + VERIFY_IS_APPROX( MatrixType::Constant(rows,cols,s1).coeff(i,j), s1 ); + VERIFY_IS_APPROX( A(i,j), s1 ); } void test_nullary() @@ -137,4 +153,47 @@ void test_nullary() // Assignment of a RowVectorXd to a MatrixXd (regression test for bug #79). 
VERIFY( (MatrixXd(RowVectorXd::LinSpaced(3, 0, 1)) - RowVector3d(0, 0.5, 1)).norm() < std::numeric_limits::epsilon() ); #endif + +#ifdef EIGEN_TEST_PART_10 + // check some internal logic + VERIFY(( internal::has_nullary_operator >::value )); + VERIFY(( !internal::has_unary_operator >::value )); + VERIFY(( !internal::has_binary_operator >::value )); + VERIFY(( internal::functor_has_linear_access >::ret )); + + VERIFY(( !internal::has_nullary_operator >::value )); + VERIFY(( !internal::has_unary_operator >::value )); + VERIFY(( internal::has_binary_operator >::value )); + VERIFY(( !internal::functor_has_linear_access >::ret )); + + VERIFY(( !internal::has_nullary_operator >::value )); + VERIFY(( internal::has_unary_operator >::value )); + VERIFY(( !internal::has_binary_operator >::value )); + VERIFY(( internal::functor_has_linear_access >::ret )); + + // Regression unit test for a weird MSVC bug. + // Search "nullary_wrapper_workaround_msvc" in CoreEvaluators.h for the details. + // See also traits::match. + { + MatrixXf A = MatrixXf::Random(3,3); + Ref R = 2.0*A; + VERIFY_IS_APPROX(R, A+A); + + Ref R1 = MatrixXf::Random(3,3)+A; + + VectorXi V = VectorXi::Random(3); + Ref R2 = VectorXi::LinSpaced(3,1,3)+V; + VERIFY_IS_APPROX(R2, V+Vector3i(1,2,3)); + + VERIFY(( internal::has_nullary_operator >::value )); + VERIFY(( !internal::has_unary_operator >::value )); + VERIFY(( !internal::has_binary_operator >::value )); + VERIFY(( internal::functor_has_linear_access >::ret )); + + VERIFY(( !internal::has_nullary_operator >::value )); + VERIFY(( internal::has_unary_operator >::value )); + VERIFY(( !internal::has_binary_operator >::value )); + VERIFY(( internal::functor_has_linear_access >::ret )); + } +#endif } diff --git a/test/packetmath.cpp b/test/packetmath.cpp index c2346e1cd..20addf1ad 100644 --- a/test/packetmath.cpp +++ b/test/packetmath.cpp @@ -9,7 +9,11 @@ // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
#include "main.h" +#include "unsupported/Eigen/SpecialFunctions" +#if defined __GNUC__ && __GNUC__>=6 + #pragma GCC diagnostic ignored "-Wignored-attributes" +#endif // using namespace Eigen; namespace Eigen { @@ -368,7 +372,15 @@ template void packetmath_real() VERIFY_IS_EQUAL(std::exp(-std::numeric_limits::denorm_min()), data2[1]); } -#ifdef EIGEN_HAS_C99_MATH + if (PacketTraits::HasTanh) { + // NOTE this test migh fail with GCC prior to 6.3, see MathFunctionsImpl.h for details. + data1[0] = std::numeric_limits::quiet_NaN(); + packet_helper::HasTanh,Packet> h; + h.store(data2, internal::ptanh(h.load(data1))); + VERIFY((numext::isnan)(data2[0])); + } + +#if EIGEN_HAS_C99_MATH { data1[0] = std::numeric_limits::quiet_NaN(); packet_helper::HasLGamma,Packet> h; @@ -395,11 +407,12 @@ template void packetmath_real() data2[i] = internal::random(0,1) * std::pow(Scalar(10), internal::random(-6,6)); } - if(internal::random(0,1)<0.1) + if(internal::random(0,1)<0.1f) data1[internal::random(0, PacketSize)] = 0; CHECK_CWISE1_IF(PacketTraits::HasSqrt, std::sqrt, internal::psqrt); CHECK_CWISE1_IF(PacketTraits::HasLog, std::log, internal::plog); -#if defined(EIGEN_HAS_C99_MATH) && (__cplusplus > 199711L) +#if EIGEN_HAS_C99_MATH && (__cplusplus > 199711L) + CHECK_CWISE1_IF(PacketTraits::HasLog1p, std::log1p, internal::plog1p); CHECK_CWISE1_IF(internal::packet_traits::HasLGamma, std::lgamma, internal::plgamma); CHECK_CWISE1_IF(internal::packet_traits::HasErf, std::erf, internal::perf); CHECK_CWISE1_IF(internal::packet_traits::HasErfc, std::erfc, internal::perfc); @@ -432,7 +445,7 @@ template void packetmath_real() // VERIFY_IS_EQUAL(std::log(std::numeric_limits::denorm_min()), data2[0]); VERIFY((numext::isnan)(data2[1])); - data1[0] = -1.0f; + data1[0] = Scalar(-1.0f); h.store(data2, internal::plog(h.load(data1))); VERIFY((numext::isnan)(data2[0])); #if !EIGEN_FAST_MATH diff --git a/test/prec_inverse_4x4.cpp b/test/prec_inverse_4x4.cpp index c4ef2d4bd..eb6ad18c9 100644 --- 
a/test/prec_inverse_4x4.cpp +++ b/test/prec_inverse_4x4.cpp @@ -53,14 +53,29 @@ template void inverse_general_4x4(int repeat) // FIXME that 1.25 used to be 1.2 until we tested gcc 4.1 on 30 June 2010 and got 1.21. VERIFY(error_avg < (NumTraits::IsComplex ? 8.0 : 1.25)); VERIFY(error_max < (NumTraits::IsComplex ? 64.0 : 20.0)); + + { + int s = 5;//internal::random(4,10); + int i = 0;//internal::random(0,s-4); + int j = 0;//internal::random(0,s-4); + Matrix mat(s,s); + mat.setRandom(); + MatrixType submat = mat.template block<4,4>(i,j); + MatrixType mat_inv = mat.template block<4,4>(i,j).inverse(); + VERIFY_IS_APPROX(mat_inv, submat.inverse()); + mat.template block<4,4>(i,j) = submat.inverse(); + VERIFY_IS_APPROX(mat_inv, (mat.template block<4,4>(i,j))); + } } void test_prec_inverse_4x4() { CALL_SUBTEST_1((inverse_permutation_4x4())); CALL_SUBTEST_1(( inverse_general_4x4(200000 * g_repeat) )); + CALL_SUBTEST_1(( inverse_general_4x4 >(200000 * g_repeat) )); CALL_SUBTEST_2((inverse_permutation_4x4 >())); + CALL_SUBTEST_2(( inverse_general_4x4 >(200000 * g_repeat) )); CALL_SUBTEST_2(( inverse_general_4x4 >(200000 * g_repeat) )); CALL_SUBTEST_3((inverse_permutation_4x4())); diff --git a/test/product.h b/test/product.h index 27976a4ae..3b6511270 100644 --- a/test/product.h +++ b/test/product.h @@ -119,6 +119,14 @@ template void product(const MatrixType& m) res.noalias() -= square + m1 * m2.transpose(); VERIFY_IS_APPROX(res, square + m1 * m2.transpose()); + // test d ?= a-b*c rules + res.noalias() = square - m1 * m2.transpose(); + VERIFY_IS_APPROX(res, square - m1 * m2.transpose()); + res.noalias() += square - m1 * m2.transpose(); + VERIFY_IS_APPROX(res, 2*(square - m1 * m2.transpose())); + res.noalias() -= square - m1 * m2.transpose(); + VERIFY_IS_APPROX(res, square - m1 * m2.transpose()); + tm1 = m1; VERIFY_IS_APPROX(tm1.transpose() * v1, m1.transpose() * v1); @@ -160,6 +168,29 @@ template void product(const MatrixType& m) 
VERIFY_IS_APPROX(res2.block(0,0,1,cols).noalias() = m1.block(0,0,1,cols) * square2, (ref2.row(0) = m1.row(0) * square2)); } + // vector.block() (see bug 1283) + { + RowVectorType w1(rows); + VERIFY_IS_APPROX(square * v1.block(0,0,rows,1), square * v1); + VERIFY_IS_APPROX(w1.noalias() = square * v1.block(0,0,rows,1), square * v1); + VERIFY_IS_APPROX(w1.block(0,0,rows,1).noalias() = square * v1.block(0,0,rows,1), square * v1); + + Matrix w2(cols); + VERIFY_IS_APPROX(vc2.block(0,0,cols,1).transpose() * square2, vc2.transpose() * square2); + VERIFY_IS_APPROX(w2.noalias() = vc2.block(0,0,cols,1).transpose() * square2, vc2.transpose() * square2); + VERIFY_IS_APPROX(w2.block(0,0,1,cols).noalias() = vc2.block(0,0,cols,1).transpose() * square2, vc2.transpose() * square2); + + vc2 = square2.block(0,0,1,cols).transpose(); + VERIFY_IS_APPROX(square2.block(0,0,1,cols) * square2, vc2.transpose() * square2); + VERIFY_IS_APPROX(w2.noalias() = square2.block(0,0,1,cols) * square2, vc2.transpose() * square2); + VERIFY_IS_APPROX(w2.block(0,0,1,cols).noalias() = square2.block(0,0,1,cols) * square2, vc2.transpose() * square2); + + vc2 = square2.block(0,0,cols,1); + VERIFY_IS_APPROX(square2.block(0,0,cols,1).transpose() * square2, vc2.transpose() * square2); + VERIFY_IS_APPROX(w2.noalias() = square2.block(0,0,cols,1).transpose() * square2, vc2.transpose() * square2); + VERIFY_IS_APPROX(w2.block(0,0,1,cols).noalias() = square2.block(0,0,cols,1).transpose() * square2, vc2.transpose() * square2); + } + // inner product { Scalar x = square2.row(c) * square2.col(c2); @@ -196,4 +227,5 @@ template void product(const MatrixType& m) VERIFY_IS_APPROX(square * (s1*(square*square)), s1 * square * square * square); VERIFY_IS_APPROX(square * (square*square).conjugate(), square * square.conjugate() * square.conjugate()); } + } diff --git a/test/product_extra.cpp b/test/product_extra.cpp index d253fd7ed..e4990ac8c 100644 --- a/test/product_extra.cpp +++ b/test/product_extra.cpp @@ -256,6 +256,51 @@ 
Index compute_block_size() return ret; } + + +template +void bug_1308() +{ + int n = 10; + MatrixXd r(n,n); + VectorXd v = VectorXd::Random(n); + r = v * RowVectorXd::Ones(n); + VERIFY_IS_APPROX(r, v.rowwise().replicate(n)); + r = VectorXd::Ones(n) * v.transpose(); + VERIFY_IS_APPROX(r, v.rowwise().replicate(n).transpose()); + + Matrix4d ones44 = Matrix4d::Ones(); + Matrix4d m44 = Matrix4d::Ones() * Matrix4d::Ones(); + VERIFY_IS_APPROX(m44,Matrix4d::Constant(4)); + VERIFY_IS_APPROX(m44.noalias()=ones44*Matrix4d::Ones(), Matrix4d::Constant(4)); + VERIFY_IS_APPROX(m44.noalias()=ones44.transpose()*Matrix4d::Ones(), Matrix4d::Constant(4)); + VERIFY_IS_APPROX(m44.noalias()=Matrix4d::Ones()*ones44, Matrix4d::Constant(4)); + VERIFY_IS_APPROX(m44.noalias()=Matrix4d::Ones()*ones44.transpose(), Matrix4d::Constant(4)); + + typedef Matrix RMatrix4d; + RMatrix4d r44 = Matrix4d::Ones() * Matrix4d::Ones(); + VERIFY_IS_APPROX(r44,Matrix4d::Constant(4)); + VERIFY_IS_APPROX(r44.noalias()=ones44*Matrix4d::Ones(), Matrix4d::Constant(4)); + VERIFY_IS_APPROX(r44.noalias()=ones44.transpose()*Matrix4d::Ones(), Matrix4d::Constant(4)); + VERIFY_IS_APPROX(r44.noalias()=Matrix4d::Ones()*ones44, Matrix4d::Constant(4)); + VERIFY_IS_APPROX(r44.noalias()=Matrix4d::Ones()*ones44.transpose(), Matrix4d::Constant(4)); + VERIFY_IS_APPROX(r44.noalias()=ones44*RMatrix4d::Ones(), Matrix4d::Constant(4)); + VERIFY_IS_APPROX(r44.noalias()=ones44.transpose()*RMatrix4d::Ones(), Matrix4d::Constant(4)); + VERIFY_IS_APPROX(r44.noalias()=RMatrix4d::Ones()*ones44, Matrix4d::Constant(4)); + VERIFY_IS_APPROX(r44.noalias()=RMatrix4d::Ones()*ones44.transpose(), Matrix4d::Constant(4)); + +// RowVector4d r4; + m44.setOnes(); + r44.setZero(); + VERIFY_IS_APPROX(r44.noalias() += m44.row(0).transpose() * RowVector4d::Ones(), ones44); + r44.setZero(); + VERIFY_IS_APPROX(r44.noalias() += m44.col(0) * RowVector4d::Ones(), ones44); + r44.setZero(); + VERIFY_IS_APPROX(r44.noalias() += Vector4d::Ones() * m44.row(0), ones44); + 
r44.setZero(); + VERIFY_IS_APPROX(r44.noalias() += Vector4d::Ones() * m44.col(0).transpose(), ones44); +} + void test_product_extra() { for(int i = 0; i < g_repeat; i++) { @@ -268,8 +313,10 @@ void test_product_extra() } CALL_SUBTEST_5( bug_127<0>() ); CALL_SUBTEST_5( bug_817<0>() ); + CALL_SUBTEST_5( bug_1308<0>() ); CALL_SUBTEST_6( unaligned_objects<0>() ); CALL_SUBTEST_7( compute_block_size() ); CALL_SUBTEST_7( compute_block_size() ); CALL_SUBTEST_7( compute_block_size >() ); + } diff --git a/test/product_notemporary.cpp b/test/product_notemporary.cpp index 5a3f3a01a..2bb19a681 100644 --- a/test/product_notemporary.cpp +++ b/test/product_notemporary.cpp @@ -56,6 +56,9 @@ template void product_notemporary(const MatrixType& m) VERIFY_EVALUATION_COUNT( m3.noalias() = m3 + m1 * m2.transpose(), 0); VERIFY_EVALUATION_COUNT( m3.noalias() += m3 + m1 * m2.transpose(), 0); VERIFY_EVALUATION_COUNT( m3.noalias() -= m3 + m1 * m2.transpose(), 0); + VERIFY_EVALUATION_COUNT( m3.noalias() = m3 - m1 * m2.transpose(), 0); + VERIFY_EVALUATION_COUNT( m3.noalias() += m3 - m1 * m2.transpose(), 0); + VERIFY_EVALUATION_COUNT( m3.noalias() -= m3 - m1 * m2.transpose(), 0); VERIFY_EVALUATION_COUNT( m3.noalias() = s1 * m1 * s2 * m2.adjoint(), 0); VERIFY_EVALUATION_COUNT( m3.noalias() = s1 * m1 * s2 * (m1*s3+m2*s2).adjoint(), 1); diff --git a/test/product_small.cpp b/test/product_small.cpp index c35db6f65..fdfdd9f6c 100644 --- a/test/product_small.cpp +++ b/test/product_small.cpp @@ -12,6 +12,7 @@ #include // regression test for bug 447 +template void product1x1() { Matrix matAstatic; @@ -177,15 +178,66 @@ void test_lazy_l3() CALL_SUBTEST(( test_lazy_all_layout(4,cols,depth) )); } +template +void test_linear_but_not_vectorizable() +{ + // Check tricky cases for which the result of the product is a vector and thus must exhibit the LinearBit flag, + // but is not vectorizable along the linear dimension. + Index n = N==Dynamic ? internal::random(1,32) : N; + Index m = M==Dynamic ? 
internal::random(1,32) : M; + Index k = K==Dynamic ? internal::random(1,32) : K; + + { + Matrix A; A.setRandom(n,m+1); + Matrix B; B.setRandom(m*2,k); + Matrix C; + Matrix R; + + C.noalias() = A.template topLeftCorner<1,M>() * (B.template topRows()+B.template bottomRows()); + R.noalias() = A.template topLeftCorner<1,M>() * (B.template topRows()+B.template bottomRows()).eval(); + VERIFY_IS_APPROX(C,R); + } + + { + Matrix A; A.setRandom(m+1,n); + Matrix B; B.setRandom(k,m*2); + Matrix C; + Matrix R; + + C.noalias() = (B.template leftCols()+B.template rightCols()) * A.template topLeftCorner(); + R.noalias() = (B.template leftCols()+B.template rightCols()).eval() * A.template topLeftCorner(); + VERIFY_IS_APPROX(C,R); + } +} + +template +void bug_1311() +{ + Matrix< double, Rows, 2 > A; A.setRandom(); + Vector2d b = Vector2d::Random() ; + Matrix res; + res.noalias() = 1. * (A * b); + VERIFY_IS_APPROX(res, A*b); + res.noalias() = 1.*A * b; + VERIFY_IS_APPROX(res, A*b); + res.noalias() = (1.*A).lazyProduct(b); + VERIFY_IS_APPROX(res, A*b); + res.noalias() = (1.*A).lazyProduct(1.*b); + VERIFY_IS_APPROX(res, A*b); + res.noalias() = (A).lazyProduct(1.*b); + VERIFY_IS_APPROX(res, A*b); +} + void test_product_small() { for(int i = 0; i < g_repeat; i++) { CALL_SUBTEST_1( product(Matrix()) ); - CALL_SUBTEST_2( product(Matrix()) ); + CALL_SUBTEST_2( product(Matrix()) ); + CALL_SUBTEST_8( product(Matrix()) ); CALL_SUBTEST_3( product(Matrix3d()) ); CALL_SUBTEST_4( product(Matrix4d()) ); CALL_SUBTEST_5( product(Matrix4f()) ); - CALL_SUBTEST_6( product1x1() ); + CALL_SUBTEST_6( product1x1<0>() ); CALL_SUBTEST_11( test_lazy_l1() ); CALL_SUBTEST_12( test_lazy_l2() ); @@ -202,6 +254,13 @@ void test_product_small() CALL_SUBTEST_41( test_lazy_l1 >() ); CALL_SUBTEST_42( test_lazy_l2 >() ); CALL_SUBTEST_43( test_lazy_l3 >() ); + + CALL_SUBTEST_7(( test_linear_but_not_vectorizable() )); + CALL_SUBTEST_7(( test_linear_but_not_vectorizable() )); + CALL_SUBTEST_7(( 
test_linear_but_not_vectorizable() )); + + CALL_SUBTEST_6( bug_1311<3>() ); + CALL_SUBTEST_6( bug_1311<5>() ); } #ifdef EIGEN_TEST_PART_6 diff --git a/test/qr.cpp b/test/qr.cpp index 98738777f..dfcc1e8f9 100644 --- a/test/qr.cpp +++ b/test/qr.cpp @@ -86,7 +86,7 @@ template void qr_invertible() VERIFY_IS_APPROX(log(absdet), qr.logAbsDeterminant()); // This test is tricky if the determinant becomes too small. // Since we generate random numbers with magnitude rrange [0,1], the average determinant is 0.5^size - VERIFY_IS_MUCH_SMALLER_THAN( abs(absdet-qr.absDeterminant()), (max)(RealScalar(pow(0.5,size)),(max)(abs(absdet),abs(qr.absDeterminant()))) ); + VERIFY_IS_MUCH_SMALLER_THAN( abs(absdet-qr.absDeterminant()), numext::maxi(RealScalar(pow(0.5,size)),numext::maxi(abs(absdet),abs(qr.absDeterminant()))) ); } diff --git a/test/qr_colpivoting.cpp b/test/qr_colpivoting.cpp index 46c54b74f..057bb014c 100644 --- a/test/qr_colpivoting.cpp +++ b/test/qr_colpivoting.cpp @@ -93,6 +93,7 @@ void cod_fixedsize() { template void qr() { + using std::sqrt; typedef typename MatrixType::Index Index; Index rows = internal::random(2,EIGEN_TEST_MAX_SIZE), cols = internal::random(2,EIGEN_TEST_MAX_SIZE), cols2 = internal::random(2,EIGEN_TEST_MAX_SIZE); @@ -120,14 +121,14 @@ template void qr() // Verify that the absolute value of the diagonal elements in R are // non-increasing until they reach the singularity threshold. 
RealScalar threshold = - std::sqrt(RealScalar(rows)) * (std::abs)(r(0, 0)) * NumTraits::epsilon(); + sqrt(RealScalar(rows)) * numext::abs(r(0, 0)) * NumTraits::epsilon(); for (Index i = 0; i < (std::min)(rows, cols) - 1; ++i) { - RealScalar x = (std::abs)(r(i, i)); - RealScalar y = (std::abs)(r(i + 1, i + 1)); + RealScalar x = numext::abs(r(i, i)); + RealScalar y = numext::abs(r(i + 1, i + 1)); if (x < threshold && y < threshold) continue; if (!test_isApproxOrLessThan(y, x)) { for (Index j = 0; j < (std::min)(rows, cols); ++j) { - std::cout << "i = " << j << ", |r_ii| = " << (std::abs)(r(j, j)) << std::endl; + std::cout << "i = " << j << ", |r_ii| = " << numext::abs(r(j, j)) << std::endl; } std::cout << "Failure at i=" << i << ", rank=" << rank << ", threshold=" << threshold << std::endl; @@ -144,6 +145,8 @@ template void qr() template void qr_fixedsize() { + using std::sqrt; + using std::abs; enum { Rows = MatrixType::RowsAtCompileTime, Cols = MatrixType::ColsAtCompileTime }; typedef typename MatrixType::Scalar Scalar; typedef typename MatrixType::RealScalar RealScalar; @@ -169,14 +172,14 @@ template void qr_fixedsize() // Verify that the absolute value of the diagonal elements in R are // non-increasing until they reache the singularity threshold. 
RealScalar threshold = - std::sqrt(RealScalar(Rows)) * (std::abs)(r(0, 0)) * NumTraits::epsilon(); + sqrt(RealScalar(Rows)) * (std::abs)(r(0, 0)) * NumTraits::epsilon(); for (Index i = 0; i < (std::min)(int(Rows), int(Cols)) - 1; ++i) { - RealScalar x = (std::abs)(r(i, i)); - RealScalar y = (std::abs)(r(i + 1, i + 1)); + RealScalar x = numext::abs(r(i, i)); + RealScalar y = numext::abs(r(i + 1, i + 1)); if (x < threshold && y < threshold) continue; if (!test_isApproxOrLessThan(y, x)) { for (Index j = 0; j < (std::min)(int(Rows), int(Cols)); ++j) { - std::cout << "i = " << j << ", |r_ii| = " << (std::abs)(r(j, j)) << std::endl; + std::cout << "i = " << j << ", |r_ii| = " << numext::abs(r(j, j)) << std::endl; } std::cout << "Failure at i=" << i << ", rank=" << rank << ", threshold=" << threshold << std::endl; @@ -194,6 +197,8 @@ template void qr_fixedsize() // page 3 for more detail. template void qr_kahan_matrix() { + using std::sqrt; + using std::abs; typedef typename MatrixType::Index Index; typedef typename MatrixType::Scalar Scalar; typedef typename MatrixType::RealScalar RealScalar; @@ -204,23 +209,25 @@ template void qr_kahan_matrix() m1.setZero(rows,cols); RealScalar s = std::pow(NumTraits::epsilon(), 1.0 / rows); RealScalar c = std::sqrt(1 - s*s); + RealScalar pow_s_i(1.0); // pow(s,i) for (Index i = 0; i < rows; ++i) { - m1(i, i) = pow(s, i); - m1.row(i).tail(rows - i - 1) = -pow(s, i) * c * MatrixType::Ones(1, rows - i - 1); + m1(i, i) = pow_s_i; + m1.row(i).tail(rows - i - 1) = -pow_s_i * c * MatrixType::Ones(1, rows - i - 1); + pow_s_i *= s; } m1 = (m1 + m1.transpose()).eval(); ColPivHouseholderQR qr(m1); MatrixType r = qr.matrixQR().template triangularView(); RealScalar threshold = - std::sqrt(RealScalar(rows)) * (std::abs)(r(0, 0)) * NumTraits::epsilon(); + std::sqrt(RealScalar(rows)) * numext::abs(r(0, 0)) * NumTraits::epsilon(); for (Index i = 0; i < (std::min)(rows, cols) - 1; ++i) { - RealScalar x = (std::abs)(r(i, i)); - RealScalar y = 
(std::abs)(r(i + 1, i + 1)); + RealScalar x = numext::abs(r(i, i)); + RealScalar y = numext::abs(r(i + 1, i + 1)); if (x < threshold && y < threshold) continue; if (!test_isApproxOrLessThan(y, x)) { for (Index j = 0; j < (std::min)(rows, cols); ++j) { - std::cout << "i = " << j << ", |r_ii| = " << (std::abs)(r(j, j)) << std::endl; + std::cout << "i = " << j << ", |r_ii| = " << numext::abs(r(j, j)) << std::endl; } std::cout << "Failure at i=" << i << ", rank=" << qr.rank() << ", threshold=" << threshold << std::endl; diff --git a/test/qr_fullpivoting.cpp b/test/qr_fullpivoting.cpp index d82e123d0..05a705887 100644 --- a/test/qr_fullpivoting.cpp +++ b/test/qr_fullpivoting.cpp @@ -15,8 +15,12 @@ template void qr() { typedef typename MatrixType::Index Index; - Index rows = internal::random(20,200), cols = internal::random(20,200), cols2 = internal::random(20,200); - Index rank = internal::random(1, (std::min)(rows, cols)-1); + Index max_size = EIGEN_TEST_MAX_SIZE; + Index min_size = numext::maxi(1,EIGEN_TEST_MAX_SIZE/10); + Index rows = internal::random(min_size,max_size), + cols = internal::random(min_size,max_size), + cols2 = internal::random(min_size,max_size), + rank = internal::random(1, (std::min)(rows, cols)-1); typedef typename MatrixType::Scalar Scalar; typedef Matrix MatrixQType; @@ -59,7 +63,9 @@ template void qr_invertible() typedef typename NumTraits::Real RealScalar; typedef typename MatrixType::Scalar Scalar; - int size = internal::random(10,50); + Index max_size = numext::mini(50,EIGEN_TEST_MAX_SIZE); + Index min_size = numext::maxi(1,EIGEN_TEST_MAX_SIZE/10); + Index size = internal::random(min_size,max_size); MatrixType m1(size, size), m2(size, size), m3(size, size); m1 = MatrixType::Random(size,size); diff --git a/test/rand.cpp b/test/rand.cpp index eeec34191..51cf01773 100644 --- a/test/rand.cpp +++ b/test/rand.cpp @@ -9,6 +9,8 @@ #include "main.h" +typedef long long int64; + template Scalar check_in_range(Scalar x, Scalar y) { Scalar r = 
internal::random(x,y); @@ -35,31 +37,49 @@ template void check_all_in_range(Scalar x, Scalar y) VERIFY( (mask>0).all() ); } +template void check_histogram(Scalar x, Scalar y, int bins) +{ + Array hist(bins); + hist.fill(0); + int f = 100000; + int n = bins*f; + int64 range = int64(y)-int64(x); + int divisor = int((range+1)/bins); + assert(((range+1)%bins)==0); + for(int k=0; k()/double(f))-1.0).abs()<0.02).all() ); +} + void test_rand() { long long_ref = NumTraits::highest()/10; signed char char_offset = (std::min)(g_repeat,64); signed char short_offset = (std::min)(g_repeat,16000); - - for(int i = 0; i < g_repeat*10; i++) { + + for(int i = 0; i < g_repeat*10000; i++) { CALL_SUBTEST(check_in_range(10,11)); CALL_SUBTEST(check_in_range(1.24234523,1.24234523)); CALL_SUBTEST(check_in_range(-1,1)); CALL_SUBTEST(check_in_range(-1432.2352,-1432.2352)); - + CALL_SUBTEST(check_in_range(10,11)); CALL_SUBTEST(check_in_range(1.24234523,1.24234523)); CALL_SUBTEST(check_in_range(-1,1)); CALL_SUBTEST(check_in_range(-1432.2352,-1432.2352)); - + CALL_SUBTEST(check_in_range(0,-1)); CALL_SUBTEST(check_in_range(0,-1)); CALL_SUBTEST(check_in_range(0,-1)); CALL_SUBTEST(check_in_range(-673456,673456)); + CALL_SUBTEST(check_in_range(-RAND_MAX+10,RAND_MAX-10)); CALL_SUBTEST(check_in_range(-24345,24345)); CALL_SUBTEST(check_in_range(-long_ref,long_ref)); } - + CALL_SUBTEST(check_all_in_range(11,11)); CALL_SUBTEST(check_all_in_range(11,11+char_offset)); CALL_SUBTEST(check_all_in_range(-5,5)); @@ -67,25 +87,32 @@ void test_rand() CALL_SUBTEST(check_all_in_range(-126,-126+char_offset)); CALL_SUBTEST(check_all_in_range(126-char_offset,126)); CALL_SUBTEST(check_all_in_range(-126,126)); - + CALL_SUBTEST(check_all_in_range(11,11)); CALL_SUBTEST(check_all_in_range(11,11+short_offset)); CALL_SUBTEST(check_all_in_range(-5,5)); CALL_SUBTEST(check_all_in_range(-11-short_offset,-11)); CALL_SUBTEST(check_all_in_range(-24345,-24345+short_offset)); 
CALL_SUBTEST(check_all_in_range(24345,24345+short_offset)); - + CALL_SUBTEST(check_all_in_range(11,11)); CALL_SUBTEST(check_all_in_range(11,11+g_repeat)); CALL_SUBTEST(check_all_in_range(-5,5)); CALL_SUBTEST(check_all_in_range(-11-g_repeat,-11)); CALL_SUBTEST(check_all_in_range(-673456,-673456+g_repeat)); CALL_SUBTEST(check_all_in_range(673456,673456+g_repeat)); - + CALL_SUBTEST(check_all_in_range(11,11)); CALL_SUBTEST(check_all_in_range(11,11+g_repeat)); CALL_SUBTEST(check_all_in_range(-5,5)); CALL_SUBTEST(check_all_in_range(-11-g_repeat,-11)); CALL_SUBTEST(check_all_in_range(-long_ref,-long_ref+g_repeat)); CALL_SUBTEST(check_all_in_range( long_ref, long_ref+g_repeat)); + + CALL_SUBTEST(check_histogram(-5,5,11)); + int bins = 100; + CALL_SUBTEST(check_histogram(-3333,-3333+bins*(3333/bins)-1,bins)); + bins = 1000; + CALL_SUBTEST(check_histogram(-RAND_MAX+10,-RAND_MAX+10+bins*(RAND_MAX/bins)-1,bins)); + CALL_SUBTEST(check_histogram(-RAND_MAX+10,-int64(RAND_MAX)+10+bins*(2*int64(RAND_MAX)/bins)-1,bins)); } diff --git a/test/real_qz.cpp b/test/real_qz.cpp index a1766c6d9..99ac31235 100644 --- a/test/real_qz.cpp +++ b/test/real_qz.cpp @@ -7,6 +7,7 @@ // Public License v. 2.0. If a copy of the MPL was not distributed // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+#define EIGEN_RUNTIME_NO_MALLOC #include "main.h" #include #include @@ -41,7 +42,11 @@ template void real_qz(const MatrixType& m) break; } - RealQZ qz(A,B); + RealQZ qz(dim); + // TODO enable full-prealocation of required memory, this probably requires an in-place mode for HessenbergDecomposition + //Eigen::internal::set_is_malloc_allowed(false); + qz.compute(A,B); + //Eigen::internal::set_is_malloc_allowed(true); VERIFY_IS_EQUAL(qz.info(), Success); // check for zeros @@ -49,11 +54,20 @@ template void real_qz(const MatrixType& m) for (Index i=0; i void sparse_product() { typedef typename SparseMatrixType::StorageIndex StorageIndex; @@ -76,6 +94,24 @@ template void sparse_product() VERIFY_IS_APPROX(m4=(m2t.transpose()*m3t.transpose()).pruned(0), refMat4=refMat2t.transpose()*refMat3t.transpose()); VERIFY_IS_APPROX(m4=(m2*m3t.transpose()).pruned(0), refMat4=refMat2*refMat3t.transpose()); + // make sure the right product implementation is called: + if((!SparseMatrixType::IsRowMajor) && m2.rows()<=m3.cols()) + { + VERIFY_EVALUATION_COUNT(m4 = m2*m3, 3); // 1 temp for the result + 2 for transposing and get a sorted result. 
+ VERIFY_EVALUATION_COUNT(m4 = (m2*m3).pruned(0), 1); + VERIFY_EVALUATION_COUNT(m4 = (m2*m3).eval().pruned(0), 4); + } + + // and that pruning is effective: + { + DenseMatrix Ad(2,2); + Ad << -1, 1, 1, 1; + SparseMatrixType As(Ad.sparseView()), B(2,2); + VERIFY_IS_EQUAL( (As*As.transpose()).eval().nonZeros(), 4); + VERIFY_IS_EQUAL( (Ad*Ad.transpose()).eval().sparseView().eval().nonZeros(), 2); + VERIFY_IS_EQUAL( (As*As.transpose()).pruned(1e-6).eval().nonZeros(), 2); + } + // dense ?= sparse * sparse VERIFY_IS_APPROX(dm4 =m2*m3, refMat4 =refMat2*refMat3); VERIFY_IS_APPROX(dm4+=m2*m3, refMat4+=refMat2*refMat3); @@ -245,7 +281,7 @@ template void sparse_product() for (int k=0; k void sparse_product() VERIFY_IS_APPROX(x=mUp.template selfadjointView()*b, refX=refS*b); VERIFY_IS_APPROX(x=mLo.template selfadjointView()*b, refX=refS*b); VERIFY_IS_APPROX(x=mS.template selfadjointView()*b, refX=refS*b); + + VERIFY_IS_APPROX(x.noalias()+=mUp.template selfadjointView()*b, refX+=refS*b); + VERIFY_IS_APPROX(x.noalias()-=mLo.template selfadjointView()*b, refX-=refS*b); + VERIFY_IS_APPROX(x.noalias()+=mS.template selfadjointView()*b, refX+=refS*b); // sparse selfadjointView with sparse matrices SparseMatrixType mSres(rows,rows); diff --git a/test/sparse_ref.cpp b/test/sparse_ref.cpp index f4aefbb48..5e9607234 100644 --- a/test/sparse_ref.cpp +++ b/test/sparse_ref.cpp @@ -87,8 +87,8 @@ void call_ref() VERIFY_EVALUATION_COUNT( call_ref_3(B, B), 1); VERIFY_EVALUATION_COUNT( call_ref_2(B.transpose(), B.transpose()), 0); VERIFY_EVALUATION_COUNT( call_ref_3(B.transpose(), B.transpose()), 0); - VERIFY_EVALUATION_COUNT( call_ref_2(A*A, AA), 1); - VERIFY_EVALUATION_COUNT( call_ref_3(A*A, AA), 1); + VERIFY_EVALUATION_COUNT( call_ref_2(A*A, AA), 3); + VERIFY_EVALUATION_COUNT( call_ref_3(A*A, AA), 3); VERIFY(!C.isCompressed()); VERIFY_EVALUATION_COUNT( call_ref_3(C, C), 1); diff --git a/test/sparse_solver.h b/test/sparse_solver.h index b67653496..fd6199f3e 100644 --- a/test/sparse_solver.h 
+++ b/test/sparse_solver.h @@ -11,6 +11,33 @@ #include #include +template +void solve_with_guess(IterativeSolverBase& solver, const MatrixBase& b, const Guess& g, Result &x) { + if(internal::random()) + { + // With a temporary through evaluator + x = solver.derived().solveWithGuess(b,g) + Result::Zero(x.rows(), x.cols()); + } + else + { + // direct evaluation within x through Assignment + x = solver.derived().solveWithGuess(b.derived(),g); + } +} + +template +void solve_with_guess(SparseSolverBase& solver, const MatrixBase& b, const Guess& , Result& x) { + if(internal::random()) + x = solver.derived().solve(b) + Result::Zero(x.rows(), x.cols()); + else + x = solver.derived().solve(b); +} + +template +void solve_with_guess(SparseSolverBase& solver, const SparseMatrixBase& b, const Guess& , Result& x) { + x = solver.derived().solve(b); +} + template void check_sparse_solving(Solver& solver, const typename Solver::MatrixType& A, const Rhs& b, const DenseMat& dA, const DenseRhs& db) { @@ -37,6 +64,12 @@ void check_sparse_solving(Solver& solver, const typename Solver::MatrixType& A, } VERIFY(oldb.isApprox(b) && "sparse solver testing: the rhs should not be modified!"); VERIFY(x.isApprox(refX,test_precision())); + + x.setZero(); + solve_with_guess(solver, b, x, x); + VERIFY(solver.info() == Success && "solving failed when using analyzePattern/factorize API"); + VERIFY(oldb.isApprox(b) && "sparse solver testing: the rhs should not be modified!"); + VERIFY(x.isApprox(refX,test_precision())); x.setZero(); // test the analyze/factorize API diff --git a/test/sparse_vector.cpp b/test/sparse_vector.cpp index d95f301d5..b3e1dda25 100644 --- a/test/sparse_vector.cpp +++ b/test/sparse_vector.cpp @@ -12,7 +12,7 @@ template void sparse_vector(int rows, int cols) { double densityMat = (std::max)(8./(rows*cols), 0.01); - double densityVec = (std::max)(8./float(rows), 0.1); + double densityVec = (std::max)(8./(rows), 0.1); typedef Matrix DenseMatrix; typedef Matrix DenseVector; typedef 
SparseVector SparseVectorType; diff --git a/test/sparseqr.cpp b/test/sparseqr.cpp index 50d1fcdf2..e8605fd21 100644 --- a/test/sparseqr.cpp +++ b/test/sparseqr.cpp @@ -54,7 +54,7 @@ template void test_sparseqr_scalar() b = dA * DenseVector::Random(A.cols()); solver.compute(A); - if(internal::random(0,1)>0.5) + if(internal::random(0,1)>0.5f) solver.factorize(A); // this checks that calling analyzePattern is not needed if the pattern do not change. if (solver.info() != Success) { diff --git a/test/stdvector.cpp b/test/stdvector.cpp index 6e173c678..50cb3341d 100644 --- a/test/stdvector.cpp +++ b/test/stdvector.cpp @@ -34,7 +34,7 @@ void check_stdvector_matrix(const MatrixType& m) VERIFY_IS_APPROX(v[21], y); v.push_back(x); VERIFY_IS_APPROX(v[22], x); - VERIFY((size_t)&(v[22]) == (size_t)&(v[21]) + sizeof(MatrixType)); + VERIFY((internal::UIntPtr)&(v[22]) == (internal::UIntPtr)&(v[21]) + sizeof(MatrixType)); // do a lot of push_back such that the vector gets internally resized // (with memory reallocation) @@ -69,7 +69,7 @@ void check_stdvector_transform(const TransformType&) VERIFY_IS_APPROX(v[21], y); v.push_back(x); VERIFY_IS_APPROX(v[22], x); - VERIFY((size_t)&(v[22]) == (size_t)&(v[21]) + sizeof(TransformType)); + VERIFY((internal::UIntPtr)&(v[22]) == (internal::UIntPtr)&(v[21]) + sizeof(TransformType)); // do a lot of push_back such that the vector gets internally resized // (with memory reallocation) @@ -104,7 +104,7 @@ void check_stdvector_quaternion(const QuaternionType&) VERIFY_IS_APPROX(v[21], y); v.push_back(x); VERIFY_IS_APPROX(v[22], x); - VERIFY((size_t)&(v[22]) == (size_t)&(v[21]) + sizeof(QuaternionType)); + VERIFY((internal::UIntPtr)&(v[22]) == (internal::UIntPtr)&(v[21]) + sizeof(QuaternionType)); // do a lot of push_back such that the vector gets internally resized // (with memory reallocation) diff --git a/test/stdvector_overload.cpp b/test/stdvector_overload.cpp index 736ff0ee7..959665954 100644 --- a/test/stdvector_overload.cpp +++ 
b/test/stdvector_overload.cpp @@ -48,7 +48,7 @@ void check_stdvector_matrix(const MatrixType& m) VERIFY_IS_APPROX(v[21], y); v.push_back(x); VERIFY_IS_APPROX(v[22], x); - VERIFY((size_t)&(v[22]) == (size_t)&(v[21]) + sizeof(MatrixType)); + VERIFY((internal::UIntPtr)&(v[22]) == (internal::UIntPtr)&(v[21]) + sizeof(MatrixType)); // do a lot of push_back such that the vector gets internally resized // (with memory reallocation) @@ -83,7 +83,7 @@ void check_stdvector_transform(const TransformType&) VERIFY_IS_APPROX(v[21], y); v.push_back(x); VERIFY_IS_APPROX(v[22], x); - VERIFY((size_t)&(v[22]) == (size_t)&(v[21]) + sizeof(TransformType)); + VERIFY((internal::UIntPtr)&(v[22]) == (internal::UIntPtr)&(v[21]) + sizeof(TransformType)); // do a lot of push_back such that the vector gets internally resized // (with memory reallocation) @@ -118,7 +118,7 @@ void check_stdvector_quaternion(const QuaternionType&) VERIFY_IS_APPROX(v[21], y); v.push_back(x); VERIFY_IS_APPROX(v[22], x); - VERIFY((size_t)&(v[22]) == (size_t)&(v[21]) + sizeof(QuaternionType)); + VERIFY((internal::UIntPtr)&(v[22]) == (internal::UIntPtr)&(v[21]) + sizeof(QuaternionType)); // do a lot of push_back such that the vector gets internally resized // (with memory reallocation) diff --git a/test/svd_common.h b/test/svd_common.h index d8611b541..605d5dfef 100644 --- a/test/svd_common.h +++ b/test/svd_common.h @@ -42,9 +42,14 @@ void svd_check_full(const MatrixType& m, const SvdType& svd) MatrixUType u = svd.matrixU(); MatrixVType v = svd.matrixV(); RealScalar scaling = m.cwiseAbs().maxCoeff(); - if(scaling<=(std::numeric_limits::min)()) - scaling = RealScalar(1); - VERIFY_IS_APPROX(m/scaling, u * (sigma/scaling) * v.adjoint()); + if(scaling<(std::numeric_limits::min)()) + { + VERIFY(sigma.cwiseAbs().maxCoeff() <= (std::numeric_limits::min)()); + } + else + { + VERIFY_IS_APPROX(m/scaling, u * (sigma/scaling) * v.adjoint()); + } VERIFY_IS_UNITARY(u); VERIFY_IS_UNITARY(v); } @@ -141,14 +146,14 @@ void 
svd_least_square(const MatrixType& m, unsigned int computationOptions) using std::abs; SolutionType y(x); - y.row(k) = (1.+2*NumTraits::epsilon())*x.row(k); + y.row(k) = (RealScalar(1)+2*NumTraits::epsilon())*x.row(k); RealScalar residual_y = (m*y-rhs).norm(); VERIFY( test_isMuchSmallerThan(abs(residual_y-residual), rhs_norm) || residual < residual_y ); if(internal::is_same::value) ++g_test_level; VERIFY( test_isApprox(residual_y,residual) || residual < residual_y ); if(internal::is_same::value) --g_test_level; - y.row(k) = (1.-2*NumTraits::epsilon())*x.row(k); + y.row(k) = (RealScalar(1)-2*NumTraits::epsilon())*x.row(k); residual_y = (m*y-rhs).norm(); VERIFY( test_isMuchSmallerThan(abs(residual_y-residual), rhs_norm) || residual < residual_y ); if(internal::is_same::value) ++g_test_level; @@ -336,7 +341,7 @@ void svd_underoverflow() M << value_set(id(0)), value_set(id(1)), value_set(id(2)), value_set(id(3)); svd.compute(M,ComputeFullU|ComputeFullV); CALL_SUBTEST( svd_check_full(M,svd) ); - + id(k)++; if(id(k)>=value_set.size()) { @@ -344,7 +349,7 @@ void svd_underoverflow() id.head(k).setZero(); k=0; } - + } while((id +Array four_denorms(); + +template<> +Array4f four_denorms() { return Array4f(5.60844e-39f, -5.60844e-39f, 4.94e-44f, -4.94e-44f); } +template<> +Array4d four_denorms() { return Array4d(5.60844e-313, -5.60844e-313, 4.94e-324, -4.94e-324); } +template +Array four_denorms() { return four_denorms().cast(); } + template void svd_fill_random(MatrixType &m, int Option = 0) { + using std::pow; typedef typename MatrixType::Scalar Scalar; typedef typename MatrixType::RealScalar RealScalar; typedef typename MatrixType::Index Index; @@ -18,7 +29,7 @@ void svd_fill_random(MatrixType &m, int Option = 0) s = internal::random(1,s); Matrix d = Matrix::Random(diagSize); for(Index k=0; k(-s,s)); + d(k) = d(k)*pow(RealScalar(10),internal::random(-s,s)); bool dup = internal::random(0,10) < 3; bool unit_uv = internal::random(0,10) < (dup?7:3); // if we duplicate some 
diagonal entries, then increase the chance to preserve them using unitary U and V factors @@ -53,8 +64,9 @@ void svd_fill_random(MatrixType &m, int Option = 0) VT.setRandom(); } - Matrix samples(7); - samples << 0, 5.60844e-313, -5.60844e-313, 4.94e-324, -4.94e-324, -1./NumTraits::highest(), 1./NumTraits::highest(); + Matrix samples(9); + samples << 0, four_denorms(), + -RealScalar(1)/NumTraits::highest(), RealScalar(1)/NumTraits::highest(), (std::numeric_limits::min)(), pow((std::numeric_limits::min)(),0.8); if(Option==Symmetric) { diff --git a/test/triangular.cpp b/test/triangular.cpp index 936c2aef3..b96856486 100644 --- a/test/triangular.cpp +++ b/test/triangular.cpp @@ -65,7 +65,7 @@ template void triangular_square(const MatrixType& m) m1 = MatrixType::Random(rows, cols); for (int i=0; i(); + while (numext::abs2(m1(i,i))(); Transpose trm4(m4); // test back and forward subsitution with a vector as the rhs @@ -78,7 +78,7 @@ template void triangular_square(const MatrixType& m) m3 = m1.template triangularView(); VERIFY(v2.isApprox(m3.conjugate() * (m1.conjugate().template triangularView().solve(v2)), largerEps)); - // test back and forward subsitution with a matrix as the rhs + // test back and forward substitution with a matrix as the rhs m3 = m1.template triangularView(); VERIFY(m2.isApprox(m3.adjoint() * (m1.adjoint().template triangularView().solve(m2)), largerEps)); m3 = m1.template triangularView(); @@ -121,6 +121,14 @@ template void triangular_square(const MatrixType& m) VERIFY_IS_APPROX(m1.template triangularView() * m5, m3*m5); VERIFY_IS_APPROX(m6*m1.template triangularView(), m6*m3); + m1up = m1.template triangularView(); + VERIFY_IS_APPROX(m1.template selfadjointView().template triangularView().toDenseMatrix(), m1up); + VERIFY_IS_APPROX(m1up.template selfadjointView().template triangularView().toDenseMatrix(), m1up); + VERIFY_IS_APPROX(m1.template selfadjointView().template triangularView().toDenseMatrix(), m1up.adjoint()); + 
VERIFY_IS_APPROX(m1up.template selfadjointView().template triangularView().toDenseMatrix(), m1up.adjoint()); + + VERIFY_IS_APPROX(m1.template selfadjointView().diagonal(), m1.diagonal()); + } diff --git a/test/unalignedassert.cpp b/test/unalignedassert.cpp index e2f03ffca..731a08977 100644 --- a/test/unalignedassert.cpp +++ b/test/unalignedassert.cpp @@ -94,7 +94,7 @@ template void construct_at_boundary(int boundary) { char buf[sizeof(T)+256]; - size_t _buf = reinterpret_cast(buf); + size_t _buf = reinterpret_cast(buf); _buf += (EIGEN_MAX_ALIGN_BYTES - (_buf % EIGEN_MAX_ALIGN_BYTES)); // make 16/32/...-byte aligned _buf += boundary; // make exact boundary-aligned T *x = ::new(reinterpret_cast(_buf)) T; diff --git a/test/vectorization_logic.cpp b/test/vectorization_logic.cpp index ee446c3c1..83c1439ad 100644 --- a/test/vectorization_logic.cpp +++ b/test/vectorization_logic.cpp @@ -7,6 +7,14 @@ // Public License v. 2.0. If a copy of the MPL was not distributed // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+#ifdef EIGEN_TEST_PART_1 +#define EIGEN_UNALIGNED_VECTORIZE 1 +#endif + +#ifdef EIGEN_TEST_PART_2 +#define EIGEN_UNALIGNED_VECTORIZE 0 +#endif + #ifdef EIGEN_DEFAULT_TO_ROW_MAJOR #undef EIGEN_DEFAULT_TO_ROW_MAJOR #endif @@ -21,7 +29,7 @@ using internal::demangle_unrolling; template bool test_assign(const Dst&, const Src&, int traversal, int unrolling) { - typedef internal::copy_using_evaluator_traits,internal::evaluator, internal::assign_op > traits; + typedef internal::copy_using_evaluator_traits,internal::evaluator, internal::assign_op > traits; bool res = traits::Traversal==traversal; if(unrolling==InnerUnrolling+CompleteUnrolling) res = res && (int(traits::Unrolling)==InnerUnrolling || int(traits::Unrolling)==CompleteUnrolling); @@ -45,7 +53,7 @@ bool test_assign(const Dst&, const Src&, int traversal, int unrolling) template bool test_assign(int traversal, int unrolling) { - typedef internal::copy_using_evaluator_traits,internal::evaluator, internal::assign_op > traits; + typedef internal::copy_using_evaluator_traits,internal::evaluator, internal::assign_op > traits; bool res = traits::Traversal==traversal && traits::Unrolling==unrolling; if(!res) { @@ -65,7 +73,8 @@ bool test_assign(int traversal, int unrolling) template bool test_redux(const Xpr&, int traversal, int unrolling) { - typedef internal::redux_traits,internal::redux_evaluator > traits; + typedef typename Xpr::Scalar Scalar; + typedef internal::redux_traits,internal::redux_evaluator > traits; bool res = traits::Traversal==traversal && traits::Unrolling==unrolling; if(!res) @@ -144,10 +153,16 @@ struct vectorization_logic InnerVectorizedTraversal,InnerUnrolling)); VERIFY(test_assign(Matrix44u(),Matrix44()+Matrix44(), - LinearTraversal,NoUnrolling)); + EIGEN_UNALIGNED_VECTORIZE ? InnerVectorizedTraversal : LinearTraversal, + EIGEN_UNALIGNED_VECTORIZE ? InnerUnrolling : NoUnrolling)); + + VERIFY(test_assign(Matrix1(),Matrix1()+Matrix1(), + (Matrix1::InnerSizeAtCompileTime % PacketSize)==0 ? 
InnerVectorizedTraversal : LinearVectorizedTraversal, + CompleteUnrolling)); VERIFY(test_assign(Matrix1u(),Matrix1()+Matrix1(), - LinearTraversal,CompleteUnrolling)); + EIGEN_UNALIGNED_VECTORIZE ? ((Matrix1::InnerSizeAtCompileTime % PacketSize)==0 ? InnerVectorizedTraversal : LinearVectorizedTraversal) + : LinearTraversal, CompleteUnrolling)); VERIFY(test_assign(Matrix44c().col(1),Matrix44c().col(2)+Matrix44c().col(3), InnerVectorizedTraversal,CompleteUnrolling)); @@ -158,19 +173,29 @@ struct vectorization_logic if(PacketSize>1) { typedef Matrix Matrix33c; + typedef Matrix Vector3; VERIFY(test_assign(Matrix33c().row(2),Matrix33c().row(1)+Matrix33c().row(1), LinearTraversal,CompleteUnrolling)); + VERIFY(test_assign(Vector3(),Vector3()+Vector3(), + EIGEN_UNALIGNED_VECTORIZE ? (HalfPacketSize==1 ? InnerVectorizedTraversal : LinearVectorizedTraversal) : (HalfPacketSize==1 ? InnerVectorizedTraversal : LinearTraversal), CompleteUnrolling)); VERIFY(test_assign(Matrix33c().col(0),Matrix33c().col(1)+Matrix33c().col(1), - LinearTraversal,CompleteUnrolling)); + EIGEN_UNALIGNED_VECTORIZE ? (HalfPacketSize==1 ? InnerVectorizedTraversal : LinearVectorizedTraversal) : (HalfPacketSize==1 ? SliceVectorizedTraversal : LinearTraversal), + ((!EIGEN_UNALIGNED_VECTORIZE) && HalfPacketSize==1) ? NoUnrolling : CompleteUnrolling)); VERIFY(test_assign(Matrix3(),Matrix3().cwiseProduct(Matrix3()), LinearVectorizedTraversal,CompleteUnrolling)); VERIFY(test_assign(Matrix(),Matrix()+Matrix(), - HalfPacketSize==1 ? InnerVectorizedTraversal : LinearTraversal,NoUnrolling)); + HalfPacketSize==1 ? InnerVectorizedTraversal : + EIGEN_UNALIGNED_VECTORIZE ? 
LinearVectorizedTraversal : + LinearTraversal, + NoUnrolling)); + + VERIFY(test_assign(Matrix11(), Matrix11()+Matrix11(),InnerVectorizedTraversal,CompleteUnrolling)); + VERIFY(test_assign(Matrix11(),Matrix().template block(2,3)+Matrix().template block(8,4), - DefaultTraversal,PacketSize>4?InnerUnrolling:CompleteUnrolling)); + (EIGEN_UNALIGNED_VECTORIZE) ? InnerVectorizedTraversal : DefaultTraversal, CompleteUnrolling|InnerUnrolling)); VERIFY(test_assign(Vector1(),Matrix11()*Vector1(), InnerVectorizedTraversal,CompleteUnrolling)); @@ -208,7 +233,7 @@ struct vectorization_logic VERIFY((test_assign< Map, AlignedMax, InnerStride<3*PacketSize> >, Matrix - >(DefaultTraversal,CompleteUnrolling))); + >(DefaultTraversal,PacketSize>=8?InnerUnrolling:CompleteUnrolling))); VERIFY((test_assign(Matrix11(), Matrix()*Matrix(), InnerVectorizedTraversal, CompleteUnrolling))); @@ -270,6 +295,12 @@ struct vectorization_logic_half InnerVectorizedTraversal,CompleteUnrolling)); VERIFY(test_assign(Vector1(),Vector1()+Vector1(), InnerVectorizedTraversal,CompleteUnrolling)); + VERIFY(test_assign(Vector1(),Vector1().template segment(0).derived(), + EIGEN_UNALIGNED_VECTORIZE ? InnerVectorizedTraversal : LinearVectorizedTraversal,CompleteUnrolling)); + VERIFY(test_assign(Vector1(),Scalar(2.1)*Vector1()-Vector1(), + InnerVectorizedTraversal,CompleteUnrolling)); + VERIFY(test_assign(Vector1(),(Scalar(2.1)*Vector1().template segment(0)-Vector1().template segment(0)).derived(), + EIGEN_UNALIGNED_VECTORIZE ? InnerVectorizedTraversal : LinearVectorizedTraversal,CompleteUnrolling)); VERIFY(test_assign(Vector1(),Vector1().cwiseProduct(Vector1()), InnerVectorizedTraversal,CompleteUnrolling)); VERIFY(test_assign(Vector1(),Vector1().template cast(), @@ -287,10 +318,11 @@ struct vectorization_logic_half InnerVectorizedTraversal,InnerUnrolling)); VERIFY(test_assign(Matrix57u(),Matrix57()+Matrix57(), - LinearTraversal,NoUnrolling)); + EIGEN_UNALIGNED_VECTORIZE ? 
InnerVectorizedTraversal : LinearTraversal, + EIGEN_UNALIGNED_VECTORIZE ? InnerUnrolling : NoUnrolling)); VERIFY(test_assign(Matrix1u(),Matrix1()+Matrix1(), - LinearTraversal,CompleteUnrolling)); + EIGEN_UNALIGNED_VECTORIZE ? ((Matrix1::InnerSizeAtCompileTime % PacketSize)==0 ? InnerVectorizedTraversal : LinearVectorizedTraversal) : LinearTraversal,CompleteUnrolling)); if(PacketSize>1) { @@ -298,16 +330,17 @@ struct vectorization_logic_half VERIFY(test_assign(Matrix33c().row(2),Matrix33c().row(1)+Matrix33c().row(1), LinearTraversal,CompleteUnrolling)); VERIFY(test_assign(Matrix33c().col(0),Matrix33c().col(1)+Matrix33c().col(1), - LinearTraversal,CompleteUnrolling)); + EIGEN_UNALIGNED_VECTORIZE ? (PacketSize==1 ? InnerVectorizedTraversal : LinearVectorizedTraversal) : LinearTraversal,CompleteUnrolling)); VERIFY(test_assign(Matrix3(),Matrix3().cwiseQuotient(Matrix3()), PacketTraits::HasDiv ? LinearVectorizedTraversal : LinearTraversal,CompleteUnrolling)); VERIFY(test_assign(Matrix(),Matrix()+Matrix(), - LinearTraversal,NoUnrolling)); + EIGEN_UNALIGNED_VECTORIZE ? (PacketSize==1 ? InnerVectorizedTraversal : LinearVectorizedTraversal) : LinearTraversal, + NoUnrolling)); VERIFY(test_assign(Matrix11(),Matrix().template block(2,3)+Matrix().template block(8,4), - DefaultTraversal,PacketSize>4?InnerUnrolling:CompleteUnrolling)); + EIGEN_UNALIGNED_VECTORIZE ? 
InnerVectorizedTraversal : DefaultTraversal,PacketSize>4?InnerUnrolling:CompleteUnrolling)); VERIFY(test_assign(Vector1(),Matrix11()*Vector1(), InnerVectorizedTraversal,CompleteUnrolling)); @@ -337,7 +370,7 @@ struct vectorization_logic_half >(DefaultTraversal,CompleteUnrolling))); VERIFY((test_assign(Matrix57(), Matrix()*Matrix(), - InnerVectorizedTraversal, CompleteUnrolling))); + InnerVectorizedTraversal, InnerUnrolling|CompleteUnrolling))); #endif } }; @@ -367,19 +400,19 @@ void test_vectorization_logic() if(internal::packet_traits::Vectorizable) { VERIFY(test_assign(Matrix(),Matrix()+Matrix(), - LinearTraversal,CompleteUnrolling)); + EIGEN_UNALIGNED_VECTORIZE ? LinearVectorizedTraversal : LinearTraversal,CompleteUnrolling)); VERIFY(test_redux(Matrix(), - DefaultTraversal,CompleteUnrolling)); + EIGEN_UNALIGNED_VECTORIZE ? LinearVectorizedTraversal : DefaultTraversal,CompleteUnrolling)); } if(internal::packet_traits::Vectorizable) { VERIFY(test_assign(Matrix(),Matrix()+Matrix(), - LinearTraversal,CompleteUnrolling)); + EIGEN_UNALIGNED_VECTORIZE ? LinearVectorizedTraversal : LinearTraversal,CompleteUnrolling)); VERIFY(test_redux(Matrix(), - DefaultTraversal,CompleteUnrolling)); + EIGEN_UNALIGNED_VECTORIZE ? LinearVectorizedTraversal : DefaultTraversal,CompleteUnrolling)); } #endif // EIGEN_VECTORIZE diff --git a/test/vectorwiseop.cpp b/test/vectorwiseop.cpp index 3cc198772..739eacaf3 100644 --- a/test/vectorwiseop.cpp +++ b/test/vectorwiseop.cpp @@ -233,10 +233,10 @@ template void vectorwiseop_matrix(const MatrixType& m) Matrix tmp(rows); VERIFY_EVALUATION_COUNT( tmp = (m1 * m1.transpose()).colwise().sum(), (MatrixType::RowsAtCompileTime==Dynamic ? 
1 : 0)); - m2 = m1.rowwise() - (m1.colwise().sum()/m1.rows()).eval(); - m1 = m1.rowwise() - (m1.colwise().sum()/m1.rows()); + m2 = m1.rowwise() - (m1.colwise().sum()/RealScalar(m1.rows())).eval(); + m1 = m1.rowwise() - (m1.colwise().sum()/RealScalar(m1.rows())); VERIFY_IS_APPROX( m1, m2 ); - VERIFY_EVALUATION_COUNT( m2 = (m1.rowwise() - m1.colwise().sum()/m1.rows()), (MatrixType::RowsAtCompileTime==Dynamic && MatrixType::ColsAtCompileTime!=1 ? 1 : 0) ); + VERIFY_EVALUATION_COUNT( m2 = (m1.rowwise() - m1.colwise().sum()/RealScalar(m1.rows())), (MatrixType::RowsAtCompileTime==Dynamic && MatrixType::ColsAtCompileTime!=1 ? 1 : 0) ); } void test_vectorwiseop() diff --git a/unsupported/Eigen/CMakeLists.txt b/unsupported/Eigen/CMakeLists.txt index 6d0cf4f9d..631a06014 100644 --- a/unsupported/Eigen/CMakeLists.txt +++ b/unsupported/Eigen/CMakeLists.txt @@ -4,6 +4,7 @@ set(Eigen_HEADERS ArpackSupport AutoDiff BVH + EulerAngles FFT IterativeSolvers KroneckerProduct @@ -17,6 +18,7 @@ set(Eigen_HEADERS Polynomials Skyline SparseExtra + SpecialFunctions Splines ) @@ -25,5 +27,6 @@ install(FILES DESTINATION ${INCLUDE_INSTALL_DIR}/unsupported/Eigen COMPONENT Devel ) -add_subdirectory(src) -add_subdirectory(CXX11) \ No newline at end of file +install(DIRECTORY src DESTINATION ${INCLUDE_INSTALL_DIR}/unsupported/Eigen COMPONENT Devel FILES_MATCHING PATTERN "*.h") + +add_subdirectory(CXX11) diff --git a/unsupported/Eigen/CXX11/CMakeLists.txt b/unsupported/Eigen/CXX11/CMakeLists.txt index a40bc4715..385ed240c 100644 --- a/unsupported/Eigen/CXX11/CMakeLists.txt +++ b/unsupported/Eigen/CXX11/CMakeLists.txt @@ -5,4 +5,4 @@ install(FILES DESTINATION ${INCLUDE_INSTALL_DIR}/unsupported/Eigen/CXX11 COMPONENT Devel ) -add_subdirectory(src) +install(DIRECTORY src DESTINATION ${INCLUDE_INSTALL_DIR}/unsupported/Eigen/CXX11 COMPONENT Devel FILES_MATCHING PATTERN "*.h") diff --git a/unsupported/Eigen/CXX11/Tensor b/unsupported/Eigen/CXX11/Tensor index 1e97ad3c0..4976a1254 100644 --- 
a/unsupported/Eigen/CXX11/Tensor +++ b/unsupported/Eigen/CXX11/Tensor @@ -15,6 +15,7 @@ #include +#include "../SpecialFunctions" #include "src/util/CXX11Meta.h" #include "src/util/MaxSizeVector.h" @@ -60,15 +61,17 @@ typedef unsigned __int64 uint64_t; #ifdef EIGEN_USE_GPU #include #include -#if defined(__CUDACC__) -#include +#if __cplusplus >= 201103L +#include +#include #endif #endif - #include "src/Tensor/TensorMacros.h" #include "src/Tensor/TensorForwardDeclarations.h" #include "src/Tensor/TensorMeta.h" +#include "src/Tensor/TensorFunctors.h" +#include "src/Tensor/TensorCostModel.h" #include "src/Tensor/TensorDeviceDefault.h" #include "src/Tensor/TensorDeviceThreadPool.h" #include "src/Tensor/TensorDeviceCuda.h" @@ -77,13 +80,13 @@ typedef unsigned __int64 uint64_t; #include "src/Tensor/TensorDimensions.h" #include "src/Tensor/TensorInitializer.h" #include "src/Tensor/TensorTraits.h" -#include "src/Tensor/TensorFunctors.h" +#include "src/Tensor/TensorRandom.h" #include "src/Tensor/TensorUInt128.h" #include "src/Tensor/TensorIntDiv.h" +#include "src/Tensor/TensorGlobalFunctions.h" #include "src/Tensor/TensorBase.h" -#include "src/Tensor/TensorCostModel.h" #include "src/Tensor/TensorEvaluator.h" #include "src/Tensor/TensorExpr.h" #include "src/Tensor/TensorReduction.h" @@ -115,6 +118,7 @@ typedef unsigned __int64 uint64_t; #include "src/Tensor/TensorForcedEval.h" #include "src/Tensor/TensorGenerator.h" #include "src/Tensor/TensorAssign.h" +#include "src/Tensor/TensorScan.h" #include "src/Tensor/TensorExecutor.h" #include "src/Tensor/TensorDevice.h" diff --git a/unsupported/Eigen/CXX11/src/CMakeLists.txt b/unsupported/Eigen/CXX11/src/CMakeLists.txt deleted file mode 100644 index 1734262bb..000000000 --- a/unsupported/Eigen/CXX11/src/CMakeLists.txt +++ /dev/null @@ -1,4 +0,0 @@ -add_subdirectory(util) -add_subdirectory(ThreadPool) -add_subdirectory(Tensor) -add_subdirectory(TensorSymmetry) diff --git a/unsupported/Eigen/CXX11/src/Tensor/CMakeLists.txt 
b/unsupported/Eigen/CXX11/src/Tensor/CMakeLists.txt deleted file mode 100644 index 6d4b3ea0d..000000000 --- a/unsupported/Eigen/CXX11/src/Tensor/CMakeLists.txt +++ /dev/null @@ -1,6 +0,0 @@ -FILE(GLOB Eigen_CXX11_Tensor_SRCS "*.h") - -INSTALL(FILES - ${Eigen_CXX11_Tensor_SRCS} - DESTINATION ${INCLUDE_INSTALL_DIR}/unsupported/Eigen/CXX11/src/Tensor COMPONENT Devel - ) diff --git a/unsupported/Eigen/CXX11/src/Tensor/README.md b/unsupported/Eigen/CXX11/src/Tensor/README.md index eeca2f69e..02146527b 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/README.md +++ b/unsupported/Eigen/CXX11/src/Tensor/README.md @@ -1102,7 +1102,7 @@ Example: Reduction along two dimensions. As a special case, if you pass no parameter to a reduction operation the original tensor is reduced along *all* its dimensions. The result is a -one-dimension tensor with a single value. +scalar, represented as a zero-dimension tensor. Eigen::Tensor<float, 3> a(2, 3, 4); a.setValues({{{0.0f, 1.0f, 2.0f, 3.0f}, @@ -1112,7 +1112,7 @@ one-dimension tensor with a single value. {19.0f, 18.0f, 17.0f, 16.0f}, {20.0f, 21.0f, 22.0f, 23.0f}}}); // Reduce along all dimensions using the sum() operator. - Eigen::Tensor<float, 1> b = a.sum(); + Eigen::Tensor<float, 0> b = a.sum(); cout << "b" << endl << b << endl << endl; => b @@ -1168,6 +1168,44 @@ Reduce a tensor using a user-defined reduction operator. See ```SumReducer``` in TensorFunctors.h for information on how to implement a reduction operator. +## Scan Operations + +A *Scan* operation returns a tensor with the same dimensions as the original +tensor. The operation performs an inclusive scan along the specified +axis, which means it computes a running total along the axis for a given +reduction operation. +If the reduction operation corresponds to summation, then this computes the +prefix sum of the tensor along the given axis. 
+ +Example: + + + // Create a tensor of 2 dimensions + Eigen::Tensor<int, 2> a(2, 3); + a.setValues({{1, 2, 3}, {4, 5, 6}}); + // Scan it along the second dimension (1) using summation + Eigen::Tensor<int, 2> b = a.cumsum(1); + // The result is a tensor with the same size as the input + cout << "a" << endl << a << endl << endl; + cout << "b" << endl << b << endl << endl; + => + a + 1 2 3 + 4 5 6 + + b + 1 3 6 + 4 9 15 + +### cumsum(const Index& axis) + +Perform a scan by summing consecutive entries. + +### cumprod(const Index& axis) + +Perform a scan by multiplying consecutive entries. + + ## Convolutions ### convolve(const Kernel& kernel, const Dimensions& dims) diff --git a/unsupported/Eigen/CXX11/src/Tensor/Tensor.h b/unsupported/Eigen/CXX11/src/Tensor/Tensor.h index 759dede3f..1940a9692 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/Tensor.h +++ b/unsupported/Eigen/CXX11/src/Tensor/Tensor.h @@ -110,7 +110,7 @@ class Tensor : public TensorBase EIGEN_DEVICE_FUNC inline const Scalar& coeff(Index firstIndex, Index secondIndex, IndexTypes... otherIndices) const { @@ -150,7 +150,7 @@ class Tensor : public TensorBase inline Scalar& coeffRef(Index firstIndex, Index secondIndex, IndexTypes... otherIndices) { @@ -190,7 +190,7 @@ class Tensor : public TensorBase inline const Scalar& operator()(Index firstIndex, Index secondIndex, IndexTypes... otherIndices) const { @@ -257,7 +257,7 @@ class Tensor : public TensorBase inline Scalar& operator()(Index firstIndex, Index secondIndex, IndexTypes... otherIndices) { @@ -336,7 +336,7 @@ class Tensor : public TensorBase EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Tensor(Index firstDimension, IndexTypes... otherDimensions) : m_storage(firstDimension, otherDimensions...) 
@@ -350,22 +350,22 @@ class Tensor : public TensorBase(dim1, dim2)) { EIGEN_STATIC_ASSERT(2 == NumIndices, YOU_MADE_A_PROGRAMMING_MISTAKE) } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE explicit Tensor(Index dim1, Index dim2, Index dim3) + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Tensor(Index dim1, Index dim2, Index dim3) : m_storage(dim1*dim2*dim3, array(dim1, dim2, dim3)) { EIGEN_STATIC_ASSERT(3 == NumIndices, YOU_MADE_A_PROGRAMMING_MISTAKE) } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE explicit Tensor(Index dim1, Index dim2, Index dim3, Index dim4) + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Tensor(Index dim1, Index dim2, Index dim3, Index dim4) : m_storage(dim1*dim2*dim3*dim4, array(dim1, dim2, dim3, dim4)) { EIGEN_STATIC_ASSERT(4 == NumIndices, YOU_MADE_A_PROGRAMMING_MISTAKE) } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE explicit Tensor(Index dim1, Index dim2, Index dim3, Index dim4, Index dim5) + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Tensor(Index dim1, Index dim2, Index dim3, Index dim4, Index dim5) : m_storage(dim1*dim2*dim3*dim4*dim5, array(dim1, dim2, dim3, dim4, dim5)) { EIGEN_STATIC_ASSERT(5 == NumIndices, YOU_MADE_A_PROGRAMMING_MISTAKE) @@ -418,7 +418,7 @@ class Tensor : public TensorBase EIGEN_DEVICE_FUNC void resize(Index firstDimension, IndexTypes... otherDimensions) { diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorArgMax.h b/unsupported/Eigen/CXX11/src/Tensor/TensorArgMax.h index babafe108..d06f40cd8 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorArgMax.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorArgMax.h @@ -254,6 +254,14 @@ struct TensorEvaluator, Devi EIGEN_DEVICE_FUNC Scalar* data() const { return NULL; } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost + costPerCoeff(bool vectorized) const { + const double compute_cost = 1.0 + + (m_return_dim < 0 ? 
0.0 : (TensorOpCost::ModCost() + TensorOpCost::DivCost())); + return m_orig_impl.costPerCoeff(vectorized) + + m_impl.costPerCoeff(vectorized) + TensorOpCost(0, 0, compute_cost); + } + private: EIGEN_DEVICE_FUNC void gen_strides(const InputDimensions& dims, StrideDims& strides) { if (m_return_dim < 0) { diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h b/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h index 1a34f3ccc..7a45a5cf4 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h @@ -191,6 +191,12 @@ class TensorBase return unaryExpr(internal::scalar_log_op()); } + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const TensorCwiseUnaryOp, const Derived> + log1p() const { + return unaryExpr(internal::scalar_log1p_op()); + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const TensorCwiseUnaryOp, const Derived> abs() const { @@ -204,34 +210,74 @@ class TensorBase } EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE const TensorCwiseUnaryOp, const Derived> + EIGEN_STRONG_INLINE const TensorCwiseUnaryOp >, const Derived> pow(Scalar exponent) const { - return unaryExpr(internal::scalar_pow_op(exponent)); + return unaryExpr(internal::bind2nd_op >(exponent)); } EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE const TensorCwiseUnaryOp, const Derived> + EIGEN_STRONG_INLINE const TensorCwiseUnaryOp, const Derived> + real() const { + return unaryExpr(internal::scalar_real_op()); + } + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const TensorCwiseUnaryOp, const Derived> + imag() const { + return unaryExpr(internal::scalar_imag_op()); + } + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const TensorCwiseUnaryOp >, const Derived> operator+ (Scalar rhs) const { - return unaryExpr(internal::scalar_add_op(rhs)); + return unaryExpr(internal::bind2nd_op >(rhs)); } EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE const TensorCwiseUnaryOp, const Derived> + EIGEN_STRONG_INLINE friend + const TensorCwiseUnaryOp >, const Derived> + operator+ (Scalar lhs, const Derived& 
rhs) { + return rhs.unaryExpr(internal::bind1st_op >(lhs)); + } + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const TensorCwiseUnaryOp >, const Derived> operator- (Scalar rhs) const { EIGEN_STATIC_ASSERT((NumTraits::IsSigned || internal::is_same >::value), YOU_MADE_A_PROGRAMMING_MISTAKE); - return unaryExpr(internal::scalar_sub_op(rhs)); + return unaryExpr(internal::bind2nd_op >(rhs)); } EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE const TensorCwiseUnaryOp, const Derived> + EIGEN_STRONG_INLINE friend + const TensorCwiseUnaryOp >, const Derived> + operator- (Scalar lhs, const Derived& rhs) { + return rhs.unaryExpr(internal::bind1st_op >(lhs)); + } + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const TensorCwiseUnaryOp >, const Derived> operator* (Scalar rhs) const { - return unaryExpr(internal::scalar_multiple_op(rhs)); + return unaryExpr(internal::bind2nd_op >(rhs)); } EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE const TensorCwiseUnaryOp, const Derived> + EIGEN_STRONG_INLINE friend + const TensorCwiseUnaryOp >, const Derived> + operator* (Scalar lhs, const Derived& rhs) { + return rhs.unaryExpr(internal::bind1st_op >(lhs)); + } + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const TensorCwiseUnaryOp >, const Derived> operator/ (Scalar rhs) const { - return unaryExpr(internal::scalar_quotient1_op(rhs)); + return unaryExpr(internal::bind2nd_op >(rhs)); + } + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE friend + const TensorCwiseUnaryOp >, const Derived> + operator/ (Scalar lhs, const Derived& rhs) { + return rhs.unaryExpr(internal::bind1st_op >(lhs)); } EIGEN_DEVICE_FUNC @@ -277,7 +323,6 @@ class TensorBase return unaryExpr(internal::scalar_floor_op()); } - // Generic binary operation support. template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const TensorCwiseBinaryOp @@ -342,66 +387,66 @@ class TensorBase // Comparisons and tests. 
template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - const TensorCwiseBinaryOp, const Derived, const OtherDerived> + const TensorCwiseBinaryOp, const Derived, const OtherDerived> operator<(const OtherDerived& other) const { - return binaryExpr(other.derived(), internal::scalar_cmp_op()); + return binaryExpr(other.derived(), internal::scalar_cmp_op()); } template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - const TensorCwiseBinaryOp, const Derived, const OtherDerived> + const TensorCwiseBinaryOp, const Derived, const OtherDerived> operator<=(const OtherDerived& other) const { - return binaryExpr(other.derived(), internal::scalar_cmp_op()); + return binaryExpr(other.derived(), internal::scalar_cmp_op()); } template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - const TensorCwiseBinaryOp, const Derived, const OtherDerived> + const TensorCwiseBinaryOp, const Derived, const OtherDerived> operator>(const OtherDerived& other) const { - return binaryExpr(other.derived(), internal::scalar_cmp_op()); + return binaryExpr(other.derived(), internal::scalar_cmp_op()); } template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - const TensorCwiseBinaryOp, const Derived, const OtherDerived> + const TensorCwiseBinaryOp, const Derived, const OtherDerived> operator>=(const OtherDerived& other) const { - return binaryExpr(other.derived(), internal::scalar_cmp_op()); + return binaryExpr(other.derived(), internal::scalar_cmp_op()); } template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - const TensorCwiseBinaryOp, const Derived, const OtherDerived> + const TensorCwiseBinaryOp, const Derived, const OtherDerived> operator==(const OtherDerived& other) const { - return binaryExpr(other.derived(), internal::scalar_cmp_op()); + return binaryExpr(other.derived(), internal::scalar_cmp_op()); } template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - const TensorCwiseBinaryOp, const Derived, const OtherDerived> + const TensorCwiseBinaryOp, const Derived, const OtherDerived> operator!=(const OtherDerived& other) const { - return 
binaryExpr(other.derived(), internal::scalar_cmp_op()); + return binaryExpr(other.derived(), internal::scalar_cmp_op()); } // comparisons and tests for Scalars EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE const TensorCwiseBinaryOp, const Derived, const TensorCwiseNullaryOp, const Derived> > + EIGEN_STRONG_INLINE const TensorCwiseBinaryOp, const Derived, const TensorCwiseNullaryOp, const Derived> > operator<(Scalar threshold) const { return operator<(constant(threshold)); } EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE const TensorCwiseBinaryOp, const Derived, const TensorCwiseNullaryOp, const Derived> > + EIGEN_STRONG_INLINE const TensorCwiseBinaryOp, const Derived, const TensorCwiseNullaryOp, const Derived> > operator<=(Scalar threshold) const { return operator<=(constant(threshold)); } EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE const TensorCwiseBinaryOp, const Derived, const TensorCwiseNullaryOp, const Derived> > + EIGEN_STRONG_INLINE const TensorCwiseBinaryOp, const Derived, const TensorCwiseNullaryOp, const Derived> > operator>(Scalar threshold) const { return operator>(constant(threshold)); } EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE const TensorCwiseBinaryOp, const Derived, const TensorCwiseNullaryOp, const Derived> > + EIGEN_STRONG_INLINE const TensorCwiseBinaryOp, const Derived, const TensorCwiseNullaryOp, const Derived> > operator>=(Scalar threshold) const { return operator>=(constant(threshold)); } EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE const TensorCwiseBinaryOp, const Derived, const TensorCwiseNullaryOp, const Derived> > + EIGEN_STRONG_INLINE const TensorCwiseBinaryOp, const Derived, const TensorCwiseNullaryOp, const Derived> > operator==(Scalar threshold) const { return operator==(constant(threshold)); } EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE const TensorCwiseBinaryOp, const Derived, const TensorCwiseNullaryOp, const Derived> > + EIGEN_STRONG_INLINE const TensorCwiseBinaryOp, const Derived, const TensorCwiseNullaryOp, const Derived> > operator!=(Scalar threshold) 
const { return operator!=(constant(threshold)); } @@ -453,6 +498,28 @@ class TensorBase return TensorFFTOp(derived(), fft); } + // Scan. + typedef TensorScanOp, const Derived> TensorScanSumOp; + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const TensorScanSumOp + cumsum(const Index& axis, bool exclusive = false) const { + return TensorScanSumOp(derived(), axis, exclusive); + } + + typedef TensorScanOp, const Derived> TensorScanProdOp; + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const TensorScanProdOp + cumprod(const Index& axis, bool exclusive = false) const { + return TensorScanProdOp(derived(), axis, exclusive); + } + + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const TensorScanOp + scan(const Index& axis, const Reducer& reducer, bool exclusive = false) const { + return TensorScanOp(derived(), axis, exclusive, reducer); + } + // Reductions. template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const TensorReductionOp, const Dims, const Derived> @@ -676,6 +743,12 @@ class TensorBase slice(const StartIndices& startIndices, const Sizes& sizes) const { return TensorSlicingOp(derived(), startIndices, sizes); } + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const TensorStridingSlicingOp + stridedSlice(const StartIndices& startIndices, const StopIndices& stopIndices, const Strides& strides) const { + return TensorStridingSlicingOp(derived(), startIndices, stopIndices, strides); + } template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const TensorChippingOp chip(const Index offset) const { @@ -750,8 +823,8 @@ class TensorBase EIGEN_STRONG_INLINE const Derived& derived() const { return *static_cast(this); } }; -template -class TensorBase : public TensorBase { +template::value> +class TensorBase : public TensorBase { public: typedef internal::traits DerivedTraits; typedef typename DerivedTraits::Scalar Scalar; @@ -761,7 +834,7 @@ class TensorBase : public TensorBase friend class Tensor; template friend class TensorFixedSize; - template friend class TensorBase; + template friend 
class TensorBase; EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& setZero() { @@ -780,7 +853,7 @@ class TensorBase : public TensorBasetemplate random(); } -#ifdef EIGEN_HAS_VARIADIC_TEMPLATES +#if EIGEN_HAS_VARIADIC_TEMPLATES EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& setValues( const typename internal::Initializer::InitList& vals) { @@ -851,6 +924,19 @@ class TensorBase : public TensorBase(derived(), startIndices, sizes); } + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const TensorStridingSlicingOp + stridedSlice(const StartIndices& startIndices, const StopIndices& stopIndices, const Strides& strides) const { + return TensorStridingSlicingOp(derived(), startIndices, stopIndices, strides); + } + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + TensorStridingSlicingOp + stridedSlice(const StartIndices& startIndices, const StopIndices& stopIndices, const Strides& strides) { + return TensorStridingSlicingOp(derived(), startIndices, stopIndices, strides); + } + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const TensorChippingOp chip(const Index offset) const { diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorBroadcasting.h b/unsupported/Eigen/CXX11/src/Tensor/TensorBroadcasting.h index c771496e2..5d67f69f3 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorBroadcasting.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorBroadcasting.h @@ -106,7 +106,7 @@ struct TensorEvaluator, Device> static const int PacketSize = internal::unpacket_traits::size; enum { - IsAligned = false, + IsAligned = true, PacketAccess = TensorEvaluator::PacketAccess, Layout = TensorEvaluator::Layout, RawAccess = false @@ -118,7 +118,7 @@ struct TensorEvaluator, Device> // The broadcasting op doesn't change the rank of the tensor. One can't broadcast a scalar // and store the result in a scalar. Instead one should reshape the scalar into a a N-D // tensor with N >= 1 of 1 element first and then broadcast. 
- EIGEN_STATIC_ASSERT(NumDims > 0, YOU_MADE_A_PROGRAMMING_MISTAKE); + EIGEN_STATIC_ASSERT((NumDims > 0), YOU_MADE_A_PROGRAMMING_MISTAKE); const InputDimensions& input_dims = m_impl.dimensions(); const Broadcast& broadcast = op.broadcast(); for (int i = 0; i < NumDims; ++i) { @@ -247,7 +247,7 @@ struct TensorEvaluator, Device> template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packetColMajor(Index index) const { - EIGEN_STATIC_ASSERT(PacketSize > 1, YOU_MADE_A_PROGRAMMING_MISTAKE) + EIGEN_STATIC_ASSERT((PacketSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE) eigen_assert(index+PacketSize-1 < dimensions().TotalSize()); const Index originalIndex = index; @@ -299,7 +299,7 @@ struct TensorEvaluator, Device> template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packetRowMajor(Index index) const { - EIGEN_STATIC_ASSERT(PacketSize > 1, YOU_MADE_A_PROGRAMMING_MISTAKE) + EIGEN_STATIC_ASSERT((PacketSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE) eigen_assert(index+PacketSize-1 < dimensions().TotalSize()); const Index originalIndex = index; @@ -354,11 +354,11 @@ struct TensorEvaluator, Device> if (NumDims > 0) { for (int i = NumDims - 1; i > 0; --i) { compute_cost += TensorOpCost::DivCost(); - if (internal::index_statically_eq()(i, 1)) { + if (internal::index_statically_eq(i, 1)) { compute_cost += TensorOpCost::MulCost() + TensorOpCost::AddCost(); } else { - if (!internal::index_statically_eq()(i, 1)) { + if (!internal::index_statically_eq(i, 1)) { compute_cost += TensorOpCost::MulCost() + TensorOpCost::ModCost() + TensorOpCost::AddCost(); diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorChipping.h b/unsupported/Eigen/CXX11/src/Tensor/TensorChipping.h index 2742dbb95..1ba7ef170 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorChipping.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorChipping.h @@ -152,8 +152,7 @@ struct TensorEvaluator, Device> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device) : 
m_impl(op.expression(), device), m_dim(op.dim()), m_device(device) { - // We could also support the case where NumInputDims==1 if needed. - EIGEN_STATIC_ASSERT(NumInputDims >= 2, YOU_MADE_A_PROGRAMMING_MISTAKE); + EIGEN_STATIC_ASSERT((NumInputDims >= 1), YOU_MADE_A_PROGRAMMING_MISTAKE); eigen_assert(NumInputDims > m_dim.actualDim()); const typename TensorEvaluator::Dimensions& input_dims = m_impl.dimensions(); @@ -203,7 +202,7 @@ struct TensorEvaluator, Device> template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const { - EIGEN_STATIC_ASSERT(PacketSize > 1, YOU_MADE_A_PROGRAMMING_MISTAKE) + EIGEN_STATIC_ASSERT((PacketSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE) eigen_assert(index+PacketSize-1 < dimensions().TotalSize()); if ((static_cast(Layout) == static_cast(ColMajor) && m_dim.actualDim() == 0) || @@ -342,7 +341,7 @@ struct TensorEvaluator, Device> template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void writePacket(Index index, const PacketReturnType& x) { - EIGEN_STATIC_ASSERT(PacketSize > 1, YOU_MADE_A_PROGRAMMING_MISTAKE) + EIGEN_STATIC_ASSERT((PacketSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE) if ((static_cast(this->Layout) == static_cast(ColMajor) && this->m_dim.actualDim() == 0) || (static_cast(this->Layout) == static_cast(RowMajor) && this->m_dim.actualDim() == NumInputDims-1)) { diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorConcatenation.h b/unsupported/Eigen/CXX11/src/Tensor/TensorConcatenation.h index 839c6e3e5..59bf90d93 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorConcatenation.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorConcatenation.h @@ -128,8 +128,8 @@ struct TensorEvaluator(TensorEvaluator::Layout) == static_cast(TensorEvaluator::Layout) || NumDims == 1), YOU_MADE_A_PROGRAMMING_MISTAKE); - EIGEN_STATIC_ASSERT(NumDims == RightNumDims, YOU_MADE_A_PROGRAMMING_MISTAKE); - EIGEN_STATIC_ASSERT(NumDims > 0, YOU_MADE_A_PROGRAMMING_MISTAKE); + EIGEN_STATIC_ASSERT((NumDims == RightNumDims), 
YOU_MADE_A_PROGRAMMING_MISTAKE); + EIGEN_STATIC_ASSERT((NumDims > 0), YOU_MADE_A_PROGRAMMING_MISTAKE); eigen_assert(0 <= m_axis && m_axis < NumDims); const Dimensions& lhs_dims = m_leftImpl.dimensions(); @@ -248,8 +248,8 @@ struct TensorEvaluator EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const { - static const int packetSize = internal::unpacket_traits::size; - EIGEN_STATIC_ASSERT(packetSize > 1, YOU_MADE_A_PROGRAMMING_MISTAKE) + const int packetSize = internal::unpacket_traits::size; + EIGEN_STATIC_ASSERT((packetSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE) eigen_assert(index + packetSize - 1 < dimensions().TotalSize()); EIGEN_ALIGN_MAX CoeffReturnType values[packetSize]; @@ -344,8 +344,8 @@ template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void writePacket(Index index, const PacketReturnType& x) { - static const int packetSize = internal::unpacket_traits::size; - EIGEN_STATIC_ASSERT(packetSize > 1, YOU_MADE_A_PROGRAMMING_MISTAKE) + const int packetSize = internal::unpacket_traits::size; + EIGEN_STATIC_ASSERT((packetSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE) eigen_assert(index + packetSize - 1 < this->dimensions().TotalSize()); EIGEN_ALIGN_MAX CoeffReturnType values[packetSize]; diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorContraction.h b/unsupported/Eigen/CXX11/src/Tensor/TensorContraction.h index 6f113b903..20b29e5fd 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorContraction.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorContraction.h @@ -25,8 +25,9 @@ template struct traits > { // Type promotion to handle the case where the types of the lhs and the rhs are different. 
- typedef typename internal::promote_storage_type::ret Scalar; + typedef typename gebp_traits::type, + typename remove_const::type>::ResScalar Scalar; + typedef typename promote_storage_type::StorageKind, typename traits::StorageKind>::ret StorageKind; typedef typename promote_index_type::Index, @@ -37,7 +38,7 @@ struct traits > typedef typename remove_reference::type _RhsNested; // From NumDims below. - static const int NumDimensions = max_n_1::NumDimensions + traits::NumDimensions - 2 * array_size::value>::size; + static const int NumDimensions = traits::NumDimensions + traits::NumDimensions - 2 * array_size::value; static const int Layout = traits::Layout; enum { @@ -65,7 +66,7 @@ struct traits::NumDimensions + traits::NumDimensions - 2 * array_size::value>::size; + static const int NumDimensions = traits::NumDimensions + traits::NumDimensions - 2 * array_size::value; }; } // end namespace internal @@ -75,8 +76,8 @@ class TensorContractionOp : public TensorBase::Scalar Scalar; - typedef typename internal::promote_storage_type::ret CoeffReturnType; + typedef typename internal::gebp_traits::ResScalar CoeffReturnType; typedef typename Eigen::internal::nested::type Nested; typedef typename Eigen::internal::traits::StorageKind StorageKind; typedef typename Eigen::internal::traits::Index Index; @@ -140,11 +141,11 @@ struct TensorContractionEvaluatorBase static const int RDims = internal::array_size::Dimensions>::value; static const int ContractDims = internal::array_size::value; - static const int NumDims = max_n_1::size; + static const int NumDims = LDims + RDims - 2 * ContractDims; typedef array contract_t; - typedef array::size> left_nocontract_t; - typedef array::size> right_nocontract_t; + typedef array left_nocontract_t; + typedef array right_nocontract_t; typedef DSizes Dimensions; @@ -218,11 +219,9 @@ struct TensorContractionEvaluatorBase rhs_strides[i+1] = rhs_strides[i] * eval_right_dims[i]; } - m_i_strides[0] = 1; - m_j_strides[0] = 1; - if(ContractDims) { 
- m_k_strides[0] = 1; - } + if (m_i_strides.size() > 0) m_i_strides[0] = 1; + if (m_j_strides.size() > 0) m_j_strides[0] = 1; + if (m_k_strides.size() > 0) m_k_strides[0] = 1; m_i_size = 1; m_j_size = 1; @@ -318,11 +317,6 @@ struct TensorContractionEvaluatorBase } } - // Scalar case. We represent the result as a 1d tensor of size 1. - if (LDims + RDims == 2 * ContractDims) { - m_dimensions[0] = 1; - } - // If the layout is RowMajor, we need to reverse the m_dimensions if (static_cast(Layout) == static_cast(RowMajor)) { for (int i = 0, j = NumDims - 1; i < j; i++, j--) { @@ -510,7 +504,7 @@ struct TensorContractionEvaluatorBase // call gebp (matrix kernel) // The parameters here are copied from Eigen's GEMM implementation - gebp(output.getSubMapper(i2, j2), blockA, blockB, actual_mc, actual_kc, actual_nc, 1.0, -1, -1, 0, 0); + gebp(output.getSubMapper(i2, j2), blockA, blockB, actual_mc, actual_kc, actual_nc, Scalar(1), -1, -1, 0, 0); } } } @@ -607,15 +601,14 @@ struct TensorEvaluator::value; typedef array contract_t; - typedef array::size> left_nocontract_t; - typedef array::size> right_nocontract_t; + typedef array left_nocontract_t; + typedef array right_nocontract_t; - static const int NumDims = max_n_1::size; + static const int NumDims = LDims + RDims - 2 * ContractDims; // Could we use NumDimensions here? 
typedef DSizes Dimensions; - EIGEN_DEVICE_FUNC TensorEvaluator(const XprType& op, const Device& device) : Base(op, device) { } diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorContractionCuda.h b/unsupported/Eigen/CXX11/src/Tensor/TensorContractionCuda.h index 6a3ef14ef..d65dbb40f 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorContractionCuda.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorContractionCuda.h @@ -461,8 +461,8 @@ EigenContractionKernelInternal(const LhsMapper lhs, const RhsMapper rhs, #undef writeResultShmem #undef writeRow - const int max_i_write = (min)((int)((m_size - base_m - threadIdx.y + 7) / 8), 8); - const int max_j_write = (min)((int)((n_size - base_n - threadIdx.z + 7) / 8), 8); + const int max_i_write = numext::mini((int)((m_size - base_m - threadIdx.y + 7) / 8), 8); + const int max_j_write = numext::mini((int)((n_size - base_n - threadIdx.z + 7) / 8), 8); if (threadIdx.x < max_i_write) { if (max_j_write == 8) { @@ -1240,10 +1240,10 @@ struct TensorEvaluator right_dim_mapper_t; typedef array contract_t; - typedef array::size> left_nocontract_t; - typedef array::size> right_nocontract_t; + typedef array left_nocontract_t; + typedef array right_nocontract_t; - static const int NumDims = max_n_1::size; + static const int NumDims = LDims + RDims - 2 * ContractDims; typedef DSizes Dimensions; diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorContractionMapper.h b/unsupported/Eigen/CXX11/src/Tensor/TensorContractionMapper.h index b27e1a1b4..9b2cb3ff6 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorContractionMapper.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorContractionMapper.h @@ -130,19 +130,19 @@ class SimpleTensorContractionMapper { } Index contract_val = left ? 
col : row; - for (int i = static_cast(array_size::value) - 1; i > 0; i--) { - const Index idx = contract_val / m_k_strides[i]; - linidx += idx * m_contract_strides[i]; - contract_val -= idx * m_k_strides[i]; - } - if(array_size::value > 0) { - if (side == Rhs && inner_dim_contiguous) { - eigen_assert(m_contract_strides[0] == 1); - linidx += contract_val; - } else { - linidx += contract_val * m_contract_strides[0]; - } + for (int i = static_cast(array_size::value) - 1; i > 0; i--) { + const Index idx = contract_val / m_k_strides[i]; + linidx += idx * m_contract_strides[i]; + contract_val -= idx * m_k_strides[i]; + } + + if (side == Rhs && inner_dim_contiguous) { + eigen_assert(m_contract_strides[0] == 1); + linidx += contract_val; + } else { + linidx += contract_val * m_contract_strides[0]; + } } return linidx; @@ -153,15 +153,15 @@ class SimpleTensorContractionMapper { const bool left = (side == Lhs); Index nocontract_val[2] = {left ? row : col, left ? row + distance : col}; Index linidx[2] = {0, 0}; - for (int i = static_cast(array_size::value) - 1; i > 0; i--) { - const Index idx0 = nocontract_val[0] / m_ij_strides[i]; - const Index idx1 = nocontract_val[1] / m_ij_strides[i]; - linidx[0] += idx0 * m_nocontract_strides[i]; - linidx[1] += idx1 * m_nocontract_strides[i]; - nocontract_val[0] -= idx0 * m_ij_strides[i]; - nocontract_val[1] -= idx1 * m_ij_strides[i]; - } if (array_size::value > array_size::value) { + for (int i = static_cast(array_size::value) - 1; i > 0; i--) { + const Index idx0 = nocontract_val[0] / m_ij_strides[i]; + const Index idx1 = nocontract_val[1] / m_ij_strides[i]; + linidx[0] += idx0 * m_nocontract_strides[i]; + linidx[1] += idx1 * m_nocontract_strides[i]; + nocontract_val[0] -= idx0 * m_ij_strides[i]; + nocontract_val[1] -= idx1 * m_ij_strides[i]; + } if (side == Lhs && inner_dim_contiguous) { eigen_assert(m_nocontract_strides[0] == 1); linidx[0] += nocontract_val[0]; @@ -173,22 +173,24 @@ class SimpleTensorContractionMapper { } Index 
contract_val[2] = {left ? col : row, left ? col : row + distance}; - for (int i = static_cast(array_size::value) - 1; i > 0; i--) { - const Index idx0 = contract_val[0] / m_k_strides[i]; - const Index idx1 = contract_val[1] / m_k_strides[i]; - linidx[0] += idx0 * m_contract_strides[i]; - linidx[1] += idx1 * m_contract_strides[i]; - contract_val[0] -= idx0 * m_k_strides[i]; - contract_val[1] -= idx1 * m_k_strides[i]; - } + if (array_size::value> 0) { + for (int i = static_cast(array_size::value) - 1; i > 0; i--) { + const Index idx0 = contract_val[0] / m_k_strides[i]; + const Index idx1 = contract_val[1] / m_k_strides[i]; + linidx[0] += idx0 * m_contract_strides[i]; + linidx[1] += idx1 * m_contract_strides[i]; + contract_val[0] -= idx0 * m_k_strides[i]; + contract_val[1] -= idx1 * m_k_strides[i]; + } - if (side == Rhs && inner_dim_contiguous) { - eigen_assert(m_contract_strides[0] == 1); - linidx[0] += contract_val[0]; - linidx[1] += contract_val[1]; - } else { - linidx[0] += contract_val[0] * m_contract_strides[0]; - linidx[1] += contract_val[1] * m_contract_strides[0]; + if (side == Rhs && inner_dim_contiguous) { + eigen_assert(m_contract_strides[0] == 1); + linidx[0] += contract_val[0]; + linidx[1] += contract_val[1]; + } else { + linidx[0] += contract_val[0] * m_contract_strides[0]; + linidx[1] += contract_val[1] * m_contract_strides[0]; + } } return IndexPair(linidx[0], linidx[1]); } @@ -200,7 +202,7 @@ class SimpleTensorContractionMapper { return (Alignment == Aligned) && (side == Lhs) && inner_dim_contiguous ? 0 : size; } EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Index stride() const { - return ((side == Lhs) && inner_dim_contiguous) ? m_contract_strides[0] : 1; + return ((side == Lhs) && inner_dim_contiguous && array_size::value > 0) ? 
m_contract_strides[0] : 1; } protected: diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorContractionThreadPool.h b/unsupported/Eigen/CXX11/src/Tensor/TensorContractionThreadPool.h index 9044454fd..ee16cde9b 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorContractionThreadPool.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorContractionThreadPool.h @@ -14,6 +14,8 @@ #ifdef EIGEN_USE_THREADS namespace Eigen { + +#ifdef EIGEN_USE_SIMPLE_THREAD_POOL namespace internal { template @@ -52,7 +54,7 @@ struct packRhsAndKernelArg { }; } // end namespace internal - +#endif // EIGEN_USE_SIMPLE_THREAD_POOL template struct TensorEvaluator, ThreadPoolDevice> : @@ -92,10 +94,10 @@ struct TensorEvaluator right_dim_mapper_t; typedef array contract_t; - typedef array::size> left_nocontract_t; - typedef array::size> right_nocontract_t; + typedef array left_nocontract_t; + typedef array right_nocontract_t; - static const int NumDims = max_n_1::size; + static const int NumDims = LDims + RDims - 2 * ContractDims; typedef DSizes Dimensions; @@ -110,6 +112,623 @@ struct TensorEvaluator + void evalProduct(Scalar* buffer) const { + typedef + typename internal::remove_const::type + LhsScalar; + typedef + typename internal::remove_const::type + RhsScalar; + typedef typename internal::gebp_traits Traits; + typedef TensorEvaluator LeftEvaluator; + typedef TensorEvaluator RightEvaluator; + typedef internal::TensorContractionInputMapper< + LhsScalar, Index, internal::Lhs, LeftEvaluator, left_nocontract_t, + contract_t, internal::packet_traits::size, + lhs_inner_dim_contiguous, false, Unaligned> + LhsMapper; + typedef internal::TensorContractionInputMapper< + RhsScalar, Index, internal::Rhs, RightEvaluator, right_nocontract_t, + contract_t, internal::packet_traits::size, + rhs_inner_dim_contiguous, rhs_inner_dim_reordered, Unaligned> + RhsMapper; + typedef internal::blas_data_mapper OutputMapper; + typedef internal::gemm_pack_lhs + LhsPacker; + typedef internal::gemm_pack_rhs< + 
RhsScalar, Index, typename RhsMapper::SubMapper, Traits::nr, ColMajor> + RhsPacker; + typedef internal::gebp_kernel + GebpKernel; + + const Index m = this->m_i_size; + const Index n = this->m_j_size; + const Index k = this->m_k_size; + if (m == 0 || n == 0 || k == 0) return; + + // Compute a set of algorithm parameters: + // - kernel block sizes (bm, bn, bk) + // - task grain sizes (number of kernels executed per task: gm, gn) + // - number of threads + // - sharding by row/column + // - parallel packing or first lhs then rhs + // and some derived parameters: + // - number of tasks (nm, nn, nk) + // - number of kernels (nm0, nn0) + // Unfortunately, all these parameters are tightly interdependent. + // So in some cases we first compute approximate values, then compute other + // values based on these approximations and then refine the approximations. + + // There are lots of heuristics here. There is some reasoning behind them, + // but ultimately they are just tuned on contraction benchmarks for + // different input configurations, thread counts and instruction sets. + // So feel free to question any of them. + + // Compute whether we want to shard by row or by column. + // This is a first approximation, it will be refined later. Since we don't + // know number of threads yet we use 2, because what's we are most + // interested in at this point is whether it makes sense to use + // parallelization at all or not. + bool shard_by_col = shardByCol(m, n, 2); + + // First approximation of kernel blocking sizes. + // Again, we don't know number of threads yet, so we use 2. + Index bm, bn, bk; + if (shard_by_col) { + internal::TensorContractionBlocking + blocking(k, m, n, 2); + bm = blocking.mc(); + bn = blocking.nc(); + bk = blocking.kc(); + } else { + internal::TensorContractionBlocking + blocking(k, m, n, 2); + bm = blocking.mc(); + bn = blocking.nc(); + bk = blocking.kc(); + } + + // Compute optimal number of threads. 
+ // Note: we use bk instead of k here because we are interested in amount of + // _parallelizable_ computations, and computations are not parallelizable + // across k dimension. + const TensorOpCost cost = + contractionCost(m, n, bm, bn, bk, shard_by_col, false); + int num_threads = TensorCostModel::numThreads( + static_cast(n) * m, cost, this->m_device.numThreads()); + + // TODO(dvyukov): this is a stop-gap to prevent regressions while the cost + // model is not tuned. Remove this when the cost model is tuned. + if (n == 1) num_threads = 1; + + if (num_threads == 1) { + // The single-threaded algorithm should be faster in this case. + if (n == 1) + this->template evalGemv(buffer); + else + this->template evalGemm(buffer); + return; + } + + // Now that we know number of threads, recalculate sharding and blocking. + shard_by_col = shardByCol(m, n, num_threads); + if (shard_by_col) { + internal::TensorContractionBlocking + blocking(k, m, n, num_threads); + bm = blocking.mc(); + bn = blocking.nc(); + bk = blocking.kc(); + } else { + internal::TensorContractionBlocking + blocking(k, m, n, num_threads); + bm = blocking.mc(); + bn = blocking.nc(); + bk = blocking.kc(); + } + + // Number of kernels for each dimension. + Index nm0 = divup(m, bm); + Index nn0 = divup(n, bn); + Index nk = divup(k, bk); + + // Calculate task grain size (number of kernels executed per task). + // This task size coarsening serves two purposes: + // 1. It reduces per-task overheads including synchronization overheads. + // 2. It allows to use caches better (reuse the same packed rhs in several + // consecutive kernels). + Index gm = 1; + Index gn = 1; + // If we are sharding by column, then we prefer to reduce rows first. 
+ if (shard_by_col) { + gm = coarsenM(m, n, bm, bn, bk, gn, num_threads, shard_by_col); + gn = coarsenN(m, n, bm, bn, bk, gm, num_threads, shard_by_col); + } else { + gn = coarsenN(m, n, bm, bn, bk, gm, num_threads, shard_by_col); + gm = coarsenM(m, n, bm, bn, bk, gn, num_threads, shard_by_col); + } + // Number of tasks in each dimension. + Index nm = divup(nm0, gm); + Index nn = divup(nn0, gn); + + // Last by not least, decide whether we want to issue both lhs and rhs + // packing in parallel; or issue lhs packing first, and then issue rhs + // packing when lhs packing completes (for !shard_by_col lhs and rhs are + // swapped). Parallel packing allows more parallelism (for both packing and + // kernels), while sequential packing provides better locality (once + // a thread finishes rhs packing it proceed to kernels with that rhs). + // First, we are interested in parallel packing if there are few tasks. + bool parallel_pack = num_threads >= nm * nn; + // Also do parallel packing if all data fits into L2$. + if (m * bk * Index(sizeof(LhsScalar)) + n * bk * Index(sizeof(RhsScalar)) <= + l2CacheSize() * num_threads) + parallel_pack = true; + // But don't do it if we will use each rhs only once. Locality seems to be + // more important in this case. + if ((shard_by_col ? nm : nn) == 1) parallel_pack = false; + + LhsMapper lhs(this->m_leftImpl, this->m_left_nocontract_strides, + this->m_i_strides, this->m_left_contracting_strides, + this->m_k_strides); + + RhsMapper rhs(this->m_rightImpl, this->m_right_nocontract_strides, + this->m_j_strides, this->m_right_contracting_strides, + this->m_k_strides); + + Context(this->m_device, num_threads, lhs, rhs, buffer, m, n, + k, bm, bn, bk, nm, nn, nk, gm, gn, nm0, nn0, + shard_by_col, parallel_pack) + .run(); + } + + // Context coordinates a single parallel gemm operation. 
+ template + class Context { + public: + Context(const Device& device, int num_threads, LhsMapper& lhs, + RhsMapper& rhs, Scalar* buffer, Index tm, Index tn, Index tk, Index bm, + Index bn, Index bk, Index nm, Index nn, Index nk, Index gm, + Index gn, Index nm0, Index nn0, bool shard_by_col, + bool parallel_pack) + : device_(device), + lhs_(lhs), + rhs_(rhs), + buffer_(buffer), + output_(buffer, tm), + num_threads_(num_threads), + shard_by_col_(shard_by_col), + parallel_pack_(parallel_pack), + m_(tm), + n_(tn), + k_(tk), + bm_(bm), + bn_(bn), + bk_(bk), + nm_(nm), + nn_(nn), + nk_(nk), + gm_(gm), + gn_(gn), + nm0_(nm0), + nn0_(nn0) + { + for (Index x = 0; x < P; x++) { + // Normal number of notifications for k slice switch is + // nm_ + nn_ + nm_ * nn_. However, first P - 1 slices will receive only + // nm_ + nn_ notifications, because they will not receive notifications + // from preceeding kernels. + state_switch_[x] = + x == 0 + ? 1 + : (parallel_pack_ ? nn_ + nm_ : (shard_by_col_ ? nn_ : nm_)) + + (x == P - 1 ? nm_ * nn_ : 0); + state_packing_ready_[x] = + parallel_pack_ ? 0 : (shard_by_col_ ? nm_ : nn_); + state_kernel_[x] = new std::atomic*[nm_]; + for (Index m = 0; m < nm_; m++) { + state_kernel_[x][m] = new std::atomic[nn_]; + // Kernels generally receive 3 notifications (previous kernel + 2 + // packing), but the first slice won't get notifications from previous + // kernels. + for (Index n = 0; n < nn_; n++) + state_kernel_[x][m][n].store( + (x == 0 ? 0 : 1) + (parallel_pack_ ? 2 : 1), + std::memory_order_relaxed); + } + } + + // Allocate memory for packed rhs/lhs matrices. 
+ size_t align = numext::maxi(EIGEN_MAX_ALIGN_BYTES, 1); + size_t lhs_size = + divup(bm_ * bk_ * sizeof(LhsScalar), align) * align; + size_t rhs_size = + divup(bn_ * bk_ * sizeof(RhsScalar), align) * align; + packed_mem_ = static_cast(internal::aligned_malloc( + (nm0_ * lhs_size + nn0_ * rhs_size) * std::min(nk_, P - 1))); + char* mem = static_cast(packed_mem_); + for (Index x = 0; x < numext::mini(nk_, P - 1); x++) { + packed_lhs_[x].resize(nm0_); + for (Index m = 0; m < nm0_; m++) { + packed_lhs_[x][m] = reinterpret_cast(mem); + mem += lhs_size; + } + packed_rhs_[x].resize(nn0_); + for (Index n = 0; n < nn0_; n++) { + packed_rhs_[x][n] = reinterpret_cast(mem); + mem += rhs_size; + } + } + } + + ~Context() { + for (Index x = 0; x < P; x++) { + for (Index m = 0; m < nm_; m++) delete[] state_kernel_[x][m]; + delete[] state_kernel_[x]; + } + internal::aligned_free(packed_mem_); + } + + void run() { + // Kick off packing of the first slice. + signal_switch(0, 1); + // Wait for overall completion. + // TODO(dvyukov): this wait can lead to deadlock. + // If nthreads contractions are concurrently submitted from worker + // threads, this wait will block all worker threads and the system will + // deadlock. + done_.Wait(); + } + + private: + Notification done_; + const Device& device_; + LhsMapper& lhs_; + RhsMapper& rhs_; + Scalar* const buffer_; + OutputMapper output_; + const int num_threads_; + const bool shard_by_col_; + const bool parallel_pack_; + // Matrix sizes. + const Index m_; + const Index n_; + const Index k_; + // Block sizes. + const Index bm_; + const Index bn_; + const Index bk_; + // Number of tasks. + const Index nm_; + const Index nn_; + const Index nk_; + // Task grain sizes (number of kernels executed per task). + const Index gm_; + const Index gn_; + // Number of blocks (this is different from ni_/nn_ because of task size + // coarsening). + const Index nm0_; + const Index nn0_; + + // Parallelization strategy. 
+ // + // Blocks related to the same k block can run in parallel because they write + // to different output blocks. So we parallelize within k slices, this + // gives us parallelism level of m x n. Before we can start any kernels + // related to k-th slice, we need to issue m lhs packing tasks and n rhs + // packing tasks. + // + // However, there is a bottleneck when we are finishing kernels for k-th + // slice (at the very end there is only 1 runnable kernel). To mitigate this + // bottleneck we allow kernels from k-th and k+1-th slices to run in + // parallel. Note that (m, n, k) and (m, n, k+1) kernels write to the same + // output block, so they must not run in parallel. + // + // This gives us the following dependency graph. + // On each k slice we have m x n kernel tasks, m lhs paking tasks and n rhs + // packing tasks. + // Kernel (m, n, k) can start when: + // - kernel (m, n, k-1) has finished + // - lhs packing (m, k) has finished + // - rhs packing (n, k) has finished + // Lhs/rhs packing can start when: + // - all k-1 packing has finished (artificially imposed to limit amount of + // parallel packing) + // + // On top of that we limit runnable tasks to two consecutive k slices. + // This is done to limit amount of memory we need for packed lhs/rhs + // (for each k slice we need m*bk + n*bk memory in packed_lhs_/packed_rhs_). + // + // state_switch_ tracks when we are ready to switch to the next k slice. + // state_kernel_[m][n] tracks when we are ready to kick off kernel (m, n). + // These variable are rolling over 3 consecutive k slices: first two we are + // actively executing + one to track completion of kernels in the second + // slice. + static const Index P = 3; + void* packed_mem_; + std::vector packed_lhs_[P - 1]; + std::vector packed_rhs_[P - 1]; + std::atomic** state_kernel_[P]; + // state_switch_ is frequently modified by worker threads, while other + // fields are read-only after constructor. 
Let's move it to a separate cache + // line to reduce cache-coherency traffic. + char pad_[128]; + std::atomic state_packing_ready_[P]; + std::atomic state_switch_[P]; + + void pack_lhs(Index m, Index k) { + const Index mend = m * gm_ + gm(m); + for (Index m1 = m * gm_; m1 < mend; m1++) + LhsPacker()(packed_lhs_[k % (P - 1)][m1], + lhs_.getSubMapper(m1 * bm_, k * bk_), bk(k), bm(m1)); + + if (!parallel_pack_ && shard_by_col_) { + signal_packing(k); + } else { + signal_switch(k + 1); + for (Index n = nn_ - 1; n >= 0; n--) signal_kernel(m, n, k, n == 0); + } + } + + void pack_rhs(Index n, Index k) { + const Index nend = n * gn_ + gn(n); + for (Index n1 = n * gn_; n1 < nend; n1++) { + if (k == 0) { + // Zero the output memory in parallel. + // On 10000x2x10000 mm zeroing can easily take half of time. + // Zero (bn x m) row. Safe to do here because all kernels that will + // write to this memory depend on completion of this task. + // Note: don't call device_.memset() here. device_.memset() blocks on + // thread pool worker thread, which can lead to underutilization and + // deadlocks. + memset(buffer_ + n1 * bn_ * m_, 0, bn(n1) * m_ * sizeof(Scalar)); + } + RhsPacker()(packed_rhs_[k % (P - 1)][n1], + rhs_.getSubMapper(k * bk_, n1 * bn_), bk(k), bn(n1)); + } + + if (parallel_pack_ || shard_by_col_) { + signal_switch(k + 1); + for (Index m = nm_ - 1; m >= 0; m--) signal_kernel(m, n, k, m == 0); + } else { + signal_packing(k); + } + } + + void kernel(Index m, Index n, Index k) { + // Note: order of iteration matters here. Iteration over m is innermost + // because we want to reuse the same packed rhs in consequetive tasks + // (rhs fits into L2$ while lhs only into L3$). 
+ const Index nend = n * gn_ + gn(n); + const Index mend = m * gm_ + gm(m); + if (shard_by_col_) { + for (Index n1 = n * gn_; n1 < nend; n1++) { + for (Index m1 = m * gm_; m1 < mend; m1++) + GebpKernel()(output_.getSubMapper(m1 * bm_, n1 * bn_), + packed_lhs_[k % (P - 1)][m1], + packed_rhs_[k % (P - 1)][n1], bm(m1), bk(k), bn(n1), + Scalar(1), -1, -1, 0, 0); + } + } else { + for (Index m1 = m * gm_; m1 < mend; m1++) + for (Index n1 = n * gn_; n1 < nend; n1++) { + GebpKernel()(output_.getSubMapper(m1 * bm_, n1 * bn_), + packed_lhs_[k % (P - 1)][m1], + packed_rhs_[k % (P - 1)][n1], bm(m1), bk(k), bn(n1), + Scalar(1), -1, -1, 0, 0); + } + } + signal_kernel(m, n, k + 1, false); + signal_switch(k + 2); + } + + void signal_packing(Index k) { + eigen_assert(!parallel_pack_); + Index s = state_packing_ready_[k % P].fetch_sub(1); + eigen_assert(s > 0); + if (s != 1) return; + state_packing_ready_[k % P] = shard_by_col_ ? nm_ : nn_; + enqueue_packing(k, shard_by_col_); + } + + void signal_kernel(Index m, Index n, Index k, bool sync) { + std::atomic* state = &state_kernel_[k % P][m][n]; + Index s = state->load(); + eigen_assert(s > 0); + if (s != 1 && state->fetch_sub(1) != 1) return; + state->store(parallel_pack_ ? 3 : 2, std::memory_order_relaxed); + if (sync) + kernel(m, n, k); + else + device_.enqueueNoNotification([=]() { kernel(m, n, k); }); + } + + void signal_switch(Index k, Index v = 1) { + Index s = state_switch_[k % P].fetch_sub(v); + eigen_assert(s >= v); + if (s != v) return; + + // Ready to switch to the next k slice. + // Reset counter for the next iteration. + state_switch_[k % P] = + (parallel_pack_ ? nm_ + nn_ : (shard_by_col_ ? nn_ : nm_)) + + nm_ * nn_; + if (k < nk_) { + // Issue lhs/rhs packing. Their completion will in turn kick off + // kernels. 
+ if (parallel_pack_) { + enqueue_packing(k, !shard_by_col_); + enqueue_packing(k, shard_by_col_); + } else if (shard_by_col_) { + enqueue_packing(k, false); + } else { + enqueue_packing(k, true); + } + + // Termination handling. + // Because kernel completion signals k + 2 switch, we need to finish nk + // + 2 slices without issuing any tasks on nk + 1 slice. So here we + // pretend that all nk + 1 packing tasks just finish instantly; so that + // nk + 2 switch only waits for completion of nk kernels. + } else if (k == nk_) { + signal_switch(k + 1, + parallel_pack_ ? nm_ + nn_ : (shard_by_col_ ? nn_ : nm_)); + } else { + done_.Notify(); + } + } + + // Enqueue all rhs/lhs packing for k-th slice. + void enqueue_packing(Index k, bool rhs) { + enqueue_packing_helper(0, rhs ? nn_ : nm_, k, rhs); + } + + void enqueue_packing_helper(Index start, Index end, Index k, bool rhs) { + if (end - start == 1) { + if (rhs) + pack_rhs(start, k); + else + pack_lhs(start, k); + } else { + Index mid = (start + end) / 2; + device_.enqueueNoNotification( + [=]() { enqueue_packing_helper(mid, end, k, rhs); }); + device_.enqueueNoNotification( + [=]() { enqueue_packing_helper(start, mid, k, rhs); }); + } + } + + // Block sizes with accounting for potentially incomplete last block. + Index bm(Index m) const { return m + 1 < nm0_ ? bm_ : m_ + bm_ - bm_ * nm0_; } + Index bn(Index n) const { return n + 1 < nn0_ ? bn_ : n_ + bn_ - bn_ * nn0_; } + Index bk(Index k) const { return k + 1 < nk_ ? bk_ : k_ + bk_ - bk_ * nk_; } + // Task grain sizes accounting for potentially incomplete last task. + Index gm(Index m) const { return m + 1 < nm_ ? gm_ : nm0_ + gm_ - gm_ * nm_; } + Index gn(Index n) const { return n + 1 < nn_ ? gn_ : nn0_ + gn_ - gn_ * nn_; } + + Context(const Context&) = delete; + void operator=(const Context&) = delete; + }; + + // Decide whether we want to shard m x n contraction by columns or by rows. 
+ static bool shardByCol(Index m, Index n, Index num_threads) { + // Note: we are comparing both n and m against Traits::nr, it is not + // a mistake. We are trying to figure out how both n and m will fit into + // the main sharding dimension. + + // Sharding by column is the default + // ... unless there is enough data for vectorization over rows + if (m / num_threads >= Traits::nr && + // and not enough data for vectorization over columns + (n / num_threads < Traits::nr || + // ... or barely enough data for vectorization over columns, + // but it is not evenly dividable across threads + (n / num_threads < 4 * Traits::nr && + (n % (num_threads * Traits::nr)) != 0 && + // ... and it is evenly dividable across threads for rows + ((m % (num_threads * Traits::nr)) == 0 || + // .. or it is not evenly dividable for both dimensions but + // there is much more data over rows so that corner effects are + // mitigated. + (m / n >= 6))))) + return false; + // Wait, or if matrices are just substantially prolonged over the other + // dimension. + if (n / num_threads < 16 * Traits::nr && m > n * 32) return false; + return true; + } + + Index coarsenM(Index m, Index n, Index bm, Index bn, Index bk, Index gn, + int num_threads, bool shard_by_col) const { + Index gm = 1; + Index gm1 = 1; + Index nm0 = divup(m, bm); + Index nm1 = nm0; + for (;;) { + // Find the next candidate for m grain size. It needs to result in + // different number of blocks. E.g. if we have 10 kernels, we want to try + // 5 and 10, but not 6, 7, 8 and 9. + while (gm1 <= nm0 && nm1 == divup(nm0, gm1)) gm1++; + if (gm1 > nm0) break; + // Check the candidate. + int res = checkGrain(m, n, bm, bn, bk, gm1, gn, gm, gn, num_threads, + shard_by_col); + if (res < 0) break; + nm1 = divup(nm0, gm1); + if (res == 0) continue; + // Commit new grain size. 
+ gm = gm1; + } + return gm; + } + + Index coarsenN(Index m, Index n, Index bm, Index bn, Index bk, Index gm, + int num_threads, bool shard_by_col) const { + Index gn = 1; + Index gn1 = 1; + Index nn0 = divup(n, bn); + Index nn1 = nn0; + for (;;) { + while (gn1 <= nn0 && nn1 == divup(nn0, gn1)) gn1++; + if (gn1 > nn0) break; + int res = checkGrain(m, n, bm, bn, bk, gm, gn1, gm, gn, num_threads, + shard_by_col); + if (res < 0) break; + nn1 = divup(nn0, gn1); + if (res == 0) continue; + gn = gn1; + } + return gn; + } + + // checkGrain checks whether grain (gm, gn) is suitable and is better than + // (oldgm, oldgn). + int checkGrain(Index m, Index n, Index bm, Index bn, Index bk, Index gm, + Index gn, Index oldgm, Index oldgn, int num_threads, + bool shard_by_col) const { + const TensorOpCost cost = + contractionCost(bm * gm, bn * gn, bm, bn, bk, shard_by_col, true); + double taskSize = TensorCostModel::taskSize( + static_cast(bm) * gm * bn * gn, cost); + // If the task is too small, then we agree on it regardless of anything + // else. Otherwise synchronization overheads will dominate. + if (taskSize < 1) return 1; + // If it is too large, then we reject it and all larger tasks. + if (taskSize > 2) return -1; + // Now we are in presumably good task size range. + // The main deciding factor here is parallelism. Consider that we have 12 + // kernels and 4 threads. Grains of 2, 3 and 4 all yield good task sizes. + // But 2/4 yield 6/3 tasks, which gives us parallelism of 0.75 (at most 3/4 + // of cores will be busy). While grain size 3 gives us 4 tasks, which gives + // us parallelism of 1 (we can load all cores). 
+ Index nm0 = divup(m, bm); + Index nn0 = divup(n, bn); + Index new_tasks = divup(nm0, gm) * divup(nn0, gn); + double new_parallelism = static_cast(new_tasks) / + (divup(new_tasks, num_threads) * num_threads); + Index old_tasks = divup(nm0, oldgm) * divup(nn0, oldgn); + double old_parallelism = static_cast(old_tasks) / + (divup(old_tasks, num_threads) * num_threads); + if (new_parallelism > old_parallelism || new_parallelism == 1) return 1; + return 0; + } + +#else // EIGEN_USE_SIMPLE_THREAD_POOL + template void evalProduct(Scalar* buffer) const { if (this->m_j_size == 1) { @@ -376,7 +995,7 @@ struct TensorEvaluator(PacketType::size, + PacketType::size); + const int output_packet_size = internal::unpacket_traits::size; + const double kd = static_cast(bk); + // Peak VFMA bandwidth is 0.5. However if we have not enough data for + // vectorization bandwidth drops. The 4.0 and 2.0 bandwidth is determined + // experimentally. + double computeBandwidth = bk == 1 ? 4.0 : + (shard_by_col ? bn : bm) < Traits::nr || + (shard_by_col ? bm : bn) < Traits::mr ? 2.0 : 0.5; +#ifndef EIGEN_VECTORIZE_FMA + // Bandwidth of all of VFMA/MULPS/ADDPS is 0.5 on latest Intel processors. + // However for MULPS/ADDPS we have dependent sequence of 2 such instructions, + // so overall bandwidth is 1.0. + if (computeBandwidth == 0.5) computeBandwidth = 1.0; +#endif + // Computations. + TensorOpCost cost = TensorOpCost(0, 0, kd * computeBandwidth, true, packed_size); + // Output stores. + cost += TensorOpCost(0, sizeof(CoeffReturnType), 0, true, output_packet_size); + if (prepacked) { + // Packing and kernels are executed in different tasks. When we calculate + // task grain size we look only at kernel cost assuming that kernel + // is more expensive than packing. + return cost; + } + // Lhs/rhs loads + computations. 
+ TensorOpCost lhsCost = this->m_leftImpl.costPerCoeff(true) * (kd / n); + TensorOpCost rhsCost = this->m_rightImpl.costPerCoeff(true) * (kd / m); + // Lhs packing memory cost does not contribute considerably to overall + // execution time because lhs is prefetched early and accessed sequentially. + if (shard_by_col) + lhsCost.dropMemoryCost(); + else + rhsCost.dropMemoryCost(); + return cost + lhsCost + rhsCost; + } }; } // end namespace Eigen diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorConversion.h b/unsupported/Eigen/CXX11/src/Tensor/TensorConversion.h index a2f1f71f5..860a6949a 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorConversion.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorConversion.h @@ -164,14 +164,14 @@ class TensorConversionOp : public TensorBase struct ConversionSubExprEval { - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE static bool run(Eval& impl, Scalar*) { + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool run(Eval& impl, Scalar*) { impl.evalSubExprsIfNeeded(NULL); return true; } }; template struct ConversionSubExprEval { - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE static bool run(Eval& impl, Scalar* data) { + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool run(Eval& impl, Scalar* data) { return impl.evalSubExprsIfNeeded(data); } }; @@ -193,7 +193,7 @@ struct TensorEvaluator, Device> enum { IsAligned = false, - PacketAccess = TensorEvaluator::PacketAccess && internal::type_casting_traits::VectorizedCast, + PacketAccess = true, Layout = TensorEvaluator::Layout, RawAccess = false }; @@ -224,11 +224,9 @@ struct TensorEvaluator, Device> template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const { - const int SrcCoeffRatio = internal::type_casting_traits::SrcCoeffRatio; - const int TgtCoeffRatio = internal::type_casting_traits::TgtCoeffRatio; - PacketConverter, PacketSourceType, PacketReturnType, - SrcCoeffRatio, TgtCoeffRatio> converter(m_impl); - return converter.template packet(index); + const bool 
Vectorizable = TensorEvaluator::PacketAccess & + internal::type_casting_traits::VectorizedCast; + return PacketConv::run(m_impl, index); } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost @@ -249,7 +247,31 @@ struct TensorEvaluator, Device> EIGEN_DEVICE_FUNC Scalar* data() const { return NULL; } protected: - TensorEvaluator m_impl; + template + struct PacketConv { + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType run(const TensorEvaluator& impl, Index index) { + internal::scalar_cast_op converter; + EIGEN_ALIGN_MAX typename internal::remove_const::type values[PacketSize]; + for (int i = 0; i < PacketSize; ++i) { + values[i] = converter(impl.coeff(index+i)); + } + PacketReturnType rslt = internal::pload(values); + return rslt; + } + }; + + template + struct PacketConv { + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType run(const TensorEvaluator& impl, Index index) { + const int SrcCoeffRatio = internal::type_casting_traits::SrcCoeffRatio; + const int TgtCoeffRatio = internal::type_casting_traits::TgtCoeffRatio; + PacketConverter, PacketSourceType, PacketReturnType, + SrcCoeffRatio, TgtCoeffRatio> converter(impl); + return converter.template packet(index); + } + }; + + TensorEvaluator m_impl; }; } // end namespace Eigen diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorConvolution.h b/unsupported/Eigen/CXX11/src/Tensor/TensorConvolution.h index 091007ab7..abdf742c6 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorConvolution.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorConvolution.h @@ -254,7 +254,7 @@ struct nested, 1, t template -class TensorConvolutionOp : public TensorBase > +class TensorConvolutionOp : public TensorBase, ReadOnlyAccessors> { public: typedef typename Eigen::internal::traits::Scalar Scalar; diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorCostModel.h b/unsupported/Eigen/CXX11/src/Tensor/TensorCostModel.h index 0f6dcedaa..83c449cf1 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorCostModel.h +++ 
b/unsupported/Eigen/CXX11/src/Tensor/TensorCostModel.h @@ -10,10 +10,6 @@ #ifndef EIGEN_CXX11_TENSOR_TENSOR_COST_MODEL_H #define EIGEN_CXX11_TENSOR_TENSOR_COST_MODEL_H -//#if !defined(EIGEN_USE_GPU) -//#define EIGEN_USE_COST_MODEL -//#endif - namespace Eigen { /** \class TensorEvaluator @@ -32,45 +28,47 @@ class TensorOpCost { // model based on minimal reciprocal throughput numbers from Intel or // Agner Fog's tables would be better than what is there now. template - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE static int MulCost() { + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE int MulCost() { return internal::functor_traits< internal::scalar_product_op >::Cost; } template - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE static int AddCost() { + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE int AddCost() { return internal::functor_traits >::Cost; } template - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE static int DivCost() { + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE int DivCost() { return internal::functor_traits< internal::scalar_quotient_op >::Cost; } template - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE static int ModCost() { + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE int ModCost() { return internal::functor_traits >::Cost; } template - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE static int CastCost() { + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE int CastCost() { return internal::functor_traits< internal::scalar_cast_op >::Cost; } + EIGEN_DEVICE_FUNC TensorOpCost() : bytes_loaded_(0), bytes_stored_(0), compute_cycles_(0) {} + EIGEN_DEVICE_FUNC TensorOpCost(double bytes_loaded, double bytes_stored, double compute_cycles) : bytes_loaded_(bytes_loaded), bytes_stored_(bytes_stored), compute_cycles_(compute_cycles) {} + EIGEN_DEVICE_FUNC TensorOpCost(double bytes_loaded, double bytes_stored, double compute_cycles, bool vectorized, double packet_size) : bytes_loaded_(bytes_loaded), bytes_stored_(bytes_stored), compute_cycles_(vectorized ? 
compute_cycles / packet_size : compute_cycles) { - using std::isfinite; - eigen_assert(bytes_loaded >= 0 && (isfinite)(bytes_loaded)); - eigen_assert(bytes_stored >= 0 && (isfinite)(bytes_stored)); - eigen_assert(compute_cycles >= 0 && (isfinite)(compute_cycles)); + eigen_assert(bytes_loaded >= 0 && (numext::isfinite)(bytes_loaded)); + eigen_assert(bytes_stored >= 0 && (numext::isfinite)(bytes_stored)); + eigen_assert(compute_cycles >= 0 && (numext::isfinite)(compute_cycles)); } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double bytes_loaded() const { @@ -96,21 +94,21 @@ class TensorOpCost { } // TODO(rmlarsen): Define min in terms of total cost, not elementwise. - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost& cwiseMin( - const TensorOpCost& rhs) { - bytes_loaded_ = numext::mini(bytes_loaded_, rhs.bytes_loaded()); - bytes_stored_ = numext::mini(bytes_stored_, rhs.bytes_stored()); - compute_cycles_ = numext::mini(compute_cycles_, rhs.compute_cycles()); - return *this; + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost cwiseMin( + const TensorOpCost& rhs) const { + double bytes_loaded = numext::mini(bytes_loaded_, rhs.bytes_loaded()); + double bytes_stored = numext::mini(bytes_stored_, rhs.bytes_stored()); + double compute_cycles = numext::mini(compute_cycles_, rhs.compute_cycles()); + return TensorOpCost(bytes_loaded, bytes_stored, compute_cycles); } // TODO(rmlarsen): Define max in terms of total cost, not elementwise. 
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost& cwiseMax( - const TensorOpCost& rhs) { - bytes_loaded_ = numext::maxi(bytes_loaded_, rhs.bytes_loaded()); - bytes_stored_ = numext::maxi(bytes_stored_, rhs.bytes_stored()); - compute_cycles_ = numext::maxi(compute_cycles_, rhs.compute_cycles()); - return *this; + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost cwiseMax( + const TensorOpCost& rhs) const { + double bytes_loaded = numext::maxi(bytes_loaded_, rhs.bytes_loaded()); + double bytes_stored = numext::maxi(bytes_stored_, rhs.bytes_stored()); + double compute_cycles = numext::maxi(compute_cycles_, rhs.compute_cycles()); + return TensorOpCost(bytes_loaded, bytes_stored, compute_cycles); } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost& operator+=( diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceCuda.h b/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceCuda.h index 1d2d162dc..4f5767bc7 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceCuda.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceCuda.h @@ -12,6 +12,8 @@ namespace Eigen { +static const int kCudaScratchSize = 1024; + // This defines an interface that GPUDevice can take to use // CUDA streams underneath. class StreamInterface { @@ -24,6 +26,15 @@ class StreamInterface { // Allocate memory on the actual device where the computation will run virtual void* allocate(size_t num_bytes) const = 0; virtual void deallocate(void* buffer) const = 0; + + // Return a scratchpad buffer of size 1k + virtual void* scratchpad() const = 0; + + // Return a semaphore. The semaphore is initially initialized to 0, and + // each kernel using it is responsible for resetting to 0 upon completion + // to maintain the invariant that the semaphore is always equal to 0 upon + // each kernel start. 
+ virtual unsigned int* semaphore() const = 0; }; static cudaDeviceProp* m_deviceProperties; @@ -31,7 +42,21 @@ static bool m_devicePropInitialized = false; static void initializeDeviceProp() { if (!m_devicePropInitialized) { - if (!m_devicePropInitialized) { + // Attempts to ensure proper behavior in the case of multiple threads + // calling this function simultaneously. This would be trivial to + // implement if we could use std::mutex, but unfortunately mutex don't + // compile with nvcc, so we resort to atomics and thread fences instead. + // Note that if the caller uses a compiler that doesn't support c++11 we + // can't ensure that the initialization is thread safe. +#if __cplusplus >= 201103L + static std::atomic first(true); + if (first.exchange(false)) { +#else + static bool first = true; + if (first) { + first = false; +#endif + // We're the first thread to reach this point. int num_devices; cudaError_t status = cudaGetDeviceCount(&num_devices); if (status != cudaSuccess) { @@ -52,7 +77,19 @@ static void initializeDeviceProp() { assert(status == cudaSuccess); } } + +#if __cplusplus >= 201103L + std::atomic_thread_fence(std::memory_order_release); +#endif m_devicePropInitialized = true; + } else { + // Wait for the other thread to inititialize the properties. 
+ while (!m_devicePropInitialized) { +#if __cplusplus >= 201103L + std::atomic_thread_fence(std::memory_order_acquire); +#endif + sleep(1); + } } } } @@ -62,12 +99,12 @@ static const cudaStream_t default_stream = cudaStreamDefault; class CudaStreamDevice : public StreamInterface { public: // Use the default stream on the current device - CudaStreamDevice() : stream_(&default_stream) { + CudaStreamDevice() : stream_(&default_stream), scratch_(NULL), semaphore_(NULL) { cudaGetDevice(&device_); initializeDeviceProp(); } // Use the default stream on the specified device - CudaStreamDevice(int device) : stream_(&default_stream), device_(device) { + CudaStreamDevice(int device) : stream_(&default_stream), device_(device), scratch_(NULL), semaphore_(NULL) { initializeDeviceProp(); } // Use the specified stream. Note that it's the @@ -75,7 +112,7 @@ class CudaStreamDevice : public StreamInterface { // the specified device. If no device is specified the code // assumes that the stream is associated to the current gpu device. 
CudaStreamDevice(const cudaStream_t* stream, int device = -1) - : stream_(stream), device_(device) { + : stream_(stream), device_(device), scratch_(NULL), semaphore_(NULL) { if (device < 0) { cudaGetDevice(&device_); } else { @@ -89,6 +126,12 @@ class CudaStreamDevice : public StreamInterface { initializeDeviceProp(); } + virtual ~CudaStreamDevice() { + if (scratch_) { + deallocate(scratch_); + } + } + const cudaStream_t& stream() const { return *stream_; } const cudaDeviceProp& deviceProperties() const { return m_deviceProperties[device_]; @@ -112,9 +155,29 @@ class CudaStreamDevice : public StreamInterface { assert(err == cudaSuccess); } + virtual void* scratchpad() const { + if (scratch_ == NULL) { + scratch_ = allocate(kCudaScratchSize + sizeof(unsigned int)); + } + return scratch_; + } + + virtual unsigned int* semaphore() const { + if (semaphore_ == NULL) { + char* scratch = static_cast(scratchpad()) + kCudaScratchSize; + semaphore_ = reinterpret_cast(scratch); + cudaError_t err = cudaMemsetAsync(semaphore_, 0, sizeof(unsigned int), *stream_); + EIGEN_UNUSED_VARIABLE(err) + assert(err == cudaSuccess); + } + return semaphore_; + } + private: const cudaStream_t* stream_; int device_; + mutable void* scratch_; + mutable unsigned int* semaphore_; }; struct GpuDevice { @@ -131,22 +194,20 @@ struct GpuDevice { return stream_->stream(); } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void* allocate(size_t num_bytes) const { -#ifndef __CUDA_ARCH__ + EIGEN_STRONG_INLINE void* allocate(size_t num_bytes) const { return stream_->allocate(num_bytes); -#else - eigen_assert(false && "The default device should be used instead to generate kernel code"); - return NULL; -#endif } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void deallocate(void* buffer) const { -#ifndef __CUDA_ARCH__ + EIGEN_STRONG_INLINE void deallocate(void* buffer) const { stream_->deallocate(buffer); + } -#else - eigen_assert(false && "The default device should be used instead to generate kernel code"); -#endif + 
EIGEN_STRONG_INLINE void* scratchpad() const { + return stream_->scratchpad(); + } + + EIGEN_STRONG_INLINE unsigned int* semaphore() const { + return stream_->semaphore(); } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void memcpy(void* dst, const void* src, size_t n) const { @@ -156,30 +217,22 @@ struct GpuDevice { EIGEN_UNUSED_VARIABLE(err) assert(err == cudaSuccess); #else - eigen_assert(false && "The default device should be used instead to generate kernel code"); + eigen_assert(false && "The default device should be used instead to generate kernel code"); #endif } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void memcpyHostToDevice(void* dst, const void* src, size_t n) const { -#ifndef __CUDA_ARCH__ + EIGEN_STRONG_INLINE void memcpyHostToDevice(void* dst, const void* src, size_t n) const { cudaError_t err = cudaMemcpyAsync(dst, src, n, cudaMemcpyHostToDevice, stream_->stream()); EIGEN_UNUSED_VARIABLE(err) assert(err == cudaSuccess); -#else - eigen_assert(false && "The default device should be used instead to generate kernel code"); -#endif } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void memcpyDeviceToHost(void* dst, const void* src, size_t n) const { -#ifndef __CUDA_ARCH__ + EIGEN_STRONG_INLINE void memcpyDeviceToHost(void* dst, const void* src, size_t n) const { cudaError_t err = cudaMemcpyAsync(dst, src, n, cudaMemcpyDeviceToHost, stream_->stream()); EIGEN_UNUSED_VARIABLE(err) assert(err == cudaSuccess); -#else - eigen_assert(false && "The default device should be used instead to generate kernel code"); -#endif } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void memset(void* buffer, int c, size_t n) const { @@ -188,21 +241,21 @@ struct GpuDevice { EIGEN_UNUSED_VARIABLE(err) assert(err == cudaSuccess); #else - eigen_assert(false && "The default device should be used instead to generate kernel code"); + eigen_assert(false && "The default device should be used instead to generate kernel code"); #endif } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE size_t numThreads() const { + 
EIGEN_STRONG_INLINE size_t numThreads() const { // FIXME return 32; } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE size_t firstLevelCacheSize() const { + EIGEN_STRONG_INLINE size_t firstLevelCacheSize() const { // FIXME return 48*1024; } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE size_t lastLevelCacheSize() const { + EIGEN_STRONG_INLINE size_t lastLevelCacheSize() const { // We won't try to take advantage of the l2 cache for the time being, and // there is no l3 cache on cuda devices. return firstLevelCacheSize(); @@ -222,56 +275,26 @@ struct GpuDevice { #endif } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE int getNumCudaMultiProcessors() const { -#ifndef __CUDA_ARCH__ + EIGEN_STRONG_INLINE int getNumCudaMultiProcessors() const { return stream_->deviceProperties().multiProcessorCount; -#else - eigen_assert(false && "The default device should be used instead to generate kernel code"); - return 0; -#endif } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE int maxCudaThreadsPerBlock() const { -#ifndef __CUDA_ARCH__ + EIGEN_STRONG_INLINE int maxCudaThreadsPerBlock() const { return stream_->deviceProperties().maxThreadsPerBlock; -#else - eigen_assert(false && "The default device should be used instead to generate kernel code"); - return 0; -#endif } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE int maxCudaThreadsPerMultiProcessor() const { -#ifndef __CUDA_ARCH__ + EIGEN_STRONG_INLINE int maxCudaThreadsPerMultiProcessor() const { return stream_->deviceProperties().maxThreadsPerMultiProcessor; -#else - eigen_assert(false && "The default device should be used instead to generate kernel code"); - return 0; -#endif } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE int sharedMemPerBlock() const { -#ifndef __CUDA_ARCH__ + EIGEN_STRONG_INLINE int sharedMemPerBlock() const { return stream_->deviceProperties().sharedMemPerBlock; -#else - eigen_assert(false && "The default device should be used instead to generate kernel code"); - return 0; -#endif } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE int majorDeviceVersion() 
const { -#ifndef __CUDA_ARCH__ + EIGEN_STRONG_INLINE int majorDeviceVersion() const { return stream_->deviceProperties().major; -#else - eigen_assert(false && "The default device should be used instead to generate kernel code"); - return 0; -#endif } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE int minorDeviceVersion() const { -#ifndef __CUDA_ARCH__ + EIGEN_STRONG_INLINE int minorDeviceVersion() const { return stream_->deviceProperties().minor; -#else - eigen_assert(false && "The default device should be used instead to generate kernel code"); - return 0; -#endif } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE int maxBlocks() const { + EIGEN_STRONG_INLINE int maxBlocks() const { return max_blocks_; } diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceThreadPool.h b/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceThreadPool.h index c02891465..069680a11 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceThreadPool.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceThreadPool.h @@ -14,7 +14,7 @@ namespace Eigen { // Use the SimpleThreadPool by default. We'll switch to the new non blocking // thread pool later. -#ifdef EIGEN_USE_NONBLOCKING_THREAD_POOL +#ifndef EIGEN_USE_SIMPLE_THREAD_POOL template using ThreadPoolTempl = NonBlockingThreadPoolTempl; typedef NonBlockingThreadPool ThreadPool; #else @@ -106,7 +106,7 @@ static EIGEN_STRONG_INLINE void wait_until_ready(SyncType* n) { // Build a thread pool device on top the an existing pool of threads. struct ThreadPoolDevice { // The ownership of the thread pool remains with the caller. 
- ThreadPoolDevice(ThreadPoolInterface* pool, size_t num_cores) : pool_(pool), num_threads_(num_cores) { } + ThreadPoolDevice(ThreadPoolInterface* pool, int num_cores) : pool_(pool), num_threads_(num_cores) { } EIGEN_STRONG_INLINE void* allocate(size_t num_bytes) const { return internal::aligned_malloc(num_bytes); @@ -130,7 +130,7 @@ struct ThreadPoolDevice { ::memset(buffer, c, n); } - EIGEN_STRONG_INLINE size_t numThreads() const { + EIGEN_STRONG_INLINE int numThreads() const { return num_threads_; } @@ -151,9 +151,7 @@ struct ThreadPoolDevice { template EIGEN_STRONG_INLINE Notification* enqueue(Function&& f, Args&&... args) const { Notification* n = new Notification(); - std::function func = - std::bind(&FunctionWrapperWithNotification::run, n, f, args...); - pool_->Schedule(func); + pool_->Schedule(std::bind(&FunctionWrapperWithNotification::run, n, f, args...)); return n; } @@ -161,20 +159,118 @@ struct ThreadPoolDevice { EIGEN_STRONG_INLINE void enqueue_with_barrier(Barrier* b, Function&& f, Args&&... args) const { - std::function func = std::bind( - &FunctionWrapperWithBarrier::run, b, f, args...); - pool_->Schedule(func); + pool_->Schedule(std::bind( + &FunctionWrapperWithBarrier::run, b, f, args...)); } template EIGEN_STRONG_INLINE void enqueueNoNotification(Function&& f, Args&&... args) const { - std::function func = std::bind(f, args...); - pool_->Schedule(func); + pool_->Schedule(std::bind(f, args...)); + } + + // Returns a logical thread index between 0 and pool_->NumThreads() - 1 if + // called from one of the threads in pool_. Returns -1 otherwise. + EIGEN_STRONG_INLINE int currentThreadId() const { + return pool_->CurrentThreadId(); + } + + // parallelFor executes f with [0, n) arguments in parallel and waits for + // completion. F accepts a half-open interval [first, last). + // Block size is choosen based on the iteration cost and resulting parallel + // efficiency. If block_align is not nullptr, it is called to round up the + // block size. 
+ void parallelFor(Index n, const TensorOpCost& cost, + std::function block_align, + std::function f) const { + typedef TensorCostModel CostModel; + if (n <= 1 || numThreads() == 1 || + CostModel::numThreads(n, cost, static_cast(numThreads())) == 1) { + f(0, n); + return; + } + + // Calculate block size based on (1) the iteration cost and (2) parallel + // efficiency. We want blocks to be not too small to mitigate + // parallelization overheads; not too large to mitigate tail + // effect and potential load imbalance and we also want number + // of blocks to be evenly dividable across threads. + + double block_size_f = 1.0 / CostModel::taskSize(1, cost); + Index block_size = numext::mini(n, numext::maxi(1, block_size_f)); + const Index max_block_size = + numext::mini(n, numext::maxi(1, 2 * block_size_f)); + if (block_align) { + Index new_block_size = block_align(block_size); + eigen_assert(new_block_size >= block_size); + block_size = numext::mini(n, new_block_size); + } + Index block_count = divup(n, block_size); + // Calculate parallel efficiency as fraction of total CPU time used for + // computations: + double max_efficiency = + static_cast(block_count) / + (divup(block_count, numThreads()) * numThreads()); + // Now try to increase block size up to max_block_size as long as it + // doesn't decrease parallel efficiency. + for (Index prev_block_count = block_count; prev_block_count > 1;) { + // This is the next block size that divides size into a smaller number + // of blocks than the current block_size. + Index coarser_block_size = divup(n, prev_block_count - 1); + if (block_align) { + Index new_block_size = block_align(coarser_block_size); + eigen_assert(new_block_size >= coarser_block_size); + coarser_block_size = numext::mini(n, new_block_size); + } + if (coarser_block_size > max_block_size) { + break; // Reached max block size. Stop. + } + // Recalculate parallel efficiency. 
+ const Index coarser_block_count = divup(n, coarser_block_size); + eigen_assert(coarser_block_count < prev_block_count); + prev_block_count = coarser_block_count; + const double coarser_efficiency = + static_cast(coarser_block_count) / + (divup(coarser_block_count, numThreads()) * numThreads()); + if (coarser_efficiency + 0.01 >= max_efficiency) { + // Taking it. + block_size = coarser_block_size; + block_count = coarser_block_count; + if (max_efficiency < coarser_efficiency) { + max_efficiency = coarser_efficiency; + } + } + } + + // Recursively divide size into halves until we reach block_size. + // Division code rounds mid to block_size, so we are guaranteed to get + // block_count leaves that do actual computations. + Barrier barrier(static_cast(block_count)); + std::function handleRange; + handleRange = [=, &handleRange, &barrier, &f](Index first, Index last) { + if (last - first <= block_size) { + // Single block or less, execute directly. + f(first, last); + barrier.Notify(); + return; + } + // Split into halves and submit to the pool. + Index mid = first + divup((last - first) / 2, block_size) * block_size; + pool_->Schedule([=, &handleRange]() { handleRange(mid, last); }); + pool_->Schedule([=, &handleRange]() { handleRange(first, mid); }); + }; + handleRange(0, n); + barrier.Wait(); + } + + // Convenience wrapper for parallelFor that does not align blocks. 
+ void parallelFor(Index n, const TensorOpCost& cost, + std::function f) const { + parallelFor(n, cost, nullptr, std::move(f)); } private: ThreadPoolInterface* pool_; - size_t num_threads_; + int num_threads_; }; diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorDimensionList.h b/unsupported/Eigen/CXX11/src/Tensor/TensorDimensionList.h index ca9ac79df..1a30e45fb 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorDimensionList.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorDimensionList.h @@ -44,7 +44,7 @@ template const Index array_get(c } -#if defined(EIGEN_HAS_CONSTEXPR) +#if EIGEN_HAS_CONSTEXPR template struct index_known_statically_impl > { EIGEN_DEVICE_FUNC static constexpr bool run(const DenseIndex) { diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorDimensions.h b/unsupported/Eigen/CXX11/src/Tensor/TensorDimensions.h index f0b8ac958..b24cdebf1 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorDimensions.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorDimensions.h @@ -29,14 +29,6 @@ namespace Eigen { * \sa Tensor */ -// Can't use std::pair on cuda devices -template struct IndexPair { - EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE IndexPair() : first(0), second(0) { } - EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE IndexPair(Index f, Index s) : first(f), second(s) { } - Index first; - Index second; -}; - // Boilerplate code namespace internal { @@ -115,7 +107,7 @@ struct Sizes : internal::numeric_list { explicit EIGEN_DEVICE_FUNC Sizes(const array& /*indices*/) { // todo: add assertion } -#ifdef EIGEN_HAS_VARIADIC_TEMPLATES +#if EIGEN_HAS_VARIADIC_TEMPLATES template EIGEN_DEVICE_FUNC Sizes(DenseIndex...) { } explicit EIGEN_DEVICE_FUNC Sizes(std::initializer_list /*l*/) { // todo: add assertion @@ -182,7 +174,7 @@ template Sizes(DenseIndex... 
/*indices*/) { } explicit Sizes(std::initializer_list) { // todo: add assertion @@ -190,13 +182,13 @@ template { (*this)[0] = i0; } -#ifdef EIGEN_HAS_VARIADIC_TEMPLATES +#if EIGEN_HAS_VARIADIC_TEMPLATES template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE explicit DSizes(DenseIndex firstDimension, DenseIndex secondDimension, IndexTypes... otherDimensions) : Base({{firstDimension, secondDimension, otherDimensions...}}) { EIGEN_STATIC_ASSERT(sizeof...(otherDimensions) + 2 == NumDims, YOU_MADE_A_PROGRAMMING_MISTAKE) } #else - EIGEN_DEVICE_FUNC explicit DSizes(const DenseIndex i0, const DenseIndex i1) { + EIGEN_DEVICE_FUNC DSizes(const DenseIndex i0, const DenseIndex i1) { eigen_assert(NumDims == 2); (*this)[0] = i0; (*this)[1] = i1; } - EIGEN_DEVICE_FUNC explicit DSizes(const DenseIndex i0, const DenseIndex i1, const DenseIndex i2) { + EIGEN_DEVICE_FUNC DSizes(const DenseIndex i0, const DenseIndex i1, const DenseIndex i2) { eigen_assert(NumDims == 3); (*this)[0] = i0; (*this)[1] = i1; (*this)[2] = i2; } - EIGEN_DEVICE_FUNC explicit DSizes(const DenseIndex i0, const DenseIndex i1, const DenseIndex i2, const DenseIndex i3) { + EIGEN_DEVICE_FUNC DSizes(const DenseIndex i0, const DenseIndex i1, const DenseIndex i2, const DenseIndex i3) { eigen_assert(NumDims == 4); (*this)[0] = i0; (*this)[1] = i1; (*this)[2] = i2; (*this)[3] = i3; } - EIGEN_DEVICE_FUNC explicit DSizes(const DenseIndex i0, const DenseIndex i1, const DenseIndex i2, const DenseIndex i3, const DenseIndex i4) { + EIGEN_DEVICE_FUNC DSizes(const DenseIndex i0, const DenseIndex i1, const DenseIndex i2, const DenseIndex i3, const DenseIndex i4) { eigen_assert(NumDims == 5); (*this)[0] = i0; (*this)[1] = i1; diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorEvalTo.h b/unsupported/Eigen/CXX11/src/Tensor/TensorEvalTo.h index c556fec0f..a08dfa7c3 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorEvalTo.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorEvalTo.h @@ -56,7 +56,7 @@ struct nested, 1, typename eval 
template -class TensorEvalToOp : public TensorBase > +class TensorEvalToOp : public TensorBase, ReadOnlyAccessors> { public: typedef typename Eigen::internal::traits::Scalar Scalar; @@ -94,7 +94,7 @@ struct TensorEvaluator, Device> static const int PacketSize = internal::unpacket_traits::size; enum { - IsAligned = true, + IsAligned = TensorEvaluator::IsAligned, PacketAccess = TensorEvaluator::PacketAccess, Layout = TensorEvaluator::Layout, CoordAccess = false, // to be implemented diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h b/unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h index ae4ce3c90..61c111cec 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h @@ -129,6 +129,10 @@ template <> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE double loadConstant(const double* address) { return __ldg(address); } +template <> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE +Eigen::half loadConstant(const Eigen::half* address) { + return Eigen::half(half_impl::raw_uint16_to_half(__ldg(&address->x))); +} #endif } @@ -222,7 +226,7 @@ struct TensorEvaluator, Device> EIGEN_DEVICE_FUNC TensorEvaluator(const XprType& op, const Device& device) - : m_functor(op.functor()), m_argImpl(op.nestedExpression(), device) + : m_functor(op.functor()), m_argImpl(op.nestedExpression(), device), m_wrapper() { } typedef typename XprType::Index Index; @@ -239,13 +243,13 @@ struct TensorEvaluator, Device> EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index index) const { - return m_functor(index); + return m_wrapper(m_functor, index); } template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const { - return m_functor.template packetOp(index); + return m_wrapper.template packetOp(m_functor, index); } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost @@ -259,6 +263,7 @@ struct TensorEvaluator, Device> private: const NullaryOp m_functor; TensorEvaluator m_argImpl; + const internal::nullary_wrapper 
m_wrapper; }; @@ -399,6 +404,101 @@ struct TensorEvaluator m_rightImpl; }; +// -------------------- CwiseTernaryOp -------------------- + +template +struct TensorEvaluator, Device> +{ + typedef TensorCwiseTernaryOp XprType; + + enum { + IsAligned = TensorEvaluator::IsAligned & TensorEvaluator::IsAligned & TensorEvaluator::IsAligned, + PacketAccess = TensorEvaluator::PacketAccess & TensorEvaluator::PacketAccess & TensorEvaluator::PacketAccess & + internal::functor_traits::PacketAccess, + Layout = TensorEvaluator::Layout, + CoordAccess = false, // to be implemented + RawAccess = false + }; + + EIGEN_DEVICE_FUNC TensorEvaluator(const XprType& op, const Device& device) + : m_functor(op.functor()), + m_arg1Impl(op.arg1Expression(), device), + m_arg2Impl(op.arg2Expression(), device), + m_arg3Impl(op.arg3Expression(), device) + { + EIGEN_STATIC_ASSERT((static_cast(TensorEvaluator::Layout) == static_cast(TensorEvaluator::Layout) || internal::traits::NumDimensions <= 1), YOU_MADE_A_PROGRAMMING_MISTAKE); + + EIGEN_STATIC_ASSERT((internal::is_same::StorageKind, + typename internal::traits::StorageKind>::value), + STORAGE_KIND_MUST_MATCH) + EIGEN_STATIC_ASSERT((internal::is_same::StorageKind, + typename internal::traits::StorageKind>::value), + STORAGE_KIND_MUST_MATCH) + EIGEN_STATIC_ASSERT((internal::is_same::Index, + typename internal::traits::Index>::value), + STORAGE_INDEX_MUST_MATCH) + EIGEN_STATIC_ASSERT((internal::is_same::Index, + typename internal::traits::Index>::value), + STORAGE_INDEX_MUST_MATCH) + + eigen_assert(dimensions_match(m_arg1Impl.dimensions(), m_arg2Impl.dimensions()) && dimensions_match(m_arg1Impl.dimensions(), m_arg3Impl.dimensions())); + } + + typedef typename XprType::Index Index; + typedef typename XprType::Scalar Scalar; + typedef typename internal::traits::Scalar CoeffReturnType; + typedef typename PacketType::type PacketReturnType; + static const int PacketSize = internal::unpacket_traits::size; + typedef typename TensorEvaluator::Dimensions 
Dimensions; + + EIGEN_DEVICE_FUNC const Dimensions& dimensions() const + { + // TODO: use arg2 or arg3 dimensions if they are known at compile time. + return m_arg1Impl.dimensions(); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(CoeffReturnType*) { + m_arg1Impl.evalSubExprsIfNeeded(NULL); + m_arg2Impl.evalSubExprsIfNeeded(NULL); + m_arg3Impl.evalSubExprsIfNeeded(NULL); + return true; + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() { + m_arg1Impl.cleanup(); + m_arg2Impl.cleanup(); + m_arg3Impl.cleanup(); + } + + EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index index) const + { + return m_functor(m_arg1Impl.coeff(index), m_arg2Impl.coeff(index), m_arg3Impl.coeff(index)); + } + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const + { + return m_functor.packetOp(m_arg1Impl.template packet(index), + m_arg2Impl.template packet(index), + m_arg3Impl.template packet(index)); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost + costPerCoeff(bool vectorized) const { + const double functor_cost = internal::functor_traits::Cost; + return m_arg1Impl.costPerCoeff(vectorized) + + m_arg2Impl.costPerCoeff(vectorized) + + m_arg3Impl.costPerCoeff(vectorized) + + TensorOpCost(0, 0, functor_cost, vectorized, PacketSize); + } + + EIGEN_DEVICE_FUNC CoeffReturnType* data() const { return NULL; } + + private: + const TernaryOp m_functor; + TensorEvaluator m_arg1Impl; + TensorEvaluator m_arg2Impl; + TensorEvaluator m_arg3Impl; +}; + // -------------------- SelectOp -------------------- @@ -475,7 +575,7 @@ struct TensorEvaluator .cwiseMax(m_elseImpl.costPerCoeff(vectorized)); } - EIGEN_DEVICE_FUNC CoeffReturnType* data() const { return NULL; } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType* data() const { return NULL; } private: TensorEvaluator m_condImpl; diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h b/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h index 5c3d4d630..0cac7b179 
100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h @@ -59,13 +59,14 @@ class TensorExecutor { const Index size = array_prod(evaluator.dimensions()); const int PacketSize = unpacket_traits::PacketReturnType>::size; - // Manually unroll this loop since compilers don't do it. + // Give the compiler a strong hint to unroll the loop. But don't insist + // on unrolling, because if the function is expensive the compiler should not + // unroll the loop at the expense of inlining. const Index UnrolledSize = (size / (4 * PacketSize)) * 4 * PacketSize; for (Index i = 0; i < UnrolledSize; i += 4*PacketSize) { - evaluator.evalPacket(i); - evaluator.evalPacket(i+PacketSize); - evaluator.evalPacket(i+2*PacketSize); - evaluator.evalPacket(i+3*PacketSize); + for (Index j = 0; j < 4; j++) { + evaluator.evalPacket(i + j * PacketSize); + } } const Index VectorizedSize = (size / PacketSize) * PacketSize; for (Index i = UnrolledSize; i < VectorizedSize; i += PacketSize) { @@ -92,24 +93,30 @@ struct EvalRange { evaluator.evalScalar(i); } } + + static Index alignBlockSize(Index size) { + return size; + } }; template struct EvalRange { + static const int PacketSize = unpacket_traits::size; + static void run(Evaluator* evaluator_in, const Index first, const Index last) { Evaluator evaluator = *evaluator_in; eigen_assert(last >= first); Index i = first; - const int PacketSize = unpacket_traits::size; if (last - first >= PacketSize) { eigen_assert(first % PacketSize == 0); Index last_chunk_offset = last - 4 * PacketSize; - // Manually unroll this loop since compilers don't do it. + // Give the compiler a strong hint to unroll the loop. But don't insist + // on unrolling, because if the function is expensive the compiler should not + // unroll the loop at the expense of inlining. 
for (; i <= last_chunk_offset; i += 4*PacketSize) { - evaluator.evalPacket(i); - evaluator.evalPacket(i+PacketSize); - evaluator.evalPacket(i+2*PacketSize); - evaluator.evalPacket(i+3*PacketSize); + for (Index j = 0; j < 4; j++) { + evaluator.evalPacket(i + j * PacketSize); + } } last_chunk_offset = last - PacketSize; for (; i <= last_chunk_offset; i += PacketSize) { @@ -120,6 +127,15 @@ struct EvalRange { evaluator.evalScalar(i); } } + + static Index alignBlockSize(Index size) { + // Align block size to packet size and account for unrolling in run above. + if (size >= 16 * PacketSize) { + return (size + 4 * PacketSize - 1) & ~(4 * PacketSize - 1); + } + // Aligning to 4 * PacketSize would increase block size by more than 25%. + return (size + PacketSize - 1) & ~(PacketSize - 1); + } }; template @@ -133,18 +149,23 @@ class TensorExecutor { const bool needs_assign = evaluator.evalSubExprsIfNeeded(NULL); if (needs_assign) { - const Index PacketSize = Vectorizable ? unpacket_traits::size : 1; const Index size = array_prod(evaluator.dimensions()); +#if !defined(EIGEN_USE_SIMPLE_THREAD_POOL) + device.parallelFor(size, evaluator.costPerCoeff(Vectorizable), + EvalRange::alignBlockSize, + [&evaluator](Index first, Index last) { + EvalRange::run(&evaluator, first, last); + }); +#else size_t num_threads = device.numThreads(); -#ifdef EIGEN_USE_COST_MODEL if (num_threads > 1) { num_threads = TensorCostModel::numThreads( size, evaluator.costPerCoeff(Vectorizable), num_threads); } -#endif if (num_threads == 1) { EvalRange::run(&evaluator, 0, size); } else { + const Index PacketSize = Vectorizable ? 
unpacket_traits::size : 1; Index blocksz = std::ceil(static_cast(size)/num_threads) + PacketSize - 1; const Index blocksize = numext::maxi(PacketSize, (blocksz - (blocksz % PacketSize))); const Index numblocks = size / blocksize; @@ -161,11 +182,12 @@ class TensorExecutor { } barrier.Wait(); } +#endif // defined(!EIGEN_USE_SIMPLE_THREAD_POOL) } evaluator.cleanup(); } }; -#endif +#endif // EIGEN_USE_THREADS // GPU: the evaluation of the expression is offloaded to a GPU. @@ -212,16 +234,11 @@ struct EigenMetaKernelEval { template __global__ void __launch_bounds__(1024) -EigenMetaKernel(Evaluator memcopied_eval, Index size) { +EigenMetaKernel(Evaluator eval, Index size) { const Index first_index = blockIdx.x * blockDim.x + threadIdx.x; const Index step_size = blockDim.x * gridDim.x; - // Cuda memcopies the kernel arguments. That's fine for POD, but for more - // complex types such as evaluators we should really conform to the C++ - // standard and call a proper copy constructor. - Evaluator eval(memcopied_eval); - const bool vectorizable = Evaluator::PacketAccess & Evaluator::IsAligned; EigenMetaKernelEval::run(eval, first_index, size, step_size); } diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorExpr.h b/unsupported/Eigen/CXX11/src/Tensor/TensorExpr.h index 8491c4ca2..5f2e329f2 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorExpr.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorExpr.h @@ -218,6 +218,86 @@ class TensorCwiseBinaryOp : public TensorBase +struct traits > +{ + // Type promotion to handle the case where the types of the args are different. 
+ typedef typename result_of< + TernaryOp(typename Arg1XprType::Scalar, + typename Arg2XprType::Scalar, + typename Arg3XprType::Scalar)>::type Scalar; + typedef traits XprTraits; + typedef typename traits::StorageKind StorageKind; + typedef typename traits::Index Index; + typedef typename Arg1XprType::Nested Arg1Nested; + typedef typename Arg2XprType::Nested Arg2Nested; + typedef typename Arg3XprType::Nested Arg3Nested; + typedef typename remove_reference::type _Arg1Nested; + typedef typename remove_reference::type _Arg2Nested; + typedef typename remove_reference::type _Arg3Nested; + static const int NumDimensions = XprTraits::NumDimensions; + static const int Layout = XprTraits::Layout; + + enum { + Flags = 0 + }; +}; + +template +struct eval, Eigen::Dense> +{ + typedef const TensorCwiseTernaryOp& type; +}; + +template +struct nested, 1, typename eval >::type> +{ + typedef TensorCwiseTernaryOp type; +}; + +} // end namespace internal + + + +template +class TensorCwiseTernaryOp : public TensorBase, ReadOnlyAccessors> +{ + public: + typedef typename Eigen::internal::traits::Scalar Scalar; + typedef typename Eigen::NumTraits::Real RealScalar; + typedef Scalar CoeffReturnType; + typedef typename Eigen::internal::nested::type Nested; + typedef typename Eigen::internal::traits::StorageKind StorageKind; + typedef typename Eigen::internal::traits::Index Index; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorCwiseTernaryOp(const Arg1XprType& arg1, const Arg2XprType& arg2, const Arg3XprType& arg3, const TernaryOp& func = TernaryOp()) + : m_arg1_xpr(arg1), m_arg2_xpr(arg2), m_arg3_xpr(arg3), m_functor(func) {} + + EIGEN_DEVICE_FUNC + const TernaryOp& functor() const { return m_functor; } + + /** \returns the nested expressions */ + EIGEN_DEVICE_FUNC + const typename internal::remove_all::type& + arg1Expression() const { return m_arg1_xpr; } + + EIGEN_DEVICE_FUNC + const typename internal::remove_all::type& + arg2Expression() const { return m_arg2_xpr; } + + EIGEN_DEVICE_FUNC 
+ const typename internal::remove_all::type& + arg3Expression() const { return m_arg3_xpr; } + + protected: + typename Arg1XprType::Nested m_arg1_xpr; + typename Arg1XprType::Nested m_arg2_xpr; + typename Arg3XprType::Nested m_arg3_xpr; + const TernaryOp m_functor; +}; + + namespace internal { template struct traits > @@ -252,7 +332,7 @@ struct nested, 1, typename e template -class TensorSelectOp : public TensorBase > +class TensorSelectOp : public TensorBase, ReadOnlyAccessors> { public: typedef typename Eigen::internal::traits::Scalar Scalar; diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorFFT.h b/unsupported/Eigen/CXX11/src/Tensor/TensorFFT.h index ece2ed91b..08eb5595a 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorFFT.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorFFT.h @@ -329,7 +329,7 @@ struct TensorEvaluator, D for (Index i = 0; i < n; ++i) { if(FFTDir == FFT_FORWARD) { - a[i] = data[i] * std::conj(pos_j_base_powered[i]); + a[i] = data[i] * numext::conj(pos_j_base_powered[i]); } else { a[i] = data[i] * pos_j_base_powered[i]; @@ -344,7 +344,7 @@ struct TensorEvaluator, D b[i] = pos_j_base_powered[i]; } else { - b[i] = std::conj(pos_j_base_powered[i]); + b[i] = numext::conj(pos_j_base_powered[i]); } } for (Index i = n; i < m - n; ++i) { @@ -355,7 +355,7 @@ struct TensorEvaluator, D b[i] = pos_j_base_powered[m-i]; } else { - b[i] = std::conj(pos_j_base_powered[m-i]); + b[i] = numext::conj(pos_j_base_powered[m-i]); } } @@ -379,7 +379,7 @@ struct TensorEvaluator, D for (Index i = 0; i < n; ++i) { if(FFTDir == FFT_FORWARD) { - data[i] = a[i] * std::conj(pos_j_base_powered[i]); + data[i] = a[i] * numext::conj(pos_j_base_powered[i]); } else { data[i] = a[i] * pos_j_base_powered[i]; diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorFixedSize.h b/unsupported/Eigen/CXX11/src/Tensor/TensorFixedSize.h index b27ee0084..fcee5f60d 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorFixedSize.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorFixedSize.h 
@@ -65,7 +65,7 @@ class TensorFixedSize : public TensorBase EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar& coeff(Index firstIndex, IndexTypes... otherIndices) const { @@ -97,7 +97,7 @@ class TensorFixedSize : public TensorBase EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& coeffRef(Index firstIndex, IndexTypes... otherIndices) { @@ -128,7 +128,7 @@ class TensorFixedSize : public TensorBase EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar& operator()(Index firstIndex, IndexTypes... otherIndices) const { @@ -213,7 +213,7 @@ class TensorFixedSize : public TensorBase EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& operator()(Index firstIndex, IndexTypes... otherIndices) { @@ -309,7 +309,7 @@ class TensorFixedSize : public TensorBase, 1, typename eval -class TensorForcedEvalOp : public TensorBase > +class TensorForcedEvalOp : public TensorBase, ReadOnlyAccessors> { public: typedef typename Eigen::internal::traits::Scalar Scalar; @@ -102,7 +102,7 @@ struct TensorEvaluator, Device> EIGEN_DEVICE_FUNC const Dimensions& dimensions() const { return m_impl.dimensions(); } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(CoeffReturnType*) { - const Index numValues = m_impl.dimensions().TotalSize(); + const Index numValues = internal::array_prod(m_impl.dimensions()); m_buffer = (CoeffReturnType*)m_device.allocate(numValues * sizeof(CoeffReturnType)); // Should initialize the memory in case we're dealing with non POD types. 
if (NumTraits::RequireInitialization) { diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorForwardDeclarations.h b/unsupported/Eigen/CXX11/src/Tensor/TensorForwardDeclarations.h index a8bd8b888..490ddd8bd 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorForwardDeclarations.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorForwardDeclarations.h @@ -16,11 +16,12 @@ template class TensorFixedSize; template class TensorMap; template class TensorRef; -template::value> class TensorBase; +template class TensorBase; template class TensorCwiseNullaryOp; template class TensorCwiseUnaryOp; template class TensorCwiseBinaryOp; +template class TensorCwiseTernaryOp; template class TensorSelectOp; template class TensorReductionOp; template class TensorIndexTupleOp; @@ -42,9 +43,11 @@ template class TensorReverseOp; template class TensorPaddingOp; template class TensorShufflingOp; template class TensorStridingOp; +template class TensorStridingSlicingOp; template class TensorInflationOp; template class TensorGeneratorOp; template class TensorAssignOp; +template class TensorScanOp; template class TensorCustomUnaryOp; template class TensorCustomBinaryOp; diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorFunctors.h b/unsupported/Eigen/CXX11/src/Tensor/TensorFunctors.h index 33cd00391..7164e8d60 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorFunctors.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorFunctors.h @@ -25,7 +25,7 @@ struct scalar_mod_op { }; template struct functor_traits > -{ enum { Cost = NumTraits::template Div::Cost, PacketAccess = false }; }; +{ enum { Cost = scalar_div_cost::value, PacketAccess = false }; }; /** \internal @@ -38,7 +38,7 @@ struct scalar_mod2_op { }; template struct functor_traits > -{ enum { Cost = NumTraits::template Div::Cost, PacketAccess = false }; }; +{ enum { Cost = scalar_div_cost::value, PacketAccess = false }; }; template struct scalar_fmod_op { @@ -69,7 +69,7 @@ struct scalar_sigmoid_op { template EIGEN_DEVICE_FUNC 
EIGEN_STRONG_INLINE Packet packetOp(const Packet& x) const { - const Packet one = pset1(1); + const Packet one = pset1(T(1)); return pdiv(one, padd(one, pexp(pnegate(x)))); } }; @@ -84,14 +84,23 @@ struct functor_traits > { }; +template +struct reducer_traits { + enum { + Cost = 1, + PacketAccess = false + }; +}; + // Standard reduction functors template struct SumReducer { - static const bool PacketAccess = true; + static const bool PacketAccess = packet_traits::HasAdd; static const bool IsStateful = false; EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reduce(const T t, T* accum) const { - (*accum) += t; + internal::scalar_sum_op sum_op; + *accum = sum_op(*accum, t); } template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reducePacket(const Packet& p, Packet* accum) const { @@ -119,16 +128,26 @@ template struct SumReducer } }; +template +struct reducer_traits, Device> { + enum { + Cost = NumTraits::AddCost, + PacketAccess = PacketType::HasAdd + }; +}; + + template struct MeanReducer { - static const bool PacketAccess = !NumTraits::IsInteger; + static const bool PacketAccess = packet_traits::HasAdd && !NumTraits::IsInteger; static const bool IsStateful = true; EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE MeanReducer() : scalarCount_(0), packetCount_(0) { } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reduce(const T t, T* accum) { - (*accum) += t; + internal::scalar_sum_op sum_op; + *accum = sum_op(*accum, t); scalarCount_++; } template @@ -162,9 +181,44 @@ template struct MeanReducer DenseIndex packetCount_; }; +template +struct reducer_traits, Device> { + enum { + Cost = NumTraits::AddCost, + PacketAccess = PacketType::HasAdd + }; +}; + + +template +struct MinMaxBottomValue { + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE T bottom_value() { + return Eigen::NumTraits::lowest(); + } +}; +template +struct MinMaxBottomValue { + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE T bottom_value() { + return -Eigen::NumTraits::infinity(); + } +}; +template +struct MinMaxBottomValue { + 
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE T bottom_value() { + return Eigen::NumTraits::highest(); + } +}; +template +struct MinMaxBottomValue { + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE T bottom_value() { + return Eigen::NumTraits::infinity(); + } +}; + + template struct MaxReducer { - static const bool PacketAccess = true; + static const bool PacketAccess = packet_traits::HasMax; static const bool IsStateful = false; EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reduce(const T t, T* accum) const { @@ -174,9 +228,8 @@ template struct MaxReducer EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reducePacket(const Packet& p, Packet* accum) const { (*accum) = pmax(*accum, p); } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T initialize() const { - return Eigen::NumTraits::lowest(); + return MinMaxBottomValue::IsInteger>::bottom_value(); } template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet initializePacket() const { @@ -195,9 +248,18 @@ template struct MaxReducer } }; +template +struct reducer_traits, Device> { + enum { + Cost = NumTraits::AddCost, + PacketAccess = PacketType::HasMax + }; +}; + + template struct MinReducer { - static const bool PacketAccess = true; + static const bool PacketAccess = packet_traits::HasMin; static const bool IsStateful = false; EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reduce(const T t, T* accum) const { @@ -207,9 +269,8 @@ template struct MinReducer EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reducePacket(const Packet& p, Packet* accum) const { (*accum) = pmin(*accum, p); } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T initialize() const { - return Eigen::NumTraits::highest(); + return MinMaxBottomValue::IsInteger>::bottom_value(); } template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet initializePacket() const { @@ -228,10 +289,18 @@ template struct MinReducer } }; +template +struct reducer_traits, Device> { + enum { + Cost = NumTraits::AddCost, + PacketAccess = PacketType::HasMin + }; +}; + template struct ProdReducer { - static const bool 
PacketAccess = true; + static const bool PacketAccess = packet_traits::HasMul; static const bool IsStateful = false; EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reduce(const T t, T* accum) const { @@ -263,6 +332,14 @@ template struct ProdReducer } }; +template +struct reducer_traits, Device> { + enum { + Cost = NumTraits::MulCost, + PacketAccess = PacketType::HasMul + }; +}; + struct AndReducer { @@ -280,6 +357,15 @@ struct AndReducer } }; +template +struct reducer_traits { + enum { + Cost = 1, + PacketAccess = false + }; +}; + + struct OrReducer { static const bool PacketAccess = false; static const bool IsStateful = false; @@ -295,6 +381,15 @@ struct OrReducer { } }; +template +struct reducer_traits { + enum { + Cost = 1, + PacketAccess = false + }; +}; + + // Argmin/Argmax reducers template struct ArgMaxTupleReducer { @@ -312,6 +407,15 @@ template struct ArgMaxTupleReducer } }; +template +struct reducer_traits, Device> { + enum { + Cost = NumTraits::AddCost, + PacketAccess = false + }; +}; + + template struct ArgMinTupleReducer { static const bool PacketAccess = false; @@ -328,457 +432,11 @@ template struct ArgMinTupleReducer } }; - -// Random number generation -namespace { -#ifdef __CUDA_ARCH__ -__device__ int get_random_seed() { - return clock(); -} -#else -int get_random_seed() { -#ifdef _WIN32 - SYSTEMTIME st; - GetSystemTime(&st); - return st.wSecond + 1000 * st.wMilliseconds; -#elif defined __APPLE__ - return static_cast(mach_absolute_time()); -#else - timespec ts; - clock_gettime(CLOCK_REALTIME, &ts); - return static_cast(ts.tv_nsec); -#endif -} -#endif -} - -#if !defined (EIGEN_USE_GPU) || !defined(__CUDACC__) || !defined(__CUDA_ARCH__) -// We're not compiling a cuda kernel -template class UniformRandomGenerator { - - public: - static const bool PacketAccess = true; - - UniformRandomGenerator(bool deterministic = true) : m_deterministic(deterministic) { - if (!deterministic) { - srand(get_random_seed()); - } - } - UniformRandomGenerator(const 
UniformRandomGenerator& other) { - m_deterministic = other.m_deterministic; - } - - template - T operator()(Index) const { - return random(); - } - template - PacketType packetOp(Index) const { - const int packetSize = internal::unpacket_traits::size; - EIGEN_ALIGN_MAX T values[packetSize]; - for (int i = 0; i < packetSize; ++i) { - values[i] = random(); - } - return internal::pload(values); - } - - private: - bool m_deterministic; -}; - -#if __cplusplus > 199711 || EIGEN_COMP_MSVC >= 1900 -template <> class UniformRandomGenerator { - public: - static const bool PacketAccess = true; - - UniformRandomGenerator(bool deterministic = true) : m_deterministic(deterministic), m_generator(new std::mt19937()) { - if (!deterministic) { - m_generator->seed(get_random_seed()); - } - } - UniformRandomGenerator(const UniformRandomGenerator& other) { - m_generator = new std::mt19937(); - m_generator->seed(other(0) * UINT_MAX); - m_deterministic = other.m_deterministic; - } - ~UniformRandomGenerator() { - delete m_generator; - } - - template - float operator()(Index) const { - return m_distribution(*m_generator); - } - template - PacketType packetOp(Index i) const { - const int packetSize = internal::unpacket_traits::size; - EIGEN_ALIGN_MAX float values[packetSize]; - for (int k = 0; k < packetSize; ++k) { - values[k] = this->operator()(i); - } - return internal::pload(values); - } - - private: - UniformRandomGenerator& operator = (const UniformRandomGenerator&); - // Make sure m_deterministic comes first to match the layout of the cpu - // version of the code. 
- bool m_deterministic; - std::mt19937* m_generator; - mutable std::uniform_real_distribution m_distribution; -}; - -template <> class UniformRandomGenerator { - public: - static const bool PacketAccess = true; - - UniformRandomGenerator(bool deterministic = true) : m_deterministic(deterministic), m_generator(new std::mt19937()) { - if (!deterministic) { - m_generator->seed(get_random_seed()); - } - } - UniformRandomGenerator(const UniformRandomGenerator& other) { - m_generator = new std::mt19937(); - m_generator->seed(other(0) * UINT_MAX); - m_deterministic = other.m_deterministic; - } - ~UniformRandomGenerator() { - delete m_generator; - } - - template - double operator()(Index) const { - return m_distribution(*m_generator); - } - template - PacketType packetOp(Index i) const { - const int packetSize = internal::unpacket_traits::size; - EIGEN_ALIGN_MAX double values[packetSize]; - for (int k = 0; k < packetSize; ++k) { - values[k] = this->operator()(i); - } - return internal::pload(values); - } - - private: - UniformRandomGenerator& operator = (const UniformRandomGenerator&); - // Make sure m_deterministic comes first to match the layout of the cpu - // version of the code. - bool m_deterministic; - std::mt19937* m_generator; - mutable std::uniform_real_distribution m_distribution; -}; -#endif - -#else - -// We're compiling a cuda kernel -template class UniformRandomGenerator; - -template <> class UniformRandomGenerator { - public: - static const bool PacketAccess = true; - - __device__ UniformRandomGenerator(bool deterministic = true) : m_deterministic(deterministic) { - const int tid = blockIdx.x * blockDim.x + threadIdx.x; - const int seed = deterministic ? 0 : get_random_seed(); - curand_init(seed, tid, 0, &m_state); - } - - __device__ UniformRandomGenerator(const UniformRandomGenerator& other) { - m_deterministic = other.m_deterministic; - const int tid = blockIdx.x * blockDim.x + threadIdx.x; - const int seed = m_deterministic ? 
0 : get_random_seed(); - curand_init(seed, tid, 0, &m_state); - } - - template - __device__ float operator()(Index) const { - return curand_uniform(&m_state); - } - template - __device__ float4 packetOp(Index) const { - EIGEN_STATIC_ASSERT((is_same::value), YOU_MADE_A_PROGRAMMING_MISTAKE); - return curand_uniform4(&m_state); - } - - private: - bool m_deterministic; - mutable curandStatePhilox4_32_10_t m_state; -}; - -template <> class UniformRandomGenerator { - public: - static const bool PacketAccess = true; - - __device__ UniformRandomGenerator(bool deterministic = true) : m_deterministic(deterministic) { - const int tid = blockIdx.x * blockDim.x + threadIdx.x; - const int seed = deterministic ? 0 : get_random_seed(); - curand_init(seed, tid, 0, &m_state); - } - __device__ UniformRandomGenerator(const UniformRandomGenerator& other) { - m_deterministic = other.m_deterministic; - const int tid = blockIdx.x * blockDim.x + threadIdx.x; - const int seed = m_deterministic ? 0 : get_random_seed(); - curand_init(seed, tid, 0, &m_state); - } - template - __device__ double operator()(Index) const { - return curand_uniform_double(&m_state); - } - template - __device__ double2 packetOp(Index) const { - EIGEN_STATIC_ASSERT((is_same::value), YOU_MADE_A_PROGRAMMING_MISTAKE); - return curand_uniform2_double(&m_state); - } - - private: - bool m_deterministic; - mutable curandStatePhilox4_32_10_t m_state; -}; - -template <> class UniformRandomGenerator > { - public: - static const bool PacketAccess = false; - - __device__ UniformRandomGenerator(bool deterministic = true) : m_deterministic(deterministic) { - const int tid = blockIdx.x * blockDim.x + threadIdx.x; - const int seed = deterministic ? 0 : get_random_seed(); - curand_init(seed, tid, 0, &m_state); - } - __device__ UniformRandomGenerator(const UniformRandomGenerator& other) { - m_deterministic = other.m_deterministic; - const int tid = blockIdx.x * blockDim.x + threadIdx.x; - const int seed = m_deterministic ? 
0 : get_random_seed(); - curand_init(seed, tid, 0, &m_state); - } - template - __device__ std::complex operator()(Index) const { - float4 vals = curand_uniform4(&m_state); - return std::complex(vals.x, vals.y); - } - - private: - bool m_deterministic; - mutable curandStatePhilox4_32_10_t m_state; -}; - -template <> class UniformRandomGenerator > { - public: - static const bool PacketAccess = false; - - __device__ UniformRandomGenerator(bool deterministic = true) : m_deterministic(deterministic) { - const int tid = blockIdx.x * blockDim.x + threadIdx.x; - const int seed = deterministic ? 0 : get_random_seed(); - curand_init(seed, tid, 0, &m_state); - } - __device__ UniformRandomGenerator(const UniformRandomGenerator& other) { - m_deterministic = other.m_deterministic; - const int tid = blockIdx.x * blockDim.x + threadIdx.x; - const int seed = m_deterministic ? 0 : get_random_seed(); - curand_init(seed, tid, 0, &m_state); - } - template - __device__ std::complex operator()(Index) const { - double2 vals = curand_uniform2_double(&m_state); - return std::complex(vals.x, vals.y); - } - - private: - bool m_deterministic; - mutable curandStatePhilox4_32_10_t m_state; -}; - -#endif - -template -struct functor_traits > { +template +struct reducer_traits, Device> { enum { - // Rough estimate. 
- Cost = 100 * NumTraits::MulCost, - PacketAccess = UniformRandomGenerator::PacketAccess - }; -}; - - - -#if (!defined (EIGEN_USE_GPU) || !defined(__CUDACC__) || !defined(__CUDA_ARCH__)) && (__cplusplus > 199711 || EIGEN_COMP_MSVC >= 1900) -// We're not compiling a cuda kernel -template class NormalRandomGenerator { - public: - static const bool PacketAccess = true; - - NormalRandomGenerator(bool deterministic = true) : m_deterministic(deterministic), m_distribution(0, 1), m_generator(new std::mt19937()) { - if (!deterministic) { - m_generator->seed(get_random_seed()); - } - } - NormalRandomGenerator(const NormalRandomGenerator& other) - : m_deterministic(other.m_deterministic), m_distribution(other.m_distribution), m_generator(new std::mt19937()) { - m_generator->seed(other(0) * UINT_MAX); - } - ~NormalRandomGenerator() { - delete m_generator; - } - template - T operator()(Index) const { - return m_distribution(*m_generator); - } - template - PacketType packetOp(Index) const { - const int packetSize = internal::unpacket_traits::size; - EIGEN_ALIGN_MAX T values[packetSize]; - for (int i = 0; i < packetSize; ++i) { - values[i] = m_distribution(*m_generator); - } - return internal::pload(values); - } - - private: - // No assignment - NormalRandomGenerator& operator = (const NormalRandomGenerator&); - - bool m_deterministic; - mutable std::normal_distribution m_distribution; - std::mt19937* m_generator; -}; - -#elif defined (EIGEN_USE_GPU) && defined(__CUDACC__) && defined(__CUDA_ARCH__) - -// We're compiling a cuda kernel -template class NormalRandomGenerator; - -template <> class NormalRandomGenerator { - public: - static const bool PacketAccess = true; - - __device__ NormalRandomGenerator(bool deterministic = true) : m_deterministic(deterministic) { - const int tid = blockIdx.x * blockDim.x + threadIdx.x; - const int seed = deterministic ? 
0 : get_random_seed(); - curand_init(seed, tid, 0, &m_state); - } - __device__ NormalRandomGenerator(const NormalRandomGenerator& other) { - m_deterministic = other.m_deterministic; - const int tid = blockIdx.x * blockDim.x + threadIdx.x; - const int seed = m_deterministic ? 0 : get_random_seed(); - curand_init(seed, tid, 0, &m_state); - } - template - __device__ float operator()(Index) const { - return curand_normal(&m_state); - } - template - __device__ float4 packetOp(Index) const { - EIGEN_STATIC_ASSERT((is_same::value), YOU_MADE_A_PROGRAMMING_MISTAKE); - return curand_normal4(&m_state); - } - - private: - bool m_deterministic; - mutable curandStatePhilox4_32_10_t m_state; -}; - -template <> class NormalRandomGenerator { - public: - static const bool PacketAccess = true; - - __device__ NormalRandomGenerator(bool deterministic = true) : m_deterministic(deterministic) { - const int tid = blockIdx.x * blockDim.x + threadIdx.x; - const int seed = deterministic ? 0 : get_random_seed(); - curand_init(seed, tid, 0, &m_state); - } - __device__ NormalRandomGenerator(const NormalRandomGenerator& other) { - m_deterministic = other.m_deterministic; - const int tid = blockIdx.x * blockDim.x + threadIdx.x; - const int seed = m_deterministic ? 0 : get_random_seed(); - curand_init(seed, tid, 0, &m_state); - } - template - __device__ double operator()(Index) const { - return curand_normal_double(&m_state); - } - template - __device__ double2 packetOp(Index) const { - EIGEN_STATIC_ASSERT((is_same::value), YOU_MADE_A_PROGRAMMING_MISTAKE); - return curand_normal2_double(&m_state); - } - - private: - bool m_deterministic; - mutable curandStatePhilox4_32_10_t m_state; -}; - -template <> class NormalRandomGenerator > { - public: - static const bool PacketAccess = false; - - __device__ NormalRandomGenerator(bool deterministic = true) : m_deterministic(deterministic) { - const int tid = blockIdx.x * blockDim.x + threadIdx.x; - const int seed = deterministic ? 
0 : get_random_seed(); - curand_init(seed, tid, 0, &m_state); - } - __device__ NormalRandomGenerator(const NormalRandomGenerator& other) { - m_deterministic = other.m_deterministic; - const int tid = blockIdx.x * blockDim.x + threadIdx.x; - const int seed = m_deterministic ? 0 : get_random_seed(); - curand_init(seed, tid, 0, &m_state); - } - template - __device__ std::complex operator()(Index) const { - float4 vals = curand_normal4(&m_state); - return std::complex(vals.x, vals.y); - } - - private: - bool m_deterministic; - mutable curandStatePhilox4_32_10_t m_state; -}; - -template <> class NormalRandomGenerator > { - public: - static const bool PacketAccess = false; - - __device__ NormalRandomGenerator(bool deterministic = true) : m_deterministic(deterministic) { - const int tid = blockIdx.x * blockDim.x + threadIdx.x; - const int seed = deterministic ? 0 : get_random_seed(); - curand_init(seed, tid, 0, &m_state); - } - __device__ NormalRandomGenerator(const NormalRandomGenerator& other) { - m_deterministic = other.m_deterministic; - const int tid = blockIdx.x * blockDim.x + threadIdx.x; - const int seed = m_deterministic ? 0 : get_random_seed(); - curand_init(seed, tid, 0, &m_state); - } - template - __device__ std::complex operator()(Index) const { - double2 vals = curand_normal2_double(&m_state); - return std::complex(vals.x, vals.y); - } - - private: - bool m_deterministic; - mutable curandStatePhilox4_32_10_t m_state; -}; - -#else - -template class NormalRandomGenerator { - public: - static const bool PacketAccess = false; - NormalRandomGenerator(bool deterministic = true) : m_deterministic(deterministic) {} - - private: - bool m_deterministic; -}; - -#endif - -template -struct functor_traits > { - enum { - // Rough estimate. 
- Cost = 100 * NumTraits::MulCost, - PacketAccess = NormalRandomGenerator::PacketAccess + Cost = NumTraits::AddCost, + PacketAccess = false }; }; @@ -797,7 +455,7 @@ class GaussianGenerator { } } - T operator()(const array& coordinates) const { + EIGEN_DEVICE_FUNC T operator()(const array& coordinates) const { T tmp = T(0); for (size_t i = 0; i < NumDims; ++i) { T offset = coordinates[i] - m_means[i]; diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorGenerator.h b/unsupported/Eigen/CXX11/src/Tensor/TensorGenerator.h index 8ff7d5815..eb1d4934e 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorGenerator.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorGenerator.h @@ -134,7 +134,7 @@ struct TensorEvaluator, Device> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const { const int packetSize = internal::unpacket_traits::size; - EIGEN_STATIC_ASSERT(packetSize > 1, YOU_MADE_A_PROGRAMMING_MISTAKE) + EIGEN_STATIC_ASSERT((packetSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE) eigen_assert(index+packetSize-1 < dimensions().TotalSize()); EIGEN_ALIGN_MAX typename internal::remove_const::type values[packetSize]; diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorGlobalFunctions.h b/unsupported/Eigen/CXX11/src/Tensor/TensorGlobalFunctions.h new file mode 100644 index 000000000..665b861cf --- /dev/null +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorGlobalFunctions.h @@ -0,0 +1,33 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2016 Eugene Brevdo +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ +#ifndef EIGEN_CXX11_TENSOR_TENSOR_GLOBAL_FUNCTIONS_H +#define EIGEN_CXX11_TENSOR_TENSOR_GLOBAL_FUNCTIONS_H + +namespace Eigen { + +/** \cpp11 \returns an expression of the coefficient-wise betainc(\a x, \a a, \a b) to the given tensors. + * + * This function computes the regularized incomplete beta function (integral). + * + */ +template +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const + TensorCwiseTernaryOp, + const ADerived, const BDerived, const XDerived> + betainc(const ADerived& a, const BDerived& b, const XDerived& x) { + return TensorCwiseTernaryOp< + internal::scalar_betainc_op, const ADerived, + const BDerived, const XDerived>( + a, b, x, internal::scalar_betainc_op()); +} + +} // end namespace Eigen + +#endif // EIGEN_CXX11_TENSOR_TENSOR_GLOBAL_FUNCTIONS_H diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorIO.h b/unsupported/Eigen/CXX11/src/Tensor/TensorIO.h index 38a833f82..a901c5dd4 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorIO.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorIO.h @@ -13,38 +13,61 @@ namespace Eigen { namespace internal { -template<> -struct significant_decimals_impl - : significant_decimals_default_impl -{}; -} +// Print the tensor as a 2d matrix +template +struct TensorPrinter { + static void run (std::ostream& os, const Tensor& tensor) { + typedef typename internal::remove_const::type Scalar; + typedef typename Tensor::Index Index; + const Index total_size = internal::array_prod(tensor.dimensions()); + if (total_size > 0) { + const Index first_dim = Eigen::internal::array_get<0>(tensor.dimensions()); + static const int layout = Tensor::Layout; + Map > matrix(const_cast(tensor.data()), first_dim, total_size/first_dim); + os << matrix; + } + } +}; + + +// Print the tensor as a vector +template +struct TensorPrinter { + static void run (std::ostream& os, const Tensor& tensor) { + typedef typename internal::remove_const::type Scalar; + typedef typename Tensor::Index Index; + const Index total_size = 
internal::array_prod(tensor.dimensions()); + if (total_size > 0) { + Map > array(const_cast(tensor.data()), total_size); + os << array; + } + } +}; + + +// Print the tensor as a scalar +template +struct TensorPrinter { + static void run (std::ostream& os, const Tensor& tensor) { + os << tensor.coeff(0); + } +}; +} template std::ostream& operator << (std::ostream& os, const TensorBase& expr) { + typedef TensorEvaluator, DefaultDevice> Evaluator; + typedef typename Evaluator::Dimensions Dimensions; + // Evaluate the expression if needed TensorForcedEvalOp eval = expr.eval(); - TensorEvaluator, DefaultDevice> tensor(eval, DefaultDevice()); + Evaluator tensor(eval, DefaultDevice()); tensor.evalSubExprsIfNeeded(NULL); - typedef typename internal::remove_const::type Scalar; - typedef typename T::Index Index; - typedef typename TensorEvaluator, DefaultDevice>::Dimensions Dimensions; - const Index total_size = internal::array_prod(tensor.dimensions()); - - // Print the tensor as a 1d vector or a 2d matrix. + // Print the result static const int rank = internal::array_size::value; - if (rank == 0) { - os << tensor.coeff(0); - } else if (rank == 1) { - Map > array(const_cast(tensor.data()), total_size); - os << array; - } else { - const Index first_dim = Eigen::internal::array_get<0>(tensor.dimensions()); - static const int layout = TensorEvaluator, DefaultDevice>::Layout; - Map > matrix(const_cast(tensor.data()), first_dim, total_size/first_dim); - os << matrix; - } + internal::TensorPrinter::run(os, tensor); // Cleanup. 
tensor.cleanup(); diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorImagePatch.h b/unsupported/Eigen/CXX11/src/Tensor/TensorImagePatch.h index bafcc67bd..566856ed2 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorImagePatch.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorImagePatch.h @@ -174,7 +174,7 @@ struct TensorEvaluator, Device> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device) : m_impl(op.expression(), device) { - EIGEN_STATIC_ASSERT(NumDims >= 4, YOU_MADE_A_PROGRAMMING_MISTAKE); + EIGEN_STATIC_ASSERT((NumDims >= 4), YOU_MADE_A_PROGRAMMING_MISTAKE); m_paddingValue = op.padding_value(); @@ -362,7 +362,7 @@ struct TensorEvaluator, Device> template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const { - EIGEN_STATIC_ASSERT(PacketSize > 1, YOU_MADE_A_PROGRAMMING_MISTAKE) + EIGEN_STATIC_ASSERT((PacketSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE) eigen_assert(index+PacketSize-1 < dimensions().TotalSize()); if (m_in_row_strides != 1 || m_in_col_strides != 1 || m_row_inflate_strides != 1 || m_col_inflate_strides != 1) { diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorIndexList.h b/unsupported/Eigen/CXX11/src/Tensor/TensorIndexList.h index 985594bc8..3209fecd3 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorIndexList.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorIndexList.h @@ -10,7 +10,8 @@ #ifndef EIGEN_CXX11_TENSOR_TENSOR_INDEX_LIST_H #define EIGEN_CXX11_TENSOR_TENSOR_INDEX_LIST_H -#if defined(EIGEN_HAS_CONSTEXPR) && defined(EIGEN_HAS_VARIADIC_TEMPLATES) + +#if EIGEN_HAS_CONSTEXPR && EIGEN_HAS_VARIADIC_TEMPLATES #define EIGEN_HAS_INDEX_LIST @@ -45,6 +46,24 @@ struct type2index { } }; +// This can be used with IndexPairList to get compile-time constant pairs, +// such as IndexPairList, type2indexpair<3,4>>(). 
+template +struct type2indexpair { + static const DenseIndex first = f; + static const DenseIndex second = s; + + constexpr EIGEN_DEVICE_FUNC operator IndexPair() const { + return IndexPair(f, s); + } + + EIGEN_DEVICE_FUNC void set(const IndexPair& val) { + eigen_assert(val.first == f); + eigen_assert(val.second == s); + } +}; + + template struct NumTraits > { typedef DenseIndex Real; @@ -72,6 +91,16 @@ EIGEN_DEVICE_FUNC void update_value(type2index& val, DenseIndex new_val) { val.set(new_val); } +template +EIGEN_DEVICE_FUNC void update_value(T& val, IndexPair new_val) { + val = new_val; +} +template +EIGEN_DEVICE_FUNC void update_value(type2indexpair& val, IndexPair new_val) { + val.set(new_val); +} + + template struct is_compile_time_constant { static constexpr bool value = false; @@ -94,7 +123,22 @@ struct is_compile_time_constant& > { static constexpr bool value = true; }; - +template +struct is_compile_time_constant > { + static constexpr bool value = true; +}; +template +struct is_compile_time_constant > { + static constexpr bool value = true; +}; +template +struct is_compile_time_constant& > { + static constexpr bool value = true; +}; +template +struct is_compile_time_constant& > { + static constexpr bool value = true; +}; template @@ -184,31 +228,32 @@ template -template +template struct tuple_coeff { template - EIGEN_DEVICE_FUNC static constexpr DenseIndex get(const DenseIndex i, const IndexTuple& t) { - return array_get(t) * (i == Idx) + tuple_coeff::get(i, t) * (i != Idx); + EIGEN_DEVICE_FUNC static constexpr ValueT get(const DenseIndex i, const IndexTuple& t) { + // return array_get(t) * (i == Idx) + tuple_coeff::get(i, t) * (i != Idx); + return (i == Idx ? 
array_get(t) : tuple_coeff::get(i, t)); } template - EIGEN_DEVICE_FUNC static void set(const DenseIndex i, IndexTuple& t, const DenseIndex value) { + EIGEN_DEVICE_FUNC static void set(const DenseIndex i, IndexTuple& t, const ValueT& value) { if (i == Idx) { update_value(array_get(t), value); } else { - tuple_coeff::set(i, t, value); + tuple_coeff::set(i, t, value); } } template EIGEN_DEVICE_FUNC static constexpr bool value_known_statically(const DenseIndex i, const IndexTuple& t) { return ((i == Idx) & is_compile_time_constant::ValType>::value) || - tuple_coeff::value_known_statically(i, t); + tuple_coeff::value_known_statically(i, t); } template EIGEN_DEVICE_FUNC static constexpr bool values_up_to_known_statically(const IndexTuple& t) { return is_compile_time_constant::ValType>::value && - tuple_coeff::values_up_to_known_statically(t); + tuple_coeff::values_up_to_known_statically(t); } template @@ -216,19 +261,19 @@ struct tuple_coeff { return is_compile_time_constant::ValType>::value && is_compile_time_constant::ValType>::value && array_get(t) > array_get(t) && - tuple_coeff::values_up_to_statically_known_to_increase(t); + tuple_coeff::values_up_to_statically_known_to_increase(t); } }; -template <> -struct tuple_coeff<0> { +template +struct tuple_coeff<0, ValueT> { template - EIGEN_DEVICE_FUNC static constexpr DenseIndex get(const DenseIndex i, const IndexTuple& t) { + EIGEN_DEVICE_FUNC static constexpr ValueT get(const DenseIndex /*i*/, const IndexTuple& t) { // eigen_assert (i == 0); // gcc fails to compile assertions in constexpr - return array_get<0>(t) * (i == 0); + return array_get<0>(t)/* * (i == 0)*/; } template - EIGEN_DEVICE_FUNC static void set(const DenseIndex i, IndexTuple& t, const DenseIndex value) { + EIGEN_DEVICE_FUNC static void set(const DenseIndex i, IndexTuple& t, const ValueT value) { eigen_assert (i == 0); update_value(array_get<0>(t), value); } @@ -254,13 +299,13 @@ struct tuple_coeff<0> { template struct IndexList : internal::IndexTuple { 
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC constexpr DenseIndex operator[] (const DenseIndex i) const { - return internal::tuple_coeff >::value-1>::get(i, *this); + return internal::tuple_coeff >::value-1, DenseIndex>::get(i, *this); } EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC constexpr DenseIndex get(const DenseIndex i) const { - return internal::tuple_coeff >::value-1>::get(i, *this); + return internal::tuple_coeff >::value-1, DenseIndex>::get(i, *this); } EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC void set(const DenseIndex i, const DenseIndex value) { - return internal::tuple_coeff >::value-1>::set(i, *this, value); + return internal::tuple_coeff >::value-1, DenseIndex>::set(i, *this, value); } EIGEN_DEVICE_FUNC constexpr IndexList(const internal::IndexTuple& other) : internal::IndexTuple(other) { } @@ -268,14 +313,14 @@ struct IndexList : internal::IndexTuple { EIGEN_DEVICE_FUNC constexpr IndexList() : internal::IndexTuple() { } EIGEN_DEVICE_FUNC constexpr bool value_known_statically(const DenseIndex i) const { - return internal::tuple_coeff >::value-1>::value_known_statically(i, *this); + return internal::tuple_coeff >::value-1, DenseIndex>::value_known_statically(i, *this); } EIGEN_DEVICE_FUNC constexpr bool all_values_known_statically() const { - return internal::tuple_coeff >::value-1>::values_up_to_known_statically(*this); + return internal::tuple_coeff >::value-1, DenseIndex>::values_up_to_known_statically(*this); } EIGEN_DEVICE_FUNC constexpr bool values_statically_known_to_increase() const { - return internal::tuple_coeff >::value-1>::values_up_to_statically_known_to_increase(*this); + return internal::tuple_coeff >::value-1, DenseIndex>::values_up_to_statically_known_to_increase(*this); } }; @@ -286,6 +331,23 @@ constexpr IndexList make_index_list(FirstType val1, Ot } +template +struct IndexPairList : internal::IndexTuple { + EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC constexpr IndexPair operator[] (const DenseIndex i) const { + return internal::tuple_coeff >::value-1, 
IndexPair>::get(i, *this); + } + EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC void set(const DenseIndex i, const IndexPair value) { + return internal::tuple_coeff>::value-1, IndexPair >::set(i, *this, value); + } + + EIGEN_DEVICE_FUNC constexpr IndexPairList(const internal::IndexTuple& other) : internal::IndexTuple(other) { } + EIGEN_DEVICE_FUNC constexpr IndexPairList() : internal::IndexTuple() { } + + EIGEN_DEVICE_FUNC constexpr bool value_known_statically(const DenseIndex i) const { + return internal::tuple_coeff >::value-1, DenseIndex>::value_known_statically(i, *this); + } +}; + namespace internal { template size_t array_prod(const IndexList& sizes) { @@ -303,6 +365,13 @@ template struct array_size >::value; }; +template struct array_size > { + static const size_t value = std::tuple_size >::value; +}; +template struct array_size > { + static const size_t value = std::tuple_size >::value; +}; + template EIGEN_DEVICE_FUNC constexpr DenseIndex array_get(IndexList& a) { return IndexTupleExtractor::get_val(a); } @@ -472,6 +541,57 @@ struct index_statically_lt_impl > { } }; + + +template +struct index_pair_first_statically_eq_impl { + EIGEN_DEVICE_FUNC static constexpr bool run(DenseIndex, DenseIndex) { + return false; + } +}; + +template +struct index_pair_first_statically_eq_impl > { + EIGEN_DEVICE_FUNC static constexpr bool run(const DenseIndex i, const DenseIndex value) { + return IndexPairList().value_known_statically(i) & + (IndexPairList().operator[](i).first == value); + } +}; + +template +struct index_pair_first_statically_eq_impl > { + EIGEN_DEVICE_FUNC static constexpr bool run(const DenseIndex i, const DenseIndex value) { + return IndexPairList().value_known_statically(i) & + (IndexPairList().operator[](i).first == value); + } +}; + + + +template +struct index_pair_second_statically_eq_impl { + EIGEN_DEVICE_FUNC static constexpr bool run(DenseIndex, DenseIndex) { + return false; + } +}; + +template +struct index_pair_second_statically_eq_impl > { + 
EIGEN_DEVICE_FUNC static constexpr bool run(const DenseIndex i, const DenseIndex value) { + return IndexPairList().value_known_statically(i) & + (IndexPairList().operator[](i).second == value); + } +}; + +template +struct index_pair_second_statically_eq_impl > { + EIGEN_DEVICE_FUNC static constexpr bool run(const DenseIndex i, const DenseIndex value) { + return IndexPairList().value_known_statically(i) & + (IndexPairList().operator[](i).second == value); + } +}; + + } // end namespace internal } // end namespace Eigen @@ -482,53 +602,69 @@ namespace internal { template struct index_known_statically_impl { - EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE static bool run(const DenseIndex) { + static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE bool run(const DenseIndex) { return false; } }; template struct all_indices_known_statically_impl { - EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE static bool run() { + static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE bool run() { return false; } }; template struct indices_statically_known_to_increase_impl { - EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE static bool run() { + static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE bool run() { return false; } }; template struct index_statically_eq_impl { - EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE static bool run(DenseIndex, DenseIndex) { + static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE bool run(DenseIndex, DenseIndex) { return false; } }; template struct index_statically_ne_impl { - EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE static bool run(DenseIndex, DenseIndex) { + static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE bool run(DenseIndex, DenseIndex) { return false; } }; template struct index_statically_gt_impl { - EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE static bool run(DenseIndex, DenseIndex) { + static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE bool run(DenseIndex, DenseIndex) { return false; } }; template struct index_statically_lt_impl { - EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE static bool run(DenseIndex, DenseIndex) { + static 
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE bool run(DenseIndex, DenseIndex) { return false; } }; +template +struct index_pair_first_statically_eq_impl { + static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE bool run(DenseIndex, DenseIndex) { + return false; + } +}; + +template +struct index_pair_second_statically_eq_impl { + static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE bool run(DenseIndex, DenseIndex) { + return false; + } +}; + + + } // end namespace internal } // end namespace Eigen @@ -572,6 +708,16 @@ static EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR bool index_statically_lt(DenseIndex i, return index_statically_lt_impl::run(i, value); } +template +static EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR bool index_pair_first_statically_eq(DenseIndex i, DenseIndex value) { + return index_pair_first_statically_eq_impl::run(i, value); +} + +template +static EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR bool index_pair_second_statically_eq(DenseIndex i, DenseIndex value) { + return index_pair_second_statically_eq_impl::run(i, value); +} + } // end namespace internal } // end namespace Eigen diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorInflation.h b/unsupported/Eigen/CXX11/src/Tensor/TensorInflation.h index de2f67d74..f391fb9ee 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorInflation.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorInflation.h @@ -189,7 +189,7 @@ struct TensorEvaluator, Device> template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const { - EIGEN_STATIC_ASSERT(PacketSize > 1, YOU_MADE_A_PROGRAMMING_MISTAKE) + EIGEN_STATIC_ASSERT((PacketSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE) eigen_assert(index+PacketSize-1 < dimensions().TotalSize()); EIGEN_ALIGN_MAX typename internal::remove_const::type values[PacketSize]; diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorInitializer.h b/unsupported/Eigen/CXX11/src/Tensor/TensorInitializer.h index 2d223140e..33edc49e3 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorInitializer.h +++ 
b/unsupported/Eigen/CXX11/src/Tensor/TensorInitializer.h @@ -10,7 +10,7 @@ #ifndef EIGEN_CXX11_TENSOR_TENSOR_INITIALIZER_H #define EIGEN_CXX11_TENSOR_TENSOR_INITIALIZER_H -#ifdef EIGEN_HAS_VARIADIC_TEMPLATES +#if EIGEN_HAS_VARIADIC_TEMPLATES #include diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorIntDiv.h b/unsupported/Eigen/CXX11/src/Tensor/TensorIntDiv.h index 33c6c1b0f..ede3939c2 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorIntDiv.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorIntDiv.h @@ -29,25 +29,47 @@ namespace Eigen { namespace internal { namespace { + // Note: result is undefined if val == 0 template - EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE int count_leading_zeros(const T val) + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE + typename internal::enable_if::type count_leading_zeros(const T val) { #ifdef __CUDA_ARCH__ - return (sizeof(T) == 8) ? __clzll(val) : __clz(val); + return __clz(val); #elif EIGEN_COMP_MSVC - unsigned long index; - if (sizeof(T) == 8) { - _BitScanReverse64(&index, val); - } else { - _BitScanReverse(&index, val); - } - return (sizeof(T) == 8) ? 63 - index : 31 - index; + unsigned long index; + _BitScanReverse(&index, val); + return 31 - index; #else EIGEN_STATIC_ASSERT(sizeof(unsigned long long) == 8, YOU_MADE_A_PROGRAMMING_MISTAKE); - return (sizeof(T) == 8) ? - __builtin_clzll(static_cast(val)) : - __builtin_clz(static_cast(val)); + return __builtin_clz(static_cast(val)); +#endif + } + + template + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE + typename internal::enable_if::type count_leading_zeros(const T val) + { +#ifdef __CUDA_ARCH__ + return __clzll(val); +#elif EIGEN_COMP_MSVC && EIGEN_ARCH_x86_64 + unsigned long index; + _BitScanReverse64(&index, val); + return 63 - index; +#elif EIGEN_COMP_MSVC + // MSVC's _BitScanReverse64 is not available for 32bits builds. 
+ unsigned int lo = (unsigned int)(val&0xffffffff); + unsigned int hi = (unsigned int)((val>>32)&0xffffffff); + int n; + if(hi==0) + n = 32 + count_leading_zeros(lo); + else + n = count_leading_zeros(hi); + return n; +#else + EIGEN_STATIC_ASSERT(sizeof(unsigned long long) == 8, YOU_MADE_A_PROGRAMMING_MISTAKE); + return __builtin_clzll(static_cast(val)); #endif } @@ -98,7 +120,9 @@ namespace { return static_cast((static_cast<__uint128_t>(1) << (64+log_div)) / static_cast<__uint128_t>(divider) - (static_cast<__uint128_t>(1) << 64) + 1); #else const uint64_t shift = 1ULL << log_div; - TensorUInt128 result = (TensorUInt128 >(shift, 0) / TensorUInt128, uint64_t>(divider) - TensorUInt128, static_val<0> >(1, 0) + TensorUInt128, static_val<1> >(1)); + TensorUInt128 result = TensorUInt128 >(shift, 0) / TensorUInt128, uint64_t>(divider) + - TensorUInt128, static_val<0> >(1, 0) + + TensorUInt128, static_val<1> >(1); return static_cast(result); #endif } diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorMacros.h b/unsupported/Eigen/CXX11/src/Tensor/TensorMacros.h index 8ed71f838..ee0078bbc 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorMacros.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorMacros.h @@ -28,7 +28,7 @@ // SFINAE requires variadic templates #ifndef __CUDACC__ -#ifdef EIGEN_HAS_VARIADIC_TEMPLATES +#if EIGEN_HAS_VARIADIC_TEMPLATES // SFINAE doesn't work for gcc <= 4.7 #ifdef EIGEN_COMP_GNUC #if EIGEN_GNUC_AT_LEAST(4,8) @@ -44,7 +44,7 @@ typename internal::enable_if< ( __condition__ ) , int >::type = 0 -#if defined(EIGEN_HAS_CONSTEXPR) +#if EIGEN_HAS_CONSTEXPR #define EIGEN_CONSTEXPR constexpr #else #define EIGEN_CONSTEXPR diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorMap.h b/unsupported/Eigen/CXX11/src/Tensor/TensorMap.h index 9ebd9172b..6fb4f4a31 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorMap.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorMap.h @@ -57,7 +57,7 @@ template class TensorMap : public Tensor EIGEN_STATIC_ASSERT((0 == 
NumIndices || NumIndices == Dynamic), YOU_MADE_A_PROGRAMMING_MISTAKE) } -#ifdef EIGEN_HAS_VARIADIC_TEMPLATES +#if EIGEN_HAS_VARIADIC_TEMPLATES template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorMap(PointerArgType dataPtr, Index firstDimension, IndexTypes... otherDimensions) : m_data(dataPtr), m_dimensions(firstDimension, otherDimensions...) { // The number of dimensions used to construct a tensor must be equal to the rank of the tensor. @@ -140,7 +140,7 @@ template class TensorMap : public Tensor return m_data[index]; } -#ifdef EIGEN_HAS_VARIADIC_TEMPLATES +#if EIGEN_HAS_VARIADIC_TEMPLATES template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar& operator()(Index firstIndex, Index secondIndex, IndexTypes... otherIndices) const { @@ -227,7 +227,7 @@ template class TensorMap : public Tensor return m_data[index]; } -#ifdef EIGEN_HAS_VARIADIC_TEMPLATES +#if EIGEN_HAS_VARIADIC_TEMPLATES template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& operator()(Index firstIndex, Index secondIndex, IndexTypes... 
otherIndices) { diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorMeta.h b/unsupported/Eigen/CXX11/src/Tensor/TensorMeta.h index cd04716bd..fdb5ee6b8 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorMeta.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorMeta.h @@ -47,22 +47,39 @@ template <> struct max_n_1<0> { // Default packet types template -struct PacketType { +struct PacketType : internal::packet_traits { typedef typename internal::packet_traits::type type; - enum { size = internal::unpacket_traits::size }; }; // For CUDA packet types when using a GpuDevice -#if defined(EIGEN_USE_GPU) && defined(__CUDACC__) +#if defined(EIGEN_USE_GPU) && defined(__CUDACC__) && defined(EIGEN_HAS_CUDA_FP16) template <> -struct PacketType { - typedef float4 type; - static const int size = 4; -}; -template <> -struct PacketType { - typedef double2 type; +struct PacketType { + typedef half2 type; static const int size = 2; + enum { + HasAdd = 1, + HasSub = 1, + HasMul = 1, + HasNegate = 1, + HasAbs = 1, + HasArg = 0, + HasAbs2 = 0, + HasMin = 1, + HasMax = 1, + HasConj = 0, + HasSetLinear = 0, + HasBlend = 0, + + HasDiv = 1, + HasSqrt = 1, + HasRsqrt = 1, + HasExp = 1, + HasLog = 1, + HasLog1p = 0, + HasLog10 = 0, + HasPow = 1, + }; }; #endif @@ -112,6 +129,20 @@ bool operator!=(const Tuple& x, const Tuple& y) { } +// Can't use std::pairs on cuda devices +template struct IndexPair { + EIGEN_CONSTEXPR EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE IndexPair() : first(0), second(0) {} + EIGEN_CONSTEXPR EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE IndexPair(Idx f, Idx s) : first(f), second(s) {} + + EIGEN_DEVICE_FUNC void set(IndexPair val) { + first = val.first; + second = val.second; + } + + Idx first; + Idx second; +}; + #ifdef EIGEN_HAS_SFINAE namespace internal { diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorMorphing.h b/unsupported/Eigen/CXX11/src/Tensor/TensorMorphing.h index bfa65a607..d34f1e328 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorMorphing.h +++ 
b/unsupported/Eigen/CXX11/src/Tensor/TensorMorphing.h @@ -148,7 +148,7 @@ struct TensorEvaluator, Device> EIGEN_DEVICE_FUNC Scalar* data() const { return const_cast(m_impl.data()); } - const TensorEvaluator& impl() const { return m_impl; } + EIGEN_DEVICE_FUNC const TensorEvaluator& impl() const { return m_impl; } protected: TensorEvaluator m_impl; @@ -409,7 +409,7 @@ struct TensorEvaluator, Devi EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const { const int packetSize = internal::unpacket_traits::size; - EIGEN_STATIC_ASSERT(packetSize > 1, YOU_MADE_A_PROGRAMMING_MISTAKE) + EIGEN_STATIC_ASSERT((packetSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE) eigen_assert(index+packetSize-1 < internal::array_prod(dimensions())); Index inputIndices[] = {0, 0}; @@ -603,6 +603,286 @@ struct TensorEvaluator, Device> }; + +namespace internal { +template +struct traits > : public traits +{ + typedef typename XprType::Scalar Scalar; + typedef traits XprTraits; + typedef typename XprTraits::StorageKind StorageKind; + typedef typename XprTraits::Index Index; + typedef typename XprType::Nested Nested; + typedef typename remove_reference::type _Nested; + static const int NumDimensions = array_size::value; + static const int Layout = XprTraits::Layout; +}; + +template +struct eval, Eigen::Dense> +{ + typedef const TensorStridingSlicingOp& type; +}; + +template +struct nested, 1, typename eval >::type> +{ + typedef TensorStridingSlicingOp type; +}; + +} // end namespace internal + + +template +class TensorStridingSlicingOp : public TensorBase > +{ + public: + typedef typename internal::traits::Scalar Scalar; + typedef typename XprType::CoeffReturnType CoeffReturnType; + typedef typename internal::nested::type Nested; + typedef typename internal::traits::StorageKind StorageKind; + typedef typename internal::traits::Index Index; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorStridingSlicingOp( + const XprType& expr, const StartIndices& startIndices, + const 
StopIndices& stopIndices, const Strides& strides) + : m_xpr(expr), m_startIndices(startIndices), m_stopIndices(stopIndices), + m_strides(strides) {} + + EIGEN_DEVICE_FUNC + const StartIndices& startIndices() const { return m_startIndices; } + EIGEN_DEVICE_FUNC + const StartIndices& stopIndices() const { return m_stopIndices; } + EIGEN_DEVICE_FUNC + const StartIndices& strides() const { return m_strides; } + + EIGEN_DEVICE_FUNC + const typename internal::remove_all::type& + expression() const { return m_xpr; } + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE TensorStridingSlicingOp& operator = (const TensorStridingSlicingOp& other) + { + typedef TensorAssignOp Assign; + Assign assign(*this, other); + internal::TensorExecutor::run( + assign, DefaultDevice()); + return *this; + } + + template + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE TensorStridingSlicingOp& operator = (const OtherDerived& other) + { + typedef TensorAssignOp Assign; + Assign assign(*this, other); + internal::TensorExecutor::run( + assign, DefaultDevice()); + return *this; + } + + protected: + typename XprType::Nested m_xpr; + const StartIndices m_startIndices; + const StopIndices m_stopIndices; + const Strides m_strides; +}; + +// Eval as rvalue +template +struct TensorEvaluator, Device> +{ + typedef TensorStridingSlicingOp XprType; + static const int NumDims = internal::array_size::value; + + enum { + // Alignment can't be guaranteed at compile time since it depends on the + // slice offsets and sizes. 
+ IsAligned = false, + PacketAccess = false, + BlockAccess = false, + Layout = TensorEvaluator::Layout, + RawAccess = false + }; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device) + : m_impl(op.expression(), device), m_device(device), m_strides(op.strides()) + { + // Handle degenerate intervals by gracefully clamping and allowing m_dimensions to be zero + DSizes startIndicesClamped, stopIndicesClamped; + for (size_t i = 0; i < internal::array_size::value; ++i) { + eigen_assert(m_strides[i] != 0 && "0 stride is invalid"); + if(m_strides[i]>0){ + startIndicesClamped[i] = clamp(op.startIndices()[i], 0, m_impl.dimensions()[i]); + stopIndicesClamped[i] = clamp(op.stopIndices()[i], 0, m_impl.dimensions()[i]); + }else{ + /* implies m_strides[i]<0 by assert */ + startIndicesClamped[i] = clamp(op.startIndices()[i], -1, m_impl.dimensions()[i] - 1); + stopIndicesClamped[i] = clamp(op.stopIndices()[i], -1, m_impl.dimensions()[i] - 1); + } + m_startIndices[i] = startIndicesClamped[i]; + } + + const typename TensorEvaluator::Dimensions& input_dims = m_impl.dimensions(); + + // check for degenerate intervals and compute output tensor shape + bool degenerate = false;; + for(int i = 0; i < NumDims; i++){ + Index interval = stopIndicesClamped[i] - startIndicesClamped[i]; + if(interval == 0 || ((interval<0) != (m_strides[i]<0))){ + m_dimensions[i] = 0; + degenerate = true; + }else{ + m_dimensions[i] = interval / m_strides[i] + + (interval % m_strides[i] != 0 ? 
1 : 0); + eigen_assert(m_dimensions[i] >= 0); + } + } + Strides output_dims = m_dimensions; + + if (static_cast(Layout) == static_cast(ColMajor)) { + m_inputStrides[0] = m_strides[0]; + m_offsets[0] = startIndicesClamped[0]; + Index previousDimProduct = 1; + for (int i = 1; i < NumDims; ++i) { + previousDimProduct *= input_dims[i-1]; + m_inputStrides[i] = previousDimProduct * m_strides[i]; + m_offsets[i] = startIndicesClamped[i] * previousDimProduct; + } + + // Don't initialize m_fastOutputStrides[0] since it won't ever be accessed. + m_outputStrides[0] = 1; + for (int i = 1; i < NumDims; ++i) { + m_outputStrides[i] = m_outputStrides[i-1] * output_dims[i-1]; + // NOTE: if tensor is degenerate, we send 1 to prevent TensorIntDivisor constructor crash + m_fastOutputStrides[i] = internal::TensorIntDivisor(degenerate ? 1 : m_outputStrides[i]); + } + } else { + m_inputStrides[NumDims-1] = m_strides[NumDims-1]; + m_offsets[NumDims-1] = startIndicesClamped[NumDims-1]; + Index previousDimProduct = 1; + for (int i = NumDims - 2; i >= 0; --i) { + previousDimProduct *= input_dims[i+1]; + m_inputStrides[i] = previousDimProduct * m_strides[i]; + m_offsets[i] = startIndicesClamped[i] * previousDimProduct; + } + + m_outputStrides[NumDims-1] = 1; + for (int i = NumDims - 2; i >= 0; --i) { + m_outputStrides[i] = m_outputStrides[i+1] * output_dims[i+1]; + // NOTE: if tensor is degenerate, we send 1 to prevent TensorIntDivisor constructor crash + m_fastOutputStrides[i] = internal::TensorIntDivisor(degenerate ? 
1 : m_outputStrides[i]); + } + } + m_block_total_size_max = numext::maxi(static_cast(1), + device.lastLevelCacheSize() / + sizeof(Scalar)); + } + + typedef typename XprType::Index Index; + typedef typename XprType::Scalar Scalar; + typedef typename internal::remove_const::type ScalarNonConst; + typedef typename XprType::CoeffReturnType CoeffReturnType; + typedef typename PacketType::type PacketReturnType; + typedef Strides Dimensions; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; } + + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(CoeffReturnType*) { + m_impl.evalSubExprsIfNeeded(NULL); + return true; + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() { + m_impl.cleanup(); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const + { + return m_impl.coeff(srcCoeff(index)); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool vectorized) const { + return m_impl.costPerCoeff(vectorized) + TensorOpCost(0, 0, NumDims); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar* data() const { + return NULL; + } + + protected: + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index srcCoeff(Index index) const + { + Index inputIndex = 0; + if (static_cast(Layout) == static_cast(ColMajor)) { + for (int i = NumDims - 1; i >= 0; --i) { + const Index idx = index / m_fastOutputStrides[i]; + inputIndex += idx * m_inputStrides[i] + m_offsets[i]; + index -= idx * m_outputStrides[i]; + } + } else { + for (int i = 0; i < NumDims; ++i) { + const Index idx = index / m_fastOutputStrides[i]; + inputIndex += idx * m_inputStrides[i] + m_offsets[i]; + index -= idx * m_outputStrides[i]; + } + } + return inputIndex; + } + + static EIGEN_STRONG_INLINE Index clamp(Index value, Index min, Index max) { + return numext::maxi(min, numext::mini(max,value)); + } + + array m_outputStrides; + array, NumDims> m_fastOutputStrides; + array m_inputStrides; + TensorEvaluator m_impl; 
+ const Device& m_device; + DSizes m_startIndices; // clamped startIndices + DSizes m_dimensions; + DSizes m_offsets; // offset in a flattened shape + const Strides m_strides; + std::size_t m_block_total_size_max; +}; + +// Eval as lvalue +template +struct TensorEvaluator, Device> + : public TensorEvaluator, Device> +{ + typedef TensorEvaluator, Device> Base; + typedef TensorStridingSlicingOp XprType; + static const int NumDims = internal::array_size::value; + + enum { + IsAligned = false, + PacketAccess = false, + BlockAccess = false, + Layout = TensorEvaluator::Layout, + CoordAccess = TensorEvaluator::CoordAccess, + RawAccess = false + }; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device) + : Base(op, device) + { } + + typedef typename XprType::Index Index; + typedef typename XprType::Scalar Scalar; + typedef typename internal::remove_const::type ScalarNonConst; + typedef typename XprType::CoeffReturnType CoeffReturnType; + typedef typename PacketType::type PacketReturnType; + typedef Strides Dimensions; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType& coeffRef(Index index) + { + return this->m_impl.coeffRef(this->srcCoeff(index)); + } +}; + + } // end namespace Eigen #endif // EIGEN_CXX11_TENSOR_TENSOR_MORPHING_H diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorPadding.h b/unsupported/Eigen/CXX11/src/Tensor/TensorPadding.h index 88b838b27..647bcf108 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorPadding.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorPadding.h @@ -93,7 +93,7 @@ struct TensorEvaluator, Device static const int PacketSize = internal::unpacket_traits::size; enum { - IsAligned = false, + IsAligned = true, PacketAccess = TensorEvaluator::PacketAccess, Layout = TensorEvaluator::Layout, CoordAccess = true, @@ -106,7 +106,7 @@ struct TensorEvaluator, Device // The padding op doesn't change the rank of the tensor. 
Directly padding a scalar would lead // to a vector, which doesn't make sense. Instead one should reshape the scalar into a vector // of 1 element first and then pad. - EIGEN_STATIC_ASSERT(NumDims > 0, YOU_MADE_A_PROGRAMMING_MISTAKE); + EIGEN_STATIC_ASSERT((NumDims > 0), YOU_MADE_A_PROGRAMMING_MISTAKE); // Compute dimensions m_dimensions = m_impl.dimensions(); @@ -150,27 +150,26 @@ struct TensorEvaluator, Device if (static_cast(Layout) == static_cast(ColMajor)) { for (int i = NumDims - 1; i > 0; --i) { const Index idx = index / m_outputStrides[i]; - if (idx < m_padding[i].first || idx >= m_dimensions[i] - m_padding[i].second) { + if (isPaddingAtIndexForDim(idx, i)) { return m_paddingValue; } inputIndex += (idx - m_padding[i].first) * m_inputStrides[i]; index -= idx * m_outputStrides[i]; } - if (index < m_padding[0].first || index >= m_dimensions[0] - m_padding[0].second) { + if (isPaddingAtIndexForDim(index, 0)) { return m_paddingValue; } inputIndex += (index - m_padding[0].first); } else { for (int i = 0; i < NumDims - 1; ++i) { const Index idx = index / m_outputStrides[i+1]; - if (idx < m_padding[i].first || idx >= m_dimensions[i] - m_padding[i].second) { + if (isPaddingAtIndexForDim(idx, i)) { return m_paddingValue; } inputIndex += (idx - m_padding[i].first) * m_inputStrides[i]; index -= idx * m_outputStrides[i+1]; } - if (index < m_padding[NumDims-1].first || - index >= m_dimensions[NumDims-1] - m_padding[NumDims-1].second) { + if (isPaddingAtIndexForDim(index, NumDims-1)) { return m_paddingValue; } inputIndex += (index - m_padding[NumDims-1].first); @@ -187,43 +186,6 @@ struct TensorEvaluator, Device return packetRowMajor(index); } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(const array& coords) const - { - Index inputIndex; - if (static_cast(Layout) == static_cast(ColMajor)) { - { - const Index idx = coords[0]; - if (idx < m_padding[0].first || idx >= m_dimensions[0] - m_padding[0].second) { - return m_paddingValue; - } - inputIndex = idx - 
m_padding[0].first; - } - for (int i = 1; i < NumDims; ++i) { - const Index idx = coords[i]; - if (idx < m_padding[i].first || idx >= m_dimensions[i] - m_padding[i].second) { - return m_paddingValue; - } - inputIndex += (idx - m_padding[i].first) * m_inputStrides[i]; - } - } else { - { - const Index idx = coords[NumDims-1]; - if (idx < m_padding[NumDims-1].first || idx >= m_dimensions[NumDims-1] - m_padding[NumDims-1].second) { - return m_paddingValue; - } - inputIndex = idx - m_padding[NumDims-1].first; - } - for (int i = NumDims - 2; i >= 0; --i) { - const Index idx = coords[i]; - if (idx < m_padding[i].first || idx >= m_dimensions[i] - m_padding[i].second) { - return m_paddingValue; - } - inputIndex += (idx - m_padding[i].first) * m_inputStrides[i]; - } - } - return m_impl.coeff(inputIndex); - } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool vectorized) const { TensorOpCost cost = m_impl.costPerCoeff(vectorized); if (static_cast(Layout) == static_cast(ColMajor)) { @@ -239,6 +201,40 @@ struct TensorEvaluator, Device EIGEN_DEVICE_FUNC Scalar* data() const { return NULL; } private: + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE bool isPaddingAtIndexForDim( + Index index, int dim_index) const { +#if defined(EIGEN_HAS_INDEX_LIST) + return (!internal::index_pair_first_statically_eq(dim_index, 0) && + index < m_padding[dim_index].first) || + (!internal::index_pair_second_statically_eq(dim_index, 0) && + index >= m_dimensions[dim_index] - m_padding[dim_index].second); +#else + return (index < m_padding[dim_index].first) || + (index >= m_dimensions[dim_index] - m_padding[dim_index].second); +#endif + } + + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE bool isLeftPaddingCompileTimeZero( + int dim_index) const { +#if defined(EIGEN_HAS_INDEX_LIST) + return internal::index_pair_first_statically_eq(dim_index, 0); +#else + EIGEN_UNUSED_VARIABLE(dim_index); + return false; +#endif + } + + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE bool isRightPaddingCompileTimeZero( + int 
dim_index) const { +#if defined(EIGEN_HAS_INDEX_LIST) + return internal::index_pair_second_statically_eq(dim_index, 0); +#else + EIGEN_UNUSED_VARIABLE(dim_index); + return false; +#endif + } + + void updateCostPerDimension(TensorOpCost& cost, int i, bool first) const { const double in = static_cast(m_impl.dimensions()[i]); const double out = in + m_padding[i].first + m_padding[i].second; @@ -261,7 +257,7 @@ struct TensorEvaluator, Device EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packetColMajor(Index index) const { - EIGEN_STATIC_ASSERT(PacketSize > 1, YOU_MADE_A_PROGRAMMING_MISTAKE) + EIGEN_STATIC_ASSERT((PacketSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE) eigen_assert(index+PacketSize-1 < dimensions().TotalSize()); const Index initialIndex = index; @@ -273,15 +269,15 @@ struct TensorEvaluator, Device const Index firstPaddedRight = (m_dimensions[i] - m_padding[i].second) * m_outputStrides[i]; const Index lastPaddedRight = m_outputStrides[i+1]; - if (last < lastPaddedLeft) { + if (!isLeftPaddingCompileTimeZero(i) && last < lastPaddedLeft) { // all the coefficient are in the padding zone. return internal::pset1(m_paddingValue); } - else if (first >= firstPaddedRight && last < lastPaddedRight) { + else if (!isRightPaddingCompileTimeZero(i) && first >= firstPaddedRight && last < lastPaddedRight) { // all the coefficient are in the padding zone. return internal::pset1(m_paddingValue); } - else if (first >= lastPaddedLeft && last < firstPaddedRight) { + else if ((isLeftPaddingCompileTimeZero(i) && isRightPaddingCompileTimeZero(i)) || (first >= lastPaddedLeft && last < firstPaddedRight)) { // all the coefficient are between the 2 padding zones. 
const Index idx = index / m_outputStrides[i]; inputIndex += (idx - m_padding[i].first) * m_inputStrides[i]; @@ -299,15 +295,15 @@ struct TensorEvaluator, Device const Index firstPaddedRight = (m_dimensions[0] - m_padding[0].second); const Index lastPaddedRight = m_outputStrides[1]; - if (last < lastPaddedLeft) { + if (!isLeftPaddingCompileTimeZero(0) && last < lastPaddedLeft) { // all the coefficient are in the padding zone. return internal::pset1(m_paddingValue); } - else if (first >= firstPaddedRight && last < lastPaddedRight) { + else if (!isRightPaddingCompileTimeZero(0) && first >= firstPaddedRight && last < lastPaddedRight) { // all the coefficient are in the padding zone. return internal::pset1(m_paddingValue); } - else if (first >= lastPaddedLeft && last < firstPaddedRight) { + else if ((isLeftPaddingCompileTimeZero(0) && isRightPaddingCompileTimeZero(0)) || (first >= lastPaddedLeft && last < firstPaddedRight)) { // all the coefficient are between the 2 padding zones. inputIndex += (index - m_padding[0].first); return m_impl.template packet(inputIndex); @@ -318,7 +314,7 @@ struct TensorEvaluator, Device EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packetRowMajor(Index index) const { - EIGEN_STATIC_ASSERT(PacketSize > 1, YOU_MADE_A_PROGRAMMING_MISTAKE) + EIGEN_STATIC_ASSERT((PacketSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE) eigen_assert(index+PacketSize-1 < dimensions().TotalSize()); const Index initialIndex = index; @@ -331,15 +327,15 @@ struct TensorEvaluator, Device const Index firstPaddedRight = (m_dimensions[i] - m_padding[i].second) * m_outputStrides[i+1]; const Index lastPaddedRight = m_outputStrides[i]; - if (last < lastPaddedLeft) { + if (!isLeftPaddingCompileTimeZero(i) && last < lastPaddedLeft) { // all the coefficient are in the padding zone. 
return internal::pset1(m_paddingValue); } - else if (first >= firstPaddedRight && last < lastPaddedRight) { + else if (!isRightPaddingCompileTimeZero(i) && first >= firstPaddedRight && last < lastPaddedRight) { // all the coefficient are in the padding zone. return internal::pset1(m_paddingValue); } - else if (first >= lastPaddedLeft && last < firstPaddedRight) { + else if ((isLeftPaddingCompileTimeZero(i) && isRightPaddingCompileTimeZero(i)) || (first >= lastPaddedLeft && last < firstPaddedRight)) { // all the coefficient are between the 2 padding zones. const Index idx = index / m_outputStrides[i+1]; inputIndex += (idx - m_padding[i].first) * m_inputStrides[i]; @@ -357,15 +353,15 @@ struct TensorEvaluator, Device const Index firstPaddedRight = (m_dimensions[NumDims-1] - m_padding[NumDims-1].second); const Index lastPaddedRight = m_outputStrides[NumDims-1]; - if (last < lastPaddedLeft) { + if (!isLeftPaddingCompileTimeZero(NumDims-1) && last < lastPaddedLeft) { // all the coefficient are in the padding zone. return internal::pset1(m_paddingValue); } - else if (first >= firstPaddedRight && last < lastPaddedRight) { + else if (!isRightPaddingCompileTimeZero(NumDims-1) && first >= firstPaddedRight && last < lastPaddedRight) { // all the coefficient are in the padding zone. return internal::pset1(m_paddingValue); } - else if (first >= lastPaddedLeft && last < firstPaddedRight) { + else if ((isLeftPaddingCompileTimeZero(NumDims-1) && isRightPaddingCompileTimeZero(NumDims-1)) || (first >= lastPaddedLeft && last < firstPaddedRight)) { // all the coefficient are between the 2 padding zones. 
inputIndex += (index - m_padding[NumDims-1].first); return m_impl.template packet(inputIndex); diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorPatch.h b/unsupported/Eigen/CXX11/src/Tensor/TensorPatch.h index a87e45330..886a254f6 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorPatch.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorPatch.h @@ -184,7 +184,7 @@ struct TensorEvaluator, Device> template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const { - EIGEN_STATIC_ASSERT(PacketSize > 1, YOU_MADE_A_PROGRAMMING_MISTAKE) + EIGEN_STATIC_ASSERT((PacketSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE) eigen_assert(index+PacketSize-1 < dimensions().TotalSize()); Index output_stride_index = (static_cast(Layout) == static_cast(ColMajor)) ? NumDims - 1 : 0; diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorRandom.h b/unsupported/Eigen/CXX11/src/Tensor/TensorRandom.h new file mode 100644 index 000000000..1655a813e --- /dev/null +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorRandom.h @@ -0,0 +1,276 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2016 Benoit Steiner +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_CXX11_TENSOR_TENSOR_RANDOM_H +#define EIGEN_CXX11_TENSOR_TENSOR_RANDOM_H + +namespace Eigen { +namespace internal { + +namespace { + +EIGEN_DEVICE_FUNC uint64_t get_random_seed() { +#ifdef __CUDA_ARCH__ + // We don't support 3d kernels since we currently only use 1 and + // 2d kernels. + assert(threadIdx.z == 0); + return clock64() + + blockIdx.x * blockDim.x + threadIdx.x + + gridDim.x * blockDim.x * (blockIdx.y * blockDim.y + threadIdx.y); + +#elif defined _WIN32 + // Use the current time as a baseline. 
+ SYSTEMTIME st; + GetSystemTime(&st); + int time = st.wSecond + 1000 * st.wMilliseconds; + // Mix in a random number to make sure that we get different seeds if + // we try to generate seeds faster than the clock resolution. + // We need 2 random values since the generator only generates 16 bits at + // a time (https://msdn.microsoft.com/en-us/library/398ax69y.aspx) + int rnd1 = ::rand(); + int rnd2 = ::rand(); + uint64_t rnd = (rnd1 | rnd2 << 16) ^ time; + return rnd; + +#elif defined __APPLE__ + // Same approach as for win32, except that the random number generator + // is better (https://developer.apple.com/legacy/library/documentation/Darwin/Reference/ManPages/man3/random.3.html#//apple_ref/doc/man/3/random). + uint64_t rnd = ::random() ^ mach_absolute_time(); + return rnd; + +#else + // Augment the current time with pseudo random number generation + // to ensure that we get different seeds if we try to generate seeds + // faster than the clock resolution. + timespec ts; + clock_gettime(CLOCK_REALTIME, &ts); + uint64_t rnd = ::random() ^ ts.tv_nsec; + return rnd; +#endif +} + +static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE unsigned PCG_XSH_RS_generator(uint64_t* state) { + // TODO: Unify with the implementation in the non blocking thread pool. + uint64_t current = *state; + // Update the internal state + *state = current * 6364136223846793005ULL + 0xda3e39cb94b95bdbULL; + // Generate the random output (using the PCG-XSH-RS scheme) + return static_cast((current ^ (current >> 22)) >> (22 + (current >> 61))); +} + +static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE uint64_t PCG_XSH_RS_state(uint64_t seed) { + seed = seed ?
seed : get_random_seed(); + return seed * 6364136223846793005ULL + 0xda3e39cb94b95bdbULL; +} + +} // namespace + + +template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE +T RandomToTypeUniform(uint64_t* state) { + unsigned rnd = PCG_XSH_RS_generator(state); + return static_cast(rnd); +} + + +template <> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE +Eigen::half RandomToTypeUniform(uint64_t* state) { + Eigen::half result; + // Generate 10 random bits for the mantissa + unsigned rnd = PCG_XSH_RS_generator(state); + result.x = static_cast(rnd & 0x3ffu); + // Set the exponent + result.x |= (static_cast(15) << 10); + // Return the final result + return result - Eigen::half(1.0f); +} + + +template <> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE +float RandomToTypeUniform(uint64_t* state) { + typedef union { + uint32_t raw; + float fp; + } internal; + internal result; + // Generate 23 random bits for the mantissa + const unsigned rnd = PCG_XSH_RS_generator(state); + result.raw = rnd & 0x7fffffu; + // Set the exponent + result.raw |= (static_cast(127) << 23); + // Return the final result + return result.fp - 1.0f; +} + +template <> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE +double RandomToTypeUniform(uint64_t* state) { + typedef union { + uint64_t raw; + double dp; + } internal; + internal result; + result.raw = 0; + // Generate 52 random bits for the mantissa + // First generate the upper 20 bits + unsigned rnd1 = PCG_XSH_RS_generator(state) & 0xfffffu; + // Then generate the lower 32 bits + unsigned rnd2 = PCG_XSH_RS_generator(state); + result.raw = (static_cast(rnd1) << 32) | rnd2; + // Set the exponent + result.raw |= (static_cast(1023) << 52); + // Return the final result + return result.dp - 1.0; +} + +template <> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE +std::complex RandomToTypeUniform >(uint64_t* state) { + return std::complex(RandomToTypeUniform(state), + RandomToTypeUniform(state)); +} +template <> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE +std::complex RandomToTypeUniform >(uint64_t*
state) { + return std::complex(RandomToTypeUniform(state), + RandomToTypeUniform(state)); +} + +template class UniformRandomGenerator { + public: + static const bool PacketAccess = true; + + // Uses the given "seed" if non-zero, otherwise uses a random seed. + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE UniformRandomGenerator( + uint64_t seed = 0) { + m_state = PCG_XSH_RS_state(seed); + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE UniformRandomGenerator( + const UniformRandomGenerator& other) { + m_state = other.m_state; + } + + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + T operator()(Index i) const { + uint64_t local_state = m_state + i; + T result = RandomToTypeUniform(&local_state); + m_state = local_state; + return result; + } + + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + Packet packetOp(Index i) const { + const int packetSize = internal::unpacket_traits::size; + EIGEN_ALIGN_MAX T values[packetSize]; + uint64_t local_state = m_state + i; + for (int j = 0; j < packetSize; ++j) { + values[j] = RandomToTypeUniform(&local_state); + } + m_state = local_state; + return internal::pload(values); + } + + private: + mutable uint64_t m_state; +}; + +template +struct functor_traits > { + enum { + // Rough estimate for floating point, multiplied by ceil(sizeof(T) / sizeof(float)). + Cost = 12 * NumTraits::AddCost * + ((sizeof(Scalar) + sizeof(float) - 1) / sizeof(float)), + PacketAccess = UniformRandomGenerator::PacketAccess + }; +}; + + + +template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE +T RandomToTypeNormal(uint64_t* state) { + // Use the ratio of uniform method to generate numbers following a normal + // distribution. See for example Numerical Recipes chapter 7.3.9 for the + // details. 
+ T u, v, q; + do { + u = RandomToTypeUniform(state); + v = T(1.7156) * (RandomToTypeUniform(state) - T(0.5)); + const T x = u - T(0.449871); + const T y = numext::abs(v) + T(0.386595); + q = x*x + y * (T(0.196)*y - T(0.25472)*x); + } while (q > T(0.27597) && + (q > T(0.27846) || v*v > T(-4) * numext::log(u) * u*u)); + + return v/u; +} + +template <> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE +std::complex RandomToTypeNormal >(uint64_t* state) { + return std::complex(RandomToTypeNormal(state), + RandomToTypeNormal(state)); +} +template <> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE +std::complex RandomToTypeNormal >(uint64_t* state) { + return std::complex(RandomToTypeNormal(state), + RandomToTypeNormal(state)); +} + + +template class NormalRandomGenerator { + public: + static const bool PacketAccess = true; + + // Uses the given "seed" if non-zero, otherwise uses a random seed. + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE NormalRandomGenerator(uint64_t seed = 0) { + m_state = PCG_XSH_RS_state(seed); + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE NormalRandomGenerator( + const NormalRandomGenerator& other) { + m_state = other.m_state; + } + + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + T operator()(Index i) const { + uint64_t local_state = m_state + i; + T result = RandomToTypeNormal(&local_state); + m_state = local_state; + return result; + } + + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + Packet packetOp(Index i) const { + const int packetSize = internal::unpacket_traits::size; + EIGEN_ALIGN_MAX T values[packetSize]; + uint64_t local_state = m_state + i; + for (int j = 0; j < packetSize; ++j) { + values[j] = RandomToTypeNormal(&local_state); + } + m_state = local_state; + return internal::pload(values); + } + + private: + mutable uint64_t m_state; +}; + + +template +struct functor_traits > { + enum { + // On average, we need to generate about 3 random numbers + // 15 mul, 8 add, 1.5 logs + Cost = 3 * functor_traits >::Cost + + 15 * NumTraits::AddCost + 8 * 
NumTraits::AddCost + + 3 * functor_traits >::Cost / 2, + PacketAccess = NormalRandomGenerator::PacketAccess + }; +}; + + +} // end namespace internal +} // end namespace Eigen + +#endif // EIGEN_CXX11_TENSOR_TENSOR_RANDOM_H diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h b/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h index 885295f0a..a87777b22 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h @@ -87,7 +87,7 @@ struct preserve_inner_most_dims { static const bool value = false; }; -#if defined(EIGEN_HAS_CONSTEXPR) && defined(EIGEN_HAS_VARIADIC_TEMPLATES) +#if EIGEN_HAS_CONSTEXPR && EIGEN_HAS_VARIADIC_TEMPLATES template struct are_inner_most_dims{ static const bool tmp1 = indices_statically_known_to_increase(); @@ -122,7 +122,7 @@ struct preserve_inner_most_dims{ template struct GenericDimReducer { static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reduce(const Self& self, typename Self::Index firstIndex, Op& reducer, typename Self::CoeffReturnType* accum) { - EIGEN_STATIC_ASSERT(DimIndex > 0, YOU_MADE_A_PROGRAMMING_MISTAKE); + EIGEN_STATIC_ASSERT((DimIndex > 0), YOU_MADE_A_PROGRAMMING_MISTAKE); for (int j = 0; j < self.m_reducedDims[DimIndex]; ++j) { const typename Self::Index input = firstIndex + j * self.m_reducedStrides[DimIndex]; GenericDimReducer::reduce(self, input, reducer, accum); @@ -183,7 +183,7 @@ struct InnerMostDimPreserver { template struct InnerMostDimPreserver { static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reduce(const Self& self, typename Self::Index firstIndex, Op& reducer, typename Self::PacketReturnType* accum) { - EIGEN_STATIC_ASSERT(DimIndex > 0, YOU_MADE_A_PROGRAMMING_MISTAKE); + EIGEN_STATIC_ASSERT((DimIndex > 0), YOU_MADE_A_PROGRAMMING_MISTAKE); for (typename Self::Index j = 0; j < self.m_reducedDims[DimIndex]; ++j) { const typename Self::Index input = firstIndex + j * self.m_reducedStrides[DimIndex]; InnerMostDimPreserver::reduce(self, 
input, reducer, accum); @@ -248,16 +248,12 @@ struct FullReducer { *output = reducer.finalize(reducer.initialize()); return; } -#ifdef EIGEN_USE_COST_MODEL const TensorOpCost cost = self.m_impl.costPerCoeff(Vectorizable) + TensorOpCost(0, 0, internal::functor_traits::Cost, Vectorizable, PacketSize); const int num_threads = TensorCostModel::numThreads( num_coeffs, cost, device.numThreads()); -#else - const int num_threads = device.numThreads(); -#endif if (num_threads == 1) { *output = InnerMostDimReducer::reduce(self, 0, num_coeffs, reducer); @@ -268,7 +264,7 @@ struct FullReducer { const Index numblocks = blocksize > 0 ? num_coeffs / blocksize : 0; eigen_assert(num_coeffs >= numblocks * blocksize); - Barrier barrier(numblocks); + Barrier barrier(internal::convert_index(numblocks)); MaxSizeVector shards(numblocks, reducer.initialize()); for (Index i = 0; i < numblocks; ++i) { device.enqueue_with_barrier(&barrier, &FullReducerShard::run, @@ -320,7 +316,18 @@ struct OuterReducer { #if defined(EIGEN_USE_GPU) && defined(__CUDACC__) template -__global__ void FullReductionKernel(R, const S, I, typename S::CoeffReturnType*); +__global__ void FullReductionKernel(R, const S, I, typename S::CoeffReturnType*, unsigned int*); + + +#ifdef EIGEN_HAS_CUDA_FP16 +template +__global__ void ReductionInitFullReduxKernelHalfFloat(R, const S, I, half2*); +template +__global__ void FullReductionKernelHalfFloat(R, const S, I, half*, half2*); +template +__global__ void InnerReductionKernelHalfFloat(R, const S, I, I, half*); + +#endif template __global__ void InnerReductionKernel(R, const S, I, I, typename S::CoeffReturnType*); @@ -396,7 +403,7 @@ struct TensorEvaluator, Device> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device) : m_impl(op.expression(), device), m_reducer(op.reducer()), m_result(NULL), m_device(device) { - EIGEN_STATIC_ASSERT(NumInputDims >= NumReducedDims, YOU_MADE_A_PROGRAMMING_MISTAKE); + EIGEN_STATIC_ASSERT((NumInputDims >= 
NumReducedDims), YOU_MADE_A_PROGRAMMING_MISTAKE); EIGEN_STATIC_ASSERT((!ReducingInnerMostDims | !PreservingInnerMostDims | (NumReducedDims == NumInputDims)), YOU_MADE_A_PROGRAMMING_MISTAKE); @@ -464,22 +471,14 @@ struct TensorEvaluator, Device> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; } - static bool size_large_enough(Index total_size) { -#ifndef EIGEN_USE_COST_MODEL - return total_size > 1024 * 1024; -#else - return true || total_size; -#endif - } - EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool evalSubExprsIfNeeded(CoeffReturnType* data) { m_impl.evalSubExprsIfNeeded(NULL); // Use the FullReducer if possible. - if (RunningFullReduction && internal::FullReducer::HasOptimizedImplementation && + if (RunningFullReduction && + internal::FullReducer::HasOptimizedImplementation && ((RunningOnGPU && (m_device.majorDeviceVersion() >= 3)) || - (!RunningOnGPU && size_large_enough(internal::array_prod(m_impl.dimensions()))))) { - + !RunningOnGPU)) { bool need_assign = false; if (!data) { m_result = static_cast(m_device.allocate(sizeof(CoeffReturnType))); @@ -493,7 +492,7 @@ struct TensorEvaluator, Device> } // Attempt to use an optimized reduction. 
- else if (RunningOnGPU && data && (m_device.majorDeviceVersion() >= 3)) { + else if (RunningOnGPU && (m_device.majorDeviceVersion() >= 3)) { bool reducing_inner_dims = true; for (int i = 0; i < NumReducedDims; ++i) { if (static_cast(Layout) == static_cast(ColMajor)) { @@ -506,8 +505,25 @@ struct TensorEvaluator, Device> (reducing_inner_dims || ReducingInnerMostDims)) { const Index num_values_to_reduce = internal::array_prod(m_reducedDims); const Index num_coeffs_to_preserve = internal::array_prod(m_dimensions); + if (!data) { + if (num_coeffs_to_preserve < 1024 && num_values_to_reduce > num_coeffs_to_preserve && num_values_to_reduce > 128) { + data = static_cast(m_device.allocate(sizeof(CoeffReturnType) * num_coeffs_to_preserve)); + m_result = data; + } + else { + return true; + } + } Op reducer(m_reducer); - return internal::InnerReducer::run(*this, reducer, m_device, data, num_values_to_reduce, num_coeffs_to_preserve); + if (internal::InnerReducer::run(*this, reducer, m_device, data, num_values_to_reduce, num_coeffs_to_preserve)) { + if (m_result) { + m_device.deallocate(m_result); + m_result = NULL; + } + return true; + } else { + return (m_result != NULL); + } } bool preserving_inner_dims = true; @@ -522,8 +538,25 @@ struct TensorEvaluator, Device> preserving_inner_dims) { const Index num_values_to_reduce = internal::array_prod(m_reducedDims); const Index num_coeffs_to_preserve = internal::array_prod(m_dimensions); + if (!data) { + if (num_coeffs_to_preserve < 1024 && num_values_to_reduce > num_coeffs_to_preserve && num_values_to_reduce > 32) { + data = static_cast(m_device.allocate(sizeof(CoeffReturnType) * num_coeffs_to_preserve)); + m_result = data; + } + else { + return true; + } + } Op reducer(m_reducer); - return internal::OuterReducer::run(*this, reducer, m_device, data, num_values_to_reduce, num_coeffs_to_preserve); + if (internal::OuterReducer::run(*this, reducer, m_device, data, num_values_to_reduce, num_coeffs_to_preserve)) { + if (m_result) { + 
m_device.deallocate(m_result); + m_result = NULL; + } + return true; + } else { + return (m_result != NULL); + } } } return true; @@ -533,13 +566,14 @@ struct TensorEvaluator, Device> m_impl.cleanup(); if (m_result) { m_device.deallocate(m_result); + m_result = NULL; } } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const { - if (RunningFullReduction && m_result) { - return *m_result; + if ((RunningFullReduction || RunningOnGPU) && m_result) { + return *(m_result + index); } Op reducer(m_reducer); if (ReducingInnerMostDims || RunningFullReduction) { @@ -558,8 +592,12 @@ struct TensorEvaluator, Device> template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const { - EIGEN_STATIC_ASSERT(PacketSize > 1, YOU_MADE_A_PROGRAMMING_MISTAKE) - eigen_assert(index + PacketSize - 1 < dimensions().TotalSize()); + EIGEN_STATIC_ASSERT((PacketSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE) + eigen_assert(index + PacketSize - 1 < Index(internal::array_prod(dimensions()))); + + if (RunningOnGPU && m_result) { + return internal::pload(m_result + index); + } EIGEN_ALIGN_MAX typename internal::remove_const::type values[PacketSize]; if (ReducingInnerMostDims) { @@ -617,11 +655,19 @@ struct TensorEvaluator, Device> template friend struct internal::FullReducerShard; #endif #if defined(EIGEN_USE_GPU) && defined(__CUDACC__) - template friend void internal::FullReductionKernel(R, const S, I, typename S::CoeffReturnType*); + template friend void internal::FullReductionKernel(R, const S, I, typename S::CoeffReturnType*, unsigned int*); +#ifdef EIGEN_HAS_CUDA_FP16 + template friend void internal::ReductionInitFullReduxKernelHalfFloat(R, const S, I, half2*); + template friend void internal::FullReductionKernelHalfFloat(R, const S, I, half*, half2*); + template friend void internal::InnerReductionKernelHalfFloat(R, const S, I, I, half*); +#endif template friend void internal::InnerReductionKernel(R, const S, I, I, typename S::CoeffReturnType*); + 
template friend void internal::OuterReductionKernel(R, const S, I, I, typename S::CoeffReturnType*); #endif + template friend struct internal::InnerReducer; + // Returns the Index in the input tensor of the first value that needs to be // used to compute the reduction at output index "index". EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index firstInput(Index index) const { diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorReductionCuda.h b/unsupported/Eigen/CXX11/src/Tensor/TensorReductionCuda.h index fd2587dd5..65638b6a8 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorReductionCuda.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorReductionCuda.h @@ -67,8 +67,41 @@ __device__ EIGEN_ALWAYS_INLINE void atomicReduce(T* output, T accum, R& reducer) #endif } -template -__device__ inline void atomicReduce(T* output, T accum, SumReducer&) { +// We extend atomicExch to support extra data types +template +__device__ inline Type atomicExchCustom(Type* address, Type val) { + return atomicExch(address, val); +} + +template <> +__device__ inline double atomicExchCustom(double* address, double val) { + unsigned long long int* address_as_ull = reinterpret_cast(address); + return __longlong_as_double(atomicExch(address_as_ull, __double_as_longlong(val))); +} + +#ifdef EIGEN_HAS_CUDA_FP16 +template