diff --git a/Eigen/Cholesky b/Eigen/Cholesky index a318ceb79..2c686f175 100644 --- a/Eigen/Cholesky +++ b/Eigen/Cholesky @@ -32,11 +32,7 @@ #include "src/Cholesky/LLT.h" #include "src/Cholesky/LDLT.h" #ifdef EIGEN_USE_LAPACKE -#ifdef EIGEN_USE_MKL -#include "mkl_lapacke.h" -#else -#include "src/misc/lapacke.h" -#endif +#include "src/misc/lapacke_helpers.h" #include "src/Cholesky/LLT_LAPACKE.h" #endif diff --git a/Eigen/LU b/Eigen/LU index 1236ceb04..b7f9a8a9d 100644 --- a/Eigen/LU +++ b/Eigen/LU @@ -28,11 +28,7 @@ #include "src/LU/FullPivLU.h" #include "src/LU/PartialPivLU.h" #ifdef EIGEN_USE_LAPACKE -#ifdef EIGEN_USE_MKL -#include "mkl_lapacke.h" -#else -#include "src/misc/lapacke.h" -#endif +#include "src/misc/lapacke_helpers.h" #include "src/LU/PartialPivLU_LAPACKE.h" #endif #include "src/LU/Determinant.h" diff --git a/Eigen/QR b/Eigen/QR index 8465b62ce..1f6c22e0d 100644 --- a/Eigen/QR +++ b/Eigen/QR @@ -36,11 +36,7 @@ #include "src/QR/ColPivHouseholderQR.h" #include "src/QR/CompleteOrthogonalDecomposition.h" #ifdef EIGEN_USE_LAPACKE -#ifdef EIGEN_USE_MKL -#include "mkl_lapacke.h" -#else -#include "src/misc/lapacke.h" -#endif +#include "src/misc/lapacke_helpers.h" #include "src/QR/HouseholderQR_LAPACKE.h" #include "src/QR/ColPivHouseholderQR_LAPACKE.h" #endif diff --git a/Eigen/src/Cholesky/LLT_LAPACKE.h b/Eigen/src/Cholesky/LLT_LAPACKE.h index bde9bcdd5..62bc679d4 100644 --- a/Eigen/src/Cholesky/LLT_LAPACKE.h +++ b/Eigen/src/Cholesky/LLT_LAPACKE.h @@ -39,44 +39,12 @@ namespace Eigen { namespace internal { -namespace lapacke_llt_helpers { - - // ------------------------------------------------------------------------------------------------------------------- - // Translation from Eigen to Lapacke types - // ------------------------------------------------------------------------------------------------------------------- - - // For complex numbers, the types in Eigen and Lapacke are different, but layout compatible. - template struct translate_type; - template<> struct translate_type { using type = float; }; - template<> struct translate_type { using type = double; }; - template<> struct translate_type { using type = lapack_complex_double; }; - template<> struct translate_type { using type = lapack_complex_float; }; - - // ------------------------------------------------------------------------------------------------------------------- - // Dispatch for potrf handling double, float, complex double, complex float types - // ------------------------------------------------------------------------------------------------------------------- - - inline lapack_int potrf(lapack_int matrix_order, char uplo, lapack_int size, double* a, lapack_int lda) { - return LAPACKE_dpotrf( matrix_order, uplo, size, a, lda ); - } - - inline lapack_int potrf(lapack_int matrix_order, char uplo, lapack_int size, float* a, lapack_int lda) { - return LAPACKE_spotrf( matrix_order, uplo, size, a, lda ); - } - - inline lapack_int potrf(lapack_int matrix_order, char uplo, lapack_int size, lapack_complex_double* a, lapack_int lda) { - return LAPACKE_zpotrf( matrix_order, uplo, size, a, lda ); - } - - inline lapack_int potrf(lapack_int matrix_order, char uplo, lapack_int size, lapack_complex_float* a, lapack_int lda) { - return LAPACKE_cpotrf( matrix_order, uplo, size, a, lda ); - } - +namespace lapacke_helpers { // ------------------------------------------------------------------------------------------------------------------- // Dispatch for rank update handling upper and lower parts // ------------------------------------------------------------------------------------------------------------------- - template + template struct rank_update {}; template<> @@ -100,9 +68,8 @@ namespace lapacke_llt_helpers { // Generic lapacke llt implementation that hands of to the dispatches // ------------------------------------------------------------------------------------------------------------------- - template + template struct lapacke_llt { - using BlasType = typename translate_type::type; template static Index blocked(MatrixType& m) { @@ -110,15 +77,13 @@ namespace lapacke_llt_helpers { if(m.rows() == 0) { return -1; } - /* Set up parameters for ?potrf */ - lapack_int size = convert_index(m.rows()); - lapack_int StorageOrder = MatrixType::Flags&RowMajorBit?RowMajor:ColMajor; - lapack_int matrix_order = StorageOrder==RowMajor ? LAPACK_ROW_MAJOR : LAPACK_COL_MAJOR; + lapack_int size = to_lapack(m.rows()); + lapack_int matrix_order = lapack_storage_of(m); Scalar* a = &(m.coeffRef(0,0)); - lapack_int lda = convert_index(m.outerStride()); + lapack_int lda = to_lapack(m.outerStride()); - lapack_int info = potrf( matrix_order, Mode == Lower ? 'L' : 'U', size, (BlasType*)a, lda ); + lapack_int info = potrf(matrix_order, translate_mode, size, to_lapack(a), lda ); info = (info==0) ? -1 : info>0 ? info-1 : size; return info; } @@ -130,7 +95,7 @@ namespace lapacke_llt_helpers { } }; } -// end namespace lapacke_llt_helpers +// end namespace lapacke_helpers /* * Here, we just put the generic implementation from lapacke_llt into a full specialization of the llt_inplace @@ -139,13 +104,13 @@ namespace lapacke_llt_helpers { */ #define EIGEN_LAPACKE_LLT(EIGTYPE) \ -template<> struct llt_inplace : public lapacke_llt_helpers::lapacke_llt {}; \ -template<> struct llt_inplace : public lapacke_llt_helpers::lapacke_llt {}; +template<> struct llt_inplace : public lapacke_helpers::lapacke_llt {}; \ +template<> struct llt_inplace : public lapacke_helpers::lapacke_llt {}; EIGEN_LAPACKE_LLT(double) EIGEN_LAPACKE_LLT(float) -EIGEN_LAPACKE_LLT(dcomplex) -EIGEN_LAPACKE_LLT(scomplex) +EIGEN_LAPACKE_LLT(std::complex) +EIGEN_LAPACKE_LLT(std::complex) #undef EIGEN_LAPACKE_LLT diff --git a/Eigen/src/LU/PartialPivLU_LAPACKE.h b/Eigen/src/LU/PartialPivLU_LAPACKE.h index 2f244f6b4..b63644239 100644 --- a/Eigen/src/LU/PartialPivLU_LAPACKE.h +++ b/Eigen/src/LU/PartialPivLU_LAPACKE.h @@ -39,44 +39,55 @@ namespace Eigen { namespace internal { -/** \internal Specialization for the data types supported by LAPACKe */ +namespace lapacke_helpers { +// ------------------------------------------------------------------------------------------------------------------- +// Generic lapacke partial lu implementation that converts arguments and dispatches to the function above +// ------------------------------------------------------------------------------------------------------------------- -#define EIGEN_LAPACKE_LU_PARTPIV(EIGTYPE, LAPACKE_TYPE, LAPACKE_PREFIX) \ -template \ -struct partial_lu_impl \ -{ \ - /* \internal performs the LU decomposition in-place of the matrix represented */ \ - static lapack_int blocked_lu(Index rows, Index cols, EIGTYPE* lu_data, Index luStride, lapack_int* row_transpositions, lapack_int& nb_transpositions, lapack_int maxBlockSize=256) \ - { \ - EIGEN_UNUSED_VARIABLE(maxBlockSize);\ - lapack_int matrix_order, first_zero_pivot; \ - lapack_int m, n, lda, *ipiv, info; \ - EIGTYPE* a; \ -/* Set up parameters for ?getrf */ \ - matrix_order = StorageOrder==RowMajor ? LAPACK_ROW_MAJOR : LAPACK_COL_MAJOR; \ - lda = convert_index(luStride); \ - a = lu_data; \ - ipiv = row_transpositions; \ - m = convert_index(rows); \ - n = convert_index(cols); \ - nb_transpositions = 0; \ -\ - info = LAPACKE_##LAPACKE_PREFIX##getrf( matrix_order, m, n, (LAPACKE_TYPE*)a, lda, ipiv ); \ -\ - for(int i=0;i= 0); \ -/* something should be done with nb_transpositions */ \ -\ - first_zero_pivot = info; \ - return first_zero_pivot; \ - } \ +template +struct lapacke_partial_lu { + /** \internal performs the LU decomposition in-place of the matrix represented */ + static lapack_int blocked_lu(Index rows, Index cols, Scalar* lu_data, Index luStride, lapack_int* row_transpositions, + lapack_int& nb_transpositions, lapack_int maxBlockSize=256) + { + EIGEN_UNUSED_VARIABLE(maxBlockSize); + // Set up parameters for getrf + lapack_int matrix_order = StorageOrder==RowMajor ? LAPACK_ROW_MAJOR : LAPACK_COL_MAJOR; + lapack_int lda = to_lapack(luStride); + Scalar* a = lu_data; + lapack_int* ipiv = row_transpositions; + lapack_int m = to_lapack(rows); + lapack_int n = to_lapack(cols); + nb_transpositions = 0; + + lapack_int info = getrf(matrix_order, m, n, to_lapack(a), lda, ipiv ); + eigen_assert(info >= 0); + + for(int i=0; i \ +struct partial_lu_impl : public lapacke_helpers::lapacke_partial_lu {}; + +EIGEN_LAPACKE_PARTIAL_LU(double) +EIGEN_LAPACKE_PARTIAL_LU(float) +EIGEN_LAPACKE_PARTIAL_LU(std::complex) +EIGEN_LAPACKE_PARTIAL_LU(std::complex) + +#undef EIGEN_LAPACKE_PARTIAL_LU } // end namespace internal diff --git a/Eigen/src/QR/HouseholderQR_LAPACKE.h b/Eigen/src/QR/HouseholderQR_LAPACKE.h index ef6776002..57c2f6ab3 100644 --- a/Eigen/src/QR/HouseholderQR_LAPACKE.h +++ b/Eigen/src/QR/HouseholderQR_LAPACKE.h @@ -40,28 +40,35 @@ namespace Eigen { namespace internal { -/** \internal Specialization for the data types supported by LAPACKe */ +namespace lapacke_helpers { -#define EIGEN_LAPACKE_QR_NOPIV(EIGTYPE, LAPACKE_TYPE, LAPACKE_PREFIX) \ -template \ -struct householder_qr_inplace_blocked \ -{ \ - static void run(MatrixQR& mat, HCoeffs& hCoeffs, Index = 32, \ - typename MatrixQR::Scalar* = 0) \ - { \ - lapack_int m = (lapack_int) mat.rows(); \ - lapack_int n = (lapack_int) mat.cols(); \ - lapack_int lda = (lapack_int) mat.outerStride(); \ - lapack_int matrix_order = (MatrixQR::IsRowMajor) ? LAPACK_ROW_MAJOR : LAPACK_COL_MAJOR; \ - LAPACKE_##LAPACKE_PREFIX##geqrf( matrix_order, m, n, (LAPACKE_TYPE*)mat.data(), lda, (LAPACKE_TYPE*)hCoeffs.data()); \ - hCoeffs.adjointInPlace(); \ - } \ +template +struct lapacke_hqr +{ + static void run(MatrixQR& mat, HCoeffs& hCoeffs, Index = 32, typename MatrixQR::Scalar* = 0) + { + lapack_int m = to_lapack(mat.rows()); + lapack_int n = to_lapack(mat.cols()); + lapack_int lda = to_lapack(mat.outerStride()); + lapack_int matrix_order = lapack_storage_of(mat); + geqrf(matrix_order, m, n, to_lapack(mat.data()), lda, to_lapack(hCoeffs.data())); + hCoeffs.adjointInPlace(); + } }; -EIGEN_LAPACKE_QR_NOPIV(double, double, d) -EIGEN_LAPACKE_QR_NOPIV(float, float, s) -EIGEN_LAPACKE_QR_NOPIV(dcomplex, lapack_complex_double, z) -EIGEN_LAPACKE_QR_NOPIV(scomplex, lapack_complex_float, c) +} + +/** \internal Specialization for the data types supported by LAPACKe */ +#define EIGEN_LAPACKE_HH_QR(EIGTYPE) \ +template \ +struct householder_qr_inplace_blocked : public lapacke_helpers::lapacke_hqr {}; + +EIGEN_LAPACKE_HH_QR(double) +EIGEN_LAPACKE_HH_QR(float) +EIGEN_LAPACKE_HH_QR(std::complex) +EIGEN_LAPACKE_HH_QR(std::complex) + +#undef EIGEN_LAPACKE_HH_QR } // end namespace internal diff --git a/Eigen/src/misc/lapacke_helpers.h b/Eigen/src/misc/lapacke_helpers.h new file mode 100644 index 000000000..6fff863f8 --- /dev/null +++ b/Eigen/src/misc/lapacke_helpers.h @@ -0,0 +1,159 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2021 Erik Schultheis +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_LAPACKE_HELPERS_H +#define EIGEN_LAPACKE_HELPERS_H + +#include "./InternalHeaderCheck.h" + +#ifdef EIGEN_USE_MKL +#include "mkl_lapacke.h" +#else +#include "lapacke.h" +#endif + +namespace Eigen { +namespace internal { +/** + * \internal + * \brief Implementation details and helper functions for the lapacke glue code. + */ +namespace lapacke_helpers { + +// --------------------------------------------------------------------------------------------------------------------- +// Translation from Eigen to Lapacke for types and constants +// --------------------------------------------------------------------------------------------------------------------- + +// For complex numbers, the types in Eigen and Lapacke are different, but layout compatible. +template +struct translate_type_imp; +template<> +struct translate_type_imp { + using type = float; +}; +template<> +struct translate_type_imp { + using type = double; +}; +template<> +struct translate_type_imp> { + using type = lapack_complex_double; +}; +template<> +struct translate_type_imp> { + using type = lapack_complex_float; +}; + +/// Given an Eigen types, this is defined to be the corresponding, layout-compatible lapack type +template +using translated_type = typename translate_type_imp::type; + +/// These functions convert their arguments from Eigen to Lapack types +/// This function performs conversion for any of the translations defined above. +template> +auto to_lapack(Source value) { return static_cast(value); } + +/// This function performs conversions for pointer types corresponding to the translations abovce. +/// This is valid because the translations are between layout-compatible types. +template> +auto to_lapack(Source *value) { return reinterpret_cast(value); } + +/// This function converts the Eigen Index to a lapack index, with possible range checks +/// \sa internal::convert_index +lapack_int to_lapack(Index index) { + return convert_index(index); +} + +/// translates storage order of the given Eigen object to the corresponding lapack constant +template +EIGEN_CONSTEXPR lapack_int lapack_storage_of(const EigenBase &) { + return Derived::IsRowMajor ? LAPACK_ROW_MAJOR : LAPACK_COL_MAJOR; +} + +/// translate UpLo type to the corresponding letter code +template char translate_mode; +template<> EIGEN_CONSTEXPR const char translate_mode = 'L'; +template<> EIGEN_CONSTEXPR const char translate_mode = 'U'; + + +// --------------------------------------------------------------------------------------------------------------------- +// Automatic generation of low-level wrappers +// --------------------------------------------------------------------------------------------------------------------- + +/*! + * \internal + * \brief Helper type to facilitate the wrapping of raw LAPACKE functions for different types into a single, overloaded C++ function. + * This is achieved in combination with \r EIGEN_MAKE_LAPACKE_WRAPPER + * \details This implementation works by providing an overloaded call function that just forwards its arguments to the + * underlying lapack function. Each of these overloads is enabled only if the call is actually well formed. + * Because these lapack functions take pointers to the underlying scalar type as arguments, even though the actual Scalars + * would be implicitly convertible, the pointers are not and therefore only a single overload can be valid at the same time. + * Thus, despite all functions taking fully generic `Args&&... args` as arguments, there is never any ambiguity. + */ +template +struct WrappingHelper { + // The naming of double, single, double complex and single complex is purely for readability + // and doesn't actually affect the workings of this class. In principle, the arguments can + // be supplied in any permuted order. + DoubleFn double_; SingleFn single_; DoubleCpxFn double_cpx_; SingleCpxFn single_cpx_; + + template + auto call(Args&&... args) -> decltype(double_(std::forward(args)...)) { + return double_(std::forward(args)...); + } + + template + auto call(Args&&... args) -> decltype(single_(std::forward(args)...)){ + return single_(std::forward(args)...); + } + + template + auto call(Args&&... args) -> decltype(double_cpx_(std::forward(args)...)){ + return double_cpx_(std::forward(args)...); + } + + template + auto call(Args&&... args) -> decltype(single_cpx_(std::forward(args)...)){ + return single_cpx_(std::forward(args)...); + } +}; + +/** \internal Helper function that generates a `WrappingHelper` object with the given function pointers and + * invokes its `call` method, thus selecting one of the overloads. + * \sa EIGEN_MAKE_LAPACKE_WRAPPER + */ +template +auto call_wrapper(DoubleFn df, SingleFn sf, DoubleCpxFn dcf, SingleCpxFn scf, Args&&... args) { + WrappingHelper helper{df, sf, dcf, scf}; + return helper.call(std::forward(args)...); +} + +/** + * \internal + * Generates a new function `Function` that dispatches to the corresponding LAPACKE_? prefixed functions. + * \sa WrappingHelper + */ +#define EIGEN_MAKE_LAPACKE_WRAPPER(FUNCTION) \ +template \ +auto FUNCTION(Args&&... args) { return call_wrapper(LAPACKE_d##FUNCTION, LAPACKE_s##FUNCTION, LAPACKE_z##FUNCTION, LAPACKE_c##FUNCTION, std::forward(args)...); } + +// Now with this macro and the helper wrappers, we can generate the dispatch for all the lapacke functions that are +// used in Eigen. +// We define these here instead of in the files where they are used because this allows us to #undef the macro again +// right here +EIGEN_MAKE_LAPACKE_WRAPPER(potrf) +EIGEN_MAKE_LAPACKE_WRAPPER(getrf) +EIGEN_MAKE_LAPACKE_WRAPPER(geqrf) + +#undef EIGEN_MAKE_LAPACKE_WRAPPER +} +} +} + +#endif // EIGEN_LAPACKE_HELPERS_H \ No newline at end of file