diff --git a/unsupported/Eigen/FFT b/unsupported/Eigen/FFT index 59bb7165e..b929e842f 100644 --- a/unsupported/Eigen/FFT +++ b/unsupported/Eigen/FFT @@ -35,13 +35,13 @@ * It is a mixed-radix Fast Fourier Transform based up on the principle, "Keep It Simple, Stupid." * Notice that:kissfft fails to handle "atypically-sized" inputs(i.e., sizes with large factors),a workaround is using fftw or pocketfft. * - fftw (http://www.fftw.org) : faster, GPL -- incompatible with Eigen in LGPL form, bigger code size. - * - MKL (http://en.wikipedia.org/wiki/Math_Kernel_Library) : fastest, commercial -- may be incompatible with Eigen in GPL form. + * - MKL (https://www.intel.com/content/www/us/en/developer/tools/oneapi/onemkl-download.html) : fastest, free -- may be incompatible with Eigen in GPL form. * - pocketfft (https://gitlab.mpcdf.mpg.de/mtr/pocketfft) : faster than kissfft, BSD 3-clause. * It is a heavily modified implementation of FFTPack, with the following advantages: * 1.strictly C++11 compliant * 2.more accurate twiddle factor computation * 3.very fast plan generation - * 4.worst case complexity for transform sizes with large prime factors is N*log(N), because Bluestein's algorithm is used for these cases. + * 4.worst case complexity for transform sizes with large prime factors is N*log(N), because Bluestein's algorithm is used for these cases * * \section FFTDesign Design * @@ -88,11 +88,10 @@ template struct default_fft_impl : public internal::fftw_impl {}; } #elif defined EIGEN_MKL_DEFAULT -// TODO -// intel Math Kernel Library: fastest, commercial -- may be incompatible with Eigen in GPL form +// intel Math Kernel Library: fastest, free -- may be incompatible with Eigen in GPL form # include "src/FFT/ei_imklfft_impl.h" namespace Eigen { - template struct default_fft_impl : public internal::imklfft_impl {}; + template struct default_fft_impl : public internal::imklfft::imklfft_impl {}; } #elif defined EIGEN_POCKETFFT_DEFAULT // internal::pocketfft_impl: a heavily modified implementation of FFTPack, with many advantages. @@ -211,7 +210,7 @@ class FFT m_impl.fwd(dst,src,static_cast(nfft)); } -#if defined EIGEN_FFTW_DEFAULT || defined EIGEN_POCKETFFT_DEFAULT +#if defined EIGEN_FFTW_DEFAULT || defined EIGEN_POCKETFFT_DEFAULT || defined EIGEN_MKL_DEFAULT inline void fwd2(Complex * dst, const Complex * src, int n0,int n1) { @@ -370,7 +369,7 @@ class FFT } -#if defined EIGEN_FFTW_DEFAULT || defined EIGEN_POCKETFFT_DEFAULT +#if defined EIGEN_FFTW_DEFAULT || defined EIGEN_POCKETFFT_DEFAULT || defined EIGEN_MKL_DEFAULT inline void inv2(Complex * dst, const Complex * src, int n0,int n1) { diff --git a/unsupported/Eigen/src/FFT/ei_imklfft_impl.h b/unsupported/Eigen/src/FFT/ei_imklfft_impl.h new file mode 100644 index 000000000..186a66c89 --- /dev/null +++ b/unsupported/Eigen/src/FFT/ei_imklfft_impl.h @@ -0,0 +1,288 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#include + +#include "./InternalHeaderCheck.h" + +#include + +namespace Eigen { +namespace internal { +namespace imklfft { + +#define RUN_OR_ASSERT(EXPR, ERROR_MSG) \ + { \ + MKL_LONG status = (EXPR); \ + eigen_assert(status == DFTI_NO_ERROR && (ERROR_MSG)); \ + }; + +inline MKL_Complex16* complex_cast(const std::complex* p) { + return const_cast(reinterpret_cast(p)); +} + +inline MKL_Complex8* complex_cast(const std::complex* p) { + return const_cast(reinterpret_cast(p)); +} + +/* + * Parameters: + * precision: enum, Precision of the transform: DFTI_SINGLE or DFTI_DOUBLE. + * forward_domain: enum, Forward domain of the transform: DFTI_COMPLEX or + * DFTI_REAL. dimension: MKL_LONG Dimension of the transform. sizes: MKL_LONG if + * dimension = 1.Length of the transform for a one-dimensional transform. sizes: + * Array of type MKL_LONG otherwise. Lengths of each dimension for a + * multi-dimensional transform. + */ +inline void configure_descriptor(DFTI_DESCRIPTOR_HANDLE* handl, + enum DFTI_CONFIG_VALUE precision, + enum DFTI_CONFIG_VALUE forward_domain, + MKL_LONG dimension, MKL_LONG* sizes) { + eigen_assert(dimension == 1 || + dimension == 2 && + "Transformation dimension must be less than 3."); + + if (dimension == 1) { + RUN_OR_ASSERT(DftiCreateDescriptor(handl, precision, forward_domain, + dimension, *sizes), + "DftiCreateDescriptor failed.") + if (forward_domain == DFTI_REAL) { + // Set CCE storage + RUN_OR_ASSERT(DftiSetValue(*handl, DFTI_CONJUGATE_EVEN_STORAGE, + DFTI_COMPLEX_COMPLEX), + "DftiSetValue failed.") + } + } else { + RUN_OR_ASSERT( + DftiCreateDescriptor(handl, precision, DFTI_COMPLEX, dimension, sizes), + "DftiCreateDescriptor failed.") + } + + RUN_OR_ASSERT(DftiSetValue(*handl, DFTI_PLACEMENT, DFTI_NOT_INPLACE), + "DftiSetValue failed.") + RUN_OR_ASSERT(DftiCommitDescriptor(*handl), "DftiCommitDescriptor failed.") +} + +template +struct plan {}; + +template <> +struct plan { + typedef float scalar_type; + typedef MKL_Complex8 complex_type; + + DFTI_DESCRIPTOR_HANDLE m_plan; + + plan() : m_plan(0) {} + ~plan() { + if (m_plan) DftiFreeDescriptor(&m_plan); + }; + + enum DFTI_CONFIG_VALUE precision = DFTI_SINGLE; + + inline void forward(complex_type* dst, complex_type* src, MKL_LONG nfft) { + if (m_plan == 0) { + configure_descriptor(&m_plan, precision, DFTI_COMPLEX, 1, &nfft); + } + RUN_OR_ASSERT(DftiComputeForward(m_plan, src, dst), + "DftiComputeForward failed.") + } + + inline void inverse(complex_type* dst, complex_type* src, MKL_LONG nfft) { + if (m_plan == 0) { + configure_descriptor(&m_plan, precision, DFTI_COMPLEX, 1, &nfft); + } + RUN_OR_ASSERT(DftiComputeBackward(m_plan, src, dst), + "DftiComputeBackward failed.") + } + + inline void forward(complex_type* dst, scalar_type* src, MKL_LONG nfft) { + if (m_plan == 0) { + configure_descriptor(&m_plan, precision, DFTI_REAL, 1, &nfft); + } + RUN_OR_ASSERT(DftiComputeForward(m_plan, src, dst), + "DftiComputeForward failed.") + } + + inline void inverse(scalar_type* dst, complex_type* src, MKL_LONG nfft) { + if (m_plan == 0) { + configure_descriptor(&m_plan, precision, DFTI_REAL, 1, &nfft); + } + RUN_OR_ASSERT(DftiComputeBackward(m_plan, src, dst), + "DftiComputeBackward failed.") + } + + inline void forward2(complex_type* dst, complex_type* src, int n0, int n1) { + if (m_plan == 0) { + MKL_LONG sizes[2] = {n0, n1}; + configure_descriptor(&m_plan, precision, DFTI_COMPLEX, 2, sizes); + } + RUN_OR_ASSERT(DftiComputeForward(m_plan, src, dst), + "DftiComputeForward failed.") + } + + inline void inverse2(complex_type* dst, complex_type* src, int n0, int n1) { + if (m_plan == 0) { + MKL_LONG sizes[2] = {n0, n1}; + configure_descriptor(&m_plan, precision, DFTI_COMPLEX, 2, sizes); + } + RUN_OR_ASSERT(DftiComputeBackward(m_plan, src, dst), + "DftiComputeBackward failed.") + } +}; + +template <> +struct plan { + typedef double scalar_type; + typedef MKL_Complex16 complex_type; + + DFTI_DESCRIPTOR_HANDLE m_plan; + + plan() : m_plan(0) {} + ~plan() { + if (m_plan) DftiFreeDescriptor(&m_plan); + }; + + enum DFTI_CONFIG_VALUE precision = DFTI_DOUBLE; + + inline void forward(complex_type* dst, complex_type* src, MKL_LONG nfft) { + if (m_plan == 0) { + configure_descriptor(&m_plan, precision, DFTI_COMPLEX, 1, &nfft); + } + RUN_OR_ASSERT(DftiComputeForward(m_plan, src, dst), + "DftiComputeForward failed.") + } + + inline void inverse(complex_type* dst, complex_type* src, MKL_LONG nfft) { + if (m_plan == 0) { + configure_descriptor(&m_plan, precision, DFTI_COMPLEX, 1, &nfft); + } + RUN_OR_ASSERT(DftiComputeBackward(m_plan, src, dst), + "DftiComputeBackward failed.") + } + + inline void forward(complex_type* dst, scalar_type* src, MKL_LONG nfft) { + if (m_plan == 0) { + configure_descriptor(&m_plan, precision, DFTI_REAL, 1, &nfft); + } + RUN_OR_ASSERT(DftiComputeForward(m_plan, src, dst), + "DftiComputeForward failed.") + } + + inline void inverse(scalar_type* dst, complex_type* src, MKL_LONG nfft) { + if (m_plan == 0) { + configure_descriptor(&m_plan, precision, DFTI_REAL, 1, &nfft); + } + RUN_OR_ASSERT(DftiComputeBackward(m_plan, src, dst), + "DftiComputeBackward failed.") + } + + inline void forward2(complex_type* dst, complex_type* src, int n0, int n1) { + if (m_plan == 0) { + MKL_LONG sizes[2] = {n0, n1}; + configure_descriptor(&m_plan, precision, DFTI_COMPLEX, 2, sizes); + } + RUN_OR_ASSERT(DftiComputeForward(m_plan, src, dst), + "DftiComputeForward failed.") + } + + inline void inverse2(complex_type* dst, complex_type* src, int n0, int n1) { + if (m_plan == 0) { + MKL_LONG sizes[2] = {n0, n1}; + configure_descriptor(&m_plan, precision, DFTI_COMPLEX, 2, sizes); + } + RUN_OR_ASSERT(DftiComputeBackward(m_plan, src, dst), + "DftiComputeBackward failed.") + } +}; + +template +struct imklfft_impl { + typedef Scalar_ Scalar; + typedef std::complex Complex; + + inline void clear() { m_plans.clear(); } + + // complex-to-complex forward FFT + inline void fwd(Complex* dst, const Complex* src, int nfft) { + MKL_LONG size = nfft; + get_plan(nfft, dst, src) + .forward(complex_cast(dst), complex_cast(src), size); + } + + // real-to-complex forward FFT + inline void fwd(Complex* dst, const Scalar* src, int nfft) { + MKL_LONG size = nfft; + get_plan(nfft, dst, src) + .forward(complex_cast(dst), const_cast(src), nfft); + } + + // 2-d complex-to-complex + inline void fwd2(Complex* dst, const Complex* src, int n0, int n1) { + get_plan(n0, n1, dst, src) + .forward2(complex_cast(dst), complex_cast(src), n0, n1); + } + + // inverse complex-to-complex + inline void inv(Complex* dst, const Complex* src, int nfft) { + MKL_LONG size = nfft; + get_plan(nfft, dst, src) + .inverse(complex_cast(dst), complex_cast(src), nfft); + } + + // half-complex to scalar + inline void inv(Scalar* dst, const Complex* src, int nfft) { + MKL_LONG size = nfft; + get_plan(nfft, dst, src) + .inverse(const_cast(dst), complex_cast(src), nfft); + } + + // 2-d complex-to-complex + inline void inv2(Complex* dst, const Complex* src, int n0, int n1) { + get_plan(n0, n1, dst, src) + .inverse2(complex_cast(dst), complex_cast(src), n0, n1); + } + + private: + std::map> m_plans; + + inline plan& get_plan(int nfft, void* dst, + const void* src) { + int inplace = dst == src ? 1 : 0; + int aligned = ((reinterpret_cast(src) & 15) | + (reinterpret_cast(dst) & 15)) == 0 + ? 1 + : 0; + int64_t key = ((nfft << 2) | (inplace << 1) | aligned) + << 1; + + // Create element if key does not exist. + return m_plans[key]; + } + + inline plan& get_plan(int n0, int n1, void* dst, + const void* src) { + int inplace = (dst == src) ? 1 : 0; + int aligned = ((reinterpret_cast(src) & 15) | + (reinterpret_cast(dst) & 15)) == 0 + ? 1 + : 0; + int64_t key = (((((int64_t)n0) << 31) | (n1 << 2) | + (inplace << 1) | aligned) + << 1) + + 1; + + // Create element if key does not exist. + return m_plans[key]; + } +}; + +#undef RUN_OR_ASSERT + +} // namespace imklfft +} // namespace internal +} // namespace Eigen diff --git a/unsupported/test/fft_test_shared.h b/unsupported/test/fft_test_shared.h index 06cf365f6..7c7e81515 100644 --- a/unsupported/test/fft_test_shared.h +++ b/unsupported/test/fft_test_shared.h @@ -261,15 +261,14 @@ EIGEN_DECLARE_TEST(FFTW) CALL_SUBTEST( ( test_complex2d () ) ); // fail to build since Eigen limit the stack allocation size,too big here. // CALL_SUBTEST( ( test_complex2d () ) ); - #endif - - #if defined EIGEN_FFTW_DEFAULT || defined EIGEN_POCKETFFT_DEFAULT + #if defined EIGEN_FFTW_DEFAULT || defined EIGEN_POCKETFFT_DEFAULT || defined EIGEN_MKL_DEFAULT CALL_SUBTEST( ( test_complex2d () ) ); CALL_SUBTEST( ( test_complex2d () ) ); CALL_SUBTEST( ( test_complex2d () ) ); CALL_SUBTEST( ( test_complex2d () ) ); - +#endif +#if defined EIGEN_FFTW_DEFAULT || defined EIGEN_POCKETFFT_DEFAULT || defined EIGEN_MKL_DEFAULT CALL_SUBTEST( ( test_complex2d () ) ); CALL_SUBTEST( ( test_complex2d () ) ); CALL_SUBTEST( ( test_complex2d () ) ); diff --git a/unsupported/test/mklfft.cpp b/unsupported/test/mklfft.cpp new file mode 100644 index 000000000..631dd2005 --- /dev/null +++ b/unsupported/test/mklfft.cpp @@ -0,0 +1,2 @@ +#define EIGEN_MKL_DEFAULT 1 +#include "fft_test_shared.h"