From 92ca9fc032ecaa7f9595f5c5f7d9d8df6c972bbe Mon Sep 17 00:00:00 2001 From: Mark Borgerding Date: Tue, 19 May 2009 00:21:04 -0400 Subject: [PATCH 01/18] initial pass of FFT module -- includes complex 1-d case only --- unsupported/Eigen/FFT.h | 84 +++++ unsupported/Eigen/src/FFT/simple_fft_traits.h | 296 ++++++++++++++++++ unsupported/test/CMakeLists.txt | 1 + unsupported/test/FFT.cpp | 85 +++++ 4 files changed, 466 insertions(+) create mode 100644 unsupported/Eigen/FFT.h create mode 100644 unsupported/Eigen/src/FFT/simple_fft_traits.h create mode 100644 unsupported/test/FFT.cpp diff --git a/unsupported/Eigen/FFT.h b/unsupported/Eigen/FFT.h new file mode 100644 index 000000000..03490d2c5 --- /dev/null +++ b/unsupported/Eigen/FFT.h @@ -0,0 +1,84 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. Eigen itself is part of the KDE project. +// +// Copyright (C) 2009 Mark Borgerding mark a borgerding net +// +// Eigen is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 3 of the License, or (at your option) any later version. +// +// Alternatively, you can redistribute it and/or +// modify it under the terms of the GNU General Public License as +// published by the Free Software Foundation; either version 2 of +// the License, or (at your option) any later version. +// +// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY +// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public +// License and a copy of the GNU General Public License along with +// Eigen. If not, see . + +#ifndef EIGEN_FFT_H +#define EIGEN_FFT_H + +// simple_fft_traits: small, free, reasonably efficient default, derived from kissfft +#include "src/FFT/simple_fft_traits.h" +#define DEFAULT_FFT_TRAITS simple_fft_traits + +// FFTW: faster, GPL-not LGPL, bigger code size +#ifdef FFTW_PATIENT // definition of FFTW_PATIENT indicates the caller has included fftw3.h, we can use FFTW routines +// TODO +// #include "src/FFT/fftw_traits.h" +// #define DEFAULT_FFT_TRAITS fftw_traits +#endif + +// intel Math Kernel Library: fastest, commerical +#ifdef _MKL_DFTI_H_ // mkl_dfti.h has been included, we can use MKL FFT routines +// TODO +// #include "src/FFT/imkl_traits.h" +// #define DEFAULT_FFT_TRAITS imkl_traits +#endif + +namespace Eigen { + +template + > +class FFT +{ + public: + typedef _Traits traits_type; + typedef typename traits_type::Scalar Scalar; + typedef typename traits_type::Complex Complex; + + FFT(const traits_type & traits=traits_type() ) :m_traits(traits) { } + + void fwd( Complex * dst, const Complex * src, int nfft) + { + m_traits.prepare(nfft,false,dst,src); + m_traits.exec(dst,src); + m_traits.postprocess(dst); + } + + void inv( Complex * dst, const Complex * src, int nfft) + { + m_traits.prepare(nfft,true,dst,src); + m_traits.exec(dst,src); + m_traits.postprocess(dst); + } + + // TODO: fwd,inv for Scalar + // TODO: multi-dimensional FFTs + // TODO: handle Eigen MatrixBase + + traits_type & traits() {return m_traits;} + private: + traits_type m_traits; +}; +#undef DEFAULT_FFT_TRAITS +} +#endif diff --git a/unsupported/Eigen/src/FFT/simple_fft_traits.h b/unsupported/Eigen/src/FFT/simple_fft_traits.h new file mode 100644 index 000000000..fe9d24b84 --- /dev/null +++ b/unsupported/Eigen/src/FFT/simple_fft_traits.h @@ -0,0 +1,296 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. Eigen itself is part of the KDE project. +// +// Copyright (C) 2009 Mark Borgerding mark a borgerding net +// +// Eigen is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 3 of the License, or (at your option) any later version. +// +// Alternatively, you can redistribute it and/or +// modify it under the terms of the GNU General Public License as +// published by the Free Software Foundation; either version 2 of +// the License, or (at your option) any later version. +// +// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY +// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public +// License and a copy of the GNU General Public License along with +// Eigen. If not, see . + +#include +#include + +namespace Eigen { + + template + struct simple_fft_traits + { + typedef _Scalar Scalar; + typedef std::complex Complex; + simple_fft_traits() : m_nfft(0) {} + + void prepare(int nfft,bool inverse,Complex * dst,const Complex *src) + { + if (m_nfft == nfft) { + // reuse the twiddles, conjugate if necessary + if (inverse != m_inverse) + for (int i=0;in) + p=n;// no more factors + } + n /= p; + m_stageRadix.push_back(p); + m_stageRemainder.push_back(n); + }while(n>1); + } + + void exec(Complex * dst, const Complex * src) + { + work(0, dst, src, 1,1); + } + + void postprocess(Complex *dst) + { + if (m_inverse) { + Scalar scale = 1./m_nfft; + for (int k=0;kreal() - .5*scratch[3].real() , Fout->imag() - .5*scratch[3].imag() ); + + scratch[0] *= epi3.imag(); + + *Fout += scratch[3]; + + Fout[m2] = Complex( Fout[m].real() + scratch[0].imag() , Fout[m].imag() - scratch[0].real() ); + + Fout[m] += Complex( -scratch[0].imag(),scratch[0].real() ); + ++Fout; + }while(--k); + } + + void bfly5( Complex * Fout, const size_t fstride, const size_t m) + { + Complex *Fout0,*Fout1,*Fout2,*Fout3,*Fout4; + size_t u; + Complex scratch[13]; + Complex * twiddles = &m_twiddles[0]; + Complex *tw; + Complex ya,yb; + ya = twiddles[fstride*m]; + yb = twiddles[fstride*2*m]; + + Fout0=Fout; + Fout1=Fout0+m; + Fout2=Fout0+2*m; + Fout3=Fout0+3*m; + Fout4=Fout0+4*m; + + tw=twiddles; + for ( u=0; u=Norig) twidx-=Norig; + t=scratchbuf[q] * twiddles[twidx]; + Fout[ k ] += t; + } + k += m; + } + } + } + + int m_nfft; + bool m_inverse; + std::vector m_twiddles; + std::vector m_stageRadix; + std::vector m_stageRemainder; + }; +} diff --git a/unsupported/test/CMakeLists.txt b/unsupported/test/CMakeLists.txt index abfbb0185..49de7dfb2 100644 --- a/unsupported/test/CMakeLists.txt +++ b/unsupported/test/CMakeLists.txt @@ -19,3 +19,4 @@ ei_add_test(autodiff) ei_add_test(BVH) ei_add_test(matrixExponential) ei_add_test(alignedvector3) +ei_add_test(FFT) diff --git a/unsupported/test/FFT.cpp b/unsupported/test/FFT.cpp new file mode 100644 index 000000000..b13a7810f --- /dev/null +++ b/unsupported/test/FFT.cpp @@ -0,0 +1,85 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. Eigen itself is part of the KDE project. +// +// Copyright (C) 2009 Mark Borgerding mark a borgerding net +// +// Eigen is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 3 of the License, or (at your option) any later version. +// +// Alternatively, you can redistribute it and/or +// modify it under the terms of the GNU General Public License as +// published by the Free Software Foundation; either version 2 of +// the License, or (at your option) any later version. +// +// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY +// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public +// License and a copy of the GNU General Public License along with +// Eigen. If not, see . + +#include "main.h" +#include + +//#include +//#include +//#include + +using namespace std; + +template +void test_fft(int nfft) +{ + typedef typename Eigen::FFT::Complex Complex; + + //cout << "type:" << typeid(T).name() << " nfft:" << nfft; + + FFT fft; + + vector inbuf(nfft); + vector buf3(nfft); + vector outbuf(nfft); + for (int k=0;k acc = 0; + long double phinc = 2*k0* M_PIl / nfft; + for (int k1=0;k1 x(inbuf[k1].real(),inbuf[k1].imag()); + acc += x * exp( complex(0,-k1*phinc) ); + } + totalpower += norm(acc); + complex x(outbuf[k0].real(),outbuf[k0].imag()); + complex dif = acc - x; + difpower += norm(dif); + } + long double rmse = sqrt(difpower/totalpower); + VERIFY( rmse < 1e-5 );// gross check + + totalpower=0; + difpower=0; + for (int k=0;k(32) )); CALL_SUBTEST(( test_fft(32) )); CALL_SUBTEST(( test_fft(32) )); + CALL_SUBTEST(( test_fft(1024) )); CALL_SUBTEST(( test_fft(1024) )); CALL_SUBTEST(( test_fft(1024) )); + CALL_SUBTEST(( test_fft(2*3*4*5*7) )); CALL_SUBTEST(( test_fft(2*3*4*5*7) )); CALL_SUBTEST(( test_fft(2*3*4*5*7) )); +} From be1b2ad4ec1851aa990dc97337fc74e16edd1dd0 Mon Sep 17 00:00:00 2001 From: Mark Borgerding Date: Tue, 19 May 2009 00:26:19 -0400 Subject: [PATCH 02/18] removed unused code --- unsupported/test/FFT.cpp | 6 ------ 1 file changed, 6 deletions(-) diff --git a/unsupported/test/FFT.cpp b/unsupported/test/FFT.cpp index b13a7810f..8347bb76b 100644 --- a/unsupported/test/FFT.cpp +++ b/unsupported/test/FFT.cpp @@ -25,10 +25,6 @@ #include "main.h" #include -//#include -//#include -//#include - using namespace std; template @@ -36,8 +32,6 @@ void test_fft(int nfft) { typedef typename Eigen::FFT::Complex Complex; - //cout << "type:" << typeid(T).name() << " nfft:" << nfft; - FFT fft; vector inbuf(nfft); From 68cad98bc935e53102a9432560085b81c5766743 Mon Sep 17 00:00:00 2001 From: Mark Borgerding Date: Tue, 19 May 2009 00:34:38 -0400 Subject: [PATCH 03/18] indent level change --- unsupported/Eigen/src/FFT/simple_fft_traits.h | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/unsupported/Eigen/src/FFT/simple_fft_traits.h b/unsupported/Eigen/src/FFT/simple_fft_traits.h index fe9d24b84..6fbbeac2e 100644 --- a/unsupported/Eigen/src/FFT/simple_fft_traits.h +++ b/unsupported/Eigen/src/FFT/simple_fft_traits.h @@ -80,13 +80,15 @@ namespace Eigen { void postprocess(Complex *dst) { - if (m_inverse) { - Scalar scale = 1./m_nfft; - for (int k=0;k Date: Fri, 22 May 2009 22:37:59 -0400 Subject: [PATCH 04/18] added non-optimized real forward fft (no inverse yet) --- unsupported/Eigen/FFT.h | 27 ++++- unsupported/Eigen/src/FFT/simple_fft_traits.h | 10 +- unsupported/test/FFT.cpp | 113 ++++++++++++------ 3 files changed, 106 insertions(+), 44 deletions(-) diff --git a/unsupported/Eigen/FFT.h b/unsupported/Eigen/FFT.h index 03490d2c5..a1f87a609 100644 --- a/unsupported/Eigen/FFT.h +++ b/unsupported/Eigen/FFT.h @@ -57,21 +57,36 @@ class FFT FFT(const traits_type & traits=traits_type() ) :m_traits(traits) { } - void fwd( Complex * dst, const Complex * src, int nfft) + template + void fwd( Complex * dst, const _Input * src, int nfft) { m_traits.prepare(nfft,false,dst,src); m_traits.exec(dst,src); m_traits.postprocess(dst); } - void inv( Complex * dst, const Complex * src, int nfft) + template + void fwd( std::vector & dst, const std::vector<_Input> & src) { - m_traits.prepare(nfft,true,dst,src); - m_traits.exec(dst,src); - m_traits.postprocess(dst); + dst.resize( src.size() ); + fwd( &dst[0],&src[0],src.size() ); + } + + template + void inv( _Output * dst, const Complex * src, int nfft) + { + m_traits.prepare(nfft,true,dst,src); + m_traits.exec(dst,src); + m_traits.postprocess(dst); + } + + template + void inv( std::vector<_Output> & dst, const std::vector & src) + { + dst.resize( src.size() ); + inv( &dst[0],&src[0],src.size() ); } - // TODO: fwd,inv for Scalar // TODO: multi-dimensional FFTs // TODO: handle Eigen MatrixBase diff --git a/unsupported/Eigen/src/FFT/simple_fft_traits.h b/unsupported/Eigen/src/FFT/simple_fft_traits.h index 6fbbeac2e..5a910dd1f 100644 --- a/unsupported/Eigen/src/FFT/simple_fft_traits.h +++ b/unsupported/Eigen/src/FFT/simple_fft_traits.h @@ -34,7 +34,8 @@ namespace Eigen { typedef std::complex Complex; simple_fft_traits() : m_nfft(0) {} - void prepare(int nfft,bool inverse,Complex * dst,const Complex *src) + template + void prepare(int nfft,bool inverse,Complex * dst,const _Src *src) { if (m_nfft == nfft) { // reuse the twiddles, conjugate if necessary @@ -73,7 +74,8 @@ namespace Eigen { }while(n>1); } - void exec(Complex * dst, const Complex * src) + template + void exec(Complex * dst, const _Src * src) { work(0, dst, src, 1,1); } @@ -89,7 +91,9 @@ namespace Eigen { private: - void work( int stage,Complex * Fout, const Complex * f, size_t fstride,size_t in_stride) + + template + void work( int stage,Complex * Fout, const _Src * f, size_t fstride,size_t in_stride) { int p = m_stageRadix[stage]; int m = m_stageRemainder[stage]; diff --git a/unsupported/test/FFT.cpp b/unsupported/test/FFT.cpp index 8347bb76b..ef03359e2 100644 --- a/unsupported/test/FFT.cpp +++ b/unsupported/test/FFT.cpp @@ -25,55 +25,98 @@ #include "main.h" #include + using namespace std; +template < typename T> +complex promote(complex x) { return complex(x.real(),x.imag()); } + +complex promote(float x) { return complex( x); } +complex promote(double x) { return complex( x); } +complex promote(long double x) { return complex( x); } + + + template + long double fft_rmse( const vector & fftbuf,const vector & timebuf) + { + long double totalpower=0; + long double difpower=0; + for (size_t k0=0;k0 acc = 0; + long double phinc = -2.*k0* M_PIl / timebuf.size(); + for (size_t k1=0;k1(0,k1*phinc) ); + } + totalpower += norm(acc); + complex x = promote(fftbuf[k0]); + complex dif = acc - x; + difpower += norm(dif); + cerr << k0 << ":" << acc << " " << x << endl; + } + cerr << "rmse:" << sqrt(difpower/totalpower) << endl; + return sqrt(difpower/totalpower); + } + + template + long double dif_rmse( const vector buf1,const vector buf2) + { + long double totalpower=0; + long double difpower=0; + size_t n = min( buf1.size(),buf2.size() ); + for (size_t k=0;k -void test_fft(int nfft) +void test_scalar(int nfft) +{ + typedef typename Eigen::FFT::Complex Complex; + typedef typename Eigen::FFT::Scalar Scalar; + + FFT fft; + vector inbuf(nfft); + vector outbuf; + for (int k=0;k +void test_complex(int nfft) { typedef typename Eigen::FFT::Complex Complex; FFT fft; vector inbuf(nfft); - vector buf3(nfft); - vector outbuf(nfft); + vector outbuf; + vector buf3; for (int k=0;k acc = 0; - long double phinc = 2*k0* M_PIl / nfft; - for (int k1=0;k1 x(inbuf[k1].real(),inbuf[k1].imag()); - acc += x * exp( complex(0,-k1*phinc) ); - } - totalpower += norm(acc); - complex x(outbuf[k0].real(),outbuf[k0].imag()); - complex dif = acc - x; - difpower += norm(dif); - } - long double rmse = sqrt(difpower/totalpower); - VERIFY( rmse < 1e-5 );// gross check + VERIFY( fft_rmse(outbuf,inbuf) < 1e-5 );// gross check - totalpower=0; - difpower=0; - for (int k=0;k(32) )); CALL_SUBTEST(( test_fft(32) )); CALL_SUBTEST(( test_fft(32) )); - CALL_SUBTEST(( test_fft(1024) )); CALL_SUBTEST(( test_fft(1024) )); CALL_SUBTEST(( test_fft(1024) )); - CALL_SUBTEST(( test_fft(2*3*4*5*7) )); CALL_SUBTEST(( test_fft(2*3*4*5*7) )); CALL_SUBTEST(( test_fft(2*3*4*5*7) )); + CALL_SUBTEST( test_complex(32) ); CALL_SUBTEST( test_complex(32) ); CALL_SUBTEST( test_complex(32) ); + CALL_SUBTEST( test_complex(1024) ); CALL_SUBTEST( test_complex(1024) ); CALL_SUBTEST( test_complex(1024) ); + CALL_SUBTEST( test_complex(3*8) ); CALL_SUBTEST( test_complex(3*8) ); CALL_SUBTEST( test_complex(3*8) ); + CALL_SUBTEST( test_complex(5*32) ); CALL_SUBTEST( test_complex(5*32) ); CALL_SUBTEST( test_complex(5*32) ); + CALL_SUBTEST( test_complex(2*3*4) ); CALL_SUBTEST( test_complex(2*3*4) ); CALL_SUBTEST( test_complex(2*3*4) ); + CALL_SUBTEST( test_complex(2*3*4*5) ); CALL_SUBTEST( test_complex(2*3*4*5) ); CALL_SUBTEST( test_complex(2*3*4*5) ); + CALL_SUBTEST( test_complex(2*3*4*5*7) ); CALL_SUBTEST( test_complex(2*3*4*5*7) ); CALL_SUBTEST( test_complex(2*3*4*5*7) ); + + CALL_SUBTEST( test_scalar(32) ); CALL_SUBTEST( test_scalar(32) ); CALL_SUBTEST( test_scalar(32) ); + CALL_SUBTEST( test_scalar(1024) ); CALL_SUBTEST( test_scalar(1024) ); CALL_SUBTEST( test_scalar(1024) ); + CALL_SUBTEST( test_scalar(2*3*4*5*7) ); CALL_SUBTEST( test_scalar(2*3*4*5*7) ); CALL_SUBTEST( test_scalar(2*3*4*5*7) ); } From 9c0fcd0f6213143216710a5b215aa2bb4a857ce5 Mon Sep 17 00:00:00 2001 From: Mark Borgerding Date: Sat, 23 May 2009 10:09:48 -0400 Subject: [PATCH 05/18] started real optimization, added benchmark for FFT --- bench/benchFFT.cpp | 64 +++++++++ unsupported/Eigen/FFT.h | 8 +- unsupported/Eigen/src/FFT/simple_fft_traits.h | 125 +++++++++++++----- unsupported/test/FFT.cpp | 3 +- 4 files changed, 157 insertions(+), 43 deletions(-) create mode 100644 bench/benchFFT.cpp diff --git a/bench/benchFFT.cpp b/bench/benchFFT.cpp new file mode 100644 index 000000000..041576b75 --- /dev/null +++ b/bench/benchFFT.cpp @@ -0,0 +1,64 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. Eigen itself is part of the KDE project. +// +// Copyright (C) 2009 Mark Borgerding mark a borgerding net +// +// Eigen is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 3 of the License, or (at your option) any later version. +// +// Alternatively, you can redistribute it and/or +// modify it under the terms of the GNU General Public License as +// published by the Free Software Foundation; either version 2 of +// the License, or (at your option) any later version. +// +// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY +// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public +// License and a copy of the GNU General Public License along with +// Eigen. If not, see . + +#include +#include +#include +#include +#include + +using namespace Eigen; +using namespace std; + +#ifndef NFFT +#define NFFT 1024 +#endif + +#ifndef TYPE +#define TYPE float +#endif + +#ifndef NITS +#define NITS (10000000/NFFT) +#endif + +int main() +{ + vector > inbuf(NFFT); + vector > outbuf(NFFT); + Eigen::FFT fft; + + fft.fwd( outbuf , inbuf); + + BenchTimer timer; + timer.reset(); + for (int k=0;k<8;++k) { + timer.start(); + for(int i = 0; i < NITS; i++) + fft.fwd( outbuf , inbuf); + timer.stop(); + } + double mflops = 5.*NFFT*log2((double)NFFT) / (1e6 * timer.value() / (double)NITS ); + cout << "NFFT=" << NFFT << " " << (double(1e-6*NFFT*NITS)/timer.value()) << " MS/s " << mflops << "MFLOPS\n"; +} diff --git a/unsupported/Eigen/FFT.h b/unsupported/Eigen/FFT.h index a1f87a609..c466423b7 100644 --- a/unsupported/Eigen/FFT.h +++ b/unsupported/Eigen/FFT.h @@ -60,9 +60,7 @@ class FFT template void fwd( Complex * dst, const _Input * src, int nfft) { - m_traits.prepare(nfft,false,dst,src); - m_traits.exec(dst,src); - m_traits.postprocess(dst); + m_traits.fwd(dst,src,nfft); } template @@ -75,9 +73,7 @@ class FFT template void inv( _Output * dst, const Complex * src, int nfft) { - m_traits.prepare(nfft,true,dst,src); - m_traits.exec(dst,src); - m_traits.postprocess(dst); + m_traits.inv( dst,src,nfft ); } template diff --git a/unsupported/Eigen/src/FFT/simple_fft_traits.h b/unsupported/Eigen/src/FFT/simple_fft_traits.h index 5a910dd1f..33433ae03 100644 --- a/unsupported/Eigen/src/FFT/simple_fft_traits.h +++ b/unsupported/Eigen/src/FFT/simple_fft_traits.h @@ -35,7 +35,73 @@ namespace Eigen { simple_fft_traits() : m_nfft(0) {} template - void prepare(int nfft,bool inverse,Complex * dst,const _Src *src) + void fwd( Complex * dst,const _Src *src,int nfft) + { + prepare(nfft,false); + work(0, dst, src, 1,1); + scale(dst); + } + + void fwd( Complex * dst,const Scalar * src,int nfft) + { + if ( nfft&1 ) { + // use generic mode for odd + prepare(nfft,false); + work(0, dst, src, 1,1); + scale(dst); + }else{ + int ncfft = nfft>>1; + // use optimized mode for even real + prepare(nfft,false); + work(0,dst, reinterpret_cast (src),2,1); + Complex dc = dst[0].real() + dst[0].imag(); + Complex nyquist = dst[0].real() - dst[0].imag(); + + int k; + for ( k=1;k <= ncfft/2 ; ++k ) { +/** + fpk = st->tmpbuf[k]; + fpnk.r = st->tmpbuf[ncfft-k].r; + fpnk.i = - st->tmpbuf[ncfft-k].i; + + C_ADD( f1k, fpk , fpnk ); + C_SUB( f2k, fpk , fpnk ); + C_MUL( tw , f2k , st->super_twiddles[k-1]); + + freqdata[k].r = HALF_OF(f1k.r + tw.r); + freqdata[k].i = HALF_OF(f1k.i + tw.i); + freqdata[ncfft-k].r = HALF_OF(f1k.r - tw.r); + freqdata[ncfft-k].i = HALF_OF(tw.i - f1k.i); + */ + Complex fpk = dst[k]; + Complex fpnk = conj(dst[ncfft-k]); + + Complex f1k = fpk + fpnk; + Complex f2k = fpnk - fpk; + //Complex tw = f2k * exp( Complex(0,-3.14159265358979323846264338327 * ((double)k / ncfft + 1) ) ); // TODO repalce this with index into twiddles + Complex tw = f2k * m_twiddles[2*k];; + + dst[k] = (f1k + tw) * Scalar(.5); + dst[ncfft-k] = conj(f1k -tw)*Scalar(.5); + } + // place conjugate-symmetric half at the end for completeness + // TODO: make this configurable ( opt-out ) + for ( k=1;k < ncfft ; ++k ) + dst[nfft-k] = conj(dst[k]); + + dst[0] = dc; + dst[ncfft] = nyquist; + } + } + + void inv(Complex * dst,const Complex *src,int nfft) + { + prepare(nfft,true); + work(0, dst, src, 1,1); + scale(dst); + } + + void prepare(int nfft,bool inverse) { if (m_nfft == nfft) { // reuse the twiddles, conjugate if necessary @@ -74,57 +140,49 @@ namespace Eigen { }while(n>1); } - template - void exec(Complex * dst, const _Src * src) - { - work(0, dst, src, 1,1); - } - - void postprocess(Complex *dst) + void scale(Complex *dst) { if (m_inverse) { - Scalar scale = 1./m_nfft; + Scalar s = 1./m_nfft; for (int k=0;k - void work( int stage,Complex * Fout, const _Src * f, size_t fstride,size_t in_stride) + void work( int stage,Complex * xout, const _Src * xin, size_t fstride,size_t in_stride) { int p = m_stageRadix[stage]; int m = m_stageRemainder[stage]; - Complex * Fout_beg = Fout; - Complex * Fout_end = Fout + p*m; + Complex * Fout_beg = xout; + Complex * Fout_end = xout + p*m; - if (m==1) { - do{ - *Fout = *f; - f += fstride*in_stride; - }while(++Fout != Fout_end ); - }else{ + if (m>1) { do{ // recursive call: // DFT of size m*p performed by doing // p instances of smaller DFTs of size m, // each one takes a decimated version of the input - work(stage+1, Fout , f, fstride*p,in_stride); - f += fstride*in_stride; - }while( (Fout += m) != Fout_end ); + work(stage+1, xout , xin, fstride*p,in_stride); + xin += fstride*in_stride; + }while( (xout += m) != Fout_end ); + }else{ + do{ + *xout = *xin; + xin += fstride*in_stride; + }while(++xout != Fout_end ); } - - Fout=Fout_beg; + xout=Fout_beg; // recombine the p smaller DFTs switch (p) { - case 2: bfly2(Fout,fstride,m); break; - case 3: bfly3(Fout,fstride,m); break; - case 4: bfly4(Fout,fstride,m); break; - case 5: bfly5(Fout,fstride,m); break; - default: bfly_generic(Fout,fstride,m,p); break; + case 2: bfly2(xout,fstride,m); break; + case 3: bfly3(xout,fstride,m); break; + case 4: bfly4(xout,fstride,m); break; + case 5: bfly5(xout,fstride,m); break; + default: bfly_generic(xout,fstride,m,p); break; } } @@ -139,7 +197,7 @@ namespace Eigen { void bfly4( Complex * Fout, const size_t fstride, const size_t m) { - Complex scratch[7]; + Complex scratch[6]; int negative_if_inverse = m_inverse * -2 +1; for (size_t k=0;kreal() - .5*scratch[3].real() , Fout->imag() - .5*scratch[3].imag() ); - scratch[0] *= epi3.imag(); - *Fout += scratch[3]; - Fout[m2] = Complex( Fout[m].real() + scratch[0].imag() , Fout[m].imag() - scratch[0].real() ); - Fout[m] += Complex( -scratch[0].imag(),scratch[0].real() ); ++Fout; }while(--k); diff --git a/unsupported/test/FFT.cpp b/unsupported/test/FFT.cpp index ef03359e2..13e98ba77 100644 --- a/unsupported/test/FFT.cpp +++ b/unsupported/test/FFT.cpp @@ -115,8 +115,9 @@ void test_FFT() CALL_SUBTEST( test_complex(2*3*4) ); CALL_SUBTEST( test_complex(2*3*4) ); CALL_SUBTEST( test_complex(2*3*4) ); CALL_SUBTEST( test_complex(2*3*4*5) ); CALL_SUBTEST( test_complex(2*3*4*5) ); CALL_SUBTEST( test_complex(2*3*4*5) ); CALL_SUBTEST( test_complex(2*3*4*5*7) ); CALL_SUBTEST( test_complex(2*3*4*5*7) ); CALL_SUBTEST( test_complex(2*3*4*5*7) ); - +/* CALL_SUBTEST( test_scalar(32) ); CALL_SUBTEST( test_scalar(32) ); CALL_SUBTEST( test_scalar(32) ); CALL_SUBTEST( test_scalar(1024) ); CALL_SUBTEST( test_scalar(1024) ); CALL_SUBTEST( test_scalar(1024) ); CALL_SUBTEST( test_scalar(2*3*4*5*7) ); CALL_SUBTEST( test_scalar(2*3*4*5*7) ); CALL_SUBTEST( test_scalar(2*3*4*5*7) ); + */ } From 304798817268706463f3ff645c8c8b2c887c090a Mon Sep 17 00:00:00 2001 From: Mark Borgerding Date: Sat, 23 May 2009 12:50:07 -0400 Subject: [PATCH 06/18] scalar forward FFT optimized for even size, converts to cpx for odd --- unsupported/Eigen/src/FFT/simple_fft_traits.h | 156 ++++++++++-------- unsupported/test/FFT.cpp | 10 +- 2 files changed, 97 insertions(+), 69 deletions(-) diff --git a/unsupported/Eigen/src/FFT/simple_fft_traits.h b/unsupported/Eigen/src/FFT/simple_fft_traits.h index 33433ae03..f7dd2b9cf 100644 --- a/unsupported/Eigen/src/FFT/simple_fft_traits.h +++ b/unsupported/Eigen/src/FFT/simple_fft_traits.h @@ -24,6 +24,7 @@ #include #include +#include namespace Eigen { @@ -39,51 +40,54 @@ namespace Eigen { { prepare(nfft,false); work(0, dst, src, 1,1); - scale(dst); } + // real-to-complex forward FFT + // perform two FFTs of src even and src odd + // then twiddle to recombine them into the half-spectrum format + // then fill in the conjugate symmetric half void fwd( Complex * dst,const Scalar * src,int nfft) { if ( nfft&1 ) { // use generic mode for odd prepare(nfft,false); work(0, dst, src, 1,1); - scale(dst); }else{ int ncfft = nfft>>1; // use optimized mode for even real - prepare(nfft,false); - work(0,dst, reinterpret_cast (src),2,1); + fwd( dst, reinterpret_cast (src),ncfft); + make_real_twiddles(nfft); + std::cerr << "dst[0] = " << dst[0] << "\n"; Complex dc = dst[0].real() + dst[0].imag(); Complex nyquist = dst[0].real() - dst[0].imag(); - int k; - for ( k=1;k <= ncfft/2 ; ++k ) { -/** - fpk = st->tmpbuf[k]; - fpnk.r = st->tmpbuf[ncfft-k].r; - fpnk.i = - st->tmpbuf[ncfft-k].i; - - C_ADD( f1k, fpk , fpnk ); - C_SUB( f2k, fpk , fpnk ); - C_MUL( tw , f2k , st->super_twiddles[k-1]); - - freqdata[k].r = HALF_OF(f1k.r + tw.r); - freqdata[k].i = HALF_OF(f1k.i + tw.i); - freqdata[ncfft-k].r = HALF_OF(f1k.r - tw.r); - freqdata[ncfft-k].i = HALF_OF(tw.i - f1k.i); - */ +#if 0 + using namespace std; + cerr << "desired:\n"; + for ( k=1;k <= (ncfft>>1) ; ++k ) { + Complex x = exp( Complex(0,-3.14159265358979323846264338327 * ((double) (k) / ncfft + .5) ) ); + cerr << k << " " << x << "angle(x):" << arg(x) << "\n"; + } + dc=0; + cerr << "twiddles:\n"; + for (k=0;k>1) ; ++k ) { Complex fpk = dst[k]; Complex fpnk = conj(dst[ncfft-k]); - Complex f1k = fpk + fpnk; - Complex f2k = fpnk - fpk; - //Complex tw = f2k * exp( Complex(0,-3.14159265358979323846264338327 * ((double)k / ncfft + 1) ) ); // TODO repalce this with index into twiddles - Complex tw = f2k * m_twiddles[2*k];; - + Complex f2k = fpk - fpnk; + //Complex tw = f2k * exp( Complex(0,-3.14159265358979323846264338327 * ((double) (k) / ncfft + .5) ) ); + Complex tw= f2k * m_realTwiddles[k-1]; + dst[k] = (f1k + tw) * Scalar(.5); dst[ncfft-k] = conj(f1k -tw)*Scalar(.5); } + + // place conjugate-symmetric half at the end for completeness // TODO: make this configurable ( opt-out ) for ( k=1;k < ncfft ; ++k ) @@ -98,55 +102,74 @@ namespace Eigen { { prepare(nfft,true); work(0, dst, src, 1,1); - scale(dst); + scale(dst, Scalar(1)/m_nfft ); } void prepare(int nfft,bool inverse) { - if (m_nfft == nfft) { - // reuse the twiddles, conjugate if necessary - if (inverse != m_inverse) - for (int i=0;in) - p=n;// no more factors - } - n /= p; - m_stageRadix.push_back(p); - m_stageRemainder.push_back(n); - }while(n>1); + make_twiddles(nfft,inverse); + factorize(nfft); } - void scale(Complex *dst) + void make_real_twiddles(int nfft) + { + int ncfft2 = nfft>>2; + if ( m_realTwiddles.size() != ncfft2) { + m_realTwiddles.resize(ncfft2); + int ncfft= nfft>>1; + for (int k=1;k<=ncfft2;++k) + m_realTwiddles[k-1] = exp( Complex(0,-3.14159265358979323846264338327 * ((double) (k) / ncfft + .5) ) ); + } + } + + void make_twiddles(int nfft,bool inverse) + { + if ( m_twiddles.size() == nfft) { + // reuse the twiddles, conjugate if necessary + if (inverse != m_inverse) + for (int i=0;in) + p=n;// no more factors + } + n /= p; + m_stageRadix.push_back(p); + m_stageRemainder.push_back(n); + }while(n>1); + } + m_nfft = nfft; + } + + void scale(Complex *dst,Scalar s) { - if (m_inverse) { - Scalar s = 1./m_nfft; for (int k=0;k m_twiddles; + std::vector m_realTwiddles; std::vector m_stageRadix; std::vector m_stageRemainder; }; diff --git a/unsupported/test/FFT.cpp b/unsupported/test/FFT.cpp index 13e98ba77..41c7fed7b 100644 --- a/unsupported/test/FFT.cpp +++ b/unsupported/test/FFT.cpp @@ -41,6 +41,7 @@ complex promote(long double x) { return complex( x); { long double totalpower=0; long double difpower=0; + cerr <<"idx\ttruth\t\tvalue\n"; for (size_t k0=0;k0 acc = 0; long double phinc = -2.*k0* M_PIl / timebuf.size(); @@ -51,7 +52,7 @@ complex promote(long double x) { return complex( x); complex x = promote(fftbuf[k0]); complex dif = acc - x; difpower += norm(dif); - cerr << k0 << ":" << acc << " " << x << endl; + cerr << k0 << "\t" << acc << "\t" << x << endl; } cerr << "rmse:" << sqrt(difpower/totalpower) << endl; return sqrt(difpower/totalpower); @@ -108,6 +109,7 @@ void test_complex(int nfft) void test_FFT() { +#if 0 CALL_SUBTEST( test_complex(32) ); CALL_SUBTEST( test_complex(32) ); CALL_SUBTEST( test_complex(32) ); CALL_SUBTEST( test_complex(1024) ); CALL_SUBTEST( test_complex(1024) ); CALL_SUBTEST( test_complex(1024) ); CALL_SUBTEST( test_complex(3*8) ); CALL_SUBTEST( test_complex(3*8) ); CALL_SUBTEST( test_complex(3*8) ); @@ -115,9 +117,11 @@ void test_FFT() CALL_SUBTEST( test_complex(2*3*4) ); CALL_SUBTEST( test_complex(2*3*4) ); CALL_SUBTEST( test_complex(2*3*4) ); CALL_SUBTEST( test_complex(2*3*4*5) ); CALL_SUBTEST( test_complex(2*3*4*5) ); CALL_SUBTEST( test_complex(2*3*4*5) ); CALL_SUBTEST( test_complex(2*3*4*5*7) ); CALL_SUBTEST( test_complex(2*3*4*5*7) ); CALL_SUBTEST( test_complex(2*3*4*5*7) ); -/* +#endif + +#if 1 CALL_SUBTEST( test_scalar(32) ); CALL_SUBTEST( test_scalar(32) ); CALL_SUBTEST( test_scalar(32) ); CALL_SUBTEST( test_scalar(1024) ); CALL_SUBTEST( test_scalar(1024) ); CALL_SUBTEST( test_scalar(1024) ); CALL_SUBTEST( test_scalar(2*3*4*5*7) ); CALL_SUBTEST( test_scalar(2*3*4*5*7) ); CALL_SUBTEST( test_scalar(2*3*4*5*7) ); - */ +#endif } From 326ea773908c2d7e46101085af8f72d20b3f8cbc Mon Sep 17 00:00:00 2001 From: Mark Borgerding Date: Sat, 23 May 2009 22:50:07 -0400 Subject: [PATCH 07/18] added FFT inverse complex-to-scalar interface (not yet optimized) --- bench/benchFFT.cpp | 73 ++++++++++++++----- unsupported/Eigen/src/FFT/simple_fft_traits.h | 31 ++++---- unsupported/test/FFT.cpp | 21 ++++-- 3 files changed, 81 insertions(+), 44 deletions(-) diff --git a/bench/benchFFT.cpp b/bench/benchFFT.cpp index 041576b75..84cc49fe3 100644 --- a/bench/benchFFT.cpp +++ b/bench/benchFFT.cpp @@ -31,34 +31,67 @@ using namespace Eigen; using namespace std; -#ifndef NFFT -#define NFFT 1024 -#endif + +template +string nameof(); + +template <> string nameof() {return "float";} +template <> string nameof() {return "double";} +template <> string nameof() {return "long double";} #ifndef TYPE #define TYPE float #endif -#ifndef NITS -#define NITS (10000000/NFFT) +#ifndef NFFT +#define NFFT 1024 +#endif +#ifndef NDATA +#define NDATA 1000000 #endif -int main() +using namespace Eigen; + +template +void bench(int nfft) { - vector > inbuf(NFFT); - vector > outbuf(NFFT); - Eigen::FFT fft; + typedef typename NumTraits::Real Scalar; + typedef typename std::complex Complex; + int nits = NDATA/nfft; + vector inbuf(nfft); + vector outbuf(nfft); + FFT< Scalar > fft; - fft.fwd( outbuf , inbuf); + fft.fwd( outbuf , inbuf); - BenchTimer timer; - timer.reset(); - for (int k=0;k<8;++k) { - timer.start(); - for(int i = 0; i < NITS; i++) - fft.fwd( outbuf , inbuf); - timer.stop(); - } - double mflops = 5.*NFFT*log2((double)NFFT) / (1e6 * timer.value() / (double)NITS ); - cout << "NFFT=" << NFFT << " " << (double(1e-6*NFFT*NITS)/timer.value()) << " MS/s " << mflops << "MFLOPS\n"; + BenchTimer timer; + timer.reset(); + for (int k=0;k<8;++k) { + timer.start(); + for(int i = 0; i < nits; i++) + fft.fwd( outbuf , inbuf); + timer.stop(); + } + + cout << nameof() << " "; + double mflops = 5.*nfft*log2((double)nfft) / (1e6 * timer.value() / (double)nits ); + if ( NumTraits::IsComplex ) { + cout << "complex"; + }else{ + cout << "real "; + mflops /= 2; + } + + cout << " NFFT=" << nfft << " " << (double(1e-6*nfft*nits)/timer.value()) << " MS/s " << mflops << "MFLOPS\n"; +} + +int main(int argc,char ** argv) +{ + bench >(NFFT); + bench(NFFT); + bench >(NFFT); + bench(NFFT); + bench >(NFFT); + bench(NFFT); + return 0; } diff --git a/unsupported/Eigen/src/FFT/simple_fft_traits.h b/unsupported/Eigen/src/FFT/simple_fft_traits.h index f7dd2b9cf..1e2be8f79 100644 --- a/unsupported/Eigen/src/FFT/simple_fft_traits.h +++ b/unsupported/Eigen/src/FFT/simple_fft_traits.h @@ -54,28 +54,14 @@ namespace Eigen { work(0, dst, src, 1,1); }else{ int ncfft = nfft>>1; + int ncfft2 = nfft>>2; // use optimized mode for even real fwd( dst, reinterpret_cast (src),ncfft); make_real_twiddles(nfft); - std::cerr << "dst[0] = " << dst[0] << "\n"; Complex dc = dst[0].real() + dst[0].imag(); Complex nyquist = dst[0].real() - dst[0].imag(); int k; -#if 0 - using namespace std; - cerr << "desired:\n"; - for ( k=1;k <= (ncfft>>1) ; ++k ) { - Complex x = exp( Complex(0,-3.14159265358979323846264338327 * ((double) (k) / ncfft + .5) ) ); - cerr << k << " " << x << "angle(x):" << arg(x) << "\n"; - } - dc=0; - cerr << "twiddles:\n"; - for (k=0;k>1) ; ++k ) { + for ( k=1;k <= ncfft2 ; ++k ) { Complex fpk = dst[k]; Complex fpnk = conj(dst[ncfft-k]); Complex f1k = fpk + fpnk; @@ -87,7 +73,6 @@ namespace Eigen { dst[ncfft-k] = conj(f1k -tw)*Scalar(.5); } - // place conjugate-symmetric half at the end for completeness // TODO: make this configurable ( opt-out ) for ( k=1;k < ncfft ; ++k ) @@ -98,6 +83,16 @@ namespace Eigen { } } + // half-complex to scalar + void inv( Scalar * dst,const Complex * src,int nfft) + { + // TODO add optimized version for even numbers + std::vector tmp(nfft); + inv(&tmp[0],src,nfft); + for (int k=0;kn) - p=n;// no more factors + p=n;// impossible to have a factor > sqrt(n) } n /= p; m_stageRadix.push_back(p); diff --git a/unsupported/test/FFT.cpp b/unsupported/test/FFT.cpp index 41c7fed7b..75c33277d 100644 --- a/unsupported/test/FFT.cpp +++ b/unsupported/test/FFT.cpp @@ -28,6 +28,10 @@ using namespace std; +float norm(float x) {return x*x;} +double norm(double x) {return x*x;} +long double norm(long double x) {return x*x;} + template < typename T> complex promote(complex x) { return complex(x.real(),x.imag()); } @@ -83,7 +87,11 @@ void test_scalar(int nfft) for (int k=0;k() );// gross check + + vector buf3; + fft.inv( buf3 , outbuf); + VERIFY( dif_rmse(inbuf,buf3) < test_precision() );// gross check } template @@ -100,18 +108,18 @@ void test_complex(int nfft) inbuf[k]= Complex( (T)(rand()/(double)RAND_MAX - .5), (T)(rand()/(double)RAND_MAX - .5) ); fft.fwd( outbuf , inbuf); - VERIFY( fft_rmse(outbuf,inbuf) < 1e-5 );// gross check + VERIFY( fft_rmse(outbuf,inbuf) < test_precision() );// gross check fft.inv( buf3 , outbuf); - VERIFY( dif_rmse(inbuf,buf3) < 1e-5 );// gross check + VERIFY( dif_rmse(inbuf,buf3) < test_precision() );// gross check } void test_FFT() { -#if 0 +#if 1 CALL_SUBTEST( test_complex(32) ); CALL_SUBTEST( test_complex(32) ); CALL_SUBTEST( test_complex(32) ); - CALL_SUBTEST( test_complex(1024) ); CALL_SUBTEST( test_complex(1024) ); CALL_SUBTEST( test_complex(1024) ); + CALL_SUBTEST( test_complex(256) ); CALL_SUBTEST( test_complex(256) ); CALL_SUBTEST( test_complex(256) ); CALL_SUBTEST( test_complex(3*8) ); CALL_SUBTEST( test_complex(3*8) ); CALL_SUBTEST( test_complex(3*8) ); CALL_SUBTEST( test_complex(5*32) ); CALL_SUBTEST( test_complex(5*32) ); CALL_SUBTEST( test_complex(5*32) ); CALL_SUBTEST( test_complex(2*3*4) ); CALL_SUBTEST( test_complex(2*3*4) ); CALL_SUBTEST( test_complex(2*3*4) ); @@ -120,8 +128,9 @@ void test_FFT() #endif #if 1 + CALL_SUBTEST( test_scalar(45) ); CALL_SUBTEST( test_scalar(45) ); CALL_SUBTEST( test_scalar(45) ); CALL_SUBTEST( test_scalar(32) ); CALL_SUBTEST( test_scalar(32) ); CALL_SUBTEST( test_scalar(32) ); - CALL_SUBTEST( test_scalar(1024) ); CALL_SUBTEST( test_scalar(1024) ); CALL_SUBTEST( test_scalar(1024) ); + CALL_SUBTEST( test_scalar(256) ); CALL_SUBTEST( test_scalar(256) ); CALL_SUBTEST( test_scalar(256) ); CALL_SUBTEST( test_scalar(2*3*4*5*7) ); CALL_SUBTEST( test_scalar(2*3*4*5*7) ); CALL_SUBTEST( test_scalar(2*3*4*5*7) ); #endif } From 210092d16c57ec2fd2f8f515151de284e8a737e3 Mon Sep 17 00:00:00 2001 From: Mark Borgerding Date: Mon, 25 May 2009 20:35:24 -0400 Subject: [PATCH 08/18] changed name from simple_fft_traits to ei_kissfft_impl --- bench/benchFFT.cpp | 2 +- unsupported/Eigen/{FFT.h => FFT} | 18 ++++++------ ...{simple_fft_traits.h => ei_kissfft_impl.h} | 29 +++++++++++++++++-- unsupported/test/FFT.cpp | 3 +- 4 files changed, 37 insertions(+), 15 deletions(-) rename unsupported/Eigen/{FFT.h => FFT} (86%) rename unsupported/Eigen/src/FFT/{simple_fft_traits.h => ei_kissfft_impl.h} (95%) diff --git a/bench/benchFFT.cpp b/bench/benchFFT.cpp index 84cc49fe3..ffa4ffffc 100644 --- a/bench/benchFFT.cpp +++ b/bench/benchFFT.cpp @@ -26,7 +26,7 @@ #include #include #include -#include +#include using namespace Eigen; using namespace std; diff --git a/unsupported/Eigen/FFT.h b/unsupported/Eigen/FFT similarity index 86% rename from unsupported/Eigen/FFT.h rename to unsupported/Eigen/FFT index c466423b7..3d852f5a2 100644 --- a/unsupported/Eigen/FFT.h +++ b/unsupported/Eigen/FFT @@ -25,28 +25,28 @@ #ifndef EIGEN_FFT_H #define EIGEN_FFT_H -// simple_fft_traits: small, free, reasonably efficient default, derived from kissfft -#include "src/FFT/simple_fft_traits.h" -#define DEFAULT_FFT_TRAITS simple_fft_traits +// ei_kissfft_impl: small, free, reasonably efficient default, derived from kissfft +#include "src/FFT/ei_kissfft_impl.h" +#define DEFAULT_FFT_IMPL ei_kissfft_impl // FFTW: faster, GPL-not LGPL, bigger code size #ifdef FFTW_PATIENT // definition of FFTW_PATIENT indicates the caller has included fftw3.h, we can use FFTW routines // TODO -// #include "src/FFT/fftw_traits.h" -// #define DEFAULT_FFT_TRAITS fftw_traits +// #include "src/FFT/ei_fftw_impl.h" +// #define DEFAULT_FFT_IMPL ei_fftw_impl #endif // intel Math Kernel Library: fastest, commerical #ifdef _MKL_DFTI_H_ // mkl_dfti.h has been included, we can use MKL FFT routines // TODO -// #include "src/FFT/imkl_traits.h" -// #define DEFAULT_FFT_TRAITS imkl_traits +// #include "src/FFT/ei_imkl_impl.h" +// #define DEFAULT_FFT_IMPL ei_imkl_impl #endif namespace Eigen { template + typename _Traits=DEFAULT_FFT_IMPL<_Scalar> > class FFT { @@ -90,6 +90,6 @@ class FFT private: traits_type m_traits; }; -#undef DEFAULT_FFT_TRAITS +#undef DEFAULT_FFT_IMPL } #endif diff --git a/unsupported/Eigen/src/FFT/simple_fft_traits.h b/unsupported/Eigen/src/FFT/ei_kissfft_impl.h similarity index 95% rename from unsupported/Eigen/src/FFT/simple_fft_traits.h rename to unsupported/Eigen/src/FFT/ei_kissfft_impl.h index 1e2be8f79..ce2c9f16e 100644 --- a/unsupported/Eigen/src/FFT/simple_fft_traits.h +++ b/unsupported/Eigen/src/FFT/ei_kissfft_impl.h @@ -24,16 +24,15 @@ #include #include -#include namespace Eigen { template - struct simple_fft_traits + struct ei_kissfft_impl { typedef _Scalar Scalar; typedef std::complex Complex; - simple_fft_traits() : m_nfft(0) {} + ei_kissfft_impl() : m_nfft(0) {} template void fwd( Complex * dst,const _Src *src,int nfft) @@ -370,5 +369,29 @@ namespace Eigen { std::vector m_realTwiddles; std::vector m_stageRadix; std::vector m_stageRemainder; +/* + enum {FORWARD,INVERSE,REAL,COMPLEX}; + + struct PlanKey + { + PlanKey(int nfft,bool isinverse,bool iscomplex) + { + _key = (nfft<<2) | (isinverse<<1) | iscomplex; + } + + bool operator<(const PlanKey & other) const + { + return this->_key < other._key; + } + int _key; + }; + + struct PlanData + { + std::vector m_twiddles; + }; + + std::map. #include "main.h" -#include - +#include using namespace std; From 03ed6f9bfb63879d475f5bb8ea46cff96063d010 Mon Sep 17 00:00:00 2001 From: Mark Borgerding Date: Mon, 25 May 2009 23:06:49 -0400 Subject: [PATCH 09/18] refactored ei_kissfft_impl to maintain a cache of cpx fft plans --- unsupported/Eigen/src/FFT/ei_kissfft_impl.h | 612 ++++++++++---------- unsupported/test/FFT.cpp | 7 +- 2 files changed, 320 insertions(+), 299 deletions(-) diff --git a/unsupported/Eigen/src/FFT/ei_kissfft_impl.h b/unsupported/Eigen/src/FFT/ei_kissfft_impl.h index ce2c9f16e..3580e6c61 100644 --- a/unsupported/Eigen/src/FFT/ei_kissfft_impl.h +++ b/unsupported/Eigen/src/FFT/ei_kissfft_impl.h @@ -24,21 +24,279 @@ #include #include +#include namespace Eigen { + template + struct ei_kiss_cpx_fft + { + typedef _Scalar Scalar; + typedef std::complex Complex; + std::vector m_twiddles; + std::vector m_stageRadix; + std::vector m_stageRemainder; + bool m_inverse; + + ei_kiss_cpx_fft() { } + + void make_twiddles(int nfft,bool inverse) + { + m_inverse = inverse; + m_twiddles.resize(nfft); + Scalar phinc = (inverse?2:-2)* acos( (Scalar) -1) / nfft; + for (int i=0;in) + p=n;// impossible to have a factor > sqrt(n) + } + n /= p; + m_stageRadix.push_back(p); + m_stageRemainder.push_back(n); + }while(n>1); + } + } + + template + void work( int stage,Complex * xout, const _Src * xin, size_t fstride,size_t in_stride) + { + int p = m_stageRadix[stage]; + int m = m_stageRemainder[stage]; + Complex * Fout_beg = xout; + Complex * Fout_end = xout + p*m; + + if (m>1) { + do{ + // recursive call: + // DFT of size m*p performed by doing + // p instances of smaller DFTs of size m, + // each one takes a decimated version of the input + work(stage+1, xout , xin, fstride*p,in_stride); + xin += fstride*in_stride; + }while( (xout += m) != Fout_end ); + }else{ + do{ + *xout = *xin; + xin += fstride*in_stride; + }while(++xout != Fout_end ); + } + xout=Fout_beg; + + // recombine the p smaller DFTs + switch (p) { + case 2: bfly2(xout,fstride,m); break; + case 3: bfly3(xout,fstride,m); break; + case 4: bfly4(xout,fstride,m); break; + case 5: bfly5(xout,fstride,m); break; + default: bfly_generic(xout,fstride,m,p); break; + } + } + + void bfly2( Complex * Fout, const size_t fstride, int m) + { + for (int k=0;kreal() - .5*scratch[3].real() , Fout->imag() - .5*scratch[3].imag() ); + scratch[0] *= epi3.imag(); + *Fout += scratch[3]; + Fout[m2] = Complex( Fout[m].real() + scratch[0].imag() , Fout[m].imag() - scratch[0].real() ); + Fout[m] += Complex( -scratch[0].imag(),scratch[0].real() ); + ++Fout; + }while(--k); + } + + void bfly5( Complex * Fout, const size_t fstride, const size_t m) + { + Complex *Fout0,*Fout1,*Fout2,*Fout3,*Fout4; + size_t u; + Complex scratch[13]; + Complex * twiddles = &m_twiddles[0]; + Complex *tw; + Complex ya,yb; + ya = twiddles[fstride*m]; + yb = twiddles[fstride*2*m]; + + Fout0=Fout; + Fout1=Fout0+m; + Fout2=Fout0+2*m; + Fout3=Fout0+3*m; + Fout4=Fout0+4*m; + + tw=twiddles; + for ( u=0; u=Norig) twidx-=Norig; + t=scratchbuf[q] * twiddles[twidx]; + Fout[ k ] += t; + } + k += m; + } + } + } + }; + + template struct ei_kissfft_impl { typedef _Scalar Scalar; typedef std::complex Complex; - ei_kissfft_impl() : m_nfft(0) {} + ei_kissfft_impl() {} + + void clear() + { + m_plans.clear(); + m_realTwiddles.clear(); + } template void fwd( Complex * dst,const _Src *src,int nfft) { - prepare(nfft,false); - work(0, dst, src, 1,1); + get_plan(nfft,false).work(0, dst, src, 1,1); } // real-to-complex forward FFT @@ -47,16 +305,16 @@ namespace Eigen { // then fill in the conjugate symmetric half void fwd( Complex * dst,const Scalar * src,int nfft) { - if ( nfft&1 ) { + if ( nfft&3 ) { // use generic mode for odd - prepare(nfft,false); - work(0, dst, src, 1,1); + get_plan(nfft,false).work(0, dst, src, 1,1); }else{ int ncfft = nfft>>1; int ncfft2 = nfft>>2; + Complex * rtw = real_twiddles(ncfft2); + // use optimized mode for even real - fwd( dst, reinterpret_cast (src),ncfft); - make_real_twiddles(nfft); + fwd( dst, reinterpret_cast (src), ncfft); Complex dc = dst[0].real() + dst[0].imag(); Complex nyquist = dst[0].real() - dst[0].imag(); int k; @@ -65,8 +323,7 @@ namespace Eigen { Complex fpnk = conj(dst[ncfft-k]); Complex f1k = fpk + fpnk; Complex f2k = fpk - fpnk; - //Complex tw = f2k * exp( Complex(0,-3.14159265358979323846264338327 * ((double) (k) / ncfft + .5) ) ); - Complex tw= f2k * m_realTwiddles[k-1]; + Complex tw= f2k * rtw[k-1]; dst[k] = (f1k + tw) * Scalar(.5); dst[ncfft-k] = conj(f1k -tw)*Scalar(.5); @@ -94,304 +351,67 @@ namespace Eigen { void inv(Complex * dst,const Complex *src,int nfft) { - prepare(nfft,true); - work(0, dst, src, 1,1); - scale(dst, Scalar(1)/m_nfft ); - } - - void prepare(int nfft,bool inverse) - { - make_twiddles(nfft,inverse); - factorize(nfft); - } - - void make_real_twiddles(int nfft) - { - int ncfft2 = nfft>>2; - if ( m_realTwiddles.size() != ncfft2) { - m_realTwiddles.resize(ncfft2); - int ncfft= nfft>>1; - for (int k=1;k<=ncfft2;++k) - m_realTwiddles[k-1] = exp( Complex(0,-3.14159265358979323846264338327 * ((double) (k) / ncfft + .5) ) ); - } - } - - void make_twiddles(int nfft,bool inverse) - { - if ( m_twiddles.size() == nfft) { - // reuse the twiddles, conjugate if necessary - if (inverse != m_inverse) - for (int i=0;in) - p=n;// impossible to have a factor > sqrt(n) - } - n /= p; - m_stageRadix.push_back(p); - m_stageRemainder.push_back(n); - }while(n>1); - } - m_nfft = nfft; - } - - void scale(Complex *dst,Scalar s) - { - for (int k=0;k - void work( int stage,Complex * xout, const _Src * xin, size_t fstride,size_t in_stride) + typedef ei_kiss_cpx_fft PlanData; + + typedef std::map PlanMap; + PlanMap m_plans; + std::map > m_realTwiddles; + + int PlanKey(int nfft,bool isinverse) const { return (nfft<<1) | isinverse; } + + PlanData & get_plan(int nfft,bool inverse) { - int p = m_stageRadix[stage]; - int m = m_stageRemainder[stage]; - Complex * Fout_beg = xout; - Complex * Fout_end = xout + p*m; - - if (m>1) { - do{ - // recursive call: - // DFT of size m*p performed by doing - // p instances of smaller DFTs of size m, - // each one takes a decimated version of the input - work(stage+1, xout , xin, fstride*p,in_stride); - xin += fstride*in_stride; - }while( (xout += m) != Fout_end ); - }else{ - do{ - *xout = *xin; - xin += fstride*in_stride; - }while(++xout != Fout_end ); - } - xout=Fout_beg; - - // recombine the p smaller DFTs - switch (p) { - case 2: bfly2(xout,fstride,m); break; - case 3: bfly3(xout,fstride,m); break; - case 4: bfly4(xout,fstride,m); break; - case 5: bfly5(xout,fstride,m); break; - default: bfly_generic(xout,fstride,m,p); break; - } - } - - void bfly2( Complex * Fout, const size_t fstride, int m) - { - for (int k=0;kreal() - .5*scratch[3].real() , Fout->imag() - .5*scratch[3].imag() ); - scratch[0] *= epi3.imag(); - *Fout += scratch[3]; - Fout[m2] = Complex( Fout[m].real() + scratch[0].imag() , Fout[m].imag() - scratch[0].real() ); - Fout[m] += Complex( -scratch[0].imag(),scratch[0].real() ); - ++Fout; - }while(--k); - } - - void bfly5( Complex * Fout, const size_t fstride, const size_t m) - { - Complex *Fout0,*Fout1,*Fout2,*Fout3,*Fout4; - size_t u; - Complex scratch[13]; - Complex * twiddles = &m_twiddles[0]; - Complex *tw; - Complex ya,yb; - ya = twiddles[fstride*m]; - yb = twiddles[fstride*2*m]; - - Fout0=Fout; - Fout1=Fout0+m; - Fout2=Fout0+2*m; - Fout3=Fout0+3*m; - Fout4=Fout0+4*m; - - tw=twiddles; - for ( u=0; u::iterator MapIt; + MapIt it; + it = m_plans.find( PlanKey(nfft,inverse) ); + if (it == m_plans.end() ) { + // create new entry + it = m_plans.insert( make_pair( PlanKey(nfft,inverse) , PlanData() ) ); + MapIt it2 = m_plans.find( PlanKey(nfft,!inverse) ); + if (it2 != m_plans.end() ) { + it->second = it2.second; + it->second.invert(); + }else{ + it->second.make_twiddles(nfft,inverse); + it->second.factorize(nfft); + } } - - k=u; - for ( q1=0 ; q1

=Norig) twidx-=Norig; - t=scratchbuf[q] * twiddles[twidx]; - Fout[ k ] += t; - } - k += m; + return it->second; + */ + PlanData & pd = m_plans[ PlanKey(nfft,inverse) ]; + if ( pd.m_twiddles.size() == 0 ) { + pd.make_twiddles(nfft,inverse); + pd.factorize(nfft); } - } + return pd; } - int m_nfft; - bool m_inverse; - std::vector m_twiddles; - std::vector m_realTwiddles; - std::vector m_stageRadix; - std::vector m_stageRemainder; -/* - enum {FORWARD,INVERSE,REAL,COMPLEX}; - - struct PlanKey + Complex * real_twiddles(int ncfft2) { - PlanKey(int nfft,bool isinverse,bool iscomplex) - { - _key = (nfft<<2) | (isinverse<<1) | iscomplex; + std::vector & twidref = m_realTwiddles[ncfft2];// creates new if not there + if ( (int)twidref.size() != ncfft2 ) { + twidref.resize(ncfft2); + int ncfft= ncfft2<<1; + Scalar pi = acos( Scalar(-1) ); + for (int k=1;k<=ncfft2;++k) + twidref[k-1] = exp( Complex(0,-pi * ((double) (k) / ncfft + .5) ) ); } + return &twidref[0]; + } - bool operator<(const PlanKey & other) const - { - return this->_key < other._key; - } - int _key; - }; - - struct PlanData + void scale(Complex *dst,int n,Scalar s) { - std::vector m_twiddles; - }; - - std::map promote(long double x) { return complex( x); { long double totalpower=0; long double difpower=0; - cerr <<"idx\ttruth\t\tvalue\n"; + cerr <<"idx\ttruth\t\tvalue\t|dif|=\n"; for (size_t k0=0;k0 acc = 0; long double phinc = -2.*k0* M_PIl / timebuf.size(); @@ -55,7 +55,7 @@ complex promote(long double x) { return complex( x); complex x = promote(fftbuf[k0]); complex dif = acc - x; difpower += norm(dif); - cerr << k0 << "\t" << acc << "\t" << x << endl; + cerr << k0 << "\t" << acc << "\t" << x << "\t" << sqrt(norm(dif)) << endl; } cerr << "rmse:" << sqrt(difpower/totalpower) << endl; return sqrt(difpower/totalpower); @@ -127,8 +127,9 @@ void test_FFT() #endif #if 1 - CALL_SUBTEST( test_scalar(45) ); CALL_SUBTEST( test_scalar(45) ); CALL_SUBTEST( test_scalar(45) ); CALL_SUBTEST( test_scalar(32) ); CALL_SUBTEST( test_scalar(32) ); CALL_SUBTEST( test_scalar(32) ); + CALL_SUBTEST( test_scalar(45) ); CALL_SUBTEST( test_scalar(45) ); CALL_SUBTEST( test_scalar(45) ); + CALL_SUBTEST( test_scalar(50) ); CALL_SUBTEST( test_scalar(50) ); CALL_SUBTEST( test_scalar(50) ); CALL_SUBTEST( test_scalar(256) ); CALL_SUBTEST( test_scalar(256) ); CALL_SUBTEST( test_scalar(256) ); CALL_SUBTEST( test_scalar(2*3*4*5*7) ); CALL_SUBTEST( test_scalar(2*3*4*5*7) ); CALL_SUBTEST( test_scalar(2*3*4*5*7) ); #endif From 09b47332553a79dab30516e6b1d410dea90cf9b7 Mon Sep 17 00:00:00 2001 From: Mark Borgerding Date: Mon, 25 May 2009 23:52:21 -0400 Subject: [PATCH 10/18] added real-optimized inverse FFT (NFFT must be multiple of 4) --- bench/benchFFT.cpp | 30 +- unsupported/Eigen/src/FFT/ei_kissfft_impl.h | 688 ++++++++++---------- 2 files changed, 370 insertions(+), 348 deletions(-) diff --git a/bench/benchFFT.cpp b/bench/benchFFT.cpp index ffa4ffffc..14f5063fb 100644 --- a/bench/benchFFT.cpp +++ b/bench/benchFFT.cpp @@ -53,7 +53,7 @@ template <> string nameof() {return "long double";} using namespace Eigen; template -void bench(int nfft) +void bench(int nfft,bool fwd) { typedef typename NumTraits::Real Scalar; typedef typename std::complex Complex; @@ -69,7 +69,10 @@ void bench(int nfft) for (int k=0;k<8;++k) { timer.start(); for(int i = 0; i < nits; i++) - fft.fwd( outbuf , inbuf); + if (fwd) + fft.fwd( outbuf , inbuf); + else + fft.inv(inbuf,outbuf); timer.stop(); } @@ -82,16 +85,27 @@ void bench(int nfft) mflops /= 2; } + if (fwd) + cout << " fwd"; + else + cout << " inv"; + cout << " NFFT=" << nfft << " " << (double(1e-6*nfft*nits)/timer.value()) << " MS/s " << mflops << "MFLOPS\n"; } int main(int argc,char ** argv) { - bench >(NFFT); - bench(NFFT); - bench >(NFFT); - bench(NFFT); - bench >(NFFT); - bench(NFFT); + bench >(NFFT,true); + bench >(NFFT,false); + bench(NFFT,true); + bench(NFFT,false); + bench >(NFFT,true); + bench >(NFFT,false); + bench(NFFT,true); + bench(NFFT,false); + bench >(NFFT,true); + bench >(NFFT,false); + bench(NFFT,true); + bench(NFFT,false); return 0; } diff --git a/unsupported/Eigen/src/FFT/ei_kissfft_impl.h b/unsupported/Eigen/src/FFT/ei_kissfft_impl.h index 3580e6c61..453c7f6da 100644 --- a/unsupported/Eigen/src/FFT/ei_kissfft_impl.h +++ b/unsupported/Eigen/src/FFT/ei_kissfft_impl.h @@ -28,390 +28,398 @@ namespace Eigen { - template - struct ei_kiss_cpx_fft + template + struct ei_kiss_cpx_fft + { + typedef _Scalar Scalar; + typedef std::complex Complex; + std::vector m_twiddles; + std::vector m_stageRadix; + std::vector m_stageRemainder; + bool m_inverse; + + void make_twiddles(int nfft,bool inverse) { - typedef _Scalar Scalar; - typedef std::complex Complex; - std::vector m_twiddles; - std::vector m_stageRadix; - std::vector m_stageRemainder; - bool m_inverse; + m_inverse = inverse; + m_twiddles.resize(nfft); + Scalar phinc = (inverse?2:-2)* acos( (Scalar) -1) / nfft; + for (int i=0;in) + p=n;// impossible to have a factor > sqrt(n) + } + n /= p; + m_stageRadix.push_back(p); + m_stageRemainder.push_back(n); + }while(n>1); + } + + template + void work( int stage,Complex * xout, const _Src * xin, size_t fstride,size_t in_stride) + { + int p = m_stageRadix[stage]; + int m = m_stageRemainder[stage]; + Complex * Fout_beg = xout; + Complex * Fout_end = xout + p*m; + + if (m>1) { + do{ + // recursive call: + // DFT of size m*p performed by doing + // p instances of smaller DFTs of size m, + // each one takes a decimated version of the input + work(stage+1, xout , xin, fstride*p,in_stride); + xin += fstride*in_stride; + }while( (xout += m) != Fout_end ); + }else{ + do{ + *xout = *xin; + xin += fstride*in_stride; + }while(++xout != Fout_end ); + } + xout=Fout_beg; + + // recombine the p smaller DFTs + switch (p) { + case 2: bfly2(xout,fstride,m); break; + case 3: bfly3(xout,fstride,m); break; + case 4: bfly4(xout,fstride,m); break; + case 5: bfly5(xout,fstride,m); break; + default: bfly_generic(xout,fstride,m,p); break; + } + } + + void bfly2( Complex * Fout, const size_t fstride, int m) + { + for (int k=0;kreal() - .5*scratch[3].real() , Fout->imag() - .5*scratch[3].imag() ); + scratch[0] *= epi3.imag(); + *Fout += scratch[3]; + Fout[m2] = Complex( Fout[m].real() + scratch[0].imag() , Fout[m].imag() - scratch[0].real() ); + Fout[m] += Complex( -scratch[0].imag(),scratch[0].real() ); + ++Fout; + }while(--k); + } + + void bfly5( Complex * Fout, const size_t fstride, const size_t m) + { + Complex *Fout0,*Fout1,*Fout2,*Fout3,*Fout4; + size_t u; + Complex scratch[13]; + Complex * twiddles = &m_twiddles[0]; + Complex *tw; + Complex ya,yb; + ya = twiddles[fstride*m]; + yb = twiddles[fstride*2*m]; + + Fout0=Fout; + Fout1=Fout0+m; + Fout2=Fout0+2*m; + Fout3=Fout0+3*m; + Fout4=Fout0+4*m; + + tw=twiddles; + for ( u=0; u=Norig) twidx-=Norig; + t=scratchbuf[q] * twiddles[twidx]; + Fout[ k ] += t; + } + k += m; } - - void factorize(int nfft) - { - if (m_stageRadix.size()==0 || m_stageRadix[0] * m_stageRemainder[0] != nfft) - { - m_stageRadix.resize(0); - m_stageRemainder.resize(0); - //factorize - //start factoring out 4's, then 2's, then 3,5,7,9,... - int n= nfft; - int p=4; - do { - while (n % p) { - switch (p) { - case 4: p = 2; break; - case 2: p = 3; break; - default: p += 2; break; - } - if (p*p>n) - p=n;// impossible to have a factor > sqrt(n) - } - n /= p; - m_stageRadix.push_back(p); - m_stageRemainder.push_back(n); - }while(n>1); - } - } - - template - void work( int stage,Complex * xout, const _Src * xin, size_t fstride,size_t in_stride) - { - int p = m_stageRadix[stage]; - int m = m_stageRemainder[stage]; - Complex * Fout_beg = xout; - Complex * Fout_end = xout + p*m; - - if (m>1) { - do{ - // recursive call: - // DFT of size m*p performed by doing - // p instances of smaller DFTs of size m, - // each one takes a decimated version of the input - work(stage+1, xout , xin, fstride*p,in_stride); - xin += fstride*in_stride; - }while( (xout += m) != Fout_end ); - }else{ - do{ - *xout = *xin; - xin += fstride*in_stride; - }while(++xout != Fout_end ); - } - xout=Fout_beg; - - // recombine the p smaller DFTs - switch (p) { - case 2: bfly2(xout,fstride,m); break; - case 3: bfly3(xout,fstride,m); break; - case 4: bfly4(xout,fstride,m); break; - case 5: bfly5(xout,fstride,m); break; - default: bfly_generic(xout,fstride,m,p); break; - } - } - - void bfly2( Complex * Fout, const size_t fstride, int m) - { - for (int k=0;kreal() - .5*scratch[3].real() , Fout->imag() - .5*scratch[3].imag() ); - scratch[0] *= epi3.imag(); - *Fout += scratch[3]; - Fout[m2] = Complex( Fout[m].real() + scratch[0].imag() , Fout[m].imag() - scratch[0].real() ); - Fout[m] += Complex( -scratch[0].imag(),scratch[0].real() ); - ++Fout; - }while(--k); - } - - void bfly5( Complex * Fout, const size_t fstride, const size_t m) - { - Complex *Fout0,*Fout1,*Fout2,*Fout3,*Fout4; - size_t u; - Complex scratch[13]; - Complex * twiddles = &m_twiddles[0]; - Complex *tw; - Complex ya,yb; - ya = twiddles[fstride*m]; - yb = twiddles[fstride*2*m]; - - Fout0=Fout; - Fout1=Fout0+m; - Fout2=Fout0+2*m; - Fout3=Fout0+3*m; - Fout4=Fout0+4*m; - - tw=twiddles; - for ( u=0; u=Norig) twidx-=Norig; - t=scratchbuf[q] * twiddles[twidx]; - Fout[ k ] += t; - } - k += m; - } - } - } - }; - + } + } + }; template struct ei_kissfft_impl { - typedef _Scalar Scalar; - typedef std::complex Complex; - ei_kissfft_impl() {} + typedef _Scalar Scalar; + typedef std::complex Complex; - void clear() - { + void clear() + { m_plans.clear(); m_realTwiddles.clear(); - } + } - template - void fwd( Complex * dst,const _Src *src,int nfft) - { + template + void fwd( Complex * dst,const _Src *src,int nfft) + { get_plan(nfft,false).work(0, dst, src, 1,1); - } + } - // real-to-complex forward FFT - // perform two FFTs of src even and src odd - // then twiddle to recombine them into the half-spectrum format - // then fill in the conjugate symmetric half - void fwd( Complex * dst,const Scalar * src,int nfft) - { + // real-to-complex forward FFT + // perform two FFTs of src even and src odd + // then twiddle to recombine them into the half-spectrum format + // then fill in the conjugate symmetric half + void fwd( Complex * dst,const Scalar * src,int nfft) + { if ( nfft&3 ) { - // use generic mode for odd - get_plan(nfft,false).work(0, dst, src, 1,1); + // use generic mode for odd + get_plan(nfft,false).work(0, dst, src, 1,1); }else{ - int ncfft = nfft>>1; - int ncfft2 = nfft>>2; - Complex * rtw = real_twiddles(ncfft2); + int ncfft = nfft>>1; + int ncfft2 = nfft>>2; + Complex * rtw = real_twiddles(ncfft2); - // use optimized mode for even real - fwd( dst, reinterpret_cast (src), ncfft); - Complex dc = dst[0].real() + dst[0].imag(); - Complex nyquist = dst[0].real() - dst[0].imag(); - int k; - for ( k=1;k <= ncfft2 ; ++k ) { - Complex fpk = dst[k]; - Complex fpnk = conj(dst[ncfft-k]); - Complex f1k = fpk + fpnk; - Complex f2k = fpk - fpnk; - Complex tw= f2k * rtw[k-1]; + // use optimized mode for even real + fwd( dst, reinterpret_cast (src), ncfft); + Complex dc = dst[0].real() + dst[0].imag(); + Complex nyquist = dst[0].real() - dst[0].imag(); + int k; + for ( k=1;k <= ncfft2 ; ++k ) { + Complex fpk = dst[k]; + Complex fpnk = conj(dst[ncfft-k]); + Complex f1k = fpk + fpnk; + Complex f2k = fpk - fpnk; + Complex tw= f2k * rtw[k-1]; + dst[k] = (f1k + tw) * Scalar(.5); + dst[ncfft-k] = conj(f1k -tw)*Scalar(.5); + } - dst[k] = (f1k + tw) * Scalar(.5); - dst[ncfft-k] = conj(f1k -tw)*Scalar(.5); - } - - // place conjugate-symmetric half at the end for completeness - // TODO: make this configurable ( opt-out ) - for ( k=1;k < ncfft ; ++k ) - dst[nfft-k] = conj(dst[k]); - - dst[0] = dc; - dst[ncfft] = nyquist; + // place conjugate-symmetric half at the end for completeness + // TODO: make this configurable ( opt-out ) + for ( k=1;k < ncfft ; ++k ) + dst[nfft-k] = conj(dst[k]); + dst[0] = dc; + dst[ncfft] = nyquist; } - } + } - // half-complex to scalar - void inv( Scalar * dst,const Complex * src,int nfft) - { - // TODO add optimized version for even numbers - std::vector tmp(nfft); - inv(&tmp[0],src,nfft); - for (int k=0;k>1; + int ncfft2 = nfft>>2; + Complex * rtw = real_twiddles(ncfft2); + m_scratchBuf.resize(ncfft); + m_scratchBuf[0] = Complex( src[0].real() + src[ncfft].real(), src[0].real() - src[ncfft].real() ); + for (int k = 1; k <= ncfft / 2; ++k) { + Complex fk = src[k]; + Complex fnkc = conj(src[ncfft-k]); + Complex fek = fk + fnkc; + Complex tmp = fk - fnkc; + Complex fok = tmp * conj(rtw[k-1]); + m_scratchBuf[k] = fek + fok; + m_scratchBuf[ncfft-k] = conj(fek - fok); + } + scale(&m_scratchBuf[0], ncfft, Scalar(1)/nfft ); + get_plan(ncfft,true).work(0, reinterpret_cast(dst), &m_scratchBuf[0], 1,1); + } + } - typedef ei_kiss_cpx_fft PlanData; + private: - typedef std::map PlanMap; - PlanMap m_plans; - std::map > m_realTwiddles; + typedef ei_kiss_cpx_fft PlanData; - int PlanKey(int nfft,bool isinverse) const { return (nfft<<1) | isinverse; } + typedef std::map PlanMap; + PlanMap m_plans; + std::map > m_realTwiddles; + std::vector m_scratchBuf; - PlanData & get_plan(int nfft,bool inverse) - { - /* + int PlanKey(int nfft,bool isinverse) const { return (nfft<<1) | isinverse; } + + PlanData & get_plan(int nfft,bool inverse) + { + /* TODO: figure out why this does not work (g++ 4.3.2) * for some reason this does not work * - typedef typename std::map::iterator MapIt; - MapIt it; - it = m_plans.find( PlanKey(nfft,inverse) ); - if (it == m_plans.end() ) { - // create new entry - it = m_plans.insert( make_pair( PlanKey(nfft,inverse) , PlanData() ) ); - MapIt it2 = m_plans.find( PlanKey(nfft,!inverse) ); - if (it2 != m_plans.end() ) { - it->second = it2.second; - it->second.invert(); - }else{ - it->second.make_twiddles(nfft,inverse); - it->second.factorize(nfft); - } + PlanMap::iterator it; + it = m_plans.find( PlanKey(nfft,inverse) ); + if (it == m_plans.end() ) { + // create new entry + it = m_plans.insert( make_pair( PlanKey(nfft,inverse) , PlanData() ) ); + MapIt it2 = m_plans.find( PlanKey(nfft,!inverse) ); + if (it2 != m_plans.end() ) { + it->second = it2.second; + it->second.conjugate(); + }else{ + it->second.make_twiddles(nfft,inverse); + it->second.factorize(nfft); + } } return it->second; */ PlanData & pd = m_plans[ PlanKey(nfft,inverse) ]; if ( pd.m_twiddles.size() == 0 ) { - pd.make_twiddles(nfft,inverse); - pd.factorize(nfft); + pd.make_twiddles(nfft,inverse); + pd.factorize(nfft); } return pd; - } + } - Complex * real_twiddles(int ncfft2) - { + Complex * real_twiddles(int ncfft2) + { std::vector & twidref = m_realTwiddles[ncfft2];// creates new if not there if ( (int)twidref.size() != ncfft2 ) { - twidref.resize(ncfft2); - int ncfft= ncfft2<<1; - Scalar pi = acos( Scalar(-1) ); - for (int k=1;k<=ncfft2;++k) - twidref[k-1] = exp( Complex(0,-pi * ((double) (k) / ncfft + .5) ) ); + twidref.resize(ncfft2); + int ncfft= ncfft2<<1; + Scalar pi = acos( Scalar(-1) ); + for (int k=1;k<=ncfft2;++k) + twidref[k-1] = exp( Complex(0,-pi * ((double) (k) / ncfft + .5) ) ); } return &twidref[0]; - } + } - void scale(Complex *dst,int n,Scalar s) - { + void scale(Complex *dst,int n,Scalar s) + { for (int k=0;k Date: Wed, 27 May 2009 21:32:42 -0400 Subject: [PATCH 11/18] various comment changes --- unsupported/Eigen/FFT | 6 +- unsupported/Eigen/src/FFT/ei_kissfft_impl.h | 472 ++++++++++---------- 2 files changed, 231 insertions(+), 247 deletions(-) diff --git a/unsupported/Eigen/FFT b/unsupported/Eigen/FFT index 3d852f5a2..31d8c74c5 100644 --- a/unsupported/Eigen/FFT +++ b/unsupported/Eigen/FFT @@ -1,5 +1,5 @@ // This file is part of Eigen, a lightweight C++ template library -// for linear algebra. Eigen itself is part of the KDE project. +// for linear algebra. // // Copyright (C) 2009 Mark Borgerding mark a borgerding net // @@ -29,14 +29,14 @@ #include "src/FFT/ei_kissfft_impl.h" #define DEFAULT_FFT_IMPL ei_kissfft_impl -// FFTW: faster, GPL-not LGPL, bigger code size +// FFTW: faster, GPL -- incompatible with Eigen in LGPL form, bigger code size #ifdef FFTW_PATIENT // definition of FFTW_PATIENT indicates the caller has included fftw3.h, we can use FFTW routines // TODO // #include "src/FFT/ei_fftw_impl.h" // #define DEFAULT_FFT_IMPL ei_fftw_impl #endif -// intel Math Kernel Library: fastest, commerical +// intel Math Kernel Library: fastest, commerical -- incompatible with Eigen in GPL form #ifdef _MKL_DFTI_H_ // mkl_dfti.h has been included, we can use MKL FFT routines // TODO // #include "src/FFT/ei_imkl_impl.h" diff --git a/unsupported/Eigen/src/FFT/ei_kissfft_impl.h b/unsupported/Eigen/src/FFT/ei_kissfft_impl.h index 453c7f6da..91fa5ca18 100644 --- a/unsupported/Eigen/src/FFT/ei_kissfft_impl.h +++ b/unsupported/Eigen/src/FFT/ei_kissfft_impl.h @@ -1,5 +1,5 @@ // This file is part of Eigen, a lightweight C++ template library -// for linear algebra. Eigen itself is part of the KDE project. +// for linear algebra. // // Copyright (C) 2009 Mark Borgerding mark a borgerding net // @@ -28,252 +28,255 @@ namespace Eigen { + // This FFT implementation was derived from kissfft http:sourceforge.net/projects/kissfft + // Copyright 2003-2009 Mark Borgerding + template - struct ei_kiss_cpx_fft - { - typedef _Scalar Scalar; - typedef std::complex Complex; - std::vector m_twiddles; - std::vector m_stageRadix; - std::vector m_stageRemainder; - bool m_inverse; - - void make_twiddles(int nfft,bool inverse) + struct ei_kiss_cpx_fft { - m_inverse = inverse; - m_twiddles.resize(nfft); - Scalar phinc = (inverse?2:-2)* acos( (Scalar) -1) / nfft; - for (int i=0;i Complex; + std::vector m_twiddles; + std::vector m_stageRadix; + std::vector m_stageRemainder; + bool m_inverse; - void conjugate() - { - m_inverse = !m_inverse; - for ( size_t i=0;in) + p=n;// impossible to have a factor > sqrt(n) + } + n /= p; + m_stageRadix.push_back(p); + m_stageRemainder.push_back(n); + }while(n>1); + } + + template + void work( int stage,Complex * xout, const _Src * xin, size_t fstride,size_t in_stride) + { + int p = m_stageRadix[stage]; + int m = m_stageRemainder[stage]; + Complex * Fout_beg = xout; + Complex * Fout_end = xout + p*m; + + if (m>1) { + do{ + // recursive call: + // DFT of size m*p performed by doing + // p instances of smaller DFTs of size m, + // each one takes a decimated version of the input + work(stage+1, xout , xin, fstride*p,in_stride); + xin += fstride*in_stride; + }while( (xout += m) != Fout_end ); + }else{ + do{ + *xout = *xin; + xin += fstride*in_stride; + }while(++xout != Fout_end ); + } + xout=Fout_beg; + + // recombine the p smaller DFTs + switch (p) { + case 2: bfly2(xout,fstride,m); break; + case 3: bfly3(xout,fstride,m); break; + case 4: bfly4(xout,fstride,m); break; + case 5: bfly5(xout,fstride,m); break; + default: bfly_generic(xout,fstride,m,p); break; } - if (p*p>n) - p=n;// impossible to have a factor > sqrt(n) } - n /= p; - m_stageRadix.push_back(p); - m_stageRemainder.push_back(n); - }while(n>1); - } - template - void work( int stage,Complex * xout, const _Src * xin, size_t fstride,size_t in_stride) - { - int p = m_stageRadix[stage]; - int m = m_stageRemainder[stage]; - Complex * Fout_beg = xout; - Complex * Fout_end = xout + p*m; + void bfly2( Complex * Fout, const size_t fstride, int m) + { + for (int k=0;k1) { do{ - // recursive call: - // DFT of size m*p performed by doing - // p instances of smaller DFTs of size m, - // each one takes a decimated version of the input - work(stage+1, xout , xin, fstride*p,in_stride); - xin += fstride*in_stride; - }while( (xout += m) != Fout_end ); - }else{ - do{ - *xout = *xin; - xin += fstride*in_stride; - }while(++xout != Fout_end ); + scratch[1]=Fout[m] * *tw1; + scratch[2]=Fout[m2] * *tw2; + + scratch[3]=scratch[1]+scratch[2]; + scratch[0]=scratch[1]-scratch[2]; + tw1 += fstride; + tw2 += fstride*2; + Fout[m] = Complex( Fout->real() - .5*scratch[3].real() , Fout->imag() - .5*scratch[3].imag() ); + scratch[0] *= epi3.imag(); + *Fout += scratch[3]; + Fout[m2] = Complex( Fout[m].real() + scratch[0].imag() , Fout[m].imag() - scratch[0].real() ); + Fout[m] += Complex( -scratch[0].imag(),scratch[0].real() ); + ++Fout; + }while(--k); } - xout=Fout_beg; - // recombine the p smaller DFTs - switch (p) { - case 2: bfly2(xout,fstride,m); break; - case 3: bfly3(xout,fstride,m); break; - case 4: bfly4(xout,fstride,m); break; - case 5: bfly5(xout,fstride,m); break; - default: bfly_generic(xout,fstride,m,p); break; - } - } + void bfly5( Complex * Fout, const size_t fstride, const size_t m) + { + Complex *Fout0,*Fout1,*Fout2,*Fout3,*Fout4; + size_t u; + Complex scratch[13]; + Complex * twiddles = &m_twiddles[0]; + Complex *tw; + Complex ya,yb; + ya = twiddles[fstride*m]; + yb = twiddles[fstride*2*m]; - void bfly2( Complex * Fout, const size_t fstride, int m) - { - for (int k=0;kreal() - .5*scratch[3].real() , Fout->imag() - .5*scratch[3].imag() ); - scratch[0] *= epi3.imag(); - *Fout += scratch[3]; - Fout[m2] = Complex( Fout[m].real() + scratch[0].imag() , Fout[m].imag() - scratch[0].real() ); - Fout[m] += Complex( -scratch[0].imag(),scratch[0].real() ); - ++Fout; - }while(--k); - } - - void bfly5( Complex * Fout, const size_t fstride, const size_t m) - { - Complex *Fout0,*Fout1,*Fout2,*Fout3,*Fout4; - size_t u; - Complex scratch[13]; - Complex * twiddles = &m_twiddles[0]; - Complex *tw; - Complex ya,yb; - ya = twiddles[fstride*m]; - yb = twiddles[fstride*2*m]; - - Fout0=Fout; - Fout1=Fout0+m; - Fout2=Fout0+2*m; - Fout3=Fout0+3*m; - Fout4=Fout0+4*m; - - tw=twiddles; - for ( u=0; u=Norig) twidx-=Norig; - t=scratchbuf[q] * twiddles[twidx]; - Fout[ k ] += t; + /* perform the butterfly for one stage of a mixed radix FFT */ + void bfly_generic( + Complex * Fout, + const size_t fstride, + int m, + int p + ) + { + int u,k,q1,q; + Complex * twiddles = &m_twiddles[0]; + Complex t; + int Norig = m_twiddles.size(); + Complex * scratchbuf = (Complex*)alloca(p*sizeof(Complex) ); + + for ( u=0; u=Norig) twidx-=Norig; + t=scratchbuf[q] * twiddles[twidx]; + Fout[ k ] += t; + } + k += m; } - k += m; } } - } - }; + }; template - struct ei_kissfft_impl - { + struct ei_kissfft_impl + { typedef _Scalar Scalar; typedef std::complex Complex; @@ -284,10 +287,10 @@ namespace Eigen { } template - void fwd( Complex * dst,const _Src *src,int nfft) - { - get_plan(nfft,false).work(0, dst, src, 1,1); - } + void fwd( Complex * dst,const _Src *src,int nfft) + { + get_plan(nfft,false).work(0, dst, src, 1,1); + } // real-to-complex forward FFT // perform two FFTs of src even and src odd @@ -363,11 +366,10 @@ namespace Eigen { } } - private: - + private: typedef ei_kiss_cpx_fft PlanData; - typedef std::map PlanMap; + PlanMap m_plans; std::map > m_realTwiddles; std::vector m_scratchBuf; @@ -376,25 +378,7 @@ namespace Eigen { PlanData & get_plan(int nfft,bool inverse) { - /* TODO: figure out why this does not work (g++ 4.3.2) - * for some reason this does not work - * - PlanMap::iterator it; - it = m_plans.find( PlanKey(nfft,inverse) ); - if (it == m_plans.end() ) { - // create new entry - it = m_plans.insert( make_pair( PlanKey(nfft,inverse) , PlanData() ) ); - MapIt it2 = m_plans.find( PlanKey(nfft,!inverse) ); - if (it2 != m_plans.end() ) { - it->second = it2.second; - it->second.conjugate(); - }else{ - it->second.make_twiddles(nfft,inverse); - it->second.factorize(nfft); - } - } - return it->second; - */ + // TODO look for PlanKey(nfft, ! inverse) and conjugate the twiddles PlanData & pd = m_plans[ PlanKey(nfft,inverse) ]; if ( pd.m_twiddles.size() == 0 ) { pd.make_twiddles(nfft,inverse); @@ -421,5 +405,5 @@ namespace Eigen { for (int k=0;k Date: Sat, 30 May 2009 17:55:47 -0400 Subject: [PATCH 12/18] added ei_fftw_impl --- unsupported/Eigen/FFT | 5 +- unsupported/Eigen/src/FFT/ei_fftw_impl.h | 198 ++++++++++++++++++++ unsupported/Eigen/src/FFT/ei_kissfft_impl.h | 25 +-- 3 files changed, 214 insertions(+), 14 deletions(-) create mode 100644 unsupported/Eigen/src/FFT/ei_fftw_impl.h diff --git a/unsupported/Eigen/FFT b/unsupported/Eigen/FFT index 31d8c74c5..03f8504a4 100644 --- a/unsupported/Eigen/FFT +++ b/unsupported/Eigen/FFT @@ -30,9 +30,8 @@ #define DEFAULT_FFT_IMPL ei_kissfft_impl // FFTW: faster, GPL -- incompatible with Eigen in LGPL form, bigger code size -#ifdef FFTW_PATIENT // definition of FFTW_PATIENT indicates the caller has included fftw3.h, we can use FFTW routines -// TODO -// #include "src/FFT/ei_fftw_impl.h" +#ifdef FFTW_ESTIMATE // definition of FFTW_ESTIMATE indicates the caller has included fftw3.h, we can use FFTW routines +#include "src/FFT/ei_fftw_impl.h" // #define DEFAULT_FFT_IMPL ei_fftw_impl #endif diff --git a/unsupported/Eigen/src/FFT/ei_fftw_impl.h b/unsupported/Eigen/src/FFT/ei_fftw_impl.h new file mode 100644 index 000000000..d592bbb20 --- /dev/null +++ b/unsupported/Eigen/src/FFT/ei_fftw_impl.h @@ -0,0 +1,198 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2009 Mark Borgerding mark a borgerding net +// +// Eigen is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 3 of the License, or (at your option) any later version. +// +// Alternatively, you can redistribute it and/or +// modify it under the terms of the GNU General Public License as +// published by the Free Software Foundation; either version 2 of +// the License, or (at your option) any later version. +// +// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY +// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public +// License and a copy of the GNU General Public License along with +// Eigen. If not, see . + +namespace Eigen { + // FFTW uses non-const arguments + // so we must use ugly const_cast calls for all the args it uses + // + // This should be safe as long as + // 1. we use FFTW_ESTIMATE for all our planning + // see the FFTW docs section 4.3.2 "Planner Flags" + // 2. fftw_complex is compatible with std::complex + // This assumes std::complex layout is array of size 2 with real,imag + template + T * ei_fftw_cast(const T* p) + { + return const_cast( p); + } + + fftw_complex * ei_fftw_cast( const std::complex * p) + { + return const_cast( reinterpret_cast(p) ); + } + + fftwf_complex * ei_fftw_cast( const std::complex * p) + { + return const_cast( reinterpret_cast(p) ); + } + + fftwl_complex * ei_fftw_cast( const std::complex * p) + { + return const_cast( reinterpret_cast(p) ); + } + + template + struct ei_fftw_plan {}; + + template <> + struct ei_fftw_plan + { + typedef float scalar_type; + typedef fftwf_complex complex_type; + fftwf_plan m_plan; + ei_fftw_plan() :m_plan(NULL) {} + ~ei_fftw_plan() {if (m_plan) fftwf_destroy_plan(m_plan);} + + void fwd(complex_type * dst,complex_type * src,int nfft) { + if (m_plan==NULL) m_plan = fftwf_plan_dft_1d(nfft,src,dst, FFTW_FORWARD, FFTW_ESTIMATE); + fftwf_execute_dft( m_plan, src,dst); + } + void inv(complex_type * dst,complex_type * src,int nfft) { + if (m_plan==NULL) m_plan = fftwf_plan_dft_1d(nfft,src,dst, FFTW_BACKWARD , FFTW_ESTIMATE); + fftwf_execute_dft( m_plan, src,dst); + } + void fwd(complex_type * dst,scalar_type * src,int nfft) { + if (m_plan==NULL) m_plan = fftwf_plan_dft_r2c_1d(nfft,src,dst,FFTW_ESTIMATE); + fftwf_execute_dft_r2c( m_plan,src,dst); + } + void inv(scalar_type * dst,complex_type * src,int nfft) { + if (m_plan==NULL) + m_plan = fftwf_plan_dft_c2r_1d(nfft,src,dst,FFTW_ESTIMATE); + fftwf_execute_dft_c2r( m_plan, src,dst); + } + }; + template <> + struct ei_fftw_plan + { + typedef double scalar_type; + typedef fftw_complex complex_type; + fftw_plan m_plan; + ei_fftw_plan() :m_plan(NULL) {} + ~ei_fftw_plan() {if (m_plan) fftw_destroy_plan(m_plan);} + + void fwd(complex_type * dst,complex_type * src,int nfft) { + if (m_plan==NULL) m_plan = fftw_plan_dft_1d(nfft,src,dst, FFTW_FORWARD, FFTW_ESTIMATE); + fftw_execute_dft( m_plan, src,dst); + } + void inv(complex_type * dst,complex_type * src,int nfft) { + if (m_plan==NULL) m_plan = fftw_plan_dft_1d(nfft,src,dst, FFTW_BACKWARD , FFTW_ESTIMATE); + fftw_execute_dft( m_plan, src,dst); + } + void fwd(complex_type * dst,scalar_type * src,int nfft) { + if (m_plan==NULL) m_plan = fftw_plan_dft_r2c_1d(nfft,src,dst,FFTW_ESTIMATE); + fftw_execute_dft_r2c( m_plan,src,dst); + } + void inv(scalar_type * dst,complex_type * src,int nfft) { + if (m_plan==NULL) + m_plan = fftw_plan_dft_c2r_1d(nfft,src,dst,FFTW_ESTIMATE); + fftw_execute_dft_c2r( m_plan, src,dst); + } + }; + template <> + struct ei_fftw_plan + { + typedef long double scalar_type; + typedef fftwl_complex complex_type; + fftwl_plan m_plan; + ei_fftw_plan() :m_plan(NULL) {} + ~ei_fftw_plan() {if (m_plan) fftwl_destroy_plan(m_plan);} + + void fwd(complex_type * dst,complex_type * src,int nfft) { + if (m_plan==NULL) m_plan = fftwl_plan_dft_1d(nfft,src,dst, FFTW_FORWARD, FFTW_ESTIMATE); + fftwl_execute_dft( m_plan, src,dst); + } + void inv(complex_type * dst,complex_type * src,int nfft) { + if (m_plan==NULL) m_plan = fftwl_plan_dft_1d(nfft,src,dst, FFTW_BACKWARD , FFTW_ESTIMATE); + fftwl_execute_dft( m_plan, src,dst); + } + void fwd(complex_type * dst,scalar_type * src,int nfft) { + if (m_plan==NULL) m_plan = fftwl_plan_dft_r2c_1d(nfft,src,dst,FFTW_ESTIMATE); + fftwl_execute_dft_r2c( m_plan,src,dst); + } + void inv(scalar_type * dst,complex_type * src,int nfft) { + if (m_plan==NULL) + m_plan = fftwl_plan_dft_c2r_1d(nfft,src,dst,FFTW_ESTIMATE); + fftwl_execute_dft_c2r( m_plan, src,dst); + } + }; + + template + struct ei_fftw_impl + { + typedef _Scalar Scalar; + typedef std::complex Complex; + + void clear() + { + m_plans.clear(); + } + + void fwd( Complex * dst,const Complex *src,int nfft) + { + get_plan(nfft,false,dst,src).fwd(ei_fftw_cast(dst), ei_fftw_cast(src),nfft ); + } + + // real-to-complex forward FFT + void fwd( Complex * dst,const Scalar * src,int nfft) + { + get_plan(nfft,false,dst,src).fwd(ei_fftw_cast(dst), ei_fftw_cast(src) ,nfft); + int nhbins=(nfft>>1)+1; + for (int k=nhbins;k < nfft; ++k ) + dst[k] = conj(dst[nfft-k]); + } + + // inverse complex-to-complex + void inv(Complex * dst,const Complex *src,int nfft) + { + get_plan(nfft,true,dst,src).inv(ei_fftw_cast(dst), ei_fftw_cast(src),nfft ); + // scaling + Scalar s = 1./nfft; + for (int k=0;k PlanData; + typedef std::map PlanMap; + + PlanMap m_plans; + + PlanData & get_plan(int nfft,bool inverse,void * dst,const void * src) + { + bool inplace = (dst==src); + bool aligned = ( (reinterpret_cast(src)&15) | (reinterpret_cast(dst)&15) ) == 0; + int key = (nfft<<3 ) | (inverse<<2) | (inplace<<1) | aligned; + return m_plans[key]; + } + }; +} diff --git a/unsupported/Eigen/src/FFT/ei_kissfft_impl.h b/unsupported/Eigen/src/FFT/ei_kissfft_impl.h index 91fa5ca18..a84ac68a0 100644 --- a/unsupported/Eigen/src/FFT/ei_kissfft_impl.h +++ b/unsupported/Eigen/src/FFT/ei_kissfft_impl.h @@ -39,6 +39,7 @@ namespace Eigen { std::vector m_twiddles; std::vector m_stageRadix; std::vector m_stageRemainder; + std::vector m_scratchBuf; bool m_inverse; void make_twiddles(int nfft,bool inverse) @@ -75,6 +76,8 @@ namespace Eigen { n /= p; m_stageRadix.push_back(p); m_stageRemainder.push_back(n); + if ( p > 5 ) + m_scratchBuf.resize(p); // scratchbuf will be needed in bfly_generic }while(n>1); } @@ -249,7 +252,7 @@ namespace Eigen { Complex * twiddles = &m_twiddles[0]; Complex t; int Norig = m_twiddles.size(); - Complex * scratchbuf = (Complex*)alloca(p*sizeof(Complex) ); + Complex * scratchbuf = &m_scratchBuf[0]; for ( u=0; u>1; int ncfft2 = nfft>>2; Complex * rtw = real_twiddles(ncfft2); - m_scratchBuf.resize(ncfft); - m_scratchBuf[0] = Complex( src[0].real() + src[ncfft].real(), src[0].real() - src[ncfft].real() ); + m_tmpBuf.resize(ncfft); + m_tmpBuf[0] = Complex( src[0].real() + src[ncfft].real(), src[0].real() - src[ncfft].real() ); for (int k = 1; k <= ncfft / 2; ++k) { Complex fk = src[k]; Complex fnkc = conj(src[ncfft-k]); Complex fek = fk + fnkc; Complex tmp = fk - fnkc; Complex fok = tmp * conj(rtw[k-1]); - m_scratchBuf[k] = fek + fok; - m_scratchBuf[ncfft-k] = conj(fek - fok); + m_tmpBuf[k] = fek + fok; + m_tmpBuf[ncfft-k] = conj(fek - fok); } - scale(&m_scratchBuf[0], ncfft, Scalar(1)/nfft ); - get_plan(ncfft,true).work(0, reinterpret_cast(dst), &m_scratchBuf[0], 1,1); + scale(&m_tmpBuf[0], ncfft, Scalar(1)/nfft ); + get_plan(ncfft,true).work(0, reinterpret_cast(dst), &m_tmpBuf[0], 1,1); } } @@ -372,7 +375,7 @@ namespace Eigen { PlanMap m_plans; std::map > m_realTwiddles; - std::vector m_scratchBuf; + std::vector m_tmpBuf; int PlanKey(int nfft,bool isinverse) const { return (nfft<<1) | isinverse; } From 1c543401741f1328b65d30a764f6273ddf9b60b6 Mon Sep 17 00:00:00 2001 From: Mark Borgerding Date: Sun, 31 May 2009 15:44:57 -0400 Subject: [PATCH 13/18] more work on ei_fftw_impl --- bench/benchFFT.cpp | 4 ++++ unsupported/Eigen/FFT | 3 ++- unsupported/test/FFT.cpp | 5 +++++ 3 files changed, 11 insertions(+), 1 deletion(-) diff --git a/bench/benchFFT.cpp b/bench/benchFFT.cpp index 14f5063fb..4b6cabb55 100644 --- a/bench/benchFFT.cpp +++ b/bench/benchFFT.cpp @@ -26,6 +26,10 @@ #include #include #include +#ifdef USE_FFTW +#include +#endif + #include using namespace Eigen; diff --git a/unsupported/Eigen/FFT b/unsupported/Eigen/FFT index 03f8504a4..dc7e85908 100644 --- a/unsupported/Eigen/FFT +++ b/unsupported/Eigen/FFT @@ -32,7 +32,8 @@ // FFTW: faster, GPL -- incompatible with Eigen in LGPL form, bigger code size #ifdef FFTW_ESTIMATE // definition of FFTW_ESTIMATE indicates the caller has included fftw3.h, we can use FFTW routines #include "src/FFT/ei_fftw_impl.h" -// #define DEFAULT_FFT_IMPL ei_fftw_impl +#undef DEFAULT_FFT_IMPL +#define DEFAULT_FFT_IMPL ei_fftw_impl #endif // intel Math Kernel Library: fastest, commerical -- incompatible with Eigen in GPL form diff --git a/unsupported/test/FFT.cpp b/unsupported/test/FFT.cpp index 32d1393d0..28230e1c4 100644 --- a/unsupported/test/FFT.cpp +++ b/unsupported/test/FFT.cpp @@ -23,6 +23,11 @@ // Eigen. If not, see . #include "main.h" +//#define USE_FFTW +#ifdef USE_FFTW +#include +#endif + #include using namespace std; From 4d6b962ba47ca2a6445bebd144b8122f3b8e96b7 Mon Sep 17 00:00:00 2001 From: Mark Borgerding Date: Wed, 10 Jun 2009 22:16:32 -0400 Subject: [PATCH 14/18] added FindFFTW, but I don't think it's right yet --- cmake/FindFFTW.cmake | 24 ++++++++++++++++++++++++ unsupported/test/CMakeLists.txt | 6 ++++++ unsupported/test/FFT.cpp | 12 +++--------- 3 files changed, 33 insertions(+), 9 deletions(-) create mode 100644 cmake/FindFFTW.cmake diff --git a/cmake/FindFFTW.cmake b/cmake/FindFFTW.cmake new file mode 100644 index 000000000..a56450b17 --- /dev/null +++ b/cmake/FindFFTW.cmake @@ -0,0 +1,24 @@ + +if (FFTW_INCLUDES AND FFTW_LIBRARIES) + set(FFTW_FIND_QUIETLY TRUE) +endif (FFTW_INCLUDES AND FFTW_LIBRARIES) + +find_path(FFTW_INCLUDES + NAMES + fftw3.h + PATHS + $ENV{FFTWDIR} + ${INCLUDE_INSTALL_DIR} +) + +find_library(FFTWF_LIB NAMES fftw3f PATHS $ENV{FFTWDIR} ${LIB_INSTALL_DIR}) +find_library(FFTW_LIB NAMES fftw3 PATHS $ENV{FFTWDIR} ${LIB_INSTALL_DIR}) +find_library(FFTWL_LIB NAMES fftw3l PATHS $ENV{FFTWDIR} ${LIB_INSTALL_DIR}) +set(FFTW_LIBRARIES "${FFTWF_LIB} ${FFTW_LIB} ${FFTWL_LIB}" ) +message(STATUS "FFTW ${FFTW_LIBRARIES}" ) + +include(FindPackageHandleStandardArgs) +find_package_handle_standard_args(FFTW DEFAULT_MSG + FFTW_INCLUDES FFTW_LIBRARIES) + +mark_as_advanced(FFTW_INCLUDES FFTW_LIBRARIES) diff --git a/unsupported/test/CMakeLists.txt b/unsupported/test/CMakeLists.txt index 49de7dfb2..23217811f 100644 --- a/unsupported/test/CMakeLists.txt +++ b/unsupported/test/CMakeLists.txt @@ -20,3 +20,9 @@ ei_add_test(BVH) ei_add_test(matrixExponential) ei_add_test(alignedvector3) ei_add_test(FFT) + +find_package(FFTW) +if(FFTW_FOUND) + ei_add_test(FFTW " " "${FFTW_LIBRARIES}" ) +endif(FFTW_FOUND) + diff --git a/unsupported/test/FFT.cpp b/unsupported/test/FFT.cpp index 28230e1c4..f0b9b68bf 100644 --- a/unsupported/test/FFT.cpp +++ b/unsupported/test/FFT.cpp @@ -23,11 +23,6 @@ // Eigen. If not, see . #include "main.h" -//#define USE_FFTW -#ifdef USE_FFTW -#include -#endif - #include using namespace std; @@ -121,7 +116,7 @@ void test_complex(int nfft) void test_FFT() { -#if 1 + CALL_SUBTEST( test_complex(32) ); CALL_SUBTEST( test_complex(32) ); CALL_SUBTEST( test_complex(32) ); CALL_SUBTEST( test_complex(256) ); CALL_SUBTEST( test_complex(256) ); CALL_SUBTEST( test_complex(256) ); CALL_SUBTEST( test_complex(3*8) ); CALL_SUBTEST( test_complex(3*8) ); CALL_SUBTEST( test_complex(3*8) ); @@ -129,13 +124,12 @@ void test_FFT() CALL_SUBTEST( test_complex(2*3*4) ); CALL_SUBTEST( test_complex(2*3*4) ); CALL_SUBTEST( test_complex(2*3*4) ); CALL_SUBTEST( test_complex(2*3*4*5) ); CALL_SUBTEST( test_complex(2*3*4*5) ); CALL_SUBTEST( test_complex(2*3*4*5) ); CALL_SUBTEST( test_complex(2*3*4*5*7) ); CALL_SUBTEST( test_complex(2*3*4*5*7) ); CALL_SUBTEST( test_complex(2*3*4*5*7) ); -#endif -#if 1 + + CALL_SUBTEST( test_scalar(32) ); CALL_SUBTEST( test_scalar(32) ); CALL_SUBTEST( test_scalar(32) ); CALL_SUBTEST( test_scalar(45) ); CALL_SUBTEST( test_scalar(45) ); CALL_SUBTEST( test_scalar(45) ); CALL_SUBTEST( test_scalar(50) ); CALL_SUBTEST( test_scalar(50) ); CALL_SUBTEST( test_scalar(50) ); CALL_SUBTEST( test_scalar(256) ); CALL_SUBTEST( test_scalar(256) ); CALL_SUBTEST( test_scalar(256) ); CALL_SUBTEST( test_scalar(2*3*4*5*7) ); CALL_SUBTEST( test_scalar(2*3*4*5*7) ); CALL_SUBTEST( test_scalar(2*3*4*5*7) ); -#endif } From 218711e18b1adc518a2f861fe53d5ce9da0e98b7 Mon Sep 17 00:00:00 2001 From: Mark Borgerding Date: Wed, 10 Jun 2009 23:25:27 -0400 Subject: [PATCH 15/18] example file --- unsupported/doc/examples/FFT.cpp | 117 +++++++++++++++++++++++++++++++ 1 file changed, 117 insertions(+) create mode 100644 unsupported/doc/examples/FFT.cpp diff --git a/unsupported/doc/examples/FFT.cpp b/unsupported/doc/examples/FFT.cpp new file mode 100644 index 000000000..55e29585a --- /dev/null +++ b/unsupported/doc/examples/FFT.cpp @@ -0,0 +1,117 @@ +// To use the simple FFT implementation +// g++ -o demofft -I.. -Wall -O3 FFT.cpp + +// To use the FFTW implementation +// g++ -o demofft -I.. -DUSE_FFTW -Wall -O3 FFT.cpp -lfftw3 -lfftw3f -lfftw3l + +#ifdef USE_FFTW +#include +#endif + +#include +#include +#include +#include +#include +#include + +using namespace std; +using namespace Eigen; + +template +T mag2(T a) +{ + return a*a; +} +template +T mag2(std::complex a) +{ + return norm(a); +} + +template +T mag2(const std::vector & vec) +{ + T out=0; + for (size_t k=0;k +T mag2(const std::vector > & vec) +{ + T out=0; + for (size_t k=0;k +vector operator-(const vector & a,const vector & b ) +{ + vector c(a); + for (size_t k=0;k +void RandomFill(std::vector & vec) +{ + for (size_t k=0;k +void RandomFill(std::vector > & vec) +{ + for (size_t k=0;k ( T( rand() )/T(RAND_MAX) - .5, T( rand() )/T(RAND_MAX) - .5); +} + +template +void fwd_inv(size_t nfft) +{ + typedef typename NumTraits::Real Scalar; + vector timebuf(nfft); + RandomFill(timebuf); + + vector freqbuf; + static FFT fft; + fft.fwd(freqbuf,timebuf); + + vector timebuf2; + fft.inv(timebuf2,freqbuf); + + long double rmse = mag2(timebuf - timebuf2) / mag2(timebuf); + cout << "roundtrip rmse: " << rmse << endl; +} + +template +void two_demos(int nfft) +{ + cout << " scalar "; + fwd_inv >(nfft); + cout << " complex "; + fwd_inv,std::complex >(nfft); +} + +void demo_all_types(int nfft) +{ + cout << "nfft=" << nfft << endl; + cout << " float" << endl; + two_demos(nfft); + cout << " double" << endl; + two_demos(nfft); + cout << " long double" << endl; + two_demos(nfft); +} + +int main() +{ + demo_all_types( 2*3*4*5*7 ); + demo_all_types( 2*9*16*25 ); + demo_all_types( 1024 ); + return 0; +} From e577c70e49dddd88c83175921431d2725128b0f3 Mon Sep 17 00:00:00 2001 From: Mark Borgerding Date: Tue, 16 Jun 2009 23:54:58 -0400 Subject: [PATCH 16/18] candidate header for Eigen/Complex --- unsupported/Eigen/Complex | 142 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 142 insertions(+) create mode 100644 unsupported/Eigen/Complex diff --git a/unsupported/Eigen/Complex b/unsupported/Eigen/Complex new file mode 100644 index 000000000..f8d9af25c --- /dev/null +++ b/unsupported/Eigen/Complex @@ -0,0 +1,142 @@ +#ifndef EIGEN_COMPLEX_H +#define EIGEN_COMPLEX_H + +#include + +namespace Eigen { + +template +struct castable_pointer +{ + castable_pointer(_NativePtr ptr) : _ptr(ptr) {} + operator _NativePtr () {return _ptr;} + operator _PunnedPtr () {return reinterpret_cast<_PunnedPtr>(_ptr);} + private: + _NativePtr _ptr; +}; + +template +struct Complex +{ + typedef typename std::complex StandardComplex; + typedef T value_type; + + Complex(const T& re = T(), const T& im = T()) : _re(re),_im(im) { } + Complex(const Complex&other ): _re(other.real()) ,_im(other.imag()) {} + + template + Complex(const Complex&other): _re(other.real()) ,_im(other.imag()) {} + template + Complex(const std::complex&other): _re(other.real()) ,_im(other.imag()) {} + + typedef castable_pointer< Complex*, StandardComplex* > pointer_type; + typedef castable_pointer< const Complex*, const StandardComplex* > const_pointer_type; + + pointer_type operator & () {return pointer_type(this);} + const_pointer_type operator & () const {return const_pointer_type(this);} + + StandardComplex std_type() const {return StandardComplex(real(),imag());} + StandardComplex & std_type() {return *(StandardComplex*)(&(*this));} + + operator StandardComplex () const {return std_type();} + operator StandardComplex & () {return std_type();} + + T & real() {return _re;} + T & imag() {return _im;} + const T & real() const {return _re;} + const T & imag() const {return _im;} + + // *** complex member functions: *** + Complex& operator= (const T& val) { _re=val;_im=0;return *this; } + Complex& operator+= (const T& val) {_re+=val;return *this;} + Complex& operator-= (const T& val) {_re-=val;return *this;} + Complex& operator*= (const T& val) {_re*=val;_im*=val;return *this; } + Complex& operator/= (const T& val) {_re/=val;_im/=val;return *this; } + + Complex& operator= (const Complex& rhs) {_re=rhs._re;_im=rhs._im;return *this;} + Complex& operator= (const StandardComplex& rhs) {_re=rhs.real();_im=rhs.imag();return *this;} + + template Complex& operator= (const Complex& rhs) { _re=rhs._re;_im=rhs._im;return *this;} + template Complex& operator+= (const Complex& rhs) { _re+=rhs._re;_im+=rhs._im;return *this;} + template Complex& operator-= (const Complex& rhs) { _re-=rhs._re;_im-=rhs._im;return *this;} + template Complex& operator*= (const Complex& rhs) { this->std_type() *= rhs.std_type(); return *this; } + template Complex& operator/= (const Complex& rhs) { this->std_type() /= rhs.std_type(); return *this; } + + private: + T _re; + T _im; +}; + +template +T ei_to_std( const T & x) {return x;} + +template +std::complex ei_to_std( const Complex & x) {return x.std_type();} + +// 26.2.6 operators +template Complex operator+(const Complex& rhs) {return rhs;} +template Complex operator-(const Complex& rhs) {return -ei_to_std(rhs);} + +template Complex operator+(const Complex& lhs, const Complex& rhs) { return ei_to_std(lhs) + ei_to_std(rhs);} +template Complex operator-(const Complex& lhs, const Complex& rhs) { return ei_to_std(lhs) - ei_to_std(rhs);} +template Complex operator*(const Complex& lhs, const Complex& rhs) { return ei_to_std(lhs) * ei_to_std(rhs);} +template Complex operator/(const Complex& lhs, const Complex& rhs) { return ei_to_std(lhs) / ei_to_std(rhs);} +template bool operator==(const Complex& lhs, const Complex& rhs) { return ei_to_std(lhs) == ei_to_std(rhs);} +template bool operator!=(const Complex& lhs, const Complex& rhs) { return ei_to_std(lhs) != ei_to_std(rhs);} + +template Complex operator+(const Complex& lhs, const T& rhs) {return ei_to_std(lhs) + ei_to_std(rhs); } +template Complex operator-(const Complex& lhs, const T& rhs) {return ei_to_std(lhs) - ei_to_std(rhs); } +template Complex operator*(const Complex& lhs, const T& rhs) {return ei_to_std(lhs) * ei_to_std(rhs); } +template Complex operator/(const Complex& lhs, const T& rhs) {return ei_to_std(lhs) / ei_to_std(rhs); } +template bool operator==(const Complex& lhs, const T& rhs) {return ei_to_std(lhs) == ei_to_std(rhs); } +template bool operator!=(const Complex& lhs, const T& rhs) {return ei_to_std(lhs) != ei_to_std(rhs); } + +template Complex operator+(const T& lhs, const Complex& rhs) {return ei_to_std(lhs) + ei_to_std(rhs); } +template Complex operator-(const T& lhs, const Complex& rhs) {return ei_to_std(lhs) - ei_to_std(rhs); } +template Complex operator*(const T& lhs, const Complex& rhs) {return ei_to_std(lhs) * ei_to_std(rhs); } +template Complex operator/(const T& lhs, const Complex& rhs) {return ei_to_std(lhs) / ei_to_std(rhs); } +template bool operator==(const T& lhs, const Complex& rhs) {return ei_to_std(lhs) == ei_to_std(rhs); } +template bool operator!=(const T& lhs, const Complex& rhs) {return ei_to_std(lhs) != ei_to_std(rhs); } + +template +std::basic_istream& + operator>> (std::basic_istream& istr, Complex& rhs) +{ + return istr >> rhs.std_type(); +} + +template +std::basic_ostream& +operator<< (std::basic_ostream& ostr, const Complex& rhs) +{ + return ostr << rhs.std_type(); +} + + // 26.2.7 values: + template T real(const Complex&x) {return real(ei_to_std(x));} + template T abs(const Complex&x) {return abs(ei_to_std(x));} + template T arg(const Complex&x) {return arg(ei_to_std(x));} + template T norm(const Complex&x) {return norm(ei_to_std(x));} + + template Complex conj(const Complex&x) { return conj(ei_to_std(x));} + template Complex polar(const T& x, const T&y) {return polar(ei_to_std(x),ei_to_std(y));} + // 26.2.8 transcendentals: + template Complex cos (const Complex&x){return cos(ei_to_std(x));} + template Complex cosh (const Complex&x){return cosh(ei_to_std(x));} + template Complex exp (const Complex&x){return exp(ei_to_std(x));} + template Complex log (const Complex&x){return log(ei_to_std(x));} + template Complex log10 (const Complex&x){return log10(ei_to_std(x));} + + template Complex pow(const Complex&x, int p) {return pow(ei_to_std(x),ei_to_std(p));} + template Complex pow(const Complex&x, const T&p) {return pow(ei_to_std(x),ei_to_std(p));} + template Complex pow(const Complex&x, const Complex&p) {return pow(ei_to_std(x),ei_to_std(p));} + template Complex pow(const T&x, const Complex&p) {return pow(ei_to_std(x),ei_to_std(p));} + + template Complex sin (const Complex&x){return sin(ei_to_std(x));} + template Complex sinh (const Complex&x){return sinh(ei_to_std(x));} + template Complex sqrt (const Complex&x){return sqrt(ei_to_std(x));} + template Complex tan (const Complex&x){return tan(ei_to_std(x));} + template Complex tanh (const Complex&x){return tanh(ei_to_std(x));} +} + +#endif From fb9a15e451371f4e600dbb60b8e5c9d098889844 Mon Sep 17 00:00:00 2001 From: Mark Borgerding Date: Wed, 17 Jun 2009 00:09:18 -0400 Subject: [PATCH 17/18] added copyright notice --- unsupported/Eigen/Complex | 50 ++++++++++++++++++++++++++++++++++----- 1 file changed, 44 insertions(+), 6 deletions(-) diff --git a/unsupported/Eigen/Complex b/unsupported/Eigen/Complex index f8d9af25c..a0483abdf 100644 --- a/unsupported/Eigen/Complex +++ b/unsupported/Eigen/Complex @@ -1,6 +1,34 @@ #ifndef EIGEN_COMPLEX_H #define EIGEN_COMPLEX_H +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2009 Mark Borgerding mark a borgerding net +// +// Eigen is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 3 of the License, or (at your option) any later version. +// +// Alternatively, you can redistribute it and/or +// modify it under the terms of the GNU General Public License as +// published by the Free Software Foundation; either version 2 of +// the License, or (at your option) any later version. +// +// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY +// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public +// License and a copy of the GNU General Public License along with +// Eigen. If not, see . + +// Eigen::Complex reuses as much as possible from std::complex +// and allows easy conversion to and from, even at the pointer level. + + #include namespace Eigen { @@ -21,6 +49,7 @@ struct Complex typedef typename std::complex StandardComplex; typedef T value_type; + // constructors Complex(const T& re = T(), const T& im = T()) : _re(re),_im(im) { } Complex(const Complex&other ): _re(other.real()) ,_im(other.imag()) {} @@ -29,22 +58,31 @@ struct Complex template Complex(const std::complex&other): _re(other.real()) ,_im(other.imag()) {} + + // allow binary access to the object as a std::complex typedef castable_pointer< Complex*, StandardComplex* > pointer_type; typedef castable_pointer< const Complex*, const StandardComplex* > const_pointer_type; - pointer_type operator & () {return pointer_type(this);} const_pointer_type operator & () const {return const_pointer_type(this);} - StandardComplex std_type() const {return StandardComplex(real(),imag());} - StandardComplex & std_type() {return *(StandardComplex*)(&(*this));} - operator StandardComplex () const {return std_type();} operator StandardComplex & () {return std_type();} - T & real() {return _re;} - T & imag() {return _im;} + StandardComplex std_type() const {return StandardComplex(real(),imag());} + StandardComplex & std_type() {return *reinterpret_cast(this);} + + + // every sort of accessor and mutator that has ever been in fashion. + // For a brief history, search for "std::complex over-encapsulated" + // http://www.open-std.org/jtc1/sc22/wg21/docs/lwg-defects.html#387 const T & real() const {return _re;} const T & imag() const {return _im;} + T & real() {return _re;} + T & imag() {return _im;} + T & real(const T & x) {return _re=x;} + T & imag(const T & x) {return _im=x;} + void set_real(const T & x) {_re = x;} + void set_imag(const T & x) {_im = x;} // *** complex member functions: *** Complex& operator= (const T& val) { _re=val;_im=0;return *this; } From a39de276a95234ff85f58fa93364b047f3d72c6b Mon Sep 17 00:00:00 2001 From: Mark Borgerding Date: Mon, 14 Sep 2009 01:52:26 -0400 Subject: [PATCH 18/18] added the test case for FFTW --- unsupported/test/CMakeLists.txt | 2 +- unsupported/test/FFTW.cpp | 136 ++++++++++++++++++++++++++++++++ 2 files changed, 137 insertions(+), 1 deletion(-) create mode 100644 unsupported/test/FFTW.cpp diff --git a/unsupported/test/CMakeLists.txt b/unsupported/test/CMakeLists.txt index 23217811f..f42077bdc 100644 --- a/unsupported/test/CMakeLists.txt +++ b/unsupported/test/CMakeLists.txt @@ -23,6 +23,6 @@ ei_add_test(FFT) find_package(FFTW) if(FFTW_FOUND) - ei_add_test(FFTW " " "${FFTW_LIBRARIES}" ) + ei_add_test(FFTW " " "-lfftw3 -lfftw3f -lfftw3l" ) endif(FFTW_FOUND) diff --git a/unsupported/test/FFTW.cpp b/unsupported/test/FFTW.cpp new file mode 100644 index 000000000..cf7be75aa --- /dev/null +++ b/unsupported/test/FFTW.cpp @@ -0,0 +1,136 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. Eigen itself is part of the KDE project. +// +// Copyright (C) 2009 Mark Borgerding mark a borgerding net +// +// Eigen is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 3 of the License, or (at your option) any later version. +// +// Alternatively, you can redistribute it and/or +// modify it under the terms of the GNU General Public License as +// published by the Free Software Foundation; either version 2 of +// the License, or (at your option) any later version. +// +// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY +// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public +// License and a copy of the GNU General Public License along with +// Eigen. If not, see . + +#include "main.h" +#include +#include + +using namespace std; + +float norm(float x) {return x*x;} +double norm(double x) {return x*x;} +long double norm(long double x) {return x*x;} + +template < typename T> +complex promote(complex x) { return complex(x.real(),x.imag()); } + +complex promote(float x) { return complex( x); } +complex promote(double x) { return complex( x); } +complex promote(long double x) { return complex( x); } + + + template + long double fft_rmse( const vector & fftbuf,const vector & timebuf) + { + long double totalpower=0; + long double difpower=0; + cerr <<"idx\ttruth\t\tvalue\t|dif|=\n"; + for (size_t k0=0;k0 acc = 0; + long double phinc = -2.*k0* M_PIl / timebuf.size(); + for (size_t k1=0;k1(0,k1*phinc) ); + } + totalpower += norm(acc); + complex x = promote(fftbuf[k0]); + complex dif = acc - x; + difpower += norm(dif); + cerr << k0 << "\t" << acc << "\t" << x << "\t" << sqrt(norm(dif)) << endl; + } + cerr << "rmse:" << sqrt(difpower/totalpower) << endl; + return sqrt(difpower/totalpower); + } + + template + long double dif_rmse( const vector buf1,const vector buf2) + { + long double totalpower=0; + long double difpower=0; + size_t n = min( buf1.size(),buf2.size() ); + for (size_t k=0;k +void test_scalar(int nfft) +{ + typedef typename Eigen::FFT::Complex Complex; + typedef typename Eigen::FFT::Scalar Scalar; + + FFT fft; + vector inbuf(nfft); + vector outbuf; + for (int k=0;k() );// gross check + + vector buf3; + fft.inv( buf3 , outbuf); + VERIFY( dif_rmse(inbuf,buf3) < test_precision() );// gross check +} + +template +void test_complex(int nfft) +{ + typedef typename Eigen::FFT::Complex Complex; + + FFT fft; + + vector inbuf(nfft); + vector outbuf; + vector buf3; + for (int k=0;k() );// gross check + + fft.inv( buf3 , outbuf); + + VERIFY( dif_rmse(inbuf,buf3) < test_precision() );// gross check +} + +void test_FFTW() +{ + + CALL_SUBTEST( test_complex(32) ); CALL_SUBTEST( test_complex(32) ); CALL_SUBTEST( test_complex(32) ); + CALL_SUBTEST( test_complex(256) ); CALL_SUBTEST( test_complex(256) ); CALL_SUBTEST( test_complex(256) ); + CALL_SUBTEST( test_complex(3*8) ); CALL_SUBTEST( test_complex(3*8) ); CALL_SUBTEST( test_complex(3*8) ); + CALL_SUBTEST( test_complex(5*32) ); CALL_SUBTEST( test_complex(5*32) ); CALL_SUBTEST( test_complex(5*32) ); + CALL_SUBTEST( test_complex(2*3*4) ); CALL_SUBTEST( test_complex(2*3*4) ); CALL_SUBTEST( test_complex(2*3*4) ); + CALL_SUBTEST( test_complex(2*3*4*5) ); CALL_SUBTEST( test_complex(2*3*4*5) ); CALL_SUBTEST( test_complex(2*3*4*5) ); + CALL_SUBTEST( test_complex(2*3*4*5*7) ); CALL_SUBTEST( test_complex(2*3*4*5*7) ); CALL_SUBTEST( test_complex(2*3*4*5*7) ); + + + + CALL_SUBTEST( test_scalar(32) ); CALL_SUBTEST( test_scalar(32) ); CALL_SUBTEST( test_scalar(32) ); + CALL_SUBTEST( test_scalar(45) ); CALL_SUBTEST( test_scalar(45) ); CALL_SUBTEST( test_scalar(45) ); + CALL_SUBTEST( test_scalar(50) ); CALL_SUBTEST( test_scalar(50) ); CALL_SUBTEST( test_scalar(50) ); + CALL_SUBTEST( test_scalar(256) ); CALL_SUBTEST( test_scalar(256) ); CALL_SUBTEST( test_scalar(256) ); + CALL_SUBTEST( test_scalar(2*3*4*5*7) ); CALL_SUBTEST( test_scalar(2*3*4*5*7) ); CALL_SUBTEST( test_scalar(2*3*4*5*7) ); +}