From cd7912313dc2477283de767029462d7d0e6ee8ab Mon Sep 17 00:00:00 2001
From: Mark Borgerding <mark@borgerding.net>
Date: Fri, 22 Jan 2010 00:35:03 -0500
Subject: [PATCH] changed FFT function vector and Matrix args to pointers, as
 Benoit suggested; implemented 2D complex FFT for the FFTW impl

---
 unsupported/Eigen/FFT                    | 45 +++++++++++------
 unsupported/Eigen/src/FFT/ei_fftw_impl.h | 61 +++++++++++++++++++++-
 unsupported/test/FFT.cpp                 | 18 +++----
 unsupported/test/FFTW.cpp                | 64 +++++++++++++++++++++---
 4 files changed, 154 insertions(+), 34 deletions(-)
diff --git a/unsupported/Eigen/FFT b/unsupported/Eigen/FFT
index e0841a4e3..caaf79714 100644
--- a/unsupported/Eigen/FFT
+++ b/unsupported/Eigen/FFT
@@ -152,20 +152,26 @@ class FFT
         m_impl.fwd(dst,src,nfft);
     }
 
+    inline 
+    void fwd2(Complex * dst, const Complex * src, int nrows,int ncols)
+    {
+      m_impl.fwd2(dst,src,nrows,ncols);
+    }
+
     template <typename _Input>
     inline
-    void fwd( std::vector<Complex> & dst, const std::vector<_Input> & src) 
+    void fwd( std::vector<Complex> * dst, const std::vector<_Input> & src) 
     {
       if ( NumTraits<_Input>::IsComplex == 0 && HasFlag(HalfSpectrum) )
-        dst.resize( (src.size()>>1)+1);
+        dst->resize( (src.size()>>1)+1);
       else
-        dst.resize(src.size());
-      fwd(&dst[0],&src[0],static_cast<int>(src.size()));
+        dst->resize(src.size());
+      fwd(&(*dst)[0],&src[0],static_cast<int>(src.size()));
     }
 
     template<typename InputDerived, typename ComplexDerived>
     inline
-    void fwd( MatrixBase<ComplexDerived> & dst, const MatrixBase<InputDerived> & src)
+    void fwd( MatrixBase<ComplexDerived> * dst, const MatrixBase<InputDerived> & src)
     {
       EIGEN_STATIC_ASSERT_VECTOR_ONLY(InputDerived)
       EIGEN_STATIC_ASSERT_VECTOR_ONLY(ComplexDerived)
@@ -176,10 +182,10 @@ class FFT
             THIS_METHOD_IS_ONLY_FOR_EXPRESSIONS_WITH_DIRECT_MEMORY_ACCESS_SUCH_AS_MAP_OR_PLAIN_MATRICES)
 
       if ( NumTraits< typename InputDerived::Scalar >::IsComplex == 0 && HasFlag(HalfSpectrum) )
-        dst.derived().resize( (src.size()>>1)+1);
+        dst->derived().resize( (src.size()>>1)+1);
       else
-        dst.derived().resize(src.size());
-      fwd( &dst[0],&src[0],src.size() );
+        dst->derived().resize(src.size());
+      fwd( &(*dst)[0],&src[0],src.size() );
     }
 
     inline
@@ -200,7 +206,7 @@ class FFT
 
     template<typename OutputDerived, typename ComplexDerived>
     inline
-    void inv( MatrixBase<OutputDerived> & dst, const MatrixBase<ComplexDerived> & src)
+    void inv( MatrixBase<OutputDerived> * dst, const MatrixBase<ComplexDerived> & src)
     {
         EIGEN_STATIC_ASSERT_VECTOR_ONLY(OutputDerived)
         EIGEN_STATIC_ASSERT_VECTOR_ONLY(ComplexDerived)
@@ -212,19 +218,28 @@ class FFT
 
         int nfft = src.size();
         int nout = HasFlag(HalfSpectrum) ? ((nfft>>1)+1) : nfft;
-        dst.derived().resize( nout );
-        inv( &dst[0],&src[0], nfft);
+        dst->derived().resize( nout );
+        inv( &(*dst)[0],&src[0], nfft);
     }
 
     template <typename _Output>
     inline
-    void inv( std::vector<_Output> & dst, const std::vector<Complex> & src)
+    void inv( std::vector<_Output> * dst, const std::vector<Complex> & src)
     {
       if ( NumTraits<_Output>::IsComplex == 0 && HasFlag(HalfSpectrum) )
-        dst.resize( 2*(src.size()-1) );
+        dst->resize( 2*(src.size()-1) );
       else
-        dst.resize( src.size() );
-      inv( &dst[0],&src[0],static_cast<int>(dst.size()) );
+        dst->resize( src.size() );
+      inv( &(*dst)[0],&src[0],static_cast<int>(dst->size()) );
+    }
+
+
+    inline 
+    void inv2(Complex * dst, const Complex * src, int nrows,int ncols)
+    {
+      m_impl.inv2(dst,src,nrows,ncols);
+      if ( HasFlag( Unscaled ) == false)
+          scale(dst,1./(nrows*ncols),nrows*ncols);
     }
 
     // TODO: multi-dimensional FFTs
diff --git a/unsupported/Eigen/src/FFT/ei_fftw_impl.h b/unsupported/Eigen/src/FFT/ei_fftw_impl.h
index a66b7398c..411ff7425 100644
--- a/unsupported/Eigen/src/FFT/ei_fftw_impl.h
+++ b/unsupported/Eigen/src/FFT/ei_fftw_impl.h
@@ -90,6 +90,18 @@
               m_plan = fftwf_plan_dft_c2r_1d(nfft,src,dst,FFTW_ESTIMATE);
           fftwf_execute_dft_c2r( m_plan, src,dst);
       }
+
+      inline 
+      void fwd2( complex_type * dst,complex_type * src,int nrows,int ncols) {
+          if (m_plan==NULL) m_plan = fftwf_plan_dft_2d(ncols,nrows,src,dst,FFTW_FORWARD,FFTW_ESTIMATE);
+          fftwf_execute_dft( m_plan, src,dst);
+      }
+      inline 
+      void inv2( complex_type * dst,complex_type * src,int nrows,int ncols) {
+          if (m_plan==NULL) m_plan = fftwf_plan_dft_2d(ncols,nrows,src,dst,FFTW_BACKWARD,FFTW_ESTIMATE);
+          fftwf_execute_dft( m_plan, src,dst);
+      }
+
   };
   template <> 
   struct ei_fftw_plan<double>
@@ -121,6 +133,16 @@
               m_plan = fftw_plan_dft_c2r_1d(nfft,src,dst,FFTW_ESTIMATE);
           fftw_execute_dft_c2r( m_plan, src,dst);
       }
+      inline 
+      void fwd2( complex_type * dst,complex_type * src,int nrows,int ncols) {
+          if (m_plan==NULL) m_plan = fftw_plan_dft_2d(ncols,nrows,src,dst,FFTW_FORWARD,FFTW_ESTIMATE);
+          fftw_execute_dft( m_plan, src,dst);
+      }
+      inline 
+      void inv2( complex_type * dst,complex_type * src,int nrows,int ncols) {
+          if (m_plan==NULL) m_plan = fftw_plan_dft_2d(ncols,nrows,src,dst,FFTW_BACKWARD,FFTW_ESTIMATE);
+          fftw_execute_dft( m_plan, src,dst);
+      }
   };
   template <> 
   struct ei_fftw_plan<long double>
@@ -152,6 +174,16 @@
               m_plan = fftwl_plan_dft_c2r_1d(nfft,src,dst,FFTW_ESTIMATE);
           fftwl_execute_dft_c2r( m_plan, src,dst);
       }
+      inline 
+      void fwd2( complex_type * dst,complex_type * src,int nrows,int ncols) {
+          if (m_plan==NULL) m_plan = fftwl_plan_dft_2d(ncols,nrows,src,dst,FFTW_FORWARD,FFTW_ESTIMATE);
+          fftwl_execute_dft( m_plan, src,dst);
+      }
+      inline 
+      void inv2( complex_type * dst,complex_type * src,int nrows,int ncols) {
+          if (m_plan==NULL) m_plan = fftwl_plan_dft_2d(ncols,nrows,src,dst,FFTW_BACKWARD,FFTW_ESTIMATE);
+          fftwl_execute_dft( m_plan, src,dst);
+      }
   };
 
   template <typename _Scalar>
@@ -180,6 +212,13 @@
           get_plan(nfft,false,dst,src).fwd(ei_fftw_cast(dst), ei_fftw_cast(src) ,nfft);
       }
 
+      // 2-d forward complex-to-complex
+      inline
+      void fwd2(Complex * dst, const Complex * src, int nrows,int ncols)
+      {
+          get_plan(nrows,ncols,false,dst,src).fwd2(ei_fftw_cast(dst), ei_fftw_cast(src) ,nrows,ncols);
+      }
+
       // inverse complex-to-complex
       inline
       void inv(Complex * dst,const Complex  *src,int nfft)
@@ -194,9 +233,18 @@
         get_plan(nfft,true,dst,src).inv(ei_fftw_cast(dst), ei_fftw_cast(src),nfft );
       }
 
+      // 2-d inverse complex-to-complex
+      inline
+      void inv2(Complex * dst, const Complex * src, int nrows,int ncols)
+      {
+        get_plan(nrows,ncols,true,dst,src).inv2(ei_fftw_cast(dst), ei_fftw_cast(src) ,nrows,ncols);
+      }
+
+
   protected:
       typedef ei_fftw_plan<Scalar> PlanData;
-      typedef std::map<int,PlanData> PlanMap;
+
+      typedef std::map<int64_t,PlanData> PlanMap;
 
       PlanMap m_plans;
 
@@ -205,7 +253,16 @@
       {
           bool inplace = (dst==src);
           bool aligned = ( (reinterpret_cast<size_t>(src)&15) | (reinterpret_cast<size_t>(dst)&15) ) == 0;
-          int key = (nfft<<3 ) | (inverse<<2) | (inplace<<1) | aligned;
+          int64_t key = ( (nfft<<3 ) | (inverse<<2) | (inplace<<1) | aligned ) << 1;
+          return m_plans[key];
+      }
+
+      inline
+      PlanData & get_plan(int nrows,int ncols,bool inverse,void * dst,const void * src)
+      {
+          bool inplace = (dst==src);
+          bool aligned = ( (reinterpret_cast<size_t>(src)&15) | (reinterpret_cast<size_t>(dst)&15) ) == 0;
+          int64_t key = ( ( (((int64_t)ncols) << 30)|(nrows<<3 ) | (inverse<<2) | (inplace<<1) | aligned ) << 1 ) + 1;
           return m_plans[key];
       }
   };
diff --git a/unsupported/test/FFT.cpp b/unsupported/test/FFT.cpp
index 056be2ef3..a2f1d9201 100644
--- a/unsupported/test/FFT.cpp
+++ b/unsupported/test/FFT.cpp
@@ -106,29 +106,29 @@ void test_scalar_generic(int nfft)
     // make sure it DOESN'T give the right full spectrum answer
     // if we've asked for half-spectrum
     fft.SetFlag(fft.HalfSpectrum );
-    fft.fwd( outbuf,inbuf);
+    fft.fwd( &outbuf,inbuf);
     VERIFY(outbuf.size() == (size_t)( (nfft>>1)+1) );
     VERIFY( fft_rmse(outbuf,inbuf) < test_precision<T>()  );// gross check
 
     fft.ClearFlag(fft.HalfSpectrum );
-    fft.fwd( outbuf,inbuf);
+    fft.fwd( &outbuf,inbuf);
     VERIFY( fft_rmse(outbuf,inbuf) < test_precision<T>()  );// gross check
 
     ScalarVector buf3;
-    fft.inv( buf3 , outbuf);
+    fft.inv( &buf3 , outbuf);
     VERIFY( dif_rmse(inbuf,buf3) < test_precision<T>()  );// gross check
 
     // verify that the Unscaled flag takes effect
     ComplexVector buf4;
     fft.SetFlag(fft.Unscaled);
-    fft.inv( buf4 , outbuf);
+    fft.inv( &buf4 , outbuf);
     for (int k=0;k<nfft;++k)
         buf4[k] *= T(1./nfft);
     VERIFY( dif_rmse(inbuf,buf4) < test_precision<T>()  );// gross check
 
     // verify that ClearFlag works
     fft.ClearFlag(fft.Unscaled);
-    fft.inv( buf3 , outbuf);
+    fft.inv( &buf3 , outbuf);
     VERIFY( dif_rmse(inbuf,buf3) < test_precision<T>()  );// gross check
 }
 
@@ -152,25 +152,25 @@ void test_complex_generic(int nfft)
     ComplexVector buf3;
     for (int k=0;k<nfft;++k)
         inbuf[k]= Complex( (T)(rand()/(double)RAND_MAX - .5), (T)(rand()/(double)RAND_MAX - .5) );
-    fft.fwd( outbuf , inbuf);
+    fft.fwd( &outbuf , inbuf);
 
     VERIFY( fft_rmse(outbuf,inbuf) < test_precision<T>()  );// gross check
 
-    fft.inv( buf3 , outbuf);
+    fft.inv( &buf3 , outbuf);
 
     VERIFY( dif_rmse(inbuf,buf3) < test_precision<T>()  );// gross check
 
     // verify that the Unscaled flag takes effect
     ComplexVector buf4;
     fft.SetFlag(fft.Unscaled);
-    fft.inv( buf4 , outbuf);
+    fft.inv( &buf4 , outbuf);
     for (int k=0;k<nfft;++k)
         buf4[k] *= T(1./nfft);
     VERIFY( dif_rmse(inbuf,buf4) < test_precision<T>()  );// gross check
 
     // verify that ClearFlag works
     fft.ClearFlag(fft.Unscaled);
-    fft.inv( buf3 , outbuf);
+    fft.inv( &buf3 , outbuf);
     VERIFY( dif_rmse(inbuf,buf3) < test_precision<T>()  );// gross check
 }
 
diff --git a/unsupported/test/FFTW.cpp b/unsupported/test/FFTW.cpp
index c182cab9d..df38efe64 100644
--- a/unsupported/test/FFTW.cpp
+++ b/unsupported/test/FFTW.cpp
@@ -26,7 +26,11 @@
 #include <fftw3.h>
 #include <unsupported/Eigen/FFT>
 
+template <typename T> 
+std::complex<T> RandomCpx() { return std::complex<T>( (T)(rand()/(T)RAND_MAX - .5), (T)(rand()/(T)RAND_MAX - .5) ); }
+
 using namespace std;
+using namespace Eigen;
 
 float norm(float x) {return x*x;}
 double norm(double x) {return x*x;}
@@ -87,11 +91,11 @@ void test_scalar(int nfft)
     vector<Complex> outbuf;
     for (int k=0;k<nfft;++k)
         inbuf[k]= (T)(rand()/(double)RAND_MAX - .5);
-    fft.fwd( outbuf,inbuf);
+    fft.fwd( &outbuf,inbuf);
     VERIFY( fft_rmse(outbuf,inbuf) < test_precision<T>()  );// gross check
 
     vector<Scalar> buf3;
-    fft.inv( buf3 , outbuf);
+    fft.inv( &buf3 , outbuf);
     VERIFY( dif_rmse(inbuf,buf3) < test_precision<T>()  );// gross check
 }
 
@@ -106,19 +110,65 @@ void test_complex(int nfft)
     vector<Complex> outbuf;
     vector<Complex> buf3;
     for (int k=0;k<nfft;++k)
-        inbuf[k]= Complex( (T)(rand()/(double)RAND_MAX - .5), (T)(rand()/(double)RAND_MAX - .5) );
-    fft.fwd( outbuf , inbuf);
+        inbuf[k]= RandomCpx<T>();
+    fft.fwd( &outbuf , inbuf);
 
     VERIFY( fft_rmse(outbuf,inbuf) < test_precision<T>()  );// gross check
 
-    fft.inv( buf3 , outbuf);
+    fft.inv( &buf3 , outbuf);
 
     VERIFY( dif_rmse(inbuf,buf3) < test_precision<T>()  );// gross check
 }
 
-void test_FFTW()
+template <typename T,int nrows,int ncols>
+void test_complex2d()
 {
 
+    typedef typename Eigen::FFT<T>::Complex Complex;
+    FFT<T> fft;
+
+    Eigen::Matrix<Complex,nrows,ncols> src;
+    Eigen::Matrix<Complex,nrows,ncols> dst;
+    Eigen::Matrix<Complex,nrows,ncols> src2;
+    Eigen::Matrix<Complex,nrows,ncols> dst2;
+
+    //src = Eigen::Matrix<Complex,nrows,ncols>::Random();
+    src =  Eigen::Matrix<Complex,nrows,ncols>::Identity();
+
+    for (int k=0;k<ncols;k++) {
+        Eigen::Matrix<Complex,nrows,1> tmpIn =  src.col(k);
+        Eigen::Matrix<Complex,nrows,1> tmpOut;
+        fft.fwd( &tmpOut,tmpIn );
+        dst2.col(k) = tmpOut;
+    }
+    //cout << "dst2: " << dst2 << "\n\n";
+
+    for (int k=0;k<nrows;k++) {
+        Eigen::Matrix<Complex,1,ncols> tmpIn =  dst2.row(k);
+        Eigen::Matrix<Complex,1,ncols> tmpOut;
+        fft.fwd( &tmpOut, tmpIn);
+        dst2.row(k) = tmpOut;
+    }
+
+/*
+*/
+    fft.fwd2(dst.data(),src.data(),nrows,ncols);
+    fft.inv2(src2.data(),dst.data(),nrows,ncols);
+    /*
+    cout << "src: " << src << "\n\n";
+    cout << "dst: " << dst << "\n\n";
+    cout << "src2: " << src2 << "\n\n";
+    cout << "dst2: " << dst2 << "\n\n";
+    */
+    VERIFY( (src-src2).norm() < test_precision<T>() );
+    VERIFY( (dst-dst2).norm() < test_precision<T>() );
+}
+
+void test_FFTW()
+{
+  CALL_SUBTEST( ( test_complex2d<float,4,8> () ) );
+  CALL_SUBTEST( ( test_complex2d<double,4,8> () ) );
+  //CALL_SUBTEST( ( test_complex2d<long double,4,8> () ) );
   CALL_SUBTEST( test_complex<float>(32) ); CALL_SUBTEST( test_complex<double>(32) ); CALL_SUBTEST( test_complex<long double>(32) );
   CALL_SUBTEST( test_complex<float>(256) ); CALL_SUBTEST( test_complex<double>(256) ); CALL_SUBTEST( test_complex<long double>(256) );
   CALL_SUBTEST( test_complex<float>(3*8) ); CALL_SUBTEST( test_complex<double>(3*8) ); CALL_SUBTEST( test_complex<long double>(3*8) );
@@ -127,8 +177,6 @@ void test_FFTW()
   CALL_SUBTEST( test_complex<float>(2*3*4*5) ); CALL_SUBTEST( test_complex<double>(2*3*4*5) ); CALL_SUBTEST( test_complex<long double>(2*3*4*5) );
   CALL_SUBTEST( test_complex<float>(2*3*4*5*7) ); CALL_SUBTEST( test_complex<double>(2*3*4*5*7) ); CALL_SUBTEST( test_complex<long double>(2*3*4*5*7) );
 
-
-
   CALL_SUBTEST( test_scalar<float>(32) ); CALL_SUBTEST( test_scalar<double>(32) ); CALL_SUBTEST( test_scalar<long double>(32) );
   CALL_SUBTEST( test_scalar<float>(45) ); CALL_SUBTEST( test_scalar<double>(45) ); CALL_SUBTEST( test_scalar<long double>(45) );
   CALL_SUBTEST( test_scalar<float>(50) ); CALL_SUBTEST( test_scalar<double>(50) ); CALL_SUBTEST( test_scalar<long double>(50) );