From 92ca9fc032ecaa7f9595f5c5f7d9d8df6c972bbe Mon Sep 17 00:00:00 2001 From: Mark Borgerding Date: Tue, 19 May 2009 00:21:04 -0400 Subject: [PATCH 01/72] initial pass of FFT module -- includes complex 1-d case only --- unsupported/Eigen/FFT.h | 84 +++++ unsupported/Eigen/src/FFT/simple_fft_traits.h | 296 ++++++++++++++++++ unsupported/test/CMakeLists.txt | 1 + unsupported/test/FFT.cpp | 85 +++++ 4 files changed, 466 insertions(+) create mode 100644 unsupported/Eigen/FFT.h create mode 100644 unsupported/Eigen/src/FFT/simple_fft_traits.h create mode 100644 unsupported/test/FFT.cpp diff --git a/unsupported/Eigen/FFT.h b/unsupported/Eigen/FFT.h new file mode 100644 index 000000000..03490d2c5 --- /dev/null +++ b/unsupported/Eigen/FFT.h @@ -0,0 +1,84 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. Eigen itself is part of the KDE project. +// +// Copyright (C) 2009 Mark Borgerding mark a borgerding net +// +// Eigen is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 3 of the License, or (at your option) any later version. +// +// Alternatively, you can redistribute it and/or +// modify it under the terms of the GNU General Public License as +// published by the Free Software Foundation; either version 2 of +// the License, or (at your option) any later version. +// +// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY +// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public +// License and a copy of the GNU General Public License along with +// Eigen. If not, see . + +#ifndef EIGEN_FFT_H +#define EIGEN_FFT_H + +// simple_fft_traits: small, free, reasonably efficient default, derived from kissfft +#include "src/FFT/simple_fft_traits.h" +#define DEFAULT_FFT_TRAITS simple_fft_traits + +// FFTW: faster, GPL-not LGPL, bigger code size +#ifdef FFTW_PATIENT // definition of FFTW_PATIENT indicates the caller has included fftw3.h, we can use FFTW routines +// TODO +// #include "src/FFT/fftw_traits.h" +// #define DEFAULT_FFT_TRAITS fftw_traits +#endif + +// intel Math Kernel Library: fastest, commerical +#ifdef _MKL_DFTI_H_ // mkl_dfti.h has been included, we can use MKL FFT routines +// TODO +// #include "src/FFT/imkl_traits.h" +// #define DEFAULT_FFT_TRAITS imkl_traits +#endif + +namespace Eigen { + +template + > +class FFT +{ + public: + typedef _Traits traits_type; + typedef typename traits_type::Scalar Scalar; + typedef typename traits_type::Complex Complex; + + FFT(const traits_type & traits=traits_type() ) :m_traits(traits) { } + + void fwd( Complex * dst, const Complex * src, int nfft) + { + m_traits.prepare(nfft,false,dst,src); + m_traits.exec(dst,src); + m_traits.postprocess(dst); + } + + void inv( Complex * dst, const Complex * src, int nfft) + { + m_traits.prepare(nfft,true,dst,src); + m_traits.exec(dst,src); + m_traits.postprocess(dst); + } + + // TODO: fwd,inv for Scalar + // TODO: multi-dimensional FFTs + // TODO: handle Eigen MatrixBase + + traits_type & traits() {return m_traits;} + private: + traits_type m_traits; +}; +#undef DEFAULT_FFT_TRAITS +} +#endif diff --git a/unsupported/Eigen/src/FFT/simple_fft_traits.h b/unsupported/Eigen/src/FFT/simple_fft_traits.h new file mode 100644 index 000000000..fe9d24b84 --- /dev/null +++ b/unsupported/Eigen/src/FFT/simple_fft_traits.h @@ -0,0 +1,296 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. Eigen itself is part of the KDE project. +// +// Copyright (C) 2009 Mark Borgerding mark a borgerding net +// +// Eigen is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 3 of the License, or (at your option) any later version. +// +// Alternatively, you can redistribute it and/or +// modify it under the terms of the GNU General Public License as +// published by the Free Software Foundation; either version 2 of +// the License, or (at your option) any later version. +// +// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY +// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public +// License and a copy of the GNU General Public License along with +// Eigen. If not, see . + +#include +#include + +namespace Eigen { + + template + struct simple_fft_traits + { + typedef _Scalar Scalar; + typedef std::complex Complex; + simple_fft_traits() : m_nfft(0) {} + + void prepare(int nfft,bool inverse,Complex * dst,const Complex *src) + { + if (m_nfft == nfft) { + // reuse the twiddles, conjugate if necessary + if (inverse != m_inverse) + for (int i=0;in) + p=n;// no more factors + } + n /= p; + m_stageRadix.push_back(p); + m_stageRemainder.push_back(n); + }while(n>1); + } + + void exec(Complex * dst, const Complex * src) + { + work(0, dst, src, 1,1); + } + + void postprocess(Complex *dst) + { + if (m_inverse) { + Scalar scale = 1./m_nfft; + for (int k=0;kreal() - .5*scratch[3].real() , Fout->imag() - .5*scratch[3].imag() ); + + scratch[0] *= epi3.imag(); + + *Fout += scratch[3]; + + Fout[m2] = Complex( Fout[m].real() + scratch[0].imag() , Fout[m].imag() - scratch[0].real() ); + + Fout[m] += Complex( -scratch[0].imag(),scratch[0].real() ); + ++Fout; + }while(--k); + } + + void bfly5( Complex * Fout, const size_t fstride, const size_t m) + { + Complex *Fout0,*Fout1,*Fout2,*Fout3,*Fout4; + size_t u; + Complex scratch[13]; + Complex * twiddles = &m_twiddles[0]; + Complex *tw; + Complex ya,yb; + ya = twiddles[fstride*m]; + yb = twiddles[fstride*2*m]; + + Fout0=Fout; + Fout1=Fout0+m; + Fout2=Fout0+2*m; + Fout3=Fout0+3*m; + Fout4=Fout0+4*m; + + tw=twiddles; + for ( u=0; u=Norig) twidx-=Norig; + t=scratchbuf[q] * twiddles[twidx]; + Fout[ k ] += t; + } + k += m; + } + } + } + + int m_nfft; + bool m_inverse; + std::vector m_twiddles; + std::vector m_stageRadix; + std::vector m_stageRemainder; + }; +} diff --git a/unsupported/test/CMakeLists.txt b/unsupported/test/CMakeLists.txt index abfbb0185..49de7dfb2 100644 --- a/unsupported/test/CMakeLists.txt +++ b/unsupported/test/CMakeLists.txt @@ -19,3 +19,4 @@ ei_add_test(autodiff) ei_add_test(BVH) ei_add_test(matrixExponential) ei_add_test(alignedvector3) +ei_add_test(FFT) diff --git a/unsupported/test/FFT.cpp b/unsupported/test/FFT.cpp new file mode 100644 index 000000000..b13a7810f --- /dev/null +++ b/unsupported/test/FFT.cpp @@ -0,0 +1,85 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. Eigen itself is part of the KDE project. +// +// Copyright (C) 2009 Mark Borgerding mark a borgerding net +// +// Eigen is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 3 of the License, or (at your option) any later version. +// +// Alternatively, you can redistribute it and/or +// modify it under the terms of the GNU General Public License as +// published by the Free Software Foundation; either version 2 of +// the License, or (at your option) any later version. +// +// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY +// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public +// License and a copy of the GNU General Public License along with +// Eigen. If not, see . + +#include "main.h" +#include + +//#include +//#include +//#include + +using namespace std; + +template +void test_fft(int nfft) +{ + typedef typename Eigen::FFT::Complex Complex; + + //cout << "type:" << typeid(T).name() << " nfft:" << nfft; + + FFT fft; + + vector inbuf(nfft); + vector buf3(nfft); + vector outbuf(nfft); + for (int k=0;k acc = 0; + long double phinc = 2*k0* M_PIl / nfft; + for (int k1=0;k1 x(inbuf[k1].real(),inbuf[k1].imag()); + acc += x * exp( complex(0,-k1*phinc) ); + } + totalpower += norm(acc); + complex x(outbuf[k0].real(),outbuf[k0].imag()); + complex dif = acc - x; + difpower += norm(dif); + } + long double rmse = sqrt(difpower/totalpower); + VERIFY( rmse < 1e-5 );// gross check + + totalpower=0; + difpower=0; + for (int k=0;k(32) )); CALL_SUBTEST(( test_fft(32) )); CALL_SUBTEST(( test_fft(32) )); + CALL_SUBTEST(( test_fft(1024) )); CALL_SUBTEST(( test_fft(1024) )); CALL_SUBTEST(( test_fft(1024) )); + CALL_SUBTEST(( test_fft(2*3*4*5*7) )); CALL_SUBTEST(( test_fft(2*3*4*5*7) )); CALL_SUBTEST(( test_fft(2*3*4*5*7) )); +} From be1b2ad4ec1851aa990dc97337fc74e16edd1dd0 Mon Sep 17 00:00:00 2001 From: Mark Borgerding Date: Tue, 19 May 2009 00:26:19 -0400 Subject: [PATCH 02/72] removed unused code --- unsupported/test/FFT.cpp | 6 ------ 1 file changed, 6 deletions(-) diff --git a/unsupported/test/FFT.cpp b/unsupported/test/FFT.cpp index b13a7810f..8347bb76b 100644 --- a/unsupported/test/FFT.cpp +++ b/unsupported/test/FFT.cpp @@ -25,10 +25,6 @@ #include "main.h" #include -//#include -//#include -//#include - using namespace std; template @@ -36,8 +32,6 @@ void test_fft(int nfft) { typedef typename Eigen::FFT::Complex Complex; - //cout << "type:" << typeid(T).name() << " nfft:" << nfft; - FFT fft; vector inbuf(nfft); From 68cad98bc935e53102a9432560085b81c5766743 Mon Sep 17 00:00:00 2001 From: Mark Borgerding Date: Tue, 19 May 2009 00:34:38 -0400 Subject: [PATCH 03/72] indent level change --- unsupported/Eigen/src/FFT/simple_fft_traits.h | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/unsupported/Eigen/src/FFT/simple_fft_traits.h b/unsupported/Eigen/src/FFT/simple_fft_traits.h index fe9d24b84..6fbbeac2e 100644 --- a/unsupported/Eigen/src/FFT/simple_fft_traits.h +++ b/unsupported/Eigen/src/FFT/simple_fft_traits.h @@ -80,13 +80,15 @@ namespace Eigen { void postprocess(Complex *dst) { - if (m_inverse) { - Scalar scale = 1./m_nfft; - for (int k=0;k Date: Fri, 22 May 2009 22:37:59 -0400 Subject: [PATCH 04/72] added non-optimized real forward fft (no inverse yet) --- unsupported/Eigen/FFT.h | 27 ++++- unsupported/Eigen/src/FFT/simple_fft_traits.h | 10 +- unsupported/test/FFT.cpp | 113 ++++++++++++------ 3 files changed, 106 insertions(+), 44 deletions(-) diff --git a/unsupported/Eigen/FFT.h b/unsupported/Eigen/FFT.h index 03490d2c5..a1f87a609 100644 --- a/unsupported/Eigen/FFT.h +++ b/unsupported/Eigen/FFT.h @@ -57,21 +57,36 @@ class FFT FFT(const traits_type & traits=traits_type() ) :m_traits(traits) { } - void fwd( Complex * dst, const Complex * src, int nfft) + template + void fwd( Complex * dst, const _Input * src, int nfft) { m_traits.prepare(nfft,false,dst,src); m_traits.exec(dst,src); m_traits.postprocess(dst); } - void inv( Complex * dst, const Complex * src, int nfft) + template + void fwd( std::vector & dst, const std::vector<_Input> & src) { - m_traits.prepare(nfft,true,dst,src); - m_traits.exec(dst,src); - m_traits.postprocess(dst); + dst.resize( src.size() ); + fwd( &dst[0],&src[0],src.size() ); + } + + template + void inv( _Output * dst, const Complex * src, int nfft) + { + m_traits.prepare(nfft,true,dst,src); + m_traits.exec(dst,src); + m_traits.postprocess(dst); + } + + template + void inv( std::vector<_Output> & dst, const std::vector & src) + { + dst.resize( src.size() ); + inv( &dst[0],&src[0],src.size() ); } - // TODO: fwd,inv for Scalar // TODO: multi-dimensional FFTs // TODO: handle Eigen MatrixBase diff --git a/unsupported/Eigen/src/FFT/simple_fft_traits.h b/unsupported/Eigen/src/FFT/simple_fft_traits.h index 6fbbeac2e..5a910dd1f 100644 --- a/unsupported/Eigen/src/FFT/simple_fft_traits.h +++ b/unsupported/Eigen/src/FFT/simple_fft_traits.h @@ -34,7 +34,8 @@ namespace Eigen { typedef std::complex Complex; simple_fft_traits() : m_nfft(0) {} - void prepare(int nfft,bool inverse,Complex * dst,const Complex *src) + template + void prepare(int nfft,bool inverse,Complex * dst,const _Src *src) { if (m_nfft == nfft) { // reuse the twiddles, conjugate if necessary @@ -73,7 +74,8 @@ namespace Eigen { }while(n>1); } - void exec(Complex * dst, const Complex * src) + template + void exec(Complex * dst, const _Src * src) { work(0, dst, src, 1,1); } @@ -89,7 +91,9 @@ namespace Eigen { private: - void work( int stage,Complex * Fout, const Complex * f, size_t fstride,size_t in_stride) + + template + void work( int stage,Complex * Fout, const _Src * f, size_t fstride,size_t in_stride) { int p = m_stageRadix[stage]; int m = m_stageRemainder[stage]; diff --git a/unsupported/test/FFT.cpp b/unsupported/test/FFT.cpp index 8347bb76b..ef03359e2 100644 --- a/unsupported/test/FFT.cpp +++ b/unsupported/test/FFT.cpp @@ -25,55 +25,98 @@ #include "main.h" #include + using namespace std; +template < typename T> +complex promote(complex x) { return complex(x.real(),x.imag()); } + +complex promote(float x) { return complex( x); } +complex promote(double x) { return complex( x); } +complex promote(long double x) { return complex( x); } + + + template + long double fft_rmse( const vector & fftbuf,const vector & timebuf) + { + long double totalpower=0; + long double difpower=0; + for (size_t k0=0;k0 acc = 0; + long double phinc = -2.*k0* M_PIl / timebuf.size(); + for (size_t k1=0;k1(0,k1*phinc) ); + } + totalpower += norm(acc); + complex x = promote(fftbuf[k0]); + complex dif = acc - x; + difpower += norm(dif); + cerr << k0 << ":" << acc << " " << x << endl; + } + cerr << "rmse:" << sqrt(difpower/totalpower) << endl; + return sqrt(difpower/totalpower); + } + + template + long double dif_rmse( const vector buf1,const vector buf2) + { + long double totalpower=0; + long double difpower=0; + size_t n = min( buf1.size(),buf2.size() ); + for (size_t k=0;k -void test_fft(int nfft) +void test_scalar(int nfft) +{ + typedef typename Eigen::FFT::Complex Complex; + typedef typename Eigen::FFT::Scalar Scalar; + + FFT fft; + vector inbuf(nfft); + vector outbuf; + for (int k=0;k +void test_complex(int nfft) { typedef typename Eigen::FFT::Complex Complex; FFT fft; vector inbuf(nfft); - vector buf3(nfft); - vector outbuf(nfft); + vector outbuf; + vector buf3; for (int k=0;k acc = 0; - long double phinc = 2*k0* M_PIl / nfft; - for (int k1=0;k1 x(inbuf[k1].real(),inbuf[k1].imag()); - acc += x * exp( complex(0,-k1*phinc) ); - } - totalpower += norm(acc); - complex x(outbuf[k0].real(),outbuf[k0].imag()); - complex dif = acc - x; - difpower += norm(dif); - } - long double rmse = sqrt(difpower/totalpower); - VERIFY( rmse < 1e-5 );// gross check + VERIFY( fft_rmse(outbuf,inbuf) < 1e-5 );// gross check - totalpower=0; - difpower=0; - for (int k=0;k(32) )); CALL_SUBTEST(( test_fft(32) )); CALL_SUBTEST(( test_fft(32) )); - CALL_SUBTEST(( test_fft(1024) )); CALL_SUBTEST(( test_fft(1024) )); CALL_SUBTEST(( test_fft(1024) )); - CALL_SUBTEST(( test_fft(2*3*4*5*7) )); CALL_SUBTEST(( test_fft(2*3*4*5*7) )); CALL_SUBTEST(( test_fft(2*3*4*5*7) )); + CALL_SUBTEST( test_complex(32) ); CALL_SUBTEST( test_complex(32) ); CALL_SUBTEST( test_complex(32) ); + CALL_SUBTEST( test_complex(1024) ); CALL_SUBTEST( test_complex(1024) ); CALL_SUBTEST( test_complex(1024) ); + CALL_SUBTEST( test_complex(3*8) ); CALL_SUBTEST( test_complex(3*8) ); CALL_SUBTEST( test_complex(3*8) ); + CALL_SUBTEST( test_complex(5*32) ); CALL_SUBTEST( test_complex(5*32) ); CALL_SUBTEST( test_complex(5*32) ); + CALL_SUBTEST( test_complex(2*3*4) ); CALL_SUBTEST( test_complex(2*3*4) ); CALL_SUBTEST( test_complex(2*3*4) ); + CALL_SUBTEST( test_complex(2*3*4*5) ); CALL_SUBTEST( test_complex(2*3*4*5) ); CALL_SUBTEST( test_complex(2*3*4*5) ); + CALL_SUBTEST( test_complex(2*3*4*5*7) ); CALL_SUBTEST( test_complex(2*3*4*5*7) ); CALL_SUBTEST( test_complex(2*3*4*5*7) ); + + CALL_SUBTEST( test_scalar(32) ); CALL_SUBTEST( test_scalar(32) ); CALL_SUBTEST( test_scalar(32) ); + CALL_SUBTEST( test_scalar(1024) ); CALL_SUBTEST( test_scalar(1024) ); CALL_SUBTEST( test_scalar(1024) ); + CALL_SUBTEST( test_scalar(2*3*4*5*7) ); CALL_SUBTEST( test_scalar(2*3*4*5*7) ); CALL_SUBTEST( test_scalar(2*3*4*5*7) ); } From 9c0fcd0f6213143216710a5b215aa2bb4a857ce5 Mon Sep 17 00:00:00 2001 From: Mark Borgerding Date: Sat, 23 May 2009 10:09:48 -0400 Subject: [PATCH 05/72] started real optimization, added benchmark for FFT --- bench/benchFFT.cpp | 64 +++++++++ unsupported/Eigen/FFT.h | 8 +- unsupported/Eigen/src/FFT/simple_fft_traits.h | 125 +++++++++++++----- unsupported/test/FFT.cpp | 3 +- 4 files changed, 157 insertions(+), 43 deletions(-) create mode 100644 bench/benchFFT.cpp diff --git a/bench/benchFFT.cpp b/bench/benchFFT.cpp new file mode 100644 index 000000000..041576b75 --- /dev/null +++ b/bench/benchFFT.cpp @@ -0,0 +1,64 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. Eigen itself is part of the KDE project. +// +// Copyright (C) 2009 Mark Borgerding mark a borgerding net +// +// Eigen is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 3 of the License, or (at your option) any later version. +// +// Alternatively, you can redistribute it and/or +// modify it under the terms of the GNU General Public License as +// published by the Free Software Foundation; either version 2 of +// the License, or (at your option) any later version. +// +// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY +// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public +// License and a copy of the GNU General Public License along with +// Eigen. If not, see . + +#include +#include +#include +#include +#include + +using namespace Eigen; +using namespace std; + +#ifndef NFFT +#define NFFT 1024 +#endif + +#ifndef TYPE +#define TYPE float +#endif + +#ifndef NITS +#define NITS (10000000/NFFT) +#endif + +int main() +{ + vector > inbuf(NFFT); + vector > outbuf(NFFT); + Eigen::FFT fft; + + fft.fwd( outbuf , inbuf); + + BenchTimer timer; + timer.reset(); + for (int k=0;k<8;++k) { + timer.start(); + for(int i = 0; i < NITS; i++) + fft.fwd( outbuf , inbuf); + timer.stop(); + } + double mflops = 5.*NFFT*log2((double)NFFT) / (1e6 * timer.value() / (double)NITS ); + cout << "NFFT=" << NFFT << " " << (double(1e-6*NFFT*NITS)/timer.value()) << " MS/s " << mflops << "MFLOPS\n"; +} diff --git a/unsupported/Eigen/FFT.h b/unsupported/Eigen/FFT.h index a1f87a609..c466423b7 100644 --- a/unsupported/Eigen/FFT.h +++ b/unsupported/Eigen/FFT.h @@ -60,9 +60,7 @@ class FFT template void fwd( Complex * dst, const _Input * src, int nfft) { - m_traits.prepare(nfft,false,dst,src); - m_traits.exec(dst,src); - m_traits.postprocess(dst); + m_traits.fwd(dst,src,nfft); } template @@ -75,9 +73,7 @@ class FFT template void inv( _Output * dst, const Complex * src, int nfft) { - m_traits.prepare(nfft,true,dst,src); - m_traits.exec(dst,src); - m_traits.postprocess(dst); + m_traits.inv( dst,src,nfft ); } template diff --git a/unsupported/Eigen/src/FFT/simple_fft_traits.h b/unsupported/Eigen/src/FFT/simple_fft_traits.h index 5a910dd1f..33433ae03 100644 --- a/unsupported/Eigen/src/FFT/simple_fft_traits.h +++ b/unsupported/Eigen/src/FFT/simple_fft_traits.h @@ -35,7 +35,73 @@ namespace Eigen { simple_fft_traits() : m_nfft(0) {} template - void prepare(int nfft,bool inverse,Complex * dst,const _Src *src) + void fwd( Complex * dst,const _Src *src,int nfft) + { + prepare(nfft,false); + work(0, dst, src, 1,1); + scale(dst); + } + + void fwd( Complex * dst,const Scalar * src,int nfft) + { + if ( nfft&1 ) { + // use generic mode for odd + prepare(nfft,false); + work(0, dst, src, 1,1); + scale(dst); + }else{ + int ncfft = nfft>>1; + // use optimized mode for even real + prepare(nfft,false); + work(0,dst, reinterpret_cast (src),2,1); + Complex dc = dst[0].real() + dst[0].imag(); + Complex nyquist = dst[0].real() - dst[0].imag(); + + int k; + for ( k=1;k <= ncfft/2 ; ++k ) { +/** + fpk = st->tmpbuf[k]; + fpnk.r = st->tmpbuf[ncfft-k].r; + fpnk.i = - st->tmpbuf[ncfft-k].i; + + C_ADD( f1k, fpk , fpnk ); + C_SUB( f2k, fpk , fpnk ); + C_MUL( tw , f2k , st->super_twiddles[k-1]); + + freqdata[k].r = HALF_OF(f1k.r + tw.r); + freqdata[k].i = HALF_OF(f1k.i + tw.i); + freqdata[ncfft-k].r = HALF_OF(f1k.r - tw.r); + freqdata[ncfft-k].i = HALF_OF(tw.i - f1k.i); + */ + Complex fpk = dst[k]; + Complex fpnk = conj(dst[ncfft-k]); + + Complex f1k = fpk + fpnk; + Complex f2k = fpnk - fpk; + //Complex tw = f2k * exp( Complex(0,-3.14159265358979323846264338327 * ((double)k / ncfft + 1) ) ); // TODO repalce this with index into twiddles + Complex tw = f2k * m_twiddles[2*k];; + + dst[k] = (f1k + tw) * Scalar(.5); + dst[ncfft-k] = conj(f1k -tw)*Scalar(.5); + } + // place conjugate-symmetric half at the end for completeness + // TODO: make this configurable ( opt-out ) + for ( k=1;k < ncfft ; ++k ) + dst[nfft-k] = conj(dst[k]); + + dst[0] = dc; + dst[ncfft] = nyquist; + } + } + + void inv(Complex * dst,const Complex *src,int nfft) + { + prepare(nfft,true); + work(0, dst, src, 1,1); + scale(dst); + } + + void prepare(int nfft,bool inverse) { if (m_nfft == nfft) { // reuse the twiddles, conjugate if necessary @@ -74,57 +140,49 @@ namespace Eigen { }while(n>1); } - template - void exec(Complex * dst, const _Src * src) - { - work(0, dst, src, 1,1); - } - - void postprocess(Complex *dst) + void scale(Complex *dst) { if (m_inverse) { - Scalar scale = 1./m_nfft; + Scalar s = 1./m_nfft; for (int k=0;k - void work( int stage,Complex * Fout, const _Src * f, size_t fstride,size_t in_stride) + void work( int stage,Complex * xout, const _Src * xin, size_t fstride,size_t in_stride) { int p = m_stageRadix[stage]; int m = m_stageRemainder[stage]; - Complex * Fout_beg = Fout; - Complex * Fout_end = Fout + p*m; + Complex * Fout_beg = xout; + Complex * Fout_end = xout + p*m; - if (m==1) { - do{ - *Fout = *f; - f += fstride*in_stride; - }while(++Fout != Fout_end ); - }else{ + if (m>1) { do{ // recursive call: // DFT of size m*p performed by doing // p instances of smaller DFTs of size m, // each one takes a decimated version of the input - work(stage+1, Fout , f, fstride*p,in_stride); - f += fstride*in_stride; - }while( (Fout += m) != Fout_end ); + work(stage+1, xout , xin, fstride*p,in_stride); + xin += fstride*in_stride; + }while( (xout += m) != Fout_end ); + }else{ + do{ + *xout = *xin; + xin += fstride*in_stride; + }while(++xout != Fout_end ); } - - Fout=Fout_beg; + xout=Fout_beg; // recombine the p smaller DFTs switch (p) { - case 2: bfly2(Fout,fstride,m); break; - case 3: bfly3(Fout,fstride,m); break; - case 4: bfly4(Fout,fstride,m); break; - case 5: bfly5(Fout,fstride,m); break; - default: bfly_generic(Fout,fstride,m,p); break; + case 2: bfly2(xout,fstride,m); break; + case 3: bfly3(xout,fstride,m); break; + case 4: bfly4(xout,fstride,m); break; + case 5: bfly5(xout,fstride,m); break; + default: bfly_generic(xout,fstride,m,p); break; } } @@ -139,7 +197,7 @@ namespace Eigen { void bfly4( Complex * Fout, const size_t fstride, const size_t m) { - Complex scratch[7]; + Complex scratch[6]; int negative_if_inverse = m_inverse * -2 +1; for (size_t k=0;kreal() - .5*scratch[3].real() , Fout->imag() - .5*scratch[3].imag() ); - scratch[0] *= epi3.imag(); - *Fout += scratch[3]; - Fout[m2] = Complex( Fout[m].real() + scratch[0].imag() , Fout[m].imag() - scratch[0].real() ); - Fout[m] += Complex( -scratch[0].imag(),scratch[0].real() ); ++Fout; }while(--k); diff --git a/unsupported/test/FFT.cpp b/unsupported/test/FFT.cpp index ef03359e2..13e98ba77 100644 --- a/unsupported/test/FFT.cpp +++ b/unsupported/test/FFT.cpp @@ -115,8 +115,9 @@ void test_FFT() CALL_SUBTEST( test_complex(2*3*4) ); CALL_SUBTEST( test_complex(2*3*4) ); CALL_SUBTEST( test_complex(2*3*4) ); CALL_SUBTEST( test_complex(2*3*4*5) ); CALL_SUBTEST( test_complex(2*3*4*5) ); CALL_SUBTEST( test_complex(2*3*4*5) ); CALL_SUBTEST( test_complex(2*3*4*5*7) ); CALL_SUBTEST( test_complex(2*3*4*5*7) ); CALL_SUBTEST( test_complex(2*3*4*5*7) ); - +/* CALL_SUBTEST( test_scalar(32) ); CALL_SUBTEST( test_scalar(32) ); CALL_SUBTEST( test_scalar(32) ); CALL_SUBTEST( test_scalar(1024) ); CALL_SUBTEST( test_scalar(1024) ); CALL_SUBTEST( test_scalar(1024) ); CALL_SUBTEST( test_scalar(2*3*4*5*7) ); CALL_SUBTEST( test_scalar(2*3*4*5*7) ); CALL_SUBTEST( test_scalar(2*3*4*5*7) ); + */ } From 304798817268706463f3ff645c8c8b2c887c090a Mon Sep 17 00:00:00 2001 From: Mark Borgerding Date: Sat, 23 May 2009 12:50:07 -0400 Subject: [PATCH 06/72] scalar forward FFT optimized for even size, converts to cpx for odd --- unsupported/Eigen/src/FFT/simple_fft_traits.h | 156 ++++++++++-------- unsupported/test/FFT.cpp | 10 +- 2 files changed, 97 insertions(+), 69 deletions(-) diff --git a/unsupported/Eigen/src/FFT/simple_fft_traits.h b/unsupported/Eigen/src/FFT/simple_fft_traits.h index 33433ae03..f7dd2b9cf 100644 --- a/unsupported/Eigen/src/FFT/simple_fft_traits.h +++ b/unsupported/Eigen/src/FFT/simple_fft_traits.h @@ -24,6 +24,7 @@ #include #include +#include namespace Eigen { @@ -39,51 +40,54 @@ namespace Eigen { { prepare(nfft,false); work(0, dst, src, 1,1); - scale(dst); } + // real-to-complex forward FFT + // perform two FFTs of src even and src odd + // then twiddle to recombine them into the half-spectrum format + // then fill in the conjugate symmetric half void fwd( Complex * dst,const Scalar * src,int nfft) { if ( nfft&1 ) { // use generic mode for odd prepare(nfft,false); work(0, dst, src, 1,1); - scale(dst); }else{ int ncfft = nfft>>1; // use optimized mode for even real - prepare(nfft,false); - work(0,dst, reinterpret_cast (src),2,1); + fwd( dst, reinterpret_cast (src),ncfft); + make_real_twiddles(nfft); + std::cerr << "dst[0] = " << dst[0] << "\n"; Complex dc = dst[0].real() + dst[0].imag(); Complex nyquist = dst[0].real() - dst[0].imag(); - int k; - for ( k=1;k <= ncfft/2 ; ++k ) { -/** - fpk = st->tmpbuf[k]; - fpnk.r = st->tmpbuf[ncfft-k].r; - fpnk.i = - st->tmpbuf[ncfft-k].i; - - C_ADD( f1k, fpk , fpnk ); - C_SUB( f2k, fpk , fpnk ); - C_MUL( tw , f2k , st->super_twiddles[k-1]); - - freqdata[k].r = HALF_OF(f1k.r + tw.r); - freqdata[k].i = HALF_OF(f1k.i + tw.i); - freqdata[ncfft-k].r = HALF_OF(f1k.r - tw.r); - freqdata[ncfft-k].i = HALF_OF(tw.i - f1k.i); - */ +#if 0 + using namespace std; + cerr << "desired:\n"; + for ( k=1;k <= (ncfft>>1) ; ++k ) { + Complex x = exp( Complex(0,-3.14159265358979323846264338327 * ((double) (k) / ncfft + .5) ) ); + cerr << k << " " << x << "angle(x):" << arg(x) << "\n"; + } + dc=0; + cerr << "twiddles:\n"; + for (k=0;k>1) ; ++k ) { Complex fpk = dst[k]; Complex fpnk = conj(dst[ncfft-k]); - Complex f1k = fpk + fpnk; - Complex f2k = fpnk - fpk; - //Complex tw = f2k * exp( Complex(0,-3.14159265358979323846264338327 * ((double)k / ncfft + 1) ) ); // TODO repalce this with index into twiddles - Complex tw = f2k * m_twiddles[2*k];; - + Complex f2k = fpk - fpnk; + //Complex tw = f2k * exp( Complex(0,-3.14159265358979323846264338327 * ((double) (k) / ncfft + .5) ) ); + Complex tw= f2k * m_realTwiddles[k-1]; + dst[k] = (f1k + tw) * Scalar(.5); dst[ncfft-k] = conj(f1k -tw)*Scalar(.5); } + + // place conjugate-symmetric half at the end for completeness // TODO: make this configurable ( opt-out ) for ( k=1;k < ncfft ; ++k ) @@ -98,55 +102,74 @@ namespace Eigen { { prepare(nfft,true); work(0, dst, src, 1,1); - scale(dst); + scale(dst, Scalar(1)/m_nfft ); } void prepare(int nfft,bool inverse) { - if (m_nfft == nfft) { - // reuse the twiddles, conjugate if necessary - if (inverse != m_inverse) - for (int i=0;in) - p=n;// no more factors - } - n /= p; - m_stageRadix.push_back(p); - m_stageRemainder.push_back(n); - }while(n>1); + make_twiddles(nfft,inverse); + factorize(nfft); } - void scale(Complex *dst) + void make_real_twiddles(int nfft) + { + int ncfft2 = nfft>>2; + if ( m_realTwiddles.size() != ncfft2) { + m_realTwiddles.resize(ncfft2); + int ncfft= nfft>>1; + for (int k=1;k<=ncfft2;++k) + m_realTwiddles[k-1] = exp( Complex(0,-3.14159265358979323846264338327 * ((double) (k) / ncfft + .5) ) ); + } + } + + void make_twiddles(int nfft,bool inverse) + { + if ( m_twiddles.size() == nfft) { + // reuse the twiddles, conjugate if necessary + if (inverse != m_inverse) + for (int i=0;in) + p=n;// no more factors + } + n /= p; + m_stageRadix.push_back(p); + m_stageRemainder.push_back(n); + }while(n>1); + } + m_nfft = nfft; + } + + void scale(Complex *dst,Scalar s) { - if (m_inverse) { - Scalar s = 1./m_nfft; for (int k=0;k m_twiddles; + std::vector m_realTwiddles; std::vector m_stageRadix; std::vector m_stageRemainder; }; diff --git a/unsupported/test/FFT.cpp b/unsupported/test/FFT.cpp index 13e98ba77..41c7fed7b 100644 --- a/unsupported/test/FFT.cpp +++ b/unsupported/test/FFT.cpp @@ -41,6 +41,7 @@ complex promote(long double x) { return complex( x); { long double totalpower=0; long double difpower=0; + cerr <<"idx\ttruth\t\tvalue\n"; for (size_t k0=0;k0 acc = 0; long double phinc = -2.*k0* M_PIl / timebuf.size(); @@ -51,7 +52,7 @@ complex promote(long double x) { return complex( x); complex x = promote(fftbuf[k0]); complex dif = acc - x; difpower += norm(dif); - cerr << k0 << ":" << acc << " " << x << endl; + cerr << k0 << "\t" << acc << "\t" << x << endl; } cerr << "rmse:" << sqrt(difpower/totalpower) << endl; return sqrt(difpower/totalpower); @@ -108,6 +109,7 @@ void test_complex(int nfft) void test_FFT() { +#if 0 CALL_SUBTEST( test_complex(32) ); CALL_SUBTEST( test_complex(32) ); CALL_SUBTEST( test_complex(32) ); CALL_SUBTEST( test_complex(1024) ); CALL_SUBTEST( test_complex(1024) ); CALL_SUBTEST( test_complex(1024) ); CALL_SUBTEST( test_complex(3*8) ); CALL_SUBTEST( test_complex(3*8) ); CALL_SUBTEST( test_complex(3*8) ); @@ -115,9 +117,11 @@ void test_FFT() CALL_SUBTEST( test_complex(2*3*4) ); CALL_SUBTEST( test_complex(2*3*4) ); CALL_SUBTEST( test_complex(2*3*4) ); CALL_SUBTEST( test_complex(2*3*4*5) ); CALL_SUBTEST( test_complex(2*3*4*5) ); CALL_SUBTEST( test_complex(2*3*4*5) ); CALL_SUBTEST( test_complex(2*3*4*5*7) ); CALL_SUBTEST( test_complex(2*3*4*5*7) ); CALL_SUBTEST( test_complex(2*3*4*5*7) ); -/* +#endif + +#if 1 CALL_SUBTEST( test_scalar(32) ); CALL_SUBTEST( test_scalar(32) ); CALL_SUBTEST( test_scalar(32) ); CALL_SUBTEST( test_scalar(1024) ); CALL_SUBTEST( test_scalar(1024) ); CALL_SUBTEST( test_scalar(1024) ); CALL_SUBTEST( test_scalar(2*3*4*5*7) ); CALL_SUBTEST( test_scalar(2*3*4*5*7) ); CALL_SUBTEST( test_scalar(2*3*4*5*7) ); - */ +#endif } From 326ea773908c2d7e46101085af8f72d20b3f8cbc Mon Sep 17 00:00:00 2001 From: Mark Borgerding Date: Sat, 23 May 2009 22:50:07 -0400 Subject: [PATCH 07/72] added FFT inverse complex-to-scalar interface (not yet optimized) --- bench/benchFFT.cpp | 73 ++++++++++++++----- unsupported/Eigen/src/FFT/simple_fft_traits.h | 31 ++++---- unsupported/test/FFT.cpp | 21 ++++-- 3 files changed, 81 insertions(+), 44 deletions(-) diff --git a/bench/benchFFT.cpp b/bench/benchFFT.cpp index 041576b75..84cc49fe3 100644 --- a/bench/benchFFT.cpp +++ b/bench/benchFFT.cpp @@ -31,34 +31,67 @@ using namespace Eigen; using namespace std; -#ifndef NFFT -#define NFFT 1024 -#endif + +template +string nameof(); + +template <> string nameof() {return "float";} +template <> string nameof() {return "double";} +template <> string nameof() {return "long double";} #ifndef TYPE #define TYPE float #endif -#ifndef NITS -#define NITS (10000000/NFFT) +#ifndef NFFT +#define NFFT 1024 +#endif +#ifndef NDATA +#define NDATA 1000000 #endif -int main() +using namespace Eigen; + +template +void bench(int nfft) { - vector > inbuf(NFFT); - vector > outbuf(NFFT); - Eigen::FFT fft; + typedef typename NumTraits::Real Scalar; + typedef typename std::complex Complex; + int nits = NDATA/nfft; + vector inbuf(nfft); + vector outbuf(nfft); + FFT< Scalar > fft; - fft.fwd( outbuf , inbuf); + fft.fwd( outbuf , inbuf); - BenchTimer timer; - timer.reset(); - for (int k=0;k<8;++k) { - timer.start(); - for(int i = 0; i < NITS; i++) - fft.fwd( outbuf , inbuf); - timer.stop(); - } - double mflops = 5.*NFFT*log2((double)NFFT) / (1e6 * timer.value() / (double)NITS ); - cout << "NFFT=" << NFFT << " " << (double(1e-6*NFFT*NITS)/timer.value()) << " MS/s " << mflops << "MFLOPS\n"; + BenchTimer timer; + timer.reset(); + for (int k=0;k<8;++k) { + timer.start(); + for(int i = 0; i < nits; i++) + fft.fwd( outbuf , inbuf); + timer.stop(); + } + + cout << nameof() << " "; + double mflops = 5.*nfft*log2((double)nfft) / (1e6 * timer.value() / (double)nits ); + if ( NumTraits::IsComplex ) { + cout << "complex"; + }else{ + cout << "real "; + mflops /= 2; + } + + cout << " NFFT=" << nfft << " " << (double(1e-6*nfft*nits)/timer.value()) << " MS/s " << mflops << "MFLOPS\n"; +} + +int main(int argc,char ** argv) +{ + bench >(NFFT); + bench(NFFT); + bench >(NFFT); + bench(NFFT); + bench >(NFFT); + bench(NFFT); + return 0; } diff --git a/unsupported/Eigen/src/FFT/simple_fft_traits.h b/unsupported/Eigen/src/FFT/simple_fft_traits.h index f7dd2b9cf..1e2be8f79 100644 --- a/unsupported/Eigen/src/FFT/simple_fft_traits.h +++ b/unsupported/Eigen/src/FFT/simple_fft_traits.h @@ -54,28 +54,14 @@ namespace Eigen { work(0, dst, src, 1,1); }else{ int ncfft = nfft>>1; + int ncfft2 = nfft>>2; // use optimized mode for even real fwd( dst, reinterpret_cast (src),ncfft); make_real_twiddles(nfft); - std::cerr << "dst[0] = " << dst[0] << "\n"; Complex dc = dst[0].real() + dst[0].imag(); Complex nyquist = dst[0].real() - dst[0].imag(); int k; -#if 0 - using namespace std; - cerr << "desired:\n"; - for ( k=1;k <= (ncfft>>1) ; ++k ) { - Complex x = exp( Complex(0,-3.14159265358979323846264338327 * ((double) (k) / ncfft + .5) ) ); - cerr << k << " " << x << "angle(x):" << arg(x) << "\n"; - } - dc=0; - cerr << "twiddles:\n"; - for (k=0;k>1) ; ++k ) { + for ( k=1;k <= ncfft2 ; ++k ) { Complex fpk = dst[k]; Complex fpnk = conj(dst[ncfft-k]); Complex f1k = fpk + fpnk; @@ -87,7 +73,6 @@ namespace Eigen { dst[ncfft-k] = conj(f1k -tw)*Scalar(.5); } - // place conjugate-symmetric half at the end for completeness // TODO: make this configurable ( opt-out ) for ( k=1;k < ncfft ; ++k ) @@ -98,6 +83,16 @@ namespace Eigen { } } + // half-complex to scalar + void inv( Scalar * dst,const Complex * src,int nfft) + { + // TODO add optimized version for even numbers + std::vector tmp(nfft); + inv(&tmp[0],src,nfft); + for (int k=0;kn) - p=n;// no more factors + p=n;// impossible to have a factor > sqrt(n) } n /= p; m_stageRadix.push_back(p); diff --git a/unsupported/test/FFT.cpp b/unsupported/test/FFT.cpp index 41c7fed7b..75c33277d 100644 --- a/unsupported/test/FFT.cpp +++ b/unsupported/test/FFT.cpp @@ -28,6 +28,10 @@ using namespace std; +float norm(float x) {return x*x;} +double norm(double x) {return x*x;} +long double norm(long double x) {return x*x;} + template < typename T> complex promote(complex x) { return complex(x.real(),x.imag()); } @@ -83,7 +87,11 @@ void test_scalar(int nfft) for (int k=0;k() );// gross check + + vector buf3; + fft.inv( buf3 , outbuf); + VERIFY( dif_rmse(inbuf,buf3) < test_precision() );// gross check } template @@ -100,18 +108,18 @@ void test_complex(int nfft) inbuf[k]= Complex( (T)(rand()/(double)RAND_MAX - .5), (T)(rand()/(double)RAND_MAX - .5) ); fft.fwd( outbuf , inbuf); - VERIFY( fft_rmse(outbuf,inbuf) < 1e-5 );// gross check + VERIFY( fft_rmse(outbuf,inbuf) < test_precision() );// gross check fft.inv( buf3 , outbuf); - VERIFY( dif_rmse(inbuf,buf3) < 1e-5 );// gross check + VERIFY( dif_rmse(inbuf,buf3) < test_precision() );// gross check } void test_FFT() { -#if 0 +#if 1 CALL_SUBTEST( test_complex(32) ); CALL_SUBTEST( test_complex(32) ); CALL_SUBTEST( test_complex(32) ); - CALL_SUBTEST( test_complex(1024) ); CALL_SUBTEST( test_complex(1024) ); CALL_SUBTEST( test_complex(1024) ); + CALL_SUBTEST( test_complex(256) ); CALL_SUBTEST( test_complex(256) ); CALL_SUBTEST( test_complex(256) ); CALL_SUBTEST( test_complex(3*8) ); CALL_SUBTEST( test_complex(3*8) ); CALL_SUBTEST( test_complex(3*8) ); CALL_SUBTEST( test_complex(5*32) ); CALL_SUBTEST( test_complex(5*32) ); CALL_SUBTEST( test_complex(5*32) ); CALL_SUBTEST( test_complex(2*3*4) ); CALL_SUBTEST( test_complex(2*3*4) ); CALL_SUBTEST( test_complex(2*3*4) ); @@ -120,8 +128,9 @@ void test_FFT() #endif #if 1 + CALL_SUBTEST( test_scalar(45) ); CALL_SUBTEST( test_scalar(45) ); CALL_SUBTEST( test_scalar(45) ); CALL_SUBTEST( test_scalar(32) ); CALL_SUBTEST( test_scalar(32) ); CALL_SUBTEST( test_scalar(32) ); - CALL_SUBTEST( test_scalar(1024) ); CALL_SUBTEST( test_scalar(1024) ); CALL_SUBTEST( test_scalar(1024) ); + CALL_SUBTEST( test_scalar(256) ); CALL_SUBTEST( test_scalar(256) ); CALL_SUBTEST( test_scalar(256) ); CALL_SUBTEST( test_scalar(2*3*4*5*7) ); CALL_SUBTEST( test_scalar(2*3*4*5*7) ); CALL_SUBTEST( test_scalar(2*3*4*5*7) ); #endif } From 210092d16c57ec2fd2f8f515151de284e8a737e3 Mon Sep 17 00:00:00 2001 From: Mark Borgerding Date: Mon, 25 May 2009 20:35:24 -0400 Subject: [PATCH 08/72] changed name from simple_fft_traits to ei_kissfft_impl --- bench/benchFFT.cpp | 2 +- unsupported/Eigen/{FFT.h => FFT} | 18 ++++++------ ...{simple_fft_traits.h => ei_kissfft_impl.h} | 29 +++++++++++++++++-- unsupported/test/FFT.cpp | 3 +- 4 files changed, 37 insertions(+), 15 deletions(-) rename unsupported/Eigen/{FFT.h => FFT} (86%) rename unsupported/Eigen/src/FFT/{simple_fft_traits.h => ei_kissfft_impl.h} (95%) diff --git a/bench/benchFFT.cpp b/bench/benchFFT.cpp index 84cc49fe3..ffa4ffffc 100644 --- a/bench/benchFFT.cpp +++ b/bench/benchFFT.cpp @@ -26,7 +26,7 @@ #include #include #include -#include +#include using namespace Eigen; using namespace std; diff --git a/unsupported/Eigen/FFT.h b/unsupported/Eigen/FFT similarity index 86% rename from unsupported/Eigen/FFT.h rename to unsupported/Eigen/FFT index c466423b7..3d852f5a2 100644 --- a/unsupported/Eigen/FFT.h +++ b/unsupported/Eigen/FFT @@ -25,28 +25,28 @@ #ifndef EIGEN_FFT_H #define EIGEN_FFT_H -// simple_fft_traits: small, free, reasonably efficient default, derived from kissfft -#include "src/FFT/simple_fft_traits.h" -#define DEFAULT_FFT_TRAITS simple_fft_traits +// ei_kissfft_impl: small, free, reasonably efficient default, derived from kissfft +#include "src/FFT/ei_kissfft_impl.h" +#define DEFAULT_FFT_IMPL ei_kissfft_impl // FFTW: faster, GPL-not LGPL, bigger code size #ifdef FFTW_PATIENT // definition of FFTW_PATIENT indicates the caller has included fftw3.h, we can use FFTW routines // TODO -// #include "src/FFT/fftw_traits.h" -// #define DEFAULT_FFT_TRAITS fftw_traits +// #include "src/FFT/ei_fftw_impl.h" +// #define DEFAULT_FFT_IMPL ei_fftw_impl #endif // intel Math Kernel Library: fastest, commerical #ifdef _MKL_DFTI_H_ // mkl_dfti.h has been included, we can use MKL FFT routines // TODO -// #include "src/FFT/imkl_traits.h" -// #define DEFAULT_FFT_TRAITS imkl_traits +// #include "src/FFT/ei_imkl_impl.h" +// #define DEFAULT_FFT_IMPL ei_imkl_impl #endif namespace Eigen { template + typename _Traits=DEFAULT_FFT_IMPL<_Scalar> > class FFT { @@ -90,6 +90,6 @@ class FFT private: traits_type m_traits; }; -#undef DEFAULT_FFT_TRAITS +#undef DEFAULT_FFT_IMPL } #endif diff --git a/unsupported/Eigen/src/FFT/simple_fft_traits.h b/unsupported/Eigen/src/FFT/ei_kissfft_impl.h similarity index 95% rename from unsupported/Eigen/src/FFT/simple_fft_traits.h rename to unsupported/Eigen/src/FFT/ei_kissfft_impl.h index 1e2be8f79..ce2c9f16e 100644 --- a/unsupported/Eigen/src/FFT/simple_fft_traits.h +++ b/unsupported/Eigen/src/FFT/ei_kissfft_impl.h @@ -24,16 +24,15 @@ #include #include -#include namespace Eigen { template - struct simple_fft_traits + struct ei_kissfft_impl { typedef _Scalar Scalar; typedef std::complex Complex; - simple_fft_traits() : m_nfft(0) {} + ei_kissfft_impl() : m_nfft(0) {} template void fwd( Complex * dst,const _Src *src,int nfft) @@ -370,5 +369,29 @@ namespace Eigen { std::vector m_realTwiddles; std::vector m_stageRadix; std::vector m_stageRemainder; +/* + enum {FORWARD,INVERSE,REAL,COMPLEX}; + + struct PlanKey + { + PlanKey(int nfft,bool isinverse,bool iscomplex) + { + _key = (nfft<<2) | (isinverse<<1) | iscomplex; + } + + bool operator<(const PlanKey & other) const + { + return this->_key < other._key; + } + int _key; + }; + + struct PlanData + { + std::vector m_twiddles; + }; + + std::map. #include "main.h" -#include - +#include using namespace std; From 03ed6f9bfb63879d475f5bb8ea46cff96063d010 Mon Sep 17 00:00:00 2001 From: Mark Borgerding Date: Mon, 25 May 2009 23:06:49 -0400 Subject: [PATCH 09/72] refactored ei_kissfft_impl to maintain a cache of cpx fft plans --- unsupported/Eigen/src/FFT/ei_kissfft_impl.h | 612 ++++++++++---------- unsupported/test/FFT.cpp | 7 +- 2 files changed, 320 insertions(+), 299 deletions(-) diff --git a/unsupported/Eigen/src/FFT/ei_kissfft_impl.h b/unsupported/Eigen/src/FFT/ei_kissfft_impl.h index ce2c9f16e..3580e6c61 100644 --- a/unsupported/Eigen/src/FFT/ei_kissfft_impl.h +++ b/unsupported/Eigen/src/FFT/ei_kissfft_impl.h @@ -24,21 +24,279 @@ #include #include +#include namespace Eigen { + template + struct ei_kiss_cpx_fft + { + typedef _Scalar Scalar; + typedef std::complex Complex; + std::vector m_twiddles; + std::vector m_stageRadix; + std::vector m_stageRemainder; + bool m_inverse; + + ei_kiss_cpx_fft() { } + + void make_twiddles(int nfft,bool inverse) + { + m_inverse = inverse; + m_twiddles.resize(nfft); + Scalar phinc = (inverse?2:-2)* acos( (Scalar) -1) / nfft; + for (int i=0;in) + p=n;// impossible to have a factor > sqrt(n) + } + n /= p; + m_stageRadix.push_back(p); + m_stageRemainder.push_back(n); + }while(n>1); + } + } + + template + void work( int stage,Complex * xout, const _Src * xin, size_t fstride,size_t in_stride) + { + int p = m_stageRadix[stage]; + int m = m_stageRemainder[stage]; + Complex * Fout_beg = xout; + Complex * Fout_end = xout + p*m; + + if (m>1) { + do{ + // recursive call: + // DFT of size m*p performed by doing + // p instances of smaller DFTs of size m, + // each one takes a decimated version of the input + work(stage+1, xout , xin, fstride*p,in_stride); + xin += fstride*in_stride; + }while( (xout += m) != Fout_end ); + }else{ + do{ + *xout = *xin; + xin += fstride*in_stride; + }while(++xout != Fout_end ); + } + xout=Fout_beg; + + // recombine the p smaller DFTs + switch (p) { + case 2: bfly2(xout,fstride,m); break; + case 3: bfly3(xout,fstride,m); break; + case 4: bfly4(xout,fstride,m); break; + case 5: bfly5(xout,fstride,m); break; + default: bfly_generic(xout,fstride,m,p); break; + } + } + + void bfly2( Complex * Fout, const size_t fstride, int m) + { + for (int k=0;kreal() - .5*scratch[3].real() , Fout->imag() - .5*scratch[3].imag() ); + scratch[0] *= epi3.imag(); + *Fout += scratch[3]; + Fout[m2] = Complex( Fout[m].real() + scratch[0].imag() , Fout[m].imag() - scratch[0].real() ); + Fout[m] += Complex( -scratch[0].imag(),scratch[0].real() ); + ++Fout; + }while(--k); + } + + void bfly5( Complex * Fout, const size_t fstride, const size_t m) + { + Complex *Fout0,*Fout1,*Fout2,*Fout3,*Fout4; + size_t u; + Complex scratch[13]; + Complex * twiddles = &m_twiddles[0]; + Complex *tw; + Complex ya,yb; + ya = twiddles[fstride*m]; + yb = twiddles[fstride*2*m]; + + Fout0=Fout; + Fout1=Fout0+m; + Fout2=Fout0+2*m; + Fout3=Fout0+3*m; + Fout4=Fout0+4*m; + + tw=twiddles; + for ( u=0; u=Norig) twidx-=Norig; + t=scratchbuf[q] * twiddles[twidx]; + Fout[ k ] += t; + } + k += m; + } + } + } + }; + + template struct ei_kissfft_impl { typedef _Scalar Scalar; typedef std::complex Complex; - ei_kissfft_impl() : m_nfft(0) {} + ei_kissfft_impl() {} + + void clear() + { + m_plans.clear(); + m_realTwiddles.clear(); + } template void fwd( Complex * dst,const _Src *src,int nfft) { - prepare(nfft,false); - work(0, dst, src, 1,1); + get_plan(nfft,false).work(0, dst, src, 1,1); } // real-to-complex forward FFT @@ -47,16 +305,16 @@ namespace Eigen { // then fill in the conjugate symmetric half void fwd( Complex * dst,const Scalar * src,int nfft) { - if ( nfft&1 ) { + if ( nfft&3 ) { // use generic mode for odd - prepare(nfft,false); - work(0, dst, src, 1,1); + get_plan(nfft,false).work(0, dst, src, 1,1); }else{ int ncfft = nfft>>1; int ncfft2 = nfft>>2; + Complex * rtw = real_twiddles(ncfft2); + // use optimized mode for even real - fwd( dst, reinterpret_cast (src),ncfft); - make_real_twiddles(nfft); + fwd( dst, reinterpret_cast (src), ncfft); Complex dc = dst[0].real() + dst[0].imag(); Complex nyquist = dst[0].real() - dst[0].imag(); int k; @@ -65,8 +323,7 @@ namespace Eigen { Complex fpnk = conj(dst[ncfft-k]); Complex f1k = fpk + fpnk; Complex f2k = fpk - fpnk; - //Complex tw = f2k * exp( Complex(0,-3.14159265358979323846264338327 * ((double) (k) / ncfft + .5) ) ); - Complex tw= f2k * m_realTwiddles[k-1]; + Complex tw= f2k * rtw[k-1]; dst[k] = (f1k + tw) * Scalar(.5); dst[ncfft-k] = conj(f1k -tw)*Scalar(.5); @@ -94,304 +351,67 @@ namespace Eigen { void inv(Complex * dst,const Complex *src,int nfft) { - prepare(nfft,true); - work(0, dst, src, 1,1); - scale(dst, Scalar(1)/m_nfft ); - } - - void prepare(int nfft,bool inverse) - { - make_twiddles(nfft,inverse); - factorize(nfft); - } - - void make_real_twiddles(int nfft) - { - int ncfft2 = nfft>>2; - if ( m_realTwiddles.size() != ncfft2) { - m_realTwiddles.resize(ncfft2); - int ncfft= nfft>>1; - for (int k=1;k<=ncfft2;++k) - m_realTwiddles[k-1] = exp( Complex(0,-3.14159265358979323846264338327 * ((double) (k) / ncfft + .5) ) ); - } - } - - void make_twiddles(int nfft,bool inverse) - { - if ( m_twiddles.size() == nfft) { - // reuse the twiddles, conjugate if necessary - if (inverse != m_inverse) - for (int i=0;in) - p=n;// impossible to have a factor > sqrt(n) - } - n /= p; - m_stageRadix.push_back(p); - m_stageRemainder.push_back(n); - }while(n>1); - } - m_nfft = nfft; - } - - void scale(Complex *dst,Scalar s) - { - for (int k=0;k - void work( int stage,Complex * xout, const _Src * xin, size_t fstride,size_t in_stride) + typedef ei_kiss_cpx_fft PlanData; + + typedef std::map PlanMap; + PlanMap m_plans; + std::map > m_realTwiddles; + + int PlanKey(int nfft,bool isinverse) const { return (nfft<<1) | isinverse; } + + PlanData & get_plan(int nfft,bool inverse) { - int p = m_stageRadix[stage]; - int m = m_stageRemainder[stage]; - Complex * Fout_beg = xout; - Complex * Fout_end = xout + p*m; - - if (m>1) { - do{ - // recursive call: - // DFT of size m*p performed by doing - // p instances of smaller DFTs of size m, - // each one takes a decimated version of the input - work(stage+1, xout , xin, fstride*p,in_stride); - xin += fstride*in_stride; - }while( (xout += m) != Fout_end ); - }else{ - do{ - *xout = *xin; - xin += fstride*in_stride; - }while(++xout != Fout_end ); - } - xout=Fout_beg; - - // recombine the p smaller DFTs - switch (p) { - case 2: bfly2(xout,fstride,m); break; - case 3: bfly3(xout,fstride,m); break; - case 4: bfly4(xout,fstride,m); break; - case 5: bfly5(xout,fstride,m); break; - default: bfly_generic(xout,fstride,m,p); break; - } - } - - void bfly2( Complex * Fout, const size_t fstride, int m) - { - for (int k=0;kreal() - .5*scratch[3].real() , Fout->imag() - .5*scratch[3].imag() ); - scratch[0] *= epi3.imag(); - *Fout += scratch[3]; - Fout[m2] = Complex( Fout[m].real() + scratch[0].imag() , Fout[m].imag() - scratch[0].real() ); - Fout[m] += Complex( -scratch[0].imag(),scratch[0].real() ); - ++Fout; - }while(--k); - } - - void bfly5( Complex * Fout, const size_t fstride, const size_t m) - { - Complex *Fout0,*Fout1,*Fout2,*Fout3,*Fout4; - size_t u; - Complex scratch[13]; - Complex * twiddles = &m_twiddles[0]; - Complex *tw; - Complex ya,yb; - ya = twiddles[fstride*m]; - yb = twiddles[fstride*2*m]; - - Fout0=Fout; - Fout1=Fout0+m; - Fout2=Fout0+2*m; - Fout3=Fout0+3*m; - Fout4=Fout0+4*m; - - tw=twiddles; - for ( u=0; u::iterator MapIt; + MapIt it; + it = m_plans.find( PlanKey(nfft,inverse) ); + if (it == m_plans.end() ) { + // create new entry + it = m_plans.insert( make_pair( PlanKey(nfft,inverse) , PlanData() ) ); + MapIt it2 = m_plans.find( PlanKey(nfft,!inverse) ); + if (it2 != m_plans.end() ) { + it->second = it2.second; + it->second.invert(); + }else{ + it->second.make_twiddles(nfft,inverse); + it->second.factorize(nfft); + } } - - k=u; - for ( q1=0 ; q1

=Norig) twidx-=Norig; - t=scratchbuf[q] * twiddles[twidx]; - Fout[ k ] += t; - } - k += m; + return it->second; + */ + PlanData & pd = m_plans[ PlanKey(nfft,inverse) ]; + if ( pd.m_twiddles.size() == 0 ) { + pd.make_twiddles(nfft,inverse); + pd.factorize(nfft); } - } + return pd; } - int m_nfft; - bool m_inverse; - std::vector m_twiddles; - std::vector m_realTwiddles; - std::vector m_stageRadix; - std::vector m_stageRemainder; -/* - enum {FORWARD,INVERSE,REAL,COMPLEX}; - - struct PlanKey + Complex * real_twiddles(int ncfft2) { - PlanKey(int nfft,bool isinverse,bool iscomplex) - { - _key = (nfft<<2) | (isinverse<<1) | iscomplex; + std::vector & twidref = m_realTwiddles[ncfft2];// creates new if not there + if ( (int)twidref.size() != ncfft2 ) { + twidref.resize(ncfft2); + int ncfft= ncfft2<<1; + Scalar pi = acos( Scalar(-1) ); + for (int k=1;k<=ncfft2;++k) + twidref[k-1] = exp( Complex(0,-pi * ((double) (k) / ncfft + .5) ) ); } + return &twidref[0]; + } - bool operator<(const PlanKey & other) const - { - return this->_key < other._key; - } - int _key; - }; - - struct PlanData + void scale(Complex *dst,int n,Scalar s) { - std::vector m_twiddles; - }; - - std::map promote(long double x) { return complex( x); { long double totalpower=0; long double difpower=0; - cerr <<"idx\ttruth\t\tvalue\n"; + cerr <<"idx\ttruth\t\tvalue\t|dif|=\n"; for (size_t k0=0;k0 acc = 0; long double phinc = -2.*k0* M_PIl / timebuf.size(); @@ -55,7 +55,7 @@ complex promote(long double x) { return complex( x); complex x = promote(fftbuf[k0]); complex dif = acc - x; difpower += norm(dif); - cerr << k0 << "\t" << acc << "\t" << x << endl; + cerr << k0 << "\t" << acc << "\t" << x << "\t" << sqrt(norm(dif)) << endl; } cerr << "rmse:" << sqrt(difpower/totalpower) << endl; return sqrt(difpower/totalpower); @@ -127,8 +127,9 @@ void test_FFT() #endif #if 1 - CALL_SUBTEST( test_scalar(45) ); CALL_SUBTEST( test_scalar(45) ); CALL_SUBTEST( test_scalar(45) ); CALL_SUBTEST( test_scalar(32) ); CALL_SUBTEST( test_scalar(32) ); CALL_SUBTEST( test_scalar(32) ); + CALL_SUBTEST( test_scalar(45) ); CALL_SUBTEST( test_scalar(45) ); CALL_SUBTEST( test_scalar(45) ); + CALL_SUBTEST( test_scalar(50) ); CALL_SUBTEST( test_scalar(50) ); CALL_SUBTEST( test_scalar(50) ); CALL_SUBTEST( test_scalar(256) ); CALL_SUBTEST( test_scalar(256) ); CALL_SUBTEST( test_scalar(256) ); CALL_SUBTEST( test_scalar(2*3*4*5*7) ); CALL_SUBTEST( test_scalar(2*3*4*5*7) ); CALL_SUBTEST( test_scalar(2*3*4*5*7) ); #endif From 09b47332553a79dab30516e6b1d410dea90cf9b7 Mon Sep 17 00:00:00 2001 From: Mark Borgerding Date: Mon, 25 May 2009 23:52:21 -0400 Subject: [PATCH 10/72] added real-optimized inverse FFT (NFFT must be multiple of 4) --- bench/benchFFT.cpp | 30 +- unsupported/Eigen/src/FFT/ei_kissfft_impl.h | 688 ++++++++++---------- 2 files changed, 370 insertions(+), 348 deletions(-) diff --git a/bench/benchFFT.cpp b/bench/benchFFT.cpp index ffa4ffffc..14f5063fb 100644 --- a/bench/benchFFT.cpp +++ b/bench/benchFFT.cpp @@ -53,7 +53,7 @@ template <> string nameof() {return "long double";} using namespace Eigen; template -void bench(int nfft) +void bench(int nfft,bool fwd) { typedef typename NumTraits::Real Scalar; typedef typename std::complex Complex; @@ -69,7 +69,10 @@ void bench(int nfft) for (int k=0;k<8;++k) { timer.start(); for(int i = 0; i < nits; i++) - fft.fwd( outbuf , inbuf); + if (fwd) + fft.fwd( outbuf , inbuf); + else + fft.inv(inbuf,outbuf); timer.stop(); } @@ -82,16 +85,27 @@ void bench(int nfft) mflops /= 2; } + if (fwd) + cout << " fwd"; + else + cout << " inv"; + cout << " NFFT=" << nfft << " " << (double(1e-6*nfft*nits)/timer.value()) << " MS/s " << mflops << "MFLOPS\n"; } int main(int argc,char ** argv) { - bench >(NFFT); - bench(NFFT); - bench >(NFFT); - bench(NFFT); - bench >(NFFT); - bench(NFFT); + bench >(NFFT,true); + bench >(NFFT,false); + bench(NFFT,true); + bench(NFFT,false); + bench >(NFFT,true); + bench >(NFFT,false); + bench(NFFT,true); + bench(NFFT,false); + bench >(NFFT,true); + bench >(NFFT,false); + bench(NFFT,true); + bench(NFFT,false); return 0; } diff --git a/unsupported/Eigen/src/FFT/ei_kissfft_impl.h b/unsupported/Eigen/src/FFT/ei_kissfft_impl.h index 3580e6c61..453c7f6da 100644 --- a/unsupported/Eigen/src/FFT/ei_kissfft_impl.h +++ b/unsupported/Eigen/src/FFT/ei_kissfft_impl.h @@ -28,390 +28,398 @@ namespace Eigen { - template - struct ei_kiss_cpx_fft + template + struct ei_kiss_cpx_fft + { + typedef _Scalar Scalar; + typedef std::complex Complex; + std::vector m_twiddles; + std::vector m_stageRadix; + std::vector m_stageRemainder; + bool m_inverse; + + void make_twiddles(int nfft,bool inverse) { - typedef _Scalar Scalar; - typedef std::complex Complex; - std::vector m_twiddles; - std::vector m_stageRadix; - std::vector m_stageRemainder; - bool m_inverse; + m_inverse = inverse; + m_twiddles.resize(nfft); + Scalar phinc = (inverse?2:-2)* acos( (Scalar) -1) / nfft; + for (int i=0;in) + p=n;// impossible to have a factor > sqrt(n) + } + n /= p; + m_stageRadix.push_back(p); + m_stageRemainder.push_back(n); + }while(n>1); + } + + template + void work( int stage,Complex * xout, const _Src * xin, size_t fstride,size_t in_stride) + { + int p = m_stageRadix[stage]; + int m = m_stageRemainder[stage]; + Complex * Fout_beg = xout; + Complex * Fout_end = xout + p*m; + + if (m>1) { + do{ + // recursive call: + // DFT of size m*p performed by doing + // p instances of smaller DFTs of size m, + // each one takes a decimated version of the input + work(stage+1, xout , xin, fstride*p,in_stride); + xin += fstride*in_stride; + }while( (xout += m) != Fout_end ); + }else{ + do{ + *xout = *xin; + xin += fstride*in_stride; + }while(++xout != Fout_end ); + } + xout=Fout_beg; + + // recombine the p smaller DFTs + switch (p) { + case 2: bfly2(xout,fstride,m); break; + case 3: bfly3(xout,fstride,m); break; + case 4: bfly4(xout,fstride,m); break; + case 5: bfly5(xout,fstride,m); break; + default: bfly_generic(xout,fstride,m,p); break; + } + } + + void bfly2( Complex * Fout, const size_t fstride, int m) + { + for (int k=0;kreal() - .5*scratch[3].real() , Fout->imag() - .5*scratch[3].imag() ); + scratch[0] *= epi3.imag(); + *Fout += scratch[3]; + Fout[m2] = Complex( Fout[m].real() + scratch[0].imag() , Fout[m].imag() - scratch[0].real() ); + Fout[m] += Complex( -scratch[0].imag(),scratch[0].real() ); + ++Fout; + }while(--k); + } + + void bfly5( Complex * Fout, const size_t fstride, const size_t m) + { + Complex *Fout0,*Fout1,*Fout2,*Fout3,*Fout4; + size_t u; + Complex scratch[13]; + Complex * twiddles = &m_twiddles[0]; + Complex *tw; + Complex ya,yb; + ya = twiddles[fstride*m]; + yb = twiddles[fstride*2*m]; + + Fout0=Fout; + Fout1=Fout0+m; + Fout2=Fout0+2*m; + Fout3=Fout0+3*m; + Fout4=Fout0+4*m; + + tw=twiddles; + for ( u=0; u=Norig) twidx-=Norig; + t=scratchbuf[q] * twiddles[twidx]; + Fout[ k ] += t; + } + k += m; } - - void factorize(int nfft) - { - if (m_stageRadix.size()==0 || m_stageRadix[0] * m_stageRemainder[0] != nfft) - { - m_stageRadix.resize(0); - m_stageRemainder.resize(0); - //factorize - //start factoring out 4's, then 2's, then 3,5,7,9,... - int n= nfft; - int p=4; - do { - while (n % p) { - switch (p) { - case 4: p = 2; break; - case 2: p = 3; break; - default: p += 2; break; - } - if (p*p>n) - p=n;// impossible to have a factor > sqrt(n) - } - n /= p; - m_stageRadix.push_back(p); - m_stageRemainder.push_back(n); - }while(n>1); - } - } - - template - void work( int stage,Complex * xout, const _Src * xin, size_t fstride,size_t in_stride) - { - int p = m_stageRadix[stage]; - int m = m_stageRemainder[stage]; - Complex * Fout_beg = xout; - Complex * Fout_end = xout + p*m; - - if (m>1) { - do{ - // recursive call: - // DFT of size m*p performed by doing - // p instances of smaller DFTs of size m, - // each one takes a decimated version of the input - work(stage+1, xout , xin, fstride*p,in_stride); - xin += fstride*in_stride; - }while( (xout += m) != Fout_end ); - }else{ - do{ - *xout = *xin; - xin += fstride*in_stride; - }while(++xout != Fout_end ); - } - xout=Fout_beg; - - // recombine the p smaller DFTs - switch (p) { - case 2: bfly2(xout,fstride,m); break; - case 3: bfly3(xout,fstride,m); break; - case 4: bfly4(xout,fstride,m); break; - case 5: bfly5(xout,fstride,m); break; - default: bfly_generic(xout,fstride,m,p); break; - } - } - - void bfly2( Complex * Fout, const size_t fstride, int m) - { - for (int k=0;kreal() - .5*scratch[3].real() , Fout->imag() - .5*scratch[3].imag() ); - scratch[0] *= epi3.imag(); - *Fout += scratch[3]; - Fout[m2] = Complex( Fout[m].real() + scratch[0].imag() , Fout[m].imag() - scratch[0].real() ); - Fout[m] += Complex( -scratch[0].imag(),scratch[0].real() ); - ++Fout; - }while(--k); - } - - void bfly5( Complex * Fout, const size_t fstride, const size_t m) - { - Complex *Fout0,*Fout1,*Fout2,*Fout3,*Fout4; - size_t u; - Complex scratch[13]; - Complex * twiddles = &m_twiddles[0]; - Complex *tw; - Complex ya,yb; - ya = twiddles[fstride*m]; - yb = twiddles[fstride*2*m]; - - Fout0=Fout; - Fout1=Fout0+m; - Fout2=Fout0+2*m; - Fout3=Fout0+3*m; - Fout4=Fout0+4*m; - - tw=twiddles; - for ( u=0; u=Norig) twidx-=Norig; - t=scratchbuf[q] * twiddles[twidx]; - Fout[ k ] += t; - } - k += m; - } - } - } - }; - + } + } + }; template struct ei_kissfft_impl { - typedef _Scalar Scalar; - typedef std::complex Complex; - ei_kissfft_impl() {} + typedef _Scalar Scalar; + typedef std::complex Complex; - void clear() - { + void clear() + { m_plans.clear(); m_realTwiddles.clear(); - } + } - template - void fwd( Complex * dst,const _Src *src,int nfft) - { + template + void fwd( Complex * dst,const _Src *src,int nfft) + { get_plan(nfft,false).work(0, dst, src, 1,1); - } + } - // real-to-complex forward FFT - // perform two FFTs of src even and src odd - // then twiddle to recombine them into the half-spectrum format - // then fill in the conjugate symmetric half - void fwd( Complex * dst,const Scalar * src,int nfft) - { + // real-to-complex forward FFT + // perform two FFTs of src even and src odd + // then twiddle to recombine them into the half-spectrum format + // then fill in the conjugate symmetric half + void fwd( Complex * dst,const Scalar * src,int nfft) + { if ( nfft&3 ) { - // use generic mode for odd - get_plan(nfft,false).work(0, dst, src, 1,1); + // use generic mode for odd + get_plan(nfft,false).work(0, dst, src, 1,1); }else{ - int ncfft = nfft>>1; - int ncfft2 = nfft>>2; - Complex * rtw = real_twiddles(ncfft2); + int ncfft = nfft>>1; + int ncfft2 = nfft>>2; + Complex * rtw = real_twiddles(ncfft2); - // use optimized mode for even real - fwd( dst, reinterpret_cast (src), ncfft); - Complex dc = dst[0].real() + dst[0].imag(); - Complex nyquist = dst[0].real() - dst[0].imag(); - int k; - for ( k=1;k <= ncfft2 ; ++k ) { - Complex fpk = dst[k]; - Complex fpnk = conj(dst[ncfft-k]); - Complex f1k = fpk + fpnk; - Complex f2k = fpk - fpnk; - Complex tw= f2k * rtw[k-1]; + // use optimized mode for even real + fwd( dst, reinterpret_cast (src), ncfft); + Complex dc = dst[0].real() + dst[0].imag(); + Complex nyquist = dst[0].real() - dst[0].imag(); + int k; + for ( k=1;k <= ncfft2 ; ++k ) { + Complex fpk = dst[k]; + Complex fpnk = conj(dst[ncfft-k]); + Complex f1k = fpk + fpnk; + Complex f2k = fpk - fpnk; + Complex tw= f2k * rtw[k-1]; + dst[k] = (f1k + tw) * Scalar(.5); + dst[ncfft-k] = conj(f1k -tw)*Scalar(.5); + } - dst[k] = (f1k + tw) * Scalar(.5); - dst[ncfft-k] = conj(f1k -tw)*Scalar(.5); - } - - // place conjugate-symmetric half at the end for completeness - // TODO: make this configurable ( opt-out ) - for ( k=1;k < ncfft ; ++k ) - dst[nfft-k] = conj(dst[k]); - - dst[0] = dc; - dst[ncfft] = nyquist; + // place conjugate-symmetric half at the end for completeness + // TODO: make this configurable ( opt-out ) + for ( k=1;k < ncfft ; ++k ) + dst[nfft-k] = conj(dst[k]); + dst[0] = dc; + dst[ncfft] = nyquist; } - } + } - // half-complex to scalar - void inv( Scalar * dst,const Complex * src,int nfft) - { - // TODO add optimized version for even numbers - std::vector tmp(nfft); - inv(&tmp[0],src,nfft); - for (int k=0;k>1; + int ncfft2 = nfft>>2; + Complex * rtw = real_twiddles(ncfft2); + m_scratchBuf.resize(ncfft); + m_scratchBuf[0] = Complex( src[0].real() + src[ncfft].real(), src[0].real() - src[ncfft].real() ); + for (int k = 1; k <= ncfft / 2; ++k) { + Complex fk = src[k]; + Complex fnkc = conj(src[ncfft-k]); + Complex fek = fk + fnkc; + Complex tmp = fk - fnkc; + Complex fok = tmp * conj(rtw[k-1]); + m_scratchBuf[k] = fek + fok; + m_scratchBuf[ncfft-k] = conj(fek - fok); + } + scale(&m_scratchBuf[0], ncfft, Scalar(1)/nfft ); + get_plan(ncfft,true).work(0, reinterpret_cast(dst), &m_scratchBuf[0], 1,1); + } + } - typedef ei_kiss_cpx_fft PlanData; + private: - typedef std::map PlanMap; - PlanMap m_plans; - std::map > m_realTwiddles; + typedef ei_kiss_cpx_fft PlanData; - int PlanKey(int nfft,bool isinverse) const { return (nfft<<1) | isinverse; } + typedef std::map PlanMap; + PlanMap m_plans; + std::map > m_realTwiddles; + std::vector m_scratchBuf; - PlanData & get_plan(int nfft,bool inverse) - { - /* + int PlanKey(int nfft,bool isinverse) const { return (nfft<<1) | isinverse; } + + PlanData & get_plan(int nfft,bool inverse) + { + /* TODO: figure out why this does not work (g++ 4.3.2) * for some reason this does not work * - typedef typename std::map::iterator MapIt; - MapIt it; - it = m_plans.find( PlanKey(nfft,inverse) ); - if (it == m_plans.end() ) { - // create new entry - it = m_plans.insert( make_pair( PlanKey(nfft,inverse) , PlanData() ) ); - MapIt it2 = m_plans.find( PlanKey(nfft,!inverse) ); - if (it2 != m_plans.end() ) { - it->second = it2.second; - it->second.invert(); - }else{ - it->second.make_twiddles(nfft,inverse); - it->second.factorize(nfft); - } + PlanMap::iterator it; + it = m_plans.find( PlanKey(nfft,inverse) ); + if (it == m_plans.end() ) { + // create new entry + it = m_plans.insert( make_pair( PlanKey(nfft,inverse) , PlanData() ) ); + MapIt it2 = m_plans.find( PlanKey(nfft,!inverse) ); + if (it2 != m_plans.end() ) { + it->second = it2.second; + it->second.conjugate(); + }else{ + it->second.make_twiddles(nfft,inverse); + it->second.factorize(nfft); + } } return it->second; */ PlanData & pd = m_plans[ PlanKey(nfft,inverse) ]; if ( pd.m_twiddles.size() == 0 ) { - pd.make_twiddles(nfft,inverse); - pd.factorize(nfft); + pd.make_twiddles(nfft,inverse); + pd.factorize(nfft); } return pd; - } + } - Complex * real_twiddles(int ncfft2) - { + Complex * real_twiddles(int ncfft2) + { std::vector & twidref = m_realTwiddles[ncfft2];// creates new if not there if ( (int)twidref.size() != ncfft2 ) { - twidref.resize(ncfft2); - int ncfft= ncfft2<<1; - Scalar pi = acos( Scalar(-1) ); - for (int k=1;k<=ncfft2;++k) - twidref[k-1] = exp( Complex(0,-pi * ((double) (k) / ncfft + .5) ) ); + twidref.resize(ncfft2); + int ncfft= ncfft2<<1; + Scalar pi = acos( Scalar(-1) ); + for (int k=1;k<=ncfft2;++k) + twidref[k-1] = exp( Complex(0,-pi * ((double) (k) / ncfft + .5) ) ); } return &twidref[0]; - } + } - void scale(Complex *dst,int n,Scalar s) - { + void scale(Complex *dst,int n,Scalar s) + { for (int k=0;k Date: Wed, 27 May 2009 21:32:42 -0400 Subject: [PATCH 11/72] various comment changes --- unsupported/Eigen/FFT | 6 +- unsupported/Eigen/src/FFT/ei_kissfft_impl.h | 472 ++++++++++---------- 2 files changed, 231 insertions(+), 247 deletions(-) diff --git a/unsupported/Eigen/FFT b/unsupported/Eigen/FFT index 3d852f5a2..31d8c74c5 100644 --- a/unsupported/Eigen/FFT +++ b/unsupported/Eigen/FFT @@ -1,5 +1,5 @@ // This file is part of Eigen, a lightweight C++ template library -// for linear algebra. Eigen itself is part of the KDE project. +// for linear algebra. // // Copyright (C) 2009 Mark Borgerding mark a borgerding net // @@ -29,14 +29,14 @@ #include "src/FFT/ei_kissfft_impl.h" #define DEFAULT_FFT_IMPL ei_kissfft_impl -// FFTW: faster, GPL-not LGPL, bigger code size +// FFTW: faster, GPL -- incompatible with Eigen in LGPL form, bigger code size #ifdef FFTW_PATIENT // definition of FFTW_PATIENT indicates the caller has included fftw3.h, we can use FFTW routines // TODO // #include "src/FFT/ei_fftw_impl.h" // #define DEFAULT_FFT_IMPL ei_fftw_impl #endif -// intel Math Kernel Library: fastest, commerical +// intel Math Kernel Library: fastest, commerical -- incompatible with Eigen in GPL form #ifdef _MKL_DFTI_H_ // mkl_dfti.h has been included, we can use MKL FFT routines // TODO // #include "src/FFT/ei_imkl_impl.h" diff --git a/unsupported/Eigen/src/FFT/ei_kissfft_impl.h b/unsupported/Eigen/src/FFT/ei_kissfft_impl.h index 453c7f6da..91fa5ca18 100644 --- a/unsupported/Eigen/src/FFT/ei_kissfft_impl.h +++ b/unsupported/Eigen/src/FFT/ei_kissfft_impl.h @@ -1,5 +1,5 @@ // This file is part of Eigen, a lightweight C++ template library -// for linear algebra. Eigen itself is part of the KDE project. +// for linear algebra. // // Copyright (C) 2009 Mark Borgerding mark a borgerding net // @@ -28,252 +28,255 @@ namespace Eigen { + // This FFT implementation was derived from kissfft http:sourceforge.net/projects/kissfft + // Copyright 2003-2009 Mark Borgerding + template - struct ei_kiss_cpx_fft - { - typedef _Scalar Scalar; - typedef std::complex Complex; - std::vector m_twiddles; - std::vector m_stageRadix; - std::vector m_stageRemainder; - bool m_inverse; - - void make_twiddles(int nfft,bool inverse) + struct ei_kiss_cpx_fft { - m_inverse = inverse; - m_twiddles.resize(nfft); - Scalar phinc = (inverse?2:-2)* acos( (Scalar) -1) / nfft; - for (int i=0;i Complex; + std::vector m_twiddles; + std::vector m_stageRadix; + std::vector m_stageRemainder; + bool m_inverse; - void conjugate() - { - m_inverse = !m_inverse; - for ( size_t i=0;in) + p=n;// impossible to have a factor > sqrt(n) + } + n /= p; + m_stageRadix.push_back(p); + m_stageRemainder.push_back(n); + }while(n>1); + } + + template + void work( int stage,Complex * xout, const _Src * xin, size_t fstride,size_t in_stride) + { + int p = m_stageRadix[stage]; + int m = m_stageRemainder[stage]; + Complex * Fout_beg = xout; + Complex * Fout_end = xout + p*m; + + if (m>1) { + do{ + // recursive call: + // DFT of size m*p performed by doing + // p instances of smaller DFTs of size m, + // each one takes a decimated version of the input + work(stage+1, xout , xin, fstride*p,in_stride); + xin += fstride*in_stride; + }while( (xout += m) != Fout_end ); + }else{ + do{ + *xout = *xin; + xin += fstride*in_stride; + }while(++xout != Fout_end ); + } + xout=Fout_beg; + + // recombine the p smaller DFTs + switch (p) { + case 2: bfly2(xout,fstride,m); break; + case 3: bfly3(xout,fstride,m); break; + case 4: bfly4(xout,fstride,m); break; + case 5: bfly5(xout,fstride,m); break; + default: bfly_generic(xout,fstride,m,p); break; } - if (p*p>n) - p=n;// impossible to have a factor > sqrt(n) } - n /= p; - m_stageRadix.push_back(p); - m_stageRemainder.push_back(n); - }while(n>1); - } - template - void work( int stage,Complex * xout, const _Src * xin, size_t fstride,size_t in_stride) - { - int p = m_stageRadix[stage]; - int m = m_stageRemainder[stage]; - Complex * Fout_beg = xout; - Complex * Fout_end = xout + p*m; + void bfly2( Complex * Fout, const size_t fstride, int m) + { + for (int k=0;k1) { do{ - // recursive call: - // DFT of size m*p performed by doing - // p instances of smaller DFTs of size m, - // each one takes a decimated version of the input - work(stage+1, xout , xin, fstride*p,in_stride); - xin += fstride*in_stride; - }while( (xout += m) != Fout_end ); - }else{ - do{ - *xout = *xin; - xin += fstride*in_stride; - }while(++xout != Fout_end ); + scratch[1]=Fout[m] * *tw1; + scratch[2]=Fout[m2] * *tw2; + + scratch[3]=scratch[1]+scratch[2]; + scratch[0]=scratch[1]-scratch[2]; + tw1 += fstride; + tw2 += fstride*2; + Fout[m] = Complex( Fout->real() - .5*scratch[3].real() , Fout->imag() - .5*scratch[3].imag() ); + scratch[0] *= epi3.imag(); + *Fout += scratch[3]; + Fout[m2] = Complex( Fout[m].real() + scratch[0].imag() , Fout[m].imag() - scratch[0].real() ); + Fout[m] += Complex( -scratch[0].imag(),scratch[0].real() ); + ++Fout; + }while(--k); } - xout=Fout_beg; - // recombine the p smaller DFTs - switch (p) { - case 2: bfly2(xout,fstride,m); break; - case 3: bfly3(xout,fstride,m); break; - case 4: bfly4(xout,fstride,m); break; - case 5: bfly5(xout,fstride,m); break; - default: bfly_generic(xout,fstride,m,p); break; - } - } + void bfly5( Complex * Fout, const size_t fstride, const size_t m) + { + Complex *Fout0,*Fout1,*Fout2,*Fout3,*Fout4; + size_t u; + Complex scratch[13]; + Complex * twiddles = &m_twiddles[0]; + Complex *tw; + Complex ya,yb; + ya = twiddles[fstride*m]; + yb = twiddles[fstride*2*m]; - void bfly2( Complex * Fout, const size_t fstride, int m) - { - for (int k=0;kreal() - .5*scratch[3].real() , Fout->imag() - .5*scratch[3].imag() ); - scratch[0] *= epi3.imag(); - *Fout += scratch[3]; - Fout[m2] = Complex( Fout[m].real() + scratch[0].imag() , Fout[m].imag() - scratch[0].real() ); - Fout[m] += Complex( -scratch[0].imag(),scratch[0].real() ); - ++Fout; - }while(--k); - } - - void bfly5( Complex * Fout, const size_t fstride, const size_t m) - { - Complex *Fout0,*Fout1,*Fout2,*Fout3,*Fout4; - size_t u; - Complex scratch[13]; - Complex * twiddles = &m_twiddles[0]; - Complex *tw; - Complex ya,yb; - ya = twiddles[fstride*m]; - yb = twiddles[fstride*2*m]; - - Fout0=Fout; - Fout1=Fout0+m; - Fout2=Fout0+2*m; - Fout3=Fout0+3*m; - Fout4=Fout0+4*m; - - tw=twiddles; - for ( u=0; u=Norig) twidx-=Norig; - t=scratchbuf[q] * twiddles[twidx]; - Fout[ k ] += t; + /* perform the butterfly for one stage of a mixed radix FFT */ + void bfly_generic( + Complex * Fout, + const size_t fstride, + int m, + int p + ) + { + int u,k,q1,q; + Complex * twiddles = &m_twiddles[0]; + Complex t; + int Norig = m_twiddles.size(); + Complex * scratchbuf = (Complex*)alloca(p*sizeof(Complex) ); + + for ( u=0; u=Norig) twidx-=Norig; + t=scratchbuf[q] * twiddles[twidx]; + Fout[ k ] += t; + } + k += m; } - k += m; } } - } - }; + }; template - struct ei_kissfft_impl - { + struct ei_kissfft_impl + { typedef _Scalar Scalar; typedef std::complex Complex; @@ -284,10 +287,10 @@ namespace Eigen { } template - void fwd( Complex * dst,const _Src *src,int nfft) - { - get_plan(nfft,false).work(0, dst, src, 1,1); - } + void fwd( Complex * dst,const _Src *src,int nfft) + { + get_plan(nfft,false).work(0, dst, src, 1,1); + } // real-to-complex forward FFT // perform two FFTs of src even and src odd @@ -363,11 +366,10 @@ namespace Eigen { } } - private: - + private: typedef ei_kiss_cpx_fft PlanData; - typedef std::map PlanMap; + PlanMap m_plans; std::map > m_realTwiddles; std::vector m_scratchBuf; @@ -376,25 +378,7 @@ namespace Eigen { PlanData & get_plan(int nfft,bool inverse) { - /* TODO: figure out why this does not work (g++ 4.3.2) - * for some reason this does not work - * - PlanMap::iterator it; - it = m_plans.find( PlanKey(nfft,inverse) ); - if (it == m_plans.end() ) { - // create new entry - it = m_plans.insert( make_pair( PlanKey(nfft,inverse) , PlanData() ) ); - MapIt it2 = m_plans.find( PlanKey(nfft,!inverse) ); - if (it2 != m_plans.end() ) { - it->second = it2.second; - it->second.conjugate(); - }else{ - it->second.make_twiddles(nfft,inverse); - it->second.factorize(nfft); - } - } - return it->second; - */ + // TODO look for PlanKey(nfft, ! inverse) and conjugate the twiddles PlanData & pd = m_plans[ PlanKey(nfft,inverse) ]; if ( pd.m_twiddles.size() == 0 ) { pd.make_twiddles(nfft,inverse); @@ -421,5 +405,5 @@ namespace Eigen { for (int k=0;k Date: Sat, 30 May 2009 17:55:47 -0400 Subject: [PATCH 12/72] added ei_fftw_impl --- unsupported/Eigen/FFT | 5 +- unsupported/Eigen/src/FFT/ei_fftw_impl.h | 198 ++++++++++++++++++++ unsupported/Eigen/src/FFT/ei_kissfft_impl.h | 25 +-- 3 files changed, 214 insertions(+), 14 deletions(-) create mode 100644 unsupported/Eigen/src/FFT/ei_fftw_impl.h diff --git a/unsupported/Eigen/FFT b/unsupported/Eigen/FFT index 31d8c74c5..03f8504a4 100644 --- a/unsupported/Eigen/FFT +++ b/unsupported/Eigen/FFT @@ -30,9 +30,8 @@ #define DEFAULT_FFT_IMPL ei_kissfft_impl // FFTW: faster, GPL -- incompatible with Eigen in LGPL form, bigger code size -#ifdef FFTW_PATIENT // definition of FFTW_PATIENT indicates the caller has included fftw3.h, we can use FFTW routines -// TODO -// #include "src/FFT/ei_fftw_impl.h" +#ifdef FFTW_ESTIMATE // definition of FFTW_ESTIMATE indicates the caller has included fftw3.h, we can use FFTW routines +#include "src/FFT/ei_fftw_impl.h" // #define DEFAULT_FFT_IMPL ei_fftw_impl #endif diff --git a/unsupported/Eigen/src/FFT/ei_fftw_impl.h b/unsupported/Eigen/src/FFT/ei_fftw_impl.h new file mode 100644 index 000000000..d592bbb20 --- /dev/null +++ b/unsupported/Eigen/src/FFT/ei_fftw_impl.h @@ -0,0 +1,198 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2009 Mark Borgerding mark a borgerding net +// +// Eigen is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 3 of the License, or (at your option) any later version. +// +// Alternatively, you can redistribute it and/or +// modify it under the terms of the GNU General Public License as +// published by the Free Software Foundation; either version 2 of +// the License, or (at your option) any later version. +// +// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY +// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public +// License and a copy of the GNU General Public License along with +// Eigen. If not, see . + +namespace Eigen { + // FFTW uses non-const arguments + // so we must use ugly const_cast calls for all the args it uses + // + // This should be safe as long as + // 1. we use FFTW_ESTIMATE for all our planning + // see the FFTW docs section 4.3.2 "Planner Flags" + // 2. fftw_complex is compatible with std::complex + // This assumes std::complex layout is array of size 2 with real,imag + template + T * ei_fftw_cast(const T* p) + { + return const_cast( p); + } + + fftw_complex * ei_fftw_cast( const std::complex * p) + { + return const_cast( reinterpret_cast(p) ); + } + + fftwf_complex * ei_fftw_cast( const std::complex * p) + { + return const_cast( reinterpret_cast(p) ); + } + + fftwl_complex * ei_fftw_cast( const std::complex * p) + { + return const_cast( reinterpret_cast(p) ); + } + + template + struct ei_fftw_plan {}; + + template <> + struct ei_fftw_plan + { + typedef float scalar_type; + typedef fftwf_complex complex_type; + fftwf_plan m_plan; + ei_fftw_plan() :m_plan(NULL) {} + ~ei_fftw_plan() {if (m_plan) fftwf_destroy_plan(m_plan);} + + void fwd(complex_type * dst,complex_type * src,int nfft) { + if (m_plan==NULL) m_plan = fftwf_plan_dft_1d(nfft,src,dst, FFTW_FORWARD, FFTW_ESTIMATE); + fftwf_execute_dft( m_plan, src,dst); + } + void inv(complex_type * dst,complex_type * src,int nfft) { + if (m_plan==NULL) m_plan = fftwf_plan_dft_1d(nfft,src,dst, FFTW_BACKWARD , FFTW_ESTIMATE); + fftwf_execute_dft( m_plan, src,dst); + } + void fwd(complex_type * dst,scalar_type * src,int nfft) { + if (m_plan==NULL) m_plan = fftwf_plan_dft_r2c_1d(nfft,src,dst,FFTW_ESTIMATE); + fftwf_execute_dft_r2c( m_plan,src,dst); + } + void inv(scalar_type * dst,complex_type * src,int nfft) { + if (m_plan==NULL) + m_plan = fftwf_plan_dft_c2r_1d(nfft,src,dst,FFTW_ESTIMATE); + fftwf_execute_dft_c2r( m_plan, src,dst); + } + }; + template <> + struct ei_fftw_plan + { + typedef double scalar_type; + typedef fftw_complex complex_type; + fftw_plan m_plan; + ei_fftw_plan() :m_plan(NULL) {} + ~ei_fftw_plan() {if (m_plan) fftw_destroy_plan(m_plan);} + + void fwd(complex_type * dst,complex_type * src,int nfft) { + if (m_plan==NULL) m_plan = fftw_plan_dft_1d(nfft,src,dst, FFTW_FORWARD, FFTW_ESTIMATE); + fftw_execute_dft( m_plan, src,dst); + } + void inv(complex_type * dst,complex_type * src,int nfft) { + if (m_plan==NULL) m_plan = fftw_plan_dft_1d(nfft,src,dst, FFTW_BACKWARD , FFTW_ESTIMATE); + fftw_execute_dft( m_plan, src,dst); + } + void fwd(complex_type * dst,scalar_type * src,int nfft) { + if (m_plan==NULL) m_plan = fftw_plan_dft_r2c_1d(nfft,src,dst,FFTW_ESTIMATE); + fftw_execute_dft_r2c( m_plan,src,dst); + } + void inv(scalar_type * dst,complex_type * src,int nfft) { + if (m_plan==NULL) + m_plan = fftw_plan_dft_c2r_1d(nfft,src,dst,FFTW_ESTIMATE); + fftw_execute_dft_c2r( m_plan, src,dst); + } + }; + template <> + struct ei_fftw_plan + { + typedef long double scalar_type; + typedef fftwl_complex complex_type; + fftwl_plan m_plan; + ei_fftw_plan() :m_plan(NULL) {} + ~ei_fftw_plan() {if (m_plan) fftwl_destroy_plan(m_plan);} + + void fwd(complex_type * dst,complex_type * src,int nfft) { + if (m_plan==NULL) m_plan = fftwl_plan_dft_1d(nfft,src,dst, FFTW_FORWARD, FFTW_ESTIMATE); + fftwl_execute_dft( m_plan, src,dst); + } + void inv(complex_type * dst,complex_type * src,int nfft) { + if (m_plan==NULL) m_plan = fftwl_plan_dft_1d(nfft,src,dst, FFTW_BACKWARD , FFTW_ESTIMATE); + fftwl_execute_dft( m_plan, src,dst); + } + void fwd(complex_type * dst,scalar_type * src,int nfft) { + if (m_plan==NULL) m_plan = fftwl_plan_dft_r2c_1d(nfft,src,dst,FFTW_ESTIMATE); + fftwl_execute_dft_r2c( m_plan,src,dst); + } + void inv(scalar_type * dst,complex_type * src,int nfft) { + if (m_plan==NULL) + m_plan = fftwl_plan_dft_c2r_1d(nfft,src,dst,FFTW_ESTIMATE); + fftwl_execute_dft_c2r( m_plan, src,dst); + } + }; + + template + struct ei_fftw_impl + { + typedef _Scalar Scalar; + typedef std::complex Complex; + + void clear() + { + m_plans.clear(); + } + + void fwd( Complex * dst,const Complex *src,int nfft) + { + get_plan(nfft,false,dst,src).fwd(ei_fftw_cast(dst), ei_fftw_cast(src),nfft ); + } + + // real-to-complex forward FFT + void fwd( Complex * dst,const Scalar * src,int nfft) + { + get_plan(nfft,false,dst,src).fwd(ei_fftw_cast(dst), ei_fftw_cast(src) ,nfft); + int nhbins=(nfft>>1)+1; + for (int k=nhbins;k < nfft; ++k ) + dst[k] = conj(dst[nfft-k]); + } + + // inverse complex-to-complex + void inv(Complex * dst,const Complex *src,int nfft) + { + get_plan(nfft,true,dst,src).inv(ei_fftw_cast(dst), ei_fftw_cast(src),nfft ); + // scaling + Scalar s = 1./nfft; + for (int k=0;k PlanData; + typedef std::map PlanMap; + + PlanMap m_plans; + + PlanData & get_plan(int nfft,bool inverse,void * dst,const void * src) + { + bool inplace = (dst==src); + bool aligned = ( (reinterpret_cast(src)&15) | (reinterpret_cast(dst)&15) ) == 0; + int key = (nfft<<3 ) | (inverse<<2) | (inplace<<1) | aligned; + return m_plans[key]; + } + }; +} diff --git a/unsupported/Eigen/src/FFT/ei_kissfft_impl.h b/unsupported/Eigen/src/FFT/ei_kissfft_impl.h index 91fa5ca18..a84ac68a0 100644 --- a/unsupported/Eigen/src/FFT/ei_kissfft_impl.h +++ b/unsupported/Eigen/src/FFT/ei_kissfft_impl.h @@ -39,6 +39,7 @@ namespace Eigen { std::vector m_twiddles; std::vector m_stageRadix; std::vector m_stageRemainder; + std::vector m_scratchBuf; bool m_inverse; void make_twiddles(int nfft,bool inverse) @@ -75,6 +76,8 @@ namespace Eigen { n /= p; m_stageRadix.push_back(p); m_stageRemainder.push_back(n); + if ( p > 5 ) + m_scratchBuf.resize(p); // scratchbuf will be needed in bfly_generic }while(n>1); } @@ -249,7 +252,7 @@ namespace Eigen { Complex * twiddles = &m_twiddles[0]; Complex t; int Norig = m_twiddles.size(); - Complex * scratchbuf = (Complex*)alloca(p*sizeof(Complex) ); + Complex * scratchbuf = &m_scratchBuf[0]; for ( u=0; u>1; int ncfft2 = nfft>>2; Complex * rtw = real_twiddles(ncfft2); - m_scratchBuf.resize(ncfft); - m_scratchBuf[0] = Complex( src[0].real() + src[ncfft].real(), src[0].real() - src[ncfft].real() ); + m_tmpBuf.resize(ncfft); + m_tmpBuf[0] = Complex( src[0].real() + src[ncfft].real(), src[0].real() - src[ncfft].real() ); for (int k = 1; k <= ncfft / 2; ++k) { Complex fk = src[k]; Complex fnkc = conj(src[ncfft-k]); Complex fek = fk + fnkc; Complex tmp = fk - fnkc; Complex fok = tmp * conj(rtw[k-1]); - m_scratchBuf[k] = fek + fok; - m_scratchBuf[ncfft-k] = conj(fek - fok); + m_tmpBuf[k] = fek + fok; + m_tmpBuf[ncfft-k] = conj(fek - fok); } - scale(&m_scratchBuf[0], ncfft, Scalar(1)/nfft ); - get_plan(ncfft,true).work(0, reinterpret_cast(dst), &m_scratchBuf[0], 1,1); + scale(&m_tmpBuf[0], ncfft, Scalar(1)/nfft ); + get_plan(ncfft,true).work(0, reinterpret_cast(dst), &m_tmpBuf[0], 1,1); } } @@ -372,7 +375,7 @@ namespace Eigen { PlanMap m_plans; std::map > m_realTwiddles; - std::vector m_scratchBuf; + std::vector m_tmpBuf; int PlanKey(int nfft,bool isinverse) const { return (nfft<<1) | isinverse; } From 1c543401741f1328b65d30a764f6273ddf9b60b6 Mon Sep 17 00:00:00 2001 From: Mark Borgerding Date: Sun, 31 May 2009 15:44:57 -0400 Subject: [PATCH 13/72] more work on ei_fftw_impl --- bench/benchFFT.cpp | 4 ++++ unsupported/Eigen/FFT | 3 ++- unsupported/test/FFT.cpp | 5 +++++ 3 files changed, 11 insertions(+), 1 deletion(-) diff --git a/bench/benchFFT.cpp b/bench/benchFFT.cpp index 14f5063fb..4b6cabb55 100644 --- a/bench/benchFFT.cpp +++ b/bench/benchFFT.cpp @@ -26,6 +26,10 @@ #include #include #include +#ifdef USE_FFTW +#include +#endif + #include using namespace Eigen; diff --git a/unsupported/Eigen/FFT b/unsupported/Eigen/FFT index 03f8504a4..dc7e85908 100644 --- a/unsupported/Eigen/FFT +++ b/unsupported/Eigen/FFT @@ -32,7 +32,8 @@ // FFTW: faster, GPL -- incompatible with Eigen in LGPL form, bigger code size #ifdef FFTW_ESTIMATE // definition of FFTW_ESTIMATE indicates the caller has included fftw3.h, we can use FFTW routines #include "src/FFT/ei_fftw_impl.h" -// #define DEFAULT_FFT_IMPL ei_fftw_impl +#undef DEFAULT_FFT_IMPL +#define DEFAULT_FFT_IMPL ei_fftw_impl #endif // intel Math Kernel Library: fastest, commerical -- incompatible with Eigen in GPL form diff --git a/unsupported/test/FFT.cpp b/unsupported/test/FFT.cpp index 32d1393d0..28230e1c4 100644 --- a/unsupported/test/FFT.cpp +++ b/unsupported/test/FFT.cpp @@ -23,6 +23,11 @@ // Eigen. If not, see . #include "main.h" +//#define USE_FFTW +#ifdef USE_FFTW +#include +#endif + #include using namespace std; From 4d6b962ba47ca2a6445bebd144b8122f3b8e96b7 Mon Sep 17 00:00:00 2001 From: Mark Borgerding Date: Wed, 10 Jun 2009 22:16:32 -0400 Subject: [PATCH 14/72] added FindFFTW, but I don't think it's right yet --- cmake/FindFFTW.cmake | 24 ++++++++++++++++++++++++ unsupported/test/CMakeLists.txt | 6 ++++++ unsupported/test/FFT.cpp | 12 +++--------- 3 files changed, 33 insertions(+), 9 deletions(-) create mode 100644 cmake/FindFFTW.cmake diff --git a/cmake/FindFFTW.cmake b/cmake/FindFFTW.cmake new file mode 100644 index 000000000..a56450b17 --- /dev/null +++ b/cmake/FindFFTW.cmake @@ -0,0 +1,24 @@ + +if (FFTW_INCLUDES AND FFTW_LIBRARIES) + set(FFTW_FIND_QUIETLY TRUE) +endif (FFTW_INCLUDES AND FFTW_LIBRARIES) + +find_path(FFTW_INCLUDES + NAMES + fftw3.h + PATHS + $ENV{FFTWDIR} + ${INCLUDE_INSTALL_DIR} +) + +find_library(FFTWF_LIB NAMES fftw3f PATHS $ENV{FFTWDIR} ${LIB_INSTALL_DIR}) +find_library(FFTW_LIB NAMES fftw3 PATHS $ENV{FFTWDIR} ${LIB_INSTALL_DIR}) +find_library(FFTWL_LIB NAMES fftw3l PATHS $ENV{FFTWDIR} ${LIB_INSTALL_DIR}) +set(FFTW_LIBRARIES "${FFTWF_LIB} ${FFTW_LIB} ${FFTWL_LIB}" ) +message(STATUS "FFTW ${FFTW_LIBRARIES}" ) + +include(FindPackageHandleStandardArgs) +find_package_handle_standard_args(FFTW DEFAULT_MSG + FFTW_INCLUDES FFTW_LIBRARIES) + +mark_as_advanced(FFTW_INCLUDES FFTW_LIBRARIES) diff --git a/unsupported/test/CMakeLists.txt b/unsupported/test/CMakeLists.txt index 49de7dfb2..23217811f 100644 --- a/unsupported/test/CMakeLists.txt +++ b/unsupported/test/CMakeLists.txt @@ -20,3 +20,9 @@ ei_add_test(BVH) ei_add_test(matrixExponential) ei_add_test(alignedvector3) ei_add_test(FFT) + +find_package(FFTW) +if(FFTW_FOUND) + ei_add_test(FFTW " " "${FFTW_LIBRARIES}" ) +endif(FFTW_FOUND) + diff --git a/unsupported/test/FFT.cpp b/unsupported/test/FFT.cpp index 28230e1c4..f0b9b68bf 100644 --- a/unsupported/test/FFT.cpp +++ b/unsupported/test/FFT.cpp @@ -23,11 +23,6 @@ // Eigen. If not, see . #include "main.h" -//#define USE_FFTW -#ifdef USE_FFTW -#include -#endif - #include using namespace std; @@ -121,7 +116,7 @@ void test_complex(int nfft) void test_FFT() { -#if 1 + CALL_SUBTEST( test_complex(32) ); CALL_SUBTEST( test_complex(32) ); CALL_SUBTEST( test_complex(32) ); CALL_SUBTEST( test_complex(256) ); CALL_SUBTEST( test_complex(256) ); CALL_SUBTEST( test_complex(256) ); CALL_SUBTEST( test_complex(3*8) ); CALL_SUBTEST( test_complex(3*8) ); CALL_SUBTEST( test_complex(3*8) ); @@ -129,13 +124,12 @@ void test_FFT() CALL_SUBTEST( test_complex(2*3*4) ); CALL_SUBTEST( test_complex(2*3*4) ); CALL_SUBTEST( test_complex(2*3*4) ); CALL_SUBTEST( test_complex(2*3*4*5) ); CALL_SUBTEST( test_complex(2*3*4*5) ); CALL_SUBTEST( test_complex(2*3*4*5) ); CALL_SUBTEST( test_complex(2*3*4*5*7) ); CALL_SUBTEST( test_complex(2*3*4*5*7) ); CALL_SUBTEST( test_complex(2*3*4*5*7) ); -#endif -#if 1 + + CALL_SUBTEST( test_scalar(32) ); CALL_SUBTEST( test_scalar(32) ); CALL_SUBTEST( test_scalar(32) ); CALL_SUBTEST( test_scalar(45) ); CALL_SUBTEST( test_scalar(45) ); CALL_SUBTEST( test_scalar(45) ); CALL_SUBTEST( test_scalar(50) ); CALL_SUBTEST( test_scalar(50) ); CALL_SUBTEST( test_scalar(50) ); CALL_SUBTEST( test_scalar(256) ); CALL_SUBTEST( test_scalar(256) ); CALL_SUBTEST( test_scalar(256) ); CALL_SUBTEST( test_scalar(2*3*4*5*7) ); CALL_SUBTEST( test_scalar(2*3*4*5*7) ); CALL_SUBTEST( test_scalar(2*3*4*5*7) ); -#endif } From 218711e18b1adc518a2f861fe53d5ce9da0e98b7 Mon Sep 17 00:00:00 2001 From: Mark Borgerding Date: Wed, 10 Jun 2009 23:25:27 -0400 Subject: [PATCH 15/72] example file --- unsupported/doc/examples/FFT.cpp | 117 +++++++++++++++++++++++++++++++ 1 file changed, 117 insertions(+) create mode 100644 unsupported/doc/examples/FFT.cpp diff --git a/unsupported/doc/examples/FFT.cpp b/unsupported/doc/examples/FFT.cpp new file mode 100644 index 000000000..55e29585a --- /dev/null +++ b/unsupported/doc/examples/FFT.cpp @@ -0,0 +1,117 @@ +// To use the simple FFT implementation +// g++ -o demofft -I.. -Wall -O3 FFT.cpp + +// To use the FFTW implementation +// g++ -o demofft -I.. -DUSE_FFTW -Wall -O3 FFT.cpp -lfftw3 -lfftw3f -lfftw3l + +#ifdef USE_FFTW +#include +#endif + +#include +#include +#include +#include +#include +#include + +using namespace std; +using namespace Eigen; + +template +T mag2(T a) +{ + return a*a; +} +template +T mag2(std::complex a) +{ + return norm(a); +} + +template +T mag2(const std::vector & vec) +{ + T out=0; + for (size_t k=0;k +T mag2(const std::vector > & vec) +{ + T out=0; + for (size_t k=0;k +vector operator-(const vector & a,const vector & b ) +{ + vector c(a); + for (size_t k=0;k +void RandomFill(std::vector & vec) +{ + for (size_t k=0;k +void RandomFill(std::vector > & vec) +{ + for (size_t k=0;k ( T( rand() )/T(RAND_MAX) - .5, T( rand() )/T(RAND_MAX) - .5); +} + +template +void fwd_inv(size_t nfft) +{ + typedef typename NumTraits::Real Scalar; + vector timebuf(nfft); + RandomFill(timebuf); + + vector freqbuf; + static FFT fft; + fft.fwd(freqbuf,timebuf); + + vector timebuf2; + fft.inv(timebuf2,freqbuf); + + long double rmse = mag2(timebuf - timebuf2) / mag2(timebuf); + cout << "roundtrip rmse: " << rmse << endl; +} + +template +void two_demos(int nfft) +{ + cout << " scalar "; + fwd_inv >(nfft); + cout << " complex "; + fwd_inv,std::complex >(nfft); +} + +void demo_all_types(int nfft) +{ + cout << "nfft=" << nfft << endl; + cout << " float" << endl; + two_demos(nfft); + cout << " double" << endl; + two_demos(nfft); + cout << " long double" << endl; + two_demos(nfft); +} + +int main() +{ + demo_all_types( 2*3*4*5*7 ); + demo_all_types( 2*9*16*25 ); + demo_all_types( 1024 ); + return 0; +} From e577c70e49dddd88c83175921431d2725128b0f3 Mon Sep 17 00:00:00 2001 From: Mark Borgerding Date: Tue, 16 Jun 2009 23:54:58 -0400 Subject: [PATCH 16/72] candidate header for Eigen/Complex --- unsupported/Eigen/Complex | 142 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 142 insertions(+) create mode 100644 unsupported/Eigen/Complex diff --git a/unsupported/Eigen/Complex b/unsupported/Eigen/Complex new file mode 100644 index 000000000..f8d9af25c --- /dev/null +++ b/unsupported/Eigen/Complex @@ -0,0 +1,142 @@ +#ifndef EIGEN_COMPLEX_H +#define EIGEN_COMPLEX_H + +#include + +namespace Eigen { + +template +struct castable_pointer +{ + castable_pointer(_NativePtr ptr) : _ptr(ptr) {} + operator _NativePtr () {return _ptr;} + operator _PunnedPtr () {return reinterpret_cast<_PunnedPtr>(_ptr);} + private: + _NativePtr _ptr; +}; + +template +struct Complex +{ + typedef typename std::complex StandardComplex; + typedef T value_type; + + Complex(const T& re = T(), const T& im = T()) : _re(re),_im(im) { } + Complex(const Complex&other ): _re(other.real()) ,_im(other.imag()) {} + + template + Complex(const Complex&other): _re(other.real()) ,_im(other.imag()) {} + template + Complex(const std::complex&other): _re(other.real()) ,_im(other.imag()) {} + + typedef castable_pointer< Complex*, StandardComplex* > pointer_type; + typedef castable_pointer< const Complex*, const StandardComplex* > const_pointer_type; + + pointer_type operator & () {return pointer_type(this);} + const_pointer_type operator & () const {return const_pointer_type(this);} + + StandardComplex std_type() const {return StandardComplex(real(),imag());} + StandardComplex & std_type() {return *(StandardComplex*)(&(*this));} + + operator StandardComplex () const {return std_type();} + operator StandardComplex & () {return std_type();} + + T & real() {return _re;} + T & imag() {return _im;} + const T & real() const {return _re;} + const T & imag() const {return _im;} + + // *** complex member functions: *** + Complex& operator= (const T& val) { _re=val;_im=0;return *this; } + Complex& operator+= (const T& val) {_re+=val;return *this;} + Complex& operator-= (const T& val) {_re-=val;return *this;} + Complex& operator*= (const T& val) {_re*=val;_im*=val;return *this; } + Complex& operator/= (const T& val) {_re/=val;_im/=val;return *this; } + + Complex& operator= (const Complex& rhs) {_re=rhs._re;_im=rhs._im;return *this;} + Complex& operator= (const StandardComplex& rhs) {_re=rhs.real();_im=rhs.imag();return *this;} + + template Complex& operator= (const Complex& rhs) { _re=rhs._re;_im=rhs._im;return *this;} + template Complex& operator+= (const Complex& rhs) { _re+=rhs._re;_im+=rhs._im;return *this;} + template Complex& operator-= (const Complex& rhs) { _re-=rhs._re;_im-=rhs._im;return *this;} + template Complex& operator*= (const Complex& rhs) { this->std_type() *= rhs.std_type(); return *this; } + template Complex& operator/= (const Complex& rhs) { this->std_type() /= rhs.std_type(); return *this; } + + private: + T _re; + T _im; +}; + +template +T ei_to_std( const T & x) {return x;} + +template +std::complex ei_to_std( const Complex & x) {return x.std_type();} + +// 26.2.6 operators +template Complex operator+(const Complex& rhs) {return rhs;} +template Complex operator-(const Complex& rhs) {return -ei_to_std(rhs);} + +template Complex operator+(const Complex& lhs, const Complex& rhs) { return ei_to_std(lhs) + ei_to_std(rhs);} +template Complex operator-(const Complex& lhs, const Complex& rhs) { return ei_to_std(lhs) - ei_to_std(rhs);} +template Complex operator*(const Complex& lhs, const Complex& rhs) { return ei_to_std(lhs) * ei_to_std(rhs);} +template Complex operator/(const Complex& lhs, const Complex& rhs) { return ei_to_std(lhs) / ei_to_std(rhs);} +template bool operator==(const Complex& lhs, const Complex& rhs) { return ei_to_std(lhs) == ei_to_std(rhs);} +template bool operator!=(const Complex& lhs, const Complex& rhs) { return ei_to_std(lhs) != ei_to_std(rhs);} + +template Complex operator+(const Complex& lhs, const T& rhs) {return ei_to_std(lhs) + ei_to_std(rhs); } +template Complex operator-(const Complex& lhs, const T& rhs) {return ei_to_std(lhs) - ei_to_std(rhs); } +template Complex operator*(const Complex& lhs, const T& rhs) {return ei_to_std(lhs) * ei_to_std(rhs); } +template Complex operator/(const Complex& lhs, const T& rhs) {return ei_to_std(lhs) / ei_to_std(rhs); } +template bool operator==(const Complex& lhs, const T& rhs) {return ei_to_std(lhs) == ei_to_std(rhs); } +template bool operator!=(const Complex& lhs, const T& rhs) {return ei_to_std(lhs) != ei_to_std(rhs); } + +template Complex operator+(const T& lhs, const Complex& rhs) {return ei_to_std(lhs) + ei_to_std(rhs); } +template Complex operator-(const T& lhs, const Complex& rhs) {return ei_to_std(lhs) - ei_to_std(rhs); } +template Complex operator*(const T& lhs, const Complex& rhs) {return ei_to_std(lhs) * ei_to_std(rhs); } +template Complex operator/(const T& lhs, const Complex& rhs) {return ei_to_std(lhs) / ei_to_std(rhs); } +template bool operator==(const T& lhs, const Complex& rhs) {return ei_to_std(lhs) == ei_to_std(rhs); } +template bool operator!=(const T& lhs, const Complex& rhs) {return ei_to_std(lhs) != ei_to_std(rhs); } + +template +std::basic_istream& + operator>> (std::basic_istream& istr, Complex& rhs) +{ + return istr >> rhs.std_type(); +} + +template +std::basic_ostream& +operator<< (std::basic_ostream& ostr, const Complex& rhs) +{ + return ostr << rhs.std_type(); +} + + // 26.2.7 values: + template T real(const Complex&x) {return real(ei_to_std(x));} + template T abs(const Complex&x) {return abs(ei_to_std(x));} + template T arg(const Complex&x) {return arg(ei_to_std(x));} + template T norm(const Complex&x) {return norm(ei_to_std(x));} + + template Complex conj(const Complex&x) { return conj(ei_to_std(x));} + template Complex polar(const T& x, const T&y) {return polar(ei_to_std(x),ei_to_std(y));} + // 26.2.8 transcendentals: + template Complex cos (const Complex&x){return cos(ei_to_std(x));} + template Complex cosh (const Complex&x){return cosh(ei_to_std(x));} + template Complex exp (const Complex&x){return exp(ei_to_std(x));} + template Complex log (const Complex&x){return log(ei_to_std(x));} + template Complex log10 (const Complex&x){return log10(ei_to_std(x));} + + template Complex pow(const Complex&x, int p) {return pow(ei_to_std(x),ei_to_std(p));} + template Complex pow(const Complex&x, const T&p) {return pow(ei_to_std(x),ei_to_std(p));} + template Complex pow(const Complex&x, const Complex&p) {return pow(ei_to_std(x),ei_to_std(p));} + template Complex pow(const T&x, const Complex&p) {return pow(ei_to_std(x),ei_to_std(p));} + + template Complex sin (const Complex&x){return sin(ei_to_std(x));} + template Complex sinh (const Complex&x){return sinh(ei_to_std(x));} + template Complex sqrt (const Complex&x){return sqrt(ei_to_std(x));} + template Complex tan (const Complex&x){return tan(ei_to_std(x));} + template Complex tanh (const Complex&x){return tanh(ei_to_std(x));} +} + +#endif From fb9a15e451371f4e600dbb60b8e5c9d098889844 Mon Sep 17 00:00:00 2001 From: Mark Borgerding Date: Wed, 17 Jun 2009 00:09:18 -0400 Subject: [PATCH 17/72] added copyright notice --- unsupported/Eigen/Complex | 50 ++++++++++++++++++++++++++++++++++----- 1 file changed, 44 insertions(+), 6 deletions(-) diff --git a/unsupported/Eigen/Complex b/unsupported/Eigen/Complex index f8d9af25c..a0483abdf 100644 --- a/unsupported/Eigen/Complex +++ b/unsupported/Eigen/Complex @@ -1,6 +1,34 @@ #ifndef EIGEN_COMPLEX_H #define EIGEN_COMPLEX_H +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2009 Mark Borgerding mark a borgerding net +// +// Eigen is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 3 of the License, or (at your option) any later version. +// +// Alternatively, you can redistribute it and/or +// modify it under the terms of the GNU General Public License as +// published by the Free Software Foundation; either version 2 of +// the License, or (at your option) any later version. +// +// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY +// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public +// License and a copy of the GNU General Public License along with +// Eigen. If not, see . + +// Eigen::Complex reuses as much as possible from std::complex +// and allows easy conversion to and from, even at the pointer level. + + #include namespace Eigen { @@ -21,6 +49,7 @@ struct Complex typedef typename std::complex StandardComplex; typedef T value_type; + // constructors Complex(const T& re = T(), const T& im = T()) : _re(re),_im(im) { } Complex(const Complex&other ): _re(other.real()) ,_im(other.imag()) {} @@ -29,22 +58,31 @@ struct Complex template Complex(const std::complex&other): _re(other.real()) ,_im(other.imag()) {} + + // allow binary access to the object as a std::complex typedef castable_pointer< Complex*, StandardComplex* > pointer_type; typedef castable_pointer< const Complex*, const StandardComplex* > const_pointer_type; - pointer_type operator & () {return pointer_type(this);} const_pointer_type operator & () const {return const_pointer_type(this);} - StandardComplex std_type() const {return StandardComplex(real(),imag());} - StandardComplex & std_type() {return *(StandardComplex*)(&(*this));} - operator StandardComplex () const {return std_type();} operator StandardComplex & () {return std_type();} - T & real() {return _re;} - T & imag() {return _im;} + StandardComplex std_type() const {return StandardComplex(real(),imag());} + StandardComplex & std_type() {return *reinterpret_cast(this);} + + + // every sort of accessor and mutator that has ever been in fashion. + // For a brief history, search for "std::complex over-encapsulated" + // http://www.open-std.org/jtc1/sc22/wg21/docs/lwg-defects.html#387 const T & real() const {return _re;} const T & imag() const {return _im;} + T & real() {return _re;} + T & imag() {return _im;} + T & real(const T & x) {return _re=x;} + T & imag(const T & x) {return _im=x;} + void set_real(const T & x) {_re = x;} + void set_imag(const T & x) {_im = x;} // *** complex member functions: *** Complex& operator= (const T& val) { _re=val;_im=0;return *this; } From a39de276a95234ff85f58fa93364b047f3d72c6b Mon Sep 17 00:00:00 2001 From: Mark Borgerding Date: Mon, 14 Sep 2009 01:52:26 -0400 Subject: [PATCH 18/72] added the test case for FFTW --- unsupported/test/CMakeLists.txt | 2 +- unsupported/test/FFTW.cpp | 136 ++++++++++++++++++++++++++++++++ 2 files changed, 137 insertions(+), 1 deletion(-) create mode 100644 unsupported/test/FFTW.cpp diff --git a/unsupported/test/CMakeLists.txt b/unsupported/test/CMakeLists.txt index 23217811f..f42077bdc 100644 --- a/unsupported/test/CMakeLists.txt +++ b/unsupported/test/CMakeLists.txt @@ -23,6 +23,6 @@ ei_add_test(FFT) find_package(FFTW) if(FFTW_FOUND) - ei_add_test(FFTW " " "${FFTW_LIBRARIES}" ) + ei_add_test(FFTW " " "-lfftw3 -lfftw3f -lfftw3l" ) endif(FFTW_FOUND) diff --git a/unsupported/test/FFTW.cpp b/unsupported/test/FFTW.cpp new file mode 100644 index 000000000..cf7be75aa --- /dev/null +++ b/unsupported/test/FFTW.cpp @@ -0,0 +1,136 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. Eigen itself is part of the KDE project. +// +// Copyright (C) 2009 Mark Borgerding mark a borgerding net +// +// Eigen is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 3 of the License, or (at your option) any later version. +// +// Alternatively, you can redistribute it and/or +// modify it under the terms of the GNU General Public License as +// published by the Free Software Foundation; either version 2 of +// the License, or (at your option) any later version. +// +// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY +// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public +// License and a copy of the GNU General Public License along with +// Eigen. If not, see . + +#include "main.h" +#include +#include + +using namespace std; + +float norm(float x) {return x*x;} +double norm(double x) {return x*x;} +long double norm(long double x) {return x*x;} + +template < typename T> +complex promote(complex x) { return complex(x.real(),x.imag()); } + +complex promote(float x) { return complex( x); } +complex promote(double x) { return complex( x); } +complex promote(long double x) { return complex( x); } + + + template + long double fft_rmse( const vector & fftbuf,const vector & timebuf) + { + long double totalpower=0; + long double difpower=0; + cerr <<"idx\ttruth\t\tvalue\t|dif|=\n"; + for (size_t k0=0;k0 acc = 0; + long double phinc = -2.*k0* M_PIl / timebuf.size(); + for (size_t k1=0;k1(0,k1*phinc) ); + } + totalpower += norm(acc); + complex x = promote(fftbuf[k0]); + complex dif = acc - x; + difpower += norm(dif); + cerr << k0 << "\t" << acc << "\t" << x << "\t" << sqrt(norm(dif)) << endl; + } + cerr << "rmse:" << sqrt(difpower/totalpower) << endl; + return sqrt(difpower/totalpower); + } + + template + long double dif_rmse( const vector buf1,const vector buf2) + { + long double totalpower=0; + long double difpower=0; + size_t n = min( buf1.size(),buf2.size() ); + for (size_t k=0;k +void test_scalar(int nfft) +{ + typedef typename Eigen::FFT::Complex Complex; + typedef typename Eigen::FFT::Scalar Scalar; + + FFT fft; + vector inbuf(nfft); + vector outbuf; + for (int k=0;k() );// gross check + + vector buf3; + fft.inv( buf3 , outbuf); + VERIFY( dif_rmse(inbuf,buf3) < test_precision() );// gross check +} + +template +void test_complex(int nfft) +{ + typedef typename Eigen::FFT::Complex Complex; + + FFT fft; + + vector inbuf(nfft); + vector outbuf; + vector buf3; + for (int k=0;k() );// gross check + + fft.inv( buf3 , outbuf); + + VERIFY( dif_rmse(inbuf,buf3) < test_precision() );// gross check +} + +void test_FFTW() +{ + + CALL_SUBTEST( test_complex(32) ); CALL_SUBTEST( test_complex(32) ); CALL_SUBTEST( test_complex(32) ); + CALL_SUBTEST( test_complex(256) ); CALL_SUBTEST( test_complex(256) ); CALL_SUBTEST( test_complex(256) ); + CALL_SUBTEST( test_complex(3*8) ); CALL_SUBTEST( test_complex(3*8) ); CALL_SUBTEST( test_complex(3*8) ); + CALL_SUBTEST( test_complex(5*32) ); CALL_SUBTEST( test_complex(5*32) ); CALL_SUBTEST( test_complex(5*32) ); + CALL_SUBTEST( test_complex(2*3*4) ); CALL_SUBTEST( test_complex(2*3*4) ); CALL_SUBTEST( test_complex(2*3*4) ); + CALL_SUBTEST( test_complex(2*3*4*5) ); CALL_SUBTEST( test_complex(2*3*4*5) ); CALL_SUBTEST( test_complex(2*3*4*5) ); + CALL_SUBTEST( test_complex(2*3*4*5*7) ); CALL_SUBTEST( test_complex(2*3*4*5*7) ); CALL_SUBTEST( test_complex(2*3*4*5*7) ); + + + + CALL_SUBTEST( test_scalar(32) ); CALL_SUBTEST( test_scalar(32) ); CALL_SUBTEST( test_scalar(32) ); + CALL_SUBTEST( test_scalar(45) ); CALL_SUBTEST( test_scalar(45) ); CALL_SUBTEST( test_scalar(45) ); + CALL_SUBTEST( test_scalar(50) ); CALL_SUBTEST( test_scalar(50) ); CALL_SUBTEST( test_scalar(50) ); + CALL_SUBTEST( test_scalar(256) ); CALL_SUBTEST( test_scalar(256) ); CALL_SUBTEST( test_scalar(256) ); + CALL_SUBTEST( test_scalar(2*3*4*5*7) ); CALL_SUBTEST( test_scalar(2*3*4*5*7) ); CALL_SUBTEST( test_scalar(2*3*4*5*7) ); +} From c4ab6a2032895957e0e8dbea564b0c9c7f88b48a Mon Sep 17 00:00:00 2001 From: Benoit Jacob Date: Mon, 12 Oct 2009 22:33:51 -0400 Subject: [PATCH 19/72] also test that the matrix Q is unitary --- test/qr.cpp | 6 +++++- test/qr_colpivoting.cpp | 6 +++++- test/qr_fullpivoting.cpp | 6 +++++- 3 files changed, 15 insertions(+), 3 deletions(-) diff --git a/test/qr.cpp b/test/qr.cpp index 036a3c9f2..864828750 100644 --- a/test/qr.cpp +++ b/test/qr.cpp @@ -31,12 +31,16 @@ template void qr(const MatrixType& m) int cols = m.cols(); typedef typename MatrixType::Scalar Scalar; - typedef Matrix SquareMatrixType; + typedef Matrix MatrixQType; typedef Matrix VectorType; MatrixType a = MatrixType::Random(rows,cols); HouseholderQR qrOfA(a); MatrixType r = qrOfA.matrixQR(); + + MatrixQType q = qrOfA.matrixQ(); + VERIFY_IS_UNITARY(q); + // FIXME need better way to construct trapezoid for(int i = 0; i < rows; i++) for(int j = 0; j < cols; j++) if(i>j) r(i,j) = Scalar(0); diff --git a/test/qr_colpivoting.cpp b/test/qr_colpivoting.cpp index 4b6f7dd6b..5c5c5d259 100644 --- a/test/qr_colpivoting.cpp +++ b/test/qr_colpivoting.cpp @@ -32,7 +32,7 @@ template void qr() int rank = ei_random(1, std::min(rows, cols)-1); typedef typename MatrixType::Scalar Scalar; - typedef Matrix SquareMatrixType; + typedef Matrix MatrixQType; typedef Matrix VectorType; MatrixType m1; createRandomMatrixOfRank(rank,rows,cols,m1); @@ -44,6 +44,10 @@ template void qr() VERIFY(!qr.isSurjective()); MatrixType r = qr.matrixQR(); + + MatrixQType q = qr.matrixQ(); + VERIFY_IS_UNITARY(q); + // FIXME need better way to construct trapezoid for(int i = 0; i < rows; i++) for(int j = 0; j < cols; j++) if(i>j) r(i,j) = Scalar(0); diff --git a/test/qr_fullpivoting.cpp b/test/qr_fullpivoting.cpp index 3a37bcb46..891c2a527 100644 --- a/test/qr_fullpivoting.cpp +++ b/test/qr_fullpivoting.cpp @@ -32,7 +32,7 @@ template void qr() int rank = ei_random(1, std::min(rows, cols)-1); typedef typename MatrixType::Scalar Scalar; - typedef Matrix SquareMatrixType; + typedef Matrix MatrixQType; typedef Matrix VectorType; MatrixType m1; createRandomMatrixOfRank(rank,rows,cols,m1); @@ -44,6 +44,10 @@ template void qr() VERIFY(!qr.isSurjective()); MatrixType r = qr.matrixQR(); + + MatrixQType q = qr.matrixQ(); + VERIFY_IS_UNITARY(q); + // FIXME need better way to construct trapezoid for(int i = 0; i < rows; i++) for(int j = 0; j < cols; j++) if(i>j) r(i,j) = Scalar(0); From 2049f742e4a1a6ff8b7f1617a296a80b2ebb0281 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Tue, 13 Oct 2009 08:53:01 +0200 Subject: [PATCH 20/72] trivial compilation fix --- test/unalignedassert.cpp | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/test/unalignedassert.cpp b/test/unalignedassert.cpp index 233268d1d..2b819417e 100644 --- a/test/unalignedassert.cpp +++ b/test/unalignedassert.cpp @@ -93,25 +93,27 @@ void construct_at_boundary(int boundary) void unalignedassert() { + #if EIGEN_ALIGN construct_at_boundary(4); construct_at_boundary(4); construct_at_boundary(16); construct_at_boundary(16); construct_at_boundary(4); construct_at_boundary(16); - + construct_at_boundary(16); construct_at_boundary(4); construct_at_boundary(16); construct_at_boundary(16); construct_at_boundary(4); construct_at_boundary(16); - + construct_at_boundary(16); construct_at_boundary(4); construct_at_boundary(16); construct_at_boundary(16); - + #endif + check_unalignedassert_good(); check_unalignedassert_good(); check_unalignedassert_good(); @@ -120,7 +122,7 @@ void unalignedassert() check_unalignedassert_good(); check_unalignedassert_good(); check_unalignedassert_good >(); - + #if EIGEN_ALIGN VERIFY_RAISES_ASSERT(construct_at_boundary(8)); VERIFY_RAISES_ASSERT(construct_at_boundary(8)); From 14430940729b043ae14576d388555fc048619a3c Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Tue, 13 Oct 2009 09:23:09 +0200 Subject: [PATCH 21/72] compilation fix: make the generic template ctor explicit --- Eigen/src/Array/Replicate.h | 12 ++++-------- test/array_replicate.cpp | 10 +++++----- 2 files changed, 9 insertions(+), 13 deletions(-) diff --git a/Eigen/src/Array/Replicate.h b/Eigen/src/Array/Replicate.h index b20bcd49a..653bda666 100644 --- a/Eigen/src/Array/Replicate.h +++ b/Eigen/src/Array/Replicate.h @@ -45,14 +45,10 @@ struct ei_traits > typedef typename ei_nested::type MatrixTypeNested; typedef typename ei_unref::type _MatrixTypeNested; enum { - RowsPlusOne = (MatrixType::RowsAtCompileTime != Dynamic) ? - int(MatrixType::RowsAtCompileTime) + 1 : Dynamic, - ColsPlusOne = (MatrixType::ColsAtCompileTime != Dynamic) ? - int(MatrixType::ColsAtCompileTime) + 1 : Dynamic, - RowsAtCompileTime = RowFactor==Dynamic || MatrixType::RowsAtCompileTime==Dynamic + RowsAtCompileTime = RowFactor==Dynamic || int(MatrixType::RowsAtCompileTime)==Dynamic ? Dynamic : RowFactor * MatrixType::RowsAtCompileTime, - ColsAtCompileTime = ColFactor==Dynamic || MatrixType::ColsAtCompileTime==Dynamic + ColsAtCompileTime = ColFactor==Dynamic || int(MatrixType::ColsAtCompileTime)==Dynamic ? Dynamic : ColFactor * MatrixType::ColsAtCompileTime, MaxRowsAtCompileTime = RowsAtCompileTime, @@ -70,7 +66,7 @@ template class Replicate EIGEN_GENERIC_PUBLIC_INTERFACE(Replicate) template - inline Replicate(const OriginalMatrixType& matrix) + inline explicit Replicate(const OriginalMatrixType& matrix) : m_matrix(matrix), m_rowFactor(RowFactor), m_colFactor(ColFactor) { EIGEN_STATIC_ASSERT((ei_is_same_type::ret), @@ -113,7 +109,7 @@ template inline const Replicate MatrixBase::replicate() const { - return derived(); + return Replicate(derived()); } /** \nonstableyet diff --git a/test/array_replicate.cpp b/test/array_replicate.cpp index d1608915f..cd0f65f26 100644 --- a/test/array_replicate.cpp +++ b/test/array_replicate.cpp @@ -42,9 +42,9 @@ template void replicate(const MatrixType& m) MatrixType m1 = MatrixType::Random(rows, cols), m2 = MatrixType::Random(rows, cols); - + VectorType v1 = VectorType::Random(rows); - + MatrixX x1, x2; VectorX vx1; @@ -56,17 +56,17 @@ template void replicate(const MatrixType& m) for(int i=0; i())); - + x2.resize(rows,f1); for (int j=0; j Date: Tue, 13 Oct 2009 14:41:57 +0200 Subject: [PATCH 22/72] add missing PartialReduxExpr::coeff(index) function --- Eigen/src/Array/VectorwiseOp.h | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/Eigen/src/Array/VectorwiseOp.h b/Eigen/src/Array/VectorwiseOp.h index fa0958987..27e23ce59 100644 --- a/Eigen/src/Array/VectorwiseOp.h +++ b/Eigen/src/Array/VectorwiseOp.h @@ -95,6 +95,14 @@ class PartialReduxExpr : ei_no_assignment_operator, return m_functor(m_matrix.row(i)); } + const Scalar coeff(int index) const + { + if (Direction==Vertical) + return m_functor(m_matrix.col(index)); + else + return m_functor(m_matrix.row(index)); + } + protected: const MatrixTypeNested m_matrix; const MemberOp m_functor; From 949582c80986c14e200f5ab1cb2752b9779344e4 Mon Sep 17 00:00:00 2001 From: Hauke Heibel Date: Wed, 14 Oct 2009 09:40:22 +0200 Subject: [PATCH 23/72] Added prod() reduction to the AsciiQuickReference. --- doc/AsciiQuickReference.txt | 3 +++ 1 file changed, 3 insertions(+) diff --git a/doc/AsciiQuickReference.txt b/doc/AsciiQuickReference.txt index b868741f3..6c1c4fbd8 100644 --- a/doc/AsciiQuickReference.txt +++ b/doc/AsciiQuickReference.txt @@ -118,6 +118,9 @@ s = R.maxCoeff(&r, &c) // [aa, bb] = max(R); [cc, dd] = max(aa); R.sum() // sum(R(:)) R.colwise.sum() // sum(R) R.rowwise.sum() // sum(R, 2) or sum(R')' +R.prod() // prod(R(:)) +R.colwise.prod() // prod(R) +R.rowwise.prod() // prod(R, 2) or prod(R')' R.trace() // trace(R) R.all() // all(R(:)) R.colwise().all() // all(R) From f4661e696ec190090ec162aac3832650d6168492 Mon Sep 17 00:00:00 2001 From: Hauke Heibel Date: Wed, 14 Oct 2009 11:07:11 +0200 Subject: [PATCH 24/72] Resize is only defined in Matrix and not in MatrixBase. I am not sure whether the better fix is to move the resize functions to MatrixBase. --- Eigen/src/QR/HouseholderQR.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Eigen/src/QR/HouseholderQR.h b/Eigen/src/QR/HouseholderQR.h index 4cc553926..01cd2adb5 100644 --- a/Eigen/src/QR/HouseholderQR.h +++ b/Eigen/src/QR/HouseholderQR.h @@ -206,7 +206,7 @@ void HouseholderQR::solve( ) const { ei_assert(m_isInitialized && "HouseholderQR is not initialized."); - result->resize(m_qr.cols(), b.cols()); + result->derived().resize(m_qr.cols(), b.cols()); const int rows = m_qr.rows(); const int rank = std::min(m_qr.rows(), m_qr.cols()); ei_assert(b.rows() == rows); From c37cfc32b34f29cea9d7622ca07b74faceba87b7 Mon Sep 17 00:00:00 2001 From: Hauke Heibel Date: Wed, 14 Oct 2009 11:08:00 +0200 Subject: [PATCH 25/72] Fixed more W4 warnings. --- Eigen/src/Array/Replicate.h | 3 +++ Eigen/src/Array/VectorwiseOp.h | 3 +++ 2 files changed, 6 insertions(+) diff --git a/Eigen/src/Array/Replicate.h b/Eigen/src/Array/Replicate.h index 653bda666..478c0bf68 100644 --- a/Eigen/src/Array/Replicate.h +++ b/Eigen/src/Array/Replicate.h @@ -94,6 +94,9 @@ template class Replicate const typename MatrixType::Nested m_matrix; const ei_int_if_dynamic m_rowFactor; const ei_int_if_dynamic m_colFactor; + + private: + Replicate& operator=(const Replicate&); }; /** \nonstableyet diff --git a/Eigen/src/Array/VectorwiseOp.h b/Eigen/src/Array/VectorwiseOp.h index 27e23ce59..4cb0083fa 100644 --- a/Eigen/src/Array/VectorwiseOp.h +++ b/Eigen/src/Array/VectorwiseOp.h @@ -450,6 +450,9 @@ template class VectorwiseOp protected: ExpressionTypeNested m_matrix; + + private: + VectorwiseOp& operator=(const VectorwiseOp&); }; /** \array_module From 0927ba1fd328b23b88b0c9b44eecfd99494c2007 Mon Sep 17 00:00:00 2001 From: Hauke Heibel Date: Wed, 14 Oct 2009 19:55:23 +0200 Subject: [PATCH 26/72] More warning fixes. --- Eigen/src/Core/Swap.h | 3 +++ test/visitor.cpp | 4 ++-- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/Eigen/src/Core/Swap.h b/Eigen/src/Core/Swap.h index a7cf219f7..45c180983 100644 --- a/Eigen/src/Core/Swap.h +++ b/Eigen/src/Core/Swap.h @@ -117,6 +117,9 @@ template class SwapWrapper protected: ExpressionType& m_expression; + + private: + SwapWrapper& operator=(const SwapWrapper&); }; /** swaps *this with the expression \a other. diff --git a/test/visitor.cpp b/test/visitor.cpp index b78782b78..6ec442bc8 100644 --- a/test/visitor.cpp +++ b/test/visitor.cpp @@ -40,7 +40,7 @@ template void matrixVisitor(const MatrixType& p) m(i) = ei_random(); Scalar minc = Scalar(1000), maxc = Scalar(-1000); - int minrow,mincol,maxrow,maxcol; + int minrow=0,mincol=0,maxrow=0,maxcol=0; for(int j = 0; j < cols; j++) for(int i = 0; i < rows; i++) { @@ -86,7 +86,7 @@ template void vectorVisitor(const VectorType& w) v(i) = ei_random(); Scalar minc = Scalar(1000), maxc = Scalar(-1000); - int minidx,maxidx; + int minidx=0,maxidx=0; for(int i = 0; i < size; i++) { if(v(i) < minc) From 15030439816910327478b98ba1a253e18cc6165f Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Thu, 15 Oct 2009 18:43:15 +0200 Subject: [PATCH 27/72] autodiff: * fix namespace issue * simplify Jacobian code * fix issue with "Dynamic derivatives" --- .../Eigen/src/AutoDiff/AutoDiffJacobian.h | 23 ++- .../Eigen/src/AutoDiff/AutoDiffScalar.h | 132 +++++++++++++----- unsupported/test/autodiff.cpp | 8 +- 3 files changed, 110 insertions(+), 53 deletions(-) diff --git a/unsupported/Eigen/src/AutoDiff/AutoDiffJacobian.h b/unsupported/Eigen/src/AutoDiff/AutoDiffJacobian.h index d42197345..a5e881487 100644 --- a/unsupported/Eigen/src/AutoDiff/AutoDiffJacobian.h +++ b/unsupported/Eigen/src/AutoDiff/AutoDiffJacobian.h @@ -46,13 +46,14 @@ public: InputsAtCompileTime = Functor::InputsAtCompileTime, ValuesAtCompileTime = Functor::ValuesAtCompileTime }; - + typedef typename Functor::InputType InputType; typedef typename Functor::ValueType ValueType; typedef typename Functor::JacobianType JacobianType; - typedef AutoDiffScalar > ActiveScalar; - + typedef Matrix DerivativeType; + typedef AutoDiffScalar ActiveScalar; + typedef Matrix ActiveInput; typedef Matrix ActiveValue; @@ -69,26 +70,20 @@ public: ActiveInput ax = x.template cast(); ActiveValue av(jac.rows()); - + if(InputsAtCompileTime==Dynamic) - { - for (int j=0; jinputs()); for (int j=0; jinputs()); - } - - for (int j=0; jinputs(),i); Functor::operator()(ax, &av); for (int i=0; i +struct ei_make_coherent_impl { + static void run(A& a, B& b) {} +}; + +// resize a to match b is a.size()==0, and conversely. +template +void ei_make_coherent(const A& a, const B&b) +{ + ei_make_coherent_impl::run(a.const_cast_derived(), b.const_cast_derived()); +} + /** \class AutoDiffScalar * \brief A scalar type replacement with automatic differentation capability * @@ -35,7 +47,7 @@ namespace Eigen { * This class represents a scalar value while tracking its respective derivatives. * * It supports the following list of global math function: - * - std::abs, std::sqrt, std::pow, std::exp, std::log, std::sin, std::cos, + * - std::abs, std::sqrt, std::pow, std::exp, std::log, std::sin, std::cos, * - ei_abs, ei_sqrt, ei_pow, ei_exp, ei_log, ei_sin, ei_cos, * - ei_conj, ei_real, ei_imag, ei_abs2. * @@ -49,29 +61,29 @@ class AutoDiffScalar { public: typedef typename ei_traits::Scalar Scalar; - + inline AutoDiffScalar() {} - + inline AutoDiffScalar(const Scalar& value) : m_value(value) { if(m_derivatives.size()>0) m_derivatives.setZero(); } - + inline AutoDiffScalar(const Scalar& value, const DerType& der) : m_value(value), m_derivatives(der) {} - + template inline AutoDiffScalar(const AutoDiffScalar& other) : m_value(other.value()), m_derivatives(other.derivatives()) {} - + inline AutoDiffScalar(const AutoDiffScalar& other) : m_value(other.value()), m_derivatives(other.derivatives()) {} - + template inline AutoDiffScalar& operator=(const AutoDiffScalar& other) { @@ -79,32 +91,33 @@ class AutoDiffScalar m_derivatives = other.derivatives(); return *this; } - + inline AutoDiffScalar& operator=(const AutoDiffScalar& other) { m_value = other.value(); m_derivatives = other.derivatives(); return *this; } - + // inline operator const Scalar& () const { return m_value; } // inline operator Scalar& () { return m_value; } inline const Scalar& value() const { return m_value; } inline Scalar& value() { return m_value; } - + inline const DerType& derivatives() const { return m_derivatives; } inline DerType& derivatives() { return m_derivatives; } - + template inline const AutoDiffScalar,DerType,OtherDerType> > operator+(const AutoDiffScalar& other) const { + ei_make_coherent(m_derivatives, other.derivatives()); return AutoDiffScalar,DerType,OtherDerType> >( m_value + other.value(), m_derivatives + other.derivatives()); } - + template inline AutoDiffScalar& operator+=(const AutoDiffScalar& other) @@ -112,16 +125,17 @@ class AutoDiffScalar (*this) = (*this) + other; return *this; } - + template inline const AutoDiffScalar, DerType,OtherDerType> > operator-(const AutoDiffScalar& other) const { + ei_make_coherent(m_derivatives, other.derivatives()); return AutoDiffScalar, DerType,OtherDerType> >( m_value - other.value(), m_derivatives - other.derivatives()); } - + template inline AutoDiffScalar& operator-=(const AutoDiffScalar& other) @@ -129,7 +143,7 @@ class AutoDiffScalar *this = *this - other; return *this; } - + template inline const AutoDiffScalar, DerType> > operator-() const @@ -138,7 +152,7 @@ class AutoDiffScalar -m_value, -m_derivatives); } - + inline const AutoDiffScalar, DerType> > operator*(const Scalar& other) const { @@ -146,7 +160,7 @@ class AutoDiffScalar m_value * other, (m_derivatives * other)); } - + friend inline const AutoDiffScalar, DerType> > operator*(const Scalar& other, const AutoDiffScalar& a) { @@ -154,7 +168,7 @@ class AutoDiffScalar a.value() * other, a.derivatives() * other); } - + inline const AutoDiffScalar, DerType> > operator/(const Scalar& other) const { @@ -162,7 +176,7 @@ class AutoDiffScalar m_value / other, (m_derivatives * (Scalar(1)/other))); } - + friend inline const AutoDiffScalar, DerType> > operator/(const Scalar& other, const AutoDiffScalar& a) { @@ -170,7 +184,7 @@ class AutoDiffScalar other / a.value(), a.derivatives() * (-Scalar(1)/other)); } - + template inline const AutoDiffScalar, NestByValue, @@ -178,6 +192,7 @@ class AutoDiffScalar NestByValue, OtherDerType> > > > > > operator/(const AutoDiffScalar& other) const { + ei_make_coherent(m_derivatives, other.derivatives()); return AutoDiffScalar, NestByValue, NestByValue, DerType> >, @@ -186,45 +201,91 @@ class AutoDiffScalar ((m_derivatives * other.value()).nestByValue() - (m_value * other.derivatives()).nestByValue()).nestByValue() * (Scalar(1)/(other.value()*other.value()))); } - + template inline const AutoDiffScalar, NestByValue, DerType> >, NestByValue, OtherDerType> > > > operator*(const AutoDiffScalar& other) const { + ei_make_coherent(m_derivatives, other.derivatives()); return AutoDiffScalar, NestByValue, DerType> >, NestByValue, OtherDerType> > > >( m_value * other.value(), (m_derivatives * other.value()).nestByValue() + (m_value * other.derivatives()).nestByValue()); } - + inline AutoDiffScalar& operator*=(const Scalar& other) { *this = *this * other; return *this; } - + template inline AutoDiffScalar& operator*=(const AutoDiffScalar& other) { *this = *this * other; return *this; } - + protected: Scalar m_value; DerType m_derivatives; - + +}; + +template +struct ei_make_coherent_impl, B> { + typedef Matrix A; + static void run(A& a, B& b) { + if((A_Rows==Dynamic || A_Cols==Dynamic) && (a.size()==0)) + { + a.resize(b.size()); + a.setZero(); + } + } +}; + +template +struct ei_make_coherent_impl > { + typedef Matrix B; + static void run(A& a, B& b) { + if((B_Rows==Dynamic || B_Cols==Dynamic) && (b.size()==0)) + { + b.resize(a.size()); + b.setZero(); + } + } +}; + +template +struct ei_make_coherent_impl, + Matrix > { + typedef Matrix A; + typedef Matrix B; + static void run(A& a, B& b) { + if((A_Rows==Dynamic || A_Cols==Dynamic) && (a.size()==0)) + { + a.resize(b.size()); + a.setZero(); + } + else if((B_Rows==Dynamic || B_Cols==Dynamic) && (b.size()==0)) + { + b.resize(a.size()); + b.setZero(); + } + } }; } #define EIGEN_AUTODIFF_DECLARE_GLOBAL_UNARY(FUNC,CODE) \ template \ - inline const AutoDiffScalar::Scalar>, DerType> > \ - FUNC(const AutoDiffScalar& x) { \ + inline const Eigen::AutoDiffScalar::Scalar>, DerType> > \ + FUNC(const Eigen::AutoDiffScalar& x) { \ + using namespace Eigen; \ typedef typename ei_traits::Scalar Scalar; \ typedef AutoDiffScalar, DerType> > ReturnType; \ CODE; \ @@ -234,34 +295,35 @@ namespace std { EIGEN_AUTODIFF_DECLARE_GLOBAL_UNARY(abs, return ReturnType(std::abs(x.value()), x.derivatives() * (sign(x.value())));) - + EIGEN_AUTODIFF_DECLARE_GLOBAL_UNARY(sqrt, Scalar sqrtx = std::sqrt(x.value()); return ReturnType(sqrtx,x.derivatives() * (Scalar(0.5) / sqrtx));) - + EIGEN_AUTODIFF_DECLARE_GLOBAL_UNARY(cos, return ReturnType(std::cos(x.value()), x.derivatives() * (-std::sin(x.value())));) - + EIGEN_AUTODIFF_DECLARE_GLOBAL_UNARY(sin, return ReturnType(std::sin(x.value()),x.derivatives() * std::cos(x.value()));) - + EIGEN_AUTODIFF_DECLARE_GLOBAL_UNARY(exp, Scalar expx = std::exp(x.value()); return ReturnType(expx,x.derivatives() * expx);) EIGEN_AUTODIFF_DECLARE_GLOBAL_UNARY(ei_log, return ReturnType(std::log(x.value),x.derivatives() * (Scalar(1).x.value()));) - + template - inline const AutoDiffScalar::Scalar>, DerType> > - pow(const AutoDiffScalar& x, typename ei_traits::Scalar y) + inline const Eigen::AutoDiffScalar::Scalar>, DerType> > + pow(const Eigen::AutoDiffScalar& x, typename Eigen::ei_traits::Scalar y) { + using namespace Eigen; typedef typename ei_traits::Scalar Scalar; return AutoDiffScalar, DerType> >( std::pow(x.value(),y), x.derivatives() * (y * std::pow(x.value(),y-1))); } - + } namespace Eigen { diff --git a/unsupported/test/autodiff.cpp b/unsupported/test/autodiff.cpp index b1164897c..a96927b41 100644 --- a/unsupported/test/autodiff.cpp +++ b/unsupported/test/autodiff.cpp @@ -46,12 +46,12 @@ struct TestFunc1 typedef Matrix InputType; typedef Matrix ValueType; typedef Matrix JacobianType; - + int m_inputs, m_values; - + TestFunc1() : m_inputs(InputsAtCompileTime), m_values(ValuesAtCompileTime) {} TestFunc1(int inputs, int values) : m_inputs(inputs), m_values(values) {} - + int inputs() const { return m_inputs; } int values() const { return m_values; } @@ -142,7 +142,7 @@ void test_autodiff_scalar() std::cerr << foo >(ax,ay).value() << " <> " << foo >(ax,ay).derivatives().transpose() << "\n\n"; } - + void test_autodiff_jacobian() { for(int i = 0; i < g_repeat; i++) { From d177c1f3aca150d36d17c8116504e3ea1e7b30cf Mon Sep 17 00:00:00 2001 From: Hauke Heibel Date: Thu, 15 Oct 2009 21:07:14 +0200 Subject: [PATCH 28/72] Inlining fixes + fixed typo. Removed ei_assert in presence of static assert. --- Eigen/src/Core/CwiseNullaryOp.h | 1 - Eigen/src/Core/Redux.h | 12 +++++++++++- 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/Eigen/src/Core/CwiseNullaryOp.h b/Eigen/src/Core/CwiseNullaryOp.h index 61ce51885..7c1984be6 100644 --- a/Eigen/src/Core/CwiseNullaryOp.h +++ b/Eigen/src/Core/CwiseNullaryOp.h @@ -147,7 +147,6 @@ EIGEN_STRONG_INLINE const CwiseNullaryOp MatrixBase::NullaryExpr(int size, const CustomNullaryOp& func) { EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived) - ei_assert(IsVectorAtCompileTime); if(RowsAtCompileTime == 1) return CwiseNullaryOp(1, size, func); else return CwiseNullaryOp(size, 1, func); } diff --git a/Eigen/src/Core/Redux.h b/Eigen/src/Core/Redux.h index f437208c0..0df095750 100644 --- a/Eigen/src/Core/Redux.h +++ b/Eigen/src/Core/Redux.h @@ -112,6 +112,16 @@ struct ei_redux_novec_unroller } }; +// This is actually dead code and will never be called. It is required +// to prevent false warnings regarding failed inlining though +// for 0 length run() will never be called at all. +template +struct ei_redux_novec_unroller +{ + typedef typename Derived::Scalar Scalar; + EIGEN_STRONG_INLINE static Scalar run(const Derived&, const Func&) { return Scalar(); } +}; + /*** vectorization ***/ template @@ -297,7 +307,7 @@ struct ei_redux_impl /** \returns the result of a full redux operation on the whole matrix or vector using \a func * * The template parameter \a BinaryOp is the type of the functor \a func which must be - * an assiociative operator. Both current STL and TR1 functor styles are handled. + * an associative operator. Both current STL and TR1 functor styles are handled. * * \sa MatrixBase::sum(), MatrixBase::minCoeff(), MatrixBase::maxCoeff(), MatrixBase::colwise(), MatrixBase::rowwise() */ From 44ba4b1d6d5cd39d824bb83876175d0dc39a9cc3 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Fri, 16 Oct 2009 11:27:04 +0200 Subject: [PATCH 29/72] add operator+ scalar to AutoDiffScalar --- unsupported/Eigen/src/AutoDiff/AutoDiffScalar.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/unsupported/Eigen/src/AutoDiff/AutoDiffScalar.h b/unsupported/Eigen/src/AutoDiff/AutoDiffScalar.h index 888aa5c8c..fc5e237ab 100644 --- a/unsupported/Eigen/src/AutoDiff/AutoDiffScalar.h +++ b/unsupported/Eigen/src/AutoDiff/AutoDiffScalar.h @@ -108,6 +108,22 @@ class AutoDiffScalar inline const DerType& derivatives() const { return m_derivatives; } inline DerType& derivatives() { return m_derivatives; } + inline const AutoDiffScalar operator+(const Scalar& other) const + { + return AutoDiffScalar(m_value + other, m_derivatives); + } + + friend inline const AutoDiffScalar operator+(const Scalar& a, const AutoDiffScalar& b) + { + return AutoDiffScalar(a + b.value(), b.derivatives()); + } + + inline AutoDiffScalar& operator+=(const Scalar& other) + { + value() += other; + return *this; + } + template inline const AutoDiffScalar,DerType,OtherDerType> > operator+(const AutoDiffScalar& other) const From 7b0c4102facc9b5f6ca99ef76febb05a9499b8b0 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Fri, 16 Oct 2009 13:22:38 +0200 Subject: [PATCH 30/72] * add a Make* expression type builder to allow the construction of generic expressions working for both dense and sparse matrix. A nicer solution would be to use CwiseBinaryOp for any kind of matrix. To this end we either need to change the overall design so that the base class(es) depends on the kind of matrix, or we could add a template parameter to each expression type (e.g., int Kind = ei_traits::Kind) allowing to specialize each expression for each kind of matrix. * Extend AutoDiffScalar to work with sparse vector expression for the derivatives. --- Eigen/Core | 1 + Eigen/Sparse | 1 + Eigen/src/Core/ExpressionMaker.h | 61 +++++++++++++ Eigen/src/Sparse/SparseExpressionMaker.h | 48 +++++++++++ .../Eigen/src/AutoDiff/AutoDiffJacobian.h | 4 +- .../Eigen/src/AutoDiff/AutoDiffScalar.h | 85 ++++++++++--------- 6 files changed, 161 insertions(+), 39 deletions(-) create mode 100644 Eigen/src/Core/ExpressionMaker.h create mode 100644 Eigen/src/Sparse/SparseExpressionMaker.h diff --git a/Eigen/Core b/Eigen/Core index c8fcb1c09..3dce6422f 100644 --- a/Eigen/Core +++ b/Eigen/Core @@ -200,6 +200,7 @@ namespace Eigen { #include "src/Core/products/TriangularMatrixMatrix.h" #include "src/Core/products/TriangularSolverMatrix.h" #include "src/Core/BandMatrix.h" +#include "src/Core/ExpressionMaker.h" } // namespace Eigen diff --git a/Eigen/Sparse b/Eigen/Sparse index a8888daa3..96bd61419 100644 --- a/Eigen/Sparse +++ b/Eigen/Sparse @@ -110,6 +110,7 @@ namespace Eigen { #include "src/Sparse/SparseLLT.h" #include "src/Sparse/SparseLDLT.h" #include "src/Sparse/SparseLU.h" +#include "src/Sparse/SparseExpressionMaker.h" #ifdef EIGEN_CHOLMOD_SUPPORT # include "src/Sparse/CholmodSupport.h" diff --git a/Eigen/src/Core/ExpressionMaker.h b/Eigen/src/Core/ExpressionMaker.h new file mode 100644 index 000000000..1d265b63c --- /dev/null +++ b/Eigen/src/Core/ExpressionMaker.h @@ -0,0 +1,61 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2009 Gael Guennebaud +// +// Eigen is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 3 of the License, or (at your option) any later version. +// +// Alternatively, you can redistribute it and/or +// modify it under the terms of the GNU General Public License as +// published by the Free Software Foundation; either version 2 of +// the License, or (at your option) any later version. +// +// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY +// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public +// License and a copy of the GNU General Public License along with +// Eigen. If not, see . + +#ifndef EIGEN_EXPRESSIONMAKER_H +#define EIGEN_EXPRESSIONMAKER_H + +// computes the shape of a matrix from its traits flag +template struct ei_shape_of +{ + enum { ret = ei_traits::Flags&SparseBit ? IsSparse : IsDense }; +}; + + +// Since the Sparse module is completely separated from the Core module, there is +// no way to write the type of a generic expression working for both dense and sparse +// matrix. Unless we change the overall design, here is a workaround. +// There is an example in unsuported/Eigen/src/AutoDiff/AutoDiffScalar. + +template::ret> +struct MakeNestByValue +{ + typedef NestByValue Type; +}; + +template::ret> +struct MakeCwiseUnaryOp +{ + typedef CwiseUnaryOp Type; +}; + +template::ret> +struct MakeCwiseBinaryOp +{ + typedef CwiseBinaryOp Type; +}; + +// TODO complete the list + + +#endif // EIGEN_EXPRESSIONMAKER_H diff --git a/Eigen/src/Sparse/SparseExpressionMaker.h b/Eigen/src/Sparse/SparseExpressionMaker.h new file mode 100644 index 000000000..1fdcbb1f2 --- /dev/null +++ b/Eigen/src/Sparse/SparseExpressionMaker.h @@ -0,0 +1,48 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2009 Gael Guennebaud +// +// Eigen is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 3 of the License, or (at your option) any later version. +// +// Alternatively, you can redistribute it and/or +// modify it under the terms of the GNU General Public License as +// published by the Free Software Foundation; either version 2 of +// the License, or (at your option) any later version. +// +// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY +// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public +// License and a copy of the GNU General Public License along with +// Eigen. If not, see . + +#ifndef EIGEN_SPARSE_EXPRESSIONMAKER_H +#define EIGEN_SPARSE_EXPRESSIONMAKER_H + +template +struct MakeNestByValue +{ + typedef SparseNestByValue Type; +}; + +template +struct MakeCwiseUnaryOp +{ + typedef SparseCwiseUnaryOp Type; +}; + +template +struct MakeCwiseBinaryOp +{ + typedef SparseCwiseBinaryOp Type; +}; + +// TODO complete the list + +#endif // EIGEN_SPARSE_EXPRESSIONMAKER_H diff --git a/unsupported/Eigen/src/AutoDiff/AutoDiffJacobian.h b/unsupported/Eigen/src/AutoDiff/AutoDiffJacobian.h index a5e881487..b3983f8a6 100644 --- a/unsupported/Eigen/src/AutoDiff/AutoDiffJacobian.h +++ b/unsupported/Eigen/src/AutoDiff/AutoDiffJacobian.h @@ -50,10 +50,12 @@ public: typedef typename Functor::InputType InputType; typedef typename Functor::ValueType ValueType; typedef typename Functor::JacobianType JacobianType; + typedef typename JacobianType::Scalar Scalar; - typedef Matrix DerivativeType; + typedef Matrix DerivativeType; typedef AutoDiffScalar ActiveScalar; + typedef Matrix ActiveInput; typedef Matrix ActiveValue; diff --git a/unsupported/Eigen/src/AutoDiff/AutoDiffScalar.h b/unsupported/Eigen/src/AutoDiff/AutoDiffScalar.h index fc5e237ab..2fb733a99 100644 --- a/unsupported/Eigen/src/AutoDiff/AutoDiffScalar.h +++ b/unsupported/Eigen/src/AutoDiff/AutoDiffScalar.h @@ -42,9 +42,17 @@ void ei_make_coherent(const A& a, const B&b) /** \class AutoDiffScalar * \brief A scalar type replacement with automatic differentation capability * - * \param DerType the vector type used to store/represent the derivatives (e.g. Vector3f) + * \param _DerType the vector type used to store/represent the derivatives. The base scalar type + * as well as the number of derivatives to compute are determined from this type. + * Typical choices include, e.g., \c Vector4f for 4 derivatives, or \c VectorXf + * if the number of derivatives is not known at compile time, and/or, the number + * of derivatives is large. + * Note that _DerType can also be a reference (e.g., \c VectorXf&) to wrap a + * existing vector into an AutoDiffScalar. + * Finally, _DerType can also be any Eigen compatible expression. * - * This class represents a scalar value while tracking its respective derivatives. + * This class represents a scalar value while tracking its respective derivatives using Eigen's expression + * template mechanism. * * It supports the following list of global math function: * - std::abs, std::sqrt, std::pow, std::exp, std::log, std::sin, std::cos, @@ -56,10 +64,11 @@ void ei_make_coherent(const A& a, const B&b) * while derivatives are computed right away. * */ -template +template class AutoDiffScalar { public: + typedef typename ei_cleantype<_DerType>::type DerType; typedef typename ei_traits::Scalar Scalar; inline AutoDiffScalar() {} @@ -108,12 +117,12 @@ class AutoDiffScalar inline const DerType& derivatives() const { return m_derivatives; } inline DerType& derivatives() { return m_derivatives; } - inline const AutoDiffScalar operator+(const Scalar& other) const + inline const AutoDiffScalar operator+(const Scalar& other) const { return AutoDiffScalar(m_value + other, m_derivatives); } - friend inline const AutoDiffScalar operator+(const Scalar& a, const AutoDiffScalar& b) + friend inline const AutoDiffScalar operator+(const Scalar& a, const AutoDiffScalar& b) { return AutoDiffScalar(a + b.value(), b.derivatives()); } @@ -125,11 +134,11 @@ class AutoDiffScalar } template - inline const AutoDiffScalar,DerType,OtherDerType> > + inline const AutoDiffScalar,DerType,typename ei_cleantype::type>::Type > operator+(const AutoDiffScalar& other) const { ei_make_coherent(m_derivatives, other.derivatives()); - return AutoDiffScalar,DerType,OtherDerType> >( + return AutoDiffScalar,DerType,typename ei_cleantype::type>::Type >( m_value + other.value(), m_derivatives + other.derivatives()); } @@ -143,11 +152,11 @@ class AutoDiffScalar } template - inline const AutoDiffScalar, DerType,OtherDerType> > + inline const AutoDiffScalar, DerType,typename ei_cleantype::type>::Type > operator-(const AutoDiffScalar& other) const { ei_make_coherent(m_derivatives, other.derivatives()); - return AutoDiffScalar, DerType,OtherDerType> >( + return AutoDiffScalar, DerType,typename ei_cleantype::type>::Type >( m_value - other.value(), m_derivatives - other.derivatives()); } @@ -161,73 +170,73 @@ class AutoDiffScalar } template - inline const AutoDiffScalar, DerType> > + inline const AutoDiffScalar, DerType>::Type > operator-() const { - return AutoDiffScalar, DerType> >( + return AutoDiffScalar, DerType>::Type >( -m_value, -m_derivatives); } - inline const AutoDiffScalar, DerType> > + inline const AutoDiffScalar, DerType>::Type > operator*(const Scalar& other) const { - return AutoDiffScalar, DerType> >( + return AutoDiffScalar, DerType>::Type >( m_value * other, (m_derivatives * other)); } - friend inline const AutoDiffScalar, DerType> > + friend inline const AutoDiffScalar, DerType>::Type > operator*(const Scalar& other, const AutoDiffScalar& a) { - return AutoDiffScalar, DerType> >( + return AutoDiffScalar, DerType>::Type >( a.value() * other, a.derivatives() * other); } - inline const AutoDiffScalar, DerType> > + inline const AutoDiffScalar, DerType>::Type > operator/(const Scalar& other) const { - return AutoDiffScalar, DerType> >( + return AutoDiffScalar, DerType>::Type >( m_value / other, (m_derivatives * (Scalar(1)/other))); } - friend inline const AutoDiffScalar, DerType> > + friend inline const AutoDiffScalar, DerType>::Type > operator/(const Scalar& other, const AutoDiffScalar& a) { - return AutoDiffScalar, DerType> >( + return AutoDiffScalar, DerType>::Type >( other / a.value(), a.derivatives() * (-Scalar(1)/other)); } template - inline const AutoDiffScalar, - NestByValue, - NestByValue, DerType> >, - NestByValue, OtherDerType> > > > > > + inline const AutoDiffScalar, + typename MakeNestByValue, + typename MakeNestByValue, DerType>::Type>::Type, + typename MakeNestByValue, typename ei_cleantype::type>::Type>::Type >::Type >::Type >::Type > operator/(const AutoDiffScalar& other) const { ei_make_coherent(m_derivatives, other.derivatives()); - return AutoDiffScalar, - NestByValue, - NestByValue, DerType> >, - NestByValue, OtherDerType> > > > > >( + return AutoDiffScalar, + typename MakeNestByValue, + typename MakeNestByValue, DerType>::Type>::Type, + typename MakeNestByValue, typename ei_cleantype::type>::Type>::Type >::Type >::Type >::Type >( m_value / other.value(), ((m_derivatives * other.value()).nestByValue() - (m_value * other.derivatives()).nestByValue()).nestByValue() * (Scalar(1)/(other.value()*other.value()))); } template - inline const AutoDiffScalar, - NestByValue, DerType> >, - NestByValue, OtherDerType> > > > + inline const AutoDiffScalar, + typename MakeNestByValue, DerType>::Type>::Type, + typename MakeNestByValue, typename ei_cleantype::type>::Type>::Type >::Type > operator*(const AutoDiffScalar& other) const { ei_make_coherent(m_derivatives, other.derivatives()); - return AutoDiffScalar, - NestByValue, DerType> >, - NestByValue, OtherDerType> > > >( + return AutoDiffScalar, + typename MakeNestByValue, DerType>::Type>::Type, + typename MakeNestByValue, typename ei_cleantype::type>::Type>::Type >::Type >( m_value * other.value(), (m_derivatives * other.value()).nestByValue() + (m_value * other.derivatives()).nestByValue()); } @@ -299,11 +308,11 @@ struct ei_make_coherent_impl \ - inline const Eigen::AutoDiffScalar::Scalar>, DerType> > \ + inline const Eigen::AutoDiffScalar::Scalar>, DerType>::Type > \ FUNC(const Eigen::AutoDiffScalar& x) { \ using namespace Eigen; \ typedef typename ei_traits::Scalar Scalar; \ - typedef AutoDiffScalar, DerType> > ReturnType; \ + typedef AutoDiffScalar, DerType>::Type > ReturnType; \ CODE; \ } @@ -330,12 +339,12 @@ namespace std return ReturnType(std::log(x.value),x.derivatives() * (Scalar(1).x.value()));) template - inline const Eigen::AutoDiffScalar::Scalar>, DerType> > + inline const Eigen::AutoDiffScalar::Scalar>, DerType>::Type > pow(const Eigen::AutoDiffScalar& x, typename Eigen::ei_traits::Scalar y) { using namespace Eigen; typedef typename ei_traits::Scalar Scalar; - return AutoDiffScalar, DerType> >( + return AutoDiffScalar, DerType>::Type >( std::pow(x.value(),y), x.derivatives() * (y * std::pow(x.value(),y-1))); } @@ -375,7 +384,7 @@ EIGEN_AUTODIFF_DECLARE_GLOBAL_UNARY(ei_log, return ReturnType(ei_log(x.value),x.derivatives() * (Scalar(1).x.value()));) template -inline const AutoDiffScalar::Scalar>, DerType> > +inline const AutoDiffScalar::Scalar>, DerType>::Type > ei_pow(const AutoDiffScalar& x, typename ei_traits::Scalar y) { return std::pow(x,y);} From 5e3e6ff71a314b0a0ca569d6a7726757a39e9434 Mon Sep 17 00:00:00 2001 From: Hauke Heibel Date: Tue, 20 Oct 2009 22:08:13 +0200 Subject: [PATCH 31/72] Added Windows support to the BenchTimer. --- bench/BenchTimer.h | 34 ++++++++++++++++++++++++++++++---- 1 file changed, 30 insertions(+), 4 deletions(-) diff --git a/bench/BenchTimer.h b/bench/BenchTimer.h index bfc3a99b3..c1f473597 100644 --- a/bench/BenchTimer.h +++ b/bench/BenchTimer.h @@ -26,8 +26,14 @@ #ifndef EIGEN_BENCH_TIMER_H #define EIGEN_BENCH_TIMER_H +#ifndef WIN32 #include #include +#else +#define NOMINMAX +#include +#endif + #include #include @@ -40,7 +46,15 @@ class BenchTimer { public: - BenchTimer() { reset(); } + BenchTimer() + { +#ifdef WIN32 + LARGE_INTEGER freq; + QueryPerformanceFrequency(&freq); + m_frequency = (double)freq.QuadPart; +#endif + reset(); + } ~BenchTimer() {} @@ -51,23 +65,35 @@ public: m_best = std::min(m_best, getTime() - m_start); } - /** Return the best elapsed time. + /** Return the best elapsed time in seconds. */ inline double value(void) { - return m_best; + return m_best; } +#ifdef WIN32 + inline double getTime(void) +#else static inline double getTime(void) +#endif { +#ifdef WIN32 + LARGE_INTEGER query_ticks; + QueryPerformanceCounter(&query_ticks); + return query_ticks.QuadPart/m_frequency; +#else struct timeval tv; struct timezone tz; gettimeofday(&tv, &tz); return (double)tv.tv_sec + 1.e-6 * (double)tv.tv_usec; +#endif } protected: - +#ifdef WIN32 + double m_frequency; +#endif double m_best, m_start; }; From 471b4d509234cbcbd4a1cd45d48fe10efcc2bcf1 Mon Sep 17 00:00:00 2001 From: Benoit Jacob Date: Tue, 20 Oct 2009 21:53:54 -0400 Subject: [PATCH 32/72] handle mark's first commits before he configured his id --- scripts/eigen_gen_credits.cpp | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/scripts/eigen_gen_credits.cpp b/scripts/eigen_gen_credits.cpp index 086548e26..d7a625d47 100644 --- a/scripts/eigen_gen_credits.cpp +++ b/scripts/eigen_gen_credits.cpp @@ -13,10 +13,24 @@ using namespace std; std::string contributor_name(const std::string& line) { string result; + + // let's first take care of the case of isolated email addresses, like + // "user@localhost.localdomain" entries + if(line.find("markb@localhost.localdomain") != string::npos) + { + return "Mark Borgerding"; + } + + // from there on we assume that we have a entry of the form + // either: + // Bla bli Blurp + // or: + // Bla bli Blurp + size_t position_of_email_address = line.find_first_of('<'); if(position_of_email_address != string::npos) { - // there is an e-mail address. + // there is an e-mail address in <...>. // Hauke once committed as "John Smith", fix that. if(line.find("hauke.heibel") != string::npos) @@ -29,7 +43,7 @@ std::string contributor_name(const std::string& line) } else { - // there is no e-mail address. + // there is no e-mail address in <...>. if(line.find("convert-repo") != string::npos) result = ""; From c3180b7ffbc98d69764b3c1ab17b36e289f7cf7e Mon Sep 17 00:00:00 2001 From: Benoit Jacob Date: Tue, 20 Oct 2009 23:25:49 -0400 Subject: [PATCH 33/72] MatrixBase: * support resize() to same size (nop). The case of FFT was another case where that make one's life far easier. hope that's ok with you Gael. but indeed, i don't use it in the ReturnByValue stuff. FFT: * Support MatrixBase (well, in the case with direct memory access such as Map) * adapt unit test --- Eigen/src/Core/MatrixBase.h | 19 +++++ Eigen/src/Core/util/StaticAssert.h | 3 +- unsupported/Eigen/FFT | 32 +++++++- unsupported/test/FFT.cpp | 127 ++++++++++++++++++++++------- 4 files changed, 148 insertions(+), 33 deletions(-) diff --git a/Eigen/src/Core/MatrixBase.h b/Eigen/src/Core/MatrixBase.h index 387c11385..0e8b49f75 100644 --- a/Eigen/src/Core/MatrixBase.h +++ b/Eigen/src/Core/MatrixBase.h @@ -190,6 +190,25 @@ template class MatrixBase * i.e., the number of rows for a columns major matrix, and the number of cols otherwise */ int innerSize() const { return (int(Flags)&RowMajorBit) ? this->cols() : this->rows(); } + /** Only plain matrices, not expressions may be resized; therefore the only useful resize method is + * Matrix::resize(). The present method only asserts that the new size equals the old size, and does + * nothing else. + */ + void resize(int size) + { + ei_assert(size == this->size() + && "MatrixBase::resize() does not actually allow to resize."); + } + /** Only plain matrices, not expressions may be resized; therefore the only useful resize method is + * Matrix::resize(). The present method only asserts that the new size equals the old size, and does + * nothing else. + */ + void resize(int rows, int cols) + { + ei_assert(rows == this->rows() && cols == this->cols() + && "MatrixBase::resize() does not actually allow to resize."); + } + #ifndef EIGEN_PARSED_BY_DOXYGEN /** \internal the plain matrix type corresponding to this expression. Note that is not necessarily * exactly the return type of eval(): in the case of plain matrices, the return type of eval() is a const diff --git a/Eigen/src/Core/util/StaticAssert.h b/Eigen/src/Core/util/StaticAssert.h index 883f2d95e..6210bc91c 100644 --- a/Eigen/src/Core/util/StaticAssert.h +++ b/Eigen/src/Core/util/StaticAssert.h @@ -78,7 +78,8 @@ INVALID_MATRIX_TEMPLATE_PARAMETERS, BOTH_MATRICES_MUST_HAVE_THE_SAME_STORAGE_ORDER, THIS_METHOD_IS_ONLY_FOR_DIAGONAL_MATRIX, - THE_MATRIX_OR_EXPRESSION_THAT_YOU_PASSED_DOES_NOT_HAVE_THE_EXPECTED_TYPE + THE_MATRIX_OR_EXPRESSION_THAT_YOU_PASSED_DOES_NOT_HAVE_THE_EXPECTED_TYPE, + THIS_METHOD_IS_ONLY_FOR_EXPRESSIONS_WITH_DIRECT_MEMORY_ACCESS_SUCH_AS_MAP_OR_PLAIN_MATRICES }; }; diff --git a/unsupported/Eigen/FFT b/unsupported/Eigen/FFT index dc7e85908..fafdb829b 100644 --- a/unsupported/Eigen/FFT +++ b/unsupported/Eigen/FFT @@ -36,7 +36,7 @@ #define DEFAULT_FFT_IMPL ei_fftw_impl #endif -// intel Math Kernel Library: fastest, commerical -- incompatible with Eigen in GPL form +// intel Math Kernel Library: fastest, commercial -- incompatible with Eigen in GPL form #ifdef _MKL_DFTI_H_ // mkl_dfti.h has been included, we can use MKL FFT routines // TODO // #include "src/FFT/ei_imkl_impl.h" @@ -70,6 +70,20 @@ class FFT fwd( &dst[0],&src[0],src.size() ); } + template + void fwd( MatrixBase & dst, const MatrixBase & src) + { + EIGEN_STATIC_ASSERT_VECTOR_ONLY(InputDerived) + EIGEN_STATIC_ASSERT_VECTOR_ONLY(ComplexDerived) + EIGEN_STATIC_ASSERT_SAME_VECTOR_SIZE(ComplexDerived,InputDerived) // size at compile-time + EIGEN_STATIC_ASSERT((ei_is_same_type::ret), + YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY) + EIGEN_STATIC_ASSERT(int(InputDerived::Flags)&int(ComplexDerived::Flags)&DirectAccessBit, + THIS_METHOD_IS_ONLY_FOR_EXPRESSIONS_WITH_DIRECT_MEMORY_ACCESS_SUCH_AS_MAP_OR_PLAIN_MATRICES) + dst.derived().resize( src.size() ); + fwd( &dst[0],&src[0],src.size() ); + } + template void inv( _Output * dst, const Complex * src, int nfft) { @@ -83,8 +97,24 @@ class FFT inv( &dst[0],&src[0],src.size() ); } + template + void inv( MatrixBase & dst, const MatrixBase & src) + { + EIGEN_STATIC_ASSERT_VECTOR_ONLY(OutputDerived) + EIGEN_STATIC_ASSERT_VECTOR_ONLY(ComplexDerived) + EIGEN_STATIC_ASSERT_SAME_VECTOR_SIZE(ComplexDerived,OutputDerived) // size at compile-time + EIGEN_STATIC_ASSERT((ei_is_same_type::ret), + YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY) + EIGEN_STATIC_ASSERT(int(OutputDerived::Flags)&int(ComplexDerived::Flags)&DirectAccessBit, + THIS_METHOD_IS_ONLY_FOR_EXPRESSIONS_WITH_DIRECT_MEMORY_ACCESS_SUCH_AS_MAP_OR_PLAIN_MATRICES) + dst.derived().resize( src.size() ); + inv( &dst[0],&src[0],src.size() ); + } + // TODO: multi-dimensional FFTs + // TODO: handle Eigen MatrixBase + // ---> i added fwd and inv specializations above + unit test, is this enough? (bjacob) traits_type & traits() {return m_traits;} private: diff --git a/unsupported/test/FFT.cpp b/unsupported/test/FFT.cpp index f0b9b68bf..cc68f3718 100644 --- a/unsupported/test/FFT.cpp +++ b/unsupported/test/FFT.cpp @@ -39,16 +39,16 @@ complex promote(double x) { return complex( x); } complex promote(long double x) { return complex( x); } - template - long double fft_rmse( const vector & fftbuf,const vector & timebuf) + template + long double fft_rmse( const VectorType1 & fftbuf,const VectorType2 & timebuf) { long double totalpower=0; long double difpower=0; cerr <<"idx\ttruth\t\tvalue\t|dif|=\n"; - for (size_t k0=0;k0 acc = 0; long double phinc = -2.*k0* M_PIl / timebuf.size(); - for (size_t k1=0;k1(0,k1*phinc) ); } totalpower += norm(acc); @@ -61,8 +61,8 @@ complex promote(long double x) { return complex( x); return sqrt(difpower/totalpower); } - template - long double dif_rmse( const vector buf1,const vector buf2) + template + long double dif_rmse( const VectorType1& buf1,const VectorType2& buf2) { long double totalpower=0; long double difpower=0; @@ -74,35 +74,59 @@ complex promote(long double x) { return complex( x); return sqrt(difpower/totalpower); } -template -void test_scalar(int nfft) +enum { StdVectorContainer, EigenVectorContainer }; + +template struct VectorType; + +template struct VectorType { - typedef typename Eigen::FFT::Complex Complex; - typedef typename Eigen::FFT::Scalar Scalar; + typedef vector type; +}; + +template struct VectorType +{ + typedef Matrix type; +}; + +template +void test_scalar_generic(int nfft) +{ + typedef typename FFT::Complex Complex; + typedef typename FFT::Scalar Scalar; + typedef typename VectorType::type ScalarVector; + typedef typename VectorType::type ComplexVector; FFT fft; - vector inbuf(nfft); - vector outbuf; + ScalarVector inbuf(nfft); + ComplexVector outbuf; for (int k=0;k() );// gross check - vector buf3; + ScalarVector buf3; fft.inv( buf3 , outbuf); VERIFY( dif_rmse(inbuf,buf3) < test_precision() );// gross check } -template -void test_complex(int nfft) +template +void test_scalar(int nfft) { - typedef typename Eigen::FFT::Complex Complex; + test_scalar_generic(nfft); + test_scalar_generic(nfft); +} + +template +void test_complex_generic(int nfft) +{ + typedef typename FFT::Complex Complex; + typedef typename VectorType::type ComplexVector; FFT fft; - vector inbuf(nfft); - vector outbuf; - vector buf3; + ComplexVector inbuf(nfft); + ComplexVector outbuf; + ComplexVector buf3; for (int k=0;k() );// gross check } +template +void test_complex(int nfft) +{ + test_complex_generic(nfft); + test_complex_generic(nfft); +} + void test_FFT() { - CALL_SUBTEST( test_complex(32) ); CALL_SUBTEST( test_complex(32) ); CALL_SUBTEST( test_complex(32) ); - CALL_SUBTEST( test_complex(256) ); CALL_SUBTEST( test_complex(256) ); CALL_SUBTEST( test_complex(256) ); - CALL_SUBTEST( test_complex(3*8) ); CALL_SUBTEST( test_complex(3*8) ); CALL_SUBTEST( test_complex(3*8) ); - CALL_SUBTEST( test_complex(5*32) ); CALL_SUBTEST( test_complex(5*32) ); CALL_SUBTEST( test_complex(5*32) ); - CALL_SUBTEST( test_complex(2*3*4) ); CALL_SUBTEST( test_complex(2*3*4) ); CALL_SUBTEST( test_complex(2*3*4) ); - CALL_SUBTEST( test_complex(2*3*4*5) ); CALL_SUBTEST( test_complex(2*3*4*5) ); CALL_SUBTEST( test_complex(2*3*4*5) ); - CALL_SUBTEST( test_complex(2*3*4*5*7) ); CALL_SUBTEST( test_complex(2*3*4*5*7) ); CALL_SUBTEST( test_complex(2*3*4*5*7) ); + CALL_SUBTEST( test_complex(32) ); + CALL_SUBTEST( test_complex(32) ); + CALL_SUBTEST( test_complex(32) ); + + CALL_SUBTEST( test_complex(256) ); + CALL_SUBTEST( test_complex(256) ); + CALL_SUBTEST( test_complex(256) ); + + CALL_SUBTEST( test_complex(3*8) ); + CALL_SUBTEST( test_complex(3*8) ); + CALL_SUBTEST( test_complex(3*8) ); + + CALL_SUBTEST( test_complex(5*32) ); + CALL_SUBTEST( test_complex(5*32) ); + CALL_SUBTEST( test_complex(5*32) ); + + CALL_SUBTEST( test_complex(2*3*4) ); + CALL_SUBTEST( test_complex(2*3*4) ); + CALL_SUBTEST( test_complex(2*3*4) ); + + CALL_SUBTEST( test_complex(2*3*4*5) ); + CALL_SUBTEST( test_complex(2*3*4*5) ); + CALL_SUBTEST( test_complex(2*3*4*5) ); + + CALL_SUBTEST( test_complex(2*3*4*5*7) ); + CALL_SUBTEST( test_complex(2*3*4*5*7) ); + CALL_SUBTEST( test_complex(2*3*4*5*7) ); - CALL_SUBTEST( test_scalar(32) ); CALL_SUBTEST( test_scalar(32) ); CALL_SUBTEST( test_scalar(32) ); - CALL_SUBTEST( test_scalar(45) ); CALL_SUBTEST( test_scalar(45) ); CALL_SUBTEST( test_scalar(45) ); - CALL_SUBTEST( test_scalar(50) ); CALL_SUBTEST( test_scalar(50) ); CALL_SUBTEST( test_scalar(50) ); - CALL_SUBTEST( test_scalar(256) ); CALL_SUBTEST( test_scalar(256) ); CALL_SUBTEST( test_scalar(256) ); - CALL_SUBTEST( test_scalar(2*3*4*5*7) ); CALL_SUBTEST( test_scalar(2*3*4*5*7) ); CALL_SUBTEST( test_scalar(2*3*4*5*7) ); + CALL_SUBTEST( test_scalar(32) ); + CALL_SUBTEST( test_scalar(32) ); + CALL_SUBTEST( test_scalar(32) ); + + CALL_SUBTEST( test_scalar(45) ); + CALL_SUBTEST( test_scalar(45) ); + CALL_SUBTEST( test_scalar(45) ); + + CALL_SUBTEST( test_scalar(50) ); + CALL_SUBTEST( test_scalar(50) ); + CALL_SUBTEST( test_scalar(50) ); + + CALL_SUBTEST( test_scalar(256) ); + CALL_SUBTEST( test_scalar(256) ); + CALL_SUBTEST( test_scalar(256) ); + + CALL_SUBTEST( test_scalar(2*3*4*5*7) ); + CALL_SUBTEST( test_scalar(2*3*4*5*7) ); + CALL_SUBTEST( test_scalar(2*3*4*5*7) ); } From 85f8d1f0c65b3d0ce511b85a2847fec57637f4b6 Mon Sep 17 00:00:00 2001 From: Mark Borgerding Date: Wed, 21 Oct 2009 10:27:17 -0400 Subject: [PATCH 34/72] renamed 'Traits' to 'Impl', added vim modelines for syntax highlighting --- unsupported/Eigen/Complex | 2 ++ unsupported/Eigen/FFT | 19 ++++++++++--------- 2 files changed, 12 insertions(+), 9 deletions(-) diff --git a/unsupported/Eigen/Complex b/unsupported/Eigen/Complex index a0483abdf..e1c41ab38 100644 --- a/unsupported/Eigen/Complex +++ b/unsupported/Eigen/Complex @@ -178,3 +178,5 @@ operator<< (std::basic_ostream& ostr, const Complex& rhs) } #endif +/* vim: set filetype=cpp et sw=2 ts=2 ai: */ + diff --git a/unsupported/Eigen/FFT b/unsupported/Eigen/FFT index dc7e85908..1e96f4975 100644 --- a/unsupported/Eigen/FFT +++ b/unsupported/Eigen/FFT @@ -46,21 +46,21 @@ namespace Eigen { template + typename _Impl=DEFAULT_FFT_IMPL<_Scalar> > class FFT { public: - typedef _Traits traits_type; - typedef typename traits_type::Scalar Scalar; - typedef typename traits_type::Complex Complex; + typedef _Impl impl_type; + typedef typename impl_type::Scalar Scalar; + typedef typename impl_type::Complex Complex; - FFT(const traits_type & traits=traits_type() ) :m_traits(traits) { } + FFT(const impl_type & impl=impl_type() ) :m_impl(impl) { } template void fwd( Complex * dst, const _Input * src, int nfft) { - m_traits.fwd(dst,src,nfft); + m_impl.fwd(dst,src,nfft); } template @@ -73,7 +73,7 @@ class FFT template void inv( _Output * dst, const Complex * src, int nfft) { - m_traits.inv( dst,src,nfft ); + m_impl.inv( dst,src,nfft ); } template @@ -86,10 +86,11 @@ class FFT // TODO: multi-dimensional FFTs // TODO: handle Eigen MatrixBase - traits_type & traits() {return m_traits;} + impl_type & impl() {return m_impl;} private: - traits_type m_traits; + impl_type m_impl; }; #undef DEFAULT_FFT_IMPL } #endif +/* vim: set filetype=cpp et sw=2 ts=2 ai: */ From e3d08443dc272f740447de0147efc69cf7de1c93 Mon Sep 17 00:00:00 2001 From: Mark Borgerding Date: Wed, 21 Oct 2009 20:53:05 -0400 Subject: [PATCH 35/72] inlining,all namespace declaration moved to FFT, removed preprocessor definitions, --- unsupported/Eigen/FFT | 41 +++++++++++++-------- unsupported/Eigen/src/FFT/ei_fftw_impl.h | 38 ++++++++++++++++--- unsupported/Eigen/src/FFT/ei_kissfft_impl.h | 28 +++++++------- unsupported/test/CMakeLists.txt | 2 +- 4 files changed, 73 insertions(+), 36 deletions(-) diff --git a/unsupported/Eigen/FFT b/unsupported/Eigen/FFT index 97ec8e49b..36afdde8d 100644 --- a/unsupported/Eigen/FFT +++ b/unsupported/Eigen/FFT @@ -25,29 +25,39 @@ #ifndef EIGEN_FFT_H #define EIGEN_FFT_H -// ei_kissfft_impl: small, free, reasonably efficient default, derived from kissfft -#include "src/FFT/ei_kissfft_impl.h" -#define DEFAULT_FFT_IMPL ei_kissfft_impl +#include +#include +#include +#ifdef EIGEN_FFTW_DEFAULT // FFTW: faster, GPL -- incompatible with Eigen in LGPL form, bigger code size -#ifdef FFTW_ESTIMATE // definition of FFTW_ESTIMATE indicates the caller has included fftw3.h, we can use FFTW routines -#include "src/FFT/ei_fftw_impl.h" -#undef DEFAULT_FFT_IMPL -#define DEFAULT_FFT_IMPL ei_fftw_impl -#endif - -// intel Math Kernel Library: fastest, commercial -- incompatible with Eigen in GPL form -#ifdef _MKL_DFTI_H_ // mkl_dfti.h has been included, we can use MKL FFT routines +# include + namespace Eigen { +# include "src/FFT/ei_fftw_impl.h" + //template typedef struct ei_fftw_impl default_fft_impl; this does not work + template struct default_fft_impl : public ei_fftw_impl {}; + } +#elif defined EIGEN_MKL_DEFAULT // TODO -// #include "src/FFT/ei_imkl_impl.h" -// #define DEFAULT_FFT_IMPL ei_imkl_impl +// intel Math Kernel Library: fastest, commercial -- may be incompatible with Eigen in GPL form + namespace Eigen { +# include "src/FFT/ei_imklfft_impl.h" + template struct default_fft_impl : public ei_imklfft_impl {}; + } +#else +// ei_kissfft_impl: small, free, reasonably efficient default, derived from kissfft +// + namespace Eigen { +# include "src/FFT/ei_kissfft_impl.h" + template + struct default_fft_impl : public ei_kissfft_impl {}; + } #endif namespace Eigen { template - > + typename _Impl=default_fft_impl<_Scalar> > class FFT { public: @@ -120,7 +130,6 @@ class FFT private: impl_type m_impl; }; -#undef DEFAULT_FFT_IMPL } #endif /* vim: set filetype=cpp et sw=2 ts=2 ai: */ diff --git a/unsupported/Eigen/src/FFT/ei_fftw_impl.h b/unsupported/Eigen/src/FFT/ei_fftw_impl.h index d592bbb20..e1f67f334 100644 --- a/unsupported/Eigen/src/FFT/ei_fftw_impl.h +++ b/unsupported/Eigen/src/FFT/ei_fftw_impl.h @@ -22,7 +22,8 @@ // License and a copy of the GNU General Public License along with // Eigen. If not, see . -namespace Eigen { + + // FFTW uses non-const arguments // so we must use ugly const_cast calls for all the args it uses // @@ -32,21 +33,25 @@ namespace Eigen { // 2. fftw_complex is compatible with std::complex // This assumes std::complex layout is array of size 2 with real,imag template + inline T * ei_fftw_cast(const T* p) { return const_cast( p); } + inline fftw_complex * ei_fftw_cast( const std::complex * p) - { + { return const_cast( reinterpret_cast(p) ); } + inline fftwf_complex * ei_fftw_cast( const std::complex * p) { return const_cast( reinterpret_cast(p) ); } + inline fftwl_complex * ei_fftw_cast( const std::complex * p) { return const_cast( reinterpret_cast(p) ); @@ -64,18 +69,22 @@ namespace Eigen { ei_fftw_plan() :m_plan(NULL) {} ~ei_fftw_plan() {if (m_plan) fftwf_destroy_plan(m_plan);} + inline void fwd(complex_type * dst,complex_type * src,int nfft) { if (m_plan==NULL) m_plan = fftwf_plan_dft_1d(nfft,src,dst, FFTW_FORWARD, FFTW_ESTIMATE); fftwf_execute_dft( m_plan, src,dst); } + inline void inv(complex_type * dst,complex_type * src,int nfft) { if (m_plan==NULL) m_plan = fftwf_plan_dft_1d(nfft,src,dst, FFTW_BACKWARD , FFTW_ESTIMATE); fftwf_execute_dft( m_plan, src,dst); } + inline void fwd(complex_type * dst,scalar_type * src,int nfft) { if (m_plan==NULL) m_plan = fftwf_plan_dft_r2c_1d(nfft,src,dst,FFTW_ESTIMATE); fftwf_execute_dft_r2c( m_plan,src,dst); } + inline void inv(scalar_type * dst,complex_type * src,int nfft) { if (m_plan==NULL) m_plan = fftwf_plan_dft_c2r_1d(nfft,src,dst,FFTW_ESTIMATE); @@ -91,18 +100,22 @@ namespace Eigen { ei_fftw_plan() :m_plan(NULL) {} ~ei_fftw_plan() {if (m_plan) fftw_destroy_plan(m_plan);} + inline void fwd(complex_type * dst,complex_type * src,int nfft) { if (m_plan==NULL) m_plan = fftw_plan_dft_1d(nfft,src,dst, FFTW_FORWARD, FFTW_ESTIMATE); fftw_execute_dft( m_plan, src,dst); } + inline void inv(complex_type * dst,complex_type * src,int nfft) { if (m_plan==NULL) m_plan = fftw_plan_dft_1d(nfft,src,dst, FFTW_BACKWARD , FFTW_ESTIMATE); fftw_execute_dft( m_plan, src,dst); } + inline void fwd(complex_type * dst,scalar_type * src,int nfft) { if (m_plan==NULL) m_plan = fftw_plan_dft_r2c_1d(nfft,src,dst,FFTW_ESTIMATE); fftw_execute_dft_r2c( m_plan,src,dst); } + inline void inv(scalar_type * dst,complex_type * src,int nfft) { if (m_plan==NULL) m_plan = fftw_plan_dft_c2r_1d(nfft,src,dst,FFTW_ESTIMATE); @@ -118,18 +131,22 @@ namespace Eigen { ei_fftw_plan() :m_plan(NULL) {} ~ei_fftw_plan() {if (m_plan) fftwl_destroy_plan(m_plan);} + inline void fwd(complex_type * dst,complex_type * src,int nfft) { if (m_plan==NULL) m_plan = fftwl_plan_dft_1d(nfft,src,dst, FFTW_FORWARD, FFTW_ESTIMATE); fftwl_execute_dft( m_plan, src,dst); } + inline void inv(complex_type * dst,complex_type * src,int nfft) { if (m_plan==NULL) m_plan = fftwl_plan_dft_1d(nfft,src,dst, FFTW_BACKWARD , FFTW_ESTIMATE); fftwl_execute_dft( m_plan, src,dst); } + inline void fwd(complex_type * dst,scalar_type * src,int nfft) { if (m_plan==NULL) m_plan = fftwl_plan_dft_r2c_1d(nfft,src,dst,FFTW_ESTIMATE); fftwl_execute_dft_r2c( m_plan,src,dst); } + inline void inv(scalar_type * dst,complex_type * src,int nfft) { if (m_plan==NULL) m_plan = fftwl_plan_dft_c2r_1d(nfft,src,dst,FFTW_ESTIMATE); @@ -143,17 +160,20 @@ namespace Eigen { typedef _Scalar Scalar; typedef std::complex Complex; + inline void clear() { m_plans.clear(); } + inline void fwd( Complex * dst,const Complex *src,int nfft) { get_plan(nfft,false,dst,src).fwd(ei_fftw_cast(dst), ei_fftw_cast(src),nfft ); } // real-to-complex forward FFT + inline void fwd( Complex * dst,const Scalar * src,int nfft) { get_plan(nfft,false,dst,src).fwd(ei_fftw_cast(dst), ei_fftw_cast(src) ,nfft); @@ -163,30 +183,37 @@ namespace Eigen { } // inverse complex-to-complex + inline void inv(Complex * dst,const Complex *src,int nfft) { get_plan(nfft,true,dst,src).inv(ei_fftw_cast(dst), ei_fftw_cast(src),nfft ); + + //TODO move scaling to Eigen::FFT // scaling - Scalar s = 1./nfft; + Scalar s = Scalar(1.)/nfft; for (int k=0;k PlanData; typedef std::map PlanMap; PlanMap m_plans; + inline PlanData & get_plan(int nfft,bool inverse,void * dst,const void * src) { bool inplace = (dst==src); @@ -195,4 +222,3 @@ namespace Eigen { return m_plans[key]; } }; -} diff --git a/unsupported/Eigen/src/FFT/ei_kissfft_impl.h b/unsupported/Eigen/src/FFT/ei_kissfft_impl.h index a84ac68a0..c068d8765 100644 --- a/unsupported/Eigen/src/FFT/ei_kissfft_impl.h +++ b/unsupported/Eigen/src/FFT/ei_kissfft_impl.h @@ -22,11 +22,7 @@ // License and a copy of the GNU General Public License along with // Eigen. If not, see . -#include -#include -#include -namespace Eigen { // This FFT implementation was derived from kissfft http:sourceforge.net/projects/kissfft // Copyright 2003-2009 Mark Borgerding @@ -51,13 +47,6 @@ namespace Eigen { m_twiddles[i] = exp( Complex(0,i*phinc) ); } - void conjugate() - { - m_inverse = !m_inverse; - for ( size_t i=0;i + inline void fwd( Complex * dst,const _Src *src,int nfft) { get_plan(nfft,false).work(0, dst, src, 1,1); @@ -299,6 +294,7 @@ namespace Eigen { // perform two FFTs of src even and src odd // then twiddle to recombine them into the half-spectrum format // then fill in the conjugate symmetric half + inline void fwd( Complex * dst,const Scalar * src,int nfft) { if ( nfft&3 ) { @@ -334,6 +330,7 @@ namespace Eigen { } // inverse complex-to-complex + inline void inv(Complex * dst,const Complex *src,int nfft) { get_plan(nfft,true).work(0, dst, src, 1,1); @@ -341,6 +338,7 @@ namespace Eigen { } // half-complex to scalar + inline void inv( Scalar * dst,const Complex * src,int nfft) { if (nfft&3) { @@ -369,7 +367,7 @@ namespace Eigen { } } - private: + protected: typedef ei_kiss_cpx_fft PlanData; typedef std::map PlanMap; @@ -377,8 +375,10 @@ namespace Eigen { std::map > m_realTwiddles; std::vector m_tmpBuf; + inline int PlanKey(int nfft,bool isinverse) const { return (nfft<<1) | isinverse; } + inline PlanData & get_plan(int nfft,bool inverse) { // TODO look for PlanKey(nfft, ! inverse) and conjugate the twiddles @@ -390,6 +390,7 @@ namespace Eigen { return pd; } + inline Complex * real_twiddles(int ncfft2) { std::vector & twidref = m_realTwiddles[ncfft2];// creates new if not there @@ -403,10 +404,11 @@ namespace Eigen { return &twidref[0]; } + // TODO move scaling up into Eigen::FFT + inline void scale(Complex *dst,int n,Scalar s) { for (int k=0;k Date: Thu, 22 Oct 2009 10:11:26 +0200 Subject: [PATCH 36/72] demeaning with colwise expression --- Eigen/src/Geometry/Umeyama.h | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/Eigen/src/Geometry/Umeyama.h b/Eigen/src/Geometry/Umeyama.h index 7652aa92e..d0f36d2c9 100644 --- a/Eigen/src/Geometry/Umeyama.h +++ b/Eigen/src/Geometry/Umeyama.h @@ -131,17 +131,11 @@ umeyama(const MatrixBase& src, const MatrixBase& dst, boo const VectorType dst_mean = dst.rowwise().sum() * one_over_n; // demeaning of src and dst points - RowMajorMatrixType src_demean(m,n); - RowMajorMatrixType dst_demean(m,n); - for (int i=0; i Date: Thu, 22 Oct 2009 15:56:10 -0400 Subject: [PATCH 37/72] support gcc 3.3 --- test/geo_hyperplane.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/test/geo_hyperplane.cpp b/test/geo_hyperplane.cpp index f8281a16b..010989feb 100644 --- a/test/geo_hyperplane.cpp +++ b/test/geo_hyperplane.cpp @@ -121,7 +121,8 @@ template void lines() VERIFY_IS_APPROX(result, center); // check conversions between two types of lines - CoeffsType converted_coeffs = HLine(PLine(line_u)).coeffs(); + PLine pl(line_u); // gcc 3.3 will commit suicide if we don't name this variable + CoeffsType converted_coeffs = HLine(pl).coeffs(); converted_coeffs *= (line_u.coeffs()[0])/(converted_coeffs[0]); VERIFY(line_u.coeffs().isApprox(converted_coeffs)); } From 0167f5ef434eaeee42b9be10b0150b5f0f983b93 Mon Sep 17 00:00:00 2001 From: Mark Borgerding Date: Thu, 22 Oct 2009 23:06:19 -0400 Subject: [PATCH 38/72] added inline to many functions --- bench/benchFFT.cpp | 3 -- unsupported/Eigen/Complex | 61 ++++++++++++++++++++++++++------- unsupported/test/CMakeLists.txt | 1 + 3 files changed, 49 insertions(+), 16 deletions(-) diff --git a/bench/benchFFT.cpp b/bench/benchFFT.cpp index 4b6cabb55..fccc02a6c 100644 --- a/bench/benchFFT.cpp +++ b/bench/benchFFT.cpp @@ -26,9 +26,6 @@ #include #include #include -#ifdef USE_FFTW -#include -#endif #include diff --git a/unsupported/Eigen/Complex b/unsupported/Eigen/Complex index e1c41ab38..065c0305d 100644 --- a/unsupported/Eigen/Complex +++ b/unsupported/Eigen/Complex @@ -33,14 +33,26 @@ namespace Eigen { -template +template struct castable_pointer { - castable_pointer(_NativePtr ptr) : _ptr(ptr) {} - operator _NativePtr () {return _ptr;} - operator _PunnedPtr () {return reinterpret_cast<_PunnedPtr>(_ptr);} + castable_pointer(_NativeData * ptr) : _ptr(ptr) { } + operator _NativeData * () {return _ptr;} + operator _PunnedData * () {return reinterpret_cast<_PunnedData*>(_ptr);} + operator const _NativeData * () const {return _ptr;} + operator const _PunnedData * () const {return reinterpret_cast<_PunnedData*>(_ptr);} private: - _NativePtr _ptr; + _NativeData * _ptr; +}; + +template +struct const_castable_pointer +{ + const_castable_pointer(_NativeData * ptr) : _ptr(ptr) { } + operator const _NativeData * () const {return _ptr;} + operator const _PunnedData * () const {return reinterpret_cast<_PunnedData*>(_ptr);} + private: + _NativeData * _ptr; }; template @@ -50,7 +62,8 @@ struct Complex typedef T value_type; // constructors - Complex(const T& re = T(), const T& im = T()) : _re(re),_im(im) { } + Complex() {} + Complex(const T& re, const T& im = T()) : _re(re),_im(im) { } Complex(const Complex&other ): _re(other.real()) ,_im(other.imag()) {} template @@ -58,40 +71,63 @@ struct Complex template Complex(const std::complex&other): _re(other.real()) ,_im(other.imag()) {} - // allow binary access to the object as a std::complex - typedef castable_pointer< Complex*, StandardComplex* > pointer_type; - typedef castable_pointer< const Complex*, const StandardComplex* > const_pointer_type; + typedef castable_pointer< Complex, StandardComplex > pointer_type; + typedef const_castable_pointer< Complex, StandardComplex > const_pointer_type; + + inline pointer_type operator & () {return pointer_type(this);} + + inline const_pointer_type operator & () const {return const_pointer_type(this);} + inline operator StandardComplex () const {return std_type();} + inline operator StandardComplex & () {return std_type();} - StandardComplex std_type() const {return StandardComplex(real(),imag());} + inline + const StandardComplex & std_type() const {return *reinterpret_cast(this);} + + inline StandardComplex & std_type() {return *reinterpret_cast(this);} // every sort of accessor and mutator that has ever been in fashion. // For a brief history, search for "std::complex over-encapsulated" // http://www.open-std.org/jtc1/sc22/wg21/docs/lwg-defects.html#387 + inline const T & real() const {return _re;} + inline const T & imag() const {return _im;} + inline T & real() {return _re;} + inline T & imag() {return _im;} + inline T & real(const T & x) {return _re=x;} + inline T & imag(const T & x) {return _im=x;} + inline void set_real(const T & x) {_re = x;} + inline void set_imag(const T & x) {_im = x;} // *** complex member functions: *** + inline Complex& operator= (const T& val) { _re=val;_im=0;return *this; } + inline Complex& operator+= (const T& val) {_re+=val;return *this;} + inline Complex& operator-= (const T& val) {_re-=val;return *this;} + inline Complex& operator*= (const T& val) {_re*=val;_im*=val;return *this; } + inline Complex& operator/= (const T& val) {_re/=val;_im/=val;return *this; } + inline Complex& operator= (const Complex& rhs) {_re=rhs._re;_im=rhs._im;return *this;} + inline Complex& operator= (const StandardComplex& rhs) {_re=rhs.real();_im=rhs.imag();return *this;} template Complex& operator= (const Complex& rhs) { _re=rhs._re;_im=rhs._im;return *this;} @@ -105,8 +141,7 @@ struct Complex T _im; }; -template -T ei_to_std( const T & x) {return x;} +//template T ei_to_std( const T & x) {return x;} template std::complex ei_to_std( const Complex & x) {return x.std_type();} @@ -165,7 +200,7 @@ operator<< (std::basic_ostream& ostr, const Complex& rhs) template Complex log (const Complex&x){return log(ei_to_std(x));} template Complex log10 (const Complex&x){return log10(ei_to_std(x));} - template Complex pow(const Complex&x, int p) {return pow(ei_to_std(x),ei_to_std(p));} + template Complex pow(const Complex&x, int p) {return pow(ei_to_std(x),p);} template Complex pow(const Complex&x, const T&p) {return pow(ei_to_std(x),ei_to_std(p));} template Complex pow(const Complex&x, const Complex&p) {return pow(ei_to_std(x),ei_to_std(p));} template Complex pow(const T&x, const Complex&p) {return pow(ei_to_std(x),ei_to_std(p));} diff --git a/unsupported/test/CMakeLists.txt b/unsupported/test/CMakeLists.txt index d182c9abf..c75d5eb0b 100644 --- a/unsupported/test/CMakeLists.txt +++ b/unsupported/test/CMakeLists.txt @@ -26,3 +26,4 @@ if(FFTW_FOUND) ei_add_test(FFTW "-DEIGEN_FFTW_DEFAULT " "-lfftw3 -lfftw3f -lfftw3l" ) endif(FFTW_FOUND) +ei_add_test(Complex) From a382963b04d4b847c6b0ef0611511fb6a8e11718 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Fri, 23 Oct 2009 14:26:14 +0200 Subject: [PATCH 39/72] * extend Map to allow the user to specify whether the mapped data is aligned or not. This is done using the Aligned constant: Map::Map(data); * rename ForceAligned to EnforceAlignedAccess, and update its doc, and emphasize this is mainly an internal stuff. --- Eigen/src/Core/Block.h | 18 +++++++-------- Eigen/src/Core/Map.h | 35 ++++++++++++++++------------- Eigen/src/Core/MapBase.h | 40 +++++++++++++++++++++++---------- Eigen/src/Core/StableNorm.h | 2 +- Eigen/src/Core/util/Constants.h | 4 ++-- test/map.cpp | 7 +++--- 6 files changed, 63 insertions(+), 43 deletions(-) diff --git a/Eigen/src/Core/Block.h b/Eigen/src/Core/Block.h index cebfeaf75..5fffdcb01 100644 --- a/Eigen/src/Core/Block.h +++ b/Eigen/src/Core/Block.h @@ -33,10 +33,10 @@ * \param MatrixType the type of the object in which we are taking a block * \param BlockRows the number of rows of the block we are taking at compile time (optional) * \param BlockCols the number of columns of the block we are taking at compile time (optional) - * \param _PacketAccess allows to enforce aligned loads and stores if set to \b ForceAligned. - * The default is \b AsRequested. This parameter is internaly used by Eigen - * in expressions such as \code mat.block() += other; \endcode and most of - * the time this is the only way it is used. + * \param _PacketAccess \internal used to enforce aligned loads in expressions such as + * \code mat.block() += other; \endcode. Possible values are + * \c AsRequested (default) and \c EnforceAlignedAccess. + * See class MapBase for more details. * \param _DirectAccessStatus \internal used for partial specialization * * This class represents an expression of either a fixed-size or dynamic-size block. It is the return @@ -84,9 +84,9 @@ struct ei_traits::CoeffReadCost, PacketAccess = _PacketAccess }; - typedef typename ei_meta_if&, - Block >::ret AlignedDerivedType; + Block >::ret AlignedDerivedType; }; template class Block @@ -228,13 +228,13 @@ class Block class InnerIterator; typedef typename ei_traits::AlignedDerivedType AlignedDerivedType; - friend class Block; + friend class Block; EIGEN_INHERIT_ASSIGNMENT_OPERATORS(Block) - AlignedDerivedType _convertToForceAligned() + AlignedDerivedType _convertToEnforceAlignedAccess() { - return Block + return Block (m_matrix, Base::m_data, Base::m_rows.value(), Base::m_cols.value()); } diff --git a/Eigen/src/Core/Map.h b/Eigen/src/Core/Map.h index f6bc814e2..dba7e20e4 100644 --- a/Eigen/src/Core/Map.h +++ b/Eigen/src/Core/Map.h @@ -31,16 +31,14 @@ * \brief A matrix or vector expression mapping an existing array of data. * * \param MatrixType the equivalent matrix type of the mapped data - * \param _PacketAccess allows to enforce aligned loads and stores if set to ForceAligned. - * The default is AsRequested. This parameter is internaly used by Eigen - * in expressions such as \code Map<...>(...) += other; \endcode and most - * of the time this is the only way it is used. + * \param PointerAlignment specifies whether the pointer is \c Aligned, or \c Unaligned. + * The default is \c Unaligned. * * This class represents a matrix or vector expression mapping an existing array of data. * It can be used to let Eigen interface without any overhead with non-Eigen data structures, * such as plain C arrays or structures from other libraries. * - * \b Tips: to change the array of data mapped by a Map object, you can use the C++ + * \b Tip: to change the array of data mapped by a Map object, you can use the C++ * placement new syntax: * * Example: \include Map_placement_new.cpp @@ -48,22 +46,27 @@ * * This class is the return type of Matrix::Map() but can also be used directly. * + * \b Note \b to \b Eigen \b developers: The template parameter \c PointerAlignment + * can also be or-ed with \c EnforceAlignedAccess in order to enforce aligned read + * in expressions such as \code A += B; \endcode. See class MapBase for further details. + * * \sa Matrix::Map() */ -template -struct ei_traits > : public ei_traits +template +struct ei_traits > : public ei_traits { enum { - PacketAccess = _PacketAccess, - Flags = ei_traits::Flags & ~AlignedBit + PacketAccess = Options & EnforceAlignedAccess, + Flags = (Options&Aligned)==Aligned ? ei_traits::Flags | AlignedBit + : ei_traits::Flags & ~AlignedBit }; - typedef typename ei_meta_if&, - Map >::ret AlignedDerivedType; + typedef typename ei_meta_if&, + Map >::ret AlignedDerivedType; }; -template class Map - : public MapBase > +template class Map + : public MapBase > { public: @@ -72,9 +75,9 @@ template class Map inline int stride() const { return this->innerSize(); } - AlignedDerivedType _convertToForceAligned() + AlignedDerivedType _convertToEnforceAlignedAccess() { - return Map(Base::m_data, Base::m_rows.value(), Base::m_cols.value()); + return AlignedDerivedType(Base::m_data, Base::m_rows.value(), Base::m_cols.value()); } inline Map(const Scalar* data) : Base(data) {} diff --git a/Eigen/src/Core/MapBase.h b/Eigen/src/Core/MapBase.h index 88a3fac1e..8770732de 100644 --- a/Eigen/src/Core/MapBase.h +++ b/Eigen/src/Core/MapBase.h @@ -32,11 +32,17 @@ * * Expression classes inheriting MapBase must define the constant \c PacketAccess, * and type \c AlignedDerivedType in their respective ei_traits<> specialization structure. - * The value of \c PacketAccess can be either: - * - \b ForceAligned which enforces both aligned loads and stores - * - \b AsRequested which is the default behavior + * The value of \c PacketAccess can be either \b AsRequested, or set to \b EnforceAlignedAccess which + * enforces both aligned loads and stores. + * + * \c EnforceAlignedAccess is automatically set in expressions such as + * \code A += B; \endcode where A is either a Block or a Map. Here, + * this expression is transfomed into \code A = A_with_EnforceAlignedAccess + B; \endcode + * avoiding unaligned loads from A. Indeed, since Eigen's packet evaluation mechanism + * automatically align to the destination matrix, we know that loads to A will be aligned too. + * * The type \c AlignedDerivedType should correspond to the equivalent expression type - * with \c PacketAccess being \c ForceAligned. + * with \c PacketAccess set to \c EnforceAlignedAccess. * * \sa class Map, class Block */ @@ -79,19 +85,19 @@ template class MapBase * \sa MapBase::stride() */ inline const Scalar* data() const { return m_data; } - template struct force_aligned_impl { + template struct force_aligned_impl { static AlignedDerivedType run(MapBase& a) { return a.derived(); } }; template struct force_aligned_impl { - static AlignedDerivedType run(MapBase& a) { return a.derived()._convertToForceAligned(); } + static AlignedDerivedType run(MapBase& a) { return a.derived()._convertToEnforceAlignedAccess(); } }; /** \returns an expression equivalent to \c *this but having the \c PacketAccess constant - * set to \c ForceAligned. Must be reimplemented by the derived class. */ + * set to \c EnforceAlignedAccess. Must be reimplemented by the derived class. */ AlignedDerivedType forceAligned() { - return force_aligned_impl::run(*this); + return force_aligned_impl::run(*this); } inline const Scalar& coeff(int row, int col) const @@ -131,7 +137,7 @@ template class MapBase template inline PacketScalar packet(int row, int col) const { - return ei_ploadt + return ei_ploadt (m_data + (IsRowMajor ? col + row * stride() : row + col * stride())); } @@ -139,13 +145,13 @@ template class MapBase template inline PacketScalar packet(int index) const { - return ei_ploadt(m_data + index); + return ei_ploadt(m_data + index); } template inline void writePacket(int row, int col, const PacketScalar& x) { - ei_pstoret + ei_pstoret (const_cast(m_data) + (IsRowMajor ? col + row * stride() : row + col * stride()), x); } @@ -153,13 +159,14 @@ template class MapBase template inline void writePacket(int index, const PacketScalar& x) { - ei_pstoret + ei_pstoret (const_cast(m_data) + index, x); } inline MapBase(const Scalar* data) : m_data(data), m_rows(RowsAtCompileTime), m_cols(ColsAtCompileTime) { EIGEN_STATIC_ASSERT_FIXED_SIZE(Derived) + checkDataAlignment(); } inline MapBase(const Scalar* data, int size) @@ -170,6 +177,7 @@ template class MapBase EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived) ei_assert(size >= 0); ei_assert(data == 0 || SizeAtCompileTime == Dynamic || SizeAtCompileTime == size); + checkDataAlignment(); } inline MapBase(const Scalar* data, int rows, int cols) @@ -178,6 +186,7 @@ template class MapBase ei_assert( (data == 0) || ( rows >= 0 && (RowsAtCompileTime == Dynamic || RowsAtCompileTime == rows) && cols >= 0 && (ColsAtCompileTime == Dynamic || ColsAtCompileTime == cols))); + checkDataAlignment(); } Derived& operator=(const MapBase& other) @@ -215,6 +224,13 @@ template class MapBase { return derived() = forceAligned() / other; } protected: + + void checkDataAlignment() const + { + ei_assert( ((!(ei_traits::Flags&AlignedBit)) + || ((std::size_t(m_data)&0xf)==0)) && "data is not aligned"); + } + const Scalar* EIGEN_RESTRICT m_data; const ei_int_if_dynamic m_rows; const ei_int_if_dynamic m_cols; diff --git a/Eigen/src/Core/StableNorm.h b/Eigen/src/Core/StableNorm.h index 06e69c448..f2d1e7240 100644 --- a/Eigen/src/Core/StableNorm.h +++ b/Eigen/src/Core/StableNorm.h @@ -59,7 +59,7 @@ MatrixBase::stableNorm() const RealScalar invScale = 1; RealScalar ssq = 0; // sum of square enum { - Alignment = (int(Flags)&DirectAccessBit) || (int(Flags)&AlignedBit) ? ForceAligned : AsRequested + Alignment = (int(Flags)&DirectAccessBit) || (int(Flags)&AlignedBit) ? EnforceAlignedAccess : AsRequested }; int n = size(); int bi=0; diff --git a/Eigen/src/Core/util/Constants.h b/Eigen/src/Core/util/Constants.h index affc1d478..169fb5aec 100644 --- a/Eigen/src/Core/util/Constants.h +++ b/Eigen/src/Core/util/Constants.h @@ -196,8 +196,8 @@ const unsigned int UnitLowerTriangular = LowerTriangularBit | UnitDiagBit; enum { DiagonalOnTheLeft, DiagonalOnTheRight }; -enum { Aligned, Unaligned }; -enum { ForceAligned, AsRequested }; +enum { Unaligned=0, Aligned=1 }; +enum { AsRequested=0, EnforceAlignedAccess=2 }; enum { ConditionalJumpCost = 5 }; enum CornerType { TopLeft, TopRight, BottomLeft, BottomRight }; enum DirectionType { Vertical, Horizontal, BothDirections }; diff --git a/test/map.cpp b/test/map.cpp index 62e727304..fbff647f6 100644 --- a/test/map.cpp +++ b/test/map.cpp @@ -37,14 +37,15 @@ template void map_class(const VectorType& m) Scalar* array3unaligned = size_t(array3)%16 == 0 ? array3+1 : array3; Map(array1, size) = VectorType::Random(size); - Map(array2, size) = Map(array1, size); + Map(array2, size) = Map(array1, size); Map(array3unaligned, size) = Map(array1, size); - VectorType ma1 = Map(array1, size); + VectorType ma1 = Map(array1, size); VectorType ma2 = Map(array2, size); VectorType ma3 = Map(array3unaligned, size); VERIFY_IS_APPROX(ma1, ma2); VERIFY_IS_APPROX(ma1, ma3); - + VERIFY_RAISES_ASSERT((Map(array3unaligned, size))); + ei_aligned_delete(array1, size); ei_aligned_delete(array2, size); delete[] array3; From 66fe5a5f36c80c64fb6078c7e7cb089057bff96f Mon Sep 17 00:00:00 2001 From: Hauke Heibel Date: Sat, 24 Oct 2009 14:48:34 +0200 Subject: [PATCH 40/72] It is just not that easy and requires more work to get it done right. --- Eigen/src/Core/util/Macros.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Eigen/src/Core/util/Macros.h b/Eigen/src/Core/util/Macros.h index 706b30174..5ee17e91e 100644 --- a/Eigen/src/Core/util/Macros.h +++ b/Eigen/src/Core/util/Macros.h @@ -256,7 +256,7 @@ using Eigen::ei_cos; // C++0x features #if defined(__GXX_EXPERIMENTAL_CXX0X__) || (defined(_MSC_VER) && (_MSC_VER >= 1600)) - #define EIGEN_REF_TO_TEMPORARY && + #define EIGEN_REF_TO_TEMPORARY const & #else #define EIGEN_REF_TO_TEMPORARY const & #endif From 7cc9fb5d0adc1a4a2d515d6c42c92c93f80a8966 Mon Sep 17 00:00:00 2001 From: Hauke Heibel Date: Tue, 27 Oct 2009 15:29:12 +0100 Subject: [PATCH 41/72] Umeyama is now working with fixed size src and dst points. --- Eigen/src/Geometry/Umeyama.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Eigen/src/Geometry/Umeyama.h b/Eigen/src/Geometry/Umeyama.h index d0f36d2c9..551a69e5b 100644 --- a/Eigen/src/Geometry/Umeyama.h +++ b/Eigen/src/Geometry/Umeyama.h @@ -117,7 +117,7 @@ umeyama(const MatrixBase& src, const MatrixBase& dst, boo enum { Dimension = EIGEN_ENUM_MIN(Derived::RowsAtCompileTime, OtherDerived::RowsAtCompileTime) }; typedef Matrix VectorType; - typedef typename ei_plain_matrix_type::type MatrixType; + typedef Matrix MatrixType; typedef typename ei_plain_matrix_type_row_major::type RowMajorMatrixType; const int m = src.rows(); // dimension From dbaba9019ba2ca116406f286a71a745179e61531 Mon Sep 17 00:00:00 2001 From: Hauke Heibel Date: Tue, 27 Oct 2009 15:57:21 +0100 Subject: [PATCH 42/72] Added more common typedefs. --- Eigen/src/Core/Matrix.h | 11 ++++++++++- Eigen/src/Geometry/Transform.h | 19 ++++++++++++++----- 2 files changed, 24 insertions(+), 6 deletions(-) diff --git a/Eigen/src/Core/Matrix.h b/Eigen/src/Core/Matrix.h index 027e6bb70..cc2d71588 100644 --- a/Eigen/src/Core/Matrix.h +++ b/Eigen/src/Core/Matrix.h @@ -794,11 +794,20 @@ typedef Matrix Vector##SizeSuffix##TypeSuffix; \ /** \ingroup matrixtypedefs */ \ typedef Matrix RowVector##SizeSuffix##TypeSuffix; +#define EIGEN_MAKE_FIXED_TYPEDEFS(Type, TypeSuffix, Size) \ +/** \ingroup matrixtypedefs */ \ +typedef Matrix Matrix##Size##X##TypeSuffix; \ +/** \ingroup matrixtypedefs */ \ +typedef Matrix Matrix##X##Size##TypeSuffix; + #define EIGEN_MAKE_TYPEDEFS_ALL_SIZES(Type, TypeSuffix) \ EIGEN_MAKE_TYPEDEFS(Type, TypeSuffix, 2, 2) \ EIGEN_MAKE_TYPEDEFS(Type, TypeSuffix, 3, 3) \ EIGEN_MAKE_TYPEDEFS(Type, TypeSuffix, 4, 4) \ -EIGEN_MAKE_TYPEDEFS(Type, TypeSuffix, Dynamic, X) +EIGEN_MAKE_TYPEDEFS(Type, TypeSuffix, Dynamic, X) \ +EIGEN_MAKE_FIXED_TYPEDEFS(Type, TypeSuffix, 2) \ +EIGEN_MAKE_FIXED_TYPEDEFS(Type, TypeSuffix, 3) \ +EIGEN_MAKE_FIXED_TYPEDEFS(Type, TypeSuffix, 4) EIGEN_MAKE_TYPEDEFS_ALL_SIZES(int, i) EIGEN_MAKE_TYPEDEFS_ALL_SIZES(float, f) diff --git a/Eigen/src/Geometry/Transform.h b/Eigen/src/Geometry/Transform.h index d03fd52fd..c49356361 100644 --- a/Eigen/src/Geometry/Transform.h +++ b/Eigen/src/Geometry/Transform.h @@ -480,6 +480,15 @@ typedef Transform Transform2d; /** \ingroup Geometry_Module */ typedef Transform Transform3d; +/** \ingroup Geometry_Module */ +typedef Transform Isometry2f; +/** \ingroup Geometry_Module */ +typedef Transform Isometry3f; +/** \ingroup Geometry_Module */ +typedef Transform Isometry2d; +/** \ingroup Geometry_Module */ +typedef Transform Isometry3d; + /** \ingroup Geometry_Module */ typedef Transform Affine2f; /** \ingroup Geometry_Module */ @@ -512,7 +521,7 @@ typedef Transform Projective3d; **************************/ #ifdef EIGEN_QT_SUPPORT -/** Initialises \c *this from a QMatrix assuming the dimension is 2. +/** Initializes \c *this from a QMatrix assuming the dimension is 2. * * This function is available only if the token EIGEN_QT_SUPPORT is defined. */ @@ -538,7 +547,7 @@ Transform& Transform::operator=(const QMatrix& /** \returns a QMatrix from \c *this assuming the dimension is 2. * - * \warning this convertion might loss data if \c *this is not affine + * \warning this conversion might loss data if \c *this is not affine * * This function is available only if the token EIGEN_QT_SUPPORT is defined. */ @@ -551,7 +560,7 @@ QMatrix Transform::toQMatrix(void) const matrix.coeff(0,2), matrix.coeff(1,2)); } -/** Initialises \c *this from a QTransform assuming the dimension is 2. +/** Initializes \c *this from a QTransform assuming the dimension is 2. * * This function is available only if the token EIGEN_QT_SUPPORT is defined. */ @@ -881,7 +890,7 @@ Transform::fromPositionOrientationScale(const MatrixBase > { }; /***************************************************** -*** Specializations of construct from matix *** +*** Specializations of construct from matrix *** *****************************************************/ template From 427f8a87d1c184e9fb0c5fdc62197be2a0abb910 Mon Sep 17 00:00:00 2001 From: Hauke Heibel Date: Tue, 27 Oct 2009 16:02:36 +0100 Subject: [PATCH 43/72] Added dox for the new typedefs. --- Eigen/src/Core/Matrix.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/Eigen/src/Core/Matrix.h b/Eigen/src/Core/Matrix.h index cc2d71588..17d2f2836 100644 --- a/Eigen/src/Core/Matrix.h +++ b/Eigen/src/Core/Matrix.h @@ -58,6 +58,9 @@ template ) * \li \c VectorXf is a dynamic-size vector of floats (\c Matrix) * + * \li \c Matrix2Xf is a partially fixed-size (dynamic-size) matrix of floats (\c Matrix) + * \li \c MatrixX3d is a partially dynamic-size (fixed-size) matrix of double (\c Matrix) + * * See \link matrixtypedefs this page \endlink for a complete list of predefined \em %Matrix and \em Vector typedefs. * * You can access elements of vectors and matrices using normal subscripting: From 611d2b0b1d64880ff207368a901c7faba73a78c5 Mon Sep 17 00:00:00 2001 From: Mathieu Gautier Date: Tue, 27 Oct 2009 13:19:16 +0000 Subject: [PATCH 44/72] Quaternion could now map an array of 4 scalars : new classes : * QuaternionBase * Map --- Eigen/src/Core/util/ForwardDeclarations.h | 1 + Eigen/src/Geometry/Quaternion.h | 545 ++++++++++++++++++++++ Eigen/src/Geometry/arch/Geometry_SSE.h | 21 + 3 files changed, 567 insertions(+) diff --git a/Eigen/src/Core/util/ForwardDeclarations.h b/Eigen/src/Core/util/ForwardDeclarations.h index 65e5ce687..025904734 100644 --- a/Eigen/src/Core/util/ForwardDeclarations.h +++ b/Eigen/src/Core/util/ForwardDeclarations.h @@ -129,6 +129,7 @@ template class PlanarRotation; // Geometry module: template class RotationBase; template class Cross; +template class QuaternionBase; template class Quaternion; template class Rotation2D; template class AngleAxis; diff --git a/Eigen/src/Geometry/Quaternion.h b/Eigen/src/Geometry/Quaternion.h index 2f9f97807..f3f60a08a 100644 --- a/Eigen/src/Geometry/Quaternion.h +++ b/Eigen/src/Geometry/Quaternion.h @@ -507,4 +507,549 @@ struct ei_quaternion_assign_impl } }; +/*################################################################### + QuaternionBase and Map and Quat + ###################################################################*/ + +template +struct ei_quaternionbase_assign_impl; + +template class Quat; // [XXX] => remove when Quat becomes Quaternion + +template +struct ei_traits > +{ + typedef typename ei_traits::Scalar Scalar; + enum { + PacketAccess = ei_traits::PacketAccess + }; +}; + +template +class QuaternionBase : public RotationBase { + typedef RotationBase Base; +public: + using Base::operator*; + + typedef typename ei_traits >::Scalar Scalar; + typedef typename NumTraits::Real RealScalar; + + // typedef typename Matrix Coefficients; + /** the type of a 3D vector */ + typedef Matrix Vector3; + /** the equivalent rotation matrix type */ + typedef Matrix Matrix3; + /** the equivalent angle-axis type */ + typedef AngleAxis AngleAxisType; + + /** \returns the \c x coefficient */ + inline Scalar x() const { return this->derived().coeffs().coeff(0); } + /** \returns the \c y coefficient */ + inline Scalar y() const { return this->derived().coeffs().coeff(1); } + /** \returns the \c z coefficient */ + inline Scalar z() const { return this->derived().coeffs().coeff(2); } + /** \returns the \c w coefficient */ + inline Scalar w() const { return this->derived().coeffs().coeff(3); } + + /** \returns a reference to the \c x coefficient */ + inline Scalar& x() { return this->derived().coeffs().coeffRef(0); } + /** \returns a reference to the \c y coefficient */ + inline Scalar& y() { return this->derived().coeffs().coeffRef(1); } + /** \returns a reference to the \c z coefficient */ + inline Scalar& z() { return this->derived().coeffs().coeffRef(2); } + /** \returns a reference to the \c w coefficient */ + inline Scalar& w() { return this->derived().coeffs().coeffRef(3); } + + /** \returns a read-only vector expression of the imaginary part (x,y,z) */ + inline const VectorBlock::Coefficients,3,1> vec() const { return this->derived().coeffs().template start<3>(); } + + /** \returns a vector expression of the imaginary part (x,y,z) */ + inline VectorBlock::Coefficients,3,1> vec() { return this->derived().coeffs().template start<3>(); } + + /** \returns a read-only vector expression of the coefficients (x,y,z,w) */ + inline const typename ei_traits::Coefficients& coeffs() const { return this->derived().coeffs(); } + + /** \returns a vector expression of the coefficients (x,y,z,w) */ + inline typename ei_traits::Coefficients& coeffs() { return this->derived().coeffs(); } + + template QuaternionBase& operator=(const QuaternionBase& other); + QuaternionBase& operator=(const AngleAxisType& aa); + template + QuaternionBase& operator=(const MatrixBase& m); + + /** \returns a quaternion representing an identity rotation + * \sa MatrixBase::Identity() + */ + inline static Quat Identity() { return Quat(1, 0, 0, 0); } + + /** \sa Quaternion2::Identity(), MatrixBase::setIdentity() + */ + inline QuaternionBase& setIdentity() { coeffs() << 0, 0, 0, 1; return *this; } + + /** \returns the squared norm of the quaternion's coefficients + * \sa Quaternion2::norm(), MatrixBase::squaredNorm() + */ + inline Scalar squaredNorm() const { return coeffs().squaredNorm(); } + + /** \returns the norm of the quaternion's coefficients + * \sa Quaternion2::squaredNorm(), MatrixBase::norm() + */ + inline Scalar norm() const { return coeffs().norm(); } + + /** Normalizes the quaternion \c *this + * \sa normalized(), MatrixBase::normalize() */ + inline void normalize() { coeffs().normalize(); } + /** \returns a normalized version of \c *this + * \sa normalize(), MatrixBase::normalized() */ + inline Quat normalized() const { return Quat(coeffs().normalized()); } + + /** \returns the dot product of \c *this and \a other + * Geometrically speaking, the dot product of two unit quaternions + * corresponds to the cosine of half the angle between the two rotations. + * \sa angularDistance() + */ + template inline Scalar dot(const QuaternionBase& other) const { return coeffs().dot(other.coeffs()); } + + template inline Scalar angularDistance(const QuaternionBase& other) const; + + Matrix3 toRotationMatrix(void) const; + + template + QuaternionBase& setFromTwoVectors(const MatrixBase& a, const MatrixBase& b); + + template inline Quat operator* (const QuaternionBase& q) const; + template inline QuaternionBase& operator*= (const QuaternionBase& q); + + Quat inverse(void) const; + Quat conjugate(void) const; + + template Quat slerp(Scalar t, const QuaternionBase& other) const; + + /** \returns \c true if \c *this is approximately equal to \a other, within the precision + * determined by \a prec. + * + * \sa MatrixBase::isApprox() */ + bool isApprox(const QuaternionBase& other, typename RealScalar prec = precision()) const + { return coeffs().isApprox(other.coeffs(), prec); } + + Vector3 _transformVector(Vector3 v) const; + +}; + +/* ########### Quat -> Quaternion */ + +template +struct ei_traits > +{ + typedef _Scalar Scalar; + typedef Matrix<_Scalar,4,1> Coefficients; + enum{ + PacketAccess = Aligned + }; +}; + +template +class Quat : public QuaternionBase >{ + typedef QuaternionBase > Base; +public: + using Base::operator=; + + typedef _Scalar Scalar; + + typedef typename ei_traits >::Coefficients Coefficients; + typedef typename Base::AngleAxisType AngleAxisType; + + /** Default constructor leaving the quaternion uninitialized. */ + inline Quat() {} + + /** Constructs and initializes the quaternion \f$ w+xi+yj+zk \f$ from + * its four coefficients \a w, \a x, \a y and \a z. + * + * \warning Note the order of the arguments: the real \a w coefficient first, + * while internally the coefficients are stored in the following order: + * [\c x, \c y, \c z, \c w] + */ + inline Quat(Scalar w, Scalar x, Scalar y, Scalar z) + { coeffs() << x, y, z, w; } + + /** Constructs and initialize a quaternion from the array data + * This constructor is also used to map an array */ + inline Quat(const Scalar* data) : m_coeffs(data) {} + + /** Copy constructor */ +// template inline Quat(const QuaternionBase& other) { m_coeffs = other.coeffs(); } [XXX] redundant with 703 + + /** Constructs and initializes a quaternion from the angle-axis \a aa */ + explicit inline Quat(const AngleAxisType& aa) { *this = aa; } + + /** Constructs and initializes a quaternion from either: + * - a rotation matrix expression, + * - a 4D vector expression representing quaternion coefficients. + */ + template + explicit inline Quat(const MatrixBase& other) { *this = other; } + + /** \returns \c *this with scalar type casted to \a NewScalarType + * + * Note that if \a NewScalarType is equal to the current scalar type of \c *this + * then this function smartly returns a const reference to \c *this. + */ + template + inline typename ei_cast_return_type >::type cast() const + { return typename ei_cast_return_type >::type(*this); } + + /** Copy constructor with scalar type conversion */ + template + inline explicit Quat(const QuaternionBase& other) + { m_coeffs = other.coeffs().template cast(); } + + inline Coefficients& coeffs() { return m_coeffs;} + inline const Coefficients& coeffs() const { return m_coeffs;} + +protected: + Coefficients m_coeffs; +}; + +/* ########### Map */ + +/** \class Map + * \nonstableyet + * + * \brief Expression of a quaternion + * + * \param Scalar the type of the vector of diagonal coefficients + * + * \sa class Quaternion, class QuaternionBase + */ +template +struct ei_traits, _PacketAccess> >: +ei_traits > +{ + typedef _Scalar Scalar; + typedef Map > Coefficients; + enum { + PacketAccess = _PacketAccess + }; +}; + +template +class Map, PacketAccess > : public QuaternionBase, PacketAccess> >, ei_no_assignment_operator { + public: + + typedef _Scalar Scalar; + + typedef typename ei_traits, PacketAccess> >::Coefficients Coefficients; + + inline Map, PacketAccess >(const Scalar* coeffs) : m_coeffs(coeffs) {} + + inline Coefficients& coeffs() { return m_coeffs;} + inline const Coefficients& coeffs() const { return m_coeffs;} + + protected: + Coefficients m_coeffs; +}; + +typedef Map > QuaternionMapd; +typedef Map > QuaternionMapf; +typedef Map, Aligned> QuaternionMapAlignedd; +typedef Map, Aligned> QuaternionMapAlignedf; + +// Generic Quaternion * Quaternion product +template struct ei_quat_product +{ + inline static Quat run(const QuaternionBase& a, const QuaternionBase& b){ + return Quat + ( + a.w() * b.w() - a.x() * b.x() - a.y() * b.y() - a.z() * b.z(), + a.w() * b.x() + a.x() * b.w() + a.y() * b.z() - a.z() * b.y(), + a.w() * b.y() + a.y() * b.w() + a.z() * b.x() - a.x() * b.z(), + a.w() * b.z() + a.z() * b.w() + a.x() * b.y() - a.y() * b.x() + ); + } +}; + +/** \returns the concatenation of two rotations as a quaternion-quaternion product */ +template +template +inline Quat >::Scalar> QuaternionBase::operator* (const QuaternionBase& other) const +{ + EIGEN_STATIC_ASSERT((ei_is_same_type::ret), + YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY) + return ei_quat_product::Scalar, + ei_traits::PacketAccess && ei_traits::PacketAccess>::run(*this, other); +} + +/** \sa operator*(Quaternion) */ +template +template +inline QuaternionBase& QuaternionBase::operator*= (const QuaternionBase& other) +{ + return (*this = *this * other); +} + +/** Rotation of a vector by a quaternion. + * \remarks If the quaternion is used to rotate several points (>1) + * then it is much more efficient to first convert it to a 3x3 Matrix. + * Comparison of the operation cost for n transformations: + * - Quaternion2: 30n + * - Via a Matrix3: 24 + 15n + */ +template +inline typename QuaternionBase::Vector3 +QuaternionBase::_transformVector(Vector3 v) const +{ + // Note that this algorithm comes from the optimization by hand + // of the conversion to a Matrix followed by a Matrix/Vector product. + // It appears to be much faster than the common algorithm found + // in the litterature (30 versus 39 flops). It also requires two + // Vector3 as temporaries. + Vector3 uv = Scalar(2) * this->vec().cross(v); + return v + this->w() * uv + this->vec().cross(uv); +} + +template +template +inline QuaternionBase& QuaternionBase::operator=(const QuaternionBase& other) +{ + coeffs() = other.coeffs(); + return *this; +} + +/** Set \c *this from an angle-axis \a aa and returns a reference to \c *this + */ +template +inline QuaternionBase& QuaternionBase::operator=(const AngleAxisType& aa) +{ + Scalar ha = Scalar(0.5)*aa.angle(); // Scalar(0.5) to suppress precision loss warnings + this->w() = ei_cos(ha); + this->vec() = ei_sin(ha) * aa.axis(); + return *this; +} + +/** Set \c *this from the expression \a xpr: + * - if \a xpr is a 4x1 vector, then \a xpr is assumed to be a quaternion + * - if \a xpr is a 3x3 matrix, then \a xpr is assumed to be rotation matrix + * and \a xpr is converted to a quaternion + */ + +template +template +inline QuaternionBase& QuaternionBase::operator=(const MatrixBase& xpr) +{ + EIGEN_STATIC_ASSERT((ei_is_same_type::ret), + YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY) + ei_quaternionbase_assign_impl::run(*this, xpr.derived()); + return *this; +} + +/** Convert the quaternion to a 3x3 rotation matrix. The quaternion is required to + * be normalized, otherwise the result is undefined. + */ +template +inline typename QuaternionBase::Matrix3 +QuaternionBase::toRotationMatrix(void) const +{ + // NOTE if inlined, then gcc 4.2 and 4.4 get rid of the temporary (not gcc 4.3 !!) + // if not inlined then the cost of the return by value is huge ~ +35%, + // however, not inlining this function is an order of magnitude slower, so + // it has to be inlined, and so the return by value is not an issue + Matrix3 res; + + const Scalar tx = 2*this->x(); + const Scalar ty = 2*this->y(); + const Scalar tz = 2*this->z(); + const Scalar twx = tx*this->w(); + const Scalar twy = ty*this->w(); + const Scalar twz = tz*this->w(); + const Scalar txx = tx*this->x(); + const Scalar txy = ty*this->x(); + const Scalar txz = tz*this->x(); + const Scalar tyy = ty*this->y(); + const Scalar tyz = tz*this->y(); + const Scalar tzz = tz*this->z(); + + res.coeffRef(0,0) = 1-(tyy+tzz); + res.coeffRef(0,1) = txy-twz; + res.coeffRef(0,2) = txz+twy; + res.coeffRef(1,0) = txy+twz; + res.coeffRef(1,1) = 1-(txx+tzz); + res.coeffRef(1,2) = tyz-twx; + res.coeffRef(2,0) = txz-twy; + res.coeffRef(2,1) = tyz+twx; + res.coeffRef(2,2) = 1-(txx+tyy); + + return res; +} + +/** Sets \c *this to be a quaternion representing a rotation between + * the two arbitrary vectors \a a and \a b. In other words, the built + * rotation represent a rotation sending the line of direction \a a + * to the line of direction \a b, both lines passing through the origin. + * + * \returns a reference to \c *this. + * + * Note that the two input vectors do \b not have to be normalized, and + * do not need to have the same norm. + */ +template +template +inline QuaternionBase& QuaternionBase::setFromTwoVectors(const MatrixBase& a, const MatrixBase& b) +{ + Vector3 v0 = a.normalized(); + Vector3 v1 = b.normalized(); + Scalar c = v1.dot(v0); + + // if dot == -1, vectors are nearly opposites + // => accuraletly compute the rotation axis by computing the + // intersection of the two planes. This is done by solving: + // x^T v0 = 0 + // x^T v1 = 0 + // under the constraint: + // ||x|| = 1 + // which yields a singular value problem + if (c < Scalar(-1)+precision()) + { + c = std::max(c,-1); + Matrix m; m << v0.transpose(), v1.transpose(); + SVD > svd(m); + Vector3 axis = svd.matrixV().col(2); + + Scalar w2 = (Scalar(1)+c)*Scalar(0.5); + this->w() = ei_sqrt(w2); + this->vec() = axis * ei_sqrt(Scalar(1) - w2); + return *this; + } + Vector3 axis = v0.cross(v1); + Scalar s = ei_sqrt((Scalar(1)+c)*Scalar(2)); + Scalar invs = Scalar(1)/s; + this->vec() = axis * invs; + this->w() = s * Scalar(0.5); + + return *this; +} + +/** \returns the multiplicative inverse of \c *this + * Note that in most cases, i.e., if you simply want the opposite rotation, + * and/or the quaternion is normalized, then it is enough to use the conjugate. + * + * \sa Quaternion2::conjugate() + */ +template +inline Quat >::Scalar> QuaternionBase::inverse() const +{ + // FIXME should this function be called multiplicativeInverse and conjugate() be called inverse() or opposite() ?? + Scalar n2 = this->squaredNorm(); + if (n2 > 0) + return Quat(conjugate().coeffs() / n2); + else + { + // return an invalid result to flag the error + return Quat(ei_traits::Coefficients::Zero()); + } +} + +/** \returns the conjugate of the \c *this which is equal to the multiplicative inverse + * if the quaternion is normalized. + * The conjugate of a quaternion represents the opposite rotation. + * + * \sa Quaternion2::inverse() + */ +template +inline Quat >::Scalar> QuaternionBase::conjugate() const +{ + return Quat(this->w(),-this->x(),-this->y(),-this->z()); +} + +/** \returns the angle (in radian) between two rotations + * \sa dot() + */ +template +template +inline typename ei_traits >::Scalar QuaternionBase::angularDistance(const QuaternionBase& other) const +{ + double d = ei_abs(this->dot(other)); + if (d>=1.0) + return 0; + return Scalar(2) * std::acos(d); +} + +/** \returns the spherical linear interpolation between the two quaternions + * \c *this and \a other at the parameter \a t + */ +template +template +Quat >::Scalar> QuaternionBase::slerp(Scalar t, const QuaternionBase& other) const +{ + static const Scalar one = Scalar(1) - precision(); + Scalar d = this->dot(other); + Scalar absD = ei_abs(d); + if (absD>=one) + return Quat(*this); + + // theta is the angle between the 2 quaternions + Scalar theta = std::acos(absD); + Scalar sinTheta = ei_sin(theta); + + Scalar scale0 = ei_sin( ( Scalar(1) - t ) * theta) / sinTheta; + Scalar scale1 = ei_sin( ( t * theta) ) / sinTheta; + if (d<0) + scale1 = -scale1; + + return Quat(scale0 * coeffs() + scale1 * other.coeffs()); +} + +// set from a rotation matrix +template +struct ei_quaternionbase_assign_impl +{ + typedef typename Other::Scalar Scalar; + template inline static void run(QuaternionBase& q, const Other& mat) + { + // This algorithm comes from "Quaternion Calculus and Fast Animation", + // Ken Shoemake, 1987 SIGGRAPH course notes + Scalar t = mat.trace(); + if (t > 0) + { + t = ei_sqrt(t + Scalar(1.0)); + q.w() = Scalar(0.5)*t; + t = Scalar(0.5)/t; + q.x() = (mat.coeff(2,1) - mat.coeff(1,2)) * t; + q.y() = (mat.coeff(0,2) - mat.coeff(2,0)) * t; + q.z() = (mat.coeff(1,0) - mat.coeff(0,1)) * t; + } + else + { + int i = 0; + if (mat.coeff(1,1) > mat.coeff(0,0)) + i = 1; + if (mat.coeff(2,2) > mat.coeff(i,i)) + i = 2; + int j = (i+1)%3; + int k = (j+1)%3; + + t = ei_sqrt(mat.coeff(i,i)-mat.coeff(j,j)-mat.coeff(k,k) + Scalar(1.0)); + q.coeffs().coeffRef(i) = Scalar(0.5) * t; + t = Scalar(0.5)/t; + q.w() = (mat.coeff(k,j)-mat.coeff(j,k))*t; + q.coeffs().coeffRef(j) = (mat.coeff(j,i)+mat.coeff(i,j))*t; + q.coeffs().coeffRef(k) = (mat.coeff(k,i)+mat.coeff(i,k))*t; + } + } +}; + +// set from a vector of coefficients assumed to be a quaternion +template +struct ei_quaternionbase_assign_impl +{ + typedef typename Other::Scalar Scalar; + template inline static void run(QuaternionBase& q, const Other& vec) + { + q.coeffs() = vec; + } +}; + + #endif // EIGEN_QUATERNION_H diff --git a/Eigen/src/Geometry/arch/Geometry_SSE.h b/Eigen/src/Geometry/arch/Geometry_SSE.h index d0342febc..c608e4843 100644 --- a/Eigen/src/Geometry/arch/Geometry_SSE.h +++ b/Eigen/src/Geometry/arch/Geometry_SSE.h @@ -45,6 +45,27 @@ ei_quaternion_product(const Quaternion& _a, const Quate return res; } +template struct ei_quat_product +{ + inline static Quat run(const QuaternionBase& _a, const QuaternionBase& _b) + { + const __m128 mask = _mm_castsi128_ps(_mm_setr_epi32(0,0,0,0x80000000)); + Quat res; + __m128 a = _a.coeffs().packet(0); + __m128 b = _b.coeffs().packet(0); + __m128 flip1 = _mm_xor_ps(_mm_mul_ps(ei_vec4f_swizzle1(a,1,2,0,2), + ei_vec4f_swizzle1(b,2,0,1,2)),mask); + __m128 flip2 = _mm_xor_ps(_mm_mul_ps(ei_vec4f_swizzle1(a,3,3,3,1), + ei_vec4f_swizzle1(b,0,1,2,1)),mask); + ei_pstore(&res.x(), + _mm_add_ps(_mm_sub_ps(_mm_mul_ps(a,ei_vec4f_swizzle1(b,3,3,3,3)), + _mm_mul_ps(ei_vec4f_swizzle1(a,2,0,1,0), + ei_vec4f_swizzle1(b,1,2,0,0))), + _mm_add_ps(flip1,flip2))); + return res; + } +}; + template struct ei_cross3_impl { inline static typename ei_plain_matrix_type::type From 6219f9acfa61e54baf266f816b7eaf9ffbd9841e Mon Sep 17 00:00:00 2001 From: Benoit Jacob Date: Tue, 27 Oct 2009 15:45:24 -0400 Subject: [PATCH 45/72] * rename new Quat class to Quaternion, remove existing Quaternion * add Copyright line for Mathieu * cast() was broken (compile errors) and needed anyway to be in QuaternionBase * it's VectorBlock, don't pass additional parameter 1, it has different meaning!! * make it compile with GCC (put 'typename' at the right location) --- Eigen/src/Geometry/Quaternion.h | 578 +++---------------------- Eigen/src/Geometry/arch/Geometry_SSE.h | 23 +- 2 files changed, 60 insertions(+), 541 deletions(-) diff --git a/Eigen/src/Geometry/Quaternion.h b/Eigen/src/Geometry/Quaternion.h index f3f60a08a..67b040165 100644 --- a/Eigen/src/Geometry/Quaternion.h +++ b/Eigen/src/Geometry/Quaternion.h @@ -2,6 +2,7 @@ // for linear algebra. // // Copyright (C) 2008 Gael Guennebaud +// Copyright (C) 2009 Mathieu Gautier // // Eigen is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public @@ -25,11 +26,6 @@ #ifndef EIGEN_QUATERNION_H #define EIGEN_QUATERNION_H -template -struct ei_quaternion_assign_impl; - /** \geometry_module \ingroup Geometry_Module * * \class Quaternion @@ -52,471 +48,12 @@ struct ei_quaternion_assign_impl; * \sa class AngleAxis, class Transform */ -template struct ei_traits > -{ - typedef _Scalar Scalar; -}; - -template -class Quaternion : public RotationBase,3> -{ - typedef RotationBase,3> Base; - - - -public: - EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF_VECTORIZABLE_FIXED_SIZE(_Scalar,4) - - using Base::operator*; - - /** the scalar type of the coefficients */ - typedef _Scalar Scalar; - - /** the type of the Coefficients 4-vector */ - typedef Matrix Coefficients; - /** the type of a 3D vector */ - typedef Matrix Vector3; - /** the equivalent rotation matrix type */ - typedef Matrix Matrix3; - /** the equivalent angle-axis type */ - typedef AngleAxis AngleAxisType; - - /** \returns the \c x coefficient */ - inline Scalar x() const { return m_coeffs.coeff(0); } - /** \returns the \c y coefficient */ - inline Scalar y() const { return m_coeffs.coeff(1); } - /** \returns the \c z coefficient */ - inline Scalar z() const { return m_coeffs.coeff(2); } - /** \returns the \c w coefficient */ - inline Scalar w() const { return m_coeffs.coeff(3); } - - /** \returns a reference to the \c x coefficient */ - inline Scalar& x() { return m_coeffs.coeffRef(0); } - /** \returns a reference to the \c y coefficient */ - inline Scalar& y() { return m_coeffs.coeffRef(1); } - /** \returns a reference to the \c z coefficient */ - inline Scalar& z() { return m_coeffs.coeffRef(2); } - /** \returns a reference to the \c w coefficient */ - inline Scalar& w() { return m_coeffs.coeffRef(3); } - - /** \returns a read-only vector expression of the imaginary part (x,y,z) */ - inline const Block vec() const { return m_coeffs.template start<3>(); } - - /** \returns a vector expression of the imaginary part (x,y,z) */ - inline Block vec() { return m_coeffs.template start<3>(); } - - /** \returns a read-only vector expression of the coefficients (x,y,z,w) */ - inline const Coefficients& coeffs() const { return m_coeffs; } - - /** \returns a vector expression of the coefficients (x,y,z,w) */ - inline Coefficients& coeffs() { return m_coeffs; } - - /** Default constructor leaving the quaternion uninitialized. */ - inline Quaternion() {} - - /** Constructs and initializes the quaternion \f$ w+xi+yj+zk \f$ from - * its four coefficients \a w, \a x, \a y and \a z. - * - * \warning Note the order of the arguments: the real \a w coefficient first, - * while internally the coefficients are stored in the following order: - * [\c x, \c y, \c z, \c w] - */ - inline Quaternion(Scalar w, Scalar x, Scalar y, Scalar z) - { m_coeffs << x, y, z, w; } - - /** Copy constructor */ - inline Quaternion(const Quaternion& other) { m_coeffs = other.m_coeffs; } - - /** Constructs and initializes a quaternion from the angle-axis \a aa */ - explicit inline Quaternion(const AngleAxisType& aa) { *this = aa; } - - /** Constructs and initializes a quaternion from either: - * - a rotation matrix expression, - * - a 4D vector expression representing quaternion coefficients. - * \sa operator=(MatrixBase) - */ - template - explicit inline Quaternion(const MatrixBase& other) { *this = other; } - - Quaternion& operator=(const Quaternion& other); - Quaternion& operator=(const AngleAxisType& aa); - template - Quaternion& operator=(const MatrixBase& m); - - /** \returns a quaternion representing an identity rotation - * \sa MatrixBase::Identity() - */ - inline static Quaternion Identity() { return Quaternion(1, 0, 0, 0); } - - /** \sa Quaternion::Identity(), MatrixBase::setIdentity() - */ - inline Quaternion& setIdentity() { m_coeffs << 0, 0, 0, 1; return *this; } - - /** \returns the squared norm of the quaternion's coefficients - * \sa Quaternion::norm(), MatrixBase::squaredNorm() - */ - inline Scalar squaredNorm() const { return m_coeffs.squaredNorm(); } - - /** \returns the norm of the quaternion's coefficients - * \sa Quaternion::squaredNorm(), MatrixBase::norm() - */ - inline Scalar norm() const { return m_coeffs.norm(); } - - /** Normalizes the quaternion \c *this - * \sa normalized(), MatrixBase::normalize() */ - inline void normalize() { m_coeffs.normalize(); } - /** \returns a normalized version of \c *this - * \sa normalize(), MatrixBase::normalized() */ - inline Quaternion normalized() const { return Quaternion(m_coeffs.normalized()); } - - /** \returns the dot product of \c *this and \a other - * Geometrically speaking, the dot product of two unit quaternions - * corresponds to the cosine of half the angle between the two rotations. - * \sa angularDistance() - */ - inline Scalar dot(const Quaternion& other) const { return m_coeffs.dot(other.m_coeffs); } - - inline Scalar angularDistance(const Quaternion& other) const; - - Matrix3 toRotationMatrix(void) const; - - template - Quaternion& setFromTwoVectors(const MatrixBase& a, const MatrixBase& b); - - inline Quaternion operator* (const Quaternion& q) const; - inline Quaternion& operator*= (const Quaternion& q); - - Quaternion inverse(void) const; - Quaternion conjugate(void) const; - - Quaternion slerp(Scalar t, const Quaternion& other) const; - - /** \returns \c *this with scalar type casted to \a NewScalarType - * - * Note that if \a NewScalarType is equal to the current scalar type of \c *this - * then this function smartly returns a const reference to \c *this. - */ - template - inline typename ei_cast_return_type >::type cast() const - { return typename ei_cast_return_type >::type(*this); } - - /** Copy constructor with scalar type conversion */ - template - inline explicit Quaternion(const Quaternion& other) - { m_coeffs = other.coeffs().template cast(); } - - /** \returns \c true if \c *this is approximately equal to \a other, within the precision - * determined by \a prec. - * - * \sa MatrixBase::isApprox() */ - bool isApprox(const Quaternion& other, typename NumTraits::Real prec = precision()) const - { return m_coeffs.isApprox(other.m_coeffs, prec); } - - Vector3 _transformVector(Vector3 v) const; - -protected: - Coefficients m_coeffs; -}; - -/** \ingroup Geometry_Module - * single precision quaternion type */ -typedef Quaternion Quaternionf; -/** \ingroup Geometry_Module - * double precision quaternion type */ -typedef Quaternion Quaterniond; - -// Generic Quaternion * Quaternion product -template inline Quaternion -ei_quaternion_product(const Quaternion& a, const Quaternion& b) -{ - return Quaternion - ( - a.w() * b.w() - a.x() * b.x() - a.y() * b.y() - a.z() * b.z(), - a.w() * b.x() + a.x() * b.w() + a.y() * b.z() - a.z() * b.y(), - a.w() * b.y() + a.y() * b.w() + a.z() * b.x() - a.x() * b.z(), - a.w() * b.z() + a.z() * b.w() + a.x() * b.y() - a.y() * b.x() - ); -} - -/** \returns the concatenation of two rotations as a quaternion-quaternion product */ -template -inline Quaternion Quaternion::operator* (const Quaternion& other) const -{ - return ei_quaternion_product(*this,other); -} - -/** \sa operator*(Quaternion) */ -template -inline Quaternion& Quaternion::operator*= (const Quaternion& other) -{ - return (*this = *this * other); -} - -/** Rotation of a vector by a quaternion. - * \remarks If the quaternion is used to rotate several points (>1) - * then it is much more efficient to first convert it to a 3x3 Matrix. - * Comparison of the operation cost for n transformations: - * - Quaternion: 30n - * - Via a Matrix3: 24 + 15n - */ -template -inline typename Quaternion::Vector3 -Quaternion::_transformVector(Vector3 v) const -{ - // Note that this algorithm comes from the optimization by hand - // of the conversion to a Matrix followed by a Matrix/Vector product. - // It appears to be much faster than the common algorithm found - // in the litterature (30 versus 39 flops). It also requires two - // Vector3 as temporaries. - Vector3 uv = Scalar(2) * this->vec().cross(v); - return v + this->w() * uv + this->vec().cross(uv); -} - -template -inline Quaternion& Quaternion::operator=(const Quaternion& other) -{ - m_coeffs = other.m_coeffs; - return *this; -} - -/** Set \c *this from an angle-axis \a aa and returns a reference to \c *this - */ -template -inline Quaternion& Quaternion::operator=(const AngleAxisType& aa) -{ - Scalar ha = Scalar(0.5)*aa.angle(); // Scalar(0.5) to suppress precision loss warnings - this->w() = ei_cos(ha); - this->vec() = ei_sin(ha) * aa.axis(); - return *this; -} - -/** Set \c *this from the expression \a xpr: - * - if \a xpr is a 4x1 vector, then \a xpr is assumed to be a quaternion - * - if \a xpr is a 3x3 matrix, then \a xpr is assumed to be rotation matrix - * and \a xpr is converted to a quaternion - */ -template -template -inline Quaternion& Quaternion::operator=(const MatrixBase& xpr) -{ - ei_quaternion_assign_impl::run(*this, xpr.derived()); - return *this; -} - -/** Convert the quaternion to a 3x3 rotation matrix. The quaternion is required to - * be normalized, otherwise the result is undefined. - */ -template -inline typename Quaternion::Matrix3 -Quaternion::toRotationMatrix(void) const -{ - // NOTE if inlined, then gcc 4.2 and 4.4 get rid of the temporary (not gcc 4.3 !!) - // if not inlined then the cost of the return by value is huge ~ +35%, - // however, not inlining this function is an order of magnitude slower, so - // it has to be inlined, and so the return by value is not an issue - Matrix3 res; - - const Scalar tx = 2*this->x(); - const Scalar ty = 2*this->y(); - const Scalar tz = 2*this->z(); - const Scalar twx = tx*this->w(); - const Scalar twy = ty*this->w(); - const Scalar twz = tz*this->w(); - const Scalar txx = tx*this->x(); - const Scalar txy = ty*this->x(); - const Scalar txz = tz*this->x(); - const Scalar tyy = ty*this->y(); - const Scalar tyz = tz*this->y(); - const Scalar tzz = tz*this->z(); - - res.coeffRef(0,0) = 1-(tyy+tzz); - res.coeffRef(0,1) = txy-twz; - res.coeffRef(0,2) = txz+twy; - res.coeffRef(1,0) = txy+twz; - res.coeffRef(1,1) = 1-(txx+tzz); - res.coeffRef(1,2) = tyz-twx; - res.coeffRef(2,0) = txz-twy; - res.coeffRef(2,1) = tyz+twx; - res.coeffRef(2,2) = 1-(txx+tyy); - - return res; -} - -/** Sets \c *this to be a quaternion representing a rotation between - * the two arbitrary vectors \a a and \a b. In other words, the built - * rotation represent a rotation sending the line of direction \a a - * to the line of direction \a b, both lines passing through the origin. - * - * \returns a reference to \c *this. - * - * Note that the two input vectors do \b not have to be normalized, and - * do not need to have the same norm. - */ -template -template -inline Quaternion& Quaternion::setFromTwoVectors(const MatrixBase& a, const MatrixBase& b) -{ - Vector3 v0 = a.normalized(); - Vector3 v1 = b.normalized(); - Scalar c = v1.dot(v0); - - // if dot == -1, vectors are nearly opposites - // => accuraletly compute the rotation axis by computing the - // intersection of the two planes. This is done by solving: - // x^T v0 = 0 - // x^T v1 = 0 - // under the constraint: - // ||x|| = 1 - // which yields a singular value problem - if (c < Scalar(-1)+precision()) - { - c = std::max(c,-1); - Matrix m; m << v0.transpose(), v1.transpose(); - SVD > svd(m); - Vector3 axis = svd.matrixV().col(2); - - Scalar w2 = (Scalar(1)+c)*Scalar(0.5); - this->w() = ei_sqrt(w2); - this->vec() = axis * ei_sqrt(Scalar(1) - w2); - return *this; - } - Vector3 axis = v0.cross(v1); - Scalar s = ei_sqrt((Scalar(1)+c)*Scalar(2)); - Scalar invs = Scalar(1)/s; - this->vec() = axis * invs; - this->w() = s * Scalar(0.5); - - return *this; -} - -/** \returns the multiplicative inverse of \c *this - * Note that in most cases, i.e., if you simply want the opposite rotation, - * and/or the quaternion is normalized, then it is enough to use the conjugate. - * - * \sa Quaternion::conjugate() - */ -template -inline Quaternion Quaternion::inverse() const -{ - // FIXME should this function be called multiplicativeInverse and conjugate() be called inverse() or opposite() ?? - Scalar n2 = this->squaredNorm(); - if (n2 > 0) - return Quaternion(conjugate().coeffs() / n2); - else - { - // return an invalid result to flag the error - return Quaternion(Coefficients::Zero()); - } -} - -/** \returns the conjugate of the \c *this which is equal to the multiplicative inverse - * if the quaternion is normalized. - * The conjugate of a quaternion represents the opposite rotation. - * - * \sa Quaternion::inverse() - */ -template -inline Quaternion Quaternion::conjugate() const -{ - return Quaternion(this->w(),-this->x(),-this->y(),-this->z()); -} - -/** \returns the angle (in radian) between two rotations - * \sa dot() - */ -template -inline Scalar Quaternion::angularDistance(const Quaternion& other) const -{ - double d = ei_abs(this->dot(other)); - if (d>=1.0) - return 0; - return Scalar(2) * std::acos(d); -} - -/** \returns the spherical linear interpolation between the two quaternions - * \c *this and \a other at the parameter \a t - */ -template -Quaternion Quaternion::slerp(Scalar t, const Quaternion& other) const -{ - static const Scalar one = Scalar(1) - precision(); - Scalar d = this->dot(other); - Scalar absD = ei_abs(d); - if (absD>=one) - return *this; - - // theta is the angle between the 2 quaternions - Scalar theta = std::acos(absD); - Scalar sinTheta = ei_sin(theta); - - Scalar scale0 = ei_sin( ( Scalar(1) - t ) * theta) / sinTheta; - Scalar scale1 = ei_sin( ( t * theta) ) / sinTheta; - if (d<0) - scale1 = -scale1; - - return Quaternion(scale0 * m_coeffs + scale1 * other.m_coeffs); -} - -// set from a rotation matrix -template -struct ei_quaternion_assign_impl -{ - typedef typename Other::Scalar Scalar; - inline static void run(Quaternion& q, const Other& mat) - { - // This algorithm comes from "Quaternion Calculus and Fast Animation", - // Ken Shoemake, 1987 SIGGRAPH course notes - Scalar t = mat.trace(); - if (t > 0) - { - t = ei_sqrt(t + Scalar(1.0)); - q.w() = Scalar(0.5)*t; - t = Scalar(0.5)/t; - q.x() = (mat.coeff(2,1) - mat.coeff(1,2)) * t; - q.y() = (mat.coeff(0,2) - mat.coeff(2,0)) * t; - q.z() = (mat.coeff(1,0) - mat.coeff(0,1)) * t; - } - else - { - int i = 0; - if (mat.coeff(1,1) > mat.coeff(0,0)) - i = 1; - if (mat.coeff(2,2) > mat.coeff(i,i)) - i = 2; - int j = (i+1)%3; - int k = (j+1)%3; - - t = ei_sqrt(mat.coeff(i,i)-mat.coeff(j,j)-mat.coeff(k,k) + Scalar(1.0)); - q.coeffs().coeffRef(i) = Scalar(0.5) * t; - t = Scalar(0.5)/t; - q.w() = (mat.coeff(k,j)-mat.coeff(j,k))*t; - q.coeffs().coeffRef(j) = (mat.coeff(j,i)+mat.coeff(i,j))*t; - q.coeffs().coeffRef(k) = (mat.coeff(k,i)+mat.coeff(i,k))*t; - } - } -}; - -// set from a vector of coefficients assumed to be a quaternion -template -struct ei_quaternion_assign_impl -{ - typedef typename Other::Scalar Scalar; - inline static void run(Quaternion& q, const Other& vec) - { - q.coeffs() = vec; - } -}; - -/*################################################################### - QuaternionBase and Map and Quat - ###################################################################*/ - template struct ei_quaternionbase_assign_impl; -template class Quat; // [XXX] => remove when Quat becomes Quaternion +template class Quaternion; // [XXX] => remove when Quaternion becomes Quaternion template struct ei_traits > @@ -528,7 +65,8 @@ struct ei_traits > }; template -class QuaternionBase : public RotationBase { +class QuaternionBase : public RotationBase +{ typedef RotationBase Base; public: using Base::operator*; @@ -563,10 +101,10 @@ public: inline Scalar& w() { return this->derived().coeffs().coeffRef(3); } /** \returns a read-only vector expression of the imaginary part (x,y,z) */ - inline const VectorBlock::Coefficients,3,1> vec() const { return this->derived().coeffs().template start<3>(); } + inline const VectorBlock::Coefficients,3> vec() const { return this->derived().coeffs().template start<3>(); } /** \returns a vector expression of the imaginary part (x,y,z) */ - inline VectorBlock::Coefficients,3,1> vec() { return this->derived().coeffs().template start<3>(); } + inline VectorBlock::Coefficients,3> vec() { return this->derived().coeffs().template start<3>(); } /** \returns a read-only vector expression of the coefficients (x,y,z,w) */ inline const typename ei_traits::Coefficients& coeffs() const { return this->derived().coeffs(); } @@ -582,7 +120,7 @@ public: /** \returns a quaternion representing an identity rotation * \sa MatrixBase::Identity() */ - inline static Quat Identity() { return Quat(1, 0, 0, 0); } + inline static Quaternion Identity() { return Quaternion(1, 0, 0, 0); } /** \sa Quaternion2::Identity(), MatrixBase::setIdentity() */ @@ -603,7 +141,7 @@ public: inline void normalize() { coeffs().normalize(); } /** \returns a normalized version of \c *this * \sa normalize(), MatrixBase::normalized() */ - inline Quat normalized() const { return Quat(coeffs().normalized()); } + inline Quaternion normalized() const { return Quaternion(coeffs().normalized()); } /** \returns the dot product of \c *this and \a other * Geometrically speaking, the dot product of two unit quaternions @@ -619,29 +157,38 @@ public: template QuaternionBase& setFromTwoVectors(const MatrixBase& a, const MatrixBase& b); - template inline Quat operator* (const QuaternionBase& q) const; + template inline Quaternion operator* (const QuaternionBase& q) const; template inline QuaternionBase& operator*= (const QuaternionBase& q); - Quat inverse(void) const; - Quat conjugate(void) const; + Quaternion inverse(void) const; + Quaternion conjugate(void) const; - template Quat slerp(Scalar t, const QuaternionBase& other) const; + template Quaternion slerp(Scalar t, const QuaternionBase& other) const; /** \returns \c true if \c *this is approximately equal to \a other, within the precision * determined by \a prec. * * \sa MatrixBase::isApprox() */ - bool isApprox(const QuaternionBase& other, typename RealScalar prec = precision()) const + bool isApprox(const QuaternionBase& other, RealScalar prec = precision()) const { return coeffs().isApprox(other.coeffs(), prec); } Vector3 _transformVector(Vector3 v) const; + /** \returns \c *this with scalar type casted to \a NewScalarType + * + * Note that if \a NewScalarType is equal to the current scalar type of \c *this + * then this function smartly returns a const reference to \c *this. + */ + template + inline typename ei_cast_return_type >::type cast() const + { + return typename ei_cast_return_type >::type( + coeffs().template cast()); + } }; -/* ########### Quat -> Quaternion */ - template -struct ei_traits > +struct ei_traits > { typedef _Scalar Scalar; typedef Matrix<_Scalar,4,1> Coefficients; @@ -651,18 +198,18 @@ struct ei_traits > }; template -class Quat : public QuaternionBase >{ - typedef QuaternionBase > Base; +class Quaternion : public QuaternionBase >{ + typedef QuaternionBase > Base; public: using Base::operator=; typedef _Scalar Scalar; - typedef typename ei_traits >::Coefficients Coefficients; + typedef typename ei_traits >::Coefficients Coefficients; typedef typename Base::AngleAxisType AngleAxisType; /** Default constructor leaving the quaternion uninitialized. */ - inline Quat() {} + inline Quaternion() {} /** Constructs and initializes the quaternion \f$ w+xi+yj+zk \f$ from * its four coefficients \a w, \a x, \a y and \a z. @@ -671,38 +218,29 @@ public: * while internally the coefficients are stored in the following order: * [\c x, \c y, \c z, \c w] */ - inline Quat(Scalar w, Scalar x, Scalar y, Scalar z) + inline Quaternion(Scalar w, Scalar x, Scalar y, Scalar z) { coeffs() << x, y, z, w; } /** Constructs and initialize a quaternion from the array data * This constructor is also used to map an array */ - inline Quat(const Scalar* data) : m_coeffs(data) {} + inline Quaternion(const Scalar* data) : m_coeffs(data) {} /** Copy constructor */ -// template inline Quat(const QuaternionBase& other) { m_coeffs = other.coeffs(); } [XXX] redundant with 703 +// template inline Quaternion(const QuaternionBase& other) { m_coeffs = other.coeffs(); } [XXX] redundant with 703 /** Constructs and initializes a quaternion from the angle-axis \a aa */ - explicit inline Quat(const AngleAxisType& aa) { *this = aa; } + explicit inline Quaternion(const AngleAxisType& aa) { *this = aa; } /** Constructs and initializes a quaternion from either: * - a rotation matrix expression, * - a 4D vector expression representing quaternion coefficients. */ template - explicit inline Quat(const MatrixBase& other) { *this = other; } - - /** \returns \c *this with scalar type casted to \a NewScalarType - * - * Note that if \a NewScalarType is equal to the current scalar type of \c *this - * then this function smartly returns a const reference to \c *this. - */ - template - inline typename ei_cast_return_type >::type cast() const - { return typename ei_cast_return_type >::type(*this); } + explicit inline Quaternion(const MatrixBase& other) { *this = other; } /** Copy constructor with scalar type conversion */ template - inline explicit Quat(const QuaternionBase& other) + inline explicit Quaternion(const QuaternionBase& other) { m_coeffs = other.coeffs().template cast(); } inline Coefficients& coeffs() { return m_coeffs;} @@ -712,9 +250,9 @@ protected: Coefficients m_coeffs; }; -/* ########### Map */ +/* ########### Map */ -/** \class Map +/** \class Map * \nonstableyet * * \brief Expression of a quaternion @@ -724,8 +262,8 @@ protected: * \sa class Quaternion, class QuaternionBase */ template -struct ei_traits, _PacketAccess> >: -ei_traits > +struct ei_traits, _PacketAccess> >: +ei_traits > { typedef _Scalar Scalar; typedef Map > Coefficients; @@ -735,14 +273,14 @@ ei_traits > }; template -class Map, PacketAccess > : public QuaternionBase, PacketAccess> >, ei_no_assignment_operator { +class Map, PacketAccess > : public QuaternionBase, PacketAccess> >, ei_no_assignment_operator { public: typedef _Scalar Scalar; - typedef typename ei_traits, PacketAccess> >::Coefficients Coefficients; + typedef typename ei_traits, PacketAccess> >::Coefficients Coefficients; - inline Map, PacketAccess >(const Scalar* coeffs) : m_coeffs(coeffs) {} + inline Map, PacketAccess >(const Scalar* coeffs) : m_coeffs(coeffs) {} inline Coefficients& coeffs() { return m_coeffs;} inline const Coefficients& coeffs() const { return m_coeffs;} @@ -751,16 +289,16 @@ class Map, PacketAccess > : public QuaternionBase > QuaternionMapd; -typedef Map > QuaternionMapf; -typedef Map, Aligned> QuaternionMapAlignedd; -typedef Map, Aligned> QuaternionMapAlignedf; +typedef Map > QuaternionMapd; +typedef Map > QuaternionMapf; +typedef Map, Aligned> QuaternionMapAlignedd; +typedef Map, Aligned> QuaternionMapAlignedf; // Generic Quaternion * Quaternion product template struct ei_quat_product { - inline static Quat run(const QuaternionBase& a, const QuaternionBase& b){ - return Quat + inline static Quaternion run(const QuaternionBase& a, const QuaternionBase& b){ + return Quaternion ( a.w() * b.w() - a.x() * b.x() - a.y() * b.y() - a.z() * b.z(), a.w() * b.x() + a.x() * b.w() + a.y() * b.z() - a.z() * b.y(), @@ -773,12 +311,12 @@ template template -inline Quat >::Scalar> QuaternionBase::operator* (const QuaternionBase& other) const +inline Quaternion >::Scalar> QuaternionBase::operator* (const QuaternionBase& other) const { EIGEN_STATIC_ASSERT((ei_is_same_type::ret), YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY) return ei_quat_product::Scalar, + typename ei_traits::Scalar, ei_traits::PacketAccess && ei_traits::PacketAccess>::run(*this, other); } @@ -938,16 +476,16 @@ inline QuaternionBase& QuaternionBase::setFromTwoVectors(const * \sa Quaternion2::conjugate() */ template -inline Quat >::Scalar> QuaternionBase::inverse() const +inline Quaternion >::Scalar> QuaternionBase::inverse() const { // FIXME should this function be called multiplicativeInverse and conjugate() be called inverse() or opposite() ?? Scalar n2 = this->squaredNorm(); if (n2 > 0) - return Quat(conjugate().coeffs() / n2); + return Quaternion(conjugate().coeffs() / n2); else { // return an invalid result to flag the error - return Quat(ei_traits::Coefficients::Zero()); + return Quaternion(ei_traits::Coefficients::Zero()); } } @@ -958,9 +496,9 @@ inline Quat >::Scalar> QuaternionBase * \sa Quaternion2::inverse() */ template -inline Quat >::Scalar> QuaternionBase::conjugate() const +inline Quaternion >::Scalar> QuaternionBase::conjugate() const { - return Quat(this->w(),-this->x(),-this->y(),-this->z()); + return Quaternion(this->w(),-this->x(),-this->y(),-this->z()); } /** \returns the angle (in radian) between two rotations @@ -981,13 +519,13 @@ inline typename ei_traits >::Scalar QuaternionBase template -Quat >::Scalar> QuaternionBase::slerp(Scalar t, const QuaternionBase& other) const +Quaternion >::Scalar> QuaternionBase::slerp(Scalar t, const QuaternionBase& other) const { static const Scalar one = Scalar(1) - precision(); Scalar d = this->dot(other); Scalar absD = ei_abs(d); if (absD>=one) - return Quat(*this); + return Quaternion(*this); // theta is the angle between the 2 quaternions Scalar theta = std::acos(absD); @@ -998,7 +536,7 @@ Quat >::Scalar> QuaternionBase(scale0 * coeffs() + scale1 * other.coeffs()); + return Quaternion(scale0 * coeffs() + scale1 * other.coeffs()); } // set from a rotation matrix diff --git a/Eigen/src/Geometry/arch/Geometry_SSE.h b/Eigen/src/Geometry/arch/Geometry_SSE.h index c608e4843..1b8f6aead 100644 --- a/Eigen/src/Geometry/arch/Geometry_SSE.h +++ b/Eigen/src/Geometry/arch/Geometry_SSE.h @@ -26,31 +26,12 @@ #ifndef EIGEN_GEOMETRY_SSE_H #define EIGEN_GEOMETRY_SSE_H -template<> inline Quaternion -ei_quaternion_product(const Quaternion& _a, const Quaternion& _b) -{ - const __m128 mask = _mm_castsi128_ps(_mm_setr_epi32(0,0,0,0x80000000)); - Quaternion res; - __m128 a = _a.coeffs().packet(0); - __m128 b = _b.coeffs().packet(0); - __m128 flip1 = _mm_xor_ps(_mm_mul_ps(ei_vec4f_swizzle1(a,1,2,0,2), - ei_vec4f_swizzle1(b,2,0,1,2)),mask); - __m128 flip2 = _mm_xor_ps(_mm_mul_ps(ei_vec4f_swizzle1(a,3,3,3,1), - ei_vec4f_swizzle1(b,0,1,2,1)),mask); - ei_pstore(&res.x(), - _mm_add_ps(_mm_sub_ps(_mm_mul_ps(a,ei_vec4f_swizzle1(b,3,3,3,3)), - _mm_mul_ps(ei_vec4f_swizzle1(a,2,0,1,0), - ei_vec4f_swizzle1(b,1,2,0,0))), - _mm_add_ps(flip1,flip2))); - return res; -} - template struct ei_quat_product { - inline static Quat run(const QuaternionBase& _a, const QuaternionBase& _b) + inline static Quaternion run(const QuaternionBase& _a, const QuaternionBase& _b) { const __m128 mask = _mm_castsi128_ps(_mm_setr_epi32(0,0,0,0x80000000)); - Quat res; + Quaternion res; __m128 a = _a.coeffs().packet(0); __m128 b = _b.coeffs().packet(0); __m128 flip1 = _mm_xor_ps(_mm_mul_ps(ei_vec4f_swizzle1(a,1,2,0,2), From 288ba155f1821a31deb70611321082083050239a Mon Sep 17 00:00:00 2001 From: Mark Borgerding Date: Wed, 28 Oct 2009 23:05:18 -0400 Subject: [PATCH 46/72] forgot Complex test file --- unsupported/test/Complex.cpp | 72 ++++++++++++++++++++++++++++++++++++ 1 file changed, 72 insertions(+) create mode 100644 unsupported/test/Complex.cpp diff --git a/unsupported/test/Complex.cpp b/unsupported/test/Complex.cpp new file mode 100644 index 000000000..6360b58e4 --- /dev/null +++ b/unsupported/test/Complex.cpp @@ -0,0 +1,72 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. Eigen itself is part of the KDE project. +// +// Copyright (C) 2009 Mark Borgerding mark a borgerding net +// +// Eigen is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 3 of the License, or (at your option) any later version. +// +// Alternatively, you can redistribute it and/or +// modify it under the terms of the GNU General Public License as +// published by the Free Software Foundation; either version 2 of +// the License, or (at your option) any later version. +// +// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY +// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public +// License and a copy of the GNU General Public License along with +// Eigen. If not, see . +#ifdef EIGEN_TEST_FUNC +#include "main.h" +#else +#include + +#define CALL_SUBTEST(x) x +#define VERIFY(x) x +#define test_Complex main +#endif + +#include +#include + +using namespace std; +using namespace Eigen; + +template +void take_std( std::complex * dst, int n ) +{ + cout << dst[n-1] << endl; +} + + +template +void syntax() +{ + vector< Complex > a; + a.resize( 9 ); + //Complex a[9]; + Complex b[9]; + + std::complex * pa = &a[0]; // this works fine + std::complex * pb = &b[0]; // this works fine + //VERIFY() + // this does not compile: + // take_std( &a[0] , a.size() ); + + take_std( pa,9); + take_std( pb,9); + //take_std( static_cast *>( &a[0] ) , a.size() ); + //take_std( &b[0] , 9 ); +} + +int test_Complex() +{ + CALL_SUBTEST( syntax() ); + //CALL_SUBTEST( syntax() ); + //CALL_SUBTEST( syntax() ); +} From 7911df4b6e766ecdc57ccbc233e2b454288c5b51 Mon Sep 17 00:00:00 2001 From: Mark Borgerding Date: Wed, 28 Oct 2009 23:22:10 -0400 Subject: [PATCH 47/72] improved selftest for Eigen::Complex -- mainly a documentation of what does not work --- unsupported/Eigen/Complex | 14 ++++++++++- unsupported/test/Complex.cpp | 49 ++++++++++++++++++++---------------- 2 files changed, 40 insertions(+), 23 deletions(-) diff --git a/unsupported/Eigen/Complex b/unsupported/Eigen/Complex index 065c0305d..04228c95a 100644 --- a/unsupported/Eigen/Complex +++ b/unsupported/Eigen/Complex @@ -210,8 +210,20 @@ operator<< (std::basic_ostream& ostr, const Complex& rhs) template Complex sqrt (const Complex&x){return sqrt(ei_to_std(x));} template Complex tan (const Complex&x){return tan(ei_to_std(x));} template Complex tanh (const Complex&x){return tanh(ei_to_std(x));} -} + template struct NumTraits > + { + typedef _Real Real; + typedef Complex<_Real> FloatingPoint; + enum { + IsComplex = 1, + HasFloatingPoint = NumTraits::HasFloatingPoint, + ReadCost = 2, + AddCost = 2 * NumTraits::AddCost, + MulCost = 4 * NumTraits::MulCost + 2 * NumTraits::AddCost + }; + }; +} #endif /* vim: set filetype=cpp et sw=2 ts=2 ai: */ diff --git a/unsupported/test/Complex.cpp b/unsupported/test/Complex.cpp index 6360b58e4..bedeb9f27 100644 --- a/unsupported/test/Complex.cpp +++ b/unsupported/test/Complex.cpp @@ -22,13 +22,12 @@ // License and a copy of the GNU General Public License along with // Eigen. If not, see . #ifdef EIGEN_TEST_FUNC -#include "main.h" +# include "main.h" #else -#include - -#define CALL_SUBTEST(x) x -#define VERIFY(x) x -#define test_Complex main +# include +# define CALL_SUBTEST(x) x +# define VERIFY(x) x +# define test_Complex main #endif #include @@ -47,26 +46,32 @@ void take_std( std::complex * dst, int n ) template void syntax() { - vector< Complex > a; - a.resize( 9 ); - //Complex a[9]; - Complex b[9]; - - std::complex * pa = &a[0]; // this works fine - std::complex * pb = &b[0]; // this works fine - //VERIFY() - // this does not compile: - // take_std( &a[0] , a.size() ); - + // this works fine + Matrix< Complex, 9, 1> a; + std::complex * pa = &a[0]; + Complex * pa2 = &a[0]; take_std( pa,9); + + // this does not work, but I wish it would + // take_std(&a[0];) + // this does + take_std( (std::complex *)&a[0],9); + + // this does not work, but it would be really nice + //vector< Complex > a; + // (on my gcc 4.4.1 ) + // std::vector assumes operator& returns a POD pointer + + // this works fine + Complex b[9]; + std::complex * pb = &b[0]; // this works fine + take_std( pb,9); - //take_std( static_cast *>( &a[0] ) , a.size() ); - //take_std( &b[0] , 9 ); } -int test_Complex() +void test_Complex() { CALL_SUBTEST( syntax() ); - //CALL_SUBTEST( syntax() ); - //CALL_SUBTEST( syntax() ); + CALL_SUBTEST( syntax() ); + CALL_SUBTEST( syntax() ); } From 541eac0828d8ebbacc424f7b526dfb7c24c5dc91 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Thu, 29 Oct 2009 14:22:02 +0100 Subject: [PATCH 48/72] fix #65: MatrixBase::nonZero() --- Eigen/src/Core/MatrixBase.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Eigen/src/Core/MatrixBase.h b/Eigen/src/Core/MatrixBase.h index 0e8b49f75..2691fdecd 100644 --- a/Eigen/src/Core/MatrixBase.h +++ b/Eigen/src/Core/MatrixBase.h @@ -177,7 +177,7 @@ template class MatrixBase inline int diagonalSize() const { return std::min(rows(),cols()); } /** \returns the number of nonzero coefficients which is in practice the number * of stored coefficients. */ - inline int nonZeros() const { return derived().nonZeros(); } + inline int nonZeros() const { return size(); } /** \returns true if either the number of rows or the number of columns is equal to 1. * In other words, this function returns * \code rows()==1 || cols()==1 \endcode From e513cc75c4661d490ccb425068740458f3483961 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Thu, 29 Oct 2009 14:24:09 +0100 Subject: [PATCH 49/72] oops I forgot to include that file in the previous commit (fixing #65) --- Eigen/src/Core/Transpose.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/Eigen/src/Core/Transpose.h b/Eigen/src/Core/Transpose.h index 8527edc2a..990aa3807 100644 --- a/Eigen/src/Core/Transpose.h +++ b/Eigen/src/Core/Transpose.h @@ -69,7 +69,6 @@ template class Transpose inline int rows() const { return m_matrix.cols(); } inline int cols() const { return m_matrix.rows(); } - inline int nonZeros() const { return m_matrix.nonZeros(); } inline int stride() const { return m_matrix.stride(); } inline Scalar* data() { return m_matrix.data(); } inline const Scalar* data() const { return m_matrix.data(); } @@ -354,5 +353,5 @@ lazyAssign(const CwiseBinaryOp,DerivedA,CwiseUnaryOp,DerivedA,CwiseUnaryOp, NestByValue > > > >& >(other)); } #endif - + #endif // EIGEN_TRANSPOSE_H From c70a603e342e486f6c534f9fa3fc2c5d5f3aa7f8 Mon Sep 17 00:00:00 2001 From: Hauke Heibel Date: Thu, 29 Oct 2009 19:56:58 +0100 Subject: [PATCH 50/72] added mean() reduction --- Eigen/src/Array/VectorwiseOp.h | 8 ++++++++ Eigen/src/Core/MatrixBase.h | 3 ++- Eigen/src/Core/Redux.h | 15 +++++++++++++-- 3 files changed, 23 insertions(+), 3 deletions(-) diff --git a/Eigen/src/Array/VectorwiseOp.h b/Eigen/src/Array/VectorwiseOp.h index 4cb0083fa..6f6c2c8a7 100644 --- a/Eigen/src/Array/VectorwiseOp.h +++ b/Eigen/src/Array/VectorwiseOp.h @@ -122,6 +122,7 @@ class PartialReduxExpr : ei_no_assignment_operator, EIGEN_MEMBER_FUNCTOR(squaredNorm, Size * NumTraits::MulCost + (Size-1)*NumTraits::AddCost); EIGEN_MEMBER_FUNCTOR(norm, (Size+5) * NumTraits::MulCost + (Size-1)*NumTraits::AddCost); EIGEN_MEMBER_FUNCTOR(sum, (Size-1)*NumTraits::AddCost); +EIGEN_MEMBER_FUNCTOR(mean, (Size-1)*NumTraits::AddCost); EIGEN_MEMBER_FUNCTOR(minCoeff, (Size-1)*NumTraits::AddCost); EIGEN_MEMBER_FUNCTOR(maxCoeff, (Size-1)*NumTraits::AddCost); EIGEN_MEMBER_FUNCTOR(all, (Size-1)*NumTraits::AddCost); @@ -297,6 +298,13 @@ template class VectorwiseOp const typename ReturnType::Type sum() const { return _expression(); } + /** \returns a row (or column) vector expression of the mean + * of each column (or row) of the referenced expression. + * + * \sa MatrixBase::mean() */ + const typename ReturnType::Type mean() const + { return _expression(); } + /** \returns a row (or column) vector expression representing * whether \b all coefficients of each respective column (or row) are \c true. * diff --git a/Eigen/src/Core/MatrixBase.h b/Eigen/src/Core/MatrixBase.h index 2691fdecd..cb1589f85 100644 --- a/Eigen/src/Core/MatrixBase.h +++ b/Eigen/src/Core/MatrixBase.h @@ -645,8 +645,9 @@ template class MatrixBase const CwiseBinaryOp binaryExpr(const MatrixBase &other, const CustomBinaryOp& func = CustomBinaryOp()) const; - + Scalar sum() const; + Scalar mean() const; Scalar trace() const; Scalar prod() const; diff --git a/Eigen/src/Core/Redux.h b/Eigen/src/Core/Redux.h index 0df095750..9f796157a 100644 --- a/Eigen/src/Core/Redux.h +++ b/Eigen/src/Core/Redux.h @@ -342,7 +342,7 @@ MatrixBase::maxCoeff() const /** \returns the sum of all coefficients of *this * - * \sa trace(), prod() + * \sa trace(), prod(), mean() */ template EIGEN_STRONG_INLINE typename ei_traits::Scalar @@ -351,12 +351,23 @@ MatrixBase::sum() const return this->redux(Eigen::ei_scalar_sum_op()); } +/** \returns the mean of all coefficients of *this +* +* \sa trace(), prod(), sum() +*/ +template +EIGEN_STRONG_INLINE typename ei_traits::Scalar +MatrixBase::mean() const +{ + return this->redux(Eigen::ei_scalar_sum_op()) / this->size(); +} + /** \returns the product of all coefficients of *this * * Example: \include MatrixBase_prod.cpp * Output: \verbinclude MatrixBase_prod.out * - * \sa sum() + * \sa sum(), mean(), trace() */ template EIGEN_STRONG_INLINE typename ei_traits::Scalar From d0562bd47392f94abe98b1bc377a4647af21257c Mon Sep 17 00:00:00 2001 From: Hauke Heibel Date: Thu, 29 Oct 2009 19:58:54 +0100 Subject: [PATCH 51/72] corrected the computation cost of mean --- Eigen/src/Array/VectorwiseOp.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Eigen/src/Array/VectorwiseOp.h b/Eigen/src/Array/VectorwiseOp.h index 6f6c2c8a7..880567212 100644 --- a/Eigen/src/Array/VectorwiseOp.h +++ b/Eigen/src/Array/VectorwiseOp.h @@ -122,7 +122,7 @@ class PartialReduxExpr : ei_no_assignment_operator, EIGEN_MEMBER_FUNCTOR(squaredNorm, Size * NumTraits::MulCost + (Size-1)*NumTraits::AddCost); EIGEN_MEMBER_FUNCTOR(norm, (Size+5) * NumTraits::MulCost + (Size-1)*NumTraits::AddCost); EIGEN_MEMBER_FUNCTOR(sum, (Size-1)*NumTraits::AddCost); -EIGEN_MEMBER_FUNCTOR(mean, (Size-1)*NumTraits::AddCost); +EIGEN_MEMBER_FUNCTOR(mean, (Size-1)*NumTraits::AddCost + NumTraits::MulCost); EIGEN_MEMBER_FUNCTOR(minCoeff, (Size-1)*NumTraits::AddCost); EIGEN_MEMBER_FUNCTOR(maxCoeff, (Size-1)*NumTraits::AddCost); EIGEN_MEMBER_FUNCTOR(all, (Size-1)*NumTraits::AddCost); From a2268ca6b3b3dc132c984c6e7eacf13af3ef3428 Mon Sep 17 00:00:00 2001 From: Benoit Jacob Date: Thu, 29 Oct 2009 15:47:56 -0400 Subject: [PATCH 52/72] properly implement BenchTimer on POSIX (may require a platform check for the clock name on non-linux platforms) --- bench/BenchTimer.h | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/bench/BenchTimer.h b/bench/BenchTimer.h index c1f473597..70173427f 100644 --- a/bench/BenchTimer.h +++ b/bench/BenchTimer.h @@ -2,7 +2,7 @@ // for linear algebra. // // Copyright (C) 2008 Gael Guennebaud -// Copyright (C) 2006-2008 Benoit Jacob +// Copyright (C) 2009 Benoit Jacob // // Eigen is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public @@ -27,7 +27,7 @@ #define EIGEN_BENCH_TIMER_H #ifndef WIN32 -#include +#include #include #else #define NOMINMAX @@ -41,6 +41,11 @@ namespace Eigen { /** Elapsed time timer keeping the best try. + * + * On POSIX platforms we use clock_gettime with CLOCK_PROCESS_CPUTIME_ID. + * On Windows we use QueryPerformanceCounter + * + * Important: on linux, you must link with -lrt */ class BenchTimer { @@ -83,10 +88,9 @@ public: QueryPerformanceCounter(&query_ticks); return query_ticks.QuadPart/m_frequency; #else - struct timeval tv; - struct timezone tz; - gettimeofday(&tv, &tz); - return (double)tv.tv_sec + 1.e-6 * (double)tv.tv_usec; + timespec ts; + clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &ts); + return double(ts.tv_sec) + 1e-9 * double(ts.tv_nsec); #endif } From 0fa68b9e50ba8b9e7eca9127ba5b10262e2e25d9 Mon Sep 17 00:00:00 2001 From: Mark Borgerding Date: Fri, 30 Oct 2009 19:46:45 -0400 Subject: [PATCH 53/72] switched to BenchUtil.h --- bench/benchFFT.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bench/benchFFT.cpp b/bench/benchFFT.cpp index fccc02a6c..3104929ba 100644 --- a/bench/benchFFT.cpp +++ b/bench/benchFFT.cpp @@ -22,10 +22,10 @@ // License and a copy of the GNU General Public License along with // Eigen. If not, see . +#include #include #include #include -#include #include From a26b729cc9784eba82a5951be0e7738c1d209484 Mon Sep 17 00:00:00 2001 From: Mark Borgerding Date: Fri, 30 Oct 2009 19:50:11 -0400 Subject: [PATCH 54/72] moved scaling to Eigen::FFT --- unsupported/Eigen/FFT | 73 +++++++++++++++++---- unsupported/Eigen/src/FFT/ei_fftw_impl.h | 11 ---- unsupported/Eigen/src/FFT/ei_kissfft_impl.h | 10 --- 3 files changed, 61 insertions(+), 33 deletions(-) diff --git a/unsupported/Eigen/FFT b/unsupported/Eigen/FFT index 36afdde8d..e2705abf6 100644 --- a/unsupported/Eigen/FFT +++ b/unsupported/Eigen/FFT @@ -28,6 +28,7 @@ #include #include #include +#include #ifdef EIGEN_FFTW_DEFAULT // FFTW: faster, GPL -- incompatible with Eigen in LGPL form, bigger code size @@ -65,10 +66,31 @@ class FFT typedef typename impl_type::Scalar Scalar; typedef typename impl_type::Complex Complex; - FFT(const impl_type & impl=impl_type() ) :m_impl(impl) { } + enum Flag { + Default=0, // goof proof + Unscaled=1, + HalfSpectrum=2, + // SomeOtherSpeedOptimization=4 + Speedy=32767 + }; - template - void fwd( Complex * dst, const _Input * src, int nfft) + FFT( const impl_type & impl=impl_type() , Flag flags=Default ) :m_impl(impl),m_flag(flags) { } + + inline + bool HasFlag(Flag f) const { return (m_flag & (int)f) == f;} + + inline + void SetFlag(Flag f) { m_flag |= (int)f;} + + inline + void ClearFlag(Flag f) { m_flag &= (~(int)f);} + + void fwd( Complex * dst, const Scalar * src, int nfft) + { + m_impl.fwd(dst,src,nfft); + } + + void fwd( Complex * dst, const Complex * src, int nfft) { m_impl.fwd(dst,src,nfft); } @@ -76,8 +98,11 @@ class FFT template void fwd( std::vector & dst, const std::vector<_Input> & src) { - dst.resize( src.size() ); - fwd( &dst[0],&src[0],src.size() ); + if ( NumTraits<_Input>::IsComplex == 0 && HasFlag(HalfSpectrum) ) + dst.resize( (src.size()>>1)+1); + else + dst.resize(src.size()); + fwd(&dst[0],&src[0],src.size()); } template @@ -94,17 +119,18 @@ class FFT fwd( &dst[0],&src[0],src.size() ); } - template - void inv( _Output * dst, const Complex * src, int nfft) + void inv( Complex * dst, const Complex * src, int nfft) { m_impl.inv( dst,src,nfft ); + if ( HasFlag( Unscaled ) == false) + scale(dst,1./nfft,nfft); } - template - void inv( std::vector<_Output> & dst, const std::vector & src) + void inv( Scalar * dst, const Complex * src, int nfft) { - dst.resize( src.size() ); - inv( &dst[0],&src[0],src.size() ); + m_impl.inv( dst,src,nfft ); + if ( HasFlag( Unscaled ) == false) + scale(dst,1./nfft,nfft); } template @@ -117,10 +143,24 @@ class FFT YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY) EIGEN_STATIC_ASSERT(int(OutputDerived::Flags)&int(ComplexDerived::Flags)&DirectAccessBit, THIS_METHOD_IS_ONLY_FOR_EXPRESSIONS_WITH_DIRECT_MEMORY_ACCESS_SUCH_AS_MAP_OR_PLAIN_MATRICES) - dst.derived().resize( src.size() ); + + int nfft = src.size(); + int nout = HasFlag(HalfSpectrum) ? ((nfft>>1)+1) : nfft; + dst.derived().resize( nout ); inv( &dst[0],&src[0],src.size() ); } + template + void inv( std::vector<_Output> & dst, const std::vector & src) + { + if ( NumTraits<_Output>::IsComplex == 0 && HasFlag(HalfSpectrum) ) + dst.resize( 2*(src.size()-1) ); + else + dst.resize( src.size() ); + inv( &dst[0],&src[0],dst.size() ); + } + + // TODO: multi-dimensional FFTs // TODO: handle Eigen MatrixBase @@ -128,7 +168,16 @@ class FFT impl_type & impl() {return m_impl;} private: + + template + void scale(_It x,_Val s,int nx) + { + for (int k=0;k(dst), &m_tmpBuf[0], 1,1); } } @@ -403,12 +401,4 @@ } return &twidref[0]; } - - // TODO move scaling up into Eigen::FFT - inline - void scale(Complex *dst,int n,Scalar s) - { - for (int k=0;k Date: Fri, 30 Oct 2009 20:26:30 -0400 Subject: [PATCH 55/72] moved real-half-spectrum reflection into Eigen::FFT --- unsupported/Eigen/FFT | 32 ++++++++++++++------- unsupported/Eigen/src/FFT/ei_fftw_impl.h | 3 -- unsupported/Eigen/src/FFT/ei_kissfft_impl.h | 2 -- 3 files changed, 22 insertions(+), 15 deletions(-) diff --git a/unsupported/Eigen/FFT b/unsupported/Eigen/FFT index e2705abf6..b1d3d9f0e 100644 --- a/unsupported/Eigen/FFT +++ b/unsupported/Eigen/FFT @@ -88,6 +88,8 @@ class FFT void fwd( Complex * dst, const Scalar * src, int nfft) { m_impl.fwd(dst,src,nfft); + if ( HasFlag(HalfSpectrum) == false) + ReflectSpectrum(dst,nfft); } void fwd( Complex * dst, const Complex * src, int nfft) @@ -108,15 +110,19 @@ class FFT template void fwd( MatrixBase & dst, const MatrixBase & src) { - EIGEN_STATIC_ASSERT_VECTOR_ONLY(InputDerived) - EIGEN_STATIC_ASSERT_VECTOR_ONLY(ComplexDerived) - EIGEN_STATIC_ASSERT_SAME_VECTOR_SIZE(ComplexDerived,InputDerived) // size at compile-time - EIGEN_STATIC_ASSERT((ei_is_same_type::ret), - YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY) - EIGEN_STATIC_ASSERT(int(InputDerived::Flags)&int(ComplexDerived::Flags)&DirectAccessBit, - THIS_METHOD_IS_ONLY_FOR_EXPRESSIONS_WITH_DIRECT_MEMORY_ACCESS_SUCH_AS_MAP_OR_PLAIN_MATRICES) - dst.derived().resize( src.size() ); - fwd( &dst[0],&src[0],src.size() ); + EIGEN_STATIC_ASSERT_VECTOR_ONLY(InputDerived) + EIGEN_STATIC_ASSERT_VECTOR_ONLY(ComplexDerived) + EIGEN_STATIC_ASSERT_SAME_VECTOR_SIZE(ComplexDerived,InputDerived) // size at compile-time + EIGEN_STATIC_ASSERT((ei_is_same_type::ret), + YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY) + EIGEN_STATIC_ASSERT(int(InputDerived::Flags)&int(ComplexDerived::Flags)&DirectAccessBit, + THIS_METHOD_IS_ONLY_FOR_EXPRESSIONS_WITH_DIRECT_MEMORY_ACCESS_SUCH_AS_MAP_OR_PLAIN_MATRICES) + + if ( NumTraits< typename InputDerived::Scalar >::IsComplex == 0 && HasFlag(HalfSpectrum) ) + dst.derived().resize( (src.size()>>1)+1); + else + dst.derived().resize(src.size()); + fwd( &dst[0],&src[0],src.size() ); } void inv( Complex * dst, const Complex * src, int nfft) @@ -160,7 +166,6 @@ class FFT inv( &dst[0],&src[0],dst.size() ); } - // TODO: multi-dimensional FFTs // TODO: handle Eigen MatrixBase @@ -176,6 +181,13 @@ class FFT *x++ *= s; } + void ReflectSpectrum(Complex * freq,int nfft) + { + int nhbins=(nfft>>1)+1; + for (int k=nhbins;k < nfft; ++k ) + freq[k] = conj(freq[nfft-k]); + } + impl_type m_impl; int m_flag; }; diff --git a/unsupported/Eigen/src/FFT/ei_fftw_impl.h b/unsupported/Eigen/src/FFT/ei_fftw_impl.h index cdb5861e8..18473a29b 100644 --- a/unsupported/Eigen/src/FFT/ei_fftw_impl.h +++ b/unsupported/Eigen/src/FFT/ei_fftw_impl.h @@ -177,9 +177,6 @@ void fwd( Complex * dst,const Scalar * src,int nfft) { get_plan(nfft,false,dst,src).fwd(ei_fftw_cast(dst), ei_fftw_cast(src) ,nfft); - int nhbins=(nfft>>1)+1; - for (int k=nhbins;k < nfft; ++k ) - dst[k] = conj(dst[nfft-k]); } // inverse complex-to-complex diff --git a/unsupported/Eigen/src/FFT/ei_kissfft_impl.h b/unsupported/Eigen/src/FFT/ei_kissfft_impl.h index 86ec7a1ca..859e7e6c9 100644 --- a/unsupported/Eigen/src/FFT/ei_kissfft_impl.h +++ b/unsupported/Eigen/src/FFT/ei_kissfft_impl.h @@ -322,8 +322,6 @@ // place conjugate-symmetric half at the end for completeness // TODO: make this configurable ( opt-out ) - for ( k=1;k < ncfft ; ++k ) - dst[nfft-k] = conj(dst[k]); dst[0] = dc; dst[ncfft] = nyquist; } From 4c3345364e079429dcfc17da63364ee75b9c0636 Mon Sep 17 00:00:00 2001 From: Mark Borgerding Date: Fri, 30 Oct 2009 23:38:13 -0400 Subject: [PATCH 56/72] moved half-spectrum logic to Eigen::FFT --- unsupported/Eigen/src/FFT/ei_kissfft_impl.h | 39 +++++++++++---------- unsupported/test/FFT.cpp | 35 ++++++++++++++++++ 2 files changed, 56 insertions(+), 18 deletions(-) diff --git a/unsupported/Eigen/src/FFT/ei_kissfft_impl.h b/unsupported/Eigen/src/FFT/ei_kissfft_impl.h index 859e7e6c9..091e730d1 100644 --- a/unsupported/Eigen/src/FFT/ei_kissfft_impl.h +++ b/unsupported/Eigen/src/FFT/ei_kissfft_impl.h @@ -283,12 +283,11 @@ m_realTwiddles.clear(); } - template inline - void fwd( Complex * dst,const _Src *src,int nfft) - { - get_plan(nfft,false).work(0, dst, src, 1,1); - } + void fwd( Complex * dst,const Complex *src,int nfft) + { + get_plan(nfft,false).work(0, dst, src, 1,1); + } // real-to-complex forward FFT // perform two FFTs of src even and src odd @@ -299,7 +298,9 @@ { if ( nfft&3 ) { // use generic mode for odd - get_plan(nfft,false).work(0, dst, src, 1,1); + m_tmpBuf1.resize(nfft); + get_plan(nfft,false).work(0, &m_tmpBuf1[0], src, 1,1); + std::copy(m_tmpBuf1.begin(),m_tmpBuf1.begin()+(nfft>>1)+1,dst ); }else{ int ncfft = nfft>>1; int ncfft2 = nfft>>2; @@ -319,9 +320,6 @@ dst[k] = (f1k + tw) * Scalar(.5); dst[ncfft-k] = conj(f1k -tw)*Scalar(.5); } - - // place conjugate-symmetric half at the end for completeness - // TODO: make this configurable ( opt-out ) dst[0] = dc; dst[ncfft] = nyquist; } @@ -339,27 +337,31 @@ void inv( Scalar * dst,const Complex * src,int nfft) { if (nfft&3) { - m_tmpBuf.resize(nfft); - inv(&m_tmpBuf[0],src,nfft); + m_tmpBuf1.resize(nfft); + m_tmpBuf2.resize(nfft); + std::copy(src,src+(nfft>>1)+1,m_tmpBuf1.begin() ); + for (int k=1;k<(nfft>>1)+1;++k) + m_tmpBuf1[nfft-k] = conj(m_tmpBuf1[k]); + inv(&m_tmpBuf2[0],&m_tmpBuf1[0],nfft); for (int k=0;k>1; int ncfft2 = nfft>>2; Complex * rtw = real_twiddles(ncfft2); - m_tmpBuf.resize(ncfft); - m_tmpBuf[0] = Complex( src[0].real() + src[ncfft].real(), src[0].real() - src[ncfft].real() ); + m_tmpBuf1.resize(ncfft); + m_tmpBuf1[0] = Complex( src[0].real() + src[ncfft].real(), src[0].real() - src[ncfft].real() ); for (int k = 1; k <= ncfft / 2; ++k) { Complex fk = src[k]; Complex fnkc = conj(src[ncfft-k]); Complex fek = fk + fnkc; Complex tmp = fk - fnkc; Complex fok = tmp * conj(rtw[k-1]); - m_tmpBuf[k] = fek + fok; - m_tmpBuf[ncfft-k] = conj(fek - fok); + m_tmpBuf1[k] = fek + fok; + m_tmpBuf1[ncfft-k] = conj(fek - fok); } - get_plan(ncfft,true).work(0, reinterpret_cast(dst), &m_tmpBuf[0], 1,1); + get_plan(ncfft,true).work(0, reinterpret_cast(dst), &m_tmpBuf1[0], 1,1); } } @@ -369,7 +371,8 @@ PlanMap m_plans; std::map > m_realTwiddles; - std::vector m_tmpBuf; + std::vector m_tmpBuf1; + std::vector m_tmpBuf2; inline int PlanKey(int nfft,bool isinverse) const { return (nfft<<1) | isinverse; } diff --git a/unsupported/test/FFT.cpp b/unsupported/test/FFT.cpp index cc68f3718..ad0d426e4 100644 --- a/unsupported/test/FFT.cpp +++ b/unsupported/test/FFT.cpp @@ -101,12 +101,34 @@ void test_scalar_generic(int nfft) ComplexVector outbuf; for (int k=0;k>1)+1); + VERIFY( fft_rmse(outbuf,inbuf) < test_precision() );// gross check + + fft.ClearFlag(fft.HalfSpectrum ); fft.fwd( outbuf,inbuf); VERIFY( fft_rmse(outbuf,inbuf) < test_precision() );// gross check ScalarVector buf3; fft.inv( buf3 , outbuf); VERIFY( dif_rmse(inbuf,buf3) < test_precision() );// gross check + + // verify that the Unscaled flag takes effect + ComplexVector buf4; + fft.SetFlag(fft.Unscaled); + fft.inv( buf4 , outbuf); + for (int k=0;k() );// gross check + + // verify that ClearFlag works + fft.ClearFlag(fft.Unscaled); + fft.inv( buf3 , outbuf); + VERIFY( dif_rmse(inbuf,buf3) < test_precision() );// gross check } template @@ -136,6 +158,19 @@ void test_complex_generic(int nfft) fft.inv( buf3 , outbuf); VERIFY( dif_rmse(inbuf,buf3) < test_precision() );// gross check + + // verify that the Unscaled flag takes effect + ComplexVector buf4; + fft.SetFlag(fft.Unscaled); + fft.inv( buf4 , outbuf); + for (int k=0;k() );// gross check + + // verify that ClearFlag works + fft.ClearFlag(fft.Unscaled); + fft.inv( buf3 , outbuf); + VERIFY( dif_rmse(inbuf,buf3) < test_precision() );// gross check } template From ec70f8006be1e46055a0d65850c8c60c3bebf6ed Mon Sep 17 00:00:00 2001 From: Mark Borgerding Date: Sat, 31 Oct 2009 00:13:22 -0400 Subject: [PATCH 57/72] added inlines to a bunch of functions --- unsupported/Eigen/FFT | 12 + unsupported/Eigen/src/FFT/ei_fftw_impl.h | 3 + unsupported/Eigen/src/FFT/ei_kissfft_impl.h | 697 ++++++++++---------- 3 files changed, 366 insertions(+), 346 deletions(-) diff --git a/unsupported/Eigen/FFT b/unsupported/Eigen/FFT index b1d3d9f0e..8f7a2fcae 100644 --- a/unsupported/Eigen/FFT +++ b/unsupported/Eigen/FFT @@ -85,6 +85,7 @@ class FFT inline void ClearFlag(Flag f) { m_flag &= (~(int)f);} + inline void fwd( Complex * dst, const Scalar * src, int nfft) { m_impl.fwd(dst,src,nfft); @@ -92,12 +93,14 @@ class FFT ReflectSpectrum(dst,nfft); } + inline void fwd( Complex * dst, const Complex * src, int nfft) { m_impl.fwd(dst,src,nfft); } template + inline void fwd( std::vector & dst, const std::vector<_Input> & src) { if ( NumTraits<_Input>::IsComplex == 0 && HasFlag(HalfSpectrum) ) @@ -108,6 +111,7 @@ class FFT } template + inline void fwd( MatrixBase & dst, const MatrixBase & src) { EIGEN_STATIC_ASSERT_VECTOR_ONLY(InputDerived) @@ -125,6 +129,7 @@ class FFT fwd( &dst[0],&src[0],src.size() ); } + inline void inv( Complex * dst, const Complex * src, int nfft) { m_impl.inv( dst,src,nfft ); @@ -132,6 +137,7 @@ class FFT scale(dst,1./nfft,nfft); } + inline void inv( Scalar * dst, const Complex * src, int nfft) { m_impl.inv( dst,src,nfft ); @@ -140,6 +146,7 @@ class FFT } template + inline void inv( MatrixBase & dst, const MatrixBase & src) { EIGEN_STATIC_ASSERT_VECTOR_ONLY(OutputDerived) @@ -157,6 +164,7 @@ class FFT } template + inline void inv( std::vector<_Output> & dst, const std::vector & src) { if ( NumTraits<_Output>::IsComplex == 0 && HasFlag(HalfSpectrum) ) @@ -171,18 +179,22 @@ class FFT // TODO: handle Eigen MatrixBase // ---> i added fwd and inv specializations above + unit test, is this enough? (bjacob) + inline impl_type & impl() {return m_impl;} private: template + inline void scale(_It x,_Val s,int nx) { for (int k=0;k>1)+1; for (int k=nhbins;k < nfft; ++k ) freq[k] = conj(freq[nfft-k]); diff --git a/unsupported/Eigen/src/FFT/ei_fftw_impl.h b/unsupported/Eigen/src/FFT/ei_fftw_impl.h index 18473a29b..a66b7398c 100644 --- a/unsupported/Eigen/src/FFT/ei_fftw_impl.h +++ b/unsupported/Eigen/src/FFT/ei_fftw_impl.h @@ -166,6 +166,7 @@ m_plans.clear(); } + // complex-to-complex forward FFT inline void fwd( Complex * dst,const Complex *src,int nfft) { @@ -208,3 +209,5 @@ return m_plans[key]; } }; +/* vim: set filetype=cpp et sw=2 ts=2 ai: */ + diff --git a/unsupported/Eigen/src/FFT/ei_kissfft_impl.h b/unsupported/Eigen/src/FFT/ei_kissfft_impl.h index 091e730d1..5c958d1ec 100644 --- a/unsupported/Eigen/src/FFT/ei_kissfft_impl.h +++ b/unsupported/Eigen/src/FFT/ei_kissfft_impl.h @@ -27,379 +27,384 @@ // This FFT implementation was derived from kissfft http:sourceforge.net/projects/kissfft // Copyright 2003-2009 Mark Borgerding - template - struct ei_kiss_cpx_fft +template +struct ei_kiss_cpx_fft +{ + typedef _Scalar Scalar; + typedef std::complex Complex; + std::vector m_twiddles; + std::vector m_stageRadix; + std::vector m_stageRemainder; + std::vector m_scratchBuf; + bool m_inverse; + + inline + void make_twiddles(int nfft,bool inverse) { - typedef _Scalar Scalar; - typedef std::complex Complex; - std::vector m_twiddles; - std::vector m_stageRadix; - std::vector m_stageRemainder; - std::vector m_scratchBuf; - bool m_inverse; + m_inverse = inverse; + m_twiddles.resize(nfft); + Scalar phinc = (inverse?2:-2)* acos( (Scalar) -1) / nfft; + for (int i=0;in) - p=n;// impossible to have a factor > sqrt(n) - } - n /= p; - m_stageRadix.push_back(p); - m_stageRemainder.push_back(n); - if ( p > 5 ) - m_scratchBuf.resize(p); // scratchbuf will be needed in bfly_generic - }while(n>1); - } - - template - void work( int stage,Complex * xout, const _Src * xin, size_t fstride,size_t in_stride) - { - int p = m_stageRadix[stage]; - int m = m_stageRemainder[stage]; - Complex * Fout_beg = xout; - Complex * Fout_end = xout + p*m; - - if (m>1) { - do{ - // recursive call: - // DFT of size m*p performed by doing - // p instances of smaller DFTs of size m, - // each one takes a decimated version of the input - work(stage+1, xout , xin, fstride*p,in_stride); - xin += fstride*in_stride; - }while( (xout += m) != Fout_end ); - }else{ - do{ - *xout = *xin; - xin += fstride*in_stride; - }while(++xout != Fout_end ); - } - xout=Fout_beg; - - // recombine the p smaller DFTs - switch (p) { - case 2: bfly2(xout,fstride,m); break; - case 3: bfly3(xout,fstride,m); break; - case 4: bfly4(xout,fstride,m); break; - case 5: bfly5(xout,fstride,m); break; - default: bfly_generic(xout,fstride,m,p); break; - } - } - - inline - void bfly2( Complex * Fout, const size_t fstride, int m) - { - for (int k=0;kn) + p=n;// impossible to have a factor > sqrt(n) } + n /= p; + m_stageRadix.push_back(p); + m_stageRemainder.push_back(n); + if ( p > 5 ) + m_scratchBuf.resize(p); // scratchbuf will be needed in bfly_generic + }while(n>1); + } - inline - void bfly4( Complex * Fout, const size_t fstride, const size_t m) - { - Complex scratch[6]; - int negative_if_inverse = m_inverse * -2 +1; - for (size_t k=0;k + inline + void work( int stage,Complex * xout, const _Src * xin, size_t fstride,size_t in_stride) + { + int p = m_stageRadix[stage]; + int m = m_stageRemainder[stage]; + Complex * Fout_beg = xout; + Complex * Fout_end = xout + p*m; + if (m>1) { do{ - scratch[1]=Fout[m] * *tw1; - scratch[2]=Fout[m2] * *tw2; - - scratch[3]=scratch[1]+scratch[2]; - scratch[0]=scratch[1]-scratch[2]; - tw1 += fstride; - tw2 += fstride*2; - Fout[m] = Complex( Fout->real() - .5*scratch[3].real() , Fout->imag() - .5*scratch[3].imag() ); - scratch[0] *= epi3.imag(); - *Fout += scratch[3]; - Fout[m2] = Complex( Fout[m].real() + scratch[0].imag() , Fout[m].imag() - scratch[0].real() ); - Fout[m] += Complex( -scratch[0].imag(),scratch[0].real() ); - ++Fout; - }while(--k); + // recursive call: + // DFT of size m*p performed by doing + // p instances of smaller DFTs of size m, + // each one takes a decimated version of the input + work(stage+1, xout , xin, fstride*p,in_stride); + xin += fstride*in_stride; + }while( (xout += m) != Fout_end ); + }else{ + do{ + *xout = *xin; + xin += fstride*in_stride; + }while(++xout != Fout_end ); } + xout=Fout_beg; - inline - void bfly5( Complex * Fout, const size_t fstride, const size_t m) - { - Complex *Fout0,*Fout1,*Fout2,*Fout3,*Fout4; - size_t u; - Complex scratch[13]; - Complex * twiddles = &m_twiddles[0]; - Complex *tw; - Complex ya,yb; - ya = twiddles[fstride*m]; - yb = twiddles[fstride*2*m]; - - Fout0=Fout; - Fout1=Fout0+m; - Fout2=Fout0+2*m; - Fout3=Fout0+3*m; - Fout4=Fout0+4*m; - - tw=twiddles; - for ( u=0; u=Norig) twidx-=Norig; - t=scratchbuf[q] * twiddles[twidx]; - Fout[ k ] += t; - } - k += m; - } - } - } - }; - - template - struct ei_kissfft_impl + inline + void bfly2( Complex * Fout, const size_t fstride, int m) { - typedef _Scalar Scalar; - typedef std::complex Complex; - - void clear() - { - m_plans.clear(); - m_realTwiddles.clear(); + for (int k=0;k>1)+1,dst ); - }else{ - int ncfft = nfft>>1; - int ncfft2 = nfft>>2; - Complex * rtw = real_twiddles(ncfft2); + inline + void bfly3( Complex * Fout, const size_t fstride, const size_t m) + { + size_t k=m; + const size_t m2 = 2*m; + Complex *tw1,*tw2; + Complex scratch[5]; + Complex epi3; + epi3 = m_twiddles[fstride*m]; - // use optimized mode for even real - fwd( dst, reinterpret_cast (src), ncfft); - Complex dc = dst[0].real() + dst[0].imag(); - Complex nyquist = dst[0].real() - dst[0].imag(); - int k; - for ( k=1;k <= ncfft2 ; ++k ) { - Complex fpk = dst[k]; - Complex fpnk = conj(dst[ncfft-k]); - Complex f1k = fpk + fpnk; - Complex f2k = fpk - fpnk; - Complex tw= f2k * rtw[k-1]; - dst[k] = (f1k + tw) * Scalar(.5); - dst[ncfft-k] = conj(f1k -tw)*Scalar(.5); + tw1=tw2=&m_twiddles[0]; + + do{ + scratch[1]=Fout[m] * *tw1; + scratch[2]=Fout[m2] * *tw2; + + scratch[3]=scratch[1]+scratch[2]; + scratch[0]=scratch[1]-scratch[2]; + tw1 += fstride; + tw2 += fstride*2; + Fout[m] = Complex( Fout->real() - .5*scratch[3].real() , Fout->imag() - .5*scratch[3].imag() ); + scratch[0] *= epi3.imag(); + *Fout += scratch[3]; + Fout[m2] = Complex( Fout[m].real() + scratch[0].imag() , Fout[m].imag() - scratch[0].real() ); + Fout[m] += Complex( -scratch[0].imag(),scratch[0].real() ); + ++Fout; + }while(--k); + } + + inline + void bfly5( Complex * Fout, const size_t fstride, const size_t m) + { + Complex *Fout0,*Fout1,*Fout2,*Fout3,*Fout4; + size_t u; + Complex scratch[13]; + Complex * twiddles = &m_twiddles[0]; + Complex *tw; + Complex ya,yb; + ya = twiddles[fstride*m]; + yb = twiddles[fstride*2*m]; + + Fout0=Fout; + Fout1=Fout0+m; + Fout2=Fout0+2*m; + Fout3=Fout0+3*m; + Fout4=Fout0+4*m; + + tw=twiddles; + for ( u=0; u=Norig) twidx-=Norig; + t=scratchbuf[q] * twiddles[twidx]; + Fout[ k ] += t; } - dst[0] = dc; - dst[ncfft] = nyquist; + k += m; } } + } +}; - // inverse complex-to-complex - inline - void inv(Complex * dst,const Complex *src,int nfft) - { - get_plan(nfft,true).work(0, dst, src, 1,1); - } +template +struct ei_kissfft_impl +{ + typedef _Scalar Scalar; + typedef std::complex Complex; - // half-complex to scalar - inline - void inv( Scalar * dst,const Complex * src,int nfft) - { - if (nfft&3) { - m_tmpBuf1.resize(nfft); - m_tmpBuf2.resize(nfft); - std::copy(src,src+(nfft>>1)+1,m_tmpBuf1.begin() ); - for (int k=1;k<(nfft>>1)+1;++k) - m_tmpBuf1[nfft-k] = conj(m_tmpBuf1[k]); - inv(&m_tmpBuf2[0],&m_tmpBuf1[0],nfft); - for (int k=0;k>1; - int ncfft2 = nfft>>2; - Complex * rtw = real_twiddles(ncfft2); - m_tmpBuf1.resize(ncfft); - m_tmpBuf1[0] = Complex( src[0].real() + src[ncfft].real(), src[0].real() - src[ncfft].real() ); - for (int k = 1; k <= ncfft / 2; ++k) { - Complex fk = src[k]; - Complex fnkc = conj(src[ncfft-k]); - Complex fek = fk + fnkc; - Complex tmp = fk - fnkc; - Complex fok = tmp * conj(rtw[k-1]); - m_tmpBuf1[k] = fek + fok; - m_tmpBuf1[ncfft-k] = conj(fek - fok); - } - get_plan(ncfft,true).work(0, reinterpret_cast(dst), &m_tmpBuf1[0], 1,1); + void clear() + { + m_plans.clear(); + m_realTwiddles.clear(); + } + + inline + void fwd( Complex * dst,const Complex *src,int nfft) + { + get_plan(nfft,false).work(0, dst, src, 1,1); + } + + // real-to-complex forward FFT + // perform two FFTs of src even and src odd + // then twiddle to recombine them into the half-spectrum format + // then fill in the conjugate symmetric half + inline + void fwd( Complex * dst,const Scalar * src,int nfft) + { + if ( nfft&3 ) { + // use generic mode for odd + m_tmpBuf1.resize(nfft); + get_plan(nfft,false).work(0, &m_tmpBuf1[0], src, 1,1); + std::copy(m_tmpBuf1.begin(),m_tmpBuf1.begin()+(nfft>>1)+1,dst ); + }else{ + int ncfft = nfft>>1; + int ncfft2 = nfft>>2; + Complex * rtw = real_twiddles(ncfft2); + + // use optimized mode for even real + fwd( dst, reinterpret_cast (src), ncfft); + Complex dc = dst[0].real() + dst[0].imag(); + Complex nyquist = dst[0].real() - dst[0].imag(); + int k; + for ( k=1;k <= ncfft2 ; ++k ) { + Complex fpk = dst[k]; + Complex fpnk = conj(dst[ncfft-k]); + Complex f1k = fpk + fpnk; + Complex f2k = fpk - fpnk; + Complex tw= f2k * rtw[k-1]; + dst[k] = (f1k + tw) * Scalar(.5); + dst[ncfft-k] = conj(f1k -tw)*Scalar(.5); } + dst[0] = dc; + dst[ncfft] = nyquist; } + } - protected: - typedef ei_kiss_cpx_fft PlanData; - typedef std::map PlanMap; + // inverse complex-to-complex + inline + void inv(Complex * dst,const Complex *src,int nfft) + { + get_plan(nfft,true).work(0, dst, src, 1,1); + } - PlanMap m_plans; - std::map > m_realTwiddles; - std::vector m_tmpBuf1; - std::vector m_tmpBuf2; - - inline - int PlanKey(int nfft,bool isinverse) const { return (nfft<<1) | isinverse; } - - inline - PlanData & get_plan(int nfft,bool inverse) - { - // TODO look for PlanKey(nfft, ! inverse) and conjugate the twiddles - PlanData & pd = m_plans[ PlanKey(nfft,inverse) ]; - if ( pd.m_twiddles.size() == 0 ) { - pd.make_twiddles(nfft,inverse); - pd.factorize(nfft); + // half-complex to scalar + inline + void inv( Scalar * dst,const Complex * src,int nfft) + { + if (nfft&3) { + m_tmpBuf1.resize(nfft); + m_tmpBuf2.resize(nfft); + std::copy(src,src+(nfft>>1)+1,m_tmpBuf1.begin() ); + for (int k=1;k<(nfft>>1)+1;++k) + m_tmpBuf1[nfft-k] = conj(m_tmpBuf1[k]); + inv(&m_tmpBuf2[0],&m_tmpBuf1[0],nfft); + for (int k=0;k>1; + int ncfft2 = nfft>>2; + Complex * rtw = real_twiddles(ncfft2); + m_tmpBuf1.resize(ncfft); + m_tmpBuf1[0] = Complex( src[0].real() + src[ncfft].real(), src[0].real() - src[ncfft].real() ); + for (int k = 1; k <= ncfft / 2; ++k) { + Complex fk = src[k]; + Complex fnkc = conj(src[ncfft-k]); + Complex fek = fk + fnkc; + Complex tmp = fk - fnkc; + Complex fok = tmp * conj(rtw[k-1]); + m_tmpBuf1[k] = fek + fok; + m_tmpBuf1[ncfft-k] = conj(fek - fok); } - return pd; + get_plan(ncfft,true).work(0, reinterpret_cast(dst), &m_tmpBuf1[0], 1,1); } + } - inline - Complex * real_twiddles(int ncfft2) - { - std::vector & twidref = m_realTwiddles[ncfft2];// creates new if not there - if ( (int)twidref.size() != ncfft2 ) { - twidref.resize(ncfft2); - int ncfft= ncfft2<<1; - Scalar pi = acos( Scalar(-1) ); - for (int k=1;k<=ncfft2;++k) - twidref[k-1] = exp( Complex(0,-pi * ((double) (k) / ncfft + .5) ) ); - } - return &twidref[0]; + protected: + typedef ei_kiss_cpx_fft PlanData; + typedef std::map PlanMap; + + PlanMap m_plans; + std::map > m_realTwiddles; + std::vector m_tmpBuf1; + std::vector m_tmpBuf2; + + inline + int PlanKey(int nfft,bool isinverse) const { return (nfft<<1) | isinverse; } + + inline + PlanData & get_plan(int nfft,bool inverse) + { + // TODO look for PlanKey(nfft, ! inverse) and conjugate the twiddles + PlanData & pd = m_plans[ PlanKey(nfft,inverse) ]; + if ( pd.m_twiddles.size() == 0 ) { + pd.make_twiddles(nfft,inverse); + pd.factorize(nfft); } - }; + return pd; + } + + inline + Complex * real_twiddles(int ncfft2) + { + std::vector & twidref = m_realTwiddles[ncfft2];// creates new if not there + if ( (int)twidref.size() != ncfft2 ) { + twidref.resize(ncfft2); + int ncfft= ncfft2<<1; + Scalar pi = acos( Scalar(-1) ); + for (int k=1;k<=ncfft2;++k) + twidref[k-1] = exp( Complex(0,-pi * ((double) (k) / ncfft + .5) ) ); + } + return &twidref[0]; + } +}; + +/* vim: set filetype=cpp et sw=2 ts=2 ai: */ + From 48261fc77381bb65401b1af3e41bae387fbbfce1 Mon Sep 17 00:00:00 2001 From: Benoit Jacob Date: Sat, 31 Oct 2009 11:50:15 -0400 Subject: [PATCH 58/72] * default MatrixBase ctor: make it protected, make it a static assert, only do the check when debugging eigen to avoid slowing down compilation for everybody (this check is paranoiac, it's very seldom useful) * add private MatrixBase ctors to catch cases when the user tries to construct MatrixBase objects directly --- Eigen/src/Core/MatrixBase.h | 24 ++++++++++++++++++------ Eigen/src/Core/util/StaticAssert.h | 1 + Eigen/src/Core/util/XprHelper.h | 2 +- 3 files changed, 20 insertions(+), 7 deletions(-) diff --git a/Eigen/src/Core/MatrixBase.h b/Eigen/src/Core/MatrixBase.h index cb1589f85..72ce865c1 100644 --- a/Eigen/src/Core/MatrixBase.h +++ b/Eigen/src/Core/MatrixBase.h @@ -145,12 +145,6 @@ template class MatrixBase #endif }; - /** Default constructor. Just checks at compile-time for self-consistency of the flags. */ - MatrixBase() - { - ei_assert(ei_are_flags_consistent::ret); - } - #ifndef EIGEN_PARSED_BY_DOXYGEN /** This is the "real scalar" type; if the \a Scalar type is already real numbers * (e.g. int, float or double) then \a RealScalar is just the same as \a Scalar. If @@ -802,6 +796,24 @@ template class MatrixBase #ifdef EIGEN_MATRIXBASE_PLUGIN #include EIGEN_MATRIXBASE_PLUGIN #endif + + protected: + /** Default constructor. Do nothing. */ + MatrixBase() + { + /* Just checks for self-consistency of the flags. + * Only do it when debugging Eigen, as this borders on paranoiac and could slow compilation down + */ +#ifdef EIGEN_INTERNAL_DEBUGGING + EIGEN_STATIC_ASSERT(ei_are_flags_consistent::ret, + INVALID_MATRIXBASE_TEMPLATE_PARAMETERS) +#endif + } + + private: + explicit MatrixBase(int); + MatrixBase(int,int); + template explicit MatrixBase(const MatrixBase&); }; #endif // EIGEN_MATRIXBASE_H diff --git a/Eigen/src/Core/util/StaticAssert.h b/Eigen/src/Core/util/StaticAssert.h index 6210bc91c..56a57b706 100644 --- a/Eigen/src/Core/util/StaticAssert.h +++ b/Eigen/src/Core/util/StaticAssert.h @@ -76,6 +76,7 @@ THIS_METHOD_IS_ONLY_FOR_COLUMN_MAJOR_MATRICES, THIS_METHOD_IS_ONLY_FOR_ROW_MAJOR_MATRICES, INVALID_MATRIX_TEMPLATE_PARAMETERS, + INVALID_MATRIXBASE_TEMPLATE_PARAMETERS, BOTH_MATRICES_MUST_HAVE_THE_SAME_STORAGE_ORDER, THIS_METHOD_IS_ONLY_FOR_DIAGONAL_MATRIX, THE_MATRIX_OR_EXPRESSION_THAT_YOU_PASSED_DOES_NOT_HAVE_THE_EXPECTED_TYPE, diff --git a/Eigen/src/Core/util/XprHelper.h b/Eigen/src/Core/util/XprHelper.h index cea2faaa8..be4266f85 100644 --- a/Eigen/src/Core/util/XprHelper.h +++ b/Eigen/src/Core/util/XprHelper.h @@ -1,4 +1,4 @@ -// This file is part of Eigen, a lightweight C++ template library +// // This file is part of Eigen, a lightweight C++ template library // for linear algebra. // // Copyright (C) 2008 Gael Guennebaud From 3ae4e3880febfa7a1948bc8accdabe066d17344d Mon Sep 17 00:00:00 2001 From: Benoit Jacob Date: Sat, 31 Oct 2009 14:36:31 -0400 Subject: [PATCH 59/72] fix compilation --- Eigen/src/Geometry/arch/Geometry_SSE.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Eigen/src/Geometry/arch/Geometry_SSE.h b/Eigen/src/Geometry/arch/Geometry_SSE.h index 1b8f6aead..a6ed10d82 100644 --- a/Eigen/src/Geometry/arch/Geometry_SSE.h +++ b/Eigen/src/Geometry/arch/Geometry_SSE.h @@ -32,8 +32,8 @@ template struct ei_quat_product res; - __m128 a = _a.coeffs().packet(0); - __m128 b = _b.coeffs().packet(0); + __m128 a = _a.coeffs().template packet(0); + __m128 b = _b.coeffs().template packet(0); __m128 flip1 = _mm_xor_ps(_mm_mul_ps(ei_vec4f_swizzle1(a,1,2,0,2), ei_vec4f_swizzle1(b,2,0,1,2)),mask); __m128 flip2 = _mm_xor_ps(_mm_mul_ps(ei_vec4f_swizzle1(a,3,3,3,1), From 5ba19a53a6eb68c0bf1d2b531fe7fbffcca39929 Mon Sep 17 00:00:00 2001 From: Benoit Jacob Date: Sat, 31 Oct 2009 14:37:11 -0400 Subject: [PATCH 60/72] rephrase tutorial on Map --- doc/C01_QuickStartGuide.dox | 24 +++++++++++++++--------- 1 file changed, 15 insertions(+), 9 deletions(-) diff --git a/doc/C01_QuickStartGuide.dox b/doc/C01_QuickStartGuide.dox index 2943aed80..06b2595e7 100644 --- a/doc/C01_QuickStartGuide.dox +++ b/doc/C01_QuickStartGuide.dox @@ -278,18 +278,24 @@ Of course, fixed-size matrices can't be resized. \subsection TutorialMap Map -Any memory buffer can be mapped as an Eigen expression: -
+Any memory buffer can be mapped as an Eigen expression using the Map() static method: \code std::vector stlarray(10); -Map(&stlarray[0], stlarray.size()).setOnes(); -int data[4] = 1, 2, 3, 4; -Matrix2i mat2x2(data); -MatrixXi mat2x2 = Map(data); -MatrixXi mat2x2 = Map(data,2,2); +VectorXf::Map(&stlarray[0], stlarray.size()).squaredNorm(); +\endcode +Here VectorXf::Map returns an object of class Map, which behaves like a VectorXf except that it uses the existing array. You can write to this object, that will write to the existing array. You can also construct a named obtect to reuse it: +\code +float array[rows*cols]; +Map m(array,rows,cols); +m = othermatrix1 * othermatrix2; +m.eigenvalues(); +\endcode +In the fixed-size case, no need to pass sizes: +\code +float array[9]; +Map m(array); +Matrix3d::Map(array).setIdentity(); \endcode -
- \subsection TutorialCommaInit Comma initializer From 979431b9871bde9cee6c598ce9615a729d39e330 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Mon, 2 Nov 2009 10:46:40 +0100 Subject: [PATCH 61/72] fix #66 : upper triangular checks in ComplexSchur --- Eigen/Eigenvalues | 1 + Eigen/src/Eigenvalues/ComplexSchur.h | 16 +++++++++------- 2 files changed, 10 insertions(+), 7 deletions(-) diff --git a/Eigen/Eigenvalues b/Eigen/Eigenvalues index 9a6443f39..8c6841549 100644 --- a/Eigen/Eigenvalues +++ b/Eigen/Eigenvalues @@ -8,6 +8,7 @@ #include "Cholesky" #include "Jacobi" #include "Householder" +#include "LU" // Note that EIGEN_HIDE_HEAVY_CODE has to be defined per module #if (defined EIGEN_EXTERN_INSTANTIATIONS) && (EIGEN_EXTERN_INSTANTIATIONS>=2) diff --git a/Eigen/src/Eigenvalues/ComplexSchur.h b/Eigen/src/Eigenvalues/ComplexSchur.h index 0534715c4..a25af342d 100644 --- a/Eigen/src/Eigenvalues/ComplexSchur.h +++ b/Eigen/src/Eigenvalues/ComplexSchur.h @@ -167,10 +167,11 @@ void ComplexSchur::compute(const MatrixType& matrix, bool skipU) //locate the range in which to iterate while(iu > 0) { - d = ei_norm1(m_matT.coeffRef(iu,iu)) + ei_norm1(m_matT.coeffRef(iu-1,iu-1)); - sd = ei_norm1(m_matT.coeffRef(iu,iu-1)); + d = ei_norm1(m_matT.coeff(iu,iu)) + ei_norm1(m_matT.coeff(iu-1,iu-1)); + sd = ei_norm1(m_matT.coeff(iu,iu-1)); - if(sd >= eps * d) break; // FIXME : precision criterion ?? + if(!ei_isMuchSmallerThan(sd,d,eps)) + break; m_matT.coeffRef(iu,iu-1) = Complex(0); iter = 0; @@ -187,13 +188,14 @@ void ComplexSchur::compute(const MatrixType& matrix, bool skipU) } il = iu-1; - while( il > 0 ) + while(il > 0) { // check if the current 2x2 block on the diagonal is upper triangular - d = ei_norm1(m_matT.coeffRef(il,il)) + ei_norm1(m_matT.coeffRef(il-1,il-1)); - sd = ei_norm1(m_matT.coeffRef(il,il-1)); + d = ei_norm1(m_matT.coeff(il,il)) + ei_norm1(m_matT.coeff(il-1,il-1)); + sd = ei_norm1(m_matT.coeff(il,il-1)); - if(sd < eps * d) break; // FIXME : precision criterion ?? + if(ei_isMuchSmallerThan(sd,d,eps)) + break; --il; } From a7bebe0aeb798089e8f79a4133d84f5785e2b528 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Wed, 4 Nov 2009 09:04:50 +0100 Subject: [PATCH 62/72] an attempt to fix a compilation issue with MSVC --- Eigen/src/Sparse/CholmodSupport.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/Eigen/src/Sparse/CholmodSupport.h b/Eigen/src/Sparse/CholmodSupport.h index 30a33c3dc..f02374d7c 100644 --- a/Eigen/src/Sparse/CholmodSupport.h +++ b/Eigen/src/Sparse/CholmodSupport.h @@ -126,6 +126,7 @@ class SparseLLT : public SparseLLT typedef SparseLLT Base; typedef typename Base::Scalar Scalar; typedef typename Base::RealScalar RealScalar; + typedef typename Base::CholMatrixType CholMatrixType; using Base::MatrixLIsDirty; using Base::SupernodalFactorIsDirty; using Base::m_flags; @@ -154,7 +155,7 @@ class SparseLLT : public SparseLLT cholmod_finish(&m_cholmod); } - inline const typename Base::CholMatrixType& matrixL(void) const; + inline const CholMatrixType& matrixL() const; template bool solveInPlace(MatrixBase &b) const; @@ -198,7 +199,7 @@ void SparseLLT::compute(const MatrixType& a) } template -inline const typename SparseLLT::CholMatrixType& +inline const typename SparseLLT::CholMatrixType& SparseLLT::matrixL() const { if (m_status & MatrixLIsDirty) From e2170b9f7ebc119118773a541fa3fa0a8ed35c14 Mon Sep 17 00:00:00 2001 From: Hauke Heibel Date: Wed, 4 Nov 2009 15:38:11 +0100 Subject: [PATCH 63/72] Direct access of the packet structs fixes bug #62 and doe not seem to influence compiler optimization. --- Eigen/src/Core/arch/SSE/PacketMath.h | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/Eigen/src/Core/arch/SSE/PacketMath.h b/Eigen/src/Core/arch/SSE/PacketMath.h index eb1c2d311..c1d7e5786 100644 --- a/Eigen/src/Core/arch/SSE/PacketMath.h +++ b/Eigen/src/Core/arch/SSE/PacketMath.h @@ -220,10 +220,11 @@ template<> EIGEN_STRONG_INLINE void ei_pstoreu(double* to, const Packet2 template<> EIGEN_STRONG_INLINE void ei_pstoreu(float* to, const Packet4f& from) { ei_pstoreu((double*)to, _mm_castps_pd(from)); } template<> EIGEN_STRONG_INLINE void ei_pstoreu(int* to, const Packet4i& from) { ei_pstoreu((double*)to, _mm_castsi128_pd(from)); } -#ifdef _MSC_VER -// this fix internal compilation error -template<> EIGEN_STRONG_INLINE float ei_pfirst(const Packet4f& a) { float x = _mm_cvtss_f32(a); return x; } -template<> EIGEN_STRONG_INLINE double ei_pfirst(const Packet2d& a) { double x = _mm_cvtsd_f64(a); return x; } +#ifdef _MSC_VER <= 1500 +// The temporary variable fixes an internal compilation error. +// Direct of the struct members fixed bug #62. +template<> EIGEN_STRONG_INLINE float ei_pfirst(const Packet4f& a) { return a.m128_f32[0]; } +template<> EIGEN_STRONG_INLINE double ei_pfirst(const Packet2d& a) { return a.m128d_f64[0]; } template<> EIGEN_STRONG_INLINE int ei_pfirst(const Packet4i& a) { int x = _mm_cvtsi128_si32(a); return x; } #else template<> EIGEN_STRONG_INLINE float ei_pfirst(const Packet4f& a) { return _mm_cvtss_f32(a); } From 3979f6d8aad001174160774b49b747430a7686b5 Mon Sep 17 00:00:00 2001 From: Hauke Heibel Date: Wed, 4 Nov 2009 15:49:22 +0100 Subject: [PATCH 64/72] Let's try to stick to the original code, thus activate the fix of #62 only for 64 bit builds. --- Eigen/src/Core/arch/SSE/PacketMath.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/Eigen/src/Core/arch/SSE/PacketMath.h b/Eigen/src/Core/arch/SSE/PacketMath.h index c1d7e5786..f588a8621 100644 --- a/Eigen/src/Core/arch/SSE/PacketMath.h +++ b/Eigen/src/Core/arch/SSE/PacketMath.h @@ -220,12 +220,17 @@ template<> EIGEN_STRONG_INLINE void ei_pstoreu(double* to, const Packet2 template<> EIGEN_STRONG_INLINE void ei_pstoreu(float* to, const Packet4f& from) { ei_pstoreu((double*)to, _mm_castps_pd(from)); } template<> EIGEN_STRONG_INLINE void ei_pstoreu(int* to, const Packet4i& from) { ei_pstoreu((double*)to, _mm_castsi128_pd(from)); } -#ifdef _MSC_VER <= 1500 +#if (_MSC_VER <= 1500) && defined(_WIN64) // The temporary variable fixes an internal compilation error. // Direct of the struct members fixed bug #62. template<> EIGEN_STRONG_INLINE float ei_pfirst(const Packet4f& a) { return a.m128_f32[0]; } template<> EIGEN_STRONG_INLINE double ei_pfirst(const Packet2d& a) { return a.m128d_f64[0]; } template<> EIGEN_STRONG_INLINE int ei_pfirst(const Packet4i& a) { int x = _mm_cvtsi128_si32(a); return x; } +#elif (_MSC_VER <= 1500) +// The temporary variable fixes an internal compilation error. +template<> EIGEN_STRONG_INLINE float ei_pfirst(const Packet4f& a) { float x = _mm_cvtss_f32(a); return x; } +template<> EIGEN_STRONG_INLINE double ei_pfirst(const Packet2d& a) { double x = _mm_cvtsd_f64(a); return x; } +template<> EIGEN_STRONG_INLINE int ei_pfirst(const Packet4i& a) { int x = _mm_cvtsi128_si32(a); return x; } #else template<> EIGEN_STRONG_INLINE float ei_pfirst(const Packet4f& a) { return _mm_cvtss_f32(a); } template<> EIGEN_STRONG_INLINE double ei_pfirst(const Packet2d& a) { return _mm_cvtsd_f64(a); } From 4c456d4211bead14edf2a6b11bfa9617b455b7e4 Mon Sep 17 00:00:00 2001 From: Benoit Jacob Date: Wed, 4 Nov 2009 11:46:17 -0500 Subject: [PATCH 65/72] fix bug in svd solve reported on forum, was apparently assuming square matrix, not sure how the unit test could work. --- Eigen/src/SVD/SVD.h | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/Eigen/src/SVD/SVD.h b/Eigen/src/SVD/SVD.h index da01cf396..99272258e 100644 --- a/Eigen/src/SVD/SVD.h +++ b/Eigen/src/SVD/SVD.h @@ -426,8 +426,11 @@ bool SVD::solve(const MatrixBase &b, ResultType* resul else aux.coeffRef(i) /= si; } - - result->col(j) = m_matV * aux; + const int cols = m_matV.rows(); + const int minsize = std::min(rows,cols); + result->col(j).start(minsize) = aux.start(minsize); + if(cols>rows) result->col(j).end(cols-minsize).setZero(); + result->col(j) = m_matV * result->col(j); } return true; } From daa4574a43caf68122bde9dfb8a43b764a0e8941 Mon Sep 17 00:00:00 2001 From: Jitse Niesen Date: Thu, 5 Nov 2009 03:46:18 +0000 Subject: [PATCH 66/72] Add regression test for issue #66 (ComplexSchur of zero matrix). --- test/eigensolver_complex.cpp | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/test/eigensolver_complex.cpp b/test/eigensolver_complex.cpp index 38ede7c4a..e1ce575e1 100644 --- a/test/eigensolver_complex.cpp +++ b/test/eigensolver_complex.cpp @@ -49,6 +49,10 @@ template void eigensolver(const MatrixType& m) ComplexEigenSolver ei1(a); VERIFY_IS_APPROX(a * ei1.eigenvectors(), ei1.eigenvectors() * ei1.eigenvalues().asDiagonal()); + // Regression test for issue #66 + MatrixType z = MatrixType::Zero(rows,cols); + ComplexEigenSolver eiz(z); + VERIFY((eiz.eigenvalues().cwise()==0).all()); } void test_eigensolver_complex() @@ -58,4 +62,3 @@ void test_eigensolver_complex() CALL_SUBTEST( eigensolver(MatrixXcd(14,14)) ); } } - From fe81b3f6512514ab70ed96566108c5992a8ee7d3 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Thu, 5 Nov 2009 18:06:33 +0100 Subject: [PATCH 67/72] Add the possibility to control the storage mode of scalar value (by value or reference) in order to avoid unecessary copies when using complex scalar types (e.g., a AutoDiffScalar) --- Eigen/src/Core/Functors.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Eigen/src/Core/Functors.h b/Eigen/src/Core/Functors.h index cbaeb83e2..ef4485f5b 100644 --- a/Eigen/src/Core/Functors.h +++ b/Eigen/src/Core/Functors.h @@ -350,7 +350,7 @@ struct ei_scalar_multiple_op { EIGEN_STRONG_INLINE Scalar operator() (const Scalar& a) const { return a * m_other; } EIGEN_STRONG_INLINE const PacketScalar packetOp(const PacketScalar& a) const { return ei_pmul(a, ei_pset1(m_other)); } - const Scalar m_other; + const typename NumTraits::Nested m_other; private: ei_scalar_multiple_op& operator=(const ei_scalar_multiple_op&); }; @@ -364,7 +364,7 @@ struct ei_scalar_multiple2_op { EIGEN_STRONG_INLINE ei_scalar_multiple2_op(const ei_scalar_multiple2_op& other) : m_other(other.m_other) { } EIGEN_STRONG_INLINE ei_scalar_multiple2_op(const Scalar2& other) : m_other(other) { } EIGEN_STRONG_INLINE result_type operator() (const Scalar1& a) const { return a * m_other; } - const Scalar2 m_other; + const typename NumTraits::Nested m_other; }; template struct ei_functor_traits > @@ -393,7 +393,7 @@ struct ei_scalar_quotient1_impl { EIGEN_STRONG_INLINE ei_scalar_quotient1_impl(const ei_scalar_quotient1_impl& other) : m_other(other.m_other) { } EIGEN_STRONG_INLINE ei_scalar_quotient1_impl(const Scalar& other) : m_other(other) {} EIGEN_STRONG_INLINE Scalar operator() (const Scalar& a) const { return a / m_other; } - const Scalar m_other; + const typename NumTraits::Nested m_other; }; template struct ei_functor_traits > From 1470afda5bc70b0fccde57bb680e5aac4c227c18 Mon Sep 17 00:00:00 2001 From: Jitse Niesen Date: Fri, 6 Nov 2009 09:16:25 +0000 Subject: [PATCH 68/72] Backed out previous changeset: Does not compile. There is no member Nested in NumTraits. --- Eigen/src/Core/Functors.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Eigen/src/Core/Functors.h b/Eigen/src/Core/Functors.h index ef4485f5b..cbaeb83e2 100644 --- a/Eigen/src/Core/Functors.h +++ b/Eigen/src/Core/Functors.h @@ -350,7 +350,7 @@ struct ei_scalar_multiple_op { EIGEN_STRONG_INLINE Scalar operator() (const Scalar& a) const { return a * m_other; } EIGEN_STRONG_INLINE const PacketScalar packetOp(const PacketScalar& a) const { return ei_pmul(a, ei_pset1(m_other)); } - const typename NumTraits::Nested m_other; + const Scalar m_other; private: ei_scalar_multiple_op& operator=(const ei_scalar_multiple_op&); }; @@ -364,7 +364,7 @@ struct ei_scalar_multiple2_op { EIGEN_STRONG_INLINE ei_scalar_multiple2_op(const ei_scalar_multiple2_op& other) : m_other(other.m_other) { } EIGEN_STRONG_INLINE ei_scalar_multiple2_op(const Scalar2& other) : m_other(other) { } EIGEN_STRONG_INLINE result_type operator() (const Scalar1& a) const { return a * m_other; } - const typename NumTraits::Nested m_other; + const Scalar2 m_other; }; template struct ei_functor_traits > @@ -393,7 +393,7 @@ struct ei_scalar_quotient1_impl { EIGEN_STRONG_INLINE ei_scalar_quotient1_impl(const ei_scalar_quotient1_impl& other) : m_other(other.m_other) { } EIGEN_STRONG_INLINE ei_scalar_quotient1_impl(const Scalar& other) : m_other(other) {} EIGEN_STRONG_INLINE Scalar operator() (const Scalar& a) const { return a / m_other; } - const typename NumTraits::Nested m_other; + const Scalar m_other; }; template struct ei_functor_traits > From 771c0507fbeefaa6498eae58efd3835eae6c2dc9 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Fri, 6 Nov 2009 11:23:18 +0100 Subject: [PATCH 69/72] back out previous back out, and this time don't forget to include the NumTraits.h file in the commit ;) --- Eigen/src/Core/Functors.h | 6 +++--- Eigen/src/Core/NumTraits.h | 7 +++++++ 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/Eigen/src/Core/Functors.h b/Eigen/src/Core/Functors.h index cbaeb83e2..ef4485f5b 100644 --- a/Eigen/src/Core/Functors.h +++ b/Eigen/src/Core/Functors.h @@ -350,7 +350,7 @@ struct ei_scalar_multiple_op { EIGEN_STRONG_INLINE Scalar operator() (const Scalar& a) const { return a * m_other; } EIGEN_STRONG_INLINE const PacketScalar packetOp(const PacketScalar& a) const { return ei_pmul(a, ei_pset1(m_other)); } - const Scalar m_other; + const typename NumTraits::Nested m_other; private: ei_scalar_multiple_op& operator=(const ei_scalar_multiple_op&); }; @@ -364,7 +364,7 @@ struct ei_scalar_multiple2_op { EIGEN_STRONG_INLINE ei_scalar_multiple2_op(const ei_scalar_multiple2_op& other) : m_other(other.m_other) { } EIGEN_STRONG_INLINE ei_scalar_multiple2_op(const Scalar2& other) : m_other(other) { } EIGEN_STRONG_INLINE result_type operator() (const Scalar1& a) const { return a * m_other; } - const Scalar2 m_other; + const typename NumTraits::Nested m_other; }; template struct ei_functor_traits > @@ -393,7 +393,7 @@ struct ei_scalar_quotient1_impl { EIGEN_STRONG_INLINE ei_scalar_quotient1_impl(const ei_scalar_quotient1_impl& other) : m_other(other.m_other) { } EIGEN_STRONG_INLINE ei_scalar_quotient1_impl(const Scalar& other) : m_other(other) {} EIGEN_STRONG_INLINE Scalar operator() (const Scalar& a) const { return a / m_other; } - const Scalar m_other; + const typename NumTraits::Nested m_other; }; template struct ei_functor_traits > diff --git a/Eigen/src/Core/NumTraits.h b/Eigen/src/Core/NumTraits.h index 24afe54c5..304e2c1d6 100644 --- a/Eigen/src/Core/NumTraits.h +++ b/Eigen/src/Core/NumTraits.h @@ -52,6 +52,7 @@ template<> struct NumTraits { typedef int Real; typedef double FloatingPoint; + typedef int Nested; enum { IsComplex = 0, HasFloatingPoint = 0, @@ -65,6 +66,7 @@ template<> struct NumTraits { typedef float Real; typedef float FloatingPoint; + typedef float Nested; enum { IsComplex = 0, HasFloatingPoint = 1, @@ -78,6 +80,7 @@ template<> struct NumTraits { typedef double Real; typedef double FloatingPoint; + typedef double Nested; enum { IsComplex = 0, HasFloatingPoint = 1, @@ -91,6 +94,7 @@ template struct NumTraits > { typedef _Real Real; typedef std::complex<_Real> FloatingPoint; + typedef std::complex<_Real> Nested; enum { IsComplex = 1, HasFloatingPoint = NumTraits::HasFloatingPoint, @@ -104,6 +108,7 @@ template<> struct NumTraits { typedef long long int Real; typedef long double FloatingPoint; + typedef long long int Nested; enum { IsComplex = 0, HasFloatingPoint = 0, @@ -117,6 +122,7 @@ template<> struct NumTraits { typedef long double Real; typedef long double FloatingPoint; + typedef long double Nested; enum { IsComplex = 0, HasFloatingPoint = 1, @@ -130,6 +136,7 @@ template<> struct NumTraits { typedef bool Real; typedef float FloatingPoint; + typedef bool Nested; enum { IsComplex = 0, HasFloatingPoint = 0, From 6647a588475c34e5460c64ebce3a958ada95b96b Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Fri, 6 Nov 2009 11:33:18 +0100 Subject: [PATCH 70/72] update product bench --- bench/benchBlasGemm.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/bench/benchBlasGemm.cpp b/bench/benchBlasGemm.cpp index 25458f823..a4a9e780a 100644 --- a/bench/benchBlasGemm.cpp +++ b/bench/benchBlasGemm.cpp @@ -178,13 +178,13 @@ using namespace Eigen; void bench_eigengemm(MyMatrix& mc, const MyMatrix& ma, const MyMatrix& mb, int nbloops) { for (uint j=0 ; j(ma,mb).lazy(); + mc.noalias() += GeneralProduct(ma,mb); } #define MYVERIFY(A,M) if (!(A)) { \ From 5dc02fe5e9388f088bb1d12d7c1247c817d10aa0 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Fri, 6 Nov 2009 11:34:58 +0100 Subject: [PATCH 71/72] improve a bit AutoDiffVector, but it still not working --- .../Eigen/src/AutoDiff/AutoDiffVector.h | 128 ++++++++++-------- 1 file changed, 74 insertions(+), 54 deletions(-) diff --git a/unsupported/Eigen/src/AutoDiff/AutoDiffVector.h b/unsupported/Eigen/src/AutoDiff/AutoDiffVector.h index 69ea9144e..03c82b7e8 100644 --- a/unsupported/Eigen/src/AutoDiff/AutoDiffVector.h +++ b/unsupported/Eigen/src/AutoDiff/AutoDiffVector.h @@ -35,7 +35,7 @@ namespace Eigen { * This class represents a scalar value while tracking its respective derivatives. * * It supports the following list of global math function: - * - std::abs, std::sqrt, std::pow, std::exp, std::log, std::sin, std::cos, + * - std::abs, std::sqrt, std::pow, std::exp, std::log, std::sin, std::cos, * - ei_abs, ei_sqrt, ei_pow, ei_exp, ei_log, ei_sin, ei_cos, * - ei_conj, ei_real, ei_imag, ei_abs2. * @@ -48,130 +48,150 @@ template class AutoDiffVector { public: - typedef typename ei_traits::Scalar Scalar; - + //typedef typename ei_traits::Scalar Scalar; + typedef typename ei_traits::Scalar BaseScalar; + typedef AutoDiffScalar > ActiveScalar; + typedef ActiveScalar Scalar; + typedef AutoDiffScalar CoeffType; + inline AutoDiffVector() {} - + inline AutoDiffVector(const ValueType& values) : m_values(values) { m_jacobian.setZero(); } - + + + CoeffType operator[] (int i) { return CoeffType(m_values[i], m_jacobian.col(i)); } + const CoeffType operator[] (int i) const { return CoeffType(m_values[i], m_jacobian.col(i)); } + + CoeffType operator() (int i) { return CoeffType(m_values[i], m_jacobian.col(i)); } + const CoeffType operator() (int i) const { return CoeffType(m_values[i], m_jacobian.col(i)); } + + CoeffType coeffRef(int i) { return CoeffType(m_values[i], m_jacobian.col(i)); } + const CoeffType coeffRef(int i) const { return CoeffType(m_values[i], m_jacobian.col(i)); } + + int size() const { return m_values.size(); } + + // FIXME here we could return an expression of the sum + Scalar sum() const { /*std::cerr << "sum \n\n";*/ /*std::cerr << m_jacobian.rowwise().sum() << "\n\n";*/ return Scalar(m_values.sum(), m_jacobian.rowwise().sum()); } + + inline AutoDiffVector(const ValueType& values, const JacobianType& jac) : m_values(values), m_jacobian(jac) {} - + template inline AutoDiffVector(const AutoDiffVector& other) : m_values(other.values()), m_jacobian(other.jacobian()) {} - + inline AutoDiffVector(const AutoDiffVector& other) : m_values(other.values()), m_jacobian(other.jacobian()) {} - + template - inline AutoDiffScalar& operator=(const AutoDiffVector& other) + inline AutoDiffVector& operator=(const AutoDiffVector& other) { m_values = other.values(); m_jacobian = other.jacobian(); return *this; } - + inline AutoDiffVector& operator=(const AutoDiffVector& other) { m_values = other.values(); m_jacobian = other.jacobian(); return *this; } - + inline const ValueType& values() const { return m_values; } inline ValueType& values() { return m_values; } - + inline const JacobianType& jacobian() const { return m_jacobian; } inline JacobianType& jacobian() { return m_jacobian; } - + template inline const AutoDiffVector< - CwiseBinaryOp,ValueType,OtherValueType> > - CwiseBinaryOp,JacobianType,OtherJacobianType> > - operator+(const AutoDiffScalar& other) const + typename MakeCwiseBinaryOp,ValueType,OtherValueType>::Type, + typename MakeCwiseBinaryOp,JacobianType,OtherJacobianType>::Type > + operator+(const AutoDiffVector& other) const { return AutoDiffVector< - CwiseBinaryOp,ValueType,OtherValueType> > - CwiseBinaryOp,JacobianType,OtherJacobianType> >( + typename MakeCwiseBinaryOp,ValueType,OtherValueType>::Type, + typename MakeCwiseBinaryOp,JacobianType,OtherJacobianType>::Type >( m_values + other.values(), m_jacobian + other.jacobian()); } - + template inline AutoDiffVector& - operator+=(const AutoDiffVector& other) + operator+=(const AutoDiffVector& other) { m_values += other.values(); m_jacobian += other.jacobian(); return *this; } - + template inline const AutoDiffVector< - CwiseBinaryOp,ValueType,OtherValueType> > - CwiseBinaryOp,JacobianType,OtherJacobianType> > - operator-(const AutoDiffScalar& other) const + typename MakeCwiseBinaryOp,ValueType,OtherValueType>::Type, + typename MakeCwiseBinaryOp,JacobianType,OtherJacobianType>::Type > + operator-(const AutoDiffVector& other) const { return AutoDiffVector< - CwiseBinaryOp,ValueType,OtherValueType> > - CwiseBinaryOp,JacobianType,OtherJacobianType> >( - m_values - other.values(), - m_jacobian - other.jacobian()); + typename MakeCwiseBinaryOp,ValueType,OtherValueType>::Type, + typename MakeCwiseBinaryOp,JacobianType,OtherJacobianType>::Type >( + m_values - other.values(), + m_jacobian - other.jacobian()); } - + template inline AutoDiffVector& - operator-=(const AutoDiffVector& other) + operator-=(const AutoDiffVector& other) { m_values -= other.values(); m_jacobian -= other.jacobian(); return *this; } - + inline const AutoDiffVector< - CwiseUnaryOp, ValueType> - CwiseUnaryOp, JacobianType> > + typename MakeCwiseUnaryOp, ValueType>::Type, + typename MakeCwiseUnaryOp, JacobianType>::Type > operator-() const { return AutoDiffVector< - CwiseUnaryOp, ValueType> - CwiseUnaryOp, JacobianType> >( - -m_values, - -m_jacobian); + typename MakeCwiseUnaryOp, ValueType>::Type, + typename MakeCwiseUnaryOp, JacobianType>::Type >( + -m_values, + -m_jacobian); } - + inline const AutoDiffVector< - CwiseUnaryOp, ValueType> - CwiseUnaryOp, JacobianType> > - operator*(const Scalar& other) const + typename MakeCwiseUnaryOp, ValueType>::Type, + typename MakeCwiseUnaryOp, JacobianType>::Type> + operator*(const BaseScalar& other) const { return AutoDiffVector< - CwiseUnaryOp, ValueType> - CwiseUnaryOp, JacobianType> >( + typename MakeCwiseUnaryOp, ValueType>::Type, + typename MakeCwiseUnaryOp, JacobianType>::Type >( m_values * other, - (m_jacobian * other)); + m_jacobian * other); } - + friend inline const AutoDiffVector< - CwiseUnaryOp, ValueType> - CwiseUnaryOp, JacobianType> > + typename MakeCwiseUnaryOp, ValueType>::Type, + typename MakeCwiseUnaryOp, JacobianType>::Type > operator*(const Scalar& other, const AutoDiffVector& v) { return AutoDiffVector< - CwiseUnaryOp, ValueType> - CwiseUnaryOp, JacobianType> >( + typename MakeCwiseUnaryOp, ValueType>::Type, + typename MakeCwiseUnaryOp, JacobianType>::Type >( v.values() * other, v.jacobian() * other); } - + // template // inline const AutoDiffVector< // CwiseBinaryOp, ValueType, OtherValueType> @@ -188,25 +208,25 @@ class AutoDiffVector // m_values.cwise() * other.values(), // (m_jacobian * other.values()).nestByValue() + (m_values * other.jacobian()).nestByValue()); // } - + inline AutoDiffVector& operator*=(const Scalar& other) { m_values *= other; m_jacobian *= other; return *this; } - + template inline AutoDiffVector& operator*=(const AutoDiffVector& other) { *this = *this * other; return *this; } - + protected: ValueType m_values; JacobianType m_jacobian; - + }; } From aa0974286f6d0f6a86bf7710c91a51fe77596e2b Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Sat, 7 Nov 2009 09:07:23 +0100 Subject: [PATCH 72/72] fix compilation adding a makeconst helper struct --- Eigen/src/Core/Assign.h | 2 +- Eigen/src/Core/Functors.h | 6 +++--- Eigen/src/Core/util/Memory.h | 28 ++++++++++++++-------------- Eigen/src/Core/util/Meta.h | 7 +++++++ 4 files changed, 25 insertions(+), 18 deletions(-) diff --git a/Eigen/src/Core/Assign.h b/Eigen/src/Core/Assign.h index 4bd1046a7..8dc015715 100644 --- a/Eigen/src/Core/Assign.h +++ b/Eigen/src/Core/Assign.h @@ -93,7 +93,7 @@ public: ? ( int(MayUnrollCompletely) && int(DstIsAligned) ? int(CompleteUnrolling) : int(NoUnrolling) ) : int(NoUnrolling) }; - + static void debug() { EIGEN_DEBUG_VAR(DstIsAligned) diff --git a/Eigen/src/Core/Functors.h b/Eigen/src/Core/Functors.h index ef4485f5b..259f40244 100644 --- a/Eigen/src/Core/Functors.h +++ b/Eigen/src/Core/Functors.h @@ -350,7 +350,7 @@ struct ei_scalar_multiple_op { EIGEN_STRONG_INLINE Scalar operator() (const Scalar& a) const { return a * m_other; } EIGEN_STRONG_INLINE const PacketScalar packetOp(const PacketScalar& a) const { return ei_pmul(a, ei_pset1(m_other)); } - const typename NumTraits::Nested m_other; + typename ei_makeconst::Nested>::type m_other; private: ei_scalar_multiple_op& operator=(const ei_scalar_multiple_op&); }; @@ -364,7 +364,7 @@ struct ei_scalar_multiple2_op { EIGEN_STRONG_INLINE ei_scalar_multiple2_op(const ei_scalar_multiple2_op& other) : m_other(other.m_other) { } EIGEN_STRONG_INLINE ei_scalar_multiple2_op(const Scalar2& other) : m_other(other) { } EIGEN_STRONG_INLINE result_type operator() (const Scalar1& a) const { return a * m_other; } - const typename NumTraits::Nested m_other; + typename ei_makeconst::Nested>::type m_other; }; template struct ei_functor_traits > @@ -393,7 +393,7 @@ struct ei_scalar_quotient1_impl { EIGEN_STRONG_INLINE ei_scalar_quotient1_impl(const ei_scalar_quotient1_impl& other) : m_other(other.m_other) { } EIGEN_STRONG_INLINE ei_scalar_quotient1_impl(const Scalar& other) : m_other(other) {} EIGEN_STRONG_INLINE Scalar operator() (const Scalar& a) const { return a / m_other; } - const typename NumTraits::Nested m_other; + typename ei_makeconst::Nested>::type m_other; }; template struct ei_functor_traits > diff --git a/Eigen/src/Core/util/Memory.h b/Eigen/src/Core/util/Memory.h index f8581eebc..bc5235582 100644 --- a/Eigen/src/Core/util/Memory.h +++ b/Eigen/src/Core/util/Memory.h @@ -83,7 +83,7 @@ inline void* ei_aligned_malloc(size_t size) ei_assert(false && "heap allocation is forbidden (EIGEN_NO_MALLOC is defined)"); #endif - void *result; + void *result; #if !EIGEN_ALIGN result = malloc(size); #elif EIGEN_MALLOC_ALREADY_ALIGNED @@ -97,7 +97,7 @@ inline void* ei_aligned_malloc(size_t size) #else result = ei_handmade_aligned_malloc(size); #endif - + #ifdef EIGEN_EXCEPTIONS if(result == 0) throw std::bad_alloc(); @@ -324,34 +324,34 @@ public: typedef aligned_allocator other; }; - pointer address( reference value ) const + pointer address( reference value ) const { return &value; } - const_pointer address( const_reference value ) const + const_pointer address( const_reference value ) const { return &value; } - aligned_allocator() throw() + aligned_allocator() throw() { } - aligned_allocator( const aligned_allocator& ) throw() + aligned_allocator( const aligned_allocator& ) throw() { } template - aligned_allocator( const aligned_allocator& ) throw() + aligned_allocator( const aligned_allocator& ) throw() { } - ~aligned_allocator() throw() + ~aligned_allocator() throw() { } - size_type max_size() const throw() + size_type max_size() const throw() { return std::numeric_limits::max(); } @@ -362,24 +362,24 @@ public: return static_cast( ei_aligned_malloc( num * sizeof(T) ) ); } - void construct( pointer p, const T& value ) + void construct( pointer p, const T& value ) { ::new( p ) T( value ); } - void destroy( pointer p ) + void destroy( pointer p ) { p->~T(); } - void deallocate( pointer p, size_type /*num*/ ) + void deallocate( pointer p, size_type /*num*/ ) { ei_aligned_free( p ); } - + bool operator!=(const aligned_allocator& other) const { return false; } - + bool operator==(const aligned_allocator& other) const { return true; } }; diff --git a/Eigen/src/Core/util/Meta.h b/Eigen/src/Core/util/Meta.h index 3a960bea6..2fdfd93b5 100644 --- a/Eigen/src/Core/util/Meta.h +++ b/Eigen/src/Core/util/Meta.h @@ -64,6 +64,13 @@ template struct ei_cleantype { typedef typename ei_cleant template struct ei_cleantype { typedef typename ei_cleantype::type type; }; template struct ei_cleantype { typedef typename ei_cleantype::type type; }; +template struct ei_makeconst { typedef const T type; }; +template struct ei_makeconst { typedef const T type; }; +template struct ei_makeconst { typedef const T& type; }; +template struct ei_makeconst { typedef const T& type; }; +template struct ei_makeconst { typedef const T* type; }; +template struct ei_makeconst { typedef const T* type; }; + /** \internal Allows to enable/disable an overload * according to a compile time condition. */