diff --git a/Eigen/src/Core/products/GeneralBlockPanelKernel.h b/Eigen/src/Core/products/GeneralBlockPanelKernel.h index fe1987bdd..18e913b0e 100644 --- a/Eigen/src/Core/products/GeneralBlockPanelKernel.h +++ b/Eigen/src/Core/products/GeneralBlockPanelKernel.h @@ -27,6 +27,12 @@ #ifndef EIGEN_EXTERN_INSTANTIATIONS +#ifdef EIGEN_HAS_FUSE_CJMADD +#define CJMADD(A,B,C,T) C = cj.pmadd(A,B,C); +#else +#define CJMADD(A,B,C,T) T = A; T = cj.pmul(T,B); C = ei_padd(C,T); +#endif + // optimized GEneral packed Block * packed Panel product kernel template struct ei_gebp_kernel @@ -74,65 +80,111 @@ struct ei_gebp_kernel const Scalar* blB = &blockB[j2*strideB*PacketSize+offsetB*nr]; for(int k=0; k // skip what we have after if(PanelMode) count += PacketSize * nr * (stride-offset-depth); } - + // copy the remaining columns one at a time (nr==1) for(int j2=packet_cols; j2. -#ifndef EIGEN_BENCH_TIMER_H -#define EIGEN_BENCH_TIMER_H +#ifndef EIGEN_BENCH_TIMERR_H +#define EIGEN_BENCH_TIMERR_H #if defined(_WIN32) || defined(__CYGWIN__) #define NOMINMAX #define WIN32_LEAN_AND_MEAN #include #else +#include #include #include #endif +#include #include #include namespace Eigen { +enum { + CPU_TIMER = 0, + REAL_TIMER = 1 +}; + /** Elapsed time timer keeping the best try. * * On POSIX platforms we use clock_gettime with CLOCK_PROCESS_CPUTIME_ID. @@ -52,37 +59,58 @@ class BenchTimer { public: - BenchTimer() - { + BenchTimer() + { #if defined(_WIN32) || defined(__CYGWIN__) LARGE_INTEGER freq; QueryPerformanceFrequency(&freq); m_frequency = (double)freq.QuadPart; #endif - reset(); + reset(); } ~BenchTimer() {} - inline void reset(void) {m_best = 1e6;} - inline void start(void) {m_start = getTime();} - inline void stop(void) + inline void reset() { - m_best = std::min(m_best, getTime() - m_start); + m_bests.fill(1e9); + m_totals.setZero(); + } + inline void start() + { + m_starts[CPU_TIMER] = getCpuTime(); + m_starts[REAL_TIMER] = getRealTime(); + } + inline void stop() + { + m_times[CPU_TIMER] = getCpuTime() - m_starts[CPU_TIMER]; + m_times[REAL_TIMER] = getRealTime() - m_starts[REAL_TIMER]; + m_bests = m_bests.cwiseMin(m_times); + m_totals += m_times; } - /** Return the best elapsed time in seconds. + /** Return the elapsed time in seconds between the last start/stop pair */ - inline double value(void) + inline double value(int TIMER = CPU_TIMER) { - return m_best; + return m_times[TIMER]; } -#if defined(_WIN32) || defined(__CYGWIN__) - inline double getTime(void) -#else - static inline double getTime(void) -#endif + /** Return the best elapsed time in seconds + */ + inline double best(int TIMER = CPU_TIMER) + { + return m_bests[TIMER]; + } + + /** Return the total elapsed time in seconds. + */ + inline double total(int TIMER = CPU_TIMER) + { + return m_totals[TIMER]; + } + + inline double getCpuTime() { #ifdef WIN32 LARGE_INTEGER query_ticks; @@ -95,14 +123,42 @@ public: #endif } + inline double getRealTime() + { +#ifdef WIN32 + SYSTEMTIME st; + GetSystemTime(&st); + return (double)st.wSecond + 1.e-3 * (double)st.wMilliseconds; +#else + struct timeval tv; + struct timezone tz; + gettimeofday(&tv, &tz); + return (double)tv.tv_sec + 1.e-6 * (double)tv.tv_usec; +#endif + } + protected: #if defined(_WIN32) || defined(__CYGWIN__) double m_frequency; #endif - double m_best, m_start; + Vector2d m_starts; + Vector2d m_times; + Vector2d m_bests; + Vector2d m_totals; }; +#define BENCH(TIMER,TRIES,REP,CODE) { \ + TIMER.reset(); \ + for(int uglyvarname1=0; uglyvarname1/dev/null + $CXX -O3 -I.. -DNDEBUG benchmark.cpp -DMATSIZE=$i -DEIGEN_UNROLLING_LIMIT=400 -o benchmark && time ./benchmark >/dev/null $CXX -O3 -I.. -DNDEBUG -finline-limit=10000 benchmark.cpp -DMATSIZE=$i -DEIGEN_DONT_USE_UNROLLED_LOOPS=1 -o benchmark && time ./benchmark >/dev/null echo " " done diff --git a/bench/benchmark_suite b/bench/benchmark_suite index a8fc6dced..3f21d3661 100755 --- a/bench/benchmark_suite +++ b/bench/benchmark_suite @@ -1,4 +1,5 @@ #!/bin/bash +CXX=${CXX-g++} # default value unless caller has defined CXX echo "Fixed size 3x3, column-major, -DNDEBUG" $CXX -O3 -I .. -DNDEBUG benchmark.cpp -o benchmark && time ./benchmark >/dev/null echo "Fixed size 3x3, column-major, with asserts" diff --git a/unsupported/Eigen/src/MatrixFunctions/MatrixFunction.h b/unsupported/Eigen/src/MatrixFunctions/MatrixFunction.h index 12322a256..e8069154c 100644 --- a/unsupported/Eigen/src/MatrixFunctions/MatrixFunction.h +++ b/unsupported/Eigen/src/MatrixFunctions/MatrixFunction.h @@ -178,9 +178,9 @@ class MatrixFunction * * This is morally a \c static \c const \c Scalar, but only * integers can be static constant class members in C++. The - * separation constant is set to 0.01, a value taken from the + * separation constant is set to 0.1, a value taken from the * paper by Davies and Higham. */ - static const RealScalar separation() { return static_cast(0.01); } + static const RealScalar separation() { return static_cast(0.1); } }; /** \brief Constructor. @@ -492,14 +492,12 @@ typename MatrixFunction::DynMatrixType MatrixFunction class MatrixFunctionReturnValue : public ReturnByValue > { - private: + public: typedef typename ei_traits::Scalar Scalar; typedef typename ei_stem_function::type StemFunction; - public: - - /** \brief Constructor. + /** \brief Constructor. * * \param[in] A %Matrix (expression) forming the argument of the * matrix function. diff --git a/unsupported/test/matrix_function.cpp b/unsupported/test/matrix_function.cpp index 4ff6d7f1e..7a1501da2 100644 --- a/unsupported/test/matrix_function.cpp +++ b/unsupported/test/matrix_function.cpp @@ -109,11 +109,10 @@ template void testHyperbolicFunctions(const MatrixType& A) { for (int i = 0; i < g_repeat; i++) { - MatrixType sinhA = ei_matrix_sinh(A); - MatrixType coshA = ei_matrix_cosh(A); MatrixType expA = ei_matrix_exponential(A); - VERIFY_IS_APPROX(sinhA, (expA - expA.inverse())/2); - VERIFY_IS_APPROX(coshA, (expA + expA.inverse())/2); + MatrixType expmA = ei_matrix_exponential(-A); + VERIFY_IS_APPROX(ei_matrix_sinh(A), (expA - expmA) / 2); + VERIFY_IS_APPROX(ei_matrix_cosh(A), (expA + expmA) / 2); } } @@ -134,14 +133,15 @@ void testGonioFunctions(const MatrixType& A) ComplexMatrix Ac = A.template cast(); ComplexMatrix exp_iA = ei_matrix_exponential(imagUnit * Ac); + ComplexMatrix exp_miA = ei_matrix_exponential(-imagUnit * Ac); MatrixType sinA = ei_matrix_sin(A); ComplexMatrix sinAc = sinA.template cast(); - VERIFY_IS_APPROX(sinAc, (exp_iA - exp_iA.inverse()) / (two*imagUnit)); + VERIFY_IS_APPROX(sinAc, (exp_iA - exp_miA) / (two*imagUnit)); MatrixType cosA = ei_matrix_cos(A); ComplexMatrix cosAc = cosA.template cast(); - VERIFY_IS_APPROX(cosAc, (exp_iA + exp_iA.inverse()) / 2); + VERIFY_IS_APPROX(cosAc, (exp_iA + exp_miA) / 2); } }