diff --git a/Eigen/Geometry b/Eigen/Geometry index 7a3ca9ede..d62723188 100644 --- a/Eigen/Geometry +++ b/Eigen/Geometry @@ -3,6 +3,10 @@ #include "Core" +#ifndef M_PI +#define M_PI 3.14159265358979323846 +#endif + namespace Eigen { /** \defgroup Geometry Geometry module diff --git a/Eigen/src/Core/CacheFriendlyProduct.h b/Eigen/src/Core/CacheFriendlyProduct.h index 782fabc88..649b39cdb 100644 --- a/Eigen/src/Core/CacheFriendlyProduct.h +++ b/Eigen/src/Core/CacheFriendlyProduct.h @@ -34,8 +34,8 @@ static void ei_cache_friendly_product( bool _rhsRowMajor, const Scalar* _rhs, int _rhsStride, bool resRowMajor, Scalar* res, int resStride) { - const Scalar* __restrict__ lhs; - const Scalar* __restrict__ rhs; + const Scalar* EIGEN_RESTRICT lhs; + const Scalar* EIGEN_RESTRICT rhs; int lhsStride, rhsStride, rows, cols; bool lhsRowMajor; @@ -88,11 +88,11 @@ static void ei_cache_friendly_product( const int l2BlockSize = MaxL2BlockSize > size ? size : MaxL2BlockSize; const int l2BlockSizeAligned = (1 + std::max(l2BlockSize,l2BlockCols)/PacketSize)*PacketSize; const bool needRhsCopy = (PacketSize>1) && ((rhsStride%PacketSize!=0) || (size_t(rhs)%16!=0)); - Scalar* __restrict__ block = 0; + Scalar* EIGEN_RESTRICT block = 0; const int allocBlockSize = sizeof(Scalar)*l2BlockRows*size; const bool allocBlockUsingAlloca = EIGEN_USE_ALLOCA && allocBlockSize<=16000000; block = (Scalar*)ei_alloca_or_malloc(allocBlockUsingAlloca, allocBlockSize); - Scalar* __restrict__ rhsCopy + Scalar* EIGEN_RESTRICT rhsCopy = (Scalar*)ei_alloca_or_malloc(true, sizeof(Scalar)*l2BlockSizeAligned*l2BlockSizeAligned); // loops on each L2 cache friendly blocks of the result @@ -107,7 +107,6 @@ static void ei_cache_friendly_product( int count = 0; // copy l2blocksize rows of m_lhs to blocks of ps x bw - asm("#eigen begin buildblocks"); for(int l2k=0; l2k1 && resIsAligned) { @@ -250,7 +247,6 @@ static void ei_cache_friendly_product( localRes[7] += ei_predux(dst[7]); } } - asm("#eigen endcore"); } } if (l2blockRemainingRows>0) @@ -258,10 +254,9 @@ static void ei_cache_friendly_product( int offsetblock = l2k * (l2blockRowEnd-l2i) + (l2blockRowEndBW-l2i)*(l2blockSizeEnd-l2k) - l2k*l2blockRemainingRows; const Scalar* localB = &block[offsetblock]; - asm("#eigen begin dynkernel"); for(int l1j=l2j; l1j=8) localRes[7] += ei_predux(dst[7]); } - asm("#eigen end dynkernel"); } } } @@ -373,7 +367,6 @@ EIGEN_DONT_INLINE static void ei_cache_friendly_product_colmajor_times_vector( ei_padd(ei_pmul(ptmp0,ei_pload ## A0(&lhs0[j OFFSET])),ei_pmul(ptmp1,ei_pload ## A13(&lhs1[j OFFSET]))), \ ei_padd(ei_pmul(ptmp2,ei_pload ## A2(&lhs2[j OFFSET])),ei_pmul(ptmp3,ei_pload ## A13(&lhs3[j OFFSET]))) ))) - asm("#begin matrix_vector_product"); typedef typename ei_packet_traits::type Packet; const int PacketSize = sizeof(Packet)/sizeof(Scalar); @@ -541,7 +534,6 @@ EIGEN_DONT_INLINE static void ei_cache_friendly_product_colmajor_times_vector( else break; } while(PacketSize>1); - asm("#end matrix_vector_product"); #undef _EIGEN_ACCUMULATE_PACKETS } @@ -563,7 +555,6 @@ EIGEN_DONT_INLINE static void ei_cache_friendly_product_rowmajor_times_vector( ptmp2 = ei_pmadd(b, ei_pload##A2 (&lhs2[j]), ptmp2); \ ptmp3 = ei_pmadd(b, ei_pload##A13(&lhs3[j]), ptmp3); } - asm("#begin matrix_vector_product"); typedef typename ei_packet_traits::type Packet; const int PacketSize = sizeof(Packet)/sizeof(Scalar); @@ -752,7 +743,6 @@ EIGEN_DONT_INLINE static void ei_cache_friendly_product_rowmajor_times_vector( else break; } while(PacketSize>1); - asm("#end matrix_vector_product"); #undef _EIGEN_ACCUMULATE_PACKETS } diff --git a/Eigen/src/Core/CwiseUnaryOp.h b/Eigen/src/Core/CwiseUnaryOp.h index 85a8872c0..a50a9c30d 100644 --- a/Eigen/src/Core/CwiseUnaryOp.h +++ b/Eigen/src/Core/CwiseUnaryOp.h @@ -166,7 +166,7 @@ Cwise::abs2() const * * \sa adjoint() */ template -inline const typename MatrixBase::ConjugateReturnType +inline typename MatrixBase::ConjugateReturnType MatrixBase::conjugate() const { return ConjugateReturnType(derived()); diff --git a/Eigen/src/Core/MapBase.h b/Eigen/src/Core/MapBase.h index 4c1a0cf2f..74b7d76aa 100644 --- a/Eigen/src/Core/MapBase.h +++ b/Eigen/src/Core/MapBase.h @@ -166,7 +166,7 @@ template class MapBase { return derived() = forceAligned() / other; } protected: - const Scalar* __restrict__ m_data; + const Scalar* EIGEN_RESTRICT m_data; const ei_int_if_dynamic m_rows; const ei_int_if_dynamic m_cols; }; diff --git a/Eigen/src/Core/Matrix.h b/Eigen/src/Core/Matrix.h index 268261b77..74806833b 100644 --- a/Eigen/src/Core/Matrix.h +++ b/Eigen/src/Core/Matrix.h @@ -286,8 +286,8 @@ class Matrix : public MatrixBase class MatrixBase typedef CwiseUnaryOp, Derived> ScalarQuotient1ReturnType; /** \internal the return type of MatrixBase::conjugate() */ typedef typename ei_meta_if::IsComplex, - CwiseUnaryOp, Derived>, - Derived& + const CwiseUnaryOp, Derived>, + const Derived& >::ret ConjugateReturnType; /** \internal the return type of MatrixBase::real() */ typedef CwiseUnaryOp, Derived> RealReturnType; /** \internal the return type of MatrixBase::adjoint() */ - typedef Transpose::type> > + typedef Transpose::type> > AdjointReturnType; /** \internal the return type of MatrixBase::eigenvalues() */ typedef Matrix::Scalar>::Real, ei_traits::ColsAtCompileTime, 1> EigenvaluesReturnType; @@ -489,7 +489,7 @@ template class MatrixBase inline const NestByValue nestByValue() const; - const ConjugateReturnType conjugate() const; + ConjugateReturnType conjugate() const; const RealReturnType real() const; template diff --git a/Eigen/src/Core/Product.h b/Eigen/src/Core/Product.h index 2774b3f3b..6d965e611 100644 --- a/Eigen/src/Core/Product.h +++ b/Eigen/src/Core/Product.h @@ -545,7 +545,7 @@ struct ei_cache_friendly_product_selector::size==1) ||((DestDerived::Flags&ActualPacketAccessBit) && (!(DestDerived::Flags & RowMajorBit))) }; - Scalar* __restrict__ _res; + Scalar* EIGEN_RESTRICT _res; if (EvalToRes) _res = &res.coeffRef(0); else @@ -590,7 +590,7 @@ struct ei_cache_friendly_product_selector::size==1) ||((DestDerived::Flags & ActualPacketAccessBit) && (DestDerived::Flags & RowMajorBit)) }; - Scalar* __restrict__ _res; + Scalar* EIGEN_RESTRICT _res; if (EvalToRes) _res = &res.coeffRef(0); else @@ -622,7 +622,7 @@ struct ei_cache_friendly_product_selector inline static void run(DestDerived& res, const ProductType& product) { - Scalar* __restrict__ _rhs; + Scalar* EIGEN_RESTRICT _rhs; if (UseRhsDirectly) _rhs = &product.rhs().const_cast_derived().coeffRef(0); else @@ -650,7 +650,7 @@ struct ei_cache_friendly_product_selector inline static void run(DestDerived& res, const ProductType& product) { - Scalar* __restrict__ _lhs; + Scalar* EIGEN_RESTRICT _lhs; if (UseLhsDirectly) _lhs = &product.lhs().const_cast_derived().coeffRef(0); else diff --git a/Eigen/src/Core/util/Macros.h b/Eigen/src/Core/util/Macros.h index 51b7a8669..c50aef9a6 100644 --- a/Eigen/src/Core/util/Macros.h +++ b/Eigen/src/Core/util/Macros.h @@ -99,6 +99,8 @@ using Eigen::MatrixBase; #define EIGEN_ALIGN_128 #endif +#define EIGEN_RESTRICT __restrict + #define EIGEN_INHERIT_ASSIGNMENT_OPERATOR(Derived, Op) \ template \ Derived& operator Op(const MatrixBase& other) \ diff --git a/Eigen/src/Core/util/Meta.h b/Eigen/src/Core/util/Meta.h index 9d844d222..7e4e0fb82 100644 --- a/Eigen/src/Core/util/Meta.h +++ b/Eigen/src/Core/util/Meta.h @@ -224,12 +224,16 @@ template struct ei_must_nest_by_value > { enum { ret template::type> struct ei_nested { + enum { + CostEval = (n+1) * int(NumTraits::Scalar>::ReadCost), + CostNoEval = (n-1) * int(ei_traits::CoeffReadCost) + }; typedef typename ei_meta_if< ei_must_nest_by_value::ret, T, typename ei_meta_if< (int(ei_traits::Flags) & EvalBeforeNestingBit) - || ((n+1) * int(NumTraits::Scalar>::ReadCost) <= (n-1) * int(T::CoeffReadCost)), + || ( int(CostEval) <= int(CostNoEval) ), EvalType, const T& >::ret diff --git a/Eigen/src/LU/LU.h b/Eigen/src/LU/LU.h index 1267ec386..af385df29 100644 --- a/Eigen/src/LU/LU.h +++ b/Eigen/src/LU/LU.h @@ -71,6 +71,9 @@ template class LU MatrixType::MaxRowsAtCompileTime) }; + typedef Matrix KernelReturnType; + /** Constructor. * * \param matrix the matrix of which to compute the LU decomposition. @@ -165,9 +168,8 @@ template class LU * Output: \verbinclude LU_kernel.out * * \sa computeKernel() - */ const Matrix::MaxSmallDimAtCompileTime> kernel() const; + */ + const KernelReturnType kernel() const; /** This method finds a solution x to the equation Ax=b, where A is the matrix of which * *this is the LU decomposition, if any exists. @@ -408,9 +410,7 @@ void LU::computeKernel(Matrix -const Matrix::MaxSmallDimAtCompileTime> +const typename LU::KernelReturnType LU::kernel() const { Matrix::_compute(MatrixType& matA, CoeffVectorType& hCoeffs.end(n-i-1) += (h * Scalar(-0.5) * matA.col(i).end(n-i-1).dot(hCoeffs.end(n-i-1))) * matA.col(i).end(n-i-1); - const Scalar* __restrict__ pb = &matA.coeffRef(0,i); - const Scalar* __restrict__ pa = (&hCoeffs.coeffRef(0)) - 1; + const Scalar* EIGEN_RESTRICT pb = &matA.coeffRef(0,i); + const Scalar* EIGEN_RESTRICT pa = (&hCoeffs.coeffRef(0)) - 1; for (int j1=i+1; j1 void MandelbrotThread::render(int img_width, int img_hei // in which case we can stop iterating. int j = 0; typedef Eigen::Matrix Packeti; - Packeti pix_iter = Packeti::zero(), // number of iteration per pixel in the packet + Packeti pix_iter = Packeti::Zero(), // number of iteration per pixel in the packet pix_dont_diverge; // whether or not each pixel has already diverged do { @@ -93,7 +93,7 @@ template void MandelbrotThread::render(int img_width, int img_hei } pix_dont_diverge = ((pzr.cwise().square() + pzi.cwise().square()) .eval() // temporary fix as what follows is not yet vectorized by Eigen - .cwise() <= Packet::constant(4)) + .cwise() <= Packet::Constant(4)) // the 4 here is not a magic value, it's a math fact that if // the square modulus is >4 then divergence is inevitable. .template cast(); diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 407893e3e..b853e65e1 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -1,5 +1,6 @@ IF(BUILD_TESTS) + IF(CMAKE_COMPILER_IS_GNUCXX) IF(CMAKE_SYSTEM_NAME MATCHES Linux) SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -g2") @@ -7,6 +8,9 @@ IF(CMAKE_COMPILER_IS_GNUCXX) SET(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -fno-inline-functions") SET(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -O0 -g2") ENDIF(CMAKE_SYSTEM_NAME MATCHES Linux) + SET(EI_OFLAG "-O2") +ELSE(CMAKE_COMPILER_IS_GNUCXX) + SET(EI_OFLAG "") ENDIF(CMAKE_COMPILER_IS_GNUCXX) OPTION(EIGEN_NO_ASSERTION_CHECKING "Disable checking of assertions" OFF) @@ -38,11 +42,7 @@ MACRO(EI_ADD_TEST testname) SET(targetname test_${testname}) -# IF(${ARGC} EQUAL 2) -# SET(filename ${ARGV1}) -# ELSE(${ARGC} EQUAL 2) - SET(filename ${testname}.cpp) -# ENDIF(${ARGC} EQUAL 2) + SET(filename ${testname}.cpp) ADD_EXECUTABLE(${targetname} ${filename}) IF(NOT EIGEN_NO_ASSERTION_CHECKING) @@ -90,7 +90,7 @@ EI_ADD_TEST(basicstuff) EI_ADD_TEST(linearstructure) EI_ADD_TEST(cwiseop) EI_ADD_TEST(product_small) -EI_ADD_TEST(product_large "-O2") +EI_ADD_TEST(product_large ${EI_OFLAG}) EI_ADD_TEST(adjoint) EI_ADD_TEST(submatrices) EI_ADD_TEST(miscmatrices) @@ -99,7 +99,7 @@ EI_ADD_TEST(map) EI_ADD_TEST(array) EI_ADD_TEST(triangular) EI_ADD_TEST(cholesky) -EI_ADD_TEST(lu "-O2") +EI_ADD_TEST(lu ${EI_OFLAG}) EI_ADD_TEST(determinant) EI_ADD_TEST(inverse) EI_ADD_TEST(qr) diff --git a/test/adjoint.cpp b/test/adjoint.cpp index 3b779f14e..50ebb70dc 100644 --- a/test/adjoint.cpp +++ b/test/adjoint.cpp @@ -72,6 +72,7 @@ template void adjoint(const MatrixType& m) VERIFY_IS_MUCH_SMALLER_THAN(vzero.norm(), static_cast(1)); // check compatibility of dot and adjoint + // FIXME this line failed with MSVC and complex in the ei_aligned_free() VERIFY_IS_APPROX(v1.dot(square * v2), (square.adjoint() * v1).dot(v2)); // like in testBasicStuff, test operator() to check const-qualification