mirror of
https://gitlab.com/libeigen/eigen.git
synced 2025-04-24 02:29:33 +08:00
give up on OpenMP... for now
This commit is contained in:
parent
acfd6f3bda
commit
6ae037dfb5
@ -10,13 +10,6 @@
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifndef EIGEN_DONT_PARALLELIZE
|
||||
#ifdef _OPENMP
|
||||
#define EIGEN_USE_OPENMP
|
||||
#include <omp.h>
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#include <cstdlib>
|
||||
#include <cmath>
|
||||
#include <complex>
|
||||
|
@ -135,11 +135,6 @@ Derived& MatrixBase<Derived>
|
||||
}
|
||||
}
|
||||
|
||||
template<typename T1, typename T2> bool ei_should_parallelize_assignment(const T1& t, const T2&)
|
||||
{
|
||||
return (T1::Flags & T2::Flags & LargeBit) && t.size() >= EIGEN_PARALLELIZATION_TRESHOLD;
|
||||
}
|
||||
|
||||
template <typename Derived, typename OtherDerived>
|
||||
struct ei_assignment_impl<Derived, OtherDerived, false>
|
||||
{
|
||||
@ -158,23 +153,17 @@ struct ei_assignment_impl<Derived, OtherDerived, false>
|
||||
{
|
||||
if(Derived::ColsAtCompileTime == Dynamic || Derived::RowsAtCompileTime != Dynamic)
|
||||
{
|
||||
#define EIGEN_THE_PARALLELIZABLE_LOOP \
|
||||
for(int j = 0; j < dst.cols(); j++) \
|
||||
for(int i = 0; i < dst.rows(); i++) \
|
||||
dst.coeffRef(i, j) = src.coeff(i, j);
|
||||
EIGEN_RUN_PARALLELIZABLE_LOOP(ei_should_parallelize_assignment(dst, src))
|
||||
#undef EIGEN_THE_PARALLELIZABLE_LOOP
|
||||
for(int j = 0; j < dst.cols(); j++)
|
||||
for(int i = 0; i < dst.rows(); i++)
|
||||
dst.coeffRef(i, j) = src.coeff(i, j);
|
||||
}
|
||||
else
|
||||
{
|
||||
// traverse in row-major order
|
||||
// in order to allow the compiler to unroll the inner loop
|
||||
#define EIGEN_THE_PARALLELIZABLE_LOOP \
|
||||
for(int i = 0; i < dst.rows(); i++) \
|
||||
for(int j = 0; j < dst.cols(); j++) \
|
||||
dst.coeffRef(i, j) = src.coeff(i, j);
|
||||
EIGEN_RUN_PARALLELIZABLE_LOOP(ei_should_parallelize_assignment(dst, src))
|
||||
#undef EIGEN_THE_PARALLELIZABLE_LOOP
|
||||
for(int i = 0; i < dst.rows(); i++)
|
||||
for(int j = 0; j < dst.cols(); j++)
|
||||
dst.coeffRef(i, j) = src.coeff(i, j);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -199,21 +188,15 @@ struct ei_assignment_impl<Derived, OtherDerived, true>
|
||||
{
|
||||
if(OtherDerived::Flags&RowMajorBit)
|
||||
{
|
||||
#define EIGEN_THE_PARALLELIZABLE_LOOP \
|
||||
for(int i = 0; i < dst.rows(); i++) \
|
||||
for(int j = 0; j < dst.cols(); j+=ei_packet_traits<typename Derived::Scalar>::size) \
|
||||
for(int i = 0; i < dst.rows(); i++)
|
||||
for(int j = 0; j < dst.cols(); j+=ei_packet_traits<typename Derived::Scalar>::size)
|
||||
dst.writePacketCoeff(i, j, src.packetCoeff(i, j));
|
||||
EIGEN_RUN_PARALLELIZABLE_LOOP(ei_should_parallelize_assignment(dst, src))
|
||||
#undef EIGEN_THE_PARALLELIZABLE_LOOP
|
||||
}
|
||||
else
|
||||
{
|
||||
#define EIGEN_THE_PARALLELIZABLE_LOOP \
|
||||
for(int j = 0; j < dst.cols(); j++) \
|
||||
for(int i = 0; i < dst.rows(); i+=ei_packet_traits<typename Derived::Scalar>::size) \
|
||||
for(int j = 0; j < dst.cols(); j++)
|
||||
for(int i = 0; i < dst.rows(); i+=ei_packet_traits<typename Derived::Scalar>::size)
|
||||
dst.writePacketCoeff(i, j, src.packetCoeff(i, j));
|
||||
EIGEN_RUN_PARALLELIZABLE_LOOP(ei_should_parallelize_assignment(dst, src))
|
||||
#undef EIGEN_THE_PARALLELIZABLE_LOOP
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -280,75 +280,67 @@ void Product<Lhs,Rhs,EvalMode>::_cacheOptimalEval(DestDerived& res) const
|
||||
{
|
||||
res.setZero();
|
||||
const int cols4 = m_lhs.cols() & 0xfffffffC;
|
||||
const bool should_parallelize = (Flags & DestDerived::Flags & LargeBit)
|
||||
&& res.size() >= EIGEN_PARALLELIZATION_TRESHOLD;
|
||||
#ifdef EIGEN_VECTORIZE
|
||||
if( (Flags & VectorizableBit) && (!(Lhs::Flags & RowMajorBit)) )
|
||||
{
|
||||
#define EIGEN_THE_PARALLELIZABLE_LOOP \
|
||||
for(int k=0; k<this->cols(); k++) \
|
||||
{ \
|
||||
int j=0; \
|
||||
for(; j<cols4; j+=4) \
|
||||
{ \
|
||||
const typename ei_packet_traits<Scalar>::type tmp0 = ei_pset1(m_rhs.coeff(j+0,k)); \
|
||||
const typename ei_packet_traits<Scalar>::type tmp1 = ei_pset1(m_rhs.coeff(j+1,k)); \
|
||||
const typename ei_packet_traits<Scalar>::type tmp2 = ei_pset1(m_rhs.coeff(j+2,k)); \
|
||||
const typename ei_packet_traits<Scalar>::type tmp3 = ei_pset1(m_rhs.coeff(j+3,k)); \
|
||||
for (int i=0; i<this->rows(); i+=ei_packet_traits<Scalar>::size) \
|
||||
{ \
|
||||
res.writePacketCoeff(i,k,\
|
||||
ei_padd( \
|
||||
res.packetCoeff(i,k), \
|
||||
ei_padd( \
|
||||
ei_padd( \
|
||||
ei_pmul(tmp0, m_lhs.packetCoeff(i,j)), \
|
||||
ei_pmul(tmp1, m_lhs.packetCoeff(i,j+1))), \
|
||||
ei_padd( \
|
||||
ei_pmul(tmp2, m_lhs.packetCoeff(i,j+2)), \
|
||||
ei_pmul(tmp3, m_lhs.packetCoeff(i,j+3)) \
|
||||
) \
|
||||
) \
|
||||
) \
|
||||
); \
|
||||
} \
|
||||
} \
|
||||
for(; j<m_lhs.cols(); ++j) \
|
||||
{ \
|
||||
const typename ei_packet_traits<Scalar>::type tmp = ei_pset1(m_rhs.coeff(j,k)); \
|
||||
for (int i=0; i<this->rows(); ++i) \
|
||||
res.writePacketCoeff(i,k,ei_pmul(tmp, m_lhs.packetCoeff(i,j))); \
|
||||
} \
|
||||
{
|
||||
for(int k=0; k<this->cols(); k++)
|
||||
{
|
||||
int j=0;
|
||||
for(; j<cols4; j+=4)
|
||||
{
|
||||
const typename ei_packet_traits<Scalar>::type tmp0 = ei_pset1(m_rhs.coeff(j+0,k));
|
||||
const typename ei_packet_traits<Scalar>::type tmp1 = ei_pset1(m_rhs.coeff(j+1,k));
|
||||
const typename ei_packet_traits<Scalar>::type tmp2 = ei_pset1(m_rhs.coeff(j+2,k));
|
||||
const typename ei_packet_traits<Scalar>::type tmp3 = ei_pset1(m_rhs.coeff(j+3,k));
|
||||
for (int i=0; i<this->rows(); i+=ei_packet_traits<Scalar>::size)
|
||||
{
|
||||
res.writePacketCoeff(i,k,\
|
||||
ei_padd(
|
||||
res.packetCoeff(i,k),
|
||||
ei_padd(
|
||||
ei_padd(
|
||||
ei_pmul(tmp0, m_lhs.packetCoeff(i,j)),
|
||||
ei_pmul(tmp1, m_lhs.packetCoeff(i,j+1))),
|
||||
ei_padd(
|
||||
ei_pmul(tmp2, m_lhs.packetCoeff(i,j+2)),
|
||||
ei_pmul(tmp3, m_lhs.packetCoeff(i,j+3))
|
||||
)
|
||||
)
|
||||
)
|
||||
);
|
||||
}
|
||||
}
|
||||
EIGEN_RUN_PARALLELIZABLE_LOOP(should_parallelize)
|
||||
#undef EIGEN_THE_PARALLELIZABLE_LOOP
|
||||
for(; j<m_lhs.cols(); ++j)
|
||||
{
|
||||
const typename ei_packet_traits<Scalar>::type tmp = ei_pset1(m_rhs.coeff(j,k));
|
||||
for (int i=0; i<this->rows(); ++i)
|
||||
res.writePacketCoeff(i,k,ei_pmul(tmp, m_lhs.packetCoeff(i,j)));
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
#endif // EIGEN_VECTORIZE
|
||||
{
|
||||
#define EIGEN_THE_PARALLELIZABLE_LOOP \
|
||||
for(int k=0; k<this->cols(); ++k) \
|
||||
{ \
|
||||
int j=0; \
|
||||
for(; j<cols4; j+=4) \
|
||||
{ \
|
||||
const Scalar tmp0 = m_rhs.coeff(j ,k); \
|
||||
const Scalar tmp1 = m_rhs.coeff(j+1,k); \
|
||||
const Scalar tmp2 = m_rhs.coeff(j+2,k); \
|
||||
const Scalar tmp3 = m_rhs.coeff(j+3,k); \
|
||||
for (int i=0; i<this->rows(); ++i) \
|
||||
res.coeffRef(i,k) += tmp0 * m_lhs.coeff(i,j) + tmp1 * m_lhs.coeff(i,j+1) \
|
||||
+ tmp2 * m_lhs.coeff(i,j+2) + tmp3 * m_lhs.coeff(i,j+3); \
|
||||
} \
|
||||
for(; j<m_lhs.cols(); ++j) \
|
||||
{ \
|
||||
const Scalar tmp = m_rhs.coeff(j,k); \
|
||||
for (int i=0; i<this->rows(); ++i) \
|
||||
res.coeffRef(i,k) += tmp * m_lhs.coeff(i,j); \
|
||||
} \
|
||||
for(int k=0; k<this->cols(); ++k)
|
||||
{
|
||||
int j=0;
|
||||
for(; j<cols4; j+=4)
|
||||
{
|
||||
const Scalar tmp0 = m_rhs.coeff(j ,k);
|
||||
const Scalar tmp1 = m_rhs.coeff(j+1,k);
|
||||
const Scalar tmp2 = m_rhs.coeff(j+2,k);
|
||||
const Scalar tmp3 = m_rhs.coeff(j+3,k);
|
||||
for (int i=0; i<this->rows(); ++i)
|
||||
res.coeffRef(i,k) += tmp0 * m_lhs.coeff(i,j) + tmp1 * m_lhs.coeff(i,j+1)
|
||||
+ tmp2 * m_lhs.coeff(i,j+2) + tmp3 * m_lhs.coeff(i,j+3);
|
||||
}
|
||||
EIGEN_RUN_PARALLELIZABLE_LOOP(should_parallelize)
|
||||
#undef EIGEN_THE_PARALLELIZABLE_LOOP
|
||||
for(; j<m_lhs.cols(); ++j)
|
||||
{
|
||||
const Scalar tmp = m_rhs.coeff(j,k);
|
||||
for (int i=0; i<this->rows(); ++i)
|
||||
res.coeffRef(i,k) += tmp * m_lhs.coeff(i,j);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -37,10 +37,6 @@
|
||||
#define EIGEN_UNROLLING_LIMIT 400
|
||||
#endif
|
||||
|
||||
#ifndef EIGEN_PARALLELIZATION_TRESHOLD
|
||||
#define EIGEN_PARALLELIZATION_TRESHOLD 2000
|
||||
#endif
|
||||
|
||||
#ifdef EIGEN_DEFAULT_TO_ROW_MAJOR
|
||||
#define EIGEN_DEFAULT_MATRIX_STORAGE_ORDER RowMajorBit
|
||||
#else
|
||||
@ -78,30 +74,6 @@ using Eigen::MatrixBase;
|
||||
#define EIGEN_ONLY_USED_FOR_DEBUG(x)
|
||||
#endif
|
||||
|
||||
#ifdef EIGEN_USE_OPENMP
|
||||
# ifdef __INTEL_COMPILER
|
||||
# define EIGEN_PRAGMA_OMP_PARALLEL _Pragma("omp parallel default(none) shared(other)")
|
||||
# else
|
||||
# define EIGEN_PRAGMA_OMP_PARALLEL _Pragma("omp parallel default(none)")
|
||||
# endif
|
||||
# define EIGEN_RUN_PARALLELIZABLE_LOOP(condition) \
|
||||
if(condition) \
|
||||
{ \
|
||||
EIGEN_PRAGMA_OMP_PARALLEL \
|
||||
{ \
|
||||
_Pragma("omp for") \
|
||||
EIGEN_THE_PARALLELIZABLE_LOOP \
|
||||
} \
|
||||
} \
|
||||
else \
|
||||
{ \
|
||||
EIGEN_THE_PARALLELIZABLE_LOOP \
|
||||
}
|
||||
#else // EIGEN_USE_OPENMP
|
||||
# define EIGEN_RUN_PARALLELIZABLE_LOOP(condition) EIGEN_THE_PARALLELIZABLE_LOOP
|
||||
#endif
|
||||
|
||||
|
||||
// FIXME with the always_inline attribute,
|
||||
// gcc 3.4.x reports the following compilation error:
|
||||
// Eval.h:91: sorry, unimplemented: inlining failed in call to 'const Eigen::Eval<Derived> Eigen::MatrixBase<Scalar, Derived>::eval() const'
|
||||
|
@ -92,7 +92,6 @@ template<typename MatrixType, bool CheckExistence> class Inverse : ei_no_assignm
|
||||
enum { _Size = MatrixType::RowsAtCompileTime };
|
||||
void _compute(const MatrixType& matrix);
|
||||
void _compute_in_general_case(const MatrixType& matrix);
|
||||
void _compute_in_size1_case(const MatrixType& matrix);
|
||||
void _compute_in_size2_case(const MatrixType& matrix);
|
||||
void _compute_in_size3_case(const MatrixType& matrix);
|
||||
void _compute_in_size4_case(const MatrixType& matrix);
|
||||
|
@ -5,12 +5,12 @@
|
||||
using namespace std;
|
||||
USING_PART_OF_NAMESPACE_EIGEN
|
||||
|
||||
#ifndef MATTYPE
|
||||
#define MATTYPE MatrixXLd
|
||||
#ifndef VECTYPE
|
||||
#define VECTYPE VectorXLd
|
||||
#endif
|
||||
|
||||
#ifndef MATSIZE
|
||||
#define MATSIZE 1000000
|
||||
#ifndef VECSIZE
|
||||
#define VECSIZE 1000000
|
||||
#endif
|
||||
|
||||
#ifndef REPEAT
|
||||
@ -19,16 +19,16 @@ USING_PART_OF_NAMESPACE_EIGEN
|
||||
|
||||
int main(int argc, char *argv[])
|
||||
{
|
||||
MATTYPE I = MATTYPE::ones(MATSIZE,1);
|
||||
MATTYPE m(MATSIZE,1);
|
||||
for(int i = 0; i < MATSIZE; i++) for(int j = 0; j < 1; j++)
|
||||
VECTYPE I = VECTYPE::ones(VECSIZE);
|
||||
VECTYPE m(VECSIZE,1);
|
||||
for(int i = 0; i < VECSIZE; i++)
|
||||
{
|
||||
m(i,j) = 0.1 * (i+j+1)/MATSIZE/MATSIZE;
|
||||
m[i] = 0.1 * i/VECSIZE;
|
||||
}
|
||||
for(int a = 0; a < REPEAT; a++)
|
||||
{
|
||||
m = MATTYPE::ones(MATSIZE,1) + 0.00005 * (m.cwiseProduct(m) + m/4);
|
||||
m = VECTYPE::ones(VECSIZE) + 0.00005 * (m.cwiseProduct(m) + m/4);
|
||||
}
|
||||
cout << m(0,0) << endl;
|
||||
cout << m[0] << endl;
|
||||
return 0;
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user