add bench_reverse, draft of a reverse vectorization for AltiVec, make

global Scaling function static
This commit is contained in:
Gael Guennebaud 2009-02-06 13:28:55 +00:00
parent f5d96df800
commit cc90495e30
3 changed files with 105 additions and 11 deletions

View File

@ -111,7 +111,7 @@ template<> inline v4i ei_pmul(const v4i& a, const v4i& b)
USE_CONST_v1i;
USE_CONST_v16i_;
// Get the absolute values
// Get the absolute values
a1 = vec_abs(a);
b1 = vec_abs(b);
@ -146,7 +146,7 @@ template<> inline v4f ei_pdiv(const v4f& a, const v4f& b) {
// Altivec does not offer a divide instruction, we have to do a reciprocal approximation
y_0 = vec_re(b);
// Do one Newton-Raphson iteration to get the needed accuracy
t = vec_nmsub(y_0, b, v1f);
y_1 = vec_madd(y_0, t, y_0);
@ -260,6 +260,17 @@ template<> inline int ei_pfirst(const v4i& a)
return ai[0];
}
// Returns a packet holding the four 32-bit float lanes of 'a' in reverse order
// (lane 3 first, lane 0 last).
template<> EIGEN_STRONG_INLINE v4f ei_preverse(const v4f& a)
{
  // Byte selector for vec_perm: each group of four indices picks the bytes of one
  // source lane, and the groups are listed from the last lane to the first.
  // The mask has to be a full 16-byte vector (not a scalar unsigned char) to be
  // usable as the third operand of vec_perm; the vector type is naturally
  // 16-byte aligned, so no explicit alignment attribute is needed.
  static const __vector unsigned char reverse_mask =
    {12,13,14,15, 8,9,10,11, 4,5,6,7, 0,1,2,3};
  // Both permute sources are 'a', so every selected byte comes from 'a'.
  return (v4f)vec_perm((__vector unsigned char)a,(__vector unsigned char)a,reverse_mask);
}
// Returns a packet holding the four 32-bit integer lanes of 'a' in reverse order
// (lane 3 first, lane 0 last).
template<> EIGEN_STRONG_INLINE v4i ei_preverse(const v4i& a)
{
  // Byte selector for vec_perm: each group of four indices picks the bytes of one
  // source lane, and the groups are listed from the last lane to the first.
  // The mask has to be a full 16-byte vector (not a scalar unsigned char) to be
  // usable as the third operand of vec_perm; the vector type is naturally
  // 16-byte aligned, so no explicit alignment attribute is needed.
  static const __vector unsigned char reverse_mask =
    {12,13,14,15, 8,9,10,11, 4,5,6,7, 0,1,2,3};
  // Both permute sources are 'a', so every selected byte comes from 'a'.
  return (v4i)vec_perm((__vector unsigned char)a,(__vector unsigned char)a,reverse_mask);
}
inline v4f ei_preduxp(const v4f* vecs)
{
v4f v[4], sum[4];

View File

@ -120,28 +120,28 @@ public:
};
/** Constructs a uniform scaling from scale factor \a s */
inline UniformScaling<float> Scaling(float s) { return UniformScaling<float>(s); }
static inline UniformScaling<float> Scaling(float s) { return UniformScaling<float>(s); }
/** Constructs a uniform scaling from scale factor \a s */
inline UniformScaling<double> Scaling(double s) { return UniformScaling<double>(s); }
static inline UniformScaling<double> Scaling(double s) { return UniformScaling<double>(s); }
/** Constructs a uniform scaling from scale factor \a s */
template<typename RealScalar> inline UniformScaling<std::complex<RealScalar> >
Scaling(const std::complex<RealScalar>& s)
template<typename RealScalar>
static inline UniformScaling<std::complex<RealScalar> > Scaling(const std::complex<RealScalar>& s)
{ return UniformScaling<std::complex<RealScalar> >(s); }
/** Constructs a 2D axis aligned scaling */
template<typename Scalar> inline DiagonalMatrix<Scalar,2>
Scaling(Scalar sx, Scalar sy)
template<typename Scalar>
static inline DiagonalMatrix<Scalar,2> Scaling(Scalar sx, Scalar sy)
{ return DiagonalMatrix<Scalar,2>(sx, sy); }
/** Constructs a 3D axis aligned scaling */
template<typename Scalar> inline DiagonalMatrix<Scalar,3>
Scaling(Scalar sx, Scalar sy, Scalar sz)
template<typename Scalar>
static inline DiagonalMatrix<Scalar,3> Scaling(Scalar sx, Scalar sy, Scalar sz)
{ return DiagonalMatrix<Scalar,3>(sx, sy, sz); }
/** Constructs an axis aligned scaling expression from vector expression \a coeffs
* This is an alias for coeffs.asDiagonal()
*/
template<typename Derived>
inline const DiagonalMatrixWrapper<Derived> Scaling(const MatrixBase<Derived>& coeffs)
static inline const DiagonalMatrixWrapper<Derived> Scaling(const MatrixBase<Derived>& coeffs)
{ return coeffs.asDiagonal(); }
/** \addtogroup Geometry_Module */

83
bench/bench_reverse.cpp Normal file
View File

@ -0,0 +1,83 @@
#include <Eigen/Array>
#include <bench/BenchUtil.h>
using namespace Eigen;
// Base iteration budget. bench_reverse() derives its inner loop count from it as
// (REPEAT*1000)/size and rescales the reported time by it.
// Can be overridden at compile time (e.g. -DREPEAT=...).
#ifndef REPEAT
#define REPEAT 100000
#endif
// Number of times the timed inner loop is run per benchmarked matrix.
// Can be overridden at compile time (e.g. -DTRIES=...).
#ifndef TRIES
#define TRIES 20
#endif
// Scalar type of the benchmarked matrices and of the anti-optimization accumulator.
typedef double Scalar;
// Benchmarks the assignment `b = a.reverse()` for matrices of the same type and
// dimensions as 'm', then prints one result line to std::cout.
//
// 'm' is only used for its dimensions; the operands are freshly generated random
// matrices. The function is marked noinline so that each instantiation stays a
// separate function in the generated code.
template <typename MatrixType>
__attribute__ ((noinline)) void bench_reverse(const MatrixType& m)
{
  const int rows = m.rows();
  const int cols = m.cols();
  const int size = m.size();
  // Scale the iteration count inversely with the number of coefficients so that
  // every configuration processes roughly REPEAT*1000 coefficients per try.
  const int repeats = (REPEAT*1000)/size;

  MatrixType a = MatrixType::Random(rows,cols);
  MatrixType b = MatrixType::Random(rows,cols);

  BenchTimer timerB;

  // 'acc' accumulates one coefficient of the result per iteration so that the
  // assignment cannot be discarded as dead code.
  Scalar acc = 0;
  int r = ei_random<int>(0,rows-1);
  int c = ei_random<int>(0,cols-1);

  for (int t=0; t<TRIES; ++t)
  {
    timerB.start();
    for (int k=0; k<repeats; ++k)
    {
      // The asm statements only emit comment markers, making the benchmarked
      // statement easy to locate in the generated assembly.
      asm("#begin foo");
      b = a.reverse();
      asm("#end foo");
      acc += b.coeff(r,c);
    }
    timerB.stop();
  }

  if (MatrixType::RowsAtCompileTime==Dynamic)
    std::cout << "dyn   ";
  else
    std::cout << "fixed ";

  // Time is normalized to REPEAT iterations; the rate is coefficients processed
  // per microsecond of timerB.value().
  // NOTE(review): the "MFLOPS" label is historical; reverse() performs copies,
  // not floating point operations. Which measurement timerB.value() reports
  // (total vs. best try) depends on BenchTimer; confirm before comparing numbers.
  std::cout << rows << " x " << cols << " \t"
            << (timerB.value() * REPEAT) / repeats << "s "
            << "(" << 1e-6 * size*repeats/timerB.value() << " MFLOPS)\t";
  std::cout << "\n";

  // make sure the compiler does not optimize too much
  if (acc==123)
    std::cout << acc;
}
// Entry point: runs bench_reverse() on dynamic-size square matrices and on column
// vectors with the same number of coefficients, for each size listed below.
int main(int argc, char* argv[])
{
  // Dimensions to benchmark; the trailing 0 terminates the list.
  const int dynsizes[] = {4,6,8,16,24,32,49,64,128,256,512,900,0};

  // NOTE(review): this header text does not match the columns printed by
  // bench_reverse() (storage kind, dimensions, time, rate); consider updating it.
  std::cout << "size no sqrt standard";
  // #ifdef BENCH_GSL
  // std::cout << " GSL (standard + double + ATLAS) ";
  // #endif
  std::cout << "\n";

  // Plain 'int' index: 'uint' is a non-standard typedef that is not provided by
  // every toolchain.
  for (int i=0; dynsizes[i]>0; ++i)
  {
    bench_reverse(Matrix<Scalar,Dynamic,Dynamic>(dynsizes[i],dynsizes[i]));
    bench_reverse(Matrix<Scalar,Dynamic,1>(dynsizes[i]*dynsizes[i]));
  }

  // Fixed-size configurations, currently disabled:
  // bench_reverse(Matrix<Scalar,2,2>());
  // bench_reverse(Matrix<Scalar,3,3>());
  // bench_reverse(Matrix<Scalar,4,4>());
  // bench_reverse(Matrix<Scalar,5,5>());
  // bench_reverse(Matrix<Scalar,6,6>());
  // bench_reverse(Matrix<Scalar,7,7>());
  // bench_reverse(Matrix<Scalar,8,8>());
  // bench_reverse(Matrix<Scalar,12,12>());
  // bench_reverse(Matrix<Scalar,16,16>());
  return 0;
}