Fix a couple of issue with the vectorization. In particular, default ei_p* functions

are provided to handle not suported types seemlessly.

Added a generic null-ary expression with null-ary functors. They replace
Zero, Ones, Identity and Random.
This commit is contained in:
Gael Guennebaud 2008-04-24 18:35:39 +00:00
parent 6ae037dfb5
commit 9385793f71
15 changed files with 109 additions and 696 deletions

View File

@ -36,17 +36,14 @@ namespace Eigen {
#include "src/Core/Temporary.h" #include "src/Core/Temporary.h"
#include "src/Core/CwiseBinaryOp.h" #include "src/Core/CwiseBinaryOp.h"
#include "src/Core/CwiseUnaryOp.h" #include "src/Core/CwiseUnaryOp.h"
#include "src/Core/CwiseNullaryOp.h"
#include "src/Core/Product.h" #include "src/Core/Product.h"
#include "src/Core/Block.h" #include "src/Core/Block.h"
#include "src/Core/Minor.h" #include "src/Core/Minor.h"
#include "src/Core/Transpose.h" #include "src/Core/Transpose.h"
#include "src/Core/Dot.h" #include "src/Core/Dot.h"
#include "src/Core/Random.h"
#include "src/Core/Zero.h"
#include "src/Core/Ones.h"
#include "src/Core/DiagonalMatrix.h" #include "src/Core/DiagonalMatrix.h"
#include "src/Core/DiagonalCoeffs.h" #include "src/Core/DiagonalCoeffs.h"
#include "src/Core/Identity.h"
#include "src/Core/Redux.h" #include "src/Core/Redux.h"
#include "src/Core/Visitor.h" #include "src/Core/Visitor.h"
#include "src/Core/Fuzzy.h" #include "src/Core/Fuzzy.h"

View File

@ -71,7 +71,7 @@ struct ei_traits<Block<MatrixType, BlockRows, BlockCols> >
|| (ColsAtCompileTime != Dynamic && MatrixType::ColsAtCompileTime == Dynamic)) || (ColsAtCompileTime != Dynamic && MatrixType::ColsAtCompileTime == Dynamic))
? ~LargeBit ? ~LargeBit
: ~(unsigned int)0, : ~(unsigned int)0,
Flags = MatrixType::Flags & FlagsMaskLargeBit & ~VectorizableBit, Flags = MatrixType::Flags & FlagsMaskLargeBit & ~(VectorizableBit | Like1DArrayBit),
CoeffReadCost = MatrixType::CoeffReadCost CoeffReadCost = MatrixType::CoeffReadCost
}; };
}; };

View File

@ -54,7 +54,7 @@ struct ei_traits<DiagonalCoeffs<MatrixType> >
MaxColsAtCompileTime = 1, MaxColsAtCompileTime = 1,
Flags = (RowsAtCompileTime == Dynamic && ColsAtCompileTime == Dynamic Flags = (RowsAtCompileTime == Dynamic && ColsAtCompileTime == Dynamic
? (unsigned int)MatrixType::Flags ? (unsigned int)MatrixType::Flags
: (unsigned int)MatrixType::Flags &~ LargeBit) & ~VectorizableBit, : (unsigned int)MatrixType::Flags &~ LargeBit) & ~(VectorizableBit | Like1DArrayBit),
CoeffReadCost = MatrixType::CoeffReadCost CoeffReadCost = MatrixType::CoeffReadCost
}; };
}; };

View File

@ -47,7 +47,7 @@ struct ei_traits<DiagonalMatrix<CoeffsVectorType> >
ColsAtCompileTime = CoeffsVectorType::SizeAtCompileTime, ColsAtCompileTime = CoeffsVectorType::SizeAtCompileTime,
MaxRowsAtCompileTime = CoeffsVectorType::MaxSizeAtCompileTime, MaxRowsAtCompileTime = CoeffsVectorType::MaxSizeAtCompileTime,
MaxColsAtCompileTime = CoeffsVectorType::MaxSizeAtCompileTime, MaxColsAtCompileTime = CoeffsVectorType::MaxSizeAtCompileTime,
Flags = CoeffsVectorType::Flags & ~VectorizableBit, Flags = CoeffsVectorType::Flags & ~(VectorizableBit | Like1DArrayBit),
CoeffReadCost = CoeffsVectorType::CoeffReadCost CoeffReadCost = CoeffsVectorType::CoeffReadCost
}; };
}; };

View File

@ -340,6 +340,46 @@ template<typename Scalar>
struct ei_functor_traits<ei_scalar_pow_op<Scalar> > struct ei_functor_traits<ei_scalar_pow_op<Scalar> >
{ enum { Cost = 5 * NumTraits<Scalar>::MulCost, IsVectorizable = false }; }; { enum { Cost = 5 * NumTraits<Scalar>::MulCost, IsVectorizable = false }; };
// nullary functors
template<typename Scalar, bool IsVectorizable = (int(ei_packet_traits<Scalar>::size)>1?true:false) > struct ei_scalar_constant_op;
template<typename Scalar>
struct ei_scalar_constant_op<Scalar,true> {
typedef typename ei_packet_traits<Scalar>::type PacketScalar;
ei_scalar_constant_op(const Scalar& other) : m_other(ei_pset1(other)) { }
Scalar operator() (int, int) const { return ei_pfirst(m_other); }
PacketScalar packetOp() const
{ return m_other; }
const PacketScalar m_other;
};
template<typename Scalar>
struct ei_scalar_constant_op<Scalar,false> {
ei_scalar_constant_op(const Scalar& other) : m_other(other) { }
Scalar operator() (int, int) const { return m_other; }
const Scalar m_other;
};
template<typename Scalar>
struct ei_functor_traits<ei_scalar_constant_op<Scalar> >
{ enum { Cost = 1, IsVectorizable = ei_packet_traits<Scalar>::size>1, IsRepeatable = true }; };
template<typename Scalar> struct ei_scalar_random_op EIGEN_EMPTY_STRUCT {
ei_scalar_random_op(void) {}
Scalar operator() (int, int) const { return ei_random<Scalar>(); }
};
template<typename Scalar>
struct ei_functor_traits<ei_scalar_random_op<Scalar> >
{ enum { Cost = 5 * NumTraits<Scalar>::MulCost, IsVectorizable = false, IsRepeatable = false }; };
template<typename Scalar> struct ei_scalar_identity_op EIGEN_EMPTY_STRUCT {
ei_scalar_identity_op(void) {}
Scalar operator() (int row, int col) const { return row==col ? Scalar(1) : Scalar(0); }
};
template<typename Scalar>
struct ei_functor_traits<ei_scalar_identity_op<Scalar> >
{ enum { Cost = NumTraits<Scalar>::AddCost, IsVectorizable = false, IsRepeatable = true }; };
// default ei_functor_traits for STL functors: // default ei_functor_traits for STL functors:

View File

@ -1,161 +0,0 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra. Eigen itself is part of the KDE project.
//
// Copyright (C) 2006-2008 Benoit Jacob <jacob@math.jussieu.fr>
//
// Eigen is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 3 of the License, or (at your option) any later version.
//
// Alternatively, you can redistribute it and/or
// modify it under the terms of the GNU General Public License as
// published by the Free Software Foundation; either version 2 of
// the License, or (at your option) any later version.
//
// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY
// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License and a copy of the GNU General Public License along with
// Eigen. If not, see <http://www.gnu.org/licenses/>.
#ifndef EIGEN_IDENTITY_H
#define EIGEN_IDENTITY_H
/** \class Identity
*
* \brief Expression of the identity matrix of some size.
*
* \sa MatrixBase::identity(), MatrixBase::identity(int,int), MatrixBase::setIdentity()
*/
template<typename MatrixType>
struct ei_traits<Identity<MatrixType> >
{
typedef typename MatrixType::Scalar Scalar;
enum {
RowsAtCompileTime = MatrixType::RowsAtCompileTime,
ColsAtCompileTime = MatrixType::ColsAtCompileTime,
MaxRowsAtCompileTime = MatrixType::MaxRowsAtCompileTime,
MaxColsAtCompileTime = MatrixType::MaxColsAtCompileTime,
Flags = MatrixType::Flags & ~VectorizableBit,
CoeffReadCost = NumTraits<Scalar>::ReadCost
};
};
template<typename MatrixType> class Identity : ei_no_assignment_operator,
public MatrixBase<Identity<MatrixType> >
{
public:
EIGEN_GENERIC_PUBLIC_INTERFACE(Identity)
Identity(int rows, int cols) : m_rows(rows), m_cols(cols)
{
ei_assert(rows > 0
&& (RowsAtCompileTime == Dynamic || RowsAtCompileTime == rows)
&& cols > 0
&& (ColsAtCompileTime == Dynamic || ColsAtCompileTime == cols));
}
private:
int _rows() const { return m_rows.value(); }
int _cols() const { return m_cols.value(); }
const Scalar _coeff(int row, int col) const
{
return row == col ? static_cast<Scalar>(1) : static_cast<Scalar>(0);
}
protected:
const ei_int_if_dynamic<RowsAtCompileTime> m_rows;
const ei_int_if_dynamic<ColsAtCompileTime> m_cols;
};
/** \returns an expression of the identity matrix (not necessarily square).
*
* The parameters \a rows and \a cols are the number of rows and of columns of
* the returned matrix. Must be compatible with this MatrixBase type.
*
* This variant is meant to be used for dynamic-size matrix types. For fixed-size types,
* it is redundant to pass \a rows and \a cols as arguments, so identity() should be used
* instead.
*
* Example: \include MatrixBase_identity_int_int.cpp
* Output: \verbinclude MatrixBase_identity_int_int.out
*
* \sa identity(), setIdentity(), isIdentity()
*/
template<typename Derived>
const Identity<Derived> MatrixBase<Derived>::identity(int rows, int cols)
{
return Identity<Derived>(rows, cols);
}
/** \returns an expression of the identity matrix (not necessarily square).
*
* This variant is only for fixed-size MatrixBase types. For dynamic-size types, you
* need to use the variant taking size arguments.
*
* Example: \include MatrixBase_identity.cpp
* Output: \verbinclude MatrixBase_identity.out
*
* \sa identity(int,int), setIdentity(), isIdentity()
*/
template<typename Derived>
const Identity<Derived> MatrixBase<Derived>::identity()
{
return Identity<Derived>(RowsAtCompileTime, ColsAtCompileTime);
}
/** \returns true if *this is approximately equal to the identity matrix
* (not necessarily square),
* within the precision given by \a prec.
*
* Example: \include MatrixBase_isIdentity.cpp
* Output: \verbinclude MatrixBase_isIdentity.out
*
* \sa class Identity, identity(), identity(int,int), setIdentity()
*/
template<typename Derived>
bool MatrixBase<Derived>::isIdentity
(typename NumTraits<Scalar>::Real prec) const
{
for(int j = 0; j < cols(); j++)
{
for(int i = 0; i < rows(); i++)
{
if(i == j)
{
if(!ei_isApprox(coeff(i, j), static_cast<Scalar>(1), prec))
return false;
}
else
{
if(!ei_isMuchSmallerThan(coeff(i, j), static_cast<RealScalar>(1), prec))
return false;
}
}
}
return true;
}
/** Writes the identity expression (not necessarily square) into *this.
*
* Example: \include MatrixBase_setIdentity.cpp
* Output: \verbinclude MatrixBase_setIdentity.out
*
* \sa class Identity, identity(), identity(int,int), isIdentity()
*/
template<typename Derived>
Derived& MatrixBase<Derived>::setIdentity()
{
return *this = Identity<Derived>(rows(), cols());
}
#endif // EIGEN_IDENTITY_H

View File

@ -339,20 +339,38 @@ template<typename Derived> class MatrixBase
/// \name Generating special matrices /// \name Generating special matrices
//@{ //@{
static const Random<Derived> random(int rows, int cols); static const CwiseNullaryOp<ei_scalar_constant_op<Scalar>,Derived>
static const Random<Derived> random(int size); constant(int rows, int cols, const Scalar& value);
static const Random<Derived> random(); static const CwiseNullaryOp<ei_scalar_constant_op<Scalar>,Derived>
static const Zero<Derived> zero(int rows, int cols); constant(int size, const Scalar& value);
static const Zero<Derived> zero(int size); static const CwiseNullaryOp<ei_scalar_constant_op<Scalar>,Derived>
static const Zero<Derived> zero(); constant(const Scalar& value);
static const Ones<Derived> ones(int rows, int cols);
static const Ones<Derived> ones(int size); template<typename CustomZeroaryOp>
static const Ones<Derived> ones(); static const CwiseNullaryOp<CustomZeroaryOp, Derived>
static const Identity<Derived> identity(); cwiseCreate(int rows, int cols, const CustomZeroaryOp& func);
static const Identity<Derived> identity(int rows, int cols); template<typename CustomZeroaryOp>
static const CwiseNullaryOp<CustomZeroaryOp, Derived>
cwiseCreate(int size, const CustomZeroaryOp& func);
template<typename CustomZeroaryOp>
static const CwiseNullaryOp<CustomZeroaryOp, Derived>
cwiseCreate(const CustomZeroaryOp& func);
static const CwiseNullaryOp<ei_scalar_random_op<Scalar>,Derived> random(int rows, int cols);
static const CwiseNullaryOp<ei_scalar_random_op<Scalar>,Derived> random(int size);
static const CwiseNullaryOp<ei_scalar_random_op<Scalar>,Derived> random();
static const CwiseNullaryOp<ei_scalar_constant_op<Scalar>,Derived> zero(int rows, int cols);
static const CwiseNullaryOp<ei_scalar_constant_op<Scalar>,Derived> zero(int size);
static const CwiseNullaryOp<ei_scalar_constant_op<Scalar>,Derived> zero();
static const CwiseNullaryOp<ei_scalar_constant_op<Scalar>,Derived> ones(int rows, int cols);
static const CwiseNullaryOp<ei_scalar_constant_op<Scalar>,Derived> ones(int size);
static const CwiseNullaryOp<ei_scalar_constant_op<Scalar>,Derived> ones();
static const CwiseNullaryOp<ei_scalar_identity_op<Scalar>,Derived> identity();
static const CwiseNullaryOp<ei_scalar_identity_op<Scalar>,Derived> identity(int rows, int cols);
const DiagonalMatrix<Derived> asDiagonal() const; const DiagonalMatrix<Derived> asDiagonal() const;
Derived& setConstant(const Scalar& value);
Derived& setZero(); Derived& setZero();
Derived& setOnes(); Derived& setOnes();
Derived& setRandom(); Derived& setRandom();
@ -370,6 +388,7 @@ template<typename Derived> class MatrixBase
bool isMuchSmallerThan(const MatrixBase<OtherDerived>& other, bool isMuchSmallerThan(const MatrixBase<OtherDerived>& other,
RealScalar prec = precision<Scalar>()) const; RealScalar prec = precision<Scalar>()) const;
bool isEqualToConstant(const Scalar& value, RealScalar prec = precision<Scalar>()) const;
bool isZero(RealScalar prec = precision<Scalar>()) const; bool isZero(RealScalar prec = precision<Scalar>()) const;
bool isOnes(RealScalar prec = precision<Scalar>()) const; bool isOnes(RealScalar prec = precision<Scalar>()) const;
bool isIdentity(RealScalar prec = precision<Scalar>()) const; bool isIdentity(RealScalar prec = precision<Scalar>()) const;

View File

@ -1,172 +0,0 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra. Eigen itself is part of the KDE project.
//
// Copyright (C) 2006-2008 Benoit Jacob <jacob@math.jussieu.fr>
//
// Eigen is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 3 of the License, or (at your option) any later version.
//
// Alternatively, you can redistribute it and/or
// modify it under the terms of the GNU General Public License as
// published by the Free Software Foundation; either version 2 of
// the License, or (at your option) any later version.
//
// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY
// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License and a copy of the GNU General Public License along with
// Eigen. If not, see <http://www.gnu.org/licenses/>.
#ifndef EIGEN_ONES_H
#define EIGEN_ONES_H
/** \class Ones
*
* \brief Expression of a matrix where all coefficients equal one.
*
* \sa MatrixBase::ones(), MatrixBase::ones(int), MatrixBase::ones(int,int),
* MatrixBase::setOnes(), MatrixBase::isOnes()
*/
template<typename MatrixType>
struct ei_traits<Ones<MatrixType> >
{
typedef typename MatrixType::Scalar Scalar;
enum {
RowsAtCompileTime = MatrixType::RowsAtCompileTime,
ColsAtCompileTime = MatrixType::ColsAtCompileTime,
MaxRowsAtCompileTime = MatrixType::MaxRowsAtCompileTime,
MaxColsAtCompileTime = MatrixType::MaxColsAtCompileTime,
Flags = MatrixType::Flags & ~VectorizableBit,
CoeffReadCost = NumTraits<Scalar>::ReadCost
};
};
template<typename MatrixType> class Ones : ei_no_assignment_operator,
public MatrixBase<Ones<MatrixType> >
{
public:
EIGEN_GENERIC_PUBLIC_INTERFACE(Ones)
private:
int _rows() const { return m_rows.value(); }
int _cols() const { return m_cols.value(); }
const Scalar _coeff(int, int) const
{
return static_cast<Scalar>(1);
}
public:
Ones(int rows, int cols) : m_rows(rows), m_cols(cols)
{
ei_assert(rows > 0
&& (RowsAtCompileTime == Dynamic || RowsAtCompileTime == rows)
&& cols > 0
&& (ColsAtCompileTime == Dynamic || ColsAtCompileTime == cols));
}
protected:
const ei_int_if_dynamic<RowsAtCompileTime> m_rows;
const ei_int_if_dynamic<ColsAtCompileTime> m_cols;
};
/** \returns an expression of a matrix where all coefficients equal one.
*
* The parameters \a rows and \a cols are the number of rows and of columns of
* the returned matrix. Must be compatible with this MatrixBase type.
*
* This variant is meant to be used for dynamic-size matrix types. For fixed-size types,
* it is redundant to pass \a rows and \a cols as arguments, so ones() should be used
* instead.
*
* Example: \include MatrixBase_ones_int_int.cpp
* Output: \verbinclude MatrixBase_ones_int_int.out
*
* \sa ones(), ones(int), isOnes(), class Ones
*/
template<typename Derived>
const Ones<Derived> MatrixBase<Derived>::ones(int rows, int cols)
{
return Ones<Derived>(rows, cols);
}
/** \returns an expression of a vector where all coefficients equal one.
*
* The parameter \a size is the size of the returned vector.
* Must be compatible with this MatrixBase type.
*
* \only_for_vectors
*
* This variant is meant to be used for dynamic-size vector types. For fixed-size types,
* it is redundant to pass \a size as argument, so ones() should be used
* instead.
*
* Example: \include MatrixBase_ones_int.cpp
* Output: \verbinclude MatrixBase_ones_int.out
*
* \sa ones(), ones(int,int), isOnes(), class Ones
*/
template<typename Derived>
const Ones<Derived> MatrixBase<Derived>::ones(int size)
{
ei_assert(IsVectorAtCompileTime);
if(RowsAtCompileTime == 1) return Ones<Derived>(1, size);
else return Ones<Derived>(size, 1);
}
/** \returns an expression of a fixed-size matrix or vector where all coefficients equal one.
*
* This variant is only for fixed-size MatrixBase types. For dynamic-size types, you
* need to use the variants taking size arguments.
*
* Example: \include MatrixBase_ones.cpp
* Output: \verbinclude MatrixBase_ones.out
*
* \sa ones(int), ones(int,int), isOnes(), class Ones
*/
template<typename Derived>
const Ones<Derived> MatrixBase<Derived>::ones()
{
return Ones<Derived>(RowsAtCompileTime, ColsAtCompileTime);
}
/** \returns true if *this is approximately equal to the matrix where all coefficients
* are equal to 1, within the precision given by \a prec.
*
* Example: \include MatrixBase_isOnes.cpp
* Output: \verbinclude MatrixBase_isOnes.out
*
* \sa class Ones, ones()
*/
template<typename Derived>
bool MatrixBase<Derived>::isOnes
(typename NumTraits<Scalar>::Real prec) const
{
for(int j = 0; j < cols(); j++)
for(int i = 0; i < rows(); i++)
if(!ei_isApprox(coeff(i, j), static_cast<Scalar>(1), prec))
return false;
return true;
}
/** Sets all coefficients in this expression to one.
*
* Example: \include MatrixBase_setOnes.cpp
* Output: \verbinclude MatrixBase_setOnes.out
*
* \sa class Ones, ones()
*/
template<typename Derived>
Derived& MatrixBase<Derived>::setOnes()
{
return *this = Ones<Derived>(rows(), cols());
}
#endif // EIGEN_ONES_H

View File

@ -25,6 +25,21 @@
#ifndef EIGEN_PACKET_MATH_H #ifndef EIGEN_PACKET_MATH_H
#define EIGEN_PACKET_MATH_H #define EIGEN_PACKET_MATH_H
// Default implementation for types not supported by the vectorization.
// In practice these functions are provided to make easier the writting
// of generic vectorized code. However, at runtime, they should never be
// called, TODO so sould we raise an assertion or not ?
template <typename Scalar> inline Scalar ei_padd(const Scalar& a, const Scalar& b) { return a + b; }
template <typename Scalar> inline Scalar ei_psub(const Scalar& a, const Scalar& b) { return a - b; }
template <typename Scalar> inline Scalar ei_pmul(const Scalar& a, const Scalar& b) { return a * b; }
template <typename Scalar> inline Scalar ei_pmin(const Scalar& a, const Scalar& b) { return std::min(a,b); }
template <typename Scalar> inline Scalar ei_pmax(const Scalar& a, const Scalar& b) { return std::max(a,b); }
template <typename Scalar> inline Scalar ei_pload(const Scalar* from) { return *from; }
template <typename Scalar> inline Scalar ei_pload1(const Scalar* from) { return *from; }
template <typename Scalar> inline Scalar ei_pset1(const Scalar& from) { return from; }
template <typename Scalar> inline void ei_pstore(Scalar* to, const Scalar& from) { (*to) = from; }
template <typename Scalar> inline Scalar ei_pfirst(const Scalar& a) { return a; }
#ifdef EIGEN_VECTORIZE_SSE #ifdef EIGEN_VECTORIZE_SSE
template<> struct ei_packet_traits<float> { typedef __m128 type; enum {size=4}; }; template<> struct ei_packet_traits<float> { typedef __m128 type; enum {size=4}; };
@ -41,10 +56,17 @@ inline __m128i ei_psub(const __m128i& a, const __m128i& b) { return _mm_sub_epi3
inline __m128 ei_pmul(const __m128& a, const __m128& b) { return _mm_mul_ps(a,b); } inline __m128 ei_pmul(const __m128& a, const __m128& b) { return _mm_mul_ps(a,b); }
inline __m128d ei_pmul(const __m128d& a, const __m128d& b) { return _mm_mul_pd(a,b); } inline __m128d ei_pmul(const __m128d& a, const __m128d& b) { return _mm_mul_pd(a,b); }
inline __m128i ei_pmul(const __m128i& a, const __m128i& b) { return _mm_mul_epu32(a,b); } inline __m128i ei_pmul(const __m128i& a, const __m128i& b)
{
return _mm_or_si128(
_mm_mul_epu32(a,b),
_mm_slli_si128(
_mm_mul_epu32(_mm_srli_si128(a,32),_mm_srli_si128(b,32)), 32));
}
inline __m128 ei_pmin(const __m128& a, const __m128& b) { return _mm_min_ps(a,b); } inline __m128 ei_pmin(const __m128& a, const __m128& b) { return _mm_min_ps(a,b); }
inline __m128d ei_pmin(const __m128d& a, const __m128d& b) { return _mm_min_pd(a,b); } inline __m128d ei_pmin(const __m128d& a, const __m128d& b) { return _mm_min_pd(a,b); }
// FIXME this vectorized min operator is likely to be slower than the standard one
inline __m128i ei_pmin(const __m128i& a, const __m128i& b) inline __m128i ei_pmin(const __m128i& a, const __m128i& b)
{ {
__m128i mask = _mm_cmplt_epi32(a,b); __m128i mask = _mm_cmplt_epi32(a,b);
@ -53,6 +75,7 @@ inline __m128i ei_pmin(const __m128i& a, const __m128i& b)
inline __m128 ei_pmax(const __m128& a, const __m128& b) { return _mm_max_ps(a,b); } inline __m128 ei_pmax(const __m128& a, const __m128& b) { return _mm_max_ps(a,b); }
inline __m128d ei_pmax(const __m128d& a, const __m128d& b) { return _mm_max_pd(a,b); } inline __m128d ei_pmax(const __m128d& a, const __m128d& b) { return _mm_max_pd(a,b); }
// FIXME this vectorized max operator is likely to be slower than the standard one
inline __m128i ei_pmax(const __m128i& a, const __m128i& b) inline __m128i ei_pmax(const __m128i& a, const __m128i& b)
{ {
__m128i mask = _mm_cmpgt_epi32(a,b); __m128i mask = _mm_cmpgt_epi32(a,b);
@ -61,7 +84,7 @@ inline __m128i ei_pmax(const __m128i& a, const __m128i& b)
inline __m128 ei_pload(const float* from) { return _mm_load_ps(from); } inline __m128 ei_pload(const float* from) { return _mm_load_ps(from); }
inline __m128d ei_pload(const double* from) { return _mm_load_pd(from); } inline __m128d ei_pload(const double* from) { return _mm_load_pd(from); }
inline __m128i ei_pload(const __m128i* from) { return _mm_load_si128(from); } inline __m128i ei_pload(const int* from) { return _mm_load_si128(reinterpret_cast<const __m128i*>(from)); }
inline __m128 ei_pload1(const float* from) { return _mm_load1_ps(from); } inline __m128 ei_pload1(const float* from) { return _mm_load1_ps(from); }
inline __m128d ei_pload1(const double* from) { return _mm_load1_pd(from); } inline __m128d ei_pload1(const double* from) { return _mm_load1_pd(from); }
@ -71,9 +94,9 @@ inline __m128 ei_pset1(const float& from) { return _mm_set1_ps(from); }
inline __m128d ei_pset1(const double& from) { return _mm_set1_pd(from); } inline __m128d ei_pset1(const double& from) { return _mm_set1_pd(from); }
inline __m128i ei_pset1(const int& from) { return _mm_set1_epi32(from); } inline __m128i ei_pset1(const int& from) { return _mm_set1_epi32(from); }
inline void ei_pstore(float* to, const __m128& from) { _mm_store_ps(to, from); } inline void ei_pstore(float* to, const __m128& from) { _mm_store_ps(to, from); }
inline void ei_pstore(double* to, const __m128d& from) { _mm_store_pd(to, from); } inline void ei_pstore(double* to, const __m128d& from) { _mm_store_pd(to, from); }
inline void ei_pstore(__m128i* to, const __m128i& from) { _mm_store_si128(to, from); } inline void ei_pstore(int* to, const __m128i& from) { _mm_store_si128(reinterpret_cast<__m128i*>(to), from); }
inline float ei_pfirst(const __m128& a) { return _mm_cvtss_f32(a); } inline float ei_pfirst(const __m128& a) { return _mm_cvtss_f32(a); }
inline double ei_pfirst(const __m128d& a) { return _mm_cvtsd_f64(a); } inline double ei_pfirst(const __m128d& a) { return _mm_cvtsd_f64(a); }

View File

@ -213,7 +213,7 @@ template<typename Lhs, typename Rhs, int EvalMode> class Product : ei_no_assignm
ei_packet_product_unroller<Flags&RowMajorBit, Lhs::ColsAtCompileTime-1, ei_packet_product_unroller<Flags&RowMajorBit, Lhs::ColsAtCompileTime-1,
Lhs::ColsAtCompileTime <= EIGEN_UNROLLING_LIMIT Lhs::ColsAtCompileTime <= EIGEN_UNROLLING_LIMIT
? Lhs::ColsAtCompileTime : Dynamic, ? Lhs::ColsAtCompileTime : Dynamic,
Lhs, Rhs, PacketScalar> _LhsNested, _RhsNested, PacketScalar>
::run(row, col, m_lhs, m_rhs, res); ::run(row, col, m_lhs, m_rhs, res);
} }
else else
@ -282,7 +282,7 @@ void Product<Lhs,Rhs,EvalMode>::_cacheOptimalEval(DestDerived& res) const
const int cols4 = m_lhs.cols() & 0xfffffffC; const int cols4 = m_lhs.cols() & 0xfffffffC;
#ifdef EIGEN_VECTORIZE #ifdef EIGEN_VECTORIZE
if( (Flags & VectorizableBit) && (!(Lhs::Flags & RowMajorBit)) ) if( (Flags & VectorizableBit) && (!(Lhs::Flags & RowMajorBit)) )
{ {
for(int k=0; k<this->cols(); k++) for(int k=0; k<this->cols(); k++)
{ {
int j=0; int j=0;

View File

@ -1,158 +0,0 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra. Eigen itself is part of the KDE project.
//
// Copyright (C) 2006-2008 Benoit Jacob <jacob@math.jussieu.fr>
//
// Eigen is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 3 of the License, or (at your option) any later version.
//
// Alternatively, you can redistribute it and/or
// modify it under the terms of the GNU General Public License as
// published by the Free Software Foundation; either version 2 of
// the License, or (at your option) any later version.
//
// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY
// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License and a copy of the GNU General Public License along with
// Eigen. If not, see <http://www.gnu.org/licenses/>.
#ifndef EIGEN_RANDOM_H
#define EIGEN_RANDOM_H
/** \class Random
*
* \brief Expression of a random matrix or vector.
*
* \sa MatrixBase::random(), MatrixBase::random(int), MatrixBase::random(int,int),
* MatrixBase::setRandom()
*/
template<typename MatrixType>
struct ei_traits<Random<MatrixType> >
{
typedef typename ei_traits<MatrixType>::Scalar Scalar;
enum {
RowsAtCompileTime = ei_traits<MatrixType>::RowsAtCompileTime,
ColsAtCompileTime = ei_traits<MatrixType>::ColsAtCompileTime,
MaxRowsAtCompileTime = ei_traits<MatrixType>::MaxRowsAtCompileTime,
MaxColsAtCompileTime = ei_traits<MatrixType>::MaxColsAtCompileTime,
Flags = (ei_traits<MatrixType>::Flags | EvalBeforeNestingBit) & ~VectorizableBit,
CoeffReadCost = 2 * NumTraits<Scalar>::MulCost // FIXME: arbitrary value
};
};
template<typename MatrixType> class Random : ei_no_assignment_operator,
public MatrixBase<Random<MatrixType> >
{
public:
EIGEN_GENERIC_PUBLIC_INTERFACE(Random)
private:
int _rows() const { return m_rows.value(); }
int _cols() const { return m_cols.value(); }
const Scalar _coeff(int, int) const
{
return ei_random<Scalar>();
}
public:
Random(int rows, int cols) : m_rows(rows), m_cols(cols)
{
ei_assert(rows > 0
&& (RowsAtCompileTime == Dynamic || RowsAtCompileTime == rows)
&& cols > 0
&& (ColsAtCompileTime == Dynamic || ColsAtCompileTime == cols));
}
protected:
const ei_int_if_dynamic<RowsAtCompileTime> m_rows;
const ei_int_if_dynamic<ColsAtCompileTime> m_cols;
};
/** \returns a random matrix (not an expression, the matrix is immediately evaluated).
*
* The parameters \a rows and \a cols are the number of rows and of columns of
* the returned matrix. Must be compatible with this MatrixBase type.
*
* This variant is meant to be used for dynamic-size matrix types. For fixed-size types,
* it is redundant to pass \a rows and \a cols as arguments, so ei_random() should be used
* instead.
*
* Example: \include MatrixBase_random_int_int.cpp
* Output: \verbinclude MatrixBase_random_int_int.out
*
* \sa ei_random(), ei_random(int)
*/
template<typename Derived>
const Random<Derived>
MatrixBase<Derived>::random(int rows, int cols)
{
return Random<Derived>(rows, cols);
}
/** \returns a random vector (not an expression, the vector is immediately evaluated).
*
* The parameter \a size is the size of the returned vector.
* Must be compatible with this MatrixBase type.
*
* \only_for_vectors
*
* This variant is meant to be used for dynamic-size vector types. For fixed-size types,
* it is redundant to pass \a size as argument, so ei_random() should be used
* instead.
*
* Example: \include MatrixBase_random_int.cpp
* Output: \verbinclude MatrixBase_random_int.out
*
* \sa ei_random(), ei_random(int,int)
*/
template<typename Derived>
const Random<Derived>
MatrixBase<Derived>::random(int size)
{
ei_assert(IsVectorAtCompileTime);
if(RowsAtCompileTime == 1) return Random<Derived>(1, size);
else return Random<Derived>(size, 1);
}
/** \returns a fixed-size random matrix or vector
* (not an expression, the matrix is immediately evaluated).
*
* This variant is only for fixed-size MatrixBase types. For dynamic-size types, you
* need to use the variants taking size arguments.
*
* Example: \include MatrixBase_random.cpp
* Output: \verbinclude MatrixBase_random.out
*
* \sa ei_random(int), ei_random(int,int)
*/
template<typename Derived>
const Random<Derived>
MatrixBase<Derived>::random()
{
return Random<Derived>(RowsAtCompileTime, ColsAtCompileTime);
}
/** Sets all coefficients in this expression to random values.
*
* Example: \include MatrixBase_setRandom.cpp
* Output: \verbinclude MatrixBase_setRandom.out
*
* \sa class Random, ei_random()
*/
template<typename Derived>
Derived& MatrixBase<Derived>::setRandom()
{
return *this = Random<Derived>(rows(), cols());
}
#endif // EIGEN_RANDOM_H

View File

@ -1,173 +0,0 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra. Eigen itself is part of the KDE project.
//
// Copyright (C) 2006-2008 Benoit Jacob <jacob@math.jussieu.fr>
//
// Eigen is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 3 of the License, or (at your option) any later version.
//
// Alternatively, you can redistribute it and/or
// modify it under the terms of the GNU General Public License as
// published by the Free Software Foundation; either version 2 of
// the License, or (at your option) any later version.
//
// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY
// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License and a copy of the GNU General Public License along with
// Eigen. If not, see <http://www.gnu.org/licenses/>.
#ifndef EIGEN_ZERO_H
#define EIGEN_ZERO_H
/** \class Zero
*
* \brief Expression of a zero matrix or vector.
*
* \sa MatrixBase::zero(), MatrixBase::zero(int), MatrixBase::zero(int,int),
* MatrixBase::setZero(), MatrixBase::isZero()
*/
template<typename MatrixType>
struct ei_traits<Zero<MatrixType> >
{
typedef typename MatrixType::Scalar Scalar;
enum {
RowsAtCompileTime = MatrixType::RowsAtCompileTime,
ColsAtCompileTime = MatrixType::ColsAtCompileTime,
MaxRowsAtCompileTime = MatrixType::MaxRowsAtCompileTime,
MaxColsAtCompileTime = MatrixType::MaxColsAtCompileTime,
Flags = MatrixType::Flags & ~VectorizableBit,
CoeffReadCost = NumTraits<Scalar>::ReadCost
};
};
template<typename MatrixType> class Zero : ei_no_assignment_operator,
public MatrixBase<Zero<MatrixType> >
{
public:
EIGEN_GENERIC_PUBLIC_INTERFACE(Zero)
private:
int _rows() const { return m_rows.value(); }
int _cols() const { return m_cols.value(); }
Scalar _coeff(int, int) const
{
return static_cast<Scalar>(0);
}
public:
Zero(int rows, int cols) : m_rows(rows), m_cols(cols)
{
ei_assert(rows > 0
&& (RowsAtCompileTime == Dynamic || RowsAtCompileTime == rows)
&& cols > 0
&& (ColsAtCompileTime == Dynamic || ColsAtCompileTime == cols));
}
protected:
const ei_int_if_dynamic<RowsAtCompileTime> m_rows;
const ei_int_if_dynamic<ColsAtCompileTime> m_cols;
};
/** \returns an expression of a zero matrix.
*
* The parameters \a rows and \a cols are the number of rows and of columns of
* the returned matrix. Must be compatible with this MatrixBase type.
*
* This variant is meant to be used for dynamic-size matrix types. For fixed-size types,
* it is redundant to pass \a rows and \a cols as arguments, so zero() should be used
* instead.
*
* Example: \include MatrixBase_zero_int_int.cpp
* Output: \verbinclude MatrixBase_zero_int_int.out
*
* \sa zero(), zero(int)
*/
template<typename Derived>
const Zero<Derived> MatrixBase<Derived>::zero(int rows, int cols)
{
return Zero<Derived>(rows, cols);
}
/** \returns an expression of a zero vector.
*
* The parameter \a size is the size of the returned vector.
* Must be compatible with this MatrixBase type.
*
* \only_for_vectors
*
* This variant is meant to be used for dynamic-size vector types. For fixed-size types,
* it is redundant to pass \a size as argument, so zero() should be used
* instead.
*
* Example: \include MatrixBase_zero_int.cpp
* Output: \verbinclude MatrixBase_zero_int.out
*
* \sa zero(), zero(int,int)
*/
template<typename Derived>
const Zero<Derived> MatrixBase<Derived>::zero(int size)
{
ei_assert(IsVectorAtCompileTime);
if(RowsAtCompileTime == 1) return Zero<Derived>(1, size);
else return Zero<Derived>(size, 1);
}
/** \returns an expression of a fixed-size zero matrix or vector.
*
* This variant is only for fixed-size MatrixBase types. For dynamic-size types, you
* need to use the variants taking size arguments.
*
* Example: \include MatrixBase_zero.cpp
* Output: \verbinclude MatrixBase_zero.out
*
* \sa zero(int), zero(int,int)
*/
template<typename Derived>
const Zero<Derived> MatrixBase<Derived>::zero()
{
return Zero<Derived>(RowsAtCompileTime, ColsAtCompileTime);
}
/** \returns true if *this is approximately equal to the zero matrix,
* within the precision given by \a prec.
*
* Example: \include MatrixBase_isZero.cpp
* Output: \verbinclude MatrixBase_isZero.out
*
* \sa class Zero, zero()
*/
template<typename Derived>
bool MatrixBase<Derived>::isZero
(typename NumTraits<Scalar>::Real prec) const
{
for(int j = 0; j < cols(); j++)
for(int i = 0; i < rows(); i++)
if(!ei_isMuchSmallerThan(coeff(i, j), static_cast<Scalar>(1), prec))
return false;
return true;
}
/** Sets all coefficients in this expression to zero.
*
* Example: \include MatrixBase_setZero.cpp
* Output: \verbinclude MatrixBase_setZero.out
*
* \sa class Zero, zero()
*/
template<typename Derived>
Derived& MatrixBase<Derived>::setZero()
{
return *this = Zero<Derived>(rows(), cols());
}
#endif // EIGEN_ZERO_H

View File

@ -38,6 +38,7 @@ const unsigned int VectorizableBit = 0x10;
#else #else
const unsigned int VectorizableBit = 0x0; const unsigned int VectorizableBit = 0x0;
#endif #endif
const unsigned int Like1DArrayBit = 0x20;
enum { ConditionalJumpCost = 5 }; enum { ConditionalJumpCost = 5 };
enum CornerType { TopLeft, TopRight, BottomLeft, BottomRight }; enum CornerType { TopLeft, TopRight, BottomLeft, BottomRight };

View File

@ -40,15 +40,12 @@ template<typename MatrixType> class Minor;
template<typename MatrixType, int BlockRows=Dynamic, int BlockCols=Dynamic> class Block; template<typename MatrixType, int BlockRows=Dynamic, int BlockCols=Dynamic> class Block;
template<typename MatrixType> class Transpose; template<typename MatrixType> class Transpose;
template<typename MatrixType> class Conjugate; template<typename MatrixType> class Conjugate;
template<typename BinaryOp, typename Lhs, typename Rhs> class CwiseBinaryOp; template<typename NullaryOp, typename MatrixType> class CwiseNullaryOp;
template<typename UnaryOp, typename MatrixType> class CwiseUnaryOp; template<typename UnaryOp, typename MatrixType> class CwiseUnaryOp;
template<typename BinaryOp, typename Lhs, typename Rhs> class CwiseBinaryOp;
template<typename Lhs, typename Rhs, int EvalMode=ei_product_eval_mode<Lhs,Rhs>::value> class Product; template<typename Lhs, typename Rhs, int EvalMode=ei_product_eval_mode<Lhs,Rhs>::value> class Product;
template<typename MatrixType> class Random;
template<typename MatrixType> class Zero;
template<typename MatrixType> class Ones;
template<typename CoeffsVectorType> class DiagonalMatrix; template<typename CoeffsVectorType> class DiagonalMatrix;
template<typename MatrixType> class DiagonalCoeffs; template<typename MatrixType> class DiagonalCoeffs;
template<typename MatrixType> class Identity;
template<typename MatrixType> class Map; template<typename MatrixType> class Map;
template<typename Derived> class Eval; template<typename Derived> class Eval;
template<int Direction, typename UnaryOp, typename MatrixType> class PartialRedux; template<int Direction, typename UnaryOp, typename MatrixType> class PartialRedux;

View File

@ -158,7 +158,7 @@ class ei_corrected_matrix_flags
? Cols%ei_packet_traits<Scalar>::size==0 ? Cols%ei_packet_traits<Scalar>::size==0
: Rows%ei_packet_traits<Scalar>::size==0 : Rows%ei_packet_traits<Scalar>::size==0
), ),
_flags1 = SuggestedFlags & ~(EvalBeforeNestingBit | EvalBeforeAssigningBit) _flags1 = (SuggestedFlags & ~(EvalBeforeNestingBit | EvalBeforeAssigningBit)) | Like1DArrayBit
}; };
public: public: