Vectorize Visitor.h.

This commit is contained in:
Rasmus Munk Larsen 2021-10-20 16:58:01 +00:00
parent 2bf07fa5b5
commit f2c9c2d2f7

View File

@ -16,8 +16,11 @@ namespace Eigen {
namespace internal { namespace internal {
template<typename Visitor, typename Derived, int UnrollCount, bool Vectorize=((Derived::PacketAccess!=0) && functor_traits<Visitor>::PacketAccess)>
struct visitor_impl;
template<typename Visitor, typename Derived, int UnrollCount> template<typename Visitor, typename Derived, int UnrollCount>
struct visitor_impl struct visitor_impl<Visitor, Derived, UnrollCount, false>
{ {
enum { enum {
col = (UnrollCount-1) / Derived::RowsAtCompileTime, col = (UnrollCount-1) / Derived::RowsAtCompileTime,
@ -33,7 +36,7 @@ struct visitor_impl
}; };
template<typename Visitor, typename Derived> template<typename Visitor, typename Derived>
struct visitor_impl<Visitor, Derived, 1> struct visitor_impl<Visitor, Derived, 1, false>
{ {
EIGEN_DEVICE_FUNC EIGEN_DEVICE_FUNC
static inline void run(const Derived &mat, Visitor& visitor) static inline void run(const Derived &mat, Visitor& visitor)
@ -44,14 +47,14 @@ struct visitor_impl<Visitor, Derived, 1>
// This specialization enables visitors on empty matrices at compile-time // This specialization enables visitors on empty matrices at compile-time
template<typename Visitor, typename Derived> template<typename Visitor, typename Derived>
struct visitor_impl<Visitor, Derived, 0> { struct visitor_impl<Visitor, Derived, 0, false> {
EIGEN_DEVICE_FUNC EIGEN_DEVICE_FUNC
static inline void run(const Derived &/*mat*/, Visitor& /*visitor*/) static inline void run(const Derived &/*mat*/, Visitor& /*visitor*/)
{} {}
}; };
template<typename Visitor, typename Derived> template<typename Visitor, typename Derived>
struct visitor_impl<Visitor, Derived, Dynamic> struct visitor_impl<Visitor, Derived, Dynamic, /*Vectorize=*/false>
{ {
EIGEN_DEVICE_FUNC EIGEN_DEVICE_FUNC
static inline void run(const Derived& mat, Visitor& visitor) static inline void run(const Derived& mat, Visitor& visitor)
@ -65,21 +68,62 @@ struct visitor_impl<Visitor, Derived, Dynamic>
} }
}; };
/** \internal
  * Vectorized traversal of \a mat on behalf of \a visitor.
  *
  * The coefficient at (0,0) seeds the visitor through init(); every other
  * coefficient is then handed over along the storage-order direction
  * (along each row when Derived::IsRowMajor, along each column otherwise).
  * Full packets go to visitor.packet(p, i, j), where (i, j) are the
  * coordinates of the packet's first coefficient; whatever does not fill a
  * packet at the end of a row/column goes to the scalar visitor(value, i, j).
  *
  * An empty \a mat leaves \a visitor untouched. The specialization meant for
  * empty expressions is declared for Vectorize=false only, so without this
  * guard a vectorizable empty expression would read coeff(0,0).
  */
template<typename Visitor, typename Derived, int UnrollSize>
struct visitor_impl<Visitor, Derived, UnrollSize, /*Vectorize=*/true>
{
  typedef typename Derived::Scalar Scalar;
  typedef typename packet_traits<Scalar>::type Packet;

  EIGEN_DEVICE_FUNC
  static inline void run(const Derived& mat, Visitor& visitor)
  {
    const Index PacketSize = packet_traits<Scalar>::size;
    // The extents do not change during the traversal: query them once.
    const Index rows = mat.rows();
    const Index cols = mat.cols();

    // Nothing to visit, and coeff(0,0) below would be out of range.
    if (rows == 0 || cols == 0) return;

    visitor.init(mat.coeff(0,0), 0, 0);
    if (Derived::IsRowMajor) {
      for(Index i = 0; i < rows; ++i) {
        // (0,0) was consumed by init(), so the first row starts at column 1.
        Index j = i == 0 ? 1 : 0;
        for(; j+PacketSize-1 < cols; j += PacketSize) {
          Packet p = mat.packet(i, j);
          visitor.packet(p, i, j);
        }
        // Scalar tail: fewer than PacketSize coefficients remain in this row.
        for(; j < cols; ++j)
          visitor(mat.coeff(i, j), i, j);
      }
    } else {
      for(Index j = 0; j < cols; ++j) {
        // (0,0) was consumed by init(), so the first column starts at row 1.
        Index i = j == 0 ? 1 : 0;
        for(; i+PacketSize-1 < rows; i += PacketSize) {
          Packet p = mat.packet(i, j);
          visitor.packet(p, i, j);
        }
        // Scalar tail: fewer than PacketSize coefficients remain in this column.
        for(; i < rows; ++i)
          visitor(mat.coeff(i, j), i, j);
      }
    }
  }
};
// evaluator adaptor // evaluator adaptor
template<typename XprType> template<typename XprType>
class visitor_evaluator class visitor_evaluator
{ {
public: public:
typedef internal::evaluator<XprType> Evaluator;
enum {
PacketAccess = Evaluator::Flags & PacketAccessBit,
IsRowMajor = XprType::IsRowMajor,
RowsAtCompileTime = XprType::RowsAtCompileTime,
CoeffReadCost = Evaluator::CoeffReadCost
};
EIGEN_DEVICE_FUNC EIGEN_DEVICE_FUNC
explicit visitor_evaluator(const XprType &xpr) : m_evaluator(xpr), m_xpr(xpr) {} explicit visitor_evaluator(const XprType &xpr) : m_evaluator(xpr), m_xpr(xpr) { }
typedef typename XprType::Scalar Scalar; typedef typename XprType::Scalar Scalar;
typedef typename XprType::CoeffReturnType CoeffReturnType; typedef typename XprType::CoeffReturnType CoeffReturnType;
typedef typename XprType::PacketReturnType PacketReturnType;
enum {
RowsAtCompileTime = XprType::RowsAtCompileTime,
CoeffReadCost = internal::evaluator<XprType>::CoeffReadCost
};
EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR Index rows() const EIGEN_NOEXCEPT { return m_xpr.rows(); } EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR Index rows() const EIGEN_NOEXCEPT { return m_xpr.rows(); }
EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR Index cols() const EIGEN_NOEXCEPT { return m_xpr.cols(); } EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR Index cols() const EIGEN_NOEXCEPT { return m_xpr.cols(); }
@ -87,11 +131,14 @@ public:
EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index row, Index col) const EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index row, Index col) const
{ return m_evaluator.coeff(row, col); } { return m_evaluator.coeff(row, col); }
EIGEN_DEVICE_FUNC PacketReturnType packet(Index row, Index col) const
{ return m_evaluator.template packet<Unaligned,PacketReturnType>(row, col); }
protected: protected:
internal::evaluator<XprType> m_evaluator; Evaluator m_evaluator;
const XprType &m_xpr; const XprType &m_xpr;
}; };
} // end namespace internal } // end namespace internal
/** Applies the visitor \a visitor to the whole coefficients of the matrix or vector. /** Applies the visitor \a visitor to the whole coefficients of the matrix or vector.
@ -154,123 +201,131 @@ struct coeff_visitor
} }
}; };
/** \internal
  * \brief Visitor that tracks the smallest coefficient visited so far,
  *        together with the row and column at which it was seen.
  *
  * \sa DenseBase::minCoeff(Index*, Index*)
  */
template <typename Derived, int NaNPropagation>
struct min_coeff_visitor : coeff_visitor<Derived>
{
  typedef typename Derived::Scalar Scalar;

  /** Records \a value and its position (\a i, \a j) when it is strictly
    * smaller than the current result; otherwise leaves the state unchanged. */
  EIGEN_DEVICE_FUNC
  void operator() (const Scalar& value, Index i, Index j)
  {
    if(!(value < this->res))
      return;

    this->row = i;
    this->col = j;
    this->res = value;
  }
};
template <typename Derived> template<typename Scalar, int NaNPropagation, bool is_min=true>
struct min_coeff_visitor<Derived, PropagateNumbers> : coeff_visitor<Derived> struct minmax_compare {
{ typedef typename packet_traits<Scalar>::type Packet;
typedef typename Derived::Scalar Scalar; static EIGEN_DEVICE_FUNC inline bool compare(Scalar a, Scalar b) { return a < b; }
EIGEN_DEVICE_FUNC static EIGEN_DEVICE_FUNC inline Scalar predux(const Packet& p) { return predux_min<NaNPropagation>(p);}
void operator() (const Scalar& value, Index i, Index j)
{
if((numext::isnan)(this->res) || (!(numext::isnan)(value) && value < this->res))
{
this->res = value;
this->row = i;
this->col = j;
}
}
};
template <typename Derived>
struct min_coeff_visitor<Derived, PropagateNaN> : coeff_visitor<Derived>
{
typedef typename Derived::Scalar Scalar;
EIGEN_DEVICE_FUNC
void operator() (const Scalar& value, Index i, Index j)
{
if((numext::isnan)(value) || value < this->res)
{
this->res = value;
this->row = i;
this->col = j;
}
}
}; };
template<typename Scalar, int NaNPropagation> template<typename Scalar, int NaNPropagation>
struct functor_traits<min_coeff_visitor<Scalar, NaNPropagation> > { struct minmax_compare<Scalar, NaNPropagation, false> {
typedef typename packet_traits<Scalar>::type Packet;
static EIGEN_DEVICE_FUNC inline bool compare(Scalar a, Scalar b) { return a > b; }
static EIGEN_DEVICE_FUNC inline Scalar predux(const Packet& p) { return predux_max<NaNPropagation>(p);}
};
/** \internal
  * \brief Visitor locating an extremal coefficient and its coordinates.
  *
  * The direction of the search (minimum or maximum) and the packet reduction
  * are both delegated to minmax_compare<Scalar, NaNPropagation, is_min>.
  * This primary template performs no explicit NaN checks.
  */
template <typename Derived, bool is_min, int NaNPropagation>
struct minmax_coeff_visitor : coeff_visitor<Derived>
{
  using Scalar = typename Derived::Scalar;
  using Packet = typename packet_traits<Scalar>::type;
  using Comparator = minmax_compare<Scalar, NaNPropagation, is_min>;

  /** Scalar path: adopt \a value at (\a i, \a j) when the comparator ranks it
    * ahead of the current result. */
  EIGEN_DEVICE_FUNC inline
  void operator() (const Scalar& value, Index i, Index j)
  {
    if (!Comparator::compare(value, this->res))
      return;

    this->res = value;
    this->row = i;
    this->col = j;
  }

  /** Packet path: \a p holds consecutive coefficients starting at (\a i, \a j)
    * along the storage-order direction. */
  EIGEN_DEVICE_FUNC inline
  void packet(const Packet& p, Index i, Index j) {
    const Scalar best = Comparator::predux(p);
    if (!Comparator::compare(best, this->res))
      return;

    // Recover the lane of the first coefficient equal to `best`.
    // NOTE(review): relies on plset(1) numbering the lanes 1..PacketSize and
    // preverse flipping that order, so the earliest matching lane carries the
    // largest weight -- confirm against the packet primitives.
    const Index lanes = packet_traits<Scalar>::size;
    const Packet weights = preverse(plset<Packet>(Scalar(1)));
    const Packet hits = pcmp_eq(pset1<Packet>(best), p);
    const Index offset = lanes - static_cast<Index>(predux_max(pand(weights, hits)));

    this->res = best;
    if (Derived::IsRowMajor) {
      this->row = i;
      this->col = j + offset;
    } else {
      this->row = i + offset;
      this->col = j;
    }
  }
};
// NaN-suppressing variant: a number always wins over a NaN. The result is NaN
// only when every visited coefficient is NaN; the stored location is then never
// updated and stays at whatever init() recorded.
template <typename Derived, bool is_min>
struct minmax_coeff_visitor<Derived, is_min, PropagateNumbers> : coeff_visitor<Derived>
{
  using Scalar = typename Derived::Scalar;
  using Packet = typename packet_traits<Scalar>::type;
  using Comparator = minmax_compare<Scalar, PropagateNumbers, is_min>;

  /** Scalar path: adopt \a value at (\a i, \a j) when it is a number replacing
    * a NaN result, or when the comparator ranks it ahead of the current result. */
  EIGEN_DEVICE_FUNC inline
  void operator() (const Scalar& value, Index i, Index j)
  {
    const bool take = (!(numext::isnan)(value) && (numext::isnan)(this->res))
                   || Comparator::compare(value, this->res);
    if (!take)
      return;

    this->res = value;
    this->row = i;
    this->col = j;
  }

  /** Packet path: \a p holds consecutive coefficients starting at (\a i, \a j)
    * along the storage-order direction. */
  EIGEN_DEVICE_FUNC inline
  void packet(const Packet& p, Index i, Index j) {
    const Scalar best = Comparator::predux(p);
    const bool take = (!(numext::isnan)(best) && (numext::isnan)(this->res))
                   || Comparator::compare(best, this->res);
    if (!take)
      return;

    // Recover the lane of the first coefficient equal to `best`. NaN lanes
    // never compare equal, so their mask is zero and they are ignored.
    // NOTE(review): relies on plset(1) numbering the lanes 1..PacketSize and
    // preverse flipping that order -- confirm against the packet primitives.
    const Index lanes = packet_traits<Scalar>::size;
    const Packet weights = preverse(plset<Packet>(Scalar(1)));
    const Packet hits = pcmp_eq(pset1<Packet>(best), p);
    const Index offset = lanes - static_cast<Index>(predux_max(pand(weights, hits)));

    this->res = best;
    if (Derived::IsRowMajor) {
      this->row = i;
      this->col = j + offset;
    } else {
      this->row = i + offset;
      this->col = j;
    }
  }
};
// NaN-propagating variant: a NaN always wins over a number, and once the
// result is NaN nothing replaces it, so row/col end up at the first NaN visited.
template <typename Derived, bool is_min>
struct minmax_coeff_visitor<Derived, is_min, PropagateNaN> : coeff_visitor<Derived>
{
  using Scalar = typename Derived::Scalar;
  using Packet = typename packet_traits<Scalar>::type;
  using Comparator = minmax_compare<Scalar, PropagateNaN, is_min>;

  /** Scalar path: adopt \a value at (\a i, \a j) when it is the first NaN seen,
    * or when the comparator ranks it ahead of the current result. */
  EIGEN_DEVICE_FUNC inline
  void operator() (const Scalar& value, Index i, Index j)
  {
    const bool incoming_nan = (numext::isnan)(value);
    const bool take = (incoming_nan && !(numext::isnan)(this->res))
                   || Comparator::compare(value, this->res);
    if (!take)
      return;

    this->res = value;
    this->row = i;
    this->col = j;
  }

  /** Packet path: \a p holds consecutive coefficients starting at (\a i, \a j)
    * along the storage-order direction. */
  EIGEN_DEVICE_FUNC inline
  void packet(const Packet& p, Index i, Index j) {
    const Scalar best = Comparator::predux(p);
    const bool best_is_nan = (numext::isnan)(best);
    const bool take = (best_is_nan && !(numext::isnan)(this->res))
                   || Comparator::compare(best, this->res);
    if (!take)
      return;

    // Select the lanes of interest: the NaN lanes (p != p) when the reduction
    // produced NaN, otherwise the lanes equal to the extremal value.
    const Packet hits = best_is_nan ? pnot(pcmp_eq(p, p))
                                    : pcmp_eq(pset1<Packet>(best), p);

    // Recover the first selected lane.
    // NOTE(review): relies on plset(1) numbering the lanes 1..PacketSize and
    // preverse flipping that order -- confirm against the packet primitives.
    const Index lanes = packet_traits<Scalar>::size;
    const Packet weights = preverse(plset<Packet>(Scalar(1)));
    const Index offset = lanes - static_cast<Index>(predux_max(pand(weights, hits)));

    this->res = best;
    if (Derived::IsRowMajor) {
      this->row = i;
      this->col = j + offset;
    } else {
      this->row = i + offset;
      this->col = j;
    }
  }
};
template<typename Scalar, bool is_min, int NaNPropagation>
struct functor_traits<minmax_coeff_visitor<Scalar, is_min, NaNPropagation> > {
enum { enum {
Cost = NumTraits<Scalar>::AddCost Cost = NumTraits<Scalar>::AddCost,
}; PacketAccess = true
};
/** \internal
  * \brief Visitor that tracks the largest coefficient visited so far,
  *        together with the row and column at which it was seen.
  *
  * \sa DenseBase::maxCoeff(Index*, Index*)
  */
template <typename Derived, int NaNPropagation>
struct max_coeff_visitor : coeff_visitor<Derived>
{
  typedef typename Derived::Scalar Scalar;

  /** Records \a value and its position (\a i, \a j) when it is strictly
    * larger than the current result; otherwise leaves the state unchanged. */
  EIGEN_DEVICE_FUNC
  void operator() (const Scalar& value, Index i, Index j)
  {
    if(!(value > this->res))
      return;

    this->row = i;
    this->col = j;
    this->res = value;
  }
};
// NaN-suppressing maximum: while the current result is NaN every visited
// coefficient replaces it; afterwards only a strictly larger number does.
template <typename Derived>
struct max_coeff_visitor<Derived, PropagateNumbers> : coeff_visitor<Derived>
{
  typedef typename Derived::Scalar Scalar;

  /** Updates the result and its position (\a i, \a j) according to the rule
    * described above the struct. */
  EIGEN_DEVICE_FUNC
  void operator() (const Scalar& value, Index i, Index j)
  {
    const bool take = (numext::isnan)(this->res)
                   || (!(numext::isnan)(value) && value > this->res);
    if(!take)
      return;

    this->row = i;
    this->col = j;
    this->res = value;
  }
};
// NaN-propagating maximum: a NaN replaces any number, and once the result is
// NaN it is final, so row/col identify the FIRST NaN visited.
template <typename Derived>
struct max_coeff_visitor<Derived, PropagateNaN> : coeff_visitor<Derived>
{
  typedef typename Derived::Scalar Scalar;

  /** Records \a value and its position (\a i, \a j) when it is the first NaN
    * seen, or when it is strictly larger than the current result. */
  EIGEN_DEVICE_FUNC
  void operator() (const Scalar& value, Index i, Index j)
  {
    // A NaN is accepted only while the result is still a number. Accepting
    // every NaN would keep moving row/col to later NaNs and report the last
    // one. A number never replaces a NaN result: `value > NaN` is false.
    const bool first_nan = (numext::isnan)(value) && !(numext::isnan)(this->res);
    if(first_nan || value > this->res)
    {
      this->res = value;
      this->row = i;
      this->col = j;
    }
  }
};
template<typename Scalar, int NaNPropagation>
struct functor_traits<max_coeff_visitor<Scalar, NaNPropagation> > {
enum {
Cost = NumTraits<Scalar>::AddCost
}; };
}; };
@ -295,7 +350,7 @@ DenseBase<Derived>::minCoeff(IndexType* rowId, IndexType* colId) const
{ {
eigen_assert(this->rows()>0 && this->cols()>0 && "you are using an empty matrix"); eigen_assert(this->rows()>0 && this->cols()>0 && "you are using an empty matrix");
internal::min_coeff_visitor<Derived, NaNPropagation> minVisitor; internal::minmax_coeff_visitor<Derived, true, NaNPropagation> minVisitor;
this->visit(minVisitor); this->visit(minVisitor);
*rowId = minVisitor.row; *rowId = minVisitor.row;
if (colId) *colId = minVisitor.col; if (colId) *colId = minVisitor.col;
@ -321,7 +376,7 @@ DenseBase<Derived>::minCoeff(IndexType* index) const
eigen_assert(this->rows()>0 && this->cols()>0 && "you are using an empty matrix"); eigen_assert(this->rows()>0 && this->cols()>0 && "you are using an empty matrix");
EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived) EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
internal::min_coeff_visitor<Derived, NaNPropagation> minVisitor; internal::minmax_coeff_visitor<Derived, true, NaNPropagation> minVisitor;
this->visit(minVisitor); this->visit(minVisitor);
*index = IndexType((RowsAtCompileTime==1) ? minVisitor.col : minVisitor.row); *index = IndexType((RowsAtCompileTime==1) ? minVisitor.col : minVisitor.row);
return minVisitor.res; return minVisitor.res;
@ -346,7 +401,7 @@ DenseBase<Derived>::maxCoeff(IndexType* rowPtr, IndexType* colPtr) const
{ {
eigen_assert(this->rows()>0 && this->cols()>0 && "you are using an empty matrix"); eigen_assert(this->rows()>0 && this->cols()>0 && "you are using an empty matrix");
internal::max_coeff_visitor<Derived, NaNPropagation> maxVisitor; internal::minmax_coeff_visitor<Derived, false, NaNPropagation> maxVisitor;
this->visit(maxVisitor); this->visit(maxVisitor);
*rowPtr = maxVisitor.row; *rowPtr = maxVisitor.row;
if (colPtr) *colPtr = maxVisitor.col; if (colPtr) *colPtr = maxVisitor.col;
@ -372,7 +427,7 @@ DenseBase<Derived>::maxCoeff(IndexType* index) const
eigen_assert(this->rows()>0 && this->cols()>0 && "you are using an empty matrix"); eigen_assert(this->rows()>0 && this->cols()>0 && "you are using an empty matrix");
EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived) EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
internal::max_coeff_visitor<Derived, NaNPropagation> maxVisitor; internal::minmax_coeff_visitor<Derived, false, NaNPropagation> maxVisitor;
this->visit(maxVisitor); this->visit(maxVisitor);
*index = (RowsAtCompileTime==1) ? maxVisitor.col : maxVisitor.row; *index = (RowsAtCompileTime==1) ? maxVisitor.col : maxVisitor.row;
return maxVisitor.res; return maxVisitor.res;