Optimize visitor traversal in case of RowMajor.

This commit is contained in:
Antonio Sánchez 2022-03-23 15:27:57 +00:00
parent f2a3e03e9b
commit 19a6a827c4
2 changed files with 171 additions and 7 deletions

View File

@ -23,8 +23,10 @@ template<typename Visitor, typename Derived, int UnrollCount>
struct visitor_impl<Visitor, Derived, UnrollCount, false>
{
// Compile-time (row, col) of the coefficient handled at this unrolling step.
// The linear index (UnrollCount-1) is split so that consecutive steps follow
// the storage order: the column index varies fastest for row-major
// expressions, the row index varies fastest otherwise.
// Only one definition of each enumerator may exist; the earlier
// column-major-only pair was a leftover and redefined `col`/`row`.
enum {
  col = Derived::IsRowMajor ? (UnrollCount-1) % Derived::ColsAtCompileTime
                            : (UnrollCount-1) / Derived::RowsAtCompileTime,
  row = Derived::IsRowMajor ? (UnrollCount-1) / Derived::ColsAtCompileTime
                            : (UnrollCount-1) % Derived::RowsAtCompileTime
};
EIGEN_DEVICE_FUNC
@ -60,11 +62,25 @@ struct visitor_impl<Visitor, Derived, Dynamic, /*Vectorize=*/false>
static inline void run(const Derived& mat, Visitor& visitor)
{
visitor.init(mat.coeff(0,0), 0, 0);
for(Index i = 1; i < mat.rows(); ++i)
visitor(mat.coeff(i, 0), i, 0);
for(Index j = 1; j < mat.cols(); ++j)
for(Index i = 0; i < mat.rows(); ++i)
visitor(mat.coeff(i, j), i, j);
if (Derived::IsRowMajor) {
for(Index i = 1; i < mat.cols(); ++i) {
visitor(mat.coeff(0, i), 0, i);
}
for(Index j = 1; j < mat.rows(); ++j) {
for(Index i = 0; i < mat.cols(); ++i) {
visitor(mat.coeff(j, i), j, i);
}
}
} else {
for(Index i = 1; i < mat.rows(); ++i) {
visitor(mat.coeff(i, 0), i, 0);
}
for(Index j = 1; j < mat.cols(); ++j) {
for(Index i = 0; i < mat.rows(); ++i) {
visitor(mat.coeff(i, j), i, j);
}
}
}
}
};
@ -114,6 +130,7 @@ public:
PacketAccess = Evaluator::Flags & PacketAccessBit,
IsRowMajor = XprType::IsRowMajor,
RowsAtCompileTime = XprType::RowsAtCompileTime,
ColsAtCompileTime = XprType::ColsAtCompileTime,
CoeffReadCost = Evaluator::CoeffReadCost
};

View File

@ -173,6 +173,152 @@ template<typename VectorType> void vectorVisitor(const VectorType& w)
}
}
// Test-only visitor that records the (row, col) index of every call it
// receives, in call order, and remembers whether the most recent call went
// through the packet overload.
//
// T            scalar type accepted by init() / operator().
// Vectorizable not used inside the struct itself; it is only read by the
//              functor_traits specialization for this type.
template<typename T, bool Vectorizable>
struct TrackedVisitor {
  // First-coefficient hook: recorded exactly like any other scalar visit.
  void init(T v, int i, int j) { (*this)(v, i, j); }

  // Scalar visit: log the index and note that the latest visit was scalar.
  // The value itself is irrelevant to the test, hence the unnamed parameter.
  void operator()(T /*v*/, int i, int j) {
    visited.emplace_back(i, j);
    vectorized = false;
  }

  // Packet visit: log the index of the packet's first coefficient as passed by
  // the caller and note that the latest visit was vectorized.
  template<typename Packet>
  void packet(Packet /*p*/, int i, int j) {
    visited.emplace_back(i, j);
    vectorized = true;
  }

  // Indices in the order they were visited, stored as (row, col).
  std::vector<std::pair<int,int>> visited;
  // True iff the last recorded visit was a packet; false before any visit so
  // that reading it on a fresh visitor is well defined.
  bool vectorized = false;
};
namespace Eigen {
namespace internal {

// Traits for TrackedVisitor: a constant cost of 1, with packet access
// switched on or off by the visitor's second template argument.
template<typename Scalar, bool EnablePackets>
struct functor_traits<TrackedVisitor<Scalar, EnablePackets> > {
  enum {
    Cost = 1,
    PacketAccess = EnablePackets
  };
};

}  // namespace internal
}  // namespace Eigen
void checkOptimalTraversal() {
// Unrolled - ColMajor.
{
Eigen::Matrix4f X = Eigen::Matrix4f::Random();
TrackedVisitor<double, false> visitor;
X.visit(visitor);
int count = 0;
for (int j=0; j<X.cols(); ++j) {
for (int i=0; i<X.rows(); ++i) {
VERIFY_IS_EQUAL(visitor.visited[count].first, i);
VERIFY_IS_EQUAL(visitor.visited[count].second, j);
++count;
}
}
}
// Unrolled - RowMajor.
using Matrix4fRowMajor = Eigen::Matrix<float, 4, 4, Eigen::RowMajor>;
{
Matrix4fRowMajor X = Matrix4fRowMajor::Random();
TrackedVisitor<double, false> visitor;
X.visit(visitor);
int count = 0;
for (int i=0; i<X.rows(); ++i) {
for (int j=0; j<X.cols(); ++j) {
VERIFY_IS_EQUAL(visitor.visited[count].first, i);
VERIFY_IS_EQUAL(visitor.visited[count].second, j);
++count;
}
}
}
// Not unrolled - ColMajor
{
Eigen::MatrixXf X = Eigen::MatrixXf::Random(4, 4);
TrackedVisitor<double, false> visitor;
X.visit(visitor);
int count = 0;
for (int j=0; j<X.cols(); ++j) {
for (int i=0; i<X.rows(); ++i) {
VERIFY_IS_EQUAL(visitor.visited[count].first, i);
VERIFY_IS_EQUAL(visitor.visited[count].second, j);
++count;
}
}
}
// Not unrolled - RowMajor.
using MatrixXfRowMajor = Eigen::Matrix<float, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor>;
{
MatrixXfRowMajor X = MatrixXfRowMajor::Random(4, 4);
TrackedVisitor<double, false> visitor;
X.visit(visitor);
int count = 0;
for (int i=0; i<X.rows(); ++i) {
for (int j=0; j<X.cols(); ++j) {
VERIFY_IS_EQUAL(visitor.visited[count].first, i);
VERIFY_IS_EQUAL(visitor.visited[count].second, j);
++count;
}
}
}
// Vectorized - ColMajor
{
// Ensure rows/cols is larger than packet size.
constexpr int PacketSize = Eigen::internal::packet_traits<float>::size;
Eigen::MatrixXf X = Eigen::MatrixXf::Random(4 * PacketSize, 4 * PacketSize);
TrackedVisitor<double, true> visitor;
X.visit(visitor);
int previ = -1;
int prevj = 0;
for (const auto& p : visitor.visited) {
int i = p.first;
int j = p.second;
VERIFY(
(j == prevj && i == previ + 1) // Advance single element
|| (j == prevj && i == previ + PacketSize) // Advance packet
|| (j == prevj + 1 && i == 0) // Advance column
);
previ = i;
prevj = j;
}
if (Eigen::internal::packet_traits<float>::Vectorizable) {
VERIFY(visitor.vectorized);
}
}
// Vectorized - RowMajor.
{
// Ensure rows/cols is larger than packet size.
constexpr int PacketSize = Eigen::internal::packet_traits<float>::size;
MatrixXfRowMajor X = MatrixXfRowMajor::Random(4 * PacketSize, 4 * PacketSize);
TrackedVisitor<double, true> visitor;
X.visit(visitor);
int previ = 0;
int prevj = -1;
for (const auto& p : visitor.visited) {
int i = p.first;
int j = p.second;
VERIFY(
(i == previ && j == prevj + 1) // Advance single element
|| (i == previ && j == prevj + PacketSize) // Advance packet
|| (i == previ + 1 && j == 0) // Advance row
);
previ = i;
prevj = j;
}
if (Eigen::internal::packet_traits<float>::Vectorizable) {
VERIFY(visitor.vectorized);
}
}
}
EIGEN_DECLARE_TEST(visitor)
{
for(int i = 0; i < g_repeat; i++) {
@ -190,4 +336,5 @@ EIGEN_DECLARE_TEST(visitor)
CALL_SUBTEST_9( vectorVisitor(RowVectorXd(10)) );
CALL_SUBTEST_10( vectorVisitor(VectorXf(33)) );
}
CALL_SUBTEST_11(checkOptimalTraversal());
}