mirror of
https://gitlab.com/libeigen/eigen.git
synced 2025-06-03 18:24:02 +08:00
Optimize visitor traversal in case of RowMajor.
This commit is contained in:
parent
f2a3e03e9b
commit
19a6a827c4
@ -23,8 +23,10 @@ template<typename Visitor, typename Derived, int UnrollCount>
|
||||
struct visitor_impl<Visitor, Derived, UnrollCount, false>
|
||||
{
|
||||
enum {
|
||||
col = (UnrollCount-1) / Derived::RowsAtCompileTime,
|
||||
row = (UnrollCount-1) % Derived::RowsAtCompileTime
|
||||
col = Derived::IsRowMajor ? (UnrollCount-1) % Derived::ColsAtCompileTime
|
||||
: (UnrollCount-1) / Derived::RowsAtCompileTime,
|
||||
row = Derived::IsRowMajor ? (UnrollCount-1) / Derived::ColsAtCompileTime
|
||||
: (UnrollCount-1) % Derived::RowsAtCompileTime
|
||||
};
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
@ -60,11 +62,25 @@ struct visitor_impl<Visitor, Derived, Dynamic, /*Vectorize=*/false>
|
||||
static inline void run(const Derived& mat, Visitor& visitor)
|
||||
{
|
||||
visitor.init(mat.coeff(0,0), 0, 0);
|
||||
for(Index i = 1; i < mat.rows(); ++i)
|
||||
visitor(mat.coeff(i, 0), i, 0);
|
||||
for(Index j = 1; j < mat.cols(); ++j)
|
||||
for(Index i = 0; i < mat.rows(); ++i)
|
||||
visitor(mat.coeff(i, j), i, j);
|
||||
if (Derived::IsRowMajor) {
|
||||
for(Index i = 1; i < mat.cols(); ++i) {
|
||||
visitor(mat.coeff(0, i), 0, i);
|
||||
}
|
||||
for(Index j = 1; j < mat.rows(); ++j) {
|
||||
for(Index i = 0; i < mat.cols(); ++i) {
|
||||
visitor(mat.coeff(j, i), j, i);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
for(Index i = 1; i < mat.rows(); ++i) {
|
||||
visitor(mat.coeff(i, 0), i, 0);
|
||||
}
|
||||
for(Index j = 1; j < mat.cols(); ++j) {
|
||||
for(Index i = 0; i < mat.rows(); ++i) {
|
||||
visitor(mat.coeff(i, j), i, j);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
@ -114,6 +130,7 @@ public:
|
||||
PacketAccess = Evaluator::Flags & PacketAccessBit,
|
||||
IsRowMajor = XprType::IsRowMajor,
|
||||
RowsAtCompileTime = XprType::RowsAtCompileTime,
|
||||
ColsAtCompileTime = XprType::ColsAtCompileTime,
|
||||
CoeffReadCost = Evaluator::CoeffReadCost
|
||||
};
|
||||
|
||||
|
147
test/visitor.cpp
147
test/visitor.cpp
@ -173,6 +173,152 @@ template<typename VectorType> void vectorVisitor(const VectorType& w)
|
||||
}
|
||||
}
|
||||
|
||||
// Test visitor that ignores coefficient values and only records, in call
// order, the (i, j) index pair passed to each visit.
//
// Template parameters:
//   T            - scalar type accepted by init() and operator().
//   Vectorizable - not used inside the struct itself; it is carried in the
//                  type so that a traits specialization can key off it.
//
// Members:
//   visited    - one (i, j) entry per init()/operator()/packet() call.
//   vectorized - true iff the most recent call was packet(); false before
//                any call has been made.
template<typename T, bool Vectorizable>
struct TrackedVisitor {
  // First-coefficient hook: recorded exactly like any other scalar visit.
  void init(T v, int i, int j) { (*this)(v, i, j); }

  // Scalar visit. The value is deliberately unnamed: only the position matters.
  void operator()(T /*v*/, int i, int j) {
    visited.emplace_back(i, j);
    vectorized = false;
  }

  // Packet visit. Records the position it was invoked with and remembers that
  // the packet path was taken.
  template<typename Packet>
  void packet(Packet /*p*/, int i, int j) {
    visited.emplace_back(i, j);
    vectorized = true;
  }

  std::vector<std::pair<int,int>> visited;
  // Initialized so that reading the flag on a visitor that was never invoked
  // is well defined.
  bool vectorized = false;
};
|
||||
|
||||
namespace Eigen {
|
||||
namespace internal {
|
||||
|
||||
template<typename T, bool Vectorizable>
|
||||
struct functor_traits<TrackedVisitor<T, Vectorizable> > {
|
||||
enum { PacketAccess = Vectorizable, Cost = 1 };
|
||||
};
|
||||
|
||||
} // namespace internal
|
||||
} // namespace Eigen
|
||||
|
||||
void checkOptimalTraversal() {
|
||||
|
||||
// Unrolled - ColMajor.
|
||||
{
|
||||
Eigen::Matrix4f X = Eigen::Matrix4f::Random();
|
||||
TrackedVisitor<double, false> visitor;
|
||||
X.visit(visitor);
|
||||
int count = 0;
|
||||
for (int j=0; j<X.cols(); ++j) {
|
||||
for (int i=0; i<X.rows(); ++i) {
|
||||
VERIFY_IS_EQUAL(visitor.visited[count].first, i);
|
||||
VERIFY_IS_EQUAL(visitor.visited[count].second, j);
|
||||
++count;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Unrolled - RowMajor.
|
||||
using Matrix4fRowMajor = Eigen::Matrix<float, 4, 4, Eigen::RowMajor>;
|
||||
{
|
||||
Matrix4fRowMajor X = Matrix4fRowMajor::Random();
|
||||
TrackedVisitor<double, false> visitor;
|
||||
X.visit(visitor);
|
||||
int count = 0;
|
||||
for (int i=0; i<X.rows(); ++i) {
|
||||
for (int j=0; j<X.cols(); ++j) {
|
||||
VERIFY_IS_EQUAL(visitor.visited[count].first, i);
|
||||
VERIFY_IS_EQUAL(visitor.visited[count].second, j);
|
||||
++count;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Not unrolled - ColMajor
|
||||
{
|
||||
Eigen::MatrixXf X = Eigen::MatrixXf::Random(4, 4);
|
||||
TrackedVisitor<double, false> visitor;
|
||||
X.visit(visitor);
|
||||
int count = 0;
|
||||
for (int j=0; j<X.cols(); ++j) {
|
||||
for (int i=0; i<X.rows(); ++i) {
|
||||
VERIFY_IS_EQUAL(visitor.visited[count].first, i);
|
||||
VERIFY_IS_EQUAL(visitor.visited[count].second, j);
|
||||
++count;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Not unrolled - RowMajor.
|
||||
using MatrixXfRowMajor = Eigen::Matrix<float, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor>;
|
||||
{
|
||||
MatrixXfRowMajor X = MatrixXfRowMajor::Random(4, 4);
|
||||
TrackedVisitor<double, false> visitor;
|
||||
X.visit(visitor);
|
||||
int count = 0;
|
||||
for (int i=0; i<X.rows(); ++i) {
|
||||
for (int j=0; j<X.cols(); ++j) {
|
||||
VERIFY_IS_EQUAL(visitor.visited[count].first, i);
|
||||
VERIFY_IS_EQUAL(visitor.visited[count].second, j);
|
||||
++count;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Vectorized - ColMajor
|
||||
{
|
||||
// Ensure rows/cols is larger than packet size.
|
||||
constexpr int PacketSize = Eigen::internal::packet_traits<float>::size;
|
||||
Eigen::MatrixXf X = Eigen::MatrixXf::Random(4 * PacketSize, 4 * PacketSize);
|
||||
TrackedVisitor<double, true> visitor;
|
||||
X.visit(visitor);
|
||||
int previ = -1;
|
||||
int prevj = 0;
|
||||
for (const auto& p : visitor.visited) {
|
||||
int i = p.first;
|
||||
int j = p.second;
|
||||
VERIFY(
|
||||
(j == prevj && i == previ + 1) // Advance single element
|
||||
|| (j == prevj && i == previ + PacketSize) // Advance packet
|
||||
|| (j == prevj + 1 && i == 0) // Advance column
|
||||
);
|
||||
previ = i;
|
||||
prevj = j;
|
||||
}
|
||||
if (Eigen::internal::packet_traits<float>::Vectorizable) {
|
||||
VERIFY(visitor.vectorized);
|
||||
}
|
||||
}
|
||||
|
||||
// Vectorized - RowMajor.
|
||||
{
|
||||
// Ensure rows/cols is larger than packet size.
|
||||
constexpr int PacketSize = Eigen::internal::packet_traits<float>::size;
|
||||
MatrixXfRowMajor X = MatrixXfRowMajor::Random(4 * PacketSize, 4 * PacketSize);
|
||||
TrackedVisitor<double, true> visitor;
|
||||
X.visit(visitor);
|
||||
int previ = 0;
|
||||
int prevj = -1;
|
||||
for (const auto& p : visitor.visited) {
|
||||
int i = p.first;
|
||||
int j = p.second;
|
||||
VERIFY(
|
||||
(i == previ && j == prevj + 1) // Advance single element
|
||||
|| (i == previ && j == prevj + PacketSize) // Advance packet
|
||||
|| (i == previ + 1 && j == 0) // Advance row
|
||||
);
|
||||
previ = i;
|
||||
prevj = j;
|
||||
}
|
||||
if (Eigen::internal::packet_traits<float>::Vectorizable) {
|
||||
VERIFY(visitor.vectorized);
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
EIGEN_DECLARE_TEST(visitor)
|
||||
{
|
||||
for(int i = 0; i < g_repeat; i++) {
|
||||
@ -190,4 +336,5 @@ EIGEN_DECLARE_TEST(visitor)
|
||||
CALL_SUBTEST_9( vectorVisitor(RowVectorXd(10)) );
|
||||
CALL_SUBTEST_10( vectorVisitor(VectorXf(33)) );
|
||||
}
|
||||
CALL_SUBTEST_11(checkOptimalTraversal());
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user