From ee63e15e2cad25d8acd0ac6ba6dc9d7a84f74f6c Mon Sep 17 00:00:00 2001 From: Benoit Jacob Date: Sat, 29 Sep 2007 08:28:36 +0000 Subject: [PATCH] make matrix multiplication do immediate evaluation; add lazyMul() for the old behaviour some reorganization, especially in MatrixStorage start playing with loop unrolling, always_inline, and __restrict__ --- CMakeLists.txt | 4 +-- doc/example.cpp | 2 +- doc/tutorial.cpp | 18 ----------- src/internal/Eval.h | 2 +- src/internal/Matrix.h | 8 ++--- src/internal/MatrixOps.h | 33 ++++++++++++++------ src/internal/MatrixStorage.h | 59 +++++++++++++++++++++++++++++------- src/internal/Object.h | 23 ++++++++++++-- src/internal/Util.h | 8 +++++ test/matrixops.cpp | 6 +++- 10 files changed, 113 insertions(+), 50 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 4bfe11c6d..180453180 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -7,8 +7,8 @@ set(CMAKE_INCLUDE_CURRENT_DIR ON) if(CMAKE_COMPILER_IS_GNUCXX) if (CMAKE_SYSTEM_NAME MATCHES Linux) - set ( CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Wno-long-long -ansi -Wundef -Wcast-align -Werror-implicit-function-declaration -Wchar-subscripts -Wall -W -Wpointer-arith -Wwrite-strings -Wformat-security -Wmissing-format-attribute -fno-common") - set ( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wnon-virtual-dtor -Wno-long-long -ansi -Wundef -Wcast-align -Wchar-subscripts -Wall -W -Wpointer-arith -Wwrite-strings -Wformat-security -fno-exceptions -fno-check-new -fno-common") + set ( CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Wno-long-long -ansi -Wundef -Wcast-align -Werror-implicit-function-declaration -Wchar-subscripts -Wall -W -Wpointer-arith -Wwrite-strings -Wformat-security -Wmissing-format-attribute -fno-common -fstrict-aliasing") + set ( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wnon-virtual-dtor -Wno-long-long -ansi -Wundef -Wcast-align -Wchar-subscripts -Wall -W -Wpointer-arith -Wwrite-strings -Wformat-security -fno-exceptions -fno-check-new -fno-common -fstrict-aliasing") endif (CMAKE_SYSTEM_NAME MATCHES Linux) endif (CMAKE_COMPILER_IS_GNUCXX) diff --git a/doc/example.cpp b/doc/example.cpp index 89f7820b1..25321f4f8 100644 --- a/doc/example.cpp +++ b/doc/example.cpp @@ -12,7 +12,7 @@ template EiScalarProduct twice(const EiObject& m) { - return static_cast(2) * m; + return 2 * m; } int main(int, char**) diff --git a/doc/tutorial.cpp b/doc/tutorial.cpp index 1da3deb1d..03495dbcb 100644 --- a/doc/tutorial.cpp +++ b/doc/tutorial.cpp @@ -29,23 +29,5 @@ int main(int, char **) cout << "Row 0 of m2, written as a column vector, is:" << endl << m2.row(0) << endl; cout << "Column 1 of m2 is:" << endl << m2.col(1) << endl; cout << "The matrix m2 with row 0 and column 1 removed is:" << endl << m2.minor(0,1) << endl; - - cout << endl << "Now let us study a tricky issue." << endl; - cout << "Recall that the matrix product m*m is:" << endl << m*m << endl; - cout << "We want to store that into m, i.e. do \"m = m * m;\"" << endl; - cout << "Here we must be very careful. For if we do \"m = m * m;\"," << endl - << "the matrix m becomes" << endl; - EiMatrix m_save = m; - m = m * m; // the bogus operation - cout << m << "," << endl; - cout << "which is not what was wanted!" << endl - << "Explanation: because of the way expression templates work, the matrix m gets" << endl - << "overwritten _while_ the matrix product m * m is being computed." << endl - << "This is the counterpart of eliminating temporary objects!" << endl - << "Anyway, if you want to store m * m into m, you can do this:" << endl - << " m = (m * m).eval();" << endl; - m = m_save; - m = (m * m).eval(); - cout << "And m is now:" << endl << m << endl << "as was expected." << endl; return 0; } diff --git a/src/internal/Eval.h b/src/internal/Eval.h index 2d792d822..4de32ebac 100644 --- a/src/internal/Eval.h +++ b/src/internal/Eval.h @@ -33,7 +33,7 @@ template class EiEval { public: typedef typename Expression::Scalar Scalar; - typedef EiMatrix< Scalar, Expression::RowsAtCompileTime, Expression::ColsAtCompileTime> MatrixType; + typedef EiMatrix MatrixType; typedef Expression Base; friend class EiObject; diff --git a/src/internal/Matrix.h b/src/internal/Matrix.h index 738cc9f6b..faed649c2 100644 --- a/src/internal/Matrix.h +++ b/src/internal/Matrix.h @@ -44,22 +44,22 @@ class EiMatrix : public EiObject<_Scalar, EiMatrix<_Scalar, _Rows, _Cols> >, static const int RowsAtCompileTime = _Rows, ColsAtCompileTime = _Cols; - const Scalar* array() const + const Scalar* EI_RESTRICT array() const { return Storage::m_array; } - Scalar* array() + Scalar* EI_RESTRICT array() { return Storage::m_array; } private: Ref _ref() const { return Ref(*const_cast(this)); } - const Scalar& _read(int row, int col = 0) const + const Scalar& EI_RESTRICT _read(int row, int col = 0) const { EI_CHECK_RANGES(*this, row, col); return array()[row + col * Storage::_rows()]; } - Scalar& _write(int row, int col = 0) + Scalar& EI_RESTRICT _write(int row, int col = 0) { EI_CHECK_RANGES(*this, row, col); return array()[row + col * Storage::_rows()]; diff --git a/src/internal/MatrixOps.h b/src/internal/MatrixOps.h index f5120277b..2798dd9c1 100644 --- a/src/internal/MatrixOps.h +++ b/src/internal/MatrixOps.h @@ -39,7 +39,7 @@ template class EiSum static const int RowsAtCompileTime = Lhs::RowsAtCompileTime, ColsAtCompileTime = Rhs::ColsAtCompileTime; - EiSum(const LhsRef& lhs, const RhsRef& rhs) + EiSum(const LhsRef& EI_RESTRICT lhs, const RhsRef& EI_RESTRICT rhs) : m_lhs(lhs), m_rhs(rhs) { assert(lhs.rows() == rhs.rows() && lhs.cols() == rhs.cols()); @@ -79,7 +79,7 @@ template class EiDifference static const int RowsAtCompileTime = Lhs::RowsAtCompileTime, ColsAtCompileTime = Rhs::ColsAtCompileTime; - EiDifference(const LhsRef& lhs, const RhsRef& rhs) + EiDifference(const LhsRef& EI_RESTRICT lhs, const RhsRef& EI_RESTRICT rhs) : m_lhs(lhs), m_rhs(rhs) { assert(lhs.rows() == rhs.rows() && lhs.cols() == rhs.cols()); @@ -118,7 +118,7 @@ template class EiMatrixProduct static const int RowsAtCompileTime = Lhs::RowsAtCompileTime, ColsAtCompileTime = Rhs::ColsAtCompileTime; - EiMatrixProduct(const LhsRef& lhs, const RhsRef& rhs) + EiMatrixProduct(const LhsRef& EI_RESTRICT lhs, const RhsRef& EI_RESTRICT rhs) : m_lhs(lhs), m_rhs(rhs) { assert(lhs.cols() == rhs.rows()); @@ -136,10 +136,17 @@ template class EiMatrixProduct Scalar _read(int row, int col) const { - Scalar x = static_cast(0); - for(int i = 0; i < m_lhs.cols(); i++) - x += m_lhs.read(row, i) * m_rhs.read(i, col); - return x; + if(Lhs::ColsAtCompileTime == 3) + { + return m_lhs(row,0) * m_rhs(0,col) + m_lhs(row,1) * m_rhs(1,col) + m_lhs(row,2) * m_rhs(2,col); + } + else + { + Scalar x = static_cast(0); + for(int i = 0; i < m_lhs.cols(); i++) + x += m_lhs.read(row, i) * m_rhs.read(i, col); + return x; + } } protected: @@ -161,11 +168,19 @@ operator-(const EiObject &mat1, const EiObject(mat1.ref(), mat2.ref()); } +template +template +EiMatrixProduct +EiObject::lazyMul(const EiObject &other) const +{ + return EiMatrixProduct(ref(), other.ref()); +} + template -EiMatrixProduct +EiEval > operator*(const EiObject &mat1, const EiObject &mat2) { - return EiMatrixProduct(mat1.ref(), mat2.ref()); + return mat1.lazyMul(mat2).eval(); } template diff --git a/src/internal/MatrixStorage.h b/src/internal/MatrixStorage.h index 822903b14..609bf7c29 100644 --- a/src/internal/MatrixStorage.h +++ b/src/internal/MatrixStorage.h @@ -32,7 +32,7 @@ template -class EiMatrixStorage +template +class EiMatrixStorage { protected: int m_rows; - Scalar *m_array; + Scalar* EI_RESTRICT m_array; void resize(int rows, int cols) - { assert(rows > 0 && cols == 1); - if(rows > m_rows) { + { + assert(rows > 0 && cols == ColsAtCompileTime); + if(rows > m_rows) + { delete[] m_array; - m_array = new Scalar[rows]; + m_array = new Scalar[rows * ColsAtCompileTime]; } m_rows = rows; } @@ -74,13 +76,48 @@ class EiMatrixStorage { return m_rows; } int _cols() const - { return 1; } + { return ColsAtCompileTime; } public: EiMatrixStorage(int rows, int cols) : m_rows(rows) { - assert(m_rows > 0 && cols == 1); - m_array = new Scalar[m_rows]; + assert(m_rows > 0 && cols == ColsAtCompileTime); + m_array = new Scalar[m_rows * ColsAtCompileTime]; + } + + ~EiMatrixStorage() + { delete[] m_array; } +}; + +template +class EiMatrixStorage +{ + protected: + int m_cols; + Scalar* EI_RESTRICT m_array; + + void resize(int rows, int cols) + { + assert(rows == RowsAtCompileTime && cols > 0); + if(cols > m_cols) + { + delete[] m_array; + m_array = new Scalar[cols * RowsAtCompileTime]; + } + m_cols = cols; + } + + int _rows() const + { return RowsAtCompileTime; } + + int _cols() const + { return m_cols; } + + public: + EiMatrixStorage(int rows, int cols) : m_cols(cols) + { + assert(rows == RowsAtCompileTime && cols > 0); + m_array = new Scalar[m_cols * RowsAtCompileTime]; } ~EiMatrixStorage() @@ -92,7 +129,7 @@ class EiMatrixStorage { protected: int m_rows, m_cols; - Scalar *m_array; + Scalar* EI_RESTRICT m_array; void resize(int rows, int cols) { diff --git a/src/internal/Object.h b/src/internal/Object.h index fefa15897..7ec59da8c 100644 --- a/src/internal/Object.h +++ b/src/internal/Object.h @@ -36,6 +36,19 @@ template class EiObject template void _copy_helper(const EiObject& other) { + if(RowsAtCompileTime == 3 && ColsAtCompileTime == 3) + { + write(0,0) = other.read(0,0); + write(1,0) = other.read(1,0); + write(2,0) = other.read(2,0); + write(0,1) = other.read(0,1); + write(1,1) = other.read(1,1); + write(2,1) = other.read(2,1); + write(0,2) = other.read(0,2); + write(1,2) = other.read(1,2); + write(2,2) = other.read(2,2); + } + else for(int i = 0; i < rows(); i++) for(int j = 0; j < cols(); j++) write(i, j) = other.read(i, j); @@ -54,7 +67,7 @@ template class EiObject Ref ref() const { return static_cast(this)->_ref(); } - Scalar& write(int row, int col) + Scalar& EI_RESTRICT write(int row, int col) { return static_cast(this)->_write(row, col); } @@ -86,6 +99,10 @@ template class EiObject EiMinor minor(int row, int col); EiBlock block(int startRow, int endRow, int startCol= 0, int endCol = 0); + template + EiMatrixProduct + lazyMul(const EiObject& other) const EI_ALWAYS_INLINE; + template Derived& operator+=(const EiObject& other); template @@ -110,10 +127,10 @@ template class EiObject Scalar operator()(int row, int col = 0) const { return read(row, col); } - Scalar& operator()(int row, int col = 0) + Scalar& EI_RESTRICT operator()(int row, int col = 0) { return write(row, col); } - EiEval eval() const; + EiEval eval() const EI_ALWAYS_INLINE; }; template diff --git a/src/internal/Util.h b/src/internal/Util.h index c7730d6fb..03df6b097 100644 --- a/src/internal/Util.h +++ b/src/internal/Util.h @@ -68,6 +68,14 @@ const int EiDynamic = -1; #define EI_UNUSED(x) (void)x +#ifdef __GNUC__ +# define EI_ALWAYS_INLINE __attribute__((always_inline)) +# define EI_RESTRICT __restrict__ +#else +# define EI_ALWAYS_INLINE +# define EI_RESTRICT +#endif + #define EI_INHERIT_ASSIGNMENT_OPERATOR(Derived, Op) \ template \ Derived& operator Op(const EiObject& other) \ diff --git a/test/matrixops.cpp b/test/matrixops.cpp index f8f787819..9df827a2d 100644 --- a/test/matrixops.cpp +++ b/test/matrixops.cpp @@ -49,7 +49,11 @@ template