diff --git a/Eigen/src/Core/Assign.h b/Eigen/src/Core/Assign.h index 85694b35f..d20ab7446 100644 --- a/Eigen/src/Core/Assign.h +++ b/Eigen/src/Core/Assign.h @@ -348,7 +348,7 @@ struct ei_assign_impl { const int row = rowMajor ? i : index; const int col = rowMajor ? index : i; - dst.template writePacket(row, col, src.template packet(row, col)); + dst.template writePacket(row, col, src.template packet(row, col)); } // do the non-vectorizable part of the assignment diff --git a/Eigen/src/Core/Block.h b/Eigen/src/Core/Block.h index 33aeb78a3..d954e1a2e 100644 --- a/Eigen/src/Core/Block.h +++ b/Eigen/src/Core/Block.h @@ -168,26 +168,26 @@ template class Block template inline PacketScalar packet(int row, int col) const { - return m_matrix.template packet(row + m_startRow.value(), col + m_startCol.value()); + return m_matrix.template packet(row + m_startRow.value(), col + m_startCol.value()); } template inline void writePacket(int row, int col, const PacketScalar& x) { - m_matrix.const_cast_derived().template writePacket(row + m_startRow.value(), col + m_startCol.value(), x); + m_matrix.const_cast_derived().template writePacket(row + m_startRow.value(), col + m_startCol.value(), x); } template inline PacketScalar packet(int index) const { - return m_matrix.template packet(m_startRow.value() + (RowsAtCompileTime == 1 ? 0 : index), + return m_matrix.template packet(m_startRow.value() + (RowsAtCompileTime == 1 ? 0 : index), m_startCol.value() + (RowsAtCompileTime == 1 ? index : 0)); } template inline void writePacket(int index, const PacketScalar& x) { - m_matrix.const_cast_derived().template writePacket + m_matrix.const_cast_derived().template writePacket (m_startRow.value() + (RowsAtCompileTime == 1 ? 0 : index), m_startCol.value() + (RowsAtCompileTime == 1 ? 
index : 0), x); } @@ -195,10 +195,10 @@ template class Block protected: const typename MatrixType::Nested m_matrix; - ei_int_if_dynamic m_startRow; - ei_int_if_dynamic m_startCol; - ei_int_if_dynamic m_blockRows; - ei_int_if_dynamic m_blockCols; + const ei_int_if_dynamic m_startRow; + const ei_int_if_dynamic m_startCol; + const ei_int_if_dynamic m_blockRows; + const ei_int_if_dynamic m_blockCols; }; /** \returns a dynamic-size expression of a block in *this. diff --git a/Eigen/src/Core/Coeffs.h b/Eigen/src/Core/Coeffs.h index cc8bc713c..0f87099e2 100644 --- a/Eigen/src/Core/Coeffs.h +++ b/Eigen/src/Core/Coeffs.h @@ -214,7 +214,7 @@ inline typename ei_traits::Scalar& MatrixBase * to ensure that a packet really starts there. This method is only available on expressions having the * PacketAccessBit. * - * The \a LoadMode parameter may have the value \a Aligned or \a UnAligned. Its effect is to select + * The \a LoadMode parameter may have the value \a Aligned or \a Unaligned. Its effect is to select * the appropriate vectorization instruction. Aligned access is faster, but is only possible for packets * starting at an address which is a multiple of the packet size. */ @@ -232,7 +232,7 @@ MatrixBase::packet(int row, int col) const * to ensure that a packet really starts there. This method is only available on expressions having the * PacketAccessBit. * - * The \a LoadMode parameter may have the value \a Aligned or \a UnAligned. Its effect is to select + * The \a LoadMode parameter may have the value \a Aligned or \a Unaligned. Its effect is to select * the appropriate vectorization instruction. Aligned access is faster, but is only possible for packets * starting at an address which is a multiple of the packet size. */ @@ -250,7 +250,7 @@ inline void MatrixBase::writePacket * to ensure that a packet really starts there. This method is only available on expressions having the * PacketAccessBit and the LinearAccessBit. 
* - * The \a LoadMode parameter may have the value \a Aligned or \a UnAligned. Its effect is to select + * The \a LoadMode parameter may have the value \a Aligned or \a Unaligned. Its effect is to select * the appropriate vectorization instruction. Aligned access is faster, but is only possible for packets * starting at an address which is a multiple of the packet size. */ @@ -267,7 +267,7 @@ MatrixBase::packet(int index) const * to ensure that a packet really starts there. This method is only available on expressions having the * PacketAccessBit and the LinearAccessBit. * - * The \a LoadMode parameter may have the value \a Aligned or \a UnAligned. Its effect is to select + * The \a LoadMode parameter may have the value \a Aligned or \a Unaligned. Its effect is to select * the appropriate vectorization instruction. Aligned access is faster, but is only possible for packets * starting at an address which is a multiple of the packet size. */ diff --git a/Eigen/src/Core/DummyPacketMath.h b/Eigen/src/Core/DummyPacketMath.h index 9de204df3..6a35a232c 100644 --- a/Eigen/src/Core/DummyPacketMath.h +++ b/Eigen/src/Core/DummyPacketMath.h @@ -30,50 +30,93 @@ // of generic vectorized code. However, at runtime, they should never be // called, TODO so sould we raise an assertion or not ? 
/** \internal \returns a + b (coeff-wise) */ -template inline Scalar ei_padd(const Scalar& a, const Scalar& b) { return a + b; } +template inline Packet +ei_padd(const Packet&, + const Packet&) { Packet ret; return ret; } /** \internal \returns a - b (coeff-wise) */ -template inline Scalar ei_psub(const Scalar& a, const Scalar& b) { return a - b; } +template inline Packet +ei_psub(const Packet&, + const Packet&) { Packet ret; return ret; } /** \internal \returns a * b (coeff-wise) */ -template inline Scalar ei_pmul(const Scalar& a, const Scalar& b) { return a * b; } +template inline Packet +ei_pmul(const Packet&, + const Packet&) { Packet ret; return ret; } /** \internal \returns a / b (coeff-wise) */ -template inline Scalar ei_pdiv(const Scalar& a, const Scalar& b) { return a / b; } - -/** \internal \returns a * b - c (coeff-wise) */ -template inline Scalar ei_pmadd(const Scalar& a, const Scalar& b, const Scalar& c) -{ return ei_padd(ei_pmul(a, b),c); } +template inline Packet +ei_pdiv(const Packet&, + const Packet&) { Packet ret; return ret; } /** \internal \returns the min of \a a and \a b (coeff-wise) */ -template inline Scalar ei_pmin(const Scalar& a, const Scalar& b) { return std::min(a,b); } +template inline Packet +ei_pmin(const Packet&, + const Packet&) { Packet ret; return ret; } /** \internal \returns the max of \a a and \a b (coeff-wise) */ -template inline Scalar ei_pmax(const Scalar& a, const Scalar& b) { return std::max(a,b); } +template inline Packet +ei_pmax(const Packet&, + const Packet&) { Packet ret; return ret; } /** \internal \returns a packet version of \a *from, from must be 16 bytes aligned */ -template inline Scalar ei_pload(const Scalar* from) { return *from; } +template inline typename ei_packet_traits::type +ei_pload(const Scalar*) { typename ei_packet_traits::type ret; return ret; } /** \internal \returns a packet version of \a *from, (un-aligned load) */ -template inline Scalar ei_ploadu(const Scalar* from) { return *from; } +template 
inline typename ei_packet_traits::type +ei_ploadu(const Scalar*) { typename ei_packet_traits::type ret; return ret; } /** \internal \returns a packet with constant coefficients \a a, e.g.: (a,a,a,a) */ -template inline Scalar ei_pset1(const Scalar& a) { return a; } +template inline typename ei_packet_traits::type +ei_pset1(const Scalar&) { typename ei_packet_traits::type ret; return ret; } /** \internal copy the packet \a from to \a *to, \a to must be 16 bytes aligned */ -template inline void ei_pstore(Scalar* to, const Scalar& from) { (*to) = from; } +template inline void ei_pstore(Scalar*, const Packet&) {} /** \internal copy the packet \a from to \a *to, (un-aligned store) */ -template inline void ei_pstoreu(Scalar* to, const Scalar& from) { (*to) = from; } +template inline void ei_pstoreu(Scalar*, const Packet&) {} /** \internal \returns the first element of a packet */ -template inline Scalar ei_pfirst(const Scalar& a) { return a; } +template inline typename ei_unpacket_traits::type ei_pfirst(const Packet&) +{ typename ei_unpacket_traits::type ret; return ret; } /** \internal \returns a packet where the element i contains the sum of the packet of \a vec[i] */ -template inline Scalar ei_preduxp(const Scalar* vecs) { return vecs[0]; } +template inline Packet +ei_preduxp(const Packet*) { Packet ret; return ret; } /** \internal \returns the sum of the elements of \a a*/ -template inline Scalar ei_predux(const Scalar& a) { return a; } +template inline typename ei_unpacket_traits::type ei_predux(const Packet&) +{ typename ei_unpacket_traits::type ret; return ret; } + + +//////////// + + +/** \internal \returns a * b + c (coeff-wise) */ +template inline Packet +ei_pmadd(const Packet& a, + const Packet& b, + const Packet& c) +{ return ei_padd(ei_pmul(a, b),c); } + +/** \internal \returns a packet version of \a *from. 
If LoadMode equals Aligned, \a from must be 16 bytes aligned */ +template inline typename ei_packet_traits::type ei_ploadt(const Scalar* from) +{ + if(LoadMode == Aligned) + return ei_pload(from); + else + return ei_ploadu(from); +} + +/** \internal copy the packet \a from to \a *to. If LoadMode equals Aligned, \a to must be 16 bytes aligned */ +template inline void ei_pstoret(Scalar* to, const Packet& from) +{ + if(LoadMode == Aligned) + ei_pstore(to, from); + else + ei_pstoreu(to, from); +} #endif // EIGEN_DUMMY_PACKET_MATH_H diff --git a/Eigen/src/Core/Flagged.h b/Eigen/src/Core/Flagged.h index c4b464ce2..db8e2738e 100644 --- a/Eigen/src/Core/Flagged.h +++ b/Eigen/src/Core/Flagged.h @@ -65,9 +65,6 @@ template clas inline Flagged(const ExpressionType& matrix) : m_matrix(matrix) {} - /** \internal */ - inline const ExpressionType& _expression() const { return m_matrix; } - inline int rows() const { return m_matrix.rows(); } inline int cols() const { return m_matrix.cols(); } inline int stride() const { return m_matrix.stride(); } diff --git a/Eigen/src/Core/Map.h b/Eigen/src/Core/Map.h index 335c2a85f..8964681ea 100644 --- a/Eigen/src/Core/Map.h +++ b/Eigen/src/Core/Map.h @@ -29,17 +29,19 @@ * * \brief A matrix or vector expression mapping an existing array of data. * + * \param Alignment can be either Aligned or Unaligned. Tells whether the array is suitably aligned for + * vectorization on the present CPU architecture. Defaults to Unaligned. + * * This class represents a matrix or vector expression mapping an existing array of data. * It can be used to let Eigen interface without any overhead with non-Eigen data structures, * such as plain C arrays or structures from other libraries. * - * This class is the return type of Matrix::map() and most of the time this is the only - * way it is used. + * This class is the return type of Matrix::map() but can also be used directly. 
* * \sa Matrix::map() */ -template -struct ei_traits > +template +struct ei_traits > { typedef typename MatrixType::Scalar Scalar; enum { @@ -47,35 +49,37 @@ struct ei_traits > ColsAtCompileTime = MatrixType::ColsAtCompileTime, MaxRowsAtCompileTime = MatrixType::MaxRowsAtCompileTime, MaxColsAtCompileTime = MatrixType::MaxColsAtCompileTime, - Flags = MatrixType::Flags & (HereditaryBits | DirectAccessBit), + Flags = MatrixType::Flags + & ( (HereditaryBits | LinearAccessBit | DirectAccessBit) + | (Alignment == Aligned ? PacketAccessBit : 0) ), /* the conditional bit is OR-ed into the mask: AND-ing it in cleared every flag, RowMajorBit included */ CoeffReadCost = NumTraits::ReadCost }; }; -template class Map - : public MatrixBase > +template class Map + : public MatrixBase > { public: EIGEN_GENERIC_PUBLIC_INTERFACE(Map) - inline int rows() const { return m_rows; } - inline int cols() const { return m_cols; } + inline int rows() const { return m_rows.value(); } + inline int cols() const { return m_cols.value(); } inline const Scalar& coeff(int row, int col) const { if(Flags & RowMajorBit) - return m_data[col + row * m_cols]; + return m_data[col + row * m_cols.value()]; else // column-major - return m_data[row + col * m_rows]; + return m_data[row + col * m_rows.value()]; } inline Scalar& coeffRef(int row, int col) { if(Flags & RowMajorBit) - return const_cast(m_data)[col + row * m_cols]; + return const_cast(m_data)[col + row * m_cols.value()]; else // column-major - return const_cast(m_data)[row + col * m_rows]; + return const_cast(m_data)[row + col * m_rows.value()]; } inline const Scalar& coeff(int index) const @@ -88,107 +92,69 @@ template class Map return m_data[index]; } - public: - inline Map(const Scalar* data, int rows, int cols) : m_data(data), m_rows(rows), m_cols(cols) + template + inline PacketScalar packet(int row, int col) const { - ei_assert(rows > 0 - && (RowsAtCompileTime == Dynamic || RowsAtCompileTime == rows) - && cols > 0 - && (ColsAtCompileTime == Dynamic || ColsAtCompileTime == cols)); + return ei_ploadt + (m_data + (Flags & RowMajorBit + ? 
col + row * m_cols.value() + : row + col * m_rows.value())); + } + + template + inline PacketScalar packet(int index) const + { + return ei_ploadt(m_data + index); + } + + template + inline void writePacket(int row, int col, const PacketScalar& x) + { + ei_pstoret /* m_data is const Scalar*; cast away const exactly as coeffRef() does */ + (const_cast<Scalar*>(m_data) + (Flags & RowMajorBit + ? col + row * m_cols.value() + : row + col * m_rows.value()), x); + } + + template + inline void writePacket(int index, const PacketScalar& x) + { + ei_pstoret(const_cast<Scalar*>(m_data) + index, x); + } + + inline Map(const Scalar* data) : m_data(data), m_rows(RowsAtCompileTime), m_cols(ColsAtCompileTime) + { + ei_assert(RowsAtCompileTime != Dynamic && ColsAtCompileTime != Dynamic); + ei_assert(RowsAtCompileTime > 0 && ColsAtCompileTime > 0); + } + + inline Map(const Scalar* data, int size) + : m_data(data), + m_rows(RowsAtCompileTime == Dynamic ? size : RowsAtCompileTime), + m_cols(ColsAtCompileTime == Dynamic ? size : ColsAtCompileTime) + { + ei_assert(size > 0); + ei_assert((RowsAtCompileTime == 1 + && (ColsAtCompileTime == Dynamic || ColsAtCompileTime == size)) + || (ColsAtCompileTime == 1 + && (RowsAtCompileTime == Dynamic || RowsAtCompileTime == size))); + } + + inline Map(const Scalar* data, int rows, int cols) + : m_data(data), m_rows(rows), m_cols(cols) + { + ei_assert(rows > 0 && (RowsAtCompileTime == Dynamic || RowsAtCompileTime == rows) + && cols > 0 && (ColsAtCompileTime == Dynamic || ColsAtCompileTime == cols)); } EIGEN_INHERIT_ASSIGNMENT_OPERATORS(Map) protected: const Scalar* m_data; - const int m_rows, m_cols; + const ei_int_if_dynamic m_rows; + const ei_int_if_dynamic m_cols; }; -/** This is the const version of map(Scalar*,int,int). */ -template -inline const Map > -Matrix<_Scalar, _Rows, _Cols, _MaxRows, _MaxCols, _Flags>::map(const Scalar* data, int rows, int cols) -{ - return Map(data, rows, cols); -} - -/** This is the const version of map(Scalar*,int). 
*/ -template -inline const Map > -Matrix<_Scalar, _Rows, _Cols, _MaxRows, _MaxCols, _Flags>::map(const Scalar* data, int size) -{ - ei_assert(_Cols == 1 || _Rows ==1); - if(_Cols == 1) - return Map(data, size, 1); - else - return Map(data, 1, size); -} - -/** This is the const version of map(Scalar*). */ -template -inline const Map > -Matrix<_Scalar, _Rows, _Cols, _MaxRows, _MaxCols, _Flags>::map(const Scalar* data) -{ - return Map(data, _Rows, _Cols); -} - -/** \returns a expression of a matrix or vector mapping the given data. - * - * \param data The array of data to map - * \param rows The number of rows of the expression to construct - * \param cols The number of columns of the expression to construct - * - * Example: \include MatrixBase_map_int_int.cpp - * Output: \verbinclude MatrixBase_map_int_int.out - * - * \sa map(const Scalar*, int, int), map(Scalar*, int), map(Scalar*), class Map - */ -template -inline Map > -Matrix<_Scalar, _Rows, _Cols, _MaxRows, _MaxCols, _Flags>::map(Scalar* data, int rows, int cols) -{ - return Map(data, rows, cols); -} - -/** \returns a expression of a vector mapping the given data. - * - * \param data The array of data to map - * \param size The size (number of coefficients) of the expression to construct - * - * \only_for_vectors - * - * Example: \include MatrixBase_map_int.cpp - * Output: \verbinclude MatrixBase_map_int.out - * - * \sa map(const Scalar*, int), map(Scalar*, int, int), map(Scalar*), class Map - */ -template -inline Map > -Matrix<_Scalar, _Rows, _Cols, _MaxRows, _MaxCols, _Flags>::map(Scalar* data, int size) -{ - ei_assert(_Cols == 1 || _Rows ==1); - if(_Cols == 1) - return Map(data, size, 1); - else - return Map(data, 1, size); -} - -/** \returns a expression of a fixed-size matrix or vector mapping the given data. 
- * - * \param data The array of data to map - * - * Example: \include MatrixBase_map.cpp - * Output: \verbinclude MatrixBase_map.out - * - * \sa map(const Scalar*), map(Scalar*, int), map(Scalar*, int, int), class Map - */ -template -inline Map > -Matrix<_Scalar, _Rows, _Cols, _MaxRows, _MaxCols, _Flags>::map(Scalar* data) -{ - return Map(data, _Rows, _Cols); -} - /** Constructor copying an existing array of data. Only useful for dynamic-size matrices: * for fixed-size matrices, it is redundant to pass the \a rows and \a cols parameters. * \param data The array of data to copy @@ -202,7 +168,7 @@ inline Matrix<_Scalar, _Rows, _Cols, _MaxRows, _MaxCols, _Flags> ::Matrix(const Scalar *data, int rows, int cols) : m_storage(rows*cols, rows, cols) { - *this = map(data, rows, cols); + *this = Map(data, rows, cols); } /** Constructor copying an existing array of data. Only useful for dynamic-size vectors: @@ -220,7 +186,7 @@ inline Matrix<_Scalar, _Rows, _Cols, _MaxRows, _MaxCols, _Flags> ::Matrix(const Scalar *data, int size) : m_storage(size, RowsAtCompileTime == 1 ? 1 : size, ColsAtCompileTime == 1 ? 1 : size) { - *this = map(data, size); + *this = Map(data, size); } /** Constructor copying an existing array of data. 
@@ -237,7 +203,7 @@ template ::Matrix(const Scalar *data) { - *this = map(data); + *this = Map(data); } #endif // EIGEN_MAP_H diff --git a/Eigen/src/Core/Matrix.h b/Eigen/src/Core/Matrix.h index f00a27b33..bf4b8d739 100644 --- a/Eigen/src/Core/Matrix.h +++ b/Eigen/src/Core/Matrix.h @@ -102,12 +102,13 @@ class Matrix : public MatrixBase; + friend class Eigen::Map; protected: ei_matrix_storage m_storage; public: - friend class Map; inline int rows() const { return m_storage.rows(); } inline int cols() const { return m_storage.cols(); } @@ -149,50 +150,31 @@ class Matrix : public MatrixBase inline PacketScalar packet(int row, int col) const { - if(Flags & RowMajorBit) - if (LoadMode==Aligned) - return ei_pload(m_storage.data() + col + row * m_storage.cols()); - else - return ei_ploadu(m_storage.data() + col + row * m_storage.cols()); - else - if (LoadMode==Aligned) - return ei_pload(m_storage.data() + row + col * m_storage.rows()); - else - return ei_ploadu(m_storage.data() + row + col * m_storage.rows()); + return ei_ploadt + (m_storage.data() + (Flags & RowMajorBit + ? col + row * m_storage.cols() + : row + col * m_storage.rows())); } template inline PacketScalar packet(int index) const { - if (LoadMode==Aligned) - return ei_pload(m_storage.data() + index); - else - return ei_ploadu(m_storage.data() + index); + return ei_ploadt(m_storage.data() + index); } template inline void writePacket(int row, int col, const PacketScalar& x) { - ei_internal_assert(Flags & PacketAccessBit); - if(Flags & RowMajorBit) - if (StoreMode==Aligned) - ei_pstore(m_storage.data() + col + row * m_storage.cols(), x); - else - ei_pstoreu(m_storage.data() + col + row * m_storage.cols(), x); - else - if (StoreMode==Aligned) - ei_pstore(m_storage.data() + row + col * m_storage.rows(), x); - else - ei_pstoreu(m_storage.data() + row + col * m_storage.rows(), x); + ei_pstoret + (m_storage.data() + (Flags & RowMajorBit + ? 
col + row * m_storage.cols() + : row + col * m_storage.rows()), x); } template inline void writePacket(int index, const PacketScalar& x) { - if (StoreMode==Aligned) - ei_pstore(m_storage.data() + index, x); - else - ei_pstoreu(m_storage.data() + index, x); + ei_pstoret(m_storage.data() + index, x); } public: @@ -253,19 +235,13 @@ class Matrix : public MatrixBase map(const Scalar* array, int rows, int cols); - static const Map map(const Scalar* array, int size); - static const Map map(const Scalar* array); - static Map map(Scalar* array, int rows, int cols); - static Map map(Scalar* array, int size); - static Map map(Scalar* array); - /** Default constructor, does nothing. Only for fixed-size matrices. * For dynamic-size matrices and vectors, this constructor is forbidden (guarded by * an assertion) because it would leave the matrix without an allocated data buffer. */ inline explicit Matrix() { + ei_assert(RowsAtCompileTime != Dynamic && ColsAtCompileTime != Dynamic); ei_assert(RowsAtCompileTime > 0 && ColsAtCompileTime > 0); } diff --git a/Eigen/src/Core/arch/AltiVec/PacketMath.h b/Eigen/src/Core/arch/AltiVec/PacketMath.h index 35c43eb12..ec8a7495b 100644 --- a/Eigen/src/Core/arch/AltiVec/PacketMath.h +++ b/Eigen/src/Core/arch/AltiVec/PacketMath.h @@ -72,6 +72,11 @@ inline vector int ei_pmax(const vector int a, const vector int b) { r inline vector float ei_pload(const float* from) { return vec_ld(0, from); } inline vector int ei_pload(const int* from) { return vec_ld(0, from); } +inline vector float ei_ploadu(const float*) +{ EIGEN_STATIC_ASSERT(unaligned_load_and_store_operations_unimplemented_on_AltiVec) } +inline vector int ei_ploadu(const int* ) +{ EIGEN_STATIC_ASSERT(unaligned_load_and_store_operations_unimplemented_on_AltiVec) } + inline vector float ei_pset1(const float& from) { static float __attribute__(aligned(16)) af[4]; @@ -93,6 +98,11 @@ inline vector int ei_pset1(const int& from) inline void ei_pstore(float* to, const vector float from) { 
vec_st(from, 0, to); } inline void ei_pstore(int* to, const vector int from) { vec_st(from, 0, to); } +inline void ei_pstoreu(float*, const vector float) +{ EIGEN_STATIC_ASSERT(unaligned_load_and_store_operations_unimplemented_on_AltiVec) } +inline void ei_pstoreu(int* , const vector int ) +{ EIGEN_STATIC_ASSERT(unaligned_load_and_store_operations_unimplemented_on_AltiVec) } + inline float ei_pfirst(const vector float a) { static float __attribute__(aligned(16)) af[4]; diff --git a/Eigen/src/Core/arch/SSE/PacketMath.h b/Eigen/src/Core/arch/SSE/PacketMath.h index bfec50f1b..a30011dea 100644 --- a/Eigen/src/Core/arch/SSE/PacketMath.h +++ b/Eigen/src/Core/arch/SSE/PacketMath.h @@ -33,6 +33,10 @@ template<> struct ei_packet_traits { typedef __m128 type; enum {size=4} template<> struct ei_packet_traits { typedef __m128d type; enum {size=2}; }; template<> struct ei_packet_traits { typedef __m128i type; enum {size=4}; }; +template<> struct ei_unpacket_traits<__m128> { typedef float type; enum {size=4}; }; +template<> struct ei_unpacket_traits<__m128d> { typedef double type; enum {size=2}; }; +template<> struct ei_unpacket_traits<__m128i> { typedef int type; enum {size=4}; }; + template<> inline __m128 ei_padd(const __m128& a, const __m128& b) { return _mm_add_ps(a,b); } template<> inline __m128d ei_padd(const __m128d& a, const __m128d& b) { return _mm_add_pd(a,b); } template<> inline __m128i ei_padd(const __m128i& a, const __m128i& b) { return _mm_add_epi32(a,b); } @@ -79,29 +83,29 @@ template<> inline __m128i ei_pmax(const __m128i& a, const __m128i& b) return _mm_or_si128(_mm_and_si128(mask,a),_mm_andnot_si128(mask,b)); } -inline __m128 ei_pload(const float* from) { return _mm_load_ps(from); } -inline __m128d ei_pload(const double* from) { return _mm_load_pd(from); } -inline __m128i ei_pload(const int* from) { return _mm_load_si128(reinterpret_cast(from)); } +template<> inline __m128 ei_pload(const float* from) { return _mm_load_ps(from); } +template<> inline __m128d 
ei_pload(const double* from) { return _mm_load_pd(from); } +template<> inline __m128i ei_pload(const int* from) { return _mm_load_si128(reinterpret_cast(from)); } -inline __m128 ei_ploadu(const float* from) { return _mm_loadu_ps(from); } -inline __m128d ei_ploadu(const double* from) { return _mm_loadu_pd(from); } -inline __m128i ei_ploadu(const int* from) { return _mm_loadu_si128(reinterpret_cast(from)); } +template<> inline __m128 ei_ploadu(const float* from) { return _mm_loadu_ps(from); } +template<> inline __m128d ei_ploadu(const double* from) { return _mm_loadu_pd(from); } +template<> inline __m128i ei_ploadu(const int* from) { return _mm_loadu_si128(reinterpret_cast(from)); } -inline __m128 ei_pset1(const float& from) { return _mm_set1_ps(from); } -inline __m128d ei_pset1(const double& from) { return _mm_set1_pd(from); } -inline __m128i ei_pset1(const int& from) { return _mm_set1_epi32(from); } +template<> inline __m128 ei_pset1(const float& from) { return _mm_set1_ps(from); } +template<> inline __m128d ei_pset1(const double& from) { return _mm_set1_pd(from); } +template<> inline __m128i ei_pset1(const int& from) { return _mm_set1_epi32(from); } -inline void ei_pstore(float* to, const __m128& from) { _mm_store_ps(to, from); } -inline void ei_pstore(double* to, const __m128d& from) { _mm_store_pd(to, from); } -inline void ei_pstore(int* to, const __m128i& from) { _mm_store_si128(reinterpret_cast<__m128i*>(to), from); } +template<> inline void ei_pstore(float* to, const __m128& from) { _mm_store_ps(to, from); } +template<> inline void ei_pstore(double* to, const __m128d& from) { _mm_store_pd(to, from); } +template<> inline void ei_pstore(int* to, const __m128i& from) { _mm_store_si128(reinterpret_cast<__m128i*>(to), from); } -inline void ei_pstoreu(float* to, const __m128& from) { _mm_storeu_ps(to, from); } -inline void ei_pstoreu(double* to, const __m128d& from) { _mm_storeu_pd(to, from); } -inline void ei_pstoreu(int* to, const __m128i& from) { 
_mm_storeu_si128(reinterpret_cast<__m128i*>(to), from); } +template<> inline void ei_pstoreu(float* to, const __m128& from) { _mm_storeu_ps(to, from); } +template<> inline void ei_pstoreu(double* to, const __m128d& from) { _mm_storeu_pd(to, from); } +template<> inline void ei_pstoreu(int* to, const __m128i& from) { _mm_storeu_si128(reinterpret_cast<__m128i*>(to), from); } -inline float ei_pfirst(const __m128& a) { return _mm_cvtss_f32(a); } -inline double ei_pfirst(const __m128d& a) { return _mm_cvtsd_f64(a); } -inline int ei_pfirst(const __m128i& a) { return _mm_cvtsi128_si32(a); } +template<> inline float ei_pfirst(const __m128& a) { return _mm_cvtss_f32(a); } +template<> inline double ei_pfirst(const __m128d& a) { return _mm_cvtsd_f64(a); } +template<> inline int ei_pfirst(const __m128i& a) { return _mm_cvtsi128_si32(a); } #ifdef __SSE3__ // TODO implement SSE2 versions as well as integer versions diff --git a/Eigen/src/Core/util/Constants.h b/Eigen/src/Core/util/Constants.h index eafcbca7f..7e0c5650d 100644 --- a/Eigen/src/Core/util/Constants.h +++ b/Eigen/src/Core/util/Constants.h @@ -167,7 +167,7 @@ const unsigned int UnitUpper = UpperTriangularBit | UnitDiagBit; const unsigned int UnitLower = LowerTriangularBit | UnitDiagBit; const unsigned int Diagonal = Upper | Lower; -enum { Aligned=0, UnAligned=1 }; +enum { Aligned=0, Unaligned=1 }; enum { ConditionalJumpCost = 5 }; enum CornerType { TopLeft, TopRight, BottomLeft, BottomRight }; enum DirectionType { Vertical, Horizontal }; diff --git a/Eigen/src/Core/util/ForwardDeclarations.h b/Eigen/src/Core/util/ForwardDeclarations.h index ea6478a04..bad5cacdc 100644 --- a/Eigen/src/Core/util/ForwardDeclarations.h +++ b/Eigen/src/Core/util/ForwardDeclarations.h @@ -51,7 +51,7 @@ template class CwiseBinaryOp; template class Product; template class DiagonalMatrix; template class DiagonalCoeffs; -template class Map; +template class Map; template class PartialRedux; template class Part; template class Extract; diff --git 
a/Eigen/src/Core/util/Macros.h b/Eigen/src/Core/util/Macros.h index dff4d7885..9c013f6d1 100644 --- a/Eigen/src/Core/util/Macros.h +++ b/Eigen/src/Core/util/Macros.h @@ -45,7 +45,7 @@ #define EIGEN_DEFAULT_MATRIX_FLAGS EIGEN_DEFAULT_MATRIX_STORAGE_ORDER -/** Define a hint size when dealling with large matrices and L2 cache friendlyness +/** Define a hint size when dealing with large matrices and L2 cache friendliness * More precisely, its square value represents the amount of bytes which can be assumed to stay in L2 cache. */ #ifndef EIGEN_TUNE_FOR_L2_CACHE_SIZE @@ -136,15 +136,15 @@ typedef typename Base::PacketScalar PacketScalar; \ typedef typename Eigen::ei_nested::type Nested; \ typedef typename Eigen::ei_eval::type Eval; \ typedef typename Eigen::Inverse InverseType; \ -enum { RowsAtCompileTime = Base::RowsAtCompileTime, \ - ColsAtCompileTime = Base::ColsAtCompileTime, \ - MaxRowsAtCompileTime = Base::MaxRowsAtCompileTime, \ - MaxColsAtCompileTime = Base::MaxColsAtCompileTime, \ +enum { RowsAtCompileTime = Eigen::ei_traits::RowsAtCompileTime, \ + ColsAtCompileTime = Eigen::ei_traits::ColsAtCompileTime, \ + MaxRowsAtCompileTime = Eigen::ei_traits::MaxRowsAtCompileTime, \ + MaxColsAtCompileTime = Eigen::ei_traits::MaxColsAtCompileTime, \ + Flags = Eigen::ei_traits::Flags, \ + CoeffReadCost = Eigen::ei_traits::CoeffReadCost, \ SizeAtCompileTime = Base::SizeAtCompileTime, \ MaxSizeAtCompileTime = Base::MaxSizeAtCompileTime, \ - IsVectorAtCompileTime = Base::IsVectorAtCompileTime, \ - Flags = Base::Flags, \ - CoeffReadCost = Base::CoeffReadCost }; + IsVectorAtCompileTime = Base::IsVectorAtCompileTime }; #define EIGEN_GENERIC_PUBLIC_INTERFACE(Derived) \ _EIGEN_GENERIC_PUBLIC_INTERFACE(Derived, Eigen::MatrixBase) \ diff --git a/Eigen/src/Core/util/Meta.h b/Eigen/src/Core/util/Meta.h index 93d50441e..45b2543d8 100644 --- a/Eigen/src/Core/util/Meta.h +++ b/Eigen/src/Core/util/Meta.h @@ -147,6 +147,13 @@ template struct ei_packet_traits enum {size=1}; }; +template 
struct ei_unpacket_traits +{ + typedef T type; + enum {size=1}; +}; + + template class ei_corrected_matrix_flags { diff --git a/Eigen/src/Core/util/StaticAssert.h b/Eigen/src/Core/util/StaticAssert.h index f74bc7775..9fcf55d84 100644 --- a/Eigen/src/Core/util/StaticAssert.h +++ b/Eigen/src/Core/util/StaticAssert.h @@ -58,7 +58,8 @@ you_tried_calling_a_vector_method_on_a_matrix, you_mixed_vectors_of_different_sizes, you_mixed_matrices_of_different_sizes, - you_did_a_programming_error + you_did_a_programming_error, + unaligned_load_and_store_operations_unimplemented_on_AltiVec }; }; diff --git a/test/map.cpp b/test/map.cpp index 2ae78a4c7..1723a047c 100644 --- a/test/map.cpp +++ b/test/map.cpp @@ -31,16 +31,16 @@ template void tmap(const VectorType& m) int size = m.size(); // test Map.h - Scalar* array1 = new Scalar[size]; - Scalar* array2 = new Scalar[size]; - VectorType::map(array1, size) = VectorType::random(size); - VectorType::map(array2, size) = VectorType::map(array1, size); - VectorType ma1 = VectorType::map(array1, size); - VectorType ma2 = VectorType::map(array2, size); + Scalar* array1 = ei_aligned_malloc(size); + Scalar* array2 = ei_aligned_malloc(size); + Map(array1, size) = VectorType::random(size); + Map(array2, size) = Map(array1, size); + VectorType ma1 = Map(array1, size); + VectorType ma2 = Map(array2, size); VERIFY_IS_APPROX(ma1, ma2); VERIFY_IS_APPROX(ma1, VectorType(array2, size)); - delete[] array1; - delete[] array2; + ei_aligned_free(array1); + ei_aligned_free(array2); } void test_map()