A set of fixes and workarounds to make Sun Studio happier.

The problem of alignment and vectorization still remains.
This commit is contained in:
Gael Guennebaud 2009-07-10 16:10:03 +02:00
parent 1c52985aa7
commit ec5c608aa3
15 changed files with 81 additions and 72 deletions

View File

@ -88,6 +88,8 @@
#include <cstring> #include <cstring>
#include <string> #include <string>
#include <limits> #include <limits>
// for min/max:
#include <algorithm>
#if (defined(_CPPUNWIND) || defined(__EXCEPTIONS)) && !defined(EIGEN_NO_EXCEPTIONS) #if (defined(_CPPUNWIND) || defined(__EXCEPTIONS)) && !defined(EIGEN_NO_EXCEPTIONS)
#define EIGEN_EXCEPTIONS #define EIGEN_EXCEPTIONS

View File

@ -140,21 +140,4 @@ VectorwiseOp<ExpressionType,Direction>::replicate(int factor) const
(_expression(),Direction==Vertical?factor:1,Direction==Horizontal?factor:1); (_expression(),Direction==Vertical?factor:1,Direction==Horizontal?factor:1);
} }
/** \nonstableyet
* \return an expression of the replication of each column (or row) of \c *this
*
* Example: \include DirectionWise_replicate.cpp
* Output: \verbinclude DirectionWise_replicate.out
*
* \sa VectorwiseOp::replicate(int), MatrixBase::replicate(), class Replicate
*/
template<typename ExpressionType, int Direction>
template<int Factor>
const Replicate<ExpressionType,(Direction==Vertical?Factor:1),(Direction==Horizontal?Factor:1)>
VectorwiseOp<ExpressionType,Direction>::replicate(int factor) const
{
return Replicate<ExpressionType,Direction==Vertical?Factor:1,Direction==Horizontal?Factor:1>
(_expression(),Direction==Vertical?factor:1,Direction==Horizontal?factor:1);
}
#endif // EIGEN_REPLICATE_H #endif // EIGEN_REPLICATE_H

View File

@ -179,6 +179,11 @@ template<typename ExpressionType, int Direction> class VectorwiseOp
> Type; > Type;
}; };
// Compile-time 0/1 flags derived from the Direction template parameter:
// IsVertical is 1 when Direction==Vertical, IsHorizontal is 1 when
// Direction==Horizontal. They let template argument lists refer to a plain
// enum constant instead of repeating the comparison expression.
enum {
IsVertical = (Direction==Vertical) ? 1 : 0,
IsHorizontal = (Direction==Horizontal) ? 1 : 0
};
protected: protected:
/** \internal /** \internal
@ -222,9 +227,17 @@ template<typename ExpressionType, int Direction> class VectorwiseOp
/** \internal */ /** \internal */
inline const ExpressionType& _expression() const { return m_matrix; } inline const ExpressionType& _expression() const { return m_matrix; }
/** \returns a row or column vector expression of \c *this reduxed by \a func
*
* The template parameter \a BinaryOp is the type of the functor
* of the custom redux operator. Note that func must be an associative operator.
*
* \sa class VectorwiseOp, MatrixBase::colwise(), MatrixBase::rowwise()
*/
template<typename BinaryOp> template<typename BinaryOp>
const typename ReduxReturnType<BinaryOp>::Type const typename ReduxReturnType<BinaryOp>::Type
redux(const BinaryOp& func = BinaryOp()) const; redux(const BinaryOp& func = BinaryOp()) const
{ return typename ReduxReturnType<BinaryOp>::Type(_expression(), func); }
/** \returns a row (or column) vector expression of the smallest coefficient /** \returns a row (or column) vector expression of the smallest coefficient
* of each column (or row) of the referenced expression. * of each column (or row) of the referenced expression.
@ -319,16 +332,26 @@ template<typename ExpressionType, int Direction> class VectorwiseOp
* *
* \sa MatrixBase::reverse() */ * \sa MatrixBase::reverse() */
const Reverse<ExpressionType, Direction> reverse() const const Reverse<ExpressionType, Direction> reverse() const
{ { return Reverse<ExpressionType, Direction>( _expression() ); }
return Reverse<ExpressionType, Direction>( _expression() );
}
const Replicate<ExpressionType,Direction==Vertical?Dynamic:1,Direction==Horizontal?Dynamic:1> const Replicate<ExpressionType,Direction==Vertical?Dynamic:1,Direction==Horizontal?Dynamic:1>
replicate(int factor) const; replicate(int factor) const;
template<int Factor> /** \nonstableyet
const Replicate<ExpressionType,(Direction==Vertical?Factor:1),(Direction==Horizontal?Factor:1)> * \return an expression of the replication of each column (or row) of \c *this
replicate(int factor = Factor) const; *
* Example: \include DirectionWise_replicate.cpp
* Output: \verbinclude DirectionWise_replicate.out
*
* \sa VectorwiseOp::replicate(int), MatrixBase::replicate(), class Replicate
*/
// NOTE: defined inline in the class body because the out-of-class definition
// triggered compilation errors with Sun Studio.
// Builds a Replicate expression of the wrapped expression: the compile-time
// factor Factor (and the run-time \a factor, which defaults to Factor) is
// applied along the direction of this VectorwiseOp, while the other direction
// keeps a factor of 1.
template<int Factor> const Replicate<ExpressionType,(IsVertical?Factor:1),(IsHorizontal?Factor:1)>
replicate(int factor = Factor) const
{
typedef Replicate<ExpressionType,(IsVertical?Factor:1),(IsHorizontal?Factor:1)> ReplicateType;
const int verticalFactor = IsVertical ? factor : 1;
const int horizontalFactor = IsHorizontal ? factor : 1;
return ReplicateType(_expression(), verticalFactor, horizontalFactor);
}
/////////// Artithmetic operators /////////// /////////// Artithmetic operators ///////////
@ -466,19 +489,4 @@ MatrixBase<Derived>::rowwise()
return derived(); return derived();
} }
/** \returns a row or column vector expression of \c *this reduxed by \a func
*
* The template parameter \a BinaryOp is the type of the functor
* of the custom redux operator. Note that func must be an associative operator.
*
* \sa class VectorwiseOp, MatrixBase::colwise(), MatrixBase::rowwise()
*/
template<typename ExpressionType, int Direction>
template<typename BinaryOp>
const typename VectorwiseOp<ExpressionType,Direction>::template ReduxReturnType<BinaryOp>::Type
VectorwiseOp<ExpressionType,Direction>::redux(const BinaryOp& func) const
{
return typename ReduxReturnType<BinaryOp>::Type(_expression(), func);
}
#endif // EIGEN_PARTIAL_REDUX_H #endif // EIGEN_PARTIAL_REDUX_H

View File

@ -271,13 +271,19 @@ class Block<MatrixType,BlockRows,BlockCols,PacketAccess,HasDirectAccess>
inline int stride(void) const { return m_matrix.stride(); } inline int stride(void) const { return m_matrix.stride(); }
#ifndef __SUNPRO_CC
// FIXME sunstudio is not friendly with the above friend...
protected: protected:
#endif
#ifndef EIGEN_PARSED_BY_DOXYGEN
/** \internal used by allowAligned() */ /** \internal used by allowAligned() */
inline Block(const MatrixType& matrix, const Scalar* data, int blockRows, int blockCols) inline Block(const MatrixType& matrix, const Scalar* data, int blockRows, int blockCols)
: Base(data, blockRows, blockCols), m_matrix(matrix) : Base(data, blockRows, blockCols), m_matrix(matrix)
{} {}
#endif
protected:
const typename MatrixType::Nested m_matrix; const typename MatrixType::Nested m_matrix;
}; };

View File

@ -42,13 +42,13 @@ struct ei_traits<CwiseUnaryView<ViewOp, MatrixType> >
: ei_traits<MatrixType> : ei_traits<MatrixType>
{ {
typedef typename ei_result_of< typedef typename ei_result_of<
ViewOp(typename MatrixType::Scalar) ViewOp(typename ei_traits<MatrixType>::Scalar)
>::type Scalar; >::type Scalar;
typedef typename MatrixType::Nested MatrixTypeNested; typedef typename MatrixType::Nested MatrixTypeNested;
typedef typename ei_unref<MatrixTypeNested>::type _MatrixTypeNested; typedef typename ei_unref<MatrixTypeNested>::type _MatrixTypeNested;
enum { enum {
Flags = (_MatrixTypeNested::Flags & (HereditaryBits | LinearAccessBit | AlignedBit)), Flags = (ei_traits<_MatrixTypeNested>::Flags & (HereditaryBits | LinearAccessBit | AlignedBit)),
CoeffReadCost = _MatrixTypeNested::CoeffReadCost + ei_functor_traits<ViewOp>::Cost CoeffReadCost = ei_traits<_MatrixTypeNested>::CoeffReadCost + ei_functor_traits<ViewOp>::Cost
}; };
}; };
@ -62,7 +62,7 @@ class CwiseUnaryView : ei_no_assignment_operator,
inline CwiseUnaryView(const MatrixType& mat, const ViewOp& func = ViewOp()) inline CwiseUnaryView(const MatrixType& mat, const ViewOp& func = ViewOp())
: m_matrix(mat), m_functor(func) {} : m_matrix(mat), m_functor(func) {}
EIGEN_INHERIT_ASSIGNMENT_OPERATORS(CwiseUnaryView) EIGEN_INHERIT_ASSIGNMENT_OPERATORS(CwiseUnaryView)
EIGEN_STRONG_INLINE int rows() const { return m_matrix.rows(); } EIGEN_STRONG_INLINE int rows() const { return m_matrix.rows(); }
@ -77,7 +77,7 @@ class CwiseUnaryView : ei_no_assignment_operator,
{ {
return m_functor(m_matrix.coeff(index)); return m_functor(m_matrix.coeff(index));
} }
EIGEN_STRONG_INLINE Scalar& coeffRef(int row, int col) EIGEN_STRONG_INLINE Scalar& coeffRef(int row, int col)
{ {
return m_functor(m_matrix.const_cast_derived().coeffRef(row, col)); return m_functor(m_matrix.const_cast_derived().coeffRef(row, col));
@ -89,7 +89,8 @@ class CwiseUnaryView : ei_no_assignment_operator,
} }
protected: protected:
const typename MatrixType::Nested m_matrix; // FIXME changed from MatrixType::Nested because of a weird compilation error with sun CC
const typename ei_nested<MatrixType>::type m_matrix;
const ViewOp m_functor; const ViewOp m_functor;
}; };

View File

@ -178,6 +178,7 @@ template<typename Derived> class MapBase
} }
using Base::operator*=; using Base::operator*=;
using Base::operator+=;
template<typename OtherDerived> template<typename OtherDerived>
Derived& operator+=(const MatrixBase<OtherDerived>& other) Derived& operator+=(const MatrixBase<OtherDerived>& other)

View File

@ -124,6 +124,7 @@ class Matrix
{ {
public: public:
EIGEN_GENERIC_PUBLIC_INTERFACE(Matrix) EIGEN_GENERIC_PUBLIC_INTERFACE(Matrix)
enum { Options = _Options }; enum { Options = _Options };
friend class Eigen::Map<Matrix, Unaligned>; friend class Eigen::Map<Matrix, Unaligned>;
typedef class Eigen::Map<Matrix, Unaligned> UnalignedMapType; typedef class Eigen::Map<Matrix, Unaligned> UnalignedMapType;
@ -217,7 +218,7 @@ class Matrix
* *
* This method is intended for dynamic-size matrices, although it is legal to call it on any * This method is intended for dynamic-size matrices, although it is legal to call it on any
* matrix as long as fixed dimensions are left unchanged. If you only want to change the number * matrix as long as fixed dimensions are left unchanged. If you only want to change the number
* of rows and/or of columns, you can use resize(NoChange_t, int), resize(int, NoChange_t). * of rows and/or of columns, you can use resize(NoChange_t, int), resize(int, NoChange_t).
* *
* If the current number of coefficients of \c *this exactly matches the * If the current number of coefficients of \c *this exactly matches the
* product \a rows * \a cols, then no memory allocation is performed and * product \a rows * \a cols, then no memory allocation is performed and

View File

@ -137,10 +137,14 @@ template<typename Derived> class MatrixBase
* constructed from this one. See the \ref flags "list of flags". * constructed from this one. See the \ref flags "list of flags".
*/ */
CoeffReadCost = ei_traits<Derived>::CoeffReadCost CoeffReadCost = ei_traits<Derived>::CoeffReadCost,
/**< This is a rough measure of how expensive it is to read one coefficient from /**< This is a rough measure of how expensive it is to read one coefficient from
* this expression. * this expression.
*/ */
#ifndef EIGEN_PARSED_BY_DOXYGEN
_HasDirectAccess = (int(Flags)&DirectAccessBit) ? 1 : 0 // workaround sunCC
#endif
}; };
/** Default constructor. Just checks at compile-time for self-consistency of the flags. */ /** Default constructor. Just checks at compile-time for self-consistency of the flags. */
@ -204,7 +208,7 @@ template<typename Derived> class MatrixBase
/** \internal the return type of coeff() /** \internal the return type of coeff()
*/ */
typedef typename ei_meta_if<bool(int(Flags)&DirectAccessBit), const Scalar&, Scalar>::ret CoeffReturnType; typedef typename ei_meta_if<_HasDirectAccess, const Scalar&, Scalar>::ret CoeffReturnType;
/** \internal Represents a matrix with all coefficients equal to one another*/ /** \internal Represents a matrix with all coefficients equal to one another*/
typedef CwiseNullaryOp<ei_scalar_constant_op<Scalar>,Derived> ConstantReturnType; typedef CwiseNullaryOp<ei_scalar_constant_op<Scalar>,Derived> ConstantReturnType;

View File

@ -83,15 +83,14 @@ struct ProductReturnType<Lhs,Rhs,CacheFriendlyProduct>
template<typename Lhs, typename Rhs> struct ei_product_mode template<typename Lhs, typename Rhs> struct ei_product_mode
{ {
enum{ enum{
value = ei_is_diagonal<Rhs>::ret || ei_is_diagonal<Lhs>::ret value = ei_is_diagonal<Rhs>::ret || ei_is_diagonal<Lhs>::ret
? DiagonalProduct ? DiagonalProduct
: Lhs::MaxColsAtCompileTime == Dynamic : ei_traits<Lhs>::MaxColsAtCompileTime == Dynamic
&& ( Lhs::MaxRowsAtCompileTime == Dynamic && ( ei_traits<Lhs>::MaxRowsAtCompileTime == Dynamic
|| Rhs::MaxColsAtCompileTime == Dynamic ) || ei_traits<Rhs>::MaxColsAtCompileTime == Dynamic )
&& (!(Rhs::IsVectorAtCompileTime && (Lhs::Flags&RowMajorBit) && (!(Lhs::Flags&DirectAccessBit)))) && (!(ei_traits<Rhs>::IsVectorAtCompileTime && (ei_traits<Lhs>::Flags&RowMajorBit) && (!(ei_traits<Lhs>::Flags&DirectAccessBit))))
&& (!(Lhs::IsVectorAtCompileTime && (!(Rhs::Flags&RowMajorBit)) && (!(Rhs::Flags&DirectAccessBit)))) && (!(ei_traits<Lhs>::IsVectorAtCompileTime && (!(ei_traits<Rhs>::Flags&RowMajorBit)) && (!(ei_traits<Rhs>::Flags&DirectAccessBit))))
&& (ei_is_same_type<typename Lhs::Scalar, typename Rhs::Scalar>::ret) && (ei_is_same_type<typename ei_traits<Lhs>::Scalar, typename ei_traits<Rhs>::Scalar>::ret)
? CacheFriendlyProduct ? CacheFriendlyProduct
: NormalProduct }; : NormalProduct };
}; };
@ -215,7 +214,7 @@ template<typename LhsNested, typename RhsNested, int ProductMode> class Product
*/ */
EIGEN_STRONG_INLINE bool _useCacheFriendlyProduct() const EIGEN_STRONG_INLINE bool _useCacheFriendlyProduct() const
{ {
return m_lhs.cols()>=EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD return m_lhs.cols()>=EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD
&& ( rows()>=EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD && ( rows()>=EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD
|| cols()>=EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD); || cols()>=EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD);
} }

View File

@ -188,7 +188,7 @@ const unsigned int HereditaryBits = RowMajorBit
// diagonal means both upper and lower triangular // diagonal means both upper and lower triangular
const unsigned DiagonalBits = UpperTriangularBit | LowerTriangularBit; const unsigned DiagonalBits = UpperTriangularBit | LowerTriangularBit;
// Possible values for the Mode parameter of part() // Possible values for the Mode parameter of part()
const unsigned int UpperTriangular = UpperTriangularBit; const unsigned int UpperTriangular = UpperTriangularBit;
const unsigned int StrictlyUpperTriangular = UpperTriangularBit | ZeroDiagBit; const unsigned int StrictlyUpperTriangular = UpperTriangularBit | ZeroDiagBit;
@ -201,7 +201,7 @@ const unsigned int UnitLowerTriangular = LowerTriangularBit | UnitDiagBit;
template<typename T> struct ei_is_diagonal template<typename T> struct ei_is_diagonal
{ {
enum { enum {
ret = ( (unsigned int)(T::Flags) & DiagonalBits ) == DiagonalBits ret = ( int(ei_traits<T>::Flags) & DiagonalBits ) == DiagonalBits
}; };
}; };

View File

@ -97,7 +97,7 @@ template<typename Scalar1,typename Scalar2> struct ei_scalar_multiple2_op;
struct IOFormat; struct IOFormat;
template<typename Scalar> template<typename Scalar>
void ei_cache_friendly_product( static void ei_cache_friendly_product(
int _rows, int _cols, int depth, int _rows, int _cols, int depth,
bool _lhsRowMajor, const Scalar* _lhs, int _lhsStride, bool _lhsRowMajor, const Scalar* _lhs, int _lhsStride,
bool _rhsRowMajor, const Scalar* _rhs, int _rhsStride, bool _rhsRowMajor, const Scalar* _rhs, int _rhsStride,

View File

@ -51,7 +51,8 @@
#define EIGEN_GCC3_OR_OLDER 0 #define EIGEN_GCC3_OR_OLDER 0
#endif #endif
#if !EIGEN_GCC_AND_ARCH_DOESNT_WANT_ALIGNMENT && !EIGEN_GCC3_OR_OLDER // FIXME vectorization + alignment is completely disabled with sun studio
#if !EIGEN_GCC_AND_ARCH_DOESNT_WANT_ALIGNMENT && !EIGEN_GCC3_OR_OLDER && !defined(__SUNPRO_CC)
#define EIGEN_ARCH_WANTS_ALIGNMENT 1 #define EIGEN_ARCH_WANTS_ALIGNMENT 1
#else #else
#define EIGEN_ARCH_WANTS_ALIGNMENT 0 #define EIGEN_ARCH_WANTS_ALIGNMENT 0
@ -97,7 +98,7 @@
/** Allows to disable some optimizations which might affect the accuracy of the result. /** Allows to disable some optimizations which might affect the accuracy of the result.
* Such optimization are enabled by default, and set EIGEN_FAST_MATH to 0 to disable them. * Such optimization are enabled by default, and set EIGEN_FAST_MATH to 0 to disable them.
* They currently include: * They currently include:
* - single precision Cwise::sin() and Cwise::cos() when SSE vectorization is enabled. * - single precision Cwise::sin() and Cwise::cos() when SSE vectorization is enabled.
*/ */
#ifndef EIGEN_FAST_MATH #ifndef EIGEN_FAST_MATH
#define EIGEN_FAST_MATH 1 #define EIGEN_FAST_MATH 1
@ -199,13 +200,16 @@ using Eigen::ei_cos;
* vectorized and non-vectorized code. * vectorized and non-vectorized code.
*/ */
#if !EIGEN_ALIGN #if !EIGEN_ALIGN
#define EIGEN_ALIGN_128 #define EIGEN_ALIGN_128
#elif (defined __GNUC__) #elif (defined __GNUC__)
#define EIGEN_ALIGN_128 __attribute__((aligned(16))) #define EIGEN_ALIGN_128 __attribute__((aligned(16)))
#elif (defined _MSC_VER) #elif (defined _MSC_VER)
#define EIGEN_ALIGN_128 __declspec(align(16)) #define EIGEN_ALIGN_128 __declspec(align(16))
#elif (defined __SUNPRO_CC)
// FIXME not sure about this one:
#define EIGEN_ALIGN_128 __attribute__((aligned(16)))
#else #else
#error Please tell me what is the equivalent of __attribute__((aligned(16))) for your compiler #error Please tell me what is the equivalent of __attribute__((aligned(16))) for your compiler
#endif #endif
#define EIGEN_RESTRICT __restrict #define EIGEN_RESTRICT __restrict

View File

@ -46,7 +46,7 @@ macro(_eigen2_check_version)
endif(${EIGEN2_VERSION} VERSION_LESS ${Eigen2_FIND_VERSION}) endif(${EIGEN2_VERSION} VERSION_LESS ${Eigen2_FIND_VERSION})
if(NOT EIGEN2_VERSION_OK) if(NOT EIGEN2_VERSION_OK)
message(STATUS "Eigen2 version ${EIGEN2_VERSION} found in ${EIGEN2_INCLUDE_DIR}, " message(STATUS "Eigen2 version ${EIGEN2_VERSION} found in ${EIGEN2_INCLUDE_DIR}, "
"but at least version ${Eigen2_FIND_VERSION} is required") "but at least version ${Eigen2_FIND_VERSION} is required")
endif(NOT EIGEN2_VERSION_OK) endif(NOT EIGEN2_VERSION_OK)

View File

@ -1,7 +1,7 @@
// This file is part of Eigen, a lightweight C++ template library // This file is part of Eigen, a lightweight C++ template library
// for linear algebra. // for linear algebra.
// //
// Copyright (C) 2008 Gael Guennebaud <g.gael@free.fr> // Copyright (C) 2008-2009 Gael Guennebaud <g.gael@free.fr>
// //
// Eigen is free software; you can redistribute it and/or // Eigen is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public // modify it under the terms of the GNU Lesser General Public

View File

@ -107,7 +107,7 @@ template<typename MatrixType> void basicStuff(const MatrixType& m)
{ {
VERIFY_IS_NOT_APPROX(m3, m1); VERIFY_IS_NOT_APPROX(m3, m1);
} }
m3.real() = m1.real(); m3.real() = m1.real();
VERIFY_IS_APPROX(static_cast<const MatrixType&>(m3).real(), static_cast<const MatrixType&>(m1).real()); VERIFY_IS_APPROX(static_cast<const MatrixType&>(m3).real(), static_cast<const MatrixType&>(m1).real());
VERIFY_IS_APPROX(static_cast<const MatrixType&>(m3).real(), m1.real()); VERIFY_IS_APPROX(static_cast<const MatrixType&>(m3).real(), m1.real());
@ -121,16 +121,16 @@ template<typename MatrixType> void basicStuffComplex(const MatrixType& m)
int rows = m.rows(); int rows = m.rows();
int cols = m.cols(); int cols = m.cols();
Scalar s1 = ei_random<Scalar>(), Scalar s1 = ei_random<Scalar>(),
s2 = ei_random<Scalar>(); s2 = ei_random<Scalar>();
VERIFY(ei_real(s1)==ei_real_ref(s1)); VERIFY(ei_real(s1)==ei_real_ref(s1));
VERIFY(ei_imag(s1)==ei_imag_ref(s1)); VERIFY(ei_imag(s1)==ei_imag_ref(s1));
ei_real_ref(s1) = ei_real(s2); ei_real_ref(s1) = ei_real(s2);
ei_imag_ref(s1) = ei_imag(s2); ei_imag_ref(s1) = ei_imag(s2);
VERIFY(s1==s2); VERIFY(s1==s2);
RealMatrixType rm1 = RealMatrixType::Random(rows,cols), RealMatrixType rm1 = RealMatrixType::Random(rows,cols),
rm2 = RealMatrixType::Random(rows,cols); rm2 = RealMatrixType::Random(rows,cols);
MatrixType cm(rows,cols); MatrixType cm(rows,cols);
@ -162,7 +162,7 @@ void test_basicstuff()
CALL_SUBTEST( basicStuff(MatrixXcd(20, 20)) ); CALL_SUBTEST( basicStuff(MatrixXcd(20, 20)) );
CALL_SUBTEST( basicStuff(Matrix<float, 100, 100>()) ); CALL_SUBTEST( basicStuff(Matrix<float, 100, 100>()) );
CALL_SUBTEST( basicStuff(Matrix<long double,Dynamic,Dynamic>(10,10)) ); CALL_SUBTEST( basicStuff(Matrix<long double,Dynamic,Dynamic>(10,10)) );
CALL_SUBTEST( basicStuffComplex(MatrixXcf(21, 17)) ); CALL_SUBTEST( basicStuffComplex(MatrixXcf(21, 17)) );
CALL_SUBTEST( basicStuffComplex(MatrixXcd(2, 3)) ); CALL_SUBTEST( basicStuffComplex(MatrixXcd(2, 3)) );
} }