A set of fixes and workarounds to make Sun Studio happier.

The problem of alignment and vectorization still remains.
2025-09-18 12:23:13 +08:00 · 2009-07-10 16:10:03 +02:00 · 2009-07-10 16:10:03 +02:00 · ec5c608aa3
commit ec5c608aa3
parent 1c52985aa7
15 changed files with 81 additions and 72 deletions
--- a/Eigen/Core
+++ b/Eigen/Core
@ -88,6 +88,8 @@
 #include <cstring>
 #include <string>
 #include <limits>
 // for min/max:
 #include <algorithm>
 #if (defined(_CPPUNWIND) || defined(__EXCEPTIONS)) && !defined(EIGEN_NO_EXCEPTIONS)
  #define EIGEN_EXCEPTIONS
--- a/Eigen/src/Array/Replicate.h
+++ b/Eigen/src/Array/Replicate.h
@ -140,21 +140,4 @@ VectorwiseOp<ExpressionType,Direction>::replicate(int factor) const
          (_expression(),Direction==Vertical?factor:1,Direction==Horizontal?factor:1);
 }
 /** \nonstableyet
  * \return an expression of the replication of each column (or row) of \c *this
  *
  * Example: \include DirectionWise_replicate.cpp
  * Output: \verbinclude DirectionWise_replicate.out
  *
  * \sa VectorwiseOp::replicate(int), MatrixBase::replicate(), class Replicate
  */
 template<typename ExpressionType, int Direction>
 template<int Factor>
 const Replicate<ExpressionType,(Direction==Vertical?Factor:1),(Direction==Horizontal?Factor:1)>
 VectorwiseOp<ExpressionType,Direction>::replicate(int factor) const
 {
  return Replicate<ExpressionType,Direction==Vertical?Factor:1,Direction==Horizontal?Factor:1>
          (_expression(),Direction==Vertical?factor:1,Direction==Horizontal?factor:1);
 }
 #endif // EIGEN_REPLICATE_H
--- a/Eigen/src/Array/VectorwiseOp.h
+++ b/Eigen/src/Array/VectorwiseOp.h
@ -179,6 +179,11 @@ template<typename ExpressionType, int Direction> class VectorwiseOp
                              > Type;
    };
    enum {
      IsVertical   = (Direction==Vertical) ? 1 : 0,
      IsHorizontal = (Direction==Horizontal) ? 1 : 0
    };
  protected:
    /** \internal
@ -222,9 +227,17 @@ template<typename ExpressionType, int Direction> class VectorwiseOp
    /** \internal */
    inline const ExpressionType& _expression() const { return m_matrix; }
    /** \returns a row or column vector expression of \c *this reduxed by \a func
      *
      * The template parameter \a BinaryOp is the type of the functor
      * of the custom redux operator. Note that func must be an associative operator.
      *
      * \sa class VectorwiseOp, MatrixBase::colwise(), MatrixBase::rowwise()
      */
    template<typename BinaryOp>
    const typename ReduxReturnType<BinaryOp>::Type
-    redux(const BinaryOp& func = BinaryOp()) const;
+    redux(const BinaryOp& func = BinaryOp()) const
    { return typename ReduxReturnType<BinaryOp>::Type(_expression(), func); }
    /** \returns a row (or column) vector expression of the smallest coefficient
      * of each column (or row) of the referenced expression.
@ -319,16 +332,26 @@ template<typename ExpressionType, int Direction> class VectorwiseOp
      *
      * \sa MatrixBase::reverse() */
    const Reverse<ExpressionType, Direction> reverse() const
-    {
+    { return Reverse<ExpressionType, Direction>( _expression() ); }
      return Reverse<ExpressionType, Direction>( _expression() );
    }
    const Replicate<ExpressionType,Direction==Vertical?Dynamic:1,Direction==Horizontal?Dynamic:1>
    replicate(int factor) const;
-    template<int Factor>
+    /** \nonstableyet
-    const Replicate<ExpressionType,(Direction==Vertical?Factor:1),(Direction==Horizontal?Factor:1)>
+      * \return an expression of the replication of each column (or row) of \c *this
-    replicate(int factor = Factor) const;
+      *
      * Example: \include DirectionWise_replicate.cpp
      * Output: \verbinclude DirectionWise_replicate.out
      *
      * \sa VectorwiseOp::replicate(int), MatrixBase::replicate(), class Replicate
      */
    // NOTE implemented here because of sunstudio's compilation errors
    template<int Factor> const Replicate<ExpressionType,(IsVertical?Factor:1),(IsHorizontal?Factor:1)>
    replicate(int factor = Factor) const
    {
      return Replicate<ExpressionType,Direction==Vertical?Factor:1,Direction==Horizontal?Factor:1>
          (_expression(),Direction==Vertical?factor:1,Direction==Horizontal?factor:1);
    }
 /////////// Artithmetic operators ///////////
@ -466,19 +489,4 @@ MatrixBase<Derived>::rowwise()
  return derived();
 }
 /** \returns a row or column vector expression of \c *this reduxed by \a func
  *
  * The template parameter \a BinaryOp is the type of the functor
  * of the custom redux operator. Note that func must be an associative operator.
  *
  * \sa class VectorwiseOp, MatrixBase::colwise(), MatrixBase::rowwise()
  */
 template<typename ExpressionType, int Direction>
 template<typename BinaryOp>
 const typename VectorwiseOp<ExpressionType,Direction>::template ReduxReturnType<BinaryOp>::Type
 VectorwiseOp<ExpressionType,Direction>::redux(const BinaryOp& func) const
 {
  return typename ReduxReturnType<BinaryOp>::Type(_expression(), func);
 }
 #endif // EIGEN_PARTIAL_REDUX_H
--- a/Eigen/src/Core/Block.h
+++ b/Eigen/src/Core/Block.h
@ -271,13 +271,19 @@ class Block<MatrixType,BlockRows,BlockCols,PacketAccess,HasDirectAccess>
    inline int stride(void) const { return m_matrix.stride(); }
  #ifndef __SUNPRO_CC
  // FIXME sunstudio is not friendly with the above friend...
  protected:
  #endif
    #ifndef EIGEN_PARSED_BY_DOXYGEN
    /** \internal used by allowAligned() */
    inline Block(const MatrixType& matrix, const Scalar* data, int blockRows, int blockCols)
      : Base(data, blockRows, blockCols), m_matrix(matrix)
    {}
    #endif
  protected:
    const typename MatrixType::Nested m_matrix;
 };
--- a/Eigen/src/Core/CwiseUnaryView.h
+++ b/Eigen/src/Core/CwiseUnaryView.h
@ -42,13 +42,13 @@ struct ei_traits<CwiseUnaryView<ViewOp, MatrixType> >
 : ei_traits<MatrixType>
 {
  typedef typename ei_result_of<
-                     ViewOp(typename MatrixType::Scalar)
+                     ViewOp(typename ei_traits<MatrixType>::Scalar)
                   >::type Scalar;
  typedef typename MatrixType::Nested MatrixTypeNested;
  typedef typename ei_unref<MatrixTypeNested>::type _MatrixTypeNested;
  enum {
-    Flags = (_MatrixTypeNested::Flags & (HereditaryBits | LinearAccessBit | AlignedBit)),
+    Flags = (ei_traits<_MatrixTypeNested>::Flags & (HereditaryBits | LinearAccessBit | AlignedBit)),
-    CoeffReadCost = _MatrixTypeNested::CoeffReadCost + ei_functor_traits<ViewOp>::Cost
+    CoeffReadCost = ei_traits<_MatrixTypeNested>::CoeffReadCost + ei_functor_traits<ViewOp>::Cost
  };
 };
@ -62,7 +62,7 @@ class CwiseUnaryView : ei_no_assignment_operator,
    inline CwiseUnaryView(const MatrixType& mat, const ViewOp& func = ViewOp())
      : m_matrix(mat), m_functor(func) {}
-    
+
    EIGEN_INHERIT_ASSIGNMENT_OPERATORS(CwiseUnaryView)
    EIGEN_STRONG_INLINE int rows() const { return m_matrix.rows(); }
@ -77,7 +77,7 @@ class CwiseUnaryView : ei_no_assignment_operator,
    {
      return m_functor(m_matrix.coeff(index));
    }
-    
+
    EIGEN_STRONG_INLINE Scalar& coeffRef(int row, int col)
    {
      return m_functor(m_matrix.const_cast_derived().coeffRef(row, col));
@ -89,7 +89,8 @@ class CwiseUnaryView : ei_no_assignment_operator,
    }
  protected:
-    const typename MatrixType::Nested m_matrix;
+    // FIXME changed from MatrixType::Nested because of a weird compilation error with sun CC
    const typename ei_nested<MatrixType>::type m_matrix;
    const ViewOp m_functor;
 };
--- a/Eigen/src/Core/MapBase.h
+++ b/Eigen/src/Core/MapBase.h
@ -178,6 +178,7 @@ template<typename Derived> class MapBase
    }
    using Base::operator*=;
    using Base::operator+=;
    template<typename OtherDerived>
    Derived& operator+=(const MatrixBase<OtherDerived>& other)
--- a/Eigen/src/Core/Matrix.h
+++ b/Eigen/src/Core/Matrix.h
@ -124,6 +124,7 @@ class Matrix
 {
  public:
    EIGEN_GENERIC_PUBLIC_INTERFACE(Matrix)
    enum { Options = _Options };
    friend class Eigen::Map<Matrix, Unaligned>;
    typedef class Eigen::Map<Matrix, Unaligned> UnalignedMapType;
@ -217,7 +218,7 @@ class Matrix
      *
      * This method is intended for dynamic-size matrices, although it is legal to call it on any
      * matrix as long as fixed dimensions are left unchanged. If you only want to change the number
-      * of rows and/or of columns, you can use resize(NoChange_t, int), resize(int, NoChange_t). 
+      * of rows and/or of columns, you can use resize(NoChange_t, int), resize(int, NoChange_t).
      *
      * If the current number of coefficients of \c *this exactly matches the
      * product \a rows * \a cols, then no memory allocation is performed and
--- a/Eigen/src/Core/MatrixBase.h
+++ b/Eigen/src/Core/MatrixBase.h
@ -137,10 +137,14 @@ template<typename Derived> class MatrixBase
          * constructed from this one. See the \ref flags "list of flags".
          */
-      CoeffReadCost = ei_traits<Derived>::CoeffReadCost
+      CoeffReadCost = ei_traits<Derived>::CoeffReadCost,
        /**< This is a rough measure of how expensive it is to read one coefficient from
          * this expression.
          */
 #ifndef EIGEN_PARSED_BY_DOXYGEN
      _HasDirectAccess = (int(Flags)&DirectAccessBit) ? 1 : 0 // workaround sunCC
 #endif
    };
    /** Default constructor. Just checks at compile-time for self-consistency of the flags. */
@ -204,7 +208,7 @@ template<typename Derived> class MatrixBase
    /** \internal the return type of coeff()
      */
-    typedef typename ei_meta_if<bool(int(Flags)&DirectAccessBit), const Scalar&, Scalar>::ret CoeffReturnType;
+    typedef typename ei_meta_if<_HasDirectAccess, const Scalar&, Scalar>::ret CoeffReturnType;
    /** \internal Represents a matrix with all coefficients equal to one another*/
    typedef CwiseNullaryOp<ei_scalar_constant_op<Scalar>,Derived> ConstantReturnType;
--- a/Eigen/src/Core/Product.h
+++ b/Eigen/src/Core/Product.h
@ -83,15 +83,14 @@ struct ProductReturnType<Lhs,Rhs,CacheFriendlyProduct>
 template<typename Lhs, typename Rhs> struct ei_product_mode
 {
  enum{
    value = ei_is_diagonal<Rhs>::ret || ei_is_diagonal<Lhs>::ret
          ? DiagonalProduct
-          : Lhs::MaxColsAtCompileTime == Dynamic
+          : ei_traits<Lhs>::MaxColsAtCompileTime == Dynamic
-            && ( Lhs::MaxRowsAtCompileTime == Dynamic
+            && ( ei_traits<Lhs>::MaxRowsAtCompileTime == Dynamic
-              || Rhs::MaxColsAtCompileTime == Dynamic )
+              || ei_traits<Rhs>::MaxColsAtCompileTime == Dynamic )
-            && (!(Rhs::IsVectorAtCompileTime && (Lhs::Flags&RowMajorBit)  && (!(Lhs::Flags&DirectAccessBit))))
+            && (!(ei_traits<Rhs>::IsVectorAtCompileTime && (ei_traits<Lhs>::Flags&RowMajorBit)  && (!(ei_traits<Lhs>::Flags&DirectAccessBit))))
-            && (!(Lhs::IsVectorAtCompileTime && (!(Rhs::Flags&RowMajorBit)) && (!(Rhs::Flags&DirectAccessBit))))
+            && (!(ei_traits<Lhs>::IsVectorAtCompileTime && (!(ei_traits<Rhs>::Flags&RowMajorBit)) && (!(ei_traits<Rhs>::Flags&DirectAccessBit))))
-            && (ei_is_same_type<typename Lhs::Scalar, typename Rhs::Scalar>::ret)
+            && (ei_is_same_type<typename ei_traits<Lhs>::Scalar, typename ei_traits<Rhs>::Scalar>::ret)
          ? CacheFriendlyProduct
          : NormalProduct };
 };
@ -215,7 +214,7 @@ template<typename LhsNested, typename RhsNested, int ProductMode> class Product
      */
    EIGEN_STRONG_INLINE bool _useCacheFriendlyProduct() const
    {
-      return  m_lhs.cols()>=EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD
+      return m_lhs.cols()>=EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD
              && (  rows()>=EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD
                 || cols()>=EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD);
    }
--- a/Eigen/src/Core/util/Constants.h
+++ b/Eigen/src/Core/util/Constants.h
@ -188,7 +188,7 @@ const unsigned int HereditaryBits = RowMajorBit
 // diagonal means both upper and lower triangular
 const unsigned DiagonalBits = UpperTriangularBit | LowerTriangularBit;
-    
+
 // Possible values for the Mode parameter of part()
 const unsigned int UpperTriangular = UpperTriangularBit;
 const unsigned int StrictlyUpperTriangular = UpperTriangularBit | ZeroDiagBit;
@ -201,7 +201,7 @@ const unsigned int UnitLowerTriangular = LowerTriangularBit | UnitDiagBit;
 template<typename T> struct ei_is_diagonal
 {
  enum {
-    ret = ( (unsigned int)(T::Flags) & DiagonalBits ) == DiagonalBits
+    ret = ( int(ei_traits<T>::Flags) & DiagonalBits ) == DiagonalBits
  };
 };
--- a/Eigen/src/Core/util/ForwardDeclarations.h
+++ b/Eigen/src/Core/util/ForwardDeclarations.h
@ -97,7 +97,7 @@ template<typename Scalar1,typename Scalar2> struct ei_scalar_multiple2_op;
 struct IOFormat;
 template<typename Scalar>
-void ei_cache_friendly_product(
+static void ei_cache_friendly_product(
  int _rows, int _cols, int depth,
  bool _lhsRowMajor, const Scalar* _lhs, int _lhsStride,
  bool _rhsRowMajor, const Scalar* _rhs, int _rhsStride,
--- a/Eigen/src/Core/util/Macros.h
+++ b/Eigen/src/Core/util/Macros.h
@ -51,7 +51,8 @@
 #define EIGEN_GCC3_OR_OLDER 0
 #endif
-#if !EIGEN_GCC_AND_ARCH_DOESNT_WANT_ALIGNMENT && !EIGEN_GCC3_OR_OLDER
+// FIXME vectorization + alignment is completely disabled with sun studio
 #if !EIGEN_GCC_AND_ARCH_DOESNT_WANT_ALIGNMENT && !EIGEN_GCC3_OR_OLDER && !defined(__SUNPRO_CC)
  #define EIGEN_ARCH_WANTS_ALIGNMENT 1
 #else
  #define EIGEN_ARCH_WANTS_ALIGNMENT 0
@ -97,7 +98,7 @@
 /** Allows to disable some optimizations which might affect the accuracy of the result.
  * Such optimization are enabled by default, and set EIGEN_FAST_MATH to 0 to disable them.
  * They currently include:
-  *   - single precision Cwise::sin() and Cwise::cos() when SSE vectorization is enabled. 
+  *   - single precision Cwise::sin() and Cwise::cos() when SSE vectorization is enabled.
  */
 #ifndef EIGEN_FAST_MATH
 #define EIGEN_FAST_MATH 1
@ -199,13 +200,16 @@ using Eigen::ei_cos;
 * vectorized and non-vectorized code.
 */
 #if !EIGEN_ALIGN
-#define EIGEN_ALIGN_128
+  #define EIGEN_ALIGN_128
 #elif (defined __GNUC__)
-#define EIGEN_ALIGN_128 __attribute__((aligned(16)))
+  #define EIGEN_ALIGN_128 __attribute__((aligned(16)))
 #elif (defined _MSC_VER)
-#define EIGEN_ALIGN_128 __declspec(align(16))
+  #define EIGEN_ALIGN_128 __declspec(align(16))
 #elif (defined __SUNPRO_CC)
  // FIXME not sure about this one:
  #define EIGEN_ALIGN_128 __attribute__((aligned(16)))
 #else
-#error Please tell me what is the equivalent of __attribute__((aligned(16))) for your compiler
+  #error Please tell me what is the equivalent of __attribute__((aligned(16))) for your compiler
 #endif
 #define EIGEN_RESTRICT __restrict
--- a/cmake/FindEigen2.cmake
+++ b/cmake/FindEigen2.cmake
@ -46,7 +46,7 @@ macro(_eigen2_check_version)
  endif(${EIGEN2_VERSION} VERSION_LESS ${Eigen2_FIND_VERSION})
  if(NOT EIGEN2_VERSION_OK)
-  
+
    message(STATUS "Eigen2 version ${EIGEN2_VERSION} found in ${EIGEN2_INCLUDE_DIR}, "
                   "but at least version ${Eigen2_FIND_VERSION} is required")
  endif(NOT EIGEN2_VERSION_OK)
--- a/test/array.cpp
+++ b/test/array.cpp
@ -1,7 +1,7 @@
 // This file is part of Eigen, a lightweight C++ template library
 // for linear algebra.
 //
-// Copyright (C) 2008 Gael Guennebaud <g.gael@free.fr>
+// Copyright (C) 2008-2009 Gael Guennebaud <g.gael@free.fr>
 //
 // Eigen is free software; you can redistribute it and/or
 // modify it under the terms of the GNU Lesser General Public
--- a/test/basicstuff.cpp
+++ b/test/basicstuff.cpp
@ -107,7 +107,7 @@ template<typename MatrixType> void basicStuff(const MatrixType& m)
  {
    VERIFY_IS_NOT_APPROX(m3, m1);
  }
-  
+
  m3.real() = m1.real();
  VERIFY_IS_APPROX(static_cast<const MatrixType&>(m3).real(), static_cast<const MatrixType&>(m1).real());
  VERIFY_IS_APPROX(static_cast<const MatrixType&>(m3).real(), m1.real());
@ -121,16 +121,16 @@ template<typename MatrixType> void basicStuffComplex(const MatrixType& m)
  int rows = m.rows();
  int cols = m.cols();
-  
+
  Scalar s1 = ei_random<Scalar>(),
         s2 = ei_random<Scalar>();
-  
+
  VERIFY(ei_real(s1)==ei_real_ref(s1));
  VERIFY(ei_imag(s1)==ei_imag_ref(s1));
  ei_real_ref(s1) = ei_real(s2);
  ei_imag_ref(s1) = ei_imag(s2);
  VERIFY(s1==s2);
-  
+
  RealMatrixType rm1 = RealMatrixType::Random(rows,cols),
                 rm2 = RealMatrixType::Random(rows,cols);
  MatrixType cm(rows,cols);
@ -162,7 +162,7 @@ void test_basicstuff()
    CALL_SUBTEST( basicStuff(MatrixXcd(20, 20)) );
    CALL_SUBTEST( basicStuff(Matrix<float, 100, 100>()) );
    CALL_SUBTEST( basicStuff(Matrix<long double,Dynamic,Dynamic>(10,10)) );
-    
+
    CALL_SUBTEST( basicStuffComplex(MatrixXcf(21, 17)) );
    CALL_SUBTEST( basicStuffComplex(MatrixXcd(2, 3)) );
  }