mirror of
https://gitlab.com/libeigen/eigen.git
synced 2025-09-26 00:03:14 +08:00
merge with default branch
This commit is contained in:
commit
cefd9b8888
@ -98,11 +98,18 @@ if(CMAKE_COMPILER_IS_GNUCXX)
|
||||
message("Enabling SSE4.2 in tests/examples")
|
||||
endif()
|
||||
|
||||
option(EIGEN_TEST_ALTIVEC "Enable/Disable altivec in tests/examples" OFF)
|
||||
option(EIGEN_TEST_ALTIVEC "Enable/Disable AltiVec in tests/examples" OFF)
|
||||
if(EIGEN_TEST_ALTIVEC)
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -maltivec -mabi=altivec")
|
||||
message("Enabling AltiVec in tests/examples")
|
||||
endif()
|
||||
|
||||
option(EIGEN_TEST_NEON "Enable/Disable Neon in tests/examples" OFF)
|
||||
if(EIGEN_TEST_NEON)
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mfloat-abi=softfp -mfpu=neon -mcpu=cortex-a8")
|
||||
message("Enabling NEON in tests/examples")
|
||||
endif()
|
||||
|
||||
endif(CMAKE_COMPILER_IS_GNUCXX)
|
||||
|
||||
if(MSVC)
|
||||
|
98
Eigen/Core
98
Eigen/Core
@ -2,7 +2,7 @@
|
||||
// for linear algebra.
|
||||
//
|
||||
// Copyright (C) 2008 Gael Guennebaud <g.gael@free.fr>
|
||||
// Copyright (C) 2007-2009 Benoit Jacob <jacob.benoit.1@gmail.com>
|
||||
// Copyright (C) 2007-2010 Benoit Jacob <jacob.benoit.1@gmail.com>
|
||||
//
|
||||
// Eigen is free software; you can redistribute it and/or
|
||||
// modify it under the terms of the GNU Lesser General Public
|
||||
@ -61,20 +61,45 @@
|
||||
|
||||
#ifndef EIGEN_DONT_VECTORIZE
|
||||
#if defined (EIGEN_SSE2_BUT_NOT_OLD_GCC) || defined(EIGEN_SSE2_ON_MSVC_2008_OR_LATER)
|
||||
|
||||
// Defines symbols for compile-time detection of which instructions are
|
||||
// used.
|
||||
// EIGEN_VECTORIZE_YY is defined if and only if the instruction set YY is used
|
||||
#define EIGEN_VECTORIZE
|
||||
#define EIGEN_VECTORIZE_SSE
|
||||
#include <emmintrin.h>
|
||||
#include <xmmintrin.h>
|
||||
#define EIGEN_VECTORIZE_SSE2
|
||||
|
||||
// Detect sse3/ssse3/sse4:
|
||||
// gcc and icc defines __SSE3__, ..,
|
||||
// there is no way to know about this on msvc. You can define EIGEN_VECTORIZE_SSE* if you
|
||||
// want to force the use of those instructions with msvc.
|
||||
#ifdef __SSE3__
|
||||
#include <pmmintrin.h>
|
||||
#define EIGEN_VECTORIZE_SSE3
|
||||
#endif
|
||||
#ifdef __SSSE3__
|
||||
#include <tmmintrin.h>
|
||||
#define EIGEN_VECTORIZE_SSSE3
|
||||
#endif
|
||||
#ifdef __SSE4_1__
|
||||
#include <smmintrin.h>
|
||||
#define EIGEN_VECTORIZE_SSE4_1
|
||||
#endif
|
||||
#ifdef __SSE4_2__
|
||||
#define EIGEN_VECTORIZE_SSE4_2
|
||||
#endif
|
||||
|
||||
// include files
|
||||
|
||||
#include <emmintrin.h>
|
||||
#include <xmmintrin.h>
|
||||
#ifdef EIGEN_VECTORIZE_SSE3
|
||||
#include <pmmintrin.h>
|
||||
#endif
|
||||
#ifdef EIGEN_VECTORIZE_SSSE3
|
||||
#include <tmmintrin.h>
|
||||
#endif
|
||||
#ifdef EIGEN_VECTORIZE_SSE4_1
|
||||
#include <smmintrin.h>
|
||||
#endif
|
||||
#ifdef EIGEN_VECTORIZE_SSE4_2
|
||||
#include <nmmintrin.h>
|
||||
#endif
|
||||
#elif defined __ALTIVEC__
|
||||
@ -86,6 +111,10 @@
|
||||
#undef bool
|
||||
#undef vector
|
||||
#undef pixel
|
||||
#elif defined __ARM_NEON__
|
||||
#define EIGEN_VECTORIZE
|
||||
#define EIGEN_VECTORIZE_NEON
|
||||
#include <arm_neon.h>
|
||||
#endif
|
||||
#endif
|
||||
|
||||
@ -97,18 +126,24 @@
|
||||
#include <omp.h>
|
||||
#endif
|
||||
|
||||
#include <cerrno>
|
||||
#include <cstdlib>
|
||||
#include <cmath>
|
||||
#include <complex>
|
||||
#include <cassert>
|
||||
#include <functional>
|
||||
#include <iostream>
|
||||
#include <iosfwd>
|
||||
#include <cstring>
|
||||
#include <string>
|
||||
#include <limits>
|
||||
// for min/max:
|
||||
#include <algorithm>
|
||||
|
||||
// for outputting debug info
|
||||
#ifdef EIGEN_DEBUG_ASSIGN
|
||||
#include<iostream>
|
||||
#endif
|
||||
|
||||
#if (defined(_CPPUNWIND) || defined(__EXCEPTIONS)) && !defined(EIGEN_NO_EXCEPTIONS)
|
||||
#define EIGEN_EXCEPTIONS
|
||||
#endif
|
||||
@ -129,6 +164,26 @@
|
||||
|
||||
namespace Eigen {
|
||||
|
||||
inline static const char *SimdInstructionSetsInUse(void) {
|
||||
#if defined(EIGEN_VECTORIZE_SSE4_2)
|
||||
return "SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2";
|
||||
#elif defined(EIGEN_VECTORIZE_SSE4_1)
|
||||
return "SSE, SSE2, SSE3, SSSE3, SSE4.1";
|
||||
#elif defined(EIGEN_VECTORIZE_SSSE3)
|
||||
return "SSE, SSE2, SSE3, SSSE3";
|
||||
#elif defined(EIGEN_VECTORIZE_SSE3)
|
||||
return "SSE, SSE2, SSE3";
|
||||
#elif defined(EIGEN_VECTORIZE_SSE2)
|
||||
return "SSE, SSE2";
|
||||
#elif defined(EIGEN_VECTORIZE_ALTIVEC)
|
||||
return "AltiVec";
|
||||
#elif defined(EIGEN_VECTORIZE_NEON)
|
||||
return "ARM NEON";
|
||||
#else
|
||||
return "None";
|
||||
#endif
|
||||
}
|
||||
|
||||
// we use size_t frequently and we'll never remember to prepend it with std:: everytime just to
|
||||
// ensure QNX/QCC support
|
||||
using std::size_t;
|
||||
@ -163,11 +218,11 @@ struct Dense {};
|
||||
#include "src/Core/arch/SSE/MathFunctions.h"
|
||||
#elif defined EIGEN_VECTORIZE_ALTIVEC
|
||||
#include "src/Core/arch/AltiVec/PacketMath.h"
|
||||
#elif defined EIGEN_VECTORIZE_NEON
|
||||
#include "src/Core/arch/NEON/PacketMath.h"
|
||||
#endif
|
||||
|
||||
#ifndef EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD
|
||||
#define EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD 8
|
||||
#endif
|
||||
#include "src/Core/arch/Default/Settings.h"
|
||||
|
||||
#include "src/Core/Functors.h"
|
||||
#include "src/Core/DenseBase.h"
|
||||
@ -196,6 +251,7 @@ struct Dense {};
|
||||
#include "src/Core/Dot.h"
|
||||
#include "src/Core/StableNorm.h"
|
||||
#include "src/Core/MapBase.h"
|
||||
#include "src/Core/Stride.h"
|
||||
#include "src/Core/Map.h"
|
||||
#include "src/Core/Block.h"
|
||||
#include "src/Core/VectorBlock.h"
|
||||
@ -231,28 +287,6 @@ struct Dense {};
|
||||
#include "src/Core/products/TriangularSolverMatrix.h"
|
||||
#include "src/Core/BandMatrix.h"
|
||||
|
||||
/** \defgroup Array_Module Array module
|
||||
* \ingroup Core_Module
|
||||
* This module provides several handy features to manipulate matrices as simple array of values.
|
||||
* In addition to listed classes, it defines various methods of the Cwise interface
|
||||
* (accessible from MatrixBase::cwise()), including:
|
||||
* - matrix-scalar sum,
|
||||
* - coeff-wise comparison operators,
|
||||
* - sin, cos, sqrt, pow, exp, log, square, cube, inverse (reciprocal).
|
||||
*
|
||||
* This module also provides various MatrixBase methods, including:
|
||||
* - boolean reductions: \ref MatrixBase::all() "all", \ref MatrixBase::any() "any", \ref MatrixBase::count() "count",
|
||||
* - \ref MatrixBase::Random() "random matrix initialization",
|
||||
* - a \ref MatrixBase::select() "select" function mimicking the trivariate ?: operator,
|
||||
* - \ref MatrixBase::colwise() "column-wise" and \ref MatrixBase::rowwise() "row-wise" reductions,
|
||||
* - \ref MatrixBase::reverse() "matrix reverse",
|
||||
* - \ref MatrixBase::lpNorm() "generic matrix norm".
|
||||
*
|
||||
* \code
|
||||
* #include <Eigen/Core>
|
||||
* \endcode
|
||||
*/
|
||||
|
||||
#include "src/Array/Functors.h"
|
||||
#include "src/Array/BooleanRedux.h"
|
||||
#include "src/Array/Select.h"
|
||||
|
@ -26,7 +26,7 @@
|
||||
#define EIGEN2SUPPORT_H
|
||||
|
||||
#if (!defined(EIGEN2_SUPPORT)) || (!defined(EIGEN_CORE_H))
|
||||
#error Eigen2 support must be enabled by defining EIGEN2_SUPPORT before any other Eigen header
|
||||
#error Eigen2 support must be enabled by defining EIGEN2_SUPPORT before including any Eigen header
|
||||
#endif
|
||||
|
||||
#include "src/Core/util/DisableMSVCWarnings.h"
|
||||
@ -36,6 +36,7 @@ namespace Eigen {
|
||||
/** \defgroup Eigen2Support_Module Eigen2 support module
|
||||
* This module provides a couple of deprecated functions improving the compatibility with Eigen2.
|
||||
*
|
||||
* To use it, define EIGEN2_SUPPORT before including any Eigen header
|
||||
* \code
|
||||
* #define EIGEN2_SUPPORT
|
||||
* \endcode
|
||||
@ -51,4 +52,7 @@ namespace Eigen {
|
||||
|
||||
#include "src/Core/util/EnableMSVCWarnings.h"
|
||||
|
||||
// Eigen2 used to include iostream
|
||||
#include<iostream>
|
||||
|
||||
#endif // EIGEN2SUPPORT_H
|
||||
|
@ -213,6 +213,9 @@ class Array
|
||||
void swap(ArrayBase<OtherDerived> EIGEN_REF_TO_TEMPORARY other)
|
||||
{ this->_swap(other.derived()); }
|
||||
|
||||
inline int innerStride() const { return 1; }
|
||||
inline int outerStride() const { return this->innerSize(); }
|
||||
|
||||
#ifdef EIGEN_ARRAY_PLUGIN
|
||||
#include EIGEN_ARRAY_PLUGIN
|
||||
#endif
|
||||
|
@ -55,7 +55,8 @@ class ArrayWrapper : public ArrayBase<ArrayWrapper<ExpressionType> >
|
||||
|
||||
inline int rows() const { return m_expression.rows(); }
|
||||
inline int cols() const { return m_expression.cols(); }
|
||||
inline int stride() const { return m_expression.stride(); }
|
||||
inline int outerStride() const { return m_expression.outerStride(); }
|
||||
inline int innerStride() const { return m_expression.innerStride(); }
|
||||
|
||||
inline const CoeffReturnType coeff(int row, int col) const
|
||||
{
|
||||
@ -139,7 +140,8 @@ class MatrixWrapper : public MatrixBase<MatrixWrapper<ExpressionType> >
|
||||
|
||||
inline int rows() const { return m_expression.rows(); }
|
||||
inline int cols() const { return m_expression.cols(); }
|
||||
inline int stride() const { return m_expression.stride(); }
|
||||
inline int outerStride() const { return m_expression.outerStride(); }
|
||||
inline int innerStride() const { return m_expression.innerStride(); }
|
||||
|
||||
inline const CoeffReturnType coeff(int row, int col) const
|
||||
{
|
||||
|
@ -81,11 +81,11 @@ template<typename MatrixType, int Direction> class Reverse
|
||||
|
||||
typedef typename MatrixType::template MakeBase< Reverse<MatrixType, Direction> >::Type Base;
|
||||
EIGEN_DENSE_PUBLIC_INTERFACE(Reverse)
|
||||
using Base::IsRowMajor;
|
||||
|
||||
protected:
|
||||
enum {
|
||||
PacketSize = ei_packet_traits<Scalar>::size,
|
||||
IsRowMajor = Flags & RowMajorBit,
|
||||
IsColMajor = !IsRowMajor,
|
||||
ReverseRow = (Direction == Vertical) || (Direction == BothDirections),
|
||||
ReverseCol = (Direction == Horizontal) || (Direction == BothDirections),
|
||||
|
@ -62,14 +62,21 @@ template<typename _MatrixType> class LDLT
|
||||
typedef Matrix<int, MatrixType::RowsAtCompileTime, 1> IntColVectorType;
|
||||
typedef Matrix<int, 1, MatrixType::RowsAtCompileTime> IntRowVectorType;
|
||||
|
||||
/**
|
||||
* \brief Default Constructor.
|
||||
*
|
||||
* The default constructor is useful in cases in which the user intends to
|
||||
* perform decompositions via LDLT::compute(const MatrixType&).
|
||||
*/
|
||||
/** \brief Default Constructor.
|
||||
*
|
||||
* The default constructor is useful in cases in which the user intends to
|
||||
* perform decompositions via LDLT::compute(const MatrixType&).
|
||||
*/
|
||||
LDLT() : m_matrix(), m_p(), m_transpositions(), m_isInitialized(false) {}
|
||||
|
||||
/** \brief Default Constructor with memory preallocation
|
||||
*
|
||||
* Like the default constructor but with preallocation of the internal data
|
||||
* according to the specified problem \a size.
|
||||
* \sa LDLT()
|
||||
*/
|
||||
LDLT(int size) : m_matrix(size,size), m_p(size), m_transpositions(size), m_isInitialized(false) {}
|
||||
|
||||
LDLT(const MatrixType& matrix)
|
||||
: m_matrix(matrix.rows(), matrix.cols()),
|
||||
m_p(matrix.rows()),
|
||||
@ -148,6 +155,8 @@ template<typename _MatrixType> class LDLT
|
||||
return m_matrix;
|
||||
}
|
||||
|
||||
MatrixType reconstructedMatrix() const;
|
||||
|
||||
inline int rows() const { return m_matrix.rows(); }
|
||||
inline int cols() const { return m_matrix.cols(); }
|
||||
|
||||
@ -175,6 +184,10 @@ LDLT<MatrixType>& LDLT<MatrixType>::compute(const MatrixType& a)
|
||||
|
||||
m_matrix = a;
|
||||
|
||||
m_p.resize(size);
|
||||
m_transpositions.resize(size);
|
||||
m_isInitialized = false;
|
||||
|
||||
if (size <= 1) {
|
||||
m_p.setZero();
|
||||
m_transpositions.setZero();
|
||||
@ -202,11 +215,8 @@ LDLT<MatrixType>& LDLT<MatrixType>::compute(const MatrixType& a)
|
||||
{
|
||||
// The biggest overall is the point of reference to which further diagonals
|
||||
// are compared; if any diagonal is negligible compared
|
||||
// to the largest overall, the algorithm bails. This cutoff is suggested
|
||||
// in "Analysis of the Cholesky Decomposition of a Semi-definite Matrix" by
|
||||
// Nicholas J. Higham. Also see "Accuracy and Stability of Numerical
|
||||
// Algorithms" page 217, also by Higham.
|
||||
cutoff = ei_abs(NumTraits<Scalar>::epsilon() * RealScalar(size) * biggest_in_corner);
|
||||
// to the largest overall, the algorithm bails.
|
||||
cutoff = ei_abs(NumTraits<Scalar>::epsilon() * biggest_in_corner);
|
||||
|
||||
m_sign = ei_real(m_matrix.diagonal().coeff(index_of_biggest_in_corner)) > 0 ? 1 : -1;
|
||||
}
|
||||
@ -231,26 +241,21 @@ LDLT<MatrixType>& LDLT<MatrixType>::compute(const MatrixType& a)
|
||||
continue;
|
||||
}
|
||||
|
||||
RealScalar Djj = ei_real(m_matrix.coeff(j,j) - m_matrix.row(j).head(j)
|
||||
.dot(m_matrix.col(j).head(j)));
|
||||
RealScalar Djj = ei_real(m_matrix.coeff(j,j) - m_matrix.row(j).head(j).dot(m_matrix.col(j).head(j)));
|
||||
m_matrix.coeffRef(j,j) = Djj;
|
||||
|
||||
// Finish early if the matrix is not full rank.
|
||||
if(ei_abs(Djj) < cutoff)
|
||||
{
|
||||
for(int i = j; i < size; i++) m_transpositions.coeffRef(i) = i;
|
||||
break;
|
||||
}
|
||||
|
||||
int endSize = size - j - 1;
|
||||
if (endSize > 0) {
|
||||
_temporary.tail(endSize).noalias() = m_matrix.block(j+1,0, endSize, j)
|
||||
* m_matrix.col(j).head(j).conjugate();
|
||||
|
||||
m_matrix.row(j).tail(endSize) = m_matrix.row(j).tail(endSize).conjugate()
|
||||
- _temporary.tail(endSize).transpose();
|
||||
- _temporary.tail(endSize).transpose();
|
||||
|
||||
m_matrix.col(j).tail(endSize) = m_matrix.row(j).tail(endSize) / Djj;
|
||||
if(ei_abs(Djj) > cutoff)
|
||||
{
|
||||
m_matrix.col(j).tail(endSize) = m_matrix.row(j).tail(endSize) / Djj;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -315,6 +320,31 @@ bool LDLT<MatrixType>::solveInPlace(MatrixBase<Derived> &bAndX) const
|
||||
return true;
|
||||
}
|
||||
|
||||
/** \returns the matrix represented by the decomposition,
|
||||
* i.e., it returns the product: P^T L D L^* P.
|
||||
* This function is provided for debug purpose. */
|
||||
template<typename MatrixType>
|
||||
MatrixType LDLT<MatrixType>::reconstructedMatrix() const
|
||||
{
|
||||
ei_assert(m_isInitialized && "LDLT is not initialized.");
|
||||
const int size = m_matrix.rows();
|
||||
MatrixType res(size,size);
|
||||
res.setIdentity();
|
||||
|
||||
// PI
|
||||
for(int i = 0; i < size; ++i) res.row(m_transpositions.coeff(i)).swap(res.row(i));
|
||||
// L^* P
|
||||
res = matrixL().adjoint() * res;
|
||||
// D(L^*P)
|
||||
res = vectorD().asDiagonal() * res;
|
||||
// L(DL^*P)
|
||||
res = matrixL() * res;
|
||||
// P^T (LDL^*P)
|
||||
for (int i = size-1; i >= 0; --i) res.row(m_transpositions.coeff(i)).swap(res.row(i));
|
||||
|
||||
return res;
|
||||
}
|
||||
|
||||
/** \cholesky_module
|
||||
* \returns the Cholesky decomposition with full pivoting without square root of \c *this
|
||||
*/
|
||||
|
@ -117,7 +117,7 @@ template<typename _MatrixType, int _UpLo> class LLT
|
||||
&& "LLT::solve(): invalid number of rows of the right hand side matrix b");
|
||||
return ei_solve_retval<LLT, Rhs>(*this, b.derived());
|
||||
}
|
||||
|
||||
|
||||
template<typename Derived>
|
||||
bool solveInPlace(MatrixBase<Derived> &bAndX) const;
|
||||
|
||||
@ -133,6 +133,8 @@ template<typename _MatrixType, int _UpLo> class LLT
|
||||
return m_matrix;
|
||||
}
|
||||
|
||||
MatrixType reconstructedMatrix() const;
|
||||
|
||||
inline int rows() const { return m_matrix.rows(); }
|
||||
inline int cols() const { return m_matrix.cols(); }
|
||||
|
||||
@ -295,6 +297,16 @@ bool LLT<MatrixType,_UpLo>::solveInPlace(MatrixBase<Derived> &bAndX) const
|
||||
return true;
|
||||
}
|
||||
|
||||
/** \returns the matrix represented by the decomposition,
|
||||
* i.e., it returns the product: L L^*.
|
||||
* This function is provided for debug purpose. */
|
||||
template<typename MatrixType, int _UpLo>
|
||||
MatrixType LLT<MatrixType,_UpLo>::reconstructedMatrix() const
|
||||
{
|
||||
ei_assert(m_isInitialized && "LLT is not initialized.");
|
||||
return matrixL() * matrixL().adjoint().toDenseMatrix();
|
||||
}
|
||||
|
||||
/** \cholesky_module
|
||||
* \returns the LLT decomposition of \c *this
|
||||
*/
|
||||
|
@ -2,7 +2,7 @@
|
||||
// for linear algebra.
|
||||
//
|
||||
// Copyright (C) 2007 Michael Olbrich <michael.olbrich@gmx.net>
|
||||
// Copyright (C) 2006-2008 Benoit Jacob <jacob.benoit.1@gmail.com>
|
||||
// Copyright (C) 2006-2010 Benoit Jacob <jacob.benoit.1@gmail.com>
|
||||
// Copyright (C) 2008 Gael Guennebaud <g.gael@free.fr>
|
||||
//
|
||||
// Eigen is free software; you can redistribute it and/or
|
||||
@ -37,34 +37,35 @@ struct ei_assign_traits
|
||||
public:
|
||||
enum {
|
||||
DstIsAligned = Derived::Flags & AlignedBit,
|
||||
DstHasDirectAccess = Derived::Flags & DirectAccessBit,
|
||||
SrcIsAligned = OtherDerived::Flags & AlignedBit,
|
||||
SrcAlignment = DstIsAligned && SrcIsAligned ? Aligned : Unaligned
|
||||
JointAlignment = DstIsAligned && SrcIsAligned ? Aligned : Unaligned
|
||||
};
|
||||
|
||||
private:
|
||||
enum {
|
||||
InnerSize = int(Derived::Flags)&RowMajorBit
|
||||
? Derived::ColsAtCompileTime
|
||||
: Derived::RowsAtCompileTime,
|
||||
InnerMaxSize = int(Derived::Flags)&RowMajorBit
|
||||
? Derived::MaxColsAtCompileTime
|
||||
: Derived::MaxRowsAtCompileTime,
|
||||
MaxSizeAtCompileTime = ei_size_at_compile_time<Derived::MaxColsAtCompileTime,Derived::MaxRowsAtCompileTime>::ret,
|
||||
InnerSize = int(Derived::IsVectorAtCompileTime) ? int(Derived::SizeAtCompileTime)
|
||||
: int(Derived::Flags)&RowMajorBit ? int(Derived::ColsAtCompileTime)
|
||||
: int(Derived::RowsAtCompileTime),
|
||||
InnerMaxSize = int(Derived::IsVectorAtCompileTime) ? int(Derived::MaxSizeAtCompileTime)
|
||||
: int(Derived::Flags)&RowMajorBit ? int(Derived::MaxColsAtCompileTime)
|
||||
: int(Derived::MaxRowsAtCompileTime),
|
||||
MaxSizeAtCompileTime = Derived::SizeAtCompileTime,
|
||||
PacketSize = ei_packet_traits<typename Derived::Scalar>::size
|
||||
};
|
||||
|
||||
enum {
|
||||
StorageOrdersAgree = (int(Derived::Flags)&RowMajorBit)==(int(OtherDerived::Flags)&RowMajorBit),
|
||||
StorageOrdersAgree = (int(Derived::IsRowMajor) == int(OtherDerived::IsRowMajor)),
|
||||
MightVectorize = StorageOrdersAgree
|
||||
&& (int(Derived::Flags) & int(OtherDerived::Flags) & ActualPacketAccessBit),
|
||||
MayInnerVectorize = MightVectorize && int(InnerSize)!=Dynamic && int(InnerSize)%int(PacketSize)==0
|
||||
&& int(DstIsAligned) && int(SrcIsAligned),
|
||||
MayLinearize = StorageOrdersAgree && (int(Derived::Flags) & int(OtherDerived::Flags) & LinearAccessBit),
|
||||
MayLinearVectorize = MightVectorize && MayLinearize
|
||||
&& (DstIsAligned || MaxSizeAtCompileTime == Dynamic),
|
||||
MayLinearVectorize = MightVectorize && MayLinearize && DstHasDirectAccess
|
||||
&& (DstIsAligned || MaxSizeAtCompileTime == Dynamic),
|
||||
/* If the destination isn't aligned, we have to do runtime checks and we don't unroll,
|
||||
so it's only good for large enough sizes. */
|
||||
MaySliceVectorize = MightVectorize && int(InnerMaxSize)>=3*PacketSize
|
||||
MaySliceVectorize = MightVectorize && DstHasDirectAccess && int(InnerMaxSize)>=3*PacketSize
|
||||
/* slice vectorization can be slow, so we only want it if the slices are big, which is
|
||||
indicated by InnerMaxSize rather than InnerSize, think of the case of a dynamic block
|
||||
in a fixed-size matrix */
|
||||
@ -104,16 +105,18 @@ public:
|
||||
: int(NoUnrolling)
|
||||
};
|
||||
|
||||
#ifdef EIGEN_DEBUG_ASSIGN
|
||||
static void debug()
|
||||
{
|
||||
EIGEN_DEBUG_VAR(DstIsAligned)
|
||||
EIGEN_DEBUG_VAR(SrcIsAligned)
|
||||
EIGEN_DEBUG_VAR(SrcAlignment)
|
||||
EIGEN_DEBUG_VAR(JointAlignment)
|
||||
EIGEN_DEBUG_VAR(InnerSize)
|
||||
EIGEN_DEBUG_VAR(InnerMaxSize)
|
||||
EIGEN_DEBUG_VAR(PacketSize)
|
||||
EIGEN_DEBUG_VAR(StorageOrdersAgree)
|
||||
EIGEN_DEBUG_VAR(MightVectorize)
|
||||
EIGEN_DEBUG_VAR(MayLinearize)
|
||||
EIGEN_DEBUG_VAR(MayInnerVectorize)
|
||||
EIGEN_DEBUG_VAR(MayLinearVectorize)
|
||||
EIGEN_DEBUG_VAR(MaySliceVectorize)
|
||||
@ -123,6 +126,7 @@ public:
|
||||
EIGEN_DEBUG_VAR(MayUnrollInner)
|
||||
EIGEN_DEBUG_VAR(Unrolling)
|
||||
}
|
||||
#endif
|
||||
};
|
||||
|
||||
/***************************************************************************
|
||||
@ -137,17 +141,13 @@ template<typename Derived1, typename Derived2, int Index, int Stop>
|
||||
struct ei_assign_DefaultTraversal_CompleteUnrolling
|
||||
{
|
||||
enum {
|
||||
row = int(Derived1::Flags)&RowMajorBit
|
||||
? Index / int(Derived1::ColsAtCompileTime)
|
||||
: Index % Derived1::RowsAtCompileTime,
|
||||
col = int(Derived1::Flags)&RowMajorBit
|
||||
? Index % int(Derived1::ColsAtCompileTime)
|
||||
: Index / Derived1::RowsAtCompileTime
|
||||
outer = Index / Derived1::InnerSizeAtCompileTime,
|
||||
inner = Index % Derived1::InnerSizeAtCompileTime
|
||||
};
|
||||
|
||||
EIGEN_STRONG_INLINE static void run(Derived1 &dst, const Derived2 &src)
|
||||
{
|
||||
dst.copyCoeff(row, col, src);
|
||||
dst.copyCoeffByOuterInner(outer, inner, src);
|
||||
ei_assign_DefaultTraversal_CompleteUnrolling<Derived1, Derived2, Index+1, Stop>::run(dst, src);
|
||||
}
|
||||
};
|
||||
@ -161,13 +161,10 @@ struct ei_assign_DefaultTraversal_CompleteUnrolling<Derived1, Derived2, Stop, St
|
||||
template<typename Derived1, typename Derived2, int Index, int Stop>
|
||||
struct ei_assign_DefaultTraversal_InnerUnrolling
|
||||
{
|
||||
EIGEN_STRONG_INLINE static void run(Derived1 &dst, const Derived2 &src, int row_or_col)
|
||||
EIGEN_STRONG_INLINE static void run(Derived1 &dst, const Derived2 &src, int outer)
|
||||
{
|
||||
const bool rowMajor = int(Derived1::Flags)&RowMajorBit;
|
||||
const int row = rowMajor ? row_or_col : Index;
|
||||
const int col = rowMajor ? Index : row_or_col;
|
||||
dst.copyCoeff(row, col, src);
|
||||
ei_assign_DefaultTraversal_InnerUnrolling<Derived1, Derived2, Index+1, Stop>::run(dst, src, row_or_col);
|
||||
dst.copyCoeffByOuterInner(outer, Index, src);
|
||||
ei_assign_DefaultTraversal_InnerUnrolling<Derived1, Derived2, Index+1, Stop>::run(dst, src, outer);
|
||||
}
|
||||
};
|
||||
|
||||
@ -205,18 +202,14 @@ template<typename Derived1, typename Derived2, int Index, int Stop>
|
||||
struct ei_assign_innervec_CompleteUnrolling
|
||||
{
|
||||
enum {
|
||||
row = int(Derived1::Flags)&RowMajorBit
|
||||
? Index / int(Derived1::ColsAtCompileTime)
|
||||
: Index % Derived1::RowsAtCompileTime,
|
||||
col = int(Derived1::Flags)&RowMajorBit
|
||||
? Index % int(Derived1::ColsAtCompileTime)
|
||||
: Index / Derived1::RowsAtCompileTime,
|
||||
SrcAlignment = ei_assign_traits<Derived1,Derived2>::SrcAlignment
|
||||
outer = Index / Derived1::InnerSizeAtCompileTime,
|
||||
inner = Index % Derived1::InnerSizeAtCompileTime,
|
||||
JointAlignment = ei_assign_traits<Derived1,Derived2>::JointAlignment
|
||||
};
|
||||
|
||||
EIGEN_STRONG_INLINE static void run(Derived1 &dst, const Derived2 &src)
|
||||
{
|
||||
dst.template copyPacket<Derived2, Aligned, SrcAlignment>(row, col, src);
|
||||
dst.template copyPacketByOuterInner<Derived2, Aligned, JointAlignment>(outer, inner, src);
|
||||
ei_assign_innervec_CompleteUnrolling<Derived1, Derived2,
|
||||
Index+ei_packet_traits<typename Derived1::Scalar>::size, Stop>::run(dst, src);
|
||||
}
|
||||
@ -231,13 +224,11 @@ struct ei_assign_innervec_CompleteUnrolling<Derived1, Derived2, Stop, Stop>
|
||||
template<typename Derived1, typename Derived2, int Index, int Stop>
|
||||
struct ei_assign_innervec_InnerUnrolling
|
||||
{
|
||||
EIGEN_STRONG_INLINE static void run(Derived1 &dst, const Derived2 &src, int row_or_col)
|
||||
EIGEN_STRONG_INLINE static void run(Derived1 &dst, const Derived2 &src, int outer)
|
||||
{
|
||||
const int row = int(Derived1::Flags)&RowMajorBit ? row_or_col : Index;
|
||||
const int col = int(Derived1::Flags)&RowMajorBit ? Index : row_or_col;
|
||||
dst.template copyPacket<Derived2, Aligned, Aligned>(row, col, src);
|
||||
dst.template copyPacketByOuterInner<Derived2, Aligned, Aligned>(outer, Index, src);
|
||||
ei_assign_innervec_InnerUnrolling<Derived1, Derived2,
|
||||
Index+ei_packet_traits<typename Derived1::Scalar>::size, Stop>::run(dst, src, row_or_col);
|
||||
Index+ei_packet_traits<typename Derived1::Scalar>::size, Stop>::run(dst, src, outer);
|
||||
}
|
||||
};
|
||||
|
||||
@ -267,14 +258,9 @@ struct ei_assign_impl<Derived1, Derived2, DefaultTraversal, NoUnrolling>
|
||||
{
|
||||
const int innerSize = dst.innerSize();
|
||||
const int outerSize = dst.outerSize();
|
||||
for(int j = 0; j < outerSize; ++j)
|
||||
for(int i = 0; i < innerSize; ++i)
|
||||
{
|
||||
if(int(Derived1::Flags)&RowMajorBit)
|
||||
dst.copyCoeff(j, i, src);
|
||||
else
|
||||
dst.copyCoeff(i, j, src);
|
||||
}
|
||||
for(int outer = 0; outer < outerSize; ++outer)
|
||||
for(int inner = 0; inner < innerSize; ++inner)
|
||||
dst.copyCoeffByOuterInner(outer, inner, src);
|
||||
}
|
||||
};
|
||||
|
||||
@ -293,12 +279,10 @@ struct ei_assign_impl<Derived1, Derived2, DefaultTraversal, InnerUnrolling>
|
||||
{
|
||||
EIGEN_STRONG_INLINE static void run(Derived1 &dst, const Derived2 &src)
|
||||
{
|
||||
const bool rowMajor = int(Derived1::Flags)&RowMajorBit;
|
||||
const int innerSize = rowMajor ? Derived1::ColsAtCompileTime : Derived1::RowsAtCompileTime;
|
||||
const int outerSize = dst.outerSize();
|
||||
for(int j = 0; j < outerSize; ++j)
|
||||
ei_assign_DefaultTraversal_InnerUnrolling<Derived1, Derived2, 0, innerSize>
|
||||
::run(dst, src, j);
|
||||
for(int outer = 0; outer < outerSize; ++outer)
|
||||
ei_assign_DefaultTraversal_InnerUnrolling<Derived1, Derived2, 0, Derived1::InnerSizeAtCompileTime>
|
||||
::run(dst, src, outer);
|
||||
}
|
||||
};
|
||||
|
||||
@ -339,14 +323,9 @@ struct ei_assign_impl<Derived1, Derived2, InnerVectorizedTraversal, NoUnrolling>
|
||||
const int innerSize = dst.innerSize();
|
||||
const int outerSize = dst.outerSize();
|
||||
const int packetSize = ei_packet_traits<typename Derived1::Scalar>::size;
|
||||
for(int j = 0; j < outerSize; ++j)
|
||||
for(int i = 0; i < innerSize; i+=packetSize)
|
||||
{
|
||||
if(int(Derived1::Flags)&RowMajorBit)
|
||||
dst.template copyPacket<Derived2, Aligned, Aligned>(j, i, src);
|
||||
else
|
||||
dst.template copyPacket<Derived2, Aligned, Aligned>(i, j, src);
|
||||
}
|
||||
for(int outer = 0; outer < outerSize; ++outer)
|
||||
for(int inner = 0; inner < innerSize; inner+=packetSize)
|
||||
dst.template copyPacketByOuterInner<Derived2, Aligned, Aligned>(outer, inner, src);
|
||||
}
|
||||
};
|
||||
|
||||
@ -365,12 +344,10 @@ struct ei_assign_impl<Derived1, Derived2, InnerVectorizedTraversal, InnerUnrolli
|
||||
{
|
||||
EIGEN_STRONG_INLINE static void run(Derived1 &dst, const Derived2 &src)
|
||||
{
|
||||
const bool rowMajor = int(Derived1::Flags)&RowMajorBit;
|
||||
const int innerSize = rowMajor ? Derived1::ColsAtCompileTime : Derived1::RowsAtCompileTime;
|
||||
const int outerSize = dst.outerSize();
|
||||
for(int j = 0; j < outerSize; ++j)
|
||||
ei_assign_innervec_InnerUnrolling<Derived1, Derived2, 0, innerSize>
|
||||
::run(dst, src, j);
|
||||
for(int outer = 0; outer < outerSize; ++outer)
|
||||
ei_assign_innervec_InnerUnrolling<Derived1, Derived2, 0, Derived1::InnerSizeAtCompileTime>
|
||||
::run(dst, src, outer);
|
||||
}
|
||||
};
|
||||
|
||||
@ -406,7 +383,7 @@ struct ei_unaligned_assign_impl<false>
|
||||
template<typename Derived1, typename Derived2>
|
||||
struct ei_assign_impl<Derived1, Derived2, LinearVectorizedTraversal, NoUnrolling>
|
||||
{
|
||||
inline static void run(Derived1 &dst, const Derived2 &src)
|
||||
EIGEN_STRONG_INLINE static void run(Derived1 &dst, const Derived2 &src)
|
||||
{
|
||||
const int size = dst.size();
|
||||
const int packetSize = ei_packet_traits<typename Derived1::Scalar>::size;
|
||||
@ -418,7 +395,7 @@ struct ei_assign_impl<Derived1, Derived2, LinearVectorizedTraversal, NoUnrolling
|
||||
|
||||
for(int index = alignedStart; index < alignedEnd; index += packetSize)
|
||||
{
|
||||
dst.template copyPacket<Derived2, Aligned, ei_assign_traits<Derived1,Derived2>::SrcAlignment>(index, src);
|
||||
dst.template copyPacket<Derived2, Aligned, ei_assign_traits<Derived1,Derived2>::JointAlignment>(index, src);
|
||||
}
|
||||
|
||||
ei_unaligned_assign_impl<>::run(src,dst,alignedEnd,size);
|
||||
@ -452,40 +429,24 @@ struct ei_assign_impl<Derived1, Derived2, SliceVectorizedTraversal, NoUnrolling>
|
||||
const int packetAlignedMask = packetSize - 1;
|
||||
const int innerSize = dst.innerSize();
|
||||
const int outerSize = dst.outerSize();
|
||||
const int alignedStep = (packetSize - dst.stride() % packetSize) & packetAlignedMask;
|
||||
const int alignedStep = (packetSize - dst.outerStride() % packetSize) & packetAlignedMask;
|
||||
int alignedStart = ei_assign_traits<Derived1,Derived2>::DstIsAligned ? 0
|
||||
: ei_first_aligned(&dst.coeffRef(0,0), innerSize);
|
||||
|
||||
for(int i = 0; i < outerSize; ++i)
|
||||
for(int outer = 0; outer < outerSize; ++outer)
|
||||
{
|
||||
const int alignedEnd = alignedStart + ((innerSize-alignedStart) & ~packetAlignedMask);
|
||||
|
||||
// do the non-vectorizable part of the assignment
|
||||
for (int index = 0; index<alignedStart ; ++index)
|
||||
{
|
||||
if(Derived1::Flags&RowMajorBit)
|
||||
dst.copyCoeff(i, index, src);
|
||||
else
|
||||
dst.copyCoeff(index, i, src);
|
||||
}
|
||||
for(int inner = 0; inner<alignedStart ; ++inner)
|
||||
dst.copyCoeffByOuterInner(outer, inner, src);
|
||||
|
||||
// do the vectorizable part of the assignment
|
||||
for (int index = alignedStart; index<alignedEnd; index+=packetSize)
|
||||
{
|
||||
if(Derived1::Flags&RowMajorBit)
|
||||
dst.template copyPacket<Derived2, Aligned, Unaligned>(i, index, src);
|
||||
else
|
||||
dst.template copyPacket<Derived2, Aligned, Unaligned>(index, i, src);
|
||||
}
|
||||
for(int inner = alignedStart; inner<alignedEnd; inner+=packetSize)
|
||||
dst.template copyPacketByOuterInner<Derived2, Aligned, Unaligned>(outer, inner, src);
|
||||
|
||||
// do the non-vectorizable part of the assignment
|
||||
for (int index = alignedEnd; index<innerSize ; ++index)
|
||||
{
|
||||
if(Derived1::Flags&RowMajorBit)
|
||||
dst.copyCoeff(i, index, src);
|
||||
else
|
||||
dst.copyCoeff(index, i, src);
|
||||
}
|
||||
for(int inner = alignedEnd; inner<innerSize ; ++inner)
|
||||
dst.copyCoeffByOuterInner(outer, inner, src);
|
||||
|
||||
alignedStart = std::min<int>((alignedStart+alignedStep)%packetSize, innerSize);
|
||||
}
|
||||
|
@ -2,7 +2,7 @@
|
||||
// for linear algebra.
|
||||
//
|
||||
// Copyright (C) 2008 Gael Guennebaud <g.gael@free.fr>
|
||||
// Copyright (C) 2006-2008 Benoit Jacob <jacob.benoit.1@gmail.com>
|
||||
// Copyright (C) 2006-2010 Benoit Jacob <jacob.benoit.1@gmail.com>
|
||||
//
|
||||
// Eigen is free software; you can redistribute it and/or
|
||||
// modify it under the terms of the GNU Lesser General Public
|
||||
@ -100,8 +100,8 @@ template<typename MatrixType, int BlockRows, int BlockCols, int _DirectAccessSta
|
||||
// The case a 1x1 matrix seems ambiguous, but the result is the same anyway.
|
||||
m_startRow( (BlockRows==1) && (BlockCols==MatrixType::ColsAtCompileTime) ? i : 0),
|
||||
m_startCol( (BlockRows==MatrixType::RowsAtCompileTime) && (BlockCols==1) ? i : 0),
|
||||
m_blockRows(matrix.rows()), // if it is a row, then m_blockRows has a fixed-size of 1, so no pb to try to overwrite it
|
||||
m_blockCols(matrix.cols()) // same for m_blockCols
|
||||
m_blockRows(BlockRows==1 ? 1 : matrix.rows()),
|
||||
m_blockCols(BlockCols==1 ? 1 : matrix.cols())
|
||||
{
|
||||
ei_assert( (i>=0) && (
|
||||
((BlockRows==1) && (BlockCols==MatrixType::ColsAtCompileTime) && i<matrix.rows())
|
||||
@ -112,7 +112,7 @@ template<typename MatrixType, int BlockRows, int BlockCols, int _DirectAccessSta
|
||||
*/
|
||||
inline Block(const MatrixType& matrix, int startRow, int startCol)
|
||||
: m_matrix(matrix), m_startRow(startRow), m_startCol(startCol),
|
||||
m_blockRows(matrix.rows()), m_blockCols(matrix.cols())
|
||||
m_blockRows(BlockRows), m_blockCols(BlockCols)
|
||||
{
|
||||
EIGEN_STATIC_ASSERT(RowsAtCompileTime!=Dynamic && ColsAtCompileTime!=Dynamic,THIS_METHOD_IS_ONLY_FOR_FIXED_SIZE)
|
||||
ei_assert(startRow >= 0 && BlockRows >= 1 && startRow + BlockRows <= matrix.rows()
|
||||
@ -196,8 +196,8 @@ template<typename MatrixType, int BlockRows, int BlockCols, int _DirectAccessSta
|
||||
#ifdef EIGEN_PARSED_BY_DOXYGEN
|
||||
/** \sa MapBase::data() */
|
||||
inline const Scalar* data() const;
|
||||
/** \sa MapBase::stride() */
|
||||
inline int stride() const;
|
||||
inline int innerStride() const;
|
||||
inline int outerStride() const;
|
||||
#endif
|
||||
|
||||
protected:
|
||||
@ -260,17 +260,23 @@ class Block<MatrixType,BlockRows,BlockCols,HasDirectAccess>
|
||||
&& startCol >= 0 && blockCols >= 0 && startCol + blockCols <= matrix.cols());
|
||||
}
|
||||
|
||||
/** \sa MapBase::stride() */
|
||||
inline int stride() const
|
||||
/** \sa MapBase::innerStride() */
|
||||
inline int innerStride() const
|
||||
{
|
||||
return ((!Base::IsVectorAtCompileTime)
|
||||
|| (BlockRows==1 && ((Flags&RowMajorBit)==0))
|
||||
|| (BlockCols==1 && ((Flags&RowMajorBit)==RowMajorBit)))
|
||||
? m_matrix.stride() : 1;
|
||||
return RowsAtCompileTime==1 ? m_matrix.colStride()
|
||||
: ColsAtCompileTime==1 ? m_matrix.rowStride()
|
||||
: m_matrix.innerStride();
|
||||
}
|
||||
|
||||
/** \sa MapBase::outerStride() */
|
||||
inline int outerStride() const
|
||||
{
|
||||
return IsVectorAtCompileTime ? this->size() : m_matrix.outerStride();
|
||||
}
|
||||
|
||||
#ifndef __SUNPRO_CC
|
||||
// FIXME sunstudio is not friendly with the above friend...
|
||||
// META-FIXME there is no 'friend' keyword around here. Is this obsolete?
|
||||
protected:
|
||||
#endif
|
||||
|
||||
|
@ -25,6 +25,24 @@
|
||||
#ifndef EIGEN_COEFFS_H
|
||||
#define EIGEN_COEFFS_H
|
||||
|
||||
template<typename Derived>
|
||||
EIGEN_STRONG_INLINE int DenseBase<Derived>::rowIndexByOuterInner(int outer, int inner)
|
||||
{
|
||||
return int(Derived::RowsAtCompileTime) == 1 ? 0
|
||||
: int(Derived::ColsAtCompileTime) == 1 ? inner
|
||||
: int(Derived::Flags)&RowMajorBit ? outer
|
||||
: inner;
|
||||
}
|
||||
|
||||
template<typename Derived>
|
||||
EIGEN_STRONG_INLINE int DenseBase<Derived>::colIndexByOuterInner(int outer, int inner)
|
||||
{
|
||||
return int(Derived::ColsAtCompileTime) == 1 ? 0
|
||||
: int(Derived::RowsAtCompileTime) == 1 ? inner
|
||||
: int(Derived::Flags)&RowMajorBit ? inner
|
||||
: outer;
|
||||
}
|
||||
|
||||
/** Short version: don't use this function, use
|
||||
* \link operator()(int,int) const \endlink instead.
|
||||
*
|
||||
@ -48,6 +66,14 @@ EIGEN_STRONG_INLINE const typename DenseBase<Derived>::CoeffReturnType DenseBase
|
||||
return derived().coeff(row, col);
|
||||
}
|
||||
|
||||
template<typename Derived>
|
||||
EIGEN_STRONG_INLINE const typename DenseBase<Derived>::CoeffReturnType DenseBase<Derived>
|
||||
::coeffByOuterInner(int outer, int inner) const
|
||||
{
|
||||
return coeff(rowIndexByOuterInner(outer, inner),
|
||||
colIndexByOuterInner(outer, inner));
|
||||
}
|
||||
|
||||
/** \returns the coefficient at given the given row and column.
|
||||
*
|
||||
* \sa operator()(int,int), operator[](int) const
|
||||
@ -84,6 +110,14 @@ EIGEN_STRONG_INLINE typename ei_traits<Derived>::Scalar& DenseBase<Derived>
|
||||
return derived().coeffRef(row, col);
|
||||
}
|
||||
|
||||
template<typename Derived>
|
||||
EIGEN_STRONG_INLINE typename ei_traits<Derived>::Scalar& DenseBase<Derived>
|
||||
::coeffRefByOuterInner(int outer, int inner)
|
||||
{
|
||||
return coeffRef(rowIndexByOuterInner(outer, inner),
|
||||
colIndexByOuterInner(outer, inner));
|
||||
}
|
||||
|
||||
/** \returns a reference to the coefficient at given the given row and column.
|
||||
*
|
||||
* \sa operator()(int,int) const, operator[](int)
|
||||
@ -261,6 +295,15 @@ DenseBase<Derived>::packet(int row, int col) const
|
||||
return derived().template packet<LoadMode>(row,col);
|
||||
}
|
||||
|
||||
template<typename Derived>
|
||||
template<int LoadMode>
|
||||
EIGEN_STRONG_INLINE typename ei_packet_traits<typename ei_traits<Derived>::Scalar>::type
|
||||
DenseBase<Derived>::packetByOuterInner(int outer, int inner) const
|
||||
{
|
||||
return packet<LoadMode>(rowIndexByOuterInner(outer, inner),
|
||||
colIndexByOuterInner(outer, inner));
|
||||
}
|
||||
|
||||
/** Stores the given packet of coefficients, at the given row and column of this expression. It is your responsibility
|
||||
* to ensure that a packet really starts there. This method is only available on expressions having the
|
||||
* PacketAccessBit.
|
||||
@ -279,6 +322,16 @@ EIGEN_STRONG_INLINE void DenseBase<Derived>::writePacket
|
||||
derived().template writePacket<StoreMode>(row,col,x);
|
||||
}
|
||||
|
||||
template<typename Derived>
|
||||
template<int StoreMode>
|
||||
EIGEN_STRONG_INLINE void DenseBase<Derived>::writePacketByOuterInner
|
||||
(int outer, int inner, const typename ei_packet_traits<typename ei_traits<Derived>::Scalar>::type& x)
|
||||
{
|
||||
writePacket<StoreMode>(rowIndexByOuterInner(outer, inner),
|
||||
colIndexByOuterInner(outer, inner),
|
||||
x);
|
||||
}
|
||||
|
||||
/** \returns the packet of coefficients starting at the given index. It is your responsibility
|
||||
* to ensure that a packet really starts there. This method is only available on expressions having the
|
||||
* PacketAccessBit and the LinearAccessBit.
|
||||
@ -346,6 +399,16 @@ EIGEN_STRONG_INLINE void DenseBase<Derived>::copyCoeff(int index, const DenseBas
|
||||
derived().coeffRef(index) = other.derived().coeff(index);
|
||||
}
|
||||
|
||||
template<typename Derived>
|
||||
template<typename OtherDerived>
|
||||
EIGEN_STRONG_INLINE void DenseBase<Derived>::copyCoeffByOuterInner(int outer, int inner, const DenseBase<OtherDerived>& other)
|
||||
{
|
||||
const int row = Derived::rowIndexByOuterInner(outer,inner);
|
||||
const int col = Derived::colIndexByOuterInner(outer,inner);
|
||||
// derived() is important here: copyCoeff() may be reimplemented in Derived!
|
||||
derived().copyCoeff(row, col, other);
|
||||
}
|
||||
|
||||
/** \internal Copies the packet at position (row,col) of other into *this.
|
||||
*
|
||||
* This method is overridden in SwapWrapper, allowing swap() assignments to share 99% of their code
|
||||
@ -379,6 +442,16 @@ EIGEN_STRONG_INLINE void DenseBase<Derived>::copyPacket(int index, const DenseBa
|
||||
other.derived().template packet<LoadMode>(index));
|
||||
}
|
||||
|
||||
template<typename Derived>
|
||||
template<typename OtherDerived, int StoreMode, int LoadMode>
|
||||
EIGEN_STRONG_INLINE void DenseBase<Derived>::copyPacketByOuterInner(int outer, int inner, const DenseBase<OtherDerived>& other)
|
||||
{
|
||||
const int row = Derived::rowIndexByOuterInner(outer,inner);
|
||||
const int col = Derived::colIndexByOuterInner(outer,inner);
|
||||
// derived() is important here: copyCoeff() may be reimplemented in Derived!
|
||||
derived().copyPacket<OtherDerived, StoreMode, LoadMode>(row, col, other);
|
||||
}
|
||||
|
||||
template<typename Derived, bool JustReturnZero>
|
||||
struct ei_first_aligned_impl
|
||||
{
|
||||
|
@ -124,7 +124,14 @@ template<typename Derived> class DenseBase
|
||||
* constructed from this one. See the \ref flags "list of flags".
|
||||
*/
|
||||
|
||||
IsRowMajor = int(Flags) & RowMajorBit, /**< True if this expression is row major. */
|
||||
IsRowMajor = RowsAtCompileTime==1 ? 1
|
||||
: ColsAtCompileTime==1 ? 0
|
||||
: int(Flags) & RowMajorBit, /**< True if this expression has row-major effective addressing.
|
||||
For non-vectors, it is like reading the RowMajorBit on the Flags. For vectors, this is
|
||||
overriden by the convention that row-vectors are row-major and column-vectors are column-major. */
|
||||
|
||||
InnerSizeAtCompileTime = int(IsVectorAtCompileTime) ? SizeAtCompileTime
|
||||
: int(Flags)&RowMajorBit ? ColsAtCompileTime : RowsAtCompileTime,
|
||||
|
||||
CoeffReadCost = ei_traits<Derived>::CoeffReadCost,
|
||||
/**< This is a rough measure of how expensive it is to read one coefficient from
|
||||
@ -160,31 +167,98 @@ template<typename Derived> class DenseBase
|
||||
* In other words, this function returns
|
||||
* \code rows()==1 || cols()==1 \endcode
|
||||
* \sa rows(), cols(), IsVectorAtCompileTime. */
|
||||
inline bool isVector() const { return rows()==1 || cols()==1; }
|
||||
/** \returns the size of the storage major dimension,
|
||||
* i.e., the number of columns for a columns major matrix, and the number of rows otherwise */
|
||||
int outerSize() const { return (int(Flags)&RowMajorBit) ? this->rows() : this->cols(); }
|
||||
/** \returns the size of the inner dimension according to the storage order,
|
||||
* i.e., the number of rows for a columns major matrix, and the number of cols otherwise */
|
||||
int innerSize() const { return (int(Flags)&RowMajorBit) ? this->cols() : this->rows(); }
|
||||
|
||||
/** Only plain matrices, not expressions may be resized; therefore the only useful resize method is
|
||||
* Matrix::resize(). The present method only asserts that the new size equals the old size, and does
|
||||
/** \returns the outer size.
|
||||
*
|
||||
* \note For a vector, this returns just 1. For a matrix (non-vector), this is the major dimension
|
||||
* with respect to the storage order, i.e., the number of columns for a column-major matrix,
|
||||
* and the number of rows for a row-major matrix. */
|
||||
int outerSize() const
|
||||
{
|
||||
return IsVectorAtCompileTime ? 1
|
||||
: (int(Flags)&RowMajorBit) ? this->rows() : this->cols();
|
||||
}
|
||||
|
||||
/** \returns the inner size.
|
||||
*
|
||||
* \note For a vector, this is just the size. For a matrix (non-vector), this is the minor dimension
|
||||
* with respect to the storage order, i.e., the number of rows for a column-major matrix,
|
||||
* and the number of columns for a row-major matrix. */
|
||||
int innerSize() const
|
||||
{
|
||||
return IsVectorAtCompileTime ? this->size()
|
||||
: (int(Flags)&RowMajorBit) ? this->cols() : this->rows();
|
||||
}
|
||||
|
||||
/** Only plain matrices/arrays, not expressions, may be resized; therefore the only useful resize methods are
|
||||
* Matrix::resize() and Array::resize(). The present method only asserts that the new size equals the old size, and does
|
||||
* nothing else.
|
||||
*/
|
||||
void resize(int size)
|
||||
{
|
||||
ei_assert(size == this->size()
|
||||
&& "MatrixBase::resize() does not actually allow to resize.");
|
||||
&& "DenseBase::resize() does not actually allow to resize.");
|
||||
}
|
||||
/** Only plain matrices, not expressions may be resized; therefore the only useful resize method is
|
||||
* Matrix::resize(). The present method only asserts that the new size equals the old size, and does
|
||||
/** Only plain matrices/arrays, not expressions, may be resized; therefore the only useful resize methods are
|
||||
* Matrix::resize() and Array::resize(). The present method only asserts that the new size equals the old size, and does
|
||||
* nothing else.
|
||||
*/
|
||||
void resize(int rows, int cols)
|
||||
{
|
||||
ei_assert(rows == this->rows() && cols == this->cols()
|
||||
&& "MatrixBase::resize() does not actually allow to resize.");
|
||||
&& "DenseBase::resize() does not actually allow to resize.");
|
||||
}
|
||||
|
||||
/** \returns the pointer increment between two consecutive elements.
|
||||
*
|
||||
* \note For vectors, the storage order is ignored. For matrices (non-vectors), we're looking
|
||||
* at the increment between two consecutive elements within a slice in the inner direction.
|
||||
*
|
||||
* \sa outerStride(), rowStride(), colStride()
|
||||
*/
|
||||
inline int innerStride() const
|
||||
{
|
||||
EIGEN_STATIC_ASSERT(int(Flags)&DirectAccessBit,
|
||||
THIS_METHOD_IS_ONLY_FOR_EXPRESSIONS_WITH_DIRECT_MEMORY_ACCESS_SUCH_AS_MAP_OR_PLAIN_MATRICES)
|
||||
return derived().innerStride();
|
||||
}
|
||||
|
||||
/** \returns the pointer increment between two consecutive inner slices (for example, between two consecutive columns
|
||||
* in a column-major matrix).
|
||||
*
|
||||
* \note For vectors, the storage order is ignored, there is only one inner slice, and so this method returns 1.
|
||||
* For matrices (non-vectors), the notion of inner slice depends on the storage order.
|
||||
*
|
||||
* \sa innerStride(), rowStride(), colStride()
|
||||
*/
|
||||
inline int outerStride() const
|
||||
{
|
||||
EIGEN_STATIC_ASSERT(int(Flags)&DirectAccessBit,
|
||||
THIS_METHOD_IS_ONLY_FOR_EXPRESSIONS_WITH_DIRECT_MEMORY_ACCESS_SUCH_AS_MAP_OR_PLAIN_MATRICES)
|
||||
return derived().outerStride();
|
||||
}
|
||||
|
||||
inline int stride() const
|
||||
{
|
||||
return IsVectorAtCompileTime ? innerStride() : outerStride();
|
||||
}
|
||||
|
||||
/** \returns the pointer increment between two consecutive rows.
|
||||
*
|
||||
* \sa innerStride(), outerStride(), colStride()
|
||||
*/
|
||||
inline int rowStride() const
|
||||
{
|
||||
return IsRowMajor ? outerStride() : innerStride();
|
||||
}
|
||||
|
||||
/** \returns the pointer increment between two consecutive columns.
|
||||
*
|
||||
* \sa innerStride(), outerStride(), rowStride()
|
||||
*/
|
||||
inline int colStride() const
|
||||
{
|
||||
return IsRowMajor ? innerStride() : outerStride();
|
||||
}
|
||||
|
||||
#ifndef EIGEN_PARSED_BY_DOXYGEN
|
||||
@ -242,9 +316,11 @@ template<typename Derived> class DenseBase
|
||||
CommaInitializer<Derived> operator<< (const DenseBase<OtherDerived>& other);
|
||||
|
||||
const CoeffReturnType coeff(int row, int col) const;
|
||||
const CoeffReturnType coeffByOuterInner(int outer, int inner) const;
|
||||
const CoeffReturnType operator()(int row, int col) const;
|
||||
|
||||
Scalar& coeffRef(int row, int col);
|
||||
Scalar& coeffRefByOuterInner(int outer, int inner);
|
||||
Scalar& operator()(int row, int col);
|
||||
|
||||
const CoeffReturnType coeff(int index) const;
|
||||
@ -259,17 +335,30 @@ template<typename Derived> class DenseBase
|
||||
template<typename OtherDerived>
|
||||
void copyCoeff(int row, int col, const DenseBase<OtherDerived>& other);
|
||||
template<typename OtherDerived>
|
||||
void copyCoeffByOuterInner(int outer, int inner, const DenseBase<OtherDerived>& other);
|
||||
template<typename OtherDerived>
|
||||
void copyCoeff(int index, const DenseBase<OtherDerived>& other);
|
||||
template<typename OtherDerived, int StoreMode, int LoadMode>
|
||||
void copyPacket(int row, int col, const DenseBase<OtherDerived>& other);
|
||||
template<typename OtherDerived, int StoreMode, int LoadMode>
|
||||
void copyPacketByOuterInner(int outer, int inner, const DenseBase<OtherDerived>& other);
|
||||
template<typename OtherDerived, int StoreMode, int LoadMode>
|
||||
void copyPacket(int index, const DenseBase<OtherDerived>& other);
|
||||
|
||||
private:
|
||||
static int rowIndexByOuterInner(int outer, int inner);
|
||||
static int colIndexByOuterInner(int outer, int inner);
|
||||
public:
|
||||
#endif // not EIGEN_PARSED_BY_DOXYGEN
|
||||
|
||||
template<int LoadMode>
|
||||
PacketScalar packet(int row, int col) const;
|
||||
template<int LoadMode>
|
||||
PacketScalar packetByOuterInner(int outer, int inner) const;
|
||||
template<int StoreMode>
|
||||
void writePacket(int row, int col, const PacketScalar& x);
|
||||
template<int StoreMode>
|
||||
void writePacketByOuterInner(int outer, int inner, const PacketScalar& x);
|
||||
|
||||
template<int LoadMode>
|
||||
PacketScalar packet(int index) const;
|
||||
@ -409,13 +498,6 @@ template<typename Derived> class DenseBase
|
||||
template<typename OtherDerived>
|
||||
void swap(DenseBase<OtherDerived> EIGEN_REF_TO_TEMPORARY other);
|
||||
|
||||
/** \returns number of elements to skip to pass from one row (resp. column) to another
|
||||
* for a row-major (resp. column-major) matrix.
|
||||
* Combined with coeffRef() and the \ref flags flags, it allows a direct access to the data
|
||||
* of the underlying matrix.
|
||||
*/
|
||||
inline int stride() const { return derived().stride(); }
|
||||
|
||||
inline const NestByValue<Derived> nestByValue() const;
|
||||
inline const ForceAlignedAccess<Derived> forceAlignedAccess() const;
|
||||
inline ForceAlignedAccess<Derived> forceAlignedAccess();
|
||||
|
@ -75,23 +75,6 @@ class DenseStorageBase : public _Base<Derived>
|
||||
EIGEN_STRONG_INLINE int rows() const { return m_storage.rows(); }
|
||||
EIGEN_STRONG_INLINE int cols() const { return m_storage.cols(); }
|
||||
|
||||
/** Returns the leading dimension (for matrices) or the increment (for vectors) to be used with data().
|
||||
*
|
||||
* More precisely:
|
||||
* - for a column major matrix it returns the number of elements between two successive columns
|
||||
* - for a row major matrix it returns the number of elements between two successive rows
|
||||
* - for a vector it returns the number of elements between two successive coefficients
|
||||
* This function has to be used together with the MapBase::data() function.
|
||||
*
|
||||
* \sa data() */
|
||||
EIGEN_STRONG_INLINE int stride() const
|
||||
{
|
||||
if(IsVectorAtCompileTime)
|
||||
return 1;
|
||||
else
|
||||
return (Flags & RowMajorBit) ? m_storage.cols() : m_storage.rows();
|
||||
}
|
||||
|
||||
EIGEN_STRONG_INLINE const Scalar& coeff(int row, int col) const
|
||||
{
|
||||
if(Flags & RowMajorBit)
|
||||
@ -253,12 +236,12 @@ class DenseStorageBase : public _Base<Derived>
|
||||
{
|
||||
if(RowsAtCompileTime == 1)
|
||||
{
|
||||
ei_assert(other.isVector());
|
||||
ei_assert(other.rows() == 1 || other.cols() == 1);
|
||||
resize(1, other.size());
|
||||
}
|
||||
else if(ColsAtCompileTime == 1)
|
||||
{
|
||||
ei_assert(other.isVector());
|
||||
ei_assert(other.rows() == 1 || other.cols() == 1);
|
||||
resize(other.size(), 1);
|
||||
}
|
||||
else resize(other.rows(), other.cols());
|
||||
@ -379,6 +362,9 @@ class DenseStorageBase : public _Base<Derived>
|
||||
* while the AlignedMap() functions return aligned Map objects and thus should be called only with 16-byte-aligned
|
||||
* \a data pointers.
|
||||
*
|
||||
* These methods do not allow to specify strides. If you need to specify strides, you have to
|
||||
* use the Map class directly.
|
||||
*
|
||||
* \see class Map
|
||||
*/
|
||||
//@{
|
||||
@ -544,11 +530,21 @@ struct ei_conservative_resize_like_impl
|
||||
{
|
||||
if (_this.rows() == rows && _this.cols() == cols) return;
|
||||
EIGEN_STATIC_ASSERT_DYNAMIC_SIZE(Derived)
|
||||
typename Derived::PlainObject tmp(rows,cols);
|
||||
const int common_rows = std::min(rows, _this.rows());
|
||||
const int common_cols = std::min(cols, _this.cols());
|
||||
tmp.block(0,0,common_rows,common_cols) = _this.block(0,0,common_rows,common_cols);
|
||||
_this.derived().swap(tmp);
|
||||
|
||||
if ( ( Derived::IsRowMajor && _this.cols() == cols) || // row-major and we change only the number of rows
|
||||
(!Derived::IsRowMajor && _this.rows() == rows) ) // column-major and we change only the number of columns
|
||||
{
|
||||
_this.derived().m_storage.conservativeResize(rows*cols,rows,cols);
|
||||
}
|
||||
else
|
||||
{
|
||||
// The storage order does not allow us to use reallocation.
|
||||
typename Derived::PlainObject tmp(rows,cols);
|
||||
const int common_rows = std::min(rows, _this.rows());
|
||||
const int common_cols = std::min(cols, _this.cols());
|
||||
tmp.block(0,0,common_rows,common_cols) = _this.block(0,0,common_rows,common_cols);
|
||||
_this.derived().swap(tmp);
|
||||
}
|
||||
}
|
||||
|
||||
static void run(DenseBase<Derived>& _this, const DenseBase<OtherDerived>& other)
|
||||
@ -563,11 +559,26 @@ struct ei_conservative_resize_like_impl
|
||||
EIGEN_STATIC_ASSERT_DYNAMIC_SIZE(Derived)
|
||||
EIGEN_STATIC_ASSERT_DYNAMIC_SIZE(OtherDerived)
|
||||
|
||||
typename Derived::PlainObject tmp(other);
|
||||
const int common_rows = std::min(tmp.rows(), _this.rows());
|
||||
const int common_cols = std::min(tmp.cols(), _this.cols());
|
||||
tmp.block(0,0,common_rows,common_cols) = _this.block(0,0,common_rows,common_cols);
|
||||
_this.derived().swap(tmp);
|
||||
if ( ( Derived::IsRowMajor && _this.cols() == other.cols()) || // row-major and we change only the number of rows
|
||||
(!Derived::IsRowMajor && _this.rows() == other.rows()) ) // column-major and we change only the number of columns
|
||||
{
|
||||
const int new_rows = other.rows() - _this.rows();
|
||||
const int new_cols = other.cols() - _this.cols();
|
||||
_this.derived().m_storage.conservativeResize(other.size(),other.rows(),other.cols());
|
||||
if (new_rows>0)
|
||||
_this.corner(BottomRight, new_rows, other.cols()) = other.corner(BottomRight, new_rows, other.cols());
|
||||
else if (new_cols>0)
|
||||
_this.corner(BottomRight, other.rows(), new_cols) = other.corner(BottomRight, other.rows(), new_cols);
|
||||
}
|
||||
else
|
||||
{
|
||||
// The storage order does not allow us to use reallocation.
|
||||
typename Derived::PlainObject tmp(other);
|
||||
const int common_rows = std::min(tmp.rows(), _this.rows());
|
||||
const int common_cols = std::min(tmp.cols(), _this.cols());
|
||||
tmp.block(0,0,common_rows,common_cols) = _this.block(0,0,common_rows,common_cols);
|
||||
_this.derived().swap(tmp);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
@ -576,22 +587,23 @@ struct ei_conservative_resize_like_impl<Derived,OtherDerived,true>
|
||||
{
|
||||
static void run(DenseBase<Derived>& _this, int size)
|
||||
{
|
||||
if (_this.size() == size) return;
|
||||
typename Derived::PlainObject tmp(size);
|
||||
const int common_size = std::min<int>(_this.size(),size);
|
||||
tmp.segment(0,common_size) = _this.segment(0,common_size);
|
||||
_this.derived().swap(tmp);
|
||||
const int new_rows = Derived::RowsAtCompileTime==1 ? 1 : size;
|
||||
const int new_cols = Derived::RowsAtCompileTime==1 ? size : 1;
|
||||
_this.derived().m_storage.conservativeResize(size,new_rows,new_cols);
|
||||
}
|
||||
|
||||
static void run(DenseBase<Derived>& _this, const DenseBase<OtherDerived>& other)
|
||||
{
|
||||
if (_this.rows() == other.rows() && _this.cols() == other.cols()) return;
|
||||
|
||||
// segment(...) will check whether Derived/OtherDerived are vectors!
|
||||
typename Derived::PlainObject tmp(other);
|
||||
const int common_size = std::min<int>(_this.size(),tmp.size());
|
||||
tmp.segment(0,common_size) = _this.segment(0,common_size);
|
||||
_this.derived().swap(tmp);
|
||||
const int num_new_elements = other.size() - _this.size();
|
||||
|
||||
const int new_rows = Derived::RowsAtCompileTime==1 ? 1 : other.rows();
|
||||
const int new_cols = Derived::RowsAtCompileTime==1 ? other.cols() : 1;
|
||||
_this.derived().m_storage.conservativeResize(other.size(),new_rows,new_cols);
|
||||
|
||||
if (num_new_elements > 0)
|
||||
_this.tail(num_new_elements) = other.tail(num_new_elements);
|
||||
}
|
||||
};
|
||||
|
||||
|
@ -1,7 +1,7 @@
|
||||
// This file is part of Eigen, a lightweight C++ template library
|
||||
// for linear algebra.
|
||||
//
|
||||
// Copyright (C) 2006-2008 Benoit Jacob <jacob.benoit.1@gmail.com>
|
||||
// Copyright (C) 2006-2008, 2010 Benoit Jacob <jacob.benoit.1@gmail.com>
|
||||
//
|
||||
// Eigen is free software; you can redistribute it and/or
|
||||
// modify it under the terms of the GNU Lesser General Public
|
||||
@ -25,224 +25,35 @@
|
||||
#ifndef EIGEN_DOT_H
|
||||
#define EIGEN_DOT_H
|
||||
|
||||
/***************************************************************************
|
||||
* Part 1 : the logic deciding a strategy for vectorization and unrolling
|
||||
***************************************************************************/
|
||||
|
||||
template<typename Derived1, typename Derived2>
|
||||
struct ei_dot_traits
|
||||
{
|
||||
public:
|
||||
enum {
|
||||
Traversal = (int(Derived1::Flags)&int(Derived2::Flags)&ActualPacketAccessBit)
|
||||
&& (int(Derived1::Flags)&int(Derived2::Flags)&LinearAccessBit)
|
||||
? LinearVectorizedTraversal
|
||||
: DefaultTraversal
|
||||
};
|
||||
|
||||
private:
|
||||
typedef typename Derived1::Scalar Scalar;
|
||||
enum {
|
||||
PacketSize = ei_packet_traits<Scalar>::size,
|
||||
Cost = Derived1::SizeAtCompileTime * (Derived1::CoeffReadCost + Derived2::CoeffReadCost + NumTraits<Scalar>::MulCost)
|
||||
+ (Derived1::SizeAtCompileTime-1) * NumTraits<Scalar>::AddCost,
|
||||
UnrollingLimit = EIGEN_UNROLLING_LIMIT * (int(Traversal) == int(DefaultTraversal) ? 1 : int(PacketSize))
|
||||
};
|
||||
|
||||
public:
|
||||
enum {
|
||||
Unrolling = Cost <= UnrollingLimit
|
||||
? CompleteUnrolling
|
||||
: NoUnrolling
|
||||
};
|
||||
};
|
||||
|
||||
/***************************************************************************
|
||||
* Part 2 : unrollers
|
||||
***************************************************************************/
|
||||
|
||||
/*** no vectorization ***/
|
||||
|
||||
template<typename Derived1, typename Derived2, int Start, int Length>
|
||||
struct ei_dot_novec_unroller
|
||||
{
|
||||
enum {
|
||||
HalfLength = Length/2
|
||||
};
|
||||
|
||||
typedef typename Derived1::Scalar Scalar;
|
||||
|
||||
inline static Scalar run(const Derived1& v1, const Derived2& v2)
|
||||
{
|
||||
return ei_dot_novec_unroller<Derived1, Derived2, Start, HalfLength>::run(v1, v2)
|
||||
+ ei_dot_novec_unroller<Derived1, Derived2, Start+HalfLength, Length-HalfLength>::run(v1, v2);
|
||||
}
|
||||
};
|
||||
|
||||
template<typename Derived1, typename Derived2, int Start>
|
||||
struct ei_dot_novec_unroller<Derived1, Derived2, Start, 1>
|
||||
{
|
||||
typedef typename Derived1::Scalar Scalar;
|
||||
|
||||
inline static Scalar run(const Derived1& v1, const Derived2& v2)
|
||||
{
|
||||
return ei_conj(v1.coeff(Start)) * v2.coeff(Start);
|
||||
}
|
||||
};
|
||||
|
||||
/*** vectorization ***/
|
||||
|
||||
template<typename Derived1, typename Derived2, int Index, int Stop,
|
||||
bool LastPacket = (Stop-Index == ei_packet_traits<typename Derived1::Scalar>::size)>
|
||||
struct ei_dot_vec_unroller
|
||||
{
|
||||
typedef typename Derived1::Scalar Scalar;
|
||||
typedef typename ei_packet_traits<Scalar>::type PacketScalar;
|
||||
|
||||
enum {
|
||||
row1 = Derived1::RowsAtCompileTime == 1 ? 0 : Index,
|
||||
col1 = Derived1::RowsAtCompileTime == 1 ? Index : 0,
|
||||
row2 = Derived2::RowsAtCompileTime == 1 ? 0 : Index,
|
||||
col2 = Derived2::RowsAtCompileTime == 1 ? Index : 0
|
||||
};
|
||||
|
||||
inline static PacketScalar run(const Derived1& v1, const Derived2& v2)
|
||||
{
|
||||
return ei_pmadd(
|
||||
v1.template packet<Aligned>(row1, col1),
|
||||
v2.template packet<Aligned>(row2, col2),
|
||||
ei_dot_vec_unroller<Derived1, Derived2, Index+ei_packet_traits<Scalar>::size, Stop>::run(v1, v2)
|
||||
);
|
||||
}
|
||||
};
|
||||
|
||||
template<typename Derived1, typename Derived2, int Index, int Stop>
|
||||
struct ei_dot_vec_unroller<Derived1, Derived2, Index, Stop, true>
|
||||
{
|
||||
enum {
|
||||
row1 = Derived1::RowsAtCompileTime == 1 ? 0 : Index,
|
||||
col1 = Derived1::RowsAtCompileTime == 1 ? Index : 0,
|
||||
row2 = Derived2::RowsAtCompileTime == 1 ? 0 : Index,
|
||||
col2 = Derived2::RowsAtCompileTime == 1 ? Index : 0,
|
||||
alignment1 = (Derived1::Flags & AlignedBit) ? Aligned : Unaligned,
|
||||
alignment2 = (Derived2::Flags & AlignedBit) ? Aligned : Unaligned
|
||||
};
|
||||
|
||||
typedef typename Derived1::Scalar Scalar;
|
||||
typedef typename ei_packet_traits<Scalar>::type PacketScalar;
|
||||
|
||||
inline static PacketScalar run(const Derived1& v1, const Derived2& v2)
|
||||
{
|
||||
return ei_pmul(v1.template packet<alignment1>(row1, col1), v2.template packet<alignment2>(row2, col2));
|
||||
}
|
||||
};
|
||||
|
||||
/***************************************************************************
|
||||
* Part 3 : implementation of all cases
|
||||
***************************************************************************/
|
||||
|
||||
template<typename Derived1, typename Derived2,
|
||||
int Traversal = ei_dot_traits<Derived1, Derived2>::Traversal,
|
||||
int Unrolling = ei_dot_traits<Derived1, Derived2>::Unrolling
|
||||
// helper function for dot(). The problem is that if we put that in the body of dot(), then upon calling dot
|
||||
// with mismatched types, the compiler emits errors about failing to instantiate cwiseProduct BEFORE
|
||||
// looking at the static assertions. Thus this is a trick to get better compile errors.
|
||||
template<typename T, typename U,
|
||||
// the NeedToTranspose condition here is taken straight from Assign.h
|
||||
bool NeedToTranspose = T::IsVectorAtCompileTime
|
||||
&& U::IsVectorAtCompileTime
|
||||
&& ((int(T::RowsAtCompileTime) == 1 && int(U::ColsAtCompileTime) == 1)
|
||||
| // FIXME | instead of || to please GCC 4.4.0 stupid warning "suggest parentheses around &&".
|
||||
// revert to || as soon as not needed anymore.
|
||||
(int(T::ColsAtCompileTime) == 1 && int(U::RowsAtCompileTime) == 1))
|
||||
>
|
||||
struct ei_dot_impl;
|
||||
|
||||
template<typename Derived1, typename Derived2>
|
||||
struct ei_dot_impl<Derived1, Derived2, DefaultTraversal, NoUnrolling>
|
||||
struct ei_dot_nocheck
|
||||
{
|
||||
typedef typename Derived1::Scalar Scalar;
|
||||
static Scalar run(const Derived1& v1, const Derived2& v2)
|
||||
static inline typename ei_traits<T>::Scalar run(const MatrixBase<T>& a, const MatrixBase<U>& b)
|
||||
{
|
||||
ei_assert(v1.size()>0 && "you are using a non initialized vector");
|
||||
Scalar res;
|
||||
res = ei_conj(v1.coeff(0)) * v2.coeff(0);
|
||||
for(int i = 1; i < v1.size(); ++i)
|
||||
res += ei_conj(v1.coeff(i)) * v2.coeff(i);
|
||||
return res;
|
||||
return a.conjugate().cwiseProduct(b).sum();
|
||||
}
|
||||
};
|
||||
|
||||
template<typename Derived1, typename Derived2>
|
||||
struct ei_dot_impl<Derived1, Derived2, DefaultTraversal, CompleteUnrolling>
|
||||
: public ei_dot_novec_unroller<Derived1, Derived2, 0, Derived1::SizeAtCompileTime>
|
||||
{};
|
||||
|
||||
template<typename Derived1, typename Derived2>
|
||||
struct ei_dot_impl<Derived1, Derived2, LinearVectorizedTraversal, NoUnrolling>
|
||||
template<typename T, typename U>
|
||||
struct ei_dot_nocheck<T, U, true>
|
||||
{
|
||||
typedef typename Derived1::Scalar Scalar;
|
||||
typedef typename ei_packet_traits<Scalar>::type PacketScalar;
|
||||
|
||||
static Scalar run(const Derived1& v1, const Derived2& v2)
|
||||
static inline typename ei_traits<T>::Scalar run(const MatrixBase<T>& a, const MatrixBase<U>& b)
|
||||
{
|
||||
const int size = v1.size();
|
||||
const int packetSize = ei_packet_traits<Scalar>::size;
|
||||
const int alignedSize = (size/packetSize)*packetSize;
|
||||
enum {
|
||||
alignment1 = (Derived1::Flags & AlignedBit) ? Aligned : Unaligned,
|
||||
alignment2 = (Derived2::Flags & AlignedBit) ? Aligned : Unaligned
|
||||
};
|
||||
Scalar res;
|
||||
|
||||
// do the vectorizable part of the sum
|
||||
if(size >= packetSize)
|
||||
{
|
||||
PacketScalar packet_res = ei_pmul(
|
||||
v1.template packet<alignment1>(0),
|
||||
v2.template packet<alignment2>(0)
|
||||
);
|
||||
for(int index = packetSize; index<alignedSize; index += packetSize)
|
||||
{
|
||||
packet_res = ei_pmadd(
|
||||
v1.template packet<alignment1>(index),
|
||||
v2.template packet<alignment2>(index),
|
||||
packet_res
|
||||
);
|
||||
}
|
||||
res = ei_predux(packet_res);
|
||||
|
||||
// now we must do the rest without vectorization.
|
||||
if(alignedSize == size) return res;
|
||||
}
|
||||
else // too small to vectorize anything.
|
||||
// since this is dynamic-size hence inefficient anyway for such small sizes, don't try to optimize.
|
||||
{
|
||||
res = Scalar(0);
|
||||
}
|
||||
|
||||
// do the remainder of the vector
|
||||
for(int index = alignedSize; index < size; ++index)
|
||||
{
|
||||
res += v1.coeff(index) * v2.coeff(index);
|
||||
}
|
||||
|
||||
return res;
|
||||
return a.adjoint().cwiseProduct(b).sum();
|
||||
}
|
||||
};
|
||||
|
||||
template<typename Derived1, typename Derived2>
|
||||
struct ei_dot_impl<Derived1, Derived2, LinearVectorizedTraversal, CompleteUnrolling>
|
||||
{
|
||||
typedef typename Derived1::Scalar Scalar;
|
||||
typedef typename ei_packet_traits<Scalar>::type PacketScalar;
|
||||
enum {
|
||||
PacketSize = ei_packet_traits<Scalar>::size,
|
||||
Size = Derived1::SizeAtCompileTime,
|
||||
VectorizedSize = (Size / PacketSize) * PacketSize
|
||||
};
|
||||
static Scalar run(const Derived1& v1, const Derived2& v2)
|
||||
{
|
||||
Scalar res = ei_predux(ei_dot_vec_unroller<Derived1, Derived2, 0, VectorizedSize>::run(v1, v2));
|
||||
if (VectorizedSize != Size)
|
||||
res += ei_dot_novec_unroller<Derived1, Derived2, VectorizedSize, Size-VectorizedSize>::run(v1, v2);
|
||||
return res;
|
||||
}
|
||||
};
|
||||
|
||||
/***************************************************************************
|
||||
* Part 4 : implementation of MatrixBase methods
|
||||
***************************************************************************/
|
||||
|
||||
/** \returns the dot product of *this with other.
|
||||
*
|
||||
* \only_for_vectors
|
||||
@ -266,10 +77,7 @@ MatrixBase<Derived>::dot(const MatrixBase<OtherDerived>& other) const
|
||||
|
||||
ei_assert(size() == other.size());
|
||||
|
||||
// dot() must honor EvalBeforeNestingBit (eg: v.dot(M*v) )
|
||||
typedef typename ei_cleantype<typename Derived::Nested>::type ThisNested;
|
||||
typedef typename ei_cleantype<typename OtherDerived::Nested>::type OtherNested;
|
||||
return ei_dot_impl<ThisNested, OtherNested>::run(derived(), other.derived());
|
||||
return ei_dot_nocheck<Derived,OtherDerived>::run(*this, other);
|
||||
}
|
||||
|
||||
/** \returns the squared \em l2 norm of *this, i.e., for vectors, the dot product of *this with itself.
|
||||
|
@ -60,7 +60,8 @@ template<typename ExpressionType, unsigned int Added, unsigned int Removed> clas
|
||||
|
||||
inline int rows() const { return m_matrix.rows(); }
|
||||
inline int cols() const { return m_matrix.cols(); }
|
||||
inline int stride() const { return m_matrix.stride(); }
|
||||
inline int outerStride() const { return m_matrix.outerStride(); }
|
||||
inline int innerStride() const { return m_matrix.innerStride(); }
|
||||
|
||||
inline const Scalar coeff(int row, int col) const
|
||||
{
|
||||
|
@ -52,7 +52,8 @@ template<typename ExpressionType> class ForceAlignedAccess
|
||||
|
||||
inline int rows() const { return m_expression.rows(); }
|
||||
inline int cols() const { return m_expression.cols(); }
|
||||
inline int stride() const { return m_expression.stride(); }
|
||||
inline int outerStride() const { return m_expression.outerStride(); }
|
||||
inline int innerStride() const { return m_expression.innerStride(); }
|
||||
|
||||
inline const CoeffReturnType coeff(int row, int col) const
|
||||
{
|
||||
|
@ -179,7 +179,7 @@ struct ei_functor_traits<ei_scalar_quotient_op<Scalar> > {
|
||||
enum {
|
||||
Cost = 2 * NumTraits<Scalar>::MulCost,
|
||||
PacketAccess = ei_packet_traits<Scalar>::size>1
|
||||
#if (defined EIGEN_VECTORIZE_SSE)
|
||||
#if (defined EIGEN_VECTORIZE)
|
||||
&& NumTraits<Scalar>::HasFloatingPoint
|
||||
#endif
|
||||
};
|
||||
|
@ -1,7 +1,7 @@
|
||||
// This file is part of Eigen, a lightweight C++ template library
|
||||
// for linear algebra.
|
||||
//
|
||||
// Copyright (C) 2006-2008 Benoit Jacob <jacob.benoit.1@gmail.com>
|
||||
// Copyright (C) 2007-2010 Benoit Jacob <jacob.benoit.1@gmail.com>
|
||||
// Copyright (C) 2008 Gael Guennebaud <g.gael@free.fr>
|
||||
//
|
||||
// Eigen is free software; you can redistribute it and/or
|
||||
@ -33,10 +33,35 @@
|
||||
* \param MatrixType the equivalent matrix type of the mapped data
|
||||
* \param Options specifies whether the pointer is \c Aligned, or \c Unaligned.
|
||||
* The default is \c Unaligned.
|
||||
* \param StrideType optionnally specifies strides. By default, Map assumes the memory layout
|
||||
* of an ordinary, contiguous array. This can be overridden by specifying strides.
|
||||
* The type passed here must be a specialization of the Stride template, see examples below.
|
||||
*
|
||||
* This class represents a matrix or vector expression mapping an existing array of data.
|
||||
* It can be used to let Eigen interface without any overhead with non-Eigen data structures,
|
||||
* such as plain C arrays or structures from other libraries.
|
||||
* such as plain C arrays or structures from other libraries. By default, it assumes that the
|
||||
* data is laid out contiguously in memory. You can however override this by explicitly specifying
|
||||
* inner and outer strides.
|
||||
*
|
||||
* Here's an example of simply mapping a contiguous array as a column-major matrix:
|
||||
* \include Map_simple.cpp
|
||||
* Output: \verbinclude Map_simple.out
|
||||
*
|
||||
* If you need to map non-contiguous arrays, you can do so by specifying strides:
|
||||
*
|
||||
* Here's an example of mapping an array as a vector, specifying an inner stride, that is, the pointer
|
||||
* increment between two consecutive coefficients. Here, we're specifying the inner stride as a compile-time
|
||||
* fixed value.
|
||||
* \include Map_inner_stride.cpp
|
||||
* Output: \verbinclude Map_inner_stride.out
|
||||
*
|
||||
* Here's an example of mapping an array while specifying an outer stride. Here, since we're mapping
|
||||
* as a column-major matrix, 'outer stride' means the pointer increment between two consecutive columns.
|
||||
* Here, we're specifying the outer stride as a runtime parameter.
|
||||
* \include Map_outer_stride.cpp
|
||||
* Output: \verbinclude Map_outer_stride.out
|
||||
*
|
||||
* For more details and for an example of specifying both an inner and an outer stride, see class Stride.
|
||||
*
|
||||
* \b Tip: to change the array of data mapped by a Map object, you can use the C++
|
||||
* placement new syntax:
|
||||
@ -48,48 +73,86 @@
|
||||
*
|
||||
* \sa Matrix::Map()
|
||||
*/
|
||||
template<typename MatrixType, int Options>
|
||||
struct ei_traits<Map<MatrixType, Options> > : public ei_traits<MatrixType>
|
||||
template<typename MatrixType, int Options, typename StrideType>
|
||||
struct ei_traits<Map<MatrixType, Options, StrideType> >
|
||||
: public ei_traits<MatrixType>
|
||||
{
|
||||
typedef typename MatrixType::Scalar Scalar;
|
||||
enum {
|
||||
Flags = (Options&Aligned)==Aligned ? ei_traits<MatrixType>::Flags | AlignedBit
|
||||
: ei_traits<MatrixType>::Flags & ~AlignedBit
|
||||
InnerStride = StrideType::InnerStrideAtCompileTime,
|
||||
OuterStride = StrideType::OuterStrideAtCompileTime,
|
||||
HasNoInnerStride = InnerStride <= 1,
|
||||
HasNoOuterStride = OuterStride == 0,
|
||||
HasNoStride = HasNoInnerStride && HasNoOuterStride,
|
||||
IsAligned = int(int(Options)&Aligned)==Aligned,
|
||||
IsDynamicSize = MatrixType::SizeAtCompileTime==Dynamic,
|
||||
KeepsPacketAccess = bool(HasNoInnerStride)
|
||||
&& ( bool(IsDynamicSize)
|
||||
|| HasNoOuterStride
|
||||
|| ( OuterStride!=Dynamic && ((int(OuterStride)*sizeof(Scalar))%16)==0 ) ),
|
||||
Flags0 = ei_traits<MatrixType>::Flags,
|
||||
Flags1 = IsAligned ? int(Flags0) | AlignedBit : int(Flags0) & ~AlignedBit,
|
||||
Flags2 = HasNoStride ? int(Flags1) : int(Flags1 & ~LinearAccessBit),
|
||||
Flags = KeepsPacketAccess ? int(Flags2) : (int(Flags2) & ~PacketAccessBit)
|
||||
};
|
||||
};
|
||||
|
||||
template<typename MatrixType, int Options> class Map
|
||||
: public MapBase<Map<MatrixType, Options>,
|
||||
typename MatrixType::template MakeBase< Map<MatrixType, Options> >::Type>
|
||||
template<typename MatrixType, int Options, typename StrideType> class Map
|
||||
: public MapBase<Map<MatrixType, Options, StrideType>,
|
||||
typename MatrixType::template MakeBase<
|
||||
Map<MatrixType, Options, StrideType>
|
||||
>::Type>
|
||||
{
|
||||
public:
|
||||
|
||||
typedef MapBase<Map,typename MatrixType::template MakeBase<Map>::Type> Base;
|
||||
|
||||
EIGEN_DENSE_PUBLIC_INTERFACE(Map)
|
||||
|
||||
inline int stride() const { return this->innerSize(); }
|
||||
|
||||
inline Map(const Scalar* data) : Base(data) {}
|
||||
|
||||
inline Map(const Scalar* data, int size) : Base(data, size) {}
|
||||
|
||||
inline Map(const Scalar* data, int rows, int cols) : Base(data, rows, cols) {}
|
||||
|
||||
inline void resize(int rows, int cols)
|
||||
inline int innerStride() const
|
||||
{
|
||||
EIGEN_ONLY_USED_FOR_DEBUG(rows);
|
||||
EIGEN_ONLY_USED_FOR_DEBUG(cols);
|
||||
ei_assert(rows == this->rows());
|
||||
ei_assert(cols == this->cols());
|
||||
return StrideType::InnerStrideAtCompileTime != 0 ? m_stride.inner() : 1;
|
||||
}
|
||||
|
||||
inline void resize(int size)
|
||||
inline int outerStride() const
|
||||
{
|
||||
EIGEN_STATIC_ASSERT_VECTOR_ONLY(MatrixType)
|
||||
EIGEN_ONLY_USED_FOR_DEBUG(size);
|
||||
ei_assert(size == this->size());
|
||||
return StrideType::OuterStrideAtCompileTime != 0 ? m_stride.outer()
|
||||
: IsVectorAtCompileTime ? this->size()
|
||||
: int(Flags)&RowMajorBit ? this->cols()
|
||||
: this->rows();
|
||||
}
|
||||
|
||||
/** Constructor in the fixed-size case.
|
||||
*
|
||||
* \param data pointer to the array to map
|
||||
* \param stride optional Stride object, passing the strides.
|
||||
*/
|
||||
inline Map(const Scalar* data, const StrideType& stride = StrideType())
|
||||
: Base(data), m_stride(stride) {}
|
||||
|
||||
/** Constructor in the dynamic-size vector case.
|
||||
*
|
||||
* \param data pointer to the array to map
|
||||
* \param size the size of the vector expression
|
||||
* \param stride optional Stride object, passing the strides.
|
||||
*/
|
||||
inline Map(const Scalar* data, int size, const StrideType& stride = StrideType())
|
||||
: Base(data, size), m_stride(stride) {}
|
||||
|
||||
/** Constructor in the dynamic-size matrix case.
|
||||
*
|
||||
* \param data pointer to the array to map
|
||||
* \param rows the number of rows of the matrix expression
|
||||
* \param cols the number of columns of the matrix expression
|
||||
* \param stride optional Stride object, passing the strides.
|
||||
*/
|
||||
inline Map(const Scalar* data, int rows, int cols, const StrideType& stride = StrideType())
|
||||
: Base(data, rows, cols), m_stride(stride) {}
|
||||
|
||||
EIGEN_INHERIT_ASSIGNMENT_OPERATORS(Map)
|
||||
|
||||
protected:
|
||||
StrideType m_stride;
|
||||
};
|
||||
|
||||
template<typename _Scalar, int _Rows, int _Cols, int _StorageOrder, int _MaxRows, int _MaxCols>
|
||||
|
@ -1,7 +1,7 @@
|
||||
// This file is part of Eigen, a lightweight C++ template library
|
||||
// for linear algebra.
|
||||
//
|
||||
// Copyright (C) 2006-2008 Benoit Jacob <jacob.benoit.1@gmail.com>
|
||||
// Copyright (C) 2007-2010 Benoit Jacob <jacob.benoit.1@gmail.com>
|
||||
// Copyright (C) 2008 Gael Guennebaud <g.gael@free.fr>
|
||||
//
|
||||
// Eigen is free software; you can redistribute it and/or
|
||||
@ -37,9 +37,7 @@ template<typename Derived, typename Base> class MapBase
|
||||
{
|
||||
public:
|
||||
|
||||
// typedef MatrixBase<Derived> Base;
|
||||
enum {
|
||||
IsRowMajor = (int(ei_traits<Derived>::Flags) & RowMajorBit) ? 1 : 0,
|
||||
RowsAtCompileTime = ei_traits<Derived>::RowsAtCompileTime,
|
||||
ColsAtCompileTime = ei_traits<Derived>::ColsAtCompileTime,
|
||||
SizeAtCompileTime = Base::SizeAtCompileTime
|
||||
@ -48,94 +46,75 @@ template<typename Derived, typename Base> class MapBase
|
||||
typedef typename ei_traits<Derived>::Scalar Scalar;
|
||||
typedef typename Base::PacketScalar PacketScalar;
|
||||
using Base::derived;
|
||||
using Base::innerStride;
|
||||
using Base::outerStride;
|
||||
using Base::rowStride;
|
||||
using Base::colStride;
|
||||
|
||||
inline int rows() const { return m_rows.value(); }
|
||||
inline int cols() const { return m_cols.value(); }
|
||||
|
||||
/** Returns the leading dimension (for matrices) or the increment (for vectors) to be used with data().
|
||||
*
|
||||
* More precisely:
|
||||
* - for a column major matrix it returns the number of elements between two successive columns
|
||||
* - for a row major matrix it returns the number of elements between two successive rows
|
||||
* - for a vector it returns the number of elements between two successive coefficients
|
||||
* This function has to be used together with the MapBase::data() function.
|
||||
*
|
||||
* \sa MapBase::data() */
|
||||
inline int stride() const { return derived().stride(); }
|
||||
|
||||
/** Returns a pointer to the first coefficient of the matrix or vector.
|
||||
* This function has to be used together with the stride() function.
|
||||
*
|
||||
* \sa MapBase::stride() */
|
||||
* \note When addressing this data, make sure to honor the strides returned by innerStride() and outerStride().
|
||||
*
|
||||
* \sa innerStride(), outerStride()
|
||||
*/
|
||||
inline const Scalar* data() const { return m_data; }
|
||||
|
||||
inline const Scalar& coeff(int row, int col) const
|
||||
{
|
||||
if(IsRowMajor)
|
||||
return m_data[col + row * stride()];
|
||||
else // column-major
|
||||
return m_data[row + col * stride()];
|
||||
return m_data[col * colStride() + row * rowStride()];
|
||||
}
|
||||
|
||||
inline Scalar& coeffRef(int row, int col)
|
||||
{
|
||||
if(IsRowMajor)
|
||||
return const_cast<Scalar*>(m_data)[col + row * stride()];
|
||||
else // column-major
|
||||
return const_cast<Scalar*>(m_data)[row + col * stride()];
|
||||
return const_cast<Scalar*>(m_data)[col * colStride() + row * rowStride()];
|
||||
}
|
||||
|
||||
inline const Scalar& coeff(int index) const
|
||||
{
|
||||
ei_assert(Derived::IsVectorAtCompileTime || (ei_traits<Derived>::Flags & LinearAccessBit));
|
||||
if ( ((RowsAtCompileTime == 1) == IsRowMajor) || !int(Derived::IsVectorAtCompileTime) )
|
||||
return m_data[index];
|
||||
else
|
||||
return m_data[index*stride()];
|
||||
return m_data[index * innerStride()];
|
||||
}
|
||||
|
||||
inline Scalar& coeffRef(int index)
|
||||
{
|
||||
ei_assert(Derived::IsVectorAtCompileTime || (ei_traits<Derived>::Flags & LinearAccessBit));
|
||||
if ( ((RowsAtCompileTime == 1) == IsRowMajor) || !int(Derived::IsVectorAtCompileTime) )
|
||||
return const_cast<Scalar*>(m_data)[index];
|
||||
else
|
||||
return const_cast<Scalar*>(m_data)[index*stride()];
|
||||
return const_cast<Scalar*>(m_data)[index * innerStride()];
|
||||
}
|
||||
|
||||
template<int LoadMode>
|
||||
inline PacketScalar packet(int row, int col) const
|
||||
{
|
||||
return ei_ploadt<Scalar, LoadMode>
|
||||
(m_data + (IsRowMajor ? col + row * stride()
|
||||
: row + col * stride()));
|
||||
(m_data + (col * colStride() + row * rowStride()));
|
||||
}
|
||||
|
||||
template<int LoadMode>
|
||||
inline PacketScalar packet(int index) const
|
||||
{
|
||||
return ei_ploadt<Scalar, LoadMode>(m_data + index);
|
||||
return ei_ploadt<Scalar, LoadMode>(m_data + index * innerStride());
|
||||
}
|
||||
|
||||
template<int StoreMode>
|
||||
inline void writePacket(int row, int col, const PacketScalar& x)
|
||||
{
|
||||
ei_pstoret<Scalar, PacketScalar, StoreMode>
|
||||
(const_cast<Scalar*>(m_data) + (IsRowMajor ? col + row * stride()
|
||||
: row + col * stride()), x);
|
||||
(const_cast<Scalar*>(m_data) + (col * colStride() + row * rowStride()), x);
|
||||
}
|
||||
|
||||
template<int StoreMode>
|
||||
inline void writePacket(int index, const PacketScalar& x)
|
||||
{
|
||||
ei_pstoret<Scalar, PacketScalar, StoreMode>
|
||||
(const_cast<Scalar*>(m_data) + index, x);
|
||||
(const_cast<Scalar*>(m_data) + index * innerStride(), x);
|
||||
}
|
||||
|
||||
inline MapBase(const Scalar* data) : m_data(data), m_rows(RowsAtCompileTime), m_cols(ColsAtCompileTime)
|
||||
{
|
||||
EIGEN_STATIC_ASSERT_FIXED_SIZE(Derived)
|
||||
checkDataAlignment();
|
||||
checkSanity();
|
||||
}
|
||||
|
||||
inline MapBase(const Scalar* data, int size)
|
||||
@ -146,7 +125,7 @@ template<typename Derived, typename Base> class MapBase
|
||||
EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
|
||||
ei_assert(size >= 0);
|
||||
ei_assert(data == 0 || SizeAtCompileTime == Dynamic || SizeAtCompileTime == size);
|
||||
checkDataAlignment();
|
||||
checkSanity();
|
||||
}
|
||||
|
||||
inline MapBase(const Scalar* data, int rows, int cols)
|
||||
@ -155,7 +134,7 @@ template<typename Derived, typename Base> class MapBase
|
||||
ei_assert( (data == 0)
|
||||
|| ( rows >= 0 && (RowsAtCompileTime == Dynamic || RowsAtCompileTime == rows)
|
||||
&& cols >= 0 && (ColsAtCompileTime == Dynamic || ColsAtCompileTime == cols)));
|
||||
checkDataAlignment();
|
||||
checkSanity();
|
||||
}
|
||||
|
||||
Derived& operator=(const MapBase& other)
|
||||
@ -167,10 +146,12 @@ template<typename Derived, typename Base> class MapBase
|
||||
|
||||
protected:
|
||||
|
||||
void checkDataAlignment() const
|
||||
void checkSanity() const
|
||||
{
|
||||
ei_assert( ((!(ei_traits<Derived>::Flags&AlignedBit))
|
||||
|| ((size_t(m_data)&0xf)==0)) && "data is not aligned");
|
||||
ei_assert( ((!(ei_traits<Derived>::Flags&PacketAccessBit))
|
||||
|| (innerStride()==1)) && "packet access incompatible with inner stride greater than 1");
|
||||
}
|
||||
|
||||
const Scalar* EIGEN_RESTRICT m_data;
|
||||
|
@ -1,7 +1,7 @@
|
||||
// This file is part of Eigen, a lightweight C++ template library
|
||||
// for linear algebra.
|
||||
//
|
||||
// Copyright (C) 2006-2008 Benoit Jacob <jacob.benoit.1@gmail.com>
|
||||
// Copyright (C) 2006-2010 Benoit Jacob <jacob.benoit.1@gmail.com>
|
||||
// Copyright (C) 2008-2009 Gael Guennebaud <g.gael@free.fr>
|
||||
//
|
||||
// Eigen is free software; you can redistribute it and/or
|
||||
@ -318,6 +318,9 @@ class Matrix
|
||||
void swap(MatrixBase<OtherDerived> EIGEN_REF_TO_TEMPORARY other)
|
||||
{ this->_swap(other.derived()); }
|
||||
|
||||
inline int innerStride() const { return 1; }
|
||||
inline int outerStride() const { return this->innerSize(); }
|
||||
|
||||
/////////// Geometry module ///////////
|
||||
|
||||
template<typename OtherDerived>
|
||||
@ -331,6 +334,9 @@ class Matrix
|
||||
#endif
|
||||
|
||||
protected:
|
||||
template <typename Derived, typename OtherDerived, bool IsVector>
|
||||
friend struct ei_conservative_resize_like_impl;
|
||||
|
||||
using Base::m_storage;
|
||||
};
|
||||
|
||||
|
@ -3,6 +3,7 @@
|
||||
//
|
||||
// Copyright (C) 2008 Gael Guennebaud <g.gael@free.fr>
|
||||
// Copyright (C) 2006-2009 Benoit Jacob <jacob.benoit.1@gmail.com>
|
||||
// Copyright (C) 2010 Hauke Heibel <hauke.heibel@gmail.com>
|
||||
//
|
||||
// Eigen is free software; you can redistribute it and/or
|
||||
// modify it under the terms of the GNU Lesser General Public
|
||||
@ -49,6 +50,12 @@ struct ei_matrix_array
|
||||
ei_matrix_array(ei_constructor_without_unaligned_array_assert) {}
|
||||
};
|
||||
|
||||
// FIXME!!! This is a hack because ARM gcc does not honour __attribute__((aligned(16))) properly
|
||||
#ifdef __ARM_NEON__
|
||||
#ifndef EIGEN_DISABLE_UNALIGNED_ARRAY_ASSERT
|
||||
#define EIGEN_DISABLE_UNALIGNED_ARRAY_ASSERT
|
||||
#endif
|
||||
#endif
|
||||
#ifdef EIGEN_DISABLE_UNALIGNED_ARRAY_ASSERT
|
||||
#define EIGEN_MAKE_UNALIGNED_ARRAY_ASSERT(sizemask)
|
||||
#else
|
||||
@ -92,6 +99,7 @@ template<typename T, int Size, int _Rows, int _Cols, int _Options> class ei_matr
|
||||
inline void swap(ei_matrix_storage& other) { std::swap(m_data,other.m_data); }
|
||||
inline static int rows(void) {return _Rows;}
|
||||
inline static int cols(void) {return _Cols;}
|
||||
inline void conservativeResize(int,int,int) {}
|
||||
inline void resize(int,int,int) {}
|
||||
inline const T *data() const { return m_data.array; }
|
||||
inline T *data() { return m_data.array; }
|
||||
@ -107,6 +115,7 @@ template<typename T, int _Rows, int _Cols, int _Options> class ei_matrix_storage
|
||||
inline void swap(ei_matrix_storage& ) {}
|
||||
inline static int rows(void) {return _Rows;}
|
||||
inline static int cols(void) {return _Cols;}
|
||||
inline void conservativeResize(int,int,int) {}
|
||||
inline void resize(int,int,int) {}
|
||||
inline const T *data() const { return 0; }
|
||||
inline T *data() { return 0; }
|
||||
@ -127,11 +136,8 @@ template<typename T, int Size, int _Options> class ei_matrix_storage<T, Size, Dy
|
||||
{ std::swap(m_data,other.m_data); std::swap(m_rows,other.m_rows); std::swap(m_cols,other.m_cols); }
|
||||
inline int rows(void) const {return m_rows;}
|
||||
inline int cols(void) const {return m_cols;}
|
||||
inline void resize(int, int rows, int cols)
|
||||
{
|
||||
m_rows = rows;
|
||||
m_cols = cols;
|
||||
}
|
||||
inline void conservativeResize(int, int rows, int cols) { m_rows = rows; m_cols = cols; }
|
||||
inline void resize(int, int rows, int cols) { m_rows = rows; m_cols = cols; }
|
||||
inline const T *data() const { return m_data.array; }
|
||||
inline T *data() { return m_data.array; }
|
||||
};
|
||||
@ -149,10 +155,8 @@ template<typename T, int Size, int _Cols, int _Options> class ei_matrix_storage<
|
||||
inline void swap(ei_matrix_storage& other) { std::swap(m_data,other.m_data); std::swap(m_rows,other.m_rows); }
|
||||
inline int rows(void) const {return m_rows;}
|
||||
inline int cols(void) const {return _Cols;}
|
||||
inline void resize(int /*size*/, int rows, int)
|
||||
{
|
||||
m_rows = rows;
|
||||
}
|
||||
inline void conservativeResize(int, int rows, int) { m_rows = rows; }
|
||||
inline void resize(int, int rows, int) { m_rows = rows; }
|
||||
inline const T *data() const { return m_data.array; }
|
||||
inline T *data() { return m_data.array; }
|
||||
};
|
||||
@ -170,10 +174,8 @@ template<typename T, int Size, int _Rows, int _Options> class ei_matrix_storage<
|
||||
inline void swap(ei_matrix_storage& other) { std::swap(m_data,other.m_data); std::swap(m_cols,other.m_cols); }
|
||||
inline int rows(void) const {return _Rows;}
|
||||
inline int cols(void) const {return m_cols;}
|
||||
inline void resize(int, int, int cols)
|
||||
{
|
||||
m_cols = cols;
|
||||
}
|
||||
inline void conservativeResize(int, int, int cols) { m_cols = cols; }
|
||||
inline void resize(int, int, int cols) { m_cols = cols; }
|
||||
inline const T *data() const { return m_data.array; }
|
||||
inline T *data() { return m_data.array; }
|
||||
};
|
||||
@ -196,6 +198,12 @@ template<typename T, int _Options> class ei_matrix_storage<T, Dynamic, Dynamic,
|
||||
{ std::swap(m_data,other.m_data); std::swap(m_rows,other.m_rows); std::swap(m_cols,other.m_cols); }
|
||||
inline int rows(void) const {return m_rows;}
|
||||
inline int cols(void) const {return m_cols;}
|
||||
inline void conservativeResize(int size, int rows, int cols)
|
||||
{
|
||||
m_data = ei_conditional_aligned_realloc_new<T,(_Options&DontAlign)==0>(m_data, size, m_rows*m_cols);
|
||||
m_rows = rows;
|
||||
m_cols = cols;
|
||||
}
|
||||
void resize(int size, int rows, int cols)
|
||||
{
|
||||
if(size != m_rows*m_cols)
|
||||
@ -228,6 +236,11 @@ template<typename T, int _Rows, int _Options> class ei_matrix_storage<T, Dynamic
|
||||
inline void swap(ei_matrix_storage& other) { std::swap(m_data,other.m_data); std::swap(m_cols,other.m_cols); }
|
||||
inline static int rows(void) {return _Rows;}
|
||||
inline int cols(void) const {return m_cols;}
|
||||
inline void conservativeResize(int size, int, int cols)
|
||||
{
|
||||
m_data = ei_conditional_aligned_realloc_new<T,(_Options&DontAlign)==0>(m_data, size, _Rows*m_cols);
|
||||
m_cols = cols;
|
||||
}
|
||||
void resize(int size, int, int cols)
|
||||
{
|
||||
if(size != _Rows*m_cols)
|
||||
@ -259,6 +272,11 @@ template<typename T, int _Cols, int _Options> class ei_matrix_storage<T, Dynamic
|
||||
inline void swap(ei_matrix_storage& other) { std::swap(m_data,other.m_data); std::swap(m_rows,other.m_rows); }
|
||||
inline int rows(void) const {return m_rows;}
|
||||
inline static int cols(void) {return _Cols;}
|
||||
inline void conservativeResize(int size, int rows, int)
|
||||
{
|
||||
m_data = ei_conditional_aligned_realloc_new<T,(_Options&DontAlign)==0>(m_data, size, m_rows*_Cols);
|
||||
m_rows = rows;
|
||||
}
|
||||
void resize(int size, int rows, int)
|
||||
{
|
||||
if(size != m_rows*_Cols)
|
||||
|
@ -53,7 +53,8 @@ template<typename ExpressionType> class NestByValue
|
||||
|
||||
inline int rows() const { return m_expression.rows(); }
|
||||
inline int cols() const { return m_expression.cols(); }
|
||||
inline int stride() const { return m_expression.stride(); }
|
||||
inline int outerStride() const { return m_expression.outerStride(); }
|
||||
inline int innerStride() const { return m_expression.innerStride(); }
|
||||
|
||||
inline const CoeffReturnType coeff(int row, int col) const
|
||||
{
|
||||
|
@ -47,7 +47,7 @@
|
||||
* \sa class DiagonalMatrix
|
||||
*/
|
||||
template<int SizeAtCompileTime, int MaxSizeAtCompileTime = SizeAtCompileTime> class PermutationMatrix;
|
||||
template<typename PermutationType, typename MatrixType, int Side> struct ei_permut_matrix_product_retval;
|
||||
template<typename PermutationType, typename MatrixType, int Side, bool Transposed=false> struct ei_permut_matrix_product_retval;
|
||||
|
||||
template<int SizeAtCompileTime, int MaxSizeAtCompileTime>
|
||||
struct ei_traits<PermutationMatrix<SizeAtCompileTime, MaxSizeAtCompileTime> >
|
||||
@ -132,6 +132,9 @@ class PermutationMatrix : public EigenBase<PermutationMatrix<SizeAtCompileTime,
|
||||
/** \returns the number of columns */
|
||||
inline int cols() const { return m_indices.size(); }
|
||||
|
||||
/** \returns the size of a side of the respective square matrix, i.e., the number of indices */
|
||||
inline int size() const { return m_indices.size(); }
|
||||
|
||||
#ifndef EIGEN_PARSED_BY_DOXYGEN
|
||||
template<typename DenseDerived>
|
||||
void evalTo(MatrixBase<DenseDerived>& other) const
|
||||
@ -213,16 +216,29 @@ class PermutationMatrix : public EigenBase<PermutationMatrix<SizeAtCompileTime,
|
||||
return *this;
|
||||
}
|
||||
|
||||
/**** inversion and multiplication helpers to hopefully get RVO ****/
|
||||
/** \returns the inverse permutation matrix.
|
||||
*
|
||||
* \note \note_try_to_help_rvo
|
||||
*/
|
||||
inline Transpose<PermutationMatrix> inverse() const
|
||||
{ return *this; }
|
||||
/** \returns the tranpose permutation matrix.
|
||||
*
|
||||
* \note \note_try_to_help_rvo
|
||||
*/
|
||||
inline Transpose<PermutationMatrix> transpose() const
|
||||
{ return *this; }
|
||||
|
||||
/**** multiplication helpers to hopefully get RVO ****/
|
||||
|
||||
#ifndef EIGEN_PARSED_BY_DOXYGEN
|
||||
protected:
|
||||
enum Inverse_t {Inverse};
|
||||
PermutationMatrix(Inverse_t, const PermutationMatrix& other)
|
||||
: m_indices(other.m_indices.size())
|
||||
template<int OtherSize, int OtherMaxSize>
|
||||
PermutationMatrix(const Transpose<PermutationMatrix<OtherSize,OtherMaxSize> >& other)
|
||||
: m_indices(other.nestedPermutation().size())
|
||||
{
|
||||
for (int i=0; i<rows();++i) m_indices.coeffRef(other.m_indices.coeff(i)) = i;
|
||||
for (int i=0; i<rows();++i) m_indices.coeffRef(other.nestedPermutation().indices().coeff(i)) = i;
|
||||
}
|
||||
protected:
|
||||
enum Product_t {Product};
|
||||
PermutationMatrix(Product_t, const PermutationMatrix& lhs, const PermutationMatrix& rhs)
|
||||
: m_indices(lhs.m_indices.size())
|
||||
@ -233,12 +249,7 @@ class PermutationMatrix : public EigenBase<PermutationMatrix<SizeAtCompileTime,
|
||||
#endif
|
||||
|
||||
public:
|
||||
/** \returns the inverse permutation matrix.
|
||||
*
|
||||
* \note \note_try_to_help_rvo
|
||||
*/
|
||||
inline PermutationMatrix inverse() const
|
||||
{ return PermutationMatrix(Inverse, *this); }
|
||||
|
||||
/** \returns the product permutation matrix.
|
||||
*
|
||||
* \note \note_try_to_help_rvo
|
||||
@ -247,6 +258,22 @@ class PermutationMatrix : public EigenBase<PermutationMatrix<SizeAtCompileTime,
|
||||
inline PermutationMatrix operator*(const PermutationMatrix<OtherSize, OtherMaxSize>& other) const
|
||||
{ return PermutationMatrix(Product, *this, other); }
|
||||
|
||||
/** \returns the product of a permutation with another inverse permutation.
|
||||
*
|
||||
* \note \note_try_to_help_rvo
|
||||
*/
|
||||
template<int OtherSize, int OtherMaxSize>
|
||||
inline PermutationMatrix operator*(const Transpose<PermutationMatrix<OtherSize,OtherMaxSize> >& other) const
|
||||
{ return PermutationMatrix(Product, *this, other.eval()); }
|
||||
|
||||
/** \returns the product of an inverse permutation with another permutation.
|
||||
*
|
||||
* \note \note_try_to_help_rvo
|
||||
*/
|
||||
template<int OtherSize, int OtherMaxSize> friend
|
||||
inline PermutationMatrix operator*(const Transpose<PermutationMatrix<OtherSize,OtherMaxSize> >& other, const PermutationMatrix& perm)
|
||||
{ return PermutationMatrix(Product, other.eval(), perm); }
|
||||
|
||||
protected:
|
||||
|
||||
IndicesType m_indices;
|
||||
@ -277,15 +304,15 @@ operator*(const PermutationMatrix<SizeAtCompileTime, MaxSizeAtCompileTime> &perm
|
||||
(permutation, matrix.derived());
|
||||
}
|
||||
|
||||
template<typename PermutationType, typename MatrixType, int Side>
|
||||
struct ei_traits<ei_permut_matrix_product_retval<PermutationType, MatrixType, Side> >
|
||||
template<typename PermutationType, typename MatrixType, int Side, bool Transposed>
|
||||
struct ei_traits<ei_permut_matrix_product_retval<PermutationType, MatrixType, Side, Transposed> >
|
||||
{
|
||||
typedef typename MatrixType::PlainObject ReturnType;
|
||||
};
|
||||
|
||||
template<typename PermutationType, typename MatrixType, int Side>
|
||||
template<typename PermutationType, typename MatrixType, int Side, bool Transposed>
|
||||
struct ei_permut_matrix_product_retval
|
||||
: public ReturnByValue<ei_permut_matrix_product_retval<PermutationType, MatrixType, Side> >
|
||||
: public ReturnByValue<ei_permut_matrix_product_retval<PermutationType, MatrixType, Side, Transposed> >
|
||||
{
|
||||
typedef typename ei_cleantype<typename MatrixType::Nested>::type MatrixTypeNestedCleaned;
|
||||
|
||||
@ -299,21 +326,46 @@ struct ei_permut_matrix_product_retval
|
||||
template<typename Dest> inline void evalTo(Dest& dst) const
|
||||
{
|
||||
const int n = Side==OnTheLeft ? rows() : cols();
|
||||
for(int i = 0; i < n; ++i)
|
||||
|
||||
if(ei_is_same_type<MatrixTypeNestedCleaned,Dest>::ret && ei_extract_data(dst) == ei_extract_data(m_matrix))
|
||||
{
|
||||
Block<
|
||||
Dest,
|
||||
Side==OnTheLeft ? 1 : Dest::RowsAtCompileTime,
|
||||
Side==OnTheRight ? 1 : Dest::ColsAtCompileTime
|
||||
>(dst, Side==OnTheLeft ? m_permutation.indices().coeff(i) : i)
|
||||
// apply the permutation inplace
|
||||
Matrix<bool,PermutationType::RowsAtCompileTime,1,0,PermutationType::MaxRowsAtCompileTime> mask(m_permutation.size());
|
||||
mask.fill(false);
|
||||
int r = 0;
|
||||
while(r < m_permutation.size())
|
||||
{
|
||||
// search for the next seed
|
||||
while(r<m_permutation.size() && mask[r]) r++;
|
||||
if(r>=m_permutation.size())
|
||||
break;
|
||||
// we got one, let's follow it until we are back to the seed
|
||||
int k0 = r++;
|
||||
int kPrev = k0;
|
||||
mask.coeffRef(k0) = true;
|
||||
for(int k=m_permutation.indices().coeff(k0); k!=k0; k=m_permutation.indices().coeff(k))
|
||||
{
|
||||
Block<Dest, Side==OnTheLeft ? 1 : Dest::RowsAtCompileTime, Side==OnTheRight ? 1 : Dest::ColsAtCompileTime>(dst, k)
|
||||
.swap(Block<Dest, Side==OnTheLeft ? 1 : Dest::RowsAtCompileTime, Side==OnTheRight ? 1 : Dest::ColsAtCompileTime>
|
||||
(dst,((Side==OnTheLeft) ^ Transposed) ? k0 : kPrev));
|
||||
|
||||
=
|
||||
mask.coeffRef(k) = true;
|
||||
kPrev = k;
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
for(int i = 0; i < n; ++i)
|
||||
{
|
||||
Block<Dest, Side==OnTheLeft ? 1 : Dest::RowsAtCompileTime, Side==OnTheRight ? 1 : Dest::ColsAtCompileTime>
|
||||
(dst, ((Side==OnTheLeft) ^ Transposed) ? m_permutation.indices().coeff(i) : i)
|
||||
|
||||
Block<
|
||||
MatrixTypeNestedCleaned,
|
||||
Side==OnTheLeft ? 1 : MatrixType::RowsAtCompileTime,
|
||||
Side==OnTheRight ? 1 : MatrixType::ColsAtCompileTime
|
||||
>(m_matrix, Side==OnTheRight ? m_permutation.indices().coeff(i) : i);
|
||||
=
|
||||
|
||||
Block<MatrixTypeNestedCleaned,Side==OnTheLeft ? 1 : MatrixType::RowsAtCompileTime,Side==OnTheRight ? 1 : MatrixType::ColsAtCompileTime>
|
||||
(m_matrix, ((Side==OnTheRight) ^ Transposed) ? m_permutation.indices().coeff(i) : i);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -322,4 +374,78 @@ struct ei_permut_matrix_product_retval
|
||||
const typename MatrixType::Nested m_matrix;
|
||||
};
|
||||
|
||||
/* Template partial specialization for transposed/inverse permutations */
|
||||
|
||||
template<int SizeAtCompileTime, int MaxSizeAtCompileTime>
|
||||
struct ei_traits<Transpose<PermutationMatrix<SizeAtCompileTime, MaxSizeAtCompileTime> > >
|
||||
: ei_traits<Matrix<int,SizeAtCompileTime,SizeAtCompileTime,0,MaxSizeAtCompileTime,MaxSizeAtCompileTime> >
|
||||
{};
|
||||
|
||||
template<int SizeAtCompileTime, int MaxSizeAtCompileTime>
|
||||
class Transpose<PermutationMatrix<SizeAtCompileTime, MaxSizeAtCompileTime> >
|
||||
: public EigenBase<Transpose<PermutationMatrix<SizeAtCompileTime, MaxSizeAtCompileTime> > >
|
||||
{
|
||||
typedef PermutationMatrix<SizeAtCompileTime, MaxSizeAtCompileTime> PermutationType;
|
||||
typedef typename PermutationType::IndicesType IndicesType;
|
||||
public:
|
||||
|
||||
#ifndef EIGEN_PARSED_BY_DOXYGEN
|
||||
typedef ei_traits<PermutationType> Traits;
|
||||
typedef Matrix<int,SizeAtCompileTime,SizeAtCompileTime,0,MaxSizeAtCompileTime,MaxSizeAtCompileTime>
|
||||
DenseMatrixType;
|
||||
enum {
|
||||
Flags = Traits::Flags,
|
||||
CoeffReadCost = Traits::CoeffReadCost,
|
||||
RowsAtCompileTime = Traits::RowsAtCompileTime,
|
||||
ColsAtCompileTime = Traits::ColsAtCompileTime,
|
||||
MaxRowsAtCompileTime = Traits::MaxRowsAtCompileTime,
|
||||
MaxColsAtCompileTime = Traits::MaxColsAtCompileTime
|
||||
};
|
||||
typedef typename Traits::Scalar Scalar;
|
||||
#endif
|
||||
|
||||
Transpose(const PermutationType& p) : m_permutation(p) {}
|
||||
|
||||
inline int rows() const { return m_permutation.rows(); }
|
||||
inline int cols() const { return m_permutation.cols(); }
|
||||
|
||||
#ifndef EIGEN_PARSED_BY_DOXYGEN
|
||||
template<typename DenseDerived>
|
||||
void evalTo(MatrixBase<DenseDerived>& other) const
|
||||
{
|
||||
other.setZero();
|
||||
for (int i=0; i<rows();++i)
|
||||
other.coeffRef(i, m_permutation.indices().coeff(i)) = typename DenseDerived::Scalar(1);
|
||||
}
|
||||
#endif
|
||||
|
||||
/** \return the equivalent permutation matrix */
|
||||
PermutationType eval() const { return *this; }
|
||||
|
||||
DenseMatrixType toDenseMatrix() const { return *this; }
|
||||
|
||||
/** \returns the matrix with the inverse permutation applied to the columns.
|
||||
*/
|
||||
template<typename Derived> friend
|
||||
inline const ei_permut_matrix_product_retval<PermutationType, Derived, OnTheRight, true>
|
||||
operator*(const MatrixBase<Derived>& matrix, const Transpose& trPerm)
|
||||
{
|
||||
return ei_permut_matrix_product_retval<PermutationType, Derived, OnTheRight, true>(trPerm.m_permutation, matrix.derived());
|
||||
}
|
||||
|
||||
/** \returns the matrix with the inverse permutation applied to the rows.
|
||||
*/
|
||||
template<typename Derived>
|
||||
inline const ei_permut_matrix_product_retval<PermutationType, Derived, OnTheLeft, true>
|
||||
operator*(const MatrixBase<Derived>& matrix) const
|
||||
{
|
||||
return ei_permut_matrix_product_retval<PermutationType, Derived, OnTheLeft, true>(m_permutation, matrix.derived());
|
||||
}
|
||||
|
||||
const PermutationType& nestedPermutation() const { return m_permutation; }
|
||||
|
||||
protected:
|
||||
const PermutationType& m_permutation;
|
||||
};
|
||||
|
||||
#endif // EIGEN_PERMUTATIONMATRIX_H
|
||||
|
@ -68,9 +68,9 @@ template<typename Lhs, typename Rhs> struct ei_product_type
|
||||
// is to work around an internal compiler error with gcc 4.1 and 4.2.
|
||||
private:
|
||||
enum {
|
||||
rows_select = Rows >=EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD ? Large : (Rows==1 ? 1 : Small),
|
||||
cols_select = Cols >=EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD ? Large : (Cols==1 ? 1 : Small),
|
||||
depth_select = Depth>=EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD ? Large : (Depth==1 ? 1 : Small)
|
||||
rows_select = Rows >=EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD ? Large : (Rows==1 ? 1 : Small),
|
||||
cols_select = Cols >=EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD ? Large : (Cols==1 ? 1 : Small),
|
||||
depth_select = Depth>=EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD ? Large : (Depth==1 ? 1 : Small)
|
||||
};
|
||||
typedef ei_product_type_selector<rows_select, cols_select, depth_select> product_type_selector;
|
||||
|
||||
@ -93,12 +93,12 @@ template<> struct ei_product_type_selector<Small,Small,Small>
|
||||
template<> struct ei_product_type_selector<Small, Small, 1> { enum { ret = LazyCoeffBasedProductMode }; };
|
||||
template<> struct ei_product_type_selector<Small, Large, 1> { enum { ret = LazyCoeffBasedProductMode }; };
|
||||
template<> struct ei_product_type_selector<Large, Small, 1> { enum { ret = LazyCoeffBasedProductMode }; };
|
||||
template<> struct ei_product_type_selector<1, Large,Small> { enum { ret = GemvProduct }; };
|
||||
template<> struct ei_product_type_selector<1, Large,Small> { enum { ret = CoeffBasedProductMode }; };
|
||||
template<> struct ei_product_type_selector<1, Large,Large> { enum { ret = GemvProduct }; };
|
||||
template<> struct ei_product_type_selector<1, Small,Large> { enum { ret = GemvProduct }; };
|
||||
template<> struct ei_product_type_selector<Large,1, Small> { enum { ret = GemvProduct }; };
|
||||
template<> struct ei_product_type_selector<1, Small,Large> { enum { ret = CoeffBasedProductMode }; };
|
||||
template<> struct ei_product_type_selector<Large,1, Small> { enum { ret = CoeffBasedProductMode }; };
|
||||
template<> struct ei_product_type_selector<Large,1, Large> { enum { ret = GemvProduct }; };
|
||||
template<> struct ei_product_type_selector<Small,1, Large> { enum { ret = GemvProduct }; };
|
||||
template<> struct ei_product_type_selector<Small,1, Large> { enum { ret = CoeffBasedProductMode }; };
|
||||
template<> struct ei_product_type_selector<Small,Small,Large> { enum { ret = GemmProduct }; };
|
||||
template<> struct ei_product_type_selector<Large,Small,Large> { enum { ret = GemmProduct }; };
|
||||
template<> struct ei_product_type_selector<Small,Large,Large> { enum { ret = GemmProduct }; };
|
||||
@ -427,6 +427,10 @@ template<typename OtherDerived>
|
||||
inline const typename ProductReturnType<Derived,OtherDerived>::Type
|
||||
MatrixBase<Derived>::operator*(const MatrixBase<OtherDerived> &other) const
|
||||
{
|
||||
// A note regarding the function declaration: In MSVC, this function will sometimes
|
||||
// not be inlined since ei_matrix_storage is an unwindable object for dynamic
|
||||
// matrices and product types are holding a member to store the result.
|
||||
// Thus it does not help tagging this function with EIGEN_STRONG_INLINE.
|
||||
enum {
|
||||
ProductIsValid = Derived::ColsAtCompileTime==Dynamic
|
||||
|| OtherDerived::RowsAtCompileTime==Dynamic
|
||||
|
@ -40,7 +40,7 @@ struct ei_redux_traits
|
||||
private:
|
||||
enum {
|
||||
PacketSize = ei_packet_traits<typename Derived::Scalar>::size,
|
||||
InnerMaxSize = int(Derived::Flags)&RowMajorBit
|
||||
InnerMaxSize = int(Derived::IsRowMajor)
|
||||
? Derived::MaxColsAtCompileTime
|
||||
: Derived::MaxRowsAtCompileTime
|
||||
};
|
||||
@ -100,15 +100,15 @@ template<typename Func, typename Derived, int Start>
|
||||
struct ei_redux_novec_unroller<Func, Derived, Start, 1>
|
||||
{
|
||||
enum {
|
||||
col = Start / Derived::RowsAtCompileTime,
|
||||
row = Start % Derived::RowsAtCompileTime
|
||||
outer = Start / Derived::InnerSizeAtCompileTime,
|
||||
inner = Start % Derived::InnerSizeAtCompileTime
|
||||
};
|
||||
|
||||
typedef typename Derived::Scalar Scalar;
|
||||
|
||||
EIGEN_STRONG_INLINE static Scalar run(const Derived &mat, const Func&)
|
||||
{
|
||||
return mat.coeff(row, col);
|
||||
return mat.coeffByOuterInner(outer, inner);
|
||||
}
|
||||
};
|
||||
|
||||
@ -148,12 +148,8 @@ struct ei_redux_vec_unroller<Func, Derived, Start, 1>
|
||||
{
|
||||
enum {
|
||||
index = Start * ei_packet_traits<typename Derived::Scalar>::size,
|
||||
row = int(Derived::Flags)&RowMajorBit
|
||||
? index / int(Derived::ColsAtCompileTime)
|
||||
: index % Derived::RowsAtCompileTime,
|
||||
col = int(Derived::Flags)&RowMajorBit
|
||||
? index % int(Derived::ColsAtCompileTime)
|
||||
: index / Derived::RowsAtCompileTime,
|
||||
outer = index / int(Derived::InnerSizeAtCompileTime),
|
||||
inner = index % int(Derived::InnerSizeAtCompileTime),
|
||||
alignment = (Derived::Flags & AlignedBit) ? Aligned : Unaligned
|
||||
};
|
||||
|
||||
@ -162,7 +158,7 @@ struct ei_redux_vec_unroller<Func, Derived, Start, 1>
|
||||
|
||||
EIGEN_STRONG_INLINE static PacketScalar run(const Derived &mat, const Func&)
|
||||
{
|
||||
return mat.template packet<alignment>(row, col);
|
||||
return mat.template packetByOuterInner<alignment>(outer, inner);
|
||||
}
|
||||
};
|
||||
|
||||
@ -184,12 +180,12 @@ struct ei_redux_impl<Func, Derived, DefaultTraversal, NoUnrolling>
|
||||
{
|
||||
ei_assert(mat.rows()>0 && mat.cols()>0 && "you are using a non initialized matrix");
|
||||
Scalar res;
|
||||
res = mat.coeff(0, 0);
|
||||
for(int i = 1; i < mat.rows(); ++i)
|
||||
res = func(res, mat.coeff(i, 0));
|
||||
for(int j = 1; j < mat.cols(); ++j)
|
||||
for(int i = 0; i < mat.rows(); ++i)
|
||||
res = func(res, mat.coeff(i, j));
|
||||
res = mat.coeffByOuterInner(0, 0);
|
||||
for(int i = 1; i < mat.innerSize(); ++i)
|
||||
res = func(res, mat.coeffByOuterInner(0, i));
|
||||
for(int i = 1; i < mat.outerSize(); ++i)
|
||||
for(int j = 0; j < mat.innerSize(); ++j)
|
||||
res = func(res, mat.coeffByOuterInner(i, j));
|
||||
return res;
|
||||
}
|
||||
};
|
||||
@ -253,8 +249,7 @@ struct ei_redux_impl<Func, Derived, SliceVectorizedTraversal, NoUnrolling>
|
||||
const int innerSize = mat.innerSize();
|
||||
const int outerSize = mat.outerSize();
|
||||
enum {
|
||||
packetSize = ei_packet_traits<Scalar>::size,
|
||||
isRowMajor = Derived::Flags&RowMajorBit?1:0
|
||||
packetSize = ei_packet_traits<Scalar>::size
|
||||
};
|
||||
const int packetedInnerSize = ((innerSize)/packetSize)*packetSize;
|
||||
Scalar res;
|
||||
@ -263,13 +258,12 @@ struct ei_redux_impl<Func, Derived, SliceVectorizedTraversal, NoUnrolling>
|
||||
PacketScalar packet_res = mat.template packet<Unaligned>(0,0);
|
||||
for(int j=0; j<outerSize; ++j)
|
||||
for(int i=0; i<packetedInnerSize; i+=int(packetSize))
|
||||
packet_res = func.packetOp(packet_res, mat.template packet<Unaligned>
|
||||
(isRowMajor?j:i, isRowMajor?i:j));
|
||||
packet_res = func.packetOp(packet_res, mat.template packetByOuterInner<Unaligned>(j,i));
|
||||
|
||||
res = func.predux(packet_res);
|
||||
for(int j=0; j<outerSize; ++j)
|
||||
for(int i=packetedInnerSize; i<innerSize; ++i)
|
||||
res = func(res, mat.coeff(isRowMajor?j:i, isRowMajor?i:j));
|
||||
res = func(res, mat.coeffByOuterInner(j,i));
|
||||
}
|
||||
else // too small to vectorize anything.
|
||||
// since this is dynamic-size hence inefficient anyway for such small sizes, don't try to optimize.
|
||||
|
@ -34,14 +34,9 @@ struct ei_traits<ReturnByValue<Derived> >
|
||||
: public ei_traits<typename ei_traits<Derived>::ReturnType>
|
||||
{
|
||||
enum {
|
||||
// FIXME had to remove the DirectAccessBit for usage like
|
||||
// matrix.inverse().block(...)
|
||||
// because the Block ctor with direct access
|
||||
// wants to call coeffRef() to get an address, and that fails (infinite recursion) as ReturnByValue
|
||||
// doesnt implement coeffRef().
|
||||
// The fact that I had to do that shows that when doing xpr.block() with a non-direct-access xpr,
|
||||
// even if xpr has the EvalBeforeNestingBit, the block() doesn't use direct access on the evaluated
|
||||
// xpr.
|
||||
// We're disabling the DirectAccess because e.g. the constructor of
|
||||
// the Block-with-DirectAccess expression requires to have a coeffRef method.
|
||||
// Also, we don't want to have to implement the stride stuff.
|
||||
Flags = (ei_traits<typename ei_traits<Derived>::ReturnType>::Flags
|
||||
| EvalBeforeNestingBit) & ~DirectAccessBit
|
||||
};
|
||||
|
@ -75,8 +75,9 @@ template<typename MatrixType, unsigned int UpLo> class SelfAdjointView
|
||||
|
||||
inline int rows() const { return m_matrix.rows(); }
|
||||
inline int cols() const { return m_matrix.cols(); }
|
||||
inline int stride() const { return m_matrix.stride(); }
|
||||
|
||||
inline int outerStride() const { return m_matrix.outerStride(); }
|
||||
inline int innerStride() const { return m_matrix.innerStride(); }
|
||||
|
||||
/** \sa MatrixBase::coeff()
|
||||
* \warning the coordinates must fit into the referenced triangular part
|
||||
*/
|
||||
|
@ -57,7 +57,8 @@ template<typename BinaryOp, typename MatrixType> class SelfCwiseBinaryOp
|
||||
|
||||
inline int rows() const { return m_matrix.rows(); }
|
||||
inline int cols() const { return m_matrix.cols(); }
|
||||
inline int stride() const { return m_matrix.stride(); }
|
||||
inline int outerStride() const { return m_matrix.outerStride(); }
|
||||
inline int innerStride() const { return m_matrix.innerStride(); }
|
||||
inline const Scalar* data() const { return m_matrix.data(); }
|
||||
|
||||
// note that this function is needed by assign to correctly align loads/stores
|
||||
|
113
Eigen/src/Core/Stride.h
Normal file
113
Eigen/src/Core/Stride.h
Normal file
@ -0,0 +1,113 @@
|
||||
// This file is part of Eigen, a lightweight C++ template library
|
||||
// for linear algebra.
|
||||
//
|
||||
// Copyright (C) 2010 Benoit Jacob <jacob.benoit.1@gmail.com>
|
||||
//
|
||||
// Eigen is free software; you can redistribute it and/or
|
||||
// modify it under the terms of the GNU Lesser General Public
|
||||
// License as published by the Free Software Foundation; either
|
||||
// version 3 of the License, or (at your option) any later version.
|
||||
//
|
||||
// Alternatively, you can redistribute it and/or
|
||||
// modify it under the terms of the GNU General Public License as
|
||||
// published by the Free Software Foundation; either version 2 of
|
||||
// the License, or (at your option) any later version.
|
||||
//
|
||||
// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY
|
||||
// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
||||
// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the
|
||||
// GNU General Public License for more details.
|
||||
//
|
||||
// You should have received a copy of the GNU Lesser General Public
|
||||
// License and a copy of the GNU General Public License along with
|
||||
// Eigen. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
#ifndef EIGEN_STRIDE_H
|
||||
#define EIGEN_STRIDE_H
|
||||
|
||||
/** \class Stride
|
||||
*
|
||||
* \brief Holds strides information for Map
|
||||
*
|
||||
* This class holds the strides information for mapping arrays with strides with class Map.
|
||||
*
|
||||
* It holds two values: the inner stride and the outer stride.
|
||||
*
|
||||
* The inner stride is the pointer increment between two consecutive entries within a given row of a
|
||||
* row-major matrix or within a given column of a column-major matrix.
|
||||
*
|
||||
* The outer stride is the pointer increment between two consecutive rows of a row-major matrix or
|
||||
* between two consecutive columns of a column-major matrix.
|
||||
*
|
||||
* These two values can be passed either at compile-time as template parameters, or at runtime as
|
||||
* arguments to the constructor.
|
||||
*
|
||||
* Indeed, this class takes two template parameters:
|
||||
* \param _OuterStrideAtCompileTime the outer stride, or Dynamic if you want to specify it at runtime.
|
||||
* \param _InnerStrideAtCompileTime the inner stride, or Dynamic if you want to specify it at runtime.
|
||||
*
|
||||
* \include Map_general_stride.cpp
|
||||
* Output: \verbinclude Map_general_stride.out
|
||||
*
|
||||
* \sa class InnerStride, class OuterStride
|
||||
*/
|
||||
template<int _OuterStrideAtCompileTime, int _InnerStrideAtCompileTime>
|
||||
class Stride
|
||||
{
|
||||
public:
|
||||
|
||||
enum {
|
||||
InnerStrideAtCompileTime = _InnerStrideAtCompileTime,
|
||||
OuterStrideAtCompileTime = _OuterStrideAtCompileTime
|
||||
};
|
||||
|
||||
/** Default constructor, for use when strides are fixed at compile time */
|
||||
Stride()
|
||||
: m_outer(OuterStrideAtCompileTime), m_inner(InnerStrideAtCompileTime)
|
||||
{
|
||||
ei_assert(InnerStrideAtCompileTime != Dynamic && OuterStrideAtCompileTime != Dynamic);
|
||||
}
|
||||
|
||||
/** Constructor allowing to pass the strides at runtime */
|
||||
Stride(int outerStride, int innerStride)
|
||||
: m_outer(outerStride), m_inner(innerStride)
|
||||
{
|
||||
ei_assert(innerStride>=0 && outerStride>=0);
|
||||
}
|
||||
|
||||
/** Copy constructor */
|
||||
Stride(const Stride& other)
|
||||
: m_outer(other.outer()), m_inner(other.inner())
|
||||
{}
|
||||
|
||||
/** \returns the outer stride */
|
||||
inline int outer() const { return m_outer.value(); }
|
||||
/** \returns the inner stride */
|
||||
inline int inner() const { return m_inner.value(); }
|
||||
|
||||
protected:
|
||||
ei_int_if_dynamic<OuterStrideAtCompileTime> m_outer;
|
||||
ei_int_if_dynamic<InnerStrideAtCompileTime> m_inner;
|
||||
};
|
||||
|
||||
/** \brief Convenience specialization of Stride to specify only an inner stride */
|
||||
template<int Value>
|
||||
class InnerStride : public Stride<0, Value>
|
||||
{
|
||||
typedef Stride<0, Value> Base;
|
||||
public:
|
||||
InnerStride() : Base() {}
|
||||
InnerStride(int v) : Base(0, v) {}
|
||||
};
|
||||
|
||||
/** \brief Convenience specialization of Stride to specify only an outer stride */
|
||||
template<int Value>
|
||||
class OuterStride : public Stride<Value, 0>
|
||||
{
|
||||
typedef Stride<Value, 0> Base;
|
||||
public:
|
||||
OuterStride() : Base() {}
|
||||
OuterStride(int v) : Base(v,0) {}
|
||||
};
|
||||
|
||||
#endif // EIGEN_STRIDE_H
|
@ -47,7 +47,8 @@ template<typename ExpressionType> class SwapWrapper
|
||||
|
||||
inline int rows() const { return m_expression.rows(); }
|
||||
inline int cols() const { return m_expression.cols(); }
|
||||
inline int stride() const { return m_expression.stride(); }
|
||||
inline int outerStride() const { return m_expression.outerStride(); }
|
||||
inline int innerStride() const { return m_expression.innerStride(); }
|
||||
|
||||
inline Scalar& coeffRef(int row, int col)
|
||||
{
|
||||
@ -60,7 +61,7 @@ template<typename ExpressionType> class SwapWrapper
|
||||
}
|
||||
|
||||
template<typename OtherDerived>
|
||||
void copyCoeff(int row, int col, const MatrixBase<OtherDerived>& other)
|
||||
void copyCoeff(int row, int col, const DenseBase<OtherDerived>& other)
|
||||
{
|
||||
OtherDerived& _other = other.const_cast_derived();
|
||||
ei_internal_assert(row >= 0 && row < rows()
|
||||
@ -71,7 +72,7 @@ template<typename ExpressionType> class SwapWrapper
|
||||
}
|
||||
|
||||
template<typename OtherDerived>
|
||||
void copyCoeff(int index, const MatrixBase<OtherDerived>& other)
|
||||
void copyCoeff(int index, const DenseBase<OtherDerived>& other)
|
||||
{
|
||||
OtherDerived& _other = other.const_cast_derived();
|
||||
ei_internal_assert(index >= 0 && index < m_expression.size());
|
||||
@ -81,7 +82,7 @@ template<typename ExpressionType> class SwapWrapper
|
||||
}
|
||||
|
||||
template<typename OtherDerived, int StoreMode, int LoadMode>
|
||||
void copyPacket(int row, int col, const MatrixBase<OtherDerived>& other)
|
||||
void copyPacket(int row, int col, const DenseBase<OtherDerived>& other)
|
||||
{
|
||||
OtherDerived& _other = other.const_cast_derived();
|
||||
ei_internal_assert(row >= 0 && row < rows()
|
||||
@ -94,7 +95,7 @@ template<typename ExpressionType> class SwapWrapper
|
||||
}
|
||||
|
||||
template<typename OtherDerived, int StoreMode, int LoadMode>
|
||||
void copyPacket(int index, const MatrixBase<OtherDerived>& other)
|
||||
void copyPacket(int index, const DenseBase<OtherDerived>& other)
|
||||
{
|
||||
OtherDerived& _other = other.const_cast_derived();
|
||||
ei_internal_assert(index >= 0 && index < m_expression.size());
|
||||
|
@ -93,7 +93,8 @@ template<typename MatrixType> class TransposeImpl<MatrixType,Dense>
|
||||
typedef typename MatrixType::template MakeBase<Transpose<MatrixType> >::Type Base;
|
||||
EIGEN_DENSE_PUBLIC_INTERFACE(Transpose<MatrixType>)
|
||||
|
||||
inline int stride() const { return derived().nestedExpression().stride(); }
|
||||
inline int innerStride() const { return derived().nestedExpression().innerStride(); }
|
||||
inline int outerStride() const { return derived().nestedExpression().outerStride(); }
|
||||
inline Scalar* data() { return derived().nestedExpression().data(); }
|
||||
inline const Scalar* data() const { return derived().nestedExpression().data(); }
|
||||
|
||||
@ -295,25 +296,6 @@ struct ei_blas_traits<SelfCwiseBinaryOp<BinOp,NestedXpr> >
|
||||
static inline const XprType extract(const XprType& x) { return x; }
|
||||
};
|
||||
|
||||
|
||||
template<typename T, int Access=ei_blas_traits<T>::ActualAccess>
|
||||
struct ei_extract_data_selector {
|
||||
static typename T::Scalar* run(const T& m)
|
||||
{
|
||||
return &ei_blas_traits<T>::extract(m).const_cast_derived().coeffRef(0,0);
|
||||
}
|
||||
};
|
||||
|
||||
template<typename T>
|
||||
struct ei_extract_data_selector<T,NoDirectAccess> {
|
||||
static typename T::Scalar* run(const T&) { return 0; }
|
||||
};
|
||||
|
||||
template<typename T> typename T::Scalar* ei_extract_data(const T& m)
|
||||
{
|
||||
return ei_extract_data_selector<T>::run(m);
|
||||
}
|
||||
|
||||
template<typename Scalar, bool DestIsTranposed, typename OtherDerived>
|
||||
struct ei_check_transpose_aliasing_selector
|
||||
{
|
||||
|
@ -50,7 +50,8 @@ template<typename Derived> class TriangularBase : public EigenBase<Derived>
|
||||
|
||||
inline int rows() const { return derived().rows(); }
|
||||
inline int cols() const { return derived().cols(); }
|
||||
inline int stride() const { return derived().stride(); }
|
||||
inline int outerStride() const { return derived().outerStride(); }
|
||||
inline int innerStride() const { return derived().innerStride(); }
|
||||
|
||||
inline Scalar coeff(int row, int col) const { return derived().coeff(row,col); }
|
||||
inline Scalar& coeffRef(int row, int col) { return derived().coeffRef(row,col); }
|
||||
@ -165,7 +166,8 @@ template<typename _MatrixType, unsigned int _Mode> class TriangularView
|
||||
|
||||
inline int rows() const { return m_matrix.rows(); }
|
||||
inline int cols() const { return m_matrix.cols(); }
|
||||
inline int stride() const { return m_matrix.stride(); }
|
||||
inline int outerStride() const { return m_matrix.outerStride(); }
|
||||
inline int innerStride() const { return m_matrix.innerStride(); }
|
||||
|
||||
/** \sa MatrixBase::operator+=() */
|
||||
template<typename Other> TriangularView& operator+=(const Other& other) { return *this = m_matrix + other; }
|
||||
|
@ -86,7 +86,6 @@ template<typename VectorType, int Size> class VectorBlock
|
||||
IsColVector ? start : 0, IsColVector ? 0 : start,
|
||||
IsColVector ? size : 1, IsColVector ? 1 : size)
|
||||
{
|
||||
|
||||
EIGEN_STATIC_ASSERT_VECTOR_ONLY(VectorBlock);
|
||||
}
|
||||
|
||||
|
@ -169,6 +169,11 @@ template<> inline v4f ei_pdiv(const v4f& a, const v4f& b) {
|
||||
return res;
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4i ei_pdiv<Packet4i>(const Packet4i& /*a*/, const Packet4i& /*b*/)
|
||||
{ ei_assert(false && "packet integer division are not supported by AltiVec");
|
||||
return ei_pset1<int>(0);
|
||||
}
|
||||
|
||||
template<> inline v4f ei_pmadd(const v4f& a, const v4f& b, const v4f& c) { return vec_madd(a, b, c); }
|
||||
|
||||
template<> inline v4f ei_pmin(const v4f& a, const v4f& b) { return vec_min(a,b); }
|
||||
|
@ -1,2 +1,3 @@
|
||||
ADD_SUBDIRECTORY(SSE)
|
||||
ADD_SUBDIRECTORY(AltiVec)
|
||||
ADD_SUBDIRECTORY(AltiVec)
|
||||
ADD_SUBDIRECTORY(NEON)
|
||||
|
65
Eigen/src/Core/arch/Default/Settings.h
Normal file
65
Eigen/src/Core/arch/Default/Settings.h
Normal file
@ -0,0 +1,65 @@
|
||||
// This file is part of Eigen, a lightweight C++ template library
|
||||
// for linear algebra.
|
||||
//
|
||||
// Copyright (C) 2008-2010 Gael Guennebaud <g.gael@free.fr>
|
||||
// Copyright (C) 2006-2008 Benoit Jacob <jacob.benoit.1@gmail.com>
|
||||
//
|
||||
// Eigen is free software; you can redistribute it and/or
|
||||
// modify it under the terms of the GNU Lesser General Public
|
||||
// License as published by the Free Software Foundation; either
|
||||
// version 3 of the License, or (at your option) any later version.
|
||||
//
|
||||
// Alternatively, you can redistribute it and/or
|
||||
// modify it under the terms of the GNU General Public License as
|
||||
// published by the Free Software Foundation; either version 2 of
|
||||
// the License, or (at your option) any later version.
|
||||
//
|
||||
// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY
|
||||
// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
||||
// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the
|
||||
// GNU General Public License for more details.
|
||||
//
|
||||
// You should have received a copy of the GNU Lesser General Public
|
||||
// License and a copy of the GNU General Public License along with
|
||||
// Eigen. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
|
||||
/* All the parameters defined in this file can be specialized in the
|
||||
* architecture specific files, and/or by the user.
|
||||
* More to come... */
|
||||
|
||||
#ifndef EIGEN_DEFAULT_SETTINGS_H
|
||||
#define EIGEN_DEFAULT_SETTINGS_H
|
||||
|
||||
/** Defines the maximal loop size to enable meta unrolling of loops.
|
||||
* Note that the value here is expressed in Eigen's own notion of "number of FLOPS",
|
||||
* it does not correspond to the number of iterations or the number of instructions
|
||||
*/
|
||||
#ifndef EIGEN_UNROLLING_LIMIT
|
||||
#define EIGEN_UNROLLING_LIMIT 100
|
||||
#endif
|
||||
|
||||
/** Defines the threshold between a "small" and a "large" matrix.
|
||||
* This threshold is mainly used to select the proper product implementation.
|
||||
*/
|
||||
#ifndef EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD
|
||||
#define EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD 8
|
||||
#endif
|
||||
|
||||
/** Defines the maximal size in Bytes of blocks fitting in CPU cache.
|
||||
* The current value is set to generate blocks of 256x256 for float
|
||||
*
|
||||
* Typically for a single-threaded application you would set that to 25% of the size of your CPU caches in bytes
|
||||
*/
|
||||
#ifndef EIGEN_TUNE_FOR_CPU_CACHE_SIZE
|
||||
#define EIGEN_TUNE_FOR_CPU_CACHE_SIZE (sizeof(float)*256*256)
|
||||
#endif
|
||||
|
||||
/** Defines the maximal width of the blocks used in the triangular product and solver
|
||||
* for vectors (level 2 blas xTRMV and xTRSV). The default is 8.
|
||||
*/
|
||||
#ifndef EIGEN_TUNE_TRIANGULAR_PANEL_WIDTH
|
||||
#define EIGEN_TUNE_TRIANGULAR_PANEL_WIDTH 8
|
||||
#endif
|
||||
|
||||
#endif // EIGEN_DEFAULT_SETTINGS_H
|
6
Eigen/src/Core/arch/NEON/CMakeLists.txt
Normal file
6
Eigen/src/Core/arch/NEON/CMakeLists.txt
Normal file
@ -0,0 +1,6 @@
|
||||
FILE(GLOB Eigen_Core_arch_NEON_SRCS "*.h")
|
||||
|
||||
INSTALL(FILES
|
||||
${Eigen_Core_arch_NEON_SRCS}
|
||||
DESTINATION ${INCLUDE_INSTALL_DIR}/Eigen/src/Core/arch/NEON COMPONENT Devel
|
||||
)
|
372
Eigen/src/Core/arch/NEON/PacketMath.h
Normal file
372
Eigen/src/Core/arch/NEON/PacketMath.h
Normal file
@ -0,0 +1,372 @@
|
||||
// This file is part of Eigen, a lightweight C++ template library
|
||||
// for linear algebra.
|
||||
//
|
||||
// Copyright (C) 2008-2009 Gael Guennebaud <g.gael@free.fr>
|
||||
// Copyright (C) 2010 Konstantinos Margaritis <markos@codex.gr>
|
||||
// Heavily based on Gael's SSE version.
|
||||
//
|
||||
// Eigen is free software; you can redistribute it and/or
|
||||
// modify it under the terms of the GNU Lesser General Public
|
||||
// License as published by the Free Software Foundation; either
|
||||
// version 3 of the License, or (at your option) any later version.
|
||||
//
|
||||
// Alternatively, you can redistribute it and/or
|
||||
// modify it under the terms of the GNU General Public License as
|
||||
// published by the Free Software Foundation; either version 2 of
|
||||
// the License, or (at your option) any later version.
|
||||
//
|
||||
// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY
|
||||
// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
||||
// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the
|
||||
// GNU General Public License for more details.
|
||||
//
|
||||
// You should have received a copy of the GNU Lesser General Public
|
||||
// License and a copy of the GNU General Public License along with
|
||||
// Eigen. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
#ifndef EIGEN_PACKET_MATH_NEON_H
|
||||
#define EIGEN_PACKET_MATH_NEON_H
|
||||
|
||||
#ifndef EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD
|
||||
#define EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD 8
|
||||
#endif
|
||||
|
||||
#ifndef EIGEN_TUNE_FOR_CPU_CACHE_SIZE
|
||||
#define EIGEN_TUNE_FOR_CPU_CACHE_SIZE 4*96*96
|
||||
#endif
|
||||
|
||||
typedef float32x4_t Packet4f;
|
||||
typedef int32x4_t Packet4i;
|
||||
|
||||
#define _EIGEN_DECLARE_CONST_Packet4f(NAME,X) \
|
||||
const Packet4f ei_p4f_##NAME = ei_pset1<float>(X)
|
||||
|
||||
#define _EIGEN_DECLARE_CONST_Packet4f_FROM_INT(NAME,X) \
|
||||
const Packet4f ei_p4f_##NAME = vreinterpretq_f32_u32(ei_pset1<int>(X))
|
||||
|
||||
#define _EIGEN_DECLARE_CONST_Packet4i(NAME,X) \
|
||||
const Packet4i ei_p4i_##NAME = ei_pset1<int>(X)
|
||||
|
||||
template<> struct ei_packet_traits<float> : ei_default_packet_traits
|
||||
{
|
||||
typedef Packet4f type; enum {size=4};
|
||||
enum {
|
||||
HasSin = 0,
|
||||
HasCos = 0,
|
||||
HasLog = 0,
|
||||
HasExp = 0,
|
||||
HasSqrt = 0
|
||||
};
|
||||
};
|
||||
template<> struct ei_packet_traits<int> : ei_default_packet_traits
|
||||
{ typedef Packet4i type; enum {size=4}; };
|
||||
|
||||
template<> struct ei_unpacket_traits<Packet4f> { typedef float type; enum {size=4}; };
|
||||
template<> struct ei_unpacket_traits<Packet4i> { typedef int type; enum {size=4}; };
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4f ei_pset1<float>(const float& from) { return vdupq_n_f32(from); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4i ei_pset1<int>(const int& from) { return vdupq_n_s32(from); }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4f ei_plset<float>(const float& a)
|
||||
{
|
||||
Packet4f countdown = { 3, 2, 1, 0 };
|
||||
return vaddq_f32(ei_pset1(a), countdown);
|
||||
}
|
||||
template<> EIGEN_STRONG_INLINE Packet4i ei_plset<int>(const int& a)
|
||||
{
|
||||
Packet4i countdown = { 3, 2, 1, 0 };
|
||||
return vaddq_s32(ei_pset1(a), countdown);
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4f ei_padd<Packet4f>(const Packet4f& a, const Packet4f& b) { return vaddq_f32(a,b); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4i ei_padd<Packet4i>(const Packet4i& a, const Packet4i& b) { return vaddq_s32(a,b); }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4f ei_psub<Packet4f>(const Packet4f& a, const Packet4f& b) { return vsubq_f32(a,b); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4i ei_psub<Packet4i>(const Packet4i& a, const Packet4i& b) { return vsubq_s32(a,b); }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4f ei_pnegate(const Packet4f& a) { return vnegq_f32(a); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4i ei_pnegate(const Packet4i& a) { return vnegq_s32(a); }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4f ei_pmul<Packet4f>(const Packet4f& a, const Packet4f& b) { return vmulq_f32(a,b); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4i ei_pmul<Packet4i>(const Packet4i& a, const Packet4i& b) { return vmulq_s32(a,b); }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4f ei_pdiv<Packet4f>(const Packet4f& a, const Packet4f& b)
|
||||
{
|
||||
Packet4f inv, restep, div;
|
||||
|
||||
// NEON does not offer a divide instruction, we have to do a reciprocal approximation
|
||||
// However NEON in contrast to other SIMD engines (AltiVec/SSE), offers
|
||||
// a reciprocal estimate AND a reciprocal step -which saves a few instructions
|
||||
// vrecpeq_f32() returns an estimate to 1/b, which we will finetune with
|
||||
// Newton-Raphson and vrecpsq_f32()
|
||||
inv = vrecpeq_f32(b);
|
||||
|
||||
// This returns a differential, by which we will have to multiply inv to get a better
|
||||
// approximation of 1/b.
|
||||
restep = vrecpsq_f32(b, inv);
|
||||
inv = vmulq_f32(restep, inv);
|
||||
|
||||
// Finally, multiply a by 1/b and get the wanted result of the division.
|
||||
div = vmulq_f32(a, inv);
|
||||
|
||||
return div;
|
||||
}
|
||||
template<> EIGEN_STRONG_INLINE Packet4i ei_pdiv<Packet4i>(const Packet4i& /*a*/, const Packet4i& /*b*/)
|
||||
{ ei_assert(false && "packet integer division are not supported by NEON");
|
||||
return ei_pset1<int>(0);
|
||||
}
|
||||
|
||||
// for some weird raisons, it has to be overloaded for packet of integers
|
||||
template<> EIGEN_STRONG_INLINE Packet4i ei_pmadd(const Packet4i& a, const Packet4i& b, const Packet4i& c) { return ei_padd(ei_pmul(a,b), c); }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4f ei_pmin<Packet4f>(const Packet4f& a, const Packet4f& b) { return vminq_f32(a,b); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4i ei_pmin<Packet4i>(const Packet4i& a, const Packet4i& b) { return vminq_s32(a,b); }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4f ei_pmax<Packet4f>(const Packet4f& a, const Packet4f& b) { return vmaxq_f32(a,b); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4i ei_pmax<Packet4i>(const Packet4i& a, const Packet4i& b) { return vmaxq_s32(a,b); }
|
||||
|
||||
// Logical Operations are not supported for float, so we have to reinterpret casts using NEON intrinsics
|
||||
template<> EIGEN_STRONG_INLINE Packet4f ei_pand<Packet4f>(const Packet4f& a, const Packet4f& b)
|
||||
{
|
||||
return vreinterpretq_f32_u32(vandq_u32(vreinterpretq_u32_f32(a),vreinterpretq_u32_f32(b)));
|
||||
}
|
||||
template<> EIGEN_STRONG_INLINE Packet4i ei_pand<Packet4i>(const Packet4i& a, const Packet4i& b) { return vandq_s32(a,b); }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4f ei_por<Packet4f>(const Packet4f& a, const Packet4f& b)
|
||||
{
|
||||
return vreinterpretq_f32_u32(vorrq_u32(vreinterpretq_u32_f32(a),vreinterpretq_u32_f32(b)));
|
||||
}
|
||||
template<> EIGEN_STRONG_INLINE Packet4i ei_por<Packet4i>(const Packet4i& a, const Packet4i& b) { return vorrq_s32(a,b); }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4f ei_pxor<Packet4f>(const Packet4f& a, const Packet4f& b)
|
||||
{
|
||||
return vreinterpretq_f32_u32(veorq_u32(vreinterpretq_u32_f32(a),vreinterpretq_u32_f32(b)));
|
||||
}
|
||||
template<> EIGEN_STRONG_INLINE Packet4i ei_pxor<Packet4i>(const Packet4i& a, const Packet4i& b) { return veorq_s32(a,b); }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4f ei_pandnot<Packet4f>(const Packet4f& a, const Packet4f& b)
|
||||
{
|
||||
return vreinterpretq_f32_u32(vbicq_u32(vreinterpretq_u32_f32(a),vreinterpretq_u32_f32(b)));
|
||||
}
|
||||
template<> EIGEN_STRONG_INLINE Packet4i ei_pandnot<Packet4i>(const Packet4i& a, const Packet4i& b) { return vbicq_s32(a,b); }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4f ei_pload<float>(const float* from) { EIGEN_DEBUG_ALIGNED_LOAD return vld1q_f32(from); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4i ei_pload<int>(const int* from) { EIGEN_DEBUG_ALIGNED_LOAD return vld1q_s32(from); }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4f ei_ploadu(const float* from) { EIGEN_DEBUG_ALIGNED_LOAD return vld1q_f32(from); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4i ei_ploadu(const int* from) { EIGEN_DEBUG_ALIGNED_LOAD return vld1q_s32(from); }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE void ei_pstore<float>(float* to, const Packet4f& from) { EIGEN_DEBUG_ALIGNED_STORE vst1q_f32(to, from); }
|
||||
template<> EIGEN_STRONG_INLINE void ei_pstore<int>(int* to, const Packet4i& from) { EIGEN_DEBUG_ALIGNED_STORE vst1q_s32(to, from); }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE void ei_pstoreu<float>(float* to, const Packet4f& from) { EIGEN_DEBUG_UNALIGNED_STORE vst1q_f32(to, from); }
|
||||
template<> EIGEN_STRONG_INLINE void ei_pstoreu<int>(int* to, const Packet4i& from) { EIGEN_DEBUG_UNALIGNED_STORE vst1q_s32(to, from); }
|
||||
|
||||
// FIXME only store the 2 first elements ?
|
||||
template<> EIGEN_STRONG_INLINE float ei_pfirst<Packet4f>(const Packet4f& a) { float EIGEN_ALIGN16 x[4]; vst1q_f32(x, a); return x[0]; }
|
||||
template<> EIGEN_STRONG_INLINE int ei_pfirst<Packet4i>(const Packet4i& a) { int EIGEN_ALIGN16 x[4]; vst1q_s32(x, a); return x[0]; }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4f ei_preverse(const Packet4f& a) {
|
||||
float32x2_t a_lo, a_hi;
|
||||
Packet4f a_r64, a_r128;
|
||||
|
||||
a_r64 = vrev64q_f32(a);
|
||||
a_lo = vget_low_f32(a_r64);
|
||||
a_hi = vget_high_f32(a_r64);
|
||||
a_r128 = vcombine_f32(a_hi, a_lo);
|
||||
|
||||
return a_r128;
|
||||
}
|
||||
template<> EIGEN_STRONG_INLINE Packet4i ei_preverse(const Packet4i& a) {
|
||||
int32x2_t a_lo, a_hi;
|
||||
Packet4i a_r64, a_r128;
|
||||
|
||||
a_r64 = vrev64q_s32(a);
|
||||
a_lo = vget_low_s32(a_r64);
|
||||
a_hi = vget_high_s32(a_r64);
|
||||
a_r128 = vcombine_s32(a_hi, a_lo);
|
||||
|
||||
return a_r128;
|
||||
}
|
||||
template<> EIGEN_STRONG_INLINE Packet4f ei_pabs(const Packet4f& a) { return vabsq_f32(a); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4i ei_pabs(const Packet4i& a) { return vabsq_s32(a); }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE float ei_predux<Packet4f>(const Packet4f& a)
|
||||
{
|
||||
float32x2_t a_lo, a_hi, sum;
|
||||
float s[2];
|
||||
|
||||
a_lo = vget_low_f32(a);
|
||||
a_hi = vget_high_f32(a);
|
||||
sum = vpadd_f32(a_lo, a_hi);
|
||||
sum = vpadd_f32(sum, sum);
|
||||
vst1_f32(s, sum);
|
||||
|
||||
return s[0];
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4f ei_preduxp<Packet4f>(const Packet4f* vecs)
|
||||
{
|
||||
float32x4x2_t vtrn1, vtrn2, res1, res2;
|
||||
Packet4f sum1, sum2, sum;
|
||||
|
||||
// NEON zip performs interleaving of the supplied vectors.
|
||||
// We perform two interleaves in a row to acquire the transposed vector
|
||||
vtrn1 = vzipq_f32(vecs[0], vecs[2]);
|
||||
vtrn2 = vzipq_f32(vecs[1], vecs[3]);
|
||||
res1 = vzipq_f32(vtrn1.val[0], vtrn2.val[0]);
|
||||
res2 = vzipq_f32(vtrn1.val[1], vtrn2.val[1]);
|
||||
|
||||
// Do the addition of the resulting vectors
|
||||
sum1 = vaddq_f32(res1.val[0], res1.val[1]);
|
||||
sum2 = vaddq_f32(res2.val[0], res2.val[1]);
|
||||
sum = vaddq_f32(sum1, sum2);
|
||||
|
||||
return sum;
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE int ei_predux<Packet4i>(const Packet4i& a)
|
||||
{
|
||||
int32x2_t a_lo, a_hi, sum;
|
||||
int32_t s[2];
|
||||
|
||||
a_lo = vget_low_s32(a);
|
||||
a_hi = vget_high_s32(a);
|
||||
sum = vpadd_s32(a_lo, a_hi);
|
||||
sum = vpadd_s32(sum, sum);
|
||||
vst1_s32(s, sum);
|
||||
|
||||
return s[0];
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4i ei_preduxp<Packet4i>(const Packet4i* vecs)
|
||||
{
|
||||
int32x4x2_t vtrn1, vtrn2, res1, res2;
|
||||
Packet4i sum1, sum2, sum;
|
||||
|
||||
// NEON zip performs interleaving of the supplied vectors.
|
||||
// We perform two interleaves in a row to acquire the transposed vector
|
||||
vtrn1 = vzipq_s32(vecs[0], vecs[2]);
|
||||
vtrn2 = vzipq_s32(vecs[1], vecs[3]);
|
||||
res1 = vzipq_s32(vtrn1.val[0], vtrn2.val[0]);
|
||||
res2 = vzipq_s32(vtrn1.val[1], vtrn2.val[1]);
|
||||
|
||||
// Do the addition of the resulting vectors
|
||||
sum1 = vaddq_s32(res1.val[0], res1.val[1]);
|
||||
sum2 = vaddq_s32(res2.val[0], res2.val[1]);
|
||||
sum = vaddq_s32(sum1, sum2);
|
||||
|
||||
return sum;
|
||||
}
|
||||
|
||||
// Other reduction functions:
|
||||
// mul
|
||||
template<> EIGEN_STRONG_INLINE float ei_predux_mul<Packet4f>(const Packet4f& a)
|
||||
{
|
||||
float32x2_t a_lo, a_hi, prod;
|
||||
float s[2];
|
||||
|
||||
// Get a_lo = |a1|a2| and a_hi = |a3|a4|
|
||||
a_lo = vget_low_f32(a);
|
||||
a_hi = vget_high_f32(a);
|
||||
// Get the product of a_lo * a_hi -> |a1*a3|a2*a4|
|
||||
prod = vmul_f32(a_lo, a_hi);
|
||||
// Multiply prod with its swapped value |a2*a4|a1*a3|
|
||||
prod = vmul_f32(prod, vrev64_f32(prod));
|
||||
vst1_f32(s, prod);
|
||||
|
||||
return s[0];
|
||||
}
|
||||
template<> EIGEN_STRONG_INLINE int ei_predux_mul<Packet4i>(const Packet4i& a)
|
||||
{
|
||||
int32x2_t a_lo, a_hi, prod;
|
||||
int32_t s[2];
|
||||
|
||||
// Get a_lo = |a1|a2| and a_hi = |a3|a4|
|
||||
a_lo = vget_low_s32(a);
|
||||
a_hi = vget_high_s32(a);
|
||||
// Get the product of a_lo * a_hi -> |a1*a3|a2*a4|
|
||||
prod = vmul_s32(a_lo, a_hi);
|
||||
// Multiply prod with its swapped value |a2*a4|a1*a3|
|
||||
prod = vmul_s32(prod, vrev64_s32(prod));
|
||||
vst1_s32(s, prod);
|
||||
|
||||
return s[0];
|
||||
}
|
||||
|
||||
// min
|
||||
template<> EIGEN_STRONG_INLINE float ei_predux_min<Packet4f>(const Packet4f& a)
|
||||
{
|
||||
float32x2_t a_lo, a_hi, min;
|
||||
float s[2];
|
||||
|
||||
a_lo = vget_low_f32(a);
|
||||
a_hi = vget_high_f32(a);
|
||||
min = vpmin_f32(a_lo, a_hi);
|
||||
min = vpmin_f32(min, min);
|
||||
vst1_f32(s, min);
|
||||
|
||||
return s[0];
|
||||
}
|
||||
template<> EIGEN_STRONG_INLINE int ei_predux_min<Packet4i>(const Packet4i& a)
|
||||
{
|
||||
int32x2_t a_lo, a_hi, min;
|
||||
int32_t s[2];
|
||||
|
||||
a_lo = vget_low_s32(a);
|
||||
a_hi = vget_high_s32(a);
|
||||
min = vpmin_s32(a_lo, a_hi);
|
||||
min = vpmin_s32(min, min);
|
||||
vst1_s32(s, min);
|
||||
|
||||
return s[0];
|
||||
}
|
||||
|
||||
// max
|
||||
template<> EIGEN_STRONG_INLINE float ei_predux_max<Packet4f>(const Packet4f& a)
|
||||
{
|
||||
float32x2_t a_lo, a_hi, max;
|
||||
float s[2];
|
||||
|
||||
a_lo = vget_low_f32(a);
|
||||
a_hi = vget_high_f32(a);
|
||||
max = vpmax_f32(a_lo, a_hi);
|
||||
max = vpmax_f32(max, max);
|
||||
vst1_f32(s, max);
|
||||
|
||||
return s[0];
|
||||
}
|
||||
template<> EIGEN_STRONG_INLINE int ei_predux_max<Packet4i>(const Packet4i& a)
|
||||
{
|
||||
int32x2_t a_lo, a_hi, max;
|
||||
int32_t s[2];
|
||||
|
||||
a_lo = vget_low_s32(a);
|
||||
a_hi = vget_high_s32(a);
|
||||
max = vpmax_s32(a_lo, a_hi);
|
||||
max = vpmax_s32(max, max);
|
||||
vst1_s32(s, max);
|
||||
|
||||
return s[0];
|
||||
}
|
||||
|
||||
template<int Offset>
|
||||
struct ei_palign_impl<Offset,Packet4f>
|
||||
{
|
||||
EIGEN_STRONG_INLINE static void run(Packet4f& first, const Packet4f& second)
|
||||
{
|
||||
if (Offset!=0)
|
||||
first = vextq_f32(first, second, Offset);
|
||||
}
|
||||
};
|
||||
|
||||
template<int Offset>
|
||||
struct ei_palign_impl<Offset,Packet4i>
|
||||
{
|
||||
EIGEN_STRONG_INLINE static void run(Packet4i& first, const Packet4i& second)
|
||||
{
|
||||
if (Offset!=0)
|
||||
first = vextq_s32(first, second, Offset);
|
||||
}
|
||||
};
|
||||
#endif // EIGEN_PACKET_MATH_NEON_H
|
@ -122,7 +122,7 @@ template<> EIGEN_STRONG_INLINE Packet4f ei_pmul<Packet4f>(const Packet4f& a, con
|
||||
template<> EIGEN_STRONG_INLINE Packet2d ei_pmul<Packet2d>(const Packet2d& a, const Packet2d& b) { return _mm_mul_pd(a,b); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4i ei_pmul<Packet4i>(const Packet4i& a, const Packet4i& b)
|
||||
{
|
||||
#ifdef __SSE4_1__
|
||||
#ifdef EIGEN_VECTORIZE_SSE4_1
|
||||
return _mm_mullo_epi32(a,b);
|
||||
#else
|
||||
// this version is slightly faster than 4 scalar products
|
||||
@ -269,7 +269,7 @@ template<> EIGEN_STRONG_INLINE Packet2d ei_pabs(const Packet2d& a)
|
||||
}
|
||||
template<> EIGEN_STRONG_INLINE Packet4i ei_pabs(const Packet4i& a)
|
||||
{
|
||||
#ifdef __SSSE3__
|
||||
#ifdef EIGEN_VECTORIZE_SSSE3
|
||||
return _mm_abs_epi32(a);
|
||||
#else
|
||||
Packet4i aux = _mm_srai_epi32(a,31);
|
||||
@ -285,7 +285,7 @@ EIGEN_STRONG_INLINE void ei_punpackp(Packet4f* vecs)
|
||||
vecs[0] = _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(vecs[0]), 0x00));
|
||||
}
|
||||
|
||||
#ifdef __SSE3__
|
||||
#ifdef EIGEN_VECTORIZE_SSE3
|
||||
// TODO implement SSE2 versions as well as integer versions
|
||||
template<> EIGEN_STRONG_INLINE Packet4f ei_preduxp<Packet4f>(const Packet4f* vecs)
|
||||
{
|
||||
@ -446,7 +446,7 @@ template<> EIGEN_STRONG_INLINE int ei_predux_max<Packet4i>(const Packet4i& a)
|
||||
// }
|
||||
#endif
|
||||
|
||||
#ifdef __SSSE3__
|
||||
#ifdef EIGEN_VECTORIZE_SSSE3
|
||||
// SSSE3 versions
|
||||
template<int Offset>
|
||||
struct ei_palign_impl<Offset,Packet4f>
|
||||
|
@ -305,10 +305,7 @@ struct ei_product_coeff_vectorized_dyn_selector
|
||||
{
|
||||
EIGEN_STRONG_INLINE static void run(int row, int col, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar &res)
|
||||
{
|
||||
res = ei_dot_impl<
|
||||
Block<Lhs, 1, ei_traits<Lhs>::ColsAtCompileTime>,
|
||||
Block<Rhs, ei_traits<Rhs>::RowsAtCompileTime, 1>,
|
||||
LinearVectorizedTraversal, NoUnrolling>::run(lhs.row(row), rhs.col(col));
|
||||
res = lhs.row(row).cwiseProduct(rhs.col(col)).sum();
|
||||
}
|
||||
};
|
||||
|
||||
@ -319,10 +316,7 @@ struct ei_product_coeff_vectorized_dyn_selector<Lhs,Rhs,1,RhsCols>
|
||||
{
|
||||
EIGEN_STRONG_INLINE static void run(int /*row*/, int col, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar &res)
|
||||
{
|
||||
res = ei_dot_impl<
|
||||
Lhs,
|
||||
Block<Rhs, ei_traits<Rhs>::RowsAtCompileTime, 1>,
|
||||
LinearVectorizedTraversal, NoUnrolling>::run(lhs, rhs.col(col));
|
||||
res = lhs.cwiseProduct(rhs.col(col)).sum();
|
||||
}
|
||||
};
|
||||
|
||||
@ -331,10 +325,7 @@ struct ei_product_coeff_vectorized_dyn_selector<Lhs,Rhs,LhsRows,1>
|
||||
{
|
||||
EIGEN_STRONG_INLINE static void run(int row, int /*col*/, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar &res)
|
||||
{
|
||||
res = ei_dot_impl<
|
||||
Block<Lhs, 1, ei_traits<Lhs>::ColsAtCompileTime>,
|
||||
Rhs,
|
||||
LinearVectorizedTraversal, NoUnrolling>::run(lhs.row(row), rhs);
|
||||
res = lhs.row(row).cwiseProduct(rhs).sum();
|
||||
}
|
||||
};
|
||||
|
||||
@ -343,10 +334,7 @@ struct ei_product_coeff_vectorized_dyn_selector<Lhs,Rhs,1,1>
|
||||
{
|
||||
EIGEN_STRONG_INLINE static void run(int /*row*/, int /*col*/, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar &res)
|
||||
{
|
||||
res = ei_dot_impl<
|
||||
Lhs,
|
||||
Rhs,
|
||||
LinearVectorizedTraversal, NoUnrolling>::run(lhs, rhs);
|
||||
res = lhs.cwiseProduct(rhs).sum();
|
||||
}
|
||||
};
|
||||
|
||||
|
@ -31,6 +31,7 @@
|
||||
#define CJMADD(A,B,C,T) C = cj.pmadd(A,B,C);
|
||||
#else
|
||||
#define CJMADD(A,B,C,T) T = B; T = cj.pmul(A,T); C = ei_padd(C,T);
|
||||
// #define CJMADD(A,B,C,T) T = A; T = cj.pmul(T,B); C = ei_padd(C,T);
|
||||
#endif
|
||||
|
||||
// optimized GEneral packed Block * packed Panel product kernel
|
||||
@ -146,7 +147,7 @@ struct ei_gebp_kernel
|
||||
#endif
|
||||
|
||||
// performs "inner" product
|
||||
// TODO let's check wether the flowing peeled loop could not be
|
||||
// TODO let's check wether the folowing peeled loop could not be
|
||||
// optimized via optimal prefetching from one loop to the other
|
||||
const Scalar* blB = unpackedB;
|
||||
for(int k=0; k<peeled_kc; k+=4)
|
||||
@ -409,6 +410,7 @@ struct ei_gebp_kernel
|
||||
CJMADD(A0,B2,C2,B2);
|
||||
B2 = ei_pload(&blB[14*PacketSize]);
|
||||
CJMADD(A0,B3,C3,B3);
|
||||
|
||||
A0 = ei_pload(&blA[3*PacketSize]);
|
||||
B3 = ei_pload(&blB[15*PacketSize]);
|
||||
CJMADD(A0,B0,C0,B0);
|
||||
|
@ -43,7 +43,10 @@ struct ei_symm_pack_lhs
|
||||
{
|
||||
for(int w=0; w<h; w++)
|
||||
blockA[count++] = ei_conj(lhs(k, i+w)); // transposed
|
||||
for(int w=h; w<BlockRows; w++)
|
||||
|
||||
blockA[count++] = ei_real(lhs(k,k)); // real (diagonal)
|
||||
|
||||
for(int w=h+1; w<BlockRows; w++)
|
||||
blockA[count++] = lhs(i+w, k); // normal
|
||||
++h;
|
||||
}
|
||||
@ -71,8 +74,11 @@ struct ei_symm_pack_lhs
|
||||
// do the same with mr==1
|
||||
for(int i=peeled_mc; i<rows; i++)
|
||||
{
|
||||
for(int k=0; k<=i; k++)
|
||||
for(int k=0; k<i; k++)
|
||||
blockA[count++] = lhs(i, k); // normal
|
||||
|
||||
blockA[count++] = ei_real(lhs(i, i)); // real (diagonal)
|
||||
|
||||
for(int k=i+1; k<cols; k++)
|
||||
blockA[count++] = ei_conj(lhs(k, i)); // transposed
|
||||
}
|
||||
@ -129,8 +135,11 @@ struct ei_symm_pack_rhs
|
||||
// normal
|
||||
for (int w=0 ; w<h; ++w)
|
||||
blockB[count+w] = alpha*rhs(k,j2+w);
|
||||
|
||||
blockB[count+h] = alpha*rhs(k,k);
|
||||
|
||||
// transpose
|
||||
for (int w=h ; w<nr; ++w)
|
||||
for (int w=h+1 ; w<nr; ++w)
|
||||
blockB[count+w] = alpha*ei_conj(rhs(j2+w,k));
|
||||
count += nr;
|
||||
++h;
|
||||
@ -175,8 +184,15 @@ struct ei_symm_pack_rhs
|
||||
blockB[count] = alpha*ei_conj(rhs(j2,k));
|
||||
count += 1;
|
||||
}
|
||||
|
||||
if(half==j2)
|
||||
{
|
||||
blockB[count] = alpha*ei_real(rhs(j2,j2));
|
||||
count += 1;
|
||||
}
|
||||
|
||||
// normal
|
||||
for(int k=half; k<k2+rows; k++)
|
||||
for(int k=half+1; k<k2+rows; k++)
|
||||
{
|
||||
blockB[count] = alpha*rhs(k,j2);
|
||||
count += 1;
|
||||
@ -389,7 +405,7 @@ struct SelfadjointProductMatrix<Lhs,LhsMode,false,Rhs,RhsMode,false>
|
||||
* RhsBlasTraits::extractScalarFactor(m_rhs);
|
||||
|
||||
ei_product_selfadjoint_matrix<Scalar,
|
||||
EIGEN_LOGICAL_XOR(LhsIsUpper,
|
||||
EIGEN_LOGICAL_XOR(LhsIsUpper,
|
||||
ei_traits<Lhs>::Flags &RowMajorBit) ? RowMajor : ColMajor, LhsIsSelfAdjoint,
|
||||
NumTraits<Scalar>::IsComplex && EIGEN_LOGICAL_XOR(LhsIsUpper,bool(LhsBlasTraits::NeedToConjugate)),
|
||||
EIGEN_LOGICAL_XOR(RhsIsUpper,
|
||||
|
@ -236,4 +236,22 @@ struct ei_blas_traits<Transpose<NestedXpr> >
|
||||
static inline Scalar extractScalarFactor(const XprType& x) { return Base::extractScalarFactor(x.nestedExpression()); }
|
||||
};
|
||||
|
||||
template<typename T, int Access=ei_blas_traits<T>::ActualAccess>
|
||||
struct ei_extract_data_selector {
|
||||
static const typename T::Scalar* run(const T& m)
|
||||
{
|
||||
return &ei_blas_traits<T>::extract(m).const_cast_derived().coeffRef(0,0); // FIXME this should be .data()
|
||||
}
|
||||
};
|
||||
|
||||
template<typename T>
|
||||
struct ei_extract_data_selector<T,NoDirectAccess> {
|
||||
static typename T::Scalar* run(const T&) { return 0; }
|
||||
};
|
||||
|
||||
template<typename T> const typename T::Scalar* ei_extract_data(const T& m)
|
||||
{
|
||||
return ei_extract_data_selector<T>::run(m);
|
||||
}
|
||||
|
||||
#endif // EIGEN_BLASUTIL_H
|
||||
|
@ -86,11 +86,11 @@ const unsigned int EvalBeforeAssigningBit = 0x4;
|
||||
* Long version: means that the coefficients can be handled by packets
|
||||
* and start at a memory location whose alignment meets the requirements
|
||||
* of the present CPU architecture for optimized packet access. In the fixed-size
|
||||
* case, there is the additional condition that the total size of the coefficients
|
||||
* array is a multiple of the packet size, so that it is possible to access all the
|
||||
* coefficients by packets. In the dynamic-size case, there is no such condition
|
||||
* on the total size, so it might not be possible to access the few last coeffs
|
||||
* by packets.
|
||||
* case, there is the additional condition that it be possible to access all the
|
||||
* coefficients by packets (this implies the requirement that the size be a multiple of 16 bytes,
|
||||
* and that any nontrivial strides don't break the alignment). In the dynamic-size case,
|
||||
* there is no such condition on the total size and strides, so it might not be possible to access
|
||||
* all coeffs by packets.
|
||||
*
|
||||
* \note This bit can be set regardless of whether vectorization is actually enabled.
|
||||
* To check for actual vectorizability, see \a ActualPacketAccessBit.
|
||||
@ -140,7 +140,7 @@ const unsigned int LinearAccessBit = 0x10;
|
||||
* Means that the underlying array of coefficients can be directly accessed. This means two things.
|
||||
* First, references to the coefficients must be available through coeffRef(int, int). This rules out read-only
|
||||
* expressions whose coefficients are computed on demand by coeff(int, int). Second, the memory layout of the
|
||||
* array of coefficients must be exactly the natural one suggested by rows(), cols(), stride(), and the RowMajorBit.
|
||||
* array of coefficients must be exactly the natural one suggested by rows(), cols(), outerStride(), innerStride(), and the RowMajorBit.
|
||||
* This rules out expressions such as Diagonal, whose coefficients, though referencable, do not have
|
||||
* such a regular memory layout.
|
||||
*/
|
||||
|
@ -1,7 +1,8 @@
|
||||
// This file is part of Eigen, a lightweight C++ template library
|
||||
// for linear algebra.
|
||||
//
|
||||
// Copyright (C) 2007-2009 Benoit Jacob <jacob.benoit.1@gmail.com>
|
||||
// Copyright (C) 2007-2010 Benoit Jacob <jacob.benoit.1@gmail.com>
|
||||
// Copyright (C) 2008-2009 Gael Guennebaud <g.gael@free.fr>
|
||||
//
|
||||
// Eigen is free software; you can redistribute it and/or
|
||||
// modify it under the terms of the GNU Lesser General Public
|
||||
@ -40,8 +41,10 @@ template<typename ExpressionType> class NestByValue;
|
||||
template<typename ExpressionType> class ForceAlignedAccess;
|
||||
template<typename ExpressionType> class SwapWrapper;
|
||||
template<typename MatrixType> class Minor;
|
||||
// MSVC will not compile when the expression ei_traits<MatrixType>::Flags&DirectAccessBit
|
||||
// is put into brackets like (ei_traits<MatrixType>::Flags&DirectAccessBit)!
|
||||
template<typename MatrixType, int BlockRows=Dynamic, int BlockCols=Dynamic,
|
||||
int _DirectAccessStatus = (ei_traits<MatrixType>::Flags&DirectAccessBit) ? HasDirectAccess : NoDirectAccess> class Block;
|
||||
int _DirectAccessStatus = ei_traits<MatrixType>::Flags&DirectAccessBit ? HasDirectAccess : NoDirectAccess> class Block;
|
||||
template<typename MatrixType, int Size=Dynamic> class VectorBlock;
|
||||
template<typename MatrixType> class Transpose;
|
||||
template<typename MatrixType> class Conjugate;
|
||||
@ -60,7 +63,9 @@ template<typename _Scalar, int SizeAtCompileTime, int MaxSizeAtCompileTime=SizeA
|
||||
template<typename MatrixType, typename DiagonalType, int ProductOrder> class DiagonalProduct;
|
||||
template<typename MatrixType, int Index> class Diagonal;
|
||||
|
||||
template<typename MatrixType, int Options=Unaligned> class Map;
|
||||
template<int InnerStrideAtCompileTime, int OuterStrideAtCompileTime> class Stride;
|
||||
template<typename MatrixType, int Options=Unaligned, typename StrideType = Stride<0,0> > class Map;
|
||||
|
||||
template<typename Derived> class TriangularBase;
|
||||
template<typename MatrixType, unsigned int Mode> class TriangularView;
|
||||
template<typename MatrixType, unsigned int Mode> class SelfAdjointView;
|
||||
|
@ -39,7 +39,7 @@
|
||||
// 16 byte alignment is only useful for vectorization. Since it affects the ABI, we need to enable 16 byte alignment on all
|
||||
// platforms where vectorization might be enabled. In theory we could always enable alignment, but it can be a cause of problems
|
||||
// on some platforms, so we just disable it in certain common platform (compiler+architecture combinations) to avoid these problems.
|
||||
#if defined(__GNUC__) && !(defined(__i386__) || defined(__x86_64__) || defined(__powerpc__) || defined(__ppc__) || defined(__ia64__))
|
||||
#if defined(__GNUC__) && !(defined(__i386__) || defined(__x86_64__) || defined(__powerpc__) || defined(__ppc__) || defined(__ia64__) || defined(__ARM_NEON__))
|
||||
#define EIGEN_GCC_AND_ARCH_DOESNT_WANT_ALIGNMENT 1
|
||||
#else
|
||||
#define EIGEN_GCC_AND_ARCH_DOESNT_WANT_ALIGNMENT 0
|
||||
@ -78,30 +78,6 @@
|
||||
#define EIGEN_DEFAULT_MATRIX_STORAGE_ORDER_OPTION ColMajor
|
||||
#endif
|
||||
|
||||
/** Defines the maximal loop size to enable meta unrolling of loops.
|
||||
* Note that the value here is expressed in Eigen's own notion of "number of FLOPS",
|
||||
* it does not correspond to the number of iterations or the number of instructions
|
||||
*/
|
||||
#ifndef EIGEN_UNROLLING_LIMIT
|
||||
#define EIGEN_UNROLLING_LIMIT 100
|
||||
#endif
|
||||
|
||||
/** Defines the maximal size in Bytes of blocks fitting in CPU cache.
|
||||
* The current value is set to generate blocks of 256x256 for float
|
||||
*
|
||||
* Typically for a single-threaded application you would set that to 25% of the size of your CPU caches in bytes
|
||||
*/
|
||||
#ifndef EIGEN_TUNE_FOR_CPU_CACHE_SIZE
|
||||
#define EIGEN_TUNE_FOR_CPU_CACHE_SIZE (sizeof(float)*256*256)
|
||||
#endif
|
||||
|
||||
/** Defines the maximal width of the blocks used in the triangular product and solver
|
||||
* for vectors (level 2 blas xTRMV and xTRSV). The default is 8.
|
||||
*/
|
||||
#ifndef EIGEN_TUNE_TRIANGULAR_PANEL_WIDTH
|
||||
#define EIGEN_TUNE_TRIANGULAR_PANEL_WIDTH 8
|
||||
#endif
|
||||
|
||||
/** Allows to disable some optimizations which might affect the accuracy of the result.
|
||||
* Such optimization are enabled by default, and set EIGEN_FAST_MATH to 0 to disable them.
|
||||
* They currently include:
|
||||
@ -211,7 +187,7 @@ using Eigen::ei_cos;
|
||||
*/
|
||||
#if !EIGEN_ALIGN
|
||||
#define EIGEN_ALIGN_TO_BOUNDARY(n)
|
||||
#elif (defined __GNUC__)
|
||||
#elif (defined __GNUC__) || (defined __PGI)
|
||||
#define EIGEN_ALIGN_TO_BOUNDARY(n) __attribute__((aligned(n)))
|
||||
#elif (defined _MSC_VER)
|
||||
#define EIGEN_ALIGN_TO_BOUNDARY(n) __declspec(align(n))
|
||||
|
@ -4,6 +4,7 @@
|
||||
// Copyright (C) 2008 Gael Guennebaud <g.gael@free.fr>
|
||||
// Copyright (C) 2008-2009 Benoit Jacob <jacob.benoit.1@gmail.com>
|
||||
// Copyright (C) 2009 Kenneth Riddile <kfriddile@yahoo.com>
|
||||
// Copyright (C) 2010 Hauke Heibel <hauke.heibel@gmail.com>
|
||||
//
|
||||
// Eigen is free software; you can redistribute it and/or
|
||||
// modify it under the terms of the GNU Lesser General Public
|
||||
@ -24,26 +25,49 @@
|
||||
// License and a copy of the GNU General Public License along with
|
||||
// Eigen. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
|
||||
/*****************************************************************************
|
||||
*** Platform checks for aligned malloc functions ***
|
||||
*****************************************************************************/
|
||||
|
||||
#ifndef EIGEN_MEMORY_H
|
||||
#define EIGEN_MEMORY_H
|
||||
|
||||
// FreeBSD 6 seems to have 16-byte aligned malloc
|
||||
// See http://svn.freebsd.org/viewvc/base/stable/6/lib/libc/stdlib/malloc.c?view=markup
|
||||
// FreeBSD 7 seems to have 16-byte aligned malloc except on ARM and MIPS architectures
|
||||
// See http://svn.freebsd.org/viewvc/base/stable/7/lib/libc/stdlib/malloc.c?view=markup
|
||||
#if defined(__FreeBSD__) && !defined(__arm__) && !defined(__mips__)
|
||||
#define EIGEN_FREEBSD_MALLOC_ALREADY_ALIGNED 1
|
||||
// On 64-bit systems, glibc's malloc returns 16-byte-aligned pointers, see:
|
||||
// http://www.gnu.org/s/libc/manual/html_node/Aligned-Memory-Blocks.html
|
||||
// This is true at least since glibc 2.8.
|
||||
// This leaves the question how to detect 64-bit. According to this document,
|
||||
// http://gcc.fyxm.net/summit/2003/Porting%20to%2064%20bit.pdf
|
||||
// page 114, "[The] LP64 model [...] is used by all 64-bit UNIX ports" so it's indeed
|
||||
// quite safe, at least within the context of glibc, to equate 64-bit with LP64.
|
||||
#if defined(__GLIBC__) && ((__GLIBC__>=2 && __GLIBC_MINOR__ >= 8) || __GLIBC__>2) \
|
||||
&& defined(__LP64__)
|
||||
#define EIGEN_GLIBC_MALLOC_ALREADY_ALIGNED 1
|
||||
#else
|
||||
#define EIGEN_FREEBSD_MALLOC_ALREADY_ALIGNED 0
|
||||
#define EIGEN_GLIBC_MALLOC_ALREADY_ALIGNED 0
|
||||
#endif
|
||||
|
||||
#if defined(__APPLE__) || defined(_WIN64) || EIGEN_FREEBSD_MALLOC_ALREADY_ALIGNED
|
||||
// FreeBSD 6 seems to have 16-byte aligned malloc
|
||||
// See http://svn.freebsd.org/viewvc/base/stable/6/lib/libc/stdlib/malloc.c?view=markup
|
||||
// FreeBSD 7 seems to have 16-byte aligned malloc except on ARM and MIPS architectures
|
||||
// See http://svn.freebsd.org/viewvc/base/stable/7/lib/libc/stdlib/malloc.c?view=markup
|
||||
#if defined(__FreeBSD__) && !defined(__arm__) && !defined(__mips__)
|
||||
#define EIGEN_FREEBSD_MALLOC_ALREADY_ALIGNED 1
|
||||
#else
|
||||
#define EIGEN_FREEBSD_MALLOC_ALREADY_ALIGNED 0
|
||||
#endif
|
||||
|
||||
#if defined(__APPLE__) \
|
||||
|| defined(_WIN64) \
|
||||
|| EIGEN_GLIBC_MALLOC_ALREADY_ALIGNED \
|
||||
|| EIGEN_FREEBSD_MALLOC_ALREADY_ALIGNED
|
||||
#define EIGEN_MALLOC_ALREADY_ALIGNED 1
|
||||
#else
|
||||
#define EIGEN_MALLOC_ALREADY_ALIGNED 0
|
||||
#endif
|
||||
|
||||
#if ((defined _GNU_SOURCE) || ((defined _XOPEN_SOURCE) && (_XOPEN_SOURCE >= 600))) && (defined _POSIX_ADVISORY_INFO) && (_POSIX_ADVISORY_INFO > 0)
|
||||
#if ((defined _GNU_SOURCE) || ((defined _XOPEN_SOURCE) && (_XOPEN_SOURCE >= 600))) \
|
||||
&& (defined _POSIX_ADVISORY_INFO) && (_POSIX_ADVISORY_INFO > 0)
|
||||
#define EIGEN_HAS_POSIX_MEMALIGN 1
|
||||
#else
|
||||
#define EIGEN_HAS_POSIX_MEMALIGN 0
|
||||
@ -55,26 +79,90 @@
|
||||
#define EIGEN_HAS_MM_MALLOC 0
|
||||
#endif
|
||||
|
||||
/** \internal like malloc, but the returned pointer is guaranteed to be 16-byte aligned.
|
||||
* Fast, but wastes 16 additional bytes of memory.
|
||||
* Does not throw any exception.
|
||||
/*****************************************************************************
|
||||
*** Implementation of handmade aligned functions ***
|
||||
*****************************************************************************/
|
||||
|
||||
/* ----- Hand made implementations of aligned malloc/free and realloc ----- */
|
||||
|
||||
/** \internal Like malloc, but the returned pointer is guaranteed to be 16-byte aligned.
|
||||
* Fast, but wastes 16 additional bytes of memory. Does not throw any exception.
|
||||
*/
|
||||
inline void* ei_handmade_aligned_malloc(size_t size)
|
||||
{
|
||||
void *original = std::malloc(size+16);
|
||||
if (original == 0) return 0;
|
||||
void *aligned = reinterpret_cast<void*>((reinterpret_cast<size_t>(original) & ~(size_t(15))) + 16);
|
||||
*(reinterpret_cast<void**>(aligned) - 1) = original;
|
||||
return aligned;
|
||||
}
|
||||
|
||||
/** \internal frees memory allocated with ei_handmade_aligned_malloc */
|
||||
/** \internal Frees memory allocated with ei_handmade_aligned_malloc */
|
||||
inline void ei_handmade_aligned_free(void *ptr)
|
||||
{
|
||||
if(ptr)
|
||||
std::free(*(reinterpret_cast<void**>(ptr) - 1));
|
||||
if (ptr) std::free(*(reinterpret_cast<void**>(ptr) - 1));
|
||||
}
|
||||
|
||||
/** \internal allocates \a size bytes. The returned pointer is guaranteed to have 16 bytes alignment.
|
||||
/** \internal
|
||||
* \brief Reallocates aligned memory.
|
||||
* Since we know that our handmade version is based on std::realloc
|
||||
* we can use std::realloc to implement efficient reallocation.
|
||||
*/
|
||||
inline void* ei_handmade_aligned_realloc(void* ptr, size_t size, size_t = 0)
|
||||
{
|
||||
if (ptr == 0) return ei_handmade_aligned_malloc(size);
|
||||
void *original = *(reinterpret_cast<void**>(ptr) - 1);
|
||||
original = std::realloc(original,size+16);
|
||||
if (original == 0) return 0;
|
||||
void *aligned = reinterpret_cast<void*>((reinterpret_cast<size_t>(original) & ~(size_t(15))) + 16);
|
||||
*(reinterpret_cast<void**>(aligned) - 1) = original;
|
||||
return aligned;
|
||||
}
|
||||
|
||||
/*****************************************************************************
|
||||
*** Implementation of generic aligned realloc (when no realloc can be used)***
|
||||
*****************************************************************************/
|
||||
|
||||
void* ei_aligned_malloc(size_t size);
|
||||
void ei_aligned_free(void *ptr);
|
||||
|
||||
/** \internal
|
||||
* \brief Reallocates aligned memory.
|
||||
* Allows reallocation with aligned ptr types. This implementation will
|
||||
* always create a new memory chunk and copy the old data.
|
||||
*/
|
||||
inline void* ei_generic_aligned_realloc(void* ptr, size_t size, size_t old_size)
|
||||
{
|
||||
if (ptr==0)
|
||||
return ei_aligned_malloc(size);
|
||||
|
||||
if (size==0)
|
||||
{
|
||||
ei_aligned_free(ptr);
|
||||
return 0;
|
||||
}
|
||||
|
||||
void* newptr = ei_aligned_malloc(size);
|
||||
if (newptr == 0)
|
||||
{
|
||||
errno = ENOMEM; // according to the standard
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (ptr != 0)
|
||||
{
|
||||
std::memcpy(newptr, ptr, std::min(size,old_size));
|
||||
ei_aligned_free(ptr);
|
||||
}
|
||||
|
||||
return newptr;
|
||||
}
|
||||
|
||||
/*****************************************************************************
|
||||
*** Implementation of portable aligned versions of malloc/free/realloc ***
|
||||
*****************************************************************************/
|
||||
|
||||
/** \internal Allocates \a size bytes. The returned pointer is guaranteed to have 16 bytes alignment.
|
||||
* On allocation error, the returned pointer is null, and if exceptions are enabled then a std::bad_alloc is thrown.
|
||||
*/
|
||||
inline void* ei_aligned_malloc(size_t size)
|
||||
@ -85,9 +173,9 @@ inline void* ei_aligned_malloc(size_t size)
|
||||
|
||||
void *result;
|
||||
#if !EIGEN_ALIGN
|
||||
result = malloc(size);
|
||||
result = std::malloc(size);
|
||||
#elif EIGEN_MALLOC_ALREADY_ALIGNED
|
||||
result = malloc(size);
|
||||
result = std::malloc(size);
|
||||
#elif EIGEN_HAS_POSIX_MEMALIGN
|
||||
if(posix_memalign(&result, 16, size)) result = 0;
|
||||
#elif EIGEN_HAS_MM_MALLOC
|
||||
@ -105,7 +193,67 @@ inline void* ei_aligned_malloc(size_t size)
|
||||
return result;
|
||||
}
|
||||
|
||||
/** allocates \a size bytes. If Align is true, then the returned ptr is 16-byte-aligned.
|
||||
/** \internal Frees memory allocated with ei_aligned_malloc. */
|
||||
inline void ei_aligned_free(void *ptr)
|
||||
{
|
||||
#if !EIGEN_ALIGN
|
||||
std::free(ptr);
|
||||
#elif EIGEN_MALLOC_ALREADY_ALIGNED
|
||||
std::free(ptr);
|
||||
#elif EIGEN_HAS_POSIX_MEMALIGN
|
||||
std::free(ptr);
|
||||
#elif EIGEN_HAS_MM_MALLOC
|
||||
_mm_free(ptr);
|
||||
#elif defined(_MSC_VER)
|
||||
_aligned_free(ptr);
|
||||
#else
|
||||
ei_handmade_aligned_free(ptr);
|
||||
#endif
|
||||
}
|
||||
|
||||
/**
|
||||
* \internal
|
||||
* \brief Reallocates an aligned block of memory.
|
||||
* \throws std::bad_alloc if EIGEN_EXCEPTIONS are defined.
|
||||
**/
|
||||
inline void* ei_aligned_realloc(void *ptr, size_t new_size, size_t old_size)
|
||||
{
|
||||
(void)old_size; // Suppress 'unused variable' warning. Seen in boost tee.
|
||||
|
||||
void *result;
|
||||
#if !EIGEN_ALIGN
|
||||
result = std::realloc(ptr,new_size);
|
||||
#elif EIGEN_MALLOC_ALREADY_ALIGNED
|
||||
result = std::realloc(ptr,new_size);
|
||||
#elif EIGEN_HAS_POSIX_MEMALIGN
|
||||
result = ei_generic_aligned_realloc(ptr,new_size,old_size);
|
||||
#elif EIGEN_HAS_MM_MALLOC
|
||||
// The defined(_mm_free) is just here to verify that this MSVC version
|
||||
// implements _mm_malloc/_mm_free based on the corresponding _aligned_
|
||||
// functions. This may not always be the case and we just try to be safe.
|
||||
#if defined(_MSC_VER) && defined(_mm_free)
|
||||
result = _aligned_realloc(ptr,new_size,16);
|
||||
#else
|
||||
result = ei_generic_aligned_realloc(ptr,new_size,old_size);
|
||||
#endif
|
||||
#elif defined(_MSC_VER)
|
||||
result = _aligned_realloc(ptr,new_size,16);
|
||||
#else
|
||||
result = ei_handmade_aligned_realloc(ptr,new_size,old_size);
|
||||
#endif
|
||||
|
||||
#ifdef EIGEN_EXCEPTIONS
|
||||
if (result==0 && new_size!=0)
|
||||
throw std::bad_alloc();
|
||||
#endif
|
||||
return result;
|
||||
}
|
||||
|
||||
/*****************************************************************************
|
||||
*** Implementation of conditionally aligned functions ***
|
||||
*****************************************************************************/
|
||||
|
||||
/** \internal Allocates \a size bytes. If Align is true, then the returned ptr is 16-byte-aligned.
|
||||
* On allocation error, the returned pointer is null, and if exceptions are enabled then a std::bad_alloc is thrown.
|
||||
*/
|
||||
template<bool Align> inline void* ei_conditional_aligned_malloc(size_t size)
|
||||
@ -126,7 +274,32 @@ template<> inline void* ei_conditional_aligned_malloc<false>(size_t size)
|
||||
return result;
|
||||
}
|
||||
|
||||
/** \internal construct the elements of an array.
|
||||
/** \internal Frees memory allocated with ei_conditional_aligned_malloc */
|
||||
template<bool Align> inline void ei_conditional_aligned_free(void *ptr)
|
||||
{
|
||||
ei_aligned_free(ptr);
|
||||
}
|
||||
|
||||
template<> inline void ei_conditional_aligned_free<false>(void *ptr)
|
||||
{
|
||||
std::free(ptr);
|
||||
}
|
||||
|
||||
template<bool Align> inline void* ei_conditional_aligned_realloc(void* ptr, size_t new_size, size_t old_size)
|
||||
{
|
||||
return ei_aligned_realloc(ptr, new_size, old_size);
|
||||
}
|
||||
|
||||
template<> inline void* ei_conditional_aligned_realloc<false>(void* ptr, size_t new_size, size_t)
|
||||
{
|
||||
return std::realloc(ptr, new_size);
|
||||
}
|
||||
|
||||
/*****************************************************************************
|
||||
*** Construction/destruction of array elements ***
|
||||
*****************************************************************************/
|
||||
|
||||
/** \internal Constructs the elements of an array.
|
||||
* The \a size parameter tells on how many objects to call the constructor of T.
|
||||
*/
|
||||
template<typename T> inline T* ei_construct_elements_of_array(T *ptr, size_t size)
|
||||
@ -135,7 +308,20 @@ template<typename T> inline T* ei_construct_elements_of_array(T *ptr, size_t siz
|
||||
return ptr;
|
||||
}
|
||||
|
||||
/** allocates \a size objects of type T. The returned pointer is guaranteed to have 16 bytes alignment.
|
||||
/** \internal Destructs the elements of an array.
|
||||
* The \a size parameters tells on how many objects to call the destructor of T.
|
||||
*/
|
||||
template<typename T> inline void ei_destruct_elements_of_array(T *ptr, size_t size)
|
||||
{
|
||||
// always destruct an array starting from the end.
|
||||
while(size) ptr[--size].~T();
|
||||
}
|
||||
|
||||
/*****************************************************************************
|
||||
*** Implementation of aligned new/delete-like functions ***
|
||||
*****************************************************************************/
|
||||
|
||||
/** \internal Allocates \a size objects of type T. The returned pointer is guaranteed to have 16 bytes alignment.
|
||||
* On allocation error, the returned pointer is undefined, but if exceptions are enabled then a std::bad_alloc is thrown.
|
||||
* The default constructor of T is called.
|
||||
*/
|
||||
@ -151,47 +337,7 @@ template<typename T, bool Align> inline T* ei_conditional_aligned_new(size_t siz
|
||||
return ei_construct_elements_of_array(result, size);
|
||||
}
|
||||
|
||||
/** \internal free memory allocated with ei_aligned_malloc
|
||||
*/
|
||||
inline void ei_aligned_free(void *ptr)
|
||||
{
|
||||
#if !EIGEN_ALIGN
|
||||
free(ptr);
|
||||
#elif EIGEN_MALLOC_ALREADY_ALIGNED
|
||||
free(ptr);
|
||||
#elif EIGEN_HAS_POSIX_MEMALIGN
|
||||
free(ptr);
|
||||
#elif EIGEN_HAS_MM_MALLOC
|
||||
_mm_free(ptr);
|
||||
#elif defined(_MSC_VER)
|
||||
_aligned_free(ptr);
|
||||
#else
|
||||
ei_handmade_aligned_free(ptr);
|
||||
#endif
|
||||
}
|
||||
|
||||
/** \internal free memory allocated with ei_conditional_aligned_malloc
|
||||
*/
|
||||
template<bool Align> inline void ei_conditional_aligned_free(void *ptr)
|
||||
{
|
||||
ei_aligned_free(ptr);
|
||||
}
|
||||
|
||||
template<> inline void ei_conditional_aligned_free<false>(void *ptr)
|
||||
{
|
||||
std::free(ptr);
|
||||
}
|
||||
|
||||
/** \internal destruct the elements of an array.
|
||||
* The \a size parameters tells on how many objects to call the destructor of T.
|
||||
*/
|
||||
template<typename T> inline void ei_destruct_elements_of_array(T *ptr, size_t size)
|
||||
{
|
||||
// always destruct an array starting from the end.
|
||||
while(size) ptr[--size].~T();
|
||||
}
|
||||
|
||||
/** \internal delete objects constructed with ei_aligned_new
|
||||
/** \internal Deletes objects constructed with ei_aligned_new
|
||||
* The \a size parameters tells on how many objects to call the destructor of T.
|
||||
*/
|
||||
template<typename T> inline void ei_aligned_delete(T *ptr, size_t size)
|
||||
@ -200,7 +346,7 @@ template<typename T> inline void ei_aligned_delete(T *ptr, size_t size)
|
||||
ei_aligned_free(ptr);
|
||||
}
|
||||
|
||||
/** \internal delete objects constructed with ei_conditional_aligned_new
|
||||
/** \internal Deletes objects constructed with ei_conditional_aligned_new
|
||||
* The \a size parameters tells on how many objects to call the destructor of T.
|
||||
*/
|
||||
template<typename T, bool Align> inline void ei_conditional_aligned_delete(T *ptr, size_t size)
|
||||
@ -209,7 +355,17 @@ template<typename T, bool Align> inline void ei_conditional_aligned_delete(T *pt
|
||||
ei_conditional_aligned_free<Align>(ptr);
|
||||
}
|
||||
|
||||
/** \internal \returns the index of the first element of the array that is well aligned for vectorization.
|
||||
template<typename T, bool Align> inline T* ei_conditional_aligned_realloc_new(T* pts, size_t new_size, size_t old_size)
|
||||
{
|
||||
T *result = reinterpret_cast<T*>(ei_conditional_aligned_realloc<Align>(reinterpret_cast<void*>(pts), sizeof(T)*new_size, sizeof(T)*old_size));
|
||||
if (new_size > old_size)
|
||||
ei_construct_elements_of_array(result+old_size, new_size-old_size);
|
||||
return result;
|
||||
}
|
||||
|
||||
/****************************************************************************/
|
||||
|
||||
/** \internal Returns the index of the first element of the array that is well aligned for vectorization.
|
||||
*
|
||||
* \param array the address of the start of the array
|
||||
* \param size the size of the array
|
||||
@ -236,7 +392,7 @@ inline static Integer ei_first_aligned(const Scalar* array, Integer size)
|
||||
if(PacketSize==1)
|
||||
{
|
||||
// Either there is no vectorization, or a packet consists of exactly 1 scalar so that all elements
|
||||
// of the array have the same aligment.
|
||||
// of the array have the same alignment.
|
||||
return 0;
|
||||
}
|
||||
else if(size_t(array) & (sizeof(Scalar)-1))
|
||||
@ -252,19 +408,23 @@ inline static Integer ei_first_aligned(const Scalar* array, Integer size)
|
||||
}
|
||||
}
|
||||
|
||||
/*****************************************************************************
|
||||
*** Implementation of runtime stack allocation (falling back to malloc) ***
|
||||
*****************************************************************************/
|
||||
|
||||
/** \internal
|
||||
* ei_aligned_stack_alloc(SIZE) allocates an aligned buffer of SIZE bytes
|
||||
* on the stack if SIZE is smaller than EIGEN_STACK_ALLOCATION_LIMIT, and
|
||||
* if stack allocation is supported by the platform (currently, this is linux only).
|
||||
* Otherwise the memory is allocated on the heap.
|
||||
* Data allocated with ei_aligned_stack_alloc \b must be freed by calling ei_aligned_stack_free(PTR,SIZE).
|
||||
* Allocates an aligned buffer of SIZE bytes on the stack if SIZE is smaller than
|
||||
* EIGEN_STACK_ALLOCATION_LIMIT, and if stack allocation is supported by the platform
|
||||
* (currently, this is Linux only). Otherwise the memory is allocated on the heap.
|
||||
* Data allocated with ei_aligned_stack_alloc \b must be freed by calling
|
||||
* ei_aligned_stack_free(PTR,SIZE).
|
||||
* \code
|
||||
* float * data = ei_aligned_stack_alloc(float,array.size());
|
||||
* // ...
|
||||
* ei_aligned_stack_free(data,float,array.size());
|
||||
* \endcode
|
||||
*/
|
||||
#ifdef __linux__
|
||||
#if (defined __linux__) && !(defined __ARM_NEON__)
|
||||
#define ei_aligned_stack_alloc(SIZE) (SIZE<=EIGEN_STACK_ALLOCATION_LIMIT) \
|
||||
? alloca(SIZE) \
|
||||
: ei_aligned_malloc(SIZE)
|
||||
@ -279,6 +439,10 @@ inline static Integer ei_first_aligned(const Scalar* array, Integer size)
|
||||
ei_aligned_stack_free(PTR,sizeof(TYPE)*SIZE);} while(0)
|
||||
|
||||
|
||||
/*****************************************************************************
|
||||
*** Implementation of EIGEN_MAKE_ALIGNED_OPERATOR_NEW [_IF] ***
|
||||
*****************************************************************************/
|
||||
|
||||
#if EIGEN_ALIGN
|
||||
#ifdef EIGEN_EXCEPTIONS
|
||||
#define EIGEN_MAKE_ALIGNED_OPERATOR_NEW_NOTHROW(NeedsToAlign) \
|
||||
@ -322,10 +486,11 @@ inline static Integer ei_first_aligned(const Scalar* array, Integer size)
|
||||
#define EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF_VECTORIZABLE_FIXED_SIZE(Scalar,Size) \
|
||||
EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF(((Size)!=Eigen::Dynamic) && ((sizeof(Scalar)*(Size))%16==0))
|
||||
|
||||
/****************************************************************************/
|
||||
|
||||
/** \class aligned_allocator
|
||||
*
|
||||
* \brief stl compatible allocator to use with with 16 byte aligned types
|
||||
* \brief STL compatible allocator to use with with 16 byte aligned types
|
||||
*
|
||||
* Example:
|
||||
* \code
|
||||
|
@ -2,7 +2,7 @@
|
||||
// for linear algebra.
|
||||
//
|
||||
// Copyright (C) 2008 Gael Guennebaud <g.gael@free.fr>
|
||||
// Copyright (C) 2006-2008 Benoit Jacob <jacob.benoit.1@gmail.com>
|
||||
// Copyright (C) 2008 Benoit Jacob <jacob.benoit.1@gmail.com>
|
||||
//
|
||||
// Eigen is free software; you can redistribute it and/or
|
||||
// modify it under the terms of the GNU Lesser General Public
|
||||
@ -46,7 +46,7 @@
|
||||
// if native static_assert is enabled, let's use it
|
||||
#define EIGEN_STATIC_ASSERT(X,MSG) static_assert(X,#MSG);
|
||||
|
||||
#else // CXX0X
|
||||
#else // not CXX0X
|
||||
|
||||
template<bool condition>
|
||||
struct ei_static_assert {};
|
||||
@ -81,7 +81,8 @@
|
||||
BOTH_MATRICES_MUST_HAVE_THE_SAME_STORAGE_ORDER,
|
||||
THIS_METHOD_IS_ONLY_FOR_DIAGONAL_MATRIX,
|
||||
THE_MATRIX_OR_EXPRESSION_THAT_YOU_PASSED_DOES_NOT_HAVE_THE_EXPECTED_TYPE,
|
||||
THIS_METHOD_IS_ONLY_FOR_EXPRESSIONS_WITH_DIRECT_MEMORY_ACCESS_SUCH_AS_MAP_OR_PLAIN_MATRICES
|
||||
THIS_METHOD_IS_ONLY_FOR_EXPRESSIONS_WITH_DIRECT_MEMORY_ACCESS_SUCH_AS_MAP_OR_PLAIN_MATRICES,
|
||||
YOU_ALREADY_SPECIFIED_THIS_STRIDE
|
||||
};
|
||||
};
|
||||
|
||||
|
@ -50,7 +50,7 @@ template<int Value> class ei_int_if_dynamic
|
||||
{
|
||||
public:
|
||||
EIGEN_EMPTY_STRUCT_CTOR(ei_int_if_dynamic)
|
||||
explicit ei_int_if_dynamic(int) {}
|
||||
explicit ei_int_if_dynamic(int v) { EIGEN_ONLY_USED_FOR_DEBUG(v); ei_assert(v == Value); }
|
||||
static int value() { return Value; }
|
||||
void setValue(int) {}
|
||||
};
|
||||
@ -58,7 +58,7 @@ template<int Value> class ei_int_if_dynamic
|
||||
template<> class ei_int_if_dynamic<Dynamic>
|
||||
{
|
||||
int m_value;
|
||||
ei_int_if_dynamic() {}
|
||||
ei_int_if_dynamic() { ei_assert(false); }
|
||||
public:
|
||||
explicit ei_int_if_dynamic(int value) : m_value(value) {}
|
||||
int value() const { return m_value; }
|
||||
@ -87,12 +87,17 @@ class ei_compute_matrix_flags
|
||||
{
|
||||
enum {
|
||||
row_major_bit = Options&RowMajor ? RowMajorBit : 0,
|
||||
inner_max_size = MaxCols==1 ? MaxRows
|
||||
: MaxRows==1 ? MaxCols
|
||||
: row_major_bit ? MaxCols : MaxRows,
|
||||
is_big = inner_max_size == Dynamic,
|
||||
is_packet_size_multiple = MaxRows==Dynamic || MaxCols==Dynamic || ((MaxCols*MaxRows) % ei_packet_traits<Scalar>::size) == 0,
|
||||
aligned_bit = (((Options&DontAlign)==0) && (is_big || is_packet_size_multiple)) ? AlignedBit : 0,
|
||||
is_dynamic_size_storage = MaxRows==Dynamic || MaxCols==Dynamic,
|
||||
#if !defined(__ARM_NEON__)
|
||||
is_fixed_size_aligned
|
||||
= (!is_dynamic_size_storage) && (((MaxCols*MaxRows) % ei_packet_traits<Scalar>::size) == 0),
|
||||
#else
|
||||
// FIXME!!! This is a hack because ARM gcc does not honour __attribute__((aligned(16))) properly
|
||||
is_fixed_size_aligned = 0,
|
||||
#endif
|
||||
aligned_bit = (((Options&DontAlign)==0)
|
||||
&& (is_dynamic_size_storage || is_fixed_size_aligned))
|
||||
? AlignedBit : 0,
|
||||
packet_access_bit = ei_packet_traits<Scalar>::size > 1 && aligned_bit ? PacketAccessBit : 0
|
||||
};
|
||||
|
||||
@ -214,7 +219,7 @@ struct ei_is_reference<T&>
|
||||
};
|
||||
|
||||
/**
|
||||
* The reference selector for template expressions. The idea is that we don't
|
||||
* \internal The reference selector for template expressions. The idea is that we don't
|
||||
* need to use references for expressions since they are light weight proxy
|
||||
* objects which should generate no copying overhead.
|
||||
**/
|
||||
|
@ -154,6 +154,14 @@ void ComplexSchur<MatrixType>::compute(const MatrixType& matrix, bool skipU)
|
||||
m_matT = hess.matrixH();
|
||||
if(!skipU) m_matU = hess.matrixQ();
|
||||
|
||||
|
||||
// Reduce the Hessenberg matrix m_matT to triangular form by QR iteration.
|
||||
|
||||
// The matrix m_matT is divided in three parts.
|
||||
// Rows 0,...,il-1 are decoupled from the rest because m_matT(il,il-1) is zero.
|
||||
// Rows il,...,iu is the part we are working on (the active submatrix).
|
||||
// Rows iu+1,...,end are already brought in triangular form.
|
||||
|
||||
int iu = m_matT.cols() - 1;
|
||||
int il;
|
||||
RealScalar d,sd,sf;
|
||||
@ -164,7 +172,7 @@ void ComplexSchur<MatrixType>::compute(const MatrixType& matrix, bool skipU)
|
||||
int iter = 0;
|
||||
while(true)
|
||||
{
|
||||
//locate the range in which to iterate
|
||||
// find iu, the bottom row of the active submatrix
|
||||
while(iu > 0)
|
||||
{
|
||||
d = ei_norm1(m_matT.coeff(iu,iu)) + ei_norm1(m_matT.coeff(iu-1,iu-1));
|
||||
@ -183,10 +191,11 @@ void ComplexSchur<MatrixType>::compute(const MatrixType& matrix, bool skipU)
|
||||
if(iter >= 30)
|
||||
{
|
||||
// FIXME : what to do when iter==MAXITER ??
|
||||
std::cerr << "MAXITER" << std::endl;
|
||||
//std::cerr << "MAXITER" << std::endl;
|
||||
return;
|
||||
}
|
||||
|
||||
// find il, the top row of the active submatrix
|
||||
il = iu-1;
|
||||
while(il > 0)
|
||||
{
|
||||
@ -202,16 +211,19 @@ void ComplexSchur<MatrixType>::compute(const MatrixType& matrix, bool skipU)
|
||||
|
||||
if( il != 0 ) m_matT.coeffRef(il,il-1) = Complex(0);
|
||||
|
||||
// compute the shift (the normalization by sf is to avoid under/overflow)
|
||||
// compute the shift kappa as one of the eigenvalues of the 2x2
|
||||
// diagonal block on the bottom of the active submatrix
|
||||
|
||||
Matrix<Scalar,2,2> t = m_matT.template block<2,2>(iu-1,iu-1);
|
||||
sf = t.cwiseAbs().sum();
|
||||
t /= sf;
|
||||
t /= sf; // the normalization by sf is to avoid under/overflow
|
||||
|
||||
c = t.determinant();
|
||||
b = t.diagonal().sum();
|
||||
|
||||
disc = ei_sqrt(b*b - RealScalar(4)*c);
|
||||
b = t.coeff(0,1) * t.coeff(1,0);
|
||||
c = t.coeff(0,0) - t.coeff(1,1);
|
||||
disc = ei_sqrt(c*c + RealScalar(4)*b);
|
||||
|
||||
c = t.coeff(0,0) * t.coeff(1,1) - b;
|
||||
b = t.coeff(0,0) + t.coeff(1,1);
|
||||
r1 = (b+disc)/RealScalar(2);
|
||||
r2 = (b-disc)/RealScalar(2);
|
||||
|
||||
@ -224,6 +236,12 @@ void ComplexSchur<MatrixType>::compute(const MatrixType& matrix, bool skipU)
|
||||
kappa = sf * r1;
|
||||
else
|
||||
kappa = sf * r2;
|
||||
|
||||
if (iter == 10 || iter == 20)
|
||||
{
|
||||
// exceptional shift, taken from http://www.netlib.org/eispack/comqr.f
|
||||
kappa = ei_abs(ei_real(m_matT.coeff(iu,iu-1))) + ei_abs(ei_real(m_matT.coeff(iu-1,iu-2)));
|
||||
}
|
||||
|
||||
// perform the QR step using Givens rotations
|
||||
PlanarRotation<Complex> rot;
|
||||
@ -246,18 +264,6 @@ void ComplexSchur<MatrixType>::compute(const MatrixType& matrix, bool skipU)
|
||||
}
|
||||
}
|
||||
|
||||
// FIXME : is it necessary ?
|
||||
/*
|
||||
for(int i=0 ; i<n ; i++)
|
||||
for(int j=0 ; j<n ; j++)
|
||||
{
|
||||
if(ei_abs(ei_real(m_matT.coeff(i,j))) < eps)
|
||||
ei_real_ref(m_matT.coeffRef(i,j)) = 0;
|
||||
if(ei_imag(ei_abs(m_matT.coeff(i,j))) < eps)
|
||||
ei_imag_ref(m_matT.coeffRef(i,j)) = 0;
|
||||
}
|
||||
*/
|
||||
|
||||
m_isInitialized = true;
|
||||
m_matUisUptodate = !skipU;
|
||||
}
|
||||
|
@ -72,8 +72,8 @@ public:
|
||||
inline Transform<Scalar,Dim> operator* (const Translation<Scalar,Dim>& t) const;
|
||||
|
||||
/** Concatenates a uniform scaling and an affine transformation */
|
||||
template<int Dim>
|
||||
inline Transform<Scalar,Dim> operator* (const Transform<Scalar,Dim>& t) const;
|
||||
template<int Dim, int Mode>
|
||||
inline Transform<Scalar,Dim,Mode> operator* (const Transform<Scalar,Dim, Mode>& t) const;
|
||||
|
||||
/** Concatenates a uniform scaling and a linear transformation matrix */
|
||||
// TODO returns an expression
|
||||
@ -156,4 +156,27 @@ typedef DiagonalMatrix<float, 3> AlignedScaling3f;
|
||||
typedef DiagonalMatrix<double,3> AlignedScaling3d;
|
||||
//@}
|
||||
|
||||
template<typename Scalar>
|
||||
template<int Dim>
|
||||
inline Transform<Scalar,Dim>
|
||||
UniformScaling<Scalar>::operator* (const Translation<Scalar,Dim>& t) const
|
||||
{
|
||||
Transform<Scalar,Dim> res;
|
||||
res.matrix().setZero();
|
||||
res.linear().diagonal().fill(factor());
|
||||
res.translation() = factor() * t.vector();
|
||||
res(Dim,Dim) = Scalar(1);
|
||||
return res;
|
||||
}
|
||||
|
||||
template<typename Scalar>
|
||||
template<int Dim,int Mode>
|
||||
inline Transform<Scalar,Dim,Mode>
|
||||
UniformScaling<Scalar>::operator* (const Transform<Scalar,Dim, Mode>& t) const
|
||||
{
|
||||
Transform<Scalar,Dim> res = t;
|
||||
res.prescale(factor());
|
||||
return res;
|
||||
}
|
||||
|
||||
#endif // EIGEN_SCALING_H
|
||||
|
@ -69,7 +69,7 @@ template<typename Derived> struct ei_determinant_impl<Derived, 2>
|
||||
|
||||
template<typename Derived> struct ei_determinant_impl<Derived, 3>
|
||||
{
|
||||
static typename ei_traits<Derived>::Scalar run(const Derived& m)
|
||||
static inline typename ei_traits<Derived>::Scalar run(const Derived& m)
|
||||
{
|
||||
return ei_bruteforce_det3_helper(m,0,1,2)
|
||||
- ei_bruteforce_det3_helper(m,1,0,2)
|
||||
@ -100,8 +100,7 @@ inline typename ei_traits<Derived>::Scalar MatrixBase<Derived>::determinant() co
|
||||
{
|
||||
assert(rows() == cols());
|
||||
typedef typename ei_nested<Derived,Base::RowsAtCompileTime>::type Nested;
|
||||
Nested nested(derived());
|
||||
return ei_determinant_impl<typename ei_cleantype<Nested>::type>::run(nested);
|
||||
return ei_determinant_impl<typename ei_cleantype<Nested>::type>::run(derived());
|
||||
}
|
||||
|
||||
#endif // EIGEN_DETERMINANT_H
|
||||
|
@ -119,7 +119,7 @@ template<typename _MatrixType> class FullPivLU
|
||||
* diagonal coefficient of U.
|
||||
*/
|
||||
RealScalar maxPivot() const { return m_maxpivot; }
|
||||
|
||||
|
||||
/** \returns the permutation matrix P
|
||||
*
|
||||
* \sa permutationQ()
|
||||
@ -361,6 +361,8 @@ template<typename _MatrixType> class FullPivLU
|
||||
(*this, MatrixType::Identity(m_lu.rows(), m_lu.cols()));
|
||||
}
|
||||
|
||||
MatrixType reconstructedMatrix() const;
|
||||
|
||||
inline int rows() const { return m_lu.rows(); }
|
||||
inline int cols() const { return m_lu.cols(); }
|
||||
|
||||
@ -404,6 +406,7 @@ FullPivLU<MatrixType>& FullPivLU<MatrixType>::compute(const MatrixType& matrix)
|
||||
|
||||
m_nonzero_pivots = size; // the generic case is that in which all pivots are nonzero (invertible case)
|
||||
m_maxpivot = RealScalar(0);
|
||||
RealScalar cutoff(0);
|
||||
|
||||
for(int k = 0; k < size; ++k)
|
||||
{
|
||||
@ -418,8 +421,14 @@ FullPivLU<MatrixType>& FullPivLU<MatrixType>::compute(const MatrixType& matrix)
|
||||
row_of_biggest_in_corner += k; // correct the values! since they were computed in the corner,
|
||||
col_of_biggest_in_corner += k; // need to add k to them.
|
||||
|
||||
// if the pivot (hence the corner) is exactly zero, terminate to avoid generating nan/inf values
|
||||
if(biggest_in_corner == RealScalar(0))
|
||||
// when k==0, biggest_in_corner is the biggest coeff absolute value in the original matrix
|
||||
if(k == 0) cutoff = biggest_in_corner * NumTraits<Scalar>::epsilon();
|
||||
|
||||
// if the pivot (hence the corner) is "zero", terminate to avoid generating nan/inf values.
|
||||
// Notice that using an exact comparison (biggest_in_corner==0) here, as Golub-van Loan do in
|
||||
// their pseudo-code, results in numerical instability! The cutoff here has been validated
|
||||
// by running the unit test 'lu' with many repetitions.
|
||||
if(biggest_in_corner < cutoff)
|
||||
{
|
||||
// before exiting, make sure to initialize the still uninitialized transpositions
|
||||
// in a sane state without destroying what we already have.
|
||||
@ -480,6 +489,31 @@ typename ei_traits<MatrixType>::Scalar FullPivLU<MatrixType>::determinant() cons
|
||||
return Scalar(m_det_pq) * Scalar(m_lu.diagonal().prod());
|
||||
}
|
||||
|
||||
/** \returns the matrix represented by the decomposition,
|
||||
* i.e., it returns the product: P^{-1} L U Q^{-1}.
|
||||
* This function is provided for debug purpose. */
|
||||
template<typename MatrixType>
|
||||
MatrixType FullPivLU<MatrixType>::reconstructedMatrix() const
|
||||
{
|
||||
ei_assert(m_isInitialized && "LU is not initialized.");
|
||||
const int smalldim = std::min(m_lu.rows(), m_lu.cols());
|
||||
// LU
|
||||
MatrixType res(m_lu.rows(),m_lu.cols());
|
||||
// FIXME the .toDenseMatrix() should not be needed...
|
||||
res = m_lu.corner(TopLeft,m_lu.rows(),smalldim)
|
||||
.template triangularView<UnitLower>().toDenseMatrix()
|
||||
* m_lu.corner(TopLeft,smalldim,m_lu.cols())
|
||||
.template triangularView<Upper>().toDenseMatrix();
|
||||
|
||||
// P^{-1}(LU)
|
||||
res = m_p.inverse() * res;
|
||||
|
||||
// (P^{-1}LU)Q^{-1}
|
||||
res = res * m_q.inverse();
|
||||
|
||||
return res;
|
||||
}
|
||||
|
||||
/********* Implementation of kernel() **************************************************/
|
||||
|
||||
template<typename _MatrixType>
|
||||
|
@ -123,7 +123,7 @@ struct ei_compute_inverse_and_det_with_check<MatrixType, ResultType, 2>
|
||||
****************************/
|
||||
|
||||
template<typename MatrixType, typename ResultType>
|
||||
void ei_compute_inverse_size3_helper(
|
||||
inline void ei_compute_inverse_size3_helper(
|
||||
const MatrixType& matrix,
|
||||
const typename ResultType::Scalar& invdet,
|
||||
const Matrix<typename ResultType::Scalar,3,1>& cofactors_col0,
|
||||
|
@ -165,6 +165,8 @@ template<typename _MatrixType> class PartialPivLU
|
||||
*/
|
||||
typename ei_traits<MatrixType>::Scalar determinant() const;
|
||||
|
||||
MatrixType reconstructedMatrix() const;
|
||||
|
||||
inline int rows() const { return m_lu.rows(); }
|
||||
inline int cols() const { return m_lu.cols(); }
|
||||
|
||||
@ -194,7 +196,7 @@ PartialPivLU<MatrixType>::PartialPivLU(const MatrixType& matrix)
|
||||
compute(matrix);
|
||||
}
|
||||
|
||||
/** This is the blocked version of ei_fullpivlu_unblocked() */
|
||||
/** \internal This is the blocked version of ei_fullpivlu_unblocked() */
|
||||
template<typename Scalar, int StorageOrder>
|
||||
struct ei_partial_lu_impl
|
||||
{
|
||||
@ -368,7 +370,7 @@ void ei_partial_lu_inplace(MatrixType& lu, IntVector& row_transpositions, int& n
|
||||
|
||||
ei_partial_lu_impl
|
||||
<typename MatrixType::Scalar, MatrixType::Flags&RowMajorBit?RowMajor:ColMajor>
|
||||
::blocked_lu(lu.rows(), lu.cols(), &lu.coeffRef(0,0), lu.stride(), &row_transpositions.coeffRef(0), nb_transpositions);
|
||||
::blocked_lu(lu.rows(), lu.cols(), &lu.coeffRef(0,0), lu.outerStride(), &row_transpositions.coeffRef(0), nb_transpositions);
|
||||
}
|
||||
|
||||
template<typename MatrixType>
|
||||
@ -400,6 +402,23 @@ typename ei_traits<MatrixType>::Scalar PartialPivLU<MatrixType>::determinant() c
|
||||
return Scalar(m_det_p) * m_lu.diagonal().prod();
|
||||
}
|
||||
|
||||
/** \returns the matrix represented by the decomposition,
|
||||
* i.e., it returns the product: P^{-1} L U.
|
||||
* This function is provided for debug purpose. */
|
||||
template<typename MatrixType>
|
||||
MatrixType PartialPivLU<MatrixType>::reconstructedMatrix() const
|
||||
{
|
||||
ei_assert(m_isInitialized && "LU is not initialized.");
|
||||
// LU
|
||||
MatrixType res = m_lu.template triangularView<UnitLower>().toDenseMatrix()
|
||||
* m_lu.template triangularView<Upper>();
|
||||
|
||||
// P^{-1}(LU)
|
||||
res = m_p.inverse() * res;
|
||||
|
||||
return res;
|
||||
}
|
||||
|
||||
/***** Implementation of solve() *****************************************************/
|
||||
|
||||
template<typename _MatrixType, typename Rhs>
|
||||
|
@ -99,7 +99,7 @@ cholmod_dense ei_cholmod_map_eigen_to_dense(MatrixBase<Derived>& mat)
|
||||
res.nrow = mat.rows();
|
||||
res.ncol = mat.cols();
|
||||
res.nzmax = res.nrow * res.ncol;
|
||||
res.d = Derived::IsVectorAtCompileTime ? mat.derived().size() : mat.derived().stride();
|
||||
res.d = Derived::IsVectorAtCompileTime ? mat.derived().size() : mat.derived().outerStride();
|
||||
res.x = mat.derived().data();
|
||||
res.z = 0;
|
||||
|
||||
@ -233,7 +233,7 @@ bool SparseLLT<MatrixType,Cholmod>::solveInPlace(MatrixBase<Derived> &b) const
|
||||
cholmod_dense* x = cholmod_solve(CHOLMOD_A, m_cholmodFactor, &cdb, &m_cholmod);
|
||||
if(!x)
|
||||
{
|
||||
std::cerr << "Eigen: cholmod_solve failed\n";
|
||||
//std::cerr << "Eigen: cholmod_solve failed\n";
|
||||
return false;
|
||||
}
|
||||
b = Matrix<typename Base::Scalar,Dynamic,1>::Map(reinterpret_cast<typename Base::Scalar*>(x->x),b.rows());
|
||||
|
@ -236,7 +236,7 @@ class DynamicSparseMatrix
|
||||
{
|
||||
// remove all coefficients with innerCoord>=innerSize
|
||||
// TODO
|
||||
std::cerr << "not implemented yet\n";
|
||||
//std::cerr << "not implemented yet\n";
|
||||
exit(2);
|
||||
}
|
||||
if (m_data.size() != outerSize)
|
||||
|
@ -161,7 +161,7 @@ struct SluMatrix : SuperMatrix
|
||||
res.nrow = mat.rows();
|
||||
res.ncol = mat.cols();
|
||||
|
||||
res.storage.lda = MatrixType::IsVectorAtCompileTime ? mat.size() : mat.stride();
|
||||
res.storage.lda = MatrixType::IsVectorAtCompileTime ? mat.size() : mat.outerStride();
|
||||
res.storage.values = mat.data();
|
||||
return res;
|
||||
}
|
||||
@ -217,7 +217,7 @@ struct SluMatrixMapHelper<Matrix<Scalar,Rows,Cols,Options,MRows,MCols> >
|
||||
res.nrow = mat.rows();
|
||||
res.ncol = mat.cols();
|
||||
|
||||
res.storage.lda = mat.stride();
|
||||
res.storage.lda = mat.outerStride();
|
||||
res.storage.values = mat.data();
|
||||
}
|
||||
};
|
||||
@ -397,7 +397,7 @@ void SparseLU<MatrixType,SuperLU>::compute(const MatrixType& a)
|
||||
case MinimumDegree_ATA : m_sluOptions.ColPerm = MMD_ATA; break;
|
||||
case ColApproxMinimumDegree : m_sluOptions.ColPerm = COLAMD; break;
|
||||
default:
|
||||
std::cerr << "Eigen: ordering method \"" << Base::orderingMethod() << "\" not supported by the SuperLU backend\n";
|
||||
//std::cerr << "Eigen: ordering method \"" << Base::orderingMethod() << "\" not supported by the SuperLU backend\n";
|
||||
m_sluOptions.ColPerm = NATURAL;
|
||||
};
|
||||
|
||||
@ -448,7 +448,7 @@ void SparseLU<MatrixType,SuperLU>::compute(const MatrixType& a)
|
||||
&recip_pivot_gross, &rcond,
|
||||
&m_sluStat, &info, Scalar());
|
||||
#else
|
||||
std::cerr << "Incomplete factorization is only available in SuperLU v4\n";
|
||||
//std::cerr << "Incomplete factorization is only available in SuperLU v4\n";
|
||||
Base::m_succeeded = false;
|
||||
return;
|
||||
#endif
|
||||
@ -486,7 +486,7 @@ bool SparseLU<MatrixType,SuperLU>::solve(const MatrixBase<BDerived> &b,
|
||||
case SvTranspose : m_sluOptions.Trans = TRANS; break;
|
||||
case SvAdjoint : m_sluOptions.Trans = CONJ; break;
|
||||
default:
|
||||
std::cerr << "Eigen: transposition option \"" << transposed << "\" not supported by the SuperLU backend\n";
|
||||
//std::cerr << "Eigen: transposition option \"" << transposed << "\" not supported by the SuperLU backend\n";
|
||||
m_sluOptions.Trans = NOTRANS;
|
||||
}
|
||||
|
||||
@ -513,7 +513,7 @@ bool SparseLU<MatrixType,SuperLU>::solve(const MatrixBase<BDerived> &b,
|
||||
&recip_pivot_gross, &rcond,
|
||||
&m_sluStat, &info, Scalar());
|
||||
#else
|
||||
std::cerr << "Incomplete factorization is only available in SuperLU v4\n";
|
||||
//std::cerr << "Incomplete factorization is only available in SuperLU v4\n";
|
||||
return false;
|
||||
#endif
|
||||
}
|
||||
|
@ -1,7 +1,7 @@
|
||||
// This file is part of Eigen, a lightweight C++ template library
|
||||
// for linear algebra.
|
||||
//
|
||||
// Copyright (C) 2008 Gael Guennebaud <g.gael@free.fr>
|
||||
// Copyright (C) 2008-2010 Gael Guennebaud <g.gael@free.fr>
|
||||
// Copyright (C) 2009 Benoit Jacob <jacob.benoit.1@gmail.com>
|
||||
//
|
||||
// Eigen is free software; you can redistribute it and/or
|
||||
@ -27,18 +27,20 @@
|
||||
#define EIGEN_BENCH_TIMERR_H
|
||||
|
||||
#if defined(_WIN32) || defined(__CYGWIN__)
|
||||
#define NOMINMAX
|
||||
#define WIN32_LEAN_AND_MEAN
|
||||
#include <windows.h>
|
||||
# ifndef NOMINMAX
|
||||
# define NOMINMAX
|
||||
# define EIGEN_BT_UNDEF_NOMINMAX
|
||||
# endif
|
||||
# ifndef WIN32_LEAN_AND_MEAN
|
||||
# define WIN32_LEAN_AND_MEAN
|
||||
# define EIGEN_BT_UNDEF_WIN32_LEAN_AND_MEAN
|
||||
# endif
|
||||
# include <windows.h>
|
||||
#else
|
||||
#include <sys/time.h>
|
||||
#include <time.h>
|
||||
#include <unistd.h>
|
||||
# include <unistd.h>
|
||||
#endif
|
||||
|
||||
#include <cmath>
|
||||
#include <cstdlib>
|
||||
#include <numeric>
|
||||
#include <Eigen/Core>
|
||||
|
||||
namespace Eigen
|
||||
{
|
||||
@ -126,14 +128,13 @@ public:
|
||||
inline double getRealTime()
|
||||
{
|
||||
#ifdef WIN32
|
||||
SYSTEMTIME st;
|
||||
GetSystemTime(&st);
|
||||
return (double)st.wSecond + 1.e-3 * (double)st.wMilliseconds;
|
||||
SYSTEMTIME st;
|
||||
GetSystemTime(&st);
|
||||
return (double)st.wSecond + 1.e-3 * (double)st.wMilliseconds;
|
||||
#else
|
||||
struct timeval tv;
|
||||
struct timezone tz;
|
||||
gettimeofday(&tv, &tz);
|
||||
return (double)tv.tv_sec + 1.e-6 * (double)tv.tv_usec;
|
||||
timespec ts;
|
||||
clock_gettime(CLOCK_REALTIME, &ts);
|
||||
return double(ts.tv_sec) + 1e-9 * double(ts.tv_nsec);
|
||||
#endif
|
||||
}
|
||||
|
||||
@ -161,4 +162,15 @@ protected:
|
||||
|
||||
}
|
||||
|
||||
// clean #defined tokens
|
||||
#ifdef EIGEN_BT_UNDEF_NOMINMAX
|
||||
# undef EIGEN_BT_UNDEF_NOMINMAX
|
||||
# undef NOMINMAX
|
||||
#endif
|
||||
|
||||
#ifdef EIGEN_BT_UNDEF_WIN32_LEAN_AND_MEAN
|
||||
# undef EIGEN_BT_UNDEF_WIN32_LEAN_AND_MEAN
|
||||
# undef WIN32_LEAN_AND_MEAN
|
||||
#endif
|
||||
|
||||
#endif // EIGEN_BENCH_TIMERR_H
|
||||
|
@ -2,10 +2,11 @@
|
||||
|
||||
# gcc : CXX="g++ -finline-limit=10000 -ftemplate-depth-2000 --param max-inline-recursive-depth=2000"
|
||||
# icc : CXX="icpc -fast -no-inline-max-size -fno-exceptions"
|
||||
CXX=${CXX-g++ -finline-limit=10000 -ftemplate-depth-2000 --param max-inline-recursive-depth=2000} # default value
|
||||
|
||||
for ((i=1; i<16; ++i)); do
|
||||
echo "Matrix size: $i x $i :"
|
||||
$CXX -O3 -I.. -DNDEBUG benchmark.cpp -DMATSIZE=$i -DEIGEN_UNROLLING_LIMIT=1024 -DEIGEN_UNROLLING_LIMIT=400 -o benchmark && time ./benchmark >/dev/null
|
||||
$CXX -O3 -I.. -DNDEBUG benchmark.cpp -DMATSIZE=$i -DEIGEN_UNROLLING_LIMIT=400 -o benchmark && time ./benchmark >/dev/null
|
||||
$CXX -O3 -I.. -DNDEBUG -finline-limit=10000 benchmark.cpp -DMATSIZE=$i -DEIGEN_DONT_USE_UNROLLED_LOOPS=1 -o benchmark && time ./benchmark >/dev/null
|
||||
echo " "
|
||||
done
|
||||
|
@ -1,4 +1,5 @@
|
||||
#!/bin/bash
|
||||
CXX=${CXX-g++} # default value unless caller has defined CXX
|
||||
echo "Fixed size 3x3, column-major, -DNDEBUG"
|
||||
$CXX -O3 -I .. -DNDEBUG benchmark.cpp -o benchmark && time ./benchmark >/dev/null
|
||||
echo "Fixed size 3x3, column-major, with asserts"
|
||||
|
@ -2,9 +2,10 @@ project(EigenBlas)
|
||||
|
||||
add_custom_target(blas)
|
||||
|
||||
set(EigenBlas_SRCS single.cpp double.cpp complex_single.cpp complex_double.cpp)
|
||||
set(EigenBlas_SRCS single.cpp double.cpp complex_single.cpp complex_double.cpp xerbla.cpp)
|
||||
|
||||
add_library(eigen_blas SHARED ${EigenBlas_SRCS})
|
||||
add_library(eigen_blas ${EigenBlas_SRCS})
|
||||
# add_library(eigen_blas SHARED ${EigenBlas_SRCS})
|
||||
add_dependencies(blas eigen_blas)
|
||||
|
||||
install(TARGETS eigen_blas
|
||||
|
@ -25,6 +25,8 @@
|
||||
#ifndef EIGEN_BLAS_COMMON_H
|
||||
#define EIGEN_BLAS_COMMON_H
|
||||
|
||||
#include <iostream>
|
||||
|
||||
#ifndef SCALAR
|
||||
#error the token SCALAR must be defined to compile this file
|
||||
#endif
|
||||
@ -34,13 +36,12 @@ extern "C"
|
||||
{
|
||||
#endif
|
||||
|
||||
#include <blas.h>
|
||||
#include "../bench/btl/libs/C_BLAS/blas.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
#define NOTR 0
|
||||
#define TR 1
|
||||
#define ADJ 2
|
||||
@ -75,27 +76,6 @@ extern "C"
|
||||
#include <Eigen/Jacobi>
|
||||
using namespace Eigen;
|
||||
|
||||
template<typename T>
|
||||
Block<Map<Matrix<T,Dynamic,Dynamic> >, Dynamic, Dynamic>
|
||||
matrix(T* data, int rows, int cols, int stride)
|
||||
{
|
||||
return Map<Matrix<T,Dynamic,Dynamic> >(data, stride, cols).block(0,0,rows,cols);
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
Block<Map<Matrix<T,Dynamic,Dynamic,RowMajor> >, Dynamic, 1>
|
||||
vector(T* data, int size, int incr)
|
||||
{
|
||||
return Map<Matrix<T,Dynamic,Dynamic,RowMajor> >(data, size, incr).col(0);
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
Map<Matrix<T,Dynamic,1> >
|
||||
vector(T* data, int size)
|
||||
{
|
||||
return Map<Matrix<T,Dynamic,1> >(data, size);
|
||||
}
|
||||
|
||||
typedef SCALAR Scalar;
|
||||
typedef NumTraits<Scalar>::Real RealScalar;
|
||||
typedef std::complex<RealScalar> Complex;
|
||||
@ -106,10 +86,29 @@ enum
|
||||
Conj = IsComplex
|
||||
};
|
||||
|
||||
typedef Block<Map<Matrix<Scalar,Dynamic,Dynamic> >, Dynamic, Dynamic> MatrixType;
|
||||
typedef Block<Map<Matrix<Scalar,Dynamic,Dynamic, RowMajor> >, Dynamic, 1> StridedVectorType;
|
||||
typedef Map<Matrix<Scalar,Dynamic,Dynamic>, 0, OuterStride<Dynamic> > MatrixType;
|
||||
typedef Map<Matrix<Scalar,Dynamic,1>, 0, InnerStride<Dynamic> > StridedVectorType;
|
||||
typedef Map<Matrix<Scalar,Dynamic,1> > CompactVectorType;
|
||||
|
||||
template<typename T>
|
||||
Map<Matrix<T,Dynamic,Dynamic>, 0, OuterStride<Dynamic> >
|
||||
matrix(T* data, int rows, int cols, int stride)
|
||||
{
|
||||
return Map<Matrix<T,Dynamic,Dynamic>, 0, OuterStride<Dynamic> >(data, rows, cols, OuterStride<Dynamic>(stride));
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
Map<Matrix<T,Dynamic,1>, 0, InnerStride<Dynamic> > vector(T* data, int size, int incr)
|
||||
{
|
||||
return Map<Matrix<T,Dynamic,1>, 0, InnerStride<Dynamic> >(data, size, InnerStride<Dynamic>(incr));
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
Map<Matrix<T,Dynamic,1> > vector(T* data, int size)
|
||||
{
|
||||
return Map<Matrix<T,Dynamic,1> >(data, size);
|
||||
}
|
||||
|
||||
#define EIGEN_BLAS_FUNC(X) EIGEN_CAT(SCALAR_SUFFIX,X##_)
|
||||
|
||||
#endif // EIGEN_BLAS_COMMON_H
|
||||
|
@ -23,7 +23,8 @@
|
||||
// Eigen. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
#define SCALAR std::complex<double>
|
||||
#define SCALAR_SUFFIX c
|
||||
#define SCALAR_SUFFIX z
|
||||
#define REAL_SCALAR_SUFFIX d
|
||||
#define ISCOMPLEX 1
|
||||
|
||||
#include "level1_impl.h"
|
||||
|
@ -23,7 +23,8 @@
|
||||
// Eigen. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
#define SCALAR std::complex<float>
|
||||
#define SCALAR_SUFFIX z
|
||||
#define SCALAR_SUFFIX c
|
||||
#define REAL_SCALAR_SUFFIX s
|
||||
#define ISCOMPLEX 1
|
||||
|
||||
#include "level1_impl.h"
|
||||
|
@ -30,52 +30,111 @@ int EIGEN_BLAS_FUNC(axpy)(int *n, RealScalar *palpha, RealScalar *px, int *incx,
|
||||
Scalar* y = reinterpret_cast<Scalar*>(py);
|
||||
Scalar alpha = *reinterpret_cast<Scalar*>(palpha);
|
||||
|
||||
if(*incx==1 && *incy==1)
|
||||
vector(y,*n) += alpha * vector(x,*n);
|
||||
else
|
||||
vector(y,*n,*incy) += alpha * vector(x,*n,*incx);
|
||||
// std::cerr << "axpy " << *n << " " << alpha << " " << *incx << " " << *incy << "\n";
|
||||
|
||||
return 1;
|
||||
if(*incx==1 && *incy==1) vector(y,*n) += alpha * vector(x,*n);
|
||||
else if(*incx>0 && *incy>0) vector(y,*n,*incy) += alpha * vector(x,*n,*incx);
|
||||
else if(*incx>0 && *incy<0) vector(y,*n,-*incy).reverse() += alpha * vector(x,*n,*incx);
|
||||
else if(*incx<0 && *incy>0) vector(y,*n,*incy) += alpha * vector(x,*n,-*incx).reverse();
|
||||
else if(*incx<0 && *incy<0) vector(y,*n,-*incy).reverse() += alpha * vector(x,*n,-*incx).reverse();
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
#if !ISCOMPLEX
|
||||
// computes the sum of magnitudes of all vector elements or, for a complex vector x, the sum
|
||||
// res = |Rex1| + |Imx1| + |Rex2| + |Imx2| + ... + |Rexn| + |Imxn|, where x is a vector of order n
|
||||
RealScalar EIGEN_BLAS_FUNC(asum)(int *n, RealScalar *px, int *incx)
|
||||
{
|
||||
int size = IsComplex ? 2* *n : *n;
|
||||
// std::cerr << "_asum " << *n << " " << *incx << "\n";
|
||||
|
||||
if(*incx==1)
|
||||
return vector(px,size).cwise().abs().sum();
|
||||
else
|
||||
return vector(px,size,*incx).cwise().abs().sum();
|
||||
Scalar* x = reinterpret_cast<Scalar*>(px);
|
||||
|
||||
return 1;
|
||||
if(*n<=0) return 0;
|
||||
|
||||
if(*incx==1) return vector(x,*n).cwiseAbs().sum();
|
||||
else return vector(x,*n,std::abs(*incx)).cwiseAbs().sum();
|
||||
}
|
||||
#else
|
||||
|
||||
struct ei_scalar_norm1_op {
|
||||
typedef RealScalar result_type;
|
||||
EIGEN_EMPTY_STRUCT_CTOR(ei_scalar_norm1_op)
|
||||
inline RealScalar operator() (const Scalar& a) const { return ei_norm1(a); }
|
||||
};
|
||||
namespace Eigen {
|
||||
template<> struct ei_functor_traits<ei_scalar_norm1_op >
|
||||
{
|
||||
enum { Cost = 3 * NumTraits<Scalar>::AddCost, PacketAccess = 0 };
|
||||
};
|
||||
}
|
||||
|
||||
RealScalar EIGEN_CAT(EIGEN_CAT(REAL_SCALAR_SUFFIX,SCALAR_SUFFIX),asum_)(int *n, RealScalar *px, int *incx)
|
||||
{
|
||||
// std::cerr << "__asum " << *n << " " << *incx << "\n";
|
||||
|
||||
Complex* x = reinterpret_cast<Complex*>(px);
|
||||
|
||||
if(*n<=0) return 0;
|
||||
|
||||
if(*incx==1) return vector(x,*n).unaryExpr<ei_scalar_norm1_op>().sum();
|
||||
else return vector(x,*n,std::abs(*incx)).unaryExpr<ei_scalar_norm1_op>().sum();
|
||||
}
|
||||
#endif
|
||||
|
||||
int EIGEN_BLAS_FUNC(copy)(int *n, RealScalar *px, int *incx, RealScalar *py, int *incy)
|
||||
{
|
||||
int size = IsComplex ? 2* *n : *n;
|
||||
// std::cerr << "_copy " << *n << " " << *incx << " " << *incy << "\n";
|
||||
|
||||
if(*incx==1 && *incy==1)
|
||||
vector(py,size) = vector(px,size);
|
||||
else
|
||||
vector(py,size,*incy) = vector(px,size,*incx);
|
||||
Scalar* x = reinterpret_cast<Scalar*>(px);
|
||||
Scalar* y = reinterpret_cast<Scalar*>(py);
|
||||
|
||||
return 1;
|
||||
if(*incx==1 && *incy==1) vector(y,*n) = vector(x,*n);
|
||||
else if(*incx>0 && *incy>0) vector(y,*n,*incy) = vector(x,*n,*incx);
|
||||
else if(*incx>0 && *incy<0) vector(y,*n,-*incy).reverse() = vector(x,*n,*incx);
|
||||
else if(*incx<0 && *incy>0) vector(y,*n,*incy) = vector(x,*n,-*incx).reverse();
|
||||
else if(*incx<0 && *incy<0) vector(y,*n,-*incy).reverse() = vector(x,*n,-*incx).reverse();
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
// computes a vector-vector dot product.
|
||||
Scalar EIGEN_BLAS_FUNC(dot)(int *n, RealScalar *px, int *incx, RealScalar *py, int *incy)
|
||||
{
|
||||
// std::cerr << "_dot " << *n << " " << *incx << " " << *incy << "\n";
|
||||
|
||||
if(*n<=0)
|
||||
return 0;
|
||||
|
||||
Scalar* x = reinterpret_cast<Scalar*>(px);
|
||||
Scalar* y = reinterpret_cast<Scalar*>(py);
|
||||
|
||||
if(*incx==1 && *incy==1)
|
||||
return (vector(x,*n).cwise()*vector(y,*n)).sum();
|
||||
|
||||
return (vector(x,*n,*incx).cwise()*vector(y,*n,*incy)).sum();
|
||||
if(*incx==1 && *incy==1) return (vector(x,*n).cwiseProduct(vector(y,*n))).sum();
|
||||
else if(*incx>0 && *incy>0) return (vector(x,*n,*incx).cwiseProduct(vector(y,*n,*incy))).sum();
|
||||
else if(*incx<0 && *incy>0) return (vector(x,*n,-*incx).reverse().cwiseProduct(vector(y,*n,*incy))).sum();
|
||||
else if(*incx>0 && *incy<0) return (vector(x,*n,*incx).cwiseProduct(vector(y,*n,-*incy).reverse())).sum();
|
||||
else if(*incx<0 && *incy<0) return (vector(x,*n,-*incx).reverse().cwiseProduct(vector(y,*n,-*incy).reverse())).sum();
|
||||
else return 0;
|
||||
}
|
||||
|
||||
int EIGEN_CAT(EIGEN_CAT(i,SCALAR_SUFFIX),amax_)(int *n, RealScalar *px, int *incx)
|
||||
{
|
||||
// std::cerr << "i_amax " << *n << " " << *incx << "\n";
|
||||
|
||||
Scalar* x = reinterpret_cast<Scalar*>(px);
|
||||
|
||||
if(*n<=0)
|
||||
return 0;
|
||||
|
||||
int ret;
|
||||
|
||||
if(*incx==1) vector(x,*n).cwiseAbs().maxCoeff(&ret);
|
||||
else vector(x,*n,std::abs(*incx)).cwiseAbs().maxCoeff(&ret);
|
||||
|
||||
return ret+1;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
|
||||
// computes a vector-vector dot product with extended precision.
|
||||
@ -96,53 +155,100 @@ Scalar EIGEN_BLAS_FUNC(sdot)(int *n, RealScalar *px, int *incx, RealScalar *py,
|
||||
#if ISCOMPLEX
|
||||
|
||||
// computes a dot product of a conjugated vector with another vector.
|
||||
Scalar EIGEN_BLAS_FUNC(dotc)(int *n, RealScalar *px, int *incx, RealScalar *py, int *incy)
|
||||
void EIGEN_BLAS_FUNC(dotc)(RealScalar* dot, int *n, RealScalar *px, int *incx, RealScalar *py, int *incy)
|
||||
{
|
||||
|
||||
std::cerr << "Eigen BLAS: _dotc is not implemented yet\n";
|
||||
|
||||
return;
|
||||
|
||||
// TODO: find how to return a complex to fortran
|
||||
|
||||
// std::cerr << "_dotc " << *n << " " << *incx << " " << *incy << "\n";
|
||||
|
||||
Scalar* x = reinterpret_cast<Scalar*>(px);
|
||||
Scalar* y = reinterpret_cast<Scalar*>(py);
|
||||
|
||||
if(*incx==1 && *incy==1)
|
||||
return vector(x,*n).dot(vector(y,*n));
|
||||
|
||||
return vector(x,*n,*incx).dot(vector(y,*n,*incy));
|
||||
*reinterpret_cast<Scalar*>(dot) = vector(x,*n).dot(vector(y,*n));
|
||||
else
|
||||
*reinterpret_cast<Scalar*>(dot) = vector(x,*n,*incx).dot(vector(y,*n,*incy));
|
||||
}
|
||||
|
||||
// computes a vector-vector dot product without complex conjugation.
|
||||
Scalar EIGEN_BLAS_FUNC(dotu)(int *n, RealScalar *px, int *incx, RealScalar *py, int *incy)
|
||||
void EIGEN_BLAS_FUNC(dotu)(RealScalar* dot, int *n, RealScalar *px, int *incx, RealScalar *py, int *incy)
|
||||
{
|
||||
std::cerr << "Eigen BLAS: _dotu is not implemented yet\n";
|
||||
|
||||
return;
|
||||
|
||||
// TODO: find how to return a complex to fortran
|
||||
|
||||
// std::cerr << "_dotu " << *n << " " << *incx << " " << *incy << "\n";
|
||||
|
||||
Scalar* x = reinterpret_cast<Scalar*>(px);
|
||||
Scalar* y = reinterpret_cast<Scalar*>(py);
|
||||
|
||||
if(*incx==1 && *incy==1)
|
||||
return (vector(x,*n).cwise()*vector(y,*n)).sum();
|
||||
|
||||
return (vector(x,*n,*incx).cwise()*vector(y,*n,*incy)).sum();
|
||||
*reinterpret_cast<Scalar*>(dot) = (vector(x,*n).cwiseProduct(vector(y,*n))).sum();
|
||||
else
|
||||
*reinterpret_cast<Scalar*>(dot) = (vector(x,*n,*incx).cwiseProduct(vector(y,*n,*incy))).sum();
|
||||
}
|
||||
|
||||
#endif // ISCOMPLEX
|
||||
|
||||
#if !ISCOMPLEX
|
||||
// computes the Euclidean norm of a vector.
|
||||
Scalar EIGEN_BLAS_FUNC(nrm2)(int *n, RealScalar *px, int *incx)
|
||||
{
|
||||
// std::cerr << "_nrm2 " << *n << " " << *incx << "\n";
|
||||
Scalar* x = reinterpret_cast<Scalar*>(px);
|
||||
|
||||
if(*n<=0)
|
||||
return 0;
|
||||
|
||||
if(*incx==1) return vector(x,*n).norm();
|
||||
else return vector(x,*n,std::abs(*incx)).norm();
|
||||
}
|
||||
#else
|
||||
RealScalar EIGEN_CAT(EIGEN_CAT(REAL_SCALAR_SUFFIX,SCALAR_SUFFIX),nrm2_)(int *n, RealScalar *px, int *incx)
|
||||
{
|
||||
// std::cerr << "__nrm2 " << *n << " " << *incx << "\n";
|
||||
Scalar* x = reinterpret_cast<Scalar*>(px);
|
||||
|
||||
if(*n<=0)
|
||||
return 0;
|
||||
|
||||
if(*incx==1)
|
||||
return vector(x,*n).norm();
|
||||
|
||||
return vector(x,*n,*incx).norm();
|
||||
}
|
||||
#endif
|
||||
|
||||
int EIGEN_BLAS_FUNC(rot)(int *n, RealScalar *px, int *incx, RealScalar *py, int *incy, RealScalar *pc, RealScalar *ps)
|
||||
{
|
||||
// std::cerr << "_rot " << *n << " " << *incx << " " << *incy << "\n";
|
||||
Scalar* x = reinterpret_cast<Scalar*>(px);
|
||||
Scalar* y = reinterpret_cast<Scalar*>(py);
|
||||
Scalar c = *reinterpret_cast<Scalar*>(pc);
|
||||
Scalar s = *reinterpret_cast<Scalar*>(ps);
|
||||
|
||||
StridedVectorType vx(vector(x,*n,*incx));
|
||||
StridedVectorType vy(vector(y,*n,*incy));
|
||||
ei_apply_rotation_in_the_plane(vx, vy, PlanarRotation<Scalar>(c,s));
|
||||
return 1;
|
||||
if(*n<=0)
|
||||
return 0;
|
||||
|
||||
StridedVectorType vx(vector(x,*n,std::abs(*incx)));
|
||||
StridedVectorType vy(vector(y,*n,std::abs(*incy)));
|
||||
|
||||
Reverse<StridedVectorType> rvx(vx);
|
||||
Reverse<StridedVectorType> rvy(vy);
|
||||
|
||||
if(*incx<0 && *incy>0) ei_apply_rotation_in_the_plane(rvx, vy, PlanarRotation<Scalar>(c,s));
|
||||
else if(*incx>0 && *incy<0) ei_apply_rotation_in_the_plane(vx, rvy, PlanarRotation<Scalar>(c,s));
|
||||
else ei_apply_rotation_in_the_plane(vx, vy, PlanarRotation<Scalar>(c,s));
|
||||
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int EIGEN_BLAS_FUNC(rotg)(RealScalar *pa, RealScalar *pb, RealScalar *pc, RealScalar *ps)
|
||||
@ -157,7 +263,7 @@ int EIGEN_BLAS_FUNC(rotg)(RealScalar *pa, RealScalar *pb, RealScalar *pc, RealSc
|
||||
*c = r.c();
|
||||
*s = r.s();
|
||||
|
||||
return 1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
#if !ISCOMPLEX
|
||||
@ -183,43 +289,56 @@ int EIGEN_BLAS_FUNC(rotmg)(RealScalar *d1, RealScalar *d2, RealScalar *x1, RealS
|
||||
*/
|
||||
#endif // !ISCOMPLEX
|
||||
|
||||
int EIGEN_BLAS_FUNC(scal)(int *n, RealScalar *px, int *incx, RealScalar *palpha)
|
||||
int EIGEN_BLAS_FUNC(scal)(int *n, RealScalar *palpha, RealScalar *px, int *incx)
|
||||
{
|
||||
Scalar* x = reinterpret_cast<Scalar*>(px);
|
||||
Scalar alpha = *reinterpret_cast<Scalar*>(palpha);
|
||||
|
||||
if(*incx==1)
|
||||
vector(x,*n) *= alpha;
|
||||
// std::cerr << "_scal " << *n << " " << alpha << " " << *incx << "\n";
|
||||
|
||||
vector(x,*n,*incx) *= alpha;
|
||||
if(*n<=0)
|
||||
return 0;
|
||||
|
||||
return 1;
|
||||
if(*incx==1) vector(x,*n) *= alpha;
|
||||
else vector(x,*n,std::abs(*incx)) *= alpha;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
#if ISCOMPLEX
|
||||
int EIGEN_CAT(EIGEN_CAT(SCALAR_SUFFIX,REAL_SCALAR_SUFFIX),scal_)(int *n, RealScalar *palpha, RealScalar *px, int *incx)
|
||||
{
|
||||
Scalar* x = reinterpret_cast<Scalar*>(px);
|
||||
RealScalar alpha = *palpha;
|
||||
|
||||
// std::cerr << "__scal " << *n << " " << alpha << " " << *incx << "\n";
|
||||
|
||||
if(*n<=0)
|
||||
return 0;
|
||||
|
||||
if(*incx==1) vector(x,*n) *= alpha;
|
||||
else vector(x,*n,std::abs(*incx)) *= alpha;
|
||||
|
||||
return 0;
|
||||
}
|
||||
#endif // ISCOMPLEX
|
||||
|
||||
int EIGEN_BLAS_FUNC(swap)(int *n, RealScalar *px, int *incx, RealScalar *py, int *incy)
|
||||
{
|
||||
int size = IsComplex ? 2* *n : *n;
|
||||
// std::cerr << "_swap " << *n << " " << *incx << " " << *incy << "\n";
|
||||
|
||||
if(*incx==1 && *incy==1)
|
||||
vector(py,size).swap(vector(px,size));
|
||||
else
|
||||
vector(py,size,*incy).swap(vector(px,size,*incx));
|
||||
Scalar* x = reinterpret_cast<Scalar*>(px);
|
||||
Scalar* y = reinterpret_cast<Scalar*>(py);
|
||||
|
||||
if(*n<=0)
|
||||
return 0;
|
||||
|
||||
if(*incx==1 && *incy==1) vector(y,*n).swap(vector(x,*n));
|
||||
else if(*incx>0 && *incy>0) vector(y,*n,*incy).swap(vector(x,*n,*incx));
|
||||
else if(*incx>0 && *incy<0) vector(y,*n,-*incy).reverse().swap(vector(x,*n,*incx));
|
||||
else if(*incx<0 && *incy>0) vector(y,*n,*incy).swap(vector(x,*n,-*incx).reverse());
|
||||
else if(*incx<0 && *incy<0) vector(y,*n,-*incy).reverse().swap(vector(x,*n,-*incx).reverse());
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
#if !ISCOMPLEX
|
||||
|
||||
RealScalar EIGEN_BLAS_FUNC(casum)(int *n, RealScalar *px, int *incx)
|
||||
{
|
||||
Complex* x = reinterpret_cast<Complex*>(px);
|
||||
|
||||
if(*incx==1)
|
||||
return vector(x,*n).cwise().abs().sum();
|
||||
else
|
||||
return vector(x,*n,*incx).cwise().abs().sum();
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
#endif // ISCOMPLEX
|
||||
|
@ -56,9 +56,11 @@ int EIGEN_BLAS_FUNC(gemv)(char *opa, int *m, int *n, RealScalar *palpha, RealSca
|
||||
return 1;
|
||||
}
|
||||
|
||||
/*
|
||||
|
||||
int EIGEN_BLAS_FUNC(trsv)(char *uplo, char *opa, char *diag, int *n, RealScalar *pa, int *lda, RealScalar *pb, int *incb)
|
||||
{
|
||||
return 0;
|
||||
|
||||
typedef void (*functype)(int, const Scalar *, int, Scalar *, int);
|
||||
functype func[16];
|
||||
|
||||
@ -95,13 +97,14 @@ int EIGEN_BLAS_FUNC(trsv)(char *uplo, char *opa, char *diag, int *n, RealScalar
|
||||
return 0;
|
||||
|
||||
func[code](*n, a, *lda, b, *incb);
|
||||
return 1;
|
||||
return 0;
|
||||
}
|
||||
*/
|
||||
|
||||
/*
|
||||
|
||||
|
||||
int EIGEN_BLAS_FUNC(trmv)(char *uplo, char *opa, char *diag, int *n, RealScalar *pa, int *lda, RealScalar *pb, int *incb)
|
||||
{
|
||||
return 0;
|
||||
// TODO
|
||||
|
||||
typedef void (*functype)(int, const Scalar *, int, const Scalar *, int, Scalar *, int);
|
||||
@ -140,13 +143,21 @@ int EIGEN_BLAS_FUNC(trmv)(char *uplo, char *opa, char *diag, int *n, RealScalar
|
||||
return 0;
|
||||
|
||||
func[code](*n, a, *lda, b, *incb, b, *incb);
|
||||
return 1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
// y = alpha*A*x + beta*y
|
||||
int EIGEN_BLAS_FUNC(ssymv) (char *uplo, int *n, RealScalar *palpha, RealScalar *pa, int *lda, RealScalar *px, int *incx, RealScalar *pbeta, RealScalar *py, int *incy)
|
||||
{
|
||||
return 0;
|
||||
|
||||
// TODO
|
||||
}
|
||||
*/
|
||||
|
||||
/*
|
||||
int EIGEN_BLAS_FUNC(syr)(char *uplo, int *n, RealScalar *palpha, RealScalar *pa, int *inca, RealScalar *pc, int *ldc)
|
||||
{
|
||||
return 0;
|
||||
|
||||
// TODO
|
||||
typedef void (*functype)(int, const Scalar *, int, Scalar *, int, Scalar);
|
||||
functype func[2];
|
||||
@ -174,11 +185,13 @@ int EIGEN_BLAS_FUNC(syr)(char *uplo, int *n, RealScalar *palpha, RealScalar *pa,
|
||||
func[code](*n, a, *inca, c, *ldc, alpha);
|
||||
return 1;
|
||||
}
|
||||
*/
|
||||
|
||||
/*
|
||||
|
||||
|
||||
int EIGEN_BLAS_FUNC(syr2)(char *uplo, int *n, RealScalar *palpha, RealScalar *pa, int *inca, RealScalar *pb, int *incb, RealScalar *pc, int *ldc)
|
||||
{
|
||||
return 0;
|
||||
|
||||
// TODO
|
||||
typedef void (*functype)(int, const Scalar *, int, const Scalar *, int, Scalar *, int, Scalar);
|
||||
functype func[2];
|
||||
@ -207,7 +220,7 @@ int EIGEN_BLAS_FUNC(syr2)(char *uplo, int *n, RealScalar *palpha, RealScalar *pa
|
||||
func[code](*n, a, *inca, b, *incb, c, *ldc, alpha);
|
||||
return 1;
|
||||
}
|
||||
*/
|
||||
|
||||
|
||||
#if ISCOMPLEX
|
||||
|
||||
|
@ -26,8 +26,9 @@
|
||||
|
||||
int EIGEN_BLAS_FUNC(gemm)(char *opa, char *opb, int *m, int *n, int *k, RealScalar *palpha, RealScalar *pa, int *lda, RealScalar *pb, int *ldb, RealScalar *pbeta, RealScalar *pc, int *ldc)
|
||||
{
|
||||
// std::cerr << "in gemm " << *opa << " " << *opb << " " << *m << " " << *n << " " << *k << " " << *lda << " " << *ldb << " " << *ldc << " " << *palpha << " " << *pbeta << "\n";
|
||||
typedef void (*functype)(int, int, int, const Scalar *, int, const Scalar *, int, Scalar *, int, Scalar);
|
||||
functype func[12];
|
||||
static functype func[12];
|
||||
|
||||
static bool init = false;
|
||||
if(!init)
|
||||
@ -52,21 +53,29 @@ int EIGEN_BLAS_FUNC(gemm)(char *opa, char *opb, int *m, int *n, int *k, RealScal
|
||||
Scalar alpha = *reinterpret_cast<Scalar*>(palpha);
|
||||
Scalar beta = *reinterpret_cast<Scalar*>(pbeta);
|
||||
|
||||
if(beta!=Scalar(1))
|
||||
matrix(c, *m, *n, *ldc) *= beta;
|
||||
|
||||
int code = OP(*opa) | (OP(*opb) << 2);
|
||||
if(code>=12 || func[code]==0)
|
||||
if(code>=12 || func[code]==0 || (*m<0) || (*n<0) || (*k<0))
|
||||
{
|
||||
int info = 1;
|
||||
xerbla_("GEMM", &info, 4);
|
||||
return 0;
|
||||
}
|
||||
|
||||
if(beta!=Scalar(1))
|
||||
if(beta==Scalar(0))
|
||||
matrix(c, *m, *n, *ldc).setZero();
|
||||
else
|
||||
matrix(c, *m, *n, *ldc) *= beta;
|
||||
|
||||
func[code](*m, *n, *k, a, *lda, b, *ldb, c, *ldc, alpha);
|
||||
return 1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
int EIGEN_BLAS_FUNC(trsm)(char *side, char *uplo, char *opa, char *diag, int *m, int *n, RealScalar *palpha, RealScalar *pa, int *lda, RealScalar *pb, int *ldb)
|
||||
{
|
||||
// std::cerr << "in trsm " << *side << " " << *uplo << " " << *opa << " " << *diag << " " << *m << "," << *n << " " << *palpha << " " << *lda << " " << *ldb<< "\n";
|
||||
typedef void (*functype)(int, int, const Scalar *, int, Scalar *, int);
|
||||
functype func[32];
|
||||
static functype func[32];
|
||||
|
||||
static bool init = false;
|
||||
if(!init)
|
||||
@ -74,38 +83,38 @@ int EIGEN_BLAS_FUNC(trsm)(char *side, char *uplo, char *opa, char *diag, int *m,
|
||||
for(int k=0; k<32; ++k)
|
||||
func[k] = 0;
|
||||
|
||||
func[NOTR | (LEFT << 2) | (UP << 3) | (NUNIT << 4)] = (ei_triangular_solve_matrix<Scalar,OnTheLeft, UpperTriangular|0, false,ColMajor,ColMajor>::run);
|
||||
func[TR | (LEFT << 2) | (UP << 3) | (NUNIT << 4)] = (ei_triangular_solve_matrix<Scalar,OnTheLeft, UpperTriangular|0, false,RowMajor,ColMajor>::run);
|
||||
func[ADJ | (LEFT << 2) | (UP << 3) | (NUNIT << 4)] = (ei_triangular_solve_matrix<Scalar,OnTheLeft, UpperTriangular|0, Conj, RowMajor,ColMajor>::run);
|
||||
func[NOTR | (LEFT << 2) | (UP << 3) | (NUNIT << 4)] = (ei_triangular_solve_matrix<Scalar,OnTheLeft, Upper|0, false,ColMajor,ColMajor>::run);
|
||||
func[TR | (LEFT << 2) | (UP << 3) | (NUNIT << 4)] = (ei_triangular_solve_matrix<Scalar,OnTheLeft, Lower|0, false,RowMajor,ColMajor>::run);
|
||||
func[ADJ | (LEFT << 2) | (UP << 3) | (NUNIT << 4)] = (ei_triangular_solve_matrix<Scalar,OnTheLeft, Lower|0, Conj, RowMajor,ColMajor>::run);
|
||||
|
||||
func[NOTR | (RIGHT << 2) | (UP << 3) | (NUNIT << 4)] = (ei_triangular_solve_matrix<Scalar,OnTheRight,UpperTriangular|0, false,ColMajor,ColMajor>::run);
|
||||
func[TR | (RIGHT << 2) | (UP << 3) | (NUNIT << 4)] = (ei_triangular_solve_matrix<Scalar,OnTheRight,UpperTriangular|0, false,RowMajor,ColMajor>::run);
|
||||
func[ADJ | (RIGHT << 2) | (UP << 3) | (NUNIT << 4)] = (ei_triangular_solve_matrix<Scalar,OnTheRight,UpperTriangular|0, Conj, RowMajor,ColMajor>::run);
|
||||
func[NOTR | (RIGHT << 2) | (UP << 3) | (NUNIT << 4)] = (ei_triangular_solve_matrix<Scalar,OnTheRight,Upper|0, false,ColMajor,ColMajor>::run);
|
||||
func[TR | (RIGHT << 2) | (UP << 3) | (NUNIT << 4)] = (ei_triangular_solve_matrix<Scalar,OnTheRight,Lower|0, false,RowMajor,ColMajor>::run);
|
||||
func[ADJ | (RIGHT << 2) | (UP << 3) | (NUNIT << 4)] = (ei_triangular_solve_matrix<Scalar,OnTheRight,Lower|0, Conj, RowMajor,ColMajor>::run);
|
||||
|
||||
func[NOTR | (LEFT << 2) | (LO << 3) | (NUNIT << 4)] = (ei_triangular_solve_matrix<Scalar,OnTheLeft, LowerTriangular|0, false,ColMajor,ColMajor>::run);
|
||||
func[TR | (LEFT << 2) | (LO << 3) | (NUNIT << 4)] = (ei_triangular_solve_matrix<Scalar,OnTheLeft, LowerTriangular|0, false,RowMajor,ColMajor>::run);
|
||||
func[ADJ | (LEFT << 2) | (LO << 3) | (NUNIT << 4)] = (ei_triangular_solve_matrix<Scalar,OnTheLeft, LowerTriangular|0, Conj, RowMajor,ColMajor>::run);
|
||||
func[NOTR | (LEFT << 2) | (LO << 3) | (NUNIT << 4)] = (ei_triangular_solve_matrix<Scalar,OnTheLeft, Lower|0, false,ColMajor,ColMajor>::run);
|
||||
func[TR | (LEFT << 2) | (LO << 3) | (NUNIT << 4)] = (ei_triangular_solve_matrix<Scalar,OnTheLeft, Upper|0, false,RowMajor,ColMajor>::run);
|
||||
func[ADJ | (LEFT << 2) | (LO << 3) | (NUNIT << 4)] = (ei_triangular_solve_matrix<Scalar,OnTheLeft, Upper|0, Conj, RowMajor,ColMajor>::run);
|
||||
|
||||
func[NOTR | (RIGHT << 2) | (LO << 3) | (NUNIT << 4)] = (ei_triangular_solve_matrix<Scalar,OnTheRight,LowerTriangular|0, false,ColMajor,ColMajor>::run);
|
||||
func[TR | (RIGHT << 2) | (LO << 3) | (NUNIT << 4)] = (ei_triangular_solve_matrix<Scalar,OnTheRight,LowerTriangular|0, false,RowMajor,ColMajor>::run);
|
||||
func[ADJ | (RIGHT << 2) | (LO << 3) | (NUNIT << 4)] = (ei_triangular_solve_matrix<Scalar,OnTheRight,LowerTriangular|0, Conj, RowMajor,ColMajor>::run);
|
||||
func[NOTR | (RIGHT << 2) | (LO << 3) | (NUNIT << 4)] = (ei_triangular_solve_matrix<Scalar,OnTheRight,Lower|0, false,ColMajor,ColMajor>::run);
|
||||
func[TR | (RIGHT << 2) | (LO << 3) | (NUNIT << 4)] = (ei_triangular_solve_matrix<Scalar,OnTheRight,Upper|0, false,RowMajor,ColMajor>::run);
|
||||
func[ADJ | (RIGHT << 2) | (LO << 3) | (NUNIT << 4)] = (ei_triangular_solve_matrix<Scalar,OnTheRight,Upper|0, Conj, RowMajor,ColMajor>::run);
|
||||
|
||||
|
||||
func[NOTR | (LEFT << 2) | (UP << 3) | (UNIT << 4)] = (ei_triangular_solve_matrix<Scalar,OnTheLeft, UpperTriangular|UnitDiagBit,false,ColMajor,ColMajor>::run);
|
||||
func[TR | (LEFT << 2) | (UP << 3) | (UNIT << 4)] = (ei_triangular_solve_matrix<Scalar,OnTheLeft, UpperTriangular|UnitDiagBit,false,RowMajor,ColMajor>::run);
|
||||
func[ADJ | (LEFT << 2) | (UP << 3) | (UNIT << 4)] = (ei_triangular_solve_matrix<Scalar,OnTheLeft, UpperTriangular|UnitDiagBit,Conj, RowMajor,ColMajor>::run);
|
||||
func[NOTR | (LEFT << 2) | (UP << 3) | (UNIT << 4)] = (ei_triangular_solve_matrix<Scalar,OnTheLeft, Upper|UnitDiag,false,ColMajor,ColMajor>::run);
|
||||
func[TR | (LEFT << 2) | (UP << 3) | (UNIT << 4)] = (ei_triangular_solve_matrix<Scalar,OnTheLeft, Lower|UnitDiag,false,RowMajor,ColMajor>::run);
|
||||
func[ADJ | (LEFT << 2) | (UP << 3) | (UNIT << 4)] = (ei_triangular_solve_matrix<Scalar,OnTheLeft, Lower|UnitDiag,Conj, RowMajor,ColMajor>::run);
|
||||
|
||||
func[NOTR | (RIGHT << 2) | (UP << 3) | (UNIT << 4)] = (ei_triangular_solve_matrix<Scalar,OnTheRight,UpperTriangular|UnitDiagBit,false,ColMajor,ColMajor>::run);
|
||||
func[TR | (RIGHT << 2) | (UP << 3) | (UNIT << 4)] = (ei_triangular_solve_matrix<Scalar,OnTheRight,UpperTriangular|UnitDiagBit,false,RowMajor,ColMajor>::run);
|
||||
func[ADJ | (RIGHT << 2) | (UP << 3) | (UNIT << 4)] = (ei_triangular_solve_matrix<Scalar,OnTheRight,UpperTriangular|UnitDiagBit,Conj, RowMajor,ColMajor>::run);
|
||||
func[NOTR | (RIGHT << 2) | (UP << 3) | (UNIT << 4)] = (ei_triangular_solve_matrix<Scalar,OnTheRight,Upper|UnitDiag,false,ColMajor,ColMajor>::run);
|
||||
func[TR | (RIGHT << 2) | (UP << 3) | (UNIT << 4)] = (ei_triangular_solve_matrix<Scalar,OnTheRight,Lower|UnitDiag,false,RowMajor,ColMajor>::run);
|
||||
func[ADJ | (RIGHT << 2) | (UP << 3) | (UNIT << 4)] = (ei_triangular_solve_matrix<Scalar,OnTheRight,Lower|UnitDiag,Conj, RowMajor,ColMajor>::run);
|
||||
|
||||
func[NOTR | (LEFT << 2) | (LO << 3) | (UNIT << 4)] = (ei_triangular_solve_matrix<Scalar,OnTheLeft, LowerTriangular|UnitDiagBit,false,ColMajor,ColMajor>::run);
|
||||
func[TR | (LEFT << 2) | (LO << 3) | (UNIT << 4)] = (ei_triangular_solve_matrix<Scalar,OnTheLeft, LowerTriangular|UnitDiagBit,false,RowMajor,ColMajor>::run);
|
||||
func[ADJ | (LEFT << 2) | (LO << 3) | (UNIT << 4)] = (ei_triangular_solve_matrix<Scalar,OnTheLeft, LowerTriangular|UnitDiagBit,Conj, RowMajor,ColMajor>::run);
|
||||
func[NOTR | (LEFT << 2) | (LO << 3) | (UNIT << 4)] = (ei_triangular_solve_matrix<Scalar,OnTheLeft, Lower|UnitDiag,false,ColMajor,ColMajor>::run);
|
||||
func[TR | (LEFT << 2) | (LO << 3) | (UNIT << 4)] = (ei_triangular_solve_matrix<Scalar,OnTheLeft, Upper|UnitDiag,false,RowMajor,ColMajor>::run);
|
||||
func[ADJ | (LEFT << 2) | (LO << 3) | (UNIT << 4)] = (ei_triangular_solve_matrix<Scalar,OnTheLeft, Upper|UnitDiag,Conj, RowMajor,ColMajor>::run);
|
||||
|
||||
func[NOTR | (RIGHT << 2) | (LO << 3) | (UNIT << 4)] = (ei_triangular_solve_matrix<Scalar,OnTheRight,LowerTriangular|UnitDiagBit,false,ColMajor,ColMajor>::run);
|
||||
func[TR | (RIGHT << 2) | (LO << 3) | (UNIT << 4)] = (ei_triangular_solve_matrix<Scalar,OnTheRight,LowerTriangular|UnitDiagBit,false,RowMajor,ColMajor>::run);
|
||||
func[ADJ | (RIGHT << 2) | (LO << 3) | (UNIT << 4)] = (ei_triangular_solve_matrix<Scalar,OnTheRight,LowerTriangular|UnitDiagBit,Conj, RowMajor,ColMajor>::run);
|
||||
func[NOTR | (RIGHT << 2) | (LO << 3) | (UNIT << 4)] = (ei_triangular_solve_matrix<Scalar,OnTheRight,Lower|UnitDiag,false,ColMajor,ColMajor>::run);
|
||||
func[TR | (RIGHT << 2) | (LO << 3) | (UNIT << 4)] = (ei_triangular_solve_matrix<Scalar,OnTheRight,Upper|UnitDiag,false,RowMajor,ColMajor>::run);
|
||||
func[ADJ | (RIGHT << 2) | (LO << 3) | (UNIT << 4)] = (ei_triangular_solve_matrix<Scalar,OnTheRight,Upper|UnitDiag,Conj, RowMajor,ColMajor>::run);
|
||||
|
||||
init = true;
|
||||
}
|
||||
@ -114,14 +123,23 @@ int EIGEN_BLAS_FUNC(trsm)(char *side, char *uplo, char *opa, char *diag, int *m,
|
||||
Scalar* b = reinterpret_cast<Scalar*>(pb);
|
||||
Scalar alpha = *reinterpret_cast<Scalar*>(palpha);
|
||||
|
||||
// TODO handle alpha
|
||||
|
||||
int code = OP(*opa) | (SIDE(*side) << 2) | (UPLO(*uplo) << 3) | (DIAG(*diag) << 4);
|
||||
if(code>=32 || func[code]==0)
|
||||
if(code>=32 || func[code]==0 || *m<0 || *n <0)
|
||||
{
|
||||
int info=1;
|
||||
xerbla_("TRSM",&info,4);
|
||||
return 0;
|
||||
}
|
||||
|
||||
func[code](*m, *n, a, *lda, b, *ldb);
|
||||
return 1;
|
||||
if(SIDE(*side)==LEFT)
|
||||
func[code](*m, *n, a, *lda, b, *ldb);
|
||||
else
|
||||
func[code](*n, *m, a, *lda, b, *ldb);
|
||||
|
||||
if(alpha!=Scalar(1))
|
||||
matrix(b,*m,*n,*ldb) *= alpha;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
@ -129,46 +147,46 @@ int EIGEN_BLAS_FUNC(trsm)(char *side, char *uplo, char *opa, char *diag, int *m,
|
||||
// b = alpha*b*op(a) for side = 'R'or'r'
|
||||
int EIGEN_BLAS_FUNC(trmm)(char *side, char *uplo, char *opa, char *diag, int *m, int *n, RealScalar *palpha, RealScalar *pa, int *lda, RealScalar *pb, int *ldb)
|
||||
{
|
||||
// std::cerr << "in trmm " << *side << " " << *uplo << " " << *opa << " " << *diag << " " << *m << " " << *n << " " << *lda << " " << *ldb << " " << *palpha << "\n";
|
||||
typedef void (*functype)(int, int, const Scalar *, int, const Scalar *, int, Scalar *, int, Scalar);
|
||||
functype func[32];
|
||||
|
||||
static functype func[32];
|
||||
static bool init = false;
|
||||
if(!init)
|
||||
{
|
||||
for(int k=0; k<32; ++k)
|
||||
func[k] = 0;
|
||||
|
||||
func[NOTR | (LEFT << 2) | (UP << 3) | (NUNIT << 4)] = (ei_product_triangular_matrix_matrix<Scalar,UpperTriangular|0, true, ColMajor,false,ColMajor,false,ColMajor>::run);
|
||||
func[TR | (LEFT << 2) | (UP << 3) | (NUNIT << 4)] = (ei_product_triangular_matrix_matrix<Scalar,UpperTriangular|0, true, RowMajor,false,ColMajor,false,ColMajor>::run);
|
||||
func[ADJ | (LEFT << 2) | (UP << 3) | (NUNIT << 4)] = (ei_product_triangular_matrix_matrix<Scalar,UpperTriangular|0, true, RowMajor,Conj, ColMajor,false,ColMajor>::run);
|
||||
func[NOTR | (LEFT << 2) | (UP << 3) | (NUNIT << 4)] = (ei_product_triangular_matrix_matrix<Scalar,Upper|0, true, ColMajor,false,ColMajor,false,ColMajor>::run);
|
||||
func[TR | (LEFT << 2) | (UP << 3) | (NUNIT << 4)] = (ei_product_triangular_matrix_matrix<Scalar,Lower|0, true, RowMajor,false,ColMajor,false,ColMajor>::run);
|
||||
func[ADJ | (LEFT << 2) | (UP << 3) | (NUNIT << 4)] = (ei_product_triangular_matrix_matrix<Scalar,Lower|0, true, RowMajor,Conj, ColMajor,false,ColMajor>::run);
|
||||
|
||||
func[NOTR | (RIGHT << 2) | (UP << 3) | (NUNIT << 4)] = (ei_product_triangular_matrix_matrix<Scalar,UpperTriangular|0, false,ColMajor,false,ColMajor,false,ColMajor>::run);
|
||||
func[TR | (RIGHT << 2) | (UP << 3) | (NUNIT << 4)] = (ei_product_triangular_matrix_matrix<Scalar,UpperTriangular|0, false,ColMajor,false,RowMajor,false,ColMajor>::run);
|
||||
func[ADJ | (RIGHT << 2) | (UP << 3) | (NUNIT << 4)] = (ei_product_triangular_matrix_matrix<Scalar,UpperTriangular|0, false,ColMajor,false,RowMajor,Conj, ColMajor>::run);
|
||||
func[NOTR | (RIGHT << 2) | (UP << 3) | (NUNIT << 4)] = (ei_product_triangular_matrix_matrix<Scalar,Upper|0, false,ColMajor,false,ColMajor,false,ColMajor>::run);
|
||||
func[TR | (RIGHT << 2) | (UP << 3) | (NUNIT << 4)] = (ei_product_triangular_matrix_matrix<Scalar,Lower|0, false,ColMajor,false,RowMajor,false,ColMajor>::run);
|
||||
func[ADJ | (RIGHT << 2) | (UP << 3) | (NUNIT << 4)] = (ei_product_triangular_matrix_matrix<Scalar,Lower|0, false,ColMajor,false,RowMajor,Conj, ColMajor>::run);
|
||||
|
||||
func[NOTR | (LEFT << 2) | (LO << 3) | (NUNIT << 4)] = (ei_product_triangular_matrix_matrix<Scalar,LowerTriangular|0, true, ColMajor,false,ColMajor,false,ColMajor>::run);
|
||||
func[TR | (LEFT << 2) | (LO << 3) | (NUNIT << 4)] = (ei_product_triangular_matrix_matrix<Scalar,LowerTriangular|0, true, RowMajor,false,ColMajor,false,ColMajor>::run);
|
||||
func[ADJ | (LEFT << 2) | (LO << 3) | (NUNIT << 4)] = (ei_product_triangular_matrix_matrix<Scalar,LowerTriangular|0, true, RowMajor,Conj, ColMajor,false,ColMajor>::run);
|
||||
func[NOTR | (LEFT << 2) | (LO << 3) | (NUNIT << 4)] = (ei_product_triangular_matrix_matrix<Scalar,Lower|0, true, ColMajor,false,ColMajor,false,ColMajor>::run);
|
||||
func[TR | (LEFT << 2) | (LO << 3) | (NUNIT << 4)] = (ei_product_triangular_matrix_matrix<Scalar,Upper|0, true, RowMajor,false,ColMajor,false,ColMajor>::run);
|
||||
func[ADJ | (LEFT << 2) | (LO << 3) | (NUNIT << 4)] = (ei_product_triangular_matrix_matrix<Scalar,Upper|0, true, RowMajor,Conj, ColMajor,false,ColMajor>::run);
|
||||
|
||||
func[NOTR | (RIGHT << 2) | (LO << 3) | (NUNIT << 4)] = (ei_product_triangular_matrix_matrix<Scalar,LowerTriangular|0, false,ColMajor,false,ColMajor,false,ColMajor>::run);
|
||||
func[TR | (RIGHT << 2) | (LO << 3) | (NUNIT << 4)] = (ei_product_triangular_matrix_matrix<Scalar,LowerTriangular|0, false,ColMajor,false,RowMajor,false,ColMajor>::run);
|
||||
func[ADJ | (RIGHT << 2) | (LO << 3) | (NUNIT << 4)] = (ei_product_triangular_matrix_matrix<Scalar,LowerTriangular|0, false,ColMajor,false,RowMajor,Conj, ColMajor>::run);
|
||||
func[NOTR | (RIGHT << 2) | (LO << 3) | (NUNIT << 4)] = (ei_product_triangular_matrix_matrix<Scalar,Lower|0, false,ColMajor,false,ColMajor,false,ColMajor>::run);
|
||||
func[TR | (RIGHT << 2) | (LO << 3) | (NUNIT << 4)] = (ei_product_triangular_matrix_matrix<Scalar,Upper|0, false,ColMajor,false,RowMajor,false,ColMajor>::run);
|
||||
func[ADJ | (RIGHT << 2) | (LO << 3) | (NUNIT << 4)] = (ei_product_triangular_matrix_matrix<Scalar,Upper|0, false,ColMajor,false,RowMajor,Conj, ColMajor>::run);
|
||||
|
||||
func[NOTR | (LEFT << 2) | (UP << 3) | (UNIT << 4)] = (ei_product_triangular_matrix_matrix<Scalar,UpperTriangular|UnitDiagBit,true, ColMajor,false,ColMajor,false,ColMajor>::run);
|
||||
func[TR | (LEFT << 2) | (UP << 3) | (UNIT << 4)] = (ei_product_triangular_matrix_matrix<Scalar,UpperTriangular|UnitDiagBit,true, RowMajor,false,ColMajor,false,ColMajor>::run);
|
||||
func[ADJ | (LEFT << 2) | (UP << 3) | (UNIT << 4)] = (ei_product_triangular_matrix_matrix<Scalar,UpperTriangular|UnitDiagBit,true, RowMajor,Conj, ColMajor,false,ColMajor>::run);
|
||||
func[NOTR | (LEFT << 2) | (UP << 3) | (UNIT << 4)] = (ei_product_triangular_matrix_matrix<Scalar,Upper|UnitDiag,true, ColMajor,false,ColMajor,false,ColMajor>::run);
|
||||
func[TR | (LEFT << 2) | (UP << 3) | (UNIT << 4)] = (ei_product_triangular_matrix_matrix<Scalar,Lower|UnitDiag,true, RowMajor,false,ColMajor,false,ColMajor>::run);
|
||||
func[ADJ | (LEFT << 2) | (UP << 3) | (UNIT << 4)] = (ei_product_triangular_matrix_matrix<Scalar,Lower|UnitDiag,true, RowMajor,Conj, ColMajor,false,ColMajor>::run);
|
||||
|
||||
func[NOTR | (RIGHT << 2) | (UP << 3) | (UNIT << 4)] = (ei_product_triangular_matrix_matrix<Scalar,UpperTriangular|UnitDiagBit,false,ColMajor,false,ColMajor,false,ColMajor>::run);
|
||||
func[TR | (RIGHT << 2) | (UP << 3) | (UNIT << 4)] = (ei_product_triangular_matrix_matrix<Scalar,UpperTriangular|UnitDiagBit,false,ColMajor,false,RowMajor,false,ColMajor>::run);
|
||||
func[ADJ | (RIGHT << 2) | (UP << 3) | (UNIT << 4)] = (ei_product_triangular_matrix_matrix<Scalar,UpperTriangular|UnitDiagBit,false,ColMajor,false,RowMajor,Conj, ColMajor>::run);
|
||||
func[NOTR | (RIGHT << 2) | (UP << 3) | (UNIT << 4)] = (ei_product_triangular_matrix_matrix<Scalar,Upper|UnitDiag,false,ColMajor,false,ColMajor,false,ColMajor>::run);
|
||||
func[TR | (RIGHT << 2) | (UP << 3) | (UNIT << 4)] = (ei_product_triangular_matrix_matrix<Scalar,Lower|UnitDiag,false,ColMajor,false,RowMajor,false,ColMajor>::run);
|
||||
func[ADJ | (RIGHT << 2) | (UP << 3) | (UNIT << 4)] = (ei_product_triangular_matrix_matrix<Scalar,Lower|UnitDiag,false,ColMajor,false,RowMajor,Conj, ColMajor>::run);
|
||||
|
||||
func[NOTR | (LEFT << 2) | (LO << 3) | (UNIT << 4)] = (ei_product_triangular_matrix_matrix<Scalar,LowerTriangular|UnitDiagBit,true, ColMajor,false,ColMajor,false,ColMajor>::run);
|
||||
func[TR | (LEFT << 2) | (LO << 3) | (UNIT << 4)] = (ei_product_triangular_matrix_matrix<Scalar,LowerTriangular|UnitDiagBit,true, RowMajor,false,ColMajor,false,ColMajor>::run);
|
||||
func[ADJ | (LEFT << 2) | (LO << 3) | (UNIT << 4)] = (ei_product_triangular_matrix_matrix<Scalar,LowerTriangular|UnitDiagBit,true, RowMajor,Conj, ColMajor,false,ColMajor>::run);
|
||||
func[NOTR | (LEFT << 2) | (LO << 3) | (UNIT << 4)] = (ei_product_triangular_matrix_matrix<Scalar,Lower|UnitDiag,true, ColMajor,false,ColMajor,false,ColMajor>::run);
|
||||
func[TR | (LEFT << 2) | (LO << 3) | (UNIT << 4)] = (ei_product_triangular_matrix_matrix<Scalar,Upper|UnitDiag,true, RowMajor,false,ColMajor,false,ColMajor>::run);
|
||||
func[ADJ | (LEFT << 2) | (LO << 3) | (UNIT << 4)] = (ei_product_triangular_matrix_matrix<Scalar,Upper|UnitDiag,true, RowMajor,Conj, ColMajor,false,ColMajor>::run);
|
||||
|
||||
func[NOTR | (RIGHT << 2) | (LO << 3) | (UNIT << 4)] = (ei_product_triangular_matrix_matrix<Scalar,LowerTriangular|UnitDiagBit,false,ColMajor,false,ColMajor,false,ColMajor>::run);
|
||||
func[TR | (RIGHT << 2) | (LO << 3) | (UNIT << 4)] = (ei_product_triangular_matrix_matrix<Scalar,LowerTriangular|UnitDiagBit,false,ColMajor,false,RowMajor,false,ColMajor>::run);
|
||||
func[ADJ | (RIGHT << 2) | (LO << 3) | (UNIT << 4)] = (ei_product_triangular_matrix_matrix<Scalar,LowerTriangular|UnitDiagBit,false,ColMajor,false,RowMajor,Conj, ColMajor>::run);
|
||||
func[NOTR | (RIGHT << 2) | (LO << 3) | (UNIT << 4)] = (ei_product_triangular_matrix_matrix<Scalar,Lower|UnitDiag,false,ColMajor,false,ColMajor,false,ColMajor>::run);
|
||||
func[TR | (RIGHT << 2) | (LO << 3) | (UNIT << 4)] = (ei_product_triangular_matrix_matrix<Scalar,Upper|UnitDiag,false,ColMajor,false,RowMajor,false,ColMajor>::run);
|
||||
func[ADJ | (RIGHT << 2) | (LO << 3) | (UNIT << 4)] = (ei_product_triangular_matrix_matrix<Scalar,Upper|UnitDiag,false,ColMajor,false,RowMajor,Conj, ColMajor>::run);
|
||||
|
||||
init = true;
|
||||
}
|
||||
@ -178,10 +196,21 @@ int EIGEN_BLAS_FUNC(trmm)(char *side, char *uplo, char *opa, char *diag, int *m,
|
||||
Scalar alpha = *reinterpret_cast<Scalar*>(palpha);
|
||||
|
||||
int code = OP(*opa) | (SIDE(*side) << 2) | (UPLO(*uplo) << 3) | (DIAG(*diag) << 4);
|
||||
if(code>=32 || func[code]==0)
|
||||
if(code>=32 || func[code]==0 || *m<0 || *n <0)
|
||||
{
|
||||
int info=1;
|
||||
xerbla_("TRMM",&info,4);
|
||||
return 0;
|
||||
}
|
||||
|
||||
func[code](*m, *n, a, *lda, b, *ldb, b, *ldb, alpha);
|
||||
// FIXME find a way to avoid this copy
|
||||
Matrix<Scalar,Dynamic,Dynamic> tmp = matrix(b,*m,*n,*ldb);
|
||||
matrix(b,*m,*n,*ldb).setZero();
|
||||
|
||||
if(SIDE(*side)==LEFT)
|
||||
func[code](*m, *n, a, *lda, tmp.data(), tmp.outerStride(), b, *ldb, alpha);
|
||||
else
|
||||
func[code](*n, *m, tmp.data(), tmp.outerStride(), a, *lda, b, *ldb, alpha);
|
||||
return 1;
|
||||
}
|
||||
|
||||
@ -189,41 +218,45 @@ int EIGEN_BLAS_FUNC(trmm)(char *side, char *uplo, char *opa, char *diag, int *m,
|
||||
// c = alpha*b*a + beta*c for side = 'R'or'r
|
||||
int EIGEN_BLAS_FUNC(symm)(char *side, char *uplo, int *m, int *n, RealScalar *palpha, RealScalar *pa, int *lda, RealScalar *pb, int *ldb, RealScalar *pbeta, RealScalar *pc, int *ldc)
|
||||
{
|
||||
// std::cerr << "in symm " << *side << " " << *uplo << " " << *m << "x" << *n << " lda:" << *lda << " ldb:" << *ldb << " ldc:" << *ldc << " alpha:" << *palpha << " beta:" << *pbeta << "\n";
|
||||
Scalar* a = reinterpret_cast<Scalar*>(pa);
|
||||
Scalar* b = reinterpret_cast<Scalar*>(pb);
|
||||
Scalar* c = reinterpret_cast<Scalar*>(pc);
|
||||
Scalar alpha = *reinterpret_cast<Scalar*>(palpha);
|
||||
Scalar beta = *reinterpret_cast<Scalar*>(pbeta);
|
||||
|
||||
if(*m<0 || *n<0)
|
||||
{
|
||||
int info=1;
|
||||
xerbla_("SYMM",&info,4);
|
||||
return 0;
|
||||
}
|
||||
|
||||
if(beta!=Scalar(1))
|
||||
matrix(c, *m, *n, *ldc) *= beta;
|
||||
if(beta==Scalar(0)) matrix(c, *m, *n, *ldc).setZero();
|
||||
else matrix(c, *m, *n, *ldc) *= beta;
|
||||
|
||||
if(SIDE(*side)==LEFT)
|
||||
if(UPLO(*uplo)==UP)
|
||||
ei_product_selfadjoint_matrix<Scalar, RowMajor,true,false, ColMajor,false,false, ColMajor>::run(*m, *n, a, *lda, b, *ldb, c, *ldc, alpha);
|
||||
else if(UPLO(*uplo)==LO)
|
||||
ei_product_selfadjoint_matrix<Scalar, ColMajor,true,false, ColMajor,false,false, ColMajor>::run(*m, *n, a, *lda, b, *ldb, c, *ldc, alpha);
|
||||
else
|
||||
return 0;
|
||||
if(UPLO(*uplo)==UP) ei_product_selfadjoint_matrix<Scalar, RowMajor,true,false, ColMajor,false,false, ColMajor>::run(*m, *n, a, *lda, b, *ldb, c, *ldc, alpha);
|
||||
else if(UPLO(*uplo)==LO) ei_product_selfadjoint_matrix<Scalar, ColMajor,true,false, ColMajor,false,false, ColMajor>::run(*m, *n, a, *lda, b, *ldb, c, *ldc, alpha);
|
||||
else return 0;
|
||||
else if(SIDE(*side)==RIGHT)
|
||||
if(UPLO(*uplo)==UP)
|
||||
ei_product_selfadjoint_matrix<Scalar, ColMajor,false,false, RowMajor,true,false, ColMajor>::run(*m, *n, b, *ldb, a, *lda, c, *ldc, alpha);
|
||||
else if(UPLO(*uplo)==LO)
|
||||
ei_product_selfadjoint_matrix<Scalar, ColMajor,false,false, ColMajor,true,false, ColMajor>::run(*m, *n, b, *ldb, a, *lda, c, *ldc, alpha);
|
||||
else
|
||||
return 0;
|
||||
if(UPLO(*uplo)==UP) ei_product_selfadjoint_matrix<Scalar, ColMajor,false,false, RowMajor,true,false, ColMajor>::run(*m, *n, b, *ldb, a, *lda, c, *ldc, alpha);
|
||||
else if(UPLO(*uplo)==LO) ei_product_selfadjoint_matrix<Scalar, ColMajor,false,false, ColMajor,true,false, ColMajor>::run(*m, *n, b, *ldb, a, *lda, c, *ldc, alpha);
|
||||
else return 0;
|
||||
else
|
||||
return 0;
|
||||
|
||||
return 1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
// c = alpha*a*a' + beta*c for op = 'N'or'n'
|
||||
// c = alpha*a'*a + beta*c for op = 'T'or't','C'or'c'
|
||||
int EIGEN_BLAS_FUNC(syrk)(char *uplo, char *op, int *n, int *k, RealScalar *palpha, RealScalar *pa, int *lda, RealScalar *pbeta, RealScalar *pc, int *ldc)
|
||||
{
|
||||
// std::cerr << "in syrk " << *uplo << " " << *op << " " << *n << " " << *k << " " << *palpha << " " << *lda << " " << *pbeta << " " << *ldc << "\n";
|
||||
typedef void (*functype)(int, int, const Scalar *, int, Scalar *, int, Scalar);
|
||||
functype func[8];
|
||||
static functype func[8];
|
||||
|
||||
static bool init = false;
|
||||
if(!init)
|
||||
@ -231,13 +264,13 @@ int EIGEN_BLAS_FUNC(syrk)(char *uplo, char *op, int *n, int *k, RealScalar *palp
|
||||
for(int k=0; k<8; ++k)
|
||||
func[k] = 0;
|
||||
|
||||
func[NOTR | (UP << 2)] = (ei_selfadjoint_product<Scalar,ColMajor,ColMajor,true, UpperTriangular>::run);
|
||||
func[TR | (UP << 2)] = (ei_selfadjoint_product<Scalar,RowMajor,ColMajor,false,UpperTriangular>::run);
|
||||
func[ADJ | (UP << 2)] = (ei_selfadjoint_product<Scalar,RowMajor,ColMajor,false,UpperTriangular>::run);
|
||||
func[NOTR | (UP << 2)] = (ei_selfadjoint_product<Scalar,ColMajor,ColMajor,true, Upper>::run);
|
||||
func[TR | (UP << 2)] = (ei_selfadjoint_product<Scalar,RowMajor,ColMajor,false,Upper>::run);
|
||||
func[ADJ | (UP << 2)] = (ei_selfadjoint_product<Scalar,RowMajor,ColMajor,false,Upper>::run);
|
||||
|
||||
func[NOTR | (LO << 2)] = (ei_selfadjoint_product<Scalar,ColMajor,ColMajor,true, LowerTriangular>::run);
|
||||
func[TR | (LO << 2)] = (ei_selfadjoint_product<Scalar,RowMajor,ColMajor,false,LowerTriangular>::run);
|
||||
func[ADJ | (LO << 2)] = (ei_selfadjoint_product<Scalar,RowMajor,ColMajor,false,LowerTriangular>::run);
|
||||
func[NOTR | (LO << 2)] = (ei_selfadjoint_product<Scalar,ColMajor,ColMajor,true, Lower>::run);
|
||||
func[TR | (LO << 2)] = (ei_selfadjoint_product<Scalar,RowMajor,ColMajor,false,Lower>::run);
|
||||
func[ADJ | (LO << 2)] = (ei_selfadjoint_product<Scalar,RowMajor,ColMajor,false,Lower>::run);
|
||||
|
||||
init = true;
|
||||
}
|
||||
@ -248,14 +281,20 @@ int EIGEN_BLAS_FUNC(syrk)(char *uplo, char *op, int *n, int *k, RealScalar *palp
|
||||
Scalar beta = *reinterpret_cast<Scalar*>(pbeta);
|
||||
|
||||
int code = OP(*op) | (UPLO(*uplo) << 2);
|
||||
if(code>=8 || func[code]==0)
|
||||
if(code>=8 || func[code]==0 || *n<0 || *k<0)
|
||||
{
|
||||
int info=1;
|
||||
xerbla_("SYRK",&info,4);
|
||||
return 0;
|
||||
}
|
||||
|
||||
if(beta!=Scalar(1))
|
||||
matrix(c, *n, *n, *ldc) *= beta;
|
||||
if(UPLO(*uplo)==UP) matrix(c, *n, *n, *ldc).triangularView<Upper>() *= beta;
|
||||
else matrix(c, *n, *n, *ldc).triangularView<Lower>() *= beta;
|
||||
|
||||
func[code](*n, *k, a, *lda, c, *ldc, alpha);
|
||||
return 1;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
// c = alpha*a*b' + alpha*b*a' + beta*c for op = 'N'or'n'
|
||||
@ -269,6 +308,7 @@ int EIGEN_BLAS_FUNC(syr2k)(char *uplo, char *op, int *n, int *k, RealScalar *pal
|
||||
Scalar beta = *reinterpret_cast<Scalar*>(pbeta);
|
||||
|
||||
// TODO
|
||||
std::cerr << "Eigen BLAS: _syr2k is not implemented yet\n";
|
||||
|
||||
return 0;
|
||||
}
|
||||
@ -286,27 +326,30 @@ int EIGEN_BLAS_FUNC(hemm)(char *side, char *uplo, int *m, int *n, RealScalar *pa
|
||||
Scalar alpha = *reinterpret_cast<Scalar*>(palpha);
|
||||
Scalar beta = *reinterpret_cast<Scalar*>(pbeta);
|
||||
|
||||
// std::cerr << "in hemm " << *side << " " << *uplo << " " << *m << " " << *n << " " << alpha << " " << *lda << " " << beta << " " << *ldc << "\n";
|
||||
|
||||
if(*m<0 || *n<0)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
if(beta!=Scalar(1))
|
||||
matrix(c, *m, *n, *ldc) *= beta;
|
||||
|
||||
if(SIDE(*side)==LEFT)
|
||||
if(UPLO(*uplo)==UP)
|
||||
ei_product_selfadjoint_matrix<Scalar, RowMajor,true,Conj, ColMajor,false,false, ColMajor>::run(*m, *n, a, *lda, b, *ldb, c, *ldc, alpha);
|
||||
else if(UPLO(*uplo)==LO)
|
||||
ei_product_selfadjoint_matrix<Scalar, ColMajor,true,false, ColMajor,false,false, ColMajor>::run(*m, *n, a, *lda, b, *ldb, c, *ldc, alpha);
|
||||
else
|
||||
return 0;
|
||||
if(UPLO(*uplo)==UP) ei_product_selfadjoint_matrix<Scalar, RowMajor,true,Conj, ColMajor,false,false, ColMajor>::run(*m, *n, a, *lda, b, *ldb, c, *ldc, alpha);
|
||||
else if(UPLO(*uplo)==LO) ei_product_selfadjoint_matrix<Scalar, ColMajor,true,false, ColMajor,false,false, ColMajor>::run(*m, *n, a, *lda, b, *ldb, c, *ldc, alpha);
|
||||
else return 0;
|
||||
else if(SIDE(*side)==RIGHT)
|
||||
if(UPLO(*uplo)==UP)
|
||||
ei_product_selfadjoint_matrix<Scalar, ColMajor,false,false, RowMajor,true,Conj, ColMajor>::run(*m, *n, b, *ldb, a, *lda, c, *ldc, alpha);
|
||||
else if(UPLO(*uplo)==LO)
|
||||
ei_product_selfadjoint_matrix<Scalar, ColMajor,false,false, ColMajor,true,false, ColMajor>::run(*m, *n, b, *ldb, a, *lda, c, *ldc, alpha);
|
||||
else
|
||||
return 0;
|
||||
if(UPLO(*uplo)==UP) ei_product_selfadjoint_matrix<Scalar, ColMajor,false,false, RowMajor,true,Conj, ColMajor>::run(*m, *n, b, *ldb, a, *lda, c, *ldc, alpha);
|
||||
else if(UPLO(*uplo)==LO) ei_product_selfadjoint_matrix<Scalar, ColMajor,false,false, ColMajor,true,false, ColMajor>::run(*m, *n, b, *ldb, a, *lda, c, *ldc, alpha);
|
||||
else return 0;
|
||||
else
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
return 1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
// c = alpha*a*conj(a') + beta*c for op = 'N'or'n'
|
||||
@ -314,7 +357,7 @@ int EIGEN_BLAS_FUNC(hemm)(char *side, char *uplo, int *m, int *n, RealScalar *pa
|
||||
int EIGEN_BLAS_FUNC(herk)(char *uplo, char *op, int *n, int *k, RealScalar *palpha, RealScalar *pa, int *lda, RealScalar *pbeta, RealScalar *pc, int *ldc)
|
||||
{
|
||||
typedef void (*functype)(int, int, const Scalar *, int, Scalar *, int, Scalar);
|
||||
functype func[8];
|
||||
static functype func[8];
|
||||
|
||||
static bool init = false;
|
||||
if(!init)
|
||||
@ -322,29 +365,46 @@ int EIGEN_BLAS_FUNC(herk)(char *uplo, char *op, int *n, int *k, RealScalar *palp
|
||||
for(int k=0; k<8; ++k)
|
||||
func[k] = 0;
|
||||
|
||||
func[NOTR | (UP << 2)] = (ei_selfadjoint_product<Scalar,ColMajor,ColMajor,true, UpperTriangular>::run);
|
||||
func[ADJ | (UP << 2)] = (ei_selfadjoint_product<Scalar,RowMajor,ColMajor,false,UpperTriangular>::run);
|
||||
func[NOTR | (UP << 2)] = (ei_selfadjoint_product<Scalar,ColMajor,ColMajor,true, Upper>::run);
|
||||
func[ADJ | (UP << 2)] = (ei_selfadjoint_product<Scalar,RowMajor,ColMajor,false,Upper>::run);
|
||||
|
||||
func[NOTR | (LO << 2)] = (ei_selfadjoint_product<Scalar,ColMajor,ColMajor,true, LowerTriangular>::run);
|
||||
func[ADJ | (LO << 2)] = (ei_selfadjoint_product<Scalar,RowMajor,ColMajor,false,LowerTriangular>::run);
|
||||
func[NOTR | (LO << 2)] = (ei_selfadjoint_product<Scalar,ColMajor,ColMajor,true, Lower>::run);
|
||||
func[ADJ | (LO << 2)] = (ei_selfadjoint_product<Scalar,RowMajor,ColMajor,false,Lower>::run);
|
||||
|
||||
init = true;
|
||||
}
|
||||
|
||||
Scalar* a = reinterpret_cast<Scalar*>(pa);
|
||||
Scalar* c = reinterpret_cast<Scalar*>(pc);
|
||||
Scalar alpha = *reinterpret_cast<Scalar*>(palpha);
|
||||
Scalar beta = *reinterpret_cast<Scalar*>(pbeta);
|
||||
RealScalar alpha = *palpha;
|
||||
RealScalar beta = *pbeta;
|
||||
|
||||
// std::cerr << "in herk " << *uplo << " " << *op << " " << *n << " " << *k << " " << alpha << " " << *lda << " " << beta << " " << *ldc << "\n";
|
||||
|
||||
if(*n<0 || *k<0)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
int code = OP(*op) | (UPLO(*uplo) << 2);
|
||||
if(code>=8 || func[code]==0)
|
||||
return 0;
|
||||
|
||||
if(beta!=Scalar(1))
|
||||
matrix(c, *n, *n, *ldc) *= beta;
|
||||
if(beta!=RealScalar(1))
|
||||
{
|
||||
if(UPLO(*uplo)==UP) matrix(c, *n, *n, *ldc).triangularView<StrictlyUpper>() *= beta;
|
||||
else matrix(c, *n, *n, *ldc).triangularView<StrictlyLower>() *= beta;
|
||||
|
||||
func[code](*n, *k, a, *lda, c, *ldc, alpha);
|
||||
return 1;
|
||||
matrix(c, *n, *n, *ldc).diagonal().real() *= beta;
|
||||
matrix(c, *n, *n, *ldc).diagonal().imag().setZero();
|
||||
}
|
||||
|
||||
if(*k>0 && alpha!=RealScalar(0))
|
||||
{
|
||||
func[code](*n, *k, a, *lda, c, *ldc, alpha);
|
||||
matrix(c, *n, *n, *ldc).diagonal().imag().setZero();
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
// c = alpha*a*conj(b') + conj(alpha)*b*conj(a') + beta*c, for op = 'N'or'n'
|
||||
@ -357,7 +417,13 @@ int EIGEN_BLAS_FUNC(her2k)(char *uplo, char *op, int *n, int *k, RealScalar *pal
|
||||
Scalar alpha = *reinterpret_cast<Scalar*>(palpha);
|
||||
Scalar beta = *reinterpret_cast<Scalar*>(pbeta);
|
||||
|
||||
if(*n<0 || *k<0)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
// TODO
|
||||
std::cerr << "Eigen BLAS: _her2k is not implemented yet\n";
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
17
blas/xerbla.cpp
Normal file
17
blas/xerbla.cpp
Normal file
@ -0,0 +1,17 @@
|
||||
|
||||
#include <iostream>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C"
|
||||
{
|
||||
#endif
|
||||
|
||||
int xerbla_(char * msg, int *info, int)
|
||||
{
|
||||
std::cerr << "Eigen BLAS ERROR #" << *info << ": " << msg << "\n";
|
||||
return 0;
|
||||
}
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
@ -185,11 +185,17 @@ macro(ei_testing_print_summary)
|
||||
endif()
|
||||
|
||||
if(EIGEN_TEST_ALTIVEC)
|
||||
message("Altivec: Using architecture defaults")
|
||||
message("Altivec: ON")
|
||||
else()
|
||||
message("Altivec: Using architecture defaults")
|
||||
endif()
|
||||
|
||||
if(EIGEN_TEST_NEON)
|
||||
message("ARM NEON: ON")
|
||||
else()
|
||||
message("ARM NEON: Using architecture defaults")
|
||||
endif()
|
||||
|
||||
if(EIGEN_TEST_NO_EXPLICIT_VECTORIZATION)
|
||||
message("Explicit vec: OFF")
|
||||
else()
|
||||
|
5
doc/snippets/Map_general_stride.cpp
Normal file
5
doc/snippets/Map_general_stride.cpp
Normal file
@ -0,0 +1,5 @@
|
||||
int array[24];
|
||||
for(int i = 0; i < 24; ++i) array[i] = i;
|
||||
cout << Map<MatrixXi, 0, Stride<Dynamic,2> >
|
||||
(array, 3, 3, Stride<Dynamic,2>(8, 2))
|
||||
<< endl;
|
5
doc/snippets/Map_inner_stride.cpp
Normal file
5
doc/snippets/Map_inner_stride.cpp
Normal file
@ -0,0 +1,5 @@
|
||||
int array[12];
|
||||
for(int i = 0; i < 12; ++i) array[i] = i;
|
||||
cout << Map<VectorXi, 0, InnerStride<2> >
|
||||
(array, 6) // the inner stride has already been passed as template parameter
|
||||
<< endl;
|
5
doc/snippets/Map_outer_stride.cpp
Normal file
5
doc/snippets/Map_outer_stride.cpp
Normal file
@ -0,0 +1,5 @@
|
||||
int array[12];
|
||||
for(int i = 0; i < 12; ++i) array[i] = i;
|
||||
cout << Map<MatrixXi, 0, OuterStride<Dynamic> >
|
||||
(array, 3, 3, OuterStride<Dynamic>(4))
|
||||
<< endl;
|
3
doc/snippets/Map_simple.cpp
Normal file
3
doc/snippets/Map_simple.cpp
Normal file
@ -0,0 +1,3 @@
|
||||
int array[9];
|
||||
for(int i = 0; i < 9; ++i) array[i] = i;
|
||||
cout << Map<Matrix3i>(array) << endl;
|
@ -104,16 +104,18 @@ ei_add_test(cwiseop)
|
||||
ei_add_test(unalignedcount)
|
||||
ei_add_test(redux)
|
||||
ei_add_test(visitor)
|
||||
ei_add_test(block)
|
||||
ei_add_test(product_small)
|
||||
ei_add_test(product_large)
|
||||
ei_add_test(product_extra)
|
||||
ei_add_test(diagonalmatrices)
|
||||
ei_add_test(adjoint)
|
||||
ei_add_test(submatrices)
|
||||
ei_add_test(diagonal)
|
||||
ei_add_test(miscmatrices)
|
||||
ei_add_test(commainitializer)
|
||||
ei_add_test(smallvectors)
|
||||
ei_add_test(map)
|
||||
ei_add_test(mapstride)
|
||||
ei_add_test(array)
|
||||
ei_add_test(array_for_matrix)
|
||||
ei_add_test(array_replicate)
|
||||
|
@ -1,7 +1,7 @@
|
||||
// This file is part of Eigen, a lightweight C++ template library
|
||||
// for linear algebra.
|
||||
//
|
||||
// Copyright (C) 2006-2008 Benoit Jacob <jacob.benoit.1@gmail.com>
|
||||
// Copyright (C) 2006-2010 Benoit Jacob <jacob.benoit.1@gmail.com>
|
||||
//
|
||||
// Eigen is free software; you can redistribute it and/or
|
||||
// modify it under the terms of the GNU Lesser General Public
|
||||
@ -51,16 +51,18 @@ template<typename Scalar> struct CheckMinor<Scalar,1,1>
|
||||
CheckMinor(MatrixType&, int, int) {}
|
||||
};
|
||||
|
||||
template<typename MatrixType> void submatrices(const MatrixType& m)
|
||||
template<typename MatrixType> void block(const MatrixType& m)
|
||||
{
|
||||
/* this test covers the following files:
|
||||
Row.h Column.h Block.h Minor.h DiagonalCoeffs.h
|
||||
Row.h Column.h Block.h Minor.h
|
||||
*/
|
||||
typedef typename MatrixType::Scalar Scalar;
|
||||
typedef typename MatrixType::RealScalar RealScalar;
|
||||
typedef Matrix<Scalar, MatrixType::RowsAtCompileTime, 1> VectorType;
|
||||
typedef Matrix<Scalar, 1, MatrixType::ColsAtCompileTime> RowVectorType;
|
||||
typedef Matrix<Scalar, MatrixType::RowsAtCompileTime, MatrixType::RowsAtCompileTime> SquareMatrixType;
|
||||
typedef Matrix<Scalar, Dynamic, Dynamic> DynamicMatrixType;
|
||||
typedef Matrix<Scalar, Dynamic, 1> DynamicVectorType;
|
||||
|
||||
int rows = m.rows();
|
||||
int cols = m.cols();
|
||||
|
||||
@ -69,8 +71,6 @@ template<typename MatrixType> void submatrices(const MatrixType& m)
|
||||
m3(rows, cols),
|
||||
mzero = MatrixType::Zero(rows, cols),
|
||||
ones = MatrixType::Ones(rows, cols);
|
||||
SquareMatrixType identity = SquareMatrixType::Identity(rows, rows),
|
||||
square = SquareMatrixType::Random(rows, rows);
|
||||
VectorType v1 = VectorType::Random(rows),
|
||||
v2 = VectorType::Random(rows),
|
||||
v3 = VectorType::Random(rows),
|
||||
@ -84,20 +84,19 @@ template<typename MatrixType> void submatrices(const MatrixType& m)
|
||||
int c2 = ei_random<int>(c1,cols-1);
|
||||
|
||||
//check row() and col()
|
||||
VERIFY_IS_APPROX(m1.col(c1).transpose(), m1.transpose().row(c1));
|
||||
// FIXME perhaps we should re-enable that without the .eval()
|
||||
VERIFY_IS_APPROX(m1.col(c1).dot(square.row(r1)), (square * m1.conjugate()).eval()(r1,c1));
|
||||
VERIFY_IS_EQUAL(m1.col(c1).transpose(), m1.transpose().row(c1));
|
||||
//check operator(), both constant and non-constant, on row() and col()
|
||||
m1.row(r1) += s1 * m1.row(r2);
|
||||
m1.col(c1) += s1 * m1.col(c2);
|
||||
|
||||
//check block()
|
||||
Matrix<Scalar,Dynamic,Dynamic> b1(1,1); b1(0,0) = m1(r1,c1);
|
||||
|
||||
RowVectorType br1(m1.block(r1,0,1,cols));
|
||||
VectorType bc1(m1.block(0,c1,rows,1));
|
||||
VERIFY_IS_APPROX(b1, m1.block(r1,c1,1,1));
|
||||
VERIFY_IS_APPROX(m1.row(r1), br1);
|
||||
VERIFY_IS_APPROX(m1.col(c1), bc1);
|
||||
VERIFY_IS_EQUAL(b1, m1.block(r1,c1,1,1));
|
||||
VERIFY_IS_EQUAL(m1.row(r1), br1);
|
||||
VERIFY_IS_EQUAL(m1.col(c1), bc1);
|
||||
//check operator(), both constant and non-constant, on block()
|
||||
m1.block(r1,c1,r2-r1+1,c2-c1+1) = s1 * m2.block(0, 0, r2-r1+1,c2-c1+1);
|
||||
m1.block(r1,c1,r2-r1+1,c2-c1+1)(r2-r1,c2-c1) = m2.block(0, 0, r2-r1+1,c2-c1+1)(0,0);
|
||||
@ -105,11 +104,6 @@ template<typename MatrixType> void submatrices(const MatrixType& m)
|
||||
//check minor()
|
||||
CheckMinor<Scalar, MatrixType::RowsAtCompileTime, MatrixType::ColsAtCompileTime> checkminor(m1,r1,c1);
|
||||
|
||||
//check diagonal()
|
||||
VERIFY_IS_APPROX(m1.diagonal(), m1.transpose().diagonal());
|
||||
m2.diagonal() = 2 * m1.diagonal();
|
||||
m2.diagonal()[0] *= 3;
|
||||
|
||||
const int BlockRows = EIGEN_ENUM_MIN(MatrixType::RowsAtCompileTime,2);
|
||||
const int BlockCols = EIGEN_ENUM_MIN(MatrixType::ColsAtCompileTime,5);
|
||||
if (rows>=5 && cols>=8)
|
||||
@ -120,45 +114,23 @@ template<typename MatrixType> void submatrices(const MatrixType& m)
|
||||
m1.template block<BlockRows,BlockCols>(1,1)(0, 3) = m1.template block<2,5>(1,1)(1,2);
|
||||
// check that fixed block() and block() agree
|
||||
Matrix<Scalar,Dynamic,Dynamic> b = m1.template block<BlockRows,BlockCols>(3,3);
|
||||
VERIFY_IS_APPROX(b, m1.block(3,3,BlockRows,BlockCols));
|
||||
VERIFY_IS_EQUAL(b, m1.block(3,3,BlockRows,BlockCols));
|
||||
}
|
||||
|
||||
if (rows>2)
|
||||
{
|
||||
// test sub vectors
|
||||
VERIFY_IS_APPROX(v1.template head<2>(), v1.block(0,0,2,1));
|
||||
VERIFY_IS_APPROX(v1.template head<2>(), v1.head(2));
|
||||
VERIFY_IS_APPROX(v1.template head<2>(), v1.segment(0,2));
|
||||
VERIFY_IS_APPROX(v1.template head<2>(), v1.template segment<2>(0));
|
||||
VERIFY_IS_EQUAL(v1.template head<2>(), v1.block(0,0,2,1));
|
||||
VERIFY_IS_EQUAL(v1.template head<2>(), v1.head(2));
|
||||
VERIFY_IS_EQUAL(v1.template head<2>(), v1.segment(0,2));
|
||||
VERIFY_IS_EQUAL(v1.template head<2>(), v1.template segment<2>(0));
|
||||
int i = rows-2;
|
||||
VERIFY_IS_APPROX(v1.template tail<2>(), v1.block(i,0,2,1));
|
||||
VERIFY_IS_APPROX(v1.template tail<2>(), v1.tail(2));
|
||||
VERIFY_IS_APPROX(v1.template tail<2>(), v1.segment(i,2));
|
||||
VERIFY_IS_APPROX(v1.template tail<2>(), v1.template segment<2>(i));
|
||||
VERIFY_IS_EQUAL(v1.template tail<2>(), v1.block(i,0,2,1));
|
||||
VERIFY_IS_EQUAL(v1.template tail<2>(), v1.tail(2));
|
||||
VERIFY_IS_EQUAL(v1.template tail<2>(), v1.segment(i,2));
|
||||
VERIFY_IS_EQUAL(v1.template tail<2>(), v1.template segment<2>(i));
|
||||
i = ei_random(0,rows-2);
|
||||
VERIFY_IS_APPROX(v1.segment(i,2), v1.template segment<2>(i));
|
||||
|
||||
enum {
|
||||
N1 = MatrixType::RowsAtCompileTime>1 ? 1 : 0,
|
||||
N2 = MatrixType::RowsAtCompileTime>2 ? -2 : 0
|
||||
};
|
||||
|
||||
// check sub/super diagonal
|
||||
m2.template diagonal<N1>() = 2 * m1.template diagonal<N1>();
|
||||
m2.template diagonal<N1>()[0] *= 3;
|
||||
VERIFY_IS_APPROX(m2.template diagonal<N1>()[0], static_cast<Scalar>(6) * m1.template diagonal<N1>()[0]);
|
||||
|
||||
m2.template diagonal<N2>() = 2 * m1.template diagonal<N2>();
|
||||
m2.template diagonal<N2>()[0] *= 3;
|
||||
VERIFY_IS_APPROX(m2.template diagonal<N2>()[0], static_cast<Scalar>(6) * m1.template diagonal<N2>()[0]);
|
||||
|
||||
m2.diagonal(N1) = 2 * m1.diagonal(N1);
|
||||
m2.diagonal(N1)[0] *= 3;
|
||||
VERIFY_IS_APPROX(m2.diagonal(N1)[0], static_cast<Scalar>(6) * m1.diagonal(N1)[0]);
|
||||
|
||||
m2.diagonal(N2) = 2 * m1.diagonal(N2);
|
||||
m2.diagonal(N2)[0] *= 3;
|
||||
VERIFY_IS_APPROX(m2.diagonal(N2)[0], static_cast<Scalar>(6) * m1.diagonal(N2)[0]);
|
||||
VERIFY_IS_EQUAL(v1.segment(i,2), v1.template segment<2>(i));
|
||||
}
|
||||
|
||||
// stress some basic stuffs with block matrices
|
||||
@ -167,6 +139,49 @@ template<typename MatrixType> void submatrices(const MatrixType& m)
|
||||
|
||||
VERIFY(ei_real(ones.col(c1).dot(ones.col(c2))) == RealScalar(rows));
|
||||
VERIFY(ei_real(ones.row(r1).dot(ones.row(r2))) == RealScalar(cols));
|
||||
|
||||
// now test some block-inside-of-block.
|
||||
|
||||
// expressions with direct access
|
||||
VERIFY_IS_EQUAL( (m1.block(r1,c1,rows-r1,cols-c1).block(r2-r1,c2-c1,rows-r2,cols-c2)) , (m1.block(r2,c2,rows-r2,cols-c2)) );
|
||||
VERIFY_IS_EQUAL( (m1.block(r1,c1,r2-r1+1,c2-c1+1).row(0)) , (m1.row(r1).segment(c1,c2-c1+1)) );
|
||||
VERIFY_IS_EQUAL( (m1.block(r1,c1,r2-r1+1,c2-c1+1).col(0)) , (m1.col(c1).segment(r1,r2-r1+1)) );
|
||||
VERIFY_IS_EQUAL( (m1.block(r1,c1,r2-r1+1,c2-c1+1).transpose().col(0)) , (m1.row(r1).segment(c1,c2-c1+1)) );
|
||||
VERIFY_IS_EQUAL( (m1.transpose().block(c1,r1,c2-c1+1,r2-r1+1).col(0)) , (m1.row(r1).segment(c1,c2-c1+1)) );
|
||||
|
||||
// expressions without direct access
|
||||
VERIFY_IS_EQUAL( ((m1+m2).block(r1,c1,rows-r1,cols-c1).block(r2-r1,c2-c1,rows-r2,cols-c2)) , ((m1+m2).block(r2,c2,rows-r2,cols-c2)) );
|
||||
VERIFY_IS_EQUAL( ((m1+m2).block(r1,c1,r2-r1+1,c2-c1+1).row(0)) , ((m1+m2).row(r1).segment(c1,c2-c1+1)) );
|
||||
VERIFY_IS_EQUAL( ((m1+m2).block(r1,c1,r2-r1+1,c2-c1+1).col(0)) , ((m1+m2).col(c1).segment(r1,r2-r1+1)) );
|
||||
VERIFY_IS_EQUAL( ((m1+m2).block(r1,c1,r2-r1+1,c2-c1+1).transpose().col(0)) , ((m1+m2).row(r1).segment(c1,c2-c1+1)) );
|
||||
VERIFY_IS_EQUAL( ((m1+m2).transpose().block(c1,r1,c2-c1+1,r2-r1+1).col(0)) , ((m1+m2).row(r1).segment(c1,c2-c1+1)) );
|
||||
|
||||
// evaluation into plain matrices from expressions with direct access (stress MapBase)
|
||||
DynamicMatrixType dm;
|
||||
DynamicVectorType dv;
|
||||
dm.setZero();
|
||||
dm = m1.block(r1,c1,rows-r1,cols-c1).block(r2-r1,c2-c1,rows-r2,cols-c2);
|
||||
VERIFY_IS_EQUAL(dm, (m1.block(r2,c2,rows-r2,cols-c2)));
|
||||
dm.setZero();
|
||||
dv.setZero();
|
||||
dm = m1.block(r1,c1,r2-r1+1,c2-c1+1).row(0).transpose();
|
||||
dv = m1.row(r1).segment(c1,c2-c1+1);
|
||||
VERIFY_IS_EQUAL(dv, dm);
|
||||
dm.setZero();
|
||||
dv.setZero();
|
||||
dm = m1.col(c1).segment(r1,r2-r1+1);
|
||||
dv = m1.block(r1,c1,r2-r1+1,c2-c1+1).col(0);
|
||||
VERIFY_IS_EQUAL(dv, dm);
|
||||
dm.setZero();
|
||||
dv.setZero();
|
||||
dm = m1.block(r1,c1,r2-r1+1,c2-c1+1).transpose().col(0);
|
||||
dv = m1.row(r1).segment(c1,c2-c1+1);
|
||||
VERIFY_IS_EQUAL(dv, dm);
|
||||
dm.setZero();
|
||||
dv.setZero();
|
||||
dm = m1.row(r1).segment(c1,c2-c1+1).transpose();
|
||||
dv = m1.transpose().block(c1,r1,c2-c1+1,r2-r1+1).col(0);
|
||||
VERIFY_IS_EQUAL(dv, dm);
|
||||
}
|
||||
|
||||
|
||||
@ -176,18 +191,30 @@ void compare_using_data_and_stride(const MatrixType& m)
|
||||
int rows = m.rows();
|
||||
int cols = m.cols();
|
||||
int size = m.size();
|
||||
int stride = m.stride();
|
||||
int innerStride = m.innerStride();
|
||||
int outerStride = m.outerStride();
|
||||
int rowStride = m.rowStride();
|
||||
int colStride = m.colStride();
|
||||
const typename MatrixType::Scalar* data = m.data();
|
||||
|
||||
for(int j=0;j<cols;++j)
|
||||
for(int i=0;i<rows;++i)
|
||||
VERIFY_IS_APPROX(m.coeff(i,j), data[(MatrixType::Flags&RowMajorBit) ? i*stride+j : j*stride + i]);
|
||||
VERIFY(m.coeff(i,j) == data[i*rowStride + j*colStride]);
|
||||
|
||||
if(!MatrixType::IsVectorAtCompileTime)
|
||||
{
|
||||
for(int j=0;j<cols;++j)
|
||||
for(int i=0;i<rows;++i)
|
||||
VERIFY(m.coeff(i,j) == data[(MatrixType::Flags&RowMajorBit)
|
||||
? i*outerStride + j*innerStride
|
||||
: j*outerStride + i*innerStride]);
|
||||
}
|
||||
|
||||
if(MatrixType::IsVectorAtCompileTime)
|
||||
{
|
||||
VERIFY_IS_APPROX(stride, int((&m.coeff(1))-(&m.coeff(0))));
|
||||
VERIFY(innerStride == int((&m.coeff(1))-(&m.coeff(0))));
|
||||
for (int i=0;i<size;++i)
|
||||
VERIFY_IS_APPROX(m.coeff(i), data[i*stride]);
|
||||
VERIFY(m.coeff(i) == data[i*innerStride]);
|
||||
}
|
||||
}
|
||||
|
||||
@ -211,19 +238,21 @@ void data_and_stride(const MatrixType& m)
|
||||
compare_using_data_and_stride(m1.col(c1).transpose());
|
||||
}
|
||||
|
||||
void test_submatrices()
|
||||
void test_block()
|
||||
{
|
||||
for(int i = 0; i < g_repeat; i++) {
|
||||
CALL_SUBTEST_1( submatrices(Matrix<float, 1, 1>()) );
|
||||
CALL_SUBTEST_2( submatrices(Matrix4d()) );
|
||||
CALL_SUBTEST_3( submatrices(MatrixXcf(3, 3)) );
|
||||
CALL_SUBTEST_4( submatrices(MatrixXi(8, 12)) );
|
||||
CALL_SUBTEST_5( submatrices(MatrixXcd(20, 20)) );
|
||||
CALL_SUBTEST_6( submatrices(MatrixXf(20, 20)) );
|
||||
CALL_SUBTEST_1( block(Matrix<float, 1, 1>()) );
|
||||
CALL_SUBTEST_2( block(Matrix4d()) );
|
||||
CALL_SUBTEST_3( block(MatrixXcf(3, 3)) );
|
||||
CALL_SUBTEST_4( block(MatrixXi(8, 12)) );
|
||||
CALL_SUBTEST_5( block(MatrixXcd(20, 20)) );
|
||||
CALL_SUBTEST_6( block(MatrixXf(20, 20)) );
|
||||
|
||||
CALL_SUBTEST_8( submatrices(Matrix<float,Dynamic,4>(3, 4)) );
|
||||
CALL_SUBTEST_8( block(Matrix<float,Dynamic,4>(3, 4)) );
|
||||
|
||||
#ifndef EIGEN_DEFAULT_TO_ROW_MAJOR
|
||||
CALL_SUBTEST_6( data_and_stride(MatrixXf(ei_random(5,50), ei_random(5,50))) );
|
||||
CALL_SUBTEST_7( data_and_stride(Matrix<int,Dynamic,Dynamic,RowMajor>(ei_random(5,50), ei_random(5,50))) );
|
||||
#endif
|
||||
}
|
||||
}
|
@ -95,7 +95,7 @@ template<typename MatrixType> void cholesky(const MatrixType& m)
|
||||
|
||||
{
|
||||
LLT<SquareMatrixType,Lower> chollo(symmLo);
|
||||
VERIFY_IS_APPROX(symm, chollo.matrixL().toDenseMatrix() * chollo.matrixL().adjoint().toDenseMatrix());
|
||||
VERIFY_IS_APPROX(symm, chollo.reconstructedMatrix());
|
||||
vecX = chollo.solve(vecB);
|
||||
VERIFY_IS_APPROX(symm * vecX, vecB);
|
||||
matX = chollo.solve(matB);
|
||||
@ -103,7 +103,7 @@ template<typename MatrixType> void cholesky(const MatrixType& m)
|
||||
|
||||
// test the upper mode
|
||||
LLT<SquareMatrixType,Upper> cholup(symmUp);
|
||||
VERIFY_IS_APPROX(symm, cholup.matrixL().toDenseMatrix() * cholup.matrixL().adjoint().toDenseMatrix());
|
||||
VERIFY_IS_APPROX(symm, cholup.reconstructedMatrix());
|
||||
vecX = cholup.solve(vecB);
|
||||
VERIFY_IS_APPROX(symm * vecX, vecB);
|
||||
matX = cholup.solve(matB);
|
||||
@ -119,8 +119,7 @@ template<typename MatrixType> void cholesky(const MatrixType& m)
|
||||
|
||||
{
|
||||
LDLT<SquareMatrixType> ldlt(symm);
|
||||
// TODO(keir): This doesn't make sense now that LDLT pivots.
|
||||
//VERIFY_IS_APPROX(symm, ldlt.matrixL() * ldlt.vectorD().asDiagonal() * ldlt.matrixL().adjoint());
|
||||
VERIFY_IS_APPROX(symm, ldlt.reconstructedMatrix());
|
||||
vecX = ldlt.solve(vecB);
|
||||
VERIFY_IS_APPROX(symm * vecX, vecB);
|
||||
matX = ldlt.solve(matB);
|
||||
|
81
test/diagonal.cpp
Normal file
81
test/diagonal.cpp
Normal file
@ -0,0 +1,81 @@
|
||||
// This file is part of Eigen, a lightweight C++ template library
|
||||
// for linear algebra.
|
||||
//
|
||||
// Copyright (C) 2006-2010 Benoit Jacob <jacob.benoit.1@gmail.com>
|
||||
//
|
||||
// Eigen is free software; you can redistribute it and/or
|
||||
// modify it under the terms of the GNU Lesser General Public
|
||||
// License as published by the Free Software Foundation; either
|
||||
// version 3 of the License, or (at your option) any later version.
|
||||
//
|
||||
// Alternatively, you can redistribute it and/or
|
||||
// modify it under the terms of the GNU General Public License as
|
||||
// published by the Free Software Foundation; either version 2 of
|
||||
// the License, or (at your option) any later version.
|
||||
//
|
||||
// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY
|
||||
// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
||||
// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the
|
||||
// GNU General Public License for more details.
|
||||
//
|
||||
// You should have received a copy of the GNU Lesser General Public
|
||||
// License and a copy of the GNU General Public License along with
|
||||
// Eigen. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
#include "main.h"
|
||||
|
||||
template<typename MatrixType> void diagonal(const MatrixType& m)
|
||||
{
|
||||
typedef typename MatrixType::Scalar Scalar;
|
||||
typedef typename MatrixType::RealScalar RealScalar;
|
||||
typedef Matrix<Scalar, MatrixType::RowsAtCompileTime, 1> VectorType;
|
||||
typedef Matrix<Scalar, 1, MatrixType::ColsAtCompileTime> RowVectorType;
|
||||
int rows = m.rows();
|
||||
int cols = m.cols();
|
||||
|
||||
MatrixType m1 = MatrixType::Random(rows, cols),
|
||||
m2 = MatrixType::Random(rows, cols);
|
||||
|
||||
//check diagonal()
|
||||
VERIFY_IS_APPROX(m1.diagonal(), m1.transpose().diagonal());
|
||||
m2.diagonal() = 2 * m1.diagonal();
|
||||
m2.diagonal()[0] *= 3;
|
||||
|
||||
if (rows>2)
|
||||
{
|
||||
enum {
|
||||
N1 = MatrixType::RowsAtCompileTime>1 ? 1 : 0,
|
||||
N2 = MatrixType::RowsAtCompileTime>2 ? -2 : 0
|
||||
};
|
||||
|
||||
// check sub/super diagonal
|
||||
m2.template diagonal<N1>() = 2 * m1.template diagonal<N1>();
|
||||
m2.template diagonal<N1>()[0] *= 3;
|
||||
VERIFY_IS_APPROX(m2.template diagonal<N1>()[0], static_cast<Scalar>(6) * m1.template diagonal<N1>()[0]);
|
||||
|
||||
m2.template diagonal<N2>() = 2 * m1.template diagonal<N2>();
|
||||
m2.template diagonal<N2>()[0] *= 3;
|
||||
VERIFY_IS_APPROX(m2.template diagonal<N2>()[0], static_cast<Scalar>(6) * m1.template diagonal<N2>()[0]);
|
||||
|
||||
m2.diagonal(N1) = 2 * m1.diagonal(N1);
|
||||
m2.diagonal(N1)[0] *= 3;
|
||||
VERIFY_IS_APPROX(m2.diagonal(N1)[0], static_cast<Scalar>(6) * m1.diagonal(N1)[0]);
|
||||
|
||||
m2.diagonal(N2) = 2 * m1.diagonal(N2);
|
||||
m2.diagonal(N2)[0] *= 3;
|
||||
VERIFY_IS_APPROX(m2.diagonal(N2)[0], static_cast<Scalar>(6) * m1.diagonal(N2)[0]);
|
||||
}
|
||||
}
|
||||
|
||||
void test_diagonal()
|
||||
{
|
||||
for(int i = 0; i < g_repeat; i++) {
|
||||
CALL_SUBTEST_1( diagonal(Matrix<float, 1, 1>()) );
|
||||
CALL_SUBTEST_2( diagonal(Matrix4d()) );
|
||||
CALL_SUBTEST_2( diagonal(MatrixXcf(3, 3)) );
|
||||
CALL_SUBTEST_2( diagonal(MatrixXi(8, 12)) );
|
||||
CALL_SUBTEST_2( diagonal(MatrixXcd(20, 20)) );
|
||||
CALL_SUBTEST_1( diagonal(MatrixXf(21, 19)) );
|
||||
CALL_SUBTEST_1( diagonal(Matrix<float,Dynamic,4>(3, 4)) );
|
||||
}
|
||||
}
|
@ -59,6 +59,7 @@ template<typename Scalar, int Mode> void transformations(void)
|
||||
Matrix3 matrot1, m;
|
||||
|
||||
Scalar a = ei_random<Scalar>(-Scalar(M_PI), Scalar(M_PI));
|
||||
Scalar s0 = ei_random<Scalar>();
|
||||
|
||||
VERIFY_IS_APPROX(v0, AngleAxisx(a, v0.normalized()) * v0);
|
||||
VERIFY_IS_APPROX(-v0, AngleAxisx(Scalar(M_PI), v0.unitOrthogonal()) * v0);
|
||||
@ -234,6 +235,16 @@ template<typename Scalar, int Mode> void transformations(void)
|
||||
t1 = Matrix3(q1) * (AlignedScaling3(v0) * Translation3(v0));
|
||||
VERIFY_IS_APPROX(t0.matrix(), t1.matrix());
|
||||
|
||||
|
||||
t0.setIdentity();
|
||||
t0.scale(s0).translate(v0);
|
||||
t1 = Scaling(s0) * Translation3(v0);
|
||||
VERIFY_IS_APPROX(t0.matrix(), t1.matrix());
|
||||
t0.prescale(s0);
|
||||
t1 = Scaling(s0) * t1;
|
||||
VERIFY_IS_APPROX(t0.matrix(), t1.matrix());
|
||||
|
||||
|
||||
t0.setIdentity();
|
||||
t0.prerotate(q1).prescale(v0).pretranslate(v0);
|
||||
// translation * aligned scaling and transformation * mat
|
||||
|
@ -42,7 +42,7 @@ template<typename MatrixType> void inverse(const MatrixType& m)
|
||||
m2(rows, cols),
|
||||
mzero = MatrixType::Zero(rows, cols),
|
||||
identity = MatrixType::Identity(rows, rows);
|
||||
createRandomMatrixOfRank(rows,rows,rows,m1);
|
||||
createRandomPIMatrixOfRank(rows,rows,rows,m1);
|
||||
m2 = m1.inverse();
|
||||
VERIFY_IS_APPROX(m1, m2.inverse() );
|
||||
|
||||
|
93
test/lu.cpp
93
test/lu.cpp
@ -29,6 +29,7 @@ using namespace std;
|
||||
template<typename MatrixType> void lu_non_invertible()
|
||||
{
|
||||
typedef typename MatrixType::Scalar Scalar;
|
||||
typedef typename MatrixType::RealScalar RealScalar;
|
||||
/* this test covers the following files:
|
||||
LU.h
|
||||
*/
|
||||
@ -51,39 +52,46 @@ template<typename MatrixType> void lu_non_invertible()
|
||||
cols2 = cols = MatrixType::ColsAtCompileTime;
|
||||
}
|
||||
|
||||
enum {
|
||||
RowsAtCompileTime = MatrixType::RowsAtCompileTime,
|
||||
ColsAtCompileTime = MatrixType::ColsAtCompileTime
|
||||
};
|
||||
typedef typename ei_kernel_retval_base<FullPivLU<MatrixType> >::ReturnType KernelMatrixType;
|
||||
typedef typename ei_image_retval_base<FullPivLU<MatrixType> >::ReturnType ImageMatrixType;
|
||||
typedef Matrix<typename MatrixType::Scalar, Dynamic, Dynamic> DynamicMatrixType;
|
||||
typedef Matrix<typename MatrixType::Scalar, MatrixType::ColsAtCompileTime, MatrixType::ColsAtCompileTime>
|
||||
typedef Matrix<typename MatrixType::Scalar, ColsAtCompileTime, ColsAtCompileTime>
|
||||
CMatrixType;
|
||||
typedef Matrix<typename MatrixType::Scalar, RowsAtCompileTime, RowsAtCompileTime>
|
||||
RMatrixType;
|
||||
|
||||
int rank = ei_random<int>(1, std::min(rows, cols)-1);
|
||||
|
||||
// The image of the zero matrix should consist of a single (zero) column vector
|
||||
VERIFY((MatrixType::Zero(rows,cols).fullPivLu().image(MatrixType::Zero(rows,cols)).cols() == 1));
|
||||
|
||||
|
||||
MatrixType m1(rows, cols), m3(rows, cols2);
|
||||
CMatrixType m2(cols, cols2);
|
||||
createRandomMatrixOfRank(rank, rows, cols, m1);
|
||||
createRandomPIMatrixOfRank(rank, rows, cols, m1);
|
||||
|
||||
FullPivLU<MatrixType> lu;
|
||||
|
||||
// The special value 0.01 below works well in tests. Keep in mind that we're only computing the rank
|
||||
// of singular values are either 0 or 1.
|
||||
// So it's not clear at all that the epsilon should play any role there.
|
||||
lu.setThreshold(RealScalar(0.01));
|
||||
lu.compute(m1);
|
||||
|
||||
MatrixType u(rows,cols);
|
||||
u = lu.matrixLU().template triangularView<Upper>();
|
||||
RMatrixType l = RMatrixType::Identity(rows,rows);
|
||||
l.block(0,0,rows,std::min(rows,cols)).template triangularView<StrictlyLower>()
|
||||
= lu.matrixLU().block(0,0,rows,std::min(rows,cols));
|
||||
|
||||
FullPivLU<MatrixType> lu(m1);
|
||||
// FIXME need better way to construct trapezoid matrices. extend triangularView to support rectangular.
|
||||
DynamicMatrixType u(rows,cols);
|
||||
for(int i = 0; i < rows; i++)
|
||||
for(int j = 0; j < cols; j++)
|
||||
u(i,j) = i>j ? Scalar(0) : lu.matrixLU()(i,j);
|
||||
DynamicMatrixType l(rows,rows);
|
||||
for(int i = 0; i < rows; i++)
|
||||
for(int j = 0; j < rows; j++)
|
||||
l(i,j) = (i<j || j>=cols) ? Scalar(0)
|
||||
: i==j ? Scalar(1)
|
||||
: lu.matrixLU()(i,j);
|
||||
|
||||
VERIFY_IS_APPROX(lu.permutationP() * m1 * lu.permutationQ(), l*u);
|
||||
|
||||
|
||||
KernelMatrixType m1kernel = lu.kernel();
|
||||
ImageMatrixType m1image = lu.image(m1);
|
||||
|
||||
VERIFY_IS_APPROX(m1, lu.reconstructedMatrix());
|
||||
VERIFY(rank == lu.rank());
|
||||
VERIFY(cols - lu.rank() == lu.dimensionOfKernel());
|
||||
VERIFY(!lu.isInjective());
|
||||
@ -91,9 +99,8 @@ template<typename MatrixType> void lu_non_invertible()
|
||||
VERIFY(!lu.isSurjective());
|
||||
VERIFY((m1 * m1kernel).isMuchSmallerThan(m1));
|
||||
VERIFY(m1image.fullPivLu().rank() == rank);
|
||||
DynamicMatrixType sidebyside(m1.rows(), m1.cols() + m1image.cols());
|
||||
sidebyside << m1, m1image;
|
||||
VERIFY(sidebyside.fullPivLu().rank() == rank);
|
||||
VERIFY_IS_APPROX(m1 * m1.adjoint() * m1image, m1image);
|
||||
|
||||
m2 = CMatrixType::Random(cols,cols2);
|
||||
m3 = m1*m2;
|
||||
m2 = CMatrixType::Random(cols,cols2);
|
||||
@ -107,20 +114,19 @@ template<typename MatrixType> void lu_invertible()
|
||||
/* this test covers the following files:
|
||||
LU.h
|
||||
*/
|
||||
typedef typename MatrixType::Scalar Scalar;
|
||||
typedef typename NumTraits<typename MatrixType::Scalar>::Real RealScalar;
|
||||
int size = ei_random<int>(1,200);
|
||||
|
||||
MatrixType m1(size, size), m2(size, size), m3(size, size);
|
||||
m1 = MatrixType::Random(size,size);
|
||||
FullPivLU<MatrixType> lu;
|
||||
lu.setThreshold(0.01);
|
||||
do {
|
||||
m1 = MatrixType::Random(size,size);
|
||||
lu.compute(m1);
|
||||
} while(!lu.isInvertible());
|
||||
|
||||
if (ei_is_same_type<RealScalar,float>::ret)
|
||||
{
|
||||
// let's build a matrix more stable to inverse
|
||||
MatrixType a = MatrixType::Random(size,size*2);
|
||||
m1 += a * a.adjoint();
|
||||
}
|
||||
|
||||
FullPivLU<MatrixType> lu(m1);
|
||||
VERIFY_IS_APPROX(m1, lu.reconstructedMatrix());
|
||||
VERIFY(0 == lu.dimensionOfKernel());
|
||||
VERIFY(lu.kernel().cols() == 1); // the kernel() should consist of a single (zero) column vector
|
||||
VERIFY(size == lu.rank());
|
||||
@ -134,6 +140,23 @@ template<typename MatrixType> void lu_invertible()
|
||||
VERIFY_IS_APPROX(m2, lu.inverse()*m3);
|
||||
}
|
||||
|
||||
template<typename MatrixType> void lu_partial_piv()
|
||||
{
|
||||
/* this test covers the following files:
|
||||
PartialPivLU.h
|
||||
*/
|
||||
typedef typename MatrixType::Scalar Scalar;
|
||||
typedef typename NumTraits<typename MatrixType::Scalar>::Real RealScalar;
|
||||
int rows = ei_random<int>(1,4);
|
||||
int cols = rows;
|
||||
|
||||
MatrixType m1(cols, rows);
|
||||
m1.setRandom();
|
||||
PartialPivLU<MatrixType> plu(m1);
|
||||
|
||||
VERIFY_IS_APPROX(m1, plu.reconstructedMatrix());
|
||||
}
|
||||
|
||||
template<typename MatrixType> void lu_verify_assert()
|
||||
{
|
||||
MatrixType tmp;
|
||||
@ -169,21 +192,23 @@ void test_lu()
|
||||
|
||||
CALL_SUBTEST_2( (lu_non_invertible<Matrix<double, 4, 6> >()) );
|
||||
CALL_SUBTEST_2( (lu_verify_assert<Matrix<double, 4, 6> >()) );
|
||||
|
||||
|
||||
CALL_SUBTEST_3( lu_non_invertible<MatrixXf>() );
|
||||
CALL_SUBTEST_3( lu_invertible<MatrixXf>() );
|
||||
CALL_SUBTEST_3( lu_verify_assert<MatrixXf>() );
|
||||
|
||||
|
||||
CALL_SUBTEST_4( lu_non_invertible<MatrixXd>() );
|
||||
CALL_SUBTEST_4( lu_invertible<MatrixXd>() );
|
||||
CALL_SUBTEST_4( lu_partial_piv<MatrixXd>() );
|
||||
CALL_SUBTEST_4( lu_verify_assert<MatrixXd>() );
|
||||
|
||||
|
||||
CALL_SUBTEST_5( lu_non_invertible<MatrixXcf>() );
|
||||
CALL_SUBTEST_5( lu_invertible<MatrixXcf>() );
|
||||
CALL_SUBTEST_5( lu_verify_assert<MatrixXcf>() );
|
||||
|
||||
|
||||
CALL_SUBTEST_6( lu_non_invertible<MatrixXcd>() );
|
||||
CALL_SUBTEST_6( lu_invertible<MatrixXcd>() );
|
||||
CALL_SUBTEST_6( lu_partial_piv<MatrixXcd>() );
|
||||
CALL_SUBTEST_6( lu_verify_assert<MatrixXcd>() );
|
||||
|
||||
CALL_SUBTEST_7(( lu_non_invertible<Matrix<float,Dynamic,16> >() ));
|
||||
|
59
test/main.h
59
test/main.h
@ -148,7 +148,7 @@ namespace Eigen
|
||||
|
||||
#define EIGEN_INTERNAL_DEBUGGING
|
||||
#define EIGEN_NICE_RANDOM
|
||||
#include <Eigen/QR> // required for createRandomMatrixOfRank
|
||||
#include <Eigen/QR> // required for createRandomPIMatrixOfRank
|
||||
|
||||
|
||||
#define VERIFY(a) do { if (!(a)) { \
|
||||
@ -157,6 +157,7 @@ namespace Eigen
|
||||
exit(2); \
|
||||
} } while (0)
|
||||
|
||||
#define VERIFY_IS_EQUAL(a, b) VERIFY(test_is_equal(a, b))
|
||||
#define VERIFY_IS_APPROX(a, b) VERIFY(test_ei_isApprox(a, b))
|
||||
#define VERIFY_IS_NOT_APPROX(a, b) VERIFY(!test_ei_isApprox(a, b))
|
||||
#define VERIFY_IS_MUCH_SMALLER_THAN(a, b) VERIFY(test_ei_isMuchSmallerThan(a, b))
|
||||
@ -342,8 +343,59 @@ inline bool test_isUnitary(const MatrixBase<Derived>& m)
|
||||
return m.isUnitary(test_precision<typename ei_traits<Derived>::Scalar>());
|
||||
}
|
||||
|
||||
template<typename Derived1, typename Derived2,
|
||||
bool IsVector = bool(Derived1::IsVectorAtCompileTime) && bool(Derived2::IsVectorAtCompileTime) >
|
||||
struct test_is_equal_impl
|
||||
{
|
||||
static bool run(const Derived1& a1, const Derived2& a2)
|
||||
{
|
||||
if(a1.size() != a2.size()) return false;
|
||||
// we evaluate a2 into a temporary of the shape of a1. this allows to let Assign.h handle the transposing if needed.
|
||||
typename Derived1::PlainObject a2_evaluated(a2);
|
||||
for(int i = 0; i < a1.size(); ++i)
|
||||
if(a1.coeff(i) != a2_evaluated.coeff(i)) return false;
|
||||
return true;
|
||||
}
|
||||
};
|
||||
|
||||
template<typename Derived1, typename Derived2>
|
||||
struct test_is_equal_impl<Derived1, Derived2, false>
|
||||
{
|
||||
static bool run(const Derived1& a1, const Derived2& a2)
|
||||
{
|
||||
if(a1.rows() != a2.rows()) return false;
|
||||
if(a1.cols() != a2.cols()) return false;
|
||||
for(int j = 0; j < a1.cols(); ++j)
|
||||
for(int i = 0; i < a1.rows(); ++i)
|
||||
if(a1.coeff(i,j) != a2.coeff(i,j)) return false;
|
||||
return true;
|
||||
}
|
||||
};
|
||||
|
||||
template<typename Derived1, typename Derived2>
|
||||
bool test_is_equal(const Derived1& a1, const Derived2& a2)
|
||||
{
|
||||
return test_is_equal_impl<Derived1, Derived2>::run(a1, a2);
|
||||
}
|
||||
|
||||
bool test_is_equal(const int actual, const int expected)
|
||||
{
|
||||
if (actual==expected)
|
||||
return true;
|
||||
// false:
|
||||
std::cerr
|
||||
<< std::endl << " actual = " << actual
|
||||
<< std::endl << " expected = " << expected << std::endl << std::endl;
|
||||
return false;
|
||||
}
|
||||
|
||||
/** Creates a random Partial Isometry matrix of given rank.
|
||||
*
|
||||
* A partial isometry is a matrix all of whose singular values are either 0 or 1.
|
||||
* This is very useful to test rank-revealing algorithms.
|
||||
*/
|
||||
template<typename MatrixType>
|
||||
void createRandomMatrixOfRank(int desired_rank, int rows, int cols, MatrixType& m)
|
||||
void createRandomPIMatrixOfRank(int desired_rank, int rows, int cols, MatrixType& m)
|
||||
{
|
||||
typedef typename ei_traits<MatrixType>::Scalar Scalar;
|
||||
enum { Rows = MatrixType::RowsAtCompileTime, Cols = MatrixType::ColsAtCompileTime };
|
||||
@ -360,7 +412,8 @@ void createRandomMatrixOfRank(int desired_rank, int rows, int cols, MatrixType&
|
||||
|
||||
if(desired_rank == 1)
|
||||
{
|
||||
m = VectorType::Random(rows) * VectorType::Random(cols).transpose();
|
||||
// here we normalize the vectors to get a partial isometry
|
||||
m = VectorType::Random(rows).normalized() * VectorType::Random(cols).normalized().transpose();
|
||||
return;
|
||||
}
|
||||
|
||||
|
14
test/map.cpp
14
test/map.cpp
@ -1,7 +1,7 @@
|
||||
// This file is part of Eigen, a lightweight C++ template library
|
||||
// for linear algebra.
|
||||
//
|
||||
// Copyright (C) 2006-2008 Benoit Jacob <jacob.benoit.1@gmail.com>
|
||||
// Copyright (C) 2006-2010 Benoit Jacob <jacob.benoit.1@gmail.com>
|
||||
//
|
||||
// Eigen is free software; you can redistribute it and/or
|
||||
// modify it under the terms of the GNU Lesser General Public
|
||||
@ -42,8 +42,8 @@ template<typename VectorType> void map_class_vector(const VectorType& m)
|
||||
VectorType ma1 = Map<VectorType, Aligned>(array1, size);
|
||||
VectorType ma2 = Map<VectorType, Aligned>(array2, size);
|
||||
VectorType ma3 = Map<VectorType>(array3unaligned, size);
|
||||
VERIFY_IS_APPROX(ma1, ma2);
|
||||
VERIFY_IS_APPROX(ma1, ma3);
|
||||
VERIFY_IS_EQUAL(ma1, ma2);
|
||||
VERIFY_IS_EQUAL(ma1, ma3);
|
||||
VERIFY_RAISES_ASSERT((Map<VectorType,Aligned>(array3unaligned, size)));
|
||||
|
||||
ei_aligned_delete(array1, size);
|
||||
@ -70,9 +70,9 @@ template<typename MatrixType> void map_class_matrix(const MatrixType& m)
|
||||
Map<MatrixType>(array3unaligned, rows, cols) = Map<MatrixType>(array1, rows, cols);
|
||||
MatrixType ma1 = Map<MatrixType>(array1, rows, cols);
|
||||
MatrixType ma2 = Map<MatrixType, Aligned>(array2, rows, cols);
|
||||
VERIFY_IS_APPROX(ma1, ma2);
|
||||
VERIFY_IS_EQUAL(ma1, ma2);
|
||||
MatrixType ma3 = Map<MatrixType>(array3unaligned, rows, cols);
|
||||
VERIFY_IS_APPROX(ma1, ma3);
|
||||
VERIFY_IS_EQUAL(ma1, ma3);
|
||||
|
||||
ei_aligned_delete(array1, size);
|
||||
ei_aligned_delete(array2, size);
|
||||
@ -97,8 +97,8 @@ template<typename VectorType> void map_static_methods(const VectorType& m)
|
||||
VectorType ma1 = VectorType::Map(array1, size);
|
||||
VectorType ma2 = VectorType::MapAligned(array2, size);
|
||||
VectorType ma3 = VectorType::Map(array3unaligned, size);
|
||||
VERIFY_IS_APPROX(ma1, ma2);
|
||||
VERIFY_IS_APPROX(ma1, ma3);
|
||||
VERIFY_IS_EQUAL(ma1, ma2);
|
||||
VERIFY_IS_EQUAL(ma1, ma3);
|
||||
|
||||
ei_aligned_delete(array1, size);
|
||||
ei_aligned_delete(array2, size);
|
||||
|
139
test/mapstride.cpp
Normal file
139
test/mapstride.cpp
Normal file
@ -0,0 +1,139 @@
|
||||
// This file is part of Eigen, a lightweight C++ template library
|
||||
// for linear algebra.
|
||||
//
|
||||
// Copyright (C) 2010 Benoit Jacob <jacob.benoit.1@gmail.com>
|
||||
//
|
||||
// Eigen is free software; you can redistribute it and/or
|
||||
// modify it under the terms of the GNU Lesser General Public
|
||||
// License as published by the Free Software Foundation; either
|
||||
// version 3 of the License, or (at your option) any later version.
|
||||
//
|
||||
// Alternatively, you can redistribute it and/or
|
||||
// modify it under the terms of the GNU General Public License as
|
||||
// published by the Free Software Foundation; either version 2 of
|
||||
// the License, or (at your option) any later version.
|
||||
//
|
||||
// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY
|
||||
// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
||||
// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the
|
||||
// GNU General Public License for more details.
|
||||
//
|
||||
// You should have received a copy of the GNU Lesser General Public
|
||||
// License and a copy of the GNU General Public License along with
|
||||
// Eigen. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
#include "main.h"
|
||||
|
||||
template<typename VectorType> void map_class_vector(const VectorType& m)
|
||||
{
|
||||
typedef typename VectorType::Scalar Scalar;
|
||||
|
||||
int size = m.size();
|
||||
|
||||
VectorType v = VectorType::Random(size);
|
||||
|
||||
int arraysize = 3*size;
|
||||
|
||||
Scalar* array = ei_aligned_new<Scalar>(arraysize);
|
||||
|
||||
{
|
||||
Map<VectorType, Aligned, InnerStride<3> > map(array, size);
|
||||
map = v;
|
||||
for(int i = 0; i < size; ++i)
|
||||
{
|
||||
VERIFY(array[3*i] == v[i]);
|
||||
VERIFY(map[i] == v[i]);
|
||||
}
|
||||
}
|
||||
|
||||
{
|
||||
Map<VectorType, Unaligned, InnerStride<Dynamic> > map(array, size, InnerStride<Dynamic>(2));
|
||||
map = v;
|
||||
for(int i = 0; i < size; ++i)
|
||||
{
|
||||
VERIFY(array[2*i] == v[i]);
|
||||
VERIFY(map[i] == v[i]);
|
||||
}
|
||||
}
|
||||
|
||||
ei_aligned_delete(array, arraysize);
|
||||
}
|
||||
|
||||
template<typename MatrixType> void map_class_matrix(const MatrixType& _m)
|
||||
{
|
||||
typedef typename MatrixType::Scalar Scalar;
|
||||
|
||||
int rows = _m.rows(), cols = _m.cols();
|
||||
|
||||
MatrixType m = MatrixType::Random(rows,cols);
|
||||
|
||||
int arraysize = 2*(rows+4)*(cols+4);
|
||||
|
||||
Scalar* array = ei_aligned_new<Scalar>(arraysize);
|
||||
|
||||
// test no inner stride and some dynamic outer stride
|
||||
{
|
||||
Map<MatrixType, Aligned, OuterStride<Dynamic> > map(array, rows, cols, OuterStride<Dynamic>(m.innerSize()+1));
|
||||
map = m;
|
||||
VERIFY(map.outerStride() == map.innerSize()+1);
|
||||
for(int i = 0; i < m.outerSize(); ++i)
|
||||
for(int j = 0; j < m.innerSize(); ++j)
|
||||
{
|
||||
VERIFY(array[map.outerStride()*i+j] == m.coeffByOuterInner(i,j));
|
||||
VERIFY(map.coeffByOuterInner(i,j) == m.coeffByOuterInner(i,j));
|
||||
}
|
||||
}
|
||||
|
||||
// test no inner stride and an outer stride of +4. This is quite important as for fixed-size matrices,
|
||||
// this allows to hit the special case where it's vectorizable.
|
||||
{
|
||||
enum {
|
||||
InnerSize = MatrixType::InnerSizeAtCompileTime,
|
||||
OuterStrideAtCompileTime = InnerSize==Dynamic ? Dynamic : InnerSize+4
|
||||
};
|
||||
Map<MatrixType, Aligned, OuterStride<OuterStrideAtCompileTime> >
|
||||
map(array, rows, cols, OuterStride<OuterStrideAtCompileTime>(m.innerSize()+4));
|
||||
map = m;
|
||||
VERIFY(map.outerStride() == map.innerSize()+4);
|
||||
for(int i = 0; i < m.outerSize(); ++i)
|
||||
for(int j = 0; j < m.innerSize(); ++j)
|
||||
{
|
||||
VERIFY(array[map.outerStride()*i+j] == m.coeffByOuterInner(i,j));
|
||||
VERIFY(map.coeffByOuterInner(i,j) == m.coeffByOuterInner(i,j));
|
||||
}
|
||||
}
|
||||
|
||||
// test both inner stride and outer stride
|
||||
{
|
||||
Map<MatrixType, Aligned, Stride<Dynamic,Dynamic> > map(array, rows, cols, Stride<Dynamic,Dynamic>(2*m.innerSize()+1, 2));
|
||||
map = m;
|
||||
VERIFY(map.outerStride() == 2*map.innerSize()+1);
|
||||
VERIFY(map.innerStride() == 2);
|
||||
for(int i = 0; i < m.outerSize(); ++i)
|
||||
for(int j = 0; j < m.innerSize(); ++j)
|
||||
{
|
||||
VERIFY(array[map.outerStride()*i+map.innerStride()*j] == m.coeffByOuterInner(i,j));
|
||||
VERIFY(map.coeffByOuterInner(i,j) == m.coeffByOuterInner(i,j));
|
||||
}
|
||||
}
|
||||
|
||||
ei_aligned_delete(array, arraysize);
|
||||
}
|
||||
|
||||
void test_mapstride()
|
||||
{
|
||||
for(int i = 0; i < g_repeat; i++) {
|
||||
CALL_SUBTEST_1( map_class_vector(Matrix<float, 1, 1>()) );
|
||||
CALL_SUBTEST_2( map_class_vector(Vector4d()) );
|
||||
CALL_SUBTEST_3( map_class_vector(RowVector4f()) );
|
||||
CALL_SUBTEST_4( map_class_vector(VectorXcf(8)) );
|
||||
CALL_SUBTEST_5( map_class_vector(VectorXi(12)) );
|
||||
|
||||
CALL_SUBTEST_1( map_class_matrix(Matrix<float, 1, 1>()) );
|
||||
CALL_SUBTEST_2( map_class_matrix(Matrix4d()) );
|
||||
CALL_SUBTEST_3( map_class_matrix(Matrix<float,3,5>()) );
|
||||
CALL_SUBTEST_3( map_class_matrix(Matrix<float,4,8>()) );
|
||||
CALL_SUBTEST_4( map_class_matrix(MatrixXcf(ei_random<int>(1,10),ei_random<int>(1,10))) );
|
||||
CALL_SUBTEST_5( map_class_matrix(MatrixXi(5,5)));//ei_random<int>(1,10),ei_random<int>(1,10))) );
|
||||
}
|
||||
}
|
@ -78,7 +78,9 @@ template<int SizeAtCompileType> void mixingtypes(int size = SizeAtCompileType)
|
||||
|
||||
// check dot product
|
||||
vf.dot(vf);
|
||||
#if 0 // we get other compilation errors here than just static asserts
|
||||
VERIFY_RAISES_ASSERT(vd.dot(vf));
|
||||
#endif
|
||||
VERIFY_RAISES_ASSERT(vcf.dot(vf)); // yeah eventually we should allow this but i'm too lazy to make that change now in Dot.h
|
||||
// especially as that might be rewritten as cwise product .sum() which would make that automatic.
|
||||
|
||||
|
@ -32,7 +32,10 @@ template<typename T> T ei_negate(const T& x) { return -x; }
|
||||
template<typename Scalar> bool areApprox(const Scalar* a, const Scalar* b, int size)
|
||||
{
|
||||
for (int i=0; i<size; ++i)
|
||||
if (!ei_isApprox(a[i],b[i])) return false;
|
||||
if (!ei_isApprox(a[i],b[i])) {
|
||||
std::cout << "a[" << i << "]: " << a[i] << ", b[" << i << "]: " << b[i] << std::endl;
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
|
@ -51,7 +51,7 @@ template<typename MatrixType> void permutationmatrices(const MatrixType& m)
|
||||
typedef Matrix<int, Rows, 1> LeftPermutationVectorType;
|
||||
typedef PermutationMatrix<Cols> RightPermutationType;
|
||||
typedef Matrix<int, Cols, 1> RightPermutationVectorType;
|
||||
|
||||
|
||||
int rows = m.rows();
|
||||
int cols = m.cols();
|
||||
|
||||
@ -72,7 +72,7 @@ template<typename MatrixType> void permutationmatrices(const MatrixType& m)
|
||||
Matrix<Scalar,Cols,Cols> rm(rp);
|
||||
|
||||
VERIFY_IS_APPROX(m_permuted, lm*m_original*rm);
|
||||
|
||||
|
||||
VERIFY_IS_APPROX(lp.inverse()*m_permuted*rp.inverse(), m_original);
|
||||
VERIFY((lp*lp.inverse()).toDenseMatrix().isIdentity());
|
||||
|
||||
@ -86,6 +86,23 @@ template<typename MatrixType> void permutationmatrices(const MatrixType& m)
|
||||
identityp.setIdentity(rows);
|
||||
VERIFY_IS_APPROX(m_original, identityp*m_original);
|
||||
|
||||
// check inplace permutations
|
||||
m_permuted = m_original;
|
||||
m_permuted = lp.inverse() * m_permuted;
|
||||
VERIFY_IS_APPROX(m_permuted, lp.inverse()*m_original);
|
||||
|
||||
m_permuted = m_original;
|
||||
m_permuted = m_permuted * rp.inverse();
|
||||
VERIFY_IS_APPROX(m_permuted, m_original*rp.inverse());
|
||||
|
||||
m_permuted = m_original;
|
||||
m_permuted = lp * m_permuted;
|
||||
VERIFY_IS_APPROX(m_permuted, lp*m_original);
|
||||
|
||||
m_permuted = m_original;
|
||||
m_permuted = m_permuted * rp;
|
||||
VERIFY_IS_APPROX(m_permuted, m_original*rp);
|
||||
|
||||
if(rows>1 && cols>1)
|
||||
{
|
||||
lp2 = lp;
|
||||
@ -114,7 +131,7 @@ void test_permutationmatrices()
|
||||
CALL_SUBTEST_2( permutationmatrices(Matrix3f()) );
|
||||
CALL_SUBTEST_3( permutationmatrices(Matrix<double,3,3,RowMajor>()) );
|
||||
CALL_SUBTEST_4( permutationmatrices(Matrix4d()) );
|
||||
CALL_SUBTEST_5( permutationmatrices(Matrix<double,4,6>()) );
|
||||
CALL_SUBTEST_5( permutationmatrices(Matrix<double,40,60>()) );
|
||||
CALL_SUBTEST_6( permutationmatrices(Matrix<double,Dynamic,Dynamic,RowMajor>(20, 30)) );
|
||||
CALL_SUBTEST_7( permutationmatrices(MatrixXcf(15, 10)) );
|
||||
}
|
||||
|
@ -117,6 +117,7 @@ void test_qr()
|
||||
CALL_SUBTEST_3(( qr_fixedsize<Matrix<float,3,4>, 2 >() ));
|
||||
CALL_SUBTEST_4(( qr_fixedsize<Matrix<double,6,2>, 4 >() ));
|
||||
CALL_SUBTEST_5(( qr_fixedsize<Matrix<double,2,5>, 7 >() ));
|
||||
CALL_SUBTEST_11( qr(Matrix<float,1,1>()) );
|
||||
}
|
||||
|
||||
for(int i = 0; i < g_repeat; i++) {
|
||||
|
@ -36,7 +36,7 @@ template<typename MatrixType> void qr()
|
||||
typedef Matrix<Scalar, MatrixType::RowsAtCompileTime, MatrixType::RowsAtCompileTime> MatrixQType;
|
||||
typedef Matrix<Scalar, MatrixType::ColsAtCompileTime, 1> VectorType;
|
||||
MatrixType m1;
|
||||
createRandomMatrixOfRank(rank,rows,cols,m1);
|
||||
createRandomPIMatrixOfRank(rank,rows,cols,m1);
|
||||
ColPivHouseholderQR<MatrixType> qr(m1);
|
||||
VERIFY_IS_APPROX(rank, qr.rank());
|
||||
VERIFY(cols - qr.rank() == qr.dimensionOfKernel());
|
||||
@ -64,7 +64,7 @@ template<typename MatrixType, int Cols2> void qr_fixedsize()
|
||||
typedef typename MatrixType::Scalar Scalar;
|
||||
int rank = ei_random<int>(1, std::min(int(Rows), int(Cols))-1);
|
||||
Matrix<Scalar,Rows,Cols> m1;
|
||||
createRandomMatrixOfRank(rank,Rows,Cols,m1);
|
||||
createRandomPIMatrixOfRank(rank,Rows,Cols,m1);
|
||||
ColPivHouseholderQR<Matrix<Scalar,Rows,Cols> > qr(m1);
|
||||
VERIFY_IS_APPROX(rank, qr.rank());
|
||||
VERIFY(Cols - qr.rank() == qr.dimensionOfKernel());
|
||||
|
@ -35,7 +35,7 @@ template<typename MatrixType> void qr()
|
||||
typedef Matrix<Scalar, MatrixType::RowsAtCompileTime, MatrixType::RowsAtCompileTime> MatrixQType;
|
||||
typedef Matrix<Scalar, MatrixType::ColsAtCompileTime, 1> VectorType;
|
||||
MatrixType m1;
|
||||
createRandomMatrixOfRank(rank,rows,cols,m1);
|
||||
createRandomPIMatrixOfRank(rank,rows,cols,m1);
|
||||
FullPivHouseholderQR<MatrixType> qr(m1);
|
||||
VERIFY_IS_APPROX(rank, qr.rank());
|
||||
VERIFY(cols - qr.rank() == qr.dimensionOfKernel());
|
||||
|
@ -147,6 +147,9 @@ endif(NOT EIGEN_NO_UPDATE)
|
||||
|
||||
# which ctest command to use for running the dashboard
|
||||
SET (CTEST_COMMAND "${EIGEN_CMAKE_DIR}ctest -D ${EIGEN_MODE}")
|
||||
if($ENV{EIGEN_CTEST_ARGS})
|
||||
SET (CTEST_COMMAND "${CTEST_COMMAND} $ENV{EIGEN_CTEST_ARGS}")
|
||||
endif($ENV{EIGEN_CTEST_ARGS})
|
||||
# what cmake command to use for configuring this dashboard
|
||||
SET (CTEST_CMAKE_COMMAND "${EIGEN_CMAKE_DIR}cmake -DEIGEN_LEAVE_TEST_IN_ALL_TARGET=ON")
|
||||
|
||||
|
@ -22,6 +22,7 @@
|
||||
// License and a copy of the GNU General Public License along with
|
||||
// Eigen. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
#define EIGEN_DEBUG_ASSIGN
|
||||
#include "main.h"
|
||||
#include <typeinfo>
|
||||
|
||||
@ -33,6 +34,14 @@ bool test_assign(const Dst&, const Src&, int traversal, int unrolling)
|
||||
&& ei_assign_traits<Dst,Src>::Unrolling==unrolling;
|
||||
}
|
||||
|
||||
template<typename Dst, typename Src>
|
||||
bool test_assign(int traversal, int unrolling)
|
||||
{
|
||||
ei_assign_traits<Dst,Src>::debug();
|
||||
return ei_assign_traits<Dst,Src>::Traversal==traversal
|
||||
&& ei_assign_traits<Dst,Src>::Unrolling==unrolling;
|
||||
}
|
||||
|
||||
template<typename Xpr>
|
||||
bool test_redux(const Xpr&, int traversal, int unrolling)
|
||||
{
|
||||
@ -86,6 +95,15 @@ void test_vectorization_logic()
|
||||
VERIFY(test_assign(MatrixXf(10,10),MatrixXf(20,20).block(10,10,2,3),
|
||||
SliceVectorizedTraversal,NoUnrolling));
|
||||
|
||||
VERIFY((test_assign<
|
||||
Map<Matrix<float,4,8>, Aligned, OuterStride<12> >,
|
||||
Matrix<float,4,8>
|
||||
>(InnerVectorizedTraversal,CompleteUnrolling)));
|
||||
|
||||
VERIFY((test_assign<
|
||||
Map<Matrix<float,4,8>, Aligned, InnerStride<12> >,
|
||||
Matrix<float,4,8>
|
||||
>(DefaultTraversal,CompleteUnrolling)));
|
||||
|
||||
VERIFY(test_redux(VectorXf(10),
|
||||
LinearVectorizedTraversal,NoUnrolling));
|
||||
|
@ -173,7 +173,7 @@ class FFT
|
||||
|
||||
template<typename InputDerived, typename ComplexDerived>
|
||||
inline
|
||||
void fwd( MatrixBase<ComplexDerived> & dst, const MatrixBase<InputDerived> & src)
|
||||
void fwd( MatrixBase<ComplexDerived> & dst, const MatrixBase<InputDerived> & src,int nfft=-1)
|
||||
{
|
||||
EIGEN_STATIC_ASSERT_VECTOR_ONLY(InputDerived)
|
||||
EIGEN_STATIC_ASSERT_VECTOR_ONLY(ComplexDerived)
|
||||
@ -183,16 +183,25 @@ class FFT
|
||||
EIGEN_STATIC_ASSERT(int(InputDerived::Flags)&int(ComplexDerived::Flags)&DirectAccessBit,
|
||||
THIS_METHOD_IS_ONLY_FOR_EXPRESSIONS_WITH_DIRECT_MEMORY_ACCESS_SUCH_AS_MAP_OR_PLAIN_MATRICES)
|
||||
|
||||
if ( NumTraits< typename InputDerived::Scalar >::IsComplex == 0 && HasFlag(HalfSpectrum) )
|
||||
dst.derived().resize( (src.size()>>1)+1);
|
||||
else
|
||||
dst.derived().resize(src.size());
|
||||
if (nfft<1)
|
||||
nfft = src.size();
|
||||
|
||||
if (src.stride() != 1) {
|
||||
Matrix<typename InputDerived::Scalar,1,Dynamic> tmp = src;
|
||||
fwd( &dst[0],&tmp[0],src.size() );
|
||||
if ( NumTraits< typename InputDerived::Scalar >::IsComplex == 0 && HasFlag(HalfSpectrum) )
|
||||
dst.derived().resize( (nfft>>1)+1);
|
||||
else
|
||||
dst.derived().resize(nfft);
|
||||
|
||||
if ( src.stride() != 1 || src.size() < nfft ) {
|
||||
Matrix<typename InputDerived::Scalar,1,Dynamic> tmp;
|
||||
if (src.size()<nfft) {
|
||||
tmp.setZero(nfft);
|
||||
tmp.block(0,0,src.size(),1 ) = src;
|
||||
}else{
|
||||
tmp = src;
|
||||
}
|
||||
fwd( &dst[0],&tmp[0],nfft );
|
||||
}else{
|
||||
fwd( &dst[0],&src[0],src.size() );
|
||||
fwd( &dst[0],&src[0],nfft );
|
||||
}
|
||||
}
|
||||
|
||||
@ -216,25 +225,60 @@ class FFT
|
||||
inline
|
||||
void inv( MatrixBase<OutputDerived> & dst, const MatrixBase<ComplexDerived> & src, int nfft=-1)
|
||||
{
|
||||
EIGEN_STATIC_ASSERT_VECTOR_ONLY(OutputDerived)
|
||||
EIGEN_STATIC_ASSERT_VECTOR_ONLY(ComplexDerived)
|
||||
EIGEN_STATIC_ASSERT_SAME_VECTOR_SIZE(ComplexDerived,OutputDerived) // size at compile-time
|
||||
EIGEN_STATIC_ASSERT((ei_is_same_type<typename ComplexDerived::Scalar, Complex>::ret),
|
||||
YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY)
|
||||
EIGEN_STATIC_ASSERT(int(OutputDerived::Flags)&int(ComplexDerived::Flags)&DirectAccessBit,
|
||||
THIS_METHOD_IS_ONLY_FOR_EXPRESSIONS_WITH_DIRECT_MEMORY_ACCESS_SUCH_AS_MAP_OR_PLAIN_MATRICES)
|
||||
typedef typename ComplexDerived::Scalar src_type;
|
||||
typedef typename OutputDerived::Scalar dst_type;
|
||||
const bool realfft= (NumTraits<dst_type>::IsComplex == 0);
|
||||
EIGEN_STATIC_ASSERT_VECTOR_ONLY(OutputDerived)
|
||||
EIGEN_STATIC_ASSERT_VECTOR_ONLY(ComplexDerived)
|
||||
EIGEN_STATIC_ASSERT_SAME_VECTOR_SIZE(ComplexDerived,OutputDerived) // size at compile-time
|
||||
EIGEN_STATIC_ASSERT((ei_is_same_type<src_type, Complex>::ret),
|
||||
YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY)
|
||||
EIGEN_STATIC_ASSERT(int(OutputDerived::Flags)&int(ComplexDerived::Flags)&DirectAccessBit,
|
||||
THIS_METHOD_IS_ONLY_FOR_EXPRESSIONS_WITH_DIRECT_MEMORY_ACCESS_SUCH_AS_MAP_OR_PLAIN_MATRICES)
|
||||
|
||||
if (nfft<1) {
|
||||
nfft = ( NumTraits<typename OutputDerived::Scalar>::IsComplex == 0 && HasFlag(HalfSpectrum) ) ? 2*(src.size()-1) : src.size();
|
||||
}
|
||||
dst.derived().resize( nfft );
|
||||
if (src.stride() != 1) {
|
||||
// if the vector is strided, then we need to copy it to a packed temporary
|
||||
Matrix<typename ComplexDerived::Scalar,1,Dynamic> tmp = src;
|
||||
inv( &dst[0],&tmp[0], nfft);
|
||||
if (nfft<1) { //automatic FFT size determination
|
||||
if ( realfft && HasFlag(HalfSpectrum) )
|
||||
nfft = 2*(src.size()-1); //assume even fft size
|
||||
else
|
||||
nfft = src.size();
|
||||
}
|
||||
dst.derived().resize( nfft );
|
||||
|
||||
// check for nfft that does not fit the input data size
|
||||
int resize_input= ( realfft && HasFlag(HalfSpectrum) )
|
||||
? ( (nfft/2+1) - src.size() )
|
||||
: ( nfft - src.size() );
|
||||
|
||||
if ( src.stride() != 1 || resize_input ) {
|
||||
// if the vector is strided, then we need to copy it to a packed temporary
|
||||
Matrix<src_type,1,Dynamic> tmp;
|
||||
if ( resize_input ) {
|
||||
size_t ncopy = min(src.size(),src.size() + resize_input);
|
||||
tmp.setZero(src.size() + resize_input);
|
||||
if ( realfft && HasFlag(HalfSpectrum) ) {
|
||||
// pad at the Nyquist bin
|
||||
tmp.head(ncopy) = src.head(ncopy);
|
||||
tmp(ncopy-1) = real(tmp(ncopy-1)); // enforce real-only Nyquist bin
|
||||
}else{
|
||||
size_t nhead,ntail;
|
||||
nhead = 1+ncopy/2-1; // range [0:pi)
|
||||
ntail = ncopy/2-1; // range (-pi:0)
|
||||
tmp.head(nhead) = src.head(nhead);
|
||||
tmp.tail(ntail) = src.tail(ntail);
|
||||
if (resize_input<0) { //shrinking -- create the Nyquist bin as the average of the two bins that fold into it
|
||||
tmp(nhead) = ( src(nfft/2) + src( src.size() - nfft/2 ) )*src_type(.5);
|
||||
}else{ // expanding -- split the old Nyquist bin into two halves
|
||||
tmp(nhead) = src(nhead) * src_type(.5);
|
||||
tmp(tmp.size()-nhead) = tmp(nhead);
|
||||
}
|
||||
}
|
||||
}else{
|
||||
inv( &dst[0],&src[0], nfft);
|
||||
tmp = src;
|
||||
}
|
||||
inv( &dst[0],&tmp[0], nfft);
|
||||
}else{
|
||||
inv( &dst[0],&src[0], nfft);
|
||||
}
|
||||
}
|
||||
|
||||
template <typename _Output>
|
||||
|
@ -178,9 +178,9 @@ class MatrixFunction<MatrixType, 1>
|
||||
*
|
||||
* This is morally a \c static \c const \c Scalar, but only
|
||||
* integers can be static constant class members in C++. The
|
||||
* separation constant is set to 0.01, a value taken from the
|
||||
* separation constant is set to 0.1, a value taken from the
|
||||
* paper by Davies and Higham. */
|
||||
static const RealScalar separation() { return static_cast<RealScalar>(0.01); }
|
||||
static const RealScalar separation() { return static_cast<RealScalar>(0.1); }
|
||||
};
|
||||
|
||||
/** \brief Constructor.
|
||||
@ -492,14 +492,12 @@ typename MatrixFunction<MatrixType,1>::DynMatrixType MatrixFunction<MatrixType,1
|
||||
template<typename Derived> class MatrixFunctionReturnValue
|
||||
: public ReturnByValue<MatrixFunctionReturnValue<Derived> >
|
||||
{
|
||||
private:
|
||||
public:
|
||||
|
||||
typedef typename ei_traits<Derived>::Scalar Scalar;
|
||||
typedef typename ei_stem_function<Scalar>::type StemFunction;
|
||||
|
||||
public:
|
||||
|
||||
/** \brief Constructor.
|
||||
/** \brief Constructor.
|
||||
*
|
||||
* \param[in] A %Matrix (expression) forming the argument of the
|
||||
* matrix function.
|
||||
|
@ -194,6 +194,8 @@ template<typename FunctorType, typename Scalar>
|
||||
HybridNonLinearSolverSpace::Status
|
||||
HybridNonLinearSolver<FunctorType,Scalar>::solveOneStep(FVectorType &x)
|
||||
{
|
||||
assert(x.size()==n); // check the caller is not cheating us
|
||||
|
||||
int j;
|
||||
std::vector<PlanarRotation<Scalar> > v_givens(n), w_givens(n);
|
||||
|
||||
@ -350,6 +352,8 @@ HybridNonLinearSolverSpace::Status
|
||||
HybridNonLinearSolver<FunctorType,Scalar>::solve(FVectorType &x)
|
||||
{
|
||||
HybridNonLinearSolverSpace::Status status = solveInit(x);
|
||||
if (status==HybridNonLinearSolverSpace::ImproperInputParameters)
|
||||
return status;
|
||||
while (status==HybridNonLinearSolverSpace::Running)
|
||||
status = solveOneStep(x);
|
||||
return status;
|
||||
@ -429,6 +433,8 @@ template<typename FunctorType, typename Scalar>
|
||||
HybridNonLinearSolverSpace::Status
|
||||
HybridNonLinearSolver<FunctorType,Scalar>::solveNumericalDiffOneStep(FVectorType &x)
|
||||
{
|
||||
assert(x.size()==n); // check the caller is not cheating us
|
||||
|
||||
int j;
|
||||
std::vector<PlanarRotation<Scalar> > v_givens(n), w_givens(n);
|
||||
|
||||
@ -587,6 +593,8 @@ HybridNonLinearSolverSpace::Status
|
||||
HybridNonLinearSolver<FunctorType,Scalar>::solveNumericalDiff(FVectorType &x)
|
||||
{
|
||||
HybridNonLinearSolverSpace::Status status = solveNumericalDiffInit(x);
|
||||
if (status==HybridNonLinearSolverSpace::ImproperInputParameters)
|
||||
return status;
|
||||
while (status==HybridNonLinearSolverSpace::Running)
|
||||
status = solveNumericalDiffOneStep(x);
|
||||
return status;
|
||||
|
@ -161,6 +161,8 @@ LevenbergMarquardtSpace::Status
|
||||
LevenbergMarquardt<FunctorType,Scalar>::minimize(FVectorType &x)
|
||||
{
|
||||
LevenbergMarquardtSpace::Status status = minimizeInit(x);
|
||||
if (status==LevenbergMarquardtSpace::ImproperInputParameters)
|
||||
return status;
|
||||
do {
|
||||
status = minimizeOneStep(x);
|
||||
} while (status==LevenbergMarquardtSpace::Running);
|
||||
@ -214,7 +216,7 @@ template<typename FunctorType, typename Scalar>
|
||||
LevenbergMarquardtSpace::Status
|
||||
LevenbergMarquardt<FunctorType,Scalar>::minimizeOneStep(FVectorType &x)
|
||||
{
|
||||
int j;
|
||||
assert(x.size()==n); // check the caller is not cheating us
|
||||
|
||||
/* calculate the jacobian matrix. */
|
||||
int df_ret = functor.df(x, fjac);
|
||||
@ -235,7 +237,7 @@ LevenbergMarquardt<FunctorType,Scalar>::minimizeOneStep(FVectorType &x)
|
||||
/* to the norms of the columns of the initial jacobian. */
|
||||
if (iter == 1) {
|
||||
if (!useExternalScaling)
|
||||
for (j = 0; j < n; ++j)
|
||||
for (int j = 0; j < n; ++j)
|
||||
diag[j] = (wa2[j]==0.)? 1. : wa2[j];
|
||||
|
||||
/* on the first iteration, calculate the norm of the scaled x */
|
||||
@ -255,7 +257,7 @@ LevenbergMarquardt<FunctorType,Scalar>::minimizeOneStep(FVectorType &x)
|
||||
/* compute the norm of the scaled gradient. */
|
||||
gnorm = 0.;
|
||||
if (fnorm != 0.)
|
||||
for (j = 0; j < n; ++j)
|
||||
for (int j = 0; j < n; ++j)
|
||||
if (wa2[permutation.indices()[j]] != 0.)
|
||||
gnorm = std::max(gnorm, ei_abs( fjac.col(j).head(j+1).dot(qtf.head(j+1)/fnorm) / wa2[permutation.indices()[j]]));
|
||||
|
||||
@ -431,6 +433,8 @@ template<typename FunctorType, typename Scalar>
|
||||
LevenbergMarquardtSpace::Status
|
||||
LevenbergMarquardt<FunctorType,Scalar>::minimizeOptimumStorageOneStep(FVectorType &x)
|
||||
{
|
||||
assert(x.size()==n); // check the caller is not cheating us
|
||||
|
||||
int i, j;
|
||||
bool sing;
|
||||
|
||||
@ -606,6 +610,8 @@ LevenbergMarquardtSpace::Status
|
||||
LevenbergMarquardt<FunctorType,Scalar>::minimizeOptimumStorage(FVectorType &x)
|
||||
{
|
||||
LevenbergMarquardtSpace::Status status = minimizeOptimumStorageInit(x);
|
||||
if (status==LevenbergMarquardtSpace::ImproperInputParameters)
|
||||
return status;
|
||||
do {
|
||||
status = minimizeOptimumStorageOneStep(x);
|
||||
} while (status==LevenbergMarquardtSpace::Running);
|
||||
|
@ -18,5 +18,5 @@ int main()
|
||||
|
||||
std::cout << "The matrix A is:\n" << A << "\n\n";
|
||||
std::cout << "The matrix exponential of A is:\n"
|
||||
<< ei_matrix_function(A, expfn) << "\n\n";
|
||||
<< ei_matrix_function(A, expfn) << "\n\n";
|
||||
}
|
||||
|
@ -172,9 +172,9 @@ void testLmder1()
|
||||
info = lm.lmder1(x);
|
||||
|
||||
// check return value
|
||||
VERIFY( 1 == info);
|
||||
VERIFY(lm.nfev==6);
|
||||
VERIFY(lm.njev==5);
|
||||
VERIFY_IS_EQUAL(info, 1);
|
||||
VERIFY_IS_EQUAL(lm.nfev, 6);
|
||||
VERIFY_IS_EQUAL(lm.njev, 5);
|
||||
|
||||
// check norm
|
||||
VERIFY_IS_APPROX(lm.fvec.blueNorm(), 0.09063596);
|
||||
@ -201,9 +201,9 @@ void testLmder()
|
||||
info = lm.minimize(x);
|
||||
|
||||
// check return values
|
||||
VERIFY( 1 == info);
|
||||
VERIFY(lm.nfev==6);
|
||||
VERIFY(lm.njev==5);
|
||||
VERIFY_IS_EQUAL(info, 1);
|
||||
VERIFY_IS_EQUAL(lm.nfev, 6);
|
||||
VERIFY_IS_EQUAL(lm.njev, 5);
|
||||
|
||||
// check norm
|
||||
fnorm = lm.fvec.blueNorm();
|
||||
@ -286,9 +286,9 @@ void testHybrj1()
|
||||
info = solver.hybrj1(x);
|
||||
|
||||
// check return value
|
||||
VERIFY( 1 == info);
|
||||
VERIFY(solver.nfev==11);
|
||||
VERIFY(solver.njev==1);
|
||||
VERIFY_IS_EQUAL(info, 1);
|
||||
VERIFY_IS_EQUAL(solver.nfev, 11);
|
||||
VERIFY_IS_EQUAL(solver.njev, 1);
|
||||
|
||||
// check norm
|
||||
VERIFY_IS_APPROX(solver.fvec.blueNorm(), 1.192636e-08);
|
||||
@ -321,9 +321,9 @@ void testHybrj()
|
||||
info = solver.solve(x);
|
||||
|
||||
// check return value
|
||||
VERIFY( 1 == info);
|
||||
VERIFY(solver.nfev==11);
|
||||
VERIFY(solver.njev==1);
|
||||
VERIFY_IS_EQUAL(info, 1);
|
||||
VERIFY_IS_EQUAL(solver.nfev, 11);
|
||||
VERIFY_IS_EQUAL(solver.njev, 1);
|
||||
|
||||
// check norm
|
||||
VERIFY_IS_APPROX(solver.fvec.blueNorm(), 1.192636e-08);
|
||||
@ -375,8 +375,8 @@ void testHybrd1()
|
||||
info = solver.hybrd1(x);
|
||||
|
||||
// check return value
|
||||
VERIFY( 1 == info);
|
||||
VERIFY(solver.nfev==20);
|
||||
VERIFY_IS_EQUAL(info, 1);
|
||||
VERIFY_IS_EQUAL(solver.nfev, 20);
|
||||
|
||||
// check norm
|
||||
VERIFY_IS_APPROX(solver.fvec.blueNorm(), 1.192636e-08);
|
||||
@ -406,8 +406,8 @@ void testHybrd()
|
||||
info = solver.solveNumericalDiff(x);
|
||||
|
||||
// check return value
|
||||
VERIFY( 1 == info);
|
||||
VERIFY(solver.nfev==14);
|
||||
VERIFY_IS_EQUAL(info, 1);
|
||||
VERIFY_IS_EQUAL(solver.nfev, 14);
|
||||
|
||||
// check norm
|
||||
VERIFY_IS_APPROX(solver.fvec.blueNorm(), 1.192636e-08);
|
||||
@ -477,9 +477,9 @@ void testLmstr1()
|
||||
info = lm.lmstr1(x);
|
||||
|
||||
// check return value
|
||||
VERIFY( 1 == info);
|
||||
VERIFY(lm.nfev==6);
|
||||
VERIFY(lm.njev==5);
|
||||
VERIFY_IS_EQUAL(info, 1);
|
||||
VERIFY_IS_EQUAL(lm.nfev, 6);
|
||||
VERIFY_IS_EQUAL(lm.njev, 5);
|
||||
|
||||
// check norm
|
||||
VERIFY_IS_APPROX(lm.fvec.blueNorm(), 0.09063596);
|
||||
@ -506,9 +506,9 @@ void testLmstr()
|
||||
info = lm.minimizeOptimumStorage(x);
|
||||
|
||||
// check return values
|
||||
VERIFY( 1 == info);
|
||||
VERIFY(lm.nfev==6);
|
||||
VERIFY(lm.njev==5);
|
||||
VERIFY_IS_EQUAL(info, 1);
|
||||
VERIFY_IS_EQUAL(lm.nfev, 6);
|
||||
VERIFY_IS_EQUAL(lm.njev, 5);
|
||||
|
||||
// check norm
|
||||
fnorm = lm.fvec.blueNorm();
|
||||
@ -562,8 +562,8 @@ void testLmdif1()
|
||||
info = LevenbergMarquardt<lmdif_functor>::lmdif1(functor, x, &nfev);
|
||||
|
||||
// check return value
|
||||
VERIFY( 1 == info);
|
||||
VERIFY(nfev==26);
|
||||
VERIFY_IS_EQUAL(info, 1);
|
||||
VERIFY_IS_EQUAL(nfev, 26);
|
||||
|
||||
// check norm
|
||||
functor(x, fvec);
|
||||
@ -593,8 +593,8 @@ void testLmdif()
|
||||
info = lm.minimize(x);
|
||||
|
||||
// check return values
|
||||
VERIFY( 1 == info);
|
||||
VERIFY(lm.nfev==26);
|
||||
VERIFY_IS_EQUAL(info, 1);
|
||||
VERIFY_IS_EQUAL(lm.nfev, 26);
|
||||
|
||||
// check norm
|
||||
fnorm = lm.fvec.blueNorm();
|
||||
@ -678,9 +678,9 @@ void testNistChwirut2(void)
|
||||
info = lm.minimize(x);
|
||||
|
||||
// check return value
|
||||
VERIFY( 1 == info);
|
||||
VERIFY( 10 == lm.nfev);
|
||||
VERIFY( 8 == lm.njev);
|
||||
VERIFY_IS_EQUAL(info, 1);
|
||||
VERIFY_IS_EQUAL(lm.nfev, 10);
|
||||
VERIFY_IS_EQUAL(lm.njev, 8);
|
||||
// check norm^2
|
||||
VERIFY_IS_APPROX(lm.fvec.squaredNorm(), 5.1304802941E+02);
|
||||
// check x
|
||||
@ -699,9 +699,9 @@ void testNistChwirut2(void)
|
||||
info = lm.minimize(x);
|
||||
|
||||
// check return value
|
||||
VERIFY( 1 == info);
|
||||
VERIFY( 7 == lm.nfev);
|
||||
VERIFY( 6 == lm.njev);
|
||||
VERIFY_IS_EQUAL(info, 1);
|
||||
VERIFY_IS_EQUAL(lm.nfev, 7);
|
||||
VERIFY_IS_EQUAL(lm.njev, 6);
|
||||
// check norm^2
|
||||
VERIFY_IS_APPROX(lm.fvec.squaredNorm(), 5.1304802941E+02);
|
||||
// check x
|
||||
@ -758,9 +758,9 @@ void testNistMisra1a(void)
|
||||
info = lm.minimize(x);
|
||||
|
||||
// check return value
|
||||
VERIFY( 1 == info);
|
||||
VERIFY( 19 == lm.nfev);
|
||||
VERIFY( 15 == lm.njev);
|
||||
VERIFY_IS_EQUAL(info, 1);
|
||||
VERIFY_IS_EQUAL(lm.nfev, 19);
|
||||
VERIFY_IS_EQUAL(lm.njev, 15);
|
||||
// check norm^2
|
||||
VERIFY_IS_APPROX(lm.fvec.squaredNorm(), 1.2455138894E-01);
|
||||
// check x
|
||||
@ -775,9 +775,9 @@ void testNistMisra1a(void)
|
||||
info = lm.minimize(x);
|
||||
|
||||
// check return value
|
||||
VERIFY( 1 == info);
|
||||
VERIFY( 5 == lm.nfev);
|
||||
VERIFY( 4 == lm.njev);
|
||||
VERIFY_IS_EQUAL(info, 1);
|
||||
VERIFY_IS_EQUAL(lm.nfev, 5);
|
||||
VERIFY_IS_EQUAL(lm.njev, 4);
|
||||
// check norm^2
|
||||
VERIFY_IS_APPROX(lm.fvec.squaredNorm(), 1.2455138894E-01);
|
||||
// check x
|
||||
@ -844,19 +844,19 @@ void testNistHahn1(void)
|
||||
info = lm.minimize(x);
|
||||
|
||||
// check return value
|
||||
VERIFY( 1 == info);
|
||||
VERIFY( 11== lm.nfev);
|
||||
VERIFY( 10== lm.njev);
|
||||
VERIFY_IS_EQUAL(info, 1);
|
||||
VERIFY_IS_EQUAL(lm.nfev, 11);
|
||||
VERIFY_IS_EQUAL(lm.njev, 10);
|
||||
// check norm^2
|
||||
VERIFY_IS_APPROX(lm.fvec.squaredNorm(), 1.5324382854E+00);
|
||||
// check x
|
||||
VERIFY_IS_APPROX(x[0], 1.0776351733E+00 );
|
||||
VERIFY_IS_APPROX(x[1],-1.2269296921E-01 );
|
||||
VERIFY_IS_APPROX(x[2], 4.0863750610E-03 );
|
||||
VERIFY_IS_APPROX(x[0], 1.0776351733E+00);
|
||||
VERIFY_IS_APPROX(x[1],-1.2269296921E-01);
|
||||
VERIFY_IS_APPROX(x[2], 4.0863750610E-03);
|
||||
VERIFY_IS_APPROX(x[3],-1.426264e-06); // shoulde be : -1.4262662514E-06
|
||||
VERIFY_IS_APPROX(x[4],-5.7609940901E-03 );
|
||||
VERIFY_IS_APPROX(x[5], 2.4053735503E-04 );
|
||||
VERIFY_IS_APPROX(x[6],-1.2314450199E-07 );
|
||||
VERIFY_IS_APPROX(x[4],-5.7609940901E-03);
|
||||
VERIFY_IS_APPROX(x[5], 2.4053735503E-04);
|
||||
VERIFY_IS_APPROX(x[6],-1.2314450199E-07);
|
||||
|
||||
/*
|
||||
* Second try
|
||||
@ -866,9 +866,9 @@ void testNistHahn1(void)
|
||||
info = lm.minimize(x);
|
||||
|
||||
// check return value
|
||||
VERIFY( 1 == info);
|
||||
VERIFY( 11 == lm.nfev);
|
||||
VERIFY( 10 == lm.njev);
|
||||
VERIFY_IS_EQUAL(info, 1);
|
||||
VERIFY_IS_EQUAL(lm.nfev, 11);
|
||||
VERIFY_IS_EQUAL(lm.njev, 10);
|
||||
// check norm^2
|
||||
VERIFY_IS_APPROX(lm.fvec.squaredNorm(), 1.5324382854E+00);
|
||||
// check x
|
||||
@ -876,7 +876,7 @@ void testNistHahn1(void)
|
||||
VERIFY_IS_APPROX(x[1], -0.1226933); // should be : -1.2269296921E-01
|
||||
VERIFY_IS_APPROX(x[2], 0.004086383); // should be : 4.0863750610E-03
|
||||
VERIFY_IS_APPROX(x[3], -1.426277e-06); // shoulde be : -1.4262662514E-06
|
||||
VERIFY_IS_APPROX(x[4],-5.7609940901E-03 );
|
||||
VERIFY_IS_APPROX(x[4],-5.7609940901E-03);
|
||||
VERIFY_IS_APPROX(x[5], 0.00024053772); // should be : 2.4053735503E-04
|
||||
VERIFY_IS_APPROX(x[6], -1.231450e-07); // should be : -1.2314450199E-07
|
||||
|
||||
@ -930,9 +930,9 @@ void testNistMisra1d(void)
|
||||
info = lm.minimize(x);
|
||||
|
||||
// check return value
|
||||
VERIFY( 3 == info);
|
||||
VERIFY( 9 == lm.nfev);
|
||||
VERIFY( 7 == lm.njev);
|
||||
VERIFY_IS_EQUAL(info, 3);
|
||||
VERIFY_IS_EQUAL(lm.nfev, 9);
|
||||
VERIFY_IS_EQUAL(lm.njev, 7);
|
||||
// check norm^2
|
||||
VERIFY_IS_APPROX(lm.fvec.squaredNorm(), 5.6419295283E-02);
|
||||
// check x
|
||||
@ -947,9 +947,9 @@ void testNistMisra1d(void)
|
||||
info = lm.minimize(x);
|
||||
|
||||
// check return value
|
||||
VERIFY( 1 == info);
|
||||
VERIFY( 4 == lm.nfev);
|
||||
VERIFY( 3 == lm.njev);
|
||||
VERIFY_IS_EQUAL(info, 1);
|
||||
VERIFY_IS_EQUAL(lm.nfev, 4);
|
||||
VERIFY_IS_EQUAL(lm.njev, 3);
|
||||
// check norm^2
|
||||
VERIFY_IS_APPROX(lm.fvec.squaredNorm(), 5.6419295283E-02);
|
||||
// check x
|
||||
@ -1008,18 +1008,18 @@ void testNistLanczos1(void)
|
||||
info = lm.minimize(x);
|
||||
|
||||
// check return value
|
||||
VERIFY( 2 == info);
|
||||
VERIFY( 79 == lm.nfev);
|
||||
VERIFY( 72 == lm.njev);
|
||||
VERIFY_IS_EQUAL(info, 2);
|
||||
VERIFY_IS_EQUAL(lm.nfev, 79);
|
||||
VERIFY_IS_EQUAL(lm.njev, 72);
|
||||
// check norm^2
|
||||
VERIFY_IS_APPROX(lm.fvec.squaredNorm(), 1.430899764097e-25); // should be 1.4307867721E-25, but nist results are on 128-bit floats
|
||||
// check x
|
||||
VERIFY_IS_APPROX(x[0], 9.5100000027E-02 );
|
||||
VERIFY_IS_APPROX(x[1], 1.0000000001E+00 );
|
||||
VERIFY_IS_APPROX(x[2], 8.6070000013E-01 );
|
||||
VERIFY_IS_APPROX(x[3], 3.0000000002E+00 );
|
||||
VERIFY_IS_APPROX(x[4], 1.5575999998E+00 );
|
||||
VERIFY_IS_APPROX(x[5], 5.0000000001E+00 );
|
||||
VERIFY_IS_APPROX(x[0], 9.5100000027E-02);
|
||||
VERIFY_IS_APPROX(x[1], 1.0000000001E+00);
|
||||
VERIFY_IS_APPROX(x[2], 8.6070000013E-01);
|
||||
VERIFY_IS_APPROX(x[3], 3.0000000002E+00);
|
||||
VERIFY_IS_APPROX(x[4], 1.5575999998E+00);
|
||||
VERIFY_IS_APPROX(x[5], 5.0000000001E+00);
|
||||
|
||||
/*
|
||||
* Second try
|
||||
@ -1029,18 +1029,18 @@ void testNistLanczos1(void)
|
||||
info = lm.minimize(x);
|
||||
|
||||
// check return value
|
||||
VERIFY( 2 == info);
|
||||
VERIFY( 9 == lm.nfev);
|
||||
VERIFY( 8 == lm.njev);
|
||||
VERIFY_IS_EQUAL(info, 2);
|
||||
VERIFY_IS_EQUAL(lm.nfev, 9);
|
||||
VERIFY_IS_EQUAL(lm.njev, 8);
|
||||
// check norm^2
|
||||
VERIFY_IS_APPROX(lm.fvec.squaredNorm(), 1.428595533845e-25); // should be 1.4307867721E-25, but nist results are on 128-bit floats
|
||||
// check x
|
||||
VERIFY_IS_APPROX(x[0], 9.5100000027E-02 );
|
||||
VERIFY_IS_APPROX(x[1], 1.0000000001E+00 );
|
||||
VERIFY_IS_APPROX(x[2], 8.6070000013E-01 );
|
||||
VERIFY_IS_APPROX(x[3], 3.0000000002E+00 );
|
||||
VERIFY_IS_APPROX(x[4], 1.5575999998E+00 );
|
||||
VERIFY_IS_APPROX(x[5], 5.0000000001E+00 );
|
||||
VERIFY_IS_APPROX(x[0], 9.5100000027E-02);
|
||||
VERIFY_IS_APPROX(x[1], 1.0000000001E+00);
|
||||
VERIFY_IS_APPROX(x[2], 8.6070000013E-01);
|
||||
VERIFY_IS_APPROX(x[3], 3.0000000002E+00);
|
||||
VERIFY_IS_APPROX(x[4], 1.5575999998E+00);
|
||||
VERIFY_IS_APPROX(x[5], 5.0000000001E+00);
|
||||
|
||||
}
|
||||
|
||||
@ -1094,9 +1094,9 @@ void testNistRat42(void)
|
||||
info = lm.minimize(x);
|
||||
|
||||
// check return value
|
||||
VERIFY( 1 == info);
|
||||
VERIFY( 10 == lm.nfev);
|
||||
VERIFY( 8 == lm.njev);
|
||||
VERIFY_IS_EQUAL(info, 1);
|
||||
VERIFY_IS_EQUAL(lm.nfev, 10);
|
||||
VERIFY_IS_EQUAL(lm.njev, 8);
|
||||
// check norm^2
|
||||
VERIFY_IS_APPROX(lm.fvec.squaredNorm(), 8.0565229338E+00);
|
||||
// check x
|
||||
@ -1112,9 +1112,9 @@ void testNistRat42(void)
|
||||
info = lm.minimize(x);
|
||||
|
||||
// check return value
|
||||
VERIFY( 1 == info);
|
||||
VERIFY( 6 == lm.nfev);
|
||||
VERIFY( 5 == lm.njev);
|
||||
VERIFY_IS_EQUAL(info, 1);
|
||||
VERIFY_IS_EQUAL(lm.nfev, 6);
|
||||
VERIFY_IS_EQUAL(lm.njev, 5);
|
||||
// check norm^2
|
||||
VERIFY_IS_APPROX(lm.fvec.squaredNorm(), 8.0565229338E+00);
|
||||
// check x
|
||||
@ -1172,9 +1172,9 @@ void testNistMGH10(void)
|
||||
info = lm.minimize(x);
|
||||
|
||||
// check return value
|
||||
VERIFY( 2 == info);
|
||||
VERIFY( 284 == lm.nfev);
|
||||
VERIFY( 249 == lm.njev);
|
||||
VERIFY_IS_EQUAL(info, 2);
|
||||
VERIFY_IS_EQUAL(lm.nfev, 284 );
|
||||
VERIFY_IS_EQUAL(lm.njev, 249 );
|
||||
// check norm^2
|
||||
VERIFY_IS_APPROX(lm.fvec.squaredNorm(), 8.7945855171E+01);
|
||||
// check x
|
||||
@ -1190,9 +1190,9 @@ void testNistMGH10(void)
|
||||
info = lm.minimize(x);
|
||||
|
||||
// check return value
|
||||
VERIFY( 3 == info);
|
||||
VERIFY( 126 == lm.nfev);
|
||||
VERIFY( 116 == lm.njev);
|
||||
VERIFY_IS_EQUAL(info, 3);
|
||||
VERIFY_IS_EQUAL(lm.nfev, 126);
|
||||
VERIFY_IS_EQUAL(lm.njev, 116);
|
||||
// check norm^2
|
||||
VERIFY_IS_APPROX(lm.fvec.squaredNorm(), 8.7945855171E+01);
|
||||
// check x
|
||||
@ -1251,9 +1251,9 @@ void testNistBoxBOD(void)
|
||||
info = lm.minimize(x);
|
||||
|
||||
// check return value
|
||||
VERIFY( 1 == info);
|
||||
VERIFY( 31 == lm.nfev);
|
||||
VERIFY( 25 == lm.njev);
|
||||
VERIFY_IS_EQUAL(info, 1);
|
||||
VERIFY_IS_EQUAL(lm.nfev, 31);
|
||||
VERIFY_IS_EQUAL(lm.njev, 25);
|
||||
// check norm^2
|
||||
VERIFY_IS_APPROX(lm.fvec.squaredNorm(), 1.1680088766E+03);
|
||||
// check x
|
||||
@ -1271,9 +1271,9 @@ void testNistBoxBOD(void)
|
||||
info = lm.minimize(x);
|
||||
|
||||
// check return value
|
||||
VERIFY( 1 == info);
|
||||
VERIFY( 15 == lm.nfev);
|
||||
VERIFY( 14 == lm.njev);
|
||||
VERIFY_IS_EQUAL(info, 1);
|
||||
VERIFY_IS_EQUAL(lm.nfev, 15 );
|
||||
VERIFY_IS_EQUAL(lm.njev, 14 );
|
||||
// check norm^2
|
||||
VERIFY_IS_APPROX(lm.fvec.squaredNorm(), 1.1680088766E+03);
|
||||
// check x
|
||||
@ -1333,9 +1333,9 @@ void testNistMGH17(void)
|
||||
info = lm.minimize(x);
|
||||
|
||||
// check return value
|
||||
VERIFY( 2 == info);
|
||||
VERIFY( 602 == lm.nfev);
|
||||
VERIFY( 545 == lm.njev);
|
||||
VERIFY_IS_EQUAL(info, 2);
|
||||
VERIFY_IS_EQUAL(lm.nfev, 602 );
|
||||
VERIFY_IS_EQUAL(lm.njev, 545 );
|
||||
// check norm^2
|
||||
VERIFY_IS_APPROX(lm.fvec.squaredNorm(), 5.4648946975E-05);
|
||||
// check x
|
||||
@ -1354,9 +1354,9 @@ void testNistMGH17(void)
|
||||
info = lm.minimize(x);
|
||||
|
||||
// check return value
|
||||
VERIFY( 1 == info);
|
||||
VERIFY( 18 == lm.nfev);
|
||||
VERIFY( 15 == lm.njev);
|
||||
VERIFY_IS_EQUAL(info, 1);
|
||||
VERIFY_IS_EQUAL(lm.nfev, 18);
|
||||
VERIFY_IS_EQUAL(lm.njev, 15);
|
||||
// check norm^2
|
||||
VERIFY_IS_APPROX(lm.fvec.squaredNorm(), 5.4648946975E-05);
|
||||
// check x
|
||||
@ -1420,9 +1420,9 @@ void testNistMGH09(void)
|
||||
info = lm.minimize(x);
|
||||
|
||||
// check return value
|
||||
VERIFY( 1 == info);
|
||||
VERIFY( 490 == lm.nfev);
|
||||
VERIFY( 376 == lm.njev);
|
||||
VERIFY_IS_EQUAL(info, 1);
|
||||
VERIFY_IS_EQUAL(lm.nfev, 490 );
|
||||
VERIFY_IS_EQUAL(lm.njev, 376 );
|
||||
// check norm^2
|
||||
VERIFY_IS_APPROX(lm.fvec.squaredNorm(), 3.0750560385E-04);
|
||||
// check x
|
||||
@ -1440,9 +1440,9 @@ void testNistMGH09(void)
|
||||
info = lm.minimize(x);
|
||||
|
||||
// check return value
|
||||
VERIFY( 1 == info);
|
||||
VERIFY( 18 == lm.nfev);
|
||||
VERIFY( 16 == lm.njev);
|
||||
VERIFY_IS_EQUAL(info, 1);
|
||||
VERIFY_IS_EQUAL(lm.nfev, 18);
|
||||
VERIFY_IS_EQUAL(lm.njev, 16);
|
||||
// check norm^2
|
||||
VERIFY_IS_APPROX(lm.fvec.squaredNorm(), 3.0750560385E-04);
|
||||
// check x
|
||||
@ -1503,9 +1503,9 @@ void testNistBennett5(void)
|
||||
info = lm.minimize(x);
|
||||
|
||||
// check return value
|
||||
VERIFY( 1 == info);
|
||||
VERIFY( 758 == lm.nfev);
|
||||
VERIFY( 744 == lm.njev);
|
||||
VERIFY_IS_EQUAL(info, 1);
|
||||
VERIFY_IS_EQUAL(lm.nfev, 758);
|
||||
VERIFY_IS_EQUAL(lm.njev, 744);
|
||||
// check norm^2
|
||||
VERIFY_IS_APPROX(lm.fvec.squaredNorm(), 5.2404744073E-04);
|
||||
// check x
|
||||
@ -1521,9 +1521,9 @@ void testNistBennett5(void)
|
||||
info = lm.minimize(x);
|
||||
|
||||
// check return value
|
||||
VERIFY( 1 == info);
|
||||
VERIFY( 203 == lm.nfev);
|
||||
VERIFY( 192 == lm.njev);
|
||||
VERIFY_IS_EQUAL(info, 1);
|
||||
VERIFY_IS_EQUAL(lm.nfev, 203);
|
||||
VERIFY_IS_EQUAL(lm.njev, 192);
|
||||
// check norm^2
|
||||
VERIFY_IS_APPROX(lm.fvec.squaredNorm(), 5.2404744073E-04);
|
||||
// check x
|
||||
@ -1591,9 +1591,9 @@ void testNistThurber(void)
|
||||
info = lm.minimize(x);
|
||||
|
||||
// check return value
|
||||
VERIFY( 1 == info);
|
||||
VERIFY( 39 == lm.nfev);
|
||||
VERIFY( 36== lm.njev);
|
||||
VERIFY_IS_EQUAL(info, 1);
|
||||
VERIFY_IS_EQUAL(lm.nfev, 39);
|
||||
VERIFY_IS_EQUAL(lm.njev, 36);
|
||||
// check norm^2
|
||||
VERIFY_IS_APPROX(lm.fvec.squaredNorm(), 5.6427082397E+03);
|
||||
// check x
|
||||
@ -1616,9 +1616,9 @@ void testNistThurber(void)
|
||||
info = lm.minimize(x);
|
||||
|
||||
// check return value
|
||||
VERIFY( 1 == info);
|
||||
VERIFY( 29 == lm.nfev);
|
||||
VERIFY( 28 == lm.njev);
|
||||
VERIFY_IS_EQUAL(info, 1);
|
||||
VERIFY_IS_EQUAL(lm.nfev, 29);
|
||||
VERIFY_IS_EQUAL(lm.njev, 28);
|
||||
// check norm^2
|
||||
VERIFY_IS_APPROX(lm.fvec.squaredNorm(), 5.6427082397E+03);
|
||||
// check x
|
||||
@ -1683,9 +1683,9 @@ void testNistRat43(void)
|
||||
info = lm.minimize(x);
|
||||
|
||||
// check return value
|
||||
VERIFY( 1 == info);
|
||||
VERIFY( 27 == lm.nfev);
|
||||
VERIFY( 20 == lm.njev);
|
||||
VERIFY_IS_EQUAL(info, 1);
|
||||
VERIFY_IS_EQUAL(lm.nfev, 27);
|
||||
VERIFY_IS_EQUAL(lm.njev, 20);
|
||||
// check norm^2
|
||||
VERIFY_IS_APPROX(lm.fvec.squaredNorm(), 8.7864049080E+03);
|
||||
// check x
|
||||
@ -1705,9 +1705,9 @@ void testNistRat43(void)
|
||||
info = lm.minimize(x);
|
||||
|
||||
// check return value
|
||||
VERIFY( 1 == info);
|
||||
VERIFY( 9 == lm.nfev);
|
||||
VERIFY( 8 == lm.njev);
|
||||
VERIFY_IS_EQUAL(info, 1);
|
||||
VERIFY_IS_EQUAL(lm.nfev, 9);
|
||||
VERIFY_IS_EQUAL(lm.njev, 8);
|
||||
// check norm^2
|
||||
VERIFY_IS_APPROX(lm.fvec.squaredNorm(), 8.7864049080E+03);
|
||||
// check x
|
||||
@ -1768,9 +1768,9 @@ void testNistEckerle4(void)
|
||||
info = lm.minimize(x);
|
||||
|
||||
// check return value
|
||||
VERIFY( 1 == info);
|
||||
VERIFY( 18 == lm.nfev);
|
||||
VERIFY( 15 == lm.njev);
|
||||
VERIFY_IS_EQUAL(info, 1);
|
||||
VERIFY_IS_EQUAL(lm.nfev, 18);
|
||||
VERIFY_IS_EQUAL(lm.njev, 15);
|
||||
// check norm^2
|
||||
VERIFY_IS_APPROX(lm.fvec.squaredNorm(), 1.4635887487E-03);
|
||||
// check x
|
||||
@ -1786,9 +1786,9 @@ void testNistEckerle4(void)
|
||||
info = lm.minimize(x);
|
||||
|
||||
// check return value
|
||||
VERIFY( 1 == info);
|
||||
VERIFY( 7 == lm.nfev);
|
||||
VERIFY( 6 == lm.njev);
|
||||
VERIFY_IS_EQUAL(info, 1);
|
||||
VERIFY_IS_EQUAL(lm.nfev, 7);
|
||||
VERIFY_IS_EQUAL(lm.njev, 6);
|
||||
// check norm^2
|
||||
VERIFY_IS_APPROX(lm.fvec.squaredNorm(), 1.4635887487E-03);
|
||||
// check x
|
||||
|
@ -133,7 +133,7 @@ void randomTest(const MatrixType& m, double tol)
|
||||
m1 = MatrixType::Random(rows, cols);
|
||||
|
||||
m2 = ei_matrix_function(m1, expfn) * ei_matrix_function(-m1, expfn);
|
||||
std::cout << "randomTest: error funm = " << relerr(identity, m2 * m3);
|
||||
std::cout << "randomTest: error funm = " << relerr(identity, m2);
|
||||
VERIFY(identity.isApprox(m2, static_cast<RealScalar>(tol)));
|
||||
|
||||
m2 = ei_matrix_exponential(m1) * ei_matrix_exponential(-m1);
|
||||
|
@ -25,6 +25,17 @@
|
||||
#include "main.h"
|
||||
#include <unsupported/Eigen/MatrixFunctions>
|
||||
|
||||
// Variant of VERIFY_IS_APPROX which uses absolute error instead of
|
||||
// relative error.
|
||||
#define VERIFY_IS_APPROX_ABS(a, b) VERIFY(test_isApprox_abs(a, b))
|
||||
|
||||
template<typename Type1, typename Type2>
|
||||
inline bool test_isApprox_abs(const Type1& a, const Type2& b)
|
||||
{
|
||||
return ((a-b).array().abs() < test_precision<typename Type1::RealScalar>()).all();
|
||||
}
|
||||
|
||||
|
||||
// Returns a matrix with eigenvalues clustered around 0, 1 and 2.
|
||||
template<typename MatrixType>
|
||||
MatrixType randomMatrixWithRealEivals(const int size)
|
||||
@ -37,7 +48,8 @@ MatrixType randomMatrixWithRealEivals(const int size)
|
||||
+ ei_random<Scalar>() * Scalar(RealScalar(0.01));
|
||||
}
|
||||
MatrixType A = MatrixType::Random(size, size);
|
||||
return A.inverse() * diag * A;
|
||||
HouseholderQR<MatrixType> QRofA(A);
|
||||
return QRofA.householderQ().inverse() * diag * QRofA.householderQ();
|
||||
}
|
||||
|
||||
template <typename MatrixType, int IsComplex = NumTraits<typename ei_traits<MatrixType>::Scalar>::IsComplex>
|
||||
@ -69,7 +81,8 @@ struct randomMatrixWithImagEivals<MatrixType, 0>
|
||||
}
|
||||
}
|
||||
MatrixType A = MatrixType::Random(size, size);
|
||||
return A.inverse() * diag * A;
|
||||
HouseholderQR<MatrixType> QRofA(A);
|
||||
return QRofA.householderQ().inverse() * diag * QRofA.householderQ();
|
||||
}
|
||||
};
|
||||
|
||||
@ -88,10 +101,12 @@ struct randomMatrixWithImagEivals<MatrixType, 1>
|
||||
+ ei_random<Scalar>() * Scalar(RealScalar(0.01));
|
||||
}
|
||||
MatrixType A = MatrixType::Random(size, size);
|
||||
return A.inverse() * diag * A;
|
||||
HouseholderQR<MatrixType> QRofA(A);
|
||||
return QRofA.householderQ().inverse() * diag * QRofA.householderQ();
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
template<typename MatrixType>
|
||||
void testMatrixExponential(const MatrixType& A)
|
||||
{
|
||||
@ -99,50 +114,45 @@ void testMatrixExponential(const MatrixType& A)
|
||||
typedef typename NumTraits<Scalar>::Real RealScalar;
|
||||
typedef std::complex<RealScalar> ComplexScalar;
|
||||
|
||||
for (int i = 0; i < g_repeat; i++) {
|
||||
VERIFY_IS_APPROX(ei_matrix_exponential(A),
|
||||
ei_matrix_function(A, StdStemFunctions<ComplexScalar>::exp));
|
||||
}
|
||||
VERIFY_IS_APPROX(ei_matrix_exponential(A),
|
||||
ei_matrix_function(A, StdStemFunctions<ComplexScalar>::exp));
|
||||
}
|
||||
|
||||
template<typename MatrixType>
|
||||
void testHyperbolicFunctions(const MatrixType& A)
|
||||
{
|
||||
for (int i = 0; i < g_repeat; i++) {
|
||||
MatrixType sinhA = ei_matrix_sinh(A);
|
||||
MatrixType coshA = ei_matrix_cosh(A);
|
||||
MatrixType expA = ei_matrix_exponential(A);
|
||||
VERIFY_IS_APPROX(sinhA, (expA - expA.inverse())/2);
|
||||
VERIFY_IS_APPROX(coshA, (expA + expA.inverse())/2);
|
||||
}
|
||||
// Need to use absolute error because of possible cancellation when
|
||||
// adding/subtracting expA and expmA.
|
||||
MatrixType expA = ei_matrix_exponential(A);
|
||||
MatrixType expmA = ei_matrix_exponential(-A);
|
||||
VERIFY_IS_APPROX_ABS(ei_matrix_sinh(A), (expA - expmA) / 2);
|
||||
VERIFY_IS_APPROX_ABS(ei_matrix_cosh(A), (expA + expmA) / 2);
|
||||
}
|
||||
|
||||
template<typename MatrixType>
|
||||
void testGonioFunctions(const MatrixType& A)
|
||||
{
|
||||
typedef ei_traits<MatrixType> Traits;
|
||||
typedef typename Traits::Scalar Scalar;
|
||||
typedef typename MatrixType::Scalar Scalar;
|
||||
typedef typename NumTraits<Scalar>::Real RealScalar;
|
||||
typedef std::complex<RealScalar> ComplexScalar;
|
||||
typedef Matrix<ComplexScalar, Traits::RowsAtCompileTime,
|
||||
Traits::ColsAtCompileTime, MatrixType::Options> ComplexMatrix;
|
||||
typedef Matrix<ComplexScalar, MatrixType::RowsAtCompileTime,
|
||||
MatrixType::ColsAtCompileTime, MatrixType::Options> ComplexMatrix;
|
||||
|
||||
ComplexScalar imagUnit(0,1);
|
||||
ComplexScalar two(2,0);
|
||||
|
||||
for (int i = 0; i < g_repeat; i++) {
|
||||
ComplexMatrix Ac = A.template cast<ComplexScalar>();
|
||||
|
||||
ComplexMatrix exp_iA = ei_matrix_exponential(imagUnit * Ac);
|
||||
|
||||
MatrixType sinA = ei_matrix_sin(A);
|
||||
ComplexMatrix sinAc = sinA.template cast<ComplexScalar>();
|
||||
VERIFY_IS_APPROX(sinAc, (exp_iA - exp_iA.inverse()) / (two*imagUnit));
|
||||
|
||||
MatrixType cosA = ei_matrix_cos(A);
|
||||
ComplexMatrix cosAc = cosA.template cast<ComplexScalar>();
|
||||
VERIFY_IS_APPROX(cosAc, (exp_iA + exp_iA.inverse()) / 2);
|
||||
}
|
||||
ComplexMatrix Ac = A.template cast<ComplexScalar>();
|
||||
|
||||
ComplexMatrix exp_iA = ei_matrix_exponential(imagUnit * Ac);
|
||||
ComplexMatrix exp_miA = ei_matrix_exponential(-imagUnit * Ac);
|
||||
|
||||
MatrixType sinA = ei_matrix_sin(A);
|
||||
ComplexMatrix sinAc = sinA.template cast<ComplexScalar>();
|
||||
VERIFY_IS_APPROX_ABS(sinAc, (exp_iA - exp_miA) / (two*imagUnit));
|
||||
|
||||
MatrixType cosA = ei_matrix_cos(A);
|
||||
ComplexMatrix cosAc = cosA.template cast<ComplexScalar>();
|
||||
VERIFY_IS_APPROX_ABS(cosAc, (exp_iA + exp_miA) / 2);
|
||||
}
|
||||
|
||||
template<typename MatrixType>
|
||||
|
Loading…
x
Reference in New Issue
Block a user