// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2011 Benoit Jacob <jacob.benoit.1@gmail.com>
// Copyright (C) 2011 Gael Guennebaud <gael.guennebaud@inria.fr>
// Copyright (C) 2011-2012 Jitse Niesen <jitse@maths.leeds.ac.uk>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.

#ifndef EIGEN_ASSIGN_EVALUATOR_H
#define EIGEN_ASSIGN_EVALUATOR_H

namespace Eigen {

// This implementation is based on Assign.h

namespace internal {

/***************************************************************************
* Part 1 : the logic deciding a strategy for traversal and unrolling      *
***************************************************************************/

// copy_using_evaluator_traits is based on assign_traits
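// It maps the compile-time flags of the destination and source expressions to
// a Traversal kind (AllAtOnceTraversal, InnerVectorizedTraversal,
// LinearVectorizedTraversal, SliceVectorizedTraversal, LinearTraversal or
// DefaultTraversal) and an Unrolling decision (NoUnrolling, InnerUnrolling or
// CompleteUnrolling); the Part 3 specializations dispatch on these two results.
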
template <typename Derived, typename OtherDerived>
struct copy_using_evaluator_traits
{
public:
  enum {
    DstIsAligned = Derived::Flags & AlignedBit,
    DstHasDirectAccess = Derived::Flags & DirectAccessBit,
    SrcIsAligned = OtherDerived::Flags & AlignedBit,
    JointAlignment = bool(DstIsAligned) && bool(SrcIsAligned) ? Aligned : Unaligned,
    SrcEvalBeforeAssign = (evaluator_traits<OtherDerived>::HasEvalTo == 1)
  };

private:
  enum {
    InnerSize = int(Derived::IsVectorAtCompileTime) ? int(Derived::SizeAtCompileTime)
              : int(Derived::Flags)&RowMajorBit ? int(Derived::ColsAtCompileTime)
              : int(Derived::RowsAtCompileTime),
    InnerMaxSize = int(Derived::IsVectorAtCompileTime) ? int(Derived::MaxSizeAtCompileTime)
                 : int(Derived::Flags)&RowMajorBit ? int(Derived::MaxColsAtCompileTime)
                 : int(Derived::MaxRowsAtCompileTime),
    MaxSizeAtCompileTime = Derived::SizeAtCompileTime,
    PacketSize = packet_traits<typename Derived::Scalar>::size
  };

  enum {
    StorageOrdersAgree = (int(Derived::IsRowMajor) == int(OtherDerived::IsRowMajor)),
    MightVectorize = StorageOrdersAgree
                  && (int(Derived::Flags) & int(OtherDerived::Flags) & ActualPacketAccessBit),
    MayInnerVectorize = MightVectorize && int(InnerSize)!=Dynamic && int(InnerSize)%int(PacketSize)==0
                     && int(DstIsAligned) && int(SrcIsAligned),
    MayLinearize = StorageOrdersAgree && (int(Derived::Flags) & int(OtherDerived::Flags) & LinearAccessBit),
    MayLinearVectorize = MightVectorize && MayLinearize && DstHasDirectAccess
                      && (DstIsAligned || MaxSizeAtCompileTime == Dynamic),
      /* If the destination isn't aligned, we have to do runtime checks and we don't unroll,
         so it's only good for large enough sizes. */
    MaySliceVectorize = MightVectorize && DstHasDirectAccess
                     && (int(InnerMaxSize)==Dynamic || int(InnerMaxSize)>=3*PacketSize)
      /* slice vectorization can be slow, so we only want it if the slices are big, which is
         indicated by InnerMaxSize rather than InnerSize, think of the case of a dynamic block
         in a fixed-size matrix */
  };

public:
  enum {
    Traversal = int(SrcEvalBeforeAssign) ? int(AllAtOnceTraversal)
              : int(MayInnerVectorize)   ? int(InnerVectorizedTraversal)
              : int(MayLinearVectorize)  ? int(LinearVectorizedTraversal)
              : int(MaySliceVectorize)   ? int(SliceVectorizedTraversal)
              : int(MayLinearize)        ? int(LinearTraversal)
                                         : int(DefaultTraversal),
    Vectorized = int(Traversal) == InnerVectorizedTraversal
              || int(Traversal) == LinearVectorizedTraversal
              || int(Traversal) == SliceVectorizedTraversal
  };

private:
  enum {
    UnrollingLimit      = EIGEN_UNROLLING_LIMIT * (Vectorized ? int(PacketSize) : 1),
    MayUnrollCompletely = int(Derived::SizeAtCompileTime) != Dynamic
                       && int(OtherDerived::CoeffReadCost) != Dynamic
                       && int(Derived::SizeAtCompileTime) * int(OtherDerived::CoeffReadCost) <= int(UnrollingLimit),
    MayUnrollInner      = int(InnerSize) != Dynamic
                       && int(OtherDerived::CoeffReadCost) != Dynamic
                       && int(InnerSize) * int(OtherDerived::CoeffReadCost) <= int(UnrollingLimit)
  };

public:
  enum {
    Unrolling = (int(Traversal) == int(InnerVectorizedTraversal) || int(Traversal) == int(DefaultTraversal))
              ? (
                  int(MayUnrollCompletely) ? int(CompleteUnrolling)
                : int(MayUnrollInner)      ? int(InnerUnrolling)
                                           : int(NoUnrolling)
                )
              : int(Traversal) == int(LinearVectorizedTraversal)
              ? ( bool(MayUnrollCompletely) && bool(DstIsAligned) ? int(CompleteUnrolling)
                                                                  : int(NoUnrolling) )
              : int(Traversal) == int(LinearTraversal)
              ? ( bool(MayUnrollCompletely) ? int(CompleteUnrolling)
                                            : int(NoUnrolling) )
              : int(NoUnrolling)
  };

#ifdef EIGEN_DEBUG_ASSIGN
  static void debug()
  {
    EIGEN_DEBUG_VAR(DstIsAligned)
    EIGEN_DEBUG_VAR(SrcIsAligned)
    EIGEN_DEBUG_VAR(JointAlignment)
    EIGEN_DEBUG_VAR(InnerSize)
    EIGEN_DEBUG_VAR(InnerMaxSize)
    EIGEN_DEBUG_VAR(PacketSize)
    EIGEN_DEBUG_VAR(StorageOrdersAgree)
    EIGEN_DEBUG_VAR(MightVectorize)
    EIGEN_DEBUG_VAR(MayLinearize)
    EIGEN_DEBUG_VAR(MayInnerVectorize)
    EIGEN_DEBUG_VAR(MayLinearVectorize)
    EIGEN_DEBUG_VAR(MaySliceVectorize)
    EIGEN_DEBUG_VAR(Traversal)
    EIGEN_DEBUG_VAR(UnrollingLimit)
    EIGEN_DEBUG_VAR(MayUnrollCompletely)
    EIGEN_DEBUG_VAR(MayUnrollInner)
    EIGEN_DEBUG_VAR(Unrolling)
  }
#endif
};

/***************************************************************************
* Part 2 : meta-unrollers                                                  *
***************************************************************************/

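// Each meta-unroller copies one coefficient (or one packet) and then recurses
// with Index+1 (or Index+PacketSize); the partial specialization with
// Index == Stop terminates the compile-time recursion with an empty run().
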
/************************
*** Default traversal ***
************************/

template<typename DstEvaluatorType, typename SrcEvaluatorType, int Index, int Stop>
struct copy_using_evaluator_DefaultTraversal_CompleteUnrolling
{
  typedef typename DstEvaluatorType::XprType DstXprType;

  enum {
    outer = Index / DstXprType::InnerSizeAtCompileTime,
    inner = Index % DstXprType::InnerSizeAtCompileTime
  };

  EIGEN_STRONG_INLINE static void run(DstEvaluatorType &dstEvaluator,
                                      SrcEvaluatorType &srcEvaluator)
  {
    dstEvaluator.copyCoeffByOuterInner(outer, inner, srcEvaluator);
    copy_using_evaluator_DefaultTraversal_CompleteUnrolling
      <DstEvaluatorType, SrcEvaluatorType, Index+1, Stop>
      ::run(dstEvaluator, srcEvaluator);
  }
};

template<typename DstEvaluatorType, typename SrcEvaluatorType, int Stop>
struct copy_using_evaluator_DefaultTraversal_CompleteUnrolling<DstEvaluatorType, SrcEvaluatorType, Stop, Stop>
{
  EIGEN_STRONG_INLINE static void run(DstEvaluatorType&, SrcEvaluatorType&) { }
};

template<typename DstEvaluatorType, typename SrcEvaluatorType, int Index, int Stop>
struct copy_using_evaluator_DefaultTraversal_InnerUnrolling
{
  EIGEN_STRONG_INLINE static void run(DstEvaluatorType &dstEvaluator,
                                      SrcEvaluatorType &srcEvaluator,
                                      int outer)
  {
    dstEvaluator.copyCoeffByOuterInner(outer, Index, srcEvaluator);
    copy_using_evaluator_DefaultTraversal_InnerUnrolling
      <DstEvaluatorType, SrcEvaluatorType, Index+1, Stop>
      ::run(dstEvaluator, srcEvaluator, outer);
  }
};

template<typename DstEvaluatorType, typename SrcEvaluatorType, int Stop>
struct copy_using_evaluator_DefaultTraversal_InnerUnrolling<DstEvaluatorType, SrcEvaluatorType, Stop, Stop>
{
  EIGEN_STRONG_INLINE static void run(DstEvaluatorType&, SrcEvaluatorType&, int) { }
};

/***********************
*** Linear traversal ***
***********************/

template<typename DstEvaluatorType, typename SrcEvaluatorType, int Index, int Stop>
struct copy_using_evaluator_LinearTraversal_CompleteUnrolling
{
  EIGEN_STRONG_INLINE static void run(DstEvaluatorType &dstEvaluator,
                                      SrcEvaluatorType &srcEvaluator)
  {
    dstEvaluator.copyCoeff(Index, srcEvaluator);
    copy_using_evaluator_LinearTraversal_CompleteUnrolling
      <DstEvaluatorType, SrcEvaluatorType, Index+1, Stop>
      ::run(dstEvaluator, srcEvaluator);
  }
};

template<typename DstEvaluatorType, typename SrcEvaluatorType, int Stop>
struct copy_using_evaluator_LinearTraversal_CompleteUnrolling<DstEvaluatorType, SrcEvaluatorType, Stop, Stop>
{
  EIGEN_STRONG_INLINE static void run(DstEvaluatorType&, SrcEvaluatorType&) { }
};

/**************************
*** Inner vectorization ***
**************************/

template<typename DstEvaluatorType, typename SrcEvaluatorType, int Index, int Stop>
struct copy_using_evaluator_innervec_CompleteUnrolling
{
  typedef typename DstEvaluatorType::XprType DstXprType;
  typedef typename SrcEvaluatorType::XprType SrcXprType;

  enum {
    outer = Index / DstXprType::InnerSizeAtCompileTime,
    inner = Index % DstXprType::InnerSizeAtCompileTime,
    JointAlignment = copy_using_evaluator_traits<DstXprType,SrcXprType>::JointAlignment
  };

  EIGEN_STRONG_INLINE static void run(DstEvaluatorType &dstEvaluator,
                                      SrcEvaluatorType &srcEvaluator)
  {
    dstEvaluator.template copyPacketByOuterInner<Aligned, JointAlignment>(outer, inner, srcEvaluator);
    enum { NextIndex = Index + packet_traits<typename DstXprType::Scalar>::size };
    copy_using_evaluator_innervec_CompleteUnrolling
      <DstEvaluatorType, SrcEvaluatorType, NextIndex, Stop>
      ::run(dstEvaluator, srcEvaluator);
  }
};

template<typename DstEvaluatorType, typename SrcEvaluatorType, int Stop>
struct copy_using_evaluator_innervec_CompleteUnrolling<DstEvaluatorType, SrcEvaluatorType, Stop, Stop>
{
  EIGEN_STRONG_INLINE static void run(DstEvaluatorType&, SrcEvaluatorType&) { }
};

template<typename DstEvaluatorType, typename SrcEvaluatorType, int Index, int Stop>
struct copy_using_evaluator_innervec_InnerUnrolling
{
  EIGEN_STRONG_INLINE static void run(DstEvaluatorType &dstEvaluator,
                                      SrcEvaluatorType &srcEvaluator,
                                      int outer)
  {
    dstEvaluator.template copyPacketByOuterInner<Aligned, Aligned>(outer, Index, srcEvaluator);
    typedef typename DstEvaluatorType::XprType DstXprType;
    enum { NextIndex = Index + packet_traits<typename DstXprType::Scalar>::size };
    copy_using_evaluator_innervec_InnerUnrolling
      <DstEvaluatorType, SrcEvaluatorType, NextIndex, Stop>
      ::run(dstEvaluator, srcEvaluator, outer);
  }
};

template<typename DstEvaluatorType, typename SrcEvaluatorType, int Stop>
struct copy_using_evaluator_innervec_InnerUnrolling<DstEvaluatorType, SrcEvaluatorType, Stop, Stop>
{
  EIGEN_STRONG_INLINE static void run(DstEvaluatorType&, SrcEvaluatorType&, int) { }
};

/***************************************************************************
* Part 3 : implementation of all cases                                     *
***************************************************************************/

// copy_using_evaluator_impl is based on assign_impl

template<typename DstXprType, typename SrcXprType,
         int Traversal = copy_using_evaluator_traits<DstXprType, SrcXprType>::Traversal,
         int Unrolling = copy_using_evaluator_traits<DstXprType, SrcXprType>::Unrolling>
struct copy_using_evaluator_impl;

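// The two default template arguments select, from the specializations below,
// the one matching the Traversal/Unrolling pair computed in Part 1. A minimal
// sketch of direct use (assuming a plain fixed-size type such as Matrix4f):
//
//   Matrix4f dst, src;
//   internal::copy_using_evaluator_impl<Matrix4f, Matrix4f>::run(dst, src);
//
// In this file the dispatch normally happens through
// copy_using_evaluator_without_resizing() in Part 4.
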
/************************
*** Default traversal ***
************************/

template<typename DstXprType, typename SrcXprType>
struct copy_using_evaluator_impl<DstXprType, SrcXprType, DefaultTraversal, NoUnrolling>
{
  static void run(DstXprType& dst, const SrcXprType& src)
  {
    typedef typename evaluator<DstXprType>::type DstEvaluatorType;
    typedef typename evaluator<SrcXprType>::type SrcEvaluatorType;
    typedef typename DstXprType::Index Index;

    DstEvaluatorType dstEvaluator(dst);
    SrcEvaluatorType srcEvaluator(src);

    for(Index outer = 0; outer < dst.outerSize(); ++outer) {
      for(Index inner = 0; inner < dst.innerSize(); ++inner) {
        dstEvaluator.copyCoeffByOuterInner(outer, inner, srcEvaluator);
      }
    }
  }
};

template<typename DstXprType, typename SrcXprType>
struct copy_using_evaluator_impl<DstXprType, SrcXprType, DefaultTraversal, CompleteUnrolling>
{
  EIGEN_STRONG_INLINE static void run(DstXprType &dst, const SrcXprType &src)
  {
    typedef typename evaluator<DstXprType>::type DstEvaluatorType;
    typedef typename evaluator<SrcXprType>::type SrcEvaluatorType;

    DstEvaluatorType dstEvaluator(dst);
    SrcEvaluatorType srcEvaluator(src);

    copy_using_evaluator_DefaultTraversal_CompleteUnrolling
      <DstEvaluatorType, SrcEvaluatorType, 0, DstXprType::SizeAtCompileTime>
      ::run(dstEvaluator, srcEvaluator);
  }
};

template<typename DstXprType, typename SrcXprType>
struct copy_using_evaluator_impl<DstXprType, SrcXprType, DefaultTraversal, InnerUnrolling>
{
  typedef typename DstXprType::Index Index;
  EIGEN_STRONG_INLINE static void run(DstXprType &dst, const SrcXprType &src)
  {
    typedef typename evaluator<DstXprType>::type DstEvaluatorType;
    typedef typename evaluator<SrcXprType>::type SrcEvaluatorType;

    DstEvaluatorType dstEvaluator(dst);
    SrcEvaluatorType srcEvaluator(src);

    const Index outerSize = dst.outerSize();
    for(Index outer = 0; outer < outerSize; ++outer)
      copy_using_evaluator_DefaultTraversal_InnerUnrolling
        <DstEvaluatorType, SrcEvaluatorType, 0, DstXprType::InnerSizeAtCompileTime>
        ::run(dstEvaluator, srcEvaluator, outer);
  }
};

/***************************
*** Linear vectorization ***
***************************/

template <bool IsAligned = false>
struct unaligned_copy_using_evaluator_impl
{
  // if IsAligned = true, then do nothing
  template <typename SrcEvaluatorType, typename DstEvaluatorType>
  static EIGEN_STRONG_INLINE void run(const SrcEvaluatorType&, DstEvaluatorType&,
                                      typename SrcEvaluatorType::Index, typename SrcEvaluatorType::Index) {}
};

template <>
struct unaligned_copy_using_evaluator_impl<false>
{
  // MSVC must not inline this function. If it does, it fails to optimize the
  // packet access path.
#ifdef _MSC_VER
  template <typename DstEvaluatorType, typename SrcEvaluatorType>
  static EIGEN_DONT_INLINE void run(DstEvaluatorType &dstEvaluator,
                                    const SrcEvaluatorType &srcEvaluator,
                                    typename DstEvaluatorType::Index start,
                                    typename DstEvaluatorType::Index end)
#else
  template <typename DstEvaluatorType, typename SrcEvaluatorType>
  static EIGEN_STRONG_INLINE void run(DstEvaluatorType &dstEvaluator,
                                      const SrcEvaluatorType &srcEvaluator,
                                      typename DstEvaluatorType::Index start,
                                      typename DstEvaluatorType::Index end)
#endif
  {
    for (typename DstEvaluatorType::Index index = start; index < end; ++index)
      dstEvaluator.copyCoeff(index, srcEvaluator);
  }
};

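// The linear vectorized path below proceeds in three phases: a scalar head up
// to the first aligned coefficient, whole packets over the aligned middle
// range, and a scalar tail for the remaining coefficients.
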
template<typename DstXprType, typename SrcXprType>
struct copy_using_evaluator_impl<DstXprType, SrcXprType, LinearVectorizedTraversal, NoUnrolling>
{
  EIGEN_STRONG_INLINE static void run(DstXprType &dst, const SrcXprType &src)
  {
    typedef typename evaluator<DstXprType>::type DstEvaluatorType;
    typedef typename evaluator<SrcXprType>::type SrcEvaluatorType;
    typedef typename DstXprType::Index Index;

    DstEvaluatorType dstEvaluator(dst);
    SrcEvaluatorType srcEvaluator(src);

    const Index size = dst.size();
    typedef packet_traits<typename DstXprType::Scalar> PacketTraits;
    enum {
      packetSize = PacketTraits::size,
      dstIsAligned = int(copy_using_evaluator_traits<DstXprType,SrcXprType>::DstIsAligned),
      dstAlignment = PacketTraits::AlignedOnScalar ? Aligned : dstIsAligned,
      srcAlignment = copy_using_evaluator_traits<DstXprType,SrcXprType>::JointAlignment
    };
    const Index alignedStart = dstIsAligned ? 0 : internal::first_aligned(&dstEvaluator.coeffRef(0), size);
    const Index alignedEnd = alignedStart + ((size-alignedStart)/packetSize)*packetSize;

    unaligned_copy_using_evaluator_impl<dstIsAligned!=0>::run(dstEvaluator, srcEvaluator, 0, alignedStart);

    for(Index index = alignedStart; index < alignedEnd; index += packetSize)
    {
      dstEvaluator.template copyPacket<dstAlignment, srcAlignment>(index, srcEvaluator);
    }

    unaligned_copy_using_evaluator_impl<>::run(dstEvaluator, srcEvaluator, alignedEnd, size);
  }
};

template<typename DstXprType, typename SrcXprType>
struct copy_using_evaluator_impl<DstXprType, SrcXprType, LinearVectorizedTraversal, CompleteUnrolling>
{
  typedef typename DstXprType::Index Index;
  EIGEN_STRONG_INLINE static void run(DstXprType &dst, const SrcXprType &src)
  {
    typedef typename evaluator<DstXprType>::type DstEvaluatorType;
    typedef typename evaluator<SrcXprType>::type SrcEvaluatorType;

    DstEvaluatorType dstEvaluator(dst);
    SrcEvaluatorType srcEvaluator(src);

    enum { size = DstXprType::SizeAtCompileTime,
           packetSize = packet_traits<typename DstXprType::Scalar>::size,
           alignedSize = (size/packetSize)*packetSize };

    copy_using_evaluator_innervec_CompleteUnrolling
      <DstEvaluatorType, SrcEvaluatorType, 0, alignedSize>
      ::run(dstEvaluator, srcEvaluator);
    copy_using_evaluator_DefaultTraversal_CompleteUnrolling
      <DstEvaluatorType, SrcEvaluatorType, alignedSize, size>
      ::run(dstEvaluator, srcEvaluator);
  }
};

/**************************
*** Inner vectorization ***
**************************/

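// Inner vectorization can use aligned packet accesses throughout because the
// traits only select it when the inner size is a compile-time multiple of the
// packet size and both sides are aligned (see MayInnerVectorize above).
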
template<typename DstXprType, typename SrcXprType>
struct copy_using_evaluator_impl<DstXprType, SrcXprType, InnerVectorizedTraversal, NoUnrolling>
{
  inline static void run(DstXprType &dst, const SrcXprType &src)
  {
    typedef typename evaluator<DstXprType>::type DstEvaluatorType;
    typedef typename evaluator<SrcXprType>::type SrcEvaluatorType;
    typedef typename DstXprType::Index Index;

    DstEvaluatorType dstEvaluator(dst);
    SrcEvaluatorType srcEvaluator(src);

    const Index innerSize = dst.innerSize();
    const Index outerSize = dst.outerSize();
    const Index packetSize = packet_traits<typename DstXprType::Scalar>::size;
    for(Index outer = 0; outer < outerSize; ++outer)
      for(Index inner = 0; inner < innerSize; inner+=packetSize) {
        dstEvaluator.template copyPacketByOuterInner<Aligned, Aligned>(outer, inner, srcEvaluator);
      }
  }
};

template<typename DstXprType, typename SrcXprType>
struct copy_using_evaluator_impl<DstXprType, SrcXprType, InnerVectorizedTraversal, CompleteUnrolling>
{
  EIGEN_STRONG_INLINE static void run(DstXprType &dst, const SrcXprType &src)
  {
    typedef typename evaluator<DstXprType>::type DstEvaluatorType;
    typedef typename evaluator<SrcXprType>::type SrcEvaluatorType;

    DstEvaluatorType dstEvaluator(dst);
    SrcEvaluatorType srcEvaluator(src);

    copy_using_evaluator_innervec_CompleteUnrolling
      <DstEvaluatorType, SrcEvaluatorType, 0, DstXprType::SizeAtCompileTime>
      ::run(dstEvaluator, srcEvaluator);
  }
};

template<typename DstXprType, typename SrcXprType>
struct copy_using_evaluator_impl<DstXprType, SrcXprType, InnerVectorizedTraversal, InnerUnrolling>
{
  typedef typename DstXprType::Index Index;
  EIGEN_STRONG_INLINE static void run(DstXprType &dst, const SrcXprType &src)
  {
    typedef typename evaluator<DstXprType>::type DstEvaluatorType;
    typedef typename evaluator<SrcXprType>::type SrcEvaluatorType;

    DstEvaluatorType dstEvaluator(dst);
    SrcEvaluatorType srcEvaluator(src);

    const Index outerSize = dst.outerSize();
    for(Index outer = 0; outer < outerSize; ++outer)
      copy_using_evaluator_innervec_InnerUnrolling
        <DstEvaluatorType, SrcEvaluatorType, 0, DstXprType::InnerSizeAtCompileTime>
        ::run(dstEvaluator, srcEvaluator, outer);
  }
};

/***********************
*** Linear traversal ***
***********************/

template<typename DstXprType, typename SrcXprType>
struct copy_using_evaluator_impl<DstXprType, SrcXprType, LinearTraversal, NoUnrolling>
{
  inline static void run(DstXprType &dst, const SrcXprType &src)
  {
    typedef typename evaluator<DstXprType>::type DstEvaluatorType;
    typedef typename evaluator<SrcXprType>::type SrcEvaluatorType;
    typedef typename DstXprType::Index Index;

    DstEvaluatorType dstEvaluator(dst);
    SrcEvaluatorType srcEvaluator(src);

    const Index size = dst.size();
    for(Index i = 0; i < size; ++i)
      dstEvaluator.copyCoeff(i, srcEvaluator);
  }
};

template<typename DstXprType, typename SrcXprType>
struct copy_using_evaluator_impl<DstXprType, SrcXprType, LinearTraversal, CompleteUnrolling>
{
  EIGEN_STRONG_INLINE static void run(DstXprType &dst, const SrcXprType &src)
  {
    typedef typename evaluator<DstXprType>::type DstEvaluatorType;
    typedef typename evaluator<SrcXprType>::type SrcEvaluatorType;

    DstEvaluatorType dstEvaluator(dst);
    SrcEvaluatorType srcEvaluator(src);

    copy_using_evaluator_LinearTraversal_CompleteUnrolling
      <DstEvaluatorType, SrcEvaluatorType, 0, DstXprType::SizeAtCompileTime>
      ::run(dstEvaluator, srcEvaluator);
  }
};

/***************************
*** Slice vectorization ***
***************************/

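// Slice vectorization handles destinations whose inner segments are not
// aligned or not a whole number of packets: each outer slice is split into a
// scalar head, an aligned packet part, and a scalar tail, and the alignment
// offset is updated per slice from the outer stride.
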
template<typename DstXprType, typename SrcXprType>
struct copy_using_evaluator_impl<DstXprType, SrcXprType, SliceVectorizedTraversal, NoUnrolling>
{
  inline static void run(DstXprType &dst, const SrcXprType &src)
  {
    typedef typename evaluator<DstXprType>::type DstEvaluatorType;
    typedef typename evaluator<SrcXprType>::type SrcEvaluatorType;
    typedef typename DstXprType::Index Index;

    DstEvaluatorType dstEvaluator(dst);
    SrcEvaluatorType srcEvaluator(src);

    typedef packet_traits<typename DstXprType::Scalar> PacketTraits;
    enum {
      packetSize = PacketTraits::size,
      alignable = PacketTraits::AlignedOnScalar,
      dstAlignment = alignable ? Aligned : int(copy_using_evaluator_traits<DstXprType,SrcXprType>::DstIsAligned)
    };
    const Index packetAlignedMask = packetSize - 1;
    const Index innerSize = dst.innerSize();
    const Index outerSize = dst.outerSize();
    const Index alignedStep = alignable ? (packetSize - dst.outerStride() % packetSize) & packetAlignedMask : 0;
    Index alignedStart = ((!alignable) || copy_using_evaluator_traits<DstXprType,SrcXprType>::DstIsAligned) ? 0
                       : internal::first_aligned(&dstEvaluator.coeffRef(0,0), innerSize);

    for(Index outer = 0; outer < outerSize; ++outer)
    {
      const Index alignedEnd = alignedStart + ((innerSize-alignedStart) & ~packetAlignedMask);
      // do the non-vectorizable part of the assignment
      for(Index inner = 0; inner<alignedStart; ++inner) {
        dstEvaluator.copyCoeffByOuterInner(outer, inner, srcEvaluator);
      }

      // do the vectorizable part of the assignment
      for(Index inner = alignedStart; inner<alignedEnd; inner+=packetSize) {
        dstEvaluator.template copyPacketByOuterInner<dstAlignment, Unaligned>(outer, inner, srcEvaluator);
      }

      // do the non-vectorizable part of the assignment
      for(Index inner = alignedEnd; inner<innerSize; ++inner) {
        dstEvaluator.copyCoeffByOuterInner(outer, inner, srcEvaluator);
      }

      alignedStart = std::min<Index>((alignedStart+alignedStep)%packetSize, innerSize);
    }
  }
};

/****************************
*** All-at-once traversal ***
****************************/

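// When the source evaluator exposes HasEvalTo (SrcEvalBeforeAssign in the
// traits), the whole assignment is delegated to its evalTo() member, which
// evaluates the expression into the destination in one shot.
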
template<typename DstXprType, typename SrcXprType>
struct copy_using_evaluator_impl<DstXprType, SrcXprType, AllAtOnceTraversal, NoUnrolling>
{
  inline static void run(DstXprType &dst, const SrcXprType &src)
  {
    typedef typename evaluator<DstXprType>::type DstEvaluatorType;
    typedef typename evaluator<SrcXprType>::type SrcEvaluatorType;
    typedef typename DstXprType::Index Index;

    DstEvaluatorType dstEvaluator(dst);
    SrcEvaluatorType srcEvaluator(src);

    // Evaluate rhs in temporary to prevent aliasing problems in a = a * a;
    // TODO: Do not pass the xpr object to evalTo()
    srcEvaluator.evalTo(dstEvaluator, dst);
  }
};

/***************************************************************************
* Part 4 : Entry points                                                    *
***************************************************************************/

// Based on DenseBase::lazyAssign()

template<typename DstXprType, template <typename> class StorageBase, typename SrcXprType>
EIGEN_STRONG_INLINE
const DstXprType& copy_using_evaluator(const NoAlias<DstXprType, StorageBase>& dst,
                                       const EigenBase<SrcXprType>& src)
{
  return noalias_copy_using_evaluator(dst.expression(), src.derived());
}

template<typename XprType, int AssumeAliasing = evaluator_traits<XprType>::AssumeAliasing>
struct AddEvalIfAssumingAliasing;

template<typename XprType>
struct AddEvalIfAssumingAliasing<XprType, 0>
{
  static const XprType& run(const XprType& xpr)
  {
    return xpr;
  }
};

template<typename XprType>
struct AddEvalIfAssumingAliasing<XprType, 1>
{
  static const EvalToTemp<XprType> run(const XprType& xpr)
  {
    return EvalToTemp<XprType>(xpr);
  }
};

template<typename DstXprType, typename SrcXprType>
EIGEN_STRONG_INLINE
const DstXprType& copy_using_evaluator(const EigenBase<DstXprType>& dst, const EigenBase<SrcXprType>& src)
{
  return noalias_copy_using_evaluator(dst.const_cast_derived(),
                                      AddEvalIfAssumingAliasing<SrcXprType>::run(src.derived()));
}

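// A minimal usage sketch for these entry points (assuming plain dense
// matrices such as MatrixXd):
//
//   MatrixXd A(2,2), B(2,2);
//   internal::copy_using_evaluator(A, B + B);           // A = B + B
//   internal::copy_using_evaluator(A.noalias(), B + B); // same, without the aliasing wrapper
//
// Both forms end up in copy_using_evaluator_without_resizing() below.
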
template<typename DstXprType, typename SrcXprType>
EIGEN_STRONG_INLINE
const DstXprType& noalias_copy_using_evaluator(const PlainObjectBase<DstXprType>& dst, const EigenBase<SrcXprType>& src)
{
#ifdef EIGEN_DEBUG_ASSIGN
  internal::copy_using_evaluator_traits<DstXprType, SrcXprType>::debug();
#endif
#ifdef EIGEN_NO_AUTOMATIC_RESIZING
  eigen_assert((dst.size()==0 || (DstXprType::IsVectorAtCompileTime ? (dst.size() == src.size())
                                                                    : (dst.rows() == src.rows() && dst.cols() == src.cols())))
               && "Size mismatch. Automatic resizing is disabled because EIGEN_NO_AUTOMATIC_RESIZING is defined");
#else
  dst.const_cast_derived().resizeLike(src.derived());
#endif
  return copy_using_evaluator_without_resizing(dst.const_cast_derived(), src.derived());
}

template<typename DstXprType, typename SrcXprType>
EIGEN_STRONG_INLINE
const DstXprType& noalias_copy_using_evaluator(const EigenBase<DstXprType>& dst, const EigenBase<SrcXprType>& src)
{
  return copy_using_evaluator_without_resizing(dst.const_cast_derived(), src.derived());
}

template<typename DstXprType, typename SrcXprType>
const DstXprType& copy_using_evaluator_without_resizing(const DstXprType& dst, const SrcXprType& src)
{
#ifdef EIGEN_DEBUG_ASSIGN
  internal::copy_using_evaluator_traits<DstXprType, SrcXprType>::debug();
#endif
  eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols());
  copy_using_evaluator_impl<DstXprType, SrcXprType>::run(const_cast<DstXprType&>(dst), src);
  return dst;
}

// Based on DenseBase::swap()
// TODO: Check whether we need to do something special for swapping two
// Arrays or Matrices.

template<typename DstXprType, typename SrcXprType>
void swap_using_evaluator(const DstXprType& dst, const SrcXprType& src)
{
  copy_using_evaluator(SwapWrapper<DstXprType>(const_cast<DstXprType&>(dst)), src);
}

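// The compound-assignment helpers below (add, subtract, multiply, divide)
// wrap the destination in a SelfCwiseBinaryOp, so copying src into that
// wrapper applies the corresponding binary functor coefficient-wise and
// writes the result back into dst in place.
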
// Based on MatrixBase::operator+= (in CwiseBinaryOp.h)
template<typename DstXprType, typename SrcXprType>
void add_assign_using_evaluator(const MatrixBase<DstXprType>& dst, const MatrixBase<SrcXprType>& src)
{
  typedef typename DstXprType::Scalar Scalar;
  SelfCwiseBinaryOp<internal::scalar_sum_op<Scalar>, DstXprType, SrcXprType> tmp(dst.const_cast_derived());
  copy_using_evaluator(tmp, src.derived());
}

// Based on ArrayBase::operator+=
template<typename DstXprType, typename SrcXprType>
void add_assign_using_evaluator(const ArrayBase<DstXprType>& dst, const ArrayBase<SrcXprType>& src)
{
  typedef typename DstXprType::Scalar Scalar;
  SelfCwiseBinaryOp<internal::scalar_sum_op<Scalar>, DstXprType, SrcXprType> tmp(dst.const_cast_derived());
  copy_using_evaluator(tmp, src.derived());
}

// TODO: Add add_assign_using_evaluator for EigenBase ?

template<typename DstXprType, typename SrcXprType>
void subtract_assign_using_evaluator(const MatrixBase<DstXprType>& dst, const MatrixBase<SrcXprType>& src)
{
  typedef typename DstXprType::Scalar Scalar;
  SelfCwiseBinaryOp<internal::scalar_difference_op<Scalar>, DstXprType, SrcXprType> tmp(dst.const_cast_derived());
  copy_using_evaluator(tmp, src.derived());
}

template<typename DstXprType, typename SrcXprType>
void subtract_assign_using_evaluator(const ArrayBase<DstXprType>& dst, const ArrayBase<SrcXprType>& src)
{
  typedef typename DstXprType::Scalar Scalar;
  SelfCwiseBinaryOp<internal::scalar_difference_op<Scalar>, DstXprType, SrcXprType> tmp(dst.const_cast_derived());
  copy_using_evaluator(tmp, src.derived());
}

template<typename DstXprType, typename SrcXprType>
void multiply_assign_using_evaluator(const ArrayBase<DstXprType>& dst, const ArrayBase<SrcXprType>& src)
{
  typedef typename DstXprType::Scalar Scalar;
  SelfCwiseBinaryOp<internal::scalar_product_op<Scalar>, DstXprType, SrcXprType> tmp(dst.const_cast_derived());
  copy_using_evaluator(tmp, src.derived());
}

template<typename DstXprType, typename SrcXprType>
void divide_assign_using_evaluator(const ArrayBase<DstXprType>& dst, const ArrayBase<SrcXprType>& src)
{
  typedef typename DstXprType::Scalar Scalar;
  SelfCwiseBinaryOp<internal::scalar_quotient_op<Scalar>, DstXprType, SrcXprType> tmp(dst.const_cast_derived());
  copy_using_evaluator(tmp, src.derived());
}

} // namespace internal

} // end namespace Eigen

#endif // EIGEN_ASSIGN_EVALUATOR_H