mirror of
https://gitlab.com/libeigen/eigen.git
synced 2025-07-31 17:22:07 +08:00
Clean up Redux.h and fix vectorization_logic test after changes to traversal order in Redux.
This commit is contained in:
parent
da6a71faf0
commit
8c43bf2b5b
@ -93,7 +93,7 @@ struct packetwise_redux_impl<Func, Evaluator, CompleteUnrolling>
|
|||||||
* This specialization is not required for general reductions, which is
|
* This specialization is not required for general reductions, which is
|
||||||
* why it is defined here.
|
* why it is defined here.
|
||||||
*/
|
*/
|
||||||
template<typename Func, typename Evaluator, int Start>
|
template<typename Func, typename Evaluator, Index Start>
|
||||||
struct redux_vec_unroller<Func, Evaluator, Start, 0>
|
struct redux_vec_unroller<Func, Evaluator, Start, 0>
|
||||||
{
|
{
|
||||||
template<typename PacketType>
|
template<typename PacketType>
|
||||||
|
@ -99,12 +99,10 @@ public:
|
|||||||
|
|
||||||
/*** no vectorization ***/
|
/*** no vectorization ***/
|
||||||
|
|
||||||
template<typename Func, typename Evaluator, int Start, int Length>
|
template<typename Func, typename Evaluator, Index Start, Index Length>
|
||||||
struct redux_novec_unroller
|
struct redux_novec_unroller
|
||||||
{
|
{
|
||||||
enum {
|
static constexpr Index HalfLength = Length/2;
|
||||||
HalfLength = Length/2
|
|
||||||
};
|
|
||||||
|
|
||||||
typedef typename Evaluator::Scalar Scalar;
|
typedef typename Evaluator::Scalar Scalar;
|
||||||
|
|
||||||
@ -116,13 +114,11 @@ struct redux_novec_unroller
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
template<typename Func, typename Evaluator, int Start>
|
template<typename Func, typename Evaluator, Index Start>
|
||||||
struct redux_novec_unroller<Func, Evaluator, Start, 1>
|
struct redux_novec_unroller<Func, Evaluator, Start, 1>
|
||||||
{
|
{
|
||||||
enum {
|
static constexpr Index outer = Start / Evaluator::InnerSizeAtCompileTime;
|
||||||
outer = Start / Evaluator::InnerSizeAtCompileTime,
|
static constexpr Index inner = Start % Evaluator::InnerSizeAtCompileTime;
|
||||||
inner = Start % Evaluator::InnerSizeAtCompileTime
|
|
||||||
};
|
|
||||||
|
|
||||||
typedef typename Evaluator::Scalar Scalar;
|
typedef typename Evaluator::Scalar Scalar;
|
||||||
|
|
||||||
@ -136,7 +132,7 @@ struct redux_novec_unroller<Func, Evaluator, Start, 1>
|
|||||||
// This is actually dead code and will never be called. It is required
|
// This is actually dead code and will never be called. It is required
|
||||||
// to prevent false warnings regarding failed inlining though
|
// to prevent false warnings regarding failed inlining though
|
||||||
// for 0 length run() will never be called at all.
|
// for 0 length run() will never be called at all.
|
||||||
template<typename Func, typename Evaluator, int Start>
|
template<typename Func, typename Evaluator, Index Start>
|
||||||
struct redux_novec_unroller<Func, Evaluator, Start, 0>
|
struct redux_novec_unroller<Func, Evaluator, Start, 0>
|
||||||
{
|
{
|
||||||
typedef typename Evaluator::Scalar Scalar;
|
typedef typename Evaluator::Scalar Scalar;
|
||||||
@ -144,12 +140,10 @@ struct redux_novec_unroller<Func, Evaluator, Start, 0>
|
|||||||
static EIGEN_STRONG_INLINE Scalar run(const Evaluator&, const Func&) { return Scalar(); }
|
static EIGEN_STRONG_INLINE Scalar run(const Evaluator&, const Func&) { return Scalar(); }
|
||||||
};
|
};
|
||||||
|
|
||||||
template<typename Func, typename Evaluator, int Start, int Length>
|
template<typename Func, typename Evaluator, Index Start, Index Length>
|
||||||
struct redux_novec_linear_unroller
|
struct redux_novec_linear_unroller
|
||||||
{
|
{
|
||||||
enum {
|
static constexpr Index HalfLength = Length/2;
|
||||||
HalfLength = Length/2
|
|
||||||
};
|
|
||||||
|
|
||||||
typedef typename Evaluator::Scalar Scalar;
|
typedef typename Evaluator::Scalar Scalar;
|
||||||
|
|
||||||
@ -161,7 +155,7 @@ struct redux_novec_linear_unroller
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
template<typename Func, typename Evaluator, int Start>
|
template<typename Func, typename Evaluator, Index Start>
|
||||||
struct redux_novec_linear_unroller<Func, Evaluator, Start, 1>
|
struct redux_novec_linear_unroller<Func, Evaluator, Start, 1>
|
||||||
{
|
{
|
||||||
typedef typename Evaluator::Scalar Scalar;
|
typedef typename Evaluator::Scalar Scalar;
|
||||||
@ -176,7 +170,7 @@ struct redux_novec_linear_unroller<Func, Evaluator, Start, 1>
|
|||||||
// This is actually dead code and will never be called. It is required
|
// This is actually dead code and will never be called. It is required
|
||||||
// to prevent false warnings regarding failed inlining though
|
// to prevent false warnings regarding failed inlining though
|
||||||
// for 0 length run() will never be called at all.
|
// for 0 length run() will never be called at all.
|
||||||
template<typename Func, typename Evaluator, int Start>
|
template<typename Func, typename Evaluator, Index Start>
|
||||||
struct redux_novec_linear_unroller<Func, Evaluator, Start, 0>
|
struct redux_novec_linear_unroller<Func, Evaluator, Start, 0>
|
||||||
{
|
{
|
||||||
typedef typename Evaluator::Scalar Scalar;
|
typedef typename Evaluator::Scalar Scalar;
|
||||||
@ -186,17 +180,14 @@ struct redux_novec_linear_unroller<Func, Evaluator, Start, 0>
|
|||||||
|
|
||||||
/*** vectorization ***/
|
/*** vectorization ***/
|
||||||
|
|
||||||
template<typename Func, typename Evaluator, int Start, int Length>
|
template<typename Func, typename Evaluator, Index Start, Index Length>
|
||||||
struct redux_vec_unroller
|
struct redux_vec_unroller
|
||||||
{
|
{
|
||||||
template<typename PacketType>
|
template<typename PacketType>
|
||||||
EIGEN_DEVICE_FUNC
|
EIGEN_DEVICE_FUNC
|
||||||
static EIGEN_STRONG_INLINE PacketType run(const Evaluator &eval, const Func& func)
|
static EIGEN_STRONG_INLINE PacketType run(const Evaluator &eval, const Func& func)
|
||||||
{
|
{
|
||||||
enum {
|
constexpr Index HalfLength = Length/2;
|
||||||
PacketSize = unpacket_traits<PacketType>::size,
|
|
||||||
HalfLength = Length/2
|
|
||||||
};
|
|
||||||
|
|
||||||
return func.packetOp(
|
return func.packetOp(
|
||||||
redux_vec_unroller<Func, Evaluator, Start, HalfLength>::template run<PacketType>(eval,func),
|
redux_vec_unroller<Func, Evaluator, Start, HalfLength>::template run<PacketType>(eval,func),
|
||||||
@ -204,35 +195,31 @@ struct redux_vec_unroller
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
template<typename Func, typename Evaluator, int Start>
|
template<typename Func, typename Evaluator, Index Start>
|
||||||
struct redux_vec_unroller<Func, Evaluator, Start, 1>
|
struct redux_vec_unroller<Func, Evaluator, Start, 1>
|
||||||
{
|
{
|
||||||
template<typename PacketType>
|
template<typename PacketType>
|
||||||
EIGEN_DEVICE_FUNC
|
EIGEN_DEVICE_FUNC
|
||||||
static EIGEN_STRONG_INLINE PacketType run(const Evaluator &eval, const Func&)
|
static EIGEN_STRONG_INLINE PacketType run(const Evaluator &eval, const Func&)
|
||||||
{
|
{
|
||||||
enum {
|
constexpr Index PacketSize = unpacket_traits<PacketType>::size;
|
||||||
PacketSize = unpacket_traits<PacketType>::size,
|
constexpr Index index = Start * PacketSize;
|
||||||
index = Start * PacketSize,
|
constexpr Index outer = index / int(Evaluator::InnerSizeAtCompileTime);
|
||||||
outer = index / int(Evaluator::InnerSizeAtCompileTime),
|
constexpr Index inner = index % int(Evaluator::InnerSizeAtCompileTime);
|
||||||
inner = index % int(Evaluator::InnerSizeAtCompileTime),
|
constexpr int alignment = Evaluator::Alignment;
|
||||||
alignment = Evaluator::Alignment
|
|
||||||
};
|
|
||||||
return eval.template packetByOuterInner<alignment,PacketType>(outer, inner);
|
return eval.template packetByOuterInner<alignment,PacketType>(outer, inner);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
template<typename Func, typename Evaluator, int Start, int Length>
|
template<typename Func, typename Evaluator, Index Start, Index Length>
|
||||||
struct redux_vec_linear_unroller
|
struct redux_vec_linear_unroller
|
||||||
{
|
{
|
||||||
template<typename PacketType>
|
template<typename PacketType>
|
||||||
EIGEN_DEVICE_FUNC
|
EIGEN_DEVICE_FUNC
|
||||||
static EIGEN_STRONG_INLINE PacketType run(const Evaluator &eval, const Func& func)
|
static EIGEN_STRONG_INLINE PacketType run(const Evaluator &eval, const Func& func)
|
||||||
{
|
{
|
||||||
enum {
|
constexpr Index HalfLength = Length/2;
|
||||||
PacketSize = unpacket_traits<PacketType>::size,
|
|
||||||
HalfLength = Length/2
|
|
||||||
};
|
|
||||||
|
|
||||||
return func.packetOp(
|
return func.packetOp(
|
||||||
redux_vec_linear_unroller<Func, Evaluator, Start, HalfLength>::template run<PacketType>(eval,func),
|
redux_vec_linear_unroller<Func, Evaluator, Start, HalfLength>::template run<PacketType>(eval,func),
|
||||||
@ -240,18 +227,16 @@ struct redux_vec_linear_unroller
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
template<typename Func, typename Evaluator, int Start>
|
template<typename Func, typename Evaluator, Index Start>
|
||||||
struct redux_vec_linear_unroller<Func, Evaluator, Start, 1>
|
struct redux_vec_linear_unroller<Func, Evaluator, Start, 1>
|
||||||
{
|
{
|
||||||
template<typename PacketType>
|
template<typename PacketType>
|
||||||
EIGEN_DEVICE_FUNC
|
EIGEN_DEVICE_FUNC
|
||||||
static EIGEN_STRONG_INLINE PacketType run(const Evaluator &eval, const Func&)
|
static EIGEN_STRONG_INLINE PacketType run(const Evaluator &eval, const Func&)
|
||||||
{
|
{
|
||||||
enum {
|
constexpr Index PacketSize = unpacket_traits<PacketType>::size;
|
||||||
PacketSize = unpacket_traits<PacketType>::size,
|
constexpr Index index = (Start * PacketSize);
|
||||||
index = Start * PacketSize,
|
constexpr int alignment = Evaluator::Alignment;
|
||||||
alignment = Evaluator::Alignment
|
|
||||||
};
|
|
||||||
return eval.template packet<alignment,PacketType>(index);
|
return eval.template packet<alignment,PacketType>(index);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
@ -342,12 +327,10 @@ struct redux_impl<Func, Evaluator, LinearVectorizedTraversal, NoUnrolling>
|
|||||||
{
|
{
|
||||||
const Index size = xpr.size();
|
const Index size = xpr.size();
|
||||||
|
|
||||||
const Index packetSize = redux_traits<Func, Evaluator>::PacketSize;
|
constexpr Index packetSize = redux_traits<Func, Evaluator>::PacketSize;
|
||||||
const int packetAlignment = unpacket_traits<PacketScalar>::alignment;
|
constexpr int packetAlignment = unpacket_traits<PacketScalar>::alignment;
|
||||||
enum {
|
constexpr int alignment0 = (bool(Evaluator::Flags & DirectAccessBit) && bool(packet_traits<Scalar>::AlignedOnScalar)) ? int(packetAlignment) : int(Unaligned);
|
||||||
alignment0 = (bool(Evaluator::Flags & DirectAccessBit) && bool(packet_traits<Scalar>::AlignedOnScalar)) ? int(packetAlignment) : int(Unaligned),
|
constexpr int alignment = plain_enum_max(alignment0, Evaluator::Alignment);
|
||||||
alignment = plain_enum_max(alignment0, Evaluator::Alignment)
|
|
||||||
};
|
|
||||||
const Index alignedStart = internal::first_default_aligned(xpr);
|
const Index alignedStart = internal::first_default_aligned(xpr);
|
||||||
const Index alignedSize2 = ((size-alignedStart)/(2*packetSize))*(2*packetSize);
|
const Index alignedSize2 = ((size-alignedStart)/(2*packetSize))*(2*packetSize);
|
||||||
const Index alignedSize = ((size-alignedStart)/(packetSize))*(packetSize);
|
const Index alignedSize = ((size-alignedStart)/(packetSize))*(packetSize);
|
||||||
@ -401,11 +384,9 @@ struct redux_impl<Func, Evaluator, SliceVectorizedTraversal, Unrolling>
|
|||||||
EIGEN_DEVICE_FUNC static Scalar run(const Evaluator &eval, const Func& func, const XprType& xpr)
|
EIGEN_DEVICE_FUNC static Scalar run(const Evaluator &eval, const Func& func, const XprType& xpr)
|
||||||
{
|
{
|
||||||
eigen_assert(xpr.rows()>0 && xpr.cols()>0 && "you are using an empty matrix");
|
eigen_assert(xpr.rows()>0 && xpr.cols()>0 && "you are using an empty matrix");
|
||||||
|
constexpr Index packetSize = redux_traits<Func, Evaluator>::PacketSize;
|
||||||
const Index innerSize = xpr.innerSize();
|
const Index innerSize = xpr.innerSize();
|
||||||
const Index outerSize = xpr.outerSize();
|
const Index outerSize = xpr.outerSize();
|
||||||
enum {
|
|
||||||
packetSize = redux_traits<Func, Evaluator>::PacketSize
|
|
||||||
};
|
|
||||||
const Index packetedInnerSize = ((innerSize)/packetSize)*packetSize;
|
const Index packetedInnerSize = ((innerSize)/packetSize)*packetSize;
|
||||||
Scalar res;
|
Scalar res;
|
||||||
if(packetedInnerSize)
|
if(packetedInnerSize)
|
||||||
@ -436,11 +417,9 @@ struct redux_impl<Func, Evaluator, LinearVectorizedTraversal, CompleteUnrolling>
|
|||||||
typedef typename Evaluator::Scalar Scalar;
|
typedef typename Evaluator::Scalar Scalar;
|
||||||
|
|
||||||
typedef typename redux_traits<Func, Evaluator>::PacketType PacketType;
|
typedef typename redux_traits<Func, Evaluator>::PacketType PacketType;
|
||||||
enum {
|
static constexpr Index PacketSize = redux_traits<Func, Evaluator>::PacketSize;
|
||||||
PacketSize = redux_traits<Func, Evaluator>::PacketSize,
|
static constexpr Index Size = Evaluator::SizeAtCompileTime;
|
||||||
Size = Evaluator::SizeAtCompileTime,
|
static constexpr Index VectorizedSize = (int(Size) / int(PacketSize)) * int(PacketSize);
|
||||||
VectorizedSize = (int(Size) / int(PacketSize)) * int(PacketSize)
|
|
||||||
};
|
|
||||||
|
|
||||||
template<typename XprType>
|
template<typename XprType>
|
||||||
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE
|
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE
|
||||||
|
@ -270,9 +270,8 @@ struct vectorization_logic_half
|
|||||||
{
|
{
|
||||||
typedef internal::packet_traits<Scalar> PacketTraits;
|
typedef internal::packet_traits<Scalar> PacketTraits;
|
||||||
typedef typename internal::unpacket_traits<typename internal::packet_traits<Scalar>::type>::half PacketType;
|
typedef typename internal::unpacket_traits<typename internal::packet_traits<Scalar>::type>::half PacketType;
|
||||||
enum {
|
static constexpr int PacketSize = internal::unpacket_traits<PacketType>::size;
|
||||||
PacketSize = internal::unpacket_traits<PacketType>::size
|
|
||||||
};
|
|
||||||
static void run()
|
static void run()
|
||||||
{
|
{
|
||||||
// Some half-packets have a byte size < EIGEN_MIN_ALIGN_BYTES (e.g. Packet2f),
|
// Some half-packets have a byte size < EIGEN_MIN_ALIGN_BYTES (e.g. Packet2f),
|
||||||
@ -280,7 +279,7 @@ struct vectorization_logic_half
|
|||||||
// EIGEN_UNALIGNED_VECTORIZE is 0 (the matrix is assumed unaligned).
|
// EIGEN_UNALIGNED_VECTORIZE is 0 (the matrix is assumed unaligned).
|
||||||
// Adjust the matrix sizes to account for these alignment issues.
|
// Adjust the matrix sizes to account for these alignment issues.
|
||||||
constexpr int PacketBytes = sizeof(Scalar)*PacketSize;
|
constexpr int PacketBytes = sizeof(Scalar)*PacketSize;
|
||||||
constexpr int MinVSize = EIGEN_UNALIGNED_VECTORIZE ? PacketSize
|
constexpr int MinVSize = int(EIGEN_UNALIGNED_VECTORIZE) ? PacketSize
|
||||||
: PacketBytes >= EIGEN_MIN_ALIGN_BYTES ? PacketSize
|
: PacketBytes >= EIGEN_MIN_ALIGN_BYTES ? PacketSize
|
||||||
: (EIGEN_MIN_ALIGN_BYTES + sizeof(Scalar) - 1) / sizeof(Scalar);
|
: (EIGEN_MIN_ALIGN_BYTES + sizeof(Scalar) - 1) / sizeof(Scalar);
|
||||||
|
|
||||||
@ -414,19 +413,19 @@ EIGEN_DECLARE_TEST(vectorization_logic)
|
|||||||
if(internal::packet_traits<float>::Vectorizable)
|
if(internal::packet_traits<float>::Vectorizable)
|
||||||
{
|
{
|
||||||
VERIFY(test_assign(Matrix<float,3,3>(),Matrix<float,3,3>()+Matrix<float,3,3>(),
|
VERIFY(test_assign(Matrix<float,3,3>(),Matrix<float,3,3>()+Matrix<float,3,3>(),
|
||||||
EIGEN_UNALIGNED_VECTORIZE ? LinearVectorizedTraversal : LinearTraversal,CompleteUnrolling));
|
internal::packet_traits<float>::Vectorizable && EIGEN_UNALIGNED_VECTORIZE ? LinearVectorizedTraversal : LinearTraversal,CompleteUnrolling));
|
||||||
|
|
||||||
VERIFY(test_redux(Matrix<float,5,2>(),
|
VERIFY(test_redux(Matrix<float,5,2>(),
|
||||||
EIGEN_UNALIGNED_VECTORIZE ? LinearVectorizedTraversal : DefaultTraversal,CompleteUnrolling));
|
internal::packet_traits<float>::Vectorizable && EIGEN_UNALIGNED_VECTORIZE ? LinearVectorizedTraversal : LinearTraversal,CompleteUnrolling));
|
||||||
}
|
}
|
||||||
|
|
||||||
if(internal::packet_traits<double>::Vectorizable)
|
if(internal::packet_traits<double>::Vectorizable)
|
||||||
{
|
{
|
||||||
VERIFY(test_assign(Matrix<double,3,3>(),Matrix<double,3,3>()+Matrix<double,3,3>(),
|
VERIFY(test_assign(Matrix<double,3,3>(),Matrix<double,3,3>()+Matrix<double,3,3>(),
|
||||||
EIGEN_UNALIGNED_VECTORIZE ? LinearVectorizedTraversal : LinearTraversal,CompleteUnrolling));
|
internal::packet_traits<double>::Vectorizable && EIGEN_UNALIGNED_VECTORIZE ? LinearVectorizedTraversal : LinearTraversal,CompleteUnrolling));
|
||||||
|
|
||||||
VERIFY(test_redux(Matrix<double,7,3>(),
|
VERIFY(test_redux(Matrix<double,7,3>(),
|
||||||
EIGEN_UNALIGNED_VECTORIZE ? LinearVectorizedTraversal : DefaultTraversal,CompleteUnrolling));
|
internal::packet_traits<double>::Vectorizable && EIGEN_UNALIGNED_VECTORIZE ? LinearVectorizedTraversal : LinearTraversal,CompleteUnrolling));
|
||||||
}
|
}
|
||||||
#endif // EIGEN_VECTORIZE
|
#endif // EIGEN_VECTORIZE
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user