Clean up Redux.h and fix vectorization_logic test after changes to traversal order in Redux.

This commit is contained in:
Rasmus Munk Larsen 2023-05-24 20:26:52 +00:00
parent da6a71faf0
commit 8c43bf2b5b
3 changed files with 41 additions and 63 deletions

View File

@ -93,7 +93,7 @@ struct packetwise_redux_impl<Func, Evaluator, CompleteUnrolling>
* This specialization is not required for general reductions, which is * This specialization is not required for general reductions, which is
* why it is defined here. * why it is defined here.
*/ */
template<typename Func, typename Evaluator, int Start> template<typename Func, typename Evaluator, Index Start>
struct redux_vec_unroller<Func, Evaluator, Start, 0> struct redux_vec_unroller<Func, Evaluator, Start, 0>
{ {
template<typename PacketType> template<typename PacketType>

View File

@ -99,12 +99,10 @@ public:
/*** no vectorization ***/ /*** no vectorization ***/
template<typename Func, typename Evaluator, int Start, int Length> template<typename Func, typename Evaluator, Index Start, Index Length>
struct redux_novec_unroller struct redux_novec_unroller
{ {
enum { static constexpr Index HalfLength = Length/2;
HalfLength = Length/2
};
typedef typename Evaluator::Scalar Scalar; typedef typename Evaluator::Scalar Scalar;
@ -116,13 +114,11 @@ struct redux_novec_unroller
} }
}; };
template<typename Func, typename Evaluator, int Start> template<typename Func, typename Evaluator, Index Start>
struct redux_novec_unroller<Func, Evaluator, Start, 1> struct redux_novec_unroller<Func, Evaluator, Start, 1>
{ {
enum { static constexpr Index outer = Start / Evaluator::InnerSizeAtCompileTime;
outer = Start / Evaluator::InnerSizeAtCompileTime, static constexpr Index inner = Start % Evaluator::InnerSizeAtCompileTime;
inner = Start % Evaluator::InnerSizeAtCompileTime
};
typedef typename Evaluator::Scalar Scalar; typedef typename Evaluator::Scalar Scalar;
@ -136,7 +132,7 @@ struct redux_novec_unroller<Func, Evaluator, Start, 1>
// This is actually dead code and will never be called. It is required // This is actually dead code and will never be called. It is required
// to prevent false warnings regarding failed inlining though // to prevent false warnings regarding failed inlining though
// for 0 length run() will never be called at all. // for 0 length run() will never be called at all.
template<typename Func, typename Evaluator, int Start> template<typename Func, typename Evaluator, Index Start>
struct redux_novec_unroller<Func, Evaluator, Start, 0> struct redux_novec_unroller<Func, Evaluator, Start, 0>
{ {
typedef typename Evaluator::Scalar Scalar; typedef typename Evaluator::Scalar Scalar;
@ -144,12 +140,10 @@ struct redux_novec_unroller<Func, Evaluator, Start, 0>
static EIGEN_STRONG_INLINE Scalar run(const Evaluator&, const Func&) { return Scalar(); } static EIGEN_STRONG_INLINE Scalar run(const Evaluator&, const Func&) { return Scalar(); }
}; };
template<typename Func, typename Evaluator, int Start, int Length> template<typename Func, typename Evaluator, Index Start, Index Length>
struct redux_novec_linear_unroller struct redux_novec_linear_unroller
{ {
enum { static constexpr Index HalfLength = Length/2;
HalfLength = Length/2
};
typedef typename Evaluator::Scalar Scalar; typedef typename Evaluator::Scalar Scalar;
@ -161,7 +155,7 @@ struct redux_novec_linear_unroller
} }
}; };
template<typename Func, typename Evaluator, int Start> template<typename Func, typename Evaluator, Index Start>
struct redux_novec_linear_unroller<Func, Evaluator, Start, 1> struct redux_novec_linear_unroller<Func, Evaluator, Start, 1>
{ {
typedef typename Evaluator::Scalar Scalar; typedef typename Evaluator::Scalar Scalar;
@ -176,7 +170,7 @@ struct redux_novec_linear_unroller<Func, Evaluator, Start, 1>
// This is actually dead code and will never be called. It is required // This is actually dead code and will never be called. It is required
// to prevent false warnings regarding failed inlining though // to prevent false warnings regarding failed inlining though
// for 0 length run() will never be called at all. // for 0 length run() will never be called at all.
template<typename Func, typename Evaluator, int Start> template<typename Func, typename Evaluator, Index Start>
struct redux_novec_linear_unroller<Func, Evaluator, Start, 0> struct redux_novec_linear_unroller<Func, Evaluator, Start, 0>
{ {
typedef typename Evaluator::Scalar Scalar; typedef typename Evaluator::Scalar Scalar;
@ -186,17 +180,14 @@ struct redux_novec_linear_unroller<Func, Evaluator, Start, 0>
/*** vectorization ***/ /*** vectorization ***/
template<typename Func, typename Evaluator, int Start, int Length> template<typename Func, typename Evaluator, Index Start, Index Length>
struct redux_vec_unroller struct redux_vec_unroller
{ {
template<typename PacketType> template<typename PacketType>
EIGEN_DEVICE_FUNC EIGEN_DEVICE_FUNC
static EIGEN_STRONG_INLINE PacketType run(const Evaluator &eval, const Func& func) static EIGEN_STRONG_INLINE PacketType run(const Evaluator &eval, const Func& func)
{ {
enum { constexpr Index HalfLength = Length/2;
PacketSize = unpacket_traits<PacketType>::size,
HalfLength = Length/2
};
return func.packetOp( return func.packetOp(
redux_vec_unroller<Func, Evaluator, Start, HalfLength>::template run<PacketType>(eval,func), redux_vec_unroller<Func, Evaluator, Start, HalfLength>::template run<PacketType>(eval,func),
@ -204,35 +195,31 @@ struct redux_vec_unroller
} }
}; };
template<typename Func, typename Evaluator, int Start> template<typename Func, typename Evaluator, Index Start>
struct redux_vec_unroller<Func, Evaluator, Start, 1> struct redux_vec_unroller<Func, Evaluator, Start, 1>
{ {
template<typename PacketType> template<typename PacketType>
EIGEN_DEVICE_FUNC EIGEN_DEVICE_FUNC
static EIGEN_STRONG_INLINE PacketType run(const Evaluator &eval, const Func&) static EIGEN_STRONG_INLINE PacketType run(const Evaluator &eval, const Func&)
{ {
enum { constexpr Index PacketSize = unpacket_traits<PacketType>::size;
PacketSize = unpacket_traits<PacketType>::size, constexpr Index index = Start * PacketSize;
index = Start * PacketSize, constexpr Index outer = index / int(Evaluator::InnerSizeAtCompileTime);
outer = index / int(Evaluator::InnerSizeAtCompileTime), constexpr Index inner = index % int(Evaluator::InnerSizeAtCompileTime);
inner = index % int(Evaluator::InnerSizeAtCompileTime), constexpr int alignment = Evaluator::Alignment;
alignment = Evaluator::Alignment
};
return eval.template packetByOuterInner<alignment,PacketType>(outer, inner); return eval.template packetByOuterInner<alignment,PacketType>(outer, inner);
} }
}; };
template<typename Func, typename Evaluator, int Start, int Length> template<typename Func, typename Evaluator, Index Start, Index Length>
struct redux_vec_linear_unroller struct redux_vec_linear_unroller
{ {
template<typename PacketType> template<typename PacketType>
EIGEN_DEVICE_FUNC EIGEN_DEVICE_FUNC
static EIGEN_STRONG_INLINE PacketType run(const Evaluator &eval, const Func& func) static EIGEN_STRONG_INLINE PacketType run(const Evaluator &eval, const Func& func)
{ {
enum { constexpr Index HalfLength = Length/2;
PacketSize = unpacket_traits<PacketType>::size,
HalfLength = Length/2
};
return func.packetOp( return func.packetOp(
redux_vec_linear_unroller<Func, Evaluator, Start, HalfLength>::template run<PacketType>(eval,func), redux_vec_linear_unroller<Func, Evaluator, Start, HalfLength>::template run<PacketType>(eval,func),
@ -240,18 +227,16 @@ struct redux_vec_linear_unroller
} }
}; };
template<typename Func, typename Evaluator, int Start> template<typename Func, typename Evaluator, Index Start>
struct redux_vec_linear_unroller<Func, Evaluator, Start, 1> struct redux_vec_linear_unroller<Func, Evaluator, Start, 1>
{ {
template<typename PacketType> template<typename PacketType>
EIGEN_DEVICE_FUNC EIGEN_DEVICE_FUNC
static EIGEN_STRONG_INLINE PacketType run(const Evaluator &eval, const Func&) static EIGEN_STRONG_INLINE PacketType run(const Evaluator &eval, const Func&)
{ {
enum { constexpr Index PacketSize = unpacket_traits<PacketType>::size;
PacketSize = unpacket_traits<PacketType>::size, constexpr Index index = (Start * PacketSize);
index = Start * PacketSize, constexpr int alignment = Evaluator::Alignment;
alignment = Evaluator::Alignment
};
return eval.template packet<alignment,PacketType>(index); return eval.template packet<alignment,PacketType>(index);
} }
}; };
@ -342,12 +327,10 @@ struct redux_impl<Func, Evaluator, LinearVectorizedTraversal, NoUnrolling>
{ {
const Index size = xpr.size(); const Index size = xpr.size();
const Index packetSize = redux_traits<Func, Evaluator>::PacketSize; constexpr Index packetSize = redux_traits<Func, Evaluator>::PacketSize;
const int packetAlignment = unpacket_traits<PacketScalar>::alignment; constexpr int packetAlignment = unpacket_traits<PacketScalar>::alignment;
enum { constexpr int alignment0 = (bool(Evaluator::Flags & DirectAccessBit) && bool(packet_traits<Scalar>::AlignedOnScalar)) ? int(packetAlignment) : int(Unaligned);
alignment0 = (bool(Evaluator::Flags & DirectAccessBit) && bool(packet_traits<Scalar>::AlignedOnScalar)) ? int(packetAlignment) : int(Unaligned), constexpr int alignment = plain_enum_max(alignment0, Evaluator::Alignment);
alignment = plain_enum_max(alignment0, Evaluator::Alignment)
};
const Index alignedStart = internal::first_default_aligned(xpr); const Index alignedStart = internal::first_default_aligned(xpr);
const Index alignedSize2 = ((size-alignedStart)/(2*packetSize))*(2*packetSize); const Index alignedSize2 = ((size-alignedStart)/(2*packetSize))*(2*packetSize);
const Index alignedSize = ((size-alignedStart)/(packetSize))*(packetSize); const Index alignedSize = ((size-alignedStart)/(packetSize))*(packetSize);
@ -401,11 +384,9 @@ struct redux_impl<Func, Evaluator, SliceVectorizedTraversal, Unrolling>
EIGEN_DEVICE_FUNC static Scalar run(const Evaluator &eval, const Func& func, const XprType& xpr) EIGEN_DEVICE_FUNC static Scalar run(const Evaluator &eval, const Func& func, const XprType& xpr)
{ {
eigen_assert(xpr.rows()>0 && xpr.cols()>0 && "you are using an empty matrix"); eigen_assert(xpr.rows()>0 && xpr.cols()>0 && "you are using an empty matrix");
constexpr Index packetSize = redux_traits<Func, Evaluator>::PacketSize;
const Index innerSize = xpr.innerSize(); const Index innerSize = xpr.innerSize();
const Index outerSize = xpr.outerSize(); const Index outerSize = xpr.outerSize();
enum {
packetSize = redux_traits<Func, Evaluator>::PacketSize
};
const Index packetedInnerSize = ((innerSize)/packetSize)*packetSize; const Index packetedInnerSize = ((innerSize)/packetSize)*packetSize;
Scalar res; Scalar res;
if(packetedInnerSize) if(packetedInnerSize)
@ -436,11 +417,9 @@ struct redux_impl<Func, Evaluator, LinearVectorizedTraversal, CompleteUnrolling>
typedef typename Evaluator::Scalar Scalar; typedef typename Evaluator::Scalar Scalar;
typedef typename redux_traits<Func, Evaluator>::PacketType PacketType; typedef typename redux_traits<Func, Evaluator>::PacketType PacketType;
enum { static constexpr Index PacketSize = redux_traits<Func, Evaluator>::PacketSize;
PacketSize = redux_traits<Func, Evaluator>::PacketSize, static constexpr Index Size = Evaluator::SizeAtCompileTime;
Size = Evaluator::SizeAtCompileTime, static constexpr Index VectorizedSize = (int(Size) / int(PacketSize)) * int(PacketSize);
VectorizedSize = (int(Size) / int(PacketSize)) * int(PacketSize)
};
template<typename XprType> template<typename XprType>
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE

View File

@ -270,9 +270,8 @@ struct vectorization_logic_half
{ {
typedef internal::packet_traits<Scalar> PacketTraits; typedef internal::packet_traits<Scalar> PacketTraits;
typedef typename internal::unpacket_traits<typename internal::packet_traits<Scalar>::type>::half PacketType; typedef typename internal::unpacket_traits<typename internal::packet_traits<Scalar>::type>::half PacketType;
enum { static constexpr int PacketSize = internal::unpacket_traits<PacketType>::size;
PacketSize = internal::unpacket_traits<PacketType>::size
};
static void run() static void run()
{ {
// Some half-packets have a byte size < EIGEN_MIN_ALIGN_BYTES (e.g. Packet2f), // Some half-packets have a byte size < EIGEN_MIN_ALIGN_BYTES (e.g. Packet2f),
@ -280,7 +279,7 @@ struct vectorization_logic_half
// EIGEN_UNALIGNED_VECTORIZE is 0 (the matrix is assumed unaligned). // EIGEN_UNALIGNED_VECTORIZE is 0 (the matrix is assumed unaligned).
// Adjust the matrix sizes to account for these alignment issues. // Adjust the matrix sizes to account for these alignment issues.
constexpr int PacketBytes = sizeof(Scalar)*PacketSize; constexpr int PacketBytes = sizeof(Scalar)*PacketSize;
constexpr int MinVSize = EIGEN_UNALIGNED_VECTORIZE ? PacketSize constexpr int MinVSize = int(EIGEN_UNALIGNED_VECTORIZE) ? PacketSize
: PacketBytes >= EIGEN_MIN_ALIGN_BYTES ? PacketSize : PacketBytes >= EIGEN_MIN_ALIGN_BYTES ? PacketSize
: (EIGEN_MIN_ALIGN_BYTES + sizeof(Scalar) - 1) / sizeof(Scalar); : (EIGEN_MIN_ALIGN_BYTES + sizeof(Scalar) - 1) / sizeof(Scalar);
@ -414,19 +413,19 @@ EIGEN_DECLARE_TEST(vectorization_logic)
if(internal::packet_traits<float>::Vectorizable) if(internal::packet_traits<float>::Vectorizable)
{ {
VERIFY(test_assign(Matrix<float,3,3>(),Matrix<float,3,3>()+Matrix<float,3,3>(), VERIFY(test_assign(Matrix<float,3,3>(),Matrix<float,3,3>()+Matrix<float,3,3>(),
EIGEN_UNALIGNED_VECTORIZE ? LinearVectorizedTraversal : LinearTraversal,CompleteUnrolling)); internal::packet_traits<float>::Vectorizable && EIGEN_UNALIGNED_VECTORIZE ? LinearVectorizedTraversal : LinearTraversal,CompleteUnrolling));
VERIFY(test_redux(Matrix<float,5,2>(), VERIFY(test_redux(Matrix<float,5,2>(),
EIGEN_UNALIGNED_VECTORIZE ? LinearVectorizedTraversal : DefaultTraversal,CompleteUnrolling)); internal::packet_traits<float>::Vectorizable && EIGEN_UNALIGNED_VECTORIZE ? LinearVectorizedTraversal : LinearTraversal,CompleteUnrolling));
} }
if(internal::packet_traits<double>::Vectorizable) if(internal::packet_traits<double>::Vectorizable)
{ {
VERIFY(test_assign(Matrix<double,3,3>(),Matrix<double,3,3>()+Matrix<double,3,3>(), VERIFY(test_assign(Matrix<double,3,3>(),Matrix<double,3,3>()+Matrix<double,3,3>(),
EIGEN_UNALIGNED_VECTORIZE ? LinearVectorizedTraversal : LinearTraversal,CompleteUnrolling)); internal::packet_traits<double>::Vectorizable && EIGEN_UNALIGNED_VECTORIZE ? LinearVectorizedTraversal : LinearTraversal,CompleteUnrolling));
VERIFY(test_redux(Matrix<double,7,3>(), VERIFY(test_redux(Matrix<double,7,3>(),
EIGEN_UNALIGNED_VECTORIZE ? LinearVectorizedTraversal : DefaultTraversal,CompleteUnrolling)); internal::packet_traits<double>::Vectorizable && EIGEN_UNALIGNED_VECTORIZE ? LinearVectorizedTraversal : LinearTraversal,CompleteUnrolling));
} }
#endif // EIGEN_VECTORIZE #endif // EIGEN_VECTORIZE