Clean up Redux.h and fix vectorization_logic test after changes to traversal order in Redux.

This commit is contained in:
Rasmus Munk Larsen 2023-05-24 20:26:52 +00:00
parent da6a71faf0
commit 8c43bf2b5b
3 changed files with 41 additions and 63 deletions

View File

@ -93,7 +93,7 @@ struct packetwise_redux_impl<Func, Evaluator, CompleteUnrolling>
* This specialization is not required for general reductions, which is
* why it is defined here.
*/
template<typename Func, typename Evaluator, int Start>
template<typename Func, typename Evaluator, Index Start>
struct redux_vec_unroller<Func, Evaluator, Start, 0>
{
template<typename PacketType>

View File

@ -99,12 +99,10 @@ public:
/*** no vectorization ***/
template<typename Func, typename Evaluator, int Start, int Length>
template<typename Func, typename Evaluator, Index Start, Index Length>
struct redux_novec_unroller
{
enum {
HalfLength = Length/2
};
static constexpr Index HalfLength = Length/2;
typedef typename Evaluator::Scalar Scalar;
@ -116,13 +114,11 @@ struct redux_novec_unroller
}
};
template<typename Func, typename Evaluator, int Start>
template<typename Func, typename Evaluator, Index Start>
struct redux_novec_unroller<Func, Evaluator, Start, 1>
{
enum {
outer = Start / Evaluator::InnerSizeAtCompileTime,
inner = Start % Evaluator::InnerSizeAtCompileTime
};
static constexpr Index outer = Start / Evaluator::InnerSizeAtCompileTime;
static constexpr Index inner = Start % Evaluator::InnerSizeAtCompileTime;
typedef typename Evaluator::Scalar Scalar;
@ -136,7 +132,7 @@ struct redux_novec_unroller<Func, Evaluator, Start, 1>
// This is actually dead code and will never be called. It is required
// to prevent false warnings regarding failed inlining though
// for 0 length run() will never be called at all.
template<typename Func, typename Evaluator, int Start>
template<typename Func, typename Evaluator, Index Start>
struct redux_novec_unroller<Func, Evaluator, Start, 0>
{
typedef typename Evaluator::Scalar Scalar;
@ -144,12 +140,10 @@ struct redux_novec_unroller<Func, Evaluator, Start, 0>
static EIGEN_STRONG_INLINE Scalar run(const Evaluator&, const Func&) { return Scalar(); }
};
template<typename Func, typename Evaluator, int Start, int Length>
template<typename Func, typename Evaluator, Index Start, Index Length>
struct redux_novec_linear_unroller
{
enum {
HalfLength = Length/2
};
static constexpr Index HalfLength = Length/2;
typedef typename Evaluator::Scalar Scalar;
@ -161,7 +155,7 @@ struct redux_novec_linear_unroller
}
};
template<typename Func, typename Evaluator, int Start>
template<typename Func, typename Evaluator, Index Start>
struct redux_novec_linear_unroller<Func, Evaluator, Start, 1>
{
typedef typename Evaluator::Scalar Scalar;
@ -176,7 +170,7 @@ struct redux_novec_linear_unroller<Func, Evaluator, Start, 1>
// This is actually dead code and will never be called. It is required
// to prevent false warnings regarding failed inlining though
// for 0 length run() will never be called at all.
template<typename Func, typename Evaluator, int Start>
template<typename Func, typename Evaluator, Index Start>
struct redux_novec_linear_unroller<Func, Evaluator, Start, 0>
{
typedef typename Evaluator::Scalar Scalar;
@ -186,17 +180,14 @@ struct redux_novec_linear_unroller<Func, Evaluator, Start, 0>
/*** vectorization ***/
template<typename Func, typename Evaluator, int Start, int Length>
template<typename Func, typename Evaluator, Index Start, Index Length>
struct redux_vec_unroller
{
template<typename PacketType>
EIGEN_DEVICE_FUNC
static EIGEN_STRONG_INLINE PacketType run(const Evaluator &eval, const Func& func)
{
enum {
PacketSize = unpacket_traits<PacketType>::size,
HalfLength = Length/2
};
constexpr Index HalfLength = Length/2;
return func.packetOp(
redux_vec_unroller<Func, Evaluator, Start, HalfLength>::template run<PacketType>(eval,func),
@ -204,35 +195,31 @@ struct redux_vec_unroller
}
};
// Partial specialization of redux_vec_unroller for a run of Length == 1:
// the recursion bottoms out here by reading one packet from the evaluator.
// NOTE(review): this listing appears to be a diff rendered without +/- markers,
// so the `int Start` header and the `enum { ... }` block look like the removed
// lines and the `Index Start` header / `constexpr` locals their replacements
// -- confirm against the real file before treating both as live code.
template<typename Func, typename Evaluator, int Start>
template<typename Func, typename Evaluator, Index Start>
struct redux_vec_unroller<Func, Evaluator, Start, 1>
{
// Returns the packet obtained from eval.packetByOuterInner at the (outer, inner)
// position derived from Start. The reduction functor argument is unnamed and
// unused: a single packet needs no combining.
template<typename PacketType>
EIGEN_DEVICE_FUNC
static EIGEN_STRONG_INLINE PacketType run(const Evaluator &eval, const Func&)
{
// Enum form of the compile-time constants (same names and formulas as the
// constexpr locals that follow).
enum {
PacketSize = unpacket_traits<PacketType>::size,
index = Start * PacketSize,
outer = index / int(Evaluator::InnerSizeAtCompileTime),
inner = index % int(Evaluator::InnerSizeAtCompileTime),
alignment = Evaluator::Alignment
};
// constexpr form: Start is scaled by the packet size (presumably Start counts
// packets -- TODO confirm), then split into quotient and remainder by the
// evaluator's compile-time inner size to get the outer and inner positions.
constexpr Index PacketSize = unpacket_traits<PacketType>::size;
constexpr Index index = Start * PacketSize;
constexpr Index outer = index / int(Evaluator::InnerSizeAtCompileTime);
constexpr Index inner = index % int(Evaluator::InnerSizeAtCompileTime);
// Alignment is passed through as a template argument of the packet load.
constexpr int alignment = Evaluator::Alignment;
return eval.template packetByOuterInner<alignment,PacketType>(outer, inner);
}
};
template<typename Func, typename Evaluator, int Start, int Length>
template<typename Func, typename Evaluator, Index Start, Index Length>
struct redux_vec_linear_unroller
{
template<typename PacketType>
EIGEN_DEVICE_FUNC
static EIGEN_STRONG_INLINE PacketType run(const Evaluator &eval, const Func& func)
{
enum {
PacketSize = unpacket_traits<PacketType>::size,
HalfLength = Length/2
};
constexpr Index HalfLength = Length/2;
return func.packetOp(
redux_vec_linear_unroller<Func, Evaluator, Start, HalfLength>::template run<PacketType>(eval,func),
@ -240,18 +227,16 @@ struct redux_vec_linear_unroller
}
};
// Partial specialization of redux_vec_linear_unroller for a run of Length == 1:
// reads one packet from the evaluator using a single linear index.
// NOTE(review): this listing appears to be a diff rendered without +/- markers,
// so the `int Start` header and the `enum { ... }` block look like the removed
// lines and the `Index Start` header / `constexpr` locals their replacements
// -- confirm against the real file before treating both as live code.
template<typename Func, typename Evaluator, int Start>
template<typename Func, typename Evaluator, Index Start>
struct redux_vec_linear_unroller<Func, Evaluator, Start, 1>
{
// Returns the packet obtained from eval.packet at index Start * PacketSize.
// The reduction functor argument is unnamed and unused: a single packet needs
// no combining.
template<typename PacketType>
EIGEN_DEVICE_FUNC
static EIGEN_STRONG_INLINE PacketType run(const Evaluator &eval, const Func&)
{
// Enum form of the compile-time constants (same names and formulas as the
// constexpr locals that follow).
enum {
PacketSize = unpacket_traits<PacketType>::size,
index = Start * PacketSize,
alignment = Evaluator::Alignment
};
// constexpr form: Start is scaled by the packet size to form the index handed
// to eval.packet (presumably Start counts packets -- TODO confirm).
constexpr Index PacketSize = unpacket_traits<PacketType>::size;
constexpr Index index = (Start * PacketSize);
// Alignment is passed through as a template argument of the packet load.
constexpr int alignment = Evaluator::Alignment;
return eval.template packet<alignment,PacketType>(index);
}
};
@ -342,12 +327,10 @@ struct redux_impl<Func, Evaluator, LinearVectorizedTraversal, NoUnrolling>
{
const Index size = xpr.size();
const Index packetSize = redux_traits<Func, Evaluator>::PacketSize;
const int packetAlignment = unpacket_traits<PacketScalar>::alignment;
enum {
alignment0 = (bool(Evaluator::Flags & DirectAccessBit) && bool(packet_traits<Scalar>::AlignedOnScalar)) ? int(packetAlignment) : int(Unaligned),
alignment = plain_enum_max(alignment0, Evaluator::Alignment)
};
constexpr Index packetSize = redux_traits<Func, Evaluator>::PacketSize;
constexpr int packetAlignment = unpacket_traits<PacketScalar>::alignment;
constexpr int alignment0 = (bool(Evaluator::Flags & DirectAccessBit) && bool(packet_traits<Scalar>::AlignedOnScalar)) ? int(packetAlignment) : int(Unaligned);
constexpr int alignment = plain_enum_max(alignment0, Evaluator::Alignment);
const Index alignedStart = internal::first_default_aligned(xpr);
const Index alignedSize2 = ((size-alignedStart)/(2*packetSize))*(2*packetSize);
const Index alignedSize = ((size-alignedStart)/(packetSize))*(packetSize);
@ -401,11 +384,9 @@ struct redux_impl<Func, Evaluator, SliceVectorizedTraversal, Unrolling>
EIGEN_DEVICE_FUNC static Scalar run(const Evaluator &eval, const Func& func, const XprType& xpr)
{
eigen_assert(xpr.rows()>0 && xpr.cols()>0 && "you are using an empty matrix");
constexpr Index packetSize = redux_traits<Func, Evaluator>::PacketSize;
const Index innerSize = xpr.innerSize();
const Index outerSize = xpr.outerSize();
enum {
packetSize = redux_traits<Func, Evaluator>::PacketSize
};
const Index packetedInnerSize = ((innerSize)/packetSize)*packetSize;
Scalar res;
if(packetedInnerSize)
@ -436,11 +417,9 @@ struct redux_impl<Func, Evaluator, LinearVectorizedTraversal, CompleteUnrolling>
typedef typename Evaluator::Scalar Scalar;
typedef typename redux_traits<Func, Evaluator>::PacketType PacketType;
enum {
PacketSize = redux_traits<Func, Evaluator>::PacketSize,
Size = Evaluator::SizeAtCompileTime,
VectorizedSize = (int(Size) / int(PacketSize)) * int(PacketSize)
};
static constexpr Index PacketSize = redux_traits<Func, Evaluator>::PacketSize;
static constexpr Index Size = Evaluator::SizeAtCompileTime;
static constexpr Index VectorizedSize = (int(Size) / int(PacketSize)) * int(PacketSize);
template<typename XprType>
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE

View File

@ -270,9 +270,8 @@ struct vectorization_logic_half
{
typedef internal::packet_traits<Scalar> PacketTraits;
typedef typename internal::unpacket_traits<typename internal::packet_traits<Scalar>::type>::half PacketType;
enum {
PacketSize = internal::unpacket_traits<PacketType>::size
};
static constexpr int PacketSize = internal::unpacket_traits<PacketType>::size;
static void run()
{
// Some half-packets have a byte size < EIGEN_MIN_ALIGN_BYTES (e.g. Packet2f),
@ -280,7 +279,7 @@ struct vectorization_logic_half
// EIGEN_UNALIGNED_VECTORIZE is 0 (the matrix is assumed unaligned).
// Adjust the matrix sizes to account for these alignment issues.
constexpr int PacketBytes = sizeof(Scalar)*PacketSize;
constexpr int MinVSize = EIGEN_UNALIGNED_VECTORIZE ? PacketSize
constexpr int MinVSize = int(EIGEN_UNALIGNED_VECTORIZE) ? PacketSize
: PacketBytes >= EIGEN_MIN_ALIGN_BYTES ? PacketSize
: (EIGEN_MIN_ALIGN_BYTES + sizeof(Scalar) - 1) / sizeof(Scalar);
@ -414,19 +413,19 @@ EIGEN_DECLARE_TEST(vectorization_logic)
if(internal::packet_traits<float>::Vectorizable)
{
VERIFY(test_assign(Matrix<float,3,3>(),Matrix<float,3,3>()+Matrix<float,3,3>(),
EIGEN_UNALIGNED_VECTORIZE ? LinearVectorizedTraversal : LinearTraversal,CompleteUnrolling));
internal::packet_traits<float>::Vectorizable && EIGEN_UNALIGNED_VECTORIZE ? LinearVectorizedTraversal : LinearTraversal,CompleteUnrolling));
VERIFY(test_redux(Matrix<float,5,2>(),
EIGEN_UNALIGNED_VECTORIZE ? LinearVectorizedTraversal : DefaultTraversal,CompleteUnrolling));
internal::packet_traits<float>::Vectorizable && EIGEN_UNALIGNED_VECTORIZE ? LinearVectorizedTraversal : LinearTraversal,CompleteUnrolling));
}
if(internal::packet_traits<double>::Vectorizable)
{
VERIFY(test_assign(Matrix<double,3,3>(),Matrix<double,3,3>()+Matrix<double,3,3>(),
EIGEN_UNALIGNED_VECTORIZE ? LinearVectorizedTraversal : LinearTraversal,CompleteUnrolling));
internal::packet_traits<double>::Vectorizable && EIGEN_UNALIGNED_VECTORIZE ? LinearVectorizedTraversal : LinearTraversal,CompleteUnrolling));
VERIFY(test_redux(Matrix<double,7,3>(),
EIGEN_UNALIGNED_VECTORIZE ? LinearVectorizedTraversal : DefaultTraversal,CompleteUnrolling));
internal::packet_traits<double>::Vectorizable && EIGEN_UNALIGNED_VECTORIZE ? LinearVectorizedTraversal : LinearTraversal,CompleteUnrolling));
}
#endif // EIGEN_VECTORIZE