Simplified the code that dispatches vectorized reductions on GPU

This commit is contained in:
Benoit Steiner 2016-06-09 10:29:52 -07:00
parent 66796e843d
commit 37638dafd7
3 changed files with 31 additions and 19 deletions

View File

@@ -131,7 +131,7 @@ template <typename T, typename Device>
struct reducer_traits<SumReducer<T>, Device> { struct reducer_traits<SumReducer<T>, Device> {
enum { enum {
Cost = NumTraits<T>::AddCost, Cost = NumTraits<T>::AddCost,
PacketAccess = packet_traits<typename PacketType<T, Device>::type>::HasAdd PacketAccess = PacketType<T, Device>::HasAdd
}; };
}; };
@@ -183,7 +183,7 @@ template <typename T, typename Device>
struct reducer_traits<MeanReducer<T>, Device> { struct reducer_traits<MeanReducer<T>, Device> {
enum { enum {
Cost = NumTraits<T>::AddCost, Cost = NumTraits<T>::AddCost,
PacketAccess = packet_traits<typename PacketType<T, Device>::type>::HasAdd PacketAccess = PacketType<T, Device>::HasAdd
}; };
}; };
@@ -225,7 +225,7 @@ template <typename T, typename Device>
struct reducer_traits<MaxReducer<T>, Device> { struct reducer_traits<MaxReducer<T>, Device> {
enum { enum {
Cost = NumTraits<T>::AddCost, Cost = NumTraits<T>::AddCost,
PacketAccess = packet_traits<typename PacketType<T, Device>::type>::HasMax PacketAccess = PacketType<T, Device>::HasMax
}; };
}; };
@@ -267,7 +267,7 @@ template <typename T, typename Device>
struct reducer_traits<MinReducer<T>, Device> { struct reducer_traits<MinReducer<T>, Device> {
enum { enum {
Cost = NumTraits<T>::AddCost, Cost = NumTraits<T>::AddCost,
PacketAccess = packet_traits<typename PacketType<T, Device>::type>::HasMin PacketAccess = PacketType<T, Device>::HasMin
}; };
}; };
@@ -310,7 +310,7 @@ template <typename T, typename Device>
struct reducer_traits<ProdReducer<T>, Device> { struct reducer_traits<ProdReducer<T>, Device> {
enum { enum {
Cost = NumTraits<T>::MulCost, Cost = NumTraits<T>::MulCost,
PacketAccess = packet_traits<typename PacketType<T, Device>::type>::HasMul PacketAccess = PacketType<T, Device>::HasMul
}; };
}; };

View File

@@ -47,27 +47,39 @@ template <> struct max_n_1<0> {
// Default packet types // Default packet types
template <typename Scalar, typename Device> template <typename Scalar, typename Device>
struct PacketType { struct PacketType : internal::packet_traits<Scalar> {
typedef typename internal::packet_traits<Scalar>::type type; typedef typename internal::packet_traits<Scalar>::type type;
enum { size = internal::unpacket_traits<type>::size };
}; };
// For CUDA packet types when using a GpuDevice // For CUDA packet types when using a GpuDevice
#if defined(EIGEN_USE_GPU) && defined(__CUDACC__) #if defined(EIGEN_USE_GPU) && defined(__CUDACC__)
template <> template <>
struct PacketType<half, GpuDevice> { struct PacketType<half, GpuDevice> {
typedef half2 type; typedef half2 type;
static const int size = 2; static const int size = 2;
}; enum {
template <> HasAdd = 1,
struct PacketType<float, GpuDevice> { HasSub = 1,
typedef float4 type; HasMul = 1,
static const int size = 4; HasNegate = 1,
}; HasAbs = 1,
template <> HasArg = 0,
struct PacketType<double, GpuDevice> { HasAbs2 = 0,
typedef double2 type; HasMin = 1,
static const int size = 2; HasMax = 1,
HasConj = 0,
HasSetLinear = 0,
HasBlend = 0,
HasDiv = 1,
HasSqrt = 1,
HasRsqrt = 1,
HasExp = 1,
HasLog = 1,
HasLog1p = 0,
HasLog10 = 0,
HasPow = 1,
};
}; };
#endif #endif

View File

@@ -328,7 +328,7 @@ struct FullReducer<Self, Op, GpuDevice, Vectorizable> {
// Unfortunately nvidia doesn't support well exotic types such as complex, // Unfortunately nvidia doesn't support well exotic types such as complex,
// so reduce the scope of the optimized version of the code to the simple case // so reduce the scope of the optimized version of the code to the simple case
// of floats and half floats. // of floats and half floats.
#ifdef EIGEN_HAS_CUDA_FP16 #ifdef EIGEN_HAS_CUDA_FP16
static const bool HasOptimizedImplementation = !Op::IsStateful && static const bool HasOptimizedImplementation = !Op::IsStateful &&
(internal::is_same<typename Self::CoeffReturnType, float>::value || (internal::is_same<typename Self::CoeffReturnType, float>::value ||
(internal::is_same<typename Self::CoeffReturnType, Eigen::half>::value && reducer_traits<Op, GpuDevice>::PacketAccess)); (internal::is_same<typename Self::CoeffReturnType, Eigen::half>::value && reducer_traits<Op, GpuDevice>::PacketAccess));