mirror of
https://gitlab.com/libeigen/eigen.git
synced 2025-06-30 18:25:11 +08:00
Simplified the code that dispatches vectorized reductions on GPU
This commit is contained in:
parent
66796e843d
commit
37638dafd7
@ -131,7 +131,7 @@ template <typename T, typename Device>
|
|||||||
struct reducer_traits<SumReducer<T>, Device> {
|
struct reducer_traits<SumReducer<T>, Device> {
|
||||||
enum {
|
enum {
|
||||||
Cost = NumTraits<T>::AddCost,
|
Cost = NumTraits<T>::AddCost,
|
||||||
PacketAccess = packet_traits<typename PacketType<T, Device>::type>::HasAdd
|
PacketAccess = PacketType<T, Device>::HasAdd
|
||||||
};
|
};
|
||||||
};
|
};
|
||||||
|
|
||||||
@ -183,7 +183,7 @@ template <typename T, typename Device>
|
|||||||
struct reducer_traits<MeanReducer<T>, Device> {
|
struct reducer_traits<MeanReducer<T>, Device> {
|
||||||
enum {
|
enum {
|
||||||
Cost = NumTraits<T>::AddCost,
|
Cost = NumTraits<T>::AddCost,
|
||||||
PacketAccess = packet_traits<typename PacketType<T, Device>::type>::HasAdd
|
PacketAccess = PacketType<T, Device>::HasAdd
|
||||||
};
|
};
|
||||||
};
|
};
|
||||||
|
|
||||||
@ -225,7 +225,7 @@ template <typename T, typename Device>
|
|||||||
struct reducer_traits<MaxReducer<T>, Device> {
|
struct reducer_traits<MaxReducer<T>, Device> {
|
||||||
enum {
|
enum {
|
||||||
Cost = NumTraits<T>::AddCost,
|
Cost = NumTraits<T>::AddCost,
|
||||||
PacketAccess = packet_traits<typename PacketType<T, Device>::type>::HasMax
|
PacketAccess = PacketType<T, Device>::HasMax
|
||||||
};
|
};
|
||||||
};
|
};
|
||||||
|
|
||||||
@ -267,7 +267,7 @@ template <typename T, typename Device>
|
|||||||
struct reducer_traits<MinReducer<T>, Device> {
|
struct reducer_traits<MinReducer<T>, Device> {
|
||||||
enum {
|
enum {
|
||||||
Cost = NumTraits<T>::AddCost,
|
Cost = NumTraits<T>::AddCost,
|
||||||
PacketAccess = packet_traits<typename PacketType<T, Device>::type>::HasMin
|
PacketAccess = PacketType<T, Device>::HasMin
|
||||||
};
|
};
|
||||||
};
|
};
|
||||||
|
|
||||||
@ -310,7 +310,7 @@ template <typename T, typename Device>
|
|||||||
struct reducer_traits<ProdReducer<T>, Device> {
|
struct reducer_traits<ProdReducer<T>, Device> {
|
||||||
enum {
|
enum {
|
||||||
Cost = NumTraits<T>::MulCost,
|
Cost = NumTraits<T>::MulCost,
|
||||||
PacketAccess = packet_traits<typename PacketType<T, Device>::type>::HasMul
|
PacketAccess = PacketType<T, Device>::HasMul
|
||||||
};
|
};
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -47,27 +47,39 @@ template <> struct max_n_1<0> {
|
|||||||
|
|
||||||
// Default packet types
|
// Default packet types
|
||||||
template <typename Scalar, typename Device>
|
template <typename Scalar, typename Device>
|
||||||
struct PacketType {
|
struct PacketType : internal::packet_traits<Scalar> {
|
||||||
typedef typename internal::packet_traits<Scalar>::type type;
|
typedef typename internal::packet_traits<Scalar>::type type;
|
||||||
enum { size = internal::unpacket_traits<type>::size };
|
|
||||||
};
|
};
|
||||||
|
|
||||||
// For CUDA packet types when using a GpuDevice
|
// For CUDA packet types when using a GpuDevice
|
||||||
#if defined(EIGEN_USE_GPU) && defined(__CUDACC__)
|
#if defined(EIGEN_USE_GPU) && defined(__CUDACC__)
|
||||||
template <>
|
template <>
|
||||||
struct PacketType<half, GpuDevice> {
|
struct PacketType<half, GpuDevice> {
|
||||||
typedef half2 type;
|
typedef half2 type;
|
||||||
static const int size = 2;
|
static const int size = 2;
|
||||||
};
|
enum {
|
||||||
template <>
|
HasAdd = 1,
|
||||||
struct PacketType<float, GpuDevice> {
|
HasSub = 1,
|
||||||
typedef float4 type;
|
HasMul = 1,
|
||||||
static const int size = 4;
|
HasNegate = 1,
|
||||||
};
|
HasAbs = 1,
|
||||||
template <>
|
HasArg = 0,
|
||||||
struct PacketType<double, GpuDevice> {
|
HasAbs2 = 0,
|
||||||
typedef double2 type;
|
HasMin = 1,
|
||||||
static const int size = 2;
|
HasMax = 1,
|
||||||
|
HasConj = 0,
|
||||||
|
HasSetLinear = 0,
|
||||||
|
HasBlend = 0,
|
||||||
|
|
||||||
|
HasDiv = 1,
|
||||||
|
HasSqrt = 1,
|
||||||
|
HasRsqrt = 1,
|
||||||
|
HasExp = 1,
|
||||||
|
HasLog = 1,
|
||||||
|
HasLog1p = 0,
|
||||||
|
HasLog10 = 0,
|
||||||
|
HasPow = 1,
|
||||||
|
};
|
||||||
};
|
};
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
@ -328,7 +328,7 @@ struct FullReducer<Self, Op, GpuDevice, Vectorizable> {
|
|||||||
// Unfortunately nvidia doesn't support well exotic types such as complex,
|
// Unfortunately nvidia doesn't support well exotic types such as complex,
|
||||||
// so reduce the scope of the optimized version of the code to the simple case
|
// so reduce the scope of the optimized version of the code to the simple case
|
||||||
// of floats and half floats.
|
// of floats and half floats.
|
||||||
#ifdef EIGEN_HAS_CUDA_FP16
|
#ifdef EIGEN_HAS_CUDA_FP16
|
||||||
static const bool HasOptimizedImplementation = !Op::IsStateful &&
|
static const bool HasOptimizedImplementation = !Op::IsStateful &&
|
||||||
(internal::is_same<typename Self::CoeffReturnType, float>::value ||
|
(internal::is_same<typename Self::CoeffReturnType, float>::value ||
|
||||||
(internal::is_same<typename Self::CoeffReturnType, Eigen::half>::value && reducer_traits<Op, GpuDevice>::PacketAccess));
|
(internal::is_same<typename Self::CoeffReturnType, Eigen::half>::value && reducer_traits<Op, GpuDevice>::PacketAccess));
|
||||||
|
Loading…
x
Reference in New Issue
Block a user