Simplified the code that dispatches vectorized reductions on GPU

2025-06-30 18:25:11 +08:00 · 2016-06-09 10:29:52 -07:00 · 2016-06-09 10:29:52 -07:00 · 37638dafd7
commit 37638dafd7
parent 66796e843d
3 changed files with 31 additions and 19 deletions
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorFunctors.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorFunctors.h
@@ -131,7 +131,7 @@ template <typename T, typename Device>
 struct reducer_traits<SumReducer<T>, Device> {
  enum {
    Cost = NumTraits<T>::AddCost,
-    PacketAccess = packet_traits<typename PacketType<T, Device>::type>::HasAdd
+    PacketAccess = PacketType<T, Device>::HasAdd
  };
 };
@@ -183,7 +183,7 @@ template <typename T, typename Device>
 struct reducer_traits<MeanReducer<T>, Device> {
  enum {
    Cost = NumTraits<T>::AddCost,
-    PacketAccess = packet_traits<typename PacketType<T, Device>::type>::HasAdd
+    PacketAccess = PacketType<T, Device>::HasAdd
  };
 };
@@ -225,7 +225,7 @@ template <typename T, typename Device>
 struct reducer_traits<MaxReducer<T>, Device> {
  enum {
    Cost = NumTraits<T>::AddCost,
-    PacketAccess = packet_traits<typename PacketType<T, Device>::type>::HasMax
+    PacketAccess = PacketType<T, Device>::HasMax
  };
 };
@@ -267,7 +267,7 @@ template <typename T, typename Device>
 struct reducer_traits<MinReducer<T>, Device> {
  enum {
    Cost = NumTraits<T>::AddCost,
-    PacketAccess = packet_traits<typename PacketType<T, Device>::type>::HasMin
+    PacketAccess = PacketType<T, Device>::HasMin
  };
 };
@@ -310,7 +310,7 @@ template <typename T, typename Device>
 struct reducer_traits<ProdReducer<T>, Device> {
  enum {
    Cost = NumTraits<T>::MulCost,
-    PacketAccess = packet_traits<typename PacketType<T, Device>::type>::HasMul
+    PacketAccess = PacketType<T, Device>::HasMul
  };
 };
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorMeta.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorMeta.h
@@ -47,27 +47,39 @@ template <> struct max_n_1<0> {
 // Default packet types
 template <typename Scalar, typename Device>
-struct PacketType {
+struct PacketType : internal::packet_traits<Scalar> {
  typedef typename internal::packet_traits<Scalar>::type type;
  enum { size = internal::unpacket_traits<type>::size };
 };
 // For CUDA packet types when using a GpuDevice
 #if defined(EIGEN_USE_GPU) && defined(__CUDACC__)
 template <>
-  struct PacketType<half, GpuDevice> {
+struct PacketType<half, GpuDevice> {
  typedef half2 type;
  static const int size = 2;
- };
-template <>
-struct PacketType<float, GpuDevice> {
-  typedef float4 type;
-  static const int size = 4;
-};
-template <>
-struct PacketType<double, GpuDevice> {
-  typedef double2 type;
-  static const int size = 2;
+  enum {
+    HasAdd    = 1,
+    HasSub    = 1,
+    HasMul    = 1,
+    HasNegate = 1,
+    HasAbs    = 1,
+    HasArg    = 0,
+    HasAbs2   = 0,
+    HasMin    = 1,
+    HasMax    = 1,
+    HasConj   = 0,
+    HasSetLinear = 0,
+    HasBlend  = 0,
+    HasDiv    = 1,
+    HasSqrt   = 1,
+    HasRsqrt  = 1,
+    HasExp    = 1,
+    HasLog    = 1,
+    HasLog1p  = 0,
+    HasLog10  = 0,
+    HasPow    = 1,
+  };
 };
 #endif
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorReductionCuda.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorReductionCuda.h
@@ -328,7 +328,7 @@ struct FullReducer<Self, Op, GpuDevice, Vectorizable> {
  // Unfortunately nvidia doesn't support well exotic types such as complex,
  // so reduce the scope of the optimized version of the code to the simple case
  // of floats and half floats.
-  #ifdef EIGEN_HAS_CUDA_FP16
+#ifdef EIGEN_HAS_CUDA_FP16
  static const bool HasOptimizedImplementation = !Op::IsStateful &&
      (internal::is_same<typename Self::CoeffReturnType, float>::value ||
       (internal::is_same<typename Self::CoeffReturnType, Eigen::half>::value && reducer_traits<Op, GpuDevice>::PacketAccess));