Improved support for vectorization of 16-bit floats

This commit is contained in:
Benoit Steiner 2016-06-09 08:22:27 -07:00
parent 15890c304e
commit aa33446dac
3 changed files with 95 additions and 4 deletions

View File

@ -84,6 +84,14 @@ struct functor_traits<scalar_sigmoid_op<T> > {
}; };
// Default traits of a reduction functor `Reducer` evaluated on `Device`.
// Unless a specialization says otherwise, a reducer is reported as having
// unit cost and no packet (vectorized) access.
template <typename Reducer, typename Device>
struct reducer_traits {
  enum {
    // Packet access is disabled by default.
    PacketAccess = false,
    // Default cost value.
    Cost = 1
  };
};
// Standard reduction functors // Standard reduction functors
template <typename T> struct SumReducer template <typename T> struct SumReducer
{ {
@ -119,6 +127,15 @@ template <typename T> struct SumReducer
} }
}; };
// Traits of SumReducer<T> on `Device`: Cost is NumTraits<T>::AddCost and
// packet access follows the HasAdd constant of the device's packet type.
// NOTE(review): assumes PacketType<T, Device>::type exposes HasAdd - confirm.
template <typename T, typename Device>
struct reducer_traits<SumReducer<T>, Device> {
  enum {
    PacketAccess = PacketType<T, Device>::type::HasAdd,
    Cost = NumTraits<T>::AddCost
  };
};
template <typename T> struct MeanReducer template <typename T> struct MeanReducer
{ {
static const bool PacketAccess = packet_traits<T>::HasAdd && !NumTraits<T>::IsInteger; static const bool PacketAccess = packet_traits<T>::HasAdd && !NumTraits<T>::IsInteger;
@ -162,6 +179,15 @@ template <typename T> struct MeanReducer
DenseIndex packetCount_; DenseIndex packetCount_;
}; };
// Traits of MeanReducer<T> on `Device`: Cost is NumTraits<T>::AddCost.
// Packet access requires packet addition AND a non-integer scalar type,
// mirroring MeanReducer<T>::PacketAccess (which also excludes integer
// scalars); without the IsInteger guard the two flags would disagree for
// integer types.
// NOTE(review): assumes PacketType<T, Device>::type exposes HasAdd - confirm.
template <typename T, typename Device>
struct reducer_traits<MeanReducer<T>, Device> {
  enum {
    Cost = NumTraits<T>::AddCost,
    PacketAccess = PacketType<T, Device>::type::HasAdd && !NumTraits<T>::IsInteger
  };
};
template <typename T> struct MaxReducer template <typename T> struct MaxReducer
{ {
static const bool PacketAccess = packet_traits<T>::HasMax; static const bool PacketAccess = packet_traits<T>::HasMax;
@ -195,6 +221,15 @@ template <typename T> struct MaxReducer
} }
}; };
// Traits of MaxReducer<T> on `Device`: Cost is NumTraits<T>::AddCost and
// packet access follows the HasMax constant of the device's packet type.
// NOTE(review): assumes PacketType<T, Device>::type exposes HasMax - confirm.
template <typename T, typename Device>
struct reducer_traits<MaxReducer<T>, Device> {
  enum {
    PacketAccess = PacketType<T, Device>::type::HasMax,
    Cost = NumTraits<T>::AddCost
  };
};
template <typename T> struct MinReducer template <typename T> struct MinReducer
{ {
static const bool PacketAccess = packet_traits<T>::HasMin; static const bool PacketAccess = packet_traits<T>::HasMin;
@ -228,6 +263,14 @@ template <typename T> struct MinReducer
} }
}; };
// Traits of MinReducer<T> on `Device`: Cost is NumTraits<T>::AddCost and
// packet access follows the HasMin constant of the device's packet type.
// NOTE(review): assumes PacketType<T, Device>::type exposes HasMin - confirm.
template <typename T, typename Device>
struct reducer_traits<MinReducer<T>, Device> {
  enum {
    PacketAccess = PacketType<T, Device>::type::HasMin,
    Cost = NumTraits<T>::AddCost
  };
};
template <typename T> struct ProdReducer template <typename T> struct ProdReducer
{ {
@ -263,6 +306,14 @@ template <typename T> struct ProdReducer
} }
}; };
// Traits of ProdReducer<T> on `Device`: Cost is NumTraits<T>::MulCost and
// packet access follows the HasMul constant of the device's packet type.
// NOTE(review): assumes PacketType<T, Device>::type exposes HasMul - confirm.
template <typename T, typename Device>
struct reducer_traits<ProdReducer<T>, Device> {
  enum {
    PacketAccess = PacketType<T, Device>::type::HasMul,
    Cost = NumTraits<T>::MulCost
  };
};
struct AndReducer struct AndReducer
{ {
@ -280,6 +331,15 @@ struct AndReducer
} }
}; };
// Traits of AndReducer on `Device`: unit cost, packet access disabled.
template <typename Device>
struct reducer_traits<AndReducer, Device> {
  enum {
    PacketAccess = false,
    Cost = 1
  };
};
struct OrReducer { struct OrReducer {
static const bool PacketAccess = false; static const bool PacketAccess = false;
static const bool IsStateful = false; static const bool IsStateful = false;
@ -295,6 +355,15 @@ struct OrReducer {
} }
}; };
// Traits of OrReducer on `Device`: unit cost, packet access disabled
// (consistent with OrReducer::PacketAccess being false).
template <typename Device>
struct reducer_traits<OrReducer, Device> {
  enum {
    PacketAccess = false,
    Cost = 1
  };
};
// Argmin/Argmax reducers // Argmin/Argmax reducers
template <typename T> struct ArgMaxTupleReducer template <typename T> struct ArgMaxTupleReducer
{ {
@ -312,6 +381,15 @@ template <typename T> struct ArgMaxTupleReducer
} }
}; };
// Traits of ArgMaxTupleReducer<T> on `Device`: Cost is NumTraits<T>::AddCost;
// packet access is always disabled for this reducer.
template <typename T, typename Device>
struct reducer_traits<ArgMaxTupleReducer<T>, Device> {
  enum {
    PacketAccess = false,
    Cost = NumTraits<T>::AddCost
  };
};
template <typename T> struct ArgMinTupleReducer template <typename T> struct ArgMinTupleReducer
{ {
static const bool PacketAccess = false; static const bool PacketAccess = false;
@ -328,6 +406,14 @@ template <typename T> struct ArgMinTupleReducer
} }
}; };
// Traits of ArgMinTupleReducer<T> on `Device`: Cost is NumTraits<T>::AddCost;
// packet access is always disabled, consistent with
// ArgMinTupleReducer<T>::PacketAccess being false.
template <typename T, typename Device>
struct reducer_traits<ArgMinTupleReducer<T>, Device> {
  enum {
    PacketAccess = false,
    Cost = NumTraits<T>::AddCost
  };
};
// Random number generation // Random number generation
namespace { namespace {

View File

@ -54,6 +54,11 @@ struct PacketType {
// For CUDA packet types when using a GpuDevice // For CUDA packet types when using a GpuDevice
#if defined(EIGEN_USE_GPU) && defined(__CUDACC__) #if defined(EIGEN_USE_GPU) && defined(__CUDACC__)
// Packet type used for `half` scalars on a GpuDevice: a half2, holding
// two coefficients per packet. Only compiled when EIGEN_USE_GPU and
// __CUDACC__ are both defined (see the enclosing #if).
template <>
struct PacketType<half, GpuDevice> {
  static const int size = 2;  // number of coefficients in one packet
  typedef half2 type;
};
template <> template <>
struct PacketType<float, GpuDevice> { struct PacketType<float, GpuDevice> {
typedef float4 type; typedef float4 type;

View File

@ -331,7 +331,7 @@ struct FullReducer<Self, Op, GpuDevice, Vectorizable> {
#ifdef EIGEN_HAS_CUDA_FP16 #ifdef EIGEN_HAS_CUDA_FP16
static const bool HasOptimizedImplementation = !Op::IsStateful && static const bool HasOptimizedImplementation = !Op::IsStateful &&
(internal::is_same<typename Self::CoeffReturnType, float>::value || (internal::is_same<typename Self::CoeffReturnType, float>::value ||
(internal::is_same<typename Self::CoeffReturnType, Eigen::half>::value && Op::PacketAccess)); (internal::is_same<typename Self::CoeffReturnType, Eigen::half>::value && reducer_traits<Op, GpuDevice>::PacketAccess));
#else #else
static const bool HasOptimizedImplementation = !Op::IsStateful && static const bool HasOptimizedImplementation = !Op::IsStateful &&
internal::is_same<typename Self::CoeffReturnType, float>::value; internal::is_same<typename Self::CoeffReturnType, float>::value;
@ -346,7 +346,7 @@ struct FullReducer<Self, Op, GpuDevice, Vectorizable> {
return; return;
} }
FullReductionLauncher<Self, Op, OutputType, Op::PacketAccess>::run(self, reducer, device, output, num_coeffs); FullReductionLauncher<Self, Op, OutputType, reducer_traits<Op, GpuDevice>::PacketAccess>::run(self, reducer, device, output, num_coeffs);
} }
}; };
@ -608,7 +608,7 @@ struct InnerReducer<Self, Op, GpuDevice> {
#ifdef EIGEN_HAS_CUDA_FP16 #ifdef EIGEN_HAS_CUDA_FP16
static const bool HasOptimizedImplementation = !Op::IsStateful && static const bool HasOptimizedImplementation = !Op::IsStateful &&
(internal::is_same<typename Self::CoeffReturnType, float>::value || (internal::is_same<typename Self::CoeffReturnType, float>::value ||
(internal::is_same<typename Self::CoeffReturnType, Eigen::half>::value && Op::PacketAccess)); (internal::is_same<typename Self::CoeffReturnType, Eigen::half>::value && reducer_traits<Op, GpuDevice>::PacketAccess));
#else #else
static const bool HasOptimizedImplementation = !Op::IsStateful && static const bool HasOptimizedImplementation = !Op::IsStateful &&
internal::is_same<typename Self::CoeffReturnType, float>::value; internal::is_same<typename Self::CoeffReturnType, float>::value;
@ -627,7 +627,7 @@ struct InnerReducer<Self, Op, GpuDevice> {
return true; return true;
} }
return InnerReductionLauncher<Self, Op, OutputType, Op::PacketAccess>::run(self, reducer, device, output, num_coeffs_to_reduce, num_preserved_vals); return InnerReductionLauncher<Self, Op, OutputType, reducer_traits<Op, GpuDevice>::PacketAccess>::run(self, reducer, device, output, num_coeffs_to_reduce, num_preserved_vals);
} }
}; };