mirror of
https://gitlab.com/libeigen/eigen.git
synced 2025-06-30 02:05:18 +08:00
Improved support for vectorization of 16-bit floats
This commit is contained in:
parent
15890c304e
commit
aa33446dac
@ -84,6 +84,14 @@ struct functor_traits<scalar_sigmoid_op<T> > {
|
||||
};
|
||||
|
||||
|
||||
// Default traits for a reducer evaluated on a given device.
// Specializations override these values for specific reducers; any
// reducer without a specialization gets a unit cost and no packet access.
template <typename Reducer, typename Device>
struct reducer_traits {
  enum {
    // Packet (vectorized) access is disabled by default.
    PacketAccess = false,
    // Default cost assigned to a reducer.
    Cost = 1
  };
};
|
||||
|
||||
// Standard reduction functors
|
||||
template <typename T> struct SumReducer
|
||||
{
|
||||
@ -119,6 +127,15 @@ template <typename T> struct SumReducer
|
||||
}
|
||||
};
|
||||
|
||||
template <typename T, typename Device>
|
||||
struct reducer_traits<SumReducer<T>, Device> {
|
||||
enum {
|
||||
Cost = NumTraits<T>::AddCost,
|
||||
PacketAccess = PacketType<T, Device>::type::HasAdd
|
||||
};
|
||||
};
|
||||
|
||||
|
||||
template <typename T> struct MeanReducer
|
||||
{
|
||||
static const bool PacketAccess = packet_traits<T>::HasAdd && !NumTraits<T>::IsInteger;
|
||||
@ -162,6 +179,15 @@ template <typename T> struct MeanReducer
|
||||
DenseIndex packetCount_;
|
||||
};
|
||||
|
||||
template <typename T, typename Device>
|
||||
struct reducer_traits<MeanReducer<T>, Device> {
|
||||
enum {
|
||||
Cost = NumTraits<T>::AddCost,
|
||||
PacketAccess = PacketType<T, Device>::type::HasAdd
|
||||
};
|
||||
};
|
||||
|
||||
|
||||
template <typename T> struct MaxReducer
|
||||
{
|
||||
static const bool PacketAccess = packet_traits<T>::HasMax;
|
||||
@ -195,6 +221,15 @@ template <typename T> struct MaxReducer
|
||||
}
|
||||
};
|
||||
|
||||
template <typename T, typename Device>
|
||||
struct reducer_traits<MaxReducer<T>, Device> {
|
||||
enum {
|
||||
Cost = NumTraits<T>::AddCost,
|
||||
PacketAccess = PacketType<T, Device>::type::HasMax
|
||||
};
|
||||
};
|
||||
|
||||
|
||||
template <typename T> struct MinReducer
|
||||
{
|
||||
static const bool PacketAccess = packet_traits<T>::HasMin;
|
||||
@ -228,6 +263,14 @@ template <typename T> struct MinReducer
|
||||
}
|
||||
};
|
||||
|
||||
template <typename T, typename Device>
|
||||
struct reducer_traits<MinReducer<T>, Device> {
|
||||
enum {
|
||||
Cost = NumTraits<T>::AddCost,
|
||||
PacketAccess = PacketType<T, Device>::type::HasMin
|
||||
};
|
||||
};
|
||||
|
||||
|
||||
template <typename T> struct ProdReducer
|
||||
{
|
||||
@ -263,6 +306,14 @@ template <typename T> struct ProdReducer
|
||||
}
|
||||
};
|
||||
|
||||
template <typename T, typename Device>
|
||||
struct reducer_traits<ProdReducer<T>, Device> {
|
||||
enum {
|
||||
Cost = NumTraits<T>::MulCost,
|
||||
PacketAccess = PacketType<T, Device>::type::HasMul
|
||||
};
|
||||
};
|
||||
|
||||
|
||||
struct AndReducer
|
||||
{
|
||||
@ -280,6 +331,15 @@ struct AndReducer
|
||||
}
|
||||
};
|
||||
|
||||
template <typename Device>
|
||||
struct reducer_traits<AndReducer, Device> {
|
||||
enum {
|
||||
Cost = 1,
|
||||
PacketAccess = false
|
||||
};
|
||||
};
|
||||
|
||||
|
||||
struct OrReducer {
|
||||
static const bool PacketAccess = false;
|
||||
static const bool IsStateful = false;
|
||||
@ -295,6 +355,15 @@ struct OrReducer {
|
||||
}
|
||||
};
|
||||
|
||||
template <typename Device>
|
||||
struct reducer_traits<OrReducer, Device> {
|
||||
enum {
|
||||
Cost = 1,
|
||||
PacketAccess = false
|
||||
};
|
||||
};
|
||||
|
||||
|
||||
// Argmin/Argmax reducers
|
||||
template <typename T> struct ArgMaxTupleReducer
|
||||
{
|
||||
@ -312,6 +381,15 @@ template <typename T> struct ArgMaxTupleReducer
|
||||
}
|
||||
};
|
||||
|
||||
template <typename T, typename Device>
|
||||
struct reducer_traits<ArgMaxTupleReducer<T>, Device> {
|
||||
enum {
|
||||
Cost = NumTraits<T>::AddCost,
|
||||
PacketAccess = false
|
||||
};
|
||||
};
|
||||
|
||||
|
||||
template <typename T> struct ArgMinTupleReducer
|
||||
{
|
||||
static const bool PacketAccess = false;
|
||||
@ -328,6 +406,14 @@ template <typename T> struct ArgMinTupleReducer
|
||||
}
|
||||
};
|
||||
|
||||
template <typename T, typename Device>
|
||||
struct reducer_traits<ArgMinTupleReducer<T>, Device> {
|
||||
enum {
|
||||
Cost = NumTraits<T>::AddCost,
|
||||
PacketAccess = false
|
||||
};
|
||||
};
|
||||
|
||||
|
||||
// Random number generation
|
||||
namespace {
|
||||
|
@ -54,6 +54,11 @@ struct PacketType {
|
||||
|
||||
// For CUDA packet types when using a GpuDevice
|
||||
#if defined(EIGEN_USE_GPU) && defined(__CUDACC__)
|
||||
template <>
|
||||
struct PacketType<half, GpuDevice> {
|
||||
typedef half2 type;
|
||||
static const int size = 2;
|
||||
};
|
||||
template <>
|
||||
struct PacketType<float, GpuDevice> {
|
||||
typedef float4 type;
|
||||
|
@ -331,7 +331,7 @@ struct FullReducer<Self, Op, GpuDevice, Vectorizable> {
|
||||
#ifdef EIGEN_HAS_CUDA_FP16
|
||||
static const bool HasOptimizedImplementation = !Op::IsStateful &&
|
||||
(internal::is_same<typename Self::CoeffReturnType, float>::value ||
|
||||
(internal::is_same<typename Self::CoeffReturnType, Eigen::half>::value && Op::PacketAccess));
|
||||
(internal::is_same<typename Self::CoeffReturnType, Eigen::half>::value && reducer_traits<Op, GpuDevice>::PacketAccess));
|
||||
#else
|
||||
static const bool HasOptimizedImplementation = !Op::IsStateful &&
|
||||
internal::is_same<typename Self::CoeffReturnType, float>::value;
|
||||
@ -346,7 +346,7 @@ struct FullReducer<Self, Op, GpuDevice, Vectorizable> {
|
||||
return;
|
||||
}
|
||||
|
||||
FullReductionLauncher<Self, Op, OutputType, Op::PacketAccess>::run(self, reducer, device, output, num_coeffs);
|
||||
FullReductionLauncher<Self, Op, OutputType, reducer_traits<Op, GpuDevice>::PacketAccess>::run(self, reducer, device, output, num_coeffs);
|
||||
}
|
||||
};
|
||||
|
||||
@ -608,7 +608,7 @@ struct InnerReducer<Self, Op, GpuDevice> {
|
||||
#ifdef EIGEN_HAS_CUDA_FP16
|
||||
static const bool HasOptimizedImplementation = !Op::IsStateful &&
|
||||
(internal::is_same<typename Self::CoeffReturnType, float>::value ||
|
||||
(internal::is_same<typename Self::CoeffReturnType, Eigen::half>::value && Op::PacketAccess));
|
||||
(internal::is_same<typename Self::CoeffReturnType, Eigen::half>::value && reducer_traits<Op, GpuDevice>::PacketAccess));
|
||||
#else
|
||||
static const bool HasOptimizedImplementation = !Op::IsStateful &&
|
||||
internal::is_same<typename Self::CoeffReturnType, float>::value;
|
||||
@ -627,7 +627,7 @@ struct InnerReducer<Self, Op, GpuDevice> {
|
||||
return true;
|
||||
}
|
||||
|
||||
return InnerReductionLauncher<Self, Op, OutputType, Op::PacketAccess>::run(self, reducer, device, output, num_coeffs_to_reduce, num_preserved_vals);
|
||||
return InnerReductionLauncher<Self, Op, OutputType, reducer_traits<Op, GpuDevice>::PacketAccess>::run(self, reducer, device, output, num_coeffs_to_reduce, num_preserved_vals);
|
||||
}
|
||||
};
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user