mirror of
https://gitlab.com/libeigen/eigen.git
synced 2025-06-30 18:25:11 +08:00
Improved support for vectorization of 16-bit floats
This commit is contained in:
parent
15890c304e
commit
aa33446dac
@ -84,6 +84,14 @@ struct functor_traits<scalar_sigmoid_op<T> > {
|
|||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
|
// Default compile-time traits of a reducer evaluated on a given device.
//   Cost         - cost estimate of the reduction op; 1 unless a
//                  specialization says otherwise.
//   PacketAccess - whether the reducer may be evaluated on packets for this
//                  device; disabled unless a specialization enables it.
// The GPU reduction code picks its launcher from
// reducer_traits<Op, GpuDevice>::PacketAccess.
template <typename Reducer, typename Device>
struct reducer_traits {
  enum {
    Cost = 1,
    PacketAccess = false
  };
};
|
||||||
|
|
||||||
// Standard reduction functors
|
// Standard reduction functors
|
||||||
template <typename T> struct SumReducer
|
template <typename T> struct SumReducer
|
||||||
{
|
{
|
||||||
@ -119,6 +127,15 @@ template <typename T> struct SumReducer
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
template <typename T, typename Device>
|
||||||
|
struct reducer_traits<SumReducer<T>, Device> {
|
||||||
|
enum {
|
||||||
|
Cost = NumTraits<T>::AddCost,
|
||||||
|
PacketAccess = PacketType<T, Device>::type::HasAdd
|
||||||
|
};
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
template <typename T> struct MeanReducer
|
template <typename T> struct MeanReducer
|
||||||
{
|
{
|
||||||
static const bool PacketAccess = packet_traits<T>::HasAdd && !NumTraits<T>::IsInteger;
|
static const bool PacketAccess = packet_traits<T>::HasAdd && !NumTraits<T>::IsInteger;
|
||||||
@ -162,6 +179,15 @@ template <typename T> struct MeanReducer
|
|||||||
DenseIndex packetCount_;
|
DenseIndex packetCount_;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
template <typename T, typename Device>
|
||||||
|
struct reducer_traits<MeanReducer<T>, Device> {
|
||||||
|
enum {
|
||||||
|
Cost = NumTraits<T>::AddCost,
|
||||||
|
PacketAccess = PacketType<T, Device>::type::HasAdd
|
||||||
|
};
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
template <typename T> struct MaxReducer
|
template <typename T> struct MaxReducer
|
||||||
{
|
{
|
||||||
static const bool PacketAccess = packet_traits<T>::HasMax;
|
static const bool PacketAccess = packet_traits<T>::HasMax;
|
||||||
@ -195,6 +221,15 @@ template <typename T> struct MaxReducer
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
template <typename T, typename Device>
|
||||||
|
struct reducer_traits<MaxReducer<T>, Device> {
|
||||||
|
enum {
|
||||||
|
Cost = NumTraits<T>::AddCost,
|
||||||
|
PacketAccess = PacketType<T, Device>::type::HasMax
|
||||||
|
};
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
template <typename T> struct MinReducer
|
template <typename T> struct MinReducer
|
||||||
{
|
{
|
||||||
static const bool PacketAccess = packet_traits<T>::HasMin;
|
static const bool PacketAccess = packet_traits<T>::HasMin;
|
||||||
@ -228,6 +263,14 @@ template <typename T> struct MinReducer
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
template <typename T, typename Device>
|
||||||
|
struct reducer_traits<MinReducer<T>, Device> {
|
||||||
|
enum {
|
||||||
|
Cost = NumTraits<T>::AddCost,
|
||||||
|
PacketAccess = PacketType<T, Device>::type::HasMin
|
||||||
|
};
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
template <typename T> struct ProdReducer
|
template <typename T> struct ProdReducer
|
||||||
{
|
{
|
||||||
@ -263,6 +306,14 @@ template <typename T> struct ProdReducer
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
template <typename T, typename Device>
|
||||||
|
struct reducer_traits<ProdReducer<T>, Device> {
|
||||||
|
enum {
|
||||||
|
Cost = NumTraits<T>::MulCost,
|
||||||
|
PacketAccess = PacketType<T, Device>::type::HasMul
|
||||||
|
};
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
struct AndReducer
|
struct AndReducer
|
||||||
{
|
{
|
||||||
@ -280,6 +331,15 @@ struct AndReducer
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
template <typename Device>
|
||||||
|
struct reducer_traits<AndReducer, Device> {
|
||||||
|
enum {
|
||||||
|
Cost = 1,
|
||||||
|
PacketAccess = false
|
||||||
|
};
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
struct OrReducer {
|
struct OrReducer {
|
||||||
static const bool PacketAccess = false;
|
static const bool PacketAccess = false;
|
||||||
static const bool IsStateful = false;
|
static const bool IsStateful = false;
|
||||||
@ -295,6 +355,15 @@ struct OrReducer {
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
template <typename Device>
|
||||||
|
struct reducer_traits<OrReducer, Device> {
|
||||||
|
enum {
|
||||||
|
Cost = 1,
|
||||||
|
PacketAccess = false
|
||||||
|
};
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
// Argmin/Argmax reducers
|
// Argmin/Argmax reducers
|
||||||
template <typename T> struct ArgMaxTupleReducer
|
template <typename T> struct ArgMaxTupleReducer
|
||||||
{
|
{
|
||||||
@ -312,6 +381,15 @@ template <typename T> struct ArgMaxTupleReducer
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
template <typename T, typename Device>
|
||||||
|
struct reducer_traits<ArgMaxTupleReducer<T>, Device> {
|
||||||
|
enum {
|
||||||
|
Cost = NumTraits<T>::AddCost,
|
||||||
|
PacketAccess = false
|
||||||
|
};
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
template <typename T> struct ArgMinTupleReducer
|
template <typename T> struct ArgMinTupleReducer
|
||||||
{
|
{
|
||||||
static const bool PacketAccess = false;
|
static const bool PacketAccess = false;
|
||||||
@ -328,6 +406,14 @@ template <typename T> struct ArgMinTupleReducer
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
template <typename T, typename Device>
|
||||||
|
struct reducer_traits<ArgMinTupleReducer<T>, Device> {
|
||||||
|
enum {
|
||||||
|
Cost = NumTraits<T>::AddCost,
|
||||||
|
PacketAccess = false
|
||||||
|
};
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
// Random number generation
|
// Random number generation
|
||||||
namespace {
|
namespace {
|
||||||
|
@ -54,6 +54,11 @@ struct PacketType {
|
|||||||
|
|
||||||
// For CUDA packet types when using a GpuDevice
|
// For CUDA packet types when using a GpuDevice
|
||||||
#if defined(EIGEN_USE_GPU) && defined(__CUDACC__)
|
#if defined(EIGEN_USE_GPU) && defined(__CUDACC__)
|
||||||
|
template <>
|
||||||
|
struct PacketType<half, GpuDevice> {
|
||||||
|
typedef half2 type;
|
||||||
|
static const int size = 2;
|
||||||
|
};
|
||||||
template <>
|
template <>
|
||||||
struct PacketType<float, GpuDevice> {
|
struct PacketType<float, GpuDevice> {
|
||||||
typedef float4 type;
|
typedef float4 type;
|
||||||
|
@ -331,7 +331,7 @@ struct FullReducer<Self, Op, GpuDevice, Vectorizable> {
|
|||||||
#ifdef EIGEN_HAS_CUDA_FP16
|
#ifdef EIGEN_HAS_CUDA_FP16
|
||||||
static const bool HasOptimizedImplementation = !Op::IsStateful &&
|
static const bool HasOptimizedImplementation = !Op::IsStateful &&
|
||||||
(internal::is_same<typename Self::CoeffReturnType, float>::value ||
|
(internal::is_same<typename Self::CoeffReturnType, float>::value ||
|
||||||
(internal::is_same<typename Self::CoeffReturnType, Eigen::half>::value && Op::PacketAccess));
|
(internal::is_same<typename Self::CoeffReturnType, Eigen::half>::value && reducer_traits<Op, GpuDevice>::PacketAccess));
|
||||||
#else
|
#else
|
||||||
static const bool HasOptimizedImplementation = !Op::IsStateful &&
|
static const bool HasOptimizedImplementation = !Op::IsStateful &&
|
||||||
internal::is_same<typename Self::CoeffReturnType, float>::value;
|
internal::is_same<typename Self::CoeffReturnType, float>::value;
|
||||||
@ -346,7 +346,7 @@ struct FullReducer<Self, Op, GpuDevice, Vectorizable> {
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
FullReductionLauncher<Self, Op, OutputType, Op::PacketAccess>::run(self, reducer, device, output, num_coeffs);
|
FullReductionLauncher<Self, Op, OutputType, reducer_traits<Op, GpuDevice>::PacketAccess>::run(self, reducer, device, output, num_coeffs);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
@ -608,7 +608,7 @@ struct InnerReducer<Self, Op, GpuDevice> {
|
|||||||
#ifdef EIGEN_HAS_CUDA_FP16
|
#ifdef EIGEN_HAS_CUDA_FP16
|
||||||
static const bool HasOptimizedImplementation = !Op::IsStateful &&
|
static const bool HasOptimizedImplementation = !Op::IsStateful &&
|
||||||
(internal::is_same<typename Self::CoeffReturnType, float>::value ||
|
(internal::is_same<typename Self::CoeffReturnType, float>::value ||
|
||||||
(internal::is_same<typename Self::CoeffReturnType, Eigen::half>::value && Op::PacketAccess));
|
(internal::is_same<typename Self::CoeffReturnType, Eigen::half>::value && reducer_traits<Op, GpuDevice>::PacketAccess));
|
||||||
#else
|
#else
|
||||||
static const bool HasOptimizedImplementation = !Op::IsStateful &&
|
static const bool HasOptimizedImplementation = !Op::IsStateful &&
|
||||||
internal::is_same<typename Self::CoeffReturnType, float>::value;
|
internal::is_same<typename Self::CoeffReturnType, float>::value;
|
||||||
@ -627,7 +627,7 @@ struct InnerReducer<Self, Op, GpuDevice> {
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
return InnerReductionLauncher<Self, Op, OutputType, Op::PacketAccess>::run(self, reducer, device, output, num_coeffs_to_reduce, num_preserved_vals);
|
return InnerReductionLauncher<Self, Op, OutputType, reducer_traits<Op, GpuDevice>::PacketAccess>::run(self, reducer, device, output, num_coeffs_to_reduce, num_preserved_vals);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user