mirror of
https://gitlab.com/libeigen/eigen.git
synced 2025-04-22 09:39:34 +08:00
Use numerically stable tree reduction in TensorReduction.
This commit is contained in:
parent
43fd42a33b
commit
46f88fc454
@ -58,16 +58,15 @@ template<typename Reducer, typename Device>
|
|||||||
struct reducer_traits {
|
struct reducer_traits {
|
||||||
enum {
|
enum {
|
||||||
Cost = 1,
|
Cost = 1,
|
||||||
PacketAccess = false
|
PacketAccess = false,
|
||||||
|
IsStateful = false,
|
||||||
|
IsExactlyAssociative = true
|
||||||
};
|
};
|
||||||
};
|
};
|
||||||
|
|
||||||
// Standard reduction functors
|
// Standard reduction functors
|
||||||
template <typename T> struct SumReducer
|
template <typename T> struct SumReducer
|
||||||
{
|
{
|
||||||
static const bool PacketAccess = packet_traits<T>::HasAdd;
|
|
||||||
static const bool IsStateful = false;
|
|
||||||
|
|
||||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reduce(const T t, T* accum) const {
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reduce(const T t, T* accum) const {
|
||||||
internal::scalar_sum_op<T> sum_op;
|
internal::scalar_sum_op<T> sum_op;
|
||||||
*accum = sum_op(*accum, t);
|
*accum = sum_op(*accum, t);
|
||||||
@ -103,16 +102,14 @@ template <typename T, typename Device>
|
|||||||
struct reducer_traits<SumReducer<T>, Device> {
|
struct reducer_traits<SumReducer<T>, Device> {
|
||||||
enum {
|
enum {
|
||||||
Cost = NumTraits<T>::AddCost,
|
Cost = NumTraits<T>::AddCost,
|
||||||
PacketAccess = PacketType<T, Device>::HasAdd
|
PacketAccess = PacketType<T, Device>::HasAdd,
|
||||||
|
IsStateful = false,
|
||||||
|
IsExactlyAssociative = NumTraits<T>::IsInteger
|
||||||
};
|
};
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
template <typename T> struct MeanReducer
|
template <typename T> struct MeanReducer
|
||||||
{
|
{
|
||||||
static const bool PacketAccess = packet_traits<T>::HasAdd && packet_traits<T>::HasDiv && !NumTraits<T>::IsInteger;
|
|
||||||
static const bool IsStateful = true;
|
|
||||||
|
|
||||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
||||||
MeanReducer() : scalarCount_(0), packetCount_(0) { }
|
MeanReducer() : scalarCount_(0), packetCount_(0) { }
|
||||||
|
|
||||||
@ -161,7 +158,9 @@ template <typename T, typename Device>
|
|||||||
struct reducer_traits<MeanReducer<T>, Device> {
|
struct reducer_traits<MeanReducer<T>, Device> {
|
||||||
enum {
|
enum {
|
||||||
Cost = NumTraits<T>::AddCost,
|
Cost = NumTraits<T>::AddCost,
|
||||||
PacketAccess = PacketType<T, Device>::HasAdd
|
PacketAccess = PacketType<T, Device>::HasAdd && !NumTraits<T>::IsInteger,
|
||||||
|
IsStateful = true,
|
||||||
|
IsExactlyAssociative = NumTraits<T>::IsInteger
|
||||||
};
|
};
|
||||||
};
|
};
|
||||||
|
|
||||||
@ -194,9 +193,6 @@ struct MinMaxBottomValue<T, false, false> {
|
|||||||
|
|
||||||
template <typename T> struct MaxReducer
|
template <typename T> struct MaxReducer
|
||||||
{
|
{
|
||||||
static const bool PacketAccess = packet_traits<T>::HasMax;
|
|
||||||
static const bool IsStateful = false;
|
|
||||||
|
|
||||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reduce(const T t, T* accum) const {
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reduce(const T t, T* accum) const {
|
||||||
if (t > *accum) { *accum = t; }
|
if (t > *accum) { *accum = t; }
|
||||||
}
|
}
|
||||||
@ -228,16 +224,15 @@ template <typename T, typename Device>
|
|||||||
struct reducer_traits<MaxReducer<T>, Device> {
|
struct reducer_traits<MaxReducer<T>, Device> {
|
||||||
enum {
|
enum {
|
||||||
Cost = NumTraits<T>::AddCost,
|
Cost = NumTraits<T>::AddCost,
|
||||||
PacketAccess = PacketType<T, Device>::HasMax
|
PacketAccess = PacketType<T, Device>::HasMax,
|
||||||
|
IsStateful = false,
|
||||||
|
IsExactlyAssociative = true
|
||||||
};
|
};
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
template <typename T> struct MinReducer
|
template <typename T> struct MinReducer
|
||||||
{
|
{
|
||||||
static const bool PacketAccess = packet_traits<T>::HasMin;
|
|
||||||
static const bool IsStateful = false;
|
|
||||||
|
|
||||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reduce(const T t, T* accum) const {
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reduce(const T t, T* accum) const {
|
||||||
if (t < *accum) { *accum = t; }
|
if (t < *accum) { *accum = t; }
|
||||||
}
|
}
|
||||||
@ -269,16 +264,15 @@ template <typename T, typename Device>
|
|||||||
struct reducer_traits<MinReducer<T>, Device> {
|
struct reducer_traits<MinReducer<T>, Device> {
|
||||||
enum {
|
enum {
|
||||||
Cost = NumTraits<T>::AddCost,
|
Cost = NumTraits<T>::AddCost,
|
||||||
PacketAccess = PacketType<T, Device>::HasMin
|
PacketAccess = PacketType<T, Device>::HasMin,
|
||||||
|
IsStateful = false,
|
||||||
|
IsExactlyAssociative = true
|
||||||
};
|
};
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
template <typename T> struct ProdReducer
|
template <typename T> struct ProdReducer
|
||||||
{
|
{
|
||||||
static const bool PacketAccess = packet_traits<T>::HasMul;
|
|
||||||
static const bool IsStateful = false;
|
|
||||||
|
|
||||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reduce(const T t, T* accum) const {
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reduce(const T t, T* accum) const {
|
||||||
internal::scalar_product_op<T> prod_op;
|
internal::scalar_product_op<T> prod_op;
|
||||||
(*accum) = prod_op(*accum, t);
|
(*accum) = prod_op(*accum, t);
|
||||||
@ -314,16 +308,15 @@ template <typename T, typename Device>
|
|||||||
struct reducer_traits<ProdReducer<T>, Device> {
|
struct reducer_traits<ProdReducer<T>, Device> {
|
||||||
enum {
|
enum {
|
||||||
Cost = NumTraits<T>::MulCost,
|
Cost = NumTraits<T>::MulCost,
|
||||||
PacketAccess = PacketType<T, Device>::HasMul
|
PacketAccess = PacketType<T, Device>::HasMul,
|
||||||
|
IsStateful = false,
|
||||||
|
IsExactlyAssociative = true
|
||||||
};
|
};
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
struct AndReducer
|
struct AndReducer
|
||||||
{
|
{
|
||||||
static const bool PacketAccess = false;
|
|
||||||
static const bool IsStateful = false;
|
|
||||||
|
|
||||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reduce(bool t, bool* accum) const {
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reduce(bool t, bool* accum) const {
|
||||||
*accum = *accum && t;
|
*accum = *accum && t;
|
||||||
}
|
}
|
||||||
@ -339,15 +332,14 @@ template <typename Device>
|
|||||||
struct reducer_traits<AndReducer, Device> {
|
struct reducer_traits<AndReducer, Device> {
|
||||||
enum {
|
enum {
|
||||||
Cost = 1,
|
Cost = 1,
|
||||||
PacketAccess = false
|
PacketAccess = false,
|
||||||
|
IsStateful = false,
|
||||||
|
IsExactlyAssociative = true
|
||||||
};
|
};
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
struct OrReducer {
|
struct OrReducer {
|
||||||
static const bool PacketAccess = false;
|
|
||||||
static const bool IsStateful = false;
|
|
||||||
|
|
||||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reduce(bool t, bool* accum) const {
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reduce(bool t, bool* accum) const {
|
||||||
*accum = *accum || t;
|
*accum = *accum || t;
|
||||||
}
|
}
|
||||||
@ -363,7 +355,9 @@ template <typename Device>
|
|||||||
struct reducer_traits<OrReducer, Device> {
|
struct reducer_traits<OrReducer, Device> {
|
||||||
enum {
|
enum {
|
||||||
Cost = 1,
|
Cost = 1,
|
||||||
PacketAccess = false
|
PacketAccess = false,
|
||||||
|
IsStateful = false,
|
||||||
|
IsExactlyAssociative = true
|
||||||
};
|
};
|
||||||
};
|
};
|
||||||
|
|
||||||
@ -371,9 +365,6 @@ struct reducer_traits<OrReducer, Device> {
|
|||||||
// Argmin/Argmax reducers
|
// Argmin/Argmax reducers
|
||||||
template <typename T> struct ArgMaxTupleReducer
|
template <typename T> struct ArgMaxTupleReducer
|
||||||
{
|
{
|
||||||
static const bool PacketAccess = false;
|
|
||||||
static const bool IsStateful = false;
|
|
||||||
|
|
||||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reduce(const T t, T* accum) const {
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reduce(const T t, T* accum) const {
|
||||||
if (t.second > accum->second) { *accum = t; }
|
if (t.second > accum->second) { *accum = t; }
|
||||||
}
|
}
|
||||||
@ -389,16 +380,15 @@ template <typename T, typename Device>
|
|||||||
struct reducer_traits<ArgMaxTupleReducer<T>, Device> {
|
struct reducer_traits<ArgMaxTupleReducer<T>, Device> {
|
||||||
enum {
|
enum {
|
||||||
Cost = NumTraits<T>::AddCost,
|
Cost = NumTraits<T>::AddCost,
|
||||||
PacketAccess = false
|
PacketAccess = false,
|
||||||
|
IsStateful = false,
|
||||||
|
IsExactlyAssociative = true
|
||||||
};
|
};
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
template <typename T> struct ArgMinTupleReducer
|
template <typename T> struct ArgMinTupleReducer
|
||||||
{
|
{
|
||||||
static const bool PacketAccess = false;
|
|
||||||
static const bool IsStateful = false;
|
|
||||||
|
|
||||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reduce(const T& t, T* accum) const {
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reduce(const T& t, T* accum) const {
|
||||||
if (t.second < accum->second) { *accum = t; }
|
if (t.second < accum->second) { *accum = t; }
|
||||||
}
|
}
|
||||||
@ -414,7 +404,9 @@ template <typename T, typename Device>
|
|||||||
struct reducer_traits<ArgMinTupleReducer<T>, Device> {
|
struct reducer_traits<ArgMinTupleReducer<T>, Device> {
|
||||||
enum {
|
enum {
|
||||||
Cost = NumTraits<T>::AddCost,
|
Cost = NumTraits<T>::AddCost,
|
||||||
PacketAccess = false
|
PacketAccess = false,
|
||||||
|
IsStateful = false,
|
||||||
|
IsExactlyAssociative = true
|
||||||
};
|
};
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -165,7 +165,9 @@ struct GenericDimReducer<-1, Self, Op> {
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
template <typename Self, typename Op, bool Vectorizable = (Self::InputPacketAccess & Op::PacketAccess)>
|
template <typename Self, typename Op, bool Vectorizable = (Self::InputPacketAccess && Self::ReducerTraits::PacketAccess),
|
||||||
|
bool UseTreeReduction = (!Self::ReducerTraits::IsStateful &&
|
||||||
|
!Self::ReducerTraits::IsExactlyAssociative)>
|
||||||
struct InnerMostDimReducer {
|
struct InnerMostDimReducer {
|
||||||
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename Self::CoeffReturnType reduce(const Self& self, typename Self::Index firstIndex, typename Self::Index numValuesToReduce, Op& reducer) {
|
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename Self::CoeffReturnType reduce(const Self& self, typename Self::Index firstIndex, typename Self::Index numValuesToReduce, Op& reducer) {
|
||||||
typename Self::CoeffReturnType accum = reducer.initialize();
|
typename Self::CoeffReturnType accum = reducer.initialize();
|
||||||
@ -177,23 +179,88 @@ struct InnerMostDimReducer {
|
|||||||
};
|
};
|
||||||
|
|
||||||
template <typename Self, typename Op>
|
template <typename Self, typename Op>
|
||||||
struct InnerMostDimReducer<Self, Op, true> {
|
struct InnerMostDimReducer<Self, Op, true, false> {
|
||||||
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename Self::CoeffReturnType reduce(const Self& self, typename Self::Index firstIndex, typename Self::Index numValuesToReduce, Op& reducer) {
|
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename Self::CoeffReturnType reduce(const Self& self, typename Self::Index firstIndex, typename Self::Index numValuesToReduce, Op& reducer) {
|
||||||
const int packetSize = internal::unpacket_traits<typename Self::PacketReturnType>::size;
|
const typename Self::Index packetSize = internal::unpacket_traits<typename Self::PacketReturnType>::size;
|
||||||
const typename Self::Index VectorizedSize = (numValuesToReduce / packetSize) * packetSize;
|
const typename Self::Index VectorizedSize = (numValuesToReduce / packetSize) * packetSize;
|
||||||
typename Self::PacketReturnType p = reducer.template initializePacket<typename Self::PacketReturnType>();
|
typename Self::PacketReturnType paccum = reducer.template initializePacket<typename Self::PacketReturnType>();
|
||||||
for (typename Self::Index j = 0; j < VectorizedSize; j += packetSize) {
|
for (typename Self::Index j = 0; j < VectorizedSize; j += packetSize) {
|
||||||
reducer.reducePacket(self.m_impl.template packet<Unaligned>(firstIndex + j), &p);
|
reducer.reducePacket(self.m_impl.template packet<Unaligned>(firstIndex + j), &paccum);
|
||||||
}
|
}
|
||||||
typename Self::CoeffReturnType accum = reducer.initialize();
|
typename Self::CoeffReturnType accum = reducer.initialize();
|
||||||
for (typename Self::Index j = VectorizedSize; j < numValuesToReduce; ++j) {
|
for (typename Self::Index j = VectorizedSize; j < numValuesToReduce; ++j) {
|
||||||
reducer.reduce(self.m_impl.coeff(firstIndex + j), &accum);
|
reducer.reduce(self.m_impl.coeff(firstIndex + j), &accum);
|
||||||
}
|
}
|
||||||
return reducer.finalizeBoth(accum, p);
|
return reducer.finalizeBoth(accum, paccum);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
template <int DimIndex, typename Self, typename Op, bool vectorizable = (Self::InputPacketAccess & Op::PacketAccess)>
|
static const int kLeafSize = 1024;
|
||||||
|
|
||||||
|
template <typename Self, typename Op>
|
||||||
|
struct InnerMostDimReducer<Self, Op, false, true> {
|
||||||
|
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename Self::CoeffReturnType
|
||||||
|
reduce(const Self& self, typename Self::Index firstIndex,
|
||||||
|
typename Self::Index numValuesToReduce, Op& reducer) {
|
||||||
|
typename Self::CoeffReturnType accum = reducer.initialize();
|
||||||
|
if (numValuesToReduce > kLeafSize) {
|
||||||
|
const typename Self::Index half = numValuesToReduce / 2;
|
||||||
|
reducer.reduce(reduce(self, firstIndex, half, reducer), &accum);
|
||||||
|
reducer.reduce(
|
||||||
|
reduce(self, firstIndex + half, numValuesToReduce - half, reducer),
|
||||||
|
&accum);
|
||||||
|
} else {
|
||||||
|
for (typename Self::Index j = 0; j < numValuesToReduce; ++j) {
|
||||||
|
reducer.reduce(self.m_impl.coeff(firstIndex + j), &accum);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return reducer.finalize(accum);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
#if !defined(EIGEN_USE_GPU) || !defined(__CUDACC__)
|
||||||
|
template <typename Self, typename Op>
|
||||||
|
struct InnerMostDimReducer<Self, Op, true, true> {
|
||||||
|
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename Self::CoeffReturnType
|
||||||
|
reduce(const Self& self, typename Self::Index firstIndex,
|
||||||
|
typename Self::Index numValuesToReduce, Op& reducer) {
|
||||||
|
const typename Self::Index packetSize =
|
||||||
|
internal::unpacket_traits<typename Self::PacketReturnType>::size;
|
||||||
|
typename Self::CoeffReturnType accum = reducer.initialize();
|
||||||
|
if (numValuesToReduce > packetSize * kLeafSize) {
|
||||||
|
// Make sure the split point is aligned on a packet boundary.
|
||||||
|
const typename Self::Index split =
|
||||||
|
packetSize *
|
||||||
|
divup(firstIndex + divup(numValuesToReduce, typename Self::Index(2)),
|
||||||
|
packetSize);
|
||||||
|
const typename Self::Index num_left =
|
||||||
|
numext::mini(split - firstIndex, numValuesToReduce);
|
||||||
|
reducer.reduce(reduce(self, firstIndex, num_left, reducer), &accum);
|
||||||
|
if (num_left < numValuesToReduce) {
|
||||||
|
reducer.reduce(
|
||||||
|
reduce(self, split, numValuesToReduce - num_left, reducer), &accum);
|
||||||
|
}
|
||||||
|
return reducer.finalize(accum);
|
||||||
|
} else {
|
||||||
|
const typename Self::Index VectorizedSize =
|
||||||
|
(numValuesToReduce / packetSize) * packetSize;
|
||||||
|
typename Self::PacketReturnType paccum =
|
||||||
|
reducer.template initializePacket<typename Self::PacketReturnType>();
|
||||||
|
for (typename Self::Index j = 0; j < VectorizedSize; j += packetSize) {
|
||||||
|
reducer.reducePacket(
|
||||||
|
self.m_impl.template packet<Unaligned>(firstIndex + j), &paccum);
|
||||||
|
}
|
||||||
|
for (typename Self::Index j = VectorizedSize; j < numValuesToReduce;
|
||||||
|
++j) {
|
||||||
|
reducer.reduce(self.m_impl.coeff(firstIndex + j), &accum);
|
||||||
|
}
|
||||||
|
return reducer.finalizeBoth(accum, paccum);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
};
|
||||||
|
#endif
|
||||||
|
|
||||||
|
template <int DimIndex, typename Self, typename Op, bool vectorizable = (Self::InputPacketAccess && Self::ReducerTraits::PacketAccess)>
|
||||||
struct InnerMostDimPreserver {
|
struct InnerMostDimPreserver {
|
||||||
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reduce(const Self&, typename Self::Index, Op&, typename Self::PacketReturnType*) {
|
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reduce(const Self&, typename Self::Index, Op&, typename Self::PacketReturnType*) {
|
||||||
eigen_assert(false && "should never be called");
|
eigen_assert(false && "should never be called");
|
||||||
@ -228,7 +295,7 @@ struct InnerMostDimPreserver<-1, Self, Op, true> {
|
|||||||
};
|
};
|
||||||
|
|
||||||
// Default full reducer
|
// Default full reducer
|
||||||
template <typename Self, typename Op, typename Device, bool Vectorizable = (Self::InputPacketAccess & Op::PacketAccess)>
|
template <typename Self, typename Op, typename Device, bool Vectorizable = (Self::InputPacketAccess && Self::ReducerTraits::PacketAccess)>
|
||||||
struct FullReducer {
|
struct FullReducer {
|
||||||
static const bool HasOptimizedImplementation = false;
|
static const bool HasOptimizedImplementation = false;
|
||||||
|
|
||||||
@ -242,7 +309,7 @@ struct FullReducer {
|
|||||||
#ifdef EIGEN_USE_THREADS
|
#ifdef EIGEN_USE_THREADS
|
||||||
// Multithreaded full reducers
|
// Multithreaded full reducers
|
||||||
template <typename Self, typename Op,
|
template <typename Self, typename Op,
|
||||||
bool Vectorizable = (Self::InputPacketAccess & Op::PacketAccess)>
|
bool Vectorizable = (Self::InputPacketAccess && Self::ReducerTraits::PacketAccess)>
|
||||||
struct FullReducerShard {
|
struct FullReducerShard {
|
||||||
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(const Self& self, typename Self::Index firstIndex,
|
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(const Self& self, typename Self::Index firstIndex,
|
||||||
typename Self::Index numValuesToReduce, Op& reducer,
|
typename Self::Index numValuesToReduce, Op& reducer,
|
||||||
@ -255,8 +322,8 @@ struct FullReducerShard {
|
|||||||
// Multithreaded full reducer
|
// Multithreaded full reducer
|
||||||
template <typename Self, typename Op, bool Vectorizable>
|
template <typename Self, typename Op, bool Vectorizable>
|
||||||
struct FullReducer<Self, Op, ThreadPoolDevice, Vectorizable> {
|
struct FullReducer<Self, Op, ThreadPoolDevice, Vectorizable> {
|
||||||
static const bool HasOptimizedImplementation = !Op::IsStateful;
|
static const bool HasOptimizedImplementation = !Self::ReducerTraits::IsStateful;
|
||||||
static const int PacketSize =
|
static const Index PacketSize =
|
||||||
unpacket_traits<typename Self::PacketReturnType>::size;
|
unpacket_traits<typename Self::PacketReturnType>::size;
|
||||||
|
|
||||||
// launch one reducer per thread and accumulate the result.
|
// launch one reducer per thread and accumulate the result.
|
||||||
@ -394,6 +461,7 @@ class TensorReductionOp : public TensorBase<TensorReductionOp<Op, Dims, XprType,
|
|||||||
template<typename Op, typename Dims, typename ArgType, template <class> class MakePointer_, typename Device>
|
template<typename Op, typename Dims, typename ArgType, template <class> class MakePointer_, typename Device>
|
||||||
struct TensorEvaluator<const TensorReductionOp<Op, Dims, ArgType, MakePointer_>, Device>
|
struct TensorEvaluator<const TensorReductionOp<Op, Dims, ArgType, MakePointer_>, Device>
|
||||||
{
|
{
|
||||||
|
typedef internal::reducer_traits<Op, Device> ReducerTraits;
|
||||||
typedef TensorReductionOp<Op, Dims, ArgType, MakePointer_> XprType;
|
typedef TensorReductionOp<Op, Dims, ArgType, MakePointer_> XprType;
|
||||||
typedef typename XprType::Index Index;
|
typedef typename XprType::Index Index;
|
||||||
typedef ArgType ChildType;
|
typedef ArgType ChildType;
|
||||||
@ -407,11 +475,11 @@ struct TensorEvaluator<const TensorReductionOp<Op, Dims, ArgType, MakePointer_>,
|
|||||||
static const bool InputPacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess;
|
static const bool InputPacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess;
|
||||||
typedef typename internal::remove_const<typename XprType::CoeffReturnType>::type CoeffReturnType;
|
typedef typename internal::remove_const<typename XprType::CoeffReturnType>::type CoeffReturnType;
|
||||||
typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
|
typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
|
||||||
static const int PacketSize = PacketType<CoeffReturnType, Device>::size;
|
static const Index PacketSize = PacketType<CoeffReturnType, Device>::size;
|
||||||
|
|
||||||
enum {
|
enum {
|
||||||
IsAligned = false,
|
IsAligned = false,
|
||||||
PacketAccess = Self::InputPacketAccess && Op::PacketAccess,
|
PacketAccess = Self::InputPacketAccess && ReducerTraits::PacketAccess,
|
||||||
BlockAccess = false,
|
BlockAccess = false,
|
||||||
Layout = TensorEvaluator<ArgType, Device>::Layout,
|
Layout = TensorEvaluator<ArgType, Device>::Layout,
|
||||||
CoordAccess = false, // to be implemented
|
CoordAccess = false, // to be implemented
|
||||||
@ -696,7 +764,7 @@ struct TensorEvaluator<const TensorReductionOp<Op, Dims, ArgType, MakePointer_>,
|
|||||||
|
|
||||||
private:
|
private:
|
||||||
template <int, typename, typename> friend struct internal::GenericDimReducer;
|
template <int, typename, typename> friend struct internal::GenericDimReducer;
|
||||||
template <typename, typename, bool> friend struct internal::InnerMostDimReducer;
|
template <typename, typename, bool, bool> friend struct internal::InnerMostDimReducer;
|
||||||
template <int, typename, typename, bool> friend struct internal::InnerMostDimPreserver;
|
template <int, typename, typename, bool> friend struct internal::InnerMostDimPreserver;
|
||||||
template <typename S, typename O, typename D, bool V> friend struct internal::FullReducer;
|
template <typename S, typename O, typename D, bool V> friend struct internal::FullReducer;
|
||||||
#ifdef EIGEN_USE_THREADS
|
#ifdef EIGEN_USE_THREADS
|
||||||
|
@ -376,12 +376,12 @@ struct FullReducer<Self, Op, GpuDevice, Vectorizable> {
|
|||||||
// so reduce the scope of the optimized version of the code to the simple cases
|
// so reduce the scope of the optimized version of the code to the simple cases
|
||||||
// of doubles, floats and half floats
|
// of doubles, floats and half floats
|
||||||
#ifdef EIGEN_HAS_GPU_FP16
|
#ifdef EIGEN_HAS_GPU_FP16
|
||||||
static const bool HasOptimizedImplementation = !Op::IsStateful &&
|
static const bool HasOptimizedImplementation = !Self::ReducerTraits::IsStateful &&
|
||||||
(internal::is_same<typename Self::CoeffReturnType, float>::value ||
|
(internal::is_same<typename Self::CoeffReturnType, float>::value ||
|
||||||
internal::is_same<typename Self::CoeffReturnType, double>::value ||
|
internal::is_same<typename Self::CoeffReturnType, double>::value ||
|
||||||
(internal::is_same<typename Self::CoeffReturnType, Eigen::half>::value && reducer_traits<Op, GpuDevice>::PacketAccess));
|
(internal::is_same<typename Self::CoeffReturnType, Eigen::half>::value && reducer_traits<Op, GpuDevice>::PacketAccess));
|
||||||
#else // EIGEN_HAS_GPU_FP16
|
#else // EIGEN_HAS_GPU_FP16
|
||||||
static const bool HasOptimizedImplementation = !Op::IsStateful &&
|
static const bool HasOptimizedImplementation = !Self::ReducerTraits::IsStateful &&
|
||||||
(internal::is_same<typename Self::CoeffReturnType, float>::value ||
|
(internal::is_same<typename Self::CoeffReturnType, float>::value ||
|
||||||
internal::is_same<typename Self::CoeffReturnType, double>::value);
|
internal::is_same<typename Self::CoeffReturnType, double>::value);
|
||||||
#endif // EIGEN_HAS_GPU_FP16
|
#endif // EIGEN_HAS_GPU_FP16
|
||||||
@ -697,12 +697,12 @@ struct InnerReducer<Self, Op, GpuDevice> {
|
|||||||
// so reduce the scope of the optimized version of the code to the simple case
|
// so reduce the scope of the optimized version of the code to the simple case
|
||||||
// of floats and half floats.
|
// of floats and half floats.
|
||||||
#ifdef EIGEN_HAS_GPU_FP16
|
#ifdef EIGEN_HAS_GPU_FP16
|
||||||
static const bool HasOptimizedImplementation = !Op::IsStateful &&
|
static const bool HasOptimizedImplementation = !Self::ReducerTraits::IsStateful &&
|
||||||
(internal::is_same<typename Self::CoeffReturnType, float>::value ||
|
(internal::is_same<typename Self::CoeffReturnType, float>::value ||
|
||||||
internal::is_same<typename Self::CoeffReturnType, double>::value ||
|
internal::is_same<typename Self::CoeffReturnType, double>::value ||
|
||||||
(internal::is_same<typename Self::CoeffReturnType, Eigen::half>::value && reducer_traits<Op, GpuDevice>::PacketAccess));
|
(internal::is_same<typename Self::CoeffReturnType, Eigen::half>::value && reducer_traits<Op, GpuDevice>::PacketAccess));
|
||||||
#else // EIGEN_HAS_GPU_FP16
|
#else // EIGEN_HAS_GPU_FP16
|
||||||
static const bool HasOptimizedImplementation = !Op::IsStateful &&
|
static const bool HasOptimizedImplementation = !Self::ReducerTraits::IsStateful &&
|
||||||
(internal::is_same<typename Self::CoeffReturnType, float>::value ||
|
(internal::is_same<typename Self::CoeffReturnType, float>::value ||
|
||||||
internal::is_same<typename Self::CoeffReturnType, double>::value);
|
internal::is_same<typename Self::CoeffReturnType, double>::value);
|
||||||
#endif // EIGEN_HAS_GPU_FP16
|
#endif // EIGEN_HAS_GPU_FP16
|
||||||
@ -759,7 +759,7 @@ struct OuterReducer<Self, Op, GpuDevice> {
|
|||||||
// Unfortunately nvidia doesn't support well exotic types such as complex,
|
// Unfortunately nvidia doesn't support well exotic types such as complex,
|
||||||
// so reduce the scope of the optimized version of the code to the simple case
|
// so reduce the scope of the optimized version of the code to the simple case
|
||||||
// of floats.
|
// of floats.
|
||||||
static const bool HasOptimizedImplementation = !Op::IsStateful &&
|
static const bool HasOptimizedImplementation = !Self::ReducerTraits::IsStateful &&
|
||||||
(internal::is_same<typename Self::CoeffReturnType, float>::value ||
|
(internal::is_same<typename Self::CoeffReturnType, float>::value ||
|
||||||
internal::is_same<typename Self::CoeffReturnType, double>::value);
|
internal::is_same<typename Self::CoeffReturnType, double>::value);
|
||||||
template <typename Device, typename OutputType>
|
template <typename Device, typename OutputType>
|
||||||
|
@ -386,7 +386,7 @@ static void test_static_dims() {
|
|||||||
expected = (std::max)(expected, in(i, k, j, l));
|
expected = (std::max)(expected, in(i, k, j, l));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
VERIFY_IS_APPROX(out(i, j), expected);
|
VERIFY_IS_EQUAL(out(i, j), expected);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -417,7 +417,7 @@ static void test_innermost_last_dims() {
|
|||||||
expected = (std::max)(expected, in(l, k, i, j));
|
expected = (std::max)(expected, in(l, k, i, j));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
VERIFY_IS_APPROX(out(i, j), expected);
|
VERIFY_IS_EQUAL(out(i, j), expected);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -448,7 +448,7 @@ static void test_innermost_first_dims() {
|
|||||||
expected = (std::max)(expected, in(i, j, k, l));
|
expected = (std::max)(expected, in(i, j, k, l));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
VERIFY_IS_APPROX(out(i, j), expected);
|
VERIFY_IS_EQUAL(out(i, j), expected);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -479,11 +479,30 @@ static void test_reduce_middle_dims() {
|
|||||||
expected = (std::max)(expected, in(i, k, l, j));
|
expected = (std::max)(expected, in(i, k, l, j));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
VERIFY_IS_APPROX(out(i, j), expected);
|
VERIFY_IS_EQUAL(out(i, j), expected);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static void test_sum_accuracy() {
|
||||||
|
Tensor<float, 3> tensor(101, 101, 101);
|
||||||
|
for (float prescribed_mean : {1.0f, 10.0f, 100.0f, 1000.0f, 10000.0f}) {
|
||||||
|
tensor.setRandom();
|
||||||
|
tensor += tensor.constant(prescribed_mean);
|
||||||
|
|
||||||
|
Tensor<float, 0> sum = tensor.sum();
|
||||||
|
double expected_sum = 0.0;
|
||||||
|
for (int i = 0; i < 101; ++i) {
|
||||||
|
for (int j = 0; j < 101; ++j) {
|
||||||
|
for (int k = 0; k < 101; ++k) {
|
||||||
|
expected_sum += static_cast<double>(tensor(i, j, k));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
VERIFY_IS_APPROX(sum(), static_cast<float>(expected_sum));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
EIGEN_DECLARE_TEST(cxx11_tensor_reduction) {
|
EIGEN_DECLARE_TEST(cxx11_tensor_reduction) {
|
||||||
CALL_SUBTEST(test_trivial_reductions<ColMajor>());
|
CALL_SUBTEST(test_trivial_reductions<ColMajor>());
|
||||||
CALL_SUBTEST(test_trivial_reductions<RowMajor>());
|
CALL_SUBTEST(test_trivial_reductions<RowMajor>());
|
||||||
@ -506,4 +525,5 @@ EIGEN_DECLARE_TEST(cxx11_tensor_reduction) {
|
|||||||
CALL_SUBTEST(test_innermost_first_dims<RowMajor>());
|
CALL_SUBTEST(test_innermost_first_dims<RowMajor>());
|
||||||
CALL_SUBTEST(test_reduce_middle_dims<ColMajor>());
|
CALL_SUBTEST(test_reduce_middle_dims<ColMajor>());
|
||||||
CALL_SUBTEST(test_reduce_middle_dims<RowMajor>());
|
CALL_SUBTEST(test_reduce_middle_dims<RowMajor>());
|
||||||
|
CALL_SUBTEST(test_sum_accuracy());
|
||||||
}
|
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user