mirror of
https://gitlab.com/libeigen/eigen.git
synced 2025-08-11 19:29:02 +08:00
Add block evaluation to CwiseUnaryOp and add PreferBlockAccess enum to all evaluators
This commit is contained in:
parent
cfaedb38cd
commit
f2209d06e4
@ -87,6 +87,7 @@ struct TensorEvaluator<const TensorIndexTupleOp<ArgType>, Device>
|
|||||||
IsAligned = /*TensorEvaluator<ArgType, Device>::IsAligned*/ false,
|
IsAligned = /*TensorEvaluator<ArgType, Device>::IsAligned*/ false,
|
||||||
PacketAccess = /*TensorEvaluator<ArgType, Device>::PacketAccess*/ false,
|
PacketAccess = /*TensorEvaluator<ArgType, Device>::PacketAccess*/ false,
|
||||||
BlockAccess = false,
|
BlockAccess = false,
|
||||||
|
PreferBlockAccess = false,
|
||||||
Layout = TensorEvaluator<ArgType, Device>::Layout,
|
Layout = TensorEvaluator<ArgType, Device>::Layout,
|
||||||
CoordAccess = false, // to be implemented
|
CoordAccess = false, // to be implemented
|
||||||
RawAccess = false
|
RawAccess = false
|
||||||
@ -220,6 +221,7 @@ struct TensorEvaluator<const TensorTupleReducerOp<ReduceOp, Dims, ArgType>, Devi
|
|||||||
IsAligned = /*TensorEvaluator<ArgType, Device>::IsAligned*/ false,
|
IsAligned = /*TensorEvaluator<ArgType, Device>::IsAligned*/ false,
|
||||||
PacketAccess = /*TensorEvaluator<ArgType, Device>::PacketAccess*/ false,
|
PacketAccess = /*TensorEvaluator<ArgType, Device>::PacketAccess*/ false,
|
||||||
BlockAccess = false,
|
BlockAccess = false,
|
||||||
|
PreferBlockAccess = false,
|
||||||
Layout = TensorEvaluator<const TensorReductionOp<ReduceOp, Dims, const TensorIndexTupleOp<ArgType> >, Device>::Layout,
|
Layout = TensorEvaluator<const TensorReductionOp<ReduceOp, Dims, const TensorIndexTupleOp<ArgType> >, Device>::Layout,
|
||||||
CoordAccess = false, // to be implemented
|
CoordAccess = false, // to be implemented
|
||||||
RawAccess = false
|
RawAccess = false
|
||||||
|
@ -109,6 +109,7 @@ struct TensorEvaluator<const TensorTupleReducerDeviceOp<StrideDims, ArgType>, Sy
|
|||||||
IsAligned = false,
|
IsAligned = false,
|
||||||
PacketAccess = false,
|
PacketAccess = false,
|
||||||
BlockAccess = false,
|
BlockAccess = false,
|
||||||
|
PreferBlockAccess = false,
|
||||||
Layout = TensorEvaluator<ArgType, SyclKernelDevice>::Layout,
|
Layout = TensorEvaluator<ArgType, SyclKernelDevice>::Layout,
|
||||||
CoordAccess = false,
|
CoordAccess = false,
|
||||||
RawAccess = false
|
RawAccess = false
|
||||||
|
@ -102,14 +102,16 @@ struct TensorEvaluator<const TensorAssignOp<LeftArgType, RightArgType>, Device>
|
|||||||
static const int NumDims = XprType::NumDims;
|
static const int NumDims = XprType::NumDims;
|
||||||
|
|
||||||
enum {
|
enum {
|
||||||
IsAligned = TensorEvaluator<LeftArgType, Device>::IsAligned &
|
IsAligned = TensorEvaluator<LeftArgType, Device>::IsAligned &
|
||||||
TensorEvaluator<RightArgType, Device>::IsAligned,
|
TensorEvaluator<RightArgType, Device>::IsAligned,
|
||||||
PacketAccess = TensorEvaluator<LeftArgType, Device>::PacketAccess &
|
PacketAccess = TensorEvaluator<LeftArgType, Device>::PacketAccess &
|
||||||
TensorEvaluator<RightArgType, Device>::PacketAccess,
|
TensorEvaluator<RightArgType, Device>::PacketAccess,
|
||||||
BlockAccess = TensorEvaluator<LeftArgType, Device>::BlockAccess &
|
BlockAccess = TensorEvaluator<LeftArgType, Device>::BlockAccess &
|
||||||
TensorEvaluator<RightArgType, Device>::BlockAccess,
|
TensorEvaluator<RightArgType, Device>::BlockAccess,
|
||||||
Layout = TensorEvaluator<LeftArgType, Device>::Layout,
|
PreferBlockAccess = TensorEvaluator<LeftArgType, Device>::PreferBlockAccess |
|
||||||
RawAccess = TensorEvaluator<LeftArgType, Device>::RawAccess
|
TensorEvaluator<RightArgType, Device>::PreferBlockAccess,
|
||||||
|
Layout = TensorEvaluator<LeftArgType, Device>::Layout,
|
||||||
|
RawAccess = TensorEvaluator<LeftArgType, Device>::RawAccess
|
||||||
};
|
};
|
||||||
|
|
||||||
typedef typename internal::TensorBlock<
|
typedef typename internal::TensorBlock<
|
||||||
|
@ -381,6 +381,147 @@ class TensorBlockWriter : public TensorBlockIO<Scalar, StorageIndex, NumDims,
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/**
|
||||||
|
* \class TensorBlockCwiseUnaryOp
|
||||||
|
* \ingroup CXX11_Tensor_Module
|
||||||
|
*
|
||||||
|
* \brief Carries out a cwise unary op on a number of coefficients.
|
||||||
|
*
|
||||||
|
* This class reads strided input from the argument, and writes the
|
||||||
|
* result of the cwise unary op to the strided output array.
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
struct TensorBlockCwiseUnaryOp {
|
||||||
|
template <typename StorageIndex, typename UnaryFunctor,
|
||||||
|
typename OutputScalar, typename InputScalar>
|
||||||
|
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void Run(
|
||||||
|
const UnaryFunctor& functor, const StorageIndex num_coeff,
|
||||||
|
const StorageIndex output_index, const StorageIndex output_stride,
|
||||||
|
OutputScalar* output_data, const StorageIndex input_index,
|
||||||
|
const StorageIndex input_stride, const InputScalar* input_data) {
|
||||||
|
typedef const Eigen::Array<InputScalar, Dynamic, 1> Input;
|
||||||
|
typedef Eigen::Array<OutputScalar, Dynamic, 1> Output;
|
||||||
|
|
||||||
|
typedef Eigen::Map<Input, 0, InnerStride<>> InputMap;
|
||||||
|
typedef Eigen::Map<Output, 0, InnerStride<>> OutputMap;
|
||||||
|
|
||||||
|
const InputScalar* input_base = &input_data[input_index];
|
||||||
|
OutputScalar* output_base = &output_data[output_index];
|
||||||
|
|
||||||
|
const InputMap input(input_base, num_coeff, InnerStride<>(input_stride));
|
||||||
|
OutputMap output(output_base, num_coeff, InnerStride<>(output_stride));
|
||||||
|
|
||||||
|
output = Eigen::CwiseUnaryOp<UnaryFunctor, InputMap>(input, functor);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
/**
|
||||||
|
* \class TensorBlockCwiseUnaryIO
|
||||||
|
* \ingroup CXX11_Tensor_Module
|
||||||
|
*
|
||||||
|
* \brief Tensor block IO class for carrying out cwise unary ops.
|
||||||
|
*
|
||||||
|
* This class carries out the unary op on given blocks.
|
||||||
|
*/
|
||||||
|
template <typename UnaryFunctor, typename StorageIndex, typename OutputScalar,
|
||||||
|
int NumDims, int Layout>
|
||||||
|
struct TensorBlockCwiseUnaryIO {
|
||||||
|
typedef typename internal::TensorBlock<OutputScalar, StorageIndex, NumDims,
|
||||||
|
Layout>::Dimensions Dimensions;
|
||||||
|
|
||||||
|
struct BlockIteratorState {
|
||||||
|
StorageIndex output_stride, output_span;
|
||||||
|
StorageIndex input_stride, input_span;
|
||||||
|
StorageIndex size, count;
|
||||||
|
};
|
||||||
|
|
||||||
|
template <typename InputScalar>
|
||||||
|
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void Run(
|
||||||
|
const UnaryFunctor& functor, const Dimensions& block_sizes,
|
||||||
|
const Dimensions& block_strides, OutputScalar* output_data,
|
||||||
|
const array<StorageIndex, NumDims>& input_strides,
|
||||||
|
const InputScalar* input_data) {
|
||||||
|
// Find the innermost dimension whose size is not 1. This is the effective
|
||||||
|
// inner dim. If all dimensions are of size 1, fallback to using the actual
|
||||||
|
// innermost dim to avoid out-of-bound access.
|
||||||
|
int num_size_one_inner_dims = 0;
|
||||||
|
for (int i = 0; i < NumDims; ++i) {
|
||||||
|
const int dim = cond<Layout>()(i, NumDims - i - 1);
|
||||||
|
if (block_sizes[dim] != 1) {
|
||||||
|
num_size_one_inner_dims = i;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// Calculate strides and dimensions.
|
||||||
|
const int inner_dim =
|
||||||
|
NumDims == 0 ? 1
|
||||||
|
: cond<Layout>()(num_size_one_inner_dims,
|
||||||
|
NumDims - num_size_one_inner_dims - 1);
|
||||||
|
StorageIndex inner_dim_size = NumDims == 0 ? 1 : block_sizes[inner_dim];
|
||||||
|
for (int i = num_size_one_inner_dims + 1; i < NumDims; ++i) {
|
||||||
|
const int dim = cond<Layout>()(i, NumDims - i - 1);
|
||||||
|
// Merge multiple inner dims into one for larger inner dim size (i.e.
|
||||||
|
// fewer calls to TensorBlockCwiseUnaryOp::Run()).
|
||||||
|
if (inner_dim_size == block_strides[dim] &&
|
||||||
|
block_strides[dim] == input_strides[dim]) {
|
||||||
|
inner_dim_size *= block_sizes[dim];
|
||||||
|
++num_size_one_inner_dims;
|
||||||
|
} else {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
StorageIndex output_index = 0, input_index = 0;
|
||||||
|
|
||||||
|
const StorageIndex output_stride =
|
||||||
|
NumDims == 0 ? 1 : block_strides[inner_dim];
|
||||||
|
const StorageIndex input_stride =
|
||||||
|
NumDims == 0 ? 1 : input_strides[inner_dim];
|
||||||
|
|
||||||
|
const int at_least_1_dim = NumDims <= 1 ? 1 : NumDims - 1;
|
||||||
|
array<BlockIteratorState, at_least_1_dim> block_iter_state;
|
||||||
|
|
||||||
|
// Initialize block iterator state. Squeeze away any dimension of size 1.
|
||||||
|
int num_squeezed_dims = 0;
|
||||||
|
for (int i = num_size_one_inner_dims; i < NumDims - 1; ++i) {
|
||||||
|
const int dim = cond<Layout>()(i + 1, NumDims - i - 2);
|
||||||
|
const StorageIndex size = block_sizes[dim];
|
||||||
|
if (size == 1) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
BlockIteratorState& state = block_iter_state[num_squeezed_dims];
|
||||||
|
state.output_stride = block_strides[dim];
|
||||||
|
state.input_stride = input_strides[dim];
|
||||||
|
state.size = size;
|
||||||
|
state.output_span = state.output_stride * (size - 1);
|
||||||
|
state.input_span = state.input_stride * (size - 1);
|
||||||
|
state.count = 0;
|
||||||
|
++num_squeezed_dims;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Compute cwise unary op.
|
||||||
|
const StorageIndex block_total_size =
|
||||||
|
NumDims == 0 ? 1 : block_sizes.TotalSize();
|
||||||
|
for (StorageIndex i = 0; i < block_total_size; i += inner_dim_size) {
|
||||||
|
TensorBlockCwiseUnaryOp::Run(functor, inner_dim_size, output_index,
|
||||||
|
output_stride, output_data, input_index,
|
||||||
|
input_stride, input_data);
|
||||||
|
// Update index.
|
||||||
|
for (int j = 0; j < num_squeezed_dims; ++j) {
|
||||||
|
auto& state = block_iter_state[j];
|
||||||
|
if (++state.count < state.size) {
|
||||||
|
output_index += state.output_stride;
|
||||||
|
input_index += state.input_stride;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
state.count = 0;
|
||||||
|
output_index -= state.output_span;
|
||||||
|
input_index -= state.input_span;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* \class TensorBlockCwiseBinaryOp
|
* \class TensorBlockCwiseBinaryOp
|
||||||
* \ingroup CXX11_Tensor_Module
|
* \ingroup CXX11_Tensor_Module
|
||||||
|
@ -108,11 +108,12 @@ struct TensorEvaluator<const TensorBroadcastingOp<Broadcast, ArgType>, Device>
|
|||||||
bool isCopy= false, nByOne = false, oneByN = false;
|
bool isCopy= false, nByOne = false, oneByN = false;
|
||||||
|
|
||||||
enum {
|
enum {
|
||||||
IsAligned = true,
|
IsAligned = true,
|
||||||
PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess,
|
PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess,
|
||||||
BlockAccess = TensorEvaluator<ArgType, Device>::BlockAccess,
|
BlockAccess = TensorEvaluator<ArgType, Device>::BlockAccess,
|
||||||
Layout = TensorEvaluator<ArgType, Device>::Layout,
|
PreferBlockAccess = true,
|
||||||
RawAccess = false
|
Layout = TensorEvaluator<ArgType, Device>::Layout,
|
||||||
|
RawAccess = false
|
||||||
};
|
};
|
||||||
|
|
||||||
typedef typename internal::remove_const<Scalar>::type ScalarNoConst;
|
typedef typename internal::remove_const<Scalar>::type ScalarNoConst;
|
||||||
|
@ -144,12 +144,13 @@ struct TensorEvaluator<const TensorChippingOp<DimId, ArgType>, Device>
|
|||||||
enum {
|
enum {
|
||||||
// Alignment can't be guaranteed at compile time since it depends on the
|
// Alignment can't be guaranteed at compile time since it depends on the
|
||||||
// slice offsets.
|
// slice offsets.
|
||||||
IsAligned = false,
|
IsAligned = false,
|
||||||
PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess,
|
PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess,
|
||||||
BlockAccess = TensorEvaluator<ArgType, Device>::BlockAccess,
|
BlockAccess = TensorEvaluator<ArgType, Device>::BlockAccess,
|
||||||
Layout = TensorEvaluator<ArgType, Device>::Layout,
|
PreferBlockAccess = true,
|
||||||
CoordAccess = false, // to be implemented
|
Layout = TensorEvaluator<ArgType, Device>::Layout,
|
||||||
RawAccess = false
|
CoordAccess = false, // to be implemented
|
||||||
|
RawAccess = false
|
||||||
};
|
};
|
||||||
|
|
||||||
typedef typename internal::remove_const<Scalar>::type ScalarNoConst;
|
typedef typename internal::remove_const<Scalar>::type ScalarNoConst;
|
||||||
|
@ -123,6 +123,7 @@ struct TensorEvaluator<const TensorConcatenationOp<Axis, LeftArgType, RightArgTy
|
|||||||
IsAligned = false,
|
IsAligned = false,
|
||||||
PacketAccess = TensorEvaluator<LeftArgType, Device>::PacketAccess & TensorEvaluator<RightArgType, Device>::PacketAccess,
|
PacketAccess = TensorEvaluator<LeftArgType, Device>::PacketAccess & TensorEvaluator<RightArgType, Device>::PacketAccess,
|
||||||
BlockAccess = false,
|
BlockAccess = false,
|
||||||
|
PreferBlockAccess = false,
|
||||||
Layout = TensorEvaluator<LeftArgType, Device>::Layout,
|
Layout = TensorEvaluator<LeftArgType, Device>::Layout,
|
||||||
RawAccess = false
|
RawAccess = false
|
||||||
};
|
};
|
||||||
@ -308,6 +309,7 @@ template<typename Axis, typename LeftArgType, typename RightArgType, typename De
|
|||||||
IsAligned = false,
|
IsAligned = false,
|
||||||
PacketAccess = TensorEvaluator<LeftArgType, Device>::PacketAccess & TensorEvaluator<RightArgType, Device>::PacketAccess,
|
PacketAccess = TensorEvaluator<LeftArgType, Device>::PacketAccess & TensorEvaluator<RightArgType, Device>::PacketAccess,
|
||||||
BlockAccess = false,
|
BlockAccess = false,
|
||||||
|
PreferBlockAccess = false,
|
||||||
Layout = TensorEvaluator<LeftArgType, Device>::Layout,
|
Layout = TensorEvaluator<LeftArgType, Device>::Layout,
|
||||||
RawAccess = false
|
RawAccess = false
|
||||||
};
|
};
|
||||||
|
@ -241,6 +241,7 @@ struct TensorContractionEvaluatorBase
|
|||||||
IsAligned = true,
|
IsAligned = true,
|
||||||
PacketAccess = (PacketType<CoeffReturnType, Device>::size > 1),
|
PacketAccess = (PacketType<CoeffReturnType, Device>::size > 1),
|
||||||
BlockAccess = false,
|
BlockAccess = false,
|
||||||
|
PreferBlockAccess = false,
|
||||||
Layout = TensorEvaluator<LeftArgType, Device>::Layout,
|
Layout = TensorEvaluator<LeftArgType, Device>::Layout,
|
||||||
CoordAccess = false, // to be implemented
|
CoordAccess = false, // to be implemented
|
||||||
RawAccess = true
|
RawAccess = true
|
||||||
|
@ -196,6 +196,7 @@ struct TensorEvaluator<const TensorConversionOp<TargetType, ArgType>, Device>
|
|||||||
IsAligned = false,
|
IsAligned = false,
|
||||||
PacketAccess = true,
|
PacketAccess = true,
|
||||||
BlockAccess = false,
|
BlockAccess = false,
|
||||||
|
PreferBlockAccess = false,
|
||||||
Layout = TensorEvaluator<ArgType, Device>::Layout,
|
Layout = TensorEvaluator<ArgType, Device>::Layout,
|
||||||
RawAccess = false
|
RawAccess = false
|
||||||
};
|
};
|
||||||
|
@ -308,6 +308,7 @@ struct TensorEvaluator<const TensorConvolutionOp<Indices, InputArgType, KernelAr
|
|||||||
IsAligned = TensorEvaluator<InputArgType, Device>::IsAligned & TensorEvaluator<KernelArgType, Device>::IsAligned,
|
IsAligned = TensorEvaluator<InputArgType, Device>::IsAligned & TensorEvaluator<KernelArgType, Device>::IsAligned,
|
||||||
PacketAccess = TensorEvaluator<InputArgType, Device>::PacketAccess & TensorEvaluator<KernelArgType, Device>::PacketAccess,
|
PacketAccess = TensorEvaluator<InputArgType, Device>::PacketAccess & TensorEvaluator<KernelArgType, Device>::PacketAccess,
|
||||||
BlockAccess = false,
|
BlockAccess = false,
|
||||||
|
PreferBlockAccess = false,
|
||||||
Layout = TensorEvaluator<InputArgType, Device>::Layout,
|
Layout = TensorEvaluator<InputArgType, Device>::Layout,
|
||||||
CoordAccess = false, // to be implemented
|
CoordAccess = false, // to be implemented
|
||||||
RawAccess = false
|
RawAccess = false
|
||||||
@ -780,6 +781,7 @@ struct TensorEvaluator<const TensorConvolutionOp<Indices, InputArgType, KernelAr
|
|||||||
IsAligned = TensorEvaluator<InputArgType, GpuDevice>::IsAligned & TensorEvaluator<KernelArgType, GpuDevice>::IsAligned,
|
IsAligned = TensorEvaluator<InputArgType, GpuDevice>::IsAligned & TensorEvaluator<KernelArgType, GpuDevice>::IsAligned,
|
||||||
PacketAccess = false,
|
PacketAccess = false,
|
||||||
BlockAccess = false,
|
BlockAccess = false,
|
||||||
|
PreferBlockAccess = false,
|
||||||
Layout = TensorEvaluator<InputArgType, GpuDevice>::Layout,
|
Layout = TensorEvaluator<InputArgType, GpuDevice>::Layout,
|
||||||
CoordAccess = false, // to be implemented
|
CoordAccess = false, // to be implemented
|
||||||
RawAccess = false
|
RawAccess = false
|
||||||
|
@ -243,6 +243,7 @@ struct TensorEvaluator<const TensorConvolutionOp<Indices, InputArgType, KernelAr
|
|||||||
IsAligned = TensorEvaluator<InputArgType, const Eigen::SyclDevice>::IsAligned & TensorEvaluator<KernelArgType, const Eigen::SyclDevice>::IsAligned,
|
IsAligned = TensorEvaluator<InputArgType, const Eigen::SyclDevice>::IsAligned & TensorEvaluator<KernelArgType, const Eigen::SyclDevice>::IsAligned,
|
||||||
PacketAccess = false,
|
PacketAccess = false,
|
||||||
BlockAccess = false,
|
BlockAccess = false,
|
||||||
|
PreferBlockAccess = false,
|
||||||
Layout = TensorEvaluator<InputArgType, const Eigen::SyclDevice>::Layout,
|
Layout = TensorEvaluator<InputArgType, const Eigen::SyclDevice>::Layout,
|
||||||
CoordAccess = false, // to be implemented
|
CoordAccess = false, // to be implemented
|
||||||
RawAccess = false
|
RawAccess = false
|
||||||
|
@ -93,6 +93,7 @@ struct TensorEvaluator<const TensorCustomUnaryOp<CustomUnaryFunc, XprType>, Devi
|
|||||||
IsAligned = false,
|
IsAligned = false,
|
||||||
PacketAccess = (PacketType<CoeffReturnType, Device>::size > 1),
|
PacketAccess = (PacketType<CoeffReturnType, Device>::size > 1),
|
||||||
BlockAccess = false,
|
BlockAccess = false,
|
||||||
|
PreferBlockAccess = false,
|
||||||
Layout = TensorEvaluator<XprType, Device>::Layout,
|
Layout = TensorEvaluator<XprType, Device>::Layout,
|
||||||
CoordAccess = false, // to be implemented
|
CoordAccess = false, // to be implemented
|
||||||
RawAccess = false
|
RawAccess = false
|
||||||
@ -255,6 +256,7 @@ struct TensorEvaluator<const TensorCustomBinaryOp<CustomBinaryFunc, LhsXprType,
|
|||||||
IsAligned = false,
|
IsAligned = false,
|
||||||
PacketAccess = (PacketType<CoeffReturnType, Device>::size > 1),
|
PacketAccess = (PacketType<CoeffReturnType, Device>::size > 1),
|
||||||
BlockAccess = false,
|
BlockAccess = false,
|
||||||
|
PreferBlockAccess = false,
|
||||||
Layout = TensorEvaluator<LhsXprType, Device>::Layout,
|
Layout = TensorEvaluator<LhsXprType, Device>::Layout,
|
||||||
CoordAccess = false, // to be implemented
|
CoordAccess = false, // to be implemented
|
||||||
RawAccess = false
|
RawAccess = false
|
||||||
|
@ -108,6 +108,7 @@ struct TensorEvaluator<const TensorEvalToOp<ArgType, MakePointer_>, Device>
|
|||||||
IsAligned = TensorEvaluator<ArgType, Device>::IsAligned,
|
IsAligned = TensorEvaluator<ArgType, Device>::IsAligned,
|
||||||
PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess,
|
PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess,
|
||||||
BlockAccess = false,
|
BlockAccess = false,
|
||||||
|
PreferBlockAccess = false,
|
||||||
Layout = TensorEvaluator<ArgType, Device>::Layout,
|
Layout = TensorEvaluator<ArgType, Device>::Layout,
|
||||||
CoordAccess = false, // to be implemented
|
CoordAccess = false, // to be implemented
|
||||||
RawAccess = true
|
RawAccess = true
|
||||||
|
@ -43,6 +43,7 @@ struct TensorEvaluator
|
|||||||
IsAligned = Derived::IsAligned,
|
IsAligned = Derived::IsAligned,
|
||||||
PacketAccess = (PacketType<CoeffReturnType, Device>::size > 1),
|
PacketAccess = (PacketType<CoeffReturnType, Device>::size > 1),
|
||||||
BlockAccess = internal::is_arithmetic<typename internal::remove_const<Scalar>::type>::value,
|
BlockAccess = internal::is_arithmetic<typename internal::remove_const<Scalar>::type>::value,
|
||||||
|
PreferBlockAccess = false,
|
||||||
Layout = Derived::Layout,
|
Layout = Derived::Layout,
|
||||||
CoordAccess = NumCoords > 0,
|
CoordAccess = NumCoords > 0,
|
||||||
RawAccess = true
|
RawAccess = true
|
||||||
@ -195,6 +196,7 @@ struct TensorEvaluator<const Derived, Device>
|
|||||||
IsAligned = Derived::IsAligned,
|
IsAligned = Derived::IsAligned,
|
||||||
PacketAccess = (PacketType<CoeffReturnType, Device>::size > 1),
|
PacketAccess = (PacketType<CoeffReturnType, Device>::size > 1),
|
||||||
BlockAccess = internal::is_arithmetic<typename internal::remove_const<Scalar>::type>::value,
|
BlockAccess = internal::is_arithmetic<typename internal::remove_const<Scalar>::type>::value,
|
||||||
|
PreferBlockAccess = false,
|
||||||
Layout = Derived::Layout,
|
Layout = Derived::Layout,
|
||||||
CoordAccess = NumCoords > 0,
|
CoordAccess = NumCoords > 0,
|
||||||
RawAccess = true
|
RawAccess = true
|
||||||
@ -288,6 +290,7 @@ struct TensorEvaluator<const TensorCwiseNullaryOp<NullaryOp, ArgType>, Device>
|
|||||||
IsAligned = true,
|
IsAligned = true,
|
||||||
PacketAccess = internal::functor_traits<NullaryOp>::PacketAccess,
|
PacketAccess = internal::functor_traits<NullaryOp>::PacketAccess,
|
||||||
BlockAccess = false,
|
BlockAccess = false,
|
||||||
|
PreferBlockAccess = false,
|
||||||
Layout = TensorEvaluator<ArgType, Device>::Layout,
|
Layout = TensorEvaluator<ArgType, Device>::Layout,
|
||||||
CoordAccess = false, // to be implemented
|
CoordAccess = false, // to be implemented
|
||||||
RawAccess = false
|
RawAccess = false
|
||||||
@ -351,27 +354,34 @@ struct TensorEvaluator<const TensorCwiseUnaryOp<UnaryOp, ArgType>, Device>
|
|||||||
typedef TensorCwiseUnaryOp<UnaryOp, ArgType> XprType;
|
typedef TensorCwiseUnaryOp<UnaryOp, ArgType> XprType;
|
||||||
|
|
||||||
enum {
|
enum {
|
||||||
IsAligned = TensorEvaluator<ArgType, Device>::IsAligned,
|
IsAligned = TensorEvaluator<ArgType, Device>::IsAligned,
|
||||||
PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess &
|
PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess &
|
||||||
internal::functor_traits<UnaryOp>::PacketAccess,
|
internal::functor_traits<UnaryOp>::PacketAccess,
|
||||||
BlockAccess = false,
|
// block() evaluates the argument through a TensorBlockView, so block support follows the argument's BlockAccess (as in TensorCwiseBinaryOp), not its PacketAccess.
BlockAccess = TensorEvaluator<ArgType, Device>::BlockAccess,
|
||||||
Layout = TensorEvaluator<ArgType, Device>::Layout,
|
PreferBlockAccess = TensorEvaluator<ArgType, Device>::PreferBlockAccess,
|
||||||
CoordAccess = false, // to be implemented
|
Layout = TensorEvaluator<ArgType, Device>::Layout,
|
||||||
RawAccess = false
|
CoordAccess = false, // to be implemented
|
||||||
|
RawAccess = false
|
||||||
};
|
};
|
||||||
|
|
||||||
EIGEN_DEVICE_FUNC TensorEvaluator(const XprType& op, const Device& device)
|
EIGEN_DEVICE_FUNC TensorEvaluator(const XprType& op, const Device& device)
|
||||||
: m_functor(op.functor()),
|
: m_device(device),
|
||||||
|
m_functor(op.functor()),
|
||||||
m_argImpl(op.nestedExpression(), device)
|
m_argImpl(op.nestedExpression(), device)
|
||||||
{ }
|
{ }
|
||||||
|
|
||||||
typedef typename XprType::Index Index;
|
typedef typename XprType::Index Index;
|
||||||
typedef typename XprType::Scalar Scalar;
|
typedef typename XprType::Scalar Scalar;
|
||||||
|
typedef typename internal::remove_const<Scalar>::type ScalarNoConst;
|
||||||
typedef typename internal::traits<XprType>::Scalar CoeffReturnType;
|
typedef typename internal::traits<XprType>::Scalar CoeffReturnType;
|
||||||
typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
|
typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
|
||||||
static const int PacketSize = PacketType<CoeffReturnType, Device>::size;
|
static const int PacketSize = PacketType<CoeffReturnType, Device>::size;
|
||||||
typedef typename TensorEvaluator<ArgType, Device>::Dimensions Dimensions;
|
typedef typename TensorEvaluator<ArgType, Device>::Dimensions Dimensions;
|
||||||
|
|
||||||
|
static const int NumDims = internal::array_size<Dimensions>::value;
|
||||||
|
typedef internal::TensorBlock<ScalarNoConst, Index, NumDims, Layout>
|
||||||
|
TensorBlock;
|
||||||
|
|
||||||
EIGEN_DEVICE_FUNC const Dimensions& dimensions() const { return m_argImpl.dimensions(); }
|
EIGEN_DEVICE_FUNC const Dimensions& dimensions() const { return m_argImpl.dimensions(); }
|
||||||
|
|
||||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(Scalar*) {
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(Scalar*) {
|
||||||
@ -399,6 +409,29 @@ struct TensorEvaluator<const TensorCwiseUnaryOp<UnaryOp, ArgType>, Device>
|
|||||||
TensorOpCost(0, 0, functor_cost, vectorized, PacketSize);
|
TensorOpCost(0, 0, functor_cost, vectorized, PacketSize);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Collects resource requirements for this expression. The unary op adds
/// none of its own here; the request is forwarded unchanged to the argument
/// evaluator, which appends to \p resources.
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void getResourceRequirements(
    std::vector<internal::TensorOpResourceRequirements>* resources) const {
  m_argImpl.getResourceRequirements(resources);
}
|
||||||
|
|
||||||
|
/// Evaluates the unary op for one block and writes the result into
/// \p output_block.
///
/// \param output_block  destination block; its sizes and strides describe
///                      the region to compute, its data() receives the
///                      results.
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void block(
    TensorBlock* output_block) const {
  if (NumDims <= 0) {
    // Rank-0 tensor: a single coefficient. The index is spelled Index(0) to
    // match the TensorCwiseBinaryOp evaluator's block().
    output_block->data()[0] = coeff(Index(0));
    return;
  }

  // View of the argument built from the device, the argument evaluator and
  // the requested output block; it supplies the input strides and data.
  internal::TensorBlockView<ArgType, Device> arg_block(m_device, m_argImpl,
                                                       *output_block);

  typedef internal::TensorBlockCwiseUnaryIO<UnaryOp, Index, ScalarNoConst,
                                            NumDims, Layout>
      BlockIO;
  BlockIO::Run(m_functor, output_block->block_sizes(),
               output_block->block_strides(), output_block->data(),
               arg_block.block_strides(), arg_block.data());
}
|
||||||
|
|
||||||
EIGEN_DEVICE_FUNC typename Eigen::internal::traits<XprType>::PointerType data() const { return NULL; }
|
EIGEN_DEVICE_FUNC typename Eigen::internal::traits<XprType>::PointerType data() const { return NULL; }
|
||||||
|
|
||||||
/// required by sycl in order to extract the accessor
|
/// required by sycl in order to extract the accessor
|
||||||
@ -408,6 +441,7 @@ struct TensorEvaluator<const TensorCwiseUnaryOp<UnaryOp, ArgType>, Device>
|
|||||||
|
|
||||||
|
|
||||||
private:
|
private:
|
||||||
|
const Device& m_device;
|
||||||
const UnaryOp m_functor;
|
const UnaryOp m_functor;
|
||||||
TensorEvaluator<ArgType, Device> m_argImpl;
|
TensorEvaluator<ArgType, Device> m_argImpl;
|
||||||
};
|
};
|
||||||
@ -421,16 +455,18 @@ struct TensorEvaluator<const TensorCwiseBinaryOp<BinaryOp, LeftArgType, RightArg
|
|||||||
typedef TensorCwiseBinaryOp<BinaryOp, LeftArgType, RightArgType> XprType;
|
typedef TensorCwiseBinaryOp<BinaryOp, LeftArgType, RightArgType> XprType;
|
||||||
|
|
||||||
enum {
|
enum {
|
||||||
IsAligned = TensorEvaluator<LeftArgType, Device>::IsAligned &
|
IsAligned = TensorEvaluator<LeftArgType, Device>::IsAligned &
|
||||||
TensorEvaluator<RightArgType, Device>::IsAligned,
|
TensorEvaluator<RightArgType, Device>::IsAligned,
|
||||||
PacketAccess = TensorEvaluator<LeftArgType, Device>::PacketAccess &
|
PacketAccess = TensorEvaluator<LeftArgType, Device>::PacketAccess &
|
||||||
TensorEvaluator<RightArgType, Device>::PacketAccess &
|
TensorEvaluator<RightArgType, Device>::PacketAccess &
|
||||||
internal::functor_traits<BinaryOp>::PacketAccess,
|
internal::functor_traits<BinaryOp>::PacketAccess,
|
||||||
BlockAccess = TensorEvaluator<LeftArgType, Device>::BlockAccess &
|
BlockAccess = TensorEvaluator<LeftArgType, Device>::BlockAccess &
|
||||||
TensorEvaluator<RightArgType, Device>::BlockAccess,
|
TensorEvaluator<RightArgType, Device>::BlockAccess,
|
||||||
Layout = TensorEvaluator<LeftArgType, Device>::Layout,
|
PreferBlockAccess = TensorEvaluator<LeftArgType, Device>::PreferBlockAccess |
|
||||||
CoordAccess = false, // to be implemented
|
TensorEvaluator<RightArgType, Device>::PreferBlockAccess,
|
||||||
RawAccess = false
|
Layout = TensorEvaluator<LeftArgType, Device>::Layout,
|
||||||
|
CoordAccess = false, // to be implemented
|
||||||
|
RawAccess = false
|
||||||
};
|
};
|
||||||
|
|
||||||
EIGEN_DEVICE_FUNC TensorEvaluator(const XprType& op, const Device& device)
|
EIGEN_DEVICE_FUNC TensorEvaluator(const XprType& op, const Device& device)
|
||||||
@ -501,7 +537,7 @@ struct TensorEvaluator<const TensorCwiseBinaryOp<BinaryOp, LeftArgType, RightArg
|
|||||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void block(
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void block(
|
||||||
TensorBlock* output_block) const {
|
TensorBlock* output_block) const {
|
||||||
if (NumDims <= 0) {
|
if (NumDims <= 0) {
|
||||||
output_block->data()[0] = coeff(0);
|
output_block->data()[0] = coeff(Index(0));
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
internal::TensorBlockView<LeftArgType, Device> left_block(
|
internal::TensorBlockView<LeftArgType, Device> left_block(
|
||||||
@ -543,6 +579,7 @@ struct TensorEvaluator<const TensorCwiseTernaryOp<TernaryOp, Arg1Type, Arg2Type,
|
|||||||
PacketAccess = TensorEvaluator<Arg1Type, Device>::PacketAccess & TensorEvaluator<Arg2Type, Device>::PacketAccess & TensorEvaluator<Arg3Type, Device>::PacketAccess &
|
PacketAccess = TensorEvaluator<Arg1Type, Device>::PacketAccess & TensorEvaluator<Arg2Type, Device>::PacketAccess & TensorEvaluator<Arg3Type, Device>::PacketAccess &
|
||||||
internal::functor_traits<TernaryOp>::PacketAccess,
|
internal::functor_traits<TernaryOp>::PacketAccess,
|
||||||
BlockAccess = false,
|
BlockAccess = false,
|
||||||
|
PreferBlockAccess = false,
|
||||||
Layout = TensorEvaluator<Arg1Type, Device>::Layout,
|
Layout = TensorEvaluator<Arg1Type, Device>::Layout,
|
||||||
CoordAccess = false, // to be implemented
|
CoordAccess = false, // to be implemented
|
||||||
RawAccess = false
|
RawAccess = false
|
||||||
@ -648,6 +685,7 @@ struct TensorEvaluator<const TensorSelectOp<IfArgType, ThenArgType, ElseArgType>
|
|||||||
PacketAccess = TensorEvaluator<ThenArgType, Device>::PacketAccess & TensorEvaluator<ElseArgType, Device>::PacketAccess &
|
PacketAccess = TensorEvaluator<ThenArgType, Device>::PacketAccess & TensorEvaluator<ElseArgType, Device>::PacketAccess &
|
||||||
PacketType<Scalar, Device>::HasBlend,
|
PacketType<Scalar, Device>::HasBlend,
|
||||||
BlockAccess = false,
|
BlockAccess = false,
|
||||||
|
PreferBlockAccess = false,
|
||||||
Layout = TensorEvaluator<IfArgType, Device>::Layout,
|
Layout = TensorEvaluator<IfArgType, Device>::Layout,
|
||||||
CoordAccess = false, // to be implemented
|
CoordAccess = false, // to be implemented
|
||||||
RawAccess = false
|
RawAccess = false
|
||||||
|
@ -136,6 +136,7 @@ struct TensorEvaluator<const TensorFFTOp<FFT, ArgType, FFTResultType, FFTDir>, D
|
|||||||
IsAligned = false,
|
IsAligned = false,
|
||||||
PacketAccess = true,
|
PacketAccess = true,
|
||||||
BlockAccess = false,
|
BlockAccess = false,
|
||||||
|
PreferBlockAccess = false,
|
||||||
Layout = TensorEvaluator<ArgType, Device>::Layout,
|
Layout = TensorEvaluator<ArgType, Device>::Layout,
|
||||||
CoordAccess = false,
|
CoordAccess = false,
|
||||||
RawAccess = false
|
RawAccess = false
|
||||||
|
@ -42,6 +42,7 @@ class TensorFixedSize : public TensorBase<TensorFixedSize<Scalar_, Dimensions_,
|
|||||||
IsAligned = bool(EIGEN_MAX_ALIGN_BYTES>0),
|
IsAligned = bool(EIGEN_MAX_ALIGN_BYTES>0),
|
||||||
PacketAccess = (internal::packet_traits<Scalar>::size > 1),
|
PacketAccess = (internal::packet_traits<Scalar>::size > 1),
|
||||||
BlockAccess = false,
|
BlockAccess = false,
|
||||||
|
PreferBlockAccess = false,
|
||||||
Layout = Options_ & RowMajor ? RowMajor : ColMajor,
|
Layout = Options_ & RowMajor ? RowMajor : ColMajor,
|
||||||
CoordAccess = true,
|
CoordAccess = true,
|
||||||
RawAccess = true
|
RawAccess = true
|
||||||
|
@ -99,6 +99,7 @@ struct TensorEvaluator<const TensorForcedEvalOp<ArgType>, Device>
|
|||||||
IsAligned = true,
|
IsAligned = true,
|
||||||
PacketAccess = (PacketType<CoeffReturnType, Device>::size > 1),
|
PacketAccess = (PacketType<CoeffReturnType, Device>::size > 1),
|
||||||
BlockAccess = false,
|
BlockAccess = false,
|
||||||
|
PreferBlockAccess = false,
|
||||||
Layout = TensorEvaluator<ArgType, Device>::Layout,
|
Layout = TensorEvaluator<ArgType, Device>::Layout,
|
||||||
RawAccess = true
|
RawAccess = true
|
||||||
};
|
};
|
||||||
|
@ -132,7 +132,12 @@ struct IsVectorizable<GpuDevice, Expression> {
|
|||||||
|
|
||||||
template <typename Device, typename Expression>
|
template <typename Device, typename Expression>
|
||||||
struct IsTileable {
|
struct IsTileable {
|
||||||
static const bool value = TensorEvaluator<Expression, Device>::BlockAccess;
|
// Check that block evaluation is supported and it's a preferred option (at
|
||||||
|
// least one sub-expression has much faster block evaluation, e.g.
|
||||||
|
// broadcasting).
|
||||||
|
static const bool value =
|
||||||
|
TensorEvaluator<Expression, Device>::BlockAccess &
|
||||||
|
TensorEvaluator<Expression, Device>::PreferBlockAccess;
|
||||||
};
|
};
|
||||||
|
|
||||||
template <typename Expression, typename Device,
|
template <typename Expression, typename Device,
|
||||||
|
@ -92,6 +92,7 @@ struct TensorEvaluator<const TensorGeneratorOp<Generator, ArgType>, Device>
|
|||||||
IsAligned = false,
|
IsAligned = false,
|
||||||
PacketAccess = (PacketType<CoeffReturnType, Device>::size > 1),
|
PacketAccess = (PacketType<CoeffReturnType, Device>::size > 1),
|
||||||
BlockAccess = false,
|
BlockAccess = false,
|
||||||
|
PreferBlockAccess = false,
|
||||||
Layout = TensorEvaluator<ArgType, Device>::Layout,
|
Layout = TensorEvaluator<ArgType, Device>::Layout,
|
||||||
CoordAccess = false, // to be implemented
|
CoordAccess = false, // to be implemented
|
||||||
RawAccess = false
|
RawAccess = false
|
||||||
|
@ -244,12 +244,13 @@ struct TensorEvaluator<const TensorImagePatchOp<Rows, Cols, ArgType>, Device>
|
|||||||
static const int PacketSize = PacketType<CoeffReturnType, Device>::size;
|
static const int PacketSize = PacketType<CoeffReturnType, Device>::size;
|
||||||
|
|
||||||
enum {
|
enum {
|
||||||
IsAligned = false,
|
IsAligned = false,
|
||||||
PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess,
|
PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess,
|
||||||
BlockAccess = true,
|
BlockAccess = true,
|
||||||
Layout = TensorEvaluator<ArgType, Device>::Layout,
|
PreferBlockAccess = true,
|
||||||
CoordAccess = false,
|
Layout = TensorEvaluator<ArgType, Device>::Layout,
|
||||||
RawAccess = false
|
CoordAccess = false,
|
||||||
|
RawAccess = false
|
||||||
};
|
};
|
||||||
|
|
||||||
typedef internal::TensorBlock<Scalar, Index, NumDims, Layout>
|
typedef internal::TensorBlock<Scalar, Index, NumDims, Layout>
|
||||||
|
@ -91,6 +91,7 @@ struct TensorEvaluator<const TensorInflationOp<Strides, ArgType>, Device>
|
|||||||
IsAligned = /*TensorEvaluator<ArgType, Device>::IsAligned*/ false,
|
IsAligned = /*TensorEvaluator<ArgType, Device>::IsAligned*/ false,
|
||||||
PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess,
|
PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess,
|
||||||
BlockAccess = false,
|
BlockAccess = false,
|
||||||
|
PreferBlockAccess = false,
|
||||||
Layout = TensorEvaluator<ArgType, Device>::Layout,
|
Layout = TensorEvaluator<ArgType, Device>::Layout,
|
||||||
CoordAccess = false, // to be implemented
|
CoordAccess = false, // to be implemented
|
||||||
RawAccess = false
|
RawAccess = false
|
||||||
|
@ -120,6 +120,7 @@ struct TensorEvaluator<const TensorLayoutSwapOp<ArgType>, Device>
|
|||||||
IsAligned = TensorEvaluator<ArgType, Device>::IsAligned,
|
IsAligned = TensorEvaluator<ArgType, Device>::IsAligned,
|
||||||
PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess,
|
PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess,
|
||||||
BlockAccess = false,
|
BlockAccess = false,
|
||||||
|
PreferBlockAccess = false,
|
||||||
Layout = (static_cast<int>(TensorEvaluator<ArgType, Device>::Layout) == static_cast<int>(ColMajor)) ? RowMajor : ColMajor,
|
Layout = (static_cast<int>(TensorEvaluator<ArgType, Device>::Layout) == static_cast<int>(ColMajor)) ? RowMajor : ColMajor,
|
||||||
CoordAccess = false, // to be implemented
|
CoordAccess = false, // to be implemented
|
||||||
RawAccess = TensorEvaluator<ArgType, Device>::RawAccess
|
RawAccess = TensorEvaluator<ArgType, Device>::RawAccess
|
||||||
@ -183,6 +184,7 @@ template<typename ArgType, typename Device>
|
|||||||
IsAligned = TensorEvaluator<ArgType, Device>::IsAligned,
|
IsAligned = TensorEvaluator<ArgType, Device>::IsAligned,
|
||||||
PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess,
|
PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess,
|
||||||
BlockAccess = false,
|
BlockAccess = false,
|
||||||
|
PreferBlockAccess = false,
|
||||||
Layout = (static_cast<int>(TensorEvaluator<ArgType, Device>::Layout) == static_cast<int>(ColMajor)) ? RowMajor : ColMajor,
|
Layout = (static_cast<int>(TensorEvaluator<ArgType, Device>::Layout) == static_cast<int>(ColMajor)) ? RowMajor : ColMajor,
|
||||||
CoordAccess = false // to be implemented
|
CoordAccess = false // to be implemented
|
||||||
};
|
};
|
||||||
|
@ -111,16 +111,17 @@ struct TensorEvaluator<const TensorReshapingOp<NewDimensions, ArgType>, Device>
|
|||||||
static const int NumInputDims = internal::array_size<typename TensorEvaluator<ArgType, Device>::Dimensions>::value;
|
static const int NumInputDims = internal::array_size<typename TensorEvaluator<ArgType, Device>::Dimensions>::value;
|
||||||
|
|
||||||
enum {
|
enum {
|
||||||
IsAligned = TensorEvaluator<ArgType, Device>::IsAligned,
|
IsAligned = TensorEvaluator<ArgType, Device>::IsAligned,
|
||||||
PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess,
|
PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess,
|
||||||
// TODO(andydavis, wuke) Enable BlockAccess for the general case when the
|
// TODO(andydavis, wuke) Enable BlockAccess for the general case when the
|
||||||
// performance issue with block-based reshape is resolved.
|
// performance issue with block-based reshape is resolved.
|
||||||
BlockAccess = TensorEvaluator<ArgType, Device>::BlockAccess &&
|
BlockAccess = TensorEvaluator<ArgType, Device>::BlockAccess &&
|
||||||
TensorEvaluator<ArgType, Device>::RawAccess &&
|
TensorEvaluator<ArgType, Device>::RawAccess &&
|
||||||
NumInputDims > 0 && NumOutputDims > 0,
|
NumInputDims > 0 && NumOutputDims > 0,
|
||||||
Layout = TensorEvaluator<ArgType, Device>::Layout,
|
PreferBlockAccess = true,
|
||||||
CoordAccess = false, // to be implemented
|
Layout = TensorEvaluator<ArgType, Device>::Layout,
|
||||||
RawAccess = TensorEvaluator<ArgType, Device>::RawAccess
|
CoordAccess = false, // to be implemented
|
||||||
|
RawAccess = TensorEvaluator<ArgType, Device>::RawAccess
|
||||||
};
|
};
|
||||||
|
|
||||||
typedef typename internal::remove_const<Scalar>::type ScalarNoConst;
|
typedef typename internal::remove_const<Scalar>::type ScalarNoConst;
|
||||||
@ -349,6 +350,7 @@ template<typename NewDimensions, typename ArgType, typename Device>
|
|||||||
IsAligned = TensorEvaluator<ArgType, Device>::IsAligned,
|
IsAligned = TensorEvaluator<ArgType, Device>::IsAligned,
|
||||||
PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess,
|
PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess,
|
||||||
BlockAccess = false,
|
BlockAccess = false,
|
||||||
|
PreferBlockAccess = false,
|
||||||
Layout = TensorEvaluator<ArgType, Device>::Layout,
|
Layout = TensorEvaluator<ArgType, Device>::Layout,
|
||||||
CoordAccess = false, // to be implemented
|
CoordAccess = false, // to be implemented
|
||||||
RawAccess = TensorEvaluator<ArgType, Device>::RawAccess
|
RawAccess = TensorEvaluator<ArgType, Device>::RawAccess
|
||||||
@ -508,12 +510,13 @@ struct TensorEvaluator<const TensorSlicingOp<StartIndices, Sizes, ArgType>, Devi
|
|||||||
enum {
|
enum {
|
||||||
// Alignment can't be guaranteed at compile time since it depends on the
|
// Alignment can't be guaranteed at compile time since it depends on the
|
||||||
// slice offsets and sizes.
|
// slice offsets and sizes.
|
||||||
IsAligned = /*TensorEvaluator<ArgType, Device>::IsAligned*/false,
|
IsAligned = false,
|
||||||
PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess,
|
PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess,
|
||||||
BlockAccess = TensorEvaluator<ArgType, Device>::BlockAccess,
|
BlockAccess = TensorEvaluator<ArgType, Device>::BlockAccess,
|
||||||
Layout = TensorEvaluator<ArgType, Device>::Layout,
|
PreferBlockAccess = true,
|
||||||
CoordAccess = false,
|
Layout = TensorEvaluator<ArgType, Device>::Layout,
|
||||||
RawAccess = false
|
CoordAccess = false,
|
||||||
|
RawAccess = false
|
||||||
};
|
};
|
||||||
|
|
||||||
typedef typename internal::remove_const<Scalar>::type ScalarNoConst;
|
typedef typename internal::remove_const<Scalar>::type ScalarNoConst;
|
||||||
@ -785,12 +788,13 @@ struct TensorEvaluator<TensorSlicingOp<StartIndices, Sizes, ArgType>, Device>
|
|||||||
typedef Sizes Dimensions;
|
typedef Sizes Dimensions;
|
||||||
|
|
||||||
enum {
|
enum {
|
||||||
IsAligned = /*TensorEvaluator<ArgType, Device>::IsAligned*/false,
|
IsAligned = false,
|
||||||
PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess,
|
PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess,
|
||||||
BlockAccess = TensorEvaluator<ArgType, Device>::BlockAccess,
|
BlockAccess = TensorEvaluator<ArgType, Device>::BlockAccess,
|
||||||
Layout = TensorEvaluator<ArgType, Device>::Layout,
|
PreferBlockAccess = true,
|
||||||
CoordAccess = false,
|
Layout = TensorEvaluator<ArgType, Device>::Layout,
|
||||||
RawAccess = (NumDims == 1) & TensorEvaluator<ArgType, Device>::RawAccess
|
CoordAccess = false,
|
||||||
|
RawAccess = (NumDims == 1) & TensorEvaluator<ArgType, Device>::RawAccess
|
||||||
};
|
};
|
||||||
|
|
||||||
typedef typename internal::remove_const<Scalar>::type ScalarNoConst;
|
typedef typename internal::remove_const<Scalar>::type ScalarNoConst;
|
||||||
@ -972,6 +976,7 @@ struct TensorEvaluator<const TensorStridingSlicingOp<StartIndices, StopIndices,
|
|||||||
IsAligned = false,
|
IsAligned = false,
|
||||||
PacketAccess = false,
|
PacketAccess = false,
|
||||||
BlockAccess = false,
|
BlockAccess = false,
|
||||||
|
PreferBlockAccess = false,
|
||||||
Layout = TensorEvaluator<ArgType, Device>::Layout,
|
Layout = TensorEvaluator<ArgType, Device>::Layout,
|
||||||
RawAccess = false
|
RawAccess = false
|
||||||
};
|
};
|
||||||
@ -1148,6 +1153,7 @@ struct TensorEvaluator<TensorStridingSlicingOp<StartIndices, StopIndices, Stride
|
|||||||
IsAligned = false,
|
IsAligned = false,
|
||||||
PacketAccess = false,
|
PacketAccess = false,
|
||||||
BlockAccess = false,
|
BlockAccess = false,
|
||||||
|
PreferBlockAccess = false,
|
||||||
Layout = TensorEvaluator<ArgType, Device>::Layout,
|
Layout = TensorEvaluator<ArgType, Device>::Layout,
|
||||||
CoordAccess = TensorEvaluator<ArgType, Device>::CoordAccess,
|
CoordAccess = TensorEvaluator<ArgType, Device>::CoordAccess,
|
||||||
RawAccess = false
|
RawAccess = false
|
||||||
|
@ -97,6 +97,7 @@ struct TensorEvaluator<const TensorPaddingOp<PaddingDimensions, ArgType>, Device
|
|||||||
IsAligned = true,
|
IsAligned = true,
|
||||||
PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess,
|
PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess,
|
||||||
BlockAccess = false,
|
BlockAccess = false,
|
||||||
|
PreferBlockAccess = false,
|
||||||
Layout = TensorEvaluator<ArgType, Device>::Layout,
|
Layout = TensorEvaluator<ArgType, Device>::Layout,
|
||||||
CoordAccess = true,
|
CoordAccess = true,
|
||||||
RawAccess = false
|
RawAccess = false
|
||||||
|
@ -95,6 +95,7 @@ struct TensorEvaluator<const TensorPatchOp<PatchDim, ArgType>, Device>
|
|||||||
IsAligned = false,
|
IsAligned = false,
|
||||||
PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess,
|
PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess,
|
||||||
BlockAccess = false,
|
BlockAccess = false,
|
||||||
|
PreferBlockAccess = false,
|
||||||
Layout = TensorEvaluator<ArgType, Device>::Layout,
|
Layout = TensorEvaluator<ArgType, Device>::Layout,
|
||||||
CoordAccess = false,
|
CoordAccess = false,
|
||||||
RawAccess = false
|
RawAccess = false
|
||||||
|
@ -475,12 +475,13 @@ struct TensorEvaluator<const TensorReductionOp<Op, Dims, ArgType, MakePointer_>,
|
|||||||
static const int PacketSize = PacketType<CoeffReturnType, Device>::size;
|
static const int PacketSize = PacketType<CoeffReturnType, Device>::size;
|
||||||
|
|
||||||
enum {
|
enum {
|
||||||
IsAligned = false,
|
IsAligned = false,
|
||||||
PacketAccess = Self::InputPacketAccess && Op::PacketAccess,
|
PacketAccess = Self::InputPacketAccess && Op::PacketAccess,
|
||||||
BlockAccess = TensorEvaluator<ArgType, Device>::BlockAccess,
|
BlockAccess = TensorEvaluator<ArgType, Device>::BlockAccess,
|
||||||
Layout = TensorEvaluator<ArgType, Device>::Layout,
|
PreferBlockAccess = true,
|
||||||
CoordAccess = false, // to be implemented
|
Layout = TensorEvaluator<ArgType, Device>::Layout,
|
||||||
RawAccess = false
|
CoordAccess = false, // to be implemented
|
||||||
|
RawAccess = false
|
||||||
};
|
};
|
||||||
|
|
||||||
typedef typename internal::remove_const<Scalar>::type ScalarNoConst;
|
typedef typename internal::remove_const<Scalar>::type ScalarNoConst;
|
||||||
|
@ -137,6 +137,7 @@ template<typename PlainObjectType> class TensorRef : public TensorBase<TensorRef
|
|||||||
IsAligned = false,
|
IsAligned = false,
|
||||||
PacketAccess = false,
|
PacketAccess = false,
|
||||||
BlockAccess = false,
|
BlockAccess = false,
|
||||||
|
PreferBlockAccess = false,
|
||||||
Layout = PlainObjectType::Layout,
|
Layout = PlainObjectType::Layout,
|
||||||
CoordAccess = false, // to be implemented
|
CoordAccess = false, // to be implemented
|
||||||
RawAccess = false
|
RawAccess = false
|
||||||
@ -366,6 +367,7 @@ struct TensorEvaluator<const TensorRef<Derived>, Device>
|
|||||||
IsAligned = false,
|
IsAligned = false,
|
||||||
PacketAccess = false,
|
PacketAccess = false,
|
||||||
BlockAccess = false,
|
BlockAccess = false,
|
||||||
|
PreferBlockAccess = false,
|
||||||
Layout = TensorRef<Derived>::Layout,
|
Layout = TensorRef<Derived>::Layout,
|
||||||
CoordAccess = false, // to be implemented
|
CoordAccess = false, // to be implemented
|
||||||
RawAccess = false
|
RawAccess = false
|
||||||
@ -414,6 +416,7 @@ struct TensorEvaluator<TensorRef<Derived>, Device> : public TensorEvaluator<cons
|
|||||||
IsAligned = false,
|
IsAligned = false,
|
||||||
PacketAccess = false,
|
PacketAccess = false,
|
||||||
BlockAccess = false,
|
BlockAccess = false,
|
||||||
|
PreferBlockAccess = false,
|
||||||
RawAccess = false
|
RawAccess = false
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -114,6 +114,7 @@ struct TensorEvaluator<const TensorReverseOp<ReverseDimensions, ArgType>, Device
|
|||||||
IsAligned = false,
|
IsAligned = false,
|
||||||
PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess,
|
PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess,
|
||||||
BlockAccess = false,
|
BlockAccess = false,
|
||||||
|
PreferBlockAccess = false,
|
||||||
Layout = TensorEvaluator<ArgType, Device>::Layout,
|
Layout = TensorEvaluator<ArgType, Device>::Layout,
|
||||||
CoordAccess = false, // to be implemented
|
CoordAccess = false, // to be implemented
|
||||||
RawAccess = false
|
RawAccess = false
|
||||||
@ -255,6 +256,7 @@ struct TensorEvaluator<TensorReverseOp<ReverseDimensions, ArgType>, Device>
|
|||||||
IsAligned = false,
|
IsAligned = false,
|
||||||
PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess,
|
PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess,
|
||||||
BlockAccess = false,
|
BlockAccess = false,
|
||||||
|
PreferBlockAccess = false,
|
||||||
Layout = TensorEvaluator<ArgType, Device>::Layout,
|
Layout = TensorEvaluator<ArgType, Device>::Layout,
|
||||||
CoordAccess = false, // to be implemented
|
CoordAccess = false, // to be implemented
|
||||||
RawAccess = false
|
RawAccess = false
|
||||||
|
@ -97,6 +97,7 @@ struct TensorEvaluator<const TensorScanOp<Op, ArgType>, Device> {
|
|||||||
IsAligned = false,
|
IsAligned = false,
|
||||||
PacketAccess = (PacketType<CoeffReturnType, Device>::size > 1),
|
PacketAccess = (PacketType<CoeffReturnType, Device>::size > 1),
|
||||||
BlockAccess = false,
|
BlockAccess = false,
|
||||||
|
PreferBlockAccess = false,
|
||||||
Layout = TensorEvaluator<ArgType, Device>::Layout,
|
Layout = TensorEvaluator<ArgType, Device>::Layout,
|
||||||
CoordAccess = false,
|
CoordAccess = false,
|
||||||
RawAccess = true
|
RawAccess = true
|
||||||
|
@ -111,12 +111,13 @@ struct TensorEvaluator<const TensorShufflingOp<Shuffle, ArgType>, Device>
|
|||||||
static const int PacketSize = PacketType<CoeffReturnType, Device>::size;
|
static const int PacketSize = PacketType<CoeffReturnType, Device>::size;
|
||||||
|
|
||||||
enum {
|
enum {
|
||||||
IsAligned = false,
|
IsAligned = false,
|
||||||
PacketAccess = (PacketType<CoeffReturnType, Device>::size > 1),
|
PacketAccess = (PacketType<CoeffReturnType, Device>::size > 1),
|
||||||
BlockAccess = TensorEvaluator<ArgType, Device>::BlockAccess,
|
BlockAccess = TensorEvaluator<ArgType, Device>::BlockAccess,
|
||||||
Layout = TensorEvaluator<ArgType, Device>::Layout,
|
PreferBlockAccess = true,
|
||||||
CoordAccess = false, // to be implemented
|
Layout = TensorEvaluator<ArgType, Device>::Layout,
|
||||||
RawAccess = false
|
CoordAccess = false, // to be implemented
|
||||||
|
RawAccess = false
|
||||||
};
|
};
|
||||||
|
|
||||||
typedef typename internal::remove_const<Scalar>::type ScalarNoConst;
|
typedef typename internal::remove_const<Scalar>::type ScalarNoConst;
|
||||||
@ -412,11 +413,12 @@ struct TensorEvaluator<TensorShufflingOp<Shuffle, ArgType>, Device>
|
|||||||
static const int PacketSize = PacketType<CoeffReturnType, Device>::size;
|
static const int PacketSize = PacketType<CoeffReturnType, Device>::size;
|
||||||
|
|
||||||
enum {
|
enum {
|
||||||
IsAligned = false,
|
IsAligned = false,
|
||||||
PacketAccess = (PacketType<CoeffReturnType, Device>::size > 1),
|
PacketAccess = (PacketType<CoeffReturnType, Device>::size > 1),
|
||||||
BlockAccess = TensorEvaluator<ArgType, Device>::BlockAccess,
|
BlockAccess = TensorEvaluator<ArgType, Device>::BlockAccess,
|
||||||
Layout = TensorEvaluator<ArgType, Device>::Layout,
|
PreferBlockAccess = true,
|
||||||
RawAccess = false
|
Layout = TensorEvaluator<ArgType, Device>::Layout,
|
||||||
|
RawAccess = false
|
||||||
};
|
};
|
||||||
|
|
||||||
typedef typename internal::remove_const<Scalar>::type ScalarNoConst;
|
typedef typename internal::remove_const<Scalar>::type ScalarNoConst;
|
||||||
|
@ -113,6 +113,7 @@ struct TensorEvaluator<const TensorStridingOp<Strides, ArgType>, Device>
|
|||||||
IsAligned = /*TensorEvaluator<ArgType, Device>::IsAligned*/false,
|
IsAligned = /*TensorEvaluator<ArgType, Device>::IsAligned*/false,
|
||||||
PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess,
|
PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess,
|
||||||
BlockAccess = false,
|
BlockAccess = false,
|
||||||
|
PreferBlockAccess = false,
|
||||||
Layout = TensorEvaluator<ArgType, Device>::Layout,
|
Layout = TensorEvaluator<ArgType, Device>::Layout,
|
||||||
CoordAccess = false, // to be implemented
|
CoordAccess = false, // to be implemented
|
||||||
RawAccess = false
|
RawAccess = false
|
||||||
@ -275,6 +276,7 @@ struct TensorEvaluator<TensorStridingOp<Strides, ArgType>, Device>
|
|||||||
IsAligned = /*TensorEvaluator<ArgType, Device>::IsAligned*/false,
|
IsAligned = /*TensorEvaluator<ArgType, Device>::IsAligned*/false,
|
||||||
PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess,
|
PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess,
|
||||||
BlockAccess = false,
|
BlockAccess = false,
|
||||||
|
PreferBlockAccess = false,
|
||||||
Layout = TensorEvaluator<ArgType, Device>::Layout,
|
Layout = TensorEvaluator<ArgType, Device>::Layout,
|
||||||
CoordAccess = false, // to be implemented
|
CoordAccess = false, // to be implemented
|
||||||
RawAccess = false
|
RawAccess = false
|
||||||
|
@ -96,6 +96,7 @@ struct TensorEvaluator<const TensorTraceOp<Dims, ArgType>, Device>
|
|||||||
IsAligned = false,
|
IsAligned = false,
|
||||||
PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess,
|
PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess,
|
||||||
BlockAccess = false,
|
BlockAccess = false,
|
||||||
|
PreferBlockAccess = false,
|
||||||
Layout = TensorEvaluator<ArgType, Device>::Layout,
|
Layout = TensorEvaluator<ArgType, Device>::Layout,
|
||||||
CoordAccess = false,
|
CoordAccess = false,
|
||||||
RawAccess = false
|
RawAccess = false
|
||||||
|
@ -200,6 +200,7 @@ struct TensorEvaluator<const TensorVolumePatchOp<Planes, Rows, Cols, ArgType>, D
|
|||||||
IsAligned = false,
|
IsAligned = false,
|
||||||
PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess,
|
PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess,
|
||||||
BlockAccess = false,
|
BlockAccess = false,
|
||||||
|
PreferBlockAccess = false,
|
||||||
Layout = TensorEvaluator<ArgType, Device>::Layout,
|
Layout = TensorEvaluator<ArgType, Device>::Layout,
|
||||||
CoordAccess = false,
|
CoordAccess = false,
|
||||||
RawAccess = false
|
RawAccess = false
|
||||||
|
@ -517,6 +517,114 @@ static void test_block_io_squeeze_ones() {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
template <typename T, int NumDims, int Layout>
|
||||||
|
static void test_block_cwise_unary_io_basic() {
|
||||||
|
typedef internal::scalar_square_op<T> UnaryFunctor;
|
||||||
|
typedef internal::TensorBlockCwiseUnaryIO<UnaryFunctor, Index, T, NumDims,
|
||||||
|
Layout>
|
||||||
|
TensorBlockCwiseUnaryIO;
|
||||||
|
|
||||||
|
DSizes<Index, NumDims> block_sizes = RandomDims<NumDims>();
|
||||||
|
DSizes<Index, NumDims> strides(ComputeStrides<Layout, NumDims>(block_sizes));
|
||||||
|
|
||||||
|
const auto total_size = block_sizes.TotalSize();
|
||||||
|
|
||||||
|
// Create a random input tensors.
|
||||||
|
T* input_data = GenerateRandomData<T>(total_size);
|
||||||
|
|
||||||
|
T* output_data = new T[total_size];
|
||||||
|
UnaryFunctor functor;
|
||||||
|
TensorBlockCwiseUnaryIO::Run(functor, block_sizes, strides, output_data,
|
||||||
|
strides, input_data);
|
||||||
|
for (int i = 0; i < total_size; ++i) {
|
||||||
|
VERIFY_IS_EQUAL(output_data[i], functor(input_data[i]));
|
||||||
|
}
|
||||||
|
|
||||||
|
delete[] input_data;
|
||||||
|
delete[] output_data;
|
||||||
|
}
|
||||||
|
|
||||||
|
template <int Layout>
|
||||||
|
static void test_block_cwise_unary_io_squeeze_ones() {
|
||||||
|
typedef internal::scalar_square_op<float> UnaryFunctor;
|
||||||
|
typedef internal::TensorBlockCwiseUnaryIO<UnaryFunctor, Index, float, 5,
|
||||||
|
Layout>
|
||||||
|
TensorBlockCwiseUnaryIO;
|
||||||
|
|
||||||
|
DSizes<Index, 5> block_sizes(1, 2, 1, 3, 1);
|
||||||
|
DSizes<Index, 5> strides(ComputeStrides<Layout, 5>(block_sizes));
|
||||||
|
|
||||||
|
const auto total_size = block_sizes.TotalSize();
|
||||||
|
|
||||||
|
// Create a random input tensors.
|
||||||
|
auto* input_data = GenerateRandomData<float>(total_size);
|
||||||
|
|
||||||
|
auto* output_data = new float[total_size];
|
||||||
|
UnaryFunctor functor;
|
||||||
|
TensorBlockCwiseUnaryIO::Run(functor, block_sizes, strides, output_data,
|
||||||
|
strides, input_data);
|
||||||
|
for (int i = 0; i < total_size; ++i) {
|
||||||
|
VERIFY_IS_EQUAL(output_data[i], functor(input_data[i]));
|
||||||
|
}
|
||||||
|
|
||||||
|
delete[] input_data;
|
||||||
|
delete[] output_data;
|
||||||
|
}
|
||||||
|
|
||||||
|
template <int Layout>
|
||||||
|
static void test_block_cwise_unary_io_zero_strides() {
|
||||||
|
typedef internal::scalar_square_op<float> UnaryFunctor;
|
||||||
|
typedef internal::TensorBlockCwiseUnaryIO<UnaryFunctor, Index, float, 5,
|
||||||
|
Layout>
|
||||||
|
TensorBlockCwiseUnaryIO;
|
||||||
|
|
||||||
|
DSizes<Index, 5> rnd_dims = RandomDims<5>();
|
||||||
|
|
||||||
|
DSizes<Index, 5> input_sizes = rnd_dims;
|
||||||
|
input_sizes[0] = 1;
|
||||||
|
input_sizes[2] = 1;
|
||||||
|
input_sizes[4] = 1;
|
||||||
|
|
||||||
|
DSizes<Index, 5> input_strides(ComputeStrides<Layout, 5>(input_sizes));
|
||||||
|
input_strides[0] = 0;
|
||||||
|
input_strides[2] = 0;
|
||||||
|
input_strides[4] = 0;
|
||||||
|
|
||||||
|
// Generate random data.
|
||||||
|
auto* input_data = GenerateRandomData<float>(input_sizes.TotalSize());
|
||||||
|
|
||||||
|
DSizes<Index, 5> output_sizes = rnd_dims;
|
||||||
|
DSizes<Index, 5> output_strides(ComputeStrides<Layout, 5>(output_sizes));
|
||||||
|
|
||||||
|
const auto output_total_size = output_sizes.TotalSize();
|
||||||
|
auto* output_data = new float[output_total_size];
|
||||||
|
|
||||||
|
UnaryFunctor functor;
|
||||||
|
TensorBlockCwiseUnaryIO::Run(functor, output_sizes, output_strides,
|
||||||
|
output_data, input_strides, input_data);
|
||||||
|
for (int i = 0; i < rnd_dims[0]; ++i) {
|
||||||
|
for (int j = 0; j < rnd_dims[1]; ++j) {
|
||||||
|
for (int k = 0; k < rnd_dims[2]; ++k) {
|
||||||
|
for (int l = 0; l < rnd_dims[3]; ++l) {
|
||||||
|
for (int m = 0; m < rnd_dims[4]; ++m) {
|
||||||
|
Index output_index = i * output_strides[0] + j * output_strides[1] +
|
||||||
|
k * output_strides[2] + l * output_strides[3] +
|
||||||
|
m * output_strides[4];
|
||||||
|
Index input_index = i * input_strides[0] + j * input_strides[1] +
|
||||||
|
k * input_strides[2] + l * input_strides[3] +
|
||||||
|
m * input_strides[4];
|
||||||
|
VERIFY_IS_EQUAL(output_data[output_index],
|
||||||
|
functor(input_data[input_index]));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
delete[] input_data;
|
||||||
|
delete[] output_data;
|
||||||
|
}
|
||||||
|
|
||||||
template <typename T, int NumDims, int Layout>
|
template <typename T, int NumDims, int Layout>
|
||||||
static void test_block_cwise_binary_io_basic() {
|
static void test_block_cwise_binary_io_basic() {
|
||||||
typedef internal::scalar_sum_op<T> BinaryFunctor;
|
typedef internal::scalar_sum_op<T> BinaryFunctor;
|
||||||
@ -982,6 +1090,9 @@ EIGEN_DECLARE_TEST(cxx11_tensor_block_access) {
|
|||||||
TEST_LAYOUTS_AND_DIMS(Data, test_block_io_copy_using_reordered_dimensions);
|
TEST_LAYOUTS_AND_DIMS(Data, test_block_io_copy_using_reordered_dimensions);
|
||||||
TEST_LAYOUTS(test_block_io_zero_stride);
|
TEST_LAYOUTS(test_block_io_zero_stride);
|
||||||
TEST_LAYOUTS(test_block_io_squeeze_ones);
|
TEST_LAYOUTS(test_block_io_squeeze_ones);
|
||||||
|
TEST_LAYOUTS_AND_DIMS(float, test_block_cwise_unary_io_basic);
|
||||||
|
TEST_LAYOUTS(test_block_cwise_unary_io_squeeze_ones);
|
||||||
|
TEST_LAYOUTS(test_block_cwise_unary_io_zero_strides);
|
||||||
TEST_LAYOUTS_AND_DIMS(float, test_block_cwise_binary_io_basic);
|
TEST_LAYOUTS_AND_DIMS(float, test_block_cwise_binary_io_basic);
|
||||||
TEST_LAYOUTS(test_block_cwise_binary_io_squeeze_ones);
|
TEST_LAYOUTS(test_block_cwise_binary_io_squeeze_ones);
|
||||||
TEST_LAYOUTS(test_block_cwise_binary_io_zero_strides);
|
TEST_LAYOUTS(test_block_cwise_binary_io_zero_strides);
|
||||||
|
@ -29,6 +29,33 @@ static array<Index, NumDims> RandomDims(int min_dim = 1, int max_dim = 20) {
|
|||||||
return dims;
|
return dims;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
template <typename T, int NumDims, typename Device, bool Vectorizable,
|
||||||
|
bool Tileable, int Layout>
|
||||||
|
static void test_execute_unary_expr(Device d) {
|
||||||
|
static constexpr int Options = 0 | Layout;
|
||||||
|
|
||||||
|
// Pick a large enough tensor size to bypass small tensor block evaluation
|
||||||
|
// optimization.
|
||||||
|
auto dims = RandomDims<NumDims>(50 / NumDims, 100 / NumDims);
|
||||||
|
|
||||||
|
Tensor<T, NumDims, Options, Index> src(dims);
|
||||||
|
Tensor<T, NumDims, Options, Index> dst(dims);
|
||||||
|
|
||||||
|
src.setRandom();
|
||||||
|
const auto expr = src.square();
|
||||||
|
|
||||||
|
using Assign = TensorAssignOp<decltype(dst), const decltype(expr)>;
|
||||||
|
using Executor =
|
||||||
|
internal::TensorExecutor<const Assign, Device, Vectorizable, Tileable>;
|
||||||
|
|
||||||
|
Executor::run(Assign(dst, expr), d);
|
||||||
|
|
||||||
|
for (Index i = 0; i < dst.dimensions().TotalSize(); ++i) {
|
||||||
|
T square = src.coeff(i) * src.coeff(i);
|
||||||
|
VERIFY_IS_EQUAL(square, dst.coeff(i));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
template <typename T, int NumDims, typename Device, bool Vectorizable,
|
template <typename T, int NumDims, typename Device, bool Vectorizable,
|
||||||
bool Tileable, int Layout>
|
bool Tileable, int Layout>
|
||||||
static void test_execute_binary_expr(Device d)
|
static void test_execute_binary_expr(Device d)
|
||||||
@ -445,6 +472,10 @@ EIGEN_DECLARE_TEST(cxx11_tensor_executor) {
|
|||||||
Eigen::ThreadPool tp(num_threads);
|
Eigen::ThreadPool tp(num_threads);
|
||||||
Eigen::ThreadPoolDevice tp_device(&tp, num_threads);
|
Eigen::ThreadPoolDevice tp_device(&tp, num_threads);
|
||||||
|
|
||||||
|
CALL_SUBTEST_COMBINATIONS(test_execute_unary_expr, float, 3);
|
||||||
|
CALL_SUBTEST_COMBINATIONS(test_execute_unary_expr, float, 4);
|
||||||
|
CALL_SUBTEST_COMBINATIONS(test_execute_unary_expr, float, 5);
|
||||||
|
|
||||||
CALL_SUBTEST_COMBINATIONS(test_execute_binary_expr, float, 3);
|
CALL_SUBTEST_COMBINATIONS(test_execute_binary_expr, float, 3);
|
||||||
CALL_SUBTEST_COMBINATIONS(test_execute_binary_expr, float, 4);
|
CALL_SUBTEST_COMBINATIONS(test_execute_binary_expr, float, 4);
|
||||||
CALL_SUBTEST_COMBINATIONS(test_execute_binary_expr, float, 5);
|
CALL_SUBTEST_COMBINATIONS(test_execute_binary_expr, float, 5);
|
||||||
|
Loading…
x
Reference in New Issue
Block a user