Using PointerType struct and specializing it per device for TensorCustomOp.h

Mehdi Goli 2018-08-16 00:07:02 +01:00
commit 161dcbae9b
11 changed files with 169 additions and 207 deletions

unsupported/Eigen/CXX11/src/Tensor/TensorBlock.h

@@ -73,7 +73,7 @@ struct TensorOpResourceRequirements {
   // expression tree (like reductions) to communicate resources
   // requirements based on local state (like the total number of reductions
   // to be computed).
-  TensorOpResourceRequirements(internal::TensorBlockShapeType shape,
+  TensorOpResourceRequirements(TensorBlockShapeType shape,
                                const Index size)
       : block_shape(shape), block_total_size(size) {}
 };
@@ -90,9 +90,9 @@ EIGEN_STRONG_INLINE void MergeResourceRequirements(
   *block_shape = resources[0].block_shape;
   *block_total_size = resources[0].block_total_size;
   for (std::vector<TensorOpResourceRequirements>::size_type i = 1; i < resources.size(); ++i) {
-    if (resources[i].block_shape == TensorBlockShapeType::kSkewedInnerDims &&
-        *block_shape != TensorBlockShapeType::kSkewedInnerDims) {
+    if (resources[i].block_shape == kSkewedInnerDims &&
+        *block_shape != kSkewedInnerDims) {
-      *block_shape = TensorBlockShapeType::kSkewedInnerDims;
+      *block_shape = kSkewedInnerDims;
     }
     *block_total_size =
         numext::maxi(*block_total_size, resources[i].block_total_size);
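As a usage sketch of the merge logic in this hunk (a minimal example assuming only the internal:: names visible above; the call site itself is illustrative, not part of the commit):

// Two ops report their blocking preferences; the merge keeps the more
// constrained kSkewedInnerDims shape and the largest requested block size.
std::vector<internal::TensorOpResourceRequirements> resources;
resources.push_back(
    internal::TensorOpResourceRequirements(internal::kUniformAllDims, 1024));
resources.push_back(
    internal::TensorOpResourceRequirements(internal::kSkewedInnerDims, 4096));

internal::TensorBlockShapeType block_shape;
Eigen::Index block_total_size;
internal::MergeResourceRequirements(resources, &block_shape, &block_total_size);
// block_shape == internal::kSkewedInnerDims, block_total_size == 4096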
@@ -152,11 +152,11 @@ struct TensorBlockCopyOp {
     const Scalar* src_base = &src_data[src_index];
     Scalar* dst_base = &dst_data[dst_index];
 
-    typedef const Eigen::Array<Scalar, Dynamic, 1> Src;
-    typedef Eigen::Array<Scalar, Dynamic, 1> Dst;
+    typedef const Array<Scalar, Dynamic, 1> Src;
+    typedef Array<Scalar, Dynamic, 1> Dst;
 
-    typedef Eigen::Map<Src, 0, InnerStride<> > SrcMap;
-    typedef Eigen::Map<Dst, 0, InnerStride<> > DstMap;
+    typedef Map<Src, 0, InnerStride<> > SrcMap;
+    typedef Map<Dst, 0, InnerStride<> > DstMap;
 
     const SrcMap src(src_base, num_coeff_to_copy, InnerStride<>(src_stride));
     DstMap dst(dst_base, num_coeff_to_copy, InnerStride<>(dst_stride));
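The Map/InnerStride typedefs above are what let the copy kernel hand a raw strided range to Eigen's vectorized assignment. A self-contained sketch of the same pattern (a standalone illustration, not the kernel itself):

#include <Eigen/Dense>

// Copy n coefficients between two strided 1-D ranges by mapping the raw
// pointers as dynamically-sized arrays with a runtime inner stride.
void strided_copy(const float* src, int src_stride,
                  float* dst, int dst_stride, int n) {
  typedef Eigen::Array<float, Eigen::Dynamic, 1> ArrayX;
  typedef Eigen::Map<const ArrayX, 0, Eigen::InnerStride<> > SrcMap;
  typedef Eigen::Map<ArrayX, 0, Eigen::InnerStride<> > DstMap;
  const SrcMap s(src, n, Eigen::InnerStride<>(src_stride));
  DstMap d(dst, n, Eigen::InnerStride<>(dst_stride));
  d = s;  // Eigen vectorizes this assignment where the strides permit.
}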
@@ -178,10 +178,8 @@ template <typename Scalar, typename StorageIndex, int NumDims, int Layout,
           bool BlockRead>
 class TensorBlockIO {
  public:
-  typedef typename internal::TensorBlock<Scalar, StorageIndex, NumDims, Layout>
-      TensorBlock;
-  typedef typename internal::TensorBlockCopyOp<Scalar, StorageIndex>
-      TensorBlockCopyOp;
+  typedef TensorBlock<Scalar, StorageIndex, NumDims, Layout> Block;
+  typedef TensorBlockCopyOp<Scalar, StorageIndex> BlockCopyOp;
 
  protected:
   struct BlockIteratorState {
@@ -194,7 +192,7 @@ class TensorBlockIO {
   };
 
   static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void Copy(
-      const TensorBlock& block, StorageIndex first_coeff_index,
+      const Block& block, StorageIndex first_coeff_index,
       const array<StorageIndex, NumDims>& tensor_to_block_dim_map,
       const array<StorageIndex, NumDims>& tensor_strides, const Scalar* src_data,
       Scalar* dst_data) {
@@ -290,8 +288,8 @@ class TensorBlockIO {
     const StorageIndex block_total_size =
         NumDims == 0 ? 1 : block.block_sizes().TotalSize();
     for (StorageIndex i = 0; i < block_total_size; i += block_inner_dim_size) {
-      TensorBlockCopyOp::Run(block_inner_dim_size, outputIndex, output_stride,
-                             dst_data, inputIndex, input_stride, src_data);
+      BlockCopyOp::Run(block_inner_dim_size, outputIndex, output_stride,
+                       dst_data, inputIndex, input_stride, src_data);
       // Update index.
       for (int j = 0; j < num_squeezed_dims; ++j) {
         if (++block_iter_state[j].count < block_iter_state[j].size) {
@@ -320,13 +318,11 @@ template <typename Scalar, typename StorageIndex, int NumDims, int Layout>
 class TensorBlockReader : public TensorBlockIO<Scalar, StorageIndex, NumDims,
                                                Layout, /*BlockRead=*/true> {
  public:
-  typedef typename internal::TensorBlock<Scalar, StorageIndex, NumDims, Layout>
-      TensorBlock;
-  typedef TensorBlockIO<Scalar, StorageIndex, NumDims, Layout, /*BlockRead=*/true>
-      Base;
+  typedef TensorBlock<Scalar, StorageIndex, NumDims, Layout> Block;
+  typedef TensorBlockIO<Scalar, StorageIndex, NumDims, Layout, /*BlockRead=*/true> Base;
 
   static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void Run(
-      TensorBlock* block, const Scalar* src_data) {
+      Block* block, const Scalar* src_data) {
     array<StorageIndex, NumDims> tensor_to_block_dim_map;
     for (int i = 0; i < NumDims; ++i) {
       tensor_to_block_dim_map[i] = i;
@@ -336,7 +332,7 @@ class TensorBlockReader : public TensorBlockIO<Scalar, StorageIndex, NumDims,
   }
 
   static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void Run(
-      TensorBlock* block, StorageIndex first_coeff_index,
+      Block* block, StorageIndex first_coeff_index,
       const array<StorageIndex, NumDims>& tensor_to_block_dim_map,
       const array<StorageIndex, NumDims>& tensor_strides, const Scalar* src_data) {
     Base::Copy(*block, first_coeff_index, tensor_to_block_dim_map,
@@ -357,13 +353,11 @@ template <typename Scalar, typename StorageIndex, int NumDims, int Layout>
 class TensorBlockWriter : public TensorBlockIO<Scalar, StorageIndex, NumDims,
                                                Layout, /*BlockRead=*/false> {
  public:
-  typedef typename internal::TensorBlock<Scalar, StorageIndex, NumDims, Layout>
-      TensorBlock;
-  typedef TensorBlockIO<Scalar, StorageIndex, NumDims, Layout, /*BlockRead=*/false>
-      Base;
+  typedef TensorBlock<Scalar, StorageIndex, NumDims, Layout> Block;
+  typedef TensorBlockIO<Scalar, StorageIndex, NumDims, Layout, /*BlockRead=*/false> Base;
 
   static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void Run(
-      const TensorBlock& block, Scalar* dst_data) {
+      const Block& block, Scalar* dst_data) {
     array<StorageIndex, NumDims> tensor_to_block_dim_map;
     for (int i = 0; i < NumDims; ++i) {
       tensor_to_block_dim_map[i] = i;
@@ -373,7 +367,7 @@ class TensorBlockWriter : public TensorBlockIO<Scalar, StorageIndex, NumDims,
   }
 
   static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void Run(
-      const TensorBlock& block, StorageIndex first_coeff_index,
+      const Block& block, StorageIndex first_coeff_index,
       const array<StorageIndex, NumDims>& tensor_to_block_dim_map,
       const array<StorageIndex, NumDims>& tensor_strides, Scalar* dst_data) {
     Base::Copy(block, first_coeff_index, tensor_to_block_dim_map,
@@ -401,13 +395,13 @@ struct TensorBlockCwiseBinaryOp {
       const StorageIndex left_stride, const LeftScalar* left_data,
       const StorageIndex right_index, const StorageIndex right_stride,
       const RightScalar* right_data) {
-    typedef const Eigen::Array<LeftScalar, Dynamic, 1> Lhs;
-    typedef const Eigen::Array<RightScalar, Dynamic, 1> Rhs;
-    typedef Eigen::Array<OutputScalar, Dynamic, 1> Out;
+    typedef const Array<LeftScalar, Dynamic, 1> Lhs;
+    typedef const Array<RightScalar, Dynamic, 1> Rhs;
+    typedef Array<OutputScalar, Dynamic, 1> Out;
 
-    typedef Eigen::Map<Lhs, 0, InnerStride<> > LhsMap;
-    typedef Eigen::Map<Rhs, 0, InnerStride<> > RhsMap;
-    typedef Eigen::Map<Out, 0, InnerStride<> > OutMap;
+    typedef Map<Lhs, 0, InnerStride<> > LhsMap;
+    typedef Map<Rhs, 0, InnerStride<> > RhsMap;
+    typedef Map<Out, 0, InnerStride<> > OutMap;
 
     const LeftScalar* lhs_base = &left_data[left_index];
     const RightScalar* rhs_base = &right_data[right_index];
@@ -417,8 +411,7 @@ struct TensorBlockCwiseBinaryOp {
     const RhsMap rhs(rhs_base, num_coeff, InnerStride<>(right_stride));
     OutMap out(out_base, num_coeff, InnerStride<>(output_stride));
 
-    out =
-        Eigen::CwiseBinaryOp<BinaryFunctor, LhsMap, RhsMap>(lhs, rhs, functor);
+    out = CwiseBinaryOp<BinaryFunctor, LhsMap, RhsMap>(lhs, rhs, functor);
   }
 };
@@ -434,8 +427,7 @@ struct TensorBlockCwiseBinaryOp {
 template <typename BinaryFunctor, typename StorageIndex, typename OutputScalar,
           int NumDims, int Layout>
 struct TensorBlockCwiseBinaryIO {
-  typedef typename internal::TensorBlock<OutputScalar, StorageIndex, NumDims,
-                                         Layout>::Dimensions Dimensions;
+  typedef typename TensorBlock<OutputScalar, StorageIndex, NumDims, Layout>::Dimensions Dimensions;
 
   struct BlockIteratorState {
     StorageIndex output_stride, output_span;
@@ -627,8 +619,7 @@ struct TensorBlockView {
 template <typename Scalar, typename StorageIndex, int NumDims, int Layout>
 class TensorBlockMapper {
  public:
-  typedef typename internal::TensorBlock<Scalar, StorageIndex, NumDims, Layout>
-      TensorBlock;
+  typedef TensorBlock<Scalar, StorageIndex, NumDims, Layout> Block;
   typedef DSizes<StorageIndex, NumDims> Dimensions;
 
   TensorBlockMapper(const Dimensions& dims,
@@ -663,7 +654,7 @@ class TensorBlockMapper {
     }
   }
 
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBlock
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Block
   GetBlockForIndex(StorageIndex block_index, Scalar* data) const {
     StorageIndex first_coeff_index = 0;
     DSizes<StorageIndex, NumDims> coords;
@@ -711,8 +702,7 @@ class TensorBlockMapper {
       }
     }
 
-    return TensorBlock(first_coeff_index, sizes, strides, m_tensor_strides,
-                       data);
+    return Block(first_coeff_index, sizes, strides, m_tensor_strides, data);
   }
 
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE StorageIndex total_block_count() const {
@@ -742,7 +732,7 @@ class TensorBlockMapper {
         block_dim_sizes[i] = 1;
       }
     } else if (block_dim_sizes.TotalSize() > min_target_size) {
-      if (block_shape == TensorBlockShapeType::kUniformAllDims) {
+      if (block_shape == kUniformAllDims) {
         // Tensor will not fit within 'min_target_size' budget: calculate tensor
         // block dimension sizes based on "square" dimension size target.
         const size_t dim_size_target = static_cast<const size_t>(
@@ -773,7 +763,7 @@ class TensorBlockMapper {
           total_size = total_size_other_dims * block_dim_sizes[dim];
         }
       }
-    } else if (block_shape == TensorBlockShapeType::kSkewedInnerDims) {
+    } else if (block_shape == kSkewedInnerDims) {
       StorageIndex coeff_to_allocate = min_target_size;
       for (int i = 0; i < NumDims; ++i) {
         const int dim = cond<Layout>()(i, NumDims - i - 1);
@@ -818,8 +808,7 @@ class TensorBlockMapper {
 template <typename Scalar, typename StorageIndex, int NumDims, int Layout>
 class TensorSliceBlockMapper {
  public:
-  typedef typename internal::TensorBlock<Scalar, StorageIndex, NumDims, Layout>
-      TensorBlock;
+  typedef TensorBlock<Scalar, StorageIndex, NumDims, Layout> Block;
   typedef DSizes<StorageIndex, NumDims> Dimensions;
 
   TensorSliceBlockMapper(const Dimensions& tensor_dims,
@@ -860,7 +849,7 @@ class TensorSliceBlockMapper {
     }
   }
 
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBlock
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Block
   GetBlockForIndex(StorageIndex block_index, Scalar* data) const {
     StorageIndex first_coeff_index = 0;
     DSizes<StorageIndex, NumDims> coords;
@@ -917,8 +906,7 @@ class TensorSliceBlockMapper {
       }
     }
 
-    return TensorBlock(first_coeff_index, sizes, strides, m_tensor_strides,
-                       data);
+    return Block(first_coeff_index, sizes, strides, m_tensor_strides, data);
   }
 
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE StorageIndex total_block_count() const {

unsupported/Eigen/CXX11/src/Tensor/TensorContraction.h

@@ -152,13 +152,7 @@ struct TensorContractionParams {
 //   1. Elementwise Relu transformation following Conv2D.
 //   2. AddBias to the Conv2D output channels dimension.
 //
-// See expected implementation in NoOpOutputKernel.
-struct OutputKernel {
-  template <typename Index, typename Scalar>
-  using OutputMapper = internal::blas_data_mapper<Scalar, Index, ColMajor>;
-};
-
-// Output kernel that does absolutely nothing.
+// The NoOpOutputKernel implements an output kernel that does absolutely nothing.
 struct NoOpOutputKernel {
   /**
    * Tensor contraction evaluator calls this kernel after finishing each block
@@ -177,7 +171,7 @@
    */
   template <typename Index, typename Scalar>
   EIGEN_ALWAYS_INLINE void operator()(
-      const OutputKernel::OutputMapper<Index, Scalar>& /*output_mapper*/,
+      const internal::blas_data_mapper<Scalar, Index, ColMajor>& /*output_mapper*/,
       const TensorContractionParams& /*params*/, Index /*i*/,
       Index /*j*/, Index /*num_rows*/, Index /*num_cols*/) const {}
 };
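With the OutputKernel helper struct removed, a user-defined kernel now names the mapper type directly. A hedged sketch of a custom kernel against the new signature (this ReluOutputKernel is illustrative, not part of the commit):

// Illustrative kernel: clamp each finished output block to [0, +inf).
// blas_data_mapper::operator()(row, col) yields a mutable reference into
// the contraction's output buffer.
struct ReluOutputKernel {
  template <typename Index, typename Scalar>
  EIGEN_ALWAYS_INLINE void operator()(
      const internal::blas_data_mapper<Scalar, Index, ColMajor>& output_mapper,
      const TensorContractionParams& /*params*/, Index /*i*/, Index /*j*/,
      Index num_rows, Index num_cols) const {
    for (Index col = 0; col < num_cols; ++col) {
      for (Index row = 0; row < num_rows; ++row) {
        Scalar& v = output_mapper(row, col);
        v = numext::maxi(v, Scalar(0));
      }
    }
  }
};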

unsupported/Eigen/CXX11/src/Tensor/TensorCustomOp.h

@@ -20,8 +20,8 @@ namespace Eigen {
   *
   */
 namespace internal {
-template<typename CustomUnaryFunc, typename XprType, template <class> class MakePointer_>
-struct traits<TensorCustomUnaryOp<CustomUnaryFunc, XprType, MakePointer_> >
+template<typename CustomUnaryFunc, typename XprType>
+struct traits<TensorCustomUnaryOp<CustomUnaryFunc, XprType> >
 {
   typedef typename XprType::Scalar Scalar;
   typedef typename XprType::StorageKind StorageKind;
@@ -31,34 +31,26 @@ struct traits<TensorCustomUnaryOp<CustomUnaryFunc, XprType, MakePointer_> >
   static const int NumDimensions = traits<XprType>::NumDimensions;
   static const int Layout = traits<XprType>::Layout;
-
-  template <class T> struct MakePointer {
-    // Intermediate typedef to workaround MSVC issue.
-    typedef MakePointer_<T> MakePointerT;
-    typedef typename MakePointerT::Type Type;
-    typedef typename MakePointerT::RefType RefType;
-    typedef typename MakePointerT::ScalarType ScalarType;
-  };
-  typedef typename MakePointer<typename internal::remove_const<typename XprType::CoeffReturnType>::type>::Type PointerType;
 };
 
-template<typename CustomUnaryFunc, typename XprType, template <class> class MakePointer_>
-struct eval<TensorCustomUnaryOp<CustomUnaryFunc, XprType, MakePointer_>, Eigen::Dense>
+template<typename CustomUnaryFunc, typename XprType>
+struct eval<TensorCustomUnaryOp<CustomUnaryFunc, XprType>, Eigen::Dense>
 {
-  typedef const TensorCustomUnaryOp<CustomUnaryFunc, XprType, MakePointer_>& type;
+  typedef const TensorCustomUnaryOp<CustomUnaryFunc, XprType>& type;
 };
 
-template<typename CustomUnaryFunc, typename XprType, template <class> class MakePointer_>
-struct nested<TensorCustomUnaryOp<CustomUnaryFunc, XprType, MakePointer_> >
+template<typename CustomUnaryFunc, typename XprType>
+struct nested<TensorCustomUnaryOp<CustomUnaryFunc, XprType> >
 {
-  typedef TensorCustomUnaryOp<CustomUnaryFunc, XprType, MakePointer_> type;
+  typedef TensorCustomUnaryOp<CustomUnaryFunc, XprType> type;
 };
 
 }  // end namespace internal
 
-template<typename CustomUnaryFunc, typename XprType, template <class> class MakePointer_>
-class TensorCustomUnaryOp : public TensorBase<TensorCustomUnaryOp<CustomUnaryFunc, XprType, MakePointer_>, ReadOnlyAccessors>
+template<typename CustomUnaryFunc, typename XprType>
+class TensorCustomUnaryOp : public TensorBase<TensorCustomUnaryOp<CustomUnaryFunc, XprType>, ReadOnlyAccessors>
 {
   public:
   typedef typename internal::traits<TensorCustomUnaryOp>::Scalar Scalar;
@@ -85,10 +77,10 @@ class TensorCustomUnaryOp : public TensorBase<TensorCustomUnaryOp<CustomUnaryFun
 
 // Eval as rvalue
-template<typename CustomUnaryFunc, typename XprType, template <class> class MakePointer_, typename Device>
-struct TensorEvaluator<const TensorCustomUnaryOp<CustomUnaryFunc, XprType, MakePointer_>, Device>
+template<typename CustomUnaryFunc, typename XprType, typename Device>
+struct TensorEvaluator<const TensorCustomUnaryOp<CustomUnaryFunc, XprType>, Device>
 {
-  typedef TensorCustomUnaryOp<CustomUnaryFunc, XprType, MakePointer_> ArgType;
+  typedef TensorCustomUnaryOp<CustomUnaryFunc, XprType> ArgType;
   typedef typename internal::traits<ArgType>::Index Index;
   static const int NumDims = internal::traits<ArgType>::NumDimensions;
   typedef DSizes<Index, NumDims> Dimensions;
@@ -96,7 +88,7 @@ struct TensorEvaluator<const TensorCustomUnaryOp<CustomUnaryFunc, XprType, MakeP
   typedef typename internal::remove_const<typename XprType::CoeffReturnType>::type CoeffReturnType;
   typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
   static const int PacketSize = PacketType<CoeffReturnType, Device>::size;
-  typedef typename Eigen::internal::traits<ArgType>::PointerType PointerType;
+  typedef typename PointerType<CoeffReturnType, Device>::Type PointerT;
 
   enum {
     IsAligned = false,
@@ -115,12 +107,12 @@ struct TensorEvaluator<const TensorCustomUnaryOp<CustomUnaryFunc, XprType, MakeP
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; }
 
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(PointerType data) {
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(PointerT data) {
     if (data) {
       evalTo(data);
       return false;
     } else {
-      m_result = static_cast<PointerType>(
+      m_result = static_cast<PointerT>(
           m_device.allocate_temp(dimensions().TotalSize() * sizeof(Scalar)));
       evalTo(m_result);
       return true;
@@ -148,14 +140,14 @@ struct TensorEvaluator<const TensorCustomUnaryOp<CustomUnaryFunc, XprType, MakeP
     return TensorOpCost(sizeof(CoeffReturnType), 0, 0, vectorized, PacketSize);
   }
 
-  EIGEN_DEVICE_FUNC PointerType data() const { return m_result; }
+  EIGEN_DEVICE_FUNC PointerT data() const { return m_result; }
 
 #ifdef EIGEN_USE_SYCL
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Device& device() const { return m_device; }
 #endif
 
  protected:
-  EIGEN_DEVICE_FUNC void evalTo(PointerType data) {
+  EIGEN_DEVICE_FUNC void evalTo(PointerT data) {
     TensorMap<Tensor<CoeffReturnType, NumDims, Layout, Index> > result(data, m_dimensions);
     m_op.func().eval(m_op.expression(), result, m_device);
   }
@@ -163,7 +155,7 @@ struct TensorEvaluator<const TensorCustomUnaryOp<CustomUnaryFunc, XprType, MakeP
   Dimensions m_dimensions;
   const ArgType m_op;
   const Device& m_device;
-  PointerType m_result;
+  PointerT m_result;
 };
@@ -176,8 +168,8 @@ struct TensorEvaluator<const TensorCustomUnaryOp<CustomUnaryFunc, XprType, MakeP
   *
   */
 namespace internal {
-template<typename CustomBinaryFunc, typename LhsXprType, typename RhsXprType, template <class> class MakePointer_>
-struct traits<TensorCustomBinaryOp<CustomBinaryFunc, LhsXprType, RhsXprType, MakePointer_> >
+template<typename CustomBinaryFunc, typename LhsXprType, typename RhsXprType>
+struct traits<TensorCustomBinaryOp<CustomBinaryFunc, LhsXprType, RhsXprType> >
 {
   typedef typename internal::promote_storage_type<typename LhsXprType::Scalar,
                                                   typename RhsXprType::Scalar>::ret Scalar;
@@ -194,34 +186,26 @@ struct traits<TensorCustomBinaryOp<CustomBinaryFunc, LhsXprType, RhsXprType, Mak
   static const int NumDimensions = traits<LhsXprType>::NumDimensions;
   static const int Layout = traits<LhsXprType>::Layout;
-
-  template <class T> struct MakePointer {
-    // Intermediate typedef to workaround MSVC issue.
-    typedef MakePointer_<T> MakePointerT;
-    typedef typename MakePointerT::Type Type;
-    typedef typename MakePointerT::RefType RefType;
-    typedef typename MakePointerT::ScalarType ScalarType;
-  };
-  typedef typename MakePointer<CoeffReturnType>::Type PointerType;
 };
 
-template<typename CustomBinaryFunc, typename LhsXprType, typename RhsXprType, template <class> class MakePointer_>
-struct eval<TensorCustomBinaryOp<CustomBinaryFunc, LhsXprType, RhsXprType, MakePointer_>, Eigen::Dense>
+template<typename CustomBinaryFunc, typename LhsXprType, typename RhsXprType>
+struct eval<TensorCustomBinaryOp<CustomBinaryFunc, LhsXprType, RhsXprType>, Eigen::Dense>
 {
   typedef const TensorCustomBinaryOp<CustomBinaryFunc, LhsXprType, RhsXprType>& type;
 };
 
-template<typename CustomBinaryFunc, typename LhsXprType, typename RhsXprType, template <class> class MakePointer_>
-struct nested<TensorCustomBinaryOp<CustomBinaryFunc, LhsXprType, RhsXprType, MakePointer_> >
+template<typename CustomBinaryFunc, typename LhsXprType, typename RhsXprType>
+struct nested<TensorCustomBinaryOp<CustomBinaryFunc, LhsXprType, RhsXprType> >
 {
-  typedef TensorCustomBinaryOp<CustomBinaryFunc, LhsXprType, RhsXprType, MakePointer_> type;
+  typedef TensorCustomBinaryOp<CustomBinaryFunc, LhsXprType, RhsXprType> type;
 };
 
 }  // end namespace internal
 
-template<typename CustomBinaryFunc, typename LhsXprType, typename RhsXprType, template <class> class MakePointer_>
-class TensorCustomBinaryOp : public TensorBase<TensorCustomBinaryOp<CustomBinaryFunc, LhsXprType, RhsXprType, MakePointer_>, ReadOnlyAccessors>
+template<typename CustomBinaryFunc, typename LhsXprType, typename RhsXprType>
+class TensorCustomBinaryOp : public TensorBase<TensorCustomBinaryOp<CustomBinaryFunc, LhsXprType, RhsXprType>, ReadOnlyAccessors>
 {
   public:
   typedef typename internal::traits<TensorCustomBinaryOp>::Scalar Scalar;
@@ -254,10 +238,10 @@ class TensorCustomBinaryOp : public TensorBase<TensorCustomBinaryOp<CustomBinary
 
 // Eval as rvalue
-template<typename CustomBinaryFunc, typename LhsXprType, typename RhsXprType, template <class> class MakePointer_, typename Device>
-struct TensorEvaluator<const TensorCustomBinaryOp<CustomBinaryFunc, LhsXprType, RhsXprType, MakePointer_>, Device>
+template<typename CustomBinaryFunc, typename LhsXprType, typename RhsXprType, typename Device>
+struct TensorEvaluator<const TensorCustomBinaryOp<CustomBinaryFunc, LhsXprType, RhsXprType>, Device>
 {
-  typedef TensorCustomBinaryOp<CustomBinaryFunc, LhsXprType, RhsXprType, MakePointer_> XprType;
+  typedef TensorCustomBinaryOp<CustomBinaryFunc, LhsXprType, RhsXprType> XprType;
   typedef typename internal::traits<XprType>::Index Index;
   static const int NumDims = internal::traits<XprType>::NumDimensions;
   typedef DSizes<Index, NumDims> Dimensions;
@@ -265,7 +249,7 @@ struct TensorEvaluator<const TensorCustomBinaryOp<CustomBinaryFunc, LhsXprType,
   typedef typename internal::remove_const<typename XprType::CoeffReturnType>::type CoeffReturnType;
   typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
   static const int PacketSize = PacketType<CoeffReturnType, Device>::size;
-  typedef typename Eigen::internal::traits<XprType>::PointerType PointerType;
+  typedef typename PointerType<CoeffReturnType, Device>::Type PointerT;
 
   enum {
     IsAligned = false,
@@ -284,12 +268,12 @@ struct TensorEvaluator<const TensorCustomBinaryOp<CustomBinaryFunc, LhsXprType,
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; }
 
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(PointerType data) {
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(PointerT data) {
     if (data) {
      evalTo(data);
      return false;
    } else {
-      m_result = static_cast<PointerType>(m_device.allocate_temp(dimensions().TotalSize() * sizeof(CoeffReturnType)));
+      m_result = static_cast<PointerT>(m_device.allocate_temp(dimensions().TotalSize() * sizeof(CoeffReturnType)));
      evalTo(m_result);
      return true;
    }
@@ -316,14 +300,14 @@ struct TensorEvaluator<const TensorCustomBinaryOp<CustomBinaryFunc, LhsXprType,
     return TensorOpCost(sizeof(CoeffReturnType), 0, 0, vectorized, PacketSize);
   }
 
-  EIGEN_DEVICE_FUNC PointerType data() const { return m_result; }
+  EIGEN_DEVICE_FUNC PointerT data() const { return m_result; }
 
 #ifdef EIGEN_USE_SYCL
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Device& device() const { return m_device; }
 #endif
 
  protected:
-  EIGEN_DEVICE_FUNC void evalTo(PointerType data) {
+  EIGEN_DEVICE_FUNC void evalTo(PointerT data) {
     TensorMap<Tensor<CoeffReturnType, NumDims, Layout> > result(data, m_dimensions);
     m_op.func().eval(m_op.lhsExpression(), m_op.rhsExpression(), result, m_device);
   }
@@ -331,7 +315,7 @@ struct TensorEvaluator<const TensorCustomBinaryOp<CustomBinaryFunc, LhsXprType,
   Dimensions m_dimensions;
   const XprType m_op;
   const Device& m_device;
-  PointerType m_result;
+  PointerT m_result;
 };

unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h

@@ -132,7 +132,7 @@ class TensorExecutor<Expression, DefaultDevice, Vectorizable,
     if (needs_assign) {
       // Size tensor blocks to fit in cache (or requested target block size).
       Index block_total_size = numext::mini(cache_size, total_size);
-      TensorBlockShapeType block_shape = TensorBlockShapeType::kSkewedInnerDims;
+      TensorBlockShapeType block_shape = kSkewedInnerDims;
       // Query expression tree for desired block size/shape.
       std::vector<TensorOpResourceRequirements> resources;
       evaluator.getResourceRequirements(&resources);
@@ -229,10 +229,6 @@ class TensorExecutor<Expression, ThreadPoolDevice, Vectorizable, Tileable> {
     Evaluator evaluator(expr, device);
     const bool needs_assign = evaluator.evalSubExprsIfNeeded(NULL);
     if (needs_assign) {
-      const StorageIndex PacketSize =
-          Vectorizable
-              ? unpacket_traits<typename Evaluator::PacketReturnType>::size
-              : 1;
       const StorageIndex size = array_prod(evaluator.dimensions());
       device.parallelFor(size, evaluator.costPerCoeff(Vectorizable),
                          EvalRange::alignBlockSize,
@@ -272,7 +268,7 @@ class TensorExecutor<Expression, ThreadPoolDevice, Vectorizable, /*Tileable*/ tr
     const bool needs_assign = evaluator.evalSubExprsIfNeeded(NULL);
     if (needs_assign) {
-      TensorBlockShapeType block_shape = TensorBlockShapeType::kSkewedInnerDims;
+      TensorBlockShapeType block_shape = kSkewedInnerDims;
       Index block_total_size = 0;
       // Query expression tree for desired block size/shape.
       std::vector<internal::TensorOpResourceRequirements> resources;

unsupported/Eigen/CXX11/src/Tensor/TensorForwardDeclarations.h

@@ -24,6 +24,14 @@ template<typename T> struct MakePointer {
   typedef T ScalarType;
 };
 
+// The PointerType struct is a container for the device-specific pointer
+// used to refer to a pointer on the TensorEvaluator class. While the
+// TensorExpression is a device-agnostic type and needs the MakePointer class
+// for type conversion, the TensorEvaluator class can be specialized for a
+// device, hence it is possible to construct different types of temporary
+// storage memory in TensorEvaluator for different devices by specializing
+// the following PointerType struct.
+template<typename T, typename Device> struct PointerType : MakePointer<T>{};
+
 namespace internal{
 template<typename A, typename B> struct Pointer_type_promotion {
   static const bool val=false;
@@ -89,8 +97,8 @@ template<typename LeftXprType, typename RightXprType> class TensorAssignOp;
 template<typename Op, typename XprType> class TensorScanOp;
 template<typename Dims, typename XprType> class TensorTraceOp;
 
-template<typename CustomUnaryFunc, typename XprType, template <class> class MakePointer_ = MakePointer> class TensorCustomUnaryOp;
-template<typename CustomBinaryFunc, typename LhsXprType, typename RhsXprType, template <class> class MakePointer_ = MakePointer> class TensorCustomBinaryOp;
+template<typename CustomUnaryFunc, typename XprType> class TensorCustomUnaryOp;
+template<typename CustomBinaryFunc, typename LhsXprType, typename RhsXprType> class TensorCustomBinaryOp;
 template<typename XprType, template <class> class MakePointer_ = MakePointer> class TensorEvalToOp;
 template<typename XprType> class TensorForcedEvalOp;
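The new PointerType template above is the extension hook this commit is named for. A hedged sketch of how a device backend might use it (MyDevice and device_ptr are hypothetical stand-ins, not Eigen types):

// Hypothetical device and fancy-pointer type, for illustration only.
struct MyDevice;
template <typename T> struct device_ptr { T* raw; };

namespace Eigen {
// With this specialization, the custom-op evaluators' PointerT typedef,
// PointerType<CoeffReturnType, Device>::Type, resolves to device_ptr<T>
// whenever Device is MyDevice, so temporary results can live in
// device-specific storage.
template <typename T>
struct PointerType<T, MyDevice> {
  typedef device_ptr<T> Type;
};
}  // namespace Eigen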

unsupported/test/cxx11_tensor_block_access.cpp

@ -10,6 +10,8 @@
#include "main.h" #include "main.h"
#include <algorithm>
#include <random>
#include <set> #include <set>
#include <Eigen/CXX11/Tensor> #include <Eigen/CXX11/Tensor>
@ -19,17 +21,16 @@ using Eigen::Index;
using Eigen::RowMajor; using Eigen::RowMajor;
using Eigen::ColMajor; using Eigen::ColMajor;
using internal::TensorBlockShapeType;
template<typename T> template<typename T>
static const T& choose(int layout, const T& col, const T& row) { static const T& choose(int layout, const T& col, const T& row) {
return layout == ColMajor ? col : row; return layout == ColMajor ? col : row;
} }
static const TensorBlockShapeType RandomShape() { static internal::TensorBlockShapeType RandomShape() {
return internal::random<bool>() return internal::random<bool>()
? internal::TensorBlockShapeType::kUniformAllDims ? internal::kUniformAllDims
: internal::TensorBlockShapeType::kSkewedInnerDims; : internal::kSkewedInnerDims;
} }
template <int NumDims> template <int NumDims>
@ -44,7 +45,7 @@ static DSizes<Index, NumDims> RandomDims() {
dims[i] = internal::random<int>(1, 20); dims[i] = internal::random<int>(1, 20);
} }
return DSizes<Index, NumDims>(dims); return DSizes<Index, NumDims>(dims);
}; }
/** Dummy data type to test TensorBlock copy ops. */ /** Dummy data type to test TensorBlock copy ops. */
struct Data { struct Data {
@ -91,21 +92,19 @@ static void Debug(DSizes<Index, NumDims> dims) {
template <int Layout> template <int Layout>
static void test_block_mapper_sanity() static void test_block_mapper_sanity()
{ {
using T = int; typedef internal::TensorBlockMapper<int, Index, 2, Layout> TensorBlockMapper;
using TensorBlock = internal::TensorBlock<T, Index, 2, Layout>;
using TensorBlockMapper = internal::TensorBlockMapper<T, Index, 2, Layout>;
DSizes<Index, 2> tensor_dims(100, 100); DSizes<Index, 2> tensor_dims(100, 100);
// Test uniform blocks. // Test uniform blocks.
TensorBlockMapper uniform_block_mapper( TensorBlockMapper uniform_block_mapper(
tensor_dims, internal::TensorBlockShapeType::kUniformAllDims, 100); tensor_dims, internal::kUniformAllDims, 100);
VERIFY_IS_EQUAL(uniform_block_mapper.total_block_count(), 100); VERIFY_IS_EQUAL(uniform_block_mapper.total_block_count(), 100);
VERIFY_IS_EQUAL(uniform_block_mapper.block_dims_total_size(), 100); VERIFY_IS_EQUAL(uniform_block_mapper.block_dims_total_size(), 100);
// 10x10 blocks // 10x10 blocks
auto uniform_b0 = uniform_block_mapper.GetBlockForIndex(0, nullptr); auto uniform_b0 = uniform_block_mapper.GetBlockForIndex(0, NULL);
VERIFY_IS_EQUAL(uniform_b0.block_sizes().at(0), 10); VERIFY_IS_EQUAL(uniform_b0.block_sizes().at(0), 10);
VERIFY_IS_EQUAL(uniform_b0.block_sizes().at(1), 10); VERIFY_IS_EQUAL(uniform_b0.block_sizes().at(1), 10);
// Depending on a layout we stride by cols rows. // Depending on a layout we stride by cols rows.
@ -117,13 +116,13 @@ static void test_block_mapper_sanity()
// Test skewed to inner dims blocks. // Test skewed to inner dims blocks.
TensorBlockMapper skewed_block_mapper( TensorBlockMapper skewed_block_mapper(
tensor_dims, internal::TensorBlockShapeType::kSkewedInnerDims, 100); tensor_dims, internal::kSkewedInnerDims, 100);
VERIFY_IS_EQUAL(skewed_block_mapper.total_block_count(), 100); VERIFY_IS_EQUAL(skewed_block_mapper.total_block_count(), 100);
VERIFY_IS_EQUAL(skewed_block_mapper.block_dims_total_size(), 100); VERIFY_IS_EQUAL(skewed_block_mapper.block_dims_total_size(), 100);
// 1x100 (100x1) rows/cols depending on a tensor layout. // 1x100 (100x1) rows/cols depending on a tensor layout.
auto skewed_b0 = skewed_block_mapper.GetBlockForIndex(0, nullptr); auto skewed_b0 = skewed_block_mapper.GetBlockForIndex(0, NULL);
VERIFY_IS_EQUAL(skewed_b0.block_sizes().at(0), choose(Layout, 100, 1)); VERIFY_IS_EQUAL(skewed_b0.block_sizes().at(0), choose(Layout, 100, 1));
VERIFY_IS_EQUAL(skewed_b0.block_sizes().at(1), choose(Layout, 1, 100)); VERIFY_IS_EQUAL(skewed_b0.block_sizes().at(1), choose(Layout, 1, 100));
// Depending on a layout we stride by cols rows. // Depending on a layout we stride by cols rows.
@ -158,9 +157,8 @@ static void UpdateCoeffSet(
template <typename T, int NumDims, int Layout> template <typename T, int NumDims, int Layout>
static void test_block_mapper_maps_every_element() { static void test_block_mapper_maps_every_element() {
using TensorBlock = internal::TensorBlock<T, Index, NumDims, Layout>; typedef internal::TensorBlock<T, Index, NumDims, Layout> TensorBlock;
using TensorBlockMapper = typedef internal::TensorBlockMapper<T, Index, NumDims, Layout> TensorBlockMapper;
internal::TensorBlockMapper<T, Index, NumDims, Layout>;
DSizes<Index, NumDims> dims = RandomDims<NumDims>(); DSizes<Index, NumDims> dims = RandomDims<NumDims>();
@ -171,7 +169,7 @@ static void test_block_mapper_maps_every_element() {
TensorBlockMapper block_mapper(dims, RandomShape(), RandomTargetSize(dims)); TensorBlockMapper block_mapper(dims, RandomShape(), RandomTargetSize(dims));
for (int i = 0; i < block_mapper.total_block_count(); ++i) { for (int i = 0; i < block_mapper.total_block_count(); ++i) {
TensorBlock block = block_mapper.GetBlockForIndex(i, nullptr); TensorBlock block = block_mapper.GetBlockForIndex(i, NULL);
UpdateCoeffSet<T, Layout, NumDims>(block, block.first_coeff_index(), UpdateCoeffSet<T, Layout, NumDims>(block, block.first_coeff_index(),
choose(Layout, NumDims - 1, 0), choose(Layout, NumDims - 1, 0),
&coeff_set); &coeff_set);
@ -187,9 +185,8 @@ static void test_block_mapper_maps_every_element() {
template <typename T, int NumDims, int Layout> template <typename T, int NumDims, int Layout>
static void test_slice_block_mapper_maps_every_element() { static void test_slice_block_mapper_maps_every_element() {
using TensorBlock = internal::TensorBlock<T, Index, NumDims, Layout>; typedef internal::TensorBlock<T, Index, NumDims, Layout> TensorBlock;
using TensorSliceBlockMapper = typedef internal::TensorSliceBlockMapper<T, Index, NumDims, Layout> TensorSliceBlockMapper;
internal::TensorSliceBlockMapper<T, Index, NumDims, Layout>;
DSizes<Index, NumDims> tensor_dims = RandomDims<NumDims>(); DSizes<Index, NumDims> tensor_dims = RandomDims<NumDims>();
DSizes<Index, NumDims> tensor_slice_offsets = RandomDims<NumDims>(); DSizes<Index, NumDims> tensor_slice_offsets = RandomDims<NumDims>();
@ -219,7 +216,7 @@ static void test_slice_block_mapper_maps_every_element() {
DimensionList<Index, NumDims>()); DimensionList<Index, NumDims>());
for (int i = 0; i < block_mapper.total_block_count(); ++i) { for (int i = 0; i < block_mapper.total_block_count(); ++i) {
TensorBlock block = block_mapper.GetBlockForIndex(i, nullptr); TensorBlock block = block_mapper.GetBlockForIndex(i, NULL);
UpdateCoeffSet<T, Layout, NumDims>(block, block.first_coeff_index(), UpdateCoeffSet<T, Layout, NumDims>(block, block.first_coeff_index(),
choose(Layout, NumDims - 1, 0), choose(Layout, NumDims - 1, 0),
&coeff_set); &coeff_set);
@ -647,17 +644,16 @@ static void test_block_cwise_binary_io_zero_strides() {
template <int Layout> template <int Layout>
static void test_uniform_block_shape() static void test_uniform_block_shape()
{ {
using T = int; typedef internal::TensorBlock<int, Index, 5, Layout> TensorBlock;
typedef internal::TensorBlock<T, Index, 5, Layout> TensorBlock; typedef internal::TensorBlockMapper<int, Index, 5, Layout> TensorBlockMapper;
typedef internal::TensorBlockMapper<T, Index, 5, Layout> TensorBlockMapper;
{ {
// Test shape 'UniformAllDims' with uniform 'max_coeff count'. // Test shape 'UniformAllDims' with uniform 'max_coeff count'.
DSizes<Index, 5> dims(11, 5, 6, 17, 7); DSizes<Index, 5> dims(11, 5, 6, 17, 7);
const size_t max_coeff_count = 5 * 5 * 5 * 5 * 5; const size_t max_coeff_count = 5 * 5 * 5 * 5 * 5;
TensorBlockMapper block_mapper(dims, TensorBlockShapeType::kUniformAllDims, TensorBlockMapper block_mapper(dims, internal::kUniformAllDims,
max_coeff_count); max_coeff_count);
TensorBlock block = block_mapper.GetBlockForIndex(0, nullptr); TensorBlock block = block_mapper.GetBlockForIndex(0, NULL);
for (int i = 0; i < 5; ++i) { for (int i = 0; i < 5; ++i) {
VERIFY_IS_EQUAL(5, block.block_sizes()[i]); VERIFY_IS_EQUAL(5, block.block_sizes()[i]);
} }
@ -669,9 +665,9 @@ static void test_uniform_block_shape()
if (Layout == ColMajor) { if (Layout == ColMajor) {
DSizes<Index, 5> dims(11, 5, 6, 17, 7); DSizes<Index, 5> dims(11, 5, 6, 17, 7);
const size_t max_coeff_count = 7 * 5 * 5 * 5 * 5; const size_t max_coeff_count = 7 * 5 * 5 * 5 * 5;
TensorBlockMapper block_mapper(dims, TensorBlockShapeType::kUniformAllDims, TensorBlockMapper block_mapper(dims, internal::kUniformAllDims,
max_coeff_count); max_coeff_count);
TensorBlock block = block_mapper.GetBlockForIndex(0, nullptr); TensorBlock block = block_mapper.GetBlockForIndex(0, NULL);
VERIFY_IS_EQUAL(7, block.block_sizes()[0]); VERIFY_IS_EQUAL(7, block.block_sizes()[0]);
for (int i = 1; i < 5; ++i) { for (int i = 1; i < 5; ++i) {
VERIFY_IS_EQUAL(5, block.block_sizes()[i]); VERIFY_IS_EQUAL(5, block.block_sizes()[i]);
@ -680,9 +676,9 @@ static void test_uniform_block_shape()
} else { } else {
DSizes<Index, 5> dims(11, 5, 6, 17, 7); DSizes<Index, 5> dims(11, 5, 6, 17, 7);
const size_t max_coeff_count = 5 * 5 * 5 * 5 * 6; const size_t max_coeff_count = 5 * 5 * 5 * 5 * 6;
TensorBlockMapper block_mapper(dims, TensorBlockShapeType::kUniformAllDims, TensorBlockMapper block_mapper(dims, internal::kUniformAllDims,
max_coeff_count); max_coeff_count);
TensorBlock block = block_mapper.GetBlockForIndex(0, nullptr); TensorBlock block = block_mapper.GetBlockForIndex(0, NULL);
VERIFY_IS_EQUAL(6, block.block_sizes()[4]); VERIFY_IS_EQUAL(6, block.block_sizes()[4]);
for (int i = 3; i >= 0; --i) { for (int i = 3; i >= 0; --i) {
VERIFY_IS_EQUAL(5, block.block_sizes()[i]); VERIFY_IS_EQUAL(5, block.block_sizes()[i]);
@ -695,9 +691,9 @@ static void test_uniform_block_shape()
if (Layout == ColMajor) { if (Layout == ColMajor) {
DSizes<Index, 5> dims(11, 5, 6, 17, 7); DSizes<Index, 5> dims(11, 5, 6, 17, 7);
const size_t max_coeff_count = 11 * 5 * 5 * 5 * 5; const size_t max_coeff_count = 11 * 5 * 5 * 5 * 5;
TensorBlockMapper block_mapper(dims, TensorBlockShapeType::kUniformAllDims, TensorBlockMapper block_mapper(dims, internal::kUniformAllDims,
max_coeff_count); max_coeff_count);
TensorBlock block = block_mapper.GetBlockForIndex(0, nullptr); TensorBlock block = block_mapper.GetBlockForIndex(0, NULL);
VERIFY_IS_EQUAL(11, block.block_sizes()[0]); VERIFY_IS_EQUAL(11, block.block_sizes()[0]);
for (int i = 1; i < 5; ++i) { for (int i = 1; i < 5; ++i) {
VERIFY_IS_EQUAL(5, block.block_sizes()[i]); VERIFY_IS_EQUAL(5, block.block_sizes()[i]);
@ -706,9 +702,9 @@ static void test_uniform_block_shape()
} else { } else {
DSizes<Index, 5> dims(11, 5, 6, 17, 7); DSizes<Index, 5> dims(11, 5, 6, 17, 7);
const size_t max_coeff_count = 5 * 5 * 5 * 5 * 7; const size_t max_coeff_count = 5 * 5 * 5 * 5 * 7;
TensorBlockMapper block_mapper(dims, TensorBlockShapeType::kUniformAllDims, TensorBlockMapper block_mapper(dims, internal::kUniformAllDims,
max_coeff_count); max_coeff_count);
TensorBlock block = block_mapper.GetBlockForIndex(0, nullptr); TensorBlock block = block_mapper.GetBlockForIndex(0, NULL);
VERIFY_IS_EQUAL(7, block.block_sizes()[4]); VERIFY_IS_EQUAL(7, block.block_sizes()[4]);
for (int i = 3; i >= 0; --i) { for (int i = 3; i >= 0; --i) {
VERIFY_IS_EQUAL(5, block.block_sizes()[i]); VERIFY_IS_EQUAL(5, block.block_sizes()[i]);
@ -721,9 +717,9 @@ static void test_uniform_block_shape()
if (Layout == ColMajor) { if (Layout == ColMajor) {
DSizes<Index, 5> dims(7, 5, 6, 17, 7); DSizes<Index, 5> dims(7, 5, 6, 17, 7);
const size_t max_coeff_count = 7 * 5 * 6 * 7 * 5; const size_t max_coeff_count = 7 * 5 * 6 * 7 * 5;
TensorBlockMapper block_mapper(dims, TensorBlockShapeType::kUniformAllDims, TensorBlockMapper block_mapper(dims, internal::kUniformAllDims,
max_coeff_count); max_coeff_count);
TensorBlock block = block_mapper.GetBlockForIndex(0, nullptr); TensorBlock block = block_mapper.GetBlockForIndex(0, NULL);
VERIFY_IS_EQUAL(7, block.block_sizes()[0]); VERIFY_IS_EQUAL(7, block.block_sizes()[0]);
VERIFY_IS_EQUAL(5, block.block_sizes()[1]); VERIFY_IS_EQUAL(5, block.block_sizes()[1]);
VERIFY_IS_EQUAL(6, block.block_sizes()[2]); VERIFY_IS_EQUAL(6, block.block_sizes()[2]);
@ -733,9 +729,9 @@ static void test_uniform_block_shape()
} else { } else {
DSizes<Index, 5> dims(7, 5, 6, 9, 7); DSizes<Index, 5> dims(7, 5, 6, 9, 7);
const size_t max_coeff_count = 5 * 5 * 5 * 6 * 7; const size_t max_coeff_count = 5 * 5 * 5 * 6 * 7;
TensorBlockMapper block_mapper(dims, TensorBlockShapeType::kUniformAllDims, TensorBlockMapper block_mapper(dims, internal::kUniformAllDims,
max_coeff_count); max_coeff_count);
TensorBlock block = block_mapper.GetBlockForIndex(0, nullptr); TensorBlock block = block_mapper.GetBlockForIndex(0, NULL);
VERIFY_IS_EQUAL(7, block.block_sizes()[4]); VERIFY_IS_EQUAL(7, block.block_sizes()[4]);
VERIFY_IS_EQUAL(6, block.block_sizes()[3]); VERIFY_IS_EQUAL(6, block.block_sizes()[3]);
VERIFY_IS_EQUAL(5, block.block_sizes()[2]); VERIFY_IS_EQUAL(5, block.block_sizes()[2]);
@ -748,9 +744,9 @@ static void test_uniform_block_shape()
if (Layout == ColMajor) { if (Layout == ColMajor) {
DSizes<Index, 5> dims(7, 5, 6, 17, 7); DSizes<Index, 5> dims(7, 5, 6, 17, 7);
const size_t max_coeff_count = 7 * 5 * 6 * 17 * 7; const size_t max_coeff_count = 7 * 5 * 6 * 17 * 7;
TensorBlockMapper block_mapper(dims, TensorBlockShapeType::kUniformAllDims, TensorBlockMapper block_mapper(dims, internal::kUniformAllDims,
max_coeff_count); max_coeff_count);
TensorBlock block = block_mapper.GetBlockForIndex(0, nullptr); TensorBlock block = block_mapper.GetBlockForIndex(0, NULL);
VERIFY_IS_EQUAL(7, block.block_sizes()[0]); VERIFY_IS_EQUAL(7, block.block_sizes()[0]);
VERIFY_IS_EQUAL(5, block.block_sizes()[1]); VERIFY_IS_EQUAL(5, block.block_sizes()[1]);
VERIFY_IS_EQUAL(6, block.block_sizes()[2]); VERIFY_IS_EQUAL(6, block.block_sizes()[2]);
@ -760,9 +756,9 @@ static void test_uniform_block_shape()
} else { } else {
DSizes<Index, 5> dims(7, 5, 6, 9, 7); DSizes<Index, 5> dims(7, 5, 6, 9, 7);
const size_t max_coeff_count = 7 * 5 * 6 * 9 * 7; const size_t max_coeff_count = 7 * 5 * 6 * 9 * 7;
TensorBlockMapper block_mapper(dims, TensorBlockShapeType::kUniformAllDims, TensorBlockMapper block_mapper(dims, internal::kUniformAllDims,
max_coeff_count); max_coeff_count);
TensorBlock block = block_mapper.GetBlockForIndex(0, nullptr); TensorBlock block = block_mapper.GetBlockForIndex(0, NULL);
VERIFY_IS_EQUAL(7, block.block_sizes()[4]); VERIFY_IS_EQUAL(7, block.block_sizes()[4]);
VERIFY_IS_EQUAL(9, block.block_sizes()[3]); VERIFY_IS_EQUAL(9, block.block_sizes()[3]);
VERIFY_IS_EQUAL(6, block.block_sizes()[2]); VERIFY_IS_EQUAL(6, block.block_sizes()[2]);
@ -775,17 +771,16 @@ static void test_uniform_block_shape()
template <int Layout> template <int Layout>
static void test_skewed_inner_dim_block_shape() static void test_skewed_inner_dim_block_shape()
{ {
using T = int; typedef internal::TensorBlock<int, Index, 5, Layout> TensorBlock;
typedef internal::TensorBlock<T, Index, 5, Layout> TensorBlock; typedef internal::TensorBlockMapper<int, Index, 5, Layout> TensorBlockMapper;
typedef internal::TensorBlockMapper<T, Index, 5, Layout> TensorBlockMapper;
// Test shape 'SkewedInnerDims' with partial allocation to inner-most dim. // Test shape 'SkewedInnerDims' with partial allocation to inner-most dim.
if (Layout == ColMajor) { if (Layout == ColMajor) {
DSizes<Index, 5> dims(11, 5, 6, 17, 7); DSizes<Index, 5> dims(11, 5, 6, 17, 7);
const size_t max_coeff_count = 10 * 1 * 1 * 1 * 1; const size_t max_coeff_count = 10 * 1 * 1 * 1 * 1;
TensorBlockMapper block_mapper(dims, TensorBlockShapeType::kSkewedInnerDims, TensorBlockMapper block_mapper(dims, internal::kSkewedInnerDims,
max_coeff_count); max_coeff_count);
TensorBlock block = block_mapper.GetBlockForIndex(0, nullptr); TensorBlock block = block_mapper.GetBlockForIndex(0, NULL);
VERIFY_IS_EQUAL(10, block.block_sizes()[0]); VERIFY_IS_EQUAL(10, block.block_sizes()[0]);
for (int i = 1; i < 5; ++i) { for (int i = 1; i < 5; ++i) {
VERIFY_IS_EQUAL(1, block.block_sizes()[i]); VERIFY_IS_EQUAL(1, block.block_sizes()[i]);
@ -794,9 +789,9 @@ static void test_skewed_inner_dim_block_shape()
} else { } else {
DSizes<Index, 5> dims(11, 5, 6, 17, 7); DSizes<Index, 5> dims(11, 5, 6, 17, 7);
const size_t max_coeff_count = 1 * 1 * 1 * 1 * 6; const size_t max_coeff_count = 1 * 1 * 1 * 1 * 6;
TensorBlockMapper block_mapper(dims, TensorBlockShapeType::kSkewedInnerDims, TensorBlockMapper block_mapper(dims, internal::kSkewedInnerDims,
max_coeff_count); max_coeff_count);
TensorBlock block = block_mapper.GetBlockForIndex(0, nullptr); TensorBlock block = block_mapper.GetBlockForIndex(0, NULL);
VERIFY_IS_EQUAL(6, block.block_sizes()[4]); VERIFY_IS_EQUAL(6, block.block_sizes()[4]);
for (int i = 3; i >= 0; --i) { for (int i = 3; i >= 0; --i) {
VERIFY_IS_EQUAL(1, block.block_sizes()[i]); VERIFY_IS_EQUAL(1, block.block_sizes()[i]);
@ -808,9 +803,9 @@ static void test_skewed_inner_dim_block_shape()
if (Layout == ColMajor) { if (Layout == ColMajor) {
DSizes<Index, 5> dims(11, 5, 6, 17, 7); DSizes<Index, 5> dims(11, 5, 6, 17, 7);
const size_t max_coeff_count = 11 * 1 * 1 * 1 * 1; const size_t max_coeff_count = 11 * 1 * 1 * 1 * 1;
TensorBlockMapper block_mapper(dims, TensorBlockShapeType::kSkewedInnerDims, TensorBlockMapper block_mapper(dims, internal::kSkewedInnerDims,
max_coeff_count); max_coeff_count);
TensorBlock block = block_mapper.GetBlockForIndex(0, nullptr); TensorBlock block = block_mapper.GetBlockForIndex(0, NULL);
VERIFY_IS_EQUAL(11, block.block_sizes()[0]); VERIFY_IS_EQUAL(11, block.block_sizes()[0]);
for (int i = 1; i < 5; ++i) { for (int i = 1; i < 5; ++i) {
VERIFY_IS_EQUAL(1, block.block_sizes()[i]); VERIFY_IS_EQUAL(1, block.block_sizes()[i]);
@ -819,9 +814,9 @@ static void test_skewed_inner_dim_block_shape()
} else { } else {
DSizes<Index, 5> dims(11, 5, 6, 17, 7); DSizes<Index, 5> dims(11, 5, 6, 17, 7);
const size_t max_coeff_count = 1 * 1 * 1 * 1 * 7; const size_t max_coeff_count = 1 * 1 * 1 * 1 * 7;
TensorBlockMapper block_mapper(dims, TensorBlockShapeType::kSkewedInnerDims, TensorBlockMapper block_mapper(dims, internal::kSkewedInnerDims,
max_coeff_count); max_coeff_count);
TensorBlock block = block_mapper.GetBlockForIndex(0, nullptr); TensorBlock block = block_mapper.GetBlockForIndex(0, NULL);
VERIFY_IS_EQUAL(7, block.block_sizes()[4]); VERIFY_IS_EQUAL(7, block.block_sizes()[4]);
for (int i = 3; i >= 0; --i) { for (int i = 3; i >= 0; --i) {
VERIFY_IS_EQUAL(1, block.block_sizes()[i]); VERIFY_IS_EQUAL(1, block.block_sizes()[i]);
@ -834,9 +829,9 @@ static void test_skewed_inner_dim_block_shape()
if (Layout == ColMajor) { if (Layout == ColMajor) {
DSizes<Index, 5> dims(11, 5, 6, 17, 7); DSizes<Index, 5> dims(11, 5, 6, 17, 7);
const size_t max_coeff_count = 11 * 3 * 1 * 1 * 1; const size_t max_coeff_count = 11 * 3 * 1 * 1 * 1;
TensorBlockMapper block_mapper(dims, TensorBlockShapeType::kSkewedInnerDims, TensorBlockMapper block_mapper(dims, internal::kSkewedInnerDims,
max_coeff_count); max_coeff_count);
TensorBlock block = block_mapper.GetBlockForIndex(0, nullptr); TensorBlock block = block_mapper.GetBlockForIndex(0, NULL);
VERIFY_IS_EQUAL(11, block.block_sizes()[0]); VERIFY_IS_EQUAL(11, block.block_sizes()[0]);
VERIFY_IS_EQUAL(3, block.block_sizes()[1]); VERIFY_IS_EQUAL(3, block.block_sizes()[1]);
for (int i = 2; i < 5; ++i) { for (int i = 2; i < 5; ++i) {
@ -846,9 +841,9 @@ static void test_skewed_inner_dim_block_shape()
} else { } else {
DSizes<Index, 5> dims(11, 5, 6, 17, 7); DSizes<Index, 5> dims(11, 5, 6, 17, 7);
const size_t max_coeff_count = 1 * 1 * 1 * 15 * 7; const size_t max_coeff_count = 1 * 1 * 1 * 15 * 7;
TensorBlockMapper block_mapper(dims, TensorBlockShapeType::kSkewedInnerDims, TensorBlockMapper block_mapper(dims, internal::kSkewedInnerDims,
max_coeff_count); max_coeff_count);
TensorBlock block = block_mapper.GetBlockForIndex(0, nullptr); TensorBlock block = block_mapper.GetBlockForIndex(0, NULL);
VERIFY_IS_EQUAL(7, block.block_sizes()[4]); VERIFY_IS_EQUAL(7, block.block_sizes()[4]);
VERIFY_IS_EQUAL(15, block.block_sizes()[3]); VERIFY_IS_EQUAL(15, block.block_sizes()[3]);
for (int i = 2; i >= 0; --i) { for (int i = 2; i >= 0; --i) {
@ -862,9 +857,9 @@ static void test_skewed_inner_dim_block_shape()
if (Layout == ColMajor) { if (Layout == ColMajor) {
DSizes<Index, 5> dims(11, 5, 6, 17, 7); DSizes<Index, 5> dims(11, 5, 6, 17, 7);
const size_t max_coeff_count = 11 * 5 * 5 * 1 * 1; const size_t max_coeff_count = 11 * 5 * 5 * 1 * 1;
TensorBlockMapper block_mapper(dims, TensorBlockShapeType::kSkewedInnerDims, TensorBlockMapper block_mapper(dims, internal::kSkewedInnerDims,
max_coeff_count); max_coeff_count);
TensorBlock block = block_mapper.GetBlockForIndex(0, nullptr); TensorBlock block = block_mapper.GetBlockForIndex(0, NULL);
VERIFY_IS_EQUAL(11, block.block_sizes()[0]); VERIFY_IS_EQUAL(11, block.block_sizes()[0]);
VERIFY_IS_EQUAL(5, block.block_sizes()[1]); VERIFY_IS_EQUAL(5, block.block_sizes()[1]);
VERIFY_IS_EQUAL(5, block.block_sizes()[2]); VERIFY_IS_EQUAL(5, block.block_sizes()[2]);
@ -875,9 +870,9 @@ static void test_skewed_inner_dim_block_shape()
} else { } else {
DSizes<Index, 5> dims(11, 5, 6, 17, 7); DSizes<Index, 5> dims(11, 5, 6, 17, 7);
const size_t max_coeff_count = 1 * 1 * 5 * 17 * 7; const size_t max_coeff_count = 1 * 1 * 5 * 17 * 7;
TensorBlockMapper block_mapper(dims, TensorBlockShapeType::kSkewedInnerDims, TensorBlockMapper block_mapper(dims, internal::kSkewedInnerDims,
max_coeff_count); max_coeff_count);
TensorBlock block = block_mapper.GetBlockForIndex(0, nullptr); TensorBlock block = block_mapper.GetBlockForIndex(0, NULL);
VERIFY_IS_EQUAL(7, block.block_sizes()[4]); VERIFY_IS_EQUAL(7, block.block_sizes()[4]);
VERIFY_IS_EQUAL(17, block.block_sizes()[3]); VERIFY_IS_EQUAL(17, block.block_sizes()[3]);
VERIFY_IS_EQUAL(5, block.block_sizes()[2]); VERIFY_IS_EQUAL(5, block.block_sizes()[2]);
@@ -891,9 +886,9 @@ static void test_skewed_inner_dim_block_shape()
 if (Layout == ColMajor) {
 DSizes<Index, 5> dims(11, 5, 6, 17, 7);
 const size_t max_coeff_count = 11 * 5 * 6 * 17 * 7;
-TensorBlockMapper block_mapper(dims, TensorBlockShapeType::kSkewedInnerDims,
+TensorBlockMapper block_mapper(dims, internal::kSkewedInnerDims,
 max_coeff_count);
-TensorBlock block = block_mapper.GetBlockForIndex(0, nullptr);
+TensorBlock block = block_mapper.GetBlockForIndex(0, NULL);
 VERIFY_IS_EQUAL(11, block.block_sizes()[0]);
 VERIFY_IS_EQUAL(5, block.block_sizes()[1]);
 VERIFY_IS_EQUAL(6, block.block_sizes()[2]);
@@ -903,9 +898,9 @@ static void test_skewed_inner_dim_block_shape()
 } else {
 DSizes<Index, 5> dims(11, 5, 6, 17, 7);
 const size_t max_coeff_count = 11 * 5 * 6 * 17 * 7;
-TensorBlockMapper block_mapper(dims, TensorBlockShapeType::kSkewedInnerDims,
+TensorBlockMapper block_mapper(dims, internal::kSkewedInnerDims,
 max_coeff_count);
-TensorBlock block = block_mapper.GetBlockForIndex(0, nullptr);
+TensorBlock block = block_mapper.GetBlockForIndex(0, NULL);
 VERIFY_IS_EQUAL(7, block.block_sizes()[4]);
 VERIFY_IS_EQUAL(17, block.block_sizes()[3]);
 VERIFY_IS_EQUAL(6, block.block_sizes()[2]);
@@ -918,15 +913,13 @@ static void test_skewed_inner_dim_block_shape()
 template <int Layout>
 static void test_empty_dims(const internal::TensorBlockShapeType block_shape)
 {
-using T = int;
 // Test blocking of tensors with zero dimensions:
 // - we must not crash on asserts and divisions by zero
 // - we must not return block with zero dimensions
 // (recipe for overflows/underflows, divisions by zero and NaNs later)
 // - total block count must be zero
 {
-typedef internal::TensorBlockMapper<T, Index, 1, Layout> TensorBlockMapper;
+typedef internal::TensorBlockMapper<int, Index, 1, Layout> TensorBlockMapper;
 DSizes<Index, 1> dims(0);
 for (int max_coeff_count = 0; max_coeff_count < 2; ++max_coeff_count) {
 TensorBlockMapper block_mapper(dims, block_shape, max_coeff_count);
@@ -936,7 +929,7 @@ static void test_empty_dims(const internal::TensorBlockShapeType block_shape)
 }
 {
-typedef internal::TensorBlockMapper<T, Index, 2, Layout> TensorBlockMapper;
+typedef internal::TensorBlockMapper<int, Index, 2, Layout> TensorBlockMapper;
 for (int dim1 = 0; dim1 < 3; ++dim1) {
 for (int dim2 = 0; dim2 < 3; ++dim2) {
 DSizes<Index, 2> dims(dim1, dim2);
@@ -987,8 +980,8 @@ EIGEN_DECLARE_TEST(cxx11_tensor_block_access) {
 TEST_LAYOUTS(test_block_cwise_binary_io_zero_strides);
 TEST_LAYOUTS(test_uniform_block_shape);
 TEST_LAYOUTS(test_skewed_inner_dim_block_shape);
-TEST_LAYOUTS_WITH_ARG(test_empty_dims, TensorBlockShapeType::kUniformAllDims);
-TEST_LAYOUTS_WITH_ARG(test_empty_dims, TensorBlockShapeType::kSkewedInnerDims);
+TEST_LAYOUTS_WITH_ARG(test_empty_dims, internal::kUniformAllDims);
+TEST_LAYOUTS_WITH_ARG(test_empty_dims, internal::kSkewedInnerDims);
 }
 #undef TEST_LAYOUTS
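
For context on what the empty-dims hunks above are protecting: a block mapper over a tensor with any zero-sized dimension must report zero blocks, never blocks with zero-sized dimensions. A minimal sketch of that invariant, assuming the internal TensorBlockMapper API exactly as spelled in this diff plus a total_block_count() accessor (everything in Eigen::internal here is an implementation detail, not a stable interface):

#include <unsupported/Eigen/CXX11/Tensor>
#include <cassert>

template <int Layout>
void check_empty_dims(Eigen::internal::TensorBlockShapeType block_shape) {
  typedef Eigen::internal::TensorBlockMapper<int, Eigen::Index, 2, Layout> Mapper;
  Eigen::DSizes<Eigen::Index, 2> dims(0, 3);  // zero-sized first dimension
  Mapper mapper(dims, block_shape, /*max_coeff_count=*/16);
  // No coefficients to cover, so the mapper must produce no blocks at all.
  assert(mapper.total_block_count() == 0);
}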

View File

@@ -471,7 +471,7 @@ static void test_tensor_product()
 mat1.setRandom();
 mat2.setRandom();
-Tensor<float, 4, DataLayout> result = mat1.contract(mat2, Eigen::array<DimPair, 0>{{}});
+Tensor<float, 4, DataLayout> result = mat1.contract(mat2, Eigen::array<DimPair, 0>{});
 VERIFY_IS_EQUAL(result.dimension(0), 2);
 VERIFY_IS_EQUAL(result.dimension(1), 3);
@@ -514,7 +514,7 @@ static void test_const_inputs()
 struct SqrtOutputKernel {
 template <typename Index, typename Scalar>
 EIGEN_ALWAYS_INLINE void operator()(
-const OutputKernel::OutputMapper<Index, Scalar>& output_mapper,
+const internal::blas_data_mapper<Scalar, Index, ColMajor>& output_mapper,
 const TensorContractionParams&, Index, Index, Index num_rows,
 Index num_cols) const {
 for (int i = 0; i < num_rows; ++i) {
@@ -553,7 +553,7 @@ static void test_large_contraction_with_output_kernel() {
 m_result = m_left * m_right;
-for (size_t i = 0; i < t_result.dimensions().TotalSize(); i++) {
+for (std::ptrdiff_t i = 0; i < t_result.dimensions().TotalSize(); i++) {
 VERIFY(&t_result.data()[i] != &m_result.data()[i]);
 VERIFY_IS_APPROX(t_result.data()[i], std::sqrt(m_result.data()[i]));
 }
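
The output-mapper hunk above swaps the OutputKernel::OutputMapper template alias for the underlying internal::blas_data_mapper type (template aliases of that kind are C++11-only). For reference, a complete kernel against the new signature, assuming blas_data_mapper's coefficient accessor acts as a writable (row, column) view of the output block; this is a sketch, not code from the commit:

#include <unsupported/Eigen/CXX11/Tensor>
#include <cmath>

// Rewrites every coefficient of a finished contraction output block
// to its square root, one column at a time (column-major layout).
struct SqrtOutputKernel {
  template <typename Index, typename Scalar>
  EIGEN_ALWAYS_INLINE void operator()(
      const Eigen::internal::blas_data_mapper<Scalar, Index, Eigen::ColMajor>& output_mapper,
      const Eigen::TensorContractionParams&, Index, Index,
      Index num_rows, Index num_cols) const {
    for (Index j = 0; j < num_cols; ++j)
      for (Index i = 0; i < num_rows; ++i)
        output_mapper(i, j) = std::sqrt(output_mapper(i, j));
  }
};

Passed as the third argument of contract(), such a kernel runs once per output block, fusing the element-wise step into the contraction instead of requiring a second pass over the result.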

View File

@@ -25,7 +25,7 @@ static void test_evals()
 Tensor<float, 2, DataLayout> result(2,3);
 result.setZero();
-Eigen::array<Tensor<float, 2>::Index, 1> dims3{{0}};
+Eigen::array<Tensor<float, 2>::Index, 1> dims3{0};
 typedef TensorEvaluator<decltype(input.convolve(kernel, dims3)), DefaultDevice> Evaluator;
 Evaluator eval(input.convolve(kernel, dims3), DefaultDevice());
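
The brace change above only touches initialization syntax; the convolve call itself is unchanged. A self-contained sketch using the new single-brace spelling (sizes chosen arbitrarily, not taken from the test):

#include <unsupported/Eigen/CXX11/Tensor>

using Eigen::Tensor;

// Convolves a 4x3 input with a length-2 kernel along dimension 0;
// the convolved dimension shrinks to 4 - 2 + 1 = 3, giving a 3x3 result.
void convolve_example() {
  Tensor<float, 2> input(4, 3);
  Tensor<float, 1> kernel(2);
  input.setRandom();
  kernel.setRandom();
  Eigen::array<Tensor<float, 2>::Index, 1> dims{0};  // convolve along dim 0
  Tensor<float, 2> result = input.convolve(kernel, dims);
}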

View File

@@ -170,7 +170,6 @@ static void test_type2indexpair_list()
 typedef Eigen::IndexPairList<Eigen::type2indexpair<0,10>, Eigen::IndexPair<DenseIndex>, Eigen::type2indexpair<2,12>> Dims2_b;
 typedef Eigen::IndexPairList<Eigen::IndexPair<DenseIndex>, Eigen::type2indexpair<1,11>, Eigen::IndexPair<DenseIndex>> Dims2_c;
-Dims0 d0;
 Dims2_a d2_a;
 Dims2_b d2_b;

View File

@@ -255,7 +255,7 @@ void test_multithread_contraction_agrees_with_singlethread() {
 struct SqrtOutputKernel {
 template <typename Index, typename Scalar>
 EIGEN_ALWAYS_INLINE void operator()(
-const OutputKernel::OutputMapper<Index, Scalar>& output_mapper,
+const internal::blas_data_mapper<Scalar, Index, ColMajor>& output_mapper,
 const TensorContractionParams&, Index, Index, Index num_rows,
 Index num_cols) const {
 for (int i = 0; i < num_rows; ++i) {

View File

@@ -9,6 +9,7 @@
 // Public License v. 2.0. If a copy of the MPL was not distributed
 // with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
 #ifdef EIGEN_TEST_PART_1
 #include "sparse.h"
@@ -236,7 +237,6 @@ EIGEN_DECLARE_TEST(kronecker_product)
 #ifdef EIGEN_TEST_PART_2
 // simply check that for a dense kronecker product, sparse module is not needed
 #include "main.h"
 #include <Eigen/KroneckerProduct>
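
The include shuffle above follows from how these tests are built: Eigen's harness can compile one source file several times, once per EIGEN_TEST_PART_N macro, so each guarded section must pull in exactly the headers it needs. Schematically (condensed, not a verbatim excerpt of the file):

#ifdef EIGEN_TEST_PART_1
// Part 1 exercises the sparse kroneckerProduct, so it pulls in the
// sparse test helpers.
#include "sparse.h"
#include <Eigen/KroneckerProduct>
#endif

#ifdef EIGEN_TEST_PART_2
// Part 2 checks that a dense kronecker product builds without the
// sparse module: only the core harness is included.
#include "main.h"
#include <Eigen/KroneckerProduct>
#endif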