Mirror of https://gitlab.com/libeigen/eigen.git (synced 2025-06-04 18:54:00 +08:00)

Commit d35880ed91: merge
@@ -43,6 +43,7 @@ template<typename Derived> class MapBase<Derived, ReadOnlyAccessors>
     enum {
       RowsAtCompileTime = internal::traits<Derived>::RowsAtCompileTime,
       ColsAtCompileTime = internal::traits<Derived>::ColsAtCompileTime,
+      InnerStrideAtCompileTime = internal::traits<Derived>::InnerStrideAtCompileTime,
      SizeAtCompileTime = Base::SizeAtCompileTime
    };
 
@@ -187,8 +188,11 @@ template<typename Derived> class MapBase<Derived, ReadOnlyAccessors>
     void checkSanity(typename internal::enable_if<(internal::traits<T>::Alignment>0),void*>::type = 0) const
     {
 #if EIGEN_MAX_ALIGN_BYTES>0
+      // innerStride() is not set yet when this function is called, so we optimistically assume the lowest plausible value:
+      const Index minInnerStride = InnerStrideAtCompileTime == Dynamic ? 1 : Index(InnerStrideAtCompileTime);
+      EIGEN_ONLY_USED_FOR_DEBUG(minInnerStride);
       eigen_assert(( ((internal::UIntPtr(m_data) % internal::traits<Derived>::Alignment) == 0)
-              || (cols() * rows() * innerStride() * sizeof(Scalar)) < internal::traits<Derived>::Alignment ) && "data is not aligned");
+              || (cols() * rows() * minInnerStride * sizeof(Scalar)) < internal::traits<Derived>::Alignment ) && "data is not aligned");
 #endif
     }
 
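Reviewer note: the assertion can no longer call `innerStride()` here because the stride member is initialized after `checkSanity()` runs, so the compile-time lower bound is the safe stand-in. A minimal sketch of the idea, with simplified stand-in names for the members used above (`Dynamic` taken as `-1`, per Eigen's convention):

```cpp
#include <cstddef>
#include <cstdint>

// Sketch: accept a mapped buffer if it is aligned, or if it is provably too
// small for aligned (vectorized) access to ever be attempted.
template <int Alignment, int InnerStrideAtCompileTime>
bool sane(const void* data, std::ptrdiff_t rows, std::ptrdiff_t cols,
          std::size_t scalar_size) {
  // The runtime stride is unknown at this point, so assume the smallest
  // plausible value (1 for a dynamic stride) to avoid false negatives.
  const std::ptrdiff_t min_inner_stride =
      InnerStrideAtCompileTime == -1 ? 1 : InnerStrideAtCompileTime;
  return (reinterpret_cast<std::uintptr_t>(data) % Alignment) == 0
      || static_cast<std::size_t>(rows * cols * min_inner_stride) * scalar_size
             < static_cast<std::size_t>(Alignment);
}
```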
@@ -297,8 +297,8 @@ SluMatrix asSluMatrix(MatrixType& mat)
 template<typename Scalar, int Flags, typename Index>
 MappedSparseMatrix<Scalar,Flags,Index> map_superlu(SluMatrix& sluMat)
 {
-  eigen_assert((Flags&RowMajor)==RowMajor && sluMat.Stype == SLU_NR
-             || (Flags&ColMajor)==ColMajor && sluMat.Stype == SLU_NC);
+  eigen_assert(((Flags&RowMajor)==RowMajor && sluMat.Stype == SLU_NR)
+            || ((Flags&ColMajor)==ColMajor && sluMat.Stype == SLU_NC));
 
   Index outerSize = (Flags&RowMajor)==RowMajor ? sluMat.ncol : sluMat.nrow;
 
@@ -91,7 +91,7 @@ EIGEN_STRONG_INLINE void MergeResourceRequirements(
   *block_total_size = resources[0].block_total_size;
   for (std::vector<TensorOpResourceRequirements>::size_type i = 1; i < resources.size(); ++i) {
     if (resources[i].block_shape == kSkewedInnerDims &&
-        *block_shape ! kSkewedInnerDims) {
+        *block_shape != kSkewedInnerDims) {
       *block_shape = kSkewedInnerDims;
     }
     *block_total_size =
@@ -152,11 +152,11 @@ struct TensorBlockCopyOp {
     const Scalar* src_base = &src_data[src_index];
     Scalar* dst_base = &dst_data[dst_index];
 
-    typedef const Eigen::Array<Scalar, Dynamic, 1> Src;
-    typedef Eigen::Array<Scalar, Dynamic, 1> Dst;
+    typedef const Array<Scalar, Dynamic, 1> Src;
+    typedef Array<Scalar, Dynamic, 1> Dst;
 
-    typedef Eigen::Map<Src, 0, InnerStride<> > SrcMap;
-    typedef Eigen::Map<Dst, 0, InnerStride<> > DstMap;
+    typedef Map<Src, 0, InnerStride<> > SrcMap;
+    typedef Map<Dst, 0, InnerStride<> > DstMap;
 
     const SrcMap src(src_base, num_coeff_to_copy, InnerStride<>(src_stride));
     DstMap dst(dst_base, num_coeff_to_copy, InnerStride<>(dst_stride));
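Note: this code lives inside `namespace Eigen`, so the explicit `Eigen::` qualification was redundant; dropping it keeps the typedefs shorter and consistent with the rest of the file. A minimal sketch of the equivalence, assuming an Eigen build on the include path (the function name is illustrative only):

```cpp
#include <Eigen/Core>

namespace Eigen {
namespace internal {

// Inside namespace Eigen, unqualified Array/Map/Dynamic resolve to the Eigen
// types directly, so Eigen::Array and Array name the same type here.
template <typename Scalar>
void qualification_demo() {
  typedef Array<Scalar, Dynamic, 1> Flat;        // == Eigen::Array<...>
  typedef Map<Flat, 0, InnerStride<> > FlatMap;  // == Eigen::Map<...>
  (void)sizeof(FlatMap);
}

}  // namespace internal
}  // namespace Eigen
```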
@@ -178,10 +178,8 @@ template <typename Scalar, typename StorageIndex, int NumDims, int Layout,
           bool BlockRead>
 class TensorBlockIO {
  public:
-  typedef typename TensorBlock<Scalar, StorageIndex, NumDims, Layout>
-      TensorBlock;
-  typedef typename TensorBlockCopyOp<Scalar, StorageIndex>
-      TensorBlockCopyOp;
+  typedef TensorBlock<Scalar, StorageIndex, NumDims, Layout> Block;
+  typedef TensorBlockCopyOp<Scalar, StorageIndex> BlockCopyOp;
 
  protected:
   struct BlockIteratorState {
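Reviewer note: the old form `typedef typename TensorBlock<...> TensorBlock;` declared an alias with the same name as the class template it refers to, and used `typename` where no dependent qualified name follows; some compilers reject one or both of these (an assumption about the motivation, but consistent with the rename applied throughout this file). A minimal reduction of the pattern, with hypothetical names:

```cpp
// Hypothetical reduction of the original pattern:
template <typename T>
struct TensorBlockDemo {};

class IODemo {
 public:
  // Problematic: 'typename' without a qualified name is non-standard, and the
  // alias would shadow the template's own name inside this class:
  //   typedef typename TensorBlockDemo<int> TensorBlockDemo;

  // Fixed form: a plain typedef under a distinct name.
  typedef TensorBlockDemo<int> Block;
};
```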
@@ -194,7 +192,7 @@ class TensorBlockIO {
   };
 
   static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void Copy(
-      const TensorBlock& block, StorageIndex first_coeff_index,
+      const Block& block, StorageIndex first_coeff_index,
       const array<StorageIndex, NumDims>& tensor_to_block_dim_map,
       const array<StorageIndex, NumDims>& tensor_strides, const Scalar* src_data,
       Scalar* dst_data) {
@@ -290,8 +288,8 @@ class TensorBlockIO {
     const StorageIndex block_total_size =
         NumDims == 0 ? 1 : block.block_sizes().TotalSize();
     for (StorageIndex i = 0; i < block_total_size; i += block_inner_dim_size) {
-      TensorBlockCopyOp::Run(block_inner_dim_size, outputIndex, output_stride,
+      BlockCopyOp::Run(block_inner_dim_size, outputIndex, output_stride,
                        dst_data, inputIndex, input_stride, src_data);
       // Update index.
       for (int j = 0; j < num_squeezed_dims; ++j) {
         if (++block_iter_state[j].count < block_iter_state[j].size) {
@@ -320,13 +318,11 @@ template <typename Scalar, typename StorageIndex, int NumDims, int Layout>
 class TensorBlockReader : public TensorBlockIO<Scalar, StorageIndex, NumDims,
                                                Layout, /*BlockRead=*/true> {
  public:
-  typedef typename TensorBlock<Scalar, StorageIndex, NumDims, Layout>
-      TensorBlock;
-  typedef TensorBlockIO<Scalar, StorageIndex, NumDims, Layout, /*BlockRead=*/true>
-      Base;
+  typedef TensorBlock<Scalar, StorageIndex, NumDims, Layout> Block;
+  typedef TensorBlockIO<Scalar, StorageIndex, NumDims, Layout, /*BlockRead=*/true> Base;
 
   static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void Run(
-      TensorBlock* block, const Scalar* src_data) {
+      Block* block, const Scalar* src_data) {
     array<StorageIndex, NumDims> tensor_to_block_dim_map;
     for (int i = 0; i < NumDims; ++i) {
       tensor_to_block_dim_map[i] = i;
@@ -336,7 +332,7 @@ class TensorBlockReader : public TensorBlockIO<Scalar, StorageIndex, NumDims,
   }
 
   static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void Run(
-      TensorBlock* block, StorageIndex first_coeff_index,
+      Block* block, StorageIndex first_coeff_index,
       const array<StorageIndex, NumDims>& tensor_to_block_dim_map,
       const array<StorageIndex, NumDims>& tensor_strides, const Scalar* src_data) {
     Base::Copy(*block, first_coeff_index, tensor_to_block_dim_map,
@@ -357,13 +353,11 @@ template <typename Scalar, typename StorageIndex, int NumDims, int Layout>
 class TensorBlockWriter : public TensorBlockIO<Scalar, StorageIndex, NumDims,
                                                Layout, /*BlockRead=*/false> {
  public:
-  typedef typename TensorBlock<Scalar, StorageIndex, NumDims, Layout>
-      TensorBlock;
-  typedef TensorBlockIO<Scalar, StorageIndex, NumDims, Layout, /*BlockRead=*/false>
-      Base;
+  typedef TensorBlock<Scalar, StorageIndex, NumDims, Layout> Block;
+  typedef TensorBlockIO<Scalar, StorageIndex, NumDims, Layout, /*BlockRead=*/false> Base;
 
   static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void Run(
-      const TensorBlock& block, Scalar* dst_data) {
+      const Block& block, Scalar* dst_data) {
     array<StorageIndex, NumDims> tensor_to_block_dim_map;
     for (int i = 0; i < NumDims; ++i) {
       tensor_to_block_dim_map[i] = i;
@@ -373,7 +367,7 @@ class TensorBlockWriter : public TensorBlockIO<Scalar, StorageIndex, NumDims,
   }
 
   static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void Run(
-      const TensorBlock& block, StorageIndex first_coeff_index,
+      const Block& block, StorageIndex first_coeff_index,
       const array<StorageIndex, NumDims>& tensor_to_block_dim_map,
       const array<StorageIndex, NumDims>& tensor_strides, Scalar* dst_data) {
     Base::Copy(block, first_coeff_index, tensor_to_block_dim_map,
@@ -401,13 +395,13 @@ struct TensorBlockCwiseBinaryOp {
       const StorageIndex left_stride, const LeftScalar* left_data,
       const StorageIndex right_index, const StorageIndex right_stride,
       const RightScalar* right_data) {
-    typedef const Eigen::Array<LeftScalar, Dynamic, 1> Lhs;
-    typedef const Eigen::Array<RightScalar, Dynamic, 1> Rhs;
-    typedef Eigen::Array<OutputScalar, Dynamic, 1> Out;
+    typedef const Array<LeftScalar, Dynamic, 1> Lhs;
+    typedef const Array<RightScalar, Dynamic, 1> Rhs;
+    typedef Array<OutputScalar, Dynamic, 1> Out;
 
-    typedef Eigen::Map<Lhs, 0, InnerStride<> > LhsMap;
-    typedef Eigen::Map<Rhs, 0, InnerStride<> > RhsMap;
-    typedef Eigen::Map<Out, 0, InnerStride<> > OutMap;
+    typedef Map<Lhs, 0, InnerStride<> > LhsMap;
+    typedef Map<Rhs, 0, InnerStride<> > RhsMap;
+    typedef Map<Out, 0, InnerStride<> > OutMap;
 
     const LeftScalar* lhs_base = &left_data[left_index];
     const RightScalar* rhs_base = &right_data[right_index];
@@ -417,8 +411,7 @@ struct TensorBlockCwiseBinaryOp {
     const RhsMap rhs(rhs_base, num_coeff, InnerStride<>(right_stride));
     OutMap out(out_base, num_coeff, InnerStride<>(output_stride));
 
-    out =
-        Eigen::CwiseBinaryOp<BinaryFunctor, LhsMap, RhsMap>(lhs, rhs, functor);
+    out = CwiseBinaryOp<BinaryFunctor, LhsMap, RhsMap>(lhs, rhs, functor);
   }
 };
 
@@ -434,8 +427,7 @@ struct TensorBlockCwiseBinaryOp {
 template <typename BinaryFunctor, typename StorageIndex, typename OutputScalar,
           int NumDims, int Layout>
 struct TensorBlockCwiseBinaryIO {
-  typedef typename TensorBlock<OutputScalar, StorageIndex, NumDims,
-                               Layout>::Dimensions Dimensions;
+  typedef typename TensorBlock<OutputScalar, StorageIndex, NumDims, Layout>::Dimensions Dimensions;
 
   struct BlockIteratorState {
     StorageIndex output_stride, output_span;
@@ -627,8 +619,7 @@ struct TensorBlockView {
 template <typename Scalar, typename StorageIndex, int NumDims, int Layout>
 class TensorBlockMapper {
  public:
-  typedef typename TensorBlock<Scalar, StorageIndex, NumDims, Layout>
-      TensorBlock;
+  typedef TensorBlock<Scalar, StorageIndex, NumDims, Layout> Block;
   typedef DSizes<StorageIndex, NumDims> Dimensions;
 
   TensorBlockMapper(const Dimensions& dims,
@@ -663,7 +654,7 @@ class TensorBlockMapper {
     }
   }
 
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBlock
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Block
   GetBlockForIndex(StorageIndex block_index, Scalar* data) const {
     StorageIndex first_coeff_index = 0;
     DSizes<StorageIndex, NumDims> coords;
@@ -711,8 +702,7 @@ class TensorBlockMapper {
       }
     }
 
-    return TensorBlock(first_coeff_index, sizes, strides, m_tensor_strides,
-                       data);
+    return Block(first_coeff_index, sizes, strides, m_tensor_strides, data);
   }
 
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE StorageIndex total_block_count() const {
@@ -818,8 +808,7 @@ class TensorBlockMapper {
 template <typename Scalar, typename StorageIndex, int NumDims, int Layout>
 class TensorSliceBlockMapper {
  public:
-  typedef typename TensorBlock<Scalar, StorageIndex, NumDims, Layout>
-      TensorBlock;
+  typedef TensorBlock<Scalar, StorageIndex, NumDims, Layout> Block;
   typedef DSizes<StorageIndex, NumDims> Dimensions;
 
   TensorSliceBlockMapper(const Dimensions& tensor_dims,
@@ -860,7 +849,7 @@ class TensorSliceBlockMapper {
     }
   }
 
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBlock
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Block
   GetBlockForIndex(StorageIndex block_index, Scalar* data) const {
     StorageIndex first_coeff_index = 0;
     DSizes<StorageIndex, NumDims> coords;
@@ -917,8 +906,7 @@ class TensorSliceBlockMapper {
       }
     }
 
-    return TensorBlock(first_coeff_index, sizes, strides, m_tensor_strides,
-                       data);
+    return Block(first_coeff_index, sizes, strides, m_tensor_strides, data);
   }
 
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE StorageIndex total_block_count() const {
|
@ -152,13 +152,7 @@ struct TensorContractionParams {
|
|||||||
// 1. Elementwise Relu transformation following Conv2D.
|
// 1. Elementwise Relu transformation following Conv2D.
|
||||||
// 2. AddBias to the Conv2D output channels dimension.
|
// 2. AddBias to the Conv2D output channels dimension.
|
||||||
//
|
//
|
||||||
// See expected implementation in NoOpOutputKernel.
|
// The NoOpOutputKernel implements an output kernel that does absolutely nothing.
|
||||||
struct OutputKernel {
|
|
||||||
template <typename Index, typename Scalar>
|
|
||||||
typedef internal::blas_data_mapper<Scalar, Index, ColMajor> OutputMapper;
|
|
||||||
};
|
|
||||||
|
|
||||||
// Output kernel that does absolutely nothing.
|
|
||||||
struct NoOpOutputKernel {
|
struct NoOpOutputKernel {
|
||||||
/**
|
/**
|
||||||
* Tensor contraction evaluator calls this kernel after finishing each block
|
* Tensor contraction evaluator calls this kernel after finishing each block
|
||||||
@@ -177,7 +171,7 @@ struct NoOpOutputKernel {
   */
   template <typename Index, typename Scalar>
   EIGEN_ALWAYS_INLINE void operator()(
-      const OutputKernel::OutputMapper<Index, Scalar>& /*output_mapper*/,
+      const internal::blas_data_mapper<Scalar, Index, ColMajor>& /*output_mapper*/,
       const TensorContractionParams& /*params*/, Index /*i*/,
       Index /*j*/, Index /*num_rows*/, Index /*num_cols*/) const {}
 };
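Reviewer note: the removed `OutputKernel` struct relied on a templated typedef (`template <...> typedef ...`), which is not valid C++ (an alias template would need C++11 `using`), so the mapper type is now spelled out at the single place it is used. For illustration only, a user-defined kernel under this interface might look like the following sketch; the `ReluOutputKernel` name and body are invented, not part of Eigen, and the functor shape simply mirrors `NoOpOutputKernel` above:

```cpp
#include <unsupported/Eigen/CXX11/Tensor>

// Illustrative only: applies an elementwise ReLU to each finished output block.
struct ReluOutputKernel {
  template <typename Index, typename Scalar>
  EIGEN_ALWAYS_INLINE void operator()(
      const Eigen::internal::blas_data_mapper<Scalar, Index, Eigen::ColMajor>&
          output_mapper,
      const Eigen::TensorContractionParams& /*params*/, Index /*i*/,
      Index /*j*/, Index num_rows, Index num_cols) const {
    for (Index c = 0; c < num_cols; ++c)
      for (Index r = 0; r < num_rows; ++r)
        output_mapper(r, c) = Eigen::numext::maxi(output_mapper(r, c), Scalar(0));
  }
};
```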
@@ -666,7 +660,7 @@ struct TensorContractionEvaluatorBase
 
             // call gebp (matrix kernel)
             // The parameters here are copied from Eigen's GEMM implementation
-            const auto output_mapper = output.getSubMapper(i2, j2);
+            const OutputMapper output_mapper = output.getSubMapper(i2, j2);
             gebp(output_mapper, blockA, blockB, actual_mc, actual_kc, actual_nc,
                  Scalar(1), -1, -1, 0, 0);
 
|
@ -88,6 +88,7 @@ struct TensorEvaluator<const TensorCustomUnaryOp<CustomUnaryFunc, XprType>, Devi
|
|||||||
typedef typename internal::remove_const<typename XprType::CoeffReturnType>::type CoeffReturnType;
|
typedef typename internal::remove_const<typename XprType::CoeffReturnType>::type CoeffReturnType;
|
||||||
typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
|
typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
|
||||||
static const int PacketSize = PacketType<CoeffReturnType, Device>::size;
|
static const int PacketSize = PacketType<CoeffReturnType, Device>::size;
|
||||||
|
typedef typename PointerType<CoeffReturnType, Device>::Type PointerT;
|
||||||
|
|
||||||
enum {
|
enum {
|
||||||
IsAligned = false,
|
IsAligned = false,
|
||||||
@@ -106,12 +107,12 @@ struct TensorEvaluator<const TensorCustomUnaryOp<CustomUnaryFunc, XprType>, Devi
 
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; }
 
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(CoeffReturnType* data) {
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(PointerT data) {
     if (data) {
       evalTo(data);
       return false;
     } else {
-      m_result = static_cast<CoeffReturnType*>(
+      m_result = static_cast<PointerT>(
           m_device.allocate_temp(dimensions().TotalSize() * sizeof(Scalar)));
       evalTo(m_result);
       return true;
@@ -139,23 +140,22 @@ struct TensorEvaluator<const TensorCustomUnaryOp<CustomUnaryFunc, XprType>, Devi
     return TensorOpCost(sizeof(CoeffReturnType), 0, 0, vectorized, PacketSize);
   }
 
-  EIGEN_DEVICE_FUNC typename Eigen::internal::traits<XprType>::PointerType data() const { return m_result; }
+  EIGEN_DEVICE_FUNC PointerT data() const { return m_result; }
 
 #ifdef EIGEN_USE_SYCL
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Device& device() const { return m_device; }
 #endif
 
  protected:
-  EIGEN_DEVICE_FUNC void evalTo(Scalar* data) {
-    TensorMap<Tensor<CoeffReturnType, NumDims, Layout, Index> > result(
-        data, m_dimensions);
+  EIGEN_DEVICE_FUNC void evalTo(PointerT data) {
+    TensorMap<Tensor<CoeffReturnType, NumDims, Layout, Index> > result(data, m_dimensions);
     m_op.func().eval(m_op.expression(), result, m_device);
   }
 
   Dimensions m_dimensions;
   const ArgType m_op;
   const Device& m_device;
-  CoeffReturnType* m_result;
+  PointerT m_result;
 };
 
 
@@ -250,6 +250,7 @@ struct TensorEvaluator<const TensorCustomBinaryOp<CustomBinaryFunc, LhsXprType,
   typedef typename internal::remove_const<typename XprType::CoeffReturnType>::type CoeffReturnType;
   typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
   static const int PacketSize = PacketType<CoeffReturnType, Device>::size;
+  typedef typename PointerType<CoeffReturnType, Device>::Type PointerT;
 
   enum {
     IsAligned = false,
@@ -268,12 +269,12 @@ struct TensorEvaluator<const TensorCustomBinaryOp<CustomBinaryFunc, LhsXprType,
 
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; }
 
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(CoeffReturnType* data) {
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(PointerT data) {
     if (data) {
       evalTo(data);
       return false;
     } else {
-      m_result = static_cast<Scalar *>(m_device.allocate_temp(dimensions().TotalSize() * sizeof(Scalar)));
+      m_result = static_cast<PointerT>(m_device.allocate_temp(dimensions().TotalSize() * sizeof(CoeffReturnType)));
       evalTo(m_result);
       return true;
     }
@@ -300,22 +301,22 @@ struct TensorEvaluator<const TensorCustomBinaryOp<CustomBinaryFunc, LhsXprType,
     return TensorOpCost(sizeof(CoeffReturnType), 0, 0, vectorized, PacketSize);
   }
 
-  EIGEN_DEVICE_FUNC typename internal::traits<XprType>::PointerType data() const { return m_result; }
+  EIGEN_DEVICE_FUNC PointerT data() const { return m_result; }
 
 #ifdef EIGEN_USE_SYCL
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Device& device() const { return m_device; }
 #endif
 
  protected:
-  EIGEN_DEVICE_FUNC void evalTo(Scalar* data) {
-    TensorMap<Tensor<Scalar, NumDims, Layout> > result(data, m_dimensions);
+  EIGEN_DEVICE_FUNC void evalTo(PointerT data) {
+    TensorMap<Tensor<CoeffReturnType, NumDims, Layout> > result(data, m_dimensions);
     m_op.func().eval(m_op.lhsExpression(), m_op.rhsExpression(), result, m_device);
   }
 
   Dimensions m_dimensions;
   const XprType m_op;
   const Device& m_device;
-  CoeffReturnType* m_result;
+  PointerT m_result;
 };
 
 
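Reviewer note: besides threading the device-specific pointer type through the evaluator, the binary hunk fixes a latent sizing bug: the temporary buffer is now sized with `sizeof(CoeffReturnType)`, the type actually constructed into it, rather than `sizeof(Scalar)`. A self-contained sketch of why that distinction matters (the types are illustrative):

```cpp
#include <complex>
#include <cstdlib>

int main() {
  typedef float Scalar;
  typedef std::complex<float> CoeffReturnType;  // what the buffer really holds
  const std::size_t n = 128;

  // Undersized whenever the two types differ (here by a factor of two):
  void* wrong = std::malloc(n * sizeof(Scalar));
  // Correct: size by the element type that will be stored.
  void* right = std::malloc(n * sizeof(CoeffReturnType));

  std::free(wrong);
  std::free(right);
}
```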
|
@ -132,7 +132,7 @@ class TensorExecutor<Expression, DefaultDevice, Vectorizable,
|
|||||||
if (needs_assign) {
|
if (needs_assign) {
|
||||||
// Size tensor blocks to fit in cache (or requested target block size).
|
// Size tensor blocks to fit in cache (or requested target block size).
|
||||||
Index block_total_size = numext::mini(cache_size, total_size);
|
Index block_total_size = numext::mini(cache_size, total_size);
|
||||||
TensorBlockShapeType block_shape = TensorBlockShapeType::kSkewedInnerDims;
|
TensorBlockShapeType block_shape = kSkewedInnerDims;
|
||||||
// Query expression tree for desired block size/shape.
|
// Query expression tree for desired block size/shape.
|
||||||
std::vector<TensorOpResourceRequirements> resources;
|
std::vector<TensorOpResourceRequirements> resources;
|
||||||
evaluator.getResourceRequirements(&resources);
|
evaluator.getResourceRequirements(&resources);
|
||||||
@@ -229,10 +229,6 @@ class TensorExecutor<Expression, ThreadPoolDevice, Vectorizable, Tileable> {
     Evaluator evaluator(expr, device);
     const bool needs_assign = evaluator.evalSubExprsIfNeeded(NULL);
     if (needs_assign) {
-      const StorageIndex PacketSize =
-          Vectorizable
-              ? unpacket_traits<typename Evaluator::PacketReturnType>::size
-              : 1;
       const StorageIndex size = array_prod(evaluator.dimensions());
       device.parallelFor(size, evaluator.costPerCoeff(Vectorizable),
                          EvalRange::alignBlockSize,
@@ -272,7 +268,7 @@ class TensorExecutor<Expression, ThreadPoolDevice, Vectorizable, /*Tileable*/ tr
 
     const bool needs_assign = evaluator.evalSubExprsIfNeeded(NULL);
     if (needs_assign) {
-      TensorBlockShapeType block_shape = TensorBlockShapeType::kSkewedInnerDims;
+      TensorBlockShapeType block_shape = kSkewedInnerDims;
       Index block_total_size = 0;
       // Query expression tree for desired block size/shape.
       std::vector<internal::TensorOpResourceRequirements> resources;
|
@ -24,6 +24,14 @@ template<typename T> struct MakePointer {
|
|||||||
typedef T ScalarType;
|
typedef T ScalarType;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// The PointerType class is a container of the device specefic pointer
|
||||||
|
// used for refering to a Pointer on TensorEvaluator class. While the TensorExpression
|
||||||
|
// is a device-agnostic type and need MakePointer class for type conversion,
|
||||||
|
// the TensorEvaluator calss can be specialized for a device, hence it is possible
|
||||||
|
// to construct different types of temproray storage memory in TensorEvaluator
|
||||||
|
// for different devices by specializing the following PointerType class.
|
||||||
|
template<typename T, typename Device> struct PointerType : MakePointer<T>{};
|
||||||
|
|
||||||
namespace internal{
|
namespace internal{
|
||||||
template<typename A, typename B> struct Pointer_type_promotion {
|
template<typename A, typename B> struct Pointer_type_promotion {
|
||||||
static const bool val=false;
|
static const bool val=false;
|
||||||
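Note: the added comment describes an extension point; concretely, a device port could specialize `PointerType` so that evaluators carry a pointer-like wrapper instead of a raw `T*`. A hedged sketch of such a specialization, where `device_ptr` and `HypotheticalDevice` are invented for illustration and are not Eigen types:

```cpp
#include <unsupported/Eigen/CXX11/Tensor>

// Hypothetical pointer wrapper and device tag, for illustration only.
template <typename T> struct device_ptr { T* raw; };
struct HypotheticalDevice {};

namespace Eigen {
// A backend maps PointerType<T, Device> to its own pointer-like type by
// specializing the template; evaluators then use it as their PointerT.
template <typename T>
struct PointerType<T, HypotheticalDevice> {
  typedef device_ptr<T> Type;
};
}  // namespace Eigen
```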
|
@ -57,6 +57,7 @@ class ThreadPoolTempl : public Eigen::ThreadPoolInterface {
|
|||||||
coprimes_.push_back(i);
|
coprimes_.push_back(i);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
queues_.resize(num_threads_);
|
||||||
for (int i = 0; i < num_threads_; i++) {
|
for (int i = 0; i < num_threads_; i++) {
|
||||||
queues_.push_back(new Queue());
|
queues_.push_back(new Queue());
|
||||||
}
|
}
|
||||||
@@ -64,7 +65,7 @@ class ThreadPoolTempl : public Eigen::ThreadPoolInterface {
     init_barrier_.reset(new Barrier(num_threads_));
 #endif
     for (int i = 0; i < num_threads_; i++) {
-      threads_.push_back(env_.CreateThread([this, i]() { WorkerLoop(i); }));
+      threads_.emplace_back(env_.CreateThread([this, i]() { WorkerLoop(i); }));
     }
 #ifndef EIGEN_THREAD_LOCAL
     // Wait for workers to initialize per_thread_map_. Otherwise we might race
@@ -85,13 +86,13 @@ class ThreadPoolTempl : public Eigen::ThreadPoolInterface {
       // Since we were cancelled, there might be entries in the queues.
       // Empty them to prevent their destructor from asserting.
       for (size_t i = 0; i < queues_.size(); i++) {
-        queues_[i]->Flush();
+        queues_[i].Flush();
       }
     }
 
     // Join threads explicitly to avoid destruction order issues.
-    for (int i = 0; i < num_threads_; i++) delete threads_[i];
-    for (int i = 0; i < num_threads_; i++) delete queues_[i];
+    threads_.resize(0);
+    queues_.resize(0);
 #ifndef EIGEN_THREAD_LOCAL
     for (auto it : per_thread_map_) delete it.second;
 #endif
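Reviewer note: with `threads_` holding `std::unique_ptr<Thread>` and `queues_` holding `Queue` by value (see the member declarations below), teardown collapses to plain `resize(0)`: destructors release the threads and queues automatically and the manual `delete` loops disappear. A minimal sketch of the ownership pattern, where `Worker` is a stand-in for the pool's `Thread` type:

```cpp
#include <memory>
#include <vector>

struct Worker {
  ~Worker() { /* a real worker would join its underlying thread here */ }
};

int main() {
  // Owning container: destroying the vector destroys (and joins) the workers;
  // no manual delete loop is needed, and early exits stay leak-free.
  std::vector<std::unique_ptr<Worker> > workers;
  workers.emplace_back(new Worker());
  workers.clear();  // equivalent to resize(0) in the patch above
}
```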
@@ -102,13 +103,13 @@ class ThreadPoolTempl : public Eigen::ThreadPoolInterface {
     PerThread* pt = GetPerThread();
     if (pt->pool == this) {
       // Worker thread of this pool, push onto the thread's queue.
-      Queue* q = queues_[pt->thread_id];
-      t = q->PushFront(std::move(t));
+      Queue& q = queues_[pt->thread_id];
+      t = q.PushFront(std::move(t));
     } else {
       // A free-standing thread (or worker of another pool), push onto a random
       // queue.
-      Queue* q = queues_[Rand(&pt->rand) % queues_.size()];
-      t = q->PushBack(std::move(t));
+      Queue& q = queues_[Rand(&pt->rand) % queues_.size()];
+      t = q.PushBack(std::move(t));
     }
     // Note: below we touch this after making w available to worker threads.
     // Strictly speaking, this can lead to a racy-use-after-free. Consider that
@@ -163,8 +164,8 @@ class ThreadPoolTempl : public Eigen::ThreadPoolInterface {
   Environment env_;
   const int num_threads_;
   const bool allow_spinning_;
-  MaxSizeVector<Thread*> threads_;
-  MaxSizeVector<Queue*> queues_;
+  MaxSizeVector<std::unique_ptr<Thread> > threads_;
+  MaxSizeVector<Queue> queues_;
   MaxSizeVector<unsigned> coprimes_;
   MaxSizeVector<EventCount::Waiter> waiters_;
   std::atomic<unsigned> blocked_;
@@ -193,7 +194,7 @@ class ThreadPoolTempl : public Eigen::ThreadPoolInterface {
     pt->pool = this;
     pt->rand = GlobalThreadIdHash();
     pt->thread_id = thread_id;
-    Queue* q = queues_[thread_id];
+    Queue& q = queues_[thread_id];
     EventCount::Waiter* waiter = &waiters_[thread_id];
     // TODO(dvyukov,rmlarsen): The time spent in Steal() is proportional
     // to num_threads_ and we assume that new work is scheduled at a
@@ -209,10 +210,10 @@ class ThreadPoolTempl : public Eigen::ThreadPoolInterface {
       // counter-productive for the types of I/O workloads the single thread
       // pools tend to be used for.
       while (!cancelled_) {
-        Task t = q->PopFront();
+        Task t = q.PopFront();
         for (int i = 0; i < spin_count && !t.f; i++) {
           if (!cancelled_.load(std::memory_order_relaxed)) {
-            t = q->PopFront();
+            t = q.PopFront();
           }
         }
         if (!t.f) {
@@ -226,7 +227,7 @@ class ThreadPoolTempl : public Eigen::ThreadPoolInterface {
       }
     } else {
       while (!cancelled_) {
-        Task t = q->PopFront();
+        Task t = q.PopFront();
         if (!t.f) {
           t = Steal();
           if (!t.f) {
@@ -263,7 +264,7 @@ class ThreadPoolTempl : public Eigen::ThreadPoolInterface {
     unsigned inc = coprimes_[r % coprimes_.size()];
     unsigned victim = r % size;
     for (unsigned i = 0; i < size; i++) {
-      Task t = queues_[victim]->PopBack();
+      Task t = queues_[victim].PopBack();
       if (t.f) {
         return t;
       }
@@ -290,7 +291,7 @@ class ThreadPoolTempl : public Eigen::ThreadPoolInterface {
       if (cancelled_) {
        return false;
      } else {
-        *t = queues_[victim]->PopBack();
+        *t = queues_[victim].PopBack();
        return true;
      }
    }
@@ -298,6 +299,7 @@ class ThreadPoolTempl : public Eigen::ThreadPoolInterface {
     // If we are shutting down and all worker threads blocked without work,
     // that's we are done.
     blocked_++;
+    // TODO is blocked_ required to be unsigned?
     if (done_ && blocked_ == static_cast<unsigned>(num_threads_)) {
       ec_.CancelWait(waiter);
       // Almost done, but need to re-check queues.
@@ -331,7 +333,7 @@ class ThreadPoolTempl : public Eigen::ThreadPoolInterface {
     unsigned inc = coprimes_[r % coprimes_.size()];
     unsigned victim = r % size;
     for (unsigned i = 0; i < size; i++) {
-      if (!queues_[victim]->Empty()) {
+      if (!queues_[victim].Empty()) {
         return victim;
       }
       victim += inc;
@@ -25,6 +25,11 @@ template <typename T, size_t n> class array {
   EIGEN_DEVICE_FUNC
   EIGEN_STRONG_INLINE const T& operator[] (size_t index) const { return values[index]; }
 
+  EIGEN_DEVICE_FUNC
+  EIGEN_STRONG_INLINE T& at(size_t index) { eigen_assert(index < size()); return values[index]; }
+  EIGEN_DEVICE_FUNC
+  EIGEN_STRONG_INLINE const T& at(size_t index) const { eigen_assert(index < size()); return values[index]; }
+
   EIGEN_DEVICE_FUNC
   EIGEN_STRONG_INLINE T& front() { return values[0]; }
   EIGEN_DEVICE_FUNC
@@ -35,7 +35,6 @@ class MaxSizeVector {
   explicit MaxSizeVector(size_t n)
       : reserve_(n), size_(0),
         data_(static_cast<T*>(internal::aligned_malloc(n * sizeof(T)))) {
-    for (size_t i = 0; i < n; ++i) { new (&data_[i]) T; }
   }
 
   // Construct a new MaxSizeVector, reserve and resize to n.
@@ -44,35 +43,55 @@ class MaxSizeVector {
   MaxSizeVector(size_t n, const T& init)
       : reserve_(n), size_(n),
         data_(static_cast<T*>(internal::aligned_malloc(n * sizeof(T)))) {
-    for (size_t i = 0; i < n; ++i) { new (&data_[i]) T(init); }
+    size_t i = 0;
+    EIGEN_TRY
+    {
+      for(; i < size_; ++i) { new (&data_[i]) T(init); }
+    }
+    EIGEN_CATCH(...)
+    {
+      // Construction failed, destruct in reverse order:
+      for(; (i+1) > 0; --i) { data_[i-1].~T(); }
+      internal::aligned_free(data_);
+      EIGEN_THROW;
+    }
   }
 
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
   ~MaxSizeVector() {
-    for (size_t i = 0; i < size_; ++i) {
-      data_[i].~T();
+    for (size_t i = size_; i > 0; --i) {
+      data_[i-1].~T();
     }
     internal::aligned_free(data_);
   }
 
   void resize(size_t n) {
     eigen_assert(n <= reserve_);
-    for (size_t i = size_; i < n; ++i) {
-      new (&data_[i]) T;
+    for (; size_ < n; ++size_) {
+      new (&data_[size_]) T;
     }
-    for (size_t i = n; i < size_; ++i) {
-      data_[i].~T();
+    for (; size_ > n; --size_) {
+      data_[size_-1].~T();
     }
-    size_ = n;
+    eigen_assert(size_ == n);
   }
 
   // Append new elements (up to reserved size).
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
   void push_back(const T& t) {
     eigen_assert(size_ < reserve_);
-    data_[size_++] = t;
+    new (&data_[size_++]) T(t);
   }
 
+  // For C++03 compatibility this only takes one argument
+  template<class X>
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+  void emplace_back(const X& x) {
+    eigen_assert(size_ < reserve_);
+    new (&data_[size_++]) T(x);
+  }
+
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
   const T& operator[] (size_t i) const {
     eigen_assert(i < size_);
@@ -99,11 +118,8 @@ class MaxSizeVector {
 
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
   void pop_back() {
-    // NOTE: This does not destroy the value at the end the way
-    // std::vector's version of pop_back() does. That happens when
-    // the Vector is destroyed.
     eigen_assert(size_ > 0);
-    size_--;
+    data_[--size_].~T();
   }
 
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
@ -289,6 +289,7 @@ class FFT
|
|||||||
void inv( MatrixBase<OutputDerived> & dst, const MatrixBase<ComplexDerived> & src, Index nfft=-1)
|
void inv( MatrixBase<OutputDerived> & dst, const MatrixBase<ComplexDerived> & src, Index nfft=-1)
|
||||||
{
|
{
|
||||||
typedef typename ComplexDerived::Scalar src_type;
|
typedef typename ComplexDerived::Scalar src_type;
|
||||||
|
typedef typename ComplexDerived::RealScalar real_type;
|
||||||
typedef typename OutputDerived::Scalar dst_type;
|
typedef typename OutputDerived::Scalar dst_type;
|
||||||
const bool realfft= (NumTraits<dst_type>::IsComplex == 0);
|
const bool realfft= (NumTraits<dst_type>::IsComplex == 0);
|
||||||
EIGEN_STATIC_ASSERT_VECTOR_ONLY(OutputDerived)
|
EIGEN_STATIC_ASSERT_VECTOR_ONLY(OutputDerived)
|
||||||
@@ -329,9 +330,9 @@ class FFT
           tmp.head(nhead) = src.head(nhead);
           tmp.tail(ntail) = src.tail(ntail);
           if (resize_input<0) { //shrinking -- create the Nyquist bin as the average of the two bins that fold into it
-            tmp(nhead) = ( src(nfft/2) + src( src.size() - nfft/2 ) )*src_type(.5);
+            tmp(nhead) = ( src(nfft/2) + src( src.size() - nfft/2 ) )*real_type(.5);
           }else{ // expanding -- split the old Nyquist bin into two halves
-            tmp(nhead) = src(nhead) * src_type(.5);
+            tmp(nhead) = src(nhead) * real_type(.5);
           }
         }
       }
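Note: `src_type` here is a complex type, so `src_type(.5)` constructed a full complex value just to scale by one half; multiplying by the underlying `RealScalar` states the intent and uses the cheaper complex-times-real product (the numeric result is the same). The idea in isolation, as a sketch:

```cpp
#include <complex>

int main() {
  typedef std::complex<float> src_type;
  typedef float real_type;

  src_type a(1.0f, 2.0f), b(3.0f, 4.0f);
  // Nyquist-bin average when shrinking: scale the folded sum by one half.
  src_type via_complex = (a + b) * src_type(0.5f);   // complex * complex
  src_type via_real    = (a + b) * real_type(0.5f);  // complex * real: same value
  return via_complex == via_real ? 0 : 1;
}
```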
|
@ -184,7 +184,7 @@ inline void glRotate(const Rotation2D<float>& rot)
|
|||||||
}
|
}
|
||||||
inline void glRotate(const Rotation2D<double>& rot)
|
inline void glRotate(const Rotation2D<double>& rot)
|
||||||
{
|
{
|
||||||
glRotated(rot.angle()*180.0/EIGEN_PI, 0.0, 0.0, 1.0);
|
glRotated(rot.angle()*180.0/double(EIGEN_PI), 0.0, 0.0, 1.0);
|
||||||
}
|
}
|
||||||
|
|
||||||
template<typename Derived> void glRotate(const RotationBase<Derived,3>& rot)
|
template<typename Derived> void glRotate(const RotationBase<Derived,3>& rot)
|
||||||
|
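Note: `EIGEN_PI` expands to a long double literal, so the untouched expression promoted the whole division to long double before narrowing back at the `glRotated(double, ...)` call; the explicit `double(EIGEN_PI)` keeps the arithmetic in double and silences conversion warnings. A self-contained illustration, with `M_PI_LD` standing in for `EIGEN_PI`'s long double definition:

```cpp
#include <cstdio>

// Stand-in for EIGEN_PI, which Eigen defines as a long double literal.
#define M_PI_LD 3.141592653589793238462643383279502884197L

void take_double(double deg) { std::printf("%f\n", deg); }

int main() {
  double angle = 1.0;
  take_double(angle * 180.0 / M_PI_LD);          // promoted to long double, then narrowed
  take_double(angle * 180.0 / double(M_PI_LD));  // stays in double throughout
}
```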
@@ -35,6 +35,7 @@ struct get_boxes_helper {
   {
     outBoxes.insert(outBoxes.end(), boxBegin, boxEnd);
     eigen_assert(outBoxes.size() == objects.size());
+    EIGEN_ONLY_USED_FOR_DEBUG(objects);
   }
 };
 
@@ -10,6 +10,7 @@
 
 #include "main.h"
 
+#include <algorithm>
 #include <set>
 
 #include <Eigen/CXX11/Tensor>
@@ -19,17 +20,16 @@ using Eigen::Index;
 using Eigen::RowMajor;
 using Eigen::ColMajor;
 
-using internal::TensorBlockShapeType;
 
 template<typename T>
 static const T& choose(int layout, const T& col, const T& row) {
   return layout == ColMajor ? col : row;
 }
 
-static const TensorBlockShapeType RandomShape() {
+static internal::TensorBlockShapeType RandomShape() {
   return internal::random<bool>()
-             ? internal::TensorBlockShapeType::kUniformAllDims
-             : internal::TensorBlockShapeType::kSkewedInnerDims;
+             ? internal::kUniformAllDims
+             : internal::kSkewedInnerDims;
 }
 
 template <int NumDims>
@@ -44,12 +44,12 @@ static DSizes<Index, NumDims> RandomDims() {
     dims[i] = internal::random<int>(1, 20);
   }
   return DSizes<Index, NumDims>(dims);
-};
+}
 
 /** Dummy data type to test TensorBlock copy ops. */
 struct Data {
-  Data() : Data(0) {}
-  explicit Data(int v) { value = v; }
+  Data() : value(0) {}
+  explicit Data(int v) : value(v) { }
   int value;
 };
 
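Reviewer note: this block of test edits reverts C++11-only constructs so the file still builds in C++03 mode: the delegating constructor `Data() : Data(0) {}` becomes plain member initialization, and below, `auto`, `nullptr`, `using` aliases, and `std::shuffle` become explicit types, `NULL`, typedefs, and `std::random_shuffle`. The delegating-constructor difference in isolation:

```cpp
struct Data03 {
  // C++03-compatible: initialize the member directly in each constructor.
  Data03() : value(0) {}
  explicit Data03(int v) : value(v) {}
  int value;
};

struct Data11 {
  // C++11 only: the default constructor delegates to the int constructor.
  Data11() : Data11(0) {}
  explicit Data11(int v) : value(v) {}
  int value;
};
```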
@@ -91,21 +91,19 @@ static void Debug(DSizes<Index, NumDims> dims) {
 template <int Layout>
 static void test_block_mapper_sanity()
 {
-  using T = int;
-  using TensorBlock = internal::TensorBlock<T, Index, 2, Layout>;
-  using TensorBlockMapper = internal::TensorBlockMapper<T, Index, 2, Layout>;
+  typedef internal::TensorBlockMapper<int, Index, 2, Layout> TensorBlockMapper;
 
   DSizes<Index, 2> tensor_dims(100, 100);
 
   // Test uniform blocks.
   TensorBlockMapper uniform_block_mapper(
-      tensor_dims, internal::TensorBlockShapeType::kUniformAllDims, 100);
+      tensor_dims, internal::kUniformAllDims, 100);
 
   VERIFY_IS_EQUAL(uniform_block_mapper.total_block_count(), 100);
   VERIFY_IS_EQUAL(uniform_block_mapper.block_dims_total_size(), 100);
 
   // 10x10 blocks
-  auto uniform_b0 = uniform_block_mapper.GetBlockForIndex(0, nullptr);
+  typename TensorBlockMapper::Block uniform_b0 = uniform_block_mapper.GetBlockForIndex(0, NULL);
   VERIFY_IS_EQUAL(uniform_b0.block_sizes().at(0), 10);
   VERIFY_IS_EQUAL(uniform_b0.block_sizes().at(1), 10);
   // Depending on a layout we stride by cols rows.
@@ -117,13 +115,13 @@ static void test_block_mapper_sanity()
 
   // Test skewed to inner dims blocks.
   TensorBlockMapper skewed_block_mapper(
-      tensor_dims, internal::TensorBlockShapeType::kSkewedInnerDims, 100);
+      tensor_dims, internal::kSkewedInnerDims, 100);
 
   VERIFY_IS_EQUAL(skewed_block_mapper.total_block_count(), 100);
   VERIFY_IS_EQUAL(skewed_block_mapper.block_dims_total_size(), 100);
 
   // 1x100 (100x1) rows/cols depending on a tensor layout.
-  auto skewed_b0 = skewed_block_mapper.GetBlockForIndex(0, nullptr);
+  typename TensorBlockMapper::Block skewed_b0 = skewed_block_mapper.GetBlockForIndex(0, NULL);
   VERIFY_IS_EQUAL(skewed_b0.block_sizes().at(0), choose(Layout, 100, 1));
   VERIFY_IS_EQUAL(skewed_b0.block_sizes().at(1), choose(Layout, 1, 100));
   // Depending on a layout we stride by cols rows.
@@ -145,7 +143,8 @@ static void UpdateCoeffSet(
 
   for (int i = 0; i < block_sizes[dim_index]; ++i) {
     if (tensor_strides[dim_index] == 1) {
-      auto inserted = visited_coeffs->insert(first_coeff_index + i);
+      typedef std::pair<std::set<Index>::iterator, bool> ReturnType;
+      ReturnType inserted = visited_coeffs->insert(first_coeff_index + i);
       VERIFY_IS_EQUAL(inserted.second, true);
     } else {
       int next_dim_index = dim_index + choose(Layout, -1, 1);
@@ -158,9 +157,8 @@ static void UpdateCoeffSet(
 
 template <typename T, int NumDims, int Layout>
 static void test_block_mapper_maps_every_element() {
-  using TensorBlock = internal::TensorBlock<T, Index, NumDims, Layout>;
-  using TensorBlockMapper =
-      internal::TensorBlockMapper<T, Index, NumDims, Layout>;
+  typedef internal::TensorBlock<T, Index, NumDims, Layout> TensorBlock;
+  typedef internal::TensorBlockMapper<T, Index, NumDims, Layout> TensorBlockMapper;
 
   DSizes<Index, NumDims> dims = RandomDims<NumDims>();
 
|
|||||||
TensorBlockMapper block_mapper(dims, RandomShape(), RandomTargetSize(dims));
|
TensorBlockMapper block_mapper(dims, RandomShape(), RandomTargetSize(dims));
|
||||||
|
|
||||||
for (int i = 0; i < block_mapper.total_block_count(); ++i) {
|
for (int i = 0; i < block_mapper.total_block_count(); ++i) {
|
||||||
TensorBlock block = block_mapper.GetBlockForIndex(i, nullptr);
|
TensorBlock block = block_mapper.GetBlockForIndex(i, NULL);
|
||||||
UpdateCoeffSet<T, Layout, NumDims>(block, block.first_coeff_index(),
|
UpdateCoeffSet<T, Layout, NumDims>(block, block.first_coeff_index(),
|
||||||
choose(Layout, NumDims - 1, 0),
|
choose(Layout, NumDims - 1, 0),
|
||||||
&coeff_set);
|
&coeff_set);
|
||||||
@@ -187,9 +185,8 @@ static void test_block_mapper_maps_every_element() {
 
 template <typename T, int NumDims, int Layout>
 static void test_slice_block_mapper_maps_every_element() {
-  using TensorBlock = internal::TensorBlock<T, Index, NumDims, Layout>;
-  using TensorSliceBlockMapper =
-      internal::TensorSliceBlockMapper<T, Index, NumDims, Layout>;
+  typedef internal::TensorBlock<T, Index, NumDims, Layout> TensorBlock;
+  typedef internal::TensorSliceBlockMapper<T, Index, NumDims, Layout> TensorSliceBlockMapper;
 
   DSizes<Index, NumDims> tensor_dims = RandomDims<NumDims>();
   DSizes<Index, NumDims> tensor_slice_offsets = RandomDims<NumDims>();
@@ -206,7 +203,7 @@ static void test_slice_block_mapper_maps_every_element() {
   // Keep track of elements indices available via block access.
   std::set<Index> coeff_set;
 
-  auto total_coeffs = static_cast<int>(tensor_slice_extents.TotalSize());
+  int total_coeffs = static_cast<int>(tensor_slice_extents.TotalSize());
 
   // Pick a random dimension sizes for the tensor blocks.
   DSizes<Index, NumDims> block_sizes;
@@ -219,7 +216,7 @@ static void test_slice_block_mapper_maps_every_element() {
                                      DimensionList<Index, NumDims>());
 
   for (int i = 0; i < block_mapper.total_block_count(); ++i) {
-    TensorBlock block = block_mapper.GetBlockForIndex(i, nullptr);
+    TensorBlock block = block_mapper.GetBlockForIndex(i, NULL);
     UpdateCoeffSet<T, Layout, NumDims>(block, block.first_coeff_index(),
                                        choose(Layout, NumDims - 1, 0),
                                        &coeff_set);
@@ -240,7 +237,7 @@ static void test_block_io_copy_data_from_source_to_target() {
       TensorBlockWriter;
 
   DSizes<Index, NumDims> input_tensor_dims = RandomDims<NumDims>();
-  const auto input_tensor_size = input_tensor_dims.TotalSize();
+  const Index input_tensor_size = input_tensor_dims.TotalSize();
 
   T* input_data = GenerateRandomData<T>(input_tensor_size);
   T* output_data = new T[input_tensor_size];
@@ -319,7 +316,7 @@ static void test_block_io_copy_using_reordered_dimensions() {
       TensorBlockWriter;
 
   DSizes<Index, NumDims> input_tensor_dims = RandomDims<NumDims>();
-  const auto input_tensor_size = input_tensor_dims.TotalSize();
+  const Index input_tensor_size = input_tensor_dims.TotalSize();
 
   // Create a random input tensor.
   T* input_data = GenerateRandomData<T>(input_tensor_size);
@@ -327,7 +324,7 @@ static void test_block_io_copy_using_reordered_dimensions() {
   // Create a random dimension re-ordering/shuffle.
   std::vector<Index> shuffle;
   for (int i = 0; i < NumDims; ++i) shuffle.push_back(i);
-  std::shuffle(shuffle.begin(), shuffle.end(), std::mt19937());
+  std::random_shuffle(shuffle.begin(), shuffle.end());
 
   DSizes<Index, NumDims> output_tensor_dims;
   array<Index, NumDims> input_to_output_dim_map;
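Note: `std::shuffle` with an explicit `std::mt19937` is C++11; `std::random_shuffle` is the C++03 equivalent (deprecated in C++14 and removed in C++17, so this revert only makes sense while the test suite must stay C++03-buildable). Side by side:

```cpp
#include <algorithm>
#include <vector>
#if __cplusplus >= 201103L
#include <random>
#endif

int main() {
  std::vector<int> v;
  for (int i = 0; i < 8; ++i) v.push_back(i);

#if __cplusplus >= 201103L
  std::shuffle(v.begin(), v.end(), std::mt19937());  // C++11: explicit URBG
#else
  std::random_shuffle(v.begin(), v.end());           // C++03: implementation RNG
#endif
}
```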
@@ -342,8 +339,8 @@ static void test_block_io_copy_using_reordered_dimensions() {
   TensorBlockMapper block_mapper(output_tensor_dims, RandomShape(),
                                  RandomTargetSize(input_tensor_dims));

-  auto* block_data = new T[block_mapper.block_dims_total_size()];
-  auto* output_data = new T[input_tensor_size];
+  T* block_data = new T[block_mapper.block_dims_total_size()];
+  T* output_data = new T[input_tensor_size];

   array<Index, NumDims> input_tensor_strides =
       ComputeStrides<Layout, NumDims>(input_tensor_dims);
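
ComputeStrides, used just above, is a test helper that is not part of this diff. For orientation, a stride table for a dense tensor is just the running product of the dimensions starting from the innermost one; a plausible sketch for the column-major case (row-major would run the product from the other end):

    #include <array>
    #include <cstddef>

    // Column-major strides: stride[0] == 1 and
    // stride[i] == stride[i-1] * dims[i-1].
    template <std::size_t NumDims>
    std::array<std::ptrdiff_t, NumDims> ColMajorStrides(
        const std::array<std::ptrdiff_t, NumDims>& dims) {
      std::array<std::ptrdiff_t, NumDims> strides;
      strides[0] = 1;
      for (std::size_t i = 1; i < NumDims; ++i)
        strides[i] = strides[i - 1] * dims[i - 1];
      return strides;
    }
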
@@ -385,8 +382,8 @@ static void test_block_io_zero_stride()
   input_tensor_dims[0] = 1;
   input_tensor_dims[2] = 1;
   input_tensor_dims[4] = 1;
-  const auto input_tensor_size = input_tensor_dims.TotalSize();
-  auto* input_data = GenerateRandomData<float>(input_tensor_size);
+  const Index input_tensor_size = input_tensor_dims.TotalSize();
+  float* input_data = GenerateRandomData<float>(input_tensor_size);

   DSizes<Index, 5> output_tensor_dims = rnd_dims;

@@ -427,7 +424,7 @@ static void test_block_io_zero_stride()
   };

   {
-    auto* output_data = new float[output_tensor_dims.TotalSize()];
+    float* output_data = new float[output_tensor_dims.TotalSize()];
     TensorBlock read_block(0, output_tensor_dims, output_tensor_strides,
                            input_tensor_strides_with_zeros, output_data);
     TensorBlockReader::Run(&read_block, input_data);
@@ -436,7 +433,7 @@ static void test_block_io_zero_stride()
   }

   {
-    auto* output_data = new float[output_tensor_dims.TotalSize()];
+    float* output_data = new float[output_tensor_dims.TotalSize()];
     TensorBlock write_block(0, output_tensor_dims,
                             input_tensor_strides_with_zeros,
                             output_tensor_strides, input_data);
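
The two hunks above exercise the zero-stride convention: a dimension whose stride is zero always resolves to the same memory address, which is how a size-1 input is broadcast across a larger block in both the read and the write direction. A standalone sketch of the addressing rule (names are illustrative, not Eigen's):

    #include <cstddef>
    #include <vector>

    // A zero stride contributes nothing to the offset, so every index along
    // that dimension reads the same source element (i.e. broadcasting).
    float BroadcastRead(const std::vector<float>& data,
                        const std::vector<std::ptrdiff_t>& strides,
                        const std::vector<std::ptrdiff_t>& indices) {
      std::ptrdiff_t offset = 0;
      for (std::size_t d = 0; d < indices.size(); ++d)
        offset += indices[d] * strides[d];
      return data[offset];
    }
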
@@ -459,14 +456,14 @@ static void test_block_io_squeeze_ones() {
   // Total size > 1.
   {
     DSizes<Index, 5> block_sizes(1, 2, 1, 2, 1);
-    const auto total_size = block_sizes.TotalSize();
+    const Index total_size = block_sizes.TotalSize();

     // Create a random input tensor.
-    auto* input_data = GenerateRandomData<float>(total_size);
+    float* input_data = GenerateRandomData<float>(total_size);
     DSizes<Index, 5> strides(ComputeStrides<Layout, 5>(block_sizes));

     {
-      auto* output_data = new float[block_sizes.TotalSize()];
+      float* output_data = new float[block_sizes.TotalSize()];
       TensorBlock read_block(0, block_sizes, strides, strides, output_data);
       TensorBlockReader::Run(&read_block, input_data);
       for (int i = 0; i < total_size; ++i) {
@@ -476,7 +473,7 @@ static void test_block_io_squeeze_ones() {
     }

     {
-      auto* output_data = new float[block_sizes.TotalSize()];
+      float* output_data = new float[block_sizes.TotalSize()];
       TensorBlock write_block(0, block_sizes, strides, strides, input_data);
       TensorBlockWriter::Run(write_block, output_data);
       for (int i = 0; i < total_size; ++i) {
@@ -489,14 +486,14 @@ static void test_block_io_squeeze_ones() {
   // Total size == 1.
   {
     DSizes<Index, 5> block_sizes(1, 1, 1, 1, 1);
-    const auto total_size = block_sizes.TotalSize();
+    const Index total_size = block_sizes.TotalSize();

     // Create a random input tensor.
-    auto* input_data = GenerateRandomData<float>(total_size);
+    float* input_data = GenerateRandomData<float>(total_size);
     DSizes<Index, 5> strides(ComputeStrides<Layout, 5>(block_sizes));

     {
-      auto* output_data = new float[block_sizes.TotalSize()];
+      float* output_data = new float[block_sizes.TotalSize()];
       TensorBlock read_block(0, block_sizes, strides, strides, output_data);
       TensorBlockReader::Run(&read_block, input_data);
       for (int i = 0; i < total_size; ++i) {
@@ -506,7 +503,7 @@ static void test_block_io_squeeze_ones() {
     }

     {
-      auto* output_data = new float[block_sizes.TotalSize()];
+      float* output_data = new float[block_sizes.TotalSize()];
       TensorBlock write_block(0, block_sizes, strides, strides, input_data);
       TensorBlockWriter::Run(write_block, output_data);
       for (int i = 0; i < total_size; ++i) {
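
The squeeze-ones tests above target a block I/O fast path: size-1 dimensions carry no addressing information, so they can be collapsed before the copy loops run, turning a (1, 2, 1, 2, 1) block into an effective 2x2 one. A hedged sketch of that collapse (the helper name is made up for illustration):

    #include <cstddef>
    #include <vector>

    // Drop size-1 dimensions; the element count is unchanged, but the copy
    // loop nest gets shallower. Keep one dimension so rank never hits zero.
    std::vector<std::ptrdiff_t> SqueezeOnes(
        const std::vector<std::ptrdiff_t>& dims) {
      std::vector<std::ptrdiff_t> squeezed;
      for (std::ptrdiff_t d : dims)
        if (d != 1) squeezed.push_back(d);
      if (squeezed.empty()) squeezed.push_back(1);
      return squeezed;
    }
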
@@ -527,7 +524,7 @@ static void test_block_cwise_binary_io_basic() {
   DSizes<Index, NumDims> block_sizes = RandomDims<NumDims>();
   DSizes<Index, NumDims> strides(ComputeStrides<Layout, NumDims>(block_sizes));

-  const auto total_size = block_sizes.TotalSize();
+  const Index total_size = block_sizes.TotalSize();

   // Create a random input tensors.
   T* left_data = GenerateRandomData<T>(total_size);
@@ -556,13 +553,13 @@ static void test_block_cwise_binary_io_squeeze_ones() {
   DSizes<Index, 5> block_sizes(1, 2, 1, 3, 1);
   DSizes<Index, 5> strides(ComputeStrides<Layout, 5>(block_sizes));

-  const auto total_size = block_sizes.TotalSize();
+  const Index total_size = block_sizes.TotalSize();

   // Create a random input tensors.
-  auto* left_data = GenerateRandomData<float>(total_size);
-  auto* right_data = GenerateRandomData<float>(total_size);
+  float* left_data = GenerateRandomData<float>(total_size);
+  float* right_data = GenerateRandomData<float>(total_size);

-  auto* output_data = new float[total_size];
+  float* output_data = new float[total_size];
   BinaryFunctor functor;
   TensorBlockCwiseBinaryIO::Run(functor, block_sizes, strides, output_data,
                                 strides, left_data, strides, right_data);
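
TensorBlockCwiseBinaryIO::Run, called above, walks one output block and two strided inputs in lockstep and applies a binary functor coefficient-wise; combined with zero strides this also covers broadcasting operands. A simplified flat-loop sketch of that contract (an illustrative signature, not the real API):

    #include <cstddef>

    // Apply `op` over n coefficients with independent strides; a zero input
    // stride broadcasts that operand across the whole output.
    template <typename T, typename BinaryOp>
    void CwiseBinaryRun(BinaryOp op, std::ptrdiff_t n,
                        T* out, std::ptrdiff_t out_stride,
                        const T* lhs, std::ptrdiff_t lhs_stride,
                        const T* rhs, std::ptrdiff_t rhs_stride) {
      for (std::ptrdiff_t i = 0; i < n; ++i)
        out[i * out_stride] = op(lhs[i * lhs_stride], rhs[i * rhs_stride]);
    }
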
@@ -603,14 +600,14 @@ static void test_block_cwise_binary_io_zero_strides() {
   right_strides[3] = 0;

   // Generate random data.
-  auto* left_data = GenerateRandomData<float>(left_sizes.TotalSize());
-  auto* right_data = GenerateRandomData<float>(right_sizes.TotalSize());
+  float* left_data = GenerateRandomData<float>(left_sizes.TotalSize());
+  float* right_data = GenerateRandomData<float>(right_sizes.TotalSize());

   DSizes<Index, 5> output_sizes = rnd_dims;
   DSizes<Index, 5> output_strides(ComputeStrides<Layout, 5>(output_sizes));

-  const auto output_total_size = output_sizes.TotalSize();
-  auto* output_data = new float[output_total_size];
+  const Index output_total_size = output_sizes.TotalSize();
+  float* output_data = new float[output_total_size];

   BinaryFunctor functor;
   TensorBlockCwiseBinaryIO::Run(functor, output_sizes, output_strides,
@@ -647,17 +644,16 @@ static void test_block_cwise_binary_io_zero_strides() {
 template <int Layout>
 static void test_uniform_block_shape()
 {
-  using T = int;
-  typedef internal::TensorBlock<T, Index, 5, Layout> TensorBlock;
-  typedef internal::TensorBlockMapper<T, Index, 5, Layout> TensorBlockMapper;
+  typedef internal::TensorBlock<int, Index, 5, Layout> TensorBlock;
+  typedef internal::TensorBlockMapper<int, Index, 5, Layout> TensorBlockMapper;

   {
     // Test shape 'UniformAllDims' with uniform 'max_coeff count'.
     DSizes<Index, 5> dims(11, 5, 6, 17, 7);
     const size_t max_coeff_count = 5 * 5 * 5 * 5 * 5;
-    TensorBlockMapper block_mapper(dims, TensorBlockShapeType::kUniformAllDims,
+    TensorBlockMapper block_mapper(dims, internal::kUniformAllDims,
                                    max_coeff_count);
-    TensorBlock block = block_mapper.GetBlockForIndex(0, nullptr);
+    TensorBlock block = block_mapper.GetBlockForIndex(0, NULL);
     for (int i = 0; i < 5; ++i) {
       VERIFY_IS_EQUAL(5, block.block_sizes()[i]);
     }
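
For readers skimming the diff: kUniformAllDims asks the block mapper to spread the coefficient budget roughly evenly across all dimensions, which is why a rank-5 tensor with a budget of 5^5 yields 5x5x5x5x5 blocks above. A rough sketch of that sizing rule (Eigen's real algorithm additionally redistributes budget left over from dimensions smaller than the uniform size):

    #include <algorithm>
    #include <cmath>
    #include <cstddef>
    #include <vector>

    // Uniform block size ~ budget^(1/rank), clamped by each dimension.
    std::vector<std::ptrdiff_t> UniformBlockDims(
        const std::vector<std::ptrdiff_t>& dims, std::ptrdiff_t budget) {
      const double root =
          std::pow(static_cast<double>(budget), 1.0 / dims.size());
      std::vector<std::ptrdiff_t> block(dims.size());
      for (std::size_t i = 0; i < dims.size(); ++i)
        block[i] = std::min<std::ptrdiff_t>(
            dims[i], static_cast<std::ptrdiff_t>(root));
      return block;
    }
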
@@ -669,9 +665,9 @@ static void test_uniform_block_shape()
   if (Layout == ColMajor) {
     DSizes<Index, 5> dims(11, 5, 6, 17, 7);
     const size_t max_coeff_count = 7 * 5 * 5 * 5 * 5;
-    TensorBlockMapper block_mapper(dims, TensorBlockShapeType::kUniformAllDims,
+    TensorBlockMapper block_mapper(dims, internal::kUniformAllDims,
                                    max_coeff_count);
-    TensorBlock block = block_mapper.GetBlockForIndex(0, nullptr);
+    TensorBlock block = block_mapper.GetBlockForIndex(0, NULL);
     VERIFY_IS_EQUAL(7, block.block_sizes()[0]);
     for (int i = 1; i < 5; ++i) {
       VERIFY_IS_EQUAL(5, block.block_sizes()[i]);
@@ -680,9 +676,9 @@ static void test_uniform_block_shape()
   } else {
     DSizes<Index, 5> dims(11, 5, 6, 17, 7);
     const size_t max_coeff_count = 5 * 5 * 5 * 5 * 6;
-    TensorBlockMapper block_mapper(dims, TensorBlockShapeType::kUniformAllDims,
+    TensorBlockMapper block_mapper(dims, internal::kUniformAllDims,
                                    max_coeff_count);
-    TensorBlock block = block_mapper.GetBlockForIndex(0, nullptr);
+    TensorBlock block = block_mapper.GetBlockForIndex(0, NULL);
     VERIFY_IS_EQUAL(6, block.block_sizes()[4]);
     for (int i = 3; i >= 0; --i) {
       VERIFY_IS_EQUAL(5, block.block_sizes()[i]);
@@ -695,9 +691,9 @@ static void test_uniform_block_shape()
   if (Layout == ColMajor) {
     DSizes<Index, 5> dims(11, 5, 6, 17, 7);
     const size_t max_coeff_count = 11 * 5 * 5 * 5 * 5;
-    TensorBlockMapper block_mapper(dims, TensorBlockShapeType::kUniformAllDims,
+    TensorBlockMapper block_mapper(dims, internal::kUniformAllDims,
                                    max_coeff_count);
-    TensorBlock block = block_mapper.GetBlockForIndex(0, nullptr);
+    TensorBlock block = block_mapper.GetBlockForIndex(0, NULL);
     VERIFY_IS_EQUAL(11, block.block_sizes()[0]);
     for (int i = 1; i < 5; ++i) {
       VERIFY_IS_EQUAL(5, block.block_sizes()[i]);
@@ -706,9 +702,9 @@ static void test_uniform_block_shape()
   } else {
     DSizes<Index, 5> dims(11, 5, 6, 17, 7);
     const size_t max_coeff_count = 5 * 5 * 5 * 5 * 7;
-    TensorBlockMapper block_mapper(dims, TensorBlockShapeType::kUniformAllDims,
+    TensorBlockMapper block_mapper(dims, internal::kUniformAllDims,
                                    max_coeff_count);
-    TensorBlock block = block_mapper.GetBlockForIndex(0, nullptr);
+    TensorBlock block = block_mapper.GetBlockForIndex(0, NULL);
     VERIFY_IS_EQUAL(7, block.block_sizes()[4]);
     for (int i = 3; i >= 0; --i) {
       VERIFY_IS_EQUAL(5, block.block_sizes()[i]);
@@ -721,9 +717,9 @@ static void test_uniform_block_shape()
   if (Layout == ColMajor) {
     DSizes<Index, 5> dims(7, 5, 6, 17, 7);
     const size_t max_coeff_count = 7 * 5 * 6 * 7 * 5;
-    TensorBlockMapper block_mapper(dims, TensorBlockShapeType::kUniformAllDims,
+    TensorBlockMapper block_mapper(dims, internal::kUniformAllDims,
                                    max_coeff_count);
-    TensorBlock block = block_mapper.GetBlockForIndex(0, nullptr);
+    TensorBlock block = block_mapper.GetBlockForIndex(0, NULL);
     VERIFY_IS_EQUAL(7, block.block_sizes()[0]);
     VERIFY_IS_EQUAL(5, block.block_sizes()[1]);
     VERIFY_IS_EQUAL(6, block.block_sizes()[2]);
@@ -733,9 +729,9 @@ static void test_uniform_block_shape()
   } else {
     DSizes<Index, 5> dims(7, 5, 6, 9, 7);
     const size_t max_coeff_count = 5 * 5 * 5 * 6 * 7;
-    TensorBlockMapper block_mapper(dims, TensorBlockShapeType::kUniformAllDims,
+    TensorBlockMapper block_mapper(dims, internal::kUniformAllDims,
                                    max_coeff_count);
-    TensorBlock block = block_mapper.GetBlockForIndex(0, nullptr);
+    TensorBlock block = block_mapper.GetBlockForIndex(0, NULL);
     VERIFY_IS_EQUAL(7, block.block_sizes()[4]);
     VERIFY_IS_EQUAL(6, block.block_sizes()[3]);
     VERIFY_IS_EQUAL(5, block.block_sizes()[2]);
@@ -748,9 +744,9 @@ static void test_uniform_block_shape()
   if (Layout == ColMajor) {
     DSizes<Index, 5> dims(7, 5, 6, 17, 7);
     const size_t max_coeff_count = 7 * 5 * 6 * 17 * 7;
-    TensorBlockMapper block_mapper(dims, TensorBlockShapeType::kUniformAllDims,
+    TensorBlockMapper block_mapper(dims, internal::kUniformAllDims,
                                    max_coeff_count);
-    TensorBlock block = block_mapper.GetBlockForIndex(0, nullptr);
+    TensorBlock block = block_mapper.GetBlockForIndex(0, NULL);
     VERIFY_IS_EQUAL(7, block.block_sizes()[0]);
     VERIFY_IS_EQUAL(5, block.block_sizes()[1]);
     VERIFY_IS_EQUAL(6, block.block_sizes()[2]);
@@ -760,9 +756,9 @@ static void test_uniform_block_shape()
   } else {
     DSizes<Index, 5> dims(7, 5, 6, 9, 7);
     const size_t max_coeff_count = 7 * 5 * 6 * 9 * 7;
-    TensorBlockMapper block_mapper(dims, TensorBlockShapeType::kUniformAllDims,
+    TensorBlockMapper block_mapper(dims, internal::kUniformAllDims,
                                    max_coeff_count);
-    TensorBlock block = block_mapper.GetBlockForIndex(0, nullptr);
+    TensorBlock block = block_mapper.GetBlockForIndex(0, NULL);
     VERIFY_IS_EQUAL(7, block.block_sizes()[4]);
     VERIFY_IS_EQUAL(9, block.block_sizes()[3]);
     VERIFY_IS_EQUAL(6, block.block_sizes()[2]);
@@ -775,17 +771,16 @@ static void test_uniform_block_shape()
 template <int Layout>
 static void test_skewed_inner_dim_block_shape()
 {
-  using T = int;
-  typedef internal::TensorBlock<T, Index, 5, Layout> TensorBlock;
-  typedef internal::TensorBlockMapper<T, Index, 5, Layout> TensorBlockMapper;
+  typedef internal::TensorBlock<int, Index, 5, Layout> TensorBlock;
+  typedef internal::TensorBlockMapper<int, Index, 5, Layout> TensorBlockMapper;

   // Test shape 'SkewedInnerDims' with partial allocation to inner-most dim.
   if (Layout == ColMajor) {
     DSizes<Index, 5> dims(11, 5, 6, 17, 7);
     const size_t max_coeff_count = 10 * 1 * 1 * 1 * 1;
-    TensorBlockMapper block_mapper(dims, TensorBlockShapeType::kSkewedInnerDims,
+    TensorBlockMapper block_mapper(dims, internal::kSkewedInnerDims,
                                    max_coeff_count);
-    TensorBlock block = block_mapper.GetBlockForIndex(0, nullptr);
+    TensorBlock block = block_mapper.GetBlockForIndex(0, NULL);
     VERIFY_IS_EQUAL(10, block.block_sizes()[0]);
     for (int i = 1; i < 5; ++i) {
       VERIFY_IS_EQUAL(1, block.block_sizes()[i]);
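
kSkewedInnerDims, by contrast, hands the whole coefficient budget to the innermost dimension first (dimension 0 for column-major, the last dimension for row-major) and only lets outer dimensions grow once the inner ones are fully covered, keeping block scans contiguous in memory. A rough sketch of the greedy allocation, assuming dims is ordered innermost-first (an illustration, not Eigen's exact code):

    #include <algorithm>
    #include <cstddef>
    #include <vector>

    // Greedy: fill the innermost dimension up to its extent, then spend the
    // remaining budget on the next dimension, and so on.
    std::vector<std::ptrdiff_t> SkewedBlockDims(
        const std::vector<std::ptrdiff_t>& dims, std::ptrdiff_t budget) {
      std::vector<std::ptrdiff_t> block(dims.size(), 1);
      for (std::size_t i = 0; i < dims.size() && budget > 1; ++i) {
        block[i] = std::min(dims[i], budget);
        budget /= block[i];
      }
      return block;
    }

With dims (11, 5, 6, 17, 7) and a budget of 10, this reproduces the (10, 1, 1, 1, 1) block verified above.
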
@@ -794,9 +789,9 @@ static void test_skewed_inner_dim_block_shape()
   } else {
     DSizes<Index, 5> dims(11, 5, 6, 17, 7);
     const size_t max_coeff_count = 1 * 1 * 1 * 1 * 6;
-    TensorBlockMapper block_mapper(dims, TensorBlockShapeType::kSkewedInnerDims,
+    TensorBlockMapper block_mapper(dims, internal::kSkewedInnerDims,
                                    max_coeff_count);
-    TensorBlock block = block_mapper.GetBlockForIndex(0, nullptr);
+    TensorBlock block = block_mapper.GetBlockForIndex(0, NULL);
     VERIFY_IS_EQUAL(6, block.block_sizes()[4]);
     for (int i = 3; i >= 0; --i) {
       VERIFY_IS_EQUAL(1, block.block_sizes()[i]);
@@ -808,9 +803,9 @@ static void test_skewed_inner_dim_block_shape()
   if (Layout == ColMajor) {
     DSizes<Index, 5> dims(11, 5, 6, 17, 7);
     const size_t max_coeff_count = 11 * 1 * 1 * 1 * 1;
-    TensorBlockMapper block_mapper(dims, TensorBlockShapeType::kSkewedInnerDims,
+    TensorBlockMapper block_mapper(dims, internal::kSkewedInnerDims,
                                    max_coeff_count);
-    TensorBlock block = block_mapper.GetBlockForIndex(0, nullptr);
+    TensorBlock block = block_mapper.GetBlockForIndex(0, NULL);
     VERIFY_IS_EQUAL(11, block.block_sizes()[0]);
     for (int i = 1; i < 5; ++i) {
       VERIFY_IS_EQUAL(1, block.block_sizes()[i]);
@@ -819,9 +814,9 @@ static void test_skewed_inner_dim_block_shape()
   } else {
     DSizes<Index, 5> dims(11, 5, 6, 17, 7);
     const size_t max_coeff_count = 1 * 1 * 1 * 1 * 7;
-    TensorBlockMapper block_mapper(dims, TensorBlockShapeType::kSkewedInnerDims,
+    TensorBlockMapper block_mapper(dims, internal::kSkewedInnerDims,
                                    max_coeff_count);
-    TensorBlock block = block_mapper.GetBlockForIndex(0, nullptr);
+    TensorBlock block = block_mapper.GetBlockForIndex(0, NULL);
     VERIFY_IS_EQUAL(7, block.block_sizes()[4]);
     for (int i = 3; i >= 0; --i) {
       VERIFY_IS_EQUAL(1, block.block_sizes()[i]);
@@ -834,9 +829,9 @@ static void test_skewed_inner_dim_block_shape()
   if (Layout == ColMajor) {
     DSizes<Index, 5> dims(11, 5, 6, 17, 7);
     const size_t max_coeff_count = 11 * 3 * 1 * 1 * 1;
-    TensorBlockMapper block_mapper(dims, TensorBlockShapeType::kSkewedInnerDims,
+    TensorBlockMapper block_mapper(dims, internal::kSkewedInnerDims,
                                    max_coeff_count);
-    TensorBlock block = block_mapper.GetBlockForIndex(0, nullptr);
+    TensorBlock block = block_mapper.GetBlockForIndex(0, NULL);
     VERIFY_IS_EQUAL(11, block.block_sizes()[0]);
     VERIFY_IS_EQUAL(3, block.block_sizes()[1]);
     for (int i = 2; i < 5; ++i) {
@@ -846,9 +841,9 @@ static void test_skewed_inner_dim_block_shape()
   } else {
     DSizes<Index, 5> dims(11, 5, 6, 17, 7);
     const size_t max_coeff_count = 1 * 1 * 1 * 15 * 7;
-    TensorBlockMapper block_mapper(dims, TensorBlockShapeType::kSkewedInnerDims,
+    TensorBlockMapper block_mapper(dims, internal::kSkewedInnerDims,
                                    max_coeff_count);
-    TensorBlock block = block_mapper.GetBlockForIndex(0, nullptr);
+    TensorBlock block = block_mapper.GetBlockForIndex(0, NULL);
     VERIFY_IS_EQUAL(7, block.block_sizes()[4]);
     VERIFY_IS_EQUAL(15, block.block_sizes()[3]);
     for (int i = 2; i >= 0; --i) {
@@ -862,9 +857,9 @@ static void test_skewed_inner_dim_block_shape()
   if (Layout == ColMajor) {
     DSizes<Index, 5> dims(11, 5, 6, 17, 7);
     const size_t max_coeff_count = 11 * 5 * 5 * 1 * 1;
-    TensorBlockMapper block_mapper(dims, TensorBlockShapeType::kSkewedInnerDims,
+    TensorBlockMapper block_mapper(dims, internal::kSkewedInnerDims,
                                    max_coeff_count);
-    TensorBlock block = block_mapper.GetBlockForIndex(0, nullptr);
+    TensorBlock block = block_mapper.GetBlockForIndex(0, NULL);
     VERIFY_IS_EQUAL(11, block.block_sizes()[0]);
     VERIFY_IS_EQUAL(5, block.block_sizes()[1]);
     VERIFY_IS_EQUAL(5, block.block_sizes()[2]);
@@ -875,9 +870,9 @@ static void test_skewed_inner_dim_block_shape()
   } else {
     DSizes<Index, 5> dims(11, 5, 6, 17, 7);
     const size_t max_coeff_count = 1 * 1 * 5 * 17 * 7;
-    TensorBlockMapper block_mapper(dims, TensorBlockShapeType::kSkewedInnerDims,
+    TensorBlockMapper block_mapper(dims, internal::kSkewedInnerDims,
                                    max_coeff_count);
-    TensorBlock block = block_mapper.GetBlockForIndex(0, nullptr);
+    TensorBlock block = block_mapper.GetBlockForIndex(0, NULL);
     VERIFY_IS_EQUAL(7, block.block_sizes()[4]);
     VERIFY_IS_EQUAL(17, block.block_sizes()[3]);
     VERIFY_IS_EQUAL(5, block.block_sizes()[2]);
@@ -891,9 +886,9 @@ static void test_skewed_inner_dim_block_shape()
   if (Layout == ColMajor) {
     DSizes<Index, 5> dims(11, 5, 6, 17, 7);
     const size_t max_coeff_count = 11 * 5 * 6 * 17 * 7;
-    TensorBlockMapper block_mapper(dims, TensorBlockShapeType::kSkewedInnerDims,
+    TensorBlockMapper block_mapper(dims, internal::kSkewedInnerDims,
                                    max_coeff_count);
-    TensorBlock block = block_mapper.GetBlockForIndex(0, nullptr);
+    TensorBlock block = block_mapper.GetBlockForIndex(0, NULL);
     VERIFY_IS_EQUAL(11, block.block_sizes()[0]);
     VERIFY_IS_EQUAL(5, block.block_sizes()[1]);
     VERIFY_IS_EQUAL(6, block.block_sizes()[2]);
@@ -903,9 +898,9 @@ static void test_skewed_inner_dim_block_shape()
   } else {
     DSizes<Index, 5> dims(11, 5, 6, 17, 7);
     const size_t max_coeff_count = 11 * 5 * 6 * 17 * 7;
-    TensorBlockMapper block_mapper(dims, TensorBlockShapeType::kSkewedInnerDims,
+    TensorBlockMapper block_mapper(dims, internal::kSkewedInnerDims,
                                    max_coeff_count);
-    TensorBlock block = block_mapper.GetBlockForIndex(0, nullptr);
+    TensorBlock block = block_mapper.GetBlockForIndex(0, NULL);
     VERIFY_IS_EQUAL(7, block.block_sizes()[4]);
     VERIFY_IS_EQUAL(17, block.block_sizes()[3]);
     VERIFY_IS_EQUAL(6, block.block_sizes()[2]);
@@ -918,15 +913,13 @@ static void test_skewed_inner_dim_block_shape()
 template <int Layout>
 static void test_empty_dims(const internal::TensorBlockShapeType block_shape)
 {
-  using T = int;
-
   // Test blocking of tensors with zero dimensions:
   //  - we must not crash on asserts and divisions by zero
   //  - we must not return block with zero dimensions
   //    (recipe for overflows/underflows, divisions by zero and NaNs later)
   //  - total block count must be zero
   {
-    typedef internal::TensorBlockMapper<T, Index, 1, Layout> TensorBlockMapper;
+    typedef internal::TensorBlockMapper<int, Index, 1, Layout> TensorBlockMapper;
     DSizes<Index, 1> dims(0);
     for (int max_coeff_count = 0; max_coeff_count < 2; ++max_coeff_count) {
       TensorBlockMapper block_mapper(dims, block_shape, max_coeff_count);
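
The comments above pin down the contract for degenerate shapes: a tensor with any zero dimension must map to zero blocks, and the mapper must never materialize a zero-sized block dimension along the way. A tiny sketch of a division-safe per-dimension block count (illustrative, not Eigen's code):

    #include <cstddef>

    // Ceiling division with the zero-size tensor short-circuited, so no
    // zero-sized block ever enters a division.
    std::ptrdiff_t BlockCount(std::ptrdiff_t dim_size,
                              std::ptrdiff_t block_size) {
      if (dim_size == 0) return 0;          // empty tensor => zero blocks
      if (block_size <= 0) block_size = 1;  // never allow zero-sized blocks
      return (dim_size + block_size - 1) / block_size;
    }
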
@@ -936,7 +929,7 @@ static void test_empty_dims(const internal::TensorBlockShapeType block_shape)
   }

   {
-    typedef internal::TensorBlockMapper<T, Index, 2, Layout> TensorBlockMapper;
+    typedef internal::TensorBlockMapper<int, Index, 2, Layout> TensorBlockMapper;
     for (int dim1 = 0; dim1 < 3; ++dim1) {
       for (int dim2 = 0; dim2 < 3; ++dim2) {
         DSizes<Index, 2> dims(dim1, dim2);
@@ -987,8 +980,8 @@ EIGEN_DECLARE_TEST(cxx11_tensor_block_access) {
   TEST_LAYOUTS(test_block_cwise_binary_io_zero_strides);
   TEST_LAYOUTS(test_uniform_block_shape);
   TEST_LAYOUTS(test_skewed_inner_dim_block_shape);
-  TEST_LAYOUTS_WITH_ARG(test_empty_dims, TensorBlockShapeType::kUniformAllDims);
-  TEST_LAYOUTS_WITH_ARG(test_empty_dims, TensorBlockShapeType::kSkewedInnerDims);
+  TEST_LAYOUTS_WITH_ARG(test_empty_dims, internal::kUniformAllDims);
+  TEST_LAYOUTS_WITH_ARG(test_empty_dims, internal::kSkewedInnerDims);
 }

 #undef TEST_LAYOUTS
@@ -471,7 +471,7 @@ static void test_tensor_product()
   mat1.setRandom();
   mat2.setRandom();

-  Tensor<float, 4, DataLayout> result = mat1.contract(mat2, Eigen::array<DimPair, 0>{{}});
+  Tensor<float, 4, DataLayout> result = mat1.contract(mat2, Eigen::array<DimPair, 0>{});

   VERIFY_IS_EQUAL(result.dimension(0), 2);
   VERIFY_IS_EQUAL(result.dimension(1), 3);
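
Context for this hunk: contracting over an empty list of dimension pairs is how the Tensor module spells a tensor (outer) product, so the result's rank is the sum of the operands' ranks; the change itself only drops a brace level from the empty array initializer. A minimal self-contained example:

    #include <unsupported/Eigen/CXX11/Tensor>

    int main() {
      typedef Eigen::Tensor<float, 2>::DimensionPair DimPair;
      Eigen::Tensor<float, 2> a(2, 3), b(4, 5);
      a.setRandom();
      b.setRandom();
      // Zero contracted dimensions: rank 2 + rank 2 = rank 4 outer product,
      // with prod(i, j, k, l) == a(i, j) * b(k, l).
      Eigen::Tensor<float, 4> prod = a.contract(b, Eigen::array<DimPair, 0>{});
      return 0;
    }
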
@@ -514,7 +514,7 @@ static void test_const_inputs()
 struct SqrtOutputKernel {
   template <typename Index, typename Scalar>
   EIGEN_ALWAYS_INLINE void operator()(
-      const OutputKernel::OutputMapper<Index, Scalar>& output_mapper,
+      const internal::blas_data_mapper<Scalar, Index, ColMajor>& output_mapper,
       const TensorContractionParams&, Index, Index, Index num_rows,
       Index num_cols) const {
     for (int i = 0; i < num_rows; ++i) {
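
An output kernel is a functor the contraction evaluator invokes on each finished block of the result, receiving a 2-D mapper over that block together with its offsets and extents; the hunk above only respells the mapper parameter as the underlying internal::blas_data_mapper type. The kernel body is truncated by the diff; judging from the test's later sqrt-based verification, the complete functor presumably looks like this sketch:

    #include <cmath>
    #include <unsupported/Eigen/CXX11/Tensor>

    // Reconstruction of the full kernel shape implied above; the loop body
    // is inferred from the test's check t_result[i] == sqrt(m_result[i]).
    struct SqrtOutputKernelSketch {
      template <typename Index, typename Scalar>
      void operator()(
          const Eigen::internal::blas_data_mapper<Scalar, Index,
                                                  Eigen::ColMajor>& output_mapper,
          const Eigen::TensorContractionParams&, Index, Index,
          Index num_rows, Index num_cols) const {
        for (Index j = 0; j < num_cols; ++j)
          for (Index i = 0; i < num_rows; ++i)
            output_mapper(i, j) = std::sqrt(output_mapper(i, j));
      }
    };

Such a kernel is passed as the extra argument to Tensor::contract(other, dims, kernel) and mutates each output block in place before the contraction returns.
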
@@ -553,7 +553,7 @@ static void test_large_contraction_with_output_kernel() {

   m_result = m_left * m_right;

-  for (size_t i = 0; i < t_result.dimensions().TotalSize(); i++) {
+  for (std::ptrdiff_t i = 0; i < t_result.dimensions().TotalSize(); i++) {
     VERIFY(&t_result.data()[i] != &m_result.data()[i]);
     VERIFY_IS_APPROX(t_result.data()[i], std::sqrt(m_result.data()[i]));
   }
@@ -25,7 +25,7 @@ static void test_evals()

   Tensor<float, 2, DataLayout> result(2,3);
   result.setZero();
-  Eigen::array<Tensor<float, 2>::Index, 1> dims3{{0}};
+  Eigen::array<Tensor<float, 2>::Index, 1> dims3{0};

   typedef TensorEvaluator<decltype(input.convolve(kernel, dims3)), DefaultDevice> Evaluator;
   Evaluator eval(input.convolve(kernel, dims3), DefaultDevice());
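
The dims3 change above is pure brace elision: in C++11 mode Eigen::array is essentially std::array, an aggregate wrapping a C array, so both {{0}} and {0} are valid initializers for a one-element array and the inner braces may be omitted. A two-line illustration with the standard type:

    #include <array>

    // Brace elision: both forms initialize the same aggregate.
    std::array<int, 1> a{{0}};
    std::array<int, 1> b{0};
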
@@ -170,7 +170,6 @@ static void test_type2indexpair_list()
   typedef Eigen::IndexPairList<Eigen::type2indexpair<0,10>, Eigen::IndexPair<DenseIndex>, Eigen::type2indexpair<2,12>> Dims2_b;
   typedef Eigen::IndexPairList<Eigen::IndexPair<DenseIndex>, Eigen::type2indexpair<1,11>, Eigen::IndexPair<DenseIndex>> Dims2_c;

-  Dims0 d0;
   Dims2_a d2_a;

   Dims2_b d2_b;
@@ -255,7 +255,7 @@ void test_multithread_contraction_agrees_with_singlethread() {
 struct SqrtOutputKernel {
   template <typename Index, typename Scalar>
   EIGEN_ALWAYS_INLINE void operator()(
-      const OutputKernel::OutputMapper<Index, Scalar>& output_mapper,
+      const internal::blas_data_mapper<Scalar, Index, ColMajor>& output_mapper,
       const TensorContractionParams&, Index, Index, Index num_rows,
       Index num_cols) const {
     for (int i = 0; i < num_rows; ++i) {
@@ -9,6 +9,7 @@
 // Public License v. 2.0. If a copy of the MPL was not distributed
 // with this file, You can obtain one at http://mozilla.org/MPL/2.0/.

+
 #ifdef EIGEN_TEST_PART_1

 #include "sparse.h"
@@ -236,7 +237,6 @@ EIGEN_DECLARE_TEST(kronecker_product)
 #ifdef EIGEN_TEST_PART_2

 // simply check that for a dense kronecker product, sparse module is not needed

 #include "main.h"
 #include <Eigen/KroneckerProduct>
-