mirror of
https://gitlab.com/libeigen/eigen.git
synced 2025-06-04 18:54:00 +08:00
Fixed the tensor shuffling test
This commit is contained in:
parent
a991f94c0e
commit
4b36c3591f
@ -37,8 +37,7 @@ template <typename Index> struct IndexPair {
|
|||||||
Index second;
|
Index second;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// Boilerplate code
|
||||||
// Boiler plate code
|
|
||||||
namespace internal {
|
namespace internal {
|
||||||
|
|
||||||
template<std::size_t n, typename Dimension> struct dget {
|
template<std::size_t n, typename Dimension> struct dget {
|
||||||
@ -110,6 +109,11 @@ struct Sizes : internal::numeric_list<std::size_t, Indices...> {
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
template <typename std::size_t... Indices>
|
||||||
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE std::size_t array_prod(const Sizes<Indices...>&) {
|
||||||
|
return Sizes<Indices...>::total_size;
|
||||||
|
}
|
||||||
|
|
||||||
#else
|
#else
|
||||||
|
|
||||||
template <std::size_t n>
|
template <std::size_t n>
|
||||||
@ -136,9 +140,21 @@ template <std::size_t V1=0, std::size_t V2=0, std::size_t V3=0, std::size_t V4=0
|
|||||||
// todo: add assertion
|
// todo: add assertion
|
||||||
}
|
}
|
||||||
#ifdef EIGEN_HAS_VARIADIC_TEMPLATES
|
#ifdef EIGEN_HAS_VARIADIC_TEMPLATES
|
||||||
|
template <typename... DenseIndex> Sizes(DenseIndex... indices) { }
|
||||||
explicit Sizes(std::initializer_list<std::size_t> l) {
|
explicit Sizes(std::initializer_list<std::size_t> l) {
|
||||||
// todo: add assertion
|
// todo: add assertion
|
||||||
}
|
}
|
||||||
|
#else
|
||||||
|
EIGEN_DEVICE_FUNC explicit Sizes(const DenseIndex i0) {
|
||||||
|
}
|
||||||
|
EIGEN_DEVICE_FUNC explicit Sizes(const DenseIndex i0, const DenseIndex i1) {
|
||||||
|
}
|
||||||
|
EIGEN_DEVICE_FUNC explicit Sizes(const DenseIndex i0, const DenseIndex i1, const DenseIndex i2) {
|
||||||
|
}
|
||||||
|
EIGEN_DEVICE_FUNC explicit Sizes(const DenseIndex i0, const DenseIndex i1, const DenseIndex i2, const DenseIndex i3) {
|
||||||
|
}
|
||||||
|
EIGEN_DEVICE_FUNC explicit Sizes(const DenseIndex i0, const DenseIndex i1, const DenseIndex i2, const DenseIndex i3, const DenseIndex i4) {
|
||||||
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
template <typename T> Sizes& operator = (const T& other) {
|
template <typename T> Sizes& operator = (const T& other) {
|
||||||
@ -156,9 +172,14 @@ template <std::size_t V1=0, std::size_t V2=0, std::size_t V3=0, std::size_t V4=0
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
template <std::size_t V1, std::size_t V2, std::size_t V3, std::size_t V4, std::size_t V5>
|
||||||
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE std::size_t array_prod(const Sizes<V1, V2, V3, V4, V5>&) {
|
||||||
|
return Sizes<V1, V2, V3, V4, V5>::total_size;
|
||||||
|
};
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
// Boiler plate
|
// Boilerplate
|
||||||
namespace internal {
|
namespace internal {
|
||||||
template<typename Index, std::size_t NumIndices, std::size_t n, bool RowMajor>
|
template<typename Index, std::size_t NumIndices, std::size_t n, bool RowMajor>
|
||||||
struct tensor_index_linearization_helper
|
struct tensor_index_linearization_helper
|
||||||
@ -243,6 +264,112 @@ struct DSizes : array<DenseIndex, NumDims> {
|
|||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
// Boilerplate
|
||||||
|
namespace internal {
|
||||||
|
template<typename Index, std::size_t NumIndices, std::size_t n, bool RowMajor>
|
||||||
|
struct tensor_vsize_index_linearization_helper
|
||||||
|
{
|
||||||
|
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
||||||
|
Index run(array<Index, NumIndices> const& indices, std::vector<DenseIndex> const& dimensions)
|
||||||
|
{
|
||||||
|
return array_get<RowMajor ? n : (NumIndices - n - 1)>(indices) +
|
||||||
|
array_get<RowMajor ? n : (NumIndices - n - 1)>(dimensions) *
|
||||||
|
tensor_vsize_index_linearization_helper<Index, NumIndices, n - 1, RowMajor>::run(indices, dimensions);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
template<typename Index, std::size_t NumIndices, bool RowMajor>
|
||||||
|
struct tensor_vsize_index_linearization_helper<Index, NumIndices, 0, RowMajor>
|
||||||
|
{
|
||||||
|
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
||||||
|
Index run(array<Index, NumIndices> const& indices, std::vector<DenseIndex> const&)
|
||||||
|
{
|
||||||
|
return array_get<RowMajor ? 0 : NumIndices - 1>(indices);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
} // end namespace internal
|
||||||
|
|
||||||
|
template <typename DenseIndex>
|
||||||
|
struct VSizes : std::vector<DenseIndex> {
|
||||||
|
typedef std::vector<DenseIndex> Base;
|
||||||
|
|
||||||
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE size_t TotalSize() const {
|
||||||
|
return internal::array_prod(*static_cast<const Base*>(this));
|
||||||
|
}
|
||||||
|
|
||||||
|
EIGEN_DEVICE_FUNC VSizes() { }
|
||||||
|
EIGEN_DEVICE_FUNC explicit VSizes(const std::vector<DenseIndex>& a) : Base(a) { }
|
||||||
|
|
||||||
|
template <std::size_t NumDims>
|
||||||
|
EIGEN_DEVICE_FUNC explicit VSizes(const array<DenseIndex, NumDims>& a) {
|
||||||
|
this->resize(NumDims);
|
||||||
|
for (int i = 0; i < NumDims; ++i) {
|
||||||
|
(*this)[i] = a[i];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
EIGEN_DEVICE_FUNC explicit VSizes(const DenseIndex i0) {
|
||||||
|
this->resize(1);
|
||||||
|
(*this)[0] = i0;
|
||||||
|
}
|
||||||
|
EIGEN_DEVICE_FUNC explicit VSizes(const DenseIndex i0, const DenseIndex i1) {
|
||||||
|
this->resize(2);
|
||||||
|
(*this)[0] = i0;
|
||||||
|
(*this)[1] = i1;
|
||||||
|
}
|
||||||
|
EIGEN_DEVICE_FUNC explicit VSizes(const DenseIndex i0, const DenseIndex i1, const DenseIndex i2) {
|
||||||
|
this->resize(3);
|
||||||
|
(*this)[0] = i0;
|
||||||
|
(*this)[1] = i1;
|
||||||
|
(*this)[2] = i2;
|
||||||
|
}
|
||||||
|
EIGEN_DEVICE_FUNC explicit VSizes(const DenseIndex i0, const DenseIndex i1, const DenseIndex i2, const DenseIndex i3) {
|
||||||
|
this->resize(4);
|
||||||
|
(*this)[0] = i0;
|
||||||
|
(*this)[1] = i1;
|
||||||
|
(*this)[2] = i2;
|
||||||
|
(*this)[3] = i3;
|
||||||
|
}
|
||||||
|
EIGEN_DEVICE_FUNC explicit VSizes(const DenseIndex i0, const DenseIndex i1, const DenseIndex i2, const DenseIndex i3, const DenseIndex i4) {
|
||||||
|
this->resize(5);
|
||||||
|
(*this)[0] = i0;
|
||||||
|
(*this)[1] = i1;
|
||||||
|
(*this)[2] = i2;
|
||||||
|
(*this)[3] = i3;
|
||||||
|
(*this)[4] = i4;
|
||||||
|
}
|
||||||
|
|
||||||
|
VSizes& operator = (const std::vector<DenseIndex>& other) {
|
||||||
|
*static_cast<Base*>(this) = other;
|
||||||
|
return *this;
|
||||||
|
}
|
||||||
|
|
||||||
|
// A constexpr would be so much better here
|
||||||
|
template <std::size_t NumDims>
|
||||||
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE size_t IndexOfColMajor(const array<DenseIndex, NumDims>& indices) const {
|
||||||
|
return internal::tensor_vsize_index_linearization_helper<DenseIndex, NumDims, NumDims - 1, false>::run(indices, *static_cast<const Base*>(this));
|
||||||
|
}
|
||||||
|
template <std::size_t NumDims>
|
||||||
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE size_t IndexOfRowMajor(const array<DenseIndex, NumDims>& indices) const {
|
||||||
|
return internal::tensor_vsize_index_linearization_helper<DenseIndex, NumDims, NumDims - 1, true>::run(indices, *static_cast<const Base*>(this));
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
// Boilerplate
|
||||||
|
namespace internal {
|
||||||
|
template <typename DenseIndex>
|
||||||
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE DenseIndex array_prod(const VSizes<DenseIndex>& sizes) {
|
||||||
|
DenseIndex total_size = 1;
|
||||||
|
for (int i = 0; i < sizes.size(); ++i) {
|
||||||
|
total_size *= sizes[i];
|
||||||
|
}
|
||||||
|
return total_size;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
namespace internal {
|
namespace internal {
|
||||||
|
|
||||||
template <typename DenseIndex, std::size_t NumDims> struct array_size<const DSizes<DenseIndex, NumDims> > {
|
template <typename DenseIndex, std::size_t NumDims> struct array_size<const DSizes<DenseIndex, NumDims> > {
|
||||||
|
@ -39,7 +39,7 @@ class TensorExecutor
|
|||||||
const bool needs_assign = evaluator.evalSubExprsIfNeeded(NULL);
|
const bool needs_assign = evaluator.evalSubExprsIfNeeded(NULL);
|
||||||
if (needs_assign)
|
if (needs_assign)
|
||||||
{
|
{
|
||||||
const Index size = evaluator.dimensions().TotalSize();
|
const Index size = array_prod(evaluator.dimensions());
|
||||||
for (Index i = 0; i < size; ++i) {
|
for (Index i = 0; i < size; ++i) {
|
||||||
evaluator.evalScalar(i);
|
evaluator.evalScalar(i);
|
||||||
}
|
}
|
||||||
@ -60,7 +60,7 @@ class TensorExecutor<Expression, DefaultDevice, true>
|
|||||||
const bool needs_assign = evaluator.evalSubExprsIfNeeded(NULL);
|
const bool needs_assign = evaluator.evalSubExprsIfNeeded(NULL);
|
||||||
if (needs_assign)
|
if (needs_assign)
|
||||||
{
|
{
|
||||||
const Index size = evaluator.dimensions().TotalSize();
|
const Index size = array_prod(evaluator.dimensions());
|
||||||
static const int PacketSize = unpacket_traits<typename TensorEvaluator<Expression, DefaultDevice>::PacketReturnType>::size;
|
static const int PacketSize = unpacket_traits<typename TensorEvaluator<Expression, DefaultDevice>::PacketReturnType>::size;
|
||||||
const int VectorizedSize = (size / PacketSize) * PacketSize;
|
const int VectorizedSize = (size / PacketSize) * PacketSize;
|
||||||
|
|
||||||
@ -122,7 +122,7 @@ class TensorExecutor<Expression, ThreadPoolDevice, Vectorizable>
|
|||||||
const bool needs_assign = evaluator.evalSubExprsIfNeeded(NULL);
|
const bool needs_assign = evaluator.evalSubExprsIfNeeded(NULL);
|
||||||
if (needs_assign)
|
if (needs_assign)
|
||||||
{
|
{
|
||||||
const Index size = evaluator.dimensions().TotalSize();
|
const Index size = array_prod(evaluator.dimensions());
|
||||||
|
|
||||||
static const int PacketSize = Vectorizable ? unpacket_traits<typename Evaluator::PacketReturnType>::size : 1;
|
static const int PacketSize = Vectorizable ? unpacket_traits<typename Evaluator::PacketReturnType>::size : 1;
|
||||||
|
|
||||||
@ -176,7 +176,7 @@ class TensorExecutor<Expression, GpuDevice, Vectorizable>
|
|||||||
const int num_blocks = getNumCudaMultiProcessors() * maxCudaThreadsPerMultiProcessor() / maxCudaThreadsPerBlock();
|
const int num_blocks = getNumCudaMultiProcessors() * maxCudaThreadsPerMultiProcessor() / maxCudaThreadsPerBlock();
|
||||||
const int block_size = maxCudaThreadsPerBlock();
|
const int block_size = maxCudaThreadsPerBlock();
|
||||||
|
|
||||||
const Index size = evaluator.dimensions().TotalSize();
|
const Index size = array_prod(evaluator.dimensions());
|
||||||
EigenMetaKernel<TensorEvaluator<Expression, GpuDevice> > <<<num_blocks, block_size, 0, device.stream()>>>(evaluator, size);
|
EigenMetaKernel<TensorEvaluator<Expression, GpuDevice> > <<<num_blocks, block_size, 0, device.stream()>>>(evaluator, size);
|
||||||
assert(cudaGetLastError() == cudaSuccess);
|
assert(cudaGetLastError() == cudaSuccess);
|
||||||
}
|
}
|
||||||
|
@ -119,7 +119,7 @@ if(EIGEN_TEST_CXX11)
|
|||||||
ei_add_test(cxx11_tensor_morphing "-std=c++0x")
|
ei_add_test(cxx11_tensor_morphing "-std=c++0x")
|
||||||
ei_add_test(cxx11_tensor_padding "-std=c++0x")
|
ei_add_test(cxx11_tensor_padding "-std=c++0x")
|
||||||
ei_add_test(cxx11_tensor_reduction "-std=c++0x")
|
ei_add_test(cxx11_tensor_reduction "-std=c++0x")
|
||||||
# ei_add_test(cxx11_tensor_shuffling "-std=c++0x")
|
ei_add_test(cxx11_tensor_shuffling "-std=c++0x")
|
||||||
ei_add_test(cxx11_tensor_striding "-std=c++0x")
|
ei_add_test(cxx11_tensor_striding "-std=c++0x")
|
||||||
# ei_add_test(cxx11_tensor_device "-std=c++0x")
|
# ei_add_test(cxx11_tensor_device "-std=c++0x")
|
||||||
ei_add_test(cxx11_tensor_thread_pool "-std=c++0x")
|
ei_add_test(cxx11_tensor_thread_pool "-std=c++0x")
|
||||||
|
@ -179,7 +179,7 @@ static void test_array()
|
|||||||
for (int i = 0; i < 2; ++i) {
|
for (int i = 0; i < 2; ++i) {
|
||||||
for (int j = 0; j < 3; ++j) {
|
for (int j = 0; j < 3; ++j) {
|
||||||
for (int k = 0; k < 7; ++k) {
|
for (int k = 0; k < 7; ++k) {
|
||||||
VERIFY_IS_APPROX(mat3(array<ptrdiff_t, 3>(i,j,k)), powf(val, 3.5f));
|
VERIFY_IS_APPROX(mat3(i,j,k), powf(val, 3.5f));
|
||||||
val += 1.0;
|
val += 1.0;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -12,6 +12,7 @@
|
|||||||
#include <Eigen/CXX11/Tensor>
|
#include <Eigen/CXX11/Tensor>
|
||||||
|
|
||||||
using Eigen::Tensor;
|
using Eigen::Tensor;
|
||||||
|
using Eigen::array;
|
||||||
|
|
||||||
static void test_simple_shuffling()
|
static void test_simple_shuffling()
|
||||||
{
|
{
|
||||||
@ -80,10 +81,10 @@ static void test_expr_shuffling()
|
|||||||
|
|
||||||
Tensor<float, 4> result(5,7,3,2);
|
Tensor<float, 4> result(5,7,3,2);
|
||||||
|
|
||||||
array<int, 4> src_slice_dim(Eigen::array<int, 4>(2,3,1,7));
|
array<int, 4> src_slice_dim{{2,3,1,7}};
|
||||||
array<int, 4> src_slice_start(Eigen::array<int, 4>(0,0,0,0));
|
array<int, 4> src_slice_start{{0,0,0,0}};
|
||||||
array<int, 4> dst_slice_dim(Eigen::array<int, 4>(1,7,3,2));
|
array<int, 4> dst_slice_dim{{1,7,3,2}};
|
||||||
array<int, 4> dst_slice_start(Eigen::array<int, 4>(0,0,0,0));
|
array<int, 4> dst_slice_start{{0,0,0,0}};
|
||||||
|
|
||||||
for (int i = 0; i < 5; ++i) {
|
for (int i = 0; i < 5; ++i) {
|
||||||
result.slice(dst_slice_start, dst_slice_dim) =
|
result.slice(dst_slice_start, dst_slice_dim) =
|
||||||
|
Loading…
x
Reference in New Issue
Block a user