mirror of https://gitlab.com/libeigen/eigen.git (synced 2025-08-12 11:49:02 +08:00)
Added support for fast integer divisions by a constant
Sped up tensor slicing by a factor of 3 by using these fast integer divisions.
commit 33c702c79f (parent 756292f8aa)
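
The header added below defines Eigen::internal::TensorIntDivisor<T> together with an operator/ overload, so a divisor that is only known at run time but stays fixed across many divisions can be precomputed once and then reused cheaply. A minimal usage sketch under that assumption — the scale_down function and its arguments are hypothetical, only the divisor API comes from this commit:

#include <Eigen/CXX11/Tensor>
#include <cstdint>

// Hypothetical example: divide many values by the same run-time constant d.
// The TensorIntDivisor is built once; each '/' below then resolves to the
// operator/ overload added in TensorIntDiv.h (a multiply plus a couple of
// shifts) instead of a hardware integer division.
void scale_down(const int32_t* in, int32_t* out, int n, int32_t d) {
  const Eigen::internal::TensorIntDivisor<int32_t> fast_d(d);  // requires 1 <= d <= 2^31-1
  for (int i = 0; i < n; ++i) {
    out[i] = in[i] / fast_d;
  }
}
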
@@ -34,6 +34,7 @@
 #include "unsupported/Eigen/CXX11/src/Tensor/TensorDeviceType.h"
 #include "unsupported/Eigen/CXX11/src/Tensor/TensorDimensions.h"
 #include "unsupported/Eigen/CXX11/src/Tensor/TensorTraits.h"
+#include "unsupported/Eigen/CXX11/src/Tensor/TensorIntDiv.h"

 #include "unsupported/Eigen/CXX11/src/Tensor/TensorBase.h"

new file: unsupported/Eigen/CXX11/src/Tensor/TensorIntDiv.h (82 lines)
@@ -0,0 +1,82 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_CXX11_TENSOR_TENSOR_INTDIV_H
+#define EIGEN_CXX11_TENSOR_TENSOR_INTDIV_H
+
+
+namespace Eigen {
+
+/** \internal
+ *
+ * \class TensorIntDiv
+ * \ingroup CXX11_Tensor_Module
+ *
+ * \brief Fast integer division by a constant.
+ *
+ * See the paper from Granlund and Montgomery for explanation.
+ *   (at http://dx.doi.org/10.1145/773473.178249)
+ *
+ * \sa Tensor
+ */
+
+namespace internal {
+
+template <typename T>
+struct TensorIntDivisor {
+ public:
+  TensorIntDivisor() {
+    multiplier = 0;
+    shift1 = 0;
+    shift2 = 0;
+  }
+
+  // Must have 1 <= divider <= 2^31-1
+  TensorIntDivisor(const T divider) {
+    static const int N = 32;
+    eigen_assert(divider > 0);
+    eigen_assert(divider <= (1<<(N-1)) - 1);
+
+    // fast ln2
+    const int leading_zeros = __builtin_clz(divider);
+    const int l = N - (leading_zeros+1);
+
+    multiplier = (static_cast<uint64_t>(1) << (N+l)) / divider - (static_cast<uint64_t>(1) << N) + 1;
+    shift1 = (std::min)(1, l);
+    shift2 = (std::max)(0, l-1);
+  }
+
+  // Must have 0 <= numerator <= 2^32-1
+  T divide(const T numerator) const {
+    static const int N = 32;
+    eigen_assert(numerator >= 0);
+    eigen_assert(numerator <= (1ull<<N) - 1);
+
+    uint32_t t1 = (multiplier * numerator) >> 32;
+    uint32_t t = (static_cast<uint32_t>(numerator) - t1) >> shift1;
+    return (t1 + t) >> shift2;
+  }
+
+ private:
+  uint64_t multiplier;
+  int32_t shift1;
+  int32_t shift2;
+};
+
+
+template <typename T>
+static T operator / (const T& numerator, const TensorIntDivisor<T>& divisor) {
+  return divisor.divide(numerator);
+}
+
+
+} // end namespace internal
+} // end namespace Eigen
+
+#endif // EIGEN_CXX11_TENSOR_TENSOR_INTDIV_H
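
Note that the constructor above relies on __builtin_clz, a GCC/Clang builtin, to compute floor(log2(divider)). On a compiler without that builtin a portable count-leading-zeros helper would be needed; a minimal sketch of such a fallback (hypothetical helper, not part of this commit; like the builtin, it assumes a non-zero argument):

#include <cstdint>

// Hypothetical portable fallback for __builtin_clz: counts leading zero bits
// of a non-zero 32-bit value by shifting until the top bit is set.
static inline int count_leading_zeros32(uint32_t x) {
  int n = 0;
  while ((x & 0x80000000u) == 0) {
    x <<= 1;
    ++n;
  }
  return n;
}
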
@@ -305,8 +305,10 @@ struct TensorEvaluator<const TensorSlicingOp<StartIndices, Sizes, ArgType>, Devi
     for (int i = 0; i < NumDims; ++i) {
       if (i > 0) {
         m_outputStrides[i] = m_outputStrides[i-1] * output_dims[i-1];
+        m_fastOutputStrides[i] = internal::TensorIntDivisor<Index>(m_outputStrides[i]);
       } else {
         m_outputStrides[0] = 1;
+        m_fastOutputStrides[0] = 1;
       }
     }
   }
@@ -331,7 +333,7 @@ struct TensorEvaluator<const TensorSlicingOp<StartIndices, Sizes, ArgType>, Devi
   {
     Index inputIndex = 0;
     for (int i = NumDims - 1; i > 0; --i) {
-      const Index idx = index / m_outputStrides[i];
+      const Index idx = index / m_fastOutputStrides[i];
       inputIndex += (idx + m_offsets[i]) * m_inputStrides[i];
       index -= idx * m_outputStrides[i];
     }
@@ -349,8 +351,8 @@ struct TensorEvaluator<const TensorSlicingOp<StartIndices, Sizes, ArgType>, Devi
     Index inputIndices[] = {0, 0};
     Index indices[] = {index, index + packetSize - 1};
     for (int i = NumDims - 1; i > 0; --i) {
-      const Index idx0 = indices[0] / m_outputStrides[i];
-      const Index idx1 = indices[1] / m_outputStrides[i];
+      const Index idx0 = indices[0] / m_fastOutputStrides[i];
+      const Index idx1 = indices[1] / m_fastOutputStrides[i];
       inputIndices[0] += (idx0 + m_offsets[i]) * m_inputStrides[i];
       inputIndices[1] += (idx1 + m_offsets[i]) * m_inputStrides[i];
       indices[0] -= idx0 * m_outputStrides[i];
@@ -379,6 +381,7 @@ struct TensorEvaluator<const TensorSlicingOp<StartIndices, Sizes, ArgType>, Devi
  private:
   Dimensions m_dimensions;
   array<Index, NumDims> m_outputStrides;
+  array<internal::TensorIntDivisor<Index>, NumDims> m_fastOutputStrides;
   array<Index, NumDims> m_inputStrides;
   const StartIndices m_offsets;
   TensorEvaluator<ArgType, Device> m_impl;
@@ -418,8 +421,10 @@ struct TensorEvaluator<TensorSlicingOp<StartIndices, Sizes, ArgType>, Device>
     for (int i = 0; i < NumDims; ++i) {
       if (i > 0) {
         m_outputStrides[i] = m_outputStrides[i-1] * output_dims[i-1];
+        m_fastOutputStrides[i] = internal::TensorIntDivisor<Index>(m_outputStrides[i]);
       } else {
         m_outputStrides[0] = 1;
+        m_fastOutputStrides[0] = 1;
       }
     }
   }
@@ -444,7 +449,7 @@ struct TensorEvaluator<TensorSlicingOp<StartIndices, Sizes, ArgType>, Device>
   {
     Index inputIndex = 0;
     for (int i = NumDims - 1; i > 0; --i) {
-      const Index idx = index / m_outputStrides[i];
+      const Index idx = index / m_fastOutputStrides[i];
       inputIndex += (idx + m_offsets[i]) * m_inputStrides[i];
       index -= idx * m_outputStrides[i];
     }
@@ -460,8 +465,8 @@ struct TensorEvaluator<TensorSlicingOp<StartIndices, Sizes, ArgType>, Device>
     Index inputIndices[] = {0, 0};
     Index indices[] = {index, index + packetSize - 1};
     for (int i = NumDims - 1; i > 0; --i) {
-      const Index idx0 = indices[0] / m_outputStrides[i];
-      const Index idx1 = indices[1] / m_outputStrides[i];
+      const Index idx0 = indices[0] / m_fastOutputStrides[i];
+      const Index idx1 = indices[1] / m_fastOutputStrides[i];
       inputIndices[0] += (idx0 + m_offsets[i]) * m_inputStrides[i];
       inputIndices[1] += (idx1 + m_offsets[i]) * m_inputStrides[i];
       indices[0] -= idx0 * m_outputStrides[i];
@@ -489,7 +494,7 @@ struct TensorEvaluator<TensorSlicingOp<StartIndices, Sizes, ArgType>, Device>
   {
     Index inputIndex = 0;
     for (int i = NumDims - 1; i > 0; --i) {
-      const Index idx = index / m_outputStrides[i];
+      const Index idx = index / m_fastOutputStrides[i];
       inputIndex += (idx + m_offsets[i]) * m_inputStrides[i];
       index -= idx * m_outputStrides[i];
     }
@@ -504,8 +509,8 @@ struct TensorEvaluator<TensorSlicingOp<StartIndices, Sizes, ArgType>, Device>
     Index inputIndices[] = {0, 0};
     Index indices[] = {index, index + packetSize - 1};
    for (int i = NumDims - 1; i > 0; --i) {
-      const Index idx0 = indices[0] / m_outputStrides[i];
-      const Index idx1 = indices[1] / m_outputStrides[i];
+      const Index idx0 = indices[0] / m_fastOutputStrides[i];
+      const Index idx1 = indices[1] / m_fastOutputStrides[i];
       inputIndices[0] += (idx0 + m_offsets[i]) * m_inputStrides[i];
       inputIndices[1] += (idx1 + m_offsets[i]) * m_inputStrides[i];
       indices[0] -= idx0 * m_outputStrides[i];
@@ -532,6 +537,7 @@ struct TensorEvaluator<TensorSlicingOp<StartIndices, Sizes, ArgType>, Device>
  private:
   Dimensions m_dimensions;
   array<Index, NumDims> m_outputStrides;
+  array<internal::TensorIntDivisor<Index>, NumDims> m_fastOutputStrides;
   array<Index, NumDims> m_inputStrides;
   const StartIndices m_offsets;
   TensorEvaluator<ArgType, Device> m_impl;
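
For context, the hunks above all touch the same pattern: every coefficient access de-linearizes the output index with one division per dimension, so that division sits on the hot path, and replacing it with the precomputed TensorIntDivisor is what yields the reported 3x speedup on slicing. A simplified, self-contained sketch of that remapping loop — member names follow the diff, but the free function and its std::array signature are illustrative, not the evaluator's actual code:

#include <Eigen/CXX11/Tensor>
#include <array>
#include <cstddef>

// Illustrative stand-alone version of the index remapping done by the slicing
// evaluator: map a linear output coefficient index to the matching input
// index. Strides, offsets and fast divisors are assumed precomputed as in the
// constructor hunks above.
template <typename Index, std::size_t NumDims>
Index slice_input_index(Index index,
                        const std::array<Index, NumDims>& outputStrides,
                        const std::array<Eigen::internal::TensorIntDivisor<Index>, NumDims>& fastOutputStrides,
                        const std::array<Index, NumDims>& inputStrides,
                        const std::array<Index, NumDims>& offsets) {
  Index inputIndex = 0;
  for (int i = static_cast<int>(NumDims) - 1; i > 0; --i) {
    // Same quotient as index / outputStrides[i], but via multiply-and-shift.
    const Index idx = index / fastOutputStrides[i];
    inputIndex += (idx + offsets[i]) * inputStrides[i];
    index -= idx * outputStrides[i];
  }
  // Whatever remains is the coordinate along the innermost dimension.
  return inputIndex + (index + offsets[0]) * inputStrides[0];
}
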
@@ -106,6 +106,7 @@ if(EIGEN_TEST_CXX11)
 ei_add_test(cxx11_tensor_convolution "-std=c++0x")
 ei_add_test(cxx11_tensor_expr "-std=c++0x")
 # ei_add_test(cxx11_tensor_fixed_size "-std=c++0x")
+ei_add_test(cxx11_tensor_intdiv "-std=c++0x")
 ei_add_test(cxx11_tensor_lvalue "-std=c++0x")
 ei_add_test(cxx11_tensor_map "-std=c++0x")
 # ei_add_test(cxx11_tensor_morphing "-std=c++0x")

new file: unsupported/test/cxx11_tensor_intdiv.cpp (77 lines)
@@ -0,0 +1,77 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#include "main.h"
+
+#include <Eigen/CXX11/Tensor>
+
+
+static void test_signed_32bit()
+{
+  for (int32_t i = 1; i < 25000; ++i) {
+    const Eigen::internal::TensorIntDivisor<int32_t> div(i);
+
+    for (int32_t j = 0; j < 25000; ++j) {
+      const int32_t fast_div = j / div;
+      const int32_t slow_div = j / i;
+      VERIFY_IS_EQUAL(fast_div, slow_div);
+    }
+  }
+}
+
+
+static void test_unsigned_32bit()
+{
+  for (uint32_t i = 1; i < 25000; ++i) {
+    const Eigen::internal::TensorIntDivisor<uint32_t> div(i);
+
+    for (uint32_t j = 0; j < 25000; ++j) {
+      const uint32_t fast_div = j / div;
+      const uint32_t slow_div = j / i;
+      VERIFY_IS_EQUAL(fast_div, slow_div);
+    }
+  }
+}
+
+
+static void test_signed_64bit()
+{
+  for (int64_t i = 2; i < 25000; ++i) {
+    const Eigen::internal::TensorIntDivisor<int64_t> div(i);
+
+    for (int64_t j = 0; j < 25000; ++j) {
+      const int64_t fast_div = j / div;
+      const int64_t slow_div = j / i;
+      VERIFY_IS_EQUAL(fast_div, slow_div);
+    }
+  }
+}
+
+
+static void test_unsigned_64bit()
+{
+  for (uint64_t i = 2; i < 25000; ++i) {
+    const Eigen::internal::TensorIntDivisor<uint64_t> div(i);
+
+    for (uint64_t j = 0; j < 25000; ++j) {
+      const uint64_t fast_div = j / div;
+      const uint64_t slow_div = j / i;
+      VERIFY_IS_EQUAL(fast_div, slow_div);
+    }
+  }
+}
+
+
+void test_cxx11_tensor_intdiv()
+{
+  CALL_SUBTEST(test_signed_32bit());
+  CALL_SUBTEST(test_unsigned_32bit());
+  CALL_SUBTEST(test_signed_64bit());
+  CALL_SUBTEST(test_unsigned_64bit());
+}