mirror of
https://gitlab.com/libeigen/eigen.git
synced 2025-08-12 11:49:02 +08:00
Improved partial reductions in more cases
This commit is contained in:
parent
32d95e86c9
commit
c6b0de2c21
@ -492,7 +492,7 @@ struct TensorEvaluator<const TensorReductionOp<Op, Dims, ArgType>, Device>
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Attempt to use an optimized reduction.
|
// Attempt to use an optimized reduction.
|
||||||
else if (RunningOnGPU && data && (m_device.majorDeviceVersion() >= 3)) {
|
else if (RunningOnGPU && (m_device.majorDeviceVersion() >= 3)) {
|
||||||
bool reducing_inner_dims = true;
|
bool reducing_inner_dims = true;
|
||||||
for (int i = 0; i < NumReducedDims; ++i) {
|
for (int i = 0; i < NumReducedDims; ++i) {
|
||||||
if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
|
if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
|
||||||
@ -505,8 +505,12 @@ struct TensorEvaluator<const TensorReductionOp<Op, Dims, ArgType>, Device>
|
|||||||
(reducing_inner_dims || ReducingInnerMostDims)) {
|
(reducing_inner_dims || ReducingInnerMostDims)) {
|
||||||
const Index num_values_to_reduce = internal::array_prod(m_reducedDims);
|
const Index num_values_to_reduce = internal::array_prod(m_reducedDims);
|
||||||
const Index num_coeffs_to_preserve = internal::array_prod(m_dimensions);
|
const Index num_coeffs_to_preserve = internal::array_prod(m_dimensions);
|
||||||
|
if (!data && num_coeffs_to_preserve < 1024 && num_values_to_reduce > num_coeffs_to_preserve) {
|
||||||
|
data = static_cast<CoeffReturnType*>(m_device.allocate(sizeof(CoeffReturnType) * num_coeffs_to_preserve));
|
||||||
|
m_result = data;
|
||||||
|
}
|
||||||
Op reducer(m_reducer);
|
Op reducer(m_reducer);
|
||||||
return internal::InnerReducer<Self, Op, Device>::run(*this, reducer, m_device, data, num_values_to_reduce, num_coeffs_to_preserve);
|
return internal::InnerReducer<Self, Op, Device>::run(*this, reducer, m_device, data, num_values_to_reduce, num_coeffs_to_preserve) || (m_result != NULL);
|
||||||
}
|
}
|
||||||
|
|
||||||
bool preserving_inner_dims = true;
|
bool preserving_inner_dims = true;
|
||||||
@ -521,8 +525,12 @@ struct TensorEvaluator<const TensorReductionOp<Op, Dims, ArgType>, Device>
|
|||||||
preserving_inner_dims) {
|
preserving_inner_dims) {
|
||||||
const Index num_values_to_reduce = internal::array_prod(m_reducedDims);
|
const Index num_values_to_reduce = internal::array_prod(m_reducedDims);
|
||||||
const Index num_coeffs_to_preserve = internal::array_prod(m_dimensions);
|
const Index num_coeffs_to_preserve = internal::array_prod(m_dimensions);
|
||||||
|
if (!data && num_coeffs_to_preserve < 1024 && num_values_to_reduce > num_coeffs_to_preserve) {
|
||||||
|
data = static_cast<CoeffReturnType*>(m_device.allocate(sizeof(CoeffReturnType) * num_coeffs_to_preserve));
|
||||||
|
m_result = data;
|
||||||
|
}
|
||||||
Op reducer(m_reducer);
|
Op reducer(m_reducer);
|
||||||
return internal::OuterReducer<Self, Op, Device>::run(*this, reducer, m_device, data, num_values_to_reduce, num_coeffs_to_preserve);
|
return internal::OuterReducer<Self, Op, Device>::run(*this, reducer, m_device, data, num_values_to_reduce, num_coeffs_to_preserve) || (m_result != NULL);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return true;
|
return true;
|
||||||
@ -537,8 +545,8 @@ struct TensorEvaluator<const TensorReductionOp<Op, Dims, ArgType>, Device>
|
|||||||
|
|
||||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const
|
||||||
{
|
{
|
||||||
if (RunningFullReduction && m_result) {
|
if ((RunningFullReduction || RunningOnGPU) && m_result) {
|
||||||
return *m_result;
|
return *(m_result + index);
|
||||||
}
|
}
|
||||||
Op reducer(m_reducer);
|
Op reducer(m_reducer);
|
||||||
if (ReducingInnerMostDims || RunningFullReduction) {
|
if (ReducingInnerMostDims || RunningFullReduction) {
|
||||||
@ -560,6 +568,10 @@ struct TensorEvaluator<const TensorReductionOp<Op, Dims, ArgType>, Device>
|
|||||||
EIGEN_STATIC_ASSERT((PacketSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE)
|
EIGEN_STATIC_ASSERT((PacketSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE)
|
||||||
eigen_assert(index + PacketSize - 1 < Index(internal::array_prod(dimensions())));
|
eigen_assert(index + PacketSize - 1 < Index(internal::array_prod(dimensions())));
|
||||||
|
|
||||||
|
if (RunningOnGPU && m_result) {
|
||||||
|
return internal::pload<PacketReturnType>(m_result + index);
|
||||||
|
}
|
||||||
|
|
||||||
EIGEN_ALIGN_MAX typename internal::remove_const<CoeffReturnType>::type values[PacketSize];
|
EIGEN_ALIGN_MAX typename internal::remove_const<CoeffReturnType>::type values[PacketSize];
|
||||||
if (ReducingInnerMostDims) {
|
if (ReducingInnerMostDims) {
|
||||||
const Index num_values_to_reduce =
|
const Index num_values_to_reduce =
|
||||||
|
@ -239,6 +239,33 @@ static void test_simple_reductions() {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
// Checks a partial sum-reduction that is used as an operand of a larger
// expression (here: subtracted from a constant) instead of being assigned
// to a tensor directly.
//
// A random 2x3x5x7 float tensor is summed over axes 1 and 3, leaving a 2x5
// result; every entry is then compared against a sum accumulated by hand.
// DataLayout is the storage-order template argument forwarded to Tensor.
template <int DataLayout>
|
||||||
|
static void test_reductions_in_expr() {
|
||||||
|
Tensor<float, 4, DataLayout> tensor(2, 3, 5, 7);
|
||||||
|
tensor.setRandom();
|
||||||
|
array<ptrdiff_t, 2> reduction_axis2;  // the two axes that get summed away
|
||||||
|
reduction_axis2[0] = 1;  // axis of extent 3
|
||||||
|
reduction_axis2[1] = 3;  // axis of extent 7
|
||||||
|
|
||||||
|
Tensor<float, 2, DataLayout> result(2, 5);  // extents of the preserved axes 0 and 2
|
||||||
|
result = result.constant(1.0f) - tensor.sum(reduction_axis2);  // reduction nested inside a binary expression
|
||||||
|
VERIFY_IS_EQUAL(result.dimension(0), 2);
|
||||||
|
VERIFY_IS_EQUAL(result.dimension(1), 5);
|
||||||
|
for (int i = 0; i < 2; ++i) {  // i walks preserved axis 0
|
||||||
|
for (int j = 0; j < 5; ++j) {  // j walks preserved axis 2
|
||||||
|
float sum = 0.0f;  // reference value accumulated element by element
|
||||||
|
for (int k = 0; k < 3; ++k) {  // k walks reduced axis 1
|
||||||
|
for (int l = 0; l < 7; ++l) {  // l walks reduced axis 3
|
||||||
|
sum += tensor(i, k, j, l);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
VERIFY_IS_APPROX(result(i, j), 1.0f - sum);  // approximate compare: float summation order may differ
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
template <int DataLayout>
|
template <int DataLayout>
|
||||||
static void test_full_reductions() {
|
static void test_full_reductions() {
|
||||||
Tensor<float, 2, DataLayout> tensor(2, 3);
|
Tensor<float, 2, DataLayout> tensor(2, 3);
|
||||||
@ -462,6 +489,8 @@ void test_cxx11_tensor_reduction() {
|
|||||||
CALL_SUBTEST(test_trivial_reductions<RowMajor>());
|
CALL_SUBTEST(test_trivial_reductions<RowMajor>());
|
||||||
CALL_SUBTEST(test_simple_reductions<ColMajor>());
|
CALL_SUBTEST(test_simple_reductions<ColMajor>());
|
||||||
CALL_SUBTEST(test_simple_reductions<RowMajor>());
|
CALL_SUBTEST(test_simple_reductions<RowMajor>());
|
||||||
|
CALL_SUBTEST(test_reductions_in_expr<ColMajor>());
|
||||||
|
CALL_SUBTEST(test_reductions_in_expr<RowMajor>());
|
||||||
CALL_SUBTEST(test_full_reductions<ColMajor>());
|
CALL_SUBTEST(test_full_reductions<ColMajor>());
|
||||||
CALL_SUBTEST(test_full_reductions<RowMajor>());
|
CALL_SUBTEST(test_full_reductions<RowMajor>());
|
||||||
CALL_SUBTEST(test_user_defined_reductions<ColMajor>());
|
CALL_SUBTEST(test_user_defined_reductions<ColMajor>());
|
||||||
|
Loading…
x
Reference in New Issue
Block a user