From 997c3359705062d7b3fc64907bb357ec03c0a418 Mon Sep 17 00:00:00 2001
From: Benoit Steiner
Date: Tue, 17 May 2016 12:54:18 -0700
Subject: [PATCH] Fixed compilation error

---
 .../Eigen/CXX11/src/Tensor/TensorExecutor.h | 19 +++++++++++++++++--
 1 file changed, 17 insertions(+), 2 deletions(-)

diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h b/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h
index 749cb8edd..4ffb41fdd 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h
@@ -93,10 +93,16 @@ struct EvalRange {
       evaluator.evalScalar(i);
     }
   }
+
+  static Index alignBlockSize(Index size) {
+    return size;
+  }
 };
 
 template
 struct EvalRange {
+  static const int PacketSize = unpacket_traits::size;
+
   static void run(Evaluator* evaluator_in, const Index first, const Index last) {
     Evaluator evaluator = *evaluator_in;
     eigen_assert(last >= first);
@@ -122,6 +128,15 @@ struct EvalRange {
       evaluator.evalScalar(i);
     }
   }
+
+  static Index alignBlockSize(Index size) {
+    // Align block size to packet size and account for unrolling in run above.
+    if (size >= 16 * PacketSize) {
+      return (size + 4 * PacketSize - 1) & ~(4 * PacketSize - 1);
+    }
+    // Aligning to 4 * PacketSize would increase block size by more than 25%.
+    return (size + PacketSize - 1) & ~(PacketSize - 1);
+  }
 };
 
 template
@@ -139,9 +154,9 @@ class TensorExecutor {
       const Index size = array_prod(evaluator.dimensions());
 #if defined(EIGEN_USE_NONBLOCKING_THREAD_POOL) && defined(EIGEN_USE_COST_MODEL)
       device.parallelFor(size, evaluator.costPerCoeff(Vectorizable),
-                         EvalRange::alignBlockSize,
+                         EvalRange::alignBlockSize,
                          [&evaluator](Index first, Index last) {
-                           EvalRange::run(&evaluator, first, last);
+                           EvalRange::run(&evaluator, first, last);
                          });
 #else
       size_t num_threads = device.numThreads();