Minor cleanups: 1. Get rid of unused variables. 2. Get rid of last uses of EIGEN_USE_COST_MODEL.

2025-10-18 19:11:30 +08:00 · 2016-05-18 15:09:48 -07:00 · 2016-05-18 15:09:48 -07:00 · 7df811cfe5
commit 7df811cfe5
parent 86ae94462e
4 changed files with 6 additions and 27 deletions
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorContractionThreadPool.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorContractionThreadPool.h
@ -568,10 +568,6 @@ struct TensorEvaluator<const TensorContractionOp<Indices, LeftArgType, RightArgT
          (parallel_pack_ ? nm_ + nn_ : (shard_by_col_ ? nn_ : nm_)) +
          nm_ * nn_;
      if (k < nk_) {
        // It is important to copy out nm_ and nn_, because once we kick off
        // the last packing operation this and device_ can be destroyed.
        Index nm = nm_;
        Index nn = nn_;
        // Issue lhs/rhs packing. Their completion will in turn kick off
        // kernels.
        if (parallel_pack_) {
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorCostModel.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorCostModel.h
@ -10,9 +10,6 @@
 #ifndef EIGEN_CXX11_TENSOR_TENSOR_COST_MODEL_H
 #define EIGEN_CXX11_TENSOR_TENSOR_COST_MODEL_H
 // Turn on the cost model by default
 #define EIGEN_USE_COST_MODEL
 namespace Eigen {
 /** \class TensorEvaluator
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h
@ -150,9 +150,8 @@ class TensorExecutor<Expression, ThreadPoolDevice, Vectorizable> {
    const bool needs_assign = evaluator.evalSubExprsIfNeeded(NULL);
    if (needs_assign)
    {
      const Index PacketSize = Vectorizable ? unpacket_traits<typename Evaluator::PacketReturnType>::size : 1;
      const Index size = array_prod(evaluator.dimensions());
-#if !defined(EIGEN_USE_SIMPLE_THREAD_POOL) && defined(EIGEN_USE_COST_MODEL)
+#if !defined(EIGEN_USE_SIMPLE_THREAD_POOL)
      device.parallelFor(size, evaluator.costPerCoeff(Vectorizable),
                         EvalRange<Evaluator, Index, Vectorizable>::alignBlockSize,
                         [&evaluator](Index first, Index last) {
@ -160,15 +159,14 @@ class TensorExecutor<Expression, ThreadPoolDevice, Vectorizable> {
                         });
 #else
      size_t num_threads = device.numThreads();
 #ifdef EIGEN_USE_COST_MODEL
      if (num_threads > 1) {
        num_threads = TensorCostModel<ThreadPoolDevice>::numThreads(
            size, evaluator.costPerCoeff(Vectorizable), num_threads);
      }
 #endif
      if (num_threads == 1) {
        EvalRange<Evaluator, Index, Vectorizable>::run(&evaluator, 0, size);
      } else {
        const Index PacketSize = Vectorizable ? unpacket_traits<typename Evaluator::PacketReturnType>::size : 1;
        Index blocksz = std::ceil<Index>(static_cast<float>(size)/num_threads) + PacketSize - 1;
        const Index blocksize = numext::maxi<Index>(PacketSize, (blocksz - (blocksz % PacketSize)));
        const Index numblocks = size / blocksize;
@ -185,7 +183,7 @@ class TensorExecutor<Expression, ThreadPoolDevice, Vectorizable> {
        }
        barrier.Wait();
      }
-#endif  // defined(EIGEN_USE_NONBLOCKING_THREAD_POOL) && defined(EIGEN_USE_COST_MODEL)
+#endif
    }
    evaluator.cleanup();
  }
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h
@ -248,16 +248,12 @@ struct FullReducer<Self, Op, ThreadPoolDevice, Vectorizable> {
      *output = reducer.finalize(reducer.initialize());
      return;
    }
 #ifdef EIGEN_USE_COST_MODEL
    const TensorOpCost cost =
        self.m_impl.costPerCoeff(Vectorizable) +
        TensorOpCost(0, 0, internal::functor_traits<Op>::Cost, Vectorizable,
                     PacketSize);
    const int num_threads = TensorCostModel<ThreadPoolDevice>::numThreads(
        num_coeffs, cost, device.numThreads());
 #else
    const int num_threads = device.numThreads();
 #endif
    if (num_threads == 1) {
      *output =
          InnerMostDimReducer<Self, Op, Vectorizable>::reduce(self, 0, num_coeffs, reducer);
@ -472,22 +468,14 @@ struct TensorEvaluator<const TensorReductionOp<Op, Dims, ArgType>, Device>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; }
  static bool size_large_enough(Index total_size) {
 #ifndef EIGEN_USE_COST_MODEL
    return total_size > 1024 * 1024;
 #else
    return true || total_size;
 #endif
  }
  EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool evalSubExprsIfNeeded(CoeffReturnType* data) {
    m_impl.evalSubExprsIfNeeded(NULL);
    // Use the FullReducer if possible.
-    if (RunningFullReduction && internal::FullReducer<Self, Op, Device>::HasOptimizedImplementation &&
+    if (RunningFullReduction &&
        internal::FullReducer<Self, Op, Device>::HasOptimizedImplementation &&
        ((RunningOnGPU && (m_device.majorDeviceVersion() >= 3)) ||
-         (!RunningOnGPU && size_large_enough(internal::array_prod(m_impl.dimensions()))))) {
+         !RunningOnGPU)) {
      bool need_assign = false;
      if (!data) {
        m_result = static_cast<CoeffReturnType*>(m_device.allocate(sizeof(CoeffReturnType)));