Merged eigen/eigen into default

Konstantinos Margaritis 2016-04-22 15:05:21 +03:00
commit 3f80696ae1
8 changed files with 30 additions and 8 deletions

View File

@@ -221,6 +221,12 @@ if(NOT MSVC)
message(STATUS "Enabling FMA in tests/examples")
endif()
option(EIGEN_TEST_F16C "Enable/Disable F16C in tests/examples" OFF)
if(EIGEN_TEST_F16C)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mf16c")
message(STATUS "Enabling F16C in tests/examples")
endif()
option(EIGEN_TEST_ALTIVEC "Enable/Disable AltiVec in tests/examples" OFF)
if(EIGEN_TEST_ALTIVEC)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -maltivec -mabi=altivec")

View File

@@ -397,6 +397,10 @@ CompleteOrthogonalDecomposition<MatrixType>& CompleteOrthogonalDecomposition<
const Index rank = m_cpqr.rank();
const Index cols = matrix.cols();
const Index rows = matrix.rows();
m_zCoeffs.resize((std::min)(rows, cols));
m_temp.resize(cols);
if (rank < cols) {
// We have reduced the (permuted) matrix to the form
// [R11 R12]

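For context, this hunk is in Eigen's CompleteOrthogonalDecomposition, which extends column-pivoted QR to rank-deficient systems; the resized m_zCoeffs/m_temp buffers feed the rank < cols branch that annihilates the R12 block. A minimal usage sketch of the public API (sizes and the rank-3 setup are illustrative, not from this commit):

#include <Eigen/Dense>

int main() {
  // Rank-deficient least-squares solve; sizes are illustrative.
  Eigen::MatrixXd A = Eigen::MatrixXd::Random(6, 4);
  A.col(3) = A.col(0) + A.col(1);               // force rank 3 < cols
  Eigen::VectorXd b = Eigen::VectorXd::Random(6);

  Eigen::CompleteOrthogonalDecomposition<Eigen::MatrixXd> cod(A);
  Eigen::VectorXd x = cod.solve(b);             // minimum-norm least-squares solution
  return cod.rank() == 3 ? 0 : 1;               // rank() reports the numerical rank
}
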
View File

@@ -201,9 +201,15 @@ template <typename Device, typename T> class BenchmarkSuite {
size_b[1] = k_/2;
TensorMap<Tensor<T, 2>, Eigen::Aligned> B(b_, size_b);
#ifndef EIGEN_HAS_INDEX_LIST
Eigen::array<TensorIndex, 2> strides;
strides[0] = 1;
strides[1] = 2;
#else
// Take advantage of cxx11 to give the compiler information it can use to
// optimize the code.
Eigen::IndexList<Eigen::type2index<1>, Eigen::type2index<2> > strides;
#endif
StartBenchmarkTiming();
for (int iter = 0; iter < num_iters; ++iter) {

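The cxx11 comment above refers to Eigen::IndexList, which encodes the stride values in the type so the compiler can constant-fold the index arithmetic instead of loading it from an array at runtime. A rough sketch of the same pattern outside the benchmark, assuming a C++11 compiler where EIGEN_HAS_INDEX_LIST is defined (tensor sizes are illustrative):

#include <unsupported/Eigen/CXX11/Tensor>

int main() {
  Eigen::Tensor<float, 2> input(200, 100);
  input.setRandom();

  // Strides encoded in the type: the compiler sees the constants 1 and 2 directly.
  Eigen::IndexList<Eigen::type2index<1>, Eigen::type2index<2> > strides;
  Eigen::Tensor<float, 2> every_2nd_col = input.stride(strides);

  // The pre-C++11 fallback from the #ifndef branch above would instead be:
  //   Eigen::array<Eigen::DenseIndex, 2> strides; strides[0] = 1; strides[1] = 2;
  return every_2nd_col.dimension(1) == 50 ? 0 : 1;
}
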
View File

@@ -14,7 +14,13 @@ namespace Eigen {
// Use the SimpleThreadPool by default. We'll switch to the new non blocking
// thread pool later.
#ifdef EIGEN_USE_NONBLOCKING_THREAD_POOL
template <typename Env> using ThreadPoolTempl = NonBlockingThreadPoolTempl<Env>;
typedef NonBlockingThreadPool ThreadPool;
#else
template <typename Env> using ThreadPoolTempl = SimpleThreadPoolTempl<Env>;
typedef SimpleThreadPool ThreadPool;
#endif
// Barrier is an object that allows one or more threads to wait until

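The hunk above keeps a single Eigen::ThreadPool name while letting EIGEN_USE_NONBLOCKING_THREAD_POOL pick the implementation, so client code compiles unchanged either way. A hedged sketch of typical use with the tensor module (thread counts and sizes are illustrative, and the ThreadPoolDevice constructor shape is assumed from the code of this era):

#define EIGEN_USE_THREADS
#include <unsupported/Eigen/CXX11/Tensor>

int main() {
  Eigen::ThreadPool pool(4);                // SimpleThreadPool or NonBlockingThreadPool,
                                            // depending on the macro in this hunk
  Eigen::ThreadPoolDevice device(&pool, 4);

  Eigen::Tensor<float, 1> a(100000), b(100000), c(100000);
  a.setRandom();
  b.setRandom();
  c.device(device) = a + b;                 // expression evaluated on the pool
  return 0;
}
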
View File

@@ -135,7 +135,7 @@ class TensorExecutor<Expression, ThreadPoolDevice, Vectorizable> {
{
const Index PacketSize = Vectorizable ? unpacket_traits<typename Evaluator::PacketReturnType>::size : 1;
const Index size = array_prod(evaluator.dimensions());
int num_threads = device.numThreads();
size_t num_threads = device.numThreads();
#ifdef EIGEN_USE_COST_MODEL
if (num_threads > 1) {
num_threads = TensorCostModel<ThreadPoolDevice>::numThreads(

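The EIGEN_USE_COST_MODEL branch above asks TensorCostModel for a thread count instead of always using every pool thread, and the widened size_t type matches what that call returns. The underlying idea, as a rough sketch that is not Eigen's actual formula (all names here are hypothetical):

#include <algorithm>
#include <cstddef>

// Pick a thread count so each thread gets enough work to amortize dispatch cost.
std::size_t pick_num_threads(double total_cost,          // estimated cost of the whole expression
                             double min_cost_per_thread,  // below this, an extra thread is not worth it
                             std::size_t max_threads) {
  std::size_t wanted = static_cast<std::size_t>(total_cost / min_cost_per_thread);
  return std::max<std::size_t>(1, std::min(wanted, max_threads));
}
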
View File

@@ -168,7 +168,7 @@ class RunQueue {
// larger than it is during concurrent modifications. E.g. pop can
// decrement size before the corresponding push has incremented it.
// So the computed size can be up to kSize + 1, fix it.
if (size > kSize) size = kSize;
if (size > static_cast<int>(kSize)) size = kSize;
return size;
}
}

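The cast added above is needed because size is a signed int while kSize is an unsigned constant; without it the comparison promotes size to unsigned and trips -Wsign-compare. A tiny illustration of that pitfall (values are illustrative):

int main() {
  const unsigned kSize = 1024;
  int size = -1;
  // In (size > kSize) the int is converted to unsigned, so -1 becomes a huge
  // value and the test is true; -Wsign-compare warns about exactly this.
  // (size > static_cast<int>(kSize)) keeps the comparison signed and is false.
  return (size > static_cast<int>(kSize)) ? 1 : 0;
}
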
View File

@@ -95,7 +95,7 @@ static void test_stress_eventcount()
ec.Notify(false);
continue;
}
std::this_thread::yield();
EIGEN_THREAD_YIELD();
j--;
}
}));

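Switching from std::this_thread::yield() to EIGEN_THREAD_YIELD() routes the yield through the thread-pool module's compatibility macro, so it can be redefined on toolchains without a usable <thread>. On a standard C++11 toolchain the macro is expected to reduce to the original call, roughly as below (the exact definition in this tree may differ):

// Approximate shape of the hook (ThreadYield.h in the ThreadPool sources).
#include <thread>
#ifndef EIGEN_THREAD_YIELD
#define EIGEN_THREAD_YIELD() std::this_thread::yield()
#endif
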
View File

@@ -30,11 +30,11 @@ void test_basic_runqueue()
RunQueue<int, 4> q;
// Check empty state.
VERIFY(q.Empty());
VERIFY_IS_EQUAL(0, q.Size());
VERIFY_IS_EQUAL(0u, q.Size());
VERIFY_IS_EQUAL(0, q.PopFront());
std::vector<int> stolen;
VERIFY_IS_EQUAL(0, q.PopBackHalf(&stolen));
VERIFY_IS_EQUAL(0, stolen.size());
VERIFY_IS_EQUAL(0u, stolen.size());
// Push one front, pop one front.
VERIFY_IS_EQUAL(0, q.PushFront(1));
VERIFY_IS_EQUAL(1, q.Size());
@@ -184,7 +184,7 @@ void test_stress_runqueue()
sum += j;
continue;
}
std::this_thread::yield();
EIGEN_THREAD_YIELD();
j--;
}
total += sum;
@@ -194,7 +194,7 @@ void test_stress_runqueue()
std::vector<int> stolen;
for (int j = 1; j < kEvents;) {
if (q.PopBackHalf(&stolen) == 0) {
std::this_thread::yield();
EIGEN_THREAD_YIELD();
continue;
}
while (stolen.size() && j < kEvents) {
@@ -209,7 +209,7 @@ void test_stress_runqueue()
int v = stolen.back();
stolen.pop_back();
VERIFY_IS_NOT_EQUAL(v, 0);
while ((v = q.PushBack(v)) != 0) std::this_thread::yield();
while ((v = q.PushBack(v)) != 0) EIGEN_THREAD_YIELD();
}
total -= sum;
}));