Added ability to access the cache sizes from the tensor devices

2025-09-14 18:33:16 +08:00 · 2016-04-14 21:25:06 -07:00 · 2016-04-14 21:25:06 -07:00 · a62e924656
commit a62e924656
parent 18e6f67426
3 changed files with 39 additions and 0 deletions
--- a/Eigen/src/Core/products/GeneralBlockPanelKernel.h
+++ b/Eigen/src/Core/products/GeneralBlockPanelKernel.h
@ -2224,6 +2224,16 @@ inline std::ptrdiff_t l2CacheSize()
  return l2;
 }
 /** \returns the currently set level 3 cpu cache size (in bytes) used to estimate
  * the ideal blocking size parameters.
  * \sa setCpuCacheSize */
 inline std::ptrdiff_t l3CacheSize()
 {
  // The query takes one out-pointer per cache level; only the level 3 value is returned.
  std::ptrdiff_t first_level, second_level, third_level;
  internal::manage_caching_sizes(GetAction, &first_level, &second_level, &third_level);
  return third_level;
 }
 /** Set the cpu L1 and L2 cache sizes (in bytes).
  * These values are used to adjust the size of the blocks
  * for the algorithms working per blocks.
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceDefault.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceDefault.h
@ -44,6 +44,26 @@ struct DefaultDevice {
 #endif
  }
  /** \returns the first level cache size (in bytes) seen by this device:
    * l1CacheSize() when compiled for the host, and a fixed 48 KiB when compiled
    * for a CUDA device (i.e. when __CUDA_ARCH__ is defined). */
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE size_t firstLevelCacheSize() const {
 #if !defined(__CUDA_ARCH__)
    // Host CPU code path.
    return static_cast<size_t>(l1CacheSize());
 #else
    // CUDA device code path: report the amount of shared memory available.
    const size_t shared_memory_bytes = 48*1024;
    return shared_memory_bytes;
 #endif
  }
  /** \returns the last level cache size (in bytes) seen by this device:
    * l3CacheSize() when compiled for the host, and the same value as
    * firstLevelCacheSize() when compiled for a CUDA device. */
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE size_t lastLevelCacheSize() const {
 #if !defined(__CUDA_ARCH__)
    // Host CPU code path (single threaded): report the l3 cache size as is.
    return static_cast<size_t>(l3CacheSize());
 #else
    // CUDA device code path: reuse the first level figure.
    return firstLevelCacheSize();
 #endif
  }
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE int majorDeviceVersion() const {
 #ifndef __CUDA_ARCH__
    // Running single threaded on the host CPU
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceThreadPool.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceThreadPool.h
@ -128,6 +128,15 @@ struct ThreadPoolDevice {
    return num_threads_;
  }
  /** \returns the first level cache size (in bytes), as reported by l1CacheSize(). */
  EIGEN_STRONG_INLINE size_t firstLevelCacheSize() const {
    const size_t l1_bytes = static_cast<size_t>(l1CacheSize());
    return l1_bytes;
  }
  /** \returns the per-thread share of the level 3 cache (in bytes), i.e.
    * l3CacheSize() divided by the number of threads of this device. */
  EIGEN_STRONG_INLINE size_t lastLevelCacheSize() const {
    // The l3 cache is shared between all the cores, so it is split evenly across the threads.
    // NOTE(review): this divides by num_threads_; presumably it is never zero - confirm against the constructor.
    const auto total_l3_bytes = l3CacheSize();
    return total_l3_bytes / num_threads_;
  }
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE int majorDeviceVersion() const {
    // Should return an enum that encodes the ISA supported by the CPU
    return 1;