mirror of
https://gitlab.com/libeigen/eigen.git
synced 2025-07-31 17:22:07 +08:00
Added ability to access the cache sizes from the tensor devices
This commit is contained in:
parent
18e6f67426
commit
a62e924656
@ -2224,6 +2224,16 @@ inline std::ptrdiff_t l2CacheSize()
|
|||||||
return l2;
|
return l2;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** \returns the currently set level 3 cpu cache size (in bytes) used to estimate the ideal blocking size paramete\
|
||||||
|
rs.
|
||||||
|
* \sa setCpuCacheSize */
|
||||||
|
inline std::ptrdiff_t l3CacheSize()
|
||||||
|
{
|
||||||
|
std::ptrdiff_t l1, l2, l3;
|
||||||
|
internal::manage_caching_sizes(GetAction, &l1, &l2, &l3);
|
||||||
|
return l3;
|
||||||
|
}
|
||||||
|
|
||||||
/** Set the cpu L1 and L2 cache sizes (in bytes).
|
/** Set the cpu L1 and L2 cache sizes (in bytes).
|
||||||
* These values are used to adjust the size of the blocks
|
* These values are used to adjust the size of the blocks
|
||||||
* for the algorithms working per blocks.
|
* for the algorithms working per blocks.
|
||||||
|
@ -44,6 +44,26 @@ struct DefaultDevice {
|
|||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** \returns the size (in bytes) of the first level of cache for this device:
  * the value of l1CacheSize() on the host CPU, or a fixed 48 KiB when compiled
  * with __CUDA_ARCH__ defined. */
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE size_t firstLevelCacheSize() const {
#if defined(__CUDA_ARCH__)
  // Running on a CUDA device: report the amount of shared memory available.
  const size_t shared_memory_bytes = 48 * 1024;
  return shared_memory_bytes;
#else
  // Running on the host CPU: l1CacheSize() is converted to the unsigned return type.
  return static_cast<size_t>(l1CacheSize());
#endif
}
|
||||||
|
|
||||||
|
/** \returns the size (in bytes) of the last level of cache for this device:
  * the value of l3CacheSize() on the host CPU, or the same value as
  * firstLevelCacheSize() when compiled with __CUDA_ARCH__ defined. */
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE size_t lastLevelCacheSize() const {
#if defined(__CUDA_ARCH__)
  // Running on a CUDA device: the first-level figure is reused as the last level.
  return firstLevelCacheSize();
#else
  // Running single threaded on the host CPU: the whole l3 size is reported.
  return static_cast<size_t>(l3CacheSize());
#endif
}
|
||||||
|
|
||||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE int majorDeviceVersion() const {
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE int majorDeviceVersion() const {
|
||||||
#ifndef __CUDA_ARCH__
|
#ifndef __CUDA_ARCH__
|
||||||
// Running single threaded on the host CPU
|
// Running single threaded on the host CPU
|
||||||
|
@ -128,6 +128,15 @@ struct ThreadPoolDevice {
|
|||||||
return num_threads_;
|
return num_threads_;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** \returns the size (in bytes) reported by l1CacheSize(), converted to size_t. */
EIGEN_STRONG_INLINE size_t firstLevelCacheSize() const {
  const size_t l1_bytes = static_cast<size_t>(l1CacheSize());
  return l1_bytes;
}
|
||||||
|
|
||||||
|
/** \returns the share (in bytes) of the l3 cache attributed to one thread, i.e.
  * l3CacheSize() divided by the number of threads of this device. If the
  * thread count is zero, the undivided l3 size is returned. */
EIGEN_STRONG_INLINE size_t lastLevelCacheSize() const {
  // The l3 cache size is shared between all the cores.
  const size_t l3_bytes = static_cast<size_t>(l3CacheSize());
  const size_t thread_count = static_cast<size_t>(num_threads_);
  // Integer division by zero is undefined behavior, so a zero thread count
  // falls back to the whole cache size instead of dividing.
  return thread_count == 0 ? l3_bytes : l3_bytes / thread_count;
}
|
||||||
|
|
||||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE int majorDeviceVersion() const {
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE int majorDeviceVersion() const {
|
||||||
// Should return an enum that encodes the ISA supported by the CPU
|
// Should return an enum that encodes the ISA supported by the CPU
|
||||||
return 1;
|
return 1;
|
||||||
|
Loading…
x
Reference in New Issue
Block a user