mirror of
https://gitlab.com/libeigen/eigen.git
synced 2025-06-04 18:54:00 +08:00
Merged in rmlarsen/eigen2 (pull request PR-292)
Adds a fast memcpy function to Eigen.
This commit is contained in:
commit
e96c77668d
@ -74,6 +74,41 @@ inline void throw_std_bad_alloc()
|
|||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Copies `size` bytes from `src` to `dst`.
//
// Sizes 1 through 16 are routed to memcpy calls whose length is a literal
// constant; every other size (0 included) is handled by one variable-length
// library call.
EIGEN_DEVICE_FUNC
inline void fast_memcpy(void* dst, const void* src, size_t size) {
#if defined(__CUDA__) || defined(__ANDROID__)
  // When __CUDA__ or __ANDROID__ is defined, skip the size dispatch entirely.
  ::memcpy(dst, src, size);
#else
  // With a fixed length most compilers emit the copy inline, which is
  // significantly faster for small buffers.
  switch (size) {
    case 1:  memcpy(dst, src, 1);  return;
    case 2:  memcpy(dst, src, 2);  return;
    case 3:  memcpy(dst, src, 3);  return;
    case 4:  memcpy(dst, src, 4);  return;
    case 5:  memcpy(dst, src, 5);  return;
    case 6:  memcpy(dst, src, 6);  return;
    case 7:  memcpy(dst, src, 7);  return;
    case 8:  memcpy(dst, src, 8);  return;
    case 9:  memcpy(dst, src, 9);  return;
    case 10: memcpy(dst, src, 10); return;
    case 11: memcpy(dst, src, 11); return;
    case 12: memcpy(dst, src, 12); return;
    case 13: memcpy(dst, src, 13); return;
    case 14: memcpy(dst, src, 14); return;
    case 15: memcpy(dst, src, 15); return;
    case 16: memcpy(dst, src, 16); return;
    default: break;
  }

  // Reached only for sizes that have no dedicated case above.
#ifdef EIGEN_OS_LINUX
  // On Linux, memmove appears to be faster than memcpy for large sizes.
  memmove(dst, src, size);
#else
  memcpy(dst, src, size);
#endif
#endif
}
|
||||||
|
|
||||||
/*****************************************************************************
|
/*****************************************************************************
|
||||||
*** Implementation of handmade aligned functions ***
|
*** Implementation of handmade aligned functions ***
|
||||||
*****************************************************************************/
|
*****************************************************************************/
|
||||||
@ -493,7 +528,7 @@ template<typename T> struct smart_copy_helper<T,true> {
|
|||||||
IntPtr size = IntPtr(end)-IntPtr(start);
|
IntPtr size = IntPtr(end)-IntPtr(start);
|
||||||
if(size==0) return;
|
if(size==0) return;
|
||||||
eigen_internal_assert(start!=0 && end!=0 && target!=0);
|
eigen_internal_assert(start!=0 && end!=0 && target!=0);
|
||||||
memcpy(target, start, size);
|
fast_memcpy(target, start, size);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -56,7 +56,7 @@ void pack_simple(Scalar * dst, const Scalar * src, Index cols, Index rows, Index
|
|||||||
} else {
|
} else {
|
||||||
// Naive memcpy calls
|
// Naive memcpy calls
|
||||||
for (Index col = 0; col < cols; ++col) {
|
for (Index col = 0; col < cols; ++col) {
|
||||||
memcpy(dst + col*lddst, src + col*ldsrc, rows*sizeof(Scalar));
|
internal::fast_memcpy(dst + col*lddst, src + col*ldsrc, rows*sizeof(Scalar));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -22,7 +22,7 @@ struct DefaultDevice {
|
|||||||
internal::aligned_free(buffer);
|
internal::aligned_free(buffer);
|
||||||
}
|
}
|
||||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void memcpy(void* dst, const void* src, size_t n) const {
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void memcpy(void* dst, const void* src, size_t n) const {
|
||||||
::memcpy(dst, src, n);
|
internal::fast_memcpy(dst, src, n);
|
||||||
}
|
}
|
||||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void memcpyHostToDevice(void* dst, const void* src, size_t n) const {
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void memcpyHostToDevice(void* dst, const void* src, size_t n) const {
|
||||||
memcpy(dst, src, n);
|
memcpy(dst, src, n);
|
||||||
|
@ -106,7 +106,7 @@ struct ThreadPoolDevice {
|
|||||||
}
|
}
|
||||||
|
|
||||||
EIGEN_STRONG_INLINE void memcpy(void* dst, const void* src, size_t n) const {
|
EIGEN_STRONG_INLINE void memcpy(void* dst, const void* src, size_t n) const {
|
||||||
::memcpy(dst, src, n);
|
internal::fast_memcpy(dst, src, n);
|
||||||
}
|
}
|
||||||
EIGEN_STRONG_INLINE void memcpyHostToDevice(void* dst, const void* src, size_t n) const {
|
EIGEN_STRONG_INLINE void memcpyHostToDevice(void* dst, const void* src, size_t n) const {
|
||||||
memcpy(dst, src, n);
|
memcpy(dst, src, n);
|
||||||
|
@ -253,7 +253,7 @@ struct TensorEvaluator<const TensorFFTOp<FFT, ArgType, FFTResultType, FFTDir>, D
|
|||||||
// get data into line_buf
|
// get data into line_buf
|
||||||
const Index stride = m_strides[dim];
|
const Index stride = m_strides[dim];
|
||||||
if (stride == 1) {
|
if (stride == 1) {
|
||||||
memcpy(line_buf, &buf[base_offset], line_len*sizeof(ComplexScalar));
|
m_device.memcpy(line_buf, &buf[base_offset], line_len*sizeof(ComplexScalar));
|
||||||
} else {
|
} else {
|
||||||
Index offset = base_offset;
|
Index offset = base_offset;
|
||||||
for (int j = 0; j < line_len; ++j, offset += stride) {
|
for (int j = 0; j < line_len; ++j, offset += stride) {
|
||||||
@ -271,7 +271,7 @@ struct TensorEvaluator<const TensorFFTOp<FFT, ArgType, FFTResultType, FFTDir>, D
|
|||||||
|
|
||||||
// write back
|
// write back
|
||||||
if (FFTDir == FFT_FORWARD && stride == 1) {
|
if (FFTDir == FFT_FORWARD && stride == 1) {
|
||||||
memcpy(&buf[base_offset], line_buf, line_len*sizeof(ComplexScalar));
|
m_device.memcpy(&buf[base_offset], line_buf, line_len*sizeof(ComplexScalar));
|
||||||
} else {
|
} else {
|
||||||
Index offset = base_offset;
|
Index offset = base_offset;
|
||||||
const ComplexScalar div_factor = ComplexScalar(1.0 / line_len, 0);
|
const ComplexScalar div_factor = ComplexScalar(1.0 / line_len, 0);
|
||||||
|
Loading…
x
Reference in New Issue
Block a user