mirror of
https://gitlab.com/libeigen/eigen.git
synced 2025-07-26 06:44:27 +08:00
Merged in codeplaysoftware/eigen-upstream-pure/separating_internal_memory_allocation (pull request PR-446)
Distinguishing internal memory allocation/deallocation from explicit user memory allocation/deallocation.
This commit is contained in:
commit
0360c36170
@ -112,7 +112,7 @@ struct TensorEvaluator<const TensorCustomUnaryOp<CustomUnaryFunc, XprType>, Devi
|
|||||||
return false;
|
return false;
|
||||||
} else {
|
} else {
|
||||||
m_result = static_cast<CoeffReturnType*>(
|
m_result = static_cast<CoeffReturnType*>(
|
||||||
m_device.allocate(dimensions().TotalSize() * sizeof(Scalar)));
|
m_device.allocate_temp(dimensions().TotalSize() * sizeof(Scalar)));
|
||||||
evalTo(m_result);
|
evalTo(m_result);
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
@ -120,7 +120,7 @@ struct TensorEvaluator<const TensorCustomUnaryOp<CustomUnaryFunc, XprType>, Devi
|
|||||||
|
|
||||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() {
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() {
|
||||||
if (m_result != NULL) {
|
if (m_result != NULL) {
|
||||||
m_device.deallocate(m_result);
|
m_device.deallocate_temp(m_result);
|
||||||
m_result = NULL;
|
m_result = NULL;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -273,7 +273,7 @@ struct TensorEvaluator<const TensorCustomBinaryOp<CustomBinaryFunc, LhsXprType,
|
|||||||
evalTo(data);
|
evalTo(data);
|
||||||
return false;
|
return false;
|
||||||
} else {
|
} else {
|
||||||
m_result = static_cast<Scalar *>(m_device.allocate(dimensions().TotalSize() * sizeof(Scalar)));
|
m_result = static_cast<Scalar *>(m_device.allocate_temp(dimensions().TotalSize() * sizeof(Scalar)));
|
||||||
evalTo(m_result);
|
evalTo(m_result);
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
@ -281,7 +281,7 @@ struct TensorEvaluator<const TensorCustomBinaryOp<CustomBinaryFunc, LhsXprType,
|
|||||||
|
|
||||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() {
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() {
|
||||||
if (m_result != NULL) {
|
if (m_result != NULL) {
|
||||||
m_device.deallocate(m_result);
|
m_device.deallocate_temp(m_result);
|
||||||
m_result = NULL;
|
m_result = NULL;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -20,6 +20,12 @@ struct DefaultDevice {
|
|||||||
}
|
}
|
||||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void deallocate(void* buffer) const {
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void deallocate(void* buffer) const {
|
||||||
internal::aligned_free(buffer);
|
internal::aligned_free(buffer);
|
||||||
|
}
|
||||||
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void* allocate_temp(size_t num_bytes) const {
|
||||||
|
return allocate(num_bytes);
|
||||||
|
}
|
||||||
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void deallocate_temp(void* buffer) const {
|
||||||
|
deallocate(buffer);
|
||||||
}
|
}
|
||||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void memcpy(void* dst, const void* src, size_t n) const {
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void memcpy(void* dst, const void* src, size_t n) const {
|
||||||
::memcpy(dst, src, n);
|
::memcpy(dst, src, n);
|
||||||
|
@ -207,6 +207,15 @@ struct GpuDevice {
|
|||||||
stream_->deallocate(buffer);
|
stream_->deallocate(buffer);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
EIGEN_STRONG_INLINE void* allocate_temp(size_t num_bytes) const {
|
||||||
|
return stream_->allocate(num_bytes);
|
||||||
|
}
|
||||||
|
|
||||||
|
EIGEN_STRONG_INLINE void deallocate_temp(void* buffer) const {
|
||||||
|
stream_->deallocate(buffer);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
EIGEN_STRONG_INLINE void* scratchpad() const {
|
EIGEN_STRONG_INLINE void* scratchpad() const {
|
||||||
return stream_->scratchpad();
|
return stream_->scratchpad();
|
||||||
}
|
}
|
||||||
|
@ -105,6 +105,14 @@ struct ThreadPoolDevice {
|
|||||||
internal::aligned_free(buffer);
|
internal::aligned_free(buffer);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
EIGEN_STRONG_INLINE void* allocate_temp(size_t num_bytes) const {
|
||||||
|
return allocate(num_bytes);
|
||||||
|
}
|
||||||
|
|
||||||
|
EIGEN_STRONG_INLINE void deallocate_temp(void* buffer) const {
|
||||||
|
deallocate(buffer);
|
||||||
|
}
|
||||||
|
|
||||||
EIGEN_STRONG_INLINE void memcpy(void* dst, const void* src, size_t n) const {
|
EIGEN_STRONG_INLINE void memcpy(void* dst, const void* src, size_t n) const {
|
||||||
::memcpy(dst, src, n);
|
::memcpy(dst, src, n);
|
||||||
}
|
}
|
||||||
|
@ -115,7 +115,7 @@ struct TensorEvaluator<const TensorForcedEvalOp<ArgType>, Device>
|
|||||||
#endif
|
#endif
|
||||||
EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(CoeffReturnType*) {
|
EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(CoeffReturnType*) {
|
||||||
const Index numValues = internal::array_prod(m_impl.dimensions());
|
const Index numValues = internal::array_prod(m_impl.dimensions());
|
||||||
m_buffer = (CoeffReturnType*)m_device.allocate(numValues * sizeof(CoeffReturnType));
|
m_buffer = (CoeffReturnType*)m_device.allocate_temp(numValues * sizeof(CoeffReturnType));
|
||||||
// Should initialize the memory in case we're dealing with non POD types.
|
// Should initialize the memory in case we're dealing with non POD types.
|
||||||
if (NumTraits<CoeffReturnType>::RequireInitialization) {
|
if (NumTraits<CoeffReturnType>::RequireInitialization) {
|
||||||
for (Index i = 0; i < numValues; ++i) {
|
for (Index i = 0; i < numValues; ++i) {
|
||||||
@ -129,7 +129,7 @@ struct TensorEvaluator<const TensorForcedEvalOp<ArgType>, Device>
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() {
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() {
|
||||||
m_device.deallocate(m_buffer);
|
m_device.deallocate_temp(m_buffer);
|
||||||
m_buffer = NULL;
|
m_buffer = NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -513,7 +513,7 @@ struct TensorEvaluator<const TensorReductionOp<Op, Dims, ArgType, MakePointer_>,
|
|||||||
!RunningOnGPU))) {
|
!RunningOnGPU))) {
|
||||||
bool need_assign = false;
|
bool need_assign = false;
|
||||||
if (!data) {
|
if (!data) {
|
||||||
m_result = static_cast<CoeffReturnType*>(m_device.allocate(sizeof(CoeffReturnType)));
|
m_result = static_cast<CoeffReturnType*>(m_device.allocate_temp(sizeof(CoeffReturnType)));
|
||||||
data = m_result;
|
data = m_result;
|
||||||
need_assign = true;
|
need_assign = true;
|
||||||
}
|
}
|
||||||
@ -525,7 +525,7 @@ struct TensorEvaluator<const TensorReductionOp<Op, Dims, ArgType, MakePointer_>,
|
|||||||
const Index num_values_to_reduce = internal::array_prod(m_reducedDims);
|
const Index num_values_to_reduce = internal::array_prod(m_reducedDims);
|
||||||
const Index num_coeffs_to_preserve = internal::array_prod(m_dimensions);
|
const Index num_coeffs_to_preserve = internal::array_prod(m_dimensions);
|
||||||
if (!data) {
|
if (!data) {
|
||||||
data = static_cast<CoeffReturnType*>(m_device.allocate(sizeof(CoeffReturnType) * num_coeffs_to_preserve));
|
data = static_cast<CoeffReturnType*>(m_device.allocate_temp(sizeof(CoeffReturnType) * num_coeffs_to_preserve));
|
||||||
m_result = data;
|
m_result = data;
|
||||||
}
|
}
|
||||||
Op reducer(m_reducer);
|
Op reducer(m_reducer);
|
||||||
@ -549,7 +549,7 @@ struct TensorEvaluator<const TensorReductionOp<Op, Dims, ArgType, MakePointer_>,
|
|||||||
const Index num_coeffs_to_preserve = internal::array_prod(m_dimensions);
|
const Index num_coeffs_to_preserve = internal::array_prod(m_dimensions);
|
||||||
if (!data) {
|
if (!data) {
|
||||||
if (num_coeffs_to_preserve < 1024 && num_values_to_reduce > num_coeffs_to_preserve && num_values_to_reduce > 128) {
|
if (num_coeffs_to_preserve < 1024 && num_values_to_reduce > num_coeffs_to_preserve && num_values_to_reduce > 128) {
|
||||||
data = static_cast<CoeffReturnType*>(m_device.allocate(sizeof(CoeffReturnType) * num_coeffs_to_preserve));
|
data = static_cast<CoeffReturnType*>(m_device.allocate_temp(sizeof(CoeffReturnType) * num_coeffs_to_preserve));
|
||||||
m_result = data;
|
m_result = data;
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
@ -559,7 +559,7 @@ struct TensorEvaluator<const TensorReductionOp<Op, Dims, ArgType, MakePointer_>,
|
|||||||
Op reducer(m_reducer);
|
Op reducer(m_reducer);
|
||||||
if (internal::InnerReducer<Self, Op, Device>::run(*this, reducer, m_device, data, num_values_to_reduce, num_coeffs_to_preserve)) {
|
if (internal::InnerReducer<Self, Op, Device>::run(*this, reducer, m_device, data, num_values_to_reduce, num_coeffs_to_preserve)) {
|
||||||
if (m_result) {
|
if (m_result) {
|
||||||
m_device.deallocate(m_result);
|
m_device.deallocate_temp(m_result);
|
||||||
m_result = NULL;
|
m_result = NULL;
|
||||||
}
|
}
|
||||||
return true;
|
return true;
|
||||||
@ -582,7 +582,7 @@ struct TensorEvaluator<const TensorReductionOp<Op, Dims, ArgType, MakePointer_>,
|
|||||||
const Index num_coeffs_to_preserve = internal::array_prod(m_dimensions);
|
const Index num_coeffs_to_preserve = internal::array_prod(m_dimensions);
|
||||||
if (!data) {
|
if (!data) {
|
||||||
if (num_coeffs_to_preserve < 1024 && num_values_to_reduce > num_coeffs_to_preserve && num_values_to_reduce > 32) {
|
if (num_coeffs_to_preserve < 1024 && num_values_to_reduce > num_coeffs_to_preserve && num_values_to_reduce > 32) {
|
||||||
data = static_cast<CoeffReturnType*>(m_device.allocate(sizeof(CoeffReturnType) * num_coeffs_to_preserve));
|
data = static_cast<CoeffReturnType*>(m_device.allocate_temp(sizeof(CoeffReturnType) * num_coeffs_to_preserve));
|
||||||
m_result = data;
|
m_result = data;
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
@ -592,7 +592,7 @@ struct TensorEvaluator<const TensorReductionOp<Op, Dims, ArgType, MakePointer_>,
|
|||||||
Op reducer(m_reducer);
|
Op reducer(m_reducer);
|
||||||
if (internal::OuterReducer<Self, Op, Device>::run(*this, reducer, m_device, data, num_values_to_reduce, num_coeffs_to_preserve)) {
|
if (internal::OuterReducer<Self, Op, Device>::run(*this, reducer, m_device, data, num_values_to_reduce, num_coeffs_to_preserve)) {
|
||||||
if (m_result) {
|
if (m_result) {
|
||||||
m_device.deallocate(m_result);
|
m_device.deallocate_temp(m_result);
|
||||||
m_result = NULL;
|
m_result = NULL;
|
||||||
}
|
}
|
||||||
return true;
|
return true;
|
||||||
@ -607,7 +607,7 @@ struct TensorEvaluator<const TensorReductionOp<Op, Dims, ArgType, MakePointer_>,
|
|||||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() {
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() {
|
||||||
m_impl.cleanup();
|
m_impl.cleanup();
|
||||||
if (m_result) {
|
if (m_result) {
|
||||||
m_device.deallocate(m_result);
|
m_device.deallocate_temp(m_result);
|
||||||
m_result = NULL;
|
m_result = NULL;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user