Use the device's allocate function instead of internal::aligned_malloc (and its deallocate function instead of internal::aligned_free). This makes it easier to track memory usage in device instances.

This commit is contained in:
Yuefeng Zhou 2018-02-20 16:50:05 -08:00
parent adb134d47e
commit 1eff6cf8a7

View File

@@ -377,7 +377,7 @@ struct TensorEvaluator<const TensorContractionOp<Indices, LeftArgType, RightArgT
divup<size_t>(bm_ * bk_ * sizeof(LhsScalar), align) * align; divup<size_t>(bm_ * bk_ * sizeof(LhsScalar), align) * align;
size_t rhs_size = size_t rhs_size =
divup<size_t>(bn_ * bk_ * sizeof(RhsScalar), align) * align; divup<size_t>(bn_ * bk_ * sizeof(RhsScalar), align) * align;
packed_mem_ = static_cast<char*>(internal::aligned_malloc( packed_mem_ = static_cast<char*>(device_.allocate(
(nm0_ * lhs_size + nn0_ * rhs_size) * std::min<size_t>(nk_, P - 1))); (nm0_ * lhs_size + nn0_ * rhs_size) * std::min<size_t>(nk_, P - 1)));
char* mem = static_cast<char*>(packed_mem_); char* mem = static_cast<char*>(packed_mem_);
for (Index x = 0; x < numext::mini<Index>(nk_, P - 1); x++) { for (Index x = 0; x < numext::mini<Index>(nk_, P - 1); x++) {
@@ -399,7 +399,7 @@ struct TensorEvaluator<const TensorContractionOp<Indices, LeftArgType, RightArgT
for (Index m = 0; m < nm_; m++) delete[] state_kernel_[x][m]; for (Index m = 0; m < nm_; m++) delete[] state_kernel_[x][m];
delete[] state_kernel_[x]; delete[] state_kernel_[x];
} }
internal::aligned_free(packed_mem_); device_.deallocate(packed_mem_);
} }
void run() { void run() {