Fix shorten-64-to-32 warning. Use regular memcpy if num_threads==0.

This commit is contained in:
Rasmus Munk Larsen 2018-12-12 14:45:31 -08:00
parent f582ea3579
commit dd6d65898a

View File

@@ -87,13 +87,13 @@ struct ThreadPoolDevice {
const size_t kMinBlockSize = 32768; const size_t kMinBlockSize = 32768;
typedef TensorCostModel<ThreadPoolDevice> CostModel; typedef TensorCostModel<ThreadPoolDevice> CostModel;
const size_t num_threads = CostModel::numThreads(n, TensorOpCost(1.0, 1.0, 0), 4); const size_t num_threads = CostModel::numThreads(n, TensorOpCost(1.0, 1.0, 0), 4);
if (n <= kMinBlockSize || num_threads == 1) { if (n <= kMinBlockSize || num_threads < 2) {
::memcpy(dst, src, n); ::memcpy(dst, src, n);
} else { } else {
const char* src_ptr = static_cast<const char*>(src); const char* src_ptr = static_cast<const char*>(src);
char* dst_ptr = static_cast<char*>(dst); char* dst_ptr = static_cast<char*>(dst);
const size_t blocksize = (n + (num_threads - 1)) / num_threads; const size_t blocksize = (n + (num_threads - 1)) / num_threads;
Barrier barrier(num_threads - 1); Barrier barrier(static_cast<int>(num_threads - 1));
// Launch the last 3 blocks on worker threads. // Launch the last 3 blocks on worker threads.
for (size_t i = 1; i < num_threads; ++i) { for (size_t i = 1; i < num_threads; ++i) {
enqueue_with_barrier(&barrier, [n, i, src_ptr, dst_ptr, blocksize] { enqueue_with_barrier(&barrier, [n, i, src_ptr, dst_ptr, blocksize] {