Avoid unnecessary conversions

This commit is contained in:
Benoit Steiner 2016-03-23 16:52:38 -07:00
parent 92693b50eb
commit 41434a8a85
2 changed files with 5 additions and 5 deletions

View File

@@ -128,7 +128,7 @@ class TensorExecutor<Expression, ThreadPoolDevice, Vectorizable>
const unsigned int numblocks = static_cast<unsigned int>(size / blocksize); const unsigned int numblocks = static_cast<unsigned int>(size / blocksize);
Barrier barrier(numblocks); Barrier barrier(numblocks);
for (int i = 0; i < numblocks; ++i) { for (unsigned int i = 0; i < numblocks; ++i) {
device.enqueue_with_barrier(&barrier, &EvalRange<Evaluator, Index, Vectorizable>::run, evaluator, i*blocksize, (i+1)*blocksize); device.enqueue_with_barrier(&barrier, &EvalRange<Evaluator, Index, Vectorizable>::run, evaluator, i*blocksize, (i+1)*blocksize);
} }

View File

@@ -258,7 +258,7 @@ struct FullReducer<Self, Op, ThreadPoolDevice, false> {
Barrier barrier(numblocks); Barrier barrier(numblocks);
MaxSizeVector<typename Self::CoeffReturnType> shards(numblocks, reducer.initialize()); MaxSizeVector<typename Self::CoeffReturnType> shards(numblocks, reducer.initialize());
for (Index i = 0; i < numblocks; ++i) { for (unsigned int i = 0; i < numblocks; ++i) {
device.enqueue_with_barrier(&barrier, &FullReducerShard<Self, Op, false>::run, self, device.enqueue_with_barrier(&barrier, &FullReducerShard<Self, Op, false>::run, self,
i * blocksize, blocksize, reducer, &shards[i]); i * blocksize, blocksize, reducer, &shards[i]);
} }
@@ -271,7 +271,7 @@ struct FullReducer<Self, Op, ThreadPoolDevice, false> {
finalShard = reducer.initialize(); finalShard = reducer.initialize();
} }
barrier.Wait(); barrier.Wait();
for (Index i = 0; i < numblocks; ++i) { for (unsigned int i = 0; i < numblocks; ++i) {
reducer.reduce(shards[i], &finalShard); reducer.reduce(shards[i], &finalShard);
} }
*output = reducer.finalize(finalShard); *output = reducer.finalize(finalShard);
@@ -305,7 +305,7 @@ struct FullReducer<Self, Op, ThreadPoolDevice, true> {
Barrier barrier(numblocks); Barrier barrier(numblocks);
MaxSizeVector<typename Self::CoeffReturnType> shards(numblocks, reducer.initialize()); MaxSizeVector<typename Self::CoeffReturnType> shards(numblocks, reducer.initialize());
for (Index i = 0; i < numblocks; ++i) { for (unsigned int i = 0; i < numblocks; ++i) {
device.enqueue_with_barrier(&barrier, &FullReducerShard<Self, Op, true>::run, device.enqueue_with_barrier(&barrier, &FullReducerShard<Self, Op, true>::run,
self, i * blocksize, blocksize, reducer, self, i * blocksize, blocksize, reducer,
&shards[i]); &shards[i]);
@@ -319,7 +319,7 @@ struct FullReducer<Self, Op, ThreadPoolDevice, true> {
} }
barrier.Wait(); barrier.Wait();
for (Index i = 0; i < numblocks; ++i) { for (unsigned int i = 0; i < numblocks; ++i) {
reducer.reduce(shards[i], &finalShard); reducer.reduce(shards[i], &finalShard);
} }
*output = reducer.finalize(finalShard); *output = reducer.finalize(finalShard);