mirror of
https://gitlab.com/libeigen/eigen.git
synced 2025-08-12 03:39:01 +08:00
Avoid unnecessary conversions
This commit is contained in:
parent
92693b50eb
commit
41434a8a85
@ -128,7 +128,7 @@ class TensorExecutor<Expression, ThreadPoolDevice, Vectorizable>
|
|||||||
const unsigned int numblocks = static_cast<unsigned int>(size / blocksize);
|
const unsigned int numblocks = static_cast<unsigned int>(size / blocksize);
|
||||||
|
|
||||||
Barrier barrier(numblocks);
|
Barrier barrier(numblocks);
|
||||||
for (int i = 0; i < numblocks; ++i) {
|
for (unsigned int i = 0; i < numblocks; ++i) {
|
||||||
device.enqueue_with_barrier(&barrier, &EvalRange<Evaluator, Index, Vectorizable>::run, evaluator, i*blocksize, (i+1)*blocksize);
|
device.enqueue_with_barrier(&barrier, &EvalRange<Evaluator, Index, Vectorizable>::run, evaluator, i*blocksize, (i+1)*blocksize);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -258,7 +258,7 @@ struct FullReducer<Self, Op, ThreadPoolDevice, false> {
|
|||||||
|
|
||||||
Barrier barrier(numblocks);
|
Barrier barrier(numblocks);
|
||||||
MaxSizeVector<typename Self::CoeffReturnType> shards(numblocks, reducer.initialize());
|
MaxSizeVector<typename Self::CoeffReturnType> shards(numblocks, reducer.initialize());
|
||||||
for (Index i = 0; i < numblocks; ++i) {
|
for (unsigned int i = 0; i < numblocks; ++i) {
|
||||||
device.enqueue_with_barrier(&barrier, &FullReducerShard<Self, Op, false>::run, self,
|
device.enqueue_with_barrier(&barrier, &FullReducerShard<Self, Op, false>::run, self,
|
||||||
i * blocksize, blocksize, reducer, &shards[i]);
|
i * blocksize, blocksize, reducer, &shards[i]);
|
||||||
}
|
}
|
||||||
@ -271,7 +271,7 @@ struct FullReducer<Self, Op, ThreadPoolDevice, false> {
|
|||||||
finalShard = reducer.initialize();
|
finalShard = reducer.initialize();
|
||||||
}
|
}
|
||||||
barrier.Wait();
|
barrier.Wait();
|
||||||
for (Index i = 0; i < numblocks; ++i) {
|
for (unsigned int i = 0; i < numblocks; ++i) {
|
||||||
reducer.reduce(shards[i], &finalShard);
|
reducer.reduce(shards[i], &finalShard);
|
||||||
}
|
}
|
||||||
*output = reducer.finalize(finalShard);
|
*output = reducer.finalize(finalShard);
|
||||||
@ -305,7 +305,7 @@ struct FullReducer<Self, Op, ThreadPoolDevice, true> {
|
|||||||
|
|
||||||
Barrier barrier(numblocks);
|
Barrier barrier(numblocks);
|
||||||
MaxSizeVector<typename Self::CoeffReturnType> shards(numblocks, reducer.initialize());
|
MaxSizeVector<typename Self::CoeffReturnType> shards(numblocks, reducer.initialize());
|
||||||
for (Index i = 0; i < numblocks; ++i) {
|
for (unsigned int i = 0; i < numblocks; ++i) {
|
||||||
device.enqueue_with_barrier(&barrier, &FullReducerShard<Self, Op, true>::run,
|
device.enqueue_with_barrier(&barrier, &FullReducerShard<Self, Op, true>::run,
|
||||||
self, i * blocksize, blocksize, reducer,
|
self, i * blocksize, blocksize, reducer,
|
||||||
&shards[i]);
|
&shards[i]);
|
||||||
@ -319,7 +319,7 @@ struct FullReducer<Self, Op, ThreadPoolDevice, true> {
|
|||||||
}
|
}
|
||||||
|
|
||||||
barrier.Wait();
|
barrier.Wait();
|
||||||
for (Index i = 0; i < numblocks; ++i) {
|
for (unsigned int i = 0; i < numblocks; ++i) {
|
||||||
reducer.reduce(shards[i], &finalShard);
|
reducer.reduce(shards[i], &finalShard);
|
||||||
}
|
}
|
||||||
*output = reducer.finalize(finalShard);
|
*output = reducer.finalize(finalShard);
|
||||||
|
Loading…
x
Reference in New Issue
Block a user