Mirror of https://gitlab.com/libeigen/eigen.git (synced 2025-08-12 19:59:05 +08:00)
Ensured that each thread has its own copy of the TensorEvaluator: this avoids race conditions when the evaluator calls a non-thread-safe functor, e.g. when generating random numbers.
parent 8a382aa119
commit 6559d09c60
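Why the change matters: the old EvalRange::run took an Evaluator*, so every worker thread dereferenced the same evaluator object. If that evaluator wraps a stateful, non-thread-safe functor (a random-number generator mutates its engine state on every call), concurrent calls are a data race. Passing the evaluator by value gives each task a private copy. A minimal standalone sketch of the problem and the fix (RandomFunctor and worker are illustrative names, not Eigen code):

#include <random>
#include <thread>
#include <vector>

// Toy stand-in for a non-thread-safe functor: each call mutates the
// engine state, so unsynchronized concurrent calls are a data race.
struct RandomFunctor {
  std::mt19937 engine;
  float operator()() {
    return std::uniform_real_distribution<float>(0.f, 1.f)(engine);
  }
};

// Taking the functor by value means each thread mutates a private copy,
// which is the property this commit gives EvalRange::run.
void worker(RandomFunctor fn, int n) {
  for (int i = 0; i < n; ++i) fn();
}

int main() {
  RandomFunctor fn;
  std::vector<std::thread> threads;
  for (int t = 0; t < 4; ++t) {
    // std::thread decay-copies its arguments, so each thread gets its own
    // RandomFunctor. Passing &fn to a worker taking RandomFunctor* would
    // reintroduce the race this commit fixes.
    threads.emplace_back(worker, fn, 1000);
  }
  for (std::thread& th : threads) th.join();
  return 0;
}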
@@ -77,17 +77,17 @@ class TensorExecutor<Expression, DefaultDevice, true>
 #ifdef EIGEN_USE_THREADS
 template <typename Evaluator, typename Index, bool Vectorizable = Evaluator::PacketAccess>
 struct EvalRange {
-  static void run(Evaluator* evaluator, const Index first, const Index last) {
+  static void run(Evaluator evaluator, const Index first, const Index last) {
     eigen_assert(last > first);
     for (Index i = first; i < last; ++i) {
-      evaluator->evalScalar(i);
+      evaluator.evalScalar(i);
     }
   }
 };

 template <typename Evaluator, typename Index>
 struct EvalRange<Evaluator, Index, true> {
-  static void run(Evaluator* evaluator, const Index first, const Index last) {
+  static void run(Evaluator evaluator, const Index first, const Index last) {
     eigen_assert(last > first);

     Index i = first;
@@ -96,12 +96,12 @@ struct EvalRange<Evaluator, Index, true> {
       eigen_assert(first % PacketSize == 0);
       Index lastPacket = last - (last % PacketSize);
       for (; i < lastPacket; i += PacketSize) {
-        evaluator->evalPacket(i);
+        evaluator.evalPacket(i);
       }
     }

     for (; i < last; ++i) {
-      evaluator->evalScalar(i);
+      evaluator.evalScalar(i);
     }
   }
 };
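The vectorized EvalRange specialization above evaluates full packets up to lastPacket and finishes the tail with scalar calls. A small worked sketch of that index arithmetic (plain C++, with a constant packet width standing in for Eigen's unpacket_traits):

#include <cstdio>

// Sketch of the vectorized EvalRange splitting: with PacketSize == 4 and
// the range [0, 10), packets cover [0, 8) and scalars cover [8, 10).
int main() {
  const int PacketSize = 4;  // stand-in for unpacket_traits<...>::size
  const int first = 0, last = 10;
  const int lastPacket = last - (last % PacketSize);  // 10 - 2 == 8
  int i = first;
  for (; i < lastPacket; i += PacketSize)
    std::printf("evalPacket(%d)  covers [%d, %d)\n", i, i, i + PacketSize);
  for (; i < last; ++i)
    std::printf("evalScalar(%d)\n", i);
  return 0;
}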
@@ -130,16 +130,17 @@ class TensorExecutor<Expression, ThreadPoolDevice, Vectorizable>
       std::vector<Future> results;
       results.reserve(numblocks);
       for (int i = 0; i < numblocks; ++i) {
-        results.push_back(device.enqueue(&EvalRange<Evaluator, Index>::run, &evaluator, i*blocksize, (i+1)*blocksize));
+        results.push_back(device.enqueue(&EvalRange<Evaluator, Index>::run, evaluator, i*blocksize, (i+1)*blocksize));
       }
-      for (int i = 0; i < numblocks; ++i) {
-        results[i].get();
-      }

       if (numblocks * blocksize < size) {
-        EvalRange<Evaluator, Index>::run(&evaluator, numblocks * blocksize, size);
+        EvalRange<Evaluator, Index>::run(evaluator, numblocks * blocksize, size);
       }

+      for (int i = 0; i < numblocks; ++i) {
+        get_when_ready(&results[i]);
+      }
+
     }
     evaluator.cleanup();
   }
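Two things change in the thread-pool executor: the evaluator is now passed to enqueue by value, so each task receives its own copy, and the main thread now evaluates the leftover tail range before blocking on the futures rather than immediately after enqueueing. A minimal sketch of the same shape, with std::async standing in for ThreadPoolDevice::enqueue and a toy Evaluator (both are stand-ins, not Eigen's types):

#include <future>
#include <vector>

// Toy evaluator: by-value parameters below ensure each task mutates a copy.
struct Evaluator {
  int state = 0;
  void evalScalar(int) { ++state; }
};

static void run(Evaluator evaluator, int first, int last) {
  for (int i = first; i < last; ++i) evaluator.evalScalar(i);
}

int main() {
  const int size = 1000, numblocks = 4, blocksize = 200;
  Evaluator evaluator;
  std::vector<std::future<void>> results;
  results.reserve(numblocks);
  for (int i = 0; i < numblocks; ++i) {
    // Like the thread pool's enqueue, std::async decay-copies its
    // arguments: every task gets a private Evaluator.
    results.push_back(std::async(std::launch::async, run, evaluator,
                                 i * blocksize, (i + 1) * blocksize));
  }
  // The main thread handles the remainder before waiting, mirroring the
  // reordering in this commit.
  if (numblocks * blocksize < size)
    run(evaluator, numblocks * blocksize, size);
  for (std::future<void>& f : results) f.get();
  return 0;
}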
@@ -168,7 +169,8 @@ __launch_bounds__(1024)
   const Index PacketSize = unpacket_traits<typename Evaluator::PacketReturnType>::size;
   const Index vectorized_step_size = step_size * PacketSize;
   const Index vectorized_size = (size / PacketSize) * PacketSize;
-  for (Index i = first_index * PacketSize; i < vectorized_size; i += vectorized_step_size) {
+  for (Index i = first_index * PacketSize; i < vectorized_size;
+       i += vectorized_step_size) {
     eval.evalPacket(i);
   }
   for (Index i = vectorized_size + first_index; i < size; i += step_size) {
@@ -192,8 +194,7 @@ class TensorExecutor<Expression, GpuDevice, Vectorizable>
       const int block_size = maxCudaThreadsPerBlock();

       const Index size = array_prod(evaluator.dimensions());
-      EigenMetaKernel<TensorEvaluator<Expression, GpuDevice>, Index><<<num_blocks, block_size, 0, device.stream()>>>(evaluator, size);
-      assert(cudaGetLastError() == cudaSuccess);
+      LAUNCH_CUDA_KERNEL((EigenMetaKernel<TensorEvaluator<Expression, GpuDevice>, Index>), num_blocks, block_size, 0, device, evaluator, size);
     }
     evaluator.cleanup();
   }
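The GPU executor now launches through the LAUNCH_CUDA_KERNEL macro instead of a raw <<<...>>> expression followed by a separate assert. Eigen's actual macro definition is not part of this diff; the sketch below only illustrates the usual shape of such a wrapper (launch on the device's stream, then check cudaGetLastError), with the extra parentheses around the kernel name protecting the commas inside its template arguments:

#include <cassert>
#include <cuda_runtime.h>

// Illustrative sketch only, not Eigen's definition: launch the kernel on
// the device's stream, then fail fast if the launch itself errored.
#define SKETCH_LAUNCH_CUDA_KERNEL(kernel, gridsize, blocksize, sharedmem, device, ...) \
  do {                                                                                 \
    (kernel)<<<(gridsize), (blocksize), (sharedmem), (device).stream()>>>(__VA_ARGS__);\
    assert(cudaGetLastError() == cudaSuccess);                                         \
  } while (0)

// Usage mirroring the diff (hypothetical types):
//   SKETCH_LAUNCH_CUDA_KERNEL((EigenMetaKernel<Eval, Index>),
//                             num_blocks, block_size, 0, device,
//                             evaluator, size);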