Misc small improvements to the reduction code.

This commit is contained in:
Benoit Steiner 2016-06-06 14:09:46 -07:00
parent ea75dba201
commit 7ef9f47b58

View File

@ -130,15 +130,17 @@ __global__ void FullReductionKernel(Reducer reducer, const Self input, Index num
if (block == 0) { if (block == 0) {
// We're the first block to run, initialize the output value // We're the first block to run, initialize the output value
atomicExch(output, reducer.initialize()); atomicExch(output, reducer.initialize());
unsigned int old = atomicExch(semaphore, 2u); __threadfence();
assert(old == 1u); atomicExch(semaphore, 2u);
} }
else { else {
// Wait for the first block to initialize the output value.
// Use atomicCAS here to ensure that the reads aren't cached // Use atomicCAS here to ensure that the reads aren't cached
unsigned int val = atomicCAS(semaphore, 2u, 2u); unsigned int val;
while (val < 2u) { do {
val = atomicCAS(semaphore, 2u, 2u); val = atomicCAS(semaphore, 2u, 2u);
} }
while (val < 2u);
} }
} }
} }
@ -166,12 +168,8 @@ __global__ void FullReductionKernel(Reducer reducer, const Self input, Index num
} }
if (gridDim.x > 1 && threadIdx.x == 0) { if (gridDim.x > 1 && threadIdx.x == 0) {
unsigned int ticket = atomicInc(semaphore, UINT_MAX); // Let the last block reset the semaphore
assert(ticket >= 2u); atomicInc(semaphore, gridDim.x + 1);
if (ticket == gridDim.x + 1) {
// We're the last block, reset the semaphore
*semaphore = 0;
}
} }
} }