Removed unnecessary thread synchronization

This commit is contained in:
Benoit Steiner 2016-05-13 10:49:38 -07:00
parent 7aa3557d31
commit c4fc8b70ec

View File

@@ -177,7 +177,7 @@ static __global__ void FullReductionKernelHalfFloat(Reducer reducer, const Self
} }
half2 accum = reducer.template initializePacket<half2>(); half2 accum = reducer.template initializePacket<half2>();
Index max_iter = numext::mini<Index>((num_coeffs - first_index) / 2, NumPerThread*BlockSize / 2); const Index max_iter = numext::mini<Index>((num_coeffs - first_index) / 2, NumPerThread*BlockSize / 2);
for (Index i = 0; i < max_iter; i += BlockSize) { for (Index i = 0; i < max_iter; i += BlockSize) {
const Index index = first_index + 2*i; const Index index = first_index + 2*i;
eigen_assert(index + 1 < num_coeffs); eigen_assert(index + 1 < num_coeffs);
@@ -357,8 +357,6 @@ __global__ void InnerReductionKernel(Reducer reducer, const Self input, Index nu
atomicReduce(&(output[row]), reduced_val, reducer); atomicReduce(&(output[row]), reduced_val, reducer);
} }
} }
__syncthreads();
} }
} }