Cleanup the cuda executor code.

Benoit Steiner 2016-10-04 08:52:13 -07:00
parent 2f6d1607c8
commit 6af5ac7e27


@@ -234,16 +234,11 @@ struct EigenMetaKernelEval<Evaluator, Index, true> {
 template <typename Evaluator, typename Index>
 __global__ void
 __launch_bounds__(1024)
-EigenMetaKernel(Evaluator memcopied_eval, Index size) {
+EigenMetaKernel(Evaluator eval, Index size) {
   const Index first_index = blockIdx.x * blockDim.x + threadIdx.x;
   const Index step_size = blockDim.x * gridDim.x;
-  // Cuda memcopies the kernel arguments. That's fine for POD, but for more
-  // complex types such as evaluators we should really conform to the C++
-  // standard and call a proper copy constructor.
-  Evaluator eval(memcopied_eval);
   const bool vectorizable = Evaluator::PacketAccess & Evaluator::IsAligned;
   EigenMetaKernelEval<Evaluator, Index, vectorizable>::run(eval, first_index, size, step_size);
 }
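
The cleanup drops the device-side Evaluator eval(memcopied_eval) copy and uses the by-value kernel parameter directly; CUDA already copies kernel arguments onto the device at launch, so the extra copy construction was redundant for the evaluator types passed here. A minimal, self-contained sketch of the same pattern, with a hypothetical SquareEvaluator standing in for an Eigen tensor evaluator (not part of the commit):

#include <cuda_runtime.h>

// Hypothetical stand-in for an Eigen evaluator: it only holds raw device
// pointers, so the byte-wise copy CUDA performs on kernel arguments is safe.
struct SquareEvaluator {
  const float* in;
  float* out;
  __device__ void evalScalar(int i) const { out[i] = in[i] * in[i]; }
};

// Same shape as EigenMetaKernel after the cleanup: the evaluator is taken by
// value and used directly in a grid-stride loop, with no extra device copy.
template <typename Evaluator, typename Index>
__global__ void __launch_bounds__(1024) MetaKernelSketch(Evaluator eval, Index size) {
  const Index first_index = blockIdx.x * blockDim.x + threadIdx.x;
  const Index step_size = blockDim.x * gridDim.x;
  for (Index i = first_index; i < size; i += step_size) {
    eval.evalScalar(i);
  }
}

int main() {
  const int n = 1 << 20;
  float *d_in, *d_out;
  cudaMalloc(&d_in, n * sizeof(float));
  cudaMalloc(&d_out, n * sizeof(float));
  cudaMemset(d_in, 0, n * sizeof(float));  // placeholder input

  SquareEvaluator eval{d_in, d_out};
  MetaKernelSketch<SquareEvaluator, int><<<(n + 1023) / 1024, 1024>>>(eval, n);
  cudaDeviceSynchronize();

  cudaFree(d_in);
  cudaFree(d_out);
  return 0;
}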