From 6af5ac7e2749bdea7a31323855ef3b4333b91c3e Mon Sep 17 00:00:00 2001
From: Benoit Steiner
Date: Tue, 4 Oct 2016 08:52:13 -0700
Subject: [PATCH] Cleanup the cuda executor code.

---
 unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h | 7 +------
 1 file changed, 1 insertion(+), 6 deletions(-)

diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h b/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h
index a116bf17f..0cac7b179 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h
@@ -234,16 +234,11 @@ struct EigenMetaKernelEval {
 template <typename Evaluator, typename Index>
 __global__ void
 __launch_bounds__(1024)
-EigenMetaKernel(Evaluator memcopied_eval, Index size) {
+EigenMetaKernel(Evaluator eval, Index size) {
 
   const Index first_index = blockIdx.x * blockDim.x + threadIdx.x;
   const Index step_size = blockDim.x * gridDim.x;
 
-  // Cuda memcopies the kernel arguments. That's fine for POD, but for more
-  // complex types such as evaluators we should really conform to the C++
-  // standard and call a proper copy constructor.
-  Evaluator eval(memcopied_eval);
-
   const bool vectorizable = Evaluator::PacketAccess & Evaluator::IsAligned;
   EigenMetaKernelEval<Evaluator, Index, vectorizable>::run(eval, first_index, size, step_size);
 }