mirror of
https://gitlab.com/libeigen/eigen.git
synced 2025-05-01 16:24:28 +08:00
Make sure that the copy constructor of the evaluator is always called before launching the evaluation of a tensor expression on a CUDA device.
This commit is contained in:
parent
e709488361
commit
dfa991cbae
@ -157,7 +157,11 @@ class TensorExecutor<Expression, ThreadPoolDevice, Vectorizable>
|
|||||||
template <typename Evaluator, typename Index>
|
template <typename Evaluator, typename Index>
|
||||||
__global__ void
|
__global__ void
|
||||||
__launch_bounds__(1024)
|
__launch_bounds__(1024)
|
||||||
EigenMetaKernel_NonVectorizable(Evaluator eval, Index size) {
|
EigenMetaKernel_NonVectorizable(Evaluator memcopied_eval, Index size) {
|
||||||
|
// Cuda memcopies the kernel arguments. That's fine for POD, but for more
|
||||||
|
// complex types such as evaluators we should really conform to the C++
|
||||||
|
// standard and call a proper copy constructor.
|
||||||
|
Evaluator eval(memcopied_eval);
|
||||||
|
|
||||||
const Index first_index = blockIdx.x * blockDim.x + threadIdx.x;
|
const Index first_index = blockIdx.x * blockDim.x + threadIdx.x;
|
||||||
const Index step_size = blockDim.x * gridDim.x;
|
const Index step_size = blockDim.x * gridDim.x;
|
||||||
@ -171,7 +175,11 @@ EigenMetaKernel_NonVectorizable(Evaluator eval, Index size) {
|
|||||||
template <typename Evaluator, typename Index>
|
template <typename Evaluator, typename Index>
|
||||||
__global__ void
|
__global__ void
|
||||||
__launch_bounds__(1024)
|
__launch_bounds__(1024)
|
||||||
EigenMetaKernel_Vectorizable(Evaluator eval, Index size) {
|
EigenMetaKernel_Vectorizable(Evaluator memcopied_eval, Index size) {
|
||||||
|
// Cuda memcopies the kernel arguments. That's fine for POD, but for more
|
||||||
|
// complex types such as evaluators we should really conform to the C++
|
||||||
|
// standard and call a proper copy constructor.
|
||||||
|
Evaluator eval(memcopied_eval);
|
||||||
|
|
||||||
const Index first_index = blockIdx.x * blockDim.x + threadIdx.x;
|
const Index first_index = blockIdx.x * blockDim.x + threadIdx.x;
|
||||||
const Index step_size = blockDim.x * gridDim.x;
|
const Index step_size = blockDim.x * gridDim.x;
|
||||||
|
Loading…
x
Reference in New Issue
Block a user