diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorContraction.h b/unsupported/Eigen/CXX11/src/Tensor/TensorContraction.h index d1c659858..61a4e1a3a 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorContraction.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorContraction.h @@ -667,8 +667,8 @@ struct TensorContractionEvaluatorBase this->m_device.memset(buffer, 0, m * n * sizeof(Scalar)); this->template evalGemmPartial(buffer, - 0, k, 1); + rhs_inner_dim_reordered, + Alignment, true>(buffer, 0, k, 1); } template + template EIGEN_DEVICE_FUNC void evalGemmPartial(Scalar* buffer, Index k_start, Index k_end, int num_threads) const { eigen_assert(k_end >= k_start && k_start >= 0 && k_end <= this->m_k_size); // columns in slice on left side, rows on right side