diff --git a/Eigen/src/Core/VectorwiseOp.h b/Eigen/src/Core/VectorwiseOp.h index b004f76ce..762cbfc3d 100644 --- a/Eigen/src/Core/VectorwiseOp.h +++ b/Eigen/src/Core/VectorwiseOp.h @@ -745,7 +745,7 @@ template class VectorwiseOp # endif protected: - Index redux_length() const + EIGEN_DEVICE_FUNC Index redux_length() const { return Direction==Vertical ? m_matrix.rows() : m_matrix.cols(); } diff --git a/Eigen/src/Core/util/BlasUtil.h b/Eigen/src/Core/util/BlasUtil.h index 483297356..979d974a7 100644 --- a/Eigen/src/Core/util/BlasUtil.h +++ b/Eigen/src/Core/util/BlasUtil.h @@ -586,7 +586,7 @@ struct extract_data_selector { template struct extract_data_selector { - static typename T::Scalar* run(const T&) { return 0; } + EIGEN_DEVICE_FUNC static typename T::Scalar* run(const T&) { return 0; } }; template diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorConvolution.h b/unsupported/Eigen/CXX11/src/Tensor/TensorConvolution.h index 70d21292a..e6e586b7b 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorConvolution.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorConvolution.h @@ -123,7 +123,9 @@ class IndexMapper { inputIndex += idx * m_inputStrides[d]; p -= idx * m_gpuInputStrides[d]; } - inputIndex += p * m_inputStrides[NumKernelDims]; + if (NumKernelDims < NumDims) { + inputIndex += p * m_inputStrides[NumKernelDims]; + } } else { std::ptrdiff_t limit = 0; if (NumKernelDims < NumDims) { @@ -147,7 +149,9 @@ class IndexMapper { outputIndex += idx * m_outputStrides[d]; p -= idx * m_gpuOutputStrides[d]; } - outputIndex += p * m_outputStrides[NumKernelDims]; + if (NumKernelDims < NumDims) { + outputIndex += p * m_outputStrides[NumKernelDims]; + } } else { std::ptrdiff_t limit = 0; if (NumKernelDims < NumDims) { @@ -386,7 +390,7 @@ struct TensorEvaluator m_inputImpl; TensorEvaluator m_kernelImpl; KernelArgType m_kernelArg; diff --git a/unsupported/test/cxx11_tensor_argmax_gpu.cu b/unsupported/test/cxx11_tensor_argmax_gpu.cu index 79f4066e9..d9d5da43d 100644 --- a/unsupported/test/cxx11_tensor_argmax_gpu.cu +++ b/unsupported/test/cxx11_tensor_argmax_gpu.cu @@ -23,8 +23,8 @@ template void test_gpu_simple_argmax() { Tensor in(Eigen::array(72,53,97)); - Tensor out_max(Eigen::array(1)); - Tensor out_min(Eigen::array(1)); + Tensor out_max; + Tensor out_min; in.setRandom(); in *= in.constant(100.0); in(0, 0, 0) = -1000.0; @@ -46,8 +46,8 @@ void test_gpu_simple_argmax() Eigen::GpuDevice gpu_device(&stream); Eigen::TensorMap, Aligned > gpu_in(d_in, Eigen::array(72,53,97)); - Eigen::TensorMap, Aligned > gpu_out_max(d_out_max, Eigen::array(1)); - Eigen::TensorMap, Aligned > gpu_out_min(d_out_min, Eigen::array(1)); + Eigen::TensorMap, Aligned > gpu_out_max(d_out_max); + Eigen::TensorMap, Aligned > gpu_out_min(d_out_min); gpu_out_max.device(gpu_device) = gpu_in.argmax(); gpu_out_min.device(gpu_device) = gpu_in.argmin(); @@ -56,8 +56,8 @@ void test_gpu_simple_argmax() assert(gpuMemcpyAsync(out_min.data(), d_out_min, out_bytes, gpuMemcpyDeviceToHost, gpu_device.stream()) == gpuSuccess); assert(gpuStreamSynchronize(gpu_device.stream()) == gpuSuccess); - VERIFY_IS_EQUAL(out_max(Eigen::array(0)), 72*53*97 - 1); - VERIFY_IS_EQUAL(out_min(Eigen::array(0)), 0); + VERIFY_IS_EQUAL(out_max(), 72*53*97 - 1); + VERIFY_IS_EQUAL(out_min(), 0); gpuFree(d_in); gpuFree(d_out_max); diff --git a/unsupported/test/cxx11_tensor_gpu.cu b/unsupported/test/cxx11_tensor_gpu.cu index 7b3fb5ab1..31baf1bb7 100644 --- a/unsupported/test/cxx11_tensor_gpu.cu +++ b/unsupported/test/cxx11_tensor_gpu.cu @@ -1100,9 +1100,9 @@ void test_gpu_erfc(const Scalar stddev) template void test_gpu_ndtri() { - Tensor in_x(8); - Tensor out(8); - Tensor expected_out(8); + Tensor in_x(9); + Tensor out(9); + Tensor expected_out(9); out.setZero(); in_x(0) = Scalar(1);