Fix bugs exposed by enabling GPU asserts.

This commit is contained in:
Antonio Sánchez 2023-01-27 21:43:00 +00:00 committed by Rasmus Munk Larsen
parent ab8725d947
commit 17ae83a966
5 changed files with 19 additions and 18 deletions

View File

@@ -745,7 +745,7 @@ template<typename ExpressionType, int Direction> class VectorwiseOp
# endif # endif
protected: protected:
Index redux_length() const EIGEN_DEVICE_FUNC Index redux_length() const
{ {
return Direction==Vertical ? m_matrix.rows() : m_matrix.cols(); return Direction==Vertical ? m_matrix.rows() : m_matrix.cols();
} }

View File

@@ -586,7 +586,7 @@ struct extract_data_selector {
template<typename T> template<typename T>
struct extract_data_selector<T,false> { struct extract_data_selector<T,false> {
static typename T::Scalar* run(const T&) { return 0; } EIGEN_DEVICE_FUNC static typename T::Scalar* run(const T&) { return 0; }
}; };
template<typename T> template<typename T>

View File

@@ -123,7 +123,9 @@ class IndexMapper {
inputIndex += idx * m_inputStrides[d]; inputIndex += idx * m_inputStrides[d];
p -= idx * m_gpuInputStrides[d]; p -= idx * m_gpuInputStrides[d];
} }
inputIndex += p * m_inputStrides[NumKernelDims]; if (NumKernelDims < NumDims) {
inputIndex += p * m_inputStrides[NumKernelDims];
}
} else { } else {
std::ptrdiff_t limit = 0; std::ptrdiff_t limit = 0;
if (NumKernelDims < NumDims) { if (NumKernelDims < NumDims) {
@@ -147,7 +149,9 @@ class IndexMapper {
outputIndex += idx * m_outputStrides[d]; outputIndex += idx * m_outputStrides[d];
p -= idx * m_gpuOutputStrides[d]; p -= idx * m_gpuOutputStrides[d];
} }
outputIndex += p * m_outputStrides[NumKernelDims]; if (NumKernelDims < NumDims) {
outputIndex += p * m_outputStrides[NumKernelDims];
}
} else { } else {
std::ptrdiff_t limit = 0; std::ptrdiff_t limit = 0;
if (NumKernelDims < NumDims) { if (NumKernelDims < NumDims) {
@@ -386,7 +390,7 @@ struct TensorEvaluator<const TensorConvolutionOp<Indices, InputArgType, KernelAr
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; }
EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(Scalar*) { EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(Scalar*) {
m_inputImpl.evalSubExprsIfNeeded(NULL); m_inputImpl.evalSubExprsIfNeeded(NULL);
preloadKernel(); preloadKernel();
return true; return true;
@@ -824,7 +828,7 @@ struct TensorEvaluator<const TensorConvolutionOp<Indices, InputArgType, KernelAr
EIGEN_DEVICE_FUNC const Dimensions& dimensions() const { return m_dimensions; } EIGEN_DEVICE_FUNC const Dimensions& dimensions() const { return m_dimensions; }
EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(Scalar* data) { EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(Scalar* data) {
preloadKernel(); preloadKernel();
m_inputImpl.evalSubExprsIfNeeded(NULL); m_inputImpl.evalSubExprsIfNeeded(NULL);
if (data) { if (data) {
@@ -1112,9 +1116,6 @@ struct TensorEvaluator<const TensorConvolutionOp<Indices, InputArgType, KernelAr
} }
private: private:
// No assignment (copies are needed by the kernels)
TensorEvaluator& operator = (const TensorEvaluator&);
TensorEvaluator<InputArgType, GpuDevice> m_inputImpl; TensorEvaluator<InputArgType, GpuDevice> m_inputImpl;
TensorEvaluator<KernelArgType, GpuDevice> m_kernelImpl; TensorEvaluator<KernelArgType, GpuDevice> m_kernelImpl;
KernelArgType m_kernelArg; KernelArgType m_kernelArg;

View File

@@ -23,8 +23,8 @@ template <int Layout>
void test_gpu_simple_argmax() void test_gpu_simple_argmax()
{ {
Tensor<double, 3, Layout> in(Eigen::array<DenseIndex, 3>(72,53,97)); Tensor<double, 3, Layout> in(Eigen::array<DenseIndex, 3>(72,53,97));
Tensor<DenseIndex, 1, Layout> out_max(Eigen::array<DenseIndex, 1>(1)); Tensor<DenseIndex, 0, Layout> out_max;
Tensor<DenseIndex, 1, Layout> out_min(Eigen::array<DenseIndex, 1>(1)); Tensor<DenseIndex, 0, Layout> out_min;
in.setRandom(); in.setRandom();
in *= in.constant(100.0); in *= in.constant(100.0);
in(0, 0, 0) = -1000.0; in(0, 0, 0) = -1000.0;
@@ -46,8 +46,8 @@ void test_gpu_simple_argmax()
Eigen::GpuDevice gpu_device(&stream); Eigen::GpuDevice gpu_device(&stream);
Eigen::TensorMap<Eigen::Tensor<double, 3, Layout>, Aligned > gpu_in(d_in, Eigen::array<DenseIndex, 3>(72,53,97)); Eigen::TensorMap<Eigen::Tensor<double, 3, Layout>, Aligned > gpu_in(d_in, Eigen::array<DenseIndex, 3>(72,53,97));
Eigen::TensorMap<Eigen::Tensor<DenseIndex, 1, Layout>, Aligned > gpu_out_max(d_out_max, Eigen::array<DenseIndex, 1>(1)); Eigen::TensorMap<Eigen::Tensor<DenseIndex, 0, Layout>, Aligned > gpu_out_max(d_out_max);
Eigen::TensorMap<Eigen::Tensor<DenseIndex, 1, Layout>, Aligned > gpu_out_min(d_out_min, Eigen::array<DenseIndex, 1>(1)); Eigen::TensorMap<Eigen::Tensor<DenseIndex, 0, Layout>, Aligned > gpu_out_min(d_out_min);
gpu_out_max.device(gpu_device) = gpu_in.argmax(); gpu_out_max.device(gpu_device) = gpu_in.argmax();
gpu_out_min.device(gpu_device) = gpu_in.argmin(); gpu_out_min.device(gpu_device) = gpu_in.argmin();
@@ -56,8 +56,8 @@ void test_gpu_simple_argmax()
assert(gpuMemcpyAsync(out_min.data(), d_out_min, out_bytes, gpuMemcpyDeviceToHost, gpu_device.stream()) == gpuSuccess); assert(gpuMemcpyAsync(out_min.data(), d_out_min, out_bytes, gpuMemcpyDeviceToHost, gpu_device.stream()) == gpuSuccess);
assert(gpuStreamSynchronize(gpu_device.stream()) == gpuSuccess); assert(gpuStreamSynchronize(gpu_device.stream()) == gpuSuccess);
VERIFY_IS_EQUAL(out_max(Eigen::array<DenseIndex, 1>(0)), 72*53*97 - 1); VERIFY_IS_EQUAL(out_max(), 72*53*97 - 1);
VERIFY_IS_EQUAL(out_min(Eigen::array<DenseIndex, 1>(0)), 0); VERIFY_IS_EQUAL(out_min(), 0);
gpuFree(d_in); gpuFree(d_in);
gpuFree(d_out_max); gpuFree(d_out_max);

View File

@@ -1100,9 +1100,9 @@ void test_gpu_erfc(const Scalar stddev)
template <typename Scalar> template <typename Scalar>
void test_gpu_ndtri() void test_gpu_ndtri()
{ {
Tensor<Scalar, 1> in_x(8); Tensor<Scalar, 1> in_x(9);
Tensor<Scalar, 1> out(8); Tensor<Scalar, 1> out(9);
Tensor<Scalar, 1> expected_out(8); Tensor<Scalar, 1> expected_out(9);
out.setZero(); out.setZero();
in_x(0) = Scalar(1); in_x(0) = Scalar(1);