Fix bugs exposed by enabling GPU asserts.

commit 17ae83a966
parent ab8725d947
@@ -745,7 +745,7 @@ template<typename ExpressionType, int Direction> class VectorwiseOp
 # endif
 
   protected:
-    Index redux_length() const
+    EIGEN_DEVICE_FUNC Index redux_length() const
     {
       return Direction==Vertical ? m_matrix.rows() : m_matrix.cols();
     }
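The hunk above only adds the EIGEN_DEVICE_FUNC annotation to redux_length(): with GPU asserts enabled, device-side callers of the vectorwise reduction path need every helper they touch to be callable on the device, and the same annotation recurs in several hunks below. As a rough, hypothetical sketch of what such a host/device macro typically expands to (this is not Eigen's actual definition; MY_DEVICE_FUNC and ReduxSketch are made-up names):

// Hypothetical stand-in for an annotation like EIGEN_DEVICE_FUNC: under a
// CUDA/HIP compiler the function is built for host and device; otherwise the
// attribute disappears and the code stays plain C++.
#if defined(__CUDACC__) || defined(__HIPCC__)
  #define MY_DEVICE_FUNC __host__ __device__
#else
  #define MY_DEVICE_FUNC
#endif

struct ReduxSketch {
  int rows, cols;
  // Without MY_DEVICE_FUNC, calling this helper from a __device__ function
  // triggers "calling a __host__ function from a __device__ function".
  MY_DEVICE_FUNC int redux_length(bool vertical) const {
    return vertical ? rows : cols;
  }
};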
@@ -586,7 +586,7 @@ struct extract_data_selector {
 
 template<typename T>
 struct extract_data_selector<T,false> {
-  static typename T::Scalar* run(const T&) { return 0; }
+  EIGEN_DEVICE_FUNC static typename T::Scalar* run(const T&) { return 0; }
 };
 
 template<typename T>
@@ -123,7 +123,9 @@ class IndexMapper {
         inputIndex += idx * m_inputStrides[d];
         p -= idx * m_gpuInputStrides[d];
       }
-      inputIndex += p * m_inputStrides[NumKernelDims];
+      if (NumKernelDims < NumDims) {
+        inputIndex += p * m_inputStrides[NumKernelDims];
+      }
     } else {
       std::ptrdiff_t limit = 0;
       if (NumKernelDims < NumDims) {
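The guard added here (and the matching one for the output strides in the next hunk) matters because the strides array has one entry per dimension: when the convolution kernel covers every dimension, NumKernelDims == NumDims, so the unguarded read of m_inputStrides[NumKernelDims] was one past the end of the array, which the newly enabled device asserts flag. A loose, hypothetical illustration of the pattern (names are not Eigen's):

#include <cstddef>

// Sketch only: strides has NumDims entries (valid indices 0..NumDims-1), so
// indexing it with NumKernelDims is only legal when NumKernelDims < NumDims.
template <int NumKernelDims, int NumDims>
std::ptrdiff_t map_plane_to_offset(std::ptrdiff_t p,
                                   const std::ptrdiff_t (&strides)[NumDims]) {
  std::ptrdiff_t index = 0;
  // ... per-dimension contributions would be accumulated here ...
  if (NumKernelDims < NumDims) {          // guard mirroring the one added above
    index += p * strides[NumKernelDims];  // safe: NumKernelDims is a valid index
  }
  return index;
}

// Usage sketch:
//   std::ptrdiff_t s[3] = {1, 4, 12};
//   map_plane_to_offset<3>(p, s);  // kernel spans all dims -> guard skips the read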
@@ -147,7 +149,9 @@ class IndexMapper {
         outputIndex += idx * m_outputStrides[d];
         p -= idx * m_gpuOutputStrides[d];
       }
-      outputIndex += p * m_outputStrides[NumKernelDims];
+      if (NumKernelDims < NumDims) {
+        outputIndex += p * m_outputStrides[NumKernelDims];
+      }
     } else {
       std::ptrdiff_t limit = 0;
       if (NumKernelDims < NumDims) {
@@ -386,7 +390,7 @@ struct TensorEvaluator<const TensorConvolutionOp<Indices, InputArgType, KernelAr
 
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; }
 
-  EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(Scalar*) {
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(Scalar*) {
     m_inputImpl.evalSubExprsIfNeeded(NULL);
     preloadKernel();
     return true;
@@ -824,7 +828,7 @@ struct TensorEvaluator<const TensorConvolutionOp<Indices, InputArgType, KernelAr
 
   EIGEN_DEVICE_FUNC const Dimensions& dimensions() const { return m_dimensions; }
 
-  EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(Scalar* data) {
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(Scalar* data) {
     preloadKernel();
     m_inputImpl.evalSubExprsIfNeeded(NULL);
     if (data) {
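The two hunks above promote evalSubExprsIfNeeded to EIGEN_DEVICE_FUNC in the GPU evaluator specializations. The general rule nvcc enforces, and that enabling GPU asserts surfaces, is that anything reachable from a __host__ __device__ caller must itself be device-callable. A small hypothetical sketch of that propagation (made-up names, not Eigen's API):

// Once the calling layer is host/device, every member it invokes must be
// annotated too, or nvcc warns about calling a __host__ function from a
// __host__ __device__ function.
struct EvaluatorSketch {
  __host__ __device__ bool evalSubExprsIfNeeded(float*) { return true; }
  __host__ __device__ float coeff(int i) const { return static_cast<float>(i); }
};

template <typename Evaluator>
__host__ __device__ void run_sketch(Evaluator& eval, float* dst, int n) {
  if (eval.evalSubExprsIfNeeded(nullptr)) {   // needs a device-callable callee
    for (int i = 0; i < n; ++i) dst[i] = eval.coeff(i);
  }
}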
@@ -1112,9 +1116,6 @@ struct TensorEvaluator<const TensorConvolutionOp<Indices, InputArgType, KernelAr
   }
 
  private:
-  // No assignment (copies are needed by the kernels)
-  TensorEvaluator& operator = (const TensorEvaluator&);
-
   TensorEvaluator<InputArgType, GpuDevice> m_inputImpl;
   TensorEvaluator<KernelArgType, GpuDevice> m_kernelImpl;
   KernelArgType m_kernelArg;
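This hunk drops the declared-but-undefined private copy-assignment operator. As the removed comment noted, the GPU path relies on copies: the evaluator is handed to kernels by value, and the leftover user-declared operator is unnecessary and can trip implicit-copy or device-compilation diagnostics once stricter checks are on, so it is presumably simply removed. A rough, hypothetical sketch of the pass-by-value pattern (names invented for illustration):

// CUDA kernel arguments are passed by value, so launching the kernel copies
// the evaluator object onto the device; the type therefore has to remain
// copyable.
template <typename Evaluator>
__global__ void convolution_sketch_kernel(Evaluator eval, float* out, int n) {
  int i = blockIdx.x * blockDim.x + threadIdx.x;
  if (i < n) {
    out[i] = eval.coeff(i);   // operates on the by-value copy of the evaluator
  }
}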
@@ -23,8 +23,8 @@ template <int Layout>
 void test_gpu_simple_argmax()
 {
   Tensor<double, 3, Layout> in(Eigen::array<DenseIndex, 3>(72,53,97));
-  Tensor<DenseIndex, 1, Layout> out_max(Eigen::array<DenseIndex, 1>(1));
-  Tensor<DenseIndex, 1, Layout> out_min(Eigen::array<DenseIndex, 1>(1));
+  Tensor<DenseIndex, 0, Layout> out_max;
+  Tensor<DenseIndex, 0, Layout> out_min;
   in.setRandom();
   in *= in.constant(100.0);
   in(0, 0, 0) = -1000.0;
@@ -46,8 +46,8 @@ void test_gpu_simple_argmax()
   Eigen::GpuDevice gpu_device(&stream);
 
   Eigen::TensorMap<Eigen::Tensor<double, 3, Layout>, Aligned > gpu_in(d_in, Eigen::array<DenseIndex, 3>(72,53,97));
-  Eigen::TensorMap<Eigen::Tensor<DenseIndex, 1, Layout>, Aligned > gpu_out_max(d_out_max, Eigen::array<DenseIndex, 1>(1));
-  Eigen::TensorMap<Eigen::Tensor<DenseIndex, 1, Layout>, Aligned > gpu_out_min(d_out_min, Eigen::array<DenseIndex, 1>(1));
+  Eigen::TensorMap<Eigen::Tensor<DenseIndex, 0, Layout>, Aligned > gpu_out_max(d_out_max);
+  Eigen::TensorMap<Eigen::Tensor<DenseIndex, 0, Layout>, Aligned > gpu_out_min(d_out_min);
 
   gpu_out_max.device(gpu_device) = gpu_in.argmax();
   gpu_out_min.device(gpu_device) = gpu_in.argmin();
@@ -56,8 +56,8 @@ void test_gpu_simple_argmax()
   assert(gpuMemcpyAsync(out_min.data(), d_out_min, out_bytes, gpuMemcpyDeviceToHost, gpu_device.stream()) == gpuSuccess);
   assert(gpuStreamSynchronize(gpu_device.stream()) == gpuSuccess);
 
-  VERIFY_IS_EQUAL(out_max(Eigen::array<DenseIndex, 1>(0)), 72*53*97 - 1);
-  VERIFY_IS_EQUAL(out_min(Eigen::array<DenseIndex, 1>(0)), 0);
+  VERIFY_IS_EQUAL(out_max(), 72*53*97 - 1);
+  VERIFY_IS_EQUAL(out_min(), 0);
 
   gpuFree(d_in);
   gpuFree(d_out_max);
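The three test hunks above are one logical change: argmax()/argmin() with no dimension argument reduce over the whole tensor and produce a rank-0 (scalar) tensor of indices, so the host outputs, the device-side TensorMaps, and the VERIFY checks all switch from rank-1 tensors of size 1 to rank-0 tensors, which is what the now-enabled asserts expect. A minimal host-side sketch of the rank-0 result, assuming the unsupported Tensor module:

#include <unsupported/Eigen/CXX11/Tensor>
#include <iostream>

int main() {
  Eigen::Tensor<double, 3> in(4, 3, 2);
  in.setRandom();
  in(1, 2, 0) = 1e6;  // make the location of the maximum predictable

  // argmax() over all dimensions yields a rank-0 tensor holding a single
  // flat (linearized) index; read it back with operator()().
  Eigen::Tensor<Eigen::DenseIndex, 0> idx = in.argmax();
  std::cout << "flat index of max: " << idx() << "\n";
  return 0;
}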
@@ -1100,9 +1100,9 @@ void test_gpu_erfc(const Scalar stddev)
 template <typename Scalar>
 void test_gpu_ndtri()
 {
-  Tensor<Scalar, 1> in_x(8);
-  Tensor<Scalar, 1> out(8);
-  Tensor<Scalar, 1> expected_out(8);
+  Tensor<Scalar, 1> in_x(9);
+  Tensor<Scalar, 1> out(9);
+  Tensor<Scalar, 1> expected_out(9);
   out.setZero();
 
   in_x(0) = Scalar(1);
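The last hunk only grows the ndtri test buffers from 8 to 9 entries to make room for one more probe value. ndtri is the inverse of the standard normal CDF, so ndtri(0.5) = 0 and results move symmetrically away from zero as the probability approaches 0 or 1. A quick host-side sketch, assuming the Tensor module's ndtri() method that this test exercises:

#include <unsupported/Eigen/CXX11/Tensor>
#include <iostream>

int main() {
  Eigen::Tensor<double, 1> p(3);
  p.setValues({0.5, 0.9, 0.1});
  // Inverse of the standard normal CDF; expected roughly 0, 1.2816, -1.2816.
  Eigen::Tensor<double, 1> x = p.ndtri();
  std::cout << x << "\n";
  return 0;
}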