diff --git a/Eigen/src/Core/DenseBase.h b/Eigen/src/Core/DenseBase.h index 58f682dce..c603b5a6d 100644 --- a/Eigen/src/Core/DenseBase.h +++ b/Eigen/src/Core/DenseBase.h @@ -517,7 +517,9 @@ template class DenseBase template RealScalar lpNorm() const; template + EIGEN_DEVICE_FUNC const Replicate replicate() const; + EIGEN_DEVICE_FUNC const Replicate replicate(Index rowFacor,Index colFactor) const; typedef Reverse ReverseReturnType; diff --git a/Eigen/src/Core/Replicate.h b/Eigen/src/Core/Replicate.h index d105de62b..518c52e57 100644 --- a/Eigen/src/Core/Replicate.h +++ b/Eigen/src/Core/Replicate.h @@ -69,6 +69,7 @@ template class Replicate typedef typename internal::remove_all::type NestedExpression; template + EIGEN_DEVICE_FUNC inline explicit Replicate(const OriginalMatrixType& matrix) : m_matrix(matrix), m_rowFactor(RowFactor), m_colFactor(ColFactor) { @@ -78,6 +79,7 @@ template class Replicate } template + EIGEN_DEVICE_FUNC inline Replicate(const OriginalMatrixType& matrix, Index rowFactor, Index colFactor) : m_matrix(matrix), m_rowFactor(rowFactor), m_colFactor(colFactor) { @@ -85,9 +87,12 @@ template class Replicate THE_MATRIX_OR_EXPRESSION_THAT_YOU_PASSED_DOES_NOT_HAVE_THE_EXPECTED_TYPE) } + EIGEN_DEVICE_FUNC inline Index rows() const { return m_matrix.rows() * m_rowFactor.value(); } + EIGEN_DEVICE_FUNC inline Index cols() const { return m_matrix.cols() * m_colFactor.value(); } + EIGEN_DEVICE_FUNC const _MatrixTypeNested& nestedExpression() const { return m_matrix; diff --git a/Eigen/src/Core/VectorwiseOp.h b/Eigen/src/Core/VectorwiseOp.h index 5ebcdc100..0cc5eff16 100644 --- a/Eigen/src/Core/VectorwiseOp.h +++ b/Eigen/src/Core/VectorwiseOp.h @@ -461,6 +461,7 @@ template class VectorwiseOp */ // NOTE implemented here because of sunstudio's compilation errors template const Replicate + EIGEN_DEVICE_FUNC replicate(Index factor = Factor) const { return Replicate diff --git a/test/cuda_basic.cu b/test/cuda_basic.cu index 6ec9446c6..571a52299 100644 --- 
a/test/cuda_basic.cu +++ b/test/cuda_basic.cu @@ -47,6 +47,23 @@ struct coeff_wise { } }; +template +struct replicate { /* Test functor: for index i, constructs x1 of type T from the pointer in+i and stores three replicated forms of it in consecutive slots of out. */ + EIGEN_DEVICE_FUNC + void operator()(int i, const typename T::Scalar* in, typename T::Scalar* out) const + { + using namespace Eigen; + T x1(in+i); + int step = x1.size() * 4; /* length of one output slot; equals the element count of the 2x2 replication below (rows*2 by cols*2) */ + int stride = 3 * step; /* each index i owns three slots, one per assignment below */ + + typedef Map > MapType; + MapType(out+i*stride+0*step, x1.rows()*2, x1.cols()*2) = x1.replicate(2,2); /* slot 0: replicate(2,2), viewed as rows*2 by cols*2 */ + MapType(out+i*stride+1*step, x1.rows()*3, x1.cols()) = in[i] * x1.colwise().replicate(3); /* slot 1: colwise().replicate(3) scaled by in[i], viewed as rows*3 by cols */ + MapType(out+i*stride+2*step, x1.rows(), x1.cols()*3) = in[i] * x1.rowwise().replicate(3); /* slot 2: rowwise().replicate(3) scaled by in[i], viewed as rows by cols*3 */ + } +}; + template struct redux { EIGEN_DEVICE_FUNC @@ -117,7 +134,7 @@ void test_cuda_basic() Eigen::VectorXf in, out; #ifndef __CUDA_ARCH__ - int data_size = nthreads * 16; + int data_size = nthreads * 512; in.setRandom(data_size); out.setRandom(data_size); #endif @@ -125,6 +142,9 @@ void test_cuda_basic() CALL_SUBTEST( run_and_compare_to_cuda(coeff_wise(), nthreads, in, out) ); CALL_SUBTEST( run_and_compare_to_cuda(coeff_wise(), nthreads, in, out) ); + CALL_SUBTEST( run_and_compare_to_cuda(replicate(), nthreads, in, out) ); + CALL_SUBTEST( run_and_compare_to_cuda(replicate(), nthreads, in, out) ); + CALL_SUBTEST( run_and_compare_to_cuda(redux(), nthreads, in, out) ); CALL_SUBTEST( run_and_compare_to_cuda(redux(), nthreads, in, out) );