Made the BLAS utils usable from within a CUDA kernel

This commit is contained in:
Benoit Steiner 2016-01-11 17:26:56 -08:00
parent c5e6900400
commit bbdabbb379
2 changed files with 25 additions and 24 deletions

View File

@ -123,18 +123,18 @@ template<typename Scalar> struct get_factor<Scalar,typename NumTraits<Scalar>::R
template<typename Scalar, typename Index> template<typename Scalar, typename Index>
class BlasVectorMapper { class BlasVectorMapper {
public: public:
EIGEN_ALWAYS_INLINE BlasVectorMapper(Scalar *data) : m_data(data) {} EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE BlasVectorMapper(Scalar *data) : m_data(data) {}
EIGEN_ALWAYS_INLINE Scalar operator()(Index i) const { EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Scalar operator()(Index i) const {
return m_data[i]; return m_data[i];
} }
template <typename Packet, int AlignmentType> template <typename Packet, int AlignmentType>
EIGEN_ALWAYS_INLINE Packet load(Index i) const { EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet load(Index i) const {
return ploadt<Packet, AlignmentType>(m_data + i); return ploadt<Packet, AlignmentType>(m_data + i);
} }
template <typename Packet> template <typename Packet>
bool aligned(Index i) const { EIGEN_DEVICE_FUNC bool aligned(Index i) const {
return (size_t(m_data+i)%sizeof(Packet))==0; return (size_t(m_data+i)%sizeof(Packet))==0;
} }
@ -148,25 +148,25 @@ class BlasLinearMapper {
typedef typename packet_traits<Scalar>::type Packet; typedef typename packet_traits<Scalar>::type Packet;
typedef typename packet_traits<Scalar>::half HalfPacket; typedef typename packet_traits<Scalar>::half HalfPacket;
EIGEN_ALWAYS_INLINE BlasLinearMapper(Scalar *data) : m_data(data) {} EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE BlasLinearMapper(Scalar *data) : m_data(data) {}
EIGEN_ALWAYS_INLINE void prefetch(int i) const { EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void prefetch(int i) const {
internal::prefetch(&operator()(i)); internal::prefetch(&operator()(i));
} }
EIGEN_ALWAYS_INLINE Scalar& operator()(Index i) const { EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Scalar& operator()(Index i) const {
return m_data[i]; return m_data[i];
} }
EIGEN_ALWAYS_INLINE Packet loadPacket(Index i) const { EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet loadPacket(Index i) const {
return ploadt<Packet, AlignmentType>(m_data + i); return ploadt<Packet, AlignmentType>(m_data + i);
} }
EIGEN_ALWAYS_INLINE HalfPacket loadHalfPacket(Index i) const { EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE HalfPacket loadHalfPacket(Index i) const {
return ploadt<HalfPacket, AlignmentType>(m_data + i); return ploadt<HalfPacket, AlignmentType>(m_data + i);
} }
EIGEN_ALWAYS_INLINE void storePacket(Index i, const Packet &p) const { EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void storePacket(Index i, const Packet &p) const {
pstoret<Scalar, Packet, AlignmentType>(m_data + i, p); pstoret<Scalar, Packet, AlignmentType>(m_data + i, p);
} }
@ -184,18 +184,18 @@ class blas_data_mapper {
typedef BlasLinearMapper<Scalar, Index, AlignmentType> LinearMapper; typedef BlasLinearMapper<Scalar, Index, AlignmentType> LinearMapper;
typedef BlasVectorMapper<Scalar, Index> VectorMapper; typedef BlasVectorMapper<Scalar, Index> VectorMapper;
EIGEN_ALWAYS_INLINE blas_data_mapper(Scalar* data, Index stride) : m_data(data), m_stride(stride) {} EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE blas_data_mapper(Scalar* data, Index stride) : m_data(data), m_stride(stride) {}
EIGEN_ALWAYS_INLINE blas_data_mapper<Scalar, Index, StorageOrder, AlignmentType> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE blas_data_mapper<Scalar, Index, StorageOrder, AlignmentType>
getSubMapper(Index i, Index j) const { getSubMapper(Index i, Index j) const {
return blas_data_mapper<Scalar, Index, StorageOrder, AlignmentType>(&operator()(i, j), m_stride); return blas_data_mapper<Scalar, Index, StorageOrder, AlignmentType>(&operator()(i, j), m_stride);
} }
EIGEN_ALWAYS_INLINE LinearMapper getLinearMapper(Index i, Index j) const { EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE LinearMapper getLinearMapper(Index i, Index j) const {
return LinearMapper(&operator()(i, j)); return LinearMapper(&operator()(i, j));
} }
EIGEN_ALWAYS_INLINE VectorMapper getVectorMapper(Index i, Index j) const { EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE VectorMapper getVectorMapper(Index i, Index j) const {
return VectorMapper(&operator()(i, j)); return VectorMapper(&operator()(i, j));
} }
@ -205,28 +205,28 @@ class blas_data_mapper {
return m_data[StorageOrder==RowMajor ? j + i*m_stride : i + j*m_stride]; return m_data[StorageOrder==RowMajor ? j + i*m_stride : i + j*m_stride];
} }
EIGEN_ALWAYS_INLINE Packet loadPacket(Index i, Index j) const { EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet loadPacket(Index i, Index j) const {
return ploadt<Packet, AlignmentType>(&operator()(i, j)); return ploadt<Packet, AlignmentType>(&operator()(i, j));
} }
EIGEN_ALWAYS_INLINE HalfPacket loadHalfPacket(Index i, Index j) const { EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE HalfPacket loadHalfPacket(Index i, Index j) const {
return ploadt<HalfPacket, AlignmentType>(&operator()(i, j)); return ploadt<HalfPacket, AlignmentType>(&operator()(i, j));
} }
template<typename SubPacket> template<typename SubPacket>
EIGEN_ALWAYS_INLINE void scatterPacket(Index i, Index j, const SubPacket &p) const { EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void scatterPacket(Index i, Index j, const SubPacket &p) const {
pscatter<Scalar, SubPacket>(&operator()(i, j), p, m_stride); pscatter<Scalar, SubPacket>(&operator()(i, j), p, m_stride);
} }
template<typename SubPacket> template<typename SubPacket>
EIGEN_ALWAYS_INLINE SubPacket gatherPacket(Index i, Index j) const { EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE SubPacket gatherPacket(Index i, Index j) const {
return pgather<Scalar, SubPacket>(&operator()(i, j), m_stride); return pgather<Scalar, SubPacket>(&operator()(i, j), m_stride);
} }
const Index stride() const { return m_stride; } EIGEN_DEVICE_FUNC const Index stride() const { return m_stride; }
const Scalar* data() const { return m_data; } EIGEN_DEVICE_FUNC const Scalar* data() const { return m_data; }
Index firstAligned(Index size) const { EIGEN_DEVICE_FUNC Index firstAligned(Index size) const {
if (size_t(m_data)%sizeof(Scalar)) { if (size_t(m_data)%sizeof(Scalar)) {
return -1; return -1;
} }

View File

@ -524,7 +524,7 @@ template<typename T, bool Align> EIGEN_DEVICE_FUNC inline void conditional_align
* \sa first_default_aligned() * \sa first_default_aligned()
*/ */
template<int Alignment, typename Scalar, typename Index> template<int Alignment, typename Scalar, typename Index>
inline Index first_aligned(const Scalar* array, Index size) EIGEN_DEVICE_FUNC inline Index first_aligned(const Scalar* array, Index size)
{ {
static const Index ScalarSize = sizeof(Scalar); static const Index ScalarSize = sizeof(Scalar);
static const Index AlignmentSize = Alignment / ScalarSize; static const Index AlignmentSize = Alignment / ScalarSize;
@ -544,14 +544,15 @@ inline Index first_aligned(const Scalar* array, Index size)
} }
else else
{ {
return std::min<Index>( (AlignmentSize - (Index((std::size_t(array)/sizeof(Scalar))) & AlignmentMask)) & AlignmentMask, size); Index first = (AlignmentSize - (Index((std::size_t(array)/sizeof(Scalar))) & AlignmentMask)) & AlignmentMask;
return (first < size) ? first : size;
} }
} }
/** \internal Returns the index of the first element of the array that is well aligned with respect to the largest packet requirement. /** \internal Returns the index of the first element of the array that is well aligned with respect to the largest packet requirement.
* \sa first_aligned(Scalar*,Index) and first_default_aligned(DenseBase<Derived>) */ * \sa first_aligned(Scalar*,Index) and first_default_aligned(DenseBase<Derived>) */
template<typename Scalar, typename Index> template<typename Scalar, typename Index>
inline Index first_default_aligned(const Scalar* array, Index size) EIGEN_DEVICE_FUNC inline Index first_default_aligned(const Scalar* array, Index size)
{ {
typedef typename packet_traits<Scalar>::type DefaultPacketType; typedef typename packet_traits<Scalar>::type DefaultPacketType;
return first_aligned<unpacket_traits<DefaultPacketType>::alignment>(array, size); return first_aligned<unpacket_traits<DefaultPacketType>::alignment>(array, size);