Deleted some unnecessary and confusing EIGEN_DEVICE_FUNC

This commit is contained in:
Benoit Steiner 2016-09-19 11:33:39 -07:00
parent bf03820339
commit c3ca9b1e76

View File

@ -168,39 +168,20 @@ struct GpuDevice {
return stream_->stream(); return stream_->stream();
} }
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void* allocate(size_t num_bytes) const { EIGEN_STRONG_INLINE void* allocate(size_t num_bytes) const {
#ifndef __CUDA_ARCH__
return stream_->allocate(num_bytes); return stream_->allocate(num_bytes);
#else
eigen_assert(false && "The default device should be used instead to generate kernel code");
return NULL;
#endif
} }
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void deallocate(void* buffer) const { EIGEN_STRONG_INLINE void deallocate(void* buffer) const {
#ifndef __CUDA_ARCH__
stream_->deallocate(buffer); stream_->deallocate(buffer);
#else
eigen_assert(false && "The default device should be used instead to generate kernel code");
#endif
} }
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void* scratchpad() const { EIGEN_STRONG_INLINE void* scratchpad() const {
#ifndef __CUDA_ARCH__
return stream_->scratchpad(); return stream_->scratchpad();
#else
eigen_assert(false && "The default device should be used instead to generate kernel code");
return NULL;
#endif
} }
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE unsigned int* semaphore() const { EIGEN_STRONG_INLINE unsigned int* semaphore() const {
#ifndef __CUDA_ARCH__
return stream_->semaphore(); return stream_->semaphore();
#else
eigen_assert(false && "The default device should be used instead to generate kernel code");
return NULL;
#endif
} }
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void memcpy(void* dst, const void* src, size_t n) const { EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void memcpy(void* dst, const void* src, size_t n) const {
@ -210,30 +191,22 @@ struct GpuDevice {
EIGEN_UNUSED_VARIABLE(err) EIGEN_UNUSED_VARIABLE(err)
assert(err == cudaSuccess); assert(err == cudaSuccess);
#else #else
eigen_assert(false && "The default device should be used instead to generate kernel code"); eigen_assert(false && "The default device should be used instead to generate kernel code");
#endif #endif
} }
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void memcpyHostToDevice(void* dst, const void* src, size_t n) const { EIGEN_STRONG_INLINE void memcpyHostToDevice(void* dst, const void* src, size_t n) const {
#ifndef __CUDA_ARCH__
cudaError_t err = cudaError_t err =
cudaMemcpyAsync(dst, src, n, cudaMemcpyHostToDevice, stream_->stream()); cudaMemcpyAsync(dst, src, n, cudaMemcpyHostToDevice, stream_->stream());
EIGEN_UNUSED_VARIABLE(err) EIGEN_UNUSED_VARIABLE(err)
assert(err == cudaSuccess); assert(err == cudaSuccess);
#else
eigen_assert(false && "The default device should be used instead to generate kernel code");
#endif
} }
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void memcpyDeviceToHost(void* dst, const void* src, size_t n) const { EIGEN_STRONG_INLINE void memcpyDeviceToHost(void* dst, const void* src, size_t n) const {
#ifndef __CUDA_ARCH__
cudaError_t err = cudaError_t err =
cudaMemcpyAsync(dst, src, n, cudaMemcpyDeviceToHost, stream_->stream()); cudaMemcpyAsync(dst, src, n, cudaMemcpyDeviceToHost, stream_->stream());
EIGEN_UNUSED_VARIABLE(err) EIGEN_UNUSED_VARIABLE(err)
assert(err == cudaSuccess); assert(err == cudaSuccess);
#else
eigen_assert(false && "The default device should be used instead to generate kernel code");
#endif
} }
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void memset(void* buffer, int c, size_t n) const { EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void memset(void* buffer, int c, size_t n) const {
@ -242,21 +215,21 @@ struct GpuDevice {
EIGEN_UNUSED_VARIABLE(err) EIGEN_UNUSED_VARIABLE(err)
assert(err == cudaSuccess); assert(err == cudaSuccess);
#else #else
eigen_assert(false && "The default device should be used instead to generate kernel code"); eigen_assert(false && "The default device should be used instead to generate kernel code");
#endif #endif
} }
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE size_t numThreads() const { EIGEN_STRONG_INLINE size_t numThreads() const {
// FIXME // FIXME
return 32; return 32;
} }
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE size_t firstLevelCacheSize() const { EIGEN_STRONG_INLINE size_t firstLevelCacheSize() const {
// FIXME // FIXME
return 48*1024; return 48*1024;
} }
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE size_t lastLevelCacheSize() const { EIGEN_STRONG_INLINE size_t lastLevelCacheSize() const {
// We won't try to take advantage of the l2 cache for the time being, and // We won't try to take advantage of the l2 cache for the time being, and
// there is no l3 cache on cuda devices. // there is no l3 cache on cuda devices.
return firstLevelCacheSize(); return firstLevelCacheSize();
@ -276,56 +249,26 @@ struct GpuDevice {
#endif #endif
} }
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE int getNumCudaMultiProcessors() const { EIGEN_STRONG_INLINE int getNumCudaMultiProcessors() const {
#ifndef __CUDA_ARCH__
return stream_->deviceProperties().multiProcessorCount; return stream_->deviceProperties().multiProcessorCount;
#else
eigen_assert(false && "The default device should be used instead to generate kernel code");
return 0;
#endif
} }
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE int maxCudaThreadsPerBlock() const { EIGEN_STRONG_INLINE int maxCudaThreadsPerBlock() const {
#ifndef __CUDA_ARCH__
return stream_->deviceProperties().maxThreadsPerBlock; return stream_->deviceProperties().maxThreadsPerBlock;
#else
eigen_assert(false && "The default device should be used instead to generate kernel code");
return 0;
#endif
} }
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE int maxCudaThreadsPerMultiProcessor() const { EIGEN_STRONG_INLINE int maxCudaThreadsPerMultiProcessor() const {
#ifndef __CUDA_ARCH__
return stream_->deviceProperties().maxThreadsPerMultiProcessor; return stream_->deviceProperties().maxThreadsPerMultiProcessor;
#else
eigen_assert(false && "The default device should be used instead to generate kernel code");
return 0;
#endif
} }
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE int sharedMemPerBlock() const { EIGEN_STRONG_INLINE int sharedMemPerBlock() const {
#ifndef __CUDA_ARCH__
return stream_->deviceProperties().sharedMemPerBlock; return stream_->deviceProperties().sharedMemPerBlock;
#else
eigen_assert(false && "The default device should be used instead to generate kernel code");
return 0;
#endif
} }
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE int majorDeviceVersion() const { EIGEN_STRONG_INLINE int majorDeviceVersion() const {
#ifndef __CUDA_ARCH__
return stream_->deviceProperties().major; return stream_->deviceProperties().major;
#else
eigen_assert(false && "The default device should be used instead to generate kernel code");
return 0;
#endif
} }
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE int minorDeviceVersion() const { EIGEN_STRONG_INLINE int minorDeviceVersion() const {
#ifndef __CUDA_ARCH__
return stream_->deviceProperties().minor; return stream_->deviceProperties().minor;
#else
eigen_assert(false && "The default device should be used instead to generate kernel code");
return 0;
#endif
} }
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE int maxBlocks() const { EIGEN_STRONG_INLINE int maxBlocks() const {
return max_blocks_; return max_blocks_;
} }