Deleted some unecessary and confusing EIGEN_DEVICE_FUNC

2025-09-20 13:23:15 +08:00 · 2016-09-19 11:33:39 -07:00 · 2016-09-19 11:33:39 -07:00 · c3ca9b1e76
commit c3ca9b1e76
parent bf03820339
1 changed files with 18 additions and 75 deletions
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceCuda.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceCuda.h
@ -168,39 +168,20 @@ struct GpuDevice {
    return stream_->stream();
  }

-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void* allocate(size_t num_bytes) const {
-#ifndef __CUDA_ARCH__
+  EIGEN_STRONG_INLINE void* allocate(size_t num_bytes) const {
    return stream_->allocate(num_bytes);
-#else
-    eigen_assert(false && "The default device should be used instead to generate kernel code");
-    return NULL;
-#endif
  }

-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void deallocate(void* buffer) const {
-#ifndef __CUDA_ARCH__
+  EIGEN_STRONG_INLINE void deallocate(void* buffer) const {
    stream_->deallocate(buffer);
-#else
-    eigen_assert(false && "The default device should be used instead to generate kernel code");
-#endif
  }

-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void* scratchpad() const {
-#ifndef __CUDA_ARCH__
+  EIGEN_STRONG_INLINE void* scratchpad() const {
    return stream_->scratchpad();
-#else
-    eigen_assert(false && "The default device should be used instead to generate kernel code");
-    return NULL;
-#endif
  }

-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE unsigned int* semaphore() const {
-#ifndef __CUDA_ARCH__
+  EIGEN_STRONG_INLINE unsigned int* semaphore() const {
    return stream_->semaphore();
-#else
-    eigen_assert(false && "The default device should be used instead to generate kernel code");
-    return NULL;
-#endif
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void memcpy(void* dst, const void* src, size_t n) const {
@ -214,26 +195,18 @@ struct GpuDevice {
 #endif
  }

-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void memcpyHostToDevice(void* dst, const void* src, size_t n) const {
-#ifndef __CUDA_ARCH__
+  EIGEN_STRONG_INLINE void memcpyHostToDevice(void* dst, const void* src, size_t n) const {
    cudaError_t err =
        cudaMemcpyAsync(dst, src, n, cudaMemcpyHostToDevice, stream_->stream());
    EIGEN_UNUSED_VARIABLE(err)
    assert(err == cudaSuccess);
-#else
-    eigen_assert(false && "The default device should be used instead to generate kernel code");
-#endif
  }

-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void memcpyDeviceToHost(void* dst, const void* src, size_t n) const {
-#ifndef __CUDA_ARCH__
+  EIGEN_STRONG_INLINE void memcpyDeviceToHost(void* dst, const void* src, size_t n) const {
    cudaError_t err =
        cudaMemcpyAsync(dst, src, n, cudaMemcpyDeviceToHost, stream_->stream());
    EIGEN_UNUSED_VARIABLE(err)
    assert(err == cudaSuccess);
-#else
-    eigen_assert(false && "The default device should be used instead to generate kernel code");
-#endif
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void memset(void* buffer, int c, size_t n) const {
@ -246,17 +219,17 @@ struct GpuDevice {
 #endif
  }

-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE size_t numThreads() const {
+  EIGEN_STRONG_INLINE size_t numThreads() const {
    // FIXME
    return 32;
  }

-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE size_t firstLevelCacheSize() const {
+  EIGEN_STRONG_INLINE size_t firstLevelCacheSize() const {
    // FIXME
    return 48*1024;
  }

-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE size_t lastLevelCacheSize() const {
+  EIGEN_STRONG_INLINE size_t lastLevelCacheSize() const {
    // We won't try to take advantage of the l2 cache for the time being, and
    // there is no l3 cache on cuda devices.
    return firstLevelCacheSize();
@ -276,56 +249,26 @@ struct GpuDevice {
 #endif
  }

-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE int getNumCudaMultiProcessors() const {
-#ifndef __CUDA_ARCH__
+  EIGEN_STRONG_INLINE int getNumCudaMultiProcessors() const {
    return stream_->deviceProperties().multiProcessorCount;
-#else
-    eigen_assert(false && "The default device should be used instead to generate kernel code");
-    return 0;
-#endif
  }
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE int maxCudaThreadsPerBlock() const {
-#ifndef __CUDA_ARCH__
+  EIGEN_STRONG_INLINE int maxCudaThreadsPerBlock() const {
    return stream_->deviceProperties().maxThreadsPerBlock;
-#else
-    eigen_assert(false && "The default device should be used instead to generate kernel code");
-    return 0;
-#endif
  }
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE int maxCudaThreadsPerMultiProcessor() const {
-#ifndef __CUDA_ARCH__
+  EIGEN_STRONG_INLINE int maxCudaThreadsPerMultiProcessor() const {
    return stream_->deviceProperties().maxThreadsPerMultiProcessor;
-#else
-    eigen_assert(false && "The default device should be used instead to generate kernel code");
-    return 0;
-#endif
  }
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE int sharedMemPerBlock() const {
-#ifndef __CUDA_ARCH__
+  EIGEN_STRONG_INLINE int sharedMemPerBlock() const {
    return stream_->deviceProperties().sharedMemPerBlock;
-#else
-    eigen_assert(false && "The default device should be used instead to generate kernel code");
-    return 0;
-#endif
  }
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE int majorDeviceVersion() const {
-#ifndef __CUDA_ARCH__
+  EIGEN_STRONG_INLINE int majorDeviceVersion() const {
    return stream_->deviceProperties().major;
-#else
-    eigen_assert(false && "The default device should be used instead to generate kernel code");
-    return 0;
-#endif
  }
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE int minorDeviceVersion() const {
-#ifndef __CUDA_ARCH__
+  EIGEN_STRONG_INLINE int minorDeviceVersion() const {
    return stream_->deviceProperties().minor;
-#else
-    eigen_assert(false && "The default device should be used instead to generate kernel code");
-    return 0;
-#endif
  }

-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE int maxBlocks() const {
+  EIGEN_STRONG_INLINE int maxBlocks() const {
    return max_blocks_;
  }