Mirror of https://gitlab.com/libeigen/eigen.git, synced 2025-08-14 12:46:00 +08:00
Refined the #ifdef __CUDACC__ guard to ensure that trying to compile GPU code with a non-CUDA compiler results in a linking error instead of bogus code.
This commit is contained in:
parent ac99b49249
commit 9ea39ce13c
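The pattern behind the change, as a minimal sketch (GpuKernel and launch are hypothetical names, not Eigen's): the declaration of a GPU entry point stays visible whenever EIGEN_USE_GPU is defined, while its definition is emitted only when the translation unit is compiled by nvcc, i.e. when __CUDACC__ is defined. Host-only code that uses the entry point then still compiles, but linking fails with an undefined reference unless some nvcc-built object file supplies the definition, instead of silently instantiating bogus code.

#if defined(EIGEN_USE_GPU)
struct GpuKernel {
  // Declaration: visible to every compiler, so host code type-checks.
  static void launch(float* data, int n);
};

#if defined(__CUDACC__)
// Definition: only emitted when nvcc compiles this translation unit.
// 'inline' keeps the header ODR-safe if several CUDA TUs include it.
inline void GpuKernel::launch(float* data, int n) {
  // ... actual kernel launch elided ...
}
#endif  // __CUDACC__
#endif  // EIGEN_USE_GPU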
@@ -106,7 +106,7 @@ template <typename ExpressionType> class TensorDevice<ExpressionType, ThreadPool
 #endif
 
 
-#if defined(EIGEN_USE_GPU) && defined(__CUDACC__)
+#if defined(EIGEN_USE_GPU)
 template <typename ExpressionType> class TensorDevice<ExpressionType, GpuDevice>
 {
  public:
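With the relaxed guard above, TensorDevice<ExpressionType, GpuDevice> is declared for plain host compilers as well, so the usual device-assignment syntax type-checks everywhere. Roughly, as a sketch with the stream setup elided (constructing a CudaStreamDevice still requires nvcc):

Eigen::Tensor<float, 2> a(64, 64), b(64, 64);
Eigen::GpuDevice gpu(/* StreamInterface*, built in an nvcc TU */);
a.device(gpu) = b * 2.0f;  // compiles with g++; links only against nvcc-built code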
@@ -287,6 +287,7 @@ class StreamInterface {
   virtual void deallocate(void* buffer) const = 0;
 };
 
+#if defined(__CUDACC__)
 static cudaDeviceProp* m_deviceProperties;
 static bool m_devicePropInitialized = false;
 
@@ -362,7 +363,7 @@ class CudaStreamDevice : public StreamInterface {
   const cudaStream_t* stream_;
   int device_;
 };
-
+#endif  // __CUDACC__
 
 struct GpuDevice {
   // The StreamInterface is not owned: the caller is
@@ -450,7 +451,7 @@ struct GpuDevice {
   }
 
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void synchronize() const {
-#ifndef __CUDA_ARCH__
+#if defined(__CUDACC__) && !defined(__CUDA_ARCH__)
     cudaError_t err = cudaStreamSynchronize(stream_->stream());
     assert(err == cudaSuccess);
 #else
@@ -477,8 +478,12 @@ struct GpuDevice {
   // This function checks if the CUDA runtime recorded an error for the
   // underlying stream device.
   inline bool ok() const {
+#ifdef __CUDACC__
     cudaError_t error = cudaStreamQuery(stream_->stream());
     return (error == cudaSuccess) || (error == cudaErrorNotReady);
+#else
+    return false;
+#endif
   }
 
  private:
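The new #else branch makes ok() callable from host-only translation units: without nvcc there is no CUDA runtime to query, so it conservatively reports failure rather than calling cudaStreamQuery. A hypothetical host-side caller (not from Eigen):

void check_device(const Eigen::GpuDevice& d) {
  if (!d.ok()) {
    // Either the stream recorded an error, or this file was not compiled
    // with nvcc, in which case ok() conservatively returns false.
  }
}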
@@ -493,10 +498,12 @@ struct GpuDevice {
 
 
   // FIXME: Should be device and kernel specific.
+#ifdef __CUDACC__
   static inline void setCudaSharedMemConfig(cudaSharedMemConfig config) {
     cudaError_t status = cudaDeviceSetSharedMemConfig(config);
     assert(status == cudaSuccess);
   }
+#endif
 
 #endif
 
@@ -149,7 +149,24 @@ class TensorExecutor<Expression, ThreadPoolDevice, Vectorizable>
 
 
 // GPU: the evaluation of the expression is offloaded to a GPU.
-#if defined(EIGEN_USE_GPU) && defined(__CUDACC__)
+#if defined(EIGEN_USE_GPU)
+
+template <typename Expression>
+class TensorExecutor<Expression, GpuDevice, false> {
+ public:
+  typedef typename Expression::Index Index;
+  static void run(const Expression& expr, const GpuDevice& device);
+};
+
+template <typename Expression>
+class TensorExecutor<Expression, GpuDevice, true> {
+ public:
+  typedef typename Expression::Index Index;
+  static void run(const Expression& expr, const GpuDevice& device);
+};
+
+#if defined(__CUDACC__)
+
 template <typename Evaluator, typename Index>
 __global__ void
 __launch_bounds__(1024)
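Note the shape of the new block: both TensorExecutor specializations are declared as soon as EIGEN_USE_GPU is set, with run() left undefined, and the definitions follow further down only under __CUDACC__. A host-compiled user of the GPU path therefore compiles but cannot link, which is exactly the diagnostic the commit message promises. An illustrative call site (type names follow the diff; the wrapper itself is hypothetical):

template <typename Expression>
void assign_on_gpu(const Expression& expr, const Eigen::GpuDevice& dev) {
  // Compiles under g++ -DEIGEN_USE_GPU thanks to the declaration above;
  // linking needs an nvcc-built TU providing the definition of run().
  Eigen::internal::TensorExecutor<Expression, Eigen::GpuDevice, false>::run(expr, dev);
}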
@@ -193,13 +210,9 @@ EigenMetaKernel_Vectorizable(Evaluator memcopied_eval, Index size) {
   }
 }
 
+/*static*/
 template <typename Expression>
-class TensorExecutor<Expression, GpuDevice, false>
-{
- public:
-  typedef typename Expression::Index Index;
-  static inline void run(const Expression& expr, const GpuDevice& device)
-  {
+inline void TensorExecutor<Expression, GpuDevice, false>::run(const Expression& expr, const GpuDevice& device)
+{
     TensorEvaluator<Expression, GpuDevice> evaluator(expr, device);
     const bool needs_assign = evaluator.evalSubExprsIfNeeded(NULL);
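The /*static*/ comment in the new definitions is a reminder: C++ forbids repeating the static keyword on an out-of-class definition of a static member function, so the commit records it in a comment instead. Moving run() out of the class body is what lets the declaration and the implementation sit behind different preprocessor guards, and the inline keyword keeps the header ODR-safe when several nvcc-compiled translation units include it. The same split, reduced to its essentials (Widget is a made-up name):

struct Widget {
  static void run();              // 'static' appears only on the declaration
};
#if defined(__CUDACC__)
/*static*/ inline void Widget::run() {
  // Definition compiled only by nvcc; 'static' must not be repeated here.
}
#endif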
@@ -212,14 +225,11 @@ class TensorExecutor<Expression, GpuDevice, false>
     }
     evaluator.cleanup();
   }
-};
 
 
+/*static*/
 template<typename Expression>
-class TensorExecutor<Expression, GpuDevice, true>
-{
- public:
-  typedef typename Expression::Index Index;
-  static inline void run(const Expression& expr, const GpuDevice& device)
-  {
+inline void TensorExecutor<Expression, GpuDevice, true>::run(const Expression& expr, const GpuDevice& device)
+{
     TensorEvaluator<Expression, GpuDevice> evaluator(expr, device);
     const bool needs_assign = evaluator.evalSubExprsIfNeeded(NULL);
@@ -232,9 +242,9 @@ class TensorExecutor<Expression, GpuDevice, true>
     }
     evaluator.cleanup();
   }
-};
 
-#endif
+#endif  // __CUDACC__
+#endif  // EIGEN_USE_GPU
 
 } // end namespace internal
 