mirror of
https://gitlab.com/libeigen/eigen.git
synced 2025-08-14 04:35:57 +08:00
Silenced several compilation warnings triggered by nvcc.
This commit is contained in:
parent
2c3b13eded
commit
b523771a24
@ -10,7 +10,6 @@
|
|||||||
#if defined(EIGEN_USE_GPU) && !defined(EIGEN_CXX11_TENSOR_TENSOR_DEVICE_CUDA_H)
|
#if defined(EIGEN_USE_GPU) && !defined(EIGEN_CXX11_TENSOR_TENSOR_DEVICE_CUDA_H)
|
||||||
#define EIGEN_CXX11_TENSOR_TENSOR_DEVICE_CUDA_H
|
#define EIGEN_CXX11_TENSOR_TENSOR_DEVICE_CUDA_H
|
||||||
|
|
||||||
|
|
||||||
namespace Eigen {
|
namespace Eigen {
|
||||||
|
|
||||||
// This defines an interface that GPUDevice can take to use
|
// This defines an interface that GPUDevice can take to use
|
||||||
@ -206,20 +205,45 @@ struct GpuDevice {
|
|||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
inline int getNumCudaMultiProcessors() const {
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE int getNumCudaMultiProcessors() const {
|
||||||
|
#ifndef __CUDA_ARCH__
|
||||||
return stream_->deviceProperties().multiProcessorCount;
|
return stream_->deviceProperties().multiProcessorCount;
|
||||||
|
#else
|
||||||
|
eigen_assert(false && "The default device should be used instead to generate kernel code");
|
||||||
|
return 0;
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
inline int maxCudaThreadsPerBlock() const {
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE int maxCudaThreadsPerBlock() const {
|
||||||
|
#ifndef __CUDA_ARCH__
|
||||||
return stream_->deviceProperties().maxThreadsPerBlock;
|
return stream_->deviceProperties().maxThreadsPerBlock;
|
||||||
|
#else
|
||||||
|
eigen_assert(false && "The default device should be used instead to generate kernel code");
|
||||||
|
return 0;
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
inline int maxCudaThreadsPerMultiProcessor() const {
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE int maxCudaThreadsPerMultiProcessor() const {
|
||||||
|
#ifndef __CUDA_ARCH__
|
||||||
return stream_->deviceProperties().maxThreadsPerMultiProcessor;
|
return stream_->deviceProperties().maxThreadsPerMultiProcessor;
|
||||||
|
#else
|
||||||
|
eigen_assert(false && "The default device should be used instead to generate kernel code");
|
||||||
|
return 0;
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
inline int sharedMemPerBlock() const {
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE int sharedMemPerBlock() const {
|
||||||
|
#ifndef __CUDA_ARCH__
|
||||||
return stream_->deviceProperties().sharedMemPerBlock;
|
return stream_->deviceProperties().sharedMemPerBlock;
|
||||||
|
#else
|
||||||
|
eigen_assert(false && "The default device should be used instead to generate kernel code");
|
||||||
|
return 0;
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
inline int majorDeviceVersion() const {
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE int majorDeviceVersion() const {
|
||||||
|
#ifndef __CUDA_ARCH__
|
||||||
return stream_->deviceProperties().major;
|
return stream_->deviceProperties().major;
|
||||||
|
#else
|
||||||
|
eigen_assert(false && "The default device should be used instead to generate kernel code");
|
||||||
|
return 0;
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
// This function checks if the CUDA runtime recorded an error for the
|
// This function checks if the CUDA runtime recorded an error for the
|
||||||
@ -239,13 +263,13 @@ struct GpuDevice {
|
|||||||
};
|
};
|
||||||
|
|
||||||
#ifndef __CUDA_ARCH__
|
#ifndef __CUDA_ARCH__
|
||||||
#define LAUNCH_CUDA_KERNEL(kernel, gridsize, blocksize, sharedmem, device, ...) \
|
#define LAUNCH_CUDA_KERNEL(kernel, gridsize, blocksize, sharedmem, device, ...) \
|
||||||
(kernel) <<< (gridsize), (blocksize), (sharedmem), (device).stream() >>> (__VA_ARGS__); \
|
(kernel) <<< (gridsize), (blocksize), (sharedmem), (device).stream() >>> (__VA_ARGS__); \
|
||||||
assert(cudaGetLastError() == cudaSuccess);
|
assert(cudaGetLastError() == cudaSuccess);
|
||||||
#else
|
#else
|
||||||
#define LAUNCH_CUDA_KERNEL(kernel, ...) \
|
#define LAUNCH_CUDA_KERNEL(kernel, ...) \
|
||||||
{ static const auto __attribute__((__unused__)) __makeTheKernelInstantiate = &(kernel); } \
|
{ const auto __attribute__((__unused__)) __makeTheKernelInstantiate = &(kernel); } \
|
||||||
eigen_assert(false && "Cannot launch a kernel from another kernel" __CUDA_ARCH__);
|
eigen_assert(false && "Cannot launch a kernel from another kernel" __CUDA_ARCH__);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
||||||
@ -260,4 +284,4 @@ static inline void setCudaSharedMemConfig(cudaSharedMemConfig config) {
|
|||||||
|
|
||||||
} // end namespace Eigen
|
} // end namespace Eigen
|
||||||
|
|
||||||
#endif // EIGEN_CXX11_TENSOR_TENSOR_DEVICE_TYPE_H
|
#endif // EIGEN_CXX11_TENSOR_TENSOR_DEVICE_CUDA_H
|
||||||
|
@ -156,14 +156,14 @@ template <typename Expression>
|
|||||||
class TensorExecutor<Expression, GpuDevice, false> {
|
class TensorExecutor<Expression, GpuDevice, false> {
|
||||||
public:
|
public:
|
||||||
typedef typename Expression::Index Index;
|
typedef typename Expression::Index Index;
|
||||||
static void run(const Expression& expr, const GpuDevice& device);
|
static EIGEN_DEVICE_FUNC void run(const Expression& expr, const GpuDevice& device);
|
||||||
};
|
};
|
||||||
|
|
||||||
template <typename Expression>
|
template <typename Expression>
|
||||||
class TensorExecutor<Expression, GpuDevice, true> {
|
class TensorExecutor<Expression, GpuDevice, true> {
|
||||||
public:
|
public:
|
||||||
typedef typename Expression::Index Index;
|
typedef typename Expression::Index Index;
|
||||||
static void run(const Expression& expr, const GpuDevice& device);
|
static EIGEN_DEVICE_FUNC void run(const Expression& expr, const GpuDevice& device);
|
||||||
};
|
};
|
||||||
|
|
||||||
#if defined(__CUDACC__)
|
#if defined(__CUDACC__)
|
||||||
@ -213,7 +213,7 @@ EigenMetaKernel_Vectorizable(Evaluator memcopied_eval, Index size) {
|
|||||||
|
|
||||||
/*static*/
|
/*static*/
|
||||||
template <typename Expression>
|
template <typename Expression>
|
||||||
inline void TensorExecutor<Expression, GpuDevice, false>::run(const Expression& expr, const GpuDevice& device)
|
EIGEN_DEVICE_FUNC inline void TensorExecutor<Expression, GpuDevice, false>::run(const Expression& expr, const GpuDevice& device)
|
||||||
{
|
{
|
||||||
TensorEvaluator<Expression, GpuDevice> evaluator(expr, device);
|
TensorEvaluator<Expression, GpuDevice> evaluator(expr, device);
|
||||||
const bool needs_assign = evaluator.evalSubExprsIfNeeded(NULL);
|
const bool needs_assign = evaluator.evalSubExprsIfNeeded(NULL);
|
||||||
@ -232,7 +232,7 @@ inline void TensorExecutor<Expression, GpuDevice, false>::run(const Expression&
|
|||||||
|
|
||||||
/*static*/
|
/*static*/
|
||||||
template<typename Expression>
|
template<typename Expression>
|
||||||
inline void TensorExecutor<Expression, GpuDevice, true>::run(const Expression& expr, const GpuDevice& device)
|
EIGEN_DEVICE_FUNC inline void TensorExecutor<Expression, GpuDevice, true>::run(const Expression& expr, const GpuDevice& device)
|
||||||
{
|
{
|
||||||
TensorEvaluator<Expression, GpuDevice> evaluator(expr, device);
|
TensorEvaluator<Expression, GpuDevice> evaluator(expr, device);
|
||||||
const bool needs_assign = evaluator.evalSubExprsIfNeeded(NULL);
|
const bool needs_assign = evaluator.evalSubExprsIfNeeded(NULL);
|
||||||
|
@ -115,8 +115,8 @@ struct FullReducer<Self, Op, GpuDevice, Vectorizable> {
|
|||||||
internal::is_same<typename Self::CoeffReturnType, float>::value;
|
internal::is_same<typename Self::CoeffReturnType, float>::value;
|
||||||
|
|
||||||
template <typename OutputType>
|
template <typename OutputType>
|
||||||
static void run(const Self& self, Op& reducer, const GpuDevice& device, OutputType* output) {
|
static EIGEN_DEVICE_FUNC void run(const Self& self, Op& reducer, const GpuDevice& device, OutputType* output) {
|
||||||
eigen_assert(false && "Should only be called on floats");
|
assert(false && "Should only be called on floats");
|
||||||
}
|
}
|
||||||
|
|
||||||
static void run(const Self& self, Op& reducer, const GpuDevice& device, float* output) {
|
static void run(const Self& self, Op& reducer, const GpuDevice& device, float* output) {
|
||||||
@ -210,11 +210,11 @@ struct InnerReducer<Self, Op, GpuDevice> {
|
|||||||
internal::is_same<typename Self::CoeffReturnType, float>::value;
|
internal::is_same<typename Self::CoeffReturnType, float>::value;
|
||||||
|
|
||||||
template <typename Device, typename OutputType>
|
template <typename Device, typename OutputType>
|
||||||
static void run(const Self&, Op&, const Device&, OutputType*, typename Self::Index, typename Self::Index) {
|
static EIGEN_DEVICE_FUNC void run(const Self&, Op&, const Device&, OutputType*, typename Self::Index, typename Self::Index) {
|
||||||
assert(false && "Should only be called to reduce floats on a gpu device");
|
assert(false && "Should only be called to reduce floats on a gpu device");
|
||||||
}
|
}
|
||||||
|
|
||||||
static void run(const Self& self, Op& reducer, const GpuDevice& device, float* output, typename Self::Index num_coeffs_to_reduce, typename Self::Index num_preserved_vals) {
|
static EIGEN_DEVICE_FUNC void run(const Self& self, Op& reducer, const GpuDevice& device, float* output, typename Self::Index num_coeffs_to_reduce, typename Self::Index num_preserved_vals) {
|
||||||
typedef typename Self::Index Index;
|
typedef typename Self::Index Index;
|
||||||
|
|
||||||
const Index num_coeffs = num_coeffs_to_reduce * num_preserved_vals;
|
const Index num_coeffs = num_coeffs_to_reduce * num_preserved_vals;
|
||||||
@ -264,11 +264,11 @@ struct OuterReducer<Self, Op, GpuDevice> {
|
|||||||
internal::is_same<typename Self::CoeffReturnType, float>::value;
|
internal::is_same<typename Self::CoeffReturnType, float>::value;
|
||||||
|
|
||||||
template <typename Device, typename OutputType>
|
template <typename Device, typename OutputType>
|
||||||
static void run(const Self&, Op&, const Device&, OutputType*, typename Self::Index, typename Self::Index) {
|
static EIGEN_DEVICE_FUNC void run(const Self&, Op&, const Device&, OutputType*, typename Self::Index, typename Self::Index) {
|
||||||
assert(false && "Should only be called to reduce floats on a gpu device");
|
assert(false && "Should only be called to reduce floats on a gpu device");
|
||||||
}
|
}
|
||||||
|
|
||||||
static void run(const Self& self, Op& reducer, const GpuDevice& device, float* output, typename Self::Index num_coeffs_to_reduce, typename Self::Index num_preserved_vals) {
|
static EIGEN_DEVICE_FUNC void run(const Self& self, Op& reducer, const GpuDevice& device, float* output, typename Self::Index num_coeffs_to_reduce, typename Self::Index num_preserved_vals) {
|
||||||
typedef typename Self::Index Index;
|
typedef typename Self::Index Index;
|
||||||
|
|
||||||
const Index num_coeffs = num_coeffs_to_reduce * num_preserved_vals;
|
const Index num_coeffs = num_coeffs_to_reduce * num_preserved_vals;
|
||||||
|
Loading…
x
Reference in New Issue
Block a user