Fix for issue with static global variables in TensorDeviceGpu.h

m_deviceProperties and m_devicePropInitialized are defined as global
statics in the header, so every translation unit gets its own copy. This
can cause issues if initializeDeviceProp() is called in one translation
unit and m_deviceProperties is then used in a different translation
unit. Added inline functions getDeviceProperties() and
getDevicePropInitialized(), which define those variables as static
locals. As per the C++ standard 7.1.2/4, a static local declared in an
inline function always refers to the same object, so this should be
safer. Credit to Sun Chenggen for this fix.

This fixes issue #1475.
This commit is contained in:
Turing Eret 2021-04-23 07:43:35 -06:00
parent 045c0609b5
commit 3804ca0d90

View File

@ -42,11 +42,18 @@ class StreamInterface {
virtual unsigned int* semaphore() const = 0; virtual unsigned int* semaphore() const = 0;
}; };
static gpuDeviceProp_t* m_deviceProperties; EIGEN_STRONG_INLINE gpuDeviceProp_t*& getDeviceProperties() {
static bool m_devicePropInitialized = false; static gpuDeviceProp_t* deviceProperties;
return deviceProperties;
}
EIGEN_STRONG_INLINE bool& getDevicePropInitialized() {
static bool devicePropInitialized = false;
return devicePropInitialized;
}
static void initializeDeviceProp() { static void initializeDeviceProp() {
if (!m_devicePropInitialized) { if (!getDevicePropInitialized()) {
// Attempts to ensure proper behavior in the case of multiple threads // Attempts to ensure proper behavior in the case of multiple threads
// calling this function simultaneously. This would be trivial to // calling this function simultaneously. This would be trivial to
// implement if we could use std::mutex, but unfortunately mutex don't // implement if we could use std::mutex, but unfortunately mutex don't
@ -64,9 +71,9 @@ static void initializeDeviceProp() {
<< std::endl; << std::endl;
gpu_assert(status == gpuSuccess); gpu_assert(status == gpuSuccess);
} }
m_deviceProperties = new gpuDeviceProp_t[num_devices]; getDeviceProperties() = new gpuDeviceProp_t[num_devices];
for (int i = 0; i < num_devices; ++i) { for (int i = 0; i < num_devices; ++i) {
status = gpuGetDeviceProperties(&m_deviceProperties[i], i); status = gpuGetDeviceProperties(&getDeviceProperties()[i], i);
if (status != gpuSuccess) { if (status != gpuSuccess) {
std::cerr << "Failed to initialize GPU device #" std::cerr << "Failed to initialize GPU device #"
<< i << i
@ -78,10 +85,10 @@ static void initializeDeviceProp() {
} }
std::atomic_thread_fence(std::memory_order_release); std::atomic_thread_fence(std::memory_order_release);
m_devicePropInitialized = true; getDevicePropInitialized() = true;
} else { } else {
// Wait for the other thread to inititialize the properties. // Wait for the other thread to inititialize the properties.
while (!m_devicePropInitialized) { while (!getDevicePropInitialized()) {
std::atomic_thread_fence(std::memory_order_acquire); std::atomic_thread_fence(std::memory_order_acquire);
std::this_thread::sleep_for(std::chrono::milliseconds(1000)); std::this_thread::sleep_for(std::chrono::milliseconds(1000));
} }
@ -129,7 +136,7 @@ class GpuStreamDevice : public StreamInterface {
const gpuStream_t& stream() const { return *stream_; } const gpuStream_t& stream() const { return *stream_; }
const gpuDeviceProp_t& deviceProperties() const { const gpuDeviceProp_t& deviceProperties() const {
return m_deviceProperties[device_]; return getDeviceProperties()[device_];
} }
virtual void* allocate(size_t num_bytes) const { virtual void* allocate(size_t num_bytes) const {
gpuError_t err = gpuSetDevice(device_); gpuError_t err = gpuSetDevice(device_);