diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorRandom.h b/unsupported/Eigen/CXX11/src/Tensor/TensorRandom.h index 1655a813e..f108c349f 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorRandom.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorRandom.h @@ -55,11 +55,11 @@ EIGEN_DEVICE_FUNC uint64_t get_random_seed() { #endif } -static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE unsigned PCG_XSH_RS_generator(uint64_t* state) { +static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE unsigned PCG_XSH_RS_generator(uint64_t* state, uint64_t stream) { // TODO: Unify with the implementation in the non blocking thread pool. uint64_t current = *state; // Update the internal state - *state = current * 6364136223846793005ULL + 0xda3e39cb94b95bdbULL; + *state = current * 6364136223846793005ULL + (stream << 1 | 1); // Generate the random output (using the PCG-XSH-RS scheme) return static_cast((current ^ (current >> 22)) >> (22 + (current >> 61))); } @@ -73,17 +73,17 @@ static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE uint64_t PCG_XSH_RS_state(uint64_t template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE -T RandomToTypeUniform(uint64_t* state) { - unsigned rnd = PCG_XSH_RS_generator(state); +T RandomToTypeUniform(uint64_t* state, uint64_t stream) { + unsigned rnd = PCG_XSH_RS_generator(state, stream); return static_cast(rnd); } template <> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE -Eigen::half RandomToTypeUniform(uint64_t* state) { +Eigen::half RandomToTypeUniform(uint64_t* state, uint64_t stream) { Eigen::half result; // Generate 10 random bits for the mantissa - unsigned rnd = PCG_XSH_RS_generator(state); + unsigned rnd = PCG_XSH_RS_generator(state, stream); result.x = static_cast(rnd & 0x3ffu); // Set the exponent result.x |= (static_cast(15) << 10); @@ -93,14 +93,14 @@ Eigen::half RandomToTypeUniform(uint64_t* state) { template <> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE -float RandomToTypeUniform(uint64_t* state) { +float RandomToTypeUniform(uint64_t* state, uint64_t stream) { typedef union { uint32_t raw; float fp; } internal; internal result; // Generate 23 random bits for the mantissa mantissa - const unsigned rnd = PCG_XSH_RS_generator(state); + const unsigned rnd = PCG_XSH_RS_generator(state, stream); result.raw = rnd & 0x7fffffu; // Set the exponent result.raw |= (static_cast(127) << 23); @@ -109,7 +109,7 @@ float RandomToTypeUniform(uint64_t* state) { } template <> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE -double RandomToTypeUniform(uint64_t* state) { +double RandomToTypeUniform(uint64_t* state, uint64_t stream) { typedef union { uint64_t raw; double dp; @@ -118,9 +118,9 @@ double RandomToTypeUniform(uint64_t* state) { result.raw = 0; // Generate 52 random bits for the mantissa // First generate the upper 20 bits - unsigned rnd1 = PCG_XSH_RS_generator(state) & 0xfffffu; + unsigned rnd1 = PCG_XSH_RS_generator(state, stream) & 0xfffffu; // The generate the lower 32 bits - unsigned rnd2 = PCG_XSH_RS_generator(state); + unsigned rnd2 = PCG_XSH_RS_generator(state, stream); result.raw = (static_cast(rnd1) << 32) | rnd2; // Set the exponent result.raw |= (static_cast(1023) << 52); @@ -129,14 +129,14 @@ double RandomToTypeUniform(uint64_t* state) { } template <> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE -std::complex RandomToTypeUniform >(uint64_t* state) { - return std::complex(RandomToTypeUniform(state), - RandomToTypeUniform(state)); +std::complex RandomToTypeUniform >(uint64_t* state, uint64_t stream) { + return std::complex(RandomToTypeUniform(state, stream), + RandomToTypeUniform(state, stream)); } template <> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE -std::complex RandomToTypeUniform >(uint64_t* state) { - return std::complex(RandomToTypeUniform(state), - RandomToTypeUniform(state)); +std::complex RandomToTypeUniform >(uint64_t* state, uint64_t stream) { + return std::complex(RandomToTypeUniform(state, stream), + RandomToTypeUniform(state, stream)); } template class UniformRandomGenerator { @@ -155,9 +155,7 @@ template class UniformRandomGenerator { template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T operator()(Index i) const { - uint64_t local_state = m_state + i; - T result = RandomToTypeUniform(&local_state); - m_state = local_state; + T result = RandomToTypeUniform(&m_state, i); return result; } @@ -165,11 +163,9 @@ template class UniformRandomGenerator { Packet packetOp(Index i) const { const int packetSize = internal::unpacket_traits::size; EIGEN_ALIGN_MAX T values[packetSize]; - uint64_t local_state = m_state + i; for (int j = 0; j < packetSize; ++j) { - values[j] = RandomToTypeUniform(&local_state); + values[j] = RandomToTypeUniform(&m_state, i); } - m_state = local_state; return internal::pload(values); } @@ -190,14 +186,14 @@ struct functor_traits > { template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE -T RandomToTypeNormal(uint64_t* state) { +T RandomToTypeNormal(uint64_t* state, uint64_t stream) { // Use the ratio of uniform method to generate numbers following a normal // distribution. See for example Numerical Recipes chapter 7.3.9 for the // details. T u, v, q; do { - u = RandomToTypeUniform(state); - v = T(1.7156) * (RandomToTypeUniform(state) - T(0.5)); + u = RandomToTypeUniform(state, stream); + v = T(1.7156) * (RandomToTypeUniform(state, stream) - T(0.5)); const T x = u - T(0.449871); const T y = numext::abs(v) + T(0.386595); q = x*x + y * (T(0.196)*y - T(0.25472)*x); @@ -208,14 +204,14 @@ T RandomToTypeNormal(uint64_t* state) { } template <> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE -std::complex RandomToTypeNormal >(uint64_t* state) { - return std::complex(RandomToTypeNormal(state), - RandomToTypeNormal(state)); +std::complex RandomToTypeNormal >(uint64_t* state, uint64_t stream) { + return std::complex(RandomToTypeNormal(state, stream), + RandomToTypeNormal(state, stream)); } template <> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE -std::complex RandomToTypeNormal >(uint64_t* state) { - return std::complex(RandomToTypeNormal(state), - RandomToTypeNormal(state)); +std::complex RandomToTypeNormal >(uint64_t* state, uint64_t stream) { + return std::complex(RandomToTypeNormal(state, stream), + RandomToTypeNormal(state, stream)); } @@ -234,9 +230,7 @@ template class NormalRandomGenerator { template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T operator()(Index i) const { - uint64_t local_state = m_state + i; - T result = RandomToTypeNormal(&local_state); - m_state = local_state; + T result = RandomToTypeNormal(&m_state, i); return result; } @@ -244,11 +238,9 @@ template class NormalRandomGenerator { Packet packetOp(Index i) const { const int packetSize = internal::unpacket_traits::size; EIGEN_ALIGN_MAX T values[packetSize]; - uint64_t local_state = m_state + i; for (int j = 0; j < packetSize; ++j) { - values[j] = RandomToTypeNormal(&local_state); + values[j] = RandomToTypeNormal(&m_state, i); } - m_state = local_state; return internal::pload(values); }