diff --git a/Eigen/src/Core/arch/SSE/PacketMath.h b/Eigen/src/Core/arch/SSE/PacketMath.h
index 77f0cfa72..5e91fbaca 100644
--- a/Eigen/src/Core/arch/SSE/PacketMath.h
+++ b/Eigen/src/Core/arch/SSE/PacketMath.h
@@ -1694,10 +1694,24 @@ EIGEN_STRONG_INLINE void pscatter<uint32_t, Packet4ui>(uint32_t* to, const Packe
 }
 template <>
 EIGEN_STRONG_INLINE void pscatter<bool, Packet16b>(bool* to, const Packet16b& from, Index stride) {
-  to[4 * stride * 0] = _mm_cvtsi128_si32(from);
-  to[4 * stride * 1] = _mm_cvtsi128_si32(_mm_shuffle_epi32(from, 1));
-  to[4 * stride * 2] = _mm_cvtsi128_si32(_mm_shuffle_epi32(from, 2));
-  to[4 * stride * 3] = _mm_cvtsi128_si32(_mm_shuffle_epi32(from, 3));
+  EIGEN_ALIGN16 bool tmp[16];
+  pstore(tmp, from);
+  to[stride * 0] = tmp[0];
+  to[stride * 1] = tmp[1];
+  to[stride * 2] = tmp[2];
+  to[stride * 3] = tmp[3];
+  to[stride * 4] = tmp[4];
+  to[stride * 5] = tmp[5];
+  to[stride * 6] = tmp[6];
+  to[stride * 7] = tmp[7];
+  to[stride * 8] = tmp[8];
+  to[stride * 9] = tmp[9];
+  to[stride * 10] = tmp[10];
+  to[stride * 11] = tmp[11];
+  to[stride * 12] = tmp[12];
+  to[stride * 13] = tmp[13];
+  to[stride * 14] = tmp[14];
+  to[stride * 15] = tmp[15];
 }
 
 // some compilers might be tempted to perform multiple moves instead of using a vector path.
diff --git a/test/packetmath.cpp b/test/packetmath.cpp
index cdbaad672..9c5d6cf46 100644
--- a/test/packetmath.cpp
+++ b/test/packetmath.cpp
@@ -1635,7 +1635,7 @@ void packetmath_scatter_gather() {
   EIGEN_ALIGN_MAX Scalar data1[PacketSize];
   RealScalar refvalue = RealScalar(0);
   for (int i = 0; i < PacketSize; ++i) {
-    data1[i] = internal::random<Scalar>() / RealScalar(PacketSize);
+    data1[i] = internal::random<Scalar>();
   }
 
   int stride = internal::random<int>(1, 20);
@@ -1655,7 +1655,7 @@ void packetmath_scatter_gather() {
   }
 
   for (int i = 0; i < PacketSize * 7; ++i) {
-    buffer[i] = internal::random<Scalar>() / RealScalar(PacketSize);
+    buffer[i] = internal::random<Scalar>();
   }
   packet = internal::pgather<Scalar, Packet>(buffer, 7);
   internal::pstore(data1, packet);
@@ -1745,6 +1745,7 @@ EIGEN_DECLARE_TEST(packetmath) {
     CALL_SUBTEST_12(test::runner<std::complex<double>>::run());
     CALL_SUBTEST_13(test::runner<half>::run());
     CALL_SUBTEST_14((packetmath<bool, internal::packet_traits<bool>::type>()));
+    CALL_SUBTEST_14((packetmath_scatter_gather<bool, internal::packet_traits<bool>::type>()));
     CALL_SUBTEST_15(test::runner<bfloat16>::run());
     g_first_pass = false;
   }
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorRandom.h b/unsupported/Eigen/CXX11/src/Tensor/TensorRandom.h
index c9c613a14..e9de98848 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorRandom.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorRandom.h
@@ -49,6 +49,12 @@ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T RandomToTypeUniform(uint64_t* state, uin
   return static_cast<T>(rnd);
 }
 
+template <>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool RandomToTypeUniform<bool>(uint64_t* state, uint64_t stream) {
+  unsigned rnd = PCG_XSH_RS_generator(state, stream);
+  return (rnd & 0x1) != 0;
+}
+
 template <>
 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Eigen::half RandomToTypeUniform<Eigen::half>(uint64_t* state, uint64_t stream) {
   // Generate 10 random bits for the mantissa, merge with exponent.