From 584832cb3c46126697844d0afb9ef56b8da9f049 Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Sun, 21 Feb 2016 12:44:53 -0800 Subject: [PATCH] Implemented the ptranspose function on half floats --- Eigen/src/Core/arch/CUDA/PacketMathHalf.h | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/Eigen/src/Core/arch/CUDA/PacketMathHalf.h b/Eigen/src/Core/arch/CUDA/PacketMathHalf.h index 4a10e4fa5..bd42cb558 100644 --- a/Eigen/src/Core/arch/CUDA/PacketMathHalf.h +++ b/Eigen/src/Core/arch/CUDA/PacketMathHalf.h @@ -226,10 +226,12 @@ template<> EIGEN_DEVICE_FUNC inline half2 pabs(const half2& a) { EIGEN_DEVICE_FUNC inline void ptranspose(PacketBlock<half2,2>& kernel) { - assert(false && "tbd"); - // half tmp = kernel.packet[0].y; - // kernel.packet[0].y = kernel.packet[1].x; - // kernel.packet[1].x = tmp; + half a1 = __low2half(kernel.packet[0]); + half a2 = __high2half(kernel.packet[0]); + half b1 = __low2half(kernel.packet[1]); + half b2 = __high2half(kernel.packet[1]); + kernel.packet[0] = __halves2half2(a1, b1); + kernel.packet[1] = __halves2half2(a2, b2); } } // end namespace internal