diff --git a/Eigen/src/Core/arch/CUDA/PacketMathHalf.h b/Eigen/src/Core/arch/CUDA/PacketMathHalf.h index 4a10e4fa5..bd42cb558 100644 --- a/Eigen/src/Core/arch/CUDA/PacketMathHalf.h +++ b/Eigen/src/Core/arch/CUDA/PacketMathHalf.h @@ -226,10 +226,12 @@ template<> EIGEN_DEVICE_FUNC inline half2 pabs(const half2& a) { EIGEN_DEVICE_FUNC inline void ptranspose(PacketBlock& kernel) { - assert(false && "tbd"); - // half tmp = kernel.packet[0].y; - // kernel.packet[0].y = kernel.packet[1].x; - // kernel.packet[1].x = tmp; + half a1 = __low2half(kernel.packet[0]); + half a2 = __high2half(kernel.packet[0]); + half b1 = __low2half(kernel.packet[1]); + half b2 = __high2half(kernel.packet[1]); + kernel.packet[0] = __halves2half2(a1, b1); + kernel.packet[1] = __halves2half2(a2, b2); } } // end namespace internal