Fix vectorization of swap for non-trivial expressions

2025-09-12 01:13:14 +08:00 · 2015-03-31 20:16:02 +02:00 · 2015-03-31 20:16:02 +02:00 · 20d030f207
commit 20d030f207
parent 678207e02a
3 changed files with 10 additions and 12 deletions
--- a/Eigen/src/Core/Swap.h
+++ b/Eigen/src/Core/Swap.h
@@ -38,13 +38,17 @@ public:
  template<int StoreMode, int LoadMode>
  void assignPacket(Index row, Index col)
  {
-    m_functor.template swapPacket<StoreMode,LoadMode,PacketScalar>(&m_dst.coeffRef(row,col), &const_cast<SrcEvaluatorTypeT&>(m_src).coeffRef(row,col));
+    PacketScalar tmp = m_src.template packet<LoadMode>(row,col);
+    const_cast<SrcEvaluatorTypeT&>(m_src).template writePacket<LoadMode>(row,col, m_dst.template packet<StoreMode>(row,col));
+    m_dst.template writePacket<StoreMode>(row,col,tmp);
  }
  template<int StoreMode, int LoadMode>
  void assignPacket(Index index)
  {
-    m_functor.template swapPacket<StoreMode,LoadMode,PacketScalar>(&m_dst.coeffRef(index), &const_cast<SrcEvaluatorTypeT&>(m_src).coeffRef(index));
+    PacketScalar tmp = m_src.template packet<LoadMode>(index);
+    const_cast<SrcEvaluatorTypeT&>(m_src).template writePacket<LoadMode>(index, m_dst.template packet<StoreMode>(index));
+    m_dst.template writePacket<StoreMode>(index,tmp);
  }
  // TODO find a simple way not to have to copy/paste this function from generic_dense_assignment_kernel, by simple I mean no CRTP (Gael)
--- a/Eigen/src/Core/functors/AssignmentFunctors.h
+++ b/Eigen/src/Core/functors/AssignmentFunctors.h
@@ -150,14 +150,6 @@ template<typename Scalar> struct swap_assign_op {
    swap(a,const_cast<Scalar&>(b));
 #endif
  }
-  template<int LhsAlignment, int RhsAlignment, typename Packet>
-  EIGEN_STRONG_INLINE void swapPacket(Scalar* a, Scalar* b) const
-  {
-    Packet tmp = internal::ploadt<Packet,RhsAlignment>(b);
-    internal::pstoret<Scalar,Packet,RhsAlignment>(b, internal::ploadt<Packet,LhsAlignment>(a));
-    internal::pstoret<Scalar,Packet,LhsAlignment>(a, tmp);
-  }
 };
 template<typename Scalar>
 struct functor_traits<swap_assign_op<Scalar> > {
--- a/test/swap.cpp
+++ b/test/swap.cpp
@@ -82,8 +82,10 @@ template<typename MatrixType> void swap(const MatrixType& m)
 void test_swap()
 {
+  int s = internal::random<int>(1,EIGEN_TEST_MAX_SIZE);
  CALL_SUBTEST_1( swap(Matrix3f()) ); // fixed size, no vectorization 
  CALL_SUBTEST_2( swap(Matrix4d()) ); // fixed size, possible vectorization 
-  CALL_SUBTEST_3( swap(MatrixXd(3,3)) ); // dyn size, no vectorization 
+  CALL_SUBTEST_3( swap(MatrixXd(s,s)) ); // dyn size, no vectorization 
-  CALL_SUBTEST_4( swap(MatrixXf(30,30)) ); // dyn size, possible vectorization 
+  CALL_SUBTEST_4( swap(MatrixXf(s,s)) ); // dyn size, possible vectorization 
+  TEST_SET_BUT_UNUSED_VARIABLE(s)
 }