Fix vectorization of swap for non-trivial expressions

This commit is contained in:
Gael Guennebaud 2015-03-31 20:16:02 +02:00
parent 678207e02a
commit 20d030f207
3 changed files with 10 additions and 12 deletions

View File

@ -38,13 +38,17 @@ public:
/** Swaps one packet between destination and source at (row, col).
  *
  * The exchange goes through the evaluators' packet()/writePacket()
  * interface instead of raw coefficient pointers, so it does not need
  * coeffRef() on the source evaluator. The stale call to
  * m_functor.swapPacket() has been dropped: running it in addition to the
  * evaluator-based exchange below swapped the data twice, leaving both
  * operands unchanged.
  *
  * \tparam StoreMode alignment mode used for destination accesses
  * \tparam LoadMode  alignment mode used for source accesses
  */
template<int StoreMode, int LoadMode>
void assignPacket(Index row, Index col)
{
  // Save the source packet before it gets overwritten.
  PacketScalar tmp = m_src.template packet<LoadMode>(row,col);
  // m_src is cast to non-const here because a swap has to write to both operands.
  const_cast<SrcEvaluatorTypeT&>(m_src).template writePacket<LoadMode>(row,col, m_dst.template packet<StoreMode>(row,col));
  m_dst.template writePacket<StoreMode>(row,col,tmp);
}
/** Swaps one packet between destination and source at linear position \a index.
  *
  * The exchange goes through the evaluators' packet()/writePacket()
  * interface instead of raw coefficient pointers, so it does not need
  * coeffRef() on the source evaluator. The stale call to
  * m_functor.swapPacket() has been dropped: running it in addition to the
  * evaluator-based exchange below swapped the data twice, leaving both
  * operands unchanged.
  *
  * \tparam StoreMode alignment mode used for destination accesses
  * \tparam LoadMode  alignment mode used for source accesses
  */
template<int StoreMode, int LoadMode>
void assignPacket(Index index)
{
  // Save the source packet before it gets overwritten.
  PacketScalar tmp = m_src.template packet<LoadMode>(index);
  // m_src is cast to non-const here because a swap has to write to both operands.
  const_cast<SrcEvaluatorTypeT&>(m_src).template writePacket<LoadMode>(index, m_dst.template packet<StoreMode>(index));
  m_dst.template writePacket<StoreMode>(index,tmp);
}
// TODO find a simple way not to have to copy/paste this function from generic_dense_assignment_kernel, by simple I mean no CRTP (Gael)

View File

@ -150,14 +150,6 @@ template<typename Scalar> struct swap_assign_op {
swap(a,const_cast<Scalar&>(b));
#endif
}
/** Exchanges one packet of coefficients between the memory at \a a and \a b.
  *
  * Both packets are loaded before anything is stored, then each one is
  * written to the other location.
  *
  * \tparam LhsAlignment alignment mode used for accesses through \a a
  * \tparam RhsAlignment alignment mode used for accesses through \a b
  * \tparam Packet       packet type to load and store
  */
template<int LhsAlignment, int RhsAlignment, typename Packet>
EIGEN_STRONG_INLINE void swapPacket(Scalar* a, Scalar* b) const
{
  const Packet fromB = internal::ploadt<Packet,RhsAlignment>(b);
  const Packet fromA = internal::ploadt<Packet,LhsAlignment>(a);
  internal::pstoret<Scalar,Packet,RhsAlignment>(b, fromA);
  internal::pstoret<Scalar,Packet,LhsAlignment>(a, fromB);
}
};
template<typename Scalar>
struct functor_traits<swap_assign_op<Scalar> > {

View File

@ -82,8 +82,10 @@ template<typename MatrixType> void swap(const MatrixType& m)
// Entry point of the swap test: runs the swap() checks on fixed-size matrices,
// on dynamic matrices with hard-coded sizes, and on dynamic matrices whose
// size is drawn at random from [1, EIGEN_TEST_MAX_SIZE].
void test_swap()
{
  int size = internal::random<int>(1,EIGEN_TEST_MAX_SIZE);

  // Fixed-size matrices.
  CALL_SUBTEST_1( swap(Matrix3f()) ); // no vectorization
  CALL_SUBTEST_2( swap(Matrix4d()) ); // possible vectorization

  // Dynamic matrices with hard-coded sizes.
  CALL_SUBTEST_3( swap(MatrixXd(3,3)) ); // no vectorization
  CALL_SUBTEST_4( swap(MatrixXf(30,30)) ); // possible vectorization

  // Dynamic matrices with the random size.
  CALL_SUBTEST_3( swap(MatrixXd(size,size)) ); // no vectorization
  CALL_SUBTEST_4( swap(MatrixXf(size,size)) ); // possible vectorization

  // Marks the size as used (presumably for builds where the subtests above are compiled out).
  TEST_SET_BUT_UNUSED_VARIABLE(size)
}