temporary fix of the pèrevious commit

This commit is contained in:
Gael Guennebaud 2008-08-24 15:27:05 +00:00
parent ba100998bf
commit 440664cd5d
3 changed files with 65 additions and 34 deletions

View File

@ -299,28 +299,58 @@ struct ei_assign_impl<Derived1, Derived2, InnerVectorization, InnerUnrolling>
*** Linear vectorization *** *** Linear vectorization ***
***************************/ ***************************/
// template<typename Derived1, typename Derived2>
// struct ei_assign_impl<Derived1, Derived2, LinearVectorization, NoUnrolling>
// {
// static void run(Derived1 &dst, const Derived2 &src)
// {
// const int size = dst.size();
// const int packetSize = ei_packet_traits<typename Derived1::Scalar>::size;
// const int alignedStart = ei_assign_traits<Derived1,Derived2>::DstIsAligned ? 0
// : ei_alignmentOffset(&dst.coeffRef(0), size);
// const int alignedEnd = alignedStart + ((size-alignedStart)/packetSize)*packetSize;
//
// for(int index = 0; index < alignedStart; index++)
// dst.copyCoeff(index, src);
//
// for(int index = alignedStart; index < alignedEnd; index += packetSize)
// {
// dst.template copyPacket<Derived2, Aligned, ei_assign_traits<Derived1,Derived2>::SrcAlignment>(index, src);
// }
//
// for(int index = alignedEnd; index < size; index++)
// dst.copyCoeff(index, src);
// }
// };
template<typename Derived1, typename Derived2> template<typename Derived1, typename Derived2>
struct ei_assign_impl<Derived1, Derived2, LinearVectorization, NoUnrolling> struct ei_assign_impl<Derived1, Derived2, LinearVectorization, NoUnrolling>
{ {
static void run(Derived1 &dst, const Derived2 &src) static void run(Derived1 &dst, const Derived2 &src)
{ {
const int size = dst.size(); asm("#begin");
const int packetSize = ei_packet_traits<typename Derived1::Scalar>::size; const int size = dst.size();
const int alignedStart = ei_assign_traits<Derived1,Derived2>::DstIsAligned ? 0 const int packetSize = ei_packet_traits<typename Derived1::Scalar>::size;
: ei_alignmentOffset(&dst.coeffRef(0), size); const int alignedStart = ei_assign_traits<Derived1,Derived2>::DstIsAligned ? 0
const int alignedEnd = alignedStart + ((size-alignedStart)/packetSize)*packetSize; : ei_alignmentOffset(&dst.coeffRef(0), size);
const int alignedEnd = alignedStart + ((size-alignedStart)/packetSize)*packetSize;
for(int index = 0; index < alignedStart; index++) asm("#unaligned start");
dst.copyCoeff(index, src);
for(int index = alignedStart; index < alignedEnd; index += packetSize) for(int index = 0; index < alignedStart; index++)
{ dst.copyCoeff(index, src);
dst.template copyPacket<Derived2, Aligned, ei_assign_traits<Derived1,Derived2>::SrcAlignment>(index, src); asm("#aligned middle");
}
for(int index = alignedEnd; index < size; index++) for(int index = alignedStart; index < alignedEnd; index += packetSize)
dst.copyCoeff(index, src); {
} dst.template copyPacket<Derived2, Aligned, ei_assign_traits<Derived1,Derived2>::SrcAlignment>(index, src);
}
asm("#unaligned end");
for(int index = alignedEnd; index < size; index++)
dst.copyCoeff(index, src);
asm("#end");
}
}; };
template<typename Derived1, typename Derived2> template<typename Derived1, typename Derived2>

View File

@ -54,7 +54,7 @@ template<typename T> struct ei_unpointer<T*const> { typedef T type; };
template<typename T> struct ei_unconst { typedef T type; }; template<typename T> struct ei_unconst { typedef T type; };
template<typename T> struct ei_unconst<const T> { typedef T type; }; template<typename T> struct ei_unconst<const T> { typedef T type; };
template<typename T> struct ei_unconst<const T&> { typedef T& type; }; // template<typename T> struct ei_unconst<const T&> { typedef T& type; };
template<typename T> struct ei_cleantype { typedef T type; }; template<typename T> struct ei_cleantype { typedef T type; };
template<typename T> struct ei_cleantype<const T> { typedef typename ei_cleantype<T>::type type; }; template<typename T> struct ei_cleantype<const T> { typedef typename ei_cleantype<T>::type type; };

View File

@ -86,23 +86,24 @@ template<typename Scalar> void packetmath()
VERIFY(areApprox(data1, data2+offset, PacketSize) && "ei_pstoreu"); VERIFY(areApprox(data1, data2+offset, PacketSize) && "ei_pstoreu");
} }
if (!ei_is_same_type<Scalar,double>::ret) for (int offset=0; offset<PacketSize; ++offset)
{ {
for (int offset=0; offset<PacketSize; ++offset) packets[0] = ei_pload(data1);
{ packets[1] = ei_pload(data1+PacketSize);
packets[0] = ei_pload(data1); if (offset==0) ei_palign<0>(packets[0], packets[1]);
packets[1] = ei_pload(data1+PacketSize); else if (offset==1) ei_palign<1>(packets[0], packets[1]);
if (offset==0) ei_palign<0>(packets[0], packets[1]); else if (offset==2) ei_palign<2>(packets[0], packets[1]);
else if (offset==1) ei_palign<1>(packets[0], packets[1]); else if (offset==3) ei_palign<3>(packets[0], packets[1]);
else if (offset==2) ei_palign<2>(packets[0], packets[1]); ei_pstore(data2, packets[0]);
else if (offset==3) ei_palign<3>(packets[0], packets[1]);
ei_pstore(data2, packets[0]);
for (int i=0; i<PacketSize; ++i) for (int i=0; i<PacketSize; ++i)
ref[i] = data1[i+offset]; ref[i] = data1[i+offset];
VERIFY(areApprox(ref, data2, PacketSize) && "ei_palign"); typedef Matrix<Scalar, PacketSize, 1> Vector;
} std::cout << Vector(data1).transpose() << " | " << Vector(data1+PacketSize).transpose() << "\n";
std::cout << " " << offset << " => " << Vector(ref).transpose() << " == " << Vector(data2).transpose() << "\n";
VERIFY(areApprox(ref, data2, PacketSize) && "ei_palign");
} }
CHECK_CWISE(REF_ADD, ei_padd); CHECK_CWISE(REF_ADD, ei_padd);
@ -143,9 +144,9 @@ template<typename Scalar> void packetmath()
void test_packetmath() void test_packetmath()
{ {
for(int i = 0; i < g_repeat; i++) { for(int i = 0; i < g_repeat; i++) {
CALL_SUBTEST( packetmath<float>() ); // CALL_SUBTEST( packetmath<float>() );
CALL_SUBTEST( packetmath<double>() ); // CALL_SUBTEST( packetmath<double>() );
CALL_SUBTEST( packetmath<int>() ); // CALL_SUBTEST( packetmath<int>() );
packetmath<std::complex<float> >(); packetmath<std::complex<float> >();
} }
} }