mirror of
https://gitlab.com/libeigen/eigen.git
synced 2025-08-12 19:59:05 +08:00
* extend the Has* packet traits and make all functors use them
* extend the packing routines to support conjugation
This commit is contained in:
parent
e1eccfad3f
commit
c69a226192
@ -46,7 +46,7 @@ template<typename Scalar>
|
|||||||
struct ei_functor_traits<ei_scalar_sum_op<Scalar> > {
|
struct ei_functor_traits<ei_scalar_sum_op<Scalar> > {
|
||||||
enum {
|
enum {
|
||||||
Cost = NumTraits<Scalar>::AddCost,
|
Cost = NumTraits<Scalar>::AddCost,
|
||||||
PacketAccess = ei_packet_traits<Scalar>::size>1
|
PacketAccess = ei_packet_traits<Scalar>::HasAdd
|
||||||
};
|
};
|
||||||
};
|
};
|
||||||
|
|
||||||
@ -69,7 +69,7 @@ template<typename Scalar>
|
|||||||
struct ei_functor_traits<ei_scalar_product_op<Scalar> > {
|
struct ei_functor_traits<ei_scalar_product_op<Scalar> > {
|
||||||
enum {
|
enum {
|
||||||
Cost = NumTraits<Scalar>::MulCost,
|
Cost = NumTraits<Scalar>::MulCost,
|
||||||
PacketAccess = ei_packet_traits<Scalar>::size>1
|
PacketAccess = ei_packet_traits<Scalar>::HasMul
|
||||||
};
|
};
|
||||||
};
|
};
|
||||||
|
|
||||||
@ -92,7 +92,7 @@ template<typename Scalar>
|
|||||||
struct ei_functor_traits<ei_scalar_min_op<Scalar> > {
|
struct ei_functor_traits<ei_scalar_min_op<Scalar> > {
|
||||||
enum {
|
enum {
|
||||||
Cost = NumTraits<Scalar>::AddCost,
|
Cost = NumTraits<Scalar>::AddCost,
|
||||||
PacketAccess = ei_packet_traits<Scalar>::size>1
|
PacketAccess = ei_packet_traits<Scalar>::HasMin
|
||||||
};
|
};
|
||||||
};
|
};
|
||||||
|
|
||||||
@ -115,7 +115,7 @@ template<typename Scalar>
|
|||||||
struct ei_functor_traits<ei_scalar_max_op<Scalar> > {
|
struct ei_functor_traits<ei_scalar_max_op<Scalar> > {
|
||||||
enum {
|
enum {
|
||||||
Cost = NumTraits<Scalar>::AddCost,
|
Cost = NumTraits<Scalar>::AddCost,
|
||||||
PacketAccess = ei_packet_traits<Scalar>::size>1
|
PacketAccess = ei_packet_traits<Scalar>::HasMax
|
||||||
};
|
};
|
||||||
};
|
};
|
||||||
|
|
||||||
@ -158,7 +158,7 @@ template<typename Scalar>
|
|||||||
struct ei_functor_traits<ei_scalar_difference_op<Scalar> > {
|
struct ei_functor_traits<ei_scalar_difference_op<Scalar> > {
|
||||||
enum {
|
enum {
|
||||||
Cost = NumTraits<Scalar>::AddCost,
|
Cost = NumTraits<Scalar>::AddCost,
|
||||||
PacketAccess = ei_packet_traits<Scalar>::size>1
|
PacketAccess = ei_packet_traits<Scalar>::HasSub
|
||||||
};
|
};
|
||||||
};
|
};
|
||||||
|
|
||||||
@ -178,10 +178,7 @@ template<typename Scalar>
|
|||||||
struct ei_functor_traits<ei_scalar_quotient_op<Scalar> > {
|
struct ei_functor_traits<ei_scalar_quotient_op<Scalar> > {
|
||||||
enum {
|
enum {
|
||||||
Cost = 2 * NumTraits<Scalar>::MulCost,
|
Cost = 2 * NumTraits<Scalar>::MulCost,
|
||||||
PacketAccess = ei_packet_traits<Scalar>::size>1
|
PacketAccess = ei_packet_traits<Scalar>::HasDiv
|
||||||
#if (defined EIGEN_VECTORIZE)
|
|
||||||
&& !NumTraits<Scalar>::IsInteger
|
|
||||||
#endif
|
|
||||||
};
|
};
|
||||||
};
|
};
|
||||||
|
|
||||||
@ -203,7 +200,7 @@ template<typename Scalar>
|
|||||||
struct ei_functor_traits<ei_scalar_opposite_op<Scalar> >
|
struct ei_functor_traits<ei_scalar_opposite_op<Scalar> >
|
||||||
{ enum {
|
{ enum {
|
||||||
Cost = NumTraits<Scalar>::AddCost,
|
Cost = NumTraits<Scalar>::AddCost,
|
||||||
PacketAccess = int(ei_packet_traits<Scalar>::size)>1 };
|
PacketAccess = ei_packet_traits<Scalar>::HasNegate };
|
||||||
};
|
};
|
||||||
|
|
||||||
/** \internal
|
/** \internal
|
||||||
@ -224,7 +221,7 @@ struct ei_functor_traits<ei_scalar_abs_op<Scalar> >
|
|||||||
{
|
{
|
||||||
enum {
|
enum {
|
||||||
Cost = NumTraits<Scalar>::AddCost,
|
Cost = NumTraits<Scalar>::AddCost,
|
||||||
PacketAccess = int(ei_packet_traits<Scalar>::size)>1
|
PacketAccess = ei_packet_traits<Scalar>::HasAbs
|
||||||
};
|
};
|
||||||
};
|
};
|
||||||
|
|
||||||
@ -243,7 +240,7 @@ template<typename Scalar> struct ei_scalar_abs2_op {
|
|||||||
};
|
};
|
||||||
template<typename Scalar>
|
template<typename Scalar>
|
||||||
struct ei_functor_traits<ei_scalar_abs2_op<Scalar> >
|
struct ei_functor_traits<ei_scalar_abs2_op<Scalar> >
|
||||||
{ enum { Cost = NumTraits<Scalar>::MulCost, PacketAccess = int(ei_packet_traits<Scalar>::size)>1 }; };
|
{ enum { Cost = NumTraits<Scalar>::MulCost, PacketAccess = ei_packet_traits<Scalar>::HasAbs2 }; };
|
||||||
|
|
||||||
/** \internal
|
/** \internal
|
||||||
* \brief Template functor to compute the conjugate of a complex value
|
* \brief Template functor to compute the conjugate of a complex value
|
||||||
@ -254,14 +251,14 @@ template<typename Scalar> struct ei_scalar_conjugate_op {
|
|||||||
EIGEN_EMPTY_STRUCT_CTOR(ei_scalar_conjugate_op)
|
EIGEN_EMPTY_STRUCT_CTOR(ei_scalar_conjugate_op)
|
||||||
EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& a) const { return ei_conj(a); }
|
EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& a) const { return ei_conj(a); }
|
||||||
template<typename PacketScalar>
|
template<typename PacketScalar>
|
||||||
EIGEN_STRONG_INLINE const PacketScalar packetOp(const PacketScalar& a) const { return a; }
|
EIGEN_STRONG_INLINE const PacketScalar packetOp(const PacketScalar& a) const { return ei_pconj(a); }
|
||||||
};
|
};
|
||||||
template<typename Scalar>
|
template<typename Scalar>
|
||||||
struct ei_functor_traits<ei_scalar_conjugate_op<Scalar> >
|
struct ei_functor_traits<ei_scalar_conjugate_op<Scalar> >
|
||||||
{
|
{
|
||||||
enum {
|
enum {
|
||||||
Cost = NumTraits<Scalar>::IsComplex ? NumTraits<Scalar>::AddCost : 0,
|
Cost = NumTraits<Scalar>::IsComplex ? NumTraits<Scalar>::AddCost : 0,
|
||||||
PacketAccess = int(ei_packet_traits<Scalar>::size)>1
|
PacketAccess = ei_packet_traits<Scalar>::HasConj
|
||||||
};
|
};
|
||||||
};
|
};
|
||||||
|
|
||||||
@ -398,7 +395,7 @@ struct ei_scalar_multiple_op {
|
|||||||
};
|
};
|
||||||
template<typename Scalar>
|
template<typename Scalar>
|
||||||
struct ei_functor_traits<ei_scalar_multiple_op<Scalar> >
|
struct ei_functor_traits<ei_scalar_multiple_op<Scalar> >
|
||||||
{ enum { Cost = NumTraits<Scalar>::MulCost, PacketAccess = ei_packet_traits<Scalar>::size>1 }; };
|
{ enum { Cost = NumTraits<Scalar>::MulCost, PacketAccess = ei_packet_traits<Scalar>::HasMul }; };
|
||||||
|
|
||||||
template<typename Scalar1, typename Scalar2>
|
template<typename Scalar1, typename Scalar2>
|
||||||
struct ei_scalar_multiple2_op {
|
struct ei_scalar_multiple2_op {
|
||||||
@ -425,7 +422,7 @@ struct ei_scalar_quotient1_impl {
|
|||||||
};
|
};
|
||||||
template<typename Scalar>
|
template<typename Scalar>
|
||||||
struct ei_functor_traits<ei_scalar_quotient1_impl<Scalar,false> >
|
struct ei_functor_traits<ei_scalar_quotient1_impl<Scalar,false> >
|
||||||
{ enum { Cost = NumTraits<Scalar>::MulCost, PacketAccess = ei_packet_traits<Scalar>::size>1 }; };
|
{ enum { Cost = NumTraits<Scalar>::MulCost, PacketAccess = ei_packet_traits<Scalar>::HasMul }; };
|
||||||
|
|
||||||
template<typename Scalar>
|
template<typename Scalar>
|
||||||
struct ei_scalar_quotient1_impl<Scalar,true> {
|
struct ei_scalar_quotient1_impl<Scalar,true> {
|
||||||
@ -472,6 +469,7 @@ struct ei_scalar_constant_op {
|
|||||||
};
|
};
|
||||||
template<typename Scalar>
|
template<typename Scalar>
|
||||||
struct ei_functor_traits<ei_scalar_constant_op<Scalar> >
|
struct ei_functor_traits<ei_scalar_constant_op<Scalar> >
|
||||||
|
// FIXME replace this packet test by a safe one
|
||||||
{ enum { Cost = 1, PacketAccess = ei_packet_traits<Scalar>::size>1, IsRepeatable = true }; };
|
{ enum { Cost = 1, PacketAccess = ei_packet_traits<Scalar>::size>1, IsRepeatable = true }; };
|
||||||
|
|
||||||
template<typename Scalar> struct ei_scalar_identity_op {
|
template<typename Scalar> struct ei_scalar_identity_op {
|
||||||
@ -543,7 +541,7 @@ struct ei_linspaced_op_impl<Scalar,true>
|
|||||||
// nested expressions).
|
// nested expressions).
|
||||||
template <typename Scalar, bool RandomAccess = true> struct ei_linspaced_op;
|
template <typename Scalar, bool RandomAccess = true> struct ei_linspaced_op;
|
||||||
template <typename Scalar, bool RandomAccess> struct ei_functor_traits< ei_linspaced_op<Scalar,RandomAccess> >
|
template <typename Scalar, bool RandomAccess> struct ei_functor_traits< ei_linspaced_op<Scalar,RandomAccess> >
|
||||||
{ enum { Cost = 1, PacketAccess = ei_packet_traits<Scalar>::size>1, IsRepeatable = true }; };
|
{ enum { Cost = 1, PacketAccess = ei_packet_traits<Scalar>::HasSetLinear, IsRepeatable = true }; };
|
||||||
template <typename Scalar, bool RandomAccess> struct ei_linspaced_op
|
template <typename Scalar, bool RandomAccess> struct ei_linspaced_op
|
||||||
{
|
{
|
||||||
typedef typename ei_packet_traits<Scalar>::type PacketScalar;
|
typedef typename ei_packet_traits<Scalar>::type PacketScalar;
|
||||||
@ -588,7 +586,7 @@ struct ei_scalar_add_op {
|
|||||||
};
|
};
|
||||||
template<typename Scalar>
|
template<typename Scalar>
|
||||||
struct ei_functor_traits<ei_scalar_add_op<Scalar> >
|
struct ei_functor_traits<ei_scalar_add_op<Scalar> >
|
||||||
{ enum { Cost = NumTraits<Scalar>::AddCost, PacketAccess = ei_packet_traits<Scalar>::size>1 }; };
|
{ enum { Cost = NumTraits<Scalar>::AddCost, PacketAccess = ei_packet_traits<Scalar>::HasAdd }; };
|
||||||
|
|
||||||
/** \internal
|
/** \internal
|
||||||
* \brief Template functor to compute the square root of a scalar
|
* \brief Template functor to compute the square root of a scalar
|
||||||
@ -676,7 +674,7 @@ struct ei_scalar_inverse_op {
|
|||||||
};
|
};
|
||||||
template<typename Scalar>
|
template<typename Scalar>
|
||||||
struct ei_functor_traits<ei_scalar_inverse_op<Scalar> >
|
struct ei_functor_traits<ei_scalar_inverse_op<Scalar> >
|
||||||
{ enum { Cost = NumTraits<Scalar>::MulCost, PacketAccess = int(ei_packet_traits<Scalar>::size)>1 }; };
|
{ enum { Cost = NumTraits<Scalar>::MulCost, PacketAccess = ei_packet_traits<Scalar>::HasDiv }; };
|
||||||
|
|
||||||
/** \internal
|
/** \internal
|
||||||
* \brief Template functor to compute the square of a scalar
|
* \brief Template functor to compute the square of a scalar
|
||||||
@ -692,7 +690,7 @@ struct ei_scalar_square_op {
|
|||||||
};
|
};
|
||||||
template<typename Scalar>
|
template<typename Scalar>
|
||||||
struct ei_functor_traits<ei_scalar_square_op<Scalar> >
|
struct ei_functor_traits<ei_scalar_square_op<Scalar> >
|
||||||
{ enum { Cost = NumTraits<Scalar>::MulCost, PacketAccess = int(ei_packet_traits<Scalar>::size)>1 }; };
|
{ enum { Cost = NumTraits<Scalar>::MulCost, PacketAccess = ei_packet_traits<Scalar>::HasMul }; };
|
||||||
|
|
||||||
/** \internal
|
/** \internal
|
||||||
* \brief Template functor to compute the cube of a scalar
|
* \brief Template functor to compute the cube of a scalar
|
||||||
@ -708,7 +706,7 @@ struct ei_scalar_cube_op {
|
|||||||
};
|
};
|
||||||
template<typename Scalar>
|
template<typename Scalar>
|
||||||
struct ei_functor_traits<ei_scalar_cube_op<Scalar> >
|
struct ei_functor_traits<ei_scalar_cube_op<Scalar> >
|
||||||
{ enum { Cost = 2*NumTraits<Scalar>::MulCost, PacketAccess = int(ei_packet_traits<Scalar>::size)>1 }; };
|
{ enum { Cost = 2*NumTraits<Scalar>::MulCost, PacketAccess = ei_packet_traits<Scalar>::HasMul }; };
|
||||||
|
|
||||||
// default functor traits for STL functors:
|
// default functor traits for STL functors:
|
||||||
|
|
||||||
|
@ -58,8 +58,11 @@ struct ei_default_packet_traits
|
|||||||
HasMul = 1,
|
HasMul = 1,
|
||||||
HasNegate = 1,
|
HasNegate = 1,
|
||||||
HasAbs = 1,
|
HasAbs = 1,
|
||||||
|
HasAbs2 = 1,
|
||||||
HasMin = 1,
|
HasMin = 1,
|
||||||
HasMax = 1,
|
HasMax = 1,
|
||||||
|
HasConj = 1,
|
||||||
|
HasSetLinear = 1,
|
||||||
|
|
||||||
HasDiv = 0,
|
HasDiv = 0,
|
||||||
HasSqrt = 0,
|
HasSqrt = 0,
|
||||||
@ -105,6 +108,10 @@ ei_psub(const Packet& a,
|
|||||||
template<typename Packet> inline Packet
|
template<typename Packet> inline Packet
|
||||||
ei_pnegate(const Packet& a) { return -a; }
|
ei_pnegate(const Packet& a) { return -a; }
|
||||||
|
|
||||||
|
/** \internal \returns conj(a) (coeff-wise) */
|
||||||
|
template<typename Packet> inline Packet
|
||||||
|
ei_pconj(const Packet& a) { return ei_conj(a); }
|
||||||
|
|
||||||
/** \internal \returns a * b (coeff-wise) */
|
/** \internal \returns a * b (coeff-wise) */
|
||||||
template<typename Packet> inline Packet
|
template<typename Packet> inline Packet
|
||||||
ei_pmul(const Packet& a,
|
ei_pmul(const Packet& a,
|
||||||
|
@ -37,6 +37,18 @@ typedef __m128d Packet1cd;
|
|||||||
template<> struct ei_packet_traits<std::complex<float> > : ei_default_packet_traits
|
template<> struct ei_packet_traits<std::complex<float> > : ei_default_packet_traits
|
||||||
{
|
{
|
||||||
typedef Packet2cf type; enum {size=2};
|
typedef Packet2cf type; enum {size=2};
|
||||||
|
enum {
|
||||||
|
HasAdd = 1,
|
||||||
|
HasSub = 1,
|
||||||
|
HasMul = 1,
|
||||||
|
HasDiv = 1,
|
||||||
|
HasNegate = 1,
|
||||||
|
HasAbs = 0,
|
||||||
|
HasAbs2 = 0,
|
||||||
|
HasMin = 0,
|
||||||
|
HasMax = 0,
|
||||||
|
HasSetLinear = 0
|
||||||
|
};
|
||||||
};
|
};
|
||||||
|
|
||||||
template<> struct ei_unpacket_traits<Packet2cf> { typedef std::complex<float> type; enum {size=2}; };
|
template<> struct ei_unpacket_traits<Packet2cf> { typedef std::complex<float> type; enum {size=2}; };
|
||||||
@ -56,7 +68,11 @@ template<> EIGEN_STRONG_INLINE Packet2cf ei_pnegate(const Packet2cf& a)
|
|||||||
{
|
{
|
||||||
const __m128 mask = _mm_castsi128_ps(_mm_setr_epi32(0x80000000,0x80000000,0x80000000,0x80000000));
|
const __m128 mask = _mm_castsi128_ps(_mm_setr_epi32(0x80000000,0x80000000,0x80000000,0x80000000));
|
||||||
return Packet2cf(_mm_xor_ps(a.v,mask));
|
return Packet2cf(_mm_xor_ps(a.v,mask));
|
||||||
|
}
|
||||||
|
template<> EIGEN_STRONG_INLINE Packet2cf ei_pconj(const Packet2cf& a)
|
||||||
|
{
|
||||||
|
const __m128 mask = _mm_castsi128_ps(_mm_setr_epi32(0x00000000,0x80000000,0x00000000,0x80000000));
|
||||||
|
return Packet2cf(_mm_xor_ps(a.v,mask));
|
||||||
}
|
}
|
||||||
|
|
||||||
template<> EIGEN_STRONG_INLINE Packet2cf ei_pmul<Packet2cf>(const Packet2cf& a, const Packet2cf& b)
|
template<> EIGEN_STRONG_INLINE Packet2cf ei_pmul<Packet2cf>(const Packet2cf& a, const Packet2cf& b)
|
||||||
|
@ -62,6 +62,7 @@ template<> struct ei_packet_traits<float> : ei_default_packet_traits
|
|||||||
{
|
{
|
||||||
typedef Packet4f type; enum {size=4};
|
typedef Packet4f type; enum {size=4};
|
||||||
enum {
|
enum {
|
||||||
|
HasDiv = 1,
|
||||||
HasSin = EIGEN_FAST_MATH,
|
HasSin = EIGEN_FAST_MATH,
|
||||||
HasCos = EIGEN_FAST_MATH,
|
HasCos = EIGEN_FAST_MATH,
|
||||||
HasLog = 1,
|
HasLog = 1,
|
||||||
@ -70,7 +71,12 @@ template<> struct ei_packet_traits<float> : ei_default_packet_traits
|
|||||||
};
|
};
|
||||||
};
|
};
|
||||||
template<> struct ei_packet_traits<double> : ei_default_packet_traits
|
template<> struct ei_packet_traits<double> : ei_default_packet_traits
|
||||||
{ typedef Packet2d type; enum {size=2}; };
|
{
|
||||||
|
typedef Packet2d type; enum {size=2};
|
||||||
|
enum {
|
||||||
|
HasDiv = 1
|
||||||
|
};
|
||||||
|
};
|
||||||
template<> struct ei_packet_traits<int> : ei_default_packet_traits
|
template<> struct ei_packet_traits<int> : ei_default_packet_traits
|
||||||
{ typedef Packet4i type; enum {size=4}; };
|
{ typedef Packet4i type; enum {size=4}; };
|
||||||
|
|
||||||
|
@ -773,8 +773,8 @@ struct ei_gemm_pack_lhs
|
|||||||
// 4 5 6 7 16 17 18 19 25 28
|
// 4 5 6 7 16 17 18 19 25 28
|
||||||
// 8 9 10 11 20 21 22 23 26 29
|
// 8 9 10 11 20 21 22 23 26 29
|
||||||
// . . . . . . . . . .
|
// . . . . . . . . . .
|
||||||
template<typename Scalar, typename Index, int nr, bool PanelMode>
|
template<typename Scalar, typename Index, int nr, bool Conjugate, bool PanelMode>
|
||||||
struct ei_gemm_pack_rhs<Scalar, Index, nr, ColMajor, PanelMode>
|
struct ei_gemm_pack_rhs<Scalar, Index, nr, ColMajor, Conjugate, PanelMode>
|
||||||
{
|
{
|
||||||
typedef typename ei_packet_traits<Scalar>::type Packet;
|
typedef typename ei_packet_traits<Scalar>::type Packet;
|
||||||
enum { PacketSize = ei_packet_traits<Scalar>::size };
|
enum { PacketSize = ei_packet_traits<Scalar>::size };
|
||||||
@ -782,6 +782,7 @@ struct ei_gemm_pack_rhs<Scalar, Index, nr, ColMajor, PanelMode>
|
|||||||
Index stride=0, Index offset=0)
|
Index stride=0, Index offset=0)
|
||||||
{
|
{
|
||||||
ei_assert(((!PanelMode) && stride==0 && offset==0) || (PanelMode && stride>=depth && offset<=stride));
|
ei_assert(((!PanelMode) && stride==0 && offset==0) || (PanelMode && stride>=depth && offset<=stride));
|
||||||
|
ei_conj_if<NumTraits<Scalar>::IsComplex && Conjugate> cj;
|
||||||
bool hasAlpha = alpha != Scalar(1);
|
bool hasAlpha = alpha != Scalar(1);
|
||||||
Index packet_cols = (cols/nr) * nr;
|
Index packet_cols = (cols/nr) * nr;
|
||||||
Index count = 0;
|
Index count = 0;
|
||||||
@ -796,19 +797,19 @@ struct ei_gemm_pack_rhs<Scalar, Index, nr, ColMajor, PanelMode>
|
|||||||
if (hasAlpha)
|
if (hasAlpha)
|
||||||
for(Index k=0; k<depth; k++)
|
for(Index k=0; k<depth; k++)
|
||||||
{
|
{
|
||||||
blockB[count+0] = alpha*b0[k];
|
blockB[count+0] = alpha*cj(b0[k]);
|
||||||
blockB[count+1] = alpha*b1[k];
|
blockB[count+1] = alpha*cj(b1[k]);
|
||||||
if(nr==4) blockB[count+2] = alpha*b2[k];
|
if(nr==4) blockB[count+2] = alpha*cj(b2[k]);
|
||||||
if(nr==4) blockB[count+3] = alpha*b3[k];
|
if(nr==4) blockB[count+3] = alpha*cj(b3[k]);
|
||||||
count += nr;
|
count += nr;
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
for(Index k=0; k<depth; k++)
|
for(Index k=0; k<depth; k++)
|
||||||
{
|
{
|
||||||
blockB[count+0] = b0[k];
|
blockB[count+0] = cj(b0[k]);
|
||||||
blockB[count+1] = b1[k];
|
blockB[count+1] = cj(b1[k]);
|
||||||
if(nr==4) blockB[count+2] = b2[k];
|
if(nr==4) blockB[count+2] = cj(b2[k]);
|
||||||
if(nr==4) blockB[count+3] = b3[k];
|
if(nr==4) blockB[count+3] = cj(b3[k]);
|
||||||
count += nr;
|
count += nr;
|
||||||
}
|
}
|
||||||
// skip what we have after
|
// skip what we have after
|
||||||
@ -823,13 +824,13 @@ struct ei_gemm_pack_rhs<Scalar, Index, nr, ColMajor, PanelMode>
|
|||||||
if (hasAlpha)
|
if (hasAlpha)
|
||||||
for(Index k=0; k<depth; k++)
|
for(Index k=0; k<depth; k++)
|
||||||
{
|
{
|
||||||
blockB[count] = alpha*b0[k];
|
blockB[count] = alpha*cj(b0[k]);
|
||||||
count += 1;
|
count += 1;
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
for(Index k=0; k<depth; k++)
|
for(Index k=0; k<depth; k++)
|
||||||
{
|
{
|
||||||
blockB[count] = b0[k];
|
blockB[count] = cj(b0[k]);
|
||||||
count += 1;
|
count += 1;
|
||||||
}
|
}
|
||||||
if(PanelMode) count += (stride-offset-depth);
|
if(PanelMode) count += (stride-offset-depth);
|
||||||
@ -838,14 +839,15 @@ struct ei_gemm_pack_rhs<Scalar, Index, nr, ColMajor, PanelMode>
|
|||||||
};
|
};
|
||||||
|
|
||||||
// this version is optimized for row major matrices
|
// this version is optimized for row major matrices
|
||||||
template<typename Scalar, typename Index, int nr, bool PanelMode>
|
template<typename Scalar, typename Index, int nr, bool Conjugate, bool PanelMode>
|
||||||
struct ei_gemm_pack_rhs<Scalar, Index, nr, RowMajor, PanelMode>
|
struct ei_gemm_pack_rhs<Scalar, Index, nr, RowMajor, Conjugate, PanelMode>
|
||||||
{
|
{
|
||||||
enum { PacketSize = ei_packet_traits<Scalar>::size };
|
enum { PacketSize = ei_packet_traits<Scalar>::size };
|
||||||
void operator()(Scalar* blockB, const Scalar* rhs, Index rhsStride, Scalar alpha, Index depth, Index cols,
|
void operator()(Scalar* blockB, const Scalar* rhs, Index rhsStride, Scalar alpha, Index depth, Index cols,
|
||||||
Index stride=0, Index offset=0)
|
Index stride=0, Index offset=0)
|
||||||
{
|
{
|
||||||
ei_assert(((!PanelMode) && stride==0 && offset==0) || (PanelMode && stride>=depth && offset<=stride));
|
ei_assert(((!PanelMode) && stride==0 && offset==0) || (PanelMode && stride>=depth && offset<=stride));
|
||||||
|
ei_conj_if<NumTraits<Scalar>::IsComplex && Conjugate> cj;
|
||||||
bool hasAlpha = alpha != Scalar(1);
|
bool hasAlpha = alpha != Scalar(1);
|
||||||
Index packet_cols = (cols/nr) * nr;
|
Index packet_cols = (cols/nr) * nr;
|
||||||
Index count = 0;
|
Index count = 0;
|
||||||
@ -858,10 +860,10 @@ struct ei_gemm_pack_rhs<Scalar, Index, nr, RowMajor, PanelMode>
|
|||||||
for(Index k=0; k<depth; k++)
|
for(Index k=0; k<depth; k++)
|
||||||
{
|
{
|
||||||
const Scalar* b0 = &rhs[k*rhsStride + j2];
|
const Scalar* b0 = &rhs[k*rhsStride + j2];
|
||||||
blockB[count+0] = alpha*b0[0];
|
blockB[count+0] = alpha*cj(b0[0]);
|
||||||
blockB[count+1] = alpha*b0[1];
|
blockB[count+1] = alpha*cj(b0[1]);
|
||||||
if(nr==4) blockB[count+2] = alpha*b0[2];
|
if(nr==4) blockB[count+2] = alpha*cj(b0[2]);
|
||||||
if(nr==4) blockB[count+3] = alpha*b0[3];
|
if(nr==4) blockB[count+3] = alpha*cj(b0[3]);
|
||||||
count += nr;
|
count += nr;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -870,10 +872,10 @@ struct ei_gemm_pack_rhs<Scalar, Index, nr, RowMajor, PanelMode>
|
|||||||
for(Index k=0; k<depth; k++)
|
for(Index k=0; k<depth; k++)
|
||||||
{
|
{
|
||||||
const Scalar* b0 = &rhs[k*rhsStride + j2];
|
const Scalar* b0 = &rhs[k*rhsStride + j2];
|
||||||
blockB[count+0] = b0[0];
|
blockB[count+0] = cj(b0[0]);
|
||||||
blockB[count+1] = b0[1];
|
blockB[count+1] = cj(b0[1]);
|
||||||
if(nr==4) blockB[count+2] = b0[2];
|
if(nr==4) blockB[count+2] = cj(b0[2]);
|
||||||
if(nr==4) blockB[count+3] = b0[3];
|
if(nr==4) blockB[count+3] = cj(b0[3]);
|
||||||
count += nr;
|
count += nr;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -887,7 +889,7 @@ struct ei_gemm_pack_rhs<Scalar, Index, nr, RowMajor, PanelMode>
|
|||||||
const Scalar* b0 = &rhs[j2];
|
const Scalar* b0 = &rhs[j2];
|
||||||
for(Index k=0; k<depth; k++)
|
for(Index k=0; k<depth; k++)
|
||||||
{
|
{
|
||||||
blockB[count] = alpha*b0[k*rhsStride];
|
blockB[count] = alpha*cj(b0[k*rhsStride]);
|
||||||
count += 1;
|
count += 1;
|
||||||
}
|
}
|
||||||
if(PanelMode) count += stride-offset-depth;
|
if(PanelMode) count += stride-offset-depth;
|
||||||
|
@ -73,9 +73,6 @@ static void run(Index rows, Index cols, Index depth,
|
|||||||
ei_const_blas_data_mapper<Scalar, Index, LhsStorageOrder> lhs(_lhs,lhsStride);
|
ei_const_blas_data_mapper<Scalar, Index, LhsStorageOrder> lhs(_lhs,lhsStride);
|
||||||
ei_const_blas_data_mapper<Scalar, Index, RhsStorageOrder> rhs(_rhs,rhsStride);
|
ei_const_blas_data_mapper<Scalar, Index, RhsStorageOrder> rhs(_rhs,rhsStride);
|
||||||
|
|
||||||
if (ConjugateRhs)
|
|
||||||
alpha = ei_conj(alpha);
|
|
||||||
|
|
||||||
typedef typename ei_packet_traits<Scalar>::type PacketType;
|
typedef typename ei_packet_traits<Scalar>::type PacketType;
|
||||||
typedef ei_product_blocking_traits<Scalar> Blocking;
|
typedef ei_product_blocking_traits<Scalar> Blocking;
|
||||||
|
|
||||||
@ -83,9 +80,9 @@ static void run(Index rows, Index cols, Index depth,
|
|||||||
Index mc = std::min(rows,blocking.mc()); // cache block size along the M direction
|
Index mc = std::min(rows,blocking.mc()); // cache block size along the M direction
|
||||||
//Index nc = blocking.nc(); // cache block size along the N direction
|
//Index nc = blocking.nc(); // cache block size along the N direction
|
||||||
|
|
||||||
ei_gemm_pack_rhs<Scalar, Index, Blocking::nr, RhsStorageOrder> pack_rhs;
|
ei_gemm_pack_lhs<Scalar, Index, Blocking::mr, LhsStorageOrder, ConjugateLhs> pack_lhs;
|
||||||
ei_gemm_pack_lhs<Scalar, Index, Blocking::mr, LhsStorageOrder> pack_lhs;
|
ei_gemm_pack_rhs<Scalar, Index, Blocking::nr, RhsStorageOrder, ConjugateRhs> pack_rhs;
|
||||||
ei_gebp_kernel<Scalar, Index, Blocking::mr, Blocking::nr, ei_conj_helper<ConjugateLhs,ConjugateRhs> > gebp;
|
ei_gebp_kernel<Scalar, Index, Blocking::mr, Blocking::nr> gebp;
|
||||||
|
|
||||||
#ifdef EIGEN_HAS_OPENMP
|
#ifdef EIGEN_HAS_OPENMP
|
||||||
if(info)
|
if(info)
|
||||||
|
@ -29,10 +29,15 @@
|
|||||||
// implement and control fast level 2 and level 3 BLAS-like routines.
|
// implement and control fast level 2 and level 3 BLAS-like routines.
|
||||||
|
|
||||||
// forward declarations
|
// forward declarations
|
||||||
template<typename Scalar, typename Index, int mr, int nr, typename Conj>
|
|
||||||
|
// Provides scalar/packet-wise product and product with accumulation
|
||||||
|
// with optional conjugation of the arguments.
|
||||||
|
template<bool ConjLhs, bool ConjRhs> struct ei_conj_helper;
|
||||||
|
|
||||||
|
template<typename Scalar, typename Index, int mr, int nr, typename Conj = ei_conj_helper<false,false> >
|
||||||
struct ei_gebp_kernel;
|
struct ei_gebp_kernel;
|
||||||
|
|
||||||
template<typename Scalar, typename Index, int nr, int StorageOrder, bool PanelMode=false>
|
template<typename Scalar, typename Index, int nr, int StorageOrder, bool Conjugate = false, bool PanelMode=false>
|
||||||
struct ei_gemm_pack_rhs;
|
struct ei_gemm_pack_rhs;
|
||||||
|
|
||||||
template<typename Scalar, typename Index, int mr, int StorageOrder, bool Conjugate = false, bool PanelMode = false>
|
template<typename Scalar, typename Index, int mr, int StorageOrder, bool Conjugate = false, bool PanelMode = false>
|
||||||
@ -53,10 +58,6 @@ template<bool ConjugateLhs, bool ConjugateRhs, typename Scalar, typename Index,
|
|||||||
static void ei_cache_friendly_product_rowmajor_times_vector(
|
static void ei_cache_friendly_product_rowmajor_times_vector(
|
||||||
const Scalar* lhs, Index lhsStride, const Scalar* rhs, Index rhsSize, ResType& res, Scalar alpha);
|
const Scalar* lhs, Index lhsStride, const Scalar* rhs, Index rhsSize, ResType& res, Scalar alpha);
|
||||||
|
|
||||||
// Provides scalar/packet-wise product and product with accumulation
|
|
||||||
// with optional conjugation of the arguments.
|
|
||||||
template<bool ConjLhs, bool ConjRhs> struct ei_conj_helper;
|
|
||||||
|
|
||||||
template<> struct ei_conj_helper<false,false>
|
template<> struct ei_conj_helper<false,false>
|
||||||
{
|
{
|
||||||
template<typename T>
|
template<typename T>
|
||||||
|
Loading…
x
Reference in New Issue
Block a user