* extend the Has* packet traits and makes all functor use it

* extend the packing routines to support conjugation
This commit is contained in:
Gael Guennebaud 2010-07-05 23:27:54 +02:00
parent e1eccfad3f
commit c69a226192
7 changed files with 85 additions and 58 deletions

View File

@ -46,7 +46,7 @@ template<typename Scalar>
struct ei_functor_traits<ei_scalar_sum_op<Scalar> > { struct ei_functor_traits<ei_scalar_sum_op<Scalar> > {
enum { enum {
Cost = NumTraits<Scalar>::AddCost, Cost = NumTraits<Scalar>::AddCost,
PacketAccess = ei_packet_traits<Scalar>::size>1 PacketAccess = ei_packet_traits<Scalar>::HasAdd
}; };
}; };
@ -69,7 +69,7 @@ template<typename Scalar>
struct ei_functor_traits<ei_scalar_product_op<Scalar> > { struct ei_functor_traits<ei_scalar_product_op<Scalar> > {
enum { enum {
Cost = NumTraits<Scalar>::MulCost, Cost = NumTraits<Scalar>::MulCost,
PacketAccess = ei_packet_traits<Scalar>::size>1 PacketAccess = ei_packet_traits<Scalar>::HasMul
}; };
}; };
@ -92,7 +92,7 @@ template<typename Scalar>
struct ei_functor_traits<ei_scalar_min_op<Scalar> > { struct ei_functor_traits<ei_scalar_min_op<Scalar> > {
enum { enum {
Cost = NumTraits<Scalar>::AddCost, Cost = NumTraits<Scalar>::AddCost,
PacketAccess = ei_packet_traits<Scalar>::size>1 PacketAccess = ei_packet_traits<Scalar>::HasMin
}; };
}; };
@ -115,7 +115,7 @@ template<typename Scalar>
struct ei_functor_traits<ei_scalar_max_op<Scalar> > { struct ei_functor_traits<ei_scalar_max_op<Scalar> > {
enum { enum {
Cost = NumTraits<Scalar>::AddCost, Cost = NumTraits<Scalar>::AddCost,
PacketAccess = ei_packet_traits<Scalar>::size>1 PacketAccess = ei_packet_traits<Scalar>::HasMax
}; };
}; };
@ -158,7 +158,7 @@ template<typename Scalar>
struct ei_functor_traits<ei_scalar_difference_op<Scalar> > { struct ei_functor_traits<ei_scalar_difference_op<Scalar> > {
enum { enum {
Cost = NumTraits<Scalar>::AddCost, Cost = NumTraits<Scalar>::AddCost,
PacketAccess = ei_packet_traits<Scalar>::size>1 PacketAccess = ei_packet_traits<Scalar>::HasSub
}; };
}; };
@ -178,10 +178,7 @@ template<typename Scalar>
struct ei_functor_traits<ei_scalar_quotient_op<Scalar> > { struct ei_functor_traits<ei_scalar_quotient_op<Scalar> > {
enum { enum {
Cost = 2 * NumTraits<Scalar>::MulCost, Cost = 2 * NumTraits<Scalar>::MulCost,
PacketAccess = ei_packet_traits<Scalar>::size>1 PacketAccess = ei_packet_traits<Scalar>::HasDiv
#if (defined EIGEN_VECTORIZE)
&& !NumTraits<Scalar>::IsInteger
#endif
}; };
}; };
@ -203,7 +200,7 @@ template<typename Scalar>
struct ei_functor_traits<ei_scalar_opposite_op<Scalar> > struct ei_functor_traits<ei_scalar_opposite_op<Scalar> >
{ enum { { enum {
Cost = NumTraits<Scalar>::AddCost, Cost = NumTraits<Scalar>::AddCost,
PacketAccess = int(ei_packet_traits<Scalar>::size)>1 }; PacketAccess = ei_packet_traits<Scalar>::HasNegate };
}; };
/** \internal /** \internal
@ -224,7 +221,7 @@ struct ei_functor_traits<ei_scalar_abs_op<Scalar> >
{ {
enum { enum {
Cost = NumTraits<Scalar>::AddCost, Cost = NumTraits<Scalar>::AddCost,
PacketAccess = int(ei_packet_traits<Scalar>::size)>1 PacketAccess = ei_packet_traits<Scalar>::HasAbs
}; };
}; };
@ -243,7 +240,7 @@ template<typename Scalar> struct ei_scalar_abs2_op {
}; };
template<typename Scalar> template<typename Scalar>
struct ei_functor_traits<ei_scalar_abs2_op<Scalar> > struct ei_functor_traits<ei_scalar_abs2_op<Scalar> >
{ enum { Cost = NumTraits<Scalar>::MulCost, PacketAccess = int(ei_packet_traits<Scalar>::size)>1 }; }; { enum { Cost = NumTraits<Scalar>::MulCost, PacketAccess = ei_packet_traits<Scalar>::HasAbs2 }; };
/** \internal /** \internal
* \brief Template functor to compute the conjugate of a complex value * \brief Template functor to compute the conjugate of a complex value
@ -254,14 +251,14 @@ template<typename Scalar> struct ei_scalar_conjugate_op {
EIGEN_EMPTY_STRUCT_CTOR(ei_scalar_conjugate_op) EIGEN_EMPTY_STRUCT_CTOR(ei_scalar_conjugate_op)
EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& a) const { return ei_conj(a); } EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& a) const { return ei_conj(a); }
template<typename PacketScalar> template<typename PacketScalar>
EIGEN_STRONG_INLINE const PacketScalar packetOp(const PacketScalar& a) const { return a; } EIGEN_STRONG_INLINE const PacketScalar packetOp(const PacketScalar& a) const { return ei_pconj(a); }
}; };
template<typename Scalar> template<typename Scalar>
struct ei_functor_traits<ei_scalar_conjugate_op<Scalar> > struct ei_functor_traits<ei_scalar_conjugate_op<Scalar> >
{ {
enum { enum {
Cost = NumTraits<Scalar>::IsComplex ? NumTraits<Scalar>::AddCost : 0, Cost = NumTraits<Scalar>::IsComplex ? NumTraits<Scalar>::AddCost : 0,
PacketAccess = int(ei_packet_traits<Scalar>::size)>1 PacketAccess = ei_packet_traits<Scalar>::HasConj
}; };
}; };
@ -398,7 +395,7 @@ struct ei_scalar_multiple_op {
}; };
template<typename Scalar> template<typename Scalar>
struct ei_functor_traits<ei_scalar_multiple_op<Scalar> > struct ei_functor_traits<ei_scalar_multiple_op<Scalar> >
{ enum { Cost = NumTraits<Scalar>::MulCost, PacketAccess = ei_packet_traits<Scalar>::size>1 }; }; { enum { Cost = NumTraits<Scalar>::MulCost, PacketAccess = ei_packet_traits<Scalar>::HasMul }; };
template<typename Scalar1, typename Scalar2> template<typename Scalar1, typename Scalar2>
struct ei_scalar_multiple2_op { struct ei_scalar_multiple2_op {
@ -425,7 +422,7 @@ struct ei_scalar_quotient1_impl {
}; };
template<typename Scalar> template<typename Scalar>
struct ei_functor_traits<ei_scalar_quotient1_impl<Scalar,false> > struct ei_functor_traits<ei_scalar_quotient1_impl<Scalar,false> >
{ enum { Cost = NumTraits<Scalar>::MulCost, PacketAccess = ei_packet_traits<Scalar>::size>1 }; }; { enum { Cost = NumTraits<Scalar>::MulCost, PacketAccess = ei_packet_traits<Scalar>::HasMul }; };
template<typename Scalar> template<typename Scalar>
struct ei_scalar_quotient1_impl<Scalar,true> { struct ei_scalar_quotient1_impl<Scalar,true> {
@ -472,6 +469,7 @@ struct ei_scalar_constant_op {
}; };
template<typename Scalar> template<typename Scalar>
struct ei_functor_traits<ei_scalar_constant_op<Scalar> > struct ei_functor_traits<ei_scalar_constant_op<Scalar> >
// FIXME replace this packet test by a safe one
{ enum { Cost = 1, PacketAccess = ei_packet_traits<Scalar>::size>1, IsRepeatable = true }; }; { enum { Cost = 1, PacketAccess = ei_packet_traits<Scalar>::size>1, IsRepeatable = true }; };
template<typename Scalar> struct ei_scalar_identity_op { template<typename Scalar> struct ei_scalar_identity_op {
@ -543,7 +541,7 @@ struct ei_linspaced_op_impl<Scalar,true>
// nested expressions). // nested expressions).
template <typename Scalar, bool RandomAccess = true> struct ei_linspaced_op; template <typename Scalar, bool RandomAccess = true> struct ei_linspaced_op;
template <typename Scalar, bool RandomAccess> struct ei_functor_traits< ei_linspaced_op<Scalar,RandomAccess> > template <typename Scalar, bool RandomAccess> struct ei_functor_traits< ei_linspaced_op<Scalar,RandomAccess> >
{ enum { Cost = 1, PacketAccess = ei_packet_traits<Scalar>::size>1, IsRepeatable = true }; }; { enum { Cost = 1, PacketAccess = ei_packet_traits<Scalar>::HasSetLinear, IsRepeatable = true }; };
template <typename Scalar, bool RandomAccess> struct ei_linspaced_op template <typename Scalar, bool RandomAccess> struct ei_linspaced_op
{ {
typedef typename ei_packet_traits<Scalar>::type PacketScalar; typedef typename ei_packet_traits<Scalar>::type PacketScalar;
@ -588,7 +586,7 @@ struct ei_scalar_add_op {
}; };
template<typename Scalar> template<typename Scalar>
struct ei_functor_traits<ei_scalar_add_op<Scalar> > struct ei_functor_traits<ei_scalar_add_op<Scalar> >
{ enum { Cost = NumTraits<Scalar>::AddCost, PacketAccess = ei_packet_traits<Scalar>::size>1 }; }; { enum { Cost = NumTraits<Scalar>::AddCost, PacketAccess = ei_packet_traits<Scalar>::HasAdd }; };
/** \internal /** \internal
* \brief Template functor to compute the square root of a scalar * \brief Template functor to compute the square root of a scalar
@ -676,7 +674,7 @@ struct ei_scalar_inverse_op {
}; };
template<typename Scalar> template<typename Scalar>
struct ei_functor_traits<ei_scalar_inverse_op<Scalar> > struct ei_functor_traits<ei_scalar_inverse_op<Scalar> >
{ enum { Cost = NumTraits<Scalar>::MulCost, PacketAccess = int(ei_packet_traits<Scalar>::size)>1 }; }; { enum { Cost = NumTraits<Scalar>::MulCost, PacketAccess = ei_packet_traits<Scalar>::HasDiv }; };
/** \internal /** \internal
* \brief Template functor to compute the square of a scalar * \brief Template functor to compute the square of a scalar
@ -692,7 +690,7 @@ struct ei_scalar_square_op {
}; };
template<typename Scalar> template<typename Scalar>
struct ei_functor_traits<ei_scalar_square_op<Scalar> > struct ei_functor_traits<ei_scalar_square_op<Scalar> >
{ enum { Cost = NumTraits<Scalar>::MulCost, PacketAccess = int(ei_packet_traits<Scalar>::size)>1 }; }; { enum { Cost = NumTraits<Scalar>::MulCost, PacketAccess = ei_packet_traits<Scalar>::HasMul }; };
/** \internal /** \internal
* \brief Template functor to compute the cube of a scalar * \brief Template functor to compute the cube of a scalar
@ -708,7 +706,7 @@ struct ei_scalar_cube_op {
}; };
template<typename Scalar> template<typename Scalar>
struct ei_functor_traits<ei_scalar_cube_op<Scalar> > struct ei_functor_traits<ei_scalar_cube_op<Scalar> >
{ enum { Cost = 2*NumTraits<Scalar>::MulCost, PacketAccess = int(ei_packet_traits<Scalar>::size)>1 }; }; { enum { Cost = 2*NumTraits<Scalar>::MulCost, PacketAccess = ei_packet_traits<Scalar>::HasMul }; };
// default functor traits for STL functors: // default functor traits for STL functors:

View File

@ -58,8 +58,11 @@ struct ei_default_packet_traits
HasMul = 1, HasMul = 1,
HasNegate = 1, HasNegate = 1,
HasAbs = 1, HasAbs = 1,
HasAbs2 = 1,
HasMin = 1, HasMin = 1,
HasMax = 1, HasMax = 1,
HasConj = 1,
HasSetLinear = 1,
HasDiv = 0, HasDiv = 0,
HasSqrt = 0, HasSqrt = 0,
@ -105,6 +108,10 @@ ei_psub(const Packet& a,
template<typename Packet> inline Packet template<typename Packet> inline Packet
ei_pnegate(const Packet& a) { return -a; } ei_pnegate(const Packet& a) { return -a; }
/** \internal \returns conj(a) (coeff-wise) */
template<typename Packet> inline Packet
ei_pconj(const Packet& a) { return ei_conj(a); }
/** \internal \returns a * b (coeff-wise) */ /** \internal \returns a * b (coeff-wise) */
template<typename Packet> inline Packet template<typename Packet> inline Packet
ei_pmul(const Packet& a, ei_pmul(const Packet& a,

View File

@ -37,6 +37,18 @@ typedef __m128d Packet1cd;
template<> struct ei_packet_traits<std::complex<float> > : ei_default_packet_traits template<> struct ei_packet_traits<std::complex<float> > : ei_default_packet_traits
{ {
typedef Packet2cf type; enum {size=2}; typedef Packet2cf type; enum {size=2};
enum {
HasAdd = 1,
HasSub = 1,
HasMul = 1,
HasDiv = 1,
HasNegate = 1,
HasAbs = 0,
HasAbs2 = 0,
HasMin = 0,
HasMax = 0,
HasSetLinear = 0
};
}; };
template<> struct ei_unpacket_traits<Packet2cf> { typedef std::complex<float> type; enum {size=2}; }; template<> struct ei_unpacket_traits<Packet2cf> { typedef std::complex<float> type; enum {size=2}; };
@ -56,7 +68,11 @@ template<> EIGEN_STRONG_INLINE Packet2cf ei_pnegate(const Packet2cf& a)
{ {
const __m128 mask = _mm_castsi128_ps(_mm_setr_epi32(0x80000000,0x80000000,0x80000000,0x80000000)); const __m128 mask = _mm_castsi128_ps(_mm_setr_epi32(0x80000000,0x80000000,0x80000000,0x80000000));
return Packet2cf(_mm_xor_ps(a.v,mask)); return Packet2cf(_mm_xor_ps(a.v,mask));
}
template<> EIGEN_STRONG_INLINE Packet2cf ei_pconj(const Packet2cf& a)
{
const __m128 mask = _mm_castsi128_ps(_mm_setr_epi32(0x00000000,0x80000000,0x00000000,0x80000000));
return Packet2cf(_mm_xor_ps(a.v,mask));
} }
template<> EIGEN_STRONG_INLINE Packet2cf ei_pmul<Packet2cf>(const Packet2cf& a, const Packet2cf& b) template<> EIGEN_STRONG_INLINE Packet2cf ei_pmul<Packet2cf>(const Packet2cf& a, const Packet2cf& b)

View File

@ -62,6 +62,7 @@ template<> struct ei_packet_traits<float> : ei_default_packet_traits
{ {
typedef Packet4f type; enum {size=4}; typedef Packet4f type; enum {size=4};
enum { enum {
HasDiv = 1,
HasSin = EIGEN_FAST_MATH, HasSin = EIGEN_FAST_MATH,
HasCos = EIGEN_FAST_MATH, HasCos = EIGEN_FAST_MATH,
HasLog = 1, HasLog = 1,
@ -70,7 +71,12 @@ template<> struct ei_packet_traits<float> : ei_default_packet_traits
}; };
}; };
template<> struct ei_packet_traits<double> : ei_default_packet_traits template<> struct ei_packet_traits<double> : ei_default_packet_traits
{ typedef Packet2d type; enum {size=2}; }; {
typedef Packet2d type; enum {size=2};
enum {
HasDiv = 1
};
};
template<> struct ei_packet_traits<int> : ei_default_packet_traits template<> struct ei_packet_traits<int> : ei_default_packet_traits
{ typedef Packet4i type; enum {size=4}; }; { typedef Packet4i type; enum {size=4}; };

View File

@ -773,8 +773,8 @@ struct ei_gemm_pack_lhs
// 4 5 6 7 16 17 18 19 25 28 // 4 5 6 7 16 17 18 19 25 28
// 8 9 10 11 20 21 22 23 26 29 // 8 9 10 11 20 21 22 23 26 29
// . . . . . . . . . . // . . . . . . . . . .
template<typename Scalar, typename Index, int nr, bool PanelMode> template<typename Scalar, typename Index, int nr, bool Conjugate, bool PanelMode>
struct ei_gemm_pack_rhs<Scalar, Index, nr, ColMajor, PanelMode> struct ei_gemm_pack_rhs<Scalar, Index, nr, ColMajor, Conjugate, PanelMode>
{ {
typedef typename ei_packet_traits<Scalar>::type Packet; typedef typename ei_packet_traits<Scalar>::type Packet;
enum { PacketSize = ei_packet_traits<Scalar>::size }; enum { PacketSize = ei_packet_traits<Scalar>::size };
@ -782,6 +782,7 @@ struct ei_gemm_pack_rhs<Scalar, Index, nr, ColMajor, PanelMode>
Index stride=0, Index offset=0) Index stride=0, Index offset=0)
{ {
ei_assert(((!PanelMode) && stride==0 && offset==0) || (PanelMode && stride>=depth && offset<=stride)); ei_assert(((!PanelMode) && stride==0 && offset==0) || (PanelMode && stride>=depth && offset<=stride));
ei_conj_if<NumTraits<Scalar>::IsComplex && Conjugate> cj;
bool hasAlpha = alpha != Scalar(1); bool hasAlpha = alpha != Scalar(1);
Index packet_cols = (cols/nr) * nr; Index packet_cols = (cols/nr) * nr;
Index count = 0; Index count = 0;
@ -796,19 +797,19 @@ struct ei_gemm_pack_rhs<Scalar, Index, nr, ColMajor, PanelMode>
if (hasAlpha) if (hasAlpha)
for(Index k=0; k<depth; k++) for(Index k=0; k<depth; k++)
{ {
blockB[count+0] = alpha*b0[k]; blockB[count+0] = alpha*cj(b0[k]);
blockB[count+1] = alpha*b1[k]; blockB[count+1] = alpha*cj(b1[k]);
if(nr==4) blockB[count+2] = alpha*b2[k]; if(nr==4) blockB[count+2] = alpha*cj(b2[k]);
if(nr==4) blockB[count+3] = alpha*b3[k]; if(nr==4) blockB[count+3] = alpha*cj(b3[k]);
count += nr; count += nr;
} }
else else
for(Index k=0; k<depth; k++) for(Index k=0; k<depth; k++)
{ {
blockB[count+0] = b0[k]; blockB[count+0] = cj(b0[k]);
blockB[count+1] = b1[k]; blockB[count+1] = cj(b1[k]);
if(nr==4) blockB[count+2] = b2[k]; if(nr==4) blockB[count+2] = cj(b2[k]);
if(nr==4) blockB[count+3] = b3[k]; if(nr==4) blockB[count+3] = cj(b3[k]);
count += nr; count += nr;
} }
// skip what we have after // skip what we have after
@ -823,13 +824,13 @@ struct ei_gemm_pack_rhs<Scalar, Index, nr, ColMajor, PanelMode>
if (hasAlpha) if (hasAlpha)
for(Index k=0; k<depth; k++) for(Index k=0; k<depth; k++)
{ {
blockB[count] = alpha*b0[k]; blockB[count] = alpha*cj(b0[k]);
count += 1; count += 1;
} }
else else
for(Index k=0; k<depth; k++) for(Index k=0; k<depth; k++)
{ {
blockB[count] = b0[k]; blockB[count] = cj(b0[k]);
count += 1; count += 1;
} }
if(PanelMode) count += (stride-offset-depth); if(PanelMode) count += (stride-offset-depth);
@ -838,14 +839,15 @@ struct ei_gemm_pack_rhs<Scalar, Index, nr, ColMajor, PanelMode>
}; };
// this version is optimized for row major matrices // this version is optimized for row major matrices
template<typename Scalar, typename Index, int nr, bool PanelMode> template<typename Scalar, typename Index, int nr, bool Conjugate, bool PanelMode>
struct ei_gemm_pack_rhs<Scalar, Index, nr, RowMajor, PanelMode> struct ei_gemm_pack_rhs<Scalar, Index, nr, RowMajor, Conjugate, PanelMode>
{ {
enum { PacketSize = ei_packet_traits<Scalar>::size }; enum { PacketSize = ei_packet_traits<Scalar>::size };
void operator()(Scalar* blockB, const Scalar* rhs, Index rhsStride, Scalar alpha, Index depth, Index cols, void operator()(Scalar* blockB, const Scalar* rhs, Index rhsStride, Scalar alpha, Index depth, Index cols,
Index stride=0, Index offset=0) Index stride=0, Index offset=0)
{ {
ei_assert(((!PanelMode) && stride==0 && offset==0) || (PanelMode && stride>=depth && offset<=stride)); ei_assert(((!PanelMode) && stride==0 && offset==0) || (PanelMode && stride>=depth && offset<=stride));
ei_conj_if<NumTraits<Scalar>::IsComplex && Conjugate> cj;
bool hasAlpha = alpha != Scalar(1); bool hasAlpha = alpha != Scalar(1);
Index packet_cols = (cols/nr) * nr; Index packet_cols = (cols/nr) * nr;
Index count = 0; Index count = 0;
@ -858,10 +860,10 @@ struct ei_gemm_pack_rhs<Scalar, Index, nr, RowMajor, PanelMode>
for(Index k=0; k<depth; k++) for(Index k=0; k<depth; k++)
{ {
const Scalar* b0 = &rhs[k*rhsStride + j2]; const Scalar* b0 = &rhs[k*rhsStride + j2];
blockB[count+0] = alpha*b0[0]; blockB[count+0] = alpha*cj(b0[0]);
blockB[count+1] = alpha*b0[1]; blockB[count+1] = alpha*cj(b0[1]);
if(nr==4) blockB[count+2] = alpha*b0[2]; if(nr==4) blockB[count+2] = alpha*cj(b0[2]);
if(nr==4) blockB[count+3] = alpha*b0[3]; if(nr==4) blockB[count+3] = alpha*cj(b0[3]);
count += nr; count += nr;
} }
} }
@ -870,10 +872,10 @@ struct ei_gemm_pack_rhs<Scalar, Index, nr, RowMajor, PanelMode>
for(Index k=0; k<depth; k++) for(Index k=0; k<depth; k++)
{ {
const Scalar* b0 = &rhs[k*rhsStride + j2]; const Scalar* b0 = &rhs[k*rhsStride + j2];
blockB[count+0] = b0[0]; blockB[count+0] = cj(b0[0]);
blockB[count+1] = b0[1]; blockB[count+1] = cj(b0[1]);
if(nr==4) blockB[count+2] = b0[2]; if(nr==4) blockB[count+2] = cj(b0[2]);
if(nr==4) blockB[count+3] = b0[3]; if(nr==4) blockB[count+3] = cj(b0[3]);
count += nr; count += nr;
} }
} }
@ -887,7 +889,7 @@ struct ei_gemm_pack_rhs<Scalar, Index, nr, RowMajor, PanelMode>
const Scalar* b0 = &rhs[j2]; const Scalar* b0 = &rhs[j2];
for(Index k=0; k<depth; k++) for(Index k=0; k<depth; k++)
{ {
blockB[count] = alpha*b0[k*rhsStride]; blockB[count] = alpha*cj(b0[k*rhsStride]);
count += 1; count += 1;
} }
if(PanelMode) count += stride-offset-depth; if(PanelMode) count += stride-offset-depth;

View File

@ -73,9 +73,6 @@ static void run(Index rows, Index cols, Index depth,
ei_const_blas_data_mapper<Scalar, Index, LhsStorageOrder> lhs(_lhs,lhsStride); ei_const_blas_data_mapper<Scalar, Index, LhsStorageOrder> lhs(_lhs,lhsStride);
ei_const_blas_data_mapper<Scalar, Index, RhsStorageOrder> rhs(_rhs,rhsStride); ei_const_blas_data_mapper<Scalar, Index, RhsStorageOrder> rhs(_rhs,rhsStride);
if (ConjugateRhs)
alpha = ei_conj(alpha);
typedef typename ei_packet_traits<Scalar>::type PacketType; typedef typename ei_packet_traits<Scalar>::type PacketType;
typedef ei_product_blocking_traits<Scalar> Blocking; typedef ei_product_blocking_traits<Scalar> Blocking;
@ -83,9 +80,9 @@ static void run(Index rows, Index cols, Index depth,
Index mc = std::min(rows,blocking.mc()); // cache block size along the M direction Index mc = std::min(rows,blocking.mc()); // cache block size along the M direction
//Index nc = blocking.nc(); // cache block size along the N direction //Index nc = blocking.nc(); // cache block size along the N direction
ei_gemm_pack_rhs<Scalar, Index, Blocking::nr, RhsStorageOrder> pack_rhs; ei_gemm_pack_lhs<Scalar, Index, Blocking::mr, LhsStorageOrder, ConjugateLhs> pack_lhs;
ei_gemm_pack_lhs<Scalar, Index, Blocking::mr, LhsStorageOrder> pack_lhs; ei_gemm_pack_rhs<Scalar, Index, Blocking::nr, RhsStorageOrder, ConjugateRhs> pack_rhs;
ei_gebp_kernel<Scalar, Index, Blocking::mr, Blocking::nr, ei_conj_helper<ConjugateLhs,ConjugateRhs> > gebp; ei_gebp_kernel<Scalar, Index, Blocking::mr, Blocking::nr> gebp;
#ifdef EIGEN_HAS_OPENMP #ifdef EIGEN_HAS_OPENMP
if(info) if(info)

View File

@ -29,10 +29,15 @@
// implement and control fast level 2 and level 3 BLAS-like routines. // implement and control fast level 2 and level 3 BLAS-like routines.
// forward declarations // forward declarations
template<typename Scalar, typename Index, int mr, int nr, typename Conj>
// Provides scalar/packet-wise product and product with accumulation
// with optional conjugation of the arguments.
template<bool ConjLhs, bool ConjRhs> struct ei_conj_helper;
template<typename Scalar, typename Index, int mr, int nr, typename Conj = ei_conj_helper<false,false> >
struct ei_gebp_kernel; struct ei_gebp_kernel;
template<typename Scalar, typename Index, int nr, int StorageOrder, bool PanelMode=false> template<typename Scalar, typename Index, int nr, int StorageOrder, bool Conjugate = false, bool PanelMode=false>
struct ei_gemm_pack_rhs; struct ei_gemm_pack_rhs;
template<typename Scalar, typename Index, int mr, int StorageOrder, bool Conjugate = false, bool PanelMode = false> template<typename Scalar, typename Index, int mr, int StorageOrder, bool Conjugate = false, bool PanelMode = false>
@ -53,10 +58,6 @@ template<bool ConjugateLhs, bool ConjugateRhs, typename Scalar, typename Index,
static void ei_cache_friendly_product_rowmajor_times_vector( static void ei_cache_friendly_product_rowmajor_times_vector(
const Scalar* lhs, Index lhsStride, const Scalar* rhs, Index rhsSize, ResType& res, Scalar alpha); const Scalar* lhs, Index lhsStride, const Scalar* rhs, Index rhsSize, ResType& res, Scalar alpha);
// Provides scalar/packet-wise product and product with accumulation
// with optional conjugation of the arguments.
template<bool ConjLhs, bool ConjRhs> struct ei_conj_helper;
template<> struct ei_conj_helper<false,false> template<> struct ei_conj_helper<false,false>
{ {
template<typename T> template<typename T>