Benoit Jacob 2015-03-06 19:13:36 -05:00
commit a4f956b1da
9 changed files with 205 additions and 99 deletions

View File

@@ -105,7 +105,8 @@ struct traits<Ref<_PlainObjectType, _Options, _StrideType> >
       OuterStrideMatch = Derived::IsVectorAtCompileTime
                       || int(StrideType::OuterStrideAtCompileTime)==int(Dynamic) || int(StrideType::OuterStrideAtCompileTime)==int(Derived::OuterStrideAtCompileTime),
       AlignmentMatch = (_Options!=Aligned) || ((PlainObjectType::Flags&AlignedBit)==0) || ((traits<Derived>::Flags&AlignedBit)==AlignedBit),
-      MatchAtCompileTime = HasDirectAccess && StorageOrderMatch && InnerStrideMatch && OuterStrideMatch && AlignmentMatch
+      ScalarTypeMatch = internal::is_same<typename PlainObjectType::Scalar, typename Derived::Scalar>::value,
+      MatchAtCompileTime = HasDirectAccess && StorageOrderMatch && InnerStrideMatch && OuterStrideMatch && AlignmentMatch && ScalarTypeMatch
     };
     typedef typename internal::conditional<MatchAtCompileTime,internal::true_type,internal::false_type>::type type;
   };
@@ -184,9 +185,11 @@ protected:
 template<typename PlainObjectType, int Options, typename StrideType> class Ref
   : public RefBase<Ref<PlainObjectType, Options, StrideType> >
 {
+  private:
     typedef internal::traits<Ref> Traits;
     template<typename Derived>
-    EIGEN_DEVICE_FUNC inline Ref(const PlainObjectBase<Derived>& expr);
+    EIGEN_DEVICE_FUNC inline Ref(const PlainObjectBase<Derived>& expr,
+                                 typename internal::enable_if<bool(Traits::template match<Derived>::MatchAtCompileTime),Derived>::type* = 0);
   public:

     typedef RefBase<Ref> Base;
@@ -195,13 +198,15 @@ template<typename PlainObjectType, int Options, typename StrideType> class Ref
     #ifndef EIGEN_PARSED_BY_DOXYGEN
     template<typename Derived>
-    EIGEN_DEVICE_FUNC inline Ref(PlainObjectBase<Derived>& expr)
+    EIGEN_DEVICE_FUNC inline Ref(PlainObjectBase<Derived>& expr,
+                                 typename internal::enable_if<bool(Traits::template match<Derived>::MatchAtCompileTime),Derived>::type* = 0)
     {
       EIGEN_STATIC_ASSERT(bool(Traits::template match<Derived>::MatchAtCompileTime), STORAGE_LAYOUT_DOES_NOT_MATCH);
       Base::construct(expr.derived());
     }
     template<typename Derived>
-    EIGEN_DEVICE_FUNC inline Ref(const DenseBase<Derived>& expr)
+    EIGEN_DEVICE_FUNC inline Ref(const DenseBase<Derived>& expr,
+                                 typename internal::enable_if<bool(Traits::template match<Derived>::MatchAtCompileTime),Derived>::type* = 0)
     #else
     template<typename Derived>
     inline Ref(DenseBase<Derived>& expr)
@@ -228,7 +233,8 @@ template<typename TPlainObjectType, int Options, typename StrideType> class Ref<
     EIGEN_DENSE_PUBLIC_INTERFACE(Ref)

     template<typename Derived>
-    EIGEN_DEVICE_FUNC inline Ref(const DenseBase<Derived>& expr)
+    EIGEN_DEVICE_FUNC inline Ref(const DenseBase<Derived>& expr,
+                                 typename internal::enable_if<bool(Traits::template match<Derived>::ScalarTypeMatch),Derived>::type* = 0)
     {
//      std::cout << match_helper<Derived>::HasDirectAccess << "," << match_helper<Derived>::OuterStrideMatch << "," << match_helper<Derived>::InnerStrideMatch << "\n";
//      std::cout << int(StrideType::OuterStrideAtCompileTime) << " - " << int(Derived::OuterStrideAtCompileTime) << "\n";
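With ScalarTypeMatch now part of MatchAtCompileTime and the constructors gated by enable_if, a Ref constructor that cannot match drops out of overload resolution instead of tripping a static assert inside its body, so functions can be overloaded on different Ref<> types. A minimal sketch of what this enables, mirroring the test_ref_overloads test added at the bottom of this commit (the function name here is illustrative):

#include <Eigen/Core>
using namespace Eigen;

// Before this commit, both constructors were viable for a MatrixXd argument
// (the scalar mismatch was only caught by a static assert inside the
// constructor), making the call ambiguous; with the SFINAE guard only the
// matching overload survives.
int overloaded(Ref<const MatrixXd>) { return 4; }
int overloaded(Ref<const MatrixXf>) { return 5; }

int main()
{
  MatrixXd Ad = MatrixXd::Random(3,3);
  MatrixXf Af = MatrixXf::Random(3,3);
  return (overloaded(Ad)==4 && overloaded(Af)==5) ? 0 : 1;
}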

View File

@@ -213,8 +213,22 @@ void computeProductBlockingSizes(Index& k, Index& m, Index& n, Index num_threads
     // Here, nc is chosen such that a block of kc x nc of the rhs fit within half of L2.
     // The second half is implicitly reserved to access the result and lhs coefficients.
     // When k<max_kc, then nc can arbitrarily growth. In practice, it seems to be fruitful
-    // to limit this growth: we bound nc to growth by a factor x1.5, leading to:
-    const Index max_nc = (3*actual_l2)/(2*2*max_kc*sizeof(RhsScalar));
+    // to limit this growth: we bound nc to growth by a factor x1.5.
+    // However, if the entire lhs block fit within L1, then we are not going to block on the rows at all,
+    // and it becomes fruitful to keep the packed rhs blocks in L1 if there is enough remaining space.
+    Index max_nc;
+    const Index lhs_bytes = m * k * sizeof(LhsScalar);
+    const Index remaining_l1 = l1- k_sub - lhs_bytes;
+    if(remaining_l1 >= Index(Traits::nr*sizeof(RhsScalar))*k)
+    {
+      // L1 blocking
+      max_nc = remaining_l1 / (k*sizeof(RhsScalar));
+    }
+    else
+    {
+      // L2 blocking
+      max_nc = (3*actual_l2)/(2*2*max_kc*sizeof(RhsScalar));
+    }
     // WARNING Below, we assume that Traits::nr is a power of two.
     Index nc = std::min<Index>(actual_l2/(2*k*sizeof(RhsScalar)), max_nc) & (~(Traits::nr-1));
     if(n>nc)
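To make the new branch concrete, a rough worked example with assumed numbers (not from the source): take a 32KB L1, single precision so sizeof(LhsScalar)==sizeof(RhsScalar)==4, Traits::nr==4, m==24, k==200, and neglect k_sub. The packed lhs block takes 24*200*4 = 19200 bytes, leaving remaining_l1 ≈ 13568 bytes; nr rhs columns need 4*4*200 = 3200 bytes, which fits, so the L1 branch is taken and max_nc ≈ 13568/(200*4) = 16 columns. With m==48 the lhs alone would exceed L1 and the heuristic falls back to the L2 bound.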
@@ -230,6 +244,7 @@ void computeProductBlockingSizes(Index& k, Index& m, Index& n, Index num_threads
   {
     // So far, no blocking at all, i.e., kc==k, and nc==n.
     // In this case, let's perform a blocking over the rows such that the packed lhs data is kept in cache L1/L2
+    // TODO: part of this blocking strategy is now implemented within the kernel itself, so the L1-based heuristic here should be obsolete.
     Index problem_size = k*n*sizeof(LhsScalar);
     Index actual_lm = actual_l2;
     Index max_mc = m;
@@ -952,32 +967,31 @@ void gebp_kernel<LhsScalar,RhsScalar,Index,DataMapper,mr,nr,ConjugateLhs,Conjuga
     // Usually, make sense only with FMA
     if(mr>=3*Traits::LhsProgress)
     {
-      #ifdef EIGEN_TEST_SPECIFIC_LOOP_SWAP_CRITERION
-      const bool swap_loops = EIGEN_TEST_SPECIFIC_LOOP_SWAP_CRITERION;
-      #else
-      const bool swap_loops = depth<48;
-      #endif
-      Index bound1 = swap_loops ? packet_cols4 : peeled_mc3;
-      Index bound2 = !swap_loops ? packet_cols4 : peeled_mc3;
-      Index incr1 = swap_loops ? nr : 3*Traits::LhsProgress;
-      Index incr2 = !swap_loops ? nr : 3*Traits::LhsProgress;
       PossiblyRotatingKernelHelper<gebp_kernel> possiblyRotatingKernelHelper(traits);

-      // loops on each largest micro horizontal panel of lhs (3*Traits::LhsProgress x depth)
-      // and on each largest micro vertical panel of rhs (depth * nr)
-      for(Index it1=0; it1<bound1; it1+=incr1)
+      // Here, the general idea is to loop on each largest micro horizontal panel of the lhs (3*Traits::LhsProgress x depth)
+      // and on each largest micro vertical panel of the rhs (depth * nr).
+      // Blocking sizes, i.e., 'depth' has been computed so that the micro horizontal panel of the lhs fit in L1.
+      // However, if depth is too small, we can extend the number of rows of these horizontal panels.
+      // This actual number of rows is computed as follow:
+      const Index l1 = 32*1024; // in Bytes, TODO, l1 should be passed to this function.
+      #ifdef EIGEN_TEST_SPECIFIC_BLOCKING_SIZES
+      const Index actual_panel_rows = (3*LhsProgress) * std::max<Index>(1,( (l1 - sizeof(ResScalar)*mr*nr - depth*nr*sizeof(RhsScalar)) / (depth * sizeof(LhsScalar) * 3*LhsProgress) ));
+      #else
+      const Index actual_panel_rows = (3*LhsProgress) * ( (l1 - sizeof(ResScalar)*mr*nr - depth*nr*sizeof(RhsScalar)) / (depth * sizeof(LhsScalar) * 3*LhsProgress) );
+      #endif
+      for(Index i1=0; i1<peeled_mc3; i1+=actual_panel_rows)
       {
-        for(Index it2=0; it2<bound2; it2+=incr2)
+        const Index actual_panel_end = (std::min)(i1+actual_panel_rows, peeled_mc3);
+        for(Index j2=0; j2<packet_cols4; j2+=nr)
+        {
+          for(Index i=i1; i<actual_panel_end; i+=3*LhsProgress)
         {
-          Index i = swap_loops ? it2 : it1;
-          Index j2 = !swap_loops ? it2 : it1;
-          // We select a 3*Traits::LhsProgress x nr micro block of res which is entirely
+          // We selected a 3*Traits::LhsProgress x nr micro block of res which is entirely
           // stored into 3 x nr registers.
-          const LhsScalar* blA = &blockA[i*strideA+offsetA*(3*Traits::LhsProgress)];
+          const LhsScalar* blA = &blockA[i*strideA+offsetA*(3*LhsProgress)];
           prefetch(&blA[0]);

           // gets res block as register
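To see what the new panel-height formula yields, a rough worked example with assumed numbers (AVX single precision: LhsProgress==8, mr==24, nr==4, depth==100, 4-byte scalars, and the hard-coded 32KB l1): the res micro block pins 4*24*4 = 384 bytes and the packed rhs panel takes 100*4*4 = 1600 bytes, leaving 30784 bytes; dividing by the 100*4*24 = 9600 bytes of one 24-row lhs panel gives 3, so actual_panel_rows = 72. The kernel thus sweeps three 24-row panels over the same packed rhs columns before advancing, replacing the removed swap_loops heuristic with an explicit L1-capacity criterion.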
@@ -1114,10 +1128,9 @@ void gebp_kernel<LhsScalar,RhsScalar,Index,DataMapper,mr,nr,ConjugateLhs,Conjuga
        }

        // Deal with remaining columns of the rhs
-        if(packet_cols4<cols)
-        for(Index i=0; i<peeled_mc3; i+=3*Traits::LhsProgress)
-        {
         for(Index j2=packet_cols4; j2<cols; j2++)
         {
+          for(Index i=i1; i<actual_panel_end; i+=3*LhsProgress)
+          {
            // One column at a time
            const LhsScalar* blA = &blockA[i*strideA+offsetA*(3*Traits::LhsProgress)];
@@ -1193,30 +1206,26 @@ void gebp_kernel<LhsScalar,RhsScalar,Index,DataMapper,mr,nr,ConjugateLhs,Conjuga
          }
        }
      }
+    }

    //---------- Process 2 * LhsProgress rows at once ----------
    if(mr>=2*Traits::LhsProgress)
    {
-      #ifdef EIGEN_TEST_SPECIFIC_LOOP_SWAP_CRITERION
-      const bool swap_loops = (mr<3*Traits::LhsProgress) && (EIGEN_TEST_SPECIFIC_LOOP_SWAP_CRITERION);
-      #else
-      const bool swap_loops = (mr<3*Traits::LhsProgress) && (depth<48);
-      #endif
-      Index start1 = swap_loops ? 0 : peeled_mc3;
-      Index start2 = !swap_loops ? 0 : peeled_mc3;
-      Index bound1 = swap_loops ? packet_cols4 : peeled_mc2;
-      Index bound2 = !swap_loops ? packet_cols4 : peeled_mc2;
-      Index incr1 = swap_loops ? nr : 2*Traits::LhsProgress;
-      Index incr2 = !swap_loops ? nr : 2*Traits::LhsProgress;
-      for(Index it1=start1; it1<bound1; it1+=incr1)
+      const Index l1 = 32*1024; // in Bytes, TODO, l1 should be passed to this function.
+      #ifdef EIGEN_TEST_SPECIFIC_BLOCKING_SIZES
+      Index actual_panel_rows = (2*LhsProgress) * std::max<Index>(1,( (l1 - sizeof(ResScalar)*mr*nr - depth*nr*sizeof(RhsScalar)) / (depth * sizeof(LhsScalar) * 2*LhsProgress) ));
+      #else
+      Index actual_panel_rows = (2*LhsProgress) * ( (l1 - sizeof(ResScalar)*mr*nr - depth*nr*sizeof(RhsScalar)) / (depth * sizeof(LhsScalar) * 2*LhsProgress) );
+      #endif
+      for(Index i1=peeled_mc3; i1<peeled_mc2; i1+=actual_panel_rows)
      {
-        for(Index it2=start2; it2<bound2; it2+=incr2)
+        Index actual_panel_end = (std::min)(i1+actual_panel_rows, peeled_mc2);
+        for(Index j2=0; j2<packet_cols4; j2+=nr)
+        {
+          for(Index i=i1; i<actual_panel_end; i+=2*LhsProgress)
        {
-          Index i = swap_loops ? it2 : it1;
-          Index j2 = !swap_loops ? it2 : it1;
-          // We select a 2*Traits::LhsProgress x nr micro block of res which is entirely
+          // We selected a 2*Traits::LhsProgress x nr micro block of res which is entirely
          // stored into 2 x nr registers.
          const LhsScalar* blA = &blockA[i*strideA+offsetA*(2*Traits::LhsProgress)];
@@ -1324,10 +1333,9 @@ void gebp_kernel<LhsScalar,RhsScalar,Index,DataMapper,mr,nr,ConjugateLhs,Conjuga
        }

        // Deal with remaining columns of the rhs
-        if(packet_cols4<cols)
-        for(Index i=peeled_mc3; i<peeled_mc2; i+=2*Traits::LhsProgress)
-        {
         for(Index j2=packet_cols4; j2<cols; j2++)
         {
+          for(Index i=i1; i<actual_panel_end; i+=2*LhsProgress)
+          {
            // One column at a time
            const LhsScalar* blA = &blockA[i*strideA+offsetA*(2*Traits::LhsProgress)];
@@ -1398,6 +1406,7 @@ void gebp_kernel<LhsScalar,RhsScalar,Index,DataMapper,mr,nr,ConjugateLhs,Conjuga
          }
        }
      }
+    }

    //---------- Process 1 * LhsProgress rows at once ----------
    if(mr>=1*Traits::LhsProgress)
    {

View File

@@ -457,6 +457,8 @@ struct generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,GemmProduct>
  static void scaleAndAddTo(Dest& dst, const Lhs& a_lhs, const Rhs& a_rhs, const Scalar& alpha)
  {
    eigen_assert(dst.rows()==a_lhs.rows() && dst.cols()==a_rhs.cols());
+    if(a_lhs.cols()==0 || a_lhs.rows()==0 || a_rhs.cols()==0)
+      return;
+
    typename internal::add_const_on_value_type<ActualLhsType>::type lhs = LhsBlasTraits::extract(a_lhs);
    typename internal::add_const_on_value_type<ActualRhsType>::type rhs = RhsBlasTraits::extract(a_rhs);
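The early return guards the degenerate shapes exercised by the new zero_sized_objects test below: with a zero inner dimension the product contributes nothing, and with a zero outer dimension there is nothing to write, so the gemm machinery and its blocking-size computation must not be entered at all. A minimal illustration of the behavior this enables, assuming the patched Eigen:

#include <Eigen/Core>
#include <iostream>
using namespace Eigen;

int main()
{
  MatrixXf a(3,0), b(0,4);   // inner dimension is zero
  MatrixXf res = a*b;        // well-defined: the 3x4 zero matrix, gemm is skipped
  std::cout << res.rows() << "x" << res.cols() << "\n";  // prints 3x4
}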

View File

@@ -203,7 +203,7 @@ void benchmark_t::run()
    double starttime = timer.getCpuTime();
    for (int i = 0; i < iters_at_a_time; i++) {
-      dst[matrix_index] = lhs[matrix_index] * rhs[matrix_index];
+      dst[matrix_index].noalias() = lhs[matrix_index] * rhs[matrix_index];
      matrix_index++;
      if (matrix_index == matrix_pool_size) {
        matrix_index = 0;
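This one-line change matters for the benchmark's accuracy: plain dst = lhs*rhs evaluates the product into a temporary (to protect against aliasing) and then copies it into dst, whereas .noalias() promises that dst does not alias the operands, so the product is written straight into dst and the loop times only the gemm kernel. The same idiom in general Eigen code, assuming c never aliases a or b:

// c is known not to alias a or b: skip the aliasing temporary.
c.noalias() = a * b;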

View File

@@ -1,39 +1,43 @@
-3.0.1
-3.1.1
-3.2.0
+#3.0.1
+#3.1.1
+#3.2.0
 3.2.4
-5745:37f59e65eb6c
-5891:d8652709345d
-5893:24b4dc92c6d3
-5895:997c2ef9fc8b
-5904:e1eafd14eaa1
-5908:f8ee3c721251
-5921:ca808bb456b0
-5927:8b1001f9e3ac
-5937:5a4ca1ad8c53
-5949:f3488f4e45b2
-5969:e09031dccfd9
-5992:4a429f5e0483
+#5745:37f59e65eb6c
+5891:d8652709345d # introduce AVX
+#5893:24b4dc92c6d3 # merge
+5895:997c2ef9fc8b # introduce FMA
+#5904:e1eafd14eaa1 # complex and AVX
+5908:f8ee3c721251 # improve packing with ptranspose
+#5921:ca808bb456b0 # merge
+#5927:8b1001f9e3ac
+5937:5a4ca1ad8c53 # New gebp kernel handling up to 3 packets x 4 register-level blocks
+#5949:f3488f4e45b2 # merge
+#5969:e09031dccfd9 # Disable 3pX4 kernel on Altivec
+#5992:4a429f5e0483 # merge
 before-evaluators
-6334:f6a45e5b8b7c
-6639:c9121c60b5c7
-6655:06f163b5221f
-6677:700e023044e7 # FMA has been wrongly disabled
-6681:11d31dafb0e3
-6699:5e6e8e10aad1 # merge default to tensors
-6726:ff2d2388e7b9 # merge default to tensors
-6742:0cbd6195e829 # merge default to tensors
-6747:853d2bafeb8f # Generalized the gebp apis
+#6334:f6a45e5b8b7c # Implement evaluator for sparse outer products
+#6639:c9121c60b5c7
+#6655:06f163b5221f # Properly detect FMA support on ARM
+#6677:700e023044e7 # FMA has been wrongly disabled
+#6681:11d31dafb0e3
+#6699:5e6e8e10aad1 # merge default to tensors
+#6726:ff2d2388e7b9 # merge default to tensors
+#6742:0cbd6195e829 # merge default to tensors
+#6747:853d2bafeb8f # Generalized the gebp apis
 6765:71584fd55762 # Made the blocking computation aware of the l3 cache; Also optimized the blocking parameters to take into account the number of threads used for a computation
-6781:9cc5a931b2c6 # generalized gemv
-6792:f6e1daab600a # ensured that contractions that can be reduced to a matrix vector product
-6844:039efd86b75c # merge tensor
+#6781:9cc5a931b2c6 # generalized gemv
+#6792:f6e1daab600a # ensured that contractions that can be reduced to a matrix vector product
+#6844:039efd86b75c # merge tensor
 6845:7333ed40c6ef # change prefetching in gebp
-6856:b5be5e10eb7f # merge index conversion
-6893:c3a64aba7c70 # clean blocking size computation
-6898:6fb31ebe6492 # rotating kernel for ARM
+#6856:b5be5e10eb7f # merge index conversion
+#6893:c3a64aba7c70 # clean blocking size computation
+#6898:6fb31ebe6492 # rotating kernel for ARM
 6899:877facace746 # rotating kernel for ARM only
-6904:c250623ae9fa # result_of
+#6904:c250623ae9fa # result_of
 6921:915f1b1fc158 # fix prefetching change for ARM
 6923:9ff25f6dacc6 # prefetching
 6933:52572e60b5d3 # blocking size strategy
+6937:c8c042f286b2 # avoid redundant pack_rhs
+6981:7e5d6f78da59 # dynamic loop swapping
+6984:45f26866c091 # rm dynamic loop swapping, adjust lhs's micro panel height to fully exploit L1 cache
+6986:a675d05b6f8f # blocking heuristic: block on the rhs in L1 if the lhs fit in L1.

View File

@@ -6,6 +6,7 @@
 # Options:
 # -up : enforce the recomputation of existing data, and keep best results as a merging strategy
+# -s : recompute selected changesets only and keep bests

 if echo "$*" | grep '\-up' > /dev/null; then
@@ -14,14 +15,30 @@ else
   update=false
 fi

-if [ $update == true ]; then
+if echo "$*" | grep '\-s' > /dev/null; then
+  selected=true
+else
+  selected=false
+fi
+
+global_args="$*"
+
+if [ $selected == true ]; then
+  echo "Recompute selected changesets only and keep bests"
+elif [ $update == true ]; then
   echo "(Re-)Compute all changesets and keep bests"
 else
   echo "Skip previously computed changesets"
 fi

 if [ ! -d "eigen_src" ]; then
   hg clone https://bitbucket.org/eigen/eigen eigen_src
+else
+  cd eigen_src
+  hg pull -u
+  cd ..
 fi

 if [ ! -z '$CXX' ]; then
@@ -61,17 +78,31 @@ function test_current
   scalar=$2
   name=$3

+  prev=""
+  if [ -e "$name.backup" ]; then
     prev=`grep $rev "$name.backup" | cut -c 14-`
+  fi
   res=$prev
   count_rev=`echo $prev | wc -w`
   count_ref=`cat "settings.txt" | wc -l`
-  if [ $update == true ] || [ $count_rev != $count_ref ]; then
+  if echo "$global_args" | grep "$rev" > /dev/null; then
+    rev_found=true
+  else
+    rev_found=false
+  fi
+#  echo $update et $selected et $rev_found because $rev et "$global_args"
+#  echo $count_rev et $count_ref
+  if [ $update == true ] || [ $count_rev != $count_ref ] || ([ $selected == true ] && [ $rev_found == true ]); then
     if $CXX -O2 -DNDEBUG -march=native $CXX_FLAGS -I eigen_src gemm.cpp -DSCALAR=$scalar -o $name; then
       curr=`./$name`
-      echo merge $prev
-      echo with $curr
+      if [ $count_rev == $count_ref ]; then
+        echo "merge previous $prev"
+        echo "with new $curr"
+      else
+        echo "got $curr"
+      fi
       res=`merge "$curr" "$prev"`
-      echo $res
+#      echo $res
       echo "$rev $res" >> $name.out
     else
       echo "Compilation failed, skip rev $rev"
@@ -86,12 +117,12 @@ make_backup $PREFIX"sgemm"
 make_backup $PREFIX"dgemm"
 make_backup $PREFIX"cgemm"

-cut -f1 -d"#" < changesets.txt | while read rev
+cut -f1 -d"#" < changesets.txt | grep -E '[[:alnum:]]' | while read rev
 do
   if [ ! -z '$rev' ]; then
     echo "Testing rev $rev"
     cd eigen_src
-    hg up -C $rev
+    hg up -C $rev > /dev/null
     actual_rev=`hg identify | cut -f1 -d' '`
     cd ..
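Taken together, the script gains a selective mode: passing -s along with one or more changeset ids recomputes only those revisions while still merging with the best previous results, since test_current greps the script's arguments for each rev. An illustrative invocation (the script's file name is not shown in this diff): ./run.sh -s 6984:45f26866c091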

View File

@@ -1,5 +1,6 @@
 8 8 8
 9 9 9
+24 24 24
 239 239 239
 240 240 240
 2400 24 24
@@ -8,4 +9,7 @@
 24 2400 2400
 2400 24 2400
 2400 2400 24
+2400 2400 64
+4800 23 160
+23 4800 160
 2400 2400 2400

View File

@@ -110,7 +110,32 @@ void mat_mat_scalar_scalar_product()
   VERIFY_IS_APPROX(dNdxy.transpose()*dNdxy*det*wt, det*wt*dNdxy.transpose()*dNdxy);
 }

-void zero_sized_objects()
+template <typename MatrixType>
+void zero_sized_objects(const MatrixType& m)
+{
+  Index rows = m.rows();
+  Index cols = m.cols();
+
+  {
+    MatrixType res, a(rows,0), b(0,cols);
+    VERIFY_IS_APPROX( (res=a*b), MatrixType::Zero(rows,cols) );
+    VERIFY_IS_APPROX( (res=a*a.transpose()), MatrixType::Zero(rows,rows) );
+    VERIFY_IS_APPROX( (res=b.transpose()*b), MatrixType::Zero(cols,cols) );
+    VERIFY_IS_APPROX( (res=b.transpose()*a.transpose()), MatrixType::Zero(cols,rows) );
+  }
+
+  {
+    MatrixType res, a(rows,cols), b(cols,0);
+    res = a*b;
+    VERIFY(res.rows()==rows && res.cols()==0);
+    b.resize(0,rows);
+    res = b*a;
+    VERIFY(res.rows()==0 && res.cols()==cols);
+  }
+}
+
+void bug_127()
 {
   // Bug 127
   //
@@ -171,7 +196,8 @@ void test_product_extra()
     CALL_SUBTEST_2( mat_mat_scalar_scalar_product() );
     CALL_SUBTEST_3( product_extra(MatrixXcf(internal::random<int>(1,EIGEN_TEST_MAX_SIZE/2), internal::random<int>(1,EIGEN_TEST_MAX_SIZE/2))) );
     CALL_SUBTEST_4( product_extra(MatrixXcd(internal::random<int>(1,EIGEN_TEST_MAX_SIZE/2), internal::random<int>(1,EIGEN_TEST_MAX_SIZE/2))) );
+    CALL_SUBTEST_1( zero_sized_objects(MatrixXf(internal::random<int>(1,EIGEN_TEST_MAX_SIZE), internal::random<int>(1,EIGEN_TEST_MAX_SIZE))) );
   }
-  CALL_SUBTEST_5( zero_sized_objects() );
+  CALL_SUBTEST_5( bug_127() );
   CALL_SUBTEST_6( unaligned_objects() );
 }

View File

@@ -228,6 +228,28 @@ void call_ref()
   VERIFY_EVALUATION_COUNT( call_ref_7(c,c), 0);
 }

+typedef Matrix<double,Dynamic,Dynamic,RowMajor> RowMatrixXd;
+int test_ref_overload_fun1(Ref<MatrixXd> )    { return 1; }
+int test_ref_overload_fun1(Ref<RowMatrixXd> ) { return 2; }
+int test_ref_overload_fun1(Ref<MatrixXf> )    { return 3; }
+
+int test_ref_overload_fun2(Ref<const MatrixXd> ) { return 4; }
+int test_ref_overload_fun2(Ref<const MatrixXf> ) { return 5; }
+
+// See also bug 969
+void test_ref_overloads()
+{
+  MatrixXd Ad, Bd;
+  RowMatrixXd rAd, rBd;
+  VERIFY( test_ref_overload_fun1(Ad)==1 );
+  VERIFY( test_ref_overload_fun1(rAd)==2 );
+
+  MatrixXf Af, Bf;
+  VERIFY( test_ref_overload_fun2(Ad)==4 );
+  VERIFY( test_ref_overload_fun2(Ad+Bd)==4 );
+  VERIFY( test_ref_overload_fun2(Af+Bf)==5 );
+}
+
 void test_ref()
 {
   for(int i = 0; i < g_repeat; i++) {
@@ -248,4 +270,6 @@ void test_ref()
     CALL_SUBTEST_5( ref_matrix(MatrixXi(internal::random<int>(1,10),internal::random<int>(1,10))) );
     CALL_SUBTEST_6( call_ref() );
   }
+
+  CALL_SUBTEST_7( test_ref_overloads() );
 }