diff --git a/Eigen/src/LU/PartialPivLU.h b/Eigen/src/LU/PartialPivLU.h
index b414b5c46..94c30616a 100644
--- a/Eigen/src/LU/PartialPivLU.h
+++ b/Eigen/src/LU/PartialPivLU.h
@@ -331,17 +331,15 @@ PartialPivLU<MatrixType>::PartialPivLU(EigenBase<InputType>& matrix)
 namespace internal {
 
 /** \internal This is the blocked version of fullpivlu_unblocked() */
-template<typename Scalar, int StorageOrder, typename PivIndex>
+template<typename Scalar, int StorageOrder, typename PivIndex, int SizeAtCompileTime=Dynamic>
 struct partial_lu_impl
 {
-  // FIXME add a stride to Map, so that the following mapping becomes easier,
-  // another option would be to create an expression being able to automatically
-  // warp any Map, Matrix, and Block expressions as a unique type, but since that's exactly
-  // a Map + stride, why not adding a stride to Map, and convenient ctors from a Matrix,
-  // and Block.
-  typedef Map<Matrix<Scalar, Dynamic, Dynamic, StorageOrder> > MapLU;
-  typedef Block<MapLU, Dynamic, Dynamic> MatrixType;
-  typedef Block<MatrixType,Dynamic,Dynamic> BlockType;
+  static const int UnBlockedBound = 16;
+  static const bool UnBlockedAtCompileTime = SizeAtCompileTime!=Dynamic && SizeAtCompileTime<=UnBlockedBound;
+  static const int ActualSizeAtCompileTime = UnBlockedAtCompileTime ? SizeAtCompileTime : Dynamic;
+  typedef Matrix<Scalar, ActualSizeAtCompileTime, ActualSizeAtCompileTime, StorageOrder> MatrixType;
+  typedef Ref<MatrixType> MatrixTypeRef;
+  typedef Ref<Matrix<Scalar, Dynamic, Dynamic, StorageOrder> > BlockType;
   typedef typename MatrixType::RealScalar RealScalar;
 
   /** \internal performs the LU decomposition in-place of the matrix \a lu
@@ -354,7 +352,7 @@ struct partial_lu_impl
     *
     * \returns The index of the first pivot which is exactly zero if any, or a negative number otherwise.
     */
-  static Index unblocked_lu(MatrixType& lu, PivIndex* row_transpositions, PivIndex& nb_transpositions)
+  static Index unblocked_lu(MatrixTypeRef& lu, PivIndex* row_transpositions, PivIndex& nb_transpositions)
   {
     typedef scalar_score_coeff_op<Scalar> Scoring;
     typedef typename Scoring::result_type Score;
@@ -417,13 +415,12 @@ struct partial_lu_impl
     */
   static Index blocked_lu(Index rows, Index cols, Scalar* lu_data, Index luStride, PivIndex* row_transpositions, PivIndex& nb_transpositions, Index maxBlockSize=256)
   {
-    MapLU lu1(lu_data,StorageOrder==RowMajor?rows:luStride,StorageOrder==RowMajor?luStride:cols);
-    MatrixType lu(lu1,0,0,rows,cols);
+    MatrixTypeRef lu = MatrixType::Map(lu_data,rows, cols, OuterStride<>(luStride));
 
     const Index size = (std::min)(rows,cols);
 
     // if the matrix is too small, no blocking:
-    if(size<=16)
+    if(UnBlockedAtCompileTime || size<=UnBlockedBound)
     {
       return unblocked_lu(lu, row_transpositions, nb_transpositions);
     }
@@ -449,12 +446,12 @@ struct partial_lu_impl
       //                          A00 | A01 | A02
       // lu  = A_0 | A_1 | A_2 =  A10 | A11 | A12
       //                          A20 | A21 | A22
-      BlockType A_0(lu,0,0,rows,k);
-      BlockType A_2(lu,0,k+bs,rows,tsize);
-      BlockType A11(lu,k,k,bs,bs);
-      BlockType A12(lu,k,k+bs,bs,tsize);
-      BlockType A21(lu,k+bs,k,trows,bs);
-      BlockType A22(lu,k+bs,k+bs,trows,tsize);
+      BlockType A_0 = lu.block(0,0,rows,k);
+      BlockType A_2 = lu.block(0,k+bs,rows,tsize);
+      BlockType A11 = lu.block(k,k,bs,bs);
+      BlockType A12 = lu.block(k,k+bs,bs,tsize);
+      BlockType A21 = lu.block(k+bs,k,trows,bs);
+      BlockType A22 = lu.block(k+bs,k+bs,trows,tsize);
 
       PivIndex nb_transpositions_in_panel;
       // recursively call the blocked LU algorithm on [A11^T A21^T]^T
@@ -497,7 +494,9 @@ void partial_lu_inplace(MatrixType& lu, TranspositionType& row_transpositions, t
   eigen_assert((&row_transpositions.coeffRef(1)-&row_transpositions.coeffRef(0)) == 1);
 
   partial_lu_impl
-    <typename MatrixType::Scalar, MatrixType::Flags&RowMajorBit?RowMajor:ColMajor, typename TranspositionType::StorageIndex>
+    < typename MatrixType::Scalar, MatrixType::Flags&RowMajorBit?RowMajor:ColMajor,
+      typename TranspositionType::StorageIndex,
+      EIGEN_SIZE_MIN_PREFER_FIXED(MatrixType::RowsAtCompileTime,MatrixType::ColsAtCompileTime)>
     ::blocked_lu(lu.rows(), lu.cols(), &lu.coeffRef(0,0), lu.outerStride(), &row_transpositions.coeffRef(0), nb_transpositions);
 }