From 46e9cdb7fea25d7f7aef4332b9c3ead3857e213d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antonio=20S=C3=A1nchez?= Date: Tue, 5 Dec 2023 21:22:55 +0000 Subject: [PATCH] Clang-format tests, examples, libraries, benchmarks, etc. --- bench/BenchSparseUtil.h | 98 +- bench/BenchTimer.h | 151 +- bench/BenchUtil.h | 72 +- bench/analyze-blocking-sizes.cpp | 294 +-- bench/basicbenchmark.cpp | 33 +- bench/basicbenchmark.h | 45 +- bench/benchBlasGemm.cpp | 162 +- bench/benchCholesky.cpp | 107 +- bench/benchEigenSolver.cpp | 160 +- bench/benchFFT.cpp | 134 +- bench/benchGeometry.cpp | 178 +- bench/benchVecAdd.cpp | 222 ++- bench/bench_gemm.cpp | 384 ++-- bench/bench_move_semantics.cpp | 21 +- bench/bench_norm.cpp | 326 ++-- bench/bench_reverse.cpp | 68 +- bench/bench_sum.cpp | 8 +- bench/benchmark-blocking-sizes.cpp | 206 +- bench/benchmark.cpp | 29 +- bench/benchmarkSlice.cpp | 22 +- bench/benchmarkX.cpp | 26 +- bench/benchmarkXcwise.cpp | 25 +- bench/btl/actions/action_aat_product.hh | 89 +- bench/btl/actions/action_ata_product.hh | 89 +- bench/btl/actions/action_atv_product.hh | 76 +- bench/btl/actions/action_axpby.hh | 67 +- bench/btl/actions/action_axpy.hh | 71 +- bench/btl/actions/action_cholesky.hh | 82 +- bench/btl/actions/action_ger.hh | 82 +- bench/btl/actions/action_hessenberg.hh | 177 +- bench/btl/actions/action_lu_decomp.hh | 78 +- bench/btl/actions/action_lu_solve.hh | 96 +- .../actions/action_matrix_matrix_product.hh | 100 +- .../action_matrix_matrix_product_bis.hh | 89 +- .../actions/action_matrix_vector_product.hh | 94 +- bench/btl/actions/action_partial_lu.hh | 79 +- bench/btl/actions/action_rot.hh | 72 +- bench/btl/actions/action_symv.hh | 90 +- bench/btl/actions/action_syr2.hh | 87 +- bench/btl/actions/action_trisolve.hh | 82 +- bench/btl/actions/action_trisolve_matrix.hh | 120 +- bench/btl/actions/action_trmm.hh | 120 +- bench/btl/actions/basic_actions.hh | 1 - bench/btl/data/mean.cxx | 183 +- bench/btl/data/regularize.cxx | 100 +- bench/btl/data/smooth.cxx | 171 +- bench/btl/generic_bench/bench.hh | 91 +- bench/btl/generic_bench/btl.hh | 243 +-- bench/btl/generic_bench/init/init_function.hh | 39 +- bench/btl/generic_bench/init/init_matrix.hh | 33 +- bench/btl/generic_bench/init/init_vector.hh | 9 +- .../btl/generic_bench/static/bench_static.hh | 41 +- .../static/intel_bench_fixed_size.hh | 56 +- .../static/static_size_generator.hh | 37 +- .../generic_bench/timers/STL_perf_analyzer.hh | 56 +- bench/btl/generic_bench/timers/STL_timer.hh | 33 +- .../timers/mixed_perf_analyzer.hh | 55 +- .../timers/portable_perf_analyzer.hh | 58 +- .../timers/portable_perf_analyzer_old.hh | 96 +- .../generic_bench/timers/portable_timer.hh | 130 +- .../generic_bench/timers/x86_perf_analyzer.hh | 67 +- bench/btl/generic_bench/timers/x86_timer.hh | 212 +-- bench/btl/generic_bench/utils/size_lin_log.hh | 58 +- bench/btl/generic_bench/utils/size_log.hh | 34 +- bench/btl/generic_bench/utils/utilities.h | 150 +- bench/btl/generic_bench/utils/xy_file.hh | 44 +- bench/btl/libs/BLAS/blas.h | 916 ++++----- bench/btl/libs/BLAS/blas_interface.hh | 44 +- bench/btl/libs/BLAS/blas_interface_impl.hh | 126 +- bench/btl/libs/BLAS/c_interface_base.h | 62 +- bench/btl/libs/BLAS/main.cpp | 50 +- bench/btl/libs/STL/STL_interface.hh | 239 +-- bench/btl/libs/STL/main.cpp | 23 +- bench/btl/libs/blaze/blaze_interface.hh | 123 +- bench/btl/libs/blaze/main.cpp | 20 +- .../libs/blitz/blitz_LU_solve_interface.hh | 204 +- bench/btl/libs/blitz/blitz_interface.hh | 106 +- bench/btl/libs/blitz/btl_blitz.cpp | 20 +- bench/btl/libs/blitz/btl_tiny_blitz.cpp | 11 +- bench/btl/libs/blitz/tiny_blitz_interface.hh | 73 +- bench/btl/libs/eigen2/btl_tiny_eigen2.cpp | 18 +- bench/btl/libs/eigen2/eigen2_interface.hh | 141 +- bench/btl/libs/eigen2/main_adv.cpp | 19 +- bench/btl/libs/eigen2/main_linear.cpp | 10 +- bench/btl/libs/eigen2/main_matmat.cpp | 13 +- bench/btl/libs/eigen2/main_vecmat.cpp | 15 +- bench/btl/libs/eigen3/btl_tiny_eigen3.cpp | 18 +- bench/btl/libs/eigen3/eigen3_interface.hh | 182 +- bench/btl/libs/eigen3/main_adv.cpp | 19 +- bench/btl/libs/eigen3/main_linear.cpp | 12 +- bench/btl/libs/eigen3/main_matmat.cpp | 13 +- bench/btl/libs/eigen3/main_vecmat.cpp | 15 +- bench/btl/libs/gmm/gmm_LU_solve_interface.hh | 204 +- bench/btl/libs/gmm/gmm_interface.hh | 119 +- bench/btl/libs/gmm/main.cpp | 30 +- bench/btl/libs/mtl4/main.cpp | 26 +- .../btl/libs/mtl4/mtl4_LU_solve_interface.hh | 204 +- bench/btl/libs/mtl4/mtl4_interface.hh | 118 +- bench/btl/libs/tensors/main_linear.cpp | 7 +- bench/btl/libs/tensors/main_matmat.cpp | 5 +- bench/btl/libs/tensors/main_vecmat.cpp | 5 +- bench/btl/libs/tensors/tensor_interface.hh | 73 +- bench/btl/libs/tvmet/main.cpp | 13 +- bench/btl/libs/tvmet/tvmet_interface.hh | 76 +- bench/btl/libs/ublas/main.cpp | 21 +- bench/btl/libs/ublas/ublas_interface.hh | 104 +- bench/check_cache_queries.cpp | 66 +- bench/dense_solvers.cpp | 182 +- bench/eig33.cpp | 164 +- bench/geometry.cpp | 110 +- bench/perf_monitoring/gemm.cpp | 11 +- bench/perf_monitoring/gemm_common.h | 56 +- bench/perf_monitoring/gemv.cpp | 11 +- bench/perf_monitoring/gemv_common.h | 38 +- bench/perf_monitoring/gemvt.cpp | 11 +- bench/perf_monitoring/lazy_gemm.cpp | 124 +- bench/perf_monitoring/llt.cpp | 9 +- bench/perf_monitoring/trmv_lo.cpp | 11 +- bench/perf_monitoring/trmv_lot.cpp | 9 +- bench/perf_monitoring/trmv_up.cpp | 11 +- bench/perf_monitoring/trmv_upt.cpp | 9 +- bench/product_threshold.cpp | 193 +- bench/quat_slerp.cpp | 229 +-- bench/quatmul.cpp | 41 +- bench/sparse_cholesky.cpp | 115 +- bench/sparse_dense_product.cpp | 172 +- bench/sparse_lu.cpp | 81 +- bench/sparse_product.cpp | 269 ++- bench/sparse_randomsetter.cpp | 119 +- bench/sparse_setter.cpp | 501 +++-- bench/sparse_transpose.cpp | 80 +- bench/sparse_trisolver.cpp | 165 +- bench/spbench/sp_solver.cpp | 132 +- bench/spbench/spbenchsolver.cpp | 106 +- bench/spbench/spbenchsolver.h | 690 ++++--- bench/spbench/spbenchstyle.h | 14 +- bench/spbench/test_sparseLU.cpp | 91 +- bench/spmv.cpp | 167 +- bench/tensors/benchmark.h | 12 +- bench/tensors/benchmark_main.cc | 52 +- bench/tensors/contraction_benchmarks_cpu.cc | 8 +- bench/tensors/tensor_benchmarks.h | 183 +- bench/tensors/tensor_benchmarks_cpu.cc | 23 +- bench/tensors/tensor_benchmarks_fp16_gpu.cu | 59 +- bench/tensors/tensor_benchmarks_gpu.cu | 57 +- bench/tensors/tensor_benchmarks_sycl.cc | 1 - bench/tensors/tensor_contract_sycl_bench.cc | 99 +- bench/vdw_new.cpp | 22 +- blas/BandTriangularSolver.h | 123 +- blas/GeneralRank1Update.h | 32 +- blas/PackedSelfadjointProduct.h | 41 +- blas/PackedTriangularMatrixVector.h | 53 +- blas/PackedTriangularSolverVector.h | 90 +- blas/Rank2Update.h | 51 +- blas/blas.h | 721 +++---- blas/common.h | 152 +- blas/complex_double.cpp | 4 +- blas/complex_single.cpp | 4 +- blas/double.cpp | 33 +- blas/f2c/datatypes.h | 18 +- blas/level1_cplx_impl.h | 169 +- blas/level1_impl.h | 157 +- blas/level1_real_impl.h | 118 +- blas/level2_cplx_impl.h | 459 ++--- blas/level2_impl.h | 733 +++---- blas/level2_real_impl.h | 375 ++-- blas/level3_impl.h | 1155 ++++++----- blas/single.cpp | 9 +- blas/xerbla.cpp | 10 +- demos/mandelbrot/mandelbrot.cpp | 187 +- demos/mandelbrot/mandelbrot.h | 84 +- demos/mix_eigen_and_c/binary_library.cpp | 150 +- demos/mix_eigen_and_c/binary_library.h | 84 +- demos/mix_eigen_and_c/example.c | 20 +- demos/opengl/camera.cpp | 327 ++-- demos/opengl/camera.h | 169 +- demos/opengl/gpuhelper.cpp | 185 +- demos/opengl/gpuhelper.h | 260 ++- demos/opengl/icosphere.cpp | 109 +- demos/opengl/icosphere.h | 26 +- demos/opengl/quaternion_demo.cpp | 542 +++--- demos/opengl/quaternion_demo.h | 124 +- demos/opengl/trackball.cpp | 34 +- demos/opengl/trackball.h | 35 +- doc/examples/CustomizingEigen_Inheritance.cpp | 31 +- doc/examples/Cwise_erf.cpp | 5 +- doc/examples/Cwise_erfc.cpp | 5 +- doc/examples/Cwise_lgamma.cpp | 5 +- doc/examples/DenseBase_middleCols_int.cpp | 15 +- doc/examples/DenseBase_middleRows_int.cpp | 15 +- .../DenseBase_template_int_middleCols.cpp | 15 +- .../DenseBase_template_int_middleRows.cpp | 15 +- doc/examples/QuickStart_example.cpp | 13 +- doc/examples/QuickStart_example2_dynamic.cpp | 7 +- doc/examples/QuickStart_example2_fixed.cpp | 7 +- doc/examples/TemplateKeyword_flexible.cpp | 12 +- doc/examples/TemplateKeyword_simple.cpp | 10 +- doc/examples/TutorialInplaceLU.cpp | 18 +- doc/examples/TutorialLinAlgComputeTwice.cpp | 31 +- .../TutorialLinAlgExComputeSolveError.cpp | 13 +- ...torialLinAlgExSolveColPivHouseholderQR.cpp | 19 +- doc/examples/TutorialLinAlgExSolveLDLT.cpp | 17 +- .../TutorialLinAlgInverseDeterminant.cpp | 15 +- doc/examples/TutorialLinAlgRankRevealing.cpp | 23 +- doc/examples/TutorialLinAlgSVDSolve.cpp | 15 +- .../TutorialLinAlgSelfAdjointEigenSolver.cpp | 21 +- doc/examples/TutorialLinAlgSetThreshold.cpp | 16 +- .../Tutorial_ArrayClass_accessors.cpp | 22 +- doc/examples/Tutorial_ArrayClass_addition.cpp | 17 +- .../Tutorial_ArrayClass_cwise_other.cpp | 15 +- doc/examples/Tutorial_ArrayClass_interop.cpp | 17 +- .../Tutorial_ArrayClass_interop_matrix.cpp | 15 +- doc/examples/Tutorial_ArrayClass_mult.cpp | 13 +- ...orial_BlockOperations_block_assignment.cpp | 10 +- .../Tutorial_BlockOperations_colrow.cpp | 9 +- .../Tutorial_BlockOperations_corner.cpp | 10 +- .../Tutorial_BlockOperations_print_block.cpp | 17 +- .../Tutorial_BlockOperations_vector.cpp | 5 +- doc/examples/Tutorial_PartialLU_solve.cpp | 19 +- ...ionsVisitorsBroadcasting_broadcast_1nn.cpp | 15 +- ...sVisitorsBroadcasting_broadcast_simple.cpp | 21 +- ...sBroadcasting_broadcast_simple_rowwise.cpp | 20 +- ...ReductionsVisitorsBroadcasting_colwise.cpp | 13 +- ...ReductionsVisitorsBroadcasting_maxnorm.cpp | 16 +- ...nsVisitorsBroadcasting_reductions_bool.cpp | 10 +- ...nsVisitorsBroadcasting_reductions_norm.cpp | 15 +- ...rsBroadcasting_reductions_operatornorm.cpp | 12 +- ...ReductionsVisitorsBroadcasting_rowwise.cpp | 13 +- ...eductionsVisitorsBroadcasting_visitors.cpp | 20 +- .../Tutorial_simple_example_dynamic_size.cpp | 23 +- .../Tutorial_simple_example_fixed_size.cpp | 6 +- doc/examples/class_Block.cpp | 19 +- doc/examples/class_CwiseBinaryOp.cpp | 8 +- doc/examples/class_CwiseUnaryOp.cpp | 11 +- doc/examples/class_CwiseUnaryOp_ptrfun.cpp | 8 +- doc/examples/class_FixedBlock.cpp | 19 +- doc/examples/class_FixedReshaped.cpp | 12 +- doc/examples/class_FixedVectorBlock.cpp | 22 +- doc/examples/class_Reshaped.cpp | 13 +- doc/examples/class_VectorBlock.cpp | 26 +- doc/examples/function_taking_eigenbase.cpp | 17 +- doc/examples/function_taking_ref.cpp | 14 +- doc/examples/make_circulant.cpp.entry | 3 +- doc/examples/make_circulant.cpp.evaluator | 46 +- doc/examples/make_circulant.cpp.expression | 14 +- doc/examples/make_circulant.cpp.main | 3 +- doc/examples/make_circulant.cpp.preamble | 3 +- doc/examples/make_circulant.cpp.traits | 35 +- doc/examples/make_circulant2.cpp | 28 +- doc/examples/matrixfree_cg.cpp | 86 +- doc/examples/nullary_indexing.cpp | 54 +- doc/examples/tut_arithmetic_add_sub.cpp | 15 +- doc/examples/tut_arithmetic_dot_cross.cpp | 15 +- doc/examples/tut_arithmetic_matrix_mul.cpp | 20 +- doc/examples/tut_arithmetic_redux_basic.cpp | 18 +- .../tut_arithmetic_scalar_mul_div.cpp | 8 +- .../tut_matrix_coefficient_accessors.cpp | 13 +- doc/examples/tut_matrix_resize.cpp | 13 +- doc/examples/tut_matrix_resize_fixed_size.cpp | 8 +- doc/snippets/AngleAxis_mimic_euler.cpp | 5 +- .../Array_initializer_list_23_cxx11.cpp | 5 +- .../Array_initializer_list_vector_cxx11.cpp | 2 +- doc/snippets/Array_variadic_ctor_cxx11.cpp | 2 +- doc/snippets/BiCGSTAB_simple.cpp | 22 +- doc/snippets/BiCGSTAB_step_by_step.cpp | 28 +- doc/snippets/ColPivHouseholderQR_solve.cpp | 2 +- doc/snippets/ComplexEigenSolver_compute.cpp | 2 +- .../ComplexEigenSolver_eigenvalues.cpp | 5 +- .../ComplexEigenSolver_eigenvectors.cpp | 5 +- doc/snippets/ComplexSchur_compute.cpp | 2 +- doc/snippets/ComplexSchur_matrixT.cpp | 4 +- doc/snippets/ComplexSchur_matrixU.cpp | 2 +- doc/snippets/Cwise_abs.cpp | 2 +- doc/snippets/Cwise_abs2.cpp | 2 +- doc/snippets/Cwise_acos.cpp | 2 +- doc/snippets/Cwise_array_atan2_array.cpp | 7 +- doc/snippets/Cwise_array_power_array.cpp | 7 +- doc/snippets/Cwise_asin.cpp | 2 +- doc/snippets/Cwise_atan.cpp | 2 +- doc/snippets/Cwise_boolean_and.cpp | 4 +- doc/snippets/Cwise_boolean_not.cpp | 4 +- doc/snippets/Cwise_boolean_or.cpp | 4 +- doc/snippets/Cwise_cbrt.cpp | 2 +- doc/snippets/Cwise_ceil.cpp | 2 +- doc/snippets/Cwise_cos.cpp | 2 +- doc/snippets/Cwise_cosh.cpp | 2 +- doc/snippets/Cwise_cube.cpp | 2 +- doc/snippets/Cwise_equal_equal.cpp | 4 +- doc/snippets/Cwise_exp.cpp | 2 +- doc/snippets/Cwise_floor.cpp | 2 +- doc/snippets/Cwise_greater.cpp | 4 +- doc/snippets/Cwise_greater_equal.cpp | 4 +- doc/snippets/Cwise_inverse.cpp | 2 +- doc/snippets/Cwise_isFinite.cpp | 4 +- doc/snippets/Cwise_isInf.cpp | 4 +- doc/snippets/Cwise_isNaN.cpp | 4 +- doc/snippets/Cwise_less.cpp | 4 +- doc/snippets/Cwise_less_equal.cpp | 4 +- doc/snippets/Cwise_log.cpp | 2 +- doc/snippets/Cwise_log10.cpp | 2 +- doc/snippets/Cwise_max.cpp | 2 +- doc/snippets/Cwise_min.cpp | 2 +- doc/snippets/Cwise_minus.cpp | 4 +- doc/snippets/Cwise_minus_equal.cpp | 2 +- doc/snippets/Cwise_not_equal.cpp | 4 +- doc/snippets/Cwise_plus.cpp | 4 +- doc/snippets/Cwise_plus_equal.cpp | 2 +- doc/snippets/Cwise_pow.cpp | 2 +- doc/snippets/Cwise_product.cpp | 1 - doc/snippets/Cwise_quotient.cpp | 4 +- doc/snippets/Cwise_rint.cpp | 2 +- doc/snippets/Cwise_round.cpp | 2 +- doc/snippets/Cwise_scalar_power_array.cpp | 4 +- doc/snippets/Cwise_sign.cpp | 2 +- doc/snippets/Cwise_sin.cpp | 2 +- doc/snippets/Cwise_sinh.cpp | 2 +- doc/snippets/Cwise_slash_equal.cpp | 2 +- doc/snippets/Cwise_sqrt.cpp | 2 +- doc/snippets/Cwise_square.cpp | 2 +- doc/snippets/Cwise_tan.cpp | 2 +- doc/snippets/Cwise_tanh.cpp | 2 +- doc/snippets/Cwise_times_equal.cpp | 2 +- doc/snippets/DenseBase_LinSpaced.cpp | 4 +- doc/snippets/DenseBase_LinSpacedInt.cpp | 12 +- .../DenseBase_LinSpaced_seq_deprecated.cpp | 4 +- doc/snippets/DenseBase_setLinSpaced.cpp | 2 +- doc/snippets/DirectionWise_hnormalized.cpp | 6 +- doc/snippets/DirectionWise_replicate.cpp | 2 +- .../EigenSolver_EigenSolver_MatrixType.cpp | 2 +- doc/snippets/EigenSolver_compute.cpp | 4 +- doc/snippets/EigenSolver_eigenvalues.cpp | 5 +- doc/snippets/EigenSolver_eigenvectors.cpp | 5 +- .../EigenSolver_pseudoEigenvectors.cpp | 2 +- doc/snippets/FullPivHouseholderQR_solve.cpp | 2 +- doc/snippets/FullPivLU_image.cpp | 8 +- doc/snippets/FullPivLU_kernel.cpp | 8 +- doc/snippets/FullPivLU_solve.cpp | 10 +- doc/snippets/GeneralizedEigenSolver.cpp | 4 +- .../HessenbergDecomposition_compute.cpp | 4 +- .../HessenbergDecomposition_matrixH.cpp | 2 +- .../HessenbergDecomposition_packedMatrix.cpp | 5 +- doc/snippets/HouseholderQR_householderQ.cpp | 2 +- doc/snippets/HouseholderQR_solve.cpp | 4 +- ...ouseholderSequence_HouseholderSequence.cpp | 6 +- doc/snippets/JacobiSVD_basic.cpp | 2 +- doc/snippets/LLT_example.cpp | 8 +- doc/snippets/LLT_solve.cpp | 9 +- doc/snippets/LeastSquaresNormalEquations.cpp | 3 +- doc/snippets/LeastSquaresQR.cpp | 3 +- doc/snippets/Map_general_stride.cpp | 6 +- doc/snippets/Map_inner_stride.cpp | 5 +- doc/snippets/Map_outer_stride.cpp | 2 +- doc/snippets/Map_placement_new.cpp | 6 +- doc/snippets/Map_simple.cpp | 2 +- doc/snippets/MatrixBase_all.cpp | 8 +- doc/snippets/MatrixBase_applyOnTheLeft.cpp | 8 +- doc/snippets/MatrixBase_applyOnTheRight.cpp | 6 +- doc/snippets/MatrixBase_array.cpp | 2 +- doc/snippets/MatrixBase_array_const.cpp | 4 +- doc/snippets/MatrixBase_asDiagonal.cpp | 2 +- doc/snippets/MatrixBase_block_int_int.cpp | 4 +- doc/snippets/MatrixBase_col.cpp | 2 +- doc/snippets/MatrixBase_colwise.cpp | 3 +- .../MatrixBase_colwise_iterator_cxx11.cpp | 7 +- ...trixBase_computeInverseAndDetWithCheck.cpp | 7 +- .../MatrixBase_computeInverseWithCheck.cpp | 7 +- doc/snippets/MatrixBase_cwiseAbs.cpp | 5 +- doc/snippets/MatrixBase_cwiseAbs2.cpp | 5 +- doc/snippets/MatrixBase_cwiseEqual.cpp | 9 +- doc/snippets/MatrixBase_cwiseInverse.cpp | 5 +- doc/snippets/MatrixBase_cwiseMax.cpp | 2 +- doc/snippets/MatrixBase_cwiseMin.cpp | 2 +- doc/snippets/MatrixBase_cwiseNotEqual.cpp | 9 +- doc/snippets/MatrixBase_cwiseProduct.cpp | 1 - doc/snippets/MatrixBase_cwiseQuotient.cpp | 2 +- doc/snippets/MatrixBase_cwiseSign.cpp | 5 +- doc/snippets/MatrixBase_cwiseSqrt.cpp | 2 +- doc/snippets/MatrixBase_diagonal.cpp | 3 +- doc/snippets/MatrixBase_eigenvalues.cpp | 2 +- .../MatrixBase_fixedBlock_int_int.cpp | 2 +- doc/snippets/MatrixBase_hnormalized.cpp | 4 +- doc/snippets/MatrixBase_homogeneous.cpp | 3 +- doc/snippets/MatrixBase_isDiagonal.cpp | 3 +- doc/snippets/MatrixBase_isIdentity.cpp | 2 +- doc/snippets/MatrixBase_isOnes.cpp | 2 +- doc/snippets/MatrixBase_isOrthogonal.cpp | 6 +- doc/snippets/MatrixBase_isUnitary.cpp | 2 +- doc/snippets/MatrixBase_isZero.cpp | 2 +- doc/snippets/MatrixBase_noalias.cpp | 6 +- doc/snippets/MatrixBase_ones_int_int.cpp | 2 +- doc/snippets/MatrixBase_operatorNorm.cpp | 5 +- doc/snippets/MatrixBase_random_int_int.cpp | 2 +- doc/snippets/MatrixBase_replicate.cpp | 4 +- doc/snippets/MatrixBase_replicate_int_int.cpp | 2 +- doc/snippets/MatrixBase_reshaped_fixed.cpp | 2 +- doc/snippets/MatrixBase_reverse.cpp | 7 +- doc/snippets/MatrixBase_row.cpp | 2 +- doc/snippets/MatrixBase_rowwise.cpp | 3 +- doc/snippets/MatrixBase_select.cpp | 4 +- doc/snippets/MatrixBase_set.cpp | 9 +- doc/snippets/MatrixBase_setIdentity.cpp | 2 +- ...Base_template_int_int_bottomLeftCorner.cpp | 4 +- ...plate_int_int_bottomLeftCorner_int_int.cpp | 4 +- ...ase_template_int_int_bottomRightCorner.cpp | 4 +- ...late_int_int_bottomRightCorner_int_int.cpp | 4 +- ...rixBase_template_int_int_topLeftCorner.cpp | 4 +- ...template_int_int_topLeftCorner_int_int.cpp | 4 +- ...ixBase_template_int_int_topRightCorner.cpp | 4 +- ...emplate_int_int_topRightCorner_int_int.cpp | 4 +- doc/snippets/MatrixBase_transpose.cpp | 5 +- doc/snippets/MatrixBase_zero_int_int.cpp | 2 +- doc/snippets/Matrix_Map_stride.cpp | 7 +- .../Matrix_initializer_list_23_cxx11.cpp | 5 +- .../Matrix_initializer_list_vector_cxx11.cpp | 2 +- doc/snippets/Matrix_resize_NoChange_int.cpp | 2 +- doc/snippets/Matrix_resize_int.cpp | 2 +- doc/snippets/Matrix_resize_int_NoChange.cpp | 2 +- doc/snippets/Matrix_resize_int_int.cpp | 8 +- doc/snippets/Matrix_variadic_ctor_cxx11.cpp | 2 +- doc/snippets/PartialPivLU_solve.cpp | 6 +- doc/snippets/RealQZ_compute.cpp | 32 +- .../RealSchur_RealSchur_MatrixType.cpp | 2 +- doc/snippets/RealSchur_compute.cpp | 2 +- ...ointEigenSolver_SelfAdjointEigenSolver.cpp | 4 +- ...lver_SelfAdjointEigenSolver_MatrixType.cpp | 2 +- ...ver_SelfAdjointEigenSolver_MatrixType2.cpp | 6 +- ...fAdjointEigenSolver_compute_MatrixType.cpp | 4 +- ...AdjointEigenSolver_compute_MatrixType2.cpp | 8 +- .../SelfAdjointEigenSolver_eigenvalues.cpp | 5 +- .../SelfAdjointEigenSolver_eigenvectors.cpp | 5 +- ...AdjointEigenSolver_operatorInverseSqrt.cpp | 2 +- .../SelfAdjointEigenSolver_operatorSqrt.cpp | 4 +- doc/snippets/SelfAdjointView_eigenvalues.cpp | 2 +- doc/snippets/SelfAdjointView_operatorNorm.cpp | 5 +- doc/snippets/Slicing_arrayexpr.cpp | 7 +- doc/snippets/Slicing_custom_padding_cxx11.cpp | 8 +- doc/snippets/Slicing_rawarray_cxx11.cpp | 4 +- doc/snippets/Slicing_stdvector_cxx11.cpp | 6 +- doc/snippets/SparseMatrix_coeffs.cpp | 8 +- doc/snippets/TopicAliasing_block.cpp | 6 +- doc/snippets/TopicAliasing_block_correct.cpp | 6 +- doc/snippets/TopicAliasing_cwise.cpp | 12 +- doc/snippets/TopicAliasing_mult1.cpp | 4 +- doc/snippets/TopicAliasing_mult2.cpp | 4 +- doc/snippets/TopicAliasing_mult3.cpp | 4 +- doc/snippets/TopicAliasing_mult4.cpp | 4 +- doc/snippets/TopicAliasing_mult5.cpp | 4 +- doc/snippets/TopicStorageOrders_example.cpp | 13 +- doc/snippets/Triangular_solve.cpp | 3 +- ...lization_Tridiagonalization_MatrixType.cpp | 2 +- doc/snippets/Tridiagonalization_compute.cpp | 4 +- .../Tridiagonalization_decomposeInPlace.cpp | 2 +- doc/snippets/Tridiagonalization_diagonal.cpp | 4 +- ...iagonalization_householderCoefficients.cpp | 2 +- .../Tridiagonalization_packedMatrix.cpp | 5 +- .../Tutorial_AdvancedInitialization_Block.cpp | 2 +- ..._AdvancedInitialization_CommaTemporary.cpp | 2 +- ...orial_AdvancedInitialization_ThreeWays.cpp | 21 +- .../Tutorial_AdvancedInitialization_Zero.cpp | 2 - doc/snippets/Tutorial_Map_rowmajor.cpp | 10 +- doc/snippets/Tutorial_Map_using.cpp | 21 +- doc/snippets/Tutorial_ReshapeMat2Mat.cpp | 7 +- doc/snippets/Tutorial_ReshapeMat2Vec.cpp | 8 +- doc/snippets/Tutorial_SlicingCol.cpp | 10 +- doc/snippets/Tutorial_SlicingVec.cpp | 4 +- doc/snippets/Tutorial_commainit_01.cpp | 4 +- doc/snippets/Tutorial_commainit_01b.cpp | 4 +- doc/snippets/Tutorial_commainit_02.cpp | 8 +- .../Tutorial_range_for_loop_1d_cxx11.cpp | 2 +- .../Tutorial_range_for_loop_2d_cxx11.cpp | 3 +- .../Tutorial_reshaped_vs_resize_1.cpp | 2 +- .../Tutorial_reshaped_vs_resize_2.cpp | 4 +- .../Tutorial_solve_matrix_inverse.cpp | 2 +- doc/snippets/Tutorial_solve_multiple_rhs.cpp | 10 +- .../Tutorial_solve_reuse_decomposition.cpp | 10 +- doc/snippets/Tutorial_solve_singular.cpp | 2 +- doc/snippets/Tutorial_solve_triangular.cpp | 2 +- .../Tutorial_solve_triangular_inplace.cpp | 2 +- doc/snippets/Tutorial_std_sort_rows_cxx11.cpp | 5 +- doc/snippets/VectorwiseOp_homogeneous.cpp | 6 +- doc/snippets/Vectorwise_reverse.cpp | 7 +- doc/snippets/class_FullPivLU.cpp | 5 +- doc/snippets/compile_snippet.cpp.in | 11 +- doc/snippets/tut_arithmetic_redux_minmax.cpp | 20 +- .../tut_arithmetic_transpose_aliasing.cpp | 5 +- .../tut_arithmetic_transpose_conjugate.cpp | 6 +- .../tut_arithmetic_transpose_inplace.cpp | 4 +- .../tut_matrix_assignment_resizing.cpp | 4 +- .../Tutorial_sparse_example.cpp | 20 +- .../Tutorial_sparse_example_details.cpp | 50 +- doc/special_examples/random_cpp11.cpp | 4 +- doc/tutorial.cpp | 37 +- failtest/bdcsvd_int.cpp | 5 +- .../block_nonconst_ctor_on_const_xpr_0.cpp | 4 +- .../block_nonconst_ctor_on_const_xpr_1.cpp | 4 +- .../block_nonconst_ctor_on_const_xpr_2.cpp | 6 +- .../block_on_const_type_actually_const_0.cpp | 6 +- .../block_on_const_type_actually_const_1.cpp | 6 +- failtest/colpivqr_int.cpp | 5 +- .../const_qualified_block_method_retval_0.cpp | 4 +- .../const_qualified_block_method_retval_1.cpp | 4 +- ...const_qualified_diagonal_method_retval.cpp | 4 +- ...onst_qualified_transpose_method_retval.cpp | 4 +- ...seunaryview_nonconst_ctor_on_const_xpr.cpp | 4 +- ...unaryview_on_const_type_actually_const.cpp | 6 +- .../diagonal_nonconst_ctor_on_const_xpr.cpp | 4 +- .../diagonal_on_const_type_actually_const.cpp | 6 +- failtest/eigensolver_cplx.cpp | 5 +- failtest/eigensolver_int.cpp | 5 +- failtest/fullpivlu_int.cpp | 5 +- failtest/fullpivqr_int.cpp | 5 +- failtest/initializer_list_1.cpp | 5 +- failtest/initializer_list_2.cpp | 5 +- failtest/jacobisvd_int.cpp | 5 +- failtest/ldlt_int.cpp | 5 +- failtest/llt_int.cpp | 5 +- failtest/map_nonconst_ctor_on_const_ptr_0.cpp | 4 +- failtest/map_nonconst_ctor_on_const_ptr_1.cpp | 4 +- failtest/map_nonconst_ctor_on_const_ptr_2.cpp | 4 +- failtest/map_nonconst_ctor_on_const_ptr_3.cpp | 4 +- failtest/map_nonconst_ctor_on_const_ptr_4.cpp | 4 +- .../map_on_const_type_actually_const_0.cpp | 4 +- .../map_on_const_type_actually_const_1.cpp | 4 +- failtest/partialpivlu_int.cpp | 5 +- failtest/qr_int.cpp | 5 +- failtest/ref_1.cpp | 5 +- failtest/ref_2.cpp | 7 +- failtest/ref_3.cpp | 9 +- failtest/ref_4.cpp | 7 +- failtest/ref_5.cpp | 5 +- ...adjointview_nonconst_ctor_on_const_xpr.cpp | 4 +- ...jointview_on_const_type_actually_const.cpp | 6 +- failtest/sparse_ref_1.cpp | 7 +- failtest/sparse_ref_2.cpp | 7 +- failtest/sparse_ref_3.cpp | 11 +- failtest/sparse_ref_4.cpp | 5 +- failtest/sparse_ref_5.cpp | 7 +- failtest/sparse_storage_mismatch.cpp | 13 +- failtest/swap_1.cpp | 3 +- failtest/swap_2.cpp | 3 +- failtest/ternary_1.cpp | 7 +- failtest/ternary_2.cpp | 7 +- .../transpose_nonconst_ctor_on_const_xpr.cpp | 4 +- ...transpose_on_const_type_actually_const.cpp | 6 +- ...angularview_nonconst_ctor_on_const_xpr.cpp | 4 +- ...gularview_on_const_type_actually_const.cpp | 6 +- lapack/complex_double.cpp | 4 +- lapack/complex_single.cpp | 4 +- lapack/double.cpp | 4 +- lapack/lapack.h | 136 +- lapack/lapack_common.h | 14 +- lapack/single.cpp | 4 +- scripts/eigen_gen_credits.cpp | 164 +- test/AnnoyingScalar.h | 230 ++- test/MovableScalar.h | 12 +- test/OffByOneScalar.h | 18 +- test/SafeScalar.h | 12 +- test/accelerate_support.cpp | 94 +- test/adjoint.cpp | 199 +- test/array_cwise.cpp | 700 +++---- test/array_for_matrix.cpp | 336 ++-- test/array_of_string.cpp | 13 +- test/array_replicate.cpp | 76 +- test/array_reverse.cpp | 131 +- test/bandmatrix.cpp | 61 +- test/basicstuff.cpp | 294 ++- test/bdcsvd.cpp | 53 +- test/bfloat16_float.cpp | 173 +- test/bicgstab.cpp | 33 +- test/blasutil.cpp | 279 ++- test/block.cpp | 343 ++-- test/boostmultiprec.cpp | 174 +- test/bug1213.cpp | 10 +- test/bug1213.h | 5 +- test/bug1213_main.cpp | 13 +- test/cholesky.cpp | 306 ++- test/cholmod_support.cpp | 47 +- test/commainitializer.cpp | 99 +- test/conjugate_gradient.cpp | 31 +- test/conservative_resize.cpp | 137 +- test/constexpr.cpp | 10 +- test/constructor.cpp | 77 +- test/corners.cpp | 152 +- test/ctorleak.cpp | 70 +- test/denseLM.cpp | 184 +- test/dense_storage.cpp | 197 +- test/determinant.cpp | 39 +- test/diagonal.cpp | 77 +- test/diagonal_matrix_variadic_ctor.cpp | 25 +- test/diagonalmatrices.cpp | 192 +- test/dontalign.cpp | 14 +- test/dynalloc.cpp | 174 +- test/eigen2support.cpp | 51 +- test/eigensolver_complex.cpp | 103 +- test/eigensolver_generalized_real.cpp | 131 +- test/eigensolver_generic.cpp | 172 +- test/eigensolver_selfadjoint.cpp | 234 ++- test/evaluators.cpp | 526 ++--- test/exceptions.cpp | 45 +- test/fastmath.cpp | 224 ++- test/first_aligned.cpp | 41 +- test/float_conversion.cpp | 11 +- test/geo_alignedbox.cpp | 256 ++- test/geo_eulerangles.cpp | 63 +- test/geo_homogeneous.cpp | 106 +- test/geo_hyperplane.cpp | 152 +- test/geo_orthomethods.cpp | 112 +- test/geo_parametrizedline.cpp | 92 +- test/geo_quaternion.cpp | 252 ++- test/geo_transformations.cpp | 513 +++-- test/gpu_basic.cu | 308 ++- test/gpu_common.h | 143 +- test/gpu_example.cu | 58 +- test/gpu_test_helper.h | 270 ++- test/half_float.cpp | 144 +- test/hessenberg.cpp | 39 +- test/householder.cpp | 134 +- test/incomplete_cholesky.cpp | 69 +- test/indexed_view.cpp | 591 +++--- test/initializer_list_construction.cpp | 235 ++- test/inplace_decomposition.cpp | 94 +- test/integer_types.cpp | 137 +- test/inverse.cpp | 127 +- test/io.cpp | 40 +- test/is_same_dense.cpp | 38 +- test/jacobi.cpp | 58 +- test/jacobisvd.cpp | 39 +- test/klu_support.cpp | 16 +- test/linearstructure.cpp | 152 +- test/lscg.cpp | 29 +- test/lu.cpp | 154 +- test/main.h | 945 ++++----- test/mapped_matrix.cpp | 140 +- test/mapstaticmethods.cpp | 121 +- test/mapstride.cpp | 304 +-- test/maxsizevector.cpp | 39 +- test/meta.cpp | 391 ++-- test/metis_support.cpp | 13 +- test/miscmatrices.cpp | 37 +- test/mixingtypes.cpp | 319 ++-- test/nestbyvalue.cpp | 26 +- test/nesting_ops.cpp | 106 +- test/nomalloc.cpp | 160 +- test/nullary.cpp | 359 ++-- test/num_dimensions.cpp | 54 +- test/numext.cpp | 272 ++- test/packet_ostream.h | 5 +- test/packetmath.cpp | 163 +- test/packetmath_test_shared.h | 336 ++-- test/pardiso_support.cpp | 23 +- test/pastix_support.cpp | 30 +- test/permutationmatrices.cpp | 156 +- test/prec_inverse_4x4.cpp | 57 +- test/product.h | 222 ++- test/product_extra.cpp | 402 ++-- test/product_large.cpp | 146 +- test/product_mmtr.cpp | 137 +- test/product_notemporary.cpp | 264 +-- test/product_selfadjoint.cpp | 83 +- test/product_small.cpp | 469 +++-- test/product_symm.cpp | 124 +- test/product_syrk.cpp | 145 +- test/product_threaded.cpp | 19 +- test/product_trmm.cpp | 187 +- test/product_trmv.cpp | 46 +- test/product_trsolve.cpp | 144 +- test/qr.cpp | 81 +- test/qr_colpivoting.cpp | 154 +- test/qr_fullpivoting.cpp | 99 +- test/qtvector.cpp | 76 +- test/rand.cpp | 152 +- test/random_matrix.cpp | 142 +- test/random_matrix_helper.h | 96 +- test/random_without_cast_overflow.h | 71 +- test/real_qz.cpp | 102 +- test/redux.cpp | 167 +- test/ref.cpp | 420 ++-- test/reshape.cpp | 281 ++- test/resize.cpp | 20 +- test/rvalue_types.cpp | 127 +- test/schur_complex.cpp | 34 +- test/schur_real.cpp | 48 +- test/selfadjoint.cpp | 40 +- test/serializer.cpp | 44 +- test/simplicial_cholesky.cpp | 41 +- test/sizeof.cpp | 62 +- test/sizeoverflow.cpp | 55 +- test/skew_symmetric_matrix3.cpp | 46 +- test/smallvectors.cpp | 19 +- test/solverbase.h | 55 +- test/sparse.h | 137 +- test/sparseLM.cpp | 158 +- test/sparse_basic.cpp | 924 +++++---- test/sparse_block.cpp | 334 ++-- test/sparse_permutations.cpp | 231 ++- test/sparse_product.cpp | 584 +++--- test/sparse_ref.cpp | 190 +- test/sparse_solver.h | 508 +++-- test/sparse_solvers.cpp | 77 +- test/sparse_vector.cpp | 174 +- test/sparselu.cpp | 27 +- test/sparseqr.cpp | 119 +- test/special_numbers.cpp | 52 +- test/split_test_helper.h | 1 - test/spqr_support.cpp | 53 +- test/stable_norm.cpp | 249 +-- test/stddeque.cpp | 67 +- test/stddeque_overload.cpp | 79 +- test/stdlist.cpp | 65 +- test/stdlist_overload.cpp | 83 +- test/stdvector.cpp | 88 +- test/stdvector_overload.cpp | 77 +- test/stl_iterators.cpp | 488 +++-- test/superlu_support.cpp | 11 +- test/svd_common.h | 276 ++- test/svd_fill.h | 152 +- test/swap.cpp | 84 +- test/symbolic_index.cpp | 74 +- test/threads_eventcount.cpp | 15 +- test/threads_non_blocking_thread_pool.cpp | 19 +- test/threads_runqueue.cpp | 13 +- test/triangular.cpp | 158 +- test/tuple_test.cpp | 48 +- test/type_alias.cpp | 48 +- test/umeyama.cpp | 93 +- test/umfpack_support.cpp | 14 +- test/unalignedcount.cpp | 72 +- test/unaryviewstride.cpp | 25 +- test/upperbidiagonalization.cpp | 37 +- test/vectorization_logic.cpp | 577 +++--- test/vectorwiseop.cpp | 147 +- test/visitor.cpp | 123 +- test/zerosized.cpp | 90 +- unsupported/bench/bench_svd.cpp | 100 +- unsupported/doc/examples/BVH_Example.cpp | 44 +- unsupported/doc/examples/EulerAngles.cpp | 28 +- unsupported/doc/examples/FFT.cpp | 119 +- .../doc/examples/MatrixExponential.cpp | 9 +- unsupported/doc/examples/MatrixFunction.cpp | 17 +- unsupported/doc/examples/MatrixLogarithm.cpp | 9 +- unsupported/doc/examples/MatrixPower.cpp | 14 +- .../doc/examples/MatrixPower_optimal.cpp | 22 +- unsupported/doc/examples/MatrixSine.cpp | 11 +- unsupported/doc/examples/MatrixSinh.cpp | 11 +- unsupported/doc/examples/MatrixSquareRoot.cpp | 8 +- .../doc/examples/PolynomialSolver1.cpp | 41 +- unsupported/doc/examples/PolynomialUtils1.cpp | 16 +- unsupported/doc/examples/SYCL/CwiseMul.cpp | 32 +- unsupported/test/BVH.cpp | 167 +- unsupported/test/EulerAngles.cpp | 246 ++- unsupported/test/FFTW.cpp | 2 +- unsupported/test/NNLS.cpp | 3 +- unsupported/test/NonLinearOptimization.cpp | 1684 ++++++++--------- unsupported/test/NumericalDiff.cpp | 142 +- unsupported/test/alignedvector3.cpp | 91 +- unsupported/test/autodiff.cpp | 389 ++-- unsupported/test/autodiff_scalar.cpp | 46 +- unsupported/test/bessel_functions.cpp | 341 ++-- unsupported/test/bicgstabl.cpp | 19 +- unsupported/test/cxx11_tensor_argmax.cpp | 117 +- unsupported/test/cxx11_tensor_argmax_gpu.cu | 95 +- unsupported/test/cxx11_tensor_assign.cpp | 145 +- .../test/cxx11_tensor_block_access.cpp | 191 +- unsupported/test/cxx11_tensor_block_eval.cpp | 305 ++- unsupported/test/cxx11_tensor_block_io.cpp | 55 +- .../test/cxx11_tensor_broadcast_sycl.cpp | 113 +- .../test/cxx11_tensor_broadcasting.cpp | 71 +- .../test/cxx11_tensor_builtins_sycl.cpp | 324 ++-- .../test/cxx11_tensor_cast_float16_gpu.cu | 20 +- unsupported/test/cxx11_tensor_casts.cpp | 13 +- unsupported/test/cxx11_tensor_chipping.cpp | 110 +- .../test/cxx11_tensor_chipping_sycl.cpp | 344 ++-- unsupported/test/cxx11_tensor_comparisons.cpp | 64 +- .../cxx11_tensor_complex_cwise_ops_gpu.cu | 28 +- unsupported/test/cxx11_tensor_complex_gpu.cu | 36 +- .../test/cxx11_tensor_concatenation.cpp | 59 +- .../test/cxx11_tensor_concatenation_sycl.cpp | 106 +- unsupported/test/cxx11_tensor_const.cpp | 26 +- unsupported/test/cxx11_tensor_contract_gpu.cu | 72 +- .../test/cxx11_tensor_contract_sycl.cpp | 598 ++---- unsupported/test/cxx11_tensor_contraction.cpp | 278 ++- unsupported/test/cxx11_tensor_convolution.cpp | 51 +- .../test/cxx11_tensor_convolution_sycl.cpp | 309 ++- .../test/cxx11_tensor_custom_index.cpp | 33 +- unsupported/test/cxx11_tensor_custom_op.cpp | 41 +- .../test/cxx11_tensor_custom_op_sycl.cpp | 76 +- unsupported/test/cxx11_tensor_device.cu | 222 ++- unsupported/test/cxx11_tensor_device_sycl.cpp | 37 +- unsupported/test/cxx11_tensor_dimension.cpp | 32 +- unsupported/test/cxx11_tensor_empty.cpp | 15 +- unsupported/test/cxx11_tensor_executor.cpp | 327 ++-- unsupported/test/cxx11_tensor_expr.cpp | 196 +- unsupported/test/cxx11_tensor_fft.cpp | 148 +- unsupported/test/cxx11_tensor_fixed_size.cpp | 138 +- unsupported/test/cxx11_tensor_forced_eval.cpp | 44 +- .../test/cxx11_tensor_forced_eval_sycl.cpp | 29 +- unsupported/test/cxx11_tensor_generator.cpp | 29 +- .../test/cxx11_tensor_generator_sycl.cpp | 83 +- unsupported/test/cxx11_tensor_gpu.cu | 608 +++--- unsupported/test/cxx11_tensor_ifft.cpp | 37 +- .../test/cxx11_tensor_image_op_sycl.cpp | 56 +- unsupported/test/cxx11_tensor_image_patch.cpp | 334 ++-- .../test/cxx11_tensor_image_patch_sycl.cpp | 817 ++++---- unsupported/test/cxx11_tensor_index_list.cpp | 233 ++- unsupported/test/cxx11_tensor_inflation.cpp | 22 +- .../test/cxx11_tensor_inflation_sycl.cpp | 49 +- unsupported/test/cxx11_tensor_intdiv.cpp | 34 +- unsupported/test/cxx11_tensor_io.cpp | 12 +- unsupported/test/cxx11_tensor_layout_swap.cpp | 21 +- .../test/cxx11_tensor_layout_swap_sycl.cpp | 57 +- unsupported/test/cxx11_tensor_lvalue.cpp | 20 +- unsupported/test/cxx11_tensor_map.cpp | 147 +- unsupported/test/cxx11_tensor_math.cpp | 12 +- unsupported/test/cxx11_tensor_math_sycl.cpp | 36 +- .../test/cxx11_tensor_mixed_indices.cpp | 28 +- unsupported/test/cxx11_tensor_morphing.cpp | 372 ++-- .../test/cxx11_tensor_morphing_sycl.cpp | 304 ++- unsupported/test/cxx11_tensor_move.cpp | 46 +- .../test/cxx11_tensor_notification.cpp | 19 +- .../test/cxx11_tensor_of_bfloat16_gpu.cu | 178 +- unsupported/test/cxx11_tensor_of_complex.cpp | 24 +- .../test/cxx11_tensor_of_const_values.cpp | 35 +- .../test/cxx11_tensor_of_float16_gpu.cu | 192 +- .../test/cxx11_tensor_of_float16_sycl.cpp | 168 +- unsupported/test/cxx11_tensor_of_strings.cpp | 55 +- unsupported/test/cxx11_tensor_padding.cpp | 35 +- .../test/cxx11_tensor_padding_sycl.cpp | 74 +- unsupported/test/cxx11_tensor_patch.cpp | 32 +- unsupported/test/cxx11_tensor_patch_sycl.cpp | 105 +- unsupported/test/cxx11_tensor_random.cpp | 32 +- unsupported/test/cxx11_tensor_random_gpu.cu | 24 +- unsupported/test/cxx11_tensor_random_sycl.cpp | 31 +- unsupported/test/cxx11_tensor_reduction.cpp | 51 +- .../test/cxx11_tensor_reduction_gpu.cu | 51 +- .../test/cxx11_tensor_reduction_sycl.cpp | 508 ++--- unsupported/test/cxx11_tensor_ref.cpp | 103 +- unsupported/test/cxx11_tensor_reverse.cpp | 49 +- .../test/cxx11_tensor_reverse_sycl.cpp | 100 +- unsupported/test/cxx11_tensor_roundings.cpp | 31 +- unsupported/test/cxx11_tensor_scan.cpp | 10 +- unsupported/test/cxx11_tensor_scan_gpu.cu | 22 +- unsupported/test/cxx11_tensor_scan_sycl.cpp | 95 +- unsupported/test/cxx11_tensor_shuffling.cpp | 63 +- .../test/cxx11_tensor_shuffling_sycl.cpp | 12 +- unsupported/test/cxx11_tensor_simple.cpp | 227 +-- unsupported/test/cxx11_tensor_striding.cpp | 27 +- .../test/cxx11_tensor_striding_sycl.cpp | 86 +- unsupported/test/cxx11_tensor_sugar.cpp | 17 +- unsupported/test/cxx11_tensor_sycl.cpp | 157 +- unsupported/test/cxx11_tensor_symmetry.cpp | 1049 +++++----- .../test/cxx11_tensor_thread_local.cpp | 13 +- unsupported/test/cxx11_tensor_thread_pool.cpp | 155 +- unsupported/test/cxx11_tensor_trace.cpp | 22 +- unsupported/test/cxx11_tensor_uint128.cpp | 19 +- .../test/cxx11_tensor_volume_patch.cpp | 15 +- .../test/cxx11_tensor_volume_patch_sycl.cpp | 188 +- unsupported/test/dgmres.cpp | 21 +- unsupported/test/forward_adolc.cpp | 134 +- unsupported/test/gmres.cpp | 21 +- unsupported/test/idrs.cpp | 13 +- unsupported/test/kronecker_product.cpp | 304 ++- unsupported/test/levenberg_marquardt.cpp | 1343 ++++++------- unsupported/test/matrix_exponential.cpp | 73 +- unsupported/test/matrix_function.cpp | 129 +- unsupported/test/matrix_functions.h | 39 +- unsupported/test/matrix_power.cpp | 153 +- unsupported/test/matrix_square_root.cpp | 16 +- unsupported/test/minres.cpp | 33 +- unsupported/test/mpreal_support.cpp | 44 +- unsupported/test/openglsupport.cpp | 446 +++-- unsupported/test/polynomialsolver.cpp | 249 ++- unsupported/test/polynomialutils.cpp | 119 +- unsupported/test/sparse_extra.cpp | 193 +- unsupported/test/special_functions.cpp | 409 ++-- unsupported/test/special_packetmath.cpp | 101 +- unsupported/test/splines.cpp | 256 +-- 876 files changed, 33453 insertions(+), 37795 deletions(-) diff --git a/bench/BenchSparseUtil.h b/bench/BenchSparseUtil.h index 13981f6b7..663cd480b 100644 --- a/bench/BenchSparseUtil.h +++ b/bench/BenchSparseUtil.h @@ -20,63 +20,51 @@ using namespace Eigen; #endif typedef SCALAR Scalar; -typedef Matrix DenseMatrix; -typedef Matrix DenseVector; +typedef Matrix DenseMatrix; +typedef Matrix DenseVector; typedef SparseMatrix EigenSparseMatrix; -void fillMatrix(float density, int rows, int cols, EigenSparseMatrix& dst) -{ - dst.reserve(double(rows)*cols*density); - for(int j = 0; j < cols; j++) - { - for(int i = 0; i < rows; i++) - { - Scalar v = (internal::random(0,1) < density) ? internal::random() : 0; - if (v!=0) - dst.insert(i,j) = v; +void fillMatrix(float density, int rows, int cols, EigenSparseMatrix& dst) { + dst.reserve(double(rows) * cols * density); + for (int j = 0; j < cols; j++) { + for (int i = 0; i < rows; i++) { + Scalar v = (internal::random(0, 1) < density) ? internal::random() : 0; + if (v != 0) dst.insert(i, j) = v; } } dst.finalize(); } -void fillMatrix2(int nnzPerCol, int rows, int cols, EigenSparseMatrix& dst) -{ -// std::cout << "alloc " << nnzPerCol*cols << "\n"; - dst.reserve(nnzPerCol*cols); - for(int j = 0; j < cols; j++) - { +void fillMatrix2(int nnzPerCol, int rows, int cols, EigenSparseMatrix& dst) { + // std::cout << "alloc " << nnzPerCol*cols << "\n"; + dst.reserve(nnzPerCol * cols); + for (int j = 0; j < cols; j++) { std::set aux; - for(int i = 0; i < nnzPerCol; i++) - { - int k = internal::random(0,rows-1); - while (aux.find(k)!=aux.end()) - k = internal::random(0,rows-1); + for (int i = 0; i < nnzPerCol; i++) { + int k = internal::random(0, rows - 1); + while (aux.find(k) != aux.end()) k = internal::random(0, rows - 1); aux.insert(k); - dst.insert(k,j) = internal::random(); + dst.insert(k, j) = internal::random(); } } dst.finalize(); } -void eiToDense(const EigenSparseMatrix& src, DenseMatrix& dst) -{ +void eiToDense(const EigenSparseMatrix& src, DenseMatrix& dst) { dst.setZero(); - for (int j=0; j GmmSparse; -typedef gmm::col_matrix< gmm::wsvector > GmmDynSparse; -void eiToGmm(const EigenSparseMatrix& src, GmmSparse& dst) -{ +typedef gmm::col_matrix > GmmDynSparse; +void eiToGmm(const EigenSparseMatrix& src, GmmSparse& dst) { GmmDynSparse tmp(src.rows(), src.cols()); - for (int j=0; j typedef mtl::compressed2D > MtlSparse; typedef mtl::compressed2D > MtlSparseRowMajor; -void eiToMtl(const EigenSparseMatrix& src, MtlSparse& dst) -{ +void eiToMtl(const EigenSparseMatrix& src, MtlSparse& dst) { mtl::matrix::inserter ins(dst); - for (int j=0; j @@ -123,22 +107,18 @@ void eiToCSparse(const EigenSparseMatrix& src, cs* &dst) #include #include -typedef boost::numeric::ublas::compressed_matrix UBlasSparse; +typedef boost::numeric::ublas::compressed_matrix UBlasSparse; -void eiToUblas(const EigenSparseMatrix& src, UBlasSparse& dst) -{ +void eiToUblas(const EigenSparseMatrix& src, UBlasSparse& dst) { dst.resize(src.rows(), src.cols(), false); - for (int j=0; j -void eiToUblasVec(const EigenType& src, UblasType& dst) -{ +void eiToUblasVec(const EigenType& src, UblasType& dst) { dst.resize(src.size()); - for (int j=0; j +#ifndef NOMINMAX +#define NOMINMAX +#define EIGEN_BT_UNDEF_NOMINMAX +#endif +#ifndef WIN32_LEAN_AND_MEAN +#define WIN32_LEAN_AND_MEAN +#define EIGEN_BT_UNDEF_WIN32_LEAN_AND_MEAN +#endif +#include #elif defined(__APPLE__) #include #else -# include +#include #endif static void escape(void *p) { @@ -41,27 +41,20 @@ static void clobber() { #include -namespace Eigen -{ +namespace Eigen { -enum { - CPU_TIMER = 0, - REAL_TIMER = 1 -}; +enum { CPU_TIMER = 0, REAL_TIMER = 1 }; /** Elapsed time timer keeping the best try. - * - * On POSIX platforms we use clock_gettime with CLOCK_PROCESS_CPUTIME_ID. - * On Windows we use QueryPerformanceCounter - * - * Important: on linux, you must link with -lrt - */ -class BenchTimer -{ -public: - - BenchTimer() - { + * + * On POSIX platforms we use clock_gettime with CLOCK_PROCESS_CPUTIME_ID. + * On Windows we use QueryPerformanceCounter + * + * Important: on linux, you must link with -lrt + */ +class BenchTimer { + public: + BenchTimer() { #if defined(_WIN32) || defined(__CYGWIN__) LARGE_INTEGER freq; QueryPerformanceFrequency(&freq); @@ -72,69 +65,53 @@ public: ~BenchTimer() {} - inline void reset() - { + inline void reset() { m_bests.fill(1e9); m_worsts.fill(0); m_totals.setZero(); } - inline void start() - { - m_starts[CPU_TIMER] = getCpuTime(); + inline void start() { + m_starts[CPU_TIMER] = getCpuTime(); m_starts[REAL_TIMER] = getRealTime(); } - inline void stop() - { + inline void stop() { m_times[CPU_TIMER] = getCpuTime() - m_starts[CPU_TIMER]; m_times[REAL_TIMER] = getRealTime() - m_starts[REAL_TIMER]; - #if EIGEN_VERSION_AT_LEAST(2,90,0) +#if EIGEN_VERSION_AT_LEAST(2, 90, 0) m_bests = m_bests.cwiseMin(m_times); m_worsts = m_worsts.cwiseMax(m_times); - #else - m_bests(0) = std::min(m_bests(0),m_times(0)); - m_bests(1) = std::min(m_bests(1),m_times(1)); - m_worsts(0) = std::max(m_worsts(0),m_times(0)); - m_worsts(1) = std::max(m_worsts(1),m_times(1)); - #endif +#else + m_bests(0) = std::min(m_bests(0), m_times(0)); + m_bests(1) = std::min(m_bests(1), m_times(1)); + m_worsts(0) = std::max(m_worsts(0), m_times(0)); + m_worsts(1) = std::max(m_worsts(1), m_times(1)); +#endif m_totals += m_times; } /** Return the elapsed time in seconds between the last start/stop pair - */ - inline double value(int TIMER = CPU_TIMER) const - { - return m_times[TIMER]; - } + */ + inline double value(int TIMER = CPU_TIMER) const { return m_times[TIMER]; } /** Return the best elapsed time in seconds - */ - inline double best(int TIMER = CPU_TIMER) const - { - return m_bests[TIMER]; - } + */ + inline double best(int TIMER = CPU_TIMER) const { return m_bests[TIMER]; } /** Return the worst elapsed time in seconds - */ - inline double worst(int TIMER = CPU_TIMER) const - { - return m_worsts[TIMER]; - } + */ + inline double worst(int TIMER = CPU_TIMER) const { return m_worsts[TIMER]; } /** Return the total elapsed time in seconds. - */ - inline double total(int TIMER = CPU_TIMER) const - { - return m_totals[TIMER]; - } + */ + inline double total(int TIMER = CPU_TIMER) const { return m_totals[TIMER]; } - inline double getCpuTime() const - { + inline double getCpuTime() const { #ifdef _WIN32 LARGE_INTEGER query_ticks; QueryPerformanceCounter(&query_ticks); - return query_ticks.QuadPart/m_frequency; + return query_ticks.QuadPart / m_frequency; #elif __APPLE__ - return double(mach_absolute_time())*1e-9; + return double(mach_absolute_time()) * 1e-9; #else timespec ts; clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &ts); @@ -142,14 +119,13 @@ public: #endif } - inline double getRealTime() const - { + inline double getRealTime() const { #ifdef _WIN32 SYSTEMTIME st; GetSystemTime(&st); return (double)st.wSecond + 1.e-3 * (double)st.wMilliseconds; #elif __APPLE__ - return double(mach_absolute_time())*1e-9; + return double(mach_absolute_time()) * 1e-9; #else timespec ts; clock_gettime(CLOCK_REALTIME, &ts); @@ -157,7 +133,7 @@ public: #endif } -protected: + protected: #if defined(_WIN32) || defined(__CYGWIN__) double m_frequency; #endif @@ -167,33 +143,34 @@ protected: Vector2d m_worsts; Vector2d m_totals; -public: + public: EIGEN_MAKE_ALIGNED_OPERATOR_NEW }; -#define BENCH(TIMER,TRIES,REP,CODE) { \ - TIMER.reset(); \ - for(int uglyvarname1=0; uglyvarname1 #include -template void initMatrix_random(MatrixType& mat) __attribute__((noinline)); -template void initMatrix_random(MatrixType& mat) -{ - mat.setRandom();// = MatrixType::random(mat.rows(), mat.cols()); +template +void initMatrix_random(MatrixType& mat) __attribute__((noinline)); +template +void initMatrix_random(MatrixType& mat) { + mat.setRandom(); // = MatrixType::random(mat.rows(), mat.cols()); } -template void initMatrix_identity(MatrixType& mat) __attribute__((noinline)); -template void initMatrix_identity(MatrixType& mat) -{ +template +void initMatrix_identity(MatrixType& mat) __attribute__((noinline)); +template +void initMatrix_identity(MatrixType& mat) { mat.setIdentity(); } #ifndef __INTEL_COMPILER -#define DISABLE_SSE_EXCEPTIONS() { \ - int aux; \ - asm( \ - "stmxcsr %[aux] \n\t" \ - "orl $32832, %[aux] \n\t" \ - "ldmxcsr %[aux] \n\t" \ - : : [aux] "m" (aux)); \ -} +#define DISABLE_SSE_EXCEPTIONS() \ + { \ + int aux; \ + asm("stmxcsr %[aux] \n\t" \ + "orl $32832, %[aux] \n\t" \ + "ldmxcsr %[aux] \n\t" \ + : \ + : [aux] "m"(aux)); \ + } #else -#define DISABLE_SSE_EXCEPTIONS() +#define DISABLE_SSE_EXCEPTIONS() #endif #ifdef BENCH_GMM #include template -void eiToGmm(const EigenMatrixType& src, GmmMatrixType& dst) -{ - dst.resize(src.rows(),src.cols()); - for (int j=0; j #include #include template -void eiToGsl(const EigenMatrixType& src, gsl_matrix** dst) -{ - for (int j=0; j #include template -void eiToUblas(const EigenMatrixType& src, UblasMatrixType& dst) -{ - dst.resize(src.rows(),src.cols()); - for (int j=0; j -void eiToUblasVec(const EigenType& src, UblasType& dst) -{ +void eiToUblasVec(const EigenType& src, UblasType& dst) { dst.resize(src.size()); - for (int j=0; j> 8); m = 1 << ((compact & 0x0f0) >> 4); n = 1 << ((compact & 0x00f) >> 0); @@ -58,35 +55,23 @@ struct size_triple_t bool is_cubic() const { return k == m && m == n; } }; -ostream& operator<<(ostream& s, const size_triple_t& t) -{ - return s << "(" << t.k << ", " << t.m << ", " << t.n << ")"; -} +ostream& operator<<(ostream& s, const size_triple_t& t) { return s << "(" << t.k << ", " << t.m << ", " << t.n << ")"; } -struct inputfile_entry_t -{ +struct inputfile_entry_t { uint16_t product_size; uint16_t pot_block_size; size_triple_t nonpot_block_size; float gflops; }; -struct inputfile_t -{ - enum class type_t { - unknown, - all_pot_sizes, - default_sizes - }; +struct inputfile_t { + enum class type_t { unknown, all_pot_sizes, default_sizes }; string filename; vector entries; type_t type; - inputfile_t(const string& fname) - : filename(fname) - , type(type_t::unknown) - { + inputfile_t(const string& fname) : filename(fname), type(type_t::unknown) { ifstream stream(filename); if (!stream.is_open()) { cerr << "couldn't open input file: " << filename << endl; @@ -111,27 +96,17 @@ struct inputfile_t type = type_t::default_sizes; continue; } - if (type == type_t::unknown) { continue; } - switch(type) { + switch (type) { case type_t::all_pot_sizes: { unsigned int product_size, block_size; float gflops; - int sscanf_result = - sscanf(line.c_str(), "%x %x %f", - &product_size, - &block_size, - &gflops); - if (3 != sscanf_result || - !product_size || - product_size > 0xfff || - !block_size || - block_size > 0xfff || - !isfinite(gflops)) - { + int sscanf_result = sscanf(line.c_str(), "%x %x %f", &product_size, &block_size, &gflops); + if (3 != sscanf_result || !product_size || product_size > 0xfff || !block_size || block_size > 0xfff || + !isfinite(gflops)) { cerr << "ill-formed input file: " << filename << endl; cerr << "offending line:" << endl << line << endl; exit(1); @@ -150,16 +125,8 @@ struct inputfile_t unsigned int product_size; float gflops; int bk, bm, bn; - int sscanf_result = - sscanf(line.c_str(), "%x default(%d, %d, %d) %f", - &product_size, - &bk, &bm, &bn, - &gflops); - if (5 != sscanf_result || - !product_size || - product_size > 0xfff || - !isfinite(gflops)) - { + int sscanf_result = sscanf(line.c_str(), "%x default(%d, %d, %d) %f", &product_size, &bk, &bm, &bn, &gflops); + if (5 != sscanf_result || !product_size || product_size > 0xfff || !isfinite(gflops)) { cerr << "ill-formed input file: " << filename << endl; cerr << "offending line:" << endl << line << endl; exit(1); @@ -175,7 +142,7 @@ struct inputfile_t entries.push_back(entry); break; } - + default: break; } @@ -192,27 +159,22 @@ struct inputfile_t } }; -struct preprocessed_inputfile_entry_t -{ +struct preprocessed_inputfile_entry_t { uint16_t product_size; uint16_t block_size; float efficiency; }; -bool lower_efficiency(const preprocessed_inputfile_entry_t& e1, const preprocessed_inputfile_entry_t& e2) -{ +bool lower_efficiency(const preprocessed_inputfile_entry_t& e1, const preprocessed_inputfile_entry_t& e2) { return e1.efficiency < e2.efficiency; } -struct preprocessed_inputfile_t -{ +struct preprocessed_inputfile_t { string filename; vector entries; - preprocessed_inputfile_t(const inputfile_t& inputfile) - : filename(inputfile.filename) - { + preprocessed_inputfile_t(const inputfile_t& inputfile) : filename(inputfile.filename) { if (inputfile.type != inputfile_t::type_t::all_pot_sizes) { abort(); } @@ -220,20 +182,16 @@ struct preprocessed_inputfile_t auto it_first_with_given_product_size = it; while (it != inputfile.entries.end()) { ++it; - if (it == inputfile.entries.end() || - it->product_size != it_first_with_given_product_size->product_size) - { + if (it == inputfile.entries.end() || it->product_size != it_first_with_given_product_size->product_size) { import_input_file_range_one_product_size(it_first_with_given_product_size, it); it_first_with_given_product_size = it; } } } -private: - void import_input_file_range_one_product_size( - const vector::const_iterator& begin, - const vector::const_iterator& end) - { + private: + void import_input_file_range_one_product_size(const vector::const_iterator& begin, + const vector::const_iterator& end) { uint16_t product_size = begin->product_size; float max_gflops = 0.0f; for (auto it = begin; it != end; ++it) { @@ -254,9 +212,7 @@ private: } }; -void check_all_files_in_same_exact_order( - const vector& preprocessed_inputfiles) -{ +void check_all_files_in_same_exact_order(const vector& preprocessed_inputfiles) { if (preprocessed_inputfiles.empty()) { return; } @@ -266,11 +222,8 @@ void check_all_files_in_same_exact_order( for (size_t i = 0; i < preprocessed_inputfiles.size(); i++) { if (preprocessed_inputfiles[i].entries.size() != num_entries) { - cerr << "these files have different number of entries: " - << preprocessed_inputfiles[i].filename - << " and " - << first_file.filename - << endl; + cerr << "these files have different number of entries: " << preprocessed_inputfiles[i].filename << " and " + << first_file.filename << endl; exit(1); } } @@ -281,12 +234,8 @@ void check_all_files_in_same_exact_order( for (size_t file_index = 0; file_index < preprocessed_inputfiles.size(); file_index++) { const preprocessed_inputfile_t& cur_file = preprocessed_inputfiles[file_index]; if (cur_file.entries[entry_index].product_size != entry_product_size || - cur_file.entries[entry_index].block_size != entry_block_size) - { - cerr << "entries not in same order between these files: " - << first_file.filename - << " and " - << cur_file.filename + cur_file.entries[entry_index].block_size != entry_block_size) { + cerr << "entries not in same order between these files: " << first_file.filename << " and " << cur_file.filename << endl; exit(1); } @@ -294,10 +243,8 @@ void check_all_files_in_same_exact_order( } } -float efficiency_of_subset( - const vector& preprocessed_inputfiles, - const vector& subset) -{ +float efficiency_of_subset(const vector& preprocessed_inputfiles, + const vector& subset) { if (subset.size() <= 1) { return 1.0f; } @@ -309,9 +256,7 @@ float efficiency_of_subset( uint16_t product_size = first_file.entries[0].product_size; while (entry_index < num_entries) { ++entry_index; - if (entry_index == num_entries || - first_file.entries[entry_index].product_size != product_size) - { + if (entry_index == num_entries || first_file.entries[entry_index].product_size != product_size) { float efficiency_this_product_size = 0.0f; for (size_t e = first_entry_index_with_this_product_size; e < entry_index; e++) { float efficiency_this_entry = 1.0f; @@ -331,10 +276,8 @@ float efficiency_of_subset( return efficiency; } -void dump_table_for_subset( - const vector& preprocessed_inputfiles, - const vector& subset) -{ +void dump_table_for_subset(const vector& preprocessed_inputfiles, + const vector& subset) { const preprocessed_inputfile_t& first_file = preprocessed_inputfiles[subset[0]]; const size_t num_entries = first_file.entries.size(); size_t entry_index = 0; @@ -359,9 +302,7 @@ void dump_table_for_subset( cout << " static const unsigned short data[" << TableSize << "] = {"; while (entry_index < num_entries) { ++entry_index; - if (entry_index == num_entries || - first_file.entries[entry_index].product_size != product_size) - { + if (entry_index == num_entries || first_file.entries[entry_index].product_size != product_size) { float best_efficiency_this_product_size = 0.0f; uint16_t best_block_size_this_product_size = 0; for (size_t e = first_entry_index_with_this_product_size; e < entry_index; e++) { @@ -397,10 +338,8 @@ void dump_table_for_subset( cout << "};" << endl; } -float efficiency_of_partition( - const vector& preprocessed_inputfiles, - const vector>& partition) -{ +float efficiency_of_partition(const vector& preprocessed_inputfiles, + const vector>& partition) { float efficiency = 1.0f; for (auto s = partition.begin(); s != partition.end(); ++s) { efficiency = min(efficiency, efficiency_of_subset(preprocessed_inputfiles, *s)); @@ -408,8 +347,7 @@ float efficiency_of_partition( return efficiency; } -void make_first_subset(size_t subset_size, vector& out_subset, size_t set_size) -{ +void make_first_subset(size_t subset_size, vector& out_subset, size_t set_size) { assert(subset_size >= 1 && subset_size <= set_size); out_subset.resize(subset_size); for (size_t i = 0; i < subset_size; i++) { @@ -417,13 +355,9 @@ void make_first_subset(size_t subset_size, vector& out_subset, size_t se } } -bool is_last_subset(const vector& subset, size_t set_size) -{ - return subset[0] == set_size - subset.size(); -} +bool is_last_subset(const vector& subset, size_t set_size) { return subset[0] == set_size - subset.size(); } -void next_subset(vector& inout_subset, size_t set_size) -{ +void next_subset(vector& inout_subset, size_t set_size) { if (is_last_subset(inout_subset, set_size)) { cerr << "iterating past the last subset" << endl; abort(); @@ -444,9 +378,8 @@ void next_subset(vector& inout_subset, size_t set_size) const size_t number_of_subsets_limit = 100; const size_t always_search_subsets_of_size_at_least = 2; -bool is_number_of_subsets_feasible(size_t n, size_t p) -{ - assert(n>0 && p>0 && p<=n); +bool is_number_of_subsets_feasible(size_t n, size_t p) { + assert(n > 0 && p > 0 && p <= n); uint64_t numerator = 1, denominator = 1; for (size_t i = 0; i < p; i++) { numerator *= n - i; @@ -458,24 +391,20 @@ bool is_number_of_subsets_feasible(size_t n, size_t p) return true; } -size_t max_feasible_subset_size(size_t n) -{ +size_t max_feasible_subset_size(size_t n) { assert(n > 0); - const size_t minresult = min(n-1, always_search_subsets_of_size_at_least); + const size_t minresult = min(n - 1, always_search_subsets_of_size_at_least); for (size_t p = 1; p <= n - 1; p++) { - if (!is_number_of_subsets_feasible(n, p+1)) { + if (!is_number_of_subsets_feasible(n, p + 1)) { return max(p, minresult); } } return n - 1; } -void find_subset_with_efficiency_higher_than( - const vector& preprocessed_inputfiles, - float required_efficiency_to_beat, - vector& inout_remainder, - vector& out_subset) -{ +void find_subset_with_efficiency_higher_than(const vector& preprocessed_inputfiles, + float required_efficiency_to_beat, vector& inout_remainder, + vector& out_subset) { out_subset.resize(0); if (required_efficiency_to_beat >= 1.0f) { @@ -484,7 +413,6 @@ void find_subset_with_efficiency_higher_than( } while (!inout_remainder.empty()) { - vector candidate_indices(inout_remainder.size()); for (size_t i = 0; i < candidate_indices.size(); i++) { candidate_indices[i] = i; @@ -493,20 +421,17 @@ void find_subset_with_efficiency_higher_than( size_t candidate_indices_subset_size = max_feasible_subset_size(candidate_indices.size()); while (candidate_indices_subset_size >= 1) { vector candidate_indices_subset; - make_first_subset(candidate_indices_subset_size, - candidate_indices_subset, - candidate_indices.size()); + make_first_subset(candidate_indices_subset_size, candidate_indices_subset, candidate_indices.size()); vector best_candidate_indices_subset; float best_efficiency = 0.0f; vector trial_subset = out_subset; trial_subset.resize(out_subset.size() + candidate_indices_subset_size); - while (true) - { + while (true) { for (size_t i = 0; i < candidate_indices_subset_size; i++) { trial_subset[out_subset.size() + i] = inout_remainder[candidate_indices_subset[i]]; } - + float trial_efficiency = efficiency_of_subset(preprocessed_inputfiles, trial_subset); if (trial_efficiency > best_efficiency) { best_efficiency = trial_efficiency; @@ -517,7 +442,7 @@ void find_subset_with_efficiency_higher_than( } next_subset(candidate_indices_subset, candidate_indices.size()); } - + if (best_efficiency > required_efficiency_to_beat) { for (size_t i = 0; i < best_candidate_indices_subset.size(); i++) { candidate_indices[i] = candidate_indices[best_candidate_indices_subset[i]]; @@ -526,7 +451,7 @@ void find_subset_with_efficiency_higher_than( } candidate_indices_subset_size--; } - + size_t candidate_index = candidate_indices[0]; auto candidate_iterator = inout_remainder.begin() + candidate_index; vector trial_subset = out_subset; @@ -542,11 +467,9 @@ void find_subset_with_efficiency_higher_than( } } -void find_partition_with_efficiency_higher_than( - const vector& preprocessed_inputfiles, - float required_efficiency_to_beat, - vector>& out_partition) -{ +void find_partition_with_efficiency_higher_than(const vector& preprocessed_inputfiles, + float required_efficiency_to_beat, + vector>& out_partition) { out_partition.resize(0); vector remainder; @@ -556,25 +479,19 @@ void find_partition_with_efficiency_higher_than( while (!remainder.empty()) { vector new_subset; - find_subset_with_efficiency_higher_than( - preprocessed_inputfiles, - required_efficiency_to_beat, - remainder, - new_subset); + find_subset_with_efficiency_higher_than(preprocessed_inputfiles, required_efficiency_to_beat, remainder, + new_subset); out_partition.push_back(new_subset); } } -void print_partition( - const vector& preprocessed_inputfiles, - const vector>& partition) -{ +void print_partition(const vector& preprocessed_inputfiles, + const vector>& partition) { float efficiency = efficiency_of_partition(preprocessed_inputfiles, partition); - cout << "Partition into " << partition.size() << " subsets for " << efficiency * 100.0f << "% efficiency" << endl; + cout << "Partition into " << partition.size() << " subsets for " << efficiency * 100.0f << "% efficiency" << endl; for (auto subset = partition.begin(); subset != partition.end(); ++subset) { - cout << " Subset " << (subset - partition.begin()) - << ", efficiency " << efficiency_of_subset(preprocessed_inputfiles, *subset) * 100.0f << "%:" - << endl; + cout << " Subset " << (subset - partition.begin()) << ", efficiency " + << efficiency_of_subset(preprocessed_inputfiles, *subset) * 100.0f << "%:" << endl; for (auto file = subset->begin(); file != subset->end(); ++file) { cout << " " << preprocessed_inputfiles[*file].filename << endl; } @@ -586,18 +503,18 @@ void print_partition( cout << endl; } -struct action_t -{ - virtual const char* invokation_name() const { abort(); return nullptr; } +struct action_t { + virtual const char* invokation_name() const { + abort(); + return nullptr; + } virtual void run(const vector&) const { abort(); } virtual ~action_t() {} }; -struct partition_action_t : action_t -{ +struct partition_action_t : action_t { virtual const char* invokation_name() const override { return "partition"; } - virtual void run(const vector& input_filenames) const override - { + virtual void run(const vector& input_filenames) const override { vector preprocessed_inputfiles; if (input_filenames.empty()) { @@ -627,17 +544,12 @@ struct partition_action_t : action_t float required_efficiency_to_beat = 0.0f; vector>> partitions; cerr << "searching for partitions...\r" << flush; - while (true) - { + while (true) { vector> partition; - find_partition_with_efficiency_higher_than( - preprocessed_inputfiles, - required_efficiency_to_beat, - partition); + find_partition_with_efficiency_higher_than(preprocessed_inputfiles, required_efficiency_to_beat, partition); float actual_efficiency = efficiency_of_partition(preprocessed_inputfiles, partition); - cerr << "partition " << preprocessed_inputfiles.size() << " files into " << partition.size() - << " subsets for " << 100.0f * actual_efficiency - << " % efficiency" + cerr << "partition " << preprocessed_inputfiles.size() << " files into " << partition.size() << " subsets for " + << 100.0f * actual_efficiency << " % efficiency" << " \r" << flush; partitions.push_back(partition); if (partition.size() == preprocessed_inputfiles.size() || actual_efficiency == 1.0f) { @@ -649,7 +561,7 @@ struct partition_action_t : action_t while (true) { bool repeat = false; for (size_t i = 0; i < partitions.size() - 1; i++) { - if (partitions[i].size() >= partitions[i+1].size()) { + if (partitions[i].size() >= partitions[i + 1].size()) { partitions.erase(partitions.begin() + i); repeat = true; break; @@ -665,8 +577,7 @@ struct partition_action_t : action_t } }; -struct evaluate_defaults_action_t : action_t -{ +struct evaluate_defaults_action_t : action_t { struct results_entry_t { uint16_t product_size; size_triple_t default_block_size; @@ -675,30 +586,24 @@ struct evaluate_defaults_action_t : action_t float best_pot_gflops; float default_efficiency; }; - friend ostream& operator<<(ostream& s, const results_entry_t& entry) - { - return s - << "Product size " << size_triple_t(entry.product_size) - << ": default block size " << entry.default_block_size - << " -> " << entry.default_gflops - << " GFlop/s = " << entry.default_efficiency * 100.0f << " %" - << " of best POT block size " << size_triple_t(entry.best_pot_block_size) - << " -> " << entry.best_pot_gflops - << " GFlop/s" << dec; + friend ostream& operator<<(ostream& s, const results_entry_t& entry) { + return s << "Product size " << size_triple_t(entry.product_size) << ": default block size " + << entry.default_block_size << " -> " << entry.default_gflops + << " GFlop/s = " << entry.default_efficiency * 100.0f << " %" + << " of best POT block size " << size_triple_t(entry.best_pot_block_size) << " -> " + << entry.best_pot_gflops << " GFlop/s" << dec; } static bool lower_efficiency(const results_entry_t& e1, const results_entry_t& e2) { return e1.default_efficiency < e2.default_efficiency; } virtual const char* invokation_name() const override { return "evaluate-defaults"; } - void show_usage_and_exit() const - { + void show_usage_and_exit() const { cerr << "usage: " << invokation_name() << " default-sizes-data all-pot-sizes-data" << endl; cerr << "checks how well the performance with default sizes compares to the best " << "performance measured over all POT sizes." << endl; exit(1); } - virtual void run(const vector& input_filenames) const override - { + virtual void run(const vector& input_filenames) const override { if (input_filenames.size() != 2) { show_usage_and_exit(); } @@ -714,20 +619,17 @@ struct evaluate_defaults_action_t : action_t } vector results; vector cubic_results; - + uint16_t product_size = 0; auto it_all_pot_sizes = inputfile_all_pot_sizes.entries.begin(); for (auto it_default_sizes = inputfile_default_sizes.entries.begin(); - it_default_sizes != inputfile_default_sizes.entries.end(); - ++it_default_sizes) - { + it_default_sizes != inputfile_default_sizes.entries.end(); ++it_default_sizes) { if (it_default_sizes->product_size == product_size) { continue; } product_size = it_default_sizes->product_size; while (it_all_pot_sizes != inputfile_all_pot_sizes.entries.end() && - it_all_pot_sizes->product_size != product_size) - { + it_all_pot_sizes->product_size != product_size) { ++it_all_pot_sizes; } if (it_all_pot_sizes == inputfile_all_pot_sizes.entries.end()) { @@ -735,10 +637,8 @@ struct evaluate_defaults_action_t : action_t } uint16_t best_pot_block_size = 0; float best_pot_gflops = 0; - for (auto it = it_all_pot_sizes; - it != inputfile_all_pot_sizes.entries.end() && it->product_size == product_size; - ++it) - { + for (auto it = it_all_pot_sizes; it != inputfile_all_pot_sizes.entries.end() && it->product_size == product_size; + ++it) { if (it->gflops > best_pot_gflops) { best_pot_gflops = it->gflops; best_pot_block_size = it->pot_block_size; @@ -766,7 +666,7 @@ struct evaluate_defaults_action_t : action_t cout << endl; sort(results.begin(), results.end(), lower_efficiency); - + const size_t n = min(20, results.size()); cout << n << " worst results:" << endl; for (size_t i = 0; i < n; i++) { @@ -781,34 +681,30 @@ struct evaluate_defaults_action_t : action_t cout << endl; sort(cubic_results.begin(), cubic_results.end(), lower_efficiency); - + cout.precision(2); vector a = {0.5f, 0.20f, 0.10f, 0.05f, 0.02f, 0.01f}; for (auto it = a.begin(); it != a.end(); ++it) { size_t n = min(results.size() - 1, size_t(*it * results.size())); cout << (100.0f * n / (results.size() - 1)) - << " % of product sizes have default efficiency <= " - << 100.0f * results[n].default_efficiency << " %" << endl; + << " % of product sizes have default efficiency <= " << 100.0f * results[n].default_efficiency << " %" + << endl; } cout.precision(default_precision); } }; - -void show_usage_and_exit(int argc, char* argv[], - const vector>& available_actions) -{ +void show_usage_and_exit(int argc, char* argv[], const vector>& available_actions) { cerr << "usage: " << argv[0] << " [options...] " << endl; cerr << "available actions:" << endl; for (auto it = available_actions.begin(); it != available_actions.end(); ++it) { cerr << " " << (*it)->invokation_name() << endl; - } + } cerr << "the input files should each contain an output of benchmark-blocking-sizes" << endl; exit(1); } -int main(int argc, char* argv[]) -{ +int main(int argc, char* argv[]) { cout.precision(default_precision); cerr.precision(default_precision); diff --git a/bench/basicbenchmark.cpp b/bench/basicbenchmark.cpp index a26ea853f..dceb9fa38 100644 --- a/bench/basicbenchmark.cpp +++ b/bench/basicbenchmark.cpp @@ -3,32 +3,31 @@ #include "BenchUtil.h" #include "basicbenchmark.h" -int main(int argc, char *argv[]) -{ +int main(int argc, char *argv[]) { DISABLE_SSE_EXCEPTIONS(); - // this is the list of matrix type and size we want to bench: - // ((suffix) (matrix size) (number of iterations)) - #define MODES ((3d)(3)(4000000)) ((4d)(4)(1000000)) ((Xd)(4)(1000000)) ((Xd)(20)(10000)) -// #define MODES ((Xd)(20)(10000)) +// this is the list of matrix type and size we want to bench: +// ((suffix) (matrix size) (number of iterations)) +#define MODES ((3d)(3)(4000000))((4d)(4)(1000000))((Xd)(4)(1000000))((Xd)(20)(10000)) + // #define MODES ((Xd)(20)(10000)) - #define _GENERATE_HEADER(R,ARG,EL) << BOOST_PP_STRINGIZE(BOOST_PP_SEQ_HEAD(EL)) << "-" \ - << BOOST_PP_STRINGIZE(BOOST_PP_SEQ_ELEM(1,EL)) << "x" \ - << BOOST_PP_STRINGIZE(BOOST_PP_SEQ_ELEM(1,EL)) << " / " +#define _GENERATE_HEADER(R, ARG, EL) \ + << BOOST_PP_STRINGIZE(BOOST_PP_SEQ_HEAD(EL)) \ + << "-" \ + << BOOST_PP_STRINGIZE(BOOST_PP_SEQ_ELEM(1,EL)) << "x" << BOOST_PP_STRINGIZE(BOOST_PP_SEQ_ELEM(1,EL)) << " / " - std::cout BOOST_PP_SEQ_FOR_EACH(_GENERATE_HEADER, ~, MODES ) << endl; + std::cout BOOST_PP_SEQ_FOR_EACH(_GENERATE_HEADER, ~, MODES) << endl; const int tries = 10; - #define _RUN_BENCH(R,ARG,EL) \ - std::cout << ARG( \ - BOOST_PP_CAT(Matrix, BOOST_PP_SEQ_HEAD(EL)) (\ - BOOST_PP_SEQ_ELEM(1,EL),BOOST_PP_SEQ_ELEM(1,EL)), BOOST_PP_SEQ_ELEM(2,EL), tries) \ - << " "; +#define _RUN_BENCH(R, ARG, EL) \ + std::cout << ARG(BOOST_PP_CAT(Matrix, BOOST_PP_SEQ_HEAD(EL))(BOOST_PP_SEQ_ELEM(1, EL), BOOST_PP_SEQ_ELEM(1, EL)), \ + BOOST_PP_SEQ_ELEM(2, EL), tries) \ + << " "; - BOOST_PP_SEQ_FOR_EACH(_RUN_BENCH, benchBasic, MODES ); + BOOST_PP_SEQ_FOR_EACH(_RUN_BENCH, benchBasic, MODES); std::cout << endl; - BOOST_PP_SEQ_FOR_EACH(_RUN_BENCH, benchBasic, MODES ); + BOOST_PP_SEQ_FOR_EACH(_RUN_BENCH, benchBasic, MODES); std::cout << endl; return 0; diff --git a/bench/basicbenchmark.h b/bench/basicbenchmark.h index 8059375b5..61ad15336 100644 --- a/bench/basicbenchmark.h +++ b/bench/basicbenchmark.h @@ -2,55 +2,46 @@ #ifndef EIGEN_BENCH_BASICBENCH_H #define EIGEN_BENCH_BASICBENCH_H -enum {LazyEval, EarlyEval, OmpEval}; +enum { LazyEval, EarlyEval, OmpEval }; -template +template void benchBasic_loop(const MatrixType& I, MatrixType& m, int iterations) __attribute__((noinline)); -template -void benchBasic_loop(const MatrixType& I, MatrixType& m, int iterations) -{ - for(int a = 0; a < iterations; a++) - { - if (Mode==LazyEval) - { +template +void benchBasic_loop(const MatrixType& I, MatrixType& m, int iterations) { + for (int a = 0; a < iterations; a++) { + if (Mode == LazyEval) { asm("#begin_bench_loop LazyEval"); - if (MatrixType::SizeAtCompileTime!=Eigen::Dynamic) asm("#fixedsize"); + if (MatrixType::SizeAtCompileTime != Eigen::Dynamic) asm("#fixedsize"); m = (I + 0.00005 * (m + m.lazyProduct(m))).eval(); - } - else if (Mode==OmpEval) - { + } else if (Mode == OmpEval) { asm("#begin_bench_loop OmpEval"); - if (MatrixType::SizeAtCompileTime!=Eigen::Dynamic) asm("#fixedsize"); + if (MatrixType::SizeAtCompileTime != Eigen::Dynamic) asm("#fixedsize"); m = (I + 0.00005 * (m + m.lazyProduct(m))).eval(); - } - else - { + } else { asm("#begin_bench_loop EarlyEval"); - if (MatrixType::SizeAtCompileTime!=Eigen::Dynamic) asm("#fixedsize"); + if (MatrixType::SizeAtCompileTime != Eigen::Dynamic) asm("#fixedsize"); m = I + 0.00005 * (m + m * m); } asm("#end_bench_loop"); } } -template +template double benchBasic(const MatrixType& mat, int size, int tries) __attribute__((noinline)); -template -double benchBasic(const MatrixType& mat, int iterations, int tries) -{ +template +double benchBasic(const MatrixType& mat, int iterations, int tries) { const int rows = mat.rows(); const int cols = mat.cols(); - MatrixType I(rows,cols); - MatrixType m(rows,cols); + MatrixType I(rows, cols); + MatrixType m(rows, cols); initMatrix_identity(I); Eigen::BenchTimer timer; - for(uint t=0; t(I, m, iterations); @@ -60,4 +51,4 @@ double benchBasic(const MatrixType& mat, int iterations, int tries) return timer.value(); }; -#endif // EIGEN_BENCH_BASICBENCH_H +#endif // EIGEN_BENCH_BASICBENCH_H diff --git a/bench/benchBlasGemm.cpp b/bench/benchBlasGemm.cpp index cb086a555..a57966e8c 100644 --- a/bench/benchBlasGemm.cpp +++ b/bench/benchBlasGemm.cpp @@ -25,59 +25,47 @@ typedef double Scalar; #define CBLAS_GEMM cblas_dgemm #endif - -typedef Eigen::Matrix MyMatrix; +typedef Eigen::Matrix MyMatrix; void bench_eigengemm(MyMatrix& mc, const MyMatrix& ma, const MyMatrix& mb, int nbloops); void check_product(int M, int N, int K); void check_product(void); -int main(int argc, char *argv[]) -{ - // disable SSE exceptions - #ifdef __GNUC__ +int main(int argc, char* argv[]) { +// disable SSE exceptions +#ifdef __GNUC__ { int aux; - asm( - "stmxcsr %[aux] \n\t" - "orl $32832, %[aux] \n\t" - "ldmxcsr %[aux] \n\t" - : : [aux] "m" (aux)); + asm("stmxcsr %[aux] \n\t" + "orl $32832, %[aux] \n\t" + "ldmxcsr %[aux] \n\t" + : + : [aux] "m"(aux)); } - #endif +#endif - int nbtries=1, nbloops=1, M, N, K; + int nbtries = 1, nbloops = 1, M, N, K; - if (argc==2) - { - if (std::string(argv[1])=="check") + if (argc == 2) { + if (std::string(argv[1]) == "check") check_product(); else M = N = K = atoi(argv[1]); - } - else if ((argc==3) && (std::string(argv[1])=="auto")) - { + } else if ((argc == 3) && (std::string(argv[1]) == "auto")) { M = N = K = atoi(argv[2]); - nbloops = 1000000000/(M*M*M); - if (nbloops<1) - nbloops = 1; + nbloops = 1000000000 / (M * M * M); + if (nbloops < 1) nbloops = 1; nbtries = 6; - } - else if (argc==4) - { + } else if (argc == 4) { M = N = K = atoi(argv[1]); nbloops = atoi(argv[2]); nbtries = atoi(argv[3]); - } - else if (argc==6) - { + } else if (argc == 6) { M = atoi(argv[1]); N = atoi(argv[2]); K = atoi(argv[3]); nbloops = atoi(argv[4]); nbtries = atoi(argv[5]); - } - else - { + } else { std::cout << "Usage: " << argv[0] << " size \n"; std::cout << "Usage: " << argv[0] << " auto size\n"; std::cout << "Usage: " << argv[0] << " size nbloops nbtries\n"; @@ -95,14 +83,13 @@ int main(int argc, char *argv[]) double nbmad = double(M) * double(N) * double(K) * double(nbloops); - if (!(std::string(argv[1])=="auto")) - std::cout << M << " x " << N << " x " << K << "\n"; + if (!(std::string(argv[1]) == "auto")) std::cout << M << " x " << N << " x " << K << "\n"; Scalar alpha, beta; - MyMatrix ma(M,K), mb(K,N), mc(M,N); - ma = MyMatrix::Random(M,K); - mb = MyMatrix::Random(K,N); - mc = MyMatrix::Random(M,N); + MyMatrix ma(M, K), mb(K, N), mc(M, N); + ma = MyMatrix::Random(M, K); + mb = MyMatrix::Random(K, N); + mc = MyMatrix::Random(M, N); Eigen::BenchTimer timer; @@ -112,108 +99,101 @@ int main(int argc, char *argv[]) // bench cblas // ROWS_A, COLS_B, COLS_A, 1.0, A, COLS_A, B, COLS_B, 0.0, C, COLS_B); - if (!(std::string(argv[1])=="auto")) - { + if (!(std::string(argv[1]) == "auto")) { timer.reset(); - for (uint k=0 ; k(1,64); - N = internal::random(1,768); - K = internal::random(1,768); + for (uint i = 0; i < 1000; ++i) { + M = internal::random(1, 64); + N = internal::random(1, 768); + K = internal::random(1, 768); M = (0 + M) * 1; std::cout << M << " x " << N << " x " << K << "\n"; check_product(M, N, K); } } - diff --git a/bench/benchCholesky.cpp b/bench/benchCholesky.cpp index 0dc94e5b4..3d6655b53 100644 --- a/bench/benchCholesky.cpp +++ b/bench/benchCholesky.cpp @@ -25,117 +25,100 @@ using namespace Eigen; typedef float Scalar; template -__attribute__ ((noinline)) void benchLLT(const MatrixType& m) -{ +__attribute__((noinline)) void benchLLT(const MatrixType& m) { int rows = m.rows(); int cols = m.cols(); double cost = 0; - for (int j=0; j SquareMatrixType; - MatrixType a = MatrixType::Random(rows,cols); - SquareMatrixType covMat = a * a.adjoint(); + MatrixType a = MatrixType::Random(rows, cols); + SquareMatrixType covMat = a * a.adjoint(); BenchTimer timerNoSqrt, timerSqrt; Scalar acc = 0; - int r = internal::random(0,covMat.rows()-1); - int c = internal::random(0,covMat.cols()-1); - for (int t=0; t(0, covMat.rows() - 1); + int c = internal::random(0, covMat.cols() - 1); + for (int t = 0; t < TRIES; ++t) { timerNoSqrt.start(); - for (int k=0; k cholnosqrt(covMat); - acc += cholnosqrt.matrixL().coeff(r,c); + acc += cholnosqrt.matrixL().coeff(r, c); } timerNoSqrt.stop(); } - for (int t=0; t chol(covMat); - acc += chol.matrixL().coeff(r,c); + acc += chol.matrixL().coeff(r, c); } timerSqrt.stop(); } - if (MatrixType::RowsAtCompileTime==Dynamic) + if (MatrixType::RowsAtCompileTime == Dynamic) std::cout << "dyn "; else std::cout << "fixed "; - std::cout << covMat.rows() << " \t" - << (timerNoSqrt.best()) / repeats << "s " - << "(" << 1e-9 * cost*repeats/timerNoSqrt.best() << " GFLOPS)\t" - << (timerSqrt.best()) / repeats << "s " - << "(" << 1e-9 * cost*repeats/timerSqrt.best() << " GFLOPS)\n"; + std::cout << covMat.rows() << " \t" << (timerNoSqrt.best()) / repeats << "s " + << "(" << 1e-9 * cost * repeats / timerNoSqrt.best() << " GFLOPS)\t" << (timerSqrt.best()) / repeats << "s " + << "(" << 1e-9 * cost * repeats / timerSqrt.best() << " GFLOPS)\n"; - - #ifdef BENCH_GSL - if (MatrixType::RowsAtCompileTime==Dynamic) - { +#ifdef BENCH_GSL + if (MatrixType::RowsAtCompileTime == Dynamic) { timerSqrt.reset(); - gsl_matrix* gslCovMat = gsl_matrix_alloc(covMat.rows(),covMat.cols()); - gsl_matrix* gslCopy = gsl_matrix_alloc(covMat.rows(),covMat.cols()); + gsl_matrix* gslCovMat = gsl_matrix_alloc(covMat.rows(), covMat.cols()); + gsl_matrix* gslCopy = gsl_matrix_alloc(covMat.rows(), covMat.cols()); eiToGsl(covMat, &gslCovMat); - for (int t=0; t0; ++i) - benchLLT(Matrix(dynsizes[i],dynsizes[i])); + for (int i = 0; dynsizes[i] > 0; ++i) benchLLT(Matrix(dynsizes[i], dynsizes[i])); - benchLLT(Matrix()); - benchLLT(Matrix()); - benchLLT(Matrix()); - benchLLT(Matrix()); - benchLLT(Matrix()); - benchLLT(Matrix()); - benchLLT(Matrix()); - benchLLT(Matrix()); - benchLLT(Matrix()); + benchLLT(Matrix()); + benchLLT(Matrix()); + benchLLT(Matrix()); + benchLLT(Matrix()); + benchLLT(Matrix()); + benchLLT(Matrix()); + benchLLT(Matrix()); + benchLLT(Matrix()); + benchLLT(Matrix()); return 0; } - diff --git a/bench/benchEigenSolver.cpp b/bench/benchEigenSolver.cpp index dd78c7e01..839877729 100644 --- a/bench/benchEigenSolver.cpp +++ b/bench/benchEigenSolver.cpp @@ -31,34 +31,31 @@ using namespace Eigen; typedef SCALAR Scalar; template -__attribute__ ((noinline)) void benchEigenSolver(const MatrixType& m) -{ +__attribute__((noinline)) void benchEigenSolver(const MatrixType& m) { int rows = m.rows(); int cols = m.cols(); - int stdRepeats = std::max(1,int((REPEAT*1000)/(rows*rows*sqrt(rows)))); + int stdRepeats = std::max(1, int((REPEAT * 1000) / (rows * rows * sqrt(rows)))); int saRepeats = stdRepeats * 4; typedef typename MatrixType::Scalar Scalar; typedef Matrix SquareMatrixType; - MatrixType a = MatrixType::Random(rows,cols); - SquareMatrixType covMat = a * a.adjoint(); + MatrixType a = MatrixType::Random(rows, cols); + SquareMatrixType covMat = a * a.adjoint(); BenchTimer timerSa, timerStd; Scalar acc = 0; - int r = internal::random(0,covMat.rows()-1); - int c = internal::random(0,covMat.cols()-1); + int r = internal::random(0, covMat.rows() - 1); + int c = internal::random(0, covMat.cols() - 1); { SelfAdjointEigenSolver ei(covMat); - for (int t=0; t ei(covMat); - for (int t=0; t gmmCovMat(covMat.rows(),covMat.cols()); - gmm::dense_matrix eigvect(covMat.rows(),covMat.cols()); + gmm::dense_matrix gmmCovMat(covMat.rows(), covMat.cols()); + gmm::dense_matrix eigvect(covMat.rows(), covMat.cols()); std::vector eigval(covMat.rows()); eiToGmm(covMat, gmmCovMat); - for (int t=0; t0; ++i) - benchEigenSolver(Matrix(dynsizes[i],dynsizes[i])); + for (uint i = 0; dynsizes[i] > 0; ++i) benchEigenSolver(Matrix(dynsizes[i], dynsizes[i])); - benchEigenSolver(Matrix()); - benchEigenSolver(Matrix()); - benchEigenSolver(Matrix()); - benchEigenSolver(Matrix()); - benchEigenSolver(Matrix()); - benchEigenSolver(Matrix()); - benchEigenSolver(Matrix()); + benchEigenSolver(Matrix()); + benchEigenSolver(Matrix()); + benchEigenSolver(Matrix()); + benchEigenSolver(Matrix()); + benchEigenSolver(Matrix()); + benchEigenSolver(Matrix()); + benchEigenSolver(Matrix()); return 0; } - diff --git a/bench/benchFFT.cpp b/bench/benchFFT.cpp index 3eb1a1ac0..3c33e77ae 100644 --- a/bench/benchFFT.cpp +++ b/bench/benchFFT.cpp @@ -19,13 +19,21 @@ using namespace Eigen; using namespace std; - template string nameof(); -template <> string nameof() {return "float";} -template <> string nameof() {return "double";} -template <> string nameof() {return "long double";} +template <> +string nameof() { + return "float"; +} +template <> +string nameof() { + return "double"; +} +template <> +string nameof() { + return "long double"; +} #ifndef TYPE #define TYPE float @@ -41,75 +49,69 @@ template <> string nameof() {return "long double";} using namespace Eigen; template -void bench(int nfft,bool fwd,bool unscaled=false, bool halfspec=false) -{ - typedef typename NumTraits::Real Scalar; - typedef typename std::complex Complex; - int nits = NDATA/nfft; - vector inbuf(nfft); - vector outbuf(nfft); - FFT< Scalar > fft; +void bench(int nfft, bool fwd, bool unscaled = false, bool halfspec = false) { + typedef typename NumTraits::Real Scalar; + typedef typename std::complex Complex; + int nits = NDATA / nfft; + vector inbuf(nfft); + vector outbuf(nfft); + FFT fft; - if (unscaled) { - fft.SetFlag(fft.Unscaled); - cout << "unscaled "; - } - if (halfspec) { - fft.SetFlag(fft.HalfSpectrum); - cout << "halfspec "; - } - - - std::fill(inbuf.begin(),inbuf.end(),0); - fft.fwd( outbuf , inbuf); - - BenchTimer timer; - timer.reset(); - for (int k=0;k<8;++k) { - timer.start(); - if (fwd) - for(int i = 0; i < nits; i++) - fft.fwd( outbuf , inbuf); - else - for(int i = 0; i < nits; i++) - fft.inv(inbuf,outbuf); - timer.stop(); - } - - cout << nameof() << " "; - double mflops = 5.*nfft*log2((double)nfft) / (1e6 * timer.value() / (double)nits ); - if ( NumTraits::IsComplex ) { - cout << "complex"; - }else{ - cout << "real "; - mflops /= 2; - } + if (unscaled) { + fft.SetFlag(fft.Unscaled); + cout << "unscaled "; + } + if (halfspec) { + fft.SetFlag(fft.HalfSpectrum); + cout << "halfspec "; + } + std::fill(inbuf.begin(), inbuf.end(), 0); + fft.fwd(outbuf, inbuf); + BenchTimer timer; + timer.reset(); + for (int k = 0; k < 8; ++k) { + timer.start(); if (fwd) - cout << " fwd"; + for (int i = 0; i < nits; i++) fft.fwd(outbuf, inbuf); else - cout << " inv"; + for (int i = 0; i < nits; i++) fft.inv(inbuf, outbuf); + timer.stop(); + } - cout << " NFFT=" << nfft << " " << (double(1e-6*nfft*nits)/timer.value()) << " MS/s " << mflops << "MFLOPS\n"; + cout << nameof() << " "; + double mflops = 5. * nfft * log2((double)nfft) / (1e6 * timer.value() / (double)nits); + if (NumTraits::IsComplex) { + cout << "complex"; + } else { + cout << "real "; + mflops /= 2; + } + + if (fwd) + cout << " fwd"; + else + cout << " inv"; + + cout << " NFFT=" << nfft << " " << (double(1e-6 * nfft * nits) / timer.value()) << " MS/s " << mflops << "MFLOPS\n"; } -int main(int argc,char ** argv) -{ - bench >(NFFT,true); - bench >(NFFT,false); - bench(NFFT,true); - bench(NFFT,false); - bench(NFFT,false,true); - bench(NFFT,false,true,true); +int main(int argc, char** argv) { + bench >(NFFT, true); + bench >(NFFT, false); + bench(NFFT, true); + bench(NFFT, false); + bench(NFFT, false, true); + bench(NFFT, false, true, true); - bench >(NFFT,true); - bench >(NFFT,false); - bench(NFFT,true); - bench(NFFT,false); - bench >(NFFT,true); - bench >(NFFT,false); - bench(NFFT,true); - bench(NFFT,false); - return 0; + bench >(NFFT, true); + bench >(NFFT, false); + bench(NFFT, true); + bench(NFFT, false); + bench >(NFFT, true); + bench >(NFFT, false); + bench(NFFT, true); + bench(NFFT, false); + return 0; } diff --git a/bench/benchGeometry.cpp b/bench/benchGeometry.cpp index 6e16c0331..67c16a992 100644 --- a/bench/benchGeometry.cpp +++ b/bench/benchGeometry.cpp @@ -11,124 +11,110 @@ using namespace std; #define REPEAT 1000000 #endif -enum func_opt -{ - TV, - TMATV, - TMATVMAT, +enum func_opt { + TV, + TMATV, + TMATVMAT, }; - template struct func; template -struct func -{ - static EIGEN_DONT_INLINE res run( arg1& a1, arg2& a2 ) - { - asm (""); - return a1 * a2; - } +struct func { + static EIGEN_DONT_INLINE res run(arg1& a1, arg2& a2) { + asm(""); + return a1 * a2; + } }; template -struct func -{ - static EIGEN_DONT_INLINE res run( arg1& a1, arg2& a2 ) - { - asm (""); - return a1.matrix() * a2; - } +struct func { + static EIGEN_DONT_INLINE res run(arg1& a1, arg2& a2) { + asm(""); + return a1.matrix() * a2; + } }; template -struct func -{ - static EIGEN_DONT_INLINE res run( arg1& a1, arg2& a2 ) - { - asm (""); - return res(a1.matrix() * a2.matrix()); - } +struct func { + static EIGEN_DONT_INLINE res run(arg1& a1, arg2& a2) { + asm(""); + return res(a1.matrix() * a2.matrix()); + } }; template -struct test_transform -{ - static void run() - { - arg1 a1; - a1.setIdentity(); - arg2 a2; - a2.setIdentity(); +struct test_transform { + static void run() { + arg1 a1; + a1.setIdentity(); + arg2 a2; + a2.setIdentity(); - BenchTimer timer; - timer.reset(); - for (int k=0; k<10; ++k) - { - timer.start(); - for (int k=0; k Trans; \ + typedef Matrix Vec; \ + typedef func Func; \ + test_transform::run(); \ + } -#define run_vec( op, scalar, mode, option, vsize ) \ - std::cout << #scalar << "\t " << #mode << "\t " << #option << " " << #vsize " "; \ - {\ - typedef Transform Trans;\ - typedef Matrix Vec;\ - typedef func Func;\ - test_transform< Func, Trans, Vec >::run();\ - } +#define run_trans(op, scalar, mode, option) \ + std::cout << #scalar << "\t " << #mode << "\t " << #option << " "; \ + { \ + typedef Transform Trans; \ + typedef func Func; \ + test_transform::run(); \ + } -#define run_trans( op, scalar, mode, option ) \ - std::cout << #scalar << "\t " << #mode << "\t " << #option << " "; \ - {\ - typedef Transform Trans;\ - typedef func Func;\ - test_transform< Func, Trans, Trans >::run();\ - } +int main(int argc, char* argv[]) { + cout << "vec = trans * vec" << endl; + run_vec(TV, float, Isometry, AutoAlign, 3); + run_vec(TV, float, Isometry, DontAlign, 3); + run_vec(TV, float, Isometry, AutoAlign, 4); + run_vec(TV, float, Isometry, DontAlign, 4); + run_vec(TV, float, Projective, AutoAlign, 4); + run_vec(TV, float, Projective, DontAlign, 4); + run_vec(TV, double, Isometry, AutoAlign, 3); + run_vec(TV, double, Isometry, DontAlign, 3); + run_vec(TV, double, Isometry, AutoAlign, 4); + run_vec(TV, double, Isometry, DontAlign, 4); + run_vec(TV, double, Projective, AutoAlign, 4); + run_vec(TV, double, Projective, DontAlign, 4); -int main(int argc, char* argv[]) -{ - cout << "vec = trans * vec" << endl; - run_vec(TV, float, Isometry, AutoAlign, 3); - run_vec(TV, float, Isometry, DontAlign, 3); - run_vec(TV, float, Isometry, AutoAlign, 4); - run_vec(TV, float, Isometry, DontAlign, 4); - run_vec(TV, float, Projective, AutoAlign, 4); - run_vec(TV, float, Projective, DontAlign, 4); - run_vec(TV, double, Isometry, AutoAlign, 3); - run_vec(TV, double, Isometry, DontAlign, 3); - run_vec(TV, double, Isometry, AutoAlign, 4); - run_vec(TV, double, Isometry, DontAlign, 4); - run_vec(TV, double, Projective, AutoAlign, 4); - run_vec(TV, double, Projective, DontAlign, 4); + cout << "vec = trans.matrix() * vec" << endl; + run_vec(TMATV, float, Isometry, AutoAlign, 4); + run_vec(TMATV, float, Isometry, DontAlign, 4); + run_vec(TMATV, double, Isometry, AutoAlign, 4); + run_vec(TMATV, double, Isometry, DontAlign, 4); - cout << "vec = trans.matrix() * vec" << endl; - run_vec(TMATV, float, Isometry, AutoAlign, 4); - run_vec(TMATV, float, Isometry, DontAlign, 4); - run_vec(TMATV, double, Isometry, AutoAlign, 4); - run_vec(TMATV, double, Isometry, DontAlign, 4); + cout << "trans = trans1 * trans" << endl; + run_trans(TV, float, Isometry, AutoAlign); + run_trans(TV, float, Isometry, DontAlign); + run_trans(TV, double, Isometry, AutoAlign); + run_trans(TV, double, Isometry, DontAlign); + run_trans(TV, float, Projective, AutoAlign); + run_trans(TV, float, Projective, DontAlign); + run_trans(TV, double, Projective, AutoAlign); + run_trans(TV, double, Projective, DontAlign); - cout << "trans = trans1 * trans" << endl; - run_trans(TV, float, Isometry, AutoAlign); - run_trans(TV, float, Isometry, DontAlign); - run_trans(TV, double, Isometry, AutoAlign); - run_trans(TV, double, Isometry, DontAlign); - run_trans(TV, float, Projective, AutoAlign); - run_trans(TV, float, Projective, DontAlign); - run_trans(TV, double, Projective, AutoAlign); - run_trans(TV, double, Projective, DontAlign); - - cout << "trans = trans1.matrix() * trans.matrix()" << endl; - run_trans(TMATVMAT, float, Isometry, AutoAlign); - run_trans(TMATVMAT, float, Isometry, DontAlign); - run_trans(TMATVMAT, double, Isometry, AutoAlign); - run_trans(TMATVMAT, double, Isometry, DontAlign); + cout << "trans = trans1.matrix() * trans.matrix()" << endl; + run_trans(TMATVMAT, float, Isometry, AutoAlign); + run_trans(TMATVMAT, float, Isometry, DontAlign); + run_trans(TMATVMAT, double, Isometry, AutoAlign); + run_trans(TMATVMAT, double, Isometry, DontAlign); } - diff --git a/bench/benchVecAdd.cpp b/bench/benchVecAdd.cpp index ce8e1e911..509c64227 100644 --- a/bench/benchVecAdd.cpp +++ b/bench/benchVecAdd.cpp @@ -14,122 +14,118 @@ using namespace Eigen; typedef float Scalar; -__attribute__ ((noinline)) void benchVec(Scalar* a, Scalar* b, Scalar* c, int size); -__attribute__ ((noinline)) void benchVec(MatrixXf& a, MatrixXf& b, MatrixXf& c); -__attribute__ ((noinline)) void benchVec(VectorXf& a, VectorXf& b, VectorXf& c); +__attribute__((noinline)) void benchVec(Scalar* a, Scalar* b, Scalar* c, int size); +__attribute__((noinline)) void benchVec(MatrixXf& a, MatrixXf& b, MatrixXf& c); +__attribute__((noinline)) void benchVec(VectorXf& a, VectorXf& b, VectorXf& c); -int main(int argc, char* argv[]) -{ - int size = SIZE * 8; - int size2 = size * size; - Scalar* a = internal::aligned_new(size2); - Scalar* b = internal::aligned_new(size2+4)+1; - Scalar* c = internal::aligned_new(size2); - - for (int i=0; i(size2); + Scalar* b = internal::aligned_new(size2 + 4) + 1; + Scalar* c = internal::aligned_new(size2); + + for (int i = 0; i < size; ++i) { + a[i] = b[i] = c[i] = 0; + } + + BenchTimer timer; + + timer.reset(); + for (int k = 0; k < 10; ++k) { + timer.start(); + benchVec(a, b, c, size2); + timer.stop(); + } + std::cout << timer.value() << "s " << (double(size2 * REPEAT) / timer.value()) / (1024. * 1024. * 1024.) + << " GFlops\n"; + return 0; + for (int innersize = size; innersize > 2; --innersize) { + if (size2 % innersize == 0) { + int outersize = size2 / innersize; + MatrixXf ma = Map(a, innersize, outersize); + MatrixXf mb = Map(b, innersize, outersize); + MatrixXf mc = Map(c, innersize, outersize); + timer.reset(); + for (int k = 0; k < 3; ++k) { timer.start(); - benchVec(a, b, c, size2); + benchVec(ma, mb, mc); timer.stop(); + } + std::cout << innersize << " x " << outersize << " " << timer.value() << "s " + << (double(size2 * REPEAT) / timer.value()) / (1024. * 1024. * 1024.) << " GFlops\n"; } - std::cout << timer.value() << "s " << (double(size2*REPEAT)/timer.value())/(1024.*1024.*1024.) << " GFlops\n"; - return 0; - for (int innersize = size; innersize>2 ; --innersize) - { - if (size2%innersize==0) - { - int outersize = size2/innersize; - MatrixXf ma = Map(a, innersize, outersize ); - MatrixXf mb = Map(b, innersize, outersize ); - MatrixXf mc = Map(c, innersize, outersize ); - timer.reset(); - for (int k=0; k<3; ++k) - { - timer.start(); - benchVec(ma, mb, mc); - timer.stop(); - } - std::cout << innersize << " x " << outersize << " " << timer.value() << "s " << (double(size2*REPEAT)/timer.value())/(1024.*1024.*1024.) << " GFlops\n"; - } + } + + VectorXf va = Map(a, size2); + VectorXf vb = Map(b, size2); + VectorXf vc = Map(c, size2); + timer.reset(); + for (int k = 0; k < 3; ++k) { + timer.start(); + benchVec(va, vb, vc); + timer.stop(); + } + std::cout << timer.value() << "s " << (double(size2 * REPEAT) / timer.value()) / (1024. * 1024. * 1024.) + << " GFlops\n"; + + return 0; +} + +void benchVec(MatrixXf& a, MatrixXf& b, MatrixXf& c) { + for (int k = 0; k < REPEAT; ++k) a = a + b; +} + +void benchVec(VectorXf& a, VectorXf& b, VectorXf& c) { + for (int k = 0; k < REPEAT; ++k) a = a + b; +} + +void benchVec(Scalar* a, Scalar* b, Scalar* c, int size) { + typedef internal::packet_traits::type PacketScalar; + const int PacketSize = internal::packet_traits::size; + PacketScalar a0, a1, a2, a3, b0, b1, b2, b3; + for (int k = 0; k < REPEAT; ++k) + for (int i = 0; i < size; i += PacketSize * 8) { + // a0 = internal::pload(&a[i]); + // b0 = internal::pload(&b[i]); + // a1 = internal::pload(&a[i+1*PacketSize]); + // b1 = internal::pload(&b[i+1*PacketSize]); + // a2 = internal::pload(&a[i+2*PacketSize]); + // b2 = internal::pload(&b[i+2*PacketSize]); + // a3 = internal::pload(&a[i+3*PacketSize]); + // b3 = internal::pload(&b[i+3*PacketSize]); + // internal::pstore(&a[i], internal::padd(a0, b0)); + // a0 = internal::pload(&a[i+4*PacketSize]); + // b0 = internal::pload(&b[i+4*PacketSize]); + // + // internal::pstore(&a[i+1*PacketSize], internal::padd(a1, b1)); + // a1 = internal::pload(&a[i+5*PacketSize]); + // b1 = internal::pload(&b[i+5*PacketSize]); + // + // internal::pstore(&a[i+2*PacketSize], internal::padd(a2, b2)); + // a2 = internal::pload(&a[i+6*PacketSize]); + // b2 = internal::pload(&b[i+6*PacketSize]); + // + // internal::pstore(&a[i+3*PacketSize], internal::padd(a3, b3)); + // a3 = internal::pload(&a[i+7*PacketSize]); + // b3 = internal::pload(&b[i+7*PacketSize]); + // + // internal::pstore(&a[i+4*PacketSize], internal::padd(a0, b0)); + // internal::pstore(&a[i+5*PacketSize], internal::padd(a1, b1)); + // internal::pstore(&a[i+6*PacketSize], internal::padd(a2, b2)); + // internal::pstore(&a[i+7*PacketSize], internal::padd(a3, b3)); + + internal::pstore(&a[i + 2 * PacketSize], internal::padd(internal::ploadu(&a[i + 2 * PacketSize]), + internal::ploadu(&b[i + 2 * PacketSize]))); + internal::pstore(&a[i + 3 * PacketSize], internal::padd(internal::ploadu(&a[i + 3 * PacketSize]), + internal::ploadu(&b[i + 3 * PacketSize]))); + internal::pstore(&a[i + 4 * PacketSize], internal::padd(internal::ploadu(&a[i + 4 * PacketSize]), + internal::ploadu(&b[i + 4 * PacketSize]))); + internal::pstore(&a[i + 5 * PacketSize], internal::padd(internal::ploadu(&a[i + 5 * PacketSize]), + internal::ploadu(&b[i + 5 * PacketSize]))); + internal::pstore(&a[i + 6 * PacketSize], internal::padd(internal::ploadu(&a[i + 6 * PacketSize]), + internal::ploadu(&b[i + 6 * PacketSize]))); + internal::pstore(&a[i + 7 * PacketSize], internal::padd(internal::ploadu(&a[i + 7 * PacketSize]), + internal::ploadu(&b[i + 7 * PacketSize]))); } - - VectorXf va = Map(a, size2); - VectorXf vb = Map(b, size2); - VectorXf vc = Map(c, size2); - timer.reset(); - for (int k=0; k<3; ++k) - { - timer.start(); - benchVec(va, vb, vc); - timer.stop(); - } - std::cout << timer.value() << "s " << (double(size2*REPEAT)/timer.value())/(1024.*1024.*1024.) << " GFlops\n"; - - return 0; -} - -void benchVec(MatrixXf& a, MatrixXf& b, MatrixXf& c) -{ - for (int k=0; k::type PacketScalar; - const int PacketSize = internal::packet_traits::size; - PacketScalar a0, a1, a2, a3, b0, b1, b2, b3; - for (int k=0; k // -DSCALARA=double or -DSCALARB=double // -DHAVE_BLAS @@ -14,7 +14,6 @@ #include #include - using namespace std; using namespace Eigen; @@ -45,15 +44,15 @@ const int opt_B = ColMajor; typedef SCALAR Scalar; typedef NumTraits::Real RealScalar; -typedef Matrix A; -typedef Matrix B; -typedef Matrix C; -typedef Matrix M; +typedef Matrix A; +typedef Matrix B; +typedef Matrix C; +typedef Matrix M; #ifdef HAVE_BLAS extern "C" { - #include +#include } static float fone = 1; @@ -65,7 +64,7 @@ static std::complex cfzero = 0; static std::complex cdone = 1; static std::complex cdzero = 0; static char notrans = 'N'; -static char trans = 'T'; +static char trans = 'T'; static char nonunit = 'N'; static char lower = 'L'; static char right = 'R'; @@ -83,60 +82,61 @@ const char transB = trans; const char transB = notrans; #endif -template -void blas_gemm(const A& a, const B& b, MatrixXf& c) -{ - int M = c.rows(); int N = c.cols(); int K = a.cols(); - int lda = a.outerStride(); int ldb = b.outerStride(); int ldc = c.rows(); +template +void blas_gemm(const A& a, const B& b, MatrixXf& c) { + int M = c.rows(); + int N = c.cols(); + int K = a.cols(); + int lda = a.outerStride(); + int ldb = b.outerStride(); + int ldc = c.rows(); - sgemm_(&transA,&transB,&M,&N,&K,&fone, - const_cast(a.data()),&lda, - const_cast(b.data()),&ldb,&fone, - c.data(),&ldc); + sgemm_(&transA, &transB, &M, &N, &K, &fone, const_cast(a.data()), &lda, const_cast(b.data()), &ldb, + &fone, c.data(), &ldc); } -template -void blas_gemm(const A& a, const B& b, MatrixXd& c) -{ - int M = c.rows(); int N = c.cols(); int K = a.cols(); - int lda = a.outerStride(); int ldb = b.outerStride(); int ldc = c.rows(); +template +void blas_gemm(const A& a, const B& b, MatrixXd& c) { + int M = c.rows(); + int N = c.cols(); + int K = a.cols(); + int lda = a.outerStride(); + int ldb = b.outerStride(); + int ldc = c.rows(); - dgemm_(&transA,&transB,&M,&N,&K,&done, - const_cast(a.data()),&lda, - const_cast(b.data()),&ldb,&done, - c.data(),&ldc); + dgemm_(&transA, &transB, &M, &N, &K, &done, const_cast(a.data()), &lda, const_cast(b.data()), &ldb, + &done, c.data(), &ldc); } -template -void blas_gemm(const A& a, const B& b, MatrixXcf& c) -{ - int M = c.rows(); int N = c.cols(); int K = a.cols(); - int lda = a.outerStride(); int ldb = b.outerStride(); int ldc = c.rows(); +template +void blas_gemm(const A& a, const B& b, MatrixXcf& c) { + int M = c.rows(); + int N = c.cols(); + int K = a.cols(); + int lda = a.outerStride(); + int ldb = b.outerStride(); + int ldc = c.rows(); - cgemm_(&transA,&transB,&M,&N,&K,(float*)&cfone, - const_cast((const float*)a.data()),&lda, - const_cast((const float*)b.data()),&ldb,(float*)&cfone, - (float*)c.data(),&ldc); + cgemm_(&transA, &transB, &M, &N, &K, (float*)&cfone, const_cast((const float*)a.data()), &lda, + const_cast((const float*)b.data()), &ldb, (float*)&cfone, (float*)c.data(), &ldc); } -template -void blas_gemm(const A& a, const B& b, MatrixXcd& c) -{ - int M = c.rows(); int N = c.cols(); int K = a.cols(); - int lda = a.outerStride(); int ldb = b.outerStride(); int ldc = c.rows(); +template +void blas_gemm(const A& a, const B& b, MatrixXcd& c) { + int M = c.rows(); + int N = c.cols(); + int K = a.cols(); + int lda = a.outerStride(); + int ldb = b.outerStride(); + int ldc = c.rows(); - zgemm_(&transA,&transB,&M,&N,&K,(double*)&cdone, - const_cast((const double*)a.data()),&lda, - const_cast((const double*)b.data()),&ldb,(double*)&cdone, - (double*)c.data(),&ldc); + zgemm_(&transA, &transB, &M, &N, &K, (double*)&cdone, const_cast((const double*)a.data()), &lda, + const_cast((const double*)b.data()), &ldb, (double*)&cdone, (double*)c.data(), &ldc); } - - #endif -void matlab_cplx_cplx(const M& ar, const M& ai, const M& br, const M& bi, M& cr, M& ci) -{ +void matlab_cplx_cplx(const M& ar, const M& ai, const M& br, const M& bi, M& cr, M& ci) { cr.noalias() += ar * br; cr.noalias() -= ai * bi; ci.noalias() += ar * bi; @@ -144,33 +144,27 @@ void matlab_cplx_cplx(const M& ar, const M& ai, const M& br, const M& bi, M& cr, // [cr ci] += [ar ai] * br + [-ai ar] * bi } -void matlab_real_cplx(const M& a, const M& br, const M& bi, M& cr, M& ci) -{ +void matlab_real_cplx(const M& a, const M& br, const M& bi, M& cr, M& ci) { cr.noalias() += a * br; ci.noalias() += a * bi; } -void matlab_cplx_real(const M& ar, const M& ai, const M& b, M& cr, M& ci) -{ +void matlab_cplx_real(const M& ar, const M& ai, const M& b, M& cr, M& ci) { cr.noalias() += ar * b; ci.noalias() += ai * b; } - - -template -EIGEN_DONT_INLINE void gemm(const A& a, const B& b, C& c) -{ +template +EIGEN_DONT_INLINE void gemm(const A& a, const B& b, C& c) { c.noalias() += a * b; } -int main(int argc, char ** argv) -{ +int main(int argc, char** argv) { std::ptrdiff_t l1 = internal::queryL1CacheSize(); std::ptrdiff_t l2 = internal::queryTopLevelCacheSize(); - std::cout << "L1 cache size = " << (l1>0 ? l1/1024 : -1) << " KB\n"; - std::cout << "L2/L3 cache size = " << (l2>0 ? l2/1024 : -1) << " KB\n"; - typedef internal::gebp_traits Traits; + std::cout << "L1 cache size = " << (l1 > 0 ? l1 / 1024 : -1) << " KB\n"; + std::cout << "L2/L3 cache size = " << (l2 > 0 ? l2 / 1024 : -1) << " KB\n"; + typedef internal::gebp_traits Traits; std::cout << "Register blocking = " << Traits::mr << " x " << Traits::nr << "\n"; int rep = 1; // number of repetitions per try @@ -180,196 +174,220 @@ int main(int argc, char ** argv) int m = s; int n = s; int p = s; - int cache_size1=-1, cache_size2=l2, cache_size3 = 0; + int cache_size1 = -1, cache_size2 = l2, cache_size3 = 0; bool need_help = false; - for (int i=1; i -c -t -p \n"; std::cout << " : size\n"; std::cout << " : rows columns depth\n"; return 1; } -#if EIGEN_VERSION_AT_LEAST(3,2,90) - if(cache_size1>0) - setCpuCacheSizes(cache_size1,cache_size2,cache_size3); +#if EIGEN_VERSION_AT_LEAST(3, 2, 90) + if (cache_size1 > 0) setCpuCacheSizes(cache_size1, cache_size2, cache_size3); #endif - - A a(m,p); a.setRandom(); - B b(p,n); b.setRandom(); - C c(m,n); c.setOnes(); + + A a(m, p); + a.setRandom(); + B b(p, n); + b.setRandom(); + C c(m, n); + c.setOnes(); C rc = c; std::cout << "Matrix sizes = " << m << "x" << p << " * " << p << "x" << n << "\n"; std::ptrdiff_t mc(m), nc(n), kc(p); - internal::computeProductBlockingSizes(kc, mc, nc); + internal::computeProductBlockingSizes(kc, mc, nc); std::cout << "blocking size (mc x kc) = " << mc << " x " << kc << " x " << nc << "\n"; C r = c; - // check the parallel product is correct - #if defined EIGEN_HAS_OPENMP +// check the parallel product is correct +#if defined EIGEN_HAS_OPENMP Eigen::initParallel(); int procs = omp_get_max_threads(); - if(procs>1) - { - #ifdef HAVE_BLAS - blas_gemm(a,b,r); - #else + if (procs > 1) { +#ifdef HAVE_BLAS + blas_gemm(a, b, r); +#else omp_set_num_threads(1); r.noalias() += a * b; omp_set_num_threads(procs); - #endif +#endif c.noalias() += a * b; - if(!r.isApprox(c)) std::cerr << "Warning, your parallel product is crap!\n\n"; + if (!r.isApprox(c)) std::cerr << "Warning, your parallel product is crap!\n\n"; } - #elif defined HAVE_BLAS - blas_gemm(a,b,r); - c.noalias() += a * b; - if(!r.isApprox(c)) { - std::cout << (r - c).norm()/r.norm() << "\n"; +#elif defined HAVE_BLAS + blas_gemm(a, b, r); + c.noalias() += a * b; + if (!r.isApprox(c)) { + std::cout << (r - c).norm() / r.norm() << "\n"; + std::cerr << "Warning, your product is crap!\n\n"; + } +#else + if (1. * m * n * p < 2000. * 2000 * 2000) { + gemm(a, b, c); + r.noalias() += a.cast().lazyProduct(b.cast()); + if (!r.isApprox(c)) { + std::cout << (r - c).norm() / r.norm() << "\n"; std::cerr << "Warning, your product is crap!\n\n"; } - #else - if(1.*m*n*p<2000.*2000*2000) - { - gemm(a,b,c); - r.noalias() += a.cast() .lazyProduct( b.cast() ); - if(!r.isApprox(c)) { - std::cout << (r - c).norm()/r.norm() << "\n"; - std::cerr << "Warning, your product is crap!\n\n"; - } - } - #endif + } +#endif - #ifdef HAVE_BLAS +#ifdef HAVE_BLAS BenchTimer tblas; c = rc; - BENCH(tblas, tries, rep, blas_gemm(a,b,c)); - std::cout << "blas cpu " << tblas.best(CPU_TIMER)/rep << "s \t" << (double(m)*n*p*rep*2/tblas.best(CPU_TIMER))*1e-9 << " GFLOPS \t(" << tblas.total(CPU_TIMER) << "s)\n"; - std::cout << "blas real " << tblas.best(REAL_TIMER)/rep << "s \t" << (double(m)*n*p*rep*2/tblas.best(REAL_TIMER))*1e-9 << " GFLOPS \t(" << tblas.total(REAL_TIMER) << "s)\n"; - #endif + BENCH(tblas, tries, rep, blas_gemm(a, b, c)); + std::cout << "blas cpu " << tblas.best(CPU_TIMER) / rep << "s \t" + << (double(m) * n * p * rep * 2 / tblas.best(CPU_TIMER)) * 1e-9 << " GFLOPS \t(" << tblas.total(CPU_TIMER) + << "s)\n"; + std::cout << "blas real " << tblas.best(REAL_TIMER) / rep << "s \t" + << (double(m) * n * p * rep * 2 / tblas.best(REAL_TIMER)) * 1e-9 << " GFLOPS \t(" << tblas.total(REAL_TIMER) + << "s)\n"; +#endif // warm start - if(b.norm()+a.norm()==123.554) std::cout << "\n"; + if (b.norm() + a.norm() == 123.554) std::cout << "\n"; BenchTimer tmt; c = rc; - BENCH(tmt, tries, rep, gemm(a,b,c)); - std::cout << "eigen cpu " << tmt.best(CPU_TIMER)/rep << "s \t" << (double(m)*n*p*rep*2/tmt.best(CPU_TIMER))*1e-9 << " GFLOPS \t(" << tmt.total(CPU_TIMER) << "s)\n"; - std::cout << "eigen real " << tmt.best(REAL_TIMER)/rep << "s \t" << (double(m)*n*p*rep*2/tmt.best(REAL_TIMER))*1e-9 << " GFLOPS \t(" << tmt.total(REAL_TIMER) << "s)\n"; + BENCH(tmt, tries, rep, gemm(a, b, c)); + std::cout << "eigen cpu " << tmt.best(CPU_TIMER) / rep << "s \t" + << (double(m) * n * p * rep * 2 / tmt.best(CPU_TIMER)) * 1e-9 << " GFLOPS \t(" << tmt.total(CPU_TIMER) + << "s)\n"; + std::cout << "eigen real " << tmt.best(REAL_TIMER) / rep << "s \t" + << (double(m) * n * p * rep * 2 / tmt.best(REAL_TIMER)) * 1e-9 << " GFLOPS \t(" << tmt.total(REAL_TIMER) + << "s)\n"; - #ifdef EIGEN_HAS_OPENMP - if(procs>1) - { +#ifdef EIGEN_HAS_OPENMP + if (procs > 1) { BenchTimer tmono; omp_set_num_threads(1); Eigen::setNbThreads(1); c = rc; - BENCH(tmono, tries, rep, gemm(a,b,c)); - std::cout << "eigen mono cpu " << tmono.best(CPU_TIMER)/rep << "s \t" << (double(m)*n*p*rep*2/tmono.best(CPU_TIMER))*1e-9 << " GFLOPS \t(" << tmono.total(CPU_TIMER) << "s)\n"; - std::cout << "eigen mono real " << tmono.best(REAL_TIMER)/rep << "s \t" << (double(m)*n*p*rep*2/tmono.best(REAL_TIMER))*1e-9 << " GFLOPS \t(" << tmono.total(REAL_TIMER) << "s)\n"; - std::cout << "mt speed up x" << tmono.best(CPU_TIMER) / tmt.best(REAL_TIMER) << " => " << (100.0*tmono.best(CPU_TIMER) / tmt.best(REAL_TIMER))/procs << "%\n"; + BENCH(tmono, tries, rep, gemm(a, b, c)); + std::cout << "eigen mono cpu " << tmono.best(CPU_TIMER) / rep << "s \t" + << (double(m) * n * p * rep * 2 / tmono.best(CPU_TIMER)) * 1e-9 << " GFLOPS \t(" << tmono.total(CPU_TIMER) + << "s)\n"; + std::cout << "eigen mono real " << tmono.best(REAL_TIMER) / rep << "s \t" + << (double(m) * n * p * rep * 2 / tmono.best(REAL_TIMER)) * 1e-9 << " GFLOPS \t(" + << tmono.total(REAL_TIMER) << "s)\n"; + std::cout << "mt speed up x" << tmono.best(CPU_TIMER) / tmt.best(REAL_TIMER) << " => " + << (100.0 * tmono.best(CPU_TIMER) / tmt.best(REAL_TIMER)) / procs << "%\n"; } - #endif - - if(1.*m*n*p<30*30*30) - { +#endif + + if (1. * m * n * p < 30 * 30 * 30) { BenchTimer tmt; c = rc; - BENCH(tmt, tries, rep, c.noalias()+=a.lazyProduct(b)); - std::cout << "lazy cpu " << tmt.best(CPU_TIMER)/rep << "s \t" << (double(m)*n*p*rep*2/tmt.best(CPU_TIMER))*1e-9 << " GFLOPS \t(" << tmt.total(CPU_TIMER) << "s)\n"; - std::cout << "lazy real " << tmt.best(REAL_TIMER)/rep << "s \t" << (double(m)*n*p*rep*2/tmt.best(REAL_TIMER))*1e-9 << " GFLOPS \t(" << tmt.total(REAL_TIMER) << "s)\n"; + BENCH(tmt, tries, rep, c.noalias() += a.lazyProduct(b)); + std::cout << "lazy cpu " << tmt.best(CPU_TIMER) / rep << "s \t" + << (double(m) * n * p * rep * 2 / tmt.best(CPU_TIMER)) * 1e-9 << " GFLOPS \t(" << tmt.total(CPU_TIMER) + << "s)\n"; + std::cout << "lazy real " << tmt.best(REAL_TIMER) / rep << "s \t" + << (double(m) * n * p * rep * 2 / tmt.best(REAL_TIMER)) * 1e-9 << " GFLOPS \t(" << tmt.total(REAL_TIMER) + << "s)\n"; } - - #ifdef DECOUPLED - if((NumTraits::IsComplex) && (NumTraits::IsComplex)) - { - M ar(m,p); ar.setRandom(); - M ai(m,p); ai.setRandom(); - M br(p,n); br.setRandom(); - M bi(p,n); bi.setRandom(); - M cr(m,n); cr.setRandom(); - M ci(m,n); ci.setRandom(); - + +#ifdef DECOUPLED + if ((NumTraits::IsComplex) && (NumTraits::IsComplex)) { + M ar(m, p); + ar.setRandom(); + M ai(m, p); + ai.setRandom(); + M br(p, n); + br.setRandom(); + M bi(p, n); + bi.setRandom(); + M cr(m, n); + cr.setRandom(); + M ci(m, n); + ci.setRandom(); + BenchTimer t; - BENCH(t, tries, rep, matlab_cplx_cplx(ar,ai,br,bi,cr,ci)); - std::cout << "\"matlab\" cpu " << t.best(CPU_TIMER)/rep << "s \t" << (double(m)*n*p*rep*2/t.best(CPU_TIMER))*1e-9 << " GFLOPS \t(" << t.total(CPU_TIMER) << "s)\n"; - std::cout << "\"matlab\" real " << t.best(REAL_TIMER)/rep << "s \t" << (double(m)*n*p*rep*2/t.best(REAL_TIMER))*1e-9 << " GFLOPS \t(" << t.total(REAL_TIMER) << "s)\n"; + BENCH(t, tries, rep, matlab_cplx_cplx(ar, ai, br, bi, cr, ci)); + std::cout << "\"matlab\" cpu " << t.best(CPU_TIMER) / rep << "s \t" + << (double(m) * n * p * rep * 2 / t.best(CPU_TIMER)) * 1e-9 << " GFLOPS \t(" << t.total(CPU_TIMER) + << "s)\n"; + std::cout << "\"matlab\" real " << t.best(REAL_TIMER) / rep << "s \t" + << (double(m) * n * p * rep * 2 / t.best(REAL_TIMER)) * 1e-9 << " GFLOPS \t(" << t.total(REAL_TIMER) + << "s)\n"; } - if((!NumTraits::IsComplex) && (NumTraits::IsComplex)) - { - M a(m,p); a.setRandom(); - M br(p,n); br.setRandom(); - M bi(p,n); bi.setRandom(); - M cr(m,n); cr.setRandom(); - M ci(m,n); ci.setRandom(); - + if ((!NumTraits::IsComplex) && (NumTraits::IsComplex)) { + M a(m, p); + a.setRandom(); + M br(p, n); + br.setRandom(); + M bi(p, n); + bi.setRandom(); + M cr(m, n); + cr.setRandom(); + M ci(m, n); + ci.setRandom(); + BenchTimer t; - BENCH(t, tries, rep, matlab_real_cplx(a,br,bi,cr,ci)); - std::cout << "\"matlab\" cpu " << t.best(CPU_TIMER)/rep << "s \t" << (double(m)*n*p*rep*2/t.best(CPU_TIMER))*1e-9 << " GFLOPS \t(" << t.total(CPU_TIMER) << "s)\n"; - std::cout << "\"matlab\" real " << t.best(REAL_TIMER)/rep << "s \t" << (double(m)*n*p*rep*2/t.best(REAL_TIMER))*1e-9 << " GFLOPS \t(" << t.total(REAL_TIMER) << "s)\n"; + BENCH(t, tries, rep, matlab_real_cplx(a, br, bi, cr, ci)); + std::cout << "\"matlab\" cpu " << t.best(CPU_TIMER) / rep << "s \t" + << (double(m) * n * p * rep * 2 / t.best(CPU_TIMER)) * 1e-9 << " GFLOPS \t(" << t.total(CPU_TIMER) + << "s)\n"; + std::cout << "\"matlab\" real " << t.best(REAL_TIMER) / rep << "s \t" + << (double(m) * n * p * rep * 2 / t.best(REAL_TIMER)) * 1e-9 << " GFLOPS \t(" << t.total(REAL_TIMER) + << "s)\n"; } - if((NumTraits::IsComplex) && (!NumTraits::IsComplex)) - { - M ar(m,p); ar.setRandom(); - M ai(m,p); ai.setRandom(); - M b(p,n); b.setRandom(); - M cr(m,n); cr.setRandom(); - M ci(m,n); ci.setRandom(); - + if ((NumTraits::IsComplex) && (!NumTraits::IsComplex)) { + M ar(m, p); + ar.setRandom(); + M ai(m, p); + ai.setRandom(); + M b(p, n); + b.setRandom(); + M cr(m, n); + cr.setRandom(); + M ci(m, n); + ci.setRandom(); + BenchTimer t; - BENCH(t, tries, rep, matlab_cplx_real(ar,ai,b,cr,ci)); - std::cout << "\"matlab\" cpu " << t.best(CPU_TIMER)/rep << "s \t" << (double(m)*n*p*rep*2/t.best(CPU_TIMER))*1e-9 << " GFLOPS \t(" << t.total(CPU_TIMER) << "s)\n"; - std::cout << "\"matlab\" real " << t.best(REAL_TIMER)/rep << "s \t" << (double(m)*n*p*rep*2/t.best(REAL_TIMER))*1e-9 << " GFLOPS \t(" << t.total(REAL_TIMER) << "s)\n"; + BENCH(t, tries, rep, matlab_cplx_real(ar, ai, b, cr, ci)); + std::cout << "\"matlab\" cpu " << t.best(CPU_TIMER) / rep << "s \t" + << (double(m) * n * p * rep * 2 / t.best(CPU_TIMER)) * 1e-9 << " GFLOPS \t(" << t.total(CPU_TIMER) + << "s)\n"; + std::cout << "\"matlab\" real " << t.best(REAL_TIMER) / rep << "s \t" + << (double(m) * n * p * rep * 2 / t.best(REAL_TIMER)) * 1e-9 << " GFLOPS \t(" << t.total(REAL_TIMER) + << "s)\n"; } - #endif +#endif return 0; } - diff --git a/bench/bench_move_semantics.cpp b/bench/bench_move_semantics.cpp index 323d80417..8b7341ab8 100644 --- a/bench/bench_move_semantics.cpp +++ b/bench/bench_move_semantics.cpp @@ -16,23 +16,20 @@ #include template -void copy_matrix(MatrixType& m) -{ +void copy_matrix(MatrixType& m) { MatrixType tmp(m); m = tmp; } template -void move_matrix(MatrixType&& m) -{ +void move_matrix(MatrixType&& m) { MatrixType tmp(std::move(m)); m = std::move(tmp); } -template -void bench(const std::string& label) -{ - using MatrixType = Eigen::Matrix,1,10>; +template +void bench(const std::string& label) { + using MatrixType = Eigen::Matrix, 1, 10>; Eigen::BenchTimer t; int tries = 10; @@ -42,16 +39,14 @@ void bench(const std::string& label) MatrixType dest; BENCH(t, tries, rep, copy_matrix(data)); - std::cout << label << " copy semantics: " << 1e3*t.best(Eigen::CPU_TIMER) << " ms" << std::endl; + std::cout << label << " copy semantics: " << 1e3 * t.best(Eigen::CPU_TIMER) << " ms" << std::endl; BENCH(t, tries, rep, move_matrix(std::move(data))); - std::cout << label << " move semantics: " << 1e3*t.best(Eigen::CPU_TIMER) << " ms" << std::endl; + std::cout << label << " move semantics: " << 1e3 * t.best(Eigen::CPU_TIMER) << " ms" << std::endl; } -int main() -{ +int main() { bench("float"); bench("double"); return 0; } - diff --git a/bench/bench_norm.cpp b/bench/bench_norm.cpp index 592f25d66..fb53b85af 100644 --- a/bench/bench_norm.cpp +++ b/bench/bench_norm.cpp @@ -5,79 +5,64 @@ using namespace Eigen; using namespace std; -template -EIGEN_DONT_INLINE typename T::Scalar sqsumNorm(T& v) -{ +template +EIGEN_DONT_INLINE typename T::Scalar sqsumNorm(T& v) { return v.norm(); } -template -EIGEN_DONT_INLINE typename T::Scalar stableNorm(T& v) -{ +template +EIGEN_DONT_INLINE typename T::Scalar stableNorm(T& v) { return v.stableNorm(); } -template -EIGEN_DONT_INLINE typename T::Scalar hypotNorm(T& v) -{ +template +EIGEN_DONT_INLINE typename T::Scalar hypotNorm(T& v) { return v.hypotNorm(); } -template -EIGEN_DONT_INLINE typename T::Scalar blueNorm(T& v) -{ +template +EIGEN_DONT_INLINE typename T::Scalar blueNorm(T& v) { return v.blueNorm(); } -template -EIGEN_DONT_INLINE typename T::Scalar lapackNorm(T& v) -{ +template +EIGEN_DONT_INLINE typename T::Scalar lapackNorm(T& v) { typedef typename T::Scalar Scalar; int n = v.size(); Scalar scale = 0; Scalar ssq = 1; - for (int i=0;i= ax) - { - ssq += numext::abs2(ax/scale); - } - else - { - ssq = Scalar(1) + ssq * numext::abs2(scale/ax); + if (scale >= ax) { + ssq += numext::abs2(ax / scale); + } else { + ssq = Scalar(1) + ssq * numext::abs2(scale / ax); scale = ax; } } return scale * std::sqrt(ssq); } -template -EIGEN_DONT_INLINE typename T::Scalar twopassNorm(T& v) -{ +template +EIGEN_DONT_INLINE typename T::Scalar twopassNorm(T& v) { typedef typename T::Scalar Scalar; Scalar s = v.array().abs().maxCoeff(); - return s*(v/s).norm(); + return s * (v / s).norm(); } -template -EIGEN_DONT_INLINE typename T::Scalar bl2passNorm(T& v) -{ +template +EIGEN_DONT_INLINE typename T::Scalar bl2passNorm(T& v) { return v.stableNorm(); } -template -EIGEN_DONT_INLINE typename T::Scalar divacNorm(T& v) -{ - int n =v.size() / 2; - for (int i=0;i0) - { - for (int i=0;i +EIGEN_DONT_INLINE typename T::Scalar divacNorm(T& v) { + int n = v.size() / 2; + for (int i = 0; i < n; ++i) v(i) = v(2 * i) * v(2 * i) + v(2 * i + 1) * v(2 * i + 1); + n = n / 2; + while (n > 0) { + for (int i = 0; i < n; ++i) v(i) = v(2 * i) + v(2 * i + 1); + n = n / 2; } return std::sqrt(v(0)); } @@ -85,61 +70,61 @@ EIGEN_DONT_INLINE typename T::Scalar divacNorm(T& v) namespace Eigen { namespace internal { #ifdef EIGEN_VECTORIZE -Packet4f plt(const Packet4f& a, Packet4f& b) { return _mm_cmplt_ps(a,b); } -Packet2d plt(const Packet2d& a, Packet2d& b) { return _mm_cmplt_pd(a,b); } +Packet4f plt(const Packet4f& a, Packet4f& b) { return _mm_cmplt_ps(a, b); } +Packet2d plt(const Packet2d& a, Packet2d& b) { return _mm_cmplt_pd(a, b); } -Packet4f pandnot(const Packet4f& a, Packet4f& b) { return _mm_andnot_ps(a,b); } -Packet2d pandnot(const Packet2d& a, Packet2d& b) { return _mm_andnot_pd(a,b); } +Packet4f pandnot(const Packet4f& a, Packet4f& b) { return _mm_andnot_ps(a, b); } +Packet2d pandnot(const Packet2d& a, Packet2d& b) { return _mm_andnot_pd(a, b); } #endif -} -} +} // namespace internal +} // namespace Eigen -template -EIGEN_DONT_INLINE typename T::Scalar pblueNorm(const T& v) -{ - #ifndef EIGEN_VECTORIZE +template +EIGEN_DONT_INLINE typename T::Scalar pblueNorm(const T& v) { +#ifndef EIGEN_VECTORIZE return v.blueNorm(); - #else +#else typedef typename T::Scalar Scalar; static int nmax = 0; static Scalar b1, b2, s1m, s2m, overfl, rbig, relerr; int n; - if(nmax <= 0) - { + if (nmax <= 0) { int nbig, ibeta, it, iemin, iemax, iexp; Scalar abig, eps; - nbig = NumTraits::highest(); // largest integer - ibeta = std::numeric_limits::radix; // NumTraits::Base; // base for floating-point numbers - it = NumTraits::digits(); // NumTraits::Mantissa; // number of base-beta digits in mantissa + nbig = NumTraits::highest(); // largest integer + ibeta = std::numeric_limits::radix; // NumTraits::Base; // base for + // floating-point numbers + it = NumTraits::digits(); // NumTraits::Mantissa; // number of base-beta digits in + // mantissa iemin = NumTraits::min_exponent(); // minimum exponent iemax = NumTraits::max_exponent(); // maximum exponent - rbig = NumTraits::highest(); // largest floating-point number + rbig = NumTraits::highest(); // largest floating-point number // Check the basic machine-dependent constants. - if(iemin > 1 - 2*it || 1+it>iemax || (it==2 && ibeta<5) - || (it<=4 && ibeta <= 3 ) || it<2) - { + if (iemin > 1 - 2 * it || 1 + it > iemax || (it == 2 && ibeta < 5) || (it <= 4 && ibeta <= 3) || it < 2) { eigen_assert(false && "the algorithm cannot be guaranteed on this computer"); } - iexp = -((1-iemin)/2); - b1 = std::pow(ibeta, iexp); // lower boundary of midrange - iexp = (iemax + 1 - it)/2; - b2 = std::pow(ibeta,iexp); // upper boundary of midrange + iexp = -((1 - iemin) / 2); + b1 = std::pow(ibeta, iexp); // lower boundary of midrange + iexp = (iemax + 1 - it) / 2; + b2 = std::pow(ibeta, iexp); // upper boundary of midrange - iexp = (2-iemin)/2; - s1m = std::pow(ibeta,iexp); // scaling factor for lower range - iexp = - ((iemax+it)/2); - s2m = std::pow(ibeta,iexp); // scaling factor for upper range + iexp = (2 - iemin) / 2; + s1m = std::pow(ibeta, iexp); // scaling factor for lower range + iexp = -((iemax + it) / 2); + s2m = std::pow(ibeta, iexp); // scaling factor for upper range - overfl = rbig*s2m; // overflow boundary for abig - eps = std::pow(ibeta, 1-it); - relerr = std::sqrt(eps); // tolerance for neglecting asml - abig = 1.0/eps - 1.0; - if (Scalar(nbig)>abig) nmax = abig; // largest safe n - else nmax = nbig; + overfl = rbig * s2m; // overflow boundary for abig + eps = std::pow(ibeta, 1 - it); + relerr = std::sqrt(eps); // tolerance for neglecting asml + abig = 1.0 / eps - 1.0; + if (Scalar(nbig) > abig) + nmax = abig; // largest safe n + else + nmax = nbig; } typedef typename internal::packet_traits::type Packet; @@ -149,108 +134,103 @@ EIGEN_DONT_INLINE typename T::Scalar pblueNorm(const T& v) Packet pabig = internal::pset1(Scalar(0)); Packet ps2m = internal::pset1(s2m); Packet ps1m = internal::pset1(s1m); - Packet pb2 = internal::pset1(b2); - Packet pb1 = internal::pset1(b1); - for(int j=0; j(b2); + Packet pb1 = internal::pset1(b1); + for (int j = 0; j < v.size(); j += ps) { Packet ax = internal::pabs(v.template packet(j)); - Packet ax_s2m = internal::pmul(ax,ps2m); - Packet ax_s1m = internal::pmul(ax,ps1m); - Packet maskBig = internal::plt(pb2,ax); - Packet maskSml = internal::plt(ax,pb1); + Packet ax_s2m = internal::pmul(ax, ps2m); + Packet ax_s1m = internal::pmul(ax, ps1m); + Packet maskBig = internal::plt(pb2, ax); + Packet maskSml = internal::plt(ax, pb1); -// Packet maskMed = internal::pand(maskSml,maskBig); -// Packet scale = internal::pset1(Scalar(0)); -// scale = internal::por(scale, internal::pand(maskBig,ps2m)); -// scale = internal::por(scale, internal::pand(maskSml,ps1m)); -// scale = internal::por(scale, internal::pandnot(internal::pset1(Scalar(1)),maskMed)); -// ax = internal::pmul(ax,scale); -// ax = internal::pmul(ax,ax); -// pabig = internal::padd(pabig, internal::pand(maskBig, ax)); -// pasml = internal::padd(pasml, internal::pand(maskSml, ax)); -// pamed = internal::padd(pamed, internal::pandnot(ax,maskMed)); + // Packet maskMed = internal::pand(maskSml,maskBig); + // Packet scale = internal::pset1(Scalar(0)); + // scale = internal::por(scale, internal::pand(maskBig,ps2m)); + // scale = internal::por(scale, internal::pand(maskSml,ps1m)); + // scale = internal::por(scale, internal::pandnot(internal::pset1(Scalar(1)),maskMed)); + // ax = internal::pmul(ax,scale); + // ax = internal::pmul(ax,ax); + // pabig = internal::padd(pabig, internal::pand(maskBig, ax)); + // pasml = internal::padd(pasml, internal::pand(maskSml, ax)); + // pamed = internal::padd(pamed, internal::pandnot(ax,maskMed)); - - pabig = internal::padd(pabig, internal::pand(maskBig, internal::pmul(ax_s2m,ax_s2m))); - pasml = internal::padd(pasml, internal::pand(maskSml, internal::pmul(ax_s1m,ax_s1m))); - pamed = internal::padd(pamed, internal::pandnot(internal::pmul(ax,ax),internal::pand(maskSml,maskBig))); + pabig = internal::padd(pabig, internal::pand(maskBig, internal::pmul(ax_s2m, ax_s2m))); + pasml = internal::padd(pasml, internal::pand(maskSml, internal::pmul(ax_s1m, ax_s1m))); + pamed = internal::padd(pamed, internal::pandnot(internal::pmul(ax, ax), internal::pand(maskSml, maskBig))); } Scalar abig = internal::predux(pabig); Scalar asml = internal::predux(pasml); Scalar amed = internal::predux(pamed); - if(abig > Scalar(0)) - { + if (abig > Scalar(0)) { abig = std::sqrt(abig); - if(abig > overfl) - { + if (abig > overfl) { eigen_assert(false && "overflow"); return rbig; } - if(amed > Scalar(0)) - { - abig = abig/s2m; + if (amed > Scalar(0)) { + abig = abig / s2m; amed = std::sqrt(amed); - } - else - { - return abig/s2m; + } else { + return abig / s2m; } - } - else if(asml > Scalar(0)) - { - if (amed > Scalar(0)) - { + } else if (asml > Scalar(0)) { + if (amed > Scalar(0)) { abig = std::sqrt(amed); amed = std::sqrt(asml) / s1m; + } else { + return std::sqrt(asml) / s1m; } - else - { - return std::sqrt(asml)/s1m; - } - } - else - { + } else { return std::sqrt(amed); } asml = std::min(abig, amed); abig = std::max(abig, amed); - if(asml <= abig*relerr) + if (asml <= abig * relerr) return abig; else - return abig * std::sqrt(Scalar(1) + numext::abs2(asml/abig)); - #endif + return abig * std::sqrt(Scalar(1) + numext::abs2(asml / abig)); +#endif } -#define BENCH_PERF(NRM) { \ - float af = 0; double ad = 0; std::complex ac = 0; \ - Eigen::BenchTimer tf, td, tcf; tf.reset(); td.reset(); tcf.reset();\ - for (int k=0; k ac = 0; \ + Eigen::BenchTimer tf, td, tcf; \ + tf.reset(); \ + td.reset(); \ + tcf.reset(); \ + for (int k = 0; k < tries; ++k) { \ + tf.start(); \ + for (int i = 0; i < iters; ++i) { \ + af += NRM(vf); \ + } \ + tf.stop(); \ + } \ + for (int k = 0; k < tries; ++k) { \ + td.start(); \ + for (int i = 0; i < iters; ++i) { \ + ad += NRM(vd); \ + } \ + td.stop(); \ + } \ + /*for (int k=0; k()); double yd = based * std::abs(internal::random()); VectorXf vf = VectorXf::Ones(s) * yf; VectorXd vd = VectorXd::Ones(s) * yd; - std::cout << "reference\t" << std::sqrt(double(s))*yf << "\t" << std::sqrt(double(s))*yd << "\n"; + std::cout << "reference\t" << std::sqrt(double(s)) * yf << "\t" << std::sqrt(double(s)) * yd << "\n"; std::cout << "sqsumNorm\t" << sqsumNorm(vf) << "\t" << sqsumNorm(vd) << "\n"; std::cout << "hypotNorm\t" << hypotNorm(vf) << "\t" << hypotNorm(vd) << "\n"; std::cout << "blueNorm\t" << blueNorm(vf) << "\t" << blueNorm(vd) << "\n"; @@ -260,34 +240,38 @@ void check_accuracy(double basef, double based, int s) std::cout << "bl2passNorm\t" << bl2passNorm(vf) << "\t" << bl2passNorm(vd) << "\n"; } -void check_accuracy_var(int ef0, int ef1, int ed0, int ed1, int s) -{ +void check_accuracy_var(int ef0, int ef1, int ed0, int ed1, int s) { VectorXf vf(s); VectorXd vd(s); - for (int i=0; i()) * std::pow(double(10), internal::random(ef0,ef1)); - vd[i] = std::abs(internal::random()) * std::pow(double(10), internal::random(ed0,ed1)); + for (int i = 0; i < s; ++i) { + vf[i] = std::abs(internal::random()) * std::pow(double(10), internal::random(ef0, ef1)); + vd[i] = std::abs(internal::random()) * std::pow(double(10), internal::random(ed0, ed1)); } - //std::cout << "reference\t" << internal::sqrt(double(s))*yf << "\t" << internal::sqrt(double(s))*yd << "\n"; - std::cout << "sqsumNorm\t" << sqsumNorm(vf) << "\t" << sqsumNorm(vd) << "\t" << sqsumNorm(vf.cast()) << "\t" << sqsumNorm(vd.cast()) << "\n"; - std::cout << "hypotNorm\t" << hypotNorm(vf) << "\t" << hypotNorm(vd) << "\t" << hypotNorm(vf.cast()) << "\t" << hypotNorm(vd.cast()) << "\n"; - std::cout << "blueNorm\t" << blueNorm(vf) << "\t" << blueNorm(vd) << "\t" << blueNorm(vf.cast()) << "\t" << blueNorm(vd.cast()) << "\n"; - std::cout << "pblueNorm\t" << pblueNorm(vf) << "\t" << pblueNorm(vd) << "\t" << blueNorm(vf.cast()) << "\t" << blueNorm(vd.cast()) << "\n"; - std::cout << "lapackNorm\t" << lapackNorm(vf) << "\t" << lapackNorm(vd) << "\t" << lapackNorm(vf.cast()) << "\t" << lapackNorm(vd.cast()) << "\n"; - std::cout << "twopassNorm\t" << twopassNorm(vf) << "\t" << twopassNorm(vd) << "\t" << twopassNorm(vf.cast()) << "\t" << twopassNorm(vd.cast()) << "\n"; -// std::cout << "bl2passNorm\t" << bl2passNorm(vf) << "\t" << bl2passNorm(vd) << "\t" << bl2passNorm(vf.cast()) << "\t" << bl2passNorm(vd.cast()) << "\n"; + // std::cout << "reference\t" << internal::sqrt(double(s))*yf << "\t" << internal::sqrt(double(s))*yd << "\n"; + std::cout << "sqsumNorm\t" << sqsumNorm(vf) << "\t" << sqsumNorm(vd) << "\t" << sqsumNorm(vf.cast()) + << "\t" << sqsumNorm(vd.cast()) << "\n"; + std::cout << "hypotNorm\t" << hypotNorm(vf) << "\t" << hypotNorm(vd) << "\t" << hypotNorm(vf.cast()) + << "\t" << hypotNorm(vd.cast()) << "\n"; + std::cout << "blueNorm\t" << blueNorm(vf) << "\t" << blueNorm(vd) << "\t" << blueNorm(vf.cast()) << "\t" + << blueNorm(vd.cast()) << "\n"; + std::cout << "pblueNorm\t" << pblueNorm(vf) << "\t" << pblueNorm(vd) << "\t" << blueNorm(vf.cast()) + << "\t" << blueNorm(vd.cast()) << "\n"; + std::cout << "lapackNorm\t" << lapackNorm(vf) << "\t" << lapackNorm(vd) << "\t" << lapackNorm(vf.cast()) + << "\t" << lapackNorm(vd.cast()) << "\n"; + std::cout << "twopassNorm\t" << twopassNorm(vf) << "\t" << twopassNorm(vd) << "\t" + << twopassNorm(vf.cast()) << "\t" << twopassNorm(vd.cast()) << "\n"; + // std::cout << "bl2passNorm\t" << bl2passNorm(vf) << "\t" << bl2passNorm(vd) << "\t" << bl2passNorm(vf.cast()) << "\t" << bl2passNorm(vd.cast()) << "\n"; } -int main(int argc, char** argv) -{ +int main(int argc, char** argv) { int tries = 10; int iters = 100000; double y = 1.1345743233455785456788e12 * internal::random(); VectorXf v = VectorXf::Ones(1024) * y; -// return 0; + // return 0; int s = 10000; double basef_ok = 1.1345743233455785456788e15; double based_ok = 1.1345743233455785456788e95; @@ -310,22 +294,20 @@ int main(int argc, char** argv) check_accuracy(basef_over, based_over, s); std::cerr << "\nVarying (over):\n"; - for (int k=0; k<1; ++k) - { - check_accuracy_var(20,27,190,302,s); + for (int k = 0; k < 1; ++k) { + check_accuracy_var(20, 27, 190, 302, s); std::cout << "\n"; } std::cerr << "\nVarying (under):\n"; - for (int k=0; k<1; ++k) - { - check_accuracy_var(-27,20,-302,-190,s); + for (int k = 0; k < 1; ++k) { + check_accuracy_var(-27, 20, -302, -190, s); std::cout << "\n"; } y = 1; std::cout.precision(4); - int s1 = 1024*1024*32; + int s1 = 1024 * 1024 * 32; std::cerr << "Performance (out of cache, " << s1 << "):\n"; { int iters = 1; diff --git a/bench/bench_reverse.cpp b/bench/bench_reverse.cpp index 1e69ca1b2..bf24982f3 100644 --- a/bench/bench_reverse.cpp +++ b/bench/bench_reverse.cpp @@ -15,70 +15,62 @@ using namespace Eigen; typedef double Scalar; template -__attribute__ ((noinline)) void bench_reverse(const MatrixType& m) -{ +__attribute__((noinline)) void bench_reverse(const MatrixType& m) { int rows = m.rows(); int cols = m.cols(); int size = m.size(); - int repeats = (REPEAT*1000)/size; - MatrixType a = MatrixType::Random(rows,cols); - MatrixType b = MatrixType::Random(rows,cols); + int repeats = (REPEAT * 1000) / size; + MatrixType a = MatrixType::Random(rows, cols); + MatrixType b = MatrixType::Random(rows, cols); BenchTimer timerB, timerH, timerV; Scalar acc = 0; - int r = internal::random(0,rows-1); - int c = internal::random(0,cols-1); - for (int t=0; t(0, rows - 1); + int c = internal::random(0, cols - 1); + for (int t = 0; t < TRIES; ++t) { timerB.start(); - for (int k=0; k0; ++i) - { - bench_reverse(Matrix(dynsizes[i],dynsizes[i])); - bench_reverse(Matrix(dynsizes[i]*dynsizes[i])); + for (uint i = 0; dynsizes[i] > 0; ++i) { + bench_reverse(Matrix(dynsizes[i], dynsizes[i])); + bench_reverse(Matrix(dynsizes[i] * dynsizes[i])); } -// bench_reverse(Matrix()); -// bench_reverse(Matrix()); -// bench_reverse(Matrix()); -// bench_reverse(Matrix()); -// bench_reverse(Matrix()); -// bench_reverse(Matrix()); -// bench_reverse(Matrix()); -// bench_reverse(Matrix()); -// bench_reverse(Matrix()); + // bench_reverse(Matrix()); + // bench_reverse(Matrix()); + // bench_reverse(Matrix()); + // bench_reverse(Matrix()); + // bench_reverse(Matrix()); + // bench_reverse(Matrix()); + // bench_reverse(Matrix()); + // bench_reverse(Matrix()); + // bench_reverse(Matrix()); return 0; } - diff --git a/bench/bench_sum.cpp b/bench/bench_sum.cpp index a3d925e4f..a5390b2a9 100644 --- a/bench/bench_sum.cpp +++ b/bench/bench_sum.cpp @@ -3,15 +3,13 @@ using namespace Eigen; using namespace std; -int main() -{ - typedef Matrix Vec; +int main() { + typedef Matrix Vec; Vec v(SIZE); v.setZero(); v[0] = 1; v[1] = 2; - for(int i = 0; i < 1000000; i++) - { + for (int i = 0; i < 1000000; i++) { v.coeffRef(0) += v.sum() * SCALAR(1e-20); } cout << v.sum() << endl; diff --git a/bench/benchmark-blocking-sizes.cpp b/bench/benchmark-blocking-sizes.cpp index 827be2880..8c94a797e 100644 --- a/bench/benchmark-blocking-sizes.cpp +++ b/bench/benchmark-blocking-sizes.cpp @@ -59,14 +59,12 @@ static_assert(maxsize > minsize, "maxsize must be larger than minsize"); static_assert(maxsize < (minsize << 16), "maxsize must be less than (minsize<<16)"); // just a helper to store a triple of K,M,N sizes for matrix product -struct size_triple_t -{ +struct size_triple_t { size_t k, m, n; size_triple_t() : k(0), m(0), n(0) {} size_triple_t(size_t _k, size_t _m, size_t _n) : k(_k), m(_m), n(_n) {} size_triple_t(const size_triple_t& o) : k(o.k), m(o.m), n(o.n) {} - size_triple_t(uint16_t compact) - { + size_triple_t(uint16_t compact) { k = 1 << ((compact & 0xf00) >> 8); m = 1 << ((compact & 0x0f0) >> 4); n = 1 << ((compact & 0x00f) >> 0); @@ -82,50 +80,35 @@ uint8_t log2_pot(size_t x) { // Convert between size tripes and a compact form fitting in 12 bits // where each size, which must be a POT, is encoded as its log2, on 4 bits // so the largest representable size is 2^15 == 32k ... big enough. -uint16_t compact_size_triple(size_t k, size_t m, size_t n) -{ +uint16_t compact_size_triple(size_t k, size_t m, size_t n) { return (log2_pot(k) << 8) | (log2_pot(m) << 4) | log2_pot(n); } -uint16_t compact_size_triple(const size_triple_t& t) -{ - return compact_size_triple(t.k, t.m, t.n); -} +uint16_t compact_size_triple(const size_triple_t& t) { return compact_size_triple(t.k, t.m, t.n); } // A single benchmark. Initially only contains benchmark params. // Then call run(), which stores the result in the gflops field. -struct benchmark_t -{ +struct benchmark_t { uint16_t compact_product_size; uint16_t compact_block_size; bool use_default_block_size; float gflops; - benchmark_t() - : compact_product_size(0) - , compact_block_size(0) - , use_default_block_size(false) - , gflops(0) - { - } - benchmark_t(size_t pk, size_t pm, size_t pn, - size_t bk, size_t bm, size_t bn) - : compact_product_size(compact_size_triple(pk, pm, pn)) - , compact_block_size(compact_size_triple(bk, bm, bn)) - , use_default_block_size(false) - , gflops(0) - {} + benchmark_t() : compact_product_size(0), compact_block_size(0), use_default_block_size(false), gflops(0) {} + benchmark_t(size_t pk, size_t pm, size_t pn, size_t bk, size_t bm, size_t bn) + : compact_product_size(compact_size_triple(pk, pm, pn)), + compact_block_size(compact_size_triple(bk, bm, bn)), + use_default_block_size(false), + gflops(0) {} benchmark_t(size_t pk, size_t pm, size_t pn) - : compact_product_size(compact_size_triple(pk, pm, pn)) - , compact_block_size(0) - , use_default_block_size(true) - , gflops(0) - {} + : compact_product_size(compact_size_triple(pk, pm, pn)), + compact_block_size(0), + use_default_block_size(true), + gflops(0) {} void run(); }; -ostream& operator<<(ostream& s, const benchmark_t& b) -{ +ostream& operator<<(ostream& s, const benchmark_t& b) { s << hex << b.compact_product_size << dec; if (b.use_default_block_size) { size_triple_t t(b.compact_product_size); @@ -141,17 +124,14 @@ ostream& operator<<(ostream& s, const benchmark_t& b) // We sort first by increasing benchmark parameters, // then by decreasing performance. -bool operator<(const benchmark_t& b1, const benchmark_t& b2) -{ +bool operator<(const benchmark_t& b1, const benchmark_t& b2) { return b1.compact_product_size < b2.compact_product_size || - (b1.compact_product_size == b2.compact_product_size && ( - (b1.compact_block_size < b2.compact_block_size || ( - b1.compact_block_size == b2.compact_block_size && - b1.gflops > b2.gflops)))); + (b1.compact_product_size == b2.compact_product_size && + ((b1.compact_block_size < b2.compact_block_size || + (b1.compact_block_size == b2.compact_block_size && b1.gflops > b2.gflops)))); } -void benchmark_t::run() -{ +void benchmark_t::run() { size_triple_t productsizes(compact_product_size); if (use_default_block_size) { @@ -168,26 +148,22 @@ void benchmark_t::run() // set up the matrix pool const size_t combined_three_matrices_sizes = - sizeof(Scalar) * - (productsizes.k * productsizes.m + - productsizes.k * productsizes.n + - productsizes.m * productsizes.n); + sizeof(Scalar) * + (productsizes.k * productsizes.m + productsizes.k * productsizes.n + productsizes.m * productsizes.n); // 64 M is large enough that nobody has a cache bigger than that, // while still being small enough that everybody has this much RAM, // so conveniently we don't need to special-case platforms here. const size_t unlikely_large_cache_size = 64 << 20; - const size_t working_set_size = - min_working_set_size ? min_working_set_size : unlikely_large_cache_size; + const size_t working_set_size = min_working_set_size ? min_working_set_size : unlikely_large_cache_size; - const size_t matrix_pool_size = - 1 + working_set_size / combined_three_matrices_sizes; + const size_t matrix_pool_size = 1 + working_set_size / combined_three_matrices_sizes; + + MatrixType* lhs = new MatrixType[matrix_pool_size]; + MatrixType* rhs = new MatrixType[matrix_pool_size]; + MatrixType* dst = new MatrixType[matrix_pool_size]; - MatrixType *lhs = new MatrixType[matrix_pool_size]; - MatrixType *rhs = new MatrixType[matrix_pool_size]; - MatrixType *dst = new MatrixType[matrix_pool_size]; - for (size_t i = 0; i < matrix_pool_size; i++) { lhs[i] = MatrixType::Zero(productsizes.m, productsizes.k); rhs[i] = MatrixType::Zero(productsizes.k, productsizes.n); @@ -200,7 +176,6 @@ void benchmark_t::run() float time_per_iter = 0.0f; size_t matrix_index = 0; while (true) { - double starttime = timer.getCpuTime(); for (int i = 0; i < iters_at_a_time; i++) { dst[matrix_index].noalias() = lhs[matrix_index] * rhs[matrix_index]; @@ -228,8 +203,7 @@ void benchmark_t::run() gflops = 2e-9 * productsizes.k * productsizes.m * productsizes.n / time_per_iter; } -void print_cpuinfo() -{ +void print_cpuinfo() { #ifdef __linux__ cout << "contents of /proc/cpuinfo:" << endl; string line; @@ -249,33 +223,30 @@ void print_cpuinfo() } template -string type_name() -{ +string type_name() { return "unknown"; } -template<> -string type_name() -{ +template <> +string type_name() { return "float"; } -template<> -string type_name() -{ +template <> +string type_name() { return "double"; } -struct action_t -{ - virtual const char* invokation_name() const { abort(); return nullptr; } +struct action_t { + virtual const char* invokation_name() const { + abort(); + return nullptr; + } virtual void run() const { abort(); } virtual ~action_t() {} }; -void show_usage_and_exit(int /*argc*/, char* argv[], - const vector>& available_actions) -{ +void show_usage_and_exit(int /*argc*/, char* argv[], const vector>& available_actions) { cerr << "usage: " << argv[0] << " [options...]" << endl << endl; cerr << "available actions:" << endl << endl; for (auto it = available_actions.begin(); it != available_actions.end(); ++it) { @@ -293,11 +264,10 @@ void show_usage_and_exit(int /*argc*/, char* argv[], cerr << " avoid warm caches." << endl; exit(1); } - -float measure_clock_speed() -{ + +float measure_clock_speed() { cerr << "Measuring clock speed... \r" << flush; - + vector all_gflops; for (int i = 0; i < 8; i++) { benchmark_t b(1024, 1024, 1024); @@ -315,14 +285,12 @@ float measure_clock_speed() return result; } -struct human_duration_t -{ +struct human_duration_t { int seconds; human_duration_t(int s) : seconds(s) {} }; -ostream& operator<<(ostream& s, const human_duration_t& d) -{ +ostream& operator<<(ostream& s, const human_duration_t& d) { int remainder = d.seconds; if (remainder > 3600) { int hours = remainder / 3600; @@ -342,8 +310,7 @@ ostream& operator<<(ostream& s, const human_duration_t& d) const char session_filename[] = "/data/local/tmp/benchmark-blocking-sizes-session.data"; -void serialize_benchmarks(const char* filename, const vector& benchmarks, size_t first_benchmark_to_run) -{ +void serialize_benchmarks(const char* filename, const vector& benchmarks, size_t first_benchmark_to_run) { FILE* file = fopen(filename, "w"); if (!file) { cerr << "Could not open file " << filename << " for writing." << endl; @@ -358,8 +325,7 @@ void serialize_benchmarks(const char* filename, const vector& bench fclose(file); } -bool deserialize_benchmarks(const char* filename, vector& benchmarks, size_t& first_benchmark_to_run) -{ +bool deserialize_benchmarks(const char* filename, vector& benchmarks, size_t& first_benchmark_to_run) { FILE* file = fopen(filename, "r"); if (!file) { return false; @@ -382,11 +348,7 @@ bool deserialize_benchmarks(const char* filename, vector& benchmark return true; } -void try_run_some_benchmarks( - vector& benchmarks, - double time_start, - size_t& first_benchmark_to_run) -{ +void try_run_some_benchmarks(vector& benchmarks, double time_start, size_t& first_benchmark_to_run) { if (first_benchmark_to_run == benchmarks.size()) { return; } @@ -402,9 +364,7 @@ void try_run_some_benchmarks( time_now = timer.getRealTime(); // We check clock speed every minute and at the end. - if (benchmark_index == benchmarks.size() || - time_now > time_last_clock_speed_measurement + 60.0f) - { + if (benchmark_index == benchmarks.size() || time_now > time_last_clock_speed_measurement + 60.0f) { time_last_clock_speed_measurement = time_now; // Ensure that clock speed is as expected @@ -425,8 +385,7 @@ void try_run_some_benchmarks( // which invalidates all benchmark results collected so far. // Either way, we better restart all over again now. if (benchmark_index) { - cerr << "Restarting at " << 100.0f * ratio_done - << " % because clock speed increased. " << endl; + cerr << "Restarting at " << 100.0f * ratio_done << " % because clock speed increased. " << endl; } max_clock_speed = current_clock_speed; first_benchmark_to_run = 0; @@ -436,12 +395,9 @@ void try_run_some_benchmarks( bool rerun_last_tests = false; if (current_clock_speed < (1 - clock_speed_tolerance) * max_clock_speed) { - cerr << "Measurements completed so far: " - << 100.0f * ratio_done - << " % " << endl; - cerr << "Clock speed seems to be only " - << current_clock_speed/max_clock_speed - << " times what it used to be." << endl; + cerr << "Measurements completed so far: " << 100.0f * ratio_done << " % " << endl; + cerr << "Clock speed seems to be only " << current_clock_speed / max_clock_speed << " times what it used to be." + << endl; unsigned int seconds_to_sleep_if_lower_clock_speed = 1; @@ -454,9 +410,8 @@ void try_run_some_benchmarks( exit(2); } rerun_last_tests = true; - cerr << "Sleeping " - << seconds_to_sleep_if_lower_clock_speed - << " s... \r" << endl; + cerr << "Sleeping " << seconds_to_sleep_if_lower_clock_speed << " s... \r" + << endl; sleep(seconds_to_sleep_if_lower_clock_speed); current_clock_speed = measure_clock_speed(); seconds_to_sleep_if_lower_clock_speed *= 2; @@ -464,8 +419,7 @@ void try_run_some_benchmarks( } if (rerun_last_tests) { - cerr << "Redoing the last " - << 100.0f * float(benchmark_index - first_benchmark_to_run) / benchmarks.size() + cerr << "Redoing the last " << 100.0f * float(benchmark_index - first_benchmark_to_run) / benchmarks.size() << " % because clock speed had been low. " << endl; return; } @@ -486,8 +440,7 @@ void try_run_some_benchmarks( // Display progress info on stderr if (time_now > time_last_progress_update + 1.0f) { time_last_progress_update = time_now; - cerr << "Measurements... " << 100.0f * ratio_done - << " %, ETA " + cerr << "Measurements... " << 100.0f * ratio_done << " %, ETA " << human_duration_t(float(time_now - time_start) * (1.0f - ratio_done) / ratio_done) << " \r" << flush; } @@ -498,19 +451,15 @@ void try_run_some_benchmarks( } } -void run_benchmarks(vector& benchmarks) -{ +void run_benchmarks(vector& benchmarks) { size_t first_benchmark_to_run; vector deserialized_benchmarks; bool use_deserialized_benchmarks = false; if (deserialize_benchmarks(session_filename, deserialized_benchmarks, first_benchmark_to_run)) { - cerr << "Found serialized session with " - << 100.0f * first_benchmark_to_run / deserialized_benchmarks.size() + cerr << "Found serialized session with " << 100.0f * first_benchmark_to_run / deserialized_benchmarks.size() << " % already done" << endl; - if (deserialized_benchmarks.size() == benchmarks.size() && - first_benchmark_to_run > 0 && - first_benchmark_to_run < benchmarks.size()) - { + if (deserialized_benchmarks.size() == benchmarks.size() && first_benchmark_to_run > 0 && + first_benchmark_to_run < benchmarks.size()) { use_deserialized_benchmarks = true; } } @@ -531,15 +480,13 @@ void run_benchmarks(vector& benchmarks) for (int i = 0; i < 4; i++) { max_clock_speed = max(max_clock_speed, measure_clock_speed()); } - + double time_start = 0.0; while (first_benchmark_to_run < benchmarks.size()) { if (first_benchmark_to_run == 0) { time_start = timer.getRealTime(); } - try_run_some_benchmarks(benchmarks, - time_start, - first_benchmark_to_run); + try_run_some_benchmarks(benchmarks, time_start, first_benchmark_to_run); } // Sort timings by increasing benchmark parameters, and decreasing gflops. @@ -550,10 +497,8 @@ void run_benchmarks(vector& benchmarks) // Collect best (i.e. now first) results for each parameter values. vector best_benchmarks; for (auto it = benchmarks.begin(); it != benchmarks.end(); ++it) { - if (best_benchmarks.empty() || - best_benchmarks.back().compact_product_size != it->compact_product_size || - best_benchmarks.back().compact_block_size != it->compact_block_size) - { + if (best_benchmarks.empty() || best_benchmarks.back().compact_product_size != it->compact_product_size || + best_benchmarks.back().compact_block_size != it->compact_block_size) { best_benchmarks.push_back(*it); } } @@ -562,11 +507,9 @@ void run_benchmarks(vector& benchmarks) benchmarks = best_benchmarks; } -struct measure_all_pot_sizes_action_t : action_t -{ +struct measure_all_pot_sizes_action_t : action_t { virtual const char* invokation_name() const { return "all-pot-sizes"; } - virtual void run() const - { + virtual void run() const { vector benchmarks; for (int repetition = 0; repetition < measurement_repetitions; repetition++) { for (size_t ksize = minsize; ksize <= maxsize; ksize *= 2) { @@ -593,11 +536,9 @@ struct measure_all_pot_sizes_action_t : action_t } }; -struct measure_default_sizes_action_t : action_t -{ +struct measure_default_sizes_action_t : action_t { virtual const char* invokation_name() const { return "default-sizes"; } - virtual void run() const - { + virtual void run() const { vector benchmarks; for (int repetition = 0; repetition < measurement_repetitions; repetition++) { for (size_t ksize = minsize; ksize <= maxsize; ksize *= 2) { @@ -618,8 +559,7 @@ struct measure_default_sizes_action_t : action_t } }; -int main(int argc, char* argv[]) -{ +int main(int argc, char* argv[]) { double time_start = timer.getRealTime(); cout.precision(4); cerr.precision(4); @@ -647,7 +587,7 @@ int main(int argc, char* argv[]) for (int i = 2; i < argc; i++) { if (argv[i] == strstr(argv[i], "--min-working-set-size=")) { const char* equals_sign = strchr(argv[i], '='); - min_working_set_size = strtoul(equals_sign+1, nullptr, 10); + min_working_set_size = strtoul(equals_sign + 1, nullptr, 10); } else { cerr << "unrecognized option: " << argv[i] << endl << endl; show_usage_and_exit(argc, argv, available_actions); @@ -657,7 +597,7 @@ int main(int argc, char* argv[]) print_cpuinfo(); cout << "benchmark parameters:" << endl; - cout << "pointer size: " << 8*sizeof(void*) << " bits" << endl; + cout << "pointer size: " << 8 * sizeof(void*) << " bits" << endl; cout << "scalar type: " << type_name() << endl; cout << "packet size: " << internal::packet_traits::size << endl; cout << "minsize = " << minsize << endl; diff --git a/bench/benchmark.cpp b/bench/benchmark.cpp index c721b9081..93e18b68b 100644 --- a/bench/benchmark.cpp +++ b/bench/benchmark.cpp @@ -19,21 +19,18 @@ using namespace Eigen; #define SCALAR double #endif -int main(int argc, char *argv[]) -{ - Matrix I = Matrix::Ones(); - Matrix m; - for(int i = 0; i < MATSIZE; i++) - for(int j = 0; j < MATSIZE; j++) - { - m(i,j) = (i+MATSIZE*j); - } - asm("#begin"); - for(int a = 0; a < REPEAT; a++) - { - m = Matrix::Ones() + 0.00005 * (m + (m*m)); +int main(int argc, char *argv[]) { + Matrix I = Matrix::Ones(); + Matrix m; + for (int i = 0; i < MATSIZE; i++) + for (int j = 0; j < MATSIZE; j++) { + m(i, j) = (i + MATSIZE * j); } - asm("#end"); - cout << m << endl; - return 0; + asm("#begin"); + for (int a = 0; a < REPEAT; a++) { + m = Matrix::Ones() + 0.00005 * (m + (m * m)); + } + asm("#end"); + cout << m << endl; + return 0; } diff --git a/bench/benchmarkSlice.cpp b/bench/benchmarkSlice.cpp index c5b89c545..584137b75 100644 --- a/bench/benchmarkSlice.cpp +++ b/bench/benchmarkSlice.cpp @@ -15,23 +15,21 @@ using namespace Eigen; #define SCALAR float #endif -int main(int argc, char *argv[]) -{ +int main(int argc, char *argv[]) { typedef Matrix Mat; Mat m(100, 100); m.setRandom(); - for(int a = 0; a < REPEAT; a++) - { + for (int a = 0; a < REPEAT; a++) { int r, c, nr, nc; - r = Eigen::internal::random(0,10); - c = Eigen::internal::random(0,10); - nr = Eigen::internal::random(50,80); - nc = Eigen::internal::random(50,80); - m.block(r,c,nr,nc) += Mat::Ones(nr,nc); - m.block(r,c,nr,nc) *= SCALAR(10); - m.block(r,c,nr,nc) -= Mat::constant(nr,nc,10); - m.block(r,c,nr,nc) /= SCALAR(10); + r = Eigen::internal::random(0, 10); + c = Eigen::internal::random(0, 10); + nr = Eigen::internal::random(50, 80); + nc = Eigen::internal::random(50, 80); + m.block(r, c, nr, nc) += Mat::Ones(nr, nc); + m.block(r, c, nr, nc) *= SCALAR(10); + m.block(r, c, nr, nc) -= Mat::constant(nr, nc, 10); + m.block(r, c, nr, nc) /= SCALAR(10); } cout << m[0] << endl; return 0; diff --git a/bench/benchmarkX.cpp b/bench/benchmarkX.cpp index 8e4b60c2b..eff931834 100644 --- a/bench/benchmarkX.cpp +++ b/bench/benchmarkX.cpp @@ -19,18 +19,16 @@ using namespace Eigen; #define REPEAT 100 #endif -int main(int argc, char *argv[]) -{ - MATTYPE I = MATTYPE::Ones(MATSIZE,MATSIZE); - MATTYPE m(MATSIZE,MATSIZE); - for(int i = 0; i < MATSIZE; i++) for(int j = 0; j < MATSIZE; j++) - { - m(i,j) = (i+j+1)/(MATSIZE*MATSIZE); - } - for(int a = 0; a < REPEAT; a++) - { - m = I + 0.0001 * (m + m*m); - } - cout << m(0,0) << endl; - return 0; +int main(int argc, char *argv[]) { + MATTYPE I = MATTYPE::Ones(MATSIZE, MATSIZE); + MATTYPE m(MATSIZE, MATSIZE); + for (int i = 0; i < MATSIZE; i++) + for (int j = 0; j < MATSIZE; j++) { + m(i, j) = (i + j + 1) / (MATSIZE * MATSIZE); + } + for (int a = 0; a < REPEAT; a++) { + m = I + 0.0001 * (m + m * m); + } + cout << m(0, 0) << endl; + return 0; } diff --git a/bench/benchmarkXcwise.cpp b/bench/benchmarkXcwise.cpp index 62437435e..f0c49779b 100644 --- a/bench/benchmarkXcwise.cpp +++ b/bench/benchmarkXcwise.cpp @@ -18,18 +18,15 @@ using namespace Eigen; #define REPEAT 1000 #endif -int main(int argc, char *argv[]) -{ - VECTYPE I = VECTYPE::Ones(VECSIZE); - VECTYPE m(VECSIZE,1); - for(int i = 0; i < VECSIZE; i++) - { - m[i] = 0.1 * i/VECSIZE; - } - for(int a = 0; a < REPEAT; a++) - { - m = VECTYPE::Ones(VECSIZE) + 0.00005 * (m.cwise().square() + m/4); - } - cout << m[0] << endl; - return 0; +int main(int argc, char *argv[]) { + VECTYPE I = VECTYPE::Ones(VECSIZE); + VECTYPE m(VECSIZE, 1); + for (int i = 0; i < VECSIZE; i++) { + m[i] = 0.1 * i / VECSIZE; + } + for (int a = 0; a < REPEAT; a++) { + m = VECTYPE::Ones(VECSIZE) + 0.00005 * (m.cwise().square() + m / 4); + } + cout << m[0] << endl; + return 0; } diff --git a/bench/btl/actions/action_aat_product.hh b/bench/btl/actions/action_aat_product.hh index aa5b35c94..2de1740ca 100644 --- a/bench/btl/actions/action_aat_product.hh +++ b/bench/btl/actions/action_aat_product.hh @@ -28,101 +28,80 @@ using namespace std; -template +template class Action_aat_product { - -public : - + public: // Ctor - Action_aat_product( int size ):_size(size) - { + Action_aat_product(int size) : _size(size) { MESSAGE("Action_aat_product Ctor"); // STL matrix and vector initialization - init_matrix(A_stl,_size); - init_matrix(X_stl,_size); - init_matrix(resu_stl,_size); + init_matrix(A_stl, _size); + init_matrix(X_stl, _size); + init_matrix(resu_stl, _size); // generic matrix and vector initialization - Interface::matrix_from_stl(A_ref,A_stl); - Interface::matrix_from_stl(X_ref,X_stl); - - Interface::matrix_from_stl(A,A_stl); - Interface::matrix_from_stl(X,X_stl); + Interface::matrix_from_stl(A_ref, A_stl); + Interface::matrix_from_stl(X_ref, X_stl); + Interface::matrix_from_stl(A, A_stl); + Interface::matrix_from_stl(X, X_stl); } // invalidate copy ctor - Action_aat_product( const Action_aat_product & ) - { + Action_aat_product(const Action_aat_product&) { INFOS("illegal call to Action_aat_product Copy Ctor"); exit(0); } // Dtor - ~Action_aat_product( void ){ - + ~Action_aat_product(void) { MESSAGE("Action_aat_product Dtor"); // deallocation - Interface::free_matrix(A,_size); - Interface::free_matrix(X,_size); - - Interface::free_matrix(A_ref,_size); - Interface::free_matrix(X_ref,_size); + Interface::free_matrix(A, _size); + Interface::free_matrix(X, _size); + Interface::free_matrix(A_ref, _size); + Interface::free_matrix(X_ref, _size); } // action name - static inline std::string name( void ) - { - return "aat_"+Interface::name(); + static inline std::string name(void) { return "aat_" + Interface::name(); } + + double nb_op_base(void) { return double(_size) * double(_size) * double(_size); } + + inline void initialize(void) { + Interface::copy_matrix(A_ref, A, _size); + Interface::copy_matrix(X_ref, X, _size); } - double nb_op_base( void ){ - return double(_size)*double(_size)*double(_size); - } + inline void calculate(void) { Interface::aat_product(A, X, _size); } - inline void initialize( void ){ - - Interface::copy_matrix(A_ref,A,_size); - Interface::copy_matrix(X_ref,X,_size); - - } - - inline void calculate( void ) { - - Interface::aat_product(A,X,_size); - - } - - void check_result( void ){ - if (_size>128) return; + void check_result(void) { + if (_size > 128) return; // calculation check - Interface::matrix_to_stl(X,resu_stl); + Interface::matrix_to_stl(X, resu_stl); - STL_interface::aat_product(A_stl,X_stl,_size); + STL_interface::aat_product(A_stl, X_stl, _size); - typename Interface::real_type error= - STL_interface::norm_diff(X_stl,resu_stl); + typename Interface::real_type error = STL_interface::norm_diff(X_stl, resu_stl); - if (error>1.e-6){ + if (error > 1.e-6) { INFOS("WRONG CALCULATION...residual=" << error); exit(1); } - } -private : - + private: typename Interface::stl_matrix A_stl; typename Interface::stl_matrix X_stl; typename Interface::stl_matrix resu_stl; @@ -133,13 +112,7 @@ private : typename Interface::gene_matrix A; typename Interface::gene_matrix X; - int _size; - }; - #endif - - - diff --git a/bench/btl/actions/action_ata_product.hh b/bench/btl/actions/action_ata_product.hh index 04364fe67..0447ab3b2 100644 --- a/bench/btl/actions/action_ata_product.hh +++ b/bench/btl/actions/action_ata_product.hh @@ -28,101 +28,80 @@ using namespace std; -template +template class Action_ata_product { - -public : - + public: // Ctor - Action_ata_product( int size ):_size(size) - { + Action_ata_product(int size) : _size(size) { MESSAGE("Action_ata_product Ctor"); // STL matrix and vector initialization - init_matrix(A_stl,_size); - init_matrix(X_stl,_size); - init_matrix(resu_stl,_size); + init_matrix(A_stl, _size); + init_matrix(X_stl, _size); + init_matrix(resu_stl, _size); // generic matrix and vector initialization - Interface::matrix_from_stl(A_ref,A_stl); - Interface::matrix_from_stl(X_ref,X_stl); - - Interface::matrix_from_stl(A,A_stl); - Interface::matrix_from_stl(X,X_stl); + Interface::matrix_from_stl(A_ref, A_stl); + Interface::matrix_from_stl(X_ref, X_stl); + Interface::matrix_from_stl(A, A_stl); + Interface::matrix_from_stl(X, X_stl); } // invalidate copy ctor - Action_ata_product( const Action_ata_product & ) - { + Action_ata_product(const Action_ata_product&) { INFOS("illegal call to Action_ata_product Copy Ctor"); exit(0); } // Dtor - ~Action_ata_product( void ){ - + ~Action_ata_product(void) { MESSAGE("Action_ata_product Dtor"); // deallocation - Interface::free_matrix(A,_size); - Interface::free_matrix(X,_size); - - Interface::free_matrix(A_ref,_size); - Interface::free_matrix(X_ref,_size); + Interface::free_matrix(A, _size); + Interface::free_matrix(X, _size); + Interface::free_matrix(A_ref, _size); + Interface::free_matrix(X_ref, _size); } // action name - static inline std::string name( void ) - { - return "ata_"+Interface::name(); + static inline std::string name(void) { return "ata_" + Interface::name(); } + + double nb_op_base(void) { return 2.0 * _size * _size * _size; } + + inline void initialize(void) { + Interface::copy_matrix(A_ref, A, _size); + Interface::copy_matrix(X_ref, X, _size); } - double nb_op_base( void ){ - return 2.0*_size*_size*_size; - } + inline void calculate(void) { Interface::ata_product(A, X, _size); } - inline void initialize( void ){ - - Interface::copy_matrix(A_ref,A,_size); - Interface::copy_matrix(X_ref,X,_size); - - } - - inline void calculate( void ) { - - Interface::ata_product(A,X,_size); - - } - - void check_result( void ){ - if (_size>128) return; + void check_result(void) { + if (_size > 128) return; // calculation check - Interface::matrix_to_stl(X,resu_stl); + Interface::matrix_to_stl(X, resu_stl); - STL_interface::ata_product(A_stl,X_stl,_size); + STL_interface::ata_product(A_stl, X_stl, _size); - typename Interface::real_type error= - STL_interface::norm_diff(X_stl,resu_stl); + typename Interface::real_type error = STL_interface::norm_diff(X_stl, resu_stl); - if (error>1.e-6){ + if (error > 1.e-6) { INFOS("WRONG CALCULATION...residual=" << error); exit(1); } - } -private : - + private: typename Interface::stl_matrix A_stl; typename Interface::stl_matrix X_stl; typename Interface::stl_matrix resu_stl; @@ -133,13 +112,7 @@ private : typename Interface::gene_matrix A; typename Interface::gene_matrix X; - int _size; - }; - #endif - - - diff --git a/bench/btl/actions/action_atv_product.hh b/bench/btl/actions/action_atv_product.hh index a8234514b..93c2e1c22 100644 --- a/bench/btl/actions/action_atv_product.hh +++ b/bench/btl/actions/action_atv_product.hh @@ -28,87 +28,79 @@ using namespace std; -template +template class Action_atv_product { - -public : - - Action_atv_product( int size ) : _size(size) - { + public: + Action_atv_product(int size) : _size(size) { MESSAGE("Action_atv_product Ctor"); // STL matrix and vector initialization - init_matrix(A_stl,_size); - init_vector(B_stl,_size); - init_vector(X_stl,_size); - init_vector(resu_stl,_size); + init_matrix(A_stl, _size); + init_vector(B_stl, _size); + init_vector(X_stl, _size); + init_vector(resu_stl, _size); // generic matrix and vector initialization - Interface::matrix_from_stl(A_ref,A_stl); - Interface::vector_from_stl(B_ref,B_stl); - Interface::vector_from_stl(X_ref,X_stl); + Interface::matrix_from_stl(A_ref, A_stl); + Interface::vector_from_stl(B_ref, B_stl); + Interface::vector_from_stl(X_ref, X_stl); - Interface::matrix_from_stl(A,A_stl); - Interface::vector_from_stl(B,B_stl); - Interface::vector_from_stl(X,X_stl); + Interface::matrix_from_stl(A, A_stl); + Interface::vector_from_stl(B, B_stl); + Interface::vector_from_stl(X, X_stl); } // invalidate copy ctor - Action_atv_product( const Action_atv_product & ) - { + Action_atv_product(const Action_atv_product&) { INFOS("illegal call to Action_atv_product Copy Ctor"); exit(1); } - ~Action_atv_product( void ) - { + ~Action_atv_product(void) { MESSAGE("Action_atv_product Dtor"); - Interface::free_matrix(A,_size); + Interface::free_matrix(A, _size); Interface::free_vector(B); Interface::free_vector(X); - Interface::free_matrix(A_ref,_size); + Interface::free_matrix(A_ref, _size); Interface::free_vector(B_ref); Interface::free_vector(X_ref); } static inline std::string name() { return "atv_" + Interface::name(); } - double nb_op_base( void ) { return 2.0*_size*_size; } + double nb_op_base(void) { return 2.0 * _size * _size; } - inline void initialize( void ){ - Interface::copy_matrix(A_ref,A,_size); - Interface::copy_vector(B_ref,B,_size); - Interface::copy_vector(X_ref,X,_size); + inline void initialize(void) { + Interface::copy_matrix(A_ref, A, _size); + Interface::copy_vector(B_ref, B, _size); + Interface::copy_vector(X_ref, X, _size); } - BTL_DONT_INLINE void calculate( void ) { + BTL_DONT_INLINE void calculate(void) { BTL_ASM_COMMENT("begin atv"); - Interface::atv_product(A,B,X,_size); + Interface::atv_product(A, B, X, _size); BTL_ASM_COMMENT("end atv"); } - void check_result( void ) - { - if (_size>128) return; - Interface::vector_to_stl(X,resu_stl); + void check_result(void) { + if (_size > 128) return; + Interface::vector_to_stl(X, resu_stl); - STL_interface::atv_product(A_stl,B_stl,X_stl,_size); + STL_interface::atv_product(A_stl, B_stl, X_stl, _size); - typename Interface::real_type error= - STL_interface::norm_diff(X_stl,resu_stl); + typename Interface::real_type error = STL_interface::norm_diff(X_stl, resu_stl); - if (error>1.e-6){ + if (error > 1.e-6) { INFOS("WRONG CALCULATION...residual=" << error); exit(1); } } -private : - + private: typename Interface::stl_matrix A_stl; typename Interface::stl_vector B_stl; typename Interface::stl_vector X_stl; @@ -122,13 +114,7 @@ private : typename Interface::gene_vector B; typename Interface::gene_vector X; - int _size; - }; - #endif - - - diff --git a/bench/btl/actions/action_axpby.hh b/bench/btl/actions/action_axpby.hh index dadd0ccf3..e74cbd8cb 100644 --- a/bench/btl/actions/action_axpby.hh +++ b/bench/btl/actions/action_axpby.hh @@ -27,38 +27,34 @@ using namespace std; -template +template class Action_axpby { - -public : - + public: // Ctor - Action_axpby( int size ):_alpha(0.5),_beta(0.95),_size(size) - { + Action_axpby(int size) : _alpha(0.5), _beta(0.95), _size(size) { MESSAGE("Action_axpby Ctor"); // STL vector initialization - init_vector(X_stl,_size); - init_vector(Y_stl,_size); - init_vector(resu_stl,_size); + init_vector(X_stl, _size); + init_vector(Y_stl, _size); + init_vector(resu_stl, _size); // generic matrix and vector initialization - Interface::vector_from_stl(X_ref,X_stl); - Interface::vector_from_stl(Y_ref,Y_stl); + Interface::vector_from_stl(X_ref, X_stl); + Interface::vector_from_stl(Y_ref, Y_stl); - Interface::vector_from_stl(X,X_stl); - Interface::vector_from_stl(Y,Y_stl); + Interface::vector_from_stl(X, X_stl); + Interface::vector_from_stl(Y, Y_stl); } // invalidate copy ctor - Action_axpby( const Action_axpby & ) - { + Action_axpby(const Action_axpby&) { INFOS("illegal call to Action_axpby Copy Ctor"); exit(1); } // Dtor - ~Action_axpby( void ){ + ~Action_axpby(void) { MESSAGE("Action_axpby Dtor"); // deallocation @@ -70,44 +66,37 @@ public : } // action name - static inline std::string name( void ) - { - return "axpby_"+Interface::name(); + static inline std::string name(void) { return "axpby_" + Interface::name(); } + + double nb_op_base(void) { return 3.0 * _size; } + + inline void initialize(void) { + Interface::copy_vector(X_ref, X, _size); + Interface::copy_vector(Y_ref, Y, _size); } - double nb_op_base( void ){ - return 3.0*_size; - } - - inline void initialize( void ){ - Interface::copy_vector(X_ref,X,_size); - Interface::copy_vector(Y_ref,Y,_size); - } - - inline void calculate( void ) { + inline void calculate(void) { BTL_ASM_COMMENT("mybegin axpby"); - Interface::axpby(_alpha,X,_beta,Y,_size); + Interface::axpby(_alpha, X, _beta, Y, _size); BTL_ASM_COMMENT("myend axpby"); } - void check_result( void ){ - if (_size>128) return; + void check_result(void) { + if (_size > 128) return; // calculation check - Interface::vector_to_stl(Y,resu_stl); + Interface::vector_to_stl(Y, resu_stl); - STL_interface::axpby(_alpha,X_stl,_beta,Y_stl,_size); + STL_interface::axpby(_alpha, X_stl, _beta, Y_stl, _size); - typename Interface::real_type error= - STL_interface::norm_diff(Y_stl,resu_stl); + typename Interface::real_type error = STL_interface::norm_diff(Y_stl, resu_stl); - if (error>1.e-6){ + if (error > 1.e-6) { INFOS("WRONG CALCULATION...residual=" << error); exit(2); } } -private : - + private: typename Interface::stl_vector X_stl; typename Interface::stl_vector Y_stl; typename Interface::stl_vector resu_stl; diff --git a/bench/btl/actions/action_axpy.hh b/bench/btl/actions/action_axpy.hh index 261be4cb8..073f36485 100644 --- a/bench/btl/actions/action_axpy.hh +++ b/bench/btl/actions/action_axpy.hh @@ -28,46 +28,39 @@ using namespace std; -template +template class Action_axpy { - -public : - + public: // Ctor - Action_axpy( int size ):_coef(1.0),_size(size) - { + Action_axpy(int size) : _coef(1.0), _size(size) { MESSAGE("Action_axpy Ctor"); // STL vector initialization - init_vector(X_stl,_size); - init_vector(Y_stl,_size); - init_vector(resu_stl,_size); + init_vector(X_stl, _size); + init_vector(Y_stl, _size); + init_vector(resu_stl, _size); // generic matrix and vector initialization - Interface::vector_from_stl(X_ref,X_stl); - Interface::vector_from_stl(Y_ref,Y_stl); - - Interface::vector_from_stl(X,X_stl); - Interface::vector_from_stl(Y,Y_stl); - + Interface::vector_from_stl(X_ref, X_stl); + Interface::vector_from_stl(Y_ref, Y_stl); + Interface::vector_from_stl(X, X_stl); + Interface::vector_from_stl(Y, Y_stl); } // invalidate copy ctor - Action_axpy( const Action_axpy & ) - { + Action_axpy(const Action_axpy&) { INFOS("illegal call to Action_axpy Copy Ctor"); exit(1); } // Dtor - ~Action_axpy( void ){ - + ~Action_axpy(void) { MESSAGE("Action_axpy Dtor"); // deallocation @@ -81,46 +74,38 @@ public : // action name - static inline std::string name( void ) - { - return "axpy_"+Interface::name(); + static inline std::string name(void) { return "axpy_" + Interface::name(); } + + double nb_op_base(void) { return 2.0 * _size; } + + inline void initialize(void) { + Interface::copy_vector(X_ref, X, _size); + Interface::copy_vector(Y_ref, Y, _size); } - double nb_op_base( void ){ - return 2.0*_size; - } - - inline void initialize( void ){ - Interface::copy_vector(X_ref,X,_size); - Interface::copy_vector(Y_ref,Y,_size); - } - - inline void calculate( void ) { + inline void calculate(void) { BTL_ASM_COMMENT("mybegin axpy"); - Interface::axpy(_coef,X,Y,_size); + Interface::axpy(_coef, X, Y, _size); BTL_ASM_COMMENT("myend axpy"); } - void check_result( void ){ - if (_size>128) return; + void check_result(void) { + if (_size > 128) return; // calculation check - Interface::vector_to_stl(Y,resu_stl); + Interface::vector_to_stl(Y, resu_stl); - STL_interface::axpy(_coef,X_stl,Y_stl,_size); + STL_interface::axpy(_coef, X_stl, Y_stl, _size); - typename Interface::real_type error= - STL_interface::norm_diff(Y_stl,resu_stl); + typename Interface::real_type error = STL_interface::norm_diff(Y_stl, resu_stl); - if (error>1.e-6){ + if (error > 1.e-6) { INFOS("WRONG CALCULATION...residual=" << error); exit(0); } - } -private : - + private: typename Interface::stl_vector X_stl; typename Interface::stl_vector Y_stl; typename Interface::stl_vector resu_stl; diff --git a/bench/btl/actions/action_cholesky.hh b/bench/btl/actions/action_cholesky.hh index 5f66d113a..c2ac1c20b 100644 --- a/bench/btl/actions/action_cholesky.hh +++ b/bench/btl/actions/action_cholesky.hh @@ -27,93 +27,75 @@ using namespace std; -template +template class Action_cholesky { - -public : - + public: // Ctor - Action_cholesky( int size ):_size(size) - { + Action_cholesky(int size) : _size(size) { MESSAGE("Action_cholesky Ctor"); // STL mat/vec initialization - init_matrix_symm(X_stl,_size); - init_matrix(C_stl,_size); + init_matrix_symm(X_stl, _size); + init_matrix(C_stl, _size); // make sure X is invertible - for (int i=0; i<_size; ++i) - X_stl[i][i] = std::abs(X_stl[i][i]) * 1e2 + 100; + for (int i = 0; i < _size; ++i) X_stl[i][i] = std::abs(X_stl[i][i]) * 1e2 + 100; // generic matrix and vector initialization - Interface::matrix_from_stl(X_ref,X_stl); - Interface::matrix_from_stl(X,X_stl); - Interface::matrix_from_stl(C,C_stl); + Interface::matrix_from_stl(X_ref, X_stl); + Interface::matrix_from_stl(X, X_stl); + Interface::matrix_from_stl(C, C_stl); _cost = 0; - for (int j=0; j<_size; ++j) - { - double r = std::max(_size - j -1,0); - _cost += 2*(r*j+r+j); + for (int j = 0; j < _size; ++j) { + double r = std::max(_size - j - 1, 0); + _cost += 2 * (r * j + r + j); } } // invalidate copy ctor - Action_cholesky( const Action_cholesky & ) - { + Action_cholesky(const Action_cholesky&) { INFOS("illegal call to Action_cholesky Copy Ctor"); exit(1); } // Dtor - ~Action_cholesky( void ){ - + ~Action_cholesky(void) { MESSAGE("Action_cholesky Dtor"); // deallocation - Interface::free_matrix(X_ref,_size); - Interface::free_matrix(X,_size); - Interface::free_matrix(C,_size); + Interface::free_matrix(X_ref, _size); + Interface::free_matrix(X, _size); + Interface::free_matrix(C, _size); } // action name - static inline std::string name( void ) - { - return "cholesky_"+Interface::name(); - } + static inline std::string name(void) { return "cholesky_" + Interface::name(); } - double nb_op_base( void ){ - return _cost; - } + double nb_op_base(void) { return _cost; } - inline void initialize( void ){ - Interface::copy_matrix(X_ref,X,_size); - } + inline void initialize(void) { Interface::copy_matrix(X_ref, X, _size); } - inline void calculate( void ) { - Interface::cholesky(X,C,_size); - } + inline void calculate(void) { Interface::cholesky(X, C, _size); } - void check_result( void ){ + void check_result(void) { // calculation check -// STL_interface::cholesky(X_stl,C_stl,_size); -// -// typename Interface::real_type error= -// STL_interface::norm_diff(C_stl,resu_stl); -// -// if (error>1.e-6){ -// INFOS("WRONG CALCULATION...residual=" << error); -// exit(0); -// } - + // STL_interface::cholesky(X_stl,C_stl,_size); + // + // typename Interface::real_type error= + // STL_interface::norm_diff(C_stl,resu_stl); + // + // if (error>1.e-6){ + // INFOS("WRONG CALCULATION...residual=" << error); + // exit(0); + // } } -private : - + private: typename Interface::stl_matrix X_stl; typename Interface::stl_matrix C_stl; diff --git a/bench/btl/actions/action_ger.hh b/bench/btl/actions/action_ger.hh index dc766efc5..d46dd0a3e 100644 --- a/bench/btl/actions/action_ger.hh +++ b/bench/btl/actions/action_ger.hh @@ -23,91 +23,78 @@ using namespace std; -template +template class Action_ger { - -public : - + public: // Ctor - BTL_DONT_INLINE Action_ger( int size ):_size(size) - { + BTL_DONT_INLINE Action_ger(int size) : _size(size) { MESSAGE("Action_ger Ctor"); // STL matrix and vector initialization typename Interface::stl_matrix tmp; - init_matrix(A_stl,_size); - init_vector(B_stl,_size); - init_vector(X_stl,_size); - init_vector(resu_stl,_size); + init_matrix(A_stl, _size); + init_vector(B_stl, _size); + init_vector(X_stl, _size); + init_vector(resu_stl, _size); // generic matrix and vector initialization - Interface::matrix_from_stl(A_ref,A_stl); - Interface::matrix_from_stl(A,A_stl); - Interface::vector_from_stl(B_ref,B_stl); - Interface::vector_from_stl(B,B_stl); - Interface::vector_from_stl(X_ref,X_stl); - Interface::vector_from_stl(X,X_stl); + Interface::matrix_from_stl(A_ref, A_stl); + Interface::matrix_from_stl(A, A_stl); + Interface::vector_from_stl(B_ref, B_stl); + Interface::vector_from_stl(B, B_stl); + Interface::vector_from_stl(X_ref, X_stl); + Interface::vector_from_stl(X, X_stl); } // invalidate copy ctor - Action_ger( const Action_ger & ) - { + Action_ger(const Action_ger&) { INFOS("illegal call to Action_ger Copy Ctor"); exit(1); } // Dtor - BTL_DONT_INLINE ~Action_ger( void ){ + BTL_DONT_INLINE ~Action_ger(void) { MESSAGE("Action_ger Dtor"); - Interface::free_matrix(A,_size); + Interface::free_matrix(A, _size); Interface::free_vector(B); Interface::free_vector(X); - Interface::free_matrix(A_ref,_size); + Interface::free_matrix(A_ref, _size); Interface::free_vector(B_ref); Interface::free_vector(X_ref); - } // action name - static inline std::string name( void ) - { - return "ger_" + Interface::name(); + static inline std::string name(void) { return "ger_" + Interface::name(); } + + double nb_op_base(void) { return 2.0 * _size * _size; } + + BTL_DONT_INLINE void initialize(void) { + Interface::copy_matrix(A_ref, A, _size); + Interface::copy_vector(B_ref, B, _size); + Interface::copy_vector(X_ref, X, _size); } - double nb_op_base( void ){ - return 2.0*_size*_size; - } - - BTL_DONT_INLINE void initialize( void ){ - Interface::copy_matrix(A_ref,A,_size); - Interface::copy_vector(B_ref,B,_size); - Interface::copy_vector(X_ref,X,_size); - } - - BTL_DONT_INLINE void calculate( void ) { + BTL_DONT_INLINE void calculate(void) { BTL_ASM_COMMENT("#begin ger"); - Interface::ger(A,B,X,_size); + Interface::ger(A, B, X, _size); BTL_ASM_COMMENT("end ger"); } - BTL_DONT_INLINE void check_result( void ){ + BTL_DONT_INLINE void check_result(void) { // calculation check - Interface::vector_to_stl(X,resu_stl); + Interface::vector_to_stl(X, resu_stl); - STL_interface::ger(A_stl,B_stl,X_stl,_size); + STL_interface::ger(A_stl, B_stl, X_stl, _size); - typename Interface::real_type error= - STL_interface::norm_diff(X_stl,resu_stl); + typename Interface::real_type error = STL_interface::norm_diff(X_stl, resu_stl); - if (error>1.e-3){ + if (error > 1.e-3) { INFOS("WRONG CALCULATION...residual=" << error); -// exit(0); + // exit(0); } - } -private : - + private: typename Interface::stl_matrix A_stl; typename Interface::stl_vector B_stl; typename Interface::stl_vector X_stl; @@ -124,5 +111,4 @@ private : int _size; }; - #endif diff --git a/bench/btl/actions/action_hessenberg.hh b/bench/btl/actions/action_hessenberg.hh index 2100ebd89..c364bfcc4 100644 --- a/bench/btl/actions/action_hessenberg.hh +++ b/bench/btl/actions/action_hessenberg.hh @@ -27,94 +27,77 @@ using namespace std; -template +template class Action_hessenberg { - -public : - + public: // Ctor - Action_hessenberg( int size ):_size(size) - { + Action_hessenberg(int size) : _size(size) { MESSAGE("Action_hessenberg Ctor"); // STL vector initialization - init_matrix(X_stl,_size); + init_matrix(X_stl, _size); - init_matrix(C_stl,_size); - init_matrix(resu_stl,_size); + init_matrix(C_stl, _size); + init_matrix(resu_stl, _size); // generic matrix and vector initialization - Interface::matrix_from_stl(X_ref,X_stl); - Interface::matrix_from_stl(X,X_stl); - Interface::matrix_from_stl(C,C_stl); + Interface::matrix_from_stl(X_ref, X_stl); + Interface::matrix_from_stl(X, X_stl); + Interface::matrix_from_stl(C, C_stl); _cost = 0; - for (int j=0; j<_size-2; ++j) - { - double r = std::max(0,_size-j-1); - double b = std::max(0,_size-j-2); - _cost += 6 + 3*b + r*r*4 + r*_size*4; + for (int j = 0; j < _size - 2; ++j) { + double r = std::max(0, _size - j - 1); + double b = std::max(0, _size - j - 2); + _cost += 6 + 3 * b + r * r * 4 + r * _size * 4; } } // invalidate copy ctor - Action_hessenberg( const Action_hessenberg & ) - { + Action_hessenberg(const Action_hessenberg&) { INFOS("illegal call to Action_hessenberg Copy Ctor"); exit(1); } // Dtor - ~Action_hessenberg( void ){ - + ~Action_hessenberg(void) { MESSAGE("Action_hessenberg Dtor"); // deallocation - Interface::free_matrix(X_ref,_size); - Interface::free_matrix(X,_size); - Interface::free_matrix(C,_size); + Interface::free_matrix(X_ref, _size); + Interface::free_matrix(X, _size); + Interface::free_matrix(C, _size); } // action name - static inline std::string name( void ) - { - return "hessenberg_"+Interface::name(); - } + static inline std::string name(void) { return "hessenberg_" + Interface::name(); } - double nb_op_base( void ){ - return _cost; - } + double nb_op_base(void) { return _cost; } - inline void initialize( void ){ - Interface::copy_matrix(X_ref,X,_size); - } + inline void initialize(void) { Interface::copy_matrix(X_ref, X, _size); } - inline void calculate( void ) { - Interface::hessenberg(X,C,_size); - } + inline void calculate(void) { Interface::hessenberg(X, C, _size); } - void check_result( void ){ + void check_result(void) { // calculation check - Interface::matrix_to_stl(C,resu_stl); - -// STL_interface::hessenberg(X_stl,C_stl,_size); -// -// typename Interface::real_type error= -// STL_interface::norm_diff(C_stl,resu_stl); -// -// if (error>1.e-6){ -// INFOS("WRONG CALCULATION...residual=" << error); -// exit(0); -// } + Interface::matrix_to_stl(C, resu_stl); + // STL_interface::hessenberg(X_stl,C_stl,_size); + // + // typename Interface::real_type error= + // STL_interface::norm_diff(C_stl,resu_stl); + // + // if (error>1.e-6){ + // INFOS("WRONG CALCULATION...residual=" << error); + // exit(0); + // } } -private : - + private: typename Interface::stl_matrix X_stl; typename Interface::stl_matrix C_stl; typename Interface::stl_matrix resu_stl; @@ -127,97 +110,81 @@ private : double _cost; }; -template +template class Action_tridiagonalization { - -public : - + public: // Ctor - Action_tridiagonalization( int size ):_size(size) - { + Action_tridiagonalization(int size) : _size(size) { MESSAGE("Action_tridiagonalization Ctor"); // STL vector initialization - init_matrix(X_stl,_size); - - for(int i=0; i<_size; ++i) - { - for(int j=0; j(X_stl, _size); + + for (int i = 0; i < _size; ++i) { + for (int j = 0; j < i; ++j) X_stl[i][j] = X_stl[j][i]; } - - init_matrix(C_stl,_size); - init_matrix(resu_stl,_size); + + init_matrix(C_stl, _size); + init_matrix(resu_stl, _size); // generic matrix and vector initialization - Interface::matrix_from_stl(X_ref,X_stl); - Interface::matrix_from_stl(X,X_stl); - Interface::matrix_from_stl(C,C_stl); + Interface::matrix_from_stl(X_ref, X_stl); + Interface::matrix_from_stl(X, X_stl); + Interface::matrix_from_stl(C, C_stl); _cost = 0; - for (int j=0; j<_size-2; ++j) - { - double r = std::max(0,_size-j-1); - double b = std::max(0,_size-j-2); - _cost += 6. + 3.*b + r*r*8.; + for (int j = 0; j < _size - 2; ++j) { + double r = std::max(0, _size - j - 1); + double b = std::max(0, _size - j - 2); + _cost += 6. + 3. * b + r * r * 8.; } } // invalidate copy ctor - Action_tridiagonalization( const Action_tridiagonalization & ) - { + Action_tridiagonalization(const Action_tridiagonalization&) { INFOS("illegal call to Action_tridiagonalization Copy Ctor"); exit(1); } // Dtor - ~Action_tridiagonalization( void ){ - + ~Action_tridiagonalization(void) { MESSAGE("Action_tridiagonalization Dtor"); // deallocation - Interface::free_matrix(X_ref,_size); - Interface::free_matrix(X,_size); - Interface::free_matrix(C,_size); + Interface::free_matrix(X_ref, _size); + Interface::free_matrix(X, _size); + Interface::free_matrix(C, _size); } // action name - static inline std::string name( void ) { return "tridiagonalization_"+Interface::name(); } + static inline std::string name(void) { return "tridiagonalization_" + Interface::name(); } - double nb_op_base( void ){ - return _cost; - } + double nb_op_base(void) { return _cost; } - inline void initialize( void ){ - Interface::copy_matrix(X_ref,X,_size); - } + inline void initialize(void) { Interface::copy_matrix(X_ref, X, _size); } - inline void calculate( void ) { - Interface::tridiagonalization(X,C,_size); - } + inline void calculate(void) { Interface::tridiagonalization(X, C, _size); } - void check_result( void ){ + void check_result(void) { // calculation check - Interface::matrix_to_stl(C,resu_stl); - -// STL_interface::tridiagonalization(X_stl,C_stl,_size); -// -// typename Interface::real_type error= -// STL_interface::norm_diff(C_stl,resu_stl); -// -// if (error>1.e-6){ -// INFOS("WRONG CALCULATION...residual=" << error); -// exit(0); -// } + Interface::matrix_to_stl(C, resu_stl); + // STL_interface::tridiagonalization(X_stl,C_stl,_size); + // + // typename Interface::real_type error= + // STL_interface::norm_diff(C_stl,resu_stl); + // + // if (error>1.e-6){ + // INFOS("WRONG CALCULATION...residual=" << error); + // exit(0); + // } } -private : - + private: typename Interface::stl_matrix X_stl; typename Interface::stl_matrix C_stl; typename Interface::stl_matrix resu_stl; diff --git a/bench/btl/actions/action_lu_decomp.hh b/bench/btl/actions/action_lu_decomp.hh index 2448e82c4..46fad915b 100644 --- a/bench/btl/actions/action_lu_decomp.hh +++ b/bench/btl/actions/action_lu_decomp.hh @@ -27,88 +27,72 @@ using namespace std; -template +template class Action_lu_decomp { - -public : - + public: // Ctor - Action_lu_decomp( int size ):_size(size) - { + Action_lu_decomp(int size) : _size(size) { MESSAGE("Action_lu_decomp Ctor"); // STL vector initialization - init_matrix(X_stl,_size); + init_matrix(X_stl, _size); - init_matrix(C_stl,_size); - init_matrix(resu_stl,_size); + init_matrix(C_stl, _size); + init_matrix(resu_stl, _size); // generic matrix and vector initialization - Interface::matrix_from_stl(X_ref,X_stl); - Interface::matrix_from_stl(X,X_stl); - Interface::matrix_from_stl(C,C_stl); + Interface::matrix_from_stl(X_ref, X_stl); + Interface::matrix_from_stl(X, X_stl); + Interface::matrix_from_stl(C, C_stl); - _cost = 2.0*size*size*size/3.0 + size*size; + _cost = 2.0 * size * size * size / 3.0 + size * size; } // invalidate copy ctor - Action_lu_decomp( const Action_lu_decomp & ) - { + Action_lu_decomp(const Action_lu_decomp&) { INFOS("illegal call to Action_lu_decomp Copy Ctor"); exit(1); } // Dtor - ~Action_lu_decomp( void ){ - + ~Action_lu_decomp(void) { MESSAGE("Action_lu_decomp Dtor"); // deallocation - Interface::free_matrix(X_ref,_size); - Interface::free_matrix(X,_size); - Interface::free_matrix(C,_size); + Interface::free_matrix(X_ref, _size); + Interface::free_matrix(X, _size); + Interface::free_matrix(C, _size); } // action name - static inline std::string name( void ) - { - return "complete_lu_decomp_"+Interface::name(); - } + static inline std::string name(void) { return "complete_lu_decomp_" + Interface::name(); } - double nb_op_base( void ){ - return _cost; - } + double nb_op_base(void) { return _cost; } - inline void initialize( void ){ - Interface::copy_matrix(X_ref,X,_size); - } + inline void initialize(void) { Interface::copy_matrix(X_ref, X, _size); } - inline void calculate( void ) { - Interface::lu_decomp(X,C,_size); - } + inline void calculate(void) { Interface::lu_decomp(X, C, _size); } - void check_result( void ){ + void check_result(void) { // calculation check - Interface::matrix_to_stl(C,resu_stl); - -// STL_interface::lu_decomp(X_stl,C_stl,_size); -// -// typename Interface::real_type error= -// STL_interface::norm_diff(C_stl,resu_stl); -// -// if (error>1.e-6){ -// INFOS("WRONG CALCULATION...residual=" << error); -// exit(0); -// } + Interface::matrix_to_stl(C, resu_stl); + // STL_interface::lu_decomp(X_stl,C_stl,_size); + // + // typename Interface::real_type error= + // STL_interface::norm_diff(C_stl,resu_stl); + // + // if (error>1.e-6){ + // INFOS("WRONG CALCULATION...residual=" << error); + // exit(0); + // } } -private : - + private: typename Interface::stl_matrix X_stl; typename Interface::stl_matrix C_stl; typename Interface::stl_matrix resu_stl; diff --git a/bench/btl/actions/action_lu_solve.hh b/bench/btl/actions/action_lu_solve.hh index 5a81e6341..afc640c3d 100644 --- a/bench/btl/actions/action_lu_solve.hh +++ b/bench/btl/actions/action_lu_solve.hh @@ -1,14 +1,14 @@ //===================================================== // File : action_lu_solve.hh -// Author : L. Plagne +// Author : L. Plagne // Copyright (C) EDF R&D, lun sep 30 14:23:19 CEST 2002 //===================================================== -// +// // This program is free software; you can redistribute it and/or // modify it under the terms of the GNU General Public License // as published by the Free Software Foundation; either version 2 // of the License, or (at your option) any later version. -// +// // This program is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the @@ -16,7 +16,7 @@ // You should have received a copy of the GNU General Public License // along with this program; if not, write to the Free Software // Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. -// +// #ifndef ACTION_LU_SOLVE #define ACTION_LU_SOLVE #include "utilities.h" @@ -28,33 +28,25 @@ using namespace std; -template -class Action_lu_solve -{ +template +class Action_lu_solve { + public: + static inline std::string name(void) { return "lu_solve_" + Interface::name(); } -public : - - static inline std::string name( void ) - { - return "lu_solve_"+Interface::name(); - } - - static double nb_op_base(int size){ - return 2.0*size*size*size/3.0; // questionable but not really important + static double nb_op_base(int size) { + return 2.0 * size * size * size / 3.0; // questionable but not really important } - - static double calculate( int nb_calc, int size ) { - + static double calculate(int nb_calc, int size) { // STL matrix and vector initialization - + typename Interface::stl_matrix A_stl; typename Interface::stl_vector B_stl; typename Interface::stl_vector X_stl; - init_matrix(A_stl,size); - init_vector(B_stl,size); - init_vector(X_stl,size); + init_matrix(A_stl, size); + init_vector(B_stl, size); + init_vector(X_stl, size); // generic matrix and vector initialization @@ -62,18 +54,18 @@ public : typename Interface::gene_vector B; typename Interface::gene_vector X; - typename Interface::gene_matrix LU; + typename Interface::gene_matrix LU; + + Interface::matrix_from_stl(A, A_stl); + Interface::vector_from_stl(B, B_stl); + Interface::vector_from_stl(X, X_stl); + Interface::matrix_from_stl(LU, A_stl); - Interface::matrix_from_stl(A,A_stl); - Interface::vector_from_stl(B,B_stl); - Interface::vector_from_stl(X,X_stl); - Interface::matrix_from_stl(LU,A_stl); - // local variable : - typename Interface::Pivot_Vector pivot; // pivot vector - Interface::new_Pivot_Vector(pivot,size); - + typename Interface::Pivot_Vector pivot; // pivot vector + Interface::new_Pivot_Vector(pivot, size); + // timer utilities Portable_Timer chronos; @@ -81,56 +73,48 @@ public : // time measurement chronos.start(); - - for (int ii=0;ii::matrix_vector_product(A_stl,X_stl,B_new_stl,size); - - typename Interface::real_type error= - STL_interface::norm_diff(B_stl,B_new_stl); - - if (error>1.e-5){ + STL_interface::matrix_vector_product(A_stl, X_stl, B_new_stl, size); + + typename Interface::real_type error = STL_interface::norm_diff(B_stl, B_new_stl); + + if (error > 1.e-5) { INFOS("WRONG CALCULATION...residual=" << error); STL_interface::display_vector(B_stl); STL_interface::display_vector(B_new_stl); exit(0); } - + // deallocation and return time - - Interface::free_matrix(A,size); + + Interface::free_matrix(A, size); Interface::free_vector(B); Interface::free_vector(X); Interface::free_Pivot_Vector(pivot); return time; } - }; - #endif - - - diff --git a/bench/btl/actions/action_matrix_matrix_product.hh b/bench/btl/actions/action_matrix_matrix_product.hh index f65ee0529..a66d47756 100644 --- a/bench/btl/actions/action_matrix_matrix_product.hh +++ b/bench/btl/actions/action_matrix_matrix_product.hh @@ -28,103 +28,83 @@ using namespace std; -template +template class Action_matrix_matrix_product { - -public : - + public: // Ctor - Action_matrix_matrix_product( int size ):_size(size) - { + Action_matrix_matrix_product(int size) : _size(size) { MESSAGE("Action_matrix_matrix_product Ctor"); // STL matrix and vector initialization - init_matrix(A_stl,_size); - init_matrix(B_stl,_size); - init_matrix(X_stl,_size); - init_matrix(resu_stl,_size); + init_matrix(A_stl, _size); + init_matrix(B_stl, _size); + init_matrix(X_stl, _size); + init_matrix(resu_stl, _size); // generic matrix and vector initialization - Interface::matrix_from_stl(A_ref,A_stl); - Interface::matrix_from_stl(B_ref,B_stl); - Interface::matrix_from_stl(X_ref,X_stl); - - Interface::matrix_from_stl(A,A_stl); - Interface::matrix_from_stl(B,B_stl); - Interface::matrix_from_stl(X,X_stl); + Interface::matrix_from_stl(A_ref, A_stl); + Interface::matrix_from_stl(B_ref, B_stl); + Interface::matrix_from_stl(X_ref, X_stl); + Interface::matrix_from_stl(A, A_stl); + Interface::matrix_from_stl(B, B_stl); + Interface::matrix_from_stl(X, X_stl); } // invalidate copy ctor - Action_matrix_matrix_product( const Action_matrix_matrix_product & ) - { + Action_matrix_matrix_product(const Action_matrix_matrix_product&) { INFOS("illegal call to Action_matrix_matrix_product Copy Ctor"); exit(0); } // Dtor - ~Action_matrix_matrix_product( void ){ - + ~Action_matrix_matrix_product(void) { MESSAGE("Action_matrix_matrix_product Dtor"); // deallocation - Interface::free_matrix(A,_size); - Interface::free_matrix(B,_size); - Interface::free_matrix(X,_size); - - Interface::free_matrix(A_ref,_size); - Interface::free_matrix(B_ref,_size); - Interface::free_matrix(X_ref,_size); + Interface::free_matrix(A, _size); + Interface::free_matrix(B, _size); + Interface::free_matrix(X, _size); + Interface::free_matrix(A_ref, _size); + Interface::free_matrix(B_ref, _size); + Interface::free_matrix(X_ref, _size); } // action name - static inline std::string name( void ) - { - return "matrix_matrix_"+Interface::name(); + static inline std::string name(void) { return "matrix_matrix_" + Interface::name(); } + + double nb_op_base(void) { return 2.0 * _size * _size * _size; } + + inline void initialize(void) { + Interface::copy_matrix(A_ref, A, _size); + Interface::copy_matrix(B_ref, B, _size); + Interface::copy_matrix(X_ref, X, _size); } - double nb_op_base( void ){ - return 2.0*_size*_size*_size; - } - - inline void initialize( void ){ - - Interface::copy_matrix(A_ref,A,_size); - Interface::copy_matrix(B_ref,B,_size); - Interface::copy_matrix(X_ref,X,_size); - - } - - inline void calculate( void ) { - Interface::matrix_matrix_product(A,B,X,_size); - } - - void check_result( void ){ + inline void calculate(void) { Interface::matrix_matrix_product(A, B, X, _size); } + void check_result(void) { // calculation check - if (_size<200) - { - Interface::matrix_to_stl(X,resu_stl); - STL_interface::matrix_matrix_product(A_stl,B_stl,X_stl,_size); - typename Interface::real_type error= - STL_interface::norm_diff(X_stl,resu_stl); - if (error>1.e-6){ + if (_size < 200) { + Interface::matrix_to_stl(X, resu_stl); + STL_interface::matrix_matrix_product(A_stl, B_stl, X_stl, _size); + typename Interface::real_type error = STL_interface::norm_diff(X_stl, resu_stl); + if (error > 1.e-6) { INFOS("WRONG CALCULATION...residual=" << error); exit(1); } } } -private : - + private: typename Interface::stl_matrix A_stl; typename Interface::stl_matrix B_stl; typename Interface::stl_matrix X_stl; @@ -138,13 +118,7 @@ private : typename Interface::gene_matrix B; typename Interface::gene_matrix X; - int _size; - }; - #endif - - - diff --git a/bench/btl/actions/action_matrix_matrix_product_bis.hh b/bench/btl/actions/action_matrix_matrix_product_bis.hh index 29c10a6e2..c02dfb36f 100644 --- a/bench/btl/actions/action_matrix_matrix_product_bis.hh +++ b/bench/btl/actions/action_matrix_matrix_product_bis.hh @@ -29,31 +29,23 @@ using namespace std; -template +template class Action_matrix_matrix_product_bis { + public: + static inline std::string name(void) { return "matrix_matrix_" + Interface::name(); } -public : - - static inline std::string name( void ) - { - return "matrix_matrix_"+Interface::name(); - } - - static double nb_op_base(int size){ - return 2.0*size*size*size; - } - - static double calculate( int nb_calc, int size ) { + static double nb_op_base(int size) { return 2.0 * size * size * size; } + static double calculate(int nb_calc, int size) { // STL matrix and vector initialization typename Interface::stl_matrix A_stl; typename Interface::stl_matrix B_stl; typename Interface::stl_matrix X_stl; - init_matrix(A_stl,size); - init_matrix(B_stl,size); - init_matrix(X_stl,size); + init_matrix(A_stl, size); + init_matrix(B_stl, size); + init_matrix(X_stl, size); // generic matrix and vector initialization @@ -65,15 +57,13 @@ public : typename Interface::gene_matrix B; typename Interface::gene_matrix X; + Interface::matrix_from_stl(A_ref, A_stl); + Interface::matrix_from_stl(B_ref, B_stl); + Interface::matrix_from_stl(X_ref, X_stl); - Interface::matrix_from_stl(A_ref,A_stl); - Interface::matrix_from_stl(B_ref,B_stl); - Interface::matrix_from_stl(X_ref,X_stl); - - Interface::matrix_from_stl(A,A_stl); - Interface::matrix_from_stl(B,B_stl); - Interface::matrix_from_stl(X,X_stl); - + Interface::matrix_from_stl(A, A_stl); + Interface::matrix_from_stl(B, B_stl); + Interface::matrix_from_stl(X, X_stl); // STL_timer utilities @@ -84,15 +74,12 @@ public : chronos.start_baseline(nb_calc); do { - - Interface::copy_matrix(A_ref,A,size); - Interface::copy_matrix(B_ref,B,size); - Interface::copy_matrix(X_ref,X,size); - + Interface::copy_matrix(A_ref, A, size); + Interface::copy_matrix(B_ref, B, size); + Interface::copy_matrix(X_ref, X, size); // Interface::matrix_matrix_product(A,B,X,size); This line must be commented !!!! - } - while(chronos.check()); + } while (chronos.check()); chronos.report(true); @@ -101,52 +88,44 @@ public : chronos.start(nb_calc); do { + Interface::copy_matrix(A_ref, A, size); + Interface::copy_matrix(B_ref, B, size); + Interface::copy_matrix(X_ref, X, size); - Interface::copy_matrix(A_ref,A,size); - Interface::copy_matrix(B_ref,B,size); - Interface::copy_matrix(X_ref,X,size); - - Interface::matrix_matrix_product(A,B,X,size); // here it is not commented !!!! - } - while(chronos.check()); + Interface::matrix_matrix_product(A, B, X, size); // here it is not commented !!!! + } while (chronos.check()); chronos.report(true); - double time=chronos.calculated_time/2000.0; + double time = chronos.calculated_time / 2000.0; // calculation check typename Interface::stl_matrix resu_stl(size); - Interface::matrix_to_stl(X,resu_stl); + Interface::matrix_to_stl(X, resu_stl); - STL_interface::matrix_matrix_product(A_stl,B_stl,X_stl,size); + STL_interface::matrix_matrix_product(A_stl, B_stl, X_stl, size); - typename Interface::real_type error= - STL_interface::norm_diff(X_stl,resu_stl); + typename Interface::real_type error = STL_interface::norm_diff(X_stl, resu_stl); - if (error>1.e-6){ + if (error > 1.e-6) { INFOS("WRONG CALCULATION...residual=" << error); exit(1); } // deallocation and return time - Interface::free_matrix(A,size); - Interface::free_matrix(B,size); - Interface::free_matrix(X,size); + Interface::free_matrix(A, size); + Interface::free_matrix(B, size); + Interface::free_matrix(X, size); - Interface::free_matrix(A_ref,size); - Interface::free_matrix(B_ref,size); - Interface::free_matrix(X_ref,size); + Interface::free_matrix(A_ref, size); + Interface::free_matrix(B_ref, size); + Interface::free_matrix(X_ref, size); return time; } - }; - #endif - - - diff --git a/bench/btl/actions/action_matrix_vector_product.hh b/bench/btl/actions/action_matrix_vector_product.hh index 8bab79d18..002f87654 100644 --- a/bench/btl/actions/action_matrix_vector_product.hh +++ b/bench/btl/actions/action_matrix_vector_product.hh @@ -28,106 +28,88 @@ using namespace std; -template +template class Action_matrix_vector_product { - -public : - + public: // Ctor - BTL_DONT_INLINE Action_matrix_vector_product( int size ):_size(size) - { + BTL_DONT_INLINE Action_matrix_vector_product(int size) : _size(size) { MESSAGE("Action_matrix_vector_product Ctor"); // STL matrix and vector initialization - init_matrix(A_stl,_size); - init_vector(B_stl,_size); - init_vector(X_stl,_size); - init_vector(resu_stl,_size); + init_matrix(A_stl, _size); + init_vector(B_stl, _size); + init_vector(X_stl, _size); + init_vector(resu_stl, _size); // generic matrix and vector initialization - Interface::matrix_from_stl(A_ref,A_stl); - Interface::matrix_from_stl(A,A_stl); - Interface::vector_from_stl(B_ref,B_stl); - Interface::vector_from_stl(B,B_stl); - Interface::vector_from_stl(X_ref,X_stl); - Interface::vector_from_stl(X,X_stl); - + Interface::matrix_from_stl(A_ref, A_stl); + Interface::matrix_from_stl(A, A_stl); + Interface::vector_from_stl(B_ref, B_stl); + Interface::vector_from_stl(B, B_stl); + Interface::vector_from_stl(X_ref, X_stl); + Interface::vector_from_stl(X, X_stl); } // invalidate copy ctor - Action_matrix_vector_product( const Action_matrix_vector_product & ) - { + Action_matrix_vector_product(const Action_matrix_vector_product&) { INFOS("illegal call to Action_matrix_vector_product Copy Ctor"); exit(1); } // Dtor - BTL_DONT_INLINE ~Action_matrix_vector_product( void ){ - + BTL_DONT_INLINE ~Action_matrix_vector_product(void) { MESSAGE("Action_matrix_vector_product Dtor"); // deallocation - Interface::free_matrix(A,_size); + Interface::free_matrix(A, _size); Interface::free_vector(B); Interface::free_vector(X); - Interface::free_matrix(A_ref,_size); + Interface::free_matrix(A_ref, _size); Interface::free_vector(B_ref); Interface::free_vector(X_ref); - } // action name - static inline std::string name( void ) - { - return "matrix_vector_" + Interface::name(); + static inline std::string name(void) { return "matrix_vector_" + Interface::name(); } + + double nb_op_base(void) { return 2.0 * _size * _size; } + + BTL_DONT_INLINE void initialize(void) { + Interface::copy_matrix(A_ref, A, _size); + Interface::copy_vector(B_ref, B, _size); + Interface::copy_vector(X_ref, X, _size); } - double nb_op_base( void ){ - return 2.0*_size*_size; + BTL_DONT_INLINE void calculate(void) { + BTL_ASM_COMMENT("#begin matrix_vector_product"); + Interface::matrix_vector_product(A, B, X, _size); + BTL_ASM_COMMENT("end matrix_vector_product"); } - BTL_DONT_INLINE void initialize( void ){ - - Interface::copy_matrix(A_ref,A,_size); - Interface::copy_vector(B_ref,B,_size); - Interface::copy_vector(X_ref,X,_size); - - } - - BTL_DONT_INLINE void calculate( void ) { - BTL_ASM_COMMENT("#begin matrix_vector_product"); - Interface::matrix_vector_product(A,B,X,_size); - BTL_ASM_COMMENT("end matrix_vector_product"); - } - - BTL_DONT_INLINE void check_result( void ){ - + BTL_DONT_INLINE void check_result(void) { // calculation check - Interface::vector_to_stl(X,resu_stl); + Interface::vector_to_stl(X, resu_stl); - STL_interface::matrix_vector_product(A_stl,B_stl,X_stl,_size); + STL_interface::matrix_vector_product(A_stl, B_stl, X_stl, _size); - typename Interface::real_type error= - STL_interface::norm_diff(X_stl,resu_stl); + typename Interface::real_type error = STL_interface::norm_diff(X_stl, resu_stl); - if (error>1.e-5){ + if (error > 1.e-5) { INFOS("WRONG CALCULATION...residual=" << error); exit(0); } - } -private : - + private: typename Interface::stl_matrix A_stl; typename Interface::stl_vector B_stl; typename Interface::stl_vector X_stl; @@ -141,13 +123,7 @@ private : typename Interface::gene_vector B; typename Interface::gene_vector X; - int _size; - }; - #endif - - - diff --git a/bench/btl/actions/action_partial_lu.hh b/bench/btl/actions/action_partial_lu.hh index 770ea1d1e..400e3ffe0 100644 --- a/bench/btl/actions/action_partial_lu.hh +++ b/bench/btl/actions/action_partial_lu.hh @@ -27,90 +27,73 @@ using namespace std; -template +template class Action_partial_lu { - -public : - + public: // Ctor - Action_partial_lu( int size ):_size(size) - { + Action_partial_lu(int size) : _size(size) { MESSAGE("Action_partial_lu Ctor"); // STL vector initialization - init_matrix(X_stl,_size); - init_matrix(C_stl,_size); + init_matrix(X_stl, _size); + init_matrix(C_stl, _size); // make sure X is invertible - for (int i=0; i<_size; ++i) - X_stl[i][i] = X_stl[i][i] * 1e2 + 1; + for (int i = 0; i < _size; ++i) X_stl[i][i] = X_stl[i][i] * 1e2 + 1; // generic matrix and vector initialization - Interface::matrix_from_stl(X_ref,X_stl); - Interface::matrix_from_stl(X,X_stl); - Interface::matrix_from_stl(C,C_stl); + Interface::matrix_from_stl(X_ref, X_stl); + Interface::matrix_from_stl(X, X_stl); + Interface::matrix_from_stl(C, C_stl); - _cost = 2.0*size*size*size/3.0 + size*size; + _cost = 2.0 * size * size * size / 3.0 + size * size; } // invalidate copy ctor - Action_partial_lu( const Action_partial_lu & ) - { + Action_partial_lu(const Action_partial_lu&) { INFOS("illegal call to Action_partial_lu Copy Ctor"); exit(1); } // Dtor - ~Action_partial_lu( void ){ - + ~Action_partial_lu(void) { MESSAGE("Action_partial_lu Dtor"); // deallocation - Interface::free_matrix(X_ref,_size); - Interface::free_matrix(X,_size); - Interface::free_matrix(C,_size); + Interface::free_matrix(X_ref, _size); + Interface::free_matrix(X, _size); + Interface::free_matrix(C, _size); } // action name - static inline std::string name( void ) - { - return "partial_lu_decomp_"+Interface::name(); - } + static inline std::string name(void) { return "partial_lu_decomp_" + Interface::name(); } - double nb_op_base( void ){ - return _cost; - } + double nb_op_base(void) { return _cost; } - inline void initialize( void ){ - Interface::copy_matrix(X_ref,X,_size); - } + inline void initialize(void) { Interface::copy_matrix(X_ref, X, _size); } - inline void calculate( void ) { - Interface::partial_lu_decomp(X,C,_size); - } + inline void calculate(void) { Interface::partial_lu_decomp(X, C, _size); } - void check_result( void ){ + void check_result(void) { // calculation check -// Interface::matrix_to_stl(C,resu_stl); - -// STL_interface::lu_decomp(X_stl,C_stl,_size); -// -// typename Interface::real_type error= -// STL_interface::norm_diff(C_stl,resu_stl); -// -// if (error>1.e-6){ -// INFOS("WRONG CALCULATION...residual=" << error); -// exit(0); -// } + // Interface::matrix_to_stl(C,resu_stl); + // STL_interface::lu_decomp(X_stl,C_stl,_size); + // + // typename Interface::real_type error= + // STL_interface::norm_diff(C_stl,resu_stl); + // + // if (error>1.e-6){ + // INFOS("WRONG CALCULATION...residual=" << error); + // exit(0); + // } } -private : - + private: typename Interface::stl_matrix X_stl; typename Interface::stl_matrix C_stl; diff --git a/bench/btl/actions/action_rot.hh b/bench/btl/actions/action_rot.hh index df822a6d6..7cc3c6162 100644 --- a/bench/btl/actions/action_rot.hh +++ b/bench/btl/actions/action_rot.hh @@ -23,37 +23,33 @@ using namespace std; -template +template class Action_rot { - -public : - + public: // Ctor - BTL_DONT_INLINE Action_rot( int size ):_size(size) - { + BTL_DONT_INLINE Action_rot(int size) : _size(size) { MESSAGE("Action_rot Ctor"); // STL matrix and vector initialization typename Interface::stl_matrix tmp; - init_vector(A_stl,_size); - init_vector(B_stl,_size); + init_vector(A_stl, _size); + init_vector(B_stl, _size); // generic matrix and vector initialization - Interface::vector_from_stl(A_ref,A_stl); - Interface::vector_from_stl(A,A_stl); - Interface::vector_from_stl(B_ref,B_stl); - Interface::vector_from_stl(B,B_stl); + Interface::vector_from_stl(A_ref, A_stl); + Interface::vector_from_stl(A, A_stl); + Interface::vector_from_stl(B_ref, B_stl); + Interface::vector_from_stl(B, B_stl); } // invalidate copy ctor - Action_rot( const Action_rot & ) - { + Action_rot(const Action_rot&) { INFOS("illegal call to Action_rot Copy Ctor"); exit(1); } // Dtor - BTL_DONT_INLINE ~Action_rot( void ){ + BTL_DONT_INLINE ~Action_rot(void) { MESSAGE("Action_rot Dtor"); Interface::free_vector(A); Interface::free_vector(B); @@ -62,44 +58,37 @@ public : } // action name - static inline std::string name( void ) - { - return "rot_" + Interface::name(); + static inline std::string name(void) { return "rot_" + Interface::name(); } + + double nb_op_base(void) { return 6.0 * _size; } + + BTL_DONT_INLINE void initialize(void) { + Interface::copy_vector(A_ref, A, _size); + Interface::copy_vector(B_ref, B, _size); } - double nb_op_base( void ){ - return 6.0*_size; - } - - BTL_DONT_INLINE void initialize( void ){ - Interface::copy_vector(A_ref,A,_size); - Interface::copy_vector(B_ref,B,_size); - } - - BTL_DONT_INLINE void calculate( void ) { + BTL_DONT_INLINE void calculate(void) { BTL_ASM_COMMENT("#begin rot"); - Interface::rot(A,B,0.5,0.6,_size); + Interface::rot(A, B, 0.5, 0.6, _size); BTL_ASM_COMMENT("end rot"); } - BTL_DONT_INLINE void check_result( void ){ + BTL_DONT_INLINE void check_result(void) { // calculation check -// Interface::vector_to_stl(X,resu_stl); + // Interface::vector_to_stl(X,resu_stl); -// STL_interface::rot(A_stl,B_stl,X_stl,_size); + // STL_interface::rot(A_stl,B_stl,X_stl,_size); -// typename Interface::real_type error= -// STL_interface::norm_diff(X_stl,resu_stl); - -// if (error>1.e-3){ -// INFOS("WRONG CALCULATION...residual=" << error); -// exit(0); -// } + // typename Interface::real_type error= + // STL_interface::norm_diff(X_stl,resu_stl); + // if (error>1.e-3){ + // INFOS("WRONG CALCULATION...residual=" << error); + // exit(0); + // } } -private : - + private: typename Interface::stl_vector A_stl; typename Interface::stl_vector B_stl; @@ -112,5 +101,4 @@ private : int _size; }; - #endif diff --git a/bench/btl/actions/action_symv.hh b/bench/btl/actions/action_symv.hh index a32b9dfa0..d8b38551e 100644 --- a/bench/btl/actions/action_symv.hh +++ b/bench/btl/actions/action_symv.hh @@ -28,95 +28,80 @@ using namespace std; -template +template class Action_symv { - -public : - + public: // Ctor - BTL_DONT_INLINE Action_symv( int size ):_size(size) - { + BTL_DONT_INLINE Action_symv(int size) : _size(size) { MESSAGE("Action_symv Ctor"); // STL matrix and vector initialization - init_matrix_symm(A_stl,_size); - init_vector(B_stl,_size); - init_vector(X_stl,_size); - init_vector(resu_stl,_size); + init_matrix_symm(A_stl, _size); + init_vector(B_stl, _size); + init_vector(X_stl, _size); + init_vector(resu_stl, _size); // generic matrix and vector initialization - Interface::matrix_from_stl(A_ref,A_stl); - Interface::matrix_from_stl(A,A_stl); - Interface::vector_from_stl(B_ref,B_stl); - Interface::vector_from_stl(B,B_stl); - Interface::vector_from_stl(X_ref,X_stl); - Interface::vector_from_stl(X,X_stl); - + Interface::matrix_from_stl(A_ref, A_stl); + Interface::matrix_from_stl(A, A_stl); + Interface::vector_from_stl(B_ref, B_stl); + Interface::vector_from_stl(B, B_stl); + Interface::vector_from_stl(X_ref, X_stl); + Interface::vector_from_stl(X, X_stl); } // invalidate copy ctor - Action_symv( const Action_symv & ) - { + Action_symv(const Action_symv&) { INFOS("illegal call to Action_symv Copy Ctor"); exit(1); } // Dtor - BTL_DONT_INLINE ~Action_symv( void ){ - Interface::free_matrix(A,_size); + BTL_DONT_INLINE ~Action_symv(void) { + Interface::free_matrix(A, _size); Interface::free_vector(B); Interface::free_vector(X); - Interface::free_matrix(A_ref,_size); + Interface::free_matrix(A_ref, _size); Interface::free_vector(B_ref); Interface::free_vector(X_ref); } // action name - static inline std::string name( void ) - { - return "symv_" + Interface::name(); + static inline std::string name(void) { return "symv_" + Interface::name(); } + + double nb_op_base(void) { return 2.0 * _size * _size; } + + BTL_DONT_INLINE void initialize(void) { + Interface::copy_matrix(A_ref, A, _size); + Interface::copy_vector(B_ref, B, _size); + Interface::copy_vector(X_ref, X, _size); } - double nb_op_base( void ){ - return 2.0*_size*_size; + BTL_DONT_INLINE void calculate(void) { + BTL_ASM_COMMENT("#begin symv"); + Interface::symv(A, B, X, _size); + BTL_ASM_COMMENT("end symv"); } - BTL_DONT_INLINE void initialize( void ){ - - Interface::copy_matrix(A_ref,A,_size); - Interface::copy_vector(B_ref,B,_size); - Interface::copy_vector(X_ref,X,_size); - - } - - BTL_DONT_INLINE void calculate( void ) { - BTL_ASM_COMMENT("#begin symv"); - Interface::symv(A,B,X,_size); - BTL_ASM_COMMENT("end symv"); - } - - BTL_DONT_INLINE void check_result( void ){ - if (_size>128) return; + BTL_DONT_INLINE void check_result(void) { + if (_size > 128) return; // calculation check - Interface::vector_to_stl(X,resu_stl); + Interface::vector_to_stl(X, resu_stl); - STL_interface::symv(A_stl,B_stl,X_stl,_size); + STL_interface::symv(A_stl, B_stl, X_stl, _size); - typename Interface::real_type error= - STL_interface::norm_diff(X_stl,resu_stl); + typename Interface::real_type error = STL_interface::norm_diff(X_stl, resu_stl); - if (error>1.e-5){ + if (error > 1.e-5) { INFOS("WRONG CALCULATION...residual=" << error); exit(0); } - } -private : - + private: typename Interface::stl_matrix A_stl; typename Interface::stl_vector B_stl; typename Interface::stl_vector X_stl; @@ -130,10 +115,7 @@ private : typename Interface::gene_vector B; typename Interface::gene_vector X; - int _size; - }; - #endif diff --git a/bench/btl/actions/action_syr2.hh b/bench/btl/actions/action_syr2.hh index 7c6712b13..3355faa66 100644 --- a/bench/btl/actions/action_syr2.hh +++ b/bench/btl/actions/action_syr2.hh @@ -28,89 +28,77 @@ using namespace std; -template +template class Action_syr2 { - -public : - + public: // Ctor - BTL_DONT_INLINE Action_syr2( int size ):_size(size) - { + BTL_DONT_INLINE Action_syr2(int size) : _size(size) { // STL matrix and vector initialization typename Interface::stl_matrix tmp; - init_matrix(A_stl,_size); - init_vector(B_stl,_size); - init_vector(X_stl,_size); - init_vector(resu_stl,_size); + init_matrix(A_stl, _size); + init_vector(B_stl, _size); + init_vector(X_stl, _size); + init_vector(resu_stl, _size); // generic matrix and vector initialization - Interface::matrix_from_stl(A_ref,A_stl); - Interface::matrix_from_stl(A,A_stl); - Interface::vector_from_stl(B_ref,B_stl); - Interface::vector_from_stl(B,B_stl); - Interface::vector_from_stl(X_ref,X_stl); - Interface::vector_from_stl(X,X_stl); + Interface::matrix_from_stl(A_ref, A_stl); + Interface::matrix_from_stl(A, A_stl); + Interface::vector_from_stl(B_ref, B_stl); + Interface::vector_from_stl(B, B_stl); + Interface::vector_from_stl(X_ref, X_stl); + Interface::vector_from_stl(X, X_stl); } // invalidate copy ctor - Action_syr2( const Action_syr2 & ) - { + Action_syr2(const Action_syr2&) { INFOS("illegal call to Action_syr2 Copy Ctor"); exit(1); } // Dtor - BTL_DONT_INLINE ~Action_syr2( void ){ - Interface::free_matrix(A,_size); + BTL_DONT_INLINE ~Action_syr2(void) { + Interface::free_matrix(A, _size); Interface::free_vector(B); Interface::free_vector(X); - Interface::free_matrix(A_ref,_size); + Interface::free_matrix(A_ref, _size); Interface::free_vector(B_ref); Interface::free_vector(X_ref); } // action name - static inline std::string name( void ) - { - return "syr2_" + Interface::name(); + static inline std::string name(void) { return "syr2_" + Interface::name(); } + + double nb_op_base(void) { return 2.0 * _size * _size; } + + BTL_DONT_INLINE void initialize(void) { + Interface::copy_matrix(A_ref, A, _size); + Interface::copy_vector(B_ref, B, _size); + Interface::copy_vector(X_ref, X, _size); } - double nb_op_base( void ){ - return 2.0*_size*_size; + BTL_DONT_INLINE void calculate(void) { + BTL_ASM_COMMENT("#begin syr2"); + Interface::syr2(A, B, X, _size); + BTL_ASM_COMMENT("end syr2"); } - BTL_DONT_INLINE void initialize( void ){ - Interface::copy_matrix(A_ref,A,_size); - Interface::copy_vector(B_ref,B,_size); - Interface::copy_vector(X_ref,X,_size); - } - - BTL_DONT_INLINE void calculate( void ) { - BTL_ASM_COMMENT("#begin syr2"); - Interface::syr2(A,B,X,_size); - BTL_ASM_COMMENT("end syr2"); - } - - BTL_DONT_INLINE void check_result( void ){ + BTL_DONT_INLINE void check_result(void) { // calculation check - Interface::vector_to_stl(X,resu_stl); + Interface::vector_to_stl(X, resu_stl); - STL_interface::syr2(A_stl,B_stl,X_stl,_size); + STL_interface::syr2(A_stl, B_stl, X_stl, _size); - typename Interface::real_type error= - STL_interface::norm_diff(X_stl,resu_stl); + typename Interface::real_type error = STL_interface::norm_diff(X_stl, resu_stl); - if (error>1.e-3){ + if (error > 1.e-3) { INFOS("WRONG CALCULATION...residual=" << error); -// exit(0); + // exit(0); } - } -private : - + private: typename Interface::stl_matrix A_stl; typename Interface::stl_vector B_stl; typename Interface::stl_vector X_stl; @@ -124,10 +112,7 @@ private : typename Interface::gene_vector B; typename Interface::gene_vector X; - int _size; - }; - #endif diff --git a/bench/btl/actions/action_trisolve.hh b/bench/btl/actions/action_trisolve.hh index d6f0b477e..6751a2090 100644 --- a/bench/btl/actions/action_trisolve.hh +++ b/bench/btl/actions/action_trisolve.hh @@ -27,100 +27,82 @@ using namespace std; -template +template class Action_trisolve { - -public : - + public: // Ctor - Action_trisolve( int size ):_size(size) - { + Action_trisolve(int size) : _size(size) { MESSAGE("Action_trisolve Ctor"); // STL vector initialization - init_matrix(L_stl,_size); - init_vector(B_stl,_size); - init_vector(X_stl,_size); - for (int j=0; j<_size; ++j) - { - for (int i=0; i(L_stl, _size); + init_vector(B_stl, _size); + init_vector(X_stl, _size); + for (int j = 0; j < _size; ++j) { + for (int i = 0; i < j; ++i) L_stl[j][i] = 0; L_stl[j][j] += 3; } - init_vector(resu_stl,_size); + init_vector(resu_stl, _size); // generic matrix and vector initialization - Interface::matrix_from_stl(L,L_stl); - Interface::vector_from_stl(X,X_stl); - Interface::vector_from_stl(B,B_stl); + Interface::matrix_from_stl(L, L_stl); + Interface::vector_from_stl(X, X_stl); + Interface::vector_from_stl(B, B_stl); _cost = 0; - for (int j=0; j<_size; ++j) - { - _cost += 2*j + 1; + for (int j = 0; j < _size; ++j) { + _cost += 2 * j + 1; } } // invalidate copy ctor - Action_trisolve( const Action_trisolve & ) - { + Action_trisolve(const Action_trisolve&) { INFOS("illegal call to Action_trisolve Copy Ctor"); exit(1); } // Dtor - ~Action_trisolve( void ){ - + ~Action_trisolve(void) { MESSAGE("Action_trisolve Dtor"); // deallocation - Interface::free_matrix(L,_size); + Interface::free_matrix(L, _size); Interface::free_vector(B); Interface::free_vector(X); } // action name - static inline std::string name( void ) - { - return "trisolve_vector_"+Interface::name(); + static inline std::string name(void) { return "trisolve_vector_" + Interface::name(); } + + double nb_op_base(void) { return _cost; } + + inline void initialize(void) { + // Interface::copy_vector(X_ref,X,_size); } - double nb_op_base( void ){ - return _cost; - } + inline void calculate(void) { Interface::trisolve_lower(L, B, X, _size); } - inline void initialize( void ){ - //Interface::copy_vector(X_ref,X,_size); - } - - inline void calculate( void ) { - Interface::trisolve_lower(L,B,X,_size); - } - - void check_result(){ - if (_size>128) return; + void check_result() { + if (_size > 128) return; // calculation check - Interface::vector_to_stl(X,resu_stl); + Interface::vector_to_stl(X, resu_stl); - STL_interface::trisolve_lower(L_stl,B_stl,X_stl,_size); + STL_interface::trisolve_lower(L_stl, B_stl, X_stl, _size); - typename Interface::real_type error= - STL_interface::norm_diff(X_stl,resu_stl); + typename Interface::real_type error = STL_interface::norm_diff(X_stl, resu_stl); - if (error>1.e-4){ + if (error > 1.e-4) { INFOS("WRONG CALCULATION...residual=" << error); exit(2); - } //else INFOS("CALCULATION OK...residual=" << error); - + } // else INFOS("CALCULATION OK...residual=" << error); } -private : - + private: typename Interface::stl_matrix L_stl; typename Interface::stl_vector X_stl; typename Interface::stl_vector B_stl; diff --git a/bench/btl/actions/action_trisolve_matrix.hh b/bench/btl/actions/action_trisolve_matrix.hh index 0fc2bb9ef..cf55aae7c 100644 --- a/bench/btl/actions/action_trisolve_matrix.hh +++ b/bench/btl/actions/action_trisolve_matrix.hh @@ -28,118 +28,97 @@ using namespace std; -template +template class Action_trisolve_matrix { - -public : - + public: // Ctor - Action_trisolve_matrix( int size ):_size(size) - { + Action_trisolve_matrix(int size) : _size(size) { MESSAGE("Action_trisolve_matrix Ctor"); // STL matrix and vector initialization - init_matrix(A_stl,_size); - init_matrix(B_stl,_size); - init_matrix(X_stl,_size); - init_matrix(resu_stl,_size); + init_matrix(A_stl, _size); + init_matrix(B_stl, _size); + init_matrix(X_stl, _size); + init_matrix(resu_stl, _size); - for (int j=0; j<_size; ++j) - { - for (int i=0; i::matrix_matrix_product(A_stl,B_stl,X_stl,_size); -// -// typename Interface::real_type error= -// STL_interface::norm_diff(X_stl,resu_stl); -// -// if (error>1.e-6){ -// INFOS("WRONG CALCULATION...residual=" << error); -// // exit(1); -// } - + // Interface::matrix_to_stl(X,resu_stl); + // + // STL_interface::matrix_matrix_product(A_stl,B_stl,X_stl,_size); + // + // typename Interface::real_type error= + // STL_interface::norm_diff(X_stl,resu_stl); + // + // if (error>1.e-6){ + // INFOS("WRONG CALCULATION...residual=" << error); + // // exit(1); + // } } -private : - + private: typename Interface::stl_matrix A_stl; typename Interface::stl_matrix B_stl; typename Interface::stl_matrix X_stl; @@ -155,11 +134,6 @@ private : int _size; double _cost; - }; - #endif - - - diff --git a/bench/btl/actions/action_trmm.hh b/bench/btl/actions/action_trmm.hh index 8f7813818..8c0b25f1e 100644 --- a/bench/btl/actions/action_trmm.hh +++ b/bench/btl/actions/action_trmm.hh @@ -28,118 +28,97 @@ using namespace std; -template +template class Action_trmm { - -public : - + public: // Ctor - Action_trmm( int size ):_size(size) - { + Action_trmm(int size) : _size(size) { MESSAGE("Action_trmm Ctor"); // STL matrix and vector initialization - init_matrix(A_stl,_size); - init_matrix(B_stl,_size); - init_matrix(X_stl,_size); - init_matrix(resu_stl,_size); + init_matrix(A_stl, _size); + init_matrix(B_stl, _size); + init_matrix(X_stl, _size); + init_matrix(resu_stl, _size); - for (int j=0; j<_size; ++j) - { - for (int i=0; i::matrix_matrix_product(A_stl,B_stl,X_stl,_size); -// -// typename Interface::real_type error= -// STL_interface::norm_diff(X_stl,resu_stl); -// -// if (error>1.e-6){ -// INFOS("WRONG CALCULATION...residual=" << error); -// // exit(1); -// } - + // Interface::matrix_to_stl(X,resu_stl); + // + // STL_interface::matrix_matrix_product(A_stl,B_stl,X_stl,_size); + // + // typename Interface::real_type error= + // STL_interface::norm_diff(X_stl,resu_stl); + // + // if (error>1.e-6){ + // INFOS("WRONG CALCULATION...residual=" << error); + // // exit(1); + // } } -private : - + private: typename Interface::stl_matrix A_stl; typename Interface::stl_matrix B_stl; typename Interface::stl_matrix X_stl; @@ -155,11 +134,6 @@ private : int _size; double _cost; - }; - #endif - - - diff --git a/bench/btl/actions/basic_actions.hh b/bench/btl/actions/basic_actions.hh index 62442f01f..e2e1f1c82 100644 --- a/bench/btl/actions/basic_actions.hh +++ b/bench/btl/actions/basic_actions.hh @@ -18,4 +18,3 @@ #include "action_rot.hh" // #include "action_lu_solve.hh" - diff --git a/bench/btl/data/mean.cxx b/bench/btl/data/mean.cxx index c567ef33e..fe4b453cb 100644 --- a/bench/btl/data/mean.cxx +++ b/bench/btl/data/mean.cxx @@ -1,14 +1,14 @@ //===================================================== // File : mean.cxx -// Author : L. Plagne +// Author : L. Plagne // Copyright (C) EDF R&D, lun sep 30 14:23:15 CEST 2002 //===================================================== -// +// // This program is free software; you can redistribute it and/or // modify it under the terms of the GNU General Public License // as published by the Free Software Foundation; either version 2 // of the License, or (at your option) any later version. -// +// // This program is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the @@ -16,7 +16,7 @@ // You should have received a copy of the GNU General Public License // along with this program; if not, write to the Free Software // Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. -// +// #include "utilities.h" #include #include @@ -28,155 +28,138 @@ using namespace std; -double mean_calc(const vector & tab_sizes, const vector & tab_mflops, const int size_min, const int size_max); +double mean_calc(const vector &tab_sizes, const vector &tab_mflops, const int size_min, + const int size_max); -class Lib_Mean{ - -public: - Lib_Mean( void ):_lib_name(),_mean_in_cache(),_mean_out_of_cache(){ +class Lib_Mean { + public: + Lib_Mean(void) : _lib_name(), _mean_in_cache(), _mean_out_of_cache() { MESSAGE("Lib_mean Default Ctor"); MESSAGE("!!! should not be used"); exit(0); } - Lib_Mean(const string & name, const double & mic, const double & moc):_lib_name(name),_mean_in_cache(mic),_mean_out_of_cache(moc){ + Lib_Mean(const string &name, const double &mic, const double &moc) + : _lib_name(name), _mean_in_cache(mic), _mean_out_of_cache(moc) { MESSAGE("Lib_mean Ctor"); } - Lib_Mean(const Lib_Mean & lm):_lib_name(lm._lib_name),_mean_in_cache(lm._mean_in_cache),_mean_out_of_cache(lm._mean_out_of_cache){ + Lib_Mean(const Lib_Mean &lm) + : _lib_name(lm._lib_name), _mean_in_cache(lm._mean_in_cache), _mean_out_of_cache(lm._mean_out_of_cache) { MESSAGE("Lib_mean Copy Ctor"); } - ~Lib_Mean( void ){ - MESSAGE("Lib_mean Dtor"); - } - + ~Lib_Mean(void) { MESSAGE("Lib_mean Dtor"); } + double _mean_in_cache; double _mean_out_of_cache; string _lib_name; - bool operator < ( const Lib_Mean &right) const - { - //return ( this->_mean_out_of_cache > right._mean_out_of_cache) ; - return ( this->_mean_in_cache > right._mean_in_cache) ; + bool operator<(const Lib_Mean &right) const { + // return ( this->_mean_out_of_cache > right._mean_out_of_cache) ; + return (this->_mean_in_cache > right._mean_in_cache); } +}; -}; - - -int main( int argc , char *argv[] ) -{ - - if (argc<6){ +int main(int argc, char *argv[]) { + if (argc < 6) { INFOS("!!! Error ... usage : main what mic Mic moc Moc filename1 finename2..."); exit(0); } INFOS(argc); - int min_in_cache=atoi(argv[2]); - int max_in_cache=atoi(argv[3]); - int min_out_of_cache=atoi(argv[4]); - int max_out_of_cache=atoi(argv[5]); + int min_in_cache = atoi(argv[2]); + int max_in_cache = atoi(argv[3]); + int min_out_of_cache = atoi(argv[4]); + int max_out_of_cache = atoi(argv[5]); + multiset s_lib_mean; - multiset s_lib_mean ; + for (int i = 6; i < argc; i++) { + string filename = argv[i]; - for (int i=6;i tab_sizes; vector tab_mflops; - read_xy_file(filename,tab_sizes,tab_mflops); + read_xy_file(filename, tab_sizes, tab_mflops); - mic=mean_calc(tab_sizes,tab_mflops,min_in_cache,max_in_cache); - moc=mean_calc(tab_sizes,tab_mflops,min_out_of_cache,max_out_of_cache); + mic = mean_calc(tab_sizes, tab_mflops, min_in_cache, max_in_cache); + moc = mean_calc(tab_sizes, tab_mflops, min_out_of_cache, max_out_of_cache); - Lib_Mean cur_lib_mean(filename,mic,moc); - - s_lib_mean.insert(cur_lib_mean); + Lib_Mean cur_lib_mean(filename, mic, moc); - } - + s_lib_mean.insert(cur_lib_mean); + } } - - cout << "" << endl ; - cout << " " << endl ; - cout << " " << endl ; - cout << " " << endl ; - cout << " " << endl ; - cout << " " << endl ; - cout << " " << endl ; - cout << " " << endl ; - cout << " " << endl ; - cout << " " << endl ; + cout << "
" << argv[1] << " in cache
mean perf
Mflops
in cache
% best
out of cache
mean perf
Mflops
out of cache
% best
details comments
" << endl; + cout << " " << endl; + cout << " " << endl; + cout << " " + << endl; + cout << " " << endl; + cout << " " + << endl; + cout << " " << endl; + cout << " " << endl; + cout << " " << endl; + cout << " " << endl; multiset::iterator is = s_lib_mean.begin(); - Lib_Mean best(*is); - + Lib_Mean best(*is); - for (is=s_lib_mean.begin(); is!=s_lib_mean.end() ; is++){ - - cout << " " << endl ; - cout << " " << endl ; - cout << " " << endl ; - cout << " " << endl ; - cout << " " << endl ; - cout << " " << endl ; - cout << " " << endl ; - cout << " " << endl ; - cout << " " << endl ; - + for (is = s_lib_mean.begin(); is != s_lib_mean.end(); is++) { + cout << " " << endl; + cout << " " << endl; + cout << " " << endl; + cout << " " << endl; + cout << " " << endl; + cout << " " << endl; + cout << " " << endl; + cout << " " << endl; + cout << " " << endl; } - cout << "
" << argv[1] << " in cache
mean perf
Mflops
in cache
% best
out of cache
mean perf
Mflops
out of cache
% best
details comments
" << is->_lib_name << " " << is->_mean_in_cache << " " << 100*(is->_mean_in_cache/best._mean_in_cache) << " " << is->_mean_out_of_cache << " " << 100*(is->_mean_out_of_cache/best._mean_out_of_cache) << " " << - "_lib_name<<"_"<snippet/" - "_lib_name<<"_flags\">flags " << - "_lib_name<<"_comments\">click here
" << is->_lib_name << " " << is->_mean_in_cache << " " << 100 * (is->_mean_in_cache / best._mean_in_cache) << " " << is->_mean_out_of_cache << " " << 100 * (is->_mean_out_of_cache / best._mean_out_of_cache) << " " + << "_lib_name << "_" << argv[1] + << "\">snippet/" + "_lib_name << "_flags\">flags " + << "_lib_name << "_comments\">click here
" << endl ; + cout << "" << endl; - ofstream output_file ("../order_lib",ios::out) ; - - for (is=s_lib_mean.begin(); is!=s_lib_mean.end() ; is++){ - output_file << is->_lib_name << endl ; + ofstream output_file("../order_lib", ios::out); + + for (is = s_lib_mean.begin(); is != s_lib_mean.end(); is++) { + output_file << is->_lib_name << endl; } output_file.close(); - } -double mean_calc(const vector & tab_sizes, const vector & tab_mflops, const int size_min, const int size_max){ - - int size=tab_sizes.size(); - int nb_sample=0; - double mean=0.0; +double mean_calc(const vector &tab_sizes, const vector &tab_mflops, const int size_min, + const int size_max) { + int size = tab_sizes.size(); + int nb_sample = 0; + double mean = 0.0; - for (int i=0;i=size_min)&&(tab_sizes[i]<=size_max)){ - + for (int i = 0; i < size; i++) { + if ((tab_sizes[i] >= size_min) && (tab_sizes[i] <= size_max)) { nb_sample++; - mean+=tab_mflops[i]; - + mean += tab_mflops[i]; } - - } - if (nb_sample==0){ + if (nb_sample == 0) { INFOS("no data for mean calculation"); return 0.0; } - return mean/nb_sample; + return mean / nb_sample; } - - - - diff --git a/bench/btl/data/regularize.cxx b/bench/btl/data/regularize.cxx index eea2b8b85..51e2edf19 100644 --- a/bench/btl/data/regularize.cxx +++ b/bench/btl/data/regularize.cxx @@ -1,14 +1,14 @@ //===================================================== // File : regularize.cxx -// Author : L. Plagne +// Author : L. Plagne // Copyright (C) EDF R&D, lun sep 30 14:23:15 CEST 2002 //===================================================== -// +// // This program is free software; you can redistribute it and/or // modify it under the terms of the GNU General Public License // as published by the Free Software Foundation; either version 2 // of the License, or (at your option) any later version. -// +// // This program is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the @@ -16,7 +16,7 @@ // You should have received a copy of the GNU General Public License // along with this program; if not, write to the Free Software // Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. -// +// #include "utilities.h" #include #include @@ -27,99 +27,82 @@ using namespace std; -void read_xy_file(const string & filename, vector & tab_sizes, vector & tab_mflops); -void regularize_curve(const string & filename, - const vector & tab_mflops, - const vector & tab_sizes, - int start_cut_size, int stop_cut_size); +void read_xy_file(const string &filename, vector &tab_sizes, vector &tab_mflops); +void regularize_curve(const string &filename, const vector &tab_mflops, const vector &tab_sizes, + int start_cut_size, int stop_cut_size); ///////////////////////////////////////////////////////////////////////////////////////////////// -int main( int argc , char *argv[] ) -{ - +int main(int argc, char *argv[]) { // input data - if (argc<4){ + if (argc < 4) { INFOS("!!! Error ... usage : main filename start_cut_size stop_cut_size regularize_filename"); exit(0); } INFOS(argc); - int start_cut_size=atoi(argv[2]); - int stop_cut_size=atoi(argv[3]); + int start_cut_size = atoi(argv[2]); + int stop_cut_size = atoi(argv[3]); + + string filename = argv[1]; + string regularize_filename = argv[4]; - string filename=argv[1]; - string regularize_filename=argv[4]; - INFOS(filename); - INFOS("start_cut_size="< & tab_sizes, vector & tab_mflops){ +void read_xy_file(const string &filename, vector &tab_sizes, vector &tab_mflops) { + ifstream input_file(filename.c_str(), ios::in); - ifstream input_file (filename.c_str(),ios::in) ; - - if (!input_file){ - INFOS("!!! Error opening "<> size >> mflops ){ + int nb_point = 0; + int size = 0; + double mflops = 0; + + while (input_file >> size >> mflops) { nb_point++; tab_sizes.push_back(size); tab_mflops.push_back(mflops); @@ -128,4 +111,3 @@ void read_xy_file(const string & filename, vector & tab_sizes, vector +// Author : L. Plagne // Copyright (C) EDF R&D, lun sep 30 14:23:15 CEST 2002 //===================================================== -// +// // This program is free software; you can redistribute it and/or // modify it under the terms of the GNU General Public License // as published by the Free Software Foundation; either version 2 // of the License, or (at your option) any later version. -// +// // This program is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the @@ -16,7 +16,7 @@ // You should have received a copy of the GNU General Public License // along with this program; if not, write to the Free Software // Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. -// +// #include "utilities.h" #include #include @@ -28,165 +28,133 @@ using namespace std; -void read_xy_file(const string & filename, vector & tab_sizes, vector & tab_mflops); -void write_xy_file(const string & filename, vector & tab_sizes, vector & tab_mflops); -void smooth_curve(const vector & tab_mflops, vector & smooth_tab_mflops,int window_half_width); -void centered_smooth_curve(const vector & tab_mflops, vector & smooth_tab_mflops,int window_half_width); +void read_xy_file(const string &filename, vector &tab_sizes, vector &tab_mflops); +void write_xy_file(const string &filename, vector &tab_sizes, vector &tab_mflops); +void smooth_curve(const vector &tab_mflops, vector &smooth_tab_mflops, int window_half_width); +void centered_smooth_curve(const vector &tab_mflops, vector &smooth_tab_mflops, int window_half_width); ///////////////////////////////////////////////////////////////////////////////////////////////// -int main( int argc , char *argv[] ) -{ - +int main(int argc, char *argv[]) { // input data - if (argc<3){ + if (argc < 3) { INFOS("!!! Error ... usage : main filename window_half_width smooth_filename"); exit(0); } INFOS(argc); - int window_half_width=atoi(argv[2]); + int window_half_width = atoi(argv[2]); + + string filename = argv[1]; + string smooth_filename = argv[3]; - string filename=argv[1]; - string smooth_filename=argv[3]; - INFOS(filename); - INFOS("window_half_width="< & tab_sizes, vector & tab_mflops){ - - ofstream output_file (filename.c_str(),ios::out) ; - - for (int i=0 ; i < tab_sizes.size() ; i++) - { - output_file << tab_sizes[i] << " " << tab_mflops[i] << endl ; - } - output_file.close(); - } - /////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -void read_xy_file(const string & filename, vector & tab_sizes, vector & tab_mflops){ +void read_xy_file(const string &filename, vector &tab_sizes, vector &tab_mflops) { + ifstream input_file(filename.c_str(), ios::in); - ifstream input_file (filename.c_str(),ios::in) ; - - if (!input_file){ - INFOS("!!! Error opening "<> size >> mflops ){ + int nb_point = 0; + int size = 0; + double mflops = 0; + + while (input_file >> size >> mflops) { nb_point++; tab_sizes.push_back(size); tab_mflops.push_back(mflops); @@ -195,4 +163,3 @@ void read_xy_file(const string & filename, vector & tab_sizes, vector class Perf_Analyzer, class Action> -BTL_DONT_INLINE void bench( int size_min, int size_max, int nb_point ) -{ - if (BtlConfig::skipAction(Action::name())) - return; +template