Stxxl
1.4.0
|
00001 /*************************************************************************** 00002 * containers/matrix_benchmark.cpp 00003 * 00004 * Part of the STXXL. See http://stxxl.sourceforge.net 00005 * 00006 * Copyright (C) 2010-2011 Raoul Steffen <R-Steffen@gmx.de> 00007 * 00008 * Distributed under the Boost Software License, Version 1.0. 00009 * (See accompanying file LICENSE_1_0.txt or copy at 00010 * http://www.boost.org/LICENSE_1_0.txt) 00011 **************************************************************************/ 00012 00013 #include <iostream> 00014 #include <limits> 00015 00016 // Thanks Daniel Russel, Stanford University 00017 #include <Argument_helper.h> 00018 00019 #include <stxxl/vector> 00020 #include <stxxl/stream> 00021 #include <stxxl/bits/containers/matrix.h> 00022 00023 using namespace stxxl; 00024 00025 int main(int argc, char **argv) 00026 { 00027 00028 #ifndef STXXL_MATRIX_BLOCK_ORDER 00029 const int block_order = 1568; // must be a multiple of 32, assuming at least 4 bytes element size 00030 #else 00031 const int block_order = STXXL_MATRIX_BLOCK_ORDER; // must be a multiple of 32, assuming at least 4 bytes element size 00032 #endif 00033 00034 int rank = 10000; 00035 int internal_memory_megabyte = 256; 00036 int mult_algo_num = 5; 00037 int sched_algo_num = 2; 00038 int internal_memory_byte = 0; 00039 00040 dsr::Argument_helper ah; 00041 ah.new_named_int('r', "rank", "N","rank of the matrices default", rank); 00042 ah.new_named_int('m', "memory", "L", "internal memory to use (in megabytes) default", internal_memory_megabyte); 00043 ah.new_named_int('a', "mult-algo", "N", "use multiplication-algorithm number N\n available are:\n 0: naive_multiply_and_add\n 1: recursive_multiply_and_add\n 2: strassen_winograd_multiply_and_add\n 3: multi_level_strassen_winograd_multiply_and_add\n 4: strassen_winograd_multiply (block-interleaved pre- and postadditions)\n 5: strassen_winograd_multiply_and_add_interleaved (block-interleaved preadditions)\n 6: multi_level_strassen_winograd_multiply_and_add_block_grained\n -1: internal multiplication\n -2: pure BLAS\n default", mult_algo_num); 00044 ah.new_named_int('s', "scheduling-algo", "N", "use scheduling-algorithm number N\n available are:\n 0: online LRU\n 1: offline LFD\n 2: offline LRU prefetching\n default", sched_algo_num); 00045 ah.new_named_int('c', "memory-byte", "L", "internal memory to use (in bytes) no default", internal_memory_byte); 00046 00047 ah.set_description("stxxl matrix test"); 00048 ah.set_author("Raoul Steffen, R-Steffen@gmx.de"); 00049 ah.process(argc, argv); 00050 00051 int_type internal_memory; 00052 if (internal_memory_byte) 00053 internal_memory = internal_memory_byte; 00054 else 00055 internal_memory = int_type(internal_memory_megabyte) * 1048576; 00056 00057 STXXL_MSG("multiplying two full double matrices of rank " << rank << ", block order " << block_order 00058 << " using " << internal_memory_megabyte << "MiB internal memory, multiplication-algo " 00059 << mult_algo_num << ", scheduling-algo " << sched_algo_num); 00060 00061 typedef double value_type; 00062 00063 stats_data stats_before, stats_after; 00064 matrix_operation_statistic_data matrix_stats_before, matrix_stats_after; 00065 00066 if (mult_algo_num == -2) 00067 { 00068 const int_type size = rank * rank; 00069 value_type * A = new value_type[size]; 00070 value_type * B = new value_type[size]; 00071 value_type * C = new value_type[size]; 00072 // write A and B 00073 for(int_type i = 0; i < size; ++i) 00074 A[i] = B[i] = 1; 00075 // evict A and B by accessing lots of memory 00076 int_type int_mem_size = 50*2^30/sizeof(int_type); 00077 assert(int_mem_size > 0); 00078 int_type * D = new int_type[int_mem_size]; 00079 for(int_type i = 0; i < int_mem_size; ++i) 00080 D[i] = 1; 00081 delete D; 00082 #if STXXL_BLAS 00083 stats_before = *stats::get_instance(); 00084 gemm_wrapper(rank, rank, rank, 00085 value_type(1), false, A, 00086 false, B, 00087 value_type(0), false, C); 00088 stats_after = *stats::get_instance(); 00089 #else 00090 STXXL_ERRMSG("internal multiplication is only available for testing with blas"); 00091 #endif 00092 delete A; 00093 delete B; 00094 delete C; 00095 } 00096 else 00097 { 00098 typedef block_scheduler< matrix_swappable_block<value_type, block_order> > bst; 00099 typedef matrix<value_type, block_order> mt; 00100 typedef mt::row_major_iterator mitt; 00101 typedef mt::const_row_major_iterator cmitt; 00102 00103 bst * b_s = new bst(internal_memory); // the block_scheduler may use internal_memory byte for caching 00104 bst & bs = *b_s; 00105 mt * a = new mt(bs, rank, rank), 00106 * b = new mt(bs, rank, rank), 00107 * c = new mt(bs, rank, rank); 00108 00109 STXXL_MSG("writing input matrices"); 00110 for (mitt mit = a->begin(); mit != a->end(); ++mit) 00111 *mit = 1; 00112 for (mitt mit = b->begin(); mit != b->end(); ++mit) 00113 *mit = 1; 00114 00115 bs.flush(); 00116 STXXL_MSG("start of multiplication"); 00117 matrix_stats_before.set(); 00118 stats_before = *stats::get_instance(); 00119 if (mult_algo_num >= 0) 00120 *c = a->multiply(*b, mult_algo_num, sched_algo_num); 00121 else 00122 *c = a->multiply_internal(*b, sched_algo_num); 00123 bs.flush(); 00124 stats_after = *stats::get_instance(); 00125 matrix_stats_after.set(); 00126 STXXL_MSG("end of multiplication"); 00127 00128 matrix_stats_after = matrix_stats_after - matrix_stats_before; 00129 STXXL_MSG(matrix_stats_after); 00130 stats_after = stats_after - stats_before; 00131 STXXL_MSG(stats_after); 00132 { 00133 int_type num_err = 0; 00134 for (cmitt mit = c->cbegin(); mit != c->cend(); ++mit) 00135 num_err += (*mit != rank); 00136 if (num_err) 00137 STXXL_ERRMSG("c had " << num_err << " errors"); 00138 } 00139 00140 delete a; 00141 delete b; 00142 delete c; 00143 delete b_s; 00144 } 00145 00146 STXXL_MSG("end of test"); 00147 std::cout << "@"; 00148 std::cout << " ra " << rank << " bo " << block_order << " im " << internal_memory_megabyte 00149 << " ma " << mult_algo_num << " sa " << sched_algo_num; 00150 std::cout << " mu " << matrix_stats_after.block_multiplication_calls 00151 << " mus " << matrix_stats_after.block_multiplications_saved_through_zero 00152 << " ad " << matrix_stats_after.block_addition_calls 00153 << " ads " << matrix_stats_after.block_additions_saved_through_zero; 00154 std::cout << " t " << stats_after.get_elapsed_time() 00155 << " r " << stats_after.get_reads() << " w " << stats_after.get_writes() 00156 << " rt " << stats_after.get_read_time() << " rtp " << stats_after.get_pread_time() 00157 << " wt " << stats_after.get_write_time() << " wtp " << stats_after.get_pwrite_time() 00158 << " rw " << stats_after.get_wait_read_time() << " ww " << stats_after.get_wait_write_time() 00159 << " iotp " << stats_after.get_pio_time(); 00160 std::cout << std::endl; 00161 return 0; 00162 }