Stxxl
1.4.0
|
00001 /*************************************************************************** 00002 * containers/berkeley_db_benchmark.cpp 00003 * 00004 * Part of the STXXL. See http://stxxl.sourceforge.net 00005 * 00006 * Copyright (C) 2006 Roman Dementiev <dementiev@ira.uka.de> 00007 * Copyright (C) 2009, 2010 Andreas Beckmann <beckmann@cs.uni-frankfurt.de> 00008 * 00009 * Distributed under the Boost Software License, Version 1.0. 00010 * (See accompanying file LICENSE_1_0.txt or copy at 00011 * http://www.boost.org/LICENSE_1_0.txt) 00012 **************************************************************************/ 00013 00014 //! \example containers/berkeley_db_benchmark.cpp 00015 //! This is a benchmark mentioned in the paper 00016 //! R. Dementiev, L. Kettner, P. Sanders "STXXL: standard template library for XXL data sets" 00017 //! Software: Practice and Experience 00018 //! Volume 38, Issue 6, Pages 589-637, May 2008 00019 //! DOI: 10.1002/spe.844 00020 00021 00022 #include <stxxl/vector> 00023 #include <stxxl/map> 00024 #include <stxxl/timer> 00025 #include <stxxl/stream> 00026 00027 00028 ///// BDB header //////////// 00029 #include <db_cxx.h> 00030 00031 ///////// TPIE headers ////////// 00032 #include "app_config.h" 00033 #include <portability.h> 00034 #include <ami_btree.h> 00035 //////////////////////////// 00036 00037 #define BDB_BULK_SCAN 00038 00039 #define KEY_SIZE 8 00040 #define DATA_SIZE 32 00041 00042 #define NODE_BLOCK_SIZE (32 * 1024) 00043 #define LEAF_BLOCK_SIZE (32 * 1024) 00044 00045 00046 #define LEAF_BLOCK_SIZE (32 * 1024) 00047 00048 #define TOTAL_CACHE_SIZE (750 * 1024 * 1024) 00049 //#define TOTAL_CACHE_SIZE (150 * 1024 * 1024) 00050 00051 //#define NODE_CACHE_SIZE (1 * (TOTAL_CACHE_SIZE / 40)) 00052 //#define LEAF_CACHE_SIZE (39 * (TOTAL_CACHE_SIZE / 40)) 00053 00054 #define NODE_CACHE_SIZE (64 * 1024 * 1024) 00055 #define LEAF_CACHE_SIZE (TOTAL_CACHE_SIZE - NODE_CACHE_SIZE) 00056 00057 #define SORTER_MEM (TOTAL_CACHE_SIZE - 1024 * 1024 * 2 * 4) 00058 00059 00060 #define SCAN_LIMIT(x) (x) 00061 00062 //#define BDB_FILE "/data3/bdb_file" 00063 #define BDB_FILE "/var/tmp/bdb_file" 00064 00065 00066 // BDB settings 00067 u_int32_t pagesize = LEAF_BLOCK_SIZE; 00068 u_int bulkbufsize = 4 * 1024 * 1024; 00069 u_int logbufsize = 8 * 1024 * 1024; 00070 u_int cachesize = (TOTAL_CACHE_SIZE < 500 * 1024 * 1024) ? (4 * (TOTAL_CACHE_SIZE / 5)) : (TOTAL_CACHE_SIZE - 100 * 1024 * 1024); 00071 u_int datasize = DATA_SIZE; 00072 u_int keysize = KEY_SIZE; 00073 u_int numitems = 0; 00074 00075 const char * letters = "abcdefghijklmnopqrstuvwxuz"; 00076 00077 struct my_key 00078 { 00079 char keybuf[KEY_SIZE]; 00080 }; 00081 00082 00083 std::ostream & operator << (std::ostream & o, const my_key & obj) 00084 { 00085 for (int i = 0; i < KEY_SIZE; ++i) 00086 o << obj.keybuf[i]; 00087 00088 return o; 00089 } 00090 00091 bool operator == (const my_key & a, const my_key & b) 00092 { 00093 return strncmp(a.keybuf, b.keybuf, KEY_SIZE) == 0; 00094 } 00095 00096 bool operator != (const my_key & a, const my_key & b) 00097 { 00098 return strncmp(a.keybuf, b.keybuf, KEY_SIZE) != 0; 00099 } 00100 00101 bool operator < (const my_key & a, const my_key & b) 00102 { 00103 return strncmp(a.keybuf, b.keybuf, KEY_SIZE) < 0; 00104 } 00105 00106 bool operator > (const my_key & a, const my_key & b) 00107 { 00108 return strncmp(a.keybuf, b.keybuf, KEY_SIZE) > 0; 00109 } 00110 00111 bool operator <= (const my_key & a, const my_key & b) 00112 { 00113 return strncmp(a.keybuf, b.keybuf, KEY_SIZE) <= 0; 00114 } 00115 00116 bool operator >= (const my_key & a, const my_key & b) 00117 { 00118 return strncmp(a.keybuf, b.keybuf, KEY_SIZE) >= 0; 00119 } 00120 00121 00122 struct my_data 00123 { 00124 char databuf[DATA_SIZE]; 00125 }; 00126 00127 std::ostream & operator << (std::ostream & o, const my_data & obj) 00128 { 00129 o << "DATA(size=" << sizeof(obj) << ") "; 00130 00131 return o; 00132 } 00133 00134 my_key min_key, max_key; 00135 00136 struct comp_type : std::binary_function<my_key, my_key, bool> 00137 { 00138 bool operator () (const my_key & a, const my_key & b) const 00139 { 00140 return strncmp(a.keybuf, b.keybuf, KEY_SIZE) < 0; 00141 } 00142 static my_key max_value() 00143 { 00144 return max_key; 00145 } 00146 static my_key min_value() 00147 { 00148 return min_key; 00149 } 00150 }; 00151 00152 00153 /// TPIE declarations 00154 // Key type. 00155 typedef my_key bkey_t; 00156 00157 // Element type for the btree. 00158 struct el_t { 00159 bkey_t key_; 00160 my_data data_; 00161 el_t(bkey_t k) : key_(k) { } 00162 el_t() { } 00163 }; 00164 struct key_from_el { 00165 bkey_t operator () (const el_t & v) const 00166 { 00167 return v.key_; 00168 } 00169 }; 00170 00171 00172 // Temporary distinction btw UN*X and WIN, since there are some 00173 // problems with the MMAP collection implementation. 00174 #ifdef _WIN32 00175 typedef AMI_btree<bkey_t, el_t, less<bkey_t>, key_from_el, BTE_COLLECTION_UFS> u_btree_t; 00176 #else 00177 typedef AMI_btree<bkey_t, el_t, less<bkey_t>, key_from_el> u_btree_t; 00178 #endif 00179 00180 00181 void init() 00182 { 00183 memset(max_key.keybuf, (std::numeric_limits<unsigned char>::max)(), KEY_SIZE); 00184 memset(min_key.keybuf, (std::numeric_limits<unsigned char>::min)(), KEY_SIZE); 00185 } 00186 00187 typedef stxxl::map<my_key, my_data, comp_type, NODE_BLOCK_SIZE, LEAF_BLOCK_SIZE> map_type; 00188 00189 #define REAL_NODE_BLOCK_SIZE map_type::node_block_type::raw_size 00190 #define REAL_LEAF_BLOCK_SIZE map_type::leaf_block_type::raw_size 00191 #define REAL_NODE_MELEMENTS map_type::node_block_type::size 00192 #define REAL_LEAF_MELEMENTS map_type::leaf_block_type::size 00193 00194 typedef stxxl::VECTOR_GENERATOR<std::pair<my_key, my_data>, 1, 1>::result vector_type; 00195 //typedef stxxl::vector<std::pair<my_key,my_data>,1,stxxl::lru_pager<1>,512*1024> vector_type; 00196 00197 00198 //#define KEYPOS (i % KEY_SIZE) 00199 //#define VALUE (myrand() % 26) 00200 00201 00202 #if 0 00203 unsigned ran32State = 0xdeadbeef; 00204 inline unsigned myrand() 00205 { 00206 return (ran32State = 1664525 * ran32State + 1013904223); 00207 } 00208 inline void rand_key(stxxl::int64 pos, my_key & Key) 00209 { 00210 for (int i = 0; i < KEY_SIZE; ++i) 00211 Key.keybuf[i] = letters[myrand() % 26]; 00212 } 00213 #else // a longer pseudo random sequence 00214 long long unsigned ran32State = 0xdeadbeef; 00215 inline long long unsigned myrand() 00216 { 00217 return (ran32State = (ran32State * 0x5DEECE66DULL + 0xBULL) & 0xFFFFFFFFFFFFULL); 00218 } 00219 inline void rand_key(stxxl::int64 /*pos*/, my_key & Key) 00220 { 00221 long long unsigned r = myrand(); 00222 for (int i = 0; i < KEY_SIZE; ++i) 00223 { 00224 Key.keybuf[i] = letters[r % 26]; 00225 r >>= 5; 00226 } 00227 } 00228 #endif 00229 00230 void run_bdb_btree(stxxl::int64 ops) 00231 { 00232 const char * filename = BDB_FILE; 00233 00234 my_key key1_storage; 00235 my_data data1_storage; 00236 00237 unlink(filename); 00238 00239 memset(key1_storage.keybuf, 'a', KEY_SIZE); 00240 memset(data1_storage.databuf, 'b', DATA_SIZE); 00241 00242 00243 Db db(NULL, 0); // Instantiate the Db object 00244 00245 try { 00246 db.set_errfile(stderr); 00247 db.set_pagesize(pagesize); 00248 db.set_cachesize(0, cachesize, 1); 00249 00250 // Open the database 00251 db.open(NULL, // Transaction pointer 00252 filename, // Database file name 00253 NULL, // Optional logical database name 00254 DB_BTREE, // Database access method 00255 DB_CREATE, // Open flags 00256 0); // File mode (using defaults) 00257 00258 00259 // here we start with the tests 00260 Dbt key1(key1_storage.keybuf, KEY_SIZE); 00261 Dbt data1(data1_storage.databuf, DATA_SIZE); 00262 00263 stxxl::timer Timer; 00264 stxxl::int64 n_inserts = ops, n_locates = ops, n_range_queries = ops, n_deletes = ops; 00265 stxxl::int64 i; 00266 //comp_type cmp_; 00267 00268 ran32State = 0xdeadbeef; 00269 00270 00271 DB_BTREE_STAT * dbstat; 00272 00273 db.stat(NULL, &dbstat, 0); 00274 STXXL_MSG("Records in map: " << dbstat->bt_ndata); 00275 00276 db.get_env()->memp_stat(NULL, NULL, DB_STAT_CLEAR); 00277 00278 Timer.start(); 00279 00280 for (i = 0; i < n_inserts; ++i) 00281 { 00282 //key1_storage.keybuf[KEYPOS] = letters[VALUE]; 00283 rand_key(i, key1_storage); 00284 db.put(NULL, &key1, &data1, DB_NOOVERWRITE); 00285 } 00286 00287 Timer.stop(); 00288 db.stat(NULL, &dbstat, 0); 00289 STXXL_MSG("Records in map: " << dbstat->bt_ndata); 00290 STXXL_MSG("Insertions elapsed time: " << (Timer.mseconds() / 1000.) << 00291 " seconds : " << (double(n_inserts) / (Timer.mseconds() / 1000.)) << " key/data pairs per sec"); 00292 00293 db.get_env()->memp_stat_print(DB_STAT_CLEAR); 00294 00295 ///////////////////////////////////////// 00296 Timer.reset(); 00297 Timer.start(); 00298 00299 00300 Dbc * cursorp; 00301 db.cursor(NULL, &cursorp, 0); 00302 00303 for (i = 0; i < n_locates; ++i) 00304 { 00305 //key1_storage.keybuf[KEYPOS] = letters[VALUE]; 00306 rand_key(i, key1_storage); 00307 Dbt keyx(key1_storage.keybuf, KEY_SIZE); 00308 Dbt datax(data1_storage.databuf, DATA_SIZE); 00309 00310 cursorp->get(&keyx, &datax, DB_SET_RANGE); 00311 } 00312 00313 Timer.stop(); 00314 STXXL_MSG("Locates elapsed time: " << (Timer.mseconds() / 1000.) << 00315 " seconds : " << (double(ops) / (Timer.mseconds() / 1000.)) << " key/data pairs per sec"); 00316 00317 db.get_env()->memp_stat_print(DB_STAT_CLEAR); 00318 00319 //////////////////////////////////// 00320 Timer.reset(); 00321 00322 Timer.start(); 00323 00324 stxxl::int64 n_scanned = 0; 00325 00326 for (i = 0; i < n_range_queries; ++i) 00327 { 00328 //key1_storage.keybuf[KEYPOS] = letters[VALUE]; 00329 rand_key(i, key1_storage); 00330 my_key last_key = key1_storage; 00331 //key1_storage.keybuf[KEYPOS] = letters[VALUE]; 00332 rand_key(i, key1_storage); 00333 if (last_key < key1_storage) 00334 std::swap(last_key, key1_storage); 00335 00336 00337 Dbt keyx(key1_storage.keybuf, KEY_SIZE); 00338 Dbt datax(data1_storage.databuf, DATA_SIZE); 00339 00340 if (cursorp->get(&keyx, &datax, DB_SET_RANGE) == DB_NOTFOUND) 00341 continue; 00342 00343 00344 while (*((my_key *)keyx.get_data()) <= last_key) 00345 { 00346 ++n_scanned; 00347 if (cursorp->get(&keyx, &datax, DB_NEXT) == DB_NOTFOUND) 00348 break; 00349 } 00350 00351 if (n_scanned >= 10 * n_range_queries) 00352 { 00353 ++i; 00354 break; 00355 } 00356 } 00357 00358 n_range_queries = i; 00359 00360 Timer.stop(); 00361 if (cursorp != NULL) 00362 cursorp->close(); 00363 00364 00365 STXXL_MSG("Range query elapsed time: " << (Timer.mseconds() / 1000.) << 00366 " seconds : " << (double(n_scanned) / (Timer.mseconds() / 1000.)) << 00367 " key/data pairs per sec, #queries " << n_range_queries << " #scanned elements: " << n_scanned); 00368 00369 db.get_env()->memp_stat_print(DB_STAT_CLEAR); 00370 00371 ////////////////////////////////////// 00372 00373 ran32State = 0xdeadbeef; 00374 memset(key1_storage.keybuf, 'a', KEY_SIZE); 00375 00376 Timer.reset(); 00377 Timer.start(); 00378 00379 for (i = 0; i < n_deletes; ++i) 00380 { 00381 //key1_storage.keybuf[KEYPOS] = letters[VALUE]; 00382 rand_key(i, key1_storage); 00383 Dbt keyx(key1_storage.keybuf, KEY_SIZE); 00384 db.del(NULL, &keyx, 0); 00385 } 00386 00387 Timer.stop(); 00388 db.stat(NULL, &dbstat, 0); 00389 STXXL_MSG("Records in map: " << dbstat->bt_ndata); 00390 STXXL_MSG("Erase elapsed time: " << (Timer.mseconds() / 1000.) << 00391 " seconds : " << (double(ops) / (Timer.mseconds() / 1000.)) << " key/data pairs per sec"); 00392 00393 db.get_env()->memp_stat_print(DB_STAT_CLEAR); 00394 00395 db.close(0); 00396 } 00397 catch (DbException & e) 00398 { 00399 STXXL_ERRMSG("DbException happened"); 00400 } 00401 catch (std::exception & e) 00402 { 00403 STXXL_ERRMSG("std::exception happened"); 00404 } 00405 00406 unlink(filename); 00407 } 00408 00409 void run_stxxl_map(stxxl::int64 ops) 00410 { 00411 map_type Map(NODE_CACHE_SIZE, LEAF_CACHE_SIZE); 00412 Map.enable_prefetching(); 00413 stxxl::stats * Stats = stxxl::stats::get_instance(); 00414 00415 std::pair<my_key, my_data> element; 00416 00417 memset(element.first.keybuf, 'a', KEY_SIZE); 00418 memset(element.second.databuf, 'b', DATA_SIZE); 00419 00420 00421 stxxl::timer Timer; 00422 stxxl::int64 n_inserts = ops, n_locates = ops, n_range_queries = ops, n_deletes = ops; 00423 stxxl::int64 i; 00424 //comp_type cmp_; 00425 00426 ran32State = 0xdeadbeef; 00427 00428 //stxxl::random_number32 myrand; 00429 00430 STXXL_MSG("Records in map: " << Map.size()); 00431 00432 stxxl::stats_data stats_begin(*Stats); 00433 Timer.start(); 00434 00435 for (i = 0; i < n_inserts; ++i) 00436 { 00437 //element.first.keybuf[KEYPOS] = letters[VALUE]; 00438 rand_key(i, element.first); 00439 Map.insert(element); 00440 } 00441 00442 Timer.stop(); 00443 00444 STXXL_MSG("Records in map: " << Map.size()); 00445 STXXL_MSG("Insertions elapsed time: " << (Timer.mseconds() / 1000.) << 00446 " seconds : " << (double(n_inserts) / (Timer.mseconds() / 1000.)) << " key/data pairs per sec"); 00447 00448 std::cout << (stxxl::stats_data(*Stats) - stats_begin); 00449 00450 //////////////////////////////////////////////// 00451 00452 const map_type & CMap(Map); // const map reference 00453 00454 stats_begin = stxxl::stats_data(*Stats); 00455 Timer.reset(); 00456 Timer.start(); 00457 00458 for (i = 0; i < n_locates; ++i) 00459 { 00460 //element.first.keybuf[KEYPOS] = letters[VALUE]; 00461 rand_key(i, element.first); 00462 CMap.lower_bound(element.first); 00463 } 00464 00465 Timer.stop(); 00466 STXXL_MSG("Locates elapsed time: " << (Timer.mseconds() / 1000.) << 00467 " seconds : " << (double(n_locates) / (Timer.mseconds() / 1000.)) << " key/data pairs per sec"); 00468 00469 std::cout << (stxxl::stats_data(*Stats) - stats_begin); 00470 00471 //////////////////////////////////// 00472 00473 stats_begin = stxxl::stats_data(*Stats); 00474 Timer.reset(); 00475 Timer.start(); 00476 00477 stxxl::int64 n_scanned = 0; //, skipped_qieries = 0; 00478 00479 for (i = 0; i < n_range_queries; ++i) 00480 { 00481 //element.first.keybuf[KEYPOS] = letters[VALUE]; 00482 rand_key(i, element.first); 00483 my_key begin_key = element.first; 00484 map_type::const_iterator begin = CMap.lower_bound(element.first); 00485 //element.first.keybuf[KEYPOS] = letters[VALUE]; 00486 rand_key(i, element.first); 00487 map_type::const_iterator beyond = CMap.lower_bound(element.first); 00488 if (element.first < begin_key) 00489 std::swap(begin, beyond); 00490 00491 while (begin != beyond) 00492 { 00493 my_data tmp = begin->second; 00494 stxxl::STXXL_UNUSED(tmp); 00495 ++n_scanned; 00496 ++begin; 00497 } 00498 if (n_scanned >= 10 * n_range_queries) 00499 { 00500 ++i; 00501 break; 00502 } 00503 } 00504 00505 n_range_queries = i; 00506 00507 Timer.stop(); 00508 STXXL_MSG("Range query elapsed time: " << (Timer.mseconds() / 1000.) << 00509 " seconds : " << (double(n_scanned) / (Timer.mseconds() / 1000.)) << 00510 " key/data pairs per sec, #queries " << n_range_queries << " #scanned elements: " << n_scanned); 00511 00512 std::cout << (stxxl::stats_data(*Stats) - stats_begin); 00513 00514 ////////////////////////////////////// 00515 ran32State = 0xdeadbeef; 00516 memset(element.first.keybuf, 'a', KEY_SIZE); 00517 memset(element.second.databuf, 'b', DATA_SIZE); 00518 00519 stats_begin = stxxl::stats_data(*Stats); 00520 Timer.reset(); 00521 Timer.start(); 00522 00523 for (i = 0; i < n_deletes; ++i) 00524 { 00525 //element.first.keybuf[KEYPOS] = letters[VALUE]; 00526 rand_key(i, element.first); 00527 Map.erase(element.first); 00528 } 00529 00530 Timer.stop(); 00531 STXXL_MSG("Records in map: " << Map.size()); 00532 STXXL_MSG("Erase elapsed time: " << (Timer.mseconds() / 1000.) << 00533 " seconds : " << (double(n_deletes) / (Timer.mseconds() / 1000.)) << " key/data pairs per sec"); 00534 00535 std::cout << (stxxl::stats_data(*Stats) - stats_begin); 00536 } 00537 00538 class rand_key_gen 00539 { 00540 stxxl::int64 counter; 00541 my_key & current; 00542 stxxl::random_number32 myrand; 00543 rand_key_gen(); 00544 00545 public: 00546 typedef my_key value_type; 00547 00548 rand_key_gen(stxxl::int64 el, my_key & cur) : 00549 counter(el), current(cur) 00550 { 00551 //const stxxl::int64 & i = counter; 00552 //current.keybuf[KEYPOS] = letters[VALUE]; 00553 rand_key(counter, current); 00554 } 00555 const my_key & operator * () { return current; } 00556 const my_key * operator -> () { return ¤t; } 00557 00558 rand_key_gen & operator ++ () 00559 { 00560 --counter; 00561 //const stxxl::int64 & i = counter; 00562 //current.keybuf[KEYPOS] = letters[VALUE]; 00563 rand_key(counter, current); 00564 return *this; 00565 } 00566 bool empty() const { return counter == 0; } 00567 }; 00568 00569 template <class InputType> 00570 class key2pair 00571 { 00572 InputType & in; 00573 std::pair<my_key, my_data> current; 00574 key2pair(); 00575 00576 public: 00577 typedef std::pair<my_key, my_data> value_type; 00578 00579 key2pair(InputType & in_) : in(in_) 00580 { 00581 if (!in.empty()) 00582 current.first = *in; 00583 } 00584 const value_type & operator * () { return current; } 00585 const value_type * operator -> () { return ¤t; } 00586 00587 key2pair & operator ++ () 00588 { 00589 ++in; 00590 if (!empty()) 00591 current.first = *in; 00592 00593 00594 return *this; 00595 } 00596 bool empty() const { return in.empty(); } 00597 }; 00598 00599 void run_stxxl_map_big(stxxl::int64 n, unsigned ops) 00600 { 00601 stxxl::stats * Stats = stxxl::stats::get_instance(); 00602 00603 std::pair<my_key, my_data> element; 00604 00605 memset(element.first.keybuf, 'a', KEY_SIZE); 00606 memset(element.second.databuf, 'b', DATA_SIZE); 00607 00608 stxxl::timer Timer; 00609 stxxl::int64 n_inserts = ops, n_locates = ops, n_range_queries = ops, n_deletes = ops; 00610 stxxl::int64 i; 00611 //comp_type cmp_; 00612 00613 ran32State = 0xdeadbeef; 00614 00615 //stxxl::random_number32 myrand; 00616 00617 Timer.start(); 00618 00619 vector_type SortedSeq(n); 00620 const vector_type & CSortedSeq(SortedSeq); 00621 { 00622 rand_key_gen Gen(n, element.first); 00623 typedef stxxl::stream::sort<rand_key_gen, comp_type> sorter_type; 00624 sorter_type Sorter(Gen, comp_type(), SORTER_MEM); 00625 typedef key2pair<sorter_type> key2pair_type; 00626 key2pair_type Key2Pair(Sorter); 00627 stxxl::stream::materialize(Key2Pair, SortedSeq.begin()); 00628 } 00629 00630 00631 Timer.stop(); 00632 00633 STXXL_MSG("Finished sorting input. Elapsed time: " << 00634 (Timer.mseconds() / 1000.) << " seconds."); 00635 00636 stxxl::stats_data stats_begin(*Stats); 00637 Timer.reset(); 00638 Timer.start(); 00639 00640 // bulk construction 00641 map_type Map(CSortedSeq.begin(), 00642 CSortedSeq.end(), 00643 NODE_CACHE_SIZE, LEAF_CACHE_SIZE, true); 00644 00645 Timer.stop(); 00646 00647 00648 STXXL_MSG("Records in map: " << Map.size()); 00649 STXXL_MSG("Construction elapsed time: " << (Timer.mseconds() / 1000.) << 00650 " seconds : " << (double(n) / (Timer.mseconds() / 1000.)) << " key/data pairs per sec"); 00651 00652 Map.print_statistics(cout); 00653 Map.reset_statistics(); 00654 std::cout << (stxxl::stats_data(*Stats) - stats_begin); 00655 00656 //////////////////////////////////////// 00657 00658 Map.disable_prefetching(); 00659 00660 stats_begin = stxxl::stats_data(*Stats); 00661 Timer.reset(); 00662 Timer.start(); 00663 00664 for (i = 0; i < n_inserts; ++i) 00665 { 00666 //element.first.keybuf[KEYPOS] = letters[VALUE]; 00667 rand_key(i, element.first); 00668 Map.insert(element); 00669 } 00670 00671 Timer.stop(); 00672 00673 STXXL_MSG("Records in map: " << Map.size()); 00674 STXXL_MSG("Insertions elapsed time: " << (Timer.mseconds() / 1000.) << 00675 " seconds : " << (double(n_inserts) / (Timer.mseconds() / 1000.)) << " key/data pairs per sec"); 00676 00677 Map.print_statistics(cout); 00678 Map.reset_statistics(); 00679 std::cout << (stxxl::stats_data(*Stats) - stats_begin); 00680 00681 //////////////////////////////////// 00682 00683 const map_type & CMap(Map); // const map reference 00684 00685 Timer.reset(); 00686 Timer.start(); 00687 00688 for (i = 0; i < n_locates; ++i) 00689 { 00690 //element.first.keybuf[KEYPOS] = letters[VALUE]; 00691 rand_key(i, element.first); 00692 CMap.lower_bound(element.first); 00693 } 00694 00695 Timer.stop(); 00696 STXXL_MSG("Locates elapsed time: " << (Timer.mseconds() / 1000.) << 00697 " seconds : " << (double(ops) / (Timer.mseconds() / 1000.)) << " key/data pairs per sec"); 00698 00699 Map.print_statistics(cout); 00700 Map.reset_statistics(); 00701 std::cout << (stxxl::stats_data(*Stats) - stats_begin); 00702 00703 //////////////////////////////////// 00704 00705 Map.enable_prefetching(); 00706 00707 stats_begin = stxxl::stats_data(*Stats); 00708 Timer.reset(); 00709 Timer.start(); 00710 00711 stxxl::int64 n_scanned = 0; //, skipped_qieries = 0; 00712 00713 for (i = 0; i < n_range_queries; ++i) 00714 { 00715 //element.first.keybuf[KEYPOS] = letters[VALUE]; 00716 rand_key(i, element.first); 00717 my_key begin_key = element.first; 00718 map_type::const_iterator begin = CMap.lower_bound(element.first); 00719 //element.first.keybuf[KEYPOS] = letters[VALUE]; 00720 rand_key(i, element.first); 00721 map_type::const_iterator beyond = CMap.lower_bound(element.first); 00722 if (element.first < begin_key) 00723 std::swap(begin, beyond); 00724 00725 /* 00726 STXXL_MSG("Looking "<<element.first<<" scanned: "<<n_scanned); 00727 00728 if(beyond==CMap.end()) 00729 { 00730 STXXL_MSG("Upper bound "<<"END"); 00731 } 00732 else 00733 { 00734 STXXL_MSG("Upper bound "<<((element.first>begin_key)?element.first:begin_key)); 00735 }*/ 00736 00737 while (begin != beyond) 00738 { 00739 ++n_scanned; 00740 ++begin; 00741 } 00742 00743 if (n_scanned >= SCAN_LIMIT(n)) 00744 { 00745 ++i; 00746 break; 00747 } 00748 } 00749 00750 n_range_queries = i; 00751 00752 Timer.stop(); 00753 STXXL_MSG("Range query elapsed time: " << (Timer.mseconds() / 1000.) << 00754 " seconds : " << (double(n_scanned) / (Timer.mseconds() / 1000.)) << 00755 " key/data pairs per sec, #queries " << n_range_queries << " #scanned elements: " << n_scanned); 00756 00757 Map.print_statistics(cout); 00758 Map.reset_statistics(); 00759 std::cout << (stxxl::stats_data(*Stats) - stats_begin); 00760 00761 ////////////////////////////////////// 00762 00763 ran32State = 0xdeadbeef; 00764 memset(element.first.keybuf, 'a', KEY_SIZE); 00765 memset(element.second.databuf, 'b', DATA_SIZE); 00766 00767 Map.disable_prefetching(); 00768 00769 stats_begin = stxxl::stats_data(*Stats); 00770 Timer.reset(); 00771 Timer.start(); 00772 00773 for (i = n_deletes; i > 0; --i) 00774 { 00775 //element.first.keybuf[KEYPOS] = letters[VALUE]; 00776 rand_key(i, element.first); 00777 Map.erase(element.first); 00778 } 00779 00780 Timer.stop(); 00781 STXXL_MSG("Records in map: " << Map.size()); 00782 STXXL_MSG("Erase elapsed time: " << (Timer.mseconds() / 1000.) << 00783 " seconds : " << (double(ops) / (Timer.mseconds() / 1000.)) << " key/data pairs per sec"); 00784 00785 Map.print_statistics(cout); 00786 Map.reset_statistics(); 00787 std::cout << (stxxl::stats_data(*Stats) - stats_begin); 00788 } 00789 00790 ///////////////////////////////////////////////////////////////////////// 00791 00792 typedef AMI_STREAM<el_t> stream_t; 00793 00794 char dummy; 00795 00796 class MyFilter { 00797 public: 00798 bool operator () (const el_t & v) const 00799 { 00800 dummy += v.key_.keybuf[0]; // touch element 00801 return true; 00802 } 00803 }; 00804 00805 00806 void run_tpie_btree_big(stxxl::int64 n, unsigned ops) 00807 { 00808 el_t element; 00809 00810 memset(element.key_.keybuf, 'a', KEY_SIZE); 00811 memset(element.data_.databuf, 'b', DATA_SIZE); 00812 00813 // Log debugging info from the application, but not from the library. 00814 tpie_log_init(TPIE_LOG_APP_DEBUG); 00815 MM_manager.set_memory_limit(TOTAL_CACHE_SIZE); 00816 MM_manager.enforce_memory_limit(); 00817 00818 stream_t * is = new stream_t; 00819 00820 stxxl::timer Timer; 00821 stxxl::int64 n_inserts = ops, n_locates = ops, n_range_queries = ops, n_deletes = ops; 00822 stxxl::int64 i; 00823 //comp_type cmp_; 00824 00825 ran32State = 0xdeadbeef; 00826 00827 //stxxl::random_number32 myrand; 00828 00829 Timer.start(); 00830 00831 00832 { 00833 rand_key_gen Gen(n, element.key_); 00834 typedef stxxl::stream::sort<rand_key_gen, comp_type> sorter_type; 00835 sorter_type Sorter(Gen, comp_type(), SORTER_MEM); 00836 //typedef key2pair<sorter_type> key2pair_type; 00837 //key2pair_type Key2Pair(Sorter); 00838 while (!Sorter.empty()) 00839 { 00840 is->write_item(*Sorter); 00841 ++Sorter; 00842 } 00843 } 00844 00845 Timer.stop(); 00846 00847 00848 STXXL_MSG("Finished sorting input. Elapsed time: " << 00849 (Timer.mseconds() / 1000.) << " seconds."); 00850 00851 00852 Timer.reset(); 00853 Timer.start(); 00854 00855 // bulk construction 00856 u_btree_t * u_btree; 00857 AMI_btree_params params; 00858 params.node_block_factor = NODE_BLOCK_SIZE / 4096; 00859 params.leaf_block_factor = LEAF_BLOCK_SIZE / 4096; 00860 params.leaf_cache_size = LEAF_CACHE_SIZE / LEAF_BLOCK_SIZE; 00861 params.node_cache_size = NODE_CACHE_SIZE / NODE_BLOCK_SIZE; 00862 00863 u_btree = new u_btree_t(params); 00864 00865 using std::cout; 00866 using std::cerr; 00867 00868 if (!u_btree->is_valid()) { 00869 cerr << "Error initializing btree. Aborting.\n"; 00870 delete u_btree; 00871 exit(1); 00872 } 00873 00874 if (u_btree->load_sorted(is, 1.0, 1.0) != AMI_ERROR_NO_ERROR) 00875 cerr << "Error during bulk loading.\n"; 00876 00877 00878 Timer.stop(); 00879 00880 STXXL_MSG("Records in map: " << u_btree->size()); 00881 STXXL_MSG("Construction elapsed time: " << (Timer.mseconds() / 1000.) << 00882 " seconds : " << (double(n) / (Timer.mseconds() / 1000.)) << " key/data pairs per sec"); 00883 00884 00885 //////////////////////////////////////// 00886 Timer.reset(); 00887 00888 00889 Timer.start(); 00890 00891 for (i = 0; i < n_inserts; ++i) 00892 { 00893 //element.first.keybuf[KEYPOS] = letters[VALUE]; 00894 rand_key(i, element.key_); 00895 u_btree->insert(element); 00896 } 00897 00898 Timer.stop(); 00899 00900 STXXL_MSG("Records in map: " << u_btree->size()); 00901 STXXL_MSG("Insertions elapsed time: " << (Timer.mseconds() / 1000.) << 00902 " seconds : " << (double(n_inserts) / (Timer.mseconds() / 1000.)) << " key/data pairs per sec"); 00903 00904 00905 //////////////////////////////////////////////// 00906 Timer.reset(); 00907 00908 00909 Timer.start(); 00910 00911 00912 el_t result; 00913 for (i = 0; i < n_locates; ++i) 00914 { 00915 //element.first.keybuf[KEYPOS] = letters[VALUE]; 00916 rand_key(i, element.key_); 00917 u_btree->succ(element.key_, result); 00918 } 00919 00920 Timer.stop(); 00921 STXXL_MSG("Locates elapsed time: " << (Timer.mseconds() / 1000.) << 00922 " seconds : " << (double(ops) / (Timer.mseconds() / 1000.)) << " key/data pairs per sec"); 00923 00924 00925 //////////////////////////////////// 00926 Timer.reset(); 00927 00928 00929 Timer.start(); 00930 00931 stxxl::int64 n_scanned = 0; //, skipped_qieries = 0; 00932 MyFilter filter; 00933 00934 for (i = 0; i < n_range_queries; ++i) 00935 { 00936 rand_key(i, element.key_); 00937 my_key begin_key = element.key_; 00938 rand_key(i, element.key_); 00939 if (element.key_ < begin_key) 00940 n_scanned += u_btree->range_query(element.key_, begin_key, NULL, filter); 00941 00942 else 00943 n_scanned += u_btree->range_query(begin_key, element.key_, NULL, filter); 00944 00945 00946 if (n_scanned >= SCAN_LIMIT(n)) 00947 { 00948 ++i; 00949 break; 00950 } 00951 } 00952 00953 n_range_queries = i; 00954 00955 Timer.stop(); 00956 STXXL_MSG("Range query elapsed time: " << (Timer.mseconds() / 1000.) << 00957 " seconds : " << (double(n_scanned) / (Timer.mseconds() / 1000.)) << 00958 " key/data pairs per sec, #queries " << n_range_queries << " #scanned elements: " << n_scanned); 00959 00960 00961 ////////////////////////////////////// 00962 ran32State = 0xdeadbeef; 00963 memset(element.key_.keybuf, 'a', KEY_SIZE); 00964 memset(element.data_.databuf, 'b', DATA_SIZE); 00965 00966 Timer.reset(); 00967 00968 Timer.start(); 00969 00970 for (i = n_deletes; i > 0; --i) 00971 { 00972 //element.first.keybuf[KEYPOS] = letters[VALUE]; 00973 rand_key(i, element.key_); 00974 u_btree->erase(element.key_); 00975 } 00976 00977 Timer.stop(); 00978 STXXL_MSG("Records in map: " << u_btree->size()); 00979 STXXL_MSG("Erase elapsed time: " << (Timer.mseconds() / 1000.) << 00980 " seconds : " << (double(ops) / (Timer.mseconds() / 1000.)) << " key/data pairs per sec"); 00981 delete u_btree; 00982 delete is; 00983 } 00984 00985 void run_bdb_btree_big(stxxl::int64 n, unsigned ops) 00986 { 00987 const char * filename = BDB_FILE; 00988 00989 my_key key1_storage; 00990 my_data data1_storage; 00991 00992 #ifdef BDB_BULK_SCAN 00993 int * bulk_buffer = new int[logbufsize / sizeof(int)]; 00994 #endif 00995 00996 unlink(filename); 00997 00998 memset(key1_storage.keybuf, 'a', KEY_SIZE); 00999 memset(data1_storage.databuf, 'b', DATA_SIZE); 01000 01001 01002 Db db(NULL, 0); // Instantiate the Db object 01003 01004 try { 01005 // here we start with the tests 01006 Dbt key1(key1_storage.keybuf, KEY_SIZE); 01007 Dbt data1(data1_storage.databuf, DATA_SIZE); 01008 01009 stxxl::timer Timer; 01010 stxxl::int64 n_inserts = ops, n_locates = ops, n_range_queries = ops, n_deletes = ops; 01011 stxxl::int64 i; 01012 //comp_type cmp_; 01013 01014 ran32State = 0xdeadbeef; 01015 01016 Timer.start(); 01017 01018 vector_type SortedSeq(n); 01019 //const vector_type & CSortedSeq(SortedSeq); 01020 { 01021 rand_key_gen Gen(n, key1_storage); 01022 typedef stxxl::stream::sort<rand_key_gen, comp_type> sorter_type; 01023 sorter_type Sorter(Gen, comp_type(), SORTER_MEM); 01024 typedef key2pair<sorter_type> key2pair_type; 01025 key2pair_type Key2Pair(Sorter); 01026 stxxl::stream::materialize(Key2Pair, SortedSeq.begin()); 01027 } 01028 01029 Timer.stop(); 01030 01031 STXXL_MSG("Finished sorting input. Elapsed time: " << (Timer.mseconds() / 1000.) << " seconds."); 01032 01033 db.set_errfile(stderr); 01034 db.set_pagesize(pagesize); 01035 db.set_cachesize(0, cachesize, 10); 01036 01037 STXXL_MSG("BDB cache size set."); 01038 01039 // Open the database 01040 db.open(NULL, // Transaction pointer 01041 filename, // Database file name 01042 NULL, // Optional logical database name 01043 DB_BTREE, // Database access method 01044 DB_CREATE, // Open flags 01045 0); // File mode (using defaults) 01046 01047 db.get_env()->memp_stat(NULL, NULL, DB_STAT_CLEAR); 01048 01049 Timer.reset(); 01050 Timer.start(); 01051 01052 // DBD does not have bulk construction 01053 // however inserting in sorted order might help 01054 // to improve performance 01055 vector_type::const_iterator cit = SortedSeq.begin(); 01056 for (i = 0; i < n; ++i, ++cit) 01057 { 01058 key1_storage = cit->first; 01059 db.put(NULL, &key1, &data1, DB_NOOVERWRITE); 01060 } 01061 01062 Timer.stop(); 01063 01064 DB_BTREE_STAT * dbstat; 01065 db.stat(NULL, &dbstat, 0); 01066 STXXL_MSG("Records in map: " << dbstat->bt_ndata); 01067 STXXL_MSG("Construction elapsed time: " << (Timer.mseconds() / 1000.) << 01068 " seconds : " << (double(n) / (Timer.mseconds() / 1000.)) << " key/data pairs per sec"); 01069 01070 db.stat_print(0); 01071 db.get_env()->memp_stat_print(DB_STAT_CLEAR); 01072 //////////////////////////////////////// 01073 01074 01075 Timer.reset(); 01076 Timer.start(); 01077 01078 for (i = 0; i < n_inserts; ++i) 01079 { 01080 //key1_storage.keybuf[KEYPOS] = letters[VALUE]; 01081 rand_key(i, key1_storage); 01082 db.put(NULL, &key1, &data1, DB_NOOVERWRITE); 01083 } 01084 01085 Timer.stop(); 01086 db.stat(NULL, &dbstat, 0); 01087 STXXL_MSG("Records in map: " << dbstat->bt_ndata); 01088 STXXL_MSG("Insertions elapsed time: " << (Timer.mseconds() / 1000.) << 01089 " seconds : " << (double(n_inserts) / (Timer.mseconds() / 1000.)) << " key/data pairs per sec"); 01090 01091 db.stat_print(0); 01092 db.get_env()->memp_stat_print(DB_STAT_CLEAR); 01093 01094 ///////////////////////////////////////// 01095 Timer.reset(); 01096 Timer.start(); 01097 01098 01099 Dbc * cursorp; 01100 db.cursor(NULL, &cursorp, 0); 01101 01102 for (i = 0; i < n_locates; ++i) 01103 { 01104 //key1_storage.keybuf[KEYPOS] = letters[VALUE]; 01105 rand_key(i, key1_storage); 01106 Dbt keyx(key1_storage.keybuf, KEY_SIZE); 01107 Dbt datax(data1_storage.databuf, DATA_SIZE); 01108 01109 cursorp->get(&keyx, &datax, DB_SET_RANGE); 01110 } 01111 01112 Timer.stop(); 01113 STXXL_MSG("Locates elapsed time: " << (Timer.mseconds() / 1000.) << 01114 " seconds : " << (double(ops) / (Timer.mseconds() / 1000.)) << " key/data pairs per sec"); 01115 01116 db.stat_print(0); 01117 db.get_env()->memp_stat_print(DB_STAT_CLEAR); 01118 01119 //////////////////////////////////// 01120 Timer.reset(); 01121 01122 Timer.start(); 01123 01124 stxxl::int64 n_scanned = 0; 01125 01126 for (i = 0; i < n_range_queries; ++i) 01127 { 01128 //key1_storage.keybuf[KEYPOS] = letters[VALUE]; 01129 rand_key(i, key1_storage); 01130 my_key last_key = key1_storage; 01131 //key1_storage.keybuf[KEYPOS] = letters[VALUE]; 01132 rand_key(i, key1_storage); 01133 if (last_key < key1_storage) 01134 std::swap(last_key, key1_storage); 01135 01136 01137 //STXXL_MSG("Looking "<<key1_storage<<" scanned: "<<n_scanned); 01138 //STXXL_MSG("Upper bound "<<last_key); 01139 01140 Dbt keyx(key1_storage.keybuf, KEY_SIZE); 01141 #ifdef BDB_BULK_SCAN 01142 Dbt datax(bulk_buffer, DATA_SIZE); 01143 datax.set_ulen(logbufsize); 01144 datax.set_flags(DB_DBT_USERMEM); 01145 #else 01146 Dbt datax(data1_storage.databuf, DATA_SIZE); 01147 #endif 01148 01149 01150 #ifdef BDB_BULK_SCAN 01151 if (cursorp->get(&keyx, &datax, DB_SET_RANGE | DB_MULTIPLE_KEY) == DB_NOTFOUND) 01152 continue; 01153 01154 01155 do 01156 { 01157 DbMultipleKeyDataIterator BulkIterator(datax); 01158 Dbt key1, data1; 01159 while (BulkIterator.next(key1, data1) && 01160 *((my_key *)key1.get_data()) <= last_key) 01161 { 01162 ++n_scanned; 01163 //STXXL_MSG("Result "<<*((my_key *)key1.get_data())); 01164 } 01165 if (cursorp->get(&keyx, &datax, DB_NEXT | DB_MULTIPLE_KEY) == DB_NOTFOUND) 01166 break; 01167 01168 01169 if (*((my_key *)keyx.get_data()) > last_key) 01170 { 01171 break; 01172 } 01173 } while (1); 01174 01175 #else 01176 if (cursorp->get(&keyx, &datax, DB_SET_RANGE) == DB_NOTFOUND) 01177 continue; 01178 01179 while (*((my_key *)keyx.get_data()) <= last_key) 01180 { 01181 ++n_scanned; 01182 if (cursorp->get(&keyx, &datax, DB_NEXT) == DB_NOTFOUND) 01183 break; 01184 } 01185 #endif 01186 01187 01188 if (n_scanned >= SCAN_LIMIT(n)) 01189 { 01190 ++i; 01191 break; 01192 } 01193 } 01194 01195 n_range_queries = i; 01196 01197 Timer.stop(); 01198 if (cursorp != NULL) 01199 cursorp->close(); 01200 01201 01202 STXXL_MSG("Range query elapsed time: " << (Timer.mseconds() / 1000.) << 01203 " seconds : " << (double(n_scanned) / (Timer.mseconds() / 1000.)) << 01204 " key/data pairs per sec, #queries " << n_range_queries << " #scanned elements: " << n_scanned); 01205 01206 db.stat_print(0); 01207 db.get_env()->memp_stat_print(DB_STAT_CLEAR); 01208 01209 ////////////////////////////////////// 01210 01211 ran32State = 0xdeadbeef; 01212 memset(key1_storage.keybuf, 'a', KEY_SIZE); 01213 01214 Timer.reset(); 01215 Timer.start(); 01216 01217 for (i = 0; i < n_deletes; ++i) 01218 { 01219 //key1_storage.keybuf[KEYPOS] = letters[VALUE]; 01220 rand_key(i, key1_storage); 01221 Dbt keyx(key1_storage.keybuf, KEY_SIZE); 01222 db.del(NULL, &keyx, 0); 01223 } 01224 01225 Timer.stop(); 01226 db.stat(NULL, &dbstat, 0); 01227 STXXL_MSG("Records in map: " << dbstat->bt_ndata); 01228 STXXL_MSG("Erase elapsed time: " << (Timer.mseconds() / 1000.) << 01229 " seconds : " << (double(ops) / (Timer.mseconds() / 1000.)) << " key/data pairs per sec"); 01230 01231 db.stat_print(0); 01232 db.get_env()->memp_stat_print(DB_STAT_CLEAR); 01233 01234 db.close(0); 01235 } 01236 catch (DbException & e) 01237 { 01238 STXXL_ERRMSG("DbException happened"); 01239 } 01240 catch (std::exception & e) 01241 { 01242 STXXL_ERRMSG("std::exception happened"); 01243 } 01244 01245 unlink(filename); 01246 01247 #ifdef BDB_BULK_SCAN 01248 delete[] bulk_buffer; 01249 #endif 01250 } 01251 01252 01253 int main(int argc, char * argv[]) 01254 { 01255 STXXL_MSG("stxxl::map Real Node block size: " << REAL_NODE_BLOCK_SIZE << " bytes"); 01256 STXXL_MSG("stxxl::map Real Leaf block size: " << REAL_LEAF_BLOCK_SIZE << " bytes"); 01257 STXXL_MSG("stxxl::map Node max elements : " << REAL_NODE_MELEMENTS); 01258 STXXL_MSG("stxxl::map Leaf max elements : " << REAL_LEAF_MELEMENTS); 01259 #ifdef STXXL_DIRECT_IO_OFF 01260 STXXL_MSG("STXXL_DIRECT_IO_OFF is defined"); 01261 #else 01262 STXXL_MSG("STXXL_DIRECT_IO_OFF is NOT defined"); 01263 #endif 01264 01265 if (argc < 3) 01266 { 01267 STXXL_MSG("Usage: " << argv[0] << " version #ops"); 01268 STXXL_MSG("\t version = 1: test stxxl map"); 01269 STXXL_MSG("\t version = 2: test Berkeley DB btree"); 01270 STXXL_MSG("\t version = 3: big test stxxl map"); 01271 STXXL_MSG("\t version = 4: big test Berkeley DB btree"); 01272 STXXL_MSG("\t version = 5: big test TPIE btree"); 01273 return -1; 01274 } 01275 01276 init(); 01277 01278 int version = atoi(argv[1]); 01279 stxxl::int64 ops = stxxl::atoint64(argv[2]); 01280 01281 STXXL_MSG("Running version : " << version); 01282 STXXL_MSG("Operations to perform: " << ops); 01283 STXXL_MSG("Btree cache size : " << TOTAL_CACHE_SIZE << " bytes"); 01284 STXXL_MSG("Leaf block size : " << LEAF_BLOCK_SIZE << " bytes"); 01285 01286 01287 switch (version) 01288 { 01289 case 1: 01290 run_stxxl_map(ops); 01291 break; 01292 case 2: 01293 run_bdb_btree(ops); 01294 break; 01295 case 3: 01296 run_stxxl_map_big(ops, 100000); 01297 break; 01298 case 4: 01299 run_bdb_btree_big(ops, 100000); 01300 break; 01301 case 5: 01302 run_tpie_btree_big(ops, 100000); 01303 break; 01304 default: 01305 STXXL_MSG("Unsupported version " << version); 01306 } 01307 }