Stxxl  1.4.0
containers/berkeley_db_benchmark.cpp
Go to the documentation of this file.
00001 /***************************************************************************
00002  *  containers/berkeley_db_benchmark.cpp
00003  *
00004  *  Part of the STXXL. See http://stxxl.sourceforge.net
00005  *
00006  *  Copyright (C) 2006 Roman Dementiev <dementiev@ira.uka.de>
00007  *  Copyright (C) 2009, 2010 Andreas Beckmann <beckmann@cs.uni-frankfurt.de>
00008  *
00009  *  Distributed under the Boost Software License, Version 1.0.
00010  *  (See accompanying file LICENSE_1_0.txt or copy at
00011  *  http://www.boost.org/LICENSE_1_0.txt)
00012  **************************************************************************/
00013 
00014 //! \example containers/berkeley_db_benchmark.cpp
00015 //! This is a benchmark mentioned in the paper
00016 //! R. Dementiev, L. Kettner, P. Sanders "STXXL: standard template library for XXL data sets"
00017 //! Software: Practice and Experience
00018 //! Volume 38, Issue 6, Pages 589-637, May 2008
00019 //! DOI: 10.1002/spe.844
00020 
00021 
00022 #include <stxxl/vector>
00023 #include <stxxl/map>
00024 #include <stxxl/timer>
00025 #include <stxxl/stream>
00026 
00027 
00028 ///// BDB header ////////////
00029 #include <db_cxx.h>
00030 
00031 ///////// TPIE headers //////////
00032 #include "app_config.h"
00033 #include <portability.h>
00034 #include <ami_btree.h>
00035 ////////////////////////////
00036 
00037 #define BDB_BULK_SCAN
00038 
00039 #define KEY_SIZE                8
00040 #define DATA_SIZE               32
00041 
00042 #define NODE_BLOCK_SIZE         (32 * 1024)
00043 #define LEAF_BLOCK_SIZE         (32 * 1024)
00044 
00045 
00046 #define LEAF_BLOCK_SIZE         (32 * 1024)
00047 
00048 #define TOTAL_CACHE_SIZE        (750 * 1024 * 1024)
00049 //#define TOTAL_CACHE_SIZE      (150 * 1024 * 1024)
00050 
00051 //#define NODE_CACHE_SIZE       (1 * (TOTAL_CACHE_SIZE / 40))
00052 //#define LEAF_CACHE_SIZE       (39 * (TOTAL_CACHE_SIZE / 40))
00053 
00054 #define NODE_CACHE_SIZE         (64 * 1024 * 1024)
00055 #define LEAF_CACHE_SIZE         (TOTAL_CACHE_SIZE - NODE_CACHE_SIZE)
00056 
00057 #define SORTER_MEM              (TOTAL_CACHE_SIZE - 1024 * 1024 * 2 * 4)
00058 
00059 
00060 #define SCAN_LIMIT(x)   (x)
00061 
00062 //#define BDB_FILE "/data3/bdb_file"
00063 #define BDB_FILE "/var/tmp/bdb_file"
00064 
00065 
00066 // BDB settings
00067 u_int32_t pagesize = LEAF_BLOCK_SIZE;
00068 u_int bulkbufsize = 4 * 1024 * 1024;
00069 u_int logbufsize = 8 * 1024 * 1024;
00070 u_int cachesize = (TOTAL_CACHE_SIZE < 500 * 1024 * 1024) ? (4 * (TOTAL_CACHE_SIZE / 5)) : (TOTAL_CACHE_SIZE - 100 * 1024 * 1024);
00071 u_int datasize = DATA_SIZE;
00072 u_int keysize = KEY_SIZE;
00073 u_int numitems = 0;
00074 
00075 const char * letters = "abcdefghijklmnopqrstuvwxuz";
00076 
00077 struct my_key
00078 {
00079     char keybuf[KEY_SIZE];
00080 };
00081 
00082 
00083 std::ostream & operator << (std::ostream & o, const my_key & obj)
00084 {
00085     for (int i = 0; i < KEY_SIZE; ++i)
00086         o << obj.keybuf[i];
00087 
00088     return o;
00089 }
00090 
00091 bool operator == (const my_key & a, const my_key & b)
00092 {
00093     return strncmp(a.keybuf, b.keybuf, KEY_SIZE) == 0;
00094 }
00095 
00096 bool operator != (const my_key & a, const my_key & b)
00097 {
00098     return strncmp(a.keybuf, b.keybuf, KEY_SIZE) != 0;
00099 }
00100 
00101 bool operator < (const my_key & a, const my_key & b)
00102 {
00103     return strncmp(a.keybuf, b.keybuf, KEY_SIZE) < 0;
00104 }
00105 
00106 bool operator > (const my_key & a, const my_key & b)
00107 {
00108     return strncmp(a.keybuf, b.keybuf, KEY_SIZE) > 0;
00109 }
00110 
00111 bool operator <= (const my_key & a, const my_key & b)
00112 {
00113     return strncmp(a.keybuf, b.keybuf, KEY_SIZE) <= 0;
00114 }
00115 
00116 bool operator >= (const my_key & a, const my_key & b)
00117 {
00118     return strncmp(a.keybuf, b.keybuf, KEY_SIZE) >= 0;
00119 }
00120 
00121 
00122 struct my_data
00123 {
00124     char databuf[DATA_SIZE];
00125 };
00126 
00127 std::ostream & operator << (std::ostream & o, const my_data & obj)
00128 {
00129     o << "DATA(size=" << sizeof(obj) << ") ";
00130 
00131     return o;
00132 }
00133 
00134 my_key min_key, max_key;
00135 
00136 struct comp_type : std::binary_function<my_key, my_key, bool>
00137 {
00138     bool operator () (const my_key & a, const my_key & b) const
00139     {
00140         return strncmp(a.keybuf, b.keybuf, KEY_SIZE) < 0;
00141     }
00142     static my_key max_value()
00143     {
00144         return max_key;
00145     }
00146     static my_key min_value()
00147     {
00148         return min_key;
00149     }
00150 };
00151 
00152 
00153 /// TPIE  declarations
00154 // Key type.
00155 typedef my_key bkey_t;
00156 
00157 // Element type for the btree.
00158 struct el_t {
00159     bkey_t key_;
00160     my_data data_;
00161     el_t(bkey_t k) : key_(k) { }
00162     el_t() { }
00163 };
00164 struct key_from_el {
00165     bkey_t operator () (const el_t & v) const
00166     {
00167         return v.key_;
00168     }
00169 };
00170 
00171 
00172 // Temporary distinction btw UN*X and WIN, since there are some
00173 // problems with the MMAP collection implementation.
00174 #ifdef _WIN32
00175 typedef AMI_btree<bkey_t, el_t, less<bkey_t>, key_from_el, BTE_COLLECTION_UFS> u_btree_t;
00176 #else
00177 typedef AMI_btree<bkey_t, el_t, less<bkey_t>, key_from_el> u_btree_t;
00178 #endif
00179 
00180 
00181 void init()
00182 {
00183     memset(max_key.keybuf, (std::numeric_limits<unsigned char>::max)(), KEY_SIZE);
00184     memset(min_key.keybuf, (std::numeric_limits<unsigned char>::min)(), KEY_SIZE);
00185 }
00186 
00187 typedef stxxl::map<my_key, my_data, comp_type, NODE_BLOCK_SIZE, LEAF_BLOCK_SIZE> map_type;
00188 
00189 #define REAL_NODE_BLOCK_SIZE map_type::node_block_type::raw_size
00190 #define REAL_LEAF_BLOCK_SIZE map_type::leaf_block_type::raw_size
00191 #define REAL_NODE_MELEMENTS map_type::node_block_type::size
00192 #define REAL_LEAF_MELEMENTS map_type::leaf_block_type::size
00193 
00194 typedef stxxl::VECTOR_GENERATOR<std::pair<my_key, my_data>, 1, 1>::result vector_type;
00195 //typedef stxxl::vector<std::pair<my_key,my_data>,1,stxxl::lru_pager<1>,512*1024>  vector_type;
00196 
00197 
00198 //#define KEYPOS        (i % KEY_SIZE)
00199 //#define VALUE         (myrand() % 26)
00200 
00201 
00202 #if 0
00203 unsigned ran32State = 0xdeadbeef;
00204 inline unsigned myrand()
00205 {
00206     return (ran32State = 1664525 * ran32State + 1013904223);
00207 }
00208 inline void rand_key(stxxl::int64 pos, my_key & Key)
00209 {
00210     for (int i = 0; i < KEY_SIZE; ++i)
00211         Key.keybuf[i] = letters[myrand() % 26];
00212 }
00213 #else // a longer pseudo random sequence
00214 long long unsigned ran32State = 0xdeadbeef;
00215 inline long long unsigned myrand()
00216 {
00217     return (ran32State = (ran32State * 0x5DEECE66DULL + 0xBULL) & 0xFFFFFFFFFFFFULL);
00218 }
00219 inline void rand_key(stxxl::int64 /*pos*/, my_key & Key)
00220 {
00221     long long unsigned r = myrand();
00222     for (int i = 0; i < KEY_SIZE; ++i)
00223     {
00224         Key.keybuf[i] = letters[r % 26];
00225         r >>= 5;
00226     }
00227 }
00228 #endif
00229 
00230 void run_bdb_btree(stxxl::int64 ops)
00231 {
00232     const char * filename = BDB_FILE;
00233 
00234     my_key key1_storage;
00235     my_data data1_storage;
00236 
00237     unlink(filename);
00238 
00239     memset(key1_storage.keybuf, 'a', KEY_SIZE);
00240     memset(data1_storage.databuf, 'b', DATA_SIZE);
00241 
00242 
00243     Db db(NULL, 0);             // Instantiate the Db object
00244 
00245     try {
00246         db.set_errfile(stderr);
00247         db.set_pagesize(pagesize);
00248         db.set_cachesize(0, cachesize, 1);
00249 
00250         // Open the database
00251         db.open(NULL,           // Transaction pointer
00252                 filename,       // Database file name
00253                 NULL,           // Optional logical database name
00254                 DB_BTREE,       // Database access method
00255                 DB_CREATE,      // Open flags
00256                 0);             // File mode (using defaults)
00257 
00258 
00259         // here we start with the tests
00260         Dbt key1(key1_storage.keybuf, KEY_SIZE);
00261         Dbt data1(data1_storage.databuf, DATA_SIZE);
00262 
00263         stxxl::timer Timer;
00264         stxxl::int64 n_inserts = ops, n_locates = ops, n_range_queries = ops, n_deletes = ops;
00265         stxxl::int64 i;
00266         //comp_type cmp_;
00267 
00268         ran32State = 0xdeadbeef;
00269 
00270 
00271         DB_BTREE_STAT * dbstat;
00272 
00273         db.stat(NULL, &dbstat, 0);
00274         STXXL_MSG("Records in map: " << dbstat->bt_ndata);
00275 
00276         db.get_env()->memp_stat(NULL, NULL, DB_STAT_CLEAR);
00277 
00278         Timer.start();
00279 
00280         for (i = 0; i < n_inserts; ++i)
00281         {
00282             //key1_storage.keybuf[KEYPOS] = letters[VALUE];
00283             rand_key(i, key1_storage);
00284             db.put(NULL, &key1, &data1, DB_NOOVERWRITE);
00285         }
00286 
00287         Timer.stop();
00288         db.stat(NULL, &dbstat, 0);
00289         STXXL_MSG("Records in map: " << dbstat->bt_ndata);
00290         STXXL_MSG("Insertions elapsed time: " << (Timer.mseconds() / 1000.) <<
00291                   " seconds : " << (double(n_inserts) / (Timer.mseconds() / 1000.)) << " key/data pairs per sec");
00292 
00293         db.get_env()->memp_stat_print(DB_STAT_CLEAR);
00294 
00295         /////////////////////////////////////////
00296         Timer.reset();
00297         Timer.start();
00298 
00299 
00300         Dbc * cursorp;
00301         db.cursor(NULL, &cursorp, 0);
00302 
00303         for (i = 0; i < n_locates; ++i)
00304         {
00305             //key1_storage.keybuf[KEYPOS] = letters[VALUE];
00306             rand_key(i, key1_storage);
00307             Dbt keyx(key1_storage.keybuf, KEY_SIZE);
00308             Dbt datax(data1_storage.databuf, DATA_SIZE);
00309 
00310             cursorp->get(&keyx, &datax, DB_SET_RANGE);
00311         }
00312 
00313         Timer.stop();
00314         STXXL_MSG("Locates elapsed time: " << (Timer.mseconds() / 1000.) <<
00315                   " seconds : " << (double(ops) / (Timer.mseconds() / 1000.)) << " key/data pairs per sec");
00316 
00317         db.get_env()->memp_stat_print(DB_STAT_CLEAR);
00318 
00319         ////////////////////////////////////
00320         Timer.reset();
00321 
00322         Timer.start();
00323 
00324         stxxl::int64 n_scanned = 0;
00325 
00326         for (i = 0; i < n_range_queries; ++i)
00327         {
00328             //key1_storage.keybuf[KEYPOS] = letters[VALUE];
00329             rand_key(i, key1_storage);
00330             my_key last_key = key1_storage;
00331             //key1_storage.keybuf[KEYPOS] = letters[VALUE];
00332             rand_key(i, key1_storage);
00333             if (last_key < key1_storage)
00334                 std::swap(last_key, key1_storage);
00335 
00336 
00337             Dbt keyx(key1_storage.keybuf, KEY_SIZE);
00338             Dbt datax(data1_storage.databuf, DATA_SIZE);
00339 
00340             if (cursorp->get(&keyx, &datax, DB_SET_RANGE) == DB_NOTFOUND)
00341                 continue;
00342 
00343 
00344             while (*((my_key *)keyx.get_data()) <= last_key)
00345             {
00346                 ++n_scanned;
00347                 if (cursorp->get(&keyx, &datax, DB_NEXT) == DB_NOTFOUND)
00348                     break;
00349             }
00350 
00351             if (n_scanned >= 10 * n_range_queries)
00352             {
00353                 ++i;
00354                 break;
00355             }
00356         }
00357 
00358         n_range_queries = i;
00359 
00360         Timer.stop();
00361         if (cursorp != NULL)
00362             cursorp->close();
00363 
00364 
00365         STXXL_MSG("Range query elapsed time: " << (Timer.mseconds() / 1000.) <<
00366                   " seconds : " << (double(n_scanned) / (Timer.mseconds() / 1000.)) <<
00367                   " key/data pairs per sec, #queries " << n_range_queries << " #scanned elements: " << n_scanned);
00368 
00369         db.get_env()->memp_stat_print(DB_STAT_CLEAR);
00370 
00371         //////////////////////////////////////
00372 
00373         ran32State = 0xdeadbeef;
00374         memset(key1_storage.keybuf, 'a', KEY_SIZE);
00375 
00376         Timer.reset();
00377         Timer.start();
00378 
00379         for (i = 0; i < n_deletes; ++i)
00380         {
00381             //key1_storage.keybuf[KEYPOS] = letters[VALUE];
00382             rand_key(i, key1_storage);
00383             Dbt keyx(key1_storage.keybuf, KEY_SIZE);
00384             db.del(NULL, &keyx, 0);
00385         }
00386 
00387         Timer.stop();
00388         db.stat(NULL, &dbstat, 0);
00389         STXXL_MSG("Records in map: " << dbstat->bt_ndata);
00390         STXXL_MSG("Erase elapsed time: " << (Timer.mseconds() / 1000.) <<
00391                   " seconds : " << (double(ops) / (Timer.mseconds() / 1000.)) << " key/data pairs per sec");
00392 
00393         db.get_env()->memp_stat_print(DB_STAT_CLEAR);
00394 
00395         db.close(0);
00396     }
00397     catch (DbException & e)
00398     {
00399         STXXL_ERRMSG("DbException happened");
00400     }
00401     catch (std::exception & e)
00402     {
00403         STXXL_ERRMSG("std::exception happened");
00404     }
00405 
00406     unlink(filename);
00407 }
00408 
00409 void run_stxxl_map(stxxl::int64 ops)
00410 {
00411     map_type Map(NODE_CACHE_SIZE, LEAF_CACHE_SIZE);
00412     Map.enable_prefetching();
00413     stxxl::stats * Stats = stxxl::stats::get_instance();
00414 
00415     std::pair<my_key, my_data> element;
00416 
00417     memset(element.first.keybuf, 'a', KEY_SIZE);
00418     memset(element.second.databuf, 'b', DATA_SIZE);
00419 
00420 
00421     stxxl::timer Timer;
00422     stxxl::int64 n_inserts = ops, n_locates = ops, n_range_queries = ops, n_deletes = ops;
00423     stxxl::int64 i;
00424     //comp_type cmp_;
00425 
00426     ran32State = 0xdeadbeef;
00427 
00428     //stxxl::random_number32 myrand;
00429 
00430     STXXL_MSG("Records in map: " << Map.size());
00431 
00432     stxxl::stats_data stats_begin(*Stats);
00433     Timer.start();
00434 
00435     for (i = 0; i < n_inserts; ++i)
00436     {
00437         //element.first.keybuf[KEYPOS] = letters[VALUE];
00438         rand_key(i, element.first);
00439         Map.insert(element);
00440     }
00441 
00442     Timer.stop();
00443 
00444     STXXL_MSG("Records in map: " << Map.size());
00445     STXXL_MSG("Insertions elapsed time: " << (Timer.mseconds() / 1000.) <<
00446               " seconds : " << (double(n_inserts) / (Timer.mseconds() / 1000.)) << " key/data pairs per sec");
00447 
00448     std::cout << (stxxl::stats_data(*Stats) - stats_begin);
00449 
00450     ////////////////////////////////////////////////
00451 
00452     const map_type & CMap(Map);     // const map reference
00453 
00454     stats_begin = stxxl::stats_data(*Stats);
00455     Timer.reset();
00456     Timer.start();
00457 
00458     for (i = 0; i < n_locates; ++i)
00459     {
00460         //element.first.keybuf[KEYPOS] = letters[VALUE];
00461         rand_key(i, element.first);
00462         CMap.lower_bound(element.first);
00463     }
00464 
00465     Timer.stop();
00466     STXXL_MSG("Locates elapsed time: " << (Timer.mseconds() / 1000.) <<
00467               " seconds : " << (double(n_locates) / (Timer.mseconds() / 1000.)) << " key/data pairs per sec");
00468 
00469     std::cout << (stxxl::stats_data(*Stats) - stats_begin);
00470 
00471     ////////////////////////////////////
00472 
00473     stats_begin = stxxl::stats_data(*Stats);
00474     Timer.reset();
00475     Timer.start();
00476 
00477     stxxl::int64 n_scanned = 0; //, skipped_qieries = 0;
00478 
00479     for (i = 0; i < n_range_queries; ++i)
00480     {
00481         //element.first.keybuf[KEYPOS] = letters[VALUE];
00482         rand_key(i, element.first);
00483         my_key begin_key = element.first;
00484         map_type::const_iterator begin = CMap.lower_bound(element.first);
00485         //element.first.keybuf[KEYPOS] = letters[VALUE];
00486         rand_key(i, element.first);
00487         map_type::const_iterator beyond = CMap.lower_bound(element.first);
00488         if (element.first < begin_key)
00489             std::swap(begin, beyond);
00490 
00491         while (begin != beyond)
00492         {
00493             my_data tmp = begin->second;
00494             stxxl::STXXL_UNUSED(tmp);
00495             ++n_scanned;
00496             ++begin;
00497         }
00498         if (n_scanned >= 10 * n_range_queries)
00499         {
00500             ++i;
00501             break;
00502         }
00503     }
00504 
00505     n_range_queries = i;
00506 
00507     Timer.stop();
00508     STXXL_MSG("Range query elapsed time: " << (Timer.mseconds() / 1000.) <<
00509               " seconds : " << (double(n_scanned) / (Timer.mseconds() / 1000.)) <<
00510               " key/data pairs per sec, #queries " << n_range_queries << " #scanned elements: " << n_scanned);
00511 
00512     std::cout << (stxxl::stats_data(*Stats) - stats_begin);
00513 
00514     //////////////////////////////////////
00515     ran32State = 0xdeadbeef;
00516     memset(element.first.keybuf, 'a', KEY_SIZE);
00517     memset(element.second.databuf, 'b', DATA_SIZE);
00518 
00519     stats_begin = stxxl::stats_data(*Stats);
00520     Timer.reset();
00521     Timer.start();
00522 
00523     for (i = 0; i < n_deletes; ++i)
00524     {
00525         //element.first.keybuf[KEYPOS] = letters[VALUE];
00526         rand_key(i, element.first);
00527         Map.erase(element.first);
00528     }
00529 
00530     Timer.stop();
00531     STXXL_MSG("Records in map: " << Map.size());
00532     STXXL_MSG("Erase elapsed time: " << (Timer.mseconds() / 1000.) <<
00533               " seconds : " << (double(n_deletes) / (Timer.mseconds() / 1000.)) << " key/data pairs per sec");
00534 
00535     std::cout << (stxxl::stats_data(*Stats) - stats_begin);
00536 }
00537 
00538 class rand_key_gen
00539 {
00540     stxxl::int64 counter;
00541     my_key & current;
00542     stxxl::random_number32 myrand;
00543     rand_key_gen();
00544 
00545 public:
00546     typedef my_key value_type;
00547 
00548     rand_key_gen(stxxl::int64 el, my_key & cur) :
00549         counter(el), current(cur)
00550     {
00551         //const stxxl::int64  & i = counter;
00552         //current.keybuf[KEYPOS] = letters[VALUE];
00553         rand_key(counter, current);
00554     }
00555     const my_key & operator * () { return current; }
00556     const my_key * operator -> () { return &current; }
00557 
00558     rand_key_gen & operator ++ ()
00559     {
00560         --counter;
00561         //const stxxl::int64  & i = counter;
00562         //current.keybuf[KEYPOS] = letters[VALUE];
00563         rand_key(counter, current);
00564         return *this;
00565     }
00566     bool empty() const { return counter == 0; }
00567 };
00568 
00569 template <class InputType>
00570 class key2pair
00571 {
00572     InputType & in;
00573     std::pair<my_key, my_data> current;
00574     key2pair();
00575 
00576 public:
00577     typedef std::pair<my_key, my_data> value_type;
00578 
00579     key2pair(InputType & in_) : in(in_)
00580     {
00581         if (!in.empty())
00582             current.first = *in;
00583     }
00584     const value_type & operator * () { return current; }
00585     const value_type * operator -> () { return &current; }
00586 
00587     key2pair & operator ++ ()
00588     {
00589         ++in;
00590         if (!empty())
00591             current.first = *in;
00592 
00593 
00594         return *this;
00595     }
00596     bool empty() const { return in.empty(); }
00597 };
00598 
00599 void run_stxxl_map_big(stxxl::int64 n, unsigned ops)
00600 {
00601     stxxl::stats * Stats = stxxl::stats::get_instance();
00602 
00603     std::pair<my_key, my_data> element;
00604 
00605     memset(element.first.keybuf, 'a', KEY_SIZE);
00606     memset(element.second.databuf, 'b', DATA_SIZE);
00607 
00608     stxxl::timer Timer;
00609     stxxl::int64 n_inserts = ops, n_locates = ops, n_range_queries = ops, n_deletes = ops;
00610     stxxl::int64 i;
00611     //comp_type cmp_;
00612 
00613     ran32State = 0xdeadbeef;
00614 
00615     //stxxl::random_number32 myrand;
00616 
00617     Timer.start();
00618 
00619     vector_type SortedSeq(n);
00620     const vector_type & CSortedSeq(SortedSeq);
00621     {
00622         rand_key_gen Gen(n, element.first);
00623         typedef stxxl::stream::sort<rand_key_gen, comp_type> sorter_type;
00624         sorter_type Sorter(Gen, comp_type(), SORTER_MEM);
00625         typedef key2pair<sorter_type> key2pair_type;
00626         key2pair_type Key2Pair(Sorter);
00627         stxxl::stream::materialize(Key2Pair, SortedSeq.begin());
00628     }
00629 
00630 
00631     Timer.stop();
00632 
00633     STXXL_MSG("Finished sorting input. Elapsed time: " <<
00634               (Timer.mseconds() / 1000.) << " seconds.");
00635 
00636     stxxl::stats_data stats_begin(*Stats);
00637     Timer.reset();
00638     Timer.start();
00639 
00640     // bulk construction
00641     map_type Map(CSortedSeq.begin(),
00642                  CSortedSeq.end(),
00643                  NODE_CACHE_SIZE, LEAF_CACHE_SIZE, true);
00644 
00645     Timer.stop();
00646 
00647 
00648     STXXL_MSG("Records in map: " << Map.size());
00649     STXXL_MSG("Construction elapsed time: " << (Timer.mseconds() / 1000.) <<
00650               " seconds : " << (double(n) / (Timer.mseconds() / 1000.)) << " key/data pairs per sec");
00651 
00652     Map.print_statistics(cout);
00653     Map.reset_statistics();
00654     std::cout << (stxxl::stats_data(*Stats) - stats_begin);
00655 
00656     ////////////////////////////////////////
00657 
00658     Map.disable_prefetching();
00659 
00660     stats_begin = stxxl::stats_data(*Stats);
00661     Timer.reset();
00662     Timer.start();
00663 
00664     for (i = 0; i < n_inserts; ++i)
00665     {
00666         //element.first.keybuf[KEYPOS] = letters[VALUE];
00667         rand_key(i, element.first);
00668         Map.insert(element);
00669     }
00670 
00671     Timer.stop();
00672 
00673     STXXL_MSG("Records in map: " << Map.size());
00674     STXXL_MSG("Insertions elapsed time: " << (Timer.mseconds() / 1000.) <<
00675               " seconds : " << (double(n_inserts) / (Timer.mseconds() / 1000.)) << " key/data pairs per sec");
00676 
00677     Map.print_statistics(cout);
00678     Map.reset_statistics();
00679     std::cout << (stxxl::stats_data(*Stats) - stats_begin);
00680 
00681     ////////////////////////////////////
00682 
00683     const map_type & CMap(Map);     // const map reference
00684 
00685     Timer.reset();
00686     Timer.start();
00687 
00688     for (i = 0; i < n_locates; ++i)
00689     {
00690         //element.first.keybuf[KEYPOS] = letters[VALUE];
00691         rand_key(i, element.first);
00692         CMap.lower_bound(element.first);
00693     }
00694 
00695     Timer.stop();
00696     STXXL_MSG("Locates elapsed time: " << (Timer.mseconds() / 1000.) <<
00697               " seconds : " << (double(ops) / (Timer.mseconds() / 1000.)) << " key/data pairs per sec");
00698 
00699     Map.print_statistics(cout);
00700     Map.reset_statistics();
00701     std::cout << (stxxl::stats_data(*Stats) - stats_begin);
00702 
00703     ////////////////////////////////////
00704 
00705     Map.enable_prefetching();
00706 
00707     stats_begin = stxxl::stats_data(*Stats);
00708     Timer.reset();
00709     Timer.start();
00710 
00711     stxxl::int64 n_scanned = 0; //, skipped_qieries = 0;
00712 
00713     for (i = 0; i < n_range_queries; ++i)
00714     {
00715         //element.first.keybuf[KEYPOS] = letters[VALUE];
00716         rand_key(i, element.first);
00717         my_key begin_key = element.first;
00718         map_type::const_iterator begin = CMap.lower_bound(element.first);
00719         //element.first.keybuf[KEYPOS] = letters[VALUE];
00720         rand_key(i, element.first);
00721         map_type::const_iterator beyond = CMap.lower_bound(element.first);
00722         if (element.first < begin_key)
00723             std::swap(begin, beyond);
00724 
00725         /*
00726            STXXL_MSG("Looking     "<<element.first<<" scanned: "<<n_scanned);
00727 
00728            if(beyond==CMap.end())
00729            {
00730                 STXXL_MSG("Upper bound "<<"END");
00731            }
00732            else
00733            {
00734                 STXXL_MSG("Upper bound "<<((element.first>begin_key)?element.first:begin_key));
00735            }*/
00736 
00737         while (begin != beyond)
00738         {
00739             ++n_scanned;
00740             ++begin;
00741         }
00742 
00743         if (n_scanned >= SCAN_LIMIT(n))
00744         {
00745             ++i;
00746             break;
00747         }
00748     }
00749 
00750     n_range_queries = i;
00751 
00752     Timer.stop();
00753     STXXL_MSG("Range query elapsed time: " << (Timer.mseconds() / 1000.) <<
00754               " seconds : " << (double(n_scanned) / (Timer.mseconds() / 1000.)) <<
00755               " key/data pairs per sec, #queries " << n_range_queries << " #scanned elements: " << n_scanned);
00756 
00757     Map.print_statistics(cout);
00758     Map.reset_statistics();
00759     std::cout << (stxxl::stats_data(*Stats) - stats_begin);
00760 
00761     //////////////////////////////////////
00762 
00763     ran32State = 0xdeadbeef;
00764     memset(element.first.keybuf, 'a', KEY_SIZE);
00765     memset(element.second.databuf, 'b', DATA_SIZE);
00766 
00767     Map.disable_prefetching();
00768 
00769     stats_begin = stxxl::stats_data(*Stats);
00770     Timer.reset();
00771     Timer.start();
00772 
00773     for (i = n_deletes; i > 0; --i)
00774     {
00775         //element.first.keybuf[KEYPOS] = letters[VALUE];
00776         rand_key(i, element.first);
00777         Map.erase(element.first);
00778     }
00779 
00780     Timer.stop();
00781     STXXL_MSG("Records in map: " << Map.size());
00782     STXXL_MSG("Erase elapsed time: " << (Timer.mseconds() / 1000.) <<
00783               " seconds : " << (double(ops) / (Timer.mseconds() / 1000.)) << " key/data pairs per sec");
00784 
00785     Map.print_statistics(cout);
00786     Map.reset_statistics();
00787     std::cout << (stxxl::stats_data(*Stats) - stats_begin);
00788 }
00789 
00790 /////////////////////////////////////////////////////////////////////////
00791 
00792 typedef AMI_STREAM<el_t> stream_t;
00793 
00794 char dummy;
00795 
00796 class MyFilter {
00797 public:
00798     bool operator () (const el_t & v) const
00799     {
00800         dummy += v.key_.keybuf[0];         // touch element
00801         return true;
00802     }
00803 };
00804 
00805 
00806 void run_tpie_btree_big(stxxl::int64 n, unsigned ops)
00807 {
00808     el_t element;
00809 
00810     memset(element.key_.keybuf, 'a', KEY_SIZE);
00811     memset(element.data_.databuf, 'b', DATA_SIZE);
00812 
00813     // Log debugging info from the application, but not from the library.
00814     tpie_log_init(TPIE_LOG_APP_DEBUG);
00815     MM_manager.set_memory_limit(TOTAL_CACHE_SIZE);
00816     MM_manager.enforce_memory_limit();
00817 
00818     stream_t * is = new stream_t;
00819 
00820     stxxl::timer Timer;
00821     stxxl::int64 n_inserts = ops, n_locates = ops, n_range_queries = ops, n_deletes = ops;
00822     stxxl::int64 i;
00823     //comp_type cmp_;
00824 
00825     ran32State = 0xdeadbeef;
00826 
00827     //stxxl::random_number32 myrand;
00828 
00829     Timer.start();
00830 
00831 
00832     {
00833         rand_key_gen Gen(n, element.key_);
00834         typedef stxxl::stream::sort<rand_key_gen, comp_type> sorter_type;
00835         sorter_type Sorter(Gen, comp_type(), SORTER_MEM);
00836         //typedef key2pair<sorter_type> key2pair_type;
00837         //key2pair_type Key2Pair(Sorter);
00838         while (!Sorter.empty())
00839         {
00840             is->write_item(*Sorter);
00841             ++Sorter;
00842         }
00843     }
00844 
00845     Timer.stop();
00846 
00847 
00848     STXXL_MSG("Finished sorting input. Elapsed time: " <<
00849               (Timer.mseconds() / 1000.) << " seconds.");
00850 
00851 
00852     Timer.reset();
00853     Timer.start();
00854 
00855     // bulk construction
00856     u_btree_t * u_btree;
00857     AMI_btree_params params;
00858     params.node_block_factor = NODE_BLOCK_SIZE / 4096;
00859     params.leaf_block_factor = LEAF_BLOCK_SIZE / 4096;
00860     params.leaf_cache_size = LEAF_CACHE_SIZE / LEAF_BLOCK_SIZE;
00861     params.node_cache_size = NODE_CACHE_SIZE / NODE_BLOCK_SIZE;
00862 
00863     u_btree = new u_btree_t(params);
00864 
00865     using std::cout;
00866     using std::cerr;
00867 
00868     if (!u_btree->is_valid()) {
00869         cerr << "Error initializing btree. Aborting.\n";
00870         delete u_btree;
00871         exit(1);
00872     }
00873 
00874     if (u_btree->load_sorted(is, 1.0, 1.0) != AMI_ERROR_NO_ERROR)
00875         cerr << "Error during bulk loading.\n";
00876 
00877 
00878     Timer.stop();
00879 
00880     STXXL_MSG("Records in map: " << u_btree->size());
00881     STXXL_MSG("Construction elapsed time: " << (Timer.mseconds() / 1000.) <<
00882               " seconds : " << (double(n) / (Timer.mseconds() / 1000.)) << " key/data pairs per sec");
00883 
00884 
00885     ////////////////////////////////////////
00886     Timer.reset();
00887 
00888 
00889     Timer.start();
00890 
00891     for (i = 0; i < n_inserts; ++i)
00892     {
00893         //element.first.keybuf[KEYPOS] = letters[VALUE];
00894         rand_key(i, element.key_);
00895         u_btree->insert(element);
00896     }
00897 
00898     Timer.stop();
00899 
00900     STXXL_MSG("Records in map: " << u_btree->size());
00901     STXXL_MSG("Insertions elapsed time: " << (Timer.mseconds() / 1000.) <<
00902               " seconds : " << (double(n_inserts) / (Timer.mseconds() / 1000.)) << " key/data pairs per sec");
00903 
00904 
00905     ////////////////////////////////////////////////
00906     Timer.reset();
00907 
00908 
00909     Timer.start();
00910 
00911 
00912     el_t result;
00913     for (i = 0; i < n_locates; ++i)
00914     {
00915         //element.first.keybuf[KEYPOS] = letters[VALUE];
00916         rand_key(i, element.key_);
00917         u_btree->succ(element.key_, result);
00918     }
00919 
00920     Timer.stop();
00921     STXXL_MSG("Locates elapsed time: " << (Timer.mseconds() / 1000.) <<
00922               " seconds : " << (double(ops) / (Timer.mseconds() / 1000.)) << " key/data pairs per sec");
00923 
00924 
00925     ////////////////////////////////////
00926     Timer.reset();
00927 
00928 
00929     Timer.start();
00930 
00931     stxxl::int64 n_scanned = 0; //, skipped_qieries = 0;
00932     MyFilter filter;
00933 
00934     for (i = 0; i < n_range_queries; ++i)
00935     {
00936         rand_key(i, element.key_);
00937         my_key begin_key = element.key_;
00938         rand_key(i, element.key_);
00939         if (element.key_ < begin_key)
00940             n_scanned += u_btree->range_query(element.key_, begin_key, NULL, filter);
00941 
00942         else
00943             n_scanned += u_btree->range_query(begin_key, element.key_, NULL, filter);
00944 
00945 
00946         if (n_scanned >= SCAN_LIMIT(n))
00947         {
00948             ++i;
00949             break;
00950         }
00951     }
00952 
00953     n_range_queries = i;
00954 
00955     Timer.stop();
00956     STXXL_MSG("Range query elapsed time: " << (Timer.mseconds() / 1000.) <<
00957               " seconds : " << (double(n_scanned) / (Timer.mseconds() / 1000.)) <<
00958               " key/data pairs per sec, #queries " << n_range_queries << " #scanned elements: " << n_scanned);
00959 
00960 
00961     //////////////////////////////////////
00962     ran32State = 0xdeadbeef;
00963     memset(element.key_.keybuf, 'a', KEY_SIZE);
00964     memset(element.data_.databuf, 'b', DATA_SIZE);
00965 
00966     Timer.reset();
00967 
00968     Timer.start();
00969 
00970     for (i = n_deletes; i > 0; --i)
00971     {
00972         //element.first.keybuf[KEYPOS] = letters[VALUE];
00973         rand_key(i, element.key_);
00974         u_btree->erase(element.key_);
00975     }
00976 
00977     Timer.stop();
00978     STXXL_MSG("Records in map: " << u_btree->size());
00979     STXXL_MSG("Erase elapsed time: " << (Timer.mseconds() / 1000.) <<
00980               " seconds : " << (double(ops) / (Timer.mseconds() / 1000.)) << " key/data pairs per sec");
00981     delete u_btree;
00982     delete is;
00983 }
00984 
00985 void run_bdb_btree_big(stxxl::int64 n, unsigned ops)
00986 {
00987     const char * filename = BDB_FILE;
00988 
00989     my_key key1_storage;
00990     my_data data1_storage;
00991 
00992 #ifdef BDB_BULK_SCAN
00993     int * bulk_buffer = new int[logbufsize / sizeof(int)];
00994 #endif
00995 
00996     unlink(filename);
00997 
00998     memset(key1_storage.keybuf, 'a', KEY_SIZE);
00999     memset(data1_storage.databuf, 'b', DATA_SIZE);
01000 
01001 
01002     Db db(NULL, 0);                   // Instantiate the Db object
01003 
01004     try {
01005         // here we start with the tests
01006         Dbt key1(key1_storage.keybuf, KEY_SIZE);
01007         Dbt data1(data1_storage.databuf, DATA_SIZE);
01008 
01009         stxxl::timer Timer;
01010         stxxl::int64 n_inserts = ops, n_locates = ops, n_range_queries = ops, n_deletes = ops;
01011         stxxl::int64 i;
01012         //comp_type cmp_;
01013 
01014         ran32State = 0xdeadbeef;
01015 
01016         Timer.start();
01017 
01018         vector_type SortedSeq(n);
01019         //const vector_type & CSortedSeq(SortedSeq);
01020         {
01021             rand_key_gen Gen(n, key1_storage);
01022             typedef stxxl::stream::sort<rand_key_gen, comp_type> sorter_type;
01023             sorter_type Sorter(Gen, comp_type(), SORTER_MEM);
01024             typedef key2pair<sorter_type> key2pair_type;
01025             key2pair_type Key2Pair(Sorter);
01026             stxxl::stream::materialize(Key2Pair, SortedSeq.begin());
01027         }
01028 
01029         Timer.stop();
01030 
01031         STXXL_MSG("Finished sorting input. Elapsed time: " << (Timer.mseconds() / 1000.) << " seconds.");
01032 
01033         db.set_errfile(stderr);
01034         db.set_pagesize(pagesize);
01035         db.set_cachesize(0, cachesize, 10);
01036 
01037         STXXL_MSG("BDB cache size set.");
01038 
01039         // Open the database
01040         db.open(NULL,           // Transaction pointer
01041                 filename,       // Database file name
01042                 NULL,           // Optional logical database name
01043                 DB_BTREE,       // Database access method
01044                 DB_CREATE,      // Open flags
01045                 0);             // File mode (using defaults)
01046 
01047         db.get_env()->memp_stat(NULL, NULL, DB_STAT_CLEAR);
01048 
01049         Timer.reset();
01050         Timer.start();
01051 
01052         // DBD does not have bulk construction
01053         // however inserting in sorted order might help
01054         // to improve performance
01055         vector_type::const_iterator cit = SortedSeq.begin();
01056         for (i = 0; i < n; ++i, ++cit)
01057         {
01058             key1_storage = cit->first;
01059             db.put(NULL, &key1, &data1, DB_NOOVERWRITE);
01060         }
01061 
01062         Timer.stop();
01063 
01064         DB_BTREE_STAT * dbstat;
01065         db.stat(NULL, &dbstat, 0);
01066         STXXL_MSG("Records in map: " << dbstat->bt_ndata);
01067         STXXL_MSG("Construction elapsed time: " << (Timer.mseconds() / 1000.) <<
01068                   " seconds : " << (double(n) / (Timer.mseconds() / 1000.)) << " key/data pairs per sec");
01069 
01070         db.stat_print(0);
01071         db.get_env()->memp_stat_print(DB_STAT_CLEAR);
01072         ////////////////////////////////////////
01073 
01074 
01075         Timer.reset();
01076         Timer.start();
01077 
01078         for (i = 0; i < n_inserts; ++i)
01079         {
01080             //key1_storage.keybuf[KEYPOS] = letters[VALUE];
01081             rand_key(i, key1_storage);
01082             db.put(NULL, &key1, &data1, DB_NOOVERWRITE);
01083         }
01084 
01085         Timer.stop();
01086         db.stat(NULL, &dbstat, 0);
01087         STXXL_MSG("Records in map: " << dbstat->bt_ndata);
01088         STXXL_MSG("Insertions elapsed time: " << (Timer.mseconds() / 1000.) <<
01089                   " seconds : " << (double(n_inserts) / (Timer.mseconds() / 1000.)) << " key/data pairs per sec");
01090 
01091         db.stat_print(0);
01092         db.get_env()->memp_stat_print(DB_STAT_CLEAR);
01093 
01094         /////////////////////////////////////////
01095         Timer.reset();
01096         Timer.start();
01097 
01098 
01099         Dbc * cursorp;
01100         db.cursor(NULL, &cursorp, 0);
01101 
01102         for (i = 0; i < n_locates; ++i)
01103         {
01104             //key1_storage.keybuf[KEYPOS] = letters[VALUE];
01105             rand_key(i, key1_storage);
01106             Dbt keyx(key1_storage.keybuf, KEY_SIZE);
01107             Dbt datax(data1_storage.databuf, DATA_SIZE);
01108 
01109             cursorp->get(&keyx, &datax, DB_SET_RANGE);
01110         }
01111 
01112         Timer.stop();
01113         STXXL_MSG("Locates elapsed time: " << (Timer.mseconds() / 1000.) <<
01114                   " seconds : " << (double(ops) / (Timer.mseconds() / 1000.)) << " key/data pairs per sec");
01115 
01116         db.stat_print(0);
01117         db.get_env()->memp_stat_print(DB_STAT_CLEAR);
01118 
01119         ////////////////////////////////////
01120         Timer.reset();
01121 
01122         Timer.start();
01123 
01124         stxxl::int64 n_scanned = 0;
01125 
01126         for (i = 0; i < n_range_queries; ++i)
01127         {
01128             //key1_storage.keybuf[KEYPOS] = letters[VALUE];
01129             rand_key(i, key1_storage);
01130             my_key last_key = key1_storage;
01131             //key1_storage.keybuf[KEYPOS] = letters[VALUE];
01132             rand_key(i, key1_storage);
01133             if (last_key < key1_storage)
01134                 std::swap(last_key, key1_storage);
01135 
01136 
01137             //STXXL_MSG("Looking     "<<key1_storage<<" scanned: "<<n_scanned);
01138             //STXXL_MSG("Upper bound "<<last_key);
01139 
01140             Dbt keyx(key1_storage.keybuf, KEY_SIZE);
01141 #ifdef BDB_BULK_SCAN
01142             Dbt datax(bulk_buffer, DATA_SIZE);
01143             datax.set_ulen(logbufsize);
01144             datax.set_flags(DB_DBT_USERMEM);
01145 #else
01146             Dbt datax(data1_storage.databuf, DATA_SIZE);
01147 #endif
01148 
01149 
01150 #ifdef BDB_BULK_SCAN
01151             if (cursorp->get(&keyx, &datax, DB_SET_RANGE | DB_MULTIPLE_KEY) == DB_NOTFOUND)
01152                 continue;
01153 
01154 
01155             do
01156             {
01157                 DbMultipleKeyDataIterator BulkIterator(datax);
01158                 Dbt key1, data1;
01159                 while (BulkIterator.next(key1, data1) &&
01160                        *((my_key *)key1.get_data()) <= last_key)
01161                 {
01162                     ++n_scanned;
01163                     //STXXL_MSG("Result      "<<*((my_key *)key1.get_data()));
01164                 }
01165                 if (cursorp->get(&keyx, &datax, DB_NEXT | DB_MULTIPLE_KEY) == DB_NOTFOUND)
01166                     break;
01167 
01168 
01169                 if (*((my_key *)keyx.get_data()) > last_key)
01170                 {
01171                     break;
01172                 }
01173             } while (1);
01174 
01175 #else
01176             if (cursorp->get(&keyx, &datax, DB_SET_RANGE) == DB_NOTFOUND)
01177                 continue;
01178 
01179             while (*((my_key *)keyx.get_data()) <= last_key)
01180             {
01181                 ++n_scanned;
01182                 if (cursorp->get(&keyx, &datax, DB_NEXT) == DB_NOTFOUND)
01183                     break;
01184             }
01185 #endif
01186 
01187 
01188             if (n_scanned >= SCAN_LIMIT(n))
01189             {
01190                 ++i;
01191                 break;
01192             }
01193         }
01194 
01195         n_range_queries = i;
01196 
01197         Timer.stop();
01198         if (cursorp != NULL)
01199             cursorp->close();
01200 
01201 
01202         STXXL_MSG("Range query elapsed time: " << (Timer.mseconds() / 1000.) <<
01203                   " seconds : " << (double(n_scanned) / (Timer.mseconds() / 1000.)) <<
01204                   " key/data pairs per sec, #queries " << n_range_queries << " #scanned elements: " << n_scanned);
01205 
01206         db.stat_print(0);
01207         db.get_env()->memp_stat_print(DB_STAT_CLEAR);
01208 
01209         //////////////////////////////////////
01210 
01211         ran32State = 0xdeadbeef;
01212         memset(key1_storage.keybuf, 'a', KEY_SIZE);
01213 
01214         Timer.reset();
01215         Timer.start();
01216 
01217         for (i = 0; i < n_deletes; ++i)
01218         {
01219             //key1_storage.keybuf[KEYPOS] = letters[VALUE];
01220             rand_key(i, key1_storage);
01221             Dbt keyx(key1_storage.keybuf, KEY_SIZE);
01222             db.del(NULL, &keyx, 0);
01223         }
01224 
01225         Timer.stop();
01226         db.stat(NULL, &dbstat, 0);
01227         STXXL_MSG("Records in map: " << dbstat->bt_ndata);
01228         STXXL_MSG("Erase elapsed time: " << (Timer.mseconds() / 1000.) <<
01229                   " seconds : " << (double(ops) / (Timer.mseconds() / 1000.)) << " key/data pairs per sec");
01230 
01231         db.stat_print(0);
01232         db.get_env()->memp_stat_print(DB_STAT_CLEAR);
01233 
01234         db.close(0);
01235     }
01236     catch (DbException & e)
01237     {
01238         STXXL_ERRMSG("DbException happened");
01239     }
01240     catch (std::exception & e)
01241     {
01242         STXXL_ERRMSG("std::exception happened");
01243     }
01244 
01245     unlink(filename);
01246 
01247 #ifdef BDB_BULK_SCAN
01248     delete[]  bulk_buffer;
01249 #endif
01250 }
01251 
01252 
01253 int main(int argc, char * argv[])
01254 {
01255     STXXL_MSG("stxxl::map Real Node block size: " << REAL_NODE_BLOCK_SIZE << " bytes");
01256     STXXL_MSG("stxxl::map Real Leaf block size: " << REAL_LEAF_BLOCK_SIZE << " bytes");
01257     STXXL_MSG("stxxl::map Node max elements   : " << REAL_NODE_MELEMENTS);
01258     STXXL_MSG("stxxl::map Leaf max elements   : " << REAL_LEAF_MELEMENTS);
01259 #ifdef STXXL_DIRECT_IO_OFF
01260     STXXL_MSG("STXXL_DIRECT_IO_OFF is defined");
01261 #else
01262     STXXL_MSG("STXXL_DIRECT_IO_OFF is NOT defined");
01263 #endif
01264 
01265     if (argc < 3)
01266     {
01267         STXXL_MSG("Usage: " << argv[0] << " version #ops");
01268         STXXL_MSG("\t version = 1: test stxxl map");
01269         STXXL_MSG("\t version = 2: test Berkeley DB btree");
01270         STXXL_MSG("\t version = 3: big test stxxl map");
01271         STXXL_MSG("\t version = 4: big test Berkeley DB btree");
01272         STXXL_MSG("\t version = 5: big test TPIE btree");
01273         return -1;
01274     }
01275 
01276     init();
01277 
01278     int version = atoi(argv[1]);
01279     stxxl::int64 ops = stxxl::atoint64(argv[2]);
01280 
01281     STXXL_MSG("Running version      : " << version);
01282     STXXL_MSG("Operations to perform: " << ops);
01283     STXXL_MSG("Btree cache size     : " << TOTAL_CACHE_SIZE << " bytes");
01284     STXXL_MSG("Leaf block size      : " << LEAF_BLOCK_SIZE << " bytes");
01285 
01286 
01287     switch (version)
01288     {
01289     case 1:
01290         run_stxxl_map(ops);
01291         break;
01292     case 2:
01293         run_bdb_btree(ops);
01294         break;
01295     case 3:
01296         run_stxxl_map_big(ops, 100000);
01297         break;
01298     case 4:
01299         run_bdb_btree_big(ops, 100000);
01300         break;
01301     case 5:
01302         run_tpie_btree_big(ops, 100000);
01303         break;
01304     default:
01305         STXXL_MSG("Unsupported version " << version);
01306     }
01307 }
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines