| 
       1                 : // -*- mode: c++; fill-column: 79 -*-
       2                 : // $Id: stx-cbtreedb.h 7 2010-04-14 11:24:20Z tb $
       3                 : 
       4                 : /** \file stx-cbtreedb.h
       5                 :  * Contains all classes of the constant B-tree database implementation.
       6                 :  */
       7                 : 
       8                 : /*
       9                 :  * STX Constant B-Tree Database Template Classes v0.7.0
      10                 :  * Copyright (C) 2010 Timo Bingmann
      11                 :  *
      12                 :  * This library is free software; you can redistribute it and/or modify it
      13                 :  * under the terms of the GNU Lesser General Public License as published by the
      14                 :  * Free Software Foundation; either version 2.1 of the License, or (at your
      15                 :  * option) any later version.
      16                 :  *
      17                 :  * This library is distributed in the hope that it will be useful, but WITHOUT
      18                 :  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
      19                 :  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public License
      20                 :  * for more details.
      21                 :  *
      22                 :  * You should have received a copy of the GNU Lesser General Public License
      23                 :  * along with this library; if not, write to the Free Software Foundation,
      24                 :  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
      25                 :  */
      26                 : 
      27                 : #ifndef _STX_CBTREEDB_H_
      28                 : #define _STX_CBTREEDB_H_
      29                 : 
      30                 : #include <string.h>
      31                 : #include <stdint.h>
      32                 : 
      33                 : #include <stdexcept>
      34                 : #include <vector>
      35                 : #include <map>
      36                 : #include <iostream>
      37                 : 
      38                 : /// STX - Some Template Extensions namespace
      39                 : namespace stx {
      40                 : 
      41                 : // *** Compile-time assertion macros borrowed from Boost
      42                 : 
      43                 : #ifndef STX_STATIC_ASSERT
      44                 : 
      45                 : template <bool> struct STATIC_ASSERTION_FAILURE;
      46                 : template <> struct STATIC_ASSERTION_FAILURE<true> { enum { value = 1 }; };
      47                 : template <int x> struct static_assert_test {};
      48                 : 
      49                 : #define STX_MACRO_JOIN(X,Y) STX_MACRO_DO_JOIN(X,Y)
      50                 : #define STX_MACRO_DO_JOIN(X,Y) STX_MACRO_DO_JOIN2(X,Y)
      51                 : #define STX_MACRO_DO_JOIN2(X,Y) X##Y
      52                 : 
      53                 : #define STX_STATIC_ASSERT(A) \
      54                 :     typedef static_assert_test<sizeof(STATIC_ASSERTION_FAILURE< static_cast<bool>(A) >)> \
      55                 :         STX_MACRO_JOIN(static_assert_typedef_, __LINE__)
      56                 : 
      57                 : #endif
      58                 : 
      59                 : /**
      60                 :  * @brief Template super-class enclosing all classes which can operate on a
      61                 :  * constant B-tree database file.
      62                 :  *
      63                 :  * By parameterizing this class an instance of all sub-classes is created. The
      64                 :  * parameters described important database parameters and database files should
      65                 :  * be read and written using the _same_ class parameters!
      66                 :  *
      67                 :  * The first template parameter is the key type, which must be an integral,
      68                 :  * fixed-length type like uint32_t or a struct.
      69                 :  * 
      70                 :  * The second template parameter is the order functional used to sort the keys,
      71                 :  * it must form a total order relation over the keys.
      72                 :  * 
      73                 :  * The size of B-tree pages containing nodes are defined by the third
      74                 :  * parameter. The number of key slots in each node is calculated from this
      75                 :  * number and sizeof(_Key). There are some obvious constraints on the
      76                 :  * relationship of page size and key size which are checked by compile-time
      77                 :  * assertions.
      78                 :  *
      79                 :  * The fourth template parameter is an uint32_t version number stored in the
      80                 :  * signature page of the database. It can be used by an application to mark its
      81                 :  * own database. When opening databases this parameter must match the
      82                 :  * signature.
      83                 :  *
      84                 :  * For more information see http://idlebox.net/2010/stx-cbtreedb/
      85                 :  */
      86                 : template < typename _Key = uint32_t,
      87                 :            typename _Compare = std::less<_Key>,
      88                 :            unsigned int _BTreePageSize = 1024,
      89                 :            uint32_t _AppVersionId = 0>
      90                 : class CBTreeDB
      91                 : {
      92                 : public:
      93                 :     // *** Template Parameters
      94                 : 
      95                 :     /// first template parameter: the key type of the B-tree. This must be an
      96                 :     /// integral, fixed-length type as it is used in arrays in the tree nodes.
      97                 :     typedef _Key        key_type;
      98                 : 
      99                 :     /// second template parameter: key comparison function object type.
     100                 :     typedef _Compare    key_compare;
     101                 : 
     102                 :     /// third template parameter: B-tree page size. Usually 1024 or 2048.
     103                 :     static const unsigned int BTreePageSize = _BTreePageSize;
     104                 : 
     105                 :     /// fourth template parameter: application-defined 32-bit identifier to
     106                 :     /// mark database format or version.
     107                 :     static const uint32_t AppVersionId = _AppVersionId;
     108                 : 
     109                 :     // *** Structure parameters calculated from page size
     110                 : 
     111                 :     /// size of fields before key+offset array in LeafNode and additional
     112                 :     /// offset at end.
     113                 :     static const unsigned int LeafNodeHead = 2 * sizeof(uint16_t) + sizeof(uint64_t) + sizeof(uint32_t);
     114                 : 
     115                 :     /// number of key+offsets in each LeafNode
     116                 :     static const unsigned int LeafNodeNumKeys = (BTreePageSize - LeafNodeHead) / (sizeof(key_type) + sizeof(uint32_t));
     117                 : 
     118                 :     STX_STATIC_ASSERT( LeafNodeNumKeys >= 1 );
     119                 : 
     120                 :     /// number of unused fill bytes in each LeafNode
     121                 :     static const unsigned int LeafNodeFiller = BTreePageSize - LeafNodeHead - LeafNodeNumKeys * (sizeof(key_type) + sizeof(uint32_t));
     122                 : 
     123                 :     /// size of fields before key array in InnerNode
     124                 :     static const unsigned int InnerNodeHead = 2 * sizeof(uint16_t) + sizeof(uint32_t);
     125                 : 
     126                 :     /// number of keys in each InnerNode
     127                 :     static const unsigned int InnerNodeNumKeys = (BTreePageSize - InnerNodeHead) / sizeof(key_type);
     128                 : 
     129                 :     STX_STATIC_ASSERT( InnerNodeNumKeys >= 2 );
     130                 : 
     131                 :     /// number of unused fill bytes in each InnerNode
     132                 :     static const unsigned int InnerNodeFiller = BTreePageSize - InnerNodeHead - InnerNodeNumKeys * sizeof(key_type);
     133                 : 
     134                 : public:
     135                 :     /**
     136                 :      * Our own exception class derived from std::runtime_error.
     137                 :      */
     138                 :     class Exception : public std::runtime_error
     139               0 :     {
     140                 :     public:
     141                 :         /// Create new exception object with error message.
     142               0 :         explicit Exception(const std::string& str)
     143               0 :             : std::runtime_error("CBTreeDB: " + str)
     144                 :         {
     145               0 :         }
     146                 :     };
     147                 : 
     148                 :     /// Instead of assert() we use this macro to throw exceptions. These could
     149                 :     /// be disabled for production releases.
     150                 : #define CBTREEDB_ASSERT(x) do { if (!(x)) throw(Exception("Assertion failed: " #x)); } while(0)
     151                 : 
     152                 :     /// Short macro to throw exceptions if a program logic expression
     153                 :     /// fails. These must not be disabled in production releases as they may
     154                 :     /// depend on user input.
     155                 : #define CBTREEDB_CHECK(x,msg) do { if (!(x)) throw(Exception(msg)); } while(0)
     156                 : 
     157                 : public:
     158                 :     /**
     159                 :      * @brief Signature page which heads all cbtreedb files.
     160                 :      *
     161                 :      * It contains a signature and many important fields to correctly access
     162                 :      * the database file. Due to disk page alignment reasons, the signature
     163                 :      * block is stored with a full B-tree page size.
     164                 :      */
     165                 :     struct SignaturePage
     166                 :     {
     167                 :         char            signature[8];   ///< "cbtreedb" or custom string
     168                 :         uint32_t        header_crc32;   ///< CRC32 of following bytes
     169                 :         uint32_t        version;        ///< 0x00010000
     170                 :         uint32_t        app_version_id; ///< custom id defined by template
     171                 : 
     172                 :         uint32_t        items;          ///< key-value pairs in db
     173                 :         uint32_t        key_size;       ///< sizeof(key_type)
     174                 : 
     175                 :         uint64_t        btree_offset;   ///< b-tree offset in file
     176                 :         uint64_t        btree_size;     ///< b-tree total size in bytes
     177                 :         uint64_t        btree_firstleaf; ///< offset of first leaf in file
     178                 :         uint32_t        btree_pagesize; ///< size of b-tree nodes
     179                 :         uint32_t        btree_levels;   ///< number of levels in tree
     180                 :         uint32_t        btree_leaves;   ///< number of leaf nodes in tree
     181                 :         uint8_t         btree_sha256[32]; ///< SHA256 digest of all tree nodes
     182                 : 
     183                 :         uint64_t        value_offset;   ///< file offset of value data area
     184                 :         uint64_t        value_size;     ///< total size of value data area
     185                 :         uint8_t         value_sha256[32]; ///< SHA256 digest of all value data
     186                 :     }
     187                 :         __attribute__((packed));
     188                 : 
     189                 :     /// Fixed signature page size: always equal to the B-tree page.
     190                 :     static const unsigned int SignaturePageSize = BTreePageSize;
     191                 : 
     192                 :     STX_STATIC_ASSERT( sizeof(SignaturePage) <= BTreePageSize );
     193                 : 
     194                 : protected:
     195                 :     /**
     196                 :      * @brief Leaf node structure of the B-tree leaf pages.
     197                 :      *
     198                 :      * Each leaf node contains a key array and an array of relative value
     199                 :      * offsets. It does not contain the size of the value elements, because
     200                 :      * these can be computed from two successive relative offsets. This works
     201                 :      * for the last offset only because of the extra offset of the successor
     202                 :      * value item in the next leaf. The value offsets are relative to a
     203                 :      * starting 64-bit offset, because all leaf's data items are stored in
     204                 :      * order.
     205                 :      */
     206                 :     struct LeafNode
     207                 :     {
     208                 :         /// level of this leaf node -> always 0.
     209                 :         uint16_t        level;
     210                 : 
     211                 :         /// number of used slots in the arrays.
     212                 :         uint16_t        slots;
     213                 : 
     214                 :         /// base of value offsets enumerated in array.
     215                 :         uint64_t        baseoffset;
     216                 : 
     217                 :         union
     218                 :         {
     219                 :             struct
     220                 :             {
     221                 :                 /// key array of ascending key in this leaf.
     222                 :                 key_type        keys[LeafNodeNumKeys];
     223                 : 
     224                 :                 /// file offset of value data associated with key.
     225                 :                 uint32_t        offsets[LeafNodeNumKeys+1];
     226                 :             }
     227                 :                 __attribute__((packed));
     228                 : 
     229                 :             /// union with filler char array to assure page size.
     230                 :             char        filler[BTreePageSize - LeafNodeHead + sizeof(uint32_t)];
     231                 :         };
     232                 : 
     233                 :         /// Initializes structure with zero.
     234            9438 :         inline explicit LeafNode()
     235            9438 :             : level(0), slots(0), baseoffset(0)
     236                 :         {
     237            9438 :             memset(filler, 0, sizeof(filler));
     238            9438 :         }
     239                 : 
     240                 :         /// Returns true if no more keys can be added.
     241         1186941 :         inline bool IsFull() const
     242                 :         {
     243         1186941 :             return (slots >= LeafNodeNumKeys);
     244                 :         }
     245                 : 
     246                 :         /// Returns the currently last key in the node
     247        10447433 :         inline const key_type& LastKey() const
     248                 :         {
     249        10447433 :             CBTREEDB_ASSERT(slots > 0 && slots < LeafNodeNumKeys+1);
     250        10447433 :             return keys[slots-1];
     251                 :         }
     252                 :     }
     253                 :         __attribute__((packed));
     254                 : 
     255                 :     STX_STATIC_ASSERT( sizeof(LeafNode) == BTreePageSize );
     256                 : 
     257                 :     /**
     258                 :      * @brief Inner node structure of the B-tree inner pages.
     259                 :      *
     260                 :      * Each inner node has n+1 children nodes, where n is the number of keys in
     261                 :      * the node. The n+1 children nodes are stored consecutively starting at
     262                 :      * childrenoffset.
     263                 :      */
     264                 :     struct InnerNode
     265                 :     {
     266                 :         /// level of this inner node, always > 0.
     267                 :         uint16_t        level;
     268                 : 
     269                 :         /// number of used slots in the arrays.
     270                 :         uint16_t        slots;
     271                 : 
     272                 :         /// base offset of child B-tree nodes enumerated by keys.
     273                 :         uint32_t        childrenoffset;
     274                 : 
     275                 :         union
     276                 :         {
     277                 :             /// key array of ascending keys in this inner node.
     278                 :             key_type    keys[InnerNodeNumKeys];
     279                 : 
     280                 :             /// union with filler char array to assure page size.
     281                 :             char        filler[BTreePageSize - InnerNodeHead];
     282                 :         };
     283                 : 
     284                 :         /// Initializes structure with zero.
     285              77 :         inline explicit InnerNode(uint16_t level_)
     286              77 :             : level(level_), slots(0), childrenoffset(0)
     287                 :         {
     288              77 :             memset(filler, 0, sizeof(filler));
     289              77 :         }
     290                 : 
     291                 :         /// Returns true if no more keys can be added.
     292            9400 :         inline bool IsFull() const
     293                 :         {
     294            9400 :             return (slots >= InnerNodeNumKeys);
     295                 :         }
     296                 : 
     297                 :         /// Returns the currently last key in the node
     298         7977574 :         inline const key_type& LastKey() const
     299                 :         {
     300         7977574 :             CBTREEDB_ASSERT(slots > 0 && slots < InnerNodeNumKeys+1);
     301         7977574 :             return keys[slots-1];
     302                 :         }
     303                 :     }
     304                 :         __attribute__((packed));
     305                 : 
     306                 :     STX_STATIC_ASSERT( sizeof(InnerNode) == BTreePageSize );
     307                 : 
     308                 : protected:
     309                 :     /**
     310                 :      * @brief CRC32 Cyclic redundancy check implementation class.
     311                 :      *
     312                 :      * Copied from the Botan-1.6.4 cryptography library.
     313                 :      */
     314                 :     class CRC32
     315                 :     {
     316                 :     private:
     317                 :         /// CRC intermediate value
     318                 :         uint32_t        m_crc;
     319                 : 
     320                 :     public:
     321                 :         /// Initialize new CRC object
     322             146 :         CRC32()
     323                 :         {
     324             146 :             clear();
     325             146 :         }
     326                 : 
     327                 :         /// Clear current CRC object
     328             146 :         void clear()
     329                 :         {
     330             146 :             m_crc = 0xFFFFFFFF;
     331             146 :         }
     332                 : 
     333                 :         /// Update this CRC value with new data.
     334             146 :         CRC32& update(const unsigned char* input, uint32_t length)
     335                 :         {
     336                 :             static const uint32_t table[256] = {
     337                 :                 0x00000000, 0x77073096, 0xEE0E612C, 0x990951BA, 0x076DC419,
     338                 :                 0x706AF48F, 0xE963A535, 0x9E6495A3, 0x0EDB8832, 0x79DCB8A4,
     339                 :                 0xE0D5E91E, 0x97D2D988, 0x09B64C2B, 0x7EB17CBD, 0xE7B82D07,
     340                 :                 0x90BF1D91, 0x1DB71064, 0x6AB020F2, 0xF3B97148, 0x84BE41DE,
     341                 :                 0x1ADAD47D, 0x6DDDE4EB, 0xF4D4B551, 0x83D385C7, 0x136C9856,
     342                 :                 0x646BA8C0, 0xFD62F97A, 0x8A65C9EC, 0x14015C4F, 0x63066CD9,
     343                 :                 0xFA0F3D63, 0x8D080DF5, 0x3B6E20C8, 0x4C69105E, 0xD56041E4,
     344                 :                 0xA2677172, 0x3C03E4D1, 0x4B04D447, 0xD20D85FD, 0xA50AB56B,
     345                 :                 0x35B5A8FA, 0x42B2986C, 0xDBBBC9D6, 0xACBCF940, 0x32D86CE3,
     346                 :                 0x45DF5C75, 0xDCD60DCF, 0xABD13D59, 0x26D930AC, 0x51DE003A,
     347                 :                 0xC8D75180, 0xBFD06116, 0x21B4F4B5, 0x56B3C423, 0xCFBA9599,
     348                 :                 0xB8BDA50F, 0x2802B89E, 0x5F058808, 0xC60CD9B2, 0xB10BE924,
     349                 :                 0x2F6F7C87, 0x58684C11, 0xC1611DAB, 0xB6662D3D, 0x76DC4190,
     350                 :                 0x01DB7106, 0x98D220BC, 0xEFD5102A, 0x71B18589, 0x06B6B51F,
     351                 :                 0x9FBFE4A5, 0xE8B8D433, 0x7807C9A2, 0x0F00F934, 0x9609A88E,
     352                 :                 0xE10E9818, 0x7F6A0DBB, 0x086D3D2D, 0x91646C97, 0xE6635C01,
     353                 :                 0x6B6B51F4, 0x1C6C6162, 0x856530D8, 0xF262004E, 0x6C0695ED,
     354                 :                 0x1B01A57B, 0x8208F4C1, 0xF50FC457, 0x65B0D9C6, 0x12B7E950,
     355                 :                 0x8BBEB8EA, 0xFCB9887C, 0x62DD1DDF, 0x15DA2D49, 0x8CD37CF3,
     356                 :                 0xFBD44C65, 0x4DB26158, 0x3AB551CE, 0xA3BC0074, 0xD4BB30E2,
     357                 :                 0x4ADFA541, 0x3DD895D7, 0xA4D1C46D, 0xD3D6F4FB, 0x4369E96A,
     358                 :                 0x346ED9FC, 0xAD678846, 0xDA60B8D0, 0x44042D73, 0x33031DE5,
     359                 :                 0xAA0A4C5F, 0xDD0D7CC9, 0x5005713C, 0x270241AA, 0xBE0B1010,
     360                 :                 0xC90C2086, 0x5768B525, 0x206F85B3, 0xB966D409, 0xCE61E49F,
     361                 :                 0x5EDEF90E, 0x29D9C998, 0xB0D09822, 0xC7D7A8B4, 0x59B33D17,
     362                 :                 0x2EB40D81, 0xB7BD5C3B, 0xC0BA6CAD, 0xEDB88320, 0x9ABFB3B6,
     363                 :                 0x03B6E20C, 0x74B1D29A, 0xEAD54739, 0x9DD277AF, 0x04DB2615,
     364                 :                 0x73DC1683, 0xE3630B12, 0x94643B84, 0x0D6D6A3E, 0x7A6A5AA8,
     365                 :                 0xE40ECF0B, 0x9309FF9D, 0x0A00AE27, 0x7D079EB1, 0xF00F9344,
     366                 :                 0x8708A3D2, 0x1E01F268, 0x6906C2FE, 0xF762575D, 0x806567CB,
     367                 :                 0x196C3671, 0x6E6B06E7, 0xFED41B76, 0x89D32BE0, 0x10DA7A5A,
     368                 :                 0x67DD4ACC, 0xF9B9DF6F, 0x8EBEEFF9, 0x17B7BE43, 0x60B08ED5,
     369                 :                 0xD6D6A3E8, 0xA1D1937E, 0x38D8C2C4, 0x4FDFF252, 0xD1BB67F1,
     370                 :                 0xA6BC5767, 0x3FB506DD, 0x48B2364B, 0xD80D2BDA, 0xAF0A1B4C,
     371                 :                 0x36034AF6, 0x41047A60, 0xDF60EFC3, 0xA867DF55, 0x316E8EEF,
     372                 :                 0x4669BE79, 0xCB61B38C, 0xBC66831A, 0x256FD2A0, 0x5268E236,
     373                 :                 0xCC0C7795, 0xBB0B4703, 0x220216B9, 0x5505262F, 0xC5BA3BBE,
     374                 :                 0xB2BD0B28, 0x2BB45A92, 0x5CB36A04, 0xC2D7FFA7, 0xB5D0CF31,
     375                 :                 0x2CD99E8B, 0x5BDEAE1D, 0x9B64C2B0, 0xEC63F226, 0x756AA39C,
     376                 :                 0x026D930A, 0x9C0906A9, 0xEB0E363F, 0x72076785, 0x05005713,
     377                 :                 0x95BF4A82, 0xE2B87A14, 0x7BB12BAE, 0x0CB61B38, 0x92D28E9B,
     378                 :                 0xE5D5BE0D, 0x7CDCEFB7, 0x0BDBDF21, 0x86D3D2D4, 0xF1D4E242,
     379                 :                 0x68DDB3F8, 0x1FDA836E, 0x81BE16CD, 0xF6B9265B, 0x6FB077E1,
     380                 :                 0x18B74777, 0x88085AE6, 0xFF0F6A70, 0x66063BCA, 0x11010B5C,
     381                 :                 0x8F659EFF, 0xF862AE69, 0x616BFFD3, 0x166CCF45, 0xA00AE278,
     382                 :                 0xD70DD2EE, 0x4E048354, 0x3903B3C2, 0xA7672661, 0xD06016F7,
     383                 :                 0x4969474D, 0x3E6E77DB, 0xAED16A4A, 0xD9D65ADC, 0x40DF0B66,
     384                 :                 0x37D83BF0, 0xA9BCAE53, 0xDEBB9EC5, 0x47B2CF7F, 0x30B5FFE9,
     385                 :                 0xBDBDF21C, 0xCABAC28A, 0x53B39330, 0x24B4A3A6, 0xBAD03605,
     386                 :                 0xCDD70693, 0x54DE5729, 0x23D967BF, 0xB3667A2E, 0xC4614AB8,
     387                 :                 0x5D681B02, 0x2A6F2B94, 0xB40BBE37, 0xC30C8EA1, 0x5A05DF1B,
     388                 :                 0x2D02EF8D };
     389                 : 
     390             146 :             register uint32_t tmp = m_crc;
     391                 : 
     392            1432 :             while(length >= 16)
     393                 :             {
     394            1140 :                 tmp = table[(tmp ^ input[ 0]) & 0xFF] ^ (tmp >> 8);
     395            1140 :                 tmp = table[(tmp ^ input[ 1]) & 0xFF] ^ (tmp >> 8);
     396            1140 :                 tmp = table[(tmp ^ input[ 2]) & 0xFF] ^ (tmp >> 8);
     397            1140 :                 tmp = table[(tmp ^ input[ 3]) & 0xFF] ^ (tmp >> 8);
     398            1140 :                 tmp = table[(tmp ^ input[ 4]) & 0xFF] ^ (tmp >> 8);
     399            1140 :                 tmp = table[(tmp ^ input[ 5]) & 0xFF] ^ (tmp >> 8);
     400            1140 :                 tmp = table[(tmp ^ input[ 6]) & 0xFF] ^ (tmp >> 8);
     401            1140 :                 tmp = table[(tmp ^ input[ 7]) & 0xFF] ^ (tmp >> 8);
     402            1140 :                 tmp = table[(tmp ^ input[ 8]) & 0xFF] ^ (tmp >> 8);
     403            1140 :                 tmp = table[(tmp ^ input[ 9]) & 0xFF] ^ (tmp >> 8);
     404            1140 :                 tmp = table[(tmp ^ input[10]) & 0xFF] ^ (tmp >> 8);
     405            1140 :                 tmp = table[(tmp ^ input[11]) & 0xFF] ^ (tmp >> 8);
     406            1140 :                 tmp = table[(tmp ^ input[12]) & 0xFF] ^ (tmp >> 8);
     407            1140 :                 tmp = table[(tmp ^ input[13]) & 0xFF] ^ (tmp >> 8);
     408            1140 :                 tmp = table[(tmp ^ input[14]) & 0xFF] ^ (tmp >> 8);
     409            1140 :                 tmp = table[(tmp ^ input[15]) & 0xFF] ^ (tmp >> 8);
     410            1140 :                 input += 16;
     411            1140 :                 length -= 16;
     412                 :             }
     413                 : 
     414             175 :             for(uint32_t j = 0; j != length; ++j)
     415              29 :                 tmp = table[(tmp ^ input[j]) & 0xFF] ^ (tmp >> 8);
     416                 : 
     417             146 :             m_crc = tmp;
     418                 : 
     419             146 :             return *this;
     420                 :         }
     421                 : 
     422                 :         /// Update this CRC value with new data.
     423             146 :         CRC32& update(const void* input, uint32_t length)
     424                 :         {
     425             146 :             return update(reinterpret_cast<const unsigned char*>(input), length);
     426                 :         }
     427                 : 
     428                 :         /// Return final CRC value of this object.
     429             146 :         uint32_t final() const
     430                 :         {
     431             146 :             return (m_crc ^ 0xFFFFFFFF);
     432                 :         }
     433                 : 
     434                 :         /// Calculate CRC32 digest of bytes in the given range.
     435             146 :         static uint32_t digest(const void* input, uint32_t length)
     436                 :         {
     437             146 :             return CRC32().update(input, length).final();
     438                 :         }
     439                 : 
     440                 :         /// Calculate CRC32 digest of a string.
     441               4 :         static uint32_t digest(const std::string& input)
     442                 :         {
     443               4 :             return digest(input.data(), input.size());
     444                 :         }
     445                 :     };
     446                 : 
     447                 : protected:
     448                 :     /**
     449                 :      * @brief SHA-256 Message Digest implementation class.
     450                 :      *
     451                 :      * Copied from the Botan-1.6.4 cryptography library.
     452                 :      */
     453                 :     class SHA256
     454                 :     {
     455                 :     private:
     456                 :         /// local typedef from Botan library
     457                 :         typedef uint8_t byte;
     458                 : 
     459                 :         /// local typedef from Botan library
     460                 :         typedef uint32_t u32bit;
     461                 : 
     462                 :         /// local typedef from Botan library
     463                 :         typedef uint64_t u64bit;
     464                 : 
     465                 :         /// length of the resulting digest
     466                 :         static const u32bit OUTPUT_LENGTH = 32;
     467                 : 
     468                 :         /// block of bytes to process in hash function
     469                 :         static const u32bit HASH_BLOCK_SIZE = 64;
     470                 : 
     471                 :         /// length of size suffix hashed during finalization
     472                 :         static const u32bit COUNT_SIZE = 8;
     473                 : 
     474                 :     private:
     475                 :         u32bit          W[64];
     476                 :         u32bit          digest[8];
     477                 : 
     478                 :         byte            buffer[HASH_BLOCK_SIZE];
     479                 : 
     480                 :         u64bit          count;
     481                 :         u32bit          position;
     482                 : 
     483                 :     private:
     484                 :         /// Rotation Functions
     485                 :         template<typename T> inline T rotate_left(T input, u32bit rot)
     486                 :         { return static_cast<T>((input << rot) | (input >> (8*sizeof(T)-rot))); }
     487                 : 
     488                 :         /// Rotation Functions
     489       387081216 :         template<typename T> inline T rotate_right(T input, u32bit rot)
     490       387081216 :         { return static_cast<T>((input >> rot) | (input << (8*sizeof(T)-rot))); }
     491                 : 
     492                 :         /// Byte Extraction Function
     493           16240 :         template<typename T> inline byte get_byte(u32bit byte_num, T input)
     494           16240 :         { return static_cast<byte>(input >> ((sizeof(T)-1-(byte_num&(sizeof(T)-1))) << 3)); }
     495                 : 
     496                 :         /// Byte to Word Conversions
     497        10752256 :         inline u32bit make_u32bit(byte input0, byte input1, byte input2, byte input3)
     498                 :         { return static_cast<u32bit>((static_cast<u32bit>(input0) << 24) |
     499                 :                                      (static_cast<u32bit>(input1) << 16) |
     500        10752256 :                                      (static_cast<u32bit>(input2) <<  8) | input3); }
     501                 : 
     502                 :         /// SHA-256 Rho Function
     503        86018048 :         inline u32bit rho(u32bit X, u32bit rot1, u32bit rot2, u32bit rot3)
     504                 :         {
     505                 :             return (rotate_right(X, rot1) ^ rotate_right(X, rot2) ^
     506        86018048 :                     rotate_right(X, rot3));
     507                 :         }
     508                 : 
     509                 :         /// SHA-256 Sigma Function
     510        64513536 :         inline u32bit sigma(u32bit X, u32bit rot1, u32bit rot2, u32bit shift)
     511                 :         {
     512        64513536 :             return (rotate_right(X, rot1) ^ rotate_right(X, rot2) ^ (X >> shift));
     513                 :         }
     514                 : 
     515                 :         /// SHA-256 F1 Function
     516                 :         inline void F1(u32bit A, u32bit B, u32bit C, u32bit& D,
     517                 :                        u32bit E, u32bit F, u32bit G, u32bit& H,
     518        43009024 :                        u32bit msg, u32bit magic)
     519                 :         {
     520        43009024 :             magic += rho(E, 6, 11, 25) + ((E & F) ^ (~E & G)) + msg;
     521        43009024 :             D += magic + H;
     522        43009024 :             H += magic + rho(A, 2, 13, 22) + ((A & B) ^ (A & C) ^ (B & C));
     523        43009024 :         }
     524                 : 
     525                 :         /// SHA-256 Compression Function
     526          672016 :         void hash(const byte input[])
     527                 :         {
     528        11424272 :             for(u32bit j = 0; j != 16; ++j)
     529        10752256 :                 W[j] = make_u32bit(input[4*j], input[4*j+1], input[4*j+2], input[4*j+3]);
     530        32928784 :             for(u32bit j = 16; j != 64; ++j)
     531        32256768 :                 W[j] = sigma(W[j- 2], 17, 19, 10) + W[j- 7] +
     532                 :                     sigma(W[j-15],  7, 18,  3) + W[j-16];
     533                 : 
     534          672016 :             u32bit A = digest[0], B = digest[1], C = digest[2],
     535          672016 :                 D = digest[3], E = digest[4], F = digest[5],
     536          672016 :                 G = digest[6], H = digest[7];
     537                 : 
     538          672016 :             F1(A,B,C,D,E,F,G,H,W[ 0],0x428A2F98);
     539          672016 :             F1(H,A,B,C,D,E,F,G,W[ 1],0x71374491);
     540          672016 :             F1(G,H,A,B,C,D,E,F,W[ 2],0xB5C0FBCF);
     541          672016 :             F1(F,G,H,A,B,C,D,E,W[ 3],0xE9B5DBA5);
     542          672016 :             F1(E,F,G,H,A,B,C,D,W[ 4],0x3956C25B);
     543          672016 :             F1(D,E,F,G,H,A,B,C,W[ 5],0x59F111F1);
     544          672016 :             F1(C,D,E,F,G,H,A,B,W[ 6],0x923F82A4);
     545          672016 :             F1(B,C,D,E,F,G,H,A,W[ 7],0xAB1C5ED5);
     546          672016 :             F1(A,B,C,D,E,F,G,H,W[ 8],0xD807AA98);
     547          672016 :             F1(H,A,B,C,D,E,F,G,W[ 9],0x12835B01);
     548          672016 :             F1(G,H,A,B,C,D,E,F,W[10],0x243185BE);
     549          672016 :             F1(F,G,H,A,B,C,D,E,W[11],0x550C7DC3);
     550          672016 :             F1(E,F,G,H,A,B,C,D,W[12],0x72BE5D74);
     551          672016 :             F1(D,E,F,G,H,A,B,C,W[13],0x80DEB1FE);
     552          672016 :             F1(C,D,E,F,G,H,A,B,W[14],0x9BDC06A7);
     553          672016 :             F1(B,C,D,E,F,G,H,A,W[15],0xC19BF174);
     554          672016 :             F1(A,B,C,D,E,F,G,H,W[16],0xE49B69C1);
     555          672016 :             F1(H,A,B,C,D,E,F,G,W[17],0xEFBE4786);
     556          672016 :             F1(G,H,A,B,C,D,E,F,W[18],0x0FC19DC6);
     557          672016 :             F1(F,G,H,A,B,C,D,E,W[19],0x240CA1CC);
     558          672016 :             F1(E,F,G,H,A,B,C,D,W[20],0x2DE92C6F);
     559          672016 :             F1(D,E,F,G,H,A,B,C,W[21],0x4A7484AA);
     560          672016 :             F1(C,D,E,F,G,H,A,B,W[22],0x5CB0A9DC);
     561          672016 :             F1(B,C,D,E,F,G,H,A,W[23],0x76F988DA);
     562          672016 :             F1(A,B,C,D,E,F,G,H,W[24],0x983E5152);
     563          672016 :             F1(H,A,B,C,D,E,F,G,W[25],0xA831C66D);
     564          672016 :             F1(G,H,A,B,C,D,E,F,W[26],0xB00327C8);
     565          672016 :             F1(F,G,H,A,B,C,D,E,W[27],0xBF597FC7);
     566          672016 :             F1(E,F,G,H,A,B,C,D,W[28],0xC6E00BF3);
     567          672016 :             F1(D,E,F,G,H,A,B,C,W[29],0xD5A79147);
     568          672016 :             F1(C,D,E,F,G,H,A,B,W[30],0x06CA6351);
     569          672016 :             F1(B,C,D,E,F,G,H,A,W[31],0x14292967);
     570          672016 :             F1(A,B,C,D,E,F,G,H,W[32],0x27B70A85);
     571          672016 :             F1(H,A,B,C,D,E,F,G,W[33],0x2E1B2138);
     572          672016 :             F1(G,H,A,B,C,D,E,F,W[34],0x4D2C6DFC);
     573          672016 :             F1(F,G,H,A,B,C,D,E,W[35],0x53380D13);
     574          672016 :             F1(E,F,G,H,A,B,C,D,W[36],0x650A7354);
     575          672016 :             F1(D,E,F,G,H,A,B,C,W[37],0x766A0ABB);
     576          672016 :             F1(C,D,E,F,G,H,A,B,W[38],0x81C2C92E);
     577          672016 :             F1(B,C,D,E,F,G,H,A,W[39],0x92722C85);
     578          672016 :             F1(A,B,C,D,E,F,G,H,W[40],0xA2BFE8A1);
     579          672016 :             F1(H,A,B,C,D,E,F,G,W[41],0xA81A664B);
     580          672016 :             F1(G,H,A,B,C,D,E,F,W[42],0xC24B8B70);
     581          672016 :             F1(F,G,H,A,B,C,D,E,W[43],0xC76C51A3);
     582          672016 :             F1(E,F,G,H,A,B,C,D,W[44],0xD192E819);
     583          672016 :             F1(D,E,F,G,H,A,B,C,W[45],0xD6990624);
     584          672016 :             F1(C,D,E,F,G,H,A,B,W[46],0xF40E3585);
     585          672016 :             F1(B,C,D,E,F,G,H,A,W[47],0x106AA070);
     586          672016 :             F1(A,B,C,D,E,F,G,H,W[48],0x19A4C116);
     587          672016 :             F1(H,A,B,C,D,E,F,G,W[49],0x1E376C08);
     588          672016 :             F1(G,H,A,B,C,D,E,F,W[50],0x2748774C);
     589          672016 :             F1(F,G,H,A,B,C,D,E,W[51],0x34B0BCB5);
     590          672016 :             F1(E,F,G,H,A,B,C,D,W[52],0x391C0CB3);
     591          672016 :             F1(D,E,F,G,H,A,B,C,W[53],0x4ED8AA4A);
     592          672016 :             F1(C,D,E,F,G,H,A,B,W[54],0x5B9CCA4F);
     593          672016 :             F1(B,C,D,E,F,G,H,A,W[55],0x682E6FF3);
     594          672016 :             F1(A,B,C,D,E,F,G,H,W[56],0x748F82EE);
     595          672016 :             F1(H,A,B,C,D,E,F,G,W[57],0x78A5636F);
     596          672016 :             F1(G,H,A,B,C,D,E,F,W[58],0x84C87814);
     597          672016 :             F1(F,G,H,A,B,C,D,E,W[59],0x8CC70208);
     598          672016 :             F1(E,F,G,H,A,B,C,D,W[60],0x90BEFFFA);
     599          672016 :             F1(D,E,F,G,H,A,B,C,W[61],0xA4506CEB);
     600          672016 :             F1(C,D,E,F,G,H,A,B,W[62],0xBEF9A3F7);
     601          672016 :             F1(B,C,D,E,F,G,H,A,W[63],0xC67178F2);
     602                 : 
     603          672016 :             digest[0] += A; digest[1] += B; digest[2] += C;
     604          672016 :             digest[3] += D; digest[4] += E; digest[5] += F;
     605          672016 :             digest[6] += G; digest[7] += H;
     606          672016 :         }
     607                 : 
     608                 :         /// Copy out the digest
     609             406 :         void copy_out(byte output[])
     610                 :         {
     611           13398 :             for(u32bit j = 0; j != OUTPUT_LENGTH; ++j)
     612           12992 :                 output[j] = get_byte(j % 4, digest[j/4]);
     613             406 :         }
     614                 : 
     615                 :     protected:
     616                 : 
     617                 :         /// Update the hash
     618         1215328 :         void add_data(const byte input[], u32bit length)
     619                 :         {
     620         1215328 :             count += length;
     621                 : 
     622         1215328 :             if (position)
     623                 :             {
     624         1112696 :                 memcpy(buffer + position, input,
     625                 :                        std::min<u32bit>(length, sizeof(buffer) - position));
     626                 : 
     627         1112696 :                 if(position + length >= HASH_BLOCK_SIZE)
     628                 :                 {
     629           74176 :                     hash(buffer);
     630           74176 :                     input += (HASH_BLOCK_SIZE - position);
     631           74176 :                     length -= (HASH_BLOCK_SIZE - position);
     632           74176 :                     position = 0;
     633                 :                 }
     634                 :             }
     635                 : 
     636         3028090 :             while(length >= HASH_BLOCK_SIZE)
     637                 :             {
     638          597434 :                 hash(input);
     639          597434 :                 input += HASH_BLOCK_SIZE;
     640          597434 :                 length -= HASH_BLOCK_SIZE;
     641                 :             }
     642                 : 
     643         1215328 :             memcpy(buffer + position, input,
     644                 :                    std::min<u32bit>(length, sizeof(buffer) - position));
     645                 : 
     646         1215328 :             position += length;
     647         1215328 :         }
     648                 : 
     649                 :         /// Write the count bits to the buffer
     650             406 :         void write_count(byte out[])
     651                 :         {
     652            3654 :             for(u32bit j = 0; j != 8; ++j)
     653                 :             {
     654            3248 :                 out[j+COUNT_SIZE-8] = get_byte(j % 8, 8 * count);
     655                 :             }
     656             406 :         }
     657                 : 
     658                 :         /// Finalize a Hash
     659             406 :         void final_result(byte output[OUTPUT_LENGTH])
     660                 :         {
     661             406 :             buffer[position] = 0x80;
     662           24415 :             for(u32bit j = position+1; j != HASH_BLOCK_SIZE; ++j)
     663           24009 :                 buffer[j] = 0;
     664             406 :             if(position >= HASH_BLOCK_SIZE - COUNT_SIZE)
     665                 :             {
     666               0 :                 hash(buffer);
     667               0 :                 memset(buffer, 0, sizeof(buffer));
     668                 :             }
     669             406 :             write_count(buffer + HASH_BLOCK_SIZE - COUNT_SIZE);
     670                 : 
     671             406 :             hash(buffer);
     672             406 :             copy_out(output);
     673             406 :             clear();
     674             406 :         }
     675                 : 
     676                 :     public:
     677                 :         /// SHA_256 / MDx_HashFunction Constructor
     678             406 :         SHA256()
     679                 :         {
     680             406 :             clear();
     681             406 :         }
     682                 : 
     683                 :         /// Clear memory of sensitive data
     684             844 :         void clear() throw()
     685                 :         {
     686             844 :             memset(buffer, 0, sizeof(buffer));
     687             844 :             count = position = 0;
     688                 : 
     689             844 :             memset(W, 0, sizeof(W));
     690             844 :             digest[0] = 0x6A09E667;
     691             844 :             digest[1] = 0xBB67AE85;
     692             844 :             digest[2] = 0x3C6EF372;
     693             844 :             digest[3] = 0xA54FF53A;
     694             844 :             digest[4] = 0x510E527F;
     695             844 :             digest[5] = 0x9B05688C;
     696             844 :             digest[6] = 0x1F83D9AB;
     697             844 :             digest[7] = 0x5BE0CD19;
     698             844 :         }
     699                 : 
     700                 :         /// Update this SHA256 calculation with new data.
     701         1215328 :         SHA256& update(const void* input, uint32_t length)
     702                 :         {
     703         1215328 :             add_data(reinterpret_cast<const unsigned char*>(input), length);
     704         1215328 :             return *this;
     705                 :         }
     706                 : 
     707                 :         /// Return final SHA256 digest of this object in the buffer.
     708             138 :         void final(byte output[OUTPUT_LENGTH])
     709                 :         {
     710             138 :             final_result(output);
     711             138 :         }
     712                 : 
     713                 :         /// Return final SHA256 digest of this object as a string.
     714               0 :         std::string final()
     715                 :         {
     716                 :             byte result[OUTPUT_LENGTH];
     717               0 :             final_result(result);
     718               0 :             return std::string(reinterpret_cast<char*>(result), OUTPUT_LENGTH);
     719                 :         }
     720                 : 
     721                 :         /// Returns true if the final SHA256 digest of this object equals the
     722                 :         /// given data.
     723             264 :         bool final_equals(byte compare[OUTPUT_LENGTH])
     724                 :         {
     725                 :             byte result[OUTPUT_LENGTH];
     726             264 :             final_result(result);
     727             264 :             return (memcmp(compare, result, OUTPUT_LENGTH) == 0);
     728                 :         }
     729                 : 
     730                 :         /// Return final SHA256 digest of this object as a string encoded in
     731                 :         /// hexadecimal.
     732               4 :         std::string final_hex()
     733                 :         {
     734                 :             byte result[OUTPUT_LENGTH];
     735               4 :             final_result(result);
     736                 : 
     737               4 :             std::string out(OUTPUT_LENGTH*2, '\0');
     738                 : 
     739                 :             static const char xdigits[16] = {
     740                 :                 '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C', 'D', 'E', 'F'
     741                 :             };
     742                 : 
     743               4 :             std::string::iterator oi = out.begin();
     744             132 :             for (unsigned int i = 0; i < OUTPUT_LENGTH; ++i)
     745                 :             {
     746             128 :                 *oi++ = xdigits[ (result[i] & 0xF0) >> 4 ];
     747             128 :                 *oi++ = xdigits[ (result[i] & 0x0F) ];
     748                 :             }
     749                 : 
     750               0 :             return out;
     751                 :         }
     752                 : 
     753                 :         /// Calculate SHA256 digest of bytes in the given range.
     754               0 :         static std::string digest_bin(const void* input, uint32_t length)
     755                 :         {
     756               0 :             return SHA256().update(input, length).final();
     757                 :         }
     758                 : 
     759                 :         /// Calculate SHA256 digest of a string.
     760               0 :         static std::string digest_bin(const std::string& input)
     761                 :         {
     762               0 :             return digest_bin(input.data(), input.size());
     763                 :         }
     764                 : 
     765                 :         /// Calculate SHA256 digest of bytes in the given range. Result is
     766                 :         /// encoded in hexadecimal.
     767               4 :         static std::string digest_hex(const void* input, uint32_t length)
     768                 :         {
     769               4 :             return SHA256().update(input, length).final_hex();
     770                 :         }
     771                 : 
     772                 :         /// Calculate SHA256 digest of a string. Result is encoded in
     773                 :         /// hexadecimal.
     774               4 :         static std::string digest_hex(const std::string& input)
     775                 :         {
     776               4 :             return digest_hex(input.data(), input.size());
     777                 :         }
     778                 :     };
     779                 : 
     780                 : protected:
     781                 :     /**
     782                 :      * @brief BTreePage is a reference-counted buffer holding one page of the
     783                 :      * B-tree index. 
     784                 :      *
     785                 :      * Note that this wrapper object may also contain an invalid/uninitialized
     786                 :      * page pointer. The enclosed data can be casted to either a LeafNode
     787                 :      * object or an InnerNode object. The corresponding cast direction is
     788                 :      * checked against the page's level number..
     789                 :      */
     790                 :     class BTreePage
     791                 :     {
     792                 :     protected:
     793                 :         /**
     794                 :          * @brief Implementation of BTreePage: holds the data buffer and a
     795                 :          * reference counter.
     796                 :          */
     797                 :         struct Impl
     798                 :         {
     799                 :             /// reference counter
     800                 :             unsigned int refs;
     801                 : 
     802                 :             /// data buffer
     803                 :             char        data[BTreePageSize];
     804                 :         };
     805                 : 
     806                 :         /// pointer to reference-counted data buffer object.
     807                 :         struct Impl*    m_impl;
     808                 : 
     809                 :     public:
     810                 :         /// Default Constructor: create new invalid page buffer
     811        30012527 :         BTreePage()
     812        30012527 :             : m_impl(NULL)
     813                 :         {
     814        30012527 :         }
     815                 : 
     816                 :         /// Copy Constructor: increment reference counter on buffer.
     817               0 :         BTreePage(const BTreePage& btp)
     818               0 :             : m_impl(btp.m_impl)
     819                 :         {
     820               0 :             if (m_impl)
     821               0 :                 ++m_impl->refs;
     822               0 :         }
     823                 : 
     824                 :         /// Destructor: decrement reference counter on buffer and possibly
     825                 :         /// deallocate it.
     826        30012527 :         ~BTreePage()
     827                 :         {
     828        30012527 :             if (m_impl && --m_impl->refs == 0)
     829           60612 :                 delete m_impl;
     830        30012527 :         }
     831                 : 
     832                 :         /// Assignment Operator: increment reference counter on buffer.
     833        47249775 :         BTreePage& operator=(const BTreePage& btp)
     834                 :         {
     835        47249775 :             if (this != &btp)
     836                 :             {
     837        47249775 :                 if (m_impl && --m_impl->refs == 0)
     838               0 :                     delete m_impl;
     839                 : 
     840        47249775 :                 m_impl = btp.m_impl;
     841                 : 
     842        47249775 :                 if (m_impl)
     843        47249072 :                     ++m_impl->refs;
     844                 :             }
     845                 : 
     846        47249775 :             return *this;
     847                 :         }
     848                 : 
     849                 :         /// Determine whether the wrapper object contains valid page.
     850               0 :         bool IsValid() const
     851                 :         {
     852               0 :             return (m_impl != NULL);
     853                 :         }
     854                 : 
     855                 :         /// Release enclosed page and initialize a new page buffer.
     856           60612 :         void Create()
     857                 :         {
     858           60612 :             if (m_impl && --m_impl->refs == 0)
     859               0 :                 delete m_impl;
     860                 : 
     861           60612 :             m_impl = new Impl;
     862           60612 :             m_impl->refs = 1;
     863           60612 :         }
     864                 : 
     865                 :         /// Accessor: return enclosed buffer pointer.
     866           79216 :         char* GetBuffer()
     867                 :         {
     868           79216 :             CBTREEDB_ASSERT(m_impl);
     869           79216 :             return m_impl->data;
     870                 :         }
     871                 : 
     872                 :         /// Return the enclosed node's level in the tree.
     873        77163192 :         uint16_t GetLevel() const
     874                 :         {
     875        77163192 :             CBTREEDB_ASSERT(m_impl);
     876        77163192 :             return reinterpret_cast<InnerNode*>(m_impl->data)->level;
     877                 :         }
     878                 : 
     879                 :         /// Returns true if the buffer contains a leaf node.
     880        59865448 :         bool IsLeafNode() const
     881                 :         {
     882        59865448 :             return (GetLevel() == 0);
     883                 :         }
     884                 : 
     885                 :         /// Return buffer casted as an inner node.
     886        17297933 :         InnerNode* GetAsInnerNode() const
     887                 :         {
     888        17297933 :             CBTREEDB_ASSERT(m_impl && !IsLeafNode());
     889        17297933 :             return reinterpret_cast<InnerNode*>(m_impl->data);
     890                 :         }
     891                 : 
     892                 :         /// Return buffer casted as a leaf node.
     893        12634791 :         LeafNode* GetAsLeafNode() const
     894                 :         {
     895        12634791 :             CBTREEDB_ASSERT(m_impl && IsLeafNode());
     896        12634791 :             return reinterpret_cast<LeafNode*>(m_impl->data);
     897                 :         }
     898                 :     };
     899                 : 
     900                 : protected:
     901                 :     /**
     902                 :      * @brief PageCache and PageCacheImpl implement a LRU-strategy cache of
     903                 :      * B-tree pages used by CBTreeDB reader objects.
     904                 :      *
     905                 :      * One cache object can be shared between multiple readers. However, this
     906                 :      * page cache is not thread safe. You may have to wrap some mutex libraries
     907                 :      * if needed.
     908                 :      *
     909                 :      * The cached pages are put into a hash table for quick lookup by
     910                 :      * (btreeid,pageid). Simultaneously the HashCells are linked into a doubly
     911                 :      * chained "LRU"-list with the most recently used page at the head. This
     912                 :      * allows O(1) algorithms for both Store() and Retrieve() functions. When
     913                 :      * the maximum number of pages is exceeded, the tail pages of the LRU-list
     914                 :      * are removed. The drawing below illustrates the data structure used by
     915                 :      * the class.
     916                 :      *
     917                 :      * \htmlonly
     918                 :      * <div style="text-align: center">
     919                 :      * <p>Structure of PageCache's arrays and nodes</p>
     920                 :      * <object type="image/svg+xml" data="drawing-1.svg" style="height: 25em"></object>
     921                 :      * </div>
     922                 :      * \endhtmlonly
     923                 :      */
     924                 :     class PageCacheImpl
     925                 :     {
     926                 :     protected:
     927                 : 
     928                 :         /// reference counter
     929                 :         unsigned int    m_refs;
     930                 : 
     931                 :         /// maximum number of pages in cache
     932                 :         unsigned int    m_maxsize;
     933                 : 
     934                 :         /// current number of pages in cache
     935                 :         unsigned int    m_size;
     936                 : 
     937                 : #ifdef CBTREEDB_SELF_VERIFY
     938                 :         /**
     939                 :          * @brief counter to tag pages with a virtual LRU timestamp.
     940                 :          * This is just for verification purposes and is only used in the
     941                 :          * testsuite as the counter may overflow in real applications.
     942                 :          */
     943                 :         uint32_t        m_lrutime;
     944                 : #endif
     945                 : 
     946                 :         /// Structure for each slot in the cache hash array
     947                 :         struct HashCell
     948          122394 :         {
     949                 :             /// pointer forward to next hash cell in bucket
     950                 :             struct HashCell     *bucket_next;
     951                 : 
     952                 :             /// pointer backward to previous hash cell in bucket
     953                 :             struct HashCell     *bucket_prev;
     954                 : 
     955                 :             /// pointer forward in LRU double-linked list
     956                 :             struct HashCell     *list_next;
     957                 : 
     958                 :             /// pointer backward in LRU double-linked list
     959                 :             struct HashCell     *list_prev;
     960                 : 
     961                 : #ifdef CBTREEDB_SELF_VERIFY
     962                 :             /// virtual LRU timestamp, just for testing.
     963                 :             uint32_t    lrutime;
     964                 : #endif
     965                 : 
     966                 :             /// b-tree object identifier of page
     967                 :             void*       btreeid;
     968                 : 
     969                 :             /// page identifier withing b-tree
     970                 :             uint32_t    pageid;
     971                 : 
     972                 :             /// page holder object
     973                 :             BTreePage   page;
     974                 :         };
     975                 : 
     976                 :         /// hash cell array holding pointers to active cells
     977                 :         std::vector<struct HashCell*> m_hasharray;
     978                 : 
     979                 :         /**
     980                 :          * @brief sentinel hash cell for LRU double-linked list.
     981                 :          * list_next is the head and list_prev is the tail of the list.
     982                 :          */
     983                 :         struct HashCell m_sentinel;
     984                 : 
     985                 :         /// Simple hash function mapping (btreeid,pageid) -> bucket.
     986        30012952 :         inline unsigned int hashfunc(void* btreeid, uint32_t pageid)
     987                 :         {
     988                 :             // since hot pageids are usually ascending, I guess this is a
     989                 :             // pretty good hash function.
     990        30012952 :             return (reinterpret_cast<uintptr_t>(btreeid) + pageid) % m_hasharray.size();
     991                 :         }
     992                 : 
     993                 :     public:
     994                 :         /// Create a new page cache containg maxsize pages
     995              75 :         explicit PageCacheImpl(unsigned int maxsize)
     996              75 :             : m_refs(0), m_maxsize(maxsize), m_size(0)
     997                 :         {
     998              75 :             m_hasharray.resize(m_maxsize / 2, NULL);
     999                 : 
    1000              75 :             m_sentinel.list_prev = m_sentinel.list_next = &m_sentinel;
    1001                 : #ifdef CBTREEDB_SELF_VERIFY
    1002              75 :             m_lrutime = 0;
    1003              75 :             m_sentinel.lrutime = 0;
    1004                 : #endif
    1005              75 :         }
    1006                 : 
    1007                 :         /// Removes all cached pages and destroys cache.
    1008              75 :         ~PageCacheImpl()
    1009                 :         {
    1010              75 :             Clear();
    1011              75 :         }
    1012                 : 
    1013                 :         /// Increment reference counter by one.
    1014              75 :         void RefInc()
    1015                 :         {
    1016              75 :             ++m_refs;
    1017              75 :         }
    1018                 : 
    1019                 :         /// Decrement reference counter by one and return it.
    1020              75 :         unsigned int RefDec()
    1021                 :         {
    1022              75 :             return --m_refs;
    1023                 :         }
    1024                 : 
    1025                 :         /// Remove all pages from the cache and reset status.
    1026              75 :         void Clear()
    1027                 :         {
    1028                 :             // free up hash cells
    1029              75 :             struct HashCell* hc = m_sentinel.list_next;
    1030            2082 :             while (hc != &m_sentinel)
    1031                 :             {
    1032            1932 :                 struct HashCell* nc = hc->list_next;
    1033            1932 :                 delete hc;
    1034            1932 :                 hc = nc;
    1035                 :             }
    1036                 : 
    1037                 :             // zero hash array
    1038            4767 :             for (unsigned int i = 0; i < m_hasharray.size(); ++i)
    1039            4692 :                 m_hasharray[i] = NULL;
    1040                 : 
    1041              75 :             m_sentinel.list_prev = m_sentinel.list_next = &m_sentinel;
    1042              75 :             m_size = 0;
    1043                 : #ifdef CBTREEDB_SELF_VERIFY
    1044              75 :             m_sentinel.lrutime = 0;
    1045              75 :             m_lrutime = 0;
    1046                 : #endif
    1047              75 :         }
    1048                 : 
    1049                 :         /// Store a page object in a cache cell identified by (btreeid,pageid).
    1050           61123 :         void Store(void* btreeid, uint32_t pageid, const BTreePage& page)
    1051                 :         {
    1052                 :             // check whether its already in the cache
    1053                 : 
    1054           61123 :             unsigned int h = hashfunc(btreeid, pageid);
    1055                 : 
    1056           61123 :             struct HashCell* hc = m_hasharray[h];
    1057                 : 
    1058         7729236 :             while( hc && ! (hc->btreeid == btreeid && hc->pageid == pageid) )
    1059                 :             {
    1060                 :                 // advance in bucket list
    1061         7606990 :                 hc = hc->bucket_next;
    1062                 :             }
    1063                 : 
    1064           61123 :             if ( hc ) // found in cache: unlink from LRU list and place in front
    1065                 :             {
    1066               1 :                 if (hc != m_sentinel.list_next)
    1067                 :                 {
    1068                 :                     // remove cell wherever it is
    1069               1 :                     hc->list_next->list_prev = hc->list_prev;
    1070               1 :                     hc->list_prev->list_next = hc->list_next;
    1071                 : 
    1072                 :                     // place at head
    1073               1 :                     hc->list_prev = &m_sentinel;
    1074               1 :                     hc->list_next = m_sentinel.list_next;
    1075               1 :                     m_sentinel.list_next->list_prev = hc;
    1076               1 :                     m_sentinel.list_next = hc;
    1077                 : 
    1078                 :                     // copy page, it may have changed
    1079               1 :                     hc->page = page;
    1080                 : 
    1081                 : #ifdef CBTREEDB_SELF_VERIFY
    1082               1 :                     hc->lrutime = ++m_lrutime;
    1083                 : #endif
    1084                 :                 }
    1085                 :                 // else hc is the head -> do nothing.
    1086                 :             }
    1087                 :             else // not found in cache.
    1088                 :             {
    1089                 :                 // remove last page in LRU list if necessary
    1090          181434 :                 while (m_size >= m_maxsize)
    1091                 :                 {
    1092           59190 :                     struct HashCell* lc = m_sentinel.list_prev;
    1093                 : 
    1094           59190 :                     CBTREEDB_ASSERT( lc != &m_sentinel );
    1095                 : 
    1096                 :                     // unlink from bucket list
    1097           59190 :                     if (reinterpret_cast<uintptr_t>(lc->bucket_prev) > m_hasharray.size())
    1098                 :                     {
    1099           59144 :                         if (lc->bucket_next)
    1100           17394 :                             lc->bucket_next->bucket_prev = lc->bucket_prev;
    1101                 : 
    1102           59144 :                         lc->bucket_prev->bucket_next = lc->bucket_next;
    1103                 :                     }
    1104                 :                     else // at first place in bucket list
    1105                 :                     {
    1106              46 :                         if (lc->bucket_next)
    1107               2 :                             lc->bucket_next->bucket_prev = lc->bucket_prev;
    1108                 : 
    1109              46 :                         m_hasharray[ reinterpret_cast<uintptr_t>(lc->bucket_prev) ] = lc->bucket_next;
    1110                 :                     }
    1111                 : 
    1112                 :                     // unlink from LRU list
    1113           59190 :                     lc->list_next->list_prev = lc->list_prev;
    1114           59190 :                     lc->list_prev->list_next = lc->list_next;
    1115                 : 
    1116           59190 :                     delete lc;
    1117                 : 
    1118           59190 :                     --m_size;
    1119                 :                 }
    1120                 : 
    1121                 :                 // create new hash cell
    1122           61122 :                 hc = new HashCell;
    1123                 : 
    1124                 : #ifdef CBTREEDB_SELF_VERIFY
    1125           61122 :                 hc->lrutime = ++m_lrutime;
    1126                 : #endif
    1127           61122 :                 hc->btreeid = btreeid;
    1128           61122 :                 hc->pageid = pageid;
    1129           61122 :                 hc->page = page;
    1130                 : 
    1131                 :                 // set hash cell as head of LRU list
    1132           61122 :                 hc->list_prev = &m_sentinel;
    1133           61122 :                 hc->list_next = m_sentinel.list_next;
    1134           61122 :                 m_sentinel.list_next->list_prev = hc;
    1135           61122 :                 m_sentinel.list_next = hc;
    1136                 : 
    1137                 :                 // insert new hash cell to correct bucket
    1138           61122 :                 hc->bucket_prev = reinterpret_cast<HashCell*>( h );
    1139           61122 :                 hc->bucket_next = m_hasharray[h];
    1140                 : 
    1141           61122 :                 if (m_hasharray[h])
    1142           60989 :                     m_hasharray[h]->bucket_prev = hc;
    1143                 : 
    1144           61122 :                 m_hasharray[h] = hc;
    1145                 : 
    1146           61122 :                 ++m_size;
    1147                 :             }
    1148                 : 
    1149                 : #ifdef CBTREEDB_SELF_VERIFY
    1150           61123 :             CBTREEDB_ASSERT( Verify() );
    1151                 : #endif
    1152           61123 :         }
    1153                 : 
    1154                 :         /// Retrieve a cached page identified by (btreeid,pageid). Returns true
    1155                 :         /// if the page was found.
    1156        29951829 :         bool Retrieve(void* btreeid, uint32_t pageid, BTreePage& outpage)
    1157                 :         {
    1158                 :             // check whether its in the cache
    1159                 : 
    1160        29951829 :             unsigned int h = hashfunc(btreeid, pageid);
    1161                 : 
    1162        29951829 :             struct HashCell* hc = m_hasharray[h];
    1163                 : 
    1164      1950267461 :             while( hc && ! (hc->btreeid == btreeid && hc->pageid == pageid) )
    1165                 :             {
    1166                 :                 // advance in bucket list
    1167      1890363803 :                 hc = hc->bucket_next;
    1168                 :             }
    1169                 : 
    1170        29951829 :             if ( hc ) // found in cache: unlink from LRU list and place in front
    1171                 :             {
    1172        29890908 :                 if (hc != m_sentinel.list_next)
    1173                 :                 {
    1174                 :                     // remove cell wherever it is
    1175        26454804 :                     hc->list_next->list_prev = hc->list_prev;
    1176        26454804 :                     hc->list_prev->list_next = hc->list_next;
    1177                 : 
    1178                 :                     // place at head
    1179        26454804 :                     hc->list_prev = &m_sentinel;
    1180        26454804 :                     hc->list_next = m_sentinel.list_next;
    1181        26454804 :                     m_sentinel.list_next->list_prev = hc;
    1182        26454804 :                     m_sentinel.list_next = hc;
    1183                 : 
    1184                 : #ifdef CBTREEDB_SELF_VERIFY
    1185        26454804 :                     hc->lrutime = ++m_lrutime;
    1186                 : #endif
    1187                 :                 }
    1188                 :                 // else hc is the head -> do nothing.
    1189                 : 
    1190        29890908 :                 outpage = hc->page;
    1191                 : 
    1192                 : #ifdef CBTREEDB_SELF_VERIFY
    1193        29890908 :                 CBTREEDB_ASSERT( Verify() );
    1194                 : #endif
    1195        29890908 :                 return true;
    1196                 :             }
    1197                 :             else
    1198                 :             {
    1199                 : #ifdef CBTREEDB_SELF_VERIFY
    1200           60921 :                 CBTREEDB_ASSERT( Verify() );
    1201                 : #endif
    1202           60921 :                 return false;
    1203                 :             }
    1204                 :         }
    1205                 : 
    1206                 :         /// Change maximum number of pages in cache, note that this does not
    1207                 :         /// immediately have effect.
    1208               0 :         void SetMaxSize(unsigned int maxsize)
    1209                 :         {
    1210               0 :             m_maxsize = maxsize;
    1211               0 :         }
    1212                 : 
    1213                 :         /// Return a vector listing all currently contained (btreeid,pageid)
    1214                 :         /// pairs in LRU order. Used by the test cases for verification.
    1215               5 :         std::vector< std::pair<void*, uint32_t> > GetPagelist() const
    1216                 :         {
    1217               5 :             std::vector< std::pair<void*, uint32_t> > v;
    1218                 : 
    1219               5 :             struct HashCell* hc = m_sentinel.list_next;
    1220                 : 
    1221              42 :             while (hc != &m_sentinel)
    1222                 :             {
    1223              32 :                 v.push_back( std::make_pair(hc->btreeid, hc->pageid) );
    1224              32 :                 hc = hc->list_next;
    1225                 :             }
    1226                 : 
    1227               0 :             return v;
    1228                 :         }
    1229                 : 
    1230                 :         /// Verify the integrity of the LRU list and hash table.
    1231        30013957 :         bool Verify() const
    1232                 :         {
    1233                 :             { // traverse LRU list forwards
    1234                 : 
    1235        30013957 :                 unsigned int size = 0;
    1236        30013957 :                 struct HashCell* hc = m_sentinel.list_next;
    1237                 : 
    1238      -496543401 :                 while (hc != &m_sentinel)
    1239                 :                 {
    1240      -556571315 :                     if (!(hc->list_prev->list_next == hc)) return false;
    1241      -556571315 :                     if (!(hc->list_next->list_prev == hc)) return false;
    1242                 : #ifdef CBTREEDB_SELF_VERIFY
    1243      -556571315 :                     if (!(hc->lrutime > hc->list_next->lrutime)) return false;
    1244                 : #endif
    1245                 : 
    1246      -556571315 :                     ++size;
    1247      -556571315 :                     hc = hc->list_next;
    1248                 :                 }
    1249                 : 
    1250        30013957 :                 if (size != m_size) return false;
    1251                 :             }
    1252                 : 
    1253                 :             { // traverse LRU list backwards
    1254                 : 
    1255        30013957 :                 unsigned int size = 0;
    1256        30013957 :                 struct HashCell* hc = m_sentinel.list_prev;
    1257                 : 
    1258      -496543401 :                 while (hc != &m_sentinel)
    1259                 :                 {
    1260      -556571315 :                     if (!(hc->list_prev->list_next == hc)) return false;
    1261      -556571315 :                     if (!(hc->list_next->list_prev == hc)) return false;
    1262                 : #ifdef CBTREEDB_SELF_VERIFY
    1263      -556571315 :                     if (!(hc->lrutime < hc->list_prev->lrutime || hc->list_prev == &m_sentinel)) return false;
    1264                 : #endif
    1265                 : 
    1266      -556571315 :                     ++size;
    1267      -556571315 :                     hc = hc->list_prev;
    1268                 :                 }
    1269                 : 
    1270        30013957 :                 if (size != m_size) return false;
    1271                 :             }
    1272                 : 
    1273                 :             { // check and count hash cells in buckets
    1274                 : 
    1275        30013957 :                 unsigned int size = 0;
    1276                 : 
    1277      1950810185 :                 for (unsigned int b = 0; b < m_hasharray.size(); ++b)
    1278                 :                 {
    1279      1920796228 :                     struct HashCell* hc = m_hasharray[b];
    1280                 : 
    1281      1920796228 :                     if (!hc) continue;
    1282                 : 
    1283        30042749 :                     if (!(reinterpret_cast<uintptr_t>(hc->bucket_prev) == b)) return false;
    1284                 : 
    1285        30042749 :                     ++size;
    1286                 : 
    1287      -526528566 :                     while (hc->bucket_next != NULL)
    1288                 :                     {
    1289      -586614064 :                         if (!(hc->bucket_next->bucket_prev == hc)) return false;
    1290                 : 
    1291      -586614064 :                         hc = hc->bucket_next;
    1292                 : 
    1293      -586614064 :                         ++size;
    1294                 :                     }
    1295                 :                 }
    1296                 : 
    1297        30013957 :                 if (size != m_size) return false;
    1298                 :             }
    1299                 : 
    1300        30013957 :             return true;
    1301                 :         }
    1302                 :     };
    1303                 : 
    1304                 : public:
    1305                 :     /**
    1306                 :      * @brief PageCache and PageCacheImpl implement a LRU-strategy cache of
    1307                 :      * B-tree pages used by CBTreeDB reader objects.
    1308                 :      *
    1309                 :      * One cache object can be shared between multiple readers. However, this
    1310                 :      * page cache is not thread safe. You may have to wrap some mutex libraries
    1311                 :      * if needed.
    1312                 :      *
    1313                 :      * The cached pages are put into a hash table for quick lookup by
    1314                 :      * (btreeid,pageid). Simultaneously the HashCells are linked into a doubly
    1315                 :      * chained "LRU"-list with the most recently used page at the head. This
    1316                 :      * allows O(1) algorithms for both Store() and Retrieve() functions. When
    1317                 :      * the maximum number of pages is exceeded, the tail pages of the LRU-list
    1318                 :      * are removed. The drawing below illustrates the data structure used by
    1319                 :      * the class.
    1320                 :      *
    1321                 :      * \htmlonly
    1322                 :      * <div style="text-align: center">
    1323                 :      * <p>Structure of PageCache's arrays and nodes</p>
    1324                 :      * <object type="image/svg+xml" data="drawing-1.svg" style="height: 25em"></object>
    1325                 :      * </div>
    1326                 :      * \endhtmlonly
    1327                 :      */
    1328                 :     class PageCache
    1329                 :     {
    1330                 :     protected:
    1331                 : 
    1332                 :         /// pointer to implementation class.
    1333                 :         PageCacheImpl*  m_impl;
    1334                 : 
    1335                 :     public:
    1336                 :         /// Create a new page cache containg maxsize pages
    1337              75 :         explicit PageCache(unsigned int maxpages)
    1338              75 :             : m_impl(new PageCacheImpl(maxpages))
    1339                 :         {
    1340              75 :             m_impl->RefInc();
    1341              75 :         }
    1342                 : 
    1343                 :         /// Copy Constructor: increment reference counter on base object.
    1344               0 :         PageCache(const PageCache& pc)
    1345               0 :             : m_impl(pc.m_impl)
    1346                 :         {
    1347               0 :             m_impl->RefInc();
    1348               0 :         }
    1349                 : 
    1350                 :         /// Destructor: decrement reference counter on buffer and possibly
    1351                 :         /// deallocate it.
    1352              75 :         ~PageCache()
    1353                 :         {
    1354              75 :             if (m_impl->RefDec() == 0)
    1355              75 :                 delete m_impl;
    1356              75 :         }
    1357                 : 
    1358                 :         /// Assignment Operator: increment reference counter on base object.
    1359               0 :         PageCache& operator=(const PageCache& pc)
    1360                 :         {
    1361               0 :             if (this != &pc)
    1362                 :             {
    1363               0 :                 if (m_impl->RefDec() == 0)
    1364               0 :                     delete m_impl;
    1365                 : 
    1366               0 :                 m_impl = pc.m_impl;
    1367               0 :                 m_impl->RefInc();
    1368                 :             }
    1369                 : 
    1370               0 :             return *this;
    1371                 :         }
    1372                 : 
    1373                 :         /// Remove all pages from the cache and reset status.
    1374               0 :         void Clear()
    1375                 :         {
    1376               0 :             return m_impl->Clear();
    1377                 :         }
    1378                 : 
    1379                 :         /// Store a page object in a cache cell identified by (btreeid,pageid).
    1380           61123 :         void Store(void* btreeid, uint32_t pageid, const BTreePage& page)
    1381                 :         {
    1382           61123 :             return m_impl->Store(btreeid, pageid, page);
    1383                 :         }
    1384                 : 
    1385                 :         /// Retrieve a cached page identified by (btreeid,pageid). Returns true
    1386                 :         /// if the page was found.
    1387        29951829 :         bool Retrieve(void* btreeid, uint32_t pageid, BTreePage& outpage)
    1388                 :         {
    1389        29951829 :             return m_impl->Retrieve(btreeid, pageid, outpage);
    1390                 :         }
    1391                 : 
    1392                 :         /// Change maximum number of pages in cache, note that this does not
    1393                 :         /// immediately have effect.
    1394               0 :         void SetMaxSize(unsigned int maxsize)
    1395                 :         {
    1396               0 :             return m_impl->SetMaxSize(maxsize);
    1397                 :         }
    1398                 : 
    1399                 :         /// Return a vector listing all currently contained (btreeid,pageid)
    1400                 :         /// pairs in LRU order. Used by the test cases for verification.
    1401               5 :         std::vector< std::pair<void*, uint32_t> > GetPagelist() const
    1402                 :         {
    1403               5 :             return m_impl->GetPagelist();
    1404                 :         }
    1405                 : 
    1406                 :         /// Verify the integrity of the LRU list and hash table.
    1407            1005 :         bool Verify() const
    1408                 :         {
    1409            1005 :             return m_impl->Verify();
    1410                 :         }
    1411                 :     };
    1412                 : 
    1413                 : protected:
    1414                 :     /**
    1415                 :      * @brief Implementation class used to read constant B-tree database files.
    1416                 :      *
    1417                 :      * Refer to \ref sec_architecture and \ref sec_example on how to use this class.
    1418                 :      */
    1419                 :     class ReaderImpl
    1420                 :     {
    1421                 :     protected:
    1422                 : 
    1423                 :         /// reference counter
    1424                 :         unsigned int    m_refs;
    1425                 : 
    1426                 :         /// key comparison functional
    1427                 :         key_compare     m_key_less;
    1428                 : 
    1429                 :         /// signature characters to expect file to begin with.
    1430                 :         char            m_signaturestr[8];
    1431                 : 
    1432                 :         /// file stream object currently opened.
    1433                 :         std::istream*   m_istream;
    1434                 : 
    1435                 :         /// signature page read from file
    1436                 :         SignaturePage   m_signature;
    1437                 : 
    1438                 :         /// pointer to b-tree page cache to used.
    1439                 :         PageCache*      m_pagecache;
    1440                 : 
    1441                 :         /// Read one B-tree page from the file (or from cache).
    1442        29951328 :         BTreePage ReadIndexPage(uint32_t pageoffset)
    1443                 :         {
    1444        29951328 :             CBTREEDB_CHECK(pageoffset + m_signature.btree_pagesize <= m_signature.btree_size,
    1445                 :                            "Invalid B-tree page offset to retrieve.");
    1446                 : 
    1447        29951328 :             CBTREEDB_ASSERT(m_istream);
    1448                 : 
    1449        29951328 :             BTreePage page;
    1450                 : 
    1451        29951328 :             if (m_pagecache)
    1452                 :             {
    1453        29951328 :                 if (m_pagecache->Retrieve(this, pageoffset, page))
    1454        29890716 :                     return page;
    1455                 :             }
    1456                 : 
    1457           60612 :             m_istream->seekg(m_signature.btree_offset + pageoffset);
    1458           60612 :             CBTREEDB_CHECK(m_istream->good(), "Could not read B-tree page.");
    1459                 : 
    1460           60612 :             page.Create();
    1461                 : 
    1462           60612 :             m_istream->read(page.GetBuffer(), BTreePageSize);
    1463           60612 :             CBTREEDB_CHECK(m_istream->good(), "Could not read B-tree page.");
    1464                 : 
    1465           60612 :             if (m_pagecache)
    1466                 :             {
    1467           60612 :                 m_pagecache->Store(this, pageoffset, page);
    1468                 :             }
    1469                 : 
    1470           60612 :             return page;
    1471                 :         }
    1472                 : 
    1473                 :         /// Read byte range [offset, offset+size) from value data area into the
    1474                 :         /// given buffer.
    1475         5734949 :         bool ReadValueRange(uint64_t offset, void* data, uint32_t size)
    1476                 :         {
    1477         5734949 :             CBTREEDB_ASSERT(m_istream);
    1478                 : 
    1479         5734949 :             if (offset + size > m_signature.value_size) return false;
    1480                 : 
    1481         5734949 :             m_istream->seekg(m_signature.value_offset + offset);
    1482         5734949 :             if (m_istream->bad()) return false;
    1483                 : 
    1484         5734949 :             m_istream->read(reinterpret_cast<char*>(data), size);
    1485         5734949 :             if (m_istream->bad()) return false;
    1486                 : 
    1487         5734949 :             return true;
    1488                 :         }
    1489                 : 
    1490                 :         /// Function to test key equality, constructed from m_key_less.
    1491         9183984 :         bool KeyEqual(const key_type& a, const key_type& b)
    1492                 :         {
    1493         9183984 :             return !m_key_less(a,b) && !m_key_less(b,a);
    1494                 :         }
    1495                 : 
    1496                 :         /// Function to test key inequality, constructed from m_key_less.
    1497         9175356 :         bool KeyUnequal(const key_type& a, const key_type& b)
    1498                 :         {
    1499         9175356 :             return m_key_less(a,b) || m_key_less(b,a);
    1500                 :         }
    1501                 : 
    1502                 :         /// Find the first key slot containing a greater-or-equal key.
    1503                 :         template <typename NodeType>
    1504        26473240 :         int BinarySearch(const NodeType* node, key_type key)
    1505                 :         {
    1506        26473240 :             register int lo = 0, hi = node->slots;
    1507                 : 
    1508       216156188 :             while (lo < hi)
    1509                 :             {
    1510       163209708 :                 register int mid = (hi + lo) >> 1;
    1511                 : 
    1512       163209708 :                 if (m_key_less(node->keys[mid], key)) {
    1513        78432956 :                     lo = mid + 1;
    1514                 :                 }
    1515                 :                 else {
    1516        84776752 :                     hi = mid;
    1517                 :                 }
    1518                 :             }
    1519                 : 
    1520                 : #ifdef CBTREEDB_SELF_VERIFY
    1521                 :             // verify result using simple linear search
    1522                 :             {
    1523        26473240 :                 int i = 0;
    1524      1728825800 :                 while(i < node->slots && m_key_less(node->keys[i], key))
    1525      1675879320 :                     ++i;
    1526                 : 
    1527        26473240 :                 CBTREEDB_ASSERT(i == lo);
    1528                 :             }
    1529                 : #endif
    1530        26473240 :             return lo;
    1531                 :         }
    1532                 : 
    1533                 :     public:
    1534                 :         /// Create new reader, which is initially set to closed state.
    1535              73 :         ReaderImpl(const key_compare& key_less)
    1536              73 :             : m_refs(0), m_key_less(key_less), m_istream(NULL), m_pagecache(NULL)
    1537                 :         {
    1538              73 :             memcpy(m_signaturestr, "cbtreedb", 8);
    1539              73 :         }
    1540                 : 
    1541                 :         /// Increment reference counter by one.
    1542             138 :         void RefInc()
    1543                 :         {
    1544             138 :             ++m_refs;
    1545             138 :         }
    1546                 : 
    1547                 :         /// Decrement reference counter by one and return it.
    1548             138 :         unsigned int RefDec()
    1549                 :         {
    1550             138 :             return --m_refs;
    1551                 :         }
    1552                 : 
    1553                 :         /**
    1554                 :          * Change the database signature (first 8 bytes) from 'cbtreedb' to a
    1555                 :          * custom string. The signature is always 8 bytes long. Longer strings
    1556                 :          * are truncated, shorter ones padded with nulls.
    1557                 :          */
    1558              73 :         void SetSignature(const char* newsignature)
    1559                 :         {
    1560              73 :             unsigned int i = 0;
    1561             657 :             for(; i < 8 && newsignature[i]; ++i)
    1562             584 :                 m_signaturestr[i] = newsignature[i];
    1563                 : 
    1564              73 :             for(; i < 8; ++i)
    1565               0 :                 m_signaturestr[i] = 0;
    1566              73 :         }
    1567                 : 
    1568                 :         /**
    1569                 :          * Attempt to open a cbtreedb database file. Reads and verifies the
    1570                 :          * signature and initializes the reader. Note that this function does
    1571                 :          * not through an exception if the file could not be loaded! The
    1572                 :          * istream object must exist as long as the Reader is used.
    1573                 :          *
    1574                 :          * @param file          database file input stream to attach.
    1575                 :          * @param errortext     in case of error, set to an informative text.
    1576                 :          * @return              true if loaded and verified correctly.
    1577                 :          */
    1578              73 :         bool Open(std::istream& file, std::string* errortext = NULL)
    1579                 :         {
    1580              73 :             m_istream = NULL;
    1581                 : 
    1582              73 :             file.seekg(0, std::ios::beg);
    1583              73 :             if (file.bad()) {
    1584               0 :                 if (errortext) *errortext = "Could not open database.";
    1585               0 :                 return false;
    1586                 :             }
    1587                 : 
    1588              73 :             file.read(reinterpret_cast<char*>(&m_signature), sizeof(m_signature));
    1589              73 :             if (file.bad()) {
    1590               0 :                 if (errortext) *errortext = "Could not read signature.";
    1591               0 :                 return false;
    1592                 :             }
    1593                 : 
    1594              73 :             if (memcmp(m_signature.signature, m_signaturestr, 8) != 0) {
    1595               0 :                 if (errortext) *errortext = "Could not verify signature.";
    1596               0 :                 return false;
    1597                 :             }
    1598                 : 
    1599              73 :             if (m_signature.version != 0x00010000) {
    1600               0 :                 if (errortext) *errortext = "Signature contains unknown version.";
    1601               0 :                 return false;
    1602                 :             }
    1603                 :             
    1604              73 :             if (m_signature.app_version_id != AppVersionId) {
    1605               0 :                 if (errortext) *errortext = "Signature mismatches application version identifier.";
    1606               0 :                 return false;
    1607                 :             }
    1608                 :             
    1609              73 :             uint32_t crc = CRC32::digest(reinterpret_cast<char*>(&m_signature)+16, sizeof(m_signature)-16);
    1610                 : 
    1611              73 :             if (m_signature.header_crc32 != crc) {
    1612               0 :                 if (errortext) *errortext = "Header checksum mismatches.";
    1613               0 :                 return false;
    1614                 :             }
    1615                 : 
    1616              73 :             if (m_signature.key_size != sizeof(key_type)) {
    1617               2 :                 if (errortext) *errortext  = "Database not compatible with this reader: key sizes mismatch.";
    1618               2 :                 return false;
    1619                 :             }
    1620                 : 
    1621              71 :             if (m_signature.btree_pagesize != BTreePageSize) {
    1622               2 :                 if (errortext) *errortext  = "Database not compatible with this reader: page sizes mismatch.";
    1623               2 :                 return false;
    1624                 :             }
    1625                 : 
    1626                 :             // test database compatibility with order relation by checking root
    1627                 :             // node's key sequence.
    1628                 : 
    1629              69 :             m_istream = &file;
    1630                 : 
    1631              69 :             BTreePage root = ReadIndexPage(0);
    1632                 : 
    1633              69 :             if ( root.IsLeafNode() )
    1634                 :             {
    1635              28 :                 LeafNode* leaf = root.GetAsLeafNode();
    1636                 : 
    1637             532 :                 for(uint16_t s = 0; s < leaf->slots - 1; ++s)
    1638                 :                 {
    1639             504 :                     if (!m_key_less(leaf->keys[s], leaf->keys[s+1])) {
    1640               0 :                         m_istream = NULL;
    1641               0 :                         if (errortext) *errortext  = "Database not compatible with this reader: root keys order mismatches.";
    1642               0 :                         return false;
    1643                 :                     }
    1644                 :                 }
    1645                 :             }
    1646                 :             else
    1647                 :             {
    1648              41 :                 InnerNode* inner = root.GetAsInnerNode();
    1649                 : 
    1650            1203 :                 for(uint16_t s = 0; s < inner->slots - 1; ++s)
    1651                 :                 {
    1652            1164 :                     if (!m_key_less(inner->keys[s], inner->keys[s+1])) {
    1653               2 :                         m_istream = NULL;
    1654               2 :                         if (errortext) *errortext  = "Database not compatible with this reader: root keys order mismatches.";
    1655               2 :                         return false;
    1656                 :                     }
    1657                 :                 }
    1658                 :             }
    1659                 : 
    1660              67 :             return true;
    1661                 :         }
    1662                 : 
    1663                 :         /**
    1664                 :          * Close the opened database.
    1665                 :          */
    1666              65 :         void Close()
    1667                 :         {
    1668              65 :             if (m_istream)
    1669              65 :                 m_istream = NULL;
    1670              65 :         }
    1671                 : 
    1672                 :         /// Change the currently used page cache object
    1673              73 :         void SetPageCache(PageCache* newpagecache)
    1674                 :         {
    1675              73 :             m_pagecache = newpagecache;
    1676              73 :         }
    1677                 : 
    1678                 :         /**
    1679                 :          * Returns the number of items in the loaded database.
    1680                 :          */
    1681              67 :         uint32_t Size() const
    1682                 :         {
    1683              67 :             CBTREEDB_CHECK(m_istream, "No database loaded.");
    1684                 : 
    1685              67 :             return m_signature.items;
    1686                 :         }
    1687                 : 
    1688                 :         /**
    1689                 :          * Returns a const reference to the signature page of the currently
    1690                 :          * loaded database.
    1691                 :          */
    1692               0 :         const SignaturePage& GetSignature() const
    1693                 :         {
    1694               0 :             return m_signature;
    1695                 :         }
    1696                 : 
    1697                 :     protected:
    1698                 :         /**
    1699                 :          * Internal function to look down the B-tree and find a key. If found,
    1700                 :          * returns the offset and size of the corresponding value data area.
    1701                 :          */
    1702         9175496 :         bool FindKey(const key_type& key, uint64_t& outoffset, uint32_t& outsize)
    1703                 :         {
    1704         9175496 :             if (m_signature.btree_size == 0) return false;
    1705                 : 
    1706         9175496 :             BTreePage page = ReadIndexPage(0);
    1707                 : 
    1708         9175496 :             bool checklastkey = false;
    1709         9175496 :             key_type lastkey = key_type();
    1710                 : 
    1711        35648736 :             while( ! page.IsLeafNode() )
    1712                 :             {
    1713        17297744 :                 InnerNode* inner = page.GetAsInnerNode();
    1714                 : 
    1715        17297744 :                 CBTREEDB_CHECK(!checklastkey || !m_key_less(lastkey, inner->LastKey()),
    1716                 :                                "BTree corrupt (lastkey does not match).");
    1717                 : 
    1718        17297744 :                 int slot = BinarySearch(inner, key);
    1719                 : 
    1720        17297744 :                 uint32_t next = inner->childrenoffset;
    1721        17297744 :                 if (inner->level > 1)
    1722         8126504 :                     next += slot * sizeof(InnerNode);
    1723                 :                 else
    1724         9171240 :                     next += slot * sizeof(LeafNode);
    1725                 : 
    1726        17297744 :                 int oldlevel = inner->level;
    1727                 : 
    1728        17297744 :                 if (slot < inner->slots) {
    1729        17102632 :                     checklastkey = true;
    1730        17102632 :                     lastkey = inner->keys[slot];
    1731                 :                 }
    1732                 : 
    1733        17297744 :                 page = ReadIndexPage(next);
    1734                 : 
    1735        17297744 :                 CBTREEDB_CHECK(page.GetLevel() == oldlevel-1,
    1736                 :                                "BTree corrupt (level order mismatch).");
    1737                 :             }
    1738                 : 
    1739         9175496 :             LeafNode* leaf = page.GetAsLeafNode();
    1740                 : 
    1741         9175496 :             CBTREEDB_CHECK(!checklastkey || KeyEqual(leaf->LastKey(), lastkey),
    1742                 :                            "BTree corrupt (lastkey in leaf does not match).");
    1743                 : 
    1744         9175496 :             int slot = BinarySearch(leaf, key);
    1745                 : 
    1746         9175496 :             if (slot >= leaf->slots || KeyUnequal(leaf->keys[slot],  key))
    1747         4587748 :                 return false;
    1748                 : 
    1749                 :             // Return offset and size via pointers.
    1750                 : 
    1751         4587748 :             outoffset = leaf->baseoffset + leaf->offsets[slot];
    1752                 : 
    1753                 :             // figure out value size of this slot: compute it from the offsets
    1754         4587748 :             CBTREEDB_CHECK(leaf->offsets[slot] <= leaf->offsets[slot+1],
    1755                 :                            "BTree corrupt (offsets are not ascending).");
    1756                 : 
    1757         4587748 :             outsize = leaf->offsets[slot+1] - leaf->offsets[slot];
    1758                 : 
    1759         4587748 :             return true;
    1760                 :         }
    1761                 : 
    1762                 :     public:
    1763                 :         /**
    1764                 :          * Check if a key is in the constant database.
    1765                 :          *
    1766                 :          * @param key   key to lookup
    1767                 :          * @return      true if found.
    1768                 :          */
    1769         2293874 :         bool Exists(const key_type& key)
    1770                 :         {
    1771         2293874 :             CBTREEDB_CHECK(m_istream, "No database loaded.");
    1772                 : 
    1773                 :             uint64_t offset;
    1774                 :             uint32_t size;
    1775                 : 
    1776         2293874 :             return FindKey(key, offset, size);
    1777                 :         }
    1778                 : 
    1779                 :         /**
    1780                 :          * Find a key in the constant database. If found the corresponding
    1781                 :          * value is copied into the output buffer.
    1782                 :          *
    1783                 :          * @param key     key to lookup
    1784                 :          * @param outvalue buffer filled with the associated value if the key is found
    1785                 :          * @param maxsize maximum size of buffer
    1786                 :          * @return        true if found.
    1787                 :          */
    1788         2293874 :         bool Lookup(const key_type& key, void* outvalue, uint32_t maxsize)
    1789                 :         {
    1790         2293874 :             CBTREEDB_CHECK(m_istream, "No database loaded.");
    1791                 : 
    1792                 :             uint64_t offset;
    1793                 :             uint32_t size;
    1794                 : 
    1795         2293874 :             if (!FindKey(key, offset, size))
    1796         1146937 :                 return false;
    1797                 : 
    1798         1146937 :             uint32_t readsize = size;
    1799         1146937 :             if (readsize > maxsize) readsize = maxsize;
    1800                 : 
    1801         1146937 :             return ReadValueRange(offset, outvalue, readsize);
    1802                 :         }
    1803                 : 
    1804                 :         /**
    1805                 :          * Find a key in the constant database. If found the coresponding value
    1806                 :          * is copied into the output string buffer.
    1807                 :          *
    1808                 :          * @param key     key to lookup
    1809                 :          * @param outvalue string filled with the associated value if the key is found
    1810                 :          * @return        true if found.
    1811                 :          */
    1812         2293874 :         bool Lookup(const key_type& key, std::string& outvalue)
    1813                 :         {
    1814         2293874 :             CBTREEDB_CHECK(m_istream, "No database loaded.");
    1815                 : 
    1816                 :             uint64_t offset;
    1817                 :             uint32_t size;
    1818                 : 
    1819         2293874 :             if (!FindKey(key, offset, size))
    1820         1146937 :                 return false;
    1821                 : 
    1822         1146937 :             outvalue.resize(size);
    1823                 : 
    1824         1146937 :             return ReadValueRange(offset, const_cast<char*>(outvalue.data()), size);
    1825                 :         }
    1826                 : 
    1827                 :         /**
    1828                 :          * Find a key in the constant database. If found the corresponding
    1829                 :          * value is copied into the output string buffer. If the key does not
    1830                 :          * exist, an empty string is returned.
    1831                 :          *
    1832                 :          * @param key     key to lookup
    1833                 :          * @return        string containing the value
    1834                 :          */
    1835         2293874 :         std::string operator[](const key_type& key)
    1836                 :         {
    1837         2293874 :             CBTREEDB_CHECK(m_istream, "No database loaded.");
    1838                 : 
    1839                 :             uint64_t offset;
    1840                 :             uint32_t size;
    1841                 : 
    1842         2293874 :             if (!FindKey(key, offset, size))
    1843         1146937 :                 return std::string();
    1844                 : 
    1845         1146937 :             std::string outvalue;
    1846         1146937 :             outvalue.resize(size);
    1847                 : 
    1848         1146937 :             if (!ReadValueRange(offset, const_cast<char*>(outvalue.data()), size))
    1849               0 :                 return std::string();
    1850                 : 
    1851         1146937 :             return outvalue;
    1852                 :         }
    1853                 :         
    1854                 :     protected:
    1855                 :         /**
    1856                 :          * Internal function to look directly into the B-tree's leaf pages and
    1857                 :          * find a key by index. If found, returns the key, offset and size of
    1858                 :          * the corresponding value area.
    1859                 :          */
    1860         3440811 :         bool FindIndex(uint32_t index, key_type& outkey, uint64_t& outoffset, uint32_t& outsize)
    1861                 :         {
    1862         3440811 :             if (index >= m_signature.items) return false;
    1863                 : 
    1864                 :             // directly compute offset of leaf containing to the key index
    1865                 : 
    1866         3440811 :             uint32_t offset = index / LeafNodeNumKeys * BTreePageSize;
    1867         3440811 :             unsigned int slot = index % LeafNodeNumKeys;
    1868                 : 
    1869         3440811 :             BTreePage page = ReadIndexPage(m_signature.btree_firstleaf + offset);
    1870                 : 
    1871         3440811 :             CBTREEDB_CHECK(page.IsLeafNode(),
    1872                 :                            "BTree corrupt (expecting leaf node).");
    1873                 : 
    1874         3440811 :             LeafNode* leaf = page.GetAsLeafNode();
    1875                 : 
    1876         3440811 :             CBTREEDB_CHECK(slot < leaf->slots,
    1877                 :                            "BTree corrupt (index beyond range in leaf node).");
    1878                 : 
    1879                 :             // copy key and offset
    1880         3440811 :             outkey = leaf->keys[slot];
    1881         3440811 :             outoffset = leaf->baseoffset + leaf->offsets[slot];
    1882                 : 
    1883                 :             // figure out value size of this slot: compute it from the offsets
    1884         3440811 :             CBTREEDB_CHECK(leaf->offsets[slot] <= leaf->offsets[slot+1],
    1885                 :                            "BTree corrupt (offsets are not ascending).");
    1886                 : 
    1887         3440811 :             outsize = leaf->offsets[slot+1] - leaf->offsets[slot];
    1888                 : 
    1889         3440811 :             return true;
    1890                 :         }
    1891                 : 
    1892                 :     public:
    1893                 :         /**
    1894                 :          * Returns only the key by index. Looks directly into the leaf pages.
    1895                 :          *
    1896                 :          * @param index         zero-based index of item to retrieve
    1897                 :          * @param outkey        set to key of item
    1898                 :          * @return              size of associated value if found
    1899                 :          */
    1900         1146937 :         uint32_t GetIndex(uint32_t index, key_type& outkey)
    1901                 :         {
    1902         1146937 :             CBTREEDB_CHECK(m_istream, "No database loaded.");
    1903                 : 
    1904                 :             uint64_t offset;
    1905                 :             uint32_t size;
    1906                 : 
    1907         1146937 :             if (!FindIndex(index, outkey, offset, size))
    1908               0 :                 return 0;
    1909                 : 
    1910         1146937 :             return size;
    1911                 :         }
    1912                 : 
    1913                 :         /**
    1914                 :          * Return a key and associated value by index. Looks directly into the
    1915                 :          * leaf pages.
    1916                 :          *
    1917                 :          * @param index         zero-based index of item to retrieve
    1918                 :          * @param outkey        set to key of item
    1919                 :          * @param outvalue      buffer to hold data of value
    1920                 :          * @param maxsize       maximum size of buffer
    1921                 :          * @return              size of associated value
    1922                 :          */
    1923         1146937 :         uint32_t GetIndex(uint32_t index, key_type& outkey, void* outvalue, uint32_t maxsize)
    1924                 :         {
    1925         1146937 :             CBTREEDB_CHECK(m_istream, "No database loaded.");
    1926                 : 
    1927                 :             uint64_t offset;
    1928                 :             uint32_t size;
    1929                 : 
    1930         1146937 :             if (!FindIndex(index, outkey, offset, size))
    1931               0 :                 return 0;
    1932                 : 
    1933         1146937 :             uint32_t outsize = size;
    1934         1146937 :             if (outsize > maxsize) outsize = maxsize;
    1935                 : 
    1936         1146937 :             if (!ReadValueRange(offset, outvalue, outsize))
    1937               0 :                 return 0;
    1938                 : 
    1939         1146937 :             return size;
    1940                 :         }
    1941                 : 
    1942                 :         /**
    1943                 :          * Return a key and associated value by index. Looks directly into the
    1944                 :          * leaf pages.
    1945                 :          *
    1946                 :          * @param index         zero-based index of item to retrieve
    1947                 :          * @param outkey        set to key of item
    1948                 :          * @param outvalue      string to hold data of value
    1949                 :          * @return              size of associated value
    1950                 :          */
    1951         1146937 :         uint32_t GetIndex(uint32_t index, key_type& outkey, std::string& outvalue)
    1952                 :         {
    1953         1146937 :             CBTREEDB_CHECK(m_istream, "No database loaded.");
    1954                 : 
    1955                 :             uint64_t offset;
    1956                 :             uint32_t size;
    1957                 : 
    1958         1146937 :             if (!FindIndex(index, outkey, offset, size))
    1959               0 :                 return 0;
    1960                 : 
    1961         1146937 :             outvalue.resize(size);
    1962                 : 
    1963         1146937 :             if (!ReadValueRange(offset, const_cast<char*>(outvalue.data()), size))
    1964               0 :                 return 0;
    1965                 : 
    1966         1146937 :             return size;
    1967                 :         }
    1968                 : 
    1969                 :         /**
    1970                 :          * Verify all aspects of the loaded database.
    1971                 :          *
    1972                 :          * @return      true if database is ok.
    1973                 :          */
    1974              67 :         bool Verify()
    1975                 :         {
    1976              67 :             CBTREEDB_CHECK(m_istream, "No database loaded.");
    1977                 : 
    1978              67 :             if (!VerifyBTree()) return false;
    1979                 : 
    1980              67 :             if (!VerifyBTreeChecksum()) return false;
    1981                 : 
    1982              67 :             if (!VerifyValueChecksum()) return false;
    1983                 : 
    1984              67 :             return true;
    1985                 :         }
    1986                 : 
    1987                 :         /**
    1988                 :          * Verify B-tree structure in the loaded database.
    1989                 :          *
    1990                 :          * @return      true if database is ok.
    1991                 :          */
    1992             132 :         bool VerifyBTree()
    1993                 :         {
    1994             132 :             CBTREEDB_CHECK(m_istream, "No database loaded.");
    1995                 : 
    1996             132 :             if (m_signature.btree_size == 0) return true;
    1997                 : 
    1998             132 :             key_type minkey = key_type(), maxkey = key_type();
    1999             132 :             uint64_t lastoffset = 0;
    2000             132 :             return VerifyBTreeNode(0, &minkey, &maxkey, &lastoffset);
    2001                 :         }
    2002                 : 
    2003                 :     protected:
    2004                 :         /**
    2005                 :          * Internal function: Recursively verify B-tree structure.
    2006                 :          */
    2007           18604 :         bool VerifyBTreeNode(uint32_t offset, key_type* minkey, key_type* maxkey, uint64_t* lastoffset)
    2008                 :         {
    2009           18604 :             BTreePage page = ReadIndexPage(offset);
    2010                 : 
    2011           18604 :             if ( page.IsLeafNode() )
    2012                 :             {
    2013           18456 :                 LeafNode* leaf = page.GetAsLeafNode();
    2014                 : 
    2015           18456 :                 if (*lastoffset != leaf->baseoffset + leaf->offsets[0]) return false;
    2016                 : 
    2017         2313876 :                 for(uint16_t s = 0; s < leaf->slots - 1; ++s)
    2018                 :                 {
    2019         2295420 :                     if (!m_key_less(leaf->keys[s], leaf->keys[s+1])) return false;
    2020                 : 
    2021         2295420 :                     if (!(leaf->offsets[s] <= leaf->offsets[s+1])) return false;
    2022                 :                 }
    2023                 : 
    2024           18456 :                 *minkey = leaf->keys[0];
    2025           18456 :                 *maxkey = leaf->keys[leaf->slots - 1];
    2026           18456 :                 *lastoffset = leaf->baseoffset + leaf->offsets[leaf->slots];
    2027                 :             }
    2028                 :             else
    2029                 :             {
    2030             148 :                 InnerNode* inner = page.GetAsInnerNode();
    2031                 : 
    2032           18324 :                 for(uint16_t s = 0; s < inner->slots - 1; ++s)
    2033                 :                 {
    2034           18176 :                     if (!m_key_less(inner->keys[s], inner->keys[s+1])) return false;
    2035                 :                 }
    2036                 : 
    2037           18620 :                 for(uint16_t s = 0; s <= inner->slots; ++s)
    2038                 :                 {
    2039           18472 :                     uint32_t childoffset = inner->childrenoffset;
    2040                 : 
    2041           18472 :                     if (inner->level > 1)
    2042              72 :                         childoffset += s * sizeof(InnerNode);
    2043                 :                     else
    2044           18400 :                         childoffset += s * sizeof(LeafNode);
    2045                 : 
    2046           18472 :                     key_type subminkey = key_type(), submaxkey = key_type();
    2047                 : 
    2048           18472 :                     if (!VerifyBTreeNode(childoffset, &subminkey, &submaxkey, lastoffset)) return false;
    2049                 : 
    2050           18472 :                     if (s == 0)
    2051             148 :                         *minkey = subminkey;
    2052                 :                     else
    2053           18324 :                         if (!m_key_less(inner->keys[s-1], subminkey)) return false;
    2054                 : 
    2055           18472 :                     if (s == inner->slots)
    2056             148 :                         *maxkey = submaxkey;
    2057                 :                     else
    2058           18324 :                         if (!KeyEqual(inner->keys[s], submaxkey)) return false;
    2059                 :                 }
    2060                 :             }
    2061                 : 
    2062           18604 :             return true;
    2063                 :         }
    2064                 : 
    2065                 :     public:
    2066                 :         /**
    2067                 :          * Verify the SHA256 checksum of the B-tree pages in the loaded
    2068                 :          * database.
    2069                 :          *
    2070                 :          * @return      true if database is ok.
    2071                 :          */
    2072             132 :         bool VerifyBTreeChecksum()
    2073                 :         {
    2074             132 :             CBTREEDB_CHECK(m_istream, "No database loaded.");
    2075                 : 
    2076             132 :             SHA256 sha;
    2077                 : 
    2078           18736 :             for(uint32_t offset = 0; offset < m_signature.btree_size; offset += BTreePageSize)
    2079                 :             {
    2080           18604 :                 BTreePage page = ReadIndexPage(offset);
    2081                 : 
    2082           18604 :                 sha.update(page.GetBuffer(), BTreePageSize);
    2083                 :             }
    2084                 : 
    2085             132 :             return ( sha.final_equals(m_signature.btree_sha256) );
    2086                 :         }
    2087                 : 
    2088                 :         /**
    2089                 :          * Verify the SHA256 checksum of value data area in the loaded
    2090                 :          * database.
    2091                 :          *
    2092                 :          * @return      true if database is ok.
    2093                 :          */
    2094             132 :         bool VerifyValueChecksum()
    2095                 :         {
    2096             132 :             CBTREEDB_CHECK(m_istream, "No database loaded.");
    2097                 : 
    2098             132 :             SHA256 sha;
    2099                 : 
    2100                 :             char buffer[64*1024];
    2101                 : 
    2102             396 :             for(uint64_t offset = 0; offset < m_signature.value_size; offset += sizeof(buffer))
    2103                 :             {
    2104             264 :                 uint64_t remsize = std::min<uint64_t>(sizeof(buffer), m_signature.value_size - offset);
    2105                 : 
    2106             264 :                 if (!ReadValueRange(offset, buffer, remsize))
    2107               0 :                     return false;
    2108                 : 
    2109             264 :                 sha.update(buffer, remsize);
    2110                 :             }
    2111                 : 
    2112             132 :             return( sha.final_equals(m_signature.value_sha256) );
    2113                 :         }
    2114                 :     };
    2115                 : 
    2116                 : public:
    2117                 :     /**
    2118                 :      * @brief Class used to read constant B-tree database files.
    2119                 :      *
    2120                 :      * This is a reference counted front-end to ReaderImpl.
    2121                 :      *
    2122                 :      * Refer to \ref sec_architecture and \ref sec_example on how to use this class.
    2123                 :      */
    2124                 :     class Reader
    2125                 :     {
    2126                 :     protected:
    2127                 : 
    2128                 :         /// pointer to implementation class.
    2129                 :         ReaderImpl*     m_impl;
    2130                 : 
    2131                 :     public:
    2132                 :         /// Create new reader, which is initially set to closed state.
    2133              73 :         Reader(const key_compare& key_less=key_compare())
    2134              73 :             : m_impl(new ReaderImpl(key_less))
    2135                 :         {
    2136              73 :             m_impl->RefInc();
    2137              73 :         }
    2138                 : 
    2139                 :         /// Copy Constructor: increment reference counter on base object.
    2140              65 :         Reader(const Reader& rd)
    2141              65 :             : m_impl(rd.m_impl)
    2142                 :         {
    2143              65 :             m_impl->RefInc();
    2144              65 :         }
    2145                 : 
    2146                 :         /// Destructor: decrement reference counter on buffer and possibly
    2147                 :         /// deallocate it.
    2148             138 :         ~Reader()
    2149                 :         {
    2150             138 :             if (m_impl->RefDec() == 0)
    2151              73 :                 delete m_impl;
    2152             138 :         }
    2153                 : 
    2154                 :         /// Assignment Operator: increment reference counter on base object.
    2155               0 :         Reader& operator=(const Reader& rd)
    2156                 :         {
    2157               0 :             if (this != &rd)
    2158                 :             {
    2159               0 :                 if (m_impl->RefDec() == 0)
    2160               0 :                     delete m_impl;
    2161                 : 
    2162               0 :                 m_impl = rd.m_impl;
    2163               0 :                 m_impl->RefInc();
    2164                 :             }
    2165                 : 
    2166               0 :             return *this;
    2167                 :         }
    2168                 : 
    2169                 :         /**
    2170                 :          * Change the database signature (first 8 bytes) from 'cbtreedb' to a
    2171                 :          * custom string. The signature is always 8 bytes long. Longer strings
    2172                 :          * are truncated, shorter ones padded with nulls.
    2173                 :          */
    2174              73 :         void SetSignature(const char* newsignature)
    2175                 :         {
    2176              73 :             return m_impl->SetSignature(newsignature);
    2177                 :         }
    2178                 : 
    2179                 :         /**
    2180                 :          * Attempt to open a cbtreedb database file. Reads and verifies the
    2181                 :          * signature and initializes the reader. Note that this function does
    2182                 :          * not through an exception if the file could not be loaded! The
    2183                 :          * istream object must exist as long as the Reader is used.
    2184                 :          *
    2185                 :          * @param file          database file input stream to attach.
    2186                 :          * @param errortext     in case of error, set to an informative text.
    2187                 :          * @return              true if loaded and verified correctly.
    2188                 :          */
    2189              73 :         bool Open(std::istream& file, std::string* errortext = NULL)
    2190                 :         {
    2191              73 :             return m_impl->Open(file, errortext);
    2192                 :         }
    2193                 : 
    2194                 :         /**
    2195                 :          * Close the opened database.
    2196                 :          */
    2197              65 :         void Close()
    2198                 :         {
    2199              65 :             return m_impl->Close();
    2200                 :         }
    2201                 : 
    2202                 :         /// Change the currently used page cache object
    2203              73 :         void SetPageCache(PageCache* newpagecache)
    2204                 :         {
    2205              73 :             return m_impl->SetPageCache(newpagecache);
    2206                 :         }
    2207                 : 
    2208                 :         /**
    2209                 :          * Returns the number of items in the loaded database.
    2210                 :          */
    2211              67 :         uint32_t Size() const
    2212                 :         {
    2213              67 :             return m_impl->Size();
    2214                 :         }
    2215                 : 
    2216                 :         /**
    2217                 :          * Returns a const reference to the signature page of the currently
    2218                 :          * loaded database.
    2219                 :          */
    2220               0 :         const SignaturePage& GetSignature() const
    2221                 :         {
    2222               0 :             return m_impl->GetSignature();
    2223                 :         }
    2224                 : 
    2225                 :         /**
    2226                 :          * Check if a key is in the constant database.
    2227                 :          *
    2228                 :          * @param key   key to lookup
    2229                 :          * @return      true if found.
    2230                 :          */
    2231         2293874 :         bool Exists(const key_type& key)
    2232                 :         {
    2233         2293874 :             return m_impl->Exists(key);
    2234                 :         }
    2235                 : 
    2236                 :         /**
    2237                 :          * Find a key in the constant database. If found the corresponding
    2238                 :          * value is copied into the output buffer.
    2239                 :          *
    2240                 :          * @param key     key to lookup
    2241                 :          * @param outvalue buffer filled with the associated value if the key is found
    2242                 :          * @param maxsize maximum size of buffer
    2243                 :          * @return        true if found.
    2244                 :          */
    2245         2293874 :         bool Lookup(const key_type& key, void* outvalue, uint32_t maxsize)
    2246                 :         {
    2247         2293874 :             return m_impl->Lookup(key, outvalue, maxsize);
    2248                 :         }
    2249                 : 
    2250                 :         /**
    2251                 :          * Find a key in the constant database. If found the coresponding value
    2252                 :          * is copied into the output string buffer.
    2253                 :          *
    2254                 :          * @param key     key to lookup
    2255                 :          * @param outvalue string filled with the associated value if the key is found
    2256                 :          * @return        true if found.
    2257                 :          */
    2258         2293874 :         bool Lookup(const key_type& key, std::string& outvalue)
    2259                 :         {
    2260         2293874 :             return m_impl->Lookup(key, outvalue);
    2261                 :         }
    2262                 : 
    2263                 :         /**
    2264                 :          * Find a key in the constant database. If found the corresponding
    2265                 :          * value is copied into the output string buffer. If the key does not
    2266                 :          * exist, an empty string is returned.
    2267                 :          *
    2268                 :          * @param key     key to lookup
    2269                 :          * @return        string containing the value
    2270                 :          */
    2271         2293874 :         std::string operator[](const key_type& key)
    2272                 :         {
    2273         2293874 :             return (*m_impl)[key];
    2274                 :         }
    2275                 : 
    2276                 :         /**
    2277                 :          * Returns only the key by index. Looks directly into the leaf pages.
    2278                 :          *
    2279                 :          * @param index         zero-based index of item to retrieve
    2280                 :          * @param outkey        set to key of item
    2281                 :          * @return              size of key's data
    2282                 :          */
    2283         1146937 :         uint32_t GetIndex(uint32_t index, key_type& outkey)
    2284                 :         {
    2285         1146937 :             return m_impl->GetIndex(index, outkey);
    2286                 :         }
    2287                 : 
    2288                 :         /**
    2289                 :          * Return a key and associated value by index. Looks directly into the
    2290                 :          * leaf pages.
    2291                 :          *
    2292                 :          * @param index         zero-based index of item to retrieve
    2293                 :          * @param outkey        set to key of item
    2294                 :          * @param outvalue      buffer to hold data of value
    2295                 :          * @param maxsize       maximum size of buffer
    2296                 :          * @return              size of associated value
    2297                 :          */
    2298         1146937 :         uint32_t GetIndex(uint32_t index, key_type& outkey, void* outvalue, uint32_t maxsize)
    2299                 :         {
    2300         1146937 :             return m_impl->GetIndex(index, outkey, outvalue, maxsize);
    2301                 :         }
    2302                 : 
    2303                 :         /**
    2304                 :          * Return a key and associated value by index. Looks directly into the
    2305                 :          * leaf pages.
    2306                 :          *
    2307                 :          * @param index         zero-based index of item to retrieve
    2308                 :          * @param outkey        set to key of item
    2309                 :          * @param outvalue      string to hold data of value
    2310                 :          * @return              size of associated value
    2311                 :          */
    2312         1146937 :         uint32_t GetIndex(uint32_t index, key_type& outkey, std::string& outvalue)
    2313                 :         {
    2314         1146937 :             return m_impl->GetIndex(index, outkey, outvalue);
    2315                 :         }
    2316                 : 
    2317                 :         /**
    2318                 :          * Verify all aspects of the loaded database.
    2319                 :          *
    2320                 :          * @return      true if database is ok.
    2321                 :          */
    2322              67 :         bool Verify()
    2323                 :         {
    2324              67 :             return m_impl->Verify();
    2325                 :         }
    2326                 : 
    2327                 :         /**
    2328                 :          * Verify B-tree structure in the loaded database.
    2329                 :          *
    2330                 :          * @return      true if database is ok.
    2331                 :          */
    2332              65 :         bool VerifyBTree()
    2333                 :         {
    2334              65 :             return m_impl->VerifyBTree();
    2335                 :         }
    2336                 : 
    2337                 :         /**
    2338                 :          * Verify the SHA256 checksum of the B-tree pages in the loaded
    2339                 :          * database.
    2340                 :          *
    2341                 :          * @return      true if database is ok.
    2342                 :          */
    2343              65 :         bool VerifyBTreeChecksum()
    2344                 :         {
    2345              65 :             return m_impl->VerifyBTreeChecksum();
    2346                 :         }
    2347                 : 
    2348                 :         /**
    2349                 :          * Verify the SHA256 checksum of value data area in the loaded
    2350                 :          * database.
    2351                 :          *
    2352                 :          * @return      true if database is ok.
    2353                 :          */
    2354              65 :         bool VerifyValueChecksum()
    2355                 :         {
    2356              65 :             return m_impl->VerifyValueChecksum();
    2357                 :         }
    2358                 :     };
    2359                 : 
    2360                 : protected:
    2361                 : 
    2362                 :     /**
    2363                 :      * @brief BTreeBuilder is used to construct an index very similar to a
    2364                 :      * B-tree from an ordered sequence.
    2365                 :      *
    2366                 :      * The tree builder class is fed with an ordered sequence of keys together
    2367                 :      * with their value size and offset. The information is stored into the
    2368                 :      * nodes of the tree in memory.
    2369                 :      */
    2370                 :     class BTreeBuilder
    2371              69 :     {
    2372                 :     protected:
    2373                 : 
    2374                 :         /// key comparison functional
    2375                 :         key_compare     m_key_less;
    2376                 : 
    2377                 :         /// total number of items stored in the tree
    2378                 :         unsigned int    m_size;
    2379                 : 
    2380                 :         /// leaves of the B-tree
    2381                 :         std::vector<LeafNode> m_leaves;
    2382                 : 
    2383                 :         /// typedef of each inner level of the B-tree
    2384                 :         typedef std::vector<InnerNode> innerlevel_type;
    2385                 : 
    2386                 :         /// vector holding a list of b-tree inner levels. Each level contains a
    2387                 :         /// list of inner nodes.
    2388                 :         std::vector<innerlevel_type> m_inners;
    2389                 : 
    2390                 :     public:
    2391                 :         /// Construct a new empty tree builder.
    2392              69 :         BTreeBuilder(const key_compare& key_less)
    2393              69 :             : m_key_less(key_less), m_size(0)
    2394               0 :         {
    2395              69 :         }
    2396                 : 
    2397                 :     protected:
    2398                 :         /// Add a key value in an inner node at given level. Called by Add when
    2399                 :         /// a leaf node overflows.
    2400            9400 :         void AddInner(uint16_t level, const key_type& key)
    2401                 :         {
    2402            9400 :             if (m_inners.size() < level)
    2403                 :             {
    2404              46 :                 CBTREEDB_ASSERT(m_inners.size()+1 == level);
    2405                 : 
    2406                 :                 // Create vector for this level
    2407              46 :                 m_inners.push_back(innerlevel_type());
    2408                 :             }
    2409                 : 
    2410            9400 :             if (m_inners[level-1].size() == 0)
    2411                 :             {
    2412                 :                 // Create first inner node on this level
    2413              46 :                 m_inners[level-1].push_back(InnerNode(level));
    2414                 :             }
    2415                 : 
    2416            9400 :             InnerNode* inner = &m_inners[level-1].back();
    2417                 : 
    2418            9400 :             CBTREEDB_ASSERT(inner->slots == 0 || m_key_less(inner->LastKey(), key));
    2419                 : 
    2420            9400 :             if (inner->IsFull())
    2421                 :             {
    2422              31 :                 CBTREEDB_ASSERT(m_key_less(inner->LastKey(), key));
    2423                 : 
    2424                 :                 // Put last key of leaf key into inner node(s) of higher level
    2425              31 :                 AddInner(inner->level + 1, key);
    2426                 : 
    2427                 :                 // Create new inner node
    2428              31 :                 m_inners[level-1].push_back(InnerNode(level));
    2429              31 :                 inner = &m_inners[level-1].back();
    2430                 :             }
    2431                 :             else
    2432                 :             {
    2433                 :                 // Append Key
    2434            9369 :                 inner->keys[inner->slots++] = key;
    2435                 :             }
    2436            9400 :         }
    2437                 : 
    2438                 :     public:
    2439                 :         /**
    2440                 :          * Insert a new key into the tree together with (offset,size) of the
    2441                 :          * associated data value. The keys must be delivered to this function
    2442                 :          * in ascending order!
    2443                 :          */
    2444         1186941 :         void Add(const key_type& key, uint64_t offset, uint32_t size)
    2445                 :         {
    2446         1186941 :             if (m_leaves.size() == 0) {
    2447                 :                 // create first leaf node
    2448              69 :                 m_leaves.push_back(LeafNode());
    2449                 :             }
    2450                 : 
    2451         1186941 :             LeafNode* leaf = &m_leaves.back();
    2452                 : 
    2453         1186941 :             if (leaf->slots > 0) {
    2454         1186872 :                 CBTREEDB_ASSERT(m_key_less(leaf->LastKey(), key));
    2455                 :             }
    2456                 : 
    2457         1186941 :             if (leaf->IsFull())
    2458                 :             {
    2459                 :                 // put last key into inner node(s)
    2460            9369 :                 AddInner(1, leaf->LastKey());
    2461                 : 
    2462                 :                 // create new leaf
    2463            9369 :                 m_leaves.push_back(LeafNode());
    2464            9369 :                 leaf = &m_leaves.back();
    2465            9369 :                 leaf->baseoffset = offset;
    2466                 :             }
    2467                 : 
    2468                 :             // append key + value items relative offset
    2469         1186941 :             leaf->keys[leaf->slots] = key;
    2470                 : 
    2471         1186941 :             CBTREEDB_ASSERT(offset >= leaf->baseoffset);
    2472         1186941 :             leaf->offsets[leaf->slots] = offset - leaf->baseoffset;
    2473         1186941 :             leaf->offsets[leaf->slots+1] = leaf->offsets[leaf->slots] + size;
    2474                 : 
    2475         1186941 :             ++leaf->slots;
    2476         1186941 :             ++m_size;
    2477         1186941 :         }
    2478                 : 
    2479                 :         /// Returns the number of items added to the tree.
    2480          171222 :         unsigned int Size() const
    2481                 :         {
    2482          171222 :             return m_size;
    2483                 :         }
    2484                 : 
    2485                 :         /// Return highest key currently inserted in the tree
    2486           85532 :         const key_type& GetLastKey() const
    2487                 :         {
    2488           85532 :             CBTREEDB_CHECK(m_size > 0, "No keys inserted in the tree yet");
    2489                 : 
    2490           85532 :             return m_leaves.back().LastKey();
    2491                 :         }
    2492                 : 
    2493                 :         /// Return key previously inserted at given index
    2494           85564 :         void GetIndex(unsigned int index, key_type& outkey, uint32_t& outsize) const
    2495                 :         {
    2496           85564 :             CBTREEDB_CHECK(index < m_size, "Attempting to retrieve out of bounds index.");
    2497                 : 
    2498           85564 :             unsigned int leafnum = index / LeafNodeNumKeys;
    2499           85564 :             unsigned int slot = index % LeafNodeNumKeys;
    2500                 : 
    2501           85564 :             CBTREEDB_ASSERT(leafnum < m_leaves.size());
    2502                 : 
    2503           85564 :             const LeafNode* leaf = &m_leaves[leafnum];
    2504                 : 
    2505           85564 :             CBTREEDB_ASSERT(slot < leaf->slots);
    2506                 : 
    2507           85564 :             outkey = leaf->keys[slot];
    2508           85564 :             outsize = leaf->offsets[slot+1] - leaf->offsets[slot];
    2509           85564 :         }
    2510                 : 
    2511                 : #if 0 // extra debug code for printing the tree
    2512                 : 
    2513                 :         /**
    2514                 :          * Debugging function which prints out all keys in the currently
    2515                 :          * constructed B-tree.
    2516                 :          */
    2517                 :         void Print(std::ostream& os) const
    2518                 :         {
    2519                 :             os << "Leaves:" << std::endl;
    2520                 :             for (unsigned int i = 0; i < m_leaves.size(); ++i)
    2521                 :             {
    2522                 :                 os << i << ":";
    2523                 :                 for (unsigned int j = 0; j < m_leaves[i].slots; ++j)
    2524                 :                 {
    2525                 :                     os << " " << m_leaves[i].keys[j];
    2526                 :                 }
    2527                 :                 os << std::endl;
    2528                 :             }
    2529                 : 
    2530                 :             for (unsigned int l = 0; l < m_inners.size(); ++l)
    2531                 :             {
    2532                 :                 os << "Level " << (l+1) << std::endl;
    2533                 : 
    2534                 :                 for (unsigned int i = 0; i < m_inners[l].size(); ++i)
    2535                 :                 {
    2536                 :                     os << i << ":";
    2537                 :                     for (unsigned int j = 0; j < m_inners[l][i].slots; ++j)
    2538                 :                     {
    2539                 :                         os << " " << m_inners[l][i].keys[j];
    2540                 :                     }
    2541                 :                     os << std::endl;
    2542                 :                 }
    2543                 :             }
    2544                 :         }
    2545                 : 
    2546                 : #endif // extra debug code for printing the tree
    2547                 : 
    2548                 :         /**
    2549                 :          * Write function which outputs the constructed B-tree to a stream. The
    2550                 :          * levels are outputted from root to leaf nodes in order. Updates the
    2551                 :          * given signature page with B-tree information.
    2552                 :          */
    2553              69 :         void Write(std::ostream& os, SignaturePage& signature)
    2554                 :         {
    2555              69 :             signature.btree_pagesize = BTreePageSize;
    2556              69 :             signature.btree_levels = m_inners.size() + 1;
    2557              69 :             signature.btree_leaves = m_leaves.size();
    2558                 : 
    2559                 :             // Fill in childrenoffset field by precomputing the offsets.
    2560                 :             {
    2561                 :                 // start with offset after the root (inner) node.
    2562              69 :                 uint32_t offset = sizeof(InnerNode);
    2563                 : 
    2564             115 :                 for (int l = m_inners.size()-1; l >= 0; --l)
    2565                 :                 {
    2566             123 :                     for (unsigned int i = 0; i < m_inners[l].size(); ++i)
    2567                 :                     {
    2568              77 :                         InnerNode& inner = m_inners[l][i];
    2569                 : 
    2570              77 :                         inner.childrenoffset = offset;
    2571                 : 
    2572                 :                         // add all children node sizes to offset.
    2573              77 :                         offset += (inner.slots + 1) * sizeof(InnerNode);
    2574                 :                     }
    2575                 :                 }
    2576                 :             }
    2577                 : 
    2578                 :             // Write out inner nodes
    2579                 : 
    2580              69 :             uint64_t writesize = 0;
    2581              69 :             SHA256 sha;
    2582                 : 
    2583             115 :             for (int l = m_inners.size()-1; l >= 0; --l)
    2584                 :             {
    2585             123 :                 for (unsigned int i = 0; i < m_inners[l].size(); ++i)
    2586                 :                 {
    2587              77 :                     os.write(reinterpret_cast<char*>(&m_inners[l][i]), sizeof(m_inners[l][i]));
    2588              77 :                     CBTREEDB_CHECK(os.good(), "Error writing B-tree inner node page to output stream.");
    2589                 : 
    2590              77 :                     sha.update(&m_inners[l][i], sizeof(m_inners[l][i]));
    2591                 : 
    2592              77 :                     writesize += sizeof(m_inners[l][i]);
    2593                 :                 }
    2594                 :             }
    2595                 : 
    2596                 :             // Write out leaf nodes
    2597                 : 
    2598              69 :             signature.btree_firstleaf = writesize;
    2599                 : 
    2600            9507 :             for (unsigned int i = 0; i < m_leaves.size(); ++i)
    2601                 :             {
    2602            9438 :                 os.write(reinterpret_cast<char*>(&m_leaves[i]), sizeof(m_leaves[i]));
    2603            9438 :                 CBTREEDB_CHECK(os.good(), "Error writing B-tree leaf node page to output stream.");
    2604                 : 
    2605            9438 :                 sha.update(&m_leaves[i], sizeof(m_leaves[i]));
    2606                 : 
    2607            9438 :                 writesize += sizeof(m_leaves[i]);
    2608                 :             }
    2609                 : 
    2610              69 :             sha.final(signature.btree_sha256);
    2611              69 :             signature.btree_size = writesize;
    2612              69 :         }
    2613                 :     };
    2614                 : 
    2615                 : public:
    2616                 :     /**
    2617                 :      * @brief Writer is used to construct an constant B-tree database from an
    2618                 :      * unsorted input sequence.
    2619                 :      *
    2620                 :      * The writer class is fed with a possibly unordered sequence of keys
    2621                 :      * together with their data. The complete data is buffered (and sorted) by
    2622                 :      * the class! This means it will use a lot of virtual memory, so make sure
    2623                 :      * your swap is large enough or use WriterSequential.
    2624                 :      */
    2625                 :     class Writer
    2626              37 :     {
    2627                 :     protected:
    2628                 :         /// Typedef key -> data mapping
    2629                 :         typedef std::map<key_type, std::string, key_compare> datamap_type;
    2630                 : 
    2631                 :         /// STL map to store all key -> values.
    2632                 :         datamap_type    m_datamap;
    2633                 : 
    2634                 :         /// key comparison functional
    2635                 :         key_compare     m_key_less;
    2636                 : 
    2637                 :         /// Signature characters to begin file with.
    2638                 :         char            m_signaturestr[8];
    2639                 : 
    2640                 :     public:
    2641                 :         /// Constructor
    2642              37 :         Writer(const key_compare& key_less=key_compare())
    2643                 :             : m_datamap(key_less),
    2644              37 :               m_key_less(key_less)
    2645                 :         {
    2646              37 :             memcpy(m_signaturestr, "cbtreedb", 8);
    2647              37 :         }
    2648                 : 
    2649                 :         /// Add a new key -> values mapping to the database.
    2650         1101377 :         void Add(const key_type& key, const void* data, size_t size)
    2651                 :         {
    2652         1101377 :             m_datamap.insert( typename datamap_type::value_type(key, std::string(reinterpret_cast<const char*>(data), size)) );
    2653         1101377 :         }
    2654                 : 
    2655                 :         /// Add a new key -> values mapping to the database.
    2656               0 :         void Add(const key_type& key, const std::string& data)
    2657                 :         {
    2658               0 :             Add(key, data.data(), data.size());
    2659               0 :         }
    2660                 : 
    2661                 :         /// Return number of items inserted into mapping
    2662              35 :         size_t Size() const
    2663                 :         {
    2664              35 :             return m_datamap.size();
    2665                 :         }
    2666                 : 
    2667                 :         /**
    2668                 :          * Change the database signature from 'cbtreedb' to a custom
    2669                 :          * string. The signature is always 8 bytes long. Longer strings are
    2670                 :          * truncated, shorter ones padded with nulls.
    2671                 :          */
    2672              37 :         void SetSignature(const char* newsignature)
    2673                 :         {
    2674              37 :             unsigned int i = 0;
    2675             333 :             for(; i < 8 && newsignature[i]; ++i)
    2676             296 :                 m_signaturestr[i] = newsignature[i];
    2677                 : 
    2678              37 :             for(; i < 8; ++i)
    2679               0 :                 m_signaturestr[i] = 0;
    2680              37 :         }
    2681                 : 
    2682                 :         /// Write the complete database out to a file. Because the stream must
    2683                 :         /// be seekable, a simple ostream will not suffice.
    2684              37 :         void Write(std::ostream& os) const
    2685                 :         {
    2686              37 :             os.clear();
    2687              37 :             os.seekp(0, std::ios::beg);
    2688                 : 
    2689                 :             // Write zeroed signature block to be overwritten when the file is
    2690                 :             // finialized.
    2691                 : 
    2692                 :             SignaturePage signature;
    2693              37 :             memset(&signature, 0, sizeof(signature));
    2694              37 :             os.write(reinterpret_cast<char*>(&signature), sizeof(signature));
    2695                 : 
    2696                 :             char signature_padding[SignaturePageSize - sizeof(SignaturePage)];
    2697              37 :             memset(signature_padding, 0, SignaturePageSize - sizeof(SignaturePage));
    2698              37 :             os.write(signature_padding, SignaturePageSize - sizeof(SignaturePage));
    2699                 : 
    2700              37 :             CBTREEDB_CHECK(os.good(), "Error writing signature block out output stream.");
    2701                 : 
    2702                 :             // Prepare signature for data
    2703                 : 
    2704              37 :             memcpy(signature.signature, m_signaturestr, 8);
    2705              37 :             signature.version = 0x00010000;
    2706              37 :             signature.app_version_id = AppVersionId;
    2707              37 :             signature.items = m_datamap.size();
    2708              37 :             signature.key_size = sizeof(key_type);
    2709                 : 
    2710                 :             // Construct a and write a constant B-Tree
    2711                 : 
    2712              37 :             BTreeBuilder btree(m_key_less);
    2713              37 :             uint64_t dataoffset = 0;
    2714                 : 
    2715         1101414 :             for (typename datamap_type::const_iterator di = m_datamap.begin();
    2716                 :                  di != m_datamap.end(); ++di)
    2717                 :             {
    2718         1101377 :                 btree.Add(di->first, dataoffset, di->second.size());
    2719         1101377 :                 dataoffset += di->second.size();
    2720                 :             }
    2721                 : 
    2722              37 :             signature.btree_offset = BTreePageSize;
    2723              37 :             btree.Write(os, signature);
    2724                 : 
    2725              37 :             CBTREEDB_CHECK(os.good(), "Error writing B-tree pages to output stream.");
    2726              37 :             CBTREEDB_ASSERT(os.tellp() == std::ostream::pos_type(SignaturePageSize + signature.btree_size));
    2727                 : 
    2728                 :             // Write all value blobs to file
    2729                 : 
    2730              37 :             SHA256 sha;
    2731                 : 
    2732         1101414 :             for (typename datamap_type::const_iterator di = m_datamap.begin();
    2733                 :                  di != m_datamap.end(); ++di)
    2734                 :             {
    2735         1101377 :                 os.write(di->second.data(), di->second.size());
    2736         1101377 :                 CBTREEDB_CHECK(os.good(), "Error writing data block to output stream.");
    2737                 : 
    2738         1101377 :                 sha.update(di->second.data(), di->second.size());
    2739                 :             }
    2740                 : 
    2741              37 :             CBTREEDB_ASSERT(os.tellp() == std::ostream::pos_type(SignaturePageSize + signature.btree_size + dataoffset));
    2742                 : 
    2743                 :             // Fill in signature page
    2744                 : 
    2745              37 :             signature.value_offset = SignaturePageSize + signature.btree_size;
    2746              37 :             signature.value_size = dataoffset;
    2747              37 :             sha.final(signature.value_sha256);
    2748                 : 
    2749                 :             // Calculate header checksum
    2750                 : 
    2751              37 :             signature.header_crc32 = CRC32::digest(reinterpret_cast<char*>(&signature)+16, sizeof(signature)-16);
    2752                 : 
    2753              37 :             os.seekp(0, std::ios::beg);
    2754              37 :             CBTREEDB_CHECK(os.good(), "Error seeking back to signature page in output stream.");
    2755              37 :             CBTREEDB_ASSERT(os.tellp() == std::ostream::pos_type(0));
    2756                 : 
    2757              37 :             os.write(reinterpret_cast<char*>(&signature), sizeof(signature));
    2758              37 :             CBTREEDB_CHECK(os.good(), "Error writing signature page to output stream.");
    2759              37 :         }
    2760                 :     };
    2761                 : 
    2762                 : public:
    2763                 :     /**
    2764                 :      * @brief WriterSequential is used to construct a constant B-tree database
    2765                 :      * from an _ordered_ input sequence.
    2766                 :      *
    2767                 :      * The writer class is fed in two phases. In phase one the ordered sequence
    2768                 :      * of keys together with their value data size (without contents) is
    2769                 :      * delivered to the class via the Add() function. Phase two is started by
    2770                 :      * calling WriteHeader() followed by a sequence to WriteValue() calls for
    2771                 :      * each of the predeclared key-value pairs. The value data is written
    2772                 :      * directly to the file and not buffered. The write loop is terminated by
    2773                 :      * WriteFinalize(), which finalizes the database file.
    2774                 :      */
    2775                 :     class WriterSequential
    2776              32 :     {
    2777                 :     protected:
    2778                 :         /// key comparison functional
    2779                 :         key_compare     m_key_less;
    2780                 : 
    2781                 :         /// phase 1: b-tree object built from sequential predeclared sequence
    2782                 :         BTreeBuilder    m_btree;
    2783                 : 
    2784                 :         /// phase 1: value data offset counter for predeclared sequence
    2785                 :         uint64_t        m_dataoffset;
    2786                 : 
    2787                 :         /// signature characters to begin file with
    2788                 :         char            m_signaturestr[8];
    2789                 : 
    2790                 :         /// signature page of current written file
    2791                 :         SignaturePage   m_signature;
    2792                 : 
    2793                 :         /// phase 2: output stream
    2794                 :         std::ostream*   m_ostream;
    2795                 : 
    2796                 :         /// phase 2: current position in array
    2797                 :         uint32_t        m_currpos;
    2798                 : 
    2799                 :         /// phase 2: current offset in output stream
    2800                 :         uint64_t        m_curroffset;
    2801                 : 
    2802                 :         /// phase 2: running digest of the value area
    2803                 :         SHA256          m_datasha;
    2804                 : 
    2805                 :     public:
    2806                 :         /// Constructor
    2807              32 :         WriterSequential(const key_compare& key_less=key_compare())
    2808                 :             : m_key_less(key_less),
    2809                 :               m_btree(key_less),
    2810                 :               m_dataoffset(0),
    2811                 :               m_ostream(NULL),
    2812              32 :               m_currpos(-1)
    2813                 :         {
    2814              32 :             memcpy(m_signaturestr, "cbtreedb", 8);
    2815              32 :         }
    2816                 : 
    2817                 :         /// Add a new key -> value size mapping to the database. The keys must
    2818                 :         /// be added in ascending order.
    2819           85564 :         void Add(const key_type& key, uint32_t size)
    2820                 :         {
    2821           85564 :             CBTREEDB_CHECK(m_btree.Size() == 0 || m_key_less(m_btree.GetLastKey(), key),
    2822                 :                            "Key sequence for Add() must be ascending.");
    2823           85564 :             CBTREEDB_CHECK(m_ostream == NULL,
    2824                 :                            "Cannot declare keys after starting phase 2.");
    2825                 : 
    2826           85564 :             m_btree.Add(key, m_dataoffset, size);
    2827           85564 :             m_dataoffset += size;
    2828           85564 :         }
    2829                 : 
    2830                 :         /// Return number of pairs inserted into mapping
    2831              30 :         size_t Size() const
    2832                 :         {
    2833              30 :             return m_btree.Size();
    2834                 :         }
    2835                 : 
    2836                 :         /**
    2837                 :          * Change the database signature from 'cbtreedb' to a custom
    2838                 :          * string. The signature is always 8 bytes long. Longer strings are
    2839                 :          * truncated, shorter ones padded with nulls.
    2840                 :          */
    2841              32 :         void SetSignature(const char* newsignature)
    2842                 :         {
    2843              32 :             unsigned int i = 0;
    2844             288 :             for(; i < 8 && newsignature[i]; ++i)
    2845             256 :                 m_signaturestr[i] = newsignature[i];
    2846                 : 
    2847              32 :             for(; i < 8; ++i)
    2848               0 :                 m_signaturestr[i] = 0;
    2849              32 :         }
    2850                 : 
    2851                 :         /// Write header and b-tree to file stream. Starts Phase 2.
    2852              32 :         void WriteHeader(std::ostream& os)
    2853                 :         {
    2854              32 :             CBTREEDB_CHECK(m_ostream == NULL,
    2855                 :                            "Cannot write header again in phase 2.");
    2856                 : 
    2857              32 :             os.seekp(0, std::ios::beg);
    2858                 : 
    2859                 :             // write zeroed signature page to be overwritten when the file is
    2860                 :             // finialized.
    2861              32 :             memset(&m_signature, 0, sizeof(m_signature));
    2862              32 :             os.write(reinterpret_cast<char*>(&m_signature), sizeof(m_signature));
    2863                 : 
    2864                 :             char signature_padding[SignaturePageSize - sizeof(SignaturePage)];
    2865              32 :             memset(signature_padding, 0, SignaturePageSize - sizeof(SignaturePage));
    2866              32 :             os.write(signature_padding, SignaturePageSize - sizeof(SignaturePage));
    2867                 : 
    2868              32 :             CBTREEDB_CHECK(os.good(), "Error writing signature page to output stream.");
    2869                 : 
    2870                 :             // prepare signature for data
    2871                 : 
    2872              32 :             memcpy(m_signature.signature, m_signaturestr, 8);
    2873              32 :             m_signature.version = 0x00010000;
    2874              32 :             m_signature.app_version_id = AppVersionId;
    2875              32 :             m_signature.btree_offset = SignaturePageSize;
    2876              32 :             m_signature.items = m_btree.Size();
    2877              32 :             m_signature.key_size = sizeof(key_type);
    2878              32 :             m_signature.value_size = m_dataoffset;
    2879                 : 
    2880                 :             // write constant B-tree
    2881                 : 
    2882              32 :             m_btree.Write(os, m_signature);
    2883              32 :             CBTREEDB_CHECK(os.good(), "Error writing B-tree pages to output stream.");
    2884              32 :             CBTREEDB_ASSERT(os.tellp() == std::ostream::pos_type(SignaturePageSize + m_signature.btree_size));
    2885                 : 
    2886                 :             // prepare for writing value area
    2887                 : 
    2888              32 :             m_ostream = &os;
    2889              32 :             m_currpos = 0;
    2890              32 :             m_curroffset = 0;
    2891              32 :             m_datasha.clear();
    2892              32 :         }
    2893                 : 
    2894                 :         /// Sequentially write value blobs to file. The key-value sequence must
    2895                 :         /// match the pre-declared sequence.
    2896           85564 :         void WriteValue(const key_type& key, const void* data, uint32_t size)
    2897                 :         {
    2898           85564 :             CBTREEDB_CHECK(m_ostream != NULL,
    2899                 :                            "Cannot write data, because phase 2 was not started.");
    2900                 : 
    2901           85564 :             CBTREEDB_CHECK(m_currpos < m_btree.Size(),
    2902                 :                            "Invalid key in WriteData() beyond end of predeclaration.");
    2903                 : 
    2904           85564 :             CBTREEDB_CHECK(m_ostream->tellp() == std::ostream::pos_type(SignaturePageSize + m_signature.btree_size + m_curroffset),
    2905                 :                            "Output stream data position is incorrect.");
    2906                 : 
    2907                 :             key_type expectedkey;
    2908                 :             uint32_t expectedsize;
    2909                 : 
    2910           85564 :             m_btree.GetIndex(m_currpos, expectedkey, expectedsize);
    2911                 : 
    2912           85564 :             CBTREEDB_CHECK(!m_key_less(key, expectedkey) && !m_key_less(expectedkey, key), // test equality
    2913                 :                            "Key in WriteData() mismatches predeclared sequence.");
    2914                 : 
    2915           85564 :             CBTREEDB_CHECK(size == expectedsize,
    2916                 :                            "Value data size in WriteData() mismatches predeclared sequence.");
    2917                 : 
    2918           85564 :             m_ostream->write(reinterpret_cast<const char*>(data), size);
    2919           85564 :             CBTREEDB_CHECK(m_ostream->good(), "Error writing data blocks to output stream.");
    2920                 : 
    2921           85564 :             m_datasha.update(data, size);
    2922                 : 
    2923           85564 :             ++m_currpos;
    2924           85564 :             m_curroffset += size;
    2925           85564 :         }
    2926                 : 
    2927                 :         /// Sequentially write value blobs to file. The key-value sequence must
    2928                 :         /// match the pre-declared sequence.
    2929               0 :         void WriteValue(const key_type& key, const std::string& data)
    2930                 :         {
    2931               0 :             return WriteValue(key, data.data(), data.size());
    2932                 :         }
    2933                 : 
    2934                 :         /// Finalize database file
    2935              32 :         void WriteFinalize()
    2936                 :         {
    2937              32 :             CBTREEDB_CHECK(m_ostream != NULL,
    2938                 :                            "Cannot write data, because phase 2 was not started.");
    2939                 : 
    2940              32 :             CBTREEDB_CHECK(m_currpos == m_btree.Size(),
    2941                 :                            "WriteFinalize() called before end of predeclared sequence.");
    2942                 : 
    2943              32 :             CBTREEDB_CHECK(m_ostream->tellp() == std::ostream::pos_type(SignaturePageSize + m_signature.btree_size + m_signature.value_size),
    2944                 :                            "Output stream data position is incorrect.");
    2945                 : 
    2946                 :             // Fill in signature page
    2947                 : 
    2948              32 :             m_signature.value_offset = SignaturePageSize + m_signature.btree_size;
    2949              32 :             m_datasha.final(m_signature.value_sha256);
    2950                 : 
    2951                 :             // Calculate header checksum
    2952                 : 
    2953              32 :             m_signature.header_crc32 = CRC32::digest(reinterpret_cast<char*>(&m_signature)+16, sizeof(m_signature)-16);
    2954                 : 
    2955              32 :             m_ostream->seekp(0, std::ios::beg);
    2956              32 :             CBTREEDB_CHECK(m_ostream->good(), "Error seeking back to signature page in output stream.");
    2957                 : 
    2958              32 :             m_ostream->write(reinterpret_cast<char*>(&m_signature), sizeof(m_signature));
    2959              32 :             CBTREEDB_CHECK(m_ostream->good(), "Error writing signature page to output stream.");
    2960                 : 
    2961              32 :             m_ostream->flush();
    2962              32 :             m_ostream = NULL;
    2963              32 :         }
    2964                 :     };
    2965                 : };
    2966                 : 
    2967                 : } // namespace stx
    2968                 : 
    2969                 : #endif // _STX_CBTREEDB_H_
 |