Stxxl  1.4.0
doxymain.h
Go to the documentation of this file.
00001 /*! \mainpage Documentation for STXXL library
00002  *
00003  *  \image html layer_diagram.png
00004  *
00005  * <br><br>
00006  * The core of \c S<small>TXXL</small> is an implementation of the C++
00007  * standard template library STL for external memory (out-of-core)
00008  * computations, i.e., \c S<small>TXXL</small> implements containers and algorithms
00009  * that can process huge volumes of data that only fit on
00010  * disks. While the compatibility to the STL supports
00011  * ease of use and compatibility with existing applications,
00012  * another design priority is high performance.
00013  * Here is a selection of \c S<small>TXXL</small> performance features:
00014  * - transparent support of multiple disks
00015  * - variable block lengths
00016  * - overlapping of I/O and computation
00017  * - prevention of OS file buffering overhead
00018  * - algorithm pipelining
00019  * - utilization of multiple processor cores for internal computation
00020  *
00021  *
00022  * \section platforms Supported Operating Systems
00023  * - Linux (kernel >= 2.4.18)
00024  * - Mac OS X
00025  * - FreeBSD
00026  * - other POSIX compatible systems should work, but have not been tested
00027  * - Windows 2000/XP/Vista/7
00028  *
00029  *
00030  * \section compilers Supported Compilers
00031  *
00032  * The following compilers have been tested in different
00033  * \c S<small>TXXL</small> configurations.
00034  * Other compilers might work, too, but we don't have the resources
00035  * (systems, compilers or time) to test them.
00036  * Feedback is welcome.
00037  *
00038  * The compilers marked with '*' are the developer's favorite choices
00039  * and are most thoroughly tested.
00040  *
00041  * \verbatim
00042                 |         parallel            parallel
00043                 |  stxxl   stxxl     stxxl     stxxl
00044   compiler      |                   + boost   + boost
00045 ----------------+----------------------------------------
00046   GCC 4.6 c++0x |    x     PMODE       x       PMODE 
00047   GCC 4.6       |    x     PMODE       x       PMODE 
00048   GCC 4.5 c++0x |    x     PMODE       x       PMODE 
00049   GCC 4.5       |    x     PMODE       x       PMODE 
00050 * GCC 4.4 c++0x |    x     PMODE       x       PMODE 
00051   GCC 4.4       |    x     PMODE       x       PMODE 
00052   GCC 4.3 c++0x |    x     PMODE²      x       PMODE²
00053   GCC 4.3       |    x     PMODE²      x       PMODE²
00054   GCC 4.2       |    x     MCSTL       x       MCSTL
00055   GCC 4.1       |    x       -         x         -
00056   GCC 4.0       |    x       -         x         -
00057   GCC 3.4       |    x       -         x         -
00058   GCC 3.3       |    o       -         o         -
00059   GCC 2.95      |    -       -         -         -
00060   ICPC 12.0.191 |    x¹    PMODE¹²     x¹      PMODE¹²
00061   ICPC 12.0.191 |    x¹    MCSTL¹      x¹      MCSTL¹
00062 * ICPC 11.1.075 |    x¹    MCSTL¹      x¹      MCSTL¹
00063   ICPC 11.0.084 |    x¹    MCSTL¹      x¹      MCSTL¹
00064   ICPC 10.1.026 |    x¹    MCSTL¹      x¹      MCSTL¹
00065   ICPC 10.0.026 |    x¹    MCSTL¹      x¹      MCSTL¹
00066   ICPC 9.1.053  |    x¹      -         x¹        -
00067   ICPC 9.0.032  |    x¹      -         x¹        -
00068   clang++ 2.9   |    x       -         x         -
00069   MSVC 2010 10.0|    -       -         x         -
00070 * MSVC 2008 9.0 |    -       -         x         -
00071   MSVC 2005 8.0 |    -       -         x         -
00072 
00073  x   = full support
00074  o   = partial support
00075  -   = unsupported
00076  ?   = untested
00077  PMODE = supports parallelization using libstdc++ parallel mode
00078  MCSTL = supports parallelization using the MCSTL library (superseded by
00079        PMODE, introduced in gcc 4.3)
00080  ¹   = you may have to add a -gcc-name=<gcc-x.y> option if the system default
00081        gcc does not come in the correct version:
00082        icpc 9.0: use with gcc 3.x
00083        icpc 9.1: use with gcc before 4.2
00084        icpc 10.x, 11.x, 12.0 with mcstl support: use with gcc 4.2
00085        icpc 12.0 with pmode support: use with gcc 4.3
00086  ²   = gcc 4.3 only provides partial support for the libstdc++ parallel mode,
00087        full support requires gcc 4.4 or later
00088 \endverbatim
00089  *
00090  *
00091  * \section boost Supported BOOST versions
00092  *
00093  * The <a href="http://www.boost.org">Boost</a> libraries are required on
00094  * Windows platforms using MSVC compiler and optional on other platforms.
00095  *
00096  * \c S<small>TXXL</small> has been tested with Boost 1.40.0, 1.42.0 and 1.46.1.
00097  * Other versions may work, too, but older versions will not get support.
00098  *
00099  *
00100  * \section installation Instructions on installation, usage, configuration
00101  *
00102  * - \link installation_linux_gcc Installation, usage, configuration (Linux/Unix &ndash; g++/icpc/clang++) \endlink
00103  * - \link installation_msvc Installation, usage, configuration (Windows &ndash; Microsoft Visual C++) \endlink
00104  *
00105  * - \link install-svn Installing from subversion \endlink
00106  *
00107  *
00108  * \section questions Questions
00109  *
00110  * - Questions concerning use and development of the \c S<small>TXXL</small>
00111  * library should be posted to the
00112  * <b><a href="http://sourceforge.net/projects/stxxl/forums">FORUMS</a></b>.
00113  * Please search the forum before posting,
00114  * your question may have been answered before.
00115  *
00116  * \section bugreports Bug Reports
00117  *
00118  * - Bugs should be reported in the 
00119  *   <b><a href="https://stxxl.ae.cs.uni-frankfurt.de/bugs/">Bugzilla Bug Tracker</a></b>
00120  *
00121  * - \link FAQ FAQ - Frequently Asked Questions \endlink
00122  *
00123  *
00124  * \section license License
00125  *
00126  * \c S<small>TXXL</small> is distributed under the Boost Software License, Version 1.0.<br>
00127  * You can find a copy of the license in the accompanying file \c LICENSE_1_0.txt or online at
00128  * <a href="http://www.boost.org/LICENSE_1_0.txt">http://www.boost.org/LICENSE_1_0.txt</a>.
00129  */
00130 
00131 
00132 /*!
00133  * \page FAQ FAQ - Frequently Asked Questions
00134  *
00135  * \section FAQ-latest Latest version of this FAQ
00136  * The most recent version of this FAQ can always be found
00137  * <a href="http://algo2.iti.kit.edu/stxxl/trunk/FAQ.html">here</a>.
00138  *
00139  *
00140  * \section q1 References to Elements in External Memory Data Structures
00141  *
00142  * You should not pass or store references to elements in an external memory
00143  * data structure. When the reference is used, the block that contains the
00144  * element may be no longer in internal memory.<br>
00145  * Use/pass an iterator (reference) instead.<br>
00146  * For an \c stxxl::vector with \c n pages and LRU replacement strategy, it
00147  * can be guaranteed that the last \c n references
00148  * obtained using \c stxxl::vector::operator[] or dereferencing
00149  * an iterator are valid.
00150  * However, if \c n is 1, even a single innocent-looking line like
00151  * \verbatim std::cout << v[0] << " " << v[1000000] << std::endl; \endverbatim can lead to
00152  * inconsistent results.
00153  * <br>
00154  *
00155  *
00156  * \section q2 Parameterizing STXXL Containers
00157  *
00158  * STXXL container types like stxxl::vector can be parameterized only with a value type that is a
00159  * <a href="http://en.wikipedia.org/wiki/Plain_old_data_structures">POD</a>
00160  * (i. e. no virtual functions, no user-defined copy assignment/destructor, etc.)
00161  * and does not contain references (including pointers) to internal memory.
00162  * Usually, "complex" data types do not satisfy this requirements.
00163  *
00164  * This is why stxxl::vector<std::vector<T> > and stxxl::vector<stxxl::vector<T> > are invalid.
00165  * If appropriate, use std::vector<stxxl::vector<T> >, or emulate a two-dimensional array by
00166  * doing index calculation.
00167  *
00168  *
00169  * \section q3 Thread-Safety
00170  *
00171  * The I/O and block management layers are thread-safe (since release 1.1.1).
00172  * The user layer data structures are not thread-safe.<br>
00173  * I.e. you may access <b>different</b> \c S<small>TXXL</small> data structures from concurrent threads without problems,
00174  * but you should not share a data structure between threads (without implementing proper locking yourself).<br>
00175  * This is a design choice, having the data structures thread-safe would mean a significant performance loss.
00176  *
00177  *
00178  * \section q4 I have configured several disks to use with STXXL. Why does STXXL fail complaining about the lack of space? According to my calclulations, the space on the disks should be sufficient.
00179  *
00180  * This may happen if the disks have different size. With the default parameters \c S<small>TXXL</small> containers use randomized block-to-disk allocation strategies
00181  * that distribute data evenly between the disks but ignore the availability of free space on them. 
00182  *
00183  *
00184  * \section q5 STXXL in a Microsoft CLR Library
00185  *
00186  * From STXXL user Christian, posted in the <a href="https://sourceforge.net/projects/stxxl/forums/forum/446474/topic/3407329">forum</a>:
00187  *
00188  * Precondition: I use STXXL in a Microsoft CLR Library (a special DLL). That means that managed code and native code (e.g. STXXL) have to co-exist in your library.
00189  *
00190  * Symptom: Application crashes at process exit, when the DLL is unloaded.
00191  *
00192  * Cause: STXXL's singleton classes use the \c atexit() function to destruct themselves at process exit. The exit handling will cause the process to crash at exit (still unclear if it's a bug or a feature of the MS runtime).
00193  *
00194  * Solution:
00195  *
00196  * 1.) Compiled STXXL static library with \c STXXL_NON_DEFAULT_EXIT_HANDLER defined.
00197  *
00198  * 2.) For cleanup, \c stxxl::run_exit_handlers() has now to be called manually. To get this done automatically:
00199  *
00200  * Defined a CLI singleton class "Controller":
00201  *
00202  * \verbatim
00203 public ref class Controller {
00204 private: 
00205     static Controller^ instance = gcnew Controller;
00206     Controller();
00207 };
00208 \endverbatim
00209  *
00210  *     Registered my own cleanup function in Controller's constructor which will manage to call \c stxxl::run_exit_handlers():
00211  *
00212  * \verbatim
00213 #pragma managed(push, off)
00214 static int myexitfn()
00215 {
00216     stxxl::run_exit_handlers();
00217     return 0;
00218 }
00219 #pragma managed(pop)
00220 
00221 Controller::Controller()
00222 {
00223     onexit(myexitfn);
00224 }
00225 \endverbatim
00226  *
00227  *
00228  * \section q6 How can I credit STXXL, and thus foster its development?
00229  *
00230  * - For all users:
00231  *   - Sign up at Ohloh and add yourself as an STXXL user / rate STXXL: http://www.ohloh.net/p/stxxl
00232  *   - Rate STXXL at heise Software-Verzeichnis (German): http://www.heise.de/software/download/stxxl/76072
00233  *   - Rate STXXL at SourceForge: https://sourceforge.net/projects/stxxl/
00234  *
00235  * - For scientific work:  Cite the papers mentioned here: http://stxxl.sourceforge.net/
00236  *
00237  * - For industrial users:  Tell us the name of your company, so we can use it as a reference.
00238  *
00239  */
00240 
00241 
00242 /*!
00243  * \page install Instructions on Installation, usage, configuration
00244  * - \link installation_linux_gcc Installation, usage, configuration (Linux/Unix &ndash; g++/icpc/clang++) \endlink
00245  * - \link installation_msvc Installation, usage, configuration (Windows &ndash; Microsoft Visual C++) \endlink
00246  *
00247  * - \link install-svn Installing from subversion \endlink
00248  */
00249 
00250 
00251 /*!
00252  * \page installation_linux_gcc Installation, usage, configuration (Linux/Unix &ndash; g++/icpc/clang++)
00253  *
00254  * \section download_linux_gcc Download and Extraction
00255  *
00256  * - Download the latest gzipped tarball from
00257  *   <a href="http://sourceforge.net/projects/stxxl/files/stxxl/">SourceForge</a>
00258  * - Unpack in some directory executing: \c tar \c zfxv \c stxxl-x.y.z.tgz
00259  * - Change to \c stxxl directory: \c cd \c stxxl-x.y.z
00260  *
00261  * \section library_compilation_linux_gcc Library Compilation
00262  *
00263  * - Run: \verbatim make config_gnu \endverbatim to create a template \c make.settings.local file.
00264  *   Note: this will produce some warnings and abort with an error, which is intended.
00265  * - (optionally) change the \c make.settings.local file according to your system configuration:
00266  *   - (optionally) set the \c STXXL_ROOT variable to \c S<small>TXXL</small> root directory
00267  *     ( \c directory_where_you_unpacked_the_tar_ball/stxxl-x.y.z )
00268  *   - if you want \c S<small>TXXL</small> to use <a href="http://www.boost.org">Boost</a> libraries
00269  *     (you should have the Boost libraries already installed)
00270  *     - change the \c USE_BOOST variable to \c yes
00271  *     - change the \c BOOST_ROOT variable to the Boost root path, unless Boost is installed in system default search paths.
00272  *   - if you want \c S<small>TXXL</small> to use the <a href="http://gcc.gnu.org/onlinedocs/libstdc++/manual/parallel_mode.html">libstdc++ parallel mode</a>
00273  *     - use GCC version 4.3 or later
00274  *     - use the targets \c library_g++_pmode and \c tests_g++_pmode instead of the ones listed below
00275  *   - if you want \c S<small>TXXL</small> to use the <a href="http://algo2.iti.kit.edu/singler/mcstl/">MCSTL</a>
00276  *     library (you should have the MCSTL library already installed)
00277  *     - change the \c MCSTL_ROOT variable to the MCSTL root path
00278  *     - use the targets \c library_g++_mcstl and \c tests_g++_mcstl instead of the ones listed below
00279  *   - (optionally) set the \c OPT variable to \c -O3 or other g++ optimization level you like (default: \c -O3 )
00280  *   - (optionally) set the \c DEBUG variable to \c -g or other g++ debugging option
00281  *     if you want to produce a debug version of the Stxxl library or Stxxl examples (default: not set)
00282  *   - for more variables to tune take a look at \c make.settings.gnu ,
00283  *     they are usually overridden by settings in \c make.settings.local,
00284  *     using \c CPPFLAGS and \c LDFLAGS, for example, you can add arbitrary compiler and linker options
00285  * - Run: \verbatim make library_g++ \endverbatim
00286  * - Run: \verbatim make tests_g++ \endverbatim (optional, if you want to compile and run some test programs)
00287  *
00288  *
00289  * \section build_apps Building an Application
00290  *
00291  * After compiling the library, some Makefile variables are written to
00292  * \c stxxl.mk (\c pmstxxl.mk if you have built with parallel mode, \c mcstxxl.mk if you have built with MCSTL) in your
00293  * \c STXXL_ROOT directory. This file should be included from your
00294  * application's Makefile.
00295  *
00296  * The following variables can be used:
00297  * - \c STXXL_CXX - the compiler used to build the \c S<small>TXXL</small>
00298  *      library, it's recommended to use the same to build your applications
00299  * - \c STXXL_CPPFLAGS - add these flags to the compile commands
00300  * - \c STXXL_LDLIBS - add these libraries to the link commands
00301  *
00302  * An example Makefile for an application using \c S<small>TXXL</small>:
00303  * \verbatim
00304 STXXL_ROOT      ?= .../stxxl
00305 STXXL_CONFIG    ?= stxxl.mk
00306 include $(STXXL_ROOT)/$(STXXL_CONFIG)
00307 
00308 # use the variables from stxxl.mk
00309 CXX              = $(STXXL_CXX)
00310 CPPFLAGS        += $(STXXL_CPPFLAGS)
00311 LDLIBS          += $(STXXL_LDLIBS)
00312 
00313 # add your own optimization, warning, debug, ... flags
00314 # (these are *not* set in stxxl.mk)
00315 CPPFLAGS        += -O3 -Wall -g -DFOO=BAR
00316 
00317 # build your application
00318 # (my_example.o is generated from my_example.cpp automatically)
00319 my_example.bin: my_example.o
00320         $(CXX) $(CXXFLAGS) $(CPPFLAGS) $(LDFLAGS) my_example.o -o $@ $(LDLIBS)
00321 \endverbatim
00322  *
00323  * \section parallel Enabling parallel execution
00324  *
00325  * To enable (shared-memory-)parallel execution of internal computation (in fact, sorting and merging, and random shuffling),
00326  * you have several options depending on the \ref compilers "compiler version" used:
00327  * - Enable the g++ parallel mode specifically for STXXL,
00328  *   by defining \c STXXL_PARALLEL_MODE_EXPLICIT and enabling OpenMP (\c -DSTXXL_PARALLEL_MODE_EXPLICIT \c -fopenmp)
00329  *   during compilation and linkage of your program.
00330  *   Compiling the library binary with this flag enabled is not really necessary,
00331  *   since the most time-consuming operations are called by the generic routines and thus contained in the header files.
00332  * - Enable the g++ parallel mode for your program globally,
00333  *   by defining \c _GLIBCXX_PARALLEL and enabling OpenMP (\c -D_GLIBCXX_PARALLEL \c -fopenmp).
00334  *   This has the implication that STL algorithms in your program will also be executed in parallel,
00335  *   which may have undesired side effects.
00336  *   These options are automatically used when you built STXXL using the \c *_pmode target,
00337  *   and your Makefile includes \c pmstxxl.mk.
00338  * - Enable MCSTL in your program by setting the include path appropriately.
00339  *   This implies that STL algorithms in your program will also be executed in parallel,
00340  *   which may have undesired side effects.
00341  *   These options are automatically used when you built STXXL using the \c *_mcstl target,
00342  *   and your Makefile includes \c mcstxxl.mk.
00343  *
00344  * We recommend to try the first option at first.
00345  *
00346  * The number of threads to be used can be set by the environment variable OMP_NUM_THREADS or
00347  * by calling omp_set_num_threads.
00348  * Detailed tuning can be achieved as described
00349  * <a href="http://gcc.gnu.org/onlinedocs/libstdc++/manual/bk01pt03ch18s04.html#parallel_mode.design.tuning">here</a>.
00350  *
00351  *
00352  * \section space Disk space
00353  *
00354  * Before you try to run one of the \c S<small>TXXL</small> examples
00355  * (or your own \c S<small>TXXL</small> program) you must configure the disk
00356  * space that will be used as external memory for the library.
00357  *
00358  * To get best performance with \c S<small>TXXL</small> you should assign separate disks to it.
00359  * These disks should be used by the library only.
00360  * Since \c S<small>TXXL</small> is developed to exploit disk parallelism, the performance of your
00361  * external memory application will increase if you use more than one disk.
00362  * But from how many disks your application can benefit depends on how "I/O bound" it is.
00363  * With modern disk bandwidths
00364  * of about 50-75 MiB/s most of applications are I/O bound for one disk. This means that if you add another disk
00365  * the running time will be halved. Adding more disks might also increase performance significantly.
00366  *
00367  *
00368  * \section filesystem Recommended file system
00369  *
00370  * The library benefits from direct transfers from user memory to disk, which saves superfluous copies.
00371  * We recommend to use the
00372  * \c <a href="http://xfs.org">XFS</a> file system,
00373  * which gives good read and write performance for large files.
00374  * Note that file creation speed of \c XFS is a bit slower,
00375  * so that disk files should be precreated for optimal performance.
00376  *
00377  * If the filesystems only use is to store one large \c S<small>TXXL</small> disk file,
00378  * we also recommend to add the following options to the \c mkfs.xfs command to gain maximum performance:
00379  * \verbatim -d agcount=1 -l size=512b \endverbatim
00380  *
00381  * The following filesystems have been reported not to support direct I/O: \c tmpfs , \c glusterfs .
00382  * Since direct I/O is enabled by default, you may recompile \c S<small>TXXL</small>
00383  * with \c STXXL_DIRECT_IO_OFF defined to access files on these file systems.
00384  *
00385  *
00386  * \section configuration Disk configuration file
00387  *
00388  * You must define the disk configuration for an
00389  * \c S<small>TXXL</small> program in a file named \c '.stxxl' that must reside
00390  * in the same directory where you execute the program.
00391  * You can change the default file name for the configuration
00392  * file by setting the environment variable \c STXXLCFG .
00393  *
00394  * Each line of the configuration file describes a disk.
00395  * A disk description uses the following format:<br>
00396  * \c disk=full_disk_filename,capacity,access_method
00397  *
00398  * Description of the parameters:
00399  * - \c full_disk_filename : full disk filename. In order to access disks S<small>TXXL</small> uses file
00400  *   access methods. Each disk is represented as a file. If you have a disk that is mounted in Unix
00401  *   to the path /mnt/disk0/, then the correct value for the \c full_disk_filename would be
00402  *   \c /mnt/disk0/some_file_name ,
00403  * - \c capacity : maximum capacity of the disk in megabytes
00404  *   (0 means autogrow, file will be deleted afterwards)
00405  * - \c access_method : \c S<small>TXXL</small> has a number of different
00406  *   file access implementations for POSIX systems, choose one of them:
00407  *   - \c syscall : use \c read and \c write system calls which perform disk transfers directly
00408  *     on user memory pages without superfluous copying (currently the fastest method)
00409  *   - \c mmap : \c use \c mmap and \c munmap system calls
00410  *   - \c boostfd : access the file using a Boost file descriptor
00411  *   - \c fileperblock_syscall, \c fileperblock_mmap, \c fileperblock_boostfd :
00412  *     same as above, but take a single file per block, using full_disk_filename as file name prefix.
00413  *     Usually provide worse performance than the standard variants,
00414  *     but release freed blocks to the file system immediately.
00415  *   - \c simdisk : simulates timings of the IBM IC35L080AVVA07 disk, full_disk_filename must point
00416  *     to a file on a RAM disk partition with sufficient space
00417  *   - \c memory : keeps all data in RAM, for quicker testing
00418  *   - \c wbtl : library-based write-combining (good for writing small blocks onto SSDs),
00419  *     based on \c syscall
00420  *
00421  * See also the example configuration file \c 'config_example' included in the tarball.
00422  *
00423  *
00424  * \section logfiles Log files
00425  *
00426  * \c S<small>TXXL</small> produces two kinds of log files, a message and an error log.
00427  * By setting the environment variables \c STXXLLOGFILE and \c STXXLERRLOGFILE, you can configure
00428  * the location of these files.
00429  * The default values are \c stxxl.log and \c stxxl.errlog, respectively.
00430  *
00431  *
00432  * \section excreation Precreating external memory files
00433  *
00434  * In order to get the maximum performance one should precreate disk files described in the configuration file,
00435  * before running \c S<small>TXXL</small> applications.
00436  *
00437  * The precreation utility is included in the set of \c S<small>TXXL</small>
00438  * utilities ( \c utils/createdisks.bin ). Run this utility
00439  * for each disk you have defined in the disk configuration file:
00440  * \verbatim utils/createdisks.bin capacity full_disk_filename... \endverbatim
00441  *
00442  * */
00443 
00444 
00445 /*!
00446  * \page installation_msvc Installation, usage, configuration (Windows &ndash; Microsoft Visual C++)
00447  *
00448  * \section download_msvc Download and Extraction
00449  *
00450  * - Install the <a href="http://www.boost.org">Boost</a> libraries (required).
00451  * - Download the latest \c Stxxl zip file from
00452  *   <a href="http://sourceforge.net/projects/stxxl/files/stxxl/">SourceForge</a>
00453  * - Unpack the zip file in some directory (e.&nbsp;g. \c 'C:\\' )
00454  * - Change to \c stxxl base directory: \c cd \c stxxl-x.y.z
00455  *
00456  * \section library_compilation_msvc Library Compilation
00457  *
00458  * - Create \c make.settings.local in the base directory according to your system configuration:
00459  *   - set \c BOOST_ROOT variable according to the Boost root path, e.&nbsp;g.
00460  *     BOOST_ROOT = "C:\Program Files (x86)\boost\boost_1_40_0"#
00461  *   - (optionally) set \c STXXL_ROOT variable to \c S<small>TXXL</small> root directory
00462  *   - (optionally) set \c OPT variable to \c /O2 or any other VC++ optimization level you like,
00463  *   add -D_SECURE_SCL=0 to switch off iterator checking, which improves performance
00464  *   - (optionally) set \c DEBUG_COMPILER=/MDd /Zi and DEBUG_LINKER=/DEBUG enable debugging
00465  * - Open the \c stxxl.vcproj file (VS Solution Object) in Visual Studio.
00466  *   The file is located in the \c STXXL_ROOT directory
00467  *   Press F7 to build the library.
00468  *   The library file (libstxxl.lib) should appear in \c STXXL_ROOT\\lib directory
00469  *   Or build the library and the S<small>TXXL</small> test programs by pressing Ctrl-Alt-F7
00470  *   (or choosing from 'Build' drop-down menu Rebuild Solution)
00471  * - (alternatively) Compile the library by executing \c nmake \c library_msvc
00472  *   and the tests by executing \c nmake \c tests_msvc,
00473  *   with all the appropriate environment set (e.&nbsp;g. by using the VS Command Shell)
00474  *
00475  *
00476  * \section build_apps Building an application
00477  *
00478  * Programs using Stxxl can be compiled using options from \c compiler.options
00479  * file (in the \c STXXL_ROOT directory). The linking options for the VC++
00480  * linker you can find in \c linker.options file. In order to accomplish this
00481  * do the following:
00482  * - Open project property pages (menu Project->Properties)
00483  * - Choose C/C++->Command Line page.
00484  * - In the 'Additional Options' field insert the contents of the \c compiler.options file.
00485  * Make sure that the Runtime libraries/debug options (/MDd or /MD or /MT or /MTd) of
00486  * the \c Stxxl library (see above) do not conflict with the options of your project.
00487  * Use the same options in the \c Stxxl and your project.
00488  * - Choose Linker->Command Line page.
00489  * - In the 'Additional Options' field insert the contents of the \c linker.options file.
00490  *
00491  * <br>
00492  * If you use make files you can
00493  * include the \c make.settings file in your make files and use \c STXXL_COMPILER_OPTIONS and
00494  * \c STXXL_LINKER_OPTIONS variables, defined therein.
00495  *
00496  * For example: <br>
00497  * \verbatim cl -c my_example.cpp $(STXXL_COMPILER_OPTIONS) \endverbatim <br>
00498  * \verbatim link my_example.obj /out:my_example.exe $(STXXL_LINKER_OPTIONS) \endverbatim
00499  *
00500  * <br>
00501  * The \c STXXL_ROOT\\test\\WinGUI directory contains an example MFC GUI project
00502  * that uses \c Stxxl. In order to compile it open the WinGUI.vcproj file in
00503  * Visual Studio .NET. Change if needed the Compiler and Linker Options of the project
00504  * (see above).
00505  *
00506  * Before you try to run one of the \c S<small>TXXL</small> examples
00507  * (or your own \c S<small>TXXL</small> program) you must configure the disk
00508  * space that will be used as external memory for the library. For instructions how to do that,
00509  * see the next section.
00510  *
00511  *
00512  * \section space Disk space
00513  *
00514  * To get best performance with \c S<small>TXXL</small> you should assign separate disks to it.
00515  * These disks should be used by the library only.
00516  * Since \c S<small>TXXL</small> is developed to exploit disk parallelism, the performance of your
00517  * external memory application will increase if you use more than one disk.
00518  * But from how many disks your application can benefit depends on how "I/O bound" it is.
00519  * With modern disk bandwidths
00520  * of about 50-75 MiB/s most of applications are I/O bound for one disk. This means that if you add another disk
00521  * the running time will be halved. Adding more disks might also increase performance significantly.
00522  *
00523  *
00524  * \section configuration Disk configuration file
00525  *
00526  * You must define the disk configuration for an
00527  * \c S<small>TXXL</small> program in a file named \c '.stxxl' that must reside
00528  * in the same directory where you execute the program.
00529  * You can change the default file name for the configuration
00530  * file by setting the environment variable \c STXXLCFG .
00531  *
00532  * Each line of the configuration file describes a disk.
00533  * A disk description uses the following format:<br>
00534  * \c disk=full_disk_filename,capacity,access_method
00535  *
00536  * Description of the parameters:
00537  * - \c full_disk_filename : full disk filename. In order to access disks S<small>TXXL</small> uses file
00538  *   access methods. Each disk is represented as a file. If you have a disk called \c e:
00539  *   then the correct value for the \c full_disk_filename would be
00540  *   \c e:\\some_file_name ,
00541  * - \c capacity : maximum capacity of the disk in megabytes
00542  * - \c access_method : \c S<small>TXXL</small> has a number of different
00543  *   file access implementations for WINDOWS, choose one of them:
00544  *   - \c syscall : use \c read and \c write POSIX system calls (slow)
00545  *   - \c wincall: performs disks transfers using \c ReadFile and \c WriteFile WinAPI calls
00546  *     This method supports direct I/O that avoids superfluous copying of data pages
00547  *     in the Windows kernel. This is the best (and default) method in Stxxl for Windows.
00548  *   - \c boostfd : access the file using a Boost file descriptor
00549  *   - \c fileperblock_syscall, \c fileperblock_wincall, \c fileperblock_boostfd :
00550  *     same as above, but take a single file per block, using full_disk_filename as file name prefix.
00551  *     Usually provide worse performance than the standard variants,
00552  *     but release freed blocks to the file system immediately.
00553  *   - \c memory : keeps all data in RAM, for quicker testing
00554  *   - \c wbtl : library-based write-combining (good for writing small blocks onto SSDs),
00555  *     based on \c syscall
00556  *
00557  * See also the example configuration file \c 'config_example_win' included in the archive.
00558  *
00559  *
00560  * \section excreation Precreating external memory files
00561  *
00562  * In order to get the maximum performance one should precreate disk files described in the configuration file,
00563  * before running \c S<small>TXXL</small> applications.
00564  *
00565  * The precreation utility is included in the set of \c S<small>TXXL</small>
00566  * utilities ( \c utils\\createdisks.exe ). Run this utility
00567  * for each disk you have defined in the disk configuration file:
00568  * \verbatim utils\createdisks.exe capacity full_disk_filename... \endverbatim
00569  *
00570  * */
00571 
00572 /*!
00573  * \page install-svn Installing from subversion
00574  *
00575  * \section checkout Retrieving the source from subversion
00576  *
00577  * The \c S<small>TXXL</small> source code is available in a subversion repository on sourceforge.net.<br>
00578  * To learn more about subversion and (command line and graphical) subversion clients
00579  * visit <a href="http://subversion.tigris.org/">http://subversion.tigris.org/</a>.
00580  *
00581  * The main development line (in subversion called the "trunk") is located at
00582  * \c https://stxxl.svn.sourceforge.net/svnroot/stxxl/trunk
00583  * <br>Alternatively you might use a branch where a new feature is being developed.
00584  * Branches have URLs like
00585  * \c https://stxxl.svn.sourceforge.net/svnroot/stxxl/branches/foobar
00586  *
00587  * For the following example let us assume you want to download the latest trunk version
00588  * using the command line client and store it in a directory called \c stxxl-trunk
00589  * (which should not exist, yet).
00590  * Otherwise replace URL and path to your needs.
00591  *
00592  * Run: \verbatim svn checkout https://stxxl.svn.sourceforge.net/svnroot/stxxl/trunk stxxl-trunk \endverbatim
00593  * Change to stxxl directory: \verbatim cd stxxl-trunk \endverbatim
00594  *
00595  * \section svn_continue_installation Continue as Usual
00596  *
00597  * Now follow the regular installation and usage instructions,
00598  * starting from "Library Compilation":
00599  * - \ref library_compilation_linux_gcc "Linux/Unix &ndash; g++/icpc/clang++"
00600  * - \ref library_compilation_msvc "Windows &ndash; Microsoft Visual C++"
00601  *
00602  * \section update Updating an existing subversion checkout
00603  *
00604  * Once you have checked out the source code you can easily update it to the latest version later on.
00605  *
00606  * In the S<small>TXXL</small> directory, run
00607  * \verbatim svn update \endverbatim
00608  * and rebuild.
00609  * */
00610 
00611 /*!
00612   
00613 \page intro-stream Introduction to the Stream Package
00614 
00615 \author Timo Bingmann (2012-06-11)
00616 
00617 This page gives a short introduction into the stream package. First the main
00618 abstractions are discussed and then some examples on how to utilize the
00619 existing algorithms are developed.
00620 
00621 All example code can be found in stream/test_intro1.cpp
00622 
00623 \section stream1 Abstraction, Interface and a Simple Example
00624 
00625 The stream package is built around the abstract notion of an object being able
00626 to produce a sequence of output values. Only three simple operations are necessary:
00627 - Retrieval of the current value: prefix * operator
00628 - Advance to the next value in the sequence: prefix ++ operator
00629 - Indication of the sequence's end: empty() function
00630 
00631 The most common place object that fits easily into this abstraction is the
00632 random generator. Actually, a random generator only requires two operations: it
00633 can be queried for its current value and be instructed to calculate/advance to
00634 new value. Of course the random sequence should be unbounded, so an empty()
00635 function would always be false. Nevertheless, this common-place example
00636 illustrates the purpose of the stream interface pretty well.
00637 
00638 All stream objects must support the three operations above, they form the
00639 stream algorithm concept. In C++ a class conforms to this concept if it
00640 implements the following interface:
00641 
00642 \code
00643 struct stream_object
00644 {
00645     // Type of the values in the output sequence.
00646     typedef output_type value_type;
00647 
00648     // Retrieval prefix * operator (like dereferencing a pointer or iterator).
00649     const value_type& operator* () const;
00650 
00651     // Prefix increment ++ operator, which advances the stream to the next value.
00652     stream_object& operator++ ();
00653 
00654     // Empty indicator. True if the last ++ operation could not fetch a value.
00655     bool empty() const;
00656 };
00657 \endcode
00658 
00659 A very simple stream object that produces the sequence 1,2,3,4,....,1000 is shown in the following snippet:
00660 
00661 \code
00662 struct counter_object
00663 {
00664     // This stream produces a sequence of integers.
00665     typedef int         value_type;
00666 
00667 private:
00668     // A class attribute to save the current value.
00669     int                 m_current_value;
00670 
00671 public:
00672     // A constructor to set the initial value to 1.
00673     counter_object()
00674         : m_current_value(1)
00675     {
00676     }
00677 
00678     // The retrieve operator returning the current value.
00679     const value_type& operator* () const
00680     {
00681         return m_current_value;
00682     }
00683 
00684     // Increment operator advancing to the next integer.
00685     counter_object& operator++ ()
00686     {
00687         ++m_current_value;
00688         return *this;
00689     }
00690 
00691     // Empty indicator, which in this case can check the current value.
00692     bool empty() const
00693     {
00694         return (m_current_value > 1000);
00695     }
00696 };
00697 \endcode
00698 
00699 After this verbose interface definition, the actual iteration over a stream object can be done as follows:
00700 
00701 \code
00702 counter_object counter;
00703 
00704 while (!counter.empty())
00705 {
00706     std::cout << *counter << " ";
00707     ++counter;
00708 }
00709 std::cout << std::endl;
00710 \endcode
00711 
00712 For those who like to shorten everything into fewer lines, the above can also be expressed as a for loop:
00713 
00714 \code
00715 for (counter_object cnt; !cnt.empty(); ++cnt)
00716 {
00717     std::cout << *cnt << " ";
00718 }
00719 std::cout << std::endl;
00720 \endcode
00721 
00722 Both loops will print the following output:
00723 \verbatim
00724 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 [...] 995 996 997 998 999 1000 
00725 \endverbatim
00726 
00727 \section stream2 Pipelining: Plugging Stream Objects Together
00728 
00729 The stream interface is so very useful for external memory algorithms because it represents the concept of sequential access to a stream of individual values. While the simple example above only works with integers, the value_type of streams will more often contain complex tuple structs with multiple components.
00730 
00731 A stream algorithm can then be constructed from multiple stream objects that pass data from one to another. This notion of "plugging together" stream objects is used in the following example to calculate the square of each value of an integer sequence:
00732 
00733 \code
00734 template <typename InputStream>
00735 struct squaring_object
00736 {
00737     // This stream produces a sequence of integers.
00738     typedef int         value_type;
00739 
00740 private:
00741     // A reference to another stream of integers, which are our input.
00742     InputStream&        m_input_stream;
00743 
00744     // A temporary value buffer to hold the current square for retrieval.
00745     value_type          m_current_value;
00746 
00747 public:
00748     // A constructor taking another stream of integers as input.
00749     squaring_object(InputStream& input_stream)
00750         : m_input_stream(input_stream)
00751     {
00752         if (!m_input_stream.empty())
00753         {
00754             m_current_value = *m_input_stream;
00755             m_current_value = m_current_value * m_current_value;
00756         }
00757     }
00758 
00759     // The retrieve operator returning the square of the input stream.
00760     const value_type& operator* () const
00761     {
00762         return m_current_value;
00763     }
00764 
00765     // Increment operator: handled by incrementing the input stream.
00766     squaring_object& operator++ ()
00767     {
00768         ++m_input_stream;
00769         if (!m_input_stream.empty())
00770         {
00771             m_current_value = *m_input_stream;
00772             m_current_value = m_current_value * m_current_value;
00773         }
00774         return *this;
00775     }
00776 
00777     // Empty indicator: this stream is empty when the input stream is.
00778     bool empty() const
00779     {
00780         return m_input_stream.empty();
00781     }
00782 };
00783 \endcode
00784 
00785 For a beginner in stream object programming, the squaring example contains multiple unexpected, verbose complications.
00786 
00787 - We wish to allow many different integer sequences as input streams to the squaring class. For this we use template meta-programming and define squaring to take any class as InputStream template parameter. As yet, in C++ we cannot syntactically define which concepts the template parameters must fulfill, in this case one would require InputStream to implement the stream interface.
00788 
00789 - After defining the input stream class, one will usually need an instantiated object of that class inside the new stream class. Most common practice is to define references to other streams as class attributes, and have the actual objects be passed to the constructor of the new stream object. <br> In the case of the squaring class, any InputStream object is accepted by the constructor and a reference is saved into m_input_stream.
00790 
00791 - As second attribute, the squaring class contains m_current_value. The additional temporary value is required in this case because operator*() must return a const-reference, so the square must actually be stored in a variable after it is calculated. Now note that the squaring operation in this version is implemented at two places: in the constructor and the operator++(). <br> This is necessary, because the stream concept requires that the first value be <em>immediately available after construction</em>! Therefore it must be calculated in the constructor, and this code is usually a duplicate to the action done in operator++(). A real implementation would probably combine the calculation code into a process() function and also do additional allocation work in the constructor.
00792 
00793 An instance of the counter_object can be plugged into a squaring_object as done in the following example:
00794 
00795 \code
00796 counter_object counter;
00797 squaring_object<counter_object> squares(counter);
00798 
00799 while (!squares.empty())
00800 {
00801     std::cout << *squares << " ";
00802     ++squares;
00803 }
00804 std::cout << std::endl;
00805 \endcode
00806 
00807 The example outputs:
00808 
00809 \verbatim
00810 1 4 9 16 25 36 49 64 81 100 121 144 169 [...] 986049 988036 990025 992016 994009 996004 998001 1000000
00811 \endverbatim
00812 
00813 \section stream3 Miscellaneous Utilities Provided by the Stream Package
00814 
00815 The above examples are pure C++ interface manipulations and do not even require stxxl. However, when writing stream algorithms you can take advantage of the utilities provided by the stream package to create complex algorithms. Probably the most useful is the pair of sorting classes, which will be discussed after a few preliminaries.
00816 
00817 More complex algorithms will most often use tuples as values passed from one stream to another. These tuples wrap all information fields of a specific piece of data. Simple tuples can be created using std::pair, tuples with larger number of components can use Boost.Tuple or just plain structs with multiple fields. (In the tuple case, the temporary value inside the stream struct can mostly be avoided.)
00818 
00819 The stream package contains utilities to plug stream classes together to form complex algorithms. The following few examples are very basic algorithms:
00820 
00821 Very often the input to a sequence of stream classes comes from an array or other container. In this case one requires an input stream object, which iterates through the container and outputs each element once. Stxxl provides iterator2stream for this common purpose:
00822 \code
00823 std::vector<int> intvector;
00824 // (fill intvector)
00825 
00826 // define stream class iterating over an integer vector
00827 typedef stxxl::stream::iterator2stream< std::vector<int>::const_iterator > intstream_type;
00828 
00829 // instantiate the stream object, iterate from begin to end of intvector.
00830 intstream_type intstream (intvector.begin(), intvector.end());
00831 
00832 // plug in squaring object after vector iterator stream.
00833 squaring_object<intstream_type> squares(intstream);
00834 \endcode
00835 
00836 Most important: if the input container is a stxxl::vector, then one should use vector_iterator2stream, because this class will prefetch additional blocks from the vector while processing the stream.
00837 \code
00838 stxxl::vector<int> intvector;
00839 // (fill intvector)
00840 
00841 // define stream class iterating over an integer STXXL vector
00842 typedef stxxl::stream::vector_iterator2stream< stxxl::vector<int>::const_iterator > intstream_type;
00843 
00844 // instantiate the stream object, iterate from begin to end of intvector using prefetching
00845 intstream_type intstream (intvector.begin(), intvector.end());
00846 
00847 // plug in squaring object after vector iterator stream.
00848 squaring_object<intstream_type> squares(intstream);
00849 \endcode
00850 
00851 The opposite to iterator2stream is to collect the output of a sequence of stream objects into a container or stxxl vector. This operation is called materialize and also comes in the general version and a special version for the STXXL-vector, which uses asynchronous writes.
00852 
00853 This example shows how to materialize a stream into a usual STL vector.
00854 \code
00855 // construct the squared counter stream
00856 counter_object counter;
00857 squaring_object<counter_object> squares(counter);
00858 
00859 // allocate vector of 100 integers
00860 std::vector<int> intvector (100);
00861 
00862 // materialize 100 integers from stream and put into vector
00863 stxxl::stream::materialize(squares, intvector.begin(), intvector.end());
00864 \endcode
00865 
00866 And the only modification needed to support larger data sets is to materialize to an STXXL vector:
00867 \code
00868 // construct the squared counter stream
00869 counter_object counter;
00870 squaring_object<counter_object> squares(counter);
00871 
00872 // allocate STXXL vector of 100 integers
00873 stxxl::vector<int> intvector (100);
00874 
00875 // materialize 100 integers from stream and put into STXXL vector
00876 stxxl::stream::materialize(squares, intvector.begin(), intvector.end());
00877 \endcode
00878 
00879 \section stream4 Sorting As Provided by the Stream Package
00880 
00881 Maybe the most important set of tools in the stream package is the pairs of sorter classes runs_creator and runs_merger. The general way to sort a sequential input stream is to first consolidate a large number of input items in an internal memory buffer. Then when the buffer is full, it can be sorted in internal memory and subsequently written out to disk. This sorted sequence is then called a run. When the input stream is finished and the sorted output must be produced, theses sorted sequences can efficiently be merged using a tournament tree or similar multi-way comparison structure.
00882 
00883 STXXL implements this using two stream classes: runs_creator and runs_merger.
00884 
00885 The following examples shows how to sort the integer sequence 1,2,...,1000 first by the right-most decimal digit, then by its absolute value (yes a somewhat constructed example, but it serves its purpose well.) For all sorters a comparator object is required which tells the sorter which of two objects is the smaller one. This is similar to the requirements of the usual STL, however, the STXXL sorters need to additional functions: min_value() and max_value() which are used as padding sentinels. These functions return the smallest and highest possible values of the given data type.
00886 \code
00887 // define comparator class: compare right-most decimal and then absolute value
00888 struct CompareMod10
00889 {
00890     // comparison operator() returning true if (a < b)
00891     inline bool operator() (int a, int b) const
00892     {
00893         if ((a % 10) == (b % 10))
00894             return a < b;
00895         else
00896             return (a % 10) < (b % 10);
00897     }
00898 
00899     // smallest possible integer value
00900     int min_value() const { return INT_MIN; }
00901     // largest possible integer value
00902     int max_value() const { return INT_MAX; }
00903 };
00904 \endcode
00905 
00906 All sorters steps require an internal memory buffer. This size can be fixed using a parameter to runs_creator and runs_merger.
00907 The following example code instantiates a counter object, plugs this into a runs_creator which is followed by a runs_merger.
00908 
00909 \code
00910 static const int ram_use = 10*1024*1024;   // amount of memory to use in runs creation
00911 
00912 counter_object  counter;        // the counter stream from first examples
00913 
00914 // define a runs sorter for the counter stream which order by CompareMod10 object.
00915 typedef stxxl::stream::runs_creator<counter_object, CompareMod10> rc_counter_type;
00916 
00917 // instance of CompareMod10 comparator class
00918 CompareMod10    comparemod10;
00919 
00920 // instance of runs_creator which reads the counter stream.
00921 rc_counter_type rc_counter (counter, comparemod10, ram_use);
00922 
00923 // define a runs merger for the sorted runs from rc_counter.
00924 typedef stxxl::stream::runs_merger<rc_counter_type::sorted_runs_type, CompareMod10> rm_counter_type;
00925 
00926 // instance of runs_merger which merges sorted runs from rc_counter.
00927 rm_counter_type rm_counter (rc_counter.result(), comparemod10, ram_use);
00928 
00929 // read sorted stream: runs_merger also conforms to the stream interface.
00930 while (!rm_counter.empty())
00931 {
00932     std::cout << *rm_counter << " ";
00933     ++rm_counter;
00934 }
00935 std::cout << std::endl;
00936 \endcode
00937 The output of the code above is:
00938 \verbatim
00939 10 20 30 40 50 60 70 80 [...] 990 1000 1 11 21 31 41 51 61 [...] 909 919 929 939 949 959 969 979 989 999
00940 \endverbatim
00941 
00942 Note that in the above example the input of the runs_creator is itself a stream. If however the data is not naturally available as a stream, one can use a variant of runs_creator which accepts input via a push() function. This is more useful when using an imperative programming style. Note that the runs_merger does not change.
00943 \code
00944 static const int ram_use = 10*1024*1024;   // amount of memory to use in runs creation
00945 
00946 // define a runs sorter which accepts imperative push()s and orders by CompareMod10 object.
00947 typedef stxxl::stream::runs_creator<stxxl::stream::use_push<int>, CompareMod10> rc_counter_type;
00948 
00949 // instance of CompareMod10 comparator class.
00950 CompareMod10    comparemod10;
00951 
00952 // instance of runs_creator which waits for input.
00953 rc_counter_type rc_counter (comparemod10, ram_use);
00954 
00955 // write sequence of integers into runs
00956 for (int i = 1; i <= 1000; ++i)
00957     rc_counter.push(i);
00958 
00959 // define a runs merger for the sorted runs from rc_counter.
00960 typedef stxxl::stream::runs_merger<rc_counter_type::sorted_runs_type, CompareMod10> rm_counter_type;
00961 
00962 // instance of runs_merger which merges sorted runs from rc_counter.
00963 rm_counter_type rm_counter (rc_counter.result(), comparemod10, ram_use);
00964 
00965 // read sorted stream: runs_merger also conforms to the stream interface.
00966 while (!rm_counter.empty())
00967 {
00968     std::cout << *rm_counter << " ";
00969     ++rm_counter;
00970 }
00971 std::cout << std::endl;
00972 \endcode
00973 
00974 And as the last example in this tutorial we show how to use stxxl::sorter, which combines runs_creator and runs_merger into one object. The sorter has two states: input and output. During input, new elements can be sorted using push(). Then to switch to output state, the function sort() is called, after which the sorter can be queried using the usual stream interface.
00975 \code
00976 static const int ram_use = 10*1024*1024;   // amount of memory to use in runs creation
00977 
00978 // define a runs sorter which accepts imperative push()s and orders by CompareMod10 object.
00979 typedef stxxl::sorter<int, CompareMod10> sr_counter_type;
00980 
00981 // instance of CompareMod10 comparator class.
00982 CompareMod10    comparemod10;
00983 
00984 // instance of sorter which waits for input.
00985 sr_counter_type sr_counter (comparemod10, ram_use);
00986 
00987 // write sequence of integers into sorter, which creates sorted runs
00988 for (int i = 1; i <= 1000; ++i)
00989     sr_counter.push(i);
00990 
00991 // signal sorter that the input stream is finished and switch to output mode.
00992 sr_counter.sort();
00993 
00994 // read sorted stream: sorter also conforms to the stream interface.
00995 while (!sr_counter.empty())
00996 {
00997     std::cout << *sr_counter << " ";
00998     ++sr_counter;
00999 }
01000 std::cout << std::endl;
01001 \endcode
01002 
01003 All three examples have the same output.
01004 
01005  */
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines