Stxxl  1.4.0
include/stxxl/bits/algo/random_shuffle.h
Go to the documentation of this file.
00001 /***************************************************************************
00002  *  include/stxxl/bits/algo/random_shuffle.h
00003  *
00004  *  Part of the STXXL. See http://stxxl.sourceforge.net
00005  *
00006  *  Copyright (C) 2007 Manuel Krings
00007  *  Copyright (C) 2007 Markus Westphal
00008  *  Copyright (C) 2009, 2010 Andreas Beckmann <beckmann@cs.uni-frankfurt.de>
00009  *
00010  *  Distributed under the Boost Software License, Version 1.0.
00011  *  (See accompanying file LICENSE_1_0.txt or copy at
00012  *  http://www.boost.org/LICENSE_1_0.txt)
00013  **************************************************************************/
00014 
00015 #ifndef STXXL_RANDOM_SHUFFLE_HEADER
00016 #define STXXL_RANDOM_SHUFFLE_HEADER
00017 
00018 // TODO: improve main memory consumption in recursion
00019 //        (free stacks buffers)
00020 // TODO: shuffle small input in internal memory
00021 
00022 
00023 #include <stxxl/bits/stream/stream.h>
00024 #include <stxxl/scan>
00025 #include <stxxl/stack>
00026 
00027 
00028 __STXXL_BEGIN_NAMESPACE
00029 
00030 
00031 //! \addtogroup stlalgo
00032 //! \{
00033 
00034 
00035 //! \brief External equivalent of std::random_shuffle
00036 //! \param first begin of the range to shuffle
00037 //! \param last end of the range to shuffle
00038 //! \param rand random number generator object (functor)
00039 //! \param M number of bytes for internal use
00040 //! \param AS parallel disk allocation strategy
00041 //!
00042 //! - BlockSize_ size of the block to use for external memory data structures
00043 //! - PageSize_ page size in blocks to use for external memory data structures
00044 template <typename ExtIterator_,
00045           typename RandomNumberGenerator_,
00046           unsigned BlockSize_,
00047           unsigned PageSize_,
00048           typename AllocStrategy_>
00049 void random_shuffle(ExtIterator_ first,
00050                     ExtIterator_ last,
00051                     RandomNumberGenerator_ & rand,
00052                     unsigned_type M,
00053                     AllocStrategy_ AS = STXXL_DEFAULT_ALLOC_STRATEGY())
00054 {
00055     STXXL_UNUSED(AS);  // FIXME: Why is this not being used?
00056     typedef typename ExtIterator_::value_type value_type;
00057     typedef typename stxxl::STACK_GENERATOR<value_type, stxxl::external,
00058                                             stxxl::grow_shrink2, PageSize_,
00059                                             BlockSize_, void, 0, AllocStrategy_>::result stack_type;
00060     typedef typename stack_type::block_type block_type;
00061 
00062     STXXL_VERBOSE1("random_shuffle: Plain Version");
00063     STXXL_STATIC_ASSERT(int(BlockSize_) < 0 && "This implementation was never tested. Please report to the stxxl developers if you have an ExtIterator_ that works with this implementation.");
00064 
00065     stxxl::int64 n = last - first; // the number of input elements
00066 
00067     // make sure we have at least 6 blocks + 1 page
00068     if (M < 6 * BlockSize_ + PageSize_ * BlockSize_) {
00069         STXXL_ERRMSG("random_shuffle: insufficient memory, " << M << " bytes supplied,");
00070         M = 6 * BlockSize_ + PageSize_ * BlockSize_;
00071         STXXL_ERRMSG("random_shuffle: increasing to " << M << " bytes (6 blocks + 1 page)");
00072     }
00073 
00074     int_type k = M / (3 * BlockSize_); // number of buckets
00075 
00076 
00077     stxxl::int64 i, j, size = 0;
00078 
00079     value_type * temp_array;
00080     typedef typename stxxl::VECTOR_GENERATOR<value_type,
00081                                              PageSize_, 4, BlockSize_, AllocStrategy_>::result temp_vector_type;
00082     temp_vector_type * temp_vector;
00083 
00084     STXXL_VERBOSE1("random_shuffle: " << M / BlockSize_ - k << " write buffers for " << k << " buckets");
00085     stxxl::read_write_pool<block_type> pool(0, M / BlockSize_ - k);  // no read buffers and M/B-k write buffers
00086 
00087     stack_type ** buckets;
00088 
00089     // create and put buckets into container
00090     buckets = new stack_type *[k];
00091     for (j = 0; j < k; j++)
00092         buckets[j] = new stack_type(pool, 0);
00093 
00094 
00095     ///// Reading input /////////////////////
00096     typedef typename stream::streamify_traits<ExtIterator_>::stream_type input_stream;
00097     input_stream in = stxxl::stream::streamify(first, last);
00098 
00099     // distribute input into random buckets
00100     int_type random_bucket = 0;
00101     for (i = 0; i < n; ++i) {
00102         random_bucket = rand(k);
00103         buckets[random_bucket]->push(*in); // reading the current input element
00104         ++in;                              // go to the next input element
00105     }
00106 
00107     ///// Processing //////////////////////
00108     // resize buffers
00109     pool.resize_write(0);
00110     pool.resize_prefetch(PageSize_);
00111 
00112     unsigned_type space_left = M - k * BlockSize_ -
00113                                PageSize_ * BlockSize_; // remaining int space
00114     ExtIterator_ Writer = first;
00115     ExtIterator_ it = first;
00116 
00117     for (i = 0; i < k; i++) {
00118         STXXL_VERBOSE1("random_shuffle: bucket no " << i << " contains " << buckets[i]->size() << " elements");
00119     }
00120 
00121     // shuffle each bucket
00122     for (i = 0; i < k; i++) {
00123         buckets[i]->set_prefetch_aggr(PageSize_);
00124         size = buckets[i]->size();
00125 
00126         // does the bucket fit into memory?
00127         if (size * sizeof(value_type) < space_left) {
00128             STXXL_VERBOSE1("random_shuffle: no recursion");
00129 
00130             // copy bucket into temp. array
00131             temp_array = new value_type[size];
00132             for (j = 0; j < size; j++) {
00133                 temp_array[j] = buckets[i]->top();
00134                 buckets[i]->pop();
00135             }
00136 
00137             // shuffle
00138             potentially_parallel::
00139             random_shuffle(temp_array, temp_array + size, rand);
00140 
00141             // write back
00142             for (j = 0; j < size; j++) {
00143                 *Writer = temp_array[j];
00144                 ++Writer;
00145             }
00146 
00147             // free memory
00148             delete[] temp_array;
00149         }
00150         else {
00151             STXXL_VERBOSE1("random_shuffle: recursion");
00152 
00153             // copy bucket into temp. stxxl::vector
00154             temp_vector = new temp_vector_type(size);
00155 
00156             for (j = 0; j < size; j++) {
00157                 (*temp_vector)[j] = buckets[i]->top();
00158                 buckets[i]->pop();
00159             }
00160 
00161             pool.resize_prefetch(0);
00162             space_left += PageSize_ * BlockSize_;
00163             STXXL_VERBOSE1("random_shuffle: Space left: " << space_left);
00164 
00165             // recursive shuffle
00166             stxxl::random_shuffle(temp_vector->begin(),
00167                                   temp_vector->end(), rand, space_left);
00168 
00169             pool.resize_prefetch(PageSize_);
00170 
00171             // write back
00172             for (j = 0; j < size; j++) {
00173                 *Writer = (*temp_vector)[j];
00174                 ++Writer;
00175             }
00176 
00177             // free memory
00178             delete temp_vector;
00179         }
00180 
00181         // free bucket
00182         delete buckets[i];
00183         space_left += BlockSize_;
00184     }
00185 
00186     delete[] buckets;
00187 }
00188 
00189 //! \brief External equivalent of std::random_shuffle (specialization for stxxl::vector)
00190 //! \param first begin of the range to shuffle
00191 //! \param last end of the range to shuffle
00192 //! \param rand random number generator object (functor)
00193 //! \param M number of bytes for internal use
00194 template <typename Tp_, typename AllocStrategy_, typename SzTp_, typename DiffTp_,
00195           unsigned BlockSize_, typename PgTp_, unsigned PageSize_, typename RandomNumberGenerator_>
00196 void random_shuffle(stxxl::vector_iterator<Tp_, AllocStrategy_, SzTp_, DiffTp_, BlockSize_, PgTp_, PageSize_> first,
00197                     stxxl::vector_iterator<Tp_, AllocStrategy_, SzTp_, DiffTp_, BlockSize_, PgTp_, PageSize_> last,
00198                     RandomNumberGenerator_ & rand,
00199                     unsigned_type M)
00200 {
00201     typedef stxxl::vector_iterator<Tp_, AllocStrategy_, SzTp_, DiffTp_, BlockSize_, PgTp_, PageSize_> ExtIterator_;
00202     typedef typename ExtIterator_::value_type value_type;
00203     typedef typename stxxl::STACK_GENERATOR<value_type, stxxl::external,
00204                                             stxxl::grow_shrink2, PageSize_, BlockSize_>::result stack_type;
00205     typedef typename stack_type::block_type block_type;
00206 
00207     STXXL_VERBOSE1("random_shuffle: Vector Version");
00208 
00209     // make sure we have at least 6 blocks + 1 page
00210     if (M < 6 * BlockSize_ + PageSize_ * BlockSize_) {
00211         STXXL_ERRMSG("random_shuffle: insufficient memory, " << M << " bytes supplied,");
00212         M = 6 * BlockSize_ + PageSize_ * BlockSize_;
00213         STXXL_ERRMSG("random_shuffle: increasing to " << M << " bytes (6 blocks + 1 page)");
00214     }
00215 
00216     stxxl::int64 n = last - first;     // the number of input elements
00217     int_type k = M / (3 * BlockSize_); // number of buckets
00218 
00219     stxxl::int64 i, j, size = 0;
00220 
00221     value_type * temp_array;
00222     typedef typename stxxl::VECTOR_GENERATOR<value_type,
00223                                              PageSize_, 4, BlockSize_, AllocStrategy_>::result temp_vector_type;
00224     temp_vector_type * temp_vector;
00225 
00226     stxxl::read_write_pool<block_type> pool(0, M / BlockSize_ - k);  // no read buffers and M/B-k write buffers
00227 
00228     stack_type ** buckets;
00229 
00230     // create and put buckets into container
00231     buckets = new stack_type *[k];
00232     for (j = 0; j < k; j++)
00233         buckets[j] = new stack_type(pool, 0);
00234 
00235 
00236     typedef buf_istream<block_type, typename ExtIterator_::bids_container_iterator> buf_istream_type;
00237     typedef buf_ostream<block_type, typename ExtIterator_::bids_container_iterator> buf_ostream_type;
00238 
00239     first.flush();     // flush container
00240 
00241     // create prefetching stream,
00242     buf_istream_type in(first.bid(), last.bid() + ((last.block_offset()) ? 1 : 0), 2);
00243     // create buffered write stream for blocks
00244     buf_ostream_type out(first.bid(), 2);
00245 
00246     ExtIterator_ _cur = first - first.block_offset();
00247 
00248     // leave part of the block before _begin untouched (e.g. copy)
00249     for ( ; _cur != first; ++_cur)
00250     {
00251         typename ExtIterator_::value_type tmp;
00252         in >> tmp;
00253         out << tmp;
00254     }
00255 
00256     ///// Reading input /////////////////////
00257 
00258     // distribute input into random buckets
00259     int_type random_bucket = 0;
00260     for (i = 0; i < n; ++i, ++_cur) {
00261         random_bucket = rand(k);
00262         typename ExtIterator_::value_type tmp;
00263         in >> tmp;
00264         buckets[random_bucket]->push(tmp); // reading the current input element
00265     }
00266 
00267     ///// Processing //////////////////////
00268     // resize buffers
00269     pool.resize_write(0);
00270     pool.resize_prefetch(PageSize_);
00271 
00272     unsigned_type space_left = M - k * BlockSize_ -
00273                                PageSize_ * BlockSize_; // remaining int space
00274 
00275     for (i = 0; i < k; i++) {
00276         STXXL_VERBOSE1("random_shuffle: bucket no " << i << " contains " << buckets[i]->size() << " elements");
00277     }
00278 
00279     // shuffle each bucket
00280     for (i = 0; i < k; i++) {
00281         buckets[i]->set_prefetch_aggr(PageSize_);
00282         size = buckets[i]->size();
00283 
00284         // does the bucket fit into memory?
00285         if (size * sizeof(value_type) < space_left) {
00286             STXXL_VERBOSE1("random_shuffle: no recursion");
00287 
00288             // copy bucket into temp. array
00289             temp_array = new value_type[size];
00290             for (j = 0; j < size; j++) {
00291                 temp_array[j] = buckets[i]->top();
00292                 buckets[i]->pop();
00293             }
00294 
00295             // shuffle
00296             potentially_parallel::
00297             random_shuffle(temp_array, temp_array + size, rand);
00298 
00299             // write back
00300             for (j = 0; j < size; j++) {
00301                 typename ExtIterator_::value_type tmp;
00302                 tmp = temp_array[j];
00303                 out << tmp;
00304             }
00305 
00306             // free memory
00307             delete[] temp_array;
00308         } else {
00309             STXXL_VERBOSE1("random_shuffle: recursion");
00310             // copy bucket into temp. stxxl::vector
00311             temp_vector = new temp_vector_type(size);
00312 
00313             for (j = 0; j < size; j++) {
00314                 (*temp_vector)[j] = buckets[i]->top();
00315                 buckets[i]->pop();
00316             }
00317 
00318             pool.resize_prefetch(0);
00319             space_left += PageSize_ * BlockSize_;
00320 
00321             STXXL_VERBOSE1("random_shuffle: Space left: " << space_left);
00322 
00323             // recursive shuffle
00324             stxxl::random_shuffle(temp_vector->begin(),
00325                                   temp_vector->end(), rand, space_left);
00326 
00327             pool.resize_prefetch(PageSize_);
00328 
00329             // write back
00330             for (j = 0; j < size; j++) {
00331                 typename ExtIterator_::value_type tmp;
00332                 tmp = (*temp_vector)[j];
00333                 out << tmp;
00334             }
00335 
00336             // free memory
00337             delete temp_vector;
00338         }
00339 
00340         // free bucket
00341         delete buckets[i];
00342         space_left += BlockSize_;
00343     }
00344 
00345     delete[] buckets;
00346 
00347     // leave part of the block after _end untouched
00348     if (last.block_offset())
00349     {
00350         ExtIterator_ _last_block_end = last + (block_type::size - last.block_offset());
00351         for ( ; _cur != _last_block_end; ++_cur)
00352         {
00353             typename ExtIterator_::value_type tmp;
00354             in >> tmp;
00355             out << tmp;
00356         }
00357     }
00358 }
00359 
00360 //! \brief External equivalent of std::random_shuffle (specialization for stxxl::vector)
00361 //! \param first begin of the range to shuffle
00362 //! \param last end of the range to shuffle
00363 //! \param M number of bytes for internal use
00364 template <typename Tp_, typename AllocStrategy_, typename SzTp_, typename DiffTp_,
00365           unsigned BlockSize_, typename PgTp_, unsigned PageSize_>
00366 inline
00367 void random_shuffle(stxxl::vector_iterator<Tp_, AllocStrategy_, SzTp_, DiffTp_, BlockSize_, PgTp_, PageSize_> first,
00368                     stxxl::vector_iterator<Tp_, AllocStrategy_, SzTp_, DiffTp_, BlockSize_, PgTp_, PageSize_> last,
00369                     unsigned_type M)
00370 {
00371     stxxl::random_number<> rand;
00372     stxxl::random_shuffle(first, last, rand, M);
00373 }
00374 
00375 //! \}
00376 
00377 __STXXL_END_NAMESPACE
00378 
00379 #endif // !STXXL_RANDOM_SHUFFLE_HEADER
00380 // vim: et:ts=4:sw=4
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines