Stxxl
1.4.0
|
00001 /*************************************************************************** 00002 * include/stxxl/bits/algo/random_shuffle.h 00003 * 00004 * Part of the STXXL. See http://stxxl.sourceforge.net 00005 * 00006 * Copyright (C) 2007 Manuel Krings 00007 * Copyright (C) 2007 Markus Westphal 00008 * Copyright (C) 2009, 2010 Andreas Beckmann <beckmann@cs.uni-frankfurt.de> 00009 * 00010 * Distributed under the Boost Software License, Version 1.0. 00011 * (See accompanying file LICENSE_1_0.txt or copy at 00012 * http://www.boost.org/LICENSE_1_0.txt) 00013 **************************************************************************/ 00014 00015 #ifndef STXXL_RANDOM_SHUFFLE_HEADER 00016 #define STXXL_RANDOM_SHUFFLE_HEADER 00017 00018 // TODO: improve main memory consumption in recursion 00019 // (free stacks buffers) 00020 // TODO: shuffle small input in internal memory 00021 00022 00023 #include <stxxl/bits/stream/stream.h> 00024 #include <stxxl/scan> 00025 #include <stxxl/stack> 00026 00027 00028 __STXXL_BEGIN_NAMESPACE 00029 00030 00031 //! \addtogroup stlalgo 00032 //! \{ 00033 00034 00035 //! \brief External equivalent of std::random_shuffle 00036 //! \param first begin of the range to shuffle 00037 //! \param last end of the range to shuffle 00038 //! \param rand random number generator object (functor) 00039 //! \param M number of bytes for internal use 00040 //! \param AS parallel disk allocation strategy 00041 //! 00042 //! - BlockSize_ size of the block to use for external memory data structures 00043 //! - PageSize_ page size in blocks to use for external memory data structures 00044 template <typename ExtIterator_, 00045 typename RandomNumberGenerator_, 00046 unsigned BlockSize_, 00047 unsigned PageSize_, 00048 typename AllocStrategy_> 00049 void random_shuffle(ExtIterator_ first, 00050 ExtIterator_ last, 00051 RandomNumberGenerator_ & rand, 00052 unsigned_type M, 00053 AllocStrategy_ AS = STXXL_DEFAULT_ALLOC_STRATEGY()) 00054 { 00055 STXXL_UNUSED(AS); // FIXME: Why is this not being used? 00056 typedef typename ExtIterator_::value_type value_type; 00057 typedef typename stxxl::STACK_GENERATOR<value_type, stxxl::external, 00058 stxxl::grow_shrink2, PageSize_, 00059 BlockSize_, void, 0, AllocStrategy_>::result stack_type; 00060 typedef typename stack_type::block_type block_type; 00061 00062 STXXL_VERBOSE1("random_shuffle: Plain Version"); 00063 STXXL_STATIC_ASSERT(int(BlockSize_) < 0 && "This implementation was never tested. Please report to the stxxl developers if you have an ExtIterator_ that works with this implementation."); 00064 00065 stxxl::int64 n = last - first; // the number of input elements 00066 00067 // make sure we have at least 6 blocks + 1 page 00068 if (M < 6 * BlockSize_ + PageSize_ * BlockSize_) { 00069 STXXL_ERRMSG("random_shuffle: insufficient memory, " << M << " bytes supplied,"); 00070 M = 6 * BlockSize_ + PageSize_ * BlockSize_; 00071 STXXL_ERRMSG("random_shuffle: increasing to " << M << " bytes (6 blocks + 1 page)"); 00072 } 00073 00074 int_type k = M / (3 * BlockSize_); // number of buckets 00075 00076 00077 stxxl::int64 i, j, size = 0; 00078 00079 value_type * temp_array; 00080 typedef typename stxxl::VECTOR_GENERATOR<value_type, 00081 PageSize_, 4, BlockSize_, AllocStrategy_>::result temp_vector_type; 00082 temp_vector_type * temp_vector; 00083 00084 STXXL_VERBOSE1("random_shuffle: " << M / BlockSize_ - k << " write buffers for " << k << " buckets"); 00085 stxxl::read_write_pool<block_type> pool(0, M / BlockSize_ - k); // no read buffers and M/B-k write buffers 00086 00087 stack_type ** buckets; 00088 00089 // create and put buckets into container 00090 buckets = new stack_type *[k]; 00091 for (j = 0; j < k; j++) 00092 buckets[j] = new stack_type(pool, 0); 00093 00094 00095 ///// Reading input ///////////////////// 00096 typedef typename stream::streamify_traits<ExtIterator_>::stream_type input_stream; 00097 input_stream in = stxxl::stream::streamify(first, last); 00098 00099 // distribute input into random buckets 00100 int_type random_bucket = 0; 00101 for (i = 0; i < n; ++i) { 00102 random_bucket = rand(k); 00103 buckets[random_bucket]->push(*in); // reading the current input element 00104 ++in; // go to the next input element 00105 } 00106 00107 ///// Processing ////////////////////// 00108 // resize buffers 00109 pool.resize_write(0); 00110 pool.resize_prefetch(PageSize_); 00111 00112 unsigned_type space_left = M - k * BlockSize_ - 00113 PageSize_ * BlockSize_; // remaining int space 00114 ExtIterator_ Writer = first; 00115 ExtIterator_ it = first; 00116 00117 for (i = 0; i < k; i++) { 00118 STXXL_VERBOSE1("random_shuffle: bucket no " << i << " contains " << buckets[i]->size() << " elements"); 00119 } 00120 00121 // shuffle each bucket 00122 for (i = 0; i < k; i++) { 00123 buckets[i]->set_prefetch_aggr(PageSize_); 00124 size = buckets[i]->size(); 00125 00126 // does the bucket fit into memory? 00127 if (size * sizeof(value_type) < space_left) { 00128 STXXL_VERBOSE1("random_shuffle: no recursion"); 00129 00130 // copy bucket into temp. array 00131 temp_array = new value_type[size]; 00132 for (j = 0; j < size; j++) { 00133 temp_array[j] = buckets[i]->top(); 00134 buckets[i]->pop(); 00135 } 00136 00137 // shuffle 00138 potentially_parallel:: 00139 random_shuffle(temp_array, temp_array + size, rand); 00140 00141 // write back 00142 for (j = 0; j < size; j++) { 00143 *Writer = temp_array[j]; 00144 ++Writer; 00145 } 00146 00147 // free memory 00148 delete[] temp_array; 00149 } 00150 else { 00151 STXXL_VERBOSE1("random_shuffle: recursion"); 00152 00153 // copy bucket into temp. stxxl::vector 00154 temp_vector = new temp_vector_type(size); 00155 00156 for (j = 0; j < size; j++) { 00157 (*temp_vector)[j] = buckets[i]->top(); 00158 buckets[i]->pop(); 00159 } 00160 00161 pool.resize_prefetch(0); 00162 space_left += PageSize_ * BlockSize_; 00163 STXXL_VERBOSE1("random_shuffle: Space left: " << space_left); 00164 00165 // recursive shuffle 00166 stxxl::random_shuffle(temp_vector->begin(), 00167 temp_vector->end(), rand, space_left); 00168 00169 pool.resize_prefetch(PageSize_); 00170 00171 // write back 00172 for (j = 0; j < size; j++) { 00173 *Writer = (*temp_vector)[j]; 00174 ++Writer; 00175 } 00176 00177 // free memory 00178 delete temp_vector; 00179 } 00180 00181 // free bucket 00182 delete buckets[i]; 00183 space_left += BlockSize_; 00184 } 00185 00186 delete[] buckets; 00187 } 00188 00189 //! \brief External equivalent of std::random_shuffle (specialization for stxxl::vector) 00190 //! \param first begin of the range to shuffle 00191 //! \param last end of the range to shuffle 00192 //! \param rand random number generator object (functor) 00193 //! \param M number of bytes for internal use 00194 template <typename Tp_, typename AllocStrategy_, typename SzTp_, typename DiffTp_, 00195 unsigned BlockSize_, typename PgTp_, unsigned PageSize_, typename RandomNumberGenerator_> 00196 void random_shuffle(stxxl::vector_iterator<Tp_, AllocStrategy_, SzTp_, DiffTp_, BlockSize_, PgTp_, PageSize_> first, 00197 stxxl::vector_iterator<Tp_, AllocStrategy_, SzTp_, DiffTp_, BlockSize_, PgTp_, PageSize_> last, 00198 RandomNumberGenerator_ & rand, 00199 unsigned_type M) 00200 { 00201 typedef stxxl::vector_iterator<Tp_, AllocStrategy_, SzTp_, DiffTp_, BlockSize_, PgTp_, PageSize_> ExtIterator_; 00202 typedef typename ExtIterator_::value_type value_type; 00203 typedef typename stxxl::STACK_GENERATOR<value_type, stxxl::external, 00204 stxxl::grow_shrink2, PageSize_, BlockSize_>::result stack_type; 00205 typedef typename stack_type::block_type block_type; 00206 00207 STXXL_VERBOSE1("random_shuffle: Vector Version"); 00208 00209 // make sure we have at least 6 blocks + 1 page 00210 if (M < 6 * BlockSize_ + PageSize_ * BlockSize_) { 00211 STXXL_ERRMSG("random_shuffle: insufficient memory, " << M << " bytes supplied,"); 00212 M = 6 * BlockSize_ + PageSize_ * BlockSize_; 00213 STXXL_ERRMSG("random_shuffle: increasing to " << M << " bytes (6 blocks + 1 page)"); 00214 } 00215 00216 stxxl::int64 n = last - first; // the number of input elements 00217 int_type k = M / (3 * BlockSize_); // number of buckets 00218 00219 stxxl::int64 i, j, size = 0; 00220 00221 value_type * temp_array; 00222 typedef typename stxxl::VECTOR_GENERATOR<value_type, 00223 PageSize_, 4, BlockSize_, AllocStrategy_>::result temp_vector_type; 00224 temp_vector_type * temp_vector; 00225 00226 stxxl::read_write_pool<block_type> pool(0, M / BlockSize_ - k); // no read buffers and M/B-k write buffers 00227 00228 stack_type ** buckets; 00229 00230 // create and put buckets into container 00231 buckets = new stack_type *[k]; 00232 for (j = 0; j < k; j++) 00233 buckets[j] = new stack_type(pool, 0); 00234 00235 00236 typedef buf_istream<block_type, typename ExtIterator_::bids_container_iterator> buf_istream_type; 00237 typedef buf_ostream<block_type, typename ExtIterator_::bids_container_iterator> buf_ostream_type; 00238 00239 first.flush(); // flush container 00240 00241 // create prefetching stream, 00242 buf_istream_type in(first.bid(), last.bid() + ((last.block_offset()) ? 1 : 0), 2); 00243 // create buffered write stream for blocks 00244 buf_ostream_type out(first.bid(), 2); 00245 00246 ExtIterator_ _cur = first - first.block_offset(); 00247 00248 // leave part of the block before _begin untouched (e.g. copy) 00249 for ( ; _cur != first; ++_cur) 00250 { 00251 typename ExtIterator_::value_type tmp; 00252 in >> tmp; 00253 out << tmp; 00254 } 00255 00256 ///// Reading input ///////////////////// 00257 00258 // distribute input into random buckets 00259 int_type random_bucket = 0; 00260 for (i = 0; i < n; ++i, ++_cur) { 00261 random_bucket = rand(k); 00262 typename ExtIterator_::value_type tmp; 00263 in >> tmp; 00264 buckets[random_bucket]->push(tmp); // reading the current input element 00265 } 00266 00267 ///// Processing ////////////////////// 00268 // resize buffers 00269 pool.resize_write(0); 00270 pool.resize_prefetch(PageSize_); 00271 00272 unsigned_type space_left = M - k * BlockSize_ - 00273 PageSize_ * BlockSize_; // remaining int space 00274 00275 for (i = 0; i < k; i++) { 00276 STXXL_VERBOSE1("random_shuffle: bucket no " << i << " contains " << buckets[i]->size() << " elements"); 00277 } 00278 00279 // shuffle each bucket 00280 for (i = 0; i < k; i++) { 00281 buckets[i]->set_prefetch_aggr(PageSize_); 00282 size = buckets[i]->size(); 00283 00284 // does the bucket fit into memory? 00285 if (size * sizeof(value_type) < space_left) { 00286 STXXL_VERBOSE1("random_shuffle: no recursion"); 00287 00288 // copy bucket into temp. array 00289 temp_array = new value_type[size]; 00290 for (j = 0; j < size; j++) { 00291 temp_array[j] = buckets[i]->top(); 00292 buckets[i]->pop(); 00293 } 00294 00295 // shuffle 00296 potentially_parallel:: 00297 random_shuffle(temp_array, temp_array + size, rand); 00298 00299 // write back 00300 for (j = 0; j < size; j++) { 00301 typename ExtIterator_::value_type tmp; 00302 tmp = temp_array[j]; 00303 out << tmp; 00304 } 00305 00306 // free memory 00307 delete[] temp_array; 00308 } else { 00309 STXXL_VERBOSE1("random_shuffle: recursion"); 00310 // copy bucket into temp. stxxl::vector 00311 temp_vector = new temp_vector_type(size); 00312 00313 for (j = 0; j < size; j++) { 00314 (*temp_vector)[j] = buckets[i]->top(); 00315 buckets[i]->pop(); 00316 } 00317 00318 pool.resize_prefetch(0); 00319 space_left += PageSize_ * BlockSize_; 00320 00321 STXXL_VERBOSE1("random_shuffle: Space left: " << space_left); 00322 00323 // recursive shuffle 00324 stxxl::random_shuffle(temp_vector->begin(), 00325 temp_vector->end(), rand, space_left); 00326 00327 pool.resize_prefetch(PageSize_); 00328 00329 // write back 00330 for (j = 0; j < size; j++) { 00331 typename ExtIterator_::value_type tmp; 00332 tmp = (*temp_vector)[j]; 00333 out << tmp; 00334 } 00335 00336 // free memory 00337 delete temp_vector; 00338 } 00339 00340 // free bucket 00341 delete buckets[i]; 00342 space_left += BlockSize_; 00343 } 00344 00345 delete[] buckets; 00346 00347 // leave part of the block after _end untouched 00348 if (last.block_offset()) 00349 { 00350 ExtIterator_ _last_block_end = last + (block_type::size - last.block_offset()); 00351 for ( ; _cur != _last_block_end; ++_cur) 00352 { 00353 typename ExtIterator_::value_type tmp; 00354 in >> tmp; 00355 out << tmp; 00356 } 00357 } 00358 } 00359 00360 //! \brief External equivalent of std::random_shuffle (specialization for stxxl::vector) 00361 //! \param first begin of the range to shuffle 00362 //! \param last end of the range to shuffle 00363 //! \param M number of bytes for internal use 00364 template <typename Tp_, typename AllocStrategy_, typename SzTp_, typename DiffTp_, 00365 unsigned BlockSize_, typename PgTp_, unsigned PageSize_> 00366 inline 00367 void random_shuffle(stxxl::vector_iterator<Tp_, AllocStrategy_, SzTp_, DiffTp_, BlockSize_, PgTp_, PageSize_> first, 00368 stxxl::vector_iterator<Tp_, AllocStrategy_, SzTp_, DiffTp_, BlockSize_, PgTp_, PageSize_> last, 00369 unsigned_type M) 00370 { 00371 stxxl::random_number<> rand; 00372 stxxl::random_shuffle(first, last, rand, M); 00373 } 00374 00375 //! \} 00376 00377 __STXXL_END_NAMESPACE 00378 00379 #endif // !STXXL_RANDOM_SHUFFLE_HEADER 00380 // vim: et:ts=4:sw=4