Stxxl
1.4.0
|
00001 /*************************************************************************** 00002 * stream/test_naive_transpose.cpp 00003 * 00004 * Part of the STXXL. See http://stxxl.sourceforge.net 00005 * 00006 * Copyright (C) 2010 Andreas Beckmann <beckmann@cs.uni-frankfurt.de> 00007 * 00008 * Distributed under the Boost Software License, Version 1.0. 00009 * (See accompanying file LICENSE_1_0.txt or copy at 00010 * http://www.boost.org/LICENSE_1_0.txt) 00011 **************************************************************************/ 00012 00013 //! \example stream/test_naive_transpose.cpp 00014 //! This is an example of how to use some basic algorithms from the 00015 //! stream package. The example transposes a 2D-matrix which is serialized 00016 //! as an 1D-vector. 00017 //! 00018 //! This transpose implementation is trivial, not neccessarily fast or 00019 //! efficient. There are more sophisticated and faster algorithms for external 00020 //! memory matrix transpostion than sorting, see e.g. J.S. Vitter: Algorithms 00021 //! and Data Structures for External Memory, Chapter 7.2. 00022 00023 #include <limits> 00024 #include <vector> 00025 #include <stxxl/stream> 00026 #include <stxxl/vector> 00027 00028 00029 class streamop_matrix_transpose 00030 { 00031 unsigned cut, repeat; 00032 unsigned pos; 00033 00034 public: 00035 typedef unsigned value_type; 00036 00037 streamop_matrix_transpose(unsigned cut, unsigned repeat) : cut(cut), repeat(repeat), pos(0) 00038 { } 00039 00040 value_type operator * () const 00041 { 00042 return (pos % cut) * repeat + pos / cut; 00043 } 00044 00045 streamop_matrix_transpose & operator ++ () 00046 { 00047 ++pos; 00048 return *this; 00049 } 00050 00051 bool empty() const 00052 { 00053 return pos == (cut * repeat); 00054 } 00055 }; 00056 00057 template <typename T> 00058 struct cmp_tuple_first : std::binary_function<T, T, bool> 00059 { 00060 typedef T value_type; 00061 typedef typename value_type::first_type first_value_type; 00062 00063 bool operator () (const value_type & a, const value_type & b) const 00064 { 00065 return a.first < b.first; 00066 } 00067 00068 value_type min_value() const 00069 { 00070 return value_type((std::numeric_limits<first_value_type>::min)(), 0); 00071 } 00072 00073 value_type max_value() const 00074 { 00075 return value_type((std::numeric_limits<first_value_type>::max)(), 0); 00076 } 00077 }; 00078 00079 template <typename Vector> 00080 void dump_upper_left(const Vector & v, unsigned rows, unsigned cols, unsigned nx, unsigned ny) 00081 { 00082 int w = 5; 00083 00084 // assumes row-major layout in the vector serialization of the matrix 00085 for (unsigned y = 0; y < ny && y < rows; ++y) { 00086 std::cout << std::setw(w) << y << ":"; 00087 for (unsigned x = 0; x < nx && x < cols; ++x) 00088 std::cout << " " << std::setw(w) << v[y * cols + x]; 00089 if (nx < cols) 00090 std::cout << " ..."; 00091 std::cout << std::endl; 00092 } 00093 if (ny < rows) 00094 std::cout << std::setw(w) << "..." << std::endl; 00095 std::cout << std::endl; 00096 } 00097 00098 int main() 00099 { 00100 unsigned num_cols = 10000; 00101 unsigned num_rows = 5000; 00102 00103 // buffers for streamify and materialize, 00104 // block size matches the block size of the input/output vector 00105 unsigned numbuffers = 2 * stxxl::config::get_instance()->disks_number(); 00106 00107 // RAM to be used for sorting (in bytes) 00108 size_t memory_for_sorting = 1 << 28; 00109 00110 /////////////////////////////////////////////////////////////////////// 00111 00112 typedef stxxl::VECTOR_GENERATOR<unsigned>::result array_type; 00113 00114 array_type input(num_rows * num_cols); 00115 array_type output(num_cols * num_rows); 00116 00117 // fill the input array with some values 00118 for (unsigned i = 0; i < num_rows * num_cols; ++i) 00119 input[i] = i; 00120 00121 std::cout << "Before transpose:" << std::endl; 00122 dump_upper_left(input, num_rows, num_cols, 10, 10); 00123 00124 stxxl::stats_data stats_before(*stxxl::stats::get_instance()); 00125 00126 // HERE streaming part begins (streamifying) 00127 // create input stream 00128 #ifdef BOOST_MSVC 00129 typedef stxxl::stream::streamify_traits<array_type::iterator>::stream_type input_stream_type; 00130 #else 00131 typedef __typeof__(stxxl::stream::streamify(input.begin(), input.end())) input_stream_type; 00132 #endif 00133 input_stream_type input_stream = stxxl::stream::streamify(input.begin(), input.end(), numbuffers); 00134 00135 // create stream of destination indices 00136 typedef streamop_matrix_transpose destination_index_stream_type; 00137 destination_index_stream_type destination_index_stream(num_cols, num_rows); 00138 00139 // create tuple stream: (key, value) 00140 typedef stxxl::stream::make_tuple<destination_index_stream_type, input_stream_type> tuple_stream_type; 00141 tuple_stream_type tuple_stream(destination_index_stream, input_stream); 00142 00143 // sort tuples by first entry (key) 00144 typedef cmp_tuple_first<tuple_stream_type::value_type> cmp_type; 00145 typedef stxxl::stream::sort<tuple_stream_type, cmp_type> sorted_tuple_stream_type; 00146 sorted_tuple_stream_type sorted_tuple_stream(tuple_stream, cmp_type(), memory_for_sorting); 00147 00148 // discard the key we used for sorting, keep second entry of the tuple only (value) 00149 typedef stxxl::stream::choose<sorted_tuple_stream_type, 2> sorted_element_stream_type; 00150 sorted_element_stream_type sorted_element_stream(sorted_tuple_stream); 00151 00152 // HERE streaming part ends (materializing) 00153 array_type::iterator o = stxxl::stream::materialize(sorted_element_stream, output.begin(), output.end(), numbuffers); 00154 assert(o == output.end()); 00155 assert(sorted_element_stream.empty()); 00156 00157 stxxl::stats_data stats_after(*stxxl::stats::get_instance()); 00158 00159 std::cout << "After transpose:" << std::endl; 00160 dump_upper_left(output, num_cols, num_rows, 10, 10); 00161 00162 std::cout << "I/O stats (streaming part only!)" << std::endl << (stats_after - stats_before); 00163 00164 return 0; 00165 } 00166 00167 // vim: et:ts=4:sw=4