00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037
00038 #ifndef _GLIBCXX_PARALLEL_RANDOM_SHUFFLE_H
00039 #define _GLIBCXX_PARALLEL_RANDOM_SHUFFLE_H 1
00040
00041 #include <limits>
00042 #include <bits/stl_numeric.h>
00043 #include <parallel/parallel.h>
00044 #include <parallel/random_number.h>
00045
00046 namespace __gnu_parallel
00047 {
00048
00049
00050
00051
00052
00053 typedef unsigned short bin_index;
00054
00055
00056
00057 template<typename RandomAccessIterator>
00058 struct DRandomShufflingGlobalData
00059 {
00060 typedef std::iterator_traits<RandomAccessIterator> traits_type;
00061 typedef typename traits_type::value_type value_type;
00062 typedef typename traits_type::difference_type difference_type;
00063
00064
00065 RandomAccessIterator& source;
00066
00067
00068 value_type** temporaries;
00069
00070
00071
00072
00073 difference_type** dist;
00074
00075
00076 difference_type* starts;
00077
00078
00079
00080 thread_index_t* bin_proc;
00081
00082
00083 int num_bins;
00084
00085
00086 int num_bits;
00087
00088
00089 DRandomShufflingGlobalData(RandomAccessIterator& _source)
00090 : source(_source) { }
00091 };
00092
00093
00094
00095
00096 template<typename RandomAccessIterator, typename RandomNumberGenerator>
00097 struct DRSSorterPU
00098 {
00099
00100 int num_threads;
00101
00102
00103 bin_index bins_begin;
00104
00105
00106 bin_index bins_end;
00107
00108
00109 uint32 seed;
00110
00111
00112 DRandomShufflingGlobalData<RandomAccessIterator>* sd;
00113 };
00114
00115
00116
00117
00118
00119 template<typename RandomNumberGenerator>
00120 inline int
00121 random_number_pow2(int logp, RandomNumberGenerator& rng)
00122 { return rng.genrand_bits(logp); }
00123
00124
00125
00126 template<typename RandomAccessIterator, typename RandomNumberGenerator>
00127 void
00128 parallel_random_shuffle_drs_pu(DRSSorterPU<RandomAccessIterator,
00129 RandomNumberGenerator>* pus)
00130 {
00131 typedef std::iterator_traits<RandomAccessIterator> traits_type;
00132 typedef typename traits_type::value_type value_type;
00133 typedef typename traits_type::difference_type difference_type;
00134
00135 thread_index_t iam = omp_get_thread_num();
00136 DRSSorterPU<RandomAccessIterator, RandomNumberGenerator>* d = &pus[iam];
00137 DRandomShufflingGlobalData<RandomAccessIterator>* sd = d->sd;
00138
00139
00140 difference_type length = sd->starts[iam + 1] - sd->starts[iam];
00141 bin_index* oracles = new bin_index[length];
00142 difference_type* dist = new difference_type[sd->num_bins + 1];
00143 bin_index* bin_proc = new bin_index[sd->num_bins];
00144 value_type** temporaries = new value_type*[d->num_threads];
00145
00146
00147 for (bin_index b = 0; b < sd->num_bins + 1; ++b)
00148 dist[b] = 0;
00149 int num_bits = sd->num_bits;
00150
00151 random_number rng(d->seed);
00152
00153
00154 for (difference_type i = 0; i < length; ++i)
00155 {
00156 bin_index oracle = random_number_pow2(num_bits, rng);
00157 oracles[i] = oracle;
00158
00159
00160 ++(dist[oracle + 1]);
00161 }
00162
00163 for (bin_index b = 0; b < sd->num_bins + 1; ++b)
00164 sd->dist[b][iam + 1] = dist[b];
00165
00166 # pragma omp barrier
00167
00168 # pragma omp single
00169 {
00170
00171
00172 for (bin_index s = 0; s < sd->num_bins; ++s)
00173 __gnu_sequential::partial_sum(sd->dist[s + 1],
00174 sd->dist[s + 1] + d->num_threads + 1,
00175 sd->dist[s + 1]);
00176 }
00177
00178 # pragma omp barrier
00179
00180 sequence_index_t offset = 0, global_offset = 0;
00181 for (bin_index s = 0; s < d->bins_begin; ++s)
00182 global_offset += sd->dist[s + 1][d->num_threads];
00183
00184 # pragma omp barrier
00185
00186 for (bin_index s = d->bins_begin; s < d->bins_end; ++s)
00187 {
00188 for (int t = 0; t < d->num_threads + 1; ++t)
00189 sd->dist[s + 1][t] += offset;
00190 offset = sd->dist[s + 1][d->num_threads];
00191 }
00192
00193 sd->temporaries[iam] = static_cast<value_type*>(
00194 ::operator new(sizeof(value_type) * offset));
00195
00196 # pragma omp barrier
00197
00198
00199 for (bin_index b = 0; b < sd->num_bins + 1; ++b)
00200 dist[b] = sd->dist[b][iam];
00201 for (bin_index b = 0; b < sd->num_bins; ++b)
00202 bin_proc[b] = sd->bin_proc[b];
00203 for (thread_index_t t = 0; t < d->num_threads; ++t)
00204 temporaries[t] = sd->temporaries[t];
00205
00206 RandomAccessIterator source = sd->source;
00207 difference_type start = sd->starts[iam];
00208
00209
00210 for (difference_type i = 0; i < length; ++i)
00211 {
00212 bin_index target_bin = oracles[i];
00213 thread_index_t target_p = bin_proc[target_bin];
00214
00215
00216 ::new(&(temporaries[target_p][dist[target_bin + 1]++]))
00217 value_type(*(source + i + start));
00218 }
00219
00220 delete[] oracles;
00221 delete[] dist;
00222 delete[] bin_proc;
00223 delete[] temporaries;
00224
00225 # pragma omp barrier
00226
00227
00228 for (bin_index b = d->bins_begin; b < d->bins_end; ++b)
00229 {
00230 value_type* begin =
00231 sd->temporaries[iam] +
00232 ((b == d->bins_begin) ? 0 : sd->dist[b][d->num_threads]),
00233 * end =
00234 sd->temporaries[iam] + sd->dist[b + 1][d->num_threads];
00235 sequential_random_shuffle(begin, end, rng);
00236 std::copy(begin, end, sd->source + global_offset +
00237 ((b == d->bins_begin) ? 0 : sd->dist[b][d->num_threads]));
00238 }
00239
00240 ::operator delete(sd->temporaries[iam]);
00241 }
00242
00243
00244
00245 template<typename T>
00246 T
00247 round_up_to_pow2(T x)
00248 {
00249 if (x <= 1)
00250 return 1;
00251 else
00252 return (T)1 << (log2(x - 1) + 1);
00253 }
00254
00255
00256
00257
00258
00259
00260
00261
00262 template<typename RandomAccessIterator, typename RandomNumberGenerator>
00263 void
00264 parallel_random_shuffle_drs(RandomAccessIterator begin,
00265 RandomAccessIterator end,
00266 typename std::iterator_traits
00267 <RandomAccessIterator>::difference_type n,
00268 thread_index_t num_threads,
00269 RandomNumberGenerator& rng)
00270 {
00271 typedef std::iterator_traits<RandomAccessIterator> traits_type;
00272 typedef typename traits_type::value_type value_type;
00273 typedef typename traits_type::difference_type difference_type;
00274
00275 _GLIBCXX_CALL(n)
00276
00277 const _Settings& __s = _Settings::get();
00278
00279 if (num_threads > n)
00280 num_threads = static_cast<thread_index_t>(n);
00281
00282 bin_index num_bins, num_bins_cache;
00283
00284 #if _GLIBCXX_RANDOM_SHUFFLE_CONSIDER_L1
00285
00286
00287
00288 num_bins_cache = std::max<difference_type>(
00289 1, n / (__s.L1_cache_size_lb / sizeof(value_type)));
00290 num_bins_cache = round_up_to_pow2(num_bins_cache);
00291
00292
00293
00294 num_bins = std::min<difference_type>(n, num_bins_cache);
00295
00296 #if _GLIBCXX_RANDOM_SHUFFLE_CONSIDER_TLB
00297
00298 num_bins = std::min<difference_type>(__s.TLB_size / 2, num_bins);
00299 #endif
00300 num_bins = round_up_to_pow2(num_bins);
00301
00302 if (num_bins < num_bins_cache)
00303 {
00304 #endif
00305
00306
00307 num_bins_cache = static_cast<bin_index>(std::max<difference_type>(
00308 1, n / (__s.L2_cache_size / sizeof(value_type))));
00309 num_bins_cache = round_up_to_pow2(num_bins_cache);
00310
00311
00312 num_bins = static_cast<bin_index>(
00313 std::min(n, static_cast<difference_type>(num_bins_cache)));
00314
00315 #if _GLIBCXX_RANDOM_SHUFFLE_CONSIDER_TLB
00316
00317 num_bins = std::min(
00318 static_cast<difference_type>(__s.TLB_size / 2), num_bins);
00319 #endif
00320 num_bins = round_up_to_pow2(num_bins);
00321 #if _GLIBCXX_RANDOM_SHUFFLE_CONSIDER_L1
00322 }
00323 #endif
00324
00325 num_threads = std::min<bin_index>(num_threads, num_bins);
00326
00327 if (num_threads <= 1)
00328 return sequential_random_shuffle(begin, end, rng);
00329
00330 DRandomShufflingGlobalData<RandomAccessIterator> sd(begin);
00331 DRSSorterPU<RandomAccessIterator, random_number >* pus;
00332 difference_type* starts;
00333
00334 # pragma omp parallel num_threads(num_threads)
00335 {
00336 thread_index_t num_threads = omp_get_num_threads();
00337 # pragma omp single
00338 {
00339 pus = new DRSSorterPU<RandomAccessIterator, random_number>
00340 [num_threads];
00341
00342 sd.temporaries = new value_type*[num_threads];
00343 sd.dist = new difference_type*[num_bins + 1];
00344 sd.bin_proc = new thread_index_t[num_bins];
00345 for (bin_index b = 0; b < num_bins + 1; ++b)
00346 sd.dist[b] = new difference_type[num_threads + 1];
00347 for (bin_index b = 0; b < (num_bins + 1); ++b)
00348 {
00349 sd.dist[0][0] = 0;
00350 sd.dist[b][0] = 0;
00351 }
00352 starts = sd.starts = new difference_type[num_threads + 1];
00353 int bin_cursor = 0;
00354 sd.num_bins = num_bins;
00355 sd.num_bits = log2(num_bins);
00356
00357 difference_type chunk_length = n / num_threads,
00358 split = n % num_threads, start = 0;
00359 difference_type bin_chunk_length = num_bins / num_threads,
00360 bin_split = num_bins % num_threads;
00361 for (thread_index_t i = 0; i < num_threads; ++i)
00362 {
00363 starts[i] = start;
00364 start += (i < split) ? (chunk_length + 1) : chunk_length;
00365 int j = pus[i].bins_begin = bin_cursor;
00366
00367
00368 bin_cursor += (i < bin_split) ?
00369 (bin_chunk_length + 1) : bin_chunk_length;
00370 pus[i].bins_end = bin_cursor;
00371 for (; j < bin_cursor; ++j)
00372 sd.bin_proc[j] = i;
00373 pus[i].num_threads = num_threads;
00374 pus[i].seed = rng(std::numeric_limits<uint32>::max());
00375 pus[i].sd = &sd;
00376 }
00377 starts[num_threads] = start;
00378 }
00379
00380 parallel_random_shuffle_drs_pu(pus);
00381 }
00382
00383 delete[] starts;
00384 delete[] sd.bin_proc;
00385 for (int s = 0; s < (num_bins + 1); ++s)
00386 delete[] sd.dist[s];
00387 delete[] sd.dist;
00388 delete[] sd.temporaries;
00389
00390 delete[] pus;
00391 }
00392
00393
00394
00395
00396
00397
00398 template<typename RandomAccessIterator, typename RandomNumberGenerator>
00399 void
00400 sequential_random_shuffle(RandomAccessIterator begin,
00401 RandomAccessIterator end,
00402 RandomNumberGenerator& rng)
00403 {
00404 typedef std::iterator_traits<RandomAccessIterator> traits_type;
00405 typedef typename traits_type::value_type value_type;
00406 typedef typename traits_type::difference_type difference_type;
00407
00408 difference_type n = end - begin;
00409 const _Settings& __s = _Settings::get();
00410
00411 bin_index num_bins, num_bins_cache;
00412
00413 #if _GLIBCXX_RANDOM_SHUFFLE_CONSIDER_L1
00414
00415 num_bins_cache =
00416 std::max<difference_type>
00417 (1, n / (__s.L1_cache_size_lb / sizeof(value_type)));
00418 num_bins_cache = round_up_to_pow2(num_bins_cache);
00419
00420
00421
00422 num_bins = std::min(n, (difference_type)num_bins_cache);
00423 #if _GLIBCXX_RANDOM_SHUFFLE_CONSIDER_TLB
00424
00425 num_bins = std::min((difference_type)__s.TLB_size / 2, num_bins);
00426 #endif
00427 num_bins = round_up_to_pow2(num_bins);
00428
00429 if (num_bins < num_bins_cache)
00430 {
00431 #endif
00432
00433 num_bins_cache =
00434 static_cast<bin_index>(std::max<difference_type>(
00435 1, n / (__s.L2_cache_size / sizeof(value_type))));
00436 num_bins_cache = round_up_to_pow2(num_bins_cache);
00437
00438
00439
00440 num_bins = static_cast<bin_index>
00441 (std::min(n, static_cast<difference_type>(num_bins_cache)));
00442
00443 #if _GLIBCXX_RANDOM_SHUFFLE_CONSIDER_TLB
00444
00445 num_bins =
00446 std::min<difference_type>(__s.TLB_size / 2, num_bins);
00447 #endif
00448 num_bins = round_up_to_pow2(num_bins);
00449 #if _GLIBCXX_RANDOM_SHUFFLE_CONSIDER_L1
00450 }
00451 #endif
00452
00453 int num_bits = log2(num_bins);
00454
00455 if (num_bins > 1)
00456 {
00457 value_type* target = static_cast<value_type*>(
00458 ::operator new(sizeof(value_type) * n));
00459 bin_index* oracles = new bin_index[n];
00460 difference_type* dist0 = new difference_type[num_bins + 1],
00461 * dist1 = new difference_type[num_bins + 1];
00462
00463 for (int b = 0; b < num_bins + 1; ++b)
00464 dist0[b] = 0;
00465
00466 random_number bitrng(rng(0xFFFFFFFF));
00467
00468 for (difference_type i = 0; i < n; ++i)
00469 {
00470 bin_index oracle = random_number_pow2(num_bits, bitrng);
00471 oracles[i] = oracle;
00472
00473
00474 ++(dist0[oracle + 1]);
00475 }
00476
00477
00478 __gnu_sequential::partial_sum(dist0, dist0 + num_bins + 1, dist0);
00479
00480 for (int b = 0; b < num_bins + 1; ++b)
00481 dist1[b] = dist0[b];
00482
00483
00484 for (difference_type i = 0; i < n; ++i)
00485 ::new(&(target[(dist0[oracles[i]])++])) value_type(*(begin + i));
00486
00487 for (int b = 0; b < num_bins; ++b)
00488 {
00489 sequential_random_shuffle(target + dist1[b],
00490 target + dist1[b + 1],
00491 rng);
00492 }
00493
00494
00495 std::copy(target, target + n, begin);
00496
00497 delete[] dist0;
00498 delete[] dist1;
00499 delete[] oracles;
00500 ::operator delete(target);
00501 }
00502 else
00503 __gnu_sequential::random_shuffle(begin, end, rng);
00504 }
00505
00506
00507
00508
00509
00510
00511 template<typename RandomAccessIterator, typename RandomNumberGenerator>
00512 inline void
00513 parallel_random_shuffle(RandomAccessIterator begin,
00514 RandomAccessIterator end,
00515 RandomNumberGenerator rng = random_number())
00516 {
00517 typedef std::iterator_traits<RandomAccessIterator> traits_type;
00518 typedef typename traits_type::difference_type difference_type;
00519 difference_type n = end - begin;
00520 parallel_random_shuffle_drs(begin, end, n, get_max_threads(), rng) ;
00521 }
00522
00523 }
00524
00525 #endif