partial_sum.h

Go to the documentation of this file.
00001 // -*- C++ -*-
00002 
00003 // Copyright (C) 2007, 2008 Free Software Foundation, Inc.
00004 //
00005 // This file is part of the GNU ISO C++ Library.  This library is free
00006 // software; you can redistribute it and/or modify it under the terms
00007 // of the GNU General Public License as published by the Free Software
00008 // Foundation; either version 2, or (at your option) any later
00009 // version.
00010 
00011 // This library is distributed in the hope that it will be useful, but
00012 // WITHOUT ANY WARRANTY; without even the implied warranty of
00013 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00014 // General Public License for more details.
00015 
00016 // You should have received a copy of the GNU General Public License
00017 // along with this library; see the file COPYING.  If not, write to
00018 // the Free Software Foundation, 59 Temple Place - Suite 330, Boston,
00019 // MA 02111-1307, USA.
00020 
00021 // As a special exception, you may use this file as part of a free
00022 // software library without restriction.  Specifically, if other files
00023 // instantiate templates or use macros or inline functions from this
00024 // file, or you compile this file and link it with other files to
00025 // produce an executable, this file does not by itself cause the
00026 // resulting executable to be covered by the GNU General Public
00027 // License.  This exception does not however invalidate any other
00028 // reasons why the executable file might be covered by the GNU General
00029 // Public License.
00030 
00031 /** @file parallel/partial_sum.h
00032  *  @brief Parallel implementation of std::partial_sum(), i. e. prefix
00033  *  sums.
00034  *  This file is a GNU parallel extension to the Standard C++ Library.
00035  */
00036 
00037 // Written by Johannes Singler.
00038 
00039 #ifndef _GLIBCXX_PARALLEL_PARTIAL_SUM_H
00040 #define _GLIBCXX_PARALLEL_PARTIAL_SUM_H 1
00041 
00042 #include <omp.h>
00043 #include <new>
00044 #include <bits/stl_algobase.h>
00045 #include <parallel/parallel.h>
00046 #include <parallel/numericfwd.h>
00047 
00048 namespace __gnu_parallel
00049 {
00050   // Problem: there is no 0-element given.
00051 
00052 /** @brief Base case prefix sum routine.
00053   *  @param begin Begin iterator of input sequence.
00054   *  @param end End iterator of input sequence.
00055   *  @param result Begin iterator of output sequence.
00056   *  @param bin_op Associative binary function.
00057   *  @param value Start value. Must be passed since the neutral
00058   *  element is unknown in general.
00059   *  @return End iterator of output sequence. */
00060 template<typename InputIterator,
00061      typename OutputIterator,
00062      typename BinaryOperation>
00063   OutputIterator
00064   parallel_partial_sum_basecase(InputIterator begin, InputIterator end,
00065                 OutputIterator result, BinaryOperation bin_op,
00066                 typename std::iterator_traits
00067                 <InputIterator>::value_type value)
00068   {
00069     if (begin == end)
00070       return result;
00071 
00072     while (begin != end)
00073       {
00074         value = bin_op(value, *begin);
00075         *result = value;
00076         ++result;
00077         ++begin;
00078       }
00079     return result;
00080   }
00081 
00082 /** @brief Parallel partial sum implementation, two-phase approach,
00083     no recursion.
00084     *  @param begin Begin iterator of input sequence.
00085     *  @param end End iterator of input sequence.
00086     *  @param result Begin iterator of output sequence.
00087     *  @param bin_op Associative binary function.
00088     *  @param n Length of sequence.
00089     *  @param num_threads Number of threads to use.
00090     *  @return End iterator of output sequence.
00091     */
00092 template<typename InputIterator,
00093      typename OutputIterator,
00094      typename BinaryOperation>
00095   OutputIterator
00096   parallel_partial_sum_linear(InputIterator begin, InputIterator end,
00097                   OutputIterator result, BinaryOperation bin_op,
00098                   typename std::iterator_traits
00099                   <InputIterator>::difference_type n)
00100   {
00101     typedef std::iterator_traits<InputIterator> traits_type;
00102     typedef typename traits_type::value_type value_type;
00103     typedef typename traits_type::difference_type difference_type;
00104 
00105     if (begin == end)
00106       return result;
00107 
00108     thread_index_t num_threads =
00109         std::min<difference_type>(get_max_threads(), n - 1);
00110 
00111     if (num_threads < 2)
00112       {
00113         *result = *begin;
00114         return parallel_partial_sum_basecase(
00115             begin + 1, end, result + 1, bin_op, *begin);
00116       }
00117 
00118     difference_type* borders;
00119     value_type* sums;
00120 
00121     const _Settings& __s = _Settings::get();
00122 
00123 #   pragma omp parallel num_threads(num_threads)
00124       {
00125 #       pragma omp single
00126           {
00127             num_threads = omp_get_num_threads();
00128 
00129             borders = new difference_type[num_threads + 2];
00130 
00131             if (__s.partial_sum_dilation == 1.0f)
00132               equally_split(n, num_threads + 1, borders);
00133             else
00134               {
00135                 difference_type chunk_length =
00136                     ((double)n
00137              / ((double)num_threads + __s.partial_sum_dilation)),
00138           borderstart = n - num_threads * chunk_length;
00139                 borders[0] = 0;
00140                 for (int i = 1; i < (num_threads + 1); ++i)
00141                   {
00142                     borders[i] = borderstart;
00143                     borderstart += chunk_length;
00144                   }
00145                 borders[num_threads + 1] = n;
00146               }
00147 
00148             sums = static_cast<value_type*>(::operator new(sizeof(value_type)
00149                                * num_threads));
00150             OutputIterator target_end;
00151           } //single
00152 
00153         thread_index_t iam = omp_get_thread_num();
00154         if (iam == 0)
00155           {
00156             *result = *begin;
00157             parallel_partial_sum_basecase(begin + 1, begin + borders[1],
00158                       result + 1, bin_op, *begin);
00159             ::new(&(sums[iam])) value_type(*(result + borders[1] - 1));
00160           }
00161         else
00162           {
00163             ::new(&(sums[iam]))
00164           value_type(std::accumulate(begin + borders[iam] + 1,
00165                      begin + borders[iam + 1],
00166                      *(begin + borders[iam]),
00167                      bin_op,
00168                      __gnu_parallel::sequential_tag()));
00169           }
00170 
00171 #       pragma omp barrier
00172 
00173 #       pragma omp single
00174           parallel_partial_sum_basecase(
00175               sums + 1, sums + num_threads, sums + 1, bin_op, sums[0]);
00176 
00177 #       pragma omp barrier
00178 
00179         // Still same team.
00180         parallel_partial_sum_basecase(begin + borders[iam + 1],
00181                       begin + borders[iam + 2],
00182                       result + borders[iam + 1], bin_op,
00183                       sums[iam]);
00184       } //parallel
00185 
00186     ::operator delete(sums);
00187     delete[] borders;
00188 
00189     return result + n;
00190   }
00191 
00192 /** @brief Parallel partial sum front-end.
00193   *  @param begin Begin iterator of input sequence.
00194   *  @param end End iterator of input sequence.
00195   *  @param result Begin iterator of output sequence.
00196   *  @param bin_op Associative binary function.
00197   *  @return End iterator of output sequence. */
00198 template<typename InputIterator,
00199      typename OutputIterator,
00200      typename BinaryOperation>
00201   OutputIterator
00202   parallel_partial_sum(InputIterator begin, InputIterator end,
00203                        OutputIterator result, BinaryOperation bin_op)
00204   {
00205     _GLIBCXX_CALL(begin - end)
00206 
00207     typedef std::iterator_traits<InputIterator> traits_type;
00208     typedef typename traits_type::value_type value_type;
00209     typedef typename traits_type::difference_type difference_type;
00210 
00211     difference_type n = end - begin;
00212 
00213     switch (_Settings::get().partial_sum_algorithm)
00214       {
00215       case LINEAR:
00216         // Need an initial offset.
00217         return parallel_partial_sum_linear(begin, end, result, bin_op, n);
00218       default:
00219     // Partial_sum algorithm not implemented.
00220         _GLIBCXX_PARALLEL_ASSERT(0);
00221         return result + n;
00222       }
00223   }
00224 }
00225 
00226 #endif

Generated on Sat Oct 25 05:09:06 2008 for libstdc++ by  doxygen 1.5.6