source: trunk/sys/libgomp/iter.c @ 8

Last change on this file since 8 was 1, checked in by alain, 8 years ago

First import

File size: 7.9 KB
RevLine 
[1]1/* Copyright (C) 2005, 2008, 2009 Free Software Foundation, Inc.
2   Contributed by Richard Henderson <rth@redhat.com>.
3
4   This file is part of the GNU OpenMP Library (libgomp).
5
6   Libgomp is free software; you can redistribute it and/or modify it
7   under the terms of the GNU General Public License as published by
8   the Free Software Foundation; either version 3, or (at your option)
9   any later version.
10
11   Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
12   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
13   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
14   more details.
15
16   Under Section 7 of GPL version 3, you are granted additional
17   permissions described in the GCC Runtime Library Exception, version
18   3.1, as published by the Free Software Foundation.
19
20   You should have received a copy of the GNU General Public License and
21   a copy of the GCC Runtime Library Exception along with this program;
22   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
23   <http://www.gnu.org/licenses/>.  */
24
25/* This file contains routines for managing work-share iteration, both
26   for loops and sections.  */
27
28#include <gomp/libgomp.h>
29#include <stdlib.h>
30
31/* This function implements the STATIC scheduling method.  The caller should
32   iterate *pstart <= x < *pend.  Return zero if there are more iterations
33   to perform; nonzero if not.  Return less than 0 if this thread had
34   received the absolutely last iteration.  */
35
36int
37gomp_iter_static_next (long *pstart, long *pend)
38{
39  struct gomp_thread *thr = gomp_thread ();
40  struct gomp_team *team = thr->ts.team;
41  struct gomp_work_share *ws = thr->ts.work_share;
42  unsigned long nthreads = team ? team->nthreads : 1;
43
44  if ((int)thr->ts.static_trip == -1)
45    return -1;
46
47  /* Quick test for degenerate teams and orphaned constructs.  */
48  if (nthreads == 1)
49    {
50      *pstart = ws->next;
51      *pend = ws->end;
52      thr->ts.static_trip = -1;
53      return ws->next == ws->end;
54    }
55
56  /* We interpret chunk_size zero as "unspecified", which means that we
57     should break up the iterations such that each thread makes only one
58     trip through the outer loop.  */
59  if (ws->chunk_size == 0)
60    {
61      unsigned long n, q, i;
62      unsigned long s0, e0;
63      long s, e;
64
65      if (thr->ts.static_trip > 0)
66        return 1;
67
68      /* Compute the total number of iterations.  */
69      s = ws->incr + (ws->incr > 0 ? -1 : 1);
70      n = (ws->end - ws->next + s) / ws->incr;
71      i = thr->ts.team_id;
72
73      /* Compute the "zero-based" start and end points.  That is, as
74         if the loop began at zero and incremented by one.  */
75      q = n / nthreads;
76      q += (q * nthreads != n);
77      s0 = q * i;
78      e0 = s0 + q;
79      if (e0 > n)
80        e0 = n;
81
82      /* Notice when no iterations allocated for this thread.  */
83      if (s0 >= e0)
84        {
85          thr->ts.static_trip = 1;
86          return 1;
87        }
88
89      /* Transform these to the actual start and end numbers.  */
90      s = (long)s0 * ws->incr + ws->next;
91      e = (long)e0 * ws->incr + ws->next;
92
93      *pstart = s;
94      *pend = e;
95      thr->ts.static_trip = (e0 == n ? -1 : 1);
96      return 0;
97    }
98  else
99    {
100      unsigned long n, s0, e0, i, c;
101      long s, e;
102
103      /* Otherwise, each thread gets exactly chunk_size iterations
104         (if available) each time through the loop.  */
105
106      s = ws->incr + (ws->incr > 0 ? -1 : 1);
107      n = (ws->end - ws->next + s) / ws->incr;
108      i = thr->ts.team_id;
109      c = ws->chunk_size;
110
111      /* Initial guess is a C sized chunk positioned nthreads iterations
112         in, offset by our thread number.  */
113      s0 = (thr->ts.static_trip * nthreads + i) * c;
114      e0 = s0 + c;
115
116      /* Detect overflow.  */
117      if (s0 >= n)
118        return 1;
119      if (e0 > n)
120        e0 = n;
121
122      /* Transform these to the actual start and end numbers.  */
123      s = (long)s0 * ws->incr + ws->next;
124      e = (long)e0 * ws->incr + ws->next;
125
126      *pstart = s;
127      *pend = e;
128
129      if (e0 == n)
130        thr->ts.static_trip = -1;
131      else
132        thr->ts.static_trip++;
133      return 0;
134    }
135}
136
137
138/* This function implements the DYNAMIC scheduling method.  Arguments are
139   as for gomp_iter_static_next.  This function must be called with ws->lock
140   held.  */
141
142
143int gomp_iter_dynamic_next_locked (long *pstart, long *pend)
144{
145  struct gomp_thread *thr = gomp_thread ();
146  struct gomp_work_share *ws = thr->ts.work_share;
147  long start, end, chunk, left;
148
149  start = ws->next;
150  if (start == ws->end)
151    return false;
152
153  chunk = ws->chunk_size;
154  left = ws->end - start;
155  if (ws->incr < 0)
156    {
157      if (chunk < left)
158        chunk = left;
159    }
160  else
161    {
162      if (chunk > left)
163        chunk = left;
164    }
165  end = start + chunk;
166
167  ws->next = end;
168  *pstart = start;
169  *pend = end;
170  return true;
171}
172
173
174#ifdef HAVE_SYNC_BUILTINS
175/* Similar, but doesn't require the lock held, and uses compare-and-swap
176   instead.  Note that the only memory value that changes is ws->next.  */
177
178bool
179gomp_iter_dynamic_next (long *pstart, long *pend)
180{
181  struct gomp_thread *thr = gomp_thread ();
182  struct gomp_work_share *ws = thr->ts.work_share;
183  long start, end, nend, chunk, incr;
184
185  end = ws->end;
186  incr = ws->incr;
187  chunk = ws->chunk_size;
188
189  if (__builtin_expect (ws->mode, 1))
190    {
191      long tmp = __sync_fetch_and_add (&ws->next, chunk);
192      if (incr > 0)
193        {
194          if (tmp >= end)
195            return false;
196          nend = tmp + chunk;
197          if (nend > end)
198            nend = end;
199          *pstart = tmp;
200          *pend = nend;
201          return true;
202        }
203      else
204        {
205          if (tmp <= end)
206            return false;
207          nend = tmp + chunk;
208          if (nend < end)
209            nend = end;
210          *pstart = tmp;
211          *pend = nend;
212          return true;
213        }
214    }
215
216  start = ws->next;
217  while (1)
218    {
219      long left = end - start;
220      long tmp;
221
222      if (start == end)
223        return false;
224
225      if (incr < 0)
226        {
227          if (chunk < left)
228            chunk = left;
229        }
230      else
231        {
232          if (chunk > left)
233            chunk = left;
234        }
235      nend = start + chunk;
236
237      tmp = __sync_val_compare_and_swap (&ws->next, start, nend);
238      if (__builtin_expect (tmp == start, 1))
239        break;
240
241      start = tmp;
242    }
243
244  *pstart = start;
245  *pend = nend;
246  return true;
247}
248#endif /* HAVE_SYNC_BUILTINS */
249
250
251/* This function implements the GUIDED scheduling method.  Arguments are
252   as for gomp_iter_static_next.  This function must be called with the
253   work share lock held.  */
254
255int
256gomp_iter_guided_next_locked (long *pstart, long *pend)
257{
258  struct gomp_thread *thr = gomp_thread ();
259  struct gomp_work_share *ws = thr->ts.work_share;
260  struct gomp_team *team = thr->ts.team;
261  unsigned long nthreads = team ? team->nthreads : 1;
262  unsigned long n, q;
263  long start, end;
264
265  if (ws->next == ws->end)
266    return false;
267
268  start = ws->next;
269  n = (ws->end - start) / ws->incr;
270  q = (n + nthreads - 1) / nthreads;
271
272  if (q < (unsigned long)ws->chunk_size)
273    q = ws->chunk_size;
274  if (q <= n)
275    end = start + q * ws->incr;
276  else
277    end = ws->end;
278
279  ws->next = end;
280  *pstart = start;
281  *pend = end;
282  return true;
283}
284
285#ifdef HAVE_SYNC_BUILTINS
286/* Similar, but doesn't require the lock held, and uses compare-and-swap
287   instead.  Note that the only memory value that changes is ws->next.  */
288
289bool
290gomp_iter_guided_next (long *pstart, long *pend)
291{
292  struct gomp_thread *thr = gomp_thread ();
293  struct gomp_work_share *ws = thr->ts.work_share;
294  struct gomp_team *team = thr->ts.team;
295  unsigned long nthreads = team ? team->nthreads : 1;
296  long start, end, nend, incr;
297  unsigned long chunk_size;
298
299  start = ws->next;
300  end = ws->end;
301  incr = ws->incr;
302  chunk_size = ws->chunk_size;
303
304  while (1)
305    {
306      unsigned long n, q;
307      long tmp;
308
309      if (start == end)
310        return false;
311
312      n = (end - start) / incr;
313      q = (n + nthreads - 1) / nthreads;
314
315      if (q < chunk_size)
316        q = chunk_size;
317      if (__builtin_expect (q <= n, 1))
318        nend = start + q * incr;
319      else
320        nend = end;
321
322      tmp = __sync_val_compare_and_swap (&ws->next, start, nend);
323      if (__builtin_expect (tmp == start, 1))
324        break;
325
326      start = tmp;
327    }
328
329  *pstart = start;
330  *pend = nend;
331  return true;
332}
333#endif /* HAVE_SYNC_BUILTINS */
Note: See TracBrowser for help on using the repository browser.