source: trunk/sys/libgomp/iter_ull.c @ 91

Last change on this file since 91 was 1, checked in by alain, 8 years ago

First import

File size: 8.6 KB
Line 
1/* Copyright (C) 2005, 2008, 2009 Free Software Foundation, Inc.
2   Contributed by Richard Henderson <rth@redhat.com>.
3
4   This file is part of the GNU OpenMP Library (libgomp).
5
6   Libgomp is free software; you can redistribute it and/or modify it
7   under the terms of the GNU General Public License as published by
8   the Free Software Foundation; either version 3, or (at your option)
9   any later version.
10
11   Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
12   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
13   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
14   more details.
15
16   Under Section 7 of GPL version 3, you are granted additional
17   permissions described in the GCC Runtime Library Exception, version
18   3.1, as published by the Free Software Foundation.
19
20   You should have received a copy of the GNU General Public License and
21   a copy of the GCC Runtime Library Exception along with this program;
22   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
23   <http://www.gnu.org/licenses/>.  */
24
25/* This file contains routines for managing work-share iteration, both
26   for loops and sections.  */
27
28#include <gomp/libgomp.h>
29#include <stdlib.h>
30
31typedef unsigned long long gomp_ull;
32
33/* This function implements the STATIC scheduling method.  The caller should
34   iterate *pstart <= x < *pend.  Return zero if there are more iterations
35   to perform; nonzero if not.  Return less than 0 if this thread had
36   received the absolutely last iteration.  */
37
38int
39gomp_iter_ull_static_next (gomp_ull *pstart, gomp_ull *pend)
40{
41  struct gomp_thread *thr = gomp_thread ();
42  struct gomp_team *team = thr->ts.team;
43  struct gomp_work_share *ws = thr->ts.work_share;
44  unsigned long nthreads = team ? team->nthreads : 1;
45
46  if ((int)thr->ts.static_trip == -1)
47    return -1;
48
49  /* Quick test for degenerate teams and orphaned constructs.  */
50  if (nthreads == 1)
51    {
52      *pstart = ws->next_ull;
53      *pend = ws->end_ull;
54      thr->ts.static_trip = -1;
55      return ws->next_ull == ws->end_ull;
56    }
57
58  /* We interpret chunk_size zero as "unspecified", which means that we
59     should break up the iterations such that each thread makes only one
60     trip through the outer loop.  */
61  if (ws->chunk_size_ull == 0)
62    {
63      gomp_ull n, q, i, s0, e0, s, e;
64
65      if (thr->ts.static_trip > 0)
66        return 1;
67
68      /* Compute the total number of iterations.  */
69      if (__builtin_expect (ws->mode, 0) == 0)
70        n = (ws->end_ull - ws->next_ull + ws->incr_ull - 1) / ws->incr_ull;
71      else
72        n = (ws->next_ull - ws->end_ull - ws->incr_ull - 1) / -ws->incr_ull;
73      i = thr->ts.team_id;
74
75      /* Compute the "zero-based" start and end points.  That is, as
76         if the loop began at zero and incremented by one.  */
77      q = n / nthreads;
78      q += (q * nthreads != n);
79      s0 = q * i;
80      e0 = s0 + q;
81      if (e0 > n)
82        e0 = n;
83
84      /* Notice when no iterations allocated for this thread.  */
85      if (s0 >= e0)
86        {
87          thr->ts.static_trip = 1;
88          return 1;
89        }
90
91      /* Transform these to the actual start and end numbers.  */
92      s = s0 * ws->incr_ull + ws->next_ull;
93      e = e0 * ws->incr_ull + ws->next_ull;
94
95      *pstart = s;
96      *pend = e;
97      thr->ts.static_trip = (e0 == n ? -1 : 1);
98      return 0;
99    }
100  else
101    {
102      gomp_ull n, s0, e0, i, c, s, e;
103
104      /* Otherwise, each thread gets exactly chunk_size iterations
105         (if available) each time through the loop.  */
106
107      if (__builtin_expect (ws->mode, 0) == 0)
108        n = (ws->end_ull - ws->next_ull + ws->incr_ull - 1) / ws->incr_ull;
109      else
110        n = (ws->next_ull - ws->end_ull - ws->incr_ull - 1) / -ws->incr_ull;
111      i = thr->ts.team_id;
112      c = ws->chunk_size_ull;
113
114      /* Initial guess is a C sized chunk positioned nthreads iterations
115         in, offset by our thread number.  */
116      s0 = (thr->ts.static_trip * (gomp_ull) nthreads + i) * c;
117      e0 = s0 + c;
118
119      /* Detect overflow.  */
120      if (s0 >= n)
121        return 1;
122      if (e0 > n)
123        e0 = n;
124
125      /* Transform these to the actual start and end numbers.  */
126      s = s0 * ws->incr_ull + ws->next_ull;
127      e = e0 * ws->incr_ull + ws->next_ull;
128
129      *pstart = s;
130      *pend = e;
131
132      if (e0 == n)
133        thr->ts.static_trip = -1;
134      else
135        thr->ts.static_trip++;
136      return 0;
137    }
138}
139
140
141/* This function implements the DYNAMIC scheduling method.  Arguments are
142   as for gomp_iter_ull_static_next.  This function must be called with
143   ws->lock held.  */
144
145int
146gomp_iter_ull_dynamic_next_locked (gomp_ull *pstart, gomp_ull *pend)
147{
148  struct gomp_thread *thr = gomp_thread ();
149  struct gomp_work_share *ws = thr->ts.work_share;
150  gomp_ull start, end, chunk, left;
151
152  start = ws->next_ull;
153  if (start == ws->end_ull)
154    return false;
155
156  chunk = ws->chunk_size_ull;
157  left = ws->end_ull - start;
158  if (__builtin_expect (ws->mode & 2, 0))
159    {
160      if (chunk < left)
161        chunk = left;
162    }
163  else
164    {
165      if (chunk > left)
166        chunk = left;
167    }
168  end = start + chunk;
169
170  ws->next_ull = end;
171  *pstart = start;
172  *pend = end;
173  return true;
174}
175
176
177#if defined HAVE_SYNC_BUILTINS && defined __LP64__
178/* Similar, but doesn't require the lock held, and uses compare-and-swap
179   instead.  Note that the only memory value that changes is ws->next_ull.  */
180
181int
182gomp_iter_ull_dynamic_next (gomp_ull *pstart, gomp_ull *pend)
183{
184  struct gomp_thread *thr = gomp_thread ();
185  struct gomp_work_share *ws = thr->ts.work_share;
186  gomp_ull start, end, nend, chunk;
187
188  end = ws->end_ull;
189  chunk = ws->chunk_size_ull;
190
191  if (__builtin_expect (ws->mode & 1, 1))
192    {
193      gomp_ull tmp = __sync_fetch_and_add (&ws->next_ull, chunk);
194      if (__builtin_expect (ws->mode & 2, 0) == 0)
195        {
196          if (tmp >= end)
197            return false;
198          nend = tmp + chunk;
199          if (nend > end)
200            nend = end;
201          *pstart = tmp;
202          *pend = nend;
203          return true;
204        }
205      else
206        {
207          if (tmp <= end)
208            return false;
209          nend = tmp + chunk;
210          if (nend < end)
211            nend = end;
212          *pstart = tmp;
213          *pend = nend;
214          return true;
215        }
216    }
217
218  start = ws->next_ull;
219  while (1)
220    {
221      gomp_ull left = end - start;
222      gomp_ull tmp;
223
224      if (start == end)
225        return false;
226
227      if (__builtin_expect (ws->mode & 2, 0))
228        {
229          if (chunk < left)
230            chunk = left;
231        }
232      else
233        {
234          if (chunk > left)
235            chunk = left;
236        }
237      nend = start + chunk;
238
239      tmp = __sync_val_compare_and_swap (&ws->next_ull, start, nend);
240      if (__builtin_expect (tmp == start, 1))
241        break;
242
243      start = tmp;
244    }
245
246  *pstart = start;
247  *pend = nend;
248  return true;
249}
250#endif /* HAVE_SYNC_BUILTINS */
251
252
253/* This function implements the GUIDED scheduling method.  Arguments are
254   as for gomp_iter_ull_static_next.  This function must be called with the
255   work share lock held.  */
256
257int
258gomp_iter_ull_guided_next_locked (gomp_ull *pstart, gomp_ull *pend)
259{
260  struct gomp_thread *thr = gomp_thread ();
261  struct gomp_work_share *ws = thr->ts.work_share;
262  struct gomp_team *team = thr->ts.team;
263  gomp_ull nthreads = team ? team->nthreads : 1;
264  gomp_ull n, q;
265  gomp_ull start, end;
266
267  if (ws->next_ull == ws->end_ull)
268    return false;
269
270  start = ws->next_ull;
271  if (__builtin_expect (ws->mode, 0) == 0)
272    n = (ws->end_ull - start) / ws->incr_ull;
273  else
274    n = (start - ws->end_ull) / -ws->incr_ull;
275  q = (n + nthreads - 1) / nthreads;
276
277  if (q < ws->chunk_size_ull)
278    q = ws->chunk_size_ull;
279  if (q <= n)
280    end = start + q * ws->incr_ull;
281  else
282    end = ws->end_ull;
283
284  ws->next_ull = end;
285  *pstart = start;
286  *pend = end;
287  return true;
288}
289
290#if defined HAVE_SYNC_BUILTINS && defined __LP64__
291/* Similar, but doesn't require the lock held, and uses compare-and-swap
292   instead.  Note that the only memory value that changes is ws->next_ull.  */
293
294int
295gomp_iter_ull_guided_next (gomp_ull *pstart, gomp_ull *pend)
296{
297  struct gomp_thread *thr = gomp_thread ();
298  struct gomp_work_share *ws = thr->ts.work_share;
299  struct gomp_team *team = thr->ts.team;
300  gomp_ull nthreads = team ? team->nthreads : 1;
301  gomp_ull start, end, nend, incr;
302  gomp_ull chunk_size;
303
304  start = ws->next_ull;
305  end = ws->end_ull;
306  incr = ws->incr_ull;
307  chunk_size = ws->chunk_size_ull;
308
309  while (1)
310    {
311      gomp_ull n, q;
312      gomp_ull tmp;
313
314      if (start == end)
315        return false;
316
317      if (__builtin_expect (ws->mode, 0) == 0)
318        n = (end - start) / incr;
319      else
320        n = (start - end) / -incr;
321      q = (n + nthreads - 1) / nthreads;
322
323      if (q < chunk_size)
324        q = chunk_size;
325      if (__builtin_expect (q <= n, 1))
326        nend = start + q * incr;
327      else
328        nend = end;
329
330      tmp = __sync_val_compare_and_swap (&ws->next_ull, start, nend);
331      if (__builtin_expect (tmp == start, 1))
332        break;
333
334      start = tmp;
335    }
336
337  *pstart = start;
338  *pend = nend;
339  return true;
340}
341#endif /* HAVE_SYNC_BUILTINS */
Note: See TracBrowser for help on using the repository browser.