source: trunk/sys/libgomp/iter_ull.c @ 50

Last change on this file since 50 was 1, checked in by alain, 8 years ago

First import

File size: 8.6 KB
/* Copyright (C) 2005, 2008, 2009 Free Software Foundation, Inc.
   Contributed by Richard Henderson <rth@redhat.com>.

   This file is part of the GNU OpenMP Library (libgomp).

   Libgomp is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
   more details.

   Under Section 7 of GPL version 3, you are granted additional
   permissions described in the GCC Runtime Library Exception, version
   3.1, as published by the Free Software Foundation.

   You should have received a copy of the GNU General Public License and
   a copy of the GCC Runtime Library Exception along with this program;
   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
   <http://www.gnu.org/licenses/>.  */

/* This file contains routines for managing work-share iteration, both
   for loops and sections.  */
#include <gomp/libgomp.h>
#include <stdlib.h>
#include <stdbool.h>	/* for the true/false values returned below */

typedef unsigned long long gomp_ull;

/* This function implements the STATIC scheduling method.  The caller should
   iterate *pstart <= x < *pend.  Returns zero if there are more iterations
   to perform; nonzero if not.  Returns less than 0 if this thread received
   the absolutely last iteration.  A caller sketch follows the function.  */

int
gomp_iter_ull_static_next (gomp_ull *pstart, gomp_ull *pend)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_team *team = thr->ts.team;
  struct gomp_work_share *ws = thr->ts.work_share;
  unsigned long nthreads = team ? team->nthreads : 1;

  if ((int)thr->ts.static_trip == -1)
    return -1;

  /* Quick test for degenerate teams and orphaned constructs.  */
  if (nthreads == 1)
    {
      *pstart = ws->next_ull;
      *pend = ws->end_ull;
      thr->ts.static_trip = -1;
      return ws->next_ull == ws->end_ull;
    }

  /* We interpret chunk_size zero as "unspecified", which means that we
     should break up the iterations such that each thread makes only one
     trip through the outer loop.  */
  if (ws->chunk_size_ull == 0)
    {
      gomp_ull n, q, i, s0, e0, s, e;

      if (thr->ts.static_trip > 0)
        return 1;

      /* Compute the total number of iterations.  A nonzero ws->mode
         (bit 2) marks a downward loop with a negative increment.  */
      if (__builtin_expect (ws->mode, 0) == 0)
        n = (ws->end_ull - ws->next_ull + ws->incr_ull - 1) / ws->incr_ull;
      else
        n = (ws->next_ull - ws->end_ull - ws->incr_ull - 1) / -ws->incr_ull;
      i = thr->ts.team_id;

      /* Compute the "zero-based" start and end points.  That is, as
         if the loop began at zero and incremented by one.  */
      q = n / nthreads;
      q += (q * nthreads != n);
      s0 = q * i;
      e0 = s0 + q;
      if (e0 > n)
        e0 = n;

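      /* Worked example (illustration only): with n = 10 and nthreads = 4,
         q = 10/4 = 2 rounds up to 3, giving the zero-based ranges
         [0,3), [3,6), [6,9) and [9,10); with nthreads = 6 the last
         thread would get s0 = 10 and e0 clamped to 10, i.e. no
         iterations at all, which the test below detects.  */
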
      /* Notice when no iterations allocated for this thread.  */
      if (s0 >= e0)
        {
          thr->ts.static_trip = 1;
          return 1;
        }

      /* Transform these to the actual start and end numbers.  */
      s = s0 * ws->incr_ull + ws->next_ull;
      e = e0 * ws->incr_ull + ws->next_ull;

      *pstart = s;
      *pend = e;
      thr->ts.static_trip = (e0 == n ? -1 : 1);
      return 0;
    }
  else
    {
      gomp_ull n, s0, e0, i, c, s, e;

      /* Otherwise, each thread gets exactly chunk_size iterations
         (if available) each time through the loop.  */

      if (__builtin_expect (ws->mode, 0) == 0)
        n = (ws->end_ull - ws->next_ull + ws->incr_ull - 1) / ws->incr_ull;
      else
        n = (ws->next_ull - ws->end_ull - ws->incr_ull - 1) / -ws->incr_ull;
      i = thr->ts.team_id;
      c = ws->chunk_size_ull;

      /* Initial guess is a C sized chunk positioned nthreads iterations
         in, offset by our thread number.  */
      s0 = (thr->ts.static_trip * (gomp_ull) nthreads + i) * c;
      e0 = s0 + c;

      /* Detect overflow.  */
      if (s0 >= n)
        return 1;
      if (e0 > n)
        e0 = n;

      /* Transform these to the actual start and end numbers.  */
      s = s0 * ws->incr_ull + ws->next_ull;
      e = e0 * ws->incr_ull + ws->next_ull;

      *pstart = s;
      *pend = e;

      if (e0 == n)
        thr->ts.static_trip = -1;
      else
        thr->ts.static_trip++;
      return 0;
    }
}
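
/* Illustrative caller sketch (not part of the original file): a thread
   would typically drain its static assignment like this, with
   LOOP_BODY standing in for the user's loop body, ws_incr a
   hypothetical copy of ws->incr_ull, and an upward loop assumed:

     gomp_ull start, end, i;
     while (gomp_iter_ull_static_next (&start, &end) == 0)
       for (i = start; i < end; i += ws_incr)
         LOOP_BODY (i);
*/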

/* This function implements the DYNAMIC scheduling method.  Arguments are
   as for gomp_iter_ull_static_next.  This function must be called with
   ws->lock held.  */

int
gomp_iter_ull_dynamic_next_locked (gomp_ull *pstart, gomp_ull *pend)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_work_share *ws = thr->ts.work_share;
  gomp_ull start, end, chunk, left;

  start = ws->next_ull;
  if (start == ws->end_ull)
    return false;

  chunk = ws->chunk_size_ull;
  left = ws->end_ull - start;
  if (__builtin_expect (ws->mode & 2, 0))
    {
      /* Downward loop: chunk and left are negative when viewed as
         signed values, so the clamping comparison is inverted.  */
      if (chunk < left)
        chunk = left;
    }
  else
    {
      if (chunk > left)
        chunk = left;
    }
  end = start + chunk;

  ws->next_ull = end;
  *pstart = start;
  *pend = end;
  return true;
}
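
/* Illustrative caller sketch (an assumption, mirroring how upstream
   libgomp's loop_ull.c drives this routine when sync builtins are not
   available): the work-share lock protects the whole call.

     gomp_mutex_lock (&thr->ts.work_share->lock);
     ret = gomp_iter_ull_dynamic_next_locked (&start, &end);
     gomp_mutex_unlock (&thr->ts.work_share->lock);
*/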

#if defined HAVE_SYNC_BUILTINS && defined __LP64__
/* Similar, but doesn't require the lock held, and uses compare-and-swap
   instead.  Note that the only memory value that changes is ws->next_ull.  */

int
gomp_iter_ull_dynamic_next (gomp_ull *pstart, gomp_ull *pend)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_work_share *ws = thr->ts.work_share;
  gomp_ull start, end, nend, chunk;

  end = ws->end_ull;
  chunk = ws->chunk_size_ull;

  /* Bit 1 of ws->mode is set at work-share init when there is enough
     headroom beyond end_ull that an unconditional fetch-and-add cannot
     overflow, allowing this faster lock-free path.  */
  if (__builtin_expect (ws->mode & 1, 1))
    {
      gomp_ull tmp = __sync_fetch_and_add (&ws->next_ull, chunk);
      if (__builtin_expect (ws->mode & 2, 0) == 0)
        {
          if (tmp >= end)
            return false;
          nend = tmp + chunk;
          if (nend > end)
            nend = end;
          *pstart = tmp;
          *pend = nend;
          return true;
        }
      else
        {
          if (tmp <= end)
            return false;
          nend = tmp + chunk;
          if (nend < end)
            nend = end;
          *pstart = tmp;
          *pend = nend;
          return true;
        }
    }

  start = ws->next_ull;
  while (1)
    {
      gomp_ull left = end - start;
      gomp_ull tmp;

      if (start == end)
        return false;

      if (__builtin_expect (ws->mode & 2, 0))
        {
          if (chunk < left)
            chunk = left;
        }
      else
        {
          if (chunk > left)
            chunk = left;
        }
      nend = start + chunk;

      tmp = __sync_val_compare_and_swap (&ws->next_ull, start, nend);
      if (__builtin_expect (tmp == start, 1))
        break;

      start = tmp;
    }

  *pstart = start;
  *pend = nend;
  return true;
}
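
/* Worked example (illustration only) of the fetch-and-add fast path:
   with next_ull = 0, end_ull = 10, incr 1 and a scaled chunk of 4,
   three racing threads obtain [0,4), [4,8) and [8,10) (the last one
   clamped), while a fourth sees tmp = 12 >= end and returns false.
   next_ull may thus grow past end_ull, which is why this path needs
   the overflow headroom checked at init time.  */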
#endif /* HAVE_SYNC_BUILTINS */


/* This function implements the GUIDED scheduling method.  Arguments are
   as for gomp_iter_ull_static_next.  This function must be called with the
   work share lock held.  */

int
gomp_iter_ull_guided_next_locked (gomp_ull *pstart, gomp_ull *pend)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_work_share *ws = thr->ts.work_share;
  struct gomp_team *team = thr->ts.team;
  gomp_ull nthreads = team ? team->nthreads : 1;
  gomp_ull n, q;
  gomp_ull start, end;

  if (ws->next_ull == ws->end_ull)
    return false;

  start = ws->next_ull;
  if (__builtin_expect (ws->mode, 0) == 0)
    n = (ws->end_ull - start) / ws->incr_ull;
  else
    n = (start - ws->end_ull) / -ws->incr_ull;
  q = (n + nthreads - 1) / nthreads;

  if (q < ws->chunk_size_ull)
    q = ws->chunk_size_ull;
  if (q <= n)
    end = start + q * ws->incr_ull;
  else
    end = ws->end_ull;

  ws->next_ull = end;
  *pstart = start;
  *pend = end;
  return true;
}
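
/* Worked example (illustration only): for an upward loop with
   next_ull = 0, end_ull = 100, incr 1, nthreads = 4 and chunk_size 1,
   successive calls hand out shrinking chunks: q = ceil(100/4) = 25
   gives [0,25), then q = ceil(75/4) = 19 gives [25,44), and so on,
   with q never dropping below chunk_size.  */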

#if defined HAVE_SYNC_BUILTINS && defined __LP64__
/* Similar, but doesn't require the lock held, and uses compare-and-swap
   instead.  Note that the only memory value that changes is ws->next_ull.  */

int
gomp_iter_ull_guided_next (gomp_ull *pstart, gomp_ull *pend)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_work_share *ws = thr->ts.work_share;
  struct gomp_team *team = thr->ts.team;
  gomp_ull nthreads = team ? team->nthreads : 1;
  gomp_ull start, end, nend, incr;
  gomp_ull chunk_size;

  start = ws->next_ull;
  end = ws->end_ull;
  incr = ws->incr_ull;
  chunk_size = ws->chunk_size_ull;

  while (1)
    {
      gomp_ull n, q;
      gomp_ull tmp;

      if (start == end)
        return false;

      if (__builtin_expect (ws->mode, 0) == 0)
        n = (end - start) / incr;
      else
        n = (start - end) / -incr;
      q = (n + nthreads - 1) / nthreads;

      if (q < chunk_size)
        q = chunk_size;
      if (__builtin_expect (q <= n, 1))
        nend = start + q * incr;
      else
        nend = end;

      tmp = __sync_val_compare_and_swap (&ws->next_ull, start, nend);
      if (__builtin_expect (tmp == start, 1))
        break;

      start = tmp;
    }

  *pstart = start;
  *pend = nend;
  return true;
}
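
/* The loop above is the classic CAS retry pattern; a minimal
   standalone sketch (illustration only, compute_next being a
   hypothetical helper):

     gomp_ull cur = ws->next_ull;
     for (;;)
       {
         gomp_ull want = compute_next (cur);
         gomp_ull seen = __sync_val_compare_and_swap (&ws->next_ull,
                                                      cur, want);
         if (seen == cur)
           break;      // we won the race
         cur = seen;   // another thread advanced next_ull; retry
       }
*/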
#endif /* HAVE_SYNC_BUILTINS */