Context Navigation

source: trunk/sys/libgomp/iter.c @ 196

Last change on this file since 196 was 1, checked in by alain, 8 years ago
First import
File size: 7.9 KB

Line
1	/* Copyright (C) 2005, 2008, 2009 Free Software Foundation, Inc.
2	Contributed by Richard Henderson <rth@redhat.com>.
3
4	This file is part of the GNU OpenMP Library (libgomp).
5
6	Libgomp is free software; you can redistribute it and/or modify it
7	under the terms of the GNU General Public License as published by
8	the Free Software Foundation; either version 3, or (at your option)
9	any later version.
10
11	Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
12	WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
13	FOR A PARTICULAR PURPOSE. See the GNU General Public License for
14	more details.
15
16	Under Section 7 of GPL version 3, you are granted additional
17	permissions described in the GCC Runtime Library Exception, version
18	3.1, as published by the Free Software Foundation.
19
20	You should have received a copy of the GNU General Public License and
21	a copy of the GCC Runtime Library Exception along with this program;
22	see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
23	<http://www.gnu.org/licenses/>. */
24
25	/* This file contains routines for managing work-share iteration, both
26	for loops and sections. */
27
28	#include <gomp/libgomp.h>
29	#include <stdlib.h>
30
31	/* This function implements the STATIC scheduling method. The caller should
32	iterate pstart <= x < pend. Return zero if there are more iterations
33	to perform; nonzero if not. Return less than 0 if this thread had
34	received the absolutely last iteration. */
35
36	int
37	gomp_iter_static_next (long pstart, long pend)
38	{
39	struct gomp_thread *thr = gomp_thread ();
40	struct gomp_team *team = thr->ts.team;
41	struct gomp_work_share *ws = thr->ts.work_share;
42	unsigned long nthreads = team ? team->nthreads : 1;
43
44	if ((int)thr->ts.static_trip == -1)
45	return -1;
46
47	/* Quick test for degenerate teams and orphaned constructs. */
48	if (nthreads == 1)
49	{
50	*pstart = ws->next;
51	*pend = ws->end;
52	thr->ts.static_trip = -1;
53	return ws->next == ws->end;
54	}
55
56	/* We interpret chunk_size zero as "unspecified", which means that we
57	should break up the iterations such that each thread makes only one
58	trip through the outer loop. */
59	if (ws->chunk_size == 0)
60	{
61	unsigned long n, q, i;
62	unsigned long s0, e0;
63	long s, e;
64
65	if (thr->ts.static_trip > 0)
66	return 1;
67
68	/* Compute the total number of iterations. */
69	s = ws->incr + (ws->incr > 0 ? -1 : 1);
70	n = (ws->end - ws->next + s) / ws->incr;
71	i = thr->ts.team_id;
72
73	/* Compute the "zero-based" start and end points. That is, as
74	if the loop began at zero and incremented by one. */
75	q = n / nthreads;
76	q += (q * nthreads != n);
77	s0 = q * i;
78	e0 = s0 + q;
79	if (e0 > n)
80	e0 = n;
81
82	/* Notice when no iterations allocated for this thread. */
83	if (s0 >= e0)
84	{
85	thr->ts.static_trip = 1;
86	return 1;
87	}
88
89	/* Transform these to the actual start and end numbers. */
90	s = (long)s0 * ws->incr + ws->next;
91	e = (long)e0 * ws->incr + ws->next;
92
93	*pstart = s;
94	*pend = e;
95	thr->ts.static_trip = (e0 == n ? -1 : 1);
96	return 0;
97	}
98	else
99	{
100	unsigned long n, s0, e0, i, c;
101	long s, e;
102
103	/* Otherwise, each thread gets exactly chunk_size iterations
104	(if available) each time through the loop. */
105
106	s = ws->incr + (ws->incr > 0 ? -1 : 1);
107	n = (ws->end - ws->next + s) / ws->incr;
108	i = thr->ts.team_id;
109	c = ws->chunk_size;
110
111	/* Initial guess is a C sized chunk positioned nthreads iterations
112	in, offset by our thread number. */
113	s0 = (thr->ts.static_trip * nthreads + i) * c;
114	e0 = s0 + c;
115
116	/* Detect overflow. */
117	if (s0 >= n)
118	return 1;
119	if (e0 > n)
120	e0 = n;
121
122	/* Transform these to the actual start and end numbers. */
123	s = (long)s0 * ws->incr + ws->next;
124	e = (long)e0 * ws->incr + ws->next;
125
126	*pstart = s;
127	*pend = e;
128
129	if (e0 == n)
130	thr->ts.static_trip = -1;
131	else
132	thr->ts.static_trip++;
133	return 0;
134	}
135	}
136
137
138	/* This function implements the DYNAMIC scheduling method. Arguments are
139	as for gomp_iter_static_next. This function must be called with ws->lock
140	held. */
141
142
143	int gomp_iter_dynamic_next_locked (long pstart, long pend)
144	{
145	struct gomp_thread *thr = gomp_thread ();
146	struct gomp_work_share *ws = thr->ts.work_share;
147	long start, end, chunk, left;
148
149	start = ws->next;
150	if (start == ws->end)
151	return false;
152
153	chunk = ws->chunk_size;
154	left = ws->end - start;
155	if (ws->incr < 0)
156	{
157	if (chunk < left)
158	chunk = left;
159	}
160	else
161	{
162	if (chunk > left)
163	chunk = left;
164	}
165	end = start + chunk;
166
167	ws->next = end;
168	*pstart = start;
169	*pend = end;
170	return true;
171	}
172
173
174	#ifdef HAVE_SYNC_BUILTINS
175	/* Similar, but doesn't require the lock held, and uses compare-and-swap
176	instead. Note that the only memory value that changes is ws->next. */
177
178	bool
179	gomp_iter_dynamic_next (long pstart, long pend)
180	{
181	struct gomp_thread *thr = gomp_thread ();
182	struct gomp_work_share *ws = thr->ts.work_share;
183	long start, end, nend, chunk, incr;
184
185	end = ws->end;
186	incr = ws->incr;
187	chunk = ws->chunk_size;
188
189	if (__builtin_expect (ws->mode, 1))
190	{
191	long tmp = __sync_fetch_and_add (&ws->next, chunk);
192	if (incr > 0)
193	{
194	if (tmp >= end)
195	return false;
196	nend = tmp + chunk;
197	if (nend > end)
198	nend = end;
199	*pstart = tmp;
200	*pend = nend;
201	return true;
202	}
203	else
204	{
205	if (tmp <= end)
206	return false;
207	nend = tmp + chunk;
208	if (nend < end)
209	nend = end;
210	*pstart = tmp;
211	*pend = nend;
212	return true;
213	}
214	}
215
216	start = ws->next;
217	while (1)
218	{
219	long left = end - start;
220	long tmp;
221
222	if (start == end)
223	return false;
224
225	if (incr < 0)
226	{
227	if (chunk < left)
228	chunk = left;
229	}
230	else
231	{
232	if (chunk > left)
233	chunk = left;
234	}
235	nend = start + chunk;
236
237	tmp = __sync_val_compare_and_swap (&ws->next, start, nend);
238	if (__builtin_expect (tmp == start, 1))
239	break;
240
241	start = tmp;
242	}
243
244	*pstart = start;
245	*pend = nend;
246	return true;
247	}
248	#endif /* HAVE_SYNC_BUILTINS */
249
250
251	/* This function implements the GUIDED scheduling method. Arguments are
252	as for gomp_iter_static_next. This function must be called with the
253	work share lock held. */
254
255	int
256	gomp_iter_guided_next_locked (long pstart, long pend)
257	{
258	struct gomp_thread *thr = gomp_thread ();
259	struct gomp_work_share *ws = thr->ts.work_share;
260	struct gomp_team *team = thr->ts.team;
261	unsigned long nthreads = team ? team->nthreads : 1;
262	unsigned long n, q;
263	long start, end;
264
265	if (ws->next == ws->end)
266	return false;
267
268	start = ws->next;
269	n = (ws->end - start) / ws->incr;
270	q = (n + nthreads - 1) / nthreads;
271
272	if (q < (unsigned long)ws->chunk_size)
273	q = ws->chunk_size;
274	if (q <= n)
275	end = start + q * ws->incr;
276	else
277	end = ws->end;
278
279	ws->next = end;
280	*pstart = start;
281	*pend = end;
282	return true;
283	}
284
285	#ifdef HAVE_SYNC_BUILTINS
286	/* Similar, but doesn't require the lock held, and uses compare-and-swap
287	instead. Note that the only memory value that changes is ws->next. */
288
289	bool
290	gomp_iter_guided_next (long pstart, long pend)
291	{
292	struct gomp_thread *thr = gomp_thread ();
293	struct gomp_work_share *ws = thr->ts.work_share;
294	struct gomp_team *team = thr->ts.team;
295	unsigned long nthreads = team ? team->nthreads : 1;
296	long start, end, nend, incr;
297	unsigned long chunk_size;
298
299	start = ws->next;
300	end = ws->end;
301	incr = ws->incr;
302	chunk_size = ws->chunk_size;
303
304	while (1)
305	{
306	unsigned long n, q;
307	long tmp;
308
309	if (start == end)
310	return false;
311
312	n = (end - start) / incr;
313	q = (n + nthreads - 1) / nthreads;
314
315	if (q < chunk_size)
316	q = chunk_size;
317	if (__builtin_expect (q <= n, 1))
318	nend = start + q * incr;
319	else
320	nend = end;
321
322	tmp = __sync_val_compare_and_swap (&ws->next, start, nend);
323	if (__builtin_expect (tmp == start, 1))
324	break;
325
326	start = tmp;
327	}
328
329	*pstart = start;
330	*pend = nend;
331	return true;
332	}
333	#endif /* HAVE_SYNC_BUILTINS */

Note: See TracBrowser for help on using the repository browser.

Download in other formats: