Context Navigation

source: trunk/sys/libgomp/iter.c @ 8

Last change on this file since 8 was 1, checked in by alain, 8 years ago
First import
File size: 7.9 KB

Rev	Line
[1]	1	/* Copyright (C) 2005, 2008, 2009 Free Software Foundation, Inc.
	2	Contributed by Richard Henderson <rth@redhat.com>.
	3
	4	This file is part of the GNU OpenMP Library (libgomp).
	5
	6	Libgomp is free software; you can redistribute it and/or modify it
	7	under the terms of the GNU General Public License as published by
	8	the Free Software Foundation; either version 3, or (at your option)
	9	any later version.
	10
	11	Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
	12	WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
	13	FOR A PARTICULAR PURPOSE. See the GNU General Public License for
	14	more details.
	15
	16	Under Section 7 of GPL version 3, you are granted additional
	17	permissions described in the GCC Runtime Library Exception, version
	18	3.1, as published by the Free Software Foundation.
	19
	20	You should have received a copy of the GNU General Public License and
	21	a copy of the GCC Runtime Library Exception along with this program;
	22	see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
	23	<http://www.gnu.org/licenses/>. */
	24
	25	/* This file contains routines for managing work-share iteration, both
	26	for loops and sections. */
	27
	28	#include <gomp/libgomp.h>
	29	#include <stdlib.h>
	30
	31	/* This function implements the STATIC scheduling method. The caller should
	32	iterate pstart <= x < pend. Return zero if there are more iterations
	33	to perform; nonzero if not. Return less than 0 if this thread had
	34	received the absolutely last iteration. */
	35
	36	int
	37	gomp_iter_static_next (long pstart, long pend)
	38	{
	39	struct gomp_thread *thr = gomp_thread ();
	40	struct gomp_team *team = thr->ts.team;
	41	struct gomp_work_share *ws = thr->ts.work_share;
	42	unsigned long nthreads = team ? team->nthreads : 1;
	43
	44	if ((int)thr->ts.static_trip == -1)
	45	return -1;
	46
	47	/* Quick test for degenerate teams and orphaned constructs. */
	48	if (nthreads == 1)
	49	{
	50	*pstart = ws->next;
	51	*pend = ws->end;
	52	thr->ts.static_trip = -1;
	53	return ws->next == ws->end;
	54	}
	55
	56	/* We interpret chunk_size zero as "unspecified", which means that we
	57	should break up the iterations such that each thread makes only one
	58	trip through the outer loop. */
	59	if (ws->chunk_size == 0)
	60	{
	61	unsigned long n, q, i;
	62	unsigned long s0, e0;
	63	long s, e;
	64
	65	if (thr->ts.static_trip > 0)
	66	return 1;
	67
	68	/* Compute the total number of iterations. */
	69	s = ws->incr + (ws->incr > 0 ? -1 : 1);
	70	n = (ws->end - ws->next + s) / ws->incr;
	71	i = thr->ts.team_id;
	72
	73	/* Compute the "zero-based" start and end points. That is, as
	74	if the loop began at zero and incremented by one. */
	75	q = n / nthreads;
	76	q += (q * nthreads != n);
	77	s0 = q * i;
	78	e0 = s0 + q;
	79	if (e0 > n)
	80	e0 = n;
	81
	82	/* Notice when no iterations allocated for this thread. */
	83	if (s0 >= e0)
	84	{
	85	thr->ts.static_trip = 1;
	86	return 1;
	87	}
	88
	89	/* Transform these to the actual start and end numbers. */
	90	s = (long)s0 * ws->incr + ws->next;
	91	e = (long)e0 * ws->incr + ws->next;
	92
	93	*pstart = s;
	94	*pend = e;
	95	thr->ts.static_trip = (e0 == n ? -1 : 1);
	96	return 0;
	97	}
	98	else
	99	{
	100	unsigned long n, s0, e0, i, c;
	101	long s, e;
	102
	103	/* Otherwise, each thread gets exactly chunk_size iterations
	104	(if available) each time through the loop. */
	105
	106	s = ws->incr + (ws->incr > 0 ? -1 : 1);
	107	n = (ws->end - ws->next + s) / ws->incr;
	108	i = thr->ts.team_id;
	109	c = ws->chunk_size;
	110
	111	/* Initial guess is a C sized chunk positioned nthreads iterations
	112	in, offset by our thread number. */
	113	s0 = (thr->ts.static_trip * nthreads + i) * c;
	114	e0 = s0 + c;
	115
	116	/* Detect overflow. */
	117	if (s0 >= n)
	118	return 1;
	119	if (e0 > n)
	120	e0 = n;
	121
	122	/* Transform these to the actual start and end numbers. */
	123	s = (long)s0 * ws->incr + ws->next;
	124	e = (long)e0 * ws->incr + ws->next;
	125
	126	*pstart = s;
	127	*pend = e;
	128
	129	if (e0 == n)
	130	thr->ts.static_trip = -1;
	131	else
	132	thr->ts.static_trip++;
	133	return 0;
	134	}
	135	}
	136
	137
	138	/* This function implements the DYNAMIC scheduling method. Arguments are
	139	as for gomp_iter_static_next. This function must be called with ws->lock
	140	held. */
	141
	142
	143	int gomp_iter_dynamic_next_locked (long pstart, long pend)
	144	{
	145	struct gomp_thread *thr = gomp_thread ();
	146	struct gomp_work_share *ws = thr->ts.work_share;
	147	long start, end, chunk, left;
	148
	149	start = ws->next;
	150	if (start == ws->end)
	151	return false;
	152
	153	chunk = ws->chunk_size;
	154	left = ws->end - start;
	155	if (ws->incr < 0)
	156	{
	157	if (chunk < left)
	158	chunk = left;
	159	}
	160	else
	161	{
	162	if (chunk > left)
	163	chunk = left;
	164	}
	165	end = start + chunk;
	166
	167	ws->next = end;
	168	*pstart = start;
	169	*pend = end;
	170	return true;
	171	}
	172
	173
	174	#ifdef HAVE_SYNC_BUILTINS
	175	/* Similar, but doesn't require the lock held, and uses compare-and-swap
	176	instead. Note that the only memory value that changes is ws->next. */
	177
	178	bool
	179	gomp_iter_dynamic_next (long pstart, long pend)
	180	{
	181	struct gomp_thread *thr = gomp_thread ();
	182	struct gomp_work_share *ws = thr->ts.work_share;
	183	long start, end, nend, chunk, incr;
	184
	185	end = ws->end;
	186	incr = ws->incr;
	187	chunk = ws->chunk_size;
	188
	189	if (__builtin_expect (ws->mode, 1))
	190	{
	191	long tmp = __sync_fetch_and_add (&ws->next, chunk);
	192	if (incr > 0)
	193	{
	194	if (tmp >= end)
	195	return false;
	196	nend = tmp + chunk;
	197	if (nend > end)
	198	nend = end;
	199	*pstart = tmp;
	200	*pend = nend;
	201	return true;
	202	}
	203	else
	204	{
	205	if (tmp <= end)
	206	return false;
	207	nend = tmp + chunk;
	208	if (nend < end)
	209	nend = end;
	210	*pstart = tmp;
	211	*pend = nend;
	212	return true;
	213	}
	214	}
	215
	216	start = ws->next;
	217	while (1)
	218	{
	219	long left = end - start;
	220	long tmp;
	221
	222	if (start == end)
	223	return false;
	224
	225	if (incr < 0)
	226	{
	227	if (chunk < left)
	228	chunk = left;
	229	}
	230	else
	231	{
	232	if (chunk > left)
	233	chunk = left;
	234	}
	235	nend = start + chunk;
	236
	237	tmp = __sync_val_compare_and_swap (&ws->next, start, nend);
	238	if (__builtin_expect (tmp == start, 1))
	239	break;
	240
	241	start = tmp;
	242	}
	243
	244	*pstart = start;
	245	*pend = nend;
	246	return true;
	247	}
	248	#endif /* HAVE_SYNC_BUILTINS */
	249
	250
	251	/* This function implements the GUIDED scheduling method. Arguments are
	252	as for gomp_iter_static_next. This function must be called with the
	253	work share lock held. */
	254
	255	int
	256	gomp_iter_guided_next_locked (long pstart, long pend)
	257	{
	258	struct gomp_thread *thr = gomp_thread ();
	259	struct gomp_work_share *ws = thr->ts.work_share;
	260	struct gomp_team *team = thr->ts.team;
	261	unsigned long nthreads = team ? team->nthreads : 1;
	262	unsigned long n, q;
	263	long start, end;
	264
	265	if (ws->next == ws->end)
	266	return false;
	267
	268	start = ws->next;
	269	n = (ws->end - start) / ws->incr;
	270	q = (n + nthreads - 1) / nthreads;
	271
	272	if (q < (unsigned long)ws->chunk_size)
	273	q = ws->chunk_size;
	274	if (q <= n)
	275	end = start + q * ws->incr;
	276	else
	277	end = ws->end;
	278
	279	ws->next = end;
	280	*pstart = start;
	281	*pend = end;
	282	return true;
	283	}
	284
	285	#ifdef HAVE_SYNC_BUILTINS
	286	/* Similar, but doesn't require the lock held, and uses compare-and-swap
	287	instead. Note that the only memory value that changes is ws->next. */
	288
	289	bool
	290	gomp_iter_guided_next (long pstart, long pend)
	291	{
	292	struct gomp_thread *thr = gomp_thread ();
	293	struct gomp_work_share *ws = thr->ts.work_share;
	294	struct gomp_team *team = thr->ts.team;
	295	unsigned long nthreads = team ? team->nthreads : 1;
	296	long start, end, nend, incr;
	297	unsigned long chunk_size;
	298
	299	start = ws->next;
	300	end = ws->end;
	301	incr = ws->incr;
	302	chunk_size = ws->chunk_size;
	303
	304	while (1)
	305	{
	306	unsigned long n, q;
	307	long tmp;
	308
	309	if (start == end)
	310	return false;
	311
	312	n = (end - start) / incr;
	313	q = (n + nthreads - 1) / nthreads;
	314
	315	if (q < chunk_size)
	316	q = chunk_size;
	317	if (__builtin_expect (q <= n, 1))
	318	nend = start + q * incr;
	319	else
	320	nend = end;
	321
	322	tmp = __sync_val_compare_and_swap (&ws->next, start, nend);
	323	if (__builtin_expect (tmp == start, 1))
	324	break;
	325
	326	start = tmp;
	327	}
	328
	329	*pstart = start;
	330	*pend = nend;
	331	return true;
	332	}
	333	#endif /* HAVE_SYNC_BUILTINS */

Note: See TracBrowser for help on using the repository browser.

Download in other formats: