1 | /* Définitions des fonctions standard (simplifiées) utilisées par ocean pour GIET */ |
---|
2 | |
---|
3 | #include <stdarg.h> |
---|
4 | #include <stdio.h> |
---|
5 | #include <malloc.h> |
---|
6 | #include <stdlib.h> |
---|
7 | |
---|
8 | EXTERN_ENV |
---|
9 | |
---|
10 | #include "decs.h" |
---|
11 | #include "giet_utils.h" |
---|
12 | |
---|
/*
 * File-scope stdout / stderr objects used by this port.  Both are
 * initialized from string literals: an empty string for stdout and the
 * prefix "STDERR : " for stderr.
 * NOTE(review): presumably the stdio.h used for this build declares FILE so
 * that a string literal is a valid initializer -- confirm against that header.
 */
FILE *stdout = "";
FILE *stderr = "STDERR : ";
---|
15 | |
---|
/*
 * Variables with the "main_" prefix are defined outside this section.
 * thread() reads them to fill in the variables of the same name (without
 * the prefix) that are defined further down in this file.
 */

/* Tables with four levels of indirection. */
extern double ****main_q_multi, ****main_rhs_multi;
extern double ****main_psi, ****main_psim;
extern double ****main_work1, ****main_work4, ****main_work5, ****main_work7;

/* Tables with three levels of indirection. */
extern double ***main_psium, ***main_psilm, ***main_psib;
extern double ***main_ga, ***main_gb, ***main_oldga, ***main_oldgb;
extern double ***main_work2, ***main_work3, ***main_work6;

/* One-dimensional arrays; thread() indexes them from 0 to main_numlev - 1. */
extern long *main_imx, *main_jmx;
extern long *main_xpts_per_proc, *main_ypts_per_proc;
extern double *main_lev_res, *main_lev_tol;
extern double *main_i_int_coeff, *main_j_int_coeff;

/* Scalars. */
extern long main_xprocs, main_yprocs;
extern long main_numlev;
extern long main_im, main_jm;
extern double main_eig2;
---|
49 | |
---|
/* Shorthand, local to this group of definitions, for the section attribute. */
#define IN_SEG_LDATA __attribute__ ((section("seg_ldata")))

/*
 * Variables placed in the "seg_ldata" section.  thread() allocates the
 * pointer tables and fills them from the matching main_* variables.
 */
double ****work1 IN_SEG_LDATA;
double ***work2 IN_SEG_LDATA;
double ***work3 IN_SEG_LDATA;
double ****work4 IN_SEG_LDATA;
double ****work5 IN_SEG_LDATA;
double ***work6 IN_SEG_LDATA;
double ****work7 IN_SEG_LDATA;
double ****psi IN_SEG_LDATA;
double ****psim IN_SEG_LDATA;
double ***psium IN_SEG_LDATA;
double ***psilm IN_SEG_LDATA;
double ***psib IN_SEG_LDATA;
double ***ga IN_SEG_LDATA;
double ***gb IN_SEG_LDATA;
double ***oldga IN_SEG_LDATA;
double ***oldgb IN_SEG_LDATA;
double ****q_multi IN_SEG_LDATA;
double ****rhs_multi IN_SEG_LDATA;
long *imx IN_SEG_LDATA;
long *jmx IN_SEG_LDATA;
double *f IN_SEG_LDATA;

/* Indexed by thread id in thread(); not placed in seg_ldata. */
struct Global_Private *gp;

/* Per-level arrays and scalars, also copied from their main_* counterparts. */
double *lev_res IN_SEG_LDATA;
double *lev_tol IN_SEG_LDATA;
double *i_int_coeff IN_SEG_LDATA;
double *j_int_coeff IN_SEG_LDATA;
long *xpts_per_proc IN_SEG_LDATA;
long *ypts_per_proc IN_SEG_LDATA;
long xprocs IN_SEG_LDATA;
long yprocs IN_SEG_LDATA;
long numlev IN_SEG_LDATA;
double eig2 IN_SEG_LDATA;
long im IN_SEG_LDATA;
long jm IN_SEG_LDATA;

/* Platform dimensions, filled in by giet_procs_number() in thread(). */
unsigned int nclusters_x IN_SEG_LDATA;
unsigned int nclusters_y IN_SEG_LDATA;
unsigned int procs_per_cluster IN_SEG_LDATA;

/*
 * heap_inited: thread() spins until this equals the caller's thread id and
 * then adds procs_per_cluster to it.
 * run_threads: thread() spins until this becomes 1; the code that sets it is
 * not in this section.
 */
volatile long heap_inited = 0;
volatile int run_threads = 0;

#undef IN_SEG_LDATA
---|
92 | |
---|
93 | //Entry point for all threads (except main) |
---|
94 | // waiting allocs and inits of main then copy read-only tabs in ldata segment (replicated) |
---|
95 | // some read-write tabs are also replicated, but not entirely : only pointers |
---|
96 | __attribute__ ((constructor)) void thread() |
---|
97 | { |
---|
98 | unsigned long size; |
---|
99 | long id = (long) giet_thread_id(); |
---|
100 | |
---|
101 | unsigned int cx, cy, lp; |
---|
102 | |
---|
103 | giet_proc_xyp(&cx, &cy, &lp); |
---|
104 | giet_shr_printf("Thread %d (%d:%d.%d) waiting\n", id, cx, cy, lp); |
---|
105 | |
---|
106 | if (lp == 0) { |
---|
107 | |
---|
108 | giet_procs_number(&nclusters_x, &nclusters_y, &procs_per_cluster); |
---|
109 | heap_init(cx, cy); |
---|
110 | |
---|
111 | while (heap_inited != id) { |
---|
112 | asm volatile ("nop\r\n"); |
---|
113 | } |
---|
114 | heap_inited += procs_per_cluster; |
---|
115 | |
---|
116 | |
---|
117 | size = nprocs * sizeof(double ***); |
---|
118 | rhs_multi = (double ****) G_MALLOC(size, id); |
---|
119 | q_multi = (double ****) G_MALLOC(size, id); |
---|
120 | psi = (double ****) G_MALLOC(size, id); |
---|
121 | psim = (double ****) G_MALLOC(size, id); |
---|
122 | work1 = (double ****) G_MALLOC(size, id); |
---|
123 | work4 = (double ****) G_MALLOC(size, id); |
---|
124 | work5 = (double ****) G_MALLOC(size, id); |
---|
125 | work7 = (double ****) G_MALLOC(size, id); |
---|
126 | |
---|
127 | size = nprocs * sizeof(double **); |
---|
128 | psium = (double ***) G_MALLOC(size, id); |
---|
129 | psilm = (double ***) G_MALLOC(size, id); |
---|
130 | psib = (double ***) G_MALLOC(size, id); |
---|
131 | ga = (double ***) G_MALLOC(size, id); |
---|
132 | gb = (double ***) G_MALLOC(size, id); |
---|
133 | oldga = (double ***) G_MALLOC(size, id); |
---|
134 | oldgb = (double ***) G_MALLOC(size, id); |
---|
135 | work2 = (double ***) G_MALLOC(size, id); |
---|
136 | work3 = (double ***) G_MALLOC(size, id); |
---|
137 | work6 = (double ***) G_MALLOC(size, id); |
---|
138 | } |
---|
139 | |
---|
140 | while (run_threads != 1) { |
---|
141 | asm volatile ("nop\r\n"); |
---|
142 | } |
---|
143 | |
---|
144 | *gp[id].lpid = lp; |
---|
145 | |
---|
146 | if (lp == 0) { |
---|
147 | int i, j, k; |
---|
148 | |
---|
149 | xprocs = main_xprocs; |
---|
150 | yprocs = main_yprocs; |
---|
151 | numlev = main_numlev; |
---|
152 | eig2 = main_eig2; |
---|
153 | im = main_im; |
---|
154 | jm = main_jm; |
---|
155 | |
---|
156 | size = numlev * sizeof(long); |
---|
157 | imx = (long *) G_MALLOC(size, id); |
---|
158 | jmx = (long *) G_MALLOC(size, id); |
---|
159 | xpts_per_proc = (long *) G_MALLOC(size, id); |
---|
160 | ypts_per_proc = (long *) G_MALLOC(size, id); |
---|
161 | |
---|
162 | size = numlev * sizeof(double); |
---|
163 | lev_res = (double *) G_MALLOC(size, id); |
---|
164 | lev_tol = (double *) G_MALLOC(size, id); |
---|
165 | i_int_coeff = (double *) G_MALLOC(size, id); |
---|
166 | j_int_coeff = (double *) G_MALLOC(size, id); |
---|
167 | |
---|
168 | for(i=0;i<numlev;i++) { |
---|
169 | imx[i] = main_imx[i]; |
---|
170 | jmx[i] = main_jmx[i]; |
---|
171 | lev_res[i] = main_lev_res[i]; |
---|
172 | lev_tol[i] = main_lev_tol[i]; |
---|
173 | i_int_coeff[i] = main_i_int_coeff[i]; |
---|
174 | j_int_coeff[i] = main_j_int_coeff[i]; |
---|
175 | xpts_per_proc[i] = main_xpts_per_proc[i]; |
---|
176 | ypts_per_proc[i] = main_ypts_per_proc[i]; |
---|
177 | } |
---|
178 | |
---|
179 | size = numlev * sizeof(double **); |
---|
180 | for (i = 0; i < nprocs; i++) { |
---|
181 | |
---|
182 | q_multi[i] = (double ***) G_MALLOC(size, id); |
---|
183 | rhs_multi[i] = (double ***) G_MALLOC(size, id); |
---|
184 | |
---|
185 | for (j = 0; j < numlev; j++) { |
---|
186 | |
---|
187 | rhs_multi[i][j] = (double **) G_MALLOC(((imx[j] - 2) / yprocs + 2) * sizeof(double *), id); |
---|
188 | q_multi[i][j] = (double **) G_MALLOC(((imx[j] - 2) / yprocs + 2) * sizeof(double *), id); |
---|
189 | for (k = 0; k < ((imx[j] - 2) / yprocs + 2); k++) { |
---|
190 | q_multi[i][j][k] = main_q_multi[i][j][k]; |
---|
191 | rhs_multi[i][j][k] = main_rhs_multi[i][j][k]; |
---|
192 | } |
---|
193 | |
---|
194 | } |
---|
195 | |
---|
196 | work1[i] = main_work1[i]; |
---|
197 | work2[i] = main_work2[i]; |
---|
198 | work3[i] = main_work3[i]; |
---|
199 | work4[i] = main_work4[i]; |
---|
200 | work5[i] = main_work5[i]; |
---|
201 | work6[i] = main_work6[i]; |
---|
202 | work7[i] = main_work7[i]; |
---|
203 | psi[i] = main_psi[i]; |
---|
204 | psim[i] = main_psim[i]; |
---|
205 | psium[i] = main_psium[i]; |
---|
206 | psilm[i] = main_psilm[i]; |
---|
207 | psib[i] = main_psib[i]; |
---|
208 | ga[i] = main_ga[i]; |
---|
209 | gb[i] = main_gb[i]; |
---|
210 | oldga[i] = main_oldga[i]; |
---|
211 | oldgb[i] = main_oldgb[i]; |
---|
212 | } |
---|
213 | } |
---|
214 | giet_shr_printf("Thread %d launched\n", id); |
---|
215 | |
---|
216 | slave(&id); |
---|
217 | |
---|
218 | BARRIER(bars->barrier, nprocs) |
---|
219 | |
---|
220 | giet_exit("done."); |
---|
221 | } |
---|
222 | |
---|
223 | |
---|
/* Option-argument pointer that getopt() callers expect to exist; the stub
 * below never assigns it. */
const char *optarg;

/*
 * Minimal stand-in for getopt(): all three arguments are ignored and the
 * function always returns -1.
 */
int getopt(int argc, char *const *argv, const char *optstring)
{
    (void) argc;
    (void) argv;
    (void) optstring;

    return -1;
}
---|
230 | |
---|
231 | //give the cluster coordinate by thread number |
---|
232 | // if tid=-1, return the next cluster (round robin) |
---|
233 | void clusterXY(int tid, unsigned int *cx, unsigned int *cy) |
---|
234 | { |
---|
235 | unsigned int cid; |
---|
236 | static unsigned int x = 0, y = 0; |
---|
237 | |
---|
238 | cid = tid / procs_per_cluster; |
---|
239 | |
---|
240 | if (tid != -1) { |
---|
241 | *cx = (cid / nclusters_y); |
---|
242 | *cy = (cid % nclusters_y); |
---|
243 | return; |
---|
244 | } |
---|
245 | |
---|
246 | if (giet_thread_id() != 0) { |
---|
247 | giet_exit("pseudo-random mapped malloc : thread 0 only"); |
---|
248 | } |
---|
249 | |
---|
250 | x++; |
---|
251 | if (x == nclusters_x) { |
---|
252 | x = 0; |
---|
253 | y++; |
---|
254 | if (y == nclusters_y) { |
---|
255 | y = 0; |
---|
256 | } |
---|
257 | } |
---|
258 | *cx = x; |
---|
259 | *cy = y; |
---|
260 | } |
---|
261 | |
---|
/*
 * Allocation wrapper: asks clusterXY() for the cluster coordinates that
 * correspond to tid, requests s bytes from remote_malloc() for those
 * coordinates, and checks with giet_assert() that the result is non-null.
 *
 * Returns the pointer obtained from remote_malloc().
 */
void *ocean_malloc(unsigned long s, int tid)
{
    unsigned int cx, cy;
    void *block;

    clusterXY(tid, &cx, &cy);
    block = remote_malloc(s, cx, cy);
    giet_assert (block != 0, "Malloc failed");

    return block;
}
---|
271 | |
---|
/*
 * Replacement for exit(): forwards to giet_exit() with a message that tells
 * whether status was zero ("Done (ok)") or not ("Done (status != 0)").
 */
void exit(int status)
{
    giet_exit(status ? "Done (status != 0)" : "Done (ok)");
}
---|