/* Simplified definitions of the standard functions used by ocean on GIET */
---|
| 2 | |
---|
| 3 | #include <stdarg.h> |
---|
| 4 | #include <stdio.h> |
---|
| 5 | #include <malloc.h> |
---|
| 6 | #include <stdlib.h> |
---|
| 7 | |
---|
| 8 | EXTERN_ENV |
---|
| 9 | |
---|
| 10 | #include "decs.h" |
---|
| 11 | #include "giet_utils.h" |
---|
| 12 | |
---|
/*
 * Replacement definitions of the standard stream objects.
 *
 * Each "stream" is really the address of a string literal: an empty string
 * for stdout and "STDERR : " for stderr.  NOTE(review): presumably the
 * simplified output routines print this string as a prefix -- confirm
 * against their implementation.
 *
 * The explicit casts are required: initialising a FILE * directly from a
 * char array is an incompatible-pointer conversion, which recent compilers
 * reject instead of merely warning about.
 */
FILE *stdout = (FILE *) "";
FILE *stderr = (FILE *) "STDERR : ";
---|
| 15 | |
---|
/*
 * Master copies of the solver tables and parameters.
 *
 * They are only declared here; their definitions live outside the visible
 * part of this file.  thread() reads them to fill the replicas that this
 * file defines in the "seg_ldata" section.
 */

/* Four-level tables. */
extern double ****main_q_multi;
extern double ****main_rhs_multi;
extern double ****main_psi;
extern double ****main_psim;
extern double ****main_work1;
extern double ****main_work4;
extern double ****main_work5;
extern double ****main_work7;

/* Three-level tables. */
extern double ***main_psium;
extern double ***main_psilm;
extern double ***main_psib;
extern double ***main_ga;
extern double ***main_gb;
extern double ***main_oldga;
extern double ***main_oldgb;
extern double ***main_work2;
extern double ***main_work3;
extern double ***main_work6;

/* Arrays indexed by level (thread() copies numlev entries of each). */
extern long *main_imx;
extern long *main_jmx;
extern long *main_xpts_per_proc;
extern long *main_ypts_per_proc;
extern double *main_lev_res;
extern double *main_lev_tol;
extern double *main_i_int_coeff;
extern double *main_j_int_coeff;

/* Scalar parameters. */
extern long main_xprocs;
extern long main_yprocs;
extern long main_numlev;
extern long main_im;
extern long main_jm;
extern double main_eig2;
---|
| 49 | |
---|
/*
 * Places a definition in the "seg_ldata" section.  According to the comment
 * on thread(), that segment is replicated, so each definition tagged this
 * way is a local replica filled in by thread().
 */
#define LDATA_REPLICA __attribute__ ((section("seg_ldata")))

/* Replicated top-level tables (allocated and filled by thread()). */
double ****work1 LDATA_REPLICA;
double ***work2 LDATA_REPLICA;
double ***work3 LDATA_REPLICA;
double ****work4 LDATA_REPLICA;
double ****work5 LDATA_REPLICA;
double ***work6 LDATA_REPLICA;
double ****work7 LDATA_REPLICA;
double ****psi LDATA_REPLICA;
double ****psim LDATA_REPLICA;
double ***psium LDATA_REPLICA;
double ***psilm LDATA_REPLICA;
double ***psib LDATA_REPLICA;
double ***ga LDATA_REPLICA;
double ***gb LDATA_REPLICA;
double ***oldga LDATA_REPLICA;
double ***oldgb LDATA_REPLICA;
double ****q_multi LDATA_REPLICA;
double ****rhs_multi LDATA_REPLICA;
long *imx LDATA_REPLICA;
long *jmx LDATA_REPLICA;
/* NOTE(review): f is not assigned anywhere in the visible code. */
double *f LDATA_REPLICA;
/* Not placed in seg_ldata; thread() indexes it by thread id. */
struct Global_Private *gp;

/* Replicated per-level arrays and scalar parameters. */
double *lev_res LDATA_REPLICA;
double *lev_tol LDATA_REPLICA;
double *i_int_coeff LDATA_REPLICA;
double *j_int_coeff LDATA_REPLICA;
long *xpts_per_proc LDATA_REPLICA;
long *ypts_per_proc LDATA_REPLICA;
long xprocs LDATA_REPLICA;
long yprocs LDATA_REPLICA;
long numlev LDATA_REPLICA;
double eig2 LDATA_REPLICA;
long im LDATA_REPLICA;
long jm LDATA_REPLICA;

/* Platform geometry, filled by giet_procs_number() in thread(). */
unsigned int nclusters_x LDATA_REPLICA;
unsigned int nclusters_y LDATA_REPLICA;
unsigned int procs_per_cluster LDATA_REPLICA;

/*
 * Start-up synchronisation flags polled by thread():
 *  - heap_inited: a thread waits until it equals its own id, then adds
 *    procs_per_cluster to it;
 *  - run_threads: threads spin until it becomes 1 (it is set outside the
 *    visible part of this file).
 */
volatile long heap_inited = 0;
volatile int run_threads = 0;
---|
| 92 | |
---|
| 93 | //Entry point for all threads (except main) |
---|
| 94 | // waiting allocs and inits of main then copy read-only tabs in ldata segment (replicated) |
---|
| 95 | // some read-write tabs are also replicated, but not entirely : only pointers |
---|
| 96 | __attribute__ ((constructor)) void thread() |
---|
| 97 | { |
---|
| 98 | unsigned long size; |
---|
| 99 | long id = (long) giet_thread_id(); |
---|
| 100 | |
---|
| 101 | unsigned int cx, cy, lp; |
---|
| 102 | |
---|
| 103 | giet_proc_xyp(&cx, &cy, &lp); |
---|
| 104 | giet_shr_printf("Thread %d (%d:%d.%d) waiting\n", id, cx, cy, lp); |
---|
| 105 | |
---|
| 106 | if (lp == 0) { |
---|
| 107 | |
---|
| 108 | giet_procs_number(&nclusters_x, &nclusters_y, &procs_per_cluster); |
---|
| 109 | heap_init(cx, cy); |
---|
| 110 | |
---|
| 111 | while (heap_inited != id) { |
---|
| 112 | asm volatile ("nop\r\n"); |
---|
| 113 | } |
---|
| 114 | heap_inited += procs_per_cluster; |
---|
| 115 | |
---|
| 116 | |
---|
| 117 | size = nprocs * sizeof(double ***); |
---|
| 118 | rhs_multi = (double ****) G_MALLOC(size, id); |
---|
| 119 | q_multi = (double ****) G_MALLOC(size, id); |
---|
| 120 | psi = (double ****) G_MALLOC(size, id); |
---|
| 121 | psim = (double ****) G_MALLOC(size, id); |
---|
| 122 | work1 = (double ****) G_MALLOC(size, id); |
---|
| 123 | work4 = (double ****) G_MALLOC(size, id); |
---|
| 124 | work5 = (double ****) G_MALLOC(size, id); |
---|
| 125 | work7 = (double ****) G_MALLOC(size, id); |
---|
| 126 | |
---|
| 127 | size = nprocs * sizeof(double **); |
---|
| 128 | psium = (double ***) G_MALLOC(size, id); |
---|
| 129 | psilm = (double ***) G_MALLOC(size, id); |
---|
| 130 | psib = (double ***) G_MALLOC(size, id); |
---|
| 131 | ga = (double ***) G_MALLOC(size, id); |
---|
| 132 | gb = (double ***) G_MALLOC(size, id); |
---|
| 133 | oldga = (double ***) G_MALLOC(size, id); |
---|
| 134 | oldgb = (double ***) G_MALLOC(size, id); |
---|
| 135 | work2 = (double ***) G_MALLOC(size, id); |
---|
| 136 | work3 = (double ***) G_MALLOC(size, id); |
---|
| 137 | work6 = (double ***) G_MALLOC(size, id); |
---|
| 138 | } |
---|
| 139 | |
---|
| 140 | while (run_threads != 1) { |
---|
| 141 | asm volatile ("nop\r\n"); |
---|
| 142 | } |
---|
| 143 | |
---|
| 144 | *gp[id].lpid = lp; |
---|
| 145 | |
---|
| 146 | if (lp == 0) { |
---|
| 147 | int i, j, k; |
---|
| 148 | |
---|
| 149 | xprocs = main_xprocs; |
---|
| 150 | yprocs = main_yprocs; |
---|
| 151 | numlev = main_numlev; |
---|
| 152 | eig2 = main_eig2; |
---|
| 153 | im = main_im; |
---|
| 154 | jm = main_jm; |
---|
| 155 | |
---|
| 156 | size = numlev * sizeof(long); |
---|
| 157 | imx = (long *) G_MALLOC(size, id); |
---|
| 158 | jmx = (long *) G_MALLOC(size, id); |
---|
| 159 | xpts_per_proc = (long *) G_MALLOC(size, id); |
---|
| 160 | ypts_per_proc = (long *) G_MALLOC(size, id); |
---|
| 161 | |
---|
| 162 | size = numlev * sizeof(double); |
---|
| 163 | lev_res = (double *) G_MALLOC(size, id); |
---|
| 164 | lev_tol = (double *) G_MALLOC(size, id); |
---|
| 165 | i_int_coeff = (double *) G_MALLOC(size, id); |
---|
| 166 | j_int_coeff = (double *) G_MALLOC(size, id); |
---|
| 167 | |
---|
| 168 | for(i=0;i<numlev;i++) { |
---|
| 169 | imx[i] = main_imx[i]; |
---|
| 170 | jmx[i] = main_jmx[i]; |
---|
| 171 | lev_res[i] = main_lev_res[i]; |
---|
| 172 | lev_tol[i] = main_lev_tol[i]; |
---|
| 173 | i_int_coeff[i] = main_i_int_coeff[i]; |
---|
| 174 | j_int_coeff[i] = main_j_int_coeff[i]; |
---|
| 175 | xpts_per_proc[i] = main_xpts_per_proc[i]; |
---|
| 176 | ypts_per_proc[i] = main_ypts_per_proc[i]; |
---|
| 177 | } |
---|
| 178 | |
---|
| 179 | size = numlev * sizeof(double **); |
---|
| 180 | for (i = 0; i < nprocs; i++) { |
---|
| 181 | |
---|
| 182 | q_multi[i] = (double ***) G_MALLOC(size, id); |
---|
| 183 | rhs_multi[i] = (double ***) G_MALLOC(size, id); |
---|
| 184 | |
---|
| 185 | for (j = 0; j < numlev; j++) { |
---|
| 186 | |
---|
| 187 | rhs_multi[i][j] = (double **) G_MALLOC(((imx[j] - 2) / yprocs + 2) * sizeof(double *), id); |
---|
| 188 | q_multi[i][j] = (double **) G_MALLOC(((imx[j] - 2) / yprocs + 2) * sizeof(double *), id); |
---|
| 189 | for (k = 0; k < ((imx[j] - 2) / yprocs + 2); k++) { |
---|
| 190 | q_multi[i][j][k] = main_q_multi[i][j][k]; |
---|
| 191 | rhs_multi[i][j][k] = main_rhs_multi[i][j][k]; |
---|
| 192 | } |
---|
| 193 | |
---|
| 194 | } |
---|
| 195 | |
---|
| 196 | work1[i] = main_work1[i]; |
---|
| 197 | work2[i] = main_work2[i]; |
---|
| 198 | work3[i] = main_work3[i]; |
---|
| 199 | work4[i] = main_work4[i]; |
---|
| 200 | work5[i] = main_work5[i]; |
---|
| 201 | work6[i] = main_work6[i]; |
---|
| 202 | work7[i] = main_work7[i]; |
---|
| 203 | psi[i] = main_psi[i]; |
---|
| 204 | psim[i] = main_psim[i]; |
---|
| 205 | psium[i] = main_psium[i]; |
---|
| 206 | psilm[i] = main_psilm[i]; |
---|
| 207 | psib[i] = main_psib[i]; |
---|
| 208 | ga[i] = main_ga[i]; |
---|
| 209 | gb[i] = main_gb[i]; |
---|
| 210 | oldga[i] = main_oldga[i]; |
---|
| 211 | oldgb[i] = main_oldgb[i]; |
---|
| 212 | } |
---|
| 213 | } |
---|
| 214 | giet_shr_printf("Thread %d launched\n", id); |
---|
| 215 | |
---|
| 216 | slave(&id); |
---|
| 217 | |
---|
| 218 | BARRIER(bars->barrier, nprocs) |
---|
| 219 | |
---|
| 220 | giet_exit("done."); |
---|
| 221 | } |
---|
| 222 | |
---|
| 223 | |
---|
/* Option-argument pointer paired with getopt(); the stub never assigns it. */
const char *optarg;

/*
 * Stub replacement for getopt().
 *
 * All three arguments are ignored and the function always returns -1, the
 * value getopt() uses to signal that no more options are available.
 */
int getopt(int argc, char *const *argv, const char *optstring)
{
    (void) argc;
    (void) argv;
    (void) optstring;

    return -1;
}
---|
| 230 | |
---|
| 231 | //give the cluster coordinate by thread number |
---|
| 232 | // if tid=-1, return the next cluster (round robin) |
---|
| 233 | void clusterXY(int tid, unsigned int *cx, unsigned int *cy) |
---|
| 234 | { |
---|
| 235 | unsigned int cid; |
---|
| 236 | static unsigned int x = 0, y = 0; |
---|
| 237 | |
---|
| 238 | cid = tid / procs_per_cluster; |
---|
| 239 | |
---|
| 240 | if (tid != -1) { |
---|
| 241 | *cx = (cid / nclusters_y); |
---|
| 242 | *cy = (cid % nclusters_y); |
---|
| 243 | return; |
---|
| 244 | } |
---|
| 245 | |
---|
| 246 | if (giet_thread_id() != 0) { |
---|
| 247 | giet_exit("pseudo-random mapped malloc : thread 0 only"); |
---|
| 248 | } |
---|
| 249 | |
---|
| 250 | x++; |
---|
| 251 | if (x == nclusters_x) { |
---|
| 252 | x = 0; |
---|
| 253 | y++; |
---|
| 254 | if (y == nclusters_y) { |
---|
| 255 | y = 0; |
---|
| 256 | } |
---|
| 257 | } |
---|
| 258 | *cx = x; |
---|
| 259 | *cy = y; |
---|
| 260 | } |
---|
| 261 | |
---|
/*
 * Allocate memory in the cluster selected for thread tid.
 *
 * s   : size passed unchanged to remote_malloc().
 * tid : thread number handed to clusterXY() to choose the cluster
 *       (-1 selects clusterXY()'s round-robin mode).
 *
 * The result of remote_malloc() is checked with giet_assert() before being
 * returned.
 */
void *ocean_malloc(unsigned long s, int tid)
{
    unsigned int cluster_x, cluster_y;

    clusterXY(tid, &cluster_x, &cluster_y);

    void *ptr = remote_malloc(s, cluster_x, cluster_y);

    giet_assert (ptr != 0, "Malloc failed");
    return ptr;
}
---|
| 271 | |
---|
/*
 * Replacement for the standard exit(): forwards to giet_exit() with a
 * message that tells whether status was zero or not.
 */
void exit(int status)
{
    char *message = status ? "Done (status != 0)" : "Done (ok)";

    giet_exit(message);
}
---|