1 | ////////////////////////////////////////////////////////////////////////////////// |
---|
2 | // File : gameoflife.c |
---|
3 | // Date : November 2013 / February 2015 |
---|
4 | // Authors : Alexandre Joannou <alexandre.joannou@lip6.fr> november 2013 |
---|
5 | // Alain Greiner <alain.greiner@lip6.fr> february 2015 |
---|
6 | ////////////////////////////////////////////////////////////////////////////////// |
---|
7 | // This multi-threaded application is an emulation of the Game of Life automaton. |
---|
8 | // The world size is defined by the Frame Buffer width and height. |
---|
9 | // |
---|
10 | // There is at most one thread per processor in the platform. |
---|
11 | // - If the number of processors is larger than the number of lines, |
---|
12 | // the number of threads is equal to the number of lines, and |
---|
13 | // each thread processes one single line. |
---|
14 | // - if the number of processors is not larger than the number of lines, |
---|
15 | // the number of threads is equal to the number of processors, and |
---|
16 | // each thread processes height/nthreads (or height/nthreads + 1) lines. |
---|
17 | // |
---|
18 | // The thread running on processor P(0,0,0) executes the main() function, |
---|
19 | // which initialises the barrier, the TTY terminal and the CMA controller, |
---|
20 | // and launches the other threads before calling the execute() function. |
---|
21 | // The other threads only run the execute() function. |
---|
22 | // |
---|
23 | // The total number of processors cannot be larger than 1024 = 16 * 16 * 4 |
---|
24 | ////////////////////////////////////////////////////////////////////////////////// |
---|
25 | |
---|
26 | #include "stdio.h" |
---|
27 | #include "limits.h" |
---|
28 | #include "user_barrier.h" |
---|
29 | #include "mapping_info.h" |
---|
30 | #include "hard_config.h" |
---|
31 | #include "malloc.h" |
---|
32 | |
---|
33 | #define VERBOSE 1 |
---|
34 | |
---|
35 | typedef unsigned char uint8_t; |
---|
36 | |
---|
37 | typedef struct |
---|
38 | { |
---|
39 | unsigned int index; // index of first line to be processed |
---|
40 | unsigned int lines; // number of lines to be processed |
---|
41 | } arguments_t; |
---|
42 | |
---|
43 | arguments_t args[1024]; // at most 1024 threads |
---|
44 | |
---|
45 | uint8_t world[2][256][256] __attribute__((aligned(64))); |
---|
46 | |
---|
47 | uint8_t display[2][256][256] __attribute__((aligned(64))); |
---|
48 | |
---|
49 | unsigned int status0[16] __attribute__((aligned(64))); |
---|
50 | unsigned int status1[16] __attribute__((aligned(64))); |
---|
51 | |
---|
52 | giet_sqt_barrier_t barrier; |
---|
53 | |
---|
54 | unsigned int width; |
---|
55 | unsigned int height; |
---|
56 | |
---|
57 | //////////////////////////////////// |
---|
58 | void init_world( unsigned int phase, |
---|
59 | unsigned int base_line, |
---|
60 | unsigned int nb_line ) |
---|
61 | { |
---|
62 | unsigned int x,y; |
---|
63 | for (y = base_line ; y < base_line + nb_line ; y++) |
---|
64 | { |
---|
65 | for(x = 0 ; x < width ; x++) |
---|
66 | { |
---|
67 | world[phase][y][x] = (giet_rand() >> (x % 8)) & 0x1; |
---|
68 | } |
---|
69 | } |
---|
70 | } |
---|
71 | |
---|
72 | ////////////////////////////////////////////////////// |
---|
73 | uint8_t number_of_alive_neighbour( unsigned int phase, |
---|
74 | unsigned int x, |
---|
75 | unsigned int y ) |
---|
76 | { |
---|
77 | uint8_t nb = 0; |
---|
78 | |
---|
79 | nb += world[phase][(y - 1) % height][(x - 1) % width]; |
---|
80 | nb += world[phase][ y ][(x - 1) % width]; |
---|
81 | nb += world[phase][(y + 1) % height][(x - 1) % width]; |
---|
82 | nb += world[phase][(y - 1) % height][ x ]; |
---|
83 | nb += world[phase][(y + 1) % height][ x ]; |
---|
84 | nb += world[phase][(y - 1) % height][(x + 1) % width]; |
---|
85 | nb += world[phase][ y ][(x + 1) % width]; |
---|
86 | nb += world[phase][(y + 1) % height][(x + 1) % width]; |
---|
87 | |
---|
88 | return nb; |
---|
89 | } |
---|
90 | |
---|
91 | ///////////////////////////////////////// |
---|
92 | uint8_t compute_cell( unsigned int phase, |
---|
93 | unsigned int x, |
---|
94 | unsigned int y ) |
---|
95 | { |
---|
96 | uint8_t nb_neighbours_alive = number_of_alive_neighbour( phase, x , y ); |
---|
97 | |
---|
98 | if (world[phase][y][x] == 1) |
---|
99 | { |
---|
100 | if (nb_neighbours_alive == 2 || nb_neighbours_alive == 3) return 1; |
---|
101 | } |
---|
102 | else |
---|
103 | { |
---|
104 | if (nb_neighbours_alive == 3) return 1; |
---|
105 | else return world[phase][y][x]; |
---|
106 | } |
---|
107 | return 0; |
---|
108 | } |
---|
109 | |
---|
110 | ///////////////////////////////////////// |
---|
111 | void compute_new_gen( unsigned int phase, |
---|
112 | unsigned int base_line, |
---|
113 | unsigned int nb_line ) |
---|
114 | { |
---|
115 | unsigned int x,y; |
---|
116 | for (y = base_line; y < base_line + nb_line; y++) |
---|
117 | { |
---|
118 | for(x = 0; x < width ; x++) |
---|
119 | { |
---|
120 | world[phase][y][x] = compute_cell( 1 - phase , x , y ); |
---|
121 | } |
---|
122 | } |
---|
123 | } |
---|
124 | |
---|
125 | //////////////////////////////////// |
---|
126 | void copy_world( unsigned int phase, |
---|
127 | unsigned int base_line, |
---|
128 | unsigned int nb_line ) |
---|
129 | { |
---|
130 | unsigned int x,y; |
---|
131 | for (y = base_line; y < base_line + nb_line; y++) |
---|
132 | { |
---|
133 | for(x = 0; x < width ; x++) |
---|
134 | { |
---|
135 | display[phase][y][x] = world[phase][y][x]*255; |
---|
136 | } |
---|
137 | } |
---|
138 | } |
---|
139 | |
---|
140 | |
---|
141 | |
---|
142 | /////////////////////////////////////////////////////////////// |
---|
143 | __attribute__((constructor)) void execute( arguments_t* pargs ) |
---|
144 | /////////////////////////////////////////////////////////////// |
---|
145 | { |
---|
146 | unsigned int nb_lines = pargs->lines; |
---|
147 | unsigned int base_line = pargs->index; |
---|
148 | |
---|
149 | ///////////// parallel world initialization |
---|
150 | |
---|
151 | // All processors initialize world[0] |
---|
152 | init_world( 0 , base_line , nb_lines ); |
---|
153 | |
---|
154 | // copy world[0] to display[0] |
---|
155 | copy_world( 0 , base_line , nb_lines ); |
---|
156 | |
---|
157 | // synchronise with other procs |
---|
158 | sqt_barrier_wait( &barrier ); |
---|
159 | |
---|
160 | // main() makes display[0] |
---|
161 | if ( base_line == 0 ) giet_fbf_cma_display ( 0 ); |
---|
162 | |
---|
163 | //////////// evolution : 2 steps per iteration |
---|
164 | |
---|
165 | unsigned int i = 0; |
---|
166 | while( 1 ) |
---|
167 | { |
---|
168 | // compute world[1] from world[0] |
---|
169 | compute_new_gen( 1 , base_line , nb_lines ); |
---|
170 | |
---|
171 | // copy world[1] to display[1] |
---|
172 | copy_world( 1 , base_line , nb_lines ); |
---|
173 | |
---|
174 | // synchronise with other procs |
---|
175 | sqt_barrier_wait( &barrier ); |
---|
176 | |
---|
177 | // main makes display[1] |
---|
178 | if ( base_line == 0 ) giet_fbf_cma_display ( 1 ); |
---|
179 | |
---|
180 | #if VERBOSE |
---|
181 | if ( base_line == 0 ) giet_tty_printf(" - step %d\n", 2*i ); |
---|
182 | #endif |
---|
183 | |
---|
184 | // compute world[0] from world[1] |
---|
185 | compute_new_gen( 0 , base_line , nb_lines ); |
---|
186 | |
---|
187 | // copy world[0] to display[0] |
---|
188 | copy_world( 0 , base_line , nb_lines ); |
---|
189 | |
---|
190 | // synchronise with other procs |
---|
191 | sqt_barrier_wait( &barrier ); |
---|
192 | |
---|
193 | // main makes display[0] |
---|
194 | if ( base_line == 0 ) giet_fbf_cma_display ( 0 ); |
---|
195 | |
---|
196 | #if VERBOSE |
---|
197 | if ( base_line == 0 ) giet_tty_printf(" - step %d\n", 2*i + 1 ); |
---|
198 | #endif |
---|
199 | |
---|
200 | i++; |
---|
201 | |
---|
202 | } // end evolution loop |
---|
203 | |
---|
204 | giet_pthread_exit("Completed"); |
---|
205 | |
---|
206 | } // end main() |
---|
207 | |
---|
208 | |
---|
209 | |
---|
210 | //////////////////////////////////////// |
---|
211 | __attribute__((constructor)) void main() |
---|
212 | //////////////////////////////////////// |
---|
213 | { |
---|
214 | // get processor identifier |
---|
215 | unsigned int x; |
---|
216 | unsigned int y; |
---|
217 | unsigned int p; |
---|
218 | giet_proc_xyp( &x, &y, &p ); |
---|
219 | |
---|
220 | // get platform parameters |
---|
221 | unsigned int x_size; |
---|
222 | unsigned int y_size; |
---|
223 | unsigned int nprocs; |
---|
224 | giet_procs_number( &x_size, &y_size, &nprocs ); |
---|
225 | |
---|
226 | // get a shared TTY |
---|
227 | giet_tty_alloc( 1 ); |
---|
228 | |
---|
229 | giet_pthread_assert( (x_size <= 16) , "x_size no larger than 16" ); |
---|
230 | giet_pthread_assert( (y_size <= 16) , "y_size no larger than 16" ); |
---|
231 | giet_pthread_assert( (nprocs <= 4) , "nprocs no larger than 4" ); |
---|
232 | |
---|
233 | // get FBF width and height |
---|
234 | giet_fbf_size( &width , &height ); |
---|
235 | |
---|
236 | giet_pthread_assert( (width <= 256) , "FBF width larger than 256" ); |
---|
237 | giet_pthread_assert( (height <= 256) , "FBF height larger than 256" ); |
---|
238 | giet_pthread_assert( (width && height) , "FBF not available" ); |
---|
239 | |
---|
240 | // compute number of threads and min number of lines per thread |
---|
241 | // extra is the number of threads that must process one extra line |
---|
242 | unsigned int total_procs = x_size * y_size * nprocs; |
---|
243 | unsigned int nthreads; |
---|
244 | unsigned int nlines; |
---|
245 | unsigned int extra; |
---|
246 | if ( total_procs > height ) |
---|
247 | { |
---|
248 | nthreads = height; |
---|
249 | nlines = 1; |
---|
250 | extra = 0; |
---|
251 | } |
---|
252 | else |
---|
253 | { |
---|
254 | nthreads = total_procs; |
---|
255 | nlines = height / total_procs; |
---|
256 | extra = height % total_procs; |
---|
257 | } |
---|
258 | |
---|
259 | // get FBF ownership |
---|
260 | giet_fbf_alloc(); |
---|
261 | |
---|
262 | // get a Chained Buffer DMA channel |
---|
263 | giet_fbf_cma_alloc(); |
---|
264 | |
---|
265 | // initializes the source and destination buffers |
---|
266 | giet_fbf_cma_init_buf( &display[0][0][0] , |
---|
267 | &display[1][0][0] , |
---|
268 | status0 , |
---|
269 | status1 ); |
---|
270 | |
---|
271 | // activates CMA channel |
---|
272 | giet_fbf_cma_start( height * width ); |
---|
273 | |
---|
274 | // initializes distributed heap |
---|
275 | unsigned int cx; |
---|
276 | unsigned int cy; |
---|
277 | for ( cx = 0 ; cx < x_size ; cx++ ) |
---|
278 | { |
---|
279 | for ( cy = 0 ; cy < y_size ; cy++ ) |
---|
280 | { |
---|
281 | heap_init( cx , cy ); |
---|
282 | } |
---|
283 | } |
---|
284 | |
---|
285 | // initialises barrier |
---|
286 | sqt_barrier_init( &barrier , x_size , y_size , nprocs ); |
---|
287 | |
---|
288 | giet_tty_printf("\n[GAMEOFLIFE] P[%d,%d,%d] completes initialisation at cycle %d\n" |
---|
289 | " nprocs = %d / nlines = %d / nthreads = %d\n", |
---|
290 | x, y, p, giet_proctime() , total_procs , height , nthreads ); |
---|
291 | |
---|
292 | // compute arguments (index, nlines) for all threads |
---|
293 | unsigned int n; // thread index |
---|
294 | unsigned int index; // first line index |
---|
295 | for ( n = 0 , index = 0 ; n < nthreads ; n++ ) |
---|
296 | { |
---|
297 | if ( extra ) |
---|
298 | { |
---|
299 | args[n].index = index; |
---|
300 | args[n].lines = nlines + 1; |
---|
301 | index = index + nlines + 1; |
---|
302 | } |
---|
303 | else |
---|
304 | { |
---|
305 | args[n].index = index; |
---|
306 | args[n].lines = nlines; |
---|
307 | index = index + nlines; |
---|
308 | } |
---|
309 | #if VERBOSE |
---|
310 | giet_tty_printf("[GAMEOFLIFE] Thread %d : first = %d / nlines = %d\n", |
---|
311 | n , args[n].index , args[n].lines ); |
---|
312 | #endif |
---|
313 | } |
---|
314 | |
---|
315 | // launch all other threads |
---|
316 | pthread_t trdid; // unused because no pthread_join() |
---|
317 | for ( n = 1 ; n < nthreads ; n++ ) |
---|
318 | { |
---|
319 | if ( giet_pthread_create( &trdid, |
---|
320 | NULL, // no attribute |
---|
321 | &execute, |
---|
322 | &args[n] ) ) |
---|
323 | { |
---|
324 | giet_tty_printf("\n[TRANSPOSE ERROR] creating thread %x\n", n ); |
---|
325 | giet_pthread_exit( NULL ); |
---|
326 | } |
---|
327 | } |
---|
328 | |
---|
329 | // run execute function |
---|
330 | execute( &args[0] ); |
---|
331 | |
---|
332 | giet_pthread_exit( "completed" ); |
---|
333 | |
---|
334 | } // end main() |
---|
335 | |
---|
336 | |
---|
337 | |
---|
338 | // Local Variables: |
---|
339 | // tab-width: 3 |
---|
340 | // c-basic-offset: 3 |
---|
341 | // c-file-offsets:((innamespace . 0)(inline-open . 0)) |
---|
342 | // indent-tabs-mode: nil |
---|
343 | // End: |
---|
344 | |
---|
345 | // vim: filetype=cpp:expandtab:shiftwidth=3:tabstop=3:softtabstop=3 |
---|
346 | |
---|
347 | |
---|
348 | |
---|