| 1 | #!/usr/bin/env python |
|---|
| 2 | |
|---|
| 3 | from mapping import * |
|---|
| 4 | |
|---|
| 5 | ###################################################################################### |
|---|
| 6 | # file : fft.py (for the fft application) |
|---|
| 7 | # date : March 2016 |
|---|
| 8 | # author : Alain Greiner |
|---|
| 9 | ####################################################################################### |
|---|
| 10 | # This file describes the mapping of the multi-threaded "fft" |
|---|
| 11 | # application on a multi-clusters, multi-processors architecture. |
|---|
| 12 | # There is one thread per processor. |
|---|
| 13 | # The mapping of virtual segments is the following: |
|---|
| 14 | # - There is one shared data vseg in cluster[0][0] |
|---|
| 15 | # - The code vsegs are replicated on all clusters containing processors. |
|---|
| 16 | # - There is one heap vseg per cluster containing processors. |
|---|
| 17 | # - The stacks vsegs are distributed on all clusters containing processors. |
|---|
| 18 | # This mapping uses 5 platform parameters, (obtained from the "mapping" argument) |
|---|
| 19 | # - x_size : number of clusters in a row |
|---|
| 20 | # - y_size : number of clusters in a column |
|---|
| 21 | # - x_width : number of bits coding x coordinate |
|---|
| 22 | # - y_width : number of bits coding y coordinate |
|---|
| 23 | # - nprocs : number of processors per cluster |
|---|
| 24 | #################################################################################### |
|---|
| 25 | |
|---|
| 26 | ###################### |
|---|
def extend( mapping ):
    """Add the 'fft' vspace (vsegs and threads) to the given mapping.

    Attributes read from mapping: x_size, y_size, nprocs, clusters and name.
    Only clusters whose 'procs' attribute is non-empty receive code, stack
    and heap vsegs, and threads.

    Registration order is: data vseg, code vsegs, stack vsegs, heap vsegs,
    then threads. The mapping name is suffixed with '_fft'.

    Returns the vspace object returned by mapping.addVspace().
    """

    x_size = mapping.x_size
    y_size = mapping.y_size
    nprocs = mapping.nprocs

    # define vsegs base & size
    code_base  = 0x10000000
    code_size  = 0x00010000     # 64 Kbytes (replicated in each cluster)

    data_base  = 0x20000000
    data_size  = 0x00010000     # 64 Kbytes (non replicated)

    heap_base  = 0x40000000
    heap_size  = 0x02000000     # 32 Mbytes (per cluster)

    stack_base = 0x30000000
    stack_size = 0x00040000     # 256 Kbytes (per thread)

    # (x, y, cluster_id) for every cluster containing processors,
    # in row-major order : cluster_id = (x * y_size) + y
    all_clusters = [ (x, y, (x * y_size) + y)
                     for x in range( x_size )
                     for y in range( y_size ) ]
    populated = [ c for c in all_clusters if mapping.clusters[c[2]].procs ]

    # create Vspace
    vspace = mapping.addVspace( name = 'fft', startname = 'fft_data' , active = True )

    # data vseg in cluster[0,0] : non local
    mapping.addVseg( vspace, 'fft_data', data_base , data_size,
                     'C_WU', vtype = 'ELF', x = 0 , y = 0 , pseg = 'RAM',
                     binpath = 'bin/fft/appli.elf',
                     local = False )

    # code vsegs : local (one copy in each cluster)
    for x, y, cluster_id in populated:
        mapping.addVseg( vspace, 'fft_code_%d_%d' % (x,y), code_base, code_size,
                         'CXWU', vtype = 'ELF', x = x , y = y , pseg = 'RAM',
                         binpath = 'bin/fft/appli.elf',
                         local = True )

    # stack vsegs : local (one stack per processor)
    for x, y, cluster_id in populated:
        for p in range( nprocs ):
            proc_id = (cluster_id * nprocs) + p
            # each thread owns a stack_size slot, but the last 4 Kbytes of the
            # slot are left unmapped (presumably a guard page - to be confirmed)
            size = stack_size - 4096
            base = stack_base + (proc_id * stack_size)

            mapping.addVseg( vspace, 'fft_stack_%d_%d_%d' % (x,y,p), base, size,
                             'C_WU', vtype = 'BUFFER', x = x , y = y , pseg = 'RAM',
                             local = True )

    # heap vsegs : distributed but non local (all heap vsegs can be accessed by all tasks)
    for x, y, cluster_id in populated:
        base = heap_base + (cluster_id * heap_size)

        mapping.addVseg( vspace, 'fft_heap_%d_%d' % (x,y), base, heap_size,
                         'C_WU', vtype = 'BUFFER', x = x , y = y , pseg = 'RAM',
                         local = False, big = True )

    # distribute one thread per processor / Main on P[0,0,0]
    for x, y, cluster_id in populated:
        for p in range( nprocs ):
            is_main = ( x == 0 and y == 0 and p == 0 )
            startid = 1 if is_main else 0      # 1 for main / 0 for slaves

            mapping.addThread( vspace,
                               'fft_%d_%d_%d' % (x,y,p),
                               is_main,
                               x, y, p,
                               'fft_stack_%d_%d_%d' % (x,y,p),
                               'fft_heap_%d_%d' % (x,y),
                               startid )

    # extend mapping name
    mapping.name += '_fft'

    return vspace  # useful for test
|---|
| 119 | |
|---|
| 120 | ################################ test ###################################################### |
|---|
| 121 | |
|---|
if __name__ == '__main__':

    # Self-test : build the fft vspace on a small test platform and print
    # its XML description. The entry point defined in this file is extend().
    # NOTE(review): the Mapping arguments are presumably (name, x_size,
    # y_size, nprocs) - confirm against the Mapping constructor.
    vspace = extend( Mapping( 'test', 2, 2, 4 ) )
    print( vspace.xml() )
|---|
| 126 | |
|---|
| 127 | |
|---|
| 128 | # Local Variables: |
|---|
| 129 | # tab-width: 4; |
|---|
| 130 | # c-basic-offset: 4; |
|---|
| 131 | # c-file-offsets:((innamespace . 0)(inline-open . 0)); |
|---|
| 132 | # indent-tabs-mode: nil; |
|---|
| 133 | # End: |
|---|
| 134 | # |
|---|
| 135 | # vim: filetype=python:expandtab:shiftwidth=4:tabstop=4:softtabstop=4 |
|---|
| 136 | |
|---|