#!/usr/bin/env python

from mapping import *

######################################################################################
#   file   : transpose.py
#   date   : april 2014
#   author : Alain Greiner
#######################################################################################
#  This file describes the mapping of the multi-threaded "transpose"
#  application on a multi-cluster, multi-processor architecture.
#  This includes both the mapping of virtual segments on the clusters,
#  and the mapping of tasks on processors.
#  This mapping uses 5 platform parameters (obtained from the "mapping" argument):
#  - x_size    : number of clusters in a row
#  - y_size    : number of clusters in a column
#  - x_width   : number of bits coding x coordinate
#  - y_width   : number of bits coding y coordinate
#  - nb_procs  : number of processors per cluster
####################################################################################

#########################
def transpose( mapping ):
    """Build and return the Vspace of the multi-threaded "transpose" application.

    The vspace contains three non replicated vsegs placed in cluster[0,0]
    (code, data, page table), one heap vseg per cluster, one stack vseg
    per processor, and one task per processor.

    Arguments:
    - mapping : object providing the x_size, y_size, y_width and nb_procs
                attributes read below.

    Returns the populated Vspace object.
    """

    x_size   = mapping.x_size
    y_size   = mapping.y_size
    nb_procs = mapping.nb_procs
    y_width  = mapping.y_width

    # define vsegs base & size
    code_base  = 0x10000000
    code_size  = 0x00010000     # 64 Kbytes

    data_base  = 0x20000000
    data_size  = 0x00010000     # 64 Kbytes

    ptab_base  = 0x30000000
    ptab_size  = 0x00040000     # 256 Kbytes

    stack_base = 0x40000000
    stack_size = 0x00010000     # 64 Kbytes

    heap_base  = 0x50000000
    heap_size  = 0x00010000     # 64 Kbytes

    # log2 of the address strides separating the distributed vsegs
    cluster_shift = 20          # 1 Mbyte window per cluster
    proc_shift    = 18          # 256 Kbytes window per processor stack
    # NOTE(review): one cluster window holds exactly 4 processor windows
    # (4 * 256 Kbytes = 1 Mbyte). With nb_procs > 4 the stack vsegs of a
    # cluster extend into the window of the next cluster index -- confirm
    # the supported range of nb_procs.

    # both ELF vobjs are loaded from the same binary
    elf_path = 'build/transpose/transpose.elf'

    # create Vspace
    vspace = Vspace( 'transpose', 'data' )

    # non replicated vsegs in cluster[0,0]
    vseg = Vseg( 'seg_code', code_base, 'CXWU', 0, 0, 'PSEG_RAM' )
    vseg.add( Vobj( 'code', code_size, 'ELF', binpath = elf_path ) )
    vspace.addVseg( vseg )

    vseg = Vseg( 'seg_data', data_base, 'C_WU', 0, 0, 'PSEG_RAM' )
    vseg.add( Vobj( 'data', data_size, 'ELF', binpath = elf_path ) )
    vspace.addVseg( vseg )

    vseg = Vseg( 'seg_ptab', ptab_base, 'C_WU', 0, 0, 'PSEG_RAM' )
    vseg.add( Vobj( 'ptab', ptab_size, 'PTAB', align = 13 ) )
    vspace.addVseg( vseg )

    # distributed vsegs: one stack per processor/task, one heap per cluster
    # (range is used rather than xrange so the loops run on Python 2 and 3)
    for x in range( x_size ):
        for y in range( y_size ):
            # cluster index is (x << y_width) + y, one window per index
            cluster_offset = ((x << y_width) + y) << cluster_shift

            vseg = Vseg( 'seg_heap_%d_%d' % (x,y),
                         heap_base + cluster_offset,
                         'C_WU', x, y, 'PSEG_RAM' )
            vseg.add( Vobj( 'heap_%d_%d' % (x,y), heap_size, 'BUFFER' ) )
            vspace.addVseg( vseg )

            for p in range( nb_procs ):
                proc_offset = p << proc_shift
                vseg = Vseg( 'seg_stack_%d_%d_%d' % (x,y,p),
                             stack_base + proc_offset + cluster_offset,
                             'C_WU', x, y, 'PSEG_RAM' )
                vseg.add( Vobj( 'stack_%d_%d_%d' % (x,y,p), stack_size, 'BUFFER' ) )
                vspace.addVseg( vseg )

    # distributed tasks / one task per processor
    for x in range( x_size ):
        for y in range( y_size ):
            for p in range( nb_procs ):
                # task index, numbered cluster by cluster (x major, then y, then p)
                trdid = (((x * y_size) + y) * nb_procs) + p
                # each task references its own stack vobj and its cluster heap vobj
                # by name; the trailing 0 is passed to Task unchanged
                # (presumably a start index -- confirm against the Task class)
                task = Task( 'trsp_%d_%d_%d' % (x,y,p), trdid, x, y, p,
                             'stack_%d_%d_%d' % (x,y,p), 'heap_%d_%d' % (x,y), 0 )
                vspace.addTask( task )

    return vspace

################################ test ######################################################

if __name__ == '__main__':
    # Self-test: print the vspace built for Mapping( 'test', 2, 2, 4 ).
    # The parenthesized form is accepted both as a Python 2 print statement
    # and as a Python 3 print() call, and prints the same text in both.
    print( transpose( Mapping( 'test', 2, 2, 4 ) ) )


# Local Variables:
# tab-width: 4;
# c-basic-offset: 4;
# c-file-offsets:((innamespace . 0)(inline-open . 0));
# indent-tabs-mode: nil;
# End:
#
# vim: filetype=python:expandtab:shiftwidth=4:tabstop=4:softtabstop=4