///////////////////////////////////////////////////////////////////////////////////
// File     : init.c
// Date     : 26/05/2012
// Authors  : alain greiner & mohamed karaoui
// Copyright (c) UPMC-LIP6
////////////////////////////////////////////////////////////////////////////////////
// The init.c file is part of the GIET nano-kernel.
// This code is used in the second phase of the boot to initialise the kernel
// structures and to launch one or several multi-task applications on a many-core
// hardware architecture.
// At this phase, all processors must have their MMU activated. This activation is
// done by the boot phase, which is responsible for constructing all the page tables.
////////////////////////////////////////////////////////////////////////////////////

#include
#include
#include
#include
#include
#include
#include
#include
#include

#define in_kinit __attribute__((section (".kinit")))

unsigned int _ptabs[GIET_NB_VSPACE_MAX];

void _tcg_init();
void _peri_init();

extern void _task_init();

////////////////////////////////////////////////////////////////////////////
// _init()
////////////////////////////////////////////////////////////////////////////
in_kinit void _init()
{
    _puts("\n[INIT] Starting kernel initialisations at cycle ");
    _putw( _proctime() );
    _puts("\n");

    // build the task contexts
    _tcg_init();
    _puts("\n[INIT] Task contexts completed at cycle ");
    _putw( _proctime() );
    _puts("\n");

    // initialise the peripherals
    _peri_init();
    _puts("\n[INIT] Peripherals completed at cycle ");
    _putw( _proctime() );
    _puts("\n");

    // wake up all other processors
    mapping_header_t* header = (mapping_header_t*)&seg_mapping_base;
    header->signature = OUT_MAPPING_SIGNATURE;
}

////////////////////////////////////////////////////////////////////////////
// _eret()
////////////////////////////////////////////////////////////////////////////
in_kinit void _eret()
{
    asm volatile("eret \n"
                 "nop");
}

///////////////////////////////////////////////////////////////////////////////
// This function initialises the task context for a given vspace.
// There is a private context array for each vspace, indexed by the
// (task_id, proc_id) composite index.
// The following values are written in the task context:
// - SP    stack pointer   = stack_base + stack_length
// - RA    return address  = &_eret
// - EPC   start address   = start_vector[task->startid]
// - SR    status register = 0xFF13
// - TTY   TTY index       = base_tty_id + tty_local_id
// - PTPR  page table base address / 8K
// - MODE  mmu_mode        = 0xF (TLBs and caches activated)
// It statically allocates the task to the proper scheduler
// (one scheduler per processor).
////////////////////////////////////////////////////////////////////////////////
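// The PTPR computation below is a simple shift: the page table physical base
// address is divided by 8 Kbytes. As a purely illustrative example (the address
// is arbitrary, not taken from an actual mapping), a page table located at
// 0x00802000 would give:
//
//     ptpr = 0x00802000 >> 13 = 0x401
//
// and the 8K-aligned base address is recovered as (ptpr << 13), which is also
// what the debug trace below prints for the PTPR field.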
in_kinit void _task_map( unsigned int  task_id,       // global index
                         unsigned int  vspace_id,     // global index
                         unsigned int  base_tty_id,
                         unsigned int* start_vector )
{
    mapping_header_t* header = (mapping_header_t*)&seg_mapping_base;
    mapping_vseg_t*   vseg   = _get_vseg_base(header);
    mapping_task_t*   task   = _get_task_base(header);
    mapping_vspace_t* vspace = _get_vspace_base(header);

    unsigned int vseg_id;
    unsigned int loc_id;
    unsigned int proc_id;

    unsigned int sp;
    unsigned int ra = (unsigned int)&_eret;

#if INIT_DEBUG_CTX
    _puts("\n[INIT] : task start vector ");
    _putw((unsigned int)start_vector);
    _puts("\n[INIT] : task startid ");
    _putw(task[task_id].startid);
#endif

    unsigned int epc  = start_vector[task[task_id].startid];
    unsigned int tty  = base_tty_id + task[task_id].ttylocid;
    unsigned int sr   = 0x0000FF13;
    unsigned int mode = 0xF;
    unsigned int ptpr = ((unsigned int)_ptabs[vspace_id]) >> 13;

    // check values
    if ( task[task_id].proclocid >= NB_PROCS )
    {
        _puts("\n[INIT ERROR] : processor index too large for task ");
        _puts( task[task_id].name );
        _puts(" in vspace ");
        _puts( vspace[vspace_id].name );
        _puts("\n");
        _exit();
    }
    if ( task[task_id].clusterid >= NB_CLUSTERS )
    {
        _puts("\n[INIT ERROR] : cluster index too large for task ");
        _puts( task[task_id].name );
        _puts(" in vspace ");
        _puts( vspace[vspace_id].name );
        _puts("\n");
        _exit();
    }
    if ( task[task_id].vobjlocid >= vspace->vsegs )
    {
        _puts("\n[INIT ERROR] : vseg index too large for task ");
        _puts( task[task_id].name );
        _puts(" in vspace ");
        _puts( vspace[vspace_id].name );
        _puts("\n");
        _exit();
    }
    if ( task[task_id].startid >= vspace->tasks )
    {
        _puts("\n[INIT ERROR] : start index too large for task ");
        _puts( task[task_id].name );
        _puts(" in vspace ");
        _puts( vspace[vspace_id].name );
        _puts("\n");
        _exit();
    }
    if ( tty >= NB_TTYS )
    {
        _puts("\n[INIT ERROR] : TTY index too large for task ");
        _puts( task[task_id].name );
        _puts(" in vspace ");
        _puts( vspace[vspace_id].name );
        _puts("\n");
        _exit();
    }

    // get stack pointer value
    vseg_id = task[task_id].vobjlocid + vspace[vspace_id].vobj_offset;
    sp      = vseg[vseg_id].vbase + vseg[vseg_id].length;

    // compute global processor index
    proc_id = task[task_id].clusterid * NB_PROCS + task[task_id].proclocid;

    // check local task index
    loc_id = _scheduler[proc_id].tasks;
    if ( loc_id >= GIET_NB_TASKS_MAX )
    {
        _puts("\n[INIT ERROR] : too many tasks allocated to processor ");
        _putw( proc_id );
        _puts("\n");
        _exit();
    }

    // update the number of tasks allocated to the scheduler
    _scheduler[proc_id].tasks = loc_id + 1;

    // initialise the task context
    _scheduler[proc_id].context[loc_id][CTX_SR_ID]   = sr;
    _scheduler[proc_id].context[loc_id][CTX_SP_ID]   = sp;
    _scheduler[proc_id].context[loc_id][CTX_RA_ID]   = ra;
    _scheduler[proc_id].context[loc_id][CTX_EPC_ID]  = epc;
    _scheduler[proc_id].context[loc_id][CTX_TTY_ID]  = tty;
    _scheduler[proc_id].context[loc_id][CTX_PTPR_ID] = ptpr;
    _scheduler[proc_id].context[loc_id][CTX_MODE_ID] = mode;

#if INIT_DEBUG_CTX
    _puts("Task ");
    _puts( task[task_id].name );
    _puts(" allocated to processor ");
    _putw( proc_id );
    _puts(" / loc_id = ");
    _putw( loc_id );
    _puts("\n");
    _puts("  - SR   = ");
    _putw( sr );
    _puts(" saved at ");
    _putw( (unsigned int)&_scheduler[proc_id].context[loc_id][CTX_SR_ID] );
    _puts("\n");
    _puts("  - RA   = ");
    _putw( ra );
    _puts(" saved at ");
    _putw( (unsigned int)&_scheduler[proc_id].context[loc_id][CTX_RA_ID] );
    _puts("\n");
    _puts("  - SP   = ");
    _putw( sp );
    _puts(" saved at ");
    _putw( (unsigned int)&_scheduler[proc_id].context[loc_id][CTX_SP_ID] );
    _puts("\n");
    _puts("  - EPC  = ");
    _putw( epc );
    _puts(" saved at ");
    _putw( (unsigned int)&_scheduler[proc_id].context[loc_id][CTX_EPC_ID] );
    _puts("\n");
    _puts("  - TTY  = ");
    _putw( tty );
    _puts(" saved at ");
    _putw( (unsigned int)&_scheduler[proc_id].context[loc_id][CTX_TTY_ID] );
    _puts("\n");
    _puts("  - PTPR = ");
    _putw( ptpr<<13 );
    _puts(" saved at ");
    _putw( (unsigned int)&_scheduler[proc_id].context[loc_id][CTX_PTPR_ID] );
    _puts("\n");
    _puts("  - MODE = ");
    _putw( mode );
    _puts(" saved at ");
    _putw( (unsigned int)&_scheduler[proc_id].context[loc_id][CTX_MODE_ID] );
    _puts("\n");
#endif

} // end _task_map()

///////////////////////////////////////////////////////////////////////////
// This function initialises a vobj.
// Parameters:
// - vobj      : the vobj to initialise
// - region_id : the vspace in which the vobj is located, or the global space (-1).
///////////////////////////////////////////////////////////////////////////
void initialise_vobj(mapping_vobj_t* vobj, unsigned int region_id)
{
    mwmr_channel_t* mwmr;

    switch(vobj->type)
    {
        case PTAB:
            break;

        case MWMR:
            // initialise the MWMR channel if the vseg is a MWMR
            // the channel storage capacity is (vobj->length/4 - 5) words
            mwmr = (mwmr_channel_t*)(vobj->vaddr);
            mwmr->ptw   = 0;
            mwmr->ptr   = 0;
            mwmr->sts   = 0;
            mwmr->depth = (vobj->length>>2) - 5;
            mwmr->lock  = 0;
#if INIT_DEBUG_CTX
            _puts("   MWMR channel name  = ");
            _puts( vobj->name );
            _puts("   MWMR channel depth = ");
            _putw( mwmr->depth );
            _puts("\n");
#endif
            break;

        case ELF:
            break;

        case PERI:
            break;

        case BARRIER:
        case BUFFER:
        case LOCK:
            break;  // TODO

        default:
            _puts("Unknown resource of type: ");
            _putw(vobj->type);
            _puts("Unknown resource name: ");
            _puts(vobj->name);
            _puts("\n ");
            _exit();
    }
}

///////////////////////////////////////////////////////////////////////////////
// This function loads the page table pointer of the given vspace into the MMU.
///////////////////////////////////////////////////////////////////////////////
void _set_ptpr(unsigned int vspace_id)
{
    unsigned int ptpr = ((unsigned int)_ptabs[vspace_id]) >> 13;
    asm volatile("mtc2 %0, $0"::"r"(ptpr));
}
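// Usage sketch (derived only from the call site in _tcg_init() below): before a
// vspace's vobjs are initialised through their virtual addresses, the running
// processor selects that vspace's page table, roughly as
//
//     _set_ptpr( vspace_id );                   // select the vspace page table
//     initialise_vobj( &vobj[vobj_id], vspace_id );
//
// The "mtc2 %0, $0" instruction writes the ptpr value into coprocessor 2
// register 0, i.e. the MMU register holding the page table pointer.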
///////////////////////////////////////////////////////////////////////////////
// This function sets the schedulers default values for all processors
// (tasks <= 0, and current <= 0).
// Then it scans all tasks (in all vspaces) to initialise the schedulers
// and the task contexts, as defined in the mapping_info data structure.
// A global TTY index is allocated to each task, as specified in the mapping.
// TTY0 is reserved for the kernel.
///////////////////////////////////////////////////////////////////////////////
in_kinit void _tcg_init()
{
    mapping_header_t*  header  = (mapping_header_t*)&seg_mapping_base;
    mapping_cluster_t* cluster = _get_cluster_base( header );
    mapping_vspace_t*  vspace  = _get_vspace_base( header );
    mapping_vobj_t*    vobj    = _get_vobj_base( header );

    unsigned int* start_vector_base;
    unsigned int  base_tty_id = 1;    // TTY allocator
    unsigned int  cluster_id;
    unsigned int  proc_id;
    unsigned int  vspace_id;
    unsigned int  vobj_id;
    unsigned int  task_id;

    _puts("\n SCHEDULER ");
    _putw((unsigned int)_scheduler);
    _puts("\n");

    // initialise the schedulers (not done by the compiler/loader)
    for ( cluster_id = 0 ; cluster_id < header->clusters ; cluster_id++ )
    {
        for ( proc_id = 0 ; proc_id < cluster[cluster_id].procs ; proc_id++ )
        {
            if ( proc_id >= NB_PROCS )
            {
                _puts("\n[INIT ERROR] The number of processors in cluster ");
                _putw( cluster_id );
                _puts(" is larger than NB_PROCS \n");
                _exit();
            }
            _scheduler[cluster_id*NB_PROCS+proc_id].tasks   = 0;
            _scheduler[cluster_id*NB_PROCS+proc_id].current = 0;
        }
    }

    // loop on the virtual spaces to register the page table base addresses
    for ( vspace_id = 0 ; vspace_id < header->vspaces ; vspace_id++ )
    {
        char found = 0;

        // scan all vobjs of the vspace to find the PTAB
        for( vobj_id = vspace[vspace_id].vobj_offset;
             vobj_id < (vspace[vspace_id].vobj_offset + vspace[vspace_id].vobjs);
             vobj_id++ )
        {
            if(vobj[vobj_id].type == PTAB)
            {
                found = 1;
                // the page tables have already been built by the boot phase
                _ptabs[vspace_id] = (unsigned int)vobj[vobj_id].paddr;
                _puts("ptab for vspace ");
                _putw(vspace_id);
                _puts(" address: ");
                _putw(_ptabs[vspace_id]);
                _puts("\n");
            }
        }
        if(!found)
        {
            _puts("\n[INIT ERROR] No PTAB defined for vspace ");
            _putw( vspace_id );
            _puts("\n");
            _exit();
        }
    }

    // main loop on the virtual spaces
    for ( vspace_id = 0 ; vspace_id < header->vspaces ; vspace_id++ )
    {
        _set_ptpr(vspace_id);

#if INIT_DEBUG_CTX
        _puts("\n******* mapping tasks and channels in vspace ");
        _puts(vspace[vspace_id].name);
        _puts(" ********\n");
#endif
        // initialise all vobjs of the vspace
        for( vobj_id = vspace[vspace_id].vobj_offset;
             vobj_id < (vspace[vspace_id].vobj_offset + vspace[vspace_id].vobjs);
             vobj_id++ )
        {
            initialise_vobj(&vobj[vobj_id], vspace_id);
        }

        // Get the virtual address of the start_vector for the vspace.
        // The start_vector is stored at the beginning of the seg_data segment,
        // and contains the start addresses for all tasks defined in the vspace.
        // The seg_data segment must be the first vseg defined in
        // the mapping_info data structure.
        mapping_vobj_t* vobj_data = &vobj[vspace[vspace_id].vobj_offset +
                                          vspace[vspace_id].funcs_offset];

#if INIT_DEBUG_CTX
        _puts("\n[INIT] : vobj_data name  ");
        _puts(vobj_data->name);
        _puts("\n[INIT] : vobj_data vaddr ");
        _putw(vobj_data->vaddr);
        _puts("\n[INIT] : vobj_data paddr ");
        _putw(vobj_data->paddr);
#endif
        start_vector_base = (unsigned int*)vobj_data->vaddr;

        // map tasks
        for ( task_id = vspace[vspace_id].task_offset ;
              task_id < (vspace[vspace_id].task_offset + vspace[vspace_id].tasks) ;
              task_id++ )
        {
            _task_map( task_id,
                       vspace_id,
                       base_tty_id,
                       start_vector_base );
        }

        // increment the TTY allocator
        base_tty_id = base_tty_id + vspace[vspace_id].ttys;
    }
} // end _tcg_init()
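// TTY allocation example (the values are illustrative, not taken from an actual
// mapping): base_tty_id starts at 1 because TTY0 is reserved for the kernel.
// If the first vspace declares ttys = 2, its tasks receive the global TTY
// indexes 1 and 2 (tty = base_tty_id + ttylocid), and the next vspace starts
// with base_tty_id = 3. The running total must stay below NB_TTYS, which is
// checked in _task_map().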
////////////////////////////////////////////////////////////////////////////////
// _peri_init()
// This generic function initialises the interrupt vector, the ICU masks,
// and the timers used for the context switch.
// The hardware parameters are NB_CLUSTERS, NB_PROCS, NB_TIMERS, NB_DMAS,
// CLUSTER_SPAN, seg_icu_base, seg_timer_base.
// The number of processors per cluster cannot be larger than 8.
// The total number of TTYs cannot be larger than 15.
// The NB_TIMERS, NB_DMAS & NB_PROCS parameters must be equal.
////////////////////////////////////////////////////////////////////////////////
in_kinit void _peri_init()
{
    mapping_header_t*  header  = (mapping_header_t*)&seg_mapping_base;
    mapping_cluster_t* cluster = _get_cluster_base( header );
    unsigned int       cluster_id;

    if ( NB_TIMERS != NB_PROCS )
    {
        _puts("\n[INIT ERROR] NB_TIMERS != NB_PROCS\n");
        _exit();
    }
    if ( NB_DMAS != NB_PROCS )
    {
        _puts("\n[INIT ERROR] NB_DMAS != NB_PROCS\n");
        _exit();
    }

    // interrupt vector initialisation
    _interrupt_vector[0]  = &_isr_ioc;
    _interrupt_vector[1]  = &_isr_tty_get_0;
    _interrupt_vector[2]  = &_isr_tty_get_1;
    _interrupt_vector[3]  = &_isr_tty_get_2;
    _interrupt_vector[4]  = &_isr_tty_get_3;
    _interrupt_vector[5]  = &_isr_tty_get_4;
    _interrupt_vector[6]  = &_isr_tty_get_5;
    _interrupt_vector[7]  = &_isr_tty_get_6;
    _interrupt_vector[8]  = &_isr_tty_get_7;
    _interrupt_vector[9]  = &_isr_tty_get_8;
    _interrupt_vector[10] = &_isr_tty_get_9;
    _interrupt_vector[11] = &_isr_tty_get_10;
    _interrupt_vector[12] = &_isr_tty_get_11;
    _interrupt_vector[13] = &_isr_tty_get_12;
    _interrupt_vector[14] = &_isr_tty_get_13;
    _interrupt_vector[15] = &_isr_tty_get_14;
    _interrupt_vector[16] = &_isr_switch;
    _interrupt_vector[17] = &_isr_dma;
    _interrupt_vector[18] = &_isr_switch;
    _interrupt_vector[19] = &_isr_dma;
    _interrupt_vector[20] = &_isr_switch;
    _interrupt_vector[21] = &_isr_dma;
    _interrupt_vector[22] = &_isr_switch;
    _interrupt_vector[23] = &_isr_dma;
    _interrupt_vector[24] = &_isr_switch;
    _interrupt_vector[25] = &_isr_dma;
    _interrupt_vector[26] = &_isr_switch;
    _interrupt_vector[27] = &_isr_dma;
    _interrupt_vector[28] = &_isr_switch;
    _interrupt_vector[29] = &_isr_dma;
    _interrupt_vector[30] = &_isr_switch;
    _interrupt_vector[31] = &_isr_dma;

    // ICU MASKs and TIMERS initialisation
    volatile unsigned int* icu   = (unsigned int*)&seg_icu_base;
    volatile unsigned int* timer = (unsigned int*)&seg_timer_base;

    for ( cluster_id = 0 ; cluster_id < header->clusters ; cluster_id++ )
    {
        if ( cluster[cluster_id].procs == 0 ) break;

        icu[ICU_MASK_SET + 0*ICU_SPAN] = 0x000380FF;    // ICU_MASK for proc 0
        if ( _scheduler[cluster_id*NB_PROCS + 0].tasks > 1 )
        {
            timer[TIMER_PERIOD + 0*TIMER_SPAN] = GIET_TICK_VALUE;
            timer[TIMER_MODE   + 0*TIMER_SPAN] = 0x3;
        }
        if ( cluster[cluster_id].procs == 1 ) break;

        icu[ICU_MASK_SET + 1*ICU_SPAN] = 0x000C0000;    // ICU_MASK for proc 1
        if ( _scheduler[cluster_id*NB_PROCS + 1].tasks > 1 )
        {
            timer[TIMER_PERIOD + 1*TIMER_SPAN] = GIET_TICK_VALUE;
            timer[TIMER_MODE   + 1*TIMER_SPAN] = 0x3;
        }
        if ( cluster[cluster_id].procs == 2 ) break;

        icu[ICU_MASK_SET + 2*ICU_SPAN] = 0x00300000;    // ICU_MASK for proc 2
        if ( _scheduler[cluster_id*NB_PROCS + 2].tasks > 1 )
        {
            timer[TIMER_PERIOD + 2*TIMER_SPAN] = GIET_TICK_VALUE;
            timer[TIMER_MODE   + 2*TIMER_SPAN] = 0x3;
        }
        if ( cluster[cluster_id].procs == 3 ) break;

        icu[ICU_MASK_SET + 3*ICU_SPAN] = 0x00C00000;    // ICU_MASK for proc 3
        if ( _scheduler[cluster_id*NB_PROCS + 3].tasks > 1 )
        {
            timer[TIMER_PERIOD + 3*TIMER_SPAN] = GIET_TICK_VALUE;
            timer[TIMER_MODE   + 3*TIMER_SPAN] = 0x3;
        }
        if ( cluster[cluster_id].procs == 4 ) break;

        icu[ICU_MASK_SET + 4*ICU_SPAN] = 0x03000000;    // ICU_MASK for proc 4
        if ( _scheduler[cluster_id*NB_PROCS + 4].tasks > 1 )
        {
            timer[TIMER_PERIOD + 4*TIMER_SPAN] = GIET_TICK_VALUE;
            timer[TIMER_MODE   + 4*TIMER_SPAN] = 0x3;
        }
        if ( cluster[cluster_id].procs == 5 ) break;

        icu[ICU_MASK_SET + 5*ICU_SPAN] = 0x0C000000;    // ICU_MASK for proc 5
        if ( _scheduler[cluster_id*NB_PROCS + 5].tasks > 1 )
        {
            timer[TIMER_PERIOD + 5*TIMER_SPAN] = GIET_TICK_VALUE;
            timer[TIMER_MODE   + 5*TIMER_SPAN] = 0x3;
        }
        if ( cluster[cluster_id].procs == 6 ) break;

        icu[ICU_MASK_SET + 6*ICU_SPAN] = 0x30000000;    // ICU_MASK for proc 6
        if ( _scheduler[cluster_id*NB_PROCS + 6].tasks > 1 )
        {
            timer[TIMER_PERIOD + 6*TIMER_SPAN] = GIET_TICK_VALUE;
            timer[TIMER_MODE   + 6*TIMER_SPAN] = 0x3;
        }
        if ( cluster[cluster_id].procs == 7 ) break;

        icu[ICU_MASK_SET + 7*ICU_SPAN] = 0xC0000000;    // ICU_MASK for proc 7
        if ( _scheduler[cluster_id*NB_PROCS + 7].tasks > 1 )
        {
            timer[TIMER_PERIOD + 7*TIMER_SPAN] = GIET_TICK_VALUE;
            timer[TIMER_MODE   + 7*TIMER_SPAN] = 0x3;
        }
        if ( cluster[cluster_id].procs > 8 )
        {
            _puts("\n[INIT ERROR] The number of processors per cluster\n");
            _puts("             cannot be larger than 8\n");
            _exit();
        }

        icu   = icu   + (CLUSTER_SPAN>>2);
        timer = timer + (CLUSTER_SPAN>>2);
    }
} // end _peri_init()
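// Illustrative reading of the per-processor constants above, derived only from
// the code in this file: in the interrupt vector, local processor p owns entry
// 16 + 2*p (context-switch ISR) and entry 17 + 2*p (DMA ISR), so its private
// ICU mask corresponds to
//
//     mask = 0x3 << (16 + 2*p);    // e.g. p = 3 gives 0x00C00000, as above
//
// Processor 0 additionally receives the shared peripheral interrupts (IOC and
// TTY lines), hence its larger mask 0x000380FF.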