///////////////////////////////////////////////////////////////////////////////////
// File     : kernel_init.c
// Date     : 26/05/2012
// Authors  : alain greiner & mohamed karaoui
// Copyright (c) UPMC-LIP6
////////////////////////////////////////////////////////////////////////////////////
// This kernel_init.c file is part of the GIET-VM nano-kernel.
////////////////////////////////////////////////////////////////////////////////////

#include <giet_config.h>
#include <hard_config.h>
#include <utils.h>
#include <kernel_utils.h>
#include <fat32.h>
#include <xcu_driver.h>
#include <ctx_handler.h>
#include <irq_handler.h>
#include <mapping_info.h>
#include <mips32_registers.h>

#if !defined(X_SIZE) 
# error: You must define X_SIZE in the hard_config.h file
#endif

#if !defined(Y_SIZE) 
# error: You must define Y_SIZE in the hard_config.h file
#endif

#if !defined(Y_WIDTH) 
# error: You must define Y_WIDTH in the hard_config.h file
#endif

#if !defined(Y_WIDTH) 
# error: You must define Y_WIDTH in the hard_config.h file
#endif

#if !defined(NB_PROCS_MAX)
# error: You must define NB_PROCS_MAX in the hard_config.h file
#endif

#if !defined(NB_TOTAL_PROCS)
# error: You must define NB_TOTAL_PROCS in the hard_config.h file
#endif

#if !defined(USE_XCU) 
# error: You must define USE_XCU in the hard_config.h file
#endif

#if !defined(IDLE_TASK_INDEX) 
# error: You must define IDLE_TASK_INDEX in the ctx_handler.h file
#endif

#if !defined(GIET_TICK_VALUE) 
# error: You must define GIET_TICK_VALUE in the giet_config.h file
#endif

#if !defined(GIET_NB_VSPACE_MAX)
# error: You must define GIET_NB_VSPACE_MAX in the giet_config.h file
#endif

///////////////////////////////////////////////////////////////////////////////////
// FAT internal representation for kernel code.
// The descriptor is placed in the .kdata section and aligned on a 512 bytes
// boundary.
// NOTE(review): 512 presumably matches the block device sector size, so that
// the structure can be used directly as an I/O buffer - confirm in fat32.h.
///////////////////////////////////////////////////////////////////////////////////

__attribute__((section (".kdata"))) 
fat32_fs_t fat __attribute__((aligned(512)));

///////////////////////////////////////////////////////////////////////////////////
// Page tables arrays, both indexed by the vspace index (vsid).
// They are filled by kernel_init(), from the CTX_PTAB and CTX_PTPR slots
// of each task context.
///////////////////////////////////////////////////////////////////////////////////

__attribute__((section (".kdata"))) 
volatile unsigned int _ptabs_vaddr[GIET_NB_VSPACE_MAX];    // page tables virtual addresses

__attribute__((section (".kdata")))        
volatile unsigned int _ptabs_ptprs[GIET_NB_VSPACE_MAX];    // page tables physical addresses >> 13

///////////////////////////////////////////////////////////////////////////////////
// Array of pointers on the schedulers, indexed by [x][y][p]
// (cluster coordinates and local processor index in the cluster).
// Each processor registers its own scheduler in kernel_init(), using the
// value returned by _get_sched().
// NOTE(review): kernel_init() describes this value as the scheduler *virtual*
// address, whereas this header used to say "physical addresses" - confirm.
///////////////////////////////////////////////////////////////////////////////////

__attribute__((section (".kdata"))) 
volatile static_scheduler_t* _schedulers[X_SIZE][Y_SIZE][NB_PROCS_MAX]; 

////////////////////////////////////////////////////////////////////////////////////
// Synchronisation counter shared by all processors in kernel_init():
// - a processor starts its own initialisation when the counter is equal to
//   its continuous index (cpid), and increments the counter when it is done,
//   so the processors are initialised one after the other;
// - each processor then waits until the counter reaches NB_TOTAL_PROCS
//   before jumping to user code.
////////////////////////////////////////////////////////////////////////////////////

__attribute__((section (".kdata"))) 
volatile unsigned int kernel_init_barrier = 0;

///////////////////////////////////////////////////////////////////////////////////
// Kernel initialisation, executed by each processor.
// The processors execute the initialisation steps one after the other
// (in increasing order of their continuous index), then synchronise on a
// barrier before jumping to user code:
// - step 1 : register the processor scheduler in the _schedulers[] array.
// - step 2 : for each allocated task, register the page table in the
//            _ptabs_vaddr[] / _ptabs_ptprs[] arrays, and complete the task
//            context (CTX_RA and CTX_EPC slots).
// - step 3 : compute and set the XCU masks (HWI / WTI / PTI).
// - step 4 : start the TICK timer if at least one task is allocated.
// - step 5 : initialise the idle task context.
// - step 6 : wait on the barrier, load SP, SR, PTPR and EPC from the context
//            of the first task (or of the idle task), and execute eret.
// This function does not return: it ends with an eret to the selected task.
///////////////////////////////////////////////////////////////////////////////////
__attribute__((section (".kinit"))) void kernel_init() 
{
    // gpid : hardware processor index (fixed format: X_WIDTH|Y_WIDTH|P_WIDTH)
    // p    : local processor id in a cluster ( p < NB_PROCS_MAX)
    // cpid : "continuous" processor index = ((x * Y_SIZE + y) * NB_PROCS_MAX) + p 

    unsigned int gpid       = _get_procid();
    unsigned int cluster_xy = gpid >> P_WIDTH;
    unsigned int x          = (cluster_xy >> Y_WIDTH) & ((1<<X_WIDTH)-1);
    unsigned int y          = cluster_xy & ((1<<Y_WIDTH)-1);
    unsigned int p          = gpid & ((1<<P_WIDTH)-1);
    unsigned int cpid       = ((( x * Y_SIZE) + y) * NB_PROCS_MAX) + p;

    // This initialisation is done sequentially by each processor:
    // a processor waits until the shared counter is equal to its own cpid.
    while( cpid != kernel_init_barrier ) asm volatile ( "nop" );

    // step 1 : each processor get its scheduler virtual address from CP0_SCHED register
    //          and contributes to _schedulers[] array initialisation

    static_scheduler_t* psched     = (static_scheduler_t*)_get_sched();
    unsigned int        tasks      = psched->tasks;

    _schedulers[x][y][p] = psched;

#if GIET_DEBUG_INIT
_printf("\n[GIET DEBUG INIT] Processor[%d,%d,%d] starts kernel init\n"
        " - scheduler vbase = %x\n"
        " - tasks           = %d\n",
        x, y, p, (unsigned int)psched, tasks );
#endif

    // step 2 : each processor that is allocated at least one task loops 
    //          on all allocated tasks: 
    //          - contributes to _ptabs_vaddr[] & _ptabs_ptprs[] initialisation.
    //          - set CTX_RA slot  with the kernel _ctx_eret() virtual address.
    //          - set CTX_EPC slot that must contain the task entry point, 
    //            and contain only at this point the virtual address of the memory 
    //            location containing this entry point. 

    unsigned int ltid;

    for (ltid = 0; ltid < tasks; ltid++) 
    {
        unsigned int vsid = _get_task_slot( x, y, p, ltid , CTX_VSID_ID ); 
        unsigned int ptab = _get_task_slot( x, y, p, ltid , CTX_PTAB_ID ); 
        unsigned int ptpr = _get_task_slot( x, y, p, ltid , CTX_PTPR_ID ); 

        // initialize PTABS arrays
        // NOTE(review): vsid is used as an index without being checked here;
        // it is presumably guaranteed to be smaller than the arrays size by
        // the boot code - confirm.
        _ptabs_vaddr[vsid] = ptab;
        _ptabs_ptprs[vsid] = ptpr;

#if GIET_DEBUG_INIT
_printf("\n[GIET DEBUG INIT] Processor[%d,%d,%d] contributes to PTABS arrays\n"
        " - ptabs_vaddr[%d] = %x / ptpr_paddr[%d] = %l\n",
        x, y, p,  
        vsid, ptab, vsid, ((unsigned long long)ptpr)<<13 );
#endif

        // set the ptpr to use the task page table.
        // The "memory" clobber prevents the compiler from moving memory
        // accesses across the page table switch: the entry point read below
        // must be done with the task page table.
        asm volatile( "mtc2    %0,   $0   \n"
                      : 
                      : "r" (ptpr) 
                      : "memory" );

        // compute ctx_ra
        unsigned int ctx_ra = (unsigned int)(&_ctx_eret);
        _set_task_slot( x, y, p, ltid, CTX_RA_ID, ctx_ra );

        // compute ctx_epc : the slot initially contains the virtual address of
        // the memory location containing the entry point, and is replaced by
        // the entry point itself.
        unsigned int* ptr = (unsigned int*)_get_task_slot( x, y, p, ltid, CTX_EPC_ID );
        _set_task_slot( x, y, p, ltid, CTX_EPC_ID, *ptr );

#if GIET_DEBUG_INIT
_printf("\n[GIET DEBUG INIT] Processor[%d,%d,%d] updates context for task %d\n"
        " - ctx_epc   = %x\n"
        " - ctx_ra    = %x\n",
        x, y, p, ltid,
        _get_task_slot( x, y, p, ltid, CTX_EPC_ID ),
        _get_task_slot( x, y, p, ltid, CTX_RA_ID ) );
#endif

    }  // end for tasks

    // step 3 : compute and set XCU masks.
    //          An interrupt is enabled in a mask when the most significant bit
    //          of the corresponding vector entry is set. The index of the entry
    //          containing ISR_TICK is saved in isr_switch_index.

    unsigned int isr_switch_index = 0xFFFFFFFF;    // 0xFFFFFFFF means "not found"
    unsigned int hwi_mask = 0;
    unsigned int pti_mask = 0;
    unsigned int wti_mask = 0;
    unsigned int irq_id;            // IN_IRQ index
    unsigned int entry;             // interrupt vector entry

    for (irq_id = 0; irq_id < 32; irq_id++) 
    {
        // unsigned constant: shifting a signed 1 by 31 is undefined behaviour
        unsigned int irq_bit = 1u << irq_id;

        entry = psched->hwi_vector[irq_id];
        if ( entry & 0x80000000 ) hwi_mask = hwi_mask | irq_bit;
        if ( (entry & 0x0000FFFF) == ISR_TICK ) isr_switch_index = irq_id;

        entry = psched->pti_vector[irq_id];
        if ( entry & 0x80000000 ) pti_mask = pti_mask | irq_bit;
        if ( (entry & 0x0000FFFF) == ISR_TICK ) isr_switch_index = irq_id;

        entry = psched->wti_vector[irq_id];
        if ( entry & 0x80000000 ) wti_mask = wti_mask | irq_bit;
        if ( (entry & 0x0000FFFF) == ISR_TICK ) isr_switch_index = irq_id;
    }

#if GIET_DEBUG_INIT
_printf("\n[GIET DEBUG INIT] Processor[%d,%d,%d] sets XCU masks\n"
        " - XCU HWI_MASK = %x\n"
        " - XCU WTI_MASK = %x\n"
        " - XCU PTI_MASK = %x\n",
        x, y, p, hwi_mask, wti_mask, pti_mask );
#endif

    unsigned int channel = p * IRQ_PER_PROCESSOR; 

    _xcu_set_mask( cluster_xy, channel, hwi_mask, IRQ_TYPE_HWI ); 
    _xcu_set_mask( cluster_xy, channel, wti_mask, IRQ_TYPE_WTI );
    _xcu_set_mask( cluster_xy, channel, pti_mask, IRQ_TYPE_PTI );

    // step 4 : start TICK timer if at least one task
    if (tasks > 0) 
    {
        // one ISR_TICK must be defined for each proc
        if (isr_switch_index == 0xFFFFFFFF) 
        {
            _printf("\n[GIET ERROR] ISR_TICK not found for processor[%d,%d,%d]\n",
                    x, y, p );
            _exit();
        }

        // start system timer
        _xcu_timer_start( cluster_xy, isr_switch_index, GIET_TICK_VALUE ); 

        // the trace is only emitted when the timer is actually started
#if GIET_DEBUG_INIT
_printf("\n[GIET DEBUG INIT] Processor[%d,%d,%d] starts TICK timer\n",
        x, y, p );
#endif

    }

    // step 5 : each processor updates the idle_task context:
    //          (CTX_SP, CTX_RA, CTX_EPC).
    //          The 4 Kbytes idle stack is implemented in the scheduler.
    //          The PTPR register, the CTX_PTPR and CTX_PTAB slots 
    //          have been initialised in boot code.

    // initial stack pointer : 0x2000 bytes above the scheduler base
    unsigned int pstack = ((unsigned int)psched) + 0x2000;

    _set_task_slot( x, y, p, IDLE_TASK_INDEX, CTX_SP_ID,  pstack);
    _set_task_slot( x, y, p, IDLE_TASK_INDEX, CTX_RA_ID,  (unsigned int) &_ctx_eret);
    _set_task_slot( x, y, p, IDLE_TASK_INDEX, CTX_EPC_ID, (unsigned int) &_idle_task);

#if GIET_DEBUG_INIT
_printf("\n[GIET DEBUG INIT] Processor[%d,%d,%d] initializes IDLE task\n"
        " - stack_base = %x\n"
        " - stack_size = 0x1000\n",
        x, y, p, pstack - 0x1000 );
#endif

    // step 6 : when all processors reach the synchronisation barrier, 
    //          each processor set registers SP, SR, PTPR, EPC,
    //          with the values corresponding to the first allocated task,
    //          or to the idle_task if there is no task allocated,
    //          and jump to user code 

    if (tasks == 0) 
    {
        ltid = IDLE_TASK_INDEX;

        _printf("\n[GIET WARNING] No task allocated to processor[%d,%d,%d]\n",
                x, y, p );
    }
    else
    {
        ltid = 0;
    }

    unsigned int sp_value   = _get_task_slot( x, y, p, ltid, CTX_SP_ID);
    unsigned int sr_value   = _get_task_slot( x, y, p, ltid, CTX_SR_ID);
    unsigned int ptpr_value = _get_task_slot( x, y, p, ltid, CTX_PTPR_ID);
    unsigned int epc_value  = _get_task_slot( x, y, p, ltid, CTX_EPC_ID);

#if GIET_DEBUG_INIT
_printf("\n[GIET DEBUG INIT] Processor[%d,%d,%d] reach barrier at cycle %d\n",
        x, y, p, _get_proctime() );
#endif

    // increment barrier counter : this releases the next processor (cpid + 1).
    // Only the processor whose cpid is equal to the counter can reach this
    // point, so the increment is not concurrent with another one.
    kernel_init_barrier++;

    // busy waiting until all processors synchronized
    while ( kernel_init_barrier != NB_TOTAL_PROCS );

#if GIET_DEBUG_INIT
_printf("\n[GIET DEBUG INIT] Processor[%d,%d,%d] initializes registers at cycle %d\n"
        " - sp   = %x\n"
        " - sr   = %x\n"
        " - ptpr = %x\n"
        " - epc  = %x\n",
        x, y, p, _get_proctime(),
        sp_value, sr_value, ptpr_value, epc_value );
#endif

    // set registers and jump to user code
    asm volatile ( "move  $29,  %0                  \n"   /* SP <= ctx[CTX_SP_ID] */
                   "mtc0  %1,   $12                 \n"   /* SR <= ctx[CTX_SR_ID] */
                   "mtc2  %2,   $0                  \n"   /* PTPR <= ctx[CTX_PTPR] */
                   "mtc0  %3,   $14                 \n"   /* EPC <= ctx[CTX_EPC]  */
                   "eret                            \n"   /* jump to user code  */
                   "nop                             \n"
                   : 
                   : "r"(sp_value), "r"(sr_value), "r"(ptpr_value), "r"(epc_value)
                   : "$29", "memory" );

} // end kernel_init()


// Local Variables:
// tab-width: 4
// c-basic-offset: 4
// c-file-offsets:((innamespace . 0)(inline-open . 0))
// indent-tabs-mode: nil
// End:
// vim: filetype=c:expandtab:shiftwidth=4:tabstop=4:softtabstop=4

