///////////////////////////////////////////////////////////////////////////////////
// File     : kernel_init.c
// Date     : 26/05/2012
// Authors  : alain greiner & mohamed karaoui
// Copyright (c) UPMC-LIP6
////////////////////////////////////////////////////////////////////////////////////
// The kernel_init.c file is part of the GIET-VM nano-kernel.
//
// This nano-kernel has been written for the MIPS32 processor.
// The virtual addresses are on 32 bits and use the (unsigned int) type, but the 
// physical addresses can have up to 40 bits, and use the (unsigned long long) type.
// It natively supports clusterised shared memory multi-processors architectures, 
// where each processor is identified by a composite index (cluster_xy, local_id),
// and where there is one physical memory bank per cluster.
//
// This file contains the kernel_parallel_init() function, that performs the second 
// phase of system initialisation.  The three significant actions are:
// 1) processor 0 makes peripherals and system FAT initialisation.
// 2) processor 0 awake all other processors by an IPI.
// 3) all processors running in parallel perform register initialisation,
//    from their private scheduler, and jump to user code.
////////////////////////////////////////////////////////////////////////////////////

#include <giet_config.h>

// kernel libraries
#include <utils.h>
#include <fat32.h>

//for peripheral initialisation
#include <dma_driver.h>
#include <fbf_driver.h>
#include <tty_driver.h>
#include <icu_driver.h>
#include <xcu_driver.h>
#include <ioc_driver.h>
#include <mmc_driver.h>
#include <mwr_driver.h>
#include <nic_driver.h>
#include <tim_driver.h>

#include <ctx_handler.h>
#include <irq_handler.h>

#include <mapping_info.h>
#include <mips32_registers.h>

///////////////////////////////////////////////////////////////////////////////////
// array of pointers on the page tables (virtual addresses)
///////////////////////////////////////////////////////////////////////////////////

__attribute__((section (".kdata"))) 
unsigned int _ptabs_vaddr[GIET_NB_VSPACE_MAX];    // virtual addresses

__attribute__((section (".kdata")))        
unsigned int _ptabs_ptprs[GIET_NB_VSPACE_MAX];    // physical addresses >> 13

///////////////////////////////////////////////////////////////////////////////////
// array of pointers on the schedulers (physical addresses)
///////////////////////////////////////////////////////////////////////////////////

__attribute__((section (".kdata"))) 
static_scheduler_t* _schedulers[NB_PROCS_MAX<<(X_WIDTH+Y_WIDTH)];   // virtual addresses

////////////////////////////////////////////////////////////////////////////////////
// staks for the "idle" tasks (256 bytes for each processor)
////////////////////////////////////////////////////////////////////////////////////

__attribute__((section (".kdata"))) 
unsigned int _idle_stack[X_SIZE*Y_SIZE * NB_PROCS_MAX * 128]; 

////////////////////////////////////////////////////////////////////////////////////
// This function is the entry point in kernel for all processors.
// It is executed in parallel by all procesors, and completes the system 
// initialisation that has been started by processor 0 in the boot_init() function.
//
// This kernel code makes the following assuptions, regarding the work bone 
// by the boot code:
//
// 1) The page tables associated to the various vspaces have been build
//    in physical memory, and can be used by the kernel code.
//
// 2) All schedulers (this include all task contexts) have been initialised, 
//    Both the virtual and the physical base addresses of the page tables
//    are available in the CTX_PTAB and CTX_PTPR slots.
//
// 3) The CP0_SCHED register of each processor contains a pointer on its 
//    private scheduler (virtual address).
//
// 4) The CP2_PTPR register of each processor contains a pointer on 
//    the vspace_0 page table (physical address>>13). 
//
// 5) For all processors, the MMU is activated (CP2_MODE contains 0xF).
// 
// This code must be loaded in .kinit section, in order to control seg_kinit_base,
// as this address is used by the boot code to jump into kernel code.
////////////////////////////////////////////////////////////////////////////////////
// Each processor performs the following actions:
// 1/ contribute to _schedulers_paddr[] array initialisation.
// 2/ contribute to _ptabs_paddr[] and _ptabs_vaddr arrays initialisation 
// 3/ compute and set the ICU mask for its private ICU channel
// 4/ initialise its private TICK timer (if tasks > 0)
// 5/ initialise the "idle" task context in its private scheduler
// 6/ initialise the SP, SR, PTPR, EPC registers
// 7/ jump to the user code with an eret. 
////////////////////////////////////////////////////////////////////////////////////
__attribute__((section (".kinit"))) void kernel_parallel_init() 
{
    unsigned int global_pid = _get_procid();
    unsigned int cluster_xy = global_pid / NB_PROCS_MAX;
    unsigned int local_pid  = global_pid % NB_PROCS_MAX;

#if 0
////////////// Debug : we can kill all processors but one
if ( global_pid != 0 ) 
{
    _tty_get_lock( 0 );
    _puts("\n[GIET] Processor[");
    _putd( cluster_xy >> Y_WIDTH );
    _puts(",");
    _putd( cluster_xy & ((1<<Y_WIDTH)-1) );
    _puts(",");
    _putd( local_pid );
    _puts("] suicide...\n");
    _tty_release_lock( 0 );
    _exit();
}
#endif

    // Step 1 : each processor get its scheduler virtual address
    //          and contribute to initialise the _schedulers[] array

    static_scheduler_t* psched     = (static_scheduler_t*)_get_sched();
    unsigned int        tasks      = psched->tasks;

    _schedulers[global_pid] = psched;

#if GIET_DEBUG_INIT
_tty_get_lock( 0 );
_puts("\n[GIET DEBUG] Parallel init : step 1 for processor[");
_putd( cluster_xy >> Y_WIDTH );
_puts(",");
_putd( cluster_xy & ((1<<Y_WIDTH)-1) );
_puts(",");
_putd( local_pid );
_puts("]\n - scheduler vbase = ");
_putx((unsigned int) psched);
_puts("\n - tasks           = ");
_putd(tasks);
_puts("\n");
_tty_release_lock( 0 );
#endif

    // step 2 : each processor that is allocated at least one task
    //          completes its private scheduler initialisation, and
    //          contribute to _ptabs_vaddr[] and _ptabs_ptprs[] arrays initialisation.
    //          - set the CTX_RA slot vith the virtual address
    //            of the _ctx_eret() function (for context switch).
    //          - set the CTX_EPC slot that must contain the task
    //            entry point, and contain only the address of the
    //            memory location containing this entry point.

    unsigned int ltid;

    // loop on all allocated tasks
    for (ltid = 0; ltid < tasks; ltid++) 
    {
        unsigned int vsid = _get_task_slot( global_pid, ltid , CTX_VSID_ID ); 
        unsigned int ptab = _get_task_slot( global_pid, ltid , CTX_PTAB_ID ); 
        unsigned int ptpr = _get_task_slot( global_pid, ltid , CTX_PTPR_ID ); 

        _ptabs_vaddr[vsid] = ptab;
        _ptabs_ptprs[vsid] = ptpr;

        unsigned int ctx_ra = (unsigned int)(&_ctx_eret);
        _set_task_slot( global_pid, ltid, CTX_RA_ID, ctx_ra );

        unsigned int* ptr = (unsigned int*)_get_task_slot( global_pid, ltid, CTX_EPC_ID );
        _set_task_slot( global_pid, ltid, CTX_EPC_ID, *ptr );

#if GIET_DEBUG_INIT
_tty_get_lock( 0 );
_puts("\n[GIET DEBUG] Parallel init : step 2 for processor[");
_putd( cluster_xy >> Y_WIDTH );
_puts(",");
_putd( cluster_xy & ((1<<Y_WIDTH)-1) );
_puts(",");
_putd( local_pid );
_puts("] / task ");
_putd( ltid );
_puts("\n - ctx_vsid  = ");
_putd( _get_task_slot( global_pid, ltid, CTX_VSID_ID ) );
_puts("\n - ctx_ptpr  = ");
_putx( _get_task_slot( global_pid, ltid, CTX_PTPR_ID ) );
_puts("\n - ctx_ptab  = ");
_putx( _get_task_slot( global_pid, ltid, CTX_PTAB_ID ) );
_puts("\n - ctx_ltid  = ");
_putd( _get_task_slot( global_pid, ltid, CTX_LTID_ID ) );
_puts("\n - ctx_epc   = ");
_putx( _get_task_slot( global_pid, ltid, CTX_EPC_ID ) );
_puts("\n - ctx_ra    = ");
_putx( _get_task_slot( global_pid, ltid, CTX_RA_ID ) );
_puts("\n - ctx_gtid  = ");
_putd( _get_task_slot( global_pid, ltid, CTX_GTID_ID ) );
_puts("\n - ctx_tty   = ");
_putd( _get_task_slot( global_pid, ltid, CTX_TTY_ID ) );
_puts("\n");
_tty_release_lock( 0 );
#endif

    }

    // step 3 : compute and set ICU or XICU masks
    //          there is at most 32 interrupts per processor

    unsigned int isr_switch_index = 0xFFFFFFFF;
    unsigned int irq_id;            // IN_IRQ index
    unsigned int hwi_mask = 0;
    unsigned int swi_mask = 0;
    unsigned int pti_mask = 0;

    for (irq_id = 0; irq_id < 32; irq_id++) 
    {
        unsigned int entry = psched->interrupt_vector[irq_id];
        unsigned int isr   = (entry & 0x000000FF);
        unsigned int type  = (entry & 0x0000FF00) >> 8;
        unsigned int valid = (entry & 0x80000000);

        if      ((type == IRQ_TYPE_HWI) && valid ) hwi_mask = hwi_mask | (1<<irq_id);
        else if ((type == IRQ_TYPE_SWI) && valid ) swi_mask = swi_mask | (1<<irq_id);
        else if ((type == IRQ_TYPE_PTI) && valid ) pti_mask = pti_mask | (1<<irq_id);
        else if ( valid )
        {
            _puts("\n[GIET ERROR] _kernel_parallel_start() : illegal IRQ type\n");
            _puts(" irq_id = ");
            _putx( irq_id );
            _puts(" / entry = ");
            _putx( entry );
            _puts("\n");
            _exit();
        }
        if (isr == ISR_SWITCH) isr_switch_index = irq_id;
    }

#if GIET_DEBUG_INIT
_tty_get_lock( 0 );
_puts("\n[GIET DEBUG] Parallel init : step 3 for processor[");
_putd( cluster_xy >> Y_WIDTH );
_puts(",");
_putd( cluster_xy & ((1<<Y_WIDTH)-1) );
_puts(",");
_putd( local_pid );
_puts("]\n - ICU HWI_MASK = ");
_putx(hwi_mask);
_puts("\n - ICU SWI_MASK = ");
_putx(swi_mask);
_puts("\n - ICU PTI_MASK = ");
_putx(pti_mask);
_puts("\n");
_tty_release_lock( 0 );
#endif

    // GIET-VM consraint : only one IRQ type per irq_id 
    if ( hwi_mask & swi_mask & pti_mask )
    {
        _puts("[GIET ERROR] _kernel_parallel_start : conflicting IRQs\n");
        _exit();
    }

#if USE_XICU
    _xcu_set_mask(cluster_xy, local_pid, hwi_mask, IRQ_TYPE_HWI); // set HWI_MASK
    _xcu_set_mask(cluster_xy, local_pid, swi_mask, IRQ_TYPE_SWI); // set SWI_MASK
    _xcu_set_mask(cluster_xy, local_pid, pti_mask, IRQ_TYPE_PTI); // set PTI_MASK
#else
    _icu_set_mask(cluster_xy, local_pid, (hwi_mask | pti_mask | swi_mask) );   
#endif

    // step 4 : start TICK timer if at least one task
    if (tasks > 0) 
    {
        // one ISR_SWITCH must be defined for each proc
        if (isr_switch_index == 0xFFFFFFFF) 
        {
            _tty_get_lock( 0 );
            _puts("\n[GIET ERROR] ISR_SWITCH not found for processor ");
            _putx(global_pid);
            _puts("\n");
            _tty_release_lock( 0 );
            _exit();
        }

        // the ISR_SWITCH irq index must be NB_PROCS_MAX + local_pid because
        // the first NB_PROCS_MAX irqs are used by the WAKEUP ones
        if (isr_switch_index != (NB_PROCS_MAX + local_pid))
        {
            _tty_get_lock( 0 );
            _puts("\n[GIET ERROR] ISR_SWITCH wrong index for processor ");
            _putx(global_pid);
            _puts("\n. It should be NB_PROCS_MAX + local_pid =");
            _putd(NB_PROCS_MAX + local_pid);
            _puts("\n");
            _tty_release_lock( 0 );
            _exit();
        }

        // start system timer
        unsigned int ko;
#if USE_XICU
        ko = _xcu_timer_start( cluster_xy, isr_switch_index, GIET_TICK_VALUE ); 
#else
        ko = _timer_start( cluster_xy, isr_switch_index, GIET_TICK_VALUE ); 
#endif
        if ( ko )
        {
            _tty_get_lock( 0 );
            _puts("\n[GIET ERROR] cannot start timer for processor ");
            _putd(local_pid);
            _puts("\n");
            _tty_release_lock( 0 );
            _exit();
        }
    } 

#if GIET_DEBUG_INIT
_tty_get_lock( 0 );
_puts("\n[GIET DEBUG] Parallel init : step 4 for processor[");
_putd( cluster_xy >> Y_WIDTH );
_puts(",");
_putd( cluster_xy & ((1<<Y_WIDTH)-1) );
_puts(",");
_putd( local_pid );
_puts("]");
if ( tasks > 1 ) _puts("\n  context switch activated\n");
else             _puts("\n  context switch  not activated\n");
_tty_release_lock( 0 );
#endif

    // step 5 : each processor updates the idle_task context:
    //          (only CTX_SP, CTX_RA, CTX_EPC).
    //          The stack size is 256 bytes, reserved in seg_kdata.
    //          The PTPR register, the CTX_PTPR and CTX_PTAB slots 
    //          have been initialised in boot code.

    unsigned int stack = (unsigned int)_idle_stack + ((global_pid + 1)<<9);

    _set_task_slot( global_pid, IDLE_TASK_INDEX, CTX_SP_ID,  stack);
    _set_task_slot( global_pid, IDLE_TASK_INDEX, CTX_RA_ID,  (unsigned int) &_ctx_eret);
    _set_task_slot( global_pid, IDLE_TASK_INDEX, CTX_EPC_ID, (unsigned int) &_idle_task);

#if GIET_DEBUG_INIT
_tty_get_lock( 0 );
_puts("\n[GIET DEBUG] Parallel init : step 5 for processor[");
_putd( cluster_xy >> Y_WIDTH );
_puts(",");
_putd( cluster_xy & ((1<<Y_WIDTH)-1) );
_puts(",");
_putd( local_pid );
_puts("] : idle task context set\n");
_tty_release_lock( 0 );
#endif

    // step 6 : each processor initialises SP, SR, PTPR, EPC, registers
    //          with the values corresponding to the first allocated task,
    //          or to the idle_task if there is no task allocated.

    ltid = 0;

    if (tasks == 0) 
    {
        ltid = IDLE_TASK_INDEX;

        _tty_get_lock( 0 );
        _puts("\n[GIET WARNING] No task allocated to processor ");
        _putx(global_pid);
        _puts(" => idle\n");
        _tty_release_lock ( 0 );
    }

    unsigned int sp_value   = _get_task_slot(global_pid, ltid, CTX_SP_ID);
    unsigned int sr_value   = _get_task_slot(global_pid, ltid, CTX_SR_ID);
    unsigned int ptpr_value = _get_task_slot(global_pid, ltid, CTX_PTPR_ID);
    unsigned int epc_value  = _get_task_slot(global_pid, ltid, CTX_EPC_ID);

#if GIET_DEBUG_INIT
_tty_get_lock( 0 );
_puts("\n[GIET DEBUG] Parallel init : step 6 for processor[");
_putd( cluster_xy >> Y_WIDTH );
_puts(",");
_putd( cluster_xy & ((1<<Y_WIDTH)-1) );
_puts(",");
_putd( local_pid );
_puts("]\n - sp   = ");
_putx(sp_value);
_puts("\n - sr   = ");
_putx(sr_value);
_puts("\n - ptpr = ");
_putx(ptpr_value);
_puts("\n - epc  = ");
_putx(epc_value);
_puts("\n");
_tty_release_lock( 0 );
#endif

_tty_get_lock( 0 );
_puts("\n[GIET] Processor[");
_putd( cluster_xy >> Y_WIDTH );
_puts(",");
_putd( cluster_xy & ((1<<Y_WIDTH)-1) );
_puts(",");
_putd( local_pid );
_puts("] completes kernel init at cycle ");
_putd( _get_proctime() );
_puts(" / task_entry_point = ");
_putx( epc_value );
_puts("\n");
_tty_release_lock( 0 );

    // Step 7 : set  registers and jump to user code
    asm volatile (
            "move    $29,       %0    \n"        /* SP <= ctx[CTX_SP_ID] */
            "mtc0    %1,        $12   \n"        /* SR <= ctx[CTX_SR_ID] */
            "mtc2    %2,        $0    \n"        /* PTPR <= ctx[CTX_PTPR_ID] */
            "mtc0    %3,        $14   \n"        /* EPC <= ctx[CTX_EPC_ID] */
            "eret                     \n"        /* jump to user code */
            "nop                      \n"
            :
            : "r" (sp_value), "r" (sr_value), "r" (ptpr_value), "r" (epc_value));

} // end kernel_parallel_init()


// Local Variables:
// tab-width: 4
// c-basic-offset: 4
// c-file-offsets:((innamespace . 0)(inline-open . 0))
// indent-tabs-mode: nil
// End:
// vim: filetype=c:expandtab:shiftwidth=4:tabstop=4:softtabstop=4

