/*
 * \file  : reset.S
 * \date  : 01/12/2012
 * \author: Cesar FUGUET & Manuel BOUYER & Alain Greiner
 *
 * This is a generic reset code for a generic multi-clusters / multi-processors
 * TSAR architecture (up to 256 clusters / up to 4  processors per cluster).
 *
 * There is one XICU, one TTY, one DMA, and one memory bank per cluster.
 *
 * This preloader uses a stack segment allocated in cluster 0, defined
 * by the seg_reset_stack_base and seg_reset_stack_size parameters in ldscript.
 * - Processor 0 uses a larger stack:         64 Kbytes.
 * - Other processors use a smaller stack:    512 bytes.
 *     => seg_reset_stack_size cannot be smaller than 0x90000 bytes (576 K),
 *        i.e. 64 Kbytes + 1024 processors * 512 bytes.
 * Those stacks can be used by both the preloader and the boot-loader code.
 * 
 * The replicated XICU is used to awake the sleeping processors:
 *      xicu_paddr_base = ICU_PADDR_BASE + (cluster_xy << 32)
 *
 * It is intended to be used with various operating systems or nano kernels,
 * including NetBSD, ALMOS, and GIET_VM.
 *
 * - Each processor initializes its Status Register (SR) to disable interrupts.
 * - Each processor initializes its Count Register.
 * - Each processor initialises its private XICU WTI mask register.
 * - Each processor initializes its Stack Pointer.
 * - Only processor 0 executes the reset_elf_loader function to load into memory
 *   the system specific boot-loader stored on disk at BOOT_LOADER_LBA.
 * - All other processors wait in a low power consumption mode until
 *   processor 0 wakes them using an IPI (Inter Processor Interrupt).
 */

    #include <defs.h>
    #include <mips32_registers.h>

    /* These definitions should be consistent with the values defined in the map.xml file */

    .extern seg_reset_stack_base  
    .extern seg_reset_stack_size    

    .section .reset,"ax",@progbits

    .extern dtb_addr
    .extern reset_putc
    .extern reset_getc
    .extern reset_ioc_read
    .extern reset_elf_loader
    .extern memcpy
    .extern reset_puts
    .extern reset_putx
    .extern reset_putd
    .extern reset_ioc_init
    .extern versionstr

    .globl  reset                    /* Makes reset an external symbol */
    .ent    reset

    .align  2
    .set noreorder                  /* branch delay slots are filled by hand below */

/*
 * reset: first instruction of the .reset section.
 *
 * It immediately branches over a table of 32-bit words that is placed at
 * fixed offsets from the start of the section, so that code loaded later
 * can find the services exported by this reset code.
 *
 * The absolute addresses given in the comments assume that the .reset
 * section is linked at 0xbfc00000 (this is decided by the linker script,
 * which is not visible from this file).
 */
reset:
    b       _reset                  /* 0xbfc00000 : skip the table below */
    nop                             /* 0xbfc00004 : branch delay slot    */

    /*
     * Table exported by this reset code: a version word, the dtb_addr
     * symbol, then the addresses of the utility functions.
     * The order of the entries defines their offsets: do not reorder.
     */

    .word   RESET_VERSION           /* 0xbfc00008 */
    .word   dtb_addr                /* 0xbfc0000c */
    .word   reset_putc              /* 0xbfc00010 */
    .word   reset_getc              /* 0xbfc00014 */
    .word   reset_ioc_read          /* 0xbfc00018 */
    .word   reset_elf_loader        /* 0xbfc0001c */
    .word   memcpy                  /* 0xbfc00020 */
    .word   reset_puts              /* 0xbfc00024 */
    .word   reset_putx              /* 0xbfc00028 */
    .word   reset_putd              /* 0xbfc0002c */

/*
 * _reset: initialisation path executed by every processor.
 *
 * Steps (in order):
 *   1. write CP0_STATUS with only bit 22 (BEV) set: interrupts disabled
 *   2. derive proc_id / local id / cluster_xy from CP0_EBASE
 *   3. clear CP0_COUNT
 *   4. set the WTI mask of the local XICU for this processor
 *   5. set the stack pointer
 *   6. processor 0 loads and jumps to the boot-loader, every other
 *      processor branches to _reset_wait
 *
 * Registers live after step 4 (and on entry to _reset_wait):
 *   t0: global proc_id
 *   t1: local proc_id  (proc_id % NB_PROCS)
 *   t2: cluster_xy     (proc_id / NB_PROCS)
 *   t3: ICU_PADDR_BASE
 * Scratch: k0, k1, t4, t5, t6.
 *
 * The code runs under ".set noreorder": every branch/jump is followed by
 * an explicit delay slot instruction.
 */
_reset:

    /* All processors disable interrupts, keep STATUS.BEV (bit 22) enabled */

    li      k0,     (1 << 22)
    mtc0    k0,     CP0_STATUS

    /* All processors compute proc_id, lpid, cluster_xy */

    mfc0    k0,     CP0_EBASE
    andi    t0,     k0,     0x3FF   /* t0 <= proc_id (at most 1024 processors)    */

    move    t3,     t0

    la      k0,     NB_PROCS        /* k0 <= number of processors per cluster     */
    divu    t3,     k0
    mfhi    t1                      /* t1 <= lpid       = proc_id % NB_PROCS      */
    mflo    t2                      /* t2 <= cluster_xy = proc_id / NB_PROCS      */

    /* All processors initialise the count register in CP0 */

    mtc0    zero,   CP0_COUNT

    /*
     * All processors enable the WTI for XICU
     * Each processor may have IRQ_PER_PROC irq outputs from the XICU
     * In each cluster, the XICU base address depends on the cluster_xy
     *
     * The register offset is built as (FUNC << 7) | (OUT_INDEX << 2),
     * with OUT_INDEX = IRQ_PER_PROC * local_id.
     */

    la      t3,     ICU_PADDR_BASE  /* t3 <= ICU base address                     */
    move    t4,     t1              /* t4 <= local_id                             */
    li      t5,     IRQ_PER_PROC    /* t5 <= IRQ_PER_PROC                         */
    multu   t4,     t5
    mflo    t6                      /* t6 <= IRQ_PER_PROC * local_id              */
    sll     t4,     t6,     2       /* t4 <= OUT_INDEX = t6 * 4                   */

    li      t5,     (0xC << 7)      /* t5 <= FUNC      = XICU_MSK_WTI             */
    or      t4,     t4,     t5      /* t4 <= FUNC | INDEX | 00                    */
    or      t5,     t3,     t4      /* t5 <= &XICU[MSK_WTI][OUT_INDEX]            */

    /* All processors set WTI mask using the physical address extension    */

    li      t4,     1
    sllv    t4,     t4,     t1      /* t4 <= (1 << local_id): enable WTI[local_id] */

    /*
     * The PADDR extension is only set for the duration of the store, so
     * that the store targets the XICU of this processor's own cluster.
     */
    mtc2    t2,     CP2_PADDR_EXT   /* set PADDR extension                        */
    sw      t4,     0(t5)           /* XICU[MSK_WTI][INDEX] <= t4                 */
    mtc2    zero,   CP2_PADDR_EXT   /* reset PADDR extension                      */

    /*
     * All processors initialize their stack pointer, depending on proc_id.
     *
     * Processor i owns the slot ending at
     *     seg_reset_stack_base + 64K + 512 * (proc_id + 1)
     * (so processor 0 gets 64K + 512 bytes, the others 512 bytes each).
     *
     * The o32 calling convention requires the caller to provide a 16-byte
     * argument save area at 0..15(sp): a callee is allowed to store a0-a3
     * there. sp is therefore set 16 bytes below the top of the slot;
     * otherwise such a store would land in the next processor's slot (or
     * beyond the end of the segment for the last processor).
     */

    la      k0,     seg_reset_stack_base
    li      k1,     0x10000         /* k1 <= P0 stack size == 64 Kbytes           */
    addu    sp,     k0,     k1      /* sp <= base + 64K                           */

    li      k1,     0x200           /* k1 <= Pi stack size == 512 bytes           */
    multu   k1,     t0
    mflo    k0                      /* k0 <= 512 * proc_id                        */
    addu    sp,     sp,     k1
    addu    sp,     sp,     k0      /* sp <= base + 64K + 512 * (proc_id + 1)     */
    addiu   sp,     sp,     -16     /* reserve the o32 argument save area         */

    /*
     * Only processor 0 in cluster 0 loads and executes the boot-loader
     * We have:
     * t0: global proc_id
     * t1: local proc_id
     * t2: cluster_xy
     * t3: XICU base address (ICU_PADDR_BASE)
     */

    bne     zero,   t0,     _reset_wait
    nop                             /* branch delay slot                          */

    /* Processor 0 displays version for this reset code */

    la      a0,     versionstr
    la      k0,     reset_puts
    jalr    k0
    nop                             /* jump delay slot                            */


#ifndef SOCLIB_IOC

    /* Processor 0 initializes the block device if required */

    la      k0,     reset_ioc_init
    jalr    k0
    nop                             /* jump delay slot                            */

#endif

    /*
     * Processor 0 jumps to the reset_elf_loader routine
     * Passing as argument the block number in which is loaded the .elf file
     */

    la      k0,     reset_elf_loader
    li      a0,     BOOT_LOADER_LBA
    jalr    k0
    nop                             /* jump delay slot                            */

    /*
     * Processor 0 jumps to the entry address defined in the .elf file,
     * and returned by reset_elf_loader function (in v0).
     * All function arguments are 0
     */

    move    a0,     zero
    move    a1,     zero
    move    a2,     zero
    move    a3,     zero
    jr      v0
    nop                             /* jump delay slot                            */

    /*
     * All processors (but processor 0) wait in low power mode
     * until processor 0 wakes them using an IPI.
     * On entry we have:
     * t0: global id
     * t1: local id
     * t2: cluster id
     * t3: XICU base address (ICU_PADDR_BASE); the cluster actually accessed
     *     is selected through CP2_PADDR_EXT when the register is read
     */

_reset_wait:

    /* The mailbox address is computed before sleeping: base + 4 * local_id */

    sll     t4,     t1,     2       /* t4 <= local_id * 4                 */
    addu    t5,     t4,     t3      /* t5 <= &XICU[WTI_REG][local_id]     */

    wait                            /* sleep until woken up               */

    /* 
     * All other processors, when exiting wait mode,
     * read from XICU the address to jump to.
     * This address is the boot-loader entry address that has been
     * written in the mailbox by the IPI sent by processor 0.
     * The PADDR extension is set to this processor's cluster (t2) only
     * for the duration of the load, then cleared again.
     */

    mtc2    t2,     CP2_PADDR_EXT   /* set PADDR extension                */
    lw      k0,     0(t5)           /* k0 <= XICU[WTI_REG][local_id]      */
    mtc2    zero,   CP2_PADDR_EXT   /* reset PADDR extension              */

    jr      k0                      /* jump to the address read above     */
    nop                             /* jump delay slot                    */

/*
 * Exception entry point.
 *
 * Placed at offset 0x380 from the start of the .reset section.
 * NOTE(review): with STATUS.BEV set (as done in _reset) and the section
 * linked at 0xbfc00000, this should be the MIPS32 bootstrap general
 * exception vector (0xbfc00380) - confirm against the linker script.
 *
 * The handler loads STATUS, CAUSE and EPC into a0, a1, a2 and jumps to
 * handle_except, which is not defined in this file.
 * a0-a2 are overwritten without being saved, and "j" does not set a
 * return address.
 */

.org 0x0380
_excep:
    mfc0    a0, CP0_STATUS          /* first arg is status                */
    mfc0    a1, CP0_CAUSE           /* second arg is cause                */
    mfc0    a2, CP0_EPC             /* third arg is epc                   */
    nop                             /* presumably lets the last mfc0 complete - TODO confirm */
    j       handle_except
    nop                             /* jump delay slot                    */

    .end reset

    .set reorder                    /* restore the assembler default      */

/*
 * vim: tabstop=4 : shiftwidth=4 : expandtab
 */
