#################################################################################
#   File : reset.s
#   Author : Alain Greiner
#   Date : 15/04/2011
#################################################################################
#   This is a boot code for a generic multi-clusters / multi-processors
#       TSAR architecture (up to 256 clusters / up to 4  processors per cluster). 
#       There is one XICU, one TTY, one DMA and one stack segment per cluster.
#       segment base addresses = base + cluster_segment_increment*cluster_id
#   - Each processor initializes the stack pointer ($29) depending on pid.
#   - Only processor 0 initializes the Interrupt vector (TTY, DMA & IOC).
#       - Each processor initialises its private ICU mask register.
#   - Each processor initializes the Status Register (SR) 
#   - Each processor initializes the EPC register, and jumps to the main 
#     address in kernel mode...
#################################################################################

#include <defs.h>
    .section .boot,"ax",@progbits

    .extern seg_stack_base
    .extern _boot_loader_entry

    .extern dtb_addr
    .extern boot_putc
    .extern boot_getc
    .extern _ioc_read

    # Boot entry point followed by a fixed-layout table.
    # The first two words are a branch over the table plus its delay slot.
    # The next five words hold BOOT_VERSION and then the addresses of
    # dtb_addr, boot_putc, boot_getc and _ioc_read, each at a fixed offset
    # from the boot symbol (every entry is 4 bytes wide).
    # NOTE(review): the earlier address notes here suggest that boot is
    # linked at the MIPS reset vector 0xbfc00000 -- confirm against the
    # linker script.
    .globl  boot               # makes boot an external symbol
    .ent    boot
    .align  2
    .set noreorder             # delay slots are filled by hand from here on

boot:
    b       _boot		# boot + 0x00: skip over the table below
    nop				# boot + 0x04: branch delay slot
    .word   BOOT_VERSION	# boot + 0x08
    .word   dtb_addr		# boot + 0x0c
    .word   boot_putc		# boot + 0x10
    .word   boot_getc		# boot + 0x14
    .word   _ioc_read		# boot + 0x18

_boot:
    # Clear the CP0 status register ($12) so that no interrupt can be taken

    mtc0    $0,     $12

    # Work out proc_id, local_id, cluster_id and the per-cluster address offset

    mfc0    $26,    $15,    1
    andi    $10,    $26,    0x03ff  # $10 <= proc_id (10 bits, so 1024 processors at most)
    la      $26,    NB_PROCS        # $26 <= processors per cluster
    divu    $10,    $26             # HI <= proc_id % NB_PROCS, LO <= proc_id / NB_PROCS
    mflo    $12                     # $12 <= cluster_id
    mfhi    $11                     # $11 <= local_id

    # NOTE(review): GNU as appears to expand the two-operand divu above into a
    # macro that also writes the quotient into $10, which would be why
    # proc_id is fetched a second time here -- confirm before removing it.

    mfc0    $26,    $15,    1
    andi    $10,    $26,    0x03ff  # $10 <= proc_id again

    # The dividend is 2G rather than 4G because 4G does not fit in 32 bits;
    # the quotient is doubled afterwards.

    lui     $13,    0x8000          # $13 <= 0x80000000
    la      $26,    NB_CLUSTERS     # $26 <= number of clusters
    divu    $13,    $26
    mflo    $14                     # $14 <= 0x80000000 / NB_CLUSTERS
    sll     $14,    $14,    1       # $14 <= cluster_increment = 4G / NB_CLUSTERS
    mult    $14,    $12
    mflo    $13                     # $13 <= cluster_id * cluster_increment

    # Reset the CP0 count register ($9)

    mtc0    $0,     $9

    # Every cluster has its own ICU, found at ICU_BASE plus the cluster offset

    la      $20,    ICU_BASE
    addu    $20,    $13,    $20     # $20 <= ICU_BASE + cluster_id*cluster_increment

    # Register contents from this point on:
    #   $20  xicu base address
    #   $12  cluster id
    #   $11  local id
    #   $10  global id
    #
    # Processor 0 (in cluster 0) carries on into the boot loader;
    # every other processor goes to _reset_wait.
    bnez    $10,    _reset_wait
    nop                             # branch delay slot

    # Stack pointer used by the boot loader

    lui     $26,    0x0001          # $26 <= 0x10000
    la      $27,    seg_stack_base
    addu    $29,    $27,    $26     # $29 <= seg_stack_base + 0x10000

    # Call the boot loader routine
    la      $26,    _boot_loader_entry
    jalr    $26
    nop                             # call delay slot

    # The boot loader hands back, in $2, the address of the main function
    # (the entry point of the ELF file). Jump there with the four argument
    # registers $4-$7 cleared.

    move    $7,    $0
    move    $6,    $0
    move    $5,    $0
    move    $4,    $0
    jr      $2
    nop                             # jump delay slot


# Secondary processors park here until the application wakes them up.
# The application wakes up a non-boot CPU by sending it an IPI that carries
# a non-zero value in the mailbox. That value is the address to jump to.
_reset_wait:
    # Register contents on entry:
    #   $20  xicu base address
    #   $12  cluster id
    #   $11  local id
    #   $10  global id

    sll     $13,    $11,    2   # $13 = local_id * 4
    addu    $21,    $13,    $20 # $21 = XICU_WTI_REG(local_id)
1:
    lw      $2,     0($21)      # read the mailbox register
    beq     $0,     $2,     1b  # still zero: keep polling
    nop                         # branch delay slot
    jr      $2                  # non-zero: jump to the address just read
    nop                         # jump delay slot, filled explicitly under .set noreorder

    .end    boot

    .set reorder
