#################################################################################
#	File : reset.s
#	Author : Alain Greiner
#	Date : 15/04/2011
#################################################################################
# 	This is a boot code for a generic multi-clusters / multi-processors
#       TSAR architecture (up to 256 clusters / up to 4  processors per cluster). 
#       There is one XICU, one TTY, one DMA and one stack segment per cluster.
#       segment base addresses = base + cluster_segment_increment*cluster_id
#	- Each processor initializes the stack pointer ($29) depending on pid.
#	- Only processor 0 initializes the Interrupt vector (TTY, DMA & IOC).
#       - Each processor initialises its private ICU mask register.
#	- Each processor initializes the Status Register (SR) 
#	- Each processor then jumps (jr) to the main address in kernel mode;
#	  the EPC register is not written by this code.
#################################################################################
		
	.section .reset,"ax",@progbits

	.extern	seg_stack_base
	.extern	seg_icu_base
	.extern _interrupt_vector
	.extern _isr_tty_get
	.extern _isr_dma
	.extern _isr_ioc

        .extern NB_PROCS
        .extern NB_CLUSTERS

	.globl  reset	 			# makes reset an external symbol 
	.ent	reset
	.align	2

#################################################################################
# reset : boot entry point, executed by every processor.
#
# This first part computes the identifiers used by the rest of the boot code.
# Register roles on exit (relied upon by the code that follows):
#   $10 = proc_id    (low 10 bits of CP0 register 15, select 1)
#   $11 = local_id   = proc_id % NB_PROCS
#   $12 = cluster_id = proc_id / NB_PROCS
#   $14 = cluster_increment = 4G / NB_CLUSTERS
#   $13 = cluster_id * cluster_increment
# Clobbers: $26, HI, LO.
#
# NB_PROCS and NB_CLUSTERS are external symbols whose *address* is the value
# (hence the use of "la"); both must be non-zero, no check is done here.
#################################################################################
reset:
       	.set noreorder

# computes proc_id, local_id, cluster_id, and cluster_increment
    mfc0    $26,    $15,    1
    andi    $10,    $26,    0x3FF	# $10 <= proc_id (at most 1024 processors)
    la      $26,    NB_PROCS		# $26 <= number of processors per cluster

    # The explicit $0 destination selects the raw hardware divide (HI/LO only).
    # The two-operand form "divu $10, $26" is a GNU as macro that also copies
    # the quotient into $10, destroying proc_id which is tested again later.
    divu    $0,     $10,    $26
    mfhi    $11                 	# $11 <= local_id = proc_id % NB_PROCS
    mflo    $12              		# $12 <= cluster_id = proc_id / NB_PROCS

    # 4G does not fit in 32 bits: compute (2G / NB_CLUSTERS) * 2 instead.
    la      $26,    NB_CLUSTERS
    li      $13,    0x80000000
    divu    $0,     $13,    $26		# raw divide, result read back from LO
    mflo    $14
    sll     $14,    $14,    1		# $14 <= cluster_increment = 4G / NB_CLUSTERS
    mult    $14,    $12	
    mflo    $13                 	# $13 <= cluster_id * cluster_increment

# Stack pointer setup: one 64K slot per processor inside the cluster's stack
# segment. The stack pointer is placed at the upper end of the slot.
#   in  : $11 = local_id, $13 = cluster_id * cluster_increment
#   out : $29 = seg_stack_base + $13 + (local_id + 1) * 64K
#   scratch : $24, $25, $26, $27, HI, LO
    addi    $25,    $11,    1		# $25 <= slot number, counted from 1
    li      $26,    0x10000		# $26 <= slot size (64K)
    la      $27,    seg_stack_base
    mult    $25,    $26			# LO <= (local_id + 1) * 64K
    addu    $27,    $27,    $13		# $27 <= stack segment base of this cluster
    mflo    $24				# $24 <= offset of the slot's upper end
    addu    $29,    $27,    $24		# $29 <= top of this processor's stack

# in each cluster, each processor initializes its private XICU mask register
# in each cluster, the ICU base address depends on the cluster_id
#
#   in  : $11 = local_id (0..3), $13 = cluster_id * cluster_increment
#   out : $20 = seg_icu_base + cluster_id * cluster_increment
#   scratch : $13, $21, $22, $23, $24, $27
#
# The per-processor code is selected through the _reset_switch jump table.
# This code is assembled under ".set noreorder": the assembler does not fill
# branch delay slots, so every jump below is followed by an explicit nop.
# Without it, the first instruction of the next case would be executed in
# the delay slot of the preceding "j".
    la      $20,    seg_icu_base
    addu    $20,    $20,    $13		# $20 <= seg_icu_base + cluster_id*cluster_increment
    la      $21,    _reset_switch
    sll     $22,    $11,    2           # $22 <= local_id*4 (byte offset in table)
    addu    $23,    $21,    $22         # $23 <= &_reset_switch[local_id]
    lw      $24,    0($23)
    jr      $24
    nop
_reset_proc0:
    li      $13,    0b010010000000      # offset for MSK_HWI_ENABLE & proc[0]
    addu    $13,    $20,    $13
    li      $27,    0x111		# TTY[0] DMA[0] IOC
    sw      $27,    0($13)              # MASK[0]
    j       _reset_itvector
    nop                                 # delay slot
_reset_proc1:
    li      $13,    0b010010000100      # offset for MSK_HWI_ENABLE & proc[1]
    addu    $13,    $20,    $13
    li      $27,    0x022		# TTY[1] DMA[1]
    sw      $27,    0($13)              # MASK[1]
    j       _reset_itvector
    nop                                 # delay slot
_reset_proc2:
    li      $13,    0b010010001000      # offset for MSK_HWI_ENABLE & proc[2]
    addu    $13,    $20,    $13
    li      $27,    0x044		# TTY[2] DMA[2]
    sw      $27,    0($13)              # MASK[2]
    j       _reset_itvector
    nop                                 # delay slot
_reset_proc3:
    li      $13,    0b010010001100      # offset for MSK_HWI_ENABLE & proc[3]
    addu    $13,    $20,    $13
    li      $27,    0x088		# TTY[3] DMA[3]
    sw      $27,    0($13)              # MASK[3]
    j       _reset_itvector
    nop                                 # delay slot

# jump table indexed by local_id (one word per processor of the cluster)
_reset_switch:
    .word	_reset_proc0
    .word	_reset_proc1
    .word	_reset_proc2
    .word	_reset_proc3

# only processor 0 in cluster 0 initializes interrupt vector
#
#   in  : $10 = global proc_id (expected to still hold the value computed
#         at reset entry); every processor with $10 != 0 skips to _reset_end
#   scratch : $26, $27
#
# Vector layout written here (one word per entry):
#   [0..3] = _isr_tty_get, [4..7] = _isr_dma, [8] = _isr_ioc

_reset_itvector:
    bne	    $10,    $0,    _reset_end
    nop                                 # delay slot: "la" is a multi-instruction
                                        # macro and must not be split across it
    la      $26,    _interrupt_vector   # interrupt vector address
    la      $27,    _isr_tty_get 
    sw      $27,    0($26)              # interrupt_vector[0] <= _isr_tty_get
    sw      $27,    4($26)              # interrupt_vector[1] <= _isr_tty_get
    sw      $27,    8($26)              # interrupt_vector[2] <= _isr_tty_get
    sw      $27,   12($26)              # interrupt_vector[3] <= _isr_tty_get
    la      $27,    _isr_dma 
    sw      $27,   16($26)              # interrupt_vector[4] <= _isr_dma
    sw      $27,   20($26)              # interrupt_vector[5] <= _isr_dma
    sw      $27,   24($26)              # interrupt_vector[6] <= _isr_dma
    sw      $27,   28($26)              # interrupt_vector[7] <= _isr_dma
    la      $27,    _isr_ioc 
    sw      $27,   32($26)              # interrupt_vector[8] <= _isr_ioc

# Common exit path, reached by every processor: program the status register,
# then jump to main. $26 is the only general register used as scratch here.
_reset_end:

# initializes SR register
# The constant 0x0000FF01 has bit 0 and bits 15..8 set, all other bits clear.
# In the mtc0 below, "$12" names coprocessor-0 register 12 (the destination),
# not general-purpose register $12.
    li	    $26,    0x0000FF01		
    mtc0    $26,    $12			# SR <= kernel mode / IRQ enable 

# jumps to main in kernel mode
# Plain register jump: no return address is saved, main is not expected to
# return here. The nop fills the jr delay slot (code is under .set noreorder).
    la	    $26,    main
    jr      $26
    nop

    .end	reset

# restore the assembler's default instruction reordering for any code that follows
    .set reorder
