/*
 * hal_context.c - implementation of Thread Context API for TSAR-MIPS32
 * 
 * Author  Alain Greiner    (2016)
 *
 * Copyright (c)  UPMC Sorbonne Universites
 * 
 * This file is part of ALMOS-MKH.
 *
 * ALMOS-MKH.is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License as published by
 * the Free Software Foundation; version 2.0 of the License.
 *
 * ALMOS-MKH.is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with ALMOS-MKH.; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
 */

#include <hal_types.h>
#include <hal_switch.h>
#include <memcpy.h>
#include <thread.h>
#include <string.h>
#include <process.h>
#include <printk.h>
#include <vmm.h>
#include <core.h>
#include <cluster.h>
#include <hal_context.h>

/////////////////////////////////////////////////////////////////////////////////////////
//       Define various SR values for TSAR-MIPS32 
// These constants are initial values for the c0_sr slot of a CPU context:
// - SR_USR_MODE     : selected by hal_cpu_context_create() for THREAD_USER threads.
// - SR_SYS_MODE     : selected by hal_cpu_context_create() for all other thread types.
// - SR_USR_MODE_FPU : not referenced in this file. It equals SR_USR_MODE with bit 29
//                     set; presumably this enables the FPU coprocessor - TODO confirm.
/////////////////////////////////////////////////////////////////////////////////////////

#define SR_USR_MODE       0xFC11
#define SR_USR_MODE_FPU   0x2000FC11
#define SR_SYS_MODE       0xFC00

/////////////////////////////////////////////////////////////////////////////////////////
// These structures define the cpu_context and fpu_context for TSAR MIPS32.
// These registers are saved/restored at each context switch.
// WARNING : update the hal_***_context_save() and hal_***_context_restore()
//           functions when modifying these structures, and check the two
//           CONFIG_CPU_CTX_SIZE & CONFIG_FPU_CTX_SIZE configuration parameters.
/////////////////////////////////////////////////////////////////////////////////////////

// CPU context layout: 36 consecutive 32-bit slots.
// Slot <i> lives at byte offset 4*i; the assembly code in this file addresses the
// slots by number, so fields must never be reordered, inserted or removed without
// updating that code.
// Slots 1 to 25 and 28 to 31 carry the general purpose register of the same number
// ($0, $26 and $27 have no slot); slots 26 and 27 are used for hi and lo.
// Field names are <register name>_<slot index>.
typedef struct hal_cpu_context_s
{
    uint32_t c0_epc;     // slot 0
    uint32_t at_01;      // slot 1
    uint32_t v0_02;      // slot 2
    uint32_t v1_03;      // slot 3
    uint32_t a0_04;      // slot 4
    uint32_t a1_05;      // slot 5
    uint32_t a2_06;      // slot 6
    uint32_t a3_07;      // slot 7

    uint32_t t0_08;      // slot 8
    uint32_t t1_09;      // slot 9
    uint32_t t2_10;      // slot 10
    uint32_t t3_11;      // slot 11
    uint32_t t4_12;      // slot 12
    uint32_t t5_13;      // slot 13
    uint32_t t6_14;      // slot 14
    uint32_t t7_15;      // slot 15

    uint32_t s0_16;      // slot 16
    uint32_t s1_17;      // slot 17
    uint32_t s2_18;      // slot 18
    uint32_t s3_19;      // slot 19
    uint32_t s4_20;      // slot 20
    uint32_t s5_21;      // slot 21
    uint32_t s6_22;      // slot 22
    uint32_t s7_23;      // slot 23

    uint32_t t8_24;      // slot 24
    uint32_t t9_25;      // slot 25 (register $25 is t9; was misnamed t8_25)
    uint32_t hi_26;      // slot 26
    uint32_t lo_27;      // slot 27
    uint32_t gp_28;      // slot 28
    uint32_t sp_29;      // slot 29
    uint32_t fp_30;      // slot 30
    uint32_t ra_31;      // slot 31

    uint32_t c2_ptpr;    // slot 32
    uint32_t c2_mode;    // slot 33

    uint32_t c0_sr;      // slot 34
    uint32_t c0_th;      // slot 35
} 
hal_cpu_context_t;

/////////////////////////////////////////////////////////////////////////////////////////
// This structure defines the fpu_context for TSAR MIPS32.
// It is a plain array of 32 words: hal_fpu_context_save() stores FPU register $f<i>
// at word index <i>, and hal_fpu_context_restore() reloads it from the same index.
/////////////////////////////////////////////////////////////////////////////////////////

typedef struct hal_fpu_context_s
{
	uint32_t   fpu_regs[32];     // fpu_regs[i] holds $f<i>
}
hal_fpu_context_t;



/////////////////////////////////////////////////////////////////////////////////////////
//        CPU context access functions
/////////////////////////////////////////////////////////////////////////////////////////


/////////////////////////////////////////////////////////////////////////////////////////
// Allocates a CPU context for <thread> (allocation requested with AF_ZERO), registers
// it in thread->cpu_context, and initialises seven slots:
// - GPR : sp_29 / fp_30 / ra_31
// - CP0 : c0_sr / c0_th
// - CP2 : c2_ptpr / c2_mode
// The stack, status register and MMU mode values depend on the thread type.
// Returns 0 on success, or ENOMEM when the context cannot be allocated.
/////////////////////////////////////////////////////////////////////////////////////////
error_t hal_cpu_context_create( thread_t * thread )
{
    kmem_req_t          req;
    hal_cpu_context_t * ctx;
    uint32_t            stack_top;
    uint32_t            status;
    uint32_t            mmu_mode;

    context_dmsg("\n[INFO] %s : enters for thread %x in process %x\n",
                 __FUNCTION__ , thread->trdid , thread->process->pid );

    // build the allocation request and get the context
    req.flags  = AF_KERNEL | AF_ZERO;
    req.size   = sizeof(hal_cpu_context_t);
    req.type   = KMEM_CPU_CTX;

    ctx = (hal_cpu_context_t *)kmem_alloc( &req );
    if( ctx == NULL ) return ENOMEM;

    // attach the new context to the thread descriptor
    thread->cpu_context = (void*)ctx;

    // select the stack top, the status register and the MMU mode:
    // user threads run on the user stack, all other types on the kernel stack
    if( thread->type != THREAD_USER )
    {
        stack_top = ((uint32_t)thread->k_stack_base) + thread->k_stack_size;
        status    = SR_SYS_MODE;
        mmu_mode  = 0x3;
    }
    else
    {
        stack_top = ((uint32_t)thread->u_stack_base) + thread->u_stack_size;
        status    = SR_USR_MODE;
        mmu_mode  = 0xF;
    }

    // step 8 bytes below the stack top, then round down to a multiple of 8
    stack_top = (stack_top - 8) & (~ 0x7);

    // fill the seven initialised slots
    ctx->c2_mode    = mmu_mode;
    ctx->c2_ptpr    = (uint32_t)((thread->process->vmm.gpt.ppn) >> 1);
    ctx->c0_th      = (uint32_t)thread;
    ctx->c0_sr      = status;
    ctx->ra_31      = (uint32_t)thread->entry_func;
    ctx->fp_30      = stack_top;                      // TODO check this [AG]
    ctx->sp_29      = stack_top;

    context_dmsg("\n[INFO] %s : exit for thread %x in process %x / ra = %x\n",
                 __FUNCTION__ , thread->trdid , thread->process->pid , ctx->ra_31 );

    return 0;

}  // end hal_cpu_context_create()

/////////////////////////////////////////////////
// Prints, through printk(), the thread identifier, the local cluster identifier,
// the context address, and the gp_28, sp_29, ra_31, c0_sr, c0_epc, c0_th, c2_ptpr
// and c2_mode slots of the CPU context attached to <thread>.
/////////////////////////////////////////////////
void hal_cpu_context_display( thread_t * thread )
{
    hal_cpu_context_t * context = (hal_cpu_context_t *)thread->cpu_context;

    // snapshot the displayed slots
    uint32_t gp    = context->gp_28;
    uint32_t sp    = context->sp_29;
    uint32_t ra    = context->ra_31;
    uint32_t sr    = context->c0_sr;
    uint32_t epc   = context->c0_epc;
    uint32_t th    = context->c0_th;
    uint32_t ptpr  = context->c2_ptpr;
    uint32_t mode  = context->c2_mode;

    printk("\n***** cpu_context for thread %x in cluster %x / ctx = %x\n"
           " gp_28   = %X    sp_29   = %X    ra_31   = %X\n"
           " c0_sr   = %X    c0_epc  = %X    c0_th = %X\n"
           " c2_ptpr = %X    c2_mode = %X\n",
           thread->trdid, local_cxy, context,
           gp   , sp   , ra,
           sr   , epc  , th,
           ptpr , mode );

}  // end hal_cpu_context_display()

/*
////////////////////////////////////////////////////////////////////////////////////////
// This static function makes the actual context switch.    
////////////////////////////////////////////////////////////////////////////////////////
static void hal_do_switch( hal_cpu_context_t * ctx_old,
                           hal_cpu_context_t * ctx_new )
{
    asm volatile(
    ".set noat                       \n"
    ".set noreorder                  \n"
    "move    $26,   %0               \n"

    "mfc0    $27,   $14              \n"
    "sw      $27,   0*4($26)         \n"
 
    "sw      $1,    1*4($26)         \n"
    "sw      $2,    2*4($26)         \n"
    "sw      $3,    3*4($26)         \n"
    "sw      $4,    4*4($26)         \n"
    "sw      $5,    5*4($26)         \n"
    "sw      $6,    6*4($26)         \n"
    "sw      $7,    7*4($26)         \n"

    "sw      $8,    8*4($26)         \n"
    "sw      $9,    9*4($26)         \n"
    "sw      $10,  10*4($26)         \n"
    "sw      $11,  11*4($26)         \n"
    "sw      $12,  12*4($26)         \n"
    "sw      $13,  13*4($26)         \n"
    "sw      $14,  14*4($26)         \n"
    "sw      $15,  15*4($26)         \n"

    "sw      $16,  16*4($26)         \n"
    "sw      $17,  17*4($26)         \n"
    "sw      $18,  18*4($26)         \n"
    "sw      $19,  19*4($26)         \n"
    "sw      $20,  20*4($26)         \n"
    "sw      $21,  21*4($26)         \n"
    "sw      $22,  22*4($26)         \n"
    "sw      $23,  23*4($26)         \n"

    "sw      $24,  24*4($26)         \n"
    "sw      $25,  25*4($26)         \n"

    "mfhi    $27                     \n"
    "sw      $27,  26*4($26)         \n"
    "mflo    $27                     \n"
    "sw      $27,  27*4($26)         \n"

    "sw      $28,  28*4($26)         \n"
    "sw      $29,  29*4($26)         \n"
    "sw      $30,  30*4($26)         \n"
    "sw      $31,  31*4($26)         \n"

	"mfc2    $27,  $0                \n"
	"sw      $27,  32*4($26)         \n"
	"mfc2    $27,  $1                \n"
	"sw      $27,  33*4($26)         \n"

    "mfc0	 $27,  $12               \n"
	"sw      $27,  34*4($26)         \n"
    "mfc0	 $27,  $4, 2             \n"
	"sw      $27,  35*4($26)         \n"

    "sync                            \n"

    "move    $26,   %1               \n"

    "lw      $27,   0*4($26)         \n"
    "mtc0    $27,   $14              \n"

    "lw      $1,    1*4($26)         \n"
    "lw      $2,    2*4($26)         \n"
    "lw      $3,    3*4($26)         \n"
    "lw      $4,    4*4($26)         \n"
    "lw      $5,    5*4($26)         \n"
    "lw      $6,    6*4($26)         \n"
    "lw      $7,    7*4($26)         \n"

    "lw      $8,    8*4($26)         \n"
    "lw      $9,    9*4($26)         \n"
    "lw      $10,  10*4($26)         \n"
    "lw      $11,  11*4($26)         \n"
    "lw      $12,  12*4($26)         \n"
    "lw      $13,  13*4($26)         \n"
    "lw      $14,  14*4($26)         \n"
    "lw      $15,  15*4($26)         \n"

	"lw      $16,  16*4($26)         \n"
	"lw      $17,  17*4($26)         \n"
    "lw      $18,  18*4($26)         \n"
    "lw      $19,  19*4($26)         \n"
    "lw      $20,  20*4($26)         \n"
    "lw      $21,  21*4($26)         \n"
    "lw      $22,  22*4($26)         \n"
    "lw      $23,  23*4($26)         \n"

    "lw      $24,  24*4($26)         \n"
    "lw      $25,  25*4($26)         \n"

    "lw      $27,  26*4($26)         \n"
    "mthi    $27                     \n"
    "lw      $27,  27*4($26)         \n"
    "mtlo    $27                     \n"

	"lw      $28,  28*4($26)         \n"
	"lw      $29,  29*4($26)         \n"
	"lw      $30,  30*4($26)         \n"
	"lw      $31,  31*4($26)         \n"

	"lw      $27,  32*4($26)         \n"
	"mtc2    $27,  $0                \n"
	"lw      $27,  33*4($26)         \n"
	"mtc2    $27,  $1                \n"

	"lw      $27,  34*4($26)         \n"
    "mtc0	 $27,  $12               \n"
	"lw      $27,  35*4($26)         \n"
    "mtc0	 $27,  $4, 2             \n"

    "jr      $31                     \n"

	".set reorder                    \n"
    ".set at                         \n"
    : : "r"(ctx_old) , "r"(ctx_new) : "$26" , "$27" , "memory" ); 

}  // end hal_do_switch()

*/

/////////////////////////////////////////////////////////////////////////////////////////
// Switches the core from thread <old> to thread <new> by handing both CPU contexts
// to hal_do_switch(). When CONFIG_CONTEXT_DEBUG is set, both contexts are displayed
// before the switch.
// According to the hal_cpu_context_t layout, the saved/restored registers are:
// - GPR : all, but (zero, k0, k1), plus (hi, lo)
// - CP0 : c0_th , c0_sr , c0_epc
// - CP2 : c2_ptpr , c2_mode
// NOTE(review): hal_do_switch() is not defined in this file (the C version above is
// commented out); confirm the register list against the implementation in use.
/////////////////////////////////////////////////////////////////////////////////////////
void hal_cpu_context_switch( thread_t * old,
                             thread_t * new )
{
    hal_cpu_context_t * from = old->cpu_context;
    hal_cpu_context_t * to   = new->cpu_context;

#if CONFIG_CONTEXT_DEBUG
    hal_cpu_context_display( old );
    hal_cpu_context_display( new );
#endif

    // save the running context in <from>, then resume execution from <to>
    hal_do_switch( from , to );

}  // end hal_cpu_context_switch()

/////////////////////////////////////////////
// Allocates a CPU context for thread <dst>, registers it in dst->cpu_context, and
// fills it with a byte-for-byte copy of the CPU context of thread <src>.
// Returns 0 on success, or ENOMEM when the context cannot be allocated.
// NOTE(review): the copy includes the c0_th slot, which hal_cpu_context_create()
// sets to the owner thread pointer; confirm callers patch it for <dst> if needed.
/////////////////////////////////////////////
error_t hal_cpu_context_copy( thread_t * dst,
                              thread_t * src )
{
    kmem_req_t          req;
    hal_cpu_context_t * copy;
    hal_cpu_context_t * model;

    // request a kernel CPU context
    req.flags  = AF_KERNEL | AF_ZERO;
    req.size   = sizeof(hal_cpu_context_t);
    req.type   = KMEM_CPU_CTX;

    copy = (hal_cpu_context_t *)kmem_alloc( &req );
    if( copy == NULL ) return ENOMEM;

    // attach the new context to the destination thread
    dst->cpu_context = copy;

    // duplicate the source context into the new one
    model = src->cpu_context;
    memcpy( copy , model , sizeof(hal_cpu_context_t) );

    return 0;

}  // end hal_cpu_context_copy()

/////////////////////////////////////////////////
// Releases the CPU context attached to <thread> through kmem_free().
// The thread->cpu_context pointer itself is left unchanged.
/////////////////////////////////////////////////
void hal_cpu_context_destroy( thread_t * thread )
{
    kmem_req_t  release;

    release.ptr  = thread->cpu_context;
    release.type = KMEM_CPU_CTX;

    kmem_free( &release );

}  // end hal_cpu_context_destroy()





///////////////////////////////////////////////////
// Allocates an FPU context for <thread> (allocation requested with AF_ZERO) and
// registers it in thread->fpu_context.
// Returns 0 on success, or ENOMEM when the context cannot be allocated.
///////////////////////////////////////////////////
error_t hal_fpu_context_create( thread_t * thread )
{
    kmem_req_t          req;
    hal_fpu_context_t * fpu_ctx;

    // request a kernel FPU context
    req.flags  = AF_KERNEL | AF_ZERO;
    req.size   = sizeof(hal_fpu_context_t);
    req.type   = KMEM_FPU_CTX;

    fpu_ctx = (hal_fpu_context_t *)kmem_alloc( &req );

    if( fpu_ctx != NULL )
    {
        // attach the new context to the thread descriptor
        thread->fpu_context = (void*)fpu_ctx;
        return 0;
    }

    return ENOMEM;

}  // end hal_fpu_context_create()

/////////////////////////////////////////////
// Allocates an FPU context for thread <dst>, registers it in dst->fpu_context, and
// fills it with a byte-for-byte copy of the FPU context of thread <src>.
// Returns 0 on success, or ENOMEM when the context cannot be allocated.
/////////////////////////////////////////////
error_t hal_fpu_context_copy( thread_t * dst,
                              thread_t * src )
{
    kmem_req_t          req;
    hal_fpu_context_t * copy;
    hal_fpu_context_t * model;

    // request a kernel FPU context
    req.flags  = AF_KERNEL | AF_ZERO;
    req.size   = sizeof(hal_fpu_context_t);
    req.type   = KMEM_FPU_CTX;

    copy = (hal_fpu_context_t *)kmem_alloc( &req );
    if( copy == NULL ) return ENOMEM;

    // attach the new context to the destination thread
    dst->fpu_context = (void*)copy;

    // duplicate the source FPU context into the new one
    model = src->fpu_context;
    memcpy( copy , model , sizeof(hal_fpu_context_t) );

    return 0;

}  // end hal_fpu_context_copy()

/////////////////////////////////////////////////
// Releases the FPU context attached to <thread> through kmem_free().
// The thread->fpu_context pointer itself is left unchanged.
/////////////////////////////////////////////////
void hal_fpu_context_destroy( thread_t * thread )
{
    kmem_req_t  release;

    release.ptr  = thread->fpu_context;
    release.type = KMEM_FPU_CTX;

    kmem_free( &release );

}  // end hal_fpu_context_destroy()

/////////////////////////////////////////////////////////////////////////////////////////
// Loads a subset of the CPU context of <thread> into the core registers, clears
// THREAD_FLAG_LOADABLE in the thread descriptor, and leaves through an "eret" whose
// target (c0_epc) is the thread entry function.
// These registers are initialised:
// - GPR : sp_29 , fp_30 , a0
// - CP0 : c0_sr , c0_epc , c0_th
// - CP2 : c2_ptpr , c2_mode
// The entry argument is passed in a0 and also stored in the word just below the
// context stack pointer; sp_29 is loaded with the address of that word.
// Control is transferred by the "eret", so this function is not expected to return.
// TODO When is this function called? [AG]
/////////////////////////////////////////////////////////////////////////////////////////
void hal_cpu_context_load( thread_t * thread )
{
    // get relevant values from thread context
    hal_cpu_context_t * ctx     = (hal_cpu_context_t *)thread->cpu_context;     
    uint32_t            sp_29   = ctx->sp_29;
    uint32_t            fp_30   = ctx->fp_30;
    uint32_t            c0_th   = ctx->c0_th;
    uint32_t            c0_sr   = ctx->c0_sr;
    uint32_t            c2_ptpr = ctx->c2_ptpr;
    uint32_t            c2_mode = ctx->c2_mode;
 
    // get pointer on entry function & argument from thread attributes
    uint32_t            func    = (uint32_t)thread->entry_func;
    uint32_t            args    = (uint32_t)thread->entry_args;

    // reset loadable field in thread descriptor 
    thread->flags &= ~THREAD_FLAG_LOADABLE;

    // load registers
    // - $26 / $27 are used as scratch registers for the stack pointer and status.
    // - sp_29 is loaded from $26, the register holding the prepared stack pointer
    //   (the previous code read $16, which is never written by this sequence).
    // - the "memory" clobber makes the compiler complete the flags update above
    //   before the sequence, and accounts for the "sw" into the thread stack.
    asm volatile(
    ".set noreorder                \n"
    "or       $26,    %0,    $0    \n"   /* $26 <= stack pointer                */
    "or       $27,    %2,    $0    \n"   /* $27 <= status register              */
    "addiu    $26,    $26,  -4     \n"   /* decrement stack pointer             */
    "or       $4,     %7,   $0     \n"   /* load a0                             */
    "sw       $4,     ($26)        \n"   /* set entry_args in stack             */
    "ori      $27,    $27,  0x2    \n"   /* set EXL flag in status register     */
    "mtc0     $27,    $12          \n"   /* load c0_sr                          */
    "mtc0     %3,     $4,    2     \n"   /* load c0_th                          */
    "mtc2     %4,     $0           \n"   /* load c2 ptpr                        */
    "mtc0     %6,     $14          \n"   /* load c0_epc                         */
    "or       $29,    $26,  $0     \n"   /* load sp_29 (decremented pointer)    */
    "or       $30,    %1,   $0     \n"   /* load fp_30                          */
    "mtc2     %5,     $1           \n"   /* load c2_mode                        */
    "nop                           \n"
    "eret                          \n"   /* jump to user code                   */
    "nop                           \n"
    ".set reorder                  \n"
    : 
    : "r"(sp_29),"r"(fp_30),"r"(c0_sr),"r"(c0_th),
      "r"(c2_ptpr),"r"(c2_mode),"r"(func),"r"(args)
    : "$4","$26","$27","$29","$30","memory" );

}  // end hal_cpu_context_load()


//////////////////////////////////////////////
// Stores the 32 FPU registers $f0 to $f31 into the FPU context attached to
// <thread>: register $f<i> is written at byte offset 4*i of the context.
// The "memory" clobber tells the compiler that the sequence writes memory that is
// not described by its operands, so later C accesses to the context are not
// reordered before, or served from values cached ahead of, the stores.
//////////////////////////////////////////////
void hal_fpu_context_save( thread_t * thread )
{
    // base address of the FPU context, as a 32-bit integer
    uint32_t ctx = (uint32_t)thread->fpu_context;

    asm volatile(
    ".set noreorder           \n"
    "swc1    $f0,    0*4(%0)  \n"
    "swc1    $f1,    1*4(%0)  \n"
    "swc1    $f2,    2*4(%0)  \n"
    "swc1    $f3,    3*4(%0)  \n"
    "swc1    $f4,    4*4(%0)  \n"
    "swc1    $f5,    5*4(%0)  \n"
    "swc1    $f6,    6*4(%0)  \n"
    "swc1    $f7,    7*4(%0)  \n"
    "swc1    $f8,    8*4(%0)  \n"
    "swc1    $f9,    9*4(%0)  \n"
    "swc1    $f10,  10*4(%0)  \n"
    "swc1    $f11,  11*4(%0)  \n"
    "swc1    $f12,  12*4(%0)  \n"
    "swc1    $f13,  13*4(%0)  \n"
    "swc1    $f14,  14*4(%0)  \n"
    "swc1    $f15,  15*4(%0)  \n"
    "swc1    $f16,  16*4(%0)  \n"
    "swc1    $f17,  17*4(%0)  \n"
    "swc1    $f18,  18*4(%0)  \n"
    "swc1    $f19,  19*4(%0)  \n"
    "swc1    $f20,  20*4(%0)  \n"
    "swc1    $f21,  21*4(%0)  \n"
    "swc1    $f22,  22*4(%0)  \n"
    "swc1    $f23,  23*4(%0)  \n"
    "swc1    $f24,  24*4(%0)  \n"
    "swc1    $f25,  25*4(%0)  \n"
    "swc1    $f26,  26*4(%0)  \n"
    "swc1    $f27,  27*4(%0)  \n"
    "swc1    $f28,  28*4(%0)  \n"
    "swc1    $f29,  29*4(%0)  \n"
    "swc1    $f30,  30*4(%0)  \n"
    "swc1    $f31,  31*4(%0)  \n"
    ".set reorder             \n"
    : : "r"(ctx) : "memory" );

}  // end hal_fpu_context_save()

/////////////////////////////////////////////////
// Reloads the 32 FPU registers $f0 to $f31 from the FPU context attached to
// <thread>: register $f<i> is read from byte offset 4*i of the context.
// The "memory" clobber tells the compiler that the sequence reads memory that is
// not described by its operands, so earlier C stores into the context (e.g. a
// memcpy) are completed before the loads execute.
/////////////////////////////////////////////////
void hal_fpu_context_restore( thread_t * thread )
{
    // base address of the FPU context, as a 32-bit integer
    uint32_t ctx = (uint32_t)thread->fpu_context;

    asm volatile(
    ".set noreorder           \n"
    "lwc1    $f0,    0*4(%0)  \n"
    "lwc1    $f1,    1*4(%0)  \n"
    "lwc1    $f2,    2*4(%0)  \n"
    "lwc1    $f3,    3*4(%0)  \n"
    "lwc1    $f4,    4*4(%0)  \n"
    "lwc1    $f5,    5*4(%0)  \n"
    "lwc1    $f6,    6*4(%0)  \n"
    "lwc1    $f7,    7*4(%0)  \n"
    "lwc1    $f8,    8*4(%0)  \n"
    "lwc1    $f9,    9*4(%0)  \n"
    "lwc1    $f10,  10*4(%0)  \n"
    "lwc1    $f11,  11*4(%0)  \n"
    "lwc1    $f12,  12*4(%0)  \n"
    "lwc1    $f13,  13*4(%0)  \n"
    "lwc1    $f14,  14*4(%0)  \n"
    "lwc1    $f15,  15*4(%0)  \n"
    "lwc1    $f16,  16*4(%0)  \n"
    "lwc1    $f17,  17*4(%0)  \n"
    "lwc1    $f18,  18*4(%0)  \n"
    "lwc1    $f19,  19*4(%0)  \n"
    "lwc1    $f20,  20*4(%0)  \n"
    "lwc1    $f21,  21*4(%0)  \n"
    "lwc1    $f22,  22*4(%0)  \n"
    "lwc1    $f23,  23*4(%0)  \n"
    "lwc1    $f24,  24*4(%0)  \n"
    "lwc1    $f25,  25*4(%0)  \n"
    "lwc1    $f26,  26*4(%0)  \n"
    "lwc1    $f27,  27*4(%0)  \n"
    "lwc1    $f28,  28*4(%0)  \n"
    "lwc1    $f29,  29*4(%0)  \n"
    "lwc1    $f30,  30*4(%0)  \n"
    "lwc1    $f31,  31*4(%0)  \n"
    ".set reorder             \n"
    : : "r"(ctx) : "memory" );

}  // end hal_fpu_context_restore()

/////////////////////////////////////
// Copies one FPU context (sizeof(hal_fpu_context_t) bytes) from the extended
// pointer <src> to the extended pointer <dst>, using hal_remote_memcpy().
/////////////////////////////////////
void hal_fpu_context_dup( xptr_t dst,
                          xptr_t src )
{
    // number of bytes in one FPU context
    const uint32_t nbytes = sizeof(hal_fpu_context_t);

    hal_remote_memcpy( dst , src , nbytes );

}  // end hal_fpu_context_dup()

