/*
 * hal_context.c - implementation of Thread Context API for TSAR-MIPS32
 * 
 * Author  Alain Greiner    (2016)
 *
 * Copyright (c)  UPMC Sorbonne Universites
 * 
 * This file is part of ALMOS-MKH.
 *
 * ALMOS-MKH is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License as published by
 * the Free Software Foundation; version 2.0 of the License.
 *
 * ALMOS-MKH is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with ALMOS-MKH; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
 */

#include <hal_types.h>
#include <memcpy.h>
#include <thread.h>
#include <string.h>
#include <process.h>
#include <vmm.h>
#include <core.h>
#include <cluster.h>

#include <hal_context.h>

//////////////////////////////////////////////////////////
error_t hal_cpu_context_create( struct thread_s * thread )
{
    kmem_req_t  req;

    // allocate memory for cpu_context
    req.type   = KMEM_GENERIC;
    req.size   = sizeof(hal_cpu_context_t);
    req.flags  = AF_KERNEL | AF_ZERO;

    hal_cpu_context_t * context = (hal_cpu_context_t *)kmem_alloc( &req );
    if( context == NULL ) return ENOMEM;

    // set cpu context pointer in thread
    thread->cpu_context = (void*)context;

    // stack pointer, status register and mmu_mode depends on thread type
	uint32_t sp_29;
    uint32_t c0_sr;
    uint32_t c2_mode;
    if( thread->type == THREAD_USER )
    {
        sp_29   = ((uint32_t)thread->u_stack_base) + thread->u_stack_size;
        c0_sr   = SR_USR_MODE;
        c2_mode = 0xF;
    }
    else
    {
        sp_29   = ((uint32_t)thread->k_stack_base) + thread->k_stack_size;
        c0_sr   = SR_SYS_MODE;
        c2_mode = 0x3;
    }

    // align stack pointer on a double word boundary 
	sp_29 = (sp_29 - 8) & (~ 0x7);

    // initialise context
	context->sp_29      = sp_29; 
	context->fp_30      = sp_29;                          // TODO check this [AG]
    context->ra_31      = (uint32_t)thread->entry_func;
	context->c0_sr      = c0_sr;
	context->c0_th      = (uint32_t)thread; 
	context->c2_ptpr    = (uint32_t)((thread->process->vmm.gpt.ppn) >> 1);
	context->c2_mode    = c2_mode;

    return 0;
}  // end hal_cpu_context_create()

/////////////////////////////////////////////
// Give <dst> its own CPU context, filled with a byte-for-byte copy of the
// CPU context of <src>. Every field is duplicated as is, so values such as
// c0_th or sp_29 in the copy are still those recorded for <src>.
// Returns 0 on success, ENOMEM when the allocation fails.
error_t hal_cpu_context_copy( thread_t * dst,
                              thread_t * src )
{
    // describe the allocation : one cpu context in kernel memory
    kmem_req_t  req = { .type  = KMEM_GENERIC,
                        .size  = sizeof(hal_cpu_context_t),
                        .flags = AF_KERNEL | AF_ZERO };

    hal_cpu_context_t * copy = (hal_cpu_context_t *)kmem_alloc( &req );
    if( copy == NULL ) return ENOMEM;

    // attach the new context to the destination thread
    dst->cpu_context = copy;

    // the source pointer is read only after the destination has been updated
    hal_cpu_context_t * origin = src->cpu_context;

    // duplicate the whole source context
    memcpy( copy , origin , sizeof(*copy) );

    return 0;
}  // end hal_cpu_context_copy()

/////////////////////////////////////////////////
// Hand the CPU context of <thread> back to the kernel allocator.
// The thread->cpu_context field itself is left untouched.
void hal_cpu_context_destroy( thread_t * thread )
{
    // describe the buffer to release
    kmem_req_t  req = { .type = KMEM_GENERIC,
                        .ptr  = thread->cpu_context };

    kmem_free( &req );

}  // end hal_cpu_context_destroy()

///////////////////////////////////////////////////
// Allocate an FPU context for <thread> (requested with AF_ZERO) and attach
// it to thread->fpu_context.
// Returns 0 on success, ENOMEM when the allocation fails.
error_t hal_fpu_context_create( thread_t * thread )
{
    // describe the allocation : one fpu context in kernel memory
    kmem_req_t  req = { .type  = KMEM_GENERIC,
                        .size  = sizeof(hal_fpu_context_t),
                        .flags = AF_KERNEL | AF_ZERO };

    hal_fpu_context_t * fpu_ctx = (hal_fpu_context_t *)kmem_alloc( &req );
    if( fpu_ctx == NULL ) return ENOMEM;

    // attach the new context to the thread descriptor
    thread->fpu_context = (void*)fpu_ctx;

    return 0;
}  // hal_fpu_context_create()

/////////////////////////////////////////////
// Give <dst> its own FPU context, filled with a byte-for-byte copy of the
// FPU context of <src>.
// Returns 0 on success, ENOMEM when the allocation fails.
error_t hal_fpu_context_copy( thread_t * dst,
                              thread_t * src )
{
    // describe the allocation : one fpu context in kernel memory
    kmem_req_t  req = { .type  = KMEM_GENERIC,
                        .size  = sizeof(hal_fpu_context_t),
                        .flags = AF_KERNEL | AF_ZERO };

    hal_fpu_context_t * copy = (hal_fpu_context_t *)kmem_alloc( &req );
    if( copy == NULL ) return ENOMEM;

    // attach the new context to the destination thread
    dst->fpu_context = (void*)copy;

    // the source pointer is read only after the destination has been updated
    hal_fpu_context_t * origin = src->fpu_context;

    // duplicate the whole source FPU context
    memcpy( copy , origin , sizeof(*copy) );

    return 0;
}  // end hal_fpu_context_copy()

/////////////////////////////////////////////////
// Hand the FPU context of <thread> back to the kernel allocator.
// The thread->fpu_context field itself is left untouched.
void hal_fpu_context_destroy( thread_t * thread )
{
    // describe the buffer to release
    kmem_req_t  req = { .type = KMEM_GENERIC,
                        .ptr  = thread->fpu_context };

    kmem_free( &req );

}  // end hal_fpu_context_destroy()

//////////////////////////////////////////////      
// Store the current register state into the CPU context of <thread>.
// Saved values : $16-$23 (slots 0-7), $29, $30, $31 (slots 8-10),
// CP0 register $4 select 2 (slot 12), CP2 registers $0 and $1 (slots 13, 14).
// Slot 11 is not written by this function.
// NOTE(review): slot offsets are presumably the word offsets of the matching
// hal_cpu_context_t fields - confirm against the structure definition.
void hal_cpu_context_save( thread_t * thread )
{
    // the context address is handed to the asm block as a 32 bits integer
    uint32_t ctx = (uint32_t)thread->cpu_context;

    // $26 is used as scratch register for the coprocessor transfers, and the
    // "memory" clobber tells the compiler that the context is written here
    asm volatile(
    ".set noreorder                \n" 
    "sw      $16,   0*4(%0)        \n"   /* save s0 to slot 0                   */
    "sw      $17,   1*4(%0)        \n"   /* save s1 to slot 1                   */
    "sw      $18,   2*4(%0)        \n"   /* save s2 to slot 2                   */
    "sw      $19,   3*4(%0)        \n"   /* save s3 to slot 3                   */
    "sw      $20,   4*4(%0)        \n"   /* save s4 to slot 4                   */
    "sw      $21,   5*4(%0)        \n"   /* save s5 to slot 5                   */
    "sw      $22,   6*4(%0)        \n"   /* save s6 to slot 6                   */
    "sw      $23,   7*4(%0)        \n"   /* save s7 to slot 7                   */
    "sw      $29,   8*4(%0)        \n"   /* save sp to slot 8                   */
    "sw      $30,   9*4(%0)        \n"   /* save fp to slot 9                   */
    "sw      $31,   10*4(%0)       \n"   /* save ra to slot 10                  */
    "mfc0	 $26,   $4,	  2        \n"   /* get c0_th from CP0                  */
	"sw      $26,   12*4(%0)       \n"   /* save c0_th to slot 12               */
	"mfc2    $26,   $0             \n"   /* get c2_ptpr from CP2                */
	"sw      $26,   13*4(%0)       \n"   /* save c2_ptpr to slot 13             */
	"mfc2    $26,   $1             \n"   /* get c2_mode from CP2                */
	"sw      $26,   14*4(%0)       \n"   /* save c2_mode to slot 14             */
    "sync                          \n"   /* sync issued after the last store    */
	".set reorder                  \n"
    : : "r"( ctx ) : "$26" , "memory" ); 
}

/////////////////////////////////////////////////      
// Reload the register state recorded in the CPU context of <thread>.
// Restored values : $16-$23 (slots 0-7), $29, $30, $31 (slots 8-10),
// CP0 register $4 select 2 (slot 12), CP2 registers $0 and $1 (slots 13, 14).
// Slot 11 is not read by this function.
// As $29 and $31 are overwritten, the code executed after the asm block
// runs with the restored stack pointer and return address.
// NOTE(review): slot offsets are presumably the word offsets of the matching
// hal_cpu_context_t fields - confirm against the structure definition.
void hal_cpu_context_restore( thread_t * thread )
{
    // the context address is handed to the asm block as a 32 bits integer
    uint32_t ctx = (uint32_t)thread->cpu_context;

    // $26 is used as scratch register for the coprocessor transfers.
    // The "memory" clobber (also used by hal_cpu_context_save) tells the
    // compiler that the asm block reads the context from memory, so any
    // pending store to the context must be completed before the block.
    asm volatile(
    ".set noreorder                \n"
    "nop                           \n"
	"lw      $16,  0*4(%0)         \n"   /* restore s0_16                       */
	"lw      $17,  1*4(%0)         \n"   /* restore s1_17                       */
    "lw      $18,  2*4(%0)         \n"   /* restore s2_18                       */
    "lw      $19,  3*4(%0)         \n"   /* restore s3_19                       */
    "lw      $20,  4*4(%0)         \n"   /* restore s4_20                       */
    "lw      $21,  5*4(%0)         \n"   /* restore s5_21                       */
    "lw      $22,  6*4(%0)         \n"   /* restore s6_22                       */
    "lw      $23,  7*4(%0)         \n"   /* restore s7_23                       */
	"lw      $29,  8*4(%0)         \n"   /* restore sp_29                       */
	"lw      $30,  9*4(%0)         \n"   /* restore fp_30                       */
	"lw  	 $31,  10*4(%0)        \n"   /* restore ra_31                       */
    "lw      $26,  12*4(%0)        \n"   /* get c0_th from slot 12              */
	"mtc0    $26,  $4,    2        \n"   /* restore c0_th                       */
    "lw      $26,  13*4(%0)        \n"   /* get c2_ptpr from slot 13            */
	"mtc2    $26,  $0              \n"   /* restore c2_ptpr                     */
    "lw      $26,  14*4(%0)        \n"   /* get c2_mode from slot 14            */
	"mtc2    $26,  $1              \n"   /* restore c2_mode                     */
    ".set reorder                  \n"
    : : "r"(ctx)
      : "$16","$17","$18","$19","$20","$21","$22","$23","$26","$29","$30","$31",
        "memory" ); 
}

//////////////////////////////////////////////
// Launch <thread> from its CPU context.
// The stack pointer, frame pointer, c0_th, c0_sr, c2_ptpr and c2_mode values
// are taken from the context; the entry function and its argument are taken
// from the thread descriptor. The argument is copied into $4 and into the
// word just below the initial stack pointer, the entry function address is
// written to c0_epc, and the final eret jumps to it : this function does
// not return to its caller.
// The THREAD_FLAG_LOADABLE flag of the thread is cleared before the jump.
void hal_cpu_context_load( thread_t * thread )
{
    // get relevant values from thread context
    hal_cpu_context_t * ctx     = (hal_cpu_context_t *)thread->cpu_context;     
    uint32_t            sp_29   = ctx->sp_29;
    uint32_t            fp_30   = ctx->fp_30;
    uint32_t            c0_th   = ctx->c0_th;
    uint32_t            c0_sr   = ctx->c0_sr;
    uint32_t            c2_ptpr = ctx->c2_ptpr;
    uint32_t            c2_mode = ctx->c2_mode;
 
    // get pointer on entry function & argument from thread attributes
    uint32_t            func    = (uint32_t)thread->entry_func;
    uint32_t            args    = (uint32_t)thread->entry_args;

    // reset loadable field in thread descriptor 
    thread->flags &= ~THREAD_FLAG_LOADABLE;

    // load registers
    // - $26 holds the decremented stack pointer and $27 the status register;
    //   $29 must therefore be loaded from $26 ($16 is never set here).
    // - the "memory" clobber is required because the asm block writes to the
    //   stack and never returns : the update of thread->flags above must be
    //   completed before the block is entered.
    asm volatile(
    ".set noreorder                \n"
	"or       $26,    %0,    $0    \n"   /* $26 <= stack pointer                */
	"or       $27,    %2,    $0    \n"   /* $27 <= status register              */
	"addiu    $26,    $26,  -4     \n"   /* decrement stack pointer             */
	"or       $4,     %7,   $0     \n"   /* load a0                             */
	"sw       $4,     ($26)        \n"   /* set entry_args in stack             */
	"ori      $27,    $27,  0x2    \n"   /* set EXL flag in status register     */
	"mtc0     $27,    $12          \n"   /* load c0_sr                          */
	"mtc0     %3,     $4,    2     \n"   /* load c0_th                          */
	"mtc2     %4,     $0           \n"   /* load c2 ptpr                        */
	"mtc0     %6,     $14          \n"   /* load c0_epc                         */
	"or	      $29,	  $26,  $0     \n"   /* load sp_29 from $26                 */
	"or	      $30,	  %1,   $0     \n"   /* load fp_30                          */
    "mtc2     %5,     $1           \n"   /* load c2_mode                        */
    "nop                           \n"
    "eret                          \n"   /* jump to entry function              */
    "nop                           \n"
    ".set reorder                  \n"
    : 
    : "r"(sp_29),"r"(fp_30),"r"(c0_sr),"r"(c0_th),
      "r"(c2_ptpr),"r"(c2_mode),"r"(func),"r"(args)
    : "$4","$26","$27","$29","$30","memory" );

}  // end hal_cpu_context_load()


//////////////////////////////////////////////
// Store the 32 floating point registers $f0-$f31 into the FPU context of
// <thread>, register $fN going to the word at offset N*4.
void hal_fpu_context_save( thread_t * thread )
{
    // the context address is handed to the asm block as a 32 bits integer
    uint32_t ctx = (uint32_t)thread->fpu_context;

    // the "memory" clobber tells the compiler that the asm block writes
    // the FPU context, so values cached from it must not be reused
    asm volatile(
    ".set noreorder           \n"
    "swc1    $f0,    0*4(%0)  \n"   
    "swc1    $f1,    1*4(%0)  \n"   
    "swc1    $f2,    2*4(%0)  \n"   
    "swc1    $f3,    3*4(%0)  \n"   
    "swc1    $f4,    4*4(%0)  \n"   
    "swc1    $f5,    5*4(%0)  \n"   
    "swc1    $f6,    6*4(%0)  \n"   
    "swc1    $f7,    7*4(%0)  \n"   
    "swc1    $f8,    8*4(%0)  \n"   
    "swc1    $f9,    9*4(%0)  \n"   
    "swc1    $f10,  10*4(%0)  \n"   
    "swc1    $f11,  11*4(%0)  \n"   
    "swc1    $f12,  12*4(%0)  \n"   
    "swc1    $f13,  13*4(%0)  \n"   
    "swc1    $f14,  14*4(%0)  \n"   
    "swc1    $f15,  15*4(%0)  \n"   
    "swc1    $f16,  16*4(%0)  \n"   
    "swc1    $f17,  17*4(%0)  \n"   
    "swc1    $f18,  18*4(%0)  \n"   
    "swc1    $f19,  19*4(%0)  \n"   
    "swc1    $f20,  20*4(%0)  \n"   
    "swc1    $f21,  21*4(%0)  \n"   
    "swc1    $f22,  22*4(%0)  \n"   
    "swc1    $f23,  23*4(%0)  \n"   
    "swc1    $f24,  24*4(%0)  \n"   
    "swc1    $f25,  25*4(%0)  \n"   
    "swc1    $f26,  26*4(%0)  \n"   
    "swc1    $f27,  27*4(%0)  \n"   
    "swc1    $f28,  28*4(%0)  \n"   
    "swc1    $f29,  29*4(%0)  \n"   
    "swc1    $f30,  30*4(%0)  \n"   
    "swc1    $f31,  31*4(%0)  \n"   
    ".set reorder             \n"
    : : "r"(ctx) : "memory" );

}  // end hal_fpu_context_save()

/////////////////////////////////////////////////
// Reload the 32 floating point registers $f0-$f31 from the FPU context of
// <thread>, register $fN being read from the word at offset N*4.
// NOTE(review): the overwritten $f registers are not declared as clobbers;
// presumably the kernel code itself never keeps values in them - confirm.
void hal_fpu_context_restore( thread_t * thread )
{
    // the context address is handed to the asm block as a 32 bits integer
    uint32_t ctx = (uint32_t)thread->fpu_context;

    // the "memory" clobber tells the compiler that the asm block reads the
    // FPU context, so any pending store to it must be completed first
    asm volatile(
    ".set noreorder           \n"
    "lwc1    $f0,    0*4(%0)  \n"   
    "lwc1    $f1,    1*4(%0)  \n"   
    "lwc1    $f2,    2*4(%0)  \n"   
    "lwc1    $f3,    3*4(%0)  \n"   
    "lwc1    $f4,    4*4(%0)  \n"   
    "lwc1    $f5,    5*4(%0)  \n"   
    "lwc1    $f6,    6*4(%0)  \n"   
    "lwc1    $f7,    7*4(%0)  \n"   
    "lwc1    $f8,    8*4(%0)  \n"   
    "lwc1    $f9,    9*4(%0)  \n"   
    "lwc1    $f10,  10*4(%0)  \n"   
    "lwc1    $f11,  11*4(%0)  \n"   
    "lwc1    $f12,  12*4(%0)  \n"   
    "lwc1    $f13,  13*4(%0)  \n"   
    "lwc1    $f14,  14*4(%0)  \n"   
    "lwc1    $f15,  15*4(%0)  \n"   
    "lwc1    $f16,  16*4(%0)  \n"   
    "lwc1    $f17,  17*4(%0)  \n"   
    "lwc1    $f18,  18*4(%0)  \n"   
    "lwc1    $f19,  19*4(%0)  \n"   
    "lwc1    $f20,  20*4(%0)  \n"   
    "lwc1    $f21,  21*4(%0)  \n"   
    "lwc1    $f22,  22*4(%0)  \n"   
    "lwc1    $f23,  23*4(%0)  \n"   
    "lwc1    $f24,  24*4(%0)  \n"   
    "lwc1    $f25,  25*4(%0)  \n"   
    "lwc1    $f26,  26*4(%0)  \n"   
    "lwc1    $f27,  27*4(%0)  \n"   
    "lwc1    $f28,  28*4(%0)  \n"   
    "lwc1    $f29,  29*4(%0)  \n"   
    "lwc1    $f30,  30*4(%0)  \n"   
    "lwc1    $f31,  31*4(%0)  \n"   
    ".set reorder             \n"
    : : "r"(ctx) : "memory" );

} // end hal_fpu_context_restore()

/////////////////////////////////////
// Copy one complete FPU context from the location designated by <src> to the
// location designated by <dst>, using hal_remote_memcpy().
// No allocation is done here : both locations are provided by the caller.
void hal_fpu_context_dup( xptr_t dst,
                          xptr_t src )
{
    // number of bytes in one FPU context
    const uint32_t length = sizeof(hal_fpu_context_t);

    hal_remote_memcpy( dst , src , length );
}

