/*
 * hal_uspace.c - implementation of Generic User Space Access API for MIPS32
 * 
 * Author        Alain Greiner   (2016,2017,2018,2019,2020)
 *
 * Copyright (c) UPMC Sorbonne Universites
 * 
 * This file is part of ALMOS-MKH..
 *
 * ALMOS-MKH. is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License as published by
 * the Free Software Foundation; version 2.0 of the License.
 *
 * ALMOS-MKH. is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with ALMOS-MKH.; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
 */

#include <hal_kernel_types.h>
#include <hal_uspace.h>
#include <hal_special.h>
#include <hal_irqmask.h>

#include <printk.h>
#include <thread.h>

///////////////////////////////////////////////////////////////////////////////////////
// This function moves <size> bytes from a source buffer in user virtual space,
// defined by the <u_src_ptr> argument, to a destination kernel buffer, defined by the
// <k_dst_xp> argument.
// It works in a critical section (IRQs masked), as it modifies two CP2 registers:
// - CP2 register $1 (MMU_MODE)  : the 0x4 bit (DTLB) is set just before each load
//   from the user buffer, and the previous mode is restored just after it.
// - CP2 register $24 (MMU_DEXT) : loaded with the cluster identifier extracted from
//   <k_dst_xp> during the transfer, and restored to its previous value on exit.
// If the two buffers are aligned on a word boundary, it moves the data word per word
// in a first loop, and moves byte per byte the remaining bytes in a second loop.
// If the buffers are not aligned, it moves all data byte per byte.
// Addresses and counters are unsigned values : they are updated with "addiu",
// because "addi" raises an Integer Overflow exception on a signed 32 bits overflow
// (i.e. when a value crosses the 0x7FFFFFFF / 0x80000000 boundary).
///////////////////////////////////////////////////////////////////////////////////////
// @ k_dst_xp  : extended pointer on destination kernel buffer
// @ u_src_ptr : pointer on source user buffer
// @ size      : number of bytes to move
///////////////////////////////////////////////////////////////////////////////////////
void hal_copy_from_uspace( xptr_t     k_dst_xp,
                           void     * u_src_ptr,
                           uint32_t   size )  
{
    uint32_t save_sr;
    uint32_t words;                            // number of words (if buffers aligned)
    uint32_t src = (uint32_t)u_src_ptr;
    uint32_t dst = (uint32_t)GET_PTR( k_dst_xp );
    uint32_t cxy = (uint32_t)GET_CXY( k_dst_xp );
 
#if DEBUG_HAL_USPACE
thread_t * this  = CURRENT_THREAD;
uint32_t   cycle = (uint32_t)hal_get_cycles();
if( cycle > DEBUG_HAL_USPACE )
printk("\n[%s] thread[%x,%x] enter / %d bytes / u_buf(%x,%x) -> k_buf(%x,%x) / cycle %d\n", 
__FUNCTION__, this->process->pid, this->trdid, size, local_cxy, src, cxy, dst, cycle );
#endif

    // word accesses are used only when both buffers are word aligned
    if( (dst & 0x3) || (src & 0x3) ) words = 0;          // do it all in bytes
    else                             words = size >> 2;

    // enter critical section
    hal_disable_irq( &save_sr );

    // ".set noreorder" : the instruction following each branch / jump
    // is executed in the delay slot, whatever the branch outcome.
    asm volatile( ".set noreorder             \n"
 
                  /* initialise registers                                         */
                  "move   $8,    %0           \n"   /* $8  <= src                 */
                  "move   $9,    %1           \n"   /* $9  <= dst                 */
                  "move   $10,   %2           \n"   /* $10 <= words               */
                  "move   $11,   %3           \n"   /* $11 <= size                */
                  "mfc2   $12,   $1           \n"   /* $12 <= old MMU_MODE        */
                  "ori    $13,   $12,   0x4   \n"   /* $13 <= MMU_MODE with DTLB  */

                  /* save MMU_DEXT register                                       */
                  "mfc2   $16,   $24          \n"   /* $16 <= old MMU_DEXT        */
                  "mtc2   %4,    $24          \n"   /* MMU_DEXT <= dst_cxy        */

                  /* transfer one word per iteration in first loop if aligned     */
                  "move   $15,   $10          \n"   /* $15 <= words ($15 == i)    */
                  "1:                         \n"
                  "beq    $15,   $0,    2f    \n"   /* exit loop if (i==0)        */
                  "nop                        \n"   /* (delay slot)               */
                  "mtc2   $13,   $1           \n"   /* MMU_MODE <= DTLB ON        */
                  "lw     $14,   0($8)        \n"   /* word from user space       */
                  "mtc2   $12,   $1           \n"   /* restore old MMU_MODE       */
                  "sw     $14,   0($9)        \n"   /* word to kernel space       */
                  "addiu  $15,   $15,   -1    \n"   /* i--                        */
                  "addiu  $8,    $8,    4     \n"   /* src += 4 bytes             */
                  "j             1b           \n"   
                  "addiu  $9,    $9,    4     \n"   /* dst += 4 bytes (delay slot)*/

                  /* transfer one byte per iteration in this second loop          */
                  "2:                         \n"
                  "sll    $15,   $10,   2     \n"   /* $15 <= words*4 ($15 == i)  */
                  "3:                         \n"
                  "beq    $15,   $11,   4f    \n"   /* exit loop if (i == size)   */
                  "nop                        \n"   /* (delay slot)               */
                  "mtc2   $13,   $1           \n"   /* MMU_MODE <= DTLB ON        */
                  "lb     $14,   0($8)        \n"   /* byte from user space       */
                  "mtc2   $12,   $1           \n"   /* restore old MMU_MODE       */
                  "sb     $14,   0($9)        \n"   /* byte to kernel space       */
                  "addiu  $15,   $15,   1     \n"   /* i++                        */
                  "addiu  $8,    $8,    1     \n"   /* src += 1 byte              */
                  "j             3b           \n"   
                  "addiu  $9,    $9,    1     \n"   /* dst += 1 byte (delay slot) */

                  /* restore MMU_DEXT register                                    */
                  "4:                         \n"
                  "mtc2   $16,   $24          \n"   /* MMU_DEXT <= $16            */
                  ".set reorder               \n"
                  : 
                  : "r"(src) , "r"(dst) , "r"(words) , "r"(size) , "r"(cxy)
                  : "$8","$9","$10","$11","$12","$13","$14","$15","$16","memory" );

    // exit critical section
    hal_restore_irq( save_sr );

#if DEBUG_HAL_USPACE
cycle = (uint32_t)hal_get_cycles();
if( cycle > DEBUG_HAL_USPACE )
printk("\n[%s] thread[%x,%x] moved %d bytes / u_buf(%x,%x) -> k_buf(%x,%x) / cycle %d\n", 
__FUNCTION__, this->process->pid, this->trdid, size, local_cxy, src, cxy, dst, cycle );
#endif

}  // end hal_copy_from_uspace()

///////////////////////////////////////////////////////////////////////////////////////
// This function moves <size> bytes from a source kernel buffer, defined by the
// <k_src_xp> argument, to a destination buffer in user virtual space, defined by 
// the <u_dst_ptr> argument.
// It works in a critical section (IRQs masked), as it modifies two CP2 registers:
// - CP2 register $1 (MMU_MODE)  : the 0x4 bit (DTLB) is set just before each store
//   to the user buffer, and the previous mode is restored just after it.
// - CP2 register $24 (MMU_DEXT) : loaded with the cluster identifier extracted from
//   <k_src_xp> during the transfer, and restored to its previous value on exit.
// If the two buffers are aligned on a word boundary, it moves the data word per word
// in a first loop, and moves byte per byte the remaining bytes in a second loop.
// If the buffers are not word aligned, it moves all data byte per byte.
// Addresses and counters are unsigned values : they are updated with "addiu",
// because "addi" raises an Integer Overflow exception on a signed 32 bits overflow
// (i.e. when a value crosses the 0x7FFFFFFF / 0x80000000 boundary).
///////////////////////////////////////////////////////////////////////////////////////
// @ u_dst_ptr : pointer on destination user buffer
// @ k_src_xp  : extended pointer on source kernel buffer
// @ size      : number of bytes to move
///////////////////////////////////////////////////////////////////////////////////////
void hal_copy_to_uspace( void     * u_dst_ptr,
                         xptr_t     k_src_xp,
                         uint32_t   size )
{
    uint32_t save_sr;
    uint32_t words;                           // number of words (if buffers aligned)
    uint32_t dst = (uint32_t)u_dst_ptr;
    uint32_t src = (uint32_t)GET_PTR( k_src_xp );
    uint32_t cxy = (uint32_t)GET_CXY( k_src_xp );

#if DEBUG_HAL_USPACE
thread_t * this  = CURRENT_THREAD;
uint32_t   cycle = (uint32_t)hal_get_cycles();
if( cycle > DEBUG_HAL_USPACE )
printk("\n[%s] thread[%x,%x] enter / %d bytes / k_buf(%x,%x) -> u_buf(%x,%x) / cycle %d\n", 
__FUNCTION__, this->process->pid, this->trdid, size, cxy, src, local_cxy, dst, cycle );
#endif

    // word accesses are used only when both buffers are word aligned
    if( (dst & 0x3) || (src & 0x3) ) words = 0;          // not aligned
    else                             words = size >> 2;

    // enter critical section
    hal_disable_irq( &save_sr );

    // ".set noreorder" : the instruction following each branch / jump
    // is executed in the delay slot, whatever the branch outcome.
    asm volatile( ".set noreorder             \n"
 
                  /* initialise registers                                         */
                  "move   $8,    %0           \n"   /* $8  <= k_src               */
                  "move   $9,    %1           \n"   /* $9  <= u_dst               */
                  "move   $10,   %2           \n"   /* $10 <= words               */
                  "move   $11,   %3           \n"   /* $11 <= size                */
                  "mfc2   $12,   $1           \n"   /* $12 <= old MMU_MODE        */
                  "ori    $13,   $12,   0x4   \n"   /* $13 <= MMU_MODE with DTLB  */

                  /* save MMU_DEXT register                                       */
                  "mfc2   $16,   $24          \n"   /* $16 <= old MMU_DEXT        */
                  "mtc2   %4,    $24          \n"   /* MMU_DEXT <= cxy            */

                  /* transfer one word per iteration in first loop if aligned     */
                  "move   $15,   $10          \n"   /* $15 <= words ($15 == i)    */
                  "1:                         \n"
                  "beq    $15,   $0,    2f    \n"   /* exit loop if (i==0)        */
                  "nop                        \n"   /* (delay slot)               */
                  "lw     $14,   0($8)        \n"   /* load from kernel space     */
                  "mtc2   $13,   $1           \n"   /* MMU_MODE <= DTLB ON        */
                  "sw     $14,   0($9)        \n"   /* store to user space        */
                  "mtc2   $12,   $1           \n"   /* restore old MMU_MODE       */
                  "addiu  $15,   $15,   -1    \n"   /* i--                        */
                  "addiu  $8,    $8,    4     \n"   /* src += 4 bytes             */
                  "j             1b           \n"   
                  "addiu  $9,    $9,    4     \n"   /* dst += 4 bytes (delay slot)*/

                  /* transfer one byte per iteration in this second loop          */
                  "2:                         \n"
                  "sll    $15,   $10,   2     \n"   /* $15 <= words*4 ($15 == i)  */
                  "3:                         \n"
                  "beq    $15,   $11,   4f    \n"   /* exit loop if (i == size)   */
                  "nop                        \n"   /* (delay slot)               */
                  "lb     $14,   0($8)        \n"   /* byte from kernel space     */
                  "mtc2   $13,   $1           \n"   /* MMU_MODE <= DTLB ON        */
                  "sb     $14,   0($9)        \n"   /* byte to user space         */
                  "mtc2   $12,   $1           \n"   /* restore old MMU_MODE       */
                  "addiu  $15,   $15,   1     \n"   /* i++                        */
                  "addiu  $8,    $8,    1     \n"   /* src += 1 byte              */
                  "j             3b           \n"   
                  "addiu  $9,    $9,    1     \n"   /* dst += 1 byte (delay slot) */

                  /* restore MMU_DEXT register                                    */
                  "4:                         \n"
                  "mtc2   $16,   $24          \n"   /* MMU_DEXT <= $16            */
                  ".set reorder               \n"
                  : 
                  : "r"(src) , "r"(dst) , "r"(words) , "r"(size) , "r"(cxy)
                  : "$8","$9","$10","$11","$12","$13","$14","$15","$16","memory" );

    // exit critical section
    hal_restore_irq( save_sr );

#if DEBUG_HAL_USPACE
cycle = (uint32_t)hal_get_cycles();
if( cycle > DEBUG_HAL_USPACE )
printk("\n[%s] thread[%x,%x] moved %d bytes / k_buf(%x,%x) -> u_buf(%x,%x) / cycle %d\n", 
__FUNCTION__, this->process->pid, this->trdid, size, cxy, src, local_cxy, dst, cycle );
#endif

}  // end hal_copy_to_uspace()

///////////////////////////////////////////////////////////////////////////////////////
// This function copies a character string from a source buffer in user virtual
// space, defined by the <u_src_ptr> argument, to a destination kernel buffer,
// defined by the <k_dst_xp> argument.
// The copy stops after the NUL character has been copied, or after <size> characters
// have been copied, whichever comes first. In the second case the destination
// string is NOT NUL-terminated. Nothing is copied when <size> is zero.
// It works in a critical section (IRQs masked), as it modifies two CP2 registers:
// - CP2 register $1 (MMU_MODE)  : the 0x4 bit (DTLB) is set just before each load
//   from the user buffer, and the previous mode is restored just after it.
// - CP2 register $24 (MMU_DEXT) : loaded with the cluster identifier extracted from
//   <k_dst_xp> during the transfer, and restored to its previous value on exit.
///////////////////////////////////////////////////////////////////////////////////////
// @ k_dst_xp  : extended pointer on destination kernel buffer
// @ u_src_ptr : pointer on source user buffer
// @ size      : max number of characters to copy
///////////////////////////////////////////////////////////////////////////////////////
void hal_strcpy_from_uspace( xptr_t     k_dst_xp,
                             char     * u_src_ptr,
                             uint32_t   size )
{
    uint32_t save_sr;
    uint32_t src = (uint32_t)u_src_ptr;
    uint32_t dst = (uint32_t)GET_PTR( k_dst_xp );
    uint32_t cxy = (uint32_t)GET_CXY( k_dst_xp );

    // The loop below tests the count only after it has been decremented :
    // a null <size> would wrap to 0xFFFFFFFF and make the copy unbounded.
    if( size == 0 ) return;

    hal_disable_irq( &save_sr );

    // loop on characters while ( (character != NUL) and (count < size ) )
    // ".set noreorder" : the instruction following each branch / jump
    // is executed in the delay slot, whatever the branch outcome.
    // The "memory" clobber is required, as the kernel buffer is written
    // by the assembly code without being described as an output operand.

    asm volatile(
        ".set noreorder             \n"

        /* save old MMU_DEXT and set cxy in it                              */
        "mfc2   $16,   $24          \n"   /* $16 <= old MMU_DEXT            */
        "mtc2   %3,    $24          \n"   /* MMU_DEXT <= cxy                */

        "move   $11,   %0           \n"   /* $11 <= count == size           */
        "move   $12,   %1           \n"   /* $12 <= u_src                   */
        "move   $13,   %2           \n"   /* $13 <= k_dst                   */
        "mfc2   $15,   $1           \n"   /* $15 <= old MMU_MODE            */
        "ori    $14,   $15,  0x4    \n"   /* $14 <= MMU_MODE / DTLB ON      */

        "1:                         \n"
        "mtc2   $14,   $1           \n"   /* MMU_MODE <= DTLB ON            */
        "lb     $10,   0($12)       \n"   /* read char from user space      */
        "mtc2   $15,   $1           \n"   /* restore old MMU_MODE           */
        "sb     $10,   0($13)       \n"   /* store char to kernel space     */
        "beq    $10,   $0,   2f     \n"   /* exit if char == 0              */
        "addiu  $11,   $11, -1      \n"   /* decrement count (delay slot)   */
        "addiu  $12,   $12,  1      \n"   /* increment u_src pointer        */
        "beq    $11,   $0,   2f     \n"   /* exit if count == 0             */
        "addiu  $13,   $13,  1      \n"   /* increment k_dst (delay slot)   */
        "j                   1b     \n"   /* jump to next iteration         */
        "2:                         \n"
        "nop                        \n"   /* delay slot of the jump above   */

        /* restore old MMU_DEXT register                                    */
        "mtc2   $16,   $24          \n"   /* MMU_DEXT <= $16                */

        ".set reorder               \n"
        : 
        : "r"(size) , "r"(src) , "r"(dst) , "r"(cxy)
        : "$10","$11","$12","$13","$14","$15","$16","memory" );
        
    hal_restore_irq( save_sr ); 

} // hal_strcpy_from_uspace()

///////////////////////////////////////////////////////////////////////////////////////
// This function copies a character string from a source kernel buffer, defined by
// the <k_src_xp> argument, to a destination buffer in user virtual space, defined
// by the <u_dst_ptr> argument.
// The copy stops after the NUL character has been copied, or after <size> characters
// have been copied, whichever comes first. In the second case the destination
// string is NOT NUL-terminated. Nothing is copied when <size> is zero.
// It works in a critical section (IRQs masked), as it modifies two CP2 registers:
// - CP2 register $1 (MMU_MODE)  : the 0x4 bit (DTLB) is set just before each store
//   to the user buffer, and the previous mode is restored just after it.
// - CP2 register $24 (MMU_DEXT) : loaded with the cluster identifier extracted from
//   <k_src_xp> during the transfer, and restored to its previous value on exit.
///////////////////////////////////////////////////////////////////////////////////////
// @ u_dst_ptr : pointer on destination user buffer
// @ k_src_xp  : extended pointer on source kernel buffer
// @ size      : max number of characters to copy
///////////////////////////////////////////////////////////////////////////////////////
void hal_strcpy_to_uspace( char     * u_dst_ptr,
                           xptr_t     k_src_xp,
                           uint32_t   size )
{
    uint32_t save_sr;
    uint32_t dst = (uint32_t)u_dst_ptr;
    uint32_t src = (uint32_t)GET_PTR( k_src_xp );
    uint32_t cxy = (uint32_t)GET_CXY( k_src_xp );

    // The loop below tests the count only after it has been decremented :
    // a null <size> would wrap to 0xFFFFFFFF and make the copy unbounded.
    if( size == 0 ) return;

    hal_disable_irq( &save_sr );

    // loop on characters while ( (character != NUL) and (count < size) ) 
    // ".set noreorder" : the instruction following each branch / jump
    // is executed in the delay slot, whatever the branch outcome.
    // The "memory" clobber is required, as memory is read and written
    // by the assembly code without being described in the operands.

    asm volatile(
        ".set noreorder             \n"

        /* save old MMU_DEXT and set cxy in it                              */
        "mfc2   $16,   $24          \n"   /* $16 <= old MMU_DEXT            */
        "mtc2   %3,    $24          \n"   /* MMU_DEXT <= cxy                */

        "move   $11,   %0           \n"   /* $11 <= count == size           */
        "move   $12,   %1           \n"   /* $12 <= k_src                   */
        "move   $13,   %2           \n"   /* $13 <= u_dst                   */
        "mfc2   $15,   $1           \n"   /* $15 <= old MMU_MODE            */
        "ori    $14,   $15,  0x4    \n"   /* $14 <= MMU_MODE / DTLB ON      */

        "1:                         \n"
        "lb     $10,   0($12)       \n"   /* read char from kernel space    */
        "mtc2   $14,   $1           \n"   /* MMU_MODE <= DTLB ON            */
        "sb     $10,   0($13)       \n"   /* store char to user space       */
        "mtc2   $15,   $1           \n"   /* restore old MMU_MODE           */
        "beq    $10,   $0,   2f     \n"   /* exit if char == 0              */
        "addiu  $11,   $11, -1      \n"   /* decrement count (delay slot)   */
        "addiu  $12,   $12,  1      \n"   /* increment k_src pointer        */
        "beq    $11,   $0,   2f     \n"   /* exit if count == 0             */
        "addiu  $13,   $13,  1      \n"   /* increment u_dst (delay slot)   */
        "j                   1b     \n"   /* jump to next iteration         */
        "2:                         \n"
        "nop                        \n"   /* delay slot of the jump above   */

        /* restore old MMU_DEXT register                                    */
        "mtc2   $16,   $24          \n"   /* MMU_DEXT <= $16                */

        ".set reorder               \n"
        :
        : "r"(size) , "r"(src) , "r"(dst) , "r"(cxy)
        : "$10","$11","$12","$13","$14","$15","$16","memory" );
        
    hal_restore_irq( save_sr ); 

} // hal_strcpy_to_uspace()

///////////////////////////////////////////////////////////////////////////////////////
// This function scans a character string in user virtual space, defined by the
// <u_str> argument, until a NUL character is found, and returns the number of
// characters that have been read.
// The counter is incremented in the delay slot of the loop branch, that is executed
// on every iteration, including the last one : the returned value therefore counts
// the terminating NUL character (it is 1 for an empty string).
// NOTE(review) : confirm that callers expect the NUL to be counted.
// It works in a critical section (IRQs masked), as it modifies the CP2 register $1
// (MMU_MODE) : the 0x4 bit (DTLB) is set just before each load from the user
// buffer, and the previous mode is restored just after it.
// There is no bound on the number of characters scanned.
///////////////////////////////////////////////////////////////////////////////////////
// @ u_str  : pointer on the string in user space
// @ return the number of characters read, terminating NUL included.
///////////////////////////////////////////////////////////////////////////////////////
uint32_t hal_strlen_from_uspace( char * u_str )
{
    uint32_t save_sr;
    uint32_t count = 0;
    uint32_t str   = (uint32_t)u_str;

    hal_disable_irq( &save_sr ); 

    // The address and the counter are unsigned values, updated with "addiu" :
    // "addi" would raise an Integer Overflow exception on a signed overflow.
    // The "memory" clobber is required, as memory is read by the assembly
    // code without being described as an input operand.

    asm volatile(
        ".set noreorder             \n"
        "move   $13,   %1           \n"   /* $13 <= str                     */
        "mfc2   $15,   $1           \n"   /* $15 <= old MMU_MODE            */
        "ori    $14,   $15,  0x4    \n"   /* $14 <= MMU_MODE / DTLB ON      */
        "1:                         \n"
        "mtc2   $14,   $1           \n"   /* MMU_MODE <= DTLB ON            */
        "lb     $12,   0($13)       \n"   /* $12 <= one byte from u_space   */
        "mtc2   $15,   $1           \n"   /* restore old MMU_MODE           */
        "addiu  $13,   $13,  1      \n"   /* increment address              */
        "bne    $12,   $0,   1b     \n"   /* loop until NUL found           */
        "addiu  %0,    %0,   1      \n"   /* increment count (delay slot)   */
        ".set reorder               \n"
        : "+r"(count) 
        : "r"(str) 
        : "$12","$13","$14","$15","memory" );

    hal_restore_irq( save_sr );

    return count;
}

