#include <stdarg.h>

#include <boot_tty_driver.h>
#include <hal_types.h>
#include <boot_utils.h>

/****************************************************************************
 *                              Remote accesses.                            *
 ****************************************************************************/

/*
 * Reads one 32-bit word from the location designated by the extended
 * pointer 'xp' and returns it.
 *
 * The extended pointer is split into a local pointer (GET_PTR) and a cluster
 * identifier (GET_CXY). The cluster identifier is written to the CP2
 * data-address extension register for the duration of the load, and the
 * previous register value is restored afterwards.
 *
 * NOTE(review): the clobber list has no "memory" entry, unlike
 * boot_remote_sw() - confirm that the compiler is not expected to order
 * this load against surrounding ordinary memory accesses.
 */
uint32_t boot_remote_lw(xptr_t xp)
{
    uint32_t res;   /* Value read from the remote address.                  */
    uint32_t ptr;   /* Classic pointer to the distant memory location.      */
    uint32_t cxy;   /* Identifier of the cluster containing the distant
                       memory location.                                     */

    /* Extracting information from the extended pointer. */
    ptr = (uint32_t)GET_PTR(xp);
    cxy = (uint32_t)GET_CXY(xp);

    /*
     * $15 is used as a scratch register to save and restore the extension
     * register, which is why it appears in the clobber list.
     */
    asm volatile("mfc2  $15,    $24\n"  /* $15 <= CP2_DATA_PADDR_EXT        */
                 "mtc2  %2,     $24\n"  /* CP2_DATA_PADDR_EXT <= cxy        */
                 "lw    %0,   0(%1)\n"  /* res <= *ptr                      */
                 "mtc2  $15,    $24\n"  /* CP2_DATA_PADDR_EXT <= $15        */
                 "sync             \n"
                 : "=&r"(res)           /* Early-clobber output, so that it
                                           is never allocated to the same
                                           register as one of the inputs
                                           (it is written before 'ptr' and
                                           'cxy' are done being used).      */
                 : "r"(ptr), "r"(cxy)
                 : "$15"
                );

    return res;

} // boot_remote_lw()

/****************************************************************************/

/*
 * Writes the 32-bit word 'data' to the location designated by the extended
 * pointer 'xp'.
 *
 * The extended pointer is split into a local pointer (GET_PTR) and a cluster
 * identifier (GET_CXY). The cluster identifier is written to the CP2
 * data-address extension register for the duration of the store, and the
 * previous register value is restored afterwards.
 */
void boot_remote_sw(xptr_t xp, uint32_t data)
{
    uint32_t ptr;   /* Classic pointer to the distant memory location.      */
    uint32_t cxy;   /* Identifier of the cluster containing the distant
                       memory location.                                     */

    /* Extracting information from the extended pointer. */
    ptr = (uint32_t)GET_PTR(xp);
    cxy = (uint32_t)GET_CXY(xp);

    /*
     * $15 is used as a scratch register to save and restore the extension
     * register. The "memory" clobber tells the compiler that memory is
     * modified by this statement.
     */
    asm volatile("mfc2  $15,    $24\n"  /* $15 <= CP2_DATA_PADDR_EXT        */
                 "mtc2  %2,     $24\n"  /* CP2_DATA_PADDR_EXT <= cxy        */
                 "sw    %0,   0(%1)\n"  /* *ptr <= data                     */
                 "mtc2  $15,    $24\n"  /* CP2_DATA_PADDR_EXT <= $15        */
                 "sync             \n"
                 :
                 : "r"(data), "r"(ptr), "r"(cxy)
                 : "$15", "memory"
                );

} // boot_remote_sw()

/****************************************************************************/

/*
 * Atomically adds 'val' to the 32-bit word designated by the extended
 * pointer 'xp' and returns the value that was stored there before the
 * addition.
 *
 * The update is performed with a load-linked / store-conditional loop that
 * is retried until the store-conditional succeeds. The cluster identifier
 * extracted from 'xp' is written to the CP2 data-address extension register
 * for the duration of the loop, and the previous register value is restored
 * afterwards.
 *
 * NOTE(review): unlike boot_atomic_add() in this file, this sequence is not
 * wrapped in ".set noreorder" / ".set reorder" although it carries an
 * explicit delay-slot nop - confirm this is intended.
 */
int32_t boot_remote_atomic_add(xptr_t xp, int32_t val)
{
    int32_t  res;   /* Value stored at the distant memory location before
                       the atomic operation.                                */
    uint32_t ptr;   /* Classic pointer to the distant memory location.      */
    uint32_t cxy;   /* Identifier of the cluster containing the distant
                       memory location.                                     */

    /* Extracting information from the extended pointer. */
    ptr = (uint32_t)GET_PTR(xp);
    cxy = (uint32_t)GET_CXY(xp);

    /*
     * $15 saves the extension register, $3 holds the new value and then the
     * store-conditional status; both are declared as clobbered.
     */
    asm volatile("mfc2  $15,    $24       \n" /* $15 <= CP2_DATA_PADDR_EXT  */ 
                 "mtc2  %3,     $24       \n" /* CP2_DATA_PADDR_EXT <= cxy  */
                 "1:                      \n"
                 "ll    %0,   0(%1)       \n" /* res <= *ptr                */
                 "addu  $3,     %0,     %2\n" /* $3 <= res + val            */
                 "sc    $3,   0(%1)       \n" /* *ptr <= $3, $3 <= status   */
                 "beq   $3,     $0,     1b\n" /* Retry until success.       */
                 "nop                     \n" /* Delayed slot.              */
                 "mtc2  $15,    $24       \n" /* CP2_DATA_PADDR_EXT <= $15  */
                 "sync                    \n"
                 : "=&r"(res)               /* Early-clobber output, so that
                                               it is never allocated to the
                                               same register as one of the
                                               inputs.                      */
                 : "r"(ptr), "r"(val), "r"(cxy)
                 : "$3", "$15", "memory"
                );

    return res;

} // boot_remote_atomic_add()

/****************************************************************************/

/*
 * Copies 'size' bytes from the remote buffer 'src' to the remote buffer
 * 'dest', both designated by extended pointers.
 *
 * When both local addresses are word-aligned, the largest multiple of 4
 * bytes is copied word-by-word; whatever remains (everything, if one of the
 * addresses is not word-aligned) is copied byte-by-byte. For every element
 * the CP2 data-address extension register is switched to the source cluster
 * for the load, to the destination cluster for the store, then restored.
 */
void boot_remote_memcpy(xptr_t dest, xptr_t src, unsigned int size)
{
    uint32_t dst_base = (uint32_t)GET_PTR(dest);    /* Local destination address. */
    uint32_t dst_cxy  = (uint32_t)GET_CXY(dest);    /* Destination cluster.       */
    uint32_t src_base = (uint32_t)GET_PTR(src);     /* Local source address.      */
    uint32_t src_cxy  = (uint32_t)GET_CXY(src);     /* Source cluster.            */
    uint32_t word_bytes;    /* Number of bytes handled by the word loop.          */
    uint32_t offset;        /* Byte offset of the element being copied.           */

    /* The word loop is only usable when both addresses are word-aligned. */
    word_bytes = (((dst_base | src_base) & 0x3) == 0) ? (size & ~0x3U) : 0;

    /* Word-by-word part. */
    for (offset = 0; offset < word_bytes; offset += 4)
    {
        asm volatile("mfc2  $15,    $24\n"  /* $15 <= CP2_DATA_PADDR_EXT    */
                     "mtc2  %0,     $24\n"  /* CP2_DATA_PADDR_EXT <= scxy   */
                     "lw    $3,   0(%1)\n"  /* $3 <= source word            */
                     "mtc2  %2,     $24\n"  /* CP2_DATA_PADDR_EXT <= dcxy   */
                     "sw    $3,   0(%3)\n"  /* destination word <= $3       */
                     "mtc2  $15,    $24\n"  /* CP2_DATA_PADDR_EXT <= $15    */
                     "sync             \n"
                     :
                     : "r"(src_cxy), "r"(src_base + offset),
                       "r"(dst_cxy), "r"(dst_base + offset)
                     : "$3", "$15", "memory"
                    );
    }

    /* Byte-by-byte part, starting where the word loop stopped. */
    for (offset = word_bytes; offset < size; offset++)
    {
        asm volatile("mfc2  $15,    $24\n"  /* $15 <= CP2_DATA_PADDR_EXT    */
                     "mtc2  %0,     $24\n"  /* CP2_DATA_PADDR_EXT <= scxy   */
                     "lb    $3,   0(%1)\n"  /* $3 <= source byte            */
                     "mtc2  %2,     $24\n"  /* CP2_DATA_PADDR_EXT <= dcxy   */
                     "sb    $3,   0(%3)\n"  /* destination byte <= $3       */
                     "mtc2  $15,    $24\n"  /* CP2_DATA_PADDR_EXT <= $15    */
                     "sync             \n"
                     :
                     : "r"(src_cxy), "r"(src_base + offset),
                       "r"(dst_cxy), "r"(dst_base + offset)
                     : "$3", "$15", "memory"
                    );
    }

} // boot_remote_memcpy()

/****************************************************************************
 *                             Atomic operations.                           *
 ****************************************************************************/

/*
 * Atomically adds 'val' to the 32-bit word pointed to by 'ptr' and returns
 * the value that was stored there before the addition.
 *
 * The update is performed with a load-linked / store-conditional loop that
 * is retried until the store-conditional succeeds.
 */
int32_t boot_atomic_add(int32_t* ptr, int32_t val)
{
    int32_t res;    /* Value of the variable before the atomic operation.   */

    /*
     * The sequence is assembled in 'noreorder' mode, so the branch delay
     * slot is filled explicitly with a nop. $3 holds the new value and then
     * the store-conditional status; it is declared as clobbered.
     */
    asm volatile(".set noreorder          \n" 
                 "1:                      \n"
                 "ll    %0,   0(%1)       \n" /* res <= *ptr                */
                 "addu  $3,     %0,     %2\n" /* $3 <= res + val            */
                 "sc    $3,   0(%1)       \n" /* *ptr <= $3, $3 <= status   */
                 "beq   $3,     $0,     1b\n" /* Retry until success.       */
                 "nop                     \n" /* Delay slot.                */
                 "sync                    \n"
                 ".set reorder            \n"
                 : "=&r"(res)               /* Early-clobber output, so that
                                               it is never allocated to the
                                               same register as one of the
                                               inputs.                      */
                 : "r"(ptr), "r"(val)
                 : "$3", "memory"
                );

    return res;

} // boot_atomic_add()

/****************************************************************************
 *                             Memory functions.                            *
 ****************************************************************************/

/*
 * Copies 'size' bytes from 'src' to 'dest'. The two buffers must not
 * overlap.
 *
 * When both addresses are word-aligned, the largest multiple of 4 bytes is
 * copied word-by-word; the remaining bytes (everything, if one of the
 * addresses is not word-aligned) are copied byte-by-byte.
 */
void boot_memcpy(void* dest, void* src, unsigned int size)
{
    /*
     * Typed cursors: incrementing the 'void*' arguments directly only moves
     * them by one byte (GNU extension), whatever the cast applied to the
     * result of the increment.
     */
    unsigned char* dst_pt = (unsigned char*)dest;
    unsigned char* src_pt = (unsigned char*)src;

    /* Word-by-word copy if both addresses are word-aligned. */
    if (((((unsigned long)dst_pt) | ((unsigned long)src_pt)) & 0x3) == 0)
    {
        // 'size' might not be a multiple of 4 bytes, the few bytes left
        // (at most 3) are copied byte-by-byte below.
        while (size > 3)
        {
            *(unsigned int*)dst_pt = *(unsigned int*)src_pt;
            dst_pt += 4;
            src_pt += 4;
            size   -= 4;
        }
    }

    /*
     * Byte-by-byte copy if:
     * - At least 1 of the 2 addresses is not word-aligned,
     * - 'size' value is not a multiple of 4 bytes.
     */
    while (size > 0)
    {
        *dst_pt++ = *src_pt++;
        size--;
    }

} // boot_memcpy()

/****************************************************************************/

/*
 * Fills the 'size' bytes starting at 'base' with the low-order byte of
 * 'val'.
 *
 * When the base address is word-aligned, the largest multiple of 4 bytes is
 * filled word-by-word; the remaining bytes (everything, if the base address
 * is not word-aligned) are filled byte-by-byte.
 */
void boot_memset(void* base, int val, unsigned int size)
{
    /*
     * Typed cursor: incrementing the 'void*' argument directly only moves it
     * by one byte (GNU extension), whatever the cast applied to the result
     * of the increment.
     */
    unsigned char* pt = (unsigned char*)base;

    // Keeping only the least significant byte of 'val'. The replication
    // below is done on an unsigned value so that shifting into bit 31 is
    // well defined.
    unsigned int bval = (unsigned int)val & 0xFF;

    // Replicating that byte in the 4 bytes of a word.
    unsigned int wval = (bval << 24) | (bval << 16) | (bval << 8) | bval;

    /* Word-by-word filling if the base address is word-aligned. */
    if (((unsigned long)pt & 0x3) == 0)
    {
        // 'size' might not be a multiple of 4 bytes, the few bytes left
        // (at most 3) are filled byte-by-byte below.
        while (size > 3)
        {
            *(unsigned int*)pt = wval;
            pt   += 4;
            size -= 4;
        }
    }

    /*
     * Byte-by-byte filling if:
     * - The base address is not word-aligned,
     * - 'size' value is not a multiple of 4 bytes.
     */
    while (size > 0)
    {
        *pt++ = (unsigned char)bval;
        size--;
    }

} // boot_memset()

/****************************************************************************
 *                              String functions.                           *
 ****************************************************************************/

/*
 * Copies the NUL-terminated string 'src', terminator included, into 'dest'.
 * Nothing is done if either pointer is NULL.
 */
void boot_strcpy(char* dest, char* src)
{
    char current;   /* Character being transferred.                         */

    /* Nothing to do without both a source and a destination. */
    if (!dest || !src)
        return;

    /* Transferring characters until the terminator has been written. */
    do
    {
        current = *src++;
        *dest++ = current;
    }
    while (current != '\0');

} // boot_strcpy()

/****************************************************************************/

/*
 * Returns the number of characters of the NUL-terminated string 's', not
 * counting the terminator. A NULL pointer yields 0.
 */
unsigned int boot_strlen(char* s)
{
    char* cursor;   /* Walks the string up to its terminator.               */

    if (s == NULL)
        return 0;

    for (cursor = s; *cursor != '\0'; cursor++);

    return (unsigned int)(cursor - s);

} // boot_strlen()

/****************************************************************************/

/*
 * Tests two NUL-terminated strings for equality.
 *
 * Returns 0 when both strings are identical and 1 as soon as a character
 * differs. 0 is also returned when either pointer is NULL.
 */
int boot_strcmp(char* s1, char* s2)
{
    if ((s1 == NULL) || (s2 == NULL))
        return 0;

    /* Walking both strings for as long as they agree. */
    for (; *s1 == *s2; s1++, s2++)
    {
        // Both strings ended at the same position: they are identical.
        if (*s1 == '\0')
            return 0;
    }

    /* A differing character has been found. */
    return 1;

} // boot_strcmp()

/****************************************************************************
 *                             Display functions.                           *
 ****************************************************************************/

/*
 * Sends the NUL-terminated string 'str' to the boot TTY terminal. The
 * value returned by boot_tty_write() is not checked.
 */
void boot_puts(char* str)
{
    unsigned int length = boot_strlen(str);    /* Number of bytes to send. */

    boot_tty_write(str, length);

} // boot_puts()
    
/****************************************************************************/

/*
 * Converts the 32-bit unsigned value 'val' to decimal digits. The digits are
 * written backwards, the last one at 'end[-1]'. Returns a pointer to the
 * first digit and stores the number of digits (1 to 10) in '*len'.
 */
static char* boot_printf_dec(unsigned int val, char* end, unsigned int* len)
{
    char* pt = end;

    do
    {
        *--pt = (char)('0' + (val % 10));
        val  /= 10;
    }
    while (val != 0);

    *len = (unsigned int)(end - pt);
    return pt;
}

/*
 * Converts the unsigned value 'val' to upper-case hexadecimal digits. The
 * digits are written backwards, the last one at 'end[-1]'. Returns a pointer
 * to the first digit and stores the number of digits (1 to 16) in '*len'.
 * Only shifts and masks are used, so no 64-bit division is needed.
 */
static char* boot_printf_hex(unsigned long long val,
                             char*              end,
                             unsigned int*      len)
{
    static const char hex_tab[] = "0123456789ABCDEF";
    char*             pt        = end;

    do
    {
        *--pt = hex_tab[val & 0xF];
        val >>= 4;
    }
    while (val != 0);

    *len = (unsigned int)(end - pt);
    return pt;
}

/*
 * Minimal printf writing to the boot TTY terminal.
 *
 * Supported conversion specifiers:
 * - %c : a character,
 * - %d : a 32-bit signed integer, in decimal,
 * - %u : a 32-bit unsigned integer, in decimal,
 * - %x : a 32-bit unsigned integer, in hexadecimal with a "0x" prefix,
 * - %l : a 64-bit unsigned integer, in hexadecimal with a "0x" prefix,
 * - %s : a NUL-terminated string.
 *
 * Any other specifier (including a lone '%' at the end of the format), or a
 * failure reported by boot_tty_write(), prints an error message and calls
 * boot_exit().
 *
 * All numeric conversions are done on unsigned values: converting through a
 * signed 'int' gives negative remainders (hence out-of-range digit indexes)
 * for values above 2^31 - 1, and truncates 64-bit arguments.
 */
void boot_printf(char* format, ...)
{
    va_list      arg;           /* Used to iterate arguments list.          */
    char         buf[16];       /* Buffer for argument conversion, large
                                   enough for 16 hexadecimal digits.        */
    char*        print_pt;      /* String pointer for argument printing.    */
    int          arg_val;       /* Raw value of a signed argument.          */
    unsigned int arg_len;       /* Length of a argument (in bytes).         */
    unsigned int nb_printed;    /* Iterator for text printing loop.         */

    /* Starting the arguments iterating process with a va_list. */
    va_start(arg, format);

    while (*format != '\0')
    {
        /* Counting the number of ordinary characters. */
        for (nb_printed = 0;
             (format[nb_printed] != '\0') && (format[nb_printed] != '%');
             nb_printed++);

        /* Copying them unchanged to the boot TTY terminal. */
        if (nb_printed > 0)
        {
            if (boot_tty_write(format, nb_printed))
                goto error;
            format += nb_printed;
        }

        /* Not a '%': the end of the format string has been reached. */
        if (*format != '%')
            continue;

        /* Skipping the '%' character. */
        format++;

        /* Analyzing the conversion specifier. */
        switch (*format++)
        {
            // A character.
            case ('c'):
            {
                arg_val  = va_arg(arg, int);
                buf[0]   = (char)arg_val;
                print_pt = &buf[0];
                arg_len  = 1;
                break;
            }

            // A 32-bit signed decimal notation of an integer.
            case ('d'):
            {
                unsigned int magnitude;

                arg_val = va_arg(arg, int);

                // Printing the minus sign if needed. The magnitude is
                // computed in unsigned arithmetic so that the most negative
                // value does not overflow.
                if (arg_val < 0)
                {
                    magnitude = 0U - (unsigned int)arg_val;
                    if (boot_tty_write("-", 1))
                        goto error;
                }
                else
                {
                    magnitude = (unsigned int)arg_val;
                }

                print_pt = boot_printf_dec(magnitude,
                                           buf + sizeof(buf),
                                           &arg_len);
                break;
            }

            // A 32-bit unsigned decimal notation of an integer.
            case ('u'):
            {
                print_pt = boot_printf_dec(va_arg(arg, unsigned int),
                                           buf + sizeof(buf),
                                           &arg_len);
                break;
            }

            // A 32-bit unsigned hexadecimal notation of an integer.
            case ('x'):
            {
                unsigned int word = va_arg(arg, unsigned int);

                // Printing the hexadecimal prefix.
                if (boot_tty_write("0x", 2))
                    goto error;

                print_pt = boot_printf_hex(word, buf + sizeof(buf), &arg_len);
                break;
            }

            // A 64-bit unsigned hexadecimal notation of an integer.
            case ('l'):
            {
                unsigned long long dword = va_arg(arg, unsigned long long);

                // Printing the hexadecimal prefix.
                if (boot_tty_write("0x", 2))
                    goto error;

                print_pt = boot_printf_hex(dword, buf + sizeof(buf), &arg_len);
                break;
            }

            // A NUL terminated string.
            case ('s'):
            {
                print_pt = va_arg(arg, char*);
                arg_len  = boot_strlen(print_pt);
                break;
            }

            default:
                goto error;
        }

        /* Printing the converted argument. */
        if (boot_tty_write(print_pt, arg_len))
            goto error;
    }

    /* Freeing the va_list. */
    va_end(arg);

    return;

error:

    /* Freeing the va_list before giving up. */
    va_end(arg);

    /* Trying to print an error message then exit. */
    boot_puts("\n[BOOT ERROR] boot_printf(): "
              "Cannot print the whole message\n"
             );

    boot_exit();

} // boot_printf()

/****************************************************************************
 *                              Misc. functions.                            *
 ****************************************************************************/

/*
 * Prints a panic message carrying the value returned by
 * boot_get_proctime(), then spins forever: this function never returns.
 */
void boot_exit()
{
    unsigned int now = boot_get_proctime();     /* Value to be reported.    */

    boot_printf("\n[BOOT PANIC] Suiciding at cycle %d...\n", now);

    /* Endless idle loop. */
    for (;;)
    {
        asm volatile ("nop");
    }

} // boot_exit()

/****************************************************************************/

/*
 * Returns the current value of coprocessor 0 register $9 (CP0_COUNT).
 */
unsigned int boot_get_proctime()
{
    unsigned int count;     /* Receives the CP0_COUNT register value.       */

    asm volatile("mfc0 %0, $9" : "=r"(count));

    return count;

} // boot_get_proctime()

/****************************************************************************/

/*
 * Returns the 12 low-order bits of coprocessor 0 register $15, select 1
 * (CP0_PROCID).
 */
unsigned int boot_get_procid()
{
    unsigned int procid;    /* Receives the CP0_PROCID register value.      */

    asm volatile("mfc0 %0, $15, 1" : "=r"(procid));

    /* Only the 12 low-order bits are kept. */
    procid &= 0xFFF;

    return procid;

} // boot_get_procid()

/****************************************************************************/

/*
 * Toggling (sense-reversing) barrier for 'count' participants, located at
 * the extended pointer 'xp_barrier'.
 *
 * Each caller reads the current sense, derives the opposite value as the
 * one to wait for, then atomically increments the arrival counter. The
 * caller that finds 'count - 1' earlier arrivals resets the counter and
 * writes the new sense; every other caller polls the sense until it takes
 * the awaited value.
 */
void boot_barrier(xptr_t xp_barrier, uint32_t count)
{
    boot_barrier_t* barrier = (boot_barrier_t*)GET_PTR(xp_barrier);
                                    /* Classic pointer to the barrier.      */
    uint32_t        cluster = (uint32_t)GET_CXY(xp_barrier);
                                    /* Cluster containing the barrier.      */
    uint32_t        awaited;        /* Sense value announcing the release.  */
    uint32_t        arrived;        /* Number of earlier arrivals.          */

    /*
     * The sense is read rather than assumed, because no initialization has
     * been previously done: the awaited value is simply the opposite one.
     */
    awaited = (boot_remote_lw(XPTR(cluster, &barrier->sense)) == 0) ? 1 : 0;

    /* Registering this processor and getting the earlier arrivals count. */
    arrived = boot_remote_atomic_add(XPTR(cluster, &barrier->current), 1);

    /* Not the last one: polling the sense until the barrier is released. */
    if (arrived != (count - 1))
    {
        while (boot_remote_lw(XPTR(cluster, &barrier->sense)) != awaited);
        return;
    }

    /* Last one: resetting the counter, then toggling the sense. */
    boot_remote_sw(XPTR(cluster, &barrier->current), 0);
    boot_remote_sw(XPTR(cluster, &barrier->sense), awaited);

} // boot_barrier()

