/////////////////////////////////////////////////////////////////////////////////////////
// File   : iqzz.c   
// Date   : October 2015
// author : Alain Greiner
/////////////////////////////////////////////////////////////////////////////////////////
// This file defines the code of the IQZZ (Inverse Quantisation / un-ZigZag) thread for MJPEG.
/////////////////////////////////////////////////////////////////////////////////////////

#include <stdio.h>
#include <mwmr_channel.h>
#include <stdint.h>
#include "mjpeg.h"

// Macro to print on the shared TTY: tty_lock is taken before the call to
// giet_tty_printf() and released right after it.
// The expansion is enclosed in braces so that the three calls always form one
// compound statement: without them, "if (c) PRINTF(...)" would only guard the
// lock_acquire() and would print / release unconditionally.
// Braces are used rather than do { } while (0) because the call sites in this
// file invoke the macro without a trailing semicolon.
#define PRINTF(...)    { lock_acquire( &tty_lock );      \
                         giet_tty_printf(__VA_ARGS__);   \
                         lock_release( &tty_lock ); }

/////////////////////////////////////////////////////////////////////////////////////////
// IQZZ thread entry point.
// - index : selects the three channels used by this thread (vld_2_iqzz[index],
//           demux_2_iqzz[index], iqzz_2_idct[index]) and the first image it handles.
//
// For each handled image, the thread:
// 1) reads the 64 quantisation coefs (one byte each) from demux_2_iqzz[index],
// 2) for each of the (nblocks_w * nblocks_h) blocks, reads 64 input values
//    (two bytes each) from vld_2_iqzz[index], multiplies value i by QT[i],
//    stores the product at position G_ZZ[i] of the output block, and writes the
//    64 results (four bytes each) to iqzz_2_idct[index].
// The image index starts at <index> and advances by (x_size * y_size) until it
// reaches MAX_IMAGES. The function ends by calling giet_pthread_exit().
/////////////////////////////////////////////////////////////////////////////////////////
__attribute__ ((constructor)) void iqzz( unsigned int index )
{
    // G_ZZ[i] is the position, in the output block, of input value i (un-ZigZag).
    // static const: the table is read-only, so it does not have to be rebuilt
    // on the stack each time the function is entered.
    static const uint8_t G_ZZ[64] = 
    {
        0 ,  1,  8, 16,  9,  2,  3, 10,
        17, 24, 32, 25, 18, 11,  4,  5,
        12, 19, 26, 33, 40, 48, 41, 34,
        27, 20, 13,  6,  7, 14, 21, 28,
        35, 42, 49, 56, 57, 50, 43, 36,
        29, 22, 15, 23, 30, 37, 44, 51,
        58, 59, 52, 45, 38, 31, 39, 46,
        53, 60, 61, 54, 47, 55, 62, 63
    };

    // channels used by this thread
    mwmr_channel_t*   mwmr_in_data   = vld_2_iqzz[index];
    mwmr_channel_t*   mwmr_in_quanti = demux_2_iqzz[index];
    mwmr_channel_t*   mwmr_out_data  = iqzz_2_idct[index];

    uint32_t    block;
    uint32_t    i;

    // QT and bin are handed to mwmr_read() through a (uint32_t*) cast, but their
    // element types only guarantee 1-byte / 2-byte alignment: force 4-byte
    // alignment so that the converted pointers are valid uint32_t pointers.
    uint8_t     QT[64]   __attribute__ ((aligned(4)));  // Quantisation Table / 1 byte per pixel
    int16_t     bin[64]  __attribute__ ((aligned(4)));  // Input data buffer  / 2 bytes per pixel
    int32_t     bout[64];                               // Output data buffer / 4 bytes per pixel 

    // number of blocks per image
    uint32_t    nblocks = nblocks_w * nblocks_h;

    // get platform parameters (nprocs is only needed as an output argument)
    uint32_t  x_size;
    uint32_t  y_size;
    uint32_t  nprocs;
    giet_procs_number( &x_size , &y_size , &nprocs );

    // distance between two images handled by this thread (loop invariant)
    uint32_t    stride = x_size * y_size;

    // get processor coordinates
    uint32_t    x, y, p;
    giet_proc_xyp( &x , &y , &p );

    PRINTF("\n[MJPEG] thread IQZZ[%d] starts on P[%d,%d,%d] / trdid = %x\n",
           index , x , y , p, (uint32_t)trdid_iqzz[index] )

    uint32_t image = index;

    while ( image < MAX_IMAGES ) // one image per iteration
    {
        // read the quantization coefs from mwmr_in_quanti (one byte per coef) 
        mwmr_read( mwmr_in_quanti , (uint32_t*)QT , 16 );

#if (DEBUG_IQZZ > 1)
if ( (index == DEBUG_CLUSTER_INDEX) || (DEBUG_CLUSTER_INDEX == 0XFFFFFFFF) )
{ PRINTF("\nIQZZ[%d] get quantisation coefs for image %d\n"
         "%d  %d  %d  %d  %d  %d  %d  %d\n"
         "%d  %d  %d  %d  %d  %d  %d  %d\n"
         "%d  %d  %d  %d  %d  %d  %d  %d\n"
         "%d  %d  %d  %d  %d  %d  %d  %d\n"
         "%d  %d  %d  %d  %d  %d  %d  %d\n"
         "%d  %d  %d  %d  %d  %d  %d  %d\n"
         "%d  %d  %d  %d  %d  %d  %d  %d\n"
         "%d  %d  %d  %d  %d  %d  %d  %d\n",
         index , image ,
         QT[ 0],QT[ 1],QT[ 2],QT[ 3],QT[ 4],QT[ 5],QT[ 6],QT[ 7],
         QT[ 8],QT[ 9],QT[10],QT[11],QT[12],QT[13],QT[14],QT[15],
         QT[16],QT[17],QT[18],QT[19],QT[20],QT[21],QT[22],QT[23],
         QT[24],QT[25],QT[26],QT[27],QT[28],QT[29],QT[30],QT[31],
         QT[32],QT[33],QT[34],QT[35],QT[36],QT[37],QT[38],QT[39],
         QT[40],QT[41],QT[42],QT[43],QT[44],QT[45],QT[46],QT[47],
         QT[48],QT[49],QT[50],QT[51],QT[52],QT[53],QT[54],QT[55],
         QT[56],QT[57],QT[58],QT[59],QT[60],QT[61],QT[62],QT[63] ) }
#endif

        for ( block = 0 ; block < nblocks ; ++block ) 
        {
            // read one block from mwmr_in_data (2 bytes per pixel)
            mwmr_read( mwmr_in_data , (uint32_t*)bin , 32 );

            // unquantify & UnZZ each pixel:
            // the int16_t * uint8_t product is computed as an int and fits in 32 bits
            for ( i = 0 ; i < 64 ; ++i ) 
            {
                bout[G_ZZ[i]] = bin[i] * QT[i];
            }

            // write one block to IDCT / 4 bytes per pixel
            mwmr_write( mwmr_out_data , (uint32_t*)bout , 64 );

#if (DEBUG_IQZZ > 1) 
if ( (index == DEBUG_CLUSTER_INDEX) || (DEBUG_CLUSTER_INDEX == 0XFFFFFFFF) )
{ PRINTF("\nIQZZ[%d] completes block %d/%d in image %d\n" 
         "  %d  %d  %d  %d  %d  %d  %d  %d\n"
         "  %d  %d  %d  %d  %d  %d  %d  %d\n"
         "  %d  %d  %d  %d  %d  %d  %d  %d\n"
         "  %d  %d  %d  %d  %d  %d  %d  %d\n"
         "  %d  %d  %d  %d  %d  %d  %d  %d\n"
         "  %d  %d  %d  %d  %d  %d  %d  %d\n"
         "  %d  %d  %d  %d  %d  %d  %d  %d\n"
         "  %d  %d  %d  %d  %d  %d  %d  %d\n",
         index , block , nblocks , image ,
         bout[0] , bout[1] , bout[2] , bout[3] , bout[4] , bout[5] , bout[6] , bout[7] ,
         bout[8] , bout[9] , bout[10], bout[11], bout[12], bout[13], bout[14], bout[15],
         bout[16], bout[17], bout[18], bout[19], bout[20], bout[21], bout[22], bout[23],
         bout[24], bout[25], bout[26], bout[27], bout[28], bout[29], bout[30], bout[31],
         bout[32], bout[33], bout[34], bout[35], bout[36], bout[37], bout[38], bout[39],
         bout[40], bout[41], bout[42], bout[43], bout[44], bout[45], bout[46], bout[47],
         bout[48], bout[49], bout[50], bout[51], bout[52], bout[53], bout[54], bout[55],
         bout[56], bout[57], bout[58], bout[59], bout[60], bout[61], bout[62], bout[63]) }
#endif
        }  // end for blocks

#if DEBUG_IQZZ
if ( (index == DEBUG_CLUSTER_INDEX) || (DEBUG_CLUSTER_INDEX == 0XFFFFFFFF) )
{ PRINTF("\nIQZZ[%d] completes image %d at cycle %d\n", index , image , giet_proctime() ) }
#endif

        // next image handled by this thread
        image = image + stride;

    } // end while ( image < MAX_IMAGES )

    giet_pthread_exit( "IQZZ completed" );

} // end iqzz()

