/* This file is part of DSX.
 *
 * DSX is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * DSX is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with DSX; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
 *
 * Copyright (c) Lip6, Thalès
 *      Joel Porquet <joel.porquet@lip6.fr>, 2006-2007
 *
 * Based on Martin Fielder's work (http://keyj.s2000.ws/?page_id=41)
 */

#include "inter_render.h"
#include "utils.h"

/*
 * Scratch block of luma samples used for sub-sample interpolation.
 *
 * Indexed as p[row][column].  A 4x4 output block needs samples from two
 * positions before to three positions after each output sample for the
 * six-tap filter, hence 4 + 2 + 3 = 9 rows and columns.
 */
typedef struct {
    int p[9][9];
} L_MC_temp_block;

/*
 * Scratch block of chroma samples used for bilinear interpolation.
 *
 * Indexed as p[row][column].  A 2x2 output block reads each sample plus its
 * right and lower neighbours, hence 3 rows and columns.
 */
typedef struct {
    int p[3][3];
} C_MC_temp_block;


/*
 * Fill `b` with the 9x9 window of luma samples of `ref` whose top-left
 * corner is (org_x, org_y).
 *
 * Source coordinates falling outside the frame are clamped to
 * [0, WIDTH-1] x [0, HEIGHT-1], i.e. the nearest edge sample is replicated.
 */
static void GetLMCTempBlock(L_MC_temp_block *b, frame_t *ref, int org_x, int org_y)
{
    int row, col;

    for (row = 0; row < 9; ++row)
    {
        /* clamp the source line once per row */
        int src_y = org_y + row;

        if (src_y < 0)
            src_y = 0;
        if (src_y >= HEIGHT)
            src_y = HEIGHT-1;

        for (col = 0; col < 9; ++col)
        {
            int src_x = org_x + col;

            if (src_x < 0)
                src_x = 0;
            else if (src_x >= WIDTH)
                src_x = WIDTH-1;

            b->p[row][col] = L_pixel(ref, src_x, src_y);
        }
    }
}


/* Clamp a value to 0..255 through Clip(), which is not defined in this file. */
#define Clip1(i)    Clip((i), 0, 255)
/* Six-tap (1,-5,20,20,-5,1) filter over six samples, rounded (+16, >>5) and
 * passed through Clip1. */
#define Filter(E,F,G,H,I,J) Clip1(((E)-5*(F)+20*(G)+20*(H)-5*(I)+(J)+16)>>5)

/* Opens an `if` that is true when the local variable `frac` selects the
 * fractional position (x, y), encoded as y*4 + x.  Arguments are parenthesized
 * so that expressions, not only literals, can be passed safely. */
#define iffrac(x,y) if (frac == (y)*4+(x))
/* Average of two values, rounded up. */
#define Mix(a,b) (((a)+(b)+1)>>1)

/*
 * Interpolate one luma sample at the fractional position `frac`.
 *
 * data  points at the integer-position sample inside a row-major block
 *       with a row stride of 9 ints; samples from 2 rows/columns before
 *       to 3 rows/columns after `data` may be read, so the caller has to
 *       provide that margin.
 * frac  yFrac*4 + xFrac, with each fraction in 0..3.
 *
 * Returns the interpolated value, or 128 when frac is not in 0..15.
 * Only the intermediate samples required for the requested position are
 * computed.
 */
static int L_MC_get_sub(int *data, int frac)
{
/* sample at offset (x,y) from the current integer position */
#define LMC_AT(x,y)  data[(y)*9+(x)]
/* six-tap filter along row y / along column x */
#define LMC_HORZ(y)  Filter(LMC_AT(-2,y),LMC_AT(-1,y),LMC_AT(0,y),LMC_AT(1,y),LMC_AT(2,y),LMC_AT(3,y))
#define LMC_VERT(x)  Filter(LMC_AT(x,-2),LMC_AT(x,-1),LMC_AT(x,0),LMC_AT(x,1),LMC_AT(x,2),LMC_AT(x,3))
    int b, h, m, s, j;
    int cc, dd, ee, ff;

    switch (frac)
    {
        /* integer position */
        case 0:
            return LMC_AT(0,0);

        /* yFrac == 0: horizontal half sample b only */
        case 1:
            b = LMC_HORZ(0);
            return Mix(LMC_AT(0,0), b);
        case 2:
            return LMC_HORZ(0);
        case 3:
            b = LMC_HORZ(0);
            return Mix(b, LMC_AT(1,0));

        /* xFrac == 0: vertical half sample h only */
        case 4:
            h = LMC_VERT(0);
            return Mix(LMC_AT(0,0), h);
        case 8:
            return LMC_VERT(0);
        case 12:
            h = LMC_VERT(0);
            return Mix(h, LMC_AT(0,1));

        /* diagonal quarter positions: mean of two neighbouring half samples */
        case 5:
            b = LMC_HORZ(0);
            h = LMC_VERT(0);
            return Mix(b, h);
        case 7:
            b = LMC_HORZ(0);
            m = LMC_VERT(1);
            return Mix(b, m);
        case 13:
            h = LMC_VERT(0);
            s = LMC_HORZ(1);
            return Mix(h, s);
        case 15:
            s = LMC_HORZ(1);
            m = LMC_VERT(1);
            return Mix(s, m);

        /* positions built on the centre half sample j, handled below */
        case 6:
        case 9:
        case 10:
        case 11:
        case 14:
            break;

        /* not a valid fractional position: something is going seriously wrong */
        default:
            return 128;
    }

    /* j filters horizontally across six vertically filtered columns */
    cc = LMC_VERT(-2);
    dd = LMC_VERT(-1);
    h  = LMC_VERT(0);
    m  = LMC_VERT(1);
    ee = LMC_VERT(2);
    ff = LMC_VERT(3);
    j  = Filter(cc,dd,h,m,ee,ff);

    if (frac == 10)
        return j;
    if (frac == 6)
    {
        b = LMC_HORZ(0);
        return Mix(b, j);
    }
    if (frac == 9)
        return Mix(h, j);
    if (frac == 14)
    {
        s = LMC_HORZ(1);
        return Mix(j, s);
    }
    return Mix(j, m);   /* frac == 11 */
#undef LMC_VERT
#undef LMC_HORZ
#undef LMC_AT
}


/*
 * Fill `b` with the 3x3 window of chroma samples of plane `iCbCr` of `ref`
 * whose top-left corner is (org_x, org_y).
 *
 * Source coordinates falling outside the plane are clamped to
 * [0, CWIDTH-1] x [0, CHEIGHT-1], i.e. the nearest edge sample is replicated.
 */
static void GetCMCTempBlock (C_MC_temp_block *b, frame_t *ref, int iCbCr, int org_x, int org_y)
{
    int row, col;

    for (row = 0; row < 3; ++row)
    {
        /* clamp the source line once per row */
        int src_y = org_y + row;

        if (src_y < 0)
            src_y = 0;
        if (src_y >= CHEIGHT)
            src_y = CHEIGHT-1;

        for (col = 0; col < 3; ++col)
        {
            int src_x = org_x + col;

            if (src_x < 0)
                src_x = 0;
            else if (src_x >= CWIDTH)
                src_x = CWIDTH-1;

            b->p[row][col] = C_pixel(ref, iCbCr, src_x, src_y);
        }
    }
}


/*
 * Motion-compensate one 4x4 luma block and the matching 2x2 block of each
 * of the two chroma planes.
 *
 * f       frame being written
 * ref     reference frame being read
 * org_x,
 * org_y   luma coordinates of the top-left sample of the block in `f`
 * mvx,
 * mvy     motion vector; the luma path uses the two low bits as the
 *         fractional part, the chroma path uses the three low bits
 */
void MotionCompensateTB(frame_t *f, frame_t *ref, int org_x, int org_y, int mvx, int mvy)
{
    int row, col, plane;

    /* ---- luma: six-tap interpolation out of a 9x9 reference window ---- */
    const int luma_frac = (mvy&3)*4 + (mvx&3);
    L_MC_temp_block luma;

    /* the window starts two samples up/left of the displaced block */
    GetLMCTempBlock(&luma, ref, org_x+(mvx>>2)-2, org_y+(mvy>>2)-2);

    for (row = 0; row < 4; ++row)
        for (col = 0; col < 4; ++col)
            L_pixel(f, col+org_x, row+org_y) = L_MC_get_sub(&(luma.p[row+2][col+2]), luma_frac);

    /* ---- chroma: bilinear interpolation out of a 3x3 reference window ---- */
    /* chroma coordinates are half the luma coordinates */
    const int c_org_x = org_x >> 1;
    const int c_org_y = org_y >> 1;

    /* weights of the four neighbouring samples; they add up to 64 */
    const int xFrac = (mvx&7);
    const int yFrac = (mvy&7);
    const int w_tl = (8-xFrac)*(8-yFrac);
    const int w_tr = xFrac*(8-yFrac);
    const int w_bl = (8-xFrac)*yFrac;
    const int w_br = xFrac*yFrac;

    for (plane = 0; plane < 2; ++plane)
    {
        C_MC_temp_block chroma;

        GetCMCTempBlock(&chroma, ref, plane, c_org_x+(mvx>>3), c_org_y+(mvy>>3));

        for (row = 0; row < 2; ++row)
        {
            for (col = 0; col < 2; ++col)
            {
                C_pixel(f, plane, col+c_org_x, row+c_org_y) =
                    (w_tl*chroma.p[row][col]   + w_tr*chroma.p[row][col+1] +
                     w_bl*chroma.p[row+1][col] + w_br*chroma.p[row+1][col+1] + 32)>>6;
            }
        }
    }
}

/*
 * Inter prediction for a 16x16 block.
 *
 * The block at luma position (x, y) of `f` is predicted from `ref` as
 * sixteen 4x4 sub-blocks, each with its own motion vector taken from
 * mb_fifo_G->inter.MVx / MVy (index = column + 4*row).  Before a sub-block
 * is rendered, the function waits until the m_available flag of `ref` equals
 * 1 for the macroblocks containing the four corners of the 9x9 luma window
 * the sub-block reads.
 */
void MotionCompensateMB (frame_t *f, frame_t *ref, mb_fifo_global_t *mb_fifo_G, int x, int y)
{
    int blk;

    /* sub-blocks in raster order: blk = column + 4*row */
    for (blk = 0; blk < 16; ++blk)
    {
        const int tb_x = x + ((blk & 3) << 2);
        const int tb_y = y + ((blk >> 2) << 2);

        /* first/last column and row of the reference window, clipped to the frame */
        int edge_x[2], edge_y[2];
        int cx, cy;

        edge_x[0] = Clip(tb_x + (mb_fifo_G->inter.MVx[blk] >> 2) - 2, 0, WIDTH-1);
        edge_x[1] = Clip(tb_x + (mb_fifo_G->inter.MVx[blk] >> 2) + 6, 0, WIDTH-1);
        edge_y[0] = Clip(tb_y + (mb_fifo_G->inter.MVy[blk] >> 2) - 2, 0, HEIGHT-1);
        edge_y[1] = Clip(tb_y + (mb_fifo_G->inter.MVy[blk] >> 2) + 6, 0, HEIGHT-1);

        /* corners in the order top-left, top-right, bottom-left, bottom-right */
        for (cy = 0; cy < 2; ++cy)
        {
            for (cx = 0; cx < 2; ++cx)
            {
                /* wait until the macroblock holding this corner is available */
#if defined(SRL_SOCLIB)
                srl_log_printf(DEBUG, "(RENDER) - wait mb %d/%d\n", edge_y[cy]>>4, edge_x[cx]>>4);
                srl_sched_wait_eq_cpu(&ref->m_available[edge_y[cy]>>4][edge_x[cx]>>4], 1);
#elif defined(SRL_POSIX)
                while(*(unsigned char*)&ref->m_available[edge_y[cy]>>4][edge_x[cx]>>4] != 1)
                {
                    srl_log_printf(DEBUG, "(RENDER) - wait mb %d/%d\n", edge_y[cy]>>4, edge_x[cx]>>4);
                    sched_yield();
                }
#else
# error FIXME
#endif
            }
        }

        MotionCompensateTB(f, ref, tb_x, tb_y,
                mb_fifo_G->inter.MVx[blk],
                mb_fifo_G->inter.MVy[blk]);
    }
}
