/*
 * dqdt.c - Distributed Quaternary Decision Tree implementation.
 *
 * Author : Alain Greiner (2016)
 *
 * Copyright (c)  UPMC Sorbonne Universites
 *
 * This file is part of ALMOS-MKH.
 *
 * ALMOS-MKH is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License as published by
 * the Free Software Foundation; version 2.0 of the License.
 *
 * ALMOS-MKH is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with ALMOS-MKH; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
 */

#include <kernel_config.h>
#include <hal_types.h>
#include <hal_special.h>
#include <hal_atomic.h>
#include <hal_remote.h>
#include <printk.h>
#include <cluster.h>
#include <bits.h>
#include <dqdt.h>


////////////////////////////////////////////////////////////////////////////////
// This function displays one DQDT node located in the local cluster :
// its level, its number of threads and its number of pages.
// The cluster identifier displayed is always local_cxy.
// @ node  : local pointer on the DQDT node to display.
////////////////////////////////////////////////////////////////////////////////
void dqdt_local_print( dqdt_node_t * node )
{
    printk("DQDT node : level = %d / cluster = %x / threads = %x / pages = %x\n",
           node->level , local_cxy , node->threads , node->pages );
}

////////////////////////////////////////////////////////////////////////////////
// This recursive function displays the DQDT sub-tree rooted in the node
// identified by the <node_xp> argument : the node itself, then all its
// registered children (when the node level is not 0).
// Each node is copied in a local buffer before being displayed, and the
// cluster identifier displayed is the one of the cluster actually containing
// the node (extracted from the extended pointer), not the local cluster.
// @ node_xp  : extended pointer on the sub-tree root node.
////////////////////////////////////////////////////////////////////////////////
void dqdt_global_print( xptr_t  node_xp )
{
    uint32_t    i;
    dqdt_node_t local_node;

    // get the identifier of the cluster containing the node
    cxy_t       node_cxy = (cxy_t)GET_CXY( node_xp );

    // get node local copy
    hal_remote_memcpy( XPTR( local_cxy , &local_node ), node_xp , sizeof(dqdt_node_t) );

    // display DQDT node content, with the owner cluster identifier
    // (the node copy is in the local cluster, so local_cxy would be misleading)
    printk("DQDT node : level = %d / cluster = %x / threads = %x / pages = %x\n",
           local_node.level,
           node_cxy,
           local_node.threads,
           local_node.pages );

    // recursive call on children if node is not terminal
    if ( local_node.level > 0 )
    {
        for ( i = 0 ; i < 4 ; i++ )
        {
            if ( local_node.children[i] != XPTR_NULL ) dqdt_global_print( local_node.children[i] );
        }
    }
}

////////////////////////////////////////////////////////////////////////////////
// This function initialises the DQDT nodes stored in the local cluster
// dqdt_tbl[] array (one entry per level, from 0 to level_max - 1).
// The local cluster coordinates are extracted from local_cxy :
// x = local_cxy >> y_width  /  y = local_cxy & ((1<<y_width)-1).
// At a given level, the node is linked in the tree (parent and children
// pointers) only if both x and y are multiples of 2**level.
// The node of the highest level (the root) keeps a null parent pointer,
// so that a bottom-up traversal stops on the root.
// A child is registered only if its coordinates are inside the mesh.
// @ x_size   : number of clusters in a row    (must not exceed 32).
// @ y_size   : number of clusters in a column (must not exceed 32).
// @ y_width  : number of bits used to encode the y coordinate in cxy.
// @ return the number of levels in the quad tree (level_max).
////////////////////////////////////////////////////////////////////////////////
uint32_t dqdt_init( uint32_t x_size,
                    uint32_t y_size,
                    uint32_t y_width )
{
    if( (x_size > 32) || (y_size > 32) )
    {
        printk("\n[PANIC] in %s : illegal mesh size for DQDT support\n",
               __FUNCTION__ );
        hal_core_sleep();
    }

    dqdt_node_t * node;
    cxy_t         p_cxy;         // cluster coordinates for parent node
    cxy_t         c_cxy;         // cluster coordinates for child node
    uint32_t      level;         // node level in quad tree
    uint32_t      mask;          // mask on node coordinates to compute existence condition
    uint32_t      pmask;         // mask to compute parent coordinates from child coordinates
    cluster_t   * cluster;       // pointer on local cluster

    cluster = LOCAL_CLUSTER;

    // compute level_max : log2 of the (power of 2) extended mesh size, plus one
    uint32_t  x_size_ext = POW2_ROUNDUP( x_size );
    uint32_t  y_size_ext = POW2_ROUNDUP( y_size );
    uint32_t  size_ext   = MAX(x_size_ext , y_size_ext);
    uint32_t  level_max  = (bits_log2(size_ext * size_ext) >> 1) + 1;

    // get cluster coordinates
    uint32_t    x       = local_cxy >> y_width;
    uint32_t    y       = local_cxy & ((1<<y_width)-1);

    // loop on local dqdt nodes (at most one node per level)
    for( level = 0 ; level < level_max ; level++ )
    {
        // get pointer on the node to be initialised
        node = &cluster->dqdt_tbl[level];

        // set default values
        node->level       = level;
        node->arity       = 0;
        node->threads     = 0;
        node->pages       = 0;
        node->parent      = XPTR_NULL;
        node->children[0] = XPTR_NULL;
        node->children[1] = XPTR_NULL;
        node->children[2] = XPTR_NULL;
        node->children[3] = XPTR_NULL;

        // compute masks depending on level : 0x1, 0x3, 0x7, 0xF, 0x1F etc.
        mask  = (1<<level)-1;
        pmask = (1<<(level+1))-1;

        // check the node  existence condition at each level
        if( ((x & mask) == 0) && ((y & mask) == 0) )
        {
            // set parent extended pointer, except for the root node :
            // the root has no parent, and dqdt_tbl[level_max] is not
            // an initialised DQDT node.
            if( level < (level_max - 1) )
            {
                p_cxy = ((x & ~pmask)<<y_width) + (y & ~pmask);
                node->parent = XPTR( p_cxy , &cluster->dqdt_tbl[level+1] );
            }

            // a level 0 node has no children
            if( level > 0 )
            {
                // distance (in clusters) between two children on one axis
                uint32_t half = 1 << (level-1);

                // children coordinates may overflow the actual mesh
                bool_t   x_ok = ((x + half) < x_size);
                bool_t   y_ok = ((y + half) < y_size);

                // set child[0] extended pointer (same [x,y] coordinates)
                c_cxy = local_cxy;
                node->children[0] = XPTR( c_cxy , &cluster->dqdt_tbl[level-1] );
                node->arity++;

                // set child[1] extended pointer (shifted on y)
                if( y_ok )
                {
                    c_cxy = local_cxy + half;
                    node->children[1] = XPTR( c_cxy , &cluster->dqdt_tbl[level-1] );
                    node->arity++;
                }

                // set child[2] extended pointer (shifted on x)
                if( x_ok )
                {
                    c_cxy = local_cxy + (half<<y_width);
                    node->children[2] = XPTR( c_cxy , &cluster->dqdt_tbl[level-1] );
                    node->arity++;
                }

                // set child[3] extended pointer (shifted on both x and y)
                if( x_ok && y_ok )
                {
                    c_cxy = local_cxy + (half<<y_width) + half;
                    node->children[3] = XPTR( c_cxy , &cluster->dqdt_tbl[level-1] );
                    node->arity++;
                }
            }
        }  // end if existence condition
    }  // end for level

    return level_max;

} // end dqdt_init()


///////////////////////////////////////////////////////////////////////////
// This function is called by the dqdt_global_update() function.
// Starting from the <node> argument, it climbs the quad tree, following
// the parent pointers until a null parent is found, and atomically adds
// the two variations to the threads and pages counters of each visited
// node (the <node> itself included).
// @ node         : extended pointer on the first node to update.
// @ threads_var  : number of threads variation.
// @ pages_var    : number of pages variation.
///////////////////////////////////////////////////////////////////////////
static void dqdt_propagate( xptr_t  node,         // extended pointer on current node
                            int32_t threads_var,  // number of threads variation
                            int32_t pages_var )   // number of pages variation
{
    xptr_t current_xp = node;

    do
    {
        // split the extended pointer : cluster identifier and local pointer
        cxy_t         node_cxy = (cxy_t)GET_CXY( current_xp );
        dqdt_node_t * node_ptr = (dqdt_node_t *)GET_PTR( current_xp );

        // apply both variations to the visited node
        hal_remote_atomic_add( XPTR( node_cxy , &node_ptr->threads ) , threads_var );
        hal_remote_atomic_add( XPTR( node_cxy , &node_ptr->pages ) , pages_var );

        // move to the parent node
        current_xp = (xptr_t)hal_remote_lwd( XPTR( node_cxy , &node_ptr->parent ) );
    }
    while( current_xp != XPTR_NULL );
}

/////////////////////////
void dqdt_global_update()
{
	cluster_t   * cluster = LOCAL_CLUSTER;
    dqdt_node_t * node    = &cluster->dqdt_tbl[0];

    // get variations
    int32_t      threads_var = cluster->threads_var;
    int32_t      pages_var   = cluster->pages_var;

    // propagate this variation to DQDT upper levels
    if( (threads_var || pages_var) && (node->parent != XPTR_NULL) )
    {
        dqdt_propagate( node->parent, threads_var, pages_var );
    }

    // update variations
    hal_atomic_add( &cluster->threads_var , -threads_var );
    hal_atomic_add( &cluster->pages_var   , -pages_var   );
}

////////////////////////////////////////////////////////////////////////////////
// This function atomically adds the <increment> argument to two counters
// of the local cluster : the threads_var variation (used later by the
// dqdt_global_update() function), and the threads counter of the level 0
// DQDT node.
// @ increment  : signed value to add.
////////////////////////////////////////////////////////////////////////////////
void dqdt_local_update_threads( int32_t increment )
{
    cluster_t   * local = LOCAL_CLUSTER;
    dqdt_node_t * leaf  = &local->dqdt_tbl[0];

    // pending variation, to be propagated to the upper levels
    hal_atomic_add( &local->threads_var , increment );

    // level 0 node is updated immediately
    hal_atomic_add( &leaf->threads , increment );
}

////////////////////////////////////////////////////////////////////////////////
// This function atomically adds the <increment> argument to two counters
// of the local cluster : the pages_var variation (used later by the
// dqdt_global_update() function), and the pages counter of the level 0
// DQDT node.
// @ increment  : signed value to add.
////////////////////////////////////////////////////////////////////////////////
void dqdt_local_update_pages( int32_t increment )
{
    cluster_t   * local = LOCAL_CLUSTER;
    dqdt_node_t * leaf  = &local->dqdt_tbl[0];

    // pending variation, to be propagated to the upper levels
    hal_atomic_add( &local->pages_var , increment );

    // level 0 node is updated immediately
    hal_atomic_add( &leaf->pages , increment );
}

////////////////////////////////////////////////////////////////////////////////
// This function is called by both the dqdt_get_cluster_for_process()
// and by the dqdt_get_cluster_for_memory() functions to select the cluster
// with smallest number of threads, or smallest number of allocated pages.
// It descends the quad tree from the <node> argument to a level 0 node :
// at each level, it reads the load of each registered child (pages counter
// when <for_memory> is true, threads counter otherwise), and moves to the
// child with the smallest load. When several children have the same load,
// the child with the smallest index is selected.
// @ node        : extended pointer on the starting node.
// @ for_memory  : use the pages counter if true / the threads counter if false.
// @ return the identifier of the cluster containing the level 0 node reached.
///////////////////////////////////////////////////////////////////////////////
static cxy_t dqdt_select_cluster( xptr_t node,
                                  bool_t for_memory )
{
    dqdt_node_t   current;              // local copy of the visited DQDT node
    xptr_t        current_xp = node;    // extended pointer on the visited DQDT node

    for( ;; )
    {
        // get a local copy of the visited node
        hal_remote_memcpy( XPTR( local_cxy , &current ), current_xp , sizeof(dqdt_node_t) );

        // a terminal node identifies the selected cluster
        if( current.level == 0 ) return GET_CXY(current_xp);

        // scan the registered children to find the smallest load
        uint32_t  best      = 0;
        uint32_t  best_load = 0xFFFFFFFF;
        uint32_t  idx;

        for( idx = 0 ; idx < 4 ; idx++ )
        {
            xptr_t child_xp = current.children[idx];

            // skip the unregistered children
            if( child_xp == XPTR_NULL ) continue;

            cxy_t         child_cxy = (cxy_t)GET_CXY( child_xp );
            dqdt_node_t * child_ptr = (dqdt_node_t *)GET_PTR( child_xp );
            uint32_t      child_load;

            if( for_memory ) child_load = hal_remote_lw( XPTR( child_cxy , &child_ptr->pages ) );
            else             child_load = hal_remote_lw( XPTR( child_cxy , &child_ptr->threads ) );

            // strict comparison : the first child found wins in case of equality
            if( child_load < best_load )
            {
                best_load = child_load;
                best      = idx;
            }
        }

        // go down to the child with the lowest load
        current_xp = current.children[best];
    }
}

////////////////////////////////////
cxy_t dqdt_get_cluster_for_process()
{
    // build extended pointer on DQDT root node
	cluster_t * cluster = LOCAL_CLUSTER;
    uint32_t    level   = cluster->dqdt_root_level;
    xptr_t      root    = XPTR( 0 , &cluster->dqdt_tbl[level] );

    // call recursive function
    return dqdt_select_cluster( root , false );
}

////////////////////////////////////
cxy_t dqdt_get_cluster_for_memory()
{
    // build extended pointer on DQDT root node
	cluster_t * cluster = LOCAL_CLUSTER;
    uint32_t    level   = cluster->dqdt_root_level;
    xptr_t      root    = XPTR( 0 , &cluster->dqdt_tbl[level] );

    // call recursive function
    return dqdt_select_cluster( root , true );
}

