/*
 * kern/dqdt.h - Distributed Quad Decision Tree
 *
 * Author : Alain Greiner (2016,2017,2018)
 *
 * Copyright (c)  UPMC Sorbonne Universites
 *
 * This file is part of ALMOS-MKH
 *
 * ALMOS-kernel is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License as published by
 * the Free Software Foundation; version 2.0 of the License.
 *
 * ALMOS-kernel is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with ALMOS-kernel; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
 */

#ifndef _DQDT_H_
#define _DQDT_H_

#include <kernel_config.h>
#include <hal_kernel_types.h>
#include <hal_atomic.h>

/****************************************************************************************
 * This DQDT infrastructure maintains a topological description of resource usage
 * (number of threads, and number of allocated physical pages) in each cluster.
 *
 * - If X_SIZE or Y_SIZE is equal to 1, it makes the assumption that the cluster
 *   topology is a one-dimensional vector, and builds the smallest one-dimensional
 *   quad-tree covering this one-dimensional vector. If the number of clusters
 *   is not a power of 4, the tree is truncated as required.
 *
 *   TODO : the mapping for the one-dimensional topology is not implemented yet [AG].
 *
 * - If both X_SIZE and Y_SIZE are larger than 1, it makes the assumption that
 *   the cluster topology is a 2D mesh. The [X,Y] coordinates of a cluster are
 *   obtained from the CXY identifier using the following rules :
 *      X = CXY >> Y_WIDTH   /  Y = CXY & ((1<<Y_WIDTH)-1)
 *   If the mesh X_SIZE and Y_SIZE dimensions are not equal, or are not powers of 2,
 *   we build the smallest two-dimensional quad-tree covering all clusters,
 *   and this tree is truncated as required.
 *   The root node is always implemented in cluster [0,0].
 *   The mesh is assumed to contain at most 32 * 32 clusters.
 *   There are at most 6 DQDT nodes (levels 0 to 5) in a cluster :
 *   . Level 0 nodes exist on all clusters and have no children.
 *   . Level 1 nodes exist when both X and Y coordinates are multiple of 2
 *   . Level 2 nodes exist when both X and Y coordinates are multiple of 4
 *   . Level 3 nodes exist when both X and Y coordinates are multiple of 8
 *   . Level 4 nodes exist when both X and Y coordinates are multiple of 16
 *   . Level 5 nodes exist when both X and Y coordinates are multiple of 32
 *
 *   TODO : the cluster_info[x][y] array is not taken into account [AG].
 ***************************************************************************************/

/****************************************************************************************
 * DQDT node descriptor.
 * A node has at most 4 children, but some nodes may have fewer.
 * Level 0 nodes stand for the clusters themselves, and therefore have no children.
 * The root node has no parent, and always resides in cluster[0,0].
 ***************************************************************************************/
typedef struct dqdt_node_s
{
    uint32_t   level;          // level of this node in the quad-tree
    uint32_t   arity;          // number of children actually attached to this node
    uint32_t   threads;        // current number of threads in the subtree
    uint32_t   pages;          // current number of pages in the subtree
    xptr_t     parent;         // extended pointer on the parent node
    xptr_t     children[4];    // extended pointers on the children nodes
} dqdt_node_t;


/****************************************************************************************
 * This local function initializes the local DQDT structures.
 * The information describing the hardware platform topology is defined by the
 * two arguments below.
 * NOTE(review): the cluster indexing policy (Y_WIDTH) is not an argument of this
 * function; presumably it is taken from the kernel configuration - to be confirmed.
 * This initialisation is done in parallel, locally in each cluster, because the DQDT
 * is allocated as a global variable in the cluster_manager, and the local addresses
 * are identical in all clusters.
 ****************************************************************************************
 * @ x_size   : number of clusters (containing memory and CPUs) in a row
 * @ y_size   : number of clusters (containing memory and CPUs) in a column
 * @ return the number of levels in the quad-tree.
 ***************************************************************************************/
uint32_t dqdt_init( uint32_t x_size,
                    uint32_t y_size );

/****************************************************************************************
 * This local function updates the total number of threads in the level 0 DQDT node,
 * and propagates the variation to the upper levels of the DQDT.
 * It should be called on each thread creation or destruction.
 ****************************************************************************************
 * @ increment : signed variation of the number of threads (can be positive or negative)
 ***************************************************************************************/
void dqdt_update_threads( int32_t  increment );

/****************************************************************************************
 * This local function updates the total number of pages in the level 0 DQDT node,
 * and propagates the variation to the upper levels of the DQDT.
 * It should be called on each physical memory page allocation or release.
 ****************************************************************************************
 * @ increment : signed variation of the number of pages (can be positive or negative)
 ***************************************************************************************/
void dqdt_update_pages( int32_t increment );

/****************************************************************************************
 * This function can be called in any cluster. It traverses the DQDT tree
 * from the root to the bottom, to analyse the computing load and select the cluster
 * with the lowest number of threads to place a new process.
 ****************************************************************************************
 * @ returns the identifier of the cluster with the lowest computing load.
 ***************************************************************************************/
cxy_t dqdt_get_cluster_for_process( void );

/****************************************************************************************
 * This function can be called in any cluster. It traverses the DQDT tree
 * from the root to the bottom, to analyse the memory load and select the cluster
 * with the lowest memory load, for a dynamic memory allocation that has no locality
 * constraint.
 ****************************************************************************************
 * @ returns the identifier of the cluster with the lowest memory load.
 ***************************************************************************************/
cxy_t dqdt_get_cluster_for_memory( void );

/****************************************************************************************
 * This function displays on kernel TXT0 the DQDT state for all nodes in the quad-tree.
 * It traverses the quad-tree from root to bottom, and can be called by a thread
 * running in any cluster.
 ***************************************************************************************/
void dqdt_display( void );


#endif	/* _DQDT_H_ */
