/*
 * kern/dqdt.h - Distributed Quad Decision Tree
 *
 * Author : Alain Greiner (2016,2017,2018,2019)
 *
 * Copyright (c)  UPMC Sorbonne Universites
 *
 * This file is part of ALMOS-MKH
 *
 * ALMOS-kernel is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License as published by
 * the Free Software Foundation; version 2.0 of the License.
 *
 * ALMOS-kernel is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with ALMOS-kernel; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
 */

#ifndef _DQDT_H_
#define _DQDT_H_

#include <kernel_config.h>
#include <hal_kernel_types.h>
#include <hal_atomic.h>

/****************************************************************************************
 * This DQDT infrastructure maintains a topological description of resource usage
 * in each cluster: number of threads per core, and number of physical pages allocated.
 *
 * It is organized as a quad-tree, where the leaf cells are the clusters, organized
 * as a 2D mesh. Each node in the quad-tree (including the root and the leaf cells)
 * covers a "macro-cluster", that is a square array of clusters where the number of
 * clusters is a power of 4, and the macro-cluster side is a power of 2.
 * Each node contains information on resource usage (physical memory and cores)
 * in the covered macro-cluster.
 * This quad-tree can be truncated, if the physical mesh X_SIZE and Y_SIZE dimensions
 * are not equal, or are not power of 2, or if the physical mesh contains "holes".
 * The mesh size is supposed to contain at most 32*32 clusters in this implementation.
 *   . Level 0 nodes exist in all clusters and have no children.
 *   . Level 1 nodes can be placed in any cluster of the covered  2*2  macro-cluster.
 *   . Level 2 nodes can be placed in any cluster of the covered  4*4  macro-cluster.
 *   . Level 3 nodes can be placed in any cluster of the covered  8*8  macro-cluster.
 *   . Level 4 nodes can be placed in any cluster of the covered 16*16 macro-cluster.
 *   . Level 5 nodes can be placed in any cluster of the covered 32*32 macro-cluster.
 * The root node is placed in the cluster containing the core executing the dqdt_init()
 * function. Other (non level 0) nodes are placed pseudo-randomly.
 ***************************************************************************************/

/****************************************************************************************
 * This structure describes a node of the DQDT.
 * The max number of children is 4, but it can be smaller for some nodes.
 * Level 0 nodes have no children. The root node has no parent.
 ***************************************************************************************/

typedef struct dqdt_node_s
{
    uint32_t    level;             /*! level of this node in the tree (0 for a leaf)   */
    uint32_t    arity;             /*! actual number of children (at most 4)           */
    uint32_t    threads;           /*! threads count in the covered macro-cluster      */
    uint32_t    pages;             /*! allocated pages count in the macro-cluster      */
    uint32_t    cores;             /*! active cores count in the macro-cluster         */
    uint32_t    clusters;          /*! active clusters count in the macro-cluster      */
    xptr_t      parent;            /*! extended pointer on the parent node             */
    xptr_t      children[2][2];    /*! 2*2 array of extended pointers on the children  */
}
dqdt_node_t;


/****************************************************************************************
 * This function recursively initializes the DQDT structure from information
 * stored in the cluster manager (x_size, y_size and cluster_info[x][y]).
 * It is called in all clusters by the local CP0, to compute level_max and register
 * the DQDT root node in each cluster manager, but only CP0 in cluster 0 actually
 * builds the quad-tree covering all active clusters.
 * This initialisation can use remote accesses, because the DQDT nodes are
 * allocated as global variables in the cluster_manager, and the local addresses
 * are identical in all clusters.
 ***************************************************************************************/
void dqdt_init( void );

/****************************************************************************************
 * These two local functions update the total number of threads in the level 0 DQDT
 * node, and immediately propagate the variation to the DQDT upper levels.
 * They are called on each thread creation or destruction.
 ***************************************************************************************/
void dqdt_increment_threads( void );

void dqdt_decrement_threads( void );

/****************************************************************************************
 * These two functions can be called by any thread running in any cluster.
 * They increment/decrement the total number of 4 Kbytes pages allocated in the cluster
 * identified by the <cxy> argument. The number of pages is defined by the <order>
 * argument, that is the base 2 logarithm of this number of pages. The level 0
 * DQDT node is updated, and this change is immediately propagated to upper levels.
 * They are called by PPM on each physical memory page allocation or release.
 ****************************************************************************************
 * @ cxy     : target cluster identifier.
 * @ order   : log2( number of 4 Kbytes pages )
 ***************************************************************************************/
void dqdt_increment_pages( cxy_t    cxy , 
                           uint32_t order );

void dqdt_decrement_pages( cxy_t    cxy,
                           uint32_t order );

/****************************************************************************************
 * This function returns an extended pointer on the DQDT node that is the root of
 * the sub-tree covering the macro-cluster defined by the <level> argument and
 * containing the cluster defined by the <cxy> argument. It returns XPTR_NULL if
 * this macro-cluster is undefined (when the cxy cluster contains no core).
 ****************************************************************************************
 * @ cxy     : cluster identifier.
 * @ level   : level of the sub-tree.
 * @ returns root_xp if success / returns XPTR_NULL if no active core in macro-cluster.
 ***************************************************************************************/
xptr_t dqdt_get_root( cxy_t    cxy,
                      uint32_t level );

/****************************************************************************************
 * This function can be called in any cluster. It traverses the DQDT tree from the
 * local root of a macro-cluster, defined by the <root_xp> argument, to the bottom.
 * It analyses the computing load & selects the cluster containing the lowest number
 * of threads.
 ****************************************************************************************
 * @ root_xp  : extended pointer on DQDT node root.
 * @ returns the cluster identifier with the lowest computing load.
 ***************************************************************************************/
cxy_t dqdt_get_cluster_for_thread( xptr_t root_xp );

/****************************************************************************************
 * This function can be called in any cluster. It traverses the DQDT tree from the
 * local root of a macro-cluster, defined by the <root_xp> argument, to the bottom.
 * It analyses the memory load & selects the cluster with the lowest number of
 * allocated physical pages.
 ****************************************************************************************
 * @ root_xp  : extended pointer on DQDT node root.
 * @ returns the cluster identifier with the lowest memory load.
 ***************************************************************************************/
cxy_t dqdt_get_cluster_for_memory( xptr_t root_xp );

/****************************************************************************************
 * This function displays on kernel TXT0 the DQDT state for all nodes in the quad-tree.
 * It traverses the quad-tree from the global root to the bottom.
 * It can be called by a thread running in any cluster.
 ***************************************************************************************/
void dqdt_display( void );


#endif	/* _DQDT_H_ */
