source: trunk/kernel/kern/dqdt.h @ 653

Last change on this file since 653 was 637, checked in by alain, 5 years ago

Introduce the non-standard pthread_parallel_create() system call
and re-write the <fft> and <sort> applications to improve the
intrinsic paralelism in applications.

File size: 8.5 KB
Line 
1/*
2 * kern/dqdt.h - Distributed Quad Decision Tree
3 *
4 * Author : Alain Greiner (2016,2017,2018,2019)
5 *
6 * Copyright (c)  UPMC Sorbonne Universites
7 *
8 * This file is part of ALMOS-MKH
9 *
10 * ALMOS-kernel is free software; you can redistribute it and/or modify it
11 * under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; version 2.0 of the License.
13 *
14 * ALMOS-kernel is distributed in the hope that it will be useful, but
15 * WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
17 * General Public License for more details.
18 *
19 * You should have received a copy of the GNU General Public License
20 * along with ALMOS-kernel; if not, write to the Free Software Foundation,
21 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
22 */
23
24#ifndef _DQDT_H_
25#define _DQDT_H_
26
27#include <kernel_config.h>
28#include <hal_kernel_types.h>
29#include <hal_atomic.h>
30
31/****************************************************************************************
32 * This DQDT infrastructure maintains a topological description of ressources usage
33 * in each cluster: number of threads per core, and number of physical pages allocated.
34 *
35 * It is organized as a quad-tree, where the leaf cells are the clusters, organised
36 * as a 2D mesh. Each node in the quad-tree (including the root and the leaf cells,
37 * covers a "macro-cluster", that is a square array of clusters where the number
38 * in the macro-cluster is a power of 4, and the macro-cluster side is a power of two.
39 * Each node contains informations on ressources usage (physical memory and cores)
40 * in the covered macro-cluster.
41 * This quad-tree can be truncated, if the physical mesh X_SIZE and Y_SIZE dimensions
42 * are not equal, or are not power of 2, or if the physical mesh contains "holes".
43 * The mesh size is supposed to contain at most 32*32 clusters in this implementation.
44 *   . Level 0 nodes exist in all clusters and have no children.
45 *   . Level 1 nodes can be placed in any cluster of the covered  2*2  macro-cluster.
46 *   . Level 2 nodes can be placed in any cluster of the covered  4*4  macro-cluster.
47 *   . Level 3 nodes can be placed in any cluster of the covered  8*8  macro-cluster.
48 *   . Level 4 nodes can be placed in any cluster of the covered 16*16 macro-cluster.
49 *   . Level 5 nodes can be placed in any cluster of the covered 32*32 macro-cluster.
50 * The root node is placed in the cluster containing the core executing the dqdt_init()
51 * function. Other (non level 0) nodes are placed pseudo-randomly.
52 ***************************************************************************************/
53
54/****************************************************************************************
55 * This structure describes a node of the DQDT.
56 * The max number of children is 4, but it can be smaller for some nodes.
57 * Level 0 nodes have no children. The root node has no parent.
58 ***************************************************************************************/
59
60typedef struct dqdt_node_s
61{
62        uint32_t      level;            /*! node level                                     */
63        uint32_t      arity;            /*! actual children number in this node            */
64    uint32_t      threads;          /*! number of threads in macro-cluster             */
65    uint32_t      pages;            /*! number of allocated pages in macro-cluster     */
66    uint32_t      cores;            /*! number of active cores in macro cluster        */
67    uint32_t      clusters;         /*! number of active clusters in macro cluster     */ 
68        xptr_t        parent;           /*! extended pointer on parent node                */
69        xptr_t        children[2][2];   /*! extended pointers on children nodes            */
70}
71dqdt_node_t;
72
73
74/****************************************************************************************
75 * This function recursively initializes the DQDT structure from informations
76 * stored in cluster manager (x_size, y_size and cluster_info[x][y].
77 * It is called in all clusters by the local CP0, to compute level_max and register
78 * the DQDT root node in each cluster manager, but only CPO in cluster 0 build actually
79 * the quad-tree covering all active clusters.
80 * This initialisation can use remote_accesses, because the DQDT nodes are
81 * allocated as global variables in the cluster_manager, and the local addresses
82 * are identical in all clusters.
83 ***************************************************************************************/
84void dqdt_init( void );
85
86/****************************************************************************************
87 * These local function update the total number of threads in level 0 DQDT node,
88 * and immediately propagates the variation to the DQDT upper levels.
89 * They are called on each thread creation or destruction.
90 ***************************************************************************************/
91void dqdt_increment_threads( void );
92
93void dqdt_decrement_threads( void );
94
95/****************************************************************************************
96 * These two functions can be called by any thread running in any cluster.
97 * They increment/decrement the total number of 4 Kbytes pages allocated in a cluster
98 * identified by the <cxy> argument, as specified by the <order> argument. The level 0
99 * DQDT node is udated, and this change is immediately propagated to upper levels.
100 * They are called by PPM on each physical memory page allocation or release.
101 ****************************************************************************************
102 * @ cxy     : target cluster identifier.
103 * @ order   : ln2( number of 4 Kbytes pages )
104 ***************************************************************************************/
105void dqdt_increment_pages( cxy_t    cxy , 
106                           uint32_t order );
107
108void dqdt_decrement_pages( cxy_t    cxy,
109                           uint32_t order );
110
111/****************************************************************************************
112 * This function returns an extended pointer on the dqdt node that is the root of
113 * the sub-tree covering the macro-cluster defined by the <level> argument and
114 * containing the cluster defined by the <cxy> argument. It returns XPTR_NULL if
115 * this macro-cluster is undefined (when the cxy cluster contains no core).
116 ****************************************************************************************
117 * @ cxy   : cluster identifier.
118 * @ level   : level of the sub-tree.
119 * @ returns  root_xp if success / return XPTR_NULL if no active core in macro_cluster.
120 ***************************************************************************************/
121xptr_t dqdt_get_root( cxy_t    cxy,
122                      uint32_t level );
123
124/****************************************************************************************
125 * This function can be called in any cluster. It traverses the DQDT tree from the
126 * local root of a macro-cluster, defined by the <root_xp> argument, to the bottom.
127 * It analyses the computing load & select the cluster containing the lowest number
128 * ot threads.
129 ****************************************************************************************
130 * @ root_xp  : extended pointer on DQDT node root.
131 * @ returns the cluster identifier with the lowest computing load.
132 ***************************************************************************************/
133cxy_t dqdt_get_cluster_for_thread( xptr_t root_xp );
134
135/****************************************************************************************
136 * This function can be called in any cluster. It traverses the DQDT tree from the
137 * local root of a macro-cluster, defined by the <root_xp> argument, to the bottom.
138 * It analyses the memory load & select the cluster with the lowest number of allocated
139 * physical pages.
140 ****************************************************************************************
141 * @ root_xp  : extended pointer on DQDT node root.
142 * @ returns the cluster identifier with the lowest memory load.
143 ***************************************************************************************/
144cxy_t dqdt_get_cluster_for_memory( xptr_t root_xp );
145
146/****************************************************************************************
147 * This function displays on kernel TXT0 the DQDT state for all nodes in the quad-tree.
148 * It traverses the quadtree from the global root to bottom.
149 * It can be called by a thread running in any cluster
150 ***************************************************************************************/
151void dqdt_display( void );
152
153
154#endif  /* _DQDT_H_ */
Note: See TracBrowser for help on using the repository browser.