source: trunk/libs/libalmosmkh/almosmkh.h @ 640

Last change on this file since 640 was 640, checked in by alain, 5 years ago

Remove all RPCs in page-fault handling.

File size: 29.0 KB
Line 
1/*
2 * almosmkh.h - User level ALMOS-MKH specific library definition.
3 *
4 * Author     Alain Greiner (2016,2017,2018,2019)
5 *
6 * Copyright (c) UPMC Sorbonne Universites
7 *
8 * This file is part of ALMOS-MKH.
9 *
10 * ALMOS-MKH is free software; you can redistribute it and/or modify it
11 * under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; version 2.0 of the License.
13 *
14 * ALMOS-MKH is distributed in the hope that it will be useful, but
15 * WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
17 * General Public License for more details.
18 *
19 * You should have received a copy of the GNU General Public License
20 * along with ALMOS-MKH; if not, write to the Free Software Foundation,
21 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
22 */
23
24#ifndef _LIBALMOSMKH_H_
25#define _LIBALMOSMKH_H_
26
27/***************************************************************************************
28 * This file defines a user level, ALMOS-MKH specific library, containing:
29 * - non standard system calls.
30 * - debug functions.
31 * - remote malloc extensions.
32 **************************************************************************************/
33
34#include <pthread.h>
35#include <shared_almos.h>
36
37/****************** Non standard (ALMOS_MKH specific) system calls ********************/
38
39
40/***************************************************************************************
41 * This syscall gives the process identified by the <pid> argument the exclusive
42 * ownership of its TXT terminal.
43 ***************************************************************************************
44 * @ pid        : [in] process identifier.
45 * @ returns 0 if success / returns -1 if process not found.
46 **************************************************************************************/
47int fg( unsigned int pid );
48
49/***************************************************************************************
50 * This syscall stores in the buffer identified by the <owner> argument a non zero
51 * value when the process identified by the <pid> argument is currently the exclusive
52 * owner of its TXT terminal.
53 ***************************************************************************************
54 * @ pid        : [in]  process identifier.
55 * @ owner      : [out] pointer on buffer to store the ownership flag (non zero if owner).
56 * @ returns 0 if success / returns -1 if process not found.
57 **************************************************************************************/
58int is_fg( unsigned int pid,
59           unsigned int * owner );
60
61/***************************************************************************************
62 * This syscall writes the hardware platform parameters in the three output buffers.
63 ***************************************************************************************
64 * @ x_size   : [out] number of clusters in a row.
65 * @ y_size   : [out] number of clusters in a column.
66 * @ ncores   : [out] number of cores per cluster.
67 * @ returns always 0.
68 **************************************************************************************/
69int get_config( unsigned int * x_size,
70                unsigned int * y_size,
71                unsigned int * ncores );
72
73/***************************************************************************************
74 * This syscall writes in the <cxy> and <lid> buffers the cluster identifier
75 * and the local index of the calling core.
76 ***************************************************************************************
77 * @ cxy      : [out] cluster identifier.
78 * @ lid      : [out] core local index in cluster.
79 * @ returns always 0.
80 **************************************************************************************/
81int get_core_id( unsigned int * cxy,
82                 unsigned int * lid );
83
84/***************************************************************************************
85 * This syscall writes in the <ncores> buffer the number of cores in cluster <cxy>.
86 ***************************************************************************************
87 * @ cxy      : [in]  target cluster identifier.
88 * @ ncores   : [out] number of cores in target cluster.
89 * @ returns always 0.
90 **************************************************************************************/
91int get_nb_cores( unsigned int   cxy,
92                  unsigned int * ncores );
93
94/***************************************************************************************
95 * This syscall uses the DQDT to search, in a macro-cluster specified by the
96 * <cxy_base> and <level> arguments, the core with the lowest load.
97 * It writes in the <cxy> and <lid> buffers the selected core cluster identifier
98 * and the local core index.
99 ***************************************************************************************
100 * @ cxy_base : [in]  any cluster identifier in macro-cluster.
101 * @ level    : [in]  macro-cluster level in [1,2,3,4,5].
102 * @ cxy      : [out] selected core cluster identifier.
103 * @ lid      : [out] selected core local index.
104 * @ returns 0 if success / 1 if no core in macro-cluster / -1 if illegal arguments.
105 **************************************************************************************/
106int get_best_core( unsigned int   cxy_base,
107                   unsigned int   level,
108                   unsigned int * cxy,
109                   unsigned int * lid );
110
111/***************************************************************************************
112 * This function writes in the <cycle> buffer the value contained in the calling core
113 * cycles counter, taking into account a possible overflow on 32 bits architectures.
114 ***************************************************************************************
115 * @ cycle    : [out] current cycle value.
116 * @ returns always 0.
117 **************************************************************************************/
118int get_cycle( unsigned long long * cycle );
119
120/***************************************************************************************
121 * This syscall allows the calling thread to specify the target cluster for
122 * a subsequent fork(). It must be called for each fork().
123 ***************************************************************************************
124 * @ cxy      : [in] target cluster identifier.
125 * @ returns 0 if success / returns -1 if illegal cxy argument.
126 **************************************************************************************/
127int place_fork( unsigned int cxy );
128
129/***************************************************************************************
130 * This syscall implements the operations related to User Thread Local Storage (UTLS).
131 ***************************************************************************************
132 * @ operation  : [in] UTLS operation type as defined in "shared_syscalls.h" file.
133 * @ value      : [in] argument value for the UTLS_SET operation.
134 * @ returns value for the UTLS_GET and UTLS_GET_ERRNO operations / returns -1 if failure.
135 **************************************************************************************/
136int utls( unsigned int operation,
137          unsigned int value );
138
139/***************************************************************************************
140 * This syscall returns an unsigned 32 bits integer from the standard "stdin" stream.
141 * Both decimal numbers and hexadecimal numbers (prefixed by 0x) are supported.
142 ***************************************************************************************
143 * @ returns the integer value if success / returns -1 (converted to unsigned) if failure.
144 **************************************************************************************/
145unsigned int get_uint32( void );
146
147
148/***************** Non standard (ALMOS-MKH specific) debug functions ******************/
149
150
151/***************************************************************************************
152 * This debug syscall displays on the kernel terminal TXT0
153 * the thread / process / core identifiers, the current cycle, plus a user defined
154 * message as specified by the <string> argument.
155 ***************************************************************************************
156 * @ string    : [in] user defined message (presumably NUL-terminated - to be confirmed).
157 **************************************************************************************/
158void display_string( char * string );
159
160/***************************************************************************************
161 * This debug function displays on the kernel terminal TXT0
162 * the state of the VMM for the process <pid> in cluster <cxy>.
163 * It can be called by any thread running in any cluster.
164 ***************************************************************************************
165 * @ cxy      : [in] target cluster identifier.
166 * @ pid      : [in] process identifier.
167 * @ mapping  : [in] display detailed mapping if non zero.
168 * @ returns 0 if success / returns -1 if illegal argument.
169 **************************************************************************************/
170int display_vmm( unsigned int cxy,
171                 unsigned int pid,
172                 unsigned int mapping );
173
174/***************************************************************************************
175 * This debug syscall displays on the kernel terminal TXT0
176 * the state of the core scheduler identified by the <cxy> and <lid> arguments.
177 * It can be called by any thread running in any cluster.
178 ***************************************************************************************
179 * @ cxy      : [in] target cluster identifier.
180 * @ lid      : [in] target core local index.
181 * @ returns 0 if success / returns -1 if illegal arguments.
182 **************************************************************************************/
183int display_sched( unsigned int  cxy,
184                   unsigned int  lid );
185
186/***************************************************************************************
187 * This debug syscall displays on the kernel terminal TXT0
188 * the list of processes registered in a given cluster identified by the <cxy> argument.
189 * Only the owned processes are displayed when the <owned> argument is non zero.
190 * It can be called by any thread running in any cluster.
191 ***************************************************************************************
192 * @ cxy      : [in] target cluster identifier.
193 * @ owned    : [in] only owned processes if non zero.
194 * @ returns 0 if success / returns -1 if illegal argument.
195 **************************************************************************************/
196int display_cluster_processes( unsigned int  cxy,
197                               unsigned int  owned );
198
199/***************************************************************************************
200 * This debug syscall displays on the kernel terminal TXT0
201 * the list of processes attached to a given TXT channel.
202 * It can be called by any thread running in any cluster.
203 ***************************************************************************************
204 * @ txt_id   : [in] TXT terminal index.
205 * @ returns 0 if success / returns -1 if illegal argument.
206 **************************************************************************************/
207int display_txt_processes( unsigned int txt_id );
208
209/***************************************************************************************
210 * This debug syscall displays on the kernel terminal TXT0
211 * the set of busylocks held by a thread identified by the <pid> and <trdid> arguments.
212 * It can be called by any thread running in any cluster.
213 ***************************************************************************************
214 * @ pid      : [in] process identifier.
215 * @ trdid    : [in] thread identifier.
216 * @ returns 0 if success / returns -1 if illegal arguments.
217 **************************************************************************************/
218int display_busylocks( unsigned int pid,
219                       unsigned int trdid );
220
221/***************************************************************************************
222 * This debug syscall displays on the kernel terminal TXT0
223 * the list of channel devices available in the architecture.
224 * It can be called by any thread running in any cluster.
225 ***************************************************************************************
226 * @ returns always 0.
227 **************************************************************************************/
228int display_chdev( void );
229
230/***************************************************************************************
231 * This debug syscall displays on the kernel terminal TXT0
232 * the list of channel devices or pseudo-files registered in the VFS cache.
233 * It can be called by any thread running in any cluster.
234 ***************************************************************************************
235 * @ returns always 0.
236 **************************************************************************************/
237int display_vfs( void );
238
239/***************************************************************************************
240 * This debug syscall displays on the kernel terminal TXT0 the current DQDT state.
241 * It can be called by any thread running in any cluster.
242 ***************************************************************************************
243 * @ returns always 0.
244 **************************************************************************************/
245int display_dqdt( void );
246
247/***************************************************************************************
248 * This debug syscall displays on the kernel terminal TXT0 the content of a given
249 * page of a given VFS mapper.
250 * It can be called by any thread running in any cluster.
251 ***************************************************************************************
252 * @ path      : [in] pathname identifying the file/directory in VFS.
253 * @ page_id   : [in] page index in file.
254 * @ nbytes    : [in] number of bytes to display.
255 * @ returns 0 if success / returns -1 if file or page not found.
256 **************************************************************************************/
257int display_mapper( char        * path,
258                    unsigned int  page_id,
259                    unsigned int  nbytes);
260
261/***************************************************************************************
262 * This debug syscall displays on the kernel terminal TXT0
263 * the state of the barrier used by the process identified by the <pid> argument.
264 * It can be called by any thread running in any cluster.
265 ***************************************************************************************
266 * @ pid      : [in] process identifier.
267 * @ returns 0 if success / returns -1 if illegal arguments.
268 **************************************************************************************/
269int display_barrier( unsigned int pid );
270
271/***************************************************************************************
272 * This debug syscall displays on the kernel terminal TXT0 the content of one given
273 * page of the FAT mapper.
274 * It can be called by any thread running in any cluster.
275 ***************************************************************************************
276 * @ page_id    : [in] page index in the FAT mapper.
277 * @ nb_entries : [in] number of entries to display (NOTE(review): was documented as bytes - confirm).
278 * @ returns 0 if success / returns -1 if page not found.
279 **************************************************************************************/
280int display_fat( unsigned int page_id,
281                 unsigned int nb_entries );
282
283/*****************************************************************************************
284* This debug syscall is used to activate / deactivate the context switches trace
285* for a core identified by the <cxy> and <lid> arguments.
286* It can be called by any thread running in any cluster.
287*****************************************************************************************
288* @ active     : [in] activate trace if non zero / deactivate if zero.
289* @ cxy        : [in] cluster identifier.
290* @ lid        : [in] core local index.
291* @ returns 0 if success / returns -1 if illegal arguments.
292****************************************************************************************/
293int trace( unsigned int active,
294           unsigned int cxy, 
295           unsigned int lid );
296
297/****************************************************************************************
298 * This syscall implements a user-level interactive debugger that can be
299 * introduced in any user application to display various kernel distributed structures.
300 ***************************************************************************************/
301void idbg( void );
302
303
304/****************** Non standard (ALMOS-MKH specific) malloc operations  ***************/
305
306/////////////////////////////////////////////////////////////////////////////////////////
307// General principles:
308// - In user space the HEAP zone spreads between the ELF zone and the STACK zone,
309//   as defined in the kernel_config.h file.
310// - The malloc library uses the mmap() syscall to create - on demand -
311//   one vseg in a given cluster. The size of this vseg is defined below
312//   by the MALLOC_LOCAL_STORE_SIZE parameter.
313// - For a standard malloc(), the target cluster is the cluster containing
314//   the core running the client thread.
315// - For a remote_malloc(), the target cluster is explicitly defined
316//   by the argument.
317// - In each cluster, the available storage in virtual space is handled by a
318//   local allocator using the buddy algorithm.
319//
320// TODO : In this first implementation one single - fixed size - vseg
321//        is allocated on demand in each cluster.
322//        We should introduce the possibility to dynamically allocate
323//        several vsegs in each cluster, using several mmap when required.
324/////////////////////////////////////////////////////////////////////////////////////////
325// Free blocks organisation in each cluster :
326// - All free blocks have a size that is a power of 2, larger or equal
327//   to MALLOC_MIN_BLOCK_SIZE (typically 64 bytes).
328// - All free blocks are aligned.
329// - They are pre-classed in an array of linked lists, where all blocks in a
330//   given list have the same size.
331// - The NEXT pointer implementing those linked lists is written
332//   in the first bytes of the block itself, using the unsigned int type.
333// - The pointers on the first free block for each size are stored in an
334//   array of pointers free[32] in the storage(x,y) descriptor.
335/////////////////////////////////////////////////////////////////////////////////////////
336// Allocation policy:
337// - The block size required by the user can be any value, but the allocated
338//   block size can be larger than the requested size:
339// - The allocator computes actual_size, that is the smallest power of 2
340//   value larger or equal to the requested size AND larger or equal to
341//   MALLOC_MIN_BLOCK_SIZE.
342// - It pops the linked list of free blocks corresponding to actual_size,
343//   and returns the block B if the list[actual_size] is not empty.
344// - If the list[actual_size] is empty, it pops the list[actual_size * 2].
345//   If a block B is found, it breaks this block in 2 B/2 blocks, returns
346//   the first B/2 block and pushes the other B/2 block into list[actual_size].
347// - If the list[actual_size * 2] is empty, it pops the list[actual_size * 4].
348//   If a block B is found, it breaks this block in 3 blocks B/4, B/4 and B/2,
349//   returns the first B/4 block, push the other blocks B/4 and B/2 into
350//   the proper lists. etc...
351// - If no block satisfying the request is available it returns a failure
352//   (NULL pointer).
353// - This allocation policy has the nice following property:
354//   If the vseg is aligned (the vseg base is a multiple of the
355//   vseg size), all allocated blocks are aligned on the actual_size.
356/////////////////////////////////////////////////////////////////////////////////////////
357// Free policy:
358// - Each allocated block is registered in an alloc[] array of unsigned char.
359// - This registration is required by the free() operation, because the size
360//   of the allocated block must be obtained from the base address of the block. 
361// - The number of entries in this array is equal to the max number
362//   of allocated block : MALLOC_LOCAL_STORE_SIZE / MALLOC_MIN_BLOCK_SIZE.
363// - For each allocated block, the value registered in the alloc[] array
364//   is log2( size_of_allocated_block ).
365// - The index in this array is computed from the allocated block base address:
366//      index = (block_base - vseg_base) / MALLOC_MIN_BLOCK_SIZE
367// - The alloc[] array is stored at the end of heap segment. This consumes
368//   (1 / MALLOC_MIN_BLOCK_SIZE) of the total storage capacity.
369/////////////////////////////////////////////////////////////////////////////////////////
370
371
372#define MALLOC_INITIALIZED         0xBABEF00D   // magic number marking an initialised store
373#define MALLOC_MIN_BLOCK_SIZE      0x40         // smallest allocated block size : 64 bytes
374#define MALLOC_LOCAL_STORE_SIZE    0x800000     // size of the vseg in each cluster : 8 Mbytes
375#define MALLOC_MAX_CLUSTERS        0x100        // max number of clusters : 256
376
377/////////////////////////////////////////////////////////////////////////////////////////
378//      store(x,y) descriptor (one per cluster) : state of the local heap allocator
379/////////////////////////////////////////////////////////////////////////////////////////
380
381typedef struct malloc_store_s
382{
383    pthread_mutex_t mutex;           // lock protecting exclusive access to local heap
384    unsigned int    initialized;     // store is initialised when value == MALLOC_INITIALIZED
385    unsigned int    cxy;             // identifier of the cluster containing the store
386    unsigned int    store_base;      // store base address (NOTE(review): 32 bits address assumed)
387    unsigned int    store_size;      // store size (bytes)
388    unsigned int    alloc_base;      // alloc[] array base address
389    unsigned int    alloc_size;      // alloc[] array size (bytes)
390    unsigned int    free[32];        // for each block size : address of first free block
391} 
392malloc_store_t;
393
394/*****************************************************************************************
395 * This function allocates <size> bytes of memory in user space, and returns a pointer
396 * to the allocated buffer. The physical memory is allocated from the store located in
397 * the cluster identified by the <cxy> argument.
398 *****************************************************************************************
399 * @ size    : [in] number of requested bytes.
400 * @ cxy     : [in] target cluster identifier.
401 * @ returns a pointer on the allocated buffer if success / returns NULL if failure
402 ****************************************************************************************/
403void * remote_malloc( unsigned int size, 
404                      unsigned int cxy );
405
406/*****************************************************************************************
407 * This function releases the memory buffer identified by the <ptr> argument,
408 * to the store identified by the <cxy> argument.
409 * It displays an error message, but does nothing else, if the <ptr> is illegal.
410 *****************************************************************************************
411 * @ ptr   : [in] pointer on the released buffer.
412 * @ cxy   : [in] target cluster identifier.
413 ****************************************************************************************/
414void remote_free( void        * ptr,
415                  unsigned int  cxy );
416
417/*****************************************************************************************
418 * This function releases the memory buffer identified by the <ptr> argument,
419 * to the store located in cluster identified by the <cxy> argument, and allocates
420 * a new buffer containing <size> bytes from this store.
421 * The content of the old buffer is copied to the new buffer, up to <size> bytes.
422 * It displays an error message, but does nothing else, if the <ptr> is illegal.
423 *****************************************************************************************
424 * @ ptr     : [in] pointer on the released buffer.
425 * @ size    : [in] new buffer requested size (bytes).
426 * @ cxy     : [in] target cluster identifier.
427 * @ returns a pointer on allocated buffer if success / returns NULL if failure
428 ****************************************************************************************/
429void * remote_realloc( void        * ptr,
430                       unsigned int  size,
431                       unsigned int  cxy );
432
433/*****************************************************************************************
434 * This function allocates enough space for <count> objects that are <size> bytes
435 * of memory each from the store located in cluster identified by the <cxy> argument.
436 * The allocated memory is filled with bytes of value zero.
437 *****************************************************************************************
438 * @ count   : [in] number of requested objects.
439 * @ size    : [in] number of bytes per object.
440 * @ cxy     : [in] target cluster identifier.
441 * @ returns a pointer on allocated buffer if success / returns NULL if failure
442 ****************************************************************************************/
443void * remote_calloc( unsigned int count,
444                      unsigned int size,
445                      unsigned int cxy );
446
447/********* Non standard (ALMOS-MKH specific) pthread_parallel_create() syscall  *********/
448
449//////////////////////////////////////////////////////////////////////////////////////////
450// This system call can be used to parallelize the creation and the termination
451// of a parallel multi-threaded application. It removes the loop in the main thread that
452// creates the N working threads (N sequential pthread_create() ). It also removes the
453// loop that waits completion of these N working threads (N sequential pthread_join() ).
454// It creates one "work" thread (in detached mode) per core in the target architecture.
455// Each "work" thread is identified by the [cxy][lid] indexes (cluster / local core).
456// The pthread_parallel_create() function returns only when all "work" threads completed
457// (successfully or not).
458//
459// To use this system call, the application code must define the following structures:
460// - To define the arguments to pass to the <work> function the application must allocate
461//   and initialize a first 2D array, indexed by [cxy] and [lid] indexes, where each slot
462//   contains an application specific structure, and another 2D array, indexed by the same
463//   indexes, containing pointers on these structures. This array of pointers is one
464//   argument of the pthread_parallel_create() function.
465// - To detect the completion of the <work> threads, the application must allocate a 1D
466//   array, indexed by the cluster index [cxy], where each slot contains a pthread_barrier
467//   descriptor. This barrier is initialised by the pthread_parallel_create() function,
468//   in all clusters containing at least one work thread. This array of barriers is another
469//   argument of the pthread_parallel_create() function.
470//
471// Implementation note:
472// To parallelize the "work" threads creation and termination, the pthread_parallel_create()
473// function creates a distributed quad-tree (DQT) of "build" threads covering all cores
474// required to execute the parallel application.
475// Depending on the hardware topology, this DQT can be truncated, (i.e. some
476// parent nodes can have less than 4 children), if (x_size != y_size), or if one size
477// is not a power of 2. Each "build" thread is identified by two indexes [cxy][level].
478// Each "build" thread makes the following tasks:
479// 1) It calls the pthread_create() function to create up to 4 children threads, that
480//    are "work" threads when (level == 0), or "build" threads, when (level > 0).
481// 2) It initializes the barrier (global variable), used to block/unblock
482//    the parent thread until children completion.
483// 3) It calls the pthread_barrier_wait( self ) to wait until all children threads
484//    completed (successfully or not).
485// 4) It calls the pthread_barrier_wait( parent ) to unblock the parent thread.
486//////////////////////////////////////////////////////////////////////////////////////////
487
488/*****************************************************************************************
489 * This blocking function creates N working threads that execute the code defined
490 * by the <work_func> and <work_args_array> arguments.
491 * The number N of created threads is entirely defined by the <root_level> argument.
492 * This value defines an abstract quad-tree, with a square base : level in [0,1,2,3,4],
493 * side in [1,2,4,8,16], nclusters in [1,4,16,64,256]. This base is called macro-cluster.
494 * A working thread is created on all cores contained in the specified macro-cluster.
495 * The actual number of physical clusters containing cores can be smaller than the number
496 * of clusters covered by the quad tree. The actual number of cores in a cluster can be
497 * less than the max value.
498 *
499 * In the current implementation, all threads execute the same <work_func> function,
500 * on different arguments, specified by the 2D array of pointers <work_args_array>.
501 * This can be modified in a future version, where the <work_func> argument can become
502 * a 2D array of pointers, to have one specific function for each thread.
503 *****************************************************************************************
504 * @ root_level            : [in]  DQT root level in [0,1,2,3,4].
505 * @ work_func             : [in]  pointer on start function.
506 * @ work_args_array       : [in]  pointer on a 2D array of pointers, indexed by [cxy][lid].
507 * @ parent_barriers_array : [in]  pointer on a 1D array of barriers, indexed by [cxy].
508 * @ returns 0 if success / returns -1 if failure.
509 ****************************************************************************************/
510int pthread_parallel_create( unsigned int   root_level,
511                             void         * work_func,
512                             void         * work_args_array,
513                             void         * parent_barriers_array );
514
515#endif /* _LIBALMOSMKH_H_ */
516
Note: See TracBrowser for help on using the repository browser.