Changeset 637 for trunk/libs/libalmosmkh/almosmkh.c
- Timestamp:
- Jul 18, 2019, 2:06:55 PM (5 years ago)
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/libs/libalmosmkh/almosmkh.c
r626 r637 2 2 * almosmkh.c - User level ALMOS-MKH specific library implementation. 3 3 * 4 * Author Alain Greiner (2016,2017,2018 )4 * Author Alain Greiner (2016,2017,2018,2019) 5 5 * 6 6 * Copyright (c) UPMC Sorbonne Universites … … 24 24 #include <almosmkh.h> 25 25 #include <hal_user.h> 26 #include <hal_macros.h> 26 27 #include <hal_shared_types.h> 27 28 #include <syscalls_numbers.h> … … 32 33 #include <mman.h> 33 34 34 #define MALLOC_DEBUG 0 35 #define DEBUG_REMOTE_MALLOC 0 36 #define DEBUG_PTHREAD_PARALLEL 1 35 37 36 ///////////// Non standard system calls ///////////////////////////////// 38 ////////////////////////////////////////////////////////////////////////////////////// 39 ///////////// Non standard system calls /////////////////////////////////////// 40 ////////////////////////////////////////////////////////////////////////////////////// 37 41 38 42 ////////////////////////// … … 63 67 } 64 68 65 ///////////////////////////////// 66 int get_core ( unsigned int * cxy,67 unsigned int * lid )68 { 69 return hal_user_syscall( SYS_GET_CORE ,69 //////////////////////////////////// 70 int get_core_id( unsigned int * cxy, 71 unsigned int * lid ) 72 { 73 return hal_user_syscall( SYS_GET_CORE_ID, 70 74 (reg_t)cxy, 71 75 (reg_t)lid, 0, 0 ); 76 } 77 78 ///////////////////////////////////// 79 int get_nb_cores( unsigned int cxy, 80 unsigned int * ncores ) 81 { 82 return hal_user_syscall( SYS_GET_NB_CORES, 83 (reg_t)cxy, 84 (reg_t)ncores, 0, 0 ); 85 } 86 87 /////////////////////////////////////////// 88 int get_best_core( unsigned int base_cxy, 89 unsigned int level, 90 unsigned int * cxy, 91 unsigned int * lid ) 92 { 93 return hal_user_syscall( SYS_GET_BEST_CORE, 94 (reg_t)base_cxy, 95 (reg_t)level, 96 (reg_t)cxy, 97 (reg_t)lid ); 72 98 } 73 99 … … 250 276 } // end get_string() 251 277 252 253 /////////////// non standard debug functions ////////////////////////// 278 ////////////////////////////////////////////////////////////////////////////////////// 279 /////////////// non standard debug functions /////////////////////////////////// 280 ////////////////////////////////////////////////////////////////////////////////////// 254 281 255 282 //////////////////////////////////// … … 496 523 497 524 498 /////////////// non standard malloc functions ////////////////////////// 525 ///////////////////////////////////////////////////////////////////////////////////////// 526 /////////////// non standard remote_malloc //////////////////////////////////////// 527 ///////////////////////////////////////////////////////////////////////////////////////// 499 528 500 529 ///////////////////////////////////////////////////////////////////////////////////////// 501 530 // Global variable defining the allocator array (one per cluster) 502 531 // This array (about 16 Kbytes ) will be stored in the data segment 503 // of any application linked with this malloclibray.532 // of any application linked with this libray. 504 533 ///////////////////////////////////////////////////////////////////////////////////////// 505 534 … … 546 575 //////////////////////////////////////////////////////////////////////////////////////////// 547 576 548 #if MALLOC_DEBUG577 #if DEBUG_REMOTE_MALLOC 549 578 static void display_free_array( unsigned int cxy ) 550 579 { … … 594 623 unsigned int iter; // iterator 595 624 596 #if MALLOC_DEBUG 597 printf("\n[MALLOC] %s : enter for store[%x] / size = %x\n", 598 __FUNCTION__, cxy, store_size ); 625 #if DEBUG_REMOTE_MALLOC 626 unsigned int core_cxy; 627 unsigned int core_lid; 628 get_core_id( &core_cxy , &core_lid ); 629 printf("\n[%s] core[%x,%d] enter for store[%x] / size = %x\n", 630 __FUNCTION__, core_cxy, core_lid, cxy, store_size ); 599 631 #endif 600 632 … … 635 667 } 636 668 637 #if MALLOC_DEBUG638 printf("\n[ MALLOC] %s : mmap done for store[%x] / base = %x\n",639 __FUNCTION__, c xy, store_base);669 #if DEBUG_REMOTE_MALLOC 670 printf("\n[%s] core[%x,%d] created vseg %x for store[%x]\n", 671 __FUNCTION__, core_cxy, core_lid, store_base, cxy ); 640 672 #endif 641 673 … … 656 688 } 657 689 658 // DEPRECATED: we don't reset the alloc_base array659 // because we don't want to allocate the physical memory660 // when the heap is created [AG]661 // memset( (void *)alloc_base , 0 , alloc_size );662 663 690 // split the store into various sizes blocks, 664 691 // initializes the free[] array and NEXT pointers … … 690 717 691 718 692 #if MALLOC_DEBUG 693 printf("\n[MALLOC] %s : completes store[%x] initialisation\n", 694 __FUNCTION__, cxy ); 695 719 #if DEBUG_REMOTE_MALLOC 720 printf("\n[%s] core[%x,%d] completed store[%x] initialisation\n", 721 __FUNCTION__, core_cxy, core_lid, cxy ); 722 #endif 723 724 #if (DEBUG_REMOTE_MALLOC & 1) 696 725 display_free_array( cxy ); 697 726 #endif … … 762 791 int error; 763 792 764 #if MALLOC_DEBUG 765 printf("\n[MALLOC] %s : enter for size = %x / cxy = %x\n", 766 __FUNCTION__ , size , cxy ); 793 #if DEBUG_REMOTE_MALLOC 794 unsigned int core_cxy; 795 unsigned int core_lid; 796 get_core_id( &core_cxy , &core_lid ); 797 printf("\n[%s] core[%x,%d] enter for size = %x / target_cxy = %x\n", 798 __FUNCTION__ , core_cxy, core_lid, size , cxy ); 767 799 #endif 768 800 … … 828 860 unsigned char * ptr = (unsigned char*)(store[cxy].alloc_base + offset); 829 861 830 // DEPRECATED : we cannot check the alloc[] array,831 // because it has not been initialised by store_init,832 // to avoid physical memory allocation at heap creation [AG]833 // if ( *ptr != 0 )834 // {835 // pthread_mutex_unlock( &store[cxy].mutex );836 // printf("\n[PANIC] in %s : allocate an already allocated block...\n",837 // __FUNCTION__ );838 // return NULL;839 // }840 841 862 // update alloc_array 842 863 *ptr = requested_index; … … 845 866 pthread_mutex_unlock( &store[cxy].mutex ); 846 867 847 #if MALLOC_DEBUG848 printf("\n[ MALLOC] %s :exit / base = %x / size = %x / from store[%x]\n",849 __FUNCTION__, base , size , cxy );868 #if DEBUG_REMOTE_MALLOC 869 printf("\n[%s] core[%x,%d] exit / base = %x / size = %x / from store[%x]\n", 870 __FUNCTION__, core_cxy, core_lid, base , size , cxy ); 850 871 #endif 851 872 … … 853 874 854 875 } // end remote_malloc() 855 856 857 876 858 877 ////////////////////////////////////////// … … 920 939 921 940 return new_ptr; 922 } 941 942 } // end remote_realloc() 943 923 944 924 945 ////////////////////////////////////////////////////// … … 991 1012 { 992 1013 993 #if MALLOC_DEBUG1014 #if DEBUG_REMOTE_MALLOC 994 1015 printf("\n[MALLOC] %s : enter for block = %x / cxy = %x\n", 995 1016 __FUNCTION__, ptr, cxy ); … … 1052 1073 pthread_mutex_unlock( &store[cxy].mutex ); 1053 1074 1054 #if MALLOC_DEBUG1075 #if DEBUG_REMOTE_MALLOC 1055 1076 printf("\n[MALLOC] %s : conmpletes for block = %x / cxy = %x\n", 1056 1077 __FUNCTION__, ptr, cxy ); … … 1058 1079 1059 1080 } // end remote_free() 1081 1082 ///////////////////////////////////////////////////////////////////////////////////////// 1083 /////////////// non standard pthread_parallel_create ////////////////////////////// 1084 ///////////////////////////////////////////////////////////////////////////////////////// 1085 1086 #define X_MAX 16 // max number of clusters in a row 1087 #define Y_MAX 16 // max number of clusters in a column 1088 #define CLUSTERS_MAX X_MAX * Y_MAX 1089 #define LEVEL_MAX 5 1090 #define CORES_MAX 4 // max number of cores per cluster 1091 1092 typedef struct build_args_s 1093 { 1094 unsigned char cxy; // this thread cluster identifier 1095 unsigned char level; // this thread level in quad-tree 1096 unsigned char parent_cxy; // parent thread cluster identifier 1097 unsigned char root_level; // quad-tree root level 1098 void * work_func; // pointer on work function pointer 1099 void * work_args_array; // pointer on 2D array of pointers 1100 pthread_barrier_t * parent_barriers_array; // pointer on 1D array of barriers 1101 unsigned int error; // return value : 0 if success 1102 } 1103 build_args_t; 1104 1105 ///////////////////////////////////////////////////////////////////////////////////////// 1106 // Global variables used for inter-thread communications 1107 ///////////////////////////////////////////////////////////////////////////////////////// 1108 1109 pthread_attr_t build_attr [CLUSTERS_MAX][LEVEL_MAX]; // POSIX thread attributes 1110 1111 build_args_t build_args [CLUSTERS_MAX][LEVEL_MAX]; // build function arguments 1112 1113 pthread_barrier_t build_barrier[CLUSTERS_MAX][LEVEL_MAX]; // parent/child synchro 1114 1115 pthread_attr_t work_attr [CLUSTERS_MAX][CORES_MAX]; // POSIX thread attributes 1116 1117 ////////////////////////////////////////////////////////// 1118 static void pthread_recursive_build( build_args_t * args ) 1119 { 1120 unsigned int trdid; // unused (required by pthread_create() 1121 1122 // get arguments 1123 unsigned int cxy = args->cxy; 1124 unsigned int level = args->level; 1125 unsigned int parent_cxy = args->parent_cxy; 1126 unsigned int root_level = args->root_level; 1127 void * work_func = args->work_func; 1128 void * work_args_array = args->work_args_array; 1129 pthread_barrier_t * parent_barriers_array = args->parent_barriers_array; 1130 1131 // set error default value 1132 build_args[cxy][level].error = 0; 1133 1134 /////////////////////////////////////////////////////////// 1135 if( level == 0 ) // children are "work" threads 1136 { 1137 unsigned int lid; // core local index 1138 unsigned int ncores; // number of cores in a cluster 1139 1140 // get number of cores per cluster 1141 get_nb_cores( cxy , &ncores ); 1142 1143 // kill process if no active core in cluster 1144 // TODO this "if" should be replaced by an "assert" [AG] 1145 if( ncores == 0 ) 1146 { 1147 printf("\n[PANIC] in %s : no active core in cluster %x\n", 1148 __FUNCTION__ , cxy ); 1149 1150 // report error to parent 1151 build_args[parent_cxy][level+1].error = 1; 1152 1153 // kill process 1154 exit( EXIT_FAILURE ); 1155 } 1156 1157 // initialize the parent_barrier 1158 if( pthread_barrier_init( &parent_barriers_array[cxy] , NULL , ncores + 1 ) ) 1159 { 1160 printf("\n[ERROR] in %s : cannot initialise barrier for build thread[%x][%d]\n", 1161 __FUNCTION__ , cxy , level ); 1162 1163 // report error to parent 1164 build_args[parent_cxy][level+1].error = 1; 1165 } 1166 1167 #if DEBUG_PTHREAD_PARALLEL 1168 printf("\n[%s] <build> thread[%x][%d] created barrier / %d children\n", 1169 __FUNCTION__, cxy, level, ncores + 1 ); 1170 #endif 1171 // create (ncores) "work" threads 1172 for ( lid = 0 ; lid < ncores ; lid++ ) 1173 { 1174 // set attributes for thread[cxy][lid] 1175 work_attr[cxy][lid].attributes = PT_ATTR_DETACH | 1176 PT_ATTR_CLUSTER_DEFINED | 1177 PT_ATTR_CORE_DEFINED; 1178 work_attr[cxy][lid].cxy = cxy; 1179 work_attr[cxy][lid].lid = lid; 1180 1181 // compute pointer on thread[cxy][lid] arguments 1182 void * work_args = *((void **)work_args_array + (cxy * CORES_MAX) + lid); 1183 1184 // create thread 1185 if ( pthread_create( &trdid, // unused 1186 &work_attr[cxy][lid], 1187 work_func, 1188 work_args ) ) 1189 { 1190 printf("\n[ERROR] in %s : cannot create work thread[%x,%x]\n", 1191 __FUNCTION__ , cxy , lid ); 1192 1193 // report error to parent 1194 build_args[parent_cxy][level+1].error = 1; 1195 } 1196 1197 #if DEBUG_PTHREAD_PARALLEL 1198 printf("\n[%s] <build> thread[%x][%d] created <work> thread[%x][%d]\n", 1199 __FUNCTION__, cxy, level, cxy, lid ); 1200 #endif 1201 } 1202 1203 // wait on barrier until "work" children threads completed 1204 if( pthread_barrier_wait( &parent_barriers_array[cxy] ) ) 1205 { 1206 printf("\n[ERROR] in %s / first barrier for <build> thread[%x][%d]\n", 1207 __FUNCTION__ , cxy , level ); 1208 1209 // report error to parent 1210 build_args[parent_cxy][level+1].error = 1; 1211 } 1212 1213 #if DEBUG_PTHREAD_PARALLEL 1214 printf("\n[%s] <build> thread[%x][%d] resume after children completion\n", 1215 __FUNCTION__, cxy, level ); 1216 #endif 1217 1218 } // end level == 0 1219 1220 //////////////////////////////////////////////////////////// 1221 else // children are "build" threads 1222 { 1223 // the 4 children threads can be created in any core of each quarters 1224 // of the parent macro-cluster 1225 1226 unsigned int parent_x; // X coordinate of parent macro-cluster 1227 unsigned int parent_y; // Y coordinate of parent macro-cluster 1228 unsigned int child_x; // X coordinate of child macro-cluster 1229 unsigned int child_y; // Y coordinate of child macro-cluster 1230 unsigned int child_cxy[2][2]; // selected cluster for child thread 1231 unsigned int child_lid[2][2]; // selected core index for child thread 1232 int child_sts[2][2]; // -1 if error / 0 if success / +1 if not found 1233 unsigned int x; // X loop index for children 1234 unsigned int y; // Y loop index for children 1235 1236 unsigned int nb_children = 0; 1237 1238 // get parent macro-cluster mask and half-size from level 1239 unsigned int mask = (1 << level) - 1; 1240 unsigned int half = (level > 0) ? (1 << (level - 1)) : 0; 1241 1242 // get parent macro-cluster coordinates 1243 parent_x = HAL_X_FROM_CXY( cxy ) & ~mask; 1244 parent_y = HAL_Y_FROM_CXY( cxy ) & ~mask; 1245 1246 // get child_cxy and child_lid for up to 4 children threads : 00 / 01 / 10 / 11 1247 for (x = 0 ; x < 2 ; x++) 1248 { 1249 // compute child macro-cluster X coordinate 1250 child_x = (x == 0) ? parent_x : (parent_x + half); 1251 1252 for (y = 0 ; y < 2 ; y++) 1253 { 1254 // compute child macro-cluster Y coordinate 1255 child_y = (y == 0) ? parent_y : (parent_y + half); 1256 1257 // select the best core in macro-cluster 1258 child_sts[x][y] = get_best_core( HAL_CXY_FROM_XY( child_x , child_y ), 1259 level-1, 1260 &child_cxy[x][y], 1261 &child_lid[x][y] ); 1262 1263 if( child_sts[x][y] < 0 ) // failure => report error 1264 { 1265 printf("\n[ERROR] in %s : illegal arguments for <build> thread[%x,%x]\n", 1266 __FUNCTION__ , cxy , level ); 1267 1268 // report error to parent 1269 build_args[parent_cxy][level+1].error = 1; 1270 } 1271 else if (child_sts[x][y] > 0 ) // macro-cluster undefined => does nothing 1272 { 1273 } 1274 else // core found 1275 { 1276 nb_children++; 1277 } 1278 } // end for y 1279 } // end for x 1280 1281 // kill process if no active core in cluster 1282 // TODO this "if" should be replaced by an "assert" [AG] 1283 if( nb_children == 0 ) 1284 { 1285 printf("\n[PANIC] in %s : no active core in macro cluster [%x,%d]\n", 1286 __FUNCTION__ , cxy , level ); 1287 1288 // report error to parent 1289 build_args[parent_cxy][level+1].error = 1; 1290 1291 // kill process 1292 exit( EXIT_FAILURE ); 1293 } 1294 1295 // initialize the barrier for (nb_children + 1) 1296 if( pthread_barrier_init( &build_barrier[cxy][level], NULL , nb_children + 1 ) ) 1297 { 1298 printf("\n[error] in %s : cannot initialise barrier for build thread[%x][%d]\n", 1299 __FUNCTION__ , cxy , level ); 1300 1301 // report error to parent 1302 build_args[parent_cxy][level+1].error = 1; 1303 } 1304 1305 #if DEBUG_PTHREAD_PARALLEL 1306 printf("\n[%s] <build> thread[%x][%d] created barrier / %d children\n", 1307 __FUNCTION__, cxy, level, nb_children + 1 ); 1308 #endif 1309 // create 1 to 4 children threads 1310 for (x = 0 ; x < 2 ; x++) 1311 { 1312 for (y = 0 ; y < 2 ; y++) 1313 { 1314 // thread is created only if macro-cluster is active 1315 if( child_sts[x][y] == 0 ) 1316 { 1317 unsigned int tgt_cxy = child_cxy[x][y]; 1318 unsigned int tgt_lid = child_lid[x][y]; 1319 1320 // set child thread attributes 1321 build_attr[tgt_cxy][level-1].attributes = PT_ATTR_DETACH | 1322 PT_ATTR_CLUSTER_DEFINED | 1323 PT_ATTR_CORE_DEFINED; 1324 build_attr[tgt_cxy][level-1].cxy = tgt_cxy; 1325 build_attr[tgt_cxy][level-1].lid = tgt_lid; 1326 1327 // propagate build function arguments 1328 build_args[tgt_cxy][level-1].cxy = child_cxy[x][y]; 1329 build_args[tgt_cxy][level-1].level = level-1; 1330 build_args[tgt_cxy][level-1].parent_cxy = cxy; 1331 build_args[tgt_cxy][level-1].root_level = root_level; 1332 build_args[tgt_cxy][level-1].work_func = work_func; 1333 build_args[tgt_cxy][level-1].work_args_array = work_args_array; 1334 build_args[tgt_cxy][level-1].parent_barriers_array = parent_barriers_array; 1335 1336 // create thread 1337 if( pthread_create( &trdid, 1338 &build_attr[tgt_cxy][level-1], 1339 &pthread_recursive_build, 1340 &build_args[tgt_cxy][level-1] ) ) 1341 { 1342 printf("\n[ERROR] in %s : cannot create build thread[%x][%d]\n", 1343 __FUNCTION__ , child_cxy , level -1 ); 1344 1345 // report error to parent 1346 build_args[parent_cxy][level+1].error = 1; 1347 } 1348 1349 #if DEBUG_PTHREAD_PARALLEL 1350 printf("\n[%s] <build> thread[%x][%d] created <build> thread[%x][%d] on core[%x,%d]\n", 1351 __FUNCTION__, cxy, level, tgt_cxy, level - 1, tgt_cxy, tgt_lid ); 1352 #endif 1353 } //end if sts[x][y] 1354 } // end for y 1355 } // end for x 1356 1357 // wait on barrier until "build" children threads completed 1358 if( pthread_barrier_wait( &build_barrier[cxy][level] ) ) 1359 { 1360 printf("\n[ERROR] in %s / first barrier for <build> thread[%x][%d]\n", 1361 __FUNCTION__ , cxy , level ); 1362 1363 // report error to parent 1364 build_args[parent_cxy][level+1].error = 1; 1365 } 1366 1367 #if DEBUG_PTHREAD_PARALLEL 1368 printf("\n[%s] <build> thread[%x][%d] resume after children completion\n", 1369 __FUNCTION__, cxy, level ); 1370 #endif 1371 1372 } // end level > 0 1373 1374 // report error to parent when required 1375 if( build_args[cxy][level].error ) 1376 { 1377 build_args[parent_cxy][level+1].error = 1; 1378 } 1379 1380 // all <build> threads - but the root - 1381 // signal completion to parent thread and exit 1382 if( level < root_level ) 1383 { 1384 if( pthread_barrier_wait( &build_barrier[parent_cxy][level+1] ) ) 1385 { 1386 printf("\n[ERROR] in %s / second barrier for <build> thread[%x][%d]\n", 1387 __FUNCTION__ , cxy , level ); 1388 1389 // report error to parent 1390 build_args[parent_cxy][level+1].error = 1; 1391 } 1392 1393 #if DEBUG_PTHREAD_PARALLEL 1394 printf("\n[%s] <build> thread[%x][%d] exit\n", 1395 __FUNCTION__, cxy , level ); 1396 #endif 1397 // "build" thread exit 1398 pthread_exit( NULL ); 1399 } 1400 } // end pthread_recursive_build() 1401 1402 /////////////////////////////////////////////////////// 1403 int pthread_parallel_create( unsigned int root_level, 1404 void * work_func, 1405 void * work_args_array, 1406 void * parent_barriers_array ) 1407 { 1408 unsigned int root_cxy; 1409 unsigned int root_lid; // unused, but required by get_core_id() 1410 1411 #if DEBUG_PTHREAD_PARALLEL 1412 printf("\n[%s] enter / root_level %d / func %x / args %x / barriers %x\n", 1413 __FUNCTION__, root_level, work_func, work_args_array, parent_barriers_array ); 1414 #endif 1415 1416 // get calling thread cluster 1417 get_core_id( &root_cxy , &root_lid ); 1418 1419 // set the build function arguments for the root <build> thread 1420 build_args[root_cxy][root_level].cxy = root_cxy; 1421 build_args[root_cxy][root_level].level = root_level; 1422 build_args[root_cxy][root_level].root_level = root_level; 1423 build_args[root_cxy][root_level].work_func = work_func; 1424 build_args[root_cxy][root_level].work_args_array = work_args_array; 1425 build_args[root_cxy][root_level].parent_barriers_array = parent_barriers_array; 1426 1427 // call the recursive build function 1428 pthread_recursive_build( &build_args[root_cxy][root_level] ); 1429 1430 // check error 1431 if( build_args[root_cxy][root_level].error ) 1432 { 1433 printf("\n[error] in %s\n", __FUNCTION__ ); 1434 return -1; 1435 } 1436 1437 return 0; 1438 1439 } // end pthread_parallel_create() 1440 1441 1060 1442 1061 1443 // Local Variables:
Note: See TracChangeset
for help on using the changeset viewer.