source: trunk/kernel/kern/ksocket.c @ 662

Last change on this file since 662 was 662, checked in by alain, 4 years ago

Introduce the ksocket.h & ksocket.c files in kernel/kern.

File size: 78.9 KB
Line 
1/*
2 * ksocket.c - kernel socket API implementation.
3 *
4 * Authors  Alain Greiner   (2016,2017,2018,2019,2020)
5 *
6 * Copyright (c) UPMC Sorbonne Universites
7 *
8 * This file is part of ALMOS-MKH.
9 *
10 * ALMOS-MKH is free software; you can redistribute it and/or modify it
11 * under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; version 2.0 of the License.
13 *
14 * ALMOS-MKH is distributed in the hope that it will be useful, but
15 * WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
17 * General Public License for more details.
18 *
19 * You should have received a copy of the GNU General Public License
20 * along with ALMOS-MKH.; if not, write to the Free Software Foundation,
21 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
22 */
23
24#include <kernel_config.h>
25#include <hal_kernel_types.h>
26#include <hal_remote.h>
27#include <hal_uspace.h>
28#include <shared_socket.h>
29#include <process.h>
30#include <remote_buf.h>
31#include <bits.h>
32#include <printk.h>
33#include <kmem.h>
34#include <thread.h>
35#include <vfs.h>
36#include <ksocket.h>
37#include <dev_nic.h>
38
39//////////////////////////////////////////////////////////////////////////////////////
40// Extern global variables
41//////////////////////////////////////////////////////////////////////////////////////
42
43extern chdev_directory_t  chdev_dir;         // allocated in kernel_init.c
44
45///////////////////////////////////////////
46char * socket_domain_str( uint32_t domain )
47{
48    switch( domain )
49    {
50        case AF_INET         : return "INET";
51        case AF_LOCAL        : return "LOCAL";
52
53        default              : return "undefined";
54    }
55}
56   
57///////////////////////////////////////
58char * socket_type_str( uint32_t type )
59{
60    switch( type )
61    {
62        case SOCK_DGRAM         : return "UDP";
63        case SOCK_STREAM        : return "TCP";
64
65        default                 : return "undefined";
66    }
67}
68   
69/////////////////////////////////////////
70char * socket_state_str( uint32_t state )
71{
72    switch( state )
73    {
74        case UDP_STATE_UNBOUND    : return "UDP_UNBOUND";
75        case UDP_STATE_BOUND      : return "UDP_BOUND";
76        case UDP_STATE_ESTAB      : return "UDP_ESTAB";
77
78        case TCP_STATE_UNBOUND    : return "TCP_UNBOUND";
79        case TCP_STATE_BOUND      : return "TCP_BOUND";
80        case TCP_STATE_LISTEN     : return "TCP_LISTEN";
81        case TCP_STATE_SYN_SENT   : return "TCP_SYN_SENT";
82        case TCP_STATE_SYN_RCVD   : return "TCP_SYN_RCVD";
83        case TCP_STATE_ESTAB      : return "TCP_ESTAB";
84        case TCP_STATE_FIN_WAIT1  : return "TCP_FIN_WAIT1";
85        case TCP_STATE_FIN_WAIT2  : return "TCP_FIN_WAIT2";
86        case TCP_STATE_CLOSING    : return "TCP_CLOSING";
87        case TCP_STATE_TIME_WAIT  : return "TCP_TIME_WAIT";
88        case TCP_STATE_CLOSE_WAIT : return "TCP_CLOSE_WAIT";
89        case TCP_STATE_LAST_ACK   : return "TCP_LAST_ACK";
90        case TCP_STATE_CLOSED     : return "TCP_CLOSED";
91
92        default                   : return "undefined";
93    }
94}
95
96///////////////////////////////////////////
97char * socket_cmd_type_str( uint32_t type )
98{
99    switch( type )
100    {
101        case CMD_TX_CONNECT  : return "TX_CONNECT";
102        case CMD_TX_ACCEPT   : return "TX_ACCEPT";
103        case CMD_TX_CLOSE    : return "TX_CLOSE";
104        case CMD_TX_SEND     : return "TX_SEND";
105
106        case CMD_RX_ACCEPT   : return "RX_ACCEPT";
107        case CMD_RX_RECV     : return "RX_RECV";
108       
109        default                 : return "undefined";
110    }
111}
112   
113///////////////////////////////////////////
114char * socket_cmd_sts_str( uint32_t sts )
115{
116    switch( sts )
117    {
118        case CMD_STS_SUCCESS  : return "TX_CONNECT";
119        case CMD_STS_EOF      : return "EOF";
120        case CMD_STS_RST      : return "RST";
121        case CMD_STS_BADACK   : return "BADACK";
122        case CMD_STS_BADSTATE : return "BADSTATE";
123        case CMD_STS_BADCMD   : return "BADCMD";
124       
125        default               : return "undefined";
126    }
127}
128
129/////////////////////////////////////////////////////////////////////////////////////////
130// This static function registers the socket defined by the <socket_xp> argument into
131// the lists of sockets attached to the relevant NIC_TX and NIC_TX chdevs identified
132// by the <channel> argument, and update the channel field in socket descriptor.
133/////////////////////////////////////////////////////////////////////////////////////////
134// @ socket_xp   : [in]  extended pointer on socket descriptor.
135// @ channel     : [in]  NIC channel index.
136/////////////////////////////////////////////////////////////////////////////////////////
137static void socket_link_to_servers( xptr_t   socket_xp,
138                                    uint32_t channel )
139{
140    cxy_t      socket_cxy = GET_CXY( socket_xp );
141    socket_t * socket_ptr = GET_PTR( socket_xp );
142
143#if DEBUG_SOCKET_LINK
144thread_t  * this        = CURRENT_THREAD;
145process_t * process     = this->process;
146pid_t       socket_pid  = hal_remote_l32( XPTR( socket_cxy , &socket_ptr->pid ));
147fdid_t      socket_fdid = hal_remote_l32( XPTR( socket_cxy , &socket_ptr->pid ));
148uint32_t   cycle        = (uint32_t)hal_get_cycles();
149if( DEBUG_SOCKET_LINK < cycle )
150printk("\n[%s] thread[%x,%x] enter for socket[%x,%d] / cycle %d\n",
151__FUNCTION__, process->pid, this->trdid, socket_pid, socket_fdid, cycle ); 
152#endif
153
154    // get pointers on NIC_TX[channel] chdev
155    xptr_t    tx_chdev_xp  = chdev_dir.nic_tx[channel];
156    chdev_t * tx_chdev_ptr = GET_PTR( tx_chdev_xp );
157    cxy_t     tx_chdev_cxy = GET_CXY( tx_chdev_xp );
158
159    // build various TX extended pointers
160    xptr_t    tx_root_xp = XPTR( tx_chdev_cxy , &tx_chdev_ptr->wait_root );
161    xptr_t    tx_lock_xp = XPTR( tx_chdev_cxy , &tx_chdev_ptr->wait_lock );
162    xptr_t    tx_list_xp = XPTR( socket_cxy   , &socket_ptr->tx_list );
163
164    // get pointers on NIC_RX[channel] chdev
165    xptr_t    rx_chdev_xp  = chdev_dir.nic_rx[channel];
166    chdev_t * rx_chdev_ptr = GET_PTR( rx_chdev_xp );
167    cxy_t     rx_chdev_cxy = GET_CXY( rx_chdev_xp );
168
169    // build various RX extended pointers
170    xptr_t    rx_root_xp = XPTR( rx_chdev_cxy , &rx_chdev_ptr->wait_root );
171    xptr_t    rx_lock_xp = XPTR( rx_chdev_cxy , &rx_chdev_ptr->wait_lock );
172    xptr_t    rx_list_xp = XPTR( socket_cxy   , &socket_ptr->rx_list );
173
174    // register socket in the NIC_TX[channel] chdev clients queue
175    remote_busylock_acquire( tx_lock_xp );
176    xlist_add_last( tx_root_xp , tx_list_xp );
177    remote_busylock_release( tx_lock_xp );
178
179    // register socket in the NIC_RX[channel] chdev clients queue
180    remote_busylock_acquire( rx_lock_xp );
181    xlist_add_last( rx_root_xp , rx_list_xp );
182    remote_busylock_release( rx_lock_xp );
183
184#if DEBUG_SOCKET_LINK
185cycle = (uint32_t)hal_get_cycles();
186if( DEBUG_SOCKET_LINK < cycle )
187printk("\n[%s] thread[%x,%x] linked socket[%x,%d] to channel %d / cycle %d\n",
188__FUNCTION__, process->pid, this->trdid, process->pid, socket_pid, socket_fdid, channel, cycle );
189#endif
190
191}  // end socket_link_to_servers()
192
193/////////////////////////////////////////////////////////////////////////////////////////
194// This function removes the socket defined by the <socket_xp> argument from the
195// lists of sockets attached to the relevant NIC_TX and NIC_TX chdevs.
196/////////////////////////////////////////////////////////////////////////////////////////
197// @ socket_xp   : [in]  extended pointer on socket descriptor
198/////////////////////////////////////////////////////////////////////////////////////////
199static void socket_unlink_from_servers( xptr_t socket_xp )
200{
201    cxy_t      socket_cxy = GET_CXY( socket_xp );
202    socket_t * socket_ptr = GET_PTR( socket_xp );
203
204#if DEBUG_SOCKET_LINK
205thread_t  * this        = CURRENT_THREAD;
206process_t * process     = this->process;
207pid_t       socket_pid  = hal_remote_l32( XPTR( socket_cxy , &socket_ptr->pid ));
208fdid_t      socket_fdid = hal_remote_l32( XPTR( socket_cxy , &socket_ptr->pid ));
209uint32_t   cycle        = (uint32_t)hal_get_cycles();
210if( DEBUG_SOCKET_LINK < cycle )
211printk("\n[%s] thread[%x,%x] enter for socket[%x,%d] / cycle %d\n",
212__FUNCTION__, process->pid, this->trdid, socket_pid, socket_fdid, cycle ); 
213#endif
214
215    // get NIC channel
216    uint32_t channel = hal_remote_l32( XPTR( socket_cxy , &socket_ptr->nic_channel ));
217
218    // get pointers on NIC_TX[channel] chdev
219    xptr_t    tx_chdev_xp  = chdev_dir.nic_tx[channel];
220    chdev_t * tx_chdev_ptr = GET_PTR( tx_chdev_xp );
221    cxy_t     tx_chdev_cxy = GET_CXY( tx_chdev_xp );
222
223    // build various TX extended pointers
224    xptr_t    tx_lock_xp = XPTR( tx_chdev_cxy , &tx_chdev_ptr->wait_lock );
225    xptr_t    tx_list_xp = XPTR( socket_cxy   , &socket_ptr->tx_list );
226
227    // get pointers on NIC_RX[channel] chdev
228    xptr_t    rx_chdev_xp  = chdev_dir.nic_rx[channel];
229    chdev_t * rx_chdev_ptr = GET_PTR( rx_chdev_xp );
230    cxy_t     rx_chdev_cxy = GET_CXY( rx_chdev_xp );
231
232    // build various RX extended pointers
233    xptr_t    rx_lock_xp = XPTR( rx_chdev_cxy , &rx_chdev_ptr->wait_lock );
234    xptr_t    rx_list_xp = XPTR( socket_cxy   , &socket_ptr->rx_list );
235
236    // remove socket from the NIC_TX[channel] chdev clients queue
237    remote_busylock_acquire( tx_lock_xp );
238    xlist_unlink( tx_list_xp );
239    remote_busylock_release( tx_lock_xp );
240
241    // remove socket from the NIC_RX[channel] chdev clients queue
242    remote_busylock_acquire( rx_lock_xp );
243    xlist_unlink( rx_list_xp );
244    remote_busylock_release( rx_lock_xp );
245
246#if DEBUG_SOCKET_LINK
247cycle = (uint32_t)hal_get_cycles();
248if( DEBUG_SOCKET_LINK < cycle )
249printk("\n[%s] thread[%x,%x] unlinked socket [%x,%d] / cycle %d\n",
250__FUNCTION__, process->pid, this->trdid, socket_pid, socket_fdid, cycle ); 
251#endif
252
253}  // end socket_unlink_from_servers()
254       
255/////////////////////////////////////////////////////////////////////////////////////////
256// This static function is called by the socket_build() and socket_accept() functions.
257// It allocates memory in cluster defined by the <cxy> argument for all structures
258// associated to a socket: file descriptor, socket descriptor, RX buffer, R2T queue,
259// and CRQ queue. It allocates an fdid, and register it in the process fd_array.
260// It initialise the  the socket desccriptor static fields, other than local_addr,
261// local_port, remote_addr, remote_port), and set the socket state to UNBOUND.
262// It returns the local pointer on socket descriptor and the fdid value in buffers
263// defined by the <socket_ptr> & <fdid_ptr> arguments.
264/////////////////////////////////////////////////////////////////////////////////////////
265// @ cxy        : [in]  target cluster fo socket & file descriptors.
266// @ domain     : [in]  socket domain.
267// @ type       : [in]  socket type.
268// @ socket_ptr : [out] local pointer on buffer for socket pointer.
269// @ fdid_ptr   : [out] local pointer on buffer for fdid value.
270// # return 0 if success / return -1 if no memory.
271/////////////////////////////////////////////////////////////////////////////////////////
272static error_t socket_create( cxy_t       cxy,
273                              uint32_t    domain,
274                              uint32_t    type,
275                              socket_t ** socket_ptr,
276                              uint32_t  * fdid_ptr )
277{
278    uint32_t    fdid;
279
280    thread_t  * this    = CURRENT_THREAD;
281    process_t * process = this->process;
282
283    kmem_req_t     req;
284    socket_t     * socket;
285    vfs_file_t   * file;
286    uint32_t       state;
287    error_t        error;
288
289#if DEBUG_SOCKET_CREATE
290uint32_t cycle = (uint32_t)hal_get_cycles();
291if( DEBUG_SOCKET_CREATE < cycle )
292printk("\n[%s] thread[%x,%x] enter / cycle %d\n",
293__FUNCTION__, process->pid, this->trdid, cycle ); 
294#endif
295   
296    // allocate memory for socket descriptor
297    req.type   = KMEM_KCM;
298    req.order  = bits_log2( sizeof(socket_t) );
299    req.flags  = AF_ZERO;
300    socket     = kmem_remote_alloc( cxy , &req );
301
302    if( socket == NULL )
303    {
304        printk("\n[ERROR] in %s : cannot allocate socket descriptor / thread[%x,%x]\n",
305        __FUNCTION__, process->pid, this->trdid );
306        return -1;
307    }
308
309    // allocate memory for rx_buf buffer
310    error = remote_buf_init( XPTR( cxy , &socket->rx_buf ),
311                             NIC_RX_BUF_SIZE );
312
313    if( error )
314    {
315        printk("\n[ERROR] in %s : cannot allocate rx_buf / thread[%x,%x]\n",
316        __FUNCTION__, process->pid, this->trdid );
317        req.type = KMEM_KCM;
318        req.ptr  = socket;
319        kmem_remote_free( cxy , &req );
320        return -1;
321    }
322
323    // allocate memory for r2tq queue
324    error = remote_buf_init( XPTR( cxy , &socket->r2tq ),
325                             NIC_R2T_QUEUE_SIZE );
326    if( error )
327    {
328        printk("\n[ERROR] in %s : cannot allocate R2T queue / thread[%x,%x]\n",
329        __FUNCTION__, process->pid, this->trdid );
330        remote_buf_destroy( XPTR( cxy , &socket->rx_buf ) );
331        req.type = KMEM_KCM;
332        req.ptr  = socket;
333        kmem_remote_free( cxy , &req );
334        return -1;
335    }
336
337    // don't allocate memory for crqq queue, as it is done by the socket_listen function
338
339    //  allocate memory for file descriptor
340        req.type  = KMEM_KCM;
341        req.order = bits_log2( sizeof(vfs_file_t) );
342    req.flags = AF_ZERO;
343        file      = kmem_remote_alloc( cxy , &req );
344
345    if( file == NULL ) 
346    {
347        printk("\n[ERROR] in %s : cannot allocate file descriptor / thread[%x,%x]\n",
348        __FUNCTION__, process->pid, this->trdid );
349        remote_buf_destroy( XPTR( cxy , &socket->r2tq ) );
350        remote_buf_destroy( XPTR( cxy , &socket->rx_buf ) );
351        req.type = KMEM_KCM;
352        req.ptr  = socket;
353        kmem_remote_free( cxy , &req );
354        return -1;
355    }
356   
357    // get an fdid value, and register file descriptor in fd_array[]
358    error = process_fd_register( process->ref_xp,
359                                 XPTR( cxy , file ),
360                                 &fdid );
361    if ( error ) 
362    {
363        printk("\n[ERROR] in %s : cannot register file descriptor / thread[%x,%x]\n",
364        __FUNCTION__, process->pid, this->trdid );
365        req.type = KMEM_KCM;
366        req.ptr  = file;
367        kmem_free( &req );
368        remote_buf_destroy( XPTR( cxy , &socket->r2tq ) );
369        remote_buf_destroy( XPTR( cxy , &socket->rx_buf ) );
370        req.ptr  = socket;
371        kmem_free( &req );
372        return -1;
373    }
374
375    state = (type == SOCK_STREAM) ? TCP_STATE_UNBOUND : UDP_STATE_UNBOUND;
376
377    // initialise socket descriptor
378    hal_remote_s32( XPTR( cxy , &socket->pid         ) , process->pid );
379    hal_remote_s32( XPTR( cxy , &socket->fdid        ) , fdid );
380    hal_remote_s32( XPTR( cxy , &socket->domain      ) , domain );
381    hal_remote_s32( XPTR( cxy , &socket->type        ) , type );
382    hal_remote_s32( XPTR( cxy , &socket->state       ) , state );
383    hal_remote_s64( XPTR( cxy , &socket->tx_client   ) , XPTR_NULL );
384    hal_remote_s64( XPTR( cxy , &socket->rx_client   ) , XPTR_NULL );
385    hal_remote_s32( XPTR( cxy , &socket->tx_valid    ) , false );
386    hal_remote_s32( XPTR( cxy , &socket->rx_valid    ) , false );
387    hal_remote_s32( XPTR( cxy , &socket->nic_channel ) , 0 );
388
389    // initialize file descriptor
390    hal_remote_s32( XPTR( cxy , &file->type        ) , INODE_TYPE_SOCK );
391    hal_remote_spt( XPTR( cxy , &file->socket      ) , socket );
392    hal_remote_s32( XPTR( cxy , &file->refcount    ) , 1 );
393
394    // initialize socket lock
395    remote_queuelock_init( XPTR( cxy , &socket->lock ) , LOCK_SOCKET_STATE );
396
397#if DEBUG_SOCKET_CREATE
398if( DEBUG_SOCKET_CREATE < cycle )
399printk("\n[%s] thread[%x,%x] exit / socket[%x,%d] / xptr[%x,%x] / cycle %d\n",
400__FUNCTION__, process->pid, this->trdid, process->pid, fdid, cxy, socket, cycle );
401#endif
402   
403    // return success
404    *socket_ptr = socket;
405    *fdid_ptr   = fdid;
406
407    return 0;
408
409}  // end socket_create
410
411/////////////////////////////////////////////////////////////////////////////////////////
412// This static function is called by the socket_close() function to destroy a socket
413// identified by the <file_xp> argument.
414// It remove the associated file from the reference process fd_array. It unlink the
415// socket from the NIC_TX [k] and NIC_RX[k] chdevs. It release all memory allocated
416// for the structures associated to the target socket socket : file descriptor,
417// socket descriptor, RX buffer, R2T queue, CRQ queue.
418/////////////////////////////////////////////////////////////////////////////////////////
419// @ file_xp  : extended pointer on the file descriptor.
420/////////////////////////////////////////////////////////////////////////////////////////
421static void socket_destroy( xptr_t file_xp )
422{
423    kmem_req_t          req;
424
425    thread_t  * this    = CURRENT_THREAD;
426    process_t * process = this->process;
427
428// check file_xp argument
429assert( (file_xp != XPTR_NULL), "illegal argument\n" );
430
431    // get cluster & local pointer for file descriptor
432    vfs_file_t * file_ptr = GET_PTR( file_xp );
433    cxy_t        file_cxy = GET_CXY( file_xp );
434
435#if DEBUG_SOCKET_DESTROY
436uint32_t cycle = (uint32_t)hal_get_cycles();
437if( DEBUG_SOCKET_DESTROY < cycle )
438printk("\n[%s] thread[%x,%x] enter / file[%x,%x] / cycle %d\n",
439__FUNCTION__, process->pid, this->trdid, file_cxy, file_ptr, cycle );
440#endif
441
442    // get local pointer for socket and file type
443    socket_t * socket_ptr = hal_remote_lpt( XPTR( file_cxy , &file_ptr->socket ) );
444    uint32_t   file_type  = hal_remote_l32( XPTR( file_cxy , &file_ptr->type ) );
445   
446// check file descriptor type
447assert( (file_type == INODE_TYPE_SOCK), "illegal file type\n" );
448
449    // get socket nic_channel and fdid
450    uint32_t channel = hal_remote_l32( XPTR( file_cxy , &socket_ptr->nic_channel ));
451    uint32_t fdid    = hal_remote_l32( XPTR( file_cxy , &socket_ptr->fdid ));
452
453    // remove socket from NIC_TX & NIC_RX chdev queues when socket is connected
454    if( channel < LOCAL_CLUSTER->nb_nic_channels )
455    {
456        socket_unlink_from_servers( XPTR( file_cxy , socket_ptr ) );
457    }
458
459    // remove the file descriptor from the fd_array
460    process_fd_remove( process->owner_xp , fdid );
461
462    // release memory allocated for file descriptor
463    req.type = KMEM_KCM;
464    req.ptr  = file_ptr;
465    kmem_remote_free( file_cxy , &req );
466
467    // release memory allocated for buffers attached to socket descriptor
468    remote_buf_destroy( XPTR( file_cxy , &socket_ptr->crqq ) );
469    remote_buf_destroy( XPTR( file_cxy , &socket_ptr->r2tq ) );
470    remote_buf_destroy( XPTR( file_cxy , &socket_ptr->rx_buf ) );
471
472    // release memory allocated for socket descriptor
473    req.type = KMEM_KCM;
474    req.ptr  = socket_ptr;
475    kmem_remote_free( file_cxy , &req );
476
477#if DEBUG_SOCKET_DESTROY
478cycle = (uint32_t)hal_get_cycles();
479if( DEBUG_SOCKET_DESTROY < cycle )
480printk("\n[%s] thread[%x,%x] exit / cycle %d\n",
481__FUNCTION__, process->pid, this->trdid, cycle );
482#endif
483
484}  // end socket_destroy()
485
486////////////////////////////////////////////////
487void socket_put_r2t_request( xptr_t    queue_xp,
488                             uint32_t  flags,
489                             uint32_t  channel )
490{
491    xptr_t     chdev_xp;
492    cxy_t      chdev_cxy;
493    chdev_t  * chdev_ptr;
494    thread_t * server_ptr;
495    xptr_t     server_xp;
496
497    while( 1 )
498    {
499        // try to register R2T request
500        error_t error = remote_buf_put_from_kernel( queue_xp,
501                                                    (uint8_t *)(&flags),
502                                                    1 );
503        if( error )
504        {
505            // queue full => wait and retry
506            sched_yield( "waiting R2T queue" );
507        }
508        else
509        {
510            // get NIC_TX chdev pointers
511            chdev_xp = chdev_dir.nic_tx[channel];
512            chdev_cxy = GET_CXY( chdev_xp );
513            chdev_ptr = GET_PTR( chdev_xp );
514 
515            // get NIC_TX server thread pointers
516            server_ptr = hal_remote_lpt( XPTR( chdev_cxy , &chdev_ptr->server ) );
517            server_xp  = XPTR( chdev_cxy , server_ptr );
518
519            // unblocks NIC_TX server thread
520            thread_unblock( server_xp , THREAD_BLOCKED_CLIENT );
521
522            return;
523        }
524    }
525}  // end socket_put_r2t_request()
526 
527///////////////////////////////////////////////////
528error_t socket_put_crq_request( xptr_t    queue_xp,
529                                uint32_t  remote_addr,
530                                uint32_t  remote_port,
531                                uint32_t  remote_iss,
532                                uint32_t  remote_window )
533{
534    connect_request_t   req;
535
536    // build request
537    req.addr   = remote_addr;
538    req.port   = remote_port;
539    req.iss    = remote_iss;
540    req.window = remote_window;
541
542    // try to register request in CRQ
543    return remote_buf_put_from_kernel( queue_xp,
544                                       (uint8_t *)(&req),
545                                       sizeof(connect_request_t) );
546}  // end socket_put_crq_request()
547 
548////////////////////////////////////////////////////
549error_t socket_get_crq_request( xptr_t     queue_xp,
550                                uint32_t * remote_addr,
551                                uint32_t * remote_port,
552                                uint32_t * remote_iss,
553                                uint32_t * remote_window )
554{
555    connect_request_t   req;
556    error_t             error;
557
558    // get request from CRQ
559    error = remote_buf_get_to_kernel( queue_xp,
560                                      (uint8_t *)(&req),
561                                      sizeof(connect_request_t) );
562    // extract request arguments
563    *remote_addr   = req.addr;
564    *remote_port   = req.port;
565    *remote_iss    = req.iss;
566    *remote_window = req.window;
567
568    return error;
569
570}  // end socket_get_crq_request()
571 
572
573/////////////////////////////////////////////////////////////////////////////////////////
574//                 Functions implementing the SOCKET related syscalls
575/////////////////////////////////////////////////////////////////////////////////////////
576
577//////////////////////////////////////
578int socket_build( uint32_t   domain,
579                  uint32_t   type )
580{
581    uint32_t    fdid;
582    socket_t  * socket;
583    error_t     error;
584
585#if DEBUG_SOCKET_BUILD
586uint32_t    cycle   = (uint32_t)hal_get_cycles();
587thread_t  * this    = CURRENT_THREAD;
588process_t * process = this->process;
589if( DEBUG_SOCKET_BUILD < cycle )
590printk("\n[%s] thread[%x,%x] enter / %s / %s / cycle %d\n",
591__FUNCTION__, process->pid, this->trdid, 
592socket_domain_str(domain), socket_type_str(type), cycle );
593#endif
594
595
596    // allocate memory for the file descriptor and for the socket
597    error = socket_create( local_cxy,
598                           domain,
599                           type,
600                           &socket, 
601                           &fdid );
602
603#if DEBUG_SOCKET_BUILD
604cycle = (uint32_t)hal_get_cycles();
605if( DEBUG_SOCKET_BUILD < cycle )
606printk("\n[%s] thread[%x,%x] exit / socket %x / fdid %d / %s / cycle %d\n",
607__FUNCTION__, process->pid, this->trdid, socket, fdid, 
608socket_state_str(hal_remote_l32(XPTR(local_cxy , &socket->state))),
609cycle );
610#endif
611
612    if( error ) return -1;
613    return fdid;
614}
615
616////////////////////////////////
617int socket_bind( uint32_t  fdid,
618                 uint32_t  addr,
619                 uint16_t  port )
620{
621    vfs_inode_type_t    file_type;
622    socket_t          * socket;
623    uint32_t            socket_type;
624    uint32_t            socket_state;
625
626    thread_t  * this    = CURRENT_THREAD;
627    process_t * process = this->process;
628
629#if DEBUG_SOCKET_BIND
630uint32_t cycle = (uint32_t)hal_get_cycles();
631if( DEBUG_SOCKET_BIND < cycle )
632printk("\n[%s] thread[%x,%x] enter / socket[%x,%d] / addr %x / port %x / cycle %d\n",
633__FUNCTION__, process->pid, this->trdid, process->pid, fdid, addr, port, cycle );
634#endif
635
636    // get pointers on file descriptor
637    xptr_t       file_xp  = process_fd_get_xptr_from_local( process , fdid );
638    vfs_file_t * file_ptr = GET_PTR( file_xp );
639    cxy_t        file_cxy = GET_CXY( file_xp );
640
641    // check file_xp
642    if( file_xp == XPTR_NULL )
643    {
644        printk("\n[ERROR] in %s : undefined fdid %d / thread[%x,%x]\n",
645        __FUNCTION__, fdid, process->pid, this->trdid );
646        return -1;
647    }
648
649    file_type = hal_remote_l32( XPTR( file_cxy , &file_ptr->type ) );
650    socket    = hal_remote_lpt( XPTR( file_cxy , &file_ptr->socket ) );
651
652    // check file descriptor type
653    if( file_type != INODE_TYPE_SOCK )
654    {
655        printk("\n[ERROR] in %s : illegal file type %s / thread[%x,%x]",
656        __FUNCTION__, vfs_inode_type_str( file_type ), process->pid, this->trdid );
657        return -1;
658    }
659
660    // get socket type
661    socket_type = hal_remote_l32(XPTR( file_cxy , &socket->type ));
662
663    // compute socket state
664    socket_state = (socket_type == SOCK_STREAM) ? TCP_STATE_BOUND : UDP_STATE_BOUND;
665
666    // update the socket descriptor
667    hal_remote_s32( XPTR( file_cxy , &socket->local_addr ) , addr );
668    hal_remote_s32( XPTR( file_cxy , &socket->local_port ) , port );
669    hal_remote_s32( XPTR( file_cxy , &socket->state      ) , socket_state );
670
671#if DEBUG_SOCKET_BIND
672cycle = (uint32_t)hal_get_cycles();
673if( DEBUG_SOCKET_BIND < cycle )
674printk("\n[%s] thread[%x,%x] exit / socket[%x,%d] / %s / addr %x / port %x / cycle %d\n",
675__FUNCTION__, process->pid, this->trdid, process->pid, fdid,
676socket_state_str(hal_remote_l32( XPTR( file_cxy , &socket->state ))),
677hal_remote_l32( XPTR( file_cxy , &socket->local_addr )),
678hal_remote_l32( XPTR( file_cxy , &socket->local_port )),
679cycle );
680#endif
681
682    return 0;
683
684}  // end socket_bind()
685
686//////////////////////////////////
687int socket_listen( uint32_t fdid,
688                   uint32_t crq_depth )
689{
690    xptr_t              file_xp;
691    vfs_file_t        * file_ptr;
692    cxy_t               file_cxy;
693    vfs_inode_type_t    file_type;
694    socket_t          * socket_ptr;
695    uint32_t            socket_type;
696    uint32_t            socket_state;
697    uint32_t            socket_local_addr;
698    uint32_t            socket_local_port;
699    error_t             error;
700
701    thread_t  * this    = CURRENT_THREAD;
702    process_t * process = this->process;
703
704#if DEBUG_SOCKET_LISTEN
705uint32_t cycle = (uint32_t)hal_get_cycles();
706if( DEBUG_SOCKET_LISTEN < cycle )
707printk("\n[%s] thread[%x,%x] enter / socket[%x,%d] / crq_depth %x / cycle %d\n",
708__FUNCTION__, process->pid, this->trdid, process->pid, fdid, crq_depth, cycle );
709#endif
710
711    // get pointers on file descriptor
712    file_xp  = process_fd_get_xptr_from_local( process , fdid );
713    file_ptr = GET_PTR( file_xp );
714    file_cxy = GET_CXY( file_xp );
715
716    // check file_xp
717    if( file_xp == XPTR_NULL )
718    {
719        printk("\n[ERROR] in %s : undefined fdid %d / thread[%x,%x]\n",
720        __FUNCTION__, fdid, process->pid, this->trdid );
721        return -1;
722    }
723
724    file_type  = hal_remote_l32( XPTR( file_cxy , &file_ptr->type ) );
725    socket_ptr = hal_remote_lpt( XPTR( file_cxy , &file_ptr->socket ) );
726
727    // check file descriptor type
728    if( file_type != INODE_TYPE_SOCK )
729    {
730        printk("\n[ERROR] in %s : illegal file type %s / thread[%x,%x]\n",
731        __FUNCTION__, vfs_inode_type_str(file_type), process->pid, this->trdid );
732        return -1;
733    }
734
735    // get relevant infos from <fdid> socket descriptor
736    socket_type       = hal_remote_l32( XPTR( file_cxy , &socket_ptr->type )); 
737    socket_state      = hal_remote_l32( XPTR( file_cxy , &socket_ptr->state )); 
738    socket_local_addr = hal_remote_l32( XPTR( file_cxy , &socket_ptr->local_addr )); 
739    socket_local_port = hal_remote_l32( XPTR( file_cxy , &socket_ptr->local_port )); 
740
741    // check socket type
742    if( socket_type != SOCK_STREAM )
743    {
744        printk("\n[ERROR] in %s : illegal socket type %s / thread[%x,%x]\n",
745        __FUNCTION__, socket_type_str(socket_type), process->pid, this->trdid );
746        return -1;
747    }
748   
749    // check socket state
750    if( socket_state != TCP_STATE_BOUND )
751    {
752        printk("\n[ERROR] in %s : illegal socket state %s / thread[%x,%x]\n",
753        __FUNCTION__, socket_state_str(socket_state), process->pid, this->trdid );
754        return -1;
755    }
756   
757    // compute CRQ queue depth : max( crq_depth , NIC_CRQ_QUEUE_SIZE )
758    uint32_t depth = ( crq_depth > NIC_CRQ_QUEUE_SIZE ) ? crq_depth : NIC_CRQ_QUEUE_SIZE;
759
760    // allocate memory for the CRQ queue
761    error = remote_buf_init( XPTR( file_cxy , &socket_ptr->crqq ),
762                                   depth * sizeof(connect_request_t) );
763    if( error )
764    {
765        printk("\n[ERROR] in %s : cannot allocate CRQ queue / thread[%x,%x]\n",
766        __FUNCTION__, process->pid, this->trdid );
767        return -1;
768    }
769
770    // update socket.state
771    hal_remote_s32( XPTR( file_cxy , &socket_ptr->state ) , TCP_STATE_LISTEN );
772
773    // get pointers on NIC_RX[0] chdev
774    xptr_t    rx0_chdev_xp  = chdev_dir.nic_rx[0];
775    chdev_t * rx0_chdev_ptr = GET_PTR( rx0_chdev_xp );
776    cxy_t     rx0_chdev_cxy = GET_CXY( rx0_chdev_xp );
777   
778    // build extended pointers on list of listening sockets
779    xptr_t    rx0_root_xp = XPTR( rx0_chdev_cxy , &rx0_chdev_ptr->ext.nic.root );
780    xptr_t    rx0_lock_xp = XPTR( rx0_chdev_cxy , &rx0_chdev_ptr->ext.nic.lock );
781
782    // build extended pointer on socket rx_list field
783    xptr_t    list_entry_xp = XPTR( file_cxy , &socket_ptr->rx_list );
784
785    // register  <fdid> socket in listening sockets list
786    remote_busylock_acquire( rx0_lock_xp );
787    xlist_add_last( rx0_root_xp , list_entry_xp );
788    remote_busylock_release( rx0_lock_xp );
789
790#if DEBUG_SOCKET_LISTEN
791cycle = (uint32_t)hal_get_cycles();
792if( DEBUG_SOCKET_LISTEN < cycle )
793printk("\n[%s] thread[%x,%x] exit / socket[%x,%d] / %s / cycle %d\n",
794__FUNCTION__, process->pid, this->trdid, process->pid, fdid,
795socket_state_str(socket_state), cycle );
796#endif
797
798    return 0;
799
800}  // end socket_listen()
801
802///////////////////////////////////
803int socket_accept( uint32_t   fdid,
804                   uint32_t * remote_addr,
805                   uint16_t * remote_port )
806{
807    xptr_t              file_xp;             // extended pointer on remote file
808    vfs_file_t        * file_ptr;
809    cxy_t               file_cxy;
810    vfs_inode_type_t    file_type;           // file descriptor type
811    socket_t          * socket_ptr;          // local pointer on remote waiting socket
812    uint32_t            socket_type;         // listening socket type   
813    uint32_t            socket_state;        // listening socket state
814    uint32_t            socket_domain;       // listening socket domain
815    uint32_t            socket_local_addr;   // listening socket local IP address
816    uint32_t            socket_local_port;   // listening socket local port
817    uint32_t            socket_tx_nxt;       // listening socket tx_nxt
818    bool_t              socket_tx_valid;     // listening socket tx_valid
819    xptr_t              socket_tx_client;    // listening socket tx_client thread
820    bool_t              socket_rx_valid;     // listening socket rx_valid
821    xptr_t              socket_rx_client;    // listening socket rx_client thread
822    xptr_t              socket_lock_xp;      // listening socket lock
823    xptr_t              crq_xp;              // listening socket CRQ queue
824    uint32_t            crq_status;          // number of bytes in CRQ
825    cxy_t               new_socket_cxy;      // new socket cluster identifier
826    socket_t          * new_socket_ptr;      // local pointer on new socket
827    xptr_t              new_socket_xp;       // extended pointer on new socket
828    volatile uint32_t   new_state;           // new socket state (modified by NIC_RX thread)
829    uint32_t            new_fdid;            // new socket file descriptor index
830    uint32_t            new_remote_addr;     // new socket remote IP address
831    uint32_t            new_remote_port;     // new socket remote port
832    uint32_t            new_remote_iss;      // new socket remote iss
833    uint32_t            new_remote_window;   // new socket receive window
834    xptr_t              tx_server_xp;        // extended pointer on TX server thread
835    thread_t          * tx_server_ptr;       // local pointer on TX server thread
836    uint32_t            cmd_status;          // command status (rx_sts or tx_sts)
837    bool_t              cmd_valid;           // valid command (rx_valid or tx_valid)
838    error_t             error;
839
840    thread_t  * this      = CURRENT_THREAD;
841    xptr_t      client_xp = XPTR( local_cxy , this );
842    process_t * process   = this->process;
843
844#if DEBUG_SOCKET_ACCEPT
845uint32_t cycle = (uint32_t)hal_get_cycles();
846if( DEBUG_SOCKET_ACCEPT < cycle )
847printk("\n[%s] thread[%x,%x] enter for socket[%x,%d] / cycle %d\n",
848__FUNCTION__, process->pid, this->trdid, process->pid, fdid, cycle );
849#endif
850
851    // 1) get pointers on file descriptor
852    file_xp  = process_fd_get_xptr_from_local( process , fdid );
853    file_ptr = GET_PTR( file_xp );
854    file_cxy = GET_CXY( file_xp );
855
856    // check file_xp
857    if( file_xp == XPTR_NULL )
858    {
859        printk("\n[ERROR] in %s : undefined fdid %d",
860        __FUNCTION__, fdid );
861        return -1;
862    }
863 
864    file_type  = hal_remote_l32( XPTR( file_cxy , &file_ptr->type ) );
865    socket_ptr = hal_remote_lpt( XPTR( file_cxy , &file_ptr->socket ) );
866
867    // check file descriptor type
868    if( file_type != INODE_TYPE_SOCK )
869    {
870        printk("\n[ERROR] in %s : illegal file type %s / thread[%x,%x]\n",
871        __FUNCTION__, vfs_inode_type_str(file_type), process->pid, this->trdid );
872        return -1;
873    }
874
875    // build extended pointer on listening socket lock
876    socket_lock_xp = XPTR( file_cxy , &socket_ptr->lock );
877
878    // acquire listening socket lock
879    remote_queuelock_acquire( socket_lock_xp );
880                   
881    // get listening socket type, domain, state, local_addr, local_port & tx_nxt
882    socket_type       = hal_remote_l32( XPTR( file_cxy , &socket_ptr->type )); 
883    socket_state      = hal_remote_l32( XPTR( file_cxy , &socket_ptr->state )); 
884    socket_domain     = hal_remote_l32( XPTR( file_cxy , &socket_ptr->domain )); 
885    socket_local_addr = hal_remote_l32( XPTR( file_cxy , &socket_ptr->local_addr )); 
886    socket_local_port = hal_remote_l32( XPTR( file_cxy , &socket_ptr->local_port )); 
887    socket_tx_nxt     = hal_remote_l32( XPTR( file_cxy , &socket_ptr->tx_nxt ));
888    socket_tx_valid   = hal_remote_l32( XPTR( file_cxy , &socket_ptr->tx_valid )); 
889    socket_tx_client  = hal_remote_l64( XPTR( file_cxy , &socket_ptr->tx_client ));
890    socket_rx_valid   = hal_remote_l32( XPTR( file_cxy , &socket_ptr->rx_valid )); 
891    socket_rx_client  = hal_remote_l64( XPTR( file_cxy , &socket_ptr->rx_client ));
892
893    // check socket type
894    if( socket_type != SOCK_STREAM )
895    {
896        // release listening socket lock
897        remote_queuelock_release( socket_lock_xp );
898                   
899        printk("\n[ERROR] in %s : illegal socket type %s / thread[%x,%x]\n",
900        __FUNCTION__, socket_type_str(socket_type), process->pid , this->trdid );
901        return -1;
902    }
903   
904    // check socket state
905    if( socket_state != TCP_STATE_LISTEN ) 
906    {
907        // release listening socket lock
908        remote_queuelock_release( socket_lock_xp );
909                   
910        printk("\n[ERROR] in %s : illegal socket state %s / thread[%x,%x]\n",
911        __FUNCTION__, socket_state_str(socket_state), process->pid, this->trdid );
912        return -1;
913    }
914   
915    // check no previous RX command
916    if( (socket_rx_valid == true) || (socket_rx_client != XPTR_NULL) )
917    { 
918        // release listening socket lock
919        remote_queuelock_release( socket_lock_xp );
920                   
921        printk("\n[ERROR] in %s : previous RX cmd on socket[%x,%d] / thread[%x,%x]\n",
922        __FUNCTION__, process->pid, fdid, process->pid, this->trdid );
923        return -1;
924    }
925
926    // check no previous TX command
927    if( (socket_tx_valid == true) || (socket_tx_client != XPTR_NULL) )
928    { 
929        // release socket lock
930        remote_queuelock_release( socket_lock_xp );
931                   
932        printk("\n[ERROR] in %s : previous TX cmd on socket[%x,%d] / thread[%x,%x]\n",
933        __FUNCTION__, process->pid, fdid, process->pid, this->trdid );
934        return -1;
935    }
936
937    // 2) build extended pointer on listening socket.crq
938    crq_xp  = XPTR( file_cxy , &socket_ptr->crqq );
939
940    // get CRQ status
941    crq_status = remote_buf_status( crq_xp );
942
943    // block & deschedule when CRQ empty
944    if( crq_status == 0 )
945    {
946        // register command arguments in listening socket
947        hal_remote_s32( XPTR( file_cxy , &socket_ptr->rx_cmd    ), CMD_RX_ACCEPT );
948        hal_remote_s64( XPTR( file_cxy , &socket_ptr->rx_client ), client_xp );
949        hal_remote_s32( XPTR( file_cxy , &socket_ptr->rx_valid  ), true );
950
951        // release listening socket lock
952        remote_queuelock_release( socket_lock_xp );
953
954#if DEBUG_SOCKET_ACCEPT
955cycle = (uint32_t)hal_get_cycles();
956if( DEBUG_SOCKET_ACCEPT < cycle )
957printk("\n[%s] thread[%x,%x] socket[%x,%d] / CRQ empty => blocks on <IO> / cycle %d\n",
958__FUNCTION__, process->pid, this->trdid, process->pid, fdid, cycle );
959#endif
960        // block & deschedule when CRQQ empty
961        thread_block( XPTR( local_cxy , this ) , THREAD_BLOCKED_IO );
962        sched_yield( "CRQ queue empty");
963
964#if DEBUG_SOCKET_ACCEPT
965cycle = (uint32_t)hal_get_cycles();
966if( DEBUG_SOCKET_ACCEPT < cycle )
967printk("\n[%s] thread[%x,%x] socket[%x,%d] / resumes / cycle %d\n",
968__FUNCTION__, process->pid, this->trdid, process->pid, fdid, cycle );
969#endif
970        // take listening socket lock
971        remote_queuelock_acquire( socket_lock_xp );
972
973        // get CRQ status & command status
974        cmd_valid    = hal_remote_l32( XPTR( file_cxy , &socket_ptr->rx_valid ) );
975        cmd_status   = hal_remote_l32( XPTR( file_cxy , &socket_ptr->rx_sts ) );
976        crq_status   = remote_buf_status( crq_xp );
977
978assert( (((crq_status > 0) || (cmd_status!= CMD_STS_SUCCESS)) && (cmd_valid == false)),
979"illegal socket state when client thread resumes after RX_ACCEPT" );
980
981        // reset socket.rx_client
982        hal_remote_s32( XPTR( file_cxy , &socket_ptr->rx_client ) , XPTR_NULL );
983
984        if( cmd_status != CMD_STS_SUCCESS )
985        {
986            // release socket lock
987            remote_queuelock_release( socket_lock_xp );
988
989            printk("\n[ERROR] in %s for RX_ACCEPT command / socket[%x,%d] / thread[%x,%x]\n",
990            __FUNCTION__, process->pid, fdid, process->pid, this->trdid );
991            return -1;
992        }
993
994        // extract first request from the listening socket CRQ
995        error = socket_get_crq_request( crq_xp,
996                                    &new_remote_addr,
997                                    &new_remote_port,
998                                    &new_remote_iss,
999                                    &new_remote_window );
1000
1001assert( (error == 0),
1002"cannot get a connection request from a non-empty CRQ" ); 
1003
1004        // reset listening socket rx_client
1005        hal_remote_s32( XPTR( file_cxy , &socket_ptr->rx_client ) , XPTR_NULL );
1006
1007        // release socket lock
1008        remote_queuelock_release( socket_lock_xp );
1009
1010    }  // end blocking on CRQ status
1011
1012    // from this point, we can create a new socket
1013    // and ask the NIC_TX to send a SYN-ACK segment
1014
1015#if DEBUG_SOCKET_ACCEPT
1016cycle = (uint32_t)hal_get_cycles();
1017if( DEBUG_SOCKET_ACCEPT < cycle )
1018printk("\n[%s] thread[%x,%x] socket[%x,%d] / got a CRQ request / cycle %d\n",
1019__FUNCTION__, process->pid, this->trdid, process->pid, fdid, cycle );
1020#endif
1021
1022    // 3) select a cluster for the new socket
1023    new_socket_cxy = cluster_random_select();
1024
1025    // allocate memory for the new socket descriptor
1026    error = socket_create( new_socket_cxy,
1027                           socket_domain,
1028                           socket_type,
1029                           &new_socket_ptr,
1030                           &new_fdid );
1031    if( error )
1032    {
1033        printk("\n[ERROR] in %s : cannot allocate new socket / thread[%x,%x]\n",
1034        __FUNCTION__, process->pid, this->trdid );
1035        return -1;
1036    }
1037   
1038    // build extended poiner on new socket
1039    new_socket_xp = XPTR( new_socket_cxy , new_socket_ptr );
1040
1041#if DEBUG_SOCKET_ACCEPT
1042cycle = (uint32_t)hal_get_cycles();
1043if( DEBUG_SOCKET_ACCEPT < cycle )
1044printk("\n[%s] thread[%x,%x] created new socket[%x,%d] / cycle %d\n",
1045__FUNCTION__, process->pid, this->trdid, process->pid, new_fdid, cycle );
1046#endif
1047       
1048    // compute NIC channel index from remote_addr and remote_port
1049    uint32_t new_nic_channel = dev_nic_get_key( new_remote_addr , new_remote_port );
1050
1051    // update new socket descriptor
1052    hal_remote_s32(XPTR(new_socket_cxy , &new_socket_ptr->local_addr ) , socket_local_addr );
1053    hal_remote_s32(XPTR(new_socket_cxy , &new_socket_ptr->local_port ) , socket_local_port );
1054    hal_remote_s32(XPTR(new_socket_cxy , &new_socket_ptr->remote_addr) , new_remote_addr );
1055    hal_remote_s32(XPTR(new_socket_cxy , &new_socket_ptr->remote_port) , new_remote_port );
1056    hal_remote_s32(XPTR(new_socket_cxy , &new_socket_ptr->nic_channel) , new_nic_channel );
1057    hal_remote_s32(XPTR(new_socket_cxy , &new_socket_ptr->state      ) , TCP_STATE_SYN_RCVD );
1058
1059    // set new socket TCB : increment tx_nxt / initialize rx_nxt, rx_irs, rx_wnd
1060    hal_remote_s32( XPTR( new_socket_cxy , &new_socket_ptr->tx_nxt ), socket_tx_nxt + 1 );
1061    hal_remote_s32( XPTR( new_socket_cxy , &new_socket_ptr->rx_nxt ), new_remote_iss + 1 );
1062    hal_remote_s32( XPTR( new_socket_cxy , &new_socket_ptr->rx_irs ), new_remote_iss );
1063    hal_remote_s32( XPTR( new_socket_cxy , &new_socket_ptr->rx_wnd ), new_remote_window );
1064
1065    // link new socket to chdev servers
1066    socket_link_to_servers( new_socket_xp , new_nic_channel );
1067
1068    // 3) get pointers on NIC_TX[channel] chdev
1069    xptr_t    tx_chdev_xp  = chdev_dir.nic_tx[new_nic_channel];
1070    chdev_t * tx_chdev_ptr = GET_PTR( tx_chdev_xp );
1071    cxy_t     tx_chdev_cxy = GET_CXY( tx_chdev_xp );
1072
1073    // get pointers on NIC_TX[channel] server thread
1074    tx_server_ptr = hal_remote_lpt( XPTR( tx_chdev_cxy , &tx_chdev_ptr->server ));
1075    tx_server_xp  = XPTR( tx_chdev_cxy , tx_server_ptr );
1076
1077    // register command arguments in new socket to request a SYN_ACK segment
1078    hal_remote_s32( XPTR( new_socket_cxy , &new_socket_ptr->tx_cmd    ), CMD_TX_ACCEPT );
1079    hal_remote_s64( XPTR( new_socket_cxy , &new_socket_ptr->tx_client ), client_xp );
1080    hal_remote_s32( XPTR( new_socket_cxy , &new_socket_ptr->tx_valid  ), true );
1081
1082    // unblock NIC_TX server thread
1083    thread_unblock( tx_server_xp , THREAD_BLOCKED_CLIENT );
1084 
1085#if DEBUG_SOCKET_ACCEPT
1086cycle = (uint32_t)hal_get_cycles();
1087if( DEBUG_SOCKET_ACCEPT < cycle )
1088printk("\n[%s] thread[%x,%x] new_socket[%x,%d] blocks on <IO> waiting ESTAB / cycle %d\n",
1089__FUNCTION__, process->pid, this->trdid, process->pid, new_fdid, cycle );
1090#endif
1091
1092    // client thread blocks & deschedules
1093    thread_block( XPTR( local_cxy , this ) , THREAD_BLOCKED_IO );
1094    sched_yield( "waiting new socket connection");
1095
1096#if DEBUG_SOCKET_ACCEPT
1097cycle = (uint32_t)hal_get_cycles();
1098if( DEBUG_SOCKET_ACCEPT < cycle )
1099printk("\n[%s] thread[%x,%x] new_socket[%x,%d] resumes  / cycle %d\n",
1100__FUNCTION__, process->pid, this->trdid, process->pid, new_fdid, cycle );
1101#endif
1102
1103    // get new socket state, tx_valid and tx_sts
1104    new_state  = hal_remote_l32( XPTR( new_socket_cxy , &new_socket_ptr->state ));
1105    cmd_valid  = hal_remote_l32( XPTR( new_socket_cxy , &new_socket_ptr->tx_valid ));
1106    cmd_status = hal_remote_l32( XPTR( new_socket_cxy , &new_socket_ptr->tx_sts ));
1107
1108assert( (((new_state == TCP_STATE_ESTAB) || (cmd_status != CMD_STS_SUCCESS))
1109        && (cmd_valid == false)), 
1110"illegal socket state when client thread resumes after TX_ACCEPT" ); 
1111
1112    // reset socket.tx_client
1113    hal_remote_s64( XPTR( new_socket_cxy , &new_socket_ptr->tx_client ) , XPTR_NULL );
1114
1115    if( cmd_status != CMD_STS_SUCCESS ) 
1116    {
1117        printk("\n[ERROR] in %s for TX_ACCEPT command / socket[%x,%d] / thread[%x,%x]\n",
1118        __FUNCTION__, process->pid, new_fdid, process->pid, this->trdid );
1119        return -1;
1120    }
1121    else
1122    {
1123
1124#if DEBUG_SOCKET_ACCEPT
1125cycle = (uint32_t)hal_get_cycles();
1126if( DEBUG_SOCKET_ACCEPT < cycle )
1127printk("\n[%s] thread[%x,%x] new_socket[%x,%d] / state %s / addr %x / port %x / cycle %d\n",
1128__FUNCTION__, process->pid, this->trdid, process->pid, new_fdid,
1129socket_state_str(new_state), new_remote_addr, new_remote_port, cycle );
1130#endif
1131
1132        // return success
1133        *remote_addr = new_remote_addr;
1134        *remote_port = new_remote_port;
1135        return new_fdid;
1136    }
1137 
1138}  // end socket_accept()
1139
1140//////////////////////////////////
1141int socket_connect( uint32_t fdid,
1142                    uint32_t remote_addr,
1143                    uint16_t remote_port )
1144{
1145    vfs_inode_type_t    file_type;
1146    socket_t          * socket_ptr;       // local pointer on thread descriptor
1147    volatile uint32_t   socket_state;     // socket state (modified by the NIC_TX thread)
1148    uint32_t            socket_type;      // socket type 
1149    uint32_t            local_addr;       // local IP address
1150    uint32_t            local_port;       // local port
1151    xptr_t              tx_server_xp;     // extended pointer on TX server thread
1152    thread_t          * tx_server_ptr;    // local pointer on TX server thread
1153    uint32_t            nic_channel;      // NIC channel index
1154    uint32_t            cmd_status;       // command status (tx_sts field)
1155    bool_t              cmd_valid;        // command valid (tx_valid field)
1156
1157    thread_t  * this      = CURRENT_THREAD;
1158    xptr_t      client_xp = XPTR( local_cxy , this );
1159
1160    // get pointers on file descriptor
1161    xptr_t       file_xp  = process_fd_get_xptr_from_local( this->process , fdid );
1162    vfs_file_t * file_ptr = GET_PTR( file_xp );
1163    cxy_t        file_cxy = GET_CXY( file_xp );
1164
1165    // check file_xp
1166    if( file_xp == XPTR_NULL )
1167    {
1168        printk("\n[ERROR] in %s : undefined fdid %d",
1169        __FUNCTION__, fdid );
1170        return -1;
1171    }
1172
1173    file_type  = hal_remote_l32( XPTR( file_cxy , &file_ptr->type ) );
1174    socket_ptr = hal_remote_lpt( XPTR( file_cxy , &file_ptr->socket ) );
1175
1176#if DEBUG_SOCKET_CONNECT
1177uint32_t cycle = (uint32_t)hal_get_cycles();
1178pid_t    pid   = this->process->pid;
1179trdid_t  trdid = this->trdid;
1180if( DEBUG_SOCKET_CONNECT < cycle )
1181printk("\n[%s] thread[%x,%x] enter for socket[%x,%d] / addr %x / port %d / cycle %d\n",
1182__FUNCTION__,  pid, trdid, pid, fdid, remote_addr, remote_port, cycle );
1183#endif
1184
1185    // check file descriptor type
1186    if( file_type != INODE_TYPE_SOCK )
1187    {
1188        printk("\n[ERROR] in %s : illegal file type %s",
1189        __FUNCTION__, vfs_inode_type_str( file_type ) );
1190        return -1;
1191    }
1192
1193    // get relevant socket infos
1194    socket_type   = hal_remote_l32( XPTR( file_cxy , &socket_ptr->type ) );
1195    socket_state  = hal_remote_l32( XPTR( file_cxy , &socket_ptr->state ) );
1196    local_addr    = hal_remote_l32( XPTR( file_cxy , &socket_ptr->local_addr ) );
1197    local_port    = hal_remote_l32( XPTR( file_cxy , &socket_ptr->local_port ) );
1198
1199    if( socket_type == SOCK_DGRAM )       // UDP
1200    {
1201        if( socket_state != UDP_STATE_BOUND )
1202        {
1203            printk("\n[ERROR] in %s : illegal socket state %s for type %s",
1204            __FUNCTION__, socket_state_str(socket_state), socket_type_str(socket_type) );
1205            return -1;
1206        }
1207    }
1208    else if( socket_type == SOCK_STREAM )  // TCP
1209    {
1210        if( socket_state != TCP_STATE_BOUND )
1211        {
1212            printk("\n[ERROR] in %s : illegal socket state %s for type %s",
1213            __FUNCTION__, socket_state_str(socket_state), socket_type_str(socket_type) );
1214            return -1;
1215        }
1216    }
1217    else
1218    {
1219        printk("\n[ERROR] in %s : illegal socket type %s",
1220        __FUNCTION__,  socket_type_str(socket_type) );
1221        return -1;
1222    }
1223
1224    // compute nic_channel index from remote_addr and remote_port
1225    nic_channel = dev_nic_get_key( remote_addr , remote_port );
1226
1227    // link socket to chdev servers
1228    socket_link_to_servers( XPTR( file_cxy , socket_ptr ), nic_channel );
1229
1230    // update the socket descriptor
1231    hal_remote_s32( XPTR( file_cxy , &socket_ptr->remote_addr ) , remote_addr  );
1232    hal_remote_s32( XPTR( file_cxy , &socket_ptr->remote_port ) , remote_port  );
1233    hal_remote_s32( XPTR( file_cxy , &socket_ptr->nic_channel ) , nic_channel  );
1234
1235    // the actual connection mechanism depends on socket type
1236    // UDP : client thread updates the local socket state without blocking
1237    // TCP : client thread request TX server thread to start the 3 steps handshake
1238
1239    if( socket_type == SOCK_DGRAM )  // UDP
1240    {
1241        // directly update the local socket state
1242        hal_remote_s32( XPTR( file_cxy , &socket_ptr->state ) , UDP_STATE_ESTAB );
1243
1244        return 0;
1245    }
1246    else                             // TCP
1247    {
1248        // get pointers on NIC_TX[channel] chdev
1249        xptr_t    tx_chdev_xp  = chdev_dir.nic_tx[nic_channel];
1250        chdev_t * tx_chdev_ptr = GET_PTR( tx_chdev_xp );
1251        cxy_t     tx_chdev_cxy = GET_CXY( tx_chdev_xp );
1252
1253        // get pointers on NIC_TX[channel] server thread
1254        tx_server_ptr = hal_remote_lpt( XPTR( tx_chdev_cxy , &tx_chdev_ptr->server ));
1255        tx_server_xp  = XPTR( tx_chdev_cxy , tx_server_ptr );
1256
1257        // register command arguments in socket descriptor for a SYN segment
1258        hal_remote_s32( XPTR( file_cxy , &socket_ptr->tx_cmd    ), CMD_TX_CONNECT );
1259        hal_remote_s64( XPTR( file_cxy , &socket_ptr->tx_client ), client_xp );
1260        hal_remote_s32( XPTR( file_cxy , &socket_ptr->tx_valid  ), true );
1261
1262        // unblock NIC_TX server thread
1263        thread_unblock( tx_server_xp , THREAD_BLOCKED_CLIENT );
1264 
1265#if DEBUG_SOCKET_CONNECT
1266cycle = (uint32_t)hal_get_cycles();
1267if( DEBUG_SOCKET_CONNECT < cycle )
1268printk("\n[%s] thread[%x,%x] socket[%x,%d] blocks on <IO> waiting connexion / cycle %d \n",
1269__FUNCTION__, pid, trdid, pid, fdid, cycle );
1270#endif
1271        // block itself and deschedule
1272        thread_block( XPTR( local_cxy , this ) , THREAD_BLOCKED_IO );
1273        sched_yield( "waiting connection" );
1274
1275#if DEBUG_SOCKET_CONNECT
1276cycle = (uint32_t)hal_get_cycles();
1277if( DEBUG_SOCKET_CONNECT < cycle )
1278printk("\n[%s] thread[%x,%x] socket[%x,%d] / resumes / cycle %d \n",
1279__FUNCTION__, pid, trdid, pid, fdid, cycle );
1280#endif
1281
1282        // get socket state, tx_valid and tx_sts
1283        cmd_valid    = hal_remote_l32( XPTR( file_cxy , &socket_ptr->tx_valid ));
1284        cmd_status   = hal_remote_l32( XPTR( file_cxy , &socket_ptr->tx_sts ));
1285        socket_state = hal_remote_l32( XPTR( file_cxy , &socket_ptr->state ));
1286
1287assert( (((socket_state == TCP_STATE_ESTAB) || (cmd_status != CMD_STS_SUCCESS))
1288        && (cmd_valid == false)),
1289"illegal socket state when client thread resumes after TX_CONNECT" );
1290
1291        // reset socket.tx_client
1292        hal_remote_s32( XPTR( file_cxy , &socket_ptr->tx_client ) , XPTR_NULL );
1293
1294        if( cmd_status != CMD_STS_SUCCESS )
1295        {
1296            printk("\n[ERROR] in %s : for command TX_CONNECT / socket[%x,%d] / thread[%x,%x]\n",
1297            __FUNCTION__, pid, fdid, pid, trdid );
1298            return -1;
1299        }
1300        else
1301        {
1302
1303#if DEBUG_SOCKET_CONNECT
1304cycle = (uint32_t)hal_get_cycles();
1305if( DEBUG_SOCKET_CONNECT < cycle )
1306printk("\n[%s] thread[%x,%x] exit for socket[%x,%d] / %s / cycle %d \n",
1307__FUNCTION__, pid, trdid, pid, fdid, socket_state_str(socket_state),cycle );
1308#endif
1309             return 0;
1310        }
1311    }  // end TCP
1312
1313}  // end socket_connect()
1314
1315///////////////////////////////////
1316int socket_close( xptr_t   file_xp,
1317                  uint32_t fdid )
1318{
1319    uint32_t     socket_type;
1320    uint32_t     socket_state;
1321    uint32_t     nic_channel;
1322    uint32_t     cmd_status;      // socket.tx_sts
1323    bool_t       cmd_valid;       // socket.tx_valid
1324    thread_t   * tx_server_ptr;   // local pointer on NIC_TX server thread
1325    xptr_t       tx_server_xp;    // extended pointer on NIC_TX server thread
1326    xptr_t       socket_lock_xp;  // extended pointer on socket lock
1327
1328    thread_t   * this      = CURRENT_THREAD;
1329    xptr_t       client_xp = XPTR( local_cxy , this );
1330    process_t  * process   = this->process;
1331
1332    // get pointer on socket descriptor
1333    cxy_t        file_cxy    = GET_CXY( file_xp );
1334    vfs_file_t * file_ptr    = GET_PTR( file_xp );
1335    socket_t   * socket_ptr = hal_remote_lpt( XPTR( file_cxy , &file_ptr->socket ) );
1336
1337assert( (hal_remote_l32( XPTR( file_cxy , &socket_ptr->fdid )) == fdid),
1338"unconsistent file_xp & fdid arguments");
1339
1340#if DEBUG_SOCKET_CLOSE
1341uint32_t cycle = (uint32_t)hal_get_cycles();
1342pid_t    pid   = this->process->pid;
1343if (DEBUG_SOCKET_CLOSE < cycle )
1344printk("\n[%s] thread[%x,%x] enters for socket[%x,%d] / cycle %d\n",
1345__FUNCTION__, pid, this->trdid, pid, fdid, cycle );
1346#endif
1347
1348    // build extended pointer on lock protecting socket
1349    socket_lock_xp = XPTR( file_cxy , &socket_ptr->lock );
1350
1351    // take socket lock
1352    remote_queuelock_acquire( socket_lock_xp );
1353
1354    // check no previous TX command
1355    if( (hal_remote_l32( XPTR( file_cxy , &socket_ptr->tx_valid )) == true) || 
1356        (hal_remote_l64( XPTR( file_cxy , &socket_ptr->tx_client)) != XPTR_NULL) )
1357    { 
1358        // release socket lock
1359        remote_queuelock_release( socket_lock_xp );
1360                   
1361        printk("\n[ERROR] in %s : previous TX cmd on socket[%x,%d] / thread[%x,%x]\n",
1362        __FUNCTION__, process->pid, fdid, process->pid, this->trdid );
1363        return -1;
1364    }
1365
1366    // get relevant socket infos
1367    socket_type   = hal_remote_l32( XPTR( file_cxy , &socket_ptr->type ));
1368    nic_channel   = hal_remote_l32( XPTR( file_cxy , &socket_ptr->nic_channel ));
1369    socket_state  = hal_remote_l32( XPTR( file_cxy , &socket_ptr->state ));
1370
1371
1372    // the actual close mechanism depends on socket type and state:
1373    // UDP or TCP not connected : client thread directly destroy the socket descriptor
1374    // TCP connected : client thread request TX server thread to make the TCP close handshake
1375
1376    if( socket_type == SOCK_DGRAM )                   // UDP
1377    {
1378
1379#if DEBUG_SOCKET_CLOSE
1380cycle = (uint32_t)hal_get_cycles();
1381if( cycle > DEBUG_DEV_NIC_TX )
1382printk("\n[%s] thread[%x,%x] socket[%x,%d] %s / destroy socket / cycle %d\n",
1383__FUNCTION__, this->process->pid, this->trdid, pid, fdid,
1384socket_state_str( socket_state ), cycle );
1385#endif
1386        // directly destroy socket
1387        socket_destroy( file_xp );
1388
1389        return 0;
1390    }
1391    else if( (socket_state == TCP_STATE_BOUND) ||
1392             (socket_state == TCP_STATE_LISTEN) ||
1393             (socket_state == TCP_STATE_SYN_SENT) )   // TCP not connected
1394    {
1395
1396#if DEBUG_SOCKET_CLOSE
1397cycle = (uint32_t)hal_get_cycles();
1398if( cycle > DEBUG_DEV_NIC_TX )
1399printk("\n[%s] thread[%x,%x] socket[%x,%d] %s / destroy socket / cycle %d\n",
1400__FUNCTION__, this->process->pid, this->trdid, pid, fdid,
1401socket_state_str( socket_state ), cycle );
1402#endif
1403        // directly destroy socket
1404        socket_destroy( file_xp );
1405
1406        return 0;
1407    }
1408    else                                             // TCP connected
1409    {
1410        // get pointers on NIC_TX[index] chdev
1411        xptr_t    tx_chdev_xp  = chdev_dir.nic_tx[nic_channel];
1412        chdev_t * tx_chdev_ptr = GET_PTR( tx_chdev_xp );
1413        cxy_t     tx_chdev_cxy = GET_CXY( tx_chdev_xp );
1414
1415        // get pointers on NIC_TX[channel] server thread
1416        tx_server_ptr = hal_remote_lpt( XPTR( tx_chdev_cxy , &tx_chdev_ptr->server ));
1417        tx_server_xp  = XPTR( tx_chdev_cxy , tx_server_ptr );
1418
1419        // register command arguments in socket descriptor
1420        hal_remote_s32( XPTR( file_cxy , &socket_ptr->tx_cmd    ), CMD_TX_CLOSE );
1421        hal_remote_s64( XPTR( file_cxy , &socket_ptr->tx_client ), client_xp );
1422        hal_remote_s32( XPTR( file_cxy , &socket_ptr->tx_valid  ), true );
1423       
1424        // unblock NIC_TX server thread
1425        thread_unblock( tx_server_xp , THREAD_BLOCKED_CLIENT );
1426 
1427        // release socket lock
1428        remote_queuelock_release( socket_lock_xp );
1429
1430#if DEBUG_SOCKET_CLOSE
1431cycle = (uint32_t)hal_get_cycles();
1432if( DEBUG_SOCKET_CLOSE < cycle )
1433printk("\n[%s] thread[%x,%x] socket[%x,%d] blocks on <IO> waiting close / cycle %d \n",
1434__FUNCTION__, pid, this->trdid, pid, fdid, cycle );
1435#endif
1436        // block itself and deschedule
1437        thread_block( client_xp , THREAD_BLOCKED_IO );
1438        sched_yield( "blocked in close" );
1439
1440#if DEBUG_SOCKET_CLOSE
1441cycle = (uint32_t)hal_get_cycles();
1442if( DEBUG_SOCKET_CLOSE < cycle )
1443printk("\n[%s] thread[%x,%x] socket[%x,%d] / resumes / cycle %d \n",
1444__FUNCTION__, pid, this->trdid, pid, fdid, cycle );
1445#endif
1446        // take socket lock
1447        remote_queuelock_acquire( socket_lock_xp );
1448
1449        // get socket state & command status
1450        socket_state = hal_remote_l32( XPTR( file_cxy , &socket_ptr->state ) );
1451        cmd_status   = hal_remote_l32( XPTR( file_cxy , &socket_ptr->tx_sts) );
1452        cmd_valid    = hal_remote_l32( XPTR( file_cxy , &socket_ptr->tx_valid ) );
1453
1454assert( (((socket_state == TCP_STATE_CLOSED) || (cmd_status != CMD_STS_SUCCESS))
1455         && (cmd_valid == false)),
1456"illegal socket state when client thread resumes after TX_CLOSE\n"
1457" socket_state = %s / cmd_status = %d / cmd_valid = %d\n",
1458socket_state_str(socket_state), cmd_status, cmd_valid );
1459
1460        // reset socket.tx_client
1461        hal_remote_s32( XPTR( file_cxy , &socket_ptr->tx_client ) , XPTR_NULL );
1462
1463        if( cmd_status != CMD_STS_SUCCESS )  // error reported
1464        {
1465            printk("\n[ERROR] in %s for command TX_CLOSE / socket[%x,%d] / thread[%x,%x]\n",
1466            __FUNCTION__, pid, fdid, pid, this->trdid );
1467            return -1;
1468        }
1469        else                                 // success
1470        {
1471
1472#if DEBUG_SOCKET_CLOSE
1473cycle = (uint32_t)hal_get_cycles();
1474if( DEBUG_SOCKET_CLOSE < cycle )
1475printk("\n[%s] thread[%x,%x] socket[%x,%d] / destroy socket / cycle %d\n",
1476__FUNCTION__, pid, this->trdid, pid, fdid, socket_state_str(socket_state) , cycle );
1477#endif
1478            // destroy socket
1479            socket_destroy( file_xp );
1480
1481            return 0;
1482        }
1483    }   // end if TCP
1484}  // end socket_close()
1485
1486////////////////////////////////////////////////////////////////////////////////////////
1487// This static and blocking function is executed by an user thread calling one of the
1488// four functions: socket_send() / socket_recv() / socket_sendto() / socket_recvfrom()
1489// It can be used for both UDP and TCP sockets.
1490////////////////////////////////////////////////////////////////////////////////////////
1491// @ is_send   : send when true / receive when false.
1492// @ fdid      : socket identifier.
1493// @ u_buf     : pointer on user buffer in user space.
1494// @ length    : number of bytes.
1495// @ explicit  : explicit remote IP address and port when true.
1496////////////////////////////////////////////////////////////////////////////////////////
1497// Implementation note : The behavior is different for SEND & RECV
1498// - For a SEND, the client thread checks that there is no TX command registered
1499//   in the socket. It registers the command arguments in the socket descriptor
1500//   (tx_client, tx_cmd, tx_buf, tx_len). Then the client thread unblocks the
1501//   TX server thread from the BLOCKED_CLIENT condition, blocks itself on the
1502//   BLOCKED_IO condition, and deschedules. It is unblocked by the TX server thread
1503//   when the last byte has been sent (for UDP) or acknowledged (for TCP).
1504//   When the client thread resumes, it resets the command in the socket, and returns.
1505// - For a RECV, the client thread checks that there is no RX command registered
1506//   in the socket. It registers itself in socket (rx_client). It checks the status
1507//   of the receive buffer. If the rx_buf is empty, it blocks on the BLOCKED_IO
1508//   condition, and deschedules. It is unblocked by the RX server thread when a UDP
1509//   packet or TCP segment has been written in the rx_buf. When it resumes, it moves
1510//   the available data from the rx_buf to the user buffer, resets its registration
1511//   in socket (and resets the rx_buf for a UDP socket), and returns.
1512////////////////////////////////////////////////////////////////////////////////////////
1513int socket_move_data( bool_t     is_send,
1514                      uint32_t   fdid,
1515                      uint8_t  * u_buf,
1516                      uint32_t   length,
1517                      bool_t     explicit,
1518                      uint32_t   explicit_addr,
1519                      uint32_t   explicit_port )
1520{
1521    vfs_inode_type_t    file_type;       // file descriptor type
1522    socket_t          * socket_ptr;      // local pointer on socket descriptor
1523    uint32_t            socket_state;    // current socket state
1524    uint32_t            socket_type;     // socket type (UDP/TCP)
1525    uint32_t            nic_channel;     // NIC channel for this socket
1526    xptr_t              socket_lock_xp;  // extended pointer on socket lock
1527    xptr_t              file_xp;         // extended pointer on file descriptor
1528    vfs_file_t        * file_ptr;
1529    cxy_t               file_cxy;
1530    xptr_t              chdev_xp;        // extended pointer on NIC_TX[channel] chdev
1531    chdev_t           * chdev_ptr;
1532    cxy_t               chdev_cxy;
1533    uint32_t            remote_addr;
1534    uint32_t            remote_port;
1535    uint32_t            buf_status;      // number of bytes in rx_buf
1536    int32_t             moved_bytes;     // total number of moved bytes (fot return)
1537    xptr_t              server_xp;       // extended pointer on NIC_TX / NIC_RX server thread
1538    thread_t          * server_ptr;      // local pointer on NIC_TX / NIC_RX server thread
1539    kmem_req_t          req;             // KCM request for TX kernel buffer
1540    uint8_t           * tx_buf;          // kernel buffer for TX transfer
1541    bool_t              cmd_valid;       // from socket descriptor
1542    uint32_t            cmd_status;      // from socket descriptor
1543    uint32_t            tx_todo;         // from socket descriptor
1544
1545    thread_t  * this    = CURRENT_THREAD;
1546    process_t * process = this->process;
1547
1548    // build extended pointer on client thread
1549    xptr_t client_xp = XPTR( local_cxy , this );
1550
1551    // get pointers on file descriptor identifying the socket
1552    file_xp  = process_fd_get_xptr_from_local( process , fdid );
1553    file_ptr = GET_PTR( file_xp );
1554    file_cxy = GET_CXY( file_xp );
1555
1556    if( file_xp == XPTR_NULL )
1557    {
1558        printk("\n[ERROR] in %s : undefined fdid %d / thread%x,%x]\n",
1559        __FUNCTION__, fdid , process->pid, this->trdid );
1560        return -1;
1561    }
1562 
1563    // get file type and socket pointer
1564    file_type  = hal_remote_l32( XPTR( file_cxy , &file_ptr->type ) );
1565
1566    // get local pointer on socket
1567    socket_ptr = hal_remote_lpt( XPTR( file_cxy , &file_ptr->socket ) );
1568
1569    // check file descriptor type
1570    if( file_type != INODE_TYPE_SOCK )
1571    {
1572        printk("\n[ERROR] in %s : illegal file type %s / socket[%x,%d]\n",
1573        __FUNCTION__, vfs_inode_type_str(file_type), process->pid, fdid );
1574        return -1;
1575    }
1576
1577    // build extended pointer on lock protecting socket
1578    socket_lock_xp = XPTR( file_cxy , &socket_ptr->lock );
1579
1580    // take the socket lock
1581    remote_queuelock_acquire( socket_lock_xp );
1582
1583    // get socket type, state, and channel
1584    socket_type  = hal_remote_l32( XPTR( file_cxy , &socket_ptr->type ));
1585    socket_state = hal_remote_l32( XPTR( file_cxy , &socket_ptr->state ));
1586    nic_channel  = hal_remote_l32( XPTR( file_cxy , &socket_ptr->nic_channel ));
1587
1588    // handle the explicit remote address and port
1589    if( socket_type == SOCK_DGRAM )                  // UDP socket
1590    {
1591        if( socket_state == UDP_STATE_UNBOUND )
1592        {
1593            // release socket lock
1594            remote_queuelock_release( socket_lock_xp );
1595                   
1596            printk("\n[ERROR] in %s : SEND/RECV for socket[%x,%d] in state %s\n",
1597            __FUNCTION__, process->pid, fdid, socket_state_str(socket_state) );
1598            return -1;
1599        }
1600
1601        if( explicit )
1602        {
1603            // update remote IP address and port into socket descriptor
1604            hal_remote_s32( XPTR( file_cxy , &socket_ptr->remote_addr ), explicit_addr );
1605            hal_remote_s32( XPTR( file_cxy , &socket_ptr->remote_port ), explicit_port );
1606
1607            // update socket state if required
1608            if( socket_state == UDP_STATE_BOUND )
1609            {
1610                hal_remote_s32( XPTR( file_cxy , &socket_ptr->state ), UDP_STATE_ESTAB );
1611            }
1612        }
1613    }
1614    else                                            // TCP socket
1615    {
1616        if( explicit )
1617        {
1618            // get remote IP address and port from socket descriptor
1619            remote_addr = hal_remote_l32( XPTR( file_cxy , &socket_ptr->remote_addr ));
1620            remote_port = hal_remote_l32( XPTR( file_cxy , &socket_ptr->remote_port ));
1621
1622            if( (remote_addr != explicit_addr) || (remote_port != explicit_port) )
1623            {
1624                // release socket lock
1625                remote_queuelock_release( socket_lock_xp );
1626                   
1627                printk("\n[ERROR] in %s : wrong expliciy access for socket[%x,%d]\n",
1628                __FUNCTION__, process->pid, fdid );
1629                return -1;
1630            }
1631        }
1632    }
1633
1634    ///////////////////////////////////////////////////////
1635    if( is_send )                       // TX_SEND command
1636    {
1637
1638#if DEBUG_SOCKET_SEND
1639uint32_t    cycle = (uint32_t)hal_get_cycles();
1640if (DEBUG_SOCKET_SEND < cycle )
1641printk("\n[%s] thread[%x,%x] received SEND command for socket[%x,%d] / length %d / cycle %d\n",
1642__FUNCTION__, process->pid, this->trdid, process->pid, fdid, length, cycle );
1643#endif
1644        // check no previous TX command
1645        if( (hal_remote_l32( XPTR( file_cxy , &socket_ptr->tx_valid )) == true) || 
1646            (hal_remote_l64( XPTR( file_cxy , &socket_ptr->tx_client)) != XPTR_NULL) )
1647        { 
1648            // release socket lock
1649            remote_queuelock_release( socket_lock_xp );
1650                   
1651            printk("\n[ERROR] in %s : previous TX command / socket[%x,%d] / thread[%x,%x]\n",
1652            __FUNCTION__, process->pid, fdid, process->pid, this->trdid );
1653            return -1;
1654        }
1655
1656        // allocate a temporary kernel buffer
1657        req.type  = KMEM_KCM;
1658        req.order = bits_log2( length );
1659        req.flags = AF_NONE;
1660        tx_buf    = kmem_alloc( &req ); 
1661
1662        if( tx_buf == NULL )
1663        {
1664            // release socket lock
1665            remote_queuelock_release( socket_lock_xp );
1666                   
1667            printk("\n[ERROR] in %s : no memory for tx_buf / socket[%x,%d] / thread[%x,%x]\n",
1668            __FUNCTION__, process->pid, fdid, process->pid, this->trdid );
1669            return -1;
1670        }
1671
1672        // copy data from user u_buf to kernel tx_buf   
1673        hal_copy_from_uspace( XPTR( local_cxy , tx_buf ),
1674                              u_buf,
1675                              length );
1676
1677        // register command in socket descriptor
1678        hal_remote_s64( XPTR( file_cxy , &socket_ptr->tx_client ) , client_xp );
1679        hal_remote_s32( XPTR( file_cxy , &socket_ptr->tx_cmd    ) , CMD_TX_SEND );
1680        hal_remote_spt( XPTR( file_cxy , &socket_ptr->tx_buf    ) , tx_buf );
1681        hal_remote_s32( XPTR( file_cxy , &socket_ptr->tx_len    ) , length );
1682        hal_remote_s32( XPTR( file_cxy , &socket_ptr->tx_todo   ) , length );
1683        hal_remote_s32( XPTR( file_cxy , &socket_ptr->tx_valid  ) , true );
1684
1685        // release socket lock
1686        remote_queuelock_release( socket_lock_xp );
1687                   
1688        // get pointers on relevant chdev
1689        chdev_xp  = chdev_dir.nic_tx[nic_channel];
1690        chdev_ptr = GET_PTR( chdev_xp );
1691        chdev_cxy = GET_CXY( chdev_xp );
1692
1693        // get pointers on NIC_TX[channel] server thread
1694        server_ptr = hal_remote_lpt( XPTR( chdev_cxy , &chdev_ptr->server ));
1695        server_xp  = XPTR( chdev_cxy , server_ptr );
1696
1697        // unblocks the NIC_TX server thread
1698        thread_unblock( server_xp , THREAD_BLOCKED_CLIENT );
1699
1700#if DEBUG_SOCKET_SEND   
1701cycle = (uint32_t)hal_get_cycles();
1702if( DEBUG_SOCKET_SEND < cycle )
1703printk("\n[%s] thread[%x,%x] socket[%x,%d] register SEND => blocks on <IO> / cycle %d\n",
1704__FUNCTION__, process->pid, this->trdid, process->pid, fdid, cycle );
1705#endif
1706        // client thread blocks itself and deschedules
1707        thread_block( XPTR( local_cxy , this ) , THREAD_BLOCKED_IO );
1708        sched_yield( "blocked in nic_io" );
1709
1710#if DEBUG_SOCKET_SEND   
1711cycle = (uint32_t)hal_get_cycles();
1712if( DEBUG_SOCKET_SEND < cycle )
1713printk("\n[%s] thread[%x,%x] socket[%x,%d] for SEND resumes / cycle %d\n",
1714__FUNCTION__, process->pid, this->trdid, process->pid, fdid, cycle );
1715#endif
1716        // take socket lock
1717        remote_queuelock_acquire( socket_lock_xp );
1718     
1719        // get tx_valid, tx_todo, and tx_sts
1720        tx_todo    = hal_remote_l32( XPTR( file_cxy , &socket_ptr->tx_todo ));
1721        cmd_valid  = hal_remote_l32( XPTR( file_cxy , &socket_ptr->tx_valid ));
1722        cmd_status = hal_remote_l32( XPTR( file_cxy , &socket_ptr->tx_sts ));
1723
1724        // reset tx_client in socket descriptor
1725        hal_remote_s64( XPTR( file_cxy , &socket_ptr->tx_client  ) , XPTR_NULL );
1726
1727        // release socket lock
1728        remote_queuelock_release( socket_lock_xp );
1729     
1730// check SEND command completed when TX client thread resumes
1731assert( (((tx_todo == 0) || (cmd_status != CMD_STS_SUCCESS)) && (cmd_valid == false)),
1732"illegal socket state when client thread resumes after TX_SEND\n"
1733" tx_todo = %d / tx_status = %d / tx_valid = %d\n",
1734tx_todo, cmd_status, cmd_valid );
1735
1736        // release the tx_buf
1737        req.ptr = tx_buf;
1738        kmem_free( &req );
1739
1740        if( cmd_status != CMD_STS_SUCCESS )
1741        {
1742
1743#if DEBUG_SOCKET_SEND
1744cycle = (uint32_t)hal_get_cycles();
1745if( DEBUG_SOCKET_RECV < cycle )
1746printk("\n[%s] error %s for TX_SEND / socket[%x,%d] / thread[%x,%x]\n",
1747__FUNCTION__, socket_cmd_sts_str(cmd_status), process->pid, fdid, process->pid, this->trdid );
1748#endif
1749            return -1;
1750        }
1751        else
1752        {
1753
1754#if DEBUG_SOCKET_SEND
1755cycle = (uint32_t)hal_get_cycles();
1756if (DEBUG_SOCKET_SEND < cycle )
1757printk("\n[%s] thread[%x,%x] success for SEND / socket[%x,%d] / length %d / cycle %d\n",
1758__FUNCTION__, process->pid, this->trdid, process->pid, fdid, length, cycle );
1759#endif
1760            return length;
1761        }
1762
1763    }  // end TX_SEND command
1764
1765    ////////////////////////////////////////////////////////
1766    else                                 // RX_RECV command
1767    {
1768
1769#if DEBUG_SOCKET_RECV
1770uint32_t    cycle = (uint32_t)hal_get_cycles();
1771if (DEBUG_SOCKET_SEND < cycle )
1772printk("\n[%s] thread[%x,%x] received RECV command for socket[%x,%d] / length %d / cycle %d\n",
1773__FUNCTION__, process->pid, this->trdid, process->pid, fdid, length, cycle );
1774#endif
1775        // check no previous RX command
1776        if( (hal_remote_l32( XPTR( file_cxy , &socket_ptr->rx_valid )) == true) || 
1777            (hal_remote_l64( XPTR( file_cxy , &socket_ptr->rx_client)) != XPTR_NULL) )
1778        {
1779            // release socket lock
1780            remote_queuelock_release( socket_lock_xp );
1781                   
1782            printk("\n[ERROR] in %s : previous RX command on socket[%x,%d] / thread[%x,%x]\n",
1783            __FUNCTION__, process->pid, fdid, process->pid, this->trdid );
1784            return -1;
1785        }
1786
1787        // return EOF for a TCP socket not in ESTAB state
1788        if( (socket_type == SOCK_STREAM ) && (socket_state != TCP_STATE_ESTAB) )
1789        { 
1790            // release socket lock
1791            remote_queuelock_release( socket_lock_xp );
1792                   
1793#if DEBUG_SOCKET_RECV 
1794uint32_t cycle = (uint32_t)hal_get_cycles();
1795if( DEBUG_SOCKET_RECV < cycle )
1796printk("\n[%s] thread[%x,%x] socket[%x,%d] TCP connection closed / cycle %d\n",
1797__FUNCTION__, process->pid, this->trdid, process->pid, fdid, cycle );
1798#endif
1799            return 0;
1800        }
1801        // build extended pointer on socket.rx_buf
1802        xptr_t rx_buf_xp   = XPTR( file_cxy , &socket_ptr->rx_buf );
1803
1804        // get rx_buf status
1805        buf_status = remote_buf_status( rx_buf_xp );
1806
1807        if( buf_status == 0 )
1808        {
1809            // registers RX_RECV command in socket descriptor
1810            hal_remote_s32( XPTR( file_cxy , &socket_ptr->rx_cmd    ) , CMD_RX_RECV );
1811            hal_remote_s64( XPTR( file_cxy , &socket_ptr->rx_client ) , client_xp );
1812            hal_remote_s32( XPTR( file_cxy , &socket_ptr->rx_valid  ) , true );
1813
1814            // release socket lock
1815            remote_queuelock_release( socket_lock_xp );
1816
1817#if DEBUG_SOCKET_RECV 
1818uint32_t cycle = (uint32_t)hal_get_cycles();
1819if( DEBUG_SOCKET_RECV < cycle )
1820printk("\n[%s] thread[%x,%x] socket[%x,%d] rx_buf empty => blocks on <IO> / cycle %d\n",
1821__FUNCTION__, process->pid, this->trdid, process->pid, fdid, cycle );
1822#endif
1823            // client thread blocks itself and deschedules
1824            thread_block( XPTR( local_cxy , this ) , THREAD_BLOCKED_IO );
1825            sched_yield( "blocked in nic_io" );
1826
1827#if DEBUG_SOCKET_RECV 
1828cycle = (uint32_t)hal_get_cycles();
1829if( DEBUG_SOCKET_RECV < cycle )
1830printk("\n[%s] thread[%x,%x] socket[%x,%d] for RECV resumes / cycle %d\n",
1831__FUNCTION__, process->pid, this->trdid, process->pid, fdid, cycle );
1832#endif
1833            // take socket lock
1834            remote_queuelock_acquire( socket_lock_xp );
1835
1836            // get rx_sts and rx_buf status
1837            cmd_valid  = hal_remote_l32( XPTR( file_cxy , &socket_ptr->rx_valid ));
1838            cmd_status = hal_remote_l32( XPTR( file_cxy , &socket_ptr->rx_sts ));
1839            buf_status = remote_buf_status( rx_buf_xp );
1840       
1841assert( (((buf_status != 0) || (cmd_status != CMD_STS_SUCCESS)) && (cmd_valid == false)),
1842"illegal socket state when client thread resumes after RX_RECV\n"
1843" buf_status = %d / rx_sts = %d / rx_valid = %d\n",
1844buf_status , cmd_status , cmd_valid );
1845
1846            // reset rx_client in socket descriptor
1847            hal_remote_s64( XPTR( file_cxy , &socket_ptr->rx_client  ) , XPTR_NULL );
1848
1849            // reset rx_buf for an UDP socket
1850            if( socket_type == SOCK_DGRAM ) remote_buf_reset( rx_buf_xp );
1851
1852            // release socket lock
1853            remote_queuelock_release( socket_lock_xp );
1854
1855            if( cmd_status == CMD_STS_EOF )           // EOF (remote close) reported
1856            {
1857
1858#if DEBUG_SOCKET_RECV
1859cycle = (uint32_t)hal_get_cycles();
1860if( DEBUG_SOCKET_RECV < cycle )
1861printk("\n[%s] EOF for RX_RECV / socket[%x,%d] / thread[%x,%x]\n",
1862__FUNCTION__, process->pid, fdid, process->pid, this->trdid );
1863#endif
1864                return 0;
1865            }
1866            else if( cmd_status != CMD_STS_SUCCESS )   // other error reported
1867            {
1868
1869#if DEBUG_SOCKET_RECV
1870cycle = (uint32_t)hal_get_cycles();
1871if( DEBUG_SOCKET_RECV < cycle )
1872printk("\n[%s] error %s for RX_RECV / socket[%x,%d] / thread[%x,%x]\n",
1873__FUNCTION__, socket_cmd_sts_str(cmd_status), process->pid, fdid, process->pid, this->trdid );
1874#endif
1875