Ignore:
Timestamp:
Oct 10, 2020, 4:50:41 PM (4 years ago)
Author:
alain
Message:

Introduce the ksocket.h & ksocket.c files in kernel/kern.

File:
1 moved

Legend:

Unmodified
Added
Removed
  • trunk/kernel/kern/ksocket.h

    r657 r662  
    11/*
    2  * socket.c - socket API implementation.
    3  *
    4  * Authors  Alain Greiner   (2016,2017,2018,2019,2020)
     2 * ksocket.h - kernel socket descriptor and API definition.
     3 * 
     4 * Authors  Alain Greiner    (2016,2017,2018,2019,2020)
    55 *
    66 * Copyright (c) UPMC Sorbonne Universites
    77 *
    8  * This file is part of ALMOS-MKH.
    9  *
    10  * ALMOS-MKH.is free software; you can redistribute it and/or modify it
     8 * This file is part of ALMOS-MKH
     9 *
     10 * ALMOS-MKH is free software; you can redistribute it and/or modify it
    1111 * under the terms of the GNU General Public License as published by
    1212 * the Free Software Foundation; version 2.0 of the License.
    1313 *
    14  * ALMOS-MKH.is distributed in the hope that it will be useful, but
     14 * ALMOS-MKH is distributed in the hope that it will be useful, but
    1515 * WITHOUT ANY WARRANTY; without even the implied warranty of
    1616 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
     
    1818 *
    1919 * You should have received a copy of the GNU General Public License
    20  * along with ALMOS-MKH.; if not, write to the Free Software Foundation,
     20 * along with ALMOS-MKH; if not, write to the Free Software Foundation,
    2121 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
    2222 */
    2323
     24#ifndef _KSOCKET_H_
     25#define _KSOCKET_H_
     26
    2427#include <kernel_config.h>
    2528#include <hal_kernel_types.h>
    26 #include <hal_remote.h>
    27 #include <shared_socket.h>
    28 #include <process.h>
     29#include <xlist.h>
    2930#include <remote_buf.h>
    30 #include <printk.h>
    31 #include <kmem.h>
    32 #include <thread.h>
    33 #include <vfs.h>
    34 #include <socket.h>
    35 
    36 //////////////////////////////////////////////////////////////////////////////////////
    37 // Extern global variables
    38 //////////////////////////////////////////////////////////////////////////////////////
    39 
    40 extern chdev_directory_t  chdev_dir;         // allocated in kernel_init.c
    41 
    42 //////////////////////////////////////
    43 char * socket_cmd_str( uint32_t type )
     31#include <remote_busylock.h>
     32
     33/*****************************************************************************************
     34 * This structure defines a kernel socket descriptor, used for both UDP or TCP sockets.
     35 * A socket is a private resource used by at most two user threads : one TX client
     36 * thread to send packets, and one RX client thread, to receive packets. The TX client
     37 * thread and the RX client thread can be the same thread.
     38 *
     39 * When the Network Interface Controller contains several channels, the set of all
     40 * existing sockets is split in as many subsets as the number of NIC channels, in order
     41 * to parallelize the transfers. The distribution key defining the channel index
     42 * is computed from the (remote_addr/remote_port) couple: by the NIC hardware for the
     43 * RX packets; by the software for the TX packets, using a dedicated NIC driver function.
     44 * All sockets that have the same key share the same channel, and each socket is
     45 * therefore linked to two chdevs : NIC_TX[key] & NIC_RX[key].
     46 * The socket allows the NIC-TX and NIC_RX server threads to access various buffers:
     47 * - the kernel "tx_buf" buffer contains the data to be sent by the TX server thread.
     48 *   It is dynamically allocated, and used as retransmission buffer when required.
     49 * - the kernel "rx_buf" buffer contains the data received by the RX server thread.
     50 *   It is allocated in socket and handled as a single writer / single reader FIFO.
     51 * - the kernel "r2t" buffer allows the RX server thread to make direct requests
     52 *   to the associated TX server (mainly used to handle the TCP ACKs).
     53 * - the kernel "crq" buffer allows to store concurrent remote client connect requests
     54 *   to a local server socket. It is allocated in socket.
     55 *
     56 * The synchronisation mechanism between the client threads and the server threads
     57 * is different for the TX and RX directions:
     58 *
     59 * 1) TX stream
     60 *
     61 * - The internal API between the TX client thread and the NIC_TX server thread defines
     62 *   four command types, stored in the "tx_cmd" variable of the socket descriptor:
     63 *   . SOCKET_TX_CONNECT : TCP client request to start the 3 steps connection handshake.
     64 *   . SOCKET_TX_ACCEPT  : TCP server request to accept one pending connection request.
     65 *   . SOCKET_TX_SEND    : local (UDP/TCP) request to send data to a remote (UDP/TCP).
     66 *   . SOCKET_TX_CLOSE   : local TCP socket request remote TCP socket to close connection.
     67 * - All commands are blocking for the TX client thread: to make a command, the TX client
     68 *   registers the command type in the socket "tx_cmd" field, sets the "tx_valid" field,
     69 *   resets the "tx_error" field, and registers itself in the "tx_client" field.
     70 *   Then, it unblocks the TX server thread from the BLOCKED_CLIENT condition, blocks itself
     71 *   on the BLOCKED_IO condition, and deschedules. For a SEND, the "tx_buf" kernel buffer
     72 *   is dynamically allocated by the client thread, that copies the payload from the user
     73 *   buffer to this kernel buffer, that is used as retransmission buffer, when required.
     74 * - A command is valid for the TX server when the socket descriptor "tx_valid" is true.
     75 *   For a SEND command, the "tx_valid" is reset by the NIC_TX server when the last byte has
     76 *   been sent, but the TX client thread is unblocked by the NIC_RX server thread only when
     77 *   the last byte has been acknowledged, or to report an error.
     78 *   For the CONNECT, ACCEPT and CLOSE commands, the "tx_valid" is reset by the NIC_TX server
     79 *   when the first segment of the handshake has been sent, but the TX client thread is
     80 *   unblocked by the NIC_RX server thread only when the handshake is actually completed.
     81 *   The TX server thread is acting as a multiplexer. It scans the list of attached sockets,
     82 *   to sequentially handle the valid commands: one UDP packet or TCP segment per iteration.
     83 *   The TX server blocks and deschedules on the BLOCKED_CLIENT condition when there is
     84 *   no more valid TX command or R2T request registered in any socket. It is unblocked
     85 *   from BLOCKED_CLIENT by a client thread registering a TX command, or by the RX server
     86 *   thread registering a R2T request. The TX server thread signals an error to the TX client
     87 *   thread using the "tx_error" field in socket descriptor.
     88 *   When "tx_valid" or "r2t_valid" are true, the TX server thread builds and sends a UDP
     89 *   packet or TCP segment. A single SEND command can require a large number of TCP
     90 *   segments to move a big data buffer.
     91 *   This TX server thread blocks and deschedules on the BLOCKED_ISR condition when
     92 *   the NIC_TX queue is full. It is unblocked by the hardware NIC_TX_ISR.
     93 * - In order to detect and report error for multiple simultaneous TX accesses to the same
     94 *   socket, the client thread makes a double check before posting a new TX command :
     95 *   the "tx_valid" field must be false, and the "tx_client" field must be XPTR_NULL.
     96 *   The "tx_valid" field is reset by the TX server thread, and the "tx_client"
     97 *   field is reset by the TX client thread itself, when it resumes after a TX command.
     98 *   . For a SEND command on an UDP socket, the TX server thread reset "tx_valid" and
     99 *     unblocks the TX client thread as soon as the last data byte has been sent.
     100 *   . For a SEND command on a TCP socket, the TX server thread reset "tx_valid" when the
     101 *     last data byte has been sent, but the TX client thread is unblocked by the TX server
     102 *     only when the last data byte has been acknowledged by the remote socket.
     103 *   . For the CONNECT or ACCEPT commands, the "tx_valid" flag is reset and the TX client
     104 *     thread is unblocked by the RX server thread only when the command is completed,
     105 *     and the local TCP socket is actually in the ESTAB state.
     106 *   . For a CLOSE command, the "tx_valid" flag is reset, and the TX client thread is
     107 *     unblocked by the RX server thread only when the remote socket is disconnected.
     108 *
     109 * 2) RX stream
     110 *
     111 * - The internal API between the RX client thread and the RX server thread defines two
     112 *   command types stored in the rx_cmd variable of the socket descriptor:
     113 *   . SOCKET_RX_ACCEPT : TCP server request a connection request from CRQ queue.
     114 *   . SOCKET_RX_RECV   : local (UDP/TCP) socket expect data from a remote (UDP/TCP).
     115 *   For the RECV command the communication is done through the "rx_buf" buffer,
     116 *   attached to the socket, and handled as a single-writer / single reader-FIFO.
     117 *   For the ACCEPT command the communication is done through the CRQ buffer, attached
     118 *   to the socket, and handled as a single-writer / single reader-FIFO.
     119 *   These two commands are blocking for the RX client thread as long as the buffer is
     120 *   empty. The client thread set the socket "rx_valid" field, reset the "rx_error" field,
     121 *   registers itself in the "rx_client" field, and  blocks on the BLOCKED_IO condition.
     122 * - The RX server thread is acting as a demultiplexer: it handles one received TCP segment,
     123 *   or UDP packet per iteration in the loop on the NIC_RX queue, and moves the data to
     124 *   the relevant buffer of the socket matching the packet. It discards packets that don't
     125 *   match a registered socket. When a client thread is registered in the socket descriptor,
     126 *   the RX server thread reset the "rx_valid" field and unblocks the RX client thread from
     127 *   the BLOCKED_IO condition as soon as there is data available in the "rx_buf".
     128 *   This RX server thread blocks and deschedules on the BLOCKED_ISR condition when there
     129 *   is no more packets in the NIC_RX queue. It is unblocked by the hardware NIC_RX_ISR.
     130 * - In order to detect and report error for multiple simultaneous RX accesses to the same
     131 *   socket, the RX client thread makes a double check before posting a new RX command :
     132 *   the "rx_valid" field must be false, and the "rx_client" field must be XPTR_NULL.
     133 *   The "rx_valid" field is reset by the RX server thread, and the "rx_client"
     134 *   field is reset by the RX client thread itself, when it resumes after an RX command.
     135 *
     136 * 3) R2T queue
     137 *
     138 * To implement the TCP "3 steps handshake" protocol for connection or to send RST,
     139 * the RX server thread can directly request the associated TX server thread to send
     140 * control packets in the TX stream, using a dedicated R2T (RX to TX) FIFO stored in
     141 * the socket descriptor. Each R2T request occupies one byte in this R2T queue.
     142 *
     143 * 4) CRQ queue
     144 *
     145 * The remote CONNECT requests received by a TCP socket (SYN segments) are stored in a
     146 * dedicated CRQ FIFO stored in the local socket descriptor. These requests are consumed
     147 * by the local client thread executing an ACCEPT.
     148 * Each CRQ request occupies sizeof(connect_request_t) bytes in this CRQ queue.
     149 * The connect_request_t structure containing the request arguments is defined below.
     150 *
     151 * Note : the socket domains and types are defined in the "shared_socket.h" file.
     152 ****************************************************************************************/
     153
     154/*****************************************************************************************
     155 * This enum defines the set of commands that can be registered in the socket
     156 * by the TX & RX client threads to be executed by the NIC_TX & NIC_RX server threads.
     157 ****************************************************************************************/
     158typedef enum socket_cmd_type_e
    44159{
    45     switch( type )
    46     {
    47         case SOCKET_TX_CONNECT  : return "CONNECT";
    48         case SOCKET_TX_SEND     : return "SEND";
    49         case SOCKET_TX_CLOSE    : return "CLOSE";
    50 
    51         default:                return "undefined";
    52     }
     160    CMD_TX_CONNECT      = 20,         /*! request a SYN segment     (TCP only)          */
     161    CMD_TX_ACCEPT       = 21,         /*! request a SYN-ACK segment (TCP only)          */
     162    CMD_TX_CLOSE        = 22,         /*! request a RST segment     (TCP only)          */
     163    CMD_TX_SEND         = 23,         /*! request to send data      (TCP or UDP)        */
     164
     165    CMD_RX_ACCEPT       = 30,         /*! wait request from CRQ     (TCP only)          */
     166    CMD_RX_RECV         = 31,         /*! wait DATA from rx_buf     (TCP or UDP)        */
    53167}
    54    
    55 /////////////////////////////////////////
    56 char * socket_state_str( uint32_t state )
     168socket_cmd_type_t;
     169
     170/*****************************************************************************************
     171 * This enum defines the set of command status that can be returned by the NIC_RX and
     172 * NIC_TX server threads to the TX & RX client threads.
     173 * The success must be signaled by the null value / the various failure cases are
     174 * signaled by a non-null value.
     175 ****************************************************************************************/
     176typedef enum socket_cmd_sts_e
    57177{
    58     switch( state )
    59     {
    60         case UDP_STATE_UNBOUND    : return "UDP_UNBOUND";
    61         case UDP_STATE_BOUND      : return "UDP_BOUND";
    62         case UDP_STATE_CONNECT    : return "UDP_CONNECT";
    63 
    64         case TCP_STATE_UNBOUND    : return "TCP_UNBOUND";
    65         case TCP_STATE_BOUND      : return "TCP_BOUND";
    66         case TCP_STATE_LISTEN     : return "TCP_LISTEN";
    67         case TCP_STATE_SYN_SENT   : return "TCP_SYN_SENT";
    68         case TCP_STATE_SYN_RCVD   : return "TCP_SYN_RCVD";
    69         case TCP_STATE_ESTAB      : return "TCP_ESTAB";
    70         case TCP_STATE_FIN_WAIT1  : return "TCP_FIN_WAIT1";
    71         case TCP_STATE_FIN_WAIT2  : return "TCP_FIN_WAIT2";
    72         case TCP_STATE_CLOSING    : return "TCP_CLOSING";
    73         case TCP_STATE_TIME_WAIT  : return "TCP_TIME_WAIT";
    74         case TCP_STATE_CLOSE_WAIT : return "TCP_CLOSE_WAIT";
    75         case TCP_STATE_LAST_ACK   : return "TCP_LAST_ACK";
    76 
    77         default:                    return "undefined";
    78     }
     178    CMD_STS_SUCCESS     =  0,
     179    CMD_STS_EOF         =  1,
     180    CMD_STS_RST         =  2,
     181    CMD_STS_BADACK      =  3,
     182    CMD_STS_BADSTATE    =  4,
     183    CMD_STS_BADCMD      =  5,
    79184}
    80 
    81 ///////////////////////////////////////
    82 error_t socket_create( cxy_t       cxy,
    83                        uint32_t    domain,
    84                        uint32_t    type,
    85                        socket_t ** socket_ptr,
    86                        uint32_t  * fdid_ptr )
     185socket_cmd_sts_t;
     186
     187/*****************************************************************************************
     188 * This enum defines the set of states for a UDP socket.
     189 ****************************************************************************************/
     190typedef enum udp_socket_state_e
    87191{
    88     uint32_t    fdid;
    89 
    90     thread_t  * this    = CURRENT_THREAD;
    91     process_t * process = this->process;
    92 
    93     kmem_req_t     req;
    94     socket_t     * socket;
    95     vfs_file_t   * file;
    96     uint32_t       state;
    97     error_t        error;
    98 
    99     // allocate memory for socket descriptor
    100     req.type   = KMEM_KCM;
    101     req.order  = bits_log2( sizeof(socket_t) );
    102     req.flags  = AF_ZERO;
    103     socket     = kmem_remote_alloc( cxy , &req );
    104 
    105     if( socket == NULL )
    106     {
    107         printk("\n[ERROR] in %s : cannot allocate socket descriptor / thread[%x,%x]\n",
    108         __FUNCTION__, process->pid, this->trdid );
    109         return -1;
    110     }
    111 
    112     // allocate memory for rx_buf buffer
    113     error = remote_buf_create( XPTR( cxy , &socket->rx_buf ),
    114                                NIC_RX_BUF_SIZE );
    115 
    116     if( error )
    117     {
    118         printk("\n[ERROR] in %s : cannot allocate rx_buf / thread[%x,%x]\n",
    119         __FUNCTION__, process->pid, this->trdid );
    120         req.type = KMEM_KCM;
    121         req.ptr  = socket;
    122         kmem_remote_free( cxy , &req );
    123         return -1;
    124     }
    125 
    126     // allocate memory for r2tq queue
    127     error = remote_buf_create( XPTR( cxy , &socket->r2tq ),
    128                                NIC_R2T_QUEUE_SIZE );
    129     if( error )
    130     {
    131         printk("\n[ERROR] in %s : cannot allocate R2T queue / thread[%x,%x]\n",
    132         __FUNCTION__, process->pid, this->trdid );
    133         remote_buf_destroy( XPTR( cxy , &socket->rx_buf ) );
    134         req.type = KMEM_KCM;
    135         req.ptr  = socket;
    136         kmem_remote_free( cxy , &req );
    137         return -1;
    138     }
    139 
    140     // allocate memory for crqq queue
    141     error = remote_buf_create( XPTR( cxy , &socket->crqq ),
    142                                NIC_CRQ_QUEUE_SIZE * sizeof(sockaddr_t) );
    143     if( error )
    144     {
    145         printk("\n[ERROR] in %s : cannot allocate CRQ queue / thread[%x,%x]\n",
    146         __FUNCTION__, process->pid, this->trdid );
    147         remote_buf_destroy( XPTR( cxy , &socket->r2tq ) );
    148         remote_buf_destroy( XPTR( cxy , &socket->rx_buf ) );
    149         req.type = KMEM_KCM;
    150         req.ptr  = socket;
    151         kmem_remote_free( cxy , &req );
    152         return -1;
    153     }
    154 
    155     //  allocate memory for file descriptor
    156         req.type  = KMEM_KCM;
    157         req.order = bits_log2( sizeof(vfs_file_t) );
    158     req.flags = AF_ZERO;
    159         file      = kmem_remote_alloc( cxy , &req );
    160 
    161     if( file == NULL )
    162     {
    163         printk("\n[ERROR] in %s : cannot allocate file descriptor / thread[%x,%x]\n",
    164         __FUNCTION__, process->pid, this->trdid );
    165         remote_buf_destroy( XPTR( cxy , &socket->crqq ) );
    166         remote_buf_destroy( XPTR( cxy , &socket->r2tq ) );
    167         remote_buf_destroy( XPTR( cxy , &socket->rx_buf ) );
    168         req.type = KMEM_KCM;
    169         req.ptr  = socket;
    170         kmem_remote_free( cxy , &req );
    171         return -1;
    172     }
    173    
    174     // get an fdid value, and register file descriptor in fd_array[]
    175     error = process_fd_register( process->ref_xp,
    176                                  XPTR( cxy , file ),
    177                                  &fdid );
    178     if ( error )
    179     {
    180         printk("\n[ERROR] in %s : cannot register file descriptor / thread[%x,%x]\n",
    181         __FUNCTION__, process->pid, this->trdid );
    182         req.type = KMEM_KCM;
    183         req.ptr  = file;
    184         kmem_free( &req );
    185         remote_buf_destroy( XPTR( cxy , &socket->crqq ) );
    186         remote_buf_destroy( XPTR( cxy , &socket->r2tq ) );
    187         remote_buf_destroy( XPTR( cxy , &socket->rx_buf ) );
    188         req.ptr  = socket;
    189         kmem_free( &req );
    190         return -1;
    191     }
    192     state = (type == SOCK_STREAM) ? TCP_STATE_UNBOUND : UDP_STATE_UNBOUND;
    193 
    194     // initialise socket descriptor
    195     hal_remote_s32( XPTR( cxy , &socket->domain    ) , domain );
    196     hal_remote_s32( XPTR( cxy , &socket->type      ) , type );
    197     hal_remote_s32( XPTR( cxy , &socket->pid       ) , process->pid );
    198     hal_remote_s32( XPTR( cxy , &socket->state     ) , state );
    199     hal_remote_s64( XPTR( cxy , &socket->tx_client ) , XPTR_NULL );
    200     hal_remote_s64( XPTR( cxy , &socket->rx_client ) , XPTR_NULL );
    201 
    202     // initialize file descriptor
    203     hal_remote_s32( XPTR( cxy , &file->type        ) , INODE_TYPE_SOCK );
    204     hal_remote_spt( XPTR( cxy , &file->socket      ) , socket );
    205     hal_remote_s32( XPTR( cxy , &file->refcount    ) , 1 );
    206 
    207     remote_rwlock_init( XPTR( cxy , &file->lock ) , LOCK_VFS_FILE );
    208    
    209     // return success
    210     *socket_ptr = socket;
    211     *fdid_ptr   = fdid;
    212 
    213     return 0;
    214 
    215 }  // end socket_create
    216 
    217 ////////////////////////////////////
    218 void socket_destroy( uint32_t fdid )
     192    UDP_STATE_UNBOUND    = 0x00,
     193    UDP_STATE_BOUND      = 0x01,
     194    UDP_STATE_ESTAB      = 0x02,
     195}
     196udp_socket_state_t;
     197
     198/*****************************************************************************************
     199 * This enum defines the set of states for a TCP socket.
     200 ****************************************************************************************/
     201typedef enum tcp_socket_state_e
    219202{
    220     uint32_t            type;
    221     socket_t          * socket;
    222     kmem_req_t          req;
    223 
    224     thread_t  * this    = CURRENT_THREAD;
    225     process_t * process = this->process;
    226 
    227     // get pointers on file descriptor
    228     xptr_t       file_xp  = process_fd_get_xptr( process , fdid );
    229     vfs_file_t * file     = GET_PTR( file_xp );
    230     cxy_t        cxy      = GET_CXY( file_xp );
    231 
    232     type   = hal_remote_l32( XPTR( cxy , &file->type ) );
    233     socket = hal_remote_lpt( XPTR( cxy , &file->socket ) );
    234 
    235 // check file descriptor pointer
    236 assert( (file_xp != XPTR_NULL), "illegal fdid\n" );
    237 
    238 // check file descriptor type
    239 assert( (type == INODE_TYPE_SOCK), "illegal file type\n" );
    240 
    241     // remove the file descriptor from the process
    242     process_fd_remove( process->owner_xp , fdid );
    243 
    244     // release memory allocated for file descriptor
    245     req.type = KMEM_KCM;
    246     req.ptr  = file;
    247     kmem_remote_free( cxy , &req );
    248 
    249     // release memory allocated for buffers attached to socket descriptor
    250     remote_buf_destroy( XPTR( cxy , &socket->crqq ) );
    251     remote_buf_destroy( XPTR( cxy , &socket->r2tq ) );
    252     remote_buf_destroy( XPTR( cxy , &socket->rx_buf ) );
    253 
    254     // release memory allocated for socket descriptor
    255     req.type = KMEM_KCM;
    256     req.ptr  = socket;
    257     kmem_remote_free( cxy , &req );
    258 
    259 }  // end socket_destroy()
    260 
    261 /////////////////////////////////////////////////
    262 void socket_link_to_servers( xptr_t    socket_xp,
    263                              uint32_t  nic_channel )
     203    TCP_STATE_UNBOUND    = 0x10,
     204    TCP_STATE_BOUND      = 0x11,
     205    TCP_STATE_LISTEN     = 0x12,
     206    TCP_STATE_SYN_SENT   = 0x13,
     207    TCP_STATE_SYN_RCVD   = 0x14,
     208    TCP_STATE_ESTAB      = 0x15,
     209    TCP_STATE_FIN_WAIT1  = 0x16,
     210    TCP_STATE_FIN_WAIT2  = 0x17,
     211    TCP_STATE_CLOSING    = 0x18,
     212    TCP_STATE_TIME_WAIT  = 0x19,
     213    TCP_STATE_CLOSE_WAIT = 0x1A,
     214    TCP_STATE_LAST_ACK   = 0x1B,
     215    TCP_STATE_CLOSED     = 0x1C,
     216}
     217tcp_socket_state_t;
     218
     219/*****************************************************************************************
     220 * This structure defines one connection request, registered in the CRQ queue.
     221 ****************************************************************************************/
     222typedef struct connect_request_s
    264223{
    265     cxy_t      socket_cxy = GET_CXY( socket_xp );
    266     socket_t * socket_ptr = GET_PTR( socket_xp );
    267 
    268     // get pointers on NIC_TX[index] chdev
    269     xptr_t    tx_chdev_xp  = chdev_dir.nic_tx[nic_channel];
    270     chdev_t * tx_chdev_ptr = GET_PTR( tx_chdev_xp );
    271     cxy_t     tx_chdev_cxy = GET_CXY( tx_chdev_xp );
    272 
    273     // build extended pointers on root of sockets attached to NIC_TX[channel] chdev
    274     xptr_t    tx_root_xp = XPTR( tx_chdev_cxy , &tx_chdev_ptr->wait_root );
    275     xptr_t    tx_lock_xp = XPTR( tx_chdev_cxy , &tx_chdev_ptr->wait_lock );
    276 
    277     // register socket in the NIC_TX[channel] chdev clients queue
    278     remote_rwlock_wr_acquire( tx_lock_xp );
    279     xlist_add_last( tx_root_xp , XPTR( socket_cxy , &socket_ptr->tx_list ) );
    280     remote_rwlock_wr_release( tx_lock_xp );
    281 
    282     // get pointers on NIC_RX[index] chdev
    283     xptr_t    rx_chdev_xp  = chdev_dir.nic_rx[nic_channel];
    284     chdev_t * rx_chdev_ptr = GET_PTR( rx_chdev_xp );
    285     cxy_t     rx_chdev_cxy = GET_CXY( rx_chdev_xp );
    286 
    287     // build extended pointer on root of sockets attached to NIC_TX[channel] chdev
    288     xptr_t    rx_root_xp = XPTR( rx_chdev_cxy , &rx_chdev_ptr->wait_root );
    289     xptr_t    rx_lock_xp = XPTR( rx_chdev_cxy , &rx_chdev_ptr->wait_lock );
    290 
    291     // register socket in the NIC_RX[channel] chdev clients queue
    292     remote_rwlock_wr_acquire( rx_lock_xp );
    293     xlist_add_last( rx_root_xp , XPTR( socket_cxy , &socket_ptr->rx_list ) );
    294     remote_rwlock_wr_release( rx_lock_xp );
    295 
    296 }  // end socket_link_to_server()
    297 
    298 
     224    uint32_t          addr;          /* requesting socket IP address                   */
     225    uint32_t          port;          /* requesting socket port number                  */
     226    uint32_t          iss;           /* requesting socket initial sequence number      */
     227    uint32_t          window;        /* requesting socket receive window               */
     228}         
     229connect_request_t;
     230
     231/*****************************************************************************************
     232 * This structure defines the socket descriptor.
     233 ****************************************************************************************/
     234typedef struct socket_s
     235{
     236    remote_queuelock_t lock;         /*! lock protecting socket state                  */
     237    pid_t              pid;          /*! owner process identifier                      */
     238    uint32_t           fdid;         /*! associated file descriptor index              */
     239    uint32_t           domain;       /*! domain : AF_LOCAL / AF_INET                   */
     240    uint32_t           type;         /*! type : SOCK_DGRAM / SOCK_STREAM               */
     241    uint32_t           state;        /*! socket state (see above)                      */
     242    uint32_t           local_addr;   /*! local  socket IP address                      */
     243    uint32_t           remote_addr;  /*! remote socket IP address                      */
     244    uint32_t           local_port;   /*! local  socket port number                     */
     245    uint32_t           remote_port;  /*! remote socket port number                     */
     246    uint32_t           nic_channel;  /*! derived from (remote_addr,remote_port)        */
     247
     248    xlist_entry_t      tx_list;      /*! all sockets attached to same NIC_TX channel   */
     249    xptr_t             tx_client;    /*! extended pointer on current TX client thread  */
     250    bool_t             tx_valid;     /*! TX command valid                              */
     251    socket_cmd_type_t  tx_cmd;       /*! TX command (CONNECT / ACCEPT / SEND / CLOSE)  */
     252    uint32_t           tx_sts;       /*! signal a TX command success / failure         */
     253    uint8_t         *  tx_buf;       /*! pointer on TX data buffer in kernel space     */
     254    uint32_t           tx_len;       /*! number of data bytes for a SEND command       */
     255    uint32_t           tx_todo;      /*! number of bytes not yet sent                  */
     256    xlist_entry_t      tx_temp;      /*! temporary list of sockets (root in TX chdev)  */
     257
     258    xlist_entry_t      rx_list;      /*! all sockets attached to same NIC_RX channel   */
     259    xptr_t             rx_client;    /*! extended pointer on current RX client thread  */
     260    bool_t             rx_valid;     /*! RX command valid                              */
     261    socket_cmd_type_t  rx_cmd;       /*! RX command ( ACCEPT / RECV )                  */
     262    uint32_t           rx_sts;       /*! signal a RX command success / failure         */
     263    remote_buf_t       rx_buf;       /*! embedded receive buffer descriptor            */
     264
     265    remote_buf_t       r2tq;         /*! RX_to_TX requests queue descriptor            */
     266    remote_buf_t       crqq;         /*! connection requests queue descriptor          */
     267
     268    /* the following fields define the TCB (only used for a TCP connection)            */
     269
     270    uint32_t           tx_nxt;       /*! next byte to send in TX_data stream           */
     271    uint32_t           tx_wnd;       /*! number of acceptable bytes in TX_data stream  */
     272    uint32_t           tx_una;       /*! first unack byte in TX_data stream            */
     273    uint32_t           rx_nxt;       /*! next expected byte in RX_data stream          */
     274    uint32_t           rx_wnd;       /*! number of acceptable bytes in RX_data stream  */
     275    uint32_t           rx_irs;       /*! initial sequence number in RX_data stream     */
     276}
     277socket_t;
     278
     279/****************************************************************************************
     280 * This function returns a printable string for a socket domain.
     281 ****************************************************************************************
     282 * domain   :  AF_INET / AF_LOCAL
     283 ***************************************************************************************/
     284char * socket_domain_str( uint32_t domain );
     285
     286/****************************************************************************************
     287 * This function returns a printable string for a socket type.
     288 ****************************************************************************************
     289 * type   :  SOCK_DGRAM / SOCK_STREAM
     290 ***************************************************************************************/
     291char * socket_type_str( uint32_t type );
     292
     293/****************************************************************************************
     294 * This function returns a printable string for an UDP or TCP socket state.
     295 ****************************************************************************************
     296 * state  :  UDP_STATE_*** / TCP_STATE_***
     297 ***************************************************************************************/
     298char * socket_state_str( uint32_t state );
     299
     300/****************************************************************************************
     301 * This function returns a printable string for a command type.
     302 ****************************************************************************************
     303 * type  :  command type
     304 ***************************************************************************************/
     305char * socket_cmd_type_str( uint32_t type );
     306
     307/****************************************************************************************
     308 * This function returns a printable string for a command status.
     309 ****************************************************************************************
     310 * sts   : command status.
     311 ***************************************************************************************/
     312char * socket_cmd_sts_str( uint32_t sts );
     313
     314
     315
     316/****************************************************************************************
     317 *      Functions used by the NIC_TX and NIC_RX server threads.
     318 ***************************************************************************************/
     319
     320/****************************************************************************************
     321 * This function is called by the dev_nic_rx_handle_tcp() function, executed by the
     322 * NIC_RX[channel] server thread, to register a R2T request defined by the <flags>
     323 * argument in the socket R2T queue, specified by the <queue_xp> argument.
     324 * This function unblocks the NIC_TX[channel] server thread, identified by the <channel>
     325 * argument, from the THREAD_BLOCKED_CLIENT condition.
     326 ****************************************************************************************
     327 * @ queue_xp   : [in] extended pointer on the R2T queue descriptor.
     328 * @ flags      : [in] flags to be set in the TCP segment.
     329 * @ channel    : [in] NIC channel (both TX & RX).
     330 ***************************************************************************************/
     331void socket_put_r2t_request( xptr_t    queue_xp,
     332                             uint32_t  flags,
     333                             uint32_t  channel );
     334 
     335/****************************************************************************************
     336 * This function is called by the dev_nic_rx_handle_tcp() function to register
     337 * a client connection request, defined by the <remote_addr>, <remote_port>,
     338 * <remote_iss>, and <remote_window> arguments, in the CRQ queue, specified
     339 * by the <queue_xp> argument.
     340 ****************************************************************************************
     341 * @ queue_xp      : [in] extended pointer on the CRQ queue descriptor.
     342 * @ remote_addr   : [in] remote socket IP address.
     343 * @ remote_port   : [in] remote socket port.
     344 * @ remote_iss    : [in] remote socket initial sequence number.
     345 * @ remote_window : [in] remote socket receive window
     346 * @ return 0 if success / return -1 if queue full.
     347 ***************************************************************************************/
     348error_t socket_put_crq_request( xptr_t    queue_xp,
     349                                uint32_t  remote_addr,
     350                                uint32_t  remote_port,
     351                                uint32_t  remote_iss,
     352                                uint32_t  remote_window );
     353
     354/****************************************************************************************
     355 * This function is called by the socket_accept() function to extract a connection
     356 * request from a CRQ queue, specified by the <queue_xp> argument, to the buffers
     357 * defined by <remote_addr>, <remote_port>, <remote_iss>, and <remote_window>.
     358 *****************************************************************************************
     359 * @ queue_xp      : [in]  extended pointer on the CRQ queue descriptor.
     360 * @ remote_addr   : [out] buffer for remote socket IP address.
     361 * @ remote_port   : [out] buffer for remote socket port.
     362 * @ remote_iss    : [out] buffer for remote socket initial sequence number.
     363 * @ remote_window : [out] buffer for remote socket receive window
     364 * @ return 0 if success / return -1 if queue empty.
     365 ***************************************************************************************/
     366error_t socket_get_crq_request( xptr_t     queue_xp,
     367                                uint32_t * remote_addr,
     368                                uint32_t * remote_port,
     369                                uint32_t * remote_iss,
     370                                uint32_t * remote_window );
     371
     372/****************************************************************************************
     373 * This blocking function displays the socket state (including the TCB).
     374 ****************************************************************************************
     375 * @ socket_xp     : [in] extended pointer on socket descriptor.
     376 * @ func_str      : [in] name of calling function.
     377 ***************************************************************************************/
     378void socket_display( xptr_t         socket_xp,
     379                     const char   * func_str );
     380
     381
     382
     383/****************************************************************************************
     384 *      Functions implementing the socket related system calls
     385 ***************************************************************************************/
     386
     387/****************************************************************************************
     388 * This function implements the socket() syscall.
     389 * This function allocates and initializes in the calling thread cluster:
     390 * - a new socket descriptor, defined by the <domain> and <type> arguments,
     391 * - a new file descriptor, associated to this socket,
     392 * It registers the file descriptor in the reference process fd_array[],
     393 * set the socket state to UNBOUND, and returns the <fdid> value.
     394 ****************************************************************************************
     395 * @ domain  : [in] socket protocol family (AF_UNIX / AF_INET)
     396 * @ type    : [in] socket type (SOCK_DGRAM / SOCK_STREAM).
     397 * @ return a file descriptor <fdid> if success / return -1 if failure.
     398 ***************************************************************************************/
     399int socket_build( uint32_t   domain,
     400                  uint32_t   type );
     401
     402/****************************************************************************************
     403 * This function implements the bind() syscall.
     404 * It assigns an IP address, defined by the <addr> argument, and a port number,
     405 * defined by the <port> argument to an unnamed local socket, identified by the
     406 * <fdid> argument, and sets the socket state to BOUND. It applies to UDP or TCP sockets.
     407 * It does not require any service from the NIC_TX and NIC_RX server threads.
     408 * It can be called by a thread running in any cluster.
     409 ****************************************************************************************
     410 * @ fdid         : [in] file descriptor index identifying the socket.
     411 * @ addr         : [in] local IP address.
     412 * @ port         : [in] local port.
     413 * @ return 0 if success / return -1 if failure.
     414 ***************************************************************************************/
     415int socket_bind( uint32_t  fdid,
     416                 uint32_t  addr,
     417                 uint16_t  port );
     418
     419/****************************************************************************************
     420 * This function implements the listen() syscall.
     421 * It is called by a (local) server process to specify the max size of the CRQ queue
     422 * for a socket identified by the <fdid> argument, that expects connection requests
     423 * from one or several (remote) client processes.  The selected socket CRQ is supposed
     424 * to register all connections requests, whatever the client IP address and port values.
     425 * This function applies only to a TCP socket, that must be in the BOUND state.
     426 * The <fdid> socket is set to the LISTEN state.
     427 * It does not require any service from the NIC_TX and NIC_RX server threads.
     428 * It can be called by a thread running in any cluster.
     429 ****************************************************************************************
     430 * Implementation notes :
     431 * The number N of channels available in the NIC controller can be larger than 1.
     432 * Depending on the remote client IP address and port, the  connection request can be
     433 * received by any NIC_RX[k] server thread. To find the relevant listening socket, each
     434 * NIC_RX[k] server thread must be able to scan the set of all listening sockets.
     435 * Therefore a list of listening sockets is implemented as a dedicated xlist, rooted in
     436 * the NIC_RX[0] chdev extension, and using the listening socket <rx_list> field,
     437 * because a listening socket is never used to move data. 
     438 ****************************************************************************************
     439 * @ fdid      : [in] file descriptor index identifying the local server socket.
     440 * @ crq_depth : [in] depth of CRQ queue of pending connection requests.
     441 ***************************************************************************************/
     442int socket_listen( uint32_t fdid,
     443                   uint32_t crq_depth );
     444
     445/****************************************************************************************
     446 * This blocking function implements the accept() syscall.
     447 * It applies only to TCP sockets in the LISTEN state.
     448 * It is executed by a server process, waiting for one (or several) client process(es)
     449 * requesting a connection on a listening socket identified by the <fdid> argument.
     450 * This socket must have been previously created with socket(), bound to a local address
     451 * with bind(), and listening for connections after a listen(). It blocks on the <IO>
     452 * condition if the CRQ is empty. Otherwise, it gets a pending connection request from
     453 * the listening socket CRQ queue, and creates & initializes a new socket with
     454 * the same properties as the listening socket, allocating a new file descriptor
     455 * for this new socket. It returns the new socket fdid as well as the remote IP address
     456 * and port, but only when the new socket is set to the ESTAB state. The new socket
     457 * cannot accept connections, but the listening socket keeps open for new connections. 
     458 ****************************************************************************************
     459 * Implementation Note:
     460 * This blocking function contains two blocking conditions because it requests services
     461 * to both the NIC_RX server thread, and the NIC_TX server thread.
     462 * It can be split in five steps:
     463 * 1) It makes several checkings on the listening socket domain, type, and state.
     464 * 2) If the socket CRQ queue is empty, the function makes a SOCKET_RX_ACCEPT command
     465 *    to the NIC_RX server thread, waiting registration of a connection request in the
     466 *    CRQ queue. Then it blocks on the <IO> condition and deschedules. It is unblocked
     467 *    by the NIC_RX server thread receiving a valid TCP SYN segment.
     468 * 3) When it found a pending request, it creates a new socket with the same properties
     469 *    as the listening socket, and a new file descriptor for this socket. It initializes
     470 *    the new socket descriptor using the values in the registered connect_request_t
     471 *    structure, and set this new socket to the SYN_RECV state.
     472 * 4) Then it makes a SOCKET_TX_command to the NIC_TX thread, requesting a TCP SYN_ACK
     473 *    segment to the remote socket. Then, it blocks on <IO> condition and deschedules.
     474 *    It is unblocked by the NIC_RX server thread when this SYN_ACK is acknowledged,
     475 *    and the new socket is set in ESTAB state (by the NIC_RX server).
     476 * 5) Finally, it returns the new socket fdid, and registers, in the <address> and
     477 *    <port> arguments, the remote client IP address & port.
     478 ****************************************************************************************
     479 * @ fdid         : [in] file descriptor index identifying the listening socket.
     480 * @ address      : [out] buffer for remote client IP address.
     481 * @ port         : [out] buffer for remote client port.
     482 * @ return the new socket <fdid> if success / return -1 if failure
     483 ***************************************************************************************/
     484int socket_accept( uint32_t   fdid,
     485                   uint32_t * address,
     486                   uint16_t * port );
     487
     488/****************************************************************************************
     489 * This blocking function implements the connect() syscall.
     490 * It is used by a client process to connect a local socket identified by
     491 * the <fdid> argument, to a remote socket identified by the <remote_addr> and
     492 * <remote_port> arguments. It can be used for both  UDP and TCP sockets.
     493 * It computes the nic_channel index [k] from <remote_addr> and <remote_port> values,
     494 * and initializes "remote_addr","remote_port", "nic_channel" in local socket.
     495 * It registers the socket in the lists of sockets rooted in the NIC_RX[k] & NIC_TX[k]
     496 * chdevs. It can be called by a thread running in any cluster.
     497 * It returns only when the local socket is in the ESTAB state, or to report an error.
     498 ****************************************************************************************
     499 * Implementation Note:
     500 * - For a TCP socket, it updates the "remote_addr", "remote_port", "nic_channel" fields
     501 *   in the socket descriptor defined by the <fdid> argument, and register this socket,
     502 *   in the lists of sockets attached to the NIC_TX[k] and NIC_RX[k] chdevs.
     503 *   Then, it builds a TX_CONNECT command to the NIC_TX server thread to send a SYN to
     504 *   the remote socket, unblocks the NIC_TX server thread from the <CLIENT> condition,
     505 *   blocks itself on <IO> condition and deschedules. It is unblocked by the NIC_RX
     506 *   server thread when this thread receive the expected SYN-ACK, and the local socket
     507 *   has been set to the ESTAB state, or when an error is reported in "tx_error" field.
     508 * - For an UDP socket, it simply updates "remote_addr", "remote_port", "nic_channel"
     509 *   in the socket descriptor defined by the <fdid> argument, and register this socket
     510 *   in the lists of sockets attached to the NIC_TX[k] and NIC_RX[k] chdevs.
     511 *   Then, it set the socket to the ESTAB state, or returns an error without blocking.
     512 ****************************************************************************************
     513 * @ fdid          : [in] file descriptor index identifying the socket.
     514 * @ remote_addr   : [in] remote IP address.
     515 * @ remote_port   : [in] remote port.
     516 * @ return 0 if success / return -1 if failure.
     517 ***************************************************************************************/
     518int socket_connect( uint32_t  fdid,
     519                    uint32_t  remote_addr,
     520                    uint16_t  remote_port );
     521
     522/****************************************************************************************
     523 * This blocking function implements the send() syscall.
     524 * It is used to send data stored in the user buffer, identified by the <u_buf> and <length>
     525 * arguments, to a connected (TCP or UDP) socket, identified by the <fdid> argument.
     526 * The work is actually done by the NIC_TX server thread, and the synchronisation
     527 * between the client and the server threads uses the "tx_valid" set/reset flip-flop:
     528 * The client thread registers itself in the socket descriptor, registers in the queue
     529 * rooted in the NIC_TX[index] chdev, sets "tx_valid", unblocks the server thread, and
     530 * finally blocks on THREAD_BLOCKED_IO, and deschedules.
     531 * When the TX server thread completes the command (all data has been sent for an UDP
     532 * socket, or acknowledged for a TCP socket), the server thread resets "tx_valid" and
     533 * unblocks the client thread.
     534 * This function can be called by a thread running in any cluster.
     535 * WARNING : This implementation does not support several concurrent SEND/SENDTO commands
     536 * on the same socket, as only one TX thread can register in a given socket.
     537 ****************************************************************************************
     538 * @ fdid      : [in] file descriptor index identifying the socket.
     539 * @ u_buf     : [in] pointer on buffer containing packet in user space.
     540 * @ length    : [in] packet size in bytes.
     541 * @ return number of sent bytes if success / return -1 if failure.
     542 ***************************************************************************************/
     543int socket_send( uint32_t    fdid,
     544                 uint8_t   * u_buf,
     545                 uint32_t    length );
     546
     547/****************************************************************************************
     548 * This blocking function implements the sendto() syscall.
     549 * It registers the <remote_addr> and <remote_port> arguments in the local socket
     550 * descriptor, and does the same thing as the socket_send() function above,
     551 * but can be called  on an unconnected UDP socket.
     552 ****************************************************************************************
     553 * @ fdid        : [in] file descriptor index identifying the socket.
     554 * @ u_buf       : [in] pointer on buffer containing packet in user space.
     555 * @ length      : [in] packet size in bytes.
     556 * @ remote_addr : [in] destination IP address.
     557 * @ remote_port : [in] destination port.
     558 * @ return number of sent bytes if success / return -1 if failure.
     559 ***************************************************************************************/
     560int socket_sendto( uint32_t    fdid,
     561                   uint8_t   * u_buf,
     562                   uint32_t    length,
     563                   uint32_t    remote_addr,
     564                   uint32_t    remote_port );
     565
     566/****************************************************************************************
     567 * This blocking function implements the recv() syscall.
     568 * It is used to receive data that has been stored by the NIC_RX server thread in the
     569 * rx_buf of a connected (TCP or UDP) socket, identified by the <fdid> argument.
     570 * The synchronisation between the client and the server threads uses the "rx_valid"
     571 * set/reset flip-flop: If "rx_valid" is set, the client simply moves the available
     572 * data from the "rx_buf" to the user buffer identified by the <u_buf> and <length>
     573 * arguments, and reset the "rx_valid" flip_flop. If "rx_valid" is not set, the client
     574 * thread register itself in the socket descriptor, registers in the clients queue rooted
     575 * in the NIC_RX[index] chdev, and finally blocks on THREAD_BLOCKED_IO, and deschedules.
     576 * The client thread is re-activated by the RX server, that set the "rx_valid" flip-flop
     577 * as soon as data is available in the "rx_buf". The number of bytes actually transferred
     578 * can be less than the user buffer size.
     579 * This  function can be called by a thread running in any cluster.
     580 * WARNING : This implementation does not support several concurrent RECV/RECVFROM
     581 * commands on the same socket, as only one RX thread can register in a given socket.
     582 ****************************************************************************************
     583 * @ fdid      : [in] file descriptor index identifying the socket.
     584 * @ u_buf     : [in] pointer on buffer in user space.
     585 * @ length    : [in] buffer size in bytes.
     586 * @ return number of received bytes if success / return -1 if failure.
     587 ***************************************************************************************/
     588int socket_recv( uint32_t    fdid,
     589                 uint8_t   * u_buf,
     590                 uint32_t    length );
     591
     592/****************************************************************************************
     593 * This blocking function implements the recvfrom() syscall.
     594 * It registers the <remote_addr> and <remote_port> arguments in the local socket
     595 * descriptor, and does the same thing as the socket_recv() function above,
     596 * but can be called on an unconnected UDP socket.
     597 ****************************************************************************************
     598 * @ fdid        : [in] file descriptor index identifying the socket.
     599 * @ u_buf       : [in] pointer on buffer in user space.
     600 * @ length      : [in] buffer size in bytes.
     601 * @ remote_addr : [in] remote IP address.
     602 * @ remote_port : [in] remote port.
     603 * @ return number of received bytes if success / return -1 if failure.
     604 ***************************************************************************************/
     605int socket_recvfrom( uint32_t    fdid,
     606                     uint8_t   * u_buf,
     607                     uint32_t    length,
     608                     uint32_t    remote_addr,
     609                     uint32_t    remote_port );
     610
     611/****************************************************************************************
     612 * This blocking function implements the close() syscall for a socket.
     613 * - For a UDP socket, it simply calls the static socket_destroy() function to release
     614 *   all structures associated to the local socket, including the file descriptor.
     615 * - For a TCP socket, it makes a CLOSE command to NIC_TX, and blocks on the <IO>
     616 *   condition. The close TCP handshake is done by the NIC_TX and NIC_RX threads.
     617 *   It is unblocked when the socket is in CLOSED state, or when an error is reported.
     618 *   Finally, it calls the static socket_destroy() function to release all structures
     619 *   associated to the local socket, including the file descriptor.
     620 ****************************************************************************************
     621 * @ file_xp     : [in] extended pointer on file descriptor.
     622 * @ fdid        : [in] file descriptor index identifying the socket.
     623 * @ return 0 if success / return -1 if failure.
     624 ***************************************************************************************/
     625int socket_close( xptr_t     file_xp,
     626                  uint32_t   fdid );
     627
     628
     629#endif  /* _KSOCKET_H_ */
     630
     631
     632
Note: See TracChangeset for help on using the changeset viewer.