[657] | 1 | /* |
---|
[662] | 2 | * ksocket.h - kernel socket descriptor and API definition. |
---|
| 3 | * |
---|
| 4 | * Authors Alain Greiner (2016,2017,2018,2019,2020) |
---|
[657] | 5 | * |
---|
| 6 | * Copyright (c) UPMC Sorbonne Universites |
---|
| 7 | * |
---|
[662] | 8 | * This file is part of ALMOS-MKH |
---|
[657] | 9 | * |
---|
[662] | 10 | * ALMOS-MKH is free software; you can redistribute it and/or modify it |
---|
[657] | 11 | * under the terms of the GNU General Public License as published by |
---|
| 12 | * the Free Software Foundation; version 2.0 of the License. |
---|
| 13 | * |
---|
[662] | 14 | * ALMOS-MKH is distributed in the hope that it will be useful, but |
---|
[657] | 15 | * WITHOUT ANY WARRANTY; without even the implied warranty of |
---|
| 16 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
---|
| 17 | * General Public License for more details. |
---|
| 18 | * |
---|
| 19 | * You should have received a copy of the GNU General Public License |
---|
[662] | 20 | * along with ALMOS-MKH; if not, write to the Free Software Foundation, |
---|
[657] | 21 | * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA |
---|
| 22 | */ |
---|
| 23 | |
---|
[662] | 24 | #ifndef _KSOCKET_H_ |
---|
| 25 | #define _KSOCKET_H_ |
---|
| 26 | |
---|
[657] | 27 | #include <kernel_config.h> |
---|
| 28 | #include <hal_kernel_types.h> |
---|
[662] | 29 | #include <xlist.h> |
---|
[657] | 30 | #include <remote_buf.h> |
---|
[662] | 31 | #include <remote_busylock.h> |
---|
[657] | 32 | |
---|
[662] | 33 | /***************************************************************************************** |
---|
| 34 | * This structure defines a kernel socket descriptor, used for both UDP or TCP sockets. |
---|
| 35 | * A socket is a private resource used by at most two user threads : one TX client |
---|
| 36 | * thread to send packets, and one RX client thread, to receive packets. The TX client |
---|
| 37 | * thread and the RX client thread can be the same thread. |
---|
| 38 | * |
---|
| 39 | * When the Network Interface Controller contains several channels, the set of all |
---|
| 40 | * existing sockets is split in as many subsets as the number of NIC channels, in order |
---|
| 41 | * to parallelize the transfers. The distribution key defining the channel index |
---|
| 42 | * is computed from the (remote_addr/remote_port) couple: by the NIC hardware for the |
---|
| 43 | * RX packets; by the software for the TX packets, using a dedicated NIC driver function. |
---|
| 44 | * All sockets that have the same key share the same channel, and each socket is |
---|
| 45 | * therefore linked to two chdevs : NIC_TX[key] & NIC_RX[key]. |
---|
| 46 | * The socket allows the NIC-TX and NIC_RX server threads to access various buffers: |
---|
| 47 | * - the kernel "tx_buf" buffer contains the data to be send by the TX server thread. |
---|
| 48 | * It is dynamically allocated, and used as retransmission buffer when required. |
---|
| 49 | * - the kernel "rx_buf" buffer contains the data received by the RX server thread. |
---|
| 50 | * It is allocated in socket and handled as a single writer / single reader FIFO. |
---|
| 51 | * - the kernel "r2t" buffer allows the RX server thread to make direct requests |
---|
| 52 | * to the associated TX server (mainly used to handle the TCP ACKs). |
---|
| 53 | * - the kernel "crq" buffer allows to store concurrent remote client connect requests |
---|
| 54 | * to a local server socket. It is allocated in socket. |
---|
| 55 | * |
---|
| 56 | * The synchronisation mechanism between the client threads and the server threads |
---|
| 57 | * is different for the TX and RX directions: |
---|
| 58 | * |
---|
| 59 | * 1) TX stream |
---|
| 60 | * |
---|
| 61 | * - The internal API between the TX client thread and the NIC_TX server thread defines |
---|
| 62 | * four command types, stored in the "tx_cmd" variable of the socket descriptor: |
---|
| 63 | * . SOCKET_TX_CONNECT : TCP client request to start the 3 steps connection handshake. |
---|
| 64 | * . SOCKET_TX_ACCEPT : TCP server request to accept one pending connection request. |
---|
| 65 | * . SOCKET_TX_SEND : local (UDP/TCP) request to send data to a remote (UDP/TCP). |
---|
| 66 | * . SOCKET_TX_CLOSE : local TCP socket request remote TCP socket to close connection. |
---|
| 67 | * - All commands are blocking for the TX client thread: to make a command, the TX client |
---|
| 68 | * registers the command type in the socket "tx_cmd" field, sets the "tx_valid" field, |
---|
| 69 | * reset the "tx_error" field, and registers itself in the "tx_client" field. |
---|
| 70 | * Then, it unblocks the TX server thread from the BLOCKED_CLIENT condition, blocks itself |
---|
| 71 | * on the BLOCKED_IO condition, and deschedules. For a SEND, the "tx_buf" kernel buffer |
---|
| 72 | * is dynamically allocated by the client thread, that copies the payload from the user |
---|
| 73 | * buffer to this kernel buffer, that is used as retransmission buffer, when required. |
---|
| 74 | * - A command is valid for the TX server when the socket descriptor "tx_valid" is true. |
---|
| 75 | * For a SEND command, the "tx_valid" is reset by the NIC_TX server when the last byte has |
---|
| 76 | * been sent, but the TX client thread is unblocked by the NIC_RX server thread only when |
---|
| 77 | * the last byte has been acknowledged, or to report an error. |
---|
| 78 | * For the CONNECT, ACCEPT and CLOSE commands, the "tx_valid" is reset by the NIC_TX server |
---|
| 79 | * when the first segment of the handshake has been sent, but the TX client thread is |
---|
| 80 | * unblocked by the NIC_RX server thread only when the handshake is actually completed. |
---|
| 81 | * The TX server thread is acting as a multiplexer. It scans the list of attached sockets, |
---|
| 82 | * to sequentially handle the valid commands: one UDP packet or TCP segment per iteration. |
---|
| 83 | * The TX server blocks and deschedules on the BLOCKED_CLIENT condition when there is |
---|
| 84 | * no more valid TX command or R2T request registered in any socket. It is unblocked |
---|
| 85 | * from BLOCKED_CLIENT by a client thread registering a TX command, or by the RX server |
---|
| 86 | * thread registering a R2T request. The TX server thread signals an error to the TX client |
---|
| 87 | * thread using the "tx_error" field in socket descriptor. |
---|
| 88 | * When "tx_valid" or "r2t_valid" are true, the TX server thread build and send an UDP |
---|
| 89 | * packet or TCP segment. A single SEND command can require a large number of TCP |
---|
| 90 | * segments to move a big data buffer. |
---|
| 91 | * This TX server thread blocks and deschedules on the BLOCKED_ISR condition when |
---|
| 92 | * the NIC_TX queue is full. It is unblocked by the hardware NIC_TX_ISR. |
---|
| 93 | * - In order to detect and report error for multiple simultaneous TX accesses to the same |
---|
| 94 | * socket, the client thread makes a double check before posting a new TX command : |
---|
| 95 | * the "tx_valid" field must be false, and the "tx_client" field must be XPTR_NULL. |
---|
| 96 | * The "tx_valid" field is reset by the TX server thread, and the "tx_client" |
---|
| 97 | * field is reset by the TX client thread itself, when it resumes after a TX command. |
---|
| 98 | * . For a SEND command on an UDP socket, the TX server thread reset "tx_valid" and |
---|
| 99 | * unblocks the TX client thread as soon as the last data byte has been sent. |
---|
| 100 | * . For a SEND command on a TCP socket, the TX server thread reset "tx_valid" when the |
---|
| 101 | * last data byte has been sent, but the TX client thread is unblocked by the TX server |
---|
| 102 | * only when the last data byte has been acknowledged by the remote socket. |
---|
| 103 | * . For the CONNECT or ACCEPT commands, the "tx_valid" flag is reset and the TX client |
---|
| 104 | * thread is unblocked by the RX server thread only when the command is completed, |
---|
| 105 | * and the local TCP socket is actually in the ESTAB state. |
---|
| 106 | * . For a CLOSE command, the "tx_valid" flag is reset, and the TX client thread is |
---|
| 107 | * unblocked by the RX server thread only when the remote socket is disconnected. |
---|
| 108 | * |
---|
| 109 | * 2) RX stream |
---|
| 110 | * |
---|
| 111 | * - The internal API between the RX client thread and the RX server thread defines two |
---|
| 112 | * command types stored in the rx_cmd variable of the socket descriptor: |
---|
| 113 | * . SOCKET_RX_ACCEPT : TCP server request a connection request from CRQ queue. |
---|
| 114 | * . SOCKET_RX_RECV : local (UDP/TCP) socket expect data from a remote (UDP/TCP). |
---|
| 115 | * For the RECV command the communication is done through the "rx_buf" buffer, |
---|
| 116 | * attached to the socket, and handled as a single-writer / single reader-FIFO. |
---|
| 117 | * For the ACCEPT command the communication is done through the CRQ buffer, attached |
---|
| 118 | * to the socket, and handled as a single-writer / single reader-FIFO. |
---|
| 119 | * These two commands are blocking for the RX client thread as long as the buffer is |
---|
| 120 | * empty. The client thread set the socket "rx_valid" field, reset the "rx_error" field, |
---|
| 121 | * registers itself in the "rx_client" field, and blocks on the BLOCKED_IO condition. |
---|
| 122 | * - The RX server thread is acting as a demultiplexor: it handle one received TCP segment, |
---|
| 123 | * or UDP packet per iteration in the loop on the NIC_RX queue, and moves the data to |
---|
| 124 | * the relevant buffer of the socket matching the packet. It discard packets that don't |
---|
| 125 | * match a registered socket. When a client thread is registered in the socket descriptor, |
---|
| 126 | * the RX server thread reset the "rx_valid" field and unblocks the RX client thread from |
---|
| 127 | * the BLOCKED_IO condition as soon as there is data available in the "rx_buf". |
---|
| 128 | * This RX server thread blocks and deschedules on the BLOCKED_ISR condition when there |
---|
| 129 | * is no more packets in the NIC_RX queue. It is unblocked by the hardware NIC_RX_ISR. |
---|
| 130 | * - In order to detect and report error for multiple simultaneous RX accesses to the same |
---|
| 131 | * socket, the RX client thread makes a double check before posting a new RX command : |
---|
| 132 | * the "rx_valid" field must be false, and the "rx_client" field must be XPTR_NULL. |
---|
| 133 | * The "rx_valid" field is reset by the RX server thread, and the "rx_client" |
---|
| 134 | * field is reset by the RX client thread itself, when it resumes after an RX command. |
---|
| 135 | * |
---|
| 136 | * 3) R2T queue |
---|
| 137 | * |
---|
| 138 | * To implement the TCP "3 steps handshake" protocol for connection or to send RST, |
---|
| 139 | * the RX server thread can directly request the associated TX server thread to send |
---|
| 140 | * control packets in the TX stream, using a dedicated R2T (RX to TX) FIFO stored in |
---|
| 141 | * the socket descriptor. Each R2T request occupy one byte in this R2T queue. |
---|
| 142 | * |
---|
| 143 | * 4) CRQ queue |
---|
| 144 | * |
---|
| 145 | * The remote CONNECT requests received by a TCP socket (SYN segments) are stored in a |
---|
| 146 | * dedicated CRQ FIFO stored in the local socket descriptor. These requests are consumed |
---|
| 147 | * by the local client thread executing an ACCEPT. |
---|
| 148 | * Each CRQ request occupy sizeof(connect_request_t) bytes in this CRQ queue. |
---|
| 149 | * The connect_request_t structure containing the request arguments is defined below. |
---|
| 150 | * |
---|
| 151 | * Note : the socket domains and types are defined in the "shared_socket.h" file. |
---|
| 152 | ****************************************************************************************/ |
---|
[657] | 153 | |
---|
[662] | 154 | /***************************************************************************************** |
---|
| 155 | * This enum defines the set of commands that can be registered in the socket |
---|
| 156 | * by the TX & RX client threads to be executed by the NIC_TX & NIC_RX server threads. |
---|
| 157 | ****************************************************************************************/ |
---|
| 158 | typedef enum socket_cmd_type_e |
---|
[657] | 159 | { |
---|
[662] | 160 | CMD_TX_CONNECT = 20, /*! request a SYN segment (TCP only) */ |
---|
| 161 | CMD_TX_ACCEPT = 21, /*! request a SYN-ACK segment (TCP only) */ |
---|
| 162 | CMD_TX_CLOSE = 22, /*! request a RST segment (TCP only) */ |
---|
| 163 | CMD_TX_SEND = 23, /*! request to send data (TCP or UDP) */ |
---|
[657] | 164 | |
---|
[662] | 165 | CMD_RX_ACCEPT = 30, /*! wait request from CRQ (TCP only) */ |
---|
| 166 | CMD_RX_RECV = 31, /*! wait DATA from rx_buf (TCP or UDP) */ |
---|
[657] | 167 | } |
---|
[662] | 168 | socket_cmd_type_t; |
---|
| 169 | |
---|
| 170 | /***************************************************************************************** |
---|
| 171 | * This enum defines the set of command status values that can be returned by the NIC_RX |
---|
| 172 | * and NIC_TX server threads to the TX & RX client threads. |
---|
| 173 | * Success must be signaled by the zero value (CMD_STS_SUCCESS) / the various failure |
---|
| 174 | * cases are signaled by a non-zero value. |
---|
| 175 | ****************************************************************************************/ |
---|
| 176 | typedef enum socket_cmd_sts_e |
---|
[657] | 177 | { |
---|
[662] | 178 | CMD_STS_SUCCESS = 0, |
---|
| 179 | CMD_STS_EOF = 1, |
---|
| 180 | CMD_STS_RST = 2, |
---|
| 181 | CMD_STS_BADACK = 3, |
---|
| 182 | CMD_STS_BADSTATE = 4, |
---|
| 183 | CMD_STS_BADCMD = 5, |
---|
| 184 | } |
---|
| 185 | socket_cmd_sts_t; |
---|
[657] | 186 | |
---|
[662] | 187 | /***************************************************************************************** |
---|
| 188 | * This enum defines the set of states for an UDP socket (values in the 0x0X range). |
---|
| 189 | ****************************************************************************************/ |
---|
| 190 | typedef enum udp_socket_state_e |
---|
| 191 | { |
---|
| 192 | UDP_STATE_UNBOUND = 0x00, |
---|
| 193 | UDP_STATE_BOUND = 0x01, |
---|
| 194 | UDP_STATE_ESTAB = 0x02, |
---|
| 195 | } |
---|
| 196 | udp_socket_state_t; |
---|
[657] | 197 | |
---|
[662] | 198 | /***************************************************************************************** |
---|
| 199 | * This enum defines the set of states for a TCP socket (values in the 0x1X range). |
---|
| 200 | ****************************************************************************************/ |
---|
| 201 | typedef enum tcp_socket_state_e |
---|
| 202 | { |
---|
| 203 | TCP_STATE_UNBOUND = 0x10, |
---|
| 204 | TCP_STATE_BOUND = 0x11, |
---|
| 205 | TCP_STATE_LISTEN = 0x12, |
---|
| 206 | TCP_STATE_SYN_SENT = 0x13, |
---|
| 207 | TCP_STATE_SYN_RCVD = 0x14, |
---|
| 208 | TCP_STATE_ESTAB = 0x15, |
---|
| 209 | TCP_STATE_FIN_WAIT1 = 0x16, |
---|
| 210 | TCP_STATE_FIN_WAIT2 = 0x17, |
---|
| 211 | TCP_STATE_CLOSING = 0x18, |
---|
| 212 | TCP_STATE_TIME_WAIT = 0x19, |
---|
| 213 | TCP_STATE_CLOSE_WAIT = 0x1A, |
---|
| 214 | TCP_STATE_LAST_ACK = 0x1B, |
---|
| 215 | TCP_STATE_CLOSED = 0x1C, |
---|
[657] | 216 | } |
---|
[662] | 217 | tcp_socket_state_t; |
---|
[657] | 218 | |
---|
[662] | 219 | /***************************************************************************************** |
---|
| 220 | * This structure defines one connection request, as registered in the socket CRQ queue. |
---|
| 221 | ****************************************************************************************/ |
---|
| 222 | typedef struct connect_request_s |
---|
[657] | 223 | { |
---|
[662] | 224 | uint32_t addr; /* requesting (remote) socket IP address */ |
---|
| 225 | uint32_t port; /* requesting (remote) socket port number */ |
---|
| 226 | uint32_t iss; /* requesting (remote) socket initial sequence number */ |
---|
| 227 | uint32_t window; /* requesting (remote) socket receive window */ |
---|
| 228 | } |
---|
| 229 | connect_request_t; |
---|
[657] | 230 | |
---|
[662] | 231 | /***************************************************************************************** |
---|
| 232 | * This structure defines the socket descriptor, used for both UDP and TCP sockets. |
---|
| 233 | ****************************************************************************************/ |
---|
| 234 | typedef struct socket_s |
---|
| 235 | { |
---|
| 236 | remote_queuelock_t lock; /*! lock protecting socket state (NOTE(review): remote_queuelock.h is not included by this header - confirm it is reached transitively) */ |
---|
| 237 | pid_t pid; /*! owner process identifier */ |
---|
| 238 | uint32_t fdid; /*! associated file descriptor index */ |
---|
| 239 | uint32_t domain; /*! domain : AF_LOCAL / AF_INET */ |
---|
| 240 | uint32_t type; /*! type : SOCK_DGRAM / SOCK_STREAM */ |
---|
| 241 | uint32_t state; /*! socket state (UDP_STATE_*** / TCP_STATE_***) */ |
---|
| 242 | uint32_t local_addr; /*! local socket IP address */ |
---|
| 243 | uint32_t remote_addr; /*! remote socket IP address */ |
---|
| 244 | uint32_t local_port; /*! local socket port number */ |
---|
| 245 | uint32_t remote_port; /*! remote socket port number */ |
---|
| 246 | uint32_t nic_channel; /*! derived from (remote_addr,remote_port) */ |
---|
[657] | 247 | |
---|
[662] | 248 | xlist_entry_t tx_list; /*! all sockets attached to same NIC_TX channel */ |
---|
| 249 | xptr_t tx_client; /*! extended pointer on current TX client thread */ |
---|
| 250 | bool_t tx_valid; /*! TX command valid */ |
---|
| 251 | socket_cmd_type_t tx_cmd; /*! TX command (CONNECT / ACCEPT / SEND / CLOSE) */ |
---|
| 252 | uint32_t tx_sts; /*! signal a TX command success / failure */ |
---|
| 253 | uint8_t * tx_buf; /*! pointer on TX data buffer in kernel space */ |
---|
| 254 | uint32_t tx_len; /*! number of data bytes for a SEND command */ |
---|
| 255 | uint32_t tx_todo; /*! number of bytes not yet sent */ |
---|
| 256 | xlist_entry_t tx_temp; /*! temporary list of sockets (root in TX chdev) */ |
---|
[657] | 257 | |
---|
[662] | 258 | xlist_entry_t rx_list; /*! all sockets attached to same NIC_RX channel */ |
---|
| 259 | xptr_t rx_client; /*! extended pointer on current RX client thread */ |
---|
| 260 | bool_t rx_valid; /*! RX command valid */ |
---|
| 261 | socket_cmd_type_t rx_cmd; /*! RX command ( ACCEPT / RECV ) */ |
---|
| 262 | uint32_t rx_sts; /*! signal a RX command success / failure */ |
---|
| 263 | remote_buf_t rx_buf; /*! embedded receive buffer descriptor */ |
---|
[657] | 264 | |
---|
[662] | 265 | remote_buf_t r2tq; /*! RX_to_TX requests queue descriptor */ |
---|
| 266 | remote_buf_t crqq; /*! connection requests queue descriptor */ |
---|
[657] | 267 | |
---|
[662] | 268 | /* the following fields define the TCB (only used for a TCP connection) */ |
---|
[657] | 269 | |
---|
[662] | 270 | uint32_t tx_nxt; /*! next byte to send in TX_data stream */ |
---|
| 271 | uint32_t tx_wnd; /*! number of acceptable bytes in TX_data stream */ |
---|
| 272 | uint32_t tx_una; /*! first unack byte in TX_data stream */ |
---|
| 273 | uint32_t rx_nxt; /*! next expected byte in RX_data stream */ |
---|
| 274 | uint32_t rx_wnd; /*! number of acceptable bytes in RX_data stream */ |
---|
| 275 | uint32_t rx_irs; /*! initial sequence number in RX_data stream */ |
---|
| 276 | } |
---|
| 277 | socket_t; |
---|
[657] | 278 | |
---|
[662] | 279 | /**************************************************************************************** |
---|
| 280 | * This function returns a printable string for a socket domain. |
---|
| 281 | **************************************************************************************** |
---|
| 282 | * @ domain : [in] socket domain (AF_INET / AF_LOCAL). |
---|
| 283 | ***************************************************************************************/ |
---|
| 284 | char * socket_domain_str( uint32_t domain ); |
---|
[657] | 285 | |
---|
[662] | 286 | /**************************************************************************************** |
---|
| 287 | * This function returns a printable string for a socket type. |
---|
| 288 | **************************************************************************************** |
---|
| 289 | * @ type : [in] socket type (SOCK_DGRAM / SOCK_STREAM). |
---|
| 290 | ***************************************************************************************/ |
---|
| 291 | char * socket_type_str( uint32_t type ); |
---|
[657] | 292 | |
---|
[662] | 293 | /**************************************************************************************** |
---|
| 294 | * This function returns a printable string for an UDP or TCP socket state. |
---|
| 295 | **************************************************************************************** |
---|
| 296 | * @ state : [in] socket state (UDP_STATE_*** / TCP_STATE_***). |
---|
| 297 | ***************************************************************************************/ |
---|
| 298 | char * socket_state_str( uint32_t state ); |
---|
[657] | 299 | |
---|
[662] | 300 | /**************************************************************************************** |
---|
| 301 | * This function returns a printable string for a command type. |
---|
| 302 | **************************************************************************************** |
---|
| 303 | * @ type : [in] command type (CMD_TX_*** / CMD_RX_***). |
---|
| 304 | ***************************************************************************************/ |
---|
| 305 | char * socket_cmd_type_str( uint32_t type ); |
---|
[657] | 306 | |
---|
[662] | 307 | /**************************************************************************************** |
---|
| 308 | * This function returns a printable string for a command status. |
---|
| 309 | **************************************************************************************** |
---|
| 310 | * @ sts : [in] command status (CMD_STS_***). |
---|
| 311 | ***************************************************************************************/ |
---|
| 312 | char * socket_cmd_sts_str( uint32_t sts ); |
---|
[657] | 313 | |
---|
| 314 | |
---|
| 315 | |
---|
[662] | 316 | /**************************************************************************************** |
---|
| 317 | * Functions used by the NIC_TX and NIC_RX server threads. |
---|
| 318 | ***************************************************************************************/ |
---|
[657] | 319 | |
---|
[662] | 320 | /**************************************************************************************** |
---|
| 321 | * This function is called by the dev_nic_rx_handle_tcp() function, executed by the |
---|
| 322 | * NIC_RX[channel] server thread, to register a R2T request defined by the <flags> |
---|
| 323 | * argument in the socket R2T queue, specified by the <queue_xp> argument. |
---|
| 324 | * This function unblocks the NIC_TX[channel] server thread, identified by the <channel> |
---|
| 325 | * argument, from the THREAD_BLOCKED_CLIENT condition. |
---|
| 326 | **************************************************************************************** |
---|
| 327 | * @ queue_xp : [in] extended pointer on the R2T queue descriptor. |
---|
| 328 | * @ flags : [in] flags to be set in the TCP segment. |
---|
| 329 | * @ channel : [in] NIC channel (both TX & RX). |
---|
| 330 | ***************************************************************************************/ |
---|
| 331 | void socket_put_r2t_request( xptr_t queue_xp, |
---|
| 332 | uint32_t flags, |
---|
| 333 | uint32_t channel ); |
---|
| 334 | |
---|
| 335 | /**************************************************************************************** |
---|
| 336 | * This function is called by the dev_nic_rx_handle_tcp() function to register |
---|
| 337 | * a client connection request, defined by the <remote_addr>, <remote_port>, |
---|
| 338 | * <remote_iss>, and <remote_window> arguments, in the CRQ queue, specified |
---|
| 339 | * by the <queue_xp> argument. |
---|
| 340 | **************************************************************************************** |
---|
| 341 | * @ queue_xp : [in] extended pointer on the CRQ queue descriptor. |
---|
| 342 | * @ remote_addr : [in] remote socket IP address. |
---|
| 343 | * @ remote_port : [in] remote socket port. |
---|
| 344 | * @ remote_iss : [in] remote socket initial sequence number. |
---|
| 345 | * @ remote_window : [in] remote socket receive window. |
---|
| 346 | * @ return 0 if success / return -1 if queue full. |
---|
| 347 | ***************************************************************************************/ |
---|
| 348 | error_t socket_put_crq_request( xptr_t queue_xp, |
---|
| 349 | uint32_t remote_addr, |
---|
| 350 | uint32_t remote_port, |
---|
| 351 | uint32_t remote_iss, |
---|
| 352 | uint32_t remote_window ); |
---|
[657] | 353 | |
---|
[662] | 354 | /**************************************************************************************** |
---|
| 355 | * This function is called by the socket_accept() function to extract a connection |
---|
| 356 | * request from a CRQ queue, specified by the <queue_xp> argument, to the buffers |
---|
| 357 | * defined by <remote_addr>, <remote_port>, <remote_iss>, and <remote_window>. |
---|
| 358 | ***************************************************************************************** |
---|
| 359 | * @ queue_xp : [in] extended pointer on the CRQ queue descriptor. |
---|
| 360 | * @ remote_addr : [out] buffer for remote socket IP address. |
---|
| 361 | * @ remote_port : [out] buffer for remote socket port. |
---|
| 362 | * @ remote_iss : [out] buffer for remote socket initial sequence number. |
---|
| 363 | * @ remote_window : [out] buffer for remote socket receive window. |
---|
| 364 | * @ return 0 if success / return -1 if queue empty. |
---|
| 365 | ***************************************************************************************/ |
---|
| 366 | error_t socket_get_crq_request( xptr_t queue_xp, |
---|
| 367 | uint32_t * remote_addr, |
---|
| 368 | uint32_t * remote_port, |
---|
| 369 | uint32_t * remote_iss, |
---|
| 370 | uint32_t * remote_window ); |
---|
[657] | 371 | |
---|
[662] | 372 | /**************************************************************************************** |
---|
| 373 | * This blocking function displays the socket state (including the TCB). |
---|
| 374 | **************************************************************************************** |
---|
| 375 | * @ socket_xp : [in] extended pointer on socket descriptor. |
---|
| 376 | * @ func_str : [in] name of calling function. |
---|
| 377 | ***************************************************************************************/ |
---|
| 378 | void socket_display( xptr_t socket_xp, |
---|
| 379 | const char * func_str ); |
---|
[657] | 380 | |
---|
| 381 | |
---|
| 382 | |
---|
[662] | 383 | /**************************************************************************************** |
---|
| 384 | * Functions implementing the socket related system calls |
---|
| 385 | ***************************************************************************************/ |
---|
[657] | 386 | |
---|
[662] | 387 | /**************************************************************************************** |
---|
| 388 | * This function implements the socket() syscall. |
---|
| 389 | * This function allocates and initializes in the calling thread cluster: |
---|
| 390 | * - a new socket descriptor, defined by the <domain> and <type> arguments, |
---|
| 391 | * - a new file descriptor, associated to this socket. |
---|
| 392 | * It registers the file descriptor in the reference process fd_array[], |
---|
| 393 | * sets the socket state to UNBOUND, and returns the <fdid> value. |
---|
| 394 | **************************************************************************************** |
---|
| 395 | * @ domain : [in] socket protocol family (AF_UNIX / AF_INET) |
---|
| 396 | * @ type : [in] socket type (SOCK_DGRAM / SOCK_STREAM). |
---|
| 397 | * @ return a file descriptor <fdid> if success / return -1 if failure. |
---|
| 398 | ***************************************************************************************/ |
---|
| 399 | int socket_build( uint32_t domain, |
---|
| 400 | uint32_t type ); |
---|
[657] | 401 | |
---|
[662] | 402 | /**************************************************************************************** |
---|
| 403 | * This function implements the bind() syscall. |
---|
| 404 | * It assigns a local IP address, defined by the <addr> argument, and a local port number, |
---|
| 405 | * defined by the <port> argument, to an unnamed local socket, identified by the |
---|
| 406 | * <fdid> argument, and sets the socket state to BOUND. It applies to UDP or TCP sockets. |
---|
| 407 | * It does not require any service from the NIC_TX and NIC_RX server threads. |
---|
| 408 | * It can be called by a thread running in any cluster. |
---|
| 409 | **************************************************************************************** |
---|
| 410 | * @ fdid : [in] file descriptor index identifying the socket. |
---|
| 411 | * @ addr : [in] local IP address. |
---|
| 412 | * @ port : [in] local port. |
---|
| 413 | * @ return 0 if success / return -1 if failure. |
---|
| 414 | ***************************************************************************************/ |
---|
| 415 | int socket_bind( uint32_t fdid, |
---|
| 416 | uint32_t addr, |
---|
| 417 | uint16_t port ); |
---|
[657] | 418 | |
---|
[662] | 419 | /**************************************************************************************** |
---|
| 420 | * This function implements the listen() syscall. |
---|
| 421 | * It is called by a (local) server process to specify the max size of the CRQ queue |
---|
| 422 | * for a socket identified by the <fdid> argument, that expects connection requests |
---|
[668] | 423 | * from one or several (remote) client processes. The selected socket CRQ is supposed |
---|
[662] | 424 | * to register all connection requests, whatever the client IP address and port values. |
---|
[668] | 425 | * |
---|
[662] | 426 | * This function applies only to a TCP socket, that must be in the BOUND state. |
---|
[668] | 427 | * The socket is set to the LISTEN state. |
---|
[662] | 428 | * It does not require any service from the NIC_TX and NIC_RX server threads. |
---|
| 429 | * It can be called by a thread running in any cluster. |
---|
| 430 | **************************************************************************************** |
---|
| 431 | * Implementation notes : |
---|
| 432 | * The number N of channels available in the NIC controller can be larger than 1. |
---|
| 433 | * Depending on the remote client IP address and port, the connection request can be |
---|
| 434 | * received by any NIC_RX[k] server thread. To find the relevant listening socket, each |
---|
| 435 | * NIC_RX[k] server thread must be able to scan the set of all listening sockets. |
---|
| 436 | * Therefore a list of listening sockets is implemented as a dedicated xlist, rooted in |
---|
| 437 | * the NIC_RX[0] chdev extension, and using the listening socket <rx_list> field, |
---|
| 438 | * because a listening socket is never used to move data. |
---|
| 439 | **************************************************************************************** |
---|
| 440 | * @ fdid : [in] file descriptor index identifying the local server socket. |
---|
| 441 | * @ crq_depth : [in] depth of CRQ queue of pending connection requests. |
---|
[669] | 442 | * @ return 0 if success / return -1 if failure. |
---|
[662] | 443 | ***************************************************************************************/ |
---|
| 444 | int socket_listen( uint32_t fdid, |
---|
| 445 | uint32_t crq_depth ); |
---|
[657] | 446 | |
---|
[662] | 447 | /**************************************************************************************** |
---|
| 448 | * This blocking function implements the accept() syscall(). |
---|
| 449 | * It applies only to TCP sockets in the LISTEN state. |
---|
| 450 | * It is executed by a server process, waiting for one (or several) client process(es) |
---|
| 451 | * requesting a connection on a listening socket identified by the <fdid> argument. |
---|
| 452 | * This socket must have been previously created with socket(), bound to a local address |
---|
| 453 | * with bind(), and listening for connections after a listen(). It blocks on the <IO> |
---|
| 454 | * condition if the CRQ is empty. Otherwise, it gets a pending connection request from |
---|
[668] | 455 | * the listening socket CRQ queue, and creates a new socket with the same properties |
---|
| 456 | * as the listening socket, allocating a new file descriptor for this new socket. |
---|
| 457 | * It computes the nic_channel index [k] from <remote_addr> and <remote_port> values, |
---|
| 458 | * and initializes "remote_addr","remote_port", "nic_channel" in local socket. |
---|
| 459 | * It returns the new socket fdid as well as the remote IP address |
---|
[662] | 460 | * and port, but only when the new socket is set to the ESTAB state. The new socket |
---|
| 461 | * cannot accept connections, but the listening socket keeps open for new connections. |
---|
| 462 | **************************************************************************************** |
---|
| 463 | * Implementation Note: |
---|
| 464 | * This blocking function contains two blocking conditions because it requests services |
---|
| 465 | * to both the NIC_RX server thread, and the NIC_TX server thread. |
---|
| 466 | * It can be split in five steps: |
---|
| 467 | * 1) It makes several checkings on the listening socket domain, type, and state. |
---|
| 468 | * 2) If the socket CRQ queue is empty, the function makes a SOCKET_RX_ACCEPT command |
---|
| 469 | * to the NIC_RX server thread, waiting registration of a connection request in the |
---|
| 470 | * CRQ queue. Then it blocks on the <IO> condition and deschedules. It is unblocked |
---|
| 471 | * by the NIC_RX server thread receiving a valid TCP SYN segment. |
---|
| 472 | * 3) When it finds a pending request, it creates a new socket with the same properties |
---|
| 473 | * as the listening socket, and a new file descriptor for this socket. It initializes |
---|
| 474 | * the new socket descriptor using the values in the registered connect_request_t |
---|
| 475 | * structure, and set this new socket to the SYN_RECV state. |
---|
| 476 | * 4) Then it makes a SOCKET_TX_command to the NIC_TX thread, requesting a TCP SYN_ACK |
---|
| 477 | * segment to the remote socket. Then, it blocks on <IO> condition and deschedules. |
---|
| 478 | * It is unblocked by the NIC_RX server thread when this SYN_ACK is acknowledged, |
---|
| 479 | * and the new socket is set in ESTAB state (by the NIC_RX server). |
---|
| 480 | * 5) Finally, it returns the new socket fdid, and registers, in the <address> and |
---|
| 481 | * <port> arguments, the remote client IP address & port. |
---|
| 482 | **************************************************************************************** |
---|
| 483 | * @ fdid : [in] file descriptor index identifying the listening socket. |
---|
| 484 | * @ address : [out] remote client IP address. |
---|
| 485 | * @ port : [out] remote client port. |
---|
| 486 | * @ return the new socket <fdid> if success / return -1 if failure |
---|
| 487 | ***************************************************************************************/ |
---|
| 488 | int socket_accept( uint32_t fdid, |
---|
| 489 | uint32_t * address, |
---|
| 490 | uint16_t * port ); |
---|
[657] | 491 | |
---|
[662] | 492 | /**************************************************************************************** |
---|
| 493 | * This blocking function implements the connect() syscall. |
---|
| 494 | * It is used by a client process to connect a local socket identified by |
---|
| 495 | * the <fdid> argument, to a remote socket identified by the <remote_addr> and |
---|
| 496 | * <remote_port> arguments. It can be used for both UDP and TCP sockets. |
---|
| 497 | * It computes the nic_channel index [k] from <remote_addr> and <remote_port> values, |
---|
| 498 | * and initializes "remote_addr","remote_port", "nic_channel" in local socket. |
---|
| 499 | * It registers the socket in the lists of sockets rooted in the NIC_RX[k] & NIC_TX[k] |
---|
| 500 | * chdevs. It can be called by a thread running in any cluster. |
---|
| 501 | * It returns only when the local socket is in the ESTAB state, or to report an error. |
---|
| 502 | **************************************************************************************** |
---|
| 503 | * Implementation Note: |
---|
| 504 | * - For a TCP socket, it updates the "remote_addr", "remote_port", "nic_channel" fields |
---|
| 505 | * in the socket descriptor defined by the <fdid> argument, and register this socket, |
---|
| 506 | * in the lists of sockets attached to the NIC_TX[k] and NIC_RX[k] chdevs. |
---|
| 507 | * Then, it builds a TX_CONNECT command to the NIC_TX server thread to send a SYN to |
---|
| 508 | * the remote socket, unblocks the NIC_TX server thread from the <CLIENT> condition, |
---|
| 509 | * blocks itself on <IO> condition and deschedules. It is unblocked by the NIC_RX |
---|
| 510 | * server thread when this thread receives the expected SYN-ACK, and the local socket |
---|
| 511 | * has been set to the ESTAB state, or when an error is reported in "tx_error" field. |
---|
| 512 | * - For an UDP socket, it simply updates "remote_addr", "remote_port", "nic_channel" |
---|
| 513 | * in the socket descriptor defined by the <fdid> argument, and register this socket |
---|
| 514 | * in the lists of sockets attached to the NIC_TX[k] and NIC_RX[k] chdevs. |
---|
| 515 | * Then, it set the socket to the ESTAB state, or returns an error without blocking. |
---|
| 516 | **************************************************************************************** |
---|
| 517 | * @ fdid : [in] file descriptor index identifying the socket. |
---|
| 518 | * @ remote_addr : [in] remote IP address. |
---|
| 519 | * @ remote_port : [in] remote port. |
---|
| 520 | * @ return 0 if success / return -1 if failure. |
---|
| 521 | ***************************************************************************************/ |
---|
| 522 | int socket_connect( uint32_t fdid, |
---|
| 523 | uint32_t remote_addr, |
---|
| 524 | uint16_t remote_port ); |
---|
[657] | 525 | |
---|
[662] | 526 | /**************************************************************************************** |
---|
| 527 | * This blocking function implements the send() syscall. |
---|
| 528 | * It is used to send data stored in the user buffer, identified by the <u_buf> and <length> |
---|
| 529 | * arguments, to a connected (TCP or UDP) socket, identified by the <fdid> argument. |
---|
| 530 | * The work is actually done by the NIC_TX server thread, and the synchronisation |
---|
| 531 | * between the client and the server threads uses the "tx_valid" set/reset flip-flop: |
---|
| 532 | * The client thread registers itself in the socket descriptor, registers in the queue |
---|
| 533 | * rooted in the NIC_TX[index] chdev, sets "tx_valid", unblocks the server thread, and |
---|
| 534 | * finally blocks on THREAD_BLOCKED_IO, and deschedules. |
---|
| 535 | * When the TX server thread completes the command (all data has been sent for an UDP |
---|
| 536 | * socket, or acknowledged for a TCP socket), the server thread resets "tx_valid" and |
---|
| 537 | * unblocks the client thread. |
---|
| 538 | * This function can be called by a thread running in any cluster. |
---|
| 539 | * WARNING : This implementation does not support several concurrent SEND/SENDTO commands |
---|
| 540 | * on the same socket, as only one TX thread can register in a given socket. |
---|
| 541 | **************************************************************************************** |
---|
| 542 | * @ fdid : [in] file descriptor index identifying the socket. |
---|
| 543 | * @ u_buf : [in] pointer on buffer containing packet in user space. |
---|
| 544 | * @ length : [in] packet size in bytes. |
---|
| 545 | * @ return number of sent bytes if success / return -1 if failure. |
---|
| 546 | ***************************************************************************************/ |
---|
| 547 | int socket_send( uint32_t fdid, |
---|
| 548 | uint8_t * u_buf, |
---|
| 549 | uint32_t length ); |
---|
[657] | 550 | |
---|
[662] | 551 | /**************************************************************************************** |
---|
| 552 | * This blocking function implements the recv() syscall. |
---|
| 553 | * It is used to receive data that has been stored by the NIC_RX server thread in the |
---|
| 554 | * rx_buf of a connected (TCP or UDP) socket, identified by the <fdid> argument. |
---|
| 555 | * The synchronisation between the client and the server threads uses the "rx_valid" |
---|
| 556 | * set/reset flip-flop: If "rx_valid" is set, the client simply moves the available |
---|
| 557 | * data from the "rx_buf" to the user buffer identified by the <u_buf> and <length> |
---|
| 558 | * arguments, and reset the "rx_valid" flip_flop. If "rx_valid" is not set, the client |
---|
| 559 | * thread registers itself in the socket descriptor, registers in the clients queue rooted |
---|
| 560 | * in the NIC_RX[index] chdev, and finally blocks on THREAD_BLOCKED_IO, and deschedules. |
---|
| 561 | * The client thread is re-activated by the RX server, that set the "rx_valid" flip-flop |
---|
| 562 | * as soon as data is available in the "rx_buf". The number of bytes actually transferred |
---|
| 563 | * can be less than the user buffer size. |
---|
| 564 | * This function can be called by a thread running in any cluster. |
---|
| 565 | * WARNING : This implementation does not support several concurrent RECV/RECVFROM |
---|
| 566 | * commands on the same socket, as only one RX thread can register in a given socket. |
---|
| 567 | **************************************************************************************** |
---|
| 568 | * @ fdid : [in] file descriptor index identifying the socket. |
---|
| 569 | * @ u_buf : [in] pointer on buffer in user space. |
---|
| 570 | * @ length : [in] buffer size in bytes. |
---|
| 571 | * @ return number of received bytes if success / return -1 if failure. |
---|
| 572 | ***************************************************************************************/ |
---|
| 573 | int socket_recv( uint32_t fdid, |
---|
| 574 | uint8_t * u_buf, |
---|
| 575 | uint32_t length ); |
---|
[657] | 576 | |
---|
[662] | 577 | /**************************************************************************************** |
---|
| 578 | * This blocking function implements the close() syscall for a socket. |
---|
| 579 | * - For a UDP socket, it simply calls the static socket_destroy() function to release |
---|
| 580 | * all structures associated to the local socket, including the file descriptor. |
---|
| 581 | * - For a TCP socket, it makes a CLOSE command to NIC_TX, and blocks on the <IO> |
---|
| 582 | * condition. The close TCP handshake is done by the NIC_TX and NIC_RX threads. |
---|
| 583 | * It is unblocked when the socket is in CLOSED state, or when an error is reported. |
---|
| 584 | * Finally, it calls the static socket_destroy() function to release all structures |
---|
| 585 | * associated to the local socket, including the file descriptor. |
---|
| 586 | **************************************************************************************** |
---|
| 587 | * @ file_xp : [in] extended pointer on file descriptor. |
---|
| 588 | * @ fdid : [in] file descriptor index identifying the socket. |
---|
| 589 | * @ return 0 if success / return -1 if failure. |
---|
| 590 | ***************************************************************************************/ |
---|
| 591 | int socket_close( xptr_t file_xp, |
---|
| 592 | uint32_t fdid ); |
---|
[657] | 593 | |
---|
| 594 | |
---|
[662] | 595 | #endif /* _KSOCKET_H_ */ |
---|
[657] | 596 | |
---|
| 597 | |
---|
| 598 | |
---|