[1] | 1 | /* |
---|
| 2 | * dev_nic.h - NIC (Network Controller) generic device API definition. |
---|
| 3 | * |
---|
[657] | 4 | * Author Alain Greiner (2016,2017,2018,2019,2020) |
---|
[1] | 5 | * |
---|
| 6 | * Copyright (c) UPMC Sorbonne Universites |
---|
| 7 | * |
---|
| 8 | * This file is part of ALMOS-MKH |
---|
| 9 | * |
---|
| 10 | * ALMOS-MKH is free software; you can redistribute it and/or modify it |
---|
| 11 | * under the terms of the GNU General Public License as published by |
---|
| 12 | * the Free Software Foundation; version 2.0 of the License. |
---|
| 13 | * |
---|
| 14 | * ALMOS-MKH is distributed in the hope that it will be useful, but |
---|
| 15 | * WITHOUT ANY WARRANTY; without even the implied warranty of |
---|
| 16 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
---|
| 17 | * General Public License for more details. |
---|
| 18 | * |
---|
| 19 | * You should have received a copy of the GNU General Public License |
---|
| 20 | * along with ALMOS-kernel; if not, write to the Free Software Foundation, |
---|
| 21 | * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA |
---|
| 22 | */ |
---|
| 23 | |
---|
| 24 | #ifndef _DEV_NIC_H |
---|
| 25 | #define _DEV_NIC_H |
---|
| 26 | |
---|
[14] | 27 | #include <kernel_config.h> |
---|
[457] | 28 | #include <hal_kernel_types.h> |
---|
[657] | 29 | #include <remote_busylock.h> |
---|
| 30 | #include <remote_buf.h> |
---|
| 31 | #include <xlist.h> |
---|
[1] | 32 | |
---|
[657] | 33 | /**** Forward declarations ****/ |
---|
| 34 | |
---|
| 35 | struct chdev_s; |
---|
| 36 | |
---|
[1] | 37 | /***************************************************************************************** |
---|
| 38 | * Generic Network Interface Controller definition |
---|
| 39 | * |
---|
[657] | 40 | * This device provides access to a generic Gigabit Ethernet network controller. |
---|
| 41 | * It assumes that the NIC hardware peripheral handles two packets queues for sent (TX) |
---|
| 42 | * and received (RX) packets. |
---|
| 43 | * |
---|
| 44 | * The supported protocols stack is : Ethernet / IPV4 / TCP or UDP |
---|
[1] | 45 | * |
---|
[657] | 46 | * 1) hardware assumptions |
---|
| 47 | * |
---|
| 48 | * The NIC device is handling two (infinite) streams of packets to or from the network. |
---|
[1] | 49 | * It is the driver responsibility to move the RX packets from the NIC to the RX queue, |
---|
| 50 | * and the TX packets from the TX queue to the NIC. |
---|
| 51 | * |
---|
[657] | 52 | * As the RX and TX queues are independent, there is one NIC_RX device descriptor |
---|
| 53 | * to handle RX packets, and another NIC_TX device descriptor to handle TX packets. |
---|
[1] | 54 | * |
---|
[657] | 55 | * In order to improve throughput, the NIC controller can implement multiple (N) channels. |
---|
| 56 | * In this case, the channel index is defined by a hash function computed from the remote |
---|
| 57 | * IP address and port. This index is computed by the hardware for an RX packet, and is |
---|
| 58 | * computed by the kernel for a TX packet, using a specific driver function. TODO ... |
---|
| 59 | * The 2*N chdevs, and the associated server threads implementing the protocols stack, |
---|
| 60 | * are distributed in 2*N different clusters. |
---|
[1] | 61 | * |
---|
[657] | 62 | * 2) User API |
---|
| 63 | * |
---|
| 64 | * On the user side, ALMOS-MKH implements the POSIX socket API. |
---|
| 65 | * The kernel functions implementing the socket related syscalls are : |
---|
| 66 | * - dev_nic_socket() : create a local socket registered in process fd_array[]. |
---|
| 67 | * - dev_nic_bind() : attach a local IP address and port to a local socket. |
---|
| 68 | * - dev_nic_listen() : local server makes a passive open. |
---|
| 69 | * - dev_nic_connect() : local client makes an active open to a remote server. |
---|
| 70 | * - dev_nic_accept() : local server accept a new remote client. |
---|
| 71 | * - dev_nic_send() : send data on a connected socket. |
---|
| 72 | * - dev_nic_recv() : receive data on a connected socket. |
---|
| 73 | * - dev_nic_sendto() : send a packet to a remote (IP address/port). |
---|
| 74 | * - dev_nic_recvfrom() : receive a packet from a remote (IP address/port). |
---|
| 75 | * - dev_nic_close() : close a socket |
---|
[1] | 76 | * |
---|
[657] | 77 | * 3) TX stream |
---|
| 78 | * |
---|
| 79 | * The internal API between the client threads and the TX server thread defines |
---|
| 80 | * the 3 following commands: |
---|
| 81 | * . SOCKET_TX_CONNECT : request to execute the 3 steps TCP connection handshake. |
---|
| 82 | * . SOCKET_TX_SEND : send data to a remote socket (UDP or TCP). |
---|
| 83 | * . SOCKET_TX_CLOSE : request to execute the 3 steps TCP close handshake. |
---|
| 84 | * |
---|
| 85 | * - These 3 commands are blocking for the client thread that registers the command in the |
---|
| 86 | * socket descriptor, blocks on the BLOCKED_IO condition, and deschedules. |
---|
| 87 | * - The TX server thread is acting as a multiplexer. It scans the list of attached sockets, |
---|
| 88 | * to handle all valid commands: one UDP packet or TCP segment per iteration. |
---|
| 89 | * It uses the user buffer defined by the client thread, and attached to socket descriptor, |
---|
| 90 | * as a retransmission buffer. It blocks and deschedules on the BLOCKED_CLIENT condition, |
---|
| 91 | * when there is no more active TX command registered in any socket. It is re-activated |
---|
| 92 | * by the first client thread registering a new TX command in the socket descriptor. |
---|
| 93 | * It unblocks a client thread only when a command is fully completed. It signals errors |
---|
| 94 | * to the client thread using the tx_error field in socket descriptor. |
---|
| 95 | * |
---|
| 96 | * 4) RX stream |
---|
| 97 | * |
---|
| 98 | * The communication between the RX server thread and the client threads expecting data |
---|
| 99 | * is done through receive buffers (one private buffer per socket) that are handled |
---|
| 100 | * as single-writer / single-reader FIFOs, called rx_buf. |
---|
| 101 | * - The RX server thread is acting as a demultiplexor: it handles one TCP segment or UDP |
---|
| 102 | * packet per iteration, and registers the data in the rx_buf of the socket matching |
---|
| 103 | * the packet. It simply discards all packets that do not match a registered socket. |
---|
| 104 | * When a client thread is registered in the socket descriptor, the RX server thread |
---|
| 105 | * unblocks this client thread as soon as there is data available in rx_buf. |
---|
| 106 | * It blocks and deschedules on the BLOCKED_ISR condition when there are no more packets |
---|
| 107 | * in the NIC_RX queue. It is unblocked by the hardware ISR. |
---|
| 108 | * - The client thread simply accesses the rx_buf attached to socket descriptor, and consumes |
---|
| 109 | * the available data when the rx_buf is non empty. It blocks on the BLOCKED_IO condition, |
---|
| 110 | * and deschedules when the rx_buf is empty. |
---|
| 111 | * |
---|
| 112 | * 5) R2T queue |
---|
| 113 | * |
---|
| 114 | * To implement the TCP "3 steps handshake" protocol, the RX server thread can directly |
---|
| 115 | * request the associated TX server thread to send control packets in the TX stream, |
---|
| 116 | * using a dedicated R2T (RX to TX) FIFO stored in the socket descriptor. |
---|
| 117 | * |
---|
| 118 | * 6) NIC driver API |
---|
| 119 | * |
---|
| 120 | * The generic NIC device "driver" API defines the following commands to the NIC driver: |
---|
[1] | 121 | * - READABLE : returns true if at least one RX packet is available in RX queue. |
---|
[657] | 122 | * - WRITABLE : returns true if at least one empty slot is available in TX queue. |
---|
[1] | 123 | * - READ : consume one packet from the RX queue. |
---|
[657] | 124 | * - WRITE : produce one packet to the TX queue. |
---|
[1] | 125 | * All RX or TX packets are sent or received in standard 2 Kbytes kernel buffers, |
---|
[657] | 126 | * that are dynamically allocated by the protocols stack. |
---|
[1] | 127 | * |
---|
| 128 | * The actual TX and RX queue structures depend on the hardware NIC implementation, |
---|
[657] | 129 | * and are defined in the HAL specific driver code. |
---|
| 130 | * |
---|
| 131 | * WARNING: the WTI mailboxes used by the driver to receive events from the hardware |
---|
| 132 | * (available RX packet, or available free TX slot, for a given channel), must be |
---|
| 133 | * statically allocated during the kernel initialisation phase, and must be |
---|
| 134 | * routed to the cluster containing the associated TX/RX chdev and server thread. |
---|
| 135 | * |
---|
[1] | 136 | *****************************************************************************************/ |
---|
| 137 | |
---|
| 138 | /**** Forward declarations ****/ |
---|
| 139 | |
---|
[3] | 140 | struct chdev_s; |
---|
[1] | 141 | |
---|
| 142 | /****************************************************************************************** |
---|
[657] | 143 | * Various constants used by the Protocols stack |
---|
[1] | 144 | *****************************************************************************************/ |
---|
| 145 | |
---|
[657] | 146 | #define SRC_MAC_54 0x54 |
---|
| 147 | #define SRC_MAC_32 0x32 |
---|
| 148 | #define SRC_MAC_10 0x10 |
---|
| 149 | #define DST_MAC_54 0x54 |
---|
| 150 | #define DST_MAC_32 0x32 |
---|
| 151 | #define DST_MAC_10 0x10 |
---|
| 152 | |
---|
| 153 | #define TCP_HEAD_LEN 20 |
---|
| 154 | #define UDP_HEAD_LEN 8 |
---|
| 155 | #define IP_HEAD_LEN 20 |
---|
| 156 | #define ETH_HEAD_LEN 14 |
---|
| 157 | |
---|
| 158 | #define PROTOCOL_UDP 0x11 |
---|
| 159 | #define PROTOCOL_TCP 0x06 |
---|
| 160 | |
---|
| 161 | #define TCP_ISS 0x10000 |
---|
| 162 | |
---|
| 163 | #define PAYLOAD_MAX_LEN 1500 // max payload for a UDP packet or a TCP segment |
---|
| 164 | |
---|
| 165 | #define TCP_FLAG_FIN 0x01 |
---|
| 166 | #define TCP_FLAG_SYN 0x02 |
---|
| 167 | #define TCP_FLAG_RST 0x04 |
---|
| 168 | #define TCP_FLAG_PSH 0x08 |
---|
| 169 | #define TCP_FLAG_ACK 0x10 |
---|
| 170 | #define TCP_FLAG_URG 0x20 |
---|
| 171 | |
---|
| 172 | #define NIC_RX_BUF_SIZE 0x100000 // 1 Mbytes |
---|
| 173 | #define NIC_R2T_QUEUE_SIZE 0x64 // smallest KCM size - NOTE(review): 0x64 is 100 decimal, not 64; confirm intended value |
---|
| 174 | #define NIC_CRQ_QUEUE_SIZE 0x8 // 8 * sizeof(sockaddr_t) = smallest KCM size |
---|
| 175 | #define NIC_PKT_MAX_SIZE 1500 // for Ethernet |
---|
| 176 | #define NIC_KERNEL_BUF_SIZE 2000 // for one ETH/IP/TCP packet |
---|
| 177 | |
---|
| 178 | /***************************************************************************************** |
---|
| 179 | * This structure defines the extension for the generic NIC device. |
---|
| 180 | * It holds a single pointer; the actual queue descriptor depends on the implementation. |
---|
| 181 | * |
---|
| 182 | * WARNING : for all NIC_TX and NIC_RX chdevs, the xlist rooted in the chdev |
---|
| 183 | * ("wait_root" and "wait_lock" fields) is actually a list of sockets. |
---|
| 184 | ****************************************************************************************/ |
---|
| 185 | |
---|
[1] | 186 | typedef struct nic_extend_s |
---|
| 187 | { |
---|
[657] | 188 | void * queue; /*! local pointer on NIC queue descriptor (RX or TX), opaque at this level */ |
---|
[1] | 189 | } |
---|
| 190 | nic_extend_t; |
---|
| 191 | |
---|
[657] | 192 | /***************************************************************************************** |
---|
[1] | 193 | * This enum defines the various implementations of the generic NIC peripheral. |
---|
| 194 | * This enum must be kept consistent with the defines in the arch_info.h file. |
---|
[657] | 195 | ****************************************************************************************/ |
---|
[1] | 196 | |
---|
[657] | 197 | typedef enum nic_impl_e |
---|
[1] | 198 | { |
---|
[407] | 199 | IMPL_NIC_CBF = 0, |
---|
[1] | 200 | IMPL_NIC_I86 = 1, |
---|
| 201 | } |
---|
| 202 | nic_impl_t; |
---|
| 203 | |
---|
[657] | 204 | /**************************************************************************************** |
---|
| 205 | * This enum defines the (implementation independent) commands to access the NIC hardware. |
---|
| 206 | * These commands are registered by the NIC_TX and NIC_RX server threads in the |
---|
| 207 | * server thread descriptor, to be used by the NIC driver. |
---|
| 208 | * The buffer is always a 2K bytes kernel buffer, containing an Ethernet packet. |
---|
| 209 | ****************************************************************************************/ |
---|
[1] | 210 | |
---|
| 211 | typedef enum nic_cmd_e |
---|
| 212 | { |
---|
[657] | 213 | NIC_CMD_WRITABLE = 10, /*! test TX queue not full (for a given packet length) */ |
---|
| 214 | NIC_CMD_WRITE = 11, /*! put one (given length) packet to TX queue */ |
---|
| 215 | NIC_CMD_READABLE = 12, /*! test RX queue not empty (for any packet length) */ |
---|
| 216 | NIC_CMD_READ = 13, /*! get one (any length) packet from RX queue */ |
---|
[1] | 217 | } |
---|
| 218 | nic_cmd_t; |
---|
| 219 | |
---|
| 220 | typedef struct nic_command_s |
---|
| 221 | { |
---|
[657] | 222 | xptr_t dev_xp; /*! extended pointer on NIC chdev descriptor */ |
---|
| 223 | nic_cmd_t type; /*! command type (one of the nic_cmd_t values) */ |
---|
| 224 | uint8_t * buffer; /*! local pointer on buffer (kernel or user space) - NOTE(review): the nic_cmd_e comment says always a kernel buffer; confirm */ |
---|
| 225 | uint32_t length; /*! number of bytes in buffer */ |
---|
| 226 | uint32_t status; /*! return value (depends on command type) */ |
---|
| 227 | uint32_t error; /*! error reported by the hardware (0 if no error) */ |
---|
[1] | 228 | } |
---|
| 229 | nic_command_t; |
---|
| 230 | |
---|
[657] | 231 | /***************************************************************************************** |
---|
| 232 | * This structure defines a socket descriptor. In order to parallelize the transfers, |
---|
| 233 | * the set of all registered sockets is split in several subsets. |
---|
| 234 | * The number of subsets is the number of NIC channels. |
---|
| 235 | * The distribution key is computed from the (remote_addr/remote_port) couple. |
---|
| 236 | * This computation is done by the NIC hardware for RX packets, |
---|
| 237 | * and by the dev_nic_connect() function for the TX packets. |
---|
| 238 | * |
---|
| 239 | * A socket is attached to the NIC_TX[channel] & NIC_RX[channel] chdevs. |
---|
| 240 | * Each socket descriptor allows the RX and TX server threads to access various buffers: |
---|
| 241 | * - the user "send" buffer contains the data to be sent by the TX server thread. |
---|
| 242 | * - the kernel "receive" buffer contains the data received by the RX server thread. |
---|
| 243 | * - the kernel "r2t" buffer allows the RX server thread to make direct requests |
---|
| 244 | * to the associated TX server (to implement the TCP 3 steps handshake). |
---|
| 245 | * |
---|
| 246 | * The synchronisation mechanism between the clients threads and the servers threads |
---|
| 247 | * is different for TX and RX transfers: |
---|
| 248 | * |
---|
| 249 | * 1) For a TX transfer, it can exist only one client thread for a given socket, |
---|
| 250 | * the transfer is always initiated by the local process, and all TX commands |
---|
| 251 | * (CONNECT/SEND/CLOSE) are blocking for the client thread. The user buffer is |
---|
| 252 | * used by TCP to handle retransmissions when required. |
---|
| 253 | * The client thread registers the command in the thread descriptor, registers itself |
---|
| 254 | * in the socket descriptor, unblocks the TX server thread from the BLOCKED_CLIENT |
---|
| 255 | * condition, blocks itself on the BLOCKED_IO condition, and deschedules. |
---|
| 256 | * When the command is completed, the TX server thread unblocks the client thread. |
---|
| 257 | * The TX server blocks itself on the BLOCKED_CLIENT condition, when there is no |
---|
| 258 | * pending commands and the R2T queue is empty. It is unblocked when a client |
---|
| 259 | * registers a new command, or when the RX server thread registers a new request |
---|
| 260 | * in the R2T queue. |
---|
| 261 | * The tx_valid flip-flop is SET by the client thread to signal a valid command. |
---|
| 262 | * It is RESET by the server thread when the command is completed: For a SEND, |
---|
| 263 | * all bytes have been sent (UDP) or acknowledged (TCP). |
---|
| 264 | * |
---|
| 265 | * 2) For an RX transfer, it can exist only one client thread for a given socket, |
---|
| 266 | * but the transfer is initiated by the remote process, and the RECV command |
---|
| 267 | * is not really blocking: the data can arrive before the local RECV command is |
---|
| 268 | * executed, and the server thread does not wait to receive all requested data |
---|
| 269 | * to deliver data to client thread. Therefore each socket contains a receive |
---|
| 270 | * buffer (rx_buf) handled as a single-writer/single-reader fifo. |
---|
| 271 | * The client thread consumes data from the rx_buf when possible. It blocks on the |
---|
| 272 | * BLOCKED_IO condition and deschedules when the rx_buf is empty. |
---|
| 273 | * It is unblocked by the RX server thread when new data is available in the rx_buf. |
---|
| 274 | * The RX server blocks itself on the BLOCKED_ISR condition when the NIC_RX packets |
---|
| 275 | * queue is empty. It is unblocked by the hardware when new packets are available. |
---|
| 276 | * |
---|
| 277 | * Note : the socket domains and types are defined in the "shared_socket.h" file. |
---|
| 278 | ****************************************************************************************/ |
---|
| 279 | |
---|
[1] | 280 | /****************************************************************************************** |
---|
[657] | 281 | * This function returns a printable string for a given NIC command <type>. |
---|
| 282 | ****************************************************************************************** |
---|
| 283 | * @ type : [in] NIC command type (expected to be one of the nic_cmd_t values). |
---|
| 284 | *****************************************************************************************/ |
---|
| 285 | char * nic_cmd_str( uint32_t type ); |
---|
| 286 | |
---|
| 287 | /****************************************************************************************** |
---|
| 288 | * This function returns a printable string for a given socket <state>. |
---|
| 289 | ****************************************************************************************** |
---|
| 290 | * @ state : [in] socket state value. |
---|
| 291 | *****************************************************************************************/ |
---|
| 292 | char * socket_state_str( uint32_t state ); |
---|
| 293 | |
---|
| 294 | /****************************************************************************************** |
---|
[3] | 295 | * This function completes the NIC-RX and NIC-TX chdev descriptors initialisation, |
---|
| 296 | * namely the link with the implementation specific driver. |
---|
| 297 | * The func, impl, channel, is_rx, base fields have been previously initialised. |
---|
| 298 | * It calls the specific driver initialisation function, to initialise the hardware |
---|
| 299 | * device and the specific data structures when required. |
---|
| 300 | * It creates the associated server thread and allocates a WTI from local ICU. |
---|
[657] | 301 | * For a NIC_TX chdev, it allocates and initializes the R2T waiting queue used by the |
---|
| 302 | * NIC_RX[channel] server to send direct requests to the NIC_TX[channel] server. |
---|
[3] | 303 | * It must be executed by a local thread. |
---|
[1] | 304 | ****************************************************************************************** |
---|
[3] | 305 | * @ chdev : [in] local pointer on NIC chdev descriptor. |
---|
[1] | 306 | *****************************************************************************************/ |
---|
[3] | 307 | void dev_nic_init( struct chdev_s * chdev ); |
---|
[1] | 308 | |
---|
[657] | 309 | |
---|
| 310 | /* functions implementing the socket API */ |
---|
| 311 | |
---|
| 312 | /**************************************************************************************** |
---|
| 313 | * This function implements the socket() syscall. |
---|
| 314 | * This function allocates and initializes in the calling thread cluster: |
---|
| 315 | * - a new socket descriptor, defined by the <domain> and <type> arguments, |
---|
| 316 | * - a new file descriptor, associated to this socket. |
---|
| 317 | * It registers the file descriptor in the reference process fd_array[], sets |
---|
| 318 | * the socket state to IDLE, and returns the <fdid> value. |
---|
| 319 | **************************************************************************************** |
---|
| 320 | * @ domain : [in] socket protocol family (AF_UNIX / AF_INET). |
---|
| 321 | * @ type : [in] socket type (SOCK_DGRAM / SOCK_STREAM). |
---|
| 322 | * @ return a file descriptor <fdid> if success / return -1 if failure. |
---|
| 323 | ***************************************************************************************/ |
---|
| 324 | int dev_nic_socket( uint32_t domain, |
---|
| 325 | uint32_t type ); |
---|
| 326 | |
---|
| 327 | /**************************************************************************************** |
---|
| 328 | * This function implements the bind() syscall. |
---|
| 329 | * It initializes the "local_addr" and "local_port" fields in the socket |
---|
| 330 | * descriptor identified by the <fdid> argument, and sets the socket state to BOUND. |
---|
| 331 | * It can be called by a thread running in any cluster. |
---|
| 332 | **************************************************************************************** |
---|
| 333 | * @ fdid : [in] file descriptor identifying the socket. |
---|
| 334 | * @ addr : [in] local IP address. |
---|
| 335 | * @ port : [in] local port. |
---|
| 336 | * @ return 0 if success / return -1 if failure. |
---|
| 337 | ***************************************************************************************/ |
---|
| 338 | int dev_nic_bind( uint32_t fdid, |
---|
| 339 | uint32_t addr, |
---|
| 340 | uint16_t port ); |
---|
| 341 | |
---|
| 342 | /**************************************************************************************** |
---|
| 343 | * This function implements the listen() syscall. |
---|
| 344 | * It is called by a (local) server process to specify the max size of the queue |
---|
| 345 | * registering the (remote) client process connections, and sets the socket identified |
---|
| 346 | * by the <fdid> argument to LISTEN state. It applies only to sockets of type TCP. |
---|
| 347 | * It can be called by a thread running in any cluster. |
---|
| 348 | * TODO handle the <max_pending> argument, and document the (int) return value. |
---|
| 349 | **************************************************************************************** |
---|
| 350 | * @ fdid : [in] file descriptor identifying the local server socket. |
---|
| 351 | * @ max_pending : [in] max number of accepted remote client connections. |
---|
| 352 | ***************************************************************************************/ |
---|
| 353 | int dev_nic_listen( uint32_t fdid, |
---|
| 354 | uint32_t max_pending ); |
---|
| 355 | |
---|
| 356 | /**************************************************************************************** |
---|
| 357 | * This function implements the connect() syscall. |
---|
| 358 | * It is used by a (local) client process to connect a local socket identified by |
---|
| 359 | * the <fdid> argument, to a remote socket identified by the <remote_addr> and |
---|
| 360 | * <remote_port> arguments. It can be used for both UDP and TCP sockets. |
---|
| 361 | * It computes the nic_channel index from <remote_addr> and <remote_port> values, |
---|
| 362 | * and initializes "remote_addr", "remote_port", "nic_channel" in local socket. |
---|
| 363 | * It registers the socket in the two lists of clients rooted in the NIC_RX[channel] |
---|
| 364 | * and NIC_TX[channel] chdevs. It can be called by a thread running in any cluster. |
---|
| 365 | * WARNING : the clients are the socket descriptors, and NOT the threads descriptors. |
---|
| 366 | **************************************************************************************** |
---|
| 367 | * Implementation Note: |
---|
| 368 | * - For a TCP socket, it updates the "remote_addr", "remote_port", "nic_channel" fields |
---|
| 369 | * in the socket descriptor defined by the <fdid> argument, and registers this socket |
---|
| 370 | * in the lists of sockets attached to the NIC_TX and NIC_RX chdevs. |
---|
| 371 | * Then, it registers a CONNECT command in the "nic_cmd" field of the client thread |
---|
| 372 | * descriptor to request the NIC_TX server thread to execute the 3 steps handshake, |
---|
| 373 | * and updates the "tx_client" field in the socket descriptor. It unblocks the NIC_TX |
---|
| 374 | * server thread, blocks on the THREAD_BLOCKED_IO condition and deschedules. |
---|
| 375 | * - For a UDP socket, it simply updates "remote_addr", "remote_port", "nic_channel" |
---|
| 376 | * in the socket descriptor defined by the <fdid> argument, and registers this socket |
---|
| 377 | * in the lists of sockets attached to the NIC_TX and NIC_RX chdevs. |
---|
| 378 | * Then, it sets the socket state to CONNECT, without unblocking the NIC_TX server |
---|
| 379 | * thread, and without blocking itself. |
---|
| 380 | * TODO : the nic_channel index computation must be done by a driver specific function. |
---|
| 381 | **************************************************************************************** |
---|
| 382 | * @ fdid : [in] file descriptor identifying the socket. |
---|
| 383 | * @ remote_addr : [in] remote IP address. |
---|
| 384 | * @ remote_port : [in] remote port. |
---|
| 385 | * @ return 0 if success / return -1 if failure. |
---|
| 386 | ***************************************************************************************/ |
---|
| 387 | int dev_nic_connect( uint32_t fdid, |
---|
| 388 | uint32_t remote_addr, |
---|
| 389 | uint16_t remote_port ); |
---|
| 390 | |
---|
| 391 | /**************************************************************************************** |
---|
| 392 | * This function implements the accept() syscall. |
---|
| 393 | * It is executed by a server process, waiting for one (or several) client process(es) |
---|
| 394 | * requesting a connection on a socket identified by the <fdid> argument. |
---|
| 395 | * This socket was previously created with socket(), bound to a local address with bind(), |
---|
| 396 | * and is listening for connections after a listen(). |
---|
| 397 | * This function extracts the first connection request on the CRQ queue of pending |
---|
| 398 | * requests, creates a new socket with the same properties as the existing socket, |
---|
| 399 | * and allocates a new file descriptor for this new socket. |
---|
| 400 | * If no pending connections are present on the queue, it blocks the caller until a |
---|
| 401 | * connection is present. |
---|
| 402 | * The new socket cannot accept more connections, but the original socket remains open. |
---|
| 403 | * It returns the new socket <fdid>, and registers in the <address> and <port> arguments |
---|
| 404 | * the remote client IP address & port. It applies only to sockets of type SOCK_STREAM. |
---|
| 405 | **************************************************************************************** |
---|
| 406 | * @ fdid : [in] file descriptor identifying the listening socket. |
---|
| 407 | * @ address : [out] remote client IP address. |
---|
| 408 | * @ port : [out] remote client port. |
---|
| 409 | * @ return the new socket <fdid> if success / return -1 if failure. |
---|
| 410 | ***************************************************************************************/ |
---|
| 411 | int dev_nic_accept( uint32_t fdid, |
---|
| 412 | uint32_t * address, |
---|
| 413 | uint16_t * port ); |
---|
| 414 | |
---|
| 415 | /**************************************************************************************** |
---|
| 416 | * This blocking function implements the send() syscall. |
---|
| 417 | * It is used to send data stored in the user buffer, identified by the <u_buf> and <length> |
---|
| 418 | * arguments, to a connected (TCP or UDP) socket, identified by the <fdid> argument. |
---|
| 419 | * The work is actually done by the NIC_TX server thread, and the synchronisation |
---|
| 420 | * between the client and the server threads uses the "tx_valid" set/reset flip-flop: |
---|
| 421 | * The client thread registers itself in the socket descriptor, registers in the queue |
---|
| 422 | * rooted in the NIC_TX[index] chdev, sets "tx_valid", unblocks the server thread, and |
---|
| 423 | * finally blocks on THREAD_BLOCKED_IO, and deschedules. |
---|
| 424 | * When the TX server thread completes the command (all data has been sent for a UDP |
---|
| 425 | * socket, or acknowledged for a TCP socket), the server thread resets "tx_valid" and |
---|
| 426 | * unblocks the client thread. |
---|
| 427 | * This function can be called by a thread running in any cluster. |
---|
| 428 | * WARNING : This implementation does not support several concurrent SEND/SENDTO commands |
---|
| 429 | * on the same socket, as only one TX thread can register in a given socket. |
---|
| 430 | **************************************************************************************** |
---|
| 431 | * @ fdid : [in] file descriptor identifying the socket. |
---|
| 432 | * @ u_buf : [in] pointer on buffer containing packet in user space. |
---|
| 433 | * @ length : [in] packet size in bytes. |
---|
| 434 | * @ return number of sent bytes if success / return -1 if failure. |
---|
| 435 | ***************************************************************************************/ |
---|
| 436 | int dev_nic_send( uint32_t fdid, |
---|
| 437 | uint8_t * u_buf, |
---|
| 438 | uint32_t length ); |
---|
| 439 | |
---|
| 440 | /**************************************************************************************** |
---|
| 441 | * This blocking function implements the sendto() syscall. |
---|
| 442 | * It registers the <remote_addr> and <remote_port> arguments in the local socket |
---|
| 443 | * descriptor, and does the same thing as the dev_nic_send() function above, |
---|
| 444 | * but can be called on an unconnected UDP socket. |
---|
| 445 | **************************************************************************************** |
---|
| 446 | * @ fdid : [in] file descriptor identifying the socket. |
---|
| 447 | * @ u_buf : [in] pointer on buffer containing packet in user space. |
---|
| 448 | * @ length : [in] packet size in bytes. |
---|
| 449 | * @ remote_addr : [in] destination IP address. |
---|
| 450 | * @ remote_port : [in] destination port. NOTE(review): uint32_t here, uint16_t in dev_nic_connect() - confirm. |
---|
| 451 | * @ return number of sent bytes if success / return -1 if failure. |
---|
| 452 | ***************************************************************************************/ |
---|
| 453 | int dev_nic_sendto( uint32_t fdid, |
---|
| 454 | uint8_t * u_buf, |
---|
| 455 | uint32_t length, |
---|
| 456 | uint32_t remote_addr, |
---|
| 457 | uint32_t remote_port ); |
---|
| 458 | |
---|
| 459 | /**************************************************************************************** |
---|
| 460 | * This blocking function implements the recv() syscall. |
---|
| 461 | * It is used to receive data that has been stored by the NIC_RX server thread in the |
---|
| 462 | * rx_buf of a connected (TCP or UDP) socket, identified by the <fdid> argument. |
---|
| 463 | * The synchronisation between the client and the server threads uses the "rx_valid" |
---|
| 464 | * set/reset flip-flop: If "rx_valid" is set, the client simply moves the available |
---|
| 465 | * data from the "rx_buf" to the user buffer identified by the <u_buf> and <length> |
---|
| 466 | * arguments, and resets the "rx_valid" flip-flop. If "rx_valid" is not set, the client |
---|
| 467 | * thread registers itself in the socket descriptor, registers in the clients queue rooted |
---|
| 468 | * in the NIC_RX[index] chdev, and finally blocks on THREAD_BLOCKED_IO, and deschedules. |
---|
| 469 | * The client thread is re-activated by the RX server, that sets the "rx_valid" flip-flop |
---|
| 470 | * as soon as data is available in the "rx_buf" (can be less than the user buffer size). |
---|
| 471 | * This function can be called by a thread running in any cluster. |
---|
| 472 | * WARNING : This implementation does not support several concurrent RECV/RECVFROM |
---|
| 473 | * commands on the same socket, as only one RX thread can register in a given socket. |
---|
| 474 | **************************************************************************************** |
---|
| 475 | * @ fdid : [in] file descriptor identifying the socket. |
---|
| 476 | * @ u_buf : [in] pointer on destination buffer in user space. |
---|
| 477 | * @ length : [in] buffer size in bytes. |
---|
| 478 | * @ return number of received bytes if success / return -1 if failure. |
---|
| 479 | ***************************************************************************************/ |
---|
| 480 | int dev_nic_recv( uint32_t fdid, |
---|
| 481 | uint8_t * u_buf, |
---|
| 482 | uint32_t length ); |
---|
| 483 | |
---|
| 484 | /**************************************************************************************** |
---|
| 485 | * This blocking function implements the recvfrom() syscall. |
---|
| 486 | * It registers the <remote_addr> and <remote_port> arguments in the local socket |
---|
| 487 | * descriptor, and does the same thing as the dev_nic_recv() function above, |
---|
| 488 | * but can be called on an unconnected UDP socket. |
---|
| 489 | **************************************************************************************** |
---|
| 490 | * @ fdid : [in] file descriptor identifying the socket. |
---|
| 491 | * @ u_buf : [in] pointer on buffer in user space. |
---|
| 492 | * @ length : [in] buffer size in bytes. |
---|
| 493 | * @ remote_addr : [in] remote IP address. |
---|
| 494 | * @ remote_port : [in] remote port. |
---|
| 495 | * @ return number of received bytes if success / return -1 if failure. |
---|
| 496 | ***************************************************************************************/ |
---|
| 497 | int dev_nic_recvfrom( uint32_t fdid, |
---|
| 498 | uint8_t * u_buf, |
---|
| 499 | uint32_t length, |
---|
| 500 | uint32_t remote_addr, |
---|
| 501 | uint32_t remote_port ); |
---|
| 502 | |
---|
| 503 | |
---|
| 504 | /* Instrumentation functions */ |
---|
| 505 | |
---|
| 506 | |
---|
[1] | 507 | /****************************************************************************************** |
---|
[657] | 508 | * This instrumentation function displays on the TXT0 kernel terminal the content |
---|
| 509 | * of the instrumentation registers contained in the NIC device (no argument, no return value). |
---|
[1] | 510 | *****************************************************************************************/ |
---|
[657] | 511 | void dev_nic_print_stats( void ); |
---|
[1] | 512 | |
---|
| 513 | /****************************************************************************************** |
---|
[657] | 514 | * This instrumentation function resets all instrumentation registers contained |
---|
| 515 | * in the NIC device. |
---|
| 516 | *****************************************************************************************/ |
---|
| 517 | void dev_nic_clear_stats( void ); |
---|
| 518 | |
---|
| 519 | |
---|
| 520 | /* Functions executed by the TX and RX server threads */ |
---|
| 521 | |
---|
| 522 | /****************************************************************************************** |
---|
| 523 | * This function is executed by the server thread associated to a NIC_TX[channel] chdev. |
---|
| 524 | * This TX server thread is created by the dev_nic_init() function. |
---|
| 525 | * It builds and sends UDP packets or TCP segments for all client threads registered in |
---|
| 526 | * the NIC_TX[channel] chdev. The command types are (CONNECT / SEND / CLOSE), and the |
---|
| 527 | * priority between clients is round-robin. It takes into account the requests registered |
---|
| 528 | * by the RX server thread in the R2T queue associated to the involved socket. |
---|
| 529 | * When a command is completed, it unblocks the client thread. For a SEND command, the |
---|
| 530 | * last byte must have been sent for a UDP socket, and it must have been acknowledged |
---|
| 531 | * for a TCP socket. |
---|
| 532 | * When the TX client threads queue is empty, it blocks on THREAD_BLOCKED_CLIENT |
---|
| 533 | * condition and deschedules. It is re-activated by a client thread registering a command. |
---|
[1] | 534 | ****************************************************************************************** |
---|
[657] | 535 | * Implementation note: |
---|
| 536 | * It executes an infinite loop in which it takes the lock protecting the clients list |
---|
| 537 | * to build a "kleenex" list of currently registered clients. |
---|
| 538 | * For each client registered in this "kleenex" list, it takes the lock protecting the |
---|
| 539 | * socket state, builds one packet/segment in a local 2K bytes kernel buffer, calls the |
---|
| 540 | * transport layer to add the UDP/TCP header, calls the IP layer to add the IP header, |
---|
| 541 | * calls the ETH layer to add the ETH header, and moves the packet to the NIC_TX_QUEUE. |
---|
| 542 | * Finally, it updates the socket state, and releases the socket lock. |
---|
| 543 | ****************************************************************************************** |
---|
| 544 | * @ chdev : [in] local pointer on one local NIC_TX[channel] chdev descriptor. |
---|
[1] | 545 | *****************************************************************************************/ |
---|
[657] | 546 | void dev_nic_tx_server( struct chdev_s * chdev ); |
---|
[1] | 547 | |
---|
| 548 | |
---|
| 549 | /****************************************************************************************** |
---|
[657] | 550 | * This function is executed by the server thread associated to a NIC_RX[channel] chdev. |
---|
| 551 | * This RX server thread is created by the dev_nic_init() function. |
---|
| 552 | * It handles all UDP packets or TCP segments received by the sockets attached to |
---|
| 553 | * the NIC_RX[channel] chdev. It writes the received data in the socket rcv_buf, and |
---|
| 554 | * unblocks the client thread waiting on a RECV command. |
---|
| 555 | * To implement the three-step handshake required by a TCP connection, it posts direct |
---|
| 556 | * requests to the TX server, using the R2T queue attached to the involved socket. |
---|
| 557 | * It blocks on the THREAD_BLOCKED_ISR condition and deschedules when the NIC_RX_QUEUE |
---|
| 558 | * is empty. It is re-activated by the NIC_RX_ISR, when the queue becomes non empty. |
---|
[1] | 559 | ****************************************************************************************** |
---|
[657] | 560 | * Implementation note: |
---|
| 561 | * It executes an infinite loop in which it extracts one packet from the NIC_RX_QUEUE |
---|
| 562 | * of received packets, copies this packet in a local 2 kbytes kernel buffer, checks |
---|
| 563 | * the Ethernet header, checks the IP header, calls the relevant (TCP or UDP) transport |
---|
| 564 | * protocol, which searches for a matching socket for the received packet. It copies the payload |
---|
| 565 | * to the relevant socket rcv_buf when the packet is acceptable, and unblocks the client |
---|
| 566 | * thread. It discards the packet if no matching socket is found. |
---|
| 567 | ****************************************************************************************** |
---|
| 568 | * @ chdev : [in] local pointer on one local NIC_RX[channel] chdev descriptor. |
---|
[1] | 569 | *****************************************************************************************/ |
---|
[657] | 570 | void dev_nic_rx_server( struct chdev_s * chdev ); |
---|
[1] | 571 | |
---|
| 572 | #endif /* _DEV_NIC_H */ |
---|