| 1 | /* |
|---|
| 2 | * dev_nic.h - NIC (Network Controller) generic device API definition. |
|---|
| 3 | * |
|---|
| 4 | * Author Alain Greiner (2016,2017,2018,2019,2020) |
|---|
| 5 | * |
|---|
| 6 | * Copyright (c) UPMC Sorbonne Universites |
|---|
| 7 | * |
|---|
| 8 | * This file is part of ALMOS-MKH |
|---|
| 9 | * |
|---|
| 10 | * ALMOS-MKH is free software; you can redistribute it and/or modify it |
|---|
| 11 | * under the terms of the GNU General Public License as published by |
|---|
| 12 | * the Free Software Foundation; version 2.0 of the License. |
|---|
| 13 | * |
|---|
| 14 | * ALMOS-MKH is distributed in the hope that it will be useful, but |
|---|
| 15 | * WITHOUT ANY WARRANTY; without even the implied warranty of |
|---|
| 16 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
|---|
| 17 | * General Public License for more details. |
|---|
| 18 | * |
|---|
| 19 | * You should have received a copy of the GNU General Public License |
|---|
| 20 | * along with ALMOS-kernel; if not, write to the Free Software Foundation, |
|---|
| 21 | * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA |
|---|
| 22 | */ |
|---|
| 23 | |
|---|
| 24 | #ifndef _DEV_NIC_H |
|---|
| 25 | #define _DEV_NIC_H |
|---|
| 26 | |
|---|
| 27 | #include <kernel_config.h> |
|---|
| 28 | #include <hal_kernel_types.h> |
|---|
| 29 | #include <remote_busylock.h> |
|---|
| 30 | #include <remote_buf.h> |
|---|
| 31 | #include <xlist.h> |
|---|
| 32 | |
|---|
| 33 | /**** Forward declarations ****/ |
|---|
| 34 | |
|---|
| 35 | struct chdev_s; |
|---|
| 36 | |
|---|
| 37 | /***************************************************************************************** |
|---|
| 38 | * Generic Network Interface Controller definition |
|---|
| 39 | * |
|---|
| 40 | * This device provides access to a generic Gigabit Ethernet network controller. |
|---|
| 41 | * It assumes that the NIC hardware peripheral handles two packets queues for sent (TX) |
|---|
| 42 | * and received (RX) packets. |
|---|
| 43 | * |
|---|
| 44 | * The supported protocols stack is : Ethernet / IPV4 / TCP or UDP |
|---|
| 45 | * |
|---|
| 46 | * 1) hardware assumptions |
|---|
| 47 | * |
|---|
| 48 | * The NIC device is handling two (infinite) streams of packets to or from the network. |
|---|
| 49 | * It is the driver responsibility to move the RX packets from the NIC to the RX queue, |
|---|
| 50 | * and the TX packets from the TX queue to the NIC. |
|---|
| 51 | * |
|---|
| 52 | * As the RX and TX queues are independent, there is one NIC_RX device descriptor |
|---|
| 53 | * to handle RX packets, and another NIC_TX device descriptor to handle TX packets. |
|---|
| 54 | * |
|---|
| 55 | * In order to improve throughput, the NIC controller can implement multiple (N) channels. |
|---|
| 56 | * In this case, the channel index is defined by an hash function computed from the remote |
|---|
| 57 | * IP address and port. This index is computed by the hardware for an RX packet, and is |
|---|
| 58 | * computed by the kernel for a TX packet, using a specific driver function. TODO ... |
|---|
| 59 | * The 2*N chdevs, and the associated server threads implementing the protocols stack, |
|---|
| 60 | * are distributed in 2*N different clusters. |
|---|
| 61 | * |
|---|
| 62 | * 2) User API |
|---|
| 63 | * |
|---|
| 64 | * On the user side, ALMOS-MKH implements the POSIX socket API. |
|---|
| 65 | * The kernel functions implementing the socket related syscalls are : |
|---|
| 66 | * - dev_nic_socket() : create a local socket registered in process fd_array[]. |
|---|
| 67 | * - dev_nic_bind() : attach a local IP address and port to a local socket. |
|---|
| 68 | * - dev_nic_listen() : local server makes a passive open. |
|---|
| 69 | * - dev_nic_connect() : local client makes an active open to a remote server. |
|---|
| 70 | * - dev_nic_accept() : local server accept a new remote client. |
|---|
| 71 | * - dev_nic_send() : send data on a connected socket. |
|---|
| 72 | * - dev_nic_recv() : receive data on a connected socket. |
|---|
| 73 | * - dev_nic_sendto() : send a packet to a remote (IP address/port). |
|---|
| 74 | * - dev_nic_recvfrom() : receive a packet from a remote (IP address/port). |
|---|
| 75 | * - dev_nic_close() : close a socket |
|---|
| 76 | * |
|---|
| 77 | * 3) TX stream |
|---|
| 78 | * |
|---|
| 79 | * The internal API between the client threads and the TX server thread defines |
|---|
| 80 | * the 3 following commands: |
|---|
| 81 | * . SOCKET_TX_CONNECT : request to execute the 3 steps TCP connection handshake. |
|---|
| 82 | * . SOCKET_TX_SEND : send data to a remote socket (UDP or TCP). |
|---|
| 83 | * . SOCKET_TX_CLOSE : request to execute the 3 steps TCP close handshake. |
|---|
| 84 | * |
|---|
| 85 | * - These 3 commands are blocking for the client thread that registers the command in the |
|---|
| 86 | * socket descriptor, blocks on the BLOCKED_IO condition, and deschedules. |
|---|
| 87 | * - The TX server thread is acting as a multiplexer. It scans the list of attached sockets, |
|---|
| 88 | * to handle all valid commands: one UDP packet or TCP segment per iteration. |
|---|
| 89 | * It uses the user buffer defined by the client thread, and attached to socket descriptor, |
|---|
| 90 | * as a retransmission buffer. It blocks and deschedules on the BLOCKED_CLIENT condition, |
|---|
| 91 | * when there is no more active TX command registered in any socket. It is re-activated |
|---|
| 92 | * by the first client thread registering a new TX command in the socket descriptor. |
|---|
| 93 | * It unblocks a client thread only when a command is fully completed. It signals errors |
|---|
| 94 | * to the client thread using the tx_error field in socket descriptor. |
|---|
| 95 | * |
|---|
| 96 | * 4) RX stream |
|---|
| 97 | * |
|---|
| 98 | * The communication between the RX server thread and the client threads expecting data |
|---|
| 99 | * is done through receive buffers (one private buffer per socket) that are handled |
|---|
| 100 | * as single-writer / single-reader FIFOs, called rx_buf. |
|---|
| 101 | * - The RX server thread is acting as a demultiplexer: it handles one TCP segment or UDP |
|---|
| 102 | * packet per iteration, and registers the data in the rx_buf of the socket matching |
|---|
| 103 | * the packet. It simply discards all packets that do not match a registered socket. |
|---|
| 104 | * When a client thread is registered in the socket descriptor, the RX server thread |
|---|
| 105 | * unblocks this client thread as soon as there is data available in rx_buf. |
|---|
| 106 | * It blocks and deschedules on the BLOCKED_ISR condition when there is no more packets |
|---|
| 107 | * in the NIC_RX queue. It is unblocked by the hardware ISR. |
|---|
| 108 | * - The client thread simply access the rx_buf attached to socket descriptor, and consumes |
|---|
| 109 | * the available data when the rx_buf is non empty. It blocks on the BLOCKED_IO condition, |
|---|
| 110 | * and deschedules when the rx_buf is empty. |
|---|
| 111 | * |
|---|
| 112 | * 5) R2T queue |
|---|
| 113 | * |
|---|
| 114 | * To implement the TCP "3 steps handshake" protocol, the RX server thread can directly |
|---|
| 115 | * request the associated TX server thread to send control packets in the TX stream, |
|---|
| 116 | * using a dedicated R2T (RX to TX) FIFO stored in the socket descriptor. |
|---|
| 117 | * |
|---|
| 118 | * 6) NIC driver API |
|---|
| 119 | * |
|---|
| 120 | * The generic NIC device "driver" API defines the following commands to the NIC driver: |
|---|
| 121 | * - READABLE : returns true if at least one RX packet is available in RX queue. |
|---|
| 122 | * - WRITABLE : returns true if at least one empty slot is available in TX queue. |
|---|
| 123 | * - READ : consume one packet from the RX queue. |
|---|
| 124 | * - WRITE : produce one packet to the TX queue. |
|---|
| 125 | * All RX or TX packets are sent or received in standard 2 Kbytes kernel buffers, |
|---|
| 126 | * that are dynamically allocated by the protocols stack. |
|---|
| 127 | * |
|---|
| 128 | * The actual TX and RX queue structures depend on the hardware NIC implementation, |
|---|
| 129 | * and are defined in the HAL specific driver code. |
|---|
| 130 | * |
|---|
| 131 | * WARNING: the WTI mailboxes used by the driver to receive events from the hardware |
|---|
| 132 | * (available RX packet, or available free TX slot, for a given channel), must be |
|---|
| 133 | * statically allocated during the kernel initialisation phase, and must be |
|---|
| 134 | * routed to the cluster containing the associated TX/RX chdev and server thread. |
|---|
| 135 | * |
|---|
| 136 | *****************************************************************************************/ |
|---|
| 137 | |
|---|
| 138 | /**** Forward declarations ****/ |
|---|
| 139 | |
|---|
| 140 | struct chdev_s; |
|---|
| 141 | |
|---|
| 142 | /****************************************************************************************** |
|---|
| 143 | * Various constants used by the Protocols stack |
|---|
| 144 | *****************************************************************************************/ |
|---|
| 145 | |
|---|
| 146 | #define SRC_MAC_54 0x54 |
|---|
| 147 | #define SRC_MAC_32 0x32 |
|---|
| 148 | #define SRC_MAC_10 0x10 |
|---|
| 149 | #define DST_MAC_54 0x54 |
|---|
| 150 | #define DST_MAC_32 0x32 |
|---|
| 151 | #define DST_MAC_10 0x10 |
|---|
| 152 | |
|---|
| 153 | #define TCP_HEAD_LEN 20 |
|---|
| 154 | #define UDP_HEAD_LEN 8 |
|---|
| 155 | #define IP_HEAD_LEN 20 |
|---|
| 156 | #define ETH_HEAD_LEN 14 |
|---|
| 157 | |
|---|
| 158 | #define PROTOCOL_UDP 0x11 |
|---|
| 159 | #define PROTOCOL_TCP 0x06 |
|---|
| 160 | |
|---|
| 161 | #define TCP_ISS 0x10000 |
|---|
| 162 | |
|---|
| 163 | #define PAYLOAD_MAX_LEN 1500 // max payload for a UDP packet or a TCP segment |
|---|
| 164 | |
|---|
| 165 | #define TCP_FLAG_FIN 0x01 |
|---|
| 166 | #define TCP_FLAG_SYN 0x02 |
|---|
| 167 | #define TCP_FLAG_RST 0x04 |
|---|
| 168 | #define TCP_FLAG_PSH 0x08 |
|---|
| 169 | #define TCP_FLAG_ACK 0x10 |
|---|
| 170 | #define TCP_FLAG_URG 0x20 |
|---|
| 171 | |
|---|
| 172 | #define NIC_RX_BUF_SIZE 0x100000 // 1 Mbytes |
|---|
| 173 | #define NIC_R2T_QUEUE_SIZE 0x64 // smallest KCM size - NOTE(review): 0x64 is 100, not 64; confirm intended value |
|---|
| 174 | #define NIC_CRQ_QUEUE_SIZE 0x8 // 8 * sizeof(sockaddr_t) = smallest KCM size |
|---|
| 175 | #define NIC_PKT_MAX_SIZE 1500 // for Ethernet |
|---|
| 176 | #define NIC_KERNEL_BUF_SIZE 2000 // for one ETH/IP/TCP packet |
|---|
| 177 | |
|---|
| 178 | /***************************************************************************************** |
|---|
| 179 | * This defines the extension for the generic NIC device. |
|---|
| 180 | * The actual queue descriptor depends on the implementation. |
|---|
| 181 | * |
|---|
| 182 | * WARNING : for all NIC_TX and NIC_RX chdevs, the xlist rooted in the chdev |
|---|
| 183 | * ("wait_root" and "wait_lock" fields) is actually a list of sockets. |
|---|
| 184 | ****************************************************************************************/ |
|---|
| 185 | |
|---|
| 186 | typedef struct nic_extend_s |
|---|
| 187 | { |
|---|
| 188 | void * queue; /*! local pointer on NIC queue descriptor (RX or TX) */ |
|---|
| 189 | } |
|---|
| 190 | nic_extend_t; |
|---|
| 191 | |
|---|
| 192 | /***************************************************************************************** |
|---|
| 193 | * This enum defines the various implementations of the generic NIC peripheral. |
|---|
| 194 | * This enum must be kept consistent with the defines in the arch_info.h file. |
|---|
| 195 | ****************************************************************************************/ |
|---|
| 196 | |
|---|
| 197 | typedef enum nic_impl_e |
|---|
| 198 | { |
|---|
| 199 | IMPL_NIC_CBF = 0, |
|---|
| 200 | IMPL_NIC_I86 = 1, |
|---|
| 201 | } |
|---|
| 202 | nic_impl_t; |
|---|
| 203 | |
|---|
| 204 | /**************************************************************************************** |
|---|
| 205 | * This defines the (implementation independent) commands to access the NIC hardware. |
|---|
| 206 | * These commands are registered by the NIC_TX and NIC_RX server threads in the |
|---|
| 207 | * server thread descriptor, to be used by the NIC driver. |
|---|
| 208 | * The buffer is always a 2 Kbytes kernel buffer, containing an Ethernet packet. |
|---|
| 209 | ****************************************************************************************/ |
|---|
| 210 | |
|---|
| 211 | typedef enum nic_cmd_e |
|---|
| 212 | { |
|---|
| 213 | NIC_CMD_WRITABLE = 10, /*! test TX queue not full (for a given packet length) */ |
|---|
| 214 | NIC_CMD_WRITE = 11, /*! put one (given length) packet to TX queue */ |
|---|
| 215 | NIC_CMD_READABLE = 12, /*! test RX queue not empty (for any packet length) */ |
|---|
| 216 | NIC_CMD_READ = 13, /*! get one (any length) packet from RX queue */ |
|---|
| 217 | } |
|---|
| 218 | nic_cmd_t; |
|---|
| 219 | |
|---|
| 220 | typedef struct nic_command_s |
|---|
| 221 | { |
|---|
| 222 | xptr_t dev_xp; /*! extended pointer on NIC chdev descriptor */ |
|---|
| 223 | nic_cmd_t type; /*! command type */ |
|---|
| 224 | uint8_t * buffer; /*! local pointer on buffer (kernel or user space) - NOTE(review): nic_cmd_e header says always a kernel buffer; confirm */ |
|---|
| 225 | uint32_t length; /*! number of bytes in buffer */ |
|---|
| 226 | uint32_t status; /*! return value (depends on command type) */ |
|---|
| 227 | uint32_t error; /*! return an error from the hardware (0 if no error) */ |
|---|
| 228 | } |
|---|
| 229 | nic_command_t; |
|---|
| 230 | |
|---|
| 231 | /***************************************************************************************** |
|---|
| 232 | * This structure defines a socket descriptor. In order to parallelize the transfers, |
|---|
| 233 | * the set of all registered sockets is split in several subsets. |
|---|
| 234 | * The number of subsets is the number of NIC channels. |
|---|
| 235 | * The distribution key is computed from the (remote_addr/remote_port) couple. |
|---|
| 236 | * This computation is done by the NIC hardware for RX packets, |
|---|
| 237 | * and by the dev_nic_connect() function for the TX packets. |
|---|
| 238 | * |
|---|
| 239 | * A socket is attached to the NIC_TX[channel] & NIC_RX[channel] chdevs. |
|---|
| 240 | * Each socket descriptor allows the TX and RX server threads to access various buffers: |
|---|
| 241 | * - the user "send" buffer contains the data to be sent by the TX server thread. |
|---|
| 242 | * - the kernel "receive" buffer contains the data received by the RX server thread. |
|---|
| 243 | * - the kernel "r2t" buffer allows the RX server thread to make direct requests |
|---|
| 244 | * to the associated TX server (to implement the TCP 3 steps handshake). |
|---|
| 245 | * |
|---|
| 246 | * The synchronisation mechanism between the clients threads and the servers threads |
|---|
| 247 | * is different for TX and RX transfers: |
|---|
| 248 | * |
|---|
| 249 | * 1) For a TX transfer, it can exist only one client thread for a given socket, |
|---|
| 250 | * the transfer is always initiated by the local process, and all TX commands |
|---|
| 251 | * (CONNECT/SEND/CLOSE) are blocking for the client thread. The user buffer is |
|---|
| 252 | * used by TCP to handle retransmissions when required. |
|---|
| 253 | * The client thread registers the command in the thread descriptor, registers itself |
|---|
| 254 | * in the socket descriptor, unblocks the TX server thread from the BLOCKED_CLIENT |
|---|
| 255 | * condition, blocks itself on the BLOCKED_IO condition, and deschedules. |
|---|
| 256 | * When the command is completed, the TX server thread unblocks the client thread. |
|---|
| 257 | * The TX server blocks itself on the BLOCKED_CLIENT condition, when there is no |
|---|
| 258 | * pending commands and the R2T queue is empty. It is unblocked when a client |
|---|
| 259 | * registers a new command, or when the RX server thread registers a new request |
|---|
| 260 | * in the R2T queue. |
|---|
| 261 | * The tx_valid flip-flop is SET by the client thread to signal a valid command. |
|---|
| 262 | * It is RESET by the server thread when the command is completed: For a SEND, |
|---|
| 263 | * all bytes have been sent (UDP) or acknowledged (TCP). |
|---|
| 264 | * |
|---|
| 265 | * 2) For an RX transfer, it can exist only one client thread for a given socket, |
|---|
| 266 | * but the transfer is initiated by the remote process, and the RECV command |
|---|
| 267 | * is not really blocking: the data can arrive before the local RECV command is |
|---|
| 268 | * executed, and the server thread does not wait to receive all requested data |
|---|
| 269 | * to deliver data to client thread. Therefore each socket contains a receive |
|---|
| 270 | * buffer (rx_buf) handled as a single-writer/single-reader fifo. |
|---|
| 271 | * The client thread consumes data from the rx_buf when possible. It blocks on the |
|---|
| 272 | * BLOCKED_IO condition and deschedules when the rx_buf is empty. |
|---|
| 273 | * It is unblocked by the RX server thread when new data is available in the rx_buf. |
|---|
| 274 | * The RX server blocks itself on the BLOCKED_ISR condition when the NIC_RX packets |
|---|
| 275 | * queue is empty. It is unblocked by the hardware when new packets are available. |
|---|
| 276 | * |
|---|
| 277 | * Note : the socket domains and types are defined in the "shared_socket.h" file. |
|---|
| 278 | ****************************************************************************************/ |
|---|
| 279 | |
|---|
| 280 | /****************************************************************************************** |
|---|
| 281 | * This function returns a printable string for a given NIC command <type>. |
|---|
| 282 | ****************************************************************************************** |
|---|
| 283 | * @ type : NIC command type (presumably a nic_cmd_t value passed as uint32_t - confirm) |
|---|
| 284 | *****************************************************************************************/ |
|---|
| 285 | char * nic_cmd_str( uint32_t type ); |
|---|
| 286 | |
|---|
| 287 | /****************************************************************************************** |
|---|
| 288 | * This function returns a printable string for a given socket <state>. |
|---|
| 289 | ****************************************************************************************** |
|---|
| 290 | * @ state : socket state |
|---|
| 291 | *****************************************************************************************/ |
|---|
| 292 | char * socket_state_str( uint32_t state ); |
|---|
| 293 | |
|---|
| 294 | /****************************************************************************************** |
|---|
| 295 | * This function completes the NIC-RX and NIC-TX chdev descriptors initialisation, |
|---|
| 296 | * namely the link with the implementation specific driver. |
|---|
| 297 | * The func, impl, channel, is_rx, base fields have been previously initialised. |
|---|
| 298 | * It calls the specific driver initialisation function, to initialise the hardware |
|---|
| 299 | * device and the specific data structures when required. |
|---|
| 300 | * It creates the associated server thread and allocates a WTI from local ICU. |
|---|
| 301 | * For a NIC_TX chdev, it allocates and initializes the R2T waiting queue used by the |
|---|
| 302 | * NIC_RX[channel] server to send direct requests to the NIC_TX[channel] server. |
|---|
| 303 | * It must be executed by a local thread. |
|---|
| 304 | ****************************************************************************************** |
|---|
| 305 | * @ chdev : local pointer on NIC chdev descriptor. |
|---|
| 306 | *****************************************************************************************/ |
|---|
| 307 | void dev_nic_init( struct chdev_s * chdev ); |
|---|
| 308 | |
|---|
| 309 | |
|---|
| 310 | /* functions implementing the socket API */ |
|---|
| 311 | |
|---|
| 312 | /**************************************************************************************** |
|---|
| 313 | * This function implements the socket() syscall. |
|---|
| 314 | * This function allocates and initializes in the calling thread cluster: |
|---|
| 315 | * - a new socket descriptor, defined by the <domain> and <type> arguments, |
|---|
| 316 | * - a new file descriptor, associated to this socket. |
|---|
| 317 | * It registers the file descriptor in the reference process fd_array[], sets |
|---|
| 318 | * the socket state to IDLE, and returns the <fdid> value. |
|---|
| 319 | **************************************************************************************** |
|---|
| 320 | * @ domain : [in] socket protocol family (AF_UNIX / AF_INET) |
|---|
| 321 | * @ type : [in] socket type (SOCK_DGRAM / SOCK_STREAM). |
|---|
| 322 | * @ return a file descriptor <fdid> if success / return -1 if failure. |
|---|
| 323 | ***************************************************************************************/ |
|---|
| 324 | int dev_nic_socket( uint32_t domain, |
|---|
| 325 | uint32_t type ); |
|---|
| 326 | |
|---|
| 327 | /**************************************************************************************** |
|---|
| 328 | * This function implements the bind() syscall. |
|---|
| 329 | * It initializes the "local_addr" and "local_port" fields in the socket |
|---|
| 330 | * descriptor identified by the <fdid> argument and sets the socket state to BOUND. |
|---|
| 331 | * It can be called by a thread running in any cluster. |
|---|
| 332 | **************************************************************************************** |
|---|
| 333 | * @ fdid : [in] file descriptor identifying the socket. |
|---|
| 334 | * @ addr : [in] local IP address. |
|---|
| 335 | * @ port : [in] local port. |
|---|
| 336 | * @ return 0 if success / return -1 if failure. |
|---|
| 337 | ***************************************************************************************/ |
|---|
| 338 | int dev_nic_bind( uint32_t fdid, |
|---|
| 339 | uint32_t addr, |
|---|
| 340 | uint16_t port ); |
|---|
| 341 | |
|---|
| 342 | /**************************************************************************************** |
|---|
| 343 | * This function implements the listen() syscall. |
|---|
| 344 | * It is called by a (local) server process to specify the max size of the queue |
|---|
| 345 | * registering the (remote) client process connections, and sets the socket identified |
|---|
| 346 | * by the <fdid> argument to LISTEN state. It applies only to sockets of type TCP. |
|---|
| 347 | * It can be called by a thread running in any cluster. |
|---|
| 348 | * TODO handle the <max_pending> argument... |
|---|
| 349 | **************************************************************************************** |
|---|
| 350 | * @ fdid : [in] file descriptor identifying the local server socket. |
|---|
| 351 | * @ max_pending : [in] max number of accepted remote client connections. |
|---|
| 352 | ***************************************************************************************/ |
|---|
| 353 | int dev_nic_listen( uint32_t fdid, |
|---|
| 354 | uint32_t max_pending ); |
|---|
| 355 | |
|---|
| 356 | /**************************************************************************************** |
|---|
| 357 | * This function implements the connect() syscall. |
|---|
| 358 | * It is used by a (local) client process to connect a local socket identified by |
|---|
| 359 | * the <fdid> argument, to a remote socket identified by the <remote_addr> and |
|---|
| 360 | * <remote_port> arguments. It can be used for both UDP and TCP sockets. |
|---|
| 361 | * It computes the nic_channel index from <remote_addr> and <remote_port> values, |
|---|
| 362 | * and initializes "remote_addr", "remote_port", "nic_channel" in local socket. |
|---|
| 363 | * It registers the socket in the two lists of clients rooted in the NIC_RX[channel] |
|---|
| 364 | * and NIC_TX[channel] chdevs. It can be called by a thread running in any cluster. |
|---|
| 365 | * WARNING : the clients are the socket descriptors, and NOT the threads descriptors. |
|---|
| 366 | **************************************************************************************** |
|---|
| 367 | * Implementation Note: |
|---|
| 368 | * - For a TCP socket, it updates the "remote_addr", "remote_port", "nic_channel" fields |
|---|
| 369 | * in the socket descriptor defined by the <fdid> argument, and registers this socket |
|---|
| 370 | * in the lists of sockets attached to the NIC_TX and NIC_RX chdevs. |
|---|
| 371 | * Then, it registers a CONNECT command in the "nic_cmd" field of the client thread |
|---|
| 372 | * descriptor to request the NIC_TX server thread to execute the 3 steps handshake, |
|---|
| 373 | * and updates the "tx_client" field in the socket descriptor. It unblocks the NIC_TX |
|---|
| 374 | * server thread, blocks on the THREAD_BLOCKED_IO condition and deschedules. |
|---|
| 375 | * - For a UDP socket, it simply updates "remote_addr", "remote_port", "nic_channel" |
|---|
| 376 | * in the socket descriptor defined by the <fdid> argument, and registers this socket |
|---|
| 377 | * in the lists of sockets attached to the NIC_TX and NIC_RX chdevs. |
|---|
| 378 | * Then, it sets the socket state to CONNECT, without unblocking the NIC_TX server |
|---|
| 379 | * thread, and without blocking itself. |
|---|
| 380 | * TODO : the nic_channel index computation must be done by a driver specific function. |
|---|
| 381 | **************************************************************************************** |
|---|
| 382 | * @ fdid : [in] file descriptor identifying the socket. |
|---|
| 383 | * @ remote_addr : [in] remote IP address. |
|---|
| 384 | * @ remote_port : [in] remote port. |
|---|
| 385 | * @ return 0 if success / return -1 if failure. |
|---|
| 386 | ***************************************************************************************/ |
|---|
| 387 | int dev_nic_connect( uint32_t fdid, |
|---|
| 388 | uint32_t remote_addr, |
|---|
| 389 | uint16_t remote_port ); |
|---|
| 390 | |
|---|
| 391 | /**************************************************************************************** |
|---|
| 392 | * This function implements the accept() syscall. |
|---|
| 393 | * It is executed by a server process, waiting for one (or several) client process(es) |
|---|
| 394 | * requesting a connection on a socket identified by the <fdid> argument. |
|---|
| 395 | * This socket was previously created with socket(), bound to a local address with bind(), |
|---|
| 396 | * and is listening for connections after a listen(). |
|---|
| 397 | * This function extracts the first connection request on the CRQQ queue of pending |
|---|
| 398 | * requests, creates a new socket with the same properties as the existing socket, |
|---|
| 399 | * and allocates a new file descriptor for this new socket. |
|---|
| 400 | * If no pending connections are present on the queue, it blocks the caller until a |
|---|
| 401 | * connection is present. |
|---|
| 402 | * The new socket cannot accept more connections, but the original socket remains open. |
|---|
| 403 | * It returns the new socket <fdid>, and registers in the <address> and <port> arguments |
|---|
| 404 | * the remote client IP address & port. It applies only to sockets of type SOCK_STREAM. |
|---|
| 405 | **************************************************************************************** |
|---|
| 406 | * @ fdid : [in] file descriptor identifying the listening socket. |
|---|
| 407 | * @ address : [out] remote client IP address. |
|---|
| 408 | * @ port : [out] remote client port. |
|---|
| 409 | * @ return the new socket <fdid> if success / return -1 if failure |
|---|
| 410 | ***************************************************************************************/ |
|---|
| 411 | int dev_nic_accept( uint32_t fdid, |
|---|
| 412 | uint32_t * address, |
|---|
| 413 | uint16_t * port ); |
|---|
| 414 | |
|---|
| 415 | /**************************************************************************************** |
|---|
| 416 | * This blocking function implements the send() syscall. |
|---|
| 417 | * It is used to send data stored in the user buffer, identified by the <u_buf> and <length> |
|---|
| 418 | * arguments, to a connected (TCP or UDP) socket, identified by the <fdid> argument. |
|---|
| 419 | * The work is actually done by the NIC_TX server thread, and the synchronisation |
|---|
| 420 | * between the client and the server threads uses the "tx_valid" set/reset flip-flop: |
|---|
| 421 | * The client thread registers itself in the socket descriptor, registers in the queue |
|---|
| 422 | * rooted in the NIC_TX[index] chdev, sets "tx_valid", unblocks the server thread, and |
|---|
| 423 | * finally blocks on THREAD_BLOCKED_IO, and deschedules. |
|---|
| 424 | * When the TX server thread completes the command (all data has been sent for a UDP |
|---|
| 425 | * socket, or acknowledged for a TCP socket), the server thread resets "tx_valid" and |
|---|
| 426 | * unblocks the client thread. |
|---|
| 427 | * This function can be called by a thread running in any cluster. |
|---|
| 428 | * WARNING : This implementation does not support several concurrent SEND/SENDTO commands |
|---|
| 429 | * on the same socket, as only one TX thread can register in a given socket. |
|---|
| 430 | **************************************************************************************** |
|---|
| 431 | * @ fdid : [in] file descriptor identifying the socket. |
|---|
| 432 | * @ u_buf : [in] pointer on buffer containing packet in user space. |
|---|
| 433 | * @ length : [in] packet size in bytes. |
|---|
| 434 | * @ return number of sent bytes if success / return -1 if failure. |
|---|
| 435 | ***************************************************************************************/ |
|---|
| 436 | int dev_nic_send( uint32_t fdid, |
|---|
| 437 | uint8_t * u_buf, |
|---|
| 438 | uint32_t length ); |
|---|
| 439 | |
|---|
| 440 | /**************************************************************************************** |
|---|
| 441 | * This blocking function implements the sendto() syscall. |
|---|
| 442 | * It registers the <remote_addr> and <remote_port> arguments in the local socket |
|---|
| 443 | * descriptor, and does the same thing as the dev_nic_send() function above, |
|---|
| 444 | * but can be called on an unconnected UDP socket. |
|---|
| 445 | **************************************************************************************** |
|---|
| 446 | * @ fdid : [in] file descriptor identifying the socket. |
|---|
| 447 | * @ u_buf : [in] pointer on buffer containing packet in user space. |
|---|
| 448 | * @ length : [in] packet size in bytes. |
|---|
| 449 | * @ remote_addr : [in] destination IP address. |
|---|
| 450 | * @ remote_port : [in] destination port. NOTE(review): uint32_t here, uint16_t in dev_nic_connect() - confirm. |
|---|
| 451 | * @ return number of sent bytes if success / return -1 if failure. |
|---|
| 452 | ***************************************************************************************/ |
|---|
| 453 | int dev_nic_sendto( uint32_t fdid, |
|---|
| 454 | uint8_t * u_buf, |
|---|
| 455 | uint32_t length, |
|---|
| 456 | uint32_t remote_addr, |
|---|
| 457 | uint32_t remote_port ); |
|---|
| 458 | |
|---|
| 459 | /**************************************************************************************** |
|---|
| 460 | * This blocking function implements the recv() syscall. |
|---|
| 461 | * It is used to receive data that has been stored by the NIC_RX server thread in the |
|---|
| 462 | * rx_buf of a connected (TCP or UDP) socket, identified by the <fdid> argument. |
|---|
| 463 | * The synchronisation between the client and the server threads uses the "rx_valid" |
|---|
| 464 | * set/reset flip-flop: If "rx_valid" is set, the client simply moves the available |
|---|
| 465 | * data from the "rx_buf" to the user buffer identified by the <u_buf> and <length> |
|---|
| 466 | * arguments, and resets the "rx_valid" flip-flop. If "rx_valid" is not set, the client |
|---|
| 467 | * thread registers itself in the socket descriptor, registers in the clients queue rooted |
|---|
| 468 | * in the NIC_RX[index] chdev, and finally blocks on THREAD_BLOCKED_IO, and deschedules. |
|---|
| 469 | * The client thread is re-activated by the RX server, that sets the "rx_valid" flip-flop |
|---|
| 470 | * as soon as data is available in the "rx_buf" (can be less than the user buffer size). |
|---|
| 471 | * This function can be called by a thread running in any cluster. |
|---|
| 472 | * WARNING : This implementation does not support several concurrent RECV/RECVFROM |
|---|
| 473 | * commands on the same socket, as only one RX thread can register in a given socket. |
|---|
| 474 | **************************************************************************************** |
|---|
| 475 | * @ fdid : [in] file descriptor identifying the socket. |
|---|
| 476 | * @ u_buf : [in] pointer on buffer in user space. |
|---|
| 477 | * @ length : [in] buffer size in bytes. |
|---|
| 478 | * @ return number of received bytes if success / return -1 if failure. |
|---|
| 479 | ***************************************************************************************/ |
|---|
| 480 | int dev_nic_recv( uint32_t fdid, |
|---|
| 481 | uint8_t * u_buf, |
|---|
| 482 | uint32_t length ); |
|---|
| 483 | |
|---|
| 484 | /**************************************************************************************** |
|---|
| 485 | * This blocking function implements the recvfrom() syscall. |
|---|
| 486 | * It registers the <remote_addr> and <remote_port> arguments in the local socket |
|---|
| 487 | * descriptor, and then does the same thing as the dev_nic_recv() function above, |
|---|
| 488 | * but it can be called on an unconnected UDP socket. |
|---|
| 489 | **************************************************************************************** |
|---|
| 490 | * @ fdid : [in] file descriptor identifying the socket. |
|---|
| 491 | * @ u_buf : [in] pointer on buffer in user space. |
|---|
| 492 | * @ length : [in] buffer size in bytes. |
|---|
| 493 | * @ remote_addr : [in] remote IP address (registered in the socket descriptor). |
|---|
| 494 | * @ remote_port : [in] remote port (registered in the socket descriptor). |
|---|
| 495 | * @ return number of received bytes if success / return -1 if failure. |
|---|
| 496 | ***************************************************************************************/ |
|---|
| 497 | int dev_nic_recvfrom( uint32_t fdid, |
|---|
| 498 | uint8_t * u_buf, |
|---|
| 499 | uint32_t length, |
|---|
| 500 | uint32_t remote_addr, |
|---|
| 501 | uint32_t remote_port ); |
|---|
| 502 | |
|---|
| 503 | |
|---|
| 504 | /* Instrumentation functions */ |
|---|
| 505 | |
|---|
| 506 | |
|---|
| 507 | /****************************************************************************************** |
|---|
| 508 | * This instrumentation function displays, on the TXT0 kernel terminal, the content |
|---|
| 509 | * of the instrumentation registers contained in the NIC device. |
|---|
| 510 | *****************************************************************************************/ |
|---|
| 511 | void dev_nic_print_stats( void ); |
|---|
| 512 | |
|---|
| 513 | /****************************************************************************************** |
|---|
| 514 | * This instrumentation function resets all instrumentation registers contained |
|---|
| 515 | * in the NIC device. |
|---|
| 516 | *****************************************************************************************/ |
|---|
| 517 | void dev_nic_clear_stats( void ); |
|---|
| 518 | |
|---|
| 519 | |
|---|
| 520 | /* Functions executed by the TX and RX server threads */ |
|---|
| 521 | |
|---|
| 522 | /****************************************************************************************** |
|---|
| 523 | * This function is executed by the server thread associated to a NIC_TX[channel] chdev. |
|---|
| 524 | * This TX server thread is created by the dev_nic_init() function. |
|---|
| 525 | * It builds and sends UDP packets or TCP segments for all clients threads registered in |
|---|
| 526 | * the NIC_TX[channel] chdev. The command types are (CONNECT / SEND / CLOSE), and the |
|---|
| 527 | * priority between clients is round-robin. It takes into account any request registered |
|---|
| 528 | * by the RX server thread in the R2T queue associated to the involved socket. |
|---|
| 529 | * When a command is completed, it unblocks the client thread. For a SEND command, the |
|---|
| 530 | * last byte must have been sent for a UDP socket, and it must have been acknowledged |
|---|
| 531 | * for a TCP socket. |
|---|
| 532 | * When the TX client threads queue is empty, it blocks on THREAD_BLOCKED_CLIENT |
|---|
| 533 | * condition and deschedules. It is re-activated by a client thread registering a command. |
|---|
| 534 | ****************************************************************************************** |
|---|
| 535 | * Implementation note: |
|---|
| 536 | * It executes an infinite loop in which it takes the lock protecting the clients list |
|---|
| 537 | * to build a "kleenex" list of currently registered clients. |
|---|
| 538 | * For each client registered in this "kleenex" list, it takes the lock protecting the |
|---|
| 539 | * socket state, builds one packet/segment in a local 2K bytes kernel buffer, calls the |
|---|
| 540 | * transport layer to add the UDP/TCP header, calls the IP layer to add the IP header, |
|---|
| 541 | * calls the ETH layer to add the ETH header, and moves the packet to the NIC_TX_QUEUE. |
|---|
| 542 | * Finally, it updates the socket state, and releases the socket lock. |
|---|
| 543 | ****************************************************************************************** |
|---|
| 544 | * @ chdev : [in] local pointer on one local NIC_TX[channel] chdev descriptor. |
|---|
| 545 | *****************************************************************************************/ |
|---|
| 546 | void dev_nic_tx_server( struct chdev_s * chdev ); |
|---|
| 547 | |
|---|
| 548 | |
|---|
| 549 | /****************************************************************************************** |
|---|
| 550 | * This function is executed by the server thread associated to a NIC_RX[channel] chdev. |
|---|
| 551 | * This RX server thread is created by the dev_nic_init() function. |
|---|
| 552 | * It handles all UDP packets or TCP segments received by the sockets attached to |
|---|
| 553 | * the NIC_RX[channel] chdev. It writes the received data in the socket rcv_buf, and |
|---|
| 554 | * unblocks the client thread waiting on a RECV command. |
|---|
| 555 | * To implement the three-step handshake required by a TCP connection, it posts direct |
|---|
| 556 | * requests to the TX server, using the R2T queue attached to the involved socket. |
|---|
| 557 | * It blocks on the THREAD_BLOCKED_ISR condition and deschedules when the NIC_RX_QUEUE |
|---|
| 558 | * is empty. It is re-activated by the NIC_RX_ISR, when the queue becomes non empty. |
|---|
| 559 | ****************************************************************************************** |
|---|
| 560 | * Implementation note: |
|---|
| 561 | * It executes an infinite loop in which it extracts one packet from the NIC_RX_QUEUE |
|---|
| 562 | * of received packets, copies this packet in a local 2 kbytes kernel buffer, checks |
|---|
| 563 | * the Ethernet header, checks the IP header, calls the relevant (TCP or UDP) transport |
|---|
| 564 | * protocol that searches a matching socket for the received packet. It copies the payload |
|---|
| 565 | * to the relevant socket rcv_buf when the packet is acceptable, and unblocks the client |
|---|
| 566 | * thread. It discards the packet if no matching socket is found. |
|---|
| 567 | ****************************************************************************************** |
|---|
| 568 | * @ chdev : [in] local pointer on one local NIC_RX[channel] chdev descriptor. |
|---|
| 569 | *****************************************************************************************/ |
|---|
| 570 | void dev_nic_rx_server( struct chdev_s * chdev ); |
|---|
| 571 | |
|---|
| 572 | #endif /* _DEV_NIC_H */ |
|---|