| 1 | /* |
|---|
| 2 | * dev_nic.h - NIC (Network Interface Controller) generic device API definition. |
|---|
| 3 | * |
|---|
| 4 | * Author Alain Greiner (2016,2017,2018,2019,2020) |
|---|
| 5 | * |
|---|
| 6 | * Copyright (c) UPMC Sorbonne Universites |
|---|
| 7 | * |
|---|
| 8 | * This file is part of ALMOS-MKH |
|---|
| 9 | * |
|---|
| 10 | * ALMOS-MKH is free software; you can redistribute it and/or modify it |
|---|
| 11 | * under the terms of the GNU General Public License as published by |
|---|
| 12 | * the Free Software Foundation; version 2.0 of the License. |
|---|
| 13 | * |
|---|
| 14 | * ALMOS-MKH is distributed in the hope that it will be useful, but |
|---|
| 15 | * WITHOUT ANY WARRANTY; without even the implied warranty of |
|---|
| 16 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
|---|
| 17 | * General Public License for more details. |
|---|
| 18 | * |
|---|
| 19 | * You should have received a copy of the GNU General Public License |
|---|
| 20 | * along with ALMOS-kernel; if not, write to the Free Software Foundation, |
|---|
| 21 | * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA |
|---|
| 22 | */ |
|---|
| 23 | |
|---|
| 24 | #ifndef _DEV_NIC_H |
|---|
| 25 | #define _DEV_NIC_H |
|---|
| 26 | |
|---|
| 27 | #include <kernel_config.h> |
|---|
| 28 | #include <hal_kernel_types.h> |
|---|
| 29 | #include <remote_busylock.h> |
|---|
| 30 | #include <remote_buf.h> |
|---|
| 31 | #include <xlist.h> |
|---|
| 32 | |
|---|
| 33 | /**** Forward declarations ****/ |
|---|
| 34 | |
|---|
| 35 | struct chdev_s; |
|---|
| 36 | |
|---|
| 37 | /***************************************************************************************** |
|---|
| 38 | * Generic Network Interface Controller definition |
|---|
| 39 | * |
|---|
| 40 | * This device provides access to a generic Gigabit Ethernet network controller. |
|---|
| 41 | * It assumes that the NIC hardware peripheral handles two packet queues for sent (TX) |
|---|
| 42 | * and received (RX) packets. |
|---|
| 43 | * |
|---|
| 44 | * The supported protocols stack is : Ethernet / IPV4 / TCP or UDP |
|---|
| 45 | * |
|---|
| 46 | * 1) hardware assumptions |
|---|
| 47 | * |
|---|
| 48 | * The NIC device is handling two (infinite) streams of packets to or from the network. |
|---|
| 49 | * It is the driver responsibility to move the RX packets from the NIC to the RX queue, |
|---|
| 50 | * and the TX packets from the TX queue to the NIC. |
|---|
| 51 | * |
|---|
| 52 | * As the RX and TX queues are independent, there is one NIC_RX device descriptor |
|---|
| 53 | * to handle RX packets, and another NIC_TX device descriptor to handle TX packets. |
|---|
| 54 | * |
|---|
| 55 | * In order to improve throughput, the NIC controller can implement multiple (N) channels. |
|---|
| 56 | * In this case, the channel index is defined by a hash function computed from the remote |
|---|
| 57 | * IP address and port. This index is computed by the hardware for an RX packet, and is |
|---|
| 58 | * computed by the kernel for a TX packet, using a specific driver function. |
|---|
| 59 | * The 2*N chdevs, and the associated server threads implementing the protocols stack, |
|---|
| 60 | * are distributed in 2*N different clusters. |
|---|
| 61 | * |
|---|
| 62 | * 2) User API |
|---|
| 63 | * |
|---|
| 64 | * On the user side, ALMOS-MKH implements the POSIX socket API. |
|---|
| 65 | * The following kernel functions implement the socket related syscalls : |
|---|
| 66 | * - socket_build() : create a local socket registered in process fd_array[]. |
|---|
| 67 | * - socket_bind() : attach a local IP address and port to a local socket. |
|---|
| 68 | * - socket_listen() : local server makes a passive open. |
|---|
| 69 | * - socket_connect() : local client makes an active open to a remote server. |
|---|
| 70 | * - socket_accept() : local server accepts a new remote client. |
|---|
| 71 | * - socket_send() : send data on a connected socket. |
|---|
| 72 | * - socket_recv() : receive data on a connected socket. |
|---|
| 73 | * - socket_sendto() : send a packet to a remote (IP address/port). |
|---|
| 74 | * - socket_recvfrom() : receive a packet from a remote (IP address/port). |
|---|
| 75 | * |
|---|
| 76 | * 3) NIC_TX and NIC_RX server threads |
|---|
| 77 | * |
|---|
| 78 | * The dev_nic_tx_server() & dev_nic_rx_server() functions defined below execute |
|---|
| 79 | * the user commands stored in the sockets to implement the [ETH / IP / TCP or UDP] |
|---|
| 80 | * protocols stack, as defined in the <ksocket.c> and <ksocket.h> files. |
|---|
| 81 | * |
|---|
| 82 | * 4) NIC driver API |
|---|
| 83 | * |
|---|
| 84 | * The generic NIC device "driver" API defines the following commands, used by the |
|---|
| 85 | * NIC_TX and NIC_RX server threads, running in the cluster containing the relevant chdev, |
|---|
| 86 | * to access the NIC_TX and NIC_RX packets queues: |
|---|
| 87 | * |
|---|
| 88 | * - READ : consume one packet from the NIC_RX queue. |
|---|
| 89 | * - WRITE : produce one packet to the NIC_TX queue. |
|---|
| 90 | * |
|---|
| 91 | * All RX or TX packets are sent or received in standard 2 Kbytes kernel buffers, |
|---|
| 92 | * that are dynamically allocated by the protocols stack. |
|---|
| 93 | * The actual TX and RX queue structures depend on the hardware NIC implementation, |
|---|
| 94 | * and are defined in the HAL specific driver code. |
|---|
| 95 | * |
|---|
| 96 | * Moreover, the generic NIC device "driver" API defines the following commands, |
|---|
| 97 | * used directly by a client thread running in any cluster, to access the NIC device |
|---|
| 98 | * configuration or status registers: |
|---|
| 99 | * |
|---|
| 100 | * - GET_KEY : get channel index from remote IP address and port |
|---|
| 101 | * - SET_RUN : activate/deactivate one channel |
|---|
| 102 | * - GET_INSTRU : get one instrumentation counter value |
|---|
| 103 | * - CLEAR_INSTRU : reset all instrumentation counters |
|---|
| 104 | * |
|---|
| 105 | * WARNING: the WTI mailboxes used by the driver to receive events from the hardware |
|---|
| 106 | * (available RX packet, or available free TX slot, for a given channel), must be |
|---|
| 107 | * statically allocated during the kernel initialisation phase, and must be |
|---|
| 108 | * routed to the cluster containing the associated TX/RX chdev and server thread. |
|---|
| 109 | * |
|---|
| 110 | *****************************************************************************************/ |
|---|
| 111 | |
|---|
| 112 | /**** Forward declarations ****/ |
|---|
| 113 | |
|---|
| 114 | struct chdev_s; |
|---|
| 115 | |
|---|
| 116 | /***************************************************************************************** |
|---|
| 117 | * Various constants used by the protocols stack |
|---|
| 118 | ****************************************************************************************/ |
|---|
| 119 | |
|---|
| 120 | #define SRC_MAC_5 0x66 // Hard-coded source MAC address: temporary short-cut for debug |
|---|
| 121 | #define SRC_MAC_4 0x55 |
|---|
| 122 | #define SRC_MAC_3 0x44 |
|---|
| 123 | #define SRC_MAC_2 0x33 |
|---|
| 124 | #define SRC_MAC_1 0x22 |
|---|
| 125 | #define SRC_MAC_0 0x11 |
|---|
| 126 | |
|---|
| 127 | #define DST_MAC_5 0x66 // Hard-coded destination MAC address: temporary short-cut for debug |
|---|
| 128 | #define DST_MAC_4 0x55 |
|---|
| 129 | #define DST_MAC_3 0x44 |
|---|
| 130 | #define DST_MAC_2 0x33 |
|---|
| 131 | #define DST_MAC_1 0x22 |
|---|
| 132 | #define DST_MAC_0 0x11 |
|---|
| 133 | |
|---|
| 134 | #define TCP_HEAD_LEN 20 |
|---|
| 135 | #define UDP_HEAD_LEN 8 |
|---|
| 136 | #define IP_HEAD_LEN 20 |
|---|
| 137 | #define ETH_HEAD_LEN 14 |
|---|
| 138 | |
|---|
| 139 | #define PROTOCOL_UDP 0x11 |
|---|
| 140 | #define PROTOCOL_TCP 0x06 |
|---|
| 141 | |
|---|
| 142 | #define TCP_ISS_CLIENT 0x10000 // initial sequence number for TCP client |
|---|
| 143 | #define TCP_ISS_SERVER 0x20000 // initial sequence number for TCP server |
|---|
| 144 | #define TCP_MAX_WINDOW 0xFFFFF // initial TCP send window - NOTE(review): larger than a 16-bit window field (0xFFFF), confirm |
|---|
| 145 | |
|---|
| 146 | #define PAYLOAD_MAX_LEN 1500 // max length for a UDP packet / TCP segment |
|---|
| 147 | |
|---|
| 148 | #define TCP_FLAG_FIN 0x01 |
|---|
| 149 | #define TCP_FLAG_SYN 0x02 |
|---|
| 150 | #define TCP_FLAG_RST 0x04 |
|---|
| 151 | #define TCP_FLAG_PSH 0x08 |
|---|
| 152 | #define TCP_FLAG_ACK 0x10 |
|---|
| 153 | #define TCP_FLAG_URG 0x20 |
|---|
| 154 | |
|---|
| 155 | #define NIC_RX_BUF_SIZE 0x100000 // 1 Mbyte |
|---|
| 156 | #define NIC_R2T_QUEUE_SIZE 0x64 // smallest KCM size - NOTE(review): 0x64 is 100 decimal, confirm 64 was not intended |
|---|
| 157 | #define NIC_CRQ_QUEUE_SIZE 0x8 // actual size is 8 * sizeof(sockaddr_t) |
|---|
| 158 | #define NIC_KERNEL_BUF_SIZE 0x800 // 2 Kbytes for one ETH/IP/TCP packet |
|---|
| 159 | |
|---|
| 160 | /***************************************************************************************** |
|---|
| 161 | * This structure defines the specific chdev extension for a NIC device: |
|---|
| 162 | * - queue : local pointer on the memory mapped queue of TX or RX packets, used |
|---|
| 163 | * by the NIC driver to move packets to/from the NIC hardware. The actual descriptor |
|---|
| 164 | * depends on the NIC implementation. |
|---|
| 165 | * - root : root of an xlist of sockets that are in the LISTEN state, waiting for one or |
|---|
| 166 | * several TCP connection requests from remote processes. It is only used by the |
|---|
| 167 | * NIC_RX server thread attached to a NIC_RX chdev. |
|---|
| 168 | * - lock : lock protecting concurrent access to the listening sockets list. |
|---|
| 169 | ****************************************************************************************/ |
|---|
| 170 | |
|---|
| 171 | typedef struct nic_extend_s |
|---|
| 172 | { |
|---|
| 173 | void * queue; /*! pointer on NIC packets queue descriptor (RX or TX) */ |
|---|
| 174 | xlist_entry_t root; /*! root of listening sockets list (only used in RX[0]) */ |
|---|
| 175 | remote_busylock_t lock; /*! lock protecting this list (only used in RX[0]) */ |
|---|
| 176 | } |
|---|
| 177 | nic_extend_t; |
|---|
| 178 | |
|---|
| 179 | /***************************************************************************************** |
|---|
| 180 | * This enum defines the various implementations of the generic NIC peripheral. |
|---|
| 181 | * This enum must be kept consistent with the defines in the arch_info.h file. |
|---|
| 182 | ****************************************************************************************/ |
|---|
| 183 | |
|---|
| 184 | typedef enum nic_impl_e |
|---|
| 185 | { |
|---|
| 186 | IMPL_NIC_CBF = 0, |
|---|
| 187 | IMPL_NIC_I86 = 1, |
|---|
| 188 | } |
|---|
| 189 | nic_impl_t; |
|---|
| 190 | |
|---|
| 191 | /**************************************************************************************** |
|---|
| 192 | * This defines the (implementation independent) commands to access the NIC hardware. |
|---|
| 193 | * There are two types of commands: |
|---|
| 194 | * - The first 2 commands are used by the NIC_TX and NIC_RX server threads, and stored |
|---|
| 195 | * in the server thread descriptor, to access the NIC_RX & NIC_TX packet queues. |
|---|
| 196 | * The buffer is always a 2K bytes kernel buffer, containing an Ethernet packet. |
|---|
| 197 | * - The next 4 synchronous commands are used by the client threads, and stored in the |
|---|
| 198 | * client thread descriptor, to directly access the NIC registers. |
|---|
| 199 | ****************************************************************************************/ |
|---|
| 200 | |
|---|
| 201 | typedef enum nic_cmd_e |
|---|
| 202 | { |
|---|
| 203 | NIC_CMD_WRITE = 10, /*! put one (given length) packet to TX queue */ |
|---|
| 204 | NIC_CMD_READ = 11, /*! get one (any length) packet from RX queue */ |
|---|
| 205 | |
|---|
| 206 | NIC_CMD_GET_KEY = 20, /*! return channel index from IP address and port */ |
|---|
| 207 | NIC_CMD_SET_RUN = 21, /*! enable/disable one NIC channel */ |
|---|
| 208 | NIC_CMD_GET_INSTRU = 22, /*! return one instrumentation register value */ |
|---|
| 209 | NIC_CMD_CLEAR_INSTRU = 23, /*! reset all instrumentation registers */ |
|---|
| 210 | } |
|---|
| 211 | nic_cmd_t; |
|---|
| 212 | |
|---|
| 213 | typedef struct nic_command_s |
|---|
| 214 | { |
|---|
| 215 | xptr_t dev_xp; /*! extended pointer on NIC chdev descriptor */ |
|---|
| 216 | nic_cmd_t type; /*! command type (one of the nic_cmd_t values) */ |
|---|
| 217 | uint8_t * buffer; /*! local pointer on kernel buffer */ |
|---|
| 218 | uint32_t length; /*! number of bytes in buffer */ |
|---|
| 219 | uint32_t status; /*! return value (depends on command type) */ |
|---|
| 220 | uint32_t error; /*! error reported by the hardware (0 if no error) */ |
|---|
| 221 | } |
|---|
| 222 | nic_command_t; |
|---|
| 223 | |
|---|
| 224 | /****************************************************************************************** |
|---|
| 225 | * This function completes the NIC-RX and NIC-TX chdev descriptors initialisation, |
|---|
| 226 | * namely the link with the implementation specific driver. |
|---|
| 227 | * The func, impl, channel, is_rx, base fields have been previously initialised. |
|---|
| 228 | * It calls the specific driver initialisation function, to initialise the hardware |
|---|
| 229 | * device and the specific data structures when required. |
|---|
| 230 | * It creates the associated server thread and allocates a WTI from local ICU. |
|---|
| 231 | * For a NIC_TX chdev, it allocates and initializes the R2T queue used by the |
|---|
| 232 | * NIC_RX[channel] server to send direct requests to the NIC_TX[channel] server, |
|---|
| 233 | * and the CRQ queue used to register connection requests. |
|---|
| 234 | * It must be executed by a local thread. |
|---|
| 235 | * For NIC_TX and NIC_RX chdevs, the "wait_root" field is actually a list of sockets. |
|---|
| 236 | ****************************************************************************************** |
|---|
| 237 | * @ chdev : local pointer on NIC chdev descriptor. |
|---|
| 238 | *****************************************************************************************/ |
|---|
| 239 | void dev_nic_init( struct chdev_s * chdev ); |
|---|
| 240 | |
|---|
| 241 | /* Functions directly called by a client thread in any cluster */ |
|---|
| 242 | |
|---|
| 243 | /****************************************************************************************** |
|---|
| 244 | * This function computes a channel index in range [0,nic_channels[ from the remote IP |
|---|
| 245 | * address <addr> and <port>, by calling the relevant driver command. |
|---|
| 246 | ****************************************************************************************** |
|---|
| 247 | * @ addr : [in] IP address. |
|---|
| 248 | * @ port : [in] TCP/UDP port. |
|---|
| 249 | * @ return the selected channel index. |
|---|
| 250 | *****************************************************************************************/ |
|---|
| 251 | uint32_t dev_nic_get_key( uint32_t addr, |
|---|
| 252 | uint16_t port ); |
|---|
| 253 | |
|---|
| 254 | /****************************************************************************************** |
|---|
| 255 | * This function activates / deactivates the NIC channel DMA engine identified by the |
|---|
| 256 | * <channel> argument, as defined by the <run> argument. |
|---|
| 257 | ****************************************************************************************** |
|---|
| 258 | * @ channel : [in] NIC channel index. |
|---|
| 259 | * @ run : [in] activate if non-zero / deactivate if zero. |
|---|
| 260 | * @ return 0 if success / return -1 if error. |
|---|
| 261 | *****************************************************************************************/ |
|---|
| 262 | error_t dev_nic_set_run( uint32_t channel, |
|---|
| 263 | uint32_t run ); |
|---|
| 264 | |
|---|
| 265 | /****************************************************************************************** |
|---|
| 266 | * This instrumentation function displays on the TXT0 kernel terminal the contents |
|---|
| 267 | * of the instrumentation registers contained in the NIC device. |
|---|
| 268 | ****************************************************************************************** |
|---|
| 269 | * @ return 0 if success / return -1 if error. |
|---|
| 270 | *****************************************************************************************/ |
|---|
| 271 | error_t dev_nic_get_instru( void ); |
|---|
| 272 | |
|---|
| 273 | /****************************************************************************************** |
|---|
| 274 | * This instrumentation function resets all instrumentation registers contained |
|---|
| 275 | * in the NIC device. |
|---|
| 276 | ****************************************************************************************** |
|---|
| 277 | * @ return 0 if success / return -1 if error. |
|---|
| 278 | *****************************************************************************************/ |
|---|
| 279 | error_t dev_nic_clear_instru( void ); |
|---|
| 280 | |
|---|
| 281 | /* Functions executed by the TX and RX server threads */ |
|---|
| 282 | |
|---|
| 283 | /****************************************************************************************** |
|---|
| 284 | * This function is executed by the server thread associated to a NIC_TX[channel] chdev. |
|---|
| 285 | * This TX server thread is created by the dev_nic_init() function. |
|---|
| 286 | * It builds and sends UDP packets or TCP segments for all client threads registered in |
|---|
| 287 | * the NIC_TX[channel] chdev. The command types are (CONNECT / SEND / CLOSE), and the |
|---|
| 288 | * priority between clients is round-robin. It takes into account the requests registered |
|---|
| 289 | * by the RX server thread in the R2T queue associated to the involved socket. |
|---|
| 290 | * When a command is completed, it unblocks the client thread. For a SEND command, the |
|---|
| 291 | * last byte must have been sent for a UDP socket, and it must have been acknowledged |
|---|
| 292 | * for a TCP socket. |
|---|
| 293 | * When the TX client threads queue is empty, it blocks on the THREAD_BLOCKED_CLIENT |
|---|
| 294 | * condition and deschedules. It is re-activated by a client thread registering a command. |
|---|
| 295 | * When the NIC_TX packet queue is full, it blocks on the THREAD_BLOCKED_ISR condition |
|---|
| 296 | * and deschedules. It is re-activated by the NIC_TX DMA engine. |
|---|
| 297 | ****************************************************************************************** |
|---|
| 298 | * Implementation note: |
|---|
| 299 | * It executes an infinite loop in which it takes the lock protecting the clients list |
|---|
| 300 | * to build a "kleenex" list of currently registered clients. |
|---|
| 301 | * For each client registered in this "kleenex" list, it takes the lock protecting the |
|---|
| 302 | * socket state, builds one packet/segment in a local 2K bytes kernel buffer, calls the |
|---|
| 303 | * transport layer to add the UDP/TCP header, calls the IP layer to add the IP header, |
|---|
| 304 | * calls the ETH layer to add the ETH header, and moves the packet to the NIC_TX_QUEUE. |
|---|
| 305 | * Finally, it updates the socket state, and releases the socket lock. |
|---|
| 306 | ****************************************************************************************** |
|---|
| 307 | * @ chdev : [in] local pointer on one local NIC_TX[channel] chdev descriptor. |
|---|
| 308 | *****************************************************************************************/ |
|---|
| 309 | void dev_nic_tx_server( struct chdev_s * chdev ); |
|---|
| 310 | |
|---|
| 311 | |
|---|
| 312 | /****************************************************************************************** |
|---|
| 313 | * This function is executed by the server thread associated to a NIC_RX[channel] chdev. |
|---|
| 314 | * This RX server thread is created by the dev_nic_init() function. |
|---|
| 315 | * It handles all UDP packets or TCP segments received by the sockets attached to |
|---|
| 316 | * the NIC_RX[channel] chdev. It writes the received data in the socket rcv_buf, and |
|---|
| 317 | * unblocks the client thread waiting on a RECV command. |
|---|
| 318 | * To implement the three-step handshake required by a TCP connection, it posts direct |
|---|
| 319 | * requests to the TX server, using the R2T queue attached to the involved socket. |
|---|
| 320 | * It blocks on the THREAD_BLOCKED_ISR condition and deschedules when the NIC_RX_QUEUE |
|---|
| 321 | * is empty, and is re-activated by the NIC_RX_ISR, when the queue becomes non empty. |
|---|
| 322 | ****************************************************************************************** |
|---|
| 323 | * Implementation note: |
|---|
| 324 | * It executes an infinite loop in which it extracts one packet from the NIC_RX_QUEUE |
|---|
| 325 | * of received packets, copies this packet in a local 2 Kbytes kernel buffer, checks |
|---|
| 326 | * the Ethernet header, checks the IP header, calls the relevant (TCP or UDP) transport |
|---|
| 327 | * protocol that searches a matching socket for the received packet. It copies the payload |
|---|
| 328 | * to the relevant socket rcv_buf when the packet is acceptable, and unblocks the client |
|---|
| 329 | * thread. It discards the packet if no socket is found. |
|---|
| 330 | ****************************************************************************************** |
|---|
| 331 | * @ chdev : [in] local pointer on one local NIC_RX[channel] chdev descriptor. |
|---|
| 332 | *****************************************************************************************/ |
|---|
| 333 | void dev_nic_rx_server( struct chdev_s * chdev ); |
|---|
| 334 | |
|---|
| 335 | /****************************************************************************************** |
|---|
| 336 | * This function displays all the fields of an ETH/IP/TCP segment or ETH/IP/UDP packet. |
|---|
| 337 | ****************************************************************************************** |
|---|
| 338 | * @ is_tx : [in] true for a sent packet / false for a received packet. |
|---|
| 339 | * @ pid : [in] process identifier. |
|---|
| 340 | * @ trdid : [in] thread identifier. |
|---|
| 341 | * @ cycle : [in] date (number of cycles). |
|---|
| 342 | * @ buf : [in] local pointer on kernel buffer containing the packet. |
|---|
| 343 | *****************************************************************************************/ |
|---|
| 344 | void dev_nic_packet_display( bool_t is_tx, |
|---|
| 345 | pid_t pid, |
|---|
| 346 | trdid_t trdid, |
|---|
| 347 | uint32_t cycle, |
|---|
| 348 | uint8_t * buf ); |
|---|
| 349 | |
|---|
| 350 | #endif /* _DEV_NIC_H */ |
|---|