Fix typo.
[gpxe.git] / src / net / ipv4.c
blob2f50f0e4e90a8d67c54ab98db638764772f38cf3
1 #include <string.h>
2 #include <stdint.h>
3 #include <stdlib.h>
4 #include <stdio.h>
5 #include <errno.h>
6 #include <byteswap.h>
7 #include <gpxe/list.h>
8 #include <gpxe/in.h>
9 #include <gpxe/arp.h>
10 #include <gpxe/if_ether.h>
11 #include <gpxe/iobuf.h>
12 #include <gpxe/netdevice.h>
13 #include <gpxe/ip.h>
14 #include <gpxe/tcpip.h>
16 /** @file
18 * IPv4 protocol
22 /* Unique IP datagram identification number */
23 static uint16_t next_ident = 0;
25 struct net_protocol ipv4_protocol;
27 /** List of IPv4 miniroutes */
28 struct list_head ipv4_miniroutes = LIST_HEAD_INIT ( ipv4_miniroutes );
30 /** List of fragment reassembly buffers */
31 static LIST_HEAD ( frag_buffers );
33 /**
34 * Add IPv4 minirouting table entry
36 * @v netdev Network device
37 * @v address IPv4 address
38 * @v netmask Subnet mask
39 * @v gateway Gateway address (or @c INADDR_NONE for no gateway)
40 * @ret miniroute Routing table entry, or NULL
42 static struct ipv4_miniroute * __malloc
43 add_ipv4_miniroute ( struct net_device *netdev, struct in_addr address,
44 struct in_addr netmask, struct in_addr gateway ) {
45 struct ipv4_miniroute *miniroute;
47 DBG ( "IPv4 add %s", inet_ntoa ( address ) );
48 DBG ( "/%s ", inet_ntoa ( netmask ) );
49 if ( gateway.s_addr != INADDR_NONE )
50 DBG ( "gw %s ", inet_ntoa ( gateway ) );
51 DBG ( "via %s\n", netdev->name );
53 /* Allocate and populate miniroute structure */
54 miniroute = malloc ( sizeof ( *miniroute ) );
55 if ( ! miniroute ) {
56 DBG ( "IPv4 could not add miniroute\n" );
57 return NULL;
60 /* Record routing information */
61 miniroute->netdev = netdev_get ( netdev );
62 miniroute->address = address;
63 miniroute->netmask = netmask;
64 miniroute->gateway = gateway;
66 /* Add to end of list if we have a gateway, otherwise
67 * to start of list.
69 if ( gateway.s_addr != INADDR_NONE ) {
70 list_add_tail ( &miniroute->list, &ipv4_miniroutes );
71 } else {
72 list_add ( &miniroute->list, &ipv4_miniroutes );
75 return miniroute;
78 /**
79 * Delete IPv4 minirouting table entry
81 * @v miniroute Routing table entry
83 static void del_ipv4_miniroute ( struct ipv4_miniroute *miniroute ) {
85 DBG ( "IPv4 del %s", inet_ntoa ( miniroute->address ) );
86 DBG ( "/%s ", inet_ntoa ( miniroute->netmask ) );
87 if ( miniroute->gateway.s_addr != INADDR_NONE )
88 DBG ( "gw %s ", inet_ntoa ( miniroute->gateway ) );
89 DBG ( "via %s\n", miniroute->netdev->name );
91 netdev_put ( miniroute->netdev );
92 list_del ( &miniroute->list );
93 free ( miniroute );
96 /**
97 * Add IPv4 interface
99 * @v netdev Network device
100 * @v address IPv4 address
101 * @v netmask Subnet mask
102 * @v gateway Gateway address (or @c INADDR_NONE for no gateway)
103 * @ret rc Return status code
106 int add_ipv4_address ( struct net_device *netdev, struct in_addr address,
107 struct in_addr netmask, struct in_addr gateway ) {
108 struct ipv4_miniroute *miniroute;
110 /* Clear any existing address for this net device */
111 del_ipv4_address ( netdev );
113 /* Add new miniroute */
114 miniroute = add_ipv4_miniroute ( netdev, address, netmask, gateway );
115 if ( ! miniroute )
116 return -ENOMEM;
118 return 0;
122 * Remove IPv4 interface
124 * @v netdev Network device
126 void del_ipv4_address ( struct net_device *netdev ) {
127 struct ipv4_miniroute *miniroute;
129 list_for_each_entry ( miniroute, &ipv4_miniroutes, list ) {
130 if ( miniroute->netdev == netdev ) {
131 del_ipv4_miniroute ( miniroute );
132 break;
138 * Perform IPv4 routing
140 * @v dest Final destination address
141 * @ret dest Next hop destination address
142 * @ret miniroute Routing table entry to use, or NULL if no route
144 * If the route requires use of a gateway, the next hop destination
145 * address will be overwritten with the gateway address.
147 static struct ipv4_miniroute * ipv4_route ( struct in_addr *dest ) {
148 struct ipv4_miniroute *miniroute;
149 int local;
150 int has_gw;
152 /* Never attempt to route the broadcast address */
153 if ( dest->s_addr == INADDR_BROADCAST )
154 return NULL;
156 /* Find first usable route in routing table */
157 list_for_each_entry ( miniroute, &ipv4_miniroutes, list ) {
158 local = ( ( ( dest->s_addr ^ miniroute->address.s_addr )
159 & miniroute->netmask.s_addr ) == 0 );
160 has_gw = ( miniroute->gateway.s_addr != INADDR_NONE );
161 if ( local || has_gw ) {
162 if ( ! local )
163 *dest = miniroute->gateway;
164 return miniroute;
168 return NULL;
172 * Fragment reassembly counter timeout
174 * @v timer Retry timer
175 * @v over If asserted, the timer is greater than @c MAX_TIMEOUT
177 static void ipv4_frag_expired ( struct retry_timer *timer __unused,
178 int over ) {
179 if ( over ) {
180 DBG ( "Fragment reassembly timeout" );
181 /* Free the fragment buffer */
186 * Free fragment buffer
188 * @v fragbug Fragment buffer
190 static void free_fragbuf ( struct frag_buffer *fragbuf ) {
191 free ( fragbuf );
195 * Fragment reassembler
197 * @v iobuf I/O buffer, fragment of the datagram
198 * @ret frag_iob Reassembled packet, or NULL
200 static struct io_buffer * ipv4_reassemble ( struct io_buffer * iobuf ) {
201 struct iphdr *iphdr = iobuf->data;
202 struct frag_buffer *fragbuf;
205 * Check if the fragment belongs to any fragment series
207 list_for_each_entry ( fragbuf, &frag_buffers, list ) {
208 if ( fragbuf->ident == iphdr->ident &&
209 fragbuf->src.s_addr == iphdr->src.s_addr ) {
211 * Check if the packet is the expected fragment
213 * The offset of the new packet must be equal to the
214 * length of the data accumulated so far (the length of
215 * the reassembled I/O buffer
217 if ( iob_len ( fragbuf->frag_iob ) ==
218 ( iphdr->frags & IP_MASK_OFFSET ) ) {
220 * Append the contents of the fragment to the
221 * reassembled I/O buffer
223 iob_pull ( iobuf, sizeof ( *iphdr ) );
224 memcpy ( iob_put ( fragbuf->frag_iob,
225 iob_len ( iobuf ) ),
226 iobuf->data, iob_len ( iobuf ) );
227 free_iob ( iobuf );
229 /** Check if the fragment series is over */
230 if ( !iphdr->frags & IP_MASK_MOREFRAGS ) {
231 iobuf = fragbuf->frag_iob;
232 free_fragbuf ( fragbuf );
233 return iobuf;
236 } else {
237 /* Discard the fragment series */
238 free_fragbuf ( fragbuf );
239 free_iob ( iobuf );
241 return NULL;
245 /** Check if the fragment is the first in the fragment series */
246 if ( iphdr->frags & IP_MASK_MOREFRAGS &&
247 ( ( iphdr->frags & IP_MASK_OFFSET ) == 0 ) ) {
249 /** Create a new fragment buffer */
250 fragbuf = ( struct frag_buffer* ) malloc ( sizeof( *fragbuf ) );
251 fragbuf->ident = iphdr->ident;
252 fragbuf->src = iphdr->src;
254 /* Set up the reassembly I/O buffer */
255 fragbuf->frag_iob = alloc_iob ( IP_FRAG_IOB_SIZE );
256 iob_pull ( iobuf, sizeof ( *iphdr ) );
257 memcpy ( iob_put ( fragbuf->frag_iob, iob_len ( iobuf ) ),
258 iobuf->data, iob_len ( iobuf ) );
259 free_iob ( iobuf );
261 /* Set the reassembly timer */
262 fragbuf->frag_timer.timeout = IP_FRAG_TIMEOUT;
263 fragbuf->frag_timer.expired = ipv4_frag_expired;
264 start_timer ( &fragbuf->frag_timer );
266 /* Add the fragment buffer to the list of fragment buffers */
267 list_add ( &fragbuf->list, &frag_buffers );
270 return NULL;
274 * Add IPv4 pseudo-header checksum to existing checksum
276 * @v iobuf I/O buffer
277 * @v csum Existing checksum
278 * @ret csum Updated checksum
280 static uint16_t ipv4_pshdr_chksum ( struct io_buffer *iobuf, uint16_t csum ) {
281 struct ipv4_pseudo_header pshdr;
282 struct iphdr *iphdr = iobuf->data;
283 size_t hdrlen = ( ( iphdr->verhdrlen & IP_MASK_HLEN ) * 4 );
285 /* Build pseudo-header */
286 pshdr.src = iphdr->src;
287 pshdr.dest = iphdr->dest;
288 pshdr.zero_padding = 0x00;
289 pshdr.protocol = iphdr->protocol;
290 pshdr.len = htons ( iob_len ( iobuf ) - hdrlen );
292 /* Update the checksum value */
293 return tcpip_continue_chksum ( csum, &pshdr, sizeof ( pshdr ) );
297 * Determine link-layer address
299 * @v dest IPv4 destination address
300 * @v src IPv4 source address
301 * @v netdev Network device
302 * @v ll_dest Link-layer destination address buffer
303 * @ret rc Return status code
305 static int ipv4_ll_addr ( struct in_addr dest, struct in_addr src,
306 struct net_device *netdev, uint8_t *ll_dest ) {
307 struct ll_protocol *ll_protocol = netdev->ll_protocol;
308 uint8_t *dest_bytes = ( ( uint8_t * ) &dest );
310 if ( dest.s_addr == INADDR_BROADCAST ) {
311 /* Broadcast address */
312 memcpy ( ll_dest, ll_protocol->ll_broadcast,
313 ll_protocol->ll_addr_len );
314 return 0;
315 } else if ( IN_MULTICAST ( dest.s_addr ) ) {
316 /* Special case: IPv4 multicast over Ethernet. This
317 * code may need to be generalised once we find out
318 * what happens for other link layers.
320 ll_dest[0] = 0x01;
321 ll_dest[1] = 0x00;
322 ll_dest[2] = 0x5e;
323 ll_dest[3] = dest_bytes[1] & 0x7f;
324 ll_dest[4] = dest_bytes[2];
325 ll_dest[5] = dest_bytes[3];
326 return 0;
327 } else {
328 /* Unicast address: resolve via ARP */
329 return arp_resolve ( netdev, &ipv4_protocol, &dest,
330 &src, ll_dest );
335 * Transmit IP packet
337 * @v iobuf I/O buffer
338 * @v tcpip Transport-layer protocol
339 * @v st_dest Destination network-layer address
340 * @v netdev Network device to use if no route found, or NULL
341 * @v trans_csum Transport-layer checksum to complete, or NULL
342 * @ret rc Status
344 * This function expects a transport-layer segment and prepends the IP header
346 static int ipv4_tx ( struct io_buffer *iobuf,
347 struct tcpip_protocol *tcpip_protocol,
348 struct sockaddr_tcpip *st_dest,
349 struct net_device *netdev,
350 uint16_t *trans_csum ) {
351 struct iphdr *iphdr = iob_push ( iobuf, sizeof ( *iphdr ) );
352 struct sockaddr_in *sin_dest = ( ( struct sockaddr_in * ) st_dest );
353 struct ipv4_miniroute *miniroute;
354 struct in_addr next_hop;
355 uint8_t ll_dest[MAX_LL_ADDR_LEN];
356 int rc;
358 /* Fill up the IP header, except source address */
359 memset ( iphdr, 0, sizeof ( *iphdr ) );
360 iphdr->verhdrlen = ( IP_VER | ( sizeof ( *iphdr ) / 4 ) );
361 iphdr->service = IP_TOS;
362 iphdr->len = htons ( iob_len ( iobuf ) );
363 iphdr->ident = htons ( ++next_ident );
364 iphdr->ttl = IP_TTL;
365 iphdr->protocol = tcpip_protocol->tcpip_proto;
366 iphdr->dest = sin_dest->sin_addr;
368 /* Use routing table to identify next hop and transmitting netdev */
369 next_hop = iphdr->dest;
370 if ( ( miniroute = ipv4_route ( &next_hop ) ) ) {
371 iphdr->src = miniroute->address;
372 netdev = miniroute->netdev;
374 if ( ! netdev ) {
375 DBG ( "IPv4 has no route to %s\n", inet_ntoa ( iphdr->dest ) );
376 rc = -ENETUNREACH;
377 goto err;
380 /* Determine link-layer destination address */
381 if ( ( rc = ipv4_ll_addr ( next_hop, iphdr->src, netdev,
382 ll_dest ) ) != 0 ) {
383 DBG ( "IPv4 has no link-layer address for %s: %s\n",
384 inet_ntoa ( next_hop ), strerror ( rc ) );
385 goto err;
388 /* Fix up checksums */
389 if ( trans_csum )
390 *trans_csum = ipv4_pshdr_chksum ( iobuf, *trans_csum );
391 iphdr->chksum = tcpip_chksum ( iphdr, sizeof ( *iphdr ) );
393 /* Print IP4 header for debugging */
394 DBG ( "IPv4 TX %s->", inet_ntoa ( iphdr->src ) );
395 DBG ( "%s len %d proto %d id %04x csum %04x\n",
396 inet_ntoa ( iphdr->dest ), ntohs ( iphdr->len ), iphdr->protocol,
397 ntohs ( iphdr->ident ), ntohs ( iphdr->chksum ) );
399 /* Hand off to link layer */
400 if ( ( rc = net_tx ( iobuf, netdev, &ipv4_protocol, ll_dest ) ) != 0 ) {
401 DBG ( "IPv4 could not transmit packet via %s: %s\n",
402 netdev->name, strerror ( rc ) );
403 return rc;
406 return 0;
408 err:
409 free_iob ( iobuf );
410 return rc;
414 * Process incoming packets
416 * @v iobuf I/O buffer
417 * @v netdev Network device
418 * @v ll_source Link-layer destination source
420 * This function expects an IP4 network datagram. It processes the headers
421 * and sends it to the transport layer.
423 static int ipv4_rx ( struct io_buffer *iobuf, struct net_device *netdev __unused,
424 const void *ll_source __unused ) {
425 struct iphdr *iphdr = iobuf->data;
426 size_t hdrlen;
427 size_t len;
428 union {
429 struct sockaddr_in sin;
430 struct sockaddr_tcpip st;
431 } src, dest;
432 uint16_t csum;
433 uint16_t pshdr_csum;
434 int rc;
436 /* Sanity check the IPv4 header */
437 if ( iob_len ( iobuf ) < sizeof ( *iphdr ) ) {
438 DBG ( "IPv4 packet too short at %d bytes (min %d bytes)\n",
439 iob_len ( iobuf ), sizeof ( *iphdr ) );
440 goto err;
442 if ( ( iphdr->verhdrlen & IP_MASK_VER ) != IP_VER ) {
443 DBG ( "IPv4 version %#02x not supported\n", iphdr->verhdrlen );
444 goto err;
446 hdrlen = ( ( iphdr->verhdrlen & IP_MASK_HLEN ) * 4 );
447 if ( hdrlen < sizeof ( *iphdr ) ) {
448 DBG ( "IPv4 header too short at %d bytes (min %d bytes)\n",
449 hdrlen, sizeof ( *iphdr ) );
450 goto err;
452 if ( hdrlen > iob_len ( iobuf ) ) {
453 DBG ( "IPv4 header too long at %d bytes "
454 "(packet is %d bytes)\n", hdrlen, iob_len ( iobuf ) );
455 goto err;
457 if ( ( csum = tcpip_chksum ( iphdr, hdrlen ) ) != 0 ) {
458 DBG ( "IPv4 checksum incorrect (is %04x including checksum "
459 "field, should be 0000)\n", csum );
460 goto err;
462 len = ntohs ( iphdr->len );
463 if ( len < hdrlen ) {
464 DBG ( "IPv4 length too short at %d bytes "
465 "(header is %d bytes)\n", len, hdrlen );
466 goto err;
468 if ( len > iob_len ( iobuf ) ) {
469 DBG ( "IPv4 length too long at %d bytes "
470 "(packet is %d bytes)\n", len, iob_len ( iobuf ) );
471 goto err;
474 /* Print IPv4 header for debugging */
475 DBG ( "IPv4 RX %s<-", inet_ntoa ( iphdr->dest ) );
476 DBG ( "%s len %d proto %d id %04x csum %04x\n",
477 inet_ntoa ( iphdr->src ), ntohs ( iphdr->len ), iphdr->protocol,
478 ntohs ( iphdr->ident ), ntohs ( iphdr->chksum ) );
480 /* Truncate packet to correct length, calculate pseudo-header
481 * checksum and then strip off the IPv4 header.
483 iob_unput ( iobuf, ( iob_len ( iobuf ) - len ) );
484 pshdr_csum = ipv4_pshdr_chksum ( iobuf, TCPIP_EMPTY_CSUM );
485 iob_pull ( iobuf, hdrlen );
487 /* Fragment reassembly */
488 if ( ( iphdr->frags & htons ( IP_MASK_MOREFRAGS ) ) ||
489 ( ( iphdr->frags & htons ( IP_MASK_OFFSET ) ) != 0 ) ) {
490 /* Pass the fragment to ipv4_reassemble() which either
491 * returns a fully reassembled I/O buffer or NULL.
493 iobuf = ipv4_reassemble ( iobuf );
494 if ( ! iobuf )
495 return 0;
498 /* Construct socket addresses and hand off to transport layer */
499 memset ( &src, 0, sizeof ( src ) );
500 src.sin.sin_family = AF_INET;
501 src.sin.sin_addr = iphdr->src;
502 memset ( &dest, 0, sizeof ( dest ) );
503 dest.sin.sin_family = AF_INET;
504 dest.sin.sin_addr = iphdr->dest;
505 if ( ( rc = tcpip_rx ( iobuf, iphdr->protocol, &src.st,
506 &dest.st, pshdr_csum ) ) != 0 ) {
507 DBG ( "IPv4 received packet rejected by stack: %s\n",
508 strerror ( rc ) );
509 return rc;
512 return 0;
514 err:
515 free_iob ( iobuf );
516 return -EINVAL;
519 /**
520 * Check existence of IPv4 address for ARP
522 * @v netdev Network device
523 * @v net_addr Network-layer address
524 * @ret rc Return status code
526 static int ipv4_arp_check ( struct net_device *netdev, const void *net_addr ) {
527 const struct in_addr *address = net_addr;
528 struct ipv4_miniroute *miniroute;
530 list_for_each_entry ( miniroute, &ipv4_miniroutes, list ) {
531 if ( ( miniroute->netdev == netdev ) &&
532 ( miniroute->address.s_addr == address->s_addr ) ) {
533 /* Found matching address */
534 return 0;
537 return -ENOENT;
/**
 * Convert IPv4 address to dotted-quad notation
 *
 * @v in		IP address
 * @ret string		IP address in dotted-quad notation
 *
 * The result lives in a single static buffer, so each call overwrites
 * the string returned by the previous call.
 */
char * inet_ntoa ( struct in_addr in ) {
	static char text[16]; /* "xxx.xxx.xxx.xxx" plus terminating NUL */
	uint8_t octets[4];

	/* Take the four address bytes in the order they are stored */
	memcpy ( octets, &in, sizeof ( octets ) );
	sprintf ( text, "%d.%d.%d.%d",
		  octets[0], octets[1], octets[2], octets[3] );
	return text;
}
555 * Transcribe IP address
557 * @v net_addr IP address
558 * @ret string IP address in dotted-quad notation
561 static const char * ipv4_ntoa ( const void *net_addr ) {
562 return inet_ntoa ( * ( ( struct in_addr * ) net_addr ) );
565 /** IPv4 protocol */
566 struct net_protocol ipv4_protocol __net_protocol = {
567 .name = "IP",
568 .net_proto = htons ( ETH_P_IP ),
569 .net_addr_len = sizeof ( struct in_addr ),
570 .rx = ipv4_rx,
571 .ntoa = ipv4_ntoa,
574 /** IPv4 TCPIP net protocol */
575 struct tcpip_net_protocol ipv4_tcpip_protocol __tcpip_net_protocol = {
576 .name = "IPv4",
577 .sa_family = AF_INET,
578 .tx = ipv4_tx,
581 /** IPv4 ARP protocol */
582 struct arp_net_protocol ipv4_arp_protocol __arp_net_protocol = {
583 .net_protocol = &ipv4_protocol,
584 .check = ipv4_arp_check,