[ipv6] Enable router solicitations to timeout
[gpxe.git] / src / net / ipv4.c
blob92d0684189403258be7336882bcb810dbbc226d4
1 #include <string.h>
2 #include <stdint.h>
3 #include <stdlib.h>
4 #include <stdio.h>
5 #include <errno.h>
6 #include <byteswap.h>
7 #include <gpxe/list.h>
8 #include <gpxe/in.h>
9 #include <gpxe/arp.h>
10 #include <gpxe/if_ether.h>
11 #include <gpxe/iobuf.h>
12 #include <gpxe/netdevice.h>
13 #include <gpxe/ip.h>
14 #include <gpxe/tcpip.h>
15 #include <gpxe/dhcp.h>
16 #include <gpxe/settings.h>
18 /** @file
20 * IPv4 protocol
24 FILE_LICENCE ( GPL2_OR_LATER );
26 /* Unique IP datagram identification number */
27 static uint16_t next_ident = 0;
29 struct net_protocol ipv4_protocol;
31 /** List of IPv4 miniroutes */
32 struct list_head ipv4_miniroutes = LIST_HEAD_INIT ( ipv4_miniroutes );
34 /** List of fragment reassembly buffers */
35 static LIST_HEAD ( frag_buffers );
37 /**
38 * Add IPv4 minirouting table entry
40 * @v netdev Network device
41 * @v address IPv4 address
42 * @v netmask Subnet mask
43 * @v gateway Gateway address (if any)
44 * @ret miniroute Routing table entry, or NULL
46 static struct ipv4_miniroute * __malloc
47 add_ipv4_miniroute ( struct net_device *netdev, struct in_addr address,
48 struct in_addr netmask, struct in_addr gateway ) {
49 struct ipv4_miniroute *miniroute;
51 DBG ( "IPv4 add %s", inet_ntoa ( address ) );
52 DBG ( "/%s ", inet_ntoa ( netmask ) );
53 if ( gateway.s_addr )
54 DBG ( "gw %s ", inet_ntoa ( gateway ) );
55 DBG ( "via %s\n", netdev->name );
57 /* Allocate and populate miniroute structure */
58 miniroute = malloc ( sizeof ( *miniroute ) );
59 if ( ! miniroute ) {
60 DBG ( "IPv4 could not add miniroute\n" );
61 return NULL;
64 /* Record routing information */
65 miniroute->netdev = netdev_get ( netdev );
66 miniroute->address = address;
67 miniroute->netmask = netmask;
68 miniroute->gateway = gateway;
70 /* Add to end of list if we have a gateway, otherwise
71 * to start of list.
73 if ( gateway.s_addr ) {
74 list_add_tail ( &miniroute->list, &ipv4_miniroutes );
75 } else {
76 list_add ( &miniroute->list, &ipv4_miniroutes );
79 return miniroute;
82 /**
83 * Delete IPv4 minirouting table entry
85 * @v miniroute Routing table entry
87 static void del_ipv4_miniroute ( struct ipv4_miniroute *miniroute ) {
89 DBG ( "IPv4 del %s", inet_ntoa ( miniroute->address ) );
90 DBG ( "/%s ", inet_ntoa ( miniroute->netmask ) );
91 if ( miniroute->gateway.s_addr )
92 DBG ( "gw %s ", inet_ntoa ( miniroute->gateway ) );
93 DBG ( "via %s\n", miniroute->netdev->name );
95 netdev_put ( miniroute->netdev );
96 list_del ( &miniroute->list );
97 free ( miniroute );
101 * Perform IPv4 routing
103 * @v dest Final destination address
104 * @ret dest Next hop destination address
105 * @ret miniroute Routing table entry to use, or NULL if no route
107 * If the route requires use of a gateway, the next hop destination
108 * address will be overwritten with the gateway address.
110 static struct ipv4_miniroute * ipv4_route ( struct in_addr *dest ) {
111 struct ipv4_miniroute *miniroute;
112 int local;
113 int has_gw;
115 /* Never attempt to route the broadcast address */
116 if ( dest->s_addr == INADDR_BROADCAST )
117 return NULL;
119 /* Find first usable route in routing table */
120 list_for_each_entry ( miniroute, &ipv4_miniroutes, list ) {
121 if ( ! netdev_is_open ( miniroute->netdev ) )
122 continue;
123 local = ( ( ( dest->s_addr ^ miniroute->address.s_addr )
124 & miniroute->netmask.s_addr ) == 0 );
125 has_gw = ( miniroute->gateway.s_addr );
126 if ( local || has_gw ) {
127 if ( ! local )
128 *dest = miniroute->gateway;
129 return miniroute;
133 return NULL;
137 * Fragment reassembly counter timeout
139 * @v timer Retry timer
140 * @v over If asserted, the timer is greater than @c MAX_TIMEOUT
142 static void ipv4_frag_expired ( struct retry_timer *timer __unused,
143 int over ) {
144 if ( over ) {
145 DBG ( "Fragment reassembly timeout" );
146 /* Free the fragment buffer */
151 * Free fragment buffer
153 * @v fragbug Fragment buffer
155 static void free_fragbuf ( struct frag_buffer *fragbuf ) {
156 free ( fragbuf );
160 * Fragment reassembler
162 * @v iobuf I/O buffer, fragment of the datagram
163 * @ret frag_iob Reassembled packet, or NULL
165 static struct io_buffer * ipv4_reassemble ( struct io_buffer * iobuf ) {
166 struct iphdr *iphdr = iobuf->data;
167 struct frag_buffer *fragbuf;
170 * Check if the fragment belongs to any fragment series
172 list_for_each_entry ( fragbuf, &frag_buffers, list ) {
173 if ( fragbuf->ident == iphdr->ident &&
174 fragbuf->src.s_addr == iphdr->src.s_addr ) {
176 * Check if the packet is the expected fragment
178 * The offset of the new packet must be equal to the
179 * length of the data accumulated so far (the length of
180 * the reassembled I/O buffer
182 if ( iob_len ( fragbuf->frag_iob ) ==
183 ( iphdr->frags & IP_MASK_OFFSET ) ) {
185 * Append the contents of the fragment to the
186 * reassembled I/O buffer
188 iob_pull ( iobuf, sizeof ( *iphdr ) );
189 memcpy ( iob_put ( fragbuf->frag_iob,
190 iob_len ( iobuf ) ),
191 iobuf->data, iob_len ( iobuf ) );
192 free_iob ( iobuf );
194 /** Check if the fragment series is over */
195 if ( ! ( iphdr->frags & IP_MASK_MOREFRAGS ) ) {
196 iobuf = fragbuf->frag_iob;
197 free_fragbuf ( fragbuf );
198 return iobuf;
201 } else {
202 /* Discard the fragment series */
203 free_fragbuf ( fragbuf );
204 free_iob ( iobuf );
206 return NULL;
210 /** Check if the fragment is the first in the fragment series */
211 if ( iphdr->frags & IP_MASK_MOREFRAGS &&
212 ( ( iphdr->frags & IP_MASK_OFFSET ) == 0 ) ) {
214 /** Create a new fragment buffer */
215 fragbuf = ( struct frag_buffer* ) malloc ( sizeof( *fragbuf ) );
216 fragbuf->ident = iphdr->ident;
217 fragbuf->src = iphdr->src;
219 /* Set up the reassembly I/O buffer */
220 fragbuf->frag_iob = alloc_iob ( IP_FRAG_IOB_SIZE );
221 iob_pull ( iobuf, sizeof ( *iphdr ) );
222 memcpy ( iob_put ( fragbuf->frag_iob, iob_len ( iobuf ) ),
223 iobuf->data, iob_len ( iobuf ) );
224 free_iob ( iobuf );
226 /* Set the reassembly timer */
227 timer_init ( &fragbuf->frag_timer, ipv4_frag_expired );
228 start_timer_fixed ( &fragbuf->frag_timer, IP_FRAG_TIMEOUT );
230 /* Add the fragment buffer to the list of fragment buffers */
231 list_add ( &fragbuf->list, &frag_buffers );
234 return NULL;
238 * Add IPv4 pseudo-header checksum to existing checksum
240 * @v iobuf I/O buffer
241 * @v csum Existing checksum
242 * @ret csum Updated checksum
244 static uint16_t ipv4_pshdr_chksum ( struct io_buffer *iobuf, uint16_t csum ) {
245 struct ipv4_pseudo_header pshdr;
246 struct iphdr *iphdr = iobuf->data;
247 size_t hdrlen = ( ( iphdr->verhdrlen & IP_MASK_HLEN ) * 4 );
249 /* Build pseudo-header */
250 pshdr.src = iphdr->src;
251 pshdr.dest = iphdr->dest;
252 pshdr.zero_padding = 0x00;
253 pshdr.protocol = iphdr->protocol;
254 pshdr.len = htons ( iob_len ( iobuf ) - hdrlen );
256 /* Update the checksum value */
257 return tcpip_continue_chksum ( csum, &pshdr, sizeof ( pshdr ) );
261 * Determine link-layer address
263 * @v dest IPv4 destination address
264 * @v src IPv4 source address
265 * @v netdev Network device
266 * @v ll_dest Link-layer destination address buffer
267 * @ret rc Return status code
269 static int ipv4_ll_addr ( struct in_addr dest, struct in_addr src,
270 struct net_device *netdev, uint8_t *ll_dest ) {
271 struct ll_protocol *ll_protocol = netdev->ll_protocol;
273 if ( dest.s_addr == INADDR_BROADCAST ) {
274 /* Broadcast address */
275 memcpy ( ll_dest, netdev->ll_broadcast,
276 ll_protocol->ll_addr_len );
277 return 0;
278 } else if ( IN_MULTICAST ( ntohl ( dest.s_addr ) ) ) {
279 return ll_protocol->mc_hash ( AF_INET, &dest, ll_dest );
280 } else {
281 /* Unicast address: resolve via ARP */
282 return arp_resolve ( netdev, &ipv4_protocol, &dest,
283 &src, ll_dest );
288 * Transmit IP packet
290 * @v iobuf I/O buffer
291 * @v tcpip Transport-layer protocol
292 * @v st_src Source network-layer address
293 * @v st_dest Destination network-layer address
294 * @v netdev Network device to use if no route found, or NULL
295 * @v trans_csum Transport-layer checksum to complete, or NULL
296 * @ret rc Status
298 * This function expects a transport-layer segment and prepends the IP header
300 static int ipv4_tx ( struct io_buffer *iobuf,
301 struct tcpip_protocol *tcpip_protocol,
302 struct sockaddr_tcpip *st_src,
303 struct sockaddr_tcpip *st_dest,
304 struct net_device *netdev,
305 uint16_t *trans_csum ) {
306 struct iphdr *iphdr = iob_push ( iobuf, sizeof ( *iphdr ) );
307 struct sockaddr_in *sin_src = ( ( struct sockaddr_in * ) st_src );
308 struct sockaddr_in *sin_dest = ( ( struct sockaddr_in * ) st_dest );
309 struct ipv4_miniroute *miniroute;
310 struct in_addr next_hop;
311 uint8_t ll_dest[MAX_LL_ADDR_LEN];
312 int rc;
314 /* Fill up the IP header, except source address */
315 memset ( iphdr, 0, sizeof ( *iphdr ) );
316 iphdr->verhdrlen = ( IP_VER | ( sizeof ( *iphdr ) / 4 ) );
317 iphdr->service = IP_TOS;
318 iphdr->len = htons ( iob_len ( iobuf ) );
319 iphdr->ident = htons ( ++next_ident );
320 iphdr->ttl = IP_TTL;
321 iphdr->protocol = tcpip_protocol->tcpip_proto;
322 iphdr->dest = sin_dest->sin_addr;
324 /* Use routing table to identify next hop and transmitting netdev */
325 next_hop = iphdr->dest;
326 if ( sin_src )
327 iphdr->src = sin_src->sin_addr;
328 if ( ( next_hop.s_addr != INADDR_BROADCAST ) &&
329 ( ! IN_MULTICAST ( ntohl ( next_hop.s_addr ) ) ) &&
330 ( ( miniroute = ipv4_route ( &next_hop ) ) != NULL ) ) {
331 iphdr->src = miniroute->address;
332 netdev = miniroute->netdev;
334 if ( ! netdev ) {
335 DBG ( "IPv4 has no route to %s\n", inet_ntoa ( iphdr->dest ) );
336 rc = -ENETUNREACH;
337 goto err;
340 /* Determine link-layer destination address */
341 if ( ( rc = ipv4_ll_addr ( next_hop, iphdr->src, netdev,
342 ll_dest ) ) != 0 ) {
343 DBG ( "IPv4 has no link-layer address for %s: %s\n",
344 inet_ntoa ( next_hop ), strerror ( rc ) );
345 goto err;
348 /* Fix up checksums */
349 if ( trans_csum )
350 *trans_csum = ipv4_pshdr_chksum ( iobuf, *trans_csum );
351 iphdr->chksum = tcpip_chksum ( iphdr, sizeof ( *iphdr ) );
353 /* Print IP4 header for debugging */
354 DBG ( "IPv4 TX %s->", inet_ntoa ( iphdr->src ) );
355 DBG ( "%s len %d proto %d id %04x csum %04x\n",
356 inet_ntoa ( iphdr->dest ), ntohs ( iphdr->len ), iphdr->protocol,
357 ntohs ( iphdr->ident ), ntohs ( iphdr->chksum ) );
359 /* Hand off to link layer */
360 if ( ( rc = net_tx ( iobuf, netdev, &ipv4_protocol, ll_dest ) ) != 0 ) {
361 DBG ( "IPv4 could not transmit packet via %s: %s\n",
362 netdev->name, strerror ( rc ) );
363 return rc;
366 return 0;
368 err:
369 free_iob ( iobuf );
370 return rc;
374 * Process incoming packets
376 * @v iobuf I/O buffer
377 * @v netdev Network device
378 * @v ll_source Link-layer destination source
380 * This function expects an IP4 network datagram. It processes the headers
381 * and sends it to the transport layer.
383 static int ipv4_rx ( struct io_buffer *iobuf, struct net_device *netdev __unused,
384 const void *ll_source __unused ) {
385 struct iphdr *iphdr = iobuf->data;
386 size_t hdrlen;
387 size_t len;
388 union {
389 struct sockaddr_in sin;
390 struct sockaddr_tcpip st;
391 } src, dest;
392 uint16_t csum;
393 uint16_t pshdr_csum;
394 int rc;
396 /* Sanity check the IPv4 header */
397 if ( iob_len ( iobuf ) < sizeof ( *iphdr ) ) {
398 DBG ( "IPv4 packet too short at %zd bytes (min %zd bytes)\n",
399 iob_len ( iobuf ), sizeof ( *iphdr ) );
400 goto err;
402 if ( ( iphdr->verhdrlen & IP_MASK_VER ) != IP_VER ) {
403 DBG ( "IPv4 version %#02x not supported\n", iphdr->verhdrlen );
404 goto err;
406 hdrlen = ( ( iphdr->verhdrlen & IP_MASK_HLEN ) * 4 );
407 if ( hdrlen < sizeof ( *iphdr ) ) {
408 DBG ( "IPv4 header too short at %zd bytes (min %zd bytes)\n",
409 hdrlen, sizeof ( *iphdr ) );
410 goto err;
412 if ( hdrlen > iob_len ( iobuf ) ) {
413 DBG ( "IPv4 header too long at %zd bytes "
414 "(packet is %zd bytes)\n", hdrlen, iob_len ( iobuf ) );
415 goto err;
417 if ( ( csum = tcpip_chksum ( iphdr, hdrlen ) ) != 0 ) {
418 DBG ( "IPv4 checksum incorrect (is %04x including checksum "
419 "field, should be 0000)\n", csum );
420 goto err;
422 len = ntohs ( iphdr->len );
423 if ( len < hdrlen ) {
424 DBG ( "IPv4 length too short at %zd bytes "
425 "(header is %zd bytes)\n", len, hdrlen );
426 goto err;
428 if ( len > iob_len ( iobuf ) ) {
429 DBG ( "IPv4 length too long at %zd bytes "
430 "(packet is %zd bytes)\n", len, iob_len ( iobuf ) );
431 goto err;
434 /* Print IPv4 header for debugging */
435 DBG ( "IPv4 RX %s<-", inet_ntoa ( iphdr->dest ) );
436 DBG ( "%s len %d proto %d id %04x csum %04x\n",
437 inet_ntoa ( iphdr->src ), ntohs ( iphdr->len ), iphdr->protocol,
438 ntohs ( iphdr->ident ), ntohs ( iphdr->chksum ) );
440 /* Truncate packet to correct length, calculate pseudo-header
441 * checksum and then strip off the IPv4 header.
443 iob_unput ( iobuf, ( iob_len ( iobuf ) - len ) );
444 pshdr_csum = ipv4_pshdr_chksum ( iobuf, TCPIP_EMPTY_CSUM );
445 iob_pull ( iobuf, hdrlen );
447 /* Fragment reassembly */
448 if ( ( iphdr->frags & htons ( IP_MASK_MOREFRAGS ) ) ||
449 ( ( iphdr->frags & htons ( IP_MASK_OFFSET ) ) != 0 ) ) {
450 /* Pass the fragment to ipv4_reassemble() which either
451 * returns a fully reassembled I/O buffer or NULL.
453 iobuf = ipv4_reassemble ( iobuf );
454 if ( ! iobuf )
455 return 0;
458 /* Construct socket addresses and hand off to transport layer */
459 memset ( &src, 0, sizeof ( src ) );
460 src.sin.sin_family = AF_INET;
461 src.sin.sin_addr = iphdr->src;
462 memset ( &dest, 0, sizeof ( dest ) );
463 dest.sin.sin_family = AF_INET;
464 dest.sin.sin_addr = iphdr->dest;
465 if ( ( rc = tcpip_rx ( iobuf, iphdr->protocol, &src.st,
466 &dest.st, pshdr_csum ) ) != 0 ) {
467 DBG ( "IPv4 received packet rejected by stack: %s\n",
468 strerror ( rc ) );
469 return rc;
472 return 0;
474 err:
475 free_iob ( iobuf );
476 return -EINVAL;
479 /**
480 * Check existence of IPv4 address for ARP
482 * @v netdev Network device
483 * @v net_addr Network-layer address
484 * @ret rc Return status code
486 static int ipv4_arp_check ( struct net_device *netdev, const void *net_addr ) {
487 const struct in_addr *address = net_addr;
488 struct ipv4_miniroute *miniroute;
490 list_for_each_entry ( miniroute, &ipv4_miniroutes, list ) {
491 if ( ( miniroute->netdev == netdev ) &&
492 ( miniroute->address.s_addr == address->s_addr ) ) {
493 /* Found matching address */
494 return 0;
497 return -ENOENT;
/**
 * Convert IPv4 address to dotted-quad notation
 *
 * @v in	IP address
 * @ret string	IP address in dotted-quad notation
 *
 * The result lives in a single static buffer, so each call overwrites
 * the string returned by the previous one.
 */
char * inet_ntoa ( struct in_addr in ) {
	static char text[16]; /* "xxx.xxx.xxx.xxx" plus NUL */
	const uint8_t *octet = ( const uint8_t * ) &in;

	/* Octets are printed in the order they are stored in memory */
	sprintf ( text, "%d.%d.%d.%d",
		  octet[0], octet[1], octet[2], octet[3] );
	return text;
}
515 * Transcribe IP address
517 * @v net_addr IP address
518 * @ret string IP address in dotted-quad notation
521 static const char * ipv4_ntoa ( const void *net_addr ) {
522 return inet_ntoa ( * ( ( struct in_addr * ) net_addr ) );
525 /** IPv4 protocol */
526 struct net_protocol ipv4_protocol __net_protocol = {
527 .name = "IP",
528 .net_proto = htons ( ETH_P_IP ),
529 .net_addr_len = sizeof ( struct in_addr ),
530 .rx = ipv4_rx,
531 .ntoa = ipv4_ntoa,
534 /** IPv4 TCPIP net protocol */
535 struct tcpip_net_protocol ipv4_tcpip_protocol __tcpip_net_protocol = {
536 .name = "IPv4",
537 .sa_family = AF_INET,
538 .tx = ipv4_tx,
541 /** IPv4 ARP protocol */
542 struct arp_net_protocol ipv4_arp_protocol __arp_net_protocol = {
543 .net_protocol = &ipv4_protocol,
544 .check = ipv4_arp_check,
547 /******************************************************************************
549 * Settings
551 ******************************************************************************
554 /** IPv4 address setting */
555 struct setting ip_setting __setting = {
556 .name = "ip",
557 .description = "IPv4 address",
558 .tag = DHCP_EB_YIADDR,
559 .type = &setting_type_ipv4,
562 /** IPv4 subnet mask setting */
563 struct setting netmask_setting __setting = {
564 .name = "netmask",
565 .description = "IPv4 subnet mask",
566 .tag = DHCP_SUBNET_MASK,
567 .type = &setting_type_ipv4,
570 /** Default gateway setting */
571 struct setting gateway_setting __setting = {
572 .name = "gateway",
573 .description = "Default gateway",
574 .tag = DHCP_ROUTERS,
575 .type = &setting_type_ipv4,
579 * Create IPv4 routing table based on configured settings
581 * @ret rc Return status code
583 static int ipv4_create_routes ( void ) {
584 struct ipv4_miniroute *miniroute;
585 struct ipv4_miniroute *tmp;
586 struct net_device *netdev;
587 struct settings *settings;
588 struct in_addr address = { 0 };
589 struct in_addr netmask = { 0 };
590 struct in_addr gateway = { 0 };
592 /* Delete all existing routes */
593 list_for_each_entry_safe ( miniroute, tmp, &ipv4_miniroutes, list )
594 del_ipv4_miniroute ( miniroute );
596 /* Create a route for each configured network device */
597 for_each_netdev ( netdev ) {
598 settings = netdev_settings ( netdev );
599 /* Get IPv4 address */
600 address.s_addr = 0;
601 fetch_ipv4_setting ( settings, &ip_setting, &address );
602 if ( ! address.s_addr )
603 continue;
604 /* Get subnet mask */
605 fetch_ipv4_setting ( settings, &netmask_setting, &netmask );
606 /* Calculate default netmask, if necessary */
607 if ( ! netmask.s_addr ) {
608 if ( IN_CLASSA ( ntohl ( address.s_addr ) ) ) {
609 netmask.s_addr = htonl ( IN_CLASSA_NET );
610 } else if ( IN_CLASSB ( ntohl ( address.s_addr ) ) ) {
611 netmask.s_addr = htonl ( IN_CLASSB_NET );
612 } else if ( IN_CLASSC ( ntohl ( address.s_addr ) ) ) {
613 netmask.s_addr = htonl ( IN_CLASSC_NET );
616 /* Get default gateway, if present */
617 fetch_ipv4_setting ( settings, &gateway_setting, &gateway );
618 /* Configure route */
619 miniroute = add_ipv4_miniroute ( netdev, address,
620 netmask, gateway );
621 if ( ! miniroute )
622 return -ENOMEM;
625 return 0;
628 /** IPv4 settings applicator */
629 struct settings_applicator ipv4_settings_applicator __settings_applicator = {
630 .apply = ipv4_create_routes,
633 /* Drag in ICMP */
634 REQUIRE_OBJECT ( icmp );