Add memtest support.
[syslinux-debian/hramrach.git] / gpxe / src / net / ipv4.c
blob4c1393f2234c9a41f39404a6628db1479190572e
1 #include <string.h>
2 #include <stdint.h>
3 #include <stdlib.h>
4 #include <stdio.h>
5 #include <errno.h>
6 #include <byteswap.h>
7 #include <gpxe/list.h>
8 #include <gpxe/in.h>
9 #include <gpxe/arp.h>
10 #include <gpxe/if_ether.h>
11 #include <gpxe/iobuf.h>
12 #include <gpxe/netdevice.h>
13 #include <gpxe/ip.h>
14 #include <gpxe/tcpip.h>
15 #include <gpxe/dhcp.h>
16 #include <gpxe/settings.h>
/** @file
 *
 * IPv4 protocol
 *
 */
24 FILE_LICENCE ( GPL2_OR_LATER );
26 /* Unique IP datagram identification number */
27 static uint16_t next_ident = 0;
29 struct net_protocol ipv4_protocol;
31 /** List of IPv4 miniroutes */
32 struct list_head ipv4_miniroutes = LIST_HEAD_INIT ( ipv4_miniroutes );
34 /** List of fragment reassembly buffers */
35 static LIST_HEAD ( frag_buffers );
37 /**
38 * Add IPv4 minirouting table entry
40 * @v netdev Network device
41 * @v address IPv4 address
42 * @v netmask Subnet mask
43 * @v gateway Gateway address (if any)
44 * @ret miniroute Routing table entry, or NULL
46 static struct ipv4_miniroute * __malloc
47 add_ipv4_miniroute ( struct net_device *netdev, struct in_addr address,
48 struct in_addr netmask, struct in_addr gateway ) {
49 struct ipv4_miniroute *miniroute;
51 DBG ( "IPv4 add %s", inet_ntoa ( address ) );
52 DBG ( "/%s ", inet_ntoa ( netmask ) );
53 if ( gateway.s_addr )
54 DBG ( "gw %s ", inet_ntoa ( gateway ) );
55 DBG ( "via %s\n", netdev->name );
57 /* Allocate and populate miniroute structure */
58 miniroute = malloc ( sizeof ( *miniroute ) );
59 if ( ! miniroute ) {
60 DBG ( "IPv4 could not add miniroute\n" );
61 return NULL;
64 /* Record routing information */
65 miniroute->netdev = netdev_get ( netdev );
66 miniroute->address = address;
67 miniroute->netmask = netmask;
68 miniroute->gateway = gateway;
70 /* Add to end of list if we have a gateway, otherwise
71 * to start of list.
73 if ( gateway.s_addr ) {
74 list_add_tail ( &miniroute->list, &ipv4_miniroutes );
75 } else {
76 list_add ( &miniroute->list, &ipv4_miniroutes );
79 return miniroute;
82 /**
83 * Delete IPv4 minirouting table entry
85 * @v miniroute Routing table entry
87 static void del_ipv4_miniroute ( struct ipv4_miniroute *miniroute ) {
89 DBG ( "IPv4 del %s", inet_ntoa ( miniroute->address ) );
90 DBG ( "/%s ", inet_ntoa ( miniroute->netmask ) );
91 if ( miniroute->gateway.s_addr )
92 DBG ( "gw %s ", inet_ntoa ( miniroute->gateway ) );
93 DBG ( "via %s\n", miniroute->netdev->name );
95 netdev_put ( miniroute->netdev );
96 list_del ( &miniroute->list );
97 free ( miniroute );
101 * Perform IPv4 routing
103 * @v dest Final destination address
104 * @ret dest Next hop destination address
105 * @ret miniroute Routing table entry to use, or NULL if no route
107 * If the route requires use of a gateway, the next hop destination
108 * address will be overwritten with the gateway address.
110 static struct ipv4_miniroute * ipv4_route ( struct in_addr *dest ) {
111 struct ipv4_miniroute *miniroute;
112 int local;
113 int has_gw;
115 /* Never attempt to route the broadcast address */
116 if ( dest->s_addr == INADDR_BROADCAST )
117 return NULL;
119 /* Find first usable route in routing table */
120 list_for_each_entry ( miniroute, &ipv4_miniroutes, list ) {
121 if ( ! ( miniroute->netdev->state & NETDEV_OPEN ) )
122 continue;
123 local = ( ( ( dest->s_addr ^ miniroute->address.s_addr )
124 & miniroute->netmask.s_addr ) == 0 );
125 has_gw = ( miniroute->gateway.s_addr );
126 if ( local || has_gw ) {
127 if ( ! local )
128 *dest = miniroute->gateway;
129 return miniroute;
133 return NULL;
137 * Fragment reassembly counter timeout
139 * @v timer Retry timer
140 * @v over If asserted, the timer is greater than @c MAX_TIMEOUT
142 static void ipv4_frag_expired ( struct retry_timer *timer __unused,
143 int over ) {
144 if ( over ) {
145 DBG ( "Fragment reassembly timeout" );
146 /* Free the fragment buffer */
/**
 * Free fragment buffer
 *
 * @v fragbuf		Fragment buffer
 *
 * Releases only the descriptor itself.  The reassembly I/O buffer it
 * refers to is left untouched, and the descriptor is not unlinked
 * from any list; callers must handle both.
 */
static void free_fragbuf ( struct frag_buffer *fragbuf ) {
	free ( fragbuf );
}
160 * Fragment reassembler
162 * @v iobuf I/O buffer, fragment of the datagram
163 * @ret frag_iob Reassembled packet, or NULL
165 static struct io_buffer * ipv4_reassemble ( struct io_buffer * iobuf ) {
166 struct iphdr *iphdr = iobuf->data;
167 struct frag_buffer *fragbuf;
170 * Check if the fragment belongs to any fragment series
172 list_for_each_entry ( fragbuf, &frag_buffers, list ) {
173 if ( fragbuf->ident == iphdr->ident &&
174 fragbuf->src.s_addr == iphdr->src.s_addr ) {
176 * Check if the packet is the expected fragment
178 * The offset of the new packet must be equal to the
179 * length of the data accumulated so far (the length of
180 * the reassembled I/O buffer
182 if ( iob_len ( fragbuf->frag_iob ) ==
183 ( iphdr->frags & IP_MASK_OFFSET ) ) {
185 * Append the contents of the fragment to the
186 * reassembled I/O buffer
188 iob_pull ( iobuf, sizeof ( *iphdr ) );
189 memcpy ( iob_put ( fragbuf->frag_iob,
190 iob_len ( iobuf ) ),
191 iobuf->data, iob_len ( iobuf ) );
192 free_iob ( iobuf );
194 /** Check if the fragment series is over */
195 if ( ! ( iphdr->frags & IP_MASK_MOREFRAGS ) ) {
196 iobuf = fragbuf->frag_iob;
197 free_fragbuf ( fragbuf );
198 return iobuf;
201 } else {
202 /* Discard the fragment series */
203 free_fragbuf ( fragbuf );
204 free_iob ( iobuf );
206 return NULL;
210 /** Check if the fragment is the first in the fragment series */
211 if ( iphdr->frags & IP_MASK_MOREFRAGS &&
212 ( ( iphdr->frags & IP_MASK_OFFSET ) == 0 ) ) {
214 /** Create a new fragment buffer */
215 fragbuf = ( struct frag_buffer* ) malloc ( sizeof( *fragbuf ) );
216 fragbuf->ident = iphdr->ident;
217 fragbuf->src = iphdr->src;
219 /* Set up the reassembly I/O buffer */
220 fragbuf->frag_iob = alloc_iob ( IP_FRAG_IOB_SIZE );
221 iob_pull ( iobuf, sizeof ( *iphdr ) );
222 memcpy ( iob_put ( fragbuf->frag_iob, iob_len ( iobuf ) ),
223 iobuf->data, iob_len ( iobuf ) );
224 free_iob ( iobuf );
226 /* Set the reassembly timer */
227 fragbuf->frag_timer.timeout = IP_FRAG_TIMEOUT;
228 fragbuf->frag_timer.expired = ipv4_frag_expired;
229 start_timer ( &fragbuf->frag_timer );
231 /* Add the fragment buffer to the list of fragment buffers */
232 list_add ( &fragbuf->list, &frag_buffers );
235 return NULL;
239 * Add IPv4 pseudo-header checksum to existing checksum
241 * @v iobuf I/O buffer
242 * @v csum Existing checksum
243 * @ret csum Updated checksum
245 static uint16_t ipv4_pshdr_chksum ( struct io_buffer *iobuf, uint16_t csum ) {
246 struct ipv4_pseudo_header pshdr;
247 struct iphdr *iphdr = iobuf->data;
248 size_t hdrlen = ( ( iphdr->verhdrlen & IP_MASK_HLEN ) * 4 );
250 /* Build pseudo-header */
251 pshdr.src = iphdr->src;
252 pshdr.dest = iphdr->dest;
253 pshdr.zero_padding = 0x00;
254 pshdr.protocol = iphdr->protocol;
255 pshdr.len = htons ( iob_len ( iobuf ) - hdrlen );
257 /* Update the checksum value */
258 return tcpip_continue_chksum ( csum, &pshdr, sizeof ( pshdr ) );
262 * Determine link-layer address
264 * @v dest IPv4 destination address
265 * @v src IPv4 source address
266 * @v netdev Network device
267 * @v ll_dest Link-layer destination address buffer
268 * @ret rc Return status code
270 static int ipv4_ll_addr ( struct in_addr dest, struct in_addr src,
271 struct net_device *netdev, uint8_t *ll_dest ) {
272 struct ll_protocol *ll_protocol = netdev->ll_protocol;
274 if ( dest.s_addr == INADDR_BROADCAST ) {
275 /* Broadcast address */
276 memcpy ( ll_dest, netdev->ll_broadcast,
277 ll_protocol->ll_addr_len );
278 return 0;
279 } else if ( IN_MULTICAST ( ntohl ( dest.s_addr ) ) ) {
280 return ll_protocol->mc_hash ( AF_INET, &dest, ll_dest );
281 } else {
282 /* Unicast address: resolve via ARP */
283 return arp_resolve ( netdev, &ipv4_protocol, &dest,
284 &src, ll_dest );
289 * Transmit IP packet
291 * @v iobuf I/O buffer
292 * @v tcpip Transport-layer protocol
293 * @v st_src Source network-layer address
294 * @v st_dest Destination network-layer address
295 * @v netdev Network device to use if no route found, or NULL
296 * @v trans_csum Transport-layer checksum to complete, or NULL
297 * @ret rc Status
299 * This function expects a transport-layer segment and prepends the IP header
301 static int ipv4_tx ( struct io_buffer *iobuf,
302 struct tcpip_protocol *tcpip_protocol,
303 struct sockaddr_tcpip *st_src,
304 struct sockaddr_tcpip *st_dest,
305 struct net_device *netdev,
306 uint16_t *trans_csum ) {
307 struct iphdr *iphdr = iob_push ( iobuf, sizeof ( *iphdr ) );
308 struct sockaddr_in *sin_src = ( ( struct sockaddr_in * ) st_src );
309 struct sockaddr_in *sin_dest = ( ( struct sockaddr_in * ) st_dest );
310 struct ipv4_miniroute *miniroute;
311 struct in_addr next_hop;
312 uint8_t ll_dest[MAX_LL_ADDR_LEN];
313 int rc;
315 /* Fill up the IP header, except source address */
316 memset ( iphdr, 0, sizeof ( *iphdr ) );
317 iphdr->verhdrlen = ( IP_VER | ( sizeof ( *iphdr ) / 4 ) );
318 iphdr->service = IP_TOS;
319 iphdr->len = htons ( iob_len ( iobuf ) );
320 iphdr->ident = htons ( ++next_ident );
321 iphdr->ttl = IP_TTL;
322 iphdr->protocol = tcpip_protocol->tcpip_proto;
323 iphdr->dest = sin_dest->sin_addr;
325 /* Use routing table to identify next hop and transmitting netdev */
326 next_hop = iphdr->dest;
327 if ( sin_src )
328 iphdr->src = sin_src->sin_addr;
329 if ( ( next_hop.s_addr != INADDR_BROADCAST ) &&
330 ( ! IN_MULTICAST ( ntohl ( next_hop.s_addr ) ) ) &&
331 ( ( miniroute = ipv4_route ( &next_hop ) ) != NULL ) ) {
332 iphdr->src = miniroute->address;
333 netdev = miniroute->netdev;
335 if ( ! netdev ) {
336 DBG ( "IPv4 has no route to %s\n", inet_ntoa ( iphdr->dest ) );
337 rc = -ENETUNREACH;
338 goto err;
341 /* Determine link-layer destination address */
342 if ( ( rc = ipv4_ll_addr ( next_hop, iphdr->src, netdev,
343 ll_dest ) ) != 0 ) {
344 DBG ( "IPv4 has no link-layer address for %s: %s\n",
345 inet_ntoa ( next_hop ), strerror ( rc ) );
346 goto err;
349 /* Fix up checksums */
350 if ( trans_csum )
351 *trans_csum = ipv4_pshdr_chksum ( iobuf, *trans_csum );
352 iphdr->chksum = tcpip_chksum ( iphdr, sizeof ( *iphdr ) );
354 /* Print IP4 header for debugging */
355 DBG ( "IPv4 TX %s->", inet_ntoa ( iphdr->src ) );
356 DBG ( "%s len %d proto %d id %04x csum %04x\n",
357 inet_ntoa ( iphdr->dest ), ntohs ( iphdr->len ), iphdr->protocol,
358 ntohs ( iphdr->ident ), ntohs ( iphdr->chksum ) );
360 /* Hand off to link layer */
361 if ( ( rc = net_tx ( iobuf, netdev, &ipv4_protocol, ll_dest ) ) != 0 ) {
362 DBG ( "IPv4 could not transmit packet via %s: %s\n",
363 netdev->name, strerror ( rc ) );
364 return rc;
367 return 0;
369 err:
370 free_iob ( iobuf );
371 return rc;
375 * Process incoming packets
377 * @v iobuf I/O buffer
378 * @v netdev Network device
379 * @v ll_source Link-layer destination source
381 * This function expects an IP4 network datagram. It processes the headers
382 * and sends it to the transport layer.
384 static int ipv4_rx ( struct io_buffer *iobuf, struct net_device *netdev __unused,
385 const void *ll_source __unused ) {
386 struct iphdr *iphdr = iobuf->data;
387 size_t hdrlen;
388 size_t len;
389 union {
390 struct sockaddr_in sin;
391 struct sockaddr_tcpip st;
392 } src, dest;
393 uint16_t csum;
394 uint16_t pshdr_csum;
395 int rc;
397 /* Sanity check the IPv4 header */
398 if ( iob_len ( iobuf ) < sizeof ( *iphdr ) ) {
399 DBG ( "IPv4 packet too short at %zd bytes (min %zd bytes)\n",
400 iob_len ( iobuf ), sizeof ( *iphdr ) );
401 goto err;
403 if ( ( iphdr->verhdrlen & IP_MASK_VER ) != IP_VER ) {
404 DBG ( "IPv4 version %#02x not supported\n", iphdr->verhdrlen );
405 goto err;
407 hdrlen = ( ( iphdr->verhdrlen & IP_MASK_HLEN ) * 4 );
408 if ( hdrlen < sizeof ( *iphdr ) ) {
409 DBG ( "IPv4 header too short at %zd bytes (min %zd bytes)\n",
410 hdrlen, sizeof ( *iphdr ) );
411 goto err;
413 if ( hdrlen > iob_len ( iobuf ) ) {
414 DBG ( "IPv4 header too long at %zd bytes "
415 "(packet is %zd bytes)\n", hdrlen, iob_len ( iobuf ) );
416 goto err;
418 if ( ( csum = tcpip_chksum ( iphdr, hdrlen ) ) != 0 ) {
419 DBG ( "IPv4 checksum incorrect (is %04x including checksum "
420 "field, should be 0000)\n", csum );
421 goto err;
423 len = ntohs ( iphdr->len );
424 if ( len < hdrlen ) {
425 DBG ( "IPv4 length too short at %zd bytes "
426 "(header is %zd bytes)\n", len, hdrlen );
427 goto err;
429 if ( len > iob_len ( iobuf ) ) {
430 DBG ( "IPv4 length too long at %zd bytes "
431 "(packet is %zd bytes)\n", len, iob_len ( iobuf ) );
432 goto err;
435 /* Print IPv4 header for debugging */
436 DBG ( "IPv4 RX %s<-", inet_ntoa ( iphdr->dest ) );
437 DBG ( "%s len %d proto %d id %04x csum %04x\n",
438 inet_ntoa ( iphdr->src ), ntohs ( iphdr->len ), iphdr->protocol,
439 ntohs ( iphdr->ident ), ntohs ( iphdr->chksum ) );
441 /* Truncate packet to correct length, calculate pseudo-header
442 * checksum and then strip off the IPv4 header.
444 iob_unput ( iobuf, ( iob_len ( iobuf ) - len ) );
445 pshdr_csum = ipv4_pshdr_chksum ( iobuf, TCPIP_EMPTY_CSUM );
446 iob_pull ( iobuf, hdrlen );
448 /* Fragment reassembly */
449 if ( ( iphdr->frags & htons ( IP_MASK_MOREFRAGS ) ) ||
450 ( ( iphdr->frags & htons ( IP_MASK_OFFSET ) ) != 0 ) ) {
451 /* Pass the fragment to ipv4_reassemble() which either
452 * returns a fully reassembled I/O buffer or NULL.
454 iobuf = ipv4_reassemble ( iobuf );
455 if ( ! iobuf )
456 return 0;
459 /* Construct socket addresses and hand off to transport layer */
460 memset ( &src, 0, sizeof ( src ) );
461 src.sin.sin_family = AF_INET;
462 src.sin.sin_addr = iphdr->src;
463 memset ( &dest, 0, sizeof ( dest ) );
464 dest.sin.sin_family = AF_INET;
465 dest.sin.sin_addr = iphdr->dest;
466 if ( ( rc = tcpip_rx ( iobuf, iphdr->protocol, &src.st,
467 &dest.st, pshdr_csum ) ) != 0 ) {
468 DBG ( "IPv4 received packet rejected by stack: %s\n",
469 strerror ( rc ) );
470 return rc;
473 return 0;
475 err:
476 free_iob ( iobuf );
477 return -EINVAL;
480 /**
481 * Check existence of IPv4 address for ARP
483 * @v netdev Network device
484 * @v net_addr Network-layer address
485 * @ret rc Return status code
487 static int ipv4_arp_check ( struct net_device *netdev, const void *net_addr ) {
488 const struct in_addr *address = net_addr;
489 struct ipv4_miniroute *miniroute;
491 list_for_each_entry ( miniroute, &ipv4_miniroutes, list ) {
492 if ( ( miniroute->netdev == netdev ) &&
493 ( miniroute->address.s_addr == address->s_addr ) ) {
494 /* Found matching address */
495 return 0;
498 return -ENOENT;
/**
 * Convert IPv4 address to dotted-quad notation
 *
 * @v in		IP address
 * @ret string		IP address in dotted-quad notation
 *
 * The address is formatted byte by byte in memory order.  The result
 * lives in a single static buffer that is overwritten by the next
 * call, so it must be consumed (or copied) before calling again.
 */
char * inet_ntoa ( struct in_addr in ) {
	static char buf[16]; /* "xxx.xxx.xxx.xxx" plus terminating NUL */
	const uint8_t *octet = ( const uint8_t * ) &in;

	/* Bounded write: output can never run past the static buffer */
	snprintf ( buf, sizeof ( buf ), "%d.%d.%d.%d",
		   octet[0], octet[1], octet[2], octet[3] );
	return buf;
}
516 * Transcribe IP address
518 * @v net_addr IP address
519 * @ret string IP address in dotted-quad notation
522 static const char * ipv4_ntoa ( const void *net_addr ) {
523 return inet_ntoa ( * ( ( struct in_addr * ) net_addr ) );
526 /** IPv4 protocol */
527 struct net_protocol ipv4_protocol __net_protocol = {
528 .name = "IP",
529 .net_proto = htons ( ETH_P_IP ),
530 .net_addr_len = sizeof ( struct in_addr ),
531 .rx = ipv4_rx,
532 .ntoa = ipv4_ntoa,
535 /** IPv4 TCPIP net protocol */
536 struct tcpip_net_protocol ipv4_tcpip_protocol __tcpip_net_protocol = {
537 .name = "IPv4",
538 .sa_family = AF_INET,
539 .tx = ipv4_tx,
542 /** IPv4 ARP protocol */
543 struct arp_net_protocol ipv4_arp_protocol __arp_net_protocol = {
544 .net_protocol = &ipv4_protocol,
545 .check = ipv4_arp_check,
/******************************************************************************
 *
 * Settings
 *
 ******************************************************************************
 */
555 /** IPv4 address setting */
556 struct setting ip_setting __setting = {
557 .name = "ip",
558 .description = "IPv4 address",
559 .tag = DHCP_EB_YIADDR,
560 .type = &setting_type_ipv4,
563 /** IPv4 subnet mask setting */
564 struct setting netmask_setting __setting = {
565 .name = "netmask",
566 .description = "IPv4 subnet mask",
567 .tag = DHCP_SUBNET_MASK,
568 .type = &setting_type_ipv4,
571 /** Default gateway setting */
572 struct setting gateway_setting __setting = {
573 .name = "gateway",
574 .description = "Default gateway",
575 .tag = DHCP_ROUTERS,
576 .type = &setting_type_ipv4,
580 * Create IPv4 routing table based on configured settings
582 * @ret rc Return status code
584 static int ipv4_create_routes ( void ) {
585 struct ipv4_miniroute *miniroute;
586 struct ipv4_miniroute *tmp;
587 struct net_device *netdev;
588 struct settings *settings;
589 struct in_addr address = { 0 };
590 struct in_addr netmask = { 0 };
591 struct in_addr gateway = { 0 };
593 /* Delete all existing routes */
594 list_for_each_entry_safe ( miniroute, tmp, &ipv4_miniroutes, list )
595 del_ipv4_miniroute ( miniroute );
597 /* Create a route for each configured network device */
598 for_each_netdev ( netdev ) {
599 settings = netdev_settings ( netdev );
600 /* Get IPv4 address */
601 address.s_addr = 0;
602 fetch_ipv4_setting ( settings, &ip_setting, &address );
603 if ( ! address.s_addr )
604 continue;
605 /* Get subnet mask */
606 fetch_ipv4_setting ( settings, &netmask_setting, &netmask );
607 /* Calculate default netmask, if necessary */
608 if ( ! netmask.s_addr ) {
609 if ( IN_CLASSA ( ntohl ( address.s_addr ) ) ) {
610 netmask.s_addr = htonl ( IN_CLASSA_NET );
611 } else if ( IN_CLASSB ( ntohl ( address.s_addr ) ) ) {
612 netmask.s_addr = htonl ( IN_CLASSB_NET );
613 } else if ( IN_CLASSC ( ntohl ( address.s_addr ) ) ) {
614 netmask.s_addr = htonl ( IN_CLASSC_NET );
617 /* Get default gateway, if present */
618 fetch_ipv4_setting ( settings, &gateway_setting, &gateway );
619 /* Configure route */
620 miniroute = add_ipv4_miniroute ( netdev, address,
621 netmask, gateway );
622 if ( ! miniroute )
623 return -ENOMEM;
626 return 0;
629 /** IPv4 settings applicator */
630 struct settings_applicator ipv4_settings_applicator __settings_applicator = {
631 .apply = ipv4_create_routes,
/* Drag in ICMP: request that the icmp object accompany this one
 * (REQUIRE_OBJECT is defined outside this file).
 */
REQUIRE_OBJECT ( icmp );