10 #include <gpxe/if_ether.h>
11 #include <gpxe/iobuf.h>
12 #include <gpxe/netdevice.h>
14 #include <gpxe/tcpip.h>
15 #include <gpxe/dhcp.h>
16 #include <gpxe/settings.h>
24 FILE_LICENCE ( GPL2_OR_LATER
);
26 /* Unique IP datagram identification number */
27 static uint16_t next_ident
= 0;
29 struct net_protocol ipv4_protocol
;
31 /** List of IPv4 miniroutes */
32 struct list_head ipv4_miniroutes
= LIST_HEAD_INIT ( ipv4_miniroutes
);
34 /** List of fragment reassembly buffers */
35 static LIST_HEAD ( frag_buffers
);
38 * Add IPv4 minirouting table entry
40 * @v netdev Network device
41 * @v address IPv4 address
42 * @v netmask Subnet mask
43 * @v gateway Gateway address (if any)
44 * @ret miniroute Routing table entry, or NULL
46 static struct ipv4_miniroute
* __malloc
47 add_ipv4_miniroute ( struct net_device
*netdev
, struct in_addr address
,
48 struct in_addr netmask
, struct in_addr gateway
) {
49 struct ipv4_miniroute
*miniroute
;
51 DBG ( "IPv4 add %s", inet_ntoa ( address
) );
52 DBG ( "/%s ", inet_ntoa ( netmask
) );
54 DBG ( "gw %s ", inet_ntoa ( gateway
) );
55 DBG ( "via %s\n", netdev
->name
);
57 /* Allocate and populate miniroute structure */
58 miniroute
= malloc ( sizeof ( *miniroute
) );
60 DBG ( "IPv4 could not add miniroute\n" );
64 /* Record routing information */
65 miniroute
->netdev
= netdev_get ( netdev
);
66 miniroute
->address
= address
;
67 miniroute
->netmask
= netmask
;
68 miniroute
->gateway
= gateway
;
70 /* Add to end of list if we have a gateway, otherwise
73 if ( gateway
.s_addr
) {
74 list_add_tail ( &miniroute
->list
, &ipv4_miniroutes
);
76 list_add ( &miniroute
->list
, &ipv4_miniroutes
);
83 * Delete IPv4 minirouting table entry
85 * @v miniroute Routing table entry
87 static void del_ipv4_miniroute ( struct ipv4_miniroute
*miniroute
) {
89 DBG ( "IPv4 del %s", inet_ntoa ( miniroute
->address
) );
90 DBG ( "/%s ", inet_ntoa ( miniroute
->netmask
) );
91 if ( miniroute
->gateway
.s_addr
)
92 DBG ( "gw %s ", inet_ntoa ( miniroute
->gateway
) );
93 DBG ( "via %s\n", miniroute
->netdev
->name
);
95 netdev_put ( miniroute
->netdev
);
96 list_del ( &miniroute
->list
);
101 * Perform IPv4 routing
103 * @v dest Final destination address
104 * @ret dest Next hop destination address
105 * @ret miniroute Routing table entry to use, or NULL if no route
107 * If the route requires use of a gateway, the next hop destination
108 * address will be overwritten with the gateway address.
110 static struct ipv4_miniroute
* ipv4_route ( struct in_addr
*dest
) {
111 struct ipv4_miniroute
*miniroute
;
115 /* Never attempt to route the broadcast address */
116 if ( dest
->s_addr
== INADDR_BROADCAST
)
119 /* Find first usable route in routing table */
120 list_for_each_entry ( miniroute
, &ipv4_miniroutes
, list
) {
121 if ( ! ( miniroute
->netdev
->state
& NETDEV_OPEN
) )
123 local
= ( ( ( dest
->s_addr
^ miniroute
->address
.s_addr
)
124 & miniroute
->netmask
.s_addr
) == 0 );
125 has_gw
= ( miniroute
->gateway
.s_addr
);
126 if ( local
|| has_gw
) {
128 *dest
= miniroute
->gateway
;
137 * Fragment reassembly counter timeout
139 * @v timer Retry timer
140 * @v over If asserted, the timer is greater than @c MAX_TIMEOUT
142 static void ipv4_frag_expired ( struct retry_timer
*timer __unused
,
145 DBG ( "Fragment reassembly timeout" );
146 /* Free the fragment buffer */
151 * Free fragment buffer
 * @v fragbuf Fragment buffer
155 static void free_fragbuf ( struct frag_buffer
*fragbuf
) {
160 * Fragment reassembler
162 * @v iobuf I/O buffer, fragment of the datagram
163 * @ret frag_iob Reassembled packet, or NULL
165 static struct io_buffer
* ipv4_reassemble ( struct io_buffer
* iobuf
) {
166 struct iphdr
*iphdr
= iobuf
->data
;
167 struct frag_buffer
*fragbuf
;
170 * Check if the fragment belongs to any fragment series
172 list_for_each_entry ( fragbuf
, &frag_buffers
, list
) {
173 if ( fragbuf
->ident
== iphdr
->ident
&&
174 fragbuf
->src
.s_addr
== iphdr
->src
.s_addr
) {
176 * Check if the packet is the expected fragment
178 * The offset of the new packet must be equal to the
179 * length of the data accumulated so far (the length of
			 * the reassembled I/O buffer).
182 if ( iob_len ( fragbuf
->frag_iob
) ==
183 ( iphdr
->frags
& IP_MASK_OFFSET
) ) {
185 * Append the contents of the fragment to the
186 * reassembled I/O buffer
188 iob_pull ( iobuf
, sizeof ( *iphdr
) );
189 memcpy ( iob_put ( fragbuf
->frag_iob
,
191 iobuf
->data
, iob_len ( iobuf
) );
194 /** Check if the fragment series is over */
195 if ( ! ( iphdr
->frags
& IP_MASK_MOREFRAGS
) ) {
196 iobuf
= fragbuf
->frag_iob
;
197 free_fragbuf ( fragbuf
);
202 /* Discard the fragment series */
203 free_fragbuf ( fragbuf
);
210 /** Check if the fragment is the first in the fragment series */
211 if ( iphdr
->frags
& IP_MASK_MOREFRAGS
&&
212 ( ( iphdr
->frags
& IP_MASK_OFFSET
) == 0 ) ) {
214 /** Create a new fragment buffer */
215 fragbuf
= ( struct frag_buffer
* ) malloc ( sizeof( *fragbuf
) );
216 fragbuf
->ident
= iphdr
->ident
;
217 fragbuf
->src
= iphdr
->src
;
219 /* Set up the reassembly I/O buffer */
220 fragbuf
->frag_iob
= alloc_iob ( IP_FRAG_IOB_SIZE
);
221 iob_pull ( iobuf
, sizeof ( *iphdr
) );
222 memcpy ( iob_put ( fragbuf
->frag_iob
, iob_len ( iobuf
) ),
223 iobuf
->data
, iob_len ( iobuf
) );
226 /* Set the reassembly timer */
227 fragbuf
->frag_timer
.timeout
= IP_FRAG_TIMEOUT
;
228 fragbuf
->frag_timer
.expired
= ipv4_frag_expired
;
229 start_timer ( &fragbuf
->frag_timer
);
231 /* Add the fragment buffer to the list of fragment buffers */
232 list_add ( &fragbuf
->list
, &frag_buffers
);
239 * Add IPv4 pseudo-header checksum to existing checksum
241 * @v iobuf I/O buffer
242 * @v csum Existing checksum
243 * @ret csum Updated checksum
245 static uint16_t ipv4_pshdr_chksum ( struct io_buffer
*iobuf
, uint16_t csum
) {
246 struct ipv4_pseudo_header pshdr
;
247 struct iphdr
*iphdr
= iobuf
->data
;
248 size_t hdrlen
= ( ( iphdr
->verhdrlen
& IP_MASK_HLEN
) * 4 );
250 /* Build pseudo-header */
251 pshdr
.src
= iphdr
->src
;
252 pshdr
.dest
= iphdr
->dest
;
253 pshdr
.zero_padding
= 0x00;
254 pshdr
.protocol
= iphdr
->protocol
;
255 pshdr
.len
= htons ( iob_len ( iobuf
) - hdrlen
);
257 /* Update the checksum value */
258 return tcpip_continue_chksum ( csum
, &pshdr
, sizeof ( pshdr
) );
262 * Determine link-layer address
264 * @v dest IPv4 destination address
265 * @v src IPv4 source address
266 * @v netdev Network device
267 * @v ll_dest Link-layer destination address buffer
268 * @ret rc Return status code
270 static int ipv4_ll_addr ( struct in_addr dest
, struct in_addr src
,
271 struct net_device
*netdev
, uint8_t *ll_dest
) {
272 struct ll_protocol
*ll_protocol
= netdev
->ll_protocol
;
274 if ( dest
.s_addr
== INADDR_BROADCAST
) {
275 /* Broadcast address */
276 memcpy ( ll_dest
, netdev
->ll_broadcast
,
277 ll_protocol
->ll_addr_len
);
279 } else if ( IN_MULTICAST ( ntohl ( dest
.s_addr
) ) ) {
280 return ll_protocol
->mc_hash ( AF_INET
, &dest
, ll_dest
);
282 /* Unicast address: resolve via ARP */
283 return arp_resolve ( netdev
, &ipv4_protocol
, &dest
,
291 * @v iobuf I/O buffer
292 * @v tcpip Transport-layer protocol
293 * @v st_src Source network-layer address
294 * @v st_dest Destination network-layer address
295 * @v netdev Network device to use if no route found, or NULL
296 * @v trans_csum Transport-layer checksum to complete, or NULL
299 * This function expects a transport-layer segment and prepends the IP header
301 static int ipv4_tx ( struct io_buffer
*iobuf
,
302 struct tcpip_protocol
*tcpip_protocol
,
303 struct sockaddr_tcpip
*st_src
,
304 struct sockaddr_tcpip
*st_dest
,
305 struct net_device
*netdev
,
306 uint16_t *trans_csum
) {
307 struct iphdr
*iphdr
= iob_push ( iobuf
, sizeof ( *iphdr
) );
308 struct sockaddr_in
*sin_src
= ( ( struct sockaddr_in
* ) st_src
);
309 struct sockaddr_in
*sin_dest
= ( ( struct sockaddr_in
* ) st_dest
);
310 struct ipv4_miniroute
*miniroute
;
311 struct in_addr next_hop
;
312 uint8_t ll_dest
[MAX_LL_ADDR_LEN
];
315 /* Fill up the IP header, except source address */
316 memset ( iphdr
, 0, sizeof ( *iphdr
) );
317 iphdr
->verhdrlen
= ( IP_VER
| ( sizeof ( *iphdr
) / 4 ) );
318 iphdr
->service
= IP_TOS
;
319 iphdr
->len
= htons ( iob_len ( iobuf
) );
320 iphdr
->ident
= htons ( ++next_ident
);
322 iphdr
->protocol
= tcpip_protocol
->tcpip_proto
;
323 iphdr
->dest
= sin_dest
->sin_addr
;
325 /* Use routing table to identify next hop and transmitting netdev */
326 next_hop
= iphdr
->dest
;
328 iphdr
->src
= sin_src
->sin_addr
;
329 if ( ( next_hop
.s_addr
!= INADDR_BROADCAST
) &&
330 ( ! IN_MULTICAST ( ntohl ( next_hop
.s_addr
) ) ) &&
331 ( ( miniroute
= ipv4_route ( &next_hop
) ) != NULL
) ) {
332 iphdr
->src
= miniroute
->address
;
333 netdev
= miniroute
->netdev
;
336 DBG ( "IPv4 has no route to %s\n", inet_ntoa ( iphdr
->dest
) );
341 /* Determine link-layer destination address */
342 if ( ( rc
= ipv4_ll_addr ( next_hop
, iphdr
->src
, netdev
,
344 DBG ( "IPv4 has no link-layer address for %s: %s\n",
345 inet_ntoa ( next_hop
), strerror ( rc
) );
349 /* Fix up checksums */
351 *trans_csum
= ipv4_pshdr_chksum ( iobuf
, *trans_csum
);
352 iphdr
->chksum
= tcpip_chksum ( iphdr
, sizeof ( *iphdr
) );
	/* Print IPv4 header for debugging */
355 DBG ( "IPv4 TX %s->", inet_ntoa ( iphdr
->src
) );
356 DBG ( "%s len %d proto %d id %04x csum %04x\n",
357 inet_ntoa ( iphdr
->dest
), ntohs ( iphdr
->len
), iphdr
->protocol
,
358 ntohs ( iphdr
->ident
), ntohs ( iphdr
->chksum
) );
360 /* Hand off to link layer */
361 if ( ( rc
= net_tx ( iobuf
, netdev
, &ipv4_protocol
, ll_dest
) ) != 0 ) {
362 DBG ( "IPv4 could not transmit packet via %s: %s\n",
363 netdev
->name
, strerror ( rc
) );
375 * Process incoming packets
377 * @v iobuf I/O buffer
378 * @v netdev Network device
 * @v ll_source Link-layer source address
 * This function expects an IPv4 network datagram. It processes the headers
382 * and sends it to the transport layer.
384 static int ipv4_rx ( struct io_buffer
*iobuf
, struct net_device
*netdev __unused
,
385 const void *ll_source __unused
) {
386 struct iphdr
*iphdr
= iobuf
->data
;
390 struct sockaddr_in sin
;
391 struct sockaddr_tcpip st
;
397 /* Sanity check the IPv4 header */
398 if ( iob_len ( iobuf
) < sizeof ( *iphdr
) ) {
399 DBG ( "IPv4 packet too short at %zd bytes (min %zd bytes)\n",
400 iob_len ( iobuf
), sizeof ( *iphdr
) );
403 if ( ( iphdr
->verhdrlen
& IP_MASK_VER
) != IP_VER
) {
404 DBG ( "IPv4 version %#02x not supported\n", iphdr
->verhdrlen
);
407 hdrlen
= ( ( iphdr
->verhdrlen
& IP_MASK_HLEN
) * 4 );
408 if ( hdrlen
< sizeof ( *iphdr
) ) {
409 DBG ( "IPv4 header too short at %zd bytes (min %zd bytes)\n",
410 hdrlen
, sizeof ( *iphdr
) );
413 if ( hdrlen
> iob_len ( iobuf
) ) {
414 DBG ( "IPv4 header too long at %zd bytes "
415 "(packet is %zd bytes)\n", hdrlen
, iob_len ( iobuf
) );
418 if ( ( csum
= tcpip_chksum ( iphdr
, hdrlen
) ) != 0 ) {
419 DBG ( "IPv4 checksum incorrect (is %04x including checksum "
420 "field, should be 0000)\n", csum
);
423 len
= ntohs ( iphdr
->len
);
424 if ( len
< hdrlen
) {
425 DBG ( "IPv4 length too short at %zd bytes "
426 "(header is %zd bytes)\n", len
, hdrlen
);
429 if ( len
> iob_len ( iobuf
) ) {
430 DBG ( "IPv4 length too long at %zd bytes "
431 "(packet is %zd bytes)\n", len
, iob_len ( iobuf
) );
435 /* Print IPv4 header for debugging */
436 DBG ( "IPv4 RX %s<-", inet_ntoa ( iphdr
->dest
) );
437 DBG ( "%s len %d proto %d id %04x csum %04x\n",
438 inet_ntoa ( iphdr
->src
), ntohs ( iphdr
->len
), iphdr
->protocol
,
439 ntohs ( iphdr
->ident
), ntohs ( iphdr
->chksum
) );
441 /* Truncate packet to correct length, calculate pseudo-header
442 * checksum and then strip off the IPv4 header.
444 iob_unput ( iobuf
, ( iob_len ( iobuf
) - len
) );
445 pshdr_csum
= ipv4_pshdr_chksum ( iobuf
, TCPIP_EMPTY_CSUM
);
446 iob_pull ( iobuf
, hdrlen
);
448 /* Fragment reassembly */
449 if ( ( iphdr
->frags
& htons ( IP_MASK_MOREFRAGS
) ) ||
450 ( ( iphdr
->frags
& htons ( IP_MASK_OFFSET
) ) != 0 ) ) {
451 /* Pass the fragment to ipv4_reassemble() which either
452 * returns a fully reassembled I/O buffer or NULL.
454 iobuf
= ipv4_reassemble ( iobuf
);
459 /* Construct socket addresses and hand off to transport layer */
460 memset ( &src
, 0, sizeof ( src
) );
461 src
.sin
.sin_family
= AF_INET
;
462 src
.sin
.sin_addr
= iphdr
->src
;
463 memset ( &dest
, 0, sizeof ( dest
) );
464 dest
.sin
.sin_family
= AF_INET
;
465 dest
.sin
.sin_addr
= iphdr
->dest
;
466 if ( ( rc
= tcpip_rx ( iobuf
, iphdr
->protocol
, &src
.st
,
467 &dest
.st
, pshdr_csum
) ) != 0 ) {
468 DBG ( "IPv4 received packet rejected by stack: %s\n",
481 * Check existence of IPv4 address for ARP
483 * @v netdev Network device
484 * @v net_addr Network-layer address
485 * @ret rc Return status code
487 static int ipv4_arp_check ( struct net_device
*netdev
, const void *net_addr
) {
488 const struct in_addr
*address
= net_addr
;
489 struct ipv4_miniroute
*miniroute
;
491 list_for_each_entry ( miniroute
, &ipv4_miniroutes
, list
) {
492 if ( ( miniroute
->netdev
== netdev
) &&
493 ( miniroute
->address
.s_addr
== address
->s_addr
) ) {
494 /* Found matching address */
/**
 * Convert IPv4 address to dotted-quad notation
 *
 * @v in	IP address
 * @ret string	IP address in dotted-quad notation
 *
 * The address bytes are printed in the order in which they are stored
 * in @c in.  The returned string lives in a single static buffer that
 * is overwritten by the next call, so it must be consumed (or copied)
 * before inet_ntoa() is called again.
 */
char * inet_ntoa ( struct in_addr in ) {
	static char buf[16]; /* "xxx.xxx.xxx.xxx" */
	uint8_t *bytes = ( uint8_t * ) &in;

	/* Bounded write: four uint8_t values always fit, but the size
	 * limit keeps the buffer safe if the format is ever changed.
	 */
	snprintf ( buf, sizeof ( buf ), "%d.%d.%d.%d",
		   bytes[0], bytes[1], bytes[2], bytes[3] );
	return buf;
}
/**
 * Transcribe IP address
 *
 * @v net_addr	IP address
 * @ret string	IP address in dotted-quad notation
 *
 * Thin adapter that reads a struct in_addr through the untyped
 * pointer and hands it to inet_ntoa(); the result is whatever
 * inet_ntoa() returns.
 */
static const char * ipv4_ntoa ( const void *net_addr ) {
	const struct in_addr *in = net_addr;

	return inet_ntoa ( *in );
}
527 struct net_protocol ipv4_protocol __net_protocol
= {
529 .net_proto
= htons ( ETH_P_IP
),
530 .net_addr_len
= sizeof ( struct in_addr
),
535 /** IPv4 TCPIP net protocol */
536 struct tcpip_net_protocol ipv4_tcpip_protocol __tcpip_net_protocol
= {
538 .sa_family
= AF_INET
,
542 /** IPv4 ARP protocol */
543 struct arp_net_protocol ipv4_arp_protocol __arp_net_protocol
= {
544 .net_protocol
= &ipv4_protocol
,
545 .check
= ipv4_arp_check
,
548 /******************************************************************************
552 ******************************************************************************
555 /** IPv4 address setting */
556 struct setting ip_setting __setting
= {
558 .description
= "IPv4 address",
559 .tag
= DHCP_EB_YIADDR
,
560 .type
= &setting_type_ipv4
,
563 /** IPv4 subnet mask setting */
564 struct setting netmask_setting __setting
= {
566 .description
= "IPv4 subnet mask",
567 .tag
= DHCP_SUBNET_MASK
,
568 .type
= &setting_type_ipv4
,
571 /** Default gateway setting */
572 struct setting gateway_setting __setting
= {
574 .description
= "Default gateway",
576 .type
= &setting_type_ipv4
,
580 * Create IPv4 routing table based on configured settings
582 * @ret rc Return status code
584 static int ipv4_create_routes ( void ) {
585 struct ipv4_miniroute
*miniroute
;
586 struct ipv4_miniroute
*tmp
;
587 struct net_device
*netdev
;
588 struct settings
*settings
;
589 struct in_addr address
= { 0 };
590 struct in_addr netmask
= { 0 };
591 struct in_addr gateway
= { 0 };
593 /* Delete all existing routes */
594 list_for_each_entry_safe ( miniroute
, tmp
, &ipv4_miniroutes
, list
)
595 del_ipv4_miniroute ( miniroute
);
597 /* Create a route for each configured network device */
598 for_each_netdev ( netdev
) {
599 settings
= netdev_settings ( netdev
);
600 /* Get IPv4 address */
602 fetch_ipv4_setting ( settings
, &ip_setting
, &address
);
603 if ( ! address
.s_addr
)
605 /* Get subnet mask */
606 fetch_ipv4_setting ( settings
, &netmask_setting
, &netmask
);
607 /* Calculate default netmask, if necessary */
608 if ( ! netmask
.s_addr
) {
609 if ( IN_CLASSA ( ntohl ( address
.s_addr
) ) ) {
610 netmask
.s_addr
= htonl ( IN_CLASSA_NET
);
611 } else if ( IN_CLASSB ( ntohl ( address
.s_addr
) ) ) {
612 netmask
.s_addr
= htonl ( IN_CLASSB_NET
);
613 } else if ( IN_CLASSC ( ntohl ( address
.s_addr
) ) ) {
614 netmask
.s_addr
= htonl ( IN_CLASSC_NET
);
617 /* Get default gateway, if present */
618 fetch_ipv4_setting ( settings
, &gateway_setting
, &gateway
);
619 /* Configure route */
620 miniroute
= add_ipv4_miniroute ( netdev
, address
,
629 /** IPv4 settings applicator */
630 struct settings_applicator ipv4_settings_applicator __settings_applicator
= {
631 .apply
= ipv4_create_routes
,
635 REQUIRE_OBJECT ( icmp
);