10 #include <gpxe/if_ether.h>
11 #include <gpxe/iobuf.h>
12 #include <gpxe/netdevice.h>
14 #include <gpxe/tcpip.h>
15 #include <gpxe/dhcp.h>
16 #include <gpxe/settings.h>
24 FILE_LICENCE ( GPL2_OR_LATER
);
26 /* Unique IP datagram identification number */
27 static uint16_t next_ident
= 0;
29 struct net_protocol ipv4_protocol
;
31 /** List of IPv4 miniroutes */
32 struct list_head ipv4_miniroutes
= LIST_HEAD_INIT ( ipv4_miniroutes
);
34 /** List of fragment reassembly buffers */
35 static LIST_HEAD ( frag_buffers
);
38 * Add IPv4 minirouting table entry
40 * @v netdev Network device
41 * @v address IPv4 address
42 * @v netmask Subnet mask
43 * @v gateway Gateway address (if any)
44 * @ret miniroute Routing table entry, or NULL
46 static struct ipv4_miniroute
* __malloc
47 add_ipv4_miniroute ( struct net_device
*netdev
, struct in_addr address
,
48 struct in_addr netmask
, struct in_addr gateway
) {
49 struct ipv4_miniroute
*miniroute
;
51 DBG ( "IPv4 add %s", inet_ntoa ( address
) );
52 DBG ( "/%s ", inet_ntoa ( netmask
) );
54 DBG ( "gw %s ", inet_ntoa ( gateway
) );
55 DBG ( "via %s\n", netdev
->name
);
57 /* Allocate and populate miniroute structure */
58 miniroute
= malloc ( sizeof ( *miniroute
) );
60 DBG ( "IPv4 could not add miniroute\n" );
64 /* Record routing information */
65 miniroute
->netdev
= netdev_get ( netdev
);
66 miniroute
->address
= address
;
67 miniroute
->netmask
= netmask
;
68 miniroute
->gateway
= gateway
;
70 /* Add to end of list if we have a gateway, otherwise
73 if ( gateway
.s_addr
) {
74 list_add_tail ( &miniroute
->list
, &ipv4_miniroutes
);
76 list_add ( &miniroute
->list
, &ipv4_miniroutes
);
83 * Delete IPv4 minirouting table entry
85 * @v miniroute Routing table entry
87 static void del_ipv4_miniroute ( struct ipv4_miniroute
*miniroute
) {
89 DBG ( "IPv4 del %s", inet_ntoa ( miniroute
->address
) );
90 DBG ( "/%s ", inet_ntoa ( miniroute
->netmask
) );
91 if ( miniroute
->gateway
.s_addr
)
92 DBG ( "gw %s ", inet_ntoa ( miniroute
->gateway
) );
93 DBG ( "via %s\n", miniroute
->netdev
->name
);
95 netdev_put ( miniroute
->netdev
);
96 list_del ( &miniroute
->list
);
101 * Perform IPv4 routing
103 * @v dest Final destination address
104 * @ret dest Next hop destination address
105 * @ret miniroute Routing table entry to use, or NULL if no route
107 * If the route requires use of a gateway, the next hop destination
108 * address will be overwritten with the gateway address.
110 static struct ipv4_miniroute
* ipv4_route ( struct in_addr
*dest
) {
111 struct ipv4_miniroute
*miniroute
;
115 /* Never attempt to route the broadcast address */
116 if ( dest
->s_addr
== INADDR_BROADCAST
)
119 /* Find first usable route in routing table */
120 list_for_each_entry ( miniroute
, &ipv4_miniroutes
, list
) {
121 if ( ! netdev_is_open ( miniroute
->netdev
) )
123 local
= ( ( ( dest
->s_addr
^ miniroute
->address
.s_addr
)
124 & miniroute
->netmask
.s_addr
) == 0 );
125 has_gw
= ( miniroute
->gateway
.s_addr
);
126 if ( local
|| has_gw
) {
128 *dest
= miniroute
->gateway
;
137 * Fragment reassembly counter timeout
139 * @v timer Retry timer
140 * @v over If asserted, the timer is greater than @c MAX_TIMEOUT
142 static void ipv4_frag_expired ( struct retry_timer
*timer __unused
,
145 DBG ( "Fragment reassembly timeout" );
146 /* Free the fragment buffer */
151 * Free fragment buffer
153 * @v fragbuf Fragment buffer
155 static void free_fragbuf ( struct frag_buffer
*fragbuf
) {
160 * Fragment reassembler
162 * @v iobuf I/O buffer, fragment of the datagram
163 * @ret frag_iob Reassembled packet, or NULL
165 static struct io_buffer
* ipv4_reassemble ( struct io_buffer
* iobuf
) {
166 struct iphdr
*iphdr
= iobuf
->data
;
167 struct frag_buffer
*fragbuf
;
170 * Check if the fragment belongs to any fragment series
172 list_for_each_entry ( fragbuf
, &frag_buffers
, list
) {
173 if ( fragbuf
->ident
== iphdr
->ident
&&
174 fragbuf
->src
.s_addr
== iphdr
->src
.s_addr
) {
176 * Check if the packet is the expected fragment
178 * The offset of the new packet must be equal to the
179 * length of the data accumulated so far (the length of
180 * the reassembled I/O buffer
182 if ( iob_len ( fragbuf
->frag_iob
) ==
183 ( iphdr
->frags
& IP_MASK_OFFSET
) ) {
185 * Append the contents of the fragment to the
186 * reassembled I/O buffer
188 iob_pull ( iobuf
, sizeof ( *iphdr
) );
189 memcpy ( iob_put ( fragbuf
->frag_iob
,
191 iobuf
->data
, iob_len ( iobuf
) );
194 /** Check if the fragment series is over */
195 if ( ! ( iphdr
->frags
& IP_MASK_MOREFRAGS
) ) {
196 iobuf
= fragbuf
->frag_iob
;
197 free_fragbuf ( fragbuf
);
202 /* Discard the fragment series */
203 free_fragbuf ( fragbuf
);
210 /** Check if the fragment is the first in the fragment series */
211 if ( iphdr
->frags
& IP_MASK_MOREFRAGS
&&
212 ( ( iphdr
->frags
& IP_MASK_OFFSET
) == 0 ) ) {
214 /** Create a new fragment buffer */
215 fragbuf
= ( struct frag_buffer
* ) malloc ( sizeof( *fragbuf
) );
216 fragbuf
->ident
= iphdr
->ident
;
217 fragbuf
->src
= iphdr
->src
;
219 /* Set up the reassembly I/O buffer */
220 fragbuf
->frag_iob
= alloc_iob ( IP_FRAG_IOB_SIZE
);
221 iob_pull ( iobuf
, sizeof ( *iphdr
) );
222 memcpy ( iob_put ( fragbuf
->frag_iob
, iob_len ( iobuf
) ),
223 iobuf
->data
, iob_len ( iobuf
) );
226 /* Set the reassembly timer */
227 timer_init ( &fragbuf
->frag_timer
, ipv4_frag_expired
);
228 start_timer_fixed ( &fragbuf
->frag_timer
, IP_FRAG_TIMEOUT
);
230 /* Add the fragment buffer to the list of fragment buffers */
231 list_add ( &fragbuf
->list
, &frag_buffers
);
238 * Add IPv4 pseudo-header checksum to existing checksum
240 * @v iobuf I/O buffer
241 * @v csum Existing checksum
242 * @ret csum Updated checksum
244 static uint16_t ipv4_pshdr_chksum ( struct io_buffer
*iobuf
, uint16_t csum
) {
245 struct ipv4_pseudo_header pshdr
;
246 struct iphdr
*iphdr
= iobuf
->data
;
247 size_t hdrlen
= ( ( iphdr
->verhdrlen
& IP_MASK_HLEN
) * 4 );
249 /* Build pseudo-header */
250 pshdr
.src
= iphdr
->src
;
251 pshdr
.dest
= iphdr
->dest
;
252 pshdr
.zero_padding
= 0x00;
253 pshdr
.protocol
= iphdr
->protocol
;
254 pshdr
.len
= htons ( iob_len ( iobuf
) - hdrlen
);
256 /* Update the checksum value */
257 return tcpip_continue_chksum ( csum
, &pshdr
, sizeof ( pshdr
) );
261 * Determine link-layer address
263 * @v dest IPv4 destination address
264 * @v src IPv4 source address
265 * @v netdev Network device
266 * @v ll_dest Link-layer destination address buffer
267 * @ret rc Return status code
269 static int ipv4_ll_addr ( struct in_addr dest
, struct in_addr src
,
270 struct net_device
*netdev
, uint8_t *ll_dest
) {
271 struct ll_protocol
*ll_protocol
= netdev
->ll_protocol
;
273 if ( dest
.s_addr
== INADDR_BROADCAST
) {
274 /* Broadcast address */
275 memcpy ( ll_dest
, netdev
->ll_broadcast
,
276 ll_protocol
->ll_addr_len
);
278 } else if ( IN_MULTICAST ( ntohl ( dest
.s_addr
) ) ) {
279 return ll_protocol
->mc_hash ( AF_INET
, &dest
, ll_dest
);
281 /* Unicast address: resolve via ARP */
282 return arp_resolve ( netdev
, &ipv4_protocol
, &dest
,
290 * @v iobuf I/O buffer
291 * @v tcpip Transport-layer protocol
292 * @v st_src Source network-layer address
293 * @v st_dest Destination network-layer address
294 * @v netdev Network device to use if no route found, or NULL
295 * @v trans_csum Transport-layer checksum to complete, or NULL
298 * This function expects a transport-layer segment and prepends the IP header
300 static int ipv4_tx ( struct io_buffer
*iobuf
,
301 struct tcpip_protocol
*tcpip_protocol
,
302 struct sockaddr_tcpip
*st_src
,
303 struct sockaddr_tcpip
*st_dest
,
304 struct net_device
*netdev
,
305 uint16_t *trans_csum
) {
306 struct iphdr
*iphdr
= iob_push ( iobuf
, sizeof ( *iphdr
) );
307 struct sockaddr_in
*sin_src
= ( ( struct sockaddr_in
* ) st_src
);
308 struct sockaddr_in
*sin_dest
= ( ( struct sockaddr_in
* ) st_dest
);
309 struct ipv4_miniroute
*miniroute
;
310 struct in_addr next_hop
;
311 uint8_t ll_dest
[MAX_LL_ADDR_LEN
];
314 /* Fill up the IP header, except source address */
315 memset ( iphdr
, 0, sizeof ( *iphdr
) );
316 iphdr
->verhdrlen
= ( IP_VER
| ( sizeof ( *iphdr
) / 4 ) );
317 iphdr
->service
= IP_TOS
;
318 iphdr
->len
= htons ( iob_len ( iobuf
) );
319 iphdr
->ident
= htons ( ++next_ident
);
321 iphdr
->protocol
= tcpip_protocol
->tcpip_proto
;
322 iphdr
->dest
= sin_dest
->sin_addr
;
324 /* Use routing table to identify next hop and transmitting netdev */
325 next_hop
= iphdr
->dest
;
327 iphdr
->src
= sin_src
->sin_addr
;
328 if ( ( next_hop
.s_addr
!= INADDR_BROADCAST
) &&
329 ( ! IN_MULTICAST ( ntohl ( next_hop
.s_addr
) ) ) &&
330 ( ( miniroute
= ipv4_route ( &next_hop
) ) != NULL
) ) {
331 iphdr
->src
= miniroute
->address
;
332 netdev
= miniroute
->netdev
;
335 DBG ( "IPv4 has no route to %s\n", inet_ntoa ( iphdr
->dest
) );
340 /* Determine link-layer destination address */
341 if ( ( rc
= ipv4_ll_addr ( next_hop
, iphdr
->src
, netdev
,
343 DBG ( "IPv4 has no link-layer address for %s: %s\n",
344 inet_ntoa ( next_hop
), strerror ( rc
) );
348 /* Fix up checksums */
350 *trans_csum
= ipv4_pshdr_chksum ( iobuf
, *trans_csum
);
351 iphdr
->chksum
= tcpip_chksum ( iphdr
, sizeof ( *iphdr
) );
353 /* Print IP4 header for debugging */
354 DBG ( "IPv4 TX %s->", inet_ntoa ( iphdr
->src
) );
355 DBG ( "%s len %d proto %d id %04x csum %04x\n",
356 inet_ntoa ( iphdr
->dest
), ntohs ( iphdr
->len
), iphdr
->protocol
,
357 ntohs ( iphdr
->ident
), ntohs ( iphdr
->chksum
) );
359 /* Hand off to link layer */
360 if ( ( rc
= net_tx ( iobuf
, netdev
, &ipv4_protocol
, ll_dest
) ) != 0 ) {
361 DBG ( "IPv4 could not transmit packet via %s: %s\n",
362 netdev
->name
, strerror ( rc
) );
374 * Process incoming packets
376 * @v iobuf I/O buffer
377 * @v netdev Network device
378 * @v ll_source Link-layer source address
380 * This function expects an IPv4 network datagram. It processes the headers
381 * and sends it to the transport layer.
383 static int ipv4_rx ( struct io_buffer
*iobuf
, struct net_device
*netdev __unused
,
384 const void *ll_source __unused
) {
385 struct iphdr
*iphdr
= iobuf
->data
;
389 struct sockaddr_in sin
;
390 struct sockaddr_tcpip st
;
396 /* Sanity check the IPv4 header */
397 if ( iob_len ( iobuf
) < sizeof ( *iphdr
) ) {
398 DBG ( "IPv4 packet too short at %zd bytes (min %zd bytes)\n",
399 iob_len ( iobuf
), sizeof ( *iphdr
) );
402 if ( ( iphdr
->verhdrlen
& IP_MASK_VER
) != IP_VER
) {
403 DBG ( "IPv4 version %#02x not supported\n", iphdr
->verhdrlen
);
406 hdrlen
= ( ( iphdr
->verhdrlen
& IP_MASK_HLEN
) * 4 );
407 if ( hdrlen
< sizeof ( *iphdr
) ) {
408 DBG ( "IPv4 header too short at %zd bytes (min %zd bytes)\n",
409 hdrlen
, sizeof ( *iphdr
) );
412 if ( hdrlen
> iob_len ( iobuf
) ) {
413 DBG ( "IPv4 header too long at %zd bytes "
414 "(packet is %zd bytes)\n", hdrlen
, iob_len ( iobuf
) );
417 if ( ( csum
= tcpip_chksum ( iphdr
, hdrlen
) ) != 0 ) {
418 DBG ( "IPv4 checksum incorrect (is %04x including checksum "
419 "field, should be 0000)\n", csum
);
422 len
= ntohs ( iphdr
->len
);
423 if ( len
< hdrlen
) {
424 DBG ( "IPv4 length too short at %zd bytes "
425 "(header is %zd bytes)\n", len
, hdrlen
);
428 if ( len
> iob_len ( iobuf
) ) {
429 DBG ( "IPv4 length too long at %zd bytes "
430 "(packet is %zd bytes)\n", len
, iob_len ( iobuf
) );
434 /* Print IPv4 header for debugging */
435 DBG ( "IPv4 RX %s<-", inet_ntoa ( iphdr
->dest
) );
436 DBG ( "%s len %d proto %d id %04x csum %04x\n",
437 inet_ntoa ( iphdr
->src
), ntohs ( iphdr
->len
), iphdr
->protocol
,
438 ntohs ( iphdr
->ident
), ntohs ( iphdr
->chksum
) );
440 /* Truncate packet to correct length, calculate pseudo-header
441 * checksum and then strip off the IPv4 header.
443 iob_unput ( iobuf
, ( iob_len ( iobuf
) - len
) );
444 pshdr_csum
= ipv4_pshdr_chksum ( iobuf
, TCPIP_EMPTY_CSUM
);
445 iob_pull ( iobuf
, hdrlen
);
447 /* Fragment reassembly */
448 if ( ( iphdr
->frags
& htons ( IP_MASK_MOREFRAGS
) ) ||
449 ( ( iphdr
->frags
& htons ( IP_MASK_OFFSET
) ) != 0 ) ) {
450 /* Pass the fragment to ipv4_reassemble() which either
451 * returns a fully reassembled I/O buffer or NULL.
453 iobuf
= ipv4_reassemble ( iobuf
);
458 /* Construct socket addresses and hand off to transport layer */
459 memset ( &src
, 0, sizeof ( src
) );
460 src
.sin
.sin_family
= AF_INET
;
461 src
.sin
.sin_addr
= iphdr
->src
;
462 memset ( &dest
, 0, sizeof ( dest
) );
463 dest
.sin
.sin_family
= AF_INET
;
464 dest
.sin
.sin_addr
= iphdr
->dest
;
465 if ( ( rc
= tcpip_rx ( iobuf
, iphdr
->protocol
, &src
.st
,
466 &dest
.st
, pshdr_csum
) ) != 0 ) {
467 DBG ( "IPv4 received packet rejected by stack: %s\n",
480 * Check existence of IPv4 address for ARP
482 * @v netdev Network device
483 * @v net_addr Network-layer address
484 * @ret rc Return status code
486 static int ipv4_arp_check ( struct net_device
*netdev
, const void *net_addr
) {
487 const struct in_addr
*address
= net_addr
;
488 struct ipv4_miniroute
*miniroute
;
490 list_for_each_entry ( miniroute
, &ipv4_miniroutes
, list
) {
491 if ( ( miniroute
->netdev
== netdev
) &&
492 ( miniroute
->address
.s_addr
== address
->s_addr
) ) {
493 /* Found matching address */
/**
 * Convert IPv4 address to dotted-quad notation
 *
 * @v in		IPv4 address
 * @ret string		IP address in dotted-quad notation
 *
 * The string lives in a single static buffer, so each call overwrites
 * the result of the previous one.
 */
char * inet_ntoa ( struct in_addr in ) {
	static char dotted[16]; /* "xxx.xxx.xxx.xxx" */
	const uint8_t *octet = ( ( const uint8_t * ) &in );

	/* Bytes are printed in the order they are stored in the address */
	sprintf ( dotted, "%d.%d.%d.%d",
		  octet[0], octet[1], octet[2], octet[3] );
	return dotted;
}
515 * Transcribe IP address
517 * @v net_addr IP address
518 * @ret string IP address in dotted-quad notation
521 static const char * ipv4_ntoa ( const void *net_addr
) {
522 return inet_ntoa ( * ( ( struct in_addr
* ) net_addr
) );
526 struct net_protocol ipv4_protocol __net_protocol
= {
528 .net_proto
= htons ( ETH_P_IP
),
529 .net_addr_len
= sizeof ( struct in_addr
),
534 /** IPv4 TCPIP net protocol */
535 struct tcpip_net_protocol ipv4_tcpip_protocol __tcpip_net_protocol
= {
537 .sa_family
= AF_INET
,
541 /** IPv4 ARP protocol */
542 struct arp_net_protocol ipv4_arp_protocol __arp_net_protocol
= {
543 .net_protocol
= &ipv4_protocol
,
544 .check
= ipv4_arp_check
,
547 /******************************************************************************
551 ******************************************************************************
554 /** IPv4 address setting */
555 struct setting ip_setting __setting
= {
557 .description
= "IPv4 address",
558 .tag
= DHCP_EB_YIADDR
,
559 .type
= &setting_type_ipv4
,
562 /** IPv4 subnet mask setting */
563 struct setting netmask_setting __setting
= {
565 .description
= "IPv4 subnet mask",
566 .tag
= DHCP_SUBNET_MASK
,
567 .type
= &setting_type_ipv4
,
570 /** Default gateway setting */
571 struct setting gateway_setting __setting
= {
573 .description
= "Default gateway",
575 .type
= &setting_type_ipv4
,
579 * Create IPv4 routing table based on configured settings
581 * @ret rc Return status code
583 static int ipv4_create_routes ( void ) {
584 struct ipv4_miniroute
*miniroute
;
585 struct ipv4_miniroute
*tmp
;
586 struct net_device
*netdev
;
587 struct settings
*settings
;
588 struct in_addr address
= { 0 };
589 struct in_addr netmask
= { 0 };
590 struct in_addr gateway
= { 0 };
592 /* Delete all existing routes */
593 list_for_each_entry_safe ( miniroute
, tmp
, &ipv4_miniroutes
, list
)
594 del_ipv4_miniroute ( miniroute
);
596 /* Create a route for each configured network device */
597 for_each_netdev ( netdev
) {
598 settings
= netdev_settings ( netdev
);
599 /* Get IPv4 address */
601 fetch_ipv4_setting ( settings
, &ip_setting
, &address
);
602 if ( ! address
.s_addr
)
604 /* Get subnet mask */
605 fetch_ipv4_setting ( settings
, &netmask_setting
, &netmask
);
606 /* Calculate default netmask, if necessary */
607 if ( ! netmask
.s_addr
) {
608 if ( IN_CLASSA ( ntohl ( address
.s_addr
) ) ) {
609 netmask
.s_addr
= htonl ( IN_CLASSA_NET
);
610 } else if ( IN_CLASSB ( ntohl ( address
.s_addr
) ) ) {
611 netmask
.s_addr
= htonl ( IN_CLASSB_NET
);
612 } else if ( IN_CLASSC ( ntohl ( address
.s_addr
) ) ) {
613 netmask
.s_addr
= htonl ( IN_CLASSC_NET
);
616 /* Get default gateway, if present */
617 fetch_ipv4_setting ( settings
, &gateway_setting
, &gateway
);
618 /* Configure route */
619 miniroute
= add_ipv4_miniroute ( netdev
, address
,
628 /** IPv4 settings applicator */
629 struct settings_applicator ipv4_settings_applicator __settings_applicator
= {
630 .apply
= ipv4_create_routes
,
/* Require the icmp object alongside this file.
 * NOTE(review): presumably so that ICMP support is linked in whenever
 * IPv4 is; confirm against the definition of REQUIRE_OBJECT.
 */
REQUIRE_OBJECT ( icmp );