10 #include <gpxe/if_ether.h>
11 #include <gpxe/iobuf.h>
12 #include <gpxe/netdevice.h>
14 #include <gpxe/tcpip.h>
22 /* Unique IP datagram identification number */
23 static uint16_t next_ident
= 0;
25 struct net_protocol ipv4_protocol
;
27 /** List of IPv4 miniroutes */
28 struct list_head ipv4_miniroutes
= LIST_HEAD_INIT ( ipv4_miniroutes
);
30 /** List of fragment reassembly buffers */
31 static LIST_HEAD ( frag_buffers
);
34 * Add IPv4 minirouting table entry
36 * @v netdev Network device
37 * @v address IPv4 address
38 * @v netmask Subnet mask
39 * @v gateway Gateway address (or @c INADDR_NONE for no gateway)
40 * @ret miniroute Routing table entry, or NULL
42 static struct ipv4_miniroute
* __malloc
43 add_ipv4_miniroute ( struct net_device
*netdev
, struct in_addr address
,
44 struct in_addr netmask
, struct in_addr gateway
) {
45 struct ipv4_miniroute
*miniroute
;
47 DBG ( "IPv4 add %s", inet_ntoa ( address
) );
48 DBG ( "/%s ", inet_ntoa ( netmask
) );
49 if ( gateway
.s_addr
!= INADDR_NONE
)
50 DBG ( "gw %s ", inet_ntoa ( gateway
) );
51 DBG ( "via %s\n", netdev
->name
);
53 /* Allocate and populate miniroute structure */
54 miniroute
= malloc ( sizeof ( *miniroute
) );
56 DBG ( "IPv4 could not add miniroute\n" );
60 /* Record routing information */
61 miniroute
->netdev
= netdev_get ( netdev
);
62 miniroute
->address
= address
;
63 miniroute
->netmask
= netmask
;
64 miniroute
->gateway
= gateway
;
66 /* Add to end of list if we have a gateway, otherwise
69 if ( gateway
.s_addr
!= INADDR_NONE
) {
70 list_add_tail ( &miniroute
->list
, &ipv4_miniroutes
);
72 list_add ( &miniroute
->list
, &ipv4_miniroutes
);
79 * Delete IPv4 minirouting table entry
81 * @v miniroute Routing table entry
83 static void del_ipv4_miniroute ( struct ipv4_miniroute
*miniroute
) {
85 DBG ( "IPv4 del %s", inet_ntoa ( miniroute
->address
) );
86 DBG ( "/%s ", inet_ntoa ( miniroute
->netmask
) );
87 if ( miniroute
->gateway
.s_addr
!= INADDR_NONE
)
88 DBG ( "gw %s ", inet_ntoa ( miniroute
->gateway
) );
89 DBG ( "via %s\n", miniroute
->netdev
->name
);
91 netdev_put ( miniroute
->netdev
);
92 list_del ( &miniroute
->list
);
99 * @v netdev Network device
100 * @v address IPv4 address
101 * @v netmask Subnet mask
102 * @v gateway Gateway address (or @c INADDR_NONE for no gateway)
103 * @ret rc Return status code
106 int add_ipv4_address ( struct net_device
*netdev
, struct in_addr address
,
107 struct in_addr netmask
, struct in_addr gateway
) {
108 struct ipv4_miniroute
*miniroute
;
110 /* Clear any existing address for this net device */
111 del_ipv4_address ( netdev
);
113 /* Add new miniroute */
114 miniroute
= add_ipv4_miniroute ( netdev
, address
, netmask
, gateway
);
122 * Remove IPv4 interface
124 * @v netdev Network device
126 void del_ipv4_address ( struct net_device
*netdev
) {
127 struct ipv4_miniroute
*miniroute
;
129 list_for_each_entry ( miniroute
, &ipv4_miniroutes
, list
) {
130 if ( miniroute
->netdev
== netdev
) {
131 del_ipv4_miniroute ( miniroute
);
138 * Perform IPv4 routing
140 * @v dest Final destination address
141 * @ret dest Next hop destination address
142 * @ret miniroute Routing table entry to use, or NULL if no route
144 * If the route requires use of a gateway, the next hop destination
145 * address will be overwritten with the gateway address.
147 static struct ipv4_miniroute
* ipv4_route ( struct in_addr
*dest
) {
148 struct ipv4_miniroute
*miniroute
;
152 /* Never attempt to route the broadcast address */
153 if ( dest
->s_addr
== INADDR_BROADCAST
)
156 /* Find first usable route in routing table */
157 list_for_each_entry ( miniroute
, &ipv4_miniroutes
, list
) {
158 local
= ( ( ( dest
->s_addr
^ miniroute
->address
.s_addr
)
159 & miniroute
->netmask
.s_addr
) == 0 );
160 has_gw
= ( miniroute
->gateway
.s_addr
!= INADDR_NONE
);
161 if ( local
|| has_gw
) {
163 *dest
= miniroute
->gateway
;
172 * Fragment reassembly counter timeout
174 * @v timer Retry timer
175 * @v over If asserted, the timer is greater than @c MAX_TIMEOUT
177 static void ipv4_frag_expired ( struct retry_timer
*timer __unused
,
180 DBG ( "Fragment reassembly timeout" );
181 /* Free the fragment buffer */
186 * Free fragment buffer
188 * @v fragbuf Fragment buffer
190 static void free_fragbuf ( struct frag_buffer
*fragbuf
) {
195 * Fragment reassembler
197 * @v iobuf I/O buffer, fragment of the datagram
198 * @ret frag_iob Reassembled packet, or NULL
200 static struct io_buffer
* ipv4_reassemble ( struct io_buffer
* iobuf
) {
201 struct iphdr
*iphdr
= iobuf
->data
;
202 struct frag_buffer
*fragbuf
;
205 * Check if the fragment belongs to any fragment series
207 list_for_each_entry ( fragbuf
, &frag_buffers
, list
) {
208 if ( fragbuf
->ident
== iphdr
->ident
&&
209 fragbuf
->src
.s_addr
== iphdr
->src
.s_addr
) {
211 * Check if the packet is the expected fragment
213 * The offset of the new packet must be equal to the
214 * length of the data accumulated so far (the length of
215 * the reassembled I/O buffer
217 if ( iob_len ( fragbuf
->frag_iob
) ==
218 ( iphdr
->frags
& IP_MASK_OFFSET
) ) {
220 * Append the contents of the fragment to the
221 * reassembled I/O buffer
223 iob_pull ( iobuf
, sizeof ( *iphdr
) );
224 memcpy ( iob_put ( fragbuf
->frag_iob
,
226 iobuf
->data
, iob_len ( iobuf
) );
229 /** Check if the fragment series is over */
230 if ( !iphdr
->frags
& IP_MASK_MOREFRAGS
) {
231 iobuf
= fragbuf
->frag_iob
;
232 free_fragbuf ( fragbuf
);
237 /* Discard the fragment series */
238 free_fragbuf ( fragbuf
);
245 /** Check if the fragment is the first in the fragment series */
246 if ( iphdr
->frags
& IP_MASK_MOREFRAGS
&&
247 ( ( iphdr
->frags
& IP_MASK_OFFSET
) == 0 ) ) {
249 /** Create a new fragment buffer */
250 fragbuf
= ( struct frag_buffer
* ) malloc ( sizeof( *fragbuf
) );
251 fragbuf
->ident
= iphdr
->ident
;
252 fragbuf
->src
= iphdr
->src
;
254 /* Set up the reassembly I/O buffer */
255 fragbuf
->frag_iob
= alloc_iob ( IP_FRAG_IOB_SIZE
);
256 iob_pull ( iobuf
, sizeof ( *iphdr
) );
257 memcpy ( iob_put ( fragbuf
->frag_iob
, iob_len ( iobuf
) ),
258 iobuf
->data
, iob_len ( iobuf
) );
261 /* Set the reassembly timer */
262 fragbuf
->frag_timer
.timeout
= IP_FRAG_TIMEOUT
;
263 fragbuf
->frag_timer
.expired
= ipv4_frag_expired
;
264 start_timer ( &fragbuf
->frag_timer
);
266 /* Add the fragment buffer to the list of fragment buffers */
267 list_add ( &fragbuf
->list
, &frag_buffers
);
274 * Add IPv4 pseudo-header checksum to existing checksum
276 * @v iobuf I/O buffer
277 * @v csum Existing checksum
278 * @ret csum Updated checksum
280 static uint16_t ipv4_pshdr_chksum ( struct io_buffer
*iobuf
, uint16_t csum
) {
281 struct ipv4_pseudo_header pshdr
;
282 struct iphdr
*iphdr
= iobuf
->data
;
283 size_t hdrlen
= ( ( iphdr
->verhdrlen
& IP_MASK_HLEN
) * 4 );
285 /* Build pseudo-header */
286 pshdr
.src
= iphdr
->src
;
287 pshdr
.dest
= iphdr
->dest
;
288 pshdr
.zero_padding
= 0x00;
289 pshdr
.protocol
= iphdr
->protocol
;
290 pshdr
.len
= htons ( iob_len ( iobuf
) - hdrlen
);
292 /* Update the checksum value */
293 return tcpip_continue_chksum ( csum
, &pshdr
, sizeof ( pshdr
) );
297 * Determine link-layer address
299 * @v dest IPv4 destination address
300 * @v src IPv4 source address
301 * @v netdev Network device
302 * @v ll_dest Link-layer destination address buffer
303 * @ret rc Return status code
305 static int ipv4_ll_addr ( struct in_addr dest
, struct in_addr src
,
306 struct net_device
*netdev
, uint8_t *ll_dest
) {
307 struct ll_protocol
*ll_protocol
= netdev
->ll_protocol
;
308 uint8_t *dest_bytes
= ( ( uint8_t * ) &dest
);
310 if ( dest
.s_addr
== INADDR_BROADCAST
) {
311 /* Broadcast address */
312 memcpy ( ll_dest
, ll_protocol
->ll_broadcast
,
313 ll_protocol
->ll_addr_len
);
315 } else if ( IN_MULTICAST ( dest
.s_addr
) ) {
316 /* Special case: IPv4 multicast over Ethernet. This
317 * code may need to be generalised once we find out
318 * what happens for other link layers.
323 ll_dest
[3] = dest_bytes
[1] & 0x7f;
324 ll_dest
[4] = dest_bytes
[2];
325 ll_dest
[5] = dest_bytes
[3];
328 /* Unicast address: resolve via ARP */
329 return arp_resolve ( netdev
, &ipv4_protocol
, &dest
,
337 * @v iobuf I/O buffer
338 * @v tcpip Transport-layer protocol
339 * @v st_dest Destination network-layer address
340 * @v netdev Network device to use if no route found, or NULL
341 * @v trans_csum Transport-layer checksum to complete, or NULL
344 * This function expects a transport-layer segment and prepends the IP header
346 static int ipv4_tx ( struct io_buffer
*iobuf
,
347 struct tcpip_protocol
*tcpip_protocol
,
348 struct sockaddr_tcpip
*st_dest
,
349 struct net_device
*netdev
,
350 uint16_t *trans_csum
) {
351 struct iphdr
*iphdr
= iob_push ( iobuf
, sizeof ( *iphdr
) );
352 struct sockaddr_in
*sin_dest
= ( ( struct sockaddr_in
* ) st_dest
);
353 struct ipv4_miniroute
*miniroute
;
354 struct in_addr next_hop
;
355 uint8_t ll_dest
[MAX_LL_ADDR_LEN
];
358 /* Fill up the IP header, except source address */
359 memset ( iphdr
, 0, sizeof ( *iphdr
) );
360 iphdr
->verhdrlen
= ( IP_VER
| ( sizeof ( *iphdr
) / 4 ) );
361 iphdr
->service
= IP_TOS
;
362 iphdr
->len
= htons ( iob_len ( iobuf
) );
363 iphdr
->ident
= htons ( ++next_ident
);
365 iphdr
->protocol
= tcpip_protocol
->tcpip_proto
;
366 iphdr
->dest
= sin_dest
->sin_addr
;
368 /* Use routing table to identify next hop and transmitting netdev */
369 next_hop
= iphdr
->dest
;
370 if ( ( miniroute
= ipv4_route ( &next_hop
) ) ) {
371 iphdr
->src
= miniroute
->address
;
372 netdev
= miniroute
->netdev
;
375 DBG ( "IPv4 has no route to %s\n", inet_ntoa ( iphdr
->dest
) );
380 /* Determine link-layer destination address */
381 if ( ( rc
= ipv4_ll_addr ( next_hop
, iphdr
->src
, netdev
,
383 DBG ( "IPv4 has no link-layer address for %s: %s\n",
384 inet_ntoa ( next_hop
), strerror ( rc
) );
388 /* Fix up checksums */
390 *trans_csum
= ipv4_pshdr_chksum ( iobuf
, *trans_csum
);
391 iphdr
->chksum
= tcpip_chksum ( iphdr
, sizeof ( *iphdr
) );
393 /* Print IP4 header for debugging */
394 DBG ( "IPv4 TX %s->", inet_ntoa ( iphdr
->src
) );
395 DBG ( "%s len %d proto %d id %04x csum %04x\n",
396 inet_ntoa ( iphdr
->dest
), ntohs ( iphdr
->len
), iphdr
->protocol
,
397 ntohs ( iphdr
->ident
), ntohs ( iphdr
->chksum
) );
399 /* Hand off to link layer */
400 if ( ( rc
= net_tx ( iobuf
, netdev
, &ipv4_protocol
, ll_dest
) ) != 0 ) {
401 DBG ( "IPv4 could not transmit packet via %s: %s\n",
402 netdev
->name
, strerror ( rc
) );
414 * Process incoming packets
416 * @v iobuf I/O buffer
417 * @v netdev Network device
418 * @v ll_source Link-layer destination source
420 * This function expects an IP4 network datagram. It processes the headers
421 * and sends it to the transport layer.
423 static int ipv4_rx ( struct io_buffer
*iobuf
, struct net_device
*netdev __unused
,
424 const void *ll_source __unused
) {
425 struct iphdr
*iphdr
= iobuf
->data
;
429 struct sockaddr_in sin
;
430 struct sockaddr_tcpip st
;
436 /* Sanity check the IPv4 header */
437 if ( iob_len ( iobuf
) < sizeof ( *iphdr
) ) {
438 DBG ( "IPv4 packet too short at %d bytes (min %d bytes)\n",
439 iob_len ( iobuf
), sizeof ( *iphdr
) );
442 if ( ( iphdr
->verhdrlen
& IP_MASK_VER
) != IP_VER
) {
443 DBG ( "IPv4 version %#02x not supported\n", iphdr
->verhdrlen
);
446 hdrlen
= ( ( iphdr
->verhdrlen
& IP_MASK_HLEN
) * 4 );
447 if ( hdrlen
< sizeof ( *iphdr
) ) {
448 DBG ( "IPv4 header too short at %d bytes (min %d bytes)\n",
449 hdrlen
, sizeof ( *iphdr
) );
452 if ( hdrlen
> iob_len ( iobuf
) ) {
453 DBG ( "IPv4 header too long at %d bytes "
454 "(packet is %d bytes)\n", hdrlen
, iob_len ( iobuf
) );
457 if ( ( csum
= tcpip_chksum ( iphdr
, hdrlen
) ) != 0 ) {
458 DBG ( "IPv4 checksum incorrect (is %04x including checksum "
459 "field, should be 0000)\n", csum
);
462 len
= ntohs ( iphdr
->len
);
463 if ( len
< hdrlen
) {
464 DBG ( "IPv4 length too short at %d bytes "
465 "(header is %d bytes)\n", len
, hdrlen
);
468 if ( len
> iob_len ( iobuf
) ) {
469 DBG ( "IPv4 length too long at %d bytes "
470 "(packet is %d bytes)\n", len
, iob_len ( iobuf
) );
474 /* Print IPv4 header for debugging */
475 DBG ( "IPv4 RX %s<-", inet_ntoa ( iphdr
->dest
) );
476 DBG ( "%s len %d proto %d id %04x csum %04x\n",
477 inet_ntoa ( iphdr
->src
), ntohs ( iphdr
->len
), iphdr
->protocol
,
478 ntohs ( iphdr
->ident
), ntohs ( iphdr
->chksum
) );
480 /* Truncate packet to correct length, calculate pseudo-header
481 * checksum and then strip off the IPv4 header.
483 iob_unput ( iobuf
, ( iob_len ( iobuf
) - len
) );
484 pshdr_csum
= ipv4_pshdr_chksum ( iobuf
, TCPIP_EMPTY_CSUM
);
485 iob_pull ( iobuf
, hdrlen
);
487 /* Fragment reassembly */
488 if ( ( iphdr
->frags
& htons ( IP_MASK_MOREFRAGS
) ) ||
489 ( ( iphdr
->frags
& htons ( IP_MASK_OFFSET
) ) != 0 ) ) {
490 /* Pass the fragment to ipv4_reassemble() which either
491 * returns a fully reassembled I/O buffer or NULL.
493 iobuf
= ipv4_reassemble ( iobuf
);
498 /* Construct socket addresses and hand off to transport layer */
499 memset ( &src
, 0, sizeof ( src
) );
500 src
.sin
.sin_family
= AF_INET
;
501 src
.sin
.sin_addr
= iphdr
->src
;
502 memset ( &dest
, 0, sizeof ( dest
) );
503 dest
.sin
.sin_family
= AF_INET
;
504 dest
.sin
.sin_addr
= iphdr
->dest
;
505 if ( ( rc
= tcpip_rx ( iobuf
, iphdr
->protocol
, &src
.st
,
506 &dest
.st
, pshdr_csum
) ) != 0 ) {
507 DBG ( "IPv4 received packet rejected by stack: %s\n",
520 * Check existence of IPv4 address for ARP
522 * @v netdev Network device
523 * @v net_addr Network-layer address
524 * @ret rc Return status code
526 static int ipv4_arp_check ( struct net_device
*netdev
, const void *net_addr
) {
527 const struct in_addr
*address
= net_addr
;
528 struct ipv4_miniroute
*miniroute
;
530 list_for_each_entry ( miniroute
, &ipv4_miniroutes
, list
) {
531 if ( ( miniroute
->netdev
== netdev
) &&
532 ( miniroute
->address
.s_addr
== address
->s_addr
) ) {
533 /* Found matching address */
/**
 * Convert IPv4 address to dotted-quad notation
 *
 * @v in	IP address
 * @ret string	IP address in dotted-quad notation
 *
 * The result lives in a single static buffer: it is overwritten by the
 * next call, so it must be consumed (or copied) before calling again.
 */
char * inet_ntoa ( struct in_addr in ) {
	static char buf[16]; /* "xxx.xxx.xxx.xxx" */
	const uint8_t *bytes = ( const uint8_t * ) &in;

	/* Bounded write: the output can never run past the static buffer */
	snprintf ( buf, sizeof ( buf ), "%d.%d.%d.%d",
		   bytes[0], bytes[1], bytes[2], bytes[3] );
	return buf;
}
555 * Transcribe IP address
557 * @v net_addr IP address
558 * @ret string IP address in dotted-quad notation
561 static const char * ipv4_ntoa ( const void *net_addr
) {
562 return inet_ntoa ( * ( ( struct in_addr
* ) net_addr
) );
566 struct net_protocol ipv4_protocol __net_protocol
= {
568 .net_proto
= htons ( ETH_P_IP
),
569 .net_addr_len
= sizeof ( struct in_addr
),
574 /** IPv4 TCPIP net protocol */
575 struct tcpip_net_protocol ipv4_tcpip_protocol __tcpip_net_protocol
= {
577 .sa_family
= AF_INET
,
581 /** IPv4 ARP protocol */
582 struct arp_net_protocol ipv4_arp_protocol __arp_net_protocol
= {
583 .net_protocol
= &ipv4_protocol
,
584 .check
= ipv4_arp_check
,