1 // SPDX-License-Identifier: GPL-2.0-only
2 /* Copyright (C) 2017 Cavium, Inc.
5 #include <linux/netlink.h>
6 #include <linux/rtnetlink.h>
13 #include <sys/socket.h>
16 #include <arpa/inet.h>
21 #include <sys/ioctl.h>
22 #include <sys/syscall.h>
24 #include <bpf/libbpf.h>
28 #include "xdp_sample_user.h"
29 #include "xdp_router_ipv4.skel.h"
31 static const char *__doc__
=
32 "XDP IPv4 router implementation\n"
33 "Usage: xdp_router_ipv4 <IFNAME-0> ... <IFNAME-N>\n";
35 static char buf
[8192];
36 static int lpm_map_fd
;
37 static int arp_table_map_fd
;
38 static int exact_match_map_fd
;
39 static int tx_port_map_fd
;
41 static bool routes_thread_exit
;
42 static int interval
= 5;
44 static int mask
= SAMPLE_RX_CNT
| SAMPLE_REDIRECT_ERR_MAP_CNT
|
45 SAMPLE_DEVMAP_XMIT_CNT_MULTI
| SAMPLE_EXCEPTION_CNT
;
47 DEFINE_SAMPLE_INIT(xdp_router_ipv4
);
49 static const struct option long_options
[] = {
50 { "help", no_argument
, NULL
, 'h' },
51 { "skb-mode", no_argument
, NULL
, 'S' },
52 { "force", no_argument
, NULL
, 'F' },
53 { "interval", required_argument
, NULL
, 'i' },
54 { "verbose", no_argument
, NULL
, 'v' },
55 { "stats", no_argument
, NULL
, 's' },
59 static int get_route_table(int rtm_family
);
61 static int recv_msg(struct sockaddr_nl sock_addr
, int sock
)
69 len
= recv(sock
, buf_ptr
, sizeof(buf
) - nll
, 0);
73 nh
= (struct nlmsghdr
*)buf_ptr
;
75 if (nh
->nlmsg_type
== NLMSG_DONE
)
79 if ((sock_addr
.nl_groups
& RTMGRP_NEIGH
) == RTMGRP_NEIGH
)
82 if ((sock_addr
.nl_groups
& RTMGRP_IPV4_ROUTE
) == RTMGRP_IPV4_ROUTE
)
88 /* Function to parse the route entry returned by netlink
89 * Updates the route entry related map entries
91 static void read_route(struct nlmsghdr
*nh
, int nll
)
93 char dsts
[24], gws
[24], ifs
[16], dsts_len
[24], metrics
[24];
94 struct bpf_lpm_trie_key_u8
*prefix_key
;
95 struct rtattr
*rt_attr
;
101 int dst_len
, iface
, metric
;
111 struct arp_table arp
;
116 memset(&route
, 0, sizeof(route
));
117 for (; NLMSG_OK(nh
, nll
); nh
= NLMSG_NEXT(nh
, nll
)) {
118 rt_msg
= (struct rtmsg
*)NLMSG_DATA(nh
);
119 rtm_family
= rt_msg
->rtm_family
;
120 if (rtm_family
== AF_INET
)
121 if (rt_msg
->rtm_table
!= RT_TABLE_MAIN
)
123 rt_attr
= (struct rtattr
*)RTM_RTA(rt_msg
);
124 rtl
= RTM_PAYLOAD(nh
);
126 for (; RTA_OK(rt_attr
, rtl
); rt_attr
= RTA_NEXT(rt_attr
, rtl
)) {
127 switch (rt_attr
->rta_type
) {
130 (*((__be32
*)RTA_DATA(rt_attr
))));
134 *((__be32
*)RTA_DATA(rt_attr
)));
138 *((int *)RTA_DATA(rt_attr
)));
141 sprintf(metrics
, "%u",
142 *((int *)RTA_DATA(rt_attr
)));
147 sprintf(dsts_len
, "%d", rt_msg
->rtm_dst_len
);
148 route
.dst
= atoi(dsts
);
149 route
.dst_len
= atoi(dsts_len
);
150 route
.gw
= atoi(gws
);
151 route
.iface
= atoi(ifs
);
152 route
.metric
= atoi(metrics
);
153 assert(get_mac_addr(route
.iface
, &route
.mac
) == 0);
154 assert(bpf_map_update_elem(tx_port_map_fd
,
155 &route
.iface
, &route
.iface
, 0) == 0);
156 if (rtm_family
== AF_INET
) {
165 prefix_key
= alloca(sizeof(*prefix_key
) + 4);
166 prefix_value
= alloca(sizeof(*prefix_value
));
168 prefix_key
->prefixlen
= 32;
169 prefix_key
->prefixlen
= route
.dst_len
;
170 direct_entry
.mac
= route
.mac
& 0xffffffffffff;
171 direct_entry
.ifindex
= route
.iface
;
172 direct_entry
.arp
.mac
= 0;
173 direct_entry
.arp
.dst
= 0;
174 if (route
.dst_len
== 32) {
175 if (nh
->nlmsg_type
== RTM_DELROUTE
) {
176 assert(bpf_map_delete_elem(exact_match_map_fd
,
179 if (bpf_map_lookup_elem(arp_table_map_fd
,
181 &direct_entry
.arp
.mac
) == 0)
182 direct_entry
.arp
.dst
= route
.dst
;
183 assert(bpf_map_update_elem(exact_match_map_fd
,
185 &direct_entry
, 0) == 0);
188 for (i
= 0; i
< 4; i
++)
189 prefix_key
->data
[i
] = (route
.dst
>> i
* 8) & 0xff;
191 if (bpf_map_lookup_elem(lpm_map_fd
, prefix_key
,
193 for (i
= 0; i
< 4; i
++)
194 prefix_value
->prefix
[i
] = prefix_key
->data
[i
];
195 prefix_value
->value
= route
.mac
& 0xffffffffffff;
196 prefix_value
->ifindex
= route
.iface
;
197 prefix_value
->gw
= route
.gw
;
198 prefix_value
->metric
= route
.metric
;
200 assert(bpf_map_update_elem(lpm_map_fd
,
205 if (nh
->nlmsg_type
== RTM_DELROUTE
) {
206 assert(bpf_map_delete_elem(lpm_map_fd
,
209 /* Rereading the route table to check if
210 * there is an entry with the same
211 * prefix but a different metric as the
214 get_route_table(AF_INET
);
215 } else if (prefix_key
->data
[0] ==
216 prefix_value
->prefix
[0] &&
217 prefix_key
->data
[1] ==
218 prefix_value
->prefix
[1] &&
219 prefix_key
->data
[2] ==
220 prefix_value
->prefix
[2] &&
221 prefix_key
->data
[3] ==
222 prefix_value
->prefix
[3] &&
223 route
.metric
>= prefix_value
->metric
) {
226 for (i
= 0; i
< 4; i
++)
227 prefix_value
->prefix
[i
] =
229 prefix_value
->value
=
230 route
.mac
& 0xffffffffffff;
231 prefix_value
->ifindex
= route
.iface
;
232 prefix_value
->gw
= route
.gw
;
233 prefix_value
->metric
= route
.metric
;
234 assert(bpf_map_update_elem(lpm_map_fd
,
241 memset(&route
, 0, sizeof(route
));
242 memset(dsts
, 0, sizeof(dsts
));
243 memset(dsts_len
, 0, sizeof(dsts_len
));
244 memset(gws
, 0, sizeof(gws
));
245 memset(ifs
, 0, sizeof(ifs
));
246 memset(&route
, 0, sizeof(route
));
250 /* Function to read the existing route table when the process is launched*/
251 static int get_route_table(int rtm_family
)
253 struct sockaddr_nl sa
;
267 sock
= socket(AF_NETLINK
, SOCK_RAW
, NETLINK_ROUTE
);
269 fprintf(stderr
, "open netlink socket: %s\n", strerror(errno
));
272 memset(&sa
, 0, sizeof(sa
));
273 sa
.nl_family
= AF_NETLINK
;
274 if (bind(sock
, (struct sockaddr
*)&sa
, sizeof(sa
)) < 0) {
275 fprintf(stderr
, "bind netlink socket: %s\n", strerror(errno
));
279 memset(&req
, 0, sizeof(req
));
280 req
.nl
.nlmsg_len
= NLMSG_LENGTH(sizeof(struct rtmsg
));
281 req
.nl
.nlmsg_flags
= NLM_F_REQUEST
| NLM_F_DUMP
;
282 req
.nl
.nlmsg_type
= RTM_GETROUTE
;
284 req
.rt
.rtm_family
= rtm_family
;
285 req
.rt
.rtm_table
= RT_TABLE_MAIN
;
286 req
.nl
.nlmsg_pid
= 0;
287 req
.nl
.nlmsg_seq
= ++seq
;
288 memset(&msg
, 0, sizeof(msg
));
289 iov
.iov_base
= (void *)&req
.nl
;
290 iov
.iov_len
= req
.nl
.nlmsg_len
;
293 ret
= sendmsg(sock
, &msg
, 0);
295 fprintf(stderr
, "send to netlink: %s\n", strerror(errno
));
299 memset(buf
, 0, sizeof(buf
));
300 nll
= recv_msg(sa
, sock
);
302 fprintf(stderr
, "recv from netlink: %s\n", strerror(nll
));
306 nh
= (struct nlmsghdr
*)buf
;
313 /* Function to parse the arp entry returned by netlink
314 * Updates the arp entry related map entries
316 static void read_arp(struct nlmsghdr
*nh
, int nll
)
318 struct rtattr
*rt_attr
;
319 char dsts
[24], mac
[24];
320 struct ndmsg
*rt_msg
;
328 struct arp_table arp
;
333 for (; NLMSG_OK(nh
, nll
); nh
= NLMSG_NEXT(nh
, nll
)) {
334 rt_msg
= (struct ndmsg
*)NLMSG_DATA(nh
);
335 rt_attr
= (struct rtattr
*)RTM_RTA(rt_msg
);
336 ndm_family
= rt_msg
->ndm_family
;
337 rtl
= RTM_PAYLOAD(nh
);
338 for (; RTA_OK(rt_attr
, rtl
); rt_attr
= RTA_NEXT(rt_attr
, rtl
)) {
339 switch (rt_attr
->rta_type
) {
342 *((__be32
*)RTA_DATA(rt_attr
)));
346 *((__be64
*)RTA_DATA(rt_attr
)));
352 arp_entry
.dst
= atoi(dsts
);
353 arp_entry
.mac
= atol(mac
);
355 if (ndm_family
== AF_INET
) {
356 if (bpf_map_lookup_elem(exact_match_map_fd
,
358 &direct_entry
) == 0) {
359 if (nh
->nlmsg_type
== RTM_DELNEIGH
) {
360 direct_entry
.arp
.dst
= 0;
361 direct_entry
.arp
.mac
= 0;
362 } else if (nh
->nlmsg_type
== RTM_NEWNEIGH
) {
363 direct_entry
.arp
.dst
= arp_entry
.dst
;
364 direct_entry
.arp
.mac
= arp_entry
.mac
;
366 assert(bpf_map_update_elem(exact_match_map_fd
,
370 memset(&direct_entry
, 0, sizeof(direct_entry
));
372 if (nh
->nlmsg_type
== RTM_DELNEIGH
) {
373 assert(bpf_map_delete_elem(arp_table_map_fd
,
374 &arp_entry
.dst
) == 0);
375 } else if (nh
->nlmsg_type
== RTM_NEWNEIGH
) {
376 assert(bpf_map_update_elem(arp_table_map_fd
,
382 memset(&arp_entry
, 0, sizeof(arp_entry
));
383 memset(dsts
, 0, sizeof(dsts
));
387 /* Function to read the existing arp table when the process is launched*/
388 static int get_arp_table(int rtm_family
)
390 struct sockaddr_nl sa
;
403 sock
= socket(AF_NETLINK
, SOCK_RAW
, NETLINK_ROUTE
);
405 fprintf(stderr
, "open netlink socket: %s\n", strerror(errno
));
408 memset(&sa
, 0, sizeof(sa
));
409 sa
.nl_family
= AF_NETLINK
;
410 if (bind(sock
, (struct sockaddr
*)&sa
, sizeof(sa
)) < 0) {
411 fprintf(stderr
, "bind netlink socket: %s\n", strerror(errno
));
415 memset(&req
, 0, sizeof(req
));
416 req
.nl
.nlmsg_len
= NLMSG_LENGTH(sizeof(struct rtmsg
));
417 req
.nl
.nlmsg_flags
= NLM_F_REQUEST
| NLM_F_DUMP
;
418 req
.nl
.nlmsg_type
= RTM_GETNEIGH
;
419 req
.rt
.ndm_state
= NUD_REACHABLE
;
420 req
.rt
.ndm_family
= rtm_family
;
421 req
.nl
.nlmsg_pid
= 0;
422 req
.nl
.nlmsg_seq
= ++seq
;
423 memset(&msg
, 0, sizeof(msg
));
424 iov
.iov_base
= (void *)&req
.nl
;
425 iov
.iov_len
= req
.nl
.nlmsg_len
;
428 ret
= sendmsg(sock
, &msg
, 0);
430 fprintf(stderr
, "send to netlink: %s\n", strerror(errno
));
434 memset(buf
, 0, sizeof(buf
));
435 nll
= recv_msg(sa
, sock
);
437 fprintf(stderr
, "recv from netlink: %s\n", strerror(nll
));
441 nh
= (struct nlmsghdr
*)buf
;
448 /* Function to keep track and update changes in route and arp table
449 * Give regular statistics of packets forwarded
451 static void *monitor_routes_thread(void *arg
)
453 struct pollfd fds_route
, fds_arp
;
454 struct sockaddr_nl la
, lr
;
455 int sock
, sock_arp
, nll
;
458 sock
= socket(AF_NETLINK
, SOCK_RAW
, NETLINK_ROUTE
);
460 fprintf(stderr
, "open netlink socket: %s\n", strerror(errno
));
464 fcntl(sock
, F_SETFL
, O_NONBLOCK
);
465 memset(&lr
, 0, sizeof(lr
));
466 lr
.nl_family
= AF_NETLINK
;
467 lr
.nl_groups
= RTMGRP_IPV6_ROUTE
| RTMGRP_IPV4_ROUTE
| RTMGRP_NOTIFY
;
468 if (bind(sock
, (struct sockaddr
*)&lr
, sizeof(lr
)) < 0) {
469 fprintf(stderr
, "bind netlink socket: %s\n", strerror(errno
));
475 fds_route
.events
= POLL_IN
;
477 sock_arp
= socket(AF_NETLINK
, SOCK_RAW
, NETLINK_ROUTE
);
479 fprintf(stderr
, "open netlink socket: %s\n", strerror(errno
));
484 fcntl(sock_arp
, F_SETFL
, O_NONBLOCK
);
485 memset(&la
, 0, sizeof(la
));
486 la
.nl_family
= AF_NETLINK
;
487 la
.nl_groups
= RTMGRP_NEIGH
| RTMGRP_NOTIFY
;
488 if (bind(sock_arp
, (struct sockaddr
*)&la
, sizeof(la
)) < 0) {
489 fprintf(stderr
, "bind netlink socket: %s\n", strerror(errno
));
493 fds_arp
.fd
= sock_arp
;
494 fds_arp
.events
= POLL_IN
;
496 /* dump route and arp tables */
497 if (get_arp_table(AF_INET
) < 0) {
498 fprintf(stderr
, "Failed reading arp table\n");
502 if (get_route_table(AF_INET
) < 0) {
503 fprintf(stderr
, "Failed reading route table\n");
507 while (!routes_thread_exit
) {
508 memset(buf
, 0, sizeof(buf
));
509 if (poll(&fds_route
, 1, 3) == POLL_IN
) {
510 nll
= recv_msg(lr
, sock
);
512 fprintf(stderr
, "recv from netlink: %s\n",
517 nh
= (struct nlmsghdr
*)buf
;
521 memset(buf
, 0, sizeof(buf
));
522 if (poll(&fds_arp
, 1, 3) == POLL_IN
) {
523 nll
= recv_msg(la
, sock_arp
);
525 fprintf(stderr
, "recv from netlink: %s\n",
530 nh
= (struct nlmsghdr
*)buf
;
543 static void usage(char *argv
[], const struct option
*long_options
,
544 const char *doc
, int mask
, bool error
,
545 struct bpf_object
*obj
)
547 sample_usage(argv
, long_options
, doc
, mask
, error
);
550 int main(int argc
, char **argv
)
552 bool error
= true, generic
= false, force
= false;
553 int opt
, ret
= EXIT_FAIL_BPF
;
554 struct xdp_router_ipv4
*skel
;
555 int i
, total_ifindex
= argc
- 1;
556 char **ifname_list
= argv
+ 1;
557 pthread_t routes_thread
;
560 if (libbpf_set_strict_mode(LIBBPF_STRICT_ALL
) < 0) {
561 fprintf(stderr
, "Failed to set libbpf strict mode: %s\n",
566 skel
= xdp_router_ipv4__open();
568 fprintf(stderr
, "Failed to xdp_router_ipv4__open: %s\n",
573 ret
= sample_init_pre_load(skel
);
575 fprintf(stderr
, "Failed to sample_init_pre_load: %s\n",
581 ret
= xdp_router_ipv4__load(skel
);
583 fprintf(stderr
, "Failed to xdp_router_ipv4__load: %s\n",
588 ret
= sample_init(skel
, mask
);
590 fprintf(stderr
, "Failed to initialize sample: %s\n", strerror(-ret
));
595 while ((opt
= getopt_long(argc
, argv
, "si:SFvh",
596 long_options
, &longindex
)) != -1) {
599 mask
|= SAMPLE_REDIRECT_MAP_CNT
;
604 interval
= strtoul(optarg
, NULL
, 0);
619 sample_switch_mode();
626 usage(argv
, long_options
, __doc__
, mask
, error
, skel
->obj
);
631 ret
= EXIT_FAIL_OPTION
;
632 if (optind
== argc
) {
633 usage(argv
, long_options
, __doc__
, mask
, true, skel
->obj
);
637 lpm_map_fd
= bpf_map__fd(skel
->maps
.lpm_map
);
638 if (lpm_map_fd
< 0) {
639 fprintf(stderr
, "Failed loading lpm_map %s\n",
640 strerror(-lpm_map_fd
));
643 arp_table_map_fd
= bpf_map__fd(skel
->maps
.arp_table
);
644 if (arp_table_map_fd
< 0) {
645 fprintf(stderr
, "Failed loading arp_table_map_fd %s\n",
646 strerror(-arp_table_map_fd
));
649 exact_match_map_fd
= bpf_map__fd(skel
->maps
.exact_match
);
650 if (exact_match_map_fd
< 0) {
651 fprintf(stderr
, "Failed loading exact_match_map_fd %s\n",
652 strerror(-exact_match_map_fd
));
655 tx_port_map_fd
= bpf_map__fd(skel
->maps
.tx_port
);
656 if (tx_port_map_fd
< 0) {
657 fprintf(stderr
, "Failed loading tx_port_map_fd %s\n",
658 strerror(-tx_port_map_fd
));
663 for (i
= 0; i
< total_ifindex
; i
++) {
664 int index
= if_nametoindex(ifname_list
[i
]);
667 fprintf(stderr
, "Interface %s not found %s\n",
668 ifname_list
[i
], strerror(-tx_port_map_fd
));
671 if (sample_install_xdp(skel
->progs
.xdp_router_ipv4_prog
,
672 index
, generic
, force
) < 0)
676 ret
= pthread_create(&routes_thread
, NULL
, monitor_routes_thread
, NULL
);
678 fprintf(stderr
, "Failed creating routes_thread: %s\n", strerror(-ret
));
683 ret
= sample_run(interval
, NULL
, NULL
);
684 routes_thread_exit
= true;
687 fprintf(stderr
, "Failed during sample run: %s\n", strerror(-ret
));
689 goto end_thread_wait
;
694 pthread_join(routes_thread
, NULL
);
696 xdp_router_ipv4__destroy(skel
);