1 // SPDX-License-Identifier: GPL-2.0-only
2 /* Copyright (C) 2017 Cavium, Inc.
#include <linux/netlink.h>
#include <linux/rtnetlink.h>
#include <sys/socket.h>
#include <arpa/inet.h>
#include <sys/ioctl.h>
#include <sys/syscall.h>
#include <unistd.h>
#include "bpf/libbpf.h"
#include <sys/resource.h>
/* Netlink socket descriptors used by monitor_route(), and the XDP attach
 * flags that main() adjusts from the command-line options before attaching.
 */
int sock;
int sock_arp;
int flags = XDP_FLAGS_UPDATE_IF_NOEXIST;

/* Interfaces handled by this process and, at the same index, the program id
 * that main() recorded for each interface after attaching.
 */
static int total_ifindex;
static int *ifindex_list;
static __u32 *prog_id_list;

/* Descriptors of the BPF maps that main() looks up by name. */
static int lpm_map_fd;
static int rxcnt_map_fd;
static int arp_table_map_fd;
static int exact_match_map_fd;
static int tx_port_map_fd;

/* Forward declaration: read_route() calls this to re-read the route table. */
static int get_route_table(int rtm_family);
/* Signal handler that main() registers for SIGINT and SIGTERM.
 * For each of the total_ifindex interfaces it queries the id of the XDP
 * program currently attached. The program is detached (fd -1) only when
 * that id still equals the one stored in prog_id_list; otherwise a message
 * explains why nothing was removed.
 * NOTE(review): printf() is called from signal context here; it is not
 * async-signal-safe - confirm this is acceptable for this tool.
 */
40 static void int_exit(int sig
)
45 for (i
= 0; i
< total_ifindex
; i
++) {
46 if (bpf_get_link_xdp_id(ifindex_list
[i
], &prog_id
, flags
)) {
47 printf("bpf_get_link_xdp_id on iface %d failed\n",
/* Detach only the program this process attached itself. */
51 if (prog_id_list
[i
] == prog_id
)
52 bpf_set_link_xdp_fd(ifindex_list
[i
], -1, flags
);
54 printf("couldn't find a prog id on iface %d\n",
57 printf("program on iface %d changed, not removing\n",
/* Signal handler that monitor_route() installs for SIGINT and SIGTERM. */
64 static void close_and_exit(int sig
)
/* Get the MAC address of the interface with the given name.
 *
 * @iface: NUL-terminated interface name; names longer than IFNAMSIZ - 1
 *         characters are truncated before the lookup.
 *
 * The six hardware-address bytes reported by SIOCGIFHWADDR are stored, in
 * order, in the first six bytes of the returned value; the remaining two
 * bytes are zero.
 *
 * Returns the packed address, or (__be64)-1 when the helper socket cannot
 * be created or the ioctl fails.
 */
static __be64 getmac(char *iface)
{
	struct ifreq ifr;
	__be64 mac = 0;
	int fd, i;

	fd = socket(AF_INET, SOCK_DGRAM, 0);
	if (fd < 0) {
		printf("socket failed leaving....\n");
		return -1;
	}

	/* Zero the request so ifr_name stays NUL-terminated even when
	 * strncpy() copies the full IFNAMSIZ - 1 characters.
	 */
	memset(&ifr, 0, sizeof(ifr));
	ifr.ifr_addr.sa_family = AF_INET;
	strncpy(ifr.ifr_name, iface, IFNAMSIZ - 1);
	if (ioctl(fd, SIOCGIFHWADDR, &ifr) < 0) {
		printf("ioctl failed leaving....\n");
		/* Do not leak the helper socket on the error path. */
		close(fd);
		return -1;
	}
	for (i = 0; i < 6 ; i++)
		*((__u8 *)&mac + i) = (__u8)ifr.ifr_hwaddr.sa_data[i];
	close(fd);
	return mac;
}
/* Receive netlink data from @sock into the shared buffer buf.
 * @sock_addr is the address the socket was bound with; its nl_groups mask is
 * tested for RTMGRP_NEIGH and RTMGRP_IPV4_ROUTE after data has been received.
 * NOTE(review): @sock_addr is passed by value although only nl_groups is
 * read in the code shown - a pointer would avoid the struct copy.
 */
92 static int recv_msg(struct sockaddr_nl sock_addr
, int sock
)
/* Only the space left after the nll bytes already stored is offered. */
100 len
= recv(sock
, buf_ptr
, sizeof(buf
) - nll
, 0);
104 nh
= (struct nlmsghdr
*)buf_ptr
;
/* NLMSG_DONE marks the end of a multipart netlink reply. */
106 if (nh
->nlmsg_type
== NLMSG_DONE
)
110 if ((sock_addr
.nl_groups
& RTMGRP_NEIGH
) == RTMGRP_NEIGH
)
113 if ((sock_addr
.nl_groups
& RTMGRP_IPV4_ROUTE
) == RTMGRP_IPV4_ROUTE
)
119 /* Parse the route entries returned by netlink and
120 * update the map entries that depend on them.
/* Walk the netlink messages starting at @nh (@nll bytes in total) and mirror
 * each route into the BPF maps: tx_port is updated for the route's
 * interface, /32 routes go to exact_match, and the remaining handling uses
 * lpm_map. RTM_DELROUTE messages delete entries instead of adding them.
 * NOTE(review): route is cleared twice and metrics does not appear among the
 * buffers reset at the end of each message - confirm.
 */
122 static void read_route(struct nlmsghdr
*nh
, int nll
)
124 char dsts
[24], gws
[24], ifs
[16], dsts_len
[24], metrics
[24];
125 struct bpf_lpm_trie_key
*prefix_key
;
126 struct rtattr
*rt_attr
;
127 struct rtmsg
*rt_msg
;
132 int dst_len
, iface
, metric
;
143 struct arp_table arp
;
148 if (nh
->nlmsg_type
== RTM_DELROUTE
)
149 printf("DELETING Route entry\n");
150 else if (nh
->nlmsg_type
== RTM_GETROUTE
)
151 printf("READING Route entry\n");
152 else if (nh
->nlmsg_type
== RTM_NEWROUTE
)
153 printf("NEW Route entry\n");
155 printf("%d\n", nh
->nlmsg_type
);
157 memset(&route
, 0, sizeof(route
));
158 printf("Destination\t\tGateway\t\tGenmask\t\tMetric\t\tIface\n");
/* Outer loop: one iteration per netlink message in the buffer. */
159 for (; NLMSG_OK(nh
, nll
); nh
= NLMSG_NEXT(nh
, nll
)) {
160 rt_msg
= (struct rtmsg
*)NLMSG_DATA(nh
);
161 rtm_family
= rt_msg
->rtm_family
;
162 if (rtm_family
== AF_INET
)
163 if (rt_msg
->rtm_table
!= RT_TABLE_MAIN
)
165 rt_attr
= (struct rtattr
*)RTM_RTA(rt_msg
);
166 rtl
= RTM_PAYLOAD(nh
);
/* Inner loop: one iteration per route attribute of this message. */
168 for (; RTA_OK(rt_attr
, rtl
); rt_attr
= RTA_NEXT(rt_attr
, rtl
)) {
169 switch (rt_attr
->rta_type
) {
172 (*((__be32
*)RTA_DATA(rt_attr
))));
176 *((__be32
*)RTA_DATA(rt_attr
)));
180 *((int *)RTA_DATA(rt_attr
)));
183 sprintf(metrics
, "%u",
184 *((int *)RTA_DATA(rt_attr
)));
189 sprintf(dsts_len
, "%d", rt_msg
->rtm_dst_len
);
/* The text buffers are converted back to integers for the route record. */
190 route
.dst
= atoi(dsts
);
191 route
.dst_len
= atoi(dsts_len
);
192 route
.gw
= atoi(gws
);
193 route
.iface
= atoi(ifs
);
194 route
.metric
= atoi(metrics
);
/* NOTE(review): this reserves sizeof(char *) * IFNAMSIZ bytes although the
 * buffer holds characters; the if_indextoname() result is also passed to
 * getmac() without a NULL check - confirm.
 */
195 route
.iface_name
= alloca(sizeof(char *) * IFNAMSIZ
);
196 route
.iface_name
= if_indextoname(route
.iface
, route
.iface_name
);
197 route
.mac
= getmac(route
.iface_name
);
/* NOTE(review): the map update sits inside assert(), so a build with NDEBUG
 * would drop the call itself; the same pattern recurs below.
 */
200 assert(bpf_map_update_elem(tx_port_map_fd
,
201 &route
.iface
, &route
.iface
, 0) == 0);
202 if (rtm_family
== AF_INET
) {
/* The key is sized as the trie key header plus 3 extra bytes. */
211 prefix_key
= alloca(sizeof(*prefix_key
) + 3);
212 prefix_value
= alloca(sizeof(*prefix_value
));
/* NOTE(review): prefixlen is set to 32 and immediately overwritten. */
214 prefix_key
->prefixlen
= 32;
215 prefix_key
->prefixlen
= route
.dst_len
;
216 direct_entry
.mac
= route
.mac
& 0xffffffffffff;
217 direct_entry
.ifindex
= route
.iface
;
218 direct_entry
.arp
.mac
= 0;
219 direct_entry
.arp
.dst
= 0;
/* Host routes (/32) are handled through the exact_match map. */
220 if (route
.dst_len
== 32) {
221 if (nh
->nlmsg_type
== RTM_DELROUTE
) {
222 assert(bpf_map_delete_elem(exact_match_map_fd
,
225 if (bpf_map_lookup_elem(arp_table_map_fd
,
227 &direct_entry
.arp
.mac
) == 0)
228 direct_entry
.arp
.dst
= route
.dst
;
229 assert(bpf_map_update_elem(exact_match_map_fd
,
231 &direct_entry
, 0) == 0);
/* Store the destination in the key one byte at a time, lowest byte first. */
234 for (i
= 0; i
< 4; i
++)
235 prefix_key
->data
[i
] = (route
.dst
>> i
* 8) & 0xff;
237 printf("%3d.%d.%d.%d\t\t%3x\t\t%d\t\t%d\t\t%s\n",
238 (int)prefix_key
->data
[0],
239 (int)prefix_key
->data
[1],
240 (int)prefix_key
->data
[2],
241 (int)prefix_key
->data
[3],
242 route
.gw
, route
.dst_len
,
245 if (bpf_map_lookup_elem(lpm_map_fd
, prefix_key
,
247 for (i
= 0; i
< 4; i
++)
248 prefix_value
->prefix
[i
] = prefix_key
->data
[i
];
249 prefix_value
->value
= route
.mac
& 0xffffffffffff;
250 prefix_value
->ifindex
= route
.iface
;
251 prefix_value
->gw
= route
.gw
;
252 prefix_value
->metric
= route
.metric
;
254 assert(bpf_map_update_elem(lpm_map_fd
,
259 if (nh
->nlmsg_type
== RTM_DELROUTE
) {
260 printf("deleting entry\n");
261 printf("prefix key=%d.%d.%d.%d/%d",
266 prefix_key
->prefixlen
);
267 assert(bpf_map_delete_elem(lpm_map_fd
,
270 /* Rereading the route table to check if
271 * there is an entry with the same
272 * prefix but a different metric as the
275 get_route_table(AF_INET
);
276 } else if (prefix_key
->data
[0] ==
277 prefix_value
->prefix
[0] &&
278 prefix_key
->data
[1] ==
279 prefix_value
->prefix
[1] &&
280 prefix_key
->data
[2] ==
281 prefix_value
->prefix
[2] &&
282 prefix_key
->data
[3] ==
283 prefix_value
->prefix
[3] &&
284 route
.metric
>= prefix_value
->metric
) {
287 for (i
= 0; i
< 4; i
++)
288 prefix_value
->prefix
[i
] =
290 prefix_value
->value
=
291 route
.mac
& 0xffffffffffff;
292 prefix_value
->ifindex
= route
.iface
;
293 prefix_value
->gw
= route
.gw
;
294 prefix_value
->metric
= route
.metric
;
295 assert(bpf_map_update_elem(lpm_map_fd
,
302 memset(&route
, 0, sizeof(route
));
303 memset(dsts
, 0, sizeof(dsts
));
304 memset(dsts_len
, 0, sizeof(dsts_len
));
305 memset(gws
, 0, sizeof(gws
));
306 memset(ifs
, 0, sizeof(ifs
));
307 memset(&route
, 0, sizeof(route
));
311 /* Read the route table that exists when the process is launched.
 * Opens a NETLINK_ROUTE socket, binds it, sends an RTM_GETROUTE dump request
 * for @rtm_family with rtm_table set to RT_TABLE_MAIN, and collects the reply
 * into buf through recv_msg().
 */
312 static int get_route_table(int rtm_family
)
314 struct sockaddr_nl sa
;
328 sock
= socket(AF_NETLINK
, SOCK_RAW
, NETLINK_ROUTE
);
330 printf("open netlink socket: %s\n", strerror(errno
));
333 memset(&sa
, 0, sizeof(sa
));
334 sa
.nl_family
= AF_NETLINK
;
335 if (bind(sock
, (struct sockaddr
*)&sa
, sizeof(sa
)) < 0) {
336 printf("bind to netlink: %s\n", strerror(errno
));
/* Build the dump request: netlink header followed by a struct rtmsg. */
340 memset(&req
, 0, sizeof(req
));
341 req
.nl
.nlmsg_len
= NLMSG_LENGTH(sizeof(struct rtmsg
));
342 req
.nl
.nlmsg_flags
= NLM_F_REQUEST
| NLM_F_DUMP
;
343 req
.nl
.nlmsg_type
= RTM_GETROUTE
;
345 req
.rt
.rtm_family
= rtm_family
;
346 req
.rt
.rtm_table
= RT_TABLE_MAIN
;
347 req
.nl
.nlmsg_pid
= 0;
348 req
.nl
.nlmsg_seq
= ++seq
;
349 memset(&msg
, 0, sizeof(msg
));
350 iov
.iov_base
= (void *)&req
.nl
;
351 iov
.iov_len
= req
.nl
.nlmsg_len
;
354 ret
= sendmsg(sock
, &msg
, 0);
356 printf("send to netlink: %s\n", strerror(errno
));
360 memset(buf
, 0, sizeof(buf
));
361 nll
= recv_msg(sa
, sock
);
/* NOTE(review): strerror() is given recv_msg()'s return value here, whereas
 * every other message in this function uses errno - presumably errno was
 * intended; confirm.
 */
363 printf("recv from netlink: %s\n", strerror(nll
));
367 nh
= (struct nlmsghdr
*)buf
;
374 /* Parse the ARP (neighbour) entries returned by netlink and
375 * update the map entries that depend on them.
/* Walk the netlink neighbour messages starting at @nh (@nll bytes in total).
 * For AF_INET entries the matching exact_match element, when one exists, has
 * its arp fields cleared (RTM_DELNEIGH) or filled in (RTM_NEWNEIGH), and the
 * arp_table map entry keyed by the address is deleted or updated likewise.
 */
377 static void read_arp(struct nlmsghdr
*nh
, int nll
)
379 struct rtattr
*rt_attr
;
380 char dsts
[24], mac
[24];
381 struct ndmsg
*rt_msg
;
389 struct arp_table arp
;
394 if (nh
->nlmsg_type
== RTM_GETNEIGH
)
395 printf("READING arp entry\n");
396 printf("Address\tHwAddress\n");
/* Outer loop: one iteration per netlink message in the buffer. */
397 for (; NLMSG_OK(nh
, nll
); nh
= NLMSG_NEXT(nh
, nll
)) {
398 rt_msg
= (struct ndmsg
*)NLMSG_DATA(nh
);
399 rt_attr
= (struct rtattr
*)RTM_RTA(rt_msg
);
400 ndm_family
= rt_msg
->ndm_family
;
401 rtl
= RTM_PAYLOAD(nh
);
/* Inner loop: one iteration per attribute of this message. */
402 for (; RTA_OK(rt_attr
, rtl
); rt_attr
= RTA_NEXT(rt_attr
, rtl
)) {
403 switch (rt_attr
->rta_type
) {
406 *((__be32
*)RTA_DATA(rt_attr
)));
/* NOTE(review): this reads 8 bytes from the attribute payload; if the
 * attribute carries a 6-byte link-layer address the read runs past it -
 * confirm.
 */
410 *((__be64
*)RTA_DATA(rt_attr
)));
416 arp_entry
.dst
= atoi(dsts
);
/* NOTE(review): atol() returns long while the value is printed with %llx
 * below; where long is 32 bits the address would be truncated - confirm.
 */
417 arp_entry
.mac
= atol(mac
);
418 printf("%x\t\t%llx\n", arp_entry
.dst
, arp_entry
.mac
);
419 if (ndm_family
== AF_INET
) {
420 if (bpf_map_lookup_elem(exact_match_map_fd
,
422 &direct_entry
) == 0) {
423 if (nh
->nlmsg_type
== RTM_DELNEIGH
) {
424 direct_entry
.arp
.dst
= 0;
425 direct_entry
.arp
.mac
= 0;
426 } else if (nh
->nlmsg_type
== RTM_NEWNEIGH
) {
427 direct_entry
.arp
.dst
= arp_entry
.dst
;
428 direct_entry
.arp
.mac
= arp_entry
.mac
;
430 assert(bpf_map_update_elem(exact_match_map_fd
,
434 memset(&direct_entry
, 0, sizeof(direct_entry
));
/* NOTE(review): a failed delete (for example a key that is not present)
 * trips the assert and aborts the program - confirm this is intended.
 */
436 if (nh
->nlmsg_type
== RTM_DELNEIGH
) {
437 assert(bpf_map_delete_elem(arp_table_map_fd
,
438 &arp_entry
.dst
) == 0);
439 } else if (nh
->nlmsg_type
== RTM_NEWNEIGH
) {
440 assert(bpf_map_update_elem(arp_table_map_fd
,
446 memset(&arp_entry
, 0, sizeof(arp_entry
));
447 memset(dsts
, 0, sizeof(dsts
));
451 /* Read the ARP table that exists when the process is launched.
 * Opens a NETLINK_ROUTE socket, binds it, sends an RTM_GETNEIGH dump request
 * with ndm_family set to @rtm_family and ndm_state set to NUD_REACHABLE, and
 * collects the reply into buf through recv_msg().
 */
452 static int get_arp_table(int rtm_family
)
454 struct sockaddr_nl sa
;
467 sock
= socket(AF_NETLINK
, SOCK_RAW
, NETLINK_ROUTE
);
469 printf("open netlink socket: %s\n", strerror(errno
));
472 memset(&sa
, 0, sizeof(sa
));
473 sa
.nl_family
= AF_NETLINK
;
474 if (bind(sock
, (struct sockaddr
*)&sa
, sizeof(sa
)) < 0) {
475 printf("bind to netlink: %s\n", strerror(errno
));
479 memset(&req
, 0, sizeof(req
));
/* NOTE(review): the length is computed from struct rtmsg although req.rt is
 * accessed through ndm_* (neighbour) fields below - presumably struct ndmsg
 * was intended; confirm the two sizes agree.
 */
480 req
.nl
.nlmsg_len
= NLMSG_LENGTH(sizeof(struct rtmsg
));
481 req
.nl
.nlmsg_flags
= NLM_F_REQUEST
| NLM_F_DUMP
;
482 req
.nl
.nlmsg_type
= RTM_GETNEIGH
;
483 req
.rt
.ndm_state
= NUD_REACHABLE
;
484 req
.rt
.ndm_family
= rtm_family
;
485 req
.nl
.nlmsg_pid
= 0;
486 req
.nl
.nlmsg_seq
= ++seq
;
487 memset(&msg
, 0, sizeof(msg
));
488 iov
.iov_base
= (void *)&req
.nl
;
489 iov
.iov_len
= req
.nl
.nlmsg_len
;
492 ret
= sendmsg(sock
, &msg
, 0);
494 printf("send to netlink: %s\n", strerror(errno
));
498 memset(buf
, 0, sizeof(buf
));
499 nll
= recv_msg(sa
, sock
);
/* NOTE(review): strerror() is given recv_msg()'s return value here, whereas
 * every other message in this function uses errno - presumably errno was
 * intended; confirm.
 */
501 printf("recv from netlink: %s\n", strerror(nll
));
505 nh
= (struct nlmsghdr
*)buf
;
512 /* Track changes to the route and ARP tables and keep the maps updated.
513 * Also prints regular statistics of the packets forwarded.
/* Open two non-blocking NETLINK_ROUTE sockets, one bound to the route
 * notification groups and one to the neighbour group, install
 * close_and_exit() for SIGINT/SIGTERM, then print per-key packet rates from
 * the rxcnt map and poll both sockets for table updates.
 */
515 static int monitor_route(void)
517 unsigned int nr_cpus
= bpf_num_possible_cpus();
518 const unsigned int nr_keys
= 256;
519 struct pollfd fds_route
, fds_arp
;
/* prev keeps the per-CPU counters of the previous pass, so that the
 * statistics loop can print differences.
 */
520 __u64 prev
[nr_keys
][nr_cpus
];
521 struct sockaddr_nl la
, lr
;
522 __u64 values
[nr_cpus
];
529 sock
= socket(AF_NETLINK
, SOCK_RAW
, NETLINK_ROUTE
);
531 printf("open netlink socket: %s\n", strerror(errno
));
535 fcntl(sock
, F_SETFL
, O_NONBLOCK
);
536 memset(&lr
, 0, sizeof(lr
));
537 lr
.nl_family
= AF_NETLINK
;
538 lr
.nl_groups
= RTMGRP_IPV6_ROUTE
| RTMGRP_IPV4_ROUTE
| RTMGRP_NOTIFY
;
539 if (bind(sock
, (struct sockaddr
*)&lr
, sizeof(lr
)) < 0) {
540 printf("bind to netlink: %s\n", strerror(errno
));
/* NOTE(review): POLL_IN is the SIGPOLL si_code constant; the poll(2) event
 * flag is POLLIN. Presumably POLLIN was intended here and for fds_arp -
 * confirm.
 */
545 fds_route
.events
= POLL_IN
;
547 sock_arp
= socket(AF_NETLINK
, SOCK_RAW
, NETLINK_ROUTE
);
549 printf("open netlink socket: %s\n", strerror(errno
));
553 fcntl(sock_arp
, F_SETFL
, O_NONBLOCK
);
554 memset(&la
, 0, sizeof(la
));
555 la
.nl_family
= AF_NETLINK
;
556 la
.nl_groups
= RTMGRP_NEIGH
| RTMGRP_NOTIFY
;
557 if (bind(sock_arp
, (struct sockaddr
*)&la
, sizeof(la
)) < 0) {
558 printf("bind to netlink: %s\n", strerror(errno
));
562 fds_arp
.fd
= sock_arp
;
563 fds_arp
.events
= POLL_IN
;
565 memset(prev
, 0, sizeof(prev
));
567 signal(SIGINT
, close_and_exit
);
568 signal(SIGTERM
, close_and_exit
);
/* Statistics pass: sum the per-CPU deltas for every key. */
571 for (key
= 0; key
< nr_keys
; key
++) {
574 assert(bpf_map_lookup_elem(rxcnt_map_fd
,
576 for (i
= 0; i
< nr_cpus
; i
++)
577 sum
+= (values
[i
] - prev
[key
][i
]);
579 printf("proto %u: %10llu pkt/s\n",
580 key
, sum
/ interval
);
581 memcpy(prev
[key
], values
, sizeof(values
));
584 memset(buf
, 0, sizeof(buf
));
/* NOTE(review): poll() returns the number of ready descriptors; comparing
 * it with POLL_IN relies on that constant's numeric value. Checking
 * revents is the usual form - confirm.
 */
585 if (poll(&fds_route
, 1, 3) == POLL_IN
) {
586 nll
= recv_msg(lr
, sock
);
/* NOTE(review): strerror() receives recv_msg()'s return value, not errno. */
588 printf("recv from netlink: %s\n", strerror(nll
));
593 nh
= (struct nlmsghdr
*)buf
;
594 printf("Routing table updated.\n");
597 memset(buf
, 0, sizeof(buf
));
598 if (poll(&fds_arp
, 1, 3) == POLL_IN
) {
599 nll
= recv_msg(la
, sock_arp
);
/* NOTE(review): strerror() receives recv_msg()'s return value, not errno. */
601 printf("recv from netlink: %s\n", strerror(nll
));
606 nh
= (struct nlmsghdr
*)buf
;
/* Usage text for the program; main() passes basename(argv[0]) as @prog. */
616 static void usage(const char *prog
)
619 "%s: %s [OPTS] interface name list\n\n"
622 " -F force loading prog\n",
626 int main(int ac
, char **argv
)
628 struct rlimit r
= {RLIM_INFINITY
, RLIM_INFINITY
};
629 struct bpf_prog_load_attr prog_load_attr
= {
630 .prog_type
= BPF_PROG_TYPE_XDP
,
632 struct bpf_prog_info info
= {};
633 __u32 info_len
= sizeof(info
);
634 const char *optstr
= "SF";
635 struct bpf_object
*obj
;
641 snprintf(filename
, sizeof(filename
), "%s_kern.o", argv
[0]);
642 prog_load_attr
.file
= filename
;
644 total_ifindex
= ac
- 1;
645 ifname_list
= (argv
+ 1);
647 while ((opt
= getopt(ac
, argv
, optstr
)) != -1) {
650 flags
|= XDP_FLAGS_SKB_MODE
;
655 flags
&= ~XDP_FLAGS_UPDATE_IF_NOEXIST
;
660 usage(basename(argv
[0]));
666 usage(basename(argv
[0]));
670 if (setrlimit(RLIMIT_MEMLOCK
, &r
)) {
671 perror("setrlimit(RLIMIT_MEMLOCK)");
675 if (bpf_prog_load_xattr(&prog_load_attr
, &obj
, &prog_fd
))
678 printf("\n**************loading bpf file*********************\n\n\n");
680 printf("bpf_prog_load_xattr: %s\n", strerror(errno
));
684 lpm_map_fd
= bpf_object__find_map_fd_by_name(obj
, "lpm_map");
685 rxcnt_map_fd
= bpf_object__find_map_fd_by_name(obj
, "rxcnt");
686 arp_table_map_fd
= bpf_object__find_map_fd_by_name(obj
, "arp_table");
687 exact_match_map_fd
= bpf_object__find_map_fd_by_name(obj
,
689 tx_port_map_fd
= bpf_object__find_map_fd_by_name(obj
, "tx_port");
690 if (lpm_map_fd
< 0 || rxcnt_map_fd
< 0 || arp_table_map_fd
< 0 ||
691 exact_match_map_fd
< 0 || tx_port_map_fd
< 0) {
692 printf("bpf_object__find_map_fd_by_name failed\n");
696 ifindex_list
= (int *)calloc(total_ifindex
, sizeof(int *));
697 for (i
= 0; i
< total_ifindex
; i
++) {
698 ifindex_list
[i
] = if_nametoindex(ifname_list
[i
]);
699 if (!ifindex_list
[i
]) {
700 printf("Couldn't translate interface name: %s",
705 prog_id_list
= (__u32
*)calloc(total_ifindex
, sizeof(__u32
*));
706 for (i
= 0; i
< total_ifindex
; i
++) {
707 if (bpf_set_link_xdp_fd(ifindex_list
[i
], prog_fd
, flags
) < 0) {
708 printf("link set xdp fd failed\n");
709 int recovery_index
= i
;
711 for (i
= 0; i
< recovery_index
; i
++)
712 bpf_set_link_xdp_fd(ifindex_list
[i
], -1, flags
);
716 err
= bpf_obj_get_info_by_fd(prog_fd
, &info
, &info_len
);
718 printf("can't get prog info - %s\n", strerror(errno
));
721 prog_id_list
[i
] = info
.id
;
722 memset(&info
, 0, sizeof(info
));
723 printf("Attached to %d\n", ifindex_list
[i
]);
725 signal(SIGINT
, int_exit
);
726 signal(SIGTERM
, int_exit
);
728 printf("*******************ROUTE TABLE*************************\n\n\n");
729 get_route_table(AF_INET
);
730 printf("*******************ARP TABLE***************************\n\n\n");
731 get_arp_table(AF_INET
);
732 if (monitor_route() < 0) {
733 printf("Error in receiving route update");