1 /* Copyright (C) 2017 Cavium, Inc.
3 * This program is free software; you can redistribute it and/or modify it
4 * under the terms of version 2 of the GNU General Public License
5 * as published by the Free Software Foundation.
8 #include <linux/netlink.h>
9 #include <linux/rtnetlink.h>
16 #include <sys/socket.h>
20 #include <arpa/inet.h>
25 #include <sys/ioctl.h>
26 #include <sys/syscall.h>
29 int sock
, sock_arp
, flags
= 0;
30 static int total_ifindex
;
34 static int get_route_table(int rtm_family
);
35 static void int_exit(int sig
)
39 for (i
= 0; i
< total_ifindex
; i
++)
40 set_link_xdp_fd(ifindex_list
[i
], -1, flags
);
44 static void close_and_exit(int sig
)
51 for (i
= 0; i
< total_ifindex
; i
++)
52 set_link_xdp_fd(ifindex_list
[i
], -1, flags
);
56 /* Get the mac address of the interface given interface name */
57 static __be64
getmac(char *iface
)
63 fd
= socket(AF_INET
, SOCK_DGRAM
, 0);
64 ifr
.ifr_addr
.sa_family
= AF_INET
;
65 strncpy(ifr
.ifr_name
, iface
, IFNAMSIZ
- 1);
66 if (ioctl(fd
, SIOCGIFHWADDR
, &ifr
) < 0) {
67 printf("ioctl failed leaving....\n");
70 for (i
= 0; i
< 6 ; i
++)
71 *((__u8
*)&mac
+ i
) = (__u8
)ifr
.ifr_hwaddr
.sa_data
[i
];
76 static int recv_msg(struct sockaddr_nl sock_addr
, int sock
)
84 len
= recv(sock
, buf_ptr
, sizeof(buf
) - nll
, 0);
88 nh
= (struct nlmsghdr
*)buf_ptr
;
90 if (nh
->nlmsg_type
== NLMSG_DONE
)
94 if ((sock_addr
.nl_groups
& RTMGRP_NEIGH
) == RTMGRP_NEIGH
)
97 if ((sock_addr
.nl_groups
& RTMGRP_IPV4_ROUTE
) == RTMGRP_IPV4_ROUTE
)
103 /* Function to parse the route entry returned by netlink
104 * Updates the route entry related map entries
106 static void read_route(struct nlmsghdr
*nh
, int nll
)
108 char dsts
[24], gws
[24], ifs
[16], dsts_len
[24], metrics
[24];
109 struct bpf_lpm_trie_key
*prefix_key
;
110 struct rtattr
*rt_attr
;
111 struct rtmsg
*rt_msg
;
116 int dst_len
, iface
, metric
;
127 struct arp_table arp
;
132 if (nh
->nlmsg_type
== RTM_DELROUTE
)
133 printf("DELETING Route entry\n");
134 else if (nh
->nlmsg_type
== RTM_GETROUTE
)
135 printf("READING Route entry\n");
136 else if (nh
->nlmsg_type
== RTM_NEWROUTE
)
137 printf("NEW Route entry\n");
139 printf("%d\n", nh
->nlmsg_type
);
141 memset(&route
, 0, sizeof(route
));
142 printf("Destination\t\tGateway\t\tGenmask\t\tMetric\t\tIface\n");
143 for (; NLMSG_OK(nh
, nll
); nh
= NLMSG_NEXT(nh
, nll
)) {
144 rt_msg
= (struct rtmsg
*)NLMSG_DATA(nh
);
145 rtm_family
= rt_msg
->rtm_family
;
146 if (rtm_family
== AF_INET
)
147 if (rt_msg
->rtm_table
!= RT_TABLE_MAIN
)
149 rt_attr
= (struct rtattr
*)RTM_RTA(rt_msg
);
150 rtl
= RTM_PAYLOAD(nh
);
152 for (; RTA_OK(rt_attr
, rtl
); rt_attr
= RTA_NEXT(rt_attr
, rtl
)) {
153 switch (rt_attr
->rta_type
) {
156 (*((__be32
*)RTA_DATA(rt_attr
))));
160 *((__be32
*)RTA_DATA(rt_attr
)));
164 *((int *)RTA_DATA(rt_attr
)));
167 sprintf(metrics
, "%u",
168 *((int *)RTA_DATA(rt_attr
)));
173 sprintf(dsts_len
, "%d", rt_msg
->rtm_dst_len
);
174 route
.dst
= atoi(dsts
);
175 route
.dst_len
= atoi(dsts_len
);
176 route
.gw
= atoi(gws
);
177 route
.iface
= atoi(ifs
);
178 route
.metric
= atoi(metrics
);
179 route
.iface_name
= alloca(sizeof(char *) * IFNAMSIZ
);
180 route
.iface_name
= if_indextoname(route
.iface
, route
.iface_name
);
181 route
.mac
= getmac(route
.iface_name
);
182 if (route
.mac
== -1) {
185 for (i
= 0; i
< total_ifindex
; i
++)
186 set_link_xdp_fd(ifindex_list
[i
], -1, flags
);
189 assert(bpf_map_update_elem(map_fd
[4], &route
.iface
, &route
.iface
, 0) == 0);
190 if (rtm_family
== AF_INET
) {
199 prefix_key
= alloca(sizeof(*prefix_key
) + 3);
200 prefix_value
= alloca(sizeof(*prefix_value
));
202 prefix_key
->prefixlen
= 32;
203 prefix_key
->prefixlen
= route
.dst_len
;
204 direct_entry
.mac
= route
.mac
& 0xffffffffffff;
205 direct_entry
.ifindex
= route
.iface
;
206 direct_entry
.arp
.mac
= 0;
207 direct_entry
.arp
.dst
= 0;
208 if (route
.dst_len
== 32) {
209 if (nh
->nlmsg_type
== RTM_DELROUTE
) {
210 assert(bpf_map_delete_elem(map_fd
[3], &route
.dst
) == 0);
212 if (bpf_map_lookup_elem(map_fd
[2], &route
.dst
, &direct_entry
.arp
.mac
) == 0)
213 direct_entry
.arp
.dst
= route
.dst
;
214 assert(bpf_map_update_elem(map_fd
[3], &route
.dst
, &direct_entry
, 0) == 0);
217 for (i
= 0; i
< 4; i
++)
218 prefix_key
->data
[i
] = (route
.dst
>> i
* 8) & 0xff;
220 printf("%3d.%d.%d.%d\t\t%3x\t\t%d\t\t%d\t\t%s\n",
221 (int)prefix_key
->data
[0],
222 (int)prefix_key
->data
[1],
223 (int)prefix_key
->data
[2],
224 (int)prefix_key
->data
[3],
225 route
.gw
, route
.dst_len
,
228 if (bpf_map_lookup_elem(map_fd
[0], prefix_key
,
230 for (i
= 0; i
< 4; i
++)
231 prefix_value
->prefix
[i
] = prefix_key
->data
[i
];
232 prefix_value
->value
= route
.mac
& 0xffffffffffff;
233 prefix_value
->ifindex
= route
.iface
;
234 prefix_value
->gw
= route
.gw
;
235 prefix_value
->metric
= route
.metric
;
237 assert(bpf_map_update_elem(map_fd
[0],
242 if (nh
->nlmsg_type
== RTM_DELROUTE
) {
243 printf("deleting entry\n");
244 printf("prefix key=%d.%d.%d.%d/%d",
249 prefix_key
->prefixlen
);
250 assert(bpf_map_delete_elem(map_fd
[0],
253 /* Rereading the route table to check if
254 * there is an entry with the same
255 * prefix but a different metric as the
258 get_route_table(AF_INET
);
259 } else if (prefix_key
->data
[0] ==
260 prefix_value
->prefix
[0] &&
261 prefix_key
->data
[1] ==
262 prefix_value
->prefix
[1] &&
263 prefix_key
->data
[2] ==
264 prefix_value
->prefix
[2] &&
265 prefix_key
->data
[3] ==
266 prefix_value
->prefix
[3] &&
267 route
.metric
>= prefix_value
->metric
) {
270 for (i
= 0; i
< 4; i
++)
271 prefix_value
->prefix
[i
] =
273 prefix_value
->value
=
274 route
.mac
& 0xffffffffffff;
275 prefix_value
->ifindex
= route
.iface
;
276 prefix_value
->gw
= route
.gw
;
277 prefix_value
->metric
= route
.metric
;
278 assert(bpf_map_update_elem(
286 memset(&route
, 0, sizeof(route
));
287 memset(dsts
, 0, sizeof(dsts
));
288 memset(dsts_len
, 0, sizeof(dsts_len
));
289 memset(gws
, 0, sizeof(gws
));
290 memset(ifs
, 0, sizeof(ifs
));
291 memset(&route
, 0, sizeof(route
));
295 /* Function to read the existing route table when the process is launched*/
296 static int get_route_table(int rtm_family
)
298 struct sockaddr_nl sa
;
312 sock
= socket(AF_NETLINK
, SOCK_RAW
, NETLINK_ROUTE
);
314 printf("open netlink socket: %s\n", strerror(errno
));
317 memset(&sa
, 0, sizeof(sa
));
318 sa
.nl_family
= AF_NETLINK
;
319 if (bind(sock
, (struct sockaddr
*)&sa
, sizeof(sa
)) < 0) {
320 printf("bind to netlink: %s\n", strerror(errno
));
324 memset(&req
, 0, sizeof(req
));
325 req
.nl
.nlmsg_len
= NLMSG_LENGTH(sizeof(struct rtmsg
));
326 req
.nl
.nlmsg_flags
= NLM_F_REQUEST
| NLM_F_DUMP
;
327 req
.nl
.nlmsg_type
= RTM_GETROUTE
;
329 req
.rt
.rtm_family
= rtm_family
;
330 req
.rt
.rtm_table
= RT_TABLE_MAIN
;
331 req
.nl
.nlmsg_pid
= 0;
332 req
.nl
.nlmsg_seq
= ++seq
;
333 memset(&msg
, 0, sizeof(msg
));
334 iov
.iov_base
= (void *)&req
.nl
;
335 iov
.iov_len
= req
.nl
.nlmsg_len
;
338 ret
= sendmsg(sock
, &msg
, 0);
340 printf("send to netlink: %s\n", strerror(errno
));
344 memset(buf
, 0, sizeof(buf
));
345 nll
= recv_msg(sa
, sock
);
347 printf("recv from netlink: %s\n", strerror(nll
));
351 nh
= (struct nlmsghdr
*)buf
;
358 /* Function to parse the arp entry returned by netlink
359 * Updates the arp entry related map entries
361 static void read_arp(struct nlmsghdr
*nh
, int nll
)
363 struct rtattr
*rt_attr
;
364 char dsts
[24], mac
[24];
365 struct ndmsg
*rt_msg
;
373 struct arp_table arp
;
378 if (nh
->nlmsg_type
== RTM_GETNEIGH
)
379 printf("READING arp entry\n");
380 printf("Address\tHwAddress\n");
381 for (; NLMSG_OK(nh
, nll
); nh
= NLMSG_NEXT(nh
, nll
)) {
382 rt_msg
= (struct ndmsg
*)NLMSG_DATA(nh
);
383 rt_attr
= (struct rtattr
*)RTM_RTA(rt_msg
);
384 ndm_family
= rt_msg
->ndm_family
;
385 rtl
= RTM_PAYLOAD(nh
);
386 for (; RTA_OK(rt_attr
, rtl
); rt_attr
= RTA_NEXT(rt_attr
, rtl
)) {
387 switch (rt_attr
->rta_type
) {
390 *((__be32
*)RTA_DATA(rt_attr
)));
394 *((__be64
*)RTA_DATA(rt_attr
)));
400 arp_entry
.dst
= atoi(dsts
);
401 arp_entry
.mac
= atol(mac
);
402 printf("%x\t\t%llx\n", arp_entry
.dst
, arp_entry
.mac
);
403 if (ndm_family
== AF_INET
) {
404 if (bpf_map_lookup_elem(map_fd
[3], &arp_entry
.dst
,
405 &direct_entry
) == 0) {
406 if (nh
->nlmsg_type
== RTM_DELNEIGH
) {
407 direct_entry
.arp
.dst
= 0;
408 direct_entry
.arp
.mac
= 0;
409 } else if (nh
->nlmsg_type
== RTM_NEWNEIGH
) {
410 direct_entry
.arp
.dst
= arp_entry
.dst
;
411 direct_entry
.arp
.mac
= arp_entry
.mac
;
413 assert(bpf_map_update_elem(map_fd
[3],
417 memset(&direct_entry
, 0, sizeof(direct_entry
));
419 if (nh
->nlmsg_type
== RTM_DELNEIGH
) {
420 assert(bpf_map_delete_elem(map_fd
[2], &arp_entry
.dst
) == 0);
421 } else if (nh
->nlmsg_type
== RTM_NEWNEIGH
) {
422 assert(bpf_map_update_elem(map_fd
[2],
428 memset(&arp_entry
, 0, sizeof(arp_entry
));
429 memset(dsts
, 0, sizeof(dsts
));
433 /* Function to read the existing arp table when the process is launched*/
434 static int get_arp_table(int rtm_family
)
436 struct sockaddr_nl sa
;
449 sock
= socket(AF_NETLINK
, SOCK_RAW
, NETLINK_ROUTE
);
451 printf("open netlink socket: %s\n", strerror(errno
));
454 memset(&sa
, 0, sizeof(sa
));
455 sa
.nl_family
= AF_NETLINK
;
456 if (bind(sock
, (struct sockaddr
*)&sa
, sizeof(sa
)) < 0) {
457 printf("bind to netlink: %s\n", strerror(errno
));
461 memset(&req
, 0, sizeof(req
));
462 req
.nl
.nlmsg_len
= NLMSG_LENGTH(sizeof(struct rtmsg
));
463 req
.nl
.nlmsg_flags
= NLM_F_REQUEST
| NLM_F_DUMP
;
464 req
.nl
.nlmsg_type
= RTM_GETNEIGH
;
465 req
.rt
.ndm_state
= NUD_REACHABLE
;
466 req
.rt
.ndm_family
= rtm_family
;
467 req
.nl
.nlmsg_pid
= 0;
468 req
.nl
.nlmsg_seq
= ++seq
;
469 memset(&msg
, 0, sizeof(msg
));
470 iov
.iov_base
= (void *)&req
.nl
;
471 iov
.iov_len
= req
.nl
.nlmsg_len
;
474 ret
= sendmsg(sock
, &msg
, 0);
476 printf("send to netlink: %s\n", strerror(errno
));
480 memset(buf
, 0, sizeof(buf
));
481 nll
= recv_msg(sa
, sock
);
483 printf("recv from netlink: %s\n", strerror(nll
));
487 nh
= (struct nlmsghdr
*)buf
;
494 /* Function to keep track and update changes in route and arp table
495 * Give regular statistics of packets forwarded
497 static int monitor_route(void)
499 unsigned int nr_cpus
= bpf_num_possible_cpus();
500 const unsigned int nr_keys
= 256;
501 struct pollfd fds_route
, fds_arp
;
502 __u64 prev
[nr_keys
][nr_cpus
];
503 struct sockaddr_nl la
, lr
;
504 __u64 values
[nr_cpus
];
511 sock
= socket(AF_NETLINK
, SOCK_RAW
, NETLINK_ROUTE
);
513 printf("open netlink socket: %s\n", strerror(errno
));
517 fcntl(sock
, F_SETFL
, O_NONBLOCK
);
518 memset(&lr
, 0, sizeof(lr
));
519 lr
.nl_family
= AF_NETLINK
;
520 lr
.nl_groups
= RTMGRP_IPV6_ROUTE
| RTMGRP_IPV4_ROUTE
| RTMGRP_NOTIFY
;
521 if (bind(sock
, (struct sockaddr
*)&lr
, sizeof(lr
)) < 0) {
522 printf("bind to netlink: %s\n", strerror(errno
));
527 fds_route
.events
= POLL_IN
;
529 sock_arp
= socket(AF_NETLINK
, SOCK_RAW
, NETLINK_ROUTE
);
531 printf("open netlink socket: %s\n", strerror(errno
));
535 fcntl(sock_arp
, F_SETFL
, O_NONBLOCK
);
536 memset(&la
, 0, sizeof(la
));
537 la
.nl_family
= AF_NETLINK
;
538 la
.nl_groups
= RTMGRP_NEIGH
| RTMGRP_NOTIFY
;
539 if (bind(sock_arp
, (struct sockaddr
*)&la
, sizeof(la
)) < 0) {
540 printf("bind to netlink: %s\n", strerror(errno
));
544 fds_arp
.fd
= sock_arp
;
545 fds_arp
.events
= POLL_IN
;
547 memset(prev
, 0, sizeof(prev
));
549 signal(SIGINT
, close_and_exit
);
550 signal(SIGTERM
, close_and_exit
);
553 for (key
= 0; key
< nr_keys
; key
++) {
556 assert(bpf_map_lookup_elem(map_fd
[1], &key
, values
) == 0);
557 for (i
= 0; i
< nr_cpus
; i
++)
558 sum
+= (values
[i
] - prev
[key
][i
]);
560 printf("proto %u: %10llu pkt/s\n",
561 key
, sum
/ interval
);
562 memcpy(prev
[key
], values
, sizeof(values
));
565 memset(buf
, 0, sizeof(buf
));
566 if (poll(&fds_route
, 1, 3) == POLL_IN
) {
567 nll
= recv_msg(lr
, sock
);
569 printf("recv from netlink: %s\n", strerror(nll
));
574 nh
= (struct nlmsghdr
*)buf
;
575 printf("Routing table updated.\n");
578 memset(buf
, 0, sizeof(buf
));
579 if (poll(&fds_arp
, 1, 3) == POLL_IN
) {
580 nll
= recv_msg(la
, sock_arp
);
582 printf("recv from netlink: %s\n", strerror(nll
));
587 nh
= (struct nlmsghdr
*)buf
;
597 int main(int ac
, char **argv
)
603 snprintf(filename
, sizeof(filename
), "%s_kern.o", argv
[0]);
605 printf("usage: %s [-S] Interface name list\n", argv
[0]);
608 if (!strcmp(argv
[1], "-S")) {
609 flags
= XDP_FLAGS_SKB_MODE
;
610 total_ifindex
= ac
- 2;
611 ifname_list
= (argv
+ 2);
614 total_ifindex
= ac
- 1;
615 ifname_list
= (argv
+ 1);
617 if (load_bpf_file(filename
)) {
618 printf("%s", bpf_log_buf
);
621 printf("\n**************loading bpf file*********************\n\n\n");
623 printf("load_bpf_file: %s\n", strerror(errno
));
626 ifindex_list
= (int *)malloc(total_ifindex
* sizeof(int *));
627 for (i
= 0; i
< total_ifindex
; i
++) {
628 ifindex_list
[i
] = if_nametoindex(ifname_list
[i
]);
629 if (!ifindex_list
[i
]) {
630 printf("Couldn't translate interface name: %s",
635 for (i
= 0; i
< total_ifindex
; i
++) {
636 if (set_link_xdp_fd(ifindex_list
[i
], prog_fd
[0], flags
) < 0) {
637 printf("link set xdp fd failed\n");
638 int recovery_index
= i
;
640 for (i
= 0; i
< recovery_index
; i
++)
641 set_link_xdp_fd(ifindex_list
[i
], -1, flags
);
645 printf("Attached to %d\n", ifindex_list
[i
]);
647 signal(SIGINT
, int_exit
);
648 signal(SIGTERM
, int_exit
);
650 printf("*******************ROUTE TABLE*************************\n\n\n");
651 get_route_table(AF_INET
);
652 printf("*******************ARP TABLE***************************\n\n\n");
653 get_arp_table(AF_INET
);
654 if (monitor_route() < 0) {
655 printf("Error in receiving route update");