bgpd: tighten bounds checking in RR ORF msg reader
[jleu-quagga.git] / zebra / rt_netlink.c
blob7652f80a99f65cecbc15b91aabdbbddd55058e97
1 /* Kernel routing table updates using netlink over GNU/Linux system.
2 * Copyright (C) 1997, 98, 99 Kunihiro Ishiguro
4 * This file is part of GNU Zebra.
6 * GNU Zebra is free software; you can redistribute it and/or modify it
7 * under the terms of the GNU General Public License as published by the
8 * Free Software Foundation; either version 2, or (at your option) any
9 * later version.
11 * GNU Zebra is distributed in the hope that it will be useful, but
12 * WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * General Public License for more details.
16 * You should have received a copy of the GNU General Public License
17 * along with GNU Zebra; see the file COPYING. If not, write to the Free
18 * Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
19 * 02111-1307, USA.
22 #include <zebra.h>
24 /* Hack for GNU libc version 2. */
25 #ifndef MSG_TRUNC
26 #define MSG_TRUNC 0x20
27 #endif /* MSG_TRUNC */
29 #include "linklist.h"
30 #include "if.h"
31 #include "log.h"
32 #include "prefix.h"
33 #include "connected.h"
34 #include "table.h"
35 #include "rib.h"
36 #include "thread.h"
37 #include "privs.h"
39 #include "zebra/zserv.h"
40 #include "zebra/rt.h"
41 #include "zebra/redistribute.h"
42 #include "zebra/interface.h"
43 #include "zebra/debug.h"
/* State kept for one netlink socket talking to the kernel. */
struct nlsock
{
  int sock;                     /* socket descriptor; starts out as -1 */
  int seq;                      /* bumped before each request is sent */
  struct sockaddr_nl snl;       /* local address, stored by netlink_socket() */
  const char *name;             /* label used in log messages */
};

/* Socket on which kernel messages are received. */
struct nlsock netlink =
{
  .sock = -1,
  .seq = 0,
  .snl = {0},
  .name = "netlink-listen",
};

/* Socket used as the command channel. */
struct nlsock netlink_cmd =
{
  .sock = -1,
  .seq = 0,
  .snl = {0},
  .name = "netlink-cmd",
};
55 static const struct message nlmsg_str[] = {
56 {RTM_NEWROUTE, "RTM_NEWROUTE"},
57 {RTM_DELROUTE, "RTM_DELROUTE"},
58 {RTM_GETROUTE, "RTM_GETROUTE"},
59 {RTM_NEWLINK, "RTM_NEWLINK"},
60 {RTM_DELLINK, "RTM_DELLINK"},
61 {RTM_GETLINK, "RTM_GETLINK"},
62 {RTM_NEWADDR, "RTM_NEWADDR"},
63 {RTM_DELADDR, "RTM_DELADDR"},
64 {RTM_GETADDR, "RTM_GETADDR"},
65 {0, NULL}
/* Human readable descriptions of nexthop types, indexed by the numeric
   nexthop type value. */
static const char *nexthop_types_desc[] =
{
  [0] = "none",
  [1] = "Directly connected",
  [2] = "Interface route",
  [3] = "IPv4 nexthop",
  [4] = "IPv4 nexthop with ifindex",
  [5] = "IPv4 nexthop with ifname",
  [6] = "IPv6 nexthop",
  [7] = "IPv6 nexthop with ifindex",
  [8] = "IPv6 nexthop with ifname",
  [9] = "Null0 nexthop",
};
82 extern struct zebra_t zebrad;
84 extern struct zebra_privs_t zserv_privs;
86 extern u_int32_t nl_rcvbufsize;
88 /* Note: on netlink systems, there should be a 1-to-1 mapping between interface
89 names and ifindex values. */
90 static void
91 set_ifindex(struct interface *ifp, unsigned int ifi_index)
93 struct interface *oifp;
95 if (((oifp = if_lookup_by_index(ifi_index)) != NULL) && (oifp != ifp))
97 if (ifi_index == IFINDEX_INTERNAL)
98 zlog_err("Netlink is setting interface %s ifindex to reserved "
99 "internal value %u", ifp->name, ifi_index);
100 else
102 if (IS_ZEBRA_DEBUG_KERNEL)
103 zlog_debug("interface index %d was renamed from %s to %s",
104 ifi_index, oifp->name, ifp->name);
105 if (if_is_up(oifp))
106 zlog_err("interface rename detected on up interface: index %d "
107 "was renamed from %s to %s, results are uncertain!",
108 ifi_index, oifp->name, ifp->name);
109 if_delete_update(oifp);
112 ifp->ifindex = ifi_index;
115 static int
116 netlink_recvbuf (struct nlsock *nl, uint32_t newsize)
118 u_int32_t oldsize;
119 socklen_t newlen = sizeof(newsize);
120 socklen_t oldlen = sizeof(oldsize);
121 int ret;
123 ret = getsockopt(nl->sock, SOL_SOCKET, SO_RCVBUF, &oldsize, &oldlen);
124 if (ret < 0)
126 zlog (NULL, LOG_ERR, "Can't get %s receive buffer size: %s", nl->name,
127 safe_strerror (errno));
128 return -1;
131 ret = setsockopt(nl->sock, SOL_SOCKET, SO_RCVBUF, &nl_rcvbufsize,
132 sizeof(nl_rcvbufsize));
133 if (ret < 0)
135 zlog (NULL, LOG_ERR, "Can't set %s receive buffer size: %s", nl->name,
136 safe_strerror (errno));
137 return -1;
140 ret = getsockopt(nl->sock, SOL_SOCKET, SO_RCVBUF, &newsize, &newlen);
141 if (ret < 0)
143 zlog (NULL, LOG_ERR, "Can't get %s receive buffer size: %s", nl->name,
144 safe_strerror (errno));
145 return -1;
148 zlog (NULL, LOG_INFO,
149 "Setting netlink socket receive buffer size: %u -> %u",
150 oldsize, newsize);
151 return 0;
154 /* Make socket for Linux netlink interface. */
155 static int
156 netlink_socket (struct nlsock *nl, unsigned long groups)
158 int ret;
159 struct sockaddr_nl snl;
160 int sock;
161 int namelen;
162 int save_errno;
164 sock = socket (AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
165 if (sock < 0)
167 zlog (NULL, LOG_ERR, "Can't open %s socket: %s", nl->name,
168 safe_strerror (errno));
169 return -1;
172 memset (&snl, 0, sizeof snl);
173 snl.nl_family = AF_NETLINK;
174 snl.nl_groups = groups;
176 /* Bind the socket to the netlink structure for anything. */
177 if (zserv_privs.change (ZPRIVS_RAISE))
179 zlog (NULL, LOG_ERR, "Can't raise privileges");
180 return -1;
183 ret = bind (sock, (struct sockaddr *) &snl, sizeof snl);
184 save_errno = errno;
185 if (zserv_privs.change (ZPRIVS_LOWER))
186 zlog (NULL, LOG_ERR, "Can't lower privileges");
188 if (ret < 0)
190 zlog (NULL, LOG_ERR, "Can't bind %s socket to group 0x%x: %s",
191 nl->name, snl.nl_groups, safe_strerror (save_errno));
192 close (sock);
193 return -1;
196 /* multiple netlink sockets will have different nl_pid */
197 namelen = sizeof snl;
198 ret = getsockname (sock, (struct sockaddr *) &snl, (socklen_t *) &namelen);
199 if (ret < 0 || namelen != sizeof snl)
201 zlog (NULL, LOG_ERR, "Can't get %s socket name: %s", nl->name,
202 safe_strerror (errno));
203 close (sock);
204 return -1;
207 nl->snl = snl;
208 nl->sock = sock;
209 return ret;
212 /* Get type specified information from netlink. */
213 static int
214 netlink_request (int family, int type, struct nlsock *nl)
216 int ret;
217 struct sockaddr_nl snl;
218 int save_errno;
220 struct
222 struct nlmsghdr nlh;
223 struct rtgenmsg g;
224 } req;
227 /* Check netlink socket. */
228 if (nl->sock < 0)
230 zlog (NULL, LOG_ERR, "%s socket isn't active.", nl->name);
231 return -1;
234 memset (&snl, 0, sizeof snl);
235 snl.nl_family = AF_NETLINK;
237 memset (&req, 0, sizeof req);
238 req.nlh.nlmsg_len = sizeof req;
239 req.nlh.nlmsg_type = type;
240 req.nlh.nlmsg_flags = NLM_F_ROOT | NLM_F_MATCH | NLM_F_REQUEST;
241 req.nlh.nlmsg_pid = nl->snl.nl_pid;
242 req.nlh.nlmsg_seq = ++nl->seq;
243 req.g.rtgen_family = family;
245 /* linux appears to check capabilities on every message
246 * have to raise caps for every message sent
248 if (zserv_privs.change (ZPRIVS_RAISE))
250 zlog (NULL, LOG_ERR, "Can't raise privileges");
251 return -1;
254 ret = sendto (nl->sock, (void *) &req, sizeof req, 0,
255 (struct sockaddr *) &snl, sizeof snl);
256 save_errno = errno;
258 if (zserv_privs.change (ZPRIVS_LOWER))
259 zlog (NULL, LOG_ERR, "Can't lower privileges");
261 if (ret < 0)
263 zlog (NULL, LOG_ERR, "%s sendto failed: %s", nl->name,
264 safe_strerror (save_errno));
265 return -1;
268 return 0;
271 /* Receive message from netlink interface and pass those information
272 to the given function. */
273 static int
274 netlink_parse_info (int (*filter) (struct sockaddr_nl *, struct nlmsghdr *),
275 struct nlsock *nl)
277 int status;
278 int ret = 0;
279 int error;
281 while (1)
283 char buf[4096];
284 struct iovec iov = { buf, sizeof buf };
285 struct sockaddr_nl snl;
286 struct msghdr msg = { (void *) &snl, sizeof snl, &iov, 1, NULL, 0, 0 };
287 struct nlmsghdr *h;
289 status = recvmsg (nl->sock, &msg, 0);
290 if (status < 0)
292 if (errno == EINTR)
293 continue;
294 if (errno == EWOULDBLOCK || errno == EAGAIN)
295 break;
296 zlog (NULL, LOG_ERR, "%s recvmsg overrun: %s",
297 nl->name, safe_strerror(errno));
298 continue;
301 if (status == 0)
303 zlog (NULL, LOG_ERR, "%s EOF", nl->name);
304 return -1;
307 if (msg.msg_namelen != sizeof snl)
309 zlog (NULL, LOG_ERR, "%s sender address length error: length %d",
310 nl->name, msg.msg_namelen);
311 return -1;
314 for (h = (struct nlmsghdr *) buf; NLMSG_OK (h, (unsigned int) status);
315 h = NLMSG_NEXT (h, status))
317 /* Finish of reading. */
318 if (h->nlmsg_type == NLMSG_DONE)
319 return ret;
321 /* Error handling. */
322 if (h->nlmsg_type == NLMSG_ERROR)
324 struct nlmsgerr *err = (struct nlmsgerr *) NLMSG_DATA (h);
325 int errnum = err->error;
326 int msg_type = err->msg.nlmsg_type;
328 /* If the error field is zero, then this is an ACK */
329 if (err->error == 0)
331 if (IS_ZEBRA_DEBUG_KERNEL)
333 zlog_debug ("%s: %s ACK: type=%s(%u), seq=%u, pid=%u",
334 __FUNCTION__, nl->name,
335 lookup (nlmsg_str, err->msg.nlmsg_type),
336 err->msg.nlmsg_type, err->msg.nlmsg_seq,
337 err->msg.nlmsg_pid);
340 /* return if not a multipart message, otherwise continue */
341 if (!(h->nlmsg_flags & NLM_F_MULTI))
343 return 0;
345 continue;
348 if (h->nlmsg_len < NLMSG_LENGTH (sizeof (struct nlmsgerr)))
350 zlog (NULL, LOG_ERR, "%s error: message truncated",
351 nl->name);
352 return -1;
355 /* Deal with errors that occur because of races in link handling */
356 if (nl == &netlink_cmd
357 && ((msg_type == RTM_DELROUTE &&
358 (-errnum == ENODEV || -errnum == ESRCH))
359 || (msg_type == RTM_NEWROUTE && -errnum == EEXIST)))
361 if (IS_ZEBRA_DEBUG_KERNEL)
362 zlog_debug ("%s: error: %s type=%s(%u), seq=%u, pid=%u",
363 nl->name, safe_strerror (-errnum),
364 lookup (nlmsg_str, msg_type),
365 msg_type, err->msg.nlmsg_seq, err->msg.nlmsg_pid);
366 return 0;
369 zlog_err ("%s error: %s, type=%s(%u), seq=%u, pid=%u",
370 nl->name, safe_strerror (-errnum),
371 lookup (nlmsg_str, msg_type),
372 msg_type, err->msg.nlmsg_seq, err->msg.nlmsg_pid);
373 return -1;
376 /* OK we got netlink message. */
377 if (IS_ZEBRA_DEBUG_KERNEL)
378 zlog_debug ("netlink_parse_info: %s type %s(%u), seq=%u, pid=%u",
379 nl->name,
380 lookup (nlmsg_str, h->nlmsg_type), h->nlmsg_type,
381 h->nlmsg_seq, h->nlmsg_pid);
383 /* skip unsolicited messages originating from command socket */
384 if (nl != &netlink_cmd && h->nlmsg_pid == netlink_cmd.snl.nl_pid)
386 if (IS_ZEBRA_DEBUG_KERNEL)
387 zlog_debug ("netlink_parse_info: %s packet comes from %s",
388 netlink_cmd.name, nl->name);
389 continue;
392 error = (*filter) (&snl, h);
393 if (error < 0)
395 zlog (NULL, LOG_ERR, "%s filter function error", nl->name);
396 ret = error;
400 /* After error care. */
401 if (msg.msg_flags & MSG_TRUNC)
403 zlog (NULL, LOG_ERR, "%s error: message truncated", nl->name);
404 continue;
406 if (status)
408 zlog (NULL, LOG_ERR, "%s error: data remnant size %d", nl->name,
409 status);
410 return -1;
413 return ret;
/* Index a run of rtattr entries by attribute type.

   Walks the LEN bytes of attributes starting at RTA and stores a pointer
   to each attribute in TB[type] for every type not larger than MAX; a
   later attribute of the same type replaces an earlier one.  Entries of
   TB for types that do not occur are left as they were, so the caller
   clears TB beforehand. */
static void
netlink_parse_rtattr (struct rtattr **tb, int max, struct rtattr *rta,
                      int len)
{
  for (; RTA_OK (rta, len); rta = RTA_NEXT (rta, len))
    {
      if (rta->rta_type > max)
        continue;
      tb[rta->rta_type] = rta;
    }
}
429 /* Called from interface_lookup_netlink(). This function is only used
430 during bootstrap. */
431 static int
432 netlink_interface (struct sockaddr_nl *snl, struct nlmsghdr *h)
434 int len;
435 struct ifinfomsg *ifi;
436 struct rtattr *tb[IFLA_MAX + 1];
437 struct interface *ifp;
438 char *name;
439 int i;
441 ifi = NLMSG_DATA (h);
443 if (h->nlmsg_type != RTM_NEWLINK)
444 return 0;
446 len = h->nlmsg_len - NLMSG_LENGTH (sizeof (struct ifinfomsg));
447 if (len < 0)
448 return -1;
450 /* Looking up interface name. */
451 memset (tb, 0, sizeof tb);
452 netlink_parse_rtattr (tb, IFLA_MAX, IFLA_RTA (ifi), len);
454 #ifdef IFLA_WIRELESS
455 /* check for wireless messages to ignore */
456 if ((tb[IFLA_WIRELESS] != NULL) && (ifi->ifi_change == 0))
458 if (IS_ZEBRA_DEBUG_KERNEL)
459 zlog_debug ("%s: ignoring IFLA_WIRELESS message", __func__);
460 return 0;
462 #endif /* IFLA_WIRELESS */
464 if (tb[IFLA_IFNAME] == NULL)
465 return -1;
466 name = (char *) RTA_DATA (tb[IFLA_IFNAME]);
468 /* Add interface. */
469 ifp = if_get_by_name (name);
470 set_ifindex(ifp, ifi->ifi_index);
471 ifp->flags = ifi->ifi_flags & 0x0000fffff;
472 ifp->mtu6 = ifp->mtu = *(uint32_t *) RTA_DATA (tb[IFLA_MTU]);
473 ifp->metric = 1;
475 /* Hardware type and address. */
476 ifp->hw_type = ifi->ifi_type;
478 if (tb[IFLA_ADDRESS])
480 int hw_addr_len;
482 hw_addr_len = RTA_PAYLOAD (tb[IFLA_ADDRESS]);
484 if (hw_addr_len > INTERFACE_HWADDR_MAX)
485 zlog_warn ("Hardware address is too large: %d", hw_addr_len);
486 else
488 ifp->hw_addr_len = hw_addr_len;
489 memcpy (ifp->hw_addr, RTA_DATA (tb[IFLA_ADDRESS]), hw_addr_len);
491 for (i = 0; i < hw_addr_len; i++)
492 if (ifp->hw_addr[i] != 0)
493 break;
495 if (i == hw_addr_len)
496 ifp->hw_addr_len = 0;
497 else
498 ifp->hw_addr_len = hw_addr_len;
502 if_add_update (ifp);
504 return 0;
507 /* Lookup interface IPv4/IPv6 address. */
508 static int
509 netlink_interface_addr (struct sockaddr_nl *snl, struct nlmsghdr *h)
511 int len;
512 struct ifaddrmsg *ifa;
513 struct rtattr *tb[IFA_MAX + 1];
514 struct interface *ifp;
515 void *addr;
516 void *broad;
517 u_char flags = 0;
518 char *label = NULL;
520 ifa = NLMSG_DATA (h);
522 if (ifa->ifa_family != AF_INET
523 #ifdef HAVE_IPV6
524 && ifa->ifa_family != AF_INET6
525 #endif /* HAVE_IPV6 */
527 return 0;
529 if (h->nlmsg_type != RTM_NEWADDR && h->nlmsg_type != RTM_DELADDR)
530 return 0;
532 len = h->nlmsg_len - NLMSG_LENGTH (sizeof (struct ifaddrmsg));
533 if (len < 0)
534 return -1;
536 memset (tb, 0, sizeof tb);
537 netlink_parse_rtattr (tb, IFA_MAX, IFA_RTA (ifa), len);
539 ifp = if_lookup_by_index (ifa->ifa_index);
540 if (ifp == NULL)
542 zlog_err ("netlink_interface_addr can't find interface by index %d",
543 ifa->ifa_index);
544 return -1;
547 if (IS_ZEBRA_DEBUG_KERNEL) /* remove this line to see initial ifcfg */
549 char buf[BUFSIZ];
550 zlog_debug ("netlink_interface_addr %s %s:",
551 lookup (nlmsg_str, h->nlmsg_type), ifp->name);
552 if (tb[IFA_LOCAL])
553 zlog_debug (" IFA_LOCAL %s/%d",
554 inet_ntop (ifa->ifa_family, RTA_DATA (tb[IFA_LOCAL]),
555 buf, BUFSIZ), ifa->ifa_prefixlen);
556 if (tb[IFA_ADDRESS])
557 zlog_debug (" IFA_ADDRESS %s/%d",
558 inet_ntop (ifa->ifa_family, RTA_DATA (tb[IFA_ADDRESS]),
559 buf, BUFSIZ), ifa->ifa_prefixlen);
560 if (tb[IFA_BROADCAST])
561 zlog_debug (" IFA_BROADCAST %s/%d",
562 inet_ntop (ifa->ifa_family, RTA_DATA (tb[IFA_BROADCAST]),
563 buf, BUFSIZ), ifa->ifa_prefixlen);
564 if (tb[IFA_LABEL] && strcmp (ifp->name, RTA_DATA (tb[IFA_LABEL])))
565 zlog_debug (" IFA_LABEL %s", (char *)RTA_DATA (tb[IFA_LABEL]));
567 if (tb[IFA_CACHEINFO])
569 struct ifa_cacheinfo *ci = RTA_DATA (tb[IFA_CACHEINFO]);
570 zlog_debug (" IFA_CACHEINFO pref %d, valid %d",
571 ci->ifa_prefered, ci->ifa_valid);
575 /* logic copied from iproute2/ip/ipaddress.c:print_addrinfo() */
576 if (tb[IFA_LOCAL] == NULL)
577 tb[IFA_LOCAL] = tb[IFA_ADDRESS];
578 if (tb[IFA_ADDRESS] == NULL)
579 tb[IFA_ADDRESS] = tb[IFA_LOCAL];
581 /* local interface address */
582 addr = (tb[IFA_LOCAL] ? RTA_DATA(tb[IFA_LOCAL]) : NULL);
584 /* is there a peer address? */
585 if (tb[IFA_ADDRESS] &&
586 memcmp(RTA_DATA(tb[IFA_ADDRESS]), RTA_DATA(tb[IFA_LOCAL]), RTA_PAYLOAD(tb[IFA_ADDRESS])))
588 broad = RTA_DATA(tb[IFA_ADDRESS]);
589 SET_FLAG (flags, ZEBRA_IFA_PEER);
591 else
592 /* seeking a broadcast address */
593 broad = (tb[IFA_BROADCAST] ? RTA_DATA(tb[IFA_BROADCAST]) : NULL);
595 /* addr is primary key, SOL if we don't have one */
596 if (addr == NULL)
598 zlog_debug ("%s: NULL address", __func__);
599 return -1;
602 /* Flags. */
603 if (ifa->ifa_flags & IFA_F_SECONDARY)
604 SET_FLAG (flags, ZEBRA_IFA_SECONDARY);
606 /* Label */
607 if (tb[IFA_LABEL])
608 label = (char *) RTA_DATA (tb[IFA_LABEL]);
610 if (ifp && label && strcmp (ifp->name, label) == 0)
611 label = NULL;
613 /* Register interface address to the interface. */
614 if (ifa->ifa_family == AF_INET)
616 if (h->nlmsg_type == RTM_NEWADDR)
617 connected_add_ipv4 (ifp, flags,
618 (struct in_addr *) addr, ifa->ifa_prefixlen,
619 (struct in_addr *) broad, label);
620 else
621 connected_delete_ipv4 (ifp, flags,
622 (struct in_addr *) addr, ifa->ifa_prefixlen,
623 (struct in_addr *) broad);
625 #ifdef HAVE_IPV6
626 if (ifa->ifa_family == AF_INET6)
628 if (h->nlmsg_type == RTM_NEWADDR)
629 connected_add_ipv6 (ifp, flags,
630 (struct in6_addr *) addr, ifa->ifa_prefixlen,
631 (struct in6_addr *) broad, label);
632 else
633 connected_delete_ipv6 (ifp,
634 (struct in6_addr *) addr, ifa->ifa_prefixlen,
635 (struct in6_addr *) broad);
637 #endif /* HAVE_IPV6 */
639 return 0;
642 /* Looking up routing table by netlink interface. */
643 static int
644 netlink_routing_table (struct sockaddr_nl *snl, struct nlmsghdr *h)
646 int len;
647 struct rtmsg *rtm;
648 struct rtattr *tb[RTA_MAX + 1];
649 u_char flags = 0;
651 char anyaddr[16] = { 0 };
653 int index;
654 int table;
655 int metric;
657 void *dest;
658 void *gate;
659 void *src;
661 rtm = NLMSG_DATA (h);
663 if (h->nlmsg_type != RTM_NEWROUTE)
664 return 0;
665 if (rtm->rtm_type != RTN_UNICAST)
666 return 0;
668 table = rtm->rtm_table;
669 #if 0 /* we weed them out later in rib_weed_tables () */
670 if (table != RT_TABLE_MAIN && table != zebrad.rtm_table_default)
671 return 0;
672 #endif
674 len = h->nlmsg_len - NLMSG_LENGTH (sizeof (struct rtmsg));
675 if (len < 0)
676 return -1;
678 memset (tb, 0, sizeof tb);
679 netlink_parse_rtattr (tb, RTA_MAX, RTM_RTA (rtm), len);
681 if (rtm->rtm_flags & RTM_F_CLONED)
682 return 0;
683 if (rtm->rtm_protocol == RTPROT_REDIRECT)
684 return 0;
685 if (rtm->rtm_protocol == RTPROT_KERNEL)
686 return 0;
688 if (rtm->rtm_src_len != 0)
689 return 0;
691 /* Route which inserted by Zebra. */
692 if (rtm->rtm_protocol == RTPROT_ZEBRA)
693 flags |= ZEBRA_FLAG_SELFROUTE;
695 index = 0;
696 metric = 0;
697 dest = NULL;
698 gate = NULL;
699 src = NULL;
701 if (tb[RTA_OIF])
702 index = *(int *) RTA_DATA (tb[RTA_OIF]);
704 if (tb[RTA_DST])
705 dest = RTA_DATA (tb[RTA_DST]);
706 else
707 dest = anyaddr;
709 if (tb[RTA_PREFSRC])
710 src = RTA_DATA (tb[RTA_PREFSRC]);
712 /* Multipath treatment is needed. */
713 if (tb[RTA_GATEWAY])
714 gate = RTA_DATA (tb[RTA_GATEWAY]);
716 if (tb[RTA_PRIORITY])
717 metric = *(int *) RTA_DATA(tb[RTA_PRIORITY]);
719 if (rtm->rtm_family == AF_INET)
721 struct prefix_ipv4 p;
722 p.family = AF_INET;
723 memcpy (&p.prefix, dest, 4);
724 p.prefixlen = rtm->rtm_dst_len;
726 rib_add_ipv4 (ZEBRA_ROUTE_KERNEL, flags, &p, gate, src, index, table, metric, 0);
728 #ifdef HAVE_IPV6
729 if (rtm->rtm_family == AF_INET6)
731 struct prefix_ipv6 p;
732 p.family = AF_INET6;
733 memcpy (&p.prefix, dest, 16);
734 p.prefixlen = rtm->rtm_dst_len;
736 rib_add_ipv6 (ZEBRA_ROUTE_KERNEL, flags, &p, gate, index, table,
737 metric, 0);
739 #endif /* HAVE_IPV6 */
741 return 0;
744 static const struct message rtproto_str[] = {
745 {RTPROT_REDIRECT, "redirect"},
746 {RTPROT_KERNEL, "kernel"},
747 {RTPROT_BOOT, "boot"},
748 {RTPROT_STATIC, "static"},
749 {RTPROT_GATED, "GateD"},
750 {RTPROT_RA, "router advertisement"},
751 {RTPROT_MRT, "MRT"},
752 {RTPROT_ZEBRA, "Zebra"},
753 #ifdef RTPROT_BIRD
754 {RTPROT_BIRD, "BIRD"},
755 #endif /* RTPROT_BIRD */
756 {0, NULL}
759 /* Routing information change from the kernel. */
760 static int
761 netlink_route_change (struct sockaddr_nl *snl, struct nlmsghdr *h)
763 int len;
764 struct rtmsg *rtm;
765 struct rtattr *tb[RTA_MAX + 1];
767 char anyaddr[16] = { 0 };
769 int index;
770 int table;
771 void *dest;
772 void *gate;
773 void *src;
775 rtm = NLMSG_DATA (h);
777 if (!(h->nlmsg_type == RTM_NEWROUTE || h->nlmsg_type == RTM_DELROUTE))
779 /* If this is not route add/delete message print warning. */
780 zlog_warn ("Kernel message: %d\n", h->nlmsg_type);
781 return 0;
784 /* Connected route. */
785 if (IS_ZEBRA_DEBUG_KERNEL)
786 zlog_debug ("%s %s %s proto %s",
787 h->nlmsg_type ==
788 RTM_NEWROUTE ? "RTM_NEWROUTE" : "RTM_DELROUTE",
789 rtm->rtm_family == AF_INET ? "ipv4" : "ipv6",
790 rtm->rtm_type == RTN_UNICAST ? "unicast" : "multicast",
791 lookup (rtproto_str, rtm->rtm_protocol));
793 if (rtm->rtm_type != RTN_UNICAST)
795 return 0;
798 table = rtm->rtm_table;
799 if (table != RT_TABLE_MAIN && table != zebrad.rtm_table_default)
801 return 0;
804 len = h->nlmsg_len - NLMSG_LENGTH (sizeof (struct rtmsg));
805 if (len < 0)
806 return -1;
808 memset (tb, 0, sizeof tb);
809 netlink_parse_rtattr (tb, RTA_MAX, RTM_RTA (rtm), len);
811 if (rtm->rtm_flags & RTM_F_CLONED)
812 return 0;
813 if (rtm->rtm_protocol == RTPROT_REDIRECT)
814 return 0;
815 if (rtm->rtm_protocol == RTPROT_KERNEL)
816 return 0;
818 if (rtm->rtm_protocol == RTPROT_ZEBRA && h->nlmsg_type == RTM_NEWROUTE)
819 return 0;
821 if (rtm->rtm_src_len != 0)
823 zlog_warn ("netlink_route_change(): no src len");
824 return 0;
827 index = 0;
828 dest = NULL;
829 gate = NULL;
830 src = NULL;
832 if (tb[RTA_OIF])
833 index = *(int *) RTA_DATA (tb[RTA_OIF]);
835 if (tb[RTA_DST])
836 dest = RTA_DATA (tb[RTA_DST]);
837 else
838 dest = anyaddr;
840 if (tb[RTA_GATEWAY])
841 gate = RTA_DATA (tb[RTA_GATEWAY]);
843 if (tb[RTA_PREFSRC])
844 src = RTA_DATA (tb[RTA_PREFSRC]);
846 if (rtm->rtm_family == AF_INET)
848 struct prefix_ipv4 p;
849 p.family = AF_INET;
850 memcpy (&p.prefix, dest, 4);
851 p.prefixlen = rtm->rtm_dst_len;
853 if (IS_ZEBRA_DEBUG_KERNEL)
855 if (h->nlmsg_type == RTM_NEWROUTE)
856 zlog_debug ("RTM_NEWROUTE %s/%d",
857 inet_ntoa (p.prefix), p.prefixlen);
858 else
859 zlog_debug ("RTM_DELROUTE %s/%d",
860 inet_ntoa (p.prefix), p.prefixlen);
863 if (h->nlmsg_type == RTM_NEWROUTE)
864 rib_add_ipv4 (ZEBRA_ROUTE_KERNEL, 0, &p, gate, src, index, table, 0, 0);
865 else
866 rib_delete_ipv4 (ZEBRA_ROUTE_KERNEL, 0, &p, gate, index, table);
869 #ifdef HAVE_IPV6
870 if (rtm->rtm_family == AF_INET6)
872 struct prefix_ipv6 p;
873 char buf[BUFSIZ];
875 p.family = AF_INET6;
876 memcpy (&p.prefix, dest, 16);
877 p.prefixlen = rtm->rtm_dst_len;
879 if (IS_ZEBRA_DEBUG_KERNEL)
881 if (h->nlmsg_type == RTM_NEWROUTE)
882 zlog_debug ("RTM_NEWROUTE %s/%d",
883 inet_ntop (AF_INET6, &p.prefix, buf, BUFSIZ),
884 p.prefixlen);
885 else
886 zlog_debug ("RTM_DELROUTE %s/%d",
887 inet_ntop (AF_INET6, &p.prefix, buf, BUFSIZ),
888 p.prefixlen);
891 if (h->nlmsg_type == RTM_NEWROUTE)
892 rib_add_ipv6 (ZEBRA_ROUTE_KERNEL, 0, &p, gate, index, table, 0, 0);
893 else
894 rib_delete_ipv6 (ZEBRA_ROUTE_KERNEL, 0, &p, gate, index, table);
896 #endif /* HAVE_IPV6 */
898 return 0;
901 static int
902 netlink_link_change (struct sockaddr_nl *snl, struct nlmsghdr *h)
904 int len;
905 struct ifinfomsg *ifi;
906 struct rtattr *tb[IFLA_MAX + 1];
907 struct interface *ifp;
908 char *name;
910 ifi = NLMSG_DATA (h);
912 if (!(h->nlmsg_type == RTM_NEWLINK || h->nlmsg_type == RTM_DELLINK))
914 /* If this is not link add/delete message so print warning. */
915 zlog_warn ("netlink_link_change: wrong kernel message %d\n",
916 h->nlmsg_type);
917 return 0;
920 len = h->nlmsg_len - NLMSG_LENGTH (sizeof (struct ifinfomsg));
921 if (len < 0)
922 return -1;
924 /* Looking up interface name. */
925 memset (tb, 0, sizeof tb);
926 netlink_parse_rtattr (tb, IFLA_MAX, IFLA_RTA (ifi), len);
928 #ifdef IFLA_WIRELESS
929 /* check for wireless messages to ignore */
930 if ((tb[IFLA_WIRELESS] != NULL) && (ifi->ifi_change == 0))
932 if (IS_ZEBRA_DEBUG_KERNEL)
933 zlog_debug ("%s: ignoring IFLA_WIRELESS message", __func__);
934 return 0;
936 #endif /* IFLA_WIRELESS */
938 if (tb[IFLA_IFNAME] == NULL)
939 return -1;
940 name = (char *) RTA_DATA (tb[IFLA_IFNAME]);
942 /* Add interface. */
943 if (h->nlmsg_type == RTM_NEWLINK)
945 ifp = if_lookup_by_name (name);
947 if (ifp == NULL || !CHECK_FLAG (ifp->status, ZEBRA_INTERFACE_ACTIVE))
949 if (ifp == NULL)
950 ifp = if_get_by_name (name);
952 set_ifindex(ifp, ifi->ifi_index);
953 ifp->flags = ifi->ifi_flags & 0x0000fffff;
954 ifp->mtu6 = ifp->mtu = *(int *) RTA_DATA (tb[IFLA_MTU]);
955 ifp->metric = 1;
957 /* If new link is added. */
958 if_add_update (ifp);
960 else
962 /* Interface status change. */
963 set_ifindex(ifp, ifi->ifi_index);
964 ifp->mtu6 = ifp->mtu = *(int *) RTA_DATA (tb[IFLA_MTU]);
965 ifp->metric = 1;
967 if (if_is_operative (ifp))
969 ifp->flags = ifi->ifi_flags & 0x0000fffff;
970 if (!if_is_operative (ifp))
971 if_down (ifp);
972 else
973 /* Must notify client daemons of new interface status. */
974 zebra_interface_up_update (ifp);
976 else
978 ifp->flags = ifi->ifi_flags & 0x0000fffff;
979 if (if_is_operative (ifp))
980 if_up (ifp);
984 else
986 /* RTM_DELLINK. */
987 ifp = if_lookup_by_name (name);
989 if (ifp == NULL)
991 zlog (NULL, LOG_WARNING, "interface %s is deleted but can't find",
992 name);
993 return 0;
996 if_delete_update (ifp);
999 return 0;
1002 static int
1003 netlink_information_fetch (struct sockaddr_nl *snl, struct nlmsghdr *h)
1005 /* JF: Ignore messages that aren't from the kernel */
1006 if ( snl->nl_pid != 0 )
1008 zlog ( NULL, LOG_ERR, "Ignoring message from pid %u", snl->nl_pid );
1009 return 0;
1012 switch (h->nlmsg_type)
1014 case RTM_NEWROUTE:
1015 return netlink_route_change (snl, h);
1016 break;
1017 case RTM_DELROUTE:
1018 return netlink_route_change (snl, h);
1019 break;
1020 case RTM_NEWLINK:
1021 return netlink_link_change (snl, h);
1022 break;
1023 case RTM_DELLINK:
1024 return netlink_link_change (snl, h);
1025 break;
1026 case RTM_NEWADDR:
1027 return netlink_interface_addr (snl, h);
1028 break;
1029 case RTM_DELADDR:
1030 return netlink_interface_addr (snl, h);
1031 break;
1032 default:
1033 zlog_warn ("Unknown netlink nlmsg_type %d\n", h->nlmsg_type);
1034 break;
1036 return 0;
1039 /* Interface lookup by netlink socket. */
1041 interface_lookup_netlink (void)
1043 int ret;
1045 /* Get interface information. */
1046 ret = netlink_request (AF_PACKET, RTM_GETLINK, &netlink_cmd);
1047 if (ret < 0)
1048 return ret;
1049 ret = netlink_parse_info (netlink_interface, &netlink_cmd);
1050 if (ret < 0)
1051 return ret;
1053 /* Get IPv4 address of the interfaces. */
1054 ret = netlink_request (AF_INET, RTM_GETADDR, &netlink_cmd);
1055 if (ret < 0)
1056 return ret;
1057 ret = netlink_parse_info (netlink_interface_addr, &netlink_cmd);
1058 if (ret < 0)
1059 return ret;
1061 #ifdef HAVE_IPV6
1062 /* Get IPv6 address of the interfaces. */
1063 ret = netlink_request (AF_INET6, RTM_GETADDR, &netlink_cmd);
1064 if (ret < 0)
1065 return ret;
1066 ret = netlink_parse_info (netlink_interface_addr, &netlink_cmd);
1067 if (ret < 0)
1068 return ret;
1069 #endif /* HAVE_IPV6 */
1071 return 0;
1074 /* Routing table read function using netlink interface. Only called
1075 bootstrap time. */
1077 netlink_route_read (void)
1079 int ret;
1081 /* Get IPv4 routing table. */
1082 ret = netlink_request (AF_INET, RTM_GETROUTE, &netlink_cmd);
1083 if (ret < 0)
1084 return ret;
1085 ret = netlink_parse_info (netlink_routing_table, &netlink_cmd);
1086 if (ret < 0)
1087 return ret;
1089 #ifdef HAVE_IPV6
1090 /* Get IPv6 routing table. */
1091 ret = netlink_request (AF_INET6, RTM_GETROUTE, &netlink_cmd);
1092 if (ret < 0)
1093 return ret;
1094 ret = netlink_parse_info (netlink_routing_table, &netlink_cmd);
1095 if (ret < 0)
1096 return ret;
1097 #endif /* HAVE_IPV6 */
1099 return 0;
/* Utility function comes from iproute2.
   Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>

   Append an attribute of the given TYPE carrying ALEN bytes from DATA to
   the netlink message N, whose buffer is MAXLEN bytes long.  The
   attribute is placed at the aligned end of the message and nlmsg_len is
   advanced past it.  Returns 0 on success, -1 (message untouched) when
   the attribute would not fit. */
static int
addattr_l (struct nlmsghdr *n, int maxlen, int type, void *data, int alen)
{
  int attr_len = RTA_LENGTH (alen);
  unsigned int tail = NLMSG_ALIGN (n->nlmsg_len);
  struct rtattr *attr;

  if (tail + attr_len > maxlen)
    return -1;

  attr = (struct rtattr *) ((char *) n + tail);
  attr->rta_type = type;
  attr->rta_len = attr_len;
  memcpy (RTA_DATA (attr), data, alen);

  n->nlmsg_len = tail + attr_len;
  return 0;
}
/* Append a nested attribute to the attribute RTA.

   A sub-attribute of the given TYPE carrying ALEN bytes from DATA is
   written at the aligned end of RTA, whose buffer is MAXLEN bytes long,
   and rta_len of RTA is advanced past it.  Returns 0 on success, -1 (RTA
   untouched) when the sub-attribute would not fit. */
static int
rta_addattr_l (struct rtattr *rta, int maxlen, int type, void *data, int alen)
{
  int len;
  struct rtattr *subrta;

  len = RTA_LENGTH (alen);

  if (RTA_ALIGN (rta->rta_len) + len > maxlen)
    return -1;

  subrta = (struct rtattr *) (((char *) rta) + RTA_ALIGN (rta->rta_len));
  subrta->rta_type = type;
  subrta->rta_len = len;
  memcpy (RTA_DATA (subrta), data, alen);
  /* rta_len is an attribute length, so it is aligned with RTA_ALIGN like
     the bounds check and the offset computation above. */
  rta->rta_len = RTA_ALIGN (rta->rta_len) + len;

  return 0;
}
/* Utility function comes from iproute2.
   Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>

   Append an attribute of the given TYPE whose payload is the 4 bytes of
   DATA to the netlink message N, whose buffer is MAXLEN bytes long.
   Returns 0 on success, -1 (message untouched) when the attribute would
   not fit. */
static int
addattr32 (struct nlmsghdr *n, int maxlen, int type, int data)
{
  int attr_len = RTA_LENGTH (4);
  unsigned int tail = NLMSG_ALIGN (n->nlmsg_len);
  struct rtattr *attr;

  if (tail + attr_len > maxlen)
    return -1;

  attr = (struct rtattr *) ((char *) n + tail);
  attr->rta_type = type;
  attr->rta_len = attr_len;
  memcpy (RTA_DATA (attr), &data, 4);

  n->nlmsg_len = tail + attr_len;
  return 0;
}
/* Filter used while waiting for the reply to a command: any message that
   reaches it is logged with its type and otherwise ignored.  Always
   returns 0. */
static int
netlink_talk_filter (struct sockaddr_nl *snl, struct nlmsghdr *h)
{
  /* The sender address is of no interest here. */
  (void) snl;

  zlog_warn ("netlink_talk: ignoring message type 0x%04x", h->nlmsg_type);

  return 0;
}
1173 /* sendmsg() to netlink socket then recvmsg(). */
1174 static int
1175 netlink_talk (struct nlmsghdr *n, struct nlsock *nl)
1177 int status;
1178 struct sockaddr_nl snl;
1179 struct iovec iov = { (void *) n, n->nlmsg_len };
1180 struct msghdr msg = { (void *) &snl, sizeof snl, &iov, 1, NULL, 0, 0 };
1181 int save_errno;
1183 memset (&snl, 0, sizeof snl);
1184 snl.nl_family = AF_NETLINK;
1186 n->nlmsg_seq = ++nl->seq;
1188 /* Request an acknowledgement by setting NLM_F_ACK */
1189 n->nlmsg_flags |= NLM_F_ACK;
1191 if (IS_ZEBRA_DEBUG_KERNEL)
1192 zlog_debug ("netlink_talk: %s type %s(%u), seq=%u", nl->name,
1193 lookup (nlmsg_str, n->nlmsg_type), n->nlmsg_type,
1194 n->nlmsg_seq);
1196 /* Send message to netlink interface. */
1197 if (zserv_privs.change (ZPRIVS_RAISE))
1198 zlog (NULL, LOG_ERR, "Can't raise privileges");
1199 status = sendmsg (nl->sock, &msg, 0);
1200 save_errno = errno;
1201 if (zserv_privs.change (ZPRIVS_LOWER))
1202 zlog (NULL, LOG_ERR, "Can't lower privileges");
1204 if (status < 0)
1206 zlog (NULL, LOG_ERR, "netlink_talk sendmsg() error: %s",
1207 safe_strerror (save_errno));
1208 return -1;
1213 * Get reply from netlink socket.
1214 * The reply should either be an acknowlegement or an error.
1216 return netlink_parse_info (netlink_talk_filter, nl);
1219 /* Routing table change via netlink interface. */
1220 static int
1221 netlink_route (int cmd, int family, void *dest, int length, void *gate,
1222 int index, int zebra_flags, int table)
1224 int ret;
1225 int bytelen;
1226 struct sockaddr_nl snl;
1227 int discard;
1229 struct
1231 struct nlmsghdr n;
1232 struct rtmsg r;
1233 char buf[1024];
1234 } req;
1236 memset (&req, 0, sizeof req);
1238 bytelen = (family == AF_INET ? 4 : 16);
1240 req.n.nlmsg_len = NLMSG_LENGTH (sizeof (struct rtmsg));
1241 req.n.nlmsg_flags = NLM_F_CREATE | NLM_F_REQUEST;
1242 req.n.nlmsg_type = cmd;
1243 req.r.rtm_family = family;
1244 req.r.rtm_table = table;
1245 req.r.rtm_dst_len = length;
1246 req.r.rtm_protocol = RTPROT_ZEBRA;
1247 req.r.rtm_scope = RT_SCOPE_UNIVERSE;
1249 if ((zebra_flags & ZEBRA_FLAG_BLACKHOLE)
1250 || (zebra_flags & ZEBRA_FLAG_REJECT))
1251 discard = 1;
1252 else
1253 discard = 0;
1255 if (cmd == RTM_NEWROUTE)
1257 if (discard)
1259 if (zebra_flags & ZEBRA_FLAG_BLACKHOLE)
1260 req.r.rtm_type = RTN_BLACKHOLE;
1261 else if (zebra_flags & ZEBRA_FLAG_REJECT)
1262 req.r.rtm_type = RTN_UNREACHABLE;
1263 else
1264 assert (RTN_BLACKHOLE != RTN_UNREACHABLE); /* false */
1266 else
1267 req.r.rtm_type = RTN_UNICAST;
1270 if (dest)
1271 addattr_l (&req.n, sizeof req, RTA_DST, dest, bytelen);
1273 if (!discard)
1275 if (gate)
1276 addattr_l (&req.n, sizeof req, RTA_GATEWAY, gate, bytelen);
1277 if (index > 0)
1278 addattr32 (&req.n, sizeof req, RTA_OIF, index);
1281 /* Destination netlink address. */
1282 memset (&snl, 0, sizeof snl);
1283 snl.nl_family = AF_NETLINK;
1285 /* Talk to netlink socket. */
1286 ret = netlink_talk (&req.n, &netlink_cmd);
1287 if (ret < 0)
1288 return -1;
1290 return 0;
1293 /* Routing table change via netlink interface. */
1294 static int
1295 netlink_route_multipath (int cmd, struct prefix *p, struct rib *rib,
1296 int family)
1298 int bytelen;
1299 struct sockaddr_nl snl;
1300 struct nexthop *nexthop = NULL;
1301 int nexthop_num = 0;
1302 int discard;
1304 struct
1306 struct nlmsghdr n;
1307 struct rtmsg r;
1308 char buf[1024];
1309 } req;
1311 memset (&req, 0, sizeof req);
1313 bytelen = (family == AF_INET ? 4 : 16);
1315 req.n.nlmsg_len = NLMSG_LENGTH (sizeof (struct rtmsg));
1316 req.n.nlmsg_flags = NLM_F_CREATE | NLM_F_REQUEST;
1317 req.n.nlmsg_type = cmd;
1318 req.r.rtm_family = family;
1319 req.r.rtm_table = rib->table;
1320 req.r.rtm_dst_len = p->prefixlen;
1321 req.r.rtm_protocol = RTPROT_ZEBRA;
1322 req.r.rtm_scope = RT_SCOPE_UNIVERSE;
1324 if ((rib->flags & ZEBRA_FLAG_BLACKHOLE) || (rib->flags & ZEBRA_FLAG_REJECT))
1325 discard = 1;
1326 else
1327 discard = 0;
1329 if (cmd == RTM_NEWROUTE)
1331 if (discard)
1333 if (rib->flags & ZEBRA_FLAG_BLACKHOLE)
1334 req.r.rtm_type = RTN_BLACKHOLE;
1335 else if (rib->flags & ZEBRA_FLAG_REJECT)
1336 req.r.rtm_type = RTN_UNREACHABLE;
1337 else
1338 assert (RTN_BLACKHOLE != RTN_UNREACHABLE); /* false */
1340 else
1341 req.r.rtm_type = RTN_UNICAST;
1344 addattr_l (&req.n, sizeof req, RTA_DST, &p->u.prefix, bytelen);
1346 /* Metric. */
1347 addattr32 (&req.n, sizeof req, RTA_PRIORITY, rib->metric);
1349 if (discard)
1351 if (cmd == RTM_NEWROUTE)
1352 for (nexthop = rib->nexthop; nexthop; nexthop = nexthop->next)
1353 SET_FLAG (nexthop->flags, NEXTHOP_FLAG_FIB);
1354 goto skip;
1357 /* Multipath case. */
1358 if (rib->nexthop_active_num == 1 || MULTIPATH_NUM == 1)
1360 for (nexthop = rib->nexthop; nexthop; nexthop = nexthop->next)
1363 if ((cmd == RTM_NEWROUTE
1364 && CHECK_FLAG (nexthop->flags, NEXTHOP_FLAG_ACTIVE))
1365 || (cmd == RTM_DELROUTE
1366 && CHECK_FLAG (nexthop->flags, NEXTHOP_FLAG_FIB)))
1369 if (CHECK_FLAG (nexthop->flags, NEXTHOP_FLAG_RECURSIVE))
1371 if (IS_ZEBRA_DEBUG_KERNEL)
1373 zlog_debug
1374 ("netlink_route_multipath() (recursive, 1 hop): "
1375 "%s %s/%d, type %s", lookup (nlmsg_str, cmd),
1376 #ifdef HAVE_IPV6
1377 (family == AF_INET) ? inet_ntoa (p->u.prefix4) :
1378 inet6_ntoa (p->u.prefix6),
1379 #else
1380 inet_ntoa (p->u.prefix4),
1381 #endif /* HAVE_IPV6 */
1383 p->prefixlen, nexthop_types_desc[nexthop->rtype]);
1386 if (nexthop->rtype == NEXTHOP_TYPE_IPV4
1387 || nexthop->rtype == NEXTHOP_TYPE_IPV4_IFINDEX)
1389 addattr_l (&req.n, sizeof req, RTA_GATEWAY,
1390 &nexthop->rgate.ipv4, bytelen);
1391 if (nexthop->src.ipv4.s_addr)
1392 addattr_l(&req.n, sizeof req, RTA_PREFSRC,
1393 &nexthop->src.ipv4, bytelen);
1394 if (IS_ZEBRA_DEBUG_KERNEL)
1395 zlog_debug("netlink_route_multipath() (recursive, "
1396 "1 hop): nexthop via %s if %u",
1397 inet_ntoa (nexthop->rgate.ipv4),
1398 nexthop->rifindex);
1400 #ifdef HAVE_IPV6
1401 if (nexthop->rtype == NEXTHOP_TYPE_IPV6
1402 || nexthop->rtype == NEXTHOP_TYPE_IPV6_IFINDEX
1403 || nexthop->rtype == NEXTHOP_TYPE_IPV6_IFNAME)
1405 addattr_l (&req.n, sizeof req, RTA_GATEWAY,
1406 &nexthop->rgate.ipv6, bytelen);
1408 if (IS_ZEBRA_DEBUG_KERNEL)
1409 zlog_debug("netlink_route_multipath() (recursive, "
1410 "1 hop): nexthop via %s if %u",
1411 inet6_ntoa (nexthop->rgate.ipv6),
1412 nexthop->rifindex);
1414 #endif /* HAVE_IPV6 */
1415 if (nexthop->rtype == NEXTHOP_TYPE_IFINDEX
1416 || nexthop->rtype == NEXTHOP_TYPE_IFNAME
1417 || nexthop->rtype == NEXTHOP_TYPE_IPV4_IFINDEX
1418 || nexthop->rtype == NEXTHOP_TYPE_IPV6_IFINDEX
1419 || nexthop->rtype == NEXTHOP_TYPE_IPV6_IFNAME)
1421 addattr32 (&req.n, sizeof req, RTA_OIF,
1422 nexthop->rifindex);
1423 if ((nexthop->rtype == NEXTHOP_TYPE_IPV4_IFINDEX
1424 || nexthop->rtype == NEXTHOP_TYPE_IFINDEX)
1425 && nexthop->src.ipv4.s_addr)
1426 addattr_l (&req.n, sizeof req, RTA_PREFSRC,
1427 &nexthop->src.ipv4, bytelen);
1429 if (IS_ZEBRA_DEBUG_KERNEL)
1430 zlog_debug("netlink_route_multipath() (recursive, "
1431 "1 hop): nexthop via if %u",
1432 nexthop->rifindex);
1435 else
1437 if (IS_ZEBRA_DEBUG_KERNEL)
1439 zlog_debug
1440 ("netlink_route_multipath() (single hop): "
1441 "%s %s/%d, type %s", lookup (nlmsg_str, cmd),
1442 #ifdef HAVE_IPV6
1443 (family == AF_INET) ? inet_ntoa (p->u.prefix4) :
1444 inet6_ntoa (p->u.prefix6),
1445 #else
1446 inet_ntoa (p->u.prefix4),
1447 #endif /* HAVE_IPV6 */
1448 p->prefixlen, nexthop_types_desc[nexthop->type]);
1451 if (nexthop->type == NEXTHOP_TYPE_IPV4
1452 || nexthop->type == NEXTHOP_TYPE_IPV4_IFINDEX)
1454 addattr_l (&req.n, sizeof req, RTA_GATEWAY,
1455 &nexthop->gate.ipv4, bytelen);
1456 if (nexthop->src.ipv4.s_addr)
1457 addattr_l (&req.n, sizeof req, RTA_PREFSRC,
1458 &nexthop->src.ipv4, bytelen);
1460 if (IS_ZEBRA_DEBUG_KERNEL)
1461 zlog_debug("netlink_route_multipath() (single hop): "
1462 "nexthop via %s if %u",
1463 inet_ntoa (nexthop->gate.ipv4),
1464 nexthop->ifindex);
1466 #ifdef HAVE_IPV6
1467 if (nexthop->type == NEXTHOP_TYPE_IPV6
1468 || nexthop->type == NEXTHOP_TYPE_IPV6_IFNAME
1469 || nexthop->type == NEXTHOP_TYPE_IPV6_IFINDEX)
1471 addattr_l (&req.n, sizeof req, RTA_GATEWAY,
1472 &nexthop->gate.ipv6, bytelen);
1474 if (IS_ZEBRA_DEBUG_KERNEL)
1475 zlog_debug("netlink_route_multipath() (single hop): "
1476 "nexthop via %s if %u",
1477 inet6_ntoa (nexthop->gate.ipv6),
1478 nexthop->ifindex);
1480 #endif /* HAVE_IPV6 */
1481 if (nexthop->type == NEXTHOP_TYPE_IFINDEX
1482 || nexthop->type == NEXTHOP_TYPE_IFNAME
1483 || nexthop->type == NEXTHOP_TYPE_IPV4_IFINDEX)
1485 addattr32 (&req.n, sizeof req, RTA_OIF, nexthop->ifindex);
1487 if (nexthop->src.ipv4.s_addr)
1488 addattr_l (&req.n, sizeof req, RTA_PREFSRC,
1489 &nexthop->src.ipv4, bytelen);
1491 if (IS_ZEBRA_DEBUG_KERNEL)
1492 zlog_debug("netlink_route_multipath() (single hop): "
1493 "nexthop via if %u", nexthop->ifindex);
1495 else if (nexthop->type == NEXTHOP_TYPE_IPV6_IFINDEX
1496 || nexthop->type == NEXTHOP_TYPE_IPV6_IFNAME)
1498 addattr32 (&req.n, sizeof req, RTA_OIF, nexthop->ifindex);
1500 if (IS_ZEBRA_DEBUG_KERNEL)
1501 zlog_debug("netlink_route_multipath() (single hop): "
1502 "nexthop via if %u", nexthop->ifindex);
1506 if (cmd == RTM_NEWROUTE)
1507 SET_FLAG (nexthop->flags, NEXTHOP_FLAG_FIB);
1509 nexthop_num++;
1510 break;
1514 else
1516 char buf[1024];
1517 struct rtattr *rta = (void *) buf;
1518 struct rtnexthop *rtnh;
1519 union g_addr *src = NULL;
1521 rta->rta_type = RTA_MULTIPATH;
1522 rta->rta_len = RTA_LENGTH (0);
1523 rtnh = RTA_DATA (rta);
1525 nexthop_num = 0;
1526 for (nexthop = rib->nexthop;
1527 nexthop && (MULTIPATH_NUM == 0 || nexthop_num < MULTIPATH_NUM);
1528 nexthop = nexthop->next)
1530 if ((cmd == RTM_NEWROUTE
1531 && CHECK_FLAG (nexthop->flags, NEXTHOP_FLAG_ACTIVE))
1532 || (cmd == RTM_DELROUTE
1533 && CHECK_FLAG (nexthop->flags, NEXTHOP_FLAG_FIB)))
1535 nexthop_num++;
1537 rtnh->rtnh_len = sizeof (*rtnh);
1538 rtnh->rtnh_flags = 0;
1539 rtnh->rtnh_hops = 0;
1540 rta->rta_len += rtnh->rtnh_len;
1542 if (CHECK_FLAG (nexthop->flags, NEXTHOP_FLAG_RECURSIVE))
1544 if (IS_ZEBRA_DEBUG_KERNEL)
1546 zlog_debug ("netlink_route_multipath() "
1547 "(recursive, multihop): %s %s/%d type %s",
1548 lookup (nlmsg_str, cmd),
1549 #ifdef HAVE_IPV6
1550 (family == AF_INET) ? inet_ntoa (p->u.prefix4) :
1551 inet6_ntoa (p->u.prefix6),
1552 #else
1553 inet_ntoa (p->u.prefix4),
1554 #endif /* HAVE_IPV6 */
1555 p->prefixlen, nexthop_types_desc[nexthop->rtype]);
1557 if (nexthop->rtype == NEXTHOP_TYPE_IPV4
1558 || nexthop->rtype == NEXTHOP_TYPE_IPV4_IFINDEX)
1560 rta_addattr_l (rta, 4096, RTA_GATEWAY,
1561 &nexthop->rgate.ipv4, bytelen);
1562 rtnh->rtnh_len += sizeof (struct rtattr) + 4;
1564 if (nexthop->src.ipv4.s_addr)
1565 src = &nexthop->src;
1567 if (IS_ZEBRA_DEBUG_KERNEL)
1568 zlog_debug("netlink_route_multipath() (recursive, "
1569 "multihop): nexthop via %s if %u",
1570 inet_ntoa (nexthop->rgate.ipv4),
1571 nexthop->rifindex);
1573 #ifdef HAVE_IPV6
1574 if (nexthop->rtype == NEXTHOP_TYPE_IPV6
1575 || nexthop->rtype == NEXTHOP_TYPE_IPV6_IFNAME
1576 || nexthop->rtype == NEXTHOP_TYPE_IPV6_IFINDEX)
1578 rta_addattr_l (rta, 4096, RTA_GATEWAY,
1579 &nexthop->rgate.ipv6, bytelen);
1581 if (IS_ZEBRA_DEBUG_KERNEL)
1582 zlog_debug("netlink_route_multipath() (recursive, "
1583 "multihop): nexthop via %s if %u",
1584 inet6_ntoa (nexthop->rgate.ipv6),
1585 nexthop->rifindex);
1587 #endif /* HAVE_IPV6 */
1588 /* ifindex */
1589 if (nexthop->rtype == NEXTHOP_TYPE_IPV4_IFINDEX
1590 || nexthop->rtype == NEXTHOP_TYPE_IFINDEX
1591 || nexthop->rtype == NEXTHOP_TYPE_IFNAME)
1593 rtnh->rtnh_ifindex = nexthop->rifindex;
1594 if (nexthop->src.ipv4.s_addr)
1595 src = &nexthop->src;
1597 if (IS_ZEBRA_DEBUG_KERNEL)
1598 zlog_debug("netlink_route_multipath() (recursive, "
1599 "multihop): nexthop via if %u",
1600 nexthop->rifindex);
1602 else if (nexthop->rtype == NEXTHOP_TYPE_IPV6_IFINDEX
1603 || nexthop->rtype == NEXTHOP_TYPE_IPV6_IFNAME)
1605 rtnh->rtnh_ifindex = nexthop->rifindex;
1607 if (IS_ZEBRA_DEBUG_KERNEL)
1608 zlog_debug("netlink_route_multipath() (recursive, "
1609 "multihop): nexthop via if %u",
1610 nexthop->rifindex);
1612 else
1614 rtnh->rtnh_ifindex = 0;
1617 else
1619 if (IS_ZEBRA_DEBUG_KERNEL)
1621 zlog_debug ("netlink_route_multipath() (multihop): "
1622 "%s %s/%d, type %s", lookup (nlmsg_str, cmd),
1623 #ifdef HAVE_IPV6
1624 (family == AF_INET) ? inet_ntoa (p->u.prefix4) :
1625 inet6_ntoa (p->u.prefix6),
1626 #else
1627 inet_ntoa (p->u.prefix4),
1628 #endif /* HAVE_IPV6 */
1629 p->prefixlen, nexthop_types_desc[nexthop->type]);
1631 if (nexthop->type == NEXTHOP_TYPE_IPV4
1632 || nexthop->type == NEXTHOP_TYPE_IPV4_IFINDEX)
1634 rta_addattr_l (rta, 4096, RTA_GATEWAY,
1635 &nexthop->gate.ipv4, bytelen);
1636 rtnh->rtnh_len += sizeof (struct rtattr) + 4;
1638 if (nexthop->src.ipv4.s_addr)
1639 src = &nexthop->src;
1641 if (IS_ZEBRA_DEBUG_KERNEL)
1642 zlog_debug("netlink_route_multipath() (multihop): "
1643 "nexthop via %s if %u",
1644 inet_ntoa (nexthop->gate.ipv4),
1645 nexthop->ifindex);
1647 #ifdef HAVE_IPV6
1648 if (nexthop->type == NEXTHOP_TYPE_IPV6
1649 || nexthop->type == NEXTHOP_TYPE_IPV6_IFNAME
1650 || nexthop->type == NEXTHOP_TYPE_IPV6_IFINDEX)
1652 rta_addattr_l (rta, 4096, RTA_GATEWAY,
1653 &nexthop->gate.ipv6, bytelen);
1655 if (IS_ZEBRA_DEBUG_KERNEL)
1656 zlog_debug("netlink_route_multipath() (multihop): "
1657 "nexthop via %s if %u",
1658 inet6_ntoa (nexthop->gate.ipv6),
1659 nexthop->ifindex);
1661 #endif /* HAVE_IPV6 */
1662 /* ifindex */
1663 if (nexthop->type == NEXTHOP_TYPE_IPV4_IFINDEX
1664 || nexthop->type == NEXTHOP_TYPE_IFINDEX
1665 || nexthop->type == NEXTHOP_TYPE_IFNAME)
1667 rtnh->rtnh_ifindex = nexthop->ifindex;
1668 if (nexthop->src.ipv4.s_addr)
1669 src = &nexthop->src;
1670 if (IS_ZEBRA_DEBUG_KERNEL)
1671 zlog_debug("netlink_route_multipath() (multihop): "
1672 "nexthop via if %u", nexthop->ifindex);
1674 else if (nexthop->type == NEXTHOP_TYPE_IPV6_IFNAME
1675 || nexthop->type == NEXTHOP_TYPE_IPV6_IFINDEX)
1677 rtnh->rtnh_ifindex = nexthop->ifindex;
1679 if (IS_ZEBRA_DEBUG_KERNEL)
1680 zlog_debug("netlink_route_multipath() (multihop): "
1681 "nexthop via if %u", nexthop->ifindex);
1683 else
1685 rtnh->rtnh_ifindex = 0;
1688 rtnh = RTNH_NEXT (rtnh);
1690 if (cmd == RTM_NEWROUTE)
1691 SET_FLAG (nexthop->flags, NEXTHOP_FLAG_FIB);
1694 if (src)
1695 addattr_l (&req.n, sizeof req, RTA_PREFSRC, &src->ipv4, bytelen);
1697 if (rta->rta_len > RTA_LENGTH (0))
1698 addattr_l (&req.n, 1024, RTA_MULTIPATH, RTA_DATA (rta),
1699 RTA_PAYLOAD (rta));
1702 /* If there is no useful nexthop then return. */
1703 if (nexthop_num == 0)
1705 if (IS_ZEBRA_DEBUG_KERNEL)
1706 zlog_debug ("netlink_route_multipath(): No useful nexthop.");
1707 return 0;
1710 skip:
1712 /* Destination netlink address. */
1713 memset (&snl, 0, sizeof snl);
1714 snl.nl_family = AF_NETLINK;
1716 /* Talk to netlink socket. */
1717 return netlink_talk (&req.n, &netlink_cmd);
1721 kernel_add_ipv4 (struct prefix *p, struct rib *rib)
1723 return netlink_route_multipath (RTM_NEWROUTE, p, rib, AF_INET);
1727 kernel_delete_ipv4 (struct prefix *p, struct rib *rib)
1729 return netlink_route_multipath (RTM_DELROUTE, p, rib, AF_INET);
1732 #ifdef HAVE_IPV6
1734 kernel_add_ipv6 (struct prefix *p, struct rib *rib)
1736 return netlink_route_multipath (RTM_NEWROUTE, p, rib, AF_INET6);
1740 kernel_delete_ipv6 (struct prefix *p, struct rib *rib)
1742 return netlink_route_multipath (RTM_DELROUTE, p, rib, AF_INET6);
1745 /* Delete IPv6 route from the kernel. */
1747 kernel_delete_ipv6_old (struct prefix_ipv6 *dest, struct in6_addr *gate,
1748 unsigned int index, int flags, int table)
1750 return netlink_route (RTM_DELROUTE, AF_INET6, &dest->prefix,
1751 dest->prefixlen, gate, index, flags, table);
1753 #endif /* HAVE_IPV6 */
1755 /* Interface address modification. */
1756 static int
1757 netlink_address (int cmd, int family, struct interface *ifp,
1758 struct connected *ifc)
1760 int bytelen;
1761 struct prefix *p;
1763 struct
1765 struct nlmsghdr n;
1766 struct ifaddrmsg ifa;
1767 char buf[1024];
1768 } req;
1770 p = ifc->address;
1771 memset (&req, 0, sizeof req);
1773 bytelen = (family == AF_INET ? 4 : 16);
1775 req.n.nlmsg_len = NLMSG_LENGTH (sizeof (struct ifaddrmsg));
1776 req.n.nlmsg_flags = NLM_F_REQUEST;
1777 req.n.nlmsg_type = cmd;
1778 req.ifa.ifa_family = family;
1780 req.ifa.ifa_index = ifp->ifindex;
1781 req.ifa.ifa_prefixlen = p->prefixlen;
1783 addattr_l (&req.n, sizeof req, IFA_LOCAL, &p->u.prefix, bytelen);
1785 if (family == AF_INET && cmd == RTM_NEWADDR)
1787 if (!CONNECTED_PEER(ifc) && ifc->destination)
1789 p = ifc->destination;
1790 addattr_l (&req.n, sizeof req, IFA_BROADCAST, &p->u.prefix,
1791 bytelen);
1795 if (CHECK_FLAG (ifc->flags, ZEBRA_IFA_SECONDARY))
1796 SET_FLAG (req.ifa.ifa_flags, IFA_F_SECONDARY);
1798 if (ifc->label)
1799 addattr_l (&req.n, sizeof req, IFA_LABEL, ifc->label,
1800 strlen (ifc->label) + 1);
1802 return netlink_talk (&req.n, &netlink_cmd);
1806 kernel_address_add_ipv4 (struct interface *ifp, struct connected *ifc)
1808 return netlink_address (RTM_NEWADDR, AF_INET, ifp, ifc);
1812 kernel_address_delete_ipv4 (struct interface *ifp, struct connected *ifc)
1814 return netlink_address (RTM_DELADDR, AF_INET, ifp, ifc);
1818 extern struct thread_master *master;
1820 /* Kernel route reflection. */
1821 static int
1822 kernel_read (struct thread *thread)
1824 int ret;
1825 int sock;
1827 sock = THREAD_FD (thread);
1828 ret = netlink_parse_info (netlink_information_fetch, &netlink);
1829 thread_add_read (zebrad.master, kernel_read, NULL, netlink.sock);
1831 return 0;
1834 /* Filter out messages from self that occur on listener socket,
1835 caused by our actions on the command socket
1837 static void netlink_install_filter (int sock, __u32 pid)
1839 struct sock_filter filter[] = {
1840 /* 0: ldh [4] */
1841 BPF_STMT(BPF_LD|BPF_ABS|BPF_H, offsetof(struct nlmsghdr, nlmsg_type)),
1842 /* 1: jeq 0x18 jt 3 jf 6 */
1843 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, htons(RTM_NEWROUTE), 1, 0),
1844 /* 2: jeq 0x19 jt 3 jf 6 */
1845 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, htons(RTM_DELROUTE), 0, 3),
1846 /* 3: ldw [12] */
1847 BPF_STMT(BPF_LD|BPF_ABS|BPF_W, offsetof(struct nlmsghdr, nlmsg_pid)),
1848 /* 4: jeq XX jt 5 jf 6 */
1849 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, htonl(pid), 0, 1),
1850 /* 5: ret 0 (skip) */
1851 BPF_STMT(BPF_RET|BPF_K, 0),
1852 /* 6: ret 0xffff (keep) */
1853 BPF_STMT(BPF_RET|BPF_K, 0xffff),
1856 struct sock_fprog prog = {
1857 .len = sizeof(filter) / sizeof(filter[0]),
1858 .filter = filter,
1861 if (setsockopt(sock, SOL_SOCKET, SO_ATTACH_FILTER, &prog, sizeof(prog)) < 0)
1862 zlog_warn ("Can't install socket filter: %s\n", safe_strerror(errno));
1865 /* Exported interface function. This function simply calls
1866 netlink_socket (). */
1867 void
1868 kernel_init (void)
1870 unsigned long groups;
1872 groups = RTMGRP_LINK | RTMGRP_IPV4_ROUTE | RTMGRP_IPV4_IFADDR;
1873 #ifdef HAVE_IPV6
1874 groups |= RTMGRP_IPV6_ROUTE | RTMGRP_IPV6_IFADDR;
1875 #endif /* HAVE_IPV6 */
1876 netlink_socket (&netlink, groups);
1877 netlink_socket (&netlink_cmd, 0);
1879 /* Register kernel socket. */
1880 if (netlink.sock > 0)
1882 /* Only want non-blocking on the netlink event socket */
1883 if (fcntl (netlink.sock, F_SETFL, O_NONBLOCK) < 0)
1884 zlog (NULL, LOG_ERR, "Can't set %s socket flags: %s", netlink.name,
1885 safe_strerror (errno));
1887 /* Set receive buffer size if it's set from command line */
1888 if (nl_rcvbufsize)
1889 netlink_recvbuf (&netlink, nl_rcvbufsize);
1891 netlink_install_filter (netlink.sock, netlink_cmd.snl.nl_pid);
1892 thread_add_read (zebrad.master, kernel_read, NULL, netlink.sock);