1 #include <linux/types.h>
2 #include <linux/skbuff.h>
3 #include <linux/socket.h>
4 #include <linux/sysctl.h>
6 #include <linux/module.h>
7 #include <linux/if_arp.h>
8 #include <linux/ipv6.h>
9 #include <linux/mpls.h>
10 #include <linux/vmalloc.h>
15 #include <net/ip_fib.h>
16 #include <net/netevent.h>
17 #include <net/netns/generic.h>
18 #if IS_ENABLED(CONFIG_IPV6)
20 #include <net/addrconf.h>
24 #define LABEL_NOT_SPECIFIED (1<<20)
25 #define MAX_NEW_LABELS 2
27 /* This maximum ha length copied from the definition of struct neighbour */
28 #define MAX_VIA_ALEN (ALIGN(MAX_ADDR_LEN, sizeof(unsigned long)))
30 enum mpls_payload_type
{
31 MPT_UNSPEC
, /* IPv4 or IPv6 */
35 /* Other types not implemented:
36 * - Pseudo-wire with or without control word (RFC4385)
41 struct mpls_route
{ /* next hop label forwarding entry */
42 struct net_device __rcu
*rt_dev
;
43 struct rcu_head rt_rcu
;
44 u32 rt_label
[MAX_NEW_LABELS
];
45 u8 rt_protocol
; /* routing protocol that set this entry */
54 static int label_limit
= (1 << 20) - 1;
56 static void rtmsg_lfib(int event
, u32 label
, struct mpls_route
*rt
,
57 struct nlmsghdr
*nlh
, struct net
*net
, u32 portid
,
58 unsigned int nlm_flags
);
60 static struct mpls_route
*mpls_route_input_rcu(struct net
*net
, unsigned index
)
62 struct mpls_route
*rt
= NULL
;
64 if (index
< net
->mpls
.platform_labels
) {
65 struct mpls_route __rcu
**platform_label
=
66 rcu_dereference(net
->mpls
.platform_label
);
67 rt
= rcu_dereference(platform_label
[index
]);
72 static inline struct mpls_dev
*mpls_dev_get(const struct net_device
*dev
)
74 return rcu_dereference_rtnl(dev
->mpls_ptr
);
77 bool mpls_output_possible(const struct net_device
*dev
)
79 return dev
&& (dev
->flags
& IFF_UP
) && netif_carrier_ok(dev
);
81 EXPORT_SYMBOL_GPL(mpls_output_possible
);
83 static unsigned int mpls_rt_header_size(const struct mpls_route
*rt
)
85 /* The size of the layer 2.5 labels to be added for this route */
86 return rt
->rt_labels
* sizeof(struct mpls_shim_hdr
);
89 unsigned int mpls_dev_mtu(const struct net_device
*dev
)
91 /* The amount of data the layer 2 frame can hold */
94 EXPORT_SYMBOL_GPL(mpls_dev_mtu
);
96 bool mpls_pkt_too_big(const struct sk_buff
*skb
, unsigned int mtu
)
101 if (skb_is_gso(skb
) && skb_gso_network_seglen(skb
) <= mtu
)
106 EXPORT_SYMBOL_GPL(mpls_pkt_too_big
);
108 static bool mpls_egress(struct mpls_route
*rt
, struct sk_buff
*skb
,
109 struct mpls_entry_decoded dec
)
111 enum mpls_payload_type payload_type
;
112 bool success
= false;
114 /* The IPv4 code below accesses through the IPv4 header
115 * checksum, which is 12 bytes into the packet.
116 * The IPv6 code below accesses through the IPv6 hop limit
117 * which is 8 bytes into the packet.
119 * For all supported cases there should always be at least 12
120 * bytes of packet data present. The IPv4 header is 20 bytes
121 * without options and the IPv6 header is always 40 bytes
124 if (!pskb_may_pull(skb
, 12))
127 payload_type
= rt
->rt_payload_type
;
128 if (payload_type
== MPT_UNSPEC
)
129 payload_type
= ip_hdr(skb
)->version
;
131 switch (payload_type
) {
133 struct iphdr
*hdr4
= ip_hdr(skb
);
134 skb
->protocol
= htons(ETH_P_IP
);
135 csum_replace2(&hdr4
->check
,
136 htons(hdr4
->ttl
<< 8),
137 htons(dec
.ttl
<< 8));
143 struct ipv6hdr
*hdr6
= ipv6_hdr(skb
);
144 skb
->protocol
= htons(ETH_P_IPV6
);
145 hdr6
->hop_limit
= dec
.ttl
;
156 static int mpls_forward(struct sk_buff
*skb
, struct net_device
*dev
,
157 struct packet_type
*pt
, struct net_device
*orig_dev
)
159 struct net
*net
= dev_net(dev
);
160 struct mpls_shim_hdr
*hdr
;
161 struct mpls_route
*rt
;
162 struct mpls_entry_decoded dec
;
163 struct net_device
*out_dev
;
164 struct mpls_dev
*mdev
;
166 unsigned int new_header_size
;
170 /* Careful this entire function runs inside of an rcu critical section */
172 mdev
= mpls_dev_get(dev
);
173 if (!mdev
|| !mdev
->input_enabled
)
176 if (skb
->pkt_type
!= PACKET_HOST
)
179 if ((skb
= skb_share_check(skb
, GFP_ATOMIC
)) == NULL
)
182 if (!pskb_may_pull(skb
, sizeof(*hdr
)))
185 /* Read and decode the label */
187 dec
= mpls_entry_decode(hdr
);
190 skb_pull(skb
, sizeof(*hdr
));
191 skb_reset_network_header(skb
);
195 rt
= mpls_route_input_rcu(net
, dec
.label
);
199 /* Find the output device */
200 out_dev
= rcu_dereference(rt
->rt_dev
);
201 if (!mpls_output_possible(out_dev
))
204 if (skb_warn_if_lro(skb
))
207 skb_forward_csum(skb
);
209 /* Verify ttl is valid */
214 /* Verify the destination can hold the packet */
215 new_header_size
= mpls_rt_header_size(rt
);
216 mtu
= mpls_dev_mtu(out_dev
);
217 if (mpls_pkt_too_big(skb
, mtu
- new_header_size
))
220 hh_len
= LL_RESERVED_SPACE(out_dev
);
221 if (!out_dev
->header_ops
)
224 /* Ensure there is enough space for the headers in the skb */
225 if (skb_cow(skb
, hh_len
+ new_header_size
))
229 skb
->protocol
= htons(ETH_P_MPLS_UC
);
231 if (unlikely(!new_header_size
&& dec
.bos
)) {
232 /* Penultimate hop popping */
233 if (!mpls_egress(rt
, skb
, dec
))
238 skb_push(skb
, new_header_size
);
239 skb_reset_network_header(skb
);
240 /* Push the new labels */
243 for (i
= rt
->rt_labels
- 1; i
>= 0; i
--) {
244 hdr
[i
] = mpls_entry_encode(rt
->rt_label
[i
], dec
.ttl
, 0, bos
);
249 err
= neigh_xmit(rt
->rt_via_table
, out_dev
, rt
->rt_via
, skb
);
251 net_dbg_ratelimited("%s: packet transmission failed: %d\n",
260 static struct packet_type mpls_packet_type __read_mostly
= {
261 .type
= cpu_to_be16(ETH_P_MPLS_UC
),
262 .func
= mpls_forward
,
265 static const struct nla_policy rtm_mpls_policy
[RTA_MAX
+1] = {
266 [RTA_DST
] = { .type
= NLA_U32
},
267 [RTA_OIF
] = { .type
= NLA_U32
},
270 struct mpls_route_config
{
275 u8 rc_via
[MAX_VIA_ALEN
];
277 u32 rc_output_labels
;
278 u32 rc_output_label
[MAX_NEW_LABELS
];
280 enum mpls_payload_type rc_payload_type
;
281 struct nl_info rc_nlinfo
;
284 static struct mpls_route
*mpls_rt_alloc(size_t alen
)
286 struct mpls_route
*rt
;
288 rt
= kzalloc(sizeof(*rt
) + alen
, GFP_KERNEL
);
290 rt
->rt_via_alen
= alen
;
294 static void mpls_rt_free(struct mpls_route
*rt
)
297 kfree_rcu(rt
, rt_rcu
);
300 static void mpls_notify_route(struct net
*net
, unsigned index
,
301 struct mpls_route
*old
, struct mpls_route
*new,
302 const struct nl_info
*info
)
304 struct nlmsghdr
*nlh
= info
? info
->nlh
: NULL
;
305 unsigned portid
= info
? info
->portid
: 0;
306 int event
= new ? RTM_NEWROUTE
: RTM_DELROUTE
;
307 struct mpls_route
*rt
= new ? new : old
;
308 unsigned nlm_flags
= (old
&& new) ? NLM_F_REPLACE
: 0;
309 /* Ignore reserved labels for now */
310 if (rt
&& (index
>= MPLS_LABEL_FIRST_UNRESERVED
))
311 rtmsg_lfib(event
, index
, rt
, nlh
, net
, portid
, nlm_flags
);
314 static void mpls_route_update(struct net
*net
, unsigned index
,
315 struct net_device
*dev
, struct mpls_route
*new,
316 const struct nl_info
*info
)
318 struct mpls_route __rcu
**platform_label
;
319 struct mpls_route
*rt
, *old
= NULL
;
323 platform_label
= rtnl_dereference(net
->mpls
.platform_label
);
324 rt
= rtnl_dereference(platform_label
[index
]);
325 if (!dev
|| (rt
&& (rtnl_dereference(rt
->rt_dev
) == dev
))) {
326 rcu_assign_pointer(platform_label
[index
], new);
330 mpls_notify_route(net
, index
, old
, new, info
);
332 /* If we removed a route free it now */
336 static unsigned find_free_label(struct net
*net
)
338 struct mpls_route __rcu
**platform_label
;
339 size_t platform_labels
;
342 platform_label
= rtnl_dereference(net
->mpls
.platform_label
);
343 platform_labels
= net
->mpls
.platform_labels
;
344 for (index
= MPLS_LABEL_FIRST_UNRESERVED
; index
< platform_labels
;
346 if (!rtnl_dereference(platform_label
[index
]))
349 return LABEL_NOT_SPECIFIED
;
352 #if IS_ENABLED(CONFIG_INET)
353 static struct net_device
*inet_fib_lookup_dev(struct net
*net
, void *addr
)
355 struct net_device
*dev
;
357 struct in_addr daddr
;
359 memcpy(&daddr
, addr
, sizeof(struct in_addr
));
360 rt
= ip_route_output(net
, daddr
.s_addr
, 0, 0, 0);
372 static struct net_device
*inet_fib_lookup_dev(struct net
*net
, void *addr
)
374 return ERR_PTR(-EAFNOSUPPORT
);
378 #if IS_ENABLED(CONFIG_IPV6)
379 static struct net_device
*inet6_fib_lookup_dev(struct net
*net
, void *addr
)
381 struct net_device
*dev
;
382 struct dst_entry
*dst
;
387 return ERR_PTR(-EAFNOSUPPORT
);
389 memset(&fl6
, 0, sizeof(fl6
));
390 memcpy(&fl6
.daddr
, addr
, sizeof(struct in6_addr
));
391 err
= ipv6_stub
->ipv6_dst_lookup(net
, NULL
, &dst
, &fl6
);
402 static struct net_device
*inet6_fib_lookup_dev(struct net
*net
, void *addr
)
404 return ERR_PTR(-EAFNOSUPPORT
);
408 static struct net_device
*find_outdev(struct net
*net
,
409 struct mpls_route_config
*cfg
)
411 struct net_device
*dev
= NULL
;
413 if (!cfg
->rc_ifindex
) {
414 switch (cfg
->rc_via_table
) {
415 case NEIGH_ARP_TABLE
:
416 dev
= inet_fib_lookup_dev(net
, cfg
->rc_via
);
419 dev
= inet6_fib_lookup_dev(net
, cfg
->rc_via
);
421 case NEIGH_LINK_TABLE
:
425 dev
= dev_get_by_index(net
, cfg
->rc_ifindex
);
429 return ERR_PTR(-ENODEV
);
434 static int mpls_route_add(struct mpls_route_config
*cfg
)
436 struct mpls_route __rcu
**platform_label
;
437 struct net
*net
= cfg
->rc_nlinfo
.nl_net
;
438 struct net_device
*dev
= NULL
;
439 struct mpls_route
*rt
, *old
;
444 index
= cfg
->rc_label
;
446 /* If a label was not specified during insert pick one */
447 if ((index
== LABEL_NOT_SPECIFIED
) &&
448 (cfg
->rc_nlflags
& NLM_F_CREATE
)) {
449 index
= find_free_label(net
);
452 /* Reserved labels may not be set */
453 if (index
< MPLS_LABEL_FIRST_UNRESERVED
)
456 /* The full 20 bit range may not be supported. */
457 if (index
>= net
->mpls
.platform_labels
)
460 /* Ensure only a supported number of labels are present */
461 if (cfg
->rc_output_labels
> MAX_NEW_LABELS
)
464 dev
= find_outdev(net
, cfg
);
471 /* Ensure this is a supported device */
473 if (!mpls_dev_get(dev
))
477 if ((cfg
->rc_via_table
== NEIGH_LINK_TABLE
) &&
478 (dev
->addr_len
!= cfg
->rc_via_alen
))
481 /* Append makes no sense with mpls */
483 if (cfg
->rc_nlflags
& NLM_F_APPEND
)
487 platform_label
= rtnl_dereference(net
->mpls
.platform_label
);
488 old
= rtnl_dereference(platform_label
[index
]);
489 if ((cfg
->rc_nlflags
& NLM_F_EXCL
) && old
)
493 if (!(cfg
->rc_nlflags
& NLM_F_REPLACE
) && old
)
497 if (!(cfg
->rc_nlflags
& NLM_F_CREATE
) && !old
)
501 rt
= mpls_rt_alloc(cfg
->rc_via_alen
);
505 rt
->rt_labels
= cfg
->rc_output_labels
;
506 for (i
= 0; i
< rt
->rt_labels
; i
++)
507 rt
->rt_label
[i
] = cfg
->rc_output_label
[i
];
508 rt
->rt_protocol
= cfg
->rc_protocol
;
509 RCU_INIT_POINTER(rt
->rt_dev
, dev
);
510 rt
->rt_payload_type
= cfg
->rc_payload_type
;
511 rt
->rt_via_table
= cfg
->rc_via_table
;
512 memcpy(rt
->rt_via
, cfg
->rc_via
, cfg
->rc_via_alen
);
514 mpls_route_update(net
, index
, NULL
, rt
, &cfg
->rc_nlinfo
);
525 static int mpls_route_del(struct mpls_route_config
*cfg
)
527 struct net
*net
= cfg
->rc_nlinfo
.nl_net
;
531 index
= cfg
->rc_label
;
533 /* Reserved labels may not be removed */
534 if (index
< MPLS_LABEL_FIRST_UNRESERVED
)
537 /* The full 20 bit range may not be supported */
538 if (index
>= net
->mpls
.platform_labels
)
541 mpls_route_update(net
, index
, NULL
, NULL
, &cfg
->rc_nlinfo
);
548 #define MPLS_PERDEV_SYSCTL_OFFSET(field) \
549 (&((struct mpls_dev *)0)->field)
551 static const struct ctl_table mpls_dev_table
[] = {
554 .maxlen
= sizeof(int),
556 .proc_handler
= proc_dointvec
,
557 .data
= MPLS_PERDEV_SYSCTL_OFFSET(input_enabled
),
562 static int mpls_dev_sysctl_register(struct net_device
*dev
,
563 struct mpls_dev
*mdev
)
565 char path
[sizeof("net/mpls/conf/") + IFNAMSIZ
];
566 struct ctl_table
*table
;
569 table
= kmemdup(&mpls_dev_table
, sizeof(mpls_dev_table
), GFP_KERNEL
);
573 /* Table data contains only offsets relative to the base of
574 * the mdev at this point, so make them absolute.
576 for (i
= 0; i
< ARRAY_SIZE(mpls_dev_table
); i
++)
577 table
[i
].data
= (char *)mdev
+ (uintptr_t)table
[i
].data
;
579 snprintf(path
, sizeof(path
), "net/mpls/conf/%s", dev
->name
);
581 mdev
->sysctl
= register_net_sysctl(dev_net(dev
), path
, table
);
593 static void mpls_dev_sysctl_unregister(struct mpls_dev
*mdev
)
595 struct ctl_table
*table
;
597 table
= mdev
->sysctl
->ctl_table_arg
;
598 unregister_net_sysctl_table(mdev
->sysctl
);
602 static struct mpls_dev
*mpls_add_dev(struct net_device
*dev
)
604 struct mpls_dev
*mdev
;
609 mdev
= kzalloc(sizeof(*mdev
), GFP_KERNEL
);
613 err
= mpls_dev_sysctl_register(dev
, mdev
);
617 rcu_assign_pointer(dev
->mpls_ptr
, mdev
);
626 static void mpls_ifdown(struct net_device
*dev
)
628 struct mpls_route __rcu
**platform_label
;
629 struct net
*net
= dev_net(dev
);
630 struct mpls_dev
*mdev
;
633 platform_label
= rtnl_dereference(net
->mpls
.platform_label
);
634 for (index
= 0; index
< net
->mpls
.platform_labels
; index
++) {
635 struct mpls_route
*rt
= rtnl_dereference(platform_label
[index
]);
638 if (rtnl_dereference(rt
->rt_dev
) != dev
)
643 mdev
= mpls_dev_get(dev
);
647 mpls_dev_sysctl_unregister(mdev
);
649 RCU_INIT_POINTER(dev
->mpls_ptr
, NULL
);
651 kfree_rcu(mdev
, rcu
);
654 static int mpls_dev_notify(struct notifier_block
*this, unsigned long event
,
657 struct net_device
*dev
= netdev_notifier_info_to_dev(ptr
);
658 struct mpls_dev
*mdev
;
661 case NETDEV_REGISTER
:
662 /* For now just support ethernet devices */
663 if ((dev
->type
== ARPHRD_ETHER
) ||
664 (dev
->type
== ARPHRD_LOOPBACK
)) {
665 mdev
= mpls_add_dev(dev
);
667 return notifier_from_errno(PTR_ERR(mdev
));
671 case NETDEV_UNREGISTER
:
674 case NETDEV_CHANGENAME
:
675 mdev
= mpls_dev_get(dev
);
679 mpls_dev_sysctl_unregister(mdev
);
680 err
= mpls_dev_sysctl_register(dev
, mdev
);
682 return notifier_from_errno(err
);
689 static struct notifier_block mpls_dev_notifier
= {
690 .notifier_call
= mpls_dev_notify
,
693 static int nla_put_via(struct sk_buff
*skb
,
694 u8 table
, const void *addr
, int alen
)
696 static const int table_to_family
[NEIGH_NR_TABLES
+ 1] = {
697 AF_INET
, AF_INET6
, AF_DECnet
, AF_PACKET
,
701 int family
= AF_UNSPEC
;
703 nla
= nla_reserve(skb
, RTA_VIA
, alen
+ 2);
707 if (table
<= NEIGH_NR_TABLES
)
708 family
= table_to_family
[table
];
711 via
->rtvia_family
= family
;
712 memcpy(via
->rtvia_addr
, addr
, alen
);
716 int nla_put_labels(struct sk_buff
*skb
, int attrtype
,
717 u8 labels
, const u32 label
[])
720 struct mpls_shim_hdr
*nla_label
;
723 nla
= nla_reserve(skb
, attrtype
, labels
*4);
727 nla_label
= nla_data(nla
);
729 for (i
= labels
- 1; i
>= 0; i
--) {
730 nla_label
[i
] = mpls_entry_encode(label
[i
], 0, 0, bos
);
736 EXPORT_SYMBOL_GPL(nla_put_labels
);
738 int nla_get_labels(const struct nlattr
*nla
,
739 u32 max_labels
, u32
*labels
, u32 label
[])
741 unsigned len
= nla_len(nla
);
743 struct mpls_shim_hdr
*nla_label
;
747 /* len needs to be an even multiple of 4 (the label size) */
751 /* Limit the number of new labels allowed */
753 if (nla_labels
> max_labels
)
756 nla_label
= nla_data(nla
);
758 for (i
= nla_labels
- 1; i
>= 0; i
--, bos
= false) {
759 struct mpls_entry_decoded dec
;
760 dec
= mpls_entry_decode(nla_label
+ i
);
762 /* Ensure the bottom of stack flag is properly set
763 * and ttl and tc are both clear.
765 if ((dec
.bos
!= bos
) || dec
.ttl
|| dec
.tc
)
769 case MPLS_LABEL_IMPLNULL
:
770 /* RFC3032: This is a label that an LSR may
771 * assign and distribute, but which never
772 * actually appears in the encapsulation.
777 label
[i
] = dec
.label
;
779 *labels
= nla_labels
;
782 EXPORT_SYMBOL_GPL(nla_get_labels
);
784 static int rtm_to_route_config(struct sk_buff
*skb
, struct nlmsghdr
*nlh
,
785 struct mpls_route_config
*cfg
)
788 struct nlattr
*tb
[RTA_MAX
+1];
792 err
= nlmsg_parse(nlh
, sizeof(*rtm
), tb
, RTA_MAX
, rtm_mpls_policy
);
797 rtm
= nlmsg_data(nlh
);
798 memset(cfg
, 0, sizeof(*cfg
));
800 if (rtm
->rtm_family
!= AF_MPLS
)
802 if (rtm
->rtm_dst_len
!= 20)
804 if (rtm
->rtm_src_len
!= 0)
806 if (rtm
->rtm_tos
!= 0)
808 if (rtm
->rtm_table
!= RT_TABLE_MAIN
)
810 /* Any value is acceptable for rtm_protocol */
812 /* As mpls uses destination specific addresses
813 * (or source specific address in the case of multicast)
814 * all addresses have universal scope.
816 if (rtm
->rtm_scope
!= RT_SCOPE_UNIVERSE
)
818 if (rtm
->rtm_type
!= RTN_UNICAST
)
820 if (rtm
->rtm_flags
!= 0)
823 cfg
->rc_label
= LABEL_NOT_SPECIFIED
;
824 cfg
->rc_protocol
= rtm
->rtm_protocol
;
825 cfg
->rc_nlflags
= nlh
->nlmsg_flags
;
826 cfg
->rc_nlinfo
.portid
= NETLINK_CB(skb
).portid
;
827 cfg
->rc_nlinfo
.nlh
= nlh
;
828 cfg
->rc_nlinfo
.nl_net
= sock_net(skb
->sk
);
830 for (index
= 0; index
<= RTA_MAX
; index
++) {
831 struct nlattr
*nla
= tb
[index
];
837 cfg
->rc_ifindex
= nla_get_u32(nla
);
840 if (nla_get_labels(nla
, MAX_NEW_LABELS
,
841 &cfg
->rc_output_labels
,
842 cfg
->rc_output_label
))
848 if (nla_get_labels(nla
, 1, &label_count
,
852 /* Reserved labels may not be set */
853 if (cfg
->rc_label
< MPLS_LABEL_FIRST_UNRESERVED
)
860 struct rtvia
*via
= nla_data(nla
);
861 if (nla_len(nla
) < offsetof(struct rtvia
, rtvia_addr
))
863 cfg
->rc_via_alen
= nla_len(nla
) -
864 offsetof(struct rtvia
, rtvia_addr
);
865 if (cfg
->rc_via_alen
> MAX_VIA_ALEN
)
868 /* Validate the address family */
869 switch(via
->rtvia_family
) {
871 cfg
->rc_via_table
= NEIGH_LINK_TABLE
;
874 cfg
->rc_via_table
= NEIGH_ARP_TABLE
;
875 if (cfg
->rc_via_alen
!= 4)
879 cfg
->rc_via_table
= NEIGH_ND_TABLE
;
880 if (cfg
->rc_via_alen
!= 16)
884 /* Unsupported address family */
888 memcpy(cfg
->rc_via
, via
->rtvia_addr
, cfg
->rc_via_alen
);
892 /* Unsupported attribute */
902 static int mpls_rtm_delroute(struct sk_buff
*skb
, struct nlmsghdr
*nlh
)
904 struct mpls_route_config cfg
;
907 err
= rtm_to_route_config(skb
, nlh
, &cfg
);
911 return mpls_route_del(&cfg
);
915 static int mpls_rtm_newroute(struct sk_buff
*skb
, struct nlmsghdr
*nlh
)
917 struct mpls_route_config cfg
;
920 err
= rtm_to_route_config(skb
, nlh
, &cfg
);
924 return mpls_route_add(&cfg
);
927 static int mpls_dump_route(struct sk_buff
*skb
, u32 portid
, u32 seq
, int event
,
928 u32 label
, struct mpls_route
*rt
, int flags
)
930 struct net_device
*dev
;
931 struct nlmsghdr
*nlh
;
934 nlh
= nlmsg_put(skb
, portid
, seq
, event
, sizeof(*rtm
), flags
);
938 rtm
= nlmsg_data(nlh
);
939 rtm
->rtm_family
= AF_MPLS
;
940 rtm
->rtm_dst_len
= 20;
941 rtm
->rtm_src_len
= 0;
943 rtm
->rtm_table
= RT_TABLE_MAIN
;
944 rtm
->rtm_protocol
= rt
->rt_protocol
;
945 rtm
->rtm_scope
= RT_SCOPE_UNIVERSE
;
946 rtm
->rtm_type
= RTN_UNICAST
;
950 nla_put_labels(skb
, RTA_NEWDST
, rt
->rt_labels
, rt
->rt_label
))
951 goto nla_put_failure
;
952 if (nla_put_via(skb
, rt
->rt_via_table
, rt
->rt_via
, rt
->rt_via_alen
))
953 goto nla_put_failure
;
954 dev
= rtnl_dereference(rt
->rt_dev
);
955 if (dev
&& nla_put_u32(skb
, RTA_OIF
, dev
->ifindex
))
956 goto nla_put_failure
;
957 if (nla_put_labels(skb
, RTA_DST
, 1, &label
))
958 goto nla_put_failure
;
964 nlmsg_cancel(skb
, nlh
);
968 static int mpls_dump_routes(struct sk_buff
*skb
, struct netlink_callback
*cb
)
970 struct net
*net
= sock_net(skb
->sk
);
971 struct mpls_route __rcu
**platform_label
;
972 size_t platform_labels
;
978 if (index
< MPLS_LABEL_FIRST_UNRESERVED
)
979 index
= MPLS_LABEL_FIRST_UNRESERVED
;
981 platform_label
= rtnl_dereference(net
->mpls
.platform_label
);
982 platform_labels
= net
->mpls
.platform_labels
;
983 for (; index
< platform_labels
; index
++) {
984 struct mpls_route
*rt
;
985 rt
= rtnl_dereference(platform_label
[index
]);
989 if (mpls_dump_route(skb
, NETLINK_CB(cb
->skb
).portid
,
990 cb
->nlh
->nlmsg_seq
, RTM_NEWROUTE
,
991 index
, rt
, NLM_F_MULTI
) < 0)
999 static inline size_t lfib_nlmsg_size(struct mpls_route
*rt
)
1002 NLMSG_ALIGN(sizeof(struct rtmsg
))
1003 + nla_total_size(2 + rt
->rt_via_alen
) /* RTA_VIA */
1004 + nla_total_size(4); /* RTA_DST */
1005 if (rt
->rt_labels
) /* RTA_NEWDST */
1006 payload
+= nla_total_size(rt
->rt_labels
* 4);
1007 if (rt
->rt_dev
) /* RTA_OIF */
1008 payload
+= nla_total_size(4);
1012 static void rtmsg_lfib(int event
, u32 label
, struct mpls_route
*rt
,
1013 struct nlmsghdr
*nlh
, struct net
*net
, u32 portid
,
1014 unsigned int nlm_flags
)
1016 struct sk_buff
*skb
;
1017 u32 seq
= nlh
? nlh
->nlmsg_seq
: 0;
1020 skb
= nlmsg_new(lfib_nlmsg_size(rt
), GFP_KERNEL
);
1024 err
= mpls_dump_route(skb
, portid
, seq
, event
, label
, rt
, nlm_flags
);
1026 /* -EMSGSIZE implies BUG in lfib_nlmsg_size */
1027 WARN_ON(err
== -EMSGSIZE
);
1031 rtnl_notify(skb
, net
, portid
, RTNLGRP_MPLS_ROUTE
, nlh
, GFP_KERNEL
);
1036 rtnl_set_sk_err(net
, RTNLGRP_MPLS_ROUTE
, err
);
1039 static int resize_platform_label_table(struct net
*net
, size_t limit
)
1041 size_t size
= sizeof(struct mpls_route
*) * limit
;
1044 struct mpls_route __rcu
**labels
= NULL
, **old
;
1045 struct mpls_route
*rt0
= NULL
, *rt2
= NULL
;
1049 labels
= kzalloc(size
, GFP_KERNEL
| __GFP_NOWARN
| __GFP_NORETRY
);
1051 labels
= vzalloc(size
);
1057 /* In case the predefined labels need to be populated */
1058 if (limit
> MPLS_LABEL_IPV4NULL
) {
1059 struct net_device
*lo
= net
->loopback_dev
;
1060 rt0
= mpls_rt_alloc(lo
->addr_len
);
1063 RCU_INIT_POINTER(rt0
->rt_dev
, lo
);
1064 rt0
->rt_protocol
= RTPROT_KERNEL
;
1065 rt0
->rt_payload_type
= MPT_IPV4
;
1066 rt0
->rt_via_table
= NEIGH_LINK_TABLE
;
1067 memcpy(rt0
->rt_via
, lo
->dev_addr
, lo
->addr_len
);
1069 if (limit
> MPLS_LABEL_IPV6NULL
) {
1070 struct net_device
*lo
= net
->loopback_dev
;
1071 rt2
= mpls_rt_alloc(lo
->addr_len
);
1074 RCU_INIT_POINTER(rt2
->rt_dev
, lo
);
1075 rt2
->rt_protocol
= RTPROT_KERNEL
;
1076 rt2
->rt_payload_type
= MPT_IPV6
;
1077 rt2
->rt_via_table
= NEIGH_LINK_TABLE
;
1078 memcpy(rt2
->rt_via
, lo
->dev_addr
, lo
->addr_len
);
1082 /* Remember the original table */
1083 old
= rtnl_dereference(net
->mpls
.platform_label
);
1084 old_limit
= net
->mpls
.platform_labels
;
1086 /* Free any labels beyond the new table */
1087 for (index
= limit
; index
< old_limit
; index
++)
1088 mpls_route_update(net
, index
, NULL
, NULL
, NULL
);
1090 /* Copy over the old labels */
1092 if (old_limit
< limit
)
1093 cp_size
= old_limit
* sizeof(struct mpls_route
*);
1095 memcpy(labels
, old
, cp_size
);
1097 /* If needed set the predefined labels */
1098 if ((old_limit
<= MPLS_LABEL_IPV6NULL
) &&
1099 (limit
> MPLS_LABEL_IPV6NULL
)) {
1100 RCU_INIT_POINTER(labels
[MPLS_LABEL_IPV6NULL
], rt2
);
1104 if ((old_limit
<= MPLS_LABEL_IPV4NULL
) &&
1105 (limit
> MPLS_LABEL_IPV4NULL
)) {
1106 RCU_INIT_POINTER(labels
[MPLS_LABEL_IPV4NULL
], rt0
);
1110 /* Update the global pointers */
1111 net
->mpls
.platform_labels
= limit
;
1112 rcu_assign_pointer(net
->mpls
.platform_label
, labels
);
1133 static int mpls_platform_labels(struct ctl_table
*table
, int write
,
1134 void __user
*buffer
, size_t *lenp
, loff_t
*ppos
)
1136 struct net
*net
= table
->data
;
1137 int platform_labels
= net
->mpls
.platform_labels
;
1139 struct ctl_table tmp
= {
1140 .procname
= table
->procname
,
1141 .data
= &platform_labels
,
1142 .maxlen
= sizeof(int),
1143 .mode
= table
->mode
,
1145 .extra2
= &label_limit
,
1148 ret
= proc_dointvec_minmax(&tmp
, write
, buffer
, lenp
, ppos
);
1150 if (write
&& ret
== 0)
1151 ret
= resize_platform_label_table(net
, platform_labels
);
1156 static const struct ctl_table mpls_table
[] = {
1158 .procname
= "platform_labels",
1160 .maxlen
= sizeof(int),
1162 .proc_handler
= mpls_platform_labels
,
1167 static int mpls_net_init(struct net
*net
)
1169 struct ctl_table
*table
;
1171 net
->mpls
.platform_labels
= 0;
1172 net
->mpls
.platform_label
= NULL
;
1174 table
= kmemdup(mpls_table
, sizeof(mpls_table
), GFP_KERNEL
);
1178 table
[0].data
= net
;
1179 net
->mpls
.ctl
= register_net_sysctl(net
, "net/mpls", table
);
1180 if (net
->mpls
.ctl
== NULL
) {
1188 static void mpls_net_exit(struct net
*net
)
1190 struct mpls_route __rcu
**platform_label
;
1191 size_t platform_labels
;
1192 struct ctl_table
*table
;
1195 table
= net
->mpls
.ctl
->ctl_table_arg
;
1196 unregister_net_sysctl_table(net
->mpls
.ctl
);
1199 /* An rcu grace period has passed since there was a device in
1200 * the network namespace (and thus the last in flight packet)
1201 * left this network namespace. This is because
1202 * unregister_netdevice_many and netdev_run_todo has completed
1203 * for each network device that was in this network namespace.
1205 * As such no additional rcu synchronization is necessary when
1206 * freeing the platform_label table.
1209 platform_label
= rtnl_dereference(net
->mpls
.platform_label
);
1210 platform_labels
= net
->mpls
.platform_labels
;
1211 for (index
= 0; index
< platform_labels
; index
++) {
1212 struct mpls_route
*rt
= rtnl_dereference(platform_label
[index
]);
1213 RCU_INIT_POINTER(platform_label
[index
], NULL
);
1218 kvfree(platform_label
);
1221 static struct pernet_operations mpls_net_ops
= {
1222 .init
= mpls_net_init
,
1223 .exit
= mpls_net_exit
,
1226 static int __init
mpls_init(void)
1230 BUILD_BUG_ON(sizeof(struct mpls_shim_hdr
) != 4);
1232 err
= register_pernet_subsys(&mpls_net_ops
);
1236 err
= register_netdevice_notifier(&mpls_dev_notifier
);
1238 goto out_unregister_pernet
;
1240 dev_add_pack(&mpls_packet_type
);
1242 rtnl_register(PF_MPLS
, RTM_NEWROUTE
, mpls_rtm_newroute
, NULL
, NULL
);
1243 rtnl_register(PF_MPLS
, RTM_DELROUTE
, mpls_rtm_delroute
, NULL
, NULL
);
1244 rtnl_register(PF_MPLS
, RTM_GETROUTE
, NULL
, mpls_dump_routes
, NULL
);
1249 out_unregister_pernet
:
1250 unregister_pernet_subsys(&mpls_net_ops
);
1253 module_init(mpls_init
);
1255 static void __exit
mpls_exit(void)
1257 rtnl_unregister_all(PF_MPLS
);
1258 dev_remove_pack(&mpls_packet_type
);
1259 unregister_netdevice_notifier(&mpls_dev_notifier
);
1260 unregister_pernet_subsys(&mpls_net_ops
);
1262 module_exit(mpls_exit
);
1264 MODULE_DESCRIPTION("MultiProtocol Label Switching");
1265 MODULE_LICENSE("GPL v2");
1266 MODULE_ALIAS_NETPROTO(PF_MPLS
);