1 #include <linux/types.h>
2 #include <linux/skbuff.h>
3 #include <linux/socket.h>
4 #include <linux/sysctl.h>
6 #include <linux/module.h>
7 #include <linux/if_arp.h>
8 #include <linux/ipv6.h>
9 #include <linux/mpls.h>
10 #include <linux/vmalloc.h>
15 #include <net/ip_fib.h>
16 #include <net/netevent.h>
17 #include <net/netns/generic.h>
/* Sentinel for "no label supplied": one past the largest 20-bit label. */
#define LABEL_NOT_SPECIFIED (1 << 20)
/* Size of the per-route output label array. */
#define MAX_NEW_LABELS 2

/* This maximum ha length is copied from the definition of struct neighbour. */
#define MAX_VIA_ALEN (ALIGN(MAX_ADDR_LEN, sizeof(unsigned long)))
26 struct mpls_route
{ /* next hop label forwarding entry */
27 struct net_device __rcu
*rt_dev
;
28 struct rcu_head rt_rcu
;
29 u32 rt_label
[MAX_NEW_LABELS
];
30 u8 rt_protocol
; /* routing protocol that set this entry */
/* Largest 20-bit label value; passed as .extra2 (upper bound) by the
 * platform_labels sysctl handler.
 */
static int label_limit = (1 << 20) - 1;
40 static void rtmsg_lfib(int event
, u32 label
, struct mpls_route
*rt
,
41 struct nlmsghdr
*nlh
, struct net
*net
, u32 portid
,
42 unsigned int nlm_flags
);
44 static struct mpls_route
*mpls_route_input_rcu(struct net
*net
, unsigned index
)
46 struct mpls_route
*rt
= NULL
;
48 if (index
< net
->mpls
.platform_labels
) {
49 struct mpls_route __rcu
**platform_label
=
50 rcu_dereference(net
->mpls
.platform_label
);
51 rt
= rcu_dereference(platform_label
[index
]);
56 static inline struct mpls_dev
*mpls_dev_get(const struct net_device
*dev
)
58 return rcu_dereference_rtnl(dev
->mpls_ptr
);
61 static bool mpls_output_possible(const struct net_device
*dev
)
63 return dev
&& (dev
->flags
& IFF_UP
) && netif_carrier_ok(dev
);
66 static unsigned int mpls_rt_header_size(const struct mpls_route
*rt
)
68 /* The size of the layer 2.5 labels to be added for this route */
69 return rt
->rt_labels
* sizeof(struct mpls_shim_hdr
);
72 static unsigned int mpls_dev_mtu(const struct net_device
*dev
)
74 /* The amount of data the layer 2 frame can hold */
78 static bool mpls_pkt_too_big(const struct sk_buff
*skb
, unsigned int mtu
)
83 if (skb_is_gso(skb
) && skb_gso_network_seglen(skb
) <= mtu
)
89 static bool mpls_egress(struct mpls_route
*rt
, struct sk_buff
*skb
,
90 struct mpls_entry_decoded dec
)
92 /* RFC4385 and RFC5586 encode other packets in mpls such that
93 * they don't conflict with the ip version number, making
94 * decoding by examining the ip version correct in everything
95 * except for the strangest cases.
97 * The strange cases if we choose to support them will require
98 * manual configuration.
103 /* The IPv4 code below accesses through the IPv4 header
104 * checksum, which is 12 bytes into the packet.
105 * The IPv6 code below accesses through the IPv6 hop limit
106 * which is 8 bytes into the packet.
108 * For all supported cases there should always be at least 12
109 * bytes of packet data present. The IPv4 header is 20 bytes
110 * without options and the IPv6 header is always 40 bytes
113 if (!pskb_may_pull(skb
, 12))
116 /* Use ip_hdr to find the ip protocol version */
118 if (hdr4
->version
== 4) {
119 skb
->protocol
= htons(ETH_P_IP
);
120 csum_replace2(&hdr4
->check
,
121 htons(hdr4
->ttl
<< 8),
122 htons(dec
.ttl
<< 8));
125 else if (hdr4
->version
== 6) {
126 struct ipv6hdr
*hdr6
= ipv6_hdr(skb
);
127 skb
->protocol
= htons(ETH_P_IPV6
);
128 hdr6
->hop_limit
= dec
.ttl
;
131 /* version 0 and version 1 are used by pseudo wires */
136 static int mpls_forward(struct sk_buff
*skb
, struct net_device
*dev
,
137 struct packet_type
*pt
, struct net_device
*orig_dev
)
139 struct net
*net
= dev_net(dev
);
140 struct mpls_shim_hdr
*hdr
;
141 struct mpls_route
*rt
;
142 struct mpls_entry_decoded dec
;
143 struct net_device
*out_dev
;
144 struct mpls_dev
*mdev
;
146 unsigned int new_header_size
;
150 /* Careful this entire function runs inside of an rcu critical section */
152 mdev
= mpls_dev_get(dev
);
153 if (!mdev
|| !mdev
->input_enabled
)
156 if (skb
->pkt_type
!= PACKET_HOST
)
159 if ((skb
= skb_share_check(skb
, GFP_ATOMIC
)) == NULL
)
162 if (!pskb_may_pull(skb
, sizeof(*hdr
)))
165 /* Read and decode the label */
167 dec
= mpls_entry_decode(hdr
);
170 skb_pull(skb
, sizeof(*hdr
));
171 skb_reset_network_header(skb
);
175 rt
= mpls_route_input_rcu(net
, dec
.label
);
179 /* Find the output device */
180 out_dev
= rcu_dereference(rt
->rt_dev
);
181 if (!mpls_output_possible(out_dev
))
184 if (skb_warn_if_lro(skb
))
187 skb_forward_csum(skb
);
189 /* Verify ttl is valid */
194 /* Verify the destination can hold the packet */
195 new_header_size
= mpls_rt_header_size(rt
);
196 mtu
= mpls_dev_mtu(out_dev
);
197 if (mpls_pkt_too_big(skb
, mtu
- new_header_size
))
200 hh_len
= LL_RESERVED_SPACE(out_dev
);
201 if (!out_dev
->header_ops
)
204 /* Ensure there is enough space for the headers in the skb */
205 if (skb_cow(skb
, hh_len
+ new_header_size
))
209 skb
->protocol
= htons(ETH_P_MPLS_UC
);
211 if (unlikely(!new_header_size
&& dec
.bos
)) {
212 /* Penultimate hop popping */
213 if (!mpls_egress(rt
, skb
, dec
))
218 skb_push(skb
, new_header_size
);
219 skb_reset_network_header(skb
);
220 /* Push the new labels */
223 for (i
= rt
->rt_labels
- 1; i
>= 0; i
--) {
224 hdr
[i
] = mpls_entry_encode(rt
->rt_label
[i
], dec
.ttl
, 0, bos
);
229 err
= neigh_xmit(rt
->rt_via_table
, out_dev
, rt
->rt_via
, skb
);
231 net_dbg_ratelimited("%s: packet transmission failed: %d\n",
240 static struct packet_type mpls_packet_type __read_mostly
= {
241 .type
= cpu_to_be16(ETH_P_MPLS_UC
),
242 .func
= mpls_forward
,
245 static const struct nla_policy rtm_mpls_policy
[RTA_MAX
+1] = {
246 [RTA_DST
] = { .type
= NLA_U32
},
247 [RTA_OIF
] = { .type
= NLA_U32
},
250 struct mpls_route_config
{
255 u8 rc_via
[MAX_VIA_ALEN
];
257 u32 rc_output_labels
;
258 u32 rc_output_label
[MAX_NEW_LABELS
];
260 struct nl_info rc_nlinfo
;
263 static struct mpls_route
*mpls_rt_alloc(size_t alen
)
265 struct mpls_route
*rt
;
267 rt
= kzalloc(sizeof(*rt
) + alen
, GFP_KERNEL
);
269 rt
->rt_via_alen
= alen
;
273 static void mpls_rt_free(struct mpls_route
*rt
)
276 kfree_rcu(rt
, rt_rcu
);
279 static void mpls_notify_route(struct net
*net
, unsigned index
,
280 struct mpls_route
*old
, struct mpls_route
*new,
281 const struct nl_info
*info
)
283 struct nlmsghdr
*nlh
= info
? info
->nlh
: NULL
;
284 unsigned portid
= info
? info
->portid
: 0;
285 int event
= new ? RTM_NEWROUTE
: RTM_DELROUTE
;
286 struct mpls_route
*rt
= new ? new : old
;
287 unsigned nlm_flags
= (old
&& new) ? NLM_F_REPLACE
: 0;
288 /* Ignore reserved labels for now */
289 if (rt
&& (index
>= 16))
290 rtmsg_lfib(event
, index
, rt
, nlh
, net
, portid
, nlm_flags
);
293 static void mpls_route_update(struct net
*net
, unsigned index
,
294 struct net_device
*dev
, struct mpls_route
*new,
295 const struct nl_info
*info
)
297 struct mpls_route __rcu
**platform_label
;
298 struct mpls_route
*rt
, *old
= NULL
;
302 platform_label
= rtnl_dereference(net
->mpls
.platform_label
);
303 rt
= rtnl_dereference(platform_label
[index
]);
304 if (!dev
|| (rt
&& (rtnl_dereference(rt
->rt_dev
) == dev
))) {
305 rcu_assign_pointer(platform_label
[index
], new);
309 mpls_notify_route(net
, index
, old
, new, info
);
311 /* If we removed a route free it now */
315 static unsigned find_free_label(struct net
*net
)
317 struct mpls_route __rcu
**platform_label
;
318 size_t platform_labels
;
321 platform_label
= rtnl_dereference(net
->mpls
.platform_label
);
322 platform_labels
= net
->mpls
.platform_labels
;
323 for (index
= 16; index
< platform_labels
; index
++) {
324 if (!rtnl_dereference(platform_label
[index
]))
327 return LABEL_NOT_SPECIFIED
;
330 static int mpls_route_add(struct mpls_route_config
*cfg
)
332 struct mpls_route __rcu
**platform_label
;
333 struct net
*net
= cfg
->rc_nlinfo
.nl_net
;
334 struct net_device
*dev
= NULL
;
335 struct mpls_route
*rt
, *old
;
340 index
= cfg
->rc_label
;
342 /* If a label was not specified during insert pick one */
343 if ((index
== LABEL_NOT_SPECIFIED
) &&
344 (cfg
->rc_nlflags
& NLM_F_CREATE
)) {
345 index
= find_free_label(net
);
348 /* The first 16 labels are reserved, and may not be set */
352 /* The full 20 bit range may not be supported. */
353 if (index
>= net
->mpls
.platform_labels
)
356 /* Ensure only a supported number of labels are present */
357 if (cfg
->rc_output_labels
> MAX_NEW_LABELS
)
361 dev
= dev_get_by_index(net
, cfg
->rc_ifindex
);
365 /* Ensure this is a supported device */
367 if (!mpls_dev_get(dev
))
371 if ((cfg
->rc_via_table
== NEIGH_LINK_TABLE
) &&
372 (dev
->addr_len
!= cfg
->rc_via_alen
))
375 /* Append makes no sense with mpls */
377 if (cfg
->rc_nlflags
& NLM_F_APPEND
)
381 platform_label
= rtnl_dereference(net
->mpls
.platform_label
);
382 old
= rtnl_dereference(platform_label
[index
]);
383 if ((cfg
->rc_nlflags
& NLM_F_EXCL
) && old
)
387 if (!(cfg
->rc_nlflags
& NLM_F_REPLACE
) && old
)
391 if (!(cfg
->rc_nlflags
& NLM_F_CREATE
) && !old
)
395 rt
= mpls_rt_alloc(cfg
->rc_via_alen
);
399 rt
->rt_labels
= cfg
->rc_output_labels
;
400 for (i
= 0; i
< rt
->rt_labels
; i
++)
401 rt
->rt_label
[i
] = cfg
->rc_output_label
[i
];
402 rt
->rt_protocol
= cfg
->rc_protocol
;
403 RCU_INIT_POINTER(rt
->rt_dev
, dev
);
404 rt
->rt_via_table
= cfg
->rc_via_table
;
405 memcpy(rt
->rt_via
, cfg
->rc_via
, cfg
->rc_via_alen
);
407 mpls_route_update(net
, index
, NULL
, rt
, &cfg
->rc_nlinfo
);
418 static int mpls_route_del(struct mpls_route_config
*cfg
)
420 struct net
*net
= cfg
->rc_nlinfo
.nl_net
;
424 index
= cfg
->rc_label
;
426 /* The first 16 labels are reserved, and may not be removed */
430 /* The full 20 bit range may not be supported */
431 if (index
>= net
->mpls
.platform_labels
)
434 mpls_route_update(net
, index
, NULL
, NULL
, &cfg
->rc_nlinfo
);
/* Offset of @field within struct mpls_dev, encoded as a pointer so it can
 * sit in a ctl_table .data slot until mpls_dev_sysctl_register() adds the
 * base address of the real mpls_dev. offsetof() avoids doing member
 * arithmetic on a null pointer.
 */
#define MPLS_PERDEV_SYSCTL_OFFSET(field)	\
	((void *)offsetof(struct mpls_dev, field))
444 static const struct ctl_table mpls_dev_table
[] = {
447 .maxlen
= sizeof(int),
449 .proc_handler
= proc_dointvec
,
450 .data
= MPLS_PERDEV_SYSCTL_OFFSET(input_enabled
),
455 static int mpls_dev_sysctl_register(struct net_device
*dev
,
456 struct mpls_dev
*mdev
)
458 char path
[sizeof("net/mpls/conf/") + IFNAMSIZ
];
459 struct ctl_table
*table
;
462 table
= kmemdup(&mpls_dev_table
, sizeof(mpls_dev_table
), GFP_KERNEL
);
466 /* Table data contains only offsets relative to the base of
467 * the mdev at this point, so make them absolute.
469 for (i
= 0; i
< ARRAY_SIZE(mpls_dev_table
); i
++)
470 table
[i
].data
= (char *)mdev
+ (uintptr_t)table
[i
].data
;
472 snprintf(path
, sizeof(path
), "net/mpls/conf/%s", dev
->name
);
474 mdev
->sysctl
= register_net_sysctl(dev_net(dev
), path
, table
);
486 static void mpls_dev_sysctl_unregister(struct mpls_dev
*mdev
)
488 struct ctl_table
*table
;
490 table
= mdev
->sysctl
->ctl_table_arg
;
491 unregister_net_sysctl_table(mdev
->sysctl
);
495 static struct mpls_dev
*mpls_add_dev(struct net_device
*dev
)
497 struct mpls_dev
*mdev
;
502 mdev
= kzalloc(sizeof(*mdev
), GFP_KERNEL
);
506 err
= mpls_dev_sysctl_register(dev
, mdev
);
510 rcu_assign_pointer(dev
->mpls_ptr
, mdev
);
519 static void mpls_ifdown(struct net_device
*dev
)
521 struct mpls_route __rcu
**platform_label
;
522 struct net
*net
= dev_net(dev
);
523 struct mpls_dev
*mdev
;
526 platform_label
= rtnl_dereference(net
->mpls
.platform_label
);
527 for (index
= 0; index
< net
->mpls
.platform_labels
; index
++) {
528 struct mpls_route
*rt
= rtnl_dereference(platform_label
[index
]);
531 if (rtnl_dereference(rt
->rt_dev
) != dev
)
536 mdev
= mpls_dev_get(dev
);
540 mpls_dev_sysctl_unregister(mdev
);
542 RCU_INIT_POINTER(dev
->mpls_ptr
, NULL
);
544 kfree_rcu(mdev
, rcu
);
547 static int mpls_dev_notify(struct notifier_block
*this, unsigned long event
,
550 struct net_device
*dev
= netdev_notifier_info_to_dev(ptr
);
551 struct mpls_dev
*mdev
;
554 case NETDEV_REGISTER
:
555 /* For now just support ethernet devices */
556 if ((dev
->type
== ARPHRD_ETHER
) ||
557 (dev
->type
== ARPHRD_LOOPBACK
)) {
558 mdev
= mpls_add_dev(dev
);
560 return notifier_from_errno(PTR_ERR(mdev
));
564 case NETDEV_UNREGISTER
:
567 case NETDEV_CHANGENAME
:
568 mdev
= mpls_dev_get(dev
);
572 mpls_dev_sysctl_unregister(mdev
);
573 err
= mpls_dev_sysctl_register(dev
, mdev
);
575 return notifier_from_errno(err
);
582 static struct notifier_block mpls_dev_notifier
= {
583 .notifier_call
= mpls_dev_notify
,
586 static int nla_put_via(struct sk_buff
*skb
,
587 u8 table
, const void *addr
, int alen
)
589 static const int table_to_family
[NEIGH_NR_TABLES
+ 1] = {
590 AF_INET
, AF_INET6
, AF_DECnet
, AF_PACKET
,
594 int family
= AF_UNSPEC
;
596 nla
= nla_reserve(skb
, RTA_VIA
, alen
+ 2);
600 if (table
<= NEIGH_NR_TABLES
)
601 family
= table_to_family
[table
];
604 via
->rtvia_family
= family
;
605 memcpy(via
->rtvia_addr
, addr
, alen
);
609 int nla_put_labels(struct sk_buff
*skb
, int attrtype
,
610 u8 labels
, const u32 label
[])
613 struct mpls_shim_hdr
*nla_label
;
616 nla
= nla_reserve(skb
, attrtype
, labels
*4);
620 nla_label
= nla_data(nla
);
622 for (i
= labels
- 1; i
>= 0; i
--) {
623 nla_label
[i
] = mpls_entry_encode(label
[i
], 0, 0, bos
);
630 int nla_get_labels(const struct nlattr
*nla
,
631 u32 max_labels
, u32
*labels
, u32 label
[])
633 unsigned len
= nla_len(nla
);
635 struct mpls_shim_hdr
*nla_label
;
639 /* len needs to be an even multiple of 4 (the label size) */
643 /* Limit the number of new labels allowed */
645 if (nla_labels
> max_labels
)
648 nla_label
= nla_data(nla
);
650 for (i
= nla_labels
- 1; i
>= 0; i
--, bos
= false) {
651 struct mpls_entry_decoded dec
;
652 dec
= mpls_entry_decode(nla_label
+ i
);
654 /* Ensure the bottom of stack flag is properly set
655 * and ttl and tc are both clear.
657 if ((dec
.bos
!= bos
) || dec
.ttl
|| dec
.tc
)
661 case MPLS_LABEL_IMPLNULL
:
662 /* RFC3032: This is a label that an LSR may
663 * assign and distribute, but which never
664 * actually appears in the encapsulation.
669 label
[i
] = dec
.label
;
671 *labels
= nla_labels
;
675 static int rtm_to_route_config(struct sk_buff
*skb
, struct nlmsghdr
*nlh
,
676 struct mpls_route_config
*cfg
)
679 struct nlattr
*tb
[RTA_MAX
+1];
683 err
= nlmsg_parse(nlh
, sizeof(*rtm
), tb
, RTA_MAX
, rtm_mpls_policy
);
688 rtm
= nlmsg_data(nlh
);
689 memset(cfg
, 0, sizeof(*cfg
));
691 if (rtm
->rtm_family
!= AF_MPLS
)
693 if (rtm
->rtm_dst_len
!= 20)
695 if (rtm
->rtm_src_len
!= 0)
697 if (rtm
->rtm_tos
!= 0)
699 if (rtm
->rtm_table
!= RT_TABLE_MAIN
)
701 /* Any value is acceptable for rtm_protocol */
703 /* As mpls uses destination specific addresses
704 * (or source specific address in the case of multicast)
705 * all addresses have universal scope.
707 if (rtm
->rtm_scope
!= RT_SCOPE_UNIVERSE
)
709 if (rtm
->rtm_type
!= RTN_UNICAST
)
711 if (rtm
->rtm_flags
!= 0)
714 cfg
->rc_label
= LABEL_NOT_SPECIFIED
;
715 cfg
->rc_protocol
= rtm
->rtm_protocol
;
716 cfg
->rc_nlflags
= nlh
->nlmsg_flags
;
717 cfg
->rc_nlinfo
.portid
= NETLINK_CB(skb
).portid
;
718 cfg
->rc_nlinfo
.nlh
= nlh
;
719 cfg
->rc_nlinfo
.nl_net
= sock_net(skb
->sk
);
721 for (index
= 0; index
<= RTA_MAX
; index
++) {
722 struct nlattr
*nla
= tb
[index
];
728 cfg
->rc_ifindex
= nla_get_u32(nla
);
731 if (nla_get_labels(nla
, MAX_NEW_LABELS
,
732 &cfg
->rc_output_labels
,
733 cfg
->rc_output_label
))
739 if (nla_get_labels(nla
, 1, &label_count
,
743 /* The first 16 labels are reserved, and may not be set */
744 if (cfg
->rc_label
< 16)
751 struct rtvia
*via
= nla_data(nla
);
752 if (nla_len(nla
) < offsetof(struct rtvia
, rtvia_addr
))
754 cfg
->rc_via_alen
= nla_len(nla
) -
755 offsetof(struct rtvia
, rtvia_addr
);
756 if (cfg
->rc_via_alen
> MAX_VIA_ALEN
)
759 /* Validate the address family */
760 switch(via
->rtvia_family
) {
762 cfg
->rc_via_table
= NEIGH_LINK_TABLE
;
765 cfg
->rc_via_table
= NEIGH_ARP_TABLE
;
766 if (cfg
->rc_via_alen
!= 4)
770 cfg
->rc_via_table
= NEIGH_ND_TABLE
;
771 if (cfg
->rc_via_alen
!= 16)
775 /* Unsupported address family */
779 memcpy(cfg
->rc_via
, via
->rtvia_addr
, cfg
->rc_via_alen
);
783 /* Unsupported attribute */
793 static int mpls_rtm_delroute(struct sk_buff
*skb
, struct nlmsghdr
*nlh
)
795 struct mpls_route_config cfg
;
798 err
= rtm_to_route_config(skb
, nlh
, &cfg
);
802 return mpls_route_del(&cfg
);
806 static int mpls_rtm_newroute(struct sk_buff
*skb
, struct nlmsghdr
*nlh
)
808 struct mpls_route_config cfg
;
811 err
= rtm_to_route_config(skb
, nlh
, &cfg
);
815 return mpls_route_add(&cfg
);
818 static int mpls_dump_route(struct sk_buff
*skb
, u32 portid
, u32 seq
, int event
,
819 u32 label
, struct mpls_route
*rt
, int flags
)
821 struct net_device
*dev
;
822 struct nlmsghdr
*nlh
;
825 nlh
= nlmsg_put(skb
, portid
, seq
, event
, sizeof(*rtm
), flags
);
829 rtm
= nlmsg_data(nlh
);
830 rtm
->rtm_family
= AF_MPLS
;
831 rtm
->rtm_dst_len
= 20;
832 rtm
->rtm_src_len
= 0;
834 rtm
->rtm_table
= RT_TABLE_MAIN
;
835 rtm
->rtm_protocol
= rt
->rt_protocol
;
836 rtm
->rtm_scope
= RT_SCOPE_UNIVERSE
;
837 rtm
->rtm_type
= RTN_UNICAST
;
841 nla_put_labels(skb
, RTA_NEWDST
, rt
->rt_labels
, rt
->rt_label
))
842 goto nla_put_failure
;
843 if (nla_put_via(skb
, rt
->rt_via_table
, rt
->rt_via
, rt
->rt_via_alen
))
844 goto nla_put_failure
;
845 dev
= rtnl_dereference(rt
->rt_dev
);
846 if (dev
&& nla_put_u32(skb
, RTA_OIF
, dev
->ifindex
))
847 goto nla_put_failure
;
848 if (nla_put_labels(skb
, RTA_DST
, 1, &label
))
849 goto nla_put_failure
;
855 nlmsg_cancel(skb
, nlh
);
859 static int mpls_dump_routes(struct sk_buff
*skb
, struct netlink_callback
*cb
)
861 struct net
*net
= sock_net(skb
->sk
);
862 struct mpls_route __rcu
**platform_label
;
863 size_t platform_labels
;
872 platform_label
= rtnl_dereference(net
->mpls
.platform_label
);
873 platform_labels
= net
->mpls
.platform_labels
;
874 for (; index
< platform_labels
; index
++) {
875 struct mpls_route
*rt
;
876 rt
= rtnl_dereference(platform_label
[index
]);
880 if (mpls_dump_route(skb
, NETLINK_CB(cb
->skb
).portid
,
881 cb
->nlh
->nlmsg_seq
, RTM_NEWROUTE
,
882 index
, rt
, NLM_F_MULTI
) < 0)
890 static inline size_t lfib_nlmsg_size(struct mpls_route
*rt
)
893 NLMSG_ALIGN(sizeof(struct rtmsg
))
894 + nla_total_size(2 + rt
->rt_via_alen
) /* RTA_VIA */
895 + nla_total_size(4); /* RTA_DST */
896 if (rt
->rt_labels
) /* RTA_NEWDST */
897 payload
+= nla_total_size(rt
->rt_labels
* 4);
898 if (rt
->rt_dev
) /* RTA_OIF */
899 payload
+= nla_total_size(4);
903 static void rtmsg_lfib(int event
, u32 label
, struct mpls_route
*rt
,
904 struct nlmsghdr
*nlh
, struct net
*net
, u32 portid
,
905 unsigned int nlm_flags
)
908 u32 seq
= nlh
? nlh
->nlmsg_seq
: 0;
911 skb
= nlmsg_new(lfib_nlmsg_size(rt
), GFP_KERNEL
);
915 err
= mpls_dump_route(skb
, portid
, seq
, event
, label
, rt
, nlm_flags
);
917 /* -EMSGSIZE implies BUG in lfib_nlmsg_size */
918 WARN_ON(err
== -EMSGSIZE
);
922 rtnl_notify(skb
, net
, portid
, RTNLGRP_MPLS_ROUTE
, nlh
, GFP_KERNEL
);
927 rtnl_set_sk_err(net
, RTNLGRP_MPLS_ROUTE
, err
);
930 static int resize_platform_label_table(struct net
*net
, size_t limit
)
932 size_t size
= sizeof(struct mpls_route
*) * limit
;
935 struct mpls_route __rcu
**labels
= NULL
, **old
;
936 struct mpls_route
*rt0
= NULL
, *rt2
= NULL
;
940 labels
= kzalloc(size
, GFP_KERNEL
| __GFP_NOWARN
| __GFP_NORETRY
);
942 labels
= vzalloc(size
);
948 /* In case the predefined labels need to be populated */
949 if (limit
> MPLS_LABEL_IPV4NULL
) {
950 struct net_device
*lo
= net
->loopback_dev
;
951 rt0
= mpls_rt_alloc(lo
->addr_len
);
954 RCU_INIT_POINTER(rt0
->rt_dev
, lo
);
955 rt0
->rt_protocol
= RTPROT_KERNEL
;
956 rt0
->rt_via_table
= NEIGH_LINK_TABLE
;
957 memcpy(rt0
->rt_via
, lo
->dev_addr
, lo
->addr_len
);
959 if (limit
> MPLS_LABEL_IPV6NULL
) {
960 struct net_device
*lo
= net
->loopback_dev
;
961 rt2
= mpls_rt_alloc(lo
->addr_len
);
964 RCU_INIT_POINTER(rt2
->rt_dev
, lo
);
965 rt2
->rt_protocol
= RTPROT_KERNEL
;
966 rt2
->rt_via_table
= NEIGH_LINK_TABLE
;
967 memcpy(rt2
->rt_via
, lo
->dev_addr
, lo
->addr_len
);
971 /* Remember the original table */
972 old
= rtnl_dereference(net
->mpls
.platform_label
);
973 old_limit
= net
->mpls
.platform_labels
;
975 /* Free any labels beyond the new table */
976 for (index
= limit
; index
< old_limit
; index
++)
977 mpls_route_update(net
, index
, NULL
, NULL
, NULL
);
979 /* Copy over the old labels */
981 if (old_limit
< limit
)
982 cp_size
= old_limit
* sizeof(struct mpls_route
*);
984 memcpy(labels
, old
, cp_size
);
986 /* If needed set the predefined labels */
987 if ((old_limit
<= MPLS_LABEL_IPV6NULL
) &&
988 (limit
> MPLS_LABEL_IPV6NULL
)) {
989 RCU_INIT_POINTER(labels
[MPLS_LABEL_IPV6NULL
], rt2
);
993 if ((old_limit
<= MPLS_LABEL_IPV4NULL
) &&
994 (limit
> MPLS_LABEL_IPV4NULL
)) {
995 RCU_INIT_POINTER(labels
[MPLS_LABEL_IPV4NULL
], rt0
);
999 /* Update the global pointers */
1000 net
->mpls
.platform_labels
= limit
;
1001 rcu_assign_pointer(net
->mpls
.platform_label
, labels
);
1022 static int mpls_platform_labels(struct ctl_table
*table
, int write
,
1023 void __user
*buffer
, size_t *lenp
, loff_t
*ppos
)
1025 struct net
*net
= table
->data
;
1026 int platform_labels
= net
->mpls
.platform_labels
;
1028 struct ctl_table tmp
= {
1029 .procname
= table
->procname
,
1030 .data
= &platform_labels
,
1031 .maxlen
= sizeof(int),
1032 .mode
= table
->mode
,
1034 .extra2
= &label_limit
,
1037 ret
= proc_dointvec_minmax(&tmp
, write
, buffer
, lenp
, ppos
);
1039 if (write
&& ret
== 0)
1040 ret
= resize_platform_label_table(net
, platform_labels
);
1045 static const struct ctl_table mpls_table
[] = {
1047 .procname
= "platform_labels",
1049 .maxlen
= sizeof(int),
1051 .proc_handler
= mpls_platform_labels
,
1056 static int mpls_net_init(struct net
*net
)
1058 struct ctl_table
*table
;
1060 net
->mpls
.platform_labels
= 0;
1061 net
->mpls
.platform_label
= NULL
;
1063 table
= kmemdup(mpls_table
, sizeof(mpls_table
), GFP_KERNEL
);
1067 table
[0].data
= net
;
1068 net
->mpls
.ctl
= register_net_sysctl(net
, "net/mpls", table
);
1069 if (net
->mpls
.ctl
== NULL
)
1075 static void mpls_net_exit(struct net
*net
)
1077 struct mpls_route __rcu
**platform_label
;
1078 size_t platform_labels
;
1079 struct ctl_table
*table
;
1082 table
= net
->mpls
.ctl
->ctl_table_arg
;
1083 unregister_net_sysctl_table(net
->mpls
.ctl
);
1086 /* An rcu grace period has passed since there was a device in
1087 * the network namespace (and thus the last in flight packet)
1088 * left this network namespace. This is because
1089 * unregister_netdevice_many and netdev_run_todo has completed
1090 * for each network device that was in this network namespace.
1092 * As such no additional rcu synchronization is necessary when
1093 * freeing the platform_label table.
1096 platform_label
= rtnl_dereference(net
->mpls
.platform_label
);
1097 platform_labels
= net
->mpls
.platform_labels
;
1098 for (index
= 0; index
< platform_labels
; index
++) {
1099 struct mpls_route
*rt
= rtnl_dereference(platform_label
[index
]);
1100 RCU_INIT_POINTER(platform_label
[index
], NULL
);
1105 kvfree(platform_label
);
1108 static struct pernet_operations mpls_net_ops
= {
1109 .init
= mpls_net_init
,
1110 .exit
= mpls_net_exit
,
1113 static int __init
mpls_init(void)
1117 BUILD_BUG_ON(sizeof(struct mpls_shim_hdr
) != 4);
1119 err
= register_pernet_subsys(&mpls_net_ops
);
1123 err
= register_netdevice_notifier(&mpls_dev_notifier
);
1125 goto out_unregister_pernet
;
1127 dev_add_pack(&mpls_packet_type
);
1129 rtnl_register(PF_MPLS
, RTM_NEWROUTE
, mpls_rtm_newroute
, NULL
, NULL
);
1130 rtnl_register(PF_MPLS
, RTM_DELROUTE
, mpls_rtm_delroute
, NULL
, NULL
);
1131 rtnl_register(PF_MPLS
, RTM_GETROUTE
, NULL
, mpls_dump_routes
, NULL
);
1136 out_unregister_pernet
:
1137 unregister_pernet_subsys(&mpls_net_ops
);
module_init(mpls_init);
1142 static void __exit
mpls_exit(void)
1144 rtnl_unregister_all(PF_MPLS
);
1145 dev_remove_pack(&mpls_packet_type
);
1146 unregister_netdevice_notifier(&mpls_dev_notifier
);
1147 unregister_pernet_subsys(&mpls_net_ops
);
1149 module_exit(mpls_exit
);
1151 MODULE_DESCRIPTION("MultiProtocol Label Switching");
1152 MODULE_LICENSE("GPL v2");
1153 MODULE_ALIAS_NETPROTO(PF_MPLS
);