1 // SPDX-License-Identifier: GPL-2.0-or-later
/*
 * mpls tunnels: an implementation of MPLS tunnels using the lightweight
 * tunnel (lwtunnel) infrastructure.
 *
 * Authors: Roopa Prabhu, <roopa@cumulusnetworks.com>
 */
8 #include <linux/types.h>
9 #include <linux/skbuff.h>
10 #include <linux/net.h>
11 #include <linux/module.h>
12 #include <linux/mpls.h>
13 #include <linux/vmalloc.h>
16 #include <net/lwtunnel.h>
17 #include <net/netevent.h>
18 #include <net/netns/generic.h>
19 #include <net/ip6_fib.h>
20 #include <net/route.h>
21 #include <net/mpls_iptunnel.h>
22 #include <linux/mpls_iptunnel.h>
25 static const struct nla_policy mpls_iptunnel_policy
[MPLS_IPTUNNEL_MAX
+ 1] = {
26 [MPLS_IPTUNNEL_DST
] = { .len
= sizeof(u32
) },
27 [MPLS_IPTUNNEL_TTL
] = { .type
= NLA_U8
},
30 static unsigned int mpls_encap_size(struct mpls_iptunnel_encap
*en
)
32 /* The size of the layer 2.5 labels to be added for this route */
33 return en
->labels
* sizeof(struct mpls_shim_hdr
);
/*
 * mpls_xmit - lwtunnel transmit hook: prepend the route's MPLS label stack
 * to @skb and pass the packet to the neighbour layer on the output device.
 *
 * Steps visible in this listing:
 *   - fetch the encap description from the dst's lwtstate;
 *   - choose the label TTL from the LWT setting, the per-netns MPLS setting
 *     or the IPv4/IPv6 header;
 *   - check the packet against the device MPLS MTU minus the new header size;
 *   - make headroom writable (skb_cow_head), push the label bytes and switch
 *     skb->protocol to ETH_P_MPLS_UC;
 *   - encode the labels and call neigh_xmit() with the ARP or ND table,
 *     chosen from the gateway address family;
 *   - return LWTUNNEL_XMIT_DONE after the transmit attempt.
 *
 * NOTE(review): this view of the function is incomplete. Brace-only lines,
 * several local declarations (ttl, net, mtu, hh_len, err, i are used but
 * not declared here), the assignment of out_dev, the else arms and the
 * statements guarded by the failure checks are not visible. Confirm
 * against the full file before relying on control flow.
 */
36 static int mpls_xmit(struct sk_buff
*skb
)
38 struct mpls_iptunnel_encap
*tun_encap_info
;
39 struct mpls_shim_hdr
*hdr
;
40 struct net_device
*out_dev
;
42 unsigned int new_header_size
;
44 struct dst_entry
*dst
= skb_dst(skb
);
45 struct rtable
*rt
= NULL
;
46 struct rt6_info
*rt6
= NULL
;
47 struct mpls_dev
*out_mdev
;
54 /* Find the output device */
56 net
= dev_net(out_dev
);
/*
 * Preconditions: the device must be usable for MPLS output, the dst must
 * carry lwtunnel state, and the skb must not trip skb_warn_if_lro().
 * The statement taken on failure is not visible in this view.
 */
58 if (!mpls_output_possible(out_dev
) ||
59 !dst
->lwtstate
|| skb_warn_if_lro(skb
))
62 skb_forward_csum(skb
);
64 tun_encap_info
= mpls_lwtunnel_encap(dst
->lwtstate
);
66 /* Obtain the ttl using the following set of rules.
68 * LWT ttl propagation setting:
69 * - disabled => use default TTL value from LWT
70 * - enabled => use TTL value from IPv4/IPv6 header
72 * Global ttl propagation setting:
73 * - disabled => use default TTL value from global setting
74 * - enabled => use TTL value from IPv4/IPv6 header
76 if (dst
->ops
->family
== AF_INET
) {
77 if (tun_encap_info
->ttl_propagate
== MPLS_TTL_PROP_DISABLED
)
78 ttl
= tun_encap_info
->default_ttl
;
79 else if (tun_encap_info
->ttl_propagate
== MPLS_TTL_PROP_DEFAULT
&&
80 !net
->mpls
.ip_ttl_propagate
)
81 ttl
= net
->mpls
.default_ttl
;
83 ttl
= ip_hdr(skb
)->ttl
;
85 } else if (dst
->ops
->family
== AF_INET6
) {
86 if (tun_encap_info
->ttl_propagate
== MPLS_TTL_PROP_DISABLED
)
87 ttl
= tun_encap_info
->default_ttl
;
88 else if (tun_encap_info
->ttl_propagate
== MPLS_TTL_PROP_DEFAULT
&&
89 !net
->mpls
.ip_ttl_propagate
)
90 ttl
= net
->mpls
.default_ttl
;
92 ttl
= ipv6_hdr(skb
)->hop_limit
;
93 rt6
= dst_rt6_info(dst
);
98 /* Verify the destination can hold the packet */
99 new_header_size
= mpls_encap_size(tun_encap_info
);
100 mtu
= mpls_dev_mtu(out_dev
);
101 if (mpls_pkt_too_big(skb
, mtu
- new_header_size
))
/*
 * Link-layer headroom to reserve. The statement guarded by the
 * !header_ops test is not visible in this view.
 */
104 hh_len
= LL_RESERVED_SPACE(out_dev
);
105 if (!out_dev
->header_ops
)
108 /* Ensure there is enough space for the headers in the skb */
109 if (skb_cow_head(skb
, hh_len
+ new_header_size
))
/*
 * Record the current protocol and network header as the inner ones
 * before the label bytes are pushed in front of them.
 */
112 skb_set_inner_protocol(skb
, skb
->protocol
);
113 skb_reset_inner_network_header(skb
);
115 skb_push(skb
, new_header_size
);
117 skb_reset_network_header(skb
);
120 skb
->protocol
= htons(ETH_P_MPLS_UC
);
122 /* Push the new labels */
/* Entries are written from the last label index down to index 0. */
125 for (i
= tun_encap_info
->labels
- 1; i
>= 0; i
--) {
126 hdr
[i
] = mpls_entry_encode(tun_encap_info
->label
[i
],
131 mpls_stats_inc_outucastpkts(out_dev
, skb
);
/*
 * IPv4 route: resolve the next hop through the ND table when the gateway
 * family is AF_INET6, otherwise through the ARP table.
 */
134 if (rt
->rt_gw_family
== AF_INET6
)
135 err
= neigh_xmit(NEIGH_ND_TABLE
, out_dev
, &rt
->rt_gw6
,
138 err
= neigh_xmit(NEIGH_ARP_TABLE
, out_dev
, &rt
->rt_gw4
,
/*
 * IPv6 route: a v4-mapped gateway is resolved through the ARP table using
 * the last 32 bits of the address; any other gateway uses the ND table.
 */
141 if (ipv6_addr_v4mapped(&rt6
->rt6i_gateway
)) {
143 err
= neigh_xmit(NEIGH_ARP_TABLE
, out_dev
, &rt6
->rt6i_gateway
.s6_addr32
[3],
146 err
= neigh_xmit(NEIGH_ND_TABLE
, out_dev
, &rt6
->rt6i_gateway
,
150 net_dbg_ratelimited("%s: packet transmission failed: %d\n",
153 return LWTUNNEL_XMIT_DONE
;
/*
 * Failure accounting: look up the device's MPLS state (if there is an
 * output device) and count a tx error against it.
 */
156 out_mdev
= out_dev
? mpls_dev_get(out_dev
) : NULL
;
158 MPLS_INC_STATS(out_mdev
, tx_errors
);
/*
 * mpls_build_state - build the lwtunnel state for an MPLS encap from the
 * nested netlink attribute @nla.
 * @net:    network namespace (not referenced in the visible lines)
 * @nla:    nested attribute carrying the MPLS_IPTUNNEL_* attributes
 * @family: address family (not referenced in the visible lines)
 * @cfg:    route configuration (not referenced in the visible lines)
 * @ts:     output location for the new state
 * @extack: extended ack used for error reporting
 *
 * MPLS_IPTUNNEL_DST is mandatory. The label attribute is parsed twice with
 * nla_get_labels(): once to count the labels, bounded by MAX_NEW_LABELS,
 * and once to fill the newly allocated encap. MPLS_IPTUNNEL_TTL is optional.
 *
 * NOTE(review): this view is incomplete. Braces, the declarations of ret
 * and n_labels, the error returns, the trailing arguments of several calls
 * and the end of the function (including the store through @ts) are not
 * visible. Confirm against the full file.
 */
163 static int mpls_build_state(struct net
*net
, struct nlattr
*nla
,
164 unsigned int family
, const void *cfg
,
165 struct lwtunnel_state
**ts
,
166 struct netlink_ext_ack
*extack
)
168 struct mpls_iptunnel_encap
*tun_encap_info
;
169 struct nlattr
*tb
[MPLS_IPTUNNEL_MAX
+ 1];
170 struct lwtunnel_state
*newts
;
/* Validate and split the nested attributes according to the policy. */
174 ret
= nla_parse_nested_deprecated(tb
, MPLS_IPTUNNEL_MAX
, nla
,
175 mpls_iptunnel_policy
, extack
);
179 if (!tb
[MPLS_IPTUNNEL_DST
]) {
180 NL_SET_ERR_MSG(extack
, "MPLS_IPTUNNEL_DST attribute is missing");
184 /* determine number of labels */
/* First pass: NULL label buffer, only the count is written to n_labels. */
185 if (nla_get_labels(tb
[MPLS_IPTUNNEL_DST
], MAX_NEW_LABELS
,
186 &n_labels
, NULL
, extack
))
/* Allocation is sized with struct_size() over the trailing label array. */
189 newts
= lwtunnel_state_alloc(struct_size(tun_encap_info
, label
,
194 tun_encap_info
= mpls_lwtunnel_encap(newts
);
/* Second pass: store the label count and the labels in the new encap. */
195 ret
= nla_get_labels(tb
[MPLS_IPTUNNEL_DST
], n_labels
,
196 &tun_encap_info
->labels
, tun_encap_info
->label
,
/* Without a TTL attribute the encap keeps the default propagation mode. */
201 tun_encap_info
->ttl_propagate
= MPLS_TTL_PROP_DEFAULT
;
203 if (tb
[MPLS_IPTUNNEL_TTL
]) {
204 tun_encap_info
->default_ttl
= nla_get_u8(tb
[MPLS_IPTUNNEL_TTL
]);
205 /* TTL 0 implies propagate from IP header */
206 tun_encap_info
->ttl_propagate
= tun_encap_info
->default_ttl
?
207 MPLS_TTL_PROP_DISABLED
:
208 MPLS_TTL_PROP_ENABLED
;
/*
 * Tag the state as MPLS encap, request xmit redirection and advertise the
 * label stack size as the required headroom.
 */
211 newts
->type
= LWTUNNEL_ENCAP_MPLS
;
212 newts
->flags
|= LWTUNNEL_STATE_XMIT_REDIRECT
;
213 newts
->headroom
= mpls_encap_size(tun_encap_info
);
/*
 * mpls_fill_encap_info - dump an MPLS encap into a netlink message.
 * @skb:      message being built
 * @lwtstate: lwtunnel state holding the MPLS encap
 *
 * Emits the labels as MPLS_IPTUNNEL_DST. MPLS_IPTUNNEL_TTL is emitted only
 * when ttl_propagate is not MPLS_TTL_PROP_DEFAULT. A failed put jumps to
 * nla_put_failure.
 *
 * NOTE(review): the return statements and the nla_put_failure label are
 * not visible in this view; confirm the return values in the full file.
 */
226 static int mpls_fill_encap_info(struct sk_buff
*skb
,
227 struct lwtunnel_state
*lwtstate
)
229 struct mpls_iptunnel_encap
*tun_encap_info
;
231 tun_encap_info
= mpls_lwtunnel_encap(lwtstate
);
233 if (nla_put_labels(skb
, MPLS_IPTUNNEL_DST
, tun_encap_info
->labels
,
234 tun_encap_info
->label
))
235 goto nla_put_failure
;
/* The TTL attribute is written only for a non-default propagation mode. */
237 if (tun_encap_info
->ttl_propagate
!= MPLS_TTL_PROP_DEFAULT
&&
238 nla_put_u8(skb
, MPLS_IPTUNNEL_TTL
, tun_encap_info
->default_ttl
))
239 goto nla_put_failure
;
/*
 * mpls_encap_nlsize - netlink space needed to dump this MPLS encap.
 * @lwtstate: lwtunnel state holding the MPLS encap
 *
 * Accounts for one attribute of 4 bytes per label, plus a 1-byte attribute
 * when ttl_propagate is not MPLS_TTL_PROP_DEFAULT.
 *
 * NOTE(review): the declaration of nlsize and the return statement are not
 * visible in this view.
 */
247 static int mpls_encap_nlsize(struct lwtunnel_state
*lwtstate
)
249 struct mpls_iptunnel_encap
*tun_encap_info
;
252 tun_encap_info
= mpls_lwtunnel_encap(lwtstate
);
254 nlsize
= nla_total_size(tun_encap_info
->labels
* 4);
/* Same condition under which the TTL attribute is emitted on dump. */
256 if (tun_encap_info
->ttl_propagate
!= MPLS_TTL_PROP_DEFAULT
)
257 nlsize
+= nla_total_size(1);
/*
 * mpls_encap_cmp - compare two MPLS lwtunnel encap states.
 * @a: first lwtunnel state
 * @b: second lwtunnel state
 *
 * Compares the label count, TTL propagation mode and default TTL first,
 * then each label in order.
 *
 * NOTE(review): the declaration of l and every return statement are not
 * visible in this view, so the values returned for "equal" and "different"
 * cannot be stated from here; confirm in the full file.
 */
262 static int mpls_encap_cmp(struct lwtunnel_state
*a
, struct lwtunnel_state
*b
)
264 struct mpls_iptunnel_encap
*a_hdr
= mpls_lwtunnel_encap(a
);
265 struct mpls_iptunnel_encap
*b_hdr
= mpls_lwtunnel_encap(b
);
/* Scalar fields first; the per-label loop relies on equal label counts. */
268 if (a_hdr
->labels
!= b_hdr
->labels
||
269 a_hdr
->ttl_propagate
!= b_hdr
->ttl_propagate
||
270 a_hdr
->default_ttl
!= b_hdr
->default_ttl
)
273 for (l
= 0; l
< a_hdr
->labels
; l
++)
274 if (a_hdr
->label
[l
] != b_hdr
->label
[l
])
/*
 * lwtunnel encap operations for the MPLS encap type: state construction,
 * netlink dump, dump size estimation and state comparison.
 *
 * NOTE(review): the embedded numbering skips from 280 to 282, so one
 * initializer line appears to be missing from this view; confirm against
 * the full file.
 */
279 static const struct lwtunnel_encap_ops mpls_iptun_ops
= {
280 .build_state
= mpls_build_state
,
282 .fill_encap
= mpls_fill_encap_info
,
283 .get_encap_size
= mpls_encap_nlsize
,
284 .cmp_encap
= mpls_encap_cmp
,
285 .owner
= THIS_MODULE
,
288 static int __init
mpls_iptunnel_init(void)
290 return lwtunnel_encap_add_ops(&mpls_iptun_ops
, LWTUNNEL_ENCAP_MPLS
);
292 module_init(mpls_iptunnel_init
);
294 static void __exit
mpls_iptunnel_exit(void)
296 lwtunnel_encap_del_ops(&mpls_iptun_ops
, LWTUNNEL_ENCAP_MPLS
);
298 module_exit(mpls_iptunnel_exit
);
300 MODULE_ALIAS_RTNL_LWT(MPLS
);
301 MODULE_SOFTDEP("post: mpls_gso");
302 MODULE_DESCRIPTION("MultiProtocol Label Switching IP Tunnels");
303 MODULE_LICENSE("GPL v2");