/* net/openvswitch/flow_netlink.c */
/*
 * Copyright (c) 2007-2014 Nicira, Inc.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of version 2 of the GNU General Public
 * License as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
 * 02110-1301, USA
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
21 #include "flow.h"
22 #include "datapath.h"
23 #include <linux/uaccess.h>
24 #include <linux/netdevice.h>
25 #include <linux/etherdevice.h>
26 #include <linux/if_ether.h>
27 #include <linux/if_vlan.h>
28 #include <net/llc_pdu.h>
29 #include <linux/kernel.h>
30 #include <linux/jhash.h>
31 #include <linux/jiffies.h>
32 #include <linux/llc.h>
33 #include <linux/module.h>
34 #include <linux/in.h>
35 #include <linux/rcupdate.h>
36 #include <linux/if_arp.h>
37 #include <linux/ip.h>
38 #include <linux/ipv6.h>
39 #include <linux/sctp.h>
40 #include <linux/tcp.h>
41 #include <linux/udp.h>
42 #include <linux/icmp.h>
43 #include <linux/icmpv6.h>
44 #include <linux/rculist.h>
45 #include <net/geneve.h>
46 #include <net/ip.h>
47 #include <net/ipv6.h>
48 #include <net/ndisc.h>
49 #include <net/mpls.h>
51 #include "flow_netlink.h"
52 #include "vport-vxlan.h"
struct ovs_len_tbl {
	int len;
	const struct ovs_len_tbl *next;
};

#define OVS_ATTR_NESTED -1
static void update_range(struct sw_flow_match *match,
			 size_t offset, size_t size, bool is_mask)
{
	struct sw_flow_key_range *range;
	size_t start = rounddown(offset, sizeof(long));
	size_t end = roundup(offset + size, sizeof(long));

	if (!is_mask)
		range = &match->range;
	else
		range = &match->mask->range;

	if (range->start == range->end) {
		range->start = start;
		range->end = end;
		return;
	}

	if (range->start > start)
		range->start = start;

	if (range->end < end)
		range->end = end;
}
#define SW_FLOW_KEY_PUT(match, field, value, is_mask) \
	do { \
		update_range(match, offsetof(struct sw_flow_key, field),    \
			     sizeof((match)->key->field), is_mask);	    \
		if (is_mask)						    \
			(match)->mask->key.field = value;		    \
		else							    \
			(match)->key->field = value;			    \
	} while (0)

#define SW_FLOW_KEY_MEMCPY_OFFSET(match, offset, value_p, len, is_mask)	    \
	do {								    \
		update_range(match, offset, len, is_mask);		    \
		if (is_mask)						    \
			memcpy((u8 *)&(match)->mask->key + offset, value_p, \
			       len);					    \
		else							    \
			memcpy((u8 *)(match)->key + offset, value_p, len);  \
	} while (0)

#define SW_FLOW_KEY_MEMCPY(match, field, value_p, len, is_mask)		      \
	SW_FLOW_KEY_MEMCPY_OFFSET(match, offsetof(struct sw_flow_key, field), \
				  value_p, len, is_mask)

#define SW_FLOW_KEY_MEMSET_FIELD(match, field, value, is_mask)		    \
	do {								    \
		update_range(match, offsetof(struct sw_flow_key, field),    \
			     sizeof((match)->key->field), is_mask);	    \
		if (is_mask)						    \
			memset((u8 *)&(match)->mask->key.field, value,	    \
			       sizeof((match)->mask->key.field));	    \
		else							    \
			memset((u8 *)&(match)->key->field, value,	    \
			       sizeof((match)->key->field));		    \
	} while (0)
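
/* These helpers write a value into the key or the mask and, at the same
 * time, grow the sw_flow_key_range that flow lookup will later hash and
 * compare. For example, a caller such as ovs_key_from_nlattrs() sets the
 * IP protocol with:
 *
 *	SW_FLOW_KEY_PUT(match, ip.proto, ipv4_key->ipv4_proto, is_mask);
 *
 * which stores the value in match->key (or match->mask->key) and extends
 * the corresponding range to cover ip.proto, rounded out to sizeof(long)
 * boundaries by update_range().
 */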
static bool match_validate(const struct sw_flow_match *match,
			   u64 key_attrs, u64 mask_attrs, bool log)
{
	u64 key_expected = 1 << OVS_KEY_ATTR_ETHERNET;
	u64 mask_allowed = key_attrs;  /* At most allow all key attributes */

	/* The following mask attributes are allowed only if they
	 * pass the validation tests. */
	mask_allowed &= ~((1 << OVS_KEY_ATTR_IPV4)
			| (1 << OVS_KEY_ATTR_IPV6)
			| (1 << OVS_KEY_ATTR_TCP)
			| (1 << OVS_KEY_ATTR_TCP_FLAGS)
			| (1 << OVS_KEY_ATTR_UDP)
			| (1 << OVS_KEY_ATTR_SCTP)
			| (1 << OVS_KEY_ATTR_ICMP)
			| (1 << OVS_KEY_ATTR_ICMPV6)
			| (1 << OVS_KEY_ATTR_ARP)
			| (1 << OVS_KEY_ATTR_ND)
			| (1 << OVS_KEY_ATTR_MPLS));

	/* Always allowed mask fields. */
	mask_allowed |= ((1 << OVS_KEY_ATTR_TUNNEL)
		       | (1 << OVS_KEY_ATTR_IN_PORT)
		       | (1 << OVS_KEY_ATTR_ETHERTYPE));

	/* Check key attributes. */
	if (match->key->eth.type == htons(ETH_P_ARP)
			|| match->key->eth.type == htons(ETH_P_RARP)) {
		key_expected |= 1 << OVS_KEY_ATTR_ARP;
		if (match->mask && (match->mask->key.eth.type == htons(0xffff)))
			mask_allowed |= 1 << OVS_KEY_ATTR_ARP;
	}

	if (eth_p_mpls(match->key->eth.type)) {
		key_expected |= 1 << OVS_KEY_ATTR_MPLS;
		if (match->mask && (match->mask->key.eth.type == htons(0xffff)))
			mask_allowed |= 1 << OVS_KEY_ATTR_MPLS;
	}

	if (match->key->eth.type == htons(ETH_P_IP)) {
		key_expected |= 1 << OVS_KEY_ATTR_IPV4;
		if (match->mask && (match->mask->key.eth.type == htons(0xffff)))
			mask_allowed |= 1 << OVS_KEY_ATTR_IPV4;

		if (match->key->ip.frag != OVS_FRAG_TYPE_LATER) {
			if (match->key->ip.proto == IPPROTO_UDP) {
				key_expected |= 1 << OVS_KEY_ATTR_UDP;
				if (match->mask && (match->mask->key.ip.proto == 0xff))
					mask_allowed |= 1 << OVS_KEY_ATTR_UDP;
			}

			if (match->key->ip.proto == IPPROTO_SCTP) {
				key_expected |= 1 << OVS_KEY_ATTR_SCTP;
				if (match->mask && (match->mask->key.ip.proto == 0xff))
					mask_allowed |= 1 << OVS_KEY_ATTR_SCTP;
			}

			if (match->key->ip.proto == IPPROTO_TCP) {
				key_expected |= 1 << OVS_KEY_ATTR_TCP;
				key_expected |= 1 << OVS_KEY_ATTR_TCP_FLAGS;
				if (match->mask && (match->mask->key.ip.proto == 0xff)) {
					mask_allowed |= 1 << OVS_KEY_ATTR_TCP;
					mask_allowed |= 1 << OVS_KEY_ATTR_TCP_FLAGS;
				}
			}

			if (match->key->ip.proto == IPPROTO_ICMP) {
				key_expected |= 1 << OVS_KEY_ATTR_ICMP;
				if (match->mask && (match->mask->key.ip.proto == 0xff))
					mask_allowed |= 1 << OVS_KEY_ATTR_ICMP;
			}
		}
	}

	if (match->key->eth.type == htons(ETH_P_IPV6)) {
		key_expected |= 1 << OVS_KEY_ATTR_IPV6;
		if (match->mask && (match->mask->key.eth.type == htons(0xffff)))
			mask_allowed |= 1 << OVS_KEY_ATTR_IPV6;

		if (match->key->ip.frag != OVS_FRAG_TYPE_LATER) {
			if (match->key->ip.proto == IPPROTO_UDP) {
				key_expected |= 1 << OVS_KEY_ATTR_UDP;
				if (match->mask && (match->mask->key.ip.proto == 0xff))
					mask_allowed |= 1 << OVS_KEY_ATTR_UDP;
			}

			if (match->key->ip.proto == IPPROTO_SCTP) {
				key_expected |= 1 << OVS_KEY_ATTR_SCTP;
				if (match->mask && (match->mask->key.ip.proto == 0xff))
					mask_allowed |= 1 << OVS_KEY_ATTR_SCTP;
			}

			if (match->key->ip.proto == IPPROTO_TCP) {
				key_expected |= 1 << OVS_KEY_ATTR_TCP;
				key_expected |= 1 << OVS_KEY_ATTR_TCP_FLAGS;
				if (match->mask && (match->mask->key.ip.proto == 0xff)) {
					mask_allowed |= 1 << OVS_KEY_ATTR_TCP;
					mask_allowed |= 1 << OVS_KEY_ATTR_TCP_FLAGS;
				}
			}

			if (match->key->ip.proto == IPPROTO_ICMPV6) {
				key_expected |= 1 << OVS_KEY_ATTR_ICMPV6;
				if (match->mask && (match->mask->key.ip.proto == 0xff))
					mask_allowed |= 1 << OVS_KEY_ATTR_ICMPV6;

				if (match->key->tp.src ==
						htons(NDISC_NEIGHBOUR_SOLICITATION) ||
				    match->key->tp.src == htons(NDISC_NEIGHBOUR_ADVERTISEMENT)) {
					key_expected |= 1 << OVS_KEY_ATTR_ND;
					if (match->mask && (match->mask->key.tp.src == htons(0xff)))
						mask_allowed |= 1 << OVS_KEY_ATTR_ND;
				}
			}
		}
	}

	if ((key_attrs & key_expected) != key_expected) {
		/* Key attributes check failed. */
		OVS_NLERR(log, "Missing key (keys=%llx, expected=%llx)",
			  (unsigned long long)key_attrs,
			  (unsigned long long)key_expected);
		return false;
	}

	if ((mask_attrs & mask_allowed) != mask_attrs) {
		/* Mask attributes check failed. */
		OVS_NLERR(log, "Unexpected mask (mask=%llx, allowed=%llx)",
			  (unsigned long long)mask_attrs,
			  (unsigned long long)mask_allowed);
		return false;
	}

	return true;
}
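
/* The size helpers below give a worst-case upper bound on the Netlink
 * attributes needed to describe a tunnel key or flow key; they are meant
 * for sizing reply messages, where over-estimating only wastes a little
 * space while under-estimating would make a later nla_put() fail with
 * -EMSGSIZE.
 */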
size_t ovs_tun_key_attr_size(void)
{
	/* Whenever adding new OVS_TUNNEL_KEY_ FIELDS, we should consider
	 * updating this function.
	 */
	return    nla_total_size(8)    /* OVS_TUNNEL_KEY_ATTR_ID */
		+ nla_total_size(4)    /* OVS_TUNNEL_KEY_ATTR_IPV4_SRC */
		+ nla_total_size(4)    /* OVS_TUNNEL_KEY_ATTR_IPV4_DST */
		+ nla_total_size(1)    /* OVS_TUNNEL_KEY_ATTR_TOS */
		+ nla_total_size(1)    /* OVS_TUNNEL_KEY_ATTR_TTL */
		+ nla_total_size(0)    /* OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT */
		+ nla_total_size(0)    /* OVS_TUNNEL_KEY_ATTR_CSUM */
		+ nla_total_size(0)    /* OVS_TUNNEL_KEY_ATTR_OAM */
		+ nla_total_size(256)  /* OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS */
		/* OVS_TUNNEL_KEY_ATTR_VXLAN_OPTS is mutually exclusive with
		 * OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS and covered by it.
		 */
		+ nla_total_size(2)    /* OVS_TUNNEL_KEY_ATTR_TP_SRC */
		+ nla_total_size(2);   /* OVS_TUNNEL_KEY_ATTR_TP_DST */
}

size_t ovs_key_attr_size(void)
{
	/* Whenever adding new OVS_KEY_ FIELDS, we should consider
	 * updating this function.
	 */
	BUILD_BUG_ON(OVS_KEY_ATTR_TUNNEL_INFO != 22);

	return    nla_total_size(4)   /* OVS_KEY_ATTR_PRIORITY */
		+ nla_total_size(0)   /* OVS_KEY_ATTR_TUNNEL */
		  + ovs_tun_key_attr_size()
		+ nla_total_size(4)   /* OVS_KEY_ATTR_IN_PORT */
		+ nla_total_size(4)   /* OVS_KEY_ATTR_SKB_MARK */
		+ nla_total_size(4)   /* OVS_KEY_ATTR_DP_HASH */
		+ nla_total_size(4)   /* OVS_KEY_ATTR_RECIRC_ID */
		+ nla_total_size(12)  /* OVS_KEY_ATTR_ETHERNET */
		+ nla_total_size(2)   /* OVS_KEY_ATTR_ETHERTYPE */
		+ nla_total_size(4)   /* OVS_KEY_ATTR_VLAN */
		+ nla_total_size(0)   /* OVS_KEY_ATTR_ENCAP */
		+ nla_total_size(2)   /* OVS_KEY_ATTR_ETHERTYPE */
		+ nla_total_size(40)  /* OVS_KEY_ATTR_IPV6 */
		+ nla_total_size(2)   /* OVS_KEY_ATTR_ICMPV6 */
		+ nla_total_size(28); /* OVS_KEY_ATTR_ND */
}
static const struct ovs_len_tbl ovs_tunnel_key_lens[OVS_TUNNEL_KEY_ATTR_MAX + 1] = {
	[OVS_TUNNEL_KEY_ATTR_ID]	    = { .len = sizeof(u64) },
	[OVS_TUNNEL_KEY_ATTR_IPV4_SRC]	    = { .len = sizeof(u32) },
	[OVS_TUNNEL_KEY_ATTR_IPV4_DST]	    = { .len = sizeof(u32) },
	[OVS_TUNNEL_KEY_ATTR_TOS]	    = { .len = 1 },
	[OVS_TUNNEL_KEY_ATTR_TTL]	    = { .len = 1 },
	[OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT] = { .len = 0 },
	[OVS_TUNNEL_KEY_ATTR_CSUM]	    = { .len = 0 },
	[OVS_TUNNEL_KEY_ATTR_TP_SRC]	    = { .len = sizeof(u16) },
	[OVS_TUNNEL_KEY_ATTR_TP_DST]	    = { .len = sizeof(u16) },
	[OVS_TUNNEL_KEY_ATTR_OAM]	    = { .len = 0 },
	[OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS]   = { .len = OVS_ATTR_NESTED },
	[OVS_TUNNEL_KEY_ATTR_VXLAN_OPTS]    = { .len = OVS_ATTR_NESTED },
};

/* The size of the argument for each %OVS_KEY_ATTR_* Netlink attribute. */
static const struct ovs_len_tbl ovs_key_lens[OVS_KEY_ATTR_MAX + 1] = {
	[OVS_KEY_ATTR_ENCAP]	 = { .len = OVS_ATTR_NESTED },
	[OVS_KEY_ATTR_PRIORITY]	 = { .len = sizeof(u32) },
	[OVS_KEY_ATTR_IN_PORT]	 = { .len = sizeof(u32) },
	[OVS_KEY_ATTR_SKB_MARK]	 = { .len = sizeof(u32) },
	[OVS_KEY_ATTR_ETHERNET]	 = { .len = sizeof(struct ovs_key_ethernet) },
	[OVS_KEY_ATTR_VLAN]	 = { .len = sizeof(__be16) },
	[OVS_KEY_ATTR_ETHERTYPE] = { .len = sizeof(__be16) },
	[OVS_KEY_ATTR_IPV4]	 = { .len = sizeof(struct ovs_key_ipv4) },
	[OVS_KEY_ATTR_IPV6]	 = { .len = sizeof(struct ovs_key_ipv6) },
	[OVS_KEY_ATTR_TCP]	 = { .len = sizeof(struct ovs_key_tcp) },
	[OVS_KEY_ATTR_TCP_FLAGS] = { .len = sizeof(__be16) },
	[OVS_KEY_ATTR_UDP]	 = { .len = sizeof(struct ovs_key_udp) },
	[OVS_KEY_ATTR_SCTP]	 = { .len = sizeof(struct ovs_key_sctp) },
	[OVS_KEY_ATTR_ICMP]	 = { .len = sizeof(struct ovs_key_icmp) },
	[OVS_KEY_ATTR_ICMPV6]	 = { .len = sizeof(struct ovs_key_icmpv6) },
	[OVS_KEY_ATTR_ARP]	 = { .len = sizeof(struct ovs_key_arp) },
	[OVS_KEY_ATTR_ND]	 = { .len = sizeof(struct ovs_key_nd) },
	[OVS_KEY_ATTR_RECIRC_ID] = { .len = sizeof(u32) },
	[OVS_KEY_ATTR_DP_HASH]	 = { .len = sizeof(u32) },
	[OVS_KEY_ATTR_TUNNEL]	 = { .len = OVS_ATTR_NESTED,
				     .next = ovs_tunnel_key_lens, },
	[OVS_KEY_ATTR_MPLS]	 = { .len = sizeof(struct ovs_key_mpls) },
};
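
/* In both tables a .len of OVS_ATTR_NESTED (-1) marks an attribute whose
 * payload is variable length (typically a nested attribute sequence), so
 * the fixed-length checks in __parse_flow_nlattrs() and
 * ipv4_tun_from_nlattr() are skipped for it and the type-specific parser
 * validates the contents instead.
 */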
static bool is_all_zero(const u8 *fp, size_t size)
{
	int i;

	if (!fp)
		return false;

	for (i = 0; i < size; i++)
		if (fp[i])
			return false;

	return true;
}

static int __parse_flow_nlattrs(const struct nlattr *attr,
				const struct nlattr *a[],
				u64 *attrsp, bool log, bool nz)
{
	const struct nlattr *nla;
	u64 attrs;
	int rem;

	attrs = *attrsp;
	nla_for_each_nested(nla, attr, rem) {
		u16 type = nla_type(nla);
		int expected_len;

		if (type > OVS_KEY_ATTR_MAX) {
			OVS_NLERR(log, "Key type %d is out of range max %d",
				  type, OVS_KEY_ATTR_MAX);
			return -EINVAL;
		}

		if (attrs & (1 << type)) {
			OVS_NLERR(log, "Duplicate key (type %d).", type);
			return -EINVAL;
		}

		expected_len = ovs_key_lens[type].len;
		if (nla_len(nla) != expected_len && expected_len != OVS_ATTR_NESTED) {
			OVS_NLERR(log, "Key %d has unexpected len %d expected %d",
				  type, nla_len(nla), expected_len);
			return -EINVAL;
		}

		if (!nz || !is_all_zero(nla_data(nla), expected_len)) {
			attrs |= 1 << type;
			a[type] = nla;
		}
	}
	if (rem) {
		OVS_NLERR(log, "Message has %d unknown bytes.", rem);
		return -EINVAL;
	}

	*attrsp = attrs;
	return 0;
}
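
/* The nz flavor is used for masks: an all-zero mask attribute means
 * "completely wildcarded" and is simply left out of *attrsp rather than
 * rejected, which is why parse_flow_mask_nlattrs() below passes
 * nz == true.
 */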
static int parse_flow_mask_nlattrs(const struct nlattr *attr,
				   const struct nlattr *a[], u64 *attrsp,
				   bool log)
{
	return __parse_flow_nlattrs(attr, a, attrsp, log, true);
}

static int parse_flow_nlattrs(const struct nlattr *attr,
			      const struct nlattr *a[], u64 *attrsp,
			      bool log)
{
	return __parse_flow_nlattrs(attr, a, attrsp, log, false);
}
static int genev_tun_opt_from_nlattr(const struct nlattr *a,
				     struct sw_flow_match *match, bool is_mask,
				     bool log)
{
	unsigned long opt_key_offset;

	if (nla_len(a) > sizeof(match->key->tun_opts)) {
		OVS_NLERR(log, "Geneve option length err (len %d, max %zu).",
			  nla_len(a), sizeof(match->key->tun_opts));
		return -EINVAL;
	}

	if (nla_len(a) % 4 != 0) {
		OVS_NLERR(log, "Geneve opt len %d is not a multiple of 4.",
			  nla_len(a));
		return -EINVAL;
	}

	/* We need to record the length of the options passed
	 * down, otherwise packets with the same format but
	 * additional options will be silently matched.
	 */
	if (!is_mask) {
		SW_FLOW_KEY_PUT(match, tun_opts_len, nla_len(a),
				false);
	} else {
		/* This is somewhat unusual because it looks at
		 * both the key and mask while parsing the
		 * attributes (and by extension assumes the key
		 * is parsed first). Normally, we would verify
		 * that each is the correct length and that the
		 * attributes line up in the validate function.
		 * However, that is difficult because this is
		 * variable length and we won't have the
		 * information later.
		 */
		if (match->key->tun_opts_len != nla_len(a)) {
			OVS_NLERR(log, "Geneve option len %d != mask len %d",
				  match->key->tun_opts_len, nla_len(a));
			return -EINVAL;
		}

		SW_FLOW_KEY_PUT(match, tun_opts_len, 0xff, true);
	}

	opt_key_offset = TUN_METADATA_OFFSET(nla_len(a));
	SW_FLOW_KEY_MEMCPY_OFFSET(match, opt_key_offset, nla_data(a),
				  nla_len(a), is_mask);
	return 0;
}
static const struct nla_policy vxlan_opt_policy[OVS_VXLAN_EXT_MAX + 1] = {
	[OVS_VXLAN_EXT_GBP]	= { .type = NLA_U32 },
};

static int vxlan_tun_opt_from_nlattr(const struct nlattr *a,
				     struct sw_flow_match *match, bool is_mask,
				     bool log)
{
	struct nlattr *tb[OVS_VXLAN_EXT_MAX + 1];
	unsigned long opt_key_offset;
	struct ovs_vxlan_opts opts;
	int err;

	BUILD_BUG_ON(sizeof(opts) > sizeof(match->key->tun_opts));

	err = nla_parse_nested(tb, OVS_VXLAN_EXT_MAX, a, vxlan_opt_policy);
	if (err < 0)
		return err;

	memset(&opts, 0, sizeof(opts));

	if (tb[OVS_VXLAN_EXT_GBP])
		opts.gbp = nla_get_u32(tb[OVS_VXLAN_EXT_GBP]);

	if (!is_mask)
		SW_FLOW_KEY_PUT(match, tun_opts_len, sizeof(opts), false);
	else
		SW_FLOW_KEY_PUT(match, tun_opts_len, 0xff, true);

	opt_key_offset = TUN_METADATA_OFFSET(sizeof(opts));
	SW_FLOW_KEY_MEMCPY_OFFSET(match, opt_key_offset, &opts, sizeof(opts),
				  is_mask);
	return 0;
}
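
/* Parses a nested OVS_KEY_ATTR_TUNNEL attribute into @match. Returns a
 * negative errno on failure; on success it returns the type of the
 * options attribute it consumed (OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS or
 * OVS_TUNNEL_KEY_ATTR_VXLAN_OPTS), or 0 if there were none, so that
 * validate_and_copy_set_tun() can pick the matching options validator.
 */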
static int ipv4_tun_from_nlattr(const struct nlattr *attr,
				struct sw_flow_match *match, bool is_mask,
				bool log)
{
	struct nlattr *a;
	int rem;
	bool ttl = false;
	__be16 tun_flags = 0;
	int opts_type = 0;

	nla_for_each_nested(a, attr, rem) {
		int type = nla_type(a);
		int err;

		if (type > OVS_TUNNEL_KEY_ATTR_MAX) {
			OVS_NLERR(log, "Tunnel attr %d out of range max %d",
				  type, OVS_TUNNEL_KEY_ATTR_MAX);
			return -EINVAL;
		}

		if (ovs_tunnel_key_lens[type].len != nla_len(a) &&
		    ovs_tunnel_key_lens[type].len != OVS_ATTR_NESTED) {
			OVS_NLERR(log, "Tunnel attr %d has unexpected len %d expected %d",
				  type, nla_len(a), ovs_tunnel_key_lens[type].len);
			return -EINVAL;
		}

		switch (type) {
		case OVS_TUNNEL_KEY_ATTR_ID:
			SW_FLOW_KEY_PUT(match, tun_key.tun_id,
					nla_get_be64(a), is_mask);
			tun_flags |= TUNNEL_KEY;
			break;
		case OVS_TUNNEL_KEY_ATTR_IPV4_SRC:
			SW_FLOW_KEY_PUT(match, tun_key.ipv4_src,
					nla_get_be32(a), is_mask);
			break;
		case OVS_TUNNEL_KEY_ATTR_IPV4_DST:
			SW_FLOW_KEY_PUT(match, tun_key.ipv4_dst,
					nla_get_be32(a), is_mask);
			break;
		case OVS_TUNNEL_KEY_ATTR_TOS:
			SW_FLOW_KEY_PUT(match, tun_key.ipv4_tos,
					nla_get_u8(a), is_mask);
			break;
		case OVS_TUNNEL_KEY_ATTR_TTL:
			SW_FLOW_KEY_PUT(match, tun_key.ipv4_ttl,
					nla_get_u8(a), is_mask);
			ttl = true;
			break;
		case OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT:
			tun_flags |= TUNNEL_DONT_FRAGMENT;
			break;
		case OVS_TUNNEL_KEY_ATTR_CSUM:
			tun_flags |= TUNNEL_CSUM;
			break;
		case OVS_TUNNEL_KEY_ATTR_TP_SRC:
			SW_FLOW_KEY_PUT(match, tun_key.tp_src,
					nla_get_be16(a), is_mask);
			break;
		case OVS_TUNNEL_KEY_ATTR_TP_DST:
			SW_FLOW_KEY_PUT(match, tun_key.tp_dst,
					nla_get_be16(a), is_mask);
			break;
		case OVS_TUNNEL_KEY_ATTR_OAM:
			tun_flags |= TUNNEL_OAM;
			break;
		case OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS:
			if (opts_type) {
				OVS_NLERR(log, "Multiple metadata blocks provided");
				return -EINVAL;
			}

			err = genev_tun_opt_from_nlattr(a, match, is_mask, log);
			if (err)
				return err;

			tun_flags |= TUNNEL_GENEVE_OPT;
			opts_type = type;
			break;
		case OVS_TUNNEL_KEY_ATTR_VXLAN_OPTS:
			if (opts_type) {
				OVS_NLERR(log, "Multiple metadata blocks provided");
				return -EINVAL;
			}

			err = vxlan_tun_opt_from_nlattr(a, match, is_mask, log);
			if (err)
				return err;

			tun_flags |= TUNNEL_VXLAN_OPT;
			opts_type = type;
			break;
		default:
			OVS_NLERR(log, "Unknown IPv4 tunnel attribute %d",
				  type);
			return -EINVAL;
		}
	}

	SW_FLOW_KEY_PUT(match, tun_key.tun_flags, tun_flags, is_mask);

	if (rem > 0) {
		OVS_NLERR(log, "IPv4 tunnel attribute has %d unknown bytes.",
			  rem);
		return -EINVAL;
	}

	if (!is_mask) {
		if (!match->key->tun_key.ipv4_dst) {
			OVS_NLERR(log, "IPv4 tunnel dst address is zero");
			return -EINVAL;
		}

		if (!ttl) {
			OVS_NLERR(log, "IPv4 tunnel TTL not specified.");
			return -EINVAL;
		}
	}

	return opts_type;
}
static int vxlan_opt_to_nlattr(struct sk_buff *skb,
			       const void *tun_opts, int swkey_tun_opts_len)
{
	const struct ovs_vxlan_opts *opts = tun_opts;
	struct nlattr *nla;

	nla = nla_nest_start(skb, OVS_TUNNEL_KEY_ATTR_VXLAN_OPTS);
	if (!nla)
		return -EMSGSIZE;

	if (nla_put_u32(skb, OVS_VXLAN_EXT_GBP, opts->gbp) < 0)
		return -EMSGSIZE;

	nla_nest_end(skb, nla);
	return 0;
}

static int __ipv4_tun_to_nlattr(struct sk_buff *skb,
				const struct ovs_key_ipv4_tunnel *output,
				const void *tun_opts, int swkey_tun_opts_len)
{
	if (output->tun_flags & TUNNEL_KEY &&
	    nla_put_be64(skb, OVS_TUNNEL_KEY_ATTR_ID, output->tun_id))
		return -EMSGSIZE;
	if (output->ipv4_src &&
	    nla_put_be32(skb, OVS_TUNNEL_KEY_ATTR_IPV4_SRC, output->ipv4_src))
		return -EMSGSIZE;
	if (output->ipv4_dst &&
	    nla_put_be32(skb, OVS_TUNNEL_KEY_ATTR_IPV4_DST, output->ipv4_dst))
		return -EMSGSIZE;
	if (output->ipv4_tos &&
	    nla_put_u8(skb, OVS_TUNNEL_KEY_ATTR_TOS, output->ipv4_tos))
		return -EMSGSIZE;
	if (nla_put_u8(skb, OVS_TUNNEL_KEY_ATTR_TTL, output->ipv4_ttl))
		return -EMSGSIZE;
	if ((output->tun_flags & TUNNEL_DONT_FRAGMENT) &&
	    nla_put_flag(skb, OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT))
		return -EMSGSIZE;
	if ((output->tun_flags & TUNNEL_CSUM) &&
	    nla_put_flag(skb, OVS_TUNNEL_KEY_ATTR_CSUM))
		return -EMSGSIZE;
	if (output->tp_src &&
	    nla_put_be16(skb, OVS_TUNNEL_KEY_ATTR_TP_SRC, output->tp_src))
		return -EMSGSIZE;
	if (output->tp_dst &&
	    nla_put_be16(skb, OVS_TUNNEL_KEY_ATTR_TP_DST, output->tp_dst))
		return -EMSGSIZE;
	if ((output->tun_flags & TUNNEL_OAM) &&
	    nla_put_flag(skb, OVS_TUNNEL_KEY_ATTR_OAM))
		return -EMSGSIZE;
	if (tun_opts) {
		if (output->tun_flags & TUNNEL_GENEVE_OPT &&
		    nla_put(skb, OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS,
			    swkey_tun_opts_len, tun_opts))
			return -EMSGSIZE;
		else if (output->tun_flags & TUNNEL_VXLAN_OPT &&
			 vxlan_opt_to_nlattr(skb, tun_opts, swkey_tun_opts_len))
			return -EMSGSIZE;
	}

	return 0;
}
static int ipv4_tun_to_nlattr(struct sk_buff *skb,
			      const struct ovs_key_ipv4_tunnel *output,
			      const void *tun_opts, int swkey_tun_opts_len)
{
	struct nlattr *nla;
	int err;

	nla = nla_nest_start(skb, OVS_KEY_ATTR_TUNNEL);
	if (!nla)
		return -EMSGSIZE;

	err = __ipv4_tun_to_nlattr(skb, output, tun_opts, swkey_tun_opts_len);
	if (err)
		return err;

	nla_nest_end(skb, nla);
	return 0;
}

int ovs_nla_put_egress_tunnel_key(struct sk_buff *skb,
				  const struct ovs_tunnel_info *egress_tun_info)
{
	return __ipv4_tun_to_nlattr(skb, &egress_tun_info->tunnel,
				    egress_tun_info->options,
				    egress_tun_info->options_len);
}
static int metadata_from_nlattrs(struct sw_flow_match *match, u64 *attrs,
				 const struct nlattr **a, bool is_mask,
				 bool log)
{
	if (*attrs & (1 << OVS_KEY_ATTR_DP_HASH)) {
		u32 hash_val = nla_get_u32(a[OVS_KEY_ATTR_DP_HASH]);

		SW_FLOW_KEY_PUT(match, ovs_flow_hash, hash_val, is_mask);
		*attrs &= ~(1 << OVS_KEY_ATTR_DP_HASH);
	}

	if (*attrs & (1 << OVS_KEY_ATTR_RECIRC_ID)) {
		u32 recirc_id = nla_get_u32(a[OVS_KEY_ATTR_RECIRC_ID]);

		SW_FLOW_KEY_PUT(match, recirc_id, recirc_id, is_mask);
		*attrs &= ~(1 << OVS_KEY_ATTR_RECIRC_ID);
	}

	if (*attrs & (1 << OVS_KEY_ATTR_PRIORITY)) {
		SW_FLOW_KEY_PUT(match, phy.priority,
				nla_get_u32(a[OVS_KEY_ATTR_PRIORITY]), is_mask);
		*attrs &= ~(1 << OVS_KEY_ATTR_PRIORITY);
	}

	if (*attrs & (1 << OVS_KEY_ATTR_IN_PORT)) {
		u32 in_port = nla_get_u32(a[OVS_KEY_ATTR_IN_PORT]);

		if (is_mask) {
			in_port = 0xffffffff; /* Always exact match in_port. */
		} else if (in_port >= DP_MAX_PORTS) {
			OVS_NLERR(log, "Port %d exceeds max allowable %d",
				  in_port, DP_MAX_PORTS);
			return -EINVAL;
		}

		SW_FLOW_KEY_PUT(match, phy.in_port, in_port, is_mask);
		*attrs &= ~(1 << OVS_KEY_ATTR_IN_PORT);
	} else if (!is_mask) {
		SW_FLOW_KEY_PUT(match, phy.in_port, DP_MAX_PORTS, is_mask);
	}

	if (*attrs & (1 << OVS_KEY_ATTR_SKB_MARK)) {
		uint32_t mark = nla_get_u32(a[OVS_KEY_ATTR_SKB_MARK]);

		SW_FLOW_KEY_PUT(match, phy.skb_mark, mark, is_mask);
		*attrs &= ~(1 << OVS_KEY_ATTR_SKB_MARK);
	}
	if (*attrs & (1 << OVS_KEY_ATTR_TUNNEL)) {
		if (ipv4_tun_from_nlattr(a[OVS_KEY_ATTR_TUNNEL], match,
					 is_mask, log) < 0)
			return -EINVAL;
		*attrs &= ~(1 << OVS_KEY_ATTR_TUNNEL);
	}
	return 0;
}
static int ovs_key_from_nlattrs(struct sw_flow_match *match, u64 attrs,
				const struct nlattr **a, bool is_mask,
				bool log)
{
	int err;

	err = metadata_from_nlattrs(match, &attrs, a, is_mask, log);
	if (err)
		return err;

	if (attrs & (1 << OVS_KEY_ATTR_ETHERNET)) {
		const struct ovs_key_ethernet *eth_key;

		eth_key = nla_data(a[OVS_KEY_ATTR_ETHERNET]);
		SW_FLOW_KEY_MEMCPY(match, eth.src,
				   eth_key->eth_src, ETH_ALEN, is_mask);
		SW_FLOW_KEY_MEMCPY(match, eth.dst,
				   eth_key->eth_dst, ETH_ALEN, is_mask);
		attrs &= ~(1 << OVS_KEY_ATTR_ETHERNET);
	}

	if (attrs & (1 << OVS_KEY_ATTR_VLAN)) {
		__be16 tci;

		tci = nla_get_be16(a[OVS_KEY_ATTR_VLAN]);
		if (!(tci & htons(VLAN_TAG_PRESENT))) {
			if (is_mask)
				OVS_NLERR(log, "VLAN TCI mask does not have exact match for VLAN_TAG_PRESENT bit.");
			else
				OVS_NLERR(log, "VLAN TCI does not have VLAN_TAG_PRESENT bit set.");

			return -EINVAL;
		}

		SW_FLOW_KEY_PUT(match, eth.tci, tci, is_mask);
		attrs &= ~(1 << OVS_KEY_ATTR_VLAN);
	}

	if (attrs & (1 << OVS_KEY_ATTR_ETHERTYPE)) {
		__be16 eth_type;

		eth_type = nla_get_be16(a[OVS_KEY_ATTR_ETHERTYPE]);
		if (is_mask) {
			/* Always exact match EtherType. */
			eth_type = htons(0xffff);
		} else if (ntohs(eth_type) < ETH_P_802_3_MIN) {
			OVS_NLERR(log, "EtherType %x is less than min %x",
				  ntohs(eth_type), ETH_P_802_3_MIN);
			return -EINVAL;
		}

		SW_FLOW_KEY_PUT(match, eth.type, eth_type, is_mask);
		attrs &= ~(1 << OVS_KEY_ATTR_ETHERTYPE);
	} else if (!is_mask) {
		SW_FLOW_KEY_PUT(match, eth.type, htons(ETH_P_802_2), is_mask);
	}

	if (attrs & (1 << OVS_KEY_ATTR_IPV4)) {
		const struct ovs_key_ipv4 *ipv4_key;

		ipv4_key = nla_data(a[OVS_KEY_ATTR_IPV4]);
		if (!is_mask && ipv4_key->ipv4_frag > OVS_FRAG_TYPE_MAX) {
			OVS_NLERR(log, "IPv4 frag type %d is out of range max %d",
				  ipv4_key->ipv4_frag, OVS_FRAG_TYPE_MAX);
			return -EINVAL;
		}
		SW_FLOW_KEY_PUT(match, ip.proto,
				ipv4_key->ipv4_proto, is_mask);
		SW_FLOW_KEY_PUT(match, ip.tos,
				ipv4_key->ipv4_tos, is_mask);
		SW_FLOW_KEY_PUT(match, ip.ttl,
				ipv4_key->ipv4_ttl, is_mask);
		SW_FLOW_KEY_PUT(match, ip.frag,
				ipv4_key->ipv4_frag, is_mask);
		SW_FLOW_KEY_PUT(match, ipv4.addr.src,
				ipv4_key->ipv4_src, is_mask);
		SW_FLOW_KEY_PUT(match, ipv4.addr.dst,
				ipv4_key->ipv4_dst, is_mask);
		attrs &= ~(1 << OVS_KEY_ATTR_IPV4);
	}

	if (attrs & (1 << OVS_KEY_ATTR_IPV6)) {
		const struct ovs_key_ipv6 *ipv6_key;

		ipv6_key = nla_data(a[OVS_KEY_ATTR_IPV6]);
		if (!is_mask && ipv6_key->ipv6_frag > OVS_FRAG_TYPE_MAX) {
			OVS_NLERR(log, "IPv6 frag type %d is out of range max %d",
				  ipv6_key->ipv6_frag, OVS_FRAG_TYPE_MAX);
			return -EINVAL;
		}

		if (!is_mask && ipv6_key->ipv6_label & htonl(0xFFF00000)) {
			OVS_NLERR(log, "IPv6 flow label %x is out of range (max=%x)",
				  ntohl(ipv6_key->ipv6_label), (1 << 20) - 1);
			return -EINVAL;
		}

		SW_FLOW_KEY_PUT(match, ipv6.label,
				ipv6_key->ipv6_label, is_mask);
		SW_FLOW_KEY_PUT(match, ip.proto,
				ipv6_key->ipv6_proto, is_mask);
		SW_FLOW_KEY_PUT(match, ip.tos,
				ipv6_key->ipv6_tclass, is_mask);
		SW_FLOW_KEY_PUT(match, ip.ttl,
				ipv6_key->ipv6_hlimit, is_mask);
		SW_FLOW_KEY_PUT(match, ip.frag,
				ipv6_key->ipv6_frag, is_mask);
		SW_FLOW_KEY_MEMCPY(match, ipv6.addr.src,
				   ipv6_key->ipv6_src,
				   sizeof(match->key->ipv6.addr.src),
				   is_mask);
		SW_FLOW_KEY_MEMCPY(match, ipv6.addr.dst,
				   ipv6_key->ipv6_dst,
				   sizeof(match->key->ipv6.addr.dst),
				   is_mask);

		attrs &= ~(1 << OVS_KEY_ATTR_IPV6);
	}

	if (attrs & (1 << OVS_KEY_ATTR_ARP)) {
		const struct ovs_key_arp *arp_key;

		arp_key = nla_data(a[OVS_KEY_ATTR_ARP]);
		if (!is_mask && (arp_key->arp_op & htons(0xff00))) {
			OVS_NLERR(log, "Unknown ARP opcode (opcode=%d).",
				  arp_key->arp_op);
			return -EINVAL;
		}

		SW_FLOW_KEY_PUT(match, ipv4.addr.src,
				arp_key->arp_sip, is_mask);
		SW_FLOW_KEY_PUT(match, ipv4.addr.dst,
				arp_key->arp_tip, is_mask);
		SW_FLOW_KEY_PUT(match, ip.proto,
				ntohs(arp_key->arp_op), is_mask);
		SW_FLOW_KEY_MEMCPY(match, ipv4.arp.sha,
				   arp_key->arp_sha, ETH_ALEN, is_mask);
		SW_FLOW_KEY_MEMCPY(match, ipv4.arp.tha,
				   arp_key->arp_tha, ETH_ALEN, is_mask);

		attrs &= ~(1 << OVS_KEY_ATTR_ARP);
	}

	if (attrs & (1 << OVS_KEY_ATTR_MPLS)) {
		const struct ovs_key_mpls *mpls_key;

		mpls_key = nla_data(a[OVS_KEY_ATTR_MPLS]);
		SW_FLOW_KEY_PUT(match, mpls.top_lse,
				mpls_key->mpls_lse, is_mask);

		attrs &= ~(1 << OVS_KEY_ATTR_MPLS);
	}

	if (attrs & (1 << OVS_KEY_ATTR_TCP)) {
		const struct ovs_key_tcp *tcp_key;

		tcp_key = nla_data(a[OVS_KEY_ATTR_TCP]);
		SW_FLOW_KEY_PUT(match, tp.src, tcp_key->tcp_src, is_mask);
		SW_FLOW_KEY_PUT(match, tp.dst, tcp_key->tcp_dst, is_mask);
		attrs &= ~(1 << OVS_KEY_ATTR_TCP);
	}

	if (attrs & (1 << OVS_KEY_ATTR_TCP_FLAGS)) {
		SW_FLOW_KEY_PUT(match, tp.flags,
				nla_get_be16(a[OVS_KEY_ATTR_TCP_FLAGS]),
				is_mask);
		attrs &= ~(1 << OVS_KEY_ATTR_TCP_FLAGS);
	}

	if (attrs & (1 << OVS_KEY_ATTR_UDP)) {
		const struct ovs_key_udp *udp_key;

		udp_key = nla_data(a[OVS_KEY_ATTR_UDP]);
		SW_FLOW_KEY_PUT(match, tp.src, udp_key->udp_src, is_mask);
		SW_FLOW_KEY_PUT(match, tp.dst, udp_key->udp_dst, is_mask);
		attrs &= ~(1 << OVS_KEY_ATTR_UDP);
	}

	if (attrs & (1 << OVS_KEY_ATTR_SCTP)) {
		const struct ovs_key_sctp *sctp_key;

		sctp_key = nla_data(a[OVS_KEY_ATTR_SCTP]);
		SW_FLOW_KEY_PUT(match, tp.src, sctp_key->sctp_src, is_mask);
		SW_FLOW_KEY_PUT(match, tp.dst, sctp_key->sctp_dst, is_mask);
		attrs &= ~(1 << OVS_KEY_ATTR_SCTP);
	}

	if (attrs & (1 << OVS_KEY_ATTR_ICMP)) {
		const struct ovs_key_icmp *icmp_key;

		icmp_key = nla_data(a[OVS_KEY_ATTR_ICMP]);
		SW_FLOW_KEY_PUT(match, tp.src,
				htons(icmp_key->icmp_type), is_mask);
		SW_FLOW_KEY_PUT(match, tp.dst,
				htons(icmp_key->icmp_code), is_mask);
		attrs &= ~(1 << OVS_KEY_ATTR_ICMP);
	}

	if (attrs & (1 << OVS_KEY_ATTR_ICMPV6)) {
		const struct ovs_key_icmpv6 *icmpv6_key;

		icmpv6_key = nla_data(a[OVS_KEY_ATTR_ICMPV6]);
		SW_FLOW_KEY_PUT(match, tp.src,
				htons(icmpv6_key->icmpv6_type), is_mask);
		SW_FLOW_KEY_PUT(match, tp.dst,
				htons(icmpv6_key->icmpv6_code), is_mask);
		attrs &= ~(1 << OVS_KEY_ATTR_ICMPV6);
	}

	if (attrs & (1 << OVS_KEY_ATTR_ND)) {
		const struct ovs_key_nd *nd_key;

		nd_key = nla_data(a[OVS_KEY_ATTR_ND]);
		SW_FLOW_KEY_MEMCPY(match, ipv6.nd.target,
				   nd_key->nd_target,
				   sizeof(match->key->ipv6.nd.target),
				   is_mask);
		SW_FLOW_KEY_MEMCPY(match, ipv6.nd.sll,
				   nd_key->nd_sll, ETH_ALEN, is_mask);
		SW_FLOW_KEY_MEMCPY(match, ipv6.nd.tll,
				   nd_key->nd_tll, ETH_ALEN, is_mask);
		attrs &= ~(1 << OVS_KEY_ATTR_ND);
	}

	if (attrs != 0) {
		OVS_NLERR(log, "Unknown key attributes %llx",
			  (unsigned long long)attrs);
		return -EINVAL;
	}

	return 0;
}
static void nlattr_set(struct nlattr *attr, u8 val,
		       const struct ovs_len_tbl *tbl)
{
	struct nlattr *nla;
	int rem;

	/* The nlattr stream should already have been validated */
	nla_for_each_nested(nla, attr, rem) {
		if (tbl && tbl[nla_type(nla)].len == OVS_ATTR_NESTED)
			nlattr_set(nla, val, tbl[nla_type(nla)].next);
		else
			memset(nla_data(nla), val, nla_len(nla));
	}
}

static void mask_set_nlattr(struct nlattr *attr, u8 val)
{
	nlattr_set(attr, val, ovs_key_lens);
}
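
/* ovs_nla_get_match() below uses mask_set_nlattr() to synthesize an
 * exact-match mask when userspace supplies none: the key attribute
 * stream is duplicated and every leaf payload is overwritten with 0xff
 * bytes, recursing into nested attributes via the .next tables.
 */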
/**
 * ovs_nla_get_match - parses Netlink attributes into a flow key and
 * mask. In case the 'mask' is NULL, the flow is treated as an exact-match
 * flow. Otherwise, it is treated as a wildcarded flow, except the mask
 * does not include any don't care bit.
 * @match: receives the extracted flow match information.
 * @key: Netlink attribute holding nested %OVS_KEY_ATTR_* Netlink attribute
 * sequence. The fields should be those of the packet that triggered the
 * creation of this flow.
 * @mask: Optional. Netlink attribute holding nested %OVS_KEY_ATTR_* Netlink
 * attribute specifies the mask field of the wildcarded flow.
 * @log: Boolean to allow kernel error logging. Normally true, but when
 * probing for feature compatibility this should be passed in as false to
 * suppress unnecessary error logging.
 */
int ovs_nla_get_match(struct sw_flow_match *match,
		      const struct nlattr *nla_key,
		      const struct nlattr *nla_mask,
		      bool log)
{
	const struct nlattr *a[OVS_KEY_ATTR_MAX + 1];
	const struct nlattr *encap;
	struct nlattr *newmask = NULL;
	u64 key_attrs = 0;
	u64 mask_attrs = 0;
	bool encap_valid = false;
	int err;

	err = parse_flow_nlattrs(nla_key, a, &key_attrs, log);
	if (err)
		return err;

	if ((key_attrs & (1 << OVS_KEY_ATTR_ETHERNET)) &&
	    (key_attrs & (1 << OVS_KEY_ATTR_ETHERTYPE)) &&
	    (nla_get_be16(a[OVS_KEY_ATTR_ETHERTYPE]) == htons(ETH_P_8021Q))) {
		__be16 tci;

		if (!((key_attrs & (1 << OVS_KEY_ATTR_VLAN)) &&
		      (key_attrs & (1 << OVS_KEY_ATTR_ENCAP)))) {
			OVS_NLERR(log, "Invalid VLAN frame.");
			return -EINVAL;
		}

		key_attrs &= ~(1 << OVS_KEY_ATTR_ETHERTYPE);
		tci = nla_get_be16(a[OVS_KEY_ATTR_VLAN]);
		encap = a[OVS_KEY_ATTR_ENCAP];
		key_attrs &= ~(1 << OVS_KEY_ATTR_ENCAP);
		encap_valid = true;

		if (tci & htons(VLAN_TAG_PRESENT)) {
			err = parse_flow_nlattrs(encap, a, &key_attrs, log);
			if (err)
				return err;
		} else if (!tci) {
			/* Corner case for truncated 802.1Q header. */
			if (nla_len(encap)) {
				OVS_NLERR(log, "Truncated 802.1Q header has non-zero encap attribute.");
				return -EINVAL;
			}
		} else {
			OVS_NLERR(log, "Encap attr is set for non-VLAN frame");
			return -EINVAL;
		}
	}

	err = ovs_key_from_nlattrs(match, key_attrs, a, false, log);
	if (err)
		return err;

	if (match->mask) {
		if (!nla_mask) {
			/* Create an exact match mask. We need to set to 0xff
			 * all the 'match->mask' fields that have been touched
			 * in 'match->key'. We cannot simply memset
			 * 'match->mask', because padding bytes and fields not
			 * specified in 'match->key' should be left to 0.
			 * Instead, we use a stream of netlink attributes,
			 * copied from 'key' and set to 0xff.
			 * ovs_key_from_nlattrs() will take care of filling
			 * 'match->mask' appropriately.
			 */
			newmask = kmemdup(nla_key,
					  nla_total_size(nla_len(nla_key)),
					  GFP_KERNEL);
			if (!newmask)
				return -ENOMEM;

			mask_set_nlattr(newmask, 0xff);

			/* The userspace does not send tunnel attributes that
			 * are 0, but we should not wildcard them nonetheless.
			 */
			if (match->key->tun_key.ipv4_dst)
				SW_FLOW_KEY_MEMSET_FIELD(match, tun_key,
							 0xff, true);

			nla_mask = newmask;
		}

		err = parse_flow_mask_nlattrs(nla_mask, a, &mask_attrs, log);
		if (err)
			goto free_newmask;

		/* Always match on tci. */
		SW_FLOW_KEY_PUT(match, eth.tci, htons(0xffff), true);

		if (mask_attrs & 1 << OVS_KEY_ATTR_ENCAP) {
			__be16 eth_type = 0;
			__be16 tci = 0;

			if (!encap_valid) {
				OVS_NLERR(log, "Encap mask attribute is set for non-VLAN frame.");
				err = -EINVAL;
				goto free_newmask;
			}

			mask_attrs &= ~(1 << OVS_KEY_ATTR_ENCAP);
			if (a[OVS_KEY_ATTR_ETHERTYPE])
				eth_type = nla_get_be16(a[OVS_KEY_ATTR_ETHERTYPE]);

			if (eth_type == htons(0xffff)) {
				mask_attrs &= ~(1 << OVS_KEY_ATTR_ETHERTYPE);
				encap = a[OVS_KEY_ATTR_ENCAP];
				err = parse_flow_mask_nlattrs(encap, a,
							      &mask_attrs, log);
				if (err)
					goto free_newmask;
			} else {
				OVS_NLERR(log, "VLAN frames must have an exact match on the TPID (mask=%x).",
					  ntohs(eth_type));
				err = -EINVAL;
				goto free_newmask;
			}

			if (a[OVS_KEY_ATTR_VLAN])
				tci = nla_get_be16(a[OVS_KEY_ATTR_VLAN]);

			if (!(tci & htons(VLAN_TAG_PRESENT))) {
				OVS_NLERR(log, "VLAN tag present bit must have an exact match (tci_mask=%x).",
					  ntohs(tci));
				err = -EINVAL;
				goto free_newmask;
			}
		}

		err = ovs_key_from_nlattrs(match, mask_attrs, a, true, log);
		if (err)
			goto free_newmask;
	}

	if (!match_validate(match, key_attrs, mask_attrs, log))
		err = -EINVAL;

free_newmask:
	kfree(newmask);
	return err;
}
static size_t get_ufid_len(const struct nlattr *attr, bool log)
{
	size_t len;

	if (!attr)
		return 0;

	len = nla_len(attr);
	if (len < 1 || len > MAX_UFID_LENGTH) {
		OVS_NLERR(log, "ufid size %u bytes exceeds the range (1, %d)",
			  nla_len(attr), MAX_UFID_LENGTH);
		return 0;
	}

	return len;
}

/* Initializes 'flow->ufid', returning true if 'attr' contains a valid UFID,
 * or false otherwise.
 */
bool ovs_nla_get_ufid(struct sw_flow_id *sfid, const struct nlattr *attr,
		      bool log)
{
	sfid->ufid_len = get_ufid_len(attr, log);
	if (sfid->ufid_len)
		memcpy(sfid->ufid, nla_data(attr), sfid->ufid_len);

	return sfid->ufid_len;
}

int ovs_nla_get_identifier(struct sw_flow_id *sfid, const struct nlattr *ufid,
			   const struct sw_flow_key *key, bool log)
{
	struct sw_flow_key *new_key;

	if (ovs_nla_get_ufid(sfid, ufid, log))
		return 0;

	/* If UFID was not provided, use unmasked key. */
	new_key = kmalloc(sizeof(*new_key), GFP_KERNEL);
	if (!new_key)
		return -ENOMEM;
	memcpy(new_key, key, sizeof(*key));
	sfid->unmasked_key = new_key;

	return 0;
}

u32 ovs_nla_get_ufid_flags(const struct nlattr *attr)
{
	return attr ? nla_get_u32(attr) : 0;
}
/**
 * ovs_nla_get_flow_metadata - parses Netlink attributes into a flow key.
 * @key: Receives extracted in_port, priority, tun_key and skb_mark.
 * @attr: Netlink attribute holding nested %OVS_KEY_ATTR_* Netlink attribute
 * sequence.
 * @log: Boolean to allow kernel error logging. Normally true, but when
 * probing for feature compatibility this should be passed in as false to
 * suppress unnecessary error logging.
 *
 * This parses a series of Netlink attributes that form a flow key, which must
 * take the same form accepted by flow_from_nlattrs(), but only enough of it to
 * get the metadata, that is, the parts of the flow key that cannot be
 * extracted from the packet itself.
 */
int ovs_nla_get_flow_metadata(const struct nlattr *attr,
			      struct sw_flow_key *key,
			      bool log)
{
	const struct nlattr *a[OVS_KEY_ATTR_MAX + 1];
	struct sw_flow_match match;
	u64 attrs = 0;
	int err;

	err = parse_flow_nlattrs(attr, a, &attrs, log);
	if (err)
		return -EINVAL;

	memset(&match, 0, sizeof(match));
	match.key = key;

	key->phy.in_port = DP_MAX_PORTS;

	return metadata_from_nlattrs(&match, &attrs, a, false, log);
}
static int __ovs_nla_put_key(const struct sw_flow_key *swkey,
			     const struct sw_flow_key *output, bool is_mask,
			     struct sk_buff *skb)
{
	struct ovs_key_ethernet *eth_key;
	struct nlattr *nla, *encap;

	if (nla_put_u32(skb, OVS_KEY_ATTR_RECIRC_ID, output->recirc_id))
		goto nla_put_failure;

	if (nla_put_u32(skb, OVS_KEY_ATTR_DP_HASH, output->ovs_flow_hash))
		goto nla_put_failure;

	if (nla_put_u32(skb, OVS_KEY_ATTR_PRIORITY, output->phy.priority))
		goto nla_put_failure;

	if ((swkey->tun_key.ipv4_dst || is_mask)) {
		const void *opts = NULL;

		if (output->tun_key.tun_flags & TUNNEL_OPTIONS_PRESENT)
			opts = TUN_METADATA_OPTS(output, swkey->tun_opts_len);

		if (ipv4_tun_to_nlattr(skb, &output->tun_key, opts,
				       swkey->tun_opts_len))
			goto nla_put_failure;
	}

	if (swkey->phy.in_port == DP_MAX_PORTS) {
		if (is_mask && (output->phy.in_port == 0xffff))
			if (nla_put_u32(skb, OVS_KEY_ATTR_IN_PORT, 0xffffffff))
				goto nla_put_failure;
	} else {
		u16 upper_u16;
		upper_u16 = !is_mask ? 0 : 0xffff;

		if (nla_put_u32(skb, OVS_KEY_ATTR_IN_PORT,
				(upper_u16 << 16) | output->phy.in_port))
			goto nla_put_failure;
	}

	if (nla_put_u32(skb, OVS_KEY_ATTR_SKB_MARK, output->phy.skb_mark))
		goto nla_put_failure;

	nla = nla_reserve(skb, OVS_KEY_ATTR_ETHERNET, sizeof(*eth_key));
	if (!nla)
		goto nla_put_failure;

	eth_key = nla_data(nla);
	ether_addr_copy(eth_key->eth_src, output->eth.src);
	ether_addr_copy(eth_key->eth_dst, output->eth.dst);

	if (swkey->eth.tci || swkey->eth.type == htons(ETH_P_8021Q)) {
		__be16 eth_type;
		eth_type = !is_mask ? htons(ETH_P_8021Q) : htons(0xffff);
		if (nla_put_be16(skb, OVS_KEY_ATTR_ETHERTYPE, eth_type) ||
		    nla_put_be16(skb, OVS_KEY_ATTR_VLAN, output->eth.tci))
			goto nla_put_failure;
		encap = nla_nest_start(skb, OVS_KEY_ATTR_ENCAP);
		if (!swkey->eth.tci)
			goto unencap;
	} else
		encap = NULL;

	if (swkey->eth.type == htons(ETH_P_802_2)) {
		/*
		 * Ethertype 802.2 is represented in the netlink with omitted
		 * OVS_KEY_ATTR_ETHERTYPE in the flow key attribute, and
		 * 0xffff in the mask attribute. Ethertype can also
		 * be wildcarded.
		 */
		if (is_mask && output->eth.type)
			if (nla_put_be16(skb, OVS_KEY_ATTR_ETHERTYPE,
					 output->eth.type))
				goto nla_put_failure;
		goto unencap;
	}

	if (nla_put_be16(skb, OVS_KEY_ATTR_ETHERTYPE, output->eth.type))
		goto nla_put_failure;

	if (swkey->eth.type == htons(ETH_P_IP)) {
		struct ovs_key_ipv4 *ipv4_key;

		nla = nla_reserve(skb, OVS_KEY_ATTR_IPV4, sizeof(*ipv4_key));
		if (!nla)
			goto nla_put_failure;
		ipv4_key = nla_data(nla);
		ipv4_key->ipv4_src = output->ipv4.addr.src;
		ipv4_key->ipv4_dst = output->ipv4.addr.dst;
		ipv4_key->ipv4_proto = output->ip.proto;
		ipv4_key->ipv4_tos = output->ip.tos;
		ipv4_key->ipv4_ttl = output->ip.ttl;
		ipv4_key->ipv4_frag = output->ip.frag;
	} else if (swkey->eth.type == htons(ETH_P_IPV6)) {
		struct ovs_key_ipv6 *ipv6_key;

		nla = nla_reserve(skb, OVS_KEY_ATTR_IPV6, sizeof(*ipv6_key));
		if (!nla)
			goto nla_put_failure;
		ipv6_key = nla_data(nla);
		memcpy(ipv6_key->ipv6_src, &output->ipv6.addr.src,
		       sizeof(ipv6_key->ipv6_src));
		memcpy(ipv6_key->ipv6_dst, &output->ipv6.addr.dst,
		       sizeof(ipv6_key->ipv6_dst));
		ipv6_key->ipv6_label = output->ipv6.label;
		ipv6_key->ipv6_proto = output->ip.proto;
		ipv6_key->ipv6_tclass = output->ip.tos;
		ipv6_key->ipv6_hlimit = output->ip.ttl;
		ipv6_key->ipv6_frag = output->ip.frag;
	} else if (swkey->eth.type == htons(ETH_P_ARP) ||
		   swkey->eth.type == htons(ETH_P_RARP)) {
		struct ovs_key_arp *arp_key;

		nla = nla_reserve(skb, OVS_KEY_ATTR_ARP, sizeof(*arp_key));
		if (!nla)
			goto nla_put_failure;
		arp_key = nla_data(nla);
		memset(arp_key, 0, sizeof(struct ovs_key_arp));
		arp_key->arp_sip = output->ipv4.addr.src;
		arp_key->arp_tip = output->ipv4.addr.dst;
		arp_key->arp_op = htons(output->ip.proto);
		ether_addr_copy(arp_key->arp_sha, output->ipv4.arp.sha);
		ether_addr_copy(arp_key->arp_tha, output->ipv4.arp.tha);
	} else if (eth_p_mpls(swkey->eth.type)) {
		struct ovs_key_mpls *mpls_key;

		nla = nla_reserve(skb, OVS_KEY_ATTR_MPLS, sizeof(*mpls_key));
		if (!nla)
			goto nla_put_failure;
		mpls_key = nla_data(nla);
		mpls_key->mpls_lse = output->mpls.top_lse;
	}

	if ((swkey->eth.type == htons(ETH_P_IP) ||
	     swkey->eth.type == htons(ETH_P_IPV6)) &&
	     swkey->ip.frag != OVS_FRAG_TYPE_LATER) {

		if (swkey->ip.proto == IPPROTO_TCP) {
			struct ovs_key_tcp *tcp_key;

			nla = nla_reserve(skb, OVS_KEY_ATTR_TCP, sizeof(*tcp_key));
			if (!nla)
				goto nla_put_failure;
			tcp_key = nla_data(nla);
			tcp_key->tcp_src = output->tp.src;
			tcp_key->tcp_dst = output->tp.dst;
			if (nla_put_be16(skb, OVS_KEY_ATTR_TCP_FLAGS,
					 output->tp.flags))
				goto nla_put_failure;
		} else if (swkey->ip.proto == IPPROTO_UDP) {
			struct ovs_key_udp *udp_key;

			nla = nla_reserve(skb, OVS_KEY_ATTR_UDP, sizeof(*udp_key));
			if (!nla)
				goto nla_put_failure;
			udp_key = nla_data(nla);
			udp_key->udp_src = output->tp.src;
			udp_key->udp_dst = output->tp.dst;
		} else if (swkey->ip.proto == IPPROTO_SCTP) {
			struct ovs_key_sctp *sctp_key;

			nla = nla_reserve(skb, OVS_KEY_ATTR_SCTP, sizeof(*sctp_key));
			if (!nla)
				goto nla_put_failure;
			sctp_key = nla_data(nla);
			sctp_key->sctp_src = output->tp.src;
			sctp_key->sctp_dst = output->tp.dst;
		} else if (swkey->eth.type == htons(ETH_P_IP) &&
			   swkey->ip.proto == IPPROTO_ICMP) {
			struct ovs_key_icmp *icmp_key;

			nla = nla_reserve(skb, OVS_KEY_ATTR_ICMP, sizeof(*icmp_key));
			if (!nla)
				goto nla_put_failure;
			icmp_key = nla_data(nla);
			icmp_key->icmp_type = ntohs(output->tp.src);
			icmp_key->icmp_code = ntohs(output->tp.dst);
		} else if (swkey->eth.type == htons(ETH_P_IPV6) &&
			   swkey->ip.proto == IPPROTO_ICMPV6) {
			struct ovs_key_icmpv6 *icmpv6_key;

			nla = nla_reserve(skb, OVS_KEY_ATTR_ICMPV6,
						sizeof(*icmpv6_key));
			if (!nla)
				goto nla_put_failure;
			icmpv6_key = nla_data(nla);
			icmpv6_key->icmpv6_type = ntohs(output->tp.src);
			icmpv6_key->icmpv6_code = ntohs(output->tp.dst);

			if (icmpv6_key->icmpv6_type == NDISC_NEIGHBOUR_SOLICITATION ||
			    icmpv6_key->icmpv6_type == NDISC_NEIGHBOUR_ADVERTISEMENT) {
				struct ovs_key_nd *nd_key;

				nla = nla_reserve(skb, OVS_KEY_ATTR_ND, sizeof(*nd_key));
				if (!nla)
					goto nla_put_failure;
				nd_key = nla_data(nla);
				memcpy(nd_key->nd_target, &output->ipv6.nd.target,
				       sizeof(nd_key->nd_target));
				ether_addr_copy(nd_key->nd_sll, output->ipv6.nd.sll);
				ether_addr_copy(nd_key->nd_tll, output->ipv6.nd.tll);
			}
		}
	}

unencap:
	if (encap)
		nla_nest_end(skb, encap);

	return 0;

nla_put_failure:
	return -EMSGSIZE;
}
int ovs_nla_put_key(const struct sw_flow_key *swkey,
		    const struct sw_flow_key *output, int attr, bool is_mask,
		    struct sk_buff *skb)
{
	int err;
	struct nlattr *nla;

	nla = nla_nest_start(skb, attr);
	if (!nla)
		return -EMSGSIZE;
	err = __ovs_nla_put_key(swkey, output, is_mask, skb);
	if (err)
		return err;
	nla_nest_end(skb, nla);

	return 0;
}

/* Called with ovs_mutex or RCU read lock. */
int ovs_nla_put_identifier(const struct sw_flow *flow, struct sk_buff *skb)
{
	if (ovs_identifier_is_ufid(&flow->id))
		return nla_put(skb, OVS_FLOW_ATTR_UFID, flow->id.ufid_len,
			       flow->id.ufid);

	return ovs_nla_put_key(flow->id.unmasked_key, flow->id.unmasked_key,
			       OVS_FLOW_ATTR_KEY, false, skb);
}

/* Called with ovs_mutex or RCU read lock. */
int ovs_nla_put_masked_key(const struct sw_flow *flow, struct sk_buff *skb)
{
	return ovs_nla_put_key(&flow->key, &flow->key,
			       OVS_FLOW_ATTR_KEY, false, skb);
}

/* Called with ovs_mutex or RCU read lock. */
int ovs_nla_put_mask(const struct sw_flow *flow, struct sk_buff *skb)
{
	return ovs_nla_put_key(&flow->key, &flow->mask->key,
			       OVS_FLOW_ATTR_MASK, true, skb);
}
#define MAX_ACTIONS_BUFSIZE	(32 * 1024)

static struct sw_flow_actions *nla_alloc_flow_actions(int size, bool log)
{
	struct sw_flow_actions *sfa;

	if (size > MAX_ACTIONS_BUFSIZE) {
		OVS_NLERR(log, "Flow action size %u bytes exceeds max", size);
		return ERR_PTR(-EINVAL);
	}

	sfa = kmalloc(sizeof(*sfa) + size, GFP_KERNEL);
	if (!sfa)
		return ERR_PTR(-ENOMEM);

	sfa->actions_len = 0;
	return sfa;
}

/* Schedules 'sf_acts' to be freed after the next RCU grace period.
 * The caller must hold rcu_read_lock for this to be sensible.
 */
void ovs_nla_free_flow_actions(struct sw_flow_actions *sf_acts)
{
	kfree_rcu(sf_acts, rcu);
}
static struct nlattr *reserve_sfa_size(struct sw_flow_actions **sfa,
				       int attr_len, bool log)
{
	struct sw_flow_actions *acts;
	int new_acts_size;
	int req_size = NLA_ALIGN(attr_len);
	int next_offset = offsetof(struct sw_flow_actions, actions) +
			  (*sfa)->actions_len;

	if (req_size <= (ksize(*sfa) - next_offset))
		goto out;

	new_acts_size = ksize(*sfa) * 2;

	if (new_acts_size > MAX_ACTIONS_BUFSIZE) {
		if ((MAX_ACTIONS_BUFSIZE - next_offset) < req_size)
			return ERR_PTR(-EMSGSIZE);
		new_acts_size = MAX_ACTIONS_BUFSIZE;
	}

	acts = nla_alloc_flow_actions(new_acts_size, log);
	if (IS_ERR(acts))
		return (void *)acts;

	memcpy(acts->actions, (*sfa)->actions, (*sfa)->actions_len);
	acts->actions_len = (*sfa)->actions_len;
	kfree(*sfa);
	*sfa = acts;

out:
	(*sfa)->actions_len += req_size;
	return (struct nlattr *) ((unsigned char *)(*sfa) + next_offset);
}
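
/* reserve_sfa_size() grows the actions buffer geometrically: it first
 * relies on ksize() to use whatever slack kmalloc() already handed out,
 * then doubles the allocation, capping the total at MAX_ACTIONS_BUFSIZE.
 */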
static struct nlattr *__add_action(struct sw_flow_actions **sfa,
				   int attrtype, void *data, int len, bool log)
{
	struct nlattr *a;

	a = reserve_sfa_size(sfa, nla_attr_size(len), log);
	if (IS_ERR(a))
		return a;

	a->nla_type = attrtype;
	a->nla_len = nla_attr_size(len);

	if (data)
		memcpy(nla_data(a), data, len);
	memset((unsigned char *) a + a->nla_len, 0, nla_padlen(len));

	return a;
}

static int add_action(struct sw_flow_actions **sfa, int attrtype,
		      void *data, int len, bool log)
{
	struct nlattr *a;

	a = __add_action(sfa, attrtype, data, len, log);

	return PTR_ERR_OR_ZERO(a);
}

static inline int add_nested_action_start(struct sw_flow_actions **sfa,
					  int attrtype, bool log)
{
	int used = (*sfa)->actions_len;
	int err;

	err = add_action(sfa, attrtype, NULL, 0, log);
	if (err)
		return err;

	return used;
}

static inline void add_nested_action_end(struct sw_flow_actions *sfa,
					 int st_offset)
{
	struct nlattr *a = (struct nlattr *) ((unsigned char *)sfa->actions +
					      st_offset);

	a->nla_len = sfa->actions_len - st_offset;
}
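
/* add_nested_action_start()/add_nested_action_end() bracket a nested
 * attribute while actions are being copied: the start helper emits a
 * zero-length attribute header and returns its offset, and once the
 * nested payload has been appended the end helper patches that header's
 * nla_len to cover everything added since.
 */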
static int __ovs_nla_copy_actions(const struct nlattr *attr,
				  const struct sw_flow_key *key,
				  int depth, struct sw_flow_actions **sfa,
				  __be16 eth_type, __be16 vlan_tci, bool log);

static int validate_and_copy_sample(const struct nlattr *attr,
				    const struct sw_flow_key *key, int depth,
				    struct sw_flow_actions **sfa,
				    __be16 eth_type, __be16 vlan_tci, bool log)
{
	const struct nlattr *attrs[OVS_SAMPLE_ATTR_MAX + 1];
	const struct nlattr *probability, *actions;
	const struct nlattr *a;
	int rem, start, err, st_acts;

	memset(attrs, 0, sizeof(attrs));
	nla_for_each_nested(a, attr, rem) {
		int type = nla_type(a);
		if (!type || type > OVS_SAMPLE_ATTR_MAX || attrs[type])
			return -EINVAL;
		attrs[type] = a;
	}
	if (rem)
		return -EINVAL;

	probability = attrs[OVS_SAMPLE_ATTR_PROBABILITY];
	if (!probability || nla_len(probability) != sizeof(u32))
		return -EINVAL;

	actions = attrs[OVS_SAMPLE_ATTR_ACTIONS];
	if (!actions || (nla_len(actions) && nla_len(actions) < NLA_HDRLEN))
		return -EINVAL;

	/* validation done, copy sample action. */
	start = add_nested_action_start(sfa, OVS_ACTION_ATTR_SAMPLE, log);
	if (start < 0)
		return start;
	err = add_action(sfa, OVS_SAMPLE_ATTR_PROBABILITY,
			 nla_data(probability), sizeof(u32), log);
	if (err)
		return err;
	st_acts = add_nested_action_start(sfa, OVS_SAMPLE_ATTR_ACTIONS, log);
	if (st_acts < 0)
		return st_acts;

	err = __ovs_nla_copy_actions(actions, key, depth + 1, sfa,
				     eth_type, vlan_tci, log);
	if (err)
		return err;

	add_nested_action_end(*sfa, st_acts);
	add_nested_action_end(*sfa, start);

	return 0;
}
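
/* A sample action is copied in the same nested form it arrived in
 * (probability, then the inner action list), and the inner actions are
 * re-validated through __ovs_nla_copy_actions() at depth + 1, so nesting
 * stays bounded by the SAMPLE_ACTION_DEPTH check in that function.
 */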
void ovs_match_init(struct sw_flow_match *match,
		    struct sw_flow_key *key,
		    struct sw_flow_mask *mask)
{
	memset(match, 0, sizeof(*match));
	match->key = key;
	match->mask = mask;

	memset(key, 0, sizeof(*key));

	if (mask) {
		memset(&mask->key, 0, sizeof(mask->key));
		mask->range.start = mask->range.end = 0;
	}
}

static int validate_geneve_opts(struct sw_flow_key *key)
{
	struct geneve_opt *option;
	int opts_len = key->tun_opts_len;
	bool crit_opt = false;

	option = (struct geneve_opt *)TUN_METADATA_OPTS(key, key->tun_opts_len);
	while (opts_len > 0) {
		int len;

		if (opts_len < sizeof(*option))
			return -EINVAL;

		len = sizeof(*option) + option->length * 4;
		if (len > opts_len)
			return -EINVAL;

		crit_opt |= !!(option->type & GENEVE_CRIT_OPT_TYPE);

		option = (struct geneve_opt *)((u8 *)option + len);
		opts_len -= len;
	}

	key->tun_key.tun_flags |= crit_opt ? TUNNEL_CRIT_OPT : 0;

	return 0;
}
static int validate_and_copy_set_tun(const struct nlattr *attr,
				     struct sw_flow_actions **sfa, bool log)
{
	struct sw_flow_match match;
	struct sw_flow_key key;
	struct ovs_tunnel_info *tun_info;
	struct nlattr *a;
	int err = 0, start, opts_type;

	ovs_match_init(&match, &key, NULL);
	opts_type = ipv4_tun_from_nlattr(nla_data(attr), &match, false, log);
	if (opts_type < 0)
		return opts_type;

	if (key.tun_opts_len) {
		switch (opts_type) {
		case OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS:
			err = validate_geneve_opts(&key);
			if (err < 0)
				return err;
			break;
		case OVS_TUNNEL_KEY_ATTR_VXLAN_OPTS:
			break;
		}
	}

	start = add_nested_action_start(sfa, OVS_ACTION_ATTR_SET, log);
	if (start < 0)
		return start;

	a = __add_action(sfa, OVS_KEY_ATTR_TUNNEL_INFO, NULL,
			 sizeof(*tun_info) + key.tun_opts_len, log);
	if (IS_ERR(a))
		return PTR_ERR(a);

	tun_info = nla_data(a);
	tun_info->tunnel = key.tun_key;
	tun_info->options_len = key.tun_opts_len;

	if (tun_info->options_len) {
		/* We need to store the options in the action itself since
		 * everything else will go away after flow setup. We can append
		 * it to tun_info and then point there.
		 */
		memcpy((tun_info + 1),
		       TUN_METADATA_OPTS(&key, key.tun_opts_len), key.tun_opts_len);
		tun_info->options = (tun_info + 1);
	} else {
		tun_info->options = NULL;
	}

	add_nested_action_end(*sfa, start);

	return err;
}
/* Return false if there are any non-masked bits set.
 * Mask follows data immediately, before any netlink padding.
 */
static bool validate_masked(u8 *data, int len)
{
	u8 *mask = data + len;

	while (len--)
		if (*data++ & ~*mask++)
			return false;

	return true;
}
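
/* Layout of a masked set action's payload: the key bytes are immediately
 * followed by an equally sized mask, e.g. for OVS_KEY_ATTR_IPV4 the
 * payload is 2 * sizeof(struct ovs_key_ipv4), value first and mask
 * second. This is why validate_set() halves key_len when 'masked' is set.
 */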
1812 static int validate_set(const struct nlattr *a,
1813 const struct sw_flow_key *flow_key,
1814 struct sw_flow_actions **sfa,
1815 bool *skip_copy, __be16 eth_type, bool masked, bool log)
1817 const struct nlattr *ovs_key = nla_data(a);
1818 int key_type = nla_type(ovs_key);
1819 size_t key_len;
1821 /* There can be only one key in a action */
1822 if (nla_total_size(nla_len(ovs_key)) != nla_len(a))
1823 return -EINVAL;
1825 key_len = nla_len(ovs_key);
1826 if (masked)
1827 key_len /= 2;
1829 if (key_type > OVS_KEY_ATTR_MAX ||
1830 (ovs_key_lens[key_type].len != key_len &&
1831 ovs_key_lens[key_type].len != OVS_ATTR_NESTED))
1832 return -EINVAL;
1834 if (masked && !validate_masked(nla_data(ovs_key), key_len))
1835 return -EINVAL;
1837 switch (key_type) {
1838 const struct ovs_key_ipv4 *ipv4_key;
1839 const struct ovs_key_ipv6 *ipv6_key;
1840 int err;
1842 case OVS_KEY_ATTR_PRIORITY:
1843 case OVS_KEY_ATTR_SKB_MARK:
1844 case OVS_KEY_ATTR_ETHERNET:
1845 break;
1847 case OVS_KEY_ATTR_TUNNEL:
1848 if (eth_p_mpls(eth_type))
1849 return -EINVAL;
1851 if (masked)
1852 return -EINVAL; /* Masked tunnel set not supported. */
1854 *skip_copy = true;
1855 err = validate_and_copy_set_tun(a, sfa, log);
1856 if (err)
1857 return err;
1858 break;
1860 case OVS_KEY_ATTR_IPV4:
1861 if (eth_type != htons(ETH_P_IP))
1862 return -EINVAL;
1864 ipv4_key = nla_data(ovs_key);
1866 if (masked) {
1867 const struct ovs_key_ipv4 *mask = ipv4_key + 1;
1869 /* Non-writeable fields. */
1870 if (mask->ipv4_proto || mask->ipv4_frag)
1871 return -EINVAL;
1872 } else {
1873 if (ipv4_key->ipv4_proto != flow_key->ip.proto)
1874 return -EINVAL;
1876 if (ipv4_key->ipv4_frag != flow_key->ip.frag)
1877 return -EINVAL;
1879 break;
1881 case OVS_KEY_ATTR_IPV6:
1882 if (eth_type != htons(ETH_P_IPV6))
1883 return -EINVAL;
1885 ipv6_key = nla_data(ovs_key);
1887 if (masked) {
1888 const struct ovs_key_ipv6 *mask = ipv6_key + 1;
1890 /* Non-writeable fields. */
1891 if (mask->ipv6_proto || mask->ipv6_frag)
1892 return -EINVAL;
1894 /* Invalid bits in the flow label mask? */
1895 if (ntohl(mask->ipv6_label) & 0xFFF00000)
1896 return -EINVAL;
1897 } else {
1898 if (ipv6_key->ipv6_proto != flow_key->ip.proto)
1899 return -EINVAL;
1901 if (ipv6_key->ipv6_frag != flow_key->ip.frag)
1902 return -EINVAL;
1904 if (ntohl(ipv6_key->ipv6_label) & 0xFFF00000)
1905 return -EINVAL;
1907 break;
1909 case OVS_KEY_ATTR_TCP:
1910 if ((eth_type != htons(ETH_P_IP) &&
1911 eth_type != htons(ETH_P_IPV6)) ||
1912 flow_key->ip.proto != IPPROTO_TCP)
1913 return -EINVAL;
1915 break;
1917 case OVS_KEY_ATTR_UDP:
1918 if ((eth_type != htons(ETH_P_IP) &&
1919 eth_type != htons(ETH_P_IPV6)) ||
1920 flow_key->ip.proto != IPPROTO_UDP)
1921 return -EINVAL;
1923 break;
1925 case OVS_KEY_ATTR_MPLS:
1926 if (!eth_p_mpls(eth_type))
1927 return -EINVAL;
1928 break;
1930 case OVS_KEY_ATTR_SCTP:
1931 if ((eth_type != htons(ETH_P_IP) &&
1932 eth_type != htons(ETH_P_IPV6)) ||
1933 flow_key->ip.proto != IPPROTO_SCTP)
1934 return -EINVAL;
1936 break;
1938 default:
1939 return -EINVAL;
1942 /* Convert non-masked non-tunnel set actions to masked set actions. */
1943 if (!masked && key_type != OVS_KEY_ATTR_TUNNEL) {
1944 int start, len = key_len * 2;
1945 struct nlattr *at;
1947 *skip_copy = true;
1949 start = add_nested_action_start(sfa,
1950 OVS_ACTION_ATTR_SET_TO_MASKED,
1951 log);
1952 if (start < 0)
1953 return start;
1955 at = __add_action(sfa, key_type, NULL, len, log);
1956 if (IS_ERR(at))
1957 return PTR_ERR(at);
1959 memcpy(nla_data(at), nla_data(ovs_key), key_len); /* Key. */
1960 memset(nla_data(at) + key_len, 0xff, key_len); /* Mask. */
1961 /* Clear non-writeable bits from otherwise writeable fields. */
1962 if (key_type == OVS_KEY_ATTR_IPV6) {
1963 struct ovs_key_ipv6 *mask = nla_data(at) + key_len;
1965 mask->ipv6_label &= htonl(0x000FFFFF);
1966 }
1967 add_nested_action_end(*sfa, start);
1968 }
1970 return 0;
1971 }
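/* Sketch of the rewrite above: a non-masked, non-tunnel set such as
 *
 *	OVS_ACTION_ATTR_SET(OVS_KEY_ATTR_IPV4(key))
 *
 * is stored internally as
 *
 *	OVS_ACTION_ATTR_SET_TO_MASKED(OVS_KEY_ATTR_IPV4(key | 0xff..ff))
 *
 * with an all-ones mask appended right after the key (minus the
 * non-writeable IPv6 flow label bits cleared above), so the execution
 * path only ever has to implement masked sets.
 */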
1973 static int validate_userspace(const struct nlattr *attr)
1974 {
1975 static const struct nla_policy userspace_policy[OVS_USERSPACE_ATTR_MAX + 1] = {
1976 [OVS_USERSPACE_ATTR_PID] = {.type = NLA_U32 },
1977 [OVS_USERSPACE_ATTR_USERDATA] = {.type = NLA_UNSPEC },
1978 [OVS_USERSPACE_ATTR_EGRESS_TUN_PORT] = {.type = NLA_U32 },
1979 };
1980 struct nlattr *a[OVS_USERSPACE_ATTR_MAX + 1];
1981 int error;
1983 error = nla_parse_nested(a, OVS_USERSPACE_ATTR_MAX,
1984 attr, userspace_policy);
1985 if (error)
1986 return error;
1988 if (!a[OVS_USERSPACE_ATTR_PID] ||
1989 !nla_get_u32(a[OVS_USERSPACE_ATTR_PID]))
1990 return -EINVAL;
1992 return 0;
1993 }
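/* Example of a minimal userspace action that passes the checks above
 * (a sketch of the sender side; "portid" stands for the caller's Netlink
 * socket ID and must be non-zero):
 *
 *	start = nla_nest_start(skb, OVS_ACTION_ATTR_USERSPACE);
 *	nla_put_u32(skb, OVS_USERSPACE_ATTR_PID, portid);
 *	nla_nest_end(skb, start);
 *
 * OVS_USERSPACE_ATTR_USERDATA and OVS_USERSPACE_ATTR_EGRESS_TUN_PORT
 * are optional.
 */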
1995 static int copy_action(const struct nlattr *from,
1996 struct sw_flow_actions **sfa, bool log)
1997 {
1998 int totlen = NLA_ALIGN(from->nla_len);
1999 struct nlattr *to;
2001 to = reserve_sfa_size(sfa, from->nla_len, log);
2002 if (IS_ERR(to))
2003 return PTR_ERR(to);
2005 memcpy(to, from, totlen);
2006 return 0;
2007 }
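/* Note on sizing: from->nla_len is the attribute's unaligned total length,
 * while the copy spans NLA_ALIGN(from->nla_len) bytes (e.g. 6 -> 8 with
 * NLA_ALIGNTO of 4). This assumes reserve_sfa_size() rounds its
 * reservation up to netlink alignment, which the attribute stream
 * requires anyway.
 */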
2009 static int __ovs_nla_copy_actions(const struct nlattr *attr,
2010 const struct sw_flow_key *key,
2011 int depth, struct sw_flow_actions **sfa,
2012 __be16 eth_type, __be16 vlan_tci, bool log)
2013 {
2014 const struct nlattr *a;
2015 int rem, err;
2017 if (depth >= SAMPLE_ACTION_DEPTH)
2018 return -EOVERFLOW;
2020 nla_for_each_nested(a, attr, rem) {
2021 /* Expected argument lengths, (u32)-1 for variable length. */
2022 static const u32 action_lens[OVS_ACTION_ATTR_MAX + 1] = {
2023 [OVS_ACTION_ATTR_OUTPUT] = sizeof(u32),
2024 [OVS_ACTION_ATTR_RECIRC] = sizeof(u32),
2025 [OVS_ACTION_ATTR_USERSPACE] = (u32)-1,
2026 [OVS_ACTION_ATTR_PUSH_MPLS] = sizeof(struct ovs_action_push_mpls),
2027 [OVS_ACTION_ATTR_POP_MPLS] = sizeof(__be16),
2028 [OVS_ACTION_ATTR_PUSH_VLAN] = sizeof(struct ovs_action_push_vlan),
2029 [OVS_ACTION_ATTR_POP_VLAN] = 0,
2030 [OVS_ACTION_ATTR_SET] = (u32)-1,
2031 [OVS_ACTION_ATTR_SET_MASKED] = (u32)-1,
2032 [OVS_ACTION_ATTR_SAMPLE] = (u32)-1,
2033 [OVS_ACTION_ATTR_HASH] = sizeof(struct ovs_action_hash)
2034 };
2035 const struct ovs_action_push_vlan *vlan;
2036 int type = nla_type(a);
2037 bool skip_copy;
2039 if (type > OVS_ACTION_ATTR_MAX ||
2040 (action_lens[type] != nla_len(a) &&
2041 action_lens[type] != (u32)-1))
2042 return -EINVAL;
2044 skip_copy = false;
2045 switch (type) {
2046 case OVS_ACTION_ATTR_UNSPEC:
2047 return -EINVAL;
2049 case OVS_ACTION_ATTR_USERSPACE:
2050 err = validate_userspace(a);
2051 if (err)
2052 return err;
2053 break;
2055 case OVS_ACTION_ATTR_OUTPUT:
2056 if (nla_get_u32(a) >= DP_MAX_PORTS)
2057 return -EINVAL;
2058 break;
2060 case OVS_ACTION_ATTR_HASH: {
2061 const struct ovs_action_hash *act_hash = nla_data(a);
2063 switch (act_hash->hash_alg) {
2064 case OVS_HASH_ALG_L4:
2065 break;
2066 default:
2067 return -EINVAL;
2068 }
2070 break;
2071 }
2073 case OVS_ACTION_ATTR_POP_VLAN:
2074 vlan_tci = htons(0);
2075 break;
2077 case OVS_ACTION_ATTR_PUSH_VLAN:
2078 vlan = nla_data(a);
2079 if (vlan->vlan_tpid != htons(ETH_P_8021Q))
2080 return -EINVAL;
2081 if (!(vlan->vlan_tci & htons(VLAN_TAG_PRESENT)))
2082 return -EINVAL;
2083 vlan_tci = vlan->vlan_tci;
2084 break;
2086 case OVS_ACTION_ATTR_RECIRC:
2087 break;
2089 case OVS_ACTION_ATTR_PUSH_MPLS: {
2090 const struct ovs_action_push_mpls *mpls = nla_data(a);
2092 if (!eth_p_mpls(mpls->mpls_ethertype))
2093 return -EINVAL;
2094 /* Only allow push MPLS for a white list of ether types, i.e.
2095 * for packets that have a known tag order.
2096 */
2097 if (vlan_tci & htons(VLAN_TAG_PRESENT) ||
2098 (eth_type != htons(ETH_P_IP) &&
2099 eth_type != htons(ETH_P_IPV6) &&
2100 eth_type != htons(ETH_P_ARP) &&
2101 eth_type != htons(ETH_P_RARP) &&
2102 !eth_p_mpls(eth_type)))
2103 return -EINVAL;
2104 eth_type = mpls->mpls_ethertype;
2105 break;
2106 }
2108 case OVS_ACTION_ATTR_POP_MPLS:
2109 if (vlan_tci & htons(VLAN_TAG_PRESENT) ||
2110 !eth_p_mpls(eth_type))
2111 return -EINVAL;
2113 /* Disallow subsequent L2.5+ set and mpls_pop actions
2114 * as there is no check here to ensure that the new
2115 * eth_type is valid and thus set actions could
2116 * write off the end of the packet or otherwise
2117 * corrupt it.
2118 *
2119 * Support for these actions is planned using packet
2120 * recirculation.
2121 */
2122 eth_type = htons(0);
2123 break;
2125 case OVS_ACTION_ATTR_SET:
2126 err = validate_set(a, key, sfa,
2127 &skip_copy, eth_type, false, log);
2128 if (err)
2129 return err;
2130 break;
2132 case OVS_ACTION_ATTR_SET_MASKED:
2133 err = validate_set(a, key, sfa,
2134 &skip_copy, eth_type, true, log);
2135 if (err)
2136 return err;
2137 break;
2139 case OVS_ACTION_ATTR_SAMPLE:
2140 err = validate_and_copy_sample(a, key, depth, sfa,
2141 eth_type, vlan_tci, log);
2142 if (err)
2143 return err;
2144 skip_copy = true;
2145 break;
2147 default:
2148 OVS_NLERR(log, "Unknown action type %d", type);
2149 return -EINVAL;
2150 }
2151 if (!skip_copy) {
2152 err = copy_action(a, sfa, log);
2153 if (err)
2154 return err;
2155 }
2156 }
2158 if (rem > 0)
2159 return -EINVAL;
2161 return 0;
2162 }
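/* The loop above validates actions in sequence, updating eth_type and
 * vlan_tci as each action would rewrite the packet. Two examples of
 * lists this rejects:
 *
 *	pop_mpls, set(ipv4(...))	pop_mpls cleared eth_type to 0,
 *					so validate_set() fails;
 *	push_vlan(...), push_mpls(...)	vlan_tci still has
 *					VLAN_TAG_PRESENT set when
 *					push_mpls is checked.
 */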
2164 /* 'key' must be the masked key. */
2165 int ovs_nla_copy_actions(const struct nlattr *attr,
2166 const struct sw_flow_key *key,
2167 struct sw_flow_actions **sfa, bool log)
2168 {
2169 int err;
2171 *sfa = nla_alloc_flow_actions(nla_len(attr), log);
2172 if (IS_ERR(*sfa))
2173 return PTR_ERR(*sfa);
2175 err = __ovs_nla_copy_actions(attr, key, 0, sfa, key->eth.type,
2176 key->eth.tci, log);
2177 if (err)
2178 kfree(*sfa);
2180 return err;
2181 }
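/* Typical call from the flow setup path (a sketch; "acts" and
 * "masked_key" are the caller's variables):
 *
 *	struct sw_flow_actions *acts;
 *
 *	err = ovs_nla_copy_actions(a[OVS_FLOW_ATTR_ACTIONS], &masked_key,
 *				   &acts, log);
 *
 * On success *sfa holds a validated (and possibly rewritten) copy of the
 * userspace action list; on error it has already been freed above.
 */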
2183 static int sample_action_to_attr(const struct nlattr *attr, struct sk_buff *skb)
2184 {
2185 const struct nlattr *a;
2186 struct nlattr *start;
2187 int err = 0, rem;
2189 start = nla_nest_start(skb, OVS_ACTION_ATTR_SAMPLE);
2190 if (!start)
2191 return -EMSGSIZE;
2193 nla_for_each_nested(a, attr, rem) {
2194 int type = nla_type(a);
2195 struct nlattr *st_sample;
2197 switch (type) {
2198 case OVS_SAMPLE_ATTR_PROBABILITY:
2199 if (nla_put(skb, OVS_SAMPLE_ATTR_PROBABILITY,
2200 sizeof(u32), nla_data(a)))
2201 return -EMSGSIZE;
2202 break;
2203 case OVS_SAMPLE_ATTR_ACTIONS:
2204 st_sample = nla_nest_start(skb, OVS_SAMPLE_ATTR_ACTIONS);
2205 if (!st_sample)
2206 return -EMSGSIZE;
2207 err = ovs_nla_put_actions(nla_data(a), nla_len(a), skb);
2208 if (err)
2209 return err;
2210 nla_nest_end(skb, st_sample);
2211 break;
2212 }
2213 }
2215 nla_nest_end(skb, start);
2216 return err;
2217 }
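/* Dumping mirrors parsing: OVS_SAMPLE_ATTR_PROBABILITY is echoed back as
 * a u32, and OVS_SAMPLE_ATTR_ACTIONS is re-serialized through
 * ovs_nla_put_actions(), which can call back into this function, so
 * nested sample actions round-trip to any depth the validator allowed.
 */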
2219 static int set_action_to_attr(const struct nlattr *a, struct sk_buff *skb)
2220 {
2221 const struct nlattr *ovs_key = nla_data(a);
2222 int key_type = nla_type(ovs_key);
2223 struct nlattr *start;
2224 int err;
2226 switch (key_type) {
2227 case OVS_KEY_ATTR_TUNNEL_INFO: {
2228 struct ovs_tunnel_info *tun_info = nla_data(ovs_key);
2230 start = nla_nest_start(skb, OVS_ACTION_ATTR_SET);
2231 if (!start)
2232 return -EMSGSIZE;
2234 err = ipv4_tun_to_nlattr(skb, &tun_info->tunnel,
2235 tun_info->options_len ?
2236 tun_info->options : NULL,
2237 tun_info->options_len);
2238 if (err)
2239 return err;
2240 nla_nest_end(skb, start);
2241 break;
2242 }
2243 default:
2244 if (nla_put(skb, OVS_ACTION_ATTR_SET, nla_len(a), ovs_key))
2245 return -EMSGSIZE;
2246 break;
2247 }
2249 return 0;
2250 }
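/* OVS_KEY_ATTR_TUNNEL_INFO is purely internal: validate_and_copy_set_tun()
 * stores the tunnel key in this pre-digested form at flow setup time, so
 * the dump path has to translate it back into OVS_KEY_ATTR_TUNNEL netlink
 * attributes via ipv4_tun_to_nlattr(). All other set keys were copied
 * verbatim and can be echoed back as-is.
 */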
2252 static int masked_set_action_to_set_action_attr(const struct nlattr *a,
2253 struct sk_buff *skb)
2254 {
2255 const struct nlattr *ovs_key = nla_data(a);
2256 size_t key_len = nla_len(ovs_key) / 2;
2258 /* Revert the conversion we did from a non-masked set action to
2259 * masked set action.
2260 */
2261 if (nla_put(skb, OVS_ACTION_ATTR_SET, nla_len(a) - key_len, ovs_key))
2262 return -EMSGSIZE;
2264 return 0;
2265 }
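/* Layout being undone here, with the mask stored right after the key:
 *
 *	in:  OVS_ACTION_ATTR_SET_TO_MASKED(KEY_ATTR(key | mask))
 *	out: OVS_ACTION_ATTR_SET(KEY_ATTR(key))
 *
 * nla_len(a) - key_len keeps the inner attribute header plus the key
 * half and drops the trailing mask half.
 */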
2267 int ovs_nla_put_actions(const struct nlattr *attr, int len, struct sk_buff *skb)
2268 {
2269 const struct nlattr *a;
2270 int rem, err;
2272 nla_for_each_attr(a, attr, len, rem) {
2273 int type = nla_type(a);
2275 switch (type) {
2276 case OVS_ACTION_ATTR_SET:
2277 err = set_action_to_attr(a, skb);
2278 if (err)
2279 return err;
2280 break;
2282 case OVS_ACTION_ATTR_SET_TO_MASKED:
2283 err = masked_set_action_to_set_action_attr(a, skb);
2284 if (err)
2285 return err;
2286 break;
2288 case OVS_ACTION_ATTR_SAMPLE:
2289 err = sample_action_to_attr(a, skb);
2290 if (err)
2291 return err;
2292 break;
2293 default:
2294 if (nla_put(skb, type, nla_len(a), nla_data(a)))
2295 return -EMSGSIZE;
2296 break;
2297 }
2298 }
2300 return 0;
2301 }
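/* Note the symmetry with __ovs_nla_copy_actions(): plain set actions that
 * were rewritten to the internal SET_TO_MASKED form on input are
 * converted back to OVS_ACTION_ATTR_SET here, so a flow dump shows
 * userspace the representation it originally installed.
 */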