// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (c) 2007-2014 Nicira, Inc.
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/init.h>
#include <linux/module.h>
#include <linux/if_arp.h>
#include <linux/if_vlan.h>
#include <linux/in.h>
#include <linux/ip.h>
#include <linux/jhash.h>
#include <linux/delay.h>
#include <linux/time.h>
#include <linux/etherdevice.h>
#include <linux/genetlink.h>
#include <linux/kernel.h>
#include <linux/kthread.h>
#include <linux/mutex.h>
#include <linux/percpu.h>
#include <linux/rcupdate.h>
#include <linux/tcp.h>
#include <linux/udp.h>
#include <linux/ethtool.h>
#include <linux/wait.h>
#include <asm/div64.h>
#include <linux/highmem.h>
#include <linux/netfilter_bridge.h>
#include <linux/netfilter_ipv4.h>
#include <linux/inetdevice.h>
#include <linux/list.h>
#include <linux/openvswitch.h>
#include <linux/rculist.h>
#include <linux/dmi.h>
#include <net/genetlink.h>
#include <net/net_namespace.h>
#include <net/netns/generic.h>

#include "datapath.h"
#include "flow.h"
#include "flow_table.h"
#include "flow_netlink.h"
#include "meter.h"
#include "vport-internal_dev.h"
#include "vport-netdev.h"
unsigned int ovs_net_id __read_mostly;

static struct genl_family dp_packet_genl_family;
static struct genl_family dp_flow_genl_family;
static struct genl_family dp_datapath_genl_family;

static const struct nla_policy flow_policy[];
static const struct genl_multicast_group ovs_dp_flow_multicast_group = {
	.name = OVS_FLOW_MCGROUP,
};

static const struct genl_multicast_group ovs_dp_datapath_multicast_group = {
	.name = OVS_DATAPATH_MCGROUP,
};

static const struct genl_multicast_group ovs_dp_vport_multicast_group = {
	.name = OVS_VPORT_MCGROUP,
};
/* Check whether we need to build a reply message.
 * OVS userspace sets the NLM_F_ECHO flag if it needs the reply. */
static bool ovs_must_notify(struct genl_family *family, struct genl_info *info,
			    unsigned int group)
{
	return info->nlhdr->nlmsg_flags & NLM_F_ECHO ||
	       genl_has_listeners(family, genl_info_net(info), group);
}

static void ovs_notify(struct genl_family *family,
		       struct sk_buff *skb, struct genl_info *info)
{
	genl_notify(family, skb, info, 0, GFP_KERNEL);
}
/**
 * DOC: Locking:
 *
 * All writes, e.g. writes to device state (add/remove datapath, port, set
 * operations on vports, etc.) and writes to other state (flow table
 * modifications, setting miscellaneous datapath parameters, etc.), are
 * protected by ovs_lock.
 *
 * Reads are protected by RCU.
 *
 * There are a few special cases (mostly stats) that have their own
 * synchronization but they nest under all of the above and don't interact
 * with each other.
 *
 * The RTNL lock nests inside ovs_mutex.
 */
static DEFINE_MUTEX(ovs_mutex);

void ovs_lock(void)
{
	mutex_lock(&ovs_mutex);
}

void ovs_unlock(void)
{
	mutex_unlock(&ovs_mutex);
}

#ifdef CONFIG_LOCKDEP
int lockdep_ovsl_is_held(void)
{
	return lockdep_is_held(&ovs_mutex);
}
#endif
static struct vport *new_vport(const struct vport_parms *);
static int queue_gso_packets(struct datapath *dp, struct sk_buff *,
			     const struct sw_flow_key *,
			     const struct dp_upcall_info *,
			     uint32_t cutlen);
static int queue_userspace_packet(struct datapath *dp, struct sk_buff *,
				  const struct sw_flow_key *,
				  const struct dp_upcall_info *,
				  uint32_t cutlen);

static void ovs_dp_masks_rebalance(struct work_struct *work);
/* Must be called with rcu_read_lock or ovs_mutex. */
const char *ovs_dp_name(const struct datapath *dp)
{
	struct vport *vport = ovs_vport_ovsl_rcu(dp, OVSP_LOCAL);

	return ovs_vport_name(vport);
}
static int get_dpifindex(const struct datapath *dp)
{
	struct vport *local;
	int ifindex;

	local = ovs_vport_rcu(dp, OVSP_LOCAL);
	if (local)
		ifindex = local->dev->ifindex;
	else
		ifindex = 0;

	return ifindex;
}
static void destroy_dp_rcu(struct rcu_head *rcu)
{
	struct datapath *dp = container_of(rcu, struct datapath, rcu);

	ovs_flow_tbl_destroy(&dp->table);
	free_percpu(dp->stats_percpu);
	kfree(dp->ports);
	ovs_meters_exit(dp);
	kfree(dp);
}
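/* DP_VPORT_HASH_BUCKETS is a power of two, so masking the port number
 * with (DP_VPORT_HASH_BUCKETS - 1) below selects a bucket without an
 * expensive modulo operation.
 */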
static struct hlist_head *vport_hash_bucket(const struct datapath *dp,
					    u16 port_no)
{
	return &dp->ports[port_no & (DP_VPORT_HASH_BUCKETS - 1)];
}
/* Called with ovs_mutex or RCU read lock. */
struct vport *ovs_lookup_vport(const struct datapath *dp, u16 port_no)
{
	struct vport *vport;
	struct hlist_head *head;

	head = vport_hash_bucket(dp, port_no);
	hlist_for_each_entry_rcu(vport, head, dp_hash_node,
				 lockdep_ovsl_is_held()) {
		if (vport->port_no == port_no)
			return vport;
	}
	return NULL;
}
/* Called with ovs_mutex. */
static struct vport *new_vport(const struct vport_parms *parms)
{
	struct vport *vport;

	vport = ovs_vport_add(parms);
	if (!IS_ERR(vport)) {
		struct datapath *dp = parms->dp;
		struct hlist_head *head = vport_hash_bucket(dp, vport->port_no);

		hlist_add_head_rcu(&vport->dp_hash_node, head);
	}
	return vport;
}
void ovs_dp_detach_port(struct vport *p)
{
	ASSERT_OVSL();

	/* First drop references to device. */
	hlist_del_rcu(&p->dp_hash_node);

	/* Then destroy it. */
	ovs_vport_del(p);
}
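/* Fast-path entry point: every packet received on a vport lands here.
 * A flow-table hit executes the cached actions directly; a miss hands
 * the packet to userspace via an OVS_PACKET_CMD_MISS upcall.
 */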
/* Must be called with rcu_read_lock. */
void ovs_dp_process_packet(struct sk_buff *skb, struct sw_flow_key *key)
{
	const struct vport *p = OVS_CB(skb)->input_vport;
	struct datapath *dp = p->dp;
	struct sw_flow *flow;
	struct sw_flow_actions *sf_acts;
	struct dp_stats_percpu *stats;
	u64 *stats_counter;
	u32 n_mask_hit;
	u32 n_cache_hit;
	int error;

	stats = this_cpu_ptr(dp->stats_percpu);

	/* Look up flow. */
	flow = ovs_flow_tbl_lookup_stats(&dp->table, key, skb_get_hash(skb),
					 &n_mask_hit, &n_cache_hit);
	if (unlikely(!flow)) {
		struct dp_upcall_info upcall;

		memset(&upcall, 0, sizeof(upcall));
		upcall.cmd = OVS_PACKET_CMD_MISS;
		upcall.portid = ovs_vport_find_upcall_portid(p, skb);
		upcall.mru = OVS_CB(skb)->mru;
		error = ovs_dp_upcall(dp, skb, key, &upcall, 0);
		if (unlikely(error))
			kfree_skb(skb);
		else
			consume_skb(skb);
		stats_counter = &stats->n_missed;
		goto out;
	}

	ovs_flow_stats_update(flow, key->tp.flags, skb);
	sf_acts = rcu_dereference(flow->sf_acts);
	error = ovs_execute_actions(dp, skb, sf_acts, key);
	if (unlikely(error))
		net_dbg_ratelimited("ovs: action execution error on datapath %s: %d\n",
				    ovs_dp_name(dp), error);

	stats_counter = &stats->n_hit;

out:
	/* Update datapath statistics. */
	u64_stats_update_begin(&stats->syncp);
	(*stats_counter)++;
	stats->n_mask_hit += n_mask_hit;
	stats->n_cache_hit += n_cache_hit;
	u64_stats_update_end(&stats->syncp);
}
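/* Deliver a packet to the userspace upcall socket. GSO packets are
 * segmented first so each segment fits into a single Netlink message;
 * any failure is accounted in the per-CPU n_lost counter.
 */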
int ovs_dp_upcall(struct datapath *dp, struct sk_buff *skb,
		  const struct sw_flow_key *key,
		  const struct dp_upcall_info *upcall_info,
		  uint32_t cutlen)
{
	struct dp_stats_percpu *stats;
	int err;

	if (upcall_info->portid == 0) {
		err = -ENOTCONN;
		goto err;
	}

	if (!skb_is_gso(skb))
		err = queue_userspace_packet(dp, skb, key, upcall_info, cutlen);
	else
		err = queue_gso_packets(dp, skb, key, upcall_info, cutlen);
	if (err)
		goto err;

	return 0;

err:
	stats = this_cpu_ptr(dp->stats_percpu);

	u64_stats_update_begin(&stats->syncp);
	stats->n_lost++;
	u64_stats_update_end(&stats->syncp);

	return err;
}
static int queue_gso_packets(struct datapath *dp, struct sk_buff *skb,
			     const struct sw_flow_key *key,
			     const struct dp_upcall_info *upcall_info,
			     uint32_t cutlen)
{
	unsigned int gso_type = skb_shinfo(skb)->gso_type;
	struct sw_flow_key later_key;
	struct sk_buff *segs, *nskb;
	int err;

	BUILD_BUG_ON(sizeof(*OVS_CB(skb)) > SKB_GSO_CB_OFFSET);
	segs = __skb_gso_segment(skb, NETIF_F_SG, false);
	if (IS_ERR(segs))
		return PTR_ERR(segs);
	if (segs == NULL)
		return -EINVAL;

	if (gso_type & SKB_GSO_UDP) {
		/* The initial flow key extracted by ovs_flow_key_extract()
		 * in this case is for a first fragment, so we need to
		 * properly mark later fragments.
		 */
		later_key = *key;
		later_key.ip.frag = OVS_FRAG_TYPE_LATER;
	}

	/* Queue all of the segments. */
	skb_list_walk_safe(segs, skb, nskb) {
		if (gso_type & SKB_GSO_UDP && skb != segs)
			key = &later_key;

		err = queue_userspace_packet(dp, skb, key, upcall_info, cutlen);
		if (err)
			break;
	}

	/* Free all of the segments. */
	skb_list_walk_safe(segs, skb, nskb) {
		if (err)
			kfree_skb(skb);
		else
			consume_skb(skb);
	}
	return err;
}
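/* Upper-bound estimate of the Netlink message size needed for one
 * upcall, so the reply skb can be allocated before any attributes are
 * filled in.
 */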
static size_t upcall_msg_size(const struct dp_upcall_info *upcall_info,
			      unsigned int hdrlen, int actions_attrlen)
{
	size_t size = NLMSG_ALIGN(sizeof(struct ovs_header))
		+ nla_total_size(hdrlen) /* OVS_PACKET_ATTR_PACKET */
		+ nla_total_size(ovs_key_attr_size()) /* OVS_PACKET_ATTR_KEY */
		+ nla_total_size(sizeof(unsigned int)) /* OVS_PACKET_ATTR_LEN */
		+ nla_total_size(sizeof(u64)); /* OVS_PACKET_ATTR_HASH */

	/* OVS_PACKET_ATTR_USERDATA */
	if (upcall_info->userdata)
		size += NLA_ALIGN(upcall_info->userdata->nla_len);

	/* OVS_PACKET_ATTR_EGRESS_TUN_KEY */
	if (upcall_info->egress_tun_info)
		size += nla_total_size(ovs_tun_key_attr_size());

	/* OVS_PACKET_ATTR_ACTIONS */
	if (upcall_info->actions_len)
		size += nla_total_size(actions_attrlen);

	/* OVS_PACKET_ATTR_MRU */
	if (upcall_info->mru)
		size += nla_total_size(sizeof(upcall_info->mru));

	return size;
}
static void pad_packet(struct datapath *dp, struct sk_buff *skb)
{
	if (!(dp->user_features & OVS_DP_F_UNALIGNED)) {
		size_t plen = NLA_ALIGN(skb->len) - skb->len;

		if (plen > 0)
			skb_put_zero(skb, plen);
	}
}
static int queue_userspace_packet(struct datapath *dp, struct sk_buff *skb,
				  const struct sw_flow_key *key,
				  const struct dp_upcall_info *upcall_info,
				  uint32_t cutlen)
{
	struct ovs_header *upcall;
	struct sk_buff *nskb = NULL;
	struct sk_buff *user_skb = NULL; /* to be queued to userspace */
	struct nlattr *nla;
	size_t len;
	unsigned int hlen;
	int err, dp_ifindex;
	u64 hash;

	dp_ifindex = get_dpifindex(dp);
	if (!dp_ifindex)
		return -ENODEV;

	if (skb_vlan_tag_present(skb)) {
		nskb = skb_clone(skb, GFP_ATOMIC);
		if (!nskb)
			return -ENOMEM;

		nskb = __vlan_hwaccel_push_inside(nskb);
		if (!nskb)
			return -ENOMEM;

		skb = nskb;
	}

	if (nla_attr_size(skb->len) > USHRT_MAX) {
		err = -EFBIG;
		goto out;
	}

	/* Complete checksum if needed */
	if (skb->ip_summed == CHECKSUM_PARTIAL &&
	    (err = skb_csum_hwoffload_help(skb, 0)))
		goto out;

	/* Older versions of OVS user space enforce alignment of the last
	 * Netlink attribute to NLA_ALIGNTO which would require extensive
	 * padding logic. Only perform zerocopy if padding is not required.
	 */
	if (dp->user_features & OVS_DP_F_UNALIGNED)
		hlen = skb_zerocopy_headlen(skb);
	else
		hlen = skb->len;

	len = upcall_msg_size(upcall_info, hlen - cutlen,
			      OVS_CB(skb)->acts_origlen);
	user_skb = genlmsg_new(len, GFP_ATOMIC);
	if (!user_skb) {
		err = -ENOMEM;
		goto out;
	}

	upcall = genlmsg_put(user_skb, 0, 0, &dp_packet_genl_family,
			     0, upcall_info->cmd);
	if (!upcall) {
		err = -EINVAL;
		goto out;
	}
	upcall->dp_ifindex = dp_ifindex;

	err = ovs_nla_put_key(key, key, OVS_PACKET_ATTR_KEY, false, user_skb);
	if (err)
		goto out;

	if (upcall_info->userdata)
		__nla_put(user_skb, OVS_PACKET_ATTR_USERDATA,
			  nla_len(upcall_info->userdata),
			  nla_data(upcall_info->userdata));

	if (upcall_info->egress_tun_info) {
		nla = nla_nest_start_noflag(user_skb,
					    OVS_PACKET_ATTR_EGRESS_TUN_KEY);
		if (!nla) {
			err = -EMSGSIZE;
			goto out;
		}
		err = ovs_nla_put_tunnel_info(user_skb,
					      upcall_info->egress_tun_info);
		if (err)
			goto out;

		nla_nest_end(user_skb, nla);
	}

	if (upcall_info->actions_len) {
		nla = nla_nest_start_noflag(user_skb, OVS_PACKET_ATTR_ACTIONS);
		if (!nla) {
			err = -EMSGSIZE;
			goto out;
		}
		err = ovs_nla_put_actions(upcall_info->actions,
					  upcall_info->actions_len,
					  user_skb);
		if (!err)
			nla_nest_end(user_skb, nla);
		else
			nla_nest_cancel(user_skb, nla);
	}

	/* Add OVS_PACKET_ATTR_MRU */
	if (upcall_info->mru &&
	    nla_put_u16(user_skb, OVS_PACKET_ATTR_MRU, upcall_info->mru)) {
		err = -ENOBUFS;
		goto out;
	}

	/* Add OVS_PACKET_ATTR_LEN when packet is truncated */
	if (cutlen > 0 &&
	    nla_put_u32(user_skb, OVS_PACKET_ATTR_LEN, skb->len)) {
		err = -ENOBUFS;
		goto out;
	}

	/* Add OVS_PACKET_ATTR_HASH */
	hash = skb_get_hash_raw(skb);
	if (skb->sw_hash)
		hash |= OVS_PACKET_HASH_SW_BIT;
	if (skb->l4_hash)
		hash |= OVS_PACKET_HASH_L4_BIT;

	if (nla_put(user_skb, OVS_PACKET_ATTR_HASH, sizeof(u64), &hash)) {
		err = -ENOBUFS;
		goto out;
	}

	/* Only reserve room for attribute header, packet data is added
	 * in skb_zerocopy() */
	if (!(nla = nla_reserve(user_skb, OVS_PACKET_ATTR_PACKET, 0))) {
		err = -ENOBUFS;
		goto out;
	}
	nla->nla_len = nla_attr_size(skb->len - cutlen);

	err = skb_zerocopy(user_skb, skb, skb->len - cutlen, hlen);
	if (err)
		goto out;

	/* Pad OVS_PACKET_ATTR_PACKET if linear copy was performed */
	pad_packet(dp, user_skb);

	((struct nlmsghdr *) user_skb->data)->nlmsg_len = user_skb->len;

	err = genlmsg_unicast(ovs_dp_get_net(dp), user_skb, upcall_info->portid);
	user_skb = NULL;
out:
	if (err)
		skb_tx_error(skb);
	kfree_skb(user_skb);
	kfree_skb(nskb);
	return err;
}
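/* OVS_PACKET_CMD_EXECUTE handler: userspace supplies a raw packet plus
 * a flow key and an action list, and the packet is injected into the
 * datapath as if it had arrived on the given input vport.
 */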
static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info)
{
	struct ovs_header *ovs_header = info->userhdr;
	struct net *net = sock_net(skb->sk);
	struct nlattr **a = info->attrs;
	struct sw_flow_actions *acts;
	struct sk_buff *packet;
	struct sw_flow *flow;
	struct sw_flow_actions *sf_acts;
	struct datapath *dp;
	struct vport *input_vport;
	u16 mru = 0;
	u64 hash;
	int len;
	int err;
	bool log = !a[OVS_PACKET_ATTR_PROBE];

	err = -EINVAL;
	if (!a[OVS_PACKET_ATTR_PACKET] || !a[OVS_PACKET_ATTR_KEY] ||
	    !a[OVS_PACKET_ATTR_ACTIONS])
		goto err;

	len = nla_len(a[OVS_PACKET_ATTR_PACKET]);
	packet = __dev_alloc_skb(NET_IP_ALIGN + len, GFP_KERNEL);
	err = -ENOMEM;
	if (!packet)
		goto err;
	skb_reserve(packet, NET_IP_ALIGN);

	nla_memcpy(__skb_put(packet, len), a[OVS_PACKET_ATTR_PACKET], len);

	/* Set packet's mru */
	if (a[OVS_PACKET_ATTR_MRU]) {
		mru = nla_get_u16(a[OVS_PACKET_ATTR_MRU]);
		packet->ignore_df = 1;
	}
	OVS_CB(packet)->mru = mru;

	if (a[OVS_PACKET_ATTR_HASH]) {
		hash = nla_get_u64(a[OVS_PACKET_ATTR_HASH]);

		__skb_set_hash(packet, hash & 0xFFFFFFFFULL,
			       !!(hash & OVS_PACKET_HASH_SW_BIT),
			       !!(hash & OVS_PACKET_HASH_L4_BIT));
	}

	/* Build an sw_flow for sending this packet. */
	flow = ovs_flow_alloc();
	err = PTR_ERR(flow);
	if (IS_ERR(flow))
		goto err_kfree_skb;

	err = ovs_flow_key_extract_userspace(net, a[OVS_PACKET_ATTR_KEY],
					     packet, &flow->key, log);
	if (err)
		goto err_flow_free;

	err = ovs_nla_copy_actions(net, a[OVS_PACKET_ATTR_ACTIONS],
				   &flow->key, &acts, log);
	if (err)
		goto err_flow_free;

	rcu_assign_pointer(flow->sf_acts, acts);
	packet->priority = flow->key.phy.priority;
	packet->mark = flow->key.phy.skb_mark;

	rcu_read_lock();
	dp = get_dp_rcu(net, ovs_header->dp_ifindex);
	err = -ENODEV;
	if (!dp)
		goto err_unlock;

	input_vport = ovs_vport_rcu(dp, flow->key.phy.in_port);
	if (!input_vport)
		input_vport = ovs_vport_rcu(dp, OVSP_LOCAL);

	if (!input_vport)
		goto err_unlock;

	packet->dev = input_vport->dev;
	OVS_CB(packet)->input_vport = input_vport;
	sf_acts = rcu_dereference(flow->sf_acts);

	local_bh_disable();
	err = ovs_execute_actions(dp, packet, sf_acts, &flow->key);
	local_bh_enable();
	rcu_read_unlock();

	ovs_flow_free(flow, false);
	return err;

err_unlock:
	rcu_read_unlock();
err_flow_free:
	ovs_flow_free(flow, false);
err_kfree_skb:
	kfree_skb(packet);
err:
	return err;
}
static const struct nla_policy packet_policy[OVS_PACKET_ATTR_MAX + 1] = {
	[OVS_PACKET_ATTR_PACKET] = { .len = ETH_HLEN },
	[OVS_PACKET_ATTR_KEY] = { .type = NLA_NESTED },
	[OVS_PACKET_ATTR_ACTIONS] = { .type = NLA_NESTED },
	[OVS_PACKET_ATTR_PROBE] = { .type = NLA_FLAG },
	[OVS_PACKET_ATTR_MRU] = { .type = NLA_U16 },
	[OVS_PACKET_ATTR_HASH] = { .type = NLA_U64 },
};
static const struct genl_small_ops dp_packet_genl_ops[] = {
	{ .cmd = OVS_PACKET_CMD_EXECUTE,
	  .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
	  .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
	  .doit = ovs_packet_cmd_execute
	}
};

static struct genl_family dp_packet_genl_family __ro_after_init = {
	.hdrsize = sizeof(struct ovs_header),
	.name = OVS_PACKET_FAMILY,
	.version = OVS_PACKET_VERSION,
	.maxattr = OVS_PACKET_ATTR_MAX,
	.policy = packet_policy,
	.netnsok = true,
	.parallel_ops = true,
	.small_ops = dp_packet_genl_ops,
	.n_small_ops = ARRAY_SIZE(dp_packet_genl_ops),
	.module = THIS_MODULE,
};
static void get_dp_stats(const struct datapath *dp, struct ovs_dp_stats *stats,
			 struct ovs_dp_megaflow_stats *mega_stats)
{
	int i;

	memset(mega_stats, 0, sizeof(*mega_stats));

	stats->n_flows = ovs_flow_tbl_count(&dp->table);
	mega_stats->n_masks = ovs_flow_tbl_num_masks(&dp->table);

	stats->n_hit = stats->n_missed = stats->n_lost = 0;

	for_each_possible_cpu(i) {
		const struct dp_stats_percpu *percpu_stats;
		struct dp_stats_percpu local_stats;
		unsigned int start;

		percpu_stats = per_cpu_ptr(dp->stats_percpu, i);

		do {
			start = u64_stats_fetch_begin_irq(&percpu_stats->syncp);
			local_stats = *percpu_stats;
		} while (u64_stats_fetch_retry_irq(&percpu_stats->syncp, start));

		stats->n_hit += local_stats.n_hit;
		stats->n_missed += local_stats.n_missed;
		stats->n_lost += local_stats.n_lost;
		mega_stats->n_mask_hit += local_stats.n_mask_hit;
		mega_stats->n_cache_hit += local_stats.n_cache_hit;
	}
}
static bool should_fill_key(const struct sw_flow_id *sfid, uint32_t ufid_flags)
{
	return ovs_identifier_is_ufid(sfid) &&
	       !(ufid_flags & OVS_UFID_F_OMIT_KEY);
}

static bool should_fill_mask(uint32_t ufid_flags)
{
	return !(ufid_flags & OVS_UFID_F_OMIT_MASK);
}

static bool should_fill_actions(uint32_t ufid_flags)
{
	return !(ufid_flags & OVS_UFID_F_OMIT_ACTIONS);
}
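/* The OVS_UFID_F_OMIT_* flags let userspace trim flow replies. For
 * example, a request carrying OVS_UFID_F_OMIT_KEY | OVS_UFID_F_OMIT_MASK
 * returns only the UFID, stats, and actions for flows installed with a
 * UFID.
 */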
static size_t ovs_flow_cmd_msg_size(const struct sw_flow_actions *acts,
				    const struct sw_flow_id *sfid,
				    uint32_t ufid_flags)
{
	size_t len = NLMSG_ALIGN(sizeof(struct ovs_header));

	/* OVS_FLOW_ATTR_UFID, or unmasked flow key as fallback
	 * see ovs_nla_put_identifier()
	 */
	if (sfid && ovs_identifier_is_ufid(sfid))
		len += nla_total_size(sfid->ufid_len);
	else
		len += nla_total_size(ovs_key_attr_size());

	/* OVS_FLOW_ATTR_KEY */
	if (!sfid || should_fill_key(sfid, ufid_flags))
		len += nla_total_size(ovs_key_attr_size());

	/* OVS_FLOW_ATTR_MASK */
	if (should_fill_mask(ufid_flags))
		len += nla_total_size(ovs_key_attr_size());

	/* OVS_FLOW_ATTR_ACTIONS */
	if (should_fill_actions(ufid_flags))
		len += nla_total_size(acts->orig_len);

	return len
		+ nla_total_size_64bit(sizeof(struct ovs_flow_stats)) /* OVS_FLOW_ATTR_STATS */
		+ nla_total_size(1) /* OVS_FLOW_ATTR_TCP_FLAGS */
		+ nla_total_size_64bit(8); /* OVS_FLOW_ATTR_USED */
}
/* Called with ovs_mutex or RCU read lock. */
static int ovs_flow_cmd_fill_stats(const struct sw_flow *flow,
				   struct sk_buff *skb)
{
	struct ovs_flow_stats stats;
	__be16 tcp_flags;
	unsigned long used;

	ovs_flow_stats_get(flow, &stats, &used, &tcp_flags);

	if (used &&
	    nla_put_u64_64bit(skb, OVS_FLOW_ATTR_USED, ovs_flow_used_time(used),
			      OVS_FLOW_ATTR_PAD))
		return -EMSGSIZE;

	if (stats.n_packets &&
	    nla_put_64bit(skb, OVS_FLOW_ATTR_STATS,
			  sizeof(struct ovs_flow_stats), &stats,
			  OVS_FLOW_ATTR_PAD))
		return -EMSGSIZE;

	if ((u8)ntohs(tcp_flags) &&
	    nla_put_u8(skb, OVS_FLOW_ATTR_TCP_FLAGS, (u8)ntohs(tcp_flags)))
		return -EMSGSIZE;

	return 0;
}
/* Called with ovs_mutex or RCU read lock. */
static int ovs_flow_cmd_fill_actions(const struct sw_flow *flow,
				     struct sk_buff *skb, int skb_orig_len)
{
	struct nlattr *start;
	int err;

	/* If OVS_FLOW_ATTR_ACTIONS doesn't fit, skip dumping the actions if
	 * this is the first flow to be dumped into 'skb'.  This is unusual for
	 * Netlink but individual action lists can be longer than
	 * NLMSG_GOODSIZE and thus entirely undumpable if we didn't do this.
	 * The userspace caller can always fetch the actions separately if it
	 * really wants them.  (Most userspace callers in fact don't care.)
	 *
	 * This can only fail for dump operations because the skb is always
	 * properly sized for single flows.
	 */
	start = nla_nest_start_noflag(skb, OVS_FLOW_ATTR_ACTIONS);
	if (start) {
		const struct sw_flow_actions *sf_acts;

		sf_acts = rcu_dereference_ovsl(flow->sf_acts);
		err = ovs_nla_put_actions(sf_acts->actions,
					  sf_acts->actions_len, skb);

		if (!err)
			nla_nest_end(skb, start);
		else {
			if (skb_orig_len)
				return err;

			nla_nest_cancel(skb, start);
		}
	} else if (skb_orig_len) {
		return -EMSGSIZE;
	}

	return 0;
}
/* Called with ovs_mutex or RCU read lock. */
static int ovs_flow_cmd_fill_info(const struct sw_flow *flow, int dp_ifindex,
				  struct sk_buff *skb, u32 portid,
				  u32 seq, u32 flags, u8 cmd, u32 ufid_flags)
{
	const int skb_orig_len = skb->len;
	struct ovs_header *ovs_header;
	int err;

	ovs_header = genlmsg_put(skb, portid, seq, &dp_flow_genl_family,
				 flags, cmd);
	if (!ovs_header)
		return -EMSGSIZE;

	ovs_header->dp_ifindex = dp_ifindex;

	err = ovs_nla_put_identifier(flow, skb);
	if (err)
		goto error;

	if (should_fill_key(&flow->id, ufid_flags)) {
		err = ovs_nla_put_masked_key(flow, skb);
		if (err)
			goto error;
	}

	if (should_fill_mask(ufid_flags)) {
		err = ovs_nla_put_mask(flow, skb);
		if (err)
			goto error;
	}

	err = ovs_flow_cmd_fill_stats(flow, skb);
	if (err)
		goto error;

	if (should_fill_actions(ufid_flags)) {
		err = ovs_flow_cmd_fill_actions(flow, skb, skb_orig_len);
		if (err)
			goto error;
	}

	genlmsg_end(skb, ovs_header);
	return 0;

error:
	genlmsg_cancel(skb, ovs_header);
	return err;
}
/* May not be called with RCU read lock. */
static struct sk_buff *ovs_flow_cmd_alloc_info(const struct sw_flow_actions *acts,
					       const struct sw_flow_id *sfid,
					       struct genl_info *info,
					       bool always,
					       uint32_t ufid_flags)
{
	struct sk_buff *skb;
	size_t len;

	if (!always && !ovs_must_notify(&dp_flow_genl_family, info, 0))
		return NULL;

	len = ovs_flow_cmd_msg_size(acts, sfid, ufid_flags);
	skb = genlmsg_new(len, GFP_KERNEL);
	if (!skb)
		return ERR_PTR(-ENOMEM);

	return skb;
}
/* Called with ovs_mutex. */
static struct sk_buff *ovs_flow_cmd_build_info(const struct sw_flow *flow,
					       int dp_ifindex,
					       struct genl_info *info, u8 cmd,
					       bool always, u32 ufid_flags)
{
	struct sk_buff *skb;
	int retval;

	skb = ovs_flow_cmd_alloc_info(ovsl_dereference(flow->sf_acts),
				      &flow->id, info, always, ufid_flags);
	if (IS_ERR_OR_NULL(skb))
		return skb;

	retval = ovs_flow_cmd_fill_info(flow, dp_ifindex, skb,
					info->snd_portid, info->snd_seq, 0,
					cmd, ufid_flags);
	if (WARN_ON_ONCE(retval < 0)) {
		kfree_skb(skb);
		skb = ERR_PTR(retval);
	}
	return skb;
}
static int ovs_flow_cmd_new(struct sk_buff *skb, struct genl_info *info)
{
	struct net *net = sock_net(skb->sk);
	struct nlattr **a = info->attrs;
	struct ovs_header *ovs_header = info->userhdr;
	struct sw_flow *flow = NULL, *new_flow;
	struct sw_flow_mask mask;
	struct sk_buff *reply;
	struct datapath *dp;
	struct sw_flow_actions *acts;
	struct sw_flow_match match;
	u32 ufid_flags = ovs_nla_get_ufid_flags(a[OVS_FLOW_ATTR_UFID_FLAGS]);
	int error;
	bool log = !a[OVS_FLOW_ATTR_PROBE];

	/* Must have key and actions. */
	error = -EINVAL;
	if (!a[OVS_FLOW_ATTR_KEY]) {
		OVS_NLERR(log, "Flow key attr not present in new flow.");
		goto error;
	}
	if (!a[OVS_FLOW_ATTR_ACTIONS]) {
		OVS_NLERR(log, "Flow actions attr not present in new flow.");
		goto error;
	}

	/* Most of the time we need to allocate a new flow, do it before
	 * locking.
	 */
	new_flow = ovs_flow_alloc();
	if (IS_ERR(new_flow)) {
		error = PTR_ERR(new_flow);
		goto error;
	}

	/* Extract key. */
	ovs_match_init(&match, &new_flow->key, false, &mask);
	error = ovs_nla_get_match(net, &match, a[OVS_FLOW_ATTR_KEY],
				  a[OVS_FLOW_ATTR_MASK], log);
	if (error)
		goto err_kfree_flow;

	/* Extract flow identifier. */
	error = ovs_nla_get_identifier(&new_flow->id, a[OVS_FLOW_ATTR_UFID],
				       &new_flow->key, log);
	if (error)
		goto err_kfree_flow;

	/* unmasked key is needed to match when ufid is not used. */
	if (ovs_identifier_is_key(&new_flow->id))
		match.key = new_flow->id.unmasked_key;

	ovs_flow_mask_key(&new_flow->key, &new_flow->key, true, &mask);

	/* Validate actions. */
	error = ovs_nla_copy_actions(net, a[OVS_FLOW_ATTR_ACTIONS],
				     &new_flow->key, &acts, log);
	if (error) {
		OVS_NLERR(log, "Flow actions may not be safe on all matching packets.");
		goto err_kfree_flow;
	}

	reply = ovs_flow_cmd_alloc_info(acts, &new_flow->id, info, false,
					ufid_flags);
	if (IS_ERR(reply)) {
		error = PTR_ERR(reply);
		goto err_kfree_acts;
	}

	ovs_lock();
	dp = get_dp(net, ovs_header->dp_ifindex);
	if (unlikely(!dp)) {
		error = -ENODEV;
		goto err_unlock_ovs;
	}

	/* Check if this is a duplicate flow */
	if (ovs_identifier_is_ufid(&new_flow->id))
		flow = ovs_flow_tbl_lookup_ufid(&dp->table, &new_flow->id);
	if (!flow)
		flow = ovs_flow_tbl_lookup(&dp->table, &new_flow->key);
	if (likely(!flow)) {
		rcu_assign_pointer(new_flow->sf_acts, acts);

		/* Put flow in bucket. */
		error = ovs_flow_tbl_insert(&dp->table, new_flow, &mask);
		if (unlikely(error)) {
			acts = NULL;
			goto err_unlock_ovs;
		}

		if (unlikely(reply)) {
			error = ovs_flow_cmd_fill_info(new_flow,
						       ovs_header->dp_ifindex,
						       reply, info->snd_portid,
						       info->snd_seq, 0,
						       OVS_FLOW_CMD_NEW,
						       ufid_flags);
			BUG_ON(error < 0);
		}
		ovs_unlock();
	} else {
		struct sw_flow_actions *old_acts;

		/* Bail out if we're not allowed to modify an existing flow.
		 * We accept NLM_F_CREATE in place of the intended NLM_F_EXCL
		 * because Generic Netlink treats the latter as a dump
		 * request.  We also accept NLM_F_EXCL in case that bug ever
		 * gets fixed.
		 */
		if (unlikely(info->nlhdr->nlmsg_flags & (NLM_F_CREATE
							 | NLM_F_EXCL))) {
			error = -EEXIST;
			goto err_unlock_ovs;
		}
		/* The flow identifier has to be the same for flow updates.
		 * Look for any overlapping flow.
		 */
		if (unlikely(!ovs_flow_cmp(flow, &match))) {
			if (ovs_identifier_is_key(&flow->id))
				flow = ovs_flow_tbl_lookup_exact(&dp->table,
								 &match);
			else /* UFID matches but key is different */
				flow = NULL;
			if (!flow) {
				error = -ENOENT;
				goto err_unlock_ovs;
			}
		}
		/* Update actions. */
		old_acts = ovsl_dereference(flow->sf_acts);
		rcu_assign_pointer(flow->sf_acts, acts);

		if (unlikely(reply)) {
			error = ovs_flow_cmd_fill_info(flow,
						       ovs_header->dp_ifindex,
						       reply, info->snd_portid,
						       info->snd_seq, 0,
						       OVS_FLOW_CMD_NEW,
						       ufid_flags);
			BUG_ON(error < 0);
		}
		ovs_unlock();

		ovs_nla_free_flow_actions_rcu(old_acts);
		ovs_flow_free(new_flow, false);
	}

	if (reply)
		ovs_notify(&dp_flow_genl_family, reply, info);
	return 0;

err_unlock_ovs:
	ovs_unlock();
	kfree_skb(reply);
err_kfree_acts:
	ovs_nla_free_flow_actions(acts);
err_kfree_flow:
	ovs_flow_free(new_flow, false);
error:
	return error;
}
/* Factor out action copy to avoid "Wframe-larger-than=1024" warning. */
static noinline_for_stack
struct sw_flow_actions *get_flow_actions(struct net *net,
					 const struct nlattr *a,
					 const struct sw_flow_key *key,
					 const struct sw_flow_mask *mask,
					 bool log)
{
	struct sw_flow_actions *acts;
	struct sw_flow_key masked_key;
	int error;

	ovs_flow_mask_key(&masked_key, key, true, mask);
	error = ovs_nla_copy_actions(net, a, &masked_key, &acts, log);
	if (error) {
		OVS_NLERR(log,
			  "Actions may not be safe on all matching packets");
		return ERR_PTR(error);
	}

	return acts;
}
/* Factor out match-init and action-copy to avoid
 * "Wframe-larger-than=1024" warning. Because mask is only
 * used to get actions, we use a separate function to save
 * some stack space.
 *
 * If there are no key and action attrs, we return 0
 * directly. In that case, the caller will also not use the
 * match as before. If there is an action attr, we try to get
 * actions and save them to *acts. Before returning from
 * the function, we reset the match->mask pointer, because
 * we should not return a match object with a dangling reference
 * to the on-stack mask.
 */
static noinline_for_stack int
ovs_nla_init_match_and_action(struct net *net,
			      struct sw_flow_match *match,
			      struct sw_flow_key *key,
			      struct nlattr **a,
			      struct sw_flow_actions **acts,
			      bool log)
{
	struct sw_flow_mask mask;
	int error = 0;

	if (a[OVS_FLOW_ATTR_KEY]) {
		ovs_match_init(match, key, true, &mask);
		error = ovs_nla_get_match(net, match, a[OVS_FLOW_ATTR_KEY],
					  a[OVS_FLOW_ATTR_MASK], log);
		if (error)
			goto error;
	}

	if (a[OVS_FLOW_ATTR_ACTIONS]) {
		if (!a[OVS_FLOW_ATTR_KEY]) {
			OVS_NLERR(log,
				  "Flow key attribute not present in set flow.");
			error = -EINVAL;
			goto error;
		}

		*acts = get_flow_actions(net, a[OVS_FLOW_ATTR_ACTIONS], key,
					 &mask, log);
		if (IS_ERR(*acts)) {
			error = PTR_ERR(*acts);
			goto error;
		}
	}

	/* On success, error is 0. */
error:
	match->mask = NULL;
	return error;
}
static int ovs_flow_cmd_set(struct sk_buff *skb, struct genl_info *info)
{
	struct net *net = sock_net(skb->sk);
	struct nlattr **a = info->attrs;
	struct ovs_header *ovs_header = info->userhdr;
	struct sw_flow_key key;
	struct sw_flow *flow;
	struct sk_buff *reply = NULL;
	struct datapath *dp;
	struct sw_flow_actions *old_acts = NULL, *acts = NULL;
	struct sw_flow_match match;
	struct sw_flow_id sfid;
	u32 ufid_flags = ovs_nla_get_ufid_flags(a[OVS_FLOW_ATTR_UFID_FLAGS]);
	int error = 0;
	bool log = !a[OVS_FLOW_ATTR_PROBE];
	bool ufid_present;

	ufid_present = ovs_nla_get_ufid(&sfid, a[OVS_FLOW_ATTR_UFID], log);
	if (!a[OVS_FLOW_ATTR_KEY] && !ufid_present) {
		OVS_NLERR(log,
			  "Flow set message rejected, Key attribute missing.");
		return -EINVAL;
	}

	error = ovs_nla_init_match_and_action(net, &match, &key, a,
					      &acts, log);
	if (error)
		goto error;

	if (acts) {
		/* Can allocate before locking if have acts. */
		reply = ovs_flow_cmd_alloc_info(acts, &sfid, info, false,
						ufid_flags);
		if (IS_ERR(reply)) {
			error = PTR_ERR(reply);
			goto err_kfree_acts;
		}
	}

	ovs_lock();
	dp = get_dp(net, ovs_header->dp_ifindex);
	if (unlikely(!dp)) {
		error = -ENODEV;
		goto err_unlock_ovs;
	}
	/* Check that the flow exists. */
	if (ufid_present)
		flow = ovs_flow_tbl_lookup_ufid(&dp->table, &sfid);
	else
		flow = ovs_flow_tbl_lookup_exact(&dp->table, &match);
	if (unlikely(!flow)) {
		error = -ENOENT;
		goto err_unlock_ovs;
	}

	/* Update actions, if present. */
	if (likely(acts)) {
		old_acts = ovsl_dereference(flow->sf_acts);
		rcu_assign_pointer(flow->sf_acts, acts);

		if (unlikely(reply)) {
			error = ovs_flow_cmd_fill_info(flow,
						       ovs_header->dp_ifindex,
						       reply, info->snd_portid,
						       info->snd_seq, 0,
						       OVS_FLOW_CMD_SET,
						       ufid_flags);
			BUG_ON(error < 0);
		}
	} else {
		/* Could not alloc without acts before locking. */
		reply = ovs_flow_cmd_build_info(flow, ovs_header->dp_ifindex,
						info, OVS_FLOW_CMD_SET, false,
						ufid_flags);

		if (IS_ERR(reply)) {
			error = PTR_ERR(reply);
			goto err_unlock_ovs;
		}
	}

	/* Clear stats. */
	if (a[OVS_FLOW_ATTR_CLEAR])
		ovs_flow_stats_clear(flow);
	ovs_unlock();

	if (reply)
		ovs_notify(&dp_flow_genl_family, reply, info);
	if (old_acts)
		ovs_nla_free_flow_actions_rcu(old_acts);

	return 0;

err_unlock_ovs:
	ovs_unlock();
	kfree_skb(reply);
err_kfree_acts:
	ovs_nla_free_flow_actions(acts);
error:
	return error;
}
static int ovs_flow_cmd_get(struct sk_buff *skb, struct genl_info *info)
{
	struct nlattr **a = info->attrs;
	struct ovs_header *ovs_header = info->userhdr;
	struct net *net = sock_net(skb->sk);
	struct sw_flow_key key;
	struct sk_buff *reply;
	struct sw_flow *flow;
	struct datapath *dp;
	struct sw_flow_match match;
	struct sw_flow_id ufid;
	u32 ufid_flags = ovs_nla_get_ufid_flags(a[OVS_FLOW_ATTR_UFID_FLAGS]);
	int err = 0;
	bool log = !a[OVS_FLOW_ATTR_PROBE];
	bool ufid_present;

	ufid_present = ovs_nla_get_ufid(&ufid, a[OVS_FLOW_ATTR_UFID], log);
	if (a[OVS_FLOW_ATTR_KEY]) {
		ovs_match_init(&match, &key, true, NULL);
		err = ovs_nla_get_match(net, &match, a[OVS_FLOW_ATTR_KEY], NULL,
					log);
	} else if (!ufid_present) {
		OVS_NLERR(log,
			  "Flow get message rejected, Key attribute missing.");
		err = -EINVAL;
	}
	if (err)
		return err;

	ovs_lock();
	dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
	if (!dp) {
		err = -ENODEV;
		goto unlock;
	}

	if (ufid_present)
		flow = ovs_flow_tbl_lookup_ufid(&dp->table, &ufid);
	else
		flow = ovs_flow_tbl_lookup_exact(&dp->table, &match);
	if (!flow) {
		err = -ENOENT;
		goto unlock;
	}

	reply = ovs_flow_cmd_build_info(flow, ovs_header->dp_ifindex, info,
					OVS_FLOW_CMD_GET, true, ufid_flags);
	if (IS_ERR(reply)) {
		err = PTR_ERR(reply);
		goto unlock;
	}

	ovs_unlock();
	return genlmsg_reply(reply, info);
unlock:
	ovs_unlock();
	return err;
}
static int ovs_flow_cmd_del(struct sk_buff *skb, struct genl_info *info)
{
	struct nlattr **a = info->attrs;
	struct ovs_header *ovs_header = info->userhdr;
	struct net *net = sock_net(skb->sk);
	struct sw_flow_key key;
	struct sk_buff *reply;
	struct sw_flow *flow = NULL;
	struct datapath *dp;
	struct sw_flow_match match;
	struct sw_flow_id ufid;
	u32 ufid_flags = ovs_nla_get_ufid_flags(a[OVS_FLOW_ATTR_UFID_FLAGS]);
	int err;
	bool log = !a[OVS_FLOW_ATTR_PROBE];
	bool ufid_present;

	ufid_present = ovs_nla_get_ufid(&ufid, a[OVS_FLOW_ATTR_UFID], log);
	if (a[OVS_FLOW_ATTR_KEY]) {
		ovs_match_init(&match, &key, true, NULL);
		err = ovs_nla_get_match(net, &match, a[OVS_FLOW_ATTR_KEY],
					NULL, log);
		if (unlikely(err))
			return err;
	}

	ovs_lock();
	dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
	if (unlikely(!dp)) {
		err = -ENODEV;
		goto unlock;
	}

	if (unlikely(!a[OVS_FLOW_ATTR_KEY] && !ufid_present)) {
		err = ovs_flow_tbl_flush(&dp->table);
		goto unlock;
	}

	if (ufid_present)
		flow = ovs_flow_tbl_lookup_ufid(&dp->table, &ufid);
	else
		flow = ovs_flow_tbl_lookup_exact(&dp->table, &match);
	if (unlikely(!flow)) {
		err = -ENOENT;
		goto unlock;
	}

	ovs_flow_tbl_remove(&dp->table, flow);
	ovs_unlock();

	reply = ovs_flow_cmd_alloc_info((const struct sw_flow_actions __force *) flow->sf_acts,
					&flow->id, info, false, ufid_flags);
	if (likely(reply)) {
		if (!IS_ERR(reply)) {
			rcu_read_lock();	/*To keep RCU checker happy. */
			err = ovs_flow_cmd_fill_info(flow, ovs_header->dp_ifindex,
						     reply, info->snd_portid,
						     info->snd_seq, 0,
						     OVS_FLOW_CMD_DEL,
						     ufid_flags);
			rcu_read_unlock();
			if (WARN_ON_ONCE(err < 0)) {
				kfree_skb(reply);
				goto out_free;
			}

			ovs_notify(&dp_flow_genl_family, reply, info);
		} else {
			netlink_set_err(sock_net(skb->sk)->genl_sock, 0, 0,
					PTR_ERR(reply));
		}
	}

out_free:
	ovs_flow_free(flow, true);
	return 0;
unlock:
	ovs_unlock();
	return err;
}
static int ovs_flow_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
{
	struct nlattr *a[__OVS_FLOW_ATTR_MAX];
	struct ovs_header *ovs_header = genlmsg_data(nlmsg_data(cb->nlh));
	struct table_instance *ti;
	struct datapath *dp;
	u32 ufid_flags;
	int err;

	err = genlmsg_parse_deprecated(cb->nlh, &dp_flow_genl_family, a,
				       OVS_FLOW_ATTR_MAX, flow_policy, NULL);
	if (err)
		return err;
	ufid_flags = ovs_nla_get_ufid_flags(a[OVS_FLOW_ATTR_UFID_FLAGS]);

	rcu_read_lock();
	dp = get_dp_rcu(sock_net(skb->sk), ovs_header->dp_ifindex);
	if (!dp) {
		rcu_read_unlock();
		return -ENODEV;
	}

	ti = rcu_dereference(dp->table.ti);
	for (;;) {
		struct sw_flow *flow;
		u32 bucket, obj;

		bucket = cb->args[0];
		obj = cb->args[1];
		flow = ovs_flow_tbl_dump_next(ti, &bucket, &obj);
		if (!flow)
			break;

		if (ovs_flow_cmd_fill_info(flow, ovs_header->dp_ifindex, skb,
					   NETLINK_CB(cb->skb).portid,
					   cb->nlh->nlmsg_seq, NLM_F_MULTI,
					   OVS_FLOW_CMD_GET, ufid_flags) < 0)
			break;

		cb->args[0] = bucket;
		cb->args[1] = obj;
	}
	rcu_read_unlock();
	return skb->len;
}
static const struct nla_policy flow_policy[OVS_FLOW_ATTR_MAX + 1] = {
	[OVS_FLOW_ATTR_KEY] = { .type = NLA_NESTED },
	[OVS_FLOW_ATTR_MASK] = { .type = NLA_NESTED },
	[OVS_FLOW_ATTR_ACTIONS] = { .type = NLA_NESTED },
	[OVS_FLOW_ATTR_CLEAR] = { .type = NLA_FLAG },
	[OVS_FLOW_ATTR_PROBE] = { .type = NLA_FLAG },
	[OVS_FLOW_ATTR_UFID] = { .type = NLA_UNSPEC, .len = 1 },
	[OVS_FLOW_ATTR_UFID_FLAGS] = { .type = NLA_U32 },
};
static const struct genl_small_ops dp_flow_genl_ops[] = {
	{ .cmd = OVS_FLOW_CMD_NEW,
	  .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
	  .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
	  .doit = ovs_flow_cmd_new
	},
	{ .cmd = OVS_FLOW_CMD_DEL,
	  .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
	  .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
	  .doit = ovs_flow_cmd_del
	},
	{ .cmd = OVS_FLOW_CMD_GET,
	  .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
	  .flags = 0, /* OK for unprivileged users. */
	  .doit = ovs_flow_cmd_get,
	  .dumpit = ovs_flow_cmd_dump
	},
	{ .cmd = OVS_FLOW_CMD_SET,
	  .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
	  .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
	  .doit = ovs_flow_cmd_set,
	},
};

static struct genl_family dp_flow_genl_family __ro_after_init = {
	.hdrsize = sizeof(struct ovs_header),
	.name = OVS_FLOW_FAMILY,
	.version = OVS_FLOW_VERSION,
	.maxattr = OVS_FLOW_ATTR_MAX,
	.policy = flow_policy,
	.netnsok = true,
	.parallel_ops = true,
	.small_ops = dp_flow_genl_ops,
	.n_small_ops = ARRAY_SIZE(dp_flow_genl_ops),
	.mcgrps = &ovs_dp_flow_multicast_group,
	.n_mcgrps = 1,
	.module = THIS_MODULE,
};
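/* Worst-case reply size for a datapath command, so the reply skb can be
 * allocated before ovs_mutex is taken.
 */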
static size_t ovs_dp_cmd_msg_size(void)
{
	size_t msgsize = NLMSG_ALIGN(sizeof(struct ovs_header));

	msgsize += nla_total_size(IFNAMSIZ);
	msgsize += nla_total_size_64bit(sizeof(struct ovs_dp_stats));
	msgsize += nla_total_size_64bit(sizeof(struct ovs_dp_megaflow_stats));
	msgsize += nla_total_size(sizeof(u32)); /* OVS_DP_ATTR_USER_FEATURES */
	msgsize += nla_total_size(sizeof(u32)); /* OVS_DP_ATTR_MASKS_CACHE_SIZE */

	return msgsize;
}
/* Called with ovs_mutex. */
static int ovs_dp_cmd_fill_info(struct datapath *dp, struct sk_buff *skb,
				u32 portid, u32 seq, u32 flags, u8 cmd)
{
	struct ovs_header *ovs_header;
	struct ovs_dp_stats dp_stats;
	struct ovs_dp_megaflow_stats dp_megaflow_stats;
	int err;

	ovs_header = genlmsg_put(skb, portid, seq, &dp_datapath_genl_family,
				 flags, cmd);
	if (!ovs_header)
		goto error;

	ovs_header->dp_ifindex = get_dpifindex(dp);

	err = nla_put_string(skb, OVS_DP_ATTR_NAME, ovs_dp_name(dp));
	if (err)
		goto nla_put_failure;

	get_dp_stats(dp, &dp_stats, &dp_megaflow_stats);
	if (nla_put_64bit(skb, OVS_DP_ATTR_STATS, sizeof(struct ovs_dp_stats),
			  &dp_stats, OVS_DP_ATTR_PAD))
		goto nla_put_failure;

	if (nla_put_64bit(skb, OVS_DP_ATTR_MEGAFLOW_STATS,
			  sizeof(struct ovs_dp_megaflow_stats),
			  &dp_megaflow_stats, OVS_DP_ATTR_PAD))
		goto nla_put_failure;

	if (nla_put_u32(skb, OVS_DP_ATTR_USER_FEATURES, dp->user_features))
		goto nla_put_failure;

	if (nla_put_u32(skb, OVS_DP_ATTR_MASKS_CACHE_SIZE,
			ovs_flow_tbl_masks_cache_size(&dp->table)))
		goto nla_put_failure;

	genlmsg_end(skb, ovs_header);
	return 0;

nla_put_failure:
	genlmsg_cancel(skb, ovs_header);
error:
	return -EMSGSIZE;
}
static struct sk_buff *ovs_dp_cmd_alloc_info(void)
{
	return genlmsg_new(ovs_dp_cmd_msg_size(), GFP_KERNEL);
}
/* Called with rcu_read_lock or ovs_mutex. */
static struct datapath *lookup_datapath(struct net *net,
					const struct ovs_header *ovs_header,
					struct nlattr *a[OVS_DP_ATTR_MAX + 1])
{
	struct datapath *dp;

	if (!a[OVS_DP_ATTR_NAME])
		dp = get_dp(net, ovs_header->dp_ifindex);
	else {
		struct vport *vport;

		vport = ovs_vport_locate(net, nla_data(a[OVS_DP_ATTR_NAME]));
		dp = vport && vport->port_no == OVSP_LOCAL ? vport->dp : NULL;
	}
	return dp ? dp : ERR_PTR(-ENODEV);
}
static void ovs_dp_reset_user_features(struct sk_buff *skb,
				       struct genl_info *info)
{
	struct datapath *dp;

	dp = lookup_datapath(sock_net(skb->sk), info->userhdr,
			     info->attrs);
	if (IS_ERR(dp))
		return;

	WARN(dp->user_features, "Dropping previously announced user features\n");
	dp->user_features = 0;
}
DEFINE_STATIC_KEY_FALSE(tc_recirc_sharing_support);

static int ovs_dp_change(struct datapath *dp, struct nlattr *a[])
{
	u32 user_features = 0;

	if (a[OVS_DP_ATTR_USER_FEATURES]) {
		user_features = nla_get_u32(a[OVS_DP_ATTR_USER_FEATURES]);

		if (user_features & ~(OVS_DP_F_VPORT_PIDS |
				      OVS_DP_F_UNALIGNED |
				      OVS_DP_F_TC_RECIRC_SHARING))
			return -EOPNOTSUPP;

#if !IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
		if (user_features & OVS_DP_F_TC_RECIRC_SHARING)
			return -EOPNOTSUPP;
#endif
	}

	if (a[OVS_DP_ATTR_MASKS_CACHE_SIZE]) {
		int err;
		u32 cache_size;

		cache_size = nla_get_u32(a[OVS_DP_ATTR_MASKS_CACHE_SIZE]);
		err = ovs_flow_tbl_masks_cache_resize(&dp->table, cache_size);
		if (err)
			return err;
	}

	dp->user_features = user_features;

	if (dp->user_features & OVS_DP_F_TC_RECIRC_SHARING)
		static_branch_enable(&tc_recirc_sharing_support);
	else
		static_branch_disable(&tc_recirc_sharing_support);

	return 0;
}
static int ovs_dp_stats_init(struct datapath *dp)
{
	dp->stats_percpu = netdev_alloc_pcpu_stats(struct dp_stats_percpu);
	if (!dp->stats_percpu)
		return -ENOMEM;

	return 0;
}

static int ovs_dp_vport_init(struct datapath *dp)
{
	int i;

	dp->ports = kmalloc_array(DP_VPORT_HASH_BUCKETS,
				  sizeof(struct hlist_head),
				  GFP_KERNEL);
	if (!dp->ports)
		return -ENOMEM;

	for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++)
		INIT_HLIST_HEAD(&dp->ports[i]);

	return 0;
}
static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info)
{
	struct nlattr **a = info->attrs;
	struct vport_parms parms;
	struct sk_buff *reply;
	struct datapath *dp;
	struct vport *vport;
	struct ovs_net *ovs_net;
	int err;

	err = -EINVAL;
	if (!a[OVS_DP_ATTR_NAME] || !a[OVS_DP_ATTR_UPCALL_PID])
		goto err;

	reply = ovs_dp_cmd_alloc_info();
	if (!reply)
		return -ENOMEM;

	err = -ENOMEM;
	dp = kzalloc(sizeof(*dp), GFP_KERNEL);
	if (dp == NULL)
		goto err_destroy_reply;

	ovs_dp_set_net(dp, sock_net(skb->sk));

	/* Allocate table. */
	err = ovs_flow_tbl_init(&dp->table);
	if (err)
		goto err_destroy_dp;

	err = ovs_dp_stats_init(dp);
	if (err)
		goto err_destroy_table;

	err = ovs_dp_vport_init(dp);
	if (err)
		goto err_destroy_stats;

	err = ovs_meters_init(dp);
	if (err)
		goto err_destroy_ports;

	/* Set up our datapath device. */
	parms.name = nla_data(a[OVS_DP_ATTR_NAME]);
	parms.type = OVS_VPORT_TYPE_INTERNAL;
	parms.options = NULL;
	parms.dp = dp;
	parms.port_no = OVSP_LOCAL;
	parms.upcall_portids = a[OVS_DP_ATTR_UPCALL_PID];

	/* So far only local changes have been made, now need the lock. */
	ovs_lock();

	err = ovs_dp_change(dp, a);
	if (err)
		goto err_unlock_and_destroy_meters;

	vport = new_vport(&parms);
	if (IS_ERR(vport)) {
		err = PTR_ERR(vport);
		if (err == -EBUSY)
			err = -EEXIST;

		if (err == -EEXIST) {
			/* An outdated user space instance that does not understand
			 * the concept of user_features has attempted to create a new
			 * datapath and is likely to reuse it. Drop all user features.
			 */
			if (info->genlhdr->version < OVS_DP_VER_FEATURES)
				ovs_dp_reset_user_features(skb, info);
		}

		goto err_unlock_and_destroy_meters;
	}

	err = ovs_dp_cmd_fill_info(dp, reply, info->snd_portid,
				   info->snd_seq, 0, OVS_DP_CMD_NEW);
	BUG_ON(err < 0);

	ovs_net = net_generic(ovs_dp_get_net(dp), ovs_net_id);
	list_add_tail_rcu(&dp->list_node, &ovs_net->dps);

	ovs_unlock();

	ovs_notify(&dp_datapath_genl_family, reply, info);
	return 0;

err_unlock_and_destroy_meters:
	ovs_unlock();
	ovs_meters_exit(dp);
err_destroy_ports:
	kfree(dp->ports);
err_destroy_stats:
	free_percpu(dp->stats_percpu);
err_destroy_table:
	ovs_flow_tbl_destroy(&dp->table);
err_destroy_dp:
	kfree(dp);
err_destroy_reply:
	kfree_skb(reply);
err:
	return err;
}
/* Called with ovs_mutex. */
static void __dp_destroy(struct datapath *dp)
{
	struct flow_table *table = &dp->table;
	int i;

	for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++) {
		struct vport *vport;
		struct hlist_node *n;

		hlist_for_each_entry_safe(vport, n, &dp->ports[i], dp_hash_node)
			if (vport->port_no != OVSP_LOCAL)
				ovs_dp_detach_port(vport);
	}

	list_del_rcu(&dp->list_node);

	/* OVSP_LOCAL is datapath internal port. We need to make sure that
	 * all ports in datapath are destroyed first before freeing datapath.
	 */
	ovs_dp_detach_port(ovs_vport_ovsl(dp, OVSP_LOCAL));

	/* Flush sw_flow in the tables. RCU cb only releases resource
	 * such as dp, ports and tables. That may avoid some issues
	 * such as RCU usage warning.
	 */
	table_instance_flow_flush(table, ovsl_dereference(table->ti),
				  ovsl_dereference(table->ufid_ti));

	/* RCU destroy the ports, meters and flow tables. */
	call_rcu(&dp->rcu, destroy_dp_rcu);
}
static int ovs_dp_cmd_del(struct sk_buff *skb, struct genl_info *info)
{
	struct sk_buff *reply;
	struct datapath *dp;
	int err;

	reply = ovs_dp_cmd_alloc_info();
	if (!reply)
		return -ENOMEM;

	ovs_lock();
	dp = lookup_datapath(sock_net(skb->sk), info->userhdr, info->attrs);
	err = PTR_ERR(dp);
	if (IS_ERR(dp))
		goto err_unlock_free;

	err = ovs_dp_cmd_fill_info(dp, reply, info->snd_portid,
				   info->snd_seq, 0, OVS_DP_CMD_DEL);
	BUG_ON(err < 0);

	__dp_destroy(dp);
	ovs_unlock();

	ovs_notify(&dp_datapath_genl_family, reply, info);

	return 0;

err_unlock_free:
	ovs_unlock();
	kfree_skb(reply);
	return err;
}
static int ovs_dp_cmd_set(struct sk_buff *skb, struct genl_info *info)
{
	struct sk_buff *reply;
	struct datapath *dp;
	int err;

	reply = ovs_dp_cmd_alloc_info();
	if (!reply)
		return -ENOMEM;

	ovs_lock();
	dp = lookup_datapath(sock_net(skb->sk), info->userhdr, info->attrs);
	err = PTR_ERR(dp);
	if (IS_ERR(dp))
		goto err_unlock_free;

	err = ovs_dp_change(dp, info->attrs);
	if (err)
		goto err_unlock_free;

	err = ovs_dp_cmd_fill_info(dp, reply, info->snd_portid,
				   info->snd_seq, 0, OVS_DP_CMD_SET);
	BUG_ON(err < 0);

	ovs_unlock();
	ovs_notify(&dp_datapath_genl_family, reply, info);

	return 0;

err_unlock_free:
	ovs_unlock();
	kfree_skb(reply);
	return err;
}
static int ovs_dp_cmd_get(struct sk_buff *skb, struct genl_info *info)
{
	struct sk_buff *reply;
	struct datapath *dp;
	int err;

	reply = ovs_dp_cmd_alloc_info();
	if (!reply)
		return -ENOMEM;

	ovs_lock();
	dp = lookup_datapath(sock_net(skb->sk), info->userhdr, info->attrs);
	if (IS_ERR(dp)) {
		err = PTR_ERR(dp);
		goto err_unlock_free;
	}
	err = ovs_dp_cmd_fill_info(dp, reply, info->snd_portid,
				   info->snd_seq, 0, OVS_DP_CMD_GET);
	BUG_ON(err < 0);
	ovs_unlock();

	return genlmsg_reply(reply, info);

err_unlock_free:
	ovs_unlock();
	kfree_skb(reply);
	return err;
}
static int ovs_dp_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
{
	struct ovs_net *ovs_net = net_generic(sock_net(skb->sk), ovs_net_id);
	struct datapath *dp;
	int skip = cb->args[0];
	int i = 0;

	ovs_lock();
	list_for_each_entry(dp, &ovs_net->dps, list_node) {
		if (i >= skip &&
		    ovs_dp_cmd_fill_info(dp, skb, NETLINK_CB(cb->skb).portid,
					 cb->nlh->nlmsg_seq, NLM_F_MULTI,
					 OVS_DP_CMD_GET) < 0)
			break;
		i++;
	}
	ovs_unlock();

	cb->args[0] = i;

	return skb->len;
}
static const struct nla_policy datapath_policy[OVS_DP_ATTR_MAX + 1] = {
	[OVS_DP_ATTR_NAME] = { .type = NLA_NUL_STRING, .len = IFNAMSIZ - 1 },
	[OVS_DP_ATTR_UPCALL_PID] = { .type = NLA_U32 },
	[OVS_DP_ATTR_USER_FEATURES] = { .type = NLA_U32 },
	[OVS_DP_ATTR_MASKS_CACHE_SIZE] = NLA_POLICY_RANGE(NLA_U32, 0,
		PCPU_MIN_UNIT_SIZE / sizeof(struct mask_cache_entry)),
};
static const struct genl_small_ops dp_datapath_genl_ops[] = {
	{ .cmd = OVS_DP_CMD_NEW,
	  .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
	  .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
	  .doit = ovs_dp_cmd_new
	},
	{ .cmd = OVS_DP_CMD_DEL,
	  .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
	  .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
	  .doit = ovs_dp_cmd_del
	},
	{ .cmd = OVS_DP_CMD_GET,
	  .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
	  .flags = 0, /* OK for unprivileged users. */
	  .doit = ovs_dp_cmd_get,
	  .dumpit = ovs_dp_cmd_dump
	},
	{ .cmd = OVS_DP_CMD_SET,
	  .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
	  .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
	  .doit = ovs_dp_cmd_set,
	},
};

static struct genl_family dp_datapath_genl_family __ro_after_init = {
	.hdrsize = sizeof(struct ovs_header),
	.name = OVS_DATAPATH_FAMILY,
	.version = OVS_DATAPATH_VERSION,
	.maxattr = OVS_DP_ATTR_MAX,
	.policy = datapath_policy,
	.netnsok = true,
	.parallel_ops = true,
	.small_ops = dp_datapath_genl_ops,
	.n_small_ops = ARRAY_SIZE(dp_datapath_genl_ops),
	.mcgrps = &ovs_dp_datapath_multicast_group,
	.n_mcgrps = 1,
	.module = THIS_MODULE,
};
/* Called with ovs_mutex or RCU read lock. */
static int ovs_vport_cmd_fill_info(struct vport *vport, struct sk_buff *skb,
				   struct net *net, u32 portid, u32 seq,
				   u32 flags, u8 cmd, gfp_t gfp)
{
	struct ovs_header *ovs_header;
	struct ovs_vport_stats vport_stats;
	int err;

	ovs_header = genlmsg_put(skb, portid, seq, &dp_vport_genl_family,
				 flags, cmd);
	if (!ovs_header)
		return -EMSGSIZE;

	ovs_header->dp_ifindex = get_dpifindex(vport->dp);

	if (nla_put_u32(skb, OVS_VPORT_ATTR_PORT_NO, vport->port_no) ||
	    nla_put_u32(skb, OVS_VPORT_ATTR_TYPE, vport->ops->type) ||
	    nla_put_string(skb, OVS_VPORT_ATTR_NAME,
			   ovs_vport_name(vport)) ||
	    nla_put_u32(skb, OVS_VPORT_ATTR_IFINDEX, vport->dev->ifindex))
		goto nla_put_failure;

	if (!net_eq(net, dev_net(vport->dev))) {
		int id = peernet2id_alloc(net, dev_net(vport->dev), gfp);

		if (nla_put_s32(skb, OVS_VPORT_ATTR_NETNSID, id))
			goto nla_put_failure;
	}

	ovs_vport_get_stats(vport, &vport_stats);
	if (nla_put_64bit(skb, OVS_VPORT_ATTR_STATS,
			  sizeof(struct ovs_vport_stats), &vport_stats,
			  OVS_VPORT_ATTR_PAD))
		goto nla_put_failure;

	if (ovs_vport_get_upcall_portids(vport, skb))
		goto nla_put_failure;

	err = ovs_vport_get_options(vport, skb);
	if (err == -EMSGSIZE)
		goto error;

	genlmsg_end(skb, ovs_header);
	return 0;

nla_put_failure:
	err = -EMSGSIZE;
error:
	genlmsg_cancel(skb, ovs_header);
	return err;
}
static struct sk_buff *ovs_vport_cmd_alloc_info(void)
{
	return nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
}
/* Called with ovs_mutex, only via ovs_dp_notify_wq(). */
struct sk_buff *ovs_vport_cmd_build_info(struct vport *vport, struct net *net,
					 u32 portid, u32 seq, u8 cmd)
{
	struct sk_buff *skb;
	int retval;

	skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
	if (!skb)
		return ERR_PTR(-ENOMEM);

	retval = ovs_vport_cmd_fill_info(vport, skb, net, portid, seq, 0, cmd,
					 GFP_KERNEL);
	BUG_ON(retval < 0);

	return skb;
}
/* Called with ovs_mutex or RCU read lock. */
static struct vport *lookup_vport(struct net *net,
				  const struct ovs_header *ovs_header,
				  struct nlattr *a[OVS_VPORT_ATTR_MAX + 1])
{
	struct datapath *dp;
	struct vport *vport;

	if (a[OVS_VPORT_ATTR_IFINDEX])
		return ERR_PTR(-EOPNOTSUPP);
	if (a[OVS_VPORT_ATTR_NAME]) {
		vport = ovs_vport_locate(net, nla_data(a[OVS_VPORT_ATTR_NAME]));
		if (!vport)
			return ERR_PTR(-ENODEV);
		if (ovs_header->dp_ifindex &&
		    ovs_header->dp_ifindex != get_dpifindex(vport->dp))
			return ERR_PTR(-ENODEV);
		return vport;
	} else if (a[OVS_VPORT_ATTR_PORT_NO]) {
		u32 port_no = nla_get_u32(a[OVS_VPORT_ATTR_PORT_NO]);

		if (port_no >= DP_MAX_PORTS)
			return ERR_PTR(-EFBIG);

		dp = get_dp(net, ovs_header->dp_ifindex);
		if (!dp)
			return ERR_PTR(-ENODEV);

		vport = ovs_vport_ovsl_rcu(dp, port_no);
		if (!vport)
			return ERR_PTR(-ENODEV);
		return vport;
	} else
		return ERR_PTR(-EINVAL);
}
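/* Compute the largest forwarding headroom required by any net_device
 * attached to this datapath; ovs_update_headroom() then propagates the
 * result to every vport.
 */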
static unsigned int ovs_get_max_headroom(struct datapath *dp)
{
	unsigned int dev_headroom, max_headroom = 0;
	struct net_device *dev;
	struct vport *vport;
	int i;

	for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++) {
		hlist_for_each_entry_rcu(vport, &dp->ports[i], dp_hash_node,
					 lockdep_ovsl_is_held()) {
			dev = vport->dev;
			dev_headroom = netdev_get_fwd_headroom(dev);
			if (dev_headroom > max_headroom)
				max_headroom = dev_headroom;
		}
	}

	return max_headroom;
}
/* Called with ovs_mutex */
static void ovs_update_headroom(struct datapath *dp, unsigned int new_headroom)
{
	struct vport *vport;
	int i;

	dp->max_headroom = new_headroom;
	for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++) {
		hlist_for_each_entry_rcu(vport, &dp->ports[i], dp_hash_node,
					 lockdep_ovsl_is_held())
			netdev_set_rx_headroom(vport->dev, new_headroom);
	}
}
static int ovs_vport_cmd_new(struct sk_buff *skb, struct genl_info *info)
{
	struct nlattr **a = info->attrs;
	struct ovs_header *ovs_header = info->userhdr;
	struct vport_parms parms;
	struct sk_buff *reply;
	struct vport *vport;
	struct datapath *dp;
	unsigned int new_headroom;
	u32 port_no;
	int err;

	if (!a[OVS_VPORT_ATTR_NAME] || !a[OVS_VPORT_ATTR_TYPE] ||
	    !a[OVS_VPORT_ATTR_UPCALL_PID])
		return -EINVAL;
	if (a[OVS_VPORT_ATTR_IFINDEX])
		return -EOPNOTSUPP;

	port_no = a[OVS_VPORT_ATTR_PORT_NO]
		? nla_get_u32(a[OVS_VPORT_ATTR_PORT_NO]) : 0;
	if (port_no >= DP_MAX_PORTS)
		return -EFBIG;

	reply = ovs_vport_cmd_alloc_info();
	if (!reply)
		return -ENOMEM;

	ovs_lock();
restart:
	dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
	err = -ENODEV;
	if (!dp)
		goto exit_unlock_free;

	if (port_no) {
		vport = ovs_vport_ovsl(dp, port_no);
		err = -EBUSY;
		if (vport)
			goto exit_unlock_free;
	} else {
		for (port_no = 1; ; port_no++) {
			if (port_no >= DP_MAX_PORTS) {
				err = -EFBIG;
				goto exit_unlock_free;
			}
			vport = ovs_vport_ovsl(dp, port_no);
			if (!vport)
				break;
		}
	}

	parms.name = nla_data(a[OVS_VPORT_ATTR_NAME]);
	parms.type = nla_get_u32(a[OVS_VPORT_ATTR_TYPE]);
	parms.options = a[OVS_VPORT_ATTR_OPTIONS];
	parms.dp = dp;
	parms.port_no = port_no;
	parms.upcall_portids = a[OVS_VPORT_ATTR_UPCALL_PID];

	vport = new_vport(&parms);
	err = PTR_ERR(vport);
	if (IS_ERR(vport)) {
		if (err == -EAGAIN)
			goto restart;
		goto exit_unlock_free;
	}

	err = ovs_vport_cmd_fill_info(vport, reply, genl_info_net(info),
				      info->snd_portid, info->snd_seq, 0,
				      OVS_VPORT_CMD_NEW, GFP_KERNEL);

	new_headroom = netdev_get_fwd_headroom(vport->dev);

	if (new_headroom > dp->max_headroom)
		ovs_update_headroom(dp, new_headroom);
	else
		netdev_set_rx_headroom(vport->dev, dp->max_headroom);

	BUG_ON(err < 0);
	ovs_unlock();

	ovs_notify(&dp_vport_genl_family, reply, info);
	return 0;

exit_unlock_free:
	ovs_unlock();
	kfree_skb(reply);
	return err;
}
static int ovs_vport_cmd_set(struct sk_buff *skb, struct genl_info *info)
{
	struct nlattr **a = info->attrs;
	struct sk_buff *reply;
	struct vport *vport;
	int err;

	reply = ovs_vport_cmd_alloc_info();
	if (!reply)
		return -ENOMEM;

	ovs_lock();
	vport = lookup_vport(sock_net(skb->sk), info->userhdr, a);
	err = PTR_ERR(vport);
	if (IS_ERR(vport))
		goto exit_unlock_free;

	if (a[OVS_VPORT_ATTR_TYPE] &&
	    nla_get_u32(a[OVS_VPORT_ATTR_TYPE]) != vport->ops->type) {
		err = -EINVAL;
		goto exit_unlock_free;
	}

	if (a[OVS_VPORT_ATTR_OPTIONS]) {
		err = ovs_vport_set_options(vport, a[OVS_VPORT_ATTR_OPTIONS]);
		if (err)
			goto exit_unlock_free;
	}

	if (a[OVS_VPORT_ATTR_UPCALL_PID]) {
		struct nlattr *ids = a[OVS_VPORT_ATTR_UPCALL_PID];

		err = ovs_vport_set_upcall_portids(vport, ids);
		if (err)
			goto exit_unlock_free;
	}

	err = ovs_vport_cmd_fill_info(vport, reply, genl_info_net(info),
				      info->snd_portid, info->snd_seq, 0,
				      OVS_VPORT_CMD_SET, GFP_KERNEL);
	BUG_ON(err < 0);

	ovs_unlock();
	ovs_notify(&dp_vport_genl_family, reply, info);
	return 0;

exit_unlock_free:
	ovs_unlock();
	kfree_skb(reply);
	return err;
}
static int ovs_vport_cmd_del(struct sk_buff *skb, struct genl_info *info)
{
	bool update_headroom = false;
	struct nlattr **a = info->attrs;
	struct sk_buff *reply;
	struct datapath *dp;
	struct vport *vport;
	unsigned int new_headroom;
	int err;

	reply = ovs_vport_cmd_alloc_info();
	if (!reply)
		return -ENOMEM;

	ovs_lock();
	vport = lookup_vport(sock_net(skb->sk), info->userhdr, a);
	err = PTR_ERR(vport);
	if (IS_ERR(vport))
		goto exit_unlock_free;

	if (vport->port_no == OVSP_LOCAL) {
		err = -EINVAL;
		goto exit_unlock_free;
	}

	err = ovs_vport_cmd_fill_info(vport, reply, genl_info_net(info),
				      info->snd_portid, info->snd_seq, 0,
				      OVS_VPORT_CMD_DEL, GFP_KERNEL);
	BUG_ON(err < 0);

	/* the vport deletion may trigger dp headroom update */
	dp = vport->dp;
	if (netdev_get_fwd_headroom(vport->dev) == dp->max_headroom)
		update_headroom = true;

	netdev_reset_rx_headroom(vport->dev);
	ovs_dp_detach_port(vport);

	if (update_headroom) {
		new_headroom = ovs_get_max_headroom(dp);

		if (new_headroom < dp->max_headroom)
			ovs_update_headroom(dp, new_headroom);
	}
	ovs_unlock();

	ovs_notify(&dp_vport_genl_family, reply, info);
	return 0;

exit_unlock_free:
	ovs_unlock();
	kfree_skb(reply);
	return err;
}
static int ovs_vport_cmd_get(struct sk_buff *skb, struct genl_info *info)
{
	struct nlattr **a = info->attrs;
	struct ovs_header *ovs_header = info->userhdr;
	struct sk_buff *reply;
	struct vport *vport;
	int err;

	reply = ovs_vport_cmd_alloc_info();
	if (!reply)
		return -ENOMEM;

	rcu_read_lock();
	vport = lookup_vport(sock_net(skb->sk), ovs_header, a);
	err = PTR_ERR(vport);
	if (IS_ERR(vport))
		goto exit_unlock_free;
	err = ovs_vport_cmd_fill_info(vport, reply, genl_info_net(info),
				      info->snd_portid, info->snd_seq, 0,
				      OVS_VPORT_CMD_GET, GFP_ATOMIC);
	BUG_ON(err < 0);
	rcu_read_unlock();

	return genlmsg_reply(reply, info);

exit_unlock_free:
	rcu_read_unlock();
	kfree_skb(reply);
	return err;
}
static int ovs_vport_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
{
	struct ovs_header *ovs_header = genlmsg_data(nlmsg_data(cb->nlh));
	struct datapath *dp;
	int bucket = cb->args[0], skip = cb->args[1];
	int i, j = 0;

	rcu_read_lock();
	dp = get_dp_rcu(sock_net(skb->sk), ovs_header->dp_ifindex);
	if (!dp) {
		rcu_read_unlock();
		return -ENODEV;
	}
	for (i = bucket; i < DP_VPORT_HASH_BUCKETS; i++) {
		struct vport *vport;

		j = 0;
		hlist_for_each_entry_rcu(vport, &dp->ports[i], dp_hash_node) {
			if (j >= skip &&
			    ovs_vport_cmd_fill_info(vport, skb,
						    sock_net(skb->sk),
						    NETLINK_CB(cb->skb).portid,
						    cb->nlh->nlmsg_seq,
						    NLM_F_MULTI,
						    OVS_VPORT_CMD_GET,
						    GFP_ATOMIC) < 0)
				goto out;

			j++;
		}
		skip = 0;
	}
out:
	rcu_read_unlock();

	cb->args[0] = i;
	cb->args[1] = j;

	return skb->len;
}
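/* Periodic, self-rescheduling work item: invokes
 * ovs_flow_masks_rebalance() on each datapath's flow table, then
 * reschedules itself after DP_MASKS_REBALANCE_INTERVAL milliseconds.
 */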
static void ovs_dp_masks_rebalance(struct work_struct *work)
{
	struct ovs_net *ovs_net = container_of(work, struct ovs_net,
					       masks_rebalance.work);
	struct datapath *dp;

	ovs_lock();

	list_for_each_entry(dp, &ovs_net->dps, list_node)
		ovs_flow_masks_rebalance(&dp->table);

	ovs_unlock();

	schedule_delayed_work(&ovs_net->masks_rebalance,
			      msecs_to_jiffies(DP_MASKS_REBALANCE_INTERVAL));
}
static const struct nla_policy vport_policy[OVS_VPORT_ATTR_MAX + 1] = {
	[OVS_VPORT_ATTR_NAME] = { .type = NLA_NUL_STRING, .len = IFNAMSIZ - 1 },
	[OVS_VPORT_ATTR_STATS] = { .len = sizeof(struct ovs_vport_stats) },
	[OVS_VPORT_ATTR_PORT_NO] = { .type = NLA_U32 },
	[OVS_VPORT_ATTR_TYPE] = { .type = NLA_U32 },
	[OVS_VPORT_ATTR_UPCALL_PID] = { .type = NLA_UNSPEC },
	[OVS_VPORT_ATTR_OPTIONS] = { .type = NLA_NESTED },
	[OVS_VPORT_ATTR_IFINDEX] = { .type = NLA_U32 },
	[OVS_VPORT_ATTR_NETNSID] = { .type = NLA_S32 },
};
static const struct genl_small_ops dp_vport_genl_ops[] = {
	{ .cmd = OVS_VPORT_CMD_NEW,
	  .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
	  .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
	  .doit = ovs_vport_cmd_new
	},
	{ .cmd = OVS_VPORT_CMD_DEL,
	  .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
	  .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
	  .doit = ovs_vport_cmd_del
	},
	{ .cmd = OVS_VPORT_CMD_GET,
	  .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
	  .flags = 0, /* OK for unprivileged users. */
	  .doit = ovs_vport_cmd_get,
	  .dumpit = ovs_vport_cmd_dump
	},
	{ .cmd = OVS_VPORT_CMD_SET,
	  .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
	  .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
	  .doit = ovs_vport_cmd_set,
	},
};

struct genl_family dp_vport_genl_family __ro_after_init = {
	.hdrsize = sizeof(struct ovs_header),
	.name = OVS_VPORT_FAMILY,
	.version = OVS_VPORT_VERSION,
	.maxattr = OVS_VPORT_ATTR_MAX,
	.policy = vport_policy,
	.netnsok = true,
	.parallel_ops = true,
	.small_ops = dp_vport_genl_ops,
	.n_small_ops = ARRAY_SIZE(dp_vport_genl_ops),
	.mcgrps = &ovs_dp_vport_multicast_group,
	.n_mcgrps = 1,
	.module = THIS_MODULE,
};
static struct genl_family * const dp_genl_families[] = {
	&dp_datapath_genl_family,
	&dp_vport_genl_family,
	&dp_flow_genl_family,
	&dp_packet_genl_family,
	&dp_meter_genl_family,
#if IS_ENABLED(CONFIG_NETFILTER_CONNCOUNT)
	&dp_ct_limit_genl_family,
#endif
};
static void dp_unregister_genl(int n_families)
{
	int i;

	for (i = 0; i < n_families; i++)
		genl_unregister_family(dp_genl_families[i]);
}
static int __init dp_register_genl(void)
{
	int err;
	int i;

	for (i = 0; i < ARRAY_SIZE(dp_genl_families); i++) {

		err = genl_register_family(dp_genl_families[i]);
		if (err)
			goto error;
	}

	return 0;

error:
	dp_unregister_genl(i);
	return err;
}
static int __net_init ovs_init_net(struct net *net)
{
	struct ovs_net *ovs_net = net_generic(net, ovs_net_id);
	int err;

	INIT_LIST_HEAD(&ovs_net->dps);
	INIT_WORK(&ovs_net->dp_notify_work, ovs_dp_notify_wq);
	INIT_DELAYED_WORK(&ovs_net->masks_rebalance, ovs_dp_masks_rebalance);

	err = ovs_ct_init(net);
	if (err)
		return err;

	schedule_delayed_work(&ovs_net->masks_rebalance,
			      msecs_to_jiffies(DP_MASKS_REBALANCE_INTERVAL));
	return 0;
}
static void __net_exit list_vports_from_net(struct net *net, struct net *dnet,
					    struct list_head *head)
{
	struct ovs_net *ovs_net = net_generic(net, ovs_net_id);
	struct datapath *dp;

	list_for_each_entry(dp, &ovs_net->dps, list_node) {
		int i;

		for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++) {
			struct vport *vport;

			hlist_for_each_entry(vport, &dp->ports[i], dp_hash_node) {
				if (vport->ops->type != OVS_VPORT_TYPE_INTERNAL)
					continue;

				if (dev_net(vport->dev) == dnet)
					list_add(&vport->detach_list, head);
			}
		}
	}
}
static void __net_exit ovs_exit_net(struct net *dnet)
{
	struct datapath *dp, *dp_next;
	struct ovs_net *ovs_net = net_generic(dnet, ovs_net_id);
	struct vport *vport, *vport_next;
	struct net *net;
	LIST_HEAD(head);

	ovs_lock();

	ovs_ct_exit(dnet);

	list_for_each_entry_safe(dp, dp_next, &ovs_net->dps, list_node)
		__dp_destroy(dp);

	down_read(&net_rwsem);
	for_each_net(net)
		list_vports_from_net(net, dnet, &head);
	up_read(&net_rwsem);

	/* Detach all vports from given namespace. */
	list_for_each_entry_safe(vport, vport_next, &head, detach_list) {
		list_del(&vport->detach_list);
		ovs_dp_detach_port(vport);
	}

	ovs_unlock();

	cancel_delayed_work_sync(&ovs_net->masks_rebalance);
	cancel_work_sync(&ovs_net->dp_notify_work);
}
static struct pernet_operations ovs_net_ops = {
	.init = ovs_init_net,
	.exit = ovs_exit_net,
	.id   = &ovs_net_id,
	.size = sizeof(struct ovs_net),
};
static int __init dp_init(void)
{
	int err;

	BUILD_BUG_ON(sizeof(struct ovs_skb_cb) >
		     sizeof_field(struct sk_buff, cb));

	pr_info("Open vSwitch switching datapath\n");

	err = action_fifos_init();
	if (err)
		goto error;

	err = ovs_internal_dev_rtnl_link_register();
	if (err)
		goto error_action_fifos_exit;

	err = ovs_flow_init();
	if (err)
		goto error_unreg_rtnl_link;

	err = ovs_vport_init();
	if (err)
		goto error_flow_exit;

	err = register_pernet_device(&ovs_net_ops);
	if (err)
		goto error_vport_exit;

	err = register_netdevice_notifier(&ovs_dp_device_notifier);
	if (err)
		goto error_netns_exit;

	err = ovs_netdev_init();
	if (err)
		goto error_unreg_notifier;

	err = dp_register_genl();
	if (err < 0)
		goto error_unreg_netdev;

	return 0;

error_unreg_netdev:
	ovs_netdev_exit();
error_unreg_notifier:
	unregister_netdevice_notifier(&ovs_dp_device_notifier);
error_netns_exit:
	unregister_pernet_device(&ovs_net_ops);
error_vport_exit:
	ovs_vport_exit();
error_flow_exit:
	ovs_flow_exit();
error_unreg_rtnl_link:
	ovs_internal_dev_rtnl_link_unregister();
error_action_fifos_exit:
	action_fifos_exit();
error:
	return err;
}
static void dp_cleanup(void)
{
	dp_unregister_genl(ARRAY_SIZE(dp_genl_families));
	ovs_netdev_exit();
	unregister_netdevice_notifier(&ovs_dp_device_notifier);
	unregister_pernet_device(&ovs_net_ops);
	rcu_barrier();
	ovs_vport_exit();
	ovs_flow_exit();
	ovs_internal_dev_rtnl_link_unregister();
	action_fifos_exit();
}
);
2636 module_exit(dp_cleanup
);
2638 MODULE_DESCRIPTION("Open vSwitch switching datapath");
2639 MODULE_LICENSE("GPL");
2640 MODULE_ALIAS_GENL_FAMILY(OVS_DATAPATH_FAMILY
);
2641 MODULE_ALIAS_GENL_FAMILY(OVS_VPORT_FAMILY
);
2642 MODULE_ALIAS_GENL_FAMILY(OVS_FLOW_FAMILY
);
2643 MODULE_ALIAS_GENL_FAMILY(OVS_PACKET_FAMILY
);
2644 MODULE_ALIAS_GENL_FAMILY(OVS_METER_FAMILY
);
2645 MODULE_ALIAS_GENL_FAMILY(OVS_CT_LIMIT_FAMILY
);