/* SPDX-License-Identifier: GPL-2.0 */
/*
 * Operations on the network namespace
 */
5 #ifndef __NET_NET_NAMESPACE_H
6 #define __NET_NET_NAMESPACE_H
8 #include <linux/atomic.h>
9 #include <linux/refcount.h>
10 #include <linux/workqueue.h>
11 #include <linux/list.h>
12 #include <linux/sysctl.h>
13 #include <linux/uidgid.h>
16 #include <net/netns/core.h>
17 #include <net/netns/mib.h>
18 #include <net/netns/unix.h>
19 #include <net/netns/packet.h>
20 #include <net/netns/ipv4.h>
21 #include <net/netns/ipv6.h>
22 #include <net/netns/nexthop.h>
23 #include <net/netns/ieee802154_6lowpan.h>
24 #include <net/netns/sctp.h>
25 #include <net/netns/netfilter.h>
26 #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
27 #include <net/netns/conntrack.h>
29 #if IS_ENABLED(CONFIG_NF_FLOW_TABLE)
30 #include <net/netns/flow_table.h>
32 #include <net/netns/nftables.h>
33 #include <net/netns/xfrm.h>
34 #include <net/netns/mpls.h>
35 #include <net/netns/can.h>
36 #include <net/netns/xdp.h>
37 #include <net/netns/smc.h>
38 #include <net/netns/bpf.h>
39 #include <net/netns/mctp.h>
40 #include <net/net_trackers.h>
41 #include <linux/ns_common.h>
42 #include <linux/idr.h>
43 #include <linux/skbuff.h>
44 #include <linux/notifier.h>
45 #include <linux/xarray.h>
/*
 * Forward declarations.  This header only refers to these structures
 * through pointers, so the tags alone are sufficient here.
 */
struct user_namespace;
struct proc_dir_entry;
struct ctl_table_header;
/*
 * NETDEV_HASHENTRIES is 1 << NETDEV_HASHBITS, i.e. 256 entries.
 * NOTE(review): presumably the bucket count of the per-namespace device
 * hash tables (dev_name_head / dev_index_head) -- confirm against users.
 */
#define NETDEV_HASHBITS    8
#define NETDEV_HASHENTRIES (1 << NETDEV_HASHBITS)
62 /* First cache line can be often dirtied.
63 * Do not place here read-mostly fields.
65 refcount_t passive
; /* To decide when the network
66 * namespace should be freed.
68 spinlock_t rules_mod_lock
;
70 unsigned int dev_base_seq
; /* protected by rtnl_mutex */
76 struct list_head list
; /* list of network namespaces */
77 struct list_head exit_list
; /* To linked to call pernet exit
78 * methods on dead net (
79 * pernet_ops_rwsem read locked),
80 * or to unregister pernet ops
81 * (pernet_ops_rwsem write locked).
83 struct llist_node cleanup_list
; /* namespaces on death row */
86 struct key_tag
*key_domain
; /* Key domain of operation tag */
88 struct user_namespace
*user_ns
; /* Owning user namespace */
89 struct ucounts
*ucounts
;
93 struct ref_tracker_dir refcnt_tracker
;
94 struct ref_tracker_dir notrefcnt_tracker
; /* tracker for objects not
95 * refcounted against netns
97 struct list_head dev_base_head
;
98 struct proc_dir_entry
*proc_net
;
99 struct proc_dir_entry
*proc_net_stat
;
102 struct ctl_table_set sysctls
;
105 struct sock
*rtnl
; /* rtnetlink socket */
106 struct sock
*genl_sock
;
108 struct uevent_sock
*uevent_sock
; /* uevent socket */
110 struct hlist_head
*dev_name_head
;
111 struct hlist_head
*dev_index_head
;
112 struct xarray dev_by_index
;
113 struct raw_notifier_head netdev_chain
;
115 /* Note that @hash_mix can be read millions times per second,
116 * it is critical that it is on a read_mostly cache line.
120 struct net_device
*loopback_dev
; /* The loopback */
123 struct list_head rules_ops
;
125 struct netns_core core
;
126 struct netns_mib mib
;
127 struct netns_packet packet
;
128 #if IS_ENABLED(CONFIG_UNIX)
129 struct netns_unix unx
;
131 struct netns_nexthop nexthop
;
132 struct netns_ipv4 ipv4
;
133 #if IS_ENABLED(CONFIG_IPV6)
134 struct netns_ipv6 ipv6
;
136 #if IS_ENABLED(CONFIG_IEEE802154_6LOWPAN)
137 struct netns_ieee802154_lowpan ieee802154_lowpan
;
139 #if defined(CONFIG_IP_SCTP) || defined(CONFIG_IP_SCTP_MODULE)
140 struct netns_sctp sctp
;
142 #ifdef CONFIG_NETFILTER
144 #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
147 #if defined(CONFIG_NF_TABLES) || defined(CONFIG_NF_TABLES_MODULE)
148 struct netns_nftables nft
;
150 #if IS_ENABLED(CONFIG_NF_FLOW_TABLE)
154 #ifdef CONFIG_WEXT_CORE
155 struct sk_buff_head wext_nlevents
;
157 struct net_generic __rcu
*gen
;
159 /* Used to store attached BPF programs */
160 struct netns_bpf bpf
;
162 /* Note : following structs are cache line aligned */
164 struct netns_xfrm xfrm
;
167 u64 net_cookie
; /* written once */
169 #if IS_ENABLED(CONFIG_IP_VS)
170 struct netns_ipvs
*ipvs
;
172 #if IS_ENABLED(CONFIG_MPLS)
173 struct netns_mpls mpls
;
175 #if IS_ENABLED(CONFIG_CAN)
176 struct netns_can can
;
178 #ifdef CONFIG_XDP_SOCKETS
179 struct netns_xdp xdp
;
181 #if IS_ENABLED(CONFIG_MCTP)
182 struct netns_mctp mctp
;
184 #if IS_ENABLED(CONFIG_CRYPTO_USER)
185 struct sock
*crypto_nlsk
;
187 struct sock
*diag_nlsk
;
188 #if IS_ENABLED(CONFIG_SMC)
189 struct netns_smc smc
;
191 #ifdef CONFIG_DEBUG_NET_SMALL_RTNL
192 /* Move to a better place when the config guard is removed. */
193 struct mutex rtnl_mutex
;
195 } __randomize_layout
;
197 #include <linux/seq_file_net.h>
/* Init's network namespace (defined outside this header). */
extern struct net init_net;
203 struct net
*copy_net_ns(unsigned long flags
, struct user_namespace
*user_ns
,
204 struct net
*old_net
);
206 void net_ns_get_ownership(const struct net
*net
, kuid_t
*uid
, kgid_t
*gid
);
208 void net_ns_barrier(void);
210 struct ns_common
*get_net_ns(struct ns_common
*ns
);
211 struct net
*get_net_ns_by_fd(int fd
);
212 #else /* CONFIG_NET_NS */
213 #include <linux/sched.h>
214 #include <linux/nsproxy.h>
215 static inline struct net
*copy_net_ns(unsigned long flags
,
216 struct user_namespace
*user_ns
, struct net
*old_net
)
218 if (flags
& CLONE_NEWNET
)
219 return ERR_PTR(-EINVAL
);
223 static inline void net_ns_get_ownership(const struct net
*net
,
224 kuid_t
*uid
, kgid_t
*gid
)
226 *uid
= GLOBAL_ROOT_UID
;
227 *gid
= GLOBAL_ROOT_GID
;
/* Fallback used in the "#else" branch of CONFIG_NET_NS: does nothing. */
static inline void net_ns_barrier(void)
{
}
232 static inline struct ns_common
*get_net_ns(struct ns_common
*ns
)
234 return ERR_PTR(-EINVAL
);
237 static inline struct net
*get_net_ns_by_fd(int fd
)
239 return ERR_PTR(-EINVAL
);
241 #endif /* CONFIG_NET_NS */
/* List head walked by for_each_net() and the related iterators. */
extern struct list_head net_namespace_list;

struct net *get_net_ns_by_pid(pid_t pid);

/*
 * IPX sysctl registration hooks.
 * NOTE(review): the preprocessor guard that selects between these
 * prototypes and the no-op macros of the same name is not visible in
 * this view.
 */
void ipx_register_sysctl(void);
void ipx_unregister_sysctl(void);
/*
 * No-op variants of the IPX sysctl hooks: both expand to nothing.
 * NOTE(review): the preprocessor guard that selects these over the
 * prototypes of the same name is not visible in this view.
 */
#define ipx_register_sysctl()
#define ipx_unregister_sysctl()
/* Out-of-line helper; implemented outside this header. */
void __put_net(struct net *net);
259 /* Try using get_net_track() instead */
260 static inline struct net
*get_net(struct net
*net
)
262 refcount_inc(&net
->ns
.count
);
266 static inline struct net
*maybe_get_net(struct net
*net
)
268 /* Used when we know struct net exists but we
269 * aren't guaranteed a previous reference count
270 * exists. If the reference count is zero this
271 * function fails and returns NULL.
273 if (!refcount_inc_not_zero(&net
->ns
.count
))
278 /* Try using put_net_track() instead */
279 static inline void put_net(struct net
*net
)
281 if (refcount_dec_and_test(&net
->ns
.count
))
286 int net_eq(const struct net
*net1
, const struct net
*net2
)
291 static inline int check_net(const struct net
*net
)
293 return refcount_read(&net
->ns
.count
) != 0;
/*
 * Takes an untyped pointer; what it must point to is not visible in
 * this header.
 */
void net_drop_ns(void *);
300 static inline struct net
*get_net(struct net
*net
)
305 static inline void put_net(struct net
*net
)
309 static inline struct net
*maybe_get_net(struct net
*net
)
315 int net_eq(const struct net
*net1
, const struct net
*net2
)
320 static inline int check_net(const struct net
*net
)
/*
 * Alternative to the net_drop_ns() prototype: the name expands to NULL.
 * NOTE(review): the preprocessor guard selecting this variant is not
 * visible in this view.
 */
#define net_drop_ns NULL
328 /* Returns true if the netns initialization is completed successfully */
329 static inline bool net_initialized(const struct net
*net
)
331 return READ_ONCE(net
->list
.next
);
334 static inline void __netns_tracker_alloc(struct net
*net
,
335 netns_tracker
*tracker
,
339 #ifdef CONFIG_NET_NS_REFCNT_TRACKER
340 ref_tracker_alloc(refcounted
? &net
->refcnt_tracker
:
341 &net
->notrefcnt_tracker
,
346 static inline void netns_tracker_alloc(struct net
*net
, netns_tracker
*tracker
,
349 __netns_tracker_alloc(net
, tracker
, true, gfp
);
352 static inline void __netns_tracker_free(struct net
*net
,
353 netns_tracker
*tracker
,
356 #ifdef CONFIG_NET_NS_REFCNT_TRACKER
357 ref_tracker_free(refcounted
? &net
->refcnt_tracker
:
358 &net
->notrefcnt_tracker
, tracker
);
362 static inline struct net
*get_net_track(struct net
*net
,
363 netns_tracker
*tracker
, gfp_t gfp
)
366 netns_tracker_alloc(net
, tracker
, gfp
);
370 static inline void put_net_track(struct net
*net
, netns_tracker
*tracker
)
372 __netns_tracker_free(net
, tracker
, true);
378 struct net __rcu
*net
;
382 static inline void write_pnet(possible_net_t
*pnet
, struct net
*net
)
385 rcu_assign_pointer(pnet
->net
, net
);
389 static inline struct net
*read_pnet(const possible_net_t
*pnet
)
392 return rcu_dereference_protected(pnet
->net
, true);
398 static inline struct net
*read_pnet_rcu(possible_net_t
*pnet
)
401 return rcu_dereference(pnet
->net
);
/*
 * Iterators over net_namespace_list, linked through the "list" member
 * of the iterated structure.  Protected by net_rwsem.
 *
 * @VAR: loop cursor, passed unchanged to the underlying list iterator.
 */
#define for_each_net(VAR)				\
	list_for_each_entry(VAR, &net_namespace_list, list)
#define for_each_net_continue_reverse(VAR)		\
	list_for_each_entry_continue_reverse(VAR, &net_namespace_list, list)
#define for_each_net_rcu(VAR)				\
	list_for_each_entry_rcu(VAR, &net_namespace_list, list)
418 #define __net_initdata
419 #define __net_initconst
421 #define __net_init __init
422 #define __net_exit __ref
423 #define __net_initdata __initdata
424 #define __net_initconst __initconst
427 int peernet2id_alloc(struct net
*net
, struct net
*peer
, gfp_t gfp
);
428 int peernet2id(const struct net
*net
, struct net
*peer
);
429 bool peernet_has_id(const struct net
*net
, struct net
*peer
);
430 struct net
*get_net_ns_by_id(const struct net
*net
, int id
);
432 struct pernet_operations
{
433 struct list_head list
;
435 * Below methods are called without any exclusive locks.
436 * More than one net may be constructed and destructed
437 * in parallel on several cpus. Every pernet_operations
438 * have to keep in mind all other pernet_operations and
439 * to introduce a locking, if they share common resources.
441 * The only time they are called with exclusive lock is
442 * from register_pernet_subsys(), unregister_pernet_subsys()
443 * register_pernet_device() and unregister_pernet_device().
445 * Exit methods using blocking RCU primitives, such as
446 * synchronize_rcu(), should be implemented via exit_batch.
447 * Then, destruction of a group of net requires single
448 * synchronize_rcu() related to these pernet_operations,
449 * instead of separate synchronize_rcu() for every net.
450 * Please, avoid synchronize_rcu() at all, where it's possible.
452 * Note that a combination of pre_exit() and exit() can
453 * be used, since a synchronize_rcu() is guaranteed between
456 int (*init
)(struct net
*net
);
457 void (*pre_exit
)(struct net
*net
);
458 void (*exit
)(struct net
*net
);
459 void (*exit_batch
)(struct list_head
*net_exit_list
);
460 /* Following method is called with RTNL held. */
461 void (*exit_batch_rtnl
)(struct list_head
*net_exit_list
,
462 struct list_head
*dev_kill_list
);
463 unsigned int * const id
;
/*
 * Use these carefully.  If you implement a network device and it
 * needs per network namespace operations use device pernet operations,
 * otherwise use pernet subsys operations.
 *
 * Network interfaces need to be removed from a dying netns _before_
 * subsys notifiers can be called, as most of the network code cleanup
 * (which is done from subsys notifiers) runs with the assumption that
 * dev_remove_pack has been called so no new packets will arrive during
 * and after the cleanup functions have been called.  dev_remove_pack
 * is not per namespace so instead the guarantee of no more packets
 * arriving in a network namespace is provided by ensuring that all
 * network devices and all sockets have left the network namespace
 * before the cleanup methods are called.
 *
 * For the longest time the ipv4 icmp code was registered as a pernet
 * device which caused kernel oops, and panics during network
 * namespace cleanup.  So please don't get this wrong.
 */
/*
 * Registration entry points for pernet operations, in a "subsys" and a
 * "device" flavour.  The register calls return an int; the unregister
 * calls return nothing.
 */
int register_pernet_subsys(struct pernet_operations *ops);
void unregister_pernet_subsys(struct pernet_operations *ops);
int register_pernet_device(struct pernet_operations *ops);
void unregister_pernet_device(struct pernet_operations *ops);
/*
 * Convenience wrapper around register_net_sysctl_sz() that supplies the
 * table size as ARRAY_SIZE(table).  @table is therefore expanded twice
 * and must be an actual array, not a pointer.
 */
#define register_net_sysctl(net, path, table)	\
	register_net_sysctl_sz(net, path, table, ARRAY_SIZE(table))
/*
 * Out-of-line net sysctl interface; inline fallbacks with the same
 * names appear further down in this header.
 */
int net_sysctl_init(void);
struct ctl_table_header *register_net_sysctl_sz(struct net *net,
						const char *path,
						struct ctl_table *table,
						size_t table_size);
void unregister_net_sysctl_table(struct ctl_table_header *header);
/* Inline fallback: nothing to set up, always returns 0. */
static inline int net_sysctl_init(void)
{
	return 0;
}
502 static inline struct ctl_table_header
*register_net_sysctl_sz(struct net
*net
,
503 const char *path
, struct ctl_table
*table
, size_t table_size
)
507 static inline void unregister_net_sysctl_table(struct ctl_table_header
*header
)
512 static inline int rt_genid_ipv4(const struct net
*net
)
514 return atomic_read(&net
->ipv4
.rt_genid
);
517 #if IS_ENABLED(CONFIG_IPV6)
518 static inline int rt_genid_ipv6(const struct net
*net
)
520 return atomic_read(&net
->ipv6
.fib6_sernum
);
524 static inline void rt_genid_bump_ipv4(struct net
*net
)
526 atomic_inc(&net
->ipv4
.rt_genid
);
/* Function-pointer hook defined outside this header; may be NULL. */
extern void (*__fib6_flush_trees)(struct net *net);

/*
 * Invoke the __fib6_flush_trees hook for @net if one is installed;
 * otherwise do nothing.
 */
static inline void rt_genid_bump_ipv6(struct net *net)
{
	if (__fib6_flush_trees)
		__fib6_flush_trees(net);
}
536 #if IS_ENABLED(CONFIG_IEEE802154_6LOWPAN)
537 static inline struct netns_ieee802154_lowpan
*
538 net_ieee802154_lowpan(struct net
*net
)
540 return &net
->ieee802154_lowpan
;
/*
 * For callers who don't really care about whether it's IPv4 or IPv6:
 * performs the IPv4 bump first, then the IPv6 one.
 */
static inline void rt_genid_bump_all(struct net *net)
{
	rt_genid_bump_ipv4(net);
	rt_genid_bump_ipv6(net);
}
551 static inline int fnhe_genid(const struct net
*net
)
553 return atomic_read(&net
->fnhe_genid
);
556 static inline void fnhe_genid_bump(struct net
*net
)
558 atomic_inc(&net
->fnhe_genid
);
/*
 * Out-of-line initialisation entry point.
 * NOTE(review): the preprocessor guard that selects between this
 * prototype and the empty inline variant is not visible in this view.
 */
void net_ns_init(void);
/* Inline fallback: nothing to initialise. */
static inline void net_ns_init(void)
{
}
567 #endif /* __NET_NET_NAMESPACE_H */