1 // SPDX-License-Identifier: GPL-2.0
4 * Copyright (c) 2019, Tessares SA.
8 #include <linux/sysctl.h>
11 #include <net/net_namespace.h>
12 #include <net/netns/generic.h>
17 #define MPTCP_SYSCTL_PATH "net/mptcp"
19 static int mptcp_pernet_id
;
22 static int mptcp_pm_type_max
= __MPTCP_PM_TYPE_MAX
;
27 struct ctl_table_header
*ctl_table_hdr
;
30 unsigned int add_addr_timeout
;
31 unsigned int blackhole_timeout
;
32 unsigned int close_timeout
;
33 unsigned int stale_loss_cnt
;
34 atomic_t active_disable_times
;
35 unsigned long active_disable_stamp
;
38 u8 allow_join_initial_addr_port
;
40 char scheduler
[MPTCP_SCHED_NAME_MAX
];
43 static struct mptcp_pernet
*mptcp_get_pernet(const struct net
*net
)
45 return net_generic(net
, mptcp_pernet_id
);
48 int mptcp_is_enabled(const struct net
*net
)
50 return mptcp_get_pernet(net
)->mptcp_enabled
;
53 unsigned int mptcp_get_add_addr_timeout(const struct net
*net
)
55 return mptcp_get_pernet(net
)->add_addr_timeout
;
58 int mptcp_is_checksum_enabled(const struct net
*net
)
60 return mptcp_get_pernet(net
)->checksum_enabled
;
63 int mptcp_allow_join_id0(const struct net
*net
)
65 return mptcp_get_pernet(net
)->allow_join_initial_addr_port
;
68 unsigned int mptcp_stale_loss_cnt(const struct net
*net
)
70 return mptcp_get_pernet(net
)->stale_loss_cnt
;
73 unsigned int mptcp_close_timeout(const struct sock
*sk
)
75 if (sock_flag(sk
, SOCK_DEAD
))
76 return TCP_TIMEWAIT_LEN
;
77 return mptcp_get_pernet(sock_net(sk
))->close_timeout
;
80 int mptcp_get_pm_type(const struct net
*net
)
82 return mptcp_get_pernet(net
)->pm_type
;
85 const char *mptcp_get_scheduler(const struct net
*net
)
87 return mptcp_get_pernet(net
)->scheduler
;
90 static void mptcp_pernet_set_defaults(struct mptcp_pernet
*pernet
)
92 pernet
->mptcp_enabled
= 1;
93 pernet
->add_addr_timeout
= TCP_RTO_MAX
;
94 pernet
->blackhole_timeout
= 3600;
95 atomic_set(&pernet
->active_disable_times
, 0);
96 pernet
->close_timeout
= TCP_TIMEWAIT_LEN
;
97 pernet
->checksum_enabled
= 0;
98 pernet
->allow_join_initial_addr_port
= 1;
99 pernet
->stale_loss_cnt
= 4;
100 pernet
->pm_type
= MPTCP_PM_TYPE_KERNEL
;
101 strscpy(pernet
->scheduler
, "default", sizeof(pernet
->scheduler
));
105 static int mptcp_set_scheduler(const struct net
*net
, const char *name
)
107 struct mptcp_pernet
*pernet
= mptcp_get_pernet(net
);
108 struct mptcp_sched_ops
*sched
;
112 sched
= mptcp_sched_find(name
);
114 strscpy(pernet
->scheduler
, name
, MPTCP_SCHED_NAME_MAX
);
122 static int proc_scheduler(const struct ctl_table
*ctl
, int write
,
123 void *buffer
, size_t *lenp
, loff_t
*ppos
)
125 const struct net
*net
= current
->nsproxy
->net_ns
;
126 char val
[MPTCP_SCHED_NAME_MAX
];
127 struct ctl_table tbl
= {
129 .maxlen
= MPTCP_SCHED_NAME_MAX
,
133 strscpy(val
, mptcp_get_scheduler(net
), MPTCP_SCHED_NAME_MAX
);
135 ret
= proc_dostring(&tbl
, write
, buffer
, lenp
, ppos
);
136 if (write
&& ret
== 0)
137 ret
= mptcp_set_scheduler(net
, val
);
142 static int proc_available_schedulers(const struct ctl_table
*ctl
,
143 int write
, void *buffer
,
144 size_t *lenp
, loff_t
*ppos
)
146 struct ctl_table tbl
= { .maxlen
= MPTCP_SCHED_BUF_MAX
, };
149 tbl
.data
= kmalloc(tbl
.maxlen
, GFP_USER
);
153 mptcp_get_available_schedulers(tbl
.data
, MPTCP_SCHED_BUF_MAX
);
154 ret
= proc_dostring(&tbl
, write
, buffer
, lenp
, ppos
);
160 static int proc_blackhole_detect_timeout(const struct ctl_table
*table
,
161 int write
, void *buffer
, size_t *lenp
,
164 struct mptcp_pernet
*pernet
= mptcp_get_pernet(current
->nsproxy
->net_ns
);
167 ret
= proc_dointvec_minmax(table
, write
, buffer
, lenp
, ppos
);
168 if (write
&& ret
== 0)
169 atomic_set(&pernet
->active_disable_times
, 0);
174 static struct ctl_table mptcp_sysctl_table
[] = {
176 .procname
= "enabled",
177 .maxlen
= sizeof(u8
),
179 /* users with CAP_NET_ADMIN or root (not and) can change this
180 * value, same as other sysctl or the 'net' tree.
182 .proc_handler
= proc_dou8vec_minmax
,
183 .extra1
= SYSCTL_ZERO
,
187 .procname
= "add_addr_timeout",
188 .maxlen
= sizeof(unsigned int),
190 .proc_handler
= proc_dointvec_jiffies
,
193 .procname
= "checksum_enabled",
194 .maxlen
= sizeof(u8
),
196 .proc_handler
= proc_dou8vec_minmax
,
197 .extra1
= SYSCTL_ZERO
,
201 .procname
= "allow_join_initial_addr_port",
202 .maxlen
= sizeof(u8
),
204 .proc_handler
= proc_dou8vec_minmax
,
205 .extra1
= SYSCTL_ZERO
,
209 .procname
= "stale_loss_cnt",
210 .maxlen
= sizeof(unsigned int),
212 .proc_handler
= proc_douintvec_minmax
,
215 .procname
= "pm_type",
216 .maxlen
= sizeof(u8
),
218 .proc_handler
= proc_dou8vec_minmax
,
219 .extra1
= SYSCTL_ZERO
,
220 .extra2
= &mptcp_pm_type_max
223 .procname
= "scheduler",
224 .maxlen
= MPTCP_SCHED_NAME_MAX
,
226 .proc_handler
= proc_scheduler
,
229 .procname
= "available_schedulers",
230 .maxlen
= MPTCP_SCHED_BUF_MAX
,
232 .proc_handler
= proc_available_schedulers
,
235 .procname
= "close_timeout",
236 .maxlen
= sizeof(unsigned int),
238 .proc_handler
= proc_dointvec_jiffies
,
241 .procname
= "blackhole_timeout",
242 .maxlen
= sizeof(unsigned int),
244 .proc_handler
= proc_blackhole_detect_timeout
,
245 .extra1
= SYSCTL_ZERO
,
249 static int mptcp_pernet_new_table(struct net
*net
, struct mptcp_pernet
*pernet
)
251 struct ctl_table_header
*hdr
;
252 struct ctl_table
*table
;
254 table
= mptcp_sysctl_table
;
255 if (!net_eq(net
, &init_net
)) {
256 table
= kmemdup(table
, sizeof(mptcp_sysctl_table
), GFP_KERNEL
);
261 table
[0].data
= &pernet
->mptcp_enabled
;
262 table
[1].data
= &pernet
->add_addr_timeout
;
263 table
[2].data
= &pernet
->checksum_enabled
;
264 table
[3].data
= &pernet
->allow_join_initial_addr_port
;
265 table
[4].data
= &pernet
->stale_loss_cnt
;
266 table
[5].data
= &pernet
->pm_type
;
267 table
[6].data
= &pernet
->scheduler
;
268 /* table[7] is for available_schedulers which is read-only info */
269 table
[8].data
= &pernet
->close_timeout
;
270 table
[9].data
= &pernet
->blackhole_timeout
;
272 hdr
= register_net_sysctl_sz(net
, MPTCP_SYSCTL_PATH
, table
,
273 ARRAY_SIZE(mptcp_sysctl_table
));
277 pernet
->ctl_table_hdr
= hdr
;
282 if (!net_eq(net
, &init_net
))
288 static void mptcp_pernet_del_table(struct mptcp_pernet
*pernet
)
290 const struct ctl_table
*table
= pernet
->ctl_table_hdr
->ctl_table_arg
;
292 unregister_net_sysctl_table(pernet
->ctl_table_hdr
);
299 static int mptcp_pernet_new_table(struct net
*net
, struct mptcp_pernet
*pernet
)
/* Empty variant of mptcp_pernet_del_table(): nothing to tear down.
 *
 * NOTE(review): presumably the !CONFIG_SYSCTL branch of the conditional
 * closed by the #endif just below - confirm against the full file.
 */
static void mptcp_pernet_del_table(struct mptcp_pernet *pernet)
{
}
306 #endif /* CONFIG_SYSCTL */
308 /* The following code block is to deal with middle box issues with MPTCP,
309 * similar to what is done with TFO.
310 * The proposed solution is to disable active MPTCP globally when SYN+MPC are
311 * dropped, while SYN without MPC aren't. In this case, active side MPTCP is
312 * disabled globally for 1hr at first. Then if it happens again, it is disabled
313 * for 2h, then 4h, 8h, ...
314 * The timeout is reset back to 1hr when a successful active MPTCP connection is
318 /* Disable active MPTCP and record current jiffies and active_disable_times */
319 void mptcp_active_disable(struct sock
*sk
)
321 struct net
*net
= sock_net(sk
);
322 struct mptcp_pernet
*pernet
;
324 pernet
= mptcp_get_pernet(net
);
326 if (!READ_ONCE(pernet
->blackhole_timeout
))
329 /* Paired with READ_ONCE() in mptcp_active_should_disable() */
330 WRITE_ONCE(pernet
->active_disable_stamp
, jiffies
);
332 /* Paired with smp_rmb() in mptcp_active_should_disable().
333 * We want pernet->active_disable_stamp to be updated first.
335 smp_mb__before_atomic();
336 atomic_inc(&pernet
->active_disable_times
);
338 MPTCP_INC_STATS(net
, MPTCP_MIB_BLACKHOLE
);
341 /* Calculate timeout for MPTCP active disable
342 * Return true if we are still in the active MPTCP disable period
343 * Return false if timeout already expired and we should use active MPTCP
345 bool mptcp_active_should_disable(struct sock
*ssk
)
347 struct net
*net
= sock_net(ssk
);
348 unsigned int blackhole_timeout
;
349 struct mptcp_pernet
*pernet
;
350 unsigned long timeout
;
354 pernet
= mptcp_get_pernet(net
);
355 blackhole_timeout
= READ_ONCE(pernet
->blackhole_timeout
);
357 if (!blackhole_timeout
)
360 disable_times
= atomic_read(&pernet
->active_disable_times
);
364 /* Paired with smp_mb__before_atomic() in mptcp_active_disable() */
367 /* Limit timeout to max: 2^6 * initial timeout */
368 multiplier
= 1 << min(disable_times
- 1, 6);
370 /* Paired with the WRITE_ONCE() in mptcp_active_disable(). */
371 timeout
= READ_ONCE(pernet
->active_disable_stamp
) +
372 multiplier
* blackhole_timeout
* HZ
;
374 return time_before(jiffies
, timeout
);
377 /* Enable active MPTCP and reset active_disable_times if needed */
378 void mptcp_active_enable(struct sock
*sk
)
380 struct mptcp_pernet
*pernet
= mptcp_get_pernet(sock_net(sk
));
382 if (atomic_read(&pernet
->active_disable_times
)) {
383 struct dst_entry
*dst
= sk_dst_get(sk
);
385 if (dst
&& dst
->dev
&& (dst
->dev
->flags
& IFF_LOOPBACK
))
386 atomic_set(&pernet
->active_disable_times
, 0);
390 /* Check the number of retransmissions, and fallback to TCP if needed */
391 void mptcp_active_detect_blackhole(struct sock
*ssk
, bool expired
)
393 struct mptcp_subflow_context
*subflow
;
396 if (!sk_is_mptcp(ssk
))
399 timeouts
= inet_csk(ssk
)->icsk_retransmits
;
400 subflow
= mptcp_subflow_ctx(ssk
);
402 if (subflow
->request_mptcp
&& ssk
->sk_state
== TCP_SYN_SENT
) {
403 if (timeouts
== 2 || (timeouts
< 2 && expired
)) {
404 MPTCP_INC_STATS(sock_net(ssk
), MPTCP_MIB_MPCAPABLEACTIVEDROP
);
405 subflow
->mpc_drop
= 1;
406 mptcp_subflow_early_fallback(mptcp_sk(subflow
->conn
), subflow
);
408 subflow
->mpc_drop
= 0;
413 static int __net_init
mptcp_net_init(struct net
*net
)
415 struct mptcp_pernet
*pernet
= mptcp_get_pernet(net
);
417 mptcp_pernet_set_defaults(pernet
);
419 return mptcp_pernet_new_table(net
, pernet
);
422 /* Note: the callback will only be called per extra netns */
423 static void __net_exit
mptcp_net_exit(struct net
*net
)
425 struct mptcp_pernet
*pernet
= mptcp_get_pernet(net
);
427 mptcp_pernet_del_table(pernet
);
430 static struct pernet_operations mptcp_pernet_ops
= {
431 .init
= mptcp_net_init
,
432 .exit
= mptcp_net_exit
,
433 .id
= &mptcp_pernet_id
,
434 .size
= sizeof(struct mptcp_pernet
),
437 void __init
mptcp_init(void)
439 mptcp_join_cookie_init();
442 if (register_pernet_subsys(&mptcp_pernet_ops
) < 0)
443 panic("Failed to register MPTCP pernet subsystem.\n");
446 #if IS_ENABLED(CONFIG_MPTCP_IPV6)
447 int __init
mptcpv6_init(void)
451 err
= mptcp_proto_v6_init();