1 // SPDX-License-Identifier: GPL-2.0
4 * Copyright (c) 2019, Tessares SA.
8 #include <linux/sysctl.h>
11 #include <net/net_namespace.h>
12 #include <net/netns/generic.h>
17 #define MPTCP_SYSCTL_PATH "net/mptcp"
19 static int mptcp_pernet_id
;
22 static int mptcp_pm_type_max
= __MPTCP_PM_TYPE_MAX
;
27 struct ctl_table_header
*ctl_table_hdr
;
30 unsigned int add_addr_timeout
;
31 unsigned int blackhole_timeout
;
32 unsigned int close_timeout
;
33 unsigned int stale_loss_cnt
;
34 atomic_t active_disable_times
;
35 unsigned long active_disable_stamp
;
38 u8 allow_join_initial_addr_port
;
40 char scheduler
[MPTCP_SCHED_NAME_MAX
];
43 static struct mptcp_pernet
*mptcp_get_pernet(const struct net
*net
)
45 return net_generic(net
, mptcp_pernet_id
);
48 int mptcp_is_enabled(const struct net
*net
)
50 return mptcp_get_pernet(net
)->mptcp_enabled
;
53 unsigned int mptcp_get_add_addr_timeout(const struct net
*net
)
55 return mptcp_get_pernet(net
)->add_addr_timeout
;
58 int mptcp_is_checksum_enabled(const struct net
*net
)
60 return mptcp_get_pernet(net
)->checksum_enabled
;
63 int mptcp_allow_join_id0(const struct net
*net
)
65 return mptcp_get_pernet(net
)->allow_join_initial_addr_port
;
68 unsigned int mptcp_stale_loss_cnt(const struct net
*net
)
70 return mptcp_get_pernet(net
)->stale_loss_cnt
;
73 unsigned int mptcp_close_timeout(const struct sock
*sk
)
75 if (sock_flag(sk
, SOCK_DEAD
))
76 return TCP_TIMEWAIT_LEN
;
77 return mptcp_get_pernet(sock_net(sk
))->close_timeout
;
80 int mptcp_get_pm_type(const struct net
*net
)
82 return mptcp_get_pernet(net
)->pm_type
;
85 const char *mptcp_get_scheduler(const struct net
*net
)
87 return mptcp_get_pernet(net
)->scheduler
;
90 static void mptcp_pernet_set_defaults(struct mptcp_pernet
*pernet
)
92 pernet
->mptcp_enabled
= 1;
93 pernet
->add_addr_timeout
= TCP_RTO_MAX
;
94 pernet
->blackhole_timeout
= 3600;
95 atomic_set(&pernet
->active_disable_times
, 0);
96 pernet
->close_timeout
= TCP_TIMEWAIT_LEN
;
97 pernet
->checksum_enabled
= 0;
98 pernet
->allow_join_initial_addr_port
= 1;
99 pernet
->stale_loss_cnt
= 4;
100 pernet
->pm_type
= MPTCP_PM_TYPE_KERNEL
;
101 strscpy(pernet
->scheduler
, "default", sizeof(pernet
->scheduler
));
105 static int mptcp_set_scheduler(char *scheduler
, const char *name
)
107 struct mptcp_sched_ops
*sched
;
111 sched
= mptcp_sched_find(name
);
113 strscpy(scheduler
, name
, MPTCP_SCHED_NAME_MAX
);
121 static int proc_scheduler(const struct ctl_table
*ctl
, int write
,
122 void *buffer
, size_t *lenp
, loff_t
*ppos
)
124 char (*scheduler
)[MPTCP_SCHED_NAME_MAX
] = ctl
->data
;
125 char val
[MPTCP_SCHED_NAME_MAX
];
126 struct ctl_table tbl
= {
128 .maxlen
= MPTCP_SCHED_NAME_MAX
,
132 strscpy(val
, *scheduler
, MPTCP_SCHED_NAME_MAX
);
134 ret
= proc_dostring(&tbl
, write
, buffer
, lenp
, ppos
);
135 if (write
&& ret
== 0)
136 ret
= mptcp_set_scheduler(*scheduler
, val
);
141 static int proc_available_schedulers(const struct ctl_table
*ctl
,
142 int write
, void *buffer
,
143 size_t *lenp
, loff_t
*ppos
)
145 struct ctl_table tbl
= { .maxlen
= MPTCP_SCHED_BUF_MAX
, };
148 tbl
.data
= kmalloc(tbl
.maxlen
, GFP_USER
);
152 mptcp_get_available_schedulers(tbl
.data
, MPTCP_SCHED_BUF_MAX
);
153 ret
= proc_dostring(&tbl
, write
, buffer
, lenp
, ppos
);
159 static int proc_blackhole_detect_timeout(const struct ctl_table
*table
,
160 int write
, void *buffer
, size_t *lenp
,
163 struct mptcp_pernet
*pernet
= container_of(table
->data
,
168 ret
= proc_dointvec_minmax(table
, write
, buffer
, lenp
, ppos
);
169 if (write
&& ret
== 0)
170 atomic_set(&pernet
->active_disable_times
, 0);
175 static struct ctl_table mptcp_sysctl_table
[] = {
177 .procname
= "enabled",
178 .maxlen
= sizeof(u8
),
180 /* users with CAP_NET_ADMIN or root (not and) can change this
181 * value, same as other sysctl or the 'net' tree.
183 .proc_handler
= proc_dou8vec_minmax
,
184 .extra1
= SYSCTL_ZERO
,
188 .procname
= "add_addr_timeout",
189 .maxlen
= sizeof(unsigned int),
191 .proc_handler
= proc_dointvec_jiffies
,
194 .procname
= "checksum_enabled",
195 .maxlen
= sizeof(u8
),
197 .proc_handler
= proc_dou8vec_minmax
,
198 .extra1
= SYSCTL_ZERO
,
202 .procname
= "allow_join_initial_addr_port",
203 .maxlen
= sizeof(u8
),
205 .proc_handler
= proc_dou8vec_minmax
,
206 .extra1
= SYSCTL_ZERO
,
210 .procname
= "stale_loss_cnt",
211 .maxlen
= sizeof(unsigned int),
213 .proc_handler
= proc_douintvec_minmax
,
216 .procname
= "pm_type",
217 .maxlen
= sizeof(u8
),
219 .proc_handler
= proc_dou8vec_minmax
,
220 .extra1
= SYSCTL_ZERO
,
221 .extra2
= &mptcp_pm_type_max
224 .procname
= "scheduler",
225 .maxlen
= MPTCP_SCHED_NAME_MAX
,
227 .proc_handler
= proc_scheduler
,
230 .procname
= "available_schedulers",
231 .maxlen
= MPTCP_SCHED_BUF_MAX
,
233 .proc_handler
= proc_available_schedulers
,
236 .procname
= "close_timeout",
237 .maxlen
= sizeof(unsigned int),
239 .proc_handler
= proc_dointvec_jiffies
,
242 .procname
= "blackhole_timeout",
243 .maxlen
= sizeof(unsigned int),
245 .proc_handler
= proc_blackhole_detect_timeout
,
246 .extra1
= SYSCTL_ZERO
,
250 static int mptcp_pernet_new_table(struct net
*net
, struct mptcp_pernet
*pernet
)
252 struct ctl_table_header
*hdr
;
253 struct ctl_table
*table
;
255 table
= mptcp_sysctl_table
;
256 if (!net_eq(net
, &init_net
)) {
257 table
= kmemdup(table
, sizeof(mptcp_sysctl_table
), GFP_KERNEL
);
262 table
[0].data
= &pernet
->mptcp_enabled
;
263 table
[1].data
= &pernet
->add_addr_timeout
;
264 table
[2].data
= &pernet
->checksum_enabled
;
265 table
[3].data
= &pernet
->allow_join_initial_addr_port
;
266 table
[4].data
= &pernet
->stale_loss_cnt
;
267 table
[5].data
= &pernet
->pm_type
;
268 table
[6].data
= &pernet
->scheduler
;
269 /* table[7] is for available_schedulers which is read-only info */
270 table
[8].data
= &pernet
->close_timeout
;
271 table
[9].data
= &pernet
->blackhole_timeout
;
273 hdr
= register_net_sysctl_sz(net
, MPTCP_SYSCTL_PATH
, table
,
274 ARRAY_SIZE(mptcp_sysctl_table
));
278 pernet
->ctl_table_hdr
= hdr
;
283 if (!net_eq(net
, &init_net
))
289 static void mptcp_pernet_del_table(struct mptcp_pernet
*pernet
)
291 const struct ctl_table
*table
= pernet
->ctl_table_hdr
->ctl_table_arg
;
293 unregister_net_sysctl_table(pernet
->ctl_table_hdr
);
300 static int mptcp_pernet_new_table(struct net
*net
, struct mptcp_pernet
*pernet
)
/* Stub variant: intentionally does nothing with @pernet.
 * NOTE(review): presumably the !CONFIG_SYSCTL counterpart of the real
 * teardown helper - confirm against the surrounding #ifdef.
 */
static void mptcp_pernet_del_table(struct mptcp_pernet *pernet)
{
}
307 #endif /* CONFIG_SYSCTL */
/* The following code block is to deal with middlebox issues with MPTCP,
 * similar to what is done with TFO.
 * The proposed solution is to disable active MPTCP globally when SYN+MPC are
 * dropped, while SYN without MPC aren't. In this case, active side MPTCP is
 * disabled globally for 1h at first. Then if it happens again, it is disabled
 * for 2h, then 4h, 8h, ...
 * The timeout is reset back to 1h when a successful active MPTCP connection is
 * fully established.
 */
319 /* Disable active MPTCP and record current jiffies and active_disable_times */
320 void mptcp_active_disable(struct sock
*sk
)
322 struct net
*net
= sock_net(sk
);
323 struct mptcp_pernet
*pernet
;
325 pernet
= mptcp_get_pernet(net
);
327 if (!READ_ONCE(pernet
->blackhole_timeout
))
330 /* Paired with READ_ONCE() in mptcp_active_should_disable() */
331 WRITE_ONCE(pernet
->active_disable_stamp
, jiffies
);
333 /* Paired with smp_rmb() in mptcp_active_should_disable().
334 * We want pernet->active_disable_stamp to be updated first.
336 smp_mb__before_atomic();
337 atomic_inc(&pernet
->active_disable_times
);
339 MPTCP_INC_STATS(net
, MPTCP_MIB_BLACKHOLE
);
342 /* Calculate timeout for MPTCP active disable
343 * Return true if we are still in the active MPTCP disable period
344 * Return false if timeout already expired and we should use active MPTCP
346 bool mptcp_active_should_disable(struct sock
*ssk
)
348 struct net
*net
= sock_net(ssk
);
349 unsigned int blackhole_timeout
;
350 struct mptcp_pernet
*pernet
;
351 unsigned long timeout
;
355 pernet
= mptcp_get_pernet(net
);
356 blackhole_timeout
= READ_ONCE(pernet
->blackhole_timeout
);
358 if (!blackhole_timeout
)
361 disable_times
= atomic_read(&pernet
->active_disable_times
);
365 /* Paired with smp_mb__before_atomic() in mptcp_active_disable() */
368 /* Limit timeout to max: 2^6 * initial timeout */
369 multiplier
= 1 << min(disable_times
- 1, 6);
371 /* Paired with the WRITE_ONCE() in mptcp_active_disable(). */
372 timeout
= READ_ONCE(pernet
->active_disable_stamp
) +
373 multiplier
* blackhole_timeout
* HZ
;
375 return time_before(jiffies
, timeout
);
378 /* Enable active MPTCP and reset active_disable_times if needed */
379 void mptcp_active_enable(struct sock
*sk
)
381 struct mptcp_pernet
*pernet
= mptcp_get_pernet(sock_net(sk
));
383 if (atomic_read(&pernet
->active_disable_times
)) {
384 struct dst_entry
*dst
= sk_dst_get(sk
);
386 if (dst
&& dst
->dev
&& (dst
->dev
->flags
& IFF_LOOPBACK
))
387 atomic_set(&pernet
->active_disable_times
, 0);
391 /* Check the number of retransmissions, and fallback to TCP if needed */
392 void mptcp_active_detect_blackhole(struct sock
*ssk
, bool expired
)
394 struct mptcp_subflow_context
*subflow
;
397 if (!sk_is_mptcp(ssk
))
400 timeouts
= inet_csk(ssk
)->icsk_retransmits
;
401 subflow
= mptcp_subflow_ctx(ssk
);
403 if (subflow
->request_mptcp
&& ssk
->sk_state
== TCP_SYN_SENT
) {
404 if (timeouts
== 2 || (timeouts
< 2 && expired
)) {
405 MPTCP_INC_STATS(sock_net(ssk
), MPTCP_MIB_MPCAPABLEACTIVEDROP
);
406 subflow
->mpc_drop
= 1;
407 mptcp_subflow_early_fallback(mptcp_sk(subflow
->conn
), subflow
);
409 subflow
->mpc_drop
= 0;
414 static int __net_init
mptcp_net_init(struct net
*net
)
416 struct mptcp_pernet
*pernet
= mptcp_get_pernet(net
);
418 mptcp_pernet_set_defaults(pernet
);
420 return mptcp_pernet_new_table(net
, pernet
);
423 /* Note: the callback will only be called per extra netns */
424 static void __net_exit
mptcp_net_exit(struct net
*net
)
426 struct mptcp_pernet
*pernet
= mptcp_get_pernet(net
);
428 mptcp_pernet_del_table(pernet
);
431 static struct pernet_operations mptcp_pernet_ops
= {
432 .init
= mptcp_net_init
,
433 .exit
= mptcp_net_exit
,
434 .id
= &mptcp_pernet_id
,
435 .size
= sizeof(struct mptcp_pernet
),
438 void __init
mptcp_init(void)
440 mptcp_join_cookie_init();
443 if (register_pernet_subsys(&mptcp_pernet_ops
) < 0)
444 panic("Failed to register MPTCP pernet subsystem.\n");
447 #if IS_ENABLED(CONFIG_MPTCP_IPV6)
448 int __init
mptcpv6_init(void)
452 err
= mptcp_proto_v6_init();