// SPDX-License-Identifier: GPL-2.0
/* Multipath TCP
 *
 * Copyright (c) 2019, Intel Corporation.
 */
#define pr_fmt(fmt) "MPTCP: " fmt

#include <linux/kernel.h>
#include <net/mptcp.h>
#include "protocol.h"

#include "mib.h"
#include "mptcp_pm_gen.h"

/* path manager command handlers */
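
/* msk->pm.addr_signal is a bitmask of pending signals
 * (MPTCP_ADD_ADDR_SIGNAL, MPTCP_ADD_ADDR_ECHO, MPTCP_RM_ADDR_SIGNAL): it is
 * updated under msk->pm.lock (see the lockdep assertion below) and read
 * lock-free, hence the READ_ONCE()/WRITE_ONCE() accessors.
 */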
int mptcp_pm_announce_addr(struct mptcp_sock *msk,
			   const struct mptcp_addr_info *addr,
			   bool echo)
{
	u8 add_addr = READ_ONCE(msk->pm.addr_signal);

	pr_debug("msk=%p, local_id=%d, echo=%d\n", msk, addr->id, echo);

	lockdep_assert_held(&msk->pm.lock);

	if (add_addr &
	    (echo ? BIT(MPTCP_ADD_ADDR_ECHO) : BIT(MPTCP_ADD_ADDR_SIGNAL))) {
		MPTCP_INC_STATS(sock_net((struct sock *)msk),
				echo ? MPTCP_MIB_ECHOADDTXDROP : MPTCP_MIB_ADDADDRTXDROP);
		return -EINVAL;
	}

	if (echo) {
		msk->pm.remote = *addr;
		add_addr |= BIT(MPTCP_ADD_ADDR_ECHO);
	} else {
		msk->pm.local = *addr;
		add_addr |= BIT(MPTCP_ADD_ADDR_SIGNAL);
	}
	WRITE_ONCE(msk->pm.addr_signal, add_addr);
	return 0;
}
int mptcp_pm_remove_addr(struct mptcp_sock *msk, const struct mptcp_rm_list *rm_list)
{
	u8 rm_addr = READ_ONCE(msk->pm.addr_signal);

	pr_debug("msk=%p, rm_list_nr=%d\n", msk, rm_list->nr);

	if (rm_addr) {
		MPTCP_ADD_STATS(sock_net((struct sock *)msk),
				MPTCP_MIB_RMADDRTXDROP, rm_list->nr);
		return -EINVAL;
	}

	msk->pm.rm_list_tx = *rm_list;
	rm_addr |= BIT(MPTCP_RM_ADDR_SIGNAL);
	WRITE_ONCE(msk->pm.addr_signal, rm_addr);
	mptcp_pm_nl_addr_send_ack(msk);
	return 0;
}
/* path manager event handlers */
void mptcp_pm_new_connection(struct mptcp_sock *msk, const struct sock *ssk, int server_side)
{
	struct mptcp_pm_data *pm = &msk->pm;

	pr_debug("msk=%p, token=%u side=%d\n", msk, READ_ONCE(msk->token), server_side);

	WRITE_ONCE(pm->server_side, server_side);
	mptcp_event(MPTCP_EVENT_CREATED, msk, ssk, GFP_ATOMIC);
}
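
/* Decide whether a new incoming subflow can be accepted: with the userspace
 * PM, any subflow is allowed while the PM is active; with the in-kernel PM,
 * accept only while pm->subflows is below the configured maximum.
 */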
bool mptcp_pm_allow_new_subflow(struct mptcp_sock *msk)
{
	struct mptcp_pm_data *pm = &msk->pm;
	unsigned int subflows_max;
	int ret = 0;

	if (mptcp_pm_is_userspace(msk)) {
		if (mptcp_userspace_pm_active(msk)) {
			spin_lock_bh(&pm->lock);
			pm->subflows++;
			spin_unlock_bh(&pm->lock);
			return true;
		}
		return false;
	}

	subflows_max = mptcp_pm_get_subflows_max(msk);

	pr_debug("msk=%p subflows=%d max=%d allow=%d\n", msk, pm->subflows,
		 subflows_max, READ_ONCE(pm->accept_subflow));

	/* try to avoid acquiring the lock below */
	if (!READ_ONCE(pm->accept_subflow))
		return false;

	spin_lock_bh(&pm->lock);
	if (READ_ONCE(pm->accept_subflow)) {
		ret = pm->subflows < subflows_max;
		if (ret && ++pm->subflows == subflows_max)
			WRITE_ONCE(pm->accept_subflow, false);
	}
	spin_unlock_bh(&pm->lock);

	return ret;
}
/* return true if the new status bit is currently cleared, that is, this event
 * can be served, eventually by an already scheduled work
 */
static bool mptcp_pm_schedule_work(struct mptcp_sock *msk,
				   enum mptcp_pm_status new_status)
{
	pr_debug("msk=%p status=%x new=%lx\n", msk, msk->pm.status,
		 BIT(new_status));
	if (msk->pm.status & BIT(new_status))
		return false;

	msk->pm.status |= BIT(new_status);
	mptcp_schedule_work((struct sock *)msk);
	return true;
}
void mptcp_pm_fully_established(struct mptcp_sock *msk, const struct sock *ssk)
{
	struct mptcp_pm_data *pm = &msk->pm;
	bool announce = false;

	pr_debug("msk=%p\n", msk);

	spin_lock_bh(&pm->lock);

	/* mptcp_pm_fully_established() can be invoked by multiple
	 * racing paths - accept() and check_fully_established()
	 * be sure to serve this event only once.
	 */
	if (READ_ONCE(pm->work_pending) &&
	    !(msk->pm.status & BIT(MPTCP_PM_ALREADY_ESTABLISHED)))
		mptcp_pm_schedule_work(msk, MPTCP_PM_ESTABLISHED);

	if ((msk->pm.status & BIT(MPTCP_PM_ALREADY_ESTABLISHED)) == 0)
		announce = true;

	msk->pm.status |= BIT(MPTCP_PM_ALREADY_ESTABLISHED);
	spin_unlock_bh(&pm->lock);

	if (announce)
		mptcp_event(MPTCP_EVENT_ESTABLISHED, msk, ssk, GFP_ATOMIC);
}
void mptcp_pm_connection_closed(struct mptcp_sock *msk)
{
	pr_debug("msk=%p\n", msk);

	if (msk->token)
		mptcp_event(MPTCP_EVENT_CLOSED, msk, NULL, GFP_KERNEL);
}
void mptcp_pm_subflow_established(struct mptcp_sock *msk)
{
	struct mptcp_pm_data *pm = &msk->pm;

	pr_debug("msk=%p\n", msk);

	if (!READ_ONCE(pm->work_pending))
		return;

	spin_lock_bh(&pm->lock);

	if (READ_ONCE(pm->work_pending))
		mptcp_pm_schedule_work(msk, MPTCP_PM_SUBFLOW_ESTABLISHED);

	spin_unlock_bh(&pm->lock);
}
void mptcp_pm_subflow_check_next(struct mptcp_sock *msk,
				 const struct mptcp_subflow_context *subflow)
{
	struct mptcp_pm_data *pm = &msk->pm;
	bool update_subflows;

	update_subflows = subflow->request_join || subflow->mp_join;
	if (mptcp_pm_is_userspace(msk)) {
		if (update_subflows) {
			spin_lock_bh(&pm->lock);
			pm->subflows--;
			spin_unlock_bh(&pm->lock);
		}
		return;
	}

	if (!READ_ONCE(pm->work_pending) && !update_subflows)
		return;

	spin_lock_bh(&pm->lock);
	if (update_subflows)
		__mptcp_pm_close_subflow(msk);

	/* Even if this subflow is not really established, tell the PM to try
	 * to pick the next ones, if possible.
	 */
	if (mptcp_pm_nl_check_work_pending(msk))
		mptcp_pm_schedule_work(msk, MPTCP_PM_SUBFLOW_ESTABLISHED);

	spin_unlock_bh(&pm->lock);
}
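
/* Handle a peer ADD_ADDR: the userspace PM just echoes it back (or drops it
 * when no daemon is active); the in-kernel PM echoes addresses it will not
 * act on (id0 mismatch, accept_addr not set) and otherwise stores the
 * remote address and defers subflow creation to the worker.
 */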
void mptcp_pm_add_addr_received(const struct sock *ssk,
				const struct mptcp_addr_info *addr)
{
	struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
	struct mptcp_sock *msk = mptcp_sk(subflow->conn);
	struct mptcp_pm_data *pm = &msk->pm;

	pr_debug("msk=%p remote_id=%d accept=%d\n", msk, addr->id,
		 READ_ONCE(pm->accept_addr));

	mptcp_event_addr_announced(ssk, addr);

	spin_lock_bh(&pm->lock);

	if (mptcp_pm_is_userspace(msk)) {
		if (mptcp_userspace_pm_active(msk)) {
			mptcp_pm_announce_addr(msk, addr, true);
			mptcp_pm_add_addr_send_ack(msk);
		} else {
			__MPTCP_INC_STATS(sock_net((struct sock *)msk), MPTCP_MIB_ADDADDRDROP);
		}
	/* id0 should not have a different address */
	} else if ((addr->id == 0 && !mptcp_pm_nl_is_init_remote_addr(msk, addr)) ||
		   (addr->id > 0 && !READ_ONCE(pm->accept_addr))) {
		mptcp_pm_announce_addr(msk, addr, true);
		mptcp_pm_add_addr_send_ack(msk);
	} else if (mptcp_pm_schedule_work(msk, MPTCP_PM_ADD_ADDR_RECEIVED)) {
		pm->remote = *addr;
	} else {
		__MPTCP_INC_STATS(sock_net((struct sock *)msk), MPTCP_MIB_ADDADDRDROP);
	}

	spin_unlock_bh(&pm->lock);
}
void mptcp_pm_add_addr_echoed(struct mptcp_sock *msk,
			      const struct mptcp_addr_info *addr)
{
	struct mptcp_pm_data *pm = &msk->pm;

	pr_debug("msk=%p\n", msk);

	spin_lock_bh(&pm->lock);

	if (mptcp_lookup_anno_list_by_saddr(msk, addr) && READ_ONCE(pm->work_pending))
		mptcp_pm_schedule_work(msk, MPTCP_PM_SUBFLOW_ESTABLISHED);

	spin_unlock_bh(&pm->lock);
}
void mptcp_pm_add_addr_send_ack(struct mptcp_sock *msk)
{
	if (!mptcp_pm_should_add_signal(msk))
		return;

	mptcp_pm_schedule_work(msk, MPTCP_PM_ADD_ADDR_SEND_ACK);
}
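
/* Handle a peer RM_ADDR: emit one netlink event per removed id, then hand
 * the list to the worker via pm->rm_list_rx; if an RM_ADDR event is already
 * pending, the new list is dropped and accounted in the MIB counters.
 */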
void mptcp_pm_rm_addr_received(struct mptcp_sock *msk,
			       const struct mptcp_rm_list *rm_list)
{
	struct mptcp_pm_data *pm = &msk->pm;
	u8 i;

	pr_debug("msk=%p remote_ids_nr=%d\n", msk, rm_list->nr);

	for (i = 0; i < rm_list->nr; i++)
		mptcp_event_addr_removed(msk, rm_list->ids[i]);

	spin_lock_bh(&pm->lock);
	if (mptcp_pm_schedule_work(msk, MPTCP_PM_RM_ADDR_RECEIVED))
		pm->rm_list_rx = *rm_list;
	else
		__MPTCP_INC_STATS(sock_net((struct sock *)msk), MPTCP_MIB_RMADDRDROP);
	spin_unlock_bh(&pm->lock);
}
void mptcp_pm_mp_prio_received(struct sock *ssk, u8 bkup)
{
	struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
	struct sock *sk = subflow->conn;
	struct mptcp_sock *msk;

	pr_debug("subflow->backup=%d, bkup=%d\n", subflow->backup, bkup);
	msk = mptcp_sk(sk);
	if (subflow->backup != bkup)
		subflow->backup = bkup;

	mptcp_event(MPTCP_EVENT_SUB_PRIORITY, msk, ssk, GFP_ATOMIC);
}
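
/* MP_FAIL handling: on the first MP_FAIL (no fail timeout armed yet) reply
 * with an MP_FAIL of our own and switch to an infinite mapping; a later
 * MP_FAIL is the peer's response, so just clear the pending timeout.
 */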
void mptcp_pm_mp_fail_received(struct sock *sk, u64 fail_seq)
{
	struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
	struct mptcp_sock *msk = mptcp_sk(subflow->conn);

	pr_debug("fail_seq=%llu\n", fail_seq);

	if (!READ_ONCE(msk->allow_infinite_fallback))
		return;

	if (!subflow->fail_tout) {
		pr_debug("send MP_FAIL response and infinite map\n");

		subflow->send_mp_fail = 1;
		subflow->send_infinite_map = 1;
		tcp_send_ack(sk);
	} else {
		pr_debug("MP_FAIL response received\n");
		WRITE_ONCE(subflow->fail_tout, 0);
	}
}
/* path manager helpers */
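
/* The two *_signal() helpers below run from the TCP option-writing path:
 * they re-check the pending signal under the PM lock, verify the remaining
 * option space, and clear the corresponding addr_signal bit once the data
 * to transmit has been copied out.
 */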
bool mptcp_pm_add_addr_signal(struct mptcp_sock *msk, const struct sk_buff *skb,
			      unsigned int opt_size, unsigned int remaining,
			      struct mptcp_addr_info *addr, bool *echo,
			      bool *drop_other_suboptions)
{
	int ret = false;
	u8 add_addr;
	u8 family;
	bool port;

	spin_lock_bh(&msk->pm.lock);

	/* double check after the lock is acquired */
	if (!mptcp_pm_should_add_signal(msk))
		goto out_unlock;

	/* always drop every other options for pure ack ADD_ADDR; this is a
	 * plain dup-ack from TCP perspective. The other MPTCP-relevant info,
	 * if any, will be carried by the 'original' TCP ack
	 */
	if (skb && skb_is_tcp_pure_ack(skb)) {
		remaining += opt_size;
		*drop_other_suboptions = true;
	}

	*echo = mptcp_pm_should_add_signal_echo(msk);
	port = !!(*echo ? msk->pm.remote.port : msk->pm.local.port);

	family = *echo ? msk->pm.remote.family : msk->pm.local.family;
	if (remaining < mptcp_add_addr_len(family, *echo, port))
		goto out_unlock;

	if (*echo) {
		*addr = msk->pm.remote;
		add_addr = msk->pm.addr_signal & ~BIT(MPTCP_ADD_ADDR_ECHO);
	} else {
		*addr = msk->pm.local;
		add_addr = msk->pm.addr_signal & ~BIT(MPTCP_ADD_ADDR_SIGNAL);
	}
	WRITE_ONCE(msk->pm.addr_signal, add_addr);
	ret = true;

out_unlock:
	spin_unlock_bh(&msk->pm.lock);
	return ret;
}
bool mptcp_pm_rm_addr_signal(struct mptcp_sock *msk, unsigned int remaining,
			     struct mptcp_rm_list *rm_list)
{
	int ret = false, len;
	u8 rm_addr;

	spin_lock_bh(&msk->pm.lock);

	/* double check after the lock is acquired */
	if (!mptcp_pm_should_rm_signal(msk))
		goto out_unlock;

	rm_addr = msk->pm.addr_signal & ~BIT(MPTCP_RM_ADDR_SIGNAL);
	len = mptcp_rm_addr_len(&msk->pm.rm_list_tx);
	if (len < 0) {
		WRITE_ONCE(msk->pm.addr_signal, rm_addr);
		goto out_unlock;
	}
	if (remaining < len)
		goto out_unlock;

	*rm_list = msk->pm.rm_list_tx;
	WRITE_ONCE(msk->pm.addr_signal, rm_addr);
	ret = true;

out_unlock:
	spin_unlock_bh(&msk->pm.lock);
	return ret;
}
int mptcp_pm_get_local_id(struct mptcp_sock *msk, struct sock_common *skc)
{
	struct mptcp_addr_info skc_local;
	struct mptcp_addr_info msk_local;

	if (WARN_ON_ONCE(!msk))
		return -1;

	/* The 0 ID mapping is defined by the first subflow, copied into the msk
	 * addr
	 */
	mptcp_local_address((struct sock_common *)msk, &msk_local);
	mptcp_local_address((struct sock_common *)skc, &skc_local);
	if (mptcp_addresses_equal(&msk_local, &skc_local, false))
		return 0;

	if (mptcp_pm_is_userspace(msk))
		return mptcp_userspace_pm_get_local_id(msk, &skc_local);
	return mptcp_pm_nl_get_local_id(msk, &skc_local);
}
bool mptcp_pm_is_backup(struct mptcp_sock *msk, struct sock_common *skc)
{
	struct mptcp_addr_info skc_local;

	mptcp_local_address((struct sock_common *)skc, &skc_local);

	if (mptcp_pm_is_userspace(msk))
		return mptcp_userspace_pm_is_backup(msk, &skc_local);

	return mptcp_pm_nl_is_backup(msk, &skc_local);
}
int mptcp_pm_get_addr(struct sk_buff *skb, struct genl_info *info)
{
	if (info->attrs[MPTCP_PM_ATTR_TOKEN])
		return mptcp_userspace_pm_get_addr(skb, info);
	return mptcp_pm_nl_get_addr(skb, info);
}
int mptcp_pm_dump_addr(struct sk_buff *msg, struct netlink_callback *cb)
{
	const struct genl_info *info = genl_info_dump(cb);

	if (info->attrs[MPTCP_PM_ATTR_TOKEN])
		return mptcp_userspace_pm_dump_addr(msg, cb);
	return mptcp_pm_nl_dump_addr(msg, cb);
}
int mptcp_pm_set_flags(struct sk_buff *skb, struct genl_info *info)
{
	if (info->attrs[MPTCP_PM_ATTR_TOKEN])
		return mptcp_userspace_pm_set_flags(skb, info);
	return mptcp_pm_nl_set_flags(skb, info);
}
void mptcp_pm_subflow_chk_stale(const struct mptcp_sock *msk, struct sock *ssk)
{
	struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
	u32 rcv_tstamp = READ_ONCE(tcp_sk(ssk)->rcv_tstamp);

	/* keep track of rtx periods with no progress */
	if (!subflow->stale_count) {
		subflow->stale_rcv_tstamp = rcv_tstamp;
		subflow->stale_count++;
	} else if (subflow->stale_rcv_tstamp == rcv_tstamp) {
		if (subflow->stale_count < U8_MAX)
			subflow->stale_count++;
		mptcp_pm_nl_subflow_chk_stale(msk, ssk);
	} else {
		subflow->stale_count = 0;
		mptcp_subflow_set_active(subflow);
	}
}
/* if sk is ipv4 or ipv6_only allows only same-family local and remote addresses,
 * otherwise allow any matching local/remote pair
 */
bool mptcp_pm_addr_families_match(const struct sock *sk,
				  const struct mptcp_addr_info *loc,
				  const struct mptcp_addr_info *rem)
{
	bool mptcp_is_v4 = sk->sk_family == AF_INET;

#if IS_ENABLED(CONFIG_MPTCP_IPV6)
	bool loc_is_v4 = loc->family == AF_INET || ipv6_addr_v4mapped(&loc->addr6);
	bool rem_is_v4 = rem->family == AF_INET || ipv6_addr_v4mapped(&rem->addr6);

	if (mptcp_is_v4)
		return loc_is_v4 && rem_is_v4;

	if (ipv6_only_sock(sk))
		return !loc_is_v4 && !rem_is_v4;

	return loc_is_v4 == rem_is_v4;
#else
	return mptcp_is_v4 && loc->family == AF_INET && rem->family == AF_INET;
#endif
}
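
/* Reset the PM state for a given msk: counters and pending signals are
 * cleared, and the per-netns PM type (in-kernel vs userspace) is re-read
 * to pick the active limits.
 */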
void mptcp_pm_data_reset(struct mptcp_sock *msk)
{
	u8 pm_type = mptcp_get_pm_type(sock_net((struct sock *)msk));
	struct mptcp_pm_data *pm = &msk->pm;

	pm->add_addr_signaled = 0;
	pm->add_addr_accepted = 0;
	pm->local_addr_used = 0;
	pm->subflows = 0;
	pm->rm_list_tx.nr = 0;
	pm->rm_list_rx.nr = 0;
	WRITE_ONCE(pm->pm_type, pm_type);

	if (pm_type == MPTCP_PM_TYPE_KERNEL) {
		bool subflows_allowed = !!mptcp_pm_get_subflows_max(msk);

		/* pm->work_pending must only be set to 'true' when
		 * pm->pm_type is set to MPTCP_PM_TYPE_KERNEL
		 */
		WRITE_ONCE(pm->work_pending,
			   (!!mptcp_pm_get_local_addr_max(msk) &&
			    subflows_allowed) ||
			   !!mptcp_pm_get_add_addr_signal_max(msk));
		WRITE_ONCE(pm->accept_addr,
			   !!mptcp_pm_get_add_addr_accept_max(msk) &&
			   subflows_allowed);
		WRITE_ONCE(pm->accept_subflow, subflows_allowed);
	} else {
		WRITE_ONCE(pm->work_pending, 0);
		WRITE_ONCE(pm->accept_addr, 0);
		WRITE_ONCE(pm->accept_subflow, 0);
	}

	WRITE_ONCE(pm->addr_signal, 0);
	WRITE_ONCE(pm->remote_deny_join_id0, false);
	pm->status = 0;

	bitmap_fill(msk->pm.id_avail_bitmap, MPTCP_PM_MAX_ADDR_ID + 1);
}
void mptcp_pm_data_init(struct mptcp_sock *msk)
{
	spin_lock_init(&msk->pm.lock);
	INIT_LIST_HEAD(&msk->pm.anno_list);
	INIT_LIST_HEAD(&msk->pm.userspace_pm_local_addr_list);
	mptcp_pm_data_reset(msk);
}
void __init mptcp_pm_init(void)
{
	mptcp_pm_nl_init();
}