1 // SPDX-License-Identifier: GPL-2.0
4 * Copyright (c) 2017 - 2019, Intel Corporation.
7 #define pr_fmt(fmt) "MPTCP: " fmt
9 #include <linux/kernel.h>
11 #include <net/mptcp.h>
/* Return true when @flags, once masked with MPTCP_CAP_FLAG_MASK, is exactly
 * MPTCP_CAP_HMAC_SHA256.
 */
14 static bool mptcp_cap_flag_sha256(u8 flags
)
16 return (flags
& MPTCP_CAP_FLAG_MASK
) == MPTCP_CAP_HMAC_SHA256
;
/* Decode one MPTCP option into @mp_opt.
 *
 * @skb:    segment carrying the option; its TCP flags, length and data
 *          offset select the expected MP_CAPABLE size
 * @ptr:    option bytes; the subtype is the upper four bits of the first one
 * @opsize: option length, compared against the per-subtype expected sizes
 * @mp_opt: destination for the decoded fields
 *
 * Sections visible here: MPTCPOPT_MP_CAPABLE, MPTCPOPT_MP_JOIN, the DSS
 * flags/ack/map fields, MPTCPOPT_ADD_ADDR and MPTCPOPT_RM_ADDR. Multi-byte
 * fields are read with the get_unaligned_be*() helpers.
 */
19 static void mptcp_parse_option(const struct sk_buff
*skb
,
20 const unsigned char *ptr
, int opsize
,
21 struct mptcp_options_received
*mp_opt
)
23 u8 subtype
= *ptr
>> 4;
29 case MPTCPOPT_MP_CAPABLE
:
30 /* strict size checking */
31 if (!(TCP_SKB_CB(skb
)->tcp_flags
& TCPHDR_SYN
)) {
32 if (skb
->len
> tcp_hdr(skb
)->doff
<< 2)
33 expected_opsize
= TCPOLEN_MPTCP_MPC_ACK_DATA
;
35 expected_opsize
= TCPOLEN_MPTCP_MPC_ACK
;
37 if (TCP_SKB_CB(skb
)->tcp_flags
& TCPHDR_ACK
)
38 expected_opsize
= TCPOLEN_MPTCP_MPC_SYNACK
;
40 expected_opsize
= TCPOLEN_MPTCP_MPC_SYN
;
42 if (opsize
!= expected_opsize
)
45 /* try to be gentle vs future versions on the initial syn */
46 version
= *ptr
++ & MPTCP_VERSION_MASK
;
47 if (opsize
!= TCPOLEN_MPTCP_MPC_SYN
) {
48 if (version
!= MPTCP_SUPPORTED_VERSION
)
50 } else if (version
< MPTCP_SUPPORTED_VERSION
) {
55 if (!mptcp_cap_flag_sha256(flags
) ||
56 (flags
& MPTCP_CAP_EXTENSIBILITY
))
59 /* RFC 6824, Section 3.1:
60 * "For the Checksum Required bit (labeled "A"), if either
61 * host requires the use of checksums, checksums MUST be used.
62 * In other words, the only way for checksums not to be used
63 * is if both hosts in their SYNs set A=0."
66 * "If a checksum is not present when its use has been
67 * negotiated, the receiver MUST close the subflow with a RST as
68 * it is considered broken."
70 * We don't implement DSS checksum - fall back to TCP.
72 if (flags
& MPTCP_CAP_CHECKSUM_REQD
)
75 mp_opt
->mp_capable
= 1;
76 if (opsize
>= TCPOLEN_MPTCP_MPC_SYNACK
) {
77 mp_opt
->sndr_key
= get_unaligned_be64(ptr
);
80 if (opsize
>= TCPOLEN_MPTCP_MPC_ACK
) {
81 mp_opt
->rcvr_key
= get_unaligned_be64(ptr
);
84 if (opsize
== TCPOLEN_MPTCP_MPC_ACK_DATA
) {
86 * "the data parameters in a MP_CAPABLE are semantically
87 * equivalent to those in a DSS option and can be used
93 mp_opt
->data_len
= get_unaligned_be16(ptr
);
96 pr_debug("MP_CAPABLE version=%x, flags=%x, optlen=%d sndr=%llu, rcvr=%llu len=%d",
97 version
, flags
, opsize
, mp_opt
->sndr_key
,
98 mp_opt
->rcvr_key
, mp_opt
->data_len
);
101 case MPTCPOPT_MP_JOIN
:
103 if (opsize
== TCPOLEN_MPTCP_MPJ_SYN
) {
104 mp_opt
->backup
= *ptr
++ & MPTCPOPT_BACKUP
;
105 mp_opt
->join_id
= *ptr
++;
106 mp_opt
->token
= get_unaligned_be32(ptr
);
108 mp_opt
->nonce
= get_unaligned_be32(ptr
);
110 pr_debug("MP_JOIN bkup=%u, id=%u, token=%u, nonce=%u",
111 mp_opt
->backup
, mp_opt
->join_id
,
112 mp_opt
->token
, mp_opt
->nonce
);
113 } else if (opsize
== TCPOLEN_MPTCP_MPJ_SYNACK
) {
114 mp_opt
->backup
= *ptr
++ & MPTCPOPT_BACKUP
;
115 mp_opt
->join_id
= *ptr
++;
116 mp_opt
->thmac
= get_unaligned_be64(ptr
);
118 mp_opt
->nonce
= get_unaligned_be32(ptr
);
120 pr_debug("MP_JOIN bkup=%u, id=%u, thmac=%llu, nonce=%u",
121 mp_opt
->backup
, mp_opt
->join_id
,
122 mp_opt
->thmac
, mp_opt
->nonce
);
123 } else if (opsize
== TCPOLEN_MPTCP_MPJ_ACK
) {
125 memcpy(mp_opt
->hmac
, ptr
, MPTCPOPT_HMAC_LEN
);
126 pr_debug("MP_JOIN hmac");
128 pr_warn("MP_JOIN bad option size");
137 /* we must clear 'mpc_map' to be able to detect MP_CAPABLE
138 * map vs DSS map in mptcp_incoming_options(), and reconstruct
139 * map info accordingly
142 flags
= (*ptr
++) & MPTCP_DSS_FLAG_MASK
;
143 mp_opt
->data_fin
= (flags
& MPTCP_DSS_DATA_FIN
) != 0;
144 mp_opt
->dsn64
= (flags
& MPTCP_DSS_DSN64
) != 0;
145 mp_opt
->use_map
= (flags
& MPTCP_DSS_HAS_MAP
) != 0;
146 mp_opt
->ack64
= (flags
& MPTCP_DSS_ACK64
) != 0;
147 mp_opt
->use_ack
= (flags
& MPTCP_DSS_HAS_ACK
);
149 pr_debug("data_fin=%d dsn64=%d use_map=%d ack64=%d use_ack=%d",
150 mp_opt
->data_fin
, mp_opt
->dsn64
,
151 mp_opt
->use_map
, mp_opt
->ack64
,
154 expected_opsize
= TCPOLEN_MPTCP_DSS_BASE
;
156 if (mp_opt
->use_ack
) {
158 expected_opsize
+= TCPOLEN_MPTCP_DSS_ACK64
;
160 expected_opsize
+= TCPOLEN_MPTCP_DSS_ACK32
;
163 if (mp_opt
->use_map
) {
165 expected_opsize
+= TCPOLEN_MPTCP_DSS_MAP64
;
167 expected_opsize
+= TCPOLEN_MPTCP_DSS_MAP32
;
170 /* RFC 6824, Section 3.3:
171 * If a checksum is present, but its use had
172 * not been negotiated in the MP_CAPABLE handshake,
173 * the checksum field MUST be ignored.
175 if (opsize
!= expected_opsize
&&
176 opsize
!= expected_opsize
+ TCPOLEN_MPTCP_DSS_CHECKSUM
)
181 if (mp_opt
->use_ack
) {
183 mp_opt
->data_ack
= get_unaligned_be64(ptr
);
186 mp_opt
->data_ack
= get_unaligned_be32(ptr
);
190 pr_debug("data_ack=%llu", mp_opt
->data_ack
);
193 if (mp_opt
->use_map
) {
195 mp_opt
->data_seq
= get_unaligned_be64(ptr
);
198 mp_opt
->data_seq
= get_unaligned_be32(ptr
);
202 mp_opt
->subflow_seq
= get_unaligned_be32(ptr
);
205 mp_opt
->data_len
= get_unaligned_be16(ptr
);
208 pr_debug("data_seq=%llu subflow_seq=%u data_len=%u",
209 mp_opt
->data_seq
, mp_opt
->subflow_seq
,
215 case MPTCPOPT_ADD_ADDR
:
216 mp_opt
->echo
= (*ptr
++) & MPTCP_ADDR_ECHO
;
218 if (opsize
== TCPOLEN_MPTCP_ADD_ADDR
||
219 opsize
== TCPOLEN_MPTCP_ADD_ADDR_PORT
)
220 mp_opt
->family
= MPTCP_ADDR_IPVERSION_4
;
221 #if IS_ENABLED(CONFIG_MPTCP_IPV6)
222 else if (opsize
== TCPOLEN_MPTCP_ADD_ADDR6
||
223 opsize
== TCPOLEN_MPTCP_ADD_ADDR6_PORT
)
224 mp_opt
->family
= MPTCP_ADDR_IPVERSION_6
;
229 if (opsize
== TCPOLEN_MPTCP_ADD_ADDR_BASE
||
230 opsize
== TCPOLEN_MPTCP_ADD_ADDR_BASE_PORT
)
231 mp_opt
->family
= MPTCP_ADDR_IPVERSION_4
;
232 #if IS_ENABLED(CONFIG_MPTCP_IPV6)
233 else if (opsize
== TCPOLEN_MPTCP_ADD_ADDR6_BASE
||
234 opsize
== TCPOLEN_MPTCP_ADD_ADDR6_BASE_PORT
)
235 mp_opt
->family
= MPTCP_ADDR_IPVERSION_6
;
241 mp_opt
->add_addr
= 1;
243 mp_opt
->addr_id
= *ptr
++;
244 pr_debug("ADD_ADDR: id=%d", mp_opt
->addr_id
);
245 if (mp_opt
->family
== MPTCP_ADDR_IPVERSION_4
) {
246 memcpy((u8
*)&mp_opt
->addr
.s_addr
, (u8
*)ptr
, 4);
248 if (opsize
== TCPOLEN_MPTCP_ADD_ADDR_PORT
||
249 opsize
== TCPOLEN_MPTCP_ADD_ADDR_BASE_PORT
) {
250 mp_opt
->port
= get_unaligned_be16(ptr
);
254 #if IS_ENABLED(CONFIG_MPTCP_IPV6)
256 memcpy(mp_opt
->addr6
.s6_addr
, (u8
*)ptr
, 16);
258 if (opsize
== TCPOLEN_MPTCP_ADD_ADDR6_PORT
||
259 opsize
== TCPOLEN_MPTCP_ADD_ADDR6_BASE_PORT
) {
260 mp_opt
->port
= get_unaligned_be16(ptr
);
266 mp_opt
->ahmac
= get_unaligned_be64(ptr
);
271 case MPTCPOPT_RM_ADDR
:
272 if (opsize
!= TCPOLEN_MPTCP_RM_ADDR_BASE
)
276 mp_opt
->rm_id
= *ptr
++;
277 pr_debug("RM_ADDR: id=%d", mp_opt
->rm_id
);
/* Scan the TCP option area of @skb and pass every TCPOPT_MPTCP option to
 * mptcp_parse_option(), which fills @mp_opt.
 *
 * The option area starts right after the fixed TCP header (th + 1) and is
 * (th->doff * 4) - sizeof(struct tcphdr) bytes long. mp_opt->mp_capable and
 * mp_opt->add_addr are cleared before scanning. Options with opsize < 2 and
 * partial options are not parsed.
 */
285 void mptcp_get_options(const struct sk_buff
*skb
,
286 struct mptcp_options_received
*mp_opt
)
288 const struct tcphdr
*th
= tcp_hdr(skb
);
289 const unsigned char *ptr
;
292 /* initialize option status */
293 mp_opt
->mp_capable
= 0;
295 mp_opt
->add_addr
= 0;
299 length
= (th
->doff
* 4) - sizeof(struct tcphdr
);
300 ptr
= (const unsigned char *)(th
+ 1);
309 case TCPOPT_NOP
: /* Ref: RFC 793 section 3.1 */
314 if (opsize
< 2) /* "silly options" */
317 return; /* don't parse partial options */
318 if (opcode
== TCPOPT_MPTCP
)
319 mptcp_parse_option(skb
, ptr
, opsize
, mp_opt
);
/* Fill @opts and @size with the MPTCP option to place on an outgoing SYN.
 *
 * TCP_SKB_CB(skb)->end_seq is first saved in subflow->snd_isn. Then:
 *  - subflow->request_mptcp: OPTION_MPTCP_MPC_SYN carrying the local key,
 *    *size = TCPOLEN_MPTCP_MPC_SYN;
 *  - subflow->request_join: OPTION_MPTCP_MPJ_SYN carrying local id, remote
 *    token, local nonce and backup flag, *size = TCPOLEN_MPTCP_MPJ_SYN.
 *
 * The return statements are not visible in this excerpt.
 */
326 bool mptcp_syn_options(struct sock
*sk
, const struct sk_buff
*skb
,
327 unsigned int *size
, struct mptcp_out_options
*opts
)
329 struct mptcp_subflow_context
*subflow
= mptcp_subflow_ctx(sk
);
331 /* we will use snd_isn to detect first pkt [re]transmission
332 * in mptcp_established_options_mp()
334 subflow
->snd_isn
= TCP_SKB_CB(skb
)->end_seq
;
335 if (subflow
->request_mptcp
) {
336 pr_debug("local_key=%llu", subflow
->local_key
);
337 opts
->suboptions
= OPTION_MPTCP_MPC_SYN
;
338 opts
->sndr_key
= subflow
->local_key
;
339 *size
= TCPOLEN_MPTCP_MPC_SYN
;
341 } else if (subflow
->request_join
) {
342 pr_debug("remote_token=%u, nonce=%u", subflow
->remote_token
,
343 subflow
->local_nonce
);
344 opts
->suboptions
= OPTION_MPTCP_MPJ_SYN
;
345 opts
->join_id
= subflow
->local_id
;
346 opts
->token
= subflow
->remote_token
;
347 opts
->nonce
= subflow
->local_nonce
;
348 opts
->backup
= subflow
->request_bkup
;
349 *size
= TCPOLEN_MPTCP_MPJ_SYN
;
355 /* MP_JOIN client subflow must wait for 4th ack before sending any data:
356 * TCP can't schedule delack timer before the subflow is fully established.
357 * MPTCP uses the delack timer to do 3rd ack retransmissions
/* Arm the delayed-ack timer of @sk.
 *
 * The timeout is either tp->srtt_us << 1 or TCP_TIMEOUT_INIT (the condition
 * choosing between them is not visible here). ICSK_ACK_SCHED and
 * ICSK_ACK_TIMER are set in icsk_ack.pending, the timeout is stored in
 * icsk_ack.timeout and icsk_delack_timer is reset with it. Warns once if
 * ICSK_ACK_TIMER was already pending.
 *
 * NOTE(review): a value derived from srtt_us is handed to sk_reset_timer()
 * as-is; confirm that its units and base match what the timer API expects.
 */
359 static void schedule_3rdack_retransmission(struct sock
*sk
)
361 struct inet_connection_sock
*icsk
= inet_csk(sk
);
362 struct tcp_sock
*tp
= tcp_sk(sk
);
363 unsigned long timeout
;
365 /* reschedule with a timeout above RTT, as we must look only for drop */
367 timeout
= tp
->srtt_us
<< 1;
369 timeout
= TCP_TIMEOUT_INIT
;
371 WARN_ON_ONCE(icsk
->icsk_ack
.pending
& ICSK_ACK_TIMER
);
372 icsk
->icsk_ack
.pending
|= ICSK_ACK_SCHED
| ICSK_ACK_TIMER
;
373 icsk
->icsk_ack
.timeout
= timeout
;
374 sk_reset_timer(sk
, &icsk
->icsk_delack_timer
, timeout
);
/* Stop icsk_delack_timer on @sk, zero icsk_ack.timeout and icsk_ack.ato,
 * and clear ICSK_ACK_SCHED | ICSK_ACK_TIMER from icsk_ack.pending.
 */
377 static void clear_3rdack_retransmission(struct sock
*sk
)
379 struct inet_connection_sock
*icsk
= inet_csk(sk
);
381 sk_stop_timer(sk
, &icsk
->icsk_delack_timer
);
382 icsk
->icsk_ack
.timeout
= 0;
383 icsk
->icsk_ack
.ato
= 0;
384 icsk
->icsk_ack
.pending
&= ~(ICSK_ACK_SCHED
| ICSK_ACK_TIMER
);
/* Prepare the MP_CAPABLE or MP_JOIN option for the third ack.
 *
 * Nothing is emitted when the subflow is already fully established or when
 * TCP_SKB_CB(skb)->seq differs from subflow->snd_isn. Otherwise:
 *  - subflow->mp_capable: OPTION_MPTCP_MPC_ACK with both keys and the
 *    data_len taken from the skb MPTCP extension (0 when there is none);
 *    *size is ALIGN(TCPOLEN_MPTCP_MPC_ACK_DATA, 4) or TCPOLEN_MPTCP_MPC_ACK;
 *  - subflow->mp_join: OPTION_MPTCP_MPJ_ACK with the subflow hmac,
 *    *size = TCPOLEN_MPTCP_MPJ_ACK, and the 3rd-ack retransmission timer
 *    is scheduled.
 *
 * One parameter line of the signature and the return statements are not
 * visible in this excerpt.
 */
387 static bool mptcp_established_options_mp(struct sock
*sk
, struct sk_buff
*skb
,
389 unsigned int remaining
,
390 struct mptcp_out_options
*opts
)
392 struct mptcp_subflow_context
*subflow
= mptcp_subflow_ctx(sk
);
393 struct mptcp_ext
*mpext
;
394 unsigned int data_len
;
396 /* When skb is not available, we better over-estimate the emitted
397 * options len. A full DSS option (28 bytes) is longer than
398 * TCPOLEN_MPTCP_MPC_ACK_DATA(22) or TCPOLEN_MPTCP_MPJ_ACK(24), so
399 * tell the caller to defer the estimate to
400 * mptcp_established_options_dss(), which will reserve enough space.
405 /* MPC/MPJ needed only on 3rd ack packet */
406 if (subflow
->fully_established
||
407 subflow
->snd_isn
!= TCP_SKB_CB(skb
)->seq
)
410 if (subflow
->mp_capable
) {
411 mpext
= mptcp_get_ext(skb
);
412 data_len
= mpext
? mpext
->data_len
: 0;
414 /* we will check ext_copy.data_len in mptcp_write_options() to
415 * discriminate between TCPOLEN_MPTCP_MPC_ACK_DATA and
416 * TCPOLEN_MPTCP_MPC_ACK
418 opts
->ext_copy
.data_len
= data_len
;
419 opts
->suboptions
= OPTION_MPTCP_MPC_ACK
;
420 opts
->sndr_key
= subflow
->local_key
;
421 opts
->rcvr_key
= subflow
->remote_key
;
424 * The MP_CAPABLE option is carried on the SYN, SYN/ACK, and ACK
425 * packets that start the first subflow of an MPTCP connection,
426 * as well as the first packet that carries data
429 *size
= ALIGN(TCPOLEN_MPTCP_MPC_ACK_DATA
, 4);
431 *size
= TCPOLEN_MPTCP_MPC_ACK
;
433 pr_debug("subflow=%p, local_key=%llu, remote_key=%llu map_len=%d",
434 subflow
, subflow
->local_key
, subflow
->remote_key
,
438 } else if (subflow
->mp_join
) {
439 opts
->suboptions
= OPTION_MPTCP_MPJ_ACK
;
440 memcpy(opts
->hmac
, subflow
->hmac
, MPTCPOPT_HMAC_LEN
);
441 *size
= TCPOLEN_MPTCP_MPJ_ACK
;
442 pr_debug("subflow=%p", subflow
);
444 schedule_3rdack_retransmission(sk
);
/* Adjust the DSS mapping in @ext so that it carries a DATA_FIN.
 *
 * In the branch visible first, the mapping is set to start at
 * subflow->data_fin_tx_seq with subflow_seq 0 (the RFC6824 form for a
 * DATA_FIN without payload). Otherwise, when the existing mapping ends
 * exactly at data_fin_tx_seq, it is treated as the final mapping; the
 * statements of that branch are not visible in this excerpt.
 */
450 static void mptcp_write_data_fin(struct mptcp_subflow_context
*subflow
,
451 struct mptcp_ext
*ext
)
454 /* RFC6824 requires a DSS mapping with specific values
455 * if DATA_FIN is set but no data payload is mapped
460 ext
->data_seq
= subflow
->data_fin_tx_seq
;
461 ext
->subflow_seq
= 0;
463 } else if (ext
->data_seq
+ ext
->data_len
== subflow
->data_fin_tx_seq
) {
464 /* If there's an existing DSS mapping and it is the
465 * final mapping, DATA_FIN consumes 1 additional byte of
/* Prepare the DSS option (mapping and/or data ack) for an outgoing segment.
 *
 * Mapping: when there is no skb, when the skb MPTCP extension has use_map
 * set, or when the segment carries TCP FIN, TCPOLEN_MPTCP_DSS_BASE +
 * TCPOLEN_MPTCP_DSS_MAP64 bytes are accounted; the extension is copied into
 * opts->ext_copy and mptcp_write_data_fin() is applied when the skb carries
 * FIN and subflow->data_fin_tx_enable is set.
 *
 * Ack: use_ack is cleared first; if msk->can_ack is not yet set only the
 * size computed so far is reported. Otherwise a 64-bit ack of msk->ack_seq
 * is added (plus TCPOLEN_MPTCP_DSS_BASE when no mapping is populated).
 * *size is the total rounded up to a multiple of 4.
 *
 * One parameter line of the signature and the return statements are not
 * visible in this excerpt.
 */
473 static bool mptcp_established_options_dss(struct sock
*sk
, struct sk_buff
*skb
,
475 unsigned int remaining
,
476 struct mptcp_out_options
*opts
)
478 struct mptcp_subflow_context
*subflow
= mptcp_subflow_ctx(sk
);
479 unsigned int dss_size
= 0;
480 struct mptcp_ext
*mpext
;
481 struct mptcp_sock
*msk
;
482 unsigned int ack_size
;
487 mpext
= mptcp_get_ext(skb
);
488 tcp_fin
= TCP_SKB_CB(skb
)->tcp_flags
& TCPHDR_FIN
;
494 if (!skb
|| (mpext
&& mpext
->use_map
) || tcp_fin
) {
495 unsigned int map_size
;
497 map_size
= TCPOLEN_MPTCP_DSS_BASE
+ TCPOLEN_MPTCP_DSS_MAP64
;
499 remaining
-= map_size
;
502 opts
->ext_copy
= *mpext
;
504 if (skb
&& tcp_fin
&& subflow
->data_fin_tx_enable
)
505 mptcp_write_data_fin(subflow
, &opts
->ext_copy
);
509 /* passive sockets msk will set the 'can_ack' after accept(), even
510 * if the first subflow may already have the remote key handy
512 opts
->ext_copy
.use_ack
= 0;
513 msk
= mptcp_sk(subflow
->conn
);
514 if (!READ_ONCE(msk
->can_ack
)) {
515 *size
= ALIGN(dss_size
, 4);
519 ack_size
= TCPOLEN_MPTCP_DSS_ACK64
;
521 /* Add kind/length/subtype/flag overhead if mapping is not populated */
523 ack_size
+= TCPOLEN_MPTCP_DSS_BASE
;
525 dss_size
+= ack_size
;
527 opts
->ext_copy
.data_ack
= msk
->ack_seq
;
528 opts
->ext_copy
.ack64
= 1;
529 opts
->ext_copy
.use_ack
= 1;
531 *size
= ALIGN(dss_size
, 4);
/* Compute the 64-bit ADD_ADDR HMAC for an IPv4 address.
 *
 * The 4 address bytes are copied to msg[1..4]; a 7-byte message is hashed
 * with mptcp_crypto_hmac_sha() under @key1/@key2 and the first 8 bytes of
 * the digest are returned, read as big-endian.
 *
 * NOTE(review): the lines filling the rest of msg are not visible here;
 * presumably msg[0] holds @addr_id - confirm.
 */
535 static u64
add_addr_generate_hmac(u64 key1
, u64 key2
, u8 addr_id
,
536 struct in_addr
*addr
)
538 u8 hmac
[MPTCP_ADDR_HMAC_LEN
];
542 memcpy(&msg
[1], &addr
->s_addr
, 4);
546 mptcp_crypto_hmac_sha(key1
, key2
, msg
, 7, hmac
);
548 return get_unaligned_be64(hmac
);
551 #if IS_ENABLED(CONFIG_MPTCP_IPV6)
/* IPv6 counterpart of add_addr_generate_hmac().
 *
 * The 16 address bytes are copied to msg[1..16]; a 19-byte message is
 * hashed with mptcp_crypto_hmac_sha() under @key1/@key2 and the first 8
 * bytes of the digest are returned, read as big-endian.
 *
 * NOTE(review): the lines filling the rest of msg are not visible here;
 * presumably msg[0] holds @addr_id - confirm.
 */
552 static u64
add_addr6_generate_hmac(u64 key1
, u64 key2
, u8 addr_id
,
553 struct in6_addr
*addr
)
555 u8 hmac
[MPTCP_ADDR_HMAC_LEN
];
559 memcpy(&msg
[1], &addr
->s6_addr
, 16);
563 mptcp_crypto_hmac_sha(key1
, key2
, msg
, 19, hmac
);
565 return get_unaligned_be64(hmac
);
/* Prepare an ADD_ADDR option when the path manager has an address to signal.
 *
 * Proceeds only if mptcp_pm_should_signal() and mptcp_pm_addr_signal() both
 * succeed; the latter fills a local mptcp_addr_info. The option length comes
 * from mptcp_add_addr_len(). opts->addr_id is set, and depending on the
 * address family OPTION_MPTCP_ADD_ADDR or OPTION_MPTCP_ADD_ADDR6 is OR-ed
 * into opts->suboptions together with the address and an HMAC generated
 * with msk->local_key as first key.
 *
 * One parameter line of the signature and the return statements are not
 * visible in this excerpt.
 */
569 static bool mptcp_established_options_addr(struct sock
*sk
,
571 unsigned int remaining
,
572 struct mptcp_out_options
*opts
)
574 struct mptcp_subflow_context
*subflow
= mptcp_subflow_ctx(sk
);
575 struct mptcp_sock
*msk
= mptcp_sk(subflow
->conn
);
576 struct mptcp_addr_info saddr
;
579 if (!mptcp_pm_should_signal(msk
) ||
580 !(mptcp_pm_addr_signal(msk
, remaining
, &saddr
)))
583 len
= mptcp_add_addr_len(saddr
.family
);
588 opts
->addr_id
= saddr
.id
;
589 if (saddr
.family
== AF_INET
) {
590 opts
->suboptions
|= OPTION_MPTCP_ADD_ADDR
;
591 opts
->addr
= saddr
.addr
;
592 opts
->ahmac
= add_addr_generate_hmac(msk
->local_key
,
597 #if IS_ENABLED(CONFIG_MPTCP_IPV6)
598 else if (saddr
.family
== AF_INET6
) {
599 opts
->suboptions
|= OPTION_MPTCP_ADD_ADDR6
;
600 opts
->addr6
= saddr
.addr6
;
601 opts
->ahmac
= add_addr6_generate_hmac(msk
->local_key
,
607 pr_debug("addr_id=%d, ahmac=%llu", opts
->addr_id
, opts
->ahmac
);
/* Collect the MPTCP options for a segment on an established subflow.
 *
 * opts->suboptions is cleared, then mptcp_established_options_mp() is tried
 * and, if it declines, mptcp_established_options_dss(). A size larger than
 * @remaining triggers WARN_ON_ONCE. The consumed size is subtracted from
 * @remaining before mptcp_established_options_addr() is given the chance to
 * append an ADD_ADDR, whose size is subtracted as well.
 *
 * Updates to *size and the return statements are not visible in this
 * excerpt.
 */
612 bool mptcp_established_options(struct sock
*sk
, struct sk_buff
*skb
,
613 unsigned int *size
, unsigned int remaining
,
614 struct mptcp_out_options
*opts
)
616 unsigned int opt_size
= 0;
619 opts
->suboptions
= 0;
621 if (mptcp_established_options_mp(sk
, skb
, &opt_size
, remaining
, opts
))
623 else if (mptcp_established_options_dss(sk
, skb
, &opt_size
, remaining
,
627 /* we reserved enough space for the above options, and exceeding the
628 * TCP option space would be fatal
630 if (WARN_ON_ONCE(opt_size
> remaining
))
634 remaining
-= opt_size
;
635 if (mptcp_established_options_addr(sk
, &opt_size
, remaining
, opts
)) {
637 remaining
-= opt_size
;
/* Fill @opts and @size with the MPTCP option to place on a SYN/ACK.
 *
 *  - subflow_req->mp_capable: OPTION_MPTCP_MPC_SYNACK carrying the local
 *    key, *size = TCPOLEN_MPTCP_MPC_SYNACK;
 *  - subflow_req->mp_join: OPTION_MPTCP_MPJ_SYNACK carrying backup flag,
 *    local id, truncated hmac and local nonce,
 *    *size = TCPOLEN_MPTCP_MPJ_SYNACK.
 *
 * The return statements are not visible in this excerpt.
 */
644 bool mptcp_synack_options(const struct request_sock
*req
, unsigned int *size
,
645 struct mptcp_out_options
*opts
)
647 struct mptcp_subflow_request_sock
*subflow_req
= mptcp_subflow_rsk(req
);
649 if (subflow_req
->mp_capable
) {
650 opts
->suboptions
= OPTION_MPTCP_MPC_SYNACK
;
651 opts
->sndr_key
= subflow_req
->local_key
;
652 *size
= TCPOLEN_MPTCP_MPC_SYNACK
;
653 pr_debug("subflow_req=%p, local_key=%llu",
654 subflow_req
, subflow_req
->local_key
);
656 } else if (subflow_req
->mp_join
) {
657 opts
->suboptions
= OPTION_MPTCP_MPJ_SYNACK
;
658 opts
->backup
= subflow_req
->backup
;
659 opts
->join_id
= subflow_req
->local_id
;
660 opts
->thmac
= subflow_req
->thmac
;
661 opts
->nonce
= subflow_req
->local_nonce
;
662 pr_debug("req=%p, bkup=%u, id=%u, thmac=%llu, nonce=%u",
663 subflow_req
, opts
->backup
, opts
->join_id
,
664 opts
->thmac
, opts
->nonce
);
665 *size
= TCPOLEN_MPTCP_MPJ_SYNACK
;
/* Track the transition of @subflow to the fully established state while
 * processing an incoming segment.
 *
 * Already established subflows only get a check for a retransmitted MP_JOIN
 * third ack (pure ack at ssn_offset + 1 on a server-side msk). Otherwise,
 * segments whose sequence is not ssn_offset + 1 return subflow->mp_capable.
 * An in-sequence segment carrying a DSS ack marks the subflow fully
 * established; one without MP_CAPABLE clears subflow->mp_capable and
 * tcp_sk(sk)->is_mptcp (fallback to TCP); one with MP_CAPABLE records the
 * peer key in subflow->remote_key and sets can_ack.
 *
 * On the first transition (pm_notified not yet set) the path manager is
 * notified; for MP_JOIN subflows the 3rd-ack retransmission timer is
 * cleared first.
 *
 * One parameter line of the signature (the one declaring skb) and most
 * return statements are not visible in this excerpt.
 */
671 static bool check_fully_established(struct mptcp_sock
*msk
, struct sock
*sk
,
672 struct mptcp_subflow_context
*subflow
,
674 struct mptcp_options_received
*mp_opt
)
676 /* here we can process OoO, in-window pkts, only in-sequence 4th ack
677 * will make the subflow fully established
679 if (likely(subflow
->fully_established
)) {
680 /* on passive sockets, check for 3rd ack retransmission
681 * note that msk is always set by subflow_syn_recv_sock()
682 * for mp_join subflows
684 if (TCP_SKB_CB(skb
)->seq
== subflow
->ssn_offset
+ 1 &&
685 TCP_SKB_CB(skb
)->end_seq
== TCP_SKB_CB(skb
)->seq
&&
686 subflow
->mp_join
&& mp_opt
->mp_join
&&
687 READ_ONCE(msk
->pm
.server_side
))
689 goto fully_established
;
692 /* we should process OoO packets before the first subflow is fully
693 * established, but not expected for MP_JOIN subflows
695 if (TCP_SKB_CB(skb
)->seq
!= subflow
->ssn_offset
+ 1)
696 return subflow
->mp_capable
;
698 if (mp_opt
->dss
&& mp_opt
->use_ack
) {
699 /* subflows are fully established as soon as we get any
702 subflow
->fully_established
= 1;
703 goto fully_established
;
706 /* If the first established packet does not contain MP_CAPABLE + data
707 * then fallback to TCP
709 if (!mp_opt
->mp_capable
) {
710 subflow
->mp_capable
= 0;
711 tcp_sk(sk
)->is_mptcp
= 0;
715 if (unlikely(!READ_ONCE(msk
->pm
.server_side
)))
716 pr_warn_once("bogus mpc option on established client sk");
717 subflow
->fully_established
= 1;
718 subflow
->remote_key
= mp_opt
->sndr_key
;
719 subflow
->can_ack
= 1;
722 if (likely(subflow
->pm_notified
))
725 subflow
->pm_notified
= 1;
726 if (subflow
->mp_join
) {
727 clear_3rdack_retransmission(sk
);
728 mptcp_pm_subflow_established(msk
, subflow
);
730 mptcp_pm_fully_established(msk
);
/* Expand a 32-bit data ack to 64 bits, using @old_ack as reference.
 *
 * The upper 32 bits of @old_ack are combined with the lower 32 bits of
 * @cur_ack; when the new low word is before() the old one, 2^32 is added to
 * the result.
 *
 * The handling of @use_64bit and the final return are on lines not visible
 * in this excerpt.
 */
735 static u64
expand_ack(u64 old_ack
, u64 cur_ack
, bool use_64bit
)
737 u32 old_ack32
, cur_ack32
;
742 old_ack32
= (u32
)old_ack
;
743 cur_ack32
= (u32
)cur_ack
;
744 cur_ack
= (old_ack
& GENMASK_ULL(63, 32)) + cur_ack32
;
745 if (unlikely(before(cur_ack32
, old_ack32
)))
746 return cur_ack
+ (1LL << 32);
/* Advance msk->snd_una according to the data ack received in @mp_opt.
 *
 * The ack is expanded against the current snd_una with expand_ack(). A
 * value past msk->write_seq (data not yet sent) is ignored. The update is
 * attempted with atomic64_cmpxchg() in a loop for as long as the new value
 * is after the current one; when the exchange succeeds mptcp_data_acked()
 * is called.
 */
750 static void update_una(struct mptcp_sock
*msk
,
751 struct mptcp_options_received
*mp_opt
)
753 u64 new_snd_una
, snd_una
, old_snd_una
= atomic64_read(&msk
->snd_una
);
754 u64 write_seq
= READ_ONCE(msk
->write_seq
);
756 /* avoid ack expansion on update conflict, to reduce the risk of
757 * wrongly expanding to a future ack sequence number, which is way
758 * more dangerous than missing an ack
760 new_snd_una
= expand_ack(old_snd_una
, mp_opt
->data_ack
, mp_opt
->ack64
);
762 /* ACK for data not even sent yet? Ignore. */
763 if (after64(new_snd_una
, write_seq
))
764 new_snd_una
= old_snd_una
;
766 while (after64(new_snd_una
, old_snd_una
)) {
767 snd_una
= old_snd_una
;
768 old_snd_una
= atomic64_cmpxchg(&msk
->snd_una
, snd_una
,
770 if (old_snd_una
== snd_una
) {
771 mptcp_data_acked((struct sock
*)msk
);
/* Check the HMAC of a received ADD_ADDR option.
 *
 * The HMAC is recomputed with add_addr_generate_hmac() or
 * add_addr6_generate_hmac(), depending on mp_opt->family, using
 * msk->remote_key as first key (the second key is on a line not visible
 * here), and compared with mp_opt->ahmac.
 *
 * Returns true when the two values match.
 */
777 static bool add_addr_hmac_valid(struct mptcp_sock
*msk
,
778 struct mptcp_options_received
*mp_opt
)
785 if (mp_opt
->family
== MPTCP_ADDR_IPVERSION_4
)
786 hmac
= add_addr_generate_hmac(msk
->remote_key
,
788 mp_opt
->addr_id
, &mp_opt
->addr
);
789 #if IS_ENABLED(CONFIG_MPTCP_IPV6)
791 hmac
= add_addr6_generate_hmac(msk
->remote_key
,
793 mp_opt
->addr_id
, &mp_opt
->addr6
);
796 pr_debug("msk=%p, ahmac=%llu, mp_opt->ahmac=%llu\n",
797 msk
, (unsigned long long)hmac
,
798 (unsigned long long)mp_opt
->ahmac
);
800 return hmac
== mp_opt
->ahmac
;
/* Process the MPTCP options of an incoming segment on subflow @sk.
 *
 * Options are parsed with mptcp_get_options() and the segment is gated by
 * check_fully_established(). Then:
 *  - a received ADD_ADDR with a valid HMAC is converted to a
 *    mptcp_addr_info and passed to mptcp_pm_add_addr_received();
 *  - msk->snd_una is advanced through update_una();
 *  - an SKB_EXT_MPTCP extension is added to @skb, zeroed, and when the
 *    options carry a mapping, filled with data_seq, subflow_seq, dsn64,
 *    data_fin and data_len. For a mapping coming from MP_CAPABLE (mpc_map)
 *    the values are derived from subflow->remote_key via
 *    mptcp_crypto_key_sha() and subflow_seq is set to 1.
 */
803 void mptcp_incoming_options(struct sock
*sk
, struct sk_buff
*skb
,
804 struct tcp_options_received
*opt_rx
)
806 struct mptcp_subflow_context
*subflow
= mptcp_subflow_ctx(sk
);
807 struct mptcp_sock
*msk
= mptcp_sk(subflow
->conn
);
808 struct mptcp_options_received mp_opt
;
809 struct mptcp_ext
*mpext
;
811 mptcp_get_options(skb
, &mp_opt
);
812 if (!check_fully_established(msk
, sk
, subflow
, skb
, &mp_opt
))
815 if (mp_opt
.add_addr
&& add_addr_hmac_valid(msk
, &mp_opt
)) {
816 struct mptcp_addr_info addr
;
818 addr
.port
= htons(mp_opt
.port
);
819 addr
.id
= mp_opt
.addr_id
;
820 if (mp_opt
.family
== MPTCP_ADDR_IPVERSION_4
) {
821 addr
.family
= AF_INET
;
822 addr
.addr
= mp_opt
.addr
;
824 #if IS_ENABLED(CONFIG_MPTCP_IPV6)
825 else if (mp_opt
.family
== MPTCP_ADDR_IPVERSION_6
) {
826 addr
.family
= AF_INET6
;
827 addr
.addr6
= mp_opt
.addr6
;
831 mptcp_pm_add_addr_received(msk
, &addr
);
838 /* we can't wait for recvmsg() to update the ack_seq, otherwise
839 * monodirectional flows will get stuck
842 update_una(msk
, &mp_opt
);
844 mpext
= skb_ext_add(skb
, SKB_EXT_MPTCP
);
848 memset(mpext
, 0, sizeof(*mpext
));
850 if (mp_opt
.use_map
) {
851 if (mp_opt
.mpc_map
) {
852 /* this is an MP_CAPABLE carrying MPTCP data
853 * we know this maps the first chunk of data
855 mptcp_crypto_key_sha(subflow
->remote_key
, NULL
,
858 mpext
->subflow_seq
= 1;
863 mpext
->data_seq
= mp_opt
.data_seq
;
864 mpext
->subflow_seq
= mp_opt
.subflow_seq
;
865 mpext
->dsn64
= mp_opt
.dsn64
;
866 mpext
->data_fin
= mp_opt
.data_fin
;
868 mpext
->data_len
= mp_opt
.data_len
;
/* Serialize the MPTCP options described by @opts into the TCP option area
 * at @ptr.
 *
 * Handled, in order, according to opts->suboptions:
 *  - MP_CAPABLE (SYN, SYN/ACK or ACK): the length depends on the packet
 *    type and, for the ACK, on ext_copy.data_len; sender key, receiver key
 *    and data length are appended as applicable;
 *  - ADD_ADDR for IPv4 and, with CONFIG_MPTCP_IPV6, IPv6, including ahmac;
 *  - RM_ADDR;
 *  - MP_JOIN SYN (token, nonce), SYN/ACK (thmac, nonce) and ACK (hmac);
 *  - DSS, when ext_copy.use_ack or ext_copy.use_map is set: 64-bit ack
 *    and/or 64-bit mapping, with MPTCP_DSS_DATA_FIN when data_fin is set.
 *
 * 16-bit lengths are written in the upper half of a 32-bit word padded
 * with two TCPOPT_NOP bytes.
 */
873 void mptcp_write_options(__be32
*ptr
, struct mptcp_out_options
*opts
)
875 if ((OPTION_MPTCP_MPC_SYN
| OPTION_MPTCP_MPC_SYNACK
|
876 OPTION_MPTCP_MPC_ACK
) & opts
->suboptions
) {
879 if (OPTION_MPTCP_MPC_SYN
& opts
->suboptions
)
880 len
= TCPOLEN_MPTCP_MPC_SYN
;
881 else if (OPTION_MPTCP_MPC_SYNACK
& opts
->suboptions
)
882 len
= TCPOLEN_MPTCP_MPC_SYNACK
;
883 else if (opts
->ext_copy
.data_len
)
884 len
= TCPOLEN_MPTCP_MPC_ACK_DATA
;
886 len
= TCPOLEN_MPTCP_MPC_ACK
;
888 *ptr
++ = mptcp_option(MPTCPOPT_MP_CAPABLE
, len
,
889 MPTCP_SUPPORTED_VERSION
,
890 MPTCP_CAP_HMAC_SHA256
);
892 if (!((OPTION_MPTCP_MPC_SYNACK
| OPTION_MPTCP_MPC_ACK
) &
894 goto mp_capable_done
;
896 put_unaligned_be64(opts
->sndr_key
, ptr
);
898 if (!((OPTION_MPTCP_MPC_ACK
) & opts
->suboptions
))
899 goto mp_capable_done
;
901 put_unaligned_be64(opts
->rcvr_key
, ptr
);
903 if (!opts
->ext_copy
.data_len
)
904 goto mp_capable_done
;
906 put_unaligned_be32(opts
->ext_copy
.data_len
<< 16 |
907 TCPOPT_NOP
<< 8 | TCPOPT_NOP
, ptr
);
912 if (OPTION_MPTCP_ADD_ADDR
& opts
->suboptions
) {
914 *ptr
++ = mptcp_option(MPTCPOPT_ADD_ADDR
,
915 TCPOLEN_MPTCP_ADD_ADDR
, 0,
918 *ptr
++ = mptcp_option(MPTCPOPT_ADD_ADDR
,
919 TCPOLEN_MPTCP_ADD_ADDR_BASE
,
922 memcpy((u8
*)ptr
, (u8
*)&opts
->addr
.s_addr
, 4);
925 put_unaligned_be64(opts
->ahmac
, ptr
);
930 #if IS_ENABLED(CONFIG_MPTCP_IPV6)
931 if (OPTION_MPTCP_ADD_ADDR6
& opts
->suboptions
) {
933 *ptr
++ = mptcp_option(MPTCPOPT_ADD_ADDR
,
934 TCPOLEN_MPTCP_ADD_ADDR6
, 0,
937 *ptr
++ = mptcp_option(MPTCPOPT_ADD_ADDR
,
938 TCPOLEN_MPTCP_ADD_ADDR6_BASE
,
941 memcpy((u8
*)ptr
, opts
->addr6
.s6_addr
, 16);
944 put_unaligned_be64(opts
->ahmac
, ptr
);
950 if (OPTION_MPTCP_RM_ADDR
& opts
->suboptions
) {
951 *ptr
++ = mptcp_option(MPTCPOPT_RM_ADDR
,
952 TCPOLEN_MPTCP_RM_ADDR_BASE
,
956 if (OPTION_MPTCP_MPJ_SYN
& opts
->suboptions
) {
957 *ptr
++ = mptcp_option(MPTCPOPT_MP_JOIN
,
958 TCPOLEN_MPTCP_MPJ_SYN
,
959 opts
->backup
, opts
->join_id
);
960 put_unaligned_be32(opts
->token
, ptr
);
962 put_unaligned_be32(opts
->nonce
, ptr
);
966 if (OPTION_MPTCP_MPJ_SYNACK
& opts
->suboptions
) {
967 *ptr
++ = mptcp_option(MPTCPOPT_MP_JOIN
,
968 TCPOLEN_MPTCP_MPJ_SYNACK
,
969 opts
->backup
, opts
->join_id
);
970 put_unaligned_be64(opts
->thmac
, ptr
);
972 put_unaligned_be32(opts
->nonce
, ptr
);
976 if (OPTION_MPTCP_MPJ_ACK
& opts
->suboptions
) {
977 *ptr
++ = mptcp_option(MPTCPOPT_MP_JOIN
,
978 TCPOLEN_MPTCP_MPJ_ACK
, 0, 0);
979 memcpy(ptr
, opts
->hmac
, MPTCPOPT_HMAC_LEN
);
983 if (opts
->ext_copy
.use_ack
|| opts
->ext_copy
.use_map
) {
984 struct mptcp_ext
*mpext
= &opts
->ext_copy
;
985 u8 len
= TCPOLEN_MPTCP_DSS_BASE
;
988 if (mpext
->use_ack
) {
989 len
+= TCPOLEN_MPTCP_DSS_ACK64
;
990 flags
= MPTCP_DSS_HAS_ACK
| MPTCP_DSS_ACK64
;
993 if (mpext
->use_map
) {
994 len
+= TCPOLEN_MPTCP_DSS_MAP64
;
996 /* Use only 64-bit mapping flags for now, add
997 * support for optional 32-bit mappings later.
999 flags
|= MPTCP_DSS_HAS_MAP
| MPTCP_DSS_DSN64
;
1000 if (mpext
->data_fin
)
1001 flags
|= MPTCP_DSS_DATA_FIN
;
1004 *ptr
++ = mptcp_option(MPTCPOPT_DSS
, len
, 0, flags
);
1006 if (mpext
->use_ack
) {
1007 put_unaligned_be64(mpext
->data_ack
, ptr
);
1011 if (mpext
->use_map
) {
1012 put_unaligned_be64(mpext
->data_seq
, ptr
);
1014 put_unaligned_be32(mpext
->subflow_seq
, ptr
);
1016 put_unaligned_be32(mpext
->data_len
<< 16 |
1017 TCPOPT_NOP
<< 8 | TCPOPT_NOP
, ptr
);