2 * Copyright (c) 2016-2017, Mellanox Technologies. All rights reserved.
3 * Copyright (c) 2016-2017, Dave Watson <davejwatson@fb.com>. All rights reserved.
4 * Copyright (c) 2016-2017, Lance Chao <lancerchao@fb.com>. All rights reserved.
5 * Copyright (c) 2016, Fridolin Pokorny <fridolin.pokorny@gmail.com>. All rights reserved.
6 * Copyright (c) 2016, Nikos Mavrogiannopoulos <nmav@gnutls.org>. All rights reserved.
7 * Copyright (c) 2018, Covalent IO, Inc. http://covalent.io
9 * This software is available to you under a choice of one of two
10 * licenses. You may choose to be licensed under the terms of the GNU
11 * General Public License (GPL) Version 2, available from the file
12 * COPYING in the main directory of this source tree, or the
13 * OpenIB.org BSD license below:
15 * Redistribution and use in source and binary forms, with or
16 * without modification, are permitted provided that the following
19 * - Redistributions of source code must retain the above
20 * copyright notice, this list of conditions and the following
23 * - Redistributions in binary form must reproduce the above
24 * copyright notice, this list of conditions and the following
25 * disclaimer in the documentation and/or other materials
26 * provided with the distribution.
28 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
29 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
30 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
31 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
32 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
33 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
34 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
38 #include <linux/sched/signal.h>
39 #include <linux/module.h>
40 #include <crypto/aead.h>
42 #include <net/strparser.h>
45 #define MAX_IV_SIZE TLS_CIPHER_AES_GCM_128_IV_SIZE
47 static int __skb_nsg(struct sk_buff
*skb
, int offset
, int len
,
48 unsigned int recursion_level
)
50 int start
= skb_headlen(skb
);
51 int i
, chunk
= start
- offset
;
52 struct sk_buff
*frag_iter
;
55 if (unlikely(recursion_level
>= 24))
68 for (i
= 0; i
< skb_shinfo(skb
)->nr_frags
; i
++) {
71 WARN_ON(start
> offset
+ len
);
73 end
= start
+ skb_frag_size(&skb_shinfo(skb
)->frags
[i
]);
87 if (unlikely(skb_has_frag_list(skb
))) {
88 skb_walk_frags(skb
, frag_iter
) {
91 WARN_ON(start
> offset
+ len
);
93 end
= start
+ frag_iter
->len
;
98 ret
= __skb_nsg(frag_iter
, offset
- start
, chunk
,
100 if (unlikely(ret
< 0))
115 /* Return the number of scatterlist elements required to completely map the
116 * skb, or -EMSGSIZE if the recursion depth is exceeded.
118 static int skb_nsg(struct sk_buff
*skb
, int offset
, int len
)
120 return __skb_nsg(skb
, offset
, len
, 0);
123 static int padding_length(struct tls_sw_context_rx
*ctx
,
124 struct tls_context
*tls_ctx
, struct sk_buff
*skb
)
126 struct strp_msg
*rxm
= strp_msg(skb
);
129 /* Determine zero-padding length */
130 if (tls_ctx
->prot_info
.version
== TLS_1_3_VERSION
) {
131 char content_type
= 0;
135 while (content_type
== 0) {
136 if (back
> rxm
->full_len
)
138 err
= skb_copy_bits(skb
,
139 rxm
->offset
+ rxm
->full_len
- back
,
146 ctx
->control
= content_type
;
151 static void tls_decrypt_done(struct crypto_async_request
*req
, int err
)
153 struct aead_request
*aead_req
= (struct aead_request
*)req
;
154 struct scatterlist
*sgout
= aead_req
->dst
;
155 struct scatterlist
*sgin
= aead_req
->src
;
156 struct tls_sw_context_rx
*ctx
;
157 struct tls_context
*tls_ctx
;
158 struct tls_prot_info
*prot
;
159 struct scatterlist
*sg
;
164 skb
= (struct sk_buff
*)req
->data
;
165 tls_ctx
= tls_get_ctx(skb
->sk
);
166 ctx
= tls_sw_ctx_rx(tls_ctx
);
167 prot
= &tls_ctx
->prot_info
;
169 /* Propagate if there was an err */
171 ctx
->async_wait
.err
= err
;
172 tls_err_abort(skb
->sk
, err
);
174 struct strp_msg
*rxm
= strp_msg(skb
);
175 rxm
->full_len
-= padding_length(ctx
, tls_ctx
, skb
);
176 rxm
->offset
+= prot
->prepend_size
;
177 rxm
->full_len
-= prot
->overhead_size
;
180 /* After using skb->sk to propagate sk through crypto async callback
181 * we need to NULL it again.
186 /* Free the destination pages if skb was not decrypted inplace */
188 /* Skip the first S/G entry as it points to AAD */
189 for_each_sg(sg_next(sgout
), sg
, UINT_MAX
, pages
) {
192 put_page(sg_page(sg
));
198 pending
= atomic_dec_return(&ctx
->decrypt_pending
);
200 if (!pending
&& READ_ONCE(ctx
->async_notify
))
201 complete(&ctx
->async_wait
.completion
);
204 static int tls_do_decryption(struct sock
*sk
,
206 struct scatterlist
*sgin
,
207 struct scatterlist
*sgout
,
210 struct aead_request
*aead_req
,
213 struct tls_context
*tls_ctx
= tls_get_ctx(sk
);
214 struct tls_prot_info
*prot
= &tls_ctx
->prot_info
;
215 struct tls_sw_context_rx
*ctx
= tls_sw_ctx_rx(tls_ctx
);
218 aead_request_set_tfm(aead_req
, ctx
->aead_recv
);
219 aead_request_set_ad(aead_req
, prot
->aad_size
);
220 aead_request_set_crypt(aead_req
, sgin
, sgout
,
221 data_len
+ prot
->tag_size
,
225 /* Using skb->sk to push sk through to crypto async callback
226 * handler. This allows propagating errors up to the socket
227 * if needed. It _must_ be cleared in the async handler
228 * before kfree_skb is called. We _know_ skb->sk is NULL
229 * because it is a clone from strparser.
232 aead_request_set_callback(aead_req
,
233 CRYPTO_TFM_REQ_MAY_BACKLOG
,
234 tls_decrypt_done
, skb
);
235 atomic_inc(&ctx
->decrypt_pending
);
237 aead_request_set_callback(aead_req
,
238 CRYPTO_TFM_REQ_MAY_BACKLOG
,
239 crypto_req_done
, &ctx
->async_wait
);
242 ret
= crypto_aead_decrypt(aead_req
);
243 if (ret
== -EINPROGRESS
) {
247 ret
= crypto_wait_req(ret
, &ctx
->async_wait
);
251 atomic_dec(&ctx
->decrypt_pending
);
256 static void tls_trim_both_msgs(struct sock
*sk
, int target_size
)
258 struct tls_context
*tls_ctx
= tls_get_ctx(sk
);
259 struct tls_prot_info
*prot
= &tls_ctx
->prot_info
;
260 struct tls_sw_context_tx
*ctx
= tls_sw_ctx_tx(tls_ctx
);
261 struct tls_rec
*rec
= ctx
->open_rec
;
263 sk_msg_trim(sk
, &rec
->msg_plaintext
, target_size
);
265 target_size
+= prot
->overhead_size
;
266 sk_msg_trim(sk
, &rec
->msg_encrypted
, target_size
);
269 static int tls_alloc_encrypted_msg(struct sock
*sk
, int len
)
271 struct tls_context
*tls_ctx
= tls_get_ctx(sk
);
272 struct tls_sw_context_tx
*ctx
= tls_sw_ctx_tx(tls_ctx
);
273 struct tls_rec
*rec
= ctx
->open_rec
;
274 struct sk_msg
*msg_en
= &rec
->msg_encrypted
;
276 return sk_msg_alloc(sk
, msg_en
, len
, 0);
279 static int tls_clone_plaintext_msg(struct sock
*sk
, int required
)
281 struct tls_context
*tls_ctx
= tls_get_ctx(sk
);
282 struct tls_prot_info
*prot
= &tls_ctx
->prot_info
;
283 struct tls_sw_context_tx
*ctx
= tls_sw_ctx_tx(tls_ctx
);
284 struct tls_rec
*rec
= ctx
->open_rec
;
285 struct sk_msg
*msg_pl
= &rec
->msg_plaintext
;
286 struct sk_msg
*msg_en
= &rec
->msg_encrypted
;
289 /* We add page references worth len bytes from encrypted sg
290 * at the end of plaintext sg. It is guaranteed that msg_en
291 * has enough required room (ensured by caller).
293 len
= required
- msg_pl
->sg
.size
;
295 /* Skip initial bytes in msg_en's data to be able to use
296 * same offset of both plain and encrypted data.
298 skip
= prot
->prepend_size
+ msg_pl
->sg
.size
;
300 return sk_msg_clone(sk
, msg_pl
, msg_en
, skip
, len
);
303 static struct tls_rec
*tls_get_rec(struct sock
*sk
)
305 struct tls_context
*tls_ctx
= tls_get_ctx(sk
);
306 struct tls_prot_info
*prot
= &tls_ctx
->prot_info
;
307 struct tls_sw_context_tx
*ctx
= tls_sw_ctx_tx(tls_ctx
);
308 struct sk_msg
*msg_pl
, *msg_en
;
312 mem_size
= sizeof(struct tls_rec
) + crypto_aead_reqsize(ctx
->aead_send
);
314 rec
= kzalloc(mem_size
, sk
->sk_allocation
);
318 msg_pl
= &rec
->msg_plaintext
;
319 msg_en
= &rec
->msg_encrypted
;
324 sg_init_table(rec
->sg_aead_in
, 2);
325 sg_set_buf(&rec
->sg_aead_in
[0], rec
->aad_space
, prot
->aad_size
);
326 sg_unmark_end(&rec
->sg_aead_in
[1]);
328 sg_init_table(rec
->sg_aead_out
, 2);
329 sg_set_buf(&rec
->sg_aead_out
[0], rec
->aad_space
, prot
->aad_size
);
330 sg_unmark_end(&rec
->sg_aead_out
[1]);
335 static void tls_free_rec(struct sock
*sk
, struct tls_rec
*rec
)
337 sk_msg_free(sk
, &rec
->msg_encrypted
);
338 sk_msg_free(sk
, &rec
->msg_plaintext
);
342 static void tls_free_open_rec(struct sock
*sk
)
344 struct tls_context
*tls_ctx
= tls_get_ctx(sk
);
345 struct tls_sw_context_tx
*ctx
= tls_sw_ctx_tx(tls_ctx
);
346 struct tls_rec
*rec
= ctx
->open_rec
;
349 tls_free_rec(sk
, rec
);
350 ctx
->open_rec
= NULL
;
354 int tls_tx_records(struct sock
*sk
, int flags
)
356 struct tls_context
*tls_ctx
= tls_get_ctx(sk
);
357 struct tls_sw_context_tx
*ctx
= tls_sw_ctx_tx(tls_ctx
);
358 struct tls_rec
*rec
, *tmp
;
359 struct sk_msg
*msg_en
;
360 int tx_flags
, rc
= 0;
362 if (tls_is_partially_sent_record(tls_ctx
)) {
363 rec
= list_first_entry(&ctx
->tx_list
,
364 struct tls_rec
, list
);
367 tx_flags
= rec
->tx_flags
;
371 rc
= tls_push_partial_record(sk
, tls_ctx
, tx_flags
);
375 /* Full record has been transmitted.
376 * Remove the head of tx_list
378 list_del(&rec
->list
);
379 sk_msg_free(sk
, &rec
->msg_plaintext
);
383 /* Tx all ready records */
384 list_for_each_entry_safe(rec
, tmp
, &ctx
->tx_list
, list
) {
385 if (READ_ONCE(rec
->tx_ready
)) {
387 tx_flags
= rec
->tx_flags
;
391 msg_en
= &rec
->msg_encrypted
;
392 rc
= tls_push_sg(sk
, tls_ctx
,
393 &msg_en
->sg
.data
[msg_en
->sg
.curr
],
398 list_del(&rec
->list
);
399 sk_msg_free(sk
, &rec
->msg_plaintext
);
407 if (rc
< 0 && rc
!= -EAGAIN
)
408 tls_err_abort(sk
, EBADMSG
);
413 static void tls_encrypt_done(struct crypto_async_request
*req
, int err
)
415 struct aead_request
*aead_req
= (struct aead_request
*)req
;
416 struct sock
*sk
= req
->data
;
417 struct tls_context
*tls_ctx
= tls_get_ctx(sk
);
418 struct tls_prot_info
*prot
= &tls_ctx
->prot_info
;
419 struct tls_sw_context_tx
*ctx
= tls_sw_ctx_tx(tls_ctx
);
420 struct scatterlist
*sge
;
421 struct sk_msg
*msg_en
;
426 rec
= container_of(aead_req
, struct tls_rec
, aead_req
);
427 msg_en
= &rec
->msg_encrypted
;
429 sge
= sk_msg_elem(msg_en
, msg_en
->sg
.curr
);
430 sge
->offset
-= prot
->prepend_size
;
431 sge
->length
+= prot
->prepend_size
;
433 /* Check if error is previously set on socket */
434 if (err
|| sk
->sk_err
) {
437 /* If err is already set on socket, return the same code */
439 ctx
->async_wait
.err
= sk
->sk_err
;
441 ctx
->async_wait
.err
= err
;
442 tls_err_abort(sk
, err
);
447 struct tls_rec
*first_rec
;
449 /* Mark the record as ready for transmission */
450 smp_store_mb(rec
->tx_ready
, true);
452 /* If received record is at head of tx_list, schedule tx */
453 first_rec
= list_first_entry(&ctx
->tx_list
,
454 struct tls_rec
, list
);
455 if (rec
== first_rec
)
459 pending
= atomic_dec_return(&ctx
->encrypt_pending
);
461 if (!pending
&& READ_ONCE(ctx
->async_notify
))
462 complete(&ctx
->async_wait
.completion
);
467 /* Schedule the transmission */
468 if (!test_and_set_bit(BIT_TX_SCHEDULED
, &ctx
->tx_bitmask
))
469 schedule_delayed_work(&ctx
->tx_work
.work
, 1);
472 static int tls_do_encryption(struct sock
*sk
,
473 struct tls_context
*tls_ctx
,
474 struct tls_sw_context_tx
*ctx
,
475 struct aead_request
*aead_req
,
476 size_t data_len
, u32 start
)
478 struct tls_prot_info
*prot
= &tls_ctx
->prot_info
;
479 struct tls_rec
*rec
= ctx
->open_rec
;
480 struct sk_msg
*msg_en
= &rec
->msg_encrypted
;
481 struct scatterlist
*sge
= sk_msg_elem(msg_en
, start
);
484 memcpy(rec
->iv_data
, tls_ctx
->tx
.iv
, sizeof(rec
->iv_data
));
485 xor_iv_with_seq(prot
->version
, rec
->iv_data
,
486 tls_ctx
->tx
.rec_seq
);
488 sge
->offset
+= prot
->prepend_size
;
489 sge
->length
-= prot
->prepend_size
;
491 msg_en
->sg
.curr
= start
;
493 aead_request_set_tfm(aead_req
, ctx
->aead_send
);
494 aead_request_set_ad(aead_req
, prot
->aad_size
);
495 aead_request_set_crypt(aead_req
, rec
->sg_aead_in
,
497 data_len
, rec
->iv_data
);
499 aead_request_set_callback(aead_req
, CRYPTO_TFM_REQ_MAY_BACKLOG
,
500 tls_encrypt_done
, sk
);
502 /* Add the record in tx_list */
503 list_add_tail((struct list_head
*)&rec
->list
, &ctx
->tx_list
);
504 atomic_inc(&ctx
->encrypt_pending
);
506 rc
= crypto_aead_encrypt(aead_req
);
507 if (!rc
|| rc
!= -EINPROGRESS
) {
508 atomic_dec(&ctx
->encrypt_pending
);
509 sge
->offset
-= prot
->prepend_size
;
510 sge
->length
+= prot
->prepend_size
;
514 WRITE_ONCE(rec
->tx_ready
, true);
515 } else if (rc
!= -EINPROGRESS
) {
516 list_del(&rec
->list
);
520 /* Unhook the record from context if encryption is not failure */
521 ctx
->open_rec
= NULL
;
522 tls_advance_record_sn(sk
, &tls_ctx
->tx
, prot
->version
);
526 static int tls_split_open_record(struct sock
*sk
, struct tls_rec
*from
,
527 struct tls_rec
**to
, struct sk_msg
*msg_opl
,
528 struct sk_msg
*msg_oen
, u32 split_point
,
529 u32 tx_overhead_size
, u32
*orig_end
)
531 u32 i
, j
, bytes
= 0, apply
= msg_opl
->apply_bytes
;
532 struct scatterlist
*sge
, *osge
, *nsge
;
533 u32 orig_size
= msg_opl
->sg
.size
;
534 struct scatterlist tmp
= { };
535 struct sk_msg
*msg_npl
;
539 new = tls_get_rec(sk
);
542 ret
= sk_msg_alloc(sk
, &new->msg_encrypted
, msg_opl
->sg
.size
+
543 tx_overhead_size
, 0);
545 tls_free_rec(sk
, new);
549 *orig_end
= msg_opl
->sg
.end
;
550 i
= msg_opl
->sg
.start
;
551 sge
= sk_msg_elem(msg_opl
, i
);
552 while (apply
&& sge
->length
) {
553 if (sge
->length
> apply
) {
554 u32 len
= sge
->length
- apply
;
556 get_page(sg_page(sge
));
557 sg_set_page(&tmp
, sg_page(sge
), len
,
558 sge
->offset
+ apply
);
563 apply
-= sge
->length
;
564 bytes
+= sge
->length
;
567 sk_msg_iter_var_next(i
);
568 if (i
== msg_opl
->sg
.end
)
570 sge
= sk_msg_elem(msg_opl
, i
);
574 msg_opl
->sg
.curr
= i
;
575 msg_opl
->sg
.copybreak
= 0;
576 msg_opl
->apply_bytes
= 0;
577 msg_opl
->sg
.size
= bytes
;
579 msg_npl
= &new->msg_plaintext
;
580 msg_npl
->apply_bytes
= apply
;
581 msg_npl
->sg
.size
= orig_size
- bytes
;
583 j
= msg_npl
->sg
.start
;
584 nsge
= sk_msg_elem(msg_npl
, j
);
586 memcpy(nsge
, &tmp
, sizeof(*nsge
));
587 sk_msg_iter_var_next(j
);
588 nsge
= sk_msg_elem(msg_npl
, j
);
591 osge
= sk_msg_elem(msg_opl
, i
);
592 while (osge
->length
) {
593 memcpy(nsge
, osge
, sizeof(*nsge
));
595 sk_msg_iter_var_next(i
);
596 sk_msg_iter_var_next(j
);
599 osge
= sk_msg_elem(msg_opl
, i
);
600 nsge
= sk_msg_elem(msg_npl
, j
);
604 msg_npl
->sg
.curr
= j
;
605 msg_npl
->sg
.copybreak
= 0;
611 static void tls_merge_open_record(struct sock
*sk
, struct tls_rec
*to
,
612 struct tls_rec
*from
, u32 orig_end
)
614 struct sk_msg
*msg_npl
= &from
->msg_plaintext
;
615 struct sk_msg
*msg_opl
= &to
->msg_plaintext
;
616 struct scatterlist
*osge
, *nsge
;
620 sk_msg_iter_var_prev(i
);
621 j
= msg_npl
->sg
.start
;
623 osge
= sk_msg_elem(msg_opl
, i
);
624 nsge
= sk_msg_elem(msg_npl
, j
);
626 if (sg_page(osge
) == sg_page(nsge
) &&
627 osge
->offset
+ osge
->length
== nsge
->offset
) {
628 osge
->length
+= nsge
->length
;
629 put_page(sg_page(nsge
));
632 msg_opl
->sg
.end
= orig_end
;
633 msg_opl
->sg
.curr
= orig_end
;
634 msg_opl
->sg
.copybreak
= 0;
635 msg_opl
->apply_bytes
= msg_opl
->sg
.size
+ msg_npl
->sg
.size
;
636 msg_opl
->sg
.size
+= msg_npl
->sg
.size
;
638 sk_msg_free(sk
, &to
->msg_encrypted
);
639 sk_msg_xfer_full(&to
->msg_encrypted
, &from
->msg_encrypted
);
644 static int tls_push_record(struct sock
*sk
, int flags
,
645 unsigned char record_type
)
647 struct tls_context
*tls_ctx
= tls_get_ctx(sk
);
648 struct tls_prot_info
*prot
= &tls_ctx
->prot_info
;
649 struct tls_sw_context_tx
*ctx
= tls_sw_ctx_tx(tls_ctx
);
650 struct tls_rec
*rec
= ctx
->open_rec
, *tmp
= NULL
;
651 u32 i
, split_point
, uninitialized_var(orig_end
);
652 struct sk_msg
*msg_pl
, *msg_en
;
653 struct aead_request
*req
;
660 msg_pl
= &rec
->msg_plaintext
;
661 msg_en
= &rec
->msg_encrypted
;
663 split_point
= msg_pl
->apply_bytes
;
664 split
= split_point
&& split_point
< msg_pl
->sg
.size
;
666 rc
= tls_split_open_record(sk
, rec
, &tmp
, msg_pl
, msg_en
,
667 split_point
, prot
->overhead_size
,
671 sk_msg_trim(sk
, msg_en
, msg_pl
->sg
.size
+
672 prot
->overhead_size
);
675 rec
->tx_flags
= flags
;
676 req
= &rec
->aead_req
;
679 sk_msg_iter_var_prev(i
);
681 rec
->content_type
= record_type
;
682 if (prot
->version
== TLS_1_3_VERSION
) {
683 /* Add content type to end of message. No padding added */
684 sg_set_buf(&rec
->sg_content_type
, &rec
->content_type
, 1);
685 sg_mark_end(&rec
->sg_content_type
);
686 sg_chain(msg_pl
->sg
.data
, msg_pl
->sg
.end
+ 1,
687 &rec
->sg_content_type
);
689 sg_mark_end(sk_msg_elem(msg_pl
, i
));
692 i
= msg_pl
->sg
.start
;
693 sg_chain(rec
->sg_aead_in
, 2, rec
->inplace_crypto
?
694 &msg_en
->sg
.data
[i
] : &msg_pl
->sg
.data
[i
]);
697 sk_msg_iter_var_prev(i
);
698 sg_mark_end(sk_msg_elem(msg_en
, i
));
700 i
= msg_en
->sg
.start
;
701 sg_chain(rec
->sg_aead_out
, 2, &msg_en
->sg
.data
[i
]);
703 tls_make_aad(rec
->aad_space
, msg_pl
->sg
.size
+ prot
->tail_size
,
704 tls_ctx
->tx
.rec_seq
, prot
->rec_seq_size
,
705 record_type
, prot
->version
);
707 tls_fill_prepend(tls_ctx
,
708 page_address(sg_page(&msg_en
->sg
.data
[i
])) +
709 msg_en
->sg
.data
[i
].offset
,
710 msg_pl
->sg
.size
+ prot
->tail_size
,
711 record_type
, prot
->version
);
713 tls_ctx
->pending_open_record_frags
= false;
715 rc
= tls_do_encryption(sk
, tls_ctx
, ctx
, req
,
716 msg_pl
->sg
.size
+ prot
->tail_size
, i
);
718 if (rc
!= -EINPROGRESS
) {
719 tls_err_abort(sk
, EBADMSG
);
721 tls_ctx
->pending_open_record_frags
= true;
722 tls_merge_open_record(sk
, rec
, tmp
, orig_end
);
725 ctx
->async_capable
= 1;
728 msg_pl
= &tmp
->msg_plaintext
;
729 msg_en
= &tmp
->msg_encrypted
;
730 sk_msg_trim(sk
, msg_en
, msg_pl
->sg
.size
+ prot
->overhead_size
);
731 tls_ctx
->pending_open_record_frags
= true;
735 return tls_tx_records(sk
, flags
);
738 static int bpf_exec_tx_verdict(struct sk_msg
*msg
, struct sock
*sk
,
739 bool full_record
, u8 record_type
,
740 size_t *copied
, int flags
)
742 struct tls_context
*tls_ctx
= tls_get_ctx(sk
);
743 struct tls_sw_context_tx
*ctx
= tls_sw_ctx_tx(tls_ctx
);
744 struct sk_msg msg_redir
= { };
745 struct sk_psock
*psock
;
746 struct sock
*sk_redir
;
752 policy
= !(flags
& MSG_SENDPAGE_NOPOLICY
);
753 psock
= sk_psock_get(sk
);
754 if (!psock
|| !policy
)
755 return tls_push_record(sk
, flags
, record_type
);
757 enospc
= sk_msg_full(msg
);
758 if (psock
->eval
== __SK_NONE
) {
759 delta
= msg
->sg
.size
;
760 psock
->eval
= sk_psock_msg_verdict(sk
, psock
, msg
);
761 if (delta
< msg
->sg
.size
)
762 delta
-= msg
->sg
.size
;
766 if (msg
->cork_bytes
&& msg
->cork_bytes
> msg
->sg
.size
&&
767 !enospc
&& !full_record
) {
773 if (msg
->apply_bytes
&& msg
->apply_bytes
< send
)
774 send
= msg
->apply_bytes
;
776 switch (psock
->eval
) {
778 err
= tls_push_record(sk
, flags
, record_type
);
780 *copied
-= sk_msg_free(sk
, msg
);
781 tls_free_open_rec(sk
);
786 sk_redir
= psock
->sk_redir
;
787 memcpy(&msg_redir
, msg
, sizeof(*msg
));
788 if (msg
->apply_bytes
< send
)
789 msg
->apply_bytes
= 0;
791 msg
->apply_bytes
-= send
;
792 sk_msg_return_zero(sk
, msg
, send
);
793 msg
->sg
.size
-= send
;
795 err
= tcp_bpf_sendmsg_redir(sk_redir
, &msg_redir
, send
, flags
);
798 *copied
-= sk_msg_free_nocharge(sk
, &msg_redir
);
801 if (msg
->sg
.size
== 0)
802 tls_free_open_rec(sk
);
806 sk_msg_free_partial(sk
, msg
, send
);
807 if (msg
->apply_bytes
< send
)
808 msg
->apply_bytes
= 0;
810 msg
->apply_bytes
-= send
;
811 if (msg
->sg
.size
== 0)
812 tls_free_open_rec(sk
);
813 *copied
-= (send
+ delta
);
818 bool reset_eval
= !ctx
->open_rec
;
822 msg
= &rec
->msg_plaintext
;
823 if (!msg
->apply_bytes
)
827 psock
->eval
= __SK_NONE
;
828 if (psock
->sk_redir
) {
829 sock_put(psock
->sk_redir
);
830 psock
->sk_redir
= NULL
;
837 sk_psock_put(sk
, psock
);
841 static int tls_sw_push_pending_record(struct sock
*sk
, int flags
)
843 struct tls_context
*tls_ctx
= tls_get_ctx(sk
);
844 struct tls_sw_context_tx
*ctx
= tls_sw_ctx_tx(tls_ctx
);
845 struct tls_rec
*rec
= ctx
->open_rec
;
846 struct sk_msg
*msg_pl
;
852 msg_pl
= &rec
->msg_plaintext
;
853 copied
= msg_pl
->sg
.size
;
857 return bpf_exec_tx_verdict(msg_pl
, sk
, true, TLS_RECORD_TYPE_DATA
,
861 int tls_sw_sendmsg(struct sock
*sk
, struct msghdr
*msg
, size_t size
)
863 long timeo
= sock_sndtimeo(sk
, msg
->msg_flags
& MSG_DONTWAIT
);
864 struct tls_context
*tls_ctx
= tls_get_ctx(sk
);
865 struct tls_prot_info
*prot
= &tls_ctx
->prot_info
;
866 struct tls_sw_context_tx
*ctx
= tls_sw_ctx_tx(tls_ctx
);
867 bool async_capable
= ctx
->async_capable
;
868 unsigned char record_type
= TLS_RECORD_TYPE_DATA
;
869 bool is_kvec
= iov_iter_is_kvec(&msg
->msg_iter
);
870 bool eor
= !(msg
->msg_flags
& MSG_MORE
);
871 size_t try_to_copy
, copied
= 0;
872 struct sk_msg
*msg_pl
, *msg_en
;
882 if (msg
->msg_flags
& ~(MSG_MORE
| MSG_DONTWAIT
| MSG_NOSIGNAL
))
887 /* Wait till there is any pending write on socket */
888 if (unlikely(sk
->sk_write_pending
)) {
889 ret
= wait_on_pending_writer(sk
, &timeo
);
894 if (unlikely(msg
->msg_controllen
)) {
895 ret
= tls_proccess_cmsg(sk
, msg
, &record_type
);
897 if (ret
== -EINPROGRESS
)
899 else if (ret
!= -EAGAIN
)
904 while (msg_data_left(msg
)) {
913 rec
= ctx
->open_rec
= tls_get_rec(sk
);
919 msg_pl
= &rec
->msg_plaintext
;
920 msg_en
= &rec
->msg_encrypted
;
922 orig_size
= msg_pl
->sg
.size
;
924 try_to_copy
= msg_data_left(msg
);
925 record_room
= TLS_MAX_PAYLOAD_SIZE
- msg_pl
->sg
.size
;
926 if (try_to_copy
>= record_room
) {
927 try_to_copy
= record_room
;
931 required_size
= msg_pl
->sg
.size
+ try_to_copy
+
934 if (!sk_stream_memory_free(sk
))
935 goto wait_for_sndbuf
;
938 ret
= tls_alloc_encrypted_msg(sk
, required_size
);
941 goto wait_for_memory
;
943 /* Adjust try_to_copy according to the amount that was
944 * actually allocated. The difference is due
945 * to max sg elements limit
947 try_to_copy
-= required_size
- msg_en
->sg
.size
;
951 if (!is_kvec
&& (full_record
|| eor
) && !async_capable
) {
952 u32 first
= msg_pl
->sg
.end
;
954 ret
= sk_msg_zerocopy_from_iter(sk
, &msg
->msg_iter
,
955 msg_pl
, try_to_copy
);
957 goto fallback_to_reg_send
;
959 rec
->inplace_crypto
= 0;
962 copied
+= try_to_copy
;
964 sk_msg_sg_copy_set(msg_pl
, first
);
965 ret
= bpf_exec_tx_verdict(msg_pl
, sk
, full_record
,
966 record_type
, &copied
,
969 if (ret
== -EINPROGRESS
)
971 else if (ret
== -ENOMEM
)
972 goto wait_for_memory
;
973 else if (ret
== -ENOSPC
)
975 else if (ret
!= -EAGAIN
)
980 copied
-= try_to_copy
;
981 sk_msg_sg_copy_clear(msg_pl
, first
);
982 iov_iter_revert(&msg
->msg_iter
,
983 msg_pl
->sg
.size
- orig_size
);
984 fallback_to_reg_send
:
985 sk_msg_trim(sk
, msg_pl
, orig_size
);
988 required_size
= msg_pl
->sg
.size
+ try_to_copy
;
990 ret
= tls_clone_plaintext_msg(sk
, required_size
);
995 /* Adjust try_to_copy according to the amount that was
996 * actually allocated. The difference is due
997 * to max sg elements limit
999 try_to_copy
-= required_size
- msg_pl
->sg
.size
;
1001 sk_msg_trim(sk
, msg_en
,
1002 msg_pl
->sg
.size
+ prot
->overhead_size
);
1006 ret
= sk_msg_memcopy_from_iter(sk
, &msg
->msg_iter
,
1007 msg_pl
, try_to_copy
);
1012 /* Open records defined only if successfully copied, otherwise
1013 * we would trim the sg but not reset the open record frags.
1015 tls_ctx
->pending_open_record_frags
= true;
1016 copied
+= try_to_copy
;
1017 if (full_record
|| eor
) {
1018 ret
= bpf_exec_tx_verdict(msg_pl
, sk
, full_record
,
1019 record_type
, &copied
,
1022 if (ret
== -EINPROGRESS
)
1024 else if (ret
== -ENOMEM
)
1025 goto wait_for_memory
;
1026 else if (ret
!= -EAGAIN
) {
1037 set_bit(SOCK_NOSPACE
, &sk
->sk_socket
->flags
);
1039 ret
= sk_stream_wait_memory(sk
, &timeo
);
1042 tls_trim_both_msgs(sk
, orig_size
);
1046 if (msg_en
->sg
.size
< required_size
)
1047 goto alloc_encrypted
;
1052 } else if (num_zc
) {
1053 /* Wait for pending encryptions to get completed */
1054 smp_store_mb(ctx
->async_notify
, true);
1056 if (atomic_read(&ctx
->encrypt_pending
))
1057 crypto_wait_req(-EINPROGRESS
, &ctx
->async_wait
);
1059 reinit_completion(&ctx
->async_wait
.completion
);
1061 WRITE_ONCE(ctx
->async_notify
, false);
1063 if (ctx
->async_wait
.err
) {
1064 ret
= ctx
->async_wait
.err
;
1069 /* Transmit if any encryptions have completed */
1070 if (test_and_clear_bit(BIT_TX_SCHEDULED
, &ctx
->tx_bitmask
)) {
1071 cancel_delayed_work(&ctx
->tx_work
.work
);
1072 tls_tx_records(sk
, msg
->msg_flags
);
1076 ret
= sk_stream_error(sk
, msg
->msg_flags
, ret
);
1079 return copied
? copied
: ret
;
1082 static int tls_sw_do_sendpage(struct sock
*sk
, struct page
*page
,
1083 int offset
, size_t size
, int flags
)
1085 long timeo
= sock_sndtimeo(sk
, flags
& MSG_DONTWAIT
);
1086 struct tls_context
*tls_ctx
= tls_get_ctx(sk
);
1087 struct tls_sw_context_tx
*ctx
= tls_sw_ctx_tx(tls_ctx
);
1088 struct tls_prot_info
*prot
= &tls_ctx
->prot_info
;
1089 unsigned char record_type
= TLS_RECORD_TYPE_DATA
;
1090 struct sk_msg
*msg_pl
;
1091 struct tls_rec
*rec
;
1099 eor
= !(flags
& (MSG_MORE
| MSG_SENDPAGE_NOTLAST
));
1100 sk_clear_bit(SOCKWQ_ASYNC_NOSPACE
, sk
);
1102 /* Wait till there is any pending write on socket */
1103 if (unlikely(sk
->sk_write_pending
)) {
1104 ret
= wait_on_pending_writer(sk
, &timeo
);
1109 /* Call the sk_stream functions to manage the sndbuf mem. */
1111 size_t copy
, required_size
;
1119 rec
= ctx
->open_rec
;
1121 rec
= ctx
->open_rec
= tls_get_rec(sk
);
1127 msg_pl
= &rec
->msg_plaintext
;
1129 full_record
= false;
1130 record_room
= TLS_MAX_PAYLOAD_SIZE
- msg_pl
->sg
.size
;
1132 if (copy
>= record_room
) {
1137 required_size
= msg_pl
->sg
.size
+ copy
+ prot
->overhead_size
;
1139 if (!sk_stream_memory_free(sk
))
1140 goto wait_for_sndbuf
;
1142 ret
= tls_alloc_encrypted_msg(sk
, required_size
);
1145 goto wait_for_memory
;
1147 /* Adjust copy according to the amount that was
1148 * actually allocated. The difference is due
1149 * to max sg elements limit
1151 copy
-= required_size
- msg_pl
->sg
.size
;
1155 sk_msg_page_add(msg_pl
, page
, copy
, offset
);
1156 sk_mem_charge(sk
, copy
);
1162 tls_ctx
->pending_open_record_frags
= true;
1163 if (full_record
|| eor
|| sk_msg_full(msg_pl
)) {
1164 rec
->inplace_crypto
= 0;
1165 ret
= bpf_exec_tx_verdict(msg_pl
, sk
, full_record
,
1166 record_type
, &copied
, flags
);
1168 if (ret
== -EINPROGRESS
)
1170 else if (ret
== -ENOMEM
)
1171 goto wait_for_memory
;
1172 else if (ret
!= -EAGAIN
) {
1181 set_bit(SOCK_NOSPACE
, &sk
->sk_socket
->flags
);
1183 ret
= sk_stream_wait_memory(sk
, &timeo
);
1185 tls_trim_both_msgs(sk
, msg_pl
->sg
.size
);
1193 /* Transmit if any encryptions have completed */
1194 if (test_and_clear_bit(BIT_TX_SCHEDULED
, &ctx
->tx_bitmask
)) {
1195 cancel_delayed_work(&ctx
->tx_work
.work
);
1196 tls_tx_records(sk
, flags
);
1200 ret
= sk_stream_error(sk
, flags
, ret
);
1201 return copied
? copied
: ret
;
1204 int tls_sw_sendpage(struct sock
*sk
, struct page
*page
,
1205 int offset
, size_t size
, int flags
)
1209 if (flags
& ~(MSG_MORE
| MSG_DONTWAIT
| MSG_NOSIGNAL
|
1210 MSG_SENDPAGE_NOTLAST
| MSG_SENDPAGE_NOPOLICY
))
1214 ret
= tls_sw_do_sendpage(sk
, page
, offset
, size
, flags
);
1219 static struct sk_buff
*tls_wait_data(struct sock
*sk
, struct sk_psock
*psock
,
1220 int flags
, long timeo
, int *err
)
1222 struct tls_context
*tls_ctx
= tls_get_ctx(sk
);
1223 struct tls_sw_context_rx
*ctx
= tls_sw_ctx_rx(tls_ctx
);
1224 struct sk_buff
*skb
;
1225 DEFINE_WAIT_FUNC(wait
, woken_wake_function
);
1227 while (!(skb
= ctx
->recv_pkt
) && sk_psock_queue_empty(psock
)) {
1229 *err
= sock_error(sk
);
1233 if (sk
->sk_shutdown
& RCV_SHUTDOWN
)
1236 if (sock_flag(sk
, SOCK_DONE
))
1239 if ((flags
& MSG_DONTWAIT
) || !timeo
) {
1244 add_wait_queue(sk_sleep(sk
), &wait
);
1245 sk_set_bit(SOCKWQ_ASYNC_WAITDATA
, sk
);
1246 sk_wait_event(sk
, &timeo
,
1247 ctx
->recv_pkt
!= skb
||
1248 !sk_psock_queue_empty(psock
),
1250 sk_clear_bit(SOCKWQ_ASYNC_WAITDATA
, sk
);
1251 remove_wait_queue(sk_sleep(sk
), &wait
);
1253 /* Handle signals */
1254 if (signal_pending(current
)) {
1255 *err
= sock_intr_errno(timeo
);
1263 static int tls_setup_from_iter(struct sock
*sk
, struct iov_iter
*from
,
1264 int length
, int *pages_used
,
1265 unsigned int *size_used
,
1266 struct scatterlist
*to
,
1269 int rc
= 0, i
= 0, num_elem
= *pages_used
, maxpages
;
1270 struct page
*pages
[MAX_SKB_FRAGS
];
1271 unsigned int size
= *size_used
;
1272 ssize_t copied
, use
;
1275 while (length
> 0) {
1277 maxpages
= to_max_pages
- num_elem
;
1278 if (maxpages
== 0) {
1282 copied
= iov_iter_get_pages(from
, pages
,
1290 iov_iter_advance(from
, copied
);
1295 use
= min_t(int, copied
, PAGE_SIZE
- offset
);
1297 sg_set_page(&to
[num_elem
],
1298 pages
[i
], use
, offset
);
1299 sg_unmark_end(&to
[num_elem
]);
1300 /* We do not uncharge memory from this API */
1309 /* Mark the end in the last sg entry if newly added */
1310 if (num_elem
> *pages_used
)
1311 sg_mark_end(&to
[num_elem
- 1]);
1314 iov_iter_revert(from
, size
- *size_used
);
1316 *pages_used
= num_elem
;
1321 /* This function decrypts the input skb into either out_iov or in out_sg
1322 * or in skb buffers itself. The input parameter 'zc' indicates if
1323 * zero-copy mode needs to be tried or not. With zero-copy mode, either
1324 * out_iov or out_sg must be non-NULL. In case both out_iov and out_sg are
1325 * NULL, then the decryption happens inside skb buffers itself, i.e.
1326 * zero-copy gets disabled and 'zc' is updated.
1329 static int decrypt_internal(struct sock
*sk
, struct sk_buff
*skb
,
1330 struct iov_iter
*out_iov
,
1331 struct scatterlist
*out_sg
,
1332 int *chunk
, bool *zc
, bool async
)
1334 struct tls_context
*tls_ctx
= tls_get_ctx(sk
);
1335 struct tls_sw_context_rx
*ctx
= tls_sw_ctx_rx(tls_ctx
);
1336 struct tls_prot_info
*prot
= &tls_ctx
->prot_info
;
1337 struct strp_msg
*rxm
= strp_msg(skb
);
1338 int n_sgin
, n_sgout
, nsg
, mem_size
, aead_size
, err
, pages
= 0;
1339 struct aead_request
*aead_req
;
1340 struct sk_buff
*unused
;
1341 u8
*aad
, *iv
, *mem
= NULL
;
1342 struct scatterlist
*sgin
= NULL
;
1343 struct scatterlist
*sgout
= NULL
;
1344 const int data_len
= rxm
->full_len
- prot
->overhead_size
+
1347 if (*zc
&& (out_iov
|| out_sg
)) {
1349 n_sgout
= iov_iter_npages(out_iov
, INT_MAX
) + 1;
1351 n_sgout
= sg_nents(out_sg
);
1352 n_sgin
= skb_nsg(skb
, rxm
->offset
+ prot
->prepend_size
,
1353 rxm
->full_len
- prot
->prepend_size
);
1357 n_sgin
= skb_cow_data(skb
, 0, &unused
);
1363 /* Increment to accommodate AAD */
1364 n_sgin
= n_sgin
+ 1;
1366 nsg
= n_sgin
+ n_sgout
;
1368 aead_size
= sizeof(*aead_req
) + crypto_aead_reqsize(ctx
->aead_recv
);
1369 mem_size
= aead_size
+ (nsg
* sizeof(struct scatterlist
));
1370 mem_size
= mem_size
+ prot
->aad_size
;
1371 mem_size
= mem_size
+ crypto_aead_ivsize(ctx
->aead_recv
);
1373 /* Allocate a single block of memory which contains
1374 * aead_req || sgin[] || sgout[] || aad || iv.
1375 * This order achieves correct alignment for aead_req, sgin, sgout.
1377 mem
= kmalloc(mem_size
, sk
->sk_allocation
);
1381 /* Segment the allocated memory */
1382 aead_req
= (struct aead_request
*)mem
;
1383 sgin
= (struct scatterlist
*)(mem
+ aead_size
);
1384 sgout
= sgin
+ n_sgin
;
1385 aad
= (u8
*)(sgout
+ n_sgout
);
1386 iv
= aad
+ prot
->aad_size
;
1389 err
= skb_copy_bits(skb
, rxm
->offset
+ TLS_HEADER_SIZE
,
1390 iv
+ TLS_CIPHER_AES_GCM_128_SALT_SIZE
,
1396 if (prot
->version
== TLS_1_3_VERSION
)
1397 memcpy(iv
, tls_ctx
->rx
.iv
, crypto_aead_ivsize(ctx
->aead_recv
));
1399 memcpy(iv
, tls_ctx
->rx
.iv
, TLS_CIPHER_AES_GCM_128_SALT_SIZE
);
1401 xor_iv_with_seq(prot
->version
, iv
, tls_ctx
->rx
.rec_seq
);
1404 tls_make_aad(aad
, rxm
->full_len
- prot
->overhead_size
+
1406 tls_ctx
->rx
.rec_seq
, prot
->rec_seq_size
,
1407 ctx
->control
, prot
->version
);
1410 sg_init_table(sgin
, n_sgin
);
1411 sg_set_buf(&sgin
[0], aad
, prot
->aad_size
);
1412 err
= skb_to_sgvec(skb
, &sgin
[1],
1413 rxm
->offset
+ prot
->prepend_size
,
1414 rxm
->full_len
- prot
->prepend_size
);
1422 sg_init_table(sgout
, n_sgout
);
1423 sg_set_buf(&sgout
[0], aad
, prot
->aad_size
);
1426 err
= tls_setup_from_iter(sk
, out_iov
, data_len
,
1427 &pages
, chunk
, &sgout
[1],
1430 goto fallback_to_reg_recv
;
1431 } else if (out_sg
) {
1432 memcpy(sgout
, out_sg
, n_sgout
* sizeof(*sgout
));
1434 goto fallback_to_reg_recv
;
1437 fallback_to_reg_recv
:
1444 /* Prepare and submit AEAD request */
1445 err
= tls_do_decryption(sk
, skb
, sgin
, sgout
, iv
,
1446 data_len
, aead_req
, async
);
1447 if (err
== -EINPROGRESS
)
1450 /* Release the pages in case iov was mapped to pages */
1451 for (; pages
> 0; pages
--)
1452 put_page(sg_page(&sgout
[pages
]));
1458 static int decrypt_skb_update(struct sock
*sk
, struct sk_buff
*skb
,
1459 struct iov_iter
*dest
, int *chunk
, bool *zc
,
1462 struct tls_context
*tls_ctx
= tls_get_ctx(sk
);
1463 struct tls_sw_context_rx
*ctx
= tls_sw_ctx_rx(tls_ctx
);
1464 struct tls_prot_info
*prot
= &tls_ctx
->prot_info
;
1465 int version
= prot
->version
;
1466 struct strp_msg
*rxm
= strp_msg(skb
);
1469 if (!ctx
->decrypted
) {
1470 #ifdef CONFIG_TLS_DEVICE
1471 err
= tls_device_decrypted(sk
, skb
);
1475 /* Still not decrypted after tls_device */
1476 if (!ctx
->decrypted
) {
1477 err
= decrypt_internal(sk
, skb
, dest
, NULL
, chunk
, zc
,
1480 if (err
== -EINPROGRESS
)
1481 tls_advance_record_sn(sk
, &tls_ctx
->rx
,
1490 rxm
->full_len
-= padding_length(ctx
, tls_ctx
, skb
);
1491 rxm
->offset
+= prot
->prepend_size
;
1492 rxm
->full_len
-= prot
->overhead_size
;
1493 tls_advance_record_sn(sk
, &tls_ctx
->rx
, version
);
1494 ctx
->decrypted
= true;
1495 ctx
->saved_data_ready(sk
);
1503 int decrypt_skb(struct sock
*sk
, struct sk_buff
*skb
,
1504 struct scatterlist
*sgout
)
1509 return decrypt_internal(sk
, skb
, NULL
, sgout
, &chunk
, &zc
, false);
1512 static bool tls_sw_advance_skb(struct sock
*sk
, struct sk_buff
*skb
,
1515 struct tls_context
*tls_ctx
= tls_get_ctx(sk
);
1516 struct tls_sw_context_rx
*ctx
= tls_sw_ctx_rx(tls_ctx
);
1519 struct strp_msg
*rxm
= strp_msg(skb
);
1521 if (len
< rxm
->full_len
) {
1523 rxm
->full_len
-= len
;
1529 /* Finished with message */
1530 ctx
->recv_pkt
= NULL
;
1531 __strp_unpause(&ctx
->strp
);
1536 /* This function traverses the rx_list in the tls receive context and copies
1537 * the decrypted records into the buffer provided by the caller when zero copy
1538 * is not true. Further, the records are removed from the rx_list if it is not
1539 * a peek case and the record has been consumed completely.
1541 static int process_rx_list(struct tls_sw_context_rx
*ctx
,
1550 struct sk_buff
*skb
= skb_peek(&ctx
->rx_list
);
1553 struct tls_msg
*tlm
;
1556 /* Set the record type in 'control' if caller didn't pass it */
1559 ctrl
= tlm
->control
;
1562 while (skip
&& skb
) {
1563 struct strp_msg
*rxm
= strp_msg(skb
);
1566 /* Cannot process a record of different type */
1567 if (ctrl
!= tlm
->control
)
1570 if (skip
< rxm
->full_len
)
1573 skip
= skip
- rxm
->full_len
;
1574 skb
= skb_peek_next(skb
, &ctx
->rx_list
);
1577 while (len
&& skb
) {
1578 struct sk_buff
*next_skb
;
1579 struct strp_msg
*rxm
= strp_msg(skb
);
1580 int chunk
= min_t(unsigned int, rxm
->full_len
- skip
, len
);
1584 /* Cannot process a record of different type */
1585 if (ctrl
!= tlm
->control
)
1588 /* Set record type if not already done. For a non-data record,
1589 * do not proceed if record type could not be copied.
1592 int cerr
= put_cmsg(msg
, SOL_TLS
, TLS_GET_RECORD_TYPE
,
1593 sizeof(ctrl
), &ctrl
);
1595 if (ctrl
!= TLS_RECORD_TYPE_DATA
) {
1596 if (cerr
|| msg
->msg_flags
& MSG_CTRUNC
)
1603 if (!zc
|| (rxm
->full_len
- skip
) > len
) {
1604 int err
= skb_copy_datagram_msg(skb
, rxm
->offset
+ skip
,
1611 copied
= copied
+ chunk
;
1613 /* Consume the data from record if it is non-peek case*/
1615 rxm
->offset
= rxm
->offset
+ chunk
;
1616 rxm
->full_len
= rxm
->full_len
- chunk
;
1618 /* Return if there is unconsumed data in the record */
1619 if (rxm
->full_len
- skip
)
1623 /* The remaining skip-bytes must lie in 1st record in rx_list.
1624 * So from the 2nd record, 'skip' should be 0.
1629 msg
->msg_flags
|= MSG_EOR
;
1631 next_skb
= skb_peek_next(skb
, &ctx
->rx_list
);
1634 skb_unlink(skb
, &ctx
->rx_list
);
1645 int tls_sw_recvmsg(struct sock
*sk
,
1652 struct tls_context
*tls_ctx
= tls_get_ctx(sk
);
1653 struct tls_sw_context_rx
*ctx
= tls_sw_ctx_rx(tls_ctx
);
1654 struct tls_prot_info
*prot
= &tls_ctx
->prot_info
;
1655 struct sk_psock
*psock
;
1656 unsigned char control
= 0;
1657 ssize_t decrypted
= 0;
1658 struct strp_msg
*rxm
;
1659 struct tls_msg
*tlm
;
1660 struct sk_buff
*skb
;
1663 int target
, err
= 0;
1665 bool is_kvec
= iov_iter_is_kvec(&msg
->msg_iter
);
1666 bool is_peek
= flags
& MSG_PEEK
;
1671 if (unlikely(flags
& MSG_ERRQUEUE
))
1672 return sock_recv_errqueue(sk
, msg
, len
, SOL_IP
, IP_RECVERR
);
1674 psock
= sk_psock_get(sk
);
1677 /* Process pending decrypted records. It must be non-zero-copy */
1678 err
= process_rx_list(ctx
, msg
, &control
, &cmsg
, 0, len
, false,
1681 tls_err_abort(sk
, err
);
1690 target
= sock_rcvlowat(sk
, flags
& MSG_WAITALL
, len
);
1692 timeo
= sock_rcvtimeo(sk
, flags
& MSG_DONTWAIT
);
1694 while (len
&& (decrypted
+ copied
< target
|| ctx
->recv_pkt
)) {
1695 bool retain_skb
= false;
1702 skb
= tls_wait_data(sk
, psock
, flags
, timeo
, &err
);
1705 int ret
= __tcp_bpf_recvmsg(sk
, psock
,
1717 if (prot
->version
== TLS_1_3_VERSION
)
1720 tlm
->control
= ctx
->control
;
1723 rxm
= strp_msg(skb
);
1725 to_decrypt
= rxm
->full_len
- prot
->overhead_size
;
1727 if (to_decrypt
<= len
&& !is_kvec
&& !is_peek
&&
1728 ctx
->control
== TLS_RECORD_TYPE_DATA
&&
1729 prot
->version
!= TLS_1_3_VERSION
)
1732 /* Do not use async mode if record is non-data */
1733 if (ctx
->control
== TLS_RECORD_TYPE_DATA
)
1734 async_capable
= ctx
->async_capable
;
1736 async_capable
= false;
1738 err
= decrypt_skb_update(sk
, skb
, &msg
->msg_iter
,
1739 &chunk
, &zc
, async_capable
);
1740 if (err
< 0 && err
!= -EINPROGRESS
) {
1741 tls_err_abort(sk
, EBADMSG
);
1745 if (err
== -EINPROGRESS
) {
1748 } else if (prot
->version
== TLS_1_3_VERSION
) {
1749 tlm
->control
= ctx
->control
;
1752 /* If the type of records being processed is not known yet,
1753 * set it to record type just dequeued. If it is already known,
1754 * but does not match the record type just dequeued, go to end.
1755 * We always get record type here since for tls1.2, record type
1756 * is known just after record is dequeued from stream parser.
1757 * For tls1.3, we disable async.
1761 control
= tlm
->control
;
1762 else if (control
!= tlm
->control
)
1768 cerr
= put_cmsg(msg
, SOL_TLS
, TLS_GET_RECORD_TYPE
,
1769 sizeof(control
), &control
);
1771 if (control
!= TLS_RECORD_TYPE_DATA
) {
1772 if (cerr
|| msg
->msg_flags
& MSG_CTRUNC
) {
1780 goto pick_next_record
;
1783 if (rxm
->full_len
> len
) {
1787 chunk
= rxm
->full_len
;
1790 err
= skb_copy_datagram_msg(skb
, rxm
->offset
,
1796 rxm
->offset
= rxm
->offset
+ chunk
;
1797 rxm
->full_len
= rxm
->full_len
- chunk
;
1808 /* For async or peek case, queue the current skb */
1809 if (async
|| is_peek
|| retain_skb
) {
1810 skb_queue_tail(&ctx
->rx_list
, skb
);
1814 if (tls_sw_advance_skb(sk
, skb
, chunk
)) {
1815 /* Return full control message to
1816 * userspace before trying to parse
1817 * another message type
1819 msg
->msg_flags
|= MSG_EOR
;
1820 if (ctx
->control
!= TLS_RECORD_TYPE_DATA
)
1829 /* Wait for all previously submitted records to be decrypted */
1830 smp_store_mb(ctx
->async_notify
, true);
1831 if (atomic_read(&ctx
->decrypt_pending
)) {
1832 err
= crypto_wait_req(-EINPROGRESS
, &ctx
->async_wait
);
1834 /* one of async decrypt failed */
1835 tls_err_abort(sk
, err
);
1841 reinit_completion(&ctx
->async_wait
.completion
);
1843 WRITE_ONCE(ctx
->async_notify
, false);
1845 /* Drain records from the rx_list & copy if required */
1846 if (is_peek
|| is_kvec
)
1847 err
= process_rx_list(ctx
, msg
, &control
, &cmsg
, copied
,
1848 decrypted
, false, is_peek
);
1850 err
= process_rx_list(ctx
, msg
, &control
, &cmsg
, 0,
1851 decrypted
, true, is_peek
);
1853 tls_err_abort(sk
, err
);
1859 copied
+= decrypted
;
1864 sk_psock_put(sk
, psock
);
1865 return copied
? : err
;
1868 ssize_t
tls_sw_splice_read(struct socket
*sock
, loff_t
*ppos
,
1869 struct pipe_inode_info
*pipe
,
1870 size_t len
, unsigned int flags
)
1872 struct tls_context
*tls_ctx
= tls_get_ctx(sock
->sk
);
1873 struct tls_sw_context_rx
*ctx
= tls_sw_ctx_rx(tls_ctx
);
1874 struct strp_msg
*rxm
= NULL
;
1875 struct sock
*sk
= sock
->sk
;
1876 struct sk_buff
*skb
;
1885 timeo
= sock_rcvtimeo(sk
, flags
& MSG_DONTWAIT
);
1887 skb
= tls_wait_data(sk
, NULL
, flags
, timeo
, &err
);
1889 goto splice_read_end
;
1891 if (!ctx
->decrypted
) {
1892 err
= decrypt_skb_update(sk
, skb
, NULL
, &chunk
, &zc
, false);
1894 /* splice does not support reading control messages */
1895 if (ctx
->control
!= TLS_RECORD_TYPE_DATA
) {
1897 goto splice_read_end
;
1901 tls_err_abort(sk
, EBADMSG
);
1902 goto splice_read_end
;
1904 ctx
->decrypted
= true;
1906 rxm
= strp_msg(skb
);
1908 chunk
= min_t(unsigned int, rxm
->full_len
, len
);
1909 copied
= skb_splice_bits(skb
, sk
, rxm
->offset
, pipe
, chunk
, flags
);
1911 goto splice_read_end
;
1913 if (likely(!(flags
& MSG_PEEK
)))
1914 tls_sw_advance_skb(sk
, skb
, copied
);
1918 return copied
? : err
;
1921 bool tls_sw_stream_read(const struct sock
*sk
)
1923 struct tls_context
*tls_ctx
= tls_get_ctx(sk
);
1924 struct tls_sw_context_rx
*ctx
= tls_sw_ctx_rx(tls_ctx
);
1925 bool ingress_empty
= true;
1926 struct sk_psock
*psock
;
1929 psock
= sk_psock(sk
);
1931 ingress_empty
= list_empty(&psock
->ingress_msg
);
1934 return !ingress_empty
|| ctx
->recv_pkt
;
1937 static int tls_read_size(struct strparser
*strp
, struct sk_buff
*skb
)
1939 struct tls_context
*tls_ctx
= tls_get_ctx(strp
->sk
);
1940 struct tls_sw_context_rx
*ctx
= tls_sw_ctx_rx(tls_ctx
);
1941 struct tls_prot_info
*prot
= &tls_ctx
->prot_info
;
1942 char header
[TLS_HEADER_SIZE
+ MAX_IV_SIZE
];
1943 struct strp_msg
*rxm
= strp_msg(skb
);
1944 size_t cipher_overhead
;
1945 size_t data_len
= 0;
1948 /* Verify that we have a full TLS header, or wait for more data */
1949 if (rxm
->offset
+ prot
->prepend_size
> skb
->len
)
1952 /* Sanity-check size of on-stack buffer. */
1953 if (WARN_ON(prot
->prepend_size
> sizeof(header
))) {
1958 /* Linearize header to local buffer */
1959 ret
= skb_copy_bits(skb
, rxm
->offset
, header
, prot
->prepend_size
);
1964 ctx
->control
= header
[0];
1966 data_len
= ((header
[4] & 0xFF) | (header
[3] << 8));
1968 cipher_overhead
= prot
->tag_size
;
1969 if (prot
->version
!= TLS_1_3_VERSION
)
1970 cipher_overhead
+= prot
->iv_size
;
1972 if (data_len
> TLS_MAX_PAYLOAD_SIZE
+ cipher_overhead
+
1977 if (data_len
< cipher_overhead
) {
1982 /* Note that both TLS1.3 and TLS1.2 use TLS_1_2 version here */
1983 if (header
[1] != TLS_1_2_VERSION_MINOR
||
1984 header
[2] != TLS_1_2_VERSION_MAJOR
) {
1988 #ifdef CONFIG_TLS_DEVICE
1989 handle_device_resync(strp
->sk
, TCP_SKB_CB(skb
)->seq
+ rxm
->offset
,
1990 *(u64
*)tls_ctx
->rx
.rec_seq
);
1992 return data_len
+ TLS_HEADER_SIZE
;
1995 tls_err_abort(strp
->sk
, ret
);
2000 static void tls_queue(struct strparser
*strp
, struct sk_buff
*skb
)
2002 struct tls_context
*tls_ctx
= tls_get_ctx(strp
->sk
);
2003 struct tls_sw_context_rx
*ctx
= tls_sw_ctx_rx(tls_ctx
);
2005 ctx
->decrypted
= false;
2007 ctx
->recv_pkt
= skb
;
2010 ctx
->saved_data_ready(strp
->sk
);
2013 static void tls_data_ready(struct sock
*sk
)
2015 struct tls_context
*tls_ctx
= tls_get_ctx(sk
);
2016 struct tls_sw_context_rx
*ctx
= tls_sw_ctx_rx(tls_ctx
);
2017 struct sk_psock
*psock
;
2019 strp_data_ready(&ctx
->strp
);
2021 psock
= sk_psock_get(sk
);
2022 if (psock
&& !list_empty(&psock
->ingress_msg
)) {
2023 ctx
->saved_data_ready(sk
);
2024 sk_psock_put(sk
, psock
);
2028 void tls_sw_free_resources_tx(struct sock
*sk
)
2030 struct tls_context
*tls_ctx
= tls_get_ctx(sk
);
2031 struct tls_sw_context_tx
*ctx
= tls_sw_ctx_tx(tls_ctx
);
2032 struct tls_rec
*rec
, *tmp
;
2034 /* Wait for any pending async encryptions to complete */
2035 smp_store_mb(ctx
->async_notify
, true);
2036 if (atomic_read(&ctx
->encrypt_pending
))
2037 crypto_wait_req(-EINPROGRESS
, &ctx
->async_wait
);
2040 cancel_delayed_work_sync(&ctx
->tx_work
.work
);
2043 /* Tx whatever records we can transmit and abandon the rest */
2044 tls_tx_records(sk
, -1);
2046 /* Free up un-sent records in tx_list. First, free
2047 * the partially sent record if any at head of tx_list.
2049 if (tls_free_partial_record(sk
, tls_ctx
)) {
2050 rec
= list_first_entry(&ctx
->tx_list
,
2051 struct tls_rec
, list
);
2052 list_del(&rec
->list
);
2053 sk_msg_free(sk
, &rec
->msg_plaintext
);
2057 list_for_each_entry_safe(rec
, tmp
, &ctx
->tx_list
, list
) {
2058 list_del(&rec
->list
);
2059 sk_msg_free(sk
, &rec
->msg_encrypted
);
2060 sk_msg_free(sk
, &rec
->msg_plaintext
);
2064 crypto_free_aead(ctx
->aead_send
);
2065 tls_free_open_rec(sk
);
2070 void tls_sw_release_resources_rx(struct sock
*sk
)
2072 struct tls_context
*tls_ctx
= tls_get_ctx(sk
);
2073 struct tls_sw_context_rx
*ctx
= tls_sw_ctx_rx(tls_ctx
);
2075 kfree(tls_ctx
->rx
.rec_seq
);
2076 kfree(tls_ctx
->rx
.iv
);
2078 if (ctx
->aead_recv
) {
2079 kfree_skb(ctx
->recv_pkt
);
2080 ctx
->recv_pkt
= NULL
;
2081 skb_queue_purge(&ctx
->rx_list
);
2082 crypto_free_aead(ctx
->aead_recv
);
2083 strp_stop(&ctx
->strp
);
2084 write_lock_bh(&sk
->sk_callback_lock
);
2085 sk
->sk_data_ready
= ctx
->saved_data_ready
;
2086 write_unlock_bh(&sk
->sk_callback_lock
);
2088 strp_done(&ctx
->strp
);
2093 void tls_sw_free_resources_rx(struct sock
*sk
)
2095 struct tls_context
*tls_ctx
= tls_get_ctx(sk
);
2096 struct tls_sw_context_rx
*ctx
= tls_sw_ctx_rx(tls_ctx
);
2098 tls_sw_release_resources_rx(sk
);
2103 /* The work handler to transmit the encrypted records in tx_list */
2104 static void tx_work_handler(struct work_struct
*work
)
2106 struct delayed_work
*delayed_work
= to_delayed_work(work
);
2107 struct tx_work
*tx_work
= container_of(delayed_work
,
2108 struct tx_work
, work
);
2109 struct sock
*sk
= tx_work
->sk
;
2110 struct tls_context
*tls_ctx
= tls_get_ctx(sk
);
2111 struct tls_sw_context_tx
*ctx
= tls_sw_ctx_tx(tls_ctx
);
2113 if (!test_and_clear_bit(BIT_TX_SCHEDULED
, &ctx
->tx_bitmask
))
2117 tls_tx_records(sk
, -1);
2121 void tls_sw_write_space(struct sock
*sk
, struct tls_context
*ctx
)
2123 struct tls_sw_context_tx
*tx_ctx
= tls_sw_ctx_tx(ctx
);
2125 /* Schedule the transmission if tx list is ready */
2126 if (is_tx_ready(tx_ctx
) && !sk
->sk_write_pending
) {
2127 /* Schedule the transmission */
2128 if (!test_and_set_bit(BIT_TX_SCHEDULED
,
2129 &tx_ctx
->tx_bitmask
))
2130 schedule_delayed_work(&tx_ctx
->tx_work
.work
, 0);
2134 int tls_set_sw_offload(struct sock
*sk
, struct tls_context
*ctx
, int tx
)
2136 struct tls_context
*tls_ctx
= tls_get_ctx(sk
);
2137 struct tls_prot_info
*prot
= &tls_ctx
->prot_info
;
2138 struct tls_crypto_info
*crypto_info
;
2139 struct tls12_crypto_info_aes_gcm_128
*gcm_128_info
;
2140 struct tls12_crypto_info_aes_gcm_256
*gcm_256_info
;
2141 struct tls_sw_context_tx
*sw_ctx_tx
= NULL
;
2142 struct tls_sw_context_rx
*sw_ctx_rx
= NULL
;
2143 struct cipher_context
*cctx
;
2144 struct crypto_aead
**aead
;
2145 struct strp_callbacks cb
;
2146 u16 nonce_size
, tag_size
, iv_size
, rec_seq_size
;
2147 struct crypto_tfm
*tfm
;
2148 char *iv
, *rec_seq
, *key
, *salt
;
2158 if (!ctx
->priv_ctx_tx
) {
2159 sw_ctx_tx
= kzalloc(sizeof(*sw_ctx_tx
), GFP_KERNEL
);
2164 ctx
->priv_ctx_tx
= sw_ctx_tx
;
2167 (struct tls_sw_context_tx
*)ctx
->priv_ctx_tx
;
2170 if (!ctx
->priv_ctx_rx
) {
2171 sw_ctx_rx
= kzalloc(sizeof(*sw_ctx_rx
), GFP_KERNEL
);
2176 ctx
->priv_ctx_rx
= sw_ctx_rx
;
2179 (struct tls_sw_context_rx
*)ctx
->priv_ctx_rx
;
2184 crypto_init_wait(&sw_ctx_tx
->async_wait
);
2185 crypto_info
= &ctx
->crypto_send
.info
;
2187 aead
= &sw_ctx_tx
->aead_send
;
2188 INIT_LIST_HEAD(&sw_ctx_tx
->tx_list
);
2189 INIT_DELAYED_WORK(&sw_ctx_tx
->tx_work
.work
, tx_work_handler
);
2190 sw_ctx_tx
->tx_work
.sk
= sk
;
2192 crypto_init_wait(&sw_ctx_rx
->async_wait
);
2193 crypto_info
= &ctx
->crypto_recv
.info
;
2195 skb_queue_head_init(&sw_ctx_rx
->rx_list
);
2196 aead
= &sw_ctx_rx
->aead_recv
;
2199 switch (crypto_info
->cipher_type
) {
2200 case TLS_CIPHER_AES_GCM_128
: {
2201 nonce_size
= TLS_CIPHER_AES_GCM_128_IV_SIZE
;
2202 tag_size
= TLS_CIPHER_AES_GCM_128_TAG_SIZE
;
2203 iv_size
= TLS_CIPHER_AES_GCM_128_IV_SIZE
;
2204 iv
= ((struct tls12_crypto_info_aes_gcm_128
*)crypto_info
)->iv
;
2205 rec_seq_size
= TLS_CIPHER_AES_GCM_128_REC_SEQ_SIZE
;
2207 ((struct tls12_crypto_info_aes_gcm_128
*)crypto_info
)->rec_seq
;
2209 (struct tls12_crypto_info_aes_gcm_128
*)crypto_info
;
2210 keysize
= TLS_CIPHER_AES_GCM_128_KEY_SIZE
;
2211 key
= gcm_128_info
->key
;
2212 salt
= gcm_128_info
->salt
;
2215 case TLS_CIPHER_AES_GCM_256
: {
2216 nonce_size
= TLS_CIPHER_AES_GCM_256_IV_SIZE
;
2217 tag_size
= TLS_CIPHER_AES_GCM_256_TAG_SIZE
;
2218 iv_size
= TLS_CIPHER_AES_GCM_256_IV_SIZE
;
2219 iv
= ((struct tls12_crypto_info_aes_gcm_256
*)crypto_info
)->iv
;
2220 rec_seq_size
= TLS_CIPHER_AES_GCM_256_REC_SEQ_SIZE
;
2222 ((struct tls12_crypto_info_aes_gcm_256
*)crypto_info
)->rec_seq
;
2224 (struct tls12_crypto_info_aes_gcm_256
*)crypto_info
;
2225 keysize
= TLS_CIPHER_AES_GCM_256_KEY_SIZE
;
2226 key
= gcm_256_info
->key
;
2227 salt
= gcm_256_info
->salt
;
2235 /* Sanity-check the IV size for stack allocations. */
2236 if (iv_size
> MAX_IV_SIZE
|| nonce_size
> MAX_IV_SIZE
) {
2241 if (crypto_info
->version
== TLS_1_3_VERSION
) {
2243 prot
->aad_size
= TLS_HEADER_SIZE
;
2244 prot
->tail_size
= 1;
2246 prot
->aad_size
= TLS_AAD_SPACE_SIZE
;
2247 prot
->tail_size
= 0;
2250 prot
->version
= crypto_info
->version
;
2251 prot
->cipher_type
= crypto_info
->cipher_type
;
2252 prot
->prepend_size
= TLS_HEADER_SIZE
+ nonce_size
;
2253 prot
->tag_size
= tag_size
;
2254 prot
->overhead_size
= prot
->prepend_size
+
2255 prot
->tag_size
+ prot
->tail_size
;
2256 prot
->iv_size
= iv_size
;
2257 cctx
->iv
= kmalloc(iv_size
+ TLS_CIPHER_AES_GCM_128_SALT_SIZE
,
2263 /* Note: 128 & 256 bit salt are the same size */
2264 memcpy(cctx
->iv
, salt
, TLS_CIPHER_AES_GCM_128_SALT_SIZE
);
2265 memcpy(cctx
->iv
+ TLS_CIPHER_AES_GCM_128_SALT_SIZE
, iv
, iv_size
);
2266 prot
->rec_seq_size
= rec_seq_size
;
2267 cctx
->rec_seq
= kmemdup(rec_seq
, rec_seq_size
, GFP_KERNEL
);
2268 if (!cctx
->rec_seq
) {
2274 *aead
= crypto_alloc_aead("gcm(aes)", 0, 0);
2275 if (IS_ERR(*aead
)) {
2276 rc
= PTR_ERR(*aead
);
2282 ctx
->push_pending_record
= tls_sw_push_pending_record
;
2284 rc
= crypto_aead_setkey(*aead
, key
, keysize
);
2289 rc
= crypto_aead_setauthsize(*aead
, prot
->tag_size
);
2294 tfm
= crypto_aead_tfm(sw_ctx_rx
->aead_recv
);
2296 if (crypto_info
->version
== TLS_1_3_VERSION
)
2297 sw_ctx_rx
->async_capable
= false;
2299 sw_ctx_rx
->async_capable
=
2300 tfm
->__crt_alg
->cra_flags
& CRYPTO_ALG_ASYNC
;
2302 /* Set up strparser */
2303 memset(&cb
, 0, sizeof(cb
));
2304 cb
.rcv_msg
= tls_queue
;
2305 cb
.parse_msg
= tls_read_size
;
2307 strp_init(&sw_ctx_rx
->strp
, sk
, &cb
);
2309 write_lock_bh(&sk
->sk_callback_lock
);
2310 sw_ctx_rx
->saved_data_ready
= sk
->sk_data_ready
;
2311 sk
->sk_data_ready
= tls_data_ready
;
2312 write_unlock_bh(&sk
->sk_callback_lock
);
2314 strp_check_rcv(&sw_ctx_rx
->strp
);
2320 crypto_free_aead(*aead
);
2323 kfree(cctx
->rec_seq
);
2324 cctx
->rec_seq
= NULL
;
2330 kfree(ctx
->priv_ctx_tx
);
2331 ctx
->priv_ctx_tx
= NULL
;
2333 kfree(ctx
->priv_ctx_rx
);
2334 ctx
->priv_ctx_rx
= NULL
;