2 * Copyright (c) 2016-2017, Mellanox Technologies. All rights reserved.
3 * Copyright (c) 2016-2017, Dave Watson <davejwatson@fb.com>. All rights reserved.
4 * Copyright (c) 2016-2017, Lance Chao <lancerchao@fb.com>. All rights reserved.
5 * Copyright (c) 2016, Fridolin Pokorny <fridolin.pokorny@gmail.com>. All rights reserved.
6 * Copyright (c) 2016, Nikos Mavrogiannopoulos <nmav@gnutls.org>. All rights reserved.
7 * Copyright (c) 2018, Covalent IO, Inc. http://covalent.io
9 * This software is available to you under a choice of one of two
10 * licenses. You may choose to be licensed under the terms of the GNU
11 * General Public License (GPL) Version 2, available from the file
12 * COPYING in the main directory of this source tree, or the
13 * OpenIB.org BSD license below:
15 * Redistribution and use in source and binary forms, with or
16 * without modification, are permitted provided that the following
19 * - Redistributions of source code must retain the above
20 * copyright notice, this list of conditions and the following
23 * - Redistributions in binary form must reproduce the above
24 * copyright notice, this list of conditions and the following
25 * disclaimer in the documentation and/or other materials
26 * provided with the distribution.
28 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
29 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
30 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
31 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
32 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
33 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
34 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
38 #include <linux/sched/signal.h>
39 #include <linux/module.h>
40 #include <crypto/aead.h>
42 #include <net/strparser.h>
45 #define MAX_IV_SIZE TLS_CIPHER_AES_GCM_128_IV_SIZE
47 static int __skb_nsg(struct sk_buff
*skb
, int offset
, int len
,
48 unsigned int recursion_level
)
50 int start
= skb_headlen(skb
);
51 int i
, chunk
= start
- offset
;
52 struct sk_buff
*frag_iter
;
55 if (unlikely(recursion_level
>= 24))
68 for (i
= 0; i
< skb_shinfo(skb
)->nr_frags
; i
++) {
71 WARN_ON(start
> offset
+ len
);
73 end
= start
+ skb_frag_size(&skb_shinfo(skb
)->frags
[i
]);
87 if (unlikely(skb_has_frag_list(skb
))) {
88 skb_walk_frags(skb
, frag_iter
) {
91 WARN_ON(start
> offset
+ len
);
93 end
= start
+ frag_iter
->len
;
98 ret
= __skb_nsg(frag_iter
, offset
- start
, chunk
,
100 if (unlikely(ret
< 0))
115 /* Return the number of scatterlist elements required to completely map the
116 * skb, or -EMSGSIZE if the recursion depth is exceeded.
118 static int skb_nsg(struct sk_buff
*skb
, int offset
, int len
)
120 return __skb_nsg(skb
, offset
, len
, 0);
123 static void tls_decrypt_done(struct crypto_async_request
*req
, int err
)
125 struct aead_request
*aead_req
= (struct aead_request
*)req
;
126 struct scatterlist
*sgout
= aead_req
->dst
;
127 struct tls_sw_context_rx
*ctx
;
128 struct tls_context
*tls_ctx
;
129 struct scatterlist
*sg
;
134 skb
= (struct sk_buff
*)req
->data
;
135 tls_ctx
= tls_get_ctx(skb
->sk
);
136 ctx
= tls_sw_ctx_rx(tls_ctx
);
137 pending
= atomic_dec_return(&ctx
->decrypt_pending
);
139 /* Propagate if there was an err */
141 ctx
->async_wait
.err
= err
;
142 tls_err_abort(skb
->sk
, err
);
145 /* After using skb->sk to propagate sk through crypto async callback
146 * we need to NULL it again.
150 /* Release the skb, pages and memory allocated for crypto req */
153 /* Skip the first S/G entry as it points to AAD */
154 for_each_sg(sg_next(sgout
), sg
, UINT_MAX
, pages
) {
157 put_page(sg_page(sg
));
162 if (!pending
&& READ_ONCE(ctx
->async_notify
))
163 complete(&ctx
->async_wait
.completion
);
166 static int tls_do_decryption(struct sock
*sk
,
168 struct scatterlist
*sgin
,
169 struct scatterlist
*sgout
,
172 struct aead_request
*aead_req
,
175 struct tls_context
*tls_ctx
= tls_get_ctx(sk
);
176 struct tls_sw_context_rx
*ctx
= tls_sw_ctx_rx(tls_ctx
);
179 aead_request_set_tfm(aead_req
, ctx
->aead_recv
);
180 aead_request_set_ad(aead_req
, TLS_AAD_SPACE_SIZE
);
181 aead_request_set_crypt(aead_req
, sgin
, sgout
,
182 data_len
+ tls_ctx
->rx
.tag_size
,
186 /* Using skb->sk to push sk through to crypto async callback
187 * handler. This allows propagating errors up to the socket
188 * if needed. It _must_ be cleared in the async handler
189 * before kfree_skb is called. We _know_ skb->sk is NULL
190 * because it is a clone from strparser.
193 aead_request_set_callback(aead_req
,
194 CRYPTO_TFM_REQ_MAY_BACKLOG
,
195 tls_decrypt_done
, skb
);
196 atomic_inc(&ctx
->decrypt_pending
);
198 aead_request_set_callback(aead_req
,
199 CRYPTO_TFM_REQ_MAY_BACKLOG
,
200 crypto_req_done
, &ctx
->async_wait
);
203 ret
= crypto_aead_decrypt(aead_req
);
204 if (ret
== -EINPROGRESS
) {
208 ret
= crypto_wait_req(ret
, &ctx
->async_wait
);
212 atomic_dec(&ctx
->decrypt_pending
);
217 static void tls_trim_both_msgs(struct sock
*sk
, int target_size
)
219 struct tls_context
*tls_ctx
= tls_get_ctx(sk
);
220 struct tls_sw_context_tx
*ctx
= tls_sw_ctx_tx(tls_ctx
);
221 struct tls_rec
*rec
= ctx
->open_rec
;
223 sk_msg_trim(sk
, &rec
->msg_plaintext
, target_size
);
225 target_size
+= tls_ctx
->tx
.overhead_size
;
226 sk_msg_trim(sk
, &rec
->msg_encrypted
, target_size
);
229 static int tls_alloc_encrypted_msg(struct sock
*sk
, int len
)
231 struct tls_context
*tls_ctx
= tls_get_ctx(sk
);
232 struct tls_sw_context_tx
*ctx
= tls_sw_ctx_tx(tls_ctx
);
233 struct tls_rec
*rec
= ctx
->open_rec
;
234 struct sk_msg
*msg_en
= &rec
->msg_encrypted
;
236 return sk_msg_alloc(sk
, msg_en
, len
, 0);
239 static int tls_clone_plaintext_msg(struct sock
*sk
, int required
)
241 struct tls_context
*tls_ctx
= tls_get_ctx(sk
);
242 struct tls_sw_context_tx
*ctx
= tls_sw_ctx_tx(tls_ctx
);
243 struct tls_rec
*rec
= ctx
->open_rec
;
244 struct sk_msg
*msg_pl
= &rec
->msg_plaintext
;
245 struct sk_msg
*msg_en
= &rec
->msg_encrypted
;
248 /* We add page references worth len bytes from encrypted sg
249 * at the end of plaintext sg. It is guaranteed that msg_en
250 * has enough required room (ensured by caller).
252 len
= required
- msg_pl
->sg
.size
;
254 /* Skip initial bytes in msg_en's data to be able to use
255 * same offset of both plain and encrypted data.
257 skip
= tls_ctx
->tx
.prepend_size
+ msg_pl
->sg
.size
;
259 return sk_msg_clone(sk
, msg_pl
, msg_en
, skip
, len
);
262 static struct tls_rec
*tls_get_rec(struct sock
*sk
)
264 struct tls_context
*tls_ctx
= tls_get_ctx(sk
);
265 struct tls_sw_context_tx
*ctx
= tls_sw_ctx_tx(tls_ctx
);
266 struct sk_msg
*msg_pl
, *msg_en
;
270 mem_size
= sizeof(struct tls_rec
) + crypto_aead_reqsize(ctx
->aead_send
);
272 rec
= kzalloc(mem_size
, sk
->sk_allocation
);
276 msg_pl
= &rec
->msg_plaintext
;
277 msg_en
= &rec
->msg_encrypted
;
282 sg_init_table(rec
->sg_aead_in
, 2);
283 sg_set_buf(&rec
->sg_aead_in
[0], rec
->aad_space
,
284 sizeof(rec
->aad_space
));
285 sg_unmark_end(&rec
->sg_aead_in
[1]);
287 sg_init_table(rec
->sg_aead_out
, 2);
288 sg_set_buf(&rec
->sg_aead_out
[0], rec
->aad_space
,
289 sizeof(rec
->aad_space
));
290 sg_unmark_end(&rec
->sg_aead_out
[1]);
295 static void tls_free_rec(struct sock
*sk
, struct tls_rec
*rec
)
297 sk_msg_free(sk
, &rec
->msg_encrypted
);
298 sk_msg_free(sk
, &rec
->msg_plaintext
);
302 static void tls_free_open_rec(struct sock
*sk
)
304 struct tls_context
*tls_ctx
= tls_get_ctx(sk
);
305 struct tls_sw_context_tx
*ctx
= tls_sw_ctx_tx(tls_ctx
);
306 struct tls_rec
*rec
= ctx
->open_rec
;
309 tls_free_rec(sk
, rec
);
310 ctx
->open_rec
= NULL
;
314 int tls_tx_records(struct sock
*sk
, int flags
)
316 struct tls_context
*tls_ctx
= tls_get_ctx(sk
);
317 struct tls_sw_context_tx
*ctx
= tls_sw_ctx_tx(tls_ctx
);
318 struct tls_rec
*rec
, *tmp
;
319 struct sk_msg
*msg_en
;
320 int tx_flags
, rc
= 0;
322 if (tls_is_partially_sent_record(tls_ctx
)) {
323 rec
= list_first_entry(&ctx
->tx_list
,
324 struct tls_rec
, list
);
327 tx_flags
= rec
->tx_flags
;
331 rc
= tls_push_partial_record(sk
, tls_ctx
, tx_flags
);
335 /* Full record has been transmitted.
336 * Remove the head of tx_list
338 list_del(&rec
->list
);
339 sk_msg_free(sk
, &rec
->msg_plaintext
);
343 /* Tx all ready records */
344 list_for_each_entry_safe(rec
, tmp
, &ctx
->tx_list
, list
) {
345 if (READ_ONCE(rec
->tx_ready
)) {
347 tx_flags
= rec
->tx_flags
;
351 msg_en
= &rec
->msg_encrypted
;
352 rc
= tls_push_sg(sk
, tls_ctx
,
353 &msg_en
->sg
.data
[msg_en
->sg
.curr
],
358 list_del(&rec
->list
);
359 sk_msg_free(sk
, &rec
->msg_plaintext
);
367 if (rc
< 0 && rc
!= -EAGAIN
)
368 tls_err_abort(sk
, EBADMSG
);
373 static void tls_encrypt_done(struct crypto_async_request
*req
, int err
)
375 struct aead_request
*aead_req
= (struct aead_request
*)req
;
376 struct sock
*sk
= req
->data
;
377 struct tls_context
*tls_ctx
= tls_get_ctx(sk
);
378 struct tls_sw_context_tx
*ctx
= tls_sw_ctx_tx(tls_ctx
);
379 struct scatterlist
*sge
;
380 struct sk_msg
*msg_en
;
385 rec
= container_of(aead_req
, struct tls_rec
, aead_req
);
386 msg_en
= &rec
->msg_encrypted
;
388 sge
= sk_msg_elem(msg_en
, msg_en
->sg
.curr
);
389 sge
->offset
-= tls_ctx
->tx
.prepend_size
;
390 sge
->length
+= tls_ctx
->tx
.prepend_size
;
392 /* Check if error is previously set on socket */
393 if (err
|| sk
->sk_err
) {
396 /* If err is already set on socket, return the same code */
398 ctx
->async_wait
.err
= sk
->sk_err
;
400 ctx
->async_wait
.err
= err
;
401 tls_err_abort(sk
, err
);
406 struct tls_rec
*first_rec
;
408 /* Mark the record as ready for transmission */
409 smp_store_mb(rec
->tx_ready
, true);
411 /* If received record is at head of tx_list, schedule tx */
412 first_rec
= list_first_entry(&ctx
->tx_list
,
413 struct tls_rec
, list
);
414 if (rec
== first_rec
)
418 pending
= atomic_dec_return(&ctx
->encrypt_pending
);
420 if (!pending
&& READ_ONCE(ctx
->async_notify
))
421 complete(&ctx
->async_wait
.completion
);
426 /* Schedule the transmission */
427 if (!test_and_set_bit(BIT_TX_SCHEDULED
, &ctx
->tx_bitmask
))
428 schedule_delayed_work(&ctx
->tx_work
.work
, 1);
431 static int tls_do_encryption(struct sock
*sk
,
432 struct tls_context
*tls_ctx
,
433 struct tls_sw_context_tx
*ctx
,
434 struct aead_request
*aead_req
,
435 size_t data_len
, u32 start
)
437 struct tls_rec
*rec
= ctx
->open_rec
;
438 struct sk_msg
*msg_en
= &rec
->msg_encrypted
;
439 struct scatterlist
*sge
= sk_msg_elem(msg_en
, start
);
442 sge
->offset
+= tls_ctx
->tx
.prepend_size
;
443 sge
->length
-= tls_ctx
->tx
.prepend_size
;
445 msg_en
->sg
.curr
= start
;
447 aead_request_set_tfm(aead_req
, ctx
->aead_send
);
448 aead_request_set_ad(aead_req
, TLS_AAD_SPACE_SIZE
);
449 aead_request_set_crypt(aead_req
, rec
->sg_aead_in
,
451 data_len
, tls_ctx
->tx
.iv
);
453 aead_request_set_callback(aead_req
, CRYPTO_TFM_REQ_MAY_BACKLOG
,
454 tls_encrypt_done
, sk
);
456 /* Add the record in tx_list */
457 list_add_tail((struct list_head
*)&rec
->list
, &ctx
->tx_list
);
458 atomic_inc(&ctx
->encrypt_pending
);
460 rc
= crypto_aead_encrypt(aead_req
);
461 if (!rc
|| rc
!= -EINPROGRESS
) {
462 atomic_dec(&ctx
->encrypt_pending
);
463 sge
->offset
-= tls_ctx
->tx
.prepend_size
;
464 sge
->length
+= tls_ctx
->tx
.prepend_size
;
468 WRITE_ONCE(rec
->tx_ready
, true);
469 } else if (rc
!= -EINPROGRESS
) {
470 list_del(&rec
->list
);
474 /* Unhook the record from context if encryption is not failure */
475 ctx
->open_rec
= NULL
;
476 tls_advance_record_sn(sk
, &tls_ctx
->tx
);
480 static int tls_split_open_record(struct sock
*sk
, struct tls_rec
*from
,
481 struct tls_rec
**to
, struct sk_msg
*msg_opl
,
482 struct sk_msg
*msg_oen
, u32 split_point
,
483 u32 tx_overhead_size
, u32
*orig_end
)
485 u32 i
, j
, bytes
= 0, apply
= msg_opl
->apply_bytes
;
486 struct scatterlist
*sge
, *osge
, *nsge
;
487 u32 orig_size
= msg_opl
->sg
.size
;
488 struct scatterlist tmp
= { };
489 struct sk_msg
*msg_npl
;
493 new = tls_get_rec(sk
);
496 ret
= sk_msg_alloc(sk
, &new->msg_encrypted
, msg_opl
->sg
.size
+
497 tx_overhead_size
, 0);
499 tls_free_rec(sk
, new);
503 *orig_end
= msg_opl
->sg
.end
;
504 i
= msg_opl
->sg
.start
;
505 sge
= sk_msg_elem(msg_opl
, i
);
506 while (apply
&& sge
->length
) {
507 if (sge
->length
> apply
) {
508 u32 len
= sge
->length
- apply
;
510 get_page(sg_page(sge
));
511 sg_set_page(&tmp
, sg_page(sge
), len
,
512 sge
->offset
+ apply
);
517 apply
-= sge
->length
;
518 bytes
+= sge
->length
;
521 sk_msg_iter_var_next(i
);
522 if (i
== msg_opl
->sg
.end
)
524 sge
= sk_msg_elem(msg_opl
, i
);
528 msg_opl
->sg
.curr
= i
;
529 msg_opl
->sg
.copybreak
= 0;
530 msg_opl
->apply_bytes
= 0;
531 msg_opl
->sg
.size
= bytes
;
533 msg_npl
= &new->msg_plaintext
;
534 msg_npl
->apply_bytes
= apply
;
535 msg_npl
->sg
.size
= orig_size
- bytes
;
537 j
= msg_npl
->sg
.start
;
538 nsge
= sk_msg_elem(msg_npl
, j
);
540 memcpy(nsge
, &tmp
, sizeof(*nsge
));
541 sk_msg_iter_var_next(j
);
542 nsge
= sk_msg_elem(msg_npl
, j
);
545 osge
= sk_msg_elem(msg_opl
, i
);
546 while (osge
->length
) {
547 memcpy(nsge
, osge
, sizeof(*nsge
));
549 sk_msg_iter_var_next(i
);
550 sk_msg_iter_var_next(j
);
553 osge
= sk_msg_elem(msg_opl
, i
);
554 nsge
= sk_msg_elem(msg_npl
, j
);
558 msg_npl
->sg
.curr
= j
;
559 msg_npl
->sg
.copybreak
= 0;
565 static void tls_merge_open_record(struct sock
*sk
, struct tls_rec
*to
,
566 struct tls_rec
*from
, u32 orig_end
)
568 struct sk_msg
*msg_npl
= &from
->msg_plaintext
;
569 struct sk_msg
*msg_opl
= &to
->msg_plaintext
;
570 struct scatterlist
*osge
, *nsge
;
574 sk_msg_iter_var_prev(i
);
575 j
= msg_npl
->sg
.start
;
577 osge
= sk_msg_elem(msg_opl
, i
);
578 nsge
= sk_msg_elem(msg_npl
, j
);
580 if (sg_page(osge
) == sg_page(nsge
) &&
581 osge
->offset
+ osge
->length
== nsge
->offset
) {
582 osge
->length
+= nsge
->length
;
583 put_page(sg_page(nsge
));
586 msg_opl
->sg
.end
= orig_end
;
587 msg_opl
->sg
.curr
= orig_end
;
588 msg_opl
->sg
.copybreak
= 0;
589 msg_opl
->apply_bytes
= msg_opl
->sg
.size
+ msg_npl
->sg
.size
;
590 msg_opl
->sg
.size
+= msg_npl
->sg
.size
;
592 sk_msg_free(sk
, &to
->msg_encrypted
);
593 sk_msg_xfer_full(&to
->msg_encrypted
, &from
->msg_encrypted
);
598 static int tls_push_record(struct sock
*sk
, int flags
,
599 unsigned char record_type
)
601 struct tls_context
*tls_ctx
= tls_get_ctx(sk
);
602 struct tls_sw_context_tx
*ctx
= tls_sw_ctx_tx(tls_ctx
);
603 struct tls_rec
*rec
= ctx
->open_rec
, *tmp
= NULL
;
604 u32 i
, split_point
, uninitialized_var(orig_end
);
605 struct sk_msg
*msg_pl
, *msg_en
;
606 struct aead_request
*req
;
613 msg_pl
= &rec
->msg_plaintext
;
614 msg_en
= &rec
->msg_encrypted
;
616 split_point
= msg_pl
->apply_bytes
;
617 split
= split_point
&& split_point
< msg_pl
->sg
.size
;
619 rc
= tls_split_open_record(sk
, rec
, &tmp
, msg_pl
, msg_en
,
620 split_point
, tls_ctx
->tx
.overhead_size
,
624 sk_msg_trim(sk
, msg_en
, msg_pl
->sg
.size
+
625 tls_ctx
->tx
.overhead_size
);
628 rec
->tx_flags
= flags
;
629 req
= &rec
->aead_req
;
632 sk_msg_iter_var_prev(i
);
633 sg_mark_end(sk_msg_elem(msg_pl
, i
));
635 i
= msg_pl
->sg
.start
;
636 sg_chain(rec
->sg_aead_in
, 2, rec
->inplace_crypto
?
637 &msg_en
->sg
.data
[i
] : &msg_pl
->sg
.data
[i
]);
640 sk_msg_iter_var_prev(i
);
641 sg_mark_end(sk_msg_elem(msg_en
, i
));
643 i
= msg_en
->sg
.start
;
644 sg_chain(rec
->sg_aead_out
, 2, &msg_en
->sg
.data
[i
]);
646 tls_make_aad(rec
->aad_space
, msg_pl
->sg
.size
,
647 tls_ctx
->tx
.rec_seq
, tls_ctx
->tx
.rec_seq_size
,
650 tls_fill_prepend(tls_ctx
,
651 page_address(sg_page(&msg_en
->sg
.data
[i
])) +
652 msg_en
->sg
.data
[i
].offset
, msg_pl
->sg
.size
,
655 tls_ctx
->pending_open_record_frags
= false;
657 rc
= tls_do_encryption(sk
, tls_ctx
, ctx
, req
, msg_pl
->sg
.size
, i
);
659 if (rc
!= -EINPROGRESS
) {
660 tls_err_abort(sk
, EBADMSG
);
662 tls_ctx
->pending_open_record_frags
= true;
663 tls_merge_open_record(sk
, rec
, tmp
, orig_end
);
668 msg_pl
= &tmp
->msg_plaintext
;
669 msg_en
= &tmp
->msg_encrypted
;
670 sk_msg_trim(sk
, msg_en
, msg_pl
->sg
.size
+
671 tls_ctx
->tx
.overhead_size
);
672 tls_ctx
->pending_open_record_frags
= true;
676 return tls_tx_records(sk
, flags
);
679 static int bpf_exec_tx_verdict(struct sk_msg
*msg
, struct sock
*sk
,
680 bool full_record
, u8 record_type
,
681 size_t *copied
, int flags
)
683 struct tls_context
*tls_ctx
= tls_get_ctx(sk
);
684 struct tls_sw_context_tx
*ctx
= tls_sw_ctx_tx(tls_ctx
);
685 struct sk_msg msg_redir
= { };
686 struct sk_psock
*psock
;
687 struct sock
*sk_redir
;
692 psock
= sk_psock_get(sk
);
694 return tls_push_record(sk
, flags
, record_type
);
696 enospc
= sk_msg_full(msg
);
697 if (psock
->eval
== __SK_NONE
)
698 psock
->eval
= sk_psock_msg_verdict(sk
, psock
, msg
);
699 if (msg
->cork_bytes
&& msg
->cork_bytes
> msg
->sg
.size
&&
700 !enospc
&& !full_record
) {
706 if (msg
->apply_bytes
&& msg
->apply_bytes
< send
)
707 send
= msg
->apply_bytes
;
709 switch (psock
->eval
) {
711 err
= tls_push_record(sk
, flags
, record_type
);
713 *copied
-= sk_msg_free(sk
, msg
);
714 tls_free_open_rec(sk
);
719 sk_redir
= psock
->sk_redir
;
720 memcpy(&msg_redir
, msg
, sizeof(*msg
));
721 if (msg
->apply_bytes
< send
)
722 msg
->apply_bytes
= 0;
724 msg
->apply_bytes
-= send
;
725 sk_msg_return_zero(sk
, msg
, send
);
726 msg
->sg
.size
-= send
;
728 err
= tcp_bpf_sendmsg_redir(sk_redir
, &msg_redir
, send
, flags
);
731 *copied
-= sk_msg_free_nocharge(sk
, &msg_redir
);
734 if (msg
->sg
.size
== 0)
735 tls_free_open_rec(sk
);
739 sk_msg_free_partial(sk
, msg
, send
);
740 if (msg
->apply_bytes
< send
)
741 msg
->apply_bytes
= 0;
743 msg
->apply_bytes
-= send
;
744 if (msg
->sg
.size
== 0)
745 tls_free_open_rec(sk
);
751 bool reset_eval
= !ctx
->open_rec
;
755 msg
= &rec
->msg_plaintext
;
756 if (!msg
->apply_bytes
)
760 psock
->eval
= __SK_NONE
;
761 if (psock
->sk_redir
) {
762 sock_put(psock
->sk_redir
);
763 psock
->sk_redir
= NULL
;
770 sk_psock_put(sk
, psock
);
774 static int tls_sw_push_pending_record(struct sock
*sk
, int flags
)
776 struct tls_context
*tls_ctx
= tls_get_ctx(sk
);
777 struct tls_sw_context_tx
*ctx
= tls_sw_ctx_tx(tls_ctx
);
778 struct tls_rec
*rec
= ctx
->open_rec
;
779 struct sk_msg
*msg_pl
;
785 msg_pl
= &rec
->msg_plaintext
;
786 copied
= msg_pl
->sg
.size
;
790 return bpf_exec_tx_verdict(msg_pl
, sk
, true, TLS_RECORD_TYPE_DATA
,
794 int tls_sw_sendmsg(struct sock
*sk
, struct msghdr
*msg
, size_t size
)
796 long timeo
= sock_sndtimeo(sk
, msg
->msg_flags
& MSG_DONTWAIT
);
797 struct tls_context
*tls_ctx
= tls_get_ctx(sk
);
798 struct tls_sw_context_tx
*ctx
= tls_sw_ctx_tx(tls_ctx
);
799 struct crypto_tfm
*tfm
= crypto_aead_tfm(ctx
->aead_send
);
800 bool async_capable
= tfm
->__crt_alg
->cra_flags
& CRYPTO_ALG_ASYNC
;
801 unsigned char record_type
= TLS_RECORD_TYPE_DATA
;
802 bool is_kvec
= iov_iter_is_kvec(&msg
->msg_iter
);
803 bool eor
= !(msg
->msg_flags
& MSG_MORE
);
804 size_t try_to_copy
, copied
= 0;
805 struct sk_msg
*msg_pl
, *msg_en
;
815 if (msg
->msg_flags
& ~(MSG_MORE
| MSG_DONTWAIT
| MSG_NOSIGNAL
))
820 /* Wait till there is any pending write on socket */
821 if (unlikely(sk
->sk_write_pending
)) {
822 ret
= wait_on_pending_writer(sk
, &timeo
);
827 if (unlikely(msg
->msg_controllen
)) {
828 ret
= tls_proccess_cmsg(sk
, msg
, &record_type
);
830 if (ret
== -EINPROGRESS
)
832 else if (ret
!= -EAGAIN
)
837 while (msg_data_left(msg
)) {
846 rec
= ctx
->open_rec
= tls_get_rec(sk
);
852 msg_pl
= &rec
->msg_plaintext
;
853 msg_en
= &rec
->msg_encrypted
;
855 orig_size
= msg_pl
->sg
.size
;
857 try_to_copy
= msg_data_left(msg
);
858 record_room
= TLS_MAX_PAYLOAD_SIZE
- msg_pl
->sg
.size
;
859 if (try_to_copy
>= record_room
) {
860 try_to_copy
= record_room
;
864 required_size
= msg_pl
->sg
.size
+ try_to_copy
+
865 tls_ctx
->tx
.overhead_size
;
867 if (!sk_stream_memory_free(sk
))
868 goto wait_for_sndbuf
;
871 ret
= tls_alloc_encrypted_msg(sk
, required_size
);
874 goto wait_for_memory
;
876 /* Adjust try_to_copy according to the amount that was
877 * actually allocated. The difference is due
878 * to max sg elements limit
880 try_to_copy
-= required_size
- msg_en
->sg
.size
;
884 if (!is_kvec
&& (full_record
|| eor
) && !async_capable
) {
885 u32 first
= msg_pl
->sg
.end
;
887 ret
= sk_msg_zerocopy_from_iter(sk
, &msg
->msg_iter
,
888 msg_pl
, try_to_copy
);
890 goto fallback_to_reg_send
;
892 rec
->inplace_crypto
= 0;
895 copied
+= try_to_copy
;
897 sk_msg_sg_copy_set(msg_pl
, first
);
898 ret
= bpf_exec_tx_verdict(msg_pl
, sk
, full_record
,
899 record_type
, &copied
,
902 if (ret
== -EINPROGRESS
)
904 else if (ret
== -ENOMEM
)
905 goto wait_for_memory
;
906 else if (ret
== -ENOSPC
)
908 else if (ret
!= -EAGAIN
)
913 copied
-= try_to_copy
;
914 sk_msg_sg_copy_clear(msg_pl
, first
);
915 iov_iter_revert(&msg
->msg_iter
,
916 msg_pl
->sg
.size
- orig_size
);
917 fallback_to_reg_send
:
918 sk_msg_trim(sk
, msg_pl
, orig_size
);
921 required_size
= msg_pl
->sg
.size
+ try_to_copy
;
923 ret
= tls_clone_plaintext_msg(sk
, required_size
);
928 /* Adjust try_to_copy according to the amount that was
929 * actually allocated. The difference is due
930 * to max sg elements limit
932 try_to_copy
-= required_size
- msg_pl
->sg
.size
;
934 sk_msg_trim(sk
, msg_en
, msg_pl
->sg
.size
+
935 tls_ctx
->tx
.overhead_size
);
938 ret
= sk_msg_memcopy_from_iter(sk
, &msg
->msg_iter
, msg_pl
,
943 /* Open records defined only if successfully copied, otherwise
944 * we would trim the sg but not reset the open record frags.
946 tls_ctx
->pending_open_record_frags
= true;
947 copied
+= try_to_copy
;
948 if (full_record
|| eor
) {
949 ret
= bpf_exec_tx_verdict(msg_pl
, sk
, full_record
,
950 record_type
, &copied
,
953 if (ret
== -EINPROGRESS
)
955 else if (ret
== -ENOMEM
)
956 goto wait_for_memory
;
957 else if (ret
!= -EAGAIN
) {
968 set_bit(SOCK_NOSPACE
, &sk
->sk_socket
->flags
);
970 ret
= sk_stream_wait_memory(sk
, &timeo
);
973 tls_trim_both_msgs(sk
, orig_size
);
977 if (msg_en
->sg
.size
< required_size
)
978 goto alloc_encrypted
;
984 /* Wait for pending encryptions to get completed */
985 smp_store_mb(ctx
->async_notify
, true);
987 if (atomic_read(&ctx
->encrypt_pending
))
988 crypto_wait_req(-EINPROGRESS
, &ctx
->async_wait
);
990 reinit_completion(&ctx
->async_wait
.completion
);
992 WRITE_ONCE(ctx
->async_notify
, false);
994 if (ctx
->async_wait
.err
) {
995 ret
= ctx
->async_wait
.err
;
1000 /* Transmit if any encryptions have completed */
1001 if (test_and_clear_bit(BIT_TX_SCHEDULED
, &ctx
->tx_bitmask
)) {
1002 cancel_delayed_work(&ctx
->tx_work
.work
);
1003 tls_tx_records(sk
, msg
->msg_flags
);
1007 ret
= sk_stream_error(sk
, msg
->msg_flags
, ret
);
1010 return copied
? copied
: ret
;
1013 int tls_sw_sendpage(struct sock
*sk
, struct page
*page
,
1014 int offset
, size_t size
, int flags
)
1016 long timeo
= sock_sndtimeo(sk
, flags
& MSG_DONTWAIT
);
1017 struct tls_context
*tls_ctx
= tls_get_ctx(sk
);
1018 struct tls_sw_context_tx
*ctx
= tls_sw_ctx_tx(tls_ctx
);
1019 unsigned char record_type
= TLS_RECORD_TYPE_DATA
;
1020 struct sk_msg
*msg_pl
;
1021 struct tls_rec
*rec
;
1029 if (flags
& ~(MSG_MORE
| MSG_DONTWAIT
| MSG_NOSIGNAL
|
1030 MSG_SENDPAGE_NOTLAST
))
1033 /* No MSG_EOR from splice, only look at MSG_MORE */
1034 eor
= !(flags
& (MSG_MORE
| MSG_SENDPAGE_NOTLAST
));
1038 sk_clear_bit(SOCKWQ_ASYNC_NOSPACE
, sk
);
1040 /* Wait till there is any pending write on socket */
1041 if (unlikely(sk
->sk_write_pending
)) {
1042 ret
= wait_on_pending_writer(sk
, &timeo
);
1047 /* Call the sk_stream functions to manage the sndbuf mem. */
1049 size_t copy
, required_size
;
1057 rec
= ctx
->open_rec
;
1059 rec
= ctx
->open_rec
= tls_get_rec(sk
);
1065 msg_pl
= &rec
->msg_plaintext
;
1067 full_record
= false;
1068 record_room
= TLS_MAX_PAYLOAD_SIZE
- msg_pl
->sg
.size
;
1071 if (copy
>= record_room
) {
1076 required_size
= msg_pl
->sg
.size
+ copy
+
1077 tls_ctx
->tx
.overhead_size
;
1079 if (!sk_stream_memory_free(sk
))
1080 goto wait_for_sndbuf
;
1082 ret
= tls_alloc_encrypted_msg(sk
, required_size
);
1085 goto wait_for_memory
;
1087 /* Adjust copy according to the amount that was
1088 * actually allocated. The difference is due
1089 * to max sg elements limit
1091 copy
-= required_size
- msg_pl
->sg
.size
;
1095 sk_msg_page_add(msg_pl
, page
, copy
, offset
);
1096 sk_mem_charge(sk
, copy
);
1102 tls_ctx
->pending_open_record_frags
= true;
1103 if (full_record
|| eor
|| sk_msg_full(msg_pl
)) {
1104 rec
->inplace_crypto
= 0;
1105 ret
= bpf_exec_tx_verdict(msg_pl
, sk
, full_record
,
1106 record_type
, &copied
, flags
);
1108 if (ret
== -EINPROGRESS
)
1110 else if (ret
== -ENOMEM
)
1111 goto wait_for_memory
;
1112 else if (ret
!= -EAGAIN
) {
1121 set_bit(SOCK_NOSPACE
, &sk
->sk_socket
->flags
);
1123 ret
= sk_stream_wait_memory(sk
, &timeo
);
1125 tls_trim_both_msgs(sk
, msg_pl
->sg
.size
);
1133 /* Transmit if any encryptions have completed */
1134 if (test_and_clear_bit(BIT_TX_SCHEDULED
, &ctx
->tx_bitmask
)) {
1135 cancel_delayed_work(&ctx
->tx_work
.work
);
1136 tls_tx_records(sk
, flags
);
1140 ret
= sk_stream_error(sk
, flags
, ret
);
1142 return copied
? copied
: ret
;
1145 static struct sk_buff
*tls_wait_data(struct sock
*sk
, struct sk_psock
*psock
,
1146 int flags
, long timeo
, int *err
)
1148 struct tls_context
*tls_ctx
= tls_get_ctx(sk
);
1149 struct tls_sw_context_rx
*ctx
= tls_sw_ctx_rx(tls_ctx
);
1150 struct sk_buff
*skb
;
1151 DEFINE_WAIT_FUNC(wait
, woken_wake_function
);
1153 while (!(skb
= ctx
->recv_pkt
) && sk_psock_queue_empty(psock
)) {
1155 *err
= sock_error(sk
);
1159 if (sk
->sk_shutdown
& RCV_SHUTDOWN
)
1162 if (sock_flag(sk
, SOCK_DONE
))
1165 if ((flags
& MSG_DONTWAIT
) || !timeo
) {
1170 add_wait_queue(sk_sleep(sk
), &wait
);
1171 sk_set_bit(SOCKWQ_ASYNC_WAITDATA
, sk
);
1172 sk_wait_event(sk
, &timeo
,
1173 ctx
->recv_pkt
!= skb
||
1174 !sk_psock_queue_empty(psock
),
1176 sk_clear_bit(SOCKWQ_ASYNC_WAITDATA
, sk
);
1177 remove_wait_queue(sk_sleep(sk
), &wait
);
1179 /* Handle signals */
1180 if (signal_pending(current
)) {
1181 *err
= sock_intr_errno(timeo
);
1189 static int tls_setup_from_iter(struct sock
*sk
, struct iov_iter
*from
,
1190 int length
, int *pages_used
,
1191 unsigned int *size_used
,
1192 struct scatterlist
*to
,
1195 int rc
= 0, i
= 0, num_elem
= *pages_used
, maxpages
;
1196 struct page
*pages
[MAX_SKB_FRAGS
];
1197 unsigned int size
= *size_used
;
1198 ssize_t copied
, use
;
1201 while (length
> 0) {
1203 maxpages
= to_max_pages
- num_elem
;
1204 if (maxpages
== 0) {
1208 copied
= iov_iter_get_pages(from
, pages
,
1216 iov_iter_advance(from
, copied
);
1221 use
= min_t(int, copied
, PAGE_SIZE
- offset
);
1223 sg_set_page(&to
[num_elem
],
1224 pages
[i
], use
, offset
);
1225 sg_unmark_end(&to
[num_elem
]);
1226 /* We do not uncharge memory from this API */
1235 /* Mark the end in the last sg entry if newly added */
1236 if (num_elem
> *pages_used
)
1237 sg_mark_end(&to
[num_elem
- 1]);
1240 iov_iter_revert(from
, size
- *size_used
);
1242 *pages_used
= num_elem
;
1247 /* This function decrypts the input skb into either out_iov or in out_sg
1248 * or in skb buffers itself. The input parameter 'zc' indicates if
1249 * zero-copy mode needs to be tried or not. With zero-copy mode, either
1250 * out_iov or out_sg must be non-NULL. In case both out_iov and out_sg are
1251 * NULL, then the decryption happens inside skb buffers itself, i.e.
1252 * zero-copy gets disabled and 'zc' is updated.
1255 static int decrypt_internal(struct sock
*sk
, struct sk_buff
*skb
,
1256 struct iov_iter
*out_iov
,
1257 struct scatterlist
*out_sg
,
1258 int *chunk
, bool *zc
)
1260 struct tls_context
*tls_ctx
= tls_get_ctx(sk
);
1261 struct tls_sw_context_rx
*ctx
= tls_sw_ctx_rx(tls_ctx
);
1262 struct strp_msg
*rxm
= strp_msg(skb
);
1263 int n_sgin
, n_sgout
, nsg
, mem_size
, aead_size
, err
, pages
= 0;
1264 struct aead_request
*aead_req
;
1265 struct sk_buff
*unused
;
1266 u8
*aad
, *iv
, *mem
= NULL
;
1267 struct scatterlist
*sgin
= NULL
;
1268 struct scatterlist
*sgout
= NULL
;
1269 const int data_len
= rxm
->full_len
- tls_ctx
->rx
.overhead_size
;
1271 if (*zc
&& (out_iov
|| out_sg
)) {
1273 n_sgout
= iov_iter_npages(out_iov
, INT_MAX
) + 1;
1275 n_sgout
= sg_nents(out_sg
);
1276 n_sgin
= skb_nsg(skb
, rxm
->offset
+ tls_ctx
->rx
.prepend_size
,
1277 rxm
->full_len
- tls_ctx
->rx
.prepend_size
);
1281 n_sgin
= skb_cow_data(skb
, 0, &unused
);
1287 /* Increment to accommodate AAD */
1288 n_sgin
= n_sgin
+ 1;
1290 nsg
= n_sgin
+ n_sgout
;
1292 aead_size
= sizeof(*aead_req
) + crypto_aead_reqsize(ctx
->aead_recv
);
1293 mem_size
= aead_size
+ (nsg
* sizeof(struct scatterlist
));
1294 mem_size
= mem_size
+ TLS_AAD_SPACE_SIZE
;
1295 mem_size
= mem_size
+ crypto_aead_ivsize(ctx
->aead_recv
);
1297 /* Allocate a single block of memory which contains
1298 * aead_req || sgin[] || sgout[] || aad || iv.
1299 * This order achieves correct alignment for aead_req, sgin, sgout.
1301 mem
= kmalloc(mem_size
, sk
->sk_allocation
);
1305 /* Segment the allocated memory */
1306 aead_req
= (struct aead_request
*)mem
;
1307 sgin
= (struct scatterlist
*)(mem
+ aead_size
);
1308 sgout
= sgin
+ n_sgin
;
1309 aad
= (u8
*)(sgout
+ n_sgout
);
1310 iv
= aad
+ TLS_AAD_SPACE_SIZE
;
1313 err
= skb_copy_bits(skb
, rxm
->offset
+ TLS_HEADER_SIZE
,
1314 iv
+ TLS_CIPHER_AES_GCM_128_SALT_SIZE
,
1315 tls_ctx
->rx
.iv_size
);
1320 memcpy(iv
, tls_ctx
->rx
.iv
, TLS_CIPHER_AES_GCM_128_SALT_SIZE
);
1323 tls_make_aad(aad
, rxm
->full_len
- tls_ctx
->rx
.overhead_size
,
1324 tls_ctx
->rx
.rec_seq
, tls_ctx
->rx
.rec_seq_size
,
1328 sg_init_table(sgin
, n_sgin
);
1329 sg_set_buf(&sgin
[0], aad
, TLS_AAD_SPACE_SIZE
);
1330 err
= skb_to_sgvec(skb
, &sgin
[1],
1331 rxm
->offset
+ tls_ctx
->rx
.prepend_size
,
1332 rxm
->full_len
- tls_ctx
->rx
.prepend_size
);
1340 sg_init_table(sgout
, n_sgout
);
1341 sg_set_buf(&sgout
[0], aad
, TLS_AAD_SPACE_SIZE
);
1344 err
= tls_setup_from_iter(sk
, out_iov
, data_len
,
1345 &pages
, chunk
, &sgout
[1],
1348 goto fallback_to_reg_recv
;
1349 } else if (out_sg
) {
1350 memcpy(sgout
, out_sg
, n_sgout
* sizeof(*sgout
));
1352 goto fallback_to_reg_recv
;
1355 fallback_to_reg_recv
:
1362 /* Prepare and submit AEAD request */
1363 err
= tls_do_decryption(sk
, skb
, sgin
, sgout
, iv
,
1364 data_len
, aead_req
, *zc
);
1365 if (err
== -EINPROGRESS
)
1368 /* Release the pages in case iov was mapped to pages */
1369 for (; pages
> 0; pages
--)
1370 put_page(sg_page(&sgout
[pages
]));
1376 static int decrypt_skb_update(struct sock
*sk
, struct sk_buff
*skb
,
1377 struct iov_iter
*dest
, int *chunk
, bool *zc
)
1379 struct tls_context
*tls_ctx
= tls_get_ctx(sk
);
1380 struct tls_sw_context_rx
*ctx
= tls_sw_ctx_rx(tls_ctx
);
1381 struct strp_msg
*rxm
= strp_msg(skb
);
1384 #ifdef CONFIG_TLS_DEVICE
1385 err
= tls_device_decrypted(sk
, skb
);
1389 if (!ctx
->decrypted
) {
1390 err
= decrypt_internal(sk
, skb
, dest
, NULL
, chunk
, zc
);
1392 if (err
== -EINPROGRESS
)
1393 tls_advance_record_sn(sk
, &tls_ctx
->rx
);
1401 rxm
->offset
+= tls_ctx
->rx
.prepend_size
;
1402 rxm
->full_len
-= tls_ctx
->rx
.overhead_size
;
1403 tls_advance_record_sn(sk
, &tls_ctx
->rx
);
1404 ctx
->decrypted
= true;
1405 ctx
->saved_data_ready(sk
);
1410 int decrypt_skb(struct sock
*sk
, struct sk_buff
*skb
,
1411 struct scatterlist
*sgout
)
1416 return decrypt_internal(sk
, skb
, NULL
, sgout
, &chunk
, &zc
);
1419 static bool tls_sw_advance_skb(struct sock
*sk
, struct sk_buff
*skb
,
1422 struct tls_context
*tls_ctx
= tls_get_ctx(sk
);
1423 struct tls_sw_context_rx
*ctx
= tls_sw_ctx_rx(tls_ctx
);
1426 struct strp_msg
*rxm
= strp_msg(skb
);
1428 if (len
< rxm
->full_len
) {
1430 rxm
->full_len
-= len
;
1436 /* Finished with message */
1437 ctx
->recv_pkt
= NULL
;
1438 __strp_unpause(&ctx
->strp
);
/*
 * tls_sw_recvmsg - receive path for software TLS.
 *
 * Visible flow:
 *  - MSG_ERRQUEUE requests are handed straight to sock_recv_errqueue().
 *  - A psock reference is taken, then the low-water target and receive
 *    timeout are derived from flags.
 *  - Records are obtained through tls_wait_data(); __tcp_bpf_recvmsg() is
 *    also called in this area of the function.
 *  - The record type is reported through a SOL_TLS/TLS_GET_RECORD_TYPE
 *    control message.
 *  - Records not yet decrypted go through decrypt_skb_update(); failures
 *    other than -EINPROGRESS abort the connection with EBADMSG.
 *  - Data is copied with skb_copy_datagram_msg() and, unless MSG_PEEK is
 *    set, the record is advanced with tls_sw_advance_skb().
 *  - Before returning, outstanding async decrypt requests are waited for.
 *
 * Returns the number of bytes copied when non-zero, otherwise err.
 * NOTE(review): most parameters, several locals (cerr, chunk, copied,
 * timeo, ...) and many branch bodies are on lines not visible in this
 * excerpt.
 */
1443 int tls_sw_recvmsg(struct sock
*sk
,
1450 struct tls_context
*tls_ctx
= tls_get_ctx(sk
);
1451 struct tls_sw_context_rx
*ctx
= tls_sw_ctx_rx(tls_ctx
);
1452 struct sk_psock
*psock
;
1453 unsigned char control
;
1454 struct strp_msg
*rxm
;
1455 struct sk_buff
*skb
;
1458 int target
, err
= 0;
1460 bool is_kvec
= iov_iter_is_kvec(&msg
->msg_iter
);
/* Error-queue reads do not touch the TLS record path. */
1465 if (unlikely(flags
& MSG_ERRQUEUE
))
1466 return sock_recv_errqueue(sk
, msg
, len
, SOL_IP
, IP_RECVERR
);
1468 psock
= sk_psock_get(sk
);
1471 target
= sock_rcvlowat(sk
, flags
& MSG_WAITALL
, len
);
1472 timeo
= sock_rcvtimeo(sk
, flags
& MSG_DONTWAIT
);
1478 skb
= tls_wait_data(sk
, psock
, flags
, timeo
, &err
);
1481 int ret
= __tcp_bpf_recvmsg(sk
, psock
,
1493 rxm
= strp_msg(skb
);
/* Report the record type to user space as a control message. */
1498 cerr
= put_cmsg(msg
, SOL_TLS
, TLS_GET_RECORD_TYPE
,
1499 sizeof(ctx
->control
), &ctx
->control
);
1501 control
= ctx
->control
;
1502 if (ctx
->control
!= TLS_RECORD_TYPE_DATA
) {
1503 if (cerr
|| msg
->msg_flags
& MSG_CTRUNC
) {
1508 } else if (control
!= ctx
->control
) {
/*
 * Record still encrypted. to_copy is the record length minus the TLS
 * overhead. The condition below holds only for non-kvec iterators when
 * to_copy fits in len and MSG_PEEK is not set.
 * NOTE(review): the statement guarded by that condition is not visible;
 * presumably it enables zero-copy decryption - confirm.
 */
1512 if (!ctx
->decrypted
) {
1513 int to_copy
= rxm
->full_len
- tls_ctx
->rx
.overhead_size
;
1515 if (!is_kvec
&& to_copy
<= len
&&
1516 likely(!(flags
& MSG_PEEK
)))
1519 err
= decrypt_skb_update(sk
, skb
, &msg
->msg_iter
,
/* Anything other than an in-flight async request is fatal for the connection. */
1521 if (err
< 0 && err
!= -EINPROGRESS
) {
1522 tls_err_abort(sk
, EBADMSG
);
1526 if (err
== -EINPROGRESS
) {
1529 goto pick_next_record
;
1532 ctx
->decrypted
= true;
/* Copy no more than the smaller of rxm->full_len and len. */
1536 chunk
= min_t(unsigned int, rxm
->full_len
, len
);
1538 err
= skb_copy_datagram_msg(skb
, rxm
->offset
, msg
,
/* This inner control shadows the function-scope variable of the same name. */
1547 if (likely(!(flags
& MSG_PEEK
))) {
1548 u8 control
= ctx
->control
;
1550 /* For async, drop current skb reference */
1554 if (tls_sw_advance_skb(sk
, skb
, chunk
)) {
1555 /* Return full control message to
1556 * userspace before trying to parse
1557 * another message type
1559 msg
->msg_flags
|= MSG_EOR
;
1560 if (control
!= TLS_RECORD_TYPE_DATA
)
1566 /* MSG_PEEK right now cannot look beyond current skb
1567 * from strparser, meaning we cannot advance skb here
1568 * and thus unpause strparser since we'd lose original
1574 /* If we have a new message from strparser, continue now. */
1575 if (copied
>= target
&& !ctx
->recv_pkt
)
1581 /* Wait for all previously submitted records to be decrypted */
1582 smp_store_mb(ctx
->async_notify
, true);
1583 if (atomic_read(&ctx
->decrypt_pending
)) {
1584 err
= crypto_wait_req(-EINPROGRESS
, &ctx
->async_wait
);
1586 /* one of async decrypt failed */
1587 tls_err_abort(sk
, err
);
1591 reinit_completion(&ctx
->async_wait
.completion
);
1593 WRITE_ONCE(ctx
->async_notify
, false);
1598 sk_psock_put(sk
, psock
);
/* GNU "?:" extension: bytes copied when non-zero, otherwise err. */
1599 return copied
? : err
;
/*
 * tls_sw_splice_read - splice decrypted TLS payload into a pipe.
 *
 * @sock:  socket to read from
 * @ppos:  not referenced by the visible code
 * @pipe:  destination pipe
 * @len:   maximum number of bytes to splice
 * @flags: splice/recv flags (MSG_DONTWAIT and MSG_PEEK are tested)
 *
 * Waits for a record with tls_wait_data() (no psock is passed), refuses
 * non-data record types, decrypts through decrypt_skb_update() with a NULL
 * destination iterator when the record is not yet decrypted, then splices
 * up to min(rxm->full_len, len) bytes with skb_splice_bits(). Unless
 * MSG_PEEK is set, the record is advanced by the number of bytes spliced.
 *
 * Returns the number of bytes spliced when non-zero, otherwise err.
 * NOTE(review): declarations of err, chunk, copied, zc and timeo, and the
 * conditions guarding several of the gotos, are not visible in this
 * excerpt.
 */
1602 ssize_t
tls_sw_splice_read(struct socket
*sock
, loff_t
*ppos
,
1603 struct pipe_inode_info
*pipe
,
1604 size_t len
, unsigned int flags
)
1606 struct tls_context
*tls_ctx
= tls_get_ctx(sock
->sk
);
1607 struct tls_sw_context_rx
*ctx
= tls_sw_ctx_rx(tls_ctx
);
1608 struct strp_msg
*rxm
= NULL
;
1609 struct sock
*sk
= sock
->sk
;
1610 struct sk_buff
*skb
;
1619 timeo
= sock_rcvtimeo(sk
, flags
& MSG_DONTWAIT
);
1621 skb
= tls_wait_data(sk
, NULL
, flags
, timeo
, &err
);
1623 goto splice_read_end
;
1625 /* splice does not support reading control messages */
1626 if (ctx
->control
!= TLS_RECORD_TYPE_DATA
) {
1628 goto splice_read_end
;
/* Decrypt first if needed; the visible failure path aborts with EBADMSG. */
1631 if (!ctx
->decrypted
) {
1632 err
= decrypt_skb_update(sk
, skb
, NULL
, &chunk
, &zc
);
1635 tls_err_abort(sk
, EBADMSG
);
1636 goto splice_read_end
;
1638 ctx
->decrypted
= true;
1640 rxm
= strp_msg(skb
);
1642 chunk
= min_t(unsigned int, rxm
->full_len
, len
);
1643 copied
= skb_splice_bits(skb
, sk
, rxm
->offset
, pipe
, chunk
, flags
);
1645 goto splice_read_end
;
/* Consume what was spliced unless the caller only peeked. */
1647 if (likely(!(flags
& MSG_PEEK
)))
1648 tls_sw_advance_skb(sk
, skb
, copied
);
/* GNU "?:" extension: bytes spliced when non-zero, otherwise err. */
1652 return copied
? : err
;
/*
 * tls_sw_stream_read - report whether the socket has something to read.
 *
 * @sk: TLS socket to query
 *
 * Returns true when the psock ingress_msg list is non-empty or when a
 * record is held in ctx->recv_pkt. ingress_empty starts out true, so the
 * result depends on recv_pkt alone unless the list_empty() assignment runs.
 * NOTE(review): the lines surrounding the sk_psock() lookup (any locking
 * and the guard on psock) are not visible in this excerpt.
 */
1655 bool tls_sw_stream_read(const struct sock
*sk
)
1657 struct tls_context
*tls_ctx
= tls_get_ctx(sk
);
1658 struct tls_sw_context_rx
*ctx
= tls_sw_ctx_rx(tls_ctx
);
1659 bool ingress_empty
= true;
1660 struct sk_psock
*psock
;
1663 psock
= sk_psock(sk
);
1665 ingress_empty
= list_empty(&psock
->ingress_msg
);
1668 return !ingress_empty
|| ctx
->recv_pkt
;
/*
 * tls_read_size - strparser parse_msg callback: work out the size of the
 * next TLS record.
 *
 * @strp: strparser instance (its sk field yields the TLS context)
 * @skb:  buffer holding the start of the record at rxm->offset
 *
 * Copies rx.prepend_size bytes of header into an on-stack buffer, stores
 * header[0] in ctx->control, builds data_len from header[3] (high byte)
 * and header[4] (low byte), and compares it against
 * TLS_MAX_PAYLOAD_SIZE + cipher_overhead and against cipher_overhead,
 * where cipher_overhead is tag_size + iv_size. header[1] and header[2] are
 * compared with the configured receive version.
 *
 * On the visible success path returns data_len + TLS_HEADER_SIZE. The
 * visible error path calls tls_err_abort() with ret.
 * NOTE(review): the declaration of ret, the value returned while waiting
 * for more data and the error codes set in each failing check are not
 * visible in this excerpt.
 */
1671 static int tls_read_size(struct strparser
*strp
, struct sk_buff
*skb
)
1673 struct tls_context
*tls_ctx
= tls_get_ctx(strp
->sk
);
1674 struct tls_sw_context_rx
*ctx
= tls_sw_ctx_rx(tls_ctx
);
1675 char header
[TLS_HEADER_SIZE
+ MAX_IV_SIZE
];
1676 struct strp_msg
*rxm
= strp_msg(skb
);
1677 size_t cipher_overhead
;
1678 size_t data_len
= 0;
1681 /* Verify that we have a full TLS header, or wait for more data */
1682 if (rxm
->offset
+ tls_ctx
->rx
.prepend_size
> skb
->len
)
1685 /* Sanity-check size of on-stack buffer. */
1686 if (WARN_ON(tls_ctx
->rx
.prepend_size
> sizeof(header
))) {
1691 /* Linearize header to local buffer */
1692 ret
= skb_copy_bits(skb
, rxm
->offset
, header
, tls_ctx
->rx
.prepend_size
);
/* First header byte is kept as the record's content type. */
1697 ctx
->control
= header
[0];
/*
 * NOTE(review): header is plain char. Where char is signed, a length
 * high byte >= 0x80 makes (header[3] << 8) negative; converted to
 * size_t that becomes a huge value which the upper-bound check below
 * would reject. Confirm this is the intended handling.
 */
1699 data_len
= ((header
[4] & 0xFF) | (header
[3] << 8));
1701 cipher_overhead
= tls_ctx
->rx
.tag_size
+ tls_ctx
->rx
.iv_size
;
1703 if (data_len
> TLS_MAX_PAYLOAD_SIZE
+ cipher_overhead
) {
1707 if (data_len
< cipher_overhead
) {
/*
 * NOTE(review): header[1] is paired with the MINOR macro and header[2]
 * with the MAJOR macro; this only matters when the two version bytes
 * differ - confirm against the record header layout.
 */
1712 if (header
[1] != TLS_VERSION_MINOR(tls_ctx
->crypto_recv
.info
.version
) ||
1713 header
[2] != TLS_VERSION_MAJOR(tls_ctx
->crypto_recv
.info
.version
)) {
1718 #ifdef CONFIG_TLS_DEVICE
1719 handle_device_resync(strp
->sk
, TCP_SKB_CB(skb
)->seq
+ rxm
->offset
,
1720 *(u64
*)tls_ctx
->rx
.rec_seq
);
1722 return data_len
+ TLS_HEADER_SIZE
;
1725 tls_err_abort(strp
->sk
, ret
);
/*
 * tls_queue - strparser rcv_msg callback: take ownership of a complete
 * record.
 *
 * @strp: strparser instance (its sk field yields the TLS context)
 * @skb:  complete record delivered by the strparser
 *
 * Marks the RX context as holding an undecrypted record, stores @skb in
 * ctx->recv_pkt and invokes the saved data_ready callback on the socket.
 * NOTE(review): one statement between the recv_pkt store and the
 * callback is not visible in this excerpt.
 */
1730 static void tls_queue(struct strparser
*strp
, struct sk_buff
*skb
)
1732 struct tls_context
*tls_ctx
= tls_get_ctx(strp
->sk
);
1733 struct tls_sw_context_rx
*ctx
= tls_sw_ctx_rx(tls_ctx
);
1735 ctx
->decrypted
= false;
1737 ctx
->recv_pkt
= skb
;
1740 ctx
->saved_data_ready(strp
->sk
);
1743 static void tls_data_ready(struct sock
*sk
)
1745 struct tls_context
*tls_ctx
= tls_get_ctx(sk
);
1746 struct tls_sw_context_rx
*ctx
= tls_sw_ctx_rx(tls_ctx
);
1747 struct sk_psock
*psock
;
1749 strp_data_ready(&ctx
->strp
);
1751 psock
= sk_psock_get(sk
);
1752 if (psock
&& !list_empty(&psock
->ingress_msg
)) {
1753 ctx
->saved_data_ready(sk
);
1754 sk_psock_put(sk
, psock
);
/*
 * tls_sw_free_resources_tx - tear down the software TLS transmit state.
 *
 * @sk: socket whose TX context is being released
 *
 * Visible order of operations:
 *  1. set async_notify and wait for pending async encryptions;
 *  2. cancel the delayed TX work synchronously;
 *  3. transmit whatever records can still be sent;
 *  4. if a partially sent record exists, release its page(s) and memory
 *     charge, clear the pointer, then unlink the first tx_list entry and
 *     free its plaintext message;
 *  5. unlink every remaining tx_list entry and free both its encrypted
 *     and plaintext messages;
 *  6. free the send AEAD transform and the open record.
 * NOTE(review): the loop around the put_page()/sk_mem_uncharge() pair and
 * the freeing of the rec and ctx objects themselves are not visible in
 * this excerpt.
 */
1758 void tls_sw_free_resources_tx(struct sock
*sk
)
1760 struct tls_context
*tls_ctx
= tls_get_ctx(sk
);
1761 struct tls_sw_context_tx
*ctx
= tls_sw_ctx_tx(tls_ctx
);
1762 struct tls_rec
*rec
, *tmp
;
1764 /* Wait for any pending async encryptions to complete */
1765 smp_store_mb(ctx
->async_notify
, true);
1766 if (atomic_read(&ctx
->encrypt_pending
))
1767 crypto_wait_req(-EINPROGRESS
, &ctx
->async_wait
);
/* The deferred TX worker must not be running while records are torn down. */
1769 cancel_delayed_work_sync(&ctx
->tx_work
.work
);
1771 /* Tx whatever records we can transmit and abandon the rest */
1772 tls_tx_records(sk
, -1);
1774 /* Free up un-sent records in tx_list. First, free
1775 * the partially sent record if any at head of tx_list.
1777 if (tls_ctx
->partially_sent_record
) {
1778 struct scatterlist
*sg
= tls_ctx
->partially_sent_record
;
1781 put_page(sg_page(sg
));
1782 sk_mem_uncharge(sk
, sg
->length
);
1789 tls_ctx
->partially_sent_record
= NULL
;
1791 rec
= list_first_entry(&ctx
->tx_list
,
1792 struct tls_rec
, list
);
1793 list_del(&rec
->list
);
1794 sk_msg_free(sk
, &rec
->msg_plaintext
);
1798 list_for_each_entry_safe(rec
, tmp
, &ctx
->tx_list
, list
) {
1799 list_del(&rec
->list
);
1800 sk_msg_free(sk
, &rec
->msg_encrypted
);
1801 sk_msg_free(sk
, &rec
->msg_plaintext
);
1805 crypto_free_aead(ctx
->aead_send
);
1806 tls_free_open_rec(sk
);
/*
 * tls_sw_release_resources_rx - release the software TLS receive
 * resources without freeing the RX context itself.
 *
 * @sk: socket whose RX state is being released
 *
 * Only acts when a receive AEAD transform exists: drops the queued
 * record, frees the transform, stops the strparser, restores the
 * original sk_data_ready callback under sk_callback_lock and finally
 * calls strp_done().
 * NOTE(review): statements on the lines immediately around strp_done()
 * are not visible in this excerpt.
 */
1811 void tls_sw_release_resources_rx(struct sock
*sk
)
1813 struct tls_context
*tls_ctx
= tls_get_ctx(sk
);
1814 struct tls_sw_context_rx
*ctx
= tls_sw_ctx_rx(tls_ctx
);
1816 if (ctx
->aead_recv
) {
1817 kfree_skb(ctx
->recv_pkt
);
1818 ctx
->recv_pkt
= NULL
;
1819 crypto_free_aead(ctx
->aead_recv
);
1820 strp_stop(&ctx
->strp
);
/* Put back the data_ready callback that was saved when RX offload was set up. */
1821 write_lock_bh(&sk
->sk_callback_lock
);
1822 sk
->sk_data_ready
= ctx
->saved_data_ready
;
1823 write_unlock_bh(&sk
->sk_callback_lock
);
1825 strp_done(&ctx
->strp
);
/*
 * tls_sw_free_resources_rx - free the software TLS receive state.
 *
 * @sk: socket whose RX context is being freed
 *
 * Delegates the teardown to tls_sw_release_resources_rx().
 * NOTE(review): ctx is looked up here but its use (presumably freeing
 * the context) is on a line not visible in this excerpt - confirm.
 */
1830 void tls_sw_free_resources_rx(struct sock
*sk
)
1832 struct tls_context
*tls_ctx
= tls_get_ctx(sk
);
1833 struct tls_sw_context_rx
*ctx
= tls_sw_ctx_rx(tls_ctx
);
1835 tls_sw_release_resources_rx(sk
);
1840 /* The work handler to transmit the encrypted records in tx_list */
/*
 * @work: work_struct embedded in the delayed_work of a struct tx_work
 *
 * Recovers the owning socket from the enclosing struct tx_work, then
 * atomically tests and clears BIT_TX_SCHEDULED in ctx->tx_bitmask before
 * pushing records with tls_tx_records(sk, -1).
 * NOTE(review): the statement executed when the bit was not set, and any
 * socket locking around tls_tx_records(), are not visible in this
 * excerpt.
 */
1841 static void tx_work_handler(struct work_struct
*work
)
1843 struct delayed_work
*delayed_work
= to_delayed_work(work
);
1844 struct tx_work
*tx_work
= container_of(delayed_work
,
1845 struct tx_work
, work
);
1846 struct sock
*sk
= tx_work
->sk
;
1847 struct tls_context
*tls_ctx
= tls_get_ctx(sk
);
1848 struct tls_sw_context_tx
*ctx
= tls_sw_ctx_tx(tls_ctx
);
1850 if (!test_and_clear_bit(BIT_TX_SCHEDULED
, &ctx
->tx_bitmask
))
1854 tls_tx_records(sk
, -1);
1858 int tls_set_sw_offload(struct sock
*sk
, struct tls_context
*ctx
, int tx
)
1860 struct tls_crypto_info
*crypto_info
;
1861 struct tls12_crypto_info_aes_gcm_128
*gcm_128_info
;
1862 struct tls_sw_context_tx
*sw_ctx_tx
= NULL
;
1863 struct tls_sw_context_rx
*sw_ctx_rx
= NULL
;
1864 struct cipher_context
*cctx
;
1865 struct crypto_aead
**aead
;
1866 struct strp_callbacks cb
;
1867 u16 nonce_size
, tag_size
, iv_size
, rec_seq_size
;
1877 if (!ctx
->priv_ctx_tx
) {
1878 sw_ctx_tx
= kzalloc(sizeof(*sw_ctx_tx
), GFP_KERNEL
);
1883 ctx
->priv_ctx_tx
= sw_ctx_tx
;
1886 (struct tls_sw_context_tx
*)ctx
->priv_ctx_tx
;
1889 if (!ctx
->priv_ctx_rx
) {
1890 sw_ctx_rx
= kzalloc(sizeof(*sw_ctx_rx
), GFP_KERNEL
);
1895 ctx
->priv_ctx_rx
= sw_ctx_rx
;
1898 (struct tls_sw_context_rx
*)ctx
->priv_ctx_rx
;
1903 crypto_init_wait(&sw_ctx_tx
->async_wait
);
1904 crypto_info
= &ctx
->crypto_send
.info
;
1906 aead
= &sw_ctx_tx
->aead_send
;
1907 INIT_LIST_HEAD(&sw_ctx_tx
->tx_list
);
1908 INIT_DELAYED_WORK(&sw_ctx_tx
->tx_work
.work
, tx_work_handler
);
1909 sw_ctx_tx
->tx_work
.sk
= sk
;
1911 crypto_init_wait(&sw_ctx_rx
->async_wait
);
1912 crypto_info
= &ctx
->crypto_recv
.info
;
1914 aead
= &sw_ctx_rx
->aead_recv
;
1917 switch (crypto_info
->cipher_type
) {
1918 case TLS_CIPHER_AES_GCM_128
: {
1919 nonce_size
= TLS_CIPHER_AES_GCM_128_IV_SIZE
;
1920 tag_size
= TLS_CIPHER_AES_GCM_128_TAG_SIZE
;
1921 iv_size
= TLS_CIPHER_AES_GCM_128_IV_SIZE
;
1922 iv
= ((struct tls12_crypto_info_aes_gcm_128
*)crypto_info
)->iv
;
1923 rec_seq_size
= TLS_CIPHER_AES_GCM_128_REC_SEQ_SIZE
;
1925 ((struct tls12_crypto_info_aes_gcm_128
*)crypto_info
)->rec_seq
;
1927 (struct tls12_crypto_info_aes_gcm_128
*)crypto_info
;
1935 /* Sanity-check the IV size for stack allocations. */
1936 if (iv_size
> MAX_IV_SIZE
|| nonce_size
> MAX_IV_SIZE
) {
1941 cctx
->prepend_size
= TLS_HEADER_SIZE
+ nonce_size
;
1942 cctx
->tag_size
= tag_size
;
1943 cctx
->overhead_size
= cctx
->prepend_size
+ cctx
->tag_size
;
1944 cctx
->iv_size
= iv_size
;
1945 cctx
->iv
= kmalloc(iv_size
+ TLS_CIPHER_AES_GCM_128_SALT_SIZE
,
1951 memcpy(cctx
->iv
, gcm_128_info
->salt
, TLS_CIPHER_AES_GCM_128_SALT_SIZE
);
1952 memcpy(cctx
->iv
+ TLS_CIPHER_AES_GCM_128_SALT_SIZE
, iv
, iv_size
);
1953 cctx
->rec_seq_size
= rec_seq_size
;
1954 cctx
->rec_seq
= kmemdup(rec_seq
, rec_seq_size
, GFP_KERNEL
);
1955 if (!cctx
->rec_seq
) {
1961 *aead
= crypto_alloc_aead("gcm(aes)", 0, 0);
1962 if (IS_ERR(*aead
)) {
1963 rc
= PTR_ERR(*aead
);
1969 ctx
->push_pending_record
= tls_sw_push_pending_record
;
1971 rc
= crypto_aead_setkey(*aead
, gcm_128_info
->key
,
1972 TLS_CIPHER_AES_GCM_128_KEY_SIZE
);
1976 rc
= crypto_aead_setauthsize(*aead
, cctx
->tag_size
);
1981 /* Set up strparser */
1982 memset(&cb
, 0, sizeof(cb
));
1983 cb
.rcv_msg
= tls_queue
;
1984 cb
.parse_msg
= tls_read_size
;
1986 strp_init(&sw_ctx_rx
->strp
, sk
, &cb
);
1988 write_lock_bh(&sk
->sk_callback_lock
);
1989 sw_ctx_rx
->saved_data_ready
= sk
->sk_data_ready
;
1990 sk
->sk_data_ready
= tls_data_ready
;
1991 write_unlock_bh(&sk
->sk_callback_lock
);
1993 strp_check_rcv(&sw_ctx_rx
->strp
);
1999 crypto_free_aead(*aead
);
2002 kfree(cctx
->rec_seq
);
2003 cctx
->rec_seq
= NULL
;
2009 kfree(ctx
->priv_ctx_tx
);
2010 ctx
->priv_ctx_tx
= NULL
;
2012 kfree(ctx
->priv_ctx_rx
);
2013 ctx
->priv_ctx_rx
= NULL
;