1 // SPDX-License-Identifier: GPL-2.0
/*
 * Ceph msgr2 protocol implementation
 *
 * Copyright (C) 2020 Ilya Dryomov <idryomov@gmail.com>
 */
8 #include <linux/ceph/ceph_debug.h>
10 #include <crypto/aead.h>
11 #include <crypto/hash.h>
12 #include <crypto/sha2.h>
13 #include <crypto/utils.h>
14 #include <linux/bvec.h>
15 #include <linux/crc32c.h>
16 #include <linux/net.h>
17 #include <linux/scatterlist.h>
18 #include <linux/socket.h>
19 #include <linux/sched/mm.h>
23 #include <linux/ceph/ceph_features.h>
24 #include <linux/ceph/decode.h>
25 #include <linux/ceph/libceph.h>
26 #include <linux/ceph/messenger.h>
28 #include "crypto.h" /* for CEPH_KEY_LEN and CEPH_MAX_CON_SECRET_LEN */
30 #define FRAME_TAG_HELLO 1
31 #define FRAME_TAG_AUTH_REQUEST 2
32 #define FRAME_TAG_AUTH_BAD_METHOD 3
33 #define FRAME_TAG_AUTH_REPLY_MORE 4
34 #define FRAME_TAG_AUTH_REQUEST_MORE 5
35 #define FRAME_TAG_AUTH_DONE 6
36 #define FRAME_TAG_AUTH_SIGNATURE 7
37 #define FRAME_TAG_CLIENT_IDENT 8
38 #define FRAME_TAG_SERVER_IDENT 9
39 #define FRAME_TAG_IDENT_MISSING_FEATURES 10
40 #define FRAME_TAG_SESSION_RECONNECT 11
41 #define FRAME_TAG_SESSION_RESET 12
42 #define FRAME_TAG_SESSION_RETRY 13
43 #define FRAME_TAG_SESSION_RETRY_GLOBAL 14
44 #define FRAME_TAG_SESSION_RECONNECT_OK 15
45 #define FRAME_TAG_WAIT 16
46 #define FRAME_TAG_MESSAGE 17
47 #define FRAME_TAG_KEEPALIVE2 18
48 #define FRAME_TAG_KEEPALIVE2_ACK 19
49 #define FRAME_TAG_ACK 20
51 #define FRAME_LATE_STATUS_ABORTED 0x1
52 #define FRAME_LATE_STATUS_COMPLETE 0xe
53 #define FRAME_LATE_STATUS_ABORTED_MASK 0xf
55 #define IN_S_HANDLE_PREAMBLE 1
56 #define IN_S_HANDLE_CONTROL 2
57 #define IN_S_HANDLE_CONTROL_REMAINDER 3
58 #define IN_S_PREPARE_READ_DATA 4
59 #define IN_S_PREPARE_READ_DATA_CONT 5
60 #define IN_S_PREPARE_READ_ENC_PAGE 6
61 #define IN_S_PREPARE_SPARSE_DATA 7
62 #define IN_S_PREPARE_SPARSE_DATA_CONT 8
63 #define IN_S_HANDLE_EPILOGUE 9
64 #define IN_S_FINISH_SKIP 10
66 #define OUT_S_QUEUE_DATA 1
67 #define OUT_S_QUEUE_DATA_CONT 2
68 #define OUT_S_QUEUE_ENC_PAGE 3
69 #define OUT_S_QUEUE_ZEROS 4
70 #define OUT_S_FINISH_MESSAGE 5
71 #define OUT_S_GET_NEXT 6
73 #define CTRL_BODY(p) ((void *)(p) + CEPH_PREAMBLE_LEN)
74 #define FRONT_PAD(p) ((void *)(p) + CEPH_EPILOGUE_SECURE_LEN)
75 #define MIDDLE_PAD(p) (FRONT_PAD(p) + CEPH_GCM_BLOCK_LEN)
76 #define DATA_PAD(p) (MIDDLE_PAD(p) + CEPH_GCM_BLOCK_LEN)
78 #define CEPH_MSG_FLAGS (MSG_DONTWAIT | MSG_NOSIGNAL)
80 static int do_recvmsg(struct socket
*sock
, struct iov_iter
*it
)
82 struct msghdr msg
= { .msg_flags
= CEPH_MSG_FLAGS
};
86 while (iov_iter_count(it
)) {
87 ret
= sock_recvmsg(sock
, &msg
, msg
.msg_flags
);
94 iov_iter_advance(it
, ret
);
97 WARN_ON(msg_data_left(&msg
));
102 * Read as much as possible.
105 * 1 - done, nothing (else) to read
106 * 0 - socket is empty, need to wait
109 static int ceph_tcp_recv(struct ceph_connection
*con
)
113 dout("%s con %p %s %zu\n", __func__
, con
,
114 iov_iter_is_discard(&con
->v2
.in_iter
) ? "discard" : "need",
115 iov_iter_count(&con
->v2
.in_iter
));
116 ret
= do_recvmsg(con
->sock
, &con
->v2
.in_iter
);
117 dout("%s con %p ret %d left %zu\n", __func__
, con
, ret
,
118 iov_iter_count(&con
->v2
.in_iter
));
122 static int do_sendmsg(struct socket
*sock
, struct iov_iter
*it
)
124 struct msghdr msg
= { .msg_flags
= CEPH_MSG_FLAGS
};
128 while (iov_iter_count(it
)) {
129 ret
= sock_sendmsg(sock
, &msg
);
136 iov_iter_advance(it
, ret
);
139 WARN_ON(msg_data_left(&msg
));
143 static int do_try_sendpage(struct socket
*sock
, struct iov_iter
*it
)
145 struct msghdr msg
= { .msg_flags
= CEPH_MSG_FLAGS
};
149 if (WARN_ON(!iov_iter_is_bvec(it
)))
152 while (iov_iter_count(it
)) {
153 /* iov_iter_iovec() for ITER_BVEC */
154 bvec_set_page(&bv
, it
->bvec
->bv_page
,
155 min(iov_iter_count(it
),
156 it
->bvec
->bv_len
- it
->iov_offset
),
157 it
->bvec
->bv_offset
+ it
->iov_offset
);
160 * MSG_SPLICE_PAGES cannot properly handle pages with
161 * page_count == 0, we need to fall back to sendmsg if
164 * Same goes for slab pages: skb_can_coalesce() allows
165 * coalescing neighboring slab objects into a single frag
166 * which triggers one of hardened usercopy checks.
168 if (sendpage_ok(bv
.bv_page
))
169 msg
.msg_flags
|= MSG_SPLICE_PAGES
;
171 msg
.msg_flags
&= ~MSG_SPLICE_PAGES
;
173 iov_iter_bvec(&msg
.msg_iter
, ITER_SOURCE
, &bv
, 1, bv
.bv_len
);
174 ret
= sock_sendmsg(sock
, &msg
);
181 iov_iter_advance(it
, ret
);
188 * Write as much as possible. The socket is expected to be corked,
189 * so we don't bother with MSG_MORE here.
192 * 1 - done, nothing (else) to write
193 * 0 - socket is full, need to wait
196 static int ceph_tcp_send(struct ceph_connection
*con
)
200 dout("%s con %p have %zu try_sendpage %d\n", __func__
, con
,
201 iov_iter_count(&con
->v2
.out_iter
), con
->v2
.out_iter_sendpage
);
202 if (con
->v2
.out_iter_sendpage
)
203 ret
= do_try_sendpage(con
->sock
, &con
->v2
.out_iter
);
205 ret
= do_sendmsg(con
->sock
, &con
->v2
.out_iter
);
206 dout("%s con %p ret %d left %zu\n", __func__
, con
, ret
,
207 iov_iter_count(&con
->v2
.out_iter
));
211 static void add_in_kvec(struct ceph_connection
*con
, void *buf
, int len
)
213 BUG_ON(con
->v2
.in_kvec_cnt
>= ARRAY_SIZE(con
->v2
.in_kvecs
));
214 WARN_ON(!iov_iter_is_kvec(&con
->v2
.in_iter
));
216 con
->v2
.in_kvecs
[con
->v2
.in_kvec_cnt
].iov_base
= buf
;
217 con
->v2
.in_kvecs
[con
->v2
.in_kvec_cnt
].iov_len
= len
;
218 con
->v2
.in_kvec_cnt
++;
220 con
->v2
.in_iter
.nr_segs
++;
221 con
->v2
.in_iter
.count
+= len
;
224 static void reset_in_kvecs(struct ceph_connection
*con
)
226 WARN_ON(iov_iter_count(&con
->v2
.in_iter
));
228 con
->v2
.in_kvec_cnt
= 0;
229 iov_iter_kvec(&con
->v2
.in_iter
, ITER_DEST
, con
->v2
.in_kvecs
, 0, 0);
232 static void set_in_bvec(struct ceph_connection
*con
, const struct bio_vec
*bv
)
234 WARN_ON(iov_iter_count(&con
->v2
.in_iter
));
236 con
->v2
.in_bvec
= *bv
;
237 iov_iter_bvec(&con
->v2
.in_iter
, ITER_DEST
, &con
->v2
.in_bvec
, 1, bv
->bv_len
);
240 static void set_in_skip(struct ceph_connection
*con
, int len
)
242 WARN_ON(iov_iter_count(&con
->v2
.in_iter
));
244 dout("%s con %p len %d\n", __func__
, con
, len
);
245 iov_iter_discard(&con
->v2
.in_iter
, ITER_DEST
, len
);
248 static void add_out_kvec(struct ceph_connection
*con
, void *buf
, int len
)
250 BUG_ON(con
->v2
.out_kvec_cnt
>= ARRAY_SIZE(con
->v2
.out_kvecs
));
251 WARN_ON(!iov_iter_is_kvec(&con
->v2
.out_iter
));
252 WARN_ON(con
->v2
.out_zero
);
254 con
->v2
.out_kvecs
[con
->v2
.out_kvec_cnt
].iov_base
= buf
;
255 con
->v2
.out_kvecs
[con
->v2
.out_kvec_cnt
].iov_len
= len
;
256 con
->v2
.out_kvec_cnt
++;
258 con
->v2
.out_iter
.nr_segs
++;
259 con
->v2
.out_iter
.count
+= len
;
262 static void reset_out_kvecs(struct ceph_connection
*con
)
264 WARN_ON(iov_iter_count(&con
->v2
.out_iter
));
265 WARN_ON(con
->v2
.out_zero
);
267 con
->v2
.out_kvec_cnt
= 0;
269 iov_iter_kvec(&con
->v2
.out_iter
, ITER_SOURCE
, con
->v2
.out_kvecs
, 0, 0);
270 con
->v2
.out_iter_sendpage
= false;
273 static void set_out_bvec(struct ceph_connection
*con
, const struct bio_vec
*bv
,
276 WARN_ON(iov_iter_count(&con
->v2
.out_iter
));
277 WARN_ON(con
->v2
.out_zero
);
279 con
->v2
.out_bvec
= *bv
;
280 con
->v2
.out_iter_sendpage
= zerocopy
;
281 iov_iter_bvec(&con
->v2
.out_iter
, ITER_SOURCE
, &con
->v2
.out_bvec
, 1,
282 con
->v2
.out_bvec
.bv_len
);
285 static void set_out_bvec_zero(struct ceph_connection
*con
)
287 WARN_ON(iov_iter_count(&con
->v2
.out_iter
));
288 WARN_ON(!con
->v2
.out_zero
);
290 bvec_set_page(&con
->v2
.out_bvec
, ceph_zero_page
,
291 min(con
->v2
.out_zero
, (int)PAGE_SIZE
), 0);
292 con
->v2
.out_iter_sendpage
= true;
293 iov_iter_bvec(&con
->v2
.out_iter
, ITER_SOURCE
, &con
->v2
.out_bvec
, 1,
294 con
->v2
.out_bvec
.bv_len
);
297 static void out_zero_add(struct ceph_connection
*con
, int len
)
299 dout("%s con %p len %d\n", __func__
, con
, len
);
300 con
->v2
.out_zero
+= len
;
303 static void *alloc_conn_buf(struct ceph_connection
*con
, int len
)
307 dout("%s con %p len %d\n", __func__
, con
, len
);
309 if (WARN_ON(con
->v2
.conn_buf_cnt
>= ARRAY_SIZE(con
->v2
.conn_bufs
)))
312 buf
= kvmalloc(len
, GFP_NOIO
);
316 con
->v2
.conn_bufs
[con
->v2
.conn_buf_cnt
++] = buf
;
320 static void free_conn_bufs(struct ceph_connection
*con
)
322 while (con
->v2
.conn_buf_cnt
)
323 kvfree(con
->v2
.conn_bufs
[--con
->v2
.conn_buf_cnt
]);
326 static void add_in_sign_kvec(struct ceph_connection
*con
, void *buf
, int len
)
328 BUG_ON(con
->v2
.in_sign_kvec_cnt
>= ARRAY_SIZE(con
->v2
.in_sign_kvecs
));
330 con
->v2
.in_sign_kvecs
[con
->v2
.in_sign_kvec_cnt
].iov_base
= buf
;
331 con
->v2
.in_sign_kvecs
[con
->v2
.in_sign_kvec_cnt
].iov_len
= len
;
332 con
->v2
.in_sign_kvec_cnt
++;
335 static void clear_in_sign_kvecs(struct ceph_connection
*con
)
337 con
->v2
.in_sign_kvec_cnt
= 0;
340 static void add_out_sign_kvec(struct ceph_connection
*con
, void *buf
, int len
)
342 BUG_ON(con
->v2
.out_sign_kvec_cnt
>= ARRAY_SIZE(con
->v2
.out_sign_kvecs
));
344 con
->v2
.out_sign_kvecs
[con
->v2
.out_sign_kvec_cnt
].iov_base
= buf
;
345 con
->v2
.out_sign_kvecs
[con
->v2
.out_sign_kvec_cnt
].iov_len
= len
;
346 con
->v2
.out_sign_kvec_cnt
++;
349 static void clear_out_sign_kvecs(struct ceph_connection
*con
)
351 con
->v2
.out_sign_kvec_cnt
= 0;
354 static bool con_secure(struct ceph_connection
*con
)
356 return con
->v2
.con_mode
== CEPH_CON_MODE_SECURE
;
359 static int front_len(const struct ceph_msg
*msg
)
361 return le32_to_cpu(msg
->hdr
.front_len
);
364 static int middle_len(const struct ceph_msg
*msg
)
366 return le32_to_cpu(msg
->hdr
.middle_len
);
369 static int data_len(const struct ceph_msg
*msg
)
371 return le32_to_cpu(msg
->hdr
.data_len
);
374 static bool need_padding(int len
)
376 return !IS_ALIGNED(len
, CEPH_GCM_BLOCK_LEN
);
379 static int padded_len(int len
)
381 return ALIGN(len
, CEPH_GCM_BLOCK_LEN
);
/* Number of padding bytes needed to block-align @len. */
static int padding_len(int len)
{
	return padded_len(len) - len;
}
389 /* preamble + control segment */
390 static int head_onwire_len(int ctrl_len
, bool secure
)
395 BUG_ON(ctrl_len
< 0 || ctrl_len
> CEPH_MSG_MAX_CONTROL_LEN
);
398 head_len
= CEPH_PREAMBLE_SECURE_LEN
;
399 if (ctrl_len
> CEPH_PREAMBLE_INLINE_LEN
) {
400 rem_len
= ctrl_len
- CEPH_PREAMBLE_INLINE_LEN
;
401 head_len
+= padded_len(rem_len
) + CEPH_GCM_TAG_LEN
;
404 head_len
= CEPH_PREAMBLE_PLAIN_LEN
;
406 head_len
+= ctrl_len
+ CEPH_CRC_LEN
;
411 /* front, middle and data segments + epilogue */
412 static int __tail_onwire_len(int front_len
, int middle_len
, int data_len
,
415 BUG_ON(front_len
< 0 || front_len
> CEPH_MSG_MAX_FRONT_LEN
||
416 middle_len
< 0 || middle_len
> CEPH_MSG_MAX_MIDDLE_LEN
||
417 data_len
< 0 || data_len
> CEPH_MSG_MAX_DATA_LEN
);
419 if (!front_len
&& !middle_len
&& !data_len
)
423 return front_len
+ middle_len
+ data_len
+
424 CEPH_EPILOGUE_PLAIN_LEN
;
426 return padded_len(front_len
) + padded_len(middle_len
) +
427 padded_len(data_len
) + CEPH_EPILOGUE_SECURE_LEN
;
430 static int tail_onwire_len(const struct ceph_msg
*msg
, bool secure
)
432 return __tail_onwire_len(front_len(msg
), middle_len(msg
),
433 data_len(msg
), secure
);
436 /* head_onwire_len(sizeof(struct ceph_msg_header2), false) */
437 #define MESSAGE_HEAD_PLAIN_LEN (CEPH_PREAMBLE_PLAIN_LEN + \
438 sizeof(struct ceph_msg_header2) + \
441 static const int frame_aligns
[] = {
/*
 * Discards trailing empty segments, unless there is just one segment.
 * A frame always has at least one (possibly empty) segment.
 */
static int calc_segment_count(const int *lens, int len_cnt)
{
	int i;

	for (i = len_cnt - 1; i >= 0; i--) {
		if (lens[i])
			return i + 1;
	}

	return 1;
}
464 static void init_frame_desc(struct ceph_frame_desc
*desc
, int tag
,
465 const int *lens
, int len_cnt
)
469 memset(desc
, 0, sizeof(*desc
));
472 desc
->fd_seg_cnt
= calc_segment_count(lens
, len_cnt
);
473 BUG_ON(desc
->fd_seg_cnt
> CEPH_FRAME_MAX_SEGMENT_COUNT
);
474 for (i
= 0; i
< desc
->fd_seg_cnt
; i
++) {
475 desc
->fd_lens
[i
] = lens
[i
];
476 desc
->fd_aligns
[i
] = frame_aligns
[i
];
481 * Preamble crc covers everything up to itself (28 bytes) and
482 * is calculated and verified irrespective of the connection mode
483 * (i.e. even if the frame is encrypted).
485 static void encode_preamble(const struct ceph_frame_desc
*desc
, void *p
)
487 void *crcp
= p
+ CEPH_PREAMBLE_LEN
- CEPH_CRC_LEN
;
491 memset(p
, 0, CEPH_PREAMBLE_LEN
);
493 ceph_encode_8(&p
, desc
->fd_tag
);
494 ceph_encode_8(&p
, desc
->fd_seg_cnt
);
495 for (i
= 0; i
< desc
->fd_seg_cnt
; i
++) {
496 ceph_encode_32(&p
, desc
->fd_lens
[i
]);
497 ceph_encode_16(&p
, desc
->fd_aligns
[i
]);
500 put_unaligned_le32(crc32c(0, start
, crcp
- start
), crcp
);
503 static int decode_preamble(void *p
, struct ceph_frame_desc
*desc
)
505 void *crcp
= p
+ CEPH_PREAMBLE_LEN
- CEPH_CRC_LEN
;
506 u32 crc
, expected_crc
;
509 crc
= crc32c(0, p
, crcp
- p
);
510 expected_crc
= get_unaligned_le32(crcp
);
511 if (crc
!= expected_crc
) {
512 pr_err("bad preamble crc, calculated %u, expected %u\n",
517 memset(desc
, 0, sizeof(*desc
));
519 desc
->fd_tag
= ceph_decode_8(&p
);
520 desc
->fd_seg_cnt
= ceph_decode_8(&p
);
521 if (desc
->fd_seg_cnt
< 1 ||
522 desc
->fd_seg_cnt
> CEPH_FRAME_MAX_SEGMENT_COUNT
) {
523 pr_err("bad segment count %d\n", desc
->fd_seg_cnt
);
526 for (i
= 0; i
< desc
->fd_seg_cnt
; i
++) {
527 desc
->fd_lens
[i
] = ceph_decode_32(&p
);
528 desc
->fd_aligns
[i
] = ceph_decode_16(&p
);
531 if (desc
->fd_lens
[0] < 0 ||
532 desc
->fd_lens
[0] > CEPH_MSG_MAX_CONTROL_LEN
) {
533 pr_err("bad control segment length %d\n", desc
->fd_lens
[0]);
536 if (desc
->fd_lens
[1] < 0 ||
537 desc
->fd_lens
[1] > CEPH_MSG_MAX_FRONT_LEN
) {
538 pr_err("bad front segment length %d\n", desc
->fd_lens
[1]);
541 if (desc
->fd_lens
[2] < 0 ||
542 desc
->fd_lens
[2] > CEPH_MSG_MAX_MIDDLE_LEN
) {
543 pr_err("bad middle segment length %d\n", desc
->fd_lens
[2]);
546 if (desc
->fd_lens
[3] < 0 ||
547 desc
->fd_lens
[3] > CEPH_MSG_MAX_DATA_LEN
) {
548 pr_err("bad data segment length %d\n", desc
->fd_lens
[3]);
553 * This would fire for FRAME_TAG_WAIT (it has one empty
554 * segment), but we should never get it as client.
556 if (!desc
->fd_lens
[desc
->fd_seg_cnt
- 1]) {
557 pr_err("last segment empty, segment count %d\n",
565 static void encode_epilogue_plain(struct ceph_connection
*con
, bool aborted
)
567 con
->v2
.out_epil
.late_status
= aborted
? FRAME_LATE_STATUS_ABORTED
:
568 FRAME_LATE_STATUS_COMPLETE
;
569 cpu_to_le32s(&con
->v2
.out_epil
.front_crc
);
570 cpu_to_le32s(&con
->v2
.out_epil
.middle_crc
);
571 cpu_to_le32s(&con
->v2
.out_epil
.data_crc
);
574 static void encode_epilogue_secure(struct ceph_connection
*con
, bool aborted
)
576 memset(&con
->v2
.out_epil
, 0, sizeof(con
->v2
.out_epil
));
577 con
->v2
.out_epil
.late_status
= aborted
? FRAME_LATE_STATUS_ABORTED
:
578 FRAME_LATE_STATUS_COMPLETE
;
581 static int decode_epilogue(void *p
, u32
*front_crc
, u32
*middle_crc
,
586 late_status
= ceph_decode_8(&p
);
587 if ((late_status
& FRAME_LATE_STATUS_ABORTED_MASK
) !=
588 FRAME_LATE_STATUS_COMPLETE
) {
589 /* we should never get an aborted message as client */
590 pr_err("bad late_status 0x%x\n", late_status
);
594 if (front_crc
&& middle_crc
&& data_crc
) {
595 *front_crc
= ceph_decode_32(&p
);
596 *middle_crc
= ceph_decode_32(&p
);
597 *data_crc
= ceph_decode_32(&p
);
603 static void fill_header(struct ceph_msg_header
*hdr
,
604 const struct ceph_msg_header2
*hdr2
,
605 int front_len
, int middle_len
, int data_len
,
606 const struct ceph_entity_name
*peer_name
)
608 hdr
->seq
= hdr2
->seq
;
609 hdr
->tid
= hdr2
->tid
;
610 hdr
->type
= hdr2
->type
;
611 hdr
->priority
= hdr2
->priority
;
612 hdr
->version
= hdr2
->version
;
613 hdr
->front_len
= cpu_to_le32(front_len
);
614 hdr
->middle_len
= cpu_to_le32(middle_len
);
615 hdr
->data_len
= cpu_to_le32(data_len
);
616 hdr
->data_off
= hdr2
->data_off
;
617 hdr
->src
= *peer_name
;
618 hdr
->compat_version
= hdr2
->compat_version
;
623 static void fill_header2(struct ceph_msg_header2
*hdr2
,
624 const struct ceph_msg_header
*hdr
, u64 ack_seq
)
626 hdr2
->seq
= hdr
->seq
;
627 hdr2
->tid
= hdr
->tid
;
628 hdr2
->type
= hdr
->type
;
629 hdr2
->priority
= hdr
->priority
;
630 hdr2
->version
= hdr
->version
;
631 hdr2
->data_pre_padding_len
= 0;
632 hdr2
->data_off
= hdr
->data_off
;
633 hdr2
->ack_seq
= cpu_to_le64(ack_seq
);
635 hdr2
->compat_version
= hdr
->compat_version
;
639 static int verify_control_crc(struct ceph_connection
*con
)
641 int ctrl_len
= con
->v2
.in_desc
.fd_lens
[0];
642 u32 crc
, expected_crc
;
644 WARN_ON(con
->v2
.in_kvecs
[0].iov_len
!= ctrl_len
);
645 WARN_ON(con
->v2
.in_kvecs
[1].iov_len
!= CEPH_CRC_LEN
);
647 crc
= crc32c(-1, con
->v2
.in_kvecs
[0].iov_base
, ctrl_len
);
648 expected_crc
= get_unaligned_le32(con
->v2
.in_kvecs
[1].iov_base
);
649 if (crc
!= expected_crc
) {
650 pr_err("bad control crc, calculated %u, expected %u\n",
658 static int verify_epilogue_crcs(struct ceph_connection
*con
, u32 front_crc
,
659 u32 middle_crc
, u32 data_crc
)
661 if (front_len(con
->in_msg
)) {
662 con
->in_front_crc
= crc32c(-1, con
->in_msg
->front
.iov_base
,
663 front_len(con
->in_msg
));
665 WARN_ON(!middle_len(con
->in_msg
) && !data_len(con
->in_msg
));
666 con
->in_front_crc
= -1;
669 if (middle_len(con
->in_msg
))
670 con
->in_middle_crc
= crc32c(-1,
671 con
->in_msg
->middle
->vec
.iov_base
,
672 middle_len(con
->in_msg
));
673 else if (data_len(con
->in_msg
))
674 con
->in_middle_crc
= -1;
676 con
->in_middle_crc
= 0;
678 if (!data_len(con
->in_msg
))
679 con
->in_data_crc
= 0;
681 dout("%s con %p msg %p crcs %u %u %u\n", __func__
, con
, con
->in_msg
,
682 con
->in_front_crc
, con
->in_middle_crc
, con
->in_data_crc
);
684 if (con
->in_front_crc
!= front_crc
) {
685 pr_err("bad front crc, calculated %u, expected %u\n",
686 con
->in_front_crc
, front_crc
);
689 if (con
->in_middle_crc
!= middle_crc
) {
690 pr_err("bad middle crc, calculated %u, expected %u\n",
691 con
->in_middle_crc
, middle_crc
);
694 if (con
->in_data_crc
!= data_crc
) {
695 pr_err("bad data crc, calculated %u, expected %u\n",
696 con
->in_data_crc
, data_crc
);
703 static int setup_crypto(struct ceph_connection
*con
,
704 const u8
*session_key
, int session_key_len
,
705 const u8
*con_secret
, int con_secret_len
)
707 unsigned int noio_flag
;
710 dout("%s con %p con_mode %d session_key_len %d con_secret_len %d\n",
711 __func__
, con
, con
->v2
.con_mode
, session_key_len
, con_secret_len
);
712 WARN_ON(con
->v2
.hmac_tfm
|| con
->v2
.gcm_tfm
|| con
->v2
.gcm_req
);
714 if (con
->v2
.con_mode
!= CEPH_CON_MODE_CRC
&&
715 con
->v2
.con_mode
!= CEPH_CON_MODE_SECURE
) {
716 pr_err("bad con_mode %d\n", con
->v2
.con_mode
);
720 if (!session_key_len
) {
721 WARN_ON(con
->v2
.con_mode
!= CEPH_CON_MODE_CRC
);
722 WARN_ON(con_secret_len
);
723 return 0; /* auth_none */
726 noio_flag
= memalloc_noio_save();
727 con
->v2
.hmac_tfm
= crypto_alloc_shash("hmac(sha256)", 0, 0);
728 memalloc_noio_restore(noio_flag
);
729 if (IS_ERR(con
->v2
.hmac_tfm
)) {
730 ret
= PTR_ERR(con
->v2
.hmac_tfm
);
731 con
->v2
.hmac_tfm
= NULL
;
732 pr_err("failed to allocate hmac tfm context: %d\n", ret
);
736 ret
= crypto_shash_setkey(con
->v2
.hmac_tfm
, session_key
,
739 pr_err("failed to set hmac key: %d\n", ret
);
743 if (con
->v2
.con_mode
== CEPH_CON_MODE_CRC
) {
744 WARN_ON(con_secret_len
);
745 return 0; /* auth_x, plain mode */
748 if (con_secret_len
< CEPH_GCM_KEY_LEN
+ 2 * CEPH_GCM_IV_LEN
) {
749 pr_err("con_secret too small %d\n", con_secret_len
);
753 noio_flag
= memalloc_noio_save();
754 con
->v2
.gcm_tfm
= crypto_alloc_aead("gcm(aes)", 0, 0);
755 memalloc_noio_restore(noio_flag
);
756 if (IS_ERR(con
->v2
.gcm_tfm
)) {
757 ret
= PTR_ERR(con
->v2
.gcm_tfm
);
758 con
->v2
.gcm_tfm
= NULL
;
759 pr_err("failed to allocate gcm tfm context: %d\n", ret
);
763 WARN_ON((unsigned long)con_secret
&
764 crypto_aead_alignmask(con
->v2
.gcm_tfm
));
765 ret
= crypto_aead_setkey(con
->v2
.gcm_tfm
, con_secret
, CEPH_GCM_KEY_LEN
);
767 pr_err("failed to set gcm key: %d\n", ret
);
771 WARN_ON(crypto_aead_ivsize(con
->v2
.gcm_tfm
) != CEPH_GCM_IV_LEN
);
772 ret
= crypto_aead_setauthsize(con
->v2
.gcm_tfm
, CEPH_GCM_TAG_LEN
);
774 pr_err("failed to set gcm tag size: %d\n", ret
);
778 con
->v2
.gcm_req
= aead_request_alloc(con
->v2
.gcm_tfm
, GFP_NOIO
);
779 if (!con
->v2
.gcm_req
) {
780 pr_err("failed to allocate gcm request\n");
784 crypto_init_wait(&con
->v2
.gcm_wait
);
785 aead_request_set_callback(con
->v2
.gcm_req
, CRYPTO_TFM_REQ_MAY_BACKLOG
,
786 crypto_req_done
, &con
->v2
.gcm_wait
);
788 memcpy(&con
->v2
.in_gcm_nonce
, con_secret
+ CEPH_GCM_KEY_LEN
,
790 memcpy(&con
->v2
.out_gcm_nonce
,
791 con_secret
+ CEPH_GCM_KEY_LEN
+ CEPH_GCM_IV_LEN
,
793 return 0; /* auth_x, secure mode */
796 static int hmac_sha256(struct ceph_connection
*con
, const struct kvec
*kvecs
,
797 int kvec_cnt
, u8
*hmac
)
799 SHASH_DESC_ON_STACK(desc
, con
->v2
.hmac_tfm
); /* tfm arg is ignored */
803 dout("%s con %p hmac_tfm %p kvec_cnt %d\n", __func__
, con
,
804 con
->v2
.hmac_tfm
, kvec_cnt
);
806 if (!con
->v2
.hmac_tfm
) {
807 memset(hmac
, 0, SHA256_DIGEST_SIZE
);
808 return 0; /* auth_none */
811 desc
->tfm
= con
->v2
.hmac_tfm
;
812 ret
= crypto_shash_init(desc
);
816 for (i
= 0; i
< kvec_cnt
; i
++) {
817 ret
= crypto_shash_update(desc
, kvecs
[i
].iov_base
,
823 ret
= crypto_shash_final(desc
, hmac
);
826 shash_desc_zero(desc
);
827 return ret
; /* auth_x, both plain and secure modes */
830 static void gcm_inc_nonce(struct ceph_gcm_nonce
*nonce
)
834 counter
= le64_to_cpu(nonce
->counter
);
835 nonce
->counter
= cpu_to_le64(counter
+ 1);
838 static int gcm_crypt(struct ceph_connection
*con
, bool encrypt
,
839 struct scatterlist
*src
, struct scatterlist
*dst
,
842 struct ceph_gcm_nonce
*nonce
;
845 nonce
= encrypt
? &con
->v2
.out_gcm_nonce
: &con
->v2
.in_gcm_nonce
;
847 aead_request_set_ad(con
->v2
.gcm_req
, 0); /* no AAD */
848 aead_request_set_crypt(con
->v2
.gcm_req
, src
, dst
, src_len
, (u8
*)nonce
);
849 ret
= crypto_wait_req(encrypt
? crypto_aead_encrypt(con
->v2
.gcm_req
) :
850 crypto_aead_decrypt(con
->v2
.gcm_req
),
855 gcm_inc_nonce(nonce
);
859 static void get_bvec_at(struct ceph_msg_data_cursor
*cursor
,
865 WARN_ON(!cursor
->total_resid
);
867 /* skip zero-length data items */
868 while (!cursor
->resid
)
869 ceph_msg_data_advance(cursor
, 0);
871 /* get a piece of data, cursor isn't advanced */
872 page
= ceph_msg_data_next(cursor
, &off
, &len
);
873 bvec_set_page(bv
, page
, len
, off
);
876 static int calc_sg_cnt(void *buf
, int buf_len
)
883 sg_cnt
= need_padding(buf_len
) ? 1 : 0;
884 if (is_vmalloc_addr(buf
)) {
885 WARN_ON(offset_in_page(buf
));
886 sg_cnt
+= PAGE_ALIGN(buf_len
) >> PAGE_SHIFT
;
894 static int calc_sg_cnt_cursor(struct ceph_msg_data_cursor
*cursor
)
896 int data_len
= cursor
->total_resid
;
903 sg_cnt
= need_padding(data_len
) ? 1 : 0;
905 get_bvec_at(cursor
, &bv
);
908 ceph_msg_data_advance(cursor
, bv
.bv_len
);
909 } while (cursor
->total_resid
);
914 static void init_sgs(struct scatterlist
**sg
, void *buf
, int buf_len
, u8
*pad
)
916 void *end
= buf
+ buf_len
;
924 if (is_vmalloc_addr(buf
)) {
927 page
= vmalloc_to_page(p
);
928 len
= min_t(int, end
- p
, PAGE_SIZE
);
929 WARN_ON(!page
|| !len
|| offset_in_page(p
));
930 sg_set_page(*sg
, page
, len
, 0);
935 sg_set_buf(*sg
, buf
, buf_len
);
939 if (need_padding(buf_len
)) {
940 sg_set_buf(*sg
, pad
, padding_len(buf_len
));
945 static void init_sgs_cursor(struct scatterlist
**sg
,
946 struct ceph_msg_data_cursor
*cursor
, u8
*pad
)
948 int data_len
= cursor
->total_resid
;
955 get_bvec_at(cursor
, &bv
);
956 sg_set_page(*sg
, bv
.bv_page
, bv
.bv_len
, bv
.bv_offset
);
959 ceph_msg_data_advance(cursor
, bv
.bv_len
);
960 } while (cursor
->total_resid
);
962 if (need_padding(data_len
)) {
963 sg_set_buf(*sg
, pad
, padding_len(data_len
));
969 * init_sgs_pages: set up scatterlist on an array of page pointers
970 * @sg: scatterlist to populate
971 * @pages: pointer to page array
972 * @dpos: position in the array to start (bytes)
973 * @dlen: len to add to sg (bytes)
974 * @pad: pointer to pad destination (if any)
976 * Populate the scatterlist from the page array, starting at an arbitrary
977 * byte in the array and running for a specified length.
979 static void init_sgs_pages(struct scatterlist
**sg
, struct page
**pages
,
980 int dpos
, int dlen
, u8
*pad
)
982 int idx
= dpos
>> PAGE_SHIFT
;
983 int off
= offset_in_page(dpos
);
987 int len
= min(resid
, (int)PAGE_SIZE
- off
);
989 sg_set_page(*sg
, pages
[idx
], len
, off
);
996 if (need_padding(dlen
)) {
997 sg_set_buf(*sg
, pad
, padding_len(dlen
));
1002 static int setup_message_sgs(struct sg_table
*sgt
, struct ceph_msg
*msg
,
1003 u8
*front_pad
, u8
*middle_pad
, u8
*data_pad
,
1004 void *epilogue
, struct page
**pages
, int dpos
,
1007 struct ceph_msg_data_cursor cursor
;
1008 struct scatterlist
*cur_sg
;
1009 int dlen
= data_len(msg
);
1013 if (!front_len(msg
) && !middle_len(msg
) && !data_len(msg
))
1016 sg_cnt
= 1; /* epilogue + [auth tag] */
1018 sg_cnt
+= calc_sg_cnt(msg
->front
.iov_base
,
1020 if (middle_len(msg
))
1021 sg_cnt
+= calc_sg_cnt(msg
->middle
->vec
.iov_base
,
1025 sg_cnt
+= calc_pages_for(dpos
, dlen
);
1026 if (need_padding(dlen
))
1029 ceph_msg_data_cursor_init(&cursor
, msg
, dlen
);
1030 sg_cnt
+= calc_sg_cnt_cursor(&cursor
);
1034 ret
= sg_alloc_table(sgt
, sg_cnt
, GFP_NOIO
);
1040 init_sgs(&cur_sg
, msg
->front
.iov_base
, front_len(msg
),
1042 if (middle_len(msg
))
1043 init_sgs(&cur_sg
, msg
->middle
->vec
.iov_base
, middle_len(msg
),
1047 init_sgs_pages(&cur_sg
, pages
, dpos
, dlen
, data_pad
);
1049 ceph_msg_data_cursor_init(&cursor
, msg
, dlen
);
1050 init_sgs_cursor(&cur_sg
, &cursor
, data_pad
);
1054 WARN_ON(!sg_is_last(cur_sg
));
1055 sg_set_buf(cur_sg
, epilogue
,
1056 CEPH_GCM_BLOCK_LEN
+ (add_tag
? CEPH_GCM_TAG_LEN
: 0));
1060 static int decrypt_preamble(struct ceph_connection
*con
)
1062 struct scatterlist sg
;
1064 sg_init_one(&sg
, con
->v2
.in_buf
, CEPH_PREAMBLE_SECURE_LEN
);
1065 return gcm_crypt(con
, false, &sg
, &sg
, CEPH_PREAMBLE_SECURE_LEN
);
1068 static int decrypt_control_remainder(struct ceph_connection
*con
)
1070 int ctrl_len
= con
->v2
.in_desc
.fd_lens
[0];
1071 int rem_len
= ctrl_len
- CEPH_PREAMBLE_INLINE_LEN
;
1072 int pt_len
= padding_len(rem_len
) + CEPH_GCM_TAG_LEN
;
1073 struct scatterlist sgs
[2];
1075 WARN_ON(con
->v2
.in_kvecs
[0].iov_len
!= rem_len
);
1076 WARN_ON(con
->v2
.in_kvecs
[1].iov_len
!= pt_len
);
1078 sg_init_table(sgs
, 2);
1079 sg_set_buf(&sgs
[0], con
->v2
.in_kvecs
[0].iov_base
, rem_len
);
1080 sg_set_buf(&sgs
[1], con
->v2
.in_buf
, pt_len
);
1082 return gcm_crypt(con
, false, sgs
, sgs
,
1083 padded_len(rem_len
) + CEPH_GCM_TAG_LEN
);
1086 /* Process sparse read data that lives in a buffer */
1087 static int process_v2_sparse_read(struct ceph_connection
*con
,
1088 struct page
**pages
, int spos
)
1090 struct ceph_msg_data_cursor
*cursor
= &con
->v2
.in_cursor
;
1096 ret
= con
->ops
->sparse_read(con
, cursor
, &buf
);
1100 dout("%s: sparse_read return %x buf %p\n", __func__
, ret
, buf
);
1103 int idx
= spos
>> PAGE_SHIFT
;
1104 int soff
= offset_in_page(spos
);
1105 struct page
*spage
= con
->v2
.in_enc_pages
[idx
];
1106 int len
= min_t(int, ret
, PAGE_SIZE
- soff
);
1109 memcpy_from_page(buf
, spage
, soff
, len
);
1114 get_bvec_at(cursor
, &bv
);
1115 len
= min_t(int, len
, bv
.bv_len
);
1116 memcpy_page(bv
.bv_page
, bv
.bv_offset
,
1118 ceph_msg_data_advance(cursor
, len
);
1126 static int decrypt_tail(struct ceph_connection
*con
)
1128 struct sg_table enc_sgt
= {};
1129 struct sg_table sgt
= {};
1130 struct page
**pages
= NULL
;
1131 bool sparse
= !!con
->in_msg
->sparse_read_total
;
1136 tail_len
= tail_onwire_len(con
->in_msg
, true);
1137 ret
= sg_alloc_table_from_pages(&enc_sgt
, con
->v2
.in_enc_pages
,
1138 con
->v2
.in_enc_page_cnt
, 0, tail_len
,
1144 dpos
= padded_len(front_len(con
->in_msg
) + padded_len(middle_len(con
->in_msg
)));
1145 pages
= con
->v2
.in_enc_pages
;
1148 ret
= setup_message_sgs(&sgt
, con
->in_msg
, FRONT_PAD(con
->v2
.in_buf
),
1149 MIDDLE_PAD(con
->v2
.in_buf
), DATA_PAD(con
->v2
.in_buf
),
1150 con
->v2
.in_buf
, pages
, dpos
, true);
1154 dout("%s con %p msg %p enc_page_cnt %d sg_cnt %d\n", __func__
, con
,
1155 con
->in_msg
, con
->v2
.in_enc_page_cnt
, sgt
.orig_nents
);
1156 ret
= gcm_crypt(con
, false, enc_sgt
.sgl
, sgt
.sgl
, tail_len
);
1160 if (sparse
&& data_len(con
->in_msg
)) {
1161 ret
= process_v2_sparse_read(con
, con
->v2
.in_enc_pages
, dpos
);
1166 WARN_ON(!con
->v2
.in_enc_page_cnt
);
1167 ceph_release_page_vector(con
->v2
.in_enc_pages
,
1168 con
->v2
.in_enc_page_cnt
);
1169 con
->v2
.in_enc_pages
= NULL
;
1170 con
->v2
.in_enc_page_cnt
= 0;
1173 sg_free_table(&sgt
);
1174 sg_free_table(&enc_sgt
);
1178 static int prepare_banner(struct ceph_connection
*con
)
1180 int buf_len
= CEPH_BANNER_V2_LEN
+ 2 + 8 + 8;
1183 buf
= alloc_conn_buf(con
, buf_len
);
1188 ceph_encode_copy(&p
, CEPH_BANNER_V2
, CEPH_BANNER_V2_LEN
);
1189 ceph_encode_16(&p
, sizeof(u64
) + sizeof(u64
));
1190 ceph_encode_64(&p
, CEPH_MSGR2_SUPPORTED_FEATURES
);
1191 ceph_encode_64(&p
, CEPH_MSGR2_REQUIRED_FEATURES
);
1192 WARN_ON(p
!= buf
+ buf_len
);
1194 add_out_kvec(con
, buf
, buf_len
);
1195 add_out_sign_kvec(con
, buf
, buf_len
);
1196 ceph_con_flag_set(con
, CEPH_CON_F_WRITE_PENDING
);
1203 * control body (ctrl_len bytes)
1204 * space for control crc
1206 * extdata (optional):
1207 * control body (extdata_len bytes)
1209 * Compute control crc and gather base and extdata into:
1212 * control body (ctrl_len + extdata_len bytes)
1215 * Preamble should already be encoded at the start of base.
1217 static void prepare_head_plain(struct ceph_connection
*con
, void *base
,
1218 int ctrl_len
, void *extdata
, int extdata_len
,
1221 int base_len
= CEPH_PREAMBLE_LEN
+ ctrl_len
+ CEPH_CRC_LEN
;
1222 void *crcp
= base
+ base_len
- CEPH_CRC_LEN
;
1225 crc
= crc32c(-1, CTRL_BODY(base
), ctrl_len
);
1227 crc
= crc32c(crc
, extdata
, extdata_len
);
1228 put_unaligned_le32(crc
, crcp
);
1231 add_out_kvec(con
, base
, base_len
);
1233 add_out_sign_kvec(con
, base
, base_len
);
1237 add_out_kvec(con
, base
, crcp
- base
);
1238 add_out_kvec(con
, extdata
, extdata_len
);
1239 add_out_kvec(con
, crcp
, CEPH_CRC_LEN
);
1241 add_out_sign_kvec(con
, base
, crcp
- base
);
1242 add_out_sign_kvec(con
, extdata
, extdata_len
);
1243 add_out_sign_kvec(con
, crcp
, CEPH_CRC_LEN
);
1247 static int prepare_head_secure_small(struct ceph_connection
*con
,
1248 void *base
, int ctrl_len
)
1250 struct scatterlist sg
;
1253 /* inline buffer padding? */
1254 if (ctrl_len
< CEPH_PREAMBLE_INLINE_LEN
)
1255 memset(CTRL_BODY(base
) + ctrl_len
, 0,
1256 CEPH_PREAMBLE_INLINE_LEN
- ctrl_len
);
1258 sg_init_one(&sg
, base
, CEPH_PREAMBLE_SECURE_LEN
);
1259 ret
= gcm_crypt(con
, true, &sg
, &sg
,
1260 CEPH_PREAMBLE_SECURE_LEN
- CEPH_GCM_TAG_LEN
);
1264 add_out_kvec(con
, base
, CEPH_PREAMBLE_SECURE_LEN
);
1271 * control body (ctrl_len bytes)
1272 * space for padding, if needed
1273 * space for control remainder auth tag
1274 * space for preamble auth tag
1276 * Encrypt preamble and the inline portion, then encrypt the remainder
1280 * control body (48 bytes)
1282 * control body (ctrl_len - 48 bytes)
1283 * zero padding, if needed
1284 * control remainder auth tag
1286 * Preamble should already be encoded at the start of base.
1288 static int prepare_head_secure_big(struct ceph_connection
*con
,
1289 void *base
, int ctrl_len
)
1291 int rem_len
= ctrl_len
- CEPH_PREAMBLE_INLINE_LEN
;
1292 void *rem
= CTRL_BODY(base
) + CEPH_PREAMBLE_INLINE_LEN
;
1293 void *rem_tag
= rem
+ padded_len(rem_len
);
1294 void *pmbl_tag
= rem_tag
+ CEPH_GCM_TAG_LEN
;
1295 struct scatterlist sgs
[2];
1298 sg_init_table(sgs
, 2);
1299 sg_set_buf(&sgs
[0], base
, rem
- base
);
1300 sg_set_buf(&sgs
[1], pmbl_tag
, CEPH_GCM_TAG_LEN
);
1301 ret
= gcm_crypt(con
, true, sgs
, sgs
, rem
- base
);
1305 /* control remainder padding? */
1306 if (need_padding(rem_len
))
1307 memset(rem
+ rem_len
, 0, padding_len(rem_len
));
1309 sg_init_one(&sgs
[0], rem
, pmbl_tag
- rem
);
1310 ret
= gcm_crypt(con
, true, sgs
, sgs
, rem_tag
- rem
);
1314 add_out_kvec(con
, base
, rem
- base
);
1315 add_out_kvec(con
, pmbl_tag
, CEPH_GCM_TAG_LEN
);
1316 add_out_kvec(con
, rem
, pmbl_tag
- rem
);
1320 static int __prepare_control(struct ceph_connection
*con
, int tag
,
1321 void *base
, int ctrl_len
, void *extdata
,
1322 int extdata_len
, bool to_be_signed
)
1324 int total_len
= ctrl_len
+ extdata_len
;
1325 struct ceph_frame_desc desc
;
1328 dout("%s con %p tag %d len %d (%d+%d)\n", __func__
, con
, tag
,
1329 total_len
, ctrl_len
, extdata_len
);
1331 /* extdata may be vmalloc'ed but not base */
1332 if (WARN_ON(is_vmalloc_addr(base
) || !ctrl_len
))
1335 init_frame_desc(&desc
, tag
, &total_len
, 1);
1336 encode_preamble(&desc
, base
);
1338 if (con_secure(con
)) {
1339 if (WARN_ON(extdata_len
|| to_be_signed
))
1342 if (ctrl_len
<= CEPH_PREAMBLE_INLINE_LEN
)
1343 /* fully inlined, inline buffer may need padding */
1344 ret
= prepare_head_secure_small(con
, base
, ctrl_len
);
1346 /* partially inlined, inline buffer is full */
1347 ret
= prepare_head_secure_big(con
, base
, ctrl_len
);
1351 prepare_head_plain(con
, base
, ctrl_len
, extdata
, extdata_len
,
1355 ceph_con_flag_set(con
, CEPH_CON_F_WRITE_PENDING
);
1359 static int prepare_control(struct ceph_connection
*con
, int tag
,
1360 void *base
, int ctrl_len
)
1362 return __prepare_control(con
, tag
, base
, ctrl_len
, NULL
, 0, false);
1365 static int prepare_hello(struct ceph_connection
*con
)
1370 ctrl_len
= 1 + ceph_entity_addr_encoding_len(&con
->peer_addr
);
1371 buf
= alloc_conn_buf(con
, head_onwire_len(ctrl_len
, false));
1376 ceph_encode_8(&p
, CEPH_ENTITY_TYPE_CLIENT
);
1377 ceph_encode_entity_addr(&p
, &con
->peer_addr
);
1378 WARN_ON(p
!= CTRL_BODY(buf
) + ctrl_len
);
1380 return __prepare_control(con
, FRAME_TAG_HELLO
, buf
, ctrl_len
,
1384 /* so that head_onwire_len(AUTH_BUF_LEN, false) is 512 */
1385 #define AUTH_BUF_LEN (512 - CEPH_CRC_LEN - CEPH_PREAMBLE_PLAIN_LEN)
1387 static int prepare_auth_request(struct ceph_connection
*con
)
1389 void *authorizer
, *authorizer_copy
;
1390 int ctrl_len
, authorizer_len
;
1394 ctrl_len
= AUTH_BUF_LEN
;
1395 buf
= alloc_conn_buf(con
, head_onwire_len(ctrl_len
, false));
1399 mutex_unlock(&con
->mutex
);
1400 ret
= con
->ops
->get_auth_request(con
, CTRL_BODY(buf
), &ctrl_len
,
1401 &authorizer
, &authorizer_len
);
1402 mutex_lock(&con
->mutex
);
1403 if (con
->state
!= CEPH_CON_S_V2_HELLO
) {
1404 dout("%s con %p state changed to %d\n", __func__
, con
,
1409 dout("%s con %p get_auth_request ret %d\n", __func__
, con
, ret
);
1413 authorizer_copy
= alloc_conn_buf(con
, authorizer_len
);
1414 if (!authorizer_copy
)
1417 memcpy(authorizer_copy
, authorizer
, authorizer_len
);
1419 return __prepare_control(con
, FRAME_TAG_AUTH_REQUEST
, buf
, ctrl_len
,
1420 authorizer_copy
, authorizer_len
, true);
1423 static int prepare_auth_request_more(struct ceph_connection
*con
,
1424 void *reply
, int reply_len
)
1426 int ctrl_len
, authorizer_len
;
1431 ctrl_len
= AUTH_BUF_LEN
;
1432 buf
= alloc_conn_buf(con
, head_onwire_len(ctrl_len
, false));
1436 mutex_unlock(&con
->mutex
);
1437 ret
= con
->ops
->handle_auth_reply_more(con
, reply
, reply_len
,
1438 CTRL_BODY(buf
), &ctrl_len
,
1439 &authorizer
, &authorizer_len
);
1440 mutex_lock(&con
->mutex
);
1441 if (con
->state
!= CEPH_CON_S_V2_AUTH
) {
1442 dout("%s con %p state changed to %d\n", __func__
, con
,
1447 dout("%s con %p handle_auth_reply_more ret %d\n", __func__
, con
, ret
);
1451 return __prepare_control(con
, FRAME_TAG_AUTH_REQUEST_MORE
, buf
,
1452 ctrl_len
, authorizer
, authorizer_len
, true);
1455 static int prepare_auth_signature(struct ceph_connection
*con
)
1460 buf
= alloc_conn_buf(con
, head_onwire_len(SHA256_DIGEST_SIZE
,
1465 ret
= hmac_sha256(con
, con
->v2
.in_sign_kvecs
, con
->v2
.in_sign_kvec_cnt
,
1470 return prepare_control(con
, FRAME_TAG_AUTH_SIGNATURE
, buf
,
1471 SHA256_DIGEST_SIZE
);
1474 static int prepare_client_ident(struct ceph_connection
*con
)
1476 struct ceph_entity_addr
*my_addr
= &con
->msgr
->inst
.addr
;
1477 struct ceph_client
*client
= from_msgr(con
->msgr
);
1478 u64 global_id
= ceph_client_gid(client
);
1482 WARN_ON(con
->v2
.server_cookie
);
1483 WARN_ON(con
->v2
.connect_seq
);
1484 WARN_ON(con
->v2
.peer_global_seq
);
1486 if (!con
->v2
.client_cookie
) {
1488 get_random_bytes(&con
->v2
.client_cookie
,
1489 sizeof(con
->v2
.client_cookie
));
1490 } while (!con
->v2
.client_cookie
);
1491 dout("%s con %p generated cookie 0x%llx\n", __func__
, con
,
1492 con
->v2
.client_cookie
);
1494 dout("%s con %p cookie already set 0x%llx\n", __func__
, con
,
1495 con
->v2
.client_cookie
);
1498 dout("%s con %p my_addr %s/%u peer_addr %s/%u global_id %llu global_seq %llu features 0x%llx required_features 0x%llx cookie 0x%llx\n",
1499 __func__
, con
, ceph_pr_addr(my_addr
), le32_to_cpu(my_addr
->nonce
),
1500 ceph_pr_addr(&con
->peer_addr
), le32_to_cpu(con
->peer_addr
.nonce
),
1501 global_id
, con
->v2
.global_seq
, client
->supported_features
,
1502 client
->required_features
, con
->v2
.client_cookie
);
1504 ctrl_len
= 1 + 4 + ceph_entity_addr_encoding_len(my_addr
) +
1505 ceph_entity_addr_encoding_len(&con
->peer_addr
) + 6 * 8;
1506 buf
= alloc_conn_buf(con
, head_onwire_len(ctrl_len
, con_secure(con
)));
1511 ceph_encode_8(&p
, 2); /* addrvec marker */
1512 ceph_encode_32(&p
, 1); /* addr_cnt */
1513 ceph_encode_entity_addr(&p
, my_addr
);
1514 ceph_encode_entity_addr(&p
, &con
->peer_addr
);
1515 ceph_encode_64(&p
, global_id
);
1516 ceph_encode_64(&p
, con
->v2
.global_seq
);
1517 ceph_encode_64(&p
, client
->supported_features
);
1518 ceph_encode_64(&p
, client
->required_features
);
1519 ceph_encode_64(&p
, 0); /* flags */
1520 ceph_encode_64(&p
, con
->v2
.client_cookie
);
1521 WARN_ON(p
!= CTRL_BODY(buf
) + ctrl_len
);
1523 return prepare_control(con
, FRAME_TAG_CLIENT_IDENT
, buf
, ctrl_len
);
1526 static int prepare_session_reconnect(struct ceph_connection
*con
)
1528 struct ceph_entity_addr
*my_addr
= &con
->msgr
->inst
.addr
;
1532 WARN_ON(!con
->v2
.client_cookie
);
1533 WARN_ON(!con
->v2
.server_cookie
);
1534 WARN_ON(!con
->v2
.connect_seq
);
1535 WARN_ON(!con
->v2
.peer_global_seq
);
1537 dout("%s con %p my_addr %s/%u client_cookie 0x%llx server_cookie 0x%llx global_seq %llu connect_seq %llu in_seq %llu\n",
1538 __func__
, con
, ceph_pr_addr(my_addr
), le32_to_cpu(my_addr
->nonce
),
1539 con
->v2
.client_cookie
, con
->v2
.server_cookie
, con
->v2
.global_seq
,
1540 con
->v2
.connect_seq
, con
->in_seq
);
1542 ctrl_len
= 1 + 4 + ceph_entity_addr_encoding_len(my_addr
) + 5 * 8;
1543 buf
= alloc_conn_buf(con
, head_onwire_len(ctrl_len
, con_secure(con
)));
1548 ceph_encode_8(&p
, 2); /* entity_addrvec_t marker */
1549 ceph_encode_32(&p
, 1); /* my_addrs len */
1550 ceph_encode_entity_addr(&p
, my_addr
);
1551 ceph_encode_64(&p
, con
->v2
.client_cookie
);
1552 ceph_encode_64(&p
, con
->v2
.server_cookie
);
1553 ceph_encode_64(&p
, con
->v2
.global_seq
);
1554 ceph_encode_64(&p
, con
->v2
.connect_seq
);
1555 ceph_encode_64(&p
, con
->in_seq
);
1556 WARN_ON(p
!= CTRL_BODY(buf
) + ctrl_len
);
1558 return prepare_control(con
, FRAME_TAG_SESSION_RECONNECT
, buf
, ctrl_len
);
1561 static int prepare_keepalive2(struct ceph_connection
*con
)
1563 struct ceph_timespec
*ts
= CTRL_BODY(con
->v2
.out_buf
);
1564 struct timespec64 now
;
1566 ktime_get_real_ts64(&now
);
1567 dout("%s con %p timestamp %lld.%09ld\n", __func__
, con
, now
.tv_sec
,
1570 ceph_encode_timespec64(ts
, &now
);
1572 reset_out_kvecs(con
);
1573 return prepare_control(con
, FRAME_TAG_KEEPALIVE2
, con
->v2
.out_buf
,
1574 sizeof(struct ceph_timespec
));
1577 static int prepare_ack(struct ceph_connection
*con
)
1581 dout("%s con %p in_seq_acked %llu -> %llu\n", __func__
, con
,
1582 con
->in_seq_acked
, con
->in_seq
);
1583 con
->in_seq_acked
= con
->in_seq
;
1585 p
= CTRL_BODY(con
->v2
.out_buf
);
1586 ceph_encode_64(&p
, con
->in_seq_acked
);
1588 reset_out_kvecs(con
);
1589 return prepare_control(con
, FRAME_TAG_ACK
, con
->v2
.out_buf
, 8);
1592 static void prepare_epilogue_plain(struct ceph_connection
*con
, bool aborted
)
1594 dout("%s con %p msg %p aborted %d crcs %u %u %u\n", __func__
, con
,
1595 con
->out_msg
, aborted
, con
->v2
.out_epil
.front_crc
,
1596 con
->v2
.out_epil
.middle_crc
, con
->v2
.out_epil
.data_crc
);
1598 encode_epilogue_plain(con
, aborted
);
1599 add_out_kvec(con
, &con
->v2
.out_epil
, CEPH_EPILOGUE_PLAIN_LEN
);
1603 * For "used" empty segments, crc is -1. For unused (trailing)
1604 * segments, crc is 0.
1606 static void prepare_message_plain(struct ceph_connection
*con
)
1608 struct ceph_msg
*msg
= con
->out_msg
;
1610 prepare_head_plain(con
, con
->v2
.out_buf
,
1611 sizeof(struct ceph_msg_header2
), NULL
, 0, false);
1613 if (!front_len(msg
) && !middle_len(msg
)) {
1614 if (!data_len(msg
)) {
1616 * Empty message: once the head is written,
1617 * we are done -- there is no epilogue.
1619 con
->v2
.out_state
= OUT_S_FINISH_MESSAGE
;
1623 con
->v2
.out_epil
.front_crc
= -1;
1624 con
->v2
.out_epil
.middle_crc
= -1;
1625 con
->v2
.out_state
= OUT_S_QUEUE_DATA
;
1629 if (front_len(msg
)) {
1630 con
->v2
.out_epil
.front_crc
= crc32c(-1, msg
->front
.iov_base
,
1632 add_out_kvec(con
, msg
->front
.iov_base
, front_len(msg
));
1634 /* middle (at least) is there, checked above */
1635 con
->v2
.out_epil
.front_crc
= -1;
1638 if (middle_len(msg
)) {
1639 con
->v2
.out_epil
.middle_crc
=
1640 crc32c(-1, msg
->middle
->vec
.iov_base
, middle_len(msg
));
1641 add_out_kvec(con
, msg
->middle
->vec
.iov_base
, middle_len(msg
));
1643 con
->v2
.out_epil
.middle_crc
= data_len(msg
) ? -1 : 0;
1646 if (data_len(msg
)) {
1647 con
->v2
.out_state
= OUT_S_QUEUE_DATA
;
1649 con
->v2
.out_epil
.data_crc
= 0;
1650 prepare_epilogue_plain(con
, false);
1651 con
->v2
.out_state
= OUT_S_FINISH_MESSAGE
;
1656 * Unfortunately the kernel crypto API doesn't support streaming
1657 * (piecewise) operation for AEAD algorithms, so we can't get away
1658 * with a fixed size buffer and a couple sgs. Instead, we have to
1659 * allocate pages for the entire tail of the message (currently up
1660 * to ~32M) and two sgs arrays (up to ~256K each)...
1662 static int prepare_message_secure(struct ceph_connection
*con
)
1664 void *zerop
= page_address(ceph_zero_page
);
1665 struct sg_table enc_sgt
= {};
1666 struct sg_table sgt
= {};
1667 struct page
**enc_pages
;
1672 ret
= prepare_head_secure_small(con
, con
->v2
.out_buf
,
1673 sizeof(struct ceph_msg_header2
));
1677 tail_len
= tail_onwire_len(con
->out_msg
, true);
1680 * Empty message: once the head is written,
1681 * we are done -- there is no epilogue.
1683 con
->v2
.out_state
= OUT_S_FINISH_MESSAGE
;
1687 encode_epilogue_secure(con
, false);
1688 ret
= setup_message_sgs(&sgt
, con
->out_msg
, zerop
, zerop
, zerop
,
1689 &con
->v2
.out_epil
, NULL
, 0, false);
1693 enc_page_cnt
= calc_pages_for(0, tail_len
);
1694 enc_pages
= ceph_alloc_page_vector(enc_page_cnt
, GFP_NOIO
);
1695 if (IS_ERR(enc_pages
)) {
1696 ret
= PTR_ERR(enc_pages
);
1700 WARN_ON(con
->v2
.out_enc_pages
|| con
->v2
.out_enc_page_cnt
);
1701 con
->v2
.out_enc_pages
= enc_pages
;
1702 con
->v2
.out_enc_page_cnt
= enc_page_cnt
;
1703 con
->v2
.out_enc_resid
= tail_len
;
1704 con
->v2
.out_enc_i
= 0;
1706 ret
= sg_alloc_table_from_pages(&enc_sgt
, enc_pages
, enc_page_cnt
,
1707 0, tail_len
, GFP_NOIO
);
1711 ret
= gcm_crypt(con
, true, sgt
.sgl
, enc_sgt
.sgl
,
1712 tail_len
- CEPH_GCM_TAG_LEN
);
1716 dout("%s con %p msg %p sg_cnt %d enc_page_cnt %d\n", __func__
, con
,
1717 con
->out_msg
, sgt
.orig_nents
, enc_page_cnt
);
1718 con
->v2
.out_state
= OUT_S_QUEUE_ENC_PAGE
;
1721 sg_free_table(&sgt
);
1722 sg_free_table(&enc_sgt
);
1726 static int prepare_message(struct ceph_connection
*con
)
1729 sizeof(struct ceph_msg_header2
),
1730 front_len(con
->out_msg
),
1731 middle_len(con
->out_msg
),
1732 data_len(con
->out_msg
)
1734 struct ceph_frame_desc desc
;
1737 dout("%s con %p msg %p logical %d+%d+%d+%d\n", __func__
, con
,
1738 con
->out_msg
, lens
[0], lens
[1], lens
[2], lens
[3]);
1740 if (con
->in_seq
> con
->in_seq_acked
) {
1741 dout("%s con %p in_seq_acked %llu -> %llu\n", __func__
, con
,
1742 con
->in_seq_acked
, con
->in_seq
);
1743 con
->in_seq_acked
= con
->in_seq
;
1746 reset_out_kvecs(con
);
1747 init_frame_desc(&desc
, FRAME_TAG_MESSAGE
, lens
, 4);
1748 encode_preamble(&desc
, con
->v2
.out_buf
);
1749 fill_header2(CTRL_BODY(con
->v2
.out_buf
), &con
->out_msg
->hdr
,
1752 if (con_secure(con
)) {
1753 ret
= prepare_message_secure(con
);
1757 prepare_message_plain(con
);
1760 ceph_con_flag_set(con
, CEPH_CON_F_WRITE_PENDING
);
1764 static int prepare_read_banner_prefix(struct ceph_connection
*con
)
1768 buf
= alloc_conn_buf(con
, CEPH_BANNER_V2_PREFIX_LEN
);
1772 reset_in_kvecs(con
);
1773 add_in_kvec(con
, buf
, CEPH_BANNER_V2_PREFIX_LEN
);
1774 add_in_sign_kvec(con
, buf
, CEPH_BANNER_V2_PREFIX_LEN
);
1775 con
->state
= CEPH_CON_S_V2_BANNER_PREFIX
;
1779 static int prepare_read_banner_payload(struct ceph_connection
*con
,
1784 buf
= alloc_conn_buf(con
, payload_len
);
1788 reset_in_kvecs(con
);
1789 add_in_kvec(con
, buf
, payload_len
);
1790 add_in_sign_kvec(con
, buf
, payload_len
);
1791 con
->state
= CEPH_CON_S_V2_BANNER_PAYLOAD
;
1795 static void prepare_read_preamble(struct ceph_connection
*con
)
1797 reset_in_kvecs(con
);
1798 add_in_kvec(con
, con
->v2
.in_buf
,
1799 con_secure(con
) ? CEPH_PREAMBLE_SECURE_LEN
:
1800 CEPH_PREAMBLE_PLAIN_LEN
);
1801 con
->v2
.in_state
= IN_S_HANDLE_PREAMBLE
;
1804 static int prepare_read_control(struct ceph_connection
*con
)
1806 int ctrl_len
= con
->v2
.in_desc
.fd_lens
[0];
1810 reset_in_kvecs(con
);
1811 if (con
->state
== CEPH_CON_S_V2_HELLO
||
1812 con
->state
== CEPH_CON_S_V2_AUTH
) {
1813 head_len
= head_onwire_len(ctrl_len
, false);
1814 buf
= alloc_conn_buf(con
, head_len
);
1818 /* preserve preamble */
1819 memcpy(buf
, con
->v2
.in_buf
, CEPH_PREAMBLE_LEN
);
1821 add_in_kvec(con
, CTRL_BODY(buf
), ctrl_len
);
1822 add_in_kvec(con
, CTRL_BODY(buf
) + ctrl_len
, CEPH_CRC_LEN
);
1823 add_in_sign_kvec(con
, buf
, head_len
);
1825 if (ctrl_len
> CEPH_PREAMBLE_INLINE_LEN
) {
1826 buf
= alloc_conn_buf(con
, ctrl_len
);
1830 add_in_kvec(con
, buf
, ctrl_len
);
1832 add_in_kvec(con
, CTRL_BODY(con
->v2
.in_buf
), ctrl_len
);
1834 add_in_kvec(con
, con
->v2
.in_buf
, CEPH_CRC_LEN
);
1836 con
->v2
.in_state
= IN_S_HANDLE_CONTROL
;
1840 static int prepare_read_control_remainder(struct ceph_connection
*con
)
1842 int ctrl_len
= con
->v2
.in_desc
.fd_lens
[0];
1843 int rem_len
= ctrl_len
- CEPH_PREAMBLE_INLINE_LEN
;
1846 buf
= alloc_conn_buf(con
, ctrl_len
);
1850 memcpy(buf
, CTRL_BODY(con
->v2
.in_buf
), CEPH_PREAMBLE_INLINE_LEN
);
1852 reset_in_kvecs(con
);
1853 add_in_kvec(con
, buf
+ CEPH_PREAMBLE_INLINE_LEN
, rem_len
);
1854 add_in_kvec(con
, con
->v2
.in_buf
,
1855 padding_len(rem_len
) + CEPH_GCM_TAG_LEN
);
1856 con
->v2
.in_state
= IN_S_HANDLE_CONTROL_REMAINDER
;
1860 static int prepare_read_data(struct ceph_connection
*con
)
1864 con
->in_data_crc
= -1;
1865 ceph_msg_data_cursor_init(&con
->v2
.in_cursor
, con
->in_msg
,
1866 data_len(con
->in_msg
));
1868 get_bvec_at(&con
->v2
.in_cursor
, &bv
);
1869 if (ceph_test_opt(from_msgr(con
->msgr
), RXBOUNCE
)) {
1870 if (unlikely(!con
->bounce_page
)) {
1871 con
->bounce_page
= alloc_page(GFP_NOIO
);
1872 if (!con
->bounce_page
) {
1873 pr_err("failed to allocate bounce page\n");
1878 bv
.bv_page
= con
->bounce_page
;
1881 set_in_bvec(con
, &bv
);
1882 con
->v2
.in_state
= IN_S_PREPARE_READ_DATA_CONT
;
1886 static void prepare_read_data_cont(struct ceph_connection
*con
)
1890 if (ceph_test_opt(from_msgr(con
->msgr
), RXBOUNCE
)) {
1891 con
->in_data_crc
= crc32c(con
->in_data_crc
,
1892 page_address(con
->bounce_page
),
1893 con
->v2
.in_bvec
.bv_len
);
1895 get_bvec_at(&con
->v2
.in_cursor
, &bv
);
1896 memcpy_to_page(bv
.bv_page
, bv
.bv_offset
,
1897 page_address(con
->bounce_page
),
1898 con
->v2
.in_bvec
.bv_len
);
1900 con
->in_data_crc
= ceph_crc32c_page(con
->in_data_crc
,
1901 con
->v2
.in_bvec
.bv_page
,
1902 con
->v2
.in_bvec
.bv_offset
,
1903 con
->v2
.in_bvec
.bv_len
);
1906 ceph_msg_data_advance(&con
->v2
.in_cursor
, con
->v2
.in_bvec
.bv_len
);
1907 if (con
->v2
.in_cursor
.total_resid
) {
1908 get_bvec_at(&con
->v2
.in_cursor
, &bv
);
1909 if (ceph_test_opt(from_msgr(con
->msgr
), RXBOUNCE
)) {
1910 bv
.bv_page
= con
->bounce_page
;
1913 set_in_bvec(con
, &bv
);
1914 WARN_ON(con
->v2
.in_state
!= IN_S_PREPARE_READ_DATA_CONT
);
1919 * We've read all data. Prepare to read epilogue.
1921 reset_in_kvecs(con
);
1922 add_in_kvec(con
, con
->v2
.in_buf
, CEPH_EPILOGUE_PLAIN_LEN
);
1923 con
->v2
.in_state
= IN_S_HANDLE_EPILOGUE
;
1926 static int prepare_sparse_read_cont(struct ceph_connection
*con
)
1931 struct ceph_msg_data_cursor
*cursor
= &con
->v2
.in_cursor
;
1933 WARN_ON(con
->v2
.in_state
!= IN_S_PREPARE_SPARSE_DATA_CONT
);
1935 if (iov_iter_is_bvec(&con
->v2
.in_iter
)) {
1936 if (ceph_test_opt(from_msgr(con
->msgr
), RXBOUNCE
)) {
1937 con
->in_data_crc
= crc32c(con
->in_data_crc
,
1938 page_address(con
->bounce_page
),
1939 con
->v2
.in_bvec
.bv_len
);
1940 get_bvec_at(cursor
, &bv
);
1941 memcpy_to_page(bv
.bv_page
, bv
.bv_offset
,
1942 page_address(con
->bounce_page
),
1943 con
->v2
.in_bvec
.bv_len
);
1945 con
->in_data_crc
= ceph_crc32c_page(con
->in_data_crc
,
1946 con
->v2
.in_bvec
.bv_page
,
1947 con
->v2
.in_bvec
.bv_offset
,
1948 con
->v2
.in_bvec
.bv_len
);
1951 ceph_msg_data_advance(cursor
, con
->v2
.in_bvec
.bv_len
);
1952 cursor
->sr_resid
-= con
->v2
.in_bvec
.bv_len
;
1953 dout("%s: advance by 0x%x sr_resid 0x%x\n", __func__
,
1954 con
->v2
.in_bvec
.bv_len
, cursor
->sr_resid
);
1955 WARN_ON_ONCE(cursor
->sr_resid
> cursor
->total_resid
);
1956 if (cursor
->sr_resid
) {
1957 get_bvec_at(cursor
, &bv
);
1958 if (bv
.bv_len
> cursor
->sr_resid
)
1959 bv
.bv_len
= cursor
->sr_resid
;
1960 if (ceph_test_opt(from_msgr(con
->msgr
), RXBOUNCE
)) {
1961 bv
.bv_page
= con
->bounce_page
;
1964 set_in_bvec(con
, &bv
);
1965 con
->v2
.data_len_remain
-= bv
.bv_len
;
1968 } else if (iov_iter_is_kvec(&con
->v2
.in_iter
)) {
1969 /* On first call, we have no kvec so don't compute crc */
1970 if (con
->v2
.in_kvec_cnt
) {
1971 WARN_ON_ONCE(con
->v2
.in_kvec_cnt
> 1);
1972 con
->in_data_crc
= crc32c(con
->in_data_crc
,
1973 con
->v2
.in_kvecs
[0].iov_base
,
1974 con
->v2
.in_kvecs
[0].iov_len
);
1980 /* get next extent */
1981 ret
= con
->ops
->sparse_read(con
, cursor
, &buf
);
1986 reset_in_kvecs(con
);
1987 add_in_kvec(con
, con
->v2
.in_buf
, CEPH_EPILOGUE_PLAIN_LEN
);
1988 con
->v2
.in_state
= IN_S_HANDLE_EPILOGUE
;
1993 /* receive into buffer */
1994 reset_in_kvecs(con
);
1995 add_in_kvec(con
, buf
, ret
);
1996 con
->v2
.data_len_remain
-= ret
;
2000 if (ret
> cursor
->total_resid
) {
2001 pr_warn("%s: ret 0x%x total_resid 0x%zx resid 0x%zx\n",
2002 __func__
, ret
, cursor
->total_resid
, cursor
->resid
);
2005 get_bvec_at(cursor
, &bv
);
2006 if (bv
.bv_len
> cursor
->sr_resid
)
2007 bv
.bv_len
= cursor
->sr_resid
;
2008 if (ceph_test_opt(from_msgr(con
->msgr
), RXBOUNCE
)) {
2009 if (unlikely(!con
->bounce_page
)) {
2010 con
->bounce_page
= alloc_page(GFP_NOIO
);
2011 if (!con
->bounce_page
) {
2012 pr_err("failed to allocate bounce page\n");
2017 bv
.bv_page
= con
->bounce_page
;
2020 set_in_bvec(con
, &bv
);
2021 con
->v2
.data_len_remain
-= ret
;
2025 static int prepare_sparse_read_data(struct ceph_connection
*con
)
2027 struct ceph_msg
*msg
= con
->in_msg
;
2029 dout("%s: starting sparse read\n", __func__
);
2031 if (WARN_ON_ONCE(!con
->ops
->sparse_read
))
2034 if (!con_secure(con
))
2035 con
->in_data_crc
= -1;
2037 ceph_msg_data_cursor_init(&con
->v2
.in_cursor
, msg
,
2038 msg
->sparse_read_total
);
2040 reset_in_kvecs(con
);
2041 con
->v2
.in_state
= IN_S_PREPARE_SPARSE_DATA_CONT
;
2042 con
->v2
.data_len_remain
= data_len(msg
);
2043 return prepare_sparse_read_cont(con
);
2046 static int prepare_read_tail_plain(struct ceph_connection
*con
)
2048 struct ceph_msg
*msg
= con
->in_msg
;
2050 if (!front_len(msg
) && !middle_len(msg
)) {
2051 WARN_ON(!data_len(msg
));
2052 return prepare_read_data(con
);
2055 reset_in_kvecs(con
);
2056 if (front_len(msg
)) {
2057 add_in_kvec(con
, msg
->front
.iov_base
, front_len(msg
));
2058 WARN_ON(msg
->front
.iov_len
!= front_len(msg
));
2060 if (middle_len(msg
)) {
2061 add_in_kvec(con
, msg
->middle
->vec
.iov_base
, middle_len(msg
));
2062 WARN_ON(msg
->middle
->vec
.iov_len
!= middle_len(msg
));
2065 if (data_len(msg
)) {
2066 if (msg
->sparse_read_total
)
2067 con
->v2
.in_state
= IN_S_PREPARE_SPARSE_DATA
;
2069 con
->v2
.in_state
= IN_S_PREPARE_READ_DATA
;
2071 add_in_kvec(con
, con
->v2
.in_buf
, CEPH_EPILOGUE_PLAIN_LEN
);
2072 con
->v2
.in_state
= IN_S_HANDLE_EPILOGUE
;
2077 static void prepare_read_enc_page(struct ceph_connection
*con
)
2081 dout("%s con %p i %d resid %d\n", __func__
, con
, con
->v2
.in_enc_i
,
2082 con
->v2
.in_enc_resid
);
2083 WARN_ON(!con
->v2
.in_enc_resid
);
2085 bvec_set_page(&bv
, con
->v2
.in_enc_pages
[con
->v2
.in_enc_i
],
2086 min(con
->v2
.in_enc_resid
, (int)PAGE_SIZE
), 0);
2088 set_in_bvec(con
, &bv
);
2090 con
->v2
.in_enc_resid
-= bv
.bv_len
;
2092 if (con
->v2
.in_enc_resid
) {
2093 con
->v2
.in_state
= IN_S_PREPARE_READ_ENC_PAGE
;
2098 * We are set to read the last piece of ciphertext (ending
2099 * with epilogue) + auth tag.
2101 WARN_ON(con
->v2
.in_enc_i
!= con
->v2
.in_enc_page_cnt
);
2102 con
->v2
.in_state
= IN_S_HANDLE_EPILOGUE
;
2105 static int prepare_read_tail_secure(struct ceph_connection
*con
)
2107 struct page
**enc_pages
;
2111 tail_len
= tail_onwire_len(con
->in_msg
, true);
2114 enc_page_cnt
= calc_pages_for(0, tail_len
);
2115 enc_pages
= ceph_alloc_page_vector(enc_page_cnt
, GFP_NOIO
);
2116 if (IS_ERR(enc_pages
))
2117 return PTR_ERR(enc_pages
);
2119 WARN_ON(con
->v2
.in_enc_pages
|| con
->v2
.in_enc_page_cnt
);
2120 con
->v2
.in_enc_pages
= enc_pages
;
2121 con
->v2
.in_enc_page_cnt
= enc_page_cnt
;
2122 con
->v2
.in_enc_resid
= tail_len
;
2123 con
->v2
.in_enc_i
= 0;
2125 prepare_read_enc_page(con
);
2129 static void __finish_skip(struct ceph_connection
*con
)
2132 prepare_read_preamble(con
);
2135 static void prepare_skip_message(struct ceph_connection
*con
)
2137 struct ceph_frame_desc
*desc
= &con
->v2
.in_desc
;
2140 dout("%s con %p %d+%d+%d\n", __func__
, con
, desc
->fd_lens
[1],
2141 desc
->fd_lens
[2], desc
->fd_lens
[3]);
2143 tail_len
= __tail_onwire_len(desc
->fd_lens
[1], desc
->fd_lens
[2],
2144 desc
->fd_lens
[3], con_secure(con
));
2148 set_in_skip(con
, tail_len
);
2149 con
->v2
.in_state
= IN_S_FINISH_SKIP
;
2153 static int process_banner_prefix(struct ceph_connection
*con
)
2158 WARN_ON(con
->v2
.in_kvecs
[0].iov_len
!= CEPH_BANNER_V2_PREFIX_LEN
);
2160 p
= con
->v2
.in_kvecs
[0].iov_base
;
2161 if (memcmp(p
, CEPH_BANNER_V2
, CEPH_BANNER_V2_LEN
)) {
2162 if (!memcmp(p
, CEPH_BANNER
, CEPH_BANNER_LEN
))
2163 con
->error_msg
= "server is speaking msgr1 protocol";
2165 con
->error_msg
= "protocol error, bad banner";
2169 p
+= CEPH_BANNER_V2_LEN
;
2170 payload_len
= ceph_decode_16(&p
);
2171 dout("%s con %p payload_len %d\n", __func__
, con
, payload_len
);
2173 return prepare_read_banner_payload(con
, payload_len
);
2176 static int process_banner_payload(struct ceph_connection
*con
)
2178 void *end
= con
->v2
.in_kvecs
[0].iov_base
+ con
->v2
.in_kvecs
[0].iov_len
;
2179 u64 feat
= CEPH_MSGR2_SUPPORTED_FEATURES
;
2180 u64 req_feat
= CEPH_MSGR2_REQUIRED_FEATURES
;
2181 u64 server_feat
, server_req_feat
;
2185 p
= con
->v2
.in_kvecs
[0].iov_base
;
2186 ceph_decode_64_safe(&p
, end
, server_feat
, bad
);
2187 ceph_decode_64_safe(&p
, end
, server_req_feat
, bad
);
2189 dout("%s con %p server_feat 0x%llx server_req_feat 0x%llx\n",
2190 __func__
, con
, server_feat
, server_req_feat
);
2192 if (req_feat
& ~server_feat
) {
2193 pr_err("msgr2 feature set mismatch: my required > server's supported 0x%llx, need 0x%llx\n",
2194 server_feat
, req_feat
& ~server_feat
);
2195 con
->error_msg
= "missing required protocol features";
2198 if (server_req_feat
& ~feat
) {
2199 pr_err("msgr2 feature set mismatch: server's required > my supported 0x%llx, missing 0x%llx\n",
2200 feat
, server_req_feat
& ~feat
);
2201 con
->error_msg
= "missing required protocol features";
2205 /* no reset_out_kvecs() as our banner may still be pending */
2206 ret
= prepare_hello(con
);
2208 pr_err("prepare_hello failed: %d\n", ret
);
2212 con
->state
= CEPH_CON_S_V2_HELLO
;
2213 prepare_read_preamble(con
);
2217 pr_err("failed to decode banner payload\n");
2221 static int process_hello(struct ceph_connection
*con
, void *p
, void *end
)
2223 struct ceph_entity_addr
*my_addr
= &con
->msgr
->inst
.addr
;
2224 struct ceph_entity_addr addr_for_me
;
2228 if (con
->state
!= CEPH_CON_S_V2_HELLO
) {
2229 con
->error_msg
= "protocol error, unexpected hello";
2233 ceph_decode_8_safe(&p
, end
, entity_type
, bad
);
2234 ret
= ceph_decode_entity_addr(&p
, end
, &addr_for_me
);
2236 pr_err("failed to decode addr_for_me: %d\n", ret
);
2240 dout("%s con %p entity_type %d addr_for_me %s\n", __func__
, con
,
2241 entity_type
, ceph_pr_addr(&addr_for_me
));
2243 if (entity_type
!= con
->peer_name
.type
) {
2244 pr_err("bad peer type, want %d, got %d\n",
2245 con
->peer_name
.type
, entity_type
);
2246 con
->error_msg
= "wrong peer at address";
2251 * Set our address to the address our first peer (i.e. monitor)
2252 * sees that we are connecting from. If we are behind some sort
2253 * of NAT and want to be identified by some private (not NATed)
2254 * address, ip option should be used.
2256 if (ceph_addr_is_blank(my_addr
)) {
2257 memcpy(&my_addr
->in_addr
, &addr_for_me
.in_addr
,
2258 sizeof(my_addr
->in_addr
));
2259 ceph_addr_set_port(my_addr
, 0);
2260 dout("%s con %p set my addr %s, as seen by peer %s\n",
2261 __func__
, con
, ceph_pr_addr(my_addr
),
2262 ceph_pr_addr(&con
->peer_addr
));
2264 dout("%s con %p my addr already set %s\n",
2265 __func__
, con
, ceph_pr_addr(my_addr
));
2268 WARN_ON(ceph_addr_is_blank(my_addr
) || ceph_addr_port(my_addr
));
2269 WARN_ON(my_addr
->type
!= CEPH_ENTITY_ADDR_TYPE_ANY
);
2270 WARN_ON(!my_addr
->nonce
);
2272 /* no reset_out_kvecs() as our hello may still be pending */
2273 ret
= prepare_auth_request(con
);
2276 pr_err("prepare_auth_request failed: %d\n", ret
);
2280 con
->state
= CEPH_CON_S_V2_AUTH
;
2284 pr_err("failed to decode hello\n");
2288 static int process_auth_bad_method(struct ceph_connection
*con
,
2291 int allowed_protos
[8], allowed_modes
[8];
2292 int allowed_proto_cnt
, allowed_mode_cnt
;
2293 int used_proto
, result
;
2297 if (con
->state
!= CEPH_CON_S_V2_AUTH
) {
2298 con
->error_msg
= "protocol error, unexpected auth_bad_method";
2302 ceph_decode_32_safe(&p
, end
, used_proto
, bad
);
2303 ceph_decode_32_safe(&p
, end
, result
, bad
);
2304 dout("%s con %p used_proto %d result %d\n", __func__
, con
, used_proto
,
2307 ceph_decode_32_safe(&p
, end
, allowed_proto_cnt
, bad
);
2308 if (allowed_proto_cnt
> ARRAY_SIZE(allowed_protos
)) {
2309 pr_err("allowed_protos too big %d\n", allowed_proto_cnt
);
2312 for (i
= 0; i
< allowed_proto_cnt
; i
++) {
2313 ceph_decode_32_safe(&p
, end
, allowed_protos
[i
], bad
);
2314 dout("%s con %p allowed_protos[%d] %d\n", __func__
, con
,
2315 i
, allowed_protos
[i
]);
2318 ceph_decode_32_safe(&p
, end
, allowed_mode_cnt
, bad
);
2319 if (allowed_mode_cnt
> ARRAY_SIZE(allowed_modes
)) {
2320 pr_err("allowed_modes too big %d\n", allowed_mode_cnt
);
2323 for (i
= 0; i
< allowed_mode_cnt
; i
++) {
2324 ceph_decode_32_safe(&p
, end
, allowed_modes
[i
], bad
);
2325 dout("%s con %p allowed_modes[%d] %d\n", __func__
, con
,
2326 i
, allowed_modes
[i
]);
2329 mutex_unlock(&con
->mutex
);
2330 ret
= con
->ops
->handle_auth_bad_method(con
, used_proto
, result
,
2335 mutex_lock(&con
->mutex
);
2336 if (con
->state
!= CEPH_CON_S_V2_AUTH
) {
2337 dout("%s con %p state changed to %d\n", __func__
, con
,
2342 dout("%s con %p handle_auth_bad_method ret %d\n", __func__
, con
, ret
);
2346 pr_err("failed to decode auth_bad_method\n");
2350 static int process_auth_reply_more(struct ceph_connection
*con
,
2356 if (con
->state
!= CEPH_CON_S_V2_AUTH
) {
2357 con
->error_msg
= "protocol error, unexpected auth_reply_more";
2361 ceph_decode_32_safe(&p
, end
, payload_len
, bad
);
2362 ceph_decode_need(&p
, end
, payload_len
, bad
);
2364 dout("%s con %p payload_len %d\n", __func__
, con
, payload_len
);
2366 reset_out_kvecs(con
);
2367 ret
= prepare_auth_request_more(con
, p
, payload_len
);
2370 pr_err("prepare_auth_request_more failed: %d\n", ret
);
2377 pr_err("failed to decode auth_reply_more\n");
2382 * Align session_key and con_secret to avoid GFP_ATOMIC allocation
2383 * inside crypto_shash_setkey() and crypto_aead_setkey() called from
2384 * setup_crypto(). __aligned(16) isn't guaranteed to work for stack
2385 * objects, so do it by hand.
2387 static int process_auth_done(struct ceph_connection
*con
, void *p
, void *end
)
2389 u8 session_key_buf
[CEPH_KEY_LEN
+ 16];
2390 u8 con_secret_buf
[CEPH_MAX_CON_SECRET_LEN
+ 16];
2391 u8
*session_key
= PTR_ALIGN(&session_key_buf
[0], 16);
2392 u8
*con_secret
= PTR_ALIGN(&con_secret_buf
[0], 16);
2393 int session_key_len
, con_secret_len
;
2398 if (con
->state
!= CEPH_CON_S_V2_AUTH
) {
2399 con
->error_msg
= "protocol error, unexpected auth_done";
2403 ceph_decode_64_safe(&p
, end
, global_id
, bad
);
2404 ceph_decode_32_safe(&p
, end
, con
->v2
.con_mode
, bad
);
2405 ceph_decode_32_safe(&p
, end
, payload_len
, bad
);
2407 dout("%s con %p global_id %llu con_mode %d payload_len %d\n",
2408 __func__
, con
, global_id
, con
->v2
.con_mode
, payload_len
);
2410 mutex_unlock(&con
->mutex
);
2411 session_key_len
= 0;
2413 ret
= con
->ops
->handle_auth_done(con
, global_id
, p
, payload_len
,
2414 session_key
, &session_key_len
,
2415 con_secret
, &con_secret_len
);
2416 mutex_lock(&con
->mutex
);
2417 if (con
->state
!= CEPH_CON_S_V2_AUTH
) {
2418 dout("%s con %p state changed to %d\n", __func__
, con
,
2424 dout("%s con %p handle_auth_done ret %d\n", __func__
, con
, ret
);
2428 ret
= setup_crypto(con
, session_key
, session_key_len
, con_secret
,
2433 reset_out_kvecs(con
);
2434 ret
= prepare_auth_signature(con
);
2436 pr_err("prepare_auth_signature failed: %d\n", ret
);
2440 con
->state
= CEPH_CON_S_V2_AUTH_SIGNATURE
;
2443 memzero_explicit(session_key_buf
, sizeof(session_key_buf
));
2444 memzero_explicit(con_secret_buf
, sizeof(con_secret_buf
));
2448 pr_err("failed to decode auth_done\n");
2452 static int process_auth_signature(struct ceph_connection
*con
,
2455 u8 hmac
[SHA256_DIGEST_SIZE
];
2458 if (con
->state
!= CEPH_CON_S_V2_AUTH_SIGNATURE
) {
2459 con
->error_msg
= "protocol error, unexpected auth_signature";
2463 ret
= hmac_sha256(con
, con
->v2
.out_sign_kvecs
,
2464 con
->v2
.out_sign_kvec_cnt
, hmac
);
2468 ceph_decode_need(&p
, end
, SHA256_DIGEST_SIZE
, bad
);
2469 if (crypto_memneq(p
, hmac
, SHA256_DIGEST_SIZE
)) {
2470 con
->error_msg
= "integrity error, bad auth signature";
2474 dout("%s con %p auth signature ok\n", __func__
, con
);
2476 /* no reset_out_kvecs() as our auth_signature may still be pending */
2477 if (!con
->v2
.server_cookie
) {
2478 ret
= prepare_client_ident(con
);
2480 pr_err("prepare_client_ident failed: %d\n", ret
);
2484 con
->state
= CEPH_CON_S_V2_SESSION_CONNECT
;
2486 ret
= prepare_session_reconnect(con
);
2488 pr_err("prepare_session_reconnect failed: %d\n", ret
);
2492 con
->state
= CEPH_CON_S_V2_SESSION_RECONNECT
;
2498 pr_err("failed to decode auth_signature\n");
/*
 * Handle SERVER_IDENT: decode the server's identity (addrs, global_id,
 * global_seq, feature bits, flags, session cookie), validate it against
 * who we intended to talk to and our required features, record it on
 * the connection and transition to CEPH_CON_S_OPEN.
 */
2502 static int process_server_ident(struct ceph_connection
*con
,
2505 struct ceph_client
*client
= from_msgr(con
->msgr
);
2506 u64 features
, required_features
;
2507 struct ceph_entity_addr addr
;
2514 if (con
->state
!= CEPH_CON_S_V2_SESSION_CONNECT
) {
2515 con
->error_msg
= "protocol error, unexpected server_ident";
2519 ret
= ceph_decode_entity_addrvec(&p
, end
, true, &addr
);
2521 pr_err("failed to decode server addrs: %d\n", ret
);
2525 ceph_decode_64_safe(&p
, end
, global_id
, bad
);
2526 ceph_decode_64_safe(&p
, end
, global_seq
, bad
);
2527 ceph_decode_64_safe(&p
, end
, features
, bad
);
2528 ceph_decode_64_safe(&p
, end
, required_features
, bad
);
2529 ceph_decode_64_safe(&p
, end
, flags
, bad
);
2530 ceph_decode_64_safe(&p
, end
, cookie
, bad
);
2532 dout("%s con %p addr %s/%u global_id %llu global_seq %llu features 0x%llx required_features 0x%llx flags 0x%llx cookie 0x%llx\n",
2533 __func__
, con
, ceph_pr_addr(&addr
), le32_to_cpu(addr
.nonce
),
2534 global_id
, global_seq
, features
, required_features
, flags
, cookie
);
2536 /* is this who we intended to talk to? */
2537 if (memcmp(&addr
, &con
->peer_addr
, sizeof(con
->peer_addr
))) {
2538 pr_err("bad peer addr/nonce, want %s/%u, got %s/%u\n",
2539 ceph_pr_addr(&con
->peer_addr
),
2540 le32_to_cpu(con
->peer_addr
.nonce
),
2541 ceph_pr_addr(&addr
), le32_to_cpu(addr
.nonce
));
2542 con
->error_msg
= "wrong peer at address";
/* server must support everything we require */
2546 if (client
->required_features
& ~features
) {
2547 pr_err("RADOS feature set mismatch: my required > server's supported 0x%llx, need 0x%llx\n",
2548 features
, client
->required_features
& ~features
);
2549 con
->error_msg
= "missing required protocol features";
2554 * Both name->type and name->num are set in ceph_con_open() but
2555 * name->num may be bogus in the initial monmap. name->type is
2556 * verified in handle_hello().
2558 WARN_ON(!con
->peer_name
.type
);
2559 con
->peer_name
.num
= cpu_to_le64(global_id
);
2560 con
->v2
.peer_global_seq
= global_seq
;
2561 con
->peer_features
= features
;
2562 WARN_ON(required_features
& ~client
->supported_features
);
2563 con
->v2
.server_cookie
= cookie
;
/* lossy connections never get a cookie; lossless always do */
2565 if (flags
& CEPH_MSG_CONNECT_LOSSY
) {
2566 ceph_con_flag_set(con
, CEPH_CON_F_LOSSYTX
);
2567 WARN_ON(con
->v2
.server_cookie
);
2569 WARN_ON(!con
->v2
.server_cookie
);
2572 clear_in_sign_kvecs(con
);
2573 clear_out_sign_kvecs(con
);
2574 free_conn_bufs(con
);
2575 con
->delay
= 0; /* reset backoff memory */
2577 con
->state
= CEPH_CON_S_OPEN
;
2578 con
->v2
.out_state
= OUT_S_GET_NEXT
;
2582 pr_err("failed to decode server_ident\n");
/*
 * Handle IDENT_MISSING_FEATURES: the server requires feature bits we
 * don't support.  Log the mismatch and fail the connection.
 */
2586 static int process_ident_missing_features(struct ceph_connection
*con
,
2589 struct ceph_client
*client
= from_msgr(con
->msgr
);
2590 u64 missing_features
;
2592 if (con
->state
!= CEPH_CON_S_V2_SESSION_CONNECT
) {
2593 con
->error_msg
= "protocol error, unexpected ident_missing_features";
2597 ceph_decode_64_safe(&p
, end
, missing_features
, bad
);
2598 pr_err("RADOS feature set mismatch: server's required > my supported 0x%llx, missing 0x%llx\n",
2599 client
->supported_features
, missing_features
);
2600 con
->error_msg
= "missing required protocol features";
2604 pr_err("failed to decode ident_missing_features\n");
/*
 * Handle SESSION_RECONNECT_OK: the server accepted our reconnect.
 * Drop requeued messages the server already saw (up to @seq) and
 * transition back to CEPH_CON_S_OPEN.
 */
2608 static int process_session_reconnect_ok(struct ceph_connection
*con
,
2613 if (con
->state
!= CEPH_CON_S_V2_SESSION_RECONNECT
) {
2614 con
->error_msg
= "protocol error, unexpected session_reconnect_ok";
2618 ceph_decode_64_safe(&p
, end
, seq
, bad
);
2620 dout("%s con %p seq %llu\n", __func__
, con
, seq
);
2621 ceph_con_discard_requeued(con
, seq
);
2623 clear_in_sign_kvecs(con
);
2624 clear_out_sign_kvecs(con
);
2625 free_conn_bufs(con
);
2626 con
->delay
= 0; /* reset backoff memory */
2628 con
->state
= CEPH_CON_S_OPEN
;
2629 con
->v2
.out_state
= OUT_S_GET_NEXT
;
2633 pr_err("failed to decode session_reconnect_ok\n");
/*
 * Handle SESSION_RETRY: the server asks us to retry with a higher
 * connect_seq.  Adopt its value + 1 and resend session_reconnect.
 */
2637 static int process_session_retry(struct ceph_connection
*con
,
2643 if (con
->state
!= CEPH_CON_S_V2_SESSION_RECONNECT
) {
2644 con
->error_msg
= "protocol error, unexpected session_retry";
2648 ceph_decode_64_safe(&p
, end
, connect_seq
, bad
);
2650 dout("%s con %p connect_seq %llu\n", __func__
, con
, connect_seq
);
/* server's connect_seq should always be ahead of ours here */
2651 WARN_ON(connect_seq
<= con
->v2
.connect_seq
);
2652 con
->v2
.connect_seq
= connect_seq
+ 1;
2654 free_conn_bufs(con
);
2656 reset_out_kvecs(con
);
2657 ret
= prepare_session_reconnect(con
);
2659 pr_err("prepare_session_reconnect (cseq) failed: %d\n", ret
);
2666 pr_err("failed to decode session_retry\n");
/*
 * Handle SESSION_RETRY_GLOBAL: like SESSION_RETRY but for the
 * messenger-wide global_seq.  Bump past the server's value and resend
 * session_reconnect.
 */
2670 static int process_session_retry_global(struct ceph_connection
*con
,
2676 if (con
->state
!= CEPH_CON_S_V2_SESSION_RECONNECT
) {
2677 con
->error_msg
= "protocol error, unexpected session_retry_global";
2681 ceph_decode_64_safe(&p
, end
, global_seq
, bad
);
2683 dout("%s con %p global_seq %llu\n", __func__
, con
, global_seq
);
2684 WARN_ON(global_seq
<= con
->v2
.global_seq
);
/* ceph_get_global_seq() returns a value strictly greater than @global_seq */
2685 con
->v2
.global_seq
= ceph_get_global_seq(con
->msgr
, global_seq
);
2687 free_conn_bufs(con
);
2689 reset_out_kvecs(con
);
2690 ret
= prepare_session_reconnect(con
);
2692 pr_err("prepare_session_reconnect (gseq) failed: %d\n", ret
);
2699 pr_err("failed to decode session_retry_global\n");
/*
 * Handle SESSION_RESET: the server dropped our session.  Reset session
 * state, notify the upper layer via ops->peer_reset (dropping con->mutex
 * around the callback, so re-check state afterwards) and start a fresh
 * session with client_ident.
 */
2703 static int process_session_reset(struct ceph_connection
*con
,
2709 if (con
->state
!= CEPH_CON_S_V2_SESSION_RECONNECT
) {
2710 con
->error_msg
= "protocol error, unexpected session_reset";
2714 ceph_decode_8_safe(&p
, end
, full
, bad
);
2716 con
->error_msg
= "protocol error, bad session_reset";
2720 pr_info("%s%lld %s session reset\n", ENTITY_NAME(con
->peer_name
),
2721 ceph_pr_addr(&con
->peer_addr
));
2722 ceph_con_reset_session(con
);
/* peer_reset callback runs without con->mutex held */
2724 mutex_unlock(&con
->mutex
);
2725 if (con
->ops
->peer_reset
)
2726 con
->ops
->peer_reset(con
);
2727 mutex_lock(&con
->mutex
);
/* connection may have been closed while the mutex was dropped */
2728 if (con
->state
!= CEPH_CON_S_V2_SESSION_RECONNECT
) {
2729 dout("%s con %p state changed to %d\n", __func__
, con
,
2734 free_conn_bufs(con
);
2736 reset_out_kvecs(con
);
2737 ret
= prepare_client_ident(con
);
2739 pr_err("prepare_client_ident (rst) failed: %d\n", ret
);
2743 con
->state
= CEPH_CON_S_V2_SESSION_CONNECT
;
2747 pr_err("failed to decode session_reset\n");
/*
 * Handle KEEPALIVE2_ACK: record the server's timestamp in
 * con->last_keepalive_ack.
 */
2751 static int process_keepalive2_ack(struct ceph_connection
*con
,
2754 if (con
->state
!= CEPH_CON_S_OPEN
) {
2755 con
->error_msg
= "protocol error, unexpected keepalive2_ack";
2759 ceph_decode_need(&p
, end
, sizeof(struct ceph_timespec
), bad
);
2760 ceph_decode_timespec64(&con
->last_keepalive_ack
, p
);
2762 dout("%s con %p timestamp %lld.%09ld\n", __func__
, con
,
2763 con
->last_keepalive_ack
.tv_sec
, con
->last_keepalive_ack
.tv_nsec
);
2768 pr_err("failed to decode keepalive2_ack\n");
/*
 * Handle ACK: discard sent messages acknowledged up to @seq.
 */
2772 static int process_ack(struct ceph_connection
*con
, void *p
, void *end
)
2776 if (con
->state
!= CEPH_CON_S_OPEN
) {
2777 con
->error_msg
= "protocol error, unexpected ack";
2781 ceph_decode_64_safe(&p
, end
, seq
, bad
);
2783 dout("%s con %p seq %llu\n", __func__
, con
, seq
);
2784 ceph_con_discard_sent(con
, seq
);
2788 pr_err("failed to decode ack\n");
/*
 * Dispatch a non-MESSAGE control frame to its tag-specific handler,
 * then rearm the preamble read for the next frame.
 */
2792 static int process_control(struct ceph_connection
*con
, void *p
, void *end
)
2794 int tag
= con
->v2
.in_desc
.fd_tag
;
2797 dout("%s con %p tag %d len %d\n", __func__
, con
, tag
, (int)(end
- p
));
2800 case FRAME_TAG_HELLO
:
2801 ret
= process_hello(con
, p
, end
);
2803 case FRAME_TAG_AUTH_BAD_METHOD
:
2804 ret
= process_auth_bad_method(con
, p
, end
);
2806 case FRAME_TAG_AUTH_REPLY_MORE
:
2807 ret
= process_auth_reply_more(con
, p
, end
);
2809 case FRAME_TAG_AUTH_DONE
:
2810 ret
= process_auth_done(con
, p
, end
);
2812 case FRAME_TAG_AUTH_SIGNATURE
:
2813 ret
= process_auth_signature(con
, p
, end
);
2815 case FRAME_TAG_SERVER_IDENT
:
2816 ret
= process_server_ident(con
, p
, end
);
2818 case FRAME_TAG_IDENT_MISSING_FEATURES
:
2819 ret
= process_ident_missing_features(con
, p
, end
);
2821 case FRAME_TAG_SESSION_RECONNECT_OK
:
2822 ret
= process_session_reconnect_ok(con
, p
, end
);
2824 case FRAME_TAG_SESSION_RETRY
:
2825 ret
= process_session_retry(con
, p
, end
);
2827 case FRAME_TAG_SESSION_RETRY_GLOBAL
:
2828 ret
= process_session_retry_global(con
, p
, end
);
2830 case FRAME_TAG_SESSION_RESET
:
2831 ret
= process_session_reset(con
, p
, end
);
2833 case FRAME_TAG_KEEPALIVE2_ACK
:
2834 ret
= process_keepalive2_ack(con
, p
, end
);
2837 ret
= process_ack(con
, p
, end
);
/* unknown tag is a protocol error */
2840 pr_err("bad tag %d\n", tag
);
2841 con
->error_msg
= "protocol error, bad tag";
2845 dout("%s con %p error %d\n", __func__
, con
, ret
);
2849 prepare_read_preamble(con
);
2855 * 1 - con->in_msg set, read message
/*
 * Validate the incoming MESSAGE frame header: enforce strict sequence
 * ordering (old messages are skipped, gaps are fatal), piggyback the
 * ack_seq, then build a v1-style header and allocate con->in_msg.
 */
2859 static int process_message_header(struct ceph_connection
*con
,
2862 struct ceph_frame_desc
*desc
= &con
->v2
.in_desc
;
2863 struct ceph_msg_header2
*hdr2
= p
;
2864 struct ceph_msg_header hdr
;
2870 seq
= le64_to_cpu(hdr2
->seq
);
/* signed difference so sequence wraparound is handled */
2871 if ((s64
)seq
- (s64
)con
->in_seq
< 1) {
2872 pr_info("%s%lld %s skipping old message: seq %llu, expected %llu\n",
2873 ENTITY_NAME(con
->peer_name
),
2874 ceph_pr_addr(&con
->peer_addr
),
2875 seq
, con
->in_seq
+ 1);
2878 if ((s64
)seq
- (s64
)con
->in_seq
> 1) {
2879 pr_err("bad seq %llu, expected %llu\n", seq
, con
->in_seq
+ 1);
2880 con
->error_msg
= "bad message sequence # for incoming message";
/* ack is piggybacked on every message */
2884 ceph_con_discard_sent(con
, le64_to_cpu(hdr2
->ack_seq
));
2886 fill_header(&hdr
, hdr2
, desc
->fd_lens
[1], desc
->fd_lens
[2],
2887 desc
->fd_lens
[3], &con
->peer_name
);
2888 ret
= ceph_con_in_msg_alloc(con
, &hdr
, &skip
);
/* exactly one of in_msg set / skip requested */
2892 WARN_ON(!con
->in_msg
^ skip
);
2896 WARN_ON(!con
->in_msg
);
2897 WARN_ON(con
->in_msg
->con
!= con
);
/*
 * Deliver the fully received message to the upper layer, then rearm
 * the preamble read for the next frame.
 */
2901 static int process_message(struct ceph_connection
*con
)
2903 ceph_con_process_message(con
);
2906 * We could have been closed by ceph_con_close() because
2907 * ceph_con_process_message() temporarily drops con->mutex.
2909 if (con
->state
!= CEPH_CON_S_OPEN
) {
2910 dout("%s con %p state changed to %d\n", __func__
, con
,
2915 prepare_read_preamble(con
);
/*
 * Process the control segment at @p.  Non-MESSAGE frames go to
 * process_control().  For MESSAGE frames, validate/allocate in_msg,
 * size its front/middle iovecs from the frame descriptor, and either
 * finish immediately (empty message) or start reading the tail.
 */
2919 static int __handle_control(struct ceph_connection
*con
, void *p
)
2921 void *end
= p
+ con
->v2
.in_desc
.fd_lens
[0];
2922 struct ceph_msg
*msg
;
2925 if (con
->v2
.in_desc
.fd_tag
!= FRAME_TAG_MESSAGE
)
2926 return process_control(con
, p
, end
);
2928 ret
= process_message_header(con
, p
, end
);
/* upper layer asked us to skip this message */
2932 prepare_skip_message(con
);
2936 msg
= con
->in_msg
; /* set in process_message_header() */
2937 if (front_len(msg
)) {
2938 WARN_ON(front_len(msg
) > msg
->front_alloc_len
);
2939 msg
->front
.iov_len
= front_len(msg
);
2941 msg
->front
.iov_len
= 0;
2943 if (middle_len(msg
)) {
2944 WARN_ON(middle_len(msg
) > msg
->middle
->alloc_len
);
2945 msg
->middle
->vec
.iov_len
= middle_len(msg
);
2946 } else if (msg
->middle
) {
2947 msg
->middle
->vec
.iov_len
= 0;
/* nothing else to read -> message is complete already */
2950 if (!front_len(msg
) && !middle_len(msg
) && !data_len(msg
))
2951 return process_message(con
);
2953 if (con_secure(con
))
2954 return prepare_read_tail_secure(con
);
2956 return prepare_read_tail_plain(con
);
/*
 * Handle a received frame preamble: decrypt it in secure mode, decode
 * the frame descriptor (tag, segment count and lengths), then decide
 * how to obtain the control segment -- read it (plain), read the
 * remainder (secure, large control) or use the inline copy already in
 * the preamble buffer.
 */
2959 static int handle_preamble(struct ceph_connection
*con
)
2961 struct ceph_frame_desc
*desc
= &con
->v2
.in_desc
;
2964 if (con_secure(con
)) {
2965 ret
= decrypt_preamble(con
);
2967 if (ret
== -EBADMSG
)
2968 con
->error_msg
= "integrity error, bad preamble auth tag";
2973 ret
= decode_preamble(con
->v2
.in_buf
, desc
);
2975 if (ret
== -EBADMSG
)
2976 con
->error_msg
= "integrity error, bad crc";
2978 con
->error_msg
= "protocol error, bad preamble";
2982 dout("%s con %p tag %d seg_cnt %d %d+%d+%d+%d\n", __func__
,
2983 con
, desc
->fd_tag
, desc
->fd_seg_cnt
, desc
->fd_lens
[0],
2984 desc
->fd_lens
[1], desc
->fd_lens
[2], desc
->fd_lens
[3]);
2986 if (!con_secure(con
))
2987 return prepare_read_control(con
);
/* secure: small control segments ride inline in the preamble */
2989 if (desc
->fd_lens
[0] > CEPH_PREAMBLE_INLINE_LEN
)
2990 return prepare_read_control_remainder(con
);
2992 return __handle_control(con
, CTRL_BODY(con
->v2
.in_buf
));
/*
 * Handle a plain-mode control segment: verify its CRC, and during the
 * AUTH phase copy it into a conn buffer so it can be kept for later
 * signature verification before processing.
 */
2995 static int handle_control(struct ceph_connection
*con
)
2997 int ctrl_len
= con
->v2
.in_desc
.fd_lens
[0];
3001 WARN_ON(con_secure(con
));
3003 ret
= verify_control_crc(con
);
3005 con
->error_msg
= "integrity error, bad crc";
/* during auth, preserve the payload in a stable buffer */
3009 if (con
->state
== CEPH_CON_S_V2_AUTH
) {
3010 buf
= alloc_conn_buf(con
, ctrl_len
);
3014 memcpy(buf
, con
->v2
.in_kvecs
[0].iov_base
, ctrl_len
);
3015 return __handle_control(con
, buf
);
3018 return __handle_control(con
, con
->v2
.in_kvecs
[0].iov_base
);
/*
 * Secure mode only: decrypt the part of a large control segment that
 * did not fit inline in the preamble, then process the whole segment
 * (which starts CEPH_PREAMBLE_INLINE_LEN bytes before the remainder).
 */
3021 static int handle_control_remainder(struct ceph_connection
*con
)
3025 WARN_ON(!con_secure(con
));
3027 ret
= decrypt_control_remainder(con
);
3029 if (ret
== -EBADMSG
)
3030 con
->error_msg
= "integrity error, bad control remainder auth tag";
3034 return __handle_control(con
, con
->v2
.in_kvecs
[0].iov_base
-
3035 CEPH_PREAMBLE_INLINE_LEN
);
/*
 * Handle the message epilogue: in secure mode decrypt the tail and
 * decode just the late_status byte; in plain mode decode and verify the
 * front/middle/data CRCs.  On success, deliver the message.
 */
3038 static int handle_epilogue(struct ceph_connection
*con
)
3040 u32 front_crc
, middle_crc
, data_crc
;
3043 if (con_secure(con
)) {
3044 ret
= decrypt_tail(con
);
3046 if (ret
== -EBADMSG
)
3047 con
->error_msg
= "integrity error, bad epilogue auth tag";
3051 /* just late_status */
3052 ret
= decode_epilogue(con
->v2
.in_buf
, NULL
, NULL
, NULL
);
3054 con
->error_msg
= "protocol error, bad epilogue";
3058 ret
= decode_epilogue(con
->v2
.in_buf
, &front_crc
,
3059 &middle_crc
, &data_crc
);
3061 con
->error_msg
= "protocol error, bad epilogue";
3065 ret
= verify_epilogue_crcs(con
, front_crc
, middle_crc
,
3068 con
->error_msg
= "integrity error, bad crc";
3073 return process_message(con
);
/*
 * Complete skipping over a revoked/skipped incoming frame; in secure
 * mode the nonce must still advance as if the frame had been decrypted.
 */
3076 static void finish_skip(struct ceph_connection
*con
)
3078 dout("%s con %p\n", __func__
, con
);
3080 if (con_secure(con
))
3081 gcm_inc_nonce(&con
->v2
.in_gcm_nonce
);
/*
 * Refill con->v2.in_iter with the next thing to read, driven by the
 * connection state (banner phases) and then by the in_state machine
 * (preamble -> control -> data/enc pages -> epilogue, or skip).
 */
3086 static int populate_in_iter(struct ceph_connection
*con
)
3090 dout("%s con %p state %d in_state %d\n", __func__
, con
, con
->state
,
3092 WARN_ON(iov_iter_count(&con
->v2
.in_iter
));
3094 if (con
->state
== CEPH_CON_S_V2_BANNER_PREFIX
) {
3095 ret
= process_banner_prefix(con
);
3096 } else if (con
->state
== CEPH_CON_S_V2_BANNER_PAYLOAD
) {
3097 ret
= process_banner_payload(con
);
3098 } else if ((con
->state
>= CEPH_CON_S_V2_HELLO
&&
3099 con
->state
<= CEPH_CON_S_V2_SESSION_RECONNECT
) ||
3100 con
->state
== CEPH_CON_S_OPEN
) {
3101 switch (con
->v2
.in_state
) {
3102 case IN_S_HANDLE_PREAMBLE
:
3103 ret
= handle_preamble(con
);
3105 case IN_S_HANDLE_CONTROL
:
3106 ret
= handle_control(con
);
3108 case IN_S_HANDLE_CONTROL_REMAINDER
:
3109 ret
= handle_control_remainder(con
);
3111 case IN_S_PREPARE_READ_DATA
:
3112 ret
= prepare_read_data(con
);
3114 case IN_S_PREPARE_READ_DATA_CONT
:
3115 prepare_read_data_cont(con
);
3118 case IN_S_PREPARE_READ_ENC_PAGE
:
3119 prepare_read_enc_page(con
);
3122 case IN_S_PREPARE_SPARSE_DATA
:
3123 ret
= prepare_sparse_read_data(con
);
3125 case IN_S_PREPARE_SPARSE_DATA_CONT
:
3126 ret
= prepare_sparse_read_cont(con
);
3128 case IN_S_HANDLE_EPILOGUE
:
3129 ret
= handle_epilogue(con
);
3131 case IN_S_FINISH_SKIP
:
3136 WARN(1, "bad in_state %d", con
->v2
.in_state
);
3140 WARN(1, "bad state %d", con
->state
);
3144 dout("%s con %p error %d\n", __func__
, con
, ret
);
/* handlers must leave something queued for the next read */
3148 if (WARN_ON(!iov_iter_count(&con
->v2
.in_iter
)))
3150 dout("%s con %p populated %zu\n", __func__
, con
,
3151 iov_iter_count(&con
->v2
.in_iter
));
/*
 * Read-side entry point: receive whatever in_iter currently wants from
 * the socket, then let populate_in_iter() process it and queue the next
 * read.  Sets con->error_msg on unexpected processing failures.
 */
3155 int ceph_con_v2_try_read(struct ceph_connection
*con
)
3159 dout("%s con %p state %d need %zu\n", __func__
, con
, con
->state
,
3160 iov_iter_count(&con
->v2
.in_iter
));
3162 if (con
->state
== CEPH_CON_S_PREOPEN
)
3166 * We should always have something pending here. If not,
3167 * avoid calling populate_in_iter() as if we read something
3168 * (ceph_tcp_recv() would immediately return 1).
3170 if (WARN_ON(!iov_iter_count(&con
->v2
.in_iter
)))
3174 ret
= ceph_tcp_recv(con
);
3178 ret
= populate_in_iter(con
);
3180 if (ret
&& ret
!= -EAGAIN
&& !con
->error_msg
)
3181 con
->error_msg
= "read processing error";
/*
 * Start sending out_msg's data payload (plain mode): init the data
 * cursor and the running data CRC, and queue the first bvec.
 */
3187 static void queue_data(struct ceph_connection
*con
)
/* -1 is the CRC32C seed used throughout this file */
3191 con
->v2
.out_epil
.data_crc
= -1;
3192 ceph_msg_data_cursor_init(&con
->v2
.out_cursor
, con
->out_msg
,
3193 data_len(con
->out_msg
));
3195 get_bvec_at(&con
->v2
.out_cursor
, &bv
);
3196 set_out_bvec(con
, &bv
, true);
3197 con
->v2
.out_state
= OUT_S_QUEUE_DATA_CONT
;
/*
 * Continue sending data: fold the just-sent bvec into the running data
 * CRC, advance the cursor, and either queue the next piece or, when the
 * payload is exhausted, queue the plain epilogue.
 */
3200 static void queue_data_cont(struct ceph_connection
*con
)
3204 con
->v2
.out_epil
.data_crc
= ceph_crc32c_page(
3205 con
->v2
.out_epil
.data_crc
, con
->v2
.out_bvec
.bv_page
,
3206 con
->v2
.out_bvec
.bv_offset
, con
->v2
.out_bvec
.bv_len
);
3208 ceph_msg_data_advance(&con
->v2
.out_cursor
, con
->v2
.out_bvec
.bv_len
);
3209 if (con
->v2
.out_cursor
.total_resid
) {
3210 get_bvec_at(&con
->v2
.out_cursor
, &bv
);
3211 set_out_bvec(con
, &bv
, true);
3212 WARN_ON(con
->v2
.out_state
!= OUT_S_QUEUE_DATA_CONT
);
3217 * We've written all data. Queue epilogue. Once it's written,
3220 reset_out_kvecs(con
);
3221 prepare_epilogue_plain(con
, false);
3222 con
->v2
.out_state
= OUT_S_FINISH_MESSAGE
;
/*
 * Secure mode: queue the next page of pre-encrypted ciphertext from
 * out_enc_pages, tracking position (out_enc_i) and bytes remaining
 * (out_enc_resid).
 */
3225 static void queue_enc_page(struct ceph_connection
*con
)
3229 dout("%s con %p i %d resid %d\n", __func__
, con
, con
->v2
.out_enc_i
,
3230 con
->v2
.out_enc_resid
);
3231 WARN_ON(!con
->v2
.out_enc_resid
);
/* last page may be partial */
3233 bvec_set_page(&bv
, con
->v2
.out_enc_pages
[con
->v2
.out_enc_i
],
3234 min(con
->v2
.out_enc_resid
, (int)PAGE_SIZE
), 0);
3236 set_out_bvec(con
, &bv
, false);
3237 con
->v2
.out_enc_i
++;
3238 con
->v2
.out_enc_resid
-= bv
.bv_len
;
3240 if (con
->v2
.out_enc_resid
) {
3241 WARN_ON(con
->v2
.out_state
!= OUT_S_QUEUE_ENC_PAGE
);
3246 * We've queued the last piece of ciphertext (ending with
3247 * epilogue) + auth tag. Once it's written, we are done.
3249 WARN_ON(con
->v2
.out_enc_i
!= con
->v2
.out_enc_page_cnt
);
3250 con
->v2
.out_state
= OUT_S_FINISH_MESSAGE
;
/*
 * Queue zero-fill bytes for a revoked message (plain mode).  While
 * out_zero remains, keep queuing zero bvecs; once exhausted, queue the
 * aborted epilogue.
 */
3253 static void queue_zeros(struct ceph_connection
*con
)
3255 dout("%s con %p out_zero %d\n", __func__
, con
, con
->v2
.out_zero
);
3257 if (con
->v2
.out_zero
) {
3258 set_out_bvec_zero(con
);
3259 con
->v2
.out_zero
-= con
->v2
.out_bvec
.bv_len
;
3260 con
->v2
.out_state
= OUT_S_QUEUE_ZEROS
;
3265 * We've zero-filled everything up to epilogue. Queue epilogue
3266 * with late_status set to ABORTED and crcs adjusted for zeros.
3267 * Once it's written, we are done patching up for the revoke.
3269 reset_out_kvecs(con
);
3270 prepare_epilogue_plain(con
, true);
3271 con
->v2
.out_state
= OUT_S_FINISH_MESSAGE
;
/*
 * A message (or its zero-filled replacement) has been fully written:
 * release any ciphertext pages, drop the out_msg reference (it may
 * already be gone if revoked) and return to OUT_S_GET_NEXT.
 */
3274 static void finish_message(struct ceph_connection
*con
)
3276 dout("%s con %p msg %p\n", __func__
, con
, con
->out_msg
);
3278 /* we end up here both plain and secure modes */
3279 if (con
->v2
.out_enc_pages
) {
3280 WARN_ON(!con
->v2
.out_enc_page_cnt
);
3281 ceph_release_page_vector(con
->v2
.out_enc_pages
,
3282 con
->v2
.out_enc_page_cnt
);
3283 con
->v2
.out_enc_pages
= NULL
;
3284 con
->v2
.out_enc_page_cnt
= 0;
3286 /* message may have been revoked */
3288 ceph_msg_put(con
->out_msg
);
3289 con
->out_msg
= NULL
;
3292 con
->v2
.out_state
= OUT_S_GET_NEXT
;
/*
 * Refill con->v2.out_iter with the next thing to write.  First finish
 * whatever the out_state machine is in the middle of (data, enc pages,
 * zeros, message finish), then at OUT_S_GET_NEXT pick new work in
 * priority order: keepalive, queued message, outstanding ack.
 */
3295 static int populate_out_iter(struct ceph_connection
*con
)
3299 dout("%s con %p state %d out_state %d\n", __func__
, con
, con
->state
,
3301 WARN_ON(iov_iter_count(&con
->v2
.out_iter
));
/* during handshake, writes are driven by the read side */
3303 if (con
->state
!= CEPH_CON_S_OPEN
) {
3304 WARN_ON(con
->state
< CEPH_CON_S_V2_BANNER_PREFIX
||
3305 con
->state
> CEPH_CON_S_V2_SESSION_RECONNECT
);
3306 goto nothing_pending
;
3309 switch (con
->v2
.out_state
) {
3310 case OUT_S_QUEUE_DATA
:
3311 WARN_ON(!con
->out_msg
);
3314 case OUT_S_QUEUE_DATA_CONT
:
3315 WARN_ON(!con
->out_msg
);
3316 queue_data_cont(con
);
3318 case OUT_S_QUEUE_ENC_PAGE
:
3319 queue_enc_page(con
);
3321 case OUT_S_QUEUE_ZEROS
:
3322 WARN_ON(con
->out_msg
); /* revoked */
3325 case OUT_S_FINISH_MESSAGE
:
3326 finish_message(con
);
3328 case OUT_S_GET_NEXT
:
3331 WARN(1, "bad out_state %d", con
->v2
.out_state
);
3335 WARN_ON(con
->v2
.out_state
!= OUT_S_GET_NEXT
);
3336 if (ceph_con_flag_test_and_clear(con
, CEPH_CON_F_KEEPALIVE_PENDING
)) {
3337 ret
= prepare_keepalive2(con
);
3339 pr_err("prepare_keepalive2 failed: %d\n", ret
);
3342 } else if (!list_empty(&con
->out_queue
)) {
3343 ceph_con_get_out_msg(con
);
3344 ret
= prepare_message(con
);
3346 pr_err("prepare_message failed: %d\n", ret
);
3349 } else if (con
->in_seq
> con
->in_seq_acked
) {
3350 ret
= prepare_ack(con
);
3352 pr_err("prepare_ack failed: %d\n", ret
);
3356 goto nothing_pending
;
3360 if (WARN_ON(!iov_iter_count(&con
->v2
.out_iter
)))
3362 dout("%s con %p populated %zu\n", __func__
, con
,
3363 iov_iter_count(&con
->v2
.out_iter
));
3367 WARN_ON(iov_iter_count(&con
->v2
.out_iter
));
3368 dout("%s con %p nothing pending\n", __func__
, con
);
3369 ceph_con_flag_clear(con
, CEPH_CON_F_WRITE_PENDING
);
/*
 * Write-side entry point.  On PREOPEN, set up seqs, queue the banner
 * exchange and open the TCP socket.  Otherwise, keep the socket corked,
 * alternate ceph_tcp_send() with populate_out_iter() until there is
 * nothing left (or the socket would block), then uncork.
 */
3373 int ceph_con_v2_try_write(struct ceph_connection
*con
)
3377 dout("%s con %p state %d have %zu\n", __func__
, con
, con
->state
,
3378 iov_iter_count(&con
->v2
.out_iter
));
3380 /* open the socket first? */
3381 if (con
->state
== CEPH_CON_S_PREOPEN
) {
3382 WARN_ON(con
->peer_addr
.type
!= CEPH_ENTITY_ADDR_TYPE_MSGR2
);
3385 * Always bump global_seq. Bump connect_seq only if
3386 * there is a session (i.e. we are reconnecting and will
3387 * send session_reconnect instead of client_ident).
3389 con
->v2
.global_seq
= ceph_get_global_seq(con
->msgr
, 0);
3390 if (con
->v2
.server_cookie
)
3391 con
->v2
.connect_seq
++;
3393 ret
= prepare_read_banner_prefix(con
);
3395 pr_err("prepare_read_banner_prefix failed: %d\n", ret
);
3396 con
->error_msg
= "connect error";
3400 reset_out_kvecs(con
);
3401 ret
= prepare_banner(con
);
3403 pr_err("prepare_banner failed: %d\n", ret
);
3404 con
->error_msg
= "connect error";
3408 ret
= ceph_tcp_connect(con
);
3410 pr_err("ceph_tcp_connect failed: %d\n", ret
);
3411 con
->error_msg
= "connect error";
3416 if (!iov_iter_count(&con
->v2
.out_iter
)) {
3417 ret
= populate_out_iter(con
);
3419 if (ret
&& ret
!= -EAGAIN
&& !con
->error_msg
)
3420 con
->error_msg
= "write processing error";
/* cork so partial frames are not pushed onto the wire */
3425 tcp_sock_set_cork(con
->sock
->sk
, true);
3427 ret
= ceph_tcp_send(con
);
3431 ret
= populate_out_iter(con
);
3433 if (ret
&& ret
!= -EAGAIN
&& !con
->error_msg
)
3434 con
->error_msg
= "write processing error";
3439 tcp_sock_set_cork(con
->sock
->sk
, false);
/*
 * Fold @zero_len zero bytes into @crc, a page at a time using the
 * shared ceph_zero_page, and return the updated CRC.
 */
3443 static u32
crc32c_zeros(u32 crc
, int zero_len
)
3448 len
= min(zero_len
, (int)PAGE_SIZE
);
3449 crc
= crc32c(crc
, page_address(ceph_zero_page
), len
);
/*
 * Replace the unsent part (@resid bytes) of out_msg's front segment
 * with zeros for a revoke: compute the CRC of the already-sent prefix
 * followed by zeros, shrink out_iter past the segment and add the
 * zero-fill to out_zero.
 */
3456 static void prepare_zero_front(struct ceph_connection
*con
, int resid
)
3460 WARN_ON(!resid
|| resid
> front_len(con
->out_msg
));
3461 sent
= front_len(con
->out_msg
) - resid
;
3462 dout("%s con %p sent %d resid %d\n", __func__
, con
, sent
, resid
);
3465 con
->v2
.out_epil
.front_crc
=
3466 crc32c(-1, con
->out_msg
->front
.iov_base
, sent
);
3467 con
->v2
.out_epil
.front_crc
=
3468 crc32c_zeros(con
->v2
.out_epil
.front_crc
, resid
);
/* nothing was sent -> CRC is over zeros only */
3470 con
->v2
.out_epil
.front_crc
= crc32c_zeros(-1, resid
);
3473 con
->v2
.out_iter
.count
-= resid
;
3474 out_zero_add(con
, resid
);
/*
 * Same as prepare_zero_front() but for out_msg's middle segment:
 * CRC the sent prefix plus zeros, drop the remainder from out_iter
 * and account it in out_zero.
 */
3477 static void prepare_zero_middle(struct ceph_connection
*con
, int resid
)
3481 WARN_ON(!resid
|| resid
> middle_len(con
->out_msg
));
3482 sent
= middle_len(con
->out_msg
) - resid
;
3483 dout("%s con %p sent %d resid %d\n", __func__
, con
, sent
, resid
);
3486 con
->v2
.out_epil
.middle_crc
=
3487 crc32c(-1, con
->out_msg
->middle
->vec
.iov_base
, sent
);
3488 con
->v2
.out_epil
.middle_crc
=
3489 crc32c_zeros(con
->v2
.out_epil
.middle_crc
, resid
);
3491 con
->v2
.out_epil
.middle_crc
= crc32c_zeros(-1, resid
);
3494 con
->v2
.out_iter
.count
-= resid
;
3495 out_zero_add(con
, resid
);
/*
 * Replace the entire data payload with zeros for a revoke: data CRC
 * becomes the CRC of data_len zeros, all of it queued via out_zero.
 */
3498 static void prepare_zero_data(struct ceph_connection
*con
)
3500 dout("%s con %p\n", __func__
, con
);
3501 con
->v2
.out_epil
.data_crc
= crc32c_zeros(-1, data_len(con
->out_msg
));
3502 out_zero_add(con
, data_len(con
->out_msg
));
/*
 * Revoke out_msg while in OUT_S_QUEUE_DATA (plain mode, kvec phase):
 * out_iter still holds head/front/middle kvecs.  Depending on how far
 * we got, zero-fill the unsent front/middle and the whole data payload,
 * then switch to OUT_S_QUEUE_ZEROS.
 */
3505 static void revoke_at_queue_data(struct ceph_connection
*con
)
3510 WARN_ON(!data_len(con
->out_msg
));
3511 WARN_ON(!iov_iter_is_kvec(&con
->v2
.out_iter
));
3512 resid
= iov_iter_count(&con
->v2
.out_iter
);
/* more than front+middle left -> still inside the frame head */
3514 boundary
= front_len(con
->out_msg
) + middle_len(con
->out_msg
);
3515 if (resid
> boundary
) {
3517 WARN_ON(resid
> MESSAGE_HEAD_PLAIN_LEN
);
3518 dout("%s con %p was sending head\n", __func__
, con
);
3519 if (front_len(con
->out_msg
))
3520 prepare_zero_front(con
, front_len(con
->out_msg
));
3521 if (middle_len(con
->out_msg
))
3522 prepare_zero_middle(con
, middle_len(con
->out_msg
));
3523 prepare_zero_data(con
);
3524 WARN_ON(iov_iter_count(&con
->v2
.out_iter
) != resid
);
3525 con
->v2
.out_state
= OUT_S_QUEUE_ZEROS
;
3529 boundary
= middle_len(con
->out_msg
);
3530 if (resid
> boundary
) {
3532 dout("%s con %p was sending front\n", __func__
, con
);
3533 prepare_zero_front(con
, resid
);
3534 if (middle_len(con
->out_msg
))
3535 prepare_zero_middle(con
, middle_len(con
->out_msg
));
3536 prepare_zero_data(con
);
3542 dout("%s con %p was sending middle\n", __func__
, con
);
3543 prepare_zero_middle(con
, resid
);
3544 prepare_zero_data(con
);
/*
 * Revoke out_msg while in OUT_S_QUEUE_DATA_CONT (plain mode, bvec
 * phase): front and middle are fully out; fold the sent part of the
 * current data bvec into the data CRC, extend it with zeros for the
 * rest of the payload and queue the zero-fill.
 */
3548 static void revoke_at_queue_data_cont(struct ceph_connection
*con
)
3550 int sent
, resid
; /* current piece of data */
3552 WARN_ON(!data_len(con
->out_msg
));
3553 WARN_ON(!iov_iter_is_bvec(&con
->v2
.out_iter
));
3554 resid
= iov_iter_count(&con
->v2
.out_iter
);
3555 WARN_ON(!resid
|| resid
> con
->v2
.out_bvec
.bv_len
);
3556 sent
= con
->v2
.out_bvec
.bv_len
- resid
;
3557 dout("%s con %p sent %d resid %d\n", __func__
, con
, sent
, resid
);
3560 con
->v2
.out_epil
.data_crc
= ceph_crc32c_page(
3561 con
->v2
.out_epil
.data_crc
, con
->v2
.out_bvec
.bv_page
,
3562 con
->v2
.out_bvec
.bv_offset
, sent
);
3563 ceph_msg_data_advance(&con
->v2
.out_cursor
, sent
);
3565 WARN_ON(resid
> con
->v2
.out_cursor
.total_resid
);
3566 con
->v2
.out_epil
.data_crc
= crc32c_zeros(con
->v2
.out_epil
.data_crc
,
3567 con
->v2
.out_cursor
.total_resid
);
3569 con
->v2
.out_iter
.count
-= resid
;
3570 out_zero_add(con
, con
->v2
.out_cursor
.total_resid
);
/*
 * Revoke out_msg while in OUT_S_FINISH_MESSAGE (plain mode): out_iter
 * holds the remaining head/front/middle kvecs plus the epilogue.  An
 * empty message needs no patching; otherwise zero-fill what remains of
 * front/middle, strip the queued epilogue (a fresh aborted one will be
 * queued by queue_zeros()) and switch to OUT_S_QUEUE_ZEROS.
 */
3574 static void revoke_at_finish_message(struct ceph_connection
*con
)
3579 WARN_ON(!iov_iter_is_kvec(&con
->v2
.out_iter
));
3580 resid
= iov_iter_count(&con
->v2
.out_iter
);
3582 if (!front_len(con
->out_msg
) && !middle_len(con
->out_msg
) &&
3583 !data_len(con
->out_msg
)) {
3584 WARN_ON(!resid
|| resid
> MESSAGE_HEAD_PLAIN_LEN
);
3585 dout("%s con %p was sending head (empty message) - noop\n",
3590 boundary
= front_len(con
->out_msg
) + middle_len(con
->out_msg
) +
3591 CEPH_EPILOGUE_PLAIN_LEN
;
3592 if (resid
> boundary
) {
3594 WARN_ON(resid
> MESSAGE_HEAD_PLAIN_LEN
);
3595 dout("%s con %p was sending head\n", __func__
, con
);
3596 if (front_len(con
->out_msg
))
3597 prepare_zero_front(con
, front_len(con
->out_msg
));
3598 if (middle_len(con
->out_msg
))
3599 prepare_zero_middle(con
, middle_len(con
->out_msg
));
3600 con
->v2
.out_iter
.count
-= CEPH_EPILOGUE_PLAIN_LEN
;
3601 WARN_ON(iov_iter_count(&con
->v2
.out_iter
) != resid
);
3602 con
->v2
.out_state
= OUT_S_QUEUE_ZEROS
;
3606 boundary
= middle_len(con
->out_msg
) + CEPH_EPILOGUE_PLAIN_LEN
;
3607 if (resid
> boundary
) {
3609 dout("%s con %p was sending front\n", __func__
, con
);
3610 prepare_zero_front(con
, resid
);
3611 if (middle_len(con
->out_msg
))
3612 prepare_zero_middle(con
, middle_len(con
->out_msg
));
3613 con
->v2
.out_iter
.count
-= CEPH_EPILOGUE_PLAIN_LEN
;
3618 boundary
= CEPH_EPILOGUE_PLAIN_LEN
;
3619 if (resid
> boundary
) {
3621 dout("%s con %p was sending middle\n", __func__
, con
);
3622 prepare_zero_middle(con
, resid
);
3623 con
->v2
.out_iter
.count
-= CEPH_EPILOGUE_PLAIN_LEN
;
/* only the epilogue itself remains -> let it finish as-is */
3629 dout("%s con %p was sending epilogue - noop\n", __func__
, con
);
/*
 * Revoke the outgoing message.  In secure mode the frame is already
 * encrypted as a unit so nothing can be patched -- let it go out.  In
 * plain mode, dispatch to the state-specific revoke helper.
 */
3632 void ceph_con_v2_revoke(struct ceph_connection
*con
)
3634 WARN_ON(con
->v2
.out_zero
);
3636 if (con_secure(con
)) {
3637 WARN_ON(con
->v2
.out_state
!= OUT_S_QUEUE_ENC_PAGE
&&
3638 con
->v2
.out_state
!= OUT_S_FINISH_MESSAGE
);
3639 dout("%s con %p secure - noop\n", __func__
, con
);
3643 switch (con
->v2
.out_state
) {
3644 case OUT_S_QUEUE_DATA
:
3645 revoke_at_queue_data(con
);
3647 case OUT_S_QUEUE_DATA_CONT
:
3648 revoke_at_queue_data_cont(con
);
3650 case OUT_S_FINISH_MESSAGE
:
3651 revoke_at_finish_message(con
);
3654 WARN(1, "bad out_state %d", con
->v2
.out_state
);
/*
 * Revoke in_msg before any data has been read (plain mode): skip the
 * rest of the current kvec read plus the whole data payload and the
 * plain epilogue.
 */
3659 static void revoke_at_prepare_read_data(struct ceph_connection
*con
)
3664 WARN_ON(con_secure(con
));
3665 WARN_ON(!data_len(con
->in_msg
));
3666 WARN_ON(!iov_iter_is_kvec(&con
->v2
.in_iter
));
3667 resid
= iov_iter_count(&con
->v2
.in_iter
);
3670 remaining
= data_len(con
->in_msg
) + CEPH_EPILOGUE_PLAIN_LEN
;
3671 dout("%s con %p resid %d remaining %d\n", __func__
, con
, resid
,
3673 con
->v2
.in_iter
.count
-= resid
;
3674 set_in_skip(con
, resid
+ remaining
);
3675 con
->v2
.in_state
= IN_S_FINISH_SKIP
;
/*
 * Revoke in_msg mid-data (plain mode, bvec phase): advance the cursor
 * past what was already received, then skip the rest of the current
 * bvec, the remaining payload and the plain epilogue.
 */
3678 static void revoke_at_prepare_read_data_cont(struct ceph_connection
*con
)
3680 int recved
, resid
; /* current piece of data */
3683 WARN_ON(con_secure(con
));
3684 WARN_ON(!data_len(con
->in_msg
));
3685 WARN_ON(!iov_iter_is_bvec(&con
->v2
.in_iter
));
3686 resid
= iov_iter_count(&con
->v2
.in_iter
);
3687 WARN_ON(!resid
|| resid
> con
->v2
.in_bvec
.bv_len
);
3688 recved
= con
->v2
.in_bvec
.bv_len
- resid
;
3689 dout("%s con %p recved %d resid %d\n", __func__
, con
, recved
, resid
);
3692 ceph_msg_data_advance(&con
->v2
.in_cursor
, recved
);
3693 WARN_ON(resid
> con
->v2
.in_cursor
.total_resid
);
3695 remaining
= CEPH_EPILOGUE_PLAIN_LEN
;
3696 dout("%s con %p total_resid %zu remaining %d\n", __func__
, con
,
3697 con
->v2
.in_cursor
.total_resid
, remaining
);
3698 con
->v2
.in_iter
.count
-= resid
;
3699 set_in_skip(con
, con
->v2
.in_cursor
.total_resid
+ remaining
);
3700 con
->v2
.in_state
= IN_S_FINISH_SKIP
;
/*
 * Revoke in_msg while reading ciphertext pages (secure mode): skip
 * the rest of the current enc page plus all remaining ciphertext
 * (in_enc_resid).
 */
3703 static void revoke_at_prepare_read_enc_page(struct ceph_connection
*con
)
3705 int resid
; /* current enc page (not necessarily data) */
3707 WARN_ON(!con_secure(con
));
3708 WARN_ON(!iov_iter_is_bvec(&con
->v2
.in_iter
));
3709 resid
= iov_iter_count(&con
->v2
.in_iter
);
3710 WARN_ON(!resid
|| resid
> con
->v2
.in_bvec
.bv_len
);
3712 dout("%s con %p resid %d enc_resid %d\n", __func__
, con
, resid
,
3713 con
->v2
.in_enc_resid
);
3714 con
->v2
.in_iter
.count
-= resid
;
3715 set_in_skip(con
, resid
+ con
->v2
.in_enc_resid
);
3716 con
->v2
.in_state
= IN_S_FINISH_SKIP
;
/*
 * Revoke in_msg during a sparse read (plain mode): skip the pending
 * read plus the not-yet-requested sparse data (data_len_remain) and
 * the plain epilogue.
 */
3719 static void revoke_at_prepare_sparse_data(struct ceph_connection
*con
)
3721 int resid
; /* current piece of data */
3724 WARN_ON(con_secure(con
));
3725 WARN_ON(!data_len(con
->in_msg
));
3726 WARN_ON(!iov_iter_is_bvec(&con
->v2
.in_iter
));
3727 resid
= iov_iter_count(&con
->v2
.in_iter
);
3728 dout("%s con %p resid %d\n", __func__
, con
, resid
);
3730 remaining
= CEPH_EPILOGUE_PLAIN_LEN
+ con
->v2
.data_len_remain
;
3731 con
->v2
.in_iter
.count
-= resid
;
3732 set_in_skip(con
, resid
+ remaining
);
3733 con
->v2
.in_state
= IN_S_FINISH_SKIP
;
/*
 * Revoke in_msg while the epilogue is being read: only the epilogue
 * bytes remain, so just skip them.
 */
3736 static void revoke_at_handle_epilogue(struct ceph_connection
*con
)
3740 resid
= iov_iter_count(&con
->v2
.in_iter
);
3743 dout("%s con %p resid %d\n", __func__
, con
, resid
);
3744 con
->v2
.in_iter
.count
-= resid
;
3745 set_in_skip(con
, resid
);
3746 con
->v2
.in_state
= IN_S_FINISH_SKIP
;
/*
 * Revoke the incoming message: dispatch on in_state to the matching
 * skip-setup helper.  IN_S_PREPARE_SPARSE_DATA shares the plain
 * read-data path.
 */
3749 void ceph_con_v2_revoke_incoming(struct ceph_connection
*con
)
3751 switch (con
->v2
.in_state
) {
3752 case IN_S_PREPARE_SPARSE_DATA
:
3753 case IN_S_PREPARE_READ_DATA
:
3754 revoke_at_prepare_read_data(con
);
3756 case IN_S_PREPARE_READ_DATA_CONT
:
3757 revoke_at_prepare_read_data_cont(con
);
3759 case IN_S_PREPARE_READ_ENC_PAGE
:
3760 revoke_at_prepare_read_enc_page(con
);
3762 case IN_S_PREPARE_SPARSE_DATA_CONT
:
3763 revoke_at_prepare_sparse_data(con
);
3765 case IN_S_HANDLE_EPILOGUE
:
3766 revoke_at_handle_epilogue(con
);
3769 WARN(1, "bad in_state %d", con
->v2
.in_state
);
/*
 * A nonzero peer_global_seq means the SERVER_IDENT/session handshake
 * completed at least once on this connection.
 */
3774 bool ceph_con_v2_opened(struct ceph_connection
*con
)
3776 return con
->v2
.peer_global_seq
;
/*
 * Forget all session state (cookies and sequence numbers) so the next
 * connect starts a brand-new session instead of a reconnect.
 */
3779 void ceph_con_v2_reset_session(struct ceph_connection
*con
)
3781 con
->v2
.client_cookie
= 0;
3782 con
->v2
.server_cookie
= 0;
3783 con
->v2
.global_seq
= 0;
3784 con
->v2
.connect_seq
= 0;
3785 con
->v2
.peer_global_seq
= 0;
3788 void ceph_con_v2_reset_protocol(struct ceph_connection
*con
)
3790 iov_iter_truncate(&con
->v2
.in_iter
, 0);
3791 iov_iter_truncate(&con
->v2
.out_iter
, 0);
3792 con
->v2
.out_zero
= 0;
3794 clear_in_sign_kvecs(con
);
3795 clear_out_sign_kvecs(con
);
3796 free_conn_bufs(con
);
3798 if (con
->v2
.in_enc_pages
) {
3799 WARN_ON(!con
->v2
.in_enc_page_cnt
);
3800 ceph_release_page_vector(con
->v2
.in_enc_pages
,
3801 con
->v2
.in_enc_page_cnt
);
3802 con
->v2
.in_enc_pages
= NULL
;
3803 con
->v2
.in_enc_page_cnt
= 0;
3805 if (con
->v2
.out_enc_pages
) {
3806 WARN_ON(!con
->v2
.out_enc_page_cnt
);
3807 ceph_release_page_vector(con
->v2
.out_enc_pages
,
3808 con
->v2
.out_enc_page_cnt
);
3809 con
->v2
.out_enc_pages
= NULL
;
3810 con
->v2
.out_enc_page_cnt
= 0;
3813 con
->v2
.con_mode
= CEPH_CON_MODE_UNKNOWN
;
3814 memzero_explicit(&con
->v2
.in_gcm_nonce
, CEPH_GCM_IV_LEN
);
3815 memzero_explicit(&con
->v2
.out_gcm_nonce
, CEPH_GCM_IV_LEN
);
3817 if (con
->v2
.hmac_tfm
) {
3818 crypto_free_shash(con
->v2
.hmac_tfm
);
3819 con
->v2
.hmac_tfm
= NULL
;
3821 if (con
->v2
.gcm_req
) {
3822 aead_request_free(con
->v2
.gcm_req
);
3823 con
->v2
.gcm_req
= NULL
;
3825 if (con
->v2
.gcm_tfm
) {
3826 crypto_free_aead(con
->v2
.gcm_tfm
);
3827 con
->v2
.gcm_tfm
= NULL
;