1 /* SPDX-License-Identifier: GPL-2.0 */
2 #ifndef __FS_CEPH_MESSENGER_H
3 #define __FS_CEPH_MESSENGER_H
5 #include <linux/bvec.h>
6 #include <linux/crypto.h>
7 #include <linux/kref.h>
8 #include <linux/mutex.h>
10 #include <linux/radix-tree.h>
11 #include <linux/uio.h>
12 #include <linux/workqueue.h>
13 #include <net/net_namespace.h>
15 #include <linux/ceph/types.h>
16 #include <linux/ceph/buffer.h>
19 struct ceph_connection
;
20 struct ceph_msg_data_cursor
;
23 * Ceph defines these callbacks for handling connection events.
25 struct ceph_connection_operations
{
26 struct ceph_connection
*(*get
)(struct ceph_connection
*);
27 void (*put
)(struct ceph_connection
*);
29 /* handle an incoming message. */
30 void (*dispatch
) (struct ceph_connection
*con
, struct ceph_msg
*m
);
32 /* authorize an outgoing connection */
33 struct ceph_auth_handshake
*(*get_authorizer
) (
34 struct ceph_connection
*con
,
35 int *proto
, int force_new
);
36 int (*add_authorizer_challenge
)(struct ceph_connection
*con
,
38 int challenge_buf_len
);
39 int (*verify_authorizer_reply
) (struct ceph_connection
*con
);
40 int (*invalidate_authorizer
)(struct ceph_connection
*con
);
42 /* there was some error on the socket (disconnect, whatever) */
43 void (*fault
) (struct ceph_connection
*con
);
45 /* a remote host has terminated a message exchange session, and messages
46 * we sent (or they tried to send us) may be lost. */
47 void (*peer_reset
) (struct ceph_connection
*con
);
49 struct ceph_msg
* (*alloc_msg
) (struct ceph_connection
*con
,
50 struct ceph_msg_header
*hdr
,
53 void (*reencode_message
) (struct ceph_msg
*msg
);
55 int (*sign_message
) (struct ceph_msg
*msg
);
56 int (*check_message_signature
) (struct ceph_msg
*msg
);
58 /* msgr2 authentication exchange */
59 int (*get_auth_request
)(struct ceph_connection
*con
,
60 void *buf
, int *buf_len
,
61 void **authorizer
, int *authorizer_len
);
62 int (*handle_auth_reply_more
)(struct ceph_connection
*con
,
63 void *reply
, int reply_len
,
64 void *buf
, int *buf_len
,
65 void **authorizer
, int *authorizer_len
);
66 int (*handle_auth_done
)(struct ceph_connection
*con
,
67 u64 global_id
, void *reply
, int reply_len
,
68 u8
*session_key
, int *session_key_len
,
69 u8
*con_secret
, int *con_secret_len
);
70 int (*handle_auth_bad_method
)(struct ceph_connection
*con
,
71 int used_proto
, int result
,
72 const int *allowed_protos
, int proto_cnt
,
73 const int *allowed_modes
, int mode_cnt
);
76 * sparse_read: read sparse data
77 * @con: connection we're reading from
78 * @cursor: data cursor for reading extents
79 * @buf: optional buffer to read into
81 * This should be called more than once, each time setting up to
82 * receive an extent into the current cursor position, and zeroing
83 * the holes between them.
85 * Returns amount of data to be read (in bytes), 0 if reading is
86 * complete, or -errno if there was an error.
88 * If @buf is set on a >0 return, then the data should be read into
89 * the provided buffer. Otherwise, it should be read into the cursor.
91 * The sparse read operation is expected to initialize the cursor
92 * with a length covering up to the end of the last extent.
94 int (*sparse_read
)(struct ceph_connection
*con
,
95 struct ceph_msg_data_cursor
*cursor
,
100 /*
 * Expands to TWO comma-separated arguments for a printf-style call:
 * ceph_entity_type_name() applied to (n).type, followed by (n).num
 * converted with le64_to_cpu().  Use with the format string %s%lld.
 *
 * @n appears twice in the expansion, so pass an expression that has no
 * side effects.
 */
101 #define ENTITY_NAME(n) ceph_entity_type_name((n).type), le64_to_cpu((n).num)
103 struct ceph_messenger
{
104 struct ceph_entity_inst inst
; /* my name+address */
105 struct ceph_entity_addr my_enc_addr
;
111 * the global_seq counts connections I (attempt to) initiate
112 * in order to disambiguate certain connect race conditions.
115 spinlock_t global_seq_lock
;
118 enum ceph_msg_data_type
{
119 CEPH_MSG_DATA_NONE
, /* message contains no data payload */
120 CEPH_MSG_DATA_PAGES
, /* data source/destination is a page array */
121 CEPH_MSG_DATA_PAGELIST
, /* data source/destination is a pagelist */
123 CEPH_MSG_DATA_BIO
, /* data source/destination is a bio list */
124 #endif /* CONFIG_BLOCK */
125 CEPH_MSG_DATA_BVECS
, /* data source/destination is a bio_vec array */
126 CEPH_MSG_DATA_ITER
, /* data source/destination is an iov_iter */
131 struct ceph_bio_iter
{
133 struct bvec_iter iter
;
136 #define __ceph_bio_iter_advance_step(it, n, STEP) do { \
137 unsigned int __n = (n), __cur_n; \
140 BUG_ON(!(it)->iter.bi_size); \
141 __cur_n = min((it)->iter.bi_size, __n); \
143 bio_advance_iter((it)->bio, &(it)->iter, __cur_n); \
144 if (!(it)->iter.bi_size && (it)->bio->bi_next) { \
145 dout("__ceph_bio_iter_advance_step next bio\n"); \
146 (it)->bio = (it)->bio->bi_next; \
147 (it)->iter = (it)->bio->bi_iter; \
154 * Advance @it by @n bytes.
156 #define ceph_bio_iter_advance(it, n) \
157 __ceph_bio_iter_advance_step(it, n, 0)
160 * Advance @it by @n bytes, executing BVEC_STEP for each bio_vec.
162 #define ceph_bio_iter_advance_step(it, n, BVEC_STEP) \
163 __ceph_bio_iter_advance_step(it, n, ({ \
165 struct bvec_iter __cur_iter; \
167 __cur_iter = (it)->iter; \
168 __cur_iter.bi_size = __cur_n; \
169 __bio_for_each_segment(bv, (it)->bio, __cur_iter, __cur_iter) \
173 #endif /* CONFIG_BLOCK */
175 struct ceph_bvec_iter
{
176 struct bio_vec
*bvecs
;
177 struct bvec_iter iter
;
180 #define __ceph_bvec_iter_advance_step(it, n, STEP) do { \
181 BUG_ON((n) > (it)->iter.bi_size); \
183 bvec_iter_advance((it)->bvecs, &(it)->iter, (n)); \
187 * Advance @it by @n bytes.
189 #define ceph_bvec_iter_advance(it, n) \
190 __ceph_bvec_iter_advance_step(it, n, 0)
193 * Advance @it by @n bytes, executing BVEC_STEP for each bio_vec.
195 #define ceph_bvec_iter_advance_step(it, n, BVEC_STEP) \
196 __ceph_bvec_iter_advance_step(it, n, ({ \
198 struct bvec_iter __cur_iter; \
200 __cur_iter = (it)->iter; \
201 __cur_iter.bi_size = (n); \
202 for_each_bvec(bv, (it)->bvecs, __cur_iter, __cur_iter) \
206 #define ceph_bvec_iter_shorten(it, n) do { \
207 BUG_ON((n) > (it)->iter.bi_size); \
208 (it)->iter.bi_size = (n); \
211 struct ceph_msg_data
{
212 enum ceph_msg_data_type type
;
216 struct ceph_bio_iter bio_pos
;
219 #endif /* CONFIG_BLOCK */
220 struct ceph_bvec_iter bvec_pos
;
223 size_t length
; /* total # bytes */
224 unsigned int alignment
; /* first page */
227 struct ceph_pagelist
*pagelist
;
228 struct iov_iter iter
;
232 struct ceph_msg_data_cursor
{
233 size_t total_resid
; /* across all data items */
235 struct ceph_msg_data
*data
; /* current data item */
236 size_t resid
; /* bytes not yet consumed */
237 int sr_resid
; /* residual sparse_read len */
238 bool need_crc
; /* crc update needed */
241 struct ceph_bio_iter bio_iter
;
242 #endif /* CONFIG_BLOCK */
243 struct bvec_iter bvec_iter
;
245 unsigned int page_offset
; /* offset in page */
246 unsigned short page_index
; /* index in array */
247 unsigned short page_count
; /* pages in array */
249 struct { /* pagelist */
250 struct page
*page
; /* page from list */
251 size_t offset
; /* bytes from list */
254 struct iov_iter iov_iter
;
255 unsigned int lastlen
;
261 * a single message. it contains a header (src, dest, message type, etc.),
262 * footer (crc values, mainly), a "front" message body, and possibly a
263 * data payload (stored in some number of pages).
266 struct ceph_msg_header hdr
; /* header */
268 struct ceph_msg_footer footer
; /* footer */
269 struct ceph_msg_footer_old old_footer
; /* old format footer */
271 struct kvec front
; /* unaligned blobs of message */
272 struct ceph_buffer
*middle
;
275 struct ceph_msg_data
*data
;
278 struct ceph_msg_data_cursor cursor
;
280 struct ceph_connection
*con
;
281 struct list_head list_head
; /* links for connection lists */
286 u64 sparse_read_total
;
289 struct ceph_msgpool
*pool
;
/*
 * Connection state values (CEPH_CON_S_*); struct ceph_connection's
 * "state" member is annotated as holding one of these.
 *
 * The values are consecutive integers starting at 1, not bit masks.
 * NOTE(review): the _V1_ and _V2_ names presumably apply only to the
 * respective wire protocol version -- confirm against the state machine
 * code.
 */
295 #define CEPH_CON_S_CLOSED 1
296 #define CEPH_CON_S_PREOPEN 2
297 #define CEPH_CON_S_V1_BANNER 3
298 #define CEPH_CON_S_V1_CONNECT_MSG 4
299 #define CEPH_CON_S_V2_BANNER_PREFIX 5
300 #define CEPH_CON_S_V2_BANNER_PAYLOAD 6
301 #define CEPH_CON_S_V2_HELLO 7
302 #define CEPH_CON_S_V2_AUTH 8
303 #define CEPH_CON_S_V2_AUTH_SIGNATURE 9
304 #define CEPH_CON_S_V2_SESSION_CONNECT 10
305 #define CEPH_CON_S_V2_SESSION_RECONNECT 11
306 #define CEPH_CON_S_OPEN 12
307 #define CEPH_CON_S_STANDBY 13
310 * ceph_connection flag bits
312 #define CEPH_CON_F_LOSSYTX 0 /* we can close channel or drop
313 messages on errors */
314 #define CEPH_CON_F_KEEPALIVE_PENDING 1 /* we need to send a keepalive */
315 #define CEPH_CON_F_WRITE_PENDING 2 /* we have data ready to send */
316 #define CEPH_CON_F_SOCK_CLOSED 3 /* socket state changed to closed */
317 #define CEPH_CON_F_BACKOFF 4 /* need to retry queuing delayed
320 /*
 * Ceph connection fault delay defaults, for exponential backoff.
 *
 * Both are expressed in terms of HZ: the base interval is a quarter of
 * HZ and the maximum is fifteen times HZ.
 * NOTE(review): presumably these bound the "delay" member of struct
 * ceph_connection ("current delay interval") -- confirm against the
 * fault-handling code.
 */
321 #define BASE_DELAY_INTERVAL (HZ / 4)
322 #define MAX_DELAY_INTERVAL (15 * HZ)
324 struct ceph_connection_v1_info
{
325 struct kvec out_kvec
[8], /* sending header/footer data */
327 int out_kvec_left
; /* kvec's left in out_kvec */
328 int out_skip
; /* skip this many bytes */
329 int out_kvec_bytes
; /* total bytes left */
330 bool out_more
; /* there is more data after the kvecs */
333 struct ceph_auth_handshake
*auth
;
334 int auth_retry
; /* true if we need a newer authorizer */
336 /* connection negotiation temps */
337 u8 in_banner
[CEPH_BANNER_MAX_LEN
];
338 struct ceph_entity_addr actual_peer_addr
;
339 struct ceph_entity_addr peer_addr_for_me
;
340 struct ceph_msg_connect out_connect
;
341 struct ceph_msg_connect_reply in_reply
;
343 int in_base_pos
; /* bytes read */
346 struct kvec in_sr_kvec
; /* current location to receive into */
347 u64 in_sr_len
; /* amount of data in this extent */
349 /* message in temps */
350 u8 in_tag
; /* protocol control byte */
351 struct ceph_msg_header in_hdr
;
352 __le64 in_temp_ack
; /* for reading an ack */
354 /* message out temps */
355 struct ceph_msg_header out_hdr
;
356 __le64 out_temp_ack
; /* for writing an ack */
357 struct ceph_timespec out_temp_keepalive2
; /* for writing keepalive2
360 u32 connect_seq
; /* identify the most recent connection
361 attempt for this session */
362 u32 peer_global_seq
; /* peer's global seq for this connection */
/*
 * Length constants for CRC, GCM and frame preamble handling.
 * NOTE(review): these look like byte counts (CEPH_GCM_BLOCK_LEN sizes a
 * u8 array in struct ceph_connection_v2_info) -- confirm.
 */
365 #define CEPH_CRC_LEN 4
366 #define CEPH_GCM_KEY_LEN 16
/* derived from the nonce struct, so it follows that struct's layout */
367 #define CEPH_GCM_IV_LEN sizeof(struct ceph_gcm_nonce)
368 #define CEPH_GCM_BLOCK_LEN 16
369 #define CEPH_GCM_TAG_LEN 16
371 #define CEPH_PREAMBLE_LEN 32
372 #define CEPH_PREAMBLE_INLINE_LEN 48
/* the plain preamble length is simply an alias of CEPH_PREAMBLE_LEN */
373 #define CEPH_PREAMBLE_PLAIN_LEN CEPH_PREAMBLE_LEN
374 #define CEPH_PREAMBLE_SECURE_LEN (CEPH_PREAMBLE_LEN + \
375 CEPH_PREAMBLE_INLINE_LEN + \
/* plain epilogue: 1 plus three CEPH_CRC_LEN-sized values */
377 #define CEPH_EPILOGUE_PLAIN_LEN (1 + 3 * CEPH_CRC_LEN)
/* secure epilogue: one GCM block plus one GCM tag */
378 #define CEPH_EPILOGUE_SECURE_LEN (CEPH_GCM_BLOCK_LEN + CEPH_GCM_TAG_LEN)
/* element count of fd_lens[] and fd_aligns[] in struct ceph_frame_desc */
380 #define CEPH_FRAME_MAX_SEGMENT_COUNT 4
382 struct ceph_frame_desc
{
383 int fd_tag
; /* FRAME_TAG_* */
385 int fd_lens
[CEPH_FRAME_MAX_SEGMENT_COUNT
]; /* logical */
386 int fd_aligns
[CEPH_FRAME_MAX_SEGMENT_COUNT
];
389 struct ceph_gcm_nonce
{
391 __le64 counter __packed
;
394 struct ceph_connection_v2_info
{
395 struct iov_iter in_iter
;
396 struct kvec in_kvecs
[5]; /* recvmsg */
397 struct bio_vec in_bvec
; /* recvmsg (in_cursor) */
399 int in_state
; /* IN_S_* */
401 struct iov_iter out_iter
;
402 struct kvec out_kvecs
[8]; /* sendmsg */
403 struct bio_vec out_bvec
; /* sendpage (out_cursor, out_zero),
404 sendmsg (out_enc_pages) */
406 int out_state
; /* OUT_S_* */
408 int out_zero
; /* # of zero bytes to send */
409 bool out_iter_sendpage
; /* use sendpage if possible */
411 struct ceph_frame_desc in_desc
;
412 struct ceph_msg_data_cursor in_cursor
;
413 struct ceph_msg_data_cursor out_cursor
;
415 struct crypto_shash
*hmac_tfm
; /* post-auth signature */
416 struct crypto_aead
*gcm_tfm
; /* on-wire encryption */
417 struct aead_request
*gcm_req
;
418 struct crypto_wait gcm_wait
;
419 struct ceph_gcm_nonce in_gcm_nonce
;
420 struct ceph_gcm_nonce out_gcm_nonce
;
422 struct page
**in_enc_pages
;
426 struct page
**out_enc_pages
;
427 int out_enc_page_cnt
;
431 int con_mode
; /* CEPH_CON_MODE_* */
437 struct kvec in_sign_kvecs
[8];
438 struct kvec out_sign_kvecs
[8];
439 int in_sign_kvec_cnt
;
440 int out_sign_kvec_cnt
;
448 u8 in_buf
[CEPH_PREAMBLE_SECURE_LEN
];
449 u8 out_buf
[CEPH_PREAMBLE_SECURE_LEN
];
451 u8 late_status
; /* FRAME_LATE_STATUS_* */
458 u8 pad
[CEPH_GCM_BLOCK_LEN
- 1];
464 * A single connection with another host.
466 * We maintain a queue of outgoing messages, and some session state to
467 * ensure that we can preserve the lossless, ordered delivery of
468 * messages in the case of a TCP disconnect.
470 struct ceph_connection
{
473 const struct ceph_connection_operations
*ops
;
475 struct ceph_messenger
*msgr
;
477 int state
; /* CEPH_CON_S_* */
481 unsigned long flags
; /* CEPH_CON_F_* */
482 const char *error_msg
; /* error message, if any */
484 struct ceph_entity_name peer_name
; /* peer name */
485 struct ceph_entity_addr peer_addr
; /* peer address */
491 struct list_head out_queue
;
492 struct list_head out_sent
; /* sending or sent but unacked */
493 u64 out_seq
; /* last message queued for send */
495 u64 in_seq
, in_seq_acked
; /* last message received, acked */
497 struct ceph_msg
*in_msg
;
498 struct ceph_msg
*out_msg
; /* sending message (== tail of
501 struct page
*bounce_page
;
502 u32 in_front_crc
, in_middle_crc
, in_data_crc
; /* calculated crc */
504 struct timespec64 last_keepalive_ack
; /* keepalive2 ack stamp */
506 struct delayed_work work
; /* send|recv work */
507 unsigned long delay
; /* current delay interval */
510 struct ceph_connection_v1_info v1
;
511 struct ceph_connection_v2_info v2
;
515 extern struct page
*ceph_zero_page
;
/*
 * Connection flag helpers.
 *
 * Each takes the connection and a flag identifier.  clear/set return
 * nothing; test, test_and_clear and test_and_set return a bool.
 * NOTE(review): @con_flag is presumably one of the CEPH_CON_F_* bit
 * numbers and the helpers presumably operate on con->flags (that member
 * is annotated CEPH_CON_F_*) -- confirm against the implementation.
 */
517 void ceph_con_flag_clear(struct ceph_connection
*con
, unsigned long con_flag
);
518 void ceph_con_flag_set(struct ceph_connection
*con
, unsigned long con_flag
);
519 bool ceph_con_flag_test(struct ceph_connection
*con
, unsigned long con_flag
);
520 bool ceph_con_flag_test_and_clear(struct ceph_connection
*con
,
521 unsigned long con_flag
);
522 bool ceph_con_flag_test_and_set(struct ceph_connection
*con
,
523 unsigned long con_flag
);
525 void ceph_encode_my_addr(struct ceph_messenger
*msgr
);
527 int ceph_tcp_connect(struct ceph_connection
*con
);
528 int ceph_con_close_socket(struct ceph_connection
*con
);
529 void ceph_con_reset_session(struct ceph_connection
*con
);
531 u32
ceph_get_global_seq(struct ceph_messenger
*msgr
, u32 gt
);
532 void ceph_con_discard_sent(struct ceph_connection
*con
, u64 ack_seq
);
533 void ceph_con_discard_requeued(struct ceph_connection
*con
, u64 reconnect_seq
);
535 void ceph_msg_data_cursor_init(struct ceph_msg_data_cursor
*cursor
,
536 struct ceph_msg
*msg
, size_t length
);
537 struct page
*ceph_msg_data_next(struct ceph_msg_data_cursor
*cursor
,
538 size_t *page_offset
, size_t *length
);
539 void ceph_msg_data_advance(struct ceph_msg_data_cursor
*cursor
, size_t bytes
);
541 u32
ceph_crc32c_page(u32 crc
, struct page
*page
, unsigned int page_offset
,
542 unsigned int length
);
544 bool ceph_addr_is_blank(const struct ceph_entity_addr
*addr
);
545 int ceph_addr_port(const struct ceph_entity_addr
*addr
);
546 void ceph_addr_set_port(struct ceph_entity_addr
*addr
, int p
);
548 void ceph_con_process_message(struct ceph_connection
*con
);
549 int ceph_con_in_msg_alloc(struct ceph_connection
*con
,
550 struct ceph_msg_header
*hdr
, int *skip
);
551 void ceph_con_get_out_msg(struct ceph_connection
*con
);
/*
 * Protocol v1 entry points.
 *
 * Seven per-connection operations: try_read, try_write, revoke,
 * revoke_incoming, opened, reset_session and reset_protocol.  The
 * ceph_con_v2_* declarations in this header provide the same set with
 * identical signatures.
 * NOTE(review): return-value conventions of try_read/try_write are not
 * visible from this header -- check the implementation before relying
 * on them.
 */
554 int ceph_con_v1_try_read(struct ceph_connection
*con
);
555 int ceph_con_v1_try_write(struct ceph_connection
*con
);
556 void ceph_con_v1_revoke(struct ceph_connection
*con
);
557 void ceph_con_v1_revoke_incoming(struct ceph_connection
*con
);
558 bool ceph_con_v1_opened(struct ceph_connection
*con
);
559 void ceph_con_v1_reset_session(struct ceph_connection
*con
);
560 void ceph_con_v1_reset_protocol(struct ceph_connection
*con
);
/*
 * Protocol v2 entry points.
 *
 * Seven per-connection operations: try_read, try_write, revoke,
 * revoke_incoming, opened, reset_session and reset_protocol.  The
 * ceph_con_v1_* declarations in this header provide the same set with
 * identical signatures.
 * NOTE(review): return-value conventions of try_read/try_write are not
 * visible from this header -- check the implementation before relying
 * on them.
 */
563 int ceph_con_v2_try_read(struct ceph_connection
*con
);
564 int ceph_con_v2_try_write(struct ceph_connection
*con
);
565 void ceph_con_v2_revoke(struct ceph_connection
*con
);
566 void ceph_con_v2_revoke_incoming(struct ceph_connection
*con
);
567 bool ceph_con_v2_opened(struct ceph_connection
*con
);
568 void ceph_con_v2_reset_session(struct ceph_connection
*con
);
569 void ceph_con_v2_reset_protocol(struct ceph_connection
*con
);
572 extern const char *ceph_pr_addr(const struct ceph_entity_addr
*addr
);
574 extern int ceph_parse_ips(const char *c
, const char *end
,
575 struct ceph_entity_addr
*addr
,
576 int max_count
, int *count
, char delim
);
578 extern int ceph_msgr_init(void);
579 extern void ceph_msgr_exit(void);
580 extern void ceph_msgr_flush(void);
582 extern void ceph_messenger_init(struct ceph_messenger
*msgr
,
583 struct ceph_entity_addr
*myaddr
);
584 extern void ceph_messenger_fini(struct ceph_messenger
*msgr
);
585 extern void ceph_messenger_reset_nonce(struct ceph_messenger
*msgr
);
587 extern void ceph_con_init(struct ceph_connection
*con
, void *private,
588 const struct ceph_connection_operations
*ops
,
589 struct ceph_messenger
*msgr
);
590 extern void ceph_con_open(struct ceph_connection
*con
,
591 __u8 entity_type
, __u64 entity_num
,
592 struct ceph_entity_addr
*addr
);
593 extern bool ceph_con_opened(struct ceph_connection
*con
);
594 extern void ceph_con_close(struct ceph_connection
*con
);
595 extern void ceph_con_send(struct ceph_connection
*con
, struct ceph_msg
*msg
);
597 extern void ceph_msg_revoke(struct ceph_msg
*msg
);
598 extern void ceph_msg_revoke_incoming(struct ceph_msg
*msg
);
600 extern void ceph_con_keepalive(struct ceph_connection
*con
);
601 extern bool ceph_con_keepalive_expired(struct ceph_connection
*con
,
602 unsigned long interval
);
604 void ceph_msg_data_add_pages(struct ceph_msg
*msg
, struct page
**pages
,
605 size_t length
, size_t alignment
, bool own_pages
);
606 extern void ceph_msg_data_add_pagelist(struct ceph_msg
*msg
,
607 struct ceph_pagelist
*pagelist
);
609 void ceph_msg_data_add_bio(struct ceph_msg
*msg
, struct ceph_bio_iter
*bio_pos
,
611 #endif /* CONFIG_BLOCK */
612 void ceph_msg_data_add_bvecs(struct ceph_msg
*msg
,
613 struct ceph_bvec_iter
*bvec_pos
);
614 void ceph_msg_data_add_iter(struct ceph_msg
*msg
,
615 struct iov_iter
*iter
);
617 struct ceph_msg
*ceph_msg_new2(int type
, int front_len
, int max_data_items
,
618 gfp_t flags
, bool can_fail
);
619 extern struct ceph_msg
*ceph_msg_new(int type
, int front_len
, gfp_t flags
,
622 extern struct ceph_msg
*ceph_msg_get(struct ceph_msg
*msg
);
623 extern void ceph_msg_put(struct ceph_msg
*msg
);
625 extern void ceph_msg_dump(struct ceph_msg
*msg
);