1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /* RxRPC recvmsg() implementation
4 * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
5 * Written by David Howells (dhowells@redhat.com)
8 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
10 #include <linux/net.h>
11 #include <linux/skbuff.h>
12 #include <linux/export.h>
13 #include <linux/sched/signal.h>
16 #include <net/af_rxrpc.h>
17 #include "ar-internal.h"
20 * Post a call for attention by the socket or kernel service. Further
21 * notifications are suppressed by putting recvmsg_link on a dummy queue.
23 void rxrpc_notify_socket(struct rxrpc_call
*call
)
25 struct rxrpc_sock
*rx
;
28 _enter("%d", call
->debug_id
);
30 if (!list_empty(&call
->recvmsg_link
))
35 rx
= rcu_dereference(call
->socket
);
37 if (rx
&& sk
->sk_state
< RXRPC_CLOSE
) {
38 if (call
->notify_rx
) {
39 spin_lock_bh(&call
->notify_lock
);
40 call
->notify_rx(sk
, call
, call
->user_call_ID
);
41 spin_unlock_bh(&call
->notify_lock
);
43 write_lock_bh(&rx
->recvmsg_lock
);
44 if (list_empty(&call
->recvmsg_link
)) {
45 rxrpc_get_call(call
, rxrpc_call_got
);
46 list_add_tail(&call
->recvmsg_link
, &rx
->recvmsg_q
);
48 write_unlock_bh(&rx
->recvmsg_lock
);
50 if (!sock_flag(sk
, SOCK_DEAD
)) {
51 _debug("call %ps", sk
->sk_data_ready
);
52 sk
->sk_data_ready(sk
);
62 * Pass a call terminating message to userspace.
64 static int rxrpc_recvmsg_term(struct rxrpc_call
*call
, struct msghdr
*msg
)
69 switch (call
->completion
) {
70 case RXRPC_CALL_SUCCEEDED
:
72 if (rxrpc_is_service_call(call
))
73 ret
= put_cmsg(msg
, SOL_RXRPC
, RXRPC_ACK
, 0, &tmp
);
75 case RXRPC_CALL_REMOTELY_ABORTED
:
76 tmp
= call
->abort_code
;
77 ret
= put_cmsg(msg
, SOL_RXRPC
, RXRPC_ABORT
, 4, &tmp
);
79 case RXRPC_CALL_LOCALLY_ABORTED
:
80 tmp
= call
->abort_code
;
81 ret
= put_cmsg(msg
, SOL_RXRPC
, RXRPC_ABORT
, 4, &tmp
);
83 case RXRPC_CALL_NETWORK_ERROR
:
85 ret
= put_cmsg(msg
, SOL_RXRPC
, RXRPC_NET_ERROR
, 4, &tmp
);
87 case RXRPC_CALL_LOCAL_ERROR
:
89 ret
= put_cmsg(msg
, SOL_RXRPC
, RXRPC_LOCAL_ERROR
, 4, &tmp
);
92 pr_err("Invalid terminal call state %u\n", call
->state
);
97 trace_rxrpc_recvmsg(call
, rxrpc_recvmsg_terminal
, call
->rx_hard_ack
,
98 call
->rx_pkt_offset
, call
->rx_pkt_len
, ret
);
103 * Pass back notification of a new call. The call is added to the
104 * to-be-accepted list. This means that the next call to be accepted might not
105 * be the last call seen awaiting acceptance, but unless we leave this on the
106 * front of the queue and block all other messages until someone gives us a
107 * user_ID for it, there's not a lot we can do.
109 static int rxrpc_recvmsg_new_call(struct rxrpc_sock
*rx
,
110 struct rxrpc_call
*call
,
111 struct msghdr
*msg
, int flags
)
115 ret
= put_cmsg(msg
, SOL_RXRPC
, RXRPC_NEW_CALL
, 0, &tmp
);
117 if (ret
== 0 && !(flags
& MSG_PEEK
)) {
118 _debug("to be accepted");
119 write_lock_bh(&rx
->recvmsg_lock
);
120 list_del_init(&call
->recvmsg_link
);
121 write_unlock_bh(&rx
->recvmsg_lock
);
123 rxrpc_get_call(call
, rxrpc_call_got
);
124 write_lock(&rx
->call_lock
);
125 list_add_tail(&call
->accept_link
, &rx
->to_be_accepted
);
126 write_unlock(&rx
->call_lock
);
129 trace_rxrpc_recvmsg(call
, rxrpc_recvmsg_to_be_accepted
, 1, 0, 0, ret
);
134 * End the packet reception phase.
136 static void rxrpc_end_rx_phase(struct rxrpc_call
*call
, rxrpc_serial_t serial
)
138 _enter("%d,%s", call
->debug_id
, rxrpc_call_states
[call
->state
]);
140 trace_rxrpc_receive(call
, rxrpc_receive_end
, 0, call
->rx_top
);
141 ASSERTCMP(call
->rx_hard_ack
, ==, call
->rx_top
);
143 if (call
->state
== RXRPC_CALL_CLIENT_RECV_REPLY
) {
144 rxrpc_propose_ACK(call
, RXRPC_ACK_IDLE
, serial
, false, true,
145 rxrpc_propose_ack_terminal_ack
);
146 //rxrpc_send_ack_packet(call, false, NULL);
149 write_lock_bh(&call
->state_lock
);
151 switch (call
->state
) {
152 case RXRPC_CALL_CLIENT_RECV_REPLY
:
153 __rxrpc_call_completed(call
);
154 write_unlock_bh(&call
->state_lock
);
157 case RXRPC_CALL_SERVER_RECV_REQUEST
:
158 call
->tx_phase
= true;
159 call
->state
= RXRPC_CALL_SERVER_ACK_REQUEST
;
160 call
->expect_req_by
= jiffies
+ MAX_JIFFY_OFFSET
;
161 write_unlock_bh(&call
->state_lock
);
162 rxrpc_propose_ACK(call
, RXRPC_ACK_DELAY
, serial
, false, true,
163 rxrpc_propose_ack_processing_op
);
166 write_unlock_bh(&call
->state_lock
);
172 * Discard a packet we've used up and advance the Rx window by one.
174 static void rxrpc_rotate_rx_window(struct rxrpc_call
*call
)
176 struct rxrpc_skb_priv
*sp
;
178 rxrpc_serial_t serial
;
179 rxrpc_seq_t hard_ack
, top
;
184 _enter("%d", call
->debug_id
);
186 hard_ack
= call
->rx_hard_ack
;
187 top
= smp_load_acquire(&call
->rx_top
);
188 ASSERT(before(hard_ack
, top
));
191 ix
= hard_ack
& RXRPC_RXTX_BUFF_MASK
;
192 skb
= call
->rxtx_buffer
[ix
];
193 rxrpc_see_skb(skb
, rxrpc_skb_rotated
);
196 subpacket
= call
->rxtx_annotations
[ix
] & RXRPC_RX_ANNO_SUBPACKET
;
197 serial
= sp
->hdr
.serial
+ subpacket
;
199 if (subpacket
== sp
->nr_subpackets
- 1 &&
200 sp
->rx_flags
& RXRPC_SKB_INCL_LAST
)
203 call
->rxtx_buffer
[ix
] = NULL
;
204 call
->rxtx_annotations
[ix
] = 0;
205 /* Barrier against rxrpc_input_data(). */
206 smp_store_release(&call
->rx_hard_ack
, hard_ack
);
208 rxrpc_free_skb(skb
, rxrpc_skb_freed
);
210 trace_rxrpc_receive(call
, rxrpc_receive_rotate
, serial
, hard_ack
);
212 rxrpc_end_rx_phase(call
, serial
);
214 /* Check to see if there's an ACK that needs sending. */
215 if (after_eq(hard_ack
, call
->ackr_consumed
+ 2) ||
216 after_eq(top
, call
->ackr_seen
+ 2) ||
217 (hard_ack
== top
&& after(hard_ack
, call
->ackr_consumed
)))
218 rxrpc_propose_ACK(call
, RXRPC_ACK_DELAY
, serial
,
220 rxrpc_propose_ack_rotate_rx
);
221 if (call
->ackr_reason
&& call
->ackr_reason
!= RXRPC_ACK_DELAY
)
222 rxrpc_send_ack_packet(call
, false, NULL
);
227 * Decrypt and verify a (sub)packet. The packet's length may be changed due to
228 * padding, but if this is the case, the packet length will be resident in the
229 * socket buffer. Note that we can't modify the master skb info as the skb may
230 * be the home to multiple subpackets.
232 static int rxrpc_verify_packet(struct rxrpc_call
*call
, struct sk_buff
*skb
,
234 unsigned int offset
, unsigned int len
)
236 struct rxrpc_skb_priv
*sp
= rxrpc_skb(skb
);
237 rxrpc_seq_t seq
= sp
->hdr
.seq
;
238 u16 cksum
= sp
->hdr
.cksum
;
239 u8 subpacket
= annotation
& RXRPC_RX_ANNO_SUBPACKET
;
243 /* For all but the head jumbo subpacket, the security checksum is in a
244 * jumbo header immediately prior to the data.
248 if (skb_copy_bits(skb
, offset
- 2, &tmp
, 2) < 0)
254 return call
->security
->verify_packet(call
, skb
, offset
, len
,
259 * Locate the data within a packet. This is complicated by:
261 * (1) An skb may contain a jumbo packet - so we have to find the appropriate
264 * (2) The (sub)packets may be encrypted and, if so, the encrypted portion
265 * contains an extra header which includes the true length of the data,
266 * excluding any encrypted padding.
268 static int rxrpc_locate_data(struct rxrpc_call
*call
, struct sk_buff
*skb
,
270 unsigned int *_offset
, unsigned int *_len
,
273 struct rxrpc_skb_priv
*sp
= rxrpc_skb(skb
);
274 unsigned int offset
= sizeof(struct rxrpc_wire_header
);
278 u8 annotation
= *_annotation
;
279 u8 subpacket
= annotation
& RXRPC_RX_ANNO_SUBPACKET
;
281 /* Locate the subpacket */
282 offset
+= subpacket
* RXRPC_JUMBO_SUBPKTLEN
;
283 len
= skb
->len
- offset
;
284 if (subpacket
< sp
->nr_subpackets
- 1)
285 len
= RXRPC_JUMBO_DATALEN
;
286 else if (sp
->rx_flags
& RXRPC_SKB_INCL_LAST
)
289 if (!(annotation
& RXRPC_RX_ANNO_VERIFIED
)) {
290 ret
= rxrpc_verify_packet(call
, skb
, annotation
, offset
, len
);
293 *_annotation
|= RXRPC_RX_ANNO_VERIFIED
;
299 call
->security
->locate_data(call
, skb
, _offset
, _len
);
304 * Deliver messages to a call. This keeps processing packets until the buffer
305 * is filled and we find either more DATA (returns 0) or the end of the DATA
306 * (returns 1). If more packets are required, it returns -EAGAIN.
308 static int rxrpc_recvmsg_data(struct socket
*sock
, struct rxrpc_call
*call
,
309 struct msghdr
*msg
, struct iov_iter
*iter
,
310 size_t len
, int flags
, size_t *_offset
)
312 struct rxrpc_skb_priv
*sp
;
314 rxrpc_serial_t serial
;
315 rxrpc_seq_t hard_ack
, top
, seq
;
318 unsigned int rx_pkt_offset
, rx_pkt_len
;
319 int ix
, copy
, ret
= -EAGAIN
, ret2
;
321 if (test_and_clear_bit(RXRPC_CALL_RX_UNDERRUN
, &call
->flags
) &&
323 rxrpc_send_ack_packet(call
, false, NULL
);
325 rx_pkt_offset
= call
->rx_pkt_offset
;
326 rx_pkt_len
= call
->rx_pkt_len
;
327 rx_pkt_last
= call
->rx_pkt_last
;
329 if (call
->state
>= RXRPC_CALL_SERVER_ACK_REQUEST
) {
330 seq
= call
->rx_hard_ack
;
335 /* Barriers against rxrpc_input_data(). */
336 hard_ack
= call
->rx_hard_ack
;
339 while (top
= smp_load_acquire(&call
->rx_top
),
342 ix
= seq
& RXRPC_RXTX_BUFF_MASK
;
343 skb
= call
->rxtx_buffer
[ix
];
345 trace_rxrpc_recvmsg(call
, rxrpc_recvmsg_hole
, seq
,
346 rx_pkt_offset
, rx_pkt_len
, 0);
350 rxrpc_see_skb(skb
, rxrpc_skb_seen
);
353 if (!(flags
& MSG_PEEK
)) {
354 serial
= sp
->hdr
.serial
;
355 serial
+= call
->rxtx_annotations
[ix
] & RXRPC_RX_ANNO_SUBPACKET
;
356 trace_rxrpc_receive(call
, rxrpc_receive_front
,
361 sock_recv_timestamp(msg
, sock
->sk
, skb
);
363 if (rx_pkt_offset
== 0) {
364 ret2
= rxrpc_locate_data(call
, skb
,
365 &call
->rxtx_annotations
[ix
],
366 &rx_pkt_offset
, &rx_pkt_len
,
368 trace_rxrpc_recvmsg(call
, rxrpc_recvmsg_next
, seq
,
369 rx_pkt_offset
, rx_pkt_len
, ret2
);
375 trace_rxrpc_recvmsg(call
, rxrpc_recvmsg_cont
, seq
,
376 rx_pkt_offset
, rx_pkt_len
, 0);
379 /* We have to handle short, empty and used-up DATA packets. */
380 remain
= len
- *_offset
;
385 ret2
= skb_copy_datagram_iter(skb
, rx_pkt_offset
, iter
,
392 /* handle piecemeal consumption of data packets */
393 rx_pkt_offset
+= copy
;
398 if (rx_pkt_len
> 0) {
399 trace_rxrpc_recvmsg(call
, rxrpc_recvmsg_full
, seq
,
400 rx_pkt_offset
, rx_pkt_len
, 0);
401 ASSERTCMP(*_offset
, ==, len
);
406 /* The whole packet has been transferred. */
407 if (!(flags
& MSG_PEEK
))
408 rxrpc_rotate_rx_window(call
);
413 ASSERTCMP(seq
, ==, READ_ONCE(call
->rx_top
));
422 if (!(flags
& MSG_PEEK
)) {
423 call
->rx_pkt_offset
= rx_pkt_offset
;
424 call
->rx_pkt_len
= rx_pkt_len
;
425 call
->rx_pkt_last
= rx_pkt_last
;
428 trace_rxrpc_recvmsg(call
, rxrpc_recvmsg_data_return
, seq
,
429 rx_pkt_offset
, rx_pkt_len
, ret
);
431 set_bit(RXRPC_CALL_RX_UNDERRUN
, &call
->flags
);
436 * Receive a message from an RxRPC socket
437 * - we need to be careful about two or more threads calling recvmsg
440 int rxrpc_recvmsg(struct socket
*sock
, struct msghdr
*msg
, size_t len
,
443 struct rxrpc_call
*call
;
444 struct rxrpc_sock
*rx
= rxrpc_sk(sock
->sk
);
452 trace_rxrpc_recvmsg(NULL
, rxrpc_recvmsg_enter
, 0, 0, 0, 0);
454 if (flags
& (MSG_OOB
| MSG_TRUNC
))
457 timeo
= sock_rcvtimeo(&rx
->sk
, flags
& MSG_DONTWAIT
);
462 /* Return immediately if a client socket has no outstanding calls */
463 if (RB_EMPTY_ROOT(&rx
->calls
) &&
464 list_empty(&rx
->recvmsg_q
) &&
465 rx
->sk
.sk_state
!= RXRPC_SERVER_LISTENING
) {
466 release_sock(&rx
->sk
);
470 if (list_empty(&rx
->recvmsg_q
)) {
477 release_sock(&rx
->sk
);
479 /* Wait for something to happen */
480 prepare_to_wait_exclusive(sk_sleep(&rx
->sk
), &wait
,
482 ret
= sock_error(&rx
->sk
);
486 if (list_empty(&rx
->recvmsg_q
)) {
487 if (signal_pending(current
))
488 goto wait_interrupted
;
489 trace_rxrpc_recvmsg(NULL
, rxrpc_recvmsg_wait
,
491 timeo
= schedule_timeout(timeo
);
493 finish_wait(sk_sleep(&rx
->sk
), &wait
);
497 /* Find the next call and dequeue it if we're not just peeking. If we
498 * do dequeue it, that comes with a ref that we will need to release.
500 write_lock_bh(&rx
->recvmsg_lock
);
501 l
= rx
->recvmsg_q
.next
;
502 call
= list_entry(l
, struct rxrpc_call
, recvmsg_link
);
503 if (!(flags
& MSG_PEEK
))
504 list_del_init(&call
->recvmsg_link
);
506 rxrpc_get_call(call
, rxrpc_call_got
);
507 write_unlock_bh(&rx
->recvmsg_lock
);
509 trace_rxrpc_recvmsg(call
, rxrpc_recvmsg_dequeue
, 0, 0, 0, 0);
511 /* We're going to drop the socket lock, so we need to lock the call
512 * against interference by sendmsg.
514 if (!mutex_trylock(&call
->user_mutex
)) {
516 if (flags
& MSG_DONTWAIT
)
517 goto error_requeue_call
;
519 if (mutex_lock_interruptible(&call
->user_mutex
) < 0)
520 goto error_requeue_call
;
523 release_sock(&rx
->sk
);
525 if (test_bit(RXRPC_CALL_RELEASED
, &call
->flags
))
528 if (test_bit(RXRPC_CALL_HAS_USERID
, &call
->flags
)) {
529 if (flags
& MSG_CMSG_COMPAT
) {
530 unsigned int id32
= call
->user_call_ID
;
532 ret
= put_cmsg(msg
, SOL_RXRPC
, RXRPC_USER_CALL_ID
,
533 sizeof(unsigned int), &id32
);
535 unsigned long idl
= call
->user_call_ID
;
537 ret
= put_cmsg(msg
, SOL_RXRPC
, RXRPC_USER_CALL_ID
,
538 sizeof(unsigned long), &idl
);
541 goto error_unlock_call
;
545 struct sockaddr_rxrpc
*srx
= msg
->msg_name
;
546 size_t len
= sizeof(call
->peer
->srx
);
548 memcpy(msg
->msg_name
, &call
->peer
->srx
, len
);
549 srx
->srx_service
= call
->service_id
;
550 msg
->msg_namelen
= len
;
553 switch (READ_ONCE(call
->state
)) {
554 case RXRPC_CALL_SERVER_ACCEPTING
:
555 ret
= rxrpc_recvmsg_new_call(rx
, call
, msg
, flags
);
557 case RXRPC_CALL_CLIENT_RECV_REPLY
:
558 case RXRPC_CALL_SERVER_RECV_REQUEST
:
559 case RXRPC_CALL_SERVER_ACK_REQUEST
:
560 ret
= rxrpc_recvmsg_data(sock
, call
, msg
, &msg
->msg_iter
, len
,
565 if (after(call
->rx_top
, call
->rx_hard_ack
) &&
566 call
->rxtx_buffer
[(call
->rx_hard_ack
+ 1) & RXRPC_RXTX_BUFF_MASK
])
567 rxrpc_notify_socket(call
);
575 goto error_unlock_call
;
577 if (call
->state
== RXRPC_CALL_COMPLETE
) {
578 ret
= rxrpc_recvmsg_term(call
, msg
);
580 goto error_unlock_call
;
581 if (!(flags
& MSG_PEEK
))
582 rxrpc_release_call(rx
, call
);
583 msg
->msg_flags
|= MSG_EOR
;
588 msg
->msg_flags
|= MSG_MORE
;
590 msg
->msg_flags
&= ~MSG_MORE
;
594 mutex_unlock(&call
->user_mutex
);
595 rxrpc_put_call(call
, rxrpc_call_put
);
596 trace_rxrpc_recvmsg(call
, rxrpc_recvmsg_return
, 0, 0, 0, ret
);
600 if (!(flags
& MSG_PEEK
)) {
601 write_lock_bh(&rx
->recvmsg_lock
);
602 list_add(&call
->recvmsg_link
, &rx
->recvmsg_q
);
603 write_unlock_bh(&rx
->recvmsg_lock
);
604 trace_rxrpc_recvmsg(call
, rxrpc_recvmsg_requeue
, 0, 0, 0, 0);
606 rxrpc_put_call(call
, rxrpc_call_put
);
609 release_sock(&rx
->sk
);
611 trace_rxrpc_recvmsg(call
, rxrpc_recvmsg_return
, 0, 0, 0, ret
);
615 ret
= sock_intr_errno(timeo
);
617 finish_wait(sk_sleep(&rx
->sk
), &wait
);
623 * rxrpc_kernel_recv_data - Allow a kernel service to receive data/info
624 * @sock: The socket that the call exists on
625 * @call: The call to send data through
626 * @iter: The buffer to receive into
627 * @want_more: True if more data is expected to be read
628 * @_abort: Where the abort code is stored if -ECONNABORTED is returned
629 * @_service: Where to store the actual service ID (may be upgraded)
631 * Allow a kernel service to receive data and pick up information about the
632 * state of a call. Returns 0 if got what was asked for and there's more
633 * available, 1 if we got what was asked for and we're at the end of the data
634 * and -EAGAIN if we need more data.
636 * Note that we may return -EAGAIN to drain empty packets at the end of the
637 * data, even if we've already copied over the requested data.
639 * *_abort should also be initialised to 0.
641 int rxrpc_kernel_recv_data(struct socket
*sock
, struct rxrpc_call
*call
,
642 struct iov_iter
*iter
,
643 bool want_more
, u32
*_abort
, u16
*_service
)
648 _enter("{%d,%s},%zu,%d",
649 call
->debug_id
, rxrpc_call_states
[call
->state
],
650 iov_iter_count(iter
), want_more
);
652 ASSERTCMP(call
->state
, !=, RXRPC_CALL_SERVER_ACCEPTING
);
654 mutex_lock(&call
->user_mutex
);
656 switch (READ_ONCE(call
->state
)) {
657 case RXRPC_CALL_CLIENT_RECV_REPLY
:
658 case RXRPC_CALL_SERVER_RECV_REQUEST
:
659 case RXRPC_CALL_SERVER_ACK_REQUEST
:
660 ret
= rxrpc_recvmsg_data(sock
, call
, NULL
, iter
,
661 iov_iter_count(iter
), 0,
666 /* We can only reach here with a partially full buffer if we
667 * have reached the end of the data. We must otherwise have a
668 * full buffer or have been given -EAGAIN.
671 if (iov_iter_count(iter
) > 0)
674 goto read_phase_complete
;
683 case RXRPC_CALL_COMPLETE
:
694 switch (call
->ackr_reason
) {
697 case RXRPC_ACK_DELAY
:
702 rxrpc_send_ack_packet(call
, false, NULL
);
706 *_service
= call
->service_id
;
707 mutex_unlock(&call
->user_mutex
);
708 _leave(" = %d [%zu,%d]", ret
, iov_iter_count(iter
), *_abort
);
712 trace_rxrpc_rx_eproto(call
, 0, tracepoint_string("short_data"));
716 trace_rxrpc_rx_eproto(call
, 0, tracepoint_string("excess_data"));
720 *_abort
= call
->abort_code
;
722 if (call
->completion
== RXRPC_CALL_SUCCEEDED
) {
724 if (iov_iter_count(iter
) > 0)
729 EXPORT_SYMBOL(rxrpc_kernel_recv_data
);
732 * rxrpc_kernel_get_reply_time - Get timestamp on first reply packet
733 * @sock: The socket that the call exists on
734 * @call: The call to query
735 * @_ts: Where to put the timestamp
737 * Retrieve the timestamp from the first DATA packet of the reply if it is
738 * in the ring. Returns true if successful, false if not.
740 bool rxrpc_kernel_get_reply_time(struct socket
*sock
, struct rxrpc_call
*call
,
744 rxrpc_seq_t hard_ack
, top
, seq
;
745 bool success
= false;
747 mutex_lock(&call
->user_mutex
);
749 if (READ_ONCE(call
->state
) != RXRPC_CALL_CLIENT_RECV_REPLY
)
752 hard_ack
= call
->rx_hard_ack
;
757 top
= smp_load_acquire(&call
->rx_top
);
761 skb
= call
->rxtx_buffer
[seq
& RXRPC_RXTX_BUFF_MASK
];
765 *_ts
= skb_get_ktime(skb
);
769 mutex_unlock(&call
->user_mutex
);
772 EXPORT_SYMBOL(rxrpc_kernel_get_reply_time
);