1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /* RxRPC packet transmission
4 * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
5 * Written by David Howells (dhowells@redhat.com)
8 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
10 #include <linux/net.h>
11 #include <linux/gfp.h>
12 #include <linux/skbuff.h>
13 #include <linux/export.h>
15 #include <net/af_rxrpc.h>
16 #include "ar-internal.h"
// Buffer layout used by rxrpc_send_ack_packet() to build an outgoing ACK: the
// wire header, the fixed ACK body and the trailing ackinfo record.
// NOTE(review): the embedded line numbers jump from 20 to 23 and no closing
// brace is visible, so one or more members between ack and ackinfo (the
// soft-ACK bytes written by rxrpc_fill_out_ack() presumably live there) are
// missing from this extract - confirm against the original file.
18 struct rxrpc_ack_buffer
{
19 struct rxrpc_wire_header whdr
;
20 struct rxrpc_ackpacket ack
;
23 struct rxrpc_ackinfo ackinfo
;
// Buffer layout used by rxrpc_send_abort_packet() to build an outgoing ABORT.
// Only the wire header member is visible here.
// NOTE(review): rxrpc_send_abort_packet() also stores to an abort_code member,
// whose declaration (and the closing brace) is missing from this extract.
26 struct rxrpc_abort_buffer
{
27 struct rxrpc_wire_header whdr
;
// Payload appended to the VERSION keepalive packet by rxrpc_send_keepalive().
// It is the empty string, so sizeof() yields 1 and the payload is just the
// terminating NUL byte.
31 static const char rxrpc_keepalive_string
[] = "";
34 * Increase Tx backoff on transmission failure and clear it on success.
// Adjust call->tx_backoff after a transmission attempt that returned @ret.
//
// The current value is sampled with READ_ONCE(). Two stores are visible: one
// writes the sampled value plus one, the other writes zero. Going by the
// summary line that precedes this function, the increment belongs to the
// failure case and the reset to the success case.
//
// NOTE(review): the tests on @ret, and any upper bound applied before the
// increment, are not visible in this extract - confirm against the original.
36 static void rxrpc_tx_backoff(struct rxrpc_call
*call
, int ret
)
39 u16 tx_backoff
= READ_ONCE(call
->tx_backoff
);
42 WRITE_ONCE(call
->tx_backoff
, tx_backoff
+ 1);
44 WRITE_ONCE(call
->tx_backoff
, 0);
49 * Arrange for a keepalive ping a certain time after we last transmitted. This
50 * lets the far side know we're still interested in this call and helps keep
51 * the route through any intervening firewall open.
53 * Receiving a response to the ping will prevent the ->expect_rx_by timer from expiring.
// Schedule the next keepalive for @call.
//
// The interval is one sixth of call->next_rx_timo. The result is published in
// call->keepalive_at with WRITE_ONCE() and handed to rxrpc_reduce_call_timer()
// together with the jiffies sample taken on entry.
//
// NOTE(review): as shown, keepalive_at holds only the relative interval. The
// embedded line numbers skip 59-60, so a step that makes it absolute
// (presumably adding now) is likely missing from this extract - confirm.
56 static void rxrpc_set_keepalive(struct rxrpc_call
*call
)
58 unsigned long now
= jiffies
, keepalive_at
= call
->next_rx_timo
/ 6;
61 WRITE_ONCE(call
->keepalive_at
, keepalive_at
);
62 rxrpc_reduce_call_timer(call
, keepalive_at
, now
,
63 rxrpc_timer_set_for_keepalive
);
67 * Fill out an ACK packet.
// Populate the ACK body, the soft-ACK bytes and the ackinfo trailer of @pkt
// from the current receive state of @call.
//
// Visible behaviour:
//  - samples call->ackr_serial, call->rx_hard_ack (READ_ONCE) and call->rx_top
//    (smp_load_acquire) and reports the hard-ACK point through @_hard_ack;
//  - fills the fixed ACK fields in network byte order; nAcks is top - hard_ack;
//  - sets RXRPC_REQUEST_ACK in the wire header when the reason is
//    RXRPC_ACK_PING;
//  - when top is after hard_ack, writes one soft-ACK byte per sequence number,
//    choosing between ACK and NACK on whether the matching rxtx_buffer slot
//    is occupied;
//  - fills ackinfo: rxMTU from rxrpc_rx_mtu, maxMTU from the peer if_mtu minus
//    hdrsize, rwind from call->rx_winsize, and jumbo_max (1 once nr_jumbo_bad
//    exceeds 3, otherwise rxrpc_rx_jumbo_max).
//
// Returns top - hard_ack + 3. The caller, rxrpc_send_ack_packet(), adds this to
// sizeof(whdr) + sizeof(ack) to get the length of the first iovec.
//
// NOTE(review): the parameter list is cut off after @_hard_ack (the caller also
// passes a pointer for top and the reason), and the declarations of reason,
// ackp, ix, mtu and jmax, the loop header and the initial value of seq are not
// visible in this extract. The meaning of the extra 3 bytes is not shown
// here either (presumably padding) - confirm against the original file.
69 static size_t rxrpc_fill_out_ack(struct rxrpc_connection
*conn
,
70 struct rxrpc_call
*call
,
71 struct rxrpc_ack_buffer
*pkt
,
72 rxrpc_seq_t
*_hard_ack
,
76 rxrpc_serial_t serial
;
77 rxrpc_seq_t hard_ack
, top
, seq
;
82 /* Barrier against rxrpc_input_data(). */
83 serial
= call
->ackr_serial
;
84 hard_ack
= READ_ONCE(call
->rx_hard_ack
);
85 top
= smp_load_acquire(&call
->rx_top
);
86 *_hard_ack
= hard_ack
;
// Fixed part of the ACK body; multi-byte fields go out in network byte order.
89 pkt
->ack
.bufferSpace
= htons(8);
90 pkt
->ack
.maxSkew
= htons(0);
91 pkt
->ack
.firstPacket
= htonl(hard_ack
+ 1);
92 pkt
->ack
.previousPacket
= htonl(call
->ackr_prev_seq
);
93 pkt
->ack
.serial
= htonl(serial
);
94 pkt
->ack
.reason
= reason
;
95 pkt
->ack
.nAcks
= top
- hard_ack
;
97 if (reason
== RXRPC_ACK_PING
)
98 pkt
->whdr
.flags
|= RXRPC_REQUEST_ACK
;
// Soft-ACK table: one byte per sequence number, while seq is not beyond top.
100 if (after(top
, hard_ack
)) {
103 ix
= seq
& RXRPC_RXTX_BUFF_MASK
;
104 if (call
->rxtx_buffer
[ix
])
105 *ackp
++ = RXRPC_ACK_TYPE_ACK
;
107 *ackp
++ = RXRPC_ACK_TYPE_NACK
;
109 } while (before_eq(seq
, top
));
// Advertise our receive parameters in the ackinfo trailer.
112 mtu
= conn
->params
.peer
->if_mtu
;
113 mtu
-= conn
->params
.peer
->hdrsize
;
114 jmax
= (call
->nr_jumbo_bad
> 3) ? 1 : rxrpc_rx_jumbo_max
;
115 pkt
->ackinfo
.rxMTU
= htonl(rxrpc_rx_mtu
);
116 pkt
->ackinfo
.maxMTU
= htonl(mtu
);
117 pkt
->ackinfo
.rwind
= htonl(call
->rx_winsize
);
118 pkt
->ackinfo
.jumbo_max
= htonl(jmax
);
123 return top
- hard_ack
+ 3;
127 * Record the beginning of an RTT probe.
// Claim a free RTT probe slot on @call and record the probe in it.
//
// Visible behaviour: take a snapshot of call->rtt_avail; pick the lowest set
// bit within RXRPC_CALL_RTT_AVAIL_MASK with __ffs(); claim it atomically with
// test_and_clear_bit(); store @serial and the current real time in the
// per-slot arrays; issue smp_wmb() so those stores are ordered before the
// pending bit (slot + RXRPC_CALL_RTT_PEND_SHIFT) is set; then emit a trace
// event tagged with @why. A second trace call tagged rxrpc_rtt_tx_no_slot is
// the tail of the failure path.
//
// Returns an int that callers store as rtt_slot. Callers initialise rtt_slot
// to -1 and rxrpc_cancel_rtt_probe() treats -1 as no slot.
//
// NOTE(review): the declaration of rtt_slot, the jumps taken when no slot is
// free or the claim loses a race, the failure label and both return
// statements are not visible in this extract - confirm against the original.
129 static int rxrpc_begin_rtt_probe(struct rxrpc_call
*call
, rxrpc_serial_t serial
,
130 enum rxrpc_rtt_tx_trace why
)
132 unsigned long avail
= call
->rtt_avail
;
135 if (!(avail
& RXRPC_CALL_RTT_AVAIL_MASK
))
138 rtt_slot
= __ffs(avail
& RXRPC_CALL_RTT_AVAIL_MASK
);
139 if (!test_and_clear_bit(rtt_slot
, &call
->rtt_avail
))
142 call
->rtt_serial
[rtt_slot
] = serial
;
143 call
->rtt_sent_at
[rtt_slot
] = ktime_get_real();
144 smp_wmb(); /* Write data before avail bit */
145 set_bit(rtt_slot
+ RXRPC_CALL_RTT_PEND_SHIFT
, &call
->rtt_avail
);
147 trace_rxrpc_rtt_tx(call
, why
, rtt_slot
, serial
);
151 trace_rxrpc_rtt_tx(call
, rxrpc_rtt_tx_no_slot
, rtt_slot
, serial
);
156 * Cancel an RTT probe.
// Undo rxrpc_begin_rtt_probe() for @rtt_slot on @call.
//
// Does nothing when @rtt_slot is -1 (no slot was claimed). Otherwise the
// pending bit (slot + RXRPC_CALL_RTT_PEND_SHIFT) is cleared first, smp_wmb()
// orders that before the slot bit is set again in call->rtt_avail, and a
// trace event tagged rxrpc_rtt_tx_cancel is emitted with @serial.
158 static void rxrpc_cancel_rtt_probe(struct rxrpc_call
*call
,
159 rxrpc_serial_t serial
, int rtt_slot
)
161 if (rtt_slot
!= -1) {
162 clear_bit(rtt_slot
+ RXRPC_CALL_RTT_PEND_SHIFT
, &call
->rtt_avail
);
163 smp_wmb(); /* Clear pending bit before setting slot */
164 set_bit(rtt_slot
, &call
->rtt_avail
);
165 trace_rxrpc_rtt_tx(call
, rxrpc_rtt_tx_cancel
, rtt_slot
, serial
);
170 * Send an ACK call packet.
// Build and transmit an ACK packet for @call.
//
// Visible steps:
//  - test RXRPC_CALL_DISCONNECTED on the call (the early exit it guards is not
//    visible);
//  - allocate a zeroed rxrpc_ack_buffer with kzalloc(GFP_KERNEL);
//  - address the message to call->peer and fill the wire header from the
//    connection and the call;
//  - under call->lock, pick the reason (RXRPC_ACK_PING or call->ackr_reason,
//    which is then reset to 0) and run rxrpc_fill_out_ack();
//  - send two iovecs with kernel_sendmsg(): header + ACK body + n soft-ACK
//    bytes, then the ackinfo trailer;
//  - stamp peer->last_tx_at, emit the tx traces and update the Tx backoff;
//  - while call->state is below RXRPC_CALL_COMPLETE, either cancel the RTT
//    probe and re-propose the ACK, or advance ackr_consumed / ackr_seen under
//    call->lock and rearm the keepalive.
//
// NOTE(review): the embedded line numbers skip repeatedly (for example 188 to
// 194 and 253 to 255). Not visible in this extract: the assignment of conn,
// the declarations of msg, iov, len, n and reason, the check of the kzalloc()
// result, the conditions that use @ping, any store through @_serial, the
// tests on ret, the release of the buffer and the return statements.
// Presumably @ping selects the PING reason and the RTT probe, and @_serial
// receives the serial number used - confirm against the original file.
172 int rxrpc_send_ack_packet(struct rxrpc_call
*call
, bool ping
,
173 rxrpc_serial_t
*_serial
)
175 struct rxrpc_connection
*conn
;
176 struct rxrpc_ack_buffer
*pkt
;
179 rxrpc_serial_t serial
;
180 rxrpc_seq_t hard_ack
, top
;
182 int ret
, rtt_slot
= -1;
185 if (test_bit(RXRPC_CALL_DISCONNECTED
, &call
->flags
))
188 pkt
= kzalloc(sizeof(*pkt
), GFP_KERNEL
);
194 msg
.msg_name
= &call
->peer
->srx
.transport
;
195 msg
.msg_namelen
= call
->peer
->srx
.transport_len
;
196 msg
.msg_control
= NULL
;
197 msg
.msg_controllen
= 0;
200 pkt
->whdr
.epoch
= htonl(conn
->proto
.epoch
);
201 pkt
->whdr
.cid
= htonl(call
->cid
);
202 pkt
->whdr
.callNumber
= htonl(call
->call_id
);
204 pkt
->whdr
.type
= RXRPC_PACKET_TYPE_ACK
;
205 pkt
->whdr
.flags
= RXRPC_SLOW_START_OK
| conn
->out_clientflag
;
206 pkt
->whdr
.userStatus
= 0;
207 pkt
->whdr
.securityIndex
= call
->security_ix
;
209 pkt
->whdr
.serviceId
= htons(call
->service_id
);
// The reason is chosen and the ACK contents are generated under call->lock.
211 spin_lock_bh(&call
->lock
);
213 reason
= RXRPC_ACK_PING
;
215 reason
= call
->ackr_reason
;
216 if (!call
->ackr_reason
) {
217 spin_unlock_bh(&call
->lock
);
221 call
->ackr_reason
= 0;
223 n
= rxrpc_fill_out_ack(conn
, call
, pkt
, &hard_ack
, &top
, reason
);
225 spin_unlock_bh(&call
->lock
);
// First iovec: header, ACK body and n soft-ACK bytes; second: ackinfo trailer.
227 iov
[0].iov_base
= pkt
;
228 iov
[0].iov_len
= sizeof(pkt
->whdr
) + sizeof(pkt
->ack
) + n
;
229 iov
[1].iov_base
= &pkt
->ackinfo
;
230 iov
[1].iov_len
= sizeof(pkt
->ackinfo
);
231 len
= iov
[0].iov_len
+ iov
[1].iov_len
;
233 serial
= atomic_inc_return(&conn
->serial
);
234 pkt
->whdr
.serial
= htonl(serial
);
235 trace_rxrpc_tx_ack(call
->debug_id
, serial
,
236 ntohl(pkt
->ack
.firstPacket
),
237 ntohl(pkt
->ack
.serial
),
238 pkt
->ack
.reason
, pkt
->ack
.nAcks
);
243 rtt_slot
= rxrpc_begin_rtt_probe(call
, serial
, rxrpc_rtt_tx_ping
);
245 ret
= kernel_sendmsg(conn
->params
.local
->socket
, &msg
, iov
, 2, len
);
246 conn
->params
.peer
->last_tx_at
= ktime_get_seconds();
248 trace_rxrpc_tx_fail(call
->debug_id
, serial
, ret
,
249 rxrpc_tx_point_call_ack
);
251 trace_rxrpc_tx_packet(call
->debug_id
, &pkt
->whdr
,
252 rxrpc_tx_point_call_ack
);
253 rxrpc_tx_backoff(call
, ret
);
255 if (call
->state
< RXRPC_CALL_COMPLETE
) {
257 rxrpc_cancel_rtt_probe(call
, serial
, rtt_slot
);
258 rxrpc_propose_ACK(call
, pkt
->ack
.reason
,
259 ntohl(pkt
->ack
.serial
),
261 rxrpc_propose_ack_retry_tx
);
// ackr_consumed and ackr_seen only ever move forward.
263 spin_lock_bh(&call
->lock
);
264 if (after(hard_ack
, call
->ackr_consumed
))
265 call
->ackr_consumed
= hard_ack
;
266 if (after(top
, call
->ackr_seen
))
267 call
->ackr_seen
= top
;
268 spin_unlock_bh(&call
->lock
);
271 rxrpc_set_keepalive(call
);
280 * Send an ABORT call packet.
// Build and transmit an ABORT packet for @call.
//
// Visible steps:
//  - two guards are evaluated: a client call that has RXRPC_CALL_TX_LAST set,
//    and a call that has RXRPC_CALL_DISCONNECTED set (the exits they lead to
//    are not visible);
//  - the packet is built in an on-stack rxrpc_abort_buffer: wire header from
//    the connection and the call, type RXRPC_PACKET_TYPE_ABORT, and
//    call->abort_code in network byte order;
//  - a fresh serial number is taken from conn->serial;
//  - the buffer is sent as one iovec with kernel_sendmsg();
//  - peer->last_tx_at is stamped, tx traces are emitted and the Tx backoff is
//    updated from the send result.
//
// NOTE(review): the assignment of conn, the declarations of msg, iov and ret,
// the return values of the two guards, the test on ret that selects between
// the two trace calls and the final return are not visible in this extract.
282 int rxrpc_send_abort_packet(struct rxrpc_call
*call
)
284 struct rxrpc_connection
*conn
;
285 struct rxrpc_abort_buffer pkt
;
288 rxrpc_serial_t serial
;
291 /* Don't bother sending aborts for a client call once the server has
292 * hard-ACK'd all of its request data. After that point, we're not
293 * going to stop the operation proceeding, and whilst we might limit
294 * the reply, it's not worth it if we can send a new call on the same
295 * channel instead, thereby closing off this call.
297 if (rxrpc_is_client_call(call
) &&
298 test_bit(RXRPC_CALL_TX_LAST
, &call
->flags
))
301 if (test_bit(RXRPC_CALL_DISCONNECTED
, &call
->flags
))
306 msg
.msg_name
= &call
->peer
->srx
.transport
;
307 msg
.msg_namelen
= call
->peer
->srx
.transport_len
;
308 msg
.msg_control
= NULL
;
309 msg
.msg_controllen
= 0;
312 pkt
.whdr
.epoch
= htonl(conn
->proto
.epoch
);
313 pkt
.whdr
.cid
= htonl(call
->cid
);
314 pkt
.whdr
.callNumber
= htonl(call
->call_id
);
316 pkt
.whdr
.type
= RXRPC_PACKET_TYPE_ABORT
;
317 pkt
.whdr
.flags
= conn
->out_clientflag
;
318 pkt
.whdr
.userStatus
= 0;
319 pkt
.whdr
.securityIndex
= call
->security_ix
;
321 pkt
.whdr
.serviceId
= htons(call
->service_id
);
322 pkt
.abort_code
= htonl(call
->abort_code
);
324 iov
[0].iov_base
= &pkt
;
325 iov
[0].iov_len
= sizeof(pkt
);
327 serial
= atomic_inc_return(&conn
->serial
);
328 pkt
.whdr
.serial
= htonl(serial
);
330 ret
= kernel_sendmsg(conn
->params
.local
->socket
,
331 &msg
, iov
, 1, sizeof(pkt
));
332 conn
->params
.peer
->last_tx_at
= ktime_get_seconds();
334 trace_rxrpc_tx_fail(call
->debug_id
, serial
, ret
,
335 rxrpc_tx_point_call_abort
);
337 trace_rxrpc_tx_packet(call
->debug_id
, &pkt
.whdr
,
338 rxrpc_tx_point_call_abort
);
339 rxrpc_tx_backoff(call
, ret
);
344 * send a packet through the transport endpoint
// Transmit one DATA packet for @call from @skb.
//
// Visible steps:
//  - link the call onto peer->error_targets (under peer->lock) if its
//    error_link is not hashed yet;
//  - take a fresh serial number from conn->serial for this transmission;
//  - build the wire header from the call and from the private header stored
//    in the skb; userStatus is set to RXRPC_USERSTATUS_SERVICE_UPGRADE when the
//    connection has RXRPC_CONN_PROBING_FOR_UPGRADE set (a second condition is
//    not visible);
//  - send two iovecs: the wire header, then skb->head for skb->len bytes;
//  - decide whether to set RXRPC_REQUEST_ACK (lost-ACK event, slow-start
//    congestion mode, fewer than 3 RTT samples on an odd sequence number, or
//    the last request being older than 1000 ms);
//  - with CONFIG_AF_RXRPC_INJECT_LOSS enabled, a static counter is tested so
//    that every eighth call takes the traced loss branch;
//  - a payload of at least peer->maxdata bytes goes straight to the
//    fragmentable path; otherwise the packet is sent while holding defrag_sem
//    for read, and -EMSGSIZE from that send also diverts to the fragmentable
//    path;
//  - after the send: stamp peer->last_tx_at, trace, update the Tx backoff;
//    when an ACK was requested, record rtt_last_req and (with more than one RTT
//    sample) arm the lost-ACK timer; for sequence number 1, arm expect_rx_by
//    the first time RXRPC_CALL_BEGAN_RX_TIMER gets set; rearm the keepalive;
//  - on the failure branch, a call that never began its rx timer is completed
//    with RXRPC_CALL_LOCAL_ERROR;
//  - the fragmentable path takes defrag_sem for write and brackets
//    kernel_sendmsg() with ip_sock_set_mtu_discover() calls, selected by the
//    local transport address family.
//
// NOTE(review): the third parameter (the code uses a variable named retrans),
// the declarations of msg, iov, len and lose, the labels, most branch
// conditions on ret, the switch cases and the return statements are not
// visible in this extract. Embedded line 472 is also missing, so ack_lost_at
// as shown is only the value returned by rxrpc_get_rto_backoff(); presumably
// the current jiffies value is added before it is stored - confirm against the
// original file.
346 int rxrpc_send_data_packet(struct rxrpc_call
*call
, struct sk_buff
*skb
,
349 struct rxrpc_connection
*conn
= call
->conn
;
350 struct rxrpc_wire_header whdr
;
351 struct rxrpc_skb_priv
*sp
= rxrpc_skb(skb
);
354 rxrpc_serial_t serial
;
356 int ret
, rtt_slot
= -1;
358 _enter(",{%d}", skb
->len
);
360 if (hlist_unhashed(&call
->error_link
)) {
361 spin_lock_bh(&call
->peer
->lock
);
362 hlist_add_head_rcu(&call
->error_link
, &call
->peer
->error_targets
);
363 spin_unlock_bh(&call
->peer
->lock
);
366 /* Each transmission of a Tx packet needs a new serial number */
367 serial
= atomic_inc_return(&conn
->serial
);
369 whdr
.epoch
= htonl(conn
->proto
.epoch
);
370 whdr
.cid
= htonl(call
->cid
);
371 whdr
.callNumber
= htonl(call
->call_id
);
372 whdr
.seq
= htonl(sp
->hdr
.seq
);
373 whdr
.serial
= htonl(serial
);
374 whdr
.type
= RXRPC_PACKET_TYPE_DATA
;
375 whdr
.flags
= sp
->hdr
.flags
;
377 whdr
.securityIndex
= call
->security_ix
;
378 whdr
._rsvd
= htons(sp
->hdr
._rsvd
);
379 whdr
.serviceId
= htons(call
->service_id
);
381 if (test_bit(RXRPC_CONN_PROBING_FOR_UPGRADE
, &conn
->flags
) &&
383 whdr
.userStatus
= RXRPC_USERSTATUS_SERVICE_UPGRADE
;
385 iov
[0].iov_base
= &whdr
;
386 iov
[0].iov_len
= sizeof(whdr
);
387 iov
[1].iov_base
= skb
->head
;
388 iov
[1].iov_len
= skb
->len
;
389 len
= iov
[0].iov_len
+ iov
[1].iov_len
;
391 msg
.msg_name
= &call
->peer
->srx
.transport
;
392 msg
.msg_namelen
= call
->peer
->srx
.transport_len
;
393 msg
.msg_control
= NULL
;
394 msg
.msg_controllen
= 0;
397 /* If our RTT cache needs working on, request an ACK. Also request
398 * ACKs if a DATA packet appears to have been lost.
400 * However, we mustn't request an ACK on the last reply packet of a
401 * service call, lest OpenAFS incorrectly send us an ACK with some
402 * soft-ACKs in it and then never follow up with a proper hard ACK.
404 if ((!(sp
->hdr
.flags
& RXRPC_LAST_PACKET
) ||
407 (test_and_clear_bit(RXRPC_CALL_EV_ACK_LOST
, &call
->events
) ||
409 call
->cong_mode
== RXRPC_CALL_SLOW_START
||
410 (call
->peer
->rtt_count
< 3 && sp
->hdr
.seq
& 1) ||
411 ktime_before(ktime_add_ms(call
->peer
->rtt_last_req
, 1000),
413 whdr
.flags
|= RXRPC_REQUEST_ACK
;
415 if (IS_ENABLED(CONFIG_AF_RXRPC_INJECT_LOSS
)) {
417 if ((lose
++ & 7) == 7) {
419 trace_rxrpc_tx_data(call
, sp
->hdr
.seq
, serial
,
420 whdr
.flags
, retrans
, true);
425 trace_rxrpc_tx_data(call
, sp
->hdr
.seq
, serial
, whdr
.flags
, retrans
,
428 /* send the packet with the don't fragment bit set if we currently
429 * think it's small enough */
430 if (iov
[1].iov_len
>= call
->peer
->maxdata
)
431 goto send_fragmentable
;
433 down_read(&conn
->params
.local
->defrag_sem
);
435 sp
->hdr
.serial
= serial
;
436 smp_wmb(); /* Set serial before timestamp */
437 skb
->tstamp
= ktime_get_real();
438 if (whdr
.flags
& RXRPC_REQUEST_ACK
)
439 rtt_slot
= rxrpc_begin_rtt_probe(call
, serial
, rxrpc_rtt_tx_data
);
441 /* send the packet by UDP
442 * - returns -EMSGSIZE if UDP would have to fragment the packet
443 * to go out of the interface
444 * - in which case, we'll have processed the ICMP error
445 * message and update the peer record
447 ret
= kernel_sendmsg(conn
->params
.local
->socket
, &msg
, iov
, 2, len
);
448 conn
->params
.peer
->last_tx_at
= ktime_get_seconds();
450 up_read(&conn
->params
.local
->defrag_sem
);
452 rxrpc_cancel_rtt_probe(call
, serial
, rtt_slot
);
453 trace_rxrpc_tx_fail(call
->debug_id
, serial
, ret
,
454 rxrpc_tx_point_call_data_nofrag
);
456 trace_rxrpc_tx_packet(call
->debug_id
, &whdr
,
457 rxrpc_tx_point_call_data_nofrag
);
460 rxrpc_tx_backoff(call
, ret
);
461 if (ret
== -EMSGSIZE
)
462 goto send_fragmentable
;
466 if (whdr
.flags
& RXRPC_REQUEST_ACK
) {
467 call
->peer
->rtt_last_req
= skb
->tstamp
;
468 if (call
->peer
->rtt_count
> 1) {
469 unsigned long nowj
= jiffies
, ack_lost_at
;
471 ack_lost_at
= rxrpc_get_rto_backoff(call
->peer
, retrans
);
473 WRITE_ONCE(call
->ack_lost_at
, ack_lost_at
);
474 rxrpc_reduce_call_timer(call
, ack_lost_at
, nowj
,
475 rxrpc_timer_set_for_lost_ack
);
479 if (sp
->hdr
.seq
== 1 &&
480 !test_and_set_bit(RXRPC_CALL_BEGAN_RX_TIMER
,
482 unsigned long nowj
= jiffies
, expect_rx_by
;
484 expect_rx_by
= nowj
+ call
->next_rx_timo
;
485 WRITE_ONCE(call
->expect_rx_by
, expect_rx_by
);
486 rxrpc_reduce_call_timer(call
, expect_rx_by
, nowj
,
487 rxrpc_timer_set_for_normal
);
490 rxrpc_set_keepalive(call
);
492 /* Cancel the call if the initial transmission fails,
493 * particularly if that's due to network routing issues that
494 * aren't going away anytime soon. The layer above can arrange
495 * the retransmission.
497 if (!test_and_set_bit(RXRPC_CALL_BEGAN_RX_TIMER
, &call
->flags
))
498 rxrpc_set_call_completion(call
, RXRPC_CALL_LOCAL_ERROR
,
502 _leave(" = %d [%u]", ret
, call
->peer
->maxdata
);
506 /* attempt to send this message with fragmentation enabled */
507 _debug("send fragment");
509 down_write(&conn
->params
.local
->defrag_sem
);
511 sp
->hdr
.serial
= serial
;
512 smp_wmb(); /* Set serial before timestamp */
513 skb
->tstamp
= ktime_get_real();
514 if (whdr
.flags
& RXRPC_REQUEST_ACK
)
515 rtt_slot
= rxrpc_begin_rtt_probe(call
, serial
, rxrpc_rtt_tx_data
);
517 switch (conn
->params
.local
->srx
.transport
.family
) {
520 ip_sock_set_mtu_discover(conn
->params
.local
->socket
->sk
,
522 ret
= kernel_sendmsg(conn
->params
.local
->socket
, &msg
,
524 conn
->params
.peer
->last_tx_at
= ktime_get_seconds();
526 ip_sock_set_mtu_discover(conn
->params
.local
->socket
->sk
,
535 rxrpc_cancel_rtt_probe(call
, serial
, rtt_slot
);
536 trace_rxrpc_tx_fail(call
->debug_id
, serial
, ret
,
537 rxrpc_tx_point_call_data_frag
);
539 trace_rxrpc_tx_packet(call
->debug_id
, &whdr
,
540 rxrpc_tx_point_call_data_frag
);
542 rxrpc_tx_backoff(call
, ret
);
544 up_write(&conn
->params
.local
->defrag_sem
);
549 * reject packets through the local endpoint
// Drain local->reject_queue, answering each queued packet with a BUSY or an
// ABORT sent through the local endpoint socket.
//
// Visible steps:
//  - the iovecs (wire header, then a 32-bit code) and the message are set up
//    once, and the wire header is zeroed once, before the loop;
//  - each dequeued skb is classified: RXRPC_SKB_MARK_REJECT_BUSY selects packet
//    type BUSY; RXRPC_SKB_MARK_REJECT_ABORT selects type ABORT, takes the abort
//    code from skb->priority (converted with htonl) and sends header + code;
//    another branch frees the skb;
//  - when rxrpc_extract_addr_from_skb() returns 0, the reply header copies
//    epoch, cid, callNumber and serviceId from the received header. The
//    flags are the received flags with RXRPC_CLIENT_INITIATED inverted and
//    every other bit masked off;
//  - the reply is sent with kernel_sendmsg(), traced, and the skb is freed.
//
// NOTE(review): the declarations of msg, iov, skb, code, size and ret, the
// switch expression, the BUSY size assignment, the assignment of sp, the
// iovec arguments of kernel_sendmsg() and the test on ret are not visible in
// this extract - confirm against the original file.
551 void rxrpc_reject_packets(struct rxrpc_local
*local
)
553 struct sockaddr_rxrpc srx
;
554 struct rxrpc_skb_priv
*sp
;
555 struct rxrpc_wire_header whdr
;
563 _enter("%d", local
->debug_id
);
565 iov
[0].iov_base
= &whdr
;
566 iov
[0].iov_len
= sizeof(whdr
);
567 iov
[1].iov_base
= &code
;
568 iov
[1].iov_len
= sizeof(code
);
570 msg
.msg_name
= &srx
.transport
;
571 msg
.msg_control
= NULL
;
572 msg
.msg_controllen
= 0;
575 memset(&whdr
, 0, sizeof(whdr
));
577 while ((skb
= skb_dequeue(&local
->reject_queue
))) {
578 rxrpc_see_skb(skb
, rxrpc_skb_seen
);
582 case RXRPC_SKB_MARK_REJECT_BUSY
:
583 whdr
.type
= RXRPC_PACKET_TYPE_BUSY
;
587 case RXRPC_SKB_MARK_REJECT_ABORT
:
588 whdr
.type
= RXRPC_PACKET_TYPE_ABORT
;
589 code
= htonl(skb
->priority
);
590 size
= sizeof(whdr
) + sizeof(code
);
594 rxrpc_free_skb(skb
, rxrpc_skb_freed
);
598 if (rxrpc_extract_addr_from_skb(&srx
, skb
) == 0) {
599 msg
.msg_namelen
= srx
.transport_len
;
601 whdr
.epoch
= htonl(sp
->hdr
.epoch
);
602 whdr
.cid
= htonl(sp
->hdr
.cid
);
603 whdr
.callNumber
= htonl(sp
->hdr
.callNumber
);
604 whdr
.serviceId
= htons(sp
->hdr
.serviceId
);
// Reply flags: invert the client-initiated bit of the request and drop the rest.
605 whdr
.flags
= sp
->hdr
.flags
;
606 whdr
.flags
^= RXRPC_CLIENT_INITIATED
;
607 whdr
.flags
&= RXRPC_CLIENT_INITIATED
;
609 ret
= kernel_sendmsg(local
->socket
, &msg
,
612 trace_rxrpc_tx_fail(local
->debug_id
, 0, ret
,
613 rxrpc_tx_point_reject
);
615 trace_rxrpc_tx_packet(local
->debug_id
, &whdr
,
616 rxrpc_tx_point_reject
);
619 rxrpc_free_skb(skb
, rxrpc_skb_freed
);
626 * Send a VERSION reply to a peer as a keepalive.
// Send a VERSION packet to @peer as a keepalive.
//
// Visible steps:
//  - address the message to the peer transport address;
//  - build a wire header with the epoch of the local endpoint network
//    namespace, type RXRPC_PACKET_TYPE_VERSION, flags RXRPC_LAST_PACKET and
//    security index 0;
//  - send two iovecs with kernel_sendmsg() on the local endpoint socket: the
//    header, then rxrpc_keepalive_string (sizeof bytes, which includes the
//    terminating NUL);
//  - emit the tx traces and stamp peer->last_tx_at with ktime_get_seconds().
//
// NOTE(review): the declarations of msg, iov, len and ret, the remaining wire
// header fields (embedded lines 645-648, 651 and 653-655 are missing) and
// the test on ret that selects between the two trace calls are not visible in
// this extract - confirm against the original file.
628 void rxrpc_send_keepalive(struct rxrpc_peer
*peer
)
630 struct rxrpc_wire_header whdr
;
638 msg
.msg_name
= &peer
->srx
.transport
;
639 msg
.msg_namelen
= peer
->srx
.transport_len
;
640 msg
.msg_control
= NULL
;
641 msg
.msg_controllen
= 0;
644 whdr
.epoch
= htonl(peer
->local
->rxnet
->epoch
);
649 whdr
.type
= RXRPC_PACKET_TYPE_VERSION
; /* Not client-initiated */
650 whdr
.flags
= RXRPC_LAST_PACKET
;
652 whdr
.securityIndex
= 0;
656 iov
[0].iov_base
= &whdr
;
657 iov
[0].iov_len
= sizeof(whdr
);
658 iov
[1].iov_base
= (char *)rxrpc_keepalive_string
;
659 iov
[1].iov_len
= sizeof(rxrpc_keepalive_string
);
661 len
= iov
[0].iov_len
+ iov
[1].iov_len
;
663 _proto("Tx VERSION (keepalive)");
665 ret
= kernel_sendmsg(peer
->local
->socket
, &msg
, iov
, 2, len
);
667 trace_rxrpc_tx_fail(peer
->debug_id
, 0, ret
,
668 rxrpc_tx_point_version_keepalive
);
670 trace_rxrpc_tx_packet(peer
->debug_id
, &whdr
,
671 rxrpc_tx_point_version_keepalive
);
673 peer
->last_tx_at
= ktime_get_seconds();