// SPDX-License-Identifier: GPL-2.0-or-later
/* Peer event handling, typically ICMP messages.
 *
 * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
 * Written by David Howells (dhowells@redhat.com)
 */

#include <linux/module.h>
#include <linux/net.h>
#include <linux/skbuff.h>
#include <linux/errqueue.h>
#include <linux/udp.h>
#include <linux/in.h>
#include <linux/in6.h>
#include <linux/icmp.h>
#include <net/sock.h>
#include <net/af_rxrpc.h>
#include <net/ip.h>
#include "ar-internal.h"

static void rxrpc_store_error(struct rxrpc_peer *, struct sock_exterr_skb *);
static void rxrpc_distribute_error(struct rxrpc_peer *, int,
				   enum rxrpc_call_completion);

/*
 * Find the peer associated with an ICMP packet.
 */
static struct rxrpc_peer *rxrpc_lookup_peer_icmp_rcu(struct rxrpc_local *local,
						     const struct sk_buff *skb,
						     struct sockaddr_rxrpc *srx)
{
	struct sock_exterr_skb *serr = SKB_EXT_ERR(skb);

	_enter("");

	memset(srx, 0, sizeof(*srx));
	srx->transport_type = local->srx.transport_type;
	srx->transport_len = local->srx.transport_len;
	srx->transport.family = local->srx.transport.family;

	/* Can we see an ICMP4 packet on an ICMP6 listening socket?  and vice
	 * versa?
	 */
	switch (srx->transport.family) {
	case AF_INET:
		srx->transport_len = sizeof(srx->transport.sin);
		srx->transport.family = AF_INET;
		srx->transport.sin.sin_port = serr->port;
		switch (serr->ee.ee_origin) {
		case SO_EE_ORIGIN_ICMP:
			_net("Rx ICMP");
			memcpy(&srx->transport.sin.sin_addr,
			       skb_network_header(skb) + serr->addr_offset,
			       sizeof(struct in_addr));
			break;
		case SO_EE_ORIGIN_ICMP6:
			_net("Rx ICMP6 on v4 sock");
			memcpy(&srx->transport.sin.sin_addr,
			       skb_network_header(skb) + serr->addr_offset + 12,
			       sizeof(struct in_addr));
			break;
		default:
			memcpy(&srx->transport.sin.sin_addr, &ip_hdr(skb)->saddr,
			       sizeof(struct in_addr));
			break;
		}
		break;

#ifdef CONFIG_AF_RXRPC_IPV6
	case AF_INET6:
		switch (serr->ee.ee_origin) {
		case SO_EE_ORIGIN_ICMP6:
			_net("Rx ICMP6");
			srx->transport.sin6.sin6_port = serr->port;
			memcpy(&srx->transport.sin6.sin6_addr,
			       skb_network_header(skb) + serr->addr_offset,
			       sizeof(struct in6_addr));
			break;
		case SO_EE_ORIGIN_ICMP:
			_net("Rx ICMP on v6 sock");
			srx->transport_len = sizeof(srx->transport.sin);
			srx->transport.family = AF_INET;
			srx->transport.sin.sin_port = serr->port;
			memcpy(&srx->transport.sin.sin_addr,
			       skb_network_header(skb) + serr->addr_offset,
			       sizeof(struct in_addr));
			break;
		default:
			memcpy(&srx->transport.sin6.sin6_addr,
			       &ipv6_hdr(skb)->saddr,
			       sizeof(struct in6_addr));
			break;
		}
		break;
#endif

	default:
		BUG();
	}

	return rxrpc_lookup_peer_rcu(local, srx);
}
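
/* A note on the "+ 12" in the ICMP6-on-v4 case above: when an ICMPv6 error
 * is reported against a v4-mapped address (e.g. ::ffff:192.0.2.1), the IPv4
 * address occupies the last four bytes of the struct in6_addr, i.e. bytes
 * 12-15, so the copy starts at addr_offset + 12 to extract just those.
 */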

/*
 * Handle an MTU/fragmentation problem.
 */
static void rxrpc_adjust_mtu(struct rxrpc_peer *peer, struct sock_exterr_skb *serr)
{
	u32 mtu = serr->ee.ee_info;

	_net("Rx ICMP Fragmentation Needed (%d)", mtu);

	/* wind down the local interface MTU */
	if (mtu > 0 && peer->if_mtu == 65535 && mtu < peer->if_mtu) {
		peer->if_mtu = mtu;
		_net("I/F MTU %u", mtu);
	}

	if (mtu == 0) {
		/* they didn't give us a size, estimate one */
		mtu = peer->if_mtu;
		if (mtu > 1500) {
			mtu >>= 1;
			if (mtu < 1500)
				mtu = 1500;
		} else {
			mtu -= 100;
			if (mtu < peer->hdrsize)
				mtu = peer->hdrsize + 4;
		}
	}

	if (mtu < peer->mtu) {
		spin_lock_bh(&peer->lock);
		peer->mtu = mtu;
		peer->maxdata = peer->mtu - peer->hdrsize;
		spin_unlock_bh(&peer->lock);
		_net("Net MTU %u (maxdata %u)",
		     peer->mtu, peer->maxdata);
	}
}
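
/* Worked example (illustrative figures): if a router reports
 * ICMP_FRAG_NEEDED with ee_info = 1400 and peer->hdrsize is, say, 56
 * (20-byte IP + 8-byte UDP + 28-byte rxrpc wire header over IPv4), the
 * peer ends up with mtu = 1400 and maxdata = 1344, so subsequent packets
 * are sized to fit the reported path MTU.
 */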

/*
 * Handle an error received on the local endpoint.
 */
void rxrpc_error_report(struct sock *sk)
{
	struct sock_exterr_skb *serr;
	struct sockaddr_rxrpc srx;
	struct rxrpc_local *local;
	struct rxrpc_peer *peer;
	struct sk_buff *skb;

	rcu_read_lock();
	local = rcu_dereference_sk_user_data(sk);
	if (unlikely(!local)) {
		rcu_read_unlock();
		return;
	}
	_enter("%p{%d}", sk, local->debug_id);

	/* Clear the outstanding error value on the socket so that it doesn't
	 * cause kernel_sendmsg() to return it later.
	 */
	sock_error(sk);

	skb = sock_dequeue_err_skb(sk);
	if (!skb) {
		rcu_read_unlock();
		_leave("UDP socket errqueue empty");
		return;
	}
	rxrpc_new_skb(skb, rxrpc_skb_received);
	serr = SKB_EXT_ERR(skb);
	if (!skb->len && serr->ee.ee_origin == SO_EE_ORIGIN_TIMESTAMPING) {
		_leave("UDP empty message");
		rcu_read_unlock();
		rxrpc_free_skb(skb, rxrpc_skb_freed);
		return;
	}

	peer = rxrpc_lookup_peer_icmp_rcu(local, skb, &srx);
	if (peer && !rxrpc_get_peer_maybe(peer))
		peer = NULL;
	if (!peer) {
		rcu_read_unlock();
		rxrpc_free_skb(skb, rxrpc_skb_freed);
		_leave(" [no peer]");
		return;
	}

	trace_rxrpc_rx_icmp(peer, &serr->ee, &srx);

	if ((serr->ee.ee_origin == SO_EE_ORIGIN_ICMP &&
	     serr->ee.ee_type == ICMP_DEST_UNREACH &&
	     serr->ee.ee_code == ICMP_FRAG_NEEDED)) {
		rxrpc_adjust_mtu(peer, serr);
		rcu_read_unlock();
		rxrpc_free_skb(skb, rxrpc_skb_freed);
		rxrpc_put_peer(peer);
		_leave(" [MTU update]");
		return;
	}

	rxrpc_store_error(peer, serr);
	rcu_read_unlock();
	rxrpc_free_skb(skb, rxrpc_skb_freed);
	rxrpc_put_peer(peer);

	_leave("");
}
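
/* For context: this handler is hung on the transport socket when the local
 * endpoint is set up.  A minimal sketch of the wiring (the real version
 * lives in rxrpc_open_socket() in local_object.c):
 *
 *	sock->sk->sk_user_data = local;
 *	sock->sk->sk_error_report = rxrpc_error_report;
 */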

/*
 * Map an error report to error codes on the peer record.
 */
static void rxrpc_store_error(struct rxrpc_peer *peer,
			      struct sock_exterr_skb *serr)
{
	enum rxrpc_call_completion compl = RXRPC_CALL_NETWORK_ERROR;
	struct sock_extended_err *ee;
	int err;

	_enter("");

	ee = &serr->ee;
	err = ee->ee_errno;

	switch (ee->ee_origin) {
	case SO_EE_ORIGIN_ICMP:
		switch (ee->ee_type) {
		case ICMP_DEST_UNREACH:
			switch (ee->ee_code) {
			case ICMP_NET_UNREACH:
				_net("Rx Received ICMP Network Unreachable");
				break;
			case ICMP_HOST_UNREACH:
				_net("Rx Received ICMP Host Unreachable");
				break;
			case ICMP_PORT_UNREACH:
				_net("Rx Received ICMP Port Unreachable");
				break;
			case ICMP_NET_UNKNOWN:
				_net("Rx Received ICMP Unknown Network");
				break;
			case ICMP_HOST_UNKNOWN:
				_net("Rx Received ICMP Unknown Host");
				break;
			default:
				_net("Rx Received ICMP DestUnreach code=%u",
				     ee->ee_code);
				break;
			}
			break;

		case ICMP_TIME_EXCEEDED:
			_net("Rx Received ICMP TTL Exceeded");
			break;

		default:
			_proto("Rx Received ICMP error { type=%u code=%u }",
			       ee->ee_type, ee->ee_code);
			break;
		}
		break;

	case SO_EE_ORIGIN_NONE:
	case SO_EE_ORIGIN_LOCAL:
		_proto("Rx Received local error { error=%d }", err);
		compl = RXRPC_CALL_LOCAL_ERROR;
		break;

	case SO_EE_ORIGIN_ICMP6:
	default:
		_proto("Rx Received error report { orig=%u }", ee->ee_origin);
		break;
	}

	rxrpc_distribute_error(peer, err, compl);
}

/*
 * Distribute an error that occurred on a peer.
 */
static void rxrpc_distribute_error(struct rxrpc_peer *peer, int error,
				   enum rxrpc_call_completion compl)
{
	struct rxrpc_call *call;

	hlist_for_each_entry_rcu(call, &peer->error_targets, error_link) {
		rxrpc_see_call(call);
		if (call->state < RXRPC_CALL_COMPLETE &&
		    rxrpc_set_call_completion(call, compl, 0, -error))
			rxrpc_notify_socket(call);
	}
}

/*
 * Add RTT information to cache.  This is called in softirq mode and has
 * exclusive access to the peer RTT data.
 */
void rxrpc_peer_add_rtt(struct rxrpc_call *call, enum rxrpc_rtt_rx_trace why,
			rxrpc_serial_t send_serial, rxrpc_serial_t resp_serial,
			ktime_t send_time, ktime_t resp_time)
{
	struct rxrpc_peer *peer = call->peer;
	s64 rtt;
	u64 sum = peer->rtt_sum, avg;
	u8 cursor = peer->rtt_cursor, usage = peer->rtt_usage;

	rtt = ktime_to_ns(ktime_sub(resp_time, send_time));
	if (rtt < 0)
		return;

	spin_lock(&peer->rtt_input_lock);

	/* Replace the oldest datum in the RTT buffer */
	sum -= peer->rtt_cache[cursor];
	sum += rtt;
	peer->rtt_cache[cursor] = rtt;
	peer->rtt_cursor = (cursor + 1) & (RXRPC_RTT_CACHE_SIZE - 1);
	peer->rtt_sum = sum;
	if (usage < RXRPC_RTT_CACHE_SIZE) {
		usage++;
		peer->rtt_usage = usage;
	}

	spin_unlock(&peer->rtt_input_lock);

	/* Now recalculate the average */
	if (usage == RXRPC_RTT_CACHE_SIZE) {
		avg = sum / RXRPC_RTT_CACHE_SIZE;
	} else {
		avg = sum;
		do_div(avg, usage);
	}

	/* Don't need to update this under lock */
	peer->rtt = avg;
	trace_rxrpc_rtt_rx(call, why, send_serial, resp_serial, rtt,
			   usage, avg);
}
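
/* Illustration of the averaging above: the samples live in a power-of-two
 * ring buffer of RXRPC_RTT_CACHE_SIZE entries, with a running sum kept
 * alongside so no resummation is needed.  Once the buffer is full, the
 * average is simply sum / RXRPC_RTT_CACHE_SIZE; before that, the sum is
 * divided by the current usage count, so an estimate is available from the
 * very first response onwards.
 */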

/*
 * Perform keep-alive pings.
 */
static void rxrpc_peer_keepalive_dispatch(struct rxrpc_net *rxnet,
					  struct list_head *collector,
					  time64_t base,
					  u8 cursor)
{
	struct rxrpc_peer *peer;
	const u8 mask = ARRAY_SIZE(rxnet->peer_keepalive) - 1;
	time64_t keepalive_at;
	int slot;

	spin_lock_bh(&rxnet->peer_hash_lock);

	while (!list_empty(collector)) {
		peer = list_entry(collector->next,
				  struct rxrpc_peer, keepalive_link);

		list_del_init(&peer->keepalive_link);
		if (!rxrpc_get_peer_maybe(peer))
			continue;

		spin_unlock_bh(&rxnet->peer_hash_lock);

		keepalive_at = peer->last_tx_at + RXRPC_KEEPALIVE_TIME;
		slot = keepalive_at - base;
		_debug("%02x peer %u t=%d {%pISp}",
		       cursor, peer->debug_id, slot, &peer->srx.transport);

		if (keepalive_at <= base ||
		    keepalive_at > base + RXRPC_KEEPALIVE_TIME) {
			rxrpc_send_keepalive(peer);
			slot = RXRPC_KEEPALIVE_TIME;
		}

		/* A transmission to this peer occurred since last we examined
		 * it so put it into the appropriate future bucket.
		 */
		slot += cursor;
		slot &= mask;
		spin_lock_bh(&rxnet->peer_hash_lock);
		list_add_tail(&peer->keepalive_link,
			      &rxnet->peer_keepalive[slot & mask]);
		rxrpc_put_peer_locked(peer);
	}

	spin_unlock_bh(&rxnet->peer_hash_lock);
}
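
/* The keepalive machinery is in effect a time wheel: rxnet->peer_keepalive[]
 * holds one list bucket per second of the keepalive window, and each peer is
 * parked in the bucket for the second in which its next ping falls due
 * (index (cursor + slot) & mask).  Because peers are re-bucketed from their
 * last transmission time, idle peers get pinged while busy peers are left
 * alone.
 */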

/*
 * Perform keep-alive pings with VERSION packets to keep any NAT alive.
 */
void rxrpc_peer_keepalive_worker(struct work_struct *work)
{
	struct rxrpc_net *rxnet =
		container_of(work, struct rxrpc_net, peer_keepalive_work);
	const u8 mask = ARRAY_SIZE(rxnet->peer_keepalive) - 1;
	time64_t base, now, delay;
	u8 cursor, stop;
	LIST_HEAD(collector);

	now = ktime_get_seconds();
	base = rxnet->peer_keepalive_base;
	cursor = rxnet->peer_keepalive_cursor;
	_enter("%lld,%u", base - now, cursor);

	if (!rxnet->live)
		return;

	/* Remove to a temporary list all the peers that are currently lodged
	 * in expired buckets plus all new peers.
	 *
	 * Everything in the bucket at the cursor is processed this
	 * second; the bucket at cursor + 1 goes at now + 1s and so
	 * on...
	 */
	spin_lock_bh(&rxnet->peer_hash_lock);
	list_splice_init(&rxnet->peer_keepalive_new, &collector);

	stop = cursor + ARRAY_SIZE(rxnet->peer_keepalive);
	while (base <= now && (s8)(cursor - stop) < 0) {
		list_splice_tail_init(&rxnet->peer_keepalive[cursor & mask],
				      &collector);
		base++;
		cursor++;
	}

	base = now;
	spin_unlock_bh(&rxnet->peer_hash_lock);

	rxnet->peer_keepalive_base = base;
	rxnet->peer_keepalive_cursor = cursor;
	rxrpc_peer_keepalive_dispatch(rxnet, &collector, base, cursor);
	ASSERT(list_empty(&collector));

	/* Schedule the timer for the next occupied timeslot. */
	cursor = rxnet->peer_keepalive_cursor;
	stop = cursor + RXRPC_KEEPALIVE_TIME - 1;
	for (; (s8)(cursor - stop) < 0; cursor++) {
		if (!list_empty(&rxnet->peer_keepalive[cursor & mask]))
			break;
		base++;
	}

	now = ktime_get_seconds();
	delay = base - now;
	if (delay < 1)
		delay = 1;
	delay *= HZ;
	if (rxnet->live)
		timer_reduce(&rxnet->peer_keepalive_timer, jiffies + delay);

	_leave("");
}
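
/* The (s8)(cursor - stop) < 0 tests above are wraparound-safe orderings:
 * cursor and stop are u8 bucket indices, so subtracting them and reading the
 * result as signed yields "cursor is before stop" even across the 8-bit
 * wrap.  E.g. with a 32-bucket wheel, cursor = 250 gives stop = (u8)282 =
 * 26, and (s8)(250 - 26) = (s8)224 = -32 < 0, so the loop still walks the
 * full window.
 */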