/* Peer event handling, typically ICMP messages.
 *
 * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
 * Written by David Howells (dhowells@redhat.com)
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 */

#include <linux/module.h>
#include <linux/net.h>
#include <linux/skbuff.h>
#include <linux/errqueue.h>
#include <linux/udp.h>
#include <linux/in.h>
#include <linux/in6.h>
#include <linux/icmp.h>
#include <net/sock.h>
#include <net/af_rxrpc.h>
#include <net/ip.h>

#include "ar-internal.h"

static void rxrpc_store_error(struct rxrpc_peer *, struct sock_exterr_skb *);
static void rxrpc_distribute_error(struct rxrpc_peer *, int,
				   enum rxrpc_call_completion);

/*
 * Find the peer associated with an ICMP packet.
 */
static struct rxrpc_peer *rxrpc_lookup_peer_icmp_rcu(struct rxrpc_local *local,
						     const struct sk_buff *skb,
						     struct sockaddr_rxrpc *srx)
{
	struct sock_exterr_skb *serr = SKB_EXT_ERR(skb);

	_enter("");

	memset(srx, 0, sizeof(*srx));
	srx->transport_type = local->srx.transport_type;
	srx->transport_len = local->srx.transport_len;
	srx->transport.family = local->srx.transport.family;

	/* Can we see an ICMP4 packet on an ICMP6 listening socket?  and vice
	 * versa?
	 */
	switch (srx->transport.family) {
	case AF_INET:
		srx->transport_len = sizeof(srx->transport.sin);
		srx->transport.family = AF_INET;
		srx->transport.sin.sin_port = serr->port;
		switch (serr->ee.ee_origin) {
		case SO_EE_ORIGIN_ICMP:
			_net("Rx ICMP");
			memcpy(&srx->transport.sin.sin_addr,
			       skb_network_header(skb) + serr->addr_offset,
			       sizeof(struct in_addr));
			break;
		case SO_EE_ORIGIN_ICMP6:
			_net("Rx ICMP6 on v4 sock");
			/* The IPv4 address is reported as a v4-mapped IPv6
			 * address, so the real address sits in the last four
			 * bytes (offset 12).
			 */
			memcpy(&srx->transport.sin.sin_addr,
			       skb_network_header(skb) + serr->addr_offset + 12,
			       sizeof(struct in_addr));
			break;
		default:
			memcpy(&srx->transport.sin.sin_addr, &ip_hdr(skb)->saddr,
			       sizeof(struct in_addr));
			break;
		}
		break;

#ifdef CONFIG_AF_RXRPC_IPV6
	case AF_INET6:
		switch (serr->ee.ee_origin) {
		case SO_EE_ORIGIN_ICMP6:
			_net("Rx ICMP6");
			srx->transport.sin6.sin6_port = serr->port;
			memcpy(&srx->transport.sin6.sin6_addr,
			       skb_network_header(skb) + serr->addr_offset,
			       sizeof(struct in6_addr));
			break;
		case SO_EE_ORIGIN_ICMP:
			_net("Rx ICMP on v6 sock");
			srx->transport_len = sizeof(srx->transport.sin);
			srx->transport.family = AF_INET;
			srx->transport.sin.sin_port = serr->port;
			memcpy(&srx->transport.sin.sin_addr,
			       skb_network_header(skb) + serr->addr_offset,
			       sizeof(struct in_addr));
			break;
		default:
			memcpy(&srx->transport.sin6.sin6_addr,
			       &ipv6_hdr(skb)->saddr,
			       sizeof(struct in6_addr));
			break;
		}
		break;
#endif

	default:
		BUG();
	}

	return rxrpc_lookup_peer_rcu(local, srx);
}
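
/* Added note (commentary, not in the original file): the lookup above is only
 * safe under rcu_read_lock() - the caller, rxrpc_error_report(), holds the
 * RCU read lock across the call and must then pin the peer with
 * rxrpc_get_peer_maybe() before using it outside the read-side critical
 * section.
 */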

/*
 * Handle an MTU/fragmentation problem.
 */
static void rxrpc_adjust_mtu(struct rxrpc_peer *peer, struct sock_exterr_skb *serr)
{
	u32 mtu = serr->ee.ee_info;

	_net("Rx ICMP Fragmentation Needed (%d)", mtu);

	/* wind down the local interface MTU */
	if (mtu > 0 && peer->if_mtu == 65535 && mtu < peer->if_mtu) {
		peer->if_mtu = mtu;
		_net("I/F MTU %u", mtu);
	}

	if (mtu == 0) {
		/* they didn't give us a size, estimate one */
		mtu = peer->if_mtu;
		if (mtu > 1500) {
			mtu >>= 1;
			if (mtu < 1500)
				mtu = 1500;
		} else {
			mtu -= 100;
			if (mtu < peer->hdrsize)
				mtu = peer->hdrsize + 4;
		}
	}

	if (mtu < peer->mtu) {
		spin_lock_bh(&peer->lock);
		peer->mtu = mtu;
		peer->maxdata = peer->mtu - peer->hdrsize;
		spin_unlock_bh(&peer->lock);
		_net("Net MTU %u (maxdata %u)",
		     peer->mtu, peer->maxdata);
	}
}
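
/* Added worked example (illustrative, not from the original file): if the
 * ICMP report carries no MTU estimate (ee_info == 0) and peer->if_mtu is
 * 9000, the code above halves it to 4500 and keeps that since it is still
 * >= 1500; if peer->if_mtu were 1400 instead, it would knock off 100 to get
 * 1300, clamping to peer->hdrsize + 4 if that undercut the header size.
 */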

/*
 * Handle an error received on the local endpoint.
 */
void rxrpc_error_report(struct sock *sk)
{
	struct sock_exterr_skb *serr;
	struct sockaddr_rxrpc srx;
	struct rxrpc_local *local = sk->sk_user_data;
	struct rxrpc_peer *peer;
	struct sk_buff *skb;

	_enter("%p{%d}", sk, local->debug_id);

	skb = sock_dequeue_err_skb(sk);
	if (!skb) {
		_leave("UDP socket errqueue empty");
		return;
	}
	rxrpc_new_skb(skb, rxrpc_skb_rx_received);
	serr = SKB_EXT_ERR(skb);
	if (!skb->len && serr->ee.ee_origin == SO_EE_ORIGIN_TIMESTAMPING) {
		_leave("UDP empty message");
		rxrpc_free_skb(skb, rxrpc_skb_rx_freed);
		return;
	}

	rcu_read_lock();
	peer = rxrpc_lookup_peer_icmp_rcu(local, skb, &srx);
	if (peer && !rxrpc_get_peer_maybe(peer))
		peer = NULL;
	if (!peer) {
		rcu_read_unlock();
		rxrpc_free_skb(skb, rxrpc_skb_rx_freed);
		_leave(" [no peer]");
		return;
	}

	trace_rxrpc_rx_icmp(peer, &serr->ee, &srx);

	if ((serr->ee.ee_origin == SO_EE_ORIGIN_ICMP &&
	     serr->ee.ee_type == ICMP_DEST_UNREACH &&
	     serr->ee.ee_code == ICMP_FRAG_NEEDED)) {
		rxrpc_adjust_mtu(peer, serr);
		rcu_read_unlock();
		rxrpc_free_skb(skb, rxrpc_skb_rx_freed);
		rxrpc_put_peer(peer);
		_leave(" [MTU update]");
		return;
	}

	rxrpc_store_error(peer, serr);
	rcu_read_unlock();
	rxrpc_free_skb(skb, rxrpc_skb_rx_freed);
	rxrpc_put_peer(peer);

	_leave("");
}
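
/* Added usage note (commentary, not in the original file): this function is
 * installed as the transport UDP socket's sk_error_report callback when the
 * local endpoint's socket is opened, so it runs whenever the kernel queues an
 * error skb on that socket; each invocation dequeues and disposes of exactly
 * one error report.
 */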

/*
 * Map an error report to error codes on the peer record.
 */
static void rxrpc_store_error(struct rxrpc_peer *peer,
			      struct sock_exterr_skb *serr)
{
	enum rxrpc_call_completion compl = RXRPC_CALL_NETWORK_ERROR;
	struct sock_extended_err *ee;
	int err;

	_enter("");

	ee = &serr->ee;
	err = ee->ee_errno;

	switch (ee->ee_origin) {
	case SO_EE_ORIGIN_ICMP:
		switch (ee->ee_type) {
		case ICMP_DEST_UNREACH:
			switch (ee->ee_code) {
			case ICMP_NET_UNREACH:
				_net("Rx Received ICMP Network Unreachable");
				break;
			case ICMP_HOST_UNREACH:
				_net("Rx Received ICMP Host Unreachable");
				break;
			case ICMP_PORT_UNREACH:
				_net("Rx Received ICMP Port Unreachable");
				break;
			case ICMP_NET_UNKNOWN:
				_net("Rx Received ICMP Unknown Network");
				break;
			case ICMP_HOST_UNKNOWN:
				_net("Rx Received ICMP Unknown Host");
				break;
			default:
				_net("Rx Received ICMP DestUnreach code=%u",
				     ee->ee_code);
				break;
			}
			break;

		case ICMP_TIME_EXCEEDED:
			_net("Rx Received ICMP TTL Exceeded");
			break;

		default:
			_proto("Rx Received ICMP error { type=%u code=%u }",
			       ee->ee_type, ee->ee_code);
			break;
		}
		break;

	case SO_EE_ORIGIN_NONE:
	case SO_EE_ORIGIN_LOCAL:
		_proto("Rx Received local error { error=%d }", err);
		compl = RXRPC_CALL_LOCAL_ERROR;
		break;

	case SO_EE_ORIGIN_ICMP6:
	default:
		_proto("Rx Received error report { orig=%u }", ee->ee_origin);
		break;
	}

	rxrpc_distribute_error(peer, err, compl);
}

/*
 * Distribute an error that occurred on a peer.
 */
static void rxrpc_distribute_error(struct rxrpc_peer *peer, int error,
				   enum rxrpc_call_completion compl)
{
	struct rxrpc_call *call;

	hlist_for_each_entry_rcu(call, &peer->error_targets, error_link) {
		rxrpc_see_call(call);
		if (call->state < RXRPC_CALL_COMPLETE &&
		    rxrpc_set_call_completion(call, compl, 0, -error))
			rxrpc_notify_socket(call);
	}
}
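
/* Added note (commentary, not in the original file): the error carried in
 * ee_errno is a positive errno code, so it is negated above (-error) to
 * follow the kernel convention that completion results carry negative errno
 * values.
 */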

/*
 * Add RTT information to cache.  This is called in softirq mode and has
 * exclusive access to the peer RTT data.
 */
void rxrpc_peer_add_rtt(struct rxrpc_call *call, enum rxrpc_rtt_rx_trace why,
			rxrpc_serial_t send_serial, rxrpc_serial_t resp_serial,
			ktime_t send_time, ktime_t resp_time)
{
	struct rxrpc_peer *peer = call->peer;
	s64 rtt;
	u64 sum = peer->rtt_sum, avg;
	u8 cursor = peer->rtt_cursor, usage = peer->rtt_usage;

	rtt = ktime_to_ns(ktime_sub(resp_time, send_time));
	if (rtt < 0)
		return;

	spin_lock(&peer->rtt_input_lock);

	/* Replace the oldest datum in the RTT buffer */
	sum -= peer->rtt_cache[cursor];
	sum += rtt;
	peer->rtt_cache[cursor] = rtt;
	peer->rtt_cursor = (cursor + 1) & (RXRPC_RTT_CACHE_SIZE - 1);
	peer->rtt_sum = sum;
	if (usage < RXRPC_RTT_CACHE_SIZE) {
		usage++;
		peer->rtt_usage = usage;
	}

	spin_unlock(&peer->rtt_input_lock);

	/* Now recalculate the average */
	if (usage == RXRPC_RTT_CACHE_SIZE) {
		avg = sum / RXRPC_RTT_CACHE_SIZE;
	} else {
		avg = sum;
		do_div(avg, usage);
	}

	/* Don't need to update this under lock */
	peer->rtt = avg;
	trace_rxrpc_rtt_rx(call, why, send_serial, resp_serial, rtt,
			   usage, avg);
}
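
/* Added worked example (illustrative, not from the original file): assuming
 * the usual RXRPC_RTT_CACHE_SIZE of 32, the cache acts as a power-of-two
 * ring buffer.  While fewer than 32 samples exist, the average is
 * sum / usage (via do_div for the 64-bit dividend); once the buffer is full,
 * each new sample evicts the oldest and the average becomes sum / 32.
 * Maintaining a running sum keeps each update O(1) rather than rescanning
 * the whole buffer.
 */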

/*
 * Perform keep-alive pings.
 */
static void rxrpc_peer_keepalive_dispatch(struct rxrpc_net *rxnet,
					  struct list_head *collector,
					  time64_t base,
					  u8 cursor)
{
	struct rxrpc_peer *peer;
	const u8 mask = ARRAY_SIZE(rxnet->peer_keepalive) - 1;
	time64_t keepalive_at;
	int slot;

	spin_lock_bh(&rxnet->peer_hash_lock);

	while (!list_empty(collector)) {
		peer = list_entry(collector->next,
				  struct rxrpc_peer, keepalive_link);

		list_del_init(&peer->keepalive_link);
		if (!rxrpc_get_peer_maybe(peer))
			continue;

		spin_unlock_bh(&rxnet->peer_hash_lock);

		keepalive_at = peer->last_tx_at + RXRPC_KEEPALIVE_TIME;
		slot = keepalive_at - base;
		_debug("%02x peer %u t=%d {%pISp}",
		       cursor, peer->debug_id, slot, &peer->srx.transport);

		if (keepalive_at <= base ||
		    keepalive_at > base + RXRPC_KEEPALIVE_TIME) {
			rxrpc_send_keepalive(peer);
			slot = RXRPC_KEEPALIVE_TIME;
		}

		/* A transmission to this peer occurred since last we examined
		 * it so put it into the appropriate future bucket.
		 */
		slot += cursor;
		slot &= mask;
		spin_lock_bh(&rxnet->peer_hash_lock);
		list_add_tail(&peer->keepalive_link,
			      &rxnet->peer_keepalive[slot & mask]);
		rxrpc_put_peer(peer);
	}

	spin_unlock_bh(&rxnet->peer_hash_lock);
}
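
/* Added note (commentary, not in the original file): a peer that is already
 * due (keepalive_at <= base) or implausibly far in the future gets pinged
 * immediately and parked a full RXRPC_KEEPALIVE_TIME ahead; otherwise slot
 * is the number of seconds until its next keepalive, converted into a bucket
 * index by adding the current cursor and masking by the wheel size.
 */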

/*
 * Perform keep-alive pings with VERSION packets to keep any NAT alive.
 */
void rxrpc_peer_keepalive_worker(struct work_struct *work)
{
	struct rxrpc_net *rxnet =
		container_of(work, struct rxrpc_net, peer_keepalive_work);
	const u8 mask = ARRAY_SIZE(rxnet->peer_keepalive) - 1;
	time64_t base, now, delay;
	u8 cursor, stop;
	LIST_HEAD(collector);

	now = ktime_get_seconds();
	base = rxnet->peer_keepalive_base;
	cursor = rxnet->peer_keepalive_cursor;
	_enter("%lld,%u", base - now, cursor);

	if (!rxnet->live)
		return;

	/* Remove to a temporary list all the peers that are currently lodged
	 * in expired buckets plus all new peers.
	 *
	 * Everything in the bucket at the cursor is processed this
	 * second; the bucket at cursor + 1 goes at now + 1s and so
	 * on...
	 */
	spin_lock_bh(&rxnet->peer_hash_lock);
	list_splice_init(&rxnet->peer_keepalive_new, &collector);

	stop = cursor + ARRAY_SIZE(rxnet->peer_keepalive);
	while (base <= now && (s8)(cursor - stop) < 0) {
		list_splice_tail_init(&rxnet->peer_keepalive[cursor & mask],
				      &collector);
		base++;
		cursor++;
	}

	base = now;
	spin_unlock_bh(&rxnet->peer_hash_lock);

	rxnet->peer_keepalive_base = base;
	rxnet->peer_keepalive_cursor = cursor;
	rxrpc_peer_keepalive_dispatch(rxnet, &collector, base, cursor);
	ASSERT(list_empty(&collector));

	/* Schedule the timer for the next occupied timeslot. */
	cursor = rxnet->peer_keepalive_cursor;
	stop = cursor + RXRPC_KEEPALIVE_TIME - 1;
	for (; (s8)(cursor - stop) < 0; cursor++) {
		if (!list_empty(&rxnet->peer_keepalive[cursor & mask]))
			break;
		base++;
	}

	now = ktime_get_seconds();
	delay = base - now;
	if (delay < 1)
		delay = 1;
	delay *= HZ;
	if (rxnet->live)
		timer_reduce(&rxnet->peer_keepalive_timer, jiffies + delay);

	_leave("");
}
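
/* Added worked example (illustrative, not from the original file): the
 * peer_keepalive[] array behaves as a timer wheel of one-second buckets.
 * If the worker last ran a few seconds ago, the while loop above splices
 * every bucket that has expired since then (capped at one trip around the
 * wheel), plus all newly-added peers, onto the collector list for dispatch.
 * The final for loop then scans forward for the next non-empty bucket and
 * sets the timer to fire when that bucket comes due, at least one second
 * (HZ jiffies) from now.
 */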