// SPDX-License-Identifier: GPL-2.0-or-later
/* AFS fileserver probing
 *
 * Copyright (C) 2018, 2020 Red Hat, Inc. All Rights Reserved.
 * Written by David Howells (dhowells@redhat.com)
 */
#include <linux/sched.h>
#include <linux/slab.h>
#include "internal.h"
#include "protocol_afs.h"
#include "protocol_yfs.h"
15 static unsigned int afs_fs_probe_fast_poll_interval
= 30 * HZ
;
16 static unsigned int afs_fs_probe_slow_poll_interval
= 5 * 60 * HZ
;
18 struct afs_endpoint_state
*afs_get_endpoint_state(struct afs_endpoint_state
*estate
,
19 enum afs_estate_trace where
)
24 __refcount_inc(&estate
->ref
, &r
);
25 trace_afs_estate(estate
->server_id
, estate
->probe_seq
, r
, where
);
30 static void afs_endpoint_state_rcu(struct rcu_head
*rcu
)
32 struct afs_endpoint_state
*estate
= container_of(rcu
, struct afs_endpoint_state
, rcu
);
34 trace_afs_estate(estate
->server_id
, estate
->probe_seq
, refcount_read(&estate
->ref
),
35 afs_estate_trace_free
);
36 afs_put_addrlist(estate
->addresses
, afs_alist_trace_put_estate
);
40 void afs_put_endpoint_state(struct afs_endpoint_state
*estate
, enum afs_estate_trace where
)
43 unsigned int server_id
= estate
->server_id
, probe_seq
= estate
->probe_seq
;
47 dead
= __refcount_dec_and_test(&estate
->ref
, &r
);
48 trace_afs_estate(server_id
, probe_seq
, r
, where
);
50 call_rcu(&estate
->rcu
, afs_endpoint_state_rcu
);
55 * Start the probe polling timer. We have to supply it with an inc on the
56 * outstanding server count.
58 static void afs_schedule_fs_probe(struct afs_net
*net
,
59 struct afs_server
*server
, bool fast
)
66 atj
= server
->probed_at
;
67 atj
+= fast
? afs_fs_probe_fast_poll_interval
: afs_fs_probe_slow_poll_interval
;
69 afs_inc_servers_outstanding(net
);
70 if (timer_reduce(&net
->fs_probe_timer
, atj
))
71 afs_dec_servers_outstanding(net
);
75 * Handle the completion of a set of probes.
77 static void afs_finished_fs_probe(struct afs_net
*net
, struct afs_server
*server
,
78 struct afs_endpoint_state
*estate
)
80 bool responded
= test_bit(AFS_ESTATE_RESPONDED
, &estate
->flags
);
82 write_seqlock(&net
->fs_lock
);
84 list_add_tail(&server
->probe_link
, &net
->fs_probe_slow
);
86 server
->rtt
= UINT_MAX
;
87 clear_bit(AFS_SERVER_FL_RESPONDING
, &server
->flags
);
88 list_add_tail(&server
->probe_link
, &net
->fs_probe_fast
);
91 write_sequnlock(&net
->fs_lock
);
93 afs_schedule_fs_probe(net
, server
, !responded
);
97 * Handle the completion of a probe.
99 static void afs_done_one_fs_probe(struct afs_net
*net
, struct afs_server
*server
,
100 struct afs_endpoint_state
*estate
)
104 if (atomic_dec_and_test(&estate
->nr_probing
))
105 afs_finished_fs_probe(net
, server
, estate
);
107 wake_up_all(&server
->probe_wq
);
111 * Handle inability to send a probe due to ENOMEM when trying to allocate a
114 static void afs_fs_probe_not_done(struct afs_net
*net
,
115 struct afs_server
*server
,
116 struct afs_endpoint_state
*estate
,
121 trace_afs_io_error(0, -ENOMEM
, afs_io_error_fs_probe_fail
);
122 spin_lock(&server
->probe_lock
);
124 set_bit(AFS_ESTATE_LOCAL_FAILURE
, &estate
->flags
);
125 if (estate
->error
== 0)
126 estate
->error
= -ENOMEM
;
128 set_bit(index
, &estate
->failed_set
);
130 spin_unlock(&server
->probe_lock
);
131 return afs_done_one_fs_probe(net
, server
, estate
);
135 * Process the result of probing a fileserver. This is called after successful
136 * or failed delivery of an FS.GetCapabilities operation.
138 void afs_fileserver_probe_result(struct afs_call
*call
)
140 struct afs_endpoint_state
*estate
= call
->probe
;
141 struct afs_addr_list
*alist
= estate
->addresses
;
142 struct afs_address
*addr
= &alist
->addrs
[call
->probe_index
];
143 struct afs_server
*server
= call
->server
;
144 unsigned int index
= call
->probe_index
;
145 unsigned int rtt_us
= -1, cap0
;
146 int ret
= call
->error
;
148 _enter("%pU,%u", &server
->uuid
, index
);
150 WRITE_ONCE(addr
->last_error
, ret
);
152 spin_lock(&server
->probe_lock
);
159 if (!test_bit(AFS_ESTATE_RESPONDED
, &estate
->flags
)) {
160 estate
->abort_code
= call
->abort_code
;
166 clear_bit(index
, &estate
->responsive_set
);
167 set_bit(AFS_ESTATE_LOCAL_FAILURE
, &estate
->flags
);
168 trace_afs_io_error(call
->debug_id
, ret
, afs_io_error_fs_probe_fail
);
170 case -ECONNRESET
: /* Responded, but call expired. */
180 clear_bit(index
, &estate
->responsive_set
);
181 set_bit(index
, &estate
->failed_set
);
182 if (!test_bit(AFS_ESTATE_RESPONDED
, &estate
->flags
) &&
183 (estate
->error
== 0 ||
184 estate
->error
== -ETIMEDOUT
||
185 estate
->error
== -ETIME
))
187 trace_afs_io_error(call
->debug_id
, ret
, afs_io_error_fs_probe_fail
);
192 clear_bit(index
, &estate
->failed_set
);
194 if (call
->service_id
== YFS_FS_SERVICE
) {
195 set_bit(AFS_ESTATE_IS_YFS
, &estate
->flags
);
196 set_bit(AFS_SERVER_FL_IS_YFS
, &server
->flags
);
197 server
->service_id
= call
->service_id
;
199 set_bit(AFS_ESTATE_NOT_YFS
, &estate
->flags
);
200 if (!test_bit(AFS_ESTATE_IS_YFS
, &estate
->flags
)) {
201 clear_bit(AFS_SERVER_FL_IS_YFS
, &server
->flags
);
202 server
->service_id
= call
->service_id
;
204 cap0
= ntohl(call
->tmp
);
205 if (cap0
& AFS3_VICED_CAPABILITY_64BITFILES
)
206 set_bit(AFS_SERVER_FL_HAS_FS64
, &server
->flags
);
208 clear_bit(AFS_SERVER_FL_HAS_FS64
, &server
->flags
);
211 rtt_us
= rxrpc_kernel_get_srtt(addr
->peer
);
212 if (rtt_us
< estate
->rtt
) {
213 estate
->rtt
= rtt_us
;
214 server
->rtt
= rtt_us
;
215 alist
->preferred
= index
;
218 smp_wmb(); /* Set rtt before responded. */
219 set_bit(AFS_ESTATE_RESPONDED
, &estate
->flags
);
220 set_bit(index
, &estate
->responsive_set
);
221 set_bit(AFS_SERVER_FL_RESPONDING
, &server
->flags
);
223 spin_unlock(&server
->probe_lock
);
225 trace_afs_fs_probe(server
, false, estate
, index
, call
->error
, call
->abort_code
, rtt_us
);
226 _debug("probe[%x] %pU [%u] %pISpc rtt=%d ret=%d",
227 estate
->probe_seq
, &server
->uuid
, index
,
228 rxrpc_kernel_remote_addr(alist
->addrs
[index
].peer
),
231 return afs_done_one_fs_probe(call
->net
, server
, estate
);
235 * Probe all of a fileserver's addresses to find out the best route and to
236 * query its capabilities.
238 void afs_fs_probe_fileserver(struct afs_net
*net
, struct afs_server
*server
,
239 struct afs_addr_list
*new_alist
, struct key
*key
)
241 struct afs_endpoint_state
*estate
, *old
;
242 struct afs_addr_list
*alist
;
243 unsigned long unprobed
;
245 _enter("%pU", &server
->uuid
);
247 estate
= kzalloc(sizeof(*estate
), GFP_KERNEL
);
251 refcount_set(&estate
->ref
, 1);
252 estate
->server_id
= server
->debug_id
;
253 estate
->rtt
= UINT_MAX
;
255 write_lock(&server
->fs_lock
);
257 old
= rcu_dereference_protected(server
->endpoint_state
,
258 lockdep_is_held(&server
->fs_lock
));
259 estate
->responsive_set
= old
->responsive_set
;
260 estate
->addresses
= afs_get_addrlist(new_alist
?: old
->addresses
,
261 afs_alist_trace_get_estate
);
262 alist
= estate
->addresses
;
263 estate
->probe_seq
= ++server
->probe_counter
;
264 atomic_set(&estate
->nr_probing
, alist
->nr_addrs
);
266 rcu_assign_pointer(server
->endpoint_state
, estate
);
267 set_bit(AFS_ESTATE_SUPERSEDED
, &old
->flags
);
268 write_unlock(&server
->fs_lock
);
270 trace_afs_estate(estate
->server_id
, estate
->probe_seq
, refcount_read(&estate
->ref
),
271 afs_estate_trace_alloc_probe
);
273 afs_get_address_preferences(net
, alist
);
275 server
->probed_at
= jiffies
;
276 unprobed
= (1UL << alist
->nr_addrs
) - 1;
278 unsigned int index
= 0, i
;
281 for (i
= 0; i
< alist
->nr_addrs
; i
++) {
282 if (test_bit(i
, &unprobed
) &&
283 alist
->addrs
[i
].prio
> best_prio
) {
285 best_prio
= alist
->addrs
[i
].prio
;
288 __clear_bit(index
, &unprobed
);
290 trace_afs_fs_probe(server
, true, estate
, index
, 0, 0, 0);
291 if (!afs_fs_get_capabilities(net
, server
, estate
, index
, key
))
292 afs_fs_probe_not_done(net
, server
, estate
, index
);
295 afs_put_endpoint_state(old
, afs_estate_trace_put_probe
);
299 * Wait for the first as-yet untried fileserver to respond, for the probe state
300 * to be superseded or for all probes to finish.
302 int afs_wait_for_fs_probes(struct afs_operation
*op
, struct afs_server_state
*states
, bool intr
)
304 struct afs_endpoint_state
*estate
;
305 struct afs_server_list
*slist
= op
->server_list
;
306 bool still_probing
= true;
309 _enter("%u", slist
->nr_servers
);
311 for (i
= 0; i
< slist
->nr_servers
; i
++) {
312 estate
= states
[i
].endpoint_state
;
313 if (test_bit(AFS_ESTATE_SUPERSEDED
, &estate
->flags
))
315 if (atomic_read(&estate
->nr_probing
))
316 still_probing
= true;
317 if (estate
->responsive_set
& states
[i
].untried_addrs
)
323 for (i
= 0; i
< slist
->nr_servers
; i
++)
324 add_wait_queue(&slist
->servers
[i
].server
->probe_wq
, &states
[i
].probe_waiter
);
327 still_probing
= false;
329 set_current_state(intr
? TASK_INTERRUPTIBLE
: TASK_UNINTERRUPTIBLE
);
330 for (i
= 0; i
< slist
->nr_servers
; i
++) {
331 estate
= states
[i
].endpoint_state
;
332 if (test_bit(AFS_ESTATE_SUPERSEDED
, &estate
->flags
)) {
336 if (atomic_read(&estate
->nr_probing
))
337 still_probing
= true;
338 if (estate
->responsive_set
& states
[i
].untried_addrs
) {
344 if (!still_probing
|| signal_pending(current
))
350 set_current_state(TASK_RUNNING
);
352 for (i
= 0; i
< slist
->nr_servers
; i
++)
353 remove_wait_queue(&slist
->servers
[i
].server
->probe_wq
, &states
[i
].probe_waiter
);
355 if (!ret
&& signal_pending(current
))
361 * Probe timer. We have an increment on fs_outstanding that we need to pass
362 * along to the work item.
364 void afs_fs_probe_timer(struct timer_list
*timer
)
366 struct afs_net
*net
= container_of(timer
, struct afs_net
, fs_probe_timer
);
368 if (!net
->live
|| !queue_work(afs_wq
, &net
->fs_prober
))
369 afs_dec_servers_outstanding(net
);
373 * Dispatch a probe to a server.
375 static void afs_dispatch_fs_probe(struct afs_net
*net
, struct afs_server
*server
)
376 __releases(&net
->fs_lock
)
378 struct key
*key
= NULL
;
380 /* We remove it from the queues here - it will be added back to
381 * one of the queues on the completion of the probe.
383 list_del_init(&server
->probe_link
);
385 afs_get_server(server
, afs_server_trace_get_probe
);
386 write_sequnlock(&net
->fs_lock
);
388 afs_fs_probe_fileserver(net
, server
, NULL
, key
);
389 afs_put_server(net
, server
, afs_server_trace_put_probe
);
393 * Probe a server immediately without waiting for its due time to come
394 * round. This is used when all of the addresses have been tried.
396 void afs_probe_fileserver(struct afs_net
*net
, struct afs_server
*server
)
398 write_seqlock(&net
->fs_lock
);
399 if (!list_empty(&server
->probe_link
))
400 return afs_dispatch_fs_probe(net
, server
);
401 write_sequnlock(&net
->fs_lock
);
405 * Probe dispatcher to regularly dispatch probes to keep NAT alive.
407 void afs_fs_probe_dispatcher(struct work_struct
*work
)
409 struct afs_net
*net
= container_of(work
, struct afs_net
, fs_prober
);
410 struct afs_server
*fast
, *slow
, *server
;
411 unsigned long nowj
, timer_at
, poll_at
;
412 bool first_pass
= true, set_timer
= false;
415 afs_dec_servers_outstanding(net
);
421 if (list_empty(&net
->fs_probe_fast
) && list_empty(&net
->fs_probe_slow
)) {
422 afs_dec_servers_outstanding(net
);
428 write_seqlock(&net
->fs_lock
);
430 fast
= slow
= server
= NULL
;
432 timer_at
= nowj
+ MAX_JIFFY_OFFSET
;
434 if (!list_empty(&net
->fs_probe_fast
)) {
435 fast
= list_first_entry(&net
->fs_probe_fast
, struct afs_server
, probe_link
);
436 poll_at
= fast
->probed_at
+ afs_fs_probe_fast_poll_interval
;
437 if (time_before(nowj
, poll_at
)) {
444 if (!list_empty(&net
->fs_probe_slow
)) {
445 slow
= list_first_entry(&net
->fs_probe_slow
, struct afs_server
, probe_link
);
446 poll_at
= slow
->probed_at
+ afs_fs_probe_slow_poll_interval
;
447 if (time_before(nowj
, poll_at
)) {
448 if (time_before(poll_at
, timer_at
))
455 server
= fast
?: slow
;
457 _debug("probe %pU", &server
->uuid
);
459 if (server
&& (first_pass
|| !need_resched())) {
460 afs_dispatch_fs_probe(net
, server
);
465 write_sequnlock(&net
->fs_lock
);
468 if (!queue_work(afs_wq
, &net
->fs_prober
))
469 afs_dec_servers_outstanding(net
);
470 _leave(" [requeue]");
471 } else if (set_timer
) {
472 if (timer_reduce(&net
->fs_probe_timer
, timer_at
))
473 afs_dec_servers_outstanding(net
);
476 afs_dec_servers_outstanding(net
);
477 _leave(" [quiesce]");
482 * Wait for a probe on a particular fileserver to complete for 2s.
484 int afs_wait_for_one_fs_probe(struct afs_server
*server
, struct afs_endpoint_state
*estate
,
485 unsigned long exclude
, bool is_intr
)
487 struct wait_queue_entry wait
;
488 unsigned long timo
= 2 * HZ
;
490 if (atomic_read(&estate
->nr_probing
) == 0)
493 init_wait_entry(&wait
, 0);
495 prepare_to_wait_event(&server
->probe_wq
, &wait
,
496 is_intr
? TASK_INTERRUPTIBLE
: TASK_UNINTERRUPTIBLE
);
498 test_bit(AFS_ESTATE_SUPERSEDED
, &estate
->flags
) ||
499 (estate
->responsive_set
& ~exclude
) ||
500 atomic_read(&estate
->nr_probing
) == 0 ||
501 (is_intr
&& signal_pending(current
)))
503 timo
= schedule_timeout(timo
);
506 finish_wait(&server
->probe_wq
, &wait
);
509 if (estate
->responsive_set
& ~exclude
)
511 if (test_bit(AFS_ESTATE_SUPERSEDED
, &estate
->flags
))
513 if (is_intr
&& signal_pending(current
))
517 return -EDESTADDRREQ
;
521 * Clean up the probing when the namespace is killed off.
523 void afs_fs_probe_cleanup(struct afs_net
*net
)
525 if (del_timer_sync(&net
->fs_probe_timer
))
526 afs_dec_servers_outstanding(net
);