// SPDX-License-Identifier: GPL-2.0-or-later
/* Handle vlserver selection and rotation.
 *
 * Copyright (C) 2018 Red Hat, Inc. All Rights Reserved.
 * Written by David Howells (dhowells@redhat.com)
 */
8 #include <linux/kernel.h>
9 #include <linux/sched.h>
10 #include <linux/sched/signal.h>
15 * Begin an operation on a volume location server.
17 bool afs_begin_vlserver_operation(struct afs_vl_cursor
*vc
, struct afs_cell
*cell
,
20 static atomic_t debug_ids
;
22 memset(vc
, 0, sizeof(*vc
));
25 vc
->cumul_error
.error
= -EDESTADDRREQ
;
26 vc
->nr_iterations
= -1;
28 if (signal_pending(current
)) {
29 vc
->cumul_error
.error
= -EINTR
;
30 vc
->flags
|= AFS_VL_CURSOR_STOP
;
34 vc
->debug_id
= atomic_inc_return(&debug_ids
);
39 * Begin iteration through a server list, starting with the last used server if
40 * possible, or the last recorded good server if not.
42 static bool afs_start_vl_iteration(struct afs_vl_cursor
*vc
)
44 struct afs_cell
*cell
= vc
->cell
;
45 unsigned int dns_lookup_count
;
47 if (cell
->dns_source
== DNS_RECORD_UNAVAILABLE
||
48 cell
->dns_expiry
<= ktime_get_real_seconds()) {
49 dns_lookup_count
= smp_load_acquire(&cell
->dns_lookup_count
);
50 set_bit(AFS_CELL_FL_DO_LOOKUP
, &cell
->flags
);
51 afs_queue_cell(cell
, afs_cell_trace_get_queue_dns
);
53 if (cell
->dns_source
== DNS_RECORD_UNAVAILABLE
) {
54 if (wait_var_event_interruptible(
55 &cell
->dns_lookup_count
,
56 smp_load_acquire(&cell
->dns_lookup_count
)
57 != dns_lookup_count
) < 0) {
58 vc
->cumul_error
.error
= -ERESTARTSYS
;
63 /* Status load is ordered after lookup counter load */
64 if (cell
->dns_status
== DNS_LOOKUP_GOT_NOT_FOUND
) {
65 pr_warn("No record of cell %s\n", cell
->name
);
66 vc
->cumul_error
.error
= -ENOENT
;
70 if (cell
->dns_source
== DNS_RECORD_UNAVAILABLE
) {
71 vc
->cumul_error
.error
= -EDESTADDRREQ
;
76 read_lock(&cell
->vl_servers_lock
);
77 vc
->server_list
= afs_get_vlserverlist(
78 rcu_dereference_protected(cell
->vl_servers
,
79 lockdep_is_held(&cell
->vl_servers_lock
)));
80 read_unlock(&cell
->vl_servers_lock
);
81 if (!vc
->server_list
->nr_servers
)
84 vc
->untried_servers
= (1UL << vc
->server_list
->nr_servers
) - 1;
85 vc
->server_index
= -1;
/*
 * Select the vlserver to use.  May be called multiple times to rotate
 * through the vlservers.
 *
 * On every call after the first, the outcome of the previous call
 * (vc->call_error / vc->call_abort_code) is recorded against the address that
 * was used and then evaluated to decide whether to stop, try another address
 * on the same server, move on to another server, or restart the whole
 * iteration.
 *
 * Returns true if vc->server, vc->alist and vc->addr_index now describe a
 * target to try; returns false once the cursor is stopped, in which case
 * vc->cumul_error.error holds the result.
 */
bool afs_select_vlserver(struct afs_vl_cursor *vc)
{
	struct afs_addr_list *alist = vc->alist;
	struct afs_vlserver *vlserver;
	unsigned long set, failed;
	unsigned int rtt;
	s32 abort_code = vc->call_abort_code;
	int error = vc->call_error, i;

	vc->nr_iterations++;

	_enter("VC=%x+%x,%d{%lx},%d{%lx},%d,%d",
	       vc->debug_id, vc->nr_iterations, vc->server_index, vc->untried_servers,
	       vc->addr_index, vc->addr_tried,
	       error, abort_code);

	if (vc->flags & AFS_VL_CURSOR_STOP) {
		_leave(" = f [stopped]");
		return false;
	}

	/* First pass: there is no previous result to evaluate. */
	if (vc->nr_iterations == 0)
		goto start;

	WRITE_ONCE(alist->addrs[vc->addr_index].last_error, error);

	/* Evaluate the result of the previous operation, if there was one. */
	switch (error) {
	default:
	case 0:
		/* Success or local failure. Stop. */
		vc->cumul_error.error = error;
		vc->flags |= AFS_VL_CURSOR_STOP;
		_leave(" = f [okay/local %d]", vc->cumul_error.error);
		return false;

	case -ECONNABORTED:
		/* The far side rejected the operation on some grounds.  This
		 * might involve the server being busy or the volume having been moved.
		 */
		switch (abort_code) {
		case AFSVL_IO:
		case AFSVL_BADVOLOPER:
		case AFSVL_NOMEM:
			/* The server went weird. */
			afs_prioritise_error(&vc->cumul_error, -EREMOTEIO, abort_code);
			//write_lock(&vc->cell->vl_servers_lock);
			//vc->server_list->weird_mask |= 1 << vc->server_index;
			//write_unlock(&vc->cell->vl_servers_lock);
			goto next_server;

		default:
			afs_prioritise_error(&vc->cumul_error, error, abort_code);
			goto failed;
		}

	case -ERFKILL:
	case -EADDRNOTAVAIL:
	case -ENETUNREACH:
	case -EHOSTUNREACH:
	case -EHOSTDOWN:
	case -ECONNREFUSED:
	case -ETIMEDOUT:
	case -ETIME:
		_debug("no conn %d", error);
		afs_prioritise_error(&vc->cumul_error, error, 0);
		goto iterate_address;

	case -ECONNRESET:
		_debug("call reset");
		afs_prioritise_error(&vc->cumul_error, error, 0);
		vc->flags |= AFS_VL_CURSOR_RETRY;
		goto next_server;

	case -EOPNOTSUPP:
		_debug("notsupp");
		goto next_server;
	}

restart_from_beginning:
	_debug("restart");
	/* This label is only reached from no_more_servers, by which point the
	 * address list has already been dropped, so only touch it if we are
	 * somehow still holding one (same guard as the failed: path).
	 */
	if (alist) {
		if (vc->call_responded &&
		    vc->addr_index != alist->preferred &&
		    test_bit(alist->preferred, &vc->addr_tried))
			WRITE_ONCE(alist->preferred, vc->addr_index);
		afs_put_addrlist(alist, afs_alist_trace_put_vlrotate_restart);
		alist = vc->alist = NULL;
	}

	afs_put_vlserverlist(vc->cell->net, vc->server_list);
	vc->server_list = NULL;
	/* Only one restart is permitted per operation. */
	if (vc->flags & AFS_VL_CURSOR_RETRIED)
		goto failed;
	vc->flags |= AFS_VL_CURSOR_RETRIED;
start:
	_debug("start");
	ASSERTCMP(alist, ==, NULL);

	if (!afs_start_vl_iteration(vc))
		goto failed;

	error = afs_send_vl_probes(vc->cell->net, vc->key, vc->server_list);
	if (error < 0) {
		afs_prioritise_error(&vc->cumul_error, error, 0);
		goto failed;
	}

pick_server:
	_debug("pick [%lx]", vc->untried_servers);
	ASSERTCMP(alist, ==, NULL);

	error = afs_wait_for_vl_probes(vc->server_list, vc->untried_servers);
	if (error < 0) {
		afs_prioritise_error(&vc->cumul_error, error, 0);
		goto failed;
	}

	/* Pick the untried server with the lowest RTT. */
	vc->server_index = vc->server_list->preferred;
	if (test_bit(vc->server_index, &vc->untried_servers))
		goto selected_server;

	vc->server_index = -1;
	/* Start at the maximum so that any responding server qualifies. */
	rtt = UINT_MAX;
	for (i = 0; i < vc->server_list->nr_servers; i++) {
		struct afs_vlserver *s = vc->server_list->servers[i].server;

		if (!test_bit(i, &vc->untried_servers) ||
		    !test_bit(AFS_VLSERVER_FL_RESPONDING, &s->flags))
			continue;
		if (s->probe.rtt <= rtt) {
			vc->server_index = i;
			rtt = s->probe.rtt;
		}
	}

	if (vc->server_index == -1)
		goto no_more_servers;

selected_server:
	_debug("use %d", vc->server_index);
	__clear_bit(vc->server_index, &vc->untried_servers);

	/* We're starting on a different vlserver from the list.  We need to
	 * check it, find its address list and probe its capabilities before we
	 * use it.
	 */
	vlserver = vc->server_list->servers[vc->server_index].server;
	vc->server = vlserver;

	_debug("USING VLSERVER: %s", vlserver->name);

	read_lock(&vlserver->lock);
	alist = rcu_dereference_protected(vlserver->addresses,
					  lockdep_is_held(&vlserver->lock));
	vc->alist = afs_get_addrlist(alist, afs_alist_trace_get_vlrotate_set);
	read_unlock(&vlserver->lock);

	/* New server, so no address on it has been tried yet. */
	vc->addr_tried = 0;
	vc->addr_index = -1;

iterate_address:
	/* Iterate over the current server's address list to try and find an
	 * address on which it will respond to us.
	 */
	set = READ_ONCE(alist->responded);
	failed = READ_ONCE(alist->probe_failed);
	vc->addr_index = READ_ONCE(alist->preferred);

	_debug("%lx-%lx-%lx,%d", set, failed, vc->addr_tried, vc->addr_index);

	/* Candidates: responded to a probe, not failed, not yet tried. */
	set &= ~(failed | vc->addr_tried);

	if (!set)
		goto next_server;

	/* Use the preferred address if it is a candidate, else the lowest
	 * numbered candidate.
	 */
	if (!test_bit(vc->addr_index, &set))
		vc->addr_index = __ffs(set);

	set_bit(vc->addr_index, &vc->addr_tried);
	vc->alist = alist;

	_debug("VL address %d/%d", vc->addr_index, alist->nr_addrs);

	vc->call_responded = false;
	_leave(" = t %pISpc", rxrpc_kernel_remote_addr(alist->addrs[vc->addr_index].peer));
	return true;

next_server:
	_debug("next");
	ASSERT(alist);
	/* If a non-preferred address answered after the preferred one had
	 * been tried, promote it.
	 */
	if (vc->call_responded &&
	    vc->addr_index != alist->preferred &&
	    test_bit(alist->preferred, &vc->addr_tried))
		WRITE_ONCE(alist->preferred, vc->addr_index);
	afs_put_addrlist(alist, afs_alist_trace_put_vlrotate_next);
	alist = vc->alist = NULL;
	goto pick_server;

no_more_servers:
	/* That's all the servers poked to no good effect.  Try again if some
	 * of them were busy.
	 */
	if (vc->flags & AFS_VL_CURSOR_RETRY)
		goto restart_from_beginning;

	/* Fold each server's probe result into the cumulative error. */
	for (i = 0; i < vc->server_list->nr_servers; i++) {
		struct afs_vlserver *s = vc->server_list->servers[i].server;

		if (test_bit(AFS_VLSERVER_FL_RESPONDING, &s->flags))
			vc->cumul_error.responded = true;
		afs_prioritise_error(&vc->cumul_error, READ_ONCE(s->probe.error),
				     s->probe.abort_code);
	}

failed:
	if (alist) {
		if (vc->call_responded &&
		    vc->addr_index != alist->preferred &&
		    test_bit(alist->preferred, &vc->addr_tried))
			WRITE_ONCE(alist->preferred, vc->addr_index);
		afs_put_addrlist(alist, afs_alist_trace_put_vlrotate_fail);
		alist = vc->alist = NULL;
	}
	vc->flags |= AFS_VL_CURSOR_STOP;
	_leave(" = f [failed %d]", vc->cumul_error.error);
	return false;
}
322 * Dump cursor state in the case of the error being EDESTADDRREQ.
324 static void afs_vl_dump_edestaddrreq(const struct afs_vl_cursor
*vc
)
326 struct afs_cell
*cell
= vc
->cell
;
330 if (!IS_ENABLED(CONFIG_AFS_DEBUG_CURSOR
) || count
> 3)
335 pr_notice("EDESTADDR occurred\n");
336 pr_notice("CELL: %s err=%d\n", cell
->name
, cell
->error
);
337 pr_notice("DNS: src=%u st=%u lc=%x\n",
338 cell
->dns_source
, cell
->dns_status
, cell
->dns_lookup_count
);
339 pr_notice("VC: ut=%lx ix=%u ni=%hu fl=%hx err=%hd\n",
340 vc
->untried_servers
, vc
->server_index
, vc
->nr_iterations
,
341 vc
->flags
, vc
->cumul_error
.error
);
342 pr_notice("VC: call er=%d ac=%d r=%u\n",
343 vc
->call_error
, vc
->call_abort_code
, vc
->call_responded
);
345 if (vc
->server_list
) {
346 const struct afs_vlserver_list
*sl
= vc
->server_list
;
347 pr_notice("VC: SL nr=%u ix=%u\n",
348 sl
->nr_servers
, sl
->index
);
349 for (i
= 0; i
< sl
->nr_servers
; i
++) {
350 const struct afs_vlserver
*s
= sl
->servers
[i
].server
;
351 pr_notice("VC: server %s+%hu fl=%lx E=%hd\n",
352 s
->name
, s
->port
, s
->flags
, s
->probe
.error
);
354 const struct afs_addr_list
*a
=
355 rcu_dereference(s
->addresses
);
356 pr_notice("VC: - nr=%u/%u/%u pf=%u\n",
357 a
->nr_ipv4
, a
->nr_addrs
, a
->max_addrs
,
359 pr_notice("VC: - R=%lx F=%lx\n",
360 a
->responded
, a
->probe_failed
);
362 pr_notice("VC: - current\n");
367 pr_notice("AC: t=%lx ax=%u\n", vc
->addr_tried
, vc
->addr_index
);
372 * Tidy up a volume location server cursor and unlock the vnode.
374 int afs_end_vlserver_operation(struct afs_vl_cursor
*vc
)
376 struct afs_net
*net
= vc
->cell
->net
;
378 _enter("VC=%x+%x", vc
->debug_id
, vc
->nr_iterations
);
380 switch (vc
->cumul_error
.error
) {
385 afs_vl_dump_edestaddrreq(vc
);
390 if (vc
->call_responded
&&
391 vc
->addr_index
!= vc
->alist
->preferred
&&
392 test_bit(vc
->alist
->preferred
, &vc
->addr_tried
))
393 WRITE_ONCE(vc
->alist
->preferred
, vc
->addr_index
);
394 afs_put_addrlist(vc
->alist
, afs_alist_trace_put_vlrotate_end
);
397 afs_put_vlserverlist(net
, vc
->server_list
);
398 return vc
->cumul_error
.error
;