1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /* AFS volume management
4 * Copyright (C) 2002, 2007 Red Hat, Inc. All Rights Reserved.
5 * Written by David Howells (dhowells@redhat.com)
8 #include <linux/kernel.h>
9 #include <linux/slab.h>
/*
 * Amount added to ktime_get_real_seconds() to compute a volume record's
 * update_at, i.e. how long a record is trusted before
 * afs_check_volume_status() wants it refreshed.  The unit is presumably
 * seconds, which would make 60 * 60 one hour.
 */
12 static unsigned __read_mostly afs_volume_record_life
= 60 * 60;
/*
 * Forward declaration: the handler is installed with INIT_WORK() in
 * afs_alloc_volume() and defined further down this file.
 */
14 static void afs_destroy_volume(struct work_struct
*work
);
17 * Insert a volume into a cell. If there's an existing volume record, that is
18 * returned instead with a ref held.
/*
 * afs_insert_volume_into_cell - Add @volume to @cell's tree of volumes.
 * @cell:   cell whose ->volumes rb-tree and ->proc_volumes list are updated
 * @volume: candidate record to insert
 *
 * Runs with cell->volume_lock write-locked.  Tree entries are ordered by vid.
 * On meeting an entry p with the same vid, afs_try_get_volume() is tried on
 * it; the visible alternative is to swap p's node for @volume's with
 * rb_replace_node_rcu().  A new node is linked with rb_link_node_rcu() and
 * rb_insert_color(), and @volume is put at the head of cell->proc_volumes.
 *
 * NOTE(review): the loop header, one descent branch, the body of the
 * afs_try_get_volume() test, the closing braces and the return statements are
 * not present in this listing, so the control flow joining the visible
 * statements must be confirmed against the complete source.
 *
 * NOTE(review): AFS_VOLUME_RM_TREE is set on @volume - the record going into
 * the tree - while the node being taken out is p's.
 * afs_remove_volume_from_cell() skips rb_erase() when that bit is already
 * set, so flagging the replacement rather than the replaced record looks
 * inverted.  Confirm which record is meant to carry the flag.
 */
20 static struct afs_volume
*afs_insert_volume_into_cell(struct afs_cell
*cell
,
21 struct afs_volume
*volume
)
24 struct rb_node
*parent
= NULL
, **pp
;
26 write_seqlock(&cell
->volume_lock
);
/* Begin the descent at the root slot of the cell's volume tree. */
28 pp
= &cell
->volumes
.rb_node
;
/* p is the volume record that embeds the tree node 'parent'. */
31 p
= rb_entry(parent
, struct afs_volume
, cell_node
);
32 if (p
->vid
< volume
->vid
) {
34 } else if (p
->vid
> volume
->vid
) {
35 pp
= &(*pp
)->rb_right
;
37 if (afs_try_get_volume(p
, afs_volume_trace_get_cell_insert
)) {
42 set_bit(AFS_VOLUME_RM_TREE
, &volume
->flags
);
43 rb_replace_node_rcu(&p
->cell_node
, &volume
->cell_node
, &cell
->volumes
);
/* Hook @volume in at slot pp below parent, rebalance, then list it. */
47 rb_link_node_rcu(&volume
->cell_node
, parent
, pp
);
48 rb_insert_color(&volume
->cell_node
, &cell
->volumes
);
49 hlist_add_head_rcu(&volume
->proc_link
, &cell
->proc_volumes
);
52 write_sequnlock(&cell
->volume_lock
);
/*
 * afs_remove_volume_from_cell - Unhook @volume from its cell's bookkeeping.
 * @volume: record to remove; its cell is taken from volume->cell
 *
 * The visible work only happens when @volume is still hashed on proc_link:
 * a trace point is emitted and then, with cell->volume_lock write-locked,
 * the record is deleted from the proc list and - unless AFS_VOLUME_RM_TREE
 * was already set - erased from cell->volumes.  test_and_set_bit() leaves
 * the flag set either way.
 *
 * NOTE(review): the trace passes refcount_read(&cell->ref), the cell's
 * count, whereas the other trace_afs_volume() calls in this file pass the
 * volume's count.  Confirm that is intended.
 */
57 static void afs_remove_volume_from_cell(struct afs_volume
*volume
)
59 struct afs_cell
*cell
= volume
->cell
;
61 if (!hlist_unhashed(&volume
->proc_link
)) {
62 trace_afs_volume(volume
->vid
, refcount_read(&cell
->ref
),
63 afs_volume_trace_remove
);
64 write_seqlock(&cell
->volume_lock
);
65 hlist_del_rcu(&volume
->proc_link
);
/* Only erase the tree node if nobody has marked it as already removed. */
66 if (!test_and_set_bit(AFS_VOLUME_RM_TREE
, &volume
->flags
))
67 rb_erase(&volume
->cell_node
, &cell
->volumes
);
68 write_sequnlock(&cell
->volume_lock
);
73 * Allocate a volume record and load it up from a vldb record.
/*
 * afs_alloc_volume - Build a new volume record from a VLDB entry.
 * @params: mount parameters; ->type selects which entry of vldb->vid[]
 *          becomes the record's vid, and ->force, ->cell and ->key are read
 * @vldb:   VLDB entry supplying the volume IDs, the name and its length
 * @_slist: NOTE(review): never referenced in the lines shown here;
 *          presumably the new server list is handed back through it - confirm
 *
 * The record comes from kzalloc(GFP_KERNEL), is given a refcount of 1 and a
 * ref on params->cell, and has its locks, list heads and destructor work
 * item initialised.  A server list is then built by afs_alloc_server_list()
 * and published in volume->servers.
 *
 * NOTE(review): the NULL check on the allocation, the test guarding
 * PTR_ERR(slist), the declarations of i and ret, the labels and the return
 * statements are absent from this listing, so the error handling cannot be
 * reviewed from here.
 */
75 static struct afs_volume
*afs_alloc_volume(struct afs_fs_context
*params
,
76 struct afs_vldb_entry
*vldb
,
77 struct afs_server_list
**_slist
)
79 struct afs_server_list
*slist
;
80 struct afs_volume
*volume
;
83 volume
= kzalloc(sizeof(struct afs_volume
), GFP_KERNEL
);
/* The record's own ID is the one for the requested volume type. */
87 volume
->vid
= vldb
->vid
[params
->type
];
/* Due for a status refresh afs_volume_record_life from now. */
88 volume
->update_at
= ktime_get_real_seconds() + afs_volume_record_life
;
89 volume
->cell
= afs_get_cell(params
->cell
, afs_cell_trace_get_vol
);
90 volume
->type
= params
->type
;
91 volume
->type_force
= params
->force
;
92 volume
->name_len
= vldb
->name_len
;
/* TIME64_MIN presumably marks these times as not yet known - confirm. */
93 volume
->creation_time
= TIME64_MIN
;
94 volume
->update_time
= TIME64_MIN
;
/* Start life with a single reference. */
96 refcount_set(&volume
->ref
, 1);
97 INIT_HLIST_NODE(&volume
->proc_link
);
98 INIT_WORK(&volume
->destructor
, afs_destroy_volume
);
99 rwlock_init(&volume
->servers_lock
);
100 mutex_init(&volume
->volsync_lock
);
101 mutex_init(&volume
->cb_check_lock
);
102 rwlock_init(&volume
->cb_v_break_lock
);
103 INIT_LIST_HEAD(&volume
->open_mmaps
);
104 init_rwsem(&volume
->open_mmaps_lock
);
/*
 * Copies name_len + 1 bytes; presumably the extra byte is a terminating NUL
 * already present in vldb->name - confirm.
 */
105 memcpy(volume
->name
, vldb
->name
, vldb
->name_len
+ 1);
/* Keep the IDs of every volume type, not just the selected one. */
107 for (i
= 0; i
< AFS_MAXTYPES
; i
++)
108 volume
->vids
[i
] = vldb
->vid
[i
];
110 slist
= afs_alloc_server_list(volume
, params
->key
, vldb
);
112 ret
= PTR_ERR(slist
);
117 rcu_assign_pointer(volume
->servers
, slist
);
118 trace_afs_volume(volume
->vid
, 1, afs_volume_trace_alloc
);
/* Gives back the cell ref taken by afs_get_cell() above (presumably the error path). */
122 afs_put_cell(volume
->cell
, afs_cell_trace_put_vol
);
129 * Look up or allocate a volume record.
/*
 * afs_lookup_volume - Get the cell's record for a volume, adding one if needed.
 * @params: mount parameters, passed on to afs_alloc_volume(); ->cell is the
 *          cell the record is inserted into
 * @vldb:   VLDB entry the candidate record is built from
 *
 * A candidate is always allocated first and then offered to
 * afs_insert_volume_into_cell().  If the candidate itself comes back, it is
 * attached to the servers in the list afs_alloc_volume() produced.
 *
 * NOTE(review): the statement under the IS_ERR() test, the "else" and the
 * final return are not in this listing.  The afs_put_volume() on the
 * candidate is presumably the branch taken when the cell handed back a
 * different, pre-existing record - confirm.
 */
131 static struct afs_volume
*afs_lookup_volume(struct afs_fs_context
*params
,
132 struct afs_vldb_entry
*vldb
)
134 struct afs_server_list
*slist
;
135 struct afs_volume
*candidate
, *volume
;
137 candidate
= afs_alloc_volume(params
, vldb
, &slist
);
138 if (IS_ERR(candidate
))
141 volume
= afs_insert_volume_into_cell(params
->cell
, candidate
);
142 if (volume
== candidate
)
143 afs_attach_volume_to_servers(volume
, slist
);
145 afs_put_volume(candidate
, afs_volume_trace_put_cell_dup
);
150 * Look up a VLDB record for a volume.
/*
 * afs_vl_lookup_vldb - Fetch a VLDB entry from a cell's VL servers.
 * @cell: cell whose VL servers are queried
 *
 * NOTE(review): the rest of the parameter list is missing from this listing;
 * the body also uses key, volname and volnamesz.
 *
 * Returns ERR_PTR(-ERESTARTSYS) if the VL operation cannot be begun.
 * Otherwise afs_vl_get_entry_by_name_u() is called for each server that
 * afs_select_vlserver() picks, keeping the latest result.  The result of
 * afs_end_vlserver_operation() takes precedence when negative; if it is not
 * negative the latest vldb value is returned, which is still the initial
 * ERR_PTR(-EDESTADDRREQ) when no server was ever selected.
 */
152 static struct afs_vldb_entry
*afs_vl_lookup_vldb(struct afs_cell
*cell
,
157 struct afs_vldb_entry
*vldb
= ERR_PTR(-EDESTADDRREQ
);
158 struct afs_vl_cursor vc
;
161 if (!afs_begin_vlserver_operation(&vc
, cell
, key
))
162 return ERR_PTR(-ERESTARTSYS
);
164 while (afs_select_vlserver(&vc
)) {
165 vldb
= afs_vl_get_entry_by_name_u(&vc
, volname
, volnamesz
);
/* A negative end-of-operation status overrides whatever vldb holds. */
168 ret
= afs_end_vlserver_operation(&vc
);
169 return ret
< 0 ? ERR_PTR(ret
) : vldb
;
173 * Look up a volume in the VL server and create a candidate volume record for it.
176 * The volume name can be one of the following:
177 * "%[cell:]volume[.]" R/W volume
178 * "#[cell:]volume[.]" R/O or R/W volume (rwparent=0),
179 * or R/W (rwparent=1) volume
180 * "%[cell:]volume.readonly" R/O volume
181 * "#[cell:]volume.readonly" R/O volume
182 * "%[cell:]volume.backup" Backup volume
183 * "#[cell:]volume.backup" Backup volume
185 * The cell name is optional, and defaults to the current cell.
187 * See "The Rules of Mount Point Traversal" in Chapter 5 of the AFS SysAdmin Guide.
189 * - Rule 1: Explicit type suffix forces access of that type or nothing
190 * (no suffix, then use Rule 2 & 3)
191 * - Rule 2: If parent volume is R/O, then mount R/O volume by preference, R/W if not available
193 * - Rule 3: If parent volume is R/W, then only mount R/W volume unless
194 * explicitly told otherwise
/*
 * afs_create_volume - Look a volume up in the VLDB and get a record for it.
 * @params: mount parameters; ->cell, ->key, ->volname and ->volnamesz drive
 *          the VL lookup, ->type is the requested type and may be overwritten
 *          with AFSVL_ROVOL or AFSVL_RWVOL below
 *
 * A failed VL lookup is passed straight back with ERR_CAST().  Otherwise
 * 'volume' ends up as ERR_PTR(vldb->error) when the entry carries
 * AFS_VLDB_QUERY_ERROR, ERR_PTR(-ENOMEDIUM) when no acceptable type is
 * found, or the result of afs_lookup_volume().
 *
 * NOTE(review): the IS_ERR() test, the test that opens the type-selection
 * chain, the gotos, the label, the release of vldb and the final return are
 * not in this listing.  Confirm that vldb is freed on every path.
 */
196 struct afs_volume
*afs_create_volume(struct afs_fs_context
*params
)
198 struct afs_vldb_entry
*vldb
;
199 struct afs_volume
*volume
;
/* Bit for the requested type, tested against vldb->flags below. */
200 unsigned long type_mask
= 1UL << params
->type
;
202 vldb
= afs_vl_lookup_vldb(params
->cell
, params
->key
,
203 params
->volname
, params
->volnamesz
);
205 return ERR_CAST(vldb
);
/* The entry itself records a query failure: report the stored error. */
207 if (test_bit(AFS_VLDB_QUERY_ERROR
, &vldb
->flags
)) {
208 volume
= ERR_PTR(vldb
->error
);
212 /* Make the final decision on the type we want */
213 volume
= ERR_PTR(-ENOMEDIUM
);
215 if (!(vldb
->flags
& type_mask
))
/* R/O is tried before R/W when picking a type here. */
217 } else if (test_bit(AFS_VLDB_HAS_RO
, &vldb
->flags
)) {
218 params
->type
= AFSVL_ROVOL
;
219 } else if (test_bit(AFS_VLDB_HAS_RW
, &vldb
->flags
)) {
220 params
->type
= AFSVL_RWVOL
;
225 volume
= afs_lookup_volume(params
, vldb
);
233 * Destroy a volume record
/*
 * afs_destroy_volume - Work item handler that tears down a volume record.
 * @work: the ->destructor work item embedded in the volume
 *
 * Queued by schedule_work() in afs_put_volume().  In order: detaches the
 * volume from the servers in its list, removes it from its cell, puts the
 * server list and the cell ref, emits a trace and hands the record to
 * kfree_rcu().  With CONFIG_AFS_FSCACHE, volume->cache must already be NULL.
 */
235 static void afs_destroy_volume(struct work_struct
*work
)
237 struct afs_volume
*volume
= container_of(work
, struct afs_volume
, destructor
);
/* Plain pointer fetch; the value is only passed on, not dereferenced here. */
238 struct afs_server_list
*slist
= rcu_access_pointer(volume
->servers
);
240 _enter("%p", volume
);
242 #ifdef CONFIG_AFS_FSCACHE
243 ASSERTCMP(volume
->cache
, ==, NULL
);
246 afs_detach_volume_from_servers(volume
, slist
);
247 afs_remove_volume_from_cell(volume
);
248 afs_put_serverlist(volume
->cell
->net
, slist
);
249 afs_put_cell(volume
->cell
, afs_cell_trace_put_vol
);
250 trace_afs_volume(volume
->vid
, refcount_read(&volume
->ref
),
251 afs_volume_trace_free
);
/*
 * Freed through RCU; presumably because the tree and proc list are updated
 * with _rcu primitives and may still be being walked - confirm.
 */
252 kfree_rcu(volume
, rcu
);
254 _leave(" [destroyed]");
258 * Try to get a reference on a volume record.
/*
 * afs_try_get_volume - Take a ref on @volume unless its count is already zero.
 * @volume: record to pin
 * @reason: trace tag recorded with the new count
 *
 * When the increment succeeds, the trace shows the count after the
 * increment (previous value r, plus one).
 *
 * NOTE(review): the declaration of r and the return statements are not in
 * this listing; presumably true is returned on success and false otherwise.
 */
260 bool afs_try_get_volume(struct afs_volume
*volume
, enum afs_volume_trace reason
)
264 if (__refcount_inc_not_zero(&volume
->ref
, &r
)) {
265 trace_afs_volume(volume
->vid
, r
+ 1, reason
);
272 * Get a reference on a volume record.
/*
 * afs_get_volume - Take a reference on a volume record.
 * @volume: record to pin
 * @reason: trace tag recorded with the new count
 *
 * Increments volume->ref and traces the count after the increment (previous
 * value r, plus one).
 *
 * NOTE(review): the declaration of r, any guard on @volume and the return
 * statement are not in this listing; presumably @volume is returned.
 */
274 struct afs_volume
*afs_get_volume(struct afs_volume
*volume
,
275 enum afs_volume_trace reason
)
280 __refcount_inc(&volume
->ref
, &r
);
281 trace_afs_volume(volume
->vid
, r
+ 1, reason
);
288 * Drop a reference on a volume record.
/*
 * afs_put_volume - Drop a reference on a volume record.
 * @volume: record to release
 * @reason: trace tag recorded with the new count
 *
 * Decrements volume->ref, traces the count after the decrement (previous
 * value r, minus one) and queues the ->destructor work item, which runs
 * afs_destroy_volume().
 *
 * NOTE(review): the declarations of zero and r, any guard on @volume and the
 * test in front of schedule_work() are not in this listing; presumably the
 * work is only queued when 'zero' is true.
 */
290 void afs_put_volume(struct afs_volume
*volume
, enum afs_volume_trace reason
)
/*
 * vid is read before the decrement so the trace below need not touch
 * @volume afterwards.
 */
293 afs_volid_t vid
= volume
->vid
;
297 zero
= __refcount_dec_and_test(&volume
->ref
, &r
);
298 trace_afs_volume(vid
, r
- 1, reason
);
300 schedule_work(&volume
->destructor
);
/*
 * afs_activate_volume - Acquire an fscache volume cookie for @volume.
 * @volume: record whose ->cache is set
 *
 * The visible work is inside CONFIG_AFS_FSCACHE.  The cache key is
 * "afs,<cell name>,<vid in hex>".  An error from fscache_acquire_volume()
 * other than -EBUSY is returned through PTR_ERR(); -EBUSY is only reported
 * with pr_err().
 *
 * NOTE(review): the declaration of name, the kasprintf() failure check, the
 * kfree() of name, the matching #endif and the final return are not in this
 * listing.  Confirm that name is freed on every path and what volume->cache
 * is set to after the -EBUSY case.
 */
307 int afs_activate_volume(struct afs_volume
*volume
)
309 #ifdef CONFIG_AFS_FSCACHE
310 struct fscache_volume
*vcookie
;
313 name
= kasprintf(GFP_KERNEL
, "afs,%s,%llx",
314 volume
->cell
->name
, volume
->vid
);
318 vcookie
= fscache_acquire_volume(name
, NULL
, NULL
, 0);
319 if (IS_ERR(vcookie
)) {
/* Anything but "key already in use" is a hard failure. */
320 if (vcookie
!= ERR_PTR(-EBUSY
)) {
322 return PTR_ERR(vcookie
);
324 pr_err("AFS: Cache volume key already in use (%s)\n", name
);
327 volume
->cache
= vcookie
;
334 * Deactivate a volume.
/*
 * afs_deactivate_volume - Give up @volume's fscache cookie.
 * @volume: record whose ->cache is relinquished
 *
 * With CONFIG_AFS_FSCACHE, volume->cache is passed to
 * fscache_relinquish_volume() and then cleared.  The last argument is
 * whether AFS_VOLUME_DELETED is set - presumably asking fscache to
 * invalidate the cached data of a deleted volume; confirm against the
 * fscache API.
 */
336 void afs_deactivate_volume(struct afs_volume
*volume
)
338 _enter("%s", volume
->name
);
340 #ifdef CONFIG_AFS_FSCACHE
341 fscache_relinquish_volume(volume
->cache
, NULL
,
342 test_bit(AFS_VOLUME_DELETED
, &volume
->flags
));
343 volume
->cache
= NULL
;
350 * Query the VL service to update the volume status.
/*
 * afs_update_volume_status - Refresh @volume from the VL service.
 * @volume: record to refresh
 * @key:    key used for the VL lookup and for building the new server list
 *
 * Steps visible here:
 *  - the volume is looked up by its vid rendered as a decimal string;
 *  - if the name length or the name bytes differ, AFS_MAXVOLNAME bytes of
 *    the new name are copied over volume->name in place and name_len is
 *    updated (assumes both name buffers hold at least AFS_MAXVOLNAME bytes -
 *    confirm);
 *  - a new server list is built and, with servers_lock write-locked, it is
 *    installed in volume->servers and servers_seq is bumped when
 *    afs_annotate_server_list() returns true;
 *  - update_at is set to now + 10 * 60 or now + afs_volume_record_life;
 *  - after unlocking, afs_reattach_volume_to_servers() is called and
 *    'discard' is put.
 *
 * NOTE(review): the declarations of idbuf, idsz and ret, every IS_ERR()
 * check, the assignments to discard, the "else" between the two update_at
 * assignments, the condition in front of the reattach call, the labels and
 * the return are not in this listing.  Any memory barrier between publishing
 * the new list and bumping servers_seq cannot be confirmed from here either.
 */
352 static int afs_update_volume_status(struct afs_volume
*volume
, struct key
*key
)
354 struct afs_server_list
*new, *old
, *discard
;
355 struct afs_vldb_entry
*vldb
;
361 /* We look up an ID by passing it as a decimal string in the
362 * operation's name parameter.
364 idsz
= snprintf(idbuf
, sizeof(idbuf
), "%llu", volume
->vid
);
366 vldb
= afs_vl_lookup_vldb(volume
->cell
, key
, idbuf
, idsz
);
372 /* See if the volume got renamed. */
373 if (vldb
->name_len
!= volume
->name_len
||
374 memcmp(vldb
->name
, volume
->name
, vldb
->name_len
) != 0) {
375 /* TODO: Use RCU'd string. */
376 memcpy(volume
->name
, vldb
->name
, AFS_MAXVOLNAME
);
377 volume
->name_len
= vldb
->name_len
;
380 /* See if the volume's server list got updated. */
381 new = afs_alloc_server_list(volume
, key
, vldb
);
/* servers_lock is write-held across the list swap and the update_at change. */
387 write_lock(&volume
->servers_lock
);
390 old
= rcu_dereference_protected(volume
->servers
,
391 lockdep_is_held(&volume
->servers_lock
));
392 if (afs_annotate_server_list(new, old
)) {
393 new->seq
= volume
->servers_seq
+ 1;
394 rcu_assign_pointer(volume
->servers
, new);
396 volume
->servers_seq
++;
400 /* Check more often if replication is ongoing. */
401 if (new->ro_replicating
)
402 volume
->update_at
= ktime_get_real_seconds() + 10 * 60;
404 volume
->update_at
= ktime_get_real_seconds() + afs_volume_record_life
;
405 write_unlock(&volume
->servers_lock
);
408 afs_reattach_volume_to_servers(volume
, new, old
);
409 afs_put_serverlist(volume
->cell
->net
, discard
);
414 _leave(" = %d", ret
);
419 * Make sure the volume record is up to date.
/*
 * afs_check_volume_status - Make sure @volume's record is up to date.
 * @volume: record to check
 * @op:     operation on whose behalf this runs; ->key is used for the update
 *          and AFS_OPERATION_UNINTR in ->flags selects an uninterruptible
 *          wait
 *
 * Visible logic: AFS_VOLUME_WAIT is tested first, then whether update_at has
 * passed or AFS_VOLUME_NEEDS_UPDATE is set.  The caller that wins the
 * AFS_VOLUME_UPDATING bit lock clears NEEDS_UPDATE, runs
 * afs_update_volume_status(), clears WAIT and UPDATING and wakes anyone
 * waiting on the WAIT bit.  Other callers wait on AFS_VOLUME_WAIT if it is
 * set.
 *
 * NOTE(review): the labels, gotos, returns, the test in front of the
 * set_bit(AFS_VOLUME_NEEDS_UPDATE) call (presumably a failed update) and the
 * handling of 'retries' that leads to the -ESTALE exit are not in this
 * listing; confirm the flow against the complete source.
 */
421 int afs_check_volume_status(struct afs_volume
*volume
, struct afs_operation
*op
)
423 int ret
, retries
= 0;
428 if (test_bit(AFS_VOLUME_WAIT
, &volume
->flags
))
430 if (volume
->update_at
<= ktime_get_real_seconds() ||
431 test_bit(AFS_VOLUME_NEEDS_UPDATE
, &volume
->flags
))
/* Only one caller at a time performs the update; UPDATING acts as a bit lock. */
437 if (!test_and_set_bit_lock(AFS_VOLUME_UPDATING
, &volume
->flags
)) {
438 clear_bit(AFS_VOLUME_NEEDS_UPDATE
, &volume
->flags
);
439 ret
= afs_update_volume_status(volume
, op
->key
);
441 set_bit(AFS_VOLUME_NEEDS_UPDATE
, &volume
->flags
);
442 clear_bit_unlock(AFS_VOLUME_WAIT
, &volume
->flags
);
443 clear_bit_unlock(AFS_VOLUME_UPDATING
, &volume
->flags
);
444 wake_up_bit(&volume
->flags
, AFS_VOLUME_WAIT
);
445 _leave(" = %d", ret
);
450 if (!test_bit(AFS_VOLUME_WAIT
, &volume
->flags
)) {
451 _leave(" = 0 [no wait]");
/* Sleep until the WAIT bit clears; interruptible unless the op forbids it. */
455 ret
= wait_on_bit(&volume
->flags
, AFS_VOLUME_WAIT
,
456 (op
->flags
& AFS_OPERATION_UNINTR
) ?
457 TASK_UNINTERRUPTIBLE
: TASK_INTERRUPTIBLE
);
458 if (ret
== -ERESTARTSYS
) {
459 _leave(" = %d", ret
);
465 _leave(" = -ESTALE");