2 * Copyright 2009-2010, Sine Nomine Associates and others.
5 * This software has been released under the terms of the IBM Public
6 * License. For details, see the LICENSE file in the top-level source
7 * directory or online at http://www.openafs.org/dl/license10.html
12 * volume group membership cache
13 * asynchronous partition scanner
16 #include <afsconfig.h>
17 #include <afs/param.h>
21 #ifdef HAVE_SYS_FILE_H
25 #ifdef AFS_DEMAND_ATTACH_FS
28 #include <rx/rx_queue.h>
31 #include <afs/afsutil.h>
33 #include <afs/afsint.h>
37 #include "viceinode.h"
39 #include "partition.h"
40 #include <afs/errors.h>
42 #define __VOL_VG_CACHE_IMPL 1
45 #include "vg_cache_impl.h"
47 static int _VVGC_scan_table_init(VVGCache_scan_table_t
* tbl
);
48 static int _VVGC_scan_table_add(VVGCache_scan_table_t
* tbl
,
49 struct DiskPartition64
* dp
,
52 static int _VVGC_scan_table_flush(VVGCache_scan_table_t
* tbl
,
53 struct DiskPartition64
* dp
);
54 static void * _VVGC_scanner_thread(void *);
55 static int _VVGC_scan_partition(struct DiskPartition64
* part
);
56 static VVGCache_dlist_entry_t
* _VVGC_dlist_lookup_r(struct DiskPartition64
*dp
,
59 static void _VVGC_flush_dlist(struct DiskPartition64
*dp
);
62 * init a thread-local scan table.
64 * @param[in] tbl scan table
66 * @return operation status
72 _VVGC_scan_table_init(VVGCache_scan_table_t
* tbl
)
74 memset(tbl
, 0, sizeof(*tbl
));
80 * add an entry to the thread-local scan table.
82 * @param[in] tbl scan table
83 * @param[in] dp disk partition object
84 * @param[in] volid volume id
85 * @param[in] parent parent volume id
87 * @pre VOL_LOCK is NOT held
89 * @note if the table is full, this routine will acquire
90 * VOL_LOCK and flush the table to the global one.
92 * @return operation status
94 * @retval nonzero a VVGCache_entry_add_r operation failed during a
95 * flush of the thread-local table
100 _VVGC_scan_table_add(VVGCache_scan_table_t
* tbl
,
101 struct DiskPartition64
* dp
,
107 if (tbl
->idx
== VVGC_SCAN_TBL_LEN
) {
108 code
= _VVGC_scan_table_flush(tbl
, dp
);
111 tbl
->entries
[tbl
->idx
].volid
= volid
;
112 tbl
->entries
[tbl
->idx
].parent
= parent
;
119 * flush thread-local scan table to the global VG cache.
121 * @param[in] tbl scan table
122 * @param[in] dp disk partition object
124 * @pre VOL_LOCK is NOT held
126 * @return operation status
128 * @retval nonzero a VVGCache_entry_add_r operation failed during a
129 * flush of the thread-local table
134 _VVGC_scan_table_flush(VVGCache_scan_table_t
* tbl
,
135 struct DiskPartition64
* dp
)
137 int code
= 0, res
, i
;
139 unsigned long newvols
, newvgs
;
141 newvols
= tbl
->newvols
;
142 newvgs
= tbl
->newvgs
;
146 for (i
= 0; i
< tbl
->idx
; i
++) {
148 * We need to check the 'to-delete' list and prevent adding any entries
149 * that are on it. The volser could potentially create a volume in one
150 * VG, then delete it and put it on another VG. If we are doing a scan
151 * when that happens, tbl->entries could have the entries for trying to
152 * put the vol on both VGs, though at least one of them will also be on
153 * the dlist. If we put everything in tbl->entries on the VGC then try
154 * to delete afterwards, putting one entry on the VGC will cause an error,
155 * and we'll fail to add it. So instead, avoid adding any new VGC
156 * entries if it is on the dlist.
158 if (_VVGC_dlist_lookup_r(dp
, tbl
->entries
[i
].parent
,
159 tbl
->entries
[i
].volid
)) {
162 res
= VVGCache_entry_add_r(dp
,
163 tbl
->entries
[i
].parent
,
164 tbl
->entries
[i
].volid
,
174 /* flush the to-delete list while we're here. We don't need to preserve
175 * the list across the entire scan, and flushing it each time we flush
176 * a scan table will keep the size of the dlist down */
177 _VVGC_flush_dlist(dp
);
181 ViceLog(125, ("VVGC_scan_table_flush: flushed %d entries from "
182 "scan table to global VG cache\n", tbl
->idx
));
183 ViceLog(125, ("VVGC_scan_table_flush: %s total: %lu vols, %lu groups\n",
184 VPartitionPath(dp
), newvols
, newvgs
));
186 res
= _VVGC_scan_table_init(tbl
);
191 tbl
->newvols
= newvols
;
192 tbl
->newvgs
= newvgs
;
198 * record a volume header found by VWalkVolumeHeaders in a VGC scan table.
200 * @param[in] dp the disk partition
201 * @param[in] name full path to the .vol header (unused)
202 * @param[in] hdr the header data
203 * @param[in] last whether this is the last try or not (unused)
204 * @param[in] rock actually a VVGCache_scan_table_t* to add the volume to
206 * @return operation status
208 * @retval -1 fatal error adding vol to the scan table
211 _VVGC_RecordHeader(struct DiskPartition64
*dp
, const char *name
,
212 struct VolumeDiskHeader
*hdr
, int last
, void *rock
)
215 VVGCache_scan_table_t
*tbl
;
216 tbl
= (VVGCache_scan_table_t
*)rock
;
218 code
= _VVGC_scan_table_add(tbl
, dp
, hdr
->id
, hdr
->parent
);
220 ViceLog(0, ("VVGC_scan_partition: error %d adding volume %s to scan table\n",
228 * unlink a faulty volume header found by VWalkVolumeHeaders.
230 * @param[in] dp the disk partition (unused)
231 * @param[in] name the full path to the .vol header
232 * @param[in] hdr the header data (unused)
233 * @param[in] rock unused
236 _VVGC_UnlinkHeader(struct DiskPartition64
*dp
, const char *name
,
237 struct VolumeDiskHeader
*hdr
, void *rock
)
239 ViceLog(0, ("%s is not a legitimate volume header file; deleted\n", name
));
241 ViceLog(0, ("Unable to unlink %s (errno = %d)\n",
247 * scan a disk partition for .vol files
249 * @param[in] part disk partition object
251 * @pre VOL_LOCK is NOT held
253 * @return operation status
255 * @retval -1 invalid disk partition object
256 * @retval -2 failed to flush stale entries for this partition
261 _VVGC_scan_partition(struct DiskPartition64
* part
)
265 VVGCache_scan_table_t tbl
;
266 char *part_path
= NULL
;
268 code
= _VVGC_scan_table_init(&tbl
);
270 ViceLog(0, ("VVGC_scan_partition: could not init scan table; error = %d\n",
274 part_path
= VPartitionPath(part
);
275 if (part_path
== NULL
) {
276 ViceLog(0, ("VVGC_scan_partition: invalid partition object given; aborting scan\n"));
282 res
= _VVGC_flush_part_r(part
);
284 ViceLog(0, ("VVGC_scan_partition: error flushing partition %s; error = %d\n",
285 VPartitionPath(part
), res
));
293 dirp
= opendir(part_path
);
295 ViceLog(0, ("VVGC_scan_partition: could not open %s, aborting scan; error = %d\n",
301 ViceLog(5, ("VVGC_scan_partition: scanning partition %s for VG cache\n",
304 code
= VWalkVolumeHeaders(part
, part_path
, _VVGC_RecordHeader
,
305 _VVGC_UnlinkHeader
, &tbl
);
310 _VVGC_scan_table_flush(&tbl
, part
);
318 ViceLog(0, ("VVGC_scan_partition: error %d while scanning %s\n",
321 ViceLog(0, ("VVGC_scan_partition: finished scanning %s: %lu volumes in %lu groups\n",
322 part_path
, tbl
.newvols
, tbl
.newvgs
));
327 _VVGC_flush_dlist(part
);
328 free(VVGCache
.part
[part
->index
].dlist_hash_buckets
);
329 VVGCache
.part
[part
->index
].dlist_hash_buckets
= NULL
;
332 _VVGC_state_change(part
, VVGC_PART_STATE_INVALID
);
334 _VVGC_state_change(part
, VVGC_PART_STATE_VALID
);
346 _VVGC_scanner_thread(void * args
)
348 struct DiskPartition64
*part
= args
;
351 code
= _VVGC_scan_partition(part
);
353 ViceLog(0, ("Error: _VVGC_scan_partition failed with code %d for partition %s\n",
354 code
, VPartitionPath(part
)));
361 * start a background scan.
363 * @param[in] dp disk partition object
365 * @return operation status
367 * @retval -1 internal error
368 * @retval -3 racing against another thread
373 _VVGC_scan_start(struct DiskPartition64
* dp
)
377 pthread_attr_t attrs
;
380 if (_VVGC_state_change(dp
,
381 VVGC_PART_STATE_UPDATING
)
382 == VVGC_PART_STATE_UPDATING
) {
384 ViceLog(0, ("VVGC_scan_partition: race detected; aborting scanning partition %s\n",
385 VPartitionPath(dp
)));
390 /* initialize partition's to-delete list */
391 VVGCache
.part
[dp
->index
].dlist_hash_buckets
=
392 malloc(VolumeHashTable
.Size
* sizeof(struct rx_queue
));
393 if (!VVGCache
.part
[dp
->index
].dlist_hash_buckets
) {
397 for (i
= 0; i
< VolumeHashTable
.Size
; i
++) {
398 queue_Init(&VVGCache
.part
[dp
->index
].dlist_hash_buckets
[i
]);
401 code
= pthread_attr_init(&attrs
);
406 code
= pthread_attr_setdetachstate(&attrs
, PTHREAD_CREATE_DETACHED
);
411 code
= pthread_create(&tid
, &attrs
, &_VVGC_scanner_thread
, dp
);
414 VVGCache_part_state_t old_state
;
416 ViceLog(0, ("_VVGC_scan_start: pthread_create failed with %d\n", code
));
418 old_state
= _VVGC_state_change(dp
, VVGC_PART_STATE_INVALID
);
419 opr_Assert(old_state
== VVGC_PART_STATE_UPDATING
);
424 ViceLog(0, ("_VVGC_scan_start failed with code %d for partition %s\n",
425 code
, VPartitionPath(dp
)));
426 if (VVGCache
.part
[dp
->index
].dlist_hash_buckets
) {
427 free(VVGCache
.part
[dp
->index
].dlist_hash_buckets
);
428 VVGCache
.part
[dp
->index
].dlist_hash_buckets
= NULL
;
436 * looks up an entry on the to-delete list, if it exists.
438 * @param[in] dp the partition whose dlist we are looking at
439 * @param[in] parent the parent volume ID we're looking for
440 * @param[in] child the child volume ID we're looking for
442 * @return a pointer to the dlist entry for that parent/child pair
443 * @retval NULL the requested entry does not exist in the dlist
445 static VVGCache_dlist_entry_t
*
446 _VVGC_dlist_lookup_r(struct DiskPartition64
*dp
, VolumeId parent
,
449 int bucket
= VVGC_HASH(child
);
450 VVGCache_dlist_entry_t
*ent
, *nent
;
452 for (queue_Scan(&VVGCache
.part
[dp
->index
].dlist_hash_buckets
[bucket
],
454 VVGCache_dlist_entry
)) {
456 if (ent
->child
== child
&& ent
->parent
== parent
) {
465 * delete all of the entries in the dlist from the VGC.
467 * Traverses the to-delete list for the specified partition, and deletes
468 * the specified entries from the global VGC. Also deletes the entries from
469 * the dlist itself as it goes along.
471 * @param[in] dp the partition whose dlist we are flushing
474 _VVGC_flush_dlist(struct DiskPartition64
*dp
)
477 VVGCache_dlist_entry_t
*ent
, *nent
;
479 for (i
= 0; i
< VolumeHashTable
.Size
; i
++) {
480 for (queue_Scan(&VVGCache
.part
[dp
->index
].dlist_hash_buckets
[i
],
482 VVGCache_dlist_entry
)) {
484 _VVGC_entry_purge_r(dp
, ent
->parent
, ent
->child
);
492 * add a VGC entry to the partition's to-delete list.
494 * This adds a VGC entry (a parent/child pair) to a list of VGC entries to
495 * be deleted from the VGC at the end of a VGC scan. This is necessary because,
496 * while a VGC scan is occurring, volumes may be deleted. Since a VGC scan
497 * scans a partition in VVGC_SCAN_TBL_LEN chunks, a VGC delete operation
498 * may delete a volume, only for it to be added again when the VGC scan's
499 * table adds it to the VGC. So when a VGC entry is deleted and a VGC scan
500 * is running, this function must be called to ensure it does not come back onto the VGC.
503 * @param[in] dp the partition to whose dlist we are adding
504 * @param[in] parent the parent volumeID of the VGC entry
505 * @param[in] child the child volumeID of the VGC entry
507 * @return operation status
509 * @retval ENOMEM memory allocation error
511 * @pre VVGCache.part[dp->index].state == VVGC_PART_STATE_UPDATING
513 * @internal VGC use only
516 _VVGC_dlist_add_r(struct DiskPartition64
*dp
, VolumeId parent
,
519 int bucket
= VVGC_HASH(child
);
520 VVGCache_dlist_entry_t
*entry
;
522 entry
= malloc(sizeof(*entry
));
527 entry
->child
= child
;
528 entry
->parent
= parent
;
530 queue_Append(&VVGCache
.part
[dp
->index
].dlist_hash_buckets
[bucket
],
536 * delete a VGC entry from the partition's to-delete list.
538 * When a VGC scan is occurring, and a volume is removed, but then created
539 * again, we need to ensure that it does not get deleted due to being on the
540 * dlist. Call this function whenever adding a new entry to the VGC during
541 * a VGC scan to ensure it doesn't get deleted later.
543 * @param[in] dp the partition from whose dlist we are deleting
544 * @param[in] parent the parent volumeID of the VGC entry
545 * @param[in] child the child volumeID of the VGC entry
547 * @return operation status
549 * @retval ENOENT the specified VGC entry is not on the dlist
551 * @pre VVGCache.part[dp->index].state == VVGC_PART_STATE_UPDATING
553 * @internal VGC use only
555 * @see _VVGC_dlist_add_r
558 _VVGC_dlist_del_r(struct DiskPartition64
*dp
, VolumeId parent
,
561 VVGCache_dlist_entry_t
*ent
;
563 ent
= _VVGC_dlist_lookup_r(dp
, parent
, child
);
574 #endif /* AFS_DEMAND_ATTACH_FS */