Update NEWS for 1.6.22
[pkg-k5-afs_openafs.git] / src / vol / vg_scan.c
blob7521878221e23d53e31629dc67478bd9fb4a492d
1 /*
2 * Copyright 2009-2010, Sine Nomine Associates and others.
3 * All Rights Reserved.
5 * This software has been released under the terms of the IBM Public
6 * License. For details, see the LICENSE file in the top-level source
7 * directory or online at http://www.openafs.org/dl/license10.html
8 */
11 * demand attach fs
12 * volume group membership cache
13 * asynchronous partition scanner
16 #include <afsconfig.h>
17 #include <afs/param.h>
19 #ifdef AFS_DEMAND_ATTACH_FS
21 #include <stdio.h>
22 #include <stdlib.h>
23 #include <errno.h>
24 #include <fcntl.h>
25 #include <sys/stat.h>
26 #include <dirent.h>
27 #include <afs/afs_assert.h>
28 #include <string.h>
29 #ifdef AFS_NT40_ENV
30 #include <io.h>
31 #else
32 #include <sys/file.h>
33 #include <sys/param.h>
34 #if defined(AFS_SUN5_ENV) || defined(AFS_HPUX_ENV)
35 #include <unistd.h>
36 #endif
37 #endif /* AFS_NT40_ENV */
38 #include <lock.h>
39 #include <afs/afsutil.h>
40 #include <lwp.h>
41 #include "nfs.h"
42 #include <afs/afsint.h>
43 #include "ihandle.h"
44 #include "vnode.h"
45 #include "volume.h"
46 #include "viceinode.h"
47 #include "voldefs.h"
48 #include "partition.h"
49 #include <afs/errors.h>
51 #define __VOL_VG_CACHE_IMPL 1
53 #include "vg_cache.h"
54 #include "vg_cache_impl.h"
56 #ifdef O_LARGEFILE
57 #define afs_open open64
58 #else /* !O_LARGEFILE */
59 #define afs_open open
60 #endif /* !O_LARGEFILE */
62 static int _VVGC_scan_table_init(VVGCache_scan_table_t * tbl);
63 static int _VVGC_scan_table_add(VVGCache_scan_table_t * tbl,
64 struct DiskPartition64 * dp,
65 VolumeId volid,
66 VolumeId parent);
67 static int _VVGC_scan_table_flush(VVGCache_scan_table_t * tbl,
68 struct DiskPartition64 * dp);
69 static void * _VVGC_scanner_thread(void *);
70 static int _VVGC_scan_partition(struct DiskPartition64 * part);
71 static VVGCache_dlist_entry_t * _VVGC_dlist_lookup_r(struct DiskPartition64 *dp,
72 VolumeId parent,
73 VolumeId child);
74 static void _VVGC_flush_dlist(struct DiskPartition64 *dp);
76 /**
77 * init a thread-local scan table.
79 * @param[in] tbl scan table
81 * @return operation status
82 * @retval 0 success
84 * @internal
86 static int
87 _VVGC_scan_table_init(VVGCache_scan_table_t * tbl)
89 memset(tbl, 0, sizeof(*tbl));
91 return 0;
94 /**
95 * add an entry to the thread-local scan table.
97 * @param[in] tbl scan table
98 * @param[in] dp disk partition object
99 * @param[in] volid volume id
100 * @param[in] parent parent volume id
102 * @pre VOL_LOCK is NOT held
104 * @note if the table is full, this routine will acquire
105 * VOL_LOCK and flush the table to the global one.
107 * @return operation status
108 * @retval 0 success
109 * @retval nonzero a VVGCache_entry_add_r operation failed during a
110 * flush of the thread-local table
112 * @internal
114 static int
115 _VVGC_scan_table_add(VVGCache_scan_table_t * tbl,
116 struct DiskPartition64 * dp,
117 VolumeId volid,
118 VolumeId parent)
120 int code = 0;
122 if (tbl->idx == VVGC_SCAN_TBL_LEN) {
123 code = _VVGC_scan_table_flush(tbl, dp);
126 tbl->entries[tbl->idx].volid = volid;
127 tbl->entries[tbl->idx].parent = parent;
128 tbl->idx++;
130 return code;
134 * flush thread-local scan table to the global VG cache.
136 * @param[in] tbl scan table
137 * @param[in] dp disk partition object
139 * @pre VOL_LOCK is NOT held
141 * @return operation status
142 * @retval 0 success
143 * @retval nonzero a VVGCache_entry_add_r operation failed during a
144 * flush of the thread-local table
146 * @internal
148 static int
149 _VVGC_scan_table_flush(VVGCache_scan_table_t * tbl,
150 struct DiskPartition64 * dp)
152 int code = 0, res, i;
153 afs_int32 newvg = 0;
154 unsigned long newvols, newvgs;
156 newvols = tbl->newvols;
157 newvgs = tbl->newvgs;
159 VOL_LOCK;
161 for (i = 0; i < tbl->idx; i++) {
163 * We need to check the 'to-delete' list and prevent adding any entries
164 * that are on it. The volser could potentially create a volume in one
165 * VG, then delete it and put it on another VG. If we are doing a scan
166 * when that happens, tbl->entries could have the entries for trying to
167 * put the vol on both VGs, though at least one of them will also be on
168 * the dlist. If we put everything in tbl->entries on the VGC then try
169 * to delete afterwards, putting one entry on the VGC cause an error,
170 * and we'll fail to add it. So instead, avoid adding any new VGC
171 * entries if it is on the dlist.
173 if (_VVGC_dlist_lookup_r(dp, tbl->entries[i].parent,
174 tbl->entries[i].volid)) {
175 continue;
177 res = VVGCache_entry_add_r(dp,
178 tbl->entries[i].parent,
179 tbl->entries[i].volid,
180 &newvg);
181 if (res) {
182 code = res;
183 } else {
184 newvols++;
185 newvgs += newvg;
189 /* flush the to-delete list while we're here. We don't need to preserve
190 * the list across the entire scan, and flushing it each time we flush
191 * a scan table will keep the size of the dlist down */
192 _VVGC_flush_dlist(dp);
194 VOL_UNLOCK;
196 ViceLog(125, ("VVGC_scan_table_flush: flushed %d entries from "
197 "scan table to global VG cache\n", tbl->idx));
198 ViceLog(125, ("VVGC_scan_table_flush: %s total: %lu vols, %lu groups\n",
199 VPartitionPath(dp), newvols, newvgs));
201 res = _VVGC_scan_table_init(tbl);
202 if (res) {
203 code = res;
206 tbl->newvols = newvols;
207 tbl->newvgs = newvgs;
209 return code;
213 * record a volume header found by VWalkVolumeHeaders in a VGC scan table.
215 * @param[in] dp the disk partition
216 * @param[in] name full path to the .vol header (unused)
217 * @param[in] hdr the header data
218 * @param[in] last whether this is the last try or not (unused)
219 * @param[in] rock actually a VVGCache_scan_table_t* to add the volume to
221 * @return operation status
222 * @retval 0 success
223 * @retval -1 fatal error adding vol to the scan table
225 static int
226 _VVGC_RecordHeader(struct DiskPartition64 *dp, const char *name,
227 struct VolumeDiskHeader *hdr, int last, void *rock)
229 int code;
230 VVGCache_scan_table_t *tbl;
231 tbl = (VVGCache_scan_table_t *)rock;
233 code = _VVGC_scan_table_add(tbl, dp, hdr->id, hdr->parent);
234 if (code) {
235 ViceLog(0, ("VVGC_scan_partition: error %d adding volume %s to scan table\n",
236 code, name));
237 return -1;
239 return 0;
/**
 * VWalkVolumeHeaders callback: remove a faulty volume header file.
 *
 * The "deleted" message is logged before the unlink is attempted; a
 * second message is logged if the unlink itself fails.
 *
 * @param[in] dp    the disk partition (unused)
 * @param[in] name  the full path to the .vol header
 * @param[in] hdr   the header data (unused)
 * @param[in] rock  unused
 */
static void
_VVGC_UnlinkHeader(struct DiskPartition64 *dp, const char *name,
                   struct VolumeDiskHeader *hdr, void *rock)
{
    int unlink_rc;

    ViceLog(0, ("%s is not a legitimate volume header file; deleted\n", name));

    unlink_rc = unlink(name);
    if (unlink_rc != 0) {
        ViceLog(0, ("Unable to unlink %s (errno = %d)\n",
                    name, errno));
    }
}
262 * scan a disk partition for .vol files
264 * @param[in] part disk partition object
266 * @pre VOL_LOCK is NOT held
268 * @return operation status
269 * @retval 0 success
270 * @retval -1 invalid disk partition object
271 * @retval -2 failed to flush stale entries for this partition
273 * @internal
275 static int
276 _VVGC_scan_partition(struct DiskPartition64 * part)
278 int code, res;
279 DIR *dirp = NULL;
280 VVGCache_scan_table_t tbl;
281 char *part_path = NULL;
283 code = _VVGC_scan_table_init(&tbl);
284 if (code) {
285 ViceLog(0, ("VVGC_scan_partition: could not init scan table; error = %d\n",
286 code));
287 goto done;
289 part_path = VPartitionPath(part);
290 if (part_path == NULL) {
291 ViceLog(0, ("VVGC_scan_partition: invalid partition object given; aborting scan\n"));
292 code = -1;
293 goto done;
296 VOL_LOCK;
297 res = _VVGC_flush_part_r(part);
298 if (res) {
299 ViceLog(0, ("VVGC_scan_partition: error flushing partition %s; error = %d\n",
300 VPartitionPath(part), res));
301 code = -2;
303 VOL_UNLOCK;
304 if (code) {
305 goto done;
308 dirp = opendir(part_path);
309 if (dirp == NULL) {
310 ViceLog(0, ("VVGC_scan_partition: could not open %s, aborting scan; error = %d\n",
311 part_path, errno));
312 code = -1;
313 goto done;
316 ViceLog(5, ("VVGC_scan_partition: scanning partition %s for VG cache\n",
317 part_path));
319 code = VWalkVolumeHeaders(part, part_path, _VVGC_RecordHeader,
320 _VVGC_UnlinkHeader, &tbl);
321 if (code < 0) {
322 goto done;
325 _VVGC_scan_table_flush(&tbl, part);
327 done:
328 if (dirp) {
329 closedir(dirp);
330 dirp = NULL;
332 if (code) {
333 ViceLog(0, ("VVGC_scan_partition: error %d while scanning %s\n",
334 code, part_path));
335 } else {
336 ViceLog(0, ("VVGC_scan_partition: finished scanning %s: %lu volumes in %lu groups\n",
337 part_path, tbl.newvols, tbl.newvgs));
340 VOL_LOCK;
342 _VVGC_flush_dlist(part);
343 free(VVGCache.part[part->index].dlist_hash_buckets);
344 VVGCache.part[part->index].dlist_hash_buckets = NULL;
346 if (code) {
347 _VVGC_state_change(part, VVGC_PART_STATE_INVALID);
348 } else {
349 _VVGC_state_change(part, VVGC_PART_STATE_VALID);
352 VOL_UNLOCK;
354 return code;
358 * scanner thread.
360 static void *
361 _VVGC_scanner_thread(void * args)
363 struct DiskPartition64 *part = args;
364 int code;
366 code = _VVGC_scan_partition(part);
367 if (code) {
368 ViceLog(0, ("Error: _VVGC_scan_partition failed with code %d for partition %s\n",
369 code, VPartitionPath(part)));
372 return NULL;
376 * start a background scan.
378 * @param[in] dp disk partition object
380 * @return operation status
381 * @retval 0 success
382 * @retval -1 internal error
383 * @retval -3 racing against another thread
385 * @internal
388 _VVGC_scan_start(struct DiskPartition64 * dp)
390 int code = 0;
391 pthread_t tid;
392 pthread_attr_t attrs;
393 int i;
395 if (_VVGC_state_change(dp,
396 VVGC_PART_STATE_UPDATING)
397 == VVGC_PART_STATE_UPDATING) {
398 /* race */
399 ViceLog(0, ("VVGC_scan_partition: race detected; aborting scanning partition %s\n",
400 VPartitionPath(dp)));
401 code = -3;
402 goto error;
405 /* initialize partition's to-delete list */
406 VVGCache.part[dp->index].dlist_hash_buckets =
407 malloc(VolumeHashTable.Size * sizeof(struct rx_queue));
408 if (!VVGCache.part[dp->index].dlist_hash_buckets) {
409 code = -1;
410 goto error;
412 for (i = 0; i < VolumeHashTable.Size; i++) {
413 queue_Init(&VVGCache.part[dp->index].dlist_hash_buckets[i]);
416 code = pthread_attr_init(&attrs);
417 if (code) {
418 goto error;
421 code = pthread_attr_setdetachstate(&attrs, PTHREAD_CREATE_DETACHED);
422 if (code) {
423 goto error;
426 code = pthread_create(&tid, &attrs, &_VVGC_scanner_thread, dp);
428 if (code) {
429 VVGCache_part_state_t old_state;
431 ViceLog(0, ("_VVGC_scan_start: pthread_create failed with %d\n", code));
433 old_state = _VVGC_state_change(dp, VVGC_PART_STATE_INVALID);
434 osi_Assert(old_state == VVGC_PART_STATE_UPDATING);
437 error:
438 if (code) {
439 ViceLog(0, ("_VVGC_scan_start failed with code %d for partition %s\n",
440 code, VPartitionPath(dp)));
441 if (VVGCache.part[dp->index].dlist_hash_buckets) {
442 free(VVGCache.part[dp->index].dlist_hash_buckets);
443 VVGCache.part[dp->index].dlist_hash_buckets = NULL;
447 return code;
451 * looks up an entry on the to-delete list, if it exists.
453 * @param[in] dp the partition whose dlist we are looking at
454 * @param[in] parent the parent volume ID we're looking for
455 * @param[in] child the child volume ID we're looking for
457 * @return a pointer to the entry in the dlist for that entry
458 * @retval NULL the requested entry does not exist in the dlist
460 static VVGCache_dlist_entry_t *
461 _VVGC_dlist_lookup_r(struct DiskPartition64 *dp, VolumeId parent,
462 VolumeId child)
464 int bucket = VVGC_HASH(child);
465 VVGCache_dlist_entry_t *ent, *nent;
467 for (queue_Scan(&VVGCache.part[dp->index].dlist_hash_buckets[bucket],
468 ent, nent,
469 VVGCache_dlist_entry)) {
471 if (ent->child == child && ent->parent == parent) {
472 return ent;
476 return NULL;
480 * delete all of the entries in the dlist from the VGC.
482 * Traverses the to-delete list for the specified partition, and deletes
483 * the specified entries from the global VGC. Also deletes the entries from
484 * the dlist itself as it goes along.
486 * @param[in] dp the partition whose dlist we are flushing
488 static void
489 _VVGC_flush_dlist(struct DiskPartition64 *dp)
491 int i;
492 VVGCache_dlist_entry_t *ent, *nent;
494 for (i = 0; i < VolumeHashTable.Size; i++) {
495 for (queue_Scan(&VVGCache.part[dp->index].dlist_hash_buckets[i],
496 ent, nent,
497 VVGCache_dlist_entry)) {
499 _VVGC_entry_purge_r(dp, ent->parent, ent->child);
500 queue_Remove(ent);
501 free(ent);
507 * add a VGC entry to the partition's to-delete list.
509 * This adds a VGC entry (a parent/child pair) to a list of VGC entries to
510 * be deleted from the VGC at the end of a VGC scan. This is necessary,
511 * while a VGC scan is ocurring, volumes may be deleted. Since a VGC scan
512 * scans a partition in VVGC_SCAN_TBL_LEN chunks, a VGC delete operation
513 * may delete a volume, only for it to be added again when the VGC scan's
514 * table adds it to the VGC. So when a VGC entry is deleted and a VGC scan
515 * is running, this function must be called to ensure it does not come
516 * back onto the VGC.
518 * @param[in] dp the partition to whose dlist we are adding
519 * @param[in] parent the parent volumeID of the VGC entry
520 * @param[in] child the child volumeID of the VGC entry
522 * @return operation status
523 * @retval 0 success
524 * @retval ENOMEM memory allocation error
526 * @pre VVGCache.part[dp->index].state == VVGC_PART_STATE_UPDATING
528 * @internal VGC use only
531 _VVGC_dlist_add_r(struct DiskPartition64 *dp, VolumeId parent,
532 VolumeId child)
534 int bucket = VVGC_HASH(child);
535 VVGCache_dlist_entry_t *entry;
537 entry = malloc(sizeof(*entry));
538 if (!entry) {
539 return ENOMEM;
542 entry->child = child;
543 entry->parent = parent;
545 queue_Append(&VVGCache.part[dp->index].dlist_hash_buckets[bucket],
546 entry);
547 return 0;
551 * delete a VGC entry from the partition's to-delete list.
553 * When a VGC scan is ocurring, and a volume is removed, but then created
554 * again, we need to ensure that it does not get deleted from being on the
555 * dlist. Call this function whenever adding a new entry to the VGC during
556 * a VGC scan to ensure it doesn't get deleted later.
558 * @param[in] dp the partition from whose dlist we are deleting
559 * @param[in] parent the parent volumeID of the VGC entry
560 * @param[in] child the child volumeID of the VGC entry
562 * @return operation status
563 * @retval 0 success
564 * @retval ENOENT the specified VGC entry is not on the dlist
566 * @pre VVGCache.part[dp->index].state == VVGC_PART_STATE_UPDATING
568 * @internal VGC use only
570 * @see _VVGC_dlist_add_r
573 _VVGC_dlist_del_r(struct DiskPartition64 *dp, VolumeId parent,
574 VolumeId child)
576 VVGCache_dlist_entry_t *ent;
578 ent = _VVGC_dlist_lookup_r(dp, parent, child);
579 if (!ent) {
580 return ENOENT;
583 queue_Remove(ent);
584 free(ent);
586 return 0;
589 #endif /* AFS_DEMAND_ATTACH_FS */