Linux 2.6.33-rc2
[pohmelfs.git] / fs / afs / vlocation.c
blob6e689208def255c946cf39d1f44c562aedf51448
/* AFS volume location management
 *
 * Copyright (C) 2002, 2007 Red Hat, Inc. All Rights Reserved.
 * Written by David Howells (dhowells@redhat.com)
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 */
12 #include <linux/kernel.h>
13 #include <linux/module.h>
14 #include <linux/init.h>
15 #include <linux/sched.h>
16 #include "internal.h"
/* Seconds an unreferenced record stays in the graveyard before it is reaped. */
static unsigned afs_vlocation_timeout = 10;

/* Seconds between periodic refreshes of a record (ten minutes). */
static unsigned afs_vlocation_update_timeout = 10 * 60;

static void afs_vlocation_reaper(struct work_struct *);
static void afs_vlocation_updater(struct work_struct *);

/* Records queued for periodic update, and the lock guarding that queue. */
static LIST_HEAD(afs_vlocation_updates);
static DEFINE_SPINLOCK(afs_vlocation_updates_lock);

/* Unreferenced records awaiting destruction, and the lock guarding them. */
static LIST_HEAD(afs_vlocation_graveyard);
static DEFINE_SPINLOCK(afs_vlocation_graveyard_lock);

/* Deferred work items that drive the reaper and the updater. */
static DECLARE_DELAYED_WORK(afs_vlocation_reap, afs_vlocation_reaper);
static DECLARE_DELAYED_WORK(afs_vlocation_update, afs_vlocation_updater);

/* Workqueue the updater runs on; created by afs_vlocation_update_init(). */
static struct workqueue_struct *afs_vlocation_update_worker;
/*
 * Query the cell's VL servers by volume name.
 *
 * @vl:   record whose vldb.name is looked up and whose cell supplies the
 *        server list
 * @key:  key handed to the VL RPC
 * @vldb: buffer handed to afs_vl_get_entry_by_name() for the reply
 *
 * Starting at cell->vl_curr_svix, at most one attempt is made per configured
 * server address.  The whole walk is done with the cell's vl_sem held for
 * writing.
 *
 * Returns 0 on success.  -ENOMEM, -ENONET, -ENOMEDIUM, -EKEYREJECTED and
 * -EKEYEXPIRED end the walk immediately and are returned as they are.
 * Unreachable/refused servers are skipped; any other error is turned into
 * -EIO and the next server is tried.  -ENOMEDIUM is also the result when the
 * cell has no VL server addresses.
 */
static int afs_vlocation_access_vl_by_name(struct afs_vlocation *vl,
					   struct key *key,
					   struct afs_cache_vlocation *vldb)
{
	struct afs_cell *cell = vl->cell;
	struct in_addr server;
	int remaining;
	int ret = -ENOMEDIUM;

	_enter("%s,%s", cell->name, vl->vldb.name);

	down_write(&cell->vl_sem);

	for (remaining = cell->vl_naddrs; remaining > 0; remaining--) {
		server = cell->vl_addrs[cell->vl_curr_svix];

		_debug("CellServ[%hu]: %08x", cell->vl_curr_svix, server.s_addr);

		/* attempt to access the VL server */
		ret = afs_vl_get_entry_by_name(&server, key, vl->vldb.name,
					       vldb, &afs_sync_call);
		switch (ret) {
		case 0:
		case -ENOMEM:
		case -ENONET:
		case -ENOMEDIUM:
		case -EKEYREJECTED:
		case -EKEYEXPIRED:
			/* success, or a failure another server won't cure */
			goto out;

		case -ENETUNREACH:
		case -EHOSTUNREACH:
		case -ECONNREFUSED:
			/* this server can't be reached - keep the error */
			break;

		default:
			ret = -EIO;
			break;
		}

		/* rotate the server records upon lookup failure */
		cell->vl_curr_svix++;
		cell->vl_curr_svix %= cell->vl_naddrs;
	}

out:
	up_write(&cell->vl_sem);
	_leave(" = %d", ret);
	return ret;
}
89 * iterate through the VL servers in a cell until one of them admits knowing
90 * about the volume in question
92 static int afs_vlocation_access_vl_by_id(struct afs_vlocation *vl,
93 struct key *key,
94 afs_volid_t volid,
95 afs_voltype_t voltype,
96 struct afs_cache_vlocation *vldb)
98 struct afs_cell *cell = vl->cell;
99 struct in_addr addr;
100 int count, ret;
102 _enter("%s,%x,%d,", cell->name, volid, voltype);
104 down_write(&vl->cell->vl_sem);
105 ret = -ENOMEDIUM;
106 for (count = cell->vl_naddrs; count > 0; count--) {
107 addr = cell->vl_addrs[cell->vl_curr_svix];
109 _debug("CellServ[%hu]: %08x", cell->vl_curr_svix, addr.s_addr);
111 /* attempt to access the VL server */
112 ret = afs_vl_get_entry_by_id(&addr, key, volid, voltype, vldb,
113 &afs_sync_call);
114 switch (ret) {
115 case 0:
116 goto out;
117 case -ENOMEM:
118 case -ENONET:
119 case -ENETUNREACH:
120 case -EHOSTUNREACH:
121 case -ECONNREFUSED:
122 if (ret == -ENOMEM || ret == -ENONET)
123 goto out;
124 goto rotate;
125 case -EBUSY:
126 vl->upd_busy_cnt++;
127 if (vl->upd_busy_cnt <= 3) {
128 if (vl->upd_busy_cnt > 1) {
129 /* second+ BUSY - sleep a little bit */
130 set_current_state(TASK_UNINTERRUPTIBLE);
131 schedule_timeout(1);
132 __set_current_state(TASK_RUNNING);
134 continue;
136 break;
137 case -ENOMEDIUM:
138 vl->upd_rej_cnt++;
139 goto rotate;
140 default:
141 ret = -EIO;
142 goto rotate;
145 /* rotate the server records upon lookup failure */
146 rotate:
147 cell->vl_curr_svix++;
148 cell->vl_curr_svix %= cell->vl_naddrs;
149 vl->upd_busy_cnt = 0;
152 out:
153 if (ret < 0 && vl->upd_rej_cnt > 0) {
154 printk(KERN_NOTICE "kAFS:"
155 " Active volume no longer valid '%s'\n",
156 vl->vldb.name);
157 vl->valid = 0;
158 ret = -ENOMEDIUM;
161 up_write(&vl->cell->vl_sem);
162 _leave(" = %d", ret);
163 return ret;
167 * allocate a volume location record
169 static struct afs_vlocation *afs_vlocation_alloc(struct afs_cell *cell,
170 const char *name,
171 size_t namesz)
173 struct afs_vlocation *vl;
175 vl = kzalloc(sizeof(struct afs_vlocation), GFP_KERNEL);
176 if (vl) {
177 vl->cell = cell;
178 vl->state = AFS_VL_NEW;
179 atomic_set(&vl->usage, 1);
180 INIT_LIST_HEAD(&vl->link);
181 INIT_LIST_HEAD(&vl->grave);
182 INIT_LIST_HEAD(&vl->update);
183 init_waitqueue_head(&vl->waitq);
184 spin_lock_init(&vl->lock);
185 memcpy(vl->vldb.name, name, namesz);
188 _leave(" = %p", vl);
189 return vl;
193 * update record if we found it in the cache
195 static int afs_vlocation_update_record(struct afs_vlocation *vl,
196 struct key *key,
197 struct afs_cache_vlocation *vldb)
199 afs_voltype_t voltype;
200 afs_volid_t vid;
201 int ret;
203 /* try to look up a cached volume in the cell VL databases by ID */
204 _debug("Locally Cached: %s %02x { %08x(%x) %08x(%x) %08x(%x) }",
205 vl->vldb.name,
206 vl->vldb.vidmask,
207 ntohl(vl->vldb.servers[0].s_addr),
208 vl->vldb.srvtmask[0],
209 ntohl(vl->vldb.servers[1].s_addr),
210 vl->vldb.srvtmask[1],
211 ntohl(vl->vldb.servers[2].s_addr),
212 vl->vldb.srvtmask[2]);
214 _debug("Vids: %08x %08x %08x",
215 vl->vldb.vid[0],
216 vl->vldb.vid[1],
217 vl->vldb.vid[2]);
219 if (vl->vldb.vidmask & AFS_VOL_VTM_RW) {
220 vid = vl->vldb.vid[0];
221 voltype = AFSVL_RWVOL;
222 } else if (vl->vldb.vidmask & AFS_VOL_VTM_RO) {
223 vid = vl->vldb.vid[1];
224 voltype = AFSVL_ROVOL;
225 } else if (vl->vldb.vidmask & AFS_VOL_VTM_BAK) {
226 vid = vl->vldb.vid[2];
227 voltype = AFSVL_BACKVOL;
228 } else {
229 BUG();
230 vid = 0;
231 voltype = 0;
234 /* contact the server to make sure the volume is still available
235 * - TODO: need to handle disconnected operation here
237 ret = afs_vlocation_access_vl_by_id(vl, key, vid, voltype, vldb);
238 switch (ret) {
239 /* net error */
240 default:
241 printk(KERN_WARNING "kAFS:"
242 " failed to update volume '%s' (%x) up in '%s': %d\n",
243 vl->vldb.name, vid, vl->cell->name, ret);
244 _leave(" = %d", ret);
245 return ret;
247 /* pulled from local cache into memory */
248 case 0:
249 _leave(" = 0");
250 return 0;
252 /* uh oh... looks like the volume got deleted */
253 case -ENOMEDIUM:
254 printk(KERN_ERR "kAFS:"
255 " volume '%s' (%x) does not exist '%s'\n",
256 vl->vldb.name, vid, vl->cell->name);
258 /* TODO: make existing record unavailable */
259 _leave(" = %d", ret);
260 return ret;
265 * apply the update to a VL record
267 static void afs_vlocation_apply_update(struct afs_vlocation *vl,
268 struct afs_cache_vlocation *vldb)
270 _debug("Done VL Lookup: %s %02x { %08x(%x) %08x(%x) %08x(%x) }",
271 vldb->name, vldb->vidmask,
272 ntohl(vldb->servers[0].s_addr), vldb->srvtmask[0],
273 ntohl(vldb->servers[1].s_addr), vldb->srvtmask[1],
274 ntohl(vldb->servers[2].s_addr), vldb->srvtmask[2]);
276 _debug("Vids: %08x %08x %08x",
277 vldb->vid[0], vldb->vid[1], vldb->vid[2]);
279 if (strcmp(vldb->name, vl->vldb.name) != 0)
280 printk(KERN_NOTICE "kAFS:"
281 " name of volume '%s' changed to '%s' on server\n",
282 vl->vldb.name, vldb->name);
284 vl->vldb = *vldb;
286 #ifdef CONFIG_AFS_FSCACHE
287 fscache_update_cookie(vl->cache);
288 #endif
292 * fill in a volume location record, consulting the cache and the VL server
293 * both
295 static int afs_vlocation_fill_in_record(struct afs_vlocation *vl,
296 struct key *key)
298 struct afs_cache_vlocation vldb;
299 int ret;
301 _enter("");
303 ASSERTCMP(vl->valid, ==, 0);
305 memset(&vldb, 0, sizeof(vldb));
307 /* see if we have an in-cache copy (will set vl->valid if there is) */
308 #ifdef CONFIG_AFS_FSCACHE
309 vl->cache = fscache_acquire_cookie(vl->cell->cache,
310 &afs_vlocation_cache_index_def, vl);
311 #endif
313 if (vl->valid) {
314 /* try to update a known volume in the cell VL databases by
315 * ID as the name may have changed */
316 _debug("found in cache");
317 ret = afs_vlocation_update_record(vl, key, &vldb);
318 } else {
319 /* try to look up an unknown volume in the cell VL databases by
320 * name */
321 ret = afs_vlocation_access_vl_by_name(vl, key, &vldb);
322 if (ret < 0) {
323 printk("kAFS: failed to locate '%s' in cell '%s'\n",
324 vl->vldb.name, vl->cell->name);
325 return ret;
329 afs_vlocation_apply_update(vl, &vldb);
330 _leave(" = 0");
331 return 0;
335 * queue a vlocation record for updates
337 static void afs_vlocation_queue_for_updates(struct afs_vlocation *vl)
339 struct afs_vlocation *xvl;
341 /* wait at least 10 minutes before updating... */
342 vl->update_at = get_seconds() + afs_vlocation_update_timeout;
344 spin_lock(&afs_vlocation_updates_lock);
346 if (!list_empty(&afs_vlocation_updates)) {
347 /* ... but wait at least 1 second more than the newest record
348 * already queued so that we don't spam the VL server suddenly
349 * with lots of requests
351 xvl = list_entry(afs_vlocation_updates.prev,
352 struct afs_vlocation, update);
353 if (vl->update_at <= xvl->update_at)
354 vl->update_at = xvl->update_at + 1;
355 } else {
356 queue_delayed_work(afs_vlocation_update_worker,
357 &afs_vlocation_update,
358 afs_vlocation_update_timeout * HZ);
361 list_add_tail(&vl->update, &afs_vlocation_updates);
362 spin_unlock(&afs_vlocation_updates_lock);
366 * lookup volume location
367 * - iterate through the VL servers in a cell until one of them admits knowing
368 * about the volume in question
369 * - lookup in the local cache if not able to find on the VL server
370 * - insert/update in the local cache if did get a VL response
372 struct afs_vlocation *afs_vlocation_lookup(struct afs_cell *cell,
373 struct key *key,
374 const char *name,
375 size_t namesz)
377 struct afs_vlocation *vl;
378 int ret;
380 _enter("{%s},{%x},%*.*s,%zu",
381 cell->name, key_serial(key),
382 (int) namesz, (int) namesz, name, namesz);
384 if (namesz >= sizeof(vl->vldb.name)) {
385 _leave(" = -ENAMETOOLONG");
386 return ERR_PTR(-ENAMETOOLONG);
389 /* see if we have an in-memory copy first */
390 down_write(&cell->vl_sem);
391 spin_lock(&cell->vl_lock);
392 list_for_each_entry(vl, &cell->vl_list, link) {
393 if (vl->vldb.name[namesz] != '\0')
394 continue;
395 if (memcmp(vl->vldb.name, name, namesz) == 0)
396 goto found_in_memory;
398 spin_unlock(&cell->vl_lock);
400 /* not in the cell's in-memory lists - create a new record */
401 vl = afs_vlocation_alloc(cell, name, namesz);
402 if (!vl) {
403 up_write(&cell->vl_sem);
404 return ERR_PTR(-ENOMEM);
407 afs_get_cell(cell);
409 list_add_tail(&vl->link, &cell->vl_list);
410 vl->state = AFS_VL_CREATING;
411 up_write(&cell->vl_sem);
413 fill_in_record:
414 ret = afs_vlocation_fill_in_record(vl, key);
415 if (ret < 0)
416 goto error_abandon;
417 spin_lock(&vl->lock);
418 vl->state = AFS_VL_VALID;
419 spin_unlock(&vl->lock);
420 wake_up(&vl->waitq);
422 /* update volume entry in local cache */
423 #ifdef CONFIG_AFS_FSCACHE
424 fscache_update_cookie(vl->cache);
425 #endif
427 /* schedule for regular updates */
428 afs_vlocation_queue_for_updates(vl);
429 goto success;
431 found_in_memory:
432 /* found in memory */
433 _debug("found in memory");
434 atomic_inc(&vl->usage);
435 spin_unlock(&cell->vl_lock);
436 if (!list_empty(&vl->grave)) {
437 spin_lock(&afs_vlocation_graveyard_lock);
438 list_del_init(&vl->grave);
439 spin_unlock(&afs_vlocation_graveyard_lock);
441 up_write(&cell->vl_sem);
443 /* see if it was an abandoned record that we might try filling in */
444 spin_lock(&vl->lock);
445 while (vl->state != AFS_VL_VALID) {
446 afs_vlocation_state_t state = vl->state;
448 _debug("invalid [state %d]", state);
450 if (state == AFS_VL_NEW || state == AFS_VL_NO_VOLUME) {
451 vl->state = AFS_VL_CREATING;
452 spin_unlock(&vl->lock);
453 goto fill_in_record;
456 /* must now wait for creation or update by someone else to
457 * complete */
458 _debug("wait");
460 spin_unlock(&vl->lock);
461 ret = wait_event_interruptible(vl->waitq,
462 vl->state == AFS_VL_NEW ||
463 vl->state == AFS_VL_VALID ||
464 vl->state == AFS_VL_NO_VOLUME);
465 if (ret < 0)
466 goto error;
467 spin_lock(&vl->lock);
469 spin_unlock(&vl->lock);
471 success:
472 _leave(" = %p", vl);
473 return vl;
475 error_abandon:
476 spin_lock(&vl->lock);
477 vl->state = AFS_VL_NEW;
478 spin_unlock(&vl->lock);
479 wake_up(&vl->waitq);
480 error:
481 ASSERT(vl != NULL);
482 afs_put_vlocation(vl);
483 _leave(" = %d", ret);
484 return ERR_PTR(ret);
488 * finish using a volume location record
490 void afs_put_vlocation(struct afs_vlocation *vl)
492 if (!vl)
493 return;
495 _enter("%s", vl->vldb.name);
497 ASSERTCMP(atomic_read(&vl->usage), >, 0);
499 if (likely(!atomic_dec_and_test(&vl->usage))) {
500 _leave("");
501 return;
504 spin_lock(&afs_vlocation_graveyard_lock);
505 if (atomic_read(&vl->usage) == 0) {
506 _debug("buried");
507 list_move_tail(&vl->grave, &afs_vlocation_graveyard);
508 vl->time_of_death = get_seconds();
509 schedule_delayed_work(&afs_vlocation_reap,
510 afs_vlocation_timeout * HZ);
512 /* suspend updates on this record */
513 if (!list_empty(&vl->update)) {
514 spin_lock(&afs_vlocation_updates_lock);
515 list_del_init(&vl->update);
516 spin_unlock(&afs_vlocation_updates_lock);
519 spin_unlock(&afs_vlocation_graveyard_lock);
520 _leave(" [killed?]");
524 * destroy a dead volume location record
526 static void afs_vlocation_destroy(struct afs_vlocation *vl)
528 _enter("%p", vl);
530 #ifdef CONFIG_AFS_FSCACHE
531 fscache_relinquish_cookie(vl->cache, 0);
532 #endif
533 afs_put_cell(vl->cell);
534 kfree(vl);
538 * reap dead volume location records
540 static void afs_vlocation_reaper(struct work_struct *work)
542 LIST_HEAD(corpses);
543 struct afs_vlocation *vl;
544 unsigned long delay, expiry;
545 time_t now;
547 _enter("");
549 now = get_seconds();
550 spin_lock(&afs_vlocation_graveyard_lock);
552 while (!list_empty(&afs_vlocation_graveyard)) {
553 vl = list_entry(afs_vlocation_graveyard.next,
554 struct afs_vlocation, grave);
556 _debug("check %p", vl);
558 /* the queue is ordered most dead first */
559 expiry = vl->time_of_death + afs_vlocation_timeout;
560 if (expiry > now) {
561 delay = (expiry - now) * HZ;
562 _debug("delay %lu", delay);
563 if (!schedule_delayed_work(&afs_vlocation_reap,
564 delay)) {
565 cancel_delayed_work(&afs_vlocation_reap);
566 schedule_delayed_work(&afs_vlocation_reap,
567 delay);
569 break;
572 spin_lock(&vl->cell->vl_lock);
573 if (atomic_read(&vl->usage) > 0) {
574 _debug("no reap");
575 list_del_init(&vl->grave);
576 } else {
577 _debug("reap");
578 list_move_tail(&vl->grave, &corpses);
579 list_del_init(&vl->link);
581 spin_unlock(&vl->cell->vl_lock);
584 spin_unlock(&afs_vlocation_graveyard_lock);
586 /* now reap the corpses we've extracted */
587 while (!list_empty(&corpses)) {
588 vl = list_entry(corpses.next, struct afs_vlocation, grave);
589 list_del(&vl->grave);
590 afs_vlocation_destroy(vl);
593 _leave("");
597 * initialise the VL update process
599 int __init afs_vlocation_update_init(void)
601 afs_vlocation_update_worker =
602 create_singlethread_workqueue("kafs_vlupdated");
603 return afs_vlocation_update_worker ? 0 : -ENOMEM;
607 * discard all the volume location records for rmmod
609 void afs_vlocation_purge(void)
611 afs_vlocation_timeout = 0;
613 spin_lock(&afs_vlocation_updates_lock);
614 list_del_init(&afs_vlocation_updates);
615 spin_unlock(&afs_vlocation_updates_lock);
616 cancel_delayed_work(&afs_vlocation_update);
617 queue_delayed_work(afs_vlocation_update_worker,
618 &afs_vlocation_update, 0);
619 destroy_workqueue(afs_vlocation_update_worker);
621 cancel_delayed_work(&afs_vlocation_reap);
622 schedule_delayed_work(&afs_vlocation_reap, 0);
626 * update a volume location
628 static void afs_vlocation_updater(struct work_struct *work)
630 struct afs_cache_vlocation vldb;
631 struct afs_vlocation *vl, *xvl;
632 time_t now;
633 long timeout;
634 int ret;
636 _enter("");
638 now = get_seconds();
640 /* find a record to update */
641 spin_lock(&afs_vlocation_updates_lock);
642 for (;;) {
643 if (list_empty(&afs_vlocation_updates)) {
644 spin_unlock(&afs_vlocation_updates_lock);
645 _leave(" [nothing]");
646 return;
649 vl = list_entry(afs_vlocation_updates.next,
650 struct afs_vlocation, update);
651 if (atomic_read(&vl->usage) > 0)
652 break;
653 list_del_init(&vl->update);
656 timeout = vl->update_at - now;
657 if (timeout > 0) {
658 queue_delayed_work(afs_vlocation_update_worker,
659 &afs_vlocation_update, timeout * HZ);
660 spin_unlock(&afs_vlocation_updates_lock);
661 _leave(" [nothing]");
662 return;
665 list_del_init(&vl->update);
666 atomic_inc(&vl->usage);
667 spin_unlock(&afs_vlocation_updates_lock);
669 /* we can now perform the update */
670 _debug("update %s", vl->vldb.name);
671 vl->state = AFS_VL_UPDATING;
672 vl->upd_rej_cnt = 0;
673 vl->upd_busy_cnt = 0;
675 ret = afs_vlocation_update_record(vl, NULL, &vldb);
676 spin_lock(&vl->lock);
677 switch (ret) {
678 case 0:
679 afs_vlocation_apply_update(vl, &vldb);
680 vl->state = AFS_VL_VALID;
681 break;
682 case -ENOMEDIUM:
683 vl->state = AFS_VL_VOLUME_DELETED;
684 break;
685 default:
686 vl->state = AFS_VL_UNCERTAIN;
687 break;
689 spin_unlock(&vl->lock);
690 wake_up(&vl->waitq);
692 /* and then reschedule */
693 _debug("reschedule");
694 vl->update_at = get_seconds() + afs_vlocation_update_timeout;
696 spin_lock(&afs_vlocation_updates_lock);
698 if (!list_empty(&afs_vlocation_updates)) {
699 /* next update in 10 minutes, but wait at least 1 second more
700 * than the newest record already queued so that we don't spam
701 * the VL server suddenly with lots of requests
703 xvl = list_entry(afs_vlocation_updates.prev,
704 struct afs_vlocation, update);
705 if (vl->update_at <= xvl->update_at)
706 vl->update_at = xvl->update_at + 1;
707 xvl = list_entry(afs_vlocation_updates.next,
708 struct afs_vlocation, update);
709 timeout = xvl->update_at - now;
710 if (timeout < 0)
711 timeout = 0;
712 } else {
713 timeout = afs_vlocation_update_timeout;
716 ASSERT(list_empty(&vl->update));
718 list_add_tail(&vl->update, &afs_vlocation_updates);
720 _debug("timeout %ld", timeout);
721 queue_delayed_work(afs_vlocation_update_worker,
722 &afs_vlocation_update, timeout * HZ);
723 spin_unlock(&afs_vlocation_updates_lock);
724 afs_put_vlocation(vl);