[TCP]: TCP_CONG_YEAH requires TCP_CONG_VEGAS
[linux-2.6/verdex.git] / fs / afs / vlocation.c
blob3370cdb72566ca6d4eada9d751aab58ebc341dc8
/* AFS volume location management
 *
 * Copyright (C) 2002, 2007 Red Hat, Inc. All Rights Reserved.
 * Written by David Howells (dhowells@redhat.com)
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 */
12 #include <linux/kernel.h>
13 #include <linux/module.h>
14 #include <linux/init.h>
15 #include "internal.h"
/* seconds an unreferenced record stays in the graveyard before being reaped */
unsigned afs_vlocation_timeout = 10;

/* seconds between periodic refreshes of a queued record */
unsigned afs_vlocation_update_timeout = 10 * 60;

static void afs_vlocation_reaper(struct work_struct *);
static void afs_vlocation_updater(struct work_struct *);

/* records queued for periodic update, the work item that services them and
 * the workqueue that work item is run on */
static LIST_HEAD(afs_vlocation_updates);
static DEFINE_SPINLOCK(afs_vlocation_updates_lock);
static DECLARE_DELAYED_WORK(afs_vlocation_update, afs_vlocation_updater);
static struct workqueue_struct *afs_vlocation_update_worker;

/* unreferenced records awaiting destruction and the work item that reaps
 * them */
static LIST_HEAD(afs_vlocation_graveyard);
static DEFINE_SPINLOCK(afs_vlocation_graveyard_lock);
static DECLARE_DELAYED_WORK(afs_vlocation_reap, afs_vlocation_reaper);
/*
 * Look a volume up by name, asking the cell's VL servers one after another.
 *
 * Starting at the cell's current server index, each address is tried at most
 * once.  The search stops on success, on -ENOMEM/-ENONET, or when a server
 * answers -ENOMEDIUM; any other failure moves on to the next server.  The
 * cell's vl_sem is held for writing throughout.
 *
 * vldb is passed through to afs_vl_get_entry_by_name().  Returns 0 or a
 * negative error code (-ENOMEDIUM if the cell has no VL addresses).
 */
static int afs_vlocation_access_vl_by_name(struct afs_vlocation *vl,
					   struct key *key,
					   struct afs_cache_vlocation *vldb)
{
	struct afs_cell *cell = vl->cell;
	struct in_addr server;
	int remaining;
	int ret = -ENOMEDIUM;

	_enter("%s,%s", cell->name, vl->vldb.name);

	down_write(&cell->vl_sem);

	for (remaining = cell->vl_naddrs; remaining > 0; remaining--) {
		server = cell->vl_addrs[cell->vl_curr_svix];

		_debug("CellServ[%hu]: %08x", cell->vl_curr_svix, server.s_addr);

		/* attempt to access the VL server */
		ret = afs_vl_get_entry_by_name(&server, key, vl->vldb.name,
					       vldb, &afs_sync_call);

		/* success and these errors end the search immediately */
		if (ret == 0 ||
		    ret == -ENOMEM ||
		    ret == -ENONET ||
		    ret == -ENOMEDIUM)
			break;

		/* unreachable-server errors are reported as they are; every
		 * other failure is reported as an I/O error */
		if (ret != -ENETUNREACH &&
		    ret != -EHOSTUNREACH &&
		    ret != -ECONNREFUSED)
			ret = -EIO;

		/* rotate the server records upon lookup failure */
		cell->vl_curr_svix++;
		cell->vl_curr_svix %= cell->vl_naddrs;
	}

	up_write(&cell->vl_sem);
	_leave(" = %d", ret);
	return ret;
}
86 * iterate through the VL servers in a cell until one of them admits knowing
87 * about the volume in question
89 static int afs_vlocation_access_vl_by_id(struct afs_vlocation *vl,
90 struct key *key,
91 afs_volid_t volid,
92 afs_voltype_t voltype,
93 struct afs_cache_vlocation *vldb)
95 struct afs_cell *cell = vl->cell;
96 struct in_addr addr;
97 int count, ret;
99 _enter("%s,%x,%d,", cell->name, volid, voltype);
101 down_write(&vl->cell->vl_sem);
102 ret = -ENOMEDIUM;
103 for (count = cell->vl_naddrs; count > 0; count--) {
104 addr = cell->vl_addrs[cell->vl_curr_svix];
106 _debug("CellServ[%hu]: %08x", cell->vl_curr_svix, addr.s_addr);
108 /* attempt to access the VL server */
109 ret = afs_vl_get_entry_by_id(&addr, key, volid, voltype, vldb,
110 &afs_sync_call);
111 switch (ret) {
112 case 0:
113 goto out;
114 case -ENOMEM:
115 case -ENONET:
116 case -ENETUNREACH:
117 case -EHOSTUNREACH:
118 case -ECONNREFUSED:
119 if (ret == -ENOMEM || ret == -ENONET)
120 goto out;
121 goto rotate;
122 case -EBUSY:
123 vl->upd_busy_cnt++;
124 if (vl->upd_busy_cnt <= 3) {
125 if (vl->upd_busy_cnt > 1) {
126 /* second+ BUSY - sleep a little bit */
127 set_current_state(TASK_UNINTERRUPTIBLE);
128 schedule_timeout(1);
129 __set_current_state(TASK_RUNNING);
131 continue;
133 break;
134 case -ENOMEDIUM:
135 vl->upd_rej_cnt++;
136 goto rotate;
137 default:
138 ret = -EIO;
139 goto rotate;
142 /* rotate the server records upon lookup failure */
143 rotate:
144 cell->vl_curr_svix++;
145 cell->vl_curr_svix %= cell->vl_naddrs;
146 vl->upd_busy_cnt = 0;
149 out:
150 if (ret < 0 && vl->upd_rej_cnt > 0) {
151 printk(KERN_NOTICE "kAFS:"
152 " Active volume no longer valid '%s'\n",
153 vl->vldb.name);
154 vl->valid = 0;
155 ret = -ENOMEDIUM;
158 up_write(&vl->cell->vl_sem);
159 _leave(" = %d", ret);
160 return ret;
164 * allocate a volume location record
166 static struct afs_vlocation *afs_vlocation_alloc(struct afs_cell *cell,
167 const char *name,
168 size_t namesz)
170 struct afs_vlocation *vl;
172 vl = kzalloc(sizeof(struct afs_vlocation), GFP_KERNEL);
173 if (vl) {
174 vl->cell = cell;
175 vl->state = AFS_VL_NEW;
176 atomic_set(&vl->usage, 1);
177 INIT_LIST_HEAD(&vl->link);
178 INIT_LIST_HEAD(&vl->grave);
179 INIT_LIST_HEAD(&vl->update);
180 init_waitqueue_head(&vl->waitq);
181 spin_lock_init(&vl->lock);
182 memcpy(vl->vldb.name, name, namesz);
185 _leave(" = %p", vl);
186 return vl;
190 * update record if we found it in the cache
192 static int afs_vlocation_update_record(struct afs_vlocation *vl,
193 struct key *key,
194 struct afs_cache_vlocation *vldb)
196 afs_voltype_t voltype;
197 afs_volid_t vid;
198 int ret;
200 /* try to look up a cached volume in the cell VL databases by ID */
201 _debug("Locally Cached: %s %02x { %08x(%x) %08x(%x) %08x(%x) }",
202 vl->vldb.name,
203 vl->vldb.vidmask,
204 ntohl(vl->vldb.servers[0].s_addr),
205 vl->vldb.srvtmask[0],
206 ntohl(vl->vldb.servers[1].s_addr),
207 vl->vldb.srvtmask[1],
208 ntohl(vl->vldb.servers[2].s_addr),
209 vl->vldb.srvtmask[2]);
211 _debug("Vids: %08x %08x %08x",
212 vl->vldb.vid[0],
213 vl->vldb.vid[1],
214 vl->vldb.vid[2]);
216 if (vl->vldb.vidmask & AFS_VOL_VTM_RW) {
217 vid = vl->vldb.vid[0];
218 voltype = AFSVL_RWVOL;
219 } else if (vl->vldb.vidmask & AFS_VOL_VTM_RO) {
220 vid = vl->vldb.vid[1];
221 voltype = AFSVL_ROVOL;
222 } else if (vl->vldb.vidmask & AFS_VOL_VTM_BAK) {
223 vid = vl->vldb.vid[2];
224 voltype = AFSVL_BACKVOL;
225 } else {
226 BUG();
227 vid = 0;
228 voltype = 0;
231 /* contact the server to make sure the volume is still available
232 * - TODO: need to handle disconnected operation here
234 ret = afs_vlocation_access_vl_by_id(vl, key, vid, voltype, vldb);
235 switch (ret) {
236 /* net error */
237 default:
238 printk(KERN_WARNING "kAFS:"
239 " failed to update volume '%s' (%x) up in '%s': %d\n",
240 vl->vldb.name, vid, vl->cell->name, ret);
241 _leave(" = %d", ret);
242 return ret;
244 /* pulled from local cache into memory */
245 case 0:
246 _leave(" = 0");
247 return 0;
249 /* uh oh... looks like the volume got deleted */
250 case -ENOMEDIUM:
251 printk(KERN_ERR "kAFS:"
252 " volume '%s' (%x) does not exist '%s'\n",
253 vl->vldb.name, vid, vl->cell->name);
255 /* TODO: make existing record unavailable */
256 _leave(" = %d", ret);
257 return ret;
262 * apply the update to a VL record
264 static void afs_vlocation_apply_update(struct afs_vlocation *vl,
265 struct afs_cache_vlocation *vldb)
267 _debug("Done VL Lookup: %s %02x { %08x(%x) %08x(%x) %08x(%x) }",
268 vldb->name, vldb->vidmask,
269 ntohl(vldb->servers[0].s_addr), vldb->srvtmask[0],
270 ntohl(vldb->servers[1].s_addr), vldb->srvtmask[1],
271 ntohl(vldb->servers[2].s_addr), vldb->srvtmask[2]);
273 _debug("Vids: %08x %08x %08x",
274 vldb->vid[0], vldb->vid[1], vldb->vid[2]);
276 if (strcmp(vldb->name, vl->vldb.name) != 0)
277 printk(KERN_NOTICE "kAFS:"
278 " name of volume '%s' changed to '%s' on server\n",
279 vl->vldb.name, vldb->name);
281 vl->vldb = *vldb;
283 #ifdef AFS_CACHING_SUPPORT
284 /* update volume entry in local cache */
285 cachefs_update_cookie(vl->cache);
286 #endif
290 * fill in a volume location record, consulting the cache and the VL server
291 * both
293 static int afs_vlocation_fill_in_record(struct afs_vlocation *vl,
294 struct key *key)
296 struct afs_cache_vlocation vldb;
297 int ret;
299 _enter("");
301 ASSERTCMP(vl->valid, ==, 0);
303 memset(&vldb, 0, sizeof(vldb));
305 /* see if we have an in-cache copy (will set vl->valid if there is) */
306 #ifdef AFS_CACHING_SUPPORT
307 cachefs_acquire_cookie(cell->cache,
308 &afs_volume_cache_index_def,
309 vlocation,
310 &vl->cache);
311 #endif
313 if (vl->valid) {
314 /* try to update a known volume in the cell VL databases by
315 * ID as the name may have changed */
316 _debug("found in cache");
317 ret = afs_vlocation_update_record(vl, key, &vldb);
318 } else {
319 /* try to look up an unknown volume in the cell VL databases by
320 * name */
321 ret = afs_vlocation_access_vl_by_name(vl, key, &vldb);
322 if (ret < 0) {
323 printk("kAFS: failed to locate '%s' in cell '%s'\n",
324 vl->vldb.name, vl->cell->name);
325 return ret;
329 afs_vlocation_apply_update(vl, &vldb);
330 _leave(" = 0");
331 return 0;
335 * queue a vlocation record for updates
337 void afs_vlocation_queue_for_updates(struct afs_vlocation *vl)
339 struct afs_vlocation *xvl;
341 /* wait at least 10 minutes before updating... */
342 vl->update_at = get_seconds() + afs_vlocation_update_timeout;
344 spin_lock(&afs_vlocation_updates_lock);
346 if (!list_empty(&afs_vlocation_updates)) {
347 /* ... but wait at least 1 second more than the newest record
348 * already queued so that we don't spam the VL server suddenly
349 * with lots of requests
351 xvl = list_entry(afs_vlocation_updates.prev,
352 struct afs_vlocation, update);
353 if (vl->update_at <= xvl->update_at)
354 vl->update_at = xvl->update_at + 1;
355 } else {
356 queue_delayed_work(afs_vlocation_update_worker,
357 &afs_vlocation_update,
358 afs_vlocation_update_timeout * HZ);
361 list_add_tail(&vl->update, &afs_vlocation_updates);
362 spin_unlock(&afs_vlocation_updates_lock);
366 * lookup volume location
367 * - iterate through the VL servers in a cell until one of them admits knowing
368 * about the volume in question
369 * - lookup in the local cache if not able to find on the VL server
370 * - insert/update in the local cache if did get a VL response
372 struct afs_vlocation *afs_vlocation_lookup(struct afs_cell *cell,
373 struct key *key,
374 const char *name,
375 size_t namesz)
377 struct afs_vlocation *vl;
378 int ret;
380 _enter("{%s},{%x},%*.*s,%zu",
381 cell->name, key_serial(key),
382 (int) namesz, (int) namesz, name, namesz);
384 if (namesz > sizeof(vl->vldb.name)) {
385 _leave(" = -ENAMETOOLONG");
386 return ERR_PTR(-ENAMETOOLONG);
389 /* see if we have an in-memory copy first */
390 down_write(&cell->vl_sem);
391 spin_lock(&cell->vl_lock);
392 list_for_each_entry(vl, &cell->vl_list, link) {
393 if (vl->vldb.name[namesz] != '\0')
394 continue;
395 if (memcmp(vl->vldb.name, name, namesz) == 0)
396 goto found_in_memory;
398 spin_unlock(&cell->vl_lock);
400 /* not in the cell's in-memory lists - create a new record */
401 vl = afs_vlocation_alloc(cell, name, namesz);
402 if (!vl) {
403 up_write(&cell->vl_sem);
404 return ERR_PTR(-ENOMEM);
407 afs_get_cell(cell);
409 list_add_tail(&vl->link, &cell->vl_list);
410 vl->state = AFS_VL_CREATING;
411 up_write(&cell->vl_sem);
413 fill_in_record:
414 ret = afs_vlocation_fill_in_record(vl, key);
415 if (ret < 0)
416 goto error_abandon;
417 spin_lock(&vl->lock);
418 vl->state = AFS_VL_VALID;
419 spin_unlock(&vl->lock);
420 wake_up(&vl->waitq);
422 /* schedule for regular updates */
423 afs_vlocation_queue_for_updates(vl);
424 goto success;
426 found_in_memory:
427 /* found in memory */
428 _debug("found in memory");
429 atomic_inc(&vl->usage);
430 spin_unlock(&cell->vl_lock);
431 if (!list_empty(&vl->grave)) {
432 spin_lock(&afs_vlocation_graveyard_lock);
433 list_del_init(&vl->grave);
434 spin_unlock(&afs_vlocation_graveyard_lock);
436 up_write(&cell->vl_sem);
438 /* see if it was an abandoned record that we might try filling in */
439 spin_lock(&vl->lock);
440 while (vl->state != AFS_VL_VALID) {
441 afs_vlocation_state_t state = vl->state;
443 _debug("invalid [state %d]", state);
445 if (state == AFS_VL_NEW || state == AFS_VL_NO_VOLUME) {
446 vl->state = AFS_VL_CREATING;
447 spin_unlock(&vl->lock);
448 goto fill_in_record;
451 /* must now wait for creation or update by someone else to
452 * complete */
453 _debug("wait");
455 spin_unlock(&vl->lock);
456 ret = wait_event_interruptible(vl->waitq,
457 vl->state == AFS_VL_NEW ||
458 vl->state == AFS_VL_VALID ||
459 vl->state == AFS_VL_NO_VOLUME);
460 if (ret < 0)
461 goto error;
462 spin_lock(&vl->lock);
464 spin_unlock(&vl->lock);
466 success:
467 _leave(" = %p",vl);
468 return vl;
470 error_abandon:
471 spin_lock(&vl->lock);
472 vl->state = AFS_VL_NEW;
473 spin_unlock(&vl->lock);
474 wake_up(&vl->waitq);
475 error:
476 ASSERT(vl != NULL);
477 afs_put_vlocation(vl);
478 _leave(" = %d", ret);
479 return ERR_PTR(ret);
483 * finish using a volume location record
485 void afs_put_vlocation(struct afs_vlocation *vl)
487 if (!vl)
488 return;
490 _enter("%s", vl->vldb.name);
492 ASSERTCMP(atomic_read(&vl->usage), >, 0);
494 if (likely(!atomic_dec_and_test(&vl->usage))) {
495 _leave("");
496 return;
499 spin_lock(&afs_vlocation_graveyard_lock);
500 if (atomic_read(&vl->usage) == 0) {
501 _debug("buried");
502 list_move_tail(&vl->grave, &afs_vlocation_graveyard);
503 vl->time_of_death = get_seconds();
504 schedule_delayed_work(&afs_vlocation_reap,
505 afs_vlocation_timeout * HZ);
507 /* suspend updates on this record */
508 if (!list_empty(&vl->update)) {
509 spin_lock(&afs_vlocation_updates_lock);
510 list_del_init(&vl->update);
511 spin_unlock(&afs_vlocation_updates_lock);
514 spin_unlock(&afs_vlocation_graveyard_lock);
515 _leave(" [killed?]");
519 * destroy a dead volume location record
521 static void afs_vlocation_destroy(struct afs_vlocation *vl)
523 _enter("%p", vl);
525 #ifdef AFS_CACHING_SUPPORT
526 cachefs_relinquish_cookie(vl->cache, 0);
527 #endif
529 afs_put_cell(vl->cell);
530 kfree(vl);
534 * reap dead volume location records
536 static void afs_vlocation_reaper(struct work_struct *work)
538 LIST_HEAD(corpses);
539 struct afs_vlocation *vl;
540 unsigned long delay, expiry;
541 time_t now;
543 _enter("");
545 now = get_seconds();
546 spin_lock(&afs_vlocation_graveyard_lock);
548 while (!list_empty(&afs_vlocation_graveyard)) {
549 vl = list_entry(afs_vlocation_graveyard.next,
550 struct afs_vlocation, grave);
552 _debug("check %p", vl);
554 /* the queue is ordered most dead first */
555 expiry = vl->time_of_death + afs_vlocation_timeout;
556 if (expiry > now) {
557 delay = (expiry - now) * HZ;
558 _debug("delay %lu", delay);
559 if (!schedule_delayed_work(&afs_vlocation_reap,
560 delay)) {
561 cancel_delayed_work(&afs_vlocation_reap);
562 schedule_delayed_work(&afs_vlocation_reap,
563 delay);
565 break;
568 spin_lock(&vl->cell->vl_lock);
569 if (atomic_read(&vl->usage) > 0) {
570 _debug("no reap");
571 list_del_init(&vl->grave);
572 } else {
573 _debug("reap");
574 list_move_tail(&vl->grave, &corpses);
575 list_del_init(&vl->link);
577 spin_unlock(&vl->cell->vl_lock);
580 spin_unlock(&afs_vlocation_graveyard_lock);
582 /* now reap the corpses we've extracted */
583 while (!list_empty(&corpses)) {
584 vl = list_entry(corpses.next, struct afs_vlocation, grave);
585 list_del(&vl->grave);
586 afs_vlocation_destroy(vl);
589 _leave("");
593 * initialise the VL update process
595 int __init afs_vlocation_update_init(void)
597 afs_vlocation_update_worker =
598 create_singlethread_workqueue("kafs_vlupdated");
599 return afs_vlocation_update_worker ? 0 : -ENOMEM;
603 * discard all the volume location records for rmmod
605 void afs_vlocation_purge(void)
607 afs_vlocation_timeout = 0;
609 spin_lock(&afs_vlocation_updates_lock);
610 list_del_init(&afs_vlocation_updates);
611 spin_unlock(&afs_vlocation_updates_lock);
612 cancel_delayed_work(&afs_vlocation_update);
613 queue_delayed_work(afs_vlocation_update_worker,
614 &afs_vlocation_update, 0);
615 destroy_workqueue(afs_vlocation_update_worker);
617 cancel_delayed_work(&afs_vlocation_reap);
618 schedule_delayed_work(&afs_vlocation_reap, 0);
622 * update a volume location
624 static void afs_vlocation_updater(struct work_struct *work)
626 struct afs_cache_vlocation vldb;
627 struct afs_vlocation *vl, *xvl;
628 time_t now;
629 long timeout;
630 int ret;
632 _enter("");
634 now = get_seconds();
636 /* find a record to update */
637 spin_lock(&afs_vlocation_updates_lock);
638 for (;;) {
639 if (list_empty(&afs_vlocation_updates)) {
640 spin_unlock(&afs_vlocation_updates_lock);
641 _leave(" [nothing]");
642 return;
645 vl = list_entry(afs_vlocation_updates.next,
646 struct afs_vlocation, update);
647 if (atomic_read(&vl->usage) > 0)
648 break;
649 list_del_init(&vl->update);
652 timeout = vl->update_at - now;
653 if (timeout > 0) {
654 queue_delayed_work(afs_vlocation_update_worker,
655 &afs_vlocation_update, timeout * HZ);
656 spin_unlock(&afs_vlocation_updates_lock);
657 _leave(" [nothing]");
658 return;
661 list_del_init(&vl->update);
662 atomic_inc(&vl->usage);
663 spin_unlock(&afs_vlocation_updates_lock);
665 /* we can now perform the update */
666 _debug("update %s", vl->vldb.name);
667 vl->state = AFS_VL_UPDATING;
668 vl->upd_rej_cnt = 0;
669 vl->upd_busy_cnt = 0;
671 ret = afs_vlocation_update_record(vl, NULL, &vldb);
672 spin_lock(&vl->lock);
673 switch (ret) {
674 case 0:
675 afs_vlocation_apply_update(vl, &vldb);
676 vl->state = AFS_VL_VALID;
677 break;
678 case -ENOMEDIUM:
679 vl->state = AFS_VL_VOLUME_DELETED;
680 break;
681 default:
682 vl->state = AFS_VL_UNCERTAIN;
683 break;
685 spin_unlock(&vl->lock);
686 wake_up(&vl->waitq);
688 /* and then reschedule */
689 _debug("reschedule");
690 vl->update_at = get_seconds() + afs_vlocation_update_timeout;
692 spin_lock(&afs_vlocation_updates_lock);
694 if (!list_empty(&afs_vlocation_updates)) {
695 /* next update in 10 minutes, but wait at least 1 second more
696 * than the newest record already queued so that we don't spam
697 * the VL server suddenly with lots of requests
699 xvl = list_entry(afs_vlocation_updates.prev,
700 struct afs_vlocation, update);
701 if (vl->update_at <= xvl->update_at)
702 vl->update_at = xvl->update_at + 1;
703 xvl = list_entry(afs_vlocation_updates.next,
704 struct afs_vlocation, update);
705 timeout = xvl->update_at - now;
706 if (timeout < 0)
707 timeout = 0;
708 } else {
709 timeout = afs_vlocation_update_timeout;
712 ASSERT(list_empty(&vl->update));
714 list_add_tail(&vl->update, &afs_vlocation_updates);
716 _debug("timeout %ld", timeout);
717 queue_delayed_work(afs_vlocation_update_worker,
718 &afs_vlocation_update, timeout * HZ);
719 spin_unlock(&afs_vlocation_updates_lock);
720 afs_put_vlocation(vl);