sched: retune wake granularity
[wrt350n-kernel.git] / fs / ocfs2 / dlm / dlmunlock.c
blob86ca085ef3246b8066d7f9f976b0866e7e63e168
1 /* -*- mode: c; c-basic-offset: 8; -*-
2 * vim: noexpandtab sw=8 ts=8 sts=0:
4 * dlmunlock.c
6 * underlying calls for unlocking locks
8 * Copyright (C) 2004 Oracle. All rights reserved.
10 * This program is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU General Public
12 * License as published by the Free Software Foundation; either
13 * version 2 of the License, or (at your option) any later version.
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 * General Public License for more details.
20 * You should have received a copy of the GNU General Public
21 * License along with this program; if not, write to the
22 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
23 * Boston, MA 02111-1307, USA.
28 #include <linux/module.h>
29 #include <linux/fs.h>
30 #include <linux/types.h>
31 #include <linux/slab.h>
32 #include <linux/highmem.h>
33 #include <linux/utsname.h>
34 #include <linux/init.h>
35 #include <linux/sysctl.h>
36 #include <linux/random.h>
37 #include <linux/blkdev.h>
38 #include <linux/socket.h>
39 #include <linux/inet.h>
40 #include <linux/spinlock.h>
41 #include <linux/delay.h>
43 #include "cluster/heartbeat.h"
44 #include "cluster/nodemanager.h"
45 #include "cluster/tcp.h"
47 #include "dlmapi.h"
48 #include "dlmcommon.h"
50 #define MLOG_MASK_PREFIX ML_DLM
51 #include "cluster/masklog.h"
/* Action bits selected by dlm_get_cancel_actions() /
 * dlm_get_unlock_actions() and carried out by dlmunlock_common(). */
/* drop one lock reference after the resource/lock spinlocks are released */
53 #define DLM_UNLOCK_FREE_LOCK 0x00000001
/* report back through *call_ast that the unlock AST should be fired */
54 #define DLM_UNLOCK_CALL_AST 0x00000002
/* unlink the lock from the queue it currently sits on and drop a ref */
55 #define DLM_UNLOCK_REMOVE_LOCK 0x00000004
/* take a ref and append the lock to the tail of res->granted */
56 #define DLM_UNLOCK_REGRANT_LOCK 0x00000008
/* reset lock->ml.convert_type to LKM_IVMODE */
57 #define DLM_UNLOCK_CLEAR_CONVERT_TYPE 0x00000010
/* Forward declarations of helpers that are defined further down in this
 * file but are called from dlmunlock_common(). */
60 static enum dlm_status dlm_get_cancel_actions(struct dlm_ctxt *dlm,
61 struct dlm_lock_resource *res,
62 struct dlm_lock *lock,
63 struct dlm_lockstatus *lksb,
64 int *actions);
65 static enum dlm_status dlm_get_unlock_actions(struct dlm_ctxt *dlm,
66 struct dlm_lock_resource *res,
67 struct dlm_lock *lock,
68 struct dlm_lockstatus *lksb,
69 int *actions);
71 static enum dlm_status dlm_send_remote_unlock_request(struct dlm_ctxt *dlm,
72 struct dlm_lock_resource *res,
73 struct dlm_lock *lock,
74 struct dlm_lockstatus *lksb,
75 int flags,
76 u8 owner);
80 * according to the spec:
81 * http://opendlm.sourceforge.net/cvsmirror/opendlm/docs/dlmbook_final.pdf
83 * flags & LKM_CANCEL != 0: must be converting or blocked
84 * flags & LKM_CANCEL == 0: must be granted
86 * So to unlock a converting lock, you must first cancel the
87 * convert (passing LKM_CANCEL in flags), then call the unlock
88 * again (with no LKM_CANCEL in flags).
93 * locking:
94 * caller needs: none
95 * taken: res->spinlock and lock->spinlock taken and dropped
96 * held on exit: none
97 * returns: DLM_NORMAL, DLM_NOLOCKMGR, status from network
98 * all callers should have taken an extra ref on lock coming in
100 static enum dlm_status dlmunlock_common(struct dlm_ctxt *dlm,
101 struct dlm_lock_resource *res,
102 struct dlm_lock *lock,
103 struct dlm_lockstatus *lksb,
104 int flags, int *call_ast,
105 int master_node)
107 enum dlm_status status;
108 int actions = 0;
109 int in_use;
110 u8 owner;
112 mlog(0, "master_node = %d, valblk = %d\n", master_node,
113 flags & LKM_VALBLK);
115 if (master_node)
116 BUG_ON(res->owner != dlm->node_num);
117 else
118 BUG_ON(res->owner == dlm->node_num);
120 spin_lock(&dlm->spinlock);
121 /* We want to be sure that we're not freeing a lock
122 * that still has AST's pending... */
123 in_use = !list_empty(&lock->ast_list);
124 spin_unlock(&dlm->spinlock);
125 if (in_use) {
126 mlog(ML_ERROR, "lockres %.*s: Someone is calling dlmunlock "
127 "while waiting for an ast!", res->lockname.len,
128 res->lockname.name);
129 return DLM_BADPARAM;
132 spin_lock(&res->spinlock);
133 if (res->state & DLM_LOCK_RES_IN_PROGRESS) {
134 if (master_node) {
135 mlog(ML_ERROR, "lockres in progress!\n");
136 spin_unlock(&res->spinlock);
137 return DLM_FORWARD;
139 /* ok for this to sleep if not in a network handler */
140 __dlm_wait_on_lockres(res);
141 res->state |= DLM_LOCK_RES_IN_PROGRESS;
143 spin_lock(&lock->spinlock);
145 if (res->state & DLM_LOCK_RES_RECOVERING) {
146 status = DLM_RECOVERING;
147 goto leave;
150 if (res->state & DLM_LOCK_RES_MIGRATING) {
151 status = DLM_MIGRATING;
152 goto leave;
155 /* see above for what the spec says about
156 * LKM_CANCEL and the lock queue state */
157 if (flags & LKM_CANCEL)
158 status = dlm_get_cancel_actions(dlm, res, lock, lksb, &actions);
159 else
160 status = dlm_get_unlock_actions(dlm, res, lock, lksb, &actions);
162 if (status != DLM_NORMAL && (status != DLM_CANCELGRANT || !master_node))
163 goto leave;
165 /* By now this has been masked out of cancel requests. */
166 if (flags & LKM_VALBLK) {
167 /* make the final update to the lvb */
168 if (master_node)
169 memcpy(res->lvb, lksb->lvb, DLM_LVB_LEN);
170 else
171 flags |= LKM_PUT_LVB; /* let the send function
172 * handle it. */
175 if (!master_node) {
176 owner = res->owner;
177 /* drop locks and send message */
178 if (flags & LKM_CANCEL)
179 lock->cancel_pending = 1;
180 else
181 lock->unlock_pending = 1;
182 spin_unlock(&lock->spinlock);
183 spin_unlock(&res->spinlock);
184 status = dlm_send_remote_unlock_request(dlm, res, lock, lksb,
185 flags, owner);
186 spin_lock(&res->spinlock);
187 spin_lock(&lock->spinlock);
188 /* if the master told us the lock was already granted,
189 * let the ast handle all of these actions */
190 if (status == DLM_CANCELGRANT) {
191 actions &= ~(DLM_UNLOCK_REMOVE_LOCK|
192 DLM_UNLOCK_REGRANT_LOCK|
193 DLM_UNLOCK_CLEAR_CONVERT_TYPE);
194 } else if (status == DLM_RECOVERING ||
195 status == DLM_MIGRATING ||
196 status == DLM_FORWARD) {
197 /* must clear the actions because this unlock
198 * is about to be retried. cannot free or do
199 * any list manipulation. */
200 mlog(0, "%s:%.*s: clearing actions, %s\n",
201 dlm->name, res->lockname.len,
202 res->lockname.name,
203 status==DLM_RECOVERING?"recovering":
204 (status==DLM_MIGRATING?"migrating":
205 "forward"));
206 actions = 0;
208 if (flags & LKM_CANCEL)
209 lock->cancel_pending = 0;
210 else
211 lock->unlock_pending = 0;
215 /* get an extra ref on lock. if we are just switching
216 * lists here, we dont want the lock to go away. */
217 dlm_lock_get(lock);
219 if (actions & DLM_UNLOCK_REMOVE_LOCK) {
220 list_del_init(&lock->list);
221 dlm_lock_put(lock);
223 if (actions & DLM_UNLOCK_REGRANT_LOCK) {
224 dlm_lock_get(lock);
225 list_add_tail(&lock->list, &res->granted);
227 if (actions & DLM_UNLOCK_CLEAR_CONVERT_TYPE) {
228 mlog(0, "clearing convert_type at %smaster node\n",
229 master_node ? "" : "non-");
230 lock->ml.convert_type = LKM_IVMODE;
233 /* remove the extra ref on lock */
234 dlm_lock_put(lock);
236 leave:
237 res->state &= ~DLM_LOCK_RES_IN_PROGRESS;
238 if (!dlm_lock_on_list(&res->converting, lock))
239 BUG_ON(lock->ml.convert_type != LKM_IVMODE);
240 else
241 BUG_ON(lock->ml.convert_type == LKM_IVMODE);
242 spin_unlock(&lock->spinlock);
243 spin_unlock(&res->spinlock);
244 wake_up(&res->wq);
246 /* let the caller's final dlm_lock_put handle the actual kfree */
247 if (actions & DLM_UNLOCK_FREE_LOCK) {
248 /* this should always be coupled with list removal */
249 BUG_ON(!(actions & DLM_UNLOCK_REMOVE_LOCK));
250 mlog(0, "lock %u:%llu should be gone now! refs=%d\n",
251 dlm_get_lock_cookie_node(be64_to_cpu(lock->ml.cookie)),
252 dlm_get_lock_cookie_seq(be64_to_cpu(lock->ml.cookie)),
253 atomic_read(&lock->lock_refs.refcount)-1);
254 dlm_lock_put(lock);
256 if (actions & DLM_UNLOCK_CALL_AST)
257 *call_ast = 1;
259 /* if cancel or unlock succeeded, lvb work is done */
260 if (status == DLM_NORMAL)
261 lksb->flags &= ~(DLM_LKSB_PUT_LVB|DLM_LKSB_GET_LVB);
263 return status;
266 void dlm_commit_pending_unlock(struct dlm_lock_resource *res,
267 struct dlm_lock *lock)
269 /* leave DLM_LKSB_PUT_LVB on the lksb so any final
270 * update of the lvb will be sent to the new master */
271 list_del_init(&lock->list);
274 void dlm_commit_pending_cancel(struct dlm_lock_resource *res,
275 struct dlm_lock *lock)
277 list_move_tail(&lock->list, &res->granted);
278 lock->ml.convert_type = LKM_IVMODE;
282 static inline enum dlm_status dlmunlock_master(struct dlm_ctxt *dlm,
283 struct dlm_lock_resource *res,
284 struct dlm_lock *lock,
285 struct dlm_lockstatus *lksb,
286 int flags,
287 int *call_ast)
289 return dlmunlock_common(dlm, res, lock, lksb, flags, call_ast, 1);
292 static inline enum dlm_status dlmunlock_remote(struct dlm_ctxt *dlm,
293 struct dlm_lock_resource *res,
294 struct dlm_lock *lock,
295 struct dlm_lockstatus *lksb,
296 int flags, int *call_ast)
298 return dlmunlock_common(dlm, res, lock, lksb, flags, call_ast, 0);
302 * locking:
303 * caller needs: none
304 * taken: none
305 * held on exit: none
306 * returns: DLM_NORMAL, DLM_NOLOCKMGR, status from network
308 static enum dlm_status dlm_send_remote_unlock_request(struct dlm_ctxt *dlm,
309 struct dlm_lock_resource *res,
310 struct dlm_lock *lock,
311 struct dlm_lockstatus *lksb,
312 int flags,
313 u8 owner)
315 struct dlm_unlock_lock unlock;
316 int tmpret;
317 enum dlm_status ret;
318 int status = 0;
319 struct kvec vec[2];
320 size_t veclen = 1;
322 mlog_entry("%.*s\n", res->lockname.len, res->lockname.name);
324 if (owner == dlm->node_num) {
325 /* ended up trying to contact ourself. this means
326 * that the lockres had been remote but became local
327 * via a migration. just retry it, now as local */
328 mlog(0, "%s:%.*s: this node became the master due to a "
329 "migration, re-evaluate now\n", dlm->name,
330 res->lockname.len, res->lockname.name);
331 return DLM_FORWARD;
334 memset(&unlock, 0, sizeof(unlock));
335 unlock.node_idx = dlm->node_num;
336 unlock.flags = cpu_to_be32(flags);
337 unlock.cookie = lock->ml.cookie;
338 unlock.namelen = res->lockname.len;
339 memcpy(unlock.name, res->lockname.name, unlock.namelen);
341 vec[0].iov_len = sizeof(struct dlm_unlock_lock);
342 vec[0].iov_base = &unlock;
344 if (flags & LKM_PUT_LVB) {
345 /* extra data to send if we are updating lvb */
346 vec[1].iov_len = DLM_LVB_LEN;
347 vec[1].iov_base = lock->lksb->lvb;
348 veclen++;
351 tmpret = o2net_send_message_vec(DLM_UNLOCK_LOCK_MSG, dlm->key,
352 vec, veclen, owner, &status);
353 if (tmpret >= 0) {
354 // successfully sent and received
355 if (status == DLM_FORWARD)
356 mlog(0, "master was in-progress. retry\n");
357 ret = status;
358 } else {
359 mlog_errno(tmpret);
360 if (dlm_is_host_down(tmpret)) {
361 /* NOTE: this seems strange, but it is what we want.
362 * when the master goes down during a cancel or
363 * unlock, the recovery code completes the operation
364 * as if the master had not died, then passes the
365 * updated state to the recovery master. this thread
366 * just needs to finish out the operation and call
367 * the unlockast. */
368 ret = DLM_NORMAL;
369 } else {
370 /* something bad. this will BUG in ocfs2 */
371 ret = dlm_err_to_dlm_status(tmpret);
375 return ret;
379 * locking:
380 * caller needs: none
381 * taken: takes and drops res->spinlock
382 * held on exit: none
383 * returns: DLM_NORMAL, DLM_BADARGS, DLM_IVLOCKID,
384 * return value from dlmunlock_master
386 int dlm_unlock_lock_handler(struct o2net_msg *msg, u32 len, void *data,
387 void **ret_data)
389 struct dlm_ctxt *dlm = data;
390 struct dlm_unlock_lock *unlock = (struct dlm_unlock_lock *)msg->buf;
391 struct dlm_lock_resource *res = NULL;
392 struct list_head *iter;
393 struct dlm_lock *lock = NULL;
394 enum dlm_status status = DLM_NORMAL;
395 int found = 0, i;
396 struct dlm_lockstatus *lksb = NULL;
397 int ignore;
398 u32 flags;
399 struct list_head *queue;
401 flags = be32_to_cpu(unlock->flags);
403 if (flags & LKM_GET_LVB) {
404 mlog(ML_ERROR, "bad args! GET_LVB specified on unlock!\n");
405 return DLM_BADARGS;
408 if ((flags & (LKM_PUT_LVB|LKM_CANCEL)) == (LKM_PUT_LVB|LKM_CANCEL)) {
409 mlog(ML_ERROR, "bad args! cannot modify lvb on a CANCEL "
410 "request!\n");
411 return DLM_BADARGS;
414 if (unlock->namelen > DLM_LOCKID_NAME_MAX) {
415 mlog(ML_ERROR, "Invalid name length in unlock handler!\n");
416 return DLM_IVBUFLEN;
419 if (!dlm_grab(dlm))
420 return DLM_REJECTED;
422 mlog_bug_on_msg(!dlm_domain_fully_joined(dlm),
423 "Domain %s not fully joined!\n", dlm->name);
425 mlog(0, "lvb: %s\n", flags & LKM_PUT_LVB ? "put lvb" : "none");
427 res = dlm_lookup_lockres(dlm, unlock->name, unlock->namelen);
428 if (!res) {
429 /* We assume here that a no lock resource simply means
430 * it was migrated away and destroyed before the other
431 * node could detect it. */
432 mlog(0, "returning DLM_FORWARD -- res no longer exists\n");
433 status = DLM_FORWARD;
434 goto not_found;
437 queue=&res->granted;
438 found = 0;
439 spin_lock(&res->spinlock);
440 if (res->state & DLM_LOCK_RES_RECOVERING) {
441 spin_unlock(&res->spinlock);
442 mlog(0, "returning DLM_RECOVERING\n");
443 status = DLM_RECOVERING;
444 goto leave;
447 if (res->state & DLM_LOCK_RES_MIGRATING) {
448 spin_unlock(&res->spinlock);
449 mlog(0, "returning DLM_MIGRATING\n");
450 status = DLM_MIGRATING;
451 goto leave;
454 if (res->owner != dlm->node_num) {
455 spin_unlock(&res->spinlock);
456 mlog(0, "returning DLM_FORWARD -- not master\n");
457 status = DLM_FORWARD;
458 goto leave;
461 for (i=0; i<3; i++) {
462 list_for_each(iter, queue) {
463 lock = list_entry(iter, struct dlm_lock, list);
464 if (lock->ml.cookie == unlock->cookie &&
465 lock->ml.node == unlock->node_idx) {
466 dlm_lock_get(lock);
467 found = 1;
468 break;
471 if (found)
472 break;
473 /* scan granted -> converting -> blocked queues */
474 queue++;
476 spin_unlock(&res->spinlock);
477 if (!found) {
478 status = DLM_IVLOCKID;
479 goto not_found;
482 /* lock was found on queue */
483 lksb = lock->lksb;
484 if (flags & (LKM_VALBLK|LKM_PUT_LVB) &&
485 lock->ml.type != LKM_EXMODE)
486 flags &= ~(LKM_VALBLK|LKM_PUT_LVB);
488 /* unlockast only called on originating node */
489 if (flags & LKM_PUT_LVB) {
490 lksb->flags |= DLM_LKSB_PUT_LVB;
491 memcpy(&lksb->lvb[0], &unlock->lvb[0], DLM_LVB_LEN);
494 /* if this is in-progress, propagate the DLM_FORWARD
495 * all the way back out */
496 status = dlmunlock_master(dlm, res, lock, lksb, flags, &ignore);
497 if (status == DLM_FORWARD)
498 mlog(0, "lockres is in progress\n");
500 if (flags & LKM_PUT_LVB)
501 lksb->flags &= ~DLM_LKSB_PUT_LVB;
503 dlm_lockres_calc_usage(dlm, res);
504 dlm_kick_thread(dlm, res);
506 not_found:
507 if (!found)
508 mlog(ML_ERROR, "failed to find lock to unlock! "
509 "cookie=%u:%llu\n",
510 dlm_get_lock_cookie_node(be64_to_cpu(unlock->cookie)),
511 dlm_get_lock_cookie_seq(be64_to_cpu(unlock->cookie)));
512 else
513 dlm_lock_put(lock);
515 leave:
516 if (res)
517 dlm_lockres_put(res);
519 dlm_put(dlm);
521 return status;
525 static enum dlm_status dlm_get_cancel_actions(struct dlm_ctxt *dlm,
526 struct dlm_lock_resource *res,
527 struct dlm_lock *lock,
528 struct dlm_lockstatus *lksb,
529 int *actions)
531 enum dlm_status status;
533 if (dlm_lock_on_list(&res->blocked, lock)) {
534 /* cancel this outright */
535 status = DLM_NORMAL;
536 *actions = (DLM_UNLOCK_CALL_AST |
537 DLM_UNLOCK_REMOVE_LOCK);
538 } else if (dlm_lock_on_list(&res->converting, lock)) {
539 /* cancel the request, put back on granted */
540 status = DLM_NORMAL;
541 *actions = (DLM_UNLOCK_CALL_AST |
542 DLM_UNLOCK_REMOVE_LOCK |
543 DLM_UNLOCK_REGRANT_LOCK |
544 DLM_UNLOCK_CLEAR_CONVERT_TYPE);
545 } else if (dlm_lock_on_list(&res->granted, lock)) {
546 /* too late, already granted. */
547 status = DLM_CANCELGRANT;
548 *actions = DLM_UNLOCK_CALL_AST;
549 } else {
550 mlog(ML_ERROR, "lock to cancel is not on any list!\n");
551 status = DLM_IVLOCKID;
552 *actions = 0;
554 return status;
557 static enum dlm_status dlm_get_unlock_actions(struct dlm_ctxt *dlm,
558 struct dlm_lock_resource *res,
559 struct dlm_lock *lock,
560 struct dlm_lockstatus *lksb,
561 int *actions)
563 enum dlm_status status;
565 /* unlock request */
566 if (!dlm_lock_on_list(&res->granted, lock)) {
567 status = DLM_DENIED;
568 dlm_error(status);
569 *actions = 0;
570 } else {
571 /* unlock granted lock */
572 status = DLM_NORMAL;
573 *actions = (DLM_UNLOCK_FREE_LOCK |
574 DLM_UNLOCK_CALL_AST |
575 DLM_UNLOCK_REMOVE_LOCK);
577 return status;
580 /* there seems to be no point in doing this async
581 * since (even for the remote case) there is really
582 * no work to queue up... so just do it and fire the
583 * unlockast by hand when done... */
584 enum dlm_status dlmunlock(struct dlm_ctxt *dlm, struct dlm_lockstatus *lksb,
585 int flags, dlm_astunlockfunc_t *unlockast, void *data)
587 enum dlm_status status;
588 struct dlm_lock_resource *res;
589 struct dlm_lock *lock = NULL;
590 int call_ast, is_master;
592 mlog_entry_void();
594 if (!lksb) {
595 dlm_error(DLM_BADARGS);
596 return DLM_BADARGS;
599 if (flags & ~(LKM_CANCEL | LKM_VALBLK | LKM_INVVALBLK)) {
600 dlm_error(DLM_BADPARAM);
601 return DLM_BADPARAM;
604 if ((flags & (LKM_VALBLK | LKM_CANCEL)) == (LKM_VALBLK | LKM_CANCEL)) {
605 mlog(0, "VALBLK given with CANCEL: ignoring VALBLK\n");
606 flags &= ~LKM_VALBLK;
609 if (!lksb->lockid || !lksb->lockid->lockres) {
610 dlm_error(DLM_BADPARAM);
611 return DLM_BADPARAM;
614 lock = lksb->lockid;
615 BUG_ON(!lock);
616 dlm_lock_get(lock);
618 res = lock->lockres;
619 BUG_ON(!res);
620 dlm_lockres_get(res);
621 retry:
622 call_ast = 0;
623 /* need to retry up here because owner may have changed */
624 mlog(0, "lock=%p res=%p\n", lock, res);
626 spin_lock(&res->spinlock);
627 is_master = (res->owner == dlm->node_num);
628 if (flags & LKM_VALBLK && lock->ml.type != LKM_EXMODE)
629 flags &= ~LKM_VALBLK;
630 spin_unlock(&res->spinlock);
632 if (is_master) {
633 status = dlmunlock_master(dlm, res, lock, lksb, flags,
634 &call_ast);
635 mlog(0, "done calling dlmunlock_master: returned %d, "
636 "call_ast is %d\n", status, call_ast);
637 } else {
638 status = dlmunlock_remote(dlm, res, lock, lksb, flags,
639 &call_ast);
640 mlog(0, "done calling dlmunlock_remote: returned %d, "
641 "call_ast is %d\n", status, call_ast);
644 if (status == DLM_RECOVERING ||
645 status == DLM_MIGRATING ||
646 status == DLM_FORWARD) {
647 /* We want to go away for a tiny bit to allow recovery
648 * / migration to complete on this resource. I don't
649 * know of any wait queue we could sleep on as this
650 * may be happening on another node. Perhaps the
651 * proper solution is to queue up requests on the
652 * other end? */
654 /* do we want to yield(); ?? */
655 msleep(50);
657 mlog(0, "retrying unlock due to pending recovery/"
658 "migration/in-progress\n");
659 goto retry;
662 if (call_ast) {
663 mlog(0, "calling unlockast(%p, %d)\n", data, status);
664 if (is_master) {
665 /* it is possible that there is one last bast
666 * pending. make sure it is flushed, then
667 * call the unlockast.
668 * not an issue if this is a mastered remotely,
669 * since this lock has been removed from the
670 * lockres queues and cannot be found. */
671 dlm_kick_thread(dlm, NULL);
672 wait_event(dlm->ast_wq,
673 dlm_lock_basts_flushed(dlm, lock));
675 (*unlockast)(data, status);
678 if (status == DLM_CANCELGRANT)
679 status = DLM_NORMAL;
681 if (status == DLM_NORMAL) {
682 mlog(0, "kicking the thread\n");
683 dlm_kick_thread(dlm, res);
684 } else
685 dlm_error(status);
687 dlm_lockres_calc_usage(dlm, res);
688 dlm_lockres_put(res);
689 dlm_lock_put(lock);
691 mlog(0, "returning status=%d!\n", status);
692 return status;
694 EXPORT_SYMBOL_GPL(dlmunlock);