mm-only debug patch...
[mmotm.git] / fs / ocfs2 / dlm / dlmunlock.c
blob00f53b2aea76a21a955eeac0c403a81c954947e8
/* -*- mode: c; c-basic-offset: 8; -*-
 * vim: noexpandtab sw=8 ts=8 sts=0:
 *
 * dlmunlock.c
 *
 * underlying calls for unlocking locks
 *
 * Copyright (C) 2004 Oracle.  All rights reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public
 * License along with this program; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 02111-1307, USA.
 */
28 #include <linux/module.h>
29 #include <linux/fs.h>
30 #include <linux/types.h>
31 #include <linux/slab.h>
32 #include <linux/highmem.h>
33 #include <linux/init.h>
34 #include <linux/sysctl.h>
35 #include <linux/random.h>
36 #include <linux/blkdev.h>
37 #include <linux/socket.h>
38 #include <linux/inet.h>
39 #include <linux/spinlock.h>
40 #include <linux/delay.h>
42 #include "cluster/heartbeat.h"
43 #include "cluster/nodemanager.h"
44 #include "cluster/tcp.h"
46 #include "dlmapi.h"
47 #include "dlmcommon.h"
49 #define MLOG_MASK_PREFIX ML_DLM
50 #include "cluster/masklog.h"
/*
 * Action bits computed by dlm_get_unlock_actions() /
 * dlm_get_cancel_actions() and consumed by dlmunlock_common().
 * They may be or'ed together.
 */
/* drop one more reference on the lock after the queues are updated */
#define DLM_UNLOCK_FREE_LOCK           0x00000001
/* tell the caller (through *call_ast) that the unlockast must be run */
#define DLM_UNLOCK_CALL_AST            0x00000002
/* unlink the lock from whichever lockres queue it is on */
#define DLM_UNLOCK_REMOVE_LOCK         0x00000004
/* put the lock back at the tail of the granted queue */
#define DLM_UNLOCK_REGRANT_LOCK        0x00000008
/* reset lock->ml.convert_type to LKM_IVMODE */
#define DLM_UNLOCK_CLEAR_CONVERT_TYPE  0x00000010
59 static enum dlm_status dlm_get_cancel_actions(struct dlm_ctxt *dlm,
60 struct dlm_lock_resource *res,
61 struct dlm_lock *lock,
62 struct dlm_lockstatus *lksb,
63 int *actions);
64 static enum dlm_status dlm_get_unlock_actions(struct dlm_ctxt *dlm,
65 struct dlm_lock_resource *res,
66 struct dlm_lock *lock,
67 struct dlm_lockstatus *lksb,
68 int *actions);
70 static enum dlm_status dlm_send_remote_unlock_request(struct dlm_ctxt *dlm,
71 struct dlm_lock_resource *res,
72 struct dlm_lock *lock,
73 struct dlm_lockstatus *lksb,
74 int flags,
75 u8 owner);
79 * according to the spec:
80 * http://opendlm.sourceforge.net/cvsmirror/opendlm/docs/dlmbook_final.pdf
82 * flags & LKM_CANCEL != 0: must be converting or blocked
83 * flags & LKM_CANCEL == 0: must be granted
85 * So to unlock a converting lock, you must first cancel the
86 * convert (passing LKM_CANCEL in flags), then call the unlock
87 * again (with no LKM_CANCEL in flags).
92 * locking:
93 * caller needs: none
94 * taken: res->spinlock and lock->spinlock taken and dropped
95 * held on exit: none
96 * returns: DLM_NORMAL, DLM_NOLOCKMGR, status from network
97 * all callers should have taken an extra ref on lock coming in
99 static enum dlm_status dlmunlock_common(struct dlm_ctxt *dlm,
100 struct dlm_lock_resource *res,
101 struct dlm_lock *lock,
102 struct dlm_lockstatus *lksb,
103 int flags, int *call_ast,
104 int master_node)
106 enum dlm_status status;
107 int actions = 0;
108 int in_use;
109 u8 owner;
111 mlog(0, "master_node = %d, valblk = %d\n", master_node,
112 flags & LKM_VALBLK);
114 if (master_node)
115 BUG_ON(res->owner != dlm->node_num);
116 else
117 BUG_ON(res->owner == dlm->node_num);
119 spin_lock(&dlm->ast_lock);
120 /* We want to be sure that we're not freeing a lock
121 * that still has AST's pending... */
122 in_use = !list_empty(&lock->ast_list);
123 spin_unlock(&dlm->ast_lock);
124 if (in_use && !(flags & LKM_CANCEL)) {
125 mlog(ML_ERROR, "lockres %.*s: Someone is calling dlmunlock "
126 "while waiting for an ast!", res->lockname.len,
127 res->lockname.name);
128 return DLM_BADPARAM;
131 spin_lock(&res->spinlock);
132 if (res->state & DLM_LOCK_RES_IN_PROGRESS) {
133 if (master_node && !(flags & LKM_CANCEL)) {
134 mlog(ML_ERROR, "lockres in progress!\n");
135 spin_unlock(&res->spinlock);
136 return DLM_FORWARD;
138 /* ok for this to sleep if not in a network handler */
139 __dlm_wait_on_lockres(res);
140 res->state |= DLM_LOCK_RES_IN_PROGRESS;
142 spin_lock(&lock->spinlock);
144 if (res->state & DLM_LOCK_RES_RECOVERING) {
145 status = DLM_RECOVERING;
146 goto leave;
149 if (res->state & DLM_LOCK_RES_MIGRATING) {
150 status = DLM_MIGRATING;
151 goto leave;
154 /* see above for what the spec says about
155 * LKM_CANCEL and the lock queue state */
156 if (flags & LKM_CANCEL)
157 status = dlm_get_cancel_actions(dlm, res, lock, lksb, &actions);
158 else
159 status = dlm_get_unlock_actions(dlm, res, lock, lksb, &actions);
161 if (status != DLM_NORMAL && (status != DLM_CANCELGRANT || !master_node))
162 goto leave;
164 /* By now this has been masked out of cancel requests. */
165 if (flags & LKM_VALBLK) {
166 /* make the final update to the lvb */
167 if (master_node)
168 memcpy(res->lvb, lksb->lvb, DLM_LVB_LEN);
169 else
170 flags |= LKM_PUT_LVB; /* let the send function
171 * handle it. */
174 if (!master_node) {
175 owner = res->owner;
176 /* drop locks and send message */
177 if (flags & LKM_CANCEL)
178 lock->cancel_pending = 1;
179 else
180 lock->unlock_pending = 1;
181 spin_unlock(&lock->spinlock);
182 spin_unlock(&res->spinlock);
183 status = dlm_send_remote_unlock_request(dlm, res, lock, lksb,
184 flags, owner);
185 spin_lock(&res->spinlock);
186 spin_lock(&lock->spinlock);
187 /* if the master told us the lock was already granted,
188 * let the ast handle all of these actions */
189 if (status == DLM_CANCELGRANT) {
190 actions &= ~(DLM_UNLOCK_REMOVE_LOCK|
191 DLM_UNLOCK_REGRANT_LOCK|
192 DLM_UNLOCK_CLEAR_CONVERT_TYPE);
193 } else if (status == DLM_RECOVERING ||
194 status == DLM_MIGRATING ||
195 status == DLM_FORWARD) {
196 /* must clear the actions because this unlock
197 * is about to be retried. cannot free or do
198 * any list manipulation. */
199 mlog(0, "%s:%.*s: clearing actions, %s\n",
200 dlm->name, res->lockname.len,
201 res->lockname.name,
202 status==DLM_RECOVERING?"recovering":
203 (status==DLM_MIGRATING?"migrating":
204 "forward"));
205 actions = 0;
207 if (flags & LKM_CANCEL)
208 lock->cancel_pending = 0;
209 else
210 lock->unlock_pending = 0;
214 /* get an extra ref on lock. if we are just switching
215 * lists here, we dont want the lock to go away. */
216 dlm_lock_get(lock);
218 if (actions & DLM_UNLOCK_REMOVE_LOCK) {
219 list_del_init(&lock->list);
220 dlm_lock_put(lock);
222 if (actions & DLM_UNLOCK_REGRANT_LOCK) {
223 dlm_lock_get(lock);
224 list_add_tail(&lock->list, &res->granted);
226 if (actions & DLM_UNLOCK_CLEAR_CONVERT_TYPE) {
227 mlog(0, "clearing convert_type at %smaster node\n",
228 master_node ? "" : "non-");
229 lock->ml.convert_type = LKM_IVMODE;
232 /* remove the extra ref on lock */
233 dlm_lock_put(lock);
235 leave:
236 res->state &= ~DLM_LOCK_RES_IN_PROGRESS;
237 if (!dlm_lock_on_list(&res->converting, lock))
238 BUG_ON(lock->ml.convert_type != LKM_IVMODE);
239 else
240 BUG_ON(lock->ml.convert_type == LKM_IVMODE);
241 spin_unlock(&lock->spinlock);
242 spin_unlock(&res->spinlock);
243 wake_up(&res->wq);
245 /* let the caller's final dlm_lock_put handle the actual kfree */
246 if (actions & DLM_UNLOCK_FREE_LOCK) {
247 /* this should always be coupled with list removal */
248 BUG_ON(!(actions & DLM_UNLOCK_REMOVE_LOCK));
249 mlog(0, "lock %u:%llu should be gone now! refs=%d\n",
250 dlm_get_lock_cookie_node(be64_to_cpu(lock->ml.cookie)),
251 dlm_get_lock_cookie_seq(be64_to_cpu(lock->ml.cookie)),
252 atomic_read(&lock->lock_refs.refcount)-1);
253 dlm_lock_put(lock);
255 if (actions & DLM_UNLOCK_CALL_AST)
256 *call_ast = 1;
258 /* if cancel or unlock succeeded, lvb work is done */
259 if (status == DLM_NORMAL)
260 lksb->flags &= ~(DLM_LKSB_PUT_LVB|DLM_LKSB_GET_LVB);
262 return status;
265 void dlm_commit_pending_unlock(struct dlm_lock_resource *res,
266 struct dlm_lock *lock)
268 /* leave DLM_LKSB_PUT_LVB on the lksb so any final
269 * update of the lvb will be sent to the new master */
270 list_del_init(&lock->list);
273 void dlm_commit_pending_cancel(struct dlm_lock_resource *res,
274 struct dlm_lock *lock)
276 list_move_tail(&lock->list, &res->granted);
277 lock->ml.convert_type = LKM_IVMODE;
281 static inline enum dlm_status dlmunlock_master(struct dlm_ctxt *dlm,
282 struct dlm_lock_resource *res,
283 struct dlm_lock *lock,
284 struct dlm_lockstatus *lksb,
285 int flags,
286 int *call_ast)
288 return dlmunlock_common(dlm, res, lock, lksb, flags, call_ast, 1);
291 static inline enum dlm_status dlmunlock_remote(struct dlm_ctxt *dlm,
292 struct dlm_lock_resource *res,
293 struct dlm_lock *lock,
294 struct dlm_lockstatus *lksb,
295 int flags, int *call_ast)
297 return dlmunlock_common(dlm, res, lock, lksb, flags, call_ast, 0);
301 * locking:
302 * caller needs: none
303 * taken: none
304 * held on exit: none
305 * returns: DLM_NORMAL, DLM_NOLOCKMGR, status from network
307 static enum dlm_status dlm_send_remote_unlock_request(struct dlm_ctxt *dlm,
308 struct dlm_lock_resource *res,
309 struct dlm_lock *lock,
310 struct dlm_lockstatus *lksb,
311 int flags,
312 u8 owner)
314 struct dlm_unlock_lock unlock;
315 int tmpret;
316 enum dlm_status ret;
317 int status = 0;
318 struct kvec vec[2];
319 size_t veclen = 1;
321 mlog_entry("%.*s\n", res->lockname.len, res->lockname.name);
323 if (owner == dlm->node_num) {
324 /* ended up trying to contact ourself. this means
325 * that the lockres had been remote but became local
326 * via a migration. just retry it, now as local */
327 mlog(0, "%s:%.*s: this node became the master due to a "
328 "migration, re-evaluate now\n", dlm->name,
329 res->lockname.len, res->lockname.name);
330 return DLM_FORWARD;
333 memset(&unlock, 0, sizeof(unlock));
334 unlock.node_idx = dlm->node_num;
335 unlock.flags = cpu_to_be32(flags);
336 unlock.cookie = lock->ml.cookie;
337 unlock.namelen = res->lockname.len;
338 memcpy(unlock.name, res->lockname.name, unlock.namelen);
340 vec[0].iov_len = sizeof(struct dlm_unlock_lock);
341 vec[0].iov_base = &unlock;
343 if (flags & LKM_PUT_LVB) {
344 /* extra data to send if we are updating lvb */
345 vec[1].iov_len = DLM_LVB_LEN;
346 vec[1].iov_base = lock->lksb->lvb;
347 veclen++;
350 tmpret = o2net_send_message_vec(DLM_UNLOCK_LOCK_MSG, dlm->key,
351 vec, veclen, owner, &status);
352 if (tmpret >= 0) {
353 // successfully sent and received
354 if (status == DLM_FORWARD)
355 mlog(0, "master was in-progress. retry\n");
356 ret = status;
357 } else {
358 mlog_errno(tmpret);
359 if (dlm_is_host_down(tmpret)) {
360 /* NOTE: this seems strange, but it is what we want.
361 * when the master goes down during a cancel or
362 * unlock, the recovery code completes the operation
363 * as if the master had not died, then passes the
364 * updated state to the recovery master. this thread
365 * just needs to finish out the operation and call
366 * the unlockast. */
367 ret = DLM_NORMAL;
368 } else {
369 /* something bad. this will BUG in ocfs2 */
370 ret = dlm_err_to_dlm_status(tmpret);
374 return ret;
378 * locking:
379 * caller needs: none
380 * taken: takes and drops res->spinlock
381 * held on exit: none
382 * returns: DLM_NORMAL, DLM_BADARGS, DLM_IVLOCKID,
383 * return value from dlmunlock_master
385 int dlm_unlock_lock_handler(struct o2net_msg *msg, u32 len, void *data,
386 void **ret_data)
388 struct dlm_ctxt *dlm = data;
389 struct dlm_unlock_lock *unlock = (struct dlm_unlock_lock *)msg->buf;
390 struct dlm_lock_resource *res = NULL;
391 struct list_head *iter;
392 struct dlm_lock *lock = NULL;
393 enum dlm_status status = DLM_NORMAL;
394 int found = 0, i;
395 struct dlm_lockstatus *lksb = NULL;
396 int ignore;
397 u32 flags;
398 struct list_head *queue;
400 flags = be32_to_cpu(unlock->flags);
402 if (flags & LKM_GET_LVB) {
403 mlog(ML_ERROR, "bad args! GET_LVB specified on unlock!\n");
404 return DLM_BADARGS;
407 if ((flags & (LKM_PUT_LVB|LKM_CANCEL)) == (LKM_PUT_LVB|LKM_CANCEL)) {
408 mlog(ML_ERROR, "bad args! cannot modify lvb on a CANCEL "
409 "request!\n");
410 return DLM_BADARGS;
413 if (unlock->namelen > DLM_LOCKID_NAME_MAX) {
414 mlog(ML_ERROR, "Invalid name length in unlock handler!\n");
415 return DLM_IVBUFLEN;
418 if (!dlm_grab(dlm))
419 return DLM_REJECTED;
421 mlog_bug_on_msg(!dlm_domain_fully_joined(dlm),
422 "Domain %s not fully joined!\n", dlm->name);
424 mlog(0, "lvb: %s\n", flags & LKM_PUT_LVB ? "put lvb" : "none");
426 res = dlm_lookup_lockres(dlm, unlock->name, unlock->namelen);
427 if (!res) {
428 /* We assume here that a no lock resource simply means
429 * it was migrated away and destroyed before the other
430 * node could detect it. */
431 mlog(0, "returning DLM_FORWARD -- res no longer exists\n");
432 status = DLM_FORWARD;
433 goto not_found;
436 queue=&res->granted;
437 found = 0;
438 spin_lock(&res->spinlock);
439 if (res->state & DLM_LOCK_RES_RECOVERING) {
440 spin_unlock(&res->spinlock);
441 mlog(0, "returning DLM_RECOVERING\n");
442 status = DLM_RECOVERING;
443 goto leave;
446 if (res->state & DLM_LOCK_RES_MIGRATING) {
447 spin_unlock(&res->spinlock);
448 mlog(0, "returning DLM_MIGRATING\n");
449 status = DLM_MIGRATING;
450 goto leave;
453 if (res->owner != dlm->node_num) {
454 spin_unlock(&res->spinlock);
455 mlog(0, "returning DLM_FORWARD -- not master\n");
456 status = DLM_FORWARD;
457 goto leave;
460 for (i=0; i<3; i++) {
461 list_for_each(iter, queue) {
462 lock = list_entry(iter, struct dlm_lock, list);
463 if (lock->ml.cookie == unlock->cookie &&
464 lock->ml.node == unlock->node_idx) {
465 dlm_lock_get(lock);
466 found = 1;
467 break;
470 if (found)
471 break;
472 /* scan granted -> converting -> blocked queues */
473 queue++;
475 spin_unlock(&res->spinlock);
476 if (!found) {
477 status = DLM_IVLOCKID;
478 goto not_found;
481 /* lock was found on queue */
482 lksb = lock->lksb;
483 if (flags & (LKM_VALBLK|LKM_PUT_LVB) &&
484 lock->ml.type != LKM_EXMODE)
485 flags &= ~(LKM_VALBLK|LKM_PUT_LVB);
487 /* unlockast only called on originating node */
488 if (flags & LKM_PUT_LVB) {
489 lksb->flags |= DLM_LKSB_PUT_LVB;
490 memcpy(&lksb->lvb[0], &unlock->lvb[0], DLM_LVB_LEN);
493 /* if this is in-progress, propagate the DLM_FORWARD
494 * all the way back out */
495 status = dlmunlock_master(dlm, res, lock, lksb, flags, &ignore);
496 if (status == DLM_FORWARD)
497 mlog(0, "lockres is in progress\n");
499 if (flags & LKM_PUT_LVB)
500 lksb->flags &= ~DLM_LKSB_PUT_LVB;
502 dlm_lockres_calc_usage(dlm, res);
503 dlm_kick_thread(dlm, res);
505 not_found:
506 if (!found)
507 mlog(ML_ERROR, "failed to find lock to unlock! "
508 "cookie=%u:%llu\n",
509 dlm_get_lock_cookie_node(be64_to_cpu(unlock->cookie)),
510 dlm_get_lock_cookie_seq(be64_to_cpu(unlock->cookie)));
511 else
512 dlm_lock_put(lock);
514 leave:
515 if (res)
516 dlm_lockres_put(res);
518 dlm_put(dlm);
520 return status;
524 static enum dlm_status dlm_get_cancel_actions(struct dlm_ctxt *dlm,
525 struct dlm_lock_resource *res,
526 struct dlm_lock *lock,
527 struct dlm_lockstatus *lksb,
528 int *actions)
530 enum dlm_status status;
532 if (dlm_lock_on_list(&res->blocked, lock)) {
533 /* cancel this outright */
534 status = DLM_NORMAL;
535 *actions = (DLM_UNLOCK_CALL_AST |
536 DLM_UNLOCK_REMOVE_LOCK);
537 } else if (dlm_lock_on_list(&res->converting, lock)) {
538 /* cancel the request, put back on granted */
539 status = DLM_NORMAL;
540 *actions = (DLM_UNLOCK_CALL_AST |
541 DLM_UNLOCK_REMOVE_LOCK |
542 DLM_UNLOCK_REGRANT_LOCK |
543 DLM_UNLOCK_CLEAR_CONVERT_TYPE);
544 } else if (dlm_lock_on_list(&res->granted, lock)) {
545 /* too late, already granted. */
546 status = DLM_CANCELGRANT;
547 *actions = DLM_UNLOCK_CALL_AST;
548 } else {
549 mlog(ML_ERROR, "lock to cancel is not on any list!\n");
550 status = DLM_IVLOCKID;
551 *actions = 0;
553 return status;
556 static enum dlm_status dlm_get_unlock_actions(struct dlm_ctxt *dlm,
557 struct dlm_lock_resource *res,
558 struct dlm_lock *lock,
559 struct dlm_lockstatus *lksb,
560 int *actions)
562 enum dlm_status status;
564 /* unlock request */
565 if (!dlm_lock_on_list(&res->granted, lock)) {
566 status = DLM_DENIED;
567 dlm_error(status);
568 *actions = 0;
569 } else {
570 /* unlock granted lock */
571 status = DLM_NORMAL;
572 *actions = (DLM_UNLOCK_FREE_LOCK |
573 DLM_UNLOCK_CALL_AST |
574 DLM_UNLOCK_REMOVE_LOCK);
576 return status;
579 /* there seems to be no point in doing this async
580 * since (even for the remote case) there is really
581 * no work to queue up... so just do it and fire the
582 * unlockast by hand when done... */
583 enum dlm_status dlmunlock(struct dlm_ctxt *dlm, struct dlm_lockstatus *lksb,
584 int flags, dlm_astunlockfunc_t *unlockast, void *data)
586 enum dlm_status status;
587 struct dlm_lock_resource *res;
588 struct dlm_lock *lock = NULL;
589 int call_ast, is_master;
591 mlog_entry_void();
593 if (!lksb) {
594 dlm_error(DLM_BADARGS);
595 return DLM_BADARGS;
598 if (flags & ~(LKM_CANCEL | LKM_VALBLK | LKM_INVVALBLK)) {
599 dlm_error(DLM_BADPARAM);
600 return DLM_BADPARAM;
603 if ((flags & (LKM_VALBLK | LKM_CANCEL)) == (LKM_VALBLK | LKM_CANCEL)) {
604 mlog(0, "VALBLK given with CANCEL: ignoring VALBLK\n");
605 flags &= ~LKM_VALBLK;
608 if (!lksb->lockid || !lksb->lockid->lockres) {
609 dlm_error(DLM_BADPARAM);
610 return DLM_BADPARAM;
613 lock = lksb->lockid;
614 BUG_ON(!lock);
615 dlm_lock_get(lock);
617 res = lock->lockres;
618 BUG_ON(!res);
619 dlm_lockres_get(res);
620 retry:
621 call_ast = 0;
622 /* need to retry up here because owner may have changed */
623 mlog(0, "lock=%p res=%p\n", lock, res);
625 spin_lock(&res->spinlock);
626 is_master = (res->owner == dlm->node_num);
627 if (flags & LKM_VALBLK && lock->ml.type != LKM_EXMODE)
628 flags &= ~LKM_VALBLK;
629 spin_unlock(&res->spinlock);
631 if (is_master) {
632 status = dlmunlock_master(dlm, res, lock, lksb, flags,
633 &call_ast);
634 mlog(0, "done calling dlmunlock_master: returned %d, "
635 "call_ast is %d\n", status, call_ast);
636 } else {
637 status = dlmunlock_remote(dlm, res, lock, lksb, flags,
638 &call_ast);
639 mlog(0, "done calling dlmunlock_remote: returned %d, "
640 "call_ast is %d\n", status, call_ast);
643 if (status == DLM_RECOVERING ||
644 status == DLM_MIGRATING ||
645 status == DLM_FORWARD) {
646 /* We want to go away for a tiny bit to allow recovery
647 * / migration to complete on this resource. I don't
648 * know of any wait queue we could sleep on as this
649 * may be happening on another node. Perhaps the
650 * proper solution is to queue up requests on the
651 * other end? */
653 /* do we want to yield(); ?? */
654 msleep(50);
656 mlog(0, "retrying unlock due to pending recovery/"
657 "migration/in-progress\n");
658 goto retry;
661 if (call_ast) {
662 mlog(0, "calling unlockast(%p, %d)\n", data, status);
663 if (is_master) {
664 /* it is possible that there is one last bast
665 * pending. make sure it is flushed, then
666 * call the unlockast.
667 * not an issue if this is a mastered remotely,
668 * since this lock has been removed from the
669 * lockres queues and cannot be found. */
670 dlm_kick_thread(dlm, NULL);
671 wait_event(dlm->ast_wq,
672 dlm_lock_basts_flushed(dlm, lock));
674 (*unlockast)(data, status);
677 if (status == DLM_CANCELGRANT)
678 status = DLM_NORMAL;
680 if (status == DLM_NORMAL) {
681 mlog(0, "kicking the thread\n");
682 dlm_kick_thread(dlm, res);
683 } else
684 dlm_error(status);
686 dlm_lockres_calc_usage(dlm, res);
687 dlm_lockres_put(res);
688 dlm_lock_put(lock);
690 mlog(0, "returning status=%d!\n", status);
691 return status;
693 EXPORT_SYMBOL_GPL(dlmunlock);