1 /* -*- mode: c; c-basic-offset: 8; -*-
2 * vim: noexpandtab sw=8 ts=8 sts=0:
6 * underlying calls for unlocking locks
8 * Copyright (C) 2004 Oracle. All rights reserved.
10 * This program is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU General Public
12 * License as published by the Free Software Foundation; either
13 * version 2 of the License, or (at your option) any later version.
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 * General Public License for more details.
20 * You should have received a copy of the GNU General Public
21 * License along with this program; if not, write to the
22 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
23 * Boston, MA 02111-1307, USA.
28 #include <linux/module.h>
30 #include <linux/types.h>
31 #include <linux/highmem.h>
32 #include <linux/init.h>
33 #include <linux/sysctl.h>
34 #include <linux/random.h>
35 #include <linux/blkdev.h>
36 #include <linux/socket.h>
37 #include <linux/inet.h>
38 #include <linux/spinlock.h>
39 #include <linux/delay.h>
41 #include "cluster/heartbeat.h"
42 #include "cluster/nodemanager.h"
43 #include "cluster/tcp.h"
46 #include "dlmcommon.h"
48 #define MLOG_MASK_PREFIX ML_DLM
49 #include "cluster/masklog.h"
/*
 * Action bits filled in by dlm_get_cancel_actions() and
 * dlm_get_unlock_actions() and acted upon by dlmunlock_common().
 */
/* lock is going away for good; dlmunlock_common() BUG()s unless
 * DLM_UNLOCK_REMOVE_LOCK is set together with it */
51 #define DLM_UNLOCK_FREE_LOCK 0x00000001
/* tested by dlmunlock_common() near its end; presumably reported to the
 * caller through *call_ast -- the assignment is not visible here */
52 #define DLM_UNLOCK_CALL_AST 0x00000002
/* list_del_init() the lock from whatever queue it is on */
53 #define DLM_UNLOCK_REMOVE_LOCK 0x00000004
/* list_add_tail() the lock onto res->granted */
54 #define DLM_UNLOCK_REGRANT_LOCK 0x00000008
/* reset lock->ml.convert_type to LKM_IVMODE */
55 #define DLM_UNLOCK_CLEAR_CONVERT_TYPE 0x00000010
/* Forward declaration: computes the action mask for an LKM_CANCEL request;
 * defined further down in this file. */
58 static enum dlm_status
dlm_get_cancel_actions(struct dlm_ctxt
*dlm
,
59 struct dlm_lock_resource
*res
,
60 struct dlm_lock
*lock
,
61 struct dlm_lockstatus
*lksb
,
/* Forward declaration: computes the action mask for a plain unlock;
 * defined further down in this file. */
63 static enum dlm_status
dlm_get_unlock_actions(struct dlm_ctxt
*dlm
,
64 struct dlm_lock_resource
*res
,
65 struct dlm_lock
*lock
,
66 struct dlm_lockstatus
*lksb
,
/* Forward declaration: sends the unlock/cancel message to the owner of
 * the lock resource; defined further down in this file. */
69 static enum dlm_status
dlm_send_remote_unlock_request(struct dlm_ctxt
*dlm
,
70 struct dlm_lock_resource
*res
,
71 struct dlm_lock
*lock
,
72 struct dlm_lockstatus
*lksb
,
78 * according to the spec:
79 * http://opendlm.sourceforge.net/cvsmirror/opendlm/docs/dlmbook_final.pdf
81 * flags & LKM_CANCEL != 0: must be converting or blocked
82 * flags & LKM_CANCEL == 0: must be granted
84 * So to unlock a converting lock, you must first cancel the
85 * convert (passing LKM_CANCEL in flags), then call the unlock
86 * again (with no LKM_CANCEL in flags).
93 * taken: res->spinlock and lock->spinlock taken and dropped
95 * returns: DLM_NORMAL, DLM_NOLOCKMGR, status from network
96 * all callers should have taken an extra ref on lock coming in
/*
 * dlmunlock_common - shared body of the unlock / cancel operation.
 *
 * Reached through dlmunlock_master() (last argument 1) and
 * dlmunlock_remote() (last argument 0); that last argument is presumably
 * the master_node value tested below -- its declaration is not visible.
 *
 * Steps visible in this function, in order:
 *  - under dlm->ast_lock, note whether lock->ast_list is non-empty and
 *    log an error when a non-cancel unlock arrives while an AST is queued
 *  - take res->spinlock; deal with DLM_LOCK_RES_IN_PROGRESS, then set it
 *  - take lock->spinlock (always after res->spinlock)
 *  - report DLM_RECOVERING / DLM_MIGRATING when the resource is in
 *    either of those states
 *  - compute the DLM_UNLOCK_* action mask with dlm_get_cancel_actions()
 *    for LKM_CANCEL or dlm_get_unlock_actions() otherwise
 *  - in the LKM_VALBLK branch, copy lksb->lvb into res->lvb and set
 *    LKM_PUT_LVB in flags
 *  - set cancel_pending / unlock_pending, drop both spinlocks around
 *    dlm_send_remote_unlock_request(), retake them and adjust the mask
 *    according to the returned status
 *  - apply the REMOVE / REGRANT / CLEAR_CONVERT_TYPE actions, clear
 *    DLM_LOCK_RES_IN_PROGRESS, and drop both spinlocks
 *  - wait for DLM_LOCK_RES_RECOVERING to clear
 *  - on DLM_NORMAL, clear DLM_LKSB_PUT_LVB and DLM_LKSB_GET_LVB
 *
 * NOTE(review): several lines of this function (local declarations,
 * braces, else / goto / return lines) are not visible in this copy of
 * the file, so which branch each statement belongs to should be
 * confirmed against the complete source.
 */
98 static enum dlm_status
dlmunlock_common(struct dlm_ctxt
*dlm
,
99 struct dlm_lock_resource
*res
,
100 struct dlm_lock
*lock
,
101 struct dlm_lockstatus
*lksb
,
102 int flags
, int *call_ast
,
105 enum dlm_status status
;
109 int recovery_wait
= 0;
111 mlog(0, "master_node = %d, valblk = %d\n", master_node
,
/* NOTE(review): the two BUG_ON checks below are mutually exclusive, so
 * each is presumably guarded by a master_node test that is not visible
 * here -- confirm. */
115 BUG_ON(res
->owner
!= dlm
->node_num
);
117 BUG_ON(res
->owner
== dlm
->node_num
);
119 spin_lock(&dlm
->ast_lock
);
120 /* We want to be sure that we're not freeing a lock
121 * that still has AST's pending... */
122 in_use
= !list_empty(&lock
->ast_list
);
123 spin_unlock(&dlm
->ast_lock
);
124 if (in_use
&& !(flags
& LKM_CANCEL
)) {
125 mlog(ML_ERROR
, "lockres %.*s: Someone is calling dlmunlock "
126 "while waiting for an ast!", res
->lockname
.len
,
131 spin_lock(&res
->spinlock
);
132 if (res
->state
& DLM_LOCK_RES_IN_PROGRESS
) {
133 if (master_node
&& !(flags
& LKM_CANCEL
)) {
134 mlog(ML_ERROR
, "lockres in progress!\n");
135 spin_unlock(&res
->spinlock
);
138 /* ok for this to sleep if not in a network handler */
139 __dlm_wait_on_lockres(res
);
140 res
->state
|= DLM_LOCK_RES_IN_PROGRESS
;
142 spin_lock(&lock
->spinlock
);
144 if (res
->state
& DLM_LOCK_RES_RECOVERING
) {
145 status
= DLM_RECOVERING
;
149 if (res
->state
& DLM_LOCK_RES_MIGRATING
) {
150 status
= DLM_MIGRATING
;
154 /* see above for what the spec says about
155 * LKM_CANCEL and the lock queue state */
156 if (flags
& LKM_CANCEL
)
157 status
= dlm_get_cancel_actions(dlm
, res
, lock
, lksb
, &actions
);
159 status
= dlm_get_unlock_actions(dlm
, res
, lock
, lksb
, &actions
);
161 if (status
!= DLM_NORMAL
&& (status
!= DLM_CANCELGRANT
|| !master_node
))
164 /* By now this has been masked out of cancel requests. */
165 if (flags
& LKM_VALBLK
) {
166 /* make the final update to the lvb */
168 memcpy(res
->lvb
, lksb
->lvb
, DLM_LVB_LEN
);
170 flags
|= LKM_PUT_LVB
; /* let the send function
176 /* drop locks and send message */
177 if (flags
& LKM_CANCEL
)
178 lock
->cancel_pending
= 1;
180 lock
->unlock_pending
= 1;
181 spin_unlock(&lock
->spinlock
);
182 spin_unlock(&res
->spinlock
);
183 status
= dlm_send_remote_unlock_request(dlm
, res
, lock
, lksb
,
/* retake both locks in the original order: res->spinlock first, then
 * lock->spinlock */
185 spin_lock(&res
->spinlock
);
186 spin_lock(&lock
->spinlock
);
187 /* if the master told us the lock was already granted,
188 * let the ast handle all of these actions */
189 if (status
== DLM_CANCELGRANT
) {
190 actions
&= ~(DLM_UNLOCK_REMOVE_LOCK
|
191 DLM_UNLOCK_REGRANT_LOCK
|
192 DLM_UNLOCK_CLEAR_CONVERT_TYPE
);
193 } else if (status
== DLM_RECOVERING
||
194 status
== DLM_MIGRATING
||
195 status
== DLM_FORWARD
||
196 status
== DLM_NOLOCKMGR
198 /* must clear the actions because this unlock
199 * is about to be retried. cannot free or do
200 * any list manipulation. */
201 mlog(0, "%s:%.*s: clearing actions, %s\n",
202 dlm
->name
, res
->lockname
.len
,
204 status
==DLM_RECOVERING
?"recovering":
205 (status
==DLM_MIGRATING
?"migrating":
206 (status
== DLM_FORWARD
? "forward" :
210 if (flags
& LKM_CANCEL
)
211 lock
->cancel_pending
= 0;
213 if (!lock
->unlock_pending
)
216 lock
->unlock_pending
= 0;
220 /* get an extra ref on lock. if we are just switching
221 * lists here, we dont want the lock to go away. */
224 if (actions
& DLM_UNLOCK_REMOVE_LOCK
) {
225 list_del_init(&lock
->list
);
228 if (actions
& DLM_UNLOCK_REGRANT_LOCK
) {
230 list_add_tail(&lock
->list
, &res
->granted
);
232 if (actions
& DLM_UNLOCK_CLEAR_CONVERT_TYPE
) {
233 mlog(0, "clearing convert_type at %smaster node\n",
234 master_node
? "" : "non-");
235 lock
->ml
.convert_type
= LKM_IVMODE
;
238 /* remove the extra ref on lock */
/* drop the in-progress marker that was set before lock->spinlock was
 * taken */
242 res
->state
&= ~DLM_LOCK_RES_IN_PROGRESS
;
243 if (!dlm_lock_on_list(&res
->converting
, lock
))
244 BUG_ON(lock
->ml
.convert_type
!= LKM_IVMODE
);
246 BUG_ON(lock
->ml
.convert_type
== LKM_IVMODE
);
247 spin_unlock(&lock
->spinlock
);
248 spin_unlock(&res
->spinlock
);
252 spin_lock(&res
->spinlock
);
253 /* Unlock request will directly succeed after owner dies,
254 * and the lock is already removed from grant list. We have to
255 * wait for RECOVERING done or we miss the chance to purge it
256 * since the removement is much faster than RECOVERING proc.
258 __dlm_wait_on_lockres_flags(res
, DLM_LOCK_RES_RECOVERING
);
259 spin_unlock(&res
->spinlock
);
262 /* let the caller's final dlm_lock_put handle the actual kfree */
263 if (actions
& DLM_UNLOCK_FREE_LOCK
) {
264 /* this should always be coupled with list removal */
265 BUG_ON(!(actions
& DLM_UNLOCK_REMOVE_LOCK
));
266 mlog(0, "lock %u:%llu should be gone now! refs=%d\n",
267 dlm_get_lock_cookie_node(be64_to_cpu(lock
->ml
.cookie
)),
268 dlm_get_lock_cookie_seq(be64_to_cpu(lock
->ml
.cookie
)),
269 atomic_read(&lock
->lock_refs
.refcount
)-1);
272 if (actions
& DLM_UNLOCK_CALL_AST
)
275 /* if cancel or unlock succeeded, lvb work is done */
276 if (status
== DLM_NORMAL
)
277 lksb
->flags
&= ~(DLM_LKSB_PUT_LVB
|DLM_LKSB_GET_LVB
);
/*
 * dlm_commit_pending_unlock - finish an unlock that was still pending.
 *
 * The only visible effect is unlinking @lock from the queue it is on
 * with list_del_init(); @res is not referenced in the visible lines.
 * DLM_LKSB_PUT_LVB is deliberately left untouched (see the comment in
 * the body).
 */
282 void dlm_commit_pending_unlock(struct dlm_lock_resource
*res
,
283 struct dlm_lock
*lock
)
285 /* leave DLM_LKSB_PUT_LVB on the lksb so any final
286 * update of the lvb will be sent to the new master */
287 list_del_init(&lock
->list
);
/*
 * dlm_commit_pending_cancel - finish a cancel that was still pending.
 *
 * Moves @lock to the tail of res->granted and resets its convert_type
 * to LKM_IVMODE, i.e. the same list/field changes that the
 * REMOVE + REGRANT + CLEAR_CONVERT_TYPE actions perform in
 * dlmunlock_common().
 */
290 void dlm_commit_pending_cancel(struct dlm_lock_resource
*res
,
291 struct dlm_lock
*lock
)
293 list_move_tail(&lock
->list
, &res
->granted
);
294 lock
->ml
.convert_type
= LKM_IVMODE
;
/*
 * dlmunlock_master - thin wrapper around dlmunlock_common() that passes
 * 1 as the final argument (the "this node is the master" case).
 * Returns whatever dlmunlock_common() returns.
 */
298 static inline enum dlm_status
dlmunlock_master(struct dlm_ctxt
*dlm
,
299 struct dlm_lock_resource
*res
,
300 struct dlm_lock
*lock
,
301 struct dlm_lockstatus
*lksb
,
305 return dlmunlock_common(dlm
, res
, lock
, lksb
, flags
, call_ast
, 1);
/*
 * dlmunlock_remote - thin wrapper around dlmunlock_common() that passes
 * 0 as the final argument (the "master is another node" case).
 * Returns whatever dlmunlock_common() returns.
 */
308 static inline enum dlm_status
dlmunlock_remote(struct dlm_ctxt
*dlm
,
309 struct dlm_lock_resource
*res
,
310 struct dlm_lock
*lock
,
311 struct dlm_lockstatus
*lksb
,
312 int flags
, int *call_ast
)
314 return dlmunlock_common(dlm
, res
, lock
, lksb
, flags
, call_ast
, 0);
322 * returns: DLM_NORMAL, DLM_NOLOCKMGR, status from network
/*
 * dlm_send_remote_unlock_request - send DLM_UNLOCK_LOCK_MSG to `owner`.
 *
 * Visible behavior:
 *  - if `owner` is this node, only logs that the resource became local
 *    through a migration (the comment says the request is retried as
 *    local; the status returned for that case is not visible here)
 *  - fills a zeroed struct dlm_unlock_lock with this node's number, the
 *    flags in big-endian form, the lock cookie and the resource name,
 *    and places it in vec[0]
 *  - when LKM_PUT_LVB is set, adds lock->lksb->lvb (DLM_LVB_LEN bytes)
 *    as vec[1]
 *  - sends with o2net_send_message_vec(); the remote handler's status
 *    comes back through `status`, the transport result through `tmpret`
 *  - on a transport error, logs it, consults dlm_is_host_down() and
 *    dlm_is_node_dead(), and otherwise maps the error with
 *    dlm_err_to_dlm_status()
 *
 * NOTE(review): the trailing parameters, local declarations and return
 * statements are not visible in this copy of the file.
 */
324 static enum dlm_status
dlm_send_remote_unlock_request(struct dlm_ctxt
*dlm
,
325 struct dlm_lock_resource
*res
,
326 struct dlm_lock
*lock
,
327 struct dlm_lockstatus
*lksb
,
331 struct dlm_unlock_lock unlock
;
338 mlog(0, "%.*s\n", res
->lockname
.len
, res
->lockname
.name
);
340 if (owner
== dlm
->node_num
) {
341 /* ended up trying to contact ourself. this means
342 * that the lockres had been remote but became local
343 * via a migration. just retry it, now as local */
344 mlog(0, "%s:%.*s: this node became the master due to a "
345 "migration, re-evaluate now\n", dlm
->name
,
346 res
->lockname
.len
, res
->lockname
.name
);
/* build the fixed-size part of the wire message */
350 memset(&unlock
, 0, sizeof(unlock
));
351 unlock
.node_idx
= dlm
->node_num
;
352 unlock
.flags
= cpu_to_be32(flags
);
353 unlock
.cookie
= lock
->ml
.cookie
;
354 unlock
.namelen
= res
->lockname
.len
;
355 memcpy(unlock
.name
, res
->lockname
.name
, unlock
.namelen
);
357 vec
[0].iov_len
= sizeof(struct dlm_unlock_lock
);
358 vec
[0].iov_base
= &unlock
;
360 if (flags
& LKM_PUT_LVB
) {
361 /* extra data to send if we are updating lvb */
362 vec
[1].iov_len
= DLM_LVB_LEN
;
363 vec
[1].iov_base
= lock
->lksb
->lvb
;
367 tmpret
= o2net_send_message_vec(DLM_UNLOCK_LOCK_MSG
, dlm
->key
,
368 vec
, veclen
, owner
, &status
);
370 // successfully sent and received
371 if (status
== DLM_FORWARD
)
372 mlog(0, "master was in-progress. retry\n");
375 mlog(ML_ERROR
, "Error %d when sending message %u (key 0x%x) to "
376 "node %u\n", tmpret
, DLM_UNLOCK_LOCK_MSG
, dlm
->key
, owner
);
377 if (dlm_is_host_down(tmpret
)) {
378 /* NOTE: this seems strange, but it is what we want.
379 * when the master goes down during a cancel or
380 * unlock, the recovery code completes the operation
381 * as if the master had not died, then passes the
382 * updated state to the recovery master. this thread
383 * just needs to finish out the operation and call
385 if (dlm_is_node_dead(dlm
, owner
))
390 /* something bad. this will BUG in ocfs2 */
391 ret
= dlm_err_to_dlm_status(tmpret
);
401 * taken: takes and drops res->spinlock
403 * returns: DLM_NORMAL, DLM_BADARGS, DLM_IVLOCKID,
404 * return value from dlmunlock_master
/*
 * dlm_unlock_lock_handler - handler for an incoming unlock message
 * (struct dlm_unlock_lock in msg->buf); @data is the struct dlm_ctxt.
 *
 * Visible behavior:
 *  - argument checks, each logged with ML_ERROR: LKM_GET_LVB set,
 *    LKM_PUT_LVB combined with LKM_CANCEL, or namelen larger than
 *    DLM_LOCKID_NAME_MAX
 *  - looks the resource up by name; status becomes DLM_FORWARD when it
 *    is not found or when this node is not its owner, DLM_RECOVERING or
 *    DLM_MIGRATING when the resource is in one of those states
 *  - under res->spinlock, scans up to three queues (granted, converting,
 *    blocked) for a lock whose cookie and node match the request;
 *    status becomes DLM_IVLOCKID when none matches
 *  - drops LKM_VALBLK / LKM_PUT_LVB unless the lock type is LKM_EXMODE
 *  - with LKM_PUT_LVB, marks the lksb DLM_LKSB_PUT_LVB and copies the
 *    lvb from the message before calling dlmunlock_master(), then
 *    clears DLM_LKSB_PUT_LVB again
 *  - calls dlm_lockres_calc_usage() and dlm_kick_thread(), and finally
 *    drops the resource reference with dlm_lockres_put()
 *
 * NOTE(review): the remaining parameters, several declarations, and the
 * goto / return lines are not visible in this copy of the file, so how
 * each status value leaves the function should be confirmed against the
 * complete source.
 */
406 int dlm_unlock_lock_handler(struct o2net_msg
*msg
, u32 len
, void *data
,
409 struct dlm_ctxt
*dlm
= data
;
410 struct dlm_unlock_lock
*unlock
= (struct dlm_unlock_lock
*)msg
->buf
;
411 struct dlm_lock_resource
*res
= NULL
;
412 struct dlm_lock
*lock
= NULL
;
413 enum dlm_status status
= DLM_NORMAL
;
415 struct dlm_lockstatus
*lksb
= NULL
;
418 struct list_head
*queue
;
/* flags travel in big-endian form on the wire */
420 flags
= be32_to_cpu(unlock
->flags
);
422 if (flags
& LKM_GET_LVB
) {
423 mlog(ML_ERROR
, "bad args! GET_LVB specified on unlock!\n");
427 if ((flags
& (LKM_PUT_LVB
|LKM_CANCEL
)) == (LKM_PUT_LVB
|LKM_CANCEL
)) {
428 mlog(ML_ERROR
, "bad args! cannot modify lvb on a CANCEL "
433 if (unlock
->namelen
> DLM_LOCKID_NAME_MAX
) {
434 mlog(ML_ERROR
, "Invalid name length in unlock handler!\n");
441 mlog_bug_on_msg(!dlm_domain_fully_joined(dlm
),
442 "Domain %s not fully joined!\n", dlm
->name
);
444 mlog(0, "lvb: %s\n", flags
& LKM_PUT_LVB
? "put lvb" : "none");
446 res
= dlm_lookup_lockres(dlm
, unlock
->name
, unlock
->namelen
);
448 /* We assume here that a no lock resource simply means
449 * it was migrated away and destroyed before the other
450 * node could detect it. */
451 mlog(0, "returning DLM_FORWARD -- res no longer exists\n");
452 status
= DLM_FORWARD
;
458 spin_lock(&res
->spinlock
);
459 if (res
->state
& DLM_LOCK_RES_RECOVERING
) {
460 spin_unlock(&res
->spinlock
);
461 mlog(0, "returning DLM_RECOVERING\n");
462 status
= DLM_RECOVERING
;
466 if (res
->state
& DLM_LOCK_RES_MIGRATING
) {
467 spin_unlock(&res
->spinlock
);
468 mlog(0, "returning DLM_MIGRATING\n");
469 status
= DLM_MIGRATING
;
473 if (res
->owner
!= dlm
->node_num
) {
474 spin_unlock(&res
->spinlock
);
475 mlog(0, "returning DLM_FORWARD -- not master\n");
476 status
= DLM_FORWARD
;
480 for (i
=0; i
<3; i
++) {
481 list_for_each_entry(lock
, queue
, list
) {
482 if (lock
->ml
.cookie
== unlock
->cookie
&&
483 lock
->ml
.node
== unlock
->node_idx
) {
491 /* scan granted -> converting -> blocked queues */
494 spin_unlock(&res
->spinlock
);
496 status
= DLM_IVLOCKID
;
500 /* lock was found on queue */
502 if (flags
& (LKM_VALBLK
|LKM_PUT_LVB
) &&
503 lock
->ml
.type
!= LKM_EXMODE
)
504 flags
&= ~(LKM_VALBLK
|LKM_PUT_LVB
);
506 /* unlockast only called on originating node */
507 if (flags
& LKM_PUT_LVB
) {
508 lksb
->flags
|= DLM_LKSB_PUT_LVB
;
509 memcpy(&lksb
->lvb
[0], &unlock
->lvb
[0], DLM_LVB_LEN
);
512 /* if this is in-progress, propagate the DLM_FORWARD
513 * all the way back out */
514 status
= dlmunlock_master(dlm
, res
, lock
, lksb
, flags
, &ignore
);
515 if (status
== DLM_FORWARD
)
516 mlog(0, "lockres is in progress\n");
518 if (flags
& LKM_PUT_LVB
)
519 lksb
->flags
&= ~DLM_LKSB_PUT_LVB
;
521 dlm_lockres_calc_usage(dlm
, res
);
522 dlm_kick_thread(dlm
, res
);
526 mlog(ML_ERROR
, "failed to find lock to unlock! "
528 dlm_get_lock_cookie_node(be64_to_cpu(unlock
->cookie
)),
529 dlm_get_lock_cookie_seq(be64_to_cpu(unlock
->cookie
)));
535 dlm_lockres_put(res
);
/*
 * dlm_get_cancel_actions - decide what an LKM_CANCEL request must do,
 * based on which queue of @res currently holds @lock.
 *
 *  - on res->blocked:    *actions = CALL_AST | REMOVE_LOCK
 *  - on res->converting: *actions = CALL_AST | REMOVE_LOCK |
 *                        REGRANT_LOCK | CLEAR_CONVERT_TYPE
 *  - on res->granted:    status = DLM_CANCELGRANT, *actions = CALL_AST
 *  - on none of them:    logs an error, status = DLM_IVLOCKID
 *
 * NOTE(review): the status assigned in the first two cases, the final
 * parameter declaration and the return line are not visible in this
 * copy of the file.
 */
543 static enum dlm_status
dlm_get_cancel_actions(struct dlm_ctxt
*dlm
,
544 struct dlm_lock_resource
*res
,
545 struct dlm_lock
*lock
,
546 struct dlm_lockstatus
*lksb
,
549 enum dlm_status status
;
551 if (dlm_lock_on_list(&res
->blocked
, lock
)) {
552 /* cancel this outright */
554 *actions
= (DLM_UNLOCK_CALL_AST
|
555 DLM_UNLOCK_REMOVE_LOCK
);
556 } else if (dlm_lock_on_list(&res
->converting
, lock
)) {
557 /* cancel the request, put back on granted */
559 *actions
= (DLM_UNLOCK_CALL_AST
|
560 DLM_UNLOCK_REMOVE_LOCK
|
561 DLM_UNLOCK_REGRANT_LOCK
|
562 DLM_UNLOCK_CLEAR_CONVERT_TYPE
);
563 } else if (dlm_lock_on_list(&res
->granted
, lock
)) {
564 /* too late, already granted. */
565 status
= DLM_CANCELGRANT
;
566 *actions
= DLM_UNLOCK_CALL_AST
;
568 mlog(ML_ERROR
, "lock to cancel is not on any list!\n");
569 status
= DLM_IVLOCKID
;
/*
 * dlm_get_unlock_actions - decide what a plain (non-cancel) unlock must
 * do.
 *
 * Tests whether @lock is on res->granted. For the "unlock granted lock"
 * case, *actions is set to FREE_LOCK | CALL_AST | REMOVE_LOCK.
 *
 * NOTE(review): the handling of a lock that is not on the granted queue,
 * the status assignments, the final parameter declaration and the return
 * line are not visible in this copy of the file.
 */
575 static enum dlm_status
dlm_get_unlock_actions(struct dlm_ctxt
*dlm
,
576 struct dlm_lock_resource
*res
,
577 struct dlm_lock
*lock
,
578 struct dlm_lockstatus
*lksb
,
581 enum dlm_status status
;
584 if (!dlm_lock_on_list(&res
->granted
, lock
)) {
589 /* unlock granted lock */
591 *actions
= (DLM_UNLOCK_FREE_LOCK
|
592 DLM_UNLOCK_CALL_AST
|
593 DLM_UNLOCK_REMOVE_LOCK
);
598 /* there seems to be no point in doing this async
599 * since (even for the remote case) there is really
600 * no work to queue up... so just do it and fire the
601 * unlockast by hand when done... */
/*
 * dlmunlock - exported entry point for unlocking or cancelling a lock.
 *
 * @dlm:       lock domain context
 * @lksb:      lock status block; lksb->lockid and lksb->lockid->lockres
 *             must both be set or DLM_BADPARAM is reported
 * @flags:     only LKM_CANCEL, LKM_VALBLK and LKM_INVVALBLK are accepted;
 *             any other bit is reported as DLM_BADPARAM
 * @unlockast: callback invoked as (*unlockast)(data, status)
 * @data:      opaque argument handed to @unlockast
 *
 * Visible behavior:
 *  - LKM_VALBLK is dropped when given together with LKM_CANCEL, and also
 *    when the lock type is not LKM_EXMODE
 *  - takes a reference on the lock resource, samples ownership under
 *    res->spinlock, and calls dlmunlock_master() or dlmunlock_remote()
 *  - DLM_RECOVERING, DLM_MIGRATING, DLM_FORWARD and DLM_NOLOCKMGR are
 *    treated as "retry" results (see the log message in that branch)
 *  - before the unlockast is called, the dlm thread is kicked and the
 *    caller waits on dlm->ast_wq until dlm_lock_basts_flushed() holds
 *  - on DLM_NORMAL the dlm thread is kicked for @res; afterwards
 *    dlm_lockres_calc_usage() runs and the resource reference is dropped
 *
 * NOTE(review): braces, labels, goto / return lines and some conditions
 * are not visible in this copy of the file, so the exact branch each
 * statement belongs to should be confirmed against the complete source.
 */
602 enum dlm_status
dlmunlock(struct dlm_ctxt
*dlm
, struct dlm_lockstatus
*lksb
,
603 int flags
, dlm_astunlockfunc_t
*unlockast
, void *data
)
605 enum dlm_status status
;
606 struct dlm_lock_resource
*res
;
607 struct dlm_lock
*lock
= NULL
;
608 int call_ast
, is_master
;
611 dlm_error(DLM_BADARGS
);
615 if (flags
& ~(LKM_CANCEL
| LKM_VALBLK
| LKM_INVVALBLK
)) {
616 dlm_error(DLM_BADPARAM
);
620 if ((flags
& (LKM_VALBLK
| LKM_CANCEL
)) == (LKM_VALBLK
| LKM_CANCEL
)) {
621 mlog(0, "VALBLK given with CANCEL: ignoring VALBLK\n");
622 flags
&= ~LKM_VALBLK
;
625 if (!lksb
->lockid
|| !lksb
->lockid
->lockres
) {
626 dlm_error(DLM_BADPARAM
);
636 dlm_lockres_get(res
);
639 /* need to retry up here because owner may have changed */
640 mlog(0, "lock=%p res=%p\n", lock
, res
);
642 spin_lock(&res
->spinlock
);
643 is_master
= (res
->owner
== dlm
->node_num
);
644 if (flags
& LKM_VALBLK
&& lock
->ml
.type
!= LKM_EXMODE
)
645 flags
&= ~LKM_VALBLK
;
646 spin_unlock(&res
->spinlock
);
649 status
= dlmunlock_master(dlm
, res
, lock
, lksb
, flags
,
651 mlog(0, "done calling dlmunlock_master: returned %d, "
652 "call_ast is %d\n", status
, call_ast
);
654 status
= dlmunlock_remote(dlm
, res
, lock
, lksb
, flags
,
656 mlog(0, "done calling dlmunlock_remote: returned %d, "
657 "call_ast is %d\n", status
, call_ast
);
660 if (status
== DLM_RECOVERING
||
661 status
== DLM_MIGRATING
||
662 status
== DLM_FORWARD
||
663 status
== DLM_NOLOCKMGR
) {
665 /* We want to go away for a tiny bit to allow recovery
666 * / migration to complete on this resource. I don't
667 * know of any wait queue we could sleep on as this
668 * may be happening on another node. Perhaps the
669 * proper solution is to queue up requests on the
672 /* do we want to yield(); ?? */
675 mlog(0, "retrying unlock due to pending recovery/"
676 "migration/in-progress/reconnect\n");
681 mlog(0, "calling unlockast(%p, %d)\n", data
, status
);
683 /* it is possible that there is one last bast
684 * pending. make sure it is flushed, then
685 * call the unlockast.
686 * not an issue if this is a mastered remotely,
687 * since this lock has been removed from the
688 * lockres queues and cannot be found. */
689 dlm_kick_thread(dlm
, NULL
);
690 wait_event(dlm
->ast_wq
,
691 dlm_lock_basts_flushed(dlm
, lock
));
693 (*unlockast
)(data
, status
);
696 if (status
== DLM_CANCELGRANT
)
699 if (status
== DLM_NORMAL
) {
700 mlog(0, "kicking the thread\n");
701 dlm_kick_thread(dlm
, res
);
705 dlm_lockres_calc_usage(dlm
, res
);
706 dlm_lockres_put(res
);
709 mlog(0, "returning status=%d!\n", status
);
712 EXPORT_SYMBOL_GPL(dlmunlock
);