/* -*- mode: c; c-basic-offset: 8; -*-
 * vim: noexpandtab sw=8 ts=8 sts=0:
 *
 * dlmthread.c
 *
 * standalone DLM module
 *
 * Copyright (C) 2004 Oracle.  All rights reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public
 * License along with this program; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 021110-1307, USA.
 *
 */
#include <linux/module.h>
#include <linux/fs.h>
#include <linux/types.h>
#include <linux/highmem.h>
#include <linux/init.h>
#include <linux/sysctl.h>
#include <linux/random.h>
#include <linux/blkdev.h>
#include <linux/socket.h>
#include <linux/inet.h>
#include <linux/timer.h>
#include <linux/kthread.h>
#include <linux/delay.h>


#include "cluster/heartbeat.h"
#include "cluster/nodemanager.h"
#include "cluster/tcp.h"

#include "dlmapi.h"
#include "dlmcommon.h"
#include "dlmdomain.h"

#define MLOG_MASK_PREFIX (ML_DLM|ML_DLM_THREAD)
#include "cluster/masklog.h"
static int dlm_thread(void *data);
static void dlm_flush_asts(struct dlm_ctxt *dlm);
#define dlm_lock_is_remote(dlm, lock)   ((lock)->ml.node != (dlm)->node_num)
/* will exit holding res->spinlock, but may drop in function */
/* waits until flags are cleared on res->state */
void __dlm_wait_on_lockres_flags(struct dlm_lock_resource *res, int flags)
{
        DECLARE_WAITQUEUE(wait, current);

        assert_spin_locked(&res->spinlock);

        add_wait_queue(&res->wq, &wait);
repeat:
        set_current_state(TASK_UNINTERRUPTIBLE);
        if (res->state & flags) {
                spin_unlock(&res->spinlock);
                schedule();
                spin_lock(&res->spinlock);
                goto repeat;
        }
        remove_wait_queue(&res->wq, &wait);
        __set_current_state(TASK_RUNNING);
}
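/*
 * Illustrative caller sketch (not part of the original file): the helper
 * above must be entered with res->spinlock held and returns with it held,
 * even though it may drop and retake the lock while sleeping. A
 * hypothetical caller would look like:
 *
 *      spin_lock(&res->spinlock);
 *      __dlm_wait_on_lockres_flags(res, DLM_LOCK_RES_RECOVERING);
 *      ...all bits in flags are now clear in res->state...
 *      spin_unlock(&res->spinlock);
 */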
int __dlm_lockres_has_locks(struct dlm_lock_resource *res)
{
        if (list_empty(&res->granted) &&
            list_empty(&res->converting) &&
            list_empty(&res->blocked))
                return 0;
        return 1;
}
/* "unused": the lockres has no locks, is not on the dirty list,
 * has no inflight locks (in the gap between mastery and acquiring
 * the first lock), and has no bits in its refmap.
 * truly ready to be freed. */
int __dlm_lockres_unused(struct dlm_lock_resource *res)
{
        int bit;

        assert_spin_locked(&res->spinlock);

        if (__dlm_lockres_has_locks(res))
                return 0;

        /* Locks are in the process of being created */
        if (res->inflight_locks)
                return 0;

        if (!list_empty(&res->dirty) || res->state & DLM_LOCK_RES_DIRTY)
                return 0;

        if (res->state & (DLM_LOCK_RES_RECOVERING|
                        DLM_LOCK_RES_RECOVERY_WAITING))
                return 0;

        /* Another node has this resource with this node as the master */
        bit = find_next_bit(res->refmap, O2NM_MAX_NODES, 0);
        if (bit < O2NM_MAX_NODES)
                return 0;

        return 1;
}
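/*
 * Illustrative sketch (not from the original source): any caller probing
 * whether a lockres is ready to be freed must do so under res->spinlock,
 * since every condition checked above is protected by it:
 *
 *      spin_lock(&res->spinlock);
 *      if (__dlm_lockres_unused(res))
 *              ...res may be placed on, or left on, the purge list...
 *      spin_unlock(&res->spinlock);
 */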
/* Call whenever you may have added or deleted something from one of
 * the lockres queues. This will figure out whether it belongs on the
 * unused list or not and does the appropriate thing. */
void __dlm_lockres_calc_usage(struct dlm_ctxt *dlm,
                              struct dlm_lock_resource *res)
{
        assert_spin_locked(&dlm->spinlock);
        assert_spin_locked(&res->spinlock);

        if (__dlm_lockres_unused(res)) {
                if (list_empty(&res->purge)) {
                        mlog(0, "%s: Adding res %.*s to purge list\n",
                             dlm->name, res->lockname.len, res->lockname.name);

                        res->last_used = jiffies;
                        dlm_lockres_get(res);
                        list_add_tail(&res->purge, &dlm->purge_list);
                        dlm->purge_count++;
                }
        } else if (!list_empty(&res->purge)) {
                mlog(0, "%s: Removing res %.*s from purge list\n",
                     dlm->name, res->lockname.len, res->lockname.name);

                list_del_init(&res->purge);
                dlm_lockres_put(res);
                dlm->purge_count--;
        }
}
void dlm_lockres_calc_usage(struct dlm_ctxt *dlm,
                            struct dlm_lock_resource *res)
{
        spin_lock(&dlm->spinlock);
        spin_lock(&res->spinlock);

        __dlm_lockres_calc_usage(dlm, res);

        spin_unlock(&res->spinlock);
        spin_unlock(&dlm->spinlock);
}
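/*
 * Illustrative sketch (hypothetical caller, not in this file): code that
 * has just added or removed a lock on one of the res queues re-evaluates
 * the lockres so it migrates on or off dlm->purge_list:
 *
 *      list_del_init(&lock->list);        (e.g. the last granted lock left)
 *      dlm_lockres_calc_usage(dlm, res);  (res now lands on the purge list)
 *
 * The unlocked wrapper takes dlm->spinlock and then res->spinlock, in that
 * order, matching the locking hierarchy used throughout this file.
 */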
/*
 * Do the real purge work:
 *      unhash the lockres, and
 *      clear flag DLM_LOCK_RES_DROPPING_REF.
 * It requires dlm and lockres spinlock to be taken.
 */
void __dlm_do_purge_lockres(struct dlm_ctxt *dlm,
                struct dlm_lock_resource *res)
{
        assert_spin_locked(&dlm->spinlock);
        assert_spin_locked(&res->spinlock);

        if (!list_empty(&res->purge)) {
                mlog(0, "%s: Removing res %.*s from purgelist\n",
                     dlm->name, res->lockname.len, res->lockname.name);
                list_del_init(&res->purge);
                dlm_lockres_put(res);
                dlm->purge_count--;
        }

        if (!__dlm_lockres_unused(res)) {
                mlog(ML_ERROR, "%s: res %.*s in use after deref\n",
                     dlm->name, res->lockname.len, res->lockname.name);
                __dlm_print_one_lock_resource(res);
                BUG();
        }

        __dlm_unhash_lockres(dlm, res);

        spin_lock(&dlm->track_lock);
        if (!list_empty(&res->tracking))
                list_del_init(&res->tracking);
        else {
                mlog(ML_ERROR, "%s: Resource %.*s not on the Tracking list\n",
                     dlm->name, res->lockname.len, res->lockname.name);
                __dlm_print_one_lock_resource(res);
        }
        spin_unlock(&dlm->track_lock);

        /*
         * lockres is not in the hash now. drop the flag and wake up
         * any processes waiting in dlm_get_lock_resource.
         */
        res->state &= ~DLM_LOCK_RES_DROPPING_REF;
}
static void dlm_purge_lockres(struct dlm_ctxt *dlm,
                             struct dlm_lock_resource *res)
{
        int master;
        int ret = 0;

        assert_spin_locked(&dlm->spinlock);
        assert_spin_locked(&res->spinlock);

        master = (res->owner == dlm->node_num);

        mlog(0, "%s: Purging res %.*s, master %d\n", dlm->name,
             res->lockname.len, res->lockname.name, master);

        if (!master) {
                if (res->state & DLM_LOCK_RES_DROPPING_REF) {
                        mlog(ML_NOTICE, "%s: res %.*s already in DLM_LOCK_RES_DROPPING_REF state\n",
                                dlm->name, res->lockname.len, res->lockname.name);
                        spin_unlock(&res->spinlock);
                        return;
                }

                res->state |= DLM_LOCK_RES_DROPPING_REF;
                /* drop spinlock...  retake below */
                spin_unlock(&res->spinlock);
                spin_unlock(&dlm->spinlock);

                spin_lock(&res->spinlock);
                /* This ensures that clear refmap is sent after the set */
                __dlm_wait_on_lockres_flags(res, DLM_LOCK_RES_SETREF_INPROG);
                spin_unlock(&res->spinlock);

                /* clear our bit from the master's refmap, ignore errors */
                ret = dlm_drop_lockres_ref(dlm, res);
                if (ret < 0) {
                        if (!dlm_is_host_down(ret))
                                BUG();
                }
                spin_lock(&dlm->spinlock);
                spin_lock(&res->spinlock);
        }

        if (!list_empty(&res->purge)) {
                mlog(0, "%s: Removing res %.*s from purgelist, master %d\n",
                     dlm->name, res->lockname.len, res->lockname.name, master);
                list_del_init(&res->purge);
                dlm_lockres_put(res);
                dlm->purge_count--;
        }

        if (!master && ret == DLM_DEREF_RESPONSE_INPROG) {
                mlog(0, "%s: deref %.*s in progress\n",
                        dlm->name, res->lockname.len, res->lockname.name);
                spin_unlock(&res->spinlock);
                return;
        }

        if (!__dlm_lockres_unused(res)) {
                mlog(ML_ERROR, "%s: res %.*s in use after deref\n",
                     dlm->name, res->lockname.len, res->lockname.name);
                __dlm_print_one_lock_resource(res);
                BUG();
        }

        __dlm_unhash_lockres(dlm, res);

        spin_lock(&dlm->track_lock);
        if (!list_empty(&res->tracking))
                list_del_init(&res->tracking);
        else {
                mlog(ML_ERROR, "Resource %.*s not on the Tracking list\n",
                                res->lockname.len, res->lockname.name);
                __dlm_print_one_lock_resource(res);
        }
        spin_unlock(&dlm->track_lock);

        /* lockres is not in the hash now.  drop the flag and wake up
         * any processes waiting in dlm_get_lock_resource. */
        if (!master) {
                res->state &= ~DLM_LOCK_RES_DROPPING_REF;
                spin_unlock(&res->spinlock);
                wake_up(&res->wq);
        } else
                spin_unlock(&res->spinlock);
}
static void dlm_run_purge_list(struct dlm_ctxt *dlm,
                               int purge_now)
{
        unsigned int run_max, unused;
        unsigned long purge_jiffies;
        struct dlm_lock_resource *lockres;

        spin_lock(&dlm->spinlock);
        run_max = dlm->purge_count;

        while (run_max && !list_empty(&dlm->purge_list)) {
                run_max--;

                lockres = list_entry(dlm->purge_list.next,
                                     struct dlm_lock_resource, purge);

                spin_lock(&lockres->spinlock);

                purge_jiffies = lockres->last_used +
                        msecs_to_jiffies(DLM_PURGE_INTERVAL_MS);

                /* Make sure that we want to be processing this guy at
                 * this time. */
                if (!purge_now && time_after(purge_jiffies, jiffies)) {
                        /* Since resources are added to the purge list
                         * in tail order, we can stop at the first
                         * unpurgable resource -- anyone added after
                         * him will have a greater last_used value */
                        spin_unlock(&lockres->spinlock);
                        break;
                }

                /* Status of the lockres *might* change so double
                 * check. If the lockres is unused, holding the dlm
                 * spinlock will prevent anyone from getting any more
                 * refs on it. */
                unused = __dlm_lockres_unused(lockres);
                if (!unused ||
                    (lockres->state & DLM_LOCK_RES_MIGRATING) ||
                    (lockres->inflight_assert_workers != 0)) {
                        mlog(0, "%s: res %.*s is in use or being remastered, "
                             "used %d, state %d, assert master workers %u\n",
                             dlm->name, lockres->lockname.len,
                             lockres->lockname.name,
                             !unused, lockres->state,
                             lockres->inflight_assert_workers);
                        list_move_tail(&lockres->purge, &dlm->purge_list);
                        spin_unlock(&lockres->spinlock);
                        continue;
                }

                dlm_lockres_get(lockres);

                dlm_purge_lockres(dlm, lockres);

                dlm_lockres_put(lockres);

                /* Avoid adding any scheduling latencies */
                cond_resched_lock(&dlm->spinlock);
        }

        spin_unlock(&dlm->spinlock);
}
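/*
 * Worked example of the aging check above (illustrative; assumes the
 * DLM_PURGE_INTERVAL_MS value of 8 * 1000 from dlmcommon.h): with HZ=250,
 * msecs_to_jiffies(8000) == 2000 ticks, so a lockres whose last_used was
 * recorded at jiffies J stays on the purge list until jiffies passes
 * J + 2000, at which point time_after(purge_jiffies, jiffies) becomes
 * false and the resource may be purged.
 */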
static void dlm_shuffle_lists(struct dlm_ctxt *dlm,
                              struct dlm_lock_resource *res)
{
        struct dlm_lock *lock, *target;
        int can_grant = 1;

        /*
         * Because this function is called with the lockres
         * spinlock, and because we know that it is not migrating/
         * recovering/in-progress, it is fine to reserve asts and
         * basts right before queueing them all throughout
         */
        assert_spin_locked(&dlm->ast_lock);
        assert_spin_locked(&res->spinlock);
        BUG_ON((res->state & (DLM_LOCK_RES_MIGRATING|
                              DLM_LOCK_RES_RECOVERING|
                              DLM_LOCK_RES_IN_PROGRESS)));

converting:
        if (list_empty(&res->converting))
                goto blocked;
        mlog(0, "%s: res %.*s has locks on the convert queue\n", dlm->name,
             res->lockname.len, res->lockname.name);

        target = list_entry(res->converting.next, struct dlm_lock, list);
        if (target->ml.convert_type == LKM_IVMODE) {
                mlog(ML_ERROR, "%s: res %.*s converting lock to invalid mode\n",
                     dlm->name, res->lockname.len, res->lockname.name);
                BUG();
        }
        list_for_each_entry(lock, &res->granted, list) {
                if (lock == target)
                        continue;
                if (!dlm_lock_compatible(lock->ml.type,
                                         target->ml.convert_type)) {
                        can_grant = 0;
                        /* queue the BAST if not already */
                        if (lock->ml.highest_blocked == LKM_IVMODE) {
                                __dlm_lockres_reserve_ast(res);
                                __dlm_queue_bast(dlm, lock);
                        }
                        /* update the highest_blocked if needed */
                        if (lock->ml.highest_blocked < target->ml.convert_type)
                                lock->ml.highest_blocked =
                                        target->ml.convert_type;
                }
        }

        list_for_each_entry(lock, &res->converting, list) {
                if (lock == target)
                        continue;
                if (!dlm_lock_compatible(lock->ml.type,
                                         target->ml.convert_type)) {
                        can_grant = 0;
                        if (lock->ml.highest_blocked == LKM_IVMODE) {
                                __dlm_lockres_reserve_ast(res);
                                __dlm_queue_bast(dlm, lock);
                        }
                        if (lock->ml.highest_blocked < target->ml.convert_type)
                                lock->ml.highest_blocked =
                                        target->ml.convert_type;
                }
        }

        /* we can convert the lock */
        if (can_grant) {
                spin_lock(&target->spinlock);
                BUG_ON(target->ml.highest_blocked != LKM_IVMODE);

                mlog(0, "%s: res %.*s, AST for Converting lock %u:%llu, type "
                     "%d => %d, node %u\n", dlm->name, res->lockname.len,
                     res->lockname.name,
                     dlm_get_lock_cookie_node(be64_to_cpu(target->ml.cookie)),
                     dlm_get_lock_cookie_seq(be64_to_cpu(target->ml.cookie)),
                     target->ml.type,
                     target->ml.convert_type, target->ml.node);

                target->ml.type = target->ml.convert_type;
                target->ml.convert_type = LKM_IVMODE;
                list_move_tail(&target->list, &res->granted);

                BUG_ON(!target->lksb);
                target->lksb->status = DLM_NORMAL;

                spin_unlock(&target->spinlock);

                __dlm_lockres_reserve_ast(res);
                __dlm_queue_ast(dlm, target);
                /* go back and check for more */
                goto converting;
        }

blocked:
        if (list_empty(&res->blocked))
                goto leave;
        target = list_entry(res->blocked.next, struct dlm_lock, list);

        list_for_each_entry(lock, &res->granted, list) {
                if (lock == target)
                        continue;
                if (!dlm_lock_compatible(lock->ml.type, target->ml.type)) {
                        can_grant = 0;
                        if (lock->ml.highest_blocked == LKM_IVMODE) {
                                __dlm_lockres_reserve_ast(res);
                                __dlm_queue_bast(dlm, lock);
                        }
                        if (lock->ml.highest_blocked < target->ml.type)
                                lock->ml.highest_blocked = target->ml.type;
                }
        }

        list_for_each_entry(lock, &res->converting, list) {
                if (lock == target)
                        continue;
                if (!dlm_lock_compatible(lock->ml.type, target->ml.type)) {
                        can_grant = 0;
                        if (lock->ml.highest_blocked == LKM_IVMODE) {
                                __dlm_lockres_reserve_ast(res);
                                __dlm_queue_bast(dlm, lock);
                        }
                        if (lock->ml.highest_blocked < target->ml.type)
                                lock->ml.highest_blocked = target->ml.type;
                }
        }

        /* we can grant the blocked lock (only
         * possible if converting list empty) */
        if (can_grant) {
                spin_lock(&target->spinlock);
                BUG_ON(target->ml.highest_blocked != LKM_IVMODE);

                mlog(0, "%s: res %.*s, AST for Blocked lock %u:%llu, type %d, "
                     "node %u\n", dlm->name, res->lockname.len,
                     res->lockname.name,
                     dlm_get_lock_cookie_node(be64_to_cpu(target->ml.cookie)),
                     dlm_get_lock_cookie_seq(be64_to_cpu(target->ml.cookie)),
                     target->ml.type, target->ml.node);

                /* target->ml.type is already correct */
                list_move_tail(&target->list, &res->granted);

                BUG_ON(!target->lksb);
                target->lksb->status = DLM_NORMAL;

                spin_unlock(&target->spinlock);

                __dlm_lockres_reserve_ast(res);
                __dlm_queue_ast(dlm, target);
                /* go back and check for more */
                goto converting;
        }

leave:
        return;
}
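/*
 * Compatibility sketch (standard DLM mode semantics, assumed here rather
 * than quoted from dlm_lock_compatible() itself): a holder blocks a
 * request only when their modes conflict, e.g.
 *
 *      dlm_lock_compatible(LKM_PRMODE, LKM_PRMODE) -> nonzero (shared read)
 *      dlm_lock_compatible(LKM_PRMODE, LKM_EXMODE) -> 0 (BAST the holder)
 *      dlm_lock_compatible(LKM_NLMODE, LKM_EXMODE) -> nonzero (NL blocks nothing)
 *
 * Each incompatible holder found above gets a BAST queued and its
 * highest_blocked raised to the strongest mode it is blocking.
 */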
/* must have NO locks when calling this with res != NULL */
void dlm_kick_thread(struct dlm_ctxt *dlm, struct dlm_lock_resource *res)
{
        if (res) {
                spin_lock(&dlm->spinlock);
                spin_lock(&res->spinlock);
                __dlm_dirty_lockres(dlm, res);
                spin_unlock(&res->spinlock);
                spin_unlock(&dlm->spinlock);
        }
        wake_up(&dlm->dlm_thread_wq);
}
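/*
 * Illustrative usage (hypothetical callers, not in this file):
 *
 *      dlm_kick_thread(dlm, res);      marks res dirty, then wakes the thread
 *      dlm_kick_thread(dlm, NULL);     just wakes dlm_thread
 *
 * Per the comment above, a caller passing a non-NULL res must hold no
 * spinlocks, since both dlm->spinlock and res->spinlock are taken here.
 */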
void __dlm_dirty_lockres(struct dlm_ctxt *dlm, struct dlm_lock_resource *res)
{
        assert_spin_locked(&dlm->spinlock);
        assert_spin_locked(&res->spinlock);

        /* don't shuffle secondary queues */
        if (res->owner == dlm->node_num) {
                if (res->state & (DLM_LOCK_RES_MIGRATING |
                                  DLM_LOCK_RES_BLOCK_DIRTY))
                        return;

                if (list_empty(&res->dirty)) {
                        /* ref for dirty_list */
                        dlm_lockres_get(res);
                        list_add_tail(&res->dirty, &dlm->dirty_list);
                        res->state |= DLM_LOCK_RES_DIRTY;
                }
        }

        mlog(0, "%s: res %.*s\n", dlm->name, res->lockname.len,
             res->lockname.name);
}
/* Launch the dlm thread for this domain */
int dlm_launch_thread(struct dlm_ctxt *dlm)
{
        mlog(0, "Starting dlm_thread...\n");

        dlm->dlm_thread_task = kthread_run(dlm_thread, dlm, "dlm-%s",
                        dlm->name);
        if (IS_ERR(dlm->dlm_thread_task)) {
                mlog_errno(PTR_ERR(dlm->dlm_thread_task));
                dlm->dlm_thread_task = NULL;
                return -EINVAL;
        }

        return 0;
}
void dlm_complete_thread(struct dlm_ctxt *dlm)
{
        if (dlm->dlm_thread_task) {
                mlog(ML_KTHREAD, "Waiting for dlm thread to exit\n");
                kthread_stop(dlm->dlm_thread_task);
                dlm->dlm_thread_task = NULL;
        }
}
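/*
 * Illustrative lifecycle sketch (hypothetical pairing, mirroring how
 * domain setup and teardown are expected to use these helpers):
 *
 *      ret = dlm_launch_thread(dlm);   spawns the "dlm-<domain>" kthread;
 *                                      -EINVAL if kthread_run() failed
 *      ...
 *      dlm_complete_thread(dlm);       stops the kthread and NULLs the task
 */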
static int dlm_dirty_list_empty(struct dlm_ctxt *dlm)
{
        int empty;

        spin_lock(&dlm->spinlock);
        empty = list_empty(&dlm->dirty_list);
        spin_unlock(&dlm->spinlock);

        return empty;
}
static void dlm_flush_asts(struct dlm_ctxt *dlm)
{
        int ret;
        struct dlm_lock *lock;
        struct dlm_lock_resource *res;
        u8 hi;

        spin_lock(&dlm->ast_lock);
        while (!list_empty(&dlm->pending_asts)) {
                lock = list_entry(dlm->pending_asts.next,
                                  struct dlm_lock, ast_list);
                /* get an extra ref on lock */
                dlm_lock_get(lock);
                res = lock->lockres;
                mlog(0, "%s: res %.*s, Flush AST for lock %u:%llu, type %d, "
                     "node %u\n", dlm->name, res->lockname.len,
                     res->lockname.name,
                     dlm_get_lock_cookie_node(be64_to_cpu(lock->ml.cookie)),
                     dlm_get_lock_cookie_seq(be64_to_cpu(lock->ml.cookie)),
                     lock->ml.type, lock->ml.node);

                BUG_ON(!lock->ast_pending);

                /* remove from list (including ref) */
                list_del_init(&lock->ast_list);
                dlm_lock_put(lock);
                spin_unlock(&dlm->ast_lock);

                if (lock->ml.node != dlm->node_num) {
                        ret = dlm_do_remote_ast(dlm, res, lock);
                        if (ret < 0)
                                mlog_errno(ret);
                } else
                        dlm_do_local_ast(dlm, res, lock);

                spin_lock(&dlm->ast_lock);

                /* possible that another ast was queued while
                 * we were delivering the last one */
                if (!list_empty(&lock->ast_list)) {
                        mlog(0, "%s: res %.*s, AST queued while flushing last "
                             "one\n", dlm->name, res->lockname.len,
                             res->lockname.name);
                } else
                        lock->ast_pending = 0;

                /* drop the extra ref.
                 * this may drop it completely. */
                dlm_lock_put(lock);
                dlm_lockres_release_ast(dlm, res);
        }

        while (!list_empty(&dlm->pending_basts)) {
                lock = list_entry(dlm->pending_basts.next,
                                  struct dlm_lock, bast_list);
                /* get an extra ref on lock */
                dlm_lock_get(lock);
                res = lock->lockres;

                BUG_ON(!lock->bast_pending);

                /* get the highest blocked lock, and reset */
                spin_lock(&lock->spinlock);
                BUG_ON(lock->ml.highest_blocked <= LKM_IVMODE);
                hi = lock->ml.highest_blocked;
                lock->ml.highest_blocked = LKM_IVMODE;
                spin_unlock(&lock->spinlock);

                /* remove from list (including ref) */
                list_del_init(&lock->bast_list);
                dlm_lock_put(lock);
                spin_unlock(&dlm->ast_lock);

                mlog(0, "%s: res %.*s, Flush BAST for lock %u:%llu, "
                     "blocked %d, node %u\n",
                     dlm->name, res->lockname.len, res->lockname.name,
                     dlm_get_lock_cookie_node(be64_to_cpu(lock->ml.cookie)),
                     dlm_get_lock_cookie_seq(be64_to_cpu(lock->ml.cookie)),
                     hi, lock->ml.node);

                if (lock->ml.node != dlm->node_num) {
                        ret = dlm_send_proxy_bast(dlm, res, lock, hi);
                        if (ret < 0)
                                mlog_errno(ret);
                } else
                        dlm_do_local_bast(dlm, res, lock, hi);

                spin_lock(&dlm->ast_lock);

                /* possible that another bast was queued while
                 * we were delivering the last one */
                if (!list_empty(&lock->bast_list)) {
                        mlog(0, "%s: res %.*s, BAST queued while flushing last "
                             "one\n", dlm->name, res->lockname.len,
                             res->lockname.name);
                } else
                        lock->bast_pending = 0;

                /* drop the extra ref.
                 * this may drop it completely. */
                dlm_lock_put(lock);
                dlm_lockres_release_ast(dlm, res);
        }
        wake_up(&dlm->ast_wq);
        spin_unlock(&dlm->ast_lock);
}
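/*
 * Pattern sketch (restating the delivery loops above, not a separate
 * implementation): dlm->ast_lock cannot be held across delivery, so each
 * iteration pins the lock first and rechecks after retaking the lock:
 *
 *      dlm_lock_get(lock);             pin across the unlocked window
 *      list_del_init(&lock->ast_list); off the pending list
 *      dlm_lock_put(lock);             drop the list's ref
 *      spin_unlock(&dlm->ast_lock);
 *      ...deliver the AST/BAST locally or over the wire...
 *      spin_lock(&dlm->ast_lock);      recheck: re-queued while unlocked?
 */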
#define DLM_THREAD_TIMEOUT_MS (4 * 1000)
#define DLM_THREAD_MAX_DIRTY  100
#define DLM_THREAD_MAX_ASTS   10
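/*
 * Worked numbers (derived from the constants above, for illustration):
 * each pass shuffles at most DLM_THREAD_MAX_DIRTY (100) lockreses before
 * throttling, and an idle thread sleeps up to DLM_THREAD_TIMEOUT_MS
 * (4000ms); with HZ=250 that is msecs_to_jiffies(4000) == 1000 jiffies.
 */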
static int dlm_thread(void *data)
{
        struct dlm_lock_resource *res;
        struct dlm_ctxt *dlm = data;
        unsigned long timeout = msecs_to_jiffies(DLM_THREAD_TIMEOUT_MS);

        mlog(0, "dlm thread running for %s...\n", dlm->name);

        while (!kthread_should_stop()) {
                int n = DLM_THREAD_MAX_DIRTY;

                /* dlm_shutting_down is very point-in-time, but that
                 * doesn't matter as we'll just loop back around if we
                 * get false on the leading edge of a state
                 * transition. */
                dlm_run_purge_list(dlm, dlm_shutting_down(dlm));

                /* We really don't want to hold dlm->spinlock while
                 * calling dlm_shuffle_lists on each lockres that
                 * needs to have its queues adjusted and AST/BASTs
                 * run.  So let's pull each entry off the dirty_list
                 * and drop dlm->spinlock ASAP.  Once off the list,
                 * res->spinlock needs to be taken again to protect
                 * the queues while calling dlm_shuffle_lists.  */
                spin_lock(&dlm->spinlock);
                while (!list_empty(&dlm->dirty_list)) {
                        int delay = 0;
                        res = list_entry(dlm->dirty_list.next,
                                         struct dlm_lock_resource, dirty);

                        /* peel a lockres off, remove it from the list,
                         * unset the dirty flag and drop the dlm lock */
                        BUG_ON(!res);
                        dlm_lockres_get(res);

                        spin_lock(&res->spinlock);
                        /* We clear the DLM_LOCK_RES_DIRTY state once we shuffle lists below */
                        list_del_init(&res->dirty);
                        spin_unlock(&res->spinlock);
                        spin_unlock(&dlm->spinlock);
                        /* Drop dirty_list ref */
                        dlm_lockres_put(res);

                        /* lockres can be re-dirtied/re-added to the
                         * dirty_list in this gap, but that is ok */

                        spin_lock(&dlm->ast_lock);
                        spin_lock(&res->spinlock);
                        if (res->owner != dlm->node_num) {
                                __dlm_print_one_lock_resource(res);
                                mlog(ML_ERROR, "%s: inprog %d, mig %d, reco %d,"
                                     " dirty %d\n", dlm->name,
                                     !!(res->state & DLM_LOCK_RES_IN_PROGRESS),
                                     !!(res->state & DLM_LOCK_RES_MIGRATING),
                                     !!(res->state & DLM_LOCK_RES_RECOVERING),
                                     !!(res->state & DLM_LOCK_RES_DIRTY));
                        }
                        BUG_ON(res->owner != dlm->node_num);

                        /* it is now ok to move lockreses in these states
                         * to the dirty list, assuming that they will only be
                         * dirty for a short while. */
                        BUG_ON(res->state & DLM_LOCK_RES_MIGRATING);
                        if (res->state & (DLM_LOCK_RES_IN_PROGRESS |
                                          DLM_LOCK_RES_RECOVERING |
                                          DLM_LOCK_RES_RECOVERY_WAITING)) {
                                /* move it to the tail and keep going */
                                res->state &= ~DLM_LOCK_RES_DIRTY;
                                spin_unlock(&res->spinlock);
                                spin_unlock(&dlm->ast_lock);
                                mlog(0, "%s: res %.*s, inprogress, delay list "
                                     "shuffle, state %d\n", dlm->name,
                                     res->lockname.len, res->lockname.name,
                                     res->state);
                                delay = 1;
                                goto in_progress;
                        }

                        /* at this point the lockres is not migrating/
                         * recovering/in-progress.  we have the lockres
                         * spinlock and do NOT have the dlm lock.
                         * safe to reserve/queue asts and run the lists. */

                        /* called while holding lockres lock */
                        dlm_shuffle_lists(dlm, res);
                        res->state &= ~DLM_LOCK_RES_DIRTY;
                        spin_unlock(&res->spinlock);
                        spin_unlock(&dlm->ast_lock);

                        dlm_lockres_calc_usage(dlm, res);

in_progress:

                        spin_lock(&dlm->spinlock);
                        /* if the lock was in-progress, stick
                         * it on the back of the list */
                        if (delay) {
                                spin_lock(&res->spinlock);
                                __dlm_dirty_lockres(dlm, res);
                                spin_unlock(&res->spinlock);
                        }
                        dlm_lockres_put(res);

                        /* unlikely, but we may need to give time to
                         * other tasks */
                        if (!--n) {
                                mlog(0, "%s: Throttling dlm thread\n",
                                     dlm->name);
                                break;
                        }
                }

                spin_unlock(&dlm->spinlock);
                dlm_flush_asts(dlm);

                /* yield and continue right away if there is more work to do */
                if (!n) {
                        cond_resched();
                        continue;
                }

                wait_event_interruptible_timeout(dlm->dlm_thread_wq,
                                                 !dlm_dirty_list_empty(dlm) ||
                                                 kthread_should_stop(),
                                                 timeout);
        }

        mlog(0, "quitting DLM thread\n");
        return 0;
}