2 * Copyright (C) 2005-2008 Red Hat, Inc. All rights reserved.
4 * This copyrighted material is made available to anyone wishing to use,
5 * modify, copy, or redistribute it subject to the terms and conditions
6 * of the GNU General Public License version 2.
10 #include <linux/miscdevice.h>
11 #include <linux/poll.h>
12 #include <linux/dlm.h>
13 #include <linux/dlm_plock.h>
14 #include <linux/slab.h>
16 #include "dlm_internal.h"
17 #include "lockspace.h"
19 static spinlock_t ops_lock
;
20 static struct list_head send_list
;
21 static struct list_head recv_list
;
22 static wait_queue_head_t send_wq
;
23 static wait_queue_head_t recv_wq
;
26 struct list_head list
;
28 struct dlm_plock_info info
;
33 int (*callback
)(struct file_lock
*fl
, int result
);
40 static inline void set_version(struct dlm_plock_info
*info
)
42 info
->version
[0] = DLM_PLOCK_VERSION_MAJOR
;
43 info
->version
[1] = DLM_PLOCK_VERSION_MINOR
;
44 info
->version
[2] = DLM_PLOCK_VERSION_PATCH
;
47 static int check_version(struct dlm_plock_info
*info
)
49 if ((DLM_PLOCK_VERSION_MAJOR
!= info
->version
[0]) ||
50 (DLM_PLOCK_VERSION_MINOR
< info
->version
[1])) {
51 log_print("plock device version mismatch: "
52 "kernel (%u.%u.%u), user (%u.%u.%u)",
53 DLM_PLOCK_VERSION_MAJOR
,
54 DLM_PLOCK_VERSION_MINOR
,
55 DLM_PLOCK_VERSION_PATCH
,
64 static void send_op(struct plock_op
*op
)
66 set_version(&op
->info
);
67 INIT_LIST_HEAD(&op
->list
);
69 list_add_tail(&op
->list
, &send_list
);
70 spin_unlock(&ops_lock
);
74 /* If a process was killed while waiting for the only plock on a file,
75 locks_remove_posix will not see any lock on the file so it won't
76 send an unlock-close to us to pass on to userspace to clean up the
77 abandoned waiter. So, we have to insert the unlock-close when the
78 lock call is interrupted. */
80 static void do_unlock_close(struct dlm_ls
*ls
, u64 number
,
81 struct file
*file
, struct file_lock
*fl
)
85 op
= kzalloc(sizeof(*op
), GFP_NOFS
);
89 op
->info
.optype
= DLM_PLOCK_OP_UNLOCK
;
90 op
->info
.pid
= fl
->fl_pid
;
91 op
->info
.fsid
= ls
->ls_global_id
;
92 op
->info
.number
= number
;
94 op
->info
.end
= OFFSET_MAX
;
95 if (fl
->fl_lmops
&& fl
->fl_lmops
->lm_grant
)
96 op
->info
.owner
= (__u64
) fl
->fl_pid
;
98 op
->info
.owner
= (__u64
)(long) fl
->fl_owner
;
100 op
->info
.flags
|= DLM_PLOCK_FL_CLOSE
;
104 int dlm_posix_lock(dlm_lockspace_t
*lockspace
, u64 number
, struct file
*file
,
105 int cmd
, struct file_lock
*fl
)
109 struct plock_xop
*xop
;
112 ls
= dlm_find_lockspace_local(lockspace
);
116 xop
= kzalloc(sizeof(*xop
), GFP_NOFS
);
123 op
->info
.optype
= DLM_PLOCK_OP_LOCK
;
124 op
->info
.pid
= fl
->fl_pid
;
125 op
->info
.ex
= (fl
->fl_type
== F_WRLCK
);
126 op
->info
.wait
= IS_SETLKW(cmd
);
127 op
->info
.fsid
= ls
->ls_global_id
;
128 op
->info
.number
= number
;
129 op
->info
.start
= fl
->fl_start
;
130 op
->info
.end
= fl
->fl_end
;
131 if (fl
->fl_lmops
&& fl
->fl_lmops
->lm_grant
) {
132 /* fl_owner is lockd which doesn't distinguish
133 processes on the nfs client */
134 op
->info
.owner
= (__u64
) fl
->fl_pid
;
135 xop
->callback
= fl
->fl_lmops
->lm_grant
;
136 locks_init_lock(&xop
->flc
);
137 locks_copy_lock(&xop
->flc
, fl
);
141 op
->info
.owner
= (__u64
)(long) fl
->fl_owner
;
142 xop
->callback
= NULL
;
147 if (xop
->callback
== NULL
) {
148 rv
= wait_event_killable(recv_wq
, (op
->done
!= 0));
149 if (rv
== -ERESTARTSYS
) {
150 log_debug(ls
, "dlm_posix_lock: wait killed %llx",
151 (unsigned long long)number
);
152 spin_lock(&ops_lock
);
154 spin_unlock(&ops_lock
);
156 do_unlock_close(ls
, number
, file
, fl
);
160 rv
= FILE_LOCK_DEFERRED
;
164 spin_lock(&ops_lock
);
165 if (!list_empty(&op
->list
)) {
166 log_error(ls
, "dlm_posix_lock: op on list %llx",
167 (unsigned long long)number
);
170 spin_unlock(&ops_lock
);
175 if (posix_lock_file_wait(file
, fl
) < 0)
176 log_error(ls
, "dlm_posix_lock: vfs lock error %llx",
177 (unsigned long long)number
);
182 dlm_put_lockspace(ls
);
185 EXPORT_SYMBOL_GPL(dlm_posix_lock
);
187 /* Returns failure iff a successful lock operation should be canceled */
188 static int dlm_plock_callback(struct plock_op
*op
)
191 struct file_lock
*fl
;
192 struct file_lock
*flc
;
193 int (*notify
)(struct file_lock
*fl
, int result
) = NULL
;
194 struct plock_xop
*xop
= (struct plock_xop
*)op
;
197 spin_lock(&ops_lock
);
198 if (!list_empty(&op
->list
)) {
199 log_print("dlm_plock_callback: op on list %llx",
200 (unsigned long long)op
->info
.number
);
203 spin_unlock(&ops_lock
);
205 /* check if the following 2 are still valid or make a copy */
209 notify
= xop
->callback
;
212 notify(fl
, op
->info
.rv
);
216 /* got fs lock; bookkeep locally as well: */
217 flc
->fl_flags
&= ~FL_SLEEP
;
218 if (posix_lock_file(file
, flc
, NULL
)) {
220 * This can only happen in the case of kmalloc() failure.
221 * The filesystem's own lock is the authoritative lock,
222 * so a failure to get the lock locally is not a disaster.
223 * As long as the fs cannot reliably cancel locks (especially
224 * in a low-memory situation), we're better off ignoring
225 * this failure than trying to recover.
227 log_print("dlm_plock_callback: vfs lock error %llx file %p fl %p",
228 (unsigned long long)op
->info
.number
, file
, fl
);
233 /* XXX: We need to cancel the fs lock here: */
234 log_print("dlm_plock_callback: lock granted after lock request "
235 "failed; dangling lock!\n");
244 int dlm_posix_unlock(dlm_lockspace_t
*lockspace
, u64 number
, struct file
*file
,
245 struct file_lock
*fl
)
250 unsigned char fl_flags
= fl
->fl_flags
;
252 ls
= dlm_find_lockspace_local(lockspace
);
256 op
= kzalloc(sizeof(*op
), GFP_NOFS
);
262 /* cause the vfs unlock to return ENOENT if lock is not found */
263 fl
->fl_flags
|= FL_EXISTS
;
265 rv
= posix_lock_file_wait(file
, fl
);
271 log_error(ls
, "dlm_posix_unlock: vfs unlock error %d %llx",
272 rv
, (unsigned long long)number
);
275 op
->info
.optype
= DLM_PLOCK_OP_UNLOCK
;
276 op
->info
.pid
= fl
->fl_pid
;
277 op
->info
.fsid
= ls
->ls_global_id
;
278 op
->info
.number
= number
;
279 op
->info
.start
= fl
->fl_start
;
280 op
->info
.end
= fl
->fl_end
;
281 if (fl
->fl_lmops
&& fl
->fl_lmops
->lm_grant
)
282 op
->info
.owner
= (__u64
) fl
->fl_pid
;
284 op
->info
.owner
= (__u64
)(long) fl
->fl_owner
;
286 if (fl
->fl_flags
& FL_CLOSE
) {
287 op
->info
.flags
|= DLM_PLOCK_FL_CLOSE
;
294 wait_event(recv_wq
, (op
->done
!= 0));
296 spin_lock(&ops_lock
);
297 if (!list_empty(&op
->list
)) {
298 log_error(ls
, "dlm_posix_unlock: op on list %llx",
299 (unsigned long long)number
);
302 spin_unlock(&ops_lock
);
312 dlm_put_lockspace(ls
);
313 fl
->fl_flags
= fl_flags
;
316 EXPORT_SYMBOL_GPL(dlm_posix_unlock
);
318 int dlm_posix_get(dlm_lockspace_t
*lockspace
, u64 number
, struct file
*file
,
319 struct file_lock
*fl
)
325 ls
= dlm_find_lockspace_local(lockspace
);
329 op
= kzalloc(sizeof(*op
), GFP_NOFS
);
335 op
->info
.optype
= DLM_PLOCK_OP_GET
;
336 op
->info
.pid
= fl
->fl_pid
;
337 op
->info
.ex
= (fl
->fl_type
== F_WRLCK
);
338 op
->info
.fsid
= ls
->ls_global_id
;
339 op
->info
.number
= number
;
340 op
->info
.start
= fl
->fl_start
;
341 op
->info
.end
= fl
->fl_end
;
342 if (fl
->fl_lmops
&& fl
->fl_lmops
->lm_grant
)
343 op
->info
.owner
= (__u64
) fl
->fl_pid
;
345 op
->info
.owner
= (__u64
)(long) fl
->fl_owner
;
348 wait_event(recv_wq
, (op
->done
!= 0));
350 spin_lock(&ops_lock
);
351 if (!list_empty(&op
->list
)) {
352 log_error(ls
, "dlm_posix_get: op on list %llx",
353 (unsigned long long)number
);
356 spin_unlock(&ops_lock
);
358 /* info.rv from userspace is 1 for conflict, 0 for no-conflict,
359 -ENOENT if there are no locks on the file */
363 fl
->fl_type
= F_UNLCK
;
368 fl
->fl_type
= (op
->info
.ex
) ? F_WRLCK
: F_RDLCK
;
369 fl
->fl_flags
= FL_POSIX
;
370 fl
->fl_pid
= op
->info
.pid
;
371 fl
->fl_start
= op
->info
.start
;
372 fl
->fl_end
= op
->info
.end
;
378 dlm_put_lockspace(ls
);
381 EXPORT_SYMBOL_GPL(dlm_posix_get
);
383 /* a read copies out one plock request from the send list */
384 static ssize_t
dev_read(struct file
*file
, char __user
*u
, size_t count
,
387 struct dlm_plock_info info
;
388 struct plock_op
*op
= NULL
;
390 if (count
< sizeof(info
))
393 spin_lock(&ops_lock
);
394 if (!list_empty(&send_list
)) {
395 op
= list_entry(send_list
.next
, struct plock_op
, list
);
396 if (op
->info
.flags
& DLM_PLOCK_FL_CLOSE
)
399 list_move(&op
->list
, &recv_list
);
400 memcpy(&info
, &op
->info
, sizeof(info
));
402 spin_unlock(&ops_lock
);
407 /* there is no need to get a reply from userspace for unlocks
408 that were generated by the vfs cleaning up for a close
409 (the process did not make an unlock call). */
411 if (op
->info
.flags
& DLM_PLOCK_FL_CLOSE
)
414 if (copy_to_user(u
, &info
, sizeof(info
)))
419 /* a write copies in one plock result that should match a plock_op
421 static ssize_t
dev_write(struct file
*file
, const char __user
*u
, size_t count
,
424 struct dlm_plock_info info
;
426 int found
= 0, do_callback
= 0;
428 if (count
!= sizeof(info
))
431 if (copy_from_user(&info
, u
, sizeof(info
)))
434 if (check_version(&info
))
437 spin_lock(&ops_lock
);
438 list_for_each_entry(op
, &recv_list
, list
) {
439 if (op
->info
.fsid
== info
.fsid
&&
440 op
->info
.number
== info
.number
&&
441 op
->info
.owner
== info
.owner
) {
442 struct plock_xop
*xop
= (struct plock_xop
*)op
;
443 list_del_init(&op
->list
);
444 memcpy(&op
->info
, &info
, sizeof(info
));
453 spin_unlock(&ops_lock
);
457 dlm_plock_callback(op
);
461 log_print("dev_write no op %x %llx", info
.fsid
,
462 (unsigned long long)info
.number
);
466 static unsigned int dev_poll(struct file
*file
, poll_table
*wait
)
468 unsigned int mask
= 0;
470 poll_wait(file
, &send_wq
, wait
);
472 spin_lock(&ops_lock
);
473 if (!list_empty(&send_list
))
474 mask
= POLLIN
| POLLRDNORM
;
475 spin_unlock(&ops_lock
);
480 static const struct file_operations dev_fops
= {
484 .owner
= THIS_MODULE
,
485 .llseek
= noop_llseek
,
488 static struct miscdevice plock_dev_misc
= {
489 .minor
= MISC_DYNAMIC_MINOR
,
490 .name
= DLM_PLOCK_MISC_NAME
,
494 int dlm_plock_init(void)
498 spin_lock_init(&ops_lock
);
499 INIT_LIST_HEAD(&send_list
);
500 INIT_LIST_HEAD(&recv_list
);
501 init_waitqueue_head(&send_wq
);
502 init_waitqueue_head(&recv_wq
);
504 rv
= misc_register(&plock_dev_misc
);
506 log_print("dlm_plock_init: misc_register failed %d", rv
);
510 void dlm_plock_exit(void)
512 if (misc_deregister(&plock_dev_misc
) < 0)
513 log_print("dlm_plock_exit: misc_deregister failed");