1 // SPDX-License-Identifier: GPL-2.0-only
3 * Copyright (C) 2005-2008 Red Hat, Inc. All rights reserved.
7 #include <linux/miscdevice.h>
8 #include <linux/poll.h>
10 #include <linux/dlm_plock.h>
11 #include <linux/slab.h>
13 #include "dlm_internal.h"
14 #include "lockspace.h"
16 static spinlock_t ops_lock
;
17 static struct list_head send_list
;
18 static struct list_head recv_list
;
19 static wait_queue_head_t send_wq
;
20 static wait_queue_head_t recv_wq
;
23 struct list_head list
;
25 struct dlm_plock_info info
;
30 int (*callback
)(struct file_lock
*fl
, int result
);
37 static inline void set_version(struct dlm_plock_info
*info
)
39 info
->version
[0] = DLM_PLOCK_VERSION_MAJOR
;
40 info
->version
[1] = DLM_PLOCK_VERSION_MINOR
;
41 info
->version
[2] = DLM_PLOCK_VERSION_PATCH
;
44 static int check_version(struct dlm_plock_info
*info
)
46 if ((DLM_PLOCK_VERSION_MAJOR
!= info
->version
[0]) ||
47 (DLM_PLOCK_VERSION_MINOR
< info
->version
[1])) {
48 log_print("plock device version mismatch: "
49 "kernel (%u.%u.%u), user (%u.%u.%u)",
50 DLM_PLOCK_VERSION_MAJOR
,
51 DLM_PLOCK_VERSION_MINOR
,
52 DLM_PLOCK_VERSION_PATCH
,
61 static void send_op(struct plock_op
*op
)
63 set_version(&op
->info
);
64 INIT_LIST_HEAD(&op
->list
);
66 list_add_tail(&op
->list
, &send_list
);
67 spin_unlock(&ops_lock
);
71 /* If a process was killed while waiting for the only plock on a file,
72 locks_remove_posix will not see any lock on the file so it won't
73 send an unlock-close to us to pass on to userspace to clean up the
74 abandoned waiter. So, we have to insert the unlock-close when the
75 lock call is interrupted. */
77 static void do_unlock_close(struct dlm_ls
*ls
, u64 number
,
78 struct file
*file
, struct file_lock
*fl
)
82 op
= kzalloc(sizeof(*op
), GFP_NOFS
);
86 op
->info
.optype
= DLM_PLOCK_OP_UNLOCK
;
87 op
->info
.pid
= fl
->fl_pid
;
88 op
->info
.fsid
= ls
->ls_global_id
;
89 op
->info
.number
= number
;
91 op
->info
.end
= OFFSET_MAX
;
92 if (fl
->fl_lmops
&& fl
->fl_lmops
->lm_grant
)
93 op
->info
.owner
= (__u64
) fl
->fl_pid
;
95 op
->info
.owner
= (__u64
)(long) fl
->fl_owner
;
97 op
->info
.flags
|= DLM_PLOCK_FL_CLOSE
;
101 int dlm_posix_lock(dlm_lockspace_t
*lockspace
, u64 number
, struct file
*file
,
102 int cmd
, struct file_lock
*fl
)
106 struct plock_xop
*xop
;
109 ls
= dlm_find_lockspace_local(lockspace
);
113 xop
= kzalloc(sizeof(*xop
), GFP_NOFS
);
120 op
->info
.optype
= DLM_PLOCK_OP_LOCK
;
121 op
->info
.pid
= fl
->fl_pid
;
122 op
->info
.ex
= (fl
->fl_type
== F_WRLCK
);
123 op
->info
.wait
= IS_SETLKW(cmd
);
124 op
->info
.fsid
= ls
->ls_global_id
;
125 op
->info
.number
= number
;
126 op
->info
.start
= fl
->fl_start
;
127 op
->info
.end
= fl
->fl_end
;
128 if (fl
->fl_lmops
&& fl
->fl_lmops
->lm_grant
) {
129 /* fl_owner is lockd which doesn't distinguish
130 processes on the nfs client */
131 op
->info
.owner
= (__u64
) fl
->fl_pid
;
132 xop
->callback
= fl
->fl_lmops
->lm_grant
;
133 locks_init_lock(&xop
->flc
);
134 locks_copy_lock(&xop
->flc
, fl
);
138 op
->info
.owner
= (__u64
)(long) fl
->fl_owner
;
139 xop
->callback
= NULL
;
144 if (xop
->callback
== NULL
) {
145 rv
= wait_event_interruptible(recv_wq
, (op
->done
!= 0));
146 if (rv
== -ERESTARTSYS
) {
147 log_debug(ls
, "dlm_posix_lock: wait killed %llx",
148 (unsigned long long)number
);
149 spin_lock(&ops_lock
);
151 spin_unlock(&ops_lock
);
153 do_unlock_close(ls
, number
, file
, fl
);
157 rv
= FILE_LOCK_DEFERRED
;
161 spin_lock(&ops_lock
);
162 if (!list_empty(&op
->list
)) {
163 log_error(ls
, "dlm_posix_lock: op on list %llx",
164 (unsigned long long)number
);
167 spin_unlock(&ops_lock
);
172 if (locks_lock_file_wait(file
, fl
) < 0)
173 log_error(ls
, "dlm_posix_lock: vfs lock error %llx",
174 (unsigned long long)number
);
179 dlm_put_lockspace(ls
);
182 EXPORT_SYMBOL_GPL(dlm_posix_lock
);
184 /* Returns failure iff a successful lock operation should be canceled */
185 static int dlm_plock_callback(struct plock_op
*op
)
188 struct file_lock
*fl
;
189 struct file_lock
*flc
;
190 int (*notify
)(struct file_lock
*fl
, int result
) = NULL
;
191 struct plock_xop
*xop
= (struct plock_xop
*)op
;
194 spin_lock(&ops_lock
);
195 if (!list_empty(&op
->list
)) {
196 log_print("dlm_plock_callback: op on list %llx",
197 (unsigned long long)op
->info
.number
);
200 spin_unlock(&ops_lock
);
202 /* check if the following 2 are still valid or make a copy */
206 notify
= xop
->callback
;
209 notify(fl
, op
->info
.rv
);
213 /* got fs lock; bookkeep locally as well: */
214 flc
->fl_flags
&= ~FL_SLEEP
;
215 if (posix_lock_file(file
, flc
, NULL
)) {
217 * This can only happen in the case of kmalloc() failure.
218 * The filesystem's own lock is the authoritative lock,
219 * so a failure to get the lock locally is not a disaster.
220 * As long as the fs cannot reliably cancel locks (especially
221 * in a low-memory situation), we're better off ignoring
222 * this failure than trying to recover.
224 log_print("dlm_plock_callback: vfs lock error %llx file %p fl %p",
225 (unsigned long long)op
->info
.number
, file
, fl
);
230 /* XXX: We need to cancel the fs lock here: */
231 log_print("dlm_plock_callback: lock granted after lock request "
232 "failed; dangling lock!\n");
241 int dlm_posix_unlock(dlm_lockspace_t
*lockspace
, u64 number
, struct file
*file
,
242 struct file_lock
*fl
)
247 unsigned char fl_flags
= fl
->fl_flags
;
249 ls
= dlm_find_lockspace_local(lockspace
);
253 op
= kzalloc(sizeof(*op
), GFP_NOFS
);
259 /* cause the vfs unlock to return ENOENT if lock is not found */
260 fl
->fl_flags
|= FL_EXISTS
;
262 rv
= locks_lock_file_wait(file
, fl
);
268 log_error(ls
, "dlm_posix_unlock: vfs unlock error %d %llx",
269 rv
, (unsigned long long)number
);
272 op
->info
.optype
= DLM_PLOCK_OP_UNLOCK
;
273 op
->info
.pid
= fl
->fl_pid
;
274 op
->info
.fsid
= ls
->ls_global_id
;
275 op
->info
.number
= number
;
276 op
->info
.start
= fl
->fl_start
;
277 op
->info
.end
= fl
->fl_end
;
278 if (fl
->fl_lmops
&& fl
->fl_lmops
->lm_grant
)
279 op
->info
.owner
= (__u64
) fl
->fl_pid
;
281 op
->info
.owner
= (__u64
)(long) fl
->fl_owner
;
283 if (fl
->fl_flags
& FL_CLOSE
) {
284 op
->info
.flags
|= DLM_PLOCK_FL_CLOSE
;
291 wait_event(recv_wq
, (op
->done
!= 0));
293 spin_lock(&ops_lock
);
294 if (!list_empty(&op
->list
)) {
295 log_error(ls
, "dlm_posix_unlock: op on list %llx",
296 (unsigned long long)number
);
299 spin_unlock(&ops_lock
);
309 dlm_put_lockspace(ls
);
310 fl
->fl_flags
= fl_flags
;
313 EXPORT_SYMBOL_GPL(dlm_posix_unlock
);
315 int dlm_posix_get(dlm_lockspace_t
*lockspace
, u64 number
, struct file
*file
,
316 struct file_lock
*fl
)
322 ls
= dlm_find_lockspace_local(lockspace
);
326 op
= kzalloc(sizeof(*op
), GFP_NOFS
);
332 op
->info
.optype
= DLM_PLOCK_OP_GET
;
333 op
->info
.pid
= fl
->fl_pid
;
334 op
->info
.ex
= (fl
->fl_type
== F_WRLCK
);
335 op
->info
.fsid
= ls
->ls_global_id
;
336 op
->info
.number
= number
;
337 op
->info
.start
= fl
->fl_start
;
338 op
->info
.end
= fl
->fl_end
;
339 if (fl
->fl_lmops
&& fl
->fl_lmops
->lm_grant
)
340 op
->info
.owner
= (__u64
) fl
->fl_pid
;
342 op
->info
.owner
= (__u64
)(long) fl
->fl_owner
;
345 wait_event(recv_wq
, (op
->done
!= 0));
347 spin_lock(&ops_lock
);
348 if (!list_empty(&op
->list
)) {
349 log_error(ls
, "dlm_posix_get: op on list %llx",
350 (unsigned long long)number
);
353 spin_unlock(&ops_lock
);
355 /* info.rv from userspace is 1 for conflict, 0 for no-conflict,
356 -ENOENT if there are no locks on the file */
360 fl
->fl_type
= F_UNLCK
;
365 fl
->fl_type
= (op
->info
.ex
) ? F_WRLCK
: F_RDLCK
;
366 fl
->fl_flags
= FL_POSIX
;
367 fl
->fl_pid
= -op
->info
.pid
;
368 fl
->fl_start
= op
->info
.start
;
369 fl
->fl_end
= op
->info
.end
;
375 dlm_put_lockspace(ls
);
378 EXPORT_SYMBOL_GPL(dlm_posix_get
);
380 /* a read copies out one plock request from the send list */
381 static ssize_t
dev_read(struct file
*file
, char __user
*u
, size_t count
,
384 struct dlm_plock_info info
;
385 struct plock_op
*op
= NULL
;
387 if (count
< sizeof(info
))
390 spin_lock(&ops_lock
);
391 if (!list_empty(&send_list
)) {
392 op
= list_entry(send_list
.next
, struct plock_op
, list
);
393 if (op
->info
.flags
& DLM_PLOCK_FL_CLOSE
)
396 list_move(&op
->list
, &recv_list
);
397 memcpy(&info
, &op
->info
, sizeof(info
));
399 spin_unlock(&ops_lock
);
404 /* there is no need to get a reply from userspace for unlocks
405 that were generated by the vfs cleaning up for a close
406 (the process did not make an unlock call). */
408 if (op
->info
.flags
& DLM_PLOCK_FL_CLOSE
)
411 if (copy_to_user(u
, &info
, sizeof(info
)))
416 /* a write copies in one plock result that should match a plock_op
418 static ssize_t
dev_write(struct file
*file
, const char __user
*u
, size_t count
,
421 struct dlm_plock_info info
;
423 int found
= 0, do_callback
= 0;
425 if (count
!= sizeof(info
))
428 if (copy_from_user(&info
, u
, sizeof(info
)))
431 if (check_version(&info
))
434 spin_lock(&ops_lock
);
435 list_for_each_entry(op
, &recv_list
, list
) {
436 if (op
->info
.fsid
== info
.fsid
&&
437 op
->info
.number
== info
.number
&&
438 op
->info
.owner
== info
.owner
) {
439 struct plock_xop
*xop
= (struct plock_xop
*)op
;
440 list_del_init(&op
->list
);
441 memcpy(&op
->info
, &info
, sizeof(info
));
450 spin_unlock(&ops_lock
);
454 dlm_plock_callback(op
);
458 log_print("dev_write no op %x %llx", info
.fsid
,
459 (unsigned long long)info
.number
);
463 static __poll_t
dev_poll(struct file
*file
, poll_table
*wait
)
467 poll_wait(file
, &send_wq
, wait
);
469 spin_lock(&ops_lock
);
470 if (!list_empty(&send_list
))
471 mask
= EPOLLIN
| EPOLLRDNORM
;
472 spin_unlock(&ops_lock
);
477 static const struct file_operations dev_fops
= {
481 .owner
= THIS_MODULE
,
482 .llseek
= noop_llseek
,
485 static struct miscdevice plock_dev_misc
= {
486 .minor
= MISC_DYNAMIC_MINOR
,
487 .name
= DLM_PLOCK_MISC_NAME
,
491 int dlm_plock_init(void)
495 spin_lock_init(&ops_lock
);
496 INIT_LIST_HEAD(&send_list
);
497 INIT_LIST_HEAD(&recv_list
);
498 init_waitqueue_head(&send_wq
);
499 init_waitqueue_head(&recv_wq
);
501 rv
= misc_register(&plock_dev_misc
);
503 log_print("dlm_plock_init: misc_register failed %d", rv
);
507 void dlm_plock_exit(void)
509 misc_deregister(&plock_dev_misc
);