// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2005-2008 Red Hat, Inc. All rights reserved.
 */
7 #include <linux/filelock.h>
8 #include <linux/miscdevice.h>
9 #include <linux/poll.h>
10 #include <linux/dlm.h>
11 #include <linux/dlm_plock.h>
12 #include <linux/slab.h>
14 #include <trace/events/dlm.h>
16 #include "dlm_internal.h"
17 #include "lockspace.h"
/* protects send_list and recv_list */
static DEFINE_SPINLOCK(ops_lock);
/* ops queued for userspace (dlm_controld) to read via the misc device */
static LIST_HEAD(send_list);
/* ops handed to userspace, waiting for a reply via dev_write() */
static LIST_HEAD(recv_list);
/* woken when a new op is queued on send_list (dev_poll/dev_read) */
static DECLARE_WAIT_QUEUE_HEAD(send_wq);
/* woken when a reply arrives and op->done is set */
static DECLARE_WAIT_QUEUE_HEAD(recv_wq);
25 struct plock_async_data
{
29 int (*callback
)(struct file_lock
*fl
, int result
);
33 struct list_head list
;
35 struct dlm_plock_info info
;
36 /* if set indicates async handling */
37 struct plock_async_data
*data
;
40 static inline void set_version(struct dlm_plock_info
*info
)
42 info
->version
[0] = DLM_PLOCK_VERSION_MAJOR
;
43 info
->version
[1] = DLM_PLOCK_VERSION_MINOR
;
44 info
->version
[2] = DLM_PLOCK_VERSION_PATCH
;
47 static struct plock_op
*plock_lookup_waiter(const struct dlm_plock_info
*info
)
49 struct plock_op
*op
= NULL
, *iter
;
51 list_for_each_entry(iter
, &recv_list
, list
) {
52 if (iter
->info
.fsid
== info
->fsid
&&
53 iter
->info
.number
== info
->number
&&
54 iter
->info
.owner
== info
->owner
&&
55 iter
->info
.pid
== info
->pid
&&
56 iter
->info
.start
== info
->start
&&
57 iter
->info
.end
== info
->end
&&
58 iter
->info
.ex
== info
->ex
&&
68 static int check_version(struct dlm_plock_info
*info
)
70 if ((DLM_PLOCK_VERSION_MAJOR
!= info
->version
[0]) ||
71 (DLM_PLOCK_VERSION_MINOR
< info
->version
[1])) {
72 log_print("plock device version mismatch: "
73 "kernel (%u.%u.%u), user (%u.%u.%u)",
74 DLM_PLOCK_VERSION_MAJOR
,
75 DLM_PLOCK_VERSION_MINOR
,
76 DLM_PLOCK_VERSION_PATCH
,
85 static void dlm_release_plock_op(struct plock_op
*op
)
91 static void send_op(struct plock_op
*op
)
93 set_version(&op
->info
);
95 list_add_tail(&op
->list
, &send_list
);
96 spin_unlock(&ops_lock
);
100 static int do_lock_cancel(const struct dlm_plock_info
*orig_info
)
105 op
= kzalloc(sizeof(*op
), GFP_NOFS
);
109 op
->info
= *orig_info
;
110 op
->info
.optype
= DLM_PLOCK_OP_CANCEL
;
114 wait_event(recv_wq
, (op
->done
!= 0));
118 dlm_release_plock_op(op
);
122 int dlm_posix_lock(dlm_lockspace_t
*lockspace
, u64 number
, struct file
*file
,
123 int cmd
, struct file_lock
*fl
)
125 struct plock_async_data
*op_data
;
130 ls
= dlm_find_lockspace_local(lockspace
);
134 op
= kzalloc(sizeof(*op
), GFP_NOFS
);
140 op
->info
.optype
= DLM_PLOCK_OP_LOCK
;
141 op
->info
.pid
= fl
->c
.flc_pid
;
142 op
->info
.ex
= lock_is_write(fl
);
143 op
->info
.wait
= !!(fl
->c
.flc_flags
& FL_SLEEP
);
144 op
->info
.fsid
= ls
->ls_global_id
;
145 op
->info
.number
= number
;
146 op
->info
.start
= fl
->fl_start
;
147 op
->info
.end
= fl
->fl_end
;
148 op
->info
.owner
= (__u64
)(long) fl
->c
.flc_owner
;
150 if (fl
->fl_lmops
&& fl
->fl_lmops
->lm_grant
) {
151 op_data
= kzalloc(sizeof(*op_data
), GFP_NOFS
);
153 dlm_release_plock_op(op
);
158 op_data
->callback
= fl
->fl_lmops
->lm_grant
;
159 locks_init_lock(&op_data
->flc
);
160 locks_copy_lock(&op_data
->flc
, fl
);
162 op_data
->file
= file
;
167 rv
= FILE_LOCK_DEFERRED
;
174 rv
= wait_event_interruptible(recv_wq
, (op
->done
!= 0));
175 if (rv
== -ERESTARTSYS
) {
176 spin_lock(&ops_lock
);
177 /* recheck under ops_lock if we got a done != 0,
178 * if so this interrupt case should be ignored
181 spin_unlock(&ops_lock
);
184 spin_unlock(&ops_lock
);
186 rv
= do_lock_cancel(&op
->info
);
189 /* waiter was deleted in user space, answer will never come
190 * remove original request. The original request must be
191 * on recv_list because the answer of do_lock_cancel()
194 spin_lock(&ops_lock
);
196 spin_unlock(&ops_lock
);
200 /* cancellation wasn't successful but op should be done */
203 /* internal error doing cancel we need to wait */
207 log_debug(ls
, "%s: wait interrupted %x %llx pid %d",
208 __func__
, ls
->ls_global_id
,
209 (unsigned long long)number
, op
->info
.pid
);
210 dlm_release_plock_op(op
);
215 wait_event(recv_wq
, (op
->done
!= 0));
220 WARN_ON(!list_empty(&op
->list
));
225 if (locks_lock_file_wait(file
, fl
) < 0)
226 log_error(ls
, "dlm_posix_lock: vfs lock error %llx",
227 (unsigned long long)number
);
230 dlm_release_plock_op(op
);
232 dlm_put_lockspace(ls
);
235 EXPORT_SYMBOL_GPL(dlm_posix_lock
);
237 /* Returns failure iff a successful lock operation should be canceled */
238 static int dlm_plock_callback(struct plock_op
*op
)
240 struct plock_async_data
*op_data
= op
->data
;
242 struct file_lock
*fl
;
243 struct file_lock
*flc
;
244 int (*notify
)(struct file_lock
*fl
, int result
) = NULL
;
247 WARN_ON(!list_empty(&op
->list
));
249 /* check if the following 2 are still valid or make a copy */
250 file
= op_data
->file
;
253 notify
= op_data
->callback
;
256 notify(fl
, op
->info
.rv
);
260 /* got fs lock; bookkeep locally as well: */
261 flc
->c
.flc_flags
&= ~FL_SLEEP
;
262 if (posix_lock_file(file
, flc
, NULL
)) {
264 * This can only happen in the case of kmalloc() failure.
265 * The filesystem's own lock is the authoritative lock,
266 * so a failure to get the lock locally is not a disaster.
267 * As long as the fs cannot reliably cancel locks (especially
268 * in a low-memory situation), we're better off ignoring
269 * this failure than trying to recover.
271 log_print("dlm_plock_callback: vfs lock error %llx file %p fl %p",
272 (unsigned long long)op
->info
.number
, file
, fl
);
277 /* XXX: We need to cancel the fs lock here: */
278 log_print("%s: lock granted after lock request failed; dangling lock!",
284 dlm_release_plock_op(op
);
288 int dlm_posix_unlock(dlm_lockspace_t
*lockspace
, u64 number
, struct file
*file
,
289 struct file_lock
*fl
)
294 unsigned char saved_flags
= fl
->c
.flc_flags
;
296 ls
= dlm_find_lockspace_local(lockspace
);
300 op
= kzalloc(sizeof(*op
), GFP_NOFS
);
306 /* cause the vfs unlock to return ENOENT if lock is not found */
307 fl
->c
.flc_flags
|= FL_EXISTS
;
309 rv
= locks_lock_file_wait(file
, fl
);
315 log_error(ls
, "dlm_posix_unlock: vfs unlock error %d %llx",
316 rv
, (unsigned long long)number
);
319 op
->info
.optype
= DLM_PLOCK_OP_UNLOCK
;
320 op
->info
.pid
= fl
->c
.flc_pid
;
321 op
->info
.fsid
= ls
->ls_global_id
;
322 op
->info
.number
= number
;
323 op
->info
.start
= fl
->fl_start
;
324 op
->info
.end
= fl
->fl_end
;
325 op
->info
.owner
= (__u64
)(long) fl
->c
.flc_owner
;
327 if (fl
->c
.flc_flags
& FL_CLOSE
) {
328 op
->info
.flags
|= DLM_PLOCK_FL_CLOSE
;
335 wait_event(recv_wq
, (op
->done
!= 0));
337 WARN_ON(!list_empty(&op
->list
));
345 dlm_release_plock_op(op
);
347 dlm_put_lockspace(ls
);
348 fl
->c
.flc_flags
= saved_flags
;
351 EXPORT_SYMBOL_GPL(dlm_posix_unlock
);
354 * NOTE: This implementation can only handle async lock requests as nfs
355 * do it. It cannot handle cancellation of a pending lock request sitting
356 * in wait_event(), but for now only nfs is the only user local kernel
359 int dlm_posix_cancel(dlm_lockspace_t
*lockspace
, u64 number
, struct file
*file
,
360 struct file_lock
*fl
)
362 struct dlm_plock_info info
;
367 /* this only works for async request for now and nfs is the only
368 * kernel user right now.
370 if (WARN_ON_ONCE(!fl
->fl_lmops
|| !fl
->fl_lmops
->lm_grant
))
373 ls
= dlm_find_lockspace_local(lockspace
);
377 memset(&info
, 0, sizeof(info
));
378 info
.pid
= fl
->c
.flc_pid
;
379 info
.ex
= lock_is_write(fl
);
380 info
.fsid
= ls
->ls_global_id
;
381 dlm_put_lockspace(ls
);
382 info
.number
= number
;
383 info
.start
= fl
->fl_start
;
384 info
.end
= fl
->fl_end
;
385 info
.owner
= (__u64
)(long) fl
->c
.flc_owner
;
387 rv
= do_lock_cancel(&info
);
390 spin_lock(&ops_lock
);
391 /* lock request to cancel must be on recv_list because
392 * do_lock_cancel() synchronizes it.
394 op
= plock_lookup_waiter(&info
);
395 if (WARN_ON_ONCE(!op
)) {
396 spin_unlock(&ops_lock
);
402 spin_unlock(&ops_lock
);
403 WARN_ON(op
->info
.optype
!= DLM_PLOCK_OP_LOCK
);
404 op
->data
->callback(op
->data
->fl
, -EINTR
);
405 dlm_release_plock_op(op
);
409 /* if cancel wasn't successful we probably were to late
410 * or it was a non-blocking lock request, so just unlock it.
412 rv
= dlm_posix_unlock(lockspace
, number
, file
, fl
);
420 EXPORT_SYMBOL_GPL(dlm_posix_cancel
);
422 int dlm_posix_get(dlm_lockspace_t
*lockspace
, u64 number
, struct file
*file
,
423 struct file_lock
*fl
)
429 ls
= dlm_find_lockspace_local(lockspace
);
433 op
= kzalloc(sizeof(*op
), GFP_NOFS
);
439 op
->info
.optype
= DLM_PLOCK_OP_GET
;
440 op
->info
.pid
= fl
->c
.flc_pid
;
441 op
->info
.ex
= lock_is_write(fl
);
442 op
->info
.fsid
= ls
->ls_global_id
;
443 op
->info
.number
= number
;
444 op
->info
.start
= fl
->fl_start
;
445 op
->info
.end
= fl
->fl_end
;
446 op
->info
.owner
= (__u64
)(long) fl
->c
.flc_owner
;
449 wait_event(recv_wq
, (op
->done
!= 0));
451 WARN_ON(!list_empty(&op
->list
));
453 /* info.rv from userspace is 1 for conflict, 0 for no-conflict,
454 -ENOENT if there are no locks on the file */
458 fl
->c
.flc_type
= F_UNLCK
;
463 fl
->c
.flc_type
= (op
->info
.ex
) ? F_WRLCK
: F_RDLCK
;
464 fl
->c
.flc_flags
= FL_POSIX
;
465 fl
->c
.flc_pid
= op
->info
.pid
;
466 if (op
->info
.nodeid
!= dlm_our_nodeid())
467 fl
->c
.flc_pid
= -fl
->c
.flc_pid
;
468 fl
->fl_start
= op
->info
.start
;
469 fl
->fl_end
= op
->info
.end
;
473 dlm_release_plock_op(op
);
475 dlm_put_lockspace(ls
);
478 EXPORT_SYMBOL_GPL(dlm_posix_get
);
480 /* a read copies out one plock request from the send list */
481 static ssize_t
dev_read(struct file
*file
, char __user
*u
, size_t count
,
484 struct dlm_plock_info info
;
485 struct plock_op
*op
= NULL
;
487 if (count
< sizeof(info
))
490 spin_lock(&ops_lock
);
491 if (!list_empty(&send_list
)) {
492 op
= list_first_entry(&send_list
, struct plock_op
, list
);
493 if (op
->info
.flags
& DLM_PLOCK_FL_CLOSE
)
496 list_move_tail(&op
->list
, &recv_list
);
497 memcpy(&info
, &op
->info
, sizeof(info
));
499 spin_unlock(&ops_lock
);
504 trace_dlm_plock_read(&info
);
506 /* there is no need to get a reply from userspace for unlocks
507 that were generated by the vfs cleaning up for a close
508 (the process did not make an unlock call). */
510 if (op
->info
.flags
& DLM_PLOCK_FL_CLOSE
)
511 dlm_release_plock_op(op
);
513 if (copy_to_user(u
, &info
, sizeof(info
)))
518 /* a write copies in one plock result that should match a plock_op
520 static ssize_t
dev_write(struct file
*file
, const char __user
*u
, size_t count
,
523 struct plock_op
*op
= NULL
, *iter
;
524 struct dlm_plock_info info
;
527 if (count
!= sizeof(info
))
530 if (copy_from_user(&info
, u
, sizeof(info
)))
533 trace_dlm_plock_write(&info
);
535 if (check_version(&info
))
539 * The results for waiting ops (SETLKW) can be returned in any
540 * order, so match all fields to find the op. The results for
541 * non-waiting ops are returned in the order that they were sent
542 * to userspace, so match the result with the first non-waiting op.
544 spin_lock(&ops_lock
);
546 op
= plock_lookup_waiter(&info
);
548 list_for_each_entry(iter
, &recv_list
, list
) {
549 if (!iter
->info
.wait
&&
550 iter
->info
.fsid
== info
.fsid
) {
558 /* Sanity check that op and info match. */
560 WARN_ON(op
->info
.optype
!= DLM_PLOCK_OP_LOCK
);
562 WARN_ON(op
->info
.number
!= info
.number
||
563 op
->info
.owner
!= info
.owner
||
564 op
->info
.optype
!= info
.optype
);
566 list_del_init(&op
->list
);
567 memcpy(&op
->info
, &info
, sizeof(info
));
573 spin_unlock(&ops_lock
);
577 dlm_plock_callback(op
);
581 pr_debug("%s: no op %x %llx", __func__
,
582 info
.fsid
, (unsigned long long)info
.number
);
586 static __poll_t
dev_poll(struct file
*file
, poll_table
*wait
)
590 poll_wait(file
, &send_wq
, wait
);
592 spin_lock(&ops_lock
);
593 if (!list_empty(&send_list
))
594 mask
= EPOLLIN
| EPOLLRDNORM
;
595 spin_unlock(&ops_lock
);
600 static const struct file_operations dev_fops
= {
604 .owner
= THIS_MODULE
,
605 .llseek
= noop_llseek
,
608 static struct miscdevice plock_dev_misc
= {
609 .minor
= MISC_DYNAMIC_MINOR
,
610 .name
= DLM_PLOCK_MISC_NAME
,
614 int dlm_plock_init(void)
618 rv
= misc_register(&plock_dev_misc
);
620 log_print("dlm_plock_init: misc_register failed %d", rv
);
624 void dlm_plock_exit(void)
626 misc_deregister(&plock_dev_misc
);
627 WARN_ON(!list_empty(&send_list
));
628 WARN_ON(!list_empty(&recv_list
));