/*
   Unix SMB/CIFS implementation.

   generic byte range locking code - tdb backend

   Copyright (C) Andrew Tridgell 1992-2006
   Copyright (C) Jeremy Allison 1992-2000

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program.  If not, see <http://www.gnu.org/licenses/>.
*/
/* This module implements a tdb based byte range locking service,
   replacing the fcntl() based byte range locking previously
   used. This allows us to provide the same semantics as NT */
#include "includes.h"
#include "system/filesys.h"
#include "messaging/messaging.h"
#include "lib/messaging/irpc.h"
#include "libcli/libcli.h"
#include "cluster/cluster.h"
#include "ntvfs/common/brlock.h"
#include "ntvfs/ntvfs.h"
#include "param/param.h"
#include "dbwrap/dbwrap.h"
/*
  in this module a "DATA_BLOB *file_key" is a blob that uniquely identifies
  a file. For a local posix filesystem this will usually be a combination
  of the device and inode numbers of the file, but it can be anything
  that uniquely identifies a file for locking purposes, as long
  as it is applied consistently.
*/
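/*
  as an illustration (not part of this module's API), a caller on a
  posix filesystem might build such a key from the stat identity of
  the file:

    struct {
        dev_t dev;
        ino_t ino;
    } id = { st.st_dev, st.st_ino };
    DATA_BLOB file_key = data_blob_talloc(mem_ctx, &id, sizeof(id));

  any scheme works, as long as every opener of the same file derives
  the same blob
*/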
/* this struct is typically attached to tcon */
struct brl_context {
	struct db_context *db;
	struct server_id server;
	struct imessaging_context *imessaging_ctx;
};
/*
  the lock context contains the elements that define whether one
  lock is the same as another lock
*/
struct lock_context {
	struct server_id server;
	uint32_t smbpid;
	struct brl_context *ctx;
};
/* The data in brlock records is an unsorted linear array of these
   records. It is unnecessary to store the count as tdb provides the
   size of the record */
struct lock_struct {
	struct lock_context context;
	struct ntvfs_handle *ntvfs;
	uint64_t start;
	uint64_t size;
	enum brl_type lock_type;
	void *notify_ptr;
};
/* this struct is attached to an open file handle */
struct brl_handle {
	DATA_BLOB key;
	struct ntvfs_handle *ntvfs;
	struct lock_struct last_lock;
};
/* see if we have wrapped locks, which are no longer allowed (windows
 * changed this in win7) */
static bool brl_invalid_lock_range(uint64_t start, uint64_t size)
{
	return (size > 1 && (start + size < start));
}
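/*
  for illustration: a range that wraps the 64 bit file space is
  rejected, while a maximal one-byte lock is not

    brl_invalid_lock_range(UINT64_MAX, 2);  => true  (start + size wraps)
    brl_invalid_lock_range(UINT64_MAX, 1);  => false (size <= 1 is allowed)
*/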
/*
  Open up the brlock.tdb database. Close it down using
  talloc_free(). We need the imessaging_ctx to allow for
  pending lock notifications.
*/
static struct brl_context *brl_tdb_init(TALLOC_CTX *mem_ctx, struct server_id server,
					struct loadparm_context *lp_ctx,
					struct imessaging_context *imessaging_ctx)
{
	struct brl_context *brl;

	brl = talloc(mem_ctx, struct brl_context);
	if (brl == NULL) {
		return NULL;
	}

	brl->db = cluster_db_tmp_open(brl, lp_ctx, "brlock", TDB_DEFAULT);
	if (brl->db == NULL) {
		talloc_free(brl);
		return NULL;
	}

	brl->server = server;
	brl->imessaging_ctx = imessaging_ctx;

	return brl;
}
static struct brl_handle *brl_tdb_create_handle(TALLOC_CTX *mem_ctx, struct ntvfs_handle *ntvfs,
						DATA_BLOB *file_key)
{
	struct brl_handle *brlh;

	brlh = talloc(mem_ctx, struct brl_handle);
	if (brlh == NULL) {
		return NULL;
	}

	brlh->key = *file_key;
	brlh->ntvfs = ntvfs;
	ZERO_STRUCT(brlh->last_lock);

	return brlh;
}
/*
  see if two locking contexts are equal
*/
static bool brl_tdb_same_context(struct lock_context *ctx1, struct lock_context *ctx2)
{
	return (cluster_id_equal(&ctx1->server, &ctx2->server) &&
		ctx1->smbpid == ctx2->smbpid &&
		ctx1->ctx == ctx2->ctx);
}
/*
  see if lck1 and lck2 overlap

  lck1 is the existing lock. lck2 is the new lock we are
  looking at adding
*/
static bool brl_tdb_overlap(struct lock_struct *lck1,
			    struct lock_struct *lck2)
{
	/* this extra check is not redundant - it copes with locks
	   that go beyond the end of 64 bit file space */
	if (lck1->size != 0 &&
	    lck1->start == lck2->start &&
	    lck1->size == lck2->size) {
		return true;
	}

	if (lck1->start >= (lck2->start+lck2->size) ||
	    lck2->start >= (lck1->start+lck1->size)) {
		return false;
	}

	/* we have a conflict. Now check to see if lck1 really still
	 * exists, which involves checking if the process still
	 * exists. We leave this test to last as it's the most
	 * expensive test, especially when we are clustered */
	/* TODO: need to do this via a server_id_exists() call, which
	 * hasn't been written yet. When clustered this will need to
	 * work across nodes */
	return true;
}
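/*
  for illustration, with an existing lock lck1 of start=100, size=10
  (covering offsets 100..109):

    new lock start=110, size=5   => no overlap (110 >= 100+10)
    new lock start=105, size=5   => overlap
    new lock start=100, size=10  => overlap, caught by the explicit
                                    equal-range check even when
                                    start+size wraps
*/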
/*
  See if lock2 can be added when lock1 is in place.
*/
static bool brl_tdb_conflict(struct lock_struct *lck1,
			     struct lock_struct *lck2)
{
	/* pending locks don't conflict with anything */
	if (lck1->lock_type >= PENDING_READ_LOCK ||
	    lck2->lock_type >= PENDING_READ_LOCK) {
		return false;
	}

	if (lck1->lock_type == READ_LOCK && lck2->lock_type == READ_LOCK) {
		return false;
	}

	if (brl_tdb_same_context(&lck1->context, &lck2->context) &&
	    lck2->lock_type == READ_LOCK && lck1->ntvfs == lck2->ntvfs) {
		return false;
	}

	return brl_tdb_overlap(lck1, lck2);
}
/*
  Check to see if this lock conflicts, but ignore our own locks on the
  same fnum only.
*/
static bool brl_tdb_conflict_other(struct lock_struct *lck1, struct lock_struct *lck2)
{
	/* pending locks don't conflict with anything */
	if (lck1->lock_type >= PENDING_READ_LOCK ||
	    lck2->lock_type >= PENDING_READ_LOCK) {
		return false;
	}

	if (lck1->lock_type == READ_LOCK && lck2->lock_type == READ_LOCK) {
		return false;
	}

	/*
	 * note that incoming write calls conflict with existing READ
	 * locks even if the context is the same. JRA. See LOCKTEST7
	 * in smbtorture.
	 */
	if (brl_tdb_same_context(&lck1->context, &lck2->context) &&
	    lck1->ntvfs == lck2->ntvfs &&
	    (lck2->lock_type == READ_LOCK || lck1->lock_type == WRITE_LOCK)) {
		return false;
	}

	return brl_tdb_overlap(lck1, lck2);
}
/*
  amazingly enough, w2k3 "remembers" whether the last lock failure
  is the same as this one and changes its error code. I wonder if any
  app depends on this?
*/
static NTSTATUS brl_tdb_lock_failed(struct brl_handle *brlh, struct lock_struct *lock)
{
	/*
	 * this function is only called for a non pending lock!
	 */

	/* in SMB2 mode always return NT_STATUS_LOCK_NOT_GRANTED! */
	if (lock->ntvfs->ctx->protocol >= PROTOCOL_SMB2_02) {
		return NT_STATUS_LOCK_NOT_GRANTED;
	}

	/*
	 * if the notify_ptr is non NULL,
	 * it means that we're at the end of a pending lock
	 * and the real lock is requested after the timeout went by.
	 * In this case we need to remember the last_lock and always
	 * give FILE_LOCK_CONFLICT
	 */
	if (lock->notify_ptr) {
		brlh->last_lock = *lock;
		return NT_STATUS_FILE_LOCK_CONFLICT;
	}

	/*
	 * amazing the little things you learn with a test
	 * suite. Locks beyond this offset (as a 64 bit
	 * number!) always generate the conflict error code,
	 * unless the top bit is set
	 */
	if (lock->start >= 0xEF000000 && (lock->start >> 63) == 0) {
		brlh->last_lock = *lock;
		return NT_STATUS_FILE_LOCK_CONFLICT;
	}

	/*
	 * if the current lock matches the last failed lock on the file handle
	 * and starts at the same offset, then FILE_LOCK_CONFLICT should be returned
	 */
	if (cluster_id_equal(&lock->context.server, &brlh->last_lock.context.server) &&
	    lock->context.ctx == brlh->last_lock.context.ctx &&
	    lock->ntvfs == brlh->last_lock.ntvfs &&
	    lock->start == brlh->last_lock.start) {
		return NT_STATUS_FILE_LOCK_CONFLICT;
	}

	brlh->last_lock = *lock;
	return NT_STATUS_LOCK_NOT_GRANTED;
}
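/*
  net effect for an SMB1 caller, illustrated: two identical failing
  lock attempts on the same handle and offset give
  NT_STATUS_LOCK_NOT_GRANTED first and NT_STATUS_FILE_LOCK_CONFLICT
  on the repeat, which is the w2k3 behaviour described above
*/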
/*
  Lock a range of bytes. The lock_type can be a PENDING_*_LOCK, in
  which case a real lock is first tried, and if that fails then a
  pending lock is created. When the pending lock is triggered (by
  someone else closing an overlapping lock range) a messaging
  notification is sent, identified by the notify_ptr
*/
static NTSTATUS brl_tdb_lock(struct brl_context *brl,
			     struct brl_handle *brlh,
			     uint32_t smbpid,
			     uint64_t start, uint64_t size,
			     enum brl_type lock_type,
			     void *notify_ptr)
{
	TDB_DATA kbuf, dbuf;
	int count = 0, i;
	struct lock_struct lock, *locks = NULL;
	NTSTATUS status;
	struct db_record *locked;

	kbuf.dptr = brlh->key.data;
	kbuf.dsize = brlh->key.length;

	if (brl_invalid_lock_range(start, size)) {
		return NT_STATUS_INVALID_LOCK_RANGE;
	}

	locked = dbwrap_fetch_locked(brl->db, brl, kbuf);
	if (!locked) {
		return NT_STATUS_INTERNAL_DB_CORRUPTION;
	}

	/* if this is a pending lock, then with the chainlock held we
	   try to get the real lock. If we succeed then we don't need
	   to make it pending. This prevents a possible race condition
	   where the pending lock gets created after the lock that is
	   preventing the real lock gets removed */
	if (lock_type >= PENDING_READ_LOCK) {
		enum brl_type rw = (lock_type == PENDING_READ_LOCK ? READ_LOCK : WRITE_LOCK);

		/* here we need to force that the last_lock isn't overwritten */
		lock = brlh->last_lock;
		status = brl_tdb_lock(brl, brlh, smbpid, start, size, rw, NULL);
		brlh->last_lock = lock;

		if (NT_STATUS_IS_OK(status)) {
			talloc_free(locked);
			return NT_STATUS_OK;
		}
	}

	dbuf = dbwrap_record_get_value(locked);

	lock.context.smbpid = smbpid;
	lock.context.server = brl->server;
	lock.context.ctx = brl;
	lock.ntvfs = brlh->ntvfs;
	lock.start = start;
	lock.size = size;
	lock.lock_type = lock_type;
	lock.notify_ptr = notify_ptr;

	if (dbuf.dsize > 0) {
		/* there are existing locks - make sure they don't conflict */
		locks = (struct lock_struct *)dbuf.dptr;
		count = dbuf.dsize / sizeof(*locks);
		for (i=0; i<count; i++) {
			if (brl_tdb_conflict(&locks[i], &lock)) {
				status = brl_tdb_lock_failed(brlh, &lock);
				goto fail;
			}
		}
	}

	/* no conflicts - add it to the list of locks */
	/* FIXME: a dbwrap_record_append() would help here! */
	locks = talloc_array(locked, struct lock_struct, count+1);
	if (locks == NULL) {
		status = NT_STATUS_NO_MEMORY;
		goto fail;
	}
	if (dbuf.dsize > 0) {
		memcpy(locks, dbuf.dptr, dbuf.dsize);
	}
	locks[count] = lock;

	dbuf.dptr = (unsigned char *)locks;
	dbuf.dsize += sizeof(lock);

	status = dbwrap_record_store(locked, dbuf, TDB_REPLACE);
	if (!NT_STATUS_IS_OK(status)) {
		goto fail;
	}

	talloc_free(locked);

	/* the caller needs to know if the real lock was granted. If
	   we have reached here then it must be a pending lock that
	   was granted, so tell them the lock failed */
	if (lock_type >= PENDING_READ_LOCK) {
		return NT_STATUS_LOCK_NOT_GRANTED;
	}

	return NT_STATUS_OK;

fail:
	talloc_free(locked);
	return status;
}
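/*
  pending-lock flow, sketched (the caller-side names are assumptions):

    status = brl_tdb_lock(brl, brlh, smbpid, start, size,
                          PENDING_WRITE_LOCK, pending);

  NT_STATUS_OK means the real write lock was granted immediately;
  NT_STATUS_LOCK_NOT_GRANTED means a pending lock is now recorded and
  a MSG_BRL_RETRY message carrying 'pending' will arrive when an
  overlapping lock is dropped, after which the caller retries with
  WRITE_LOCK or gives up via brl_tdb_remove_pending()
*/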
/*
  we are removing a lock that might be holding up a pending lock. Scan for pending
  locks that cover this range and if we find any then notify the server that it should
  retry the lock
*/
static void brl_tdb_notify_unlock(struct brl_context *brl,
				  struct lock_struct *locks, int count,
				  struct lock_struct *removed_lock)
{
	int i, last_notice = -1;

	/* the last_notice logic is to prevent stampeding on a lock
	   range. It prevents us sending hundreds of notifies on the
	   same range of bytes. It doesn't prevent all possible
	   stampedes, but it does prevent the most common problem */
	for (i=0;i<count;i++) {
		if (locks[i].lock_type >= PENDING_READ_LOCK &&
		    brl_tdb_overlap(&locks[i], removed_lock)) {
			if (last_notice != -1 && brl_tdb_overlap(&locks[i], &locks[last_notice])) {
				continue;
			}
			if (locks[i].lock_type == PENDING_WRITE_LOCK) {
				last_notice = i;
			}
			imessaging_send_ptr(brl->imessaging_ctx, locks[i].context.server,
					    MSG_BRL_RETRY, locks[i].notify_ptr);
		}
	}
}
/*
  send notifications for all pending locks - the file is being closed by this
  user
*/
static void brl_tdb_notify_all(struct brl_context *brl,
			       struct lock_struct *locks, int count)
{
	int i;
	for (i=0;i<count;i++) {
		if (locks[i].lock_type >= PENDING_READ_LOCK) {
			brl_tdb_notify_unlock(brl, locks, count, &locks[i]);
		}
	}
}
/*
  Unlock a range of bytes.
*/
static NTSTATUS brl_tdb_unlock(struct brl_context *brl,
			       struct brl_handle *brlh,
			       uint32_t smbpid,
			       uint64_t start, uint64_t size)
{
	TDB_DATA kbuf, dbuf;
	int count, i;
	struct lock_struct *locks, *lock = NULL;
	struct lock_context context;
	struct db_record *locked;
	NTSTATUS status;

	kbuf.dptr = brlh->key.data;
	kbuf.dsize = brlh->key.length;

	if (brl_invalid_lock_range(start, size)) {
		return NT_STATUS_INVALID_LOCK_RANGE;
	}

	locked = dbwrap_fetch_locked(brl->db, brl, kbuf);
	if (!locked) {
		return NT_STATUS_INTERNAL_DB_CORRUPTION;
	}
	dbuf = dbwrap_record_get_value(locked);

	context.smbpid = smbpid;
	context.server = brl->server;
	context.ctx = brl;

	/* there are existing locks - find a match */
	locks = (struct lock_struct *)dbuf.dptr;
	count = dbuf.dsize / sizeof(*locks);

	for (i=0; i<count; i++) {
		lock = &locks[i];
		if (brl_tdb_same_context(&lock->context, &context) &&
		    lock->ntvfs == brlh->ntvfs &&
		    lock->start == start &&
		    lock->size == size &&
		    lock->lock_type == WRITE_LOCK) {
			break;
		}
	}
	if (i < count) goto found;

	for (i=0; i<count; i++) {
		lock = &locks[i];
		if (brl_tdb_same_context(&lock->context, &context) &&
		    lock->ntvfs == brlh->ntvfs &&
		    lock->start == start &&
		    lock->size == size &&
		    lock->lock_type < PENDING_READ_LOCK) {
			break;
		}
	}

found:
	if (i < count) {
		/* found it - delete it */
		if (count == 1) {
			status = dbwrap_record_delete(locked);
			if (!NT_STATUS_IS_OK(status)) {
				goto fail;
			}
		} else {
			struct lock_struct removed_lock = *lock;
			if (i < count-1) {
				memmove(&locks[i], &locks[i+1],
					sizeof(*locks)*((count-1) - i));
			}
			count--;

			/* send notifications for any relevant pending locks */
			brl_tdb_notify_unlock(brl, locks, count, &removed_lock);

			dbuf.dsize = count * sizeof(*locks);

			status = dbwrap_record_store(locked, dbuf, TDB_REPLACE);
			if (!NT_STATUS_IS_OK(status)) {
				goto fail;
			}
		}

		talloc_free(locked);
		return NT_STATUS_OK;
	}

	/* we didn't find it */
	status = NT_STATUS_RANGE_NOT_LOCKED;

fail:
	talloc_free(locked);
	return status;
}
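/*
  note the two-pass search above: an exactly matching WRITE_LOCK is
  removed in preference to any other non-pending lock on the same
  range, presumably to match Windows behaviour when a read and a
  write lock coexist on identical bounds
*/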
/*
  remove a pending lock. This is called when the caller has either
  given up trying to establish a lock or when they have succeeded in
  getting it. In either case they no longer need to be notified.
*/
static NTSTATUS brl_tdb_remove_pending(struct brl_context *brl,
				       struct brl_handle *brlh,
				       void *notify_ptr)
{
	TDB_DATA kbuf, dbuf;
	int count, i;
	struct lock_struct *locks;
	NTSTATUS status;
	struct db_record *locked;

	kbuf.dptr = brlh->key.data;
	kbuf.dsize = brlh->key.length;

	locked = dbwrap_fetch_locked(brl->db, brl, kbuf);
	if (!locked) {
		return NT_STATUS_INTERNAL_DB_CORRUPTION;
	}

	dbuf = dbwrap_record_get_value(locked);
	if (dbuf.dsize == 0) {
		talloc_free(locked);
		return NT_STATUS_RANGE_NOT_LOCKED;
	}

	/* there are existing locks - find a match */
	locks = (struct lock_struct *)dbuf.dptr;
	count = dbuf.dsize / sizeof(*locks);

	for (i=0; i<count; i++) {
		struct lock_struct *lock = &locks[i];

		if (lock->lock_type >= PENDING_READ_LOCK &&
		    lock->notify_ptr == notify_ptr &&
		    cluster_id_equal(&lock->context.server, &brl->server)) {
			/* found it - delete it */
			if (count == 1) {
				status = dbwrap_record_delete(locked);
				if (!NT_STATUS_IS_OK(status)) {
					goto fail;
				}
			} else {
				if (i < count-1) {
					memmove(&locks[i], &locks[i+1],
						sizeof(*locks)*((count-1) - i));
				}
				count--;
				dbuf.dsize = count * sizeof(*locks);
				status = dbwrap_record_store(locked, dbuf,
							     TDB_REPLACE);
				if (!NT_STATUS_IS_OK(status)) {
					goto fail;
				}
			}

			talloc_free(locked);
			return NT_STATUS_OK;
		}
	}

	/* we didn't find it */
	status = NT_STATUS_RANGE_NOT_LOCKED;

fail:
	talloc_free(locked);
	return status;
}
/*
  Test if we are allowed to perform IO on a region of an open file
*/
static NTSTATUS brl_tdb_locktest(struct brl_context *brl,
				 struct brl_handle *brlh,
				 uint32_t smbpid,
				 uint64_t start, uint64_t size,
				 enum brl_type lock_type)
{
	TDB_DATA kbuf, dbuf;
	int count, i;
	struct lock_struct lock, *locks;
	NTSTATUS status;

	kbuf.dptr = brlh->key.data;
	kbuf.dsize = brlh->key.length;

	if (brl_invalid_lock_range(start, size)) {
		return NT_STATUS_INVALID_LOCK_RANGE;
	}

	status = dbwrap_fetch(brl->db, brl, kbuf, &dbuf);
	if (NT_STATUS_EQUAL(status, NT_STATUS_NOT_FOUND)) {
		return NT_STATUS_OK;
	} else if (!NT_STATUS_IS_OK(status)) {
		return status;
	}

	lock.context.smbpid = smbpid;
	lock.context.server = brl->server;
	lock.context.ctx = brl;
	lock.ntvfs = brlh->ntvfs;
	lock.start = start;
	lock.size = size;
	lock.lock_type = lock_type;

	/* there are existing locks - make sure they don't conflict */
	locks = (struct lock_struct *)dbuf.dptr;
	count = dbuf.dsize / sizeof(*locks);

	for (i=0; i<count; i++) {
		if (brl_tdb_conflict_other(&locks[i], &lock)) {
			talloc_free(dbuf.dptr);
			return NT_STATUS_FILE_LOCK_CONFLICT;
		}
	}

	talloc_free(dbuf.dptr);
	return NT_STATUS_OK;
}
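/*
  usage sketch (caller-side names are assumptions): before satisfying
  a read, an ntvfs backend can ask

    status = brl_tdb_locktest(brl, brlh, smbpid, offset, length, READ_LOCK);

  and refuse the IO with NT_STATUS_FILE_LOCK_CONFLICT if another
  handle holds a conflicting byte range
*/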
/*
  Remove any locks associated with an open file.
*/
static NTSTATUS brl_tdb_close(struct brl_context *brl,
			      struct brl_handle *brlh)
{
	TDB_DATA kbuf, dbuf;
	int count, i, dcount=0;
	struct lock_struct *locks;
	struct db_record *locked;
	NTSTATUS status;

	kbuf.dptr = brlh->key.data;
	kbuf.dsize = brlh->key.length;

	locked = dbwrap_fetch_locked(brl->db, brl, kbuf);
	if (!locked) {
		return NT_STATUS_INTERNAL_DB_CORRUPTION;
	}
	dbuf = dbwrap_record_get_value(locked);
	if (dbuf.dsize == 0) {
		talloc_free(locked);
		return NT_STATUS_OK;
	}

	/* there are existing locks - remove any for this fnum */
	locks = (struct lock_struct *)dbuf.dptr;
	count = dbuf.dsize / sizeof(*locks);

	for (i=0; i<count; i++) {
		struct lock_struct *lock = &locks[i];

		if (lock->context.ctx == brl &&
		    cluster_id_equal(&lock->context.server, &brl->server) &&
		    lock->ntvfs == brlh->ntvfs) {
			/* found it - delete it */
			if (count > 1 && i < count-1) {
				memmove(&locks[i], &locks[i+1],
					sizeof(*locks)*((count-1) - i));
			}
			count--;
			i--;
			dcount++;
		}
	}

	status = NT_STATUS_OK;

	if (count == 0) {
		status = dbwrap_record_delete(locked);
	} else if (dcount != 0) {
		/* tell all pending lock holders for this file that
		   they have a chance now. This is a bit indiscriminate,
		   but works OK */
		brl_tdb_notify_all(brl, locks, count);

		dbuf.dsize = count * sizeof(*locks);

		status = dbwrap_record_store(locked, dbuf, TDB_REPLACE);
	}
	talloc_free(locked);

	return status;
}
static NTSTATUS brl_tdb_count(struct brl_context *brl, struct brl_handle *brlh,
			      int *count)
{
	TDB_DATA kbuf, dbuf;
	NTSTATUS status;

	kbuf.dptr = brlh->key.data;
	kbuf.dsize = brlh->key.length;
	*count = 0;

	status = dbwrap_fetch(brl->db, brl, kbuf, &dbuf);
	if (NT_STATUS_EQUAL(status, NT_STATUS_NOT_FOUND)) {
		return NT_STATUS_OK;
	} else if (!NT_STATUS_IS_OK(status)) {
		return status;
	}

	*count = dbuf.dsize / sizeof(struct lock_struct);

	talloc_free(dbuf.dptr);

	return NT_STATUS_OK;
}
static const struct brlock_ops brlock_tdb_ops = {
	.brl_init           = brl_tdb_init,
	.brl_create_handle  = brl_tdb_create_handle,
	.brl_lock           = brl_tdb_lock,
	.brl_unlock         = brl_tdb_unlock,
	.brl_remove_pending = brl_tdb_remove_pending,
	.brl_locktest       = brl_tdb_locktest,
	.brl_close          = brl_tdb_close,
	.brl_count          = brl_tdb_count
};


void brl_tdb_init_ops(void)
{
	brlock_set_ops(&brlock_tdb_ops);
}
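/*
  brlock_set_ops() installs this backend as the process-wide byte
  range locking implementation; the generic brl_*() wrappers in
  ntvfs/common/brlock.c then dispatch through this table
*/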