ctdb-server: Remove duplicate logic
[samba4-gss.git] / source4 / ntvfs / common / brlock_tdb.c
blobc4cd76bbe20c7d171d6df0e340dd5ec56db10513
1 /*
2 Unix SMB/CIFS implementation.
4 generic byte range locking code - tdb backend
6 Copyright (C) Andrew Tridgell 1992-2006
7 Copyright (C) Jeremy Allison 1992-2000
9 This program is free software; you can redistribute it and/or modify
10 it under the terms of the GNU General Public License as published by
11 the Free Software Foundation; either version 3 of the License, or
12 (at your option) any later version.
14 This program is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
19 You should have received a copy of the GNU General Public License
20 along with this program. If not, see <http://www.gnu.org/licenses/>.
23 /* This module implements a tdb based byte range locking service,
24 replacing the fcntl() based byte range locking previously
25 used. This allows us to provide the same semantics as NT */
27 #include "includes.h"
28 #include "system/filesys.h"
29 #include "messaging/messaging.h"
30 #include "lib/messaging/irpc.h"
31 #include "libcli/libcli.h"
32 #include "cluster/cluster.h"
33 #include "ntvfs/common/brlock.h"
34 #include "ntvfs/ntvfs.h"
35 #include "param/param.h"
36 #include "dbwrap/dbwrap.h"
39 in this module a "DATA_BLOB *file_key" is a blob that uniquely identifies
40 a file. For a local posix filesystem this will usually be a combination
41 of the device and inode numbers of the file, but it can be anything
42 that uniquely identifies a file for locking purposes, as long
43 as it is applied consistently.
/* this struct is typically attached to tcon */
struct brl_context {
	struct db_context *db;		/* handle to the brlock tdb database */
	struct server_id server;	/* identity of the server owning this context */
	struct imessaging_context *imessaging_ctx; /* used to send pending-lock retry messages */
};
/*
  the lock context contains the elements that define whether one
  lock is the same as another lock
*/
struct lock_context {
	struct server_id server;	/* server (process) that owns the lock */
	uint32_t smbpid;		/* SMB client process id the lock was taken for */
	struct brl_context *ctx;	/* brl context the lock was created under */
};
/* The data in brlock records is an unsorted linear array of these
   records. It is unnecessary to store the count as tdb provides the
   size of the record */
struct lock_struct {
	struct lock_context context;	/* identifies the lock owner */
	struct ntvfs_handle *ntvfs;	/* open file handle the lock belongs to */
	uint64_t start;			/* first byte of the locked range */
	uint64_t size;			/* number of bytes in the locked range */
	enum brl_type lock_type;	/* READ/WRITE lock or a PENDING_* variant */
	void *notify_ptr;		/* opaque token used to identify pending-lock notifications */
};
/* this struct is attached to an open file handle */
struct brl_handle {
	DATA_BLOB key;			/* blob uniquely identifying the file for locking */
	struct ntvfs_handle *ntvfs;	/* the ntvfs handle this brl handle belongs to */
	struct lock_struct last_lock;	/* last failed lock, kept for NT error-code semantics
					   (see brl_tdb_lock_failed) */
};
/* see if we have wrapped locks, which are no longer allowed (windows
 * changed this in win7) */
static bool brl_invalid_lock_range(uint64_t start, uint64_t size)
{
	/* single-byte (or empty) ranges can never wrap */
	if (size <= 1) {
		return false;
	}
	/* the range is invalid when its end wraps past the end of
	   the 64 bit file space (unsigned wraparound) */
	return start + size < start;
}
90 Open up the brlock.tdb database. Close it down using
91 talloc_free(). We need the imessaging_ctx to allow for
92 pending lock notifications.
94 static struct brl_context *brl_tdb_init(TALLOC_CTX *mem_ctx, struct server_id server,
95 struct loadparm_context *lp_ctx,
96 struct imessaging_context *imessaging_ctx)
98 struct brl_context *brl;
100 brl = talloc(mem_ctx, struct brl_context);
101 if (brl == NULL) {
102 return NULL;
105 brl->db = cluster_db_tmp_open(brl, lp_ctx, "brlock", TDB_DEFAULT);
106 if (brl->db == NULL) {
107 talloc_free(brl);
108 return NULL;
111 brl->server = server;
112 brl->imessaging_ctx = imessaging_ctx;
114 return brl;
117 static struct brl_handle *brl_tdb_create_handle(TALLOC_CTX *mem_ctx, struct ntvfs_handle *ntvfs,
118 DATA_BLOB *file_key)
120 struct brl_handle *brlh;
122 brlh = talloc(mem_ctx, struct brl_handle);
123 if (brlh == NULL) {
124 return NULL;
127 brlh->key = *file_key;
128 brlh->ntvfs = ntvfs;
129 ZERO_STRUCT(brlh->last_lock);
131 return brlh;
135 see if two locking contexts are equal
137 static bool brl_tdb_same_context(struct lock_context *ctx1, struct lock_context *ctx2)
139 return (cluster_id_equal(&ctx1->server, &ctx2->server) &&
140 ctx1->smbpid == ctx2->smbpid &&
141 ctx1->ctx == ctx2->ctx);
145 see if lck1 and lck2 overlap
147 lck1 is the existing lock. lck2 is the new lock we are
148 looking at adding
150 static bool brl_tdb_overlap(struct lock_struct *lck1,
151 struct lock_struct *lck2)
153 /* this extra check is not redundant - it copes with locks
154 that go beyond the end of 64 bit file space */
155 if (lck1->size != 0 &&
156 lck1->start == lck2->start &&
157 lck1->size == lck2->size) {
158 return true;
161 if (lck1->start >= (lck2->start+lck2->size) ||
162 lck2->start >= (lck1->start+lck1->size)) {
163 return false;
166 /* we have a conflict. Now check to see if lck1 really still
167 * exists, which involves checking if the process still
168 * exists. We leave this test to last as its the most
169 * expensive test, especially when we are clustered */
170 /* TODO: need to do this via a server_id_exists() call, which
171 * hasn't been written yet. When clustered this will need to
172 * call into ctdb */
174 return true;
178 See if lock2 can be added when lock1 is in place.
180 static bool brl_tdb_conflict(struct lock_struct *lck1,
181 struct lock_struct *lck2)
183 /* pending locks don't conflict with anything */
184 if (lck1->lock_type >= PENDING_READ_LOCK ||
185 lck2->lock_type >= PENDING_READ_LOCK) {
186 return false;
189 if (lck1->lock_type == READ_LOCK && lck2->lock_type == READ_LOCK) {
190 return false;
193 if (brl_tdb_same_context(&lck1->context, &lck2->context) &&
194 lck2->lock_type == READ_LOCK && lck1->ntvfs == lck2->ntvfs) {
195 return false;
198 return brl_tdb_overlap(lck1, lck2);
203 Check to see if this lock conflicts, but ignore our own locks on the
204 same fnum only.
206 static bool brl_tdb_conflict_other(struct lock_struct *lck1, struct lock_struct *lck2)
208 /* pending locks don't conflict with anything */
209 if (lck1->lock_type >= PENDING_READ_LOCK ||
210 lck2->lock_type >= PENDING_READ_LOCK) {
211 return false;
214 if (lck1->lock_type == READ_LOCK && lck2->lock_type == READ_LOCK)
215 return false;
218 * note that incoming write calls conflict with existing READ
219 * locks even if the context is the same. JRA. See LOCKTEST7
220 * in smbtorture.
222 if (brl_tdb_same_context(&lck1->context, &lck2->context) &&
223 lck1->ntvfs == lck2->ntvfs &&
224 (lck2->lock_type == READ_LOCK || lck1->lock_type == WRITE_LOCK)) {
225 return false;
228 return brl_tdb_overlap(lck1, lck2);
/*
  amazingly enough, w2k3 "remembers" whether the last lock failure
  is the same as this one and changes its error code. I wonder if any
  app depends on this?
*/
static NTSTATUS brl_tdb_lock_failed(struct brl_handle *brlh, struct lock_struct *lock)
{
	/*
	 * this function is only called for non pending lock!
	 */

	/* in SMB2 mode always return NT_STATUS_LOCK_NOT_GRANTED! */
	if (lock->ntvfs->ctx->protocol >= PROTOCOL_SMB2_02) {
		return NT_STATUS_LOCK_NOT_GRANTED;
	}

	/*
	 * if the notify_ptr is non NULL,
	 * it means that we're at the end of a pending lock
	 * and the real lock is requested after the timeout went by
	 * In this case we need to remember the last_lock and always
	 * give FILE_LOCK_CONFLICT
	 */
	if (lock->notify_ptr) {
		brlh->last_lock = *lock;
		return NT_STATUS_FILE_LOCK_CONFLICT;
	}

	/*
	 * amazing the little things you learn with a test
	 * suite. Locks beyond this offset (as a 64 bit
	 * number!) always generate the conflict error code,
	 * unless the top bit is set
	 */
	if (lock->start >= 0xEF000000 && (lock->start >> 63) == 0) {
		brlh->last_lock = *lock;
		return NT_STATUS_FILE_LOCK_CONFLICT;
	}

	/*
	 * if the current lock matches the last failed lock on the file handle
	 * and starts at the same offset, then FILE_LOCK_CONFLICT should be returned
	 */
	if (cluster_id_equal(&lock->context.server, &brlh->last_lock.context.server) &&
	    lock->context.ctx == brlh->last_lock.context.ctx &&
	    lock->ntvfs == brlh->last_lock.ntvfs &&
	    lock->start == brlh->last_lock.start) {
		return NT_STATUS_FILE_LOCK_CONFLICT;
	}

	/* remember this failure so a repeat at the same offset is
	   detected by the check above next time */
	brlh->last_lock = *lock;
	return NT_STATUS_LOCK_NOT_GRANTED;
}
287 Lock a range of bytes. The lock_type can be a PENDING_*_LOCK, in
288 which case a real lock is first tried, and if that fails then a
289 pending lock is created. When the pending lock is triggered (by
290 someone else closing an overlapping lock range) a messaging
291 notification is sent, identified by the notify_ptr
293 static NTSTATUS brl_tdb_lock(struct brl_context *brl,
294 struct brl_handle *brlh,
295 uint32_t smbpid,
296 uint64_t start, uint64_t size,
297 enum brl_type lock_type,
298 void *notify_ptr)
300 TDB_DATA kbuf, dbuf;
301 int count=0, i;
302 struct lock_struct lock, *locks=NULL;
303 NTSTATUS status;
304 struct db_record *locked;
306 kbuf.dptr = brlh->key.data;
307 kbuf.dsize = brlh->key.length;
309 if (brl_invalid_lock_range(start, size)) {
310 return NT_STATUS_INVALID_LOCK_RANGE;
313 locked = dbwrap_fetch_locked(brl->db, brl, kbuf);
314 if (!locked) {
315 return NT_STATUS_INTERNAL_DB_CORRUPTION;
318 /* if this is a pending lock, then with the chainlock held we
319 try to get the real lock. If we succeed then we don't need
320 to make it pending. This prevents a possible race condition
321 where the pending lock gets created after the lock that is
322 preventing the real lock gets removed */
323 if (lock_type >= PENDING_READ_LOCK) {
324 enum brl_type rw = (lock_type==PENDING_READ_LOCK? READ_LOCK : WRITE_LOCK);
326 /* here we need to force that the last_lock isn't overwritten */
327 lock = brlh->last_lock;
328 status = brl_tdb_lock(brl, brlh, smbpid, start, size, rw, NULL);
329 brlh->last_lock = lock;
331 if (NT_STATUS_IS_OK(status)) {
332 talloc_free(locked);
333 return NT_STATUS_OK;
337 dbuf = dbwrap_record_get_value(locked);
339 lock.context.smbpid = smbpid;
340 lock.context.server = brl->server;
341 lock.context.ctx = brl;
342 lock.ntvfs = brlh->ntvfs;
343 lock.context.ctx = brl;
344 lock.start = start;
345 lock.size = size;
346 lock.lock_type = lock_type;
347 lock.notify_ptr = notify_ptr;
349 if (dbuf.dptr) {
350 /* there are existing locks - make sure they don't conflict */
351 locks = (struct lock_struct *)dbuf.dptr;
352 count = dbuf.dsize / sizeof(*locks);
353 for (i=0; i<count; i++) {
354 if (brl_tdb_conflict(&locks[i], &lock)) {
355 status = brl_tdb_lock_failed(brlh, &lock);
356 goto fail;
361 /* no conflicts - add it to the list of locks */
362 /* FIXME: a dbwrap_record_append() would help here! */
363 locks = talloc_array(locked, struct lock_struct, count+1);
364 if (!locks) {
365 status = NT_STATUS_NO_MEMORY;
366 goto fail;
368 if (dbuf.dsize > 0) {
369 memcpy(locks, dbuf.dptr, dbuf.dsize);
371 locks[count] = lock;
373 dbuf.dptr = (unsigned char *)locks;
374 dbuf.dsize += sizeof(lock);
376 status = dbwrap_record_store(locked, dbuf, TDB_REPLACE);
377 if (!NT_STATUS_IS_OK(status)) {
378 goto fail;
381 talloc_free(locked);
383 /* the caller needs to know if the real lock was granted. If
384 we have reached here then it must be a pending lock that
385 was granted, so tell them the lock failed */
386 if (lock_type >= PENDING_READ_LOCK) {
387 return NT_STATUS_LOCK_NOT_GRANTED;
390 return NT_STATUS_OK;
392 fail:
393 talloc_free(locked);
394 return status;
/*
  we are removing a lock that might be holding up a pending lock. Scan for pending
  locks that cover this range and if we find any then notify the server that it should
  retry the lock
*/
static void brl_tdb_notify_unlock(struct brl_context *brl,
				  struct lock_struct *locks, int count,
				  struct lock_struct *removed_lock)
{
	int i, last_notice;

	/* the last_notice logic is to prevent stampeding on a lock
	   range. It prevents us sending hundreds of notifies on the
	   same range of bytes. It doesn't prevent all possible
	   stampedes, but it does prevent the most common problem */
	last_notice = -1;

	for (i=0;i<count;i++) {
		if (locks[i].lock_type >= PENDING_READ_LOCK &&
		    brl_tdb_overlap(&locks[i], removed_lock)) {
			/* skip if this range overlaps one we already
			   sent a notice for */
			if (last_notice != -1 && brl_tdb_overlap(&locks[i], &locks[last_notice])) {
				continue;
			}
			/* only pending write locks are remembered for
			   the stampede suppression check */
			if (locks[i].lock_type == PENDING_WRITE_LOCK) {
				last_notice = i;
			}
			/* wake the waiting server so it retries the lock */
			imessaging_send_ptr(brl->imessaging_ctx, locks[i].context.server,
					    MSG_BRL_RETRY, locks[i].notify_ptr);
		}
	}
}
432 send notifications for all pending locks - the file is being closed by this
433 user
435 static void brl_tdb_notify_all(struct brl_context *brl,
436 struct lock_struct *locks, int count)
438 int i;
439 for (i=0;i<count;i++) {
440 if (locks->lock_type >= PENDING_READ_LOCK) {
441 brl_tdb_notify_unlock(brl, locks, count, &locks[i]);
/*
  Unlock a range of bytes.
*/
static NTSTATUS brl_tdb_unlock(struct brl_context *brl,
			       struct brl_handle *brlh,
			       uint32_t smbpid,
			       uint64_t start, uint64_t size)
{
	TDB_DATA kbuf, dbuf;
	int count, i;
	struct lock_struct *locks, *lock = NULL;
	struct lock_context context;
	struct db_record *locked;
	NTSTATUS status;

	kbuf.dptr = brlh->key.data;
	kbuf.dsize = brlh->key.length;

	if (brl_invalid_lock_range(start, size)) {
		return NT_STATUS_INVALID_LOCK_RANGE;
	}

	locked = dbwrap_fetch_locked(brl->db, brl, kbuf);
	if (!locked) {
		return NT_STATUS_INTERNAL_DB_CORRUPTION;
	}
	dbuf = dbwrap_record_get_value(locked);

	context.smbpid = smbpid;
	context.server = brl->server;
	context.ctx = brl;

	/* there are existing locks - find a match
	   (if dbuf.dptr is NULL then count is 0 and we fall through
	   to RANGE_NOT_LOCKED below) */
	locks = (struct lock_struct *)dbuf.dptr;
	count = dbuf.dsize / sizeof(*locks);

	/* first pass: prefer an exact WRITE_LOCK match */
	for (i=0; i<count; i++) {
		lock = &locks[i];
		if (brl_tdb_same_context(&lock->context, &context) &&
		    lock->ntvfs == brlh->ntvfs &&
		    lock->start == start &&
		    lock->size == size &&
		    lock->lock_type == WRITE_LOCK) {
			break;
		}
	}
	if (i < count) goto found;

	/* second pass: accept any matching non-pending lock */
	for (i=0; i<count; i++) {
		lock = &locks[i];
		if (brl_tdb_same_context(&lock->context, &context) &&
		    lock->ntvfs == brlh->ntvfs &&
		    lock->start == start &&
		    lock->size == size &&
		    lock->lock_type < PENDING_READ_LOCK) {
			break;
		}
	}

found:
	if (i < count) {
		/* found it - delete it */
		if (count == 1) {
			/* it was the only lock - remove the whole record */
			status = dbwrap_record_delete(locked);
			if (!NT_STATUS_IS_OK(status)) {
				goto fail;
			}
		} else {
			struct lock_struct removed_lock = *lock;
			/* compact the array over the removed entry */
			if (i < count-1) {
				memmove(&locks[i], &locks[i+1],
					sizeof(*locks)*((count-1) - i));
			}
			count--;

			/* send notifications for any relevant pending locks */
			brl_tdb_notify_unlock(brl, locks, count, &removed_lock);

			dbuf.dsize = count * sizeof(*locks);

			status = dbwrap_record_store(locked, dbuf, TDB_REPLACE);
			if (!NT_STATUS_IS_OK(status)) {
				goto fail;
			}
		}

		talloc_free(locked);
		return NT_STATUS_OK;
	}

	/* we didn't find it */
	status = NT_STATUS_RANGE_NOT_LOCKED;

fail:
	talloc_free(locked);
	return status;
}
/*
  remove a pending lock. This is called when the caller has either
  given up trying to establish a lock or when they have succeeded in
  getting it. In either case they no longer need to be notified.
*/
static NTSTATUS brl_tdb_remove_pending(struct brl_context *brl,
				       struct brl_handle *brlh,
				       void *notify_ptr)
{
	TDB_DATA kbuf, dbuf;
	int count, i;
	struct lock_struct *locks;
	NTSTATUS status;
	struct db_record *locked;

	kbuf.dptr = brlh->key.data;
	kbuf.dsize = brlh->key.length;

	locked = dbwrap_fetch_locked(brl->db, brl, kbuf);
	if (!locked) {
		return NT_STATUS_INTERNAL_DB_CORRUPTION;
	}

	dbuf = dbwrap_record_get_value(locked);
	if (!dbuf.dptr) {
		/* no locks at all on this file */
		talloc_free(locked);
		return NT_STATUS_RANGE_NOT_LOCKED;
	}

	/* there are existing locks - find a match */
	locks = (struct lock_struct *)dbuf.dptr;
	count = dbuf.dsize / sizeof(*locks);

	for (i=0; i<count; i++) {
		struct lock_struct *lock = &locks[i];

		/* a pending lock is identified by its notify_ptr and
		   the server that created it */
		if (lock->lock_type >= PENDING_READ_LOCK &&
		    lock->notify_ptr == notify_ptr &&
		    cluster_id_equal(&lock->context.server, &brl->server)) {
			/* found it - delete it */
			if (count == 1) {
				/* it was the only lock - remove the whole record */
				status = dbwrap_record_delete(locked);
				if (!NT_STATUS_IS_OK(status)) {
					goto fail;
				}
			} else {
				/* compact the array over the removed entry */
				if (i < count-1) {
					memmove(&locks[i], &locks[i+1],
						sizeof(*locks)*((count-1) - i));
				}
				count--;
				dbuf.dsize = count * sizeof(*locks);
				status = dbwrap_record_store(locked, dbuf,
							     TDB_REPLACE);
				if (!NT_STATUS_IS_OK(status)) {
					goto fail;
				}
			}

			talloc_free(locked);
			return NT_STATUS_OK;
		}
	}

	/* we didn't find it */
	status = NT_STATUS_RANGE_NOT_LOCKED;

fail:
	talloc_free(locked);
	return status;
}
621 Test if we are allowed to perform IO on a region of an open file
623 static NTSTATUS brl_tdb_locktest(struct brl_context *brl,
624 struct brl_handle *brlh,
625 uint32_t smbpid,
626 uint64_t start, uint64_t size,
627 enum brl_type lock_type)
629 TDB_DATA kbuf, dbuf;
630 int count, i;
631 struct lock_struct lock, *locks;
632 NTSTATUS status;
634 kbuf.dptr = brlh->key.data;
635 kbuf.dsize = brlh->key.length;
637 if (brl_invalid_lock_range(start, size)) {
638 return NT_STATUS_INVALID_LOCK_RANGE;
641 status = dbwrap_fetch(brl->db, brl, kbuf, &dbuf);
642 if (NT_STATUS_EQUAL(status, NT_STATUS_NOT_FOUND)) {
643 return NT_STATUS_OK;
644 } else if (!NT_STATUS_IS_OK(status)) {
645 return status;
648 lock.context.smbpid = smbpid;
649 lock.context.server = brl->server;
650 lock.context.ctx = brl;
651 lock.ntvfs = brlh->ntvfs;
652 lock.start = start;
653 lock.size = size;
654 lock.lock_type = lock_type;
656 /* there are existing locks - make sure they don't conflict */
657 locks = (struct lock_struct *)dbuf.dptr;
658 count = dbuf.dsize / sizeof(*locks);
660 for (i=0; i<count; i++) {
661 if (brl_tdb_conflict_other(&locks[i], &lock)) {
662 talloc_free(dbuf.dptr);
663 return NT_STATUS_FILE_LOCK_CONFLICT;
667 talloc_free(dbuf.dptr);
668 return NT_STATUS_OK;
/*
  Remove any locks associated with a open file.
*/
static NTSTATUS brl_tdb_close(struct brl_context *brl,
			      struct brl_handle *brlh)
{
	TDB_DATA kbuf, dbuf;
	int count, i, dcount=0;
	struct lock_struct *locks;
	struct db_record *locked;
	NTSTATUS status;

	kbuf.dptr = brlh->key.data;
	kbuf.dsize = brlh->key.length;

	locked = dbwrap_fetch_locked(brl->db, brl, kbuf);
	if (!locked) {
		return NT_STATUS_INTERNAL_DB_CORRUPTION;
	}
	dbuf = dbwrap_record_get_value(locked);
	if (!dbuf.dptr) {
		/* no locks on this file - nothing to do */
		talloc_free(locked);
		return NT_STATUS_OK;
	}

	/* there are existing locks - remove any for this fnum */
	locks = (struct lock_struct *)dbuf.dptr;
	count = dbuf.dsize / sizeof(*locks);

	for (i=0; i<count; i++) {
		struct lock_struct *lock = &locks[i];

		if (lock->context.ctx == brl &&
		    cluster_id_equal(&lock->context.server, &brl->server) &&
		    lock->ntvfs == brlh->ntvfs) {
			/* found it - delete it by shifting the tail down */
			if (count > 1 && i < count-1) {
				memmove(&locks[i], &locks[i+1],
					sizeof(*locks)*((count-1) - i));
			}
			count--;
			i--;	/* re-examine the entry that moved into slot i */
			dcount++;
		}
	}

	status = NT_STATUS_OK;

	if (count == 0) {
		/* all locks gone - remove the record entirely */
		status = dbwrap_record_delete(locked);
	} else if (dcount != 0) {
		/* tell all pending lock holders for this file that
		   they have a chance now. This is a bit indiscriminant,
		   but works OK */
		brl_tdb_notify_all(brl, locks, count);

		dbuf.dsize = count * sizeof(*locks);

		status = dbwrap_record_store(locked, dbuf, TDB_REPLACE);
	}

	talloc_free(locked);

	return status;
}
737 static NTSTATUS brl_tdb_count(struct brl_context *brl, struct brl_handle *brlh,
738 int *count)
740 TDB_DATA kbuf, dbuf;
741 NTSTATUS status;
743 kbuf.dptr = brlh->key.data;
744 kbuf.dsize = brlh->key.length;
745 *count = 0;
747 status = dbwrap_fetch(brl->db, brl, kbuf, &dbuf);
748 if (NT_STATUS_EQUAL(status, NT_STATUS_NOT_FOUND)) {
749 return NT_STATUS_OK;
750 } else if (!NT_STATUS_IS_OK(status)) {
751 return status;
753 *count = dbuf.dsize / sizeof(struct lock_struct);
755 talloc_free(dbuf.dptr);
757 return NT_STATUS_OK;
/* dispatch table exposing the tdb backend through the generic
   brlock backend interface */
static const struct brlock_ops brlock_tdb_ops = {
	.brl_init = brl_tdb_init,
	.brl_create_handle = brl_tdb_create_handle,
	.brl_lock = brl_tdb_lock,
	.brl_unlock = brl_tdb_unlock,
	.brl_remove_pending = brl_tdb_remove_pending,
	.brl_locktest = brl_tdb_locktest,
	.brl_close = brl_tdb_close,
	.brl_count = brl_tdb_count
};
/* register the tdb backend as the active byte range locking
   implementation */
void brl_tdb_init_ops(void)
{
	brlock_set_ops(&brlock_tdb_ops);
}