/* source3/locking/posix.c (blob e900132430dfbb717afe128d62bf1f64c0502485) */
/*
   Unix SMB/CIFS implementation.
   Locking functions
   Copyright (C) Jeremy Allison 1992-2006

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program.  If not, see <http://www.gnu.org/licenses/>.

   Revision History:

   POSIX locking support. Jeremy Allison (jeremy@valinux.com), Apr. 2000.
*/
24 #include "includes.h"
25 #include "system/filesys.h"
26 #include "lib/util/server_id.h"
27 #include "locking/proto.h"
28 #include "dbwrap/dbwrap.h"
29 #include "dbwrap/dbwrap_rbt.h"
30 #include "util_tdb.h"
31 #include "smbd/fd_handle.h"
33 #undef DBGC_CLASS
34 #define DBGC_CLASS DBGC_LOCKING
/*
 * The pending close database handle.
 */

static struct db_context *posix_pending_close_db;
42 /****************************************************************************
43 First - the functions that deal with the underlying system locks - these
44 functions are used no matter if we're mapping CIFS Windows locks or CIFS
45 POSIX locks onto POSIX.
46 ****************************************************************************/
48 /****************************************************************************
49 Utility function to map a lock type correctly depending on the open
50 mode of a file.
51 ****************************************************************************/
53 static int map_posix_lock_type( files_struct *fsp, enum brl_type lock_type)
55 if ((lock_type == WRITE_LOCK) && !fsp->fsp_flags.can_write) {
57 * Many UNIX's cannot get a write lock on a file opened read-only.
58 * Win32 locking semantics allow this.
59 * Do the best we can and attempt a read-only lock.
61 DBG_DEBUG("Downgrading write lock to read due to read-only "
62 "file.\n");
63 return F_RDLCK;
67 * This return should be the most normal, as we attempt
68 * to always open files read/write.
71 return (lock_type == READ_LOCK) ? F_RDLCK : F_WRLCK;
/****************************************************************************
 Debugging aid :-).
****************************************************************************/

static const char *posix_lock_type_name(int lock_type)
{
	/* Anything that isn't a read lock is reported as WRITE. */
	return (lock_type == F_RDLCK) ? "READ" : "WRITE";
}
83 /****************************************************************************
84 Check to see if the given unsigned lock range is within the possible POSIX
85 range. Modifies the given args to be in range if possible, just returns
86 False if not.
87 ****************************************************************************/
89 #define SMB_OFF_T_BITS (sizeof(off_t)*8)
91 static bool posix_lock_in_range(off_t *offset_out, off_t *count_out,
92 uint64_t u_offset, uint64_t u_count)
94 off_t offset = (off_t)u_offset;
95 off_t count = (off_t)u_count;
98 * For the type of system we are, attempt to
99 * find the maximum positive lock offset as an off_t.
102 #if defined(MAX_POSITIVE_LOCK_OFFSET) /* Some systems have arbitrary limits. */
104 off_t max_positive_lock_offset = (MAX_POSITIVE_LOCK_OFFSET);
105 #else
107 * In this case off_t is 64 bits,
108 * and the underlying system can handle 64 bit signed locks.
111 off_t mask2 = ((off_t)0x4) << (SMB_OFF_T_BITS-4);
112 off_t mask = (mask2<<1);
113 off_t max_positive_lock_offset = ~mask;
115 #endif
117 * POSIX locks of length zero mean lock to end-of-file.
118 * Win32 locks of length zero are point probes. Ignore
119 * any Win32 locks of length zero. JRA.
122 if (count == 0) {
123 DEBUG(10,("posix_lock_in_range: count = 0, ignoring.\n"));
124 return False;
128 * If the given offset was > max_positive_lock_offset then we cannot map this at all
129 * ignore this lock.
132 if (u_offset & ~((uint64_t)max_positive_lock_offset)) {
133 DBG_DEBUG("(offset = %ju) offset > %ju"
134 "and we cannot handle this. Ignoring lock.\n",
135 (uintmax_t)u_offset,
136 (uintmax_t)max_positive_lock_offset);
137 return False;
141 * We must truncate the count to less than max_positive_lock_offset.
144 if (u_count & ~((uint64_t)max_positive_lock_offset)) {
145 count = max_positive_lock_offset;
149 * Truncate count to end at max lock offset.
152 if (offset > INT64_MAX - count ||
153 offset + count > max_positive_lock_offset) {
154 count = max_positive_lock_offset - offset;
158 * If we ate all the count, ignore this lock.
161 if (count == 0) {
162 DBG_DEBUG("Count = 0. Ignoring lock "
163 "u_offset = %" PRIu64 ", u_count = %" PRIu64 "\n",
164 u_offset,
165 u_count);
166 return False;
170 * The mapping was successful.
173 DBG_DEBUG("offset_out = %ju count_out = %ju\n",
174 (uintmax_t)offset,
175 (uintmax_t)count);
177 *offset_out = offset;
178 *count_out = count;
180 return True;
183 /****************************************************************************
184 Actual function that does POSIX locks. Copes with 64 -> 32 bit cruft and
185 broken NFS implementations.
186 ****************************************************************************/
188 static bool posix_fcntl_lock(files_struct *fsp, int op, off_t offset, off_t count, int type)
190 bool ret;
192 DEBUG(8,("posix_fcntl_lock %d %d %jd %jd %d\n",
193 fsp_get_io_fd(fsp),op,(intmax_t)offset,(intmax_t)count,type));
195 ret = SMB_VFS_LOCK(fsp, op, offset, count, type);
197 if (!ret && ((errno == EFBIG) || (errno == ENOLCK) || (errno == EINVAL))) {
199 if ((errno == EINVAL) &&
200 (op != F_GETLK &&
201 op != F_SETLK &&
202 op != F_SETLKW)) {
203 DEBUG(0,("WARNING: OFD locks in use and no kernel "
204 "support. Try setting "
205 "'smbd:force process locks = true' "
206 "in smb.conf\n"));
207 } else {
208 DEBUG(0, ("WARNING: lock request at offset "
209 "%ju, length %ju returned\n",
210 (uintmax_t)offset, (uintmax_t)count));
211 DEBUGADD(0, ("an %s error. This can happen when using 64 bit "
212 "lock offsets\n", strerror(errno)));
213 DEBUGADD(0, ("on 32 bit NFS mounted file systems.\n"));
217 * If the offset is > 0x7FFFFFFF then this will cause problems on
218 * 32 bit NFS mounted filesystems. Just ignore it.
221 if (offset & ~((off_t)0x7fffffff)) {
222 DEBUG(0,("Offset greater than 31 bits. Returning success.\n"));
223 return True;
226 if (count & ~((off_t)0x7fffffff)) {
227 /* 32 bit NFS file system, retry with smaller offset */
228 DEBUG(0,("Count greater than 31 bits - retrying with 31 bit truncated length.\n"));
229 errno = 0;
230 count &= 0x7fffffff;
231 ret = SMB_VFS_LOCK(fsp, op, offset, count, type);
235 DEBUG(8,("posix_fcntl_lock: Lock call %s\n", ret ? "successful" : "failed"));
236 return ret;
239 /****************************************************************************
240 Actual function that gets POSIX locks. Copes with 64 -> 32 bit cruft and
241 broken NFS implementations.
242 ****************************************************************************/
244 static bool posix_fcntl_getlock(files_struct *fsp, off_t *poffset, off_t *pcount, int *ptype)
246 pid_t pid;
247 bool ret;
249 DEBUG(8, ("posix_fcntl_getlock %d %ju %ju %d\n",
250 fsp_get_io_fd(fsp), (uintmax_t)*poffset, (uintmax_t)*pcount,
251 *ptype));
253 ret = SMB_VFS_GETLOCK(fsp, poffset, pcount, ptype, &pid);
255 if (!ret && ((errno == EFBIG) || (errno == ENOLCK) || (errno == EINVAL))) {
257 DEBUG(0, ("posix_fcntl_getlock: WARNING: lock request at "
258 "offset %ju, length %ju returned\n",
259 (uintmax_t)*poffset, (uintmax_t)*pcount));
260 DEBUGADD(0, ("an %s error. This can happen when using 64 bit "
261 "lock offsets\n", strerror(errno)));
262 DEBUGADD(0, ("on 32 bit NFS mounted file systems.\n"));
265 * If the offset is > 0x7FFFFFFF then this will cause problems on
266 * 32 bit NFS mounted filesystems. Just ignore it.
269 if (*poffset & ~((off_t)0x7fffffff)) {
270 DEBUG(0,("Offset greater than 31 bits. Returning success.\n"));
271 return True;
274 if (*pcount & ~((off_t)0x7fffffff)) {
275 /* 32 bit NFS file system, retry with smaller offset */
276 DEBUG(0,("Count greater than 31 bits - retrying with 31 bit truncated length.\n"));
277 errno = 0;
278 *pcount &= 0x7fffffff;
279 ret = SMB_VFS_GETLOCK(fsp,poffset,pcount,ptype,&pid);
283 DEBUG(8,("posix_fcntl_getlock: Lock query call %s\n", ret ? "successful" : "failed"));
284 return ret;
287 /****************************************************************************
288 POSIX function to see if a file region is locked. Returns True if the
289 region is locked, False otherwise.
290 ****************************************************************************/
292 bool is_posix_locked(files_struct *fsp,
293 uint64_t *pu_offset,
294 uint64_t *pu_count,
295 enum brl_type *plock_type,
296 enum brl_flavour lock_flav)
298 off_t offset;
299 off_t count;
300 int posix_lock_type = map_posix_lock_type(fsp,*plock_type);
302 DBG_DEBUG("File %s, offset = %" PRIu64 ", count = %" PRIu64 ", "
303 "type = %s\n",
304 fsp_str_dbg(fsp),
305 *pu_offset,
306 *pu_count,
307 posix_lock_type_name(*plock_type));
310 * If the requested lock won't fit in the POSIX range, we will
311 * never set it, so presume it is not locked.
314 if(!posix_lock_in_range(&offset, &count, *pu_offset, *pu_count)) {
315 return False;
318 if (!posix_fcntl_getlock(fsp,&offset,&count,&posix_lock_type)) {
319 return False;
322 if (posix_lock_type == F_UNLCK) {
323 return False;
326 if (lock_flav == POSIX_LOCK) {
327 /* Only POSIX lock queries need to know the details. */
328 *pu_offset = (uint64_t)offset;
329 *pu_count = (uint64_t)count;
330 *plock_type = (posix_lock_type == F_RDLCK) ? READ_LOCK : WRITE_LOCK;
332 return True;
335 /****************************************************************************
336 Next - the functions that deal with in memory database storing representations
337 of either Windows CIFS locks or POSIX CIFS locks.
338 ****************************************************************************/
340 /* The key used in the in-memory POSIX databases. */
342 struct lock_ref_count_key {
343 struct file_id id;
344 char r;
347 /*******************************************************************
348 Form a static locking key for a dev/inode pair for the lock ref count
349 ******************************************************************/
351 static TDB_DATA locking_ref_count_key_fsp(const files_struct *fsp,
352 struct lock_ref_count_key *tmp)
354 ZERO_STRUCTP(tmp);
355 tmp->id = fsp->file_id;
356 tmp->r = 'r';
357 return make_tdb_data((uint8_t *)tmp, sizeof(*tmp));
360 /*******************************************************************
361 Convenience function to get an fd_array key from an fsp.
362 ******************************************************************/
364 static TDB_DATA fd_array_key_fsp(const files_struct *fsp)
366 return make_tdb_data((const uint8_t *)&fsp->file_id, sizeof(fsp->file_id));
369 /*******************************************************************
370 Create the in-memory POSIX lock databases.
371 ********************************************************************/
373 bool posix_locking_init(bool read_only)
375 if (posix_pending_close_db != NULL) {
376 return true;
379 posix_pending_close_db = db_open_rbt(NULL);
381 if (posix_pending_close_db == NULL) {
382 DEBUG(0,("Failed to open POSIX pending close database.\n"));
383 return false;
386 return true;
389 /*******************************************************************
390 Delete the in-memory POSIX lock databases.
391 ********************************************************************/
393 bool posix_locking_end(void)
396 * Shouldn't we close all fd's here?
398 TALLOC_FREE(posix_pending_close_db);
399 return true;
402 /****************************************************************************
403 Next - the functions that deal with reference count of number of locks open
404 on a dev/ino pair.
405 ****************************************************************************/
407 /****************************************************************************
408 Increase the lock ref count. Creates lock_ref_count entry if it doesn't exist.
409 ****************************************************************************/
411 static void increment_lock_ref_count(const files_struct *fsp)
413 struct lock_ref_count_key tmp;
414 int32_t lock_ref_count = 0;
415 NTSTATUS status;
417 status = dbwrap_change_int32_atomic(
418 posix_pending_close_db, locking_ref_count_key_fsp(fsp, &tmp),
419 &lock_ref_count, 1);
421 SMB_ASSERT(NT_STATUS_IS_OK(status));
422 SMB_ASSERT(lock_ref_count < INT32_MAX);
424 DEBUG(10,("lock_ref_count for file %s = %d\n",
425 fsp_str_dbg(fsp), (int)(lock_ref_count + 1)));
428 /****************************************************************************
429 Reduce the lock ref count.
430 ****************************************************************************/
432 static void decrement_lock_ref_count(const files_struct *fsp)
434 struct lock_ref_count_key tmp;
435 int32_t lock_ref_count = 0;
436 NTSTATUS status;
438 status = dbwrap_change_int32_atomic(
439 posix_pending_close_db, locking_ref_count_key_fsp(fsp, &tmp),
440 &lock_ref_count, -1);
442 SMB_ASSERT(NT_STATUS_IS_OK(status));
443 SMB_ASSERT(lock_ref_count > 0);
445 DEBUG(10,("lock_ref_count for file %s = %d\n",
446 fsp_str_dbg(fsp), (int)(lock_ref_count - 1)));
449 /****************************************************************************
450 Fetch the lock ref count.
451 ****************************************************************************/
453 static int32_t get_lock_ref_count(const files_struct *fsp)
455 struct lock_ref_count_key tmp;
456 NTSTATUS status;
457 int32_t lock_ref_count = 0;
459 status = dbwrap_fetch_int32(
460 posix_pending_close_db, locking_ref_count_key_fsp(fsp, &tmp),
461 &lock_ref_count);
463 if (!NT_STATUS_IS_OK(status) &&
464 !NT_STATUS_EQUAL(status, NT_STATUS_NOT_FOUND)) {
465 DEBUG(0, ("Error fetching "
466 "lock ref count for file %s: %s\n",
467 fsp_str_dbg(fsp), nt_errstr(status)));
469 return lock_ref_count;
472 /****************************************************************************
473 Delete a lock_ref_count entry.
474 ****************************************************************************/
476 static void delete_lock_ref_count(const files_struct *fsp)
478 struct lock_ref_count_key tmp;
480 /* Not a bug if it doesn't exist - no locks were ever granted. */
482 dbwrap_delete(posix_pending_close_db,
483 locking_ref_count_key_fsp(fsp, &tmp));
485 DEBUG(10,("delete_lock_ref_count for file %s\n",
486 fsp_str_dbg(fsp)));
/****************************************************************************
 Next - the functions that deal with storing fd's that have outstanding
 POSIX locks when closed.
****************************************************************************/

/****************************************************************************
 The records in posix_pending_close_db are composed of an array of
 ints keyed by dev/ino pair. Those ints are the fd's that were open on
 this dev/ino pair that should have been closed, but can't as the lock
 ref count is non zero.
****************************************************************************/

struct add_fd_to_close_entry_state {
	const struct files_struct *fsp;
};
505 static void add_fd_to_close_entry_fn(
506 struct db_record *rec,
507 TDB_DATA value,
508 void *private_data)
510 struct add_fd_to_close_entry_state *state = private_data;
511 int fd = fsp_get_pathref_fd(state->fsp);
512 TDB_DATA values[] = {
513 value,
514 { .dptr = (uint8_t *)&fd,
515 .dsize = sizeof(fd) },
517 NTSTATUS status;
519 SMB_ASSERT((values[0].dsize % sizeof(int)) == 0);
521 status = dbwrap_record_storev(rec, values, ARRAY_SIZE(values), 0);
522 SMB_ASSERT(NT_STATUS_IS_OK(status));
525 /****************************************************************************
526 Add an fd to the pending close db.
527 ****************************************************************************/
529 static void add_fd_to_close_entry(const files_struct *fsp)
531 struct add_fd_to_close_entry_state state = { .fsp = fsp };
532 NTSTATUS status;
534 status = dbwrap_do_locked(
535 posix_pending_close_db,
536 fd_array_key_fsp(fsp),
537 add_fd_to_close_entry_fn,
538 &state);
539 SMB_ASSERT(NT_STATUS_IS_OK(status));
541 DBG_DEBUG("added fd %d file %s\n",
542 fsp_get_pathref_fd(fsp),
543 fsp_str_dbg(fsp));
546 static void fd_close_posix_fn(
547 struct db_record *rec,
548 TDB_DATA data,
549 void *private_data)
551 int *saved_errno = (int *)private_data;
552 size_t num_fds, i;
554 SMB_ASSERT((data.dsize % sizeof(int)) == 0);
555 num_fds = data.dsize / sizeof(int);
557 for (i=0; i<num_fds; i++) {
558 int fd;
559 int ret;
560 memcpy(&fd, data.dptr, sizeof(int));
561 ret = close(fd);
562 if (ret == -1) {
563 *saved_errno = errno;
565 data.dptr += sizeof(int);
567 dbwrap_record_delete(rec);
570 /****************************************************************************
571 Deal with pending closes needed by POSIX locking support.
572 Note that locking_close_file() is expected to have been called
573 to delete all locks on this fsp before this function is called.
574 ****************************************************************************/
576 int fd_close_posix(const struct files_struct *fsp)
578 int saved_errno = 0;
579 int ret;
580 NTSTATUS status;
582 if (!lp_locking(fsp->conn->params) ||
583 !lp_posix_locking(fsp->conn->params) ||
584 fsp->fsp_flags.use_ofd_locks)
587 * No locking or POSIX to worry about or we are using POSIX
588 * open file description lock semantics which only removes
589 * locks on the file descriptor we're closing. Just close.
591 return close(fsp_get_pathref_fd(fsp));
594 if (get_lock_ref_count(fsp)) {
597 * There are outstanding locks on this dev/inode pair on
598 * other fds. Add our fd to the pending close db. We also
599 * set fsp_get_io_fd(fsp) to -1 inside fd_close() after returning
600 * from VFS layer.
603 add_fd_to_close_entry(fsp);
604 return 0;
607 status = dbwrap_do_locked(
608 posix_pending_close_db,
609 fd_array_key_fsp(fsp),
610 fd_close_posix_fn,
611 &saved_errno);
612 if (!NT_STATUS_IS_OK(status)) {
613 DBG_WARNING("dbwrap_do_locked failed: %s\n",
614 nt_errstr(status));
617 /* Don't need a lock ref count on this dev/ino anymore. */
618 delete_lock_ref_count(fsp);
621 * Finally close the fd associated with this fsp.
624 ret = close(fsp_get_pathref_fd(fsp));
626 if (ret == 0 && saved_errno != 0) {
627 errno = saved_errno;
628 ret = -1;
631 return ret;
/****************************************************************************
 Next - the functions that deal with the mapping CIFS Windows locks onto
 the underlying system POSIX locks.
****************************************************************************/

/*
 * Structure used when splitting a lock range
 * into a POSIX lock range. Doubly linked list.
 */

struct lock_list {
	struct lock_list *next;
	struct lock_list *prev;
	off_t start;
	off_t size;
};
651 /****************************************************************************
652 Create a list of lock ranges that don't overlap a given range. Used in calculating
653 POSIX locks and unlocks. This is a difficult function that requires ASCII art to
654 understand it :-).
655 ****************************************************************************/
657 static struct lock_list *posix_lock_list(TALLOC_CTX *ctx,
658 struct lock_list *lhead,
659 const struct lock_context *lock_ctx, /* Lock context lhead belongs to. */
660 const struct lock_struct *plocks,
661 int num_locks)
663 int i;
666 * Check the current lock list on this dev/inode pair.
667 * Quit if the list is deleted.
670 DEBUG(10, ("posix_lock_list: curr: start=%ju,size=%ju\n",
671 (uintmax_t)lhead->start, (uintmax_t)lhead->size ));
673 for (i=0; i<num_locks && lhead; i++) {
674 const struct lock_struct *lock = &plocks[i];
675 struct lock_list *l_curr;
677 /* Ignore all but read/write locks. */
678 if (lock->lock_type != READ_LOCK && lock->lock_type != WRITE_LOCK) {
679 continue;
682 /* Ignore locks not owned by this process. */
683 if (!server_id_equal(&lock->context.pid, &lock_ctx->pid)) {
684 continue;
688 * Walk the lock list, checking for overlaps. Note that
689 * the lock list can expand within this loop if the current
690 * range being examined needs to be split.
693 for (l_curr = lhead; l_curr;) {
695 DEBUG(10, ("posix_lock_list: lock: fnum=%ju: "
696 "start=%ju,size=%ju:type=%s",
697 (uintmax_t)lock->fnum,
698 (uintmax_t)lock->start,
699 (uintmax_t)lock->size,
700 posix_lock_type_name(lock->lock_type) ));
702 if ( (l_curr->start >= (lock->start + lock->size)) ||
703 (lock->start >= (l_curr->start + l_curr->size))) {
705 /* No overlap with existing lock - leave this range alone. */
706 /*********************************************
707 +---------+
708 | l_curr |
709 +---------+
710 +-------+
711 | lock |
712 +-------+
713 OR....
714 +---------+
715 | l_curr |
716 +---------+
717 **********************************************/
719 DEBUG(10,(" no overlap case.\n" ));
721 l_curr = l_curr->next;
723 } else if ( (l_curr->start >= lock->start) &&
724 (l_curr->start + l_curr->size <= lock->start + lock->size) ) {
727 * This range is completely overlapped by this existing lock range
728 * and thus should have no effect. Delete it from the list.
730 /*********************************************
731 +---------+
732 | l_curr |
733 +---------+
734 +---------------------------+
735 | lock |
736 +---------------------------+
737 **********************************************/
738 /* Save the next pointer */
739 struct lock_list *ul_next = l_curr->next;
741 DEBUG(10,(" delete case.\n" ));
743 DLIST_REMOVE(lhead, l_curr);
744 if(lhead == NULL) {
745 break; /* No more list... */
748 l_curr = ul_next;
750 } else if ( (l_curr->start >= lock->start) &&
751 (l_curr->start < lock->start + lock->size) &&
752 (l_curr->start + l_curr->size > lock->start + lock->size) ) {
755 * This range overlaps the existing lock range at the high end.
756 * Truncate by moving start to existing range end and reducing size.
758 /*********************************************
759 +---------------+
760 | l_curr |
761 +---------------+
762 +---------------+
763 | lock |
764 +---------------+
765 BECOMES....
766 +-------+
767 | l_curr|
768 +-------+
769 **********************************************/
771 l_curr->size = (l_curr->start + l_curr->size) - (lock->start + lock->size);
772 l_curr->start = lock->start + lock->size;
774 DEBUG(10, (" truncate high case: start=%ju,"
775 "size=%ju\n",
776 (uintmax_t)l_curr->start,
777 (uintmax_t)l_curr->size ));
779 l_curr = l_curr->next;
781 } else if ( (l_curr->start < lock->start) &&
782 (l_curr->start + l_curr->size > lock->start) &&
783 (l_curr->start + l_curr->size <= lock->start + lock->size) ) {
786 * This range overlaps the existing lock range at the low end.
787 * Truncate by reducing size.
789 /*********************************************
790 +---------------+
791 | l_curr |
792 +---------------+
793 +---------------+
794 | lock |
795 +---------------+
796 BECOMES....
797 +-------+
798 | l_curr|
799 +-------+
800 **********************************************/
802 l_curr->size = lock->start - l_curr->start;
804 DEBUG(10, (" truncate low case: start=%ju,"
805 "size=%ju\n",
806 (uintmax_t)l_curr->start,
807 (uintmax_t)l_curr->size ));
809 l_curr = l_curr->next;
811 } else if ( (l_curr->start < lock->start) &&
812 (l_curr->start + l_curr->size > lock->start + lock->size) ) {
814 * Worst case scenario. Range completely overlaps an existing
815 * lock range. Split the request into two, push the new (upper) request
816 * into the dlink list, and continue with the entry after l_new (as we
817 * know that l_new will not overlap with this lock).
819 /*********************************************
820 +---------------------------+
821 | l_curr |
822 +---------------------------+
823 +---------+
824 | lock |
825 +---------+
826 BECOMES.....
827 +-------+ +---------+
828 | l_curr| | l_new |
829 +-------+ +---------+
830 **********************************************/
831 struct lock_list *l_new = talloc(ctx, struct lock_list);
833 if(l_new == NULL) {
834 DEBUG(0,("posix_lock_list: talloc fail.\n"));
835 return NULL; /* The talloc_destroy takes care of cleanup. */
838 ZERO_STRUCTP(l_new);
839 l_new->start = lock->start + lock->size;
840 l_new->size = l_curr->start + l_curr->size - l_new->start;
842 /* Truncate the l_curr. */
843 l_curr->size = lock->start - l_curr->start;
845 DEBUG(10, (" split case: curr: start=%ju,"
846 "size=%ju new: start=%ju,"
847 "size=%ju\n",
848 (uintmax_t)l_curr->start,
849 (uintmax_t)l_curr->size,
850 (uintmax_t)l_new->start,
851 (uintmax_t)l_new->size ));
854 * Add into the dlink list after the l_curr point - NOT at lhead.
856 DLIST_ADD_AFTER(lhead, l_new, l_curr);
858 /* And move after the link we added. */
859 l_curr = l_new->next;
861 } else {
864 * This logic case should never happen. Ensure this is the
865 * case by forcing an abort.... Remove in production.
867 char *msg = NULL;
869 if (asprintf(&msg, "logic flaw in cases: "
870 "l_curr: start = %ju, "
871 "size = %ju : lock: "
872 "start = %ju, size = %ju",
873 (uintmax_t)l_curr->start,
874 (uintmax_t)l_curr->size,
875 (uintmax_t)lock->start,
876 (uintmax_t)lock->size ) != -1) {
877 smb_panic(msg);
878 } else {
879 smb_panic("posix_lock_list");
882 } /* end for ( l_curr = lhead; l_curr;) */
883 } /* end for (i=0; i<num_locks && ul_head; i++) */
885 return lhead;
888 /****************************************************************************
889 POSIX function to acquire a lock. Returns True if the
890 lock could be granted, False if not.
891 ****************************************************************************/
893 bool set_posix_lock_windows_flavour(files_struct *fsp,
894 uint64_t u_offset,
895 uint64_t u_count,
896 enum brl_type lock_type,
897 const struct lock_context *lock_ctx,
898 const struct lock_struct *plocks,
899 int num_locks,
900 int *errno_ret)
902 off_t offset;
903 off_t count;
904 int posix_lock_type = map_posix_lock_type(fsp,lock_type);
905 bool ret = True;
906 size_t lock_count;
907 TALLOC_CTX *l_ctx = NULL;
908 struct lock_list *llist = NULL;
909 struct lock_list *ll = NULL;
911 DEBUG(5, ("set_posix_lock_windows_flavour: File %s, offset = %ju, "
912 "count = %ju, type = %s\n", fsp_str_dbg(fsp),
913 (uintmax_t)u_offset, (uintmax_t)u_count,
914 posix_lock_type_name(lock_type)));
917 * If the requested lock won't fit in the POSIX range, we will
918 * pretend it was successful.
921 if(!posix_lock_in_range(&offset, &count, u_offset, u_count)) {
922 increment_lock_ref_count(fsp);
923 return True;
927 * Windows is very strange. It allows read locks to be overlaid
928 * (even over a write lock), but leaves the write lock in force until the first
929 * unlock. It also reference counts the locks. This means the following sequence :
931 * process1 process2
932 * ------------------------------------------------------------------------
933 * WRITE LOCK : start = 2, len = 10
934 * READ LOCK: start =0, len = 10 - FAIL
935 * READ LOCK : start = 0, len = 14
936 * READ LOCK: start =0, len = 10 - FAIL
937 * UNLOCK : start = 2, len = 10
938 * READ LOCK: start =0, len = 10 - OK
940 * Under POSIX, the same sequence in steps 1 and 2 would not be reference counted, but
941 * would leave a single read lock over the 0-14 region.
944 if ((l_ctx = talloc_init("set_posix_lock")) == NULL) {
945 DEBUG(0,("set_posix_lock_windows_flavour: unable to init talloc context.\n"));
946 return False;
949 if ((ll = talloc(l_ctx, struct lock_list)) == NULL) {
950 DEBUG(0,("set_posix_lock_windows_flavour: unable to talloc unlock list.\n"));
951 talloc_destroy(l_ctx);
952 return False;
956 * Create the initial list entry containing the
957 * lock we want to add.
960 ZERO_STRUCTP(ll);
961 ll->start = offset;
962 ll->size = count;
964 DLIST_ADD(llist, ll);
967 * The following call calculates if there are any
968 * overlapping locks held by this process on
969 * fd's open on the same file and splits this list
970 * into a list of lock ranges that do not overlap with existing
971 * POSIX locks.
974 llist = posix_lock_list(l_ctx,
975 llist,
976 lock_ctx, /* Lock context llist belongs to. */
977 plocks,
978 num_locks);
981 * Add the POSIX locks on the list of ranges returned.
982 * As the lock is supposed to be added atomically, we need to
983 * back out all the locks if any one of these calls fail.
986 for (lock_count = 0, ll = llist; ll; ll = ll->next, lock_count++) {
987 offset = ll->start;
988 count = ll->size;
990 DEBUG(5, ("set_posix_lock_windows_flavour: Real lock: "
991 "Type = %s: offset = %ju, count = %ju\n",
992 posix_lock_type_name(posix_lock_type),
993 (uintmax_t)offset, (uintmax_t)count ));
995 if (!posix_fcntl_lock(fsp,F_SETLK,offset,count,posix_lock_type)) {
996 *errno_ret = errno;
997 DEBUG(5, ("set_posix_lock_windows_flavour: Lock "
998 "fail !: Type = %s: offset = %ju, "
999 "count = %ju. Errno = %s\n",
1000 posix_lock_type_name(posix_lock_type),
1001 (uintmax_t)offset, (uintmax_t)count,
1002 strerror(errno) ));
1003 ret = False;
1004 break;
1008 if (!ret) {
1011 * Back out all the POSIX locks we have on fail.
1014 for (ll = llist; lock_count; ll = ll->next, lock_count--) {
1015 offset = ll->start;
1016 count = ll->size;
1018 DEBUG(5, ("set_posix_lock_windows_flavour: Backing "
1019 "out locks: Type = %s: offset = %ju, "
1020 "count = %ju\n",
1021 posix_lock_type_name(posix_lock_type),
1022 (uintmax_t)offset, (uintmax_t)count ));
1024 posix_fcntl_lock(fsp,F_SETLK,offset,count,F_UNLCK);
1026 } else {
1027 /* Remember the number of locks we have on this dev/ino pair. */
1028 increment_lock_ref_count(fsp);
1031 talloc_destroy(l_ctx);
1032 return ret;
/****************************************************************************
 POSIX function to release a lock. Returns True if the
 lock could be released, False if not.

 Windows-flavour variant: the upper (brlock) layer has already removed the
 lock of type deleted_lock_type from its database; plocks/num_locks is the
 REMAINING lock state for this file.  Because POSIX locks on the same
 process don't stack, we must only unlock the byte ranges not still covered
 by other Windows locks held by this process (computed by posix_lock_list()).
****************************************************************************/

bool release_posix_lock_windows_flavour(files_struct *fsp,
				uint64_t u_offset,
				uint64_t u_count,
				enum brl_type deleted_lock_type,
				const struct lock_context *lock_ctx,
				const struct lock_struct *plocks,
				int num_locks)
{
	off_t offset;
	off_t count;
	bool ret = True;
	TALLOC_CTX *ul_ctx = NULL;
	struct lock_list *ulist = NULL;
	struct lock_list *ul = NULL;

	DBG_INFO("File %s, offset = %" PRIu64 ", "
		 "count = %" PRIu64 "\n",
		 fsp_str_dbg(fsp),
		 u_offset,
		 u_count);

	/* Remember the number of locks we have on this dev/ino pair. */
	decrement_lock_ref_count(fsp);

	/*
	 * If the requested lock won't fit in the POSIX range, we will
	 * pretend it was successful.
	 */

	if(!posix_lock_in_range(&offset, &count, u_offset, u_count)) {
		/* offset/count are out-params of posix_lock_in_range();
		 * nothing was ever locked at system level for this range. */
		return True;
	}

	if ((ul_ctx = talloc_init("release_posix_lock")) == NULL) {
		DBG_ERR("unable to init talloc context.\n");
		return False;
	}

	if ((ul = talloc(ul_ctx, struct lock_list)) == NULL) {
		DBG_ERR("unable to talloc unlock list.\n");
		talloc_destroy(ul_ctx);
		return False;
	}

	/*
	 * Create the initial list entry containing the
	 * lock we want to remove.
	 */

	ZERO_STRUCTP(ul);
	ul->start = offset;
	ul->size = count;

	DLIST_ADD(ulist, ul);

	/*
	 * The following call calculates if there are any
	 * overlapping locks held by this process on
	 * fd's open on the same file and creates a
	 * list of unlock ranges that will allow
	 * POSIX lock ranges to remain on the file whilst the
	 * unlocks are performed.
	 */

	ulist = posix_lock_list(ul_ctx,
				ulist,
				lock_ctx, /* Lock context ulist belongs to. */
				plocks,
				num_locks);

	/*
	 * If there were any overlapped entries (list is > 1 or size or start have changed),
	 * and the lock_type we just deleted from
	 * the upper layer tdb was a write lock, then before doing the unlock we need to downgrade
	 * the POSIX lock to a read lock.  This allows any overlapping read locks
	 * to be atomically maintained.
	 *
	 * NOTE: the downgrade must happen BEFORE the unlock loop below -
	 * doing it afterwards would leave a window with no lock at all.
	 */

	if (deleted_lock_type == WRITE_LOCK &&
	    (!ulist || ulist->next != NULL || ulist->start != offset || ulist->size != count)) {

		DBG_INFO("downgrading lock to READ: offset = %" PRIu64
			 ", count = %" PRIu64 "\n",
			 offset,
			 count);

		if (!posix_fcntl_lock(fsp,F_SETLK,offset,count,F_RDLCK)) {
			DBG_ERR("downgrade of lock failed with error %s !\n",
				strerror(errno));
			talloc_destroy(ul_ctx);
			return False;
		}
	}

	/*
	 * Release the POSIX locks on the list of ranges returned.
	 */

	for(; ulist; ulist = ulist->next) {
		offset = ulist->start;
		count = ulist->size;

		DBG_INFO("Real unlock: offset = %" PRIu64 ", count = %" PRIu64
			 "\n",
			 offset,
			 count);

		/* Keep going on failure so remaining ranges are still
		 * unlocked; report overall failure via ret. */
		if (!posix_fcntl_lock(fsp,F_SETLK,offset,count,F_UNLCK)) {
			ret = False;
		}
	}

	talloc_destroy(ul_ctx);
	return ret;
}
1156 /****************************************************************************
1157 Next - the functions that deal with mapping CIFS POSIX locks onto
1158 the underlying system POSIX locks.
1159 ****************************************************************************/
1161 /****************************************************************************
1162 We only increment the lock ref count when we see a POSIX lock on a context
1163 that doesn't already have them.
1164 ****************************************************************************/
1166 static void increment_posix_lock_count(const files_struct *fsp,
1167 uint64_t smblctx)
1169 NTSTATUS status;
1170 TDB_DATA ctx_key;
1171 TDB_DATA val = { 0 };
1173 ctx_key.dptr = (uint8_t *)&smblctx;
1174 ctx_key.dsize = sizeof(smblctx);
1177 * Don't increment if we already have any POSIX flavor
1178 * locks on this context.
1180 if (dbwrap_exists(posix_pending_close_db, ctx_key)) {
1181 return;
1184 /* Remember that we have POSIX flavor locks on this context. */
1185 status = dbwrap_store(posix_pending_close_db, ctx_key, val, 0);
1186 SMB_ASSERT(NT_STATUS_IS_OK(status));
1188 increment_lock_ref_count(fsp);
1190 DEBUG(10,("posix_locks set for file %s\n",
1191 fsp_str_dbg(fsp)));
1194 static void decrement_posix_lock_count(const files_struct *fsp, uint64_t smblctx)
1196 NTSTATUS status;
1197 TDB_DATA ctx_key;
1199 ctx_key.dptr = (uint8_t *)&smblctx;
1200 ctx_key.dsize = sizeof(smblctx);
1202 status = dbwrap_delete(posix_pending_close_db, ctx_key);
1203 SMB_ASSERT(NT_STATUS_IS_OK(status));
1205 decrement_lock_ref_count(fsp);
1207 DEBUG(10,("posix_locks deleted for file %s\n",
1208 fsp_str_dbg(fsp)));
1211 /****************************************************************************
1212 Return true if any locks exist on the given lock context.
1213 ****************************************************************************/
1215 static bool locks_exist_on_context(const struct lock_struct *plocks,
1216 int num_locks,
1217 const struct lock_context *lock_ctx)
1219 int i;
1221 for (i=0; i < num_locks; i++) {
1222 const struct lock_struct *lock = &plocks[i];
1224 /* Ignore all but read/write locks. */
1225 if (lock->lock_type != READ_LOCK && lock->lock_type != WRITE_LOCK) {
1226 continue;
1229 /* Ignore locks not owned by this process. */
1230 if (!server_id_equal(&lock->context.pid, &lock_ctx->pid)) {
1231 continue;
1234 if (lock_ctx->smblctx == lock->context.smblctx) {
1235 return true;
1238 return false;
1241 /****************************************************************************
1242 POSIX function to acquire a lock. Returns True if the
1243 lock could be granted, False if not.
1244 As POSIX locks don't stack or conflict (they just overwrite)
1245 we can map the requested lock directly onto a system one. We
1246 know it doesn't conflict with locks on other contexts as the
1247 upper layer would have refused it.
1248 ****************************************************************************/
1250 bool set_posix_lock_posix_flavour(files_struct *fsp,
1251 uint64_t u_offset,
1252 uint64_t u_count,
1253 enum brl_type lock_type,
1254 const struct lock_context *lock_ctx,
1255 int *errno_ret)
1257 off_t offset;
1258 off_t count;
1259 int posix_lock_type = map_posix_lock_type(fsp,lock_type);
1261 DEBUG(5,("set_posix_lock_posix_flavour: File %s, offset = %ju, count "
1262 "= %ju, type = %s\n", fsp_str_dbg(fsp),
1263 (uintmax_t)u_offset, (uintmax_t)u_count,
1264 posix_lock_type_name(lock_type)));
1267 * If the requested lock won't fit in the POSIX range, we will
1268 * pretend it was successful.
1271 if(!posix_lock_in_range(&offset, &count, u_offset, u_count)) {
1272 increment_posix_lock_count(fsp, lock_ctx->smblctx);
1273 return True;
1276 if (!posix_fcntl_lock(fsp,F_SETLK,offset,count,posix_lock_type)) {
1277 *errno_ret = errno;
1278 DEBUG(5,("set_posix_lock_posix_flavour: Lock fail !: Type = %s: offset = %ju, count = %ju. Errno = %s\n",
1279 posix_lock_type_name(posix_lock_type), (intmax_t)offset, (intmax_t)count, strerror(errno) ));
1280 return False;
1282 increment_posix_lock_count(fsp, lock_ctx->smblctx);
1283 return True;
1286 /****************************************************************************
1287 POSIX function to release a lock. Returns True if the
1288 lock could be released, False if not.
1289 We are given a complete lock state from the upper layer which is what the lock
1290 state should be after the unlock has already been done, so what
1291 we do is punch out holes in the unlock range where locks owned by this process
1292 have a different lock context.
1293 ****************************************************************************/
1295 bool release_posix_lock_posix_flavour(files_struct *fsp,
1296 uint64_t u_offset,
1297 uint64_t u_count,
1298 const struct lock_context *lock_ctx,
1299 const struct lock_struct *plocks,
1300 int num_locks)
1302 bool ret = True;
1303 off_t offset;
1304 off_t count;
1305 TALLOC_CTX *ul_ctx = NULL;
1306 struct lock_list *ulist = NULL;
1307 struct lock_list *ul = NULL;
1309 DEBUG(5, ("release_posix_lock_posix_flavour: File %s, offset = %ju, "
1310 "count = %ju\n", fsp_str_dbg(fsp),
1311 (uintmax_t)u_offset, (uintmax_t)u_count));
1314 * If the requested lock won't fit in the POSIX range, we will
1315 * pretend it was successful.
1318 if(!posix_lock_in_range(&offset, &count, u_offset, u_count)) {
1319 if (!locks_exist_on_context(plocks, num_locks, lock_ctx)) {
1320 decrement_posix_lock_count(fsp, lock_ctx->smblctx);
1322 return True;
1325 if ((ul_ctx = talloc_init("release_posix_lock")) == NULL) {
1326 DEBUG(0,("release_posix_lock_windows_flavour: unable to init talloc context.\n"));
1327 return False;
1330 if ((ul = talloc(ul_ctx, struct lock_list)) == NULL) {
1331 DEBUG(0,("release_posix_lock_windows_flavour: unable to talloc unlock list.\n"));
1332 talloc_destroy(ul_ctx);
1333 return False;
1337 * Create the initial list entry containing the
1338 * lock we want to remove.
1341 ZERO_STRUCTP(ul);
1342 ul->start = offset;
1343 ul->size = count;
1345 DLIST_ADD(ulist, ul);
1348 * Walk the given array creating a linked list
1349 * of unlock requests.
1352 ulist = posix_lock_list(ul_ctx,
1353 ulist,
1354 lock_ctx, /* Lock context ulist belongs to. */
1355 plocks,
1356 num_locks);
1359 * Release the POSIX locks on the list of ranges returned.
1362 for(; ulist; ulist = ulist->next) {
1363 offset = ulist->start;
1364 count = ulist->size;
1366 DEBUG(5, ("release_posix_lock_posix_flavour: Real unlock: "
1367 "offset = %ju, count = %ju\n",
1368 (uintmax_t)offset, (uintmax_t)count ));
1370 if (!posix_fcntl_lock(fsp,F_SETLK,offset,count,F_UNLCK)) {
1371 ret = False;
1375 if (!locks_exist_on_context(plocks, num_locks, lock_ctx)) {
1376 decrement_posix_lock_count(fsp, lock_ctx->smblctx);
1378 talloc_destroy(ul_ctx);
1379 return ret;