/*
 * Copyright 2005-2013, Ingo Weinhold, ingo_weinhold@gmx.de.
 * Copyright 2002-2017, Axel Dörfler, axeld@pinc-software.de.
 * Distributed under the terms of the MIT License.
 *
 * Copyright 2001-2002, Travis Geiselbrecht. All rights reserved.
 * Distributed under the terms of the NewOS License.
 */


/*! Virtual File System and File System Interface Layer */

#include <ctype.h>
#include <fcntl.h>
#include <limits.h>
#include <stddef.h>
#include <stdio.h>
#include <string.h>
#include <sys/file.h>
#include <sys/resource.h>
#include <sys/stat.h>
#include <unistd.h>

#include <fs_attr.h>
#include <fs_info.h>
#include <fs_interface.h>
#include <fs_volume.h>
#include <OS.h>
#include <StorageDefs.h>

#include <AutoDeleter.h>
#include <block_cache.h>
#include <boot/kernel_args.h>
#include <debug_heap.h>
#include <disk_device_manager/KDiskDevice.h>
#include <disk_device_manager/KDiskDeviceManager.h>
#include <disk_device_manager/KDiskDeviceUtils.h>
#include <disk_device_manager/KDiskSystem.h>
#include <fd.h>
#include <file_cache.h>
#include <fs/node_monitor.h>
#include <KPath.h>
#include <lock.h>
#include <low_resource_manager.h>
#include <syscalls.h>
#include <syscall_restart.h>
#include <tracing.h>
#include <util/atomic.h>
#include <util/AutoLock.h>
#include <util/DoublyLinkedList.h>
#include <vfs.h>
#include <vm/vm.h>
#include <vm/VMCache.h>

#include "EntryCache.h"
#include "fifo.h"
#include "IORequest.h"
#include "unused_vnodes.h"
#include "vfs_tracing.h"
#include "Vnode.h"
#include "../cache/vnode_store.h"
//#define TRACE_VFS
#ifdef TRACE_VFS
#	define TRACE(x) dprintf x
#	define FUNCTION(x) dprintf x
#else
#	define TRACE(x) ;
#	define FUNCTION(x) ;
#endif

#define ADD_DEBUGGER_COMMANDS
#define HAS_FS_CALL(vnode, op)			(vnode->ops->op != NULL)
#define HAS_FS_MOUNT_CALL(mount, op)	(mount->volume->ops->op != NULL)

#if KDEBUG
#	define FS_CALL(vnode, op, params...) \
		( HAS_FS_CALL(vnode, op) ? \
			vnode->ops->op(vnode->mount->volume, vnode, params) \
			: (panic("FS_CALL op " #op " is NULL"), 0))
#	define FS_CALL_NO_PARAMS(vnode, op) \
		( HAS_FS_CALL(vnode, op) ? \
			vnode->ops->op(vnode->mount->volume, vnode) \
			: (panic("FS_CALL_NO_PARAMS op " #op " is NULL"), 0))
#	define FS_MOUNT_CALL(mount, op, params...) \
		( HAS_FS_MOUNT_CALL(mount, op) ? \
			mount->volume->ops->op(mount->volume, params) \
			: (panic("FS_MOUNT_CALL op " #op " is NULL"), 0))
#	define FS_MOUNT_CALL_NO_PARAMS(mount, op) \
		( HAS_FS_MOUNT_CALL(mount, op) ? \
			mount->volume->ops->op(mount->volume) \
			: (panic("FS_MOUNT_CALL_NO_PARAMS op " #op " is NULL"), 0))
#else
#	define FS_CALL(vnode, op, params...) \
		vnode->ops->op(vnode->mount->volume, vnode, params)
#	define FS_CALL_NO_PARAMS(vnode, op) \
		vnode->ops->op(vnode->mount->volume, vnode)
#	define FS_MOUNT_CALL(mount, op, params...) \
		mount->volume->ops->op(mount->volume, params)
#	define FS_MOUNT_CALL_NO_PARAMS(mount, op) \
		mount->volume->ops->op(mount->volume)
#endif
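
// Illustrative sketch (not part of the build): with KDEBUG enabled, a call
// such as
//
//	status_t status = FS_CALL(vnode, read_stat, &stat);
//
// first checks HAS_FS_CALL(vnode, read_stat) and panics with a descriptive
// message if the file system left the hook NULL; without KDEBUG it expands
// directly to vnode->ops->read_stat(vnode->mount->volume, vnode, &stat).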
const static size_t kMaxPathLength = 65536;
	// The absolute maximum path length (for getcwd() - this does not depend
	// on PATH_MAX).


typedef DoublyLinkedList<vnode> VnodeList;
/*!	\brief Structure to manage a mounted file system

	Note: The root_vnode and root_vnode->covers fields (what others?) are
	initialized in fs_mount() and not changed afterwards. That is, as soon
	as the mount is mounted and it is made sure it won't be unmounted
	(e.g. by holding a reference to a vnode of that mount), (read) access
	to those fields is always safe, even without additional locking. Moreover,
	while mounted the mount holds a reference to the root_vnode->covers vnode,
	thus making the access path vnode->mount->root_vnode->covers->mount->...
	safe if a reference to vnode is held (note that for the root mount
	root_vnode->covers is NULL, though).
*/
struct fs_mount {
	fs_mount()
		:
		volume(NULL),
		device_name(NULL)
	{
		recursive_lock_init(&rlock, "mount rlock");
	}

	~fs_mount()
	{
		recursive_lock_destroy(&rlock);
		free(device_name);

		while (volume) {
			fs_volume* superVolume = volume->super_volume;

			if (volume->file_system != NULL)
				put_module(volume->file_system->info.name);

			free(volume->file_system_name);
			free(volume);
			volume = superVolume;
		}
	}

	struct fs_mount*	next;
	dev_t				id;
	fs_volume*			volume;
	char*				device_name;
	recursive_lock		rlock;	// guards the vnodes list
		// TODO: Make this a mutex! It is never used recursively.
	struct vnode*		root_vnode;
	struct vnode*		covers_vnode;	// immutable
	KPartition*			partition;
	VnodeList			vnodes;
	EntryCache			entry_cache;
	bool				unmounting;
	bool				owns_file_device;
};
namespace {

struct advisory_lock : public DoublyLinkedListLinkImpl<advisory_lock> {
	list_link	link;
	team_id		team;
	pid_t		session;
	off_t		start;
	off_t		end;
	bool		shared;
};

typedef DoublyLinkedList<advisory_lock> LockList;

} // namespace
struct advisory_locking {
	sem_id		lock;
	sem_id		wait_sem;
	LockList	locks;

	advisory_locking()
		:
		lock(-1),
		wait_sem(-1)
	{
	}

	~advisory_locking()
	{
		if (lock >= 0)
			delete_sem(lock);
		if (wait_sem >= 0)
			delete_sem(wait_sem);
	}
};
/*!	\brief Guards sMountsTable.

	The holder is allowed read/write access to sMountsTable.
	Manipulation of the fs_mount structures themselves
	(and their destruction) requires different locks though.
*/
static mutex sMountMutex = MUTEX_INITIALIZER("vfs_mount_lock");

/*!	\brief Guards mount/unmount operations.

	fs_mount() and fs_unmount() hold the lock during their whole operation.
	That is, locking the lock ensures that no FS is mounted/unmounted. In
	particular this means that
	- sMountsTable will not be modified,
	- the fields of the fs_mount structures in sMountsTable that are immutable
	  after initialization will not be modified.

	The thread trying to lock the lock must not hold sVnodeLock or
	sMountMutex.
*/
static recursive_lock sMountOpLock;

/*!	\brief Guards sVnodeTable.

	The holder is allowed read/write access to sVnodeTable and to
	any unbusy vnode in that table, save for the immutable fields (device, id,
	private_node, mount) to which only read-only access is allowed.
	The mutable fields advisory_locking, mandatory_locked_by, and ref_count, as
	well as the busy, removed, and unused flags, and the vnode's type can also
	be write accessed when holding a read lock to sVnodeLock *and* having the
	vnode locked. Write access to covered_by and covers requires write locking
	sVnodeLock.

	The thread trying to acquire the lock must not hold sMountMutex.
	You must not hold this lock when calling create_sem(), as this might call
	vfs_free_unused_vnodes() and thus cause a deadlock.
*/
static rw_lock sVnodeLock = RW_LOCK_INITIALIZER("vfs_vnode_lock");

/*!	\brief Guards io_context::root.

	Must be held when setting or getting the io_context::root field.
	The only operation allowed while holding this lock besides getting or
	setting the field is inc_vnode_ref_count() on io_context::root.
*/
static mutex sIOContextRootLock = MUTEX_INITIALIZER("io_context::root lock");
namespace {

struct vnode_hash_key {
	dev_t	device;
	ino_t	vnode;
};

struct VnodeHash {
	typedef vnode_hash_key	KeyType;
	typedef	struct vnode	ValueType;

#define VHASH(mountid, vnodeid) \
	(((uint32)((vnodeid) >> 32) + (uint32)(vnodeid)) ^ (uint32)(mountid))

	size_t HashKey(KeyType key) const
	{
		return VHASH(key.device, key.vnode);
	}

	size_t Hash(ValueType* vnode) const
	{
		return VHASH(vnode->device, vnode->id);
	}

#undef VHASH

	bool Compare(KeyType key, ValueType* vnode) const
	{
		return vnode->device == key.device && vnode->id == key.vnode;
	}

	ValueType*& GetLink(ValueType* value) const
	{
		return value->next;
	}
};

typedef BOpenHashTable<VnodeHash> VnodeTable;


struct MountHash {
	typedef dev_t			KeyType;
	typedef struct fs_mount	ValueType;

	size_t HashKey(KeyType key) const
	{
		return key;
	}

	size_t Hash(ValueType* mount) const
	{
		return mount->id;
	}

	bool Compare(KeyType key, ValueType* mount) const
	{
		return mount->id == key;
	}

	ValueType*& GetLink(ValueType* value) const
	{
		return value->next;
	}
};

typedef BOpenHashTable<MountHash> MountTable;

} // namespace
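
// Illustrative sketch (assumed usage, mirroring calls made later in this
// file): both tables are BOpenHashTable instances keyed as defined above.
//
//	vnode_hash_key key;
//	key.device = mountID;
//	key.vnode = vnodeID;
//	struct vnode* vnode = sVnodeTable->Lookup(key);
//		// requires sVnodeLock (read lock at least)
//	struct fs_mount* mount = sMountsTable->Lookup(id);
//		// requires sMountMutex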
#define VNODE_HASH_TABLE_SIZE 1024
static VnodeTable* sVnodeTable;
static struct vnode* sRoot;

#define MOUNTS_HASH_TABLE_SIZE 16
static MountTable* sMountsTable;
static dev_t sNextMountID = 1;

#define MAX_TEMP_IO_VECS 8

// How long to wait for busy vnodes: 2000 retries of 5000 µs each, i.e. 10 s.
#define BUSY_VNODE_RETRIES 2000
#define BUSY_VNODE_DELAY 5000

mode_t __gUmask = 022;
/* function declarations */

static void free_unused_vnodes();

// file descriptor operation prototypes
static status_t file_read(struct file_descriptor* descriptor, off_t pos,
	void* buffer, size_t* _bytes);
static status_t file_write(struct file_descriptor* descriptor, off_t pos,
	const void* buffer, size_t* _bytes);
static off_t file_seek(struct file_descriptor* descriptor, off_t pos,
	int seekType);
static void file_free_fd(struct file_descriptor* descriptor);
static status_t file_close(struct file_descriptor* descriptor);
static status_t file_select(struct file_descriptor* descriptor, uint8 event,
	struct selectsync* sync);
static status_t file_deselect(struct file_descriptor* descriptor, uint8 event,
	struct selectsync* sync);
static status_t dir_read(struct io_context* context,
	struct file_descriptor* descriptor, struct dirent* buffer,
	size_t bufferSize, uint32* _count);
static status_t dir_read(struct io_context* ioContext, struct vnode* vnode,
	void* cookie, struct dirent* buffer, size_t bufferSize, uint32* _count);
static status_t dir_rewind(struct file_descriptor* descriptor);
static void dir_free_fd(struct file_descriptor* descriptor);
static status_t dir_close(struct file_descriptor* descriptor);
static status_t attr_dir_read(struct io_context* context,
	struct file_descriptor* descriptor, struct dirent* buffer,
	size_t bufferSize, uint32* _count);
static status_t attr_dir_rewind(struct file_descriptor* descriptor);
static void attr_dir_free_fd(struct file_descriptor* descriptor);
static status_t attr_dir_close(struct file_descriptor* descriptor);
static status_t attr_read(struct file_descriptor* descriptor, off_t pos,
	void* buffer, size_t* _bytes);
static status_t attr_write(struct file_descriptor* descriptor, off_t pos,
	const void* buffer, size_t* _bytes);
static off_t attr_seek(struct file_descriptor* descriptor, off_t pos,
	int seekType);
static void attr_free_fd(struct file_descriptor* descriptor);
static status_t attr_close(struct file_descriptor* descriptor);
static status_t attr_read_stat(struct file_descriptor* descriptor,
	struct stat* statData);
static status_t attr_write_stat(struct file_descriptor* descriptor,
	const struct stat* stat, int statMask);
static status_t index_dir_read(struct io_context* context,
	struct file_descriptor* descriptor, struct dirent* buffer,
	size_t bufferSize, uint32* _count);
static status_t index_dir_rewind(struct file_descriptor* descriptor);
static void index_dir_free_fd(struct file_descriptor* descriptor);
static status_t index_dir_close(struct file_descriptor* descriptor);
static status_t query_read(struct io_context* context,
	struct file_descriptor* descriptor, struct dirent* buffer,
	size_t bufferSize, uint32* _count);
static status_t query_rewind(struct file_descriptor* descriptor);
static void query_free_fd(struct file_descriptor* descriptor);
static status_t query_close(struct file_descriptor* descriptor);

static status_t common_ioctl(struct file_descriptor* descriptor, ulong op,
	void* buffer, size_t length);
static status_t common_read_stat(struct file_descriptor* descriptor,
	struct stat* statData);
static status_t common_write_stat(struct file_descriptor* descriptor,
	const struct stat* statData, int statMask);
static status_t common_path_read_stat(int fd, char* path, bool traverseLeafLink,
	struct stat* stat, bool kernel);

static status_t vnode_path_to_vnode(struct vnode* vnode, char* path,
	bool traverseLeafLink, int count, bool kernel,
	struct vnode** _vnode, ino_t* _parentID);
static status_t dir_vnode_to_path(struct vnode* vnode, char* buffer,
	size_t bufferSize, bool kernel);
static status_t fd_and_path_to_vnode(int fd, char* path, bool traverseLeafLink,
	struct vnode** _vnode, ino_t* _parentID, bool kernel);
static void inc_vnode_ref_count(struct vnode* vnode);
static status_t dec_vnode_ref_count(struct vnode* vnode, bool alwaysFree,
	bool reenter);
static inline void put_vnode(struct vnode* vnode);
static status_t fs_unmount(char* path, dev_t mountID, uint32 flags,
	bool kernel);
static int open_vnode(struct vnode* vnode, int openMode, bool kernel);
static struct fd_ops sFileOps = {
	file_read,
	file_write,
	file_seek,
	common_ioctl,
	NULL,		// set_flags
	file_select,
	file_deselect,
	NULL,		// read_dir()
	NULL,		// rewind_dir()
	common_read_stat,
	common_write_stat,
	file_close,
	file_free_fd
};

static struct fd_ops sDirectoryOps = {
	NULL,		// read()
	NULL,		// write()
	NULL,		// seek()
	common_ioctl,
	NULL,		// set_flags
	NULL,		// select()
	NULL,		// deselect()
	dir_read,
	dir_rewind,
	common_read_stat,
	common_write_stat,
	dir_close,
	dir_free_fd
};

static struct fd_ops sAttributeDirectoryOps = {
	NULL,		// read()
	NULL,		// write()
	NULL,		// seek()
	common_ioctl,
	NULL,		// set_flags
	NULL,		// select()
	NULL,		// deselect()
	attr_dir_read,
	attr_dir_rewind,
	common_read_stat,
	common_write_stat,
	attr_dir_close,
	attr_dir_free_fd
};

static struct fd_ops sAttributeOps = {
	attr_read,
	attr_write,
	attr_seek,
	common_ioctl,
	NULL,		// set_flags
	NULL,		// select()
	NULL,		// deselect()
	NULL,		// read_dir()
	NULL,		// rewind_dir()
	attr_read_stat,
	attr_write_stat,
	attr_close,
	attr_free_fd
};

static struct fd_ops sIndexDirectoryOps = {
	NULL,		// read()
	NULL,		// write()
	NULL,		// seek()
	NULL,		// ioctl()
	NULL,		// set_flags
	NULL,		// select()
	NULL,		// deselect()
	index_dir_read,
	index_dir_rewind,
	NULL,		// read_stat()
	NULL,		// write_stat()
	index_dir_close,
	index_dir_free_fd
};

#if 0
static struct fd_ops sIndexOps = {
	NULL,		// read()
	NULL,		// write()
	NULL,		// seek()
	NULL,		// ioctl()
	NULL,		// set_flags
	NULL,		// select()
	NULL,		// deselect()
	NULL,		// dir_read()
	NULL,		// dir_rewind()
	index_read_stat,	// read_stat()
	NULL,		// write_stat()
	NULL,		// dir_close()
	NULL		// free_fd()
};
#endif

static struct fd_ops sQueryOps = {
	NULL,		// read()
	NULL,		// write()
	NULL,		// seek()
	NULL,		// ioctl()
	NULL,		// set_flags
	NULL,		// select()
	NULL,		// deselect()
	query_read,
	query_rewind,
	NULL,		// read_stat()
	NULL,		// write_stat()
	query_close,
	query_free_fd
};
namespace {

class VNodePutter {
public:
	VNodePutter(struct vnode* vnode = NULL) : fVNode(vnode) {}

	~VNodePutter()
	{
		Put();
	}

	void SetTo(struct vnode* vnode)
	{
		Put();
		fVNode = vnode;
	}

	void Put()
	{
		if (fVNode) {
			put_vnode(fVNode);
			fVNode = NULL;
		}
	}

	struct vnode* Detach()
	{
		struct vnode* vnode = fVNode;
		fVNode = NULL;
		return vnode;
	}

private:
	struct vnode* fVNode;
};


class FDCloser {
public:
	FDCloser() : fFD(-1), fKernel(true) {}

	FDCloser(int fd, bool kernel) : fFD(fd), fKernel(kernel) {}

	~FDCloser()
	{
		Close();
	}

	void SetTo(int fd, bool kernel)
	{
		Close();
		fFD = fd;
		fKernel = kernel;
	}

	void Close()
	{
		if (fFD >= 0) {
			if (fKernel)
				_kern_close(fFD);
			else
				_user_close(fFD);
			fFD = -1;
		}
	}

	int Detach()
	{
		int fd = fFD;
		fFD = -1;
		return fd;
	}

private:
	int fFD;
	bool fKernel;
};

} // namespace
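
// Illustrative sketch (assumed usage, following the pattern of the many call
// sites later in vfs.cpp): both helpers are RAII guards.
//
//	struct vnode* vnode;
//	if (get_vnode(mountID, vnodeID, &vnode, true, false) == B_OK) {
//		VNodePutter putter(vnode);
//		// ... use vnode; put_vnode() runs automatically on scope exit,
//		// unless putter.Detach() is called to hand the reference on.
//	}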
#if VFS_PAGES_IO_TRACING

namespace VFSPagesIOTracing {

class PagesIOTraceEntry : public AbstractTraceEntry {
protected:
	PagesIOTraceEntry(struct vnode* vnode, void* cookie, off_t pos,
		const generic_io_vec* vecs, uint32 count, uint32 flags,
		generic_size_t bytesRequested, status_t status,
		generic_size_t bytesTransferred)
		:
		fVnode(vnode),
		fMountID(vnode->mount->id),
		fNodeID(vnode->id),
		fCookie(cookie),
		fPos(pos),
		fCount(count),
		fFlags(flags),
		fBytesRequested(bytesRequested),
		fStatus(status),
		fBytesTransferred(bytesTransferred)
	{
		fVecs = (generic_io_vec*)alloc_tracing_buffer_memcpy(vecs,
			sizeof(generic_io_vec) * count, false);
	}

	void AddDump(TraceOutput& out, const char* mode)
	{
		out.Print("vfs pages io %5s: vnode: %p (%" B_PRId32 ", %" B_PRId64 "), "
			"cookie: %p, pos: %" B_PRIdOFF ", size: %" B_PRIu64 ", vecs: {",
			mode, fVnode, fMountID, fNodeID, fCookie, fPos,
			(uint64)fBytesRequested);

		if (fVecs != NULL) {
			for (uint32 i = 0; i < fCount; i++) {
				if (i > 0)
					out.Print(", ");
				out.Print("(%" B_PRIx64 ", %" B_PRIu64 ")",
					(uint64)fVecs[i].base, (uint64)fVecs[i].length);
			}
		}

		out.Print("}, flags: %#" B_PRIx32 " -> status: %#" B_PRIx32 ", "
			"transferred: %" B_PRIu64, fFlags, fStatus,
			(uint64)fBytesTransferred);
	}

protected:
	struct vnode*	fVnode;
	dev_t			fMountID;
	ino_t			fNodeID;
	void*			fCookie;
	off_t			fPos;
	generic_io_vec*	fVecs;
	uint32			fCount;
	uint32			fFlags;
	generic_size_t	fBytesRequested;
	status_t		fStatus;
	generic_size_t	fBytesTransferred;
};


class ReadPages : public PagesIOTraceEntry {
public:
	ReadPages(struct vnode* vnode, void* cookie, off_t pos,
		const generic_io_vec* vecs, uint32 count, uint32 flags,
		generic_size_t bytesRequested, status_t status,
		generic_size_t bytesTransferred)
		:
		PagesIOTraceEntry(vnode, cookie, pos, vecs, count, flags,
			bytesRequested, status, bytesTransferred)
	{
		Initialized();
	}

	virtual void AddDump(TraceOutput& out)
	{
		PagesIOTraceEntry::AddDump(out, "read");
	}
};


class WritePages : public PagesIOTraceEntry {
public:
	WritePages(struct vnode* vnode, void* cookie, off_t pos,
		const generic_io_vec* vecs, uint32 count, uint32 flags,
		generic_size_t bytesRequested, status_t status,
		generic_size_t bytesTransferred)
		:
		PagesIOTraceEntry(vnode, cookie, pos, vecs, count, flags,
			bytesRequested, status, bytesTransferred)
	{
		Initialized();
	}

	virtual void AddDump(TraceOutput& out)
	{
		PagesIOTraceEntry::AddDump(out, "write");
	}
};

} // namespace VFSPagesIOTracing

#	define TPIO(x) new(std::nothrow) VFSPagesIOTracing::x;
#else
#	define TPIO(x) ;
#endif	// VFS_PAGES_IO_TRACING
/*!	Finds the mounted device (the fs_mount structure) with the given ID.
	Note, you must hold the sMountMutex lock when you call this function.
*/
static struct fs_mount*
find_mount(dev_t id)
{
	ASSERT_LOCKED_MUTEX(&sMountMutex);

	return sMountsTable->Lookup(id);
}
static status_t
get_mount(dev_t id, struct fs_mount** _mount)
{
	struct fs_mount* mount;

	ReadLocker nodeLocker(sVnodeLock);
	MutexLocker mountLocker(sMountMutex);

	mount = find_mount(id);
	if (mount == NULL)
		return B_BAD_VALUE;

	struct vnode* rootNode = mount->root_vnode;
	if (mount->unmounting || rootNode == NULL || rootNode->IsBusy()
		|| rootNode->ref_count == 0) {
		// might have been called during a mount/unmount operation
		return B_BUSY;
	}

	inc_vnode_ref_count(rootNode);
	*_mount = mount;
	return B_OK;
}
static void
put_mount(struct fs_mount* mount)
{
	if (mount)
		put_vnode(mount->root_vnode);
}
/*!	Tries to open the specified file system module.
	Accepts a file system name of the form "bfs" or "file_systems/bfs/v1".
	Returns a pointer to the file system module interface, or NULL if it
	could not open the module.
*/
static file_system_module_info*
get_file_system(const char* fsName)
{
	char name[B_FILE_NAME_LENGTH];
	if (strncmp(fsName, "file_systems/", strlen("file_systems/")) != 0) {
		// construct module name if we didn't get one
		// (we currently support only one API)
		snprintf(name, sizeof(name), "file_systems/%s/v1", fsName);
		fsName = NULL;
	}

	file_system_module_info* info;
	if (get_module(fsName ? fsName : name, (module_info**)&info) != B_OK)
		return NULL;

	return info;
}
/*!	Accepts a file system name of the form "bfs" or "file_systems/bfs/v1"
	and returns a compatible fs_info.fsh_name name ("bfs" in both cases).
	The name is allocated for you, and you have to free() it when you're
	done with it.
	Returns NULL if the required memory is not available.
*/
static char*
get_file_system_name(const char* fsName)
{
	const size_t length = strlen("file_systems/");

	if (strncmp(fsName, "file_systems/", length) != 0) {
		// the name already seems to be the module's file name
		return strdup(fsName);
	}

	fsName += length;
	const char* end = strchr(fsName, '/');
	if (end == NULL) {
		// this doesn't seem to be a valid name, but well...
		return strdup(fsName);
	}

	// cut off the trailing /v1

	char* name = (char*)malloc(end + 1 - fsName);
	if (name == NULL)
		return NULL;

	strlcpy(name, fsName, end + 1 - fsName);
	return name;
}
/*!	Accepts a list of file system names separated by a colon, one for each
	layer, and returns the file system name for the specified layer.
	The name is allocated for you, and you have to free() it when you're
	done with it.
	Returns NULL if the required memory is not available or if there is no
	name for the specified layer.
*/
static char*
get_file_system_name_for_layer(const char* fsNames, int32 layer)
{
	while (layer >= 0) {
		const char* end = strchr(fsNames, ':');
		if (end == NULL) {
			if (layer == 0)
				return strdup(fsNames);
			return NULL;
		}

		if (layer == 0) {
			size_t length = end - fsNames + 1;
			char* result = (char*)malloc(length);
			if (result == NULL)
				return NULL;
			strlcpy(result, fsNames, length);
			return result;
		}

		fsNames = end + 1;
		layer--;
	}

	return NULL;
}
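
// Worked example (illustrative; "write_overlay" is just a sample layer name):
// for a layered stack registered as "bfs:write_overlay", layer 0 yields
// "bfs" and layer 1 yields "write_overlay"; layer 2 yields NULL since there
// is no third name.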
static void
add_vnode_to_mount_list(struct vnode* vnode, struct fs_mount* mount)
{
	RecursiveLocker _(mount->rlock);
	mount->vnodes.Add(vnode);
}


static void
remove_vnode_from_mount_list(struct vnode* vnode, struct fs_mount* mount)
{
	RecursiveLocker _(mount->rlock);
	mount->vnodes.Remove(vnode);
}
/*!	\brief Looks up a vnode by mount and node ID in the sVnodeTable.

	The caller must hold the sVnodeLock (read lock at least).

	\param mountID the mount ID.
	\param vnodeID the node ID.

	\return The vnode structure, if it was found in the hash table, \c NULL
		otherwise.
*/
static struct vnode*
lookup_vnode(dev_t mountID, ino_t vnodeID)
{
	struct vnode_hash_key key;

	key.device = mountID;
	key.vnode = vnodeID;

	return sVnodeTable->Lookup(key);
}
/*!	\brief Checks whether or not a busy vnode should be waited for (again).

	This will also wait for BUSY_VNODE_DELAY before returning if one should
	still wait for the vnode becoming unbusy.

	\return \c true if one should retry, \c false if not.
*/
static bool
retry_busy_vnode(int32& tries, dev_t mountID, ino_t vnodeID)
{
	if (--tries < 0) {
		// vnode doesn't seem to become unbusy
		dprintf("vnode %" B_PRIdDEV ":%" B_PRIdINO
			" is not becoming unbusy!\n", mountID, vnodeID);
		return false;
	}
	snooze(BUSY_VNODE_DELAY);
	return true;
}
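
// Simplified caller sketch (assumed; the real caller, get_vnode() below, also
// drops and re-acquires sVnodeLock around each retry): start with a budget of
// BUSY_VNODE_RETRIES and re-check the vnode after each snooze until it either
// becomes unbusy or the budget runs out.
//
//	int32 tries = BUSY_VNODE_RETRIES;
//	while (vnode->IsBusy()) {
//		if (!retry_busy_vnode(tries, mountID, vnodeID))
//			return B_BUSY;
//	}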
/*!	Creates a new vnode with the given mount and node ID.
	If the node already exists, it is returned instead and no new node is
	created. In either case -- but not, if an error occurs -- the function write
	locks \c sVnodeLock and keeps it locked for the caller when returning. On
	error the lock is not held on return.

	\param mountID The mount ID.
	\param vnodeID The vnode ID.
	\param _vnode Will be set to the new vnode on success.
	\param _nodeCreated Will be set to \c true when the returned vnode has
		been newly created, \c false when it already existed. Will not be
		changed on error.
	\return \c B_OK, when the vnode was successfully created and inserted or
		a node with the given ID was found, \c B_NO_MEMORY or
		\c B_ENTRY_NOT_FOUND on error.
*/
static status_t
create_new_vnode_and_lock(dev_t mountID, ino_t vnodeID, struct vnode*& _vnode,
	bool& _nodeCreated)
{
	FUNCTION(("create_new_vnode_and_lock()\n"));

	struct vnode* vnode = (struct vnode*)malloc(sizeof(struct vnode));
	if (vnode == NULL)
		return B_NO_MEMORY;

	// initialize basic values
	memset(vnode, 0, sizeof(struct vnode));
	vnode->device = mountID;
	vnode->id = vnodeID;
	vnode->ref_count = 1;
	vnode->SetBusy(true);

	// look up the node -- it might have been added by someone else in the
	// meantime
	rw_lock_write_lock(&sVnodeLock);
	struct vnode* existingVnode = lookup_vnode(mountID, vnodeID);
	if (existingVnode != NULL) {
		free(vnode);
		_vnode = existingVnode;
		_nodeCreated = false;
		return B_OK;
	}

	// get the mount structure
	mutex_lock(&sMountMutex);
	vnode->mount = find_mount(mountID);
	if (!vnode->mount || vnode->mount->unmounting) {
		mutex_unlock(&sMountMutex);
		rw_lock_write_unlock(&sVnodeLock);
		free(vnode);
		return B_ENTRY_NOT_FOUND;
	}

	// add the vnode to the mount's node list and the hash table
	sVnodeTable->Insert(vnode);
	add_vnode_to_mount_list(vnode, vnode->mount);

	mutex_unlock(&sMountMutex);

	_vnode = vnode;
	_nodeCreated = true;

	// keep the vnode lock locked
	return B_OK;
}
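
// Illustrative caller sketch (assumed, condensed from get_vnode() below): on
// B_OK the caller owns sVnodeLock write-locked and must release it itself,
// regardless of whether the node was newly created or already existed.
//
//	bool nodeCreated;
//	struct vnode* vnode;
//	if (create_new_vnode_and_lock(mountID, vnodeID, vnode,
//			nodeCreated) == B_OK) {
//		if (!nodeCreated) {
//			// someone else inserted it first; vnode is the existing one
//		}
//		rw_lock_write_unlock(&sVnodeLock);
//	}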
/*!	Frees the vnode and all resources it has acquired, and removes
	it from the vnode hash as well as from its mount structure.
	Will also make sure that any cache modifications are written back.
*/
static void
free_vnode(struct vnode* vnode, bool reenter)
{
	ASSERT_PRINT(vnode->ref_count == 0 && vnode->IsBusy(), "vnode: %p\n",
		vnode);

	// write back any changes in this vnode's cache -- but only
	// if the vnode won't be deleted, in which case the changes
	// will be discarded

	if (!vnode->IsRemoved() && HAS_FS_CALL(vnode, fsync))
		FS_CALL_NO_PARAMS(vnode, fsync);

	// Note: If this vnode has a cache attached, there will still be two
	// references to that cache at this point. The last one belongs to the vnode
	// itself (cf. vfs_get_vnode_cache()) and one belongs to the node's file
	// cache. Each but the last reference to a cache also includes a reference
	// to the vnode. The file cache, however, released its reference (cf.
	// file_cache_create()), so that this vnode's ref count has the chance to
	// ever drop to 0. Deleting the file cache now, will cause the next to last
	// cache reference to be released, which will also release a (no longer
	// existing) vnode reference. To avoid problems, we set the vnode's ref
	// count, so that it will neither become negative nor 0.
	vnode->ref_count = 2;

	if (!vnode->IsUnpublished()) {
		if (vnode->IsRemoved())
			FS_CALL(vnode, remove_vnode, reenter);
		else
			FS_CALL(vnode, put_vnode, reenter);
	}

	// If the vnode has a VMCache attached, make sure that it won't try to get
	// another reference via VMVnodeCache::AcquireUnreferencedStoreRef(). As
	// long as the vnode is busy and in the hash, that won't happen, but as
	// soon as we've removed it from the hash, it could reload the vnode -- with
	// a new cache attached!
	if (vnode->cache != NULL)
		((VMVnodeCache*)vnode->cache)->VnodeDeleted();

	// The file system has removed the resources of the vnode now, so we can
	// make it available again (by removing the busy vnode from the hash).
	rw_lock_write_lock(&sVnodeLock);
	sVnodeTable->Remove(vnode);
	rw_lock_write_unlock(&sVnodeLock);

	// if we have a VMCache attached, remove it
	if (vnode->cache)
		vnode->cache->ReleaseRef();

	vnode->cache = NULL;

	remove_vnode_from_mount_list(vnode, vnode->mount);

	free(vnode);
}
/*!	\brief Decrements the reference counter of the given vnode and deletes it,
	if the counter dropped to 0.

	The caller must, of course, own a reference to the vnode to call this
	function.
	The caller must not hold the sVnodeLock or the sMountMutex.

	\param vnode the vnode.
	\param alwaysFree don't move this vnode into the unused list, but really
		delete it if possible.
	\param reenter \c true, if this function is called (indirectly) from within
		a file system. This will be passed to file system hooks only.
	\return \c B_OK, if everything went fine, an error code otherwise.
*/
static status_t
dec_vnode_ref_count(struct vnode* vnode, bool alwaysFree, bool reenter)
{
	ReadLocker locker(sVnodeLock);
	AutoLocker<Vnode> nodeLocker(vnode);

	int32 oldRefCount = atomic_add(&vnode->ref_count, -1);

	ASSERT_PRINT(oldRefCount > 0, "vnode %p\n", vnode);

	TRACE(("dec_vnode_ref_count: vnode %p, ref now %" B_PRId32 "\n", vnode,
		vnode->ref_count));

	if (oldRefCount != 1)
		return B_OK;

	if (vnode->IsBusy())
		panic("dec_vnode_ref_count: called on busy vnode %p\n", vnode);

	bool freeNode = false;
	bool freeUnusedNodes = false;

	// Just insert the vnode into an unused list if we don't need
	// to delete it
	if (vnode->IsRemoved() || alwaysFree) {
		vnode_to_be_freed(vnode);
		vnode->SetBusy(true);
		freeNode = true;
	} else
		freeUnusedNodes = vnode_unused(vnode);

	nodeLocker.Unlock();
	locker.Unlock();

	if (freeNode)
		free_vnode(vnode, reenter);
	else if (freeUnusedNodes)
		free_unused_vnodes();

	return B_OK;
}
/*!	\brief Increments the reference counter of the given vnode.

	The caller must make sure that the node isn't deleted while this function
	is called. This can be done either:
	- by ensuring that a reference to the node exists and remains in existence,
	  or
	- by holding the vnode's lock (which also requires read locking sVnodeLock)
	  or by holding sVnodeLock write locked.

	In the second case the caller is responsible for dealing with the ref count
	0 -> 1 transition. That is 1. this function must not be invoked when the
	node is busy in the first place and 2. vnode_used() must be called for the
	node.

	\param vnode the vnode.
*/
static void
inc_vnode_ref_count(struct vnode* vnode)
{
	atomic_add(&vnode->ref_count, 1);
	TRACE(("inc_vnode_ref_count: vnode %p, ref now %" B_PRId32 "\n", vnode,
		vnode->ref_count));
}
static bool
is_special_node_type(int type)
{
	// at the moment only FIFOs are supported
	return S_ISFIFO(type);
}


static status_t
create_special_sub_node(struct vnode* vnode, uint32 flags)
{
	if (S_ISFIFO(vnode->Type()))
		return create_fifo_vnode(vnode->mount->volume, vnode);

	return B_BAD_VALUE;
}
/*!	\brief Retrieves a vnode for a given mount ID, node ID pair.

	If the node is not yet in memory, it will be loaded.

	The caller must not hold the sVnodeLock or the sMountMutex.

	\param mountID the mount ID.
	\param vnodeID the node ID.
	\param _vnode Pointer to a vnode* variable into which the pointer to the
		retrieved vnode structure shall be written.
	\param canWait \c true, if this function may wait for a busy vnode to
		become unbusy.
	\param reenter \c true, if this function is called (indirectly) from within
		a file system.
	\return \c B_OK, if everything went fine, an error code otherwise.
*/
static status_t
get_vnode(dev_t mountID, ino_t vnodeID, struct vnode** _vnode, bool canWait,
	int reenter)
{
	FUNCTION(("get_vnode: mountid %" B_PRId32 " vnid 0x%" B_PRIx64 " %p\n",
		mountID, vnodeID, _vnode));

	rw_lock_read_lock(&sVnodeLock);

	int32 tries = BUSY_VNODE_RETRIES;
restart:
	struct vnode* vnode = lookup_vnode(mountID, vnodeID);
	AutoLocker<Vnode> nodeLocker(vnode);

	if (vnode && vnode->IsBusy()) {
		nodeLocker.Unlock();
		rw_lock_read_unlock(&sVnodeLock);
		if (!canWait) {
			dprintf("vnode %" B_PRIdDEV ":%" B_PRIdINO " is busy!\n",
				mountID, vnodeID);
			return B_BUSY;
		}
		if (!retry_busy_vnode(tries, mountID, vnodeID))
			return B_BUSY;

		rw_lock_read_lock(&sVnodeLock);
		goto restart;
	}

	TRACE(("get_vnode: tried to lookup vnode, got %p\n", vnode));

	status_t status;

	if (vnode) {
		if (vnode->ref_count == 0) {
			// this vnode has been unused before
			vnode_used(vnode);
		}
		inc_vnode_ref_count(vnode);

		nodeLocker.Unlock();
		rw_lock_read_unlock(&sVnodeLock);
	} else {
		// we need to create a new vnode and read it in
		rw_lock_read_unlock(&sVnodeLock);
			// unlock -- create_new_vnode_and_lock() write-locks on success
		bool nodeCreated;
		status = create_new_vnode_and_lock(mountID, vnodeID, vnode,
			nodeCreated);
		if (status != B_OK)
			return status;

		if (!nodeCreated) {
			rw_lock_read_lock(&sVnodeLock);
			rw_lock_write_unlock(&sVnodeLock);
			goto restart;
		}

		rw_lock_write_unlock(&sVnodeLock);

		int type;
		uint32 flags;
		status = FS_MOUNT_CALL(vnode->mount, get_vnode, vnodeID, vnode, &type,
			&flags, reenter);
		if (status == B_OK && vnode->private_node == NULL)
			status = B_BAD_VALUE;

		bool gotNode = status == B_OK;
		bool publishSpecialSubNode = false;
		if (gotNode) {
			vnode->SetType(type);
			publishSpecialSubNode = is_special_node_type(type)
				&& (flags & B_VNODE_DONT_CREATE_SPECIAL_SUB_NODE) == 0;
		}

		if (gotNode && publishSpecialSubNode)
			status = create_special_sub_node(vnode, flags);

		if (status != B_OK) {
			if (gotNode)
				FS_CALL(vnode, put_vnode, reenter);

			rw_lock_write_lock(&sVnodeLock);
			sVnodeTable->Remove(vnode);
			remove_vnode_from_mount_list(vnode, vnode->mount);
			rw_lock_write_unlock(&sVnodeLock);

			free(vnode);
			return status;
		}

		rw_lock_read_lock(&sVnodeLock);
		vnode->Lock();

		vnode->SetRemoved((flags & B_VNODE_PUBLISH_REMOVED) != 0);
		vnode->SetBusy(false);

		vnode->Unlock();
		rw_lock_read_unlock(&sVnodeLock);
	}

	TRACE(("get_vnode: returning %p\n", vnode));

	*_vnode = vnode;
	return B_OK;
}
/*!	\brief Decrements the reference counter of the given vnode and deletes it,
	if the counter dropped to 0.

	The caller must, of course, own a reference to the vnode to call this
	function.
	The caller must not hold the sVnodeLock or the sMountMutex.

	\param vnode the vnode.
*/
static inline void
put_vnode(struct vnode* vnode)
{
	dec_vnode_ref_count(vnode, false, false);
}
static void
free_unused_vnodes(int32 level)
{
	unused_vnodes_check_started();

	if (level == B_NO_LOW_RESOURCE) {
		unused_vnodes_check_done();
		return;
	}

	flush_hot_vnodes();

	// determine how many nodes to free
	uint32 count = 1;
	{
		MutexLocker unusedVnodesLocker(sUnusedVnodesLock);

		switch (level) {
			case B_LOW_RESOURCE_NOTE:
				count = sUnusedVnodes / 100;
				break;
			case B_LOW_RESOURCE_WARNING:
				count = sUnusedVnodes / 10;
				break;
			case B_LOW_RESOURCE_CRITICAL:
				count = sUnusedVnodes;
				break;
		}

		if (count > sUnusedVnodes)
			count = sUnusedVnodes;
	}

	// Write back the modified pages of some unused vnodes and free them.

	for (uint32 i = 0; i < count; i++) {
		ReadLocker vnodesReadLocker(sVnodeLock);

		// get the first node
		MutexLocker unusedVnodesLocker(sUnusedVnodesLock);
		struct vnode* vnode = (struct vnode*)list_get_first_item(
			&sUnusedVnodeList);
		unusedVnodesLocker.Unlock();

		if (vnode == NULL)
			break;

		// lock the node
		AutoLocker<Vnode> nodeLocker(vnode);

		// Check whether the node is still unused -- since we only append to the
		// tail of the unused queue, the vnode should still be at its head.
		// Alternatively we could check its ref count for 0 and its busy flag,
		// but if the node is no longer at the head of the queue, it means it
		// has been touched in the meantime, i.e. it is no longer the least
		// recently used unused vnode and we rather don't free it.
		unusedVnodesLocker.Lock();
		if (vnode != list_get_first_item(&sUnusedVnodeList))
			continue;
		unusedVnodesLocker.Unlock();

		ASSERT(!vnode->IsBusy());

		// grab a reference
		inc_vnode_ref_count(vnode);
		vnode_used(vnode);

		// write back changes and free the node
		nodeLocker.Unlock();
		vnodesReadLocker.Unlock();

		if (vnode->cache != NULL)
			vnode->cache->WriteModified();

		dec_vnode_ref_count(vnode, true, false);
			// this should free the vnode when it's still unused
	}

	unused_vnodes_check_done();
}
/*!	Gets the vnode the given vnode is covering.

	The caller must have \c sVnodeLock read-locked at least.

	The function returns a reference to the retrieved vnode (if any), the
	caller is responsible to free.

	\param vnode The vnode whose covered node shall be returned.
	\return The covered vnode, or \c NULL if the given vnode doesn't cover any
		vnode.
*/
static inline Vnode*
get_covered_vnode_locked(Vnode* vnode)
{
	if (Vnode* coveredNode = vnode->covers) {
		while (coveredNode->covers != NULL)
			coveredNode = coveredNode->covers;

		inc_vnode_ref_count(coveredNode);
		return coveredNode;
	}

	return NULL;
}


/*!	Gets the vnode the given vnode is covering.

	The caller must not hold \c sVnodeLock. Note that this implies a race
	condition, since the situation can change at any time.

	The function returns a reference to the retrieved vnode (if any), the
	caller is responsible to free.

	\param vnode The vnode whose covered node shall be returned.
	\return The covered vnode, or \c NULL if the given vnode doesn't cover any
		vnode.
*/
static inline Vnode*
get_covered_vnode(Vnode* vnode)
{
	if (!vnode->IsCovering())
		return NULL;

	ReadLocker vnodeReadLocker(sVnodeLock);
	return get_covered_vnode_locked(vnode);
}


/*!	Gets the vnode the given vnode is covered by.

	The caller must have \c sVnodeLock read-locked at least.

	The function returns a reference to the retrieved vnode (if any), the
	caller is responsible to free.

	\param vnode The vnode whose covering node shall be returned.
	\return The covering vnode, or \c NULL if the given vnode isn't covered by
		any vnode.
*/
static Vnode*
get_covering_vnode_locked(Vnode* vnode)
{
	if (Vnode* coveringNode = vnode->covered_by) {
		while (coveringNode->covered_by != NULL)
			coveringNode = coveringNode->covered_by;

		inc_vnode_ref_count(coveringNode);
		return coveringNode;
	}

	return NULL;
}


/*!	Gets the vnode the given vnode is covered by.

	The caller must not hold \c sVnodeLock. Note that this implies a race
	condition, since the situation can change at any time.

	The function returns a reference to the retrieved vnode (if any), the
	caller is responsible to free.

	\param vnode The vnode whose covering node shall be returned.
	\return The covering vnode, or \c NULL if the given vnode isn't covered by
		any vnode.
*/
static inline Vnode*
get_covering_vnode(Vnode* vnode)
{
	if (!vnode->IsCovered())
		return NULL;

	ReadLocker vnodeReadLocker(sVnodeLock);
	return get_covering_vnode_locked(vnode);
}
static void
free_unused_vnodes()
{
	free_unused_vnodes(
		low_resource_state(B_KERNEL_RESOURCE_PAGES | B_KERNEL_RESOURCE_MEMORY
			| B_KERNEL_RESOURCE_ADDRESS_SPACE));
}


static void
vnode_low_resource_handler(void* /*data*/, uint32 resources, int32 level)
{
	TRACE(("vnode_low_resource_handler(level = %" B_PRId32 ")\n", level));

	free_unused_vnodes(level);
}
static inline void
put_advisory_locking(struct advisory_locking* locking)
{
	release_sem(locking->lock);
}


/*!	Returns the advisory_locking object of the \a vnode in case it
	has one, and locks it.
	You have to call put_advisory_locking() when you're done with it.
	Note, you must not have the vnode mutex locked when calling
	this function.
*/
static struct advisory_locking*
get_advisory_locking(struct vnode* vnode)
{
	rw_lock_read_lock(&sVnodeLock);
	vnode->Lock();

	struct advisory_locking* locking = vnode->advisory_locking;
	sem_id lock = locking != NULL ? locking->lock : B_ERROR;

	vnode->Unlock();
	rw_lock_read_unlock(&sVnodeLock);

	if (lock >= 0)
		lock = acquire_sem(lock);
	if (lock < 0) {
		// This means the locking has been deleted in the mean time
		// or had never existed in the first place - otherwise, we
		// would get the lock at some point.
		return NULL;
	}

	return locking;
}
/*!	Creates a locked advisory_locking object, and attaches it to the
	given \a vnode.
	Returns B_OK in case of success - also if the vnode got such an
	object from someone else in the mean time, you'll still get this
	one locked then.
*/
static status_t
create_advisory_locking(struct vnode* vnode)
{
	if (vnode == NULL)
		return B_FILE_ERROR;

	ObjectDeleter<advisory_locking> lockingDeleter;
	struct advisory_locking* locking = NULL;

	while (get_advisory_locking(vnode) == NULL) {
		// no locking object set on the vnode yet, create one
		if (locking == NULL) {
			locking = new(std::nothrow) advisory_locking;
			if (locking == NULL)
				return B_NO_MEMORY;
			lockingDeleter.SetTo(locking);

			locking->wait_sem = create_sem(0, "advisory lock");
			if (locking->wait_sem < 0)
				return locking->wait_sem;

			locking->lock = create_sem(0, "advisory locking");
			if (locking->lock < 0)
				return locking->lock;
		}

		// set our newly created locking object
		ReadLocker _(sVnodeLock);
		AutoLocker<Vnode> nodeLocker(vnode);
		if (vnode->advisory_locking == NULL) {
			vnode->advisory_locking = locking;
			lockingDeleter.Detach();
			return B_OK;
		}
	}

	// The vnode already had a locking object. That's just as well.

	return B_OK;
}
/*!	Returns \c true when either \a flock is \c NULL or the \a flock intersects
	with the advisory_lock \a lock.
*/
static bool
advisory_lock_intersects(struct advisory_lock* lock, struct flock* flock)
{
	if (flock == NULL)
		return true;

	return lock->start <= flock->l_start - 1 + flock->l_len
		&& lock->end >= flock->l_start;
}
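
// Worked example (illustrative): advisory_lock ranges are closed intervals
// [start, end], while an flock describes [l_start, l_start + l_len - 1].
// For lock->start = 0, lock->end = 99 and flock->l_start = 100,
// flock->l_len = 50, the test is 0 <= 149 && 99 >= 100, i.e. false -- the
// regions merely touch, they do not overlap.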
/*!	Tests whether acquiring a lock would block.
*/
static status_t
test_advisory_lock(struct vnode* vnode, struct flock* flock)
{
	flock->l_type = F_UNLCK;

	struct advisory_locking* locking = get_advisory_locking(vnode);
	if (locking == NULL)
		return B_OK;

	team_id team = team_get_current_team_id();

	LockList::Iterator iterator = locking->locks.GetIterator();
	while (iterator.HasNext()) {
		struct advisory_lock* lock = iterator.Next();

		if (lock->team != team && advisory_lock_intersects(lock, flock)) {
			// locks do overlap
			if (flock->l_type != F_RDLCK || !lock->shared) {
				// collision
				flock->l_type = lock->shared ? F_RDLCK : F_WRLCK;
				flock->l_whence = SEEK_SET;
				flock->l_start = lock->start;
				flock->l_len = lock->end - lock->start + 1;
				flock->l_pid = lock->team;
				break;
			}
		}
	}

	put_advisory_locking(locking);
	return B_OK;
}
/*!	Removes the specified lock, or all locks of the calling team
	if \a flock is NULL.
*/
static status_t
release_advisory_lock(struct vnode* vnode, struct flock* flock)
{
	FUNCTION(("release_advisory_lock(vnode = %p, flock = %p)\n", vnode, flock));

	struct advisory_locking* locking = get_advisory_locking(vnode);
	if (locking == NULL)
		return B_OK;

	// TODO: use the thread ID instead??
	team_id team = team_get_current_team_id();
	pid_t session = thread_get_current_thread()->team->session_id;

	// find matching lock entries

	LockList::Iterator iterator = locking->locks.GetIterator();
	while (iterator.HasNext()) {
		struct advisory_lock* lock = iterator.Next();
		bool removeLock = false;

		if (lock->session == session)
			removeLock = true;
		else if (lock->team == team && advisory_lock_intersects(lock, flock)) {
			bool endsBeyond = false;
			bool startsBefore = false;
			if (flock != NULL) {
				startsBefore = lock->start < flock->l_start;
				endsBeyond = lock->end > flock->l_start - 1 + flock->l_len;
			}

			if (!startsBefore && !endsBeyond) {
				// lock is completely contained in flock
				removeLock = true;
			} else if (startsBefore && !endsBeyond) {
				// cut the end of the lock
				lock->end = flock->l_start - 1;
			} else if (!startsBefore && endsBeyond) {
				// cut the start of the lock
				lock->start = flock->l_start + flock->l_len;
			} else {
				// divide the lock into two locks
				struct advisory_lock* secondLock = (struct advisory_lock*)
					malloc(sizeof(struct advisory_lock));
					// allocated with malloc() to match the free() above and
					// the allocation in acquire_advisory_lock()
				if (secondLock == NULL) {
					// TODO: we should probably revert the locks we already
					// changed... (ie. allocate upfront)
					put_advisory_locking(locking);
					return B_NO_MEMORY;
				}

				// Remember the original end before cutting the first lock, so
				// the second lock keeps the region beyond the released range.
				off_t originalEnd = lock->end;
				lock->end = flock->l_start - 1;

				secondLock->team = lock->team;
				secondLock->session = lock->session;
				// values must already be normalized when getting here
				secondLock->start = flock->l_start + flock->l_len;
				secondLock->end = originalEnd;
				secondLock->shared = lock->shared;

				locking->locks.Add(secondLock);
			}
		}

		if (removeLock) {
			// this lock is no longer used
			iterator.Remove();
			free(lock);
		}
	}

	bool removeLocking = locking->locks.IsEmpty();
	release_sem_etc(locking->wait_sem, 1, B_RELEASE_ALL);

	put_advisory_locking(locking);

	if (removeLocking) {
		// We can remove the whole advisory locking structure; it's no
		// longer used
		locking = get_advisory_locking(vnode);
		if (locking != NULL) {
			ReadLocker locker(sVnodeLock);
			AutoLocker<Vnode> nodeLocker(vnode);

			// the locking could have been changed in the mean time
			if (locking->locks.IsEmpty()) {
				vnode->advisory_locking = NULL;
				nodeLocker.Unlock();
				locker.Unlock();

				// we've detached the locking from the vnode, so we can
				// safely delete it
				delete locking;
			} else {
				// the locking is in use again
				nodeLocker.Unlock();
				locker.Unlock();
				release_sem_etc(locking->lock, 1, B_DO_NOT_RESCHEDULE);
			}
		}
	}

	return B_OK;
}
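
// Worked example (illustrative) for the "divide" branch above: releasing
// [20, 29] (flock->l_start = 20, l_len = 10) from an existing lock spanning
// [0, 99] shrinks the original lock to [0, 19] and adds a second lock
// covering [30, 99] with the same team, session, and shared mode.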
/*!	Acquires an advisory lock for the \a vnode. If \a wait is \c true, it
	will wait for the lock to become available, if there are any collisions
	(it will return B_PERMISSION_DENIED in this case if \a wait is \c false).

	If \a session is -1, POSIX semantics are used for this lock. Otherwise,
	BSD flock() semantics are used, that is, all children can unlock the file
	in question (we even allow parents to remove the lock, though, but that
	seems to be in line with what the BSDs are doing).
*/
static status_t
acquire_advisory_lock(struct vnode* vnode, pid_t session, struct flock* flock,
	bool wait)
{
	FUNCTION(("acquire_advisory_lock(vnode = %p, flock = %p, wait = %s)\n",
		vnode, flock, wait ? "yes" : "no"));

	bool shared = flock->l_type == F_RDLCK;
	status_t status = B_OK;

	// TODO: do deadlock detection!

	struct advisory_locking* locking;

	while (true) {
		// if this vnode has an advisory_locking structure attached,
		// lock that one and search for any colliding file lock
		status = create_advisory_locking(vnode);
		if (status != B_OK)
			return status;

		locking = vnode->advisory_locking;
		team_id team = team_get_current_team_id();
		sem_id waitForLock = -1;

		// test for collisions
		LockList::Iterator iterator = locking->locks.GetIterator();
		while (iterator.HasNext()) {
			struct advisory_lock* lock = iterator.Next();

			// TODO: locks from the same team might be joinable!
			if (lock->team != team && advisory_lock_intersects(lock, flock)) {
				// locks do overlap
				if (!shared || !lock->shared) {
					// we need to wait
					waitForLock = locking->wait_sem;
					break;
				}
			}
		}

		if (waitForLock < 0)
			break;

		// We need to wait. Do that or fail now, if we've been asked not to.

		if (!wait) {
			put_advisory_locking(locking);
			return session != -1 ? B_WOULD_BLOCK : B_PERMISSION_DENIED;
		}

		status = switch_sem_etc(locking->lock, waitForLock, 1,
			B_CAN_INTERRUPT, 0);
		if (status != B_OK && status != B_BAD_SEM_ID)
			return status;

		// We have been notified, but we need to re-lock the locking object. So
		// go another round...
	}

	// install new lock

	struct advisory_lock* lock = (struct advisory_lock*)malloc(
		sizeof(struct advisory_lock));
	if (lock == NULL) {
		put_advisory_locking(locking);
		return B_NO_MEMORY;
	}

	lock->team = team_get_current_team_id();
	lock->session = session;
	// values must already be normalized when getting here
	lock->start = flock->l_start;
	lock->end = flock->l_start - 1 + flock->l_len;
	lock->shared = shared;

	locking->locks.Add(lock);
	put_advisory_locking(locking);

	return status;
}
/*!	Normalizes the \a flock structure to make it easier to compare the
	structure with others. The l_start and l_len fields are set to absolute
	values according to the l_whence field.
*/
static status_t
normalize_flock(struct file_descriptor* descriptor, struct flock* flock)
{
	switch (flock->l_whence) {
		case SEEK_SET:
			break;
		case SEEK_CUR:
			flock->l_start += descriptor->pos;
			break;
		case SEEK_END:
		{
			struct vnode* vnode = descriptor->u.vnode;
			struct stat stat;
			status_t status;

			if (!HAS_FS_CALL(vnode, read_stat))
				return B_UNSUPPORTED;

			status = FS_CALL(vnode, read_stat, &stat);
			if (status != B_OK)
				return status;

			flock->l_start += stat.st_size;
			break;
		}
		default:
			return B_BAD_VALUE;
	}

	if (flock->l_start < 0)
		flock->l_start = 0;
	if (flock->l_len == 0)
		flock->l_len = OFF_MAX;

	// don't let the offset and length overflow
	if (flock->l_start > 0 && OFF_MAX - flock->l_start < flock->l_len)
		flock->l_len = OFF_MAX - flock->l_start;

	if (flock->l_len < 0) {
		// a negative length reverses the region
		flock->l_start += flock->l_len;
		flock->l_len = -flock->l_len;
	}

	return B_OK;
}
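
// Worked example (illustrative): with descriptor->pos = 100, an flock of
// { l_whence = SEEK_CUR, l_start = -20, l_len = -30 } first becomes
// l_start = 80, and the negative length then reverses the region to
// l_start = 50, l_len = 30 -- i.e. the byte range [50, 79].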
static void
replace_vnode_if_disconnected(struct fs_mount* mount,
	struct vnode* vnodeToDisconnect, struct vnode*& vnode,
	struct vnode* fallBack, bool lockRootLock)
{
	struct vnode* givenVnode = vnode;
	bool vnodeReplaced = false;

	ReadLocker vnodeReadLocker(sVnodeLock);

	if (lockRootLock)
		mutex_lock(&sIOContextRootLock);

	while (vnode != NULL && vnode->mount == mount
		&& (vnodeToDisconnect == NULL || vnodeToDisconnect == vnode)) {
		if (vnode->covers != NULL) {
			// redirect the vnode to the covered vnode
			vnode = vnode->covers;
		} else
			vnode = fallBack;

		vnodeReplaced = true;
	}

	// If we've replaced the node, grab a reference for the new one.
	if (vnodeReplaced && vnode != NULL)
		inc_vnode_ref_count(vnode);

	if (lockRootLock)
		mutex_unlock(&sIOContextRootLock);

	vnodeReadLocker.Unlock();

	if (vnodeReplaced)
		put_vnode(givenVnode);
}
/*!	Disconnects all file descriptors that are associated with the
	\a vnodeToDisconnect, or if this is NULL, all vnodes of the specified
	\a mount object.

	Note, after you've called this function, there might still be ongoing
	accesses - they won't be interrupted if they already happened before.
	However, any subsequent access will fail.

	This is not a cheap function and should be used with care and rarely.
	TODO: there is currently no means to stop a blocking read/write!
*/
static void
disconnect_mount_or_vnode_fds(struct fs_mount* mount,
	struct vnode* vnodeToDisconnect)
{
	// iterate over all teams and peek into their file descriptors
	TeamListIterator teamIterator;
	while (Team* team = teamIterator.Next()) {
		BReference<Team> teamReference(team, true);

		// lock the I/O context
		io_context* context = team->io_context;
		MutexLocker contextLocker(context->io_mutex);

		replace_vnode_if_disconnected(mount, vnodeToDisconnect, context->root,
			sRoot, true);
		replace_vnode_if_disconnected(mount, vnodeToDisconnect, context->cwd,
			sRoot, false);

		for (uint32 i = 0; i < context->table_size; i++) {
			if (struct file_descriptor* descriptor = context->fds[i]) {
				inc_fd_ref_count(descriptor);

				// if this descriptor points at this mount, we
				// need to disconnect it to be able to unmount
				struct vnode* vnode = fd_vnode(descriptor);
				if (vnodeToDisconnect != NULL) {
					if (vnode == vnodeToDisconnect)
						disconnect_fd(descriptor);
				} else if ((vnode != NULL && vnode->mount == mount)
					|| (vnode == NULL && descriptor->u.mount == mount))
					disconnect_fd(descriptor);

				put_fd(descriptor);
			}
		}
	}
}
/*!	\brief Gets the root node of the current IO context.
	If \a kernel is \c true, the kernel IO context will be used.
	The caller obtains a reference to the returned node.
*/
struct vnode*
get_root_vnode(bool kernel)
{
	if (!kernel) {
		// Get current working directory from io context
		struct io_context* context = get_current_io_context(kernel);

		mutex_lock(&sIOContextRootLock);

		struct vnode* root = context->root;
		if (root != NULL)
			inc_vnode_ref_count(root);

		mutex_unlock(&sIOContextRootLock);

		if (root != NULL)
			return root;

		// That should never happen.
		dprintf("get_root_vnode(): IO context for team %" B_PRId32 " doesn't "
			"have a root\n", team_get_current_team_id());
	}

	inc_vnode_ref_count(sRoot);
	return sRoot;
}
/*!	\brief Gets the directory path and leaf name for a given path.

	The supplied \a path is transformed to refer to the directory part of
	the entry identified by the original path, and into the buffer \a filename
	the leaf name of the original entry is written.
	Neither the returned path nor the leaf name can be expected to be
	canonical.

	\param path The path to be analyzed. Must be able to store at least one
		additional character.
	\param filename The buffer into which the leaf name will be written.
		Must be of size B_FILE_NAME_LENGTH at least.
	\return \c B_OK, if everything went fine, \c B_NAME_TOO_LONG, if the leaf
		name is longer than \c B_FILE_NAME_LENGTH, or \c B_ENTRY_NOT_FOUND,
		if the given path name is empty.
*/
static status_t
get_dir_path_and_leaf(char* path, char* filename)
{
	if (*path == '\0')
		return B_ENTRY_NOT_FOUND;

	char* last = strrchr(path, '/');
		// '/' are not allowed in file names!

	FUNCTION(("get_dir_path_and_leaf(path = %s)\n", path));

	if (last == NULL) {
		// this path is single segment with no '/' in it
		// ex. "foo"
		if (strlcpy(filename, path, B_FILE_NAME_LENGTH) >= B_FILE_NAME_LENGTH)
			return B_NAME_TOO_LONG;

		strcpy(path, ".");
	} else {
		last++;
		if (last[0] == '\0') {
			// special case: the path ends in one or more '/' - remove them
			while (*--last == '/' && last != path);
			last[1] = '\0';

			if (last == path && last[0] == '/') {
				// This path points to the root of the file system
				strcpy(filename, ".");
				return B_OK;
			}
			for (; last != path && *(last - 1) != '/'; last--);
				// rewind to the start of the leaf before the '/'
		}

		// normal leaf: replace the leaf portion of the path with a '.'
		if (strlcpy(filename, last, B_FILE_NAME_LENGTH) >= B_FILE_NAME_LENGTH)
			return B_NAME_TOO_LONG;

		last[0] = '.';
		last[1] = '\0';
	}
	return B_OK;
}
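
// Worked example (illustrative): passing path = "/boot/home/file.txt" leaves
// path as "/boot/home/." and writes "file.txt" into filename; with a trailing
// slash ("/boot/home/") the slashes are stripped first, so the leaf becomes
// "home" and path becomes "/boot/.".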
static status_t
entry_ref_to_vnode(dev_t mountID, ino_t directoryID, const char* name,
	bool traverse, bool kernel, struct vnode** _vnode)
{
	char clonedName[B_FILE_NAME_LENGTH + 1];
	if (strlcpy(clonedName, name, B_FILE_NAME_LENGTH) >= B_FILE_NAME_LENGTH)
		return B_NAME_TOO_LONG;

	// get the directory vnode and let vnode_path_to_vnode() do the rest
	struct vnode* directory;

	status_t status = get_vnode(mountID, directoryID, &directory, true, false);
	if (status < 0)
		return status;

	return vnode_path_to_vnode(directory, clonedName, traverse, 0, kernel,
		_vnode, NULL);
}
2077 /*! Looks up the entry with name \a name in the directory represented by \a dir
2078 and returns the respective vnode.
2079 On success a reference to the vnode is acquired for the caller.
2081 static status_t
2082 lookup_dir_entry(struct vnode* dir, const char* name, struct vnode** _vnode)
2084 ino_t id;
2085 bool missing;
2087 if (dir->mount->entry_cache.Lookup(dir->id, name, id, missing)) {
2088 return missing ? B_ENTRY_NOT_FOUND
2089 : get_vnode(dir->device, id, _vnode, true, false);
2092 status_t status = FS_CALL(dir, lookup, name, &id);
2093 if (status != B_OK)
2094 return status;
2096 // The lookup() hook calls get_vnode() or publish_vnode(), so we already
2097 // have a reference and just need to look the node up.
2098 rw_lock_read_lock(&sVnodeLock);
2099 *_vnode = lookup_vnode(dir->device, id);
2100 rw_lock_read_unlock(&sVnodeLock);
2102 if (*_vnode == NULL) {
2103 panic("lookup_dir_entry(): could not lookup vnode (mountid 0x%" B_PRIx32
2104 " vnid 0x%" B_PRIx64 ")\n", dir->device, id);
2105 return B_ENTRY_NOT_FOUND;
2108 // ktrace_printf("lookup_dir_entry(): dir: %p (%ld, %lld), name: \"%s\" -> "
2109 // "%p (%ld, %lld)", dir, dir->mount->id, dir->id, name, *_vnode,
2110 // (*_vnode)->mount->id, (*_vnode)->id);
2112 return B_OK;
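// Typical use (sketch): the returned vnode carries a reference the caller
// has to release again; `dir` is a directory vnode the caller already
// holds a reference to:
//
//    struct vnode* child;
//    if (lookup_dir_entry(dir, "data", &child) == B_OK) {
//        // ... use child ...
//        put_vnode(child);
//    }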
2116 /*! Returns the vnode for the relative path starting at the specified \a vnode.
2117 \a path must not be NULL.
2118 If it returns successfully, \a path contains the name of the last path
2119 component. This function clobbers the buffer pointed to by \a path only
2120 if it contains more than one component.
2121 Note that this function decrements the ref count of the starting \a vnode,
2122 whether it succeeds or not!
2124 static status_t
2125 vnode_path_to_vnode(struct vnode* vnode, char* path, bool traverseLeafLink,
2126 int count, struct io_context* ioContext, struct vnode** _vnode,
2127 ino_t* _parentID)
2129 status_t status = B_OK;
2130 ino_t lastParentID = vnode->id;
2132 FUNCTION(("vnode_path_to_vnode(vnode = %p, path = %s)\n", vnode, path));
2134 if (path == NULL) {
2135 put_vnode(vnode);
2136 return B_BAD_VALUE;
2139 if (*path == '\0') {
2140 put_vnode(vnode);
2141 return B_ENTRY_NOT_FOUND;
2144 while (true) {
2145 struct vnode* nextVnode;
2146 char* nextPath;
2148 TRACE(("vnode_path_to_vnode: top of loop. path = %p '%s'\n", path,
2149 path));
2151 // done?
2152 if (path[0] == '\0')
2153 break;
2155 // walk to find the next path component ("path" will point to a single
2156 // path component), and filter out multiple slashes
2157 for (nextPath = path + 1; *nextPath != '\0' && *nextPath != '/';
2158 nextPath++);
2160 if (*nextPath == '/') {
2161 *nextPath = '\0';
2163 nextPath++;
2164 while (*nextPath == '/');
2167 // If the '..' is at a covering vnode, move to the covered
2168 // vnode, so we pass the '..' path to the underlying filesystem.
2169 // Also prevent breaking the root of the IO context.
2170 if (strcmp("..", path) == 0) {
2171 if (vnode == ioContext->root) {
2172 // Attempted prison break! Keep it contained.
2173 path = nextPath;
2174 continue;
2177 if (Vnode* coveredVnode = get_covered_vnode(vnode)) {
2178 nextVnode = coveredVnode;
2179 put_vnode(vnode);
2180 vnode = nextVnode;
2184 // check if vnode is really a directory
2185 if (status == B_OK && !S_ISDIR(vnode->Type()))
2186 status = B_NOT_A_DIRECTORY;
2188 // Check if we have the right to search the current directory vnode.
2189 // If a file system doesn't have the access() function, we assume that
2190 // searching a directory is always allowed
2191 if (status == B_OK && HAS_FS_CALL(vnode, access))
2192 status = FS_CALL(vnode, access, X_OK);
2194 // Tell the filesystem to get the vnode of this path component (if we
2195 // got the permission from the call above)
2196 if (status == B_OK)
2197 status = lookup_dir_entry(vnode, path, &nextVnode);
2199 if (status != B_OK) {
2200 put_vnode(vnode);
2201 return status;
2204 // If the new node is a symbolic link, resolve it (if we've been told
2205 // to do it)
2206 if (S_ISLNK(nextVnode->Type())
2207 && (traverseLeafLink || nextPath[0] != '\0')) {
2208 size_t bufferSize;
2209 char* buffer;
2211 TRACE(("traverse link\n"));
2213 // it's not exactly nice style using goto in this way, but hey,
2214 // it works :-/
2215 if (count + 1 > B_MAX_SYMLINKS) {
2216 status = B_LINK_LIMIT;
2217 goto resolve_link_error;
2220 buffer = (char*)malloc(bufferSize = B_PATH_NAME_LENGTH);
2221 if (buffer == NULL) {
2222 status = B_NO_MEMORY;
2223 goto resolve_link_error;
2226 if (HAS_FS_CALL(nextVnode, read_symlink)) {
2227 bufferSize--;
2228 status = FS_CALL(nextVnode, read_symlink, buffer, &bufferSize);
2229 // null-terminate
2230 if (status >= 0)
2231 buffer[bufferSize] = '\0';
2232 } else
2233 status = B_BAD_VALUE;
2235 if (status != B_OK) {
2236 free(buffer);
2238 resolve_link_error:
2239 put_vnode(vnode);
2240 put_vnode(nextVnode);
2242 return status;
2244 put_vnode(nextVnode);
2246 // Check if we start from the root directory or the current
2247 // directory ("vnode" still points to that one).
2248 // Cut off all leading slashes if it's the root directory
2249 path = buffer;
2250 bool absoluteSymlink = false;
2251 if (path[0] == '/') {
2252 // we don't need the old directory anymore
2253 put_vnode(vnode);
2255 while (*++path == '/')
2258 mutex_lock(&sIOContextRootLock);
2259 vnode = ioContext->root;
2260 inc_vnode_ref_count(vnode);
2261 mutex_unlock(&sIOContextRootLock);
2263 absoluteSymlink = true;
2266 inc_vnode_ref_count(vnode);
2267 // balance the next recursion - we will decrement the
2268 // ref_count of the vnode, whether we succeed or not
2270 if (absoluteSymlink && *path == '\0') {
2271 // symlink was just "/"
2272 nextVnode = vnode;
2273 } else {
2274 status = vnode_path_to_vnode(vnode, path, true, count + 1,
2275 ioContext, &nextVnode, &lastParentID);
2278 free(buffer);
2280 if (status != B_OK) {
2281 put_vnode(vnode);
2282 return status;
2284 } else
2285 lastParentID = vnode->id;
2287 // decrease the ref count on the old dir we just looked up into
2288 put_vnode(vnode);
2290 path = nextPath;
2291 vnode = nextVnode;
2293 // see if we hit a covered node
2294 if (Vnode* coveringNode = get_covering_vnode(vnode)) {
2295 put_vnode(vnode);
2296 vnode = coveringNode;
2300 *_vnode = vnode;
2301 if (_parentID)
2302 *_parentID = lastParentID;
2304 return B_OK;
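// Usage sketch: since this function always consumes one reference to the
// starting vnode, callers that want to keep their own reference acquire
// an extra one first (compare vnode_and_path_to_dir_vnode() below);
// `dir`, `mutablePath`, and `ioContext` are placeholders:
//
//    inc_vnode_ref_count(dir);
//        // consumed by the call below, whether it succeeds or not
//    struct vnode* result;
//    status_t error = vnode_path_to_vnode(dir, mutablePath, true, 0,
//        ioContext, &result, NULL);
//    if (error == B_OK)
//        put_vnode(result);
//
// Note that the path buffer may be clobbered by the call.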
2308 static status_t
2309 vnode_path_to_vnode(struct vnode* vnode, char* path, bool traverseLeafLink,
2310 int count, bool kernel, struct vnode** _vnode, ino_t* _parentID)
2312 return vnode_path_to_vnode(vnode, path, traverseLeafLink, count,
2313 get_current_io_context(kernel), _vnode, _parentID);
2317 static status_t
2318 path_to_vnode(char* path, bool traverseLink, struct vnode** _vnode,
2319 ino_t* _parentID, bool kernel)
2321 struct vnode* start = NULL;
2323 FUNCTION(("path_to_vnode(path = \"%s\")\n", path));
2325 if (!path)
2326 return B_BAD_VALUE;
2328 if (*path == '\0')
2329 return B_ENTRY_NOT_FOUND;
2331 // figure out if we need to start at root or at cwd
2332 if (*path == '/') {
2333 if (sRoot == NULL) {
2334 // we're a bit early, aren't we?
2335 return B_ERROR;
2338 while (*++path == '/')
2340 start = get_root_vnode(kernel);
2342 if (*path == '\0') {
2343 *_vnode = start;
2344 return B_OK;
2347 } else {
2348 struct io_context* context = get_current_io_context(kernel);
2350 mutex_lock(&context->io_mutex);
2351 start = context->cwd;
2352 if (start != NULL)
2353 inc_vnode_ref_count(start);
2354 mutex_unlock(&context->io_mutex);
2356 if (start == NULL)
2357 return B_ERROR;
2360 return vnode_path_to_vnode(start, path, traverseLink, 0, kernel, _vnode,
2361 _parentID);
2365 /*! Returns the vnode for the next-to-last segment of the path, and returns
2366 the last portion in \a filename.
2367 The path buffer must be able to store at least one additional character.
2369 static status_t
2370 path_to_dir_vnode(char* path, struct vnode** _vnode, char* filename,
2371 bool kernel)
2373 status_t status = get_dir_path_and_leaf(path, filename);
2374 if (status != B_OK)
2375 return status;
2377 return path_to_vnode(path, true, _vnode, NULL, kernel);
2381 /*! \brief Retrieves the directory vnode and the leaf name of an entry referred
2382 to by a FD + path pair.
2384 \a path must be given in either case. \a fd might be omitted, in which
2385 case \a path is either an absolute path or one relative to the current
2386 directory. If both are supplied and \a path is relative, it is reckoned off
2387 of the directory referred to by \a fd. If \a path is absolute \a fd is
2388 ignored.
2390 The caller has the responsibility to call put_vnode() on the returned
2391 directory vnode.
2393 \param fd The FD. May be < 0.
2394 \param path The absolute or relative path. Must not be \c NULL. The buffer
2395 is modified by this function. It must have at least room for a
2396 string one character longer than the path it contains.
2397 \param _vnode A pointer to a variable the directory vnode shall be written
2398 into.
2399 \param filename A buffer of size B_FILE_NAME_LENGTH or larger into which
2400 the leaf name of the specified entry will be written.
2401 \param kernel \c true, if invoked from inside the kernel, \c false if
2402 invoked from userland.
2403 \return \c B_OK, if everything went fine, another error code otherwise.
2405 static status_t
2406 fd_and_path_to_dir_vnode(int fd, char* path, struct vnode** _vnode,
2407 char* filename, bool kernel)
2409 if (!path)
2410 return B_BAD_VALUE;
2411 if (*path == '\0')
2412 return B_ENTRY_NOT_FOUND;
2413 if (fd < 0)
2414 return path_to_dir_vnode(path, _vnode, filename, kernel);
2416 status_t status = get_dir_path_and_leaf(path, filename);
2417 if (status != B_OK)
2418 return status;
2420 return fd_and_path_to_vnode(fd, path, true, _vnode, NULL, kernel);
2424 /*! \brief Retrieves the directory vnode and the leaf name of an entry referred
2425 to by a vnode + path pair.
2427 \a path must be given in either case. \a vnode might be omitted, in which
2428 case \a path is either an absolute path or one relative to the current
2429 directory. If both are supplied and \a path is relative, it is reckoned off
2430 of the directory referred to by \a vnode. If \a path is absolute \a vnode is
2431 ignored.
2433 The caller has the responsibility to call put_vnode() on the returned
2434 directory vnode.
2436 \param vnode The vnode. May be \c NULL.
2437 \param path The absolute or relative path. Must not be \c NULL. The buffer
2438 is modified by this function. It must have at least room for a
2439 string one character longer than the path it contains.
2440 \param _vnode A pointer to a variable the directory vnode shall be written
2441 into.
2442 \param filename A buffer of size B_FILE_NAME_LENGTH or larger into which
2443 the leaf name of the specified entry will be written.
2444 \param kernel \c true, if invoked from inside the kernel, \c false if
2445 invoked from userland.
2446 \return \c B_OK, if everything went fine, another error code otherwise.
2448 static status_t
2449 vnode_and_path_to_dir_vnode(struct vnode* vnode, char* path,
2450 struct vnode** _vnode, char* filename, bool kernel)
2452 if (!path)
2453 return B_BAD_VALUE;
2454 if (*path == '\0')
2455 return B_ENTRY_NOT_FOUND;
2456 if (vnode == NULL || path[0] == '/')
2457 return path_to_dir_vnode(path, _vnode, filename, kernel);
2459 status_t status = get_dir_path_and_leaf(path, filename);
2460 if (status != B_OK)
2461 return status;
2463 inc_vnode_ref_count(vnode);
2464 // vnode_path_to_vnode() always decrements the ref count
2466 return vnode_path_to_vnode(vnode, path, true, 0, kernel, _vnode, NULL);
2470 /*! Returns a vnode's name in the d_name field of a supplied dirent buffer.
2472 static status_t
2473 get_vnode_name(struct vnode* vnode, struct vnode* parent, struct dirent* buffer,
2474 size_t bufferSize, struct io_context* ioContext)
2476 if (bufferSize < sizeof(struct dirent))
2477 return B_BAD_VALUE;
2479 // See if the vnode is covering another vnode and move to the covered
2480 // vnode so we get the underlying file system
2481 VNodePutter vnodePutter;
2482 if (Vnode* coveredVnode = get_covered_vnode(vnode)) {
2483 vnode = coveredVnode;
2484 vnodePutter.SetTo(vnode);
2487 if (HAS_FS_CALL(vnode, get_vnode_name)) {
2488 // The FS supports getting the name of a vnode.
2489 if (FS_CALL(vnode, get_vnode_name, buffer->d_name,
2490 (char*)buffer + bufferSize - buffer->d_name) == B_OK)
2491 return B_OK;
2494 // The FS doesn't support getting the name of a vnode. So we search the
2495 // parent directory for the vnode, if the caller gave us one.
2497 if (parent == NULL || !HAS_FS_CALL(parent, read_dir))
2498 return B_UNSUPPORTED;
2500 void* cookie;
2502 status_t status = FS_CALL(parent, open_dir, &cookie);
2503 if (status >= B_OK) {
2504 while (true) {
2505 uint32 num = 1;
2506 // We use the FS hook directly instead of dir_read(), since we don't
2507 // want the entries to be fixed. We have already resolved vnode to
2508 // the covered node.
2509 status = FS_CALL(parent, read_dir, cookie, buffer, bufferSize,
2510 &num);
2511 if (status != B_OK)
2512 break;
2513 if (num == 0) {
2514 status = B_ENTRY_NOT_FOUND;
2515 break;
2518 if (vnode->id == buffer->d_ino) {
2519 // found correct entry!
2520 break;
2524 FS_CALL(parent, close_dir, cookie);
2525 FS_CALL(parent, free_dir_cookie, cookie);
2527 return status;
2531 static status_t
2532 get_vnode_name(struct vnode* vnode, struct vnode* parent, char* name,
2533 size_t nameSize, bool kernel)
2535 char buffer[sizeof(struct dirent) + B_FILE_NAME_LENGTH];
2536 struct dirent* dirent = (struct dirent*)buffer;
2538 status_t status = get_vnode_name(vnode, parent, dirent, sizeof(buffer),
2539 get_current_io_context(kernel));
2540 if (status != B_OK)
2541 return status;
2543 if (strlcpy(name, dirent->d_name, nameSize) >= nameSize)
2544 return B_BUFFER_OVERFLOW;
2546 return B_OK;
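// Example (sketch): resolving a node's name into a plain char buffer;
// `vnode` and `parent` are assumed to be held by the caller:
//
//    char name[B_FILE_NAME_LENGTH];
//    if (get_vnode_name(vnode, parent, name, sizeof(name), true) == B_OK)
//        dprintf("node is named \"%s\"\n", name);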
2550 /*! Gets the full path to a given directory vnode.
2551 It uses the fs_get_vnode_name() call to get the name of a vnode; if a
2552 file system doesn't support this call, it will fall back to iterating
2553 through the parent directory to get the name of the child.
2555 To protect against circular loops, it supports a maximum tree depth
2556 of 256 levels.
2558 Note that the path may not be correct by the time this function returns!
2559 It doesn't use any locking to ensure the returned path is correct, as
2560 paths aren't safe anyway: the path to a file can change at any time.
2562 It might be a good idea, though, for the calling function to check whether
2563 the returned path exists (it's not done here for efficiency reasons).
2565 static status_t
2566 dir_vnode_to_path(struct vnode* vnode, char* buffer, size_t bufferSize,
2567 bool kernel)
2569 FUNCTION(("dir_vnode_to_path(%p, %p, %lu)\n", vnode, buffer, bufferSize));
2571 if (vnode == NULL || buffer == NULL || bufferSize == 0)
2572 return B_BAD_VALUE;
2574 if (!S_ISDIR(vnode->Type()))
2575 return B_NOT_A_DIRECTORY;
2577 char* path = buffer;
2578 int32 insert = bufferSize;
2579 int32 maxLevel = 256;
2580 int32 length;
2581 status_t status = B_OK;
2582 struct io_context* ioContext = get_current_io_context(kernel);
2584 // we don't use get_vnode() here because this call is more
2585 // efficient and does all we need from get_vnode()
2586 inc_vnode_ref_count(vnode);
2588 path[--insert] = '\0';
2589 // the path is filled right to left
2591 while (true) {
2592 // If the node is the context's root, bail out. Otherwise resolve mount
2593 // points.
2594 if (vnode == ioContext->root)
2595 break;
2597 if (Vnode* coveredVnode = get_covered_vnode(vnode)) {
2598 put_vnode(vnode);
2599 vnode = coveredVnode;
2602 // lookup the parent vnode
2603 struct vnode* parentVnode;
2604 status = lookup_dir_entry(vnode, "..", &parentVnode);
2605 if (status != B_OK)
2606 goto out;
2608 if (parentVnode == vnode) {
2609 // The caller apparently got their hands on a node outside of their
2610 // context's root. Now we've hit the global root.
2611 put_vnode(parentVnode);
2612 break;
2615 // get the node's name
2616 char nameBuffer[sizeof(struct dirent) + B_FILE_NAME_LENGTH];
2617 // also used for fs_read_dir()
2618 char* name = &((struct dirent*)nameBuffer)->d_name[0];
2619 status = get_vnode_name(vnode, parentVnode, (struct dirent*)nameBuffer,
2620 sizeof(nameBuffer), ioContext);
2622 // release the current vnode, we only need its parent from now on
2623 put_vnode(vnode);
2624 vnode = parentVnode;
2626 if (status != B_OK)
2627 goto out;
2629 // TODO: add an explicit check for loops in about 10 levels to do
2630 // real loop detection
2632 // don't go deeper than 'maxLevel' to prevent circular loops
2633 if (maxLevel-- < 0) {
2634 status = B_LINK_LIMIT;
2635 goto out;
2638 // add the name in front of the current path
2639 name[B_FILE_NAME_LENGTH - 1] = '\0';
2640 length = strlen(name);
2641 insert -= length;
2642 if (insert <= 0) {
2643 status = B_RESULT_NOT_REPRESENTABLE;
2644 goto out;
2646 memcpy(path + insert, name, length);
2647 path[--insert] = '/';
2650 // the root dir will result in an empty path: fix it
2651 if (path[insert] == '\0')
2652 path[--insert] = '/';
2654 TRACE((" path is: %s\n", path + insert));
2656 // move the path to the start of the buffer
2657 length = bufferSize - insert;
2658 memmove(buffer, path + insert, length);
2660 out:
2661 put_vnode(vnode);
2662 return status;
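// Usage sketch: building the absolute path of a directory vnode into a
// caller-supplied buffer; as noted above, the result may already be stale
// by the time it is used:
//
//    char path[B_PATH_NAME_LENGTH];
//    if (dir_vnode_to_path(directory, path, sizeof(path), kernel) == B_OK)
//        TRACE(("resolved to %s\n", path));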
2666 /*! Checks the length of every path component, and adds a '.'
2667 if the path ends in a slash.
2668 The given path buffer must be able to store at least one
2669 additional character.
2671 static status_t
2672 check_path(char* to)
2674 int32 length = 0;
2676 // check length of every path component
2678 while (*to) {
2679 char* begin;
2680 if (*to == '/')
2681 to++, length++;
2683 begin = to;
2684 while (*to != '/' && *to)
2685 to++, length++;
2687 if (to - begin > B_FILE_NAME_LENGTH)
2688 return B_NAME_TOO_LONG;
2691 if (length == 0)
2692 return B_ENTRY_NOT_FOUND;
2694 // complete path if there is a slash at the end
2696 if (*(to - 1) == '/') {
2697 if (length > B_PATH_NAME_LENGTH - 2)
2698 return B_NAME_TOO_LONG;
2700 to[0] = '.';
2701 to[1] = '\0';
2704 return B_OK;
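// Illustrative inputs (sketch):
//
//    "foo/bar"  -> B_OK, buffer unchanged
//    "foo/bar/" -> B_OK, buffer becomes "foo/bar/."
//    ""         -> B_ENTRY_NOT_FOUND
//    any component longer than B_FILE_NAME_LENGTH -> B_NAME_TOO_LONG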
2708 static struct file_descriptor*
2709 get_fd_and_vnode(int fd, struct vnode** _vnode, bool kernel)
2711 struct file_descriptor* descriptor
2712 = get_fd(get_current_io_context(kernel), fd);
2713 if (descriptor == NULL)
2714 return NULL;
2716 struct vnode* vnode = fd_vnode(descriptor);
2717 if (vnode == NULL) {
2718 put_fd(descriptor);
2719 return NULL;
2722 // ToDo: when we can close a file descriptor at any point, investigate
2723 // if this is still valid to do (accessing the vnode without ref_count
2724 // or locking)
2725 *_vnode = vnode;
2726 return descriptor;
2730 static struct vnode*
2731 get_vnode_from_fd(int fd, bool kernel)
2733 struct file_descriptor* descriptor;
2734 struct vnode* vnode;
2736 descriptor = get_fd(get_current_io_context(kernel), fd);
2737 if (descriptor == NULL)
2738 return NULL;
2740 vnode = fd_vnode(descriptor);
2741 if (vnode != NULL)
2742 inc_vnode_ref_count(vnode);
2744 put_fd(descriptor);
2745 return vnode;
2749 /*! Gets the vnode from an FD + path combination. If \a fd is lower than zero,
2750 only the path will be considered. In this case, the \a path must not be
2751 NULL.
2752 If \a fd is a valid file descriptor, \a path may be NULL for directories,
2753 and should be NULL for files.
2755 static status_t
2756 fd_and_path_to_vnode(int fd, char* path, bool traverseLeafLink,
2757 struct vnode** _vnode, ino_t* _parentID, bool kernel)
2759 if (fd < 0 && !path)
2760 return B_BAD_VALUE;
2762 if (path != NULL && *path == '\0')
2763 return B_ENTRY_NOT_FOUND;
2765 if (fd < 0 || (path != NULL && path[0] == '/')) {
2766 // no FD or absolute path
2767 return path_to_vnode(path, traverseLeafLink, _vnode, _parentID, kernel);
2770 // FD only, or FD + relative path
2771 struct vnode* vnode = get_vnode_from_fd(fd, kernel);
2772 if (vnode == NULL)
2773 return B_FILE_ERROR;
2775 if (path != NULL) {
2776 return vnode_path_to_vnode(vnode, path, traverseLeafLink, 0, kernel,
2777 _vnode, _parentID);
2780 // there is no relative path to take into account
2782 *_vnode = vnode;
2783 if (_parentID)
2784 *_parentID = -1;
2786 return B_OK;
2790 static int
2791 get_new_fd(int type, struct fs_mount* mount, struct vnode* vnode,
2792 void* cookie, int openMode, bool kernel)
2794 struct file_descriptor* descriptor;
2795 int fd;
2797 // If the vnode is locked, we don't allow creating a new file/directory
2798 // file_descriptor for it
2799 if (vnode && vnode->mandatory_locked_by != NULL
2800 && (type == FDTYPE_FILE || type == FDTYPE_DIR))
2801 return B_BUSY;
2803 descriptor = alloc_fd();
2804 if (!descriptor)
2805 return B_NO_MEMORY;
2807 if (vnode)
2808 descriptor->u.vnode = vnode;
2809 else
2810 descriptor->u.mount = mount;
2811 descriptor->cookie = cookie;
2813 switch (type) {
2814 // vnode types
2815 case FDTYPE_FILE:
2816 descriptor->ops = &sFileOps;
2817 break;
2818 case FDTYPE_DIR:
2819 descriptor->ops = &sDirectoryOps;
2820 break;
2821 case FDTYPE_ATTR:
2822 descriptor->ops = &sAttributeOps;
2823 break;
2824 case FDTYPE_ATTR_DIR:
2825 descriptor->ops = &sAttributeDirectoryOps;
2826 break;
2828 // mount types
2829 case FDTYPE_INDEX_DIR:
2830 descriptor->ops = &sIndexDirectoryOps;
2831 break;
2832 case FDTYPE_QUERY:
2833 descriptor->ops = &sQueryOps;
2834 break;
2836 default:
2837 panic("get_new_fd() called with unknown type %d\n", type);
2838 break;
2840 descriptor->type = type;
2841 descriptor->open_mode = openMode;
2843 io_context* context = get_current_io_context(kernel);
2844 fd = new_fd(context, descriptor);
2845 if (fd < 0) {
2846 free(descriptor);
2847 return B_NO_MORE_FDS;
2850 mutex_lock(&context->io_mutex);
2851 fd_set_close_on_exec(context, fd, (openMode & O_CLOEXEC) != 0);
2852 mutex_unlock(&context->io_mutex);
2854 return fd;
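// Typical call (sketch), as an open path would issue it: wrap a freshly
// opened vnode and its FS cookie into a new file descriptor. On failure
// the caller still owns both and has to clean them up itself:
//
//    int fd = get_new_fd(FDTYPE_FILE, NULL, vnode, cookie, openMode,
//        kernel);
//    if (fd < 0) {
//        // e.g. close the cookie via FS_CALL(vnode, close, cookie)
//        // and release the vnode reference
//    }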
2858 /*! In-place normalizes \a path. It's otherwise semantically equivalent to
2859 vfs_normalize_path(). See there for more documentation.
2861 static status_t
2862 normalize_path(char* path, size_t pathSize, bool traverseLink, bool kernel)
2864 VNodePutter dirPutter;
2865 struct vnode* dir = NULL;
2866 status_t error;
2868 for (int i = 0; i < B_MAX_SYMLINKS; i++) {
2869 // get dir vnode + leaf name
2870 struct vnode* nextDir;
2871 char leaf[B_FILE_NAME_LENGTH];
2872 error = vnode_and_path_to_dir_vnode(dir, path, &nextDir, leaf, kernel);
2873 if (error != B_OK)
2874 return error;
2876 dir = nextDir;
2877 strcpy(path, leaf);
2878 dirPutter.SetTo(dir);
2880 // get file vnode, if we shall resolve links
2881 bool fileExists = false;
2882 struct vnode* fileVnode;
2883 VNodePutter fileVnodePutter;
2884 if (traverseLink) {
2885 inc_vnode_ref_count(dir);
2886 if (vnode_path_to_vnode(dir, path, false, 0, kernel, &fileVnode,
2887 NULL) == B_OK) {
2888 fileVnodePutter.SetTo(fileVnode);
2889 fileExists = true;
2893 if (!fileExists || !traverseLink || !S_ISLNK(fileVnode->Type())) {
2894 // we're done -- construct the path
2895 bool hasLeaf = true;
2896 if (strcmp(leaf, ".") == 0 || strcmp(leaf, "..") == 0) {
2897 // special cases "." and ".." -- get the dir, forget the leaf
2898 inc_vnode_ref_count(dir);
2899 error = vnode_path_to_vnode(dir, leaf, false, 0, kernel,
2900 &nextDir, NULL);
2901 if (error != B_OK)
2902 return error;
2903 dir = nextDir;
2904 dirPutter.SetTo(dir);
2905 hasLeaf = false;
2908 // get the directory path
2909 error = dir_vnode_to_path(dir, path, B_PATH_NAME_LENGTH, kernel);
2910 if (error != B_OK)
2911 return error;
2913 // append the leaf name
2914 if (hasLeaf) {
2915 // insert a directory separator if this is not the file system
2916 // root
2917 if ((strcmp(path, "/") != 0
2918 && strlcat(path, "/", pathSize) >= pathSize)
2919 || strlcat(path, leaf, pathSize) >= pathSize) {
2920 return B_NAME_TOO_LONG;
2924 return B_OK;
2927 // read link
2928 if (HAS_FS_CALL(fileVnode, read_symlink)) {
2929 size_t bufferSize = B_PATH_NAME_LENGTH - 1;
2930 error = FS_CALL(fileVnode, read_symlink, path, &bufferSize);
2931 if (error != B_OK)
2932 return error;
2933 path[bufferSize] = '\0';
2934 } else
2935 return B_BAD_VALUE;
2938 return B_LINK_LIMIT;
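// Example (sketch): in-place normalization, assuming a writable buffer of
// B_PATH_NAME_LENGTH bytes:
//
//    char path[B_PATH_NAME_LENGTH] = "/boot/home//Desktop/../config";
//    if (normalize_path(path, sizeof(path), true, true) == B_OK) {
//        // path now holds the canonical form, e.g. "/boot/home/config"
//        // (with symlinks resolved along the way)
//    }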
2942 static status_t
2943 resolve_covered_parent(struct vnode* parent, dev_t* _device, ino_t* _node,
2944 struct io_context* ioContext)
2946 // Make sure the IO context root is not bypassed.
2947 if (parent == ioContext->root) {
2948 *_device = parent->device;
2949 *_node = parent->id;
2950 return B_OK;
2953 inc_vnode_ref_count(parent);
2954 // vnode_path_to_vnode() puts the node
2956 // ".." is guaranteed not to be clobbered by this call
2957 struct vnode* vnode;
2958 status_t status = vnode_path_to_vnode(parent, (char*)"..", false, 0,
2959 ioContext, &vnode, NULL);
2960 if (status == B_OK) {
2961 *_device = vnode->device;
2962 *_node = vnode->id;
2963 put_vnode(vnode);
2966 return status;
2970 #ifdef ADD_DEBUGGER_COMMANDS
2973 static void
2974 _dump_advisory_locking(advisory_locking* locking)
2976 if (locking == NULL)
2977 return;
2979 kprintf(" lock: %" B_PRId32, locking->lock);
2980 kprintf(" wait_sem: %" B_PRId32, locking->wait_sem);
2982 int32 index = 0;
2983 LockList::Iterator iterator = locking->locks.GetIterator();
2984 while (iterator.HasNext()) {
2985 struct advisory_lock* lock = iterator.Next();
2987 kprintf(" [%2" B_PRId32 "] team: %" B_PRId32 "\n", index++, lock->team);
2988 kprintf(" start: %" B_PRIdOFF "\n", lock->start);
2989 kprintf(" end: %" B_PRIdOFF "\n", lock->end);
2990 kprintf(" shared? %s\n", lock->shared ? "yes" : "no");
2995 static void
2996 _dump_mount(struct fs_mount* mount)
2998 kprintf("MOUNT: %p\n", mount);
2999 kprintf(" id: %" B_PRIdDEV "\n", mount->id);
3000 kprintf(" device_name: %s\n", mount->device_name);
3001 kprintf(" root_vnode: %p\n", mount->root_vnode);
3002 kprintf(" covers: %p\n", mount->root_vnode->covers);
3003 kprintf(" partition: %p\n", mount->partition);
3004 kprintf(" lock: %p\n", &mount->rlock);
3005 kprintf(" flags: %s%s\n", mount->unmounting ? " unmounting" : "",
3006 mount->owns_file_device ? " owns_file_device" : "");
3008 fs_volume* volume = mount->volume;
3009 while (volume != NULL) {
3010 kprintf(" volume %p:\n", volume);
3011 kprintf(" layer: %" B_PRId32 "\n", volume->layer);
3012 kprintf(" private_volume: %p\n", volume->private_volume);
3013 kprintf(" ops: %p\n", volume->ops);
3014 kprintf(" file_system: %p\n", volume->file_system);
3015 kprintf(" file_system_name: %s\n", volume->file_system_name);
3016 volume = volume->super_volume;
3019 set_debug_variable("_volume", (addr_t)mount->volume->private_volume);
3020 set_debug_variable("_root", (addr_t)mount->root_vnode);
3021 set_debug_variable("_covers", (addr_t)mount->root_vnode->covers);
3022 set_debug_variable("_partition", (addr_t)mount->partition);
3026 static bool
3027 debug_prepend_vnode_name_to_path(char* buffer, size_t& bufferSize,
3028 const char* name)
3030 bool insertSlash = buffer[bufferSize] != '\0';
3031 size_t nameLength = strlen(name);
3033 if (bufferSize < nameLength + (insertSlash ? 1 : 0))
3034 return false;
3036 if (insertSlash)
3037 buffer[--bufferSize] = '/';
3039 bufferSize -= nameLength;
3040 memcpy(buffer + bufferSize, name, nameLength);
3042 return true;
3046 static bool
3047 debug_prepend_vnode_id_to_path(char* buffer, size_t& bufferSize, dev_t devID,
3048 ino_t nodeID)
3050 if (bufferSize == 0)
3051 return false;
3053 bool insertSlash = buffer[bufferSize] != '\0';
3054 if (insertSlash)
3055 buffer[--bufferSize] = '/';
3057 size_t size = snprintf(buffer, bufferSize,
3058 "<%" B_PRIdDEV ",%" B_PRIdINO ">", devID, nodeID);
3059 if (size > bufferSize) {
3060 if (insertSlash)
3061 bufferSize++;
3062 return false;
3065 if (size < bufferSize)
3066 memmove(buffer + bufferSize - size, buffer, size);
3068 bufferSize -= size;
3069 return true;
3073 static char*
3074 debug_resolve_vnode_path(struct vnode* vnode, char* buffer, size_t bufferSize,
3075 bool& _truncated)
3077 // null-terminate the path
3078 buffer[--bufferSize] = '\0';
3080 while (true) {
3081 while (vnode->covers != NULL)
3082 vnode = vnode->covers;
3084 if (vnode == sRoot) {
3085 _truncated = bufferSize == 0;
3086 if (!_truncated)
3087 buffer[--bufferSize] = '/';
3088 return buffer + bufferSize;
3091 // resolve the name
3092 ino_t dirID;
3093 const char* name = vnode->mount->entry_cache.DebugReverseLookup(
3094 vnode->id, dirID);
3095 if (name == NULL) {
3096 // Failed to resolve the name -- prepend "<dev,node>/".
3097 _truncated = !debug_prepend_vnode_id_to_path(buffer, bufferSize,
3098 vnode->mount->id, vnode->id);
3099 return buffer + bufferSize;
3102 // prepend the name
3103 if (!debug_prepend_vnode_name_to_path(buffer, bufferSize, name)) {
3104 _truncated = true;
3105 return buffer + bufferSize;
3108 // resolve the directory node
3109 struct vnode* nextVnode = lookup_vnode(vnode->mount->id, dirID);
3110 if (nextVnode == NULL) {
3111 _truncated = !debug_prepend_vnode_id_to_path(buffer, bufferSize,
3112 vnode->mount->id, dirID);
3113 return buffer + bufferSize;
3116 vnode = nextVnode;
3121 static void
3122 _dump_vnode(struct vnode* vnode, bool printPath)
3124 kprintf("VNODE: %p\n", vnode);
3125 kprintf(" device: %" B_PRIdDEV "\n", vnode->device);
3126 kprintf(" id: %" B_PRIdINO "\n", vnode->id);
3127 kprintf(" ref_count: %" B_PRId32 "\n", vnode->ref_count);
3128 kprintf(" private_node: %p\n", vnode->private_node);
3129 kprintf(" mount: %p\n", vnode->mount);
3130 kprintf(" covered_by: %p\n", vnode->covered_by);
3131 kprintf(" covers: %p\n", vnode->covers);
3132 kprintf(" cache: %p\n", vnode->cache);
3133 kprintf(" type: %#" B_PRIx32 "\n", vnode->Type());
3134 kprintf(" flags: %s%s%s\n", vnode->IsRemoved() ? "r" : "-",
3135 vnode->IsBusy() ? "b" : "-", vnode->IsUnpublished() ? "u" : "-");
3136 kprintf(" advisory_lock: %p\n", vnode->advisory_locking);
3138 _dump_advisory_locking(vnode->advisory_locking);
3140 if (printPath) {
3141 void* buffer = debug_malloc(B_PATH_NAME_LENGTH);
3142 if (buffer != NULL) {
3143 bool truncated;
3144 char* path = debug_resolve_vnode_path(vnode, (char*)buffer,
3145 B_PATH_NAME_LENGTH, truncated);
3146 if (path != NULL) {
3147 kprintf(" path: ");
3148 if (truncated)
3149 kputs("<truncated>/");
3150 kputs(path);
3151 kputs("\n");
3152 } else
3153 kprintf("Failed to resolve vnode path.\n");
3155 debug_free(buffer);
3156 } else
3157 kprintf("Failed to allocate memory for constructing the path.\n");
3160 set_debug_variable("_node", (addr_t)vnode->private_node);
3161 set_debug_variable("_mount", (addr_t)vnode->mount);
3162 set_debug_variable("_covered_by", (addr_t)vnode->covered_by);
3163 set_debug_variable("_covers", (addr_t)vnode->covers);
3164 set_debug_variable("_adv_lock", (addr_t)vnode->advisory_locking);
3168 static int
3169 dump_mount(int argc, char** argv)
3171 if (argc != 2 || !strcmp(argv[1], "--help")) {
3172 kprintf("usage: %s [id|address]\n", argv[0]);
3173 return 0;
3176 ulong val = parse_expression(argv[1]);
3177 uint32 id = val;
3179 struct fs_mount* mount = sMountsTable->Lookup(id);
3180 if (mount == NULL) {
3181 if (IS_USER_ADDRESS(id)) {
3182 kprintf("fs_mount not found\n");
3183 return 0;
3185 mount = (fs_mount*)val;
3188 _dump_mount(mount);
3189 return 0;
3193 static int
3194 dump_mounts(int argc, char** argv)
3196 if (argc != 1) {
3197 kprintf("usage: %s\n", argv[0]);
3198 return 0;
3201 kprintf("%-*s id %-*s %-*s %-*s fs_name\n",
3202 B_PRINTF_POINTER_WIDTH, "address", B_PRINTF_POINTER_WIDTH, "root",
3203 B_PRINTF_POINTER_WIDTH, "covers", B_PRINTF_POINTER_WIDTH, "cookie");
3205 struct fs_mount* mount;
3207 MountTable::Iterator iterator(sMountsTable);
3208 while (iterator.HasNext()) {
3209 mount = iterator.Next();
3210 kprintf("%p%4" B_PRIdDEV " %p %p %p %s\n", mount, mount->id, mount->root_vnode,
3211 mount->root_vnode->covers, mount->volume->private_volume,
3212 mount->volume->file_system_name);
3214 fs_volume* volume = mount->volume;
3215 while (volume->super_volume != NULL) {
3216 volume = volume->super_volume;
3217 kprintf(" %p %s\n",
3218 volume->private_volume, volume->file_system_name);
3222 return 0;
3226 static int
3227 dump_vnode(int argc, char** argv)
3229 bool printPath = false;
3230 int argi = 1;
3231 if (argc >= 2 && strcmp(argv[argi], "-p") == 0) {
3232 printPath = true;
3233 argi++;
3236 if (argi >= argc || argi + 2 < argc) {
3237 print_debugger_command_usage(argv[0]);
3238 return 0;
3241 struct vnode* vnode = NULL;
3243 if (argi + 1 == argc) {
3244 vnode = (struct vnode*)parse_expression(argv[argi]);
3245 if (IS_USER_ADDRESS(vnode)) {
3246 kprintf("invalid vnode address\n");
3247 return 0;
3249 _dump_vnode(vnode, printPath);
3250 return 0;
3253 dev_t device = parse_expression(argv[argi]);
3254 ino_t id = parse_expression(argv[argi + 1]);
3256 VnodeTable::Iterator iterator(sVnodeTable);
3257 while (iterator.HasNext()) {
3258 vnode = iterator.Next();
3259 if (vnode->id != id || vnode->device != device)
3260 continue;
3262 _dump_vnode(vnode, printPath);
3265 return 0;
3269 static int
3270 dump_vnodes(int argc, char** argv)
3272 if (argc != 2 || !strcmp(argv[1], "--help")) {
3273 kprintf("usage: %s [device]\n", argv[0]);
3274 return 0;
3277 // restrict dumped nodes to a certain device if requested
3278 dev_t device = parse_expression(argv[1]);
3280 struct vnode* vnode;
3282 kprintf("%-*s dev inode ref %-*s %-*s %-*s flags\n",
3283 B_PRINTF_POINTER_WIDTH, "address", B_PRINTF_POINTER_WIDTH, "cache",
3284 B_PRINTF_POINTER_WIDTH, "fs-node", B_PRINTF_POINTER_WIDTH, "locking");
3286 VnodeTable::Iterator iterator(sVnodeTable);
3287 while (iterator.HasNext()) {
3288 vnode = iterator.Next();
3289 if (vnode->device != device)
3290 continue;
3292 kprintf("%p%4" B_PRIdDEV "%10" B_PRIdINO "%5" B_PRId32 " %p %p %p %s%s%s\n",
3293 vnode, vnode->device, vnode->id, vnode->ref_count, vnode->cache,
3294 vnode->private_node, vnode->advisory_locking,
3295 vnode->IsRemoved() ? "r" : "-", vnode->IsBusy() ? "b" : "-",
3296 vnode->IsUnpublished() ? "u" : "-");
3299 return 0;
3303 static int
3304 dump_vnode_caches(int argc, char** argv)
3306 struct vnode* vnode;
3308 if (argc > 2 || !strcmp(argv[1], "--help")) {
3309 kprintf("usage: %s [device]\n", argv[0]);
3310 return 0;
3313 // restrict dumped nodes to a certain device if requested
3314 dev_t device = -1;
3315 if (argc > 1)
3316 device = parse_expression(argv[1]);
3318 kprintf("%-*s dev inode %-*s size pages\n",
3319 B_PRINTF_POINTER_WIDTH, "address", B_PRINTF_POINTER_WIDTH, "cache");
3321 VnodeTable::Iterator iterator(sVnodeTable);
3322 while (iterator.HasNext()) {
3323 vnode = iterator.Next();
3324 if (vnode->cache == NULL)
3325 continue;
3326 if (device != -1 && vnode->device != device)
3327 continue;
3329 kprintf("%p%4" B_PRIdDEV "%10" B_PRIdINO " %p %8" B_PRIdOFF "%8" B_PRId32 "\n",
3330 vnode, vnode->device, vnode->id, vnode->cache,
3331 (vnode->cache->virtual_end + B_PAGE_SIZE - 1) / B_PAGE_SIZE,
3332 vnode->cache->page_count);
3335 return 0;
3339 static int
3340 dump_io_context(int argc, char** argv)
3342 if (argc > 2 || !strcmp(argv[1], "--help")) {
3343 kprintf("usage: %s [team-id|address]\n", argv[0]);
3344 return 0;
3347 struct io_context* context = NULL;
3349 if (argc > 1) {
3350 ulong num = parse_expression(argv[1]);
3351 if (IS_KERNEL_ADDRESS(num))
3352 context = (struct io_context*)num;
3353 else {
3354 Team* team = team_get_team_struct_locked(num);
3355 if (team == NULL) {
3356 kprintf("could not find team with ID %lu\n", num);
3357 return 0;
3359 context = (struct io_context*)team->io_context;
3361 } else
3362 context = get_current_io_context(true);
3364 kprintf("I/O CONTEXT: %p\n", context);
3365 kprintf(" root vnode:\t%p\n", context->root);
3366 kprintf(" cwd vnode:\t%p\n", context->cwd);
3367 kprintf(" used fds:\t%" B_PRIu32 "\n", context->num_used_fds);
3368 kprintf(" max fds:\t%" B_PRIu32 "\n", context->table_size);
3370 if (context->num_used_fds) {
3371 kprintf(" no. type %*s ref open mode pos %*s\n",
3372 B_PRINTF_POINTER_WIDTH, "ops", B_PRINTF_POINTER_WIDTH, "cookie");
3375 for (uint32 i = 0; i < context->table_size; i++) {
3376 struct file_descriptor* fd = context->fds[i];
3377 if (fd == NULL)
3378 continue;
3380 kprintf(" %3" B_PRIu32 ": %4" B_PRId32 " %p %3" B_PRId32 " %4"
3381 B_PRIu32 " %4" B_PRIx32 " %10" B_PRIdOFF " %p %s %p\n", i,
3382 fd->type, fd->ops, fd->ref_count, fd->open_count, fd->open_mode,
3383 fd->pos, fd->cookie,
3384 fd->type >= FDTYPE_INDEX && fd->type <= FDTYPE_QUERY
3385 ? "mount" : "vnode",
3386 fd->u.vnode);
3389 kprintf(" used monitors:\t%" B_PRIu32 "\n", context->num_monitors);
3390 kprintf(" max monitors:\t%" B_PRIu32 "\n", context->max_monitors);
3392 set_debug_variable("_cwd", (addr_t)context->cwd);
3394 return 0;
3398 static int
3399 dump_vnode_usage(int argc, char** argv)
3401 if (argc != 1) {
3402 kprintf("usage: %s\n", argv[0]);
3403 return 0;
3406 kprintf("Unused vnodes: %" B_PRIu32 " (max unused %" B_PRIu32 ")\n",
3407 sUnusedVnodes, kMaxUnusedVnodes);
3409 uint32 count = sVnodeTable->CountElements();
3411 kprintf("%" B_PRIu32 " vnodes total (%" B_PRIu32 " in use).\n", count,
3412 count - sUnusedVnodes);
3413 return 0;
3416 #endif // ADD_DEBUGGER_COMMANDS
3419 /*! Clears memory specified by an iovec array.
3421 static void
3422 zero_iovecs(const iovec* vecs, size_t vecCount, size_t bytes)
3424 for (size_t i = 0; i < vecCount && bytes > 0; i++) {
3425 size_t length = std::min(vecs[i].iov_len, bytes);
3426 memset(vecs[i].iov_base, 0, length);
3427 bytes -= length;
3432 /*! Does the dirty work of combining the file_io_vecs with the iovecs
3433 and calls the file system hooks to read/write the request to disk.
3435 static status_t
3436 common_file_io_vec_pages(struct vnode* vnode, void* cookie,
3437 const file_io_vec* fileVecs, size_t fileVecCount, const iovec* vecs,
3438 size_t vecCount, uint32* _vecIndex, size_t* _vecOffset, size_t* _numBytes,
3439 bool doWrite)
3441 if (fileVecCount == 0) {
3442 // There are no file vecs at this offset, so we're obviously trying
3443 // to access the file outside of its bounds
3444 return B_BAD_VALUE;
3447 size_t numBytes = *_numBytes;
3448 uint32 fileVecIndex;
3449 size_t vecOffset = *_vecOffset;
3450 uint32 vecIndex = *_vecIndex;
3451 status_t status;
3452 size_t size;
3454 if (!doWrite && vecOffset == 0) {
3455 // now directly read the data from the device
3456 // the first file_io_vec can be read directly
3458 if (fileVecs[0].length < (off_t)numBytes)
3459 size = fileVecs[0].length;
3460 else
3461 size = numBytes;
3463 if (fileVecs[0].offset >= 0) {
3464 status = FS_CALL(vnode, read_pages, cookie, fileVecs[0].offset,
3465 &vecs[vecIndex], vecCount - vecIndex, &size);
3466 } else {
3467 // sparse read
3468 zero_iovecs(&vecs[vecIndex], vecCount - vecIndex, size);
3469 status = B_OK;
3471 if (status != B_OK)
3472 return status;
3474 // TODO: this is a work-around for buggy device drivers!
3475 // When our own drivers honour the length, we can:
3476 // a) also use this direct I/O for writes (otherwise, it would
3477 // overwrite precious data)
3478 // b) panic if the term below is true (at least for writes)
3479 if ((off_t)size > fileVecs[0].length) {
3480 //dprintf("warning: device driver %p doesn't respect total length "
3481 // "in read_pages() call!\n", ref->device);
3482 size = fileVecs[0].length;
3485 ASSERT((off_t)size <= fileVecs[0].length);
3487 // If the file portion was contiguous, we're already done now
3488 if (size == numBytes)
3489 return B_OK;
3491 // if we reached the end of the file, we can return as well
3492 if ((off_t)size != fileVecs[0].length) {
3493 *_numBytes = size;
3494 return B_OK;
3497 fileVecIndex = 1;
3499 // first, find out where we have to continue in our iovecs
3500 for (; vecIndex < vecCount; vecIndex++) {
3501 if (size < vecs[vecIndex].iov_len)
3502 break;
3504 size -= vecs[vecIndex].iov_len;
3507 vecOffset = size;
3508 } else {
3509 fileVecIndex = 0;
3510 size = 0;
3513 // Too bad, let's process the rest of the file_io_vecs
3515 size_t totalSize = size;
3516 size_t bytesLeft = numBytes - size;
3518 for (; fileVecIndex < fileVecCount; fileVecIndex++) {
3519 const file_io_vec &fileVec = fileVecs[fileVecIndex];
3520 off_t fileOffset = fileVec.offset;
3521 off_t fileLeft = min_c(fileVec.length, (off_t)bytesLeft);
3523 TRACE(("FILE VEC [%" B_PRIu32 "] length %" B_PRIdOFF "\n", fileVecIndex,
3524 fileLeft));
3526 // process the complete fileVec
3527 while (fileLeft > 0) {
3528 iovec tempVecs[MAX_TEMP_IO_VECS];
3529 uint32 tempCount = 0;
3531 // size tracks how much of what is left of the current fileVec
3532 // (fileLeft) has been assigned to tempVecs
3533 size = 0;
3535 // assign what is left of the current fileVec to the tempVecs
3536 for (size = 0; (off_t)size < fileLeft && vecIndex < vecCount
3537 && tempCount < MAX_TEMP_IO_VECS;) {
3538 // try to satisfy one iovec per iteration (or as much as
3539 // possible)
3541 // bytes left of the current iovec
3542 size_t vecLeft = vecs[vecIndex].iov_len - vecOffset;
3543 if (vecLeft == 0) {
3544 vecOffset = 0;
3545 vecIndex++;
3546 continue;
3549 TRACE(("fill vec %" B_PRIu32 ", offset = %lu, size = %lu\n",
3550 vecIndex, vecOffset, size));
3552 // actually available bytes
3553 size_t tempVecSize = min_c(vecLeft, fileLeft - size);
3555 tempVecs[tempCount].iov_base
3556 = (void*)((addr_t)vecs[vecIndex].iov_base + vecOffset);
3557 tempVecs[tempCount].iov_len = tempVecSize;
3558 tempCount++;
3560 size += tempVecSize;
3561 vecOffset += tempVecSize;
3564 size_t bytes = size;
3566 if (fileOffset == -1) {
3567 if (doWrite) {
3568 panic("sparse write attempt: vnode %p", vnode);
3569 status = B_IO_ERROR;
3570 } else {
3571 // sparse read
3572 zero_iovecs(tempVecs, tempCount, bytes);
3573 status = B_OK;
3575 } else if (doWrite) {
3576 status = FS_CALL(vnode, write_pages, cookie, fileOffset,
3577 tempVecs, tempCount, &bytes);
3578 } else {
3579 status = FS_CALL(vnode, read_pages, cookie, fileOffset,
3580 tempVecs, tempCount, &bytes);
3582 if (status != B_OK)
3583 return status;
3585 totalSize += bytes;
3586 bytesLeft -= size;
3587 if (fileOffset >= 0)
3588 fileOffset += size;
3589 fileLeft -= size;
3590 //dprintf("-> file left = %Lu\n", fileLeft);
3592 if (size != bytes || vecIndex >= vecCount) {
3593 // there are no more bytes or iovecs, let's bail out
3594 *_numBytes = totalSize;
3595 return B_OK;
3600 *_vecIndex = vecIndex;
3601 *_vecOffset = vecOffset;
3602 *_numBytes = totalSize;
3603 return B_OK;
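// Worked example (sketch) of the vec combination above: suppose the caller
// passes two memory vecs of 4096 bytes each, and the file vecs describe an
// on-disk extent of 6144 bytes followed by one of 2048 bytes. The direct
// read_pages() call for the first extent fills vec 0 completely plus 2048
// bytes of vec 1; the loop then resumes with vecIndex 1 and vecOffset 2048,
// so the tempVecs built for the second extent continue exactly where the
// first one ended.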
3607 static bool
3608 is_user_in_group(gid_t gid)
3610 if (gid == getegid())
3611 return true;
3613 gid_t groups[NGROUPS_MAX];
3614 int groupCount = getgroups(NGROUPS_MAX, groups);
3615 for (int i = 0; i < groupCount; i++) {
3616 if (gid == groups[i])
3617 return true;
3620 return false;
3624 static status_t
3625 free_io_context(io_context* context)
3627 uint32 i;
3629 TIOC(FreeIOContext(context));
3631 if (context->root)
3632 put_vnode(context->root);
3634 if (context->cwd)
3635 put_vnode(context->cwd);
3637 mutex_lock(&context->io_mutex);
3639 for (i = 0; i < context->table_size; i++) {
3640 if (struct file_descriptor* descriptor = context->fds[i]) {
3641 close_fd(descriptor);
3642 put_fd(descriptor);
3646 mutex_destroy(&context->io_mutex);
3648 remove_node_monitors(context);
3649 free(context->fds);
3650 free(context);
3652 return B_OK;
3656 static status_t
3657 resize_monitor_table(struct io_context* context, const int newSize)
3659 int status = B_OK;
3661 if (newSize <= 0 || newSize > MAX_NODE_MONITORS)
3662 return B_BAD_VALUE;
3664 mutex_lock(&context->io_mutex);
3666 if ((size_t)newSize < context->num_monitors) {
3667 status = B_BUSY;
3668 goto out;
3670 context->max_monitors = newSize;
3672 out:
3673 mutex_unlock(&context->io_mutex);
3674 return status;
3678 // #pragma mark - public API for file systems
3681 extern "C" status_t
3682 new_vnode(fs_volume* volume, ino_t vnodeID, void* privateNode,
3683 fs_vnode_ops* ops)
3685 FUNCTION(("new_vnode(volume = %p (%" B_PRId32 "), vnodeID = %" B_PRId64
3686 ", node = %p)\n", volume, volume->id, vnodeID, privateNode));
3688 if (privateNode == NULL)
3689 return B_BAD_VALUE;
3691 int32 tries = BUSY_VNODE_RETRIES;
3692 restart:
3693 // create the node
3694 bool nodeCreated;
3695 struct vnode* vnode;
3696 status_t status = create_new_vnode_and_lock(volume->id, vnodeID, vnode,
3697 nodeCreated);
3698 if (status != B_OK)
3699 return status;
3701 WriteLocker nodeLocker(sVnodeLock, true);
3702 // create_new_vnode_and_lock() has locked for us
3704 if (!nodeCreated && vnode->IsBusy()) {
3705 nodeLocker.Unlock();
3706 if (!retry_busy_vnode(tries, volume->id, vnodeID))
3707 return B_BUSY;
3708 goto restart;
3711 // file system integrity check:
3712 // test if the vnode already exists and bail out if this is the case!
3713 if (!nodeCreated) {
3714 panic("vnode %" B_PRIdDEV ":%" B_PRIdINO " already exists (node = %p, "
3715 "vnode->node = %p)!", volume->id, vnodeID, privateNode,
3716 vnode->private_node);
3717 return B_ERROR;
3720 vnode->private_node = privateNode;
3721 vnode->ops = ops;
3722 vnode->SetUnpublished(true);
3724 TRACE(("returns: %s\n", strerror(status)));
3726 return status;
3730 extern "C" status_t
3731 publish_vnode(fs_volume* volume, ino_t vnodeID, void* privateNode,
3732 fs_vnode_ops* ops, int type, uint32 flags)
3734 FUNCTION(("publish_vnode()\n"));
3736 int32 tries = BUSY_VNODE_RETRIES;
3737 restart:
3738 WriteLocker locker(sVnodeLock);
3740 struct vnode* vnode = lookup_vnode(volume->id, vnodeID);
3742 bool nodeCreated = false;
3743 if (vnode == NULL) {
3744 if (privateNode == NULL)
3745 return B_BAD_VALUE;
3747 // create the node
3748 locker.Unlock();
3749 // create_new_vnode_and_lock() will re-lock for us on success
3750 status_t status = create_new_vnode_and_lock(volume->id, vnodeID, vnode,
3751 nodeCreated);
3752 if (status != B_OK)
3753 return status;
3755 locker.SetTo(sVnodeLock, true);
3758 if (nodeCreated) {
3759 vnode->private_node = privateNode;
3760 vnode->ops = ops;
3761 vnode->SetUnpublished(true);
3762 } else if (vnode->IsBusy() && vnode->IsUnpublished()
3763 && vnode->private_node == privateNode && vnode->ops == ops) {
3764 // already known, but not published
3765 } else if (vnode->IsBusy()) {
3766 locker.Unlock();
3767 if (!retry_busy_vnode(tries, volume->id, vnodeID))
3768 return B_BUSY;
3769 goto restart;
3770 } else
3771 return B_BAD_VALUE;
3773 bool publishSpecialSubNode = false;
3775 vnode->SetType(type);
3776 vnode->SetRemoved((flags & B_VNODE_PUBLISH_REMOVED) != 0);
3777 publishSpecialSubNode = is_special_node_type(type)
3778 && (flags & B_VNODE_DONT_CREATE_SPECIAL_SUB_NODE) == 0;
3780 status_t status = B_OK;
3782 // create sub vnodes, if necessary
3783 if (volume->sub_volume != NULL || publishSpecialSubNode) {
3784 locker.Unlock();
3786 fs_volume* subVolume = volume;
3787 if (volume->sub_volume != NULL) {
3788 while (status == B_OK && subVolume->sub_volume != NULL) {
3789 subVolume = subVolume->sub_volume;
3790 status = subVolume->ops->create_sub_vnode(subVolume, vnodeID,
3791 vnode);
3795 if (status == B_OK && publishSpecialSubNode)
3796 status = create_special_sub_node(vnode, flags);
3798 if (status != B_OK) {
3799 // error -- clean up the created sub vnodes
3800 while (subVolume->super_volume != volume) {
3801 subVolume = subVolume->super_volume;
3802 subVolume->ops->delete_sub_vnode(subVolume, vnode);
3806 if (status == B_OK) {
3807 ReadLocker vnodesReadLocker(sVnodeLock);
3808 AutoLocker<Vnode> nodeLocker(vnode);
3809 vnode->SetBusy(false);
3810 vnode->SetUnpublished(false);
3811 } else {
3812 locker.Lock();
3813 sVnodeTable->Remove(vnode);
3814 remove_vnode_from_mount_list(vnode, vnode->mount);
3815 free(vnode);
3817 } else {
3818 // we still hold the write lock -- mark the node unbusy and published
3819 vnode->SetBusy(false);
3820 vnode->SetUnpublished(false);
3823 TRACE(("returns: %s\n", strerror(status)));
3825 return status;
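// Typical file system usage (sketch): a volume that just created a new
// on-disk inode announces it in two steps; `inode` and `gMyVnodeOps` are
// hypothetical:
//
//    status_t error = new_vnode(volume, inode->ID(), inode, &gMyVnodeOps);
//    if (error == B_OK) {
//        // ... create the directory entry ...
//        error = publish_vnode(volume, inode->ID(), inode, &gMyVnodeOps,
//            S_IFREG, 0);
//    }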
3829 extern "C" status_t
3830 get_vnode(fs_volume* volume, ino_t vnodeID, void** _privateNode)
3832 struct vnode* vnode;
3834 if (volume == NULL)
3835 return B_BAD_VALUE;
3837 status_t status = get_vnode(volume->id, vnodeID, &vnode, true, true);
3838 if (status != B_OK)
3839 return status;
3841 // If this is a layered FS, we need to get the node cookie for the requested
3842 // layer.
3843 if (HAS_FS_CALL(vnode, get_super_vnode)) {
3844 fs_vnode resolvedNode;
3845 status_t status = FS_CALL(vnode, get_super_vnode, volume,
3846 &resolvedNode);
3847 if (status != B_OK) {
3848 panic("get_vnode(): Failed to get super node for vnode %p, "
3849 "volume: %p", vnode, volume);
3850 put_vnode(vnode);
3851 return status;
3854 if (_privateNode != NULL)
3855 *_privateNode = resolvedNode.private_node;
3856 } else if (_privateNode != NULL)
3857 *_privateNode = vnode->private_node;
3859 return B_OK;
3863 extern "C" status_t
3864 acquire_vnode(fs_volume* volume, ino_t vnodeID)
3866 struct vnode* vnode;
3868 rw_lock_read_lock(&sVnodeLock);
3869 vnode = lookup_vnode(volume->id, vnodeID);
3870 rw_lock_read_unlock(&sVnodeLock);
3872 if (vnode == NULL)
3873 return B_BAD_VALUE;
3875 inc_vnode_ref_count(vnode);
3876 return B_OK;
3880 extern "C" status_t
3881 put_vnode(fs_volume* volume, ino_t vnodeID)
3883 struct vnode* vnode;
3885 rw_lock_read_lock(&sVnodeLock);
3886 vnode = lookup_vnode(volume->id, vnodeID);
3887 rw_lock_read_unlock(&sVnodeLock);
3889 if (vnode == NULL)
3890 return B_BAD_VALUE;
3892 dec_vnode_ref_count(vnode, false, true);
3893 return B_OK;
3897 extern "C" status_t
3898 remove_vnode(fs_volume* volume, ino_t vnodeID)
3900 ReadLocker locker(sVnodeLock);
3902 struct vnode* vnode = lookup_vnode(volume->id, vnodeID);
3903 if (vnode == NULL)
3904 return B_ENTRY_NOT_FOUND;
3906 if (vnode->covered_by != NULL || vnode->covers != NULL) {
3907 // this vnode is in use
3908 return B_BUSY;
3911 vnode->Lock();
3913 vnode->SetRemoved(true);
3914 bool removeUnpublished = false;
3916 if (vnode->IsUnpublished()) {
3917 // prepare the vnode for deletion
3918 removeUnpublished = true;
3919 vnode->SetBusy(true);
3922 vnode->Unlock();
3923 locker.Unlock();
3925 if (removeUnpublished) {
3926 // If the vnode hasn't been published yet, we delete it here
3927 atomic_add(&vnode->ref_count, -1);
3928 free_vnode(vnode, true);
3931 return B_OK;
3935 extern "C" status_t
3936 unremove_vnode(fs_volume* volume, ino_t vnodeID)
3938 struct vnode* vnode;
3940 rw_lock_read_lock(&sVnodeLock);
3942 vnode = lookup_vnode(volume->id, vnodeID);
3943 if (vnode) {
3944 AutoLocker<Vnode> nodeLocker(vnode);
3945 vnode->SetRemoved(false);
3948 rw_lock_read_unlock(&sVnodeLock);
3949 return B_OK;
3953 extern "C" status_t
3954 get_vnode_removed(fs_volume* volume, ino_t vnodeID, bool* _removed)
3956 ReadLocker _(sVnodeLock);
3958 if (struct vnode* vnode = lookup_vnode(volume->id, vnodeID)) {
3959 if (_removed != NULL)
3960 *_removed = vnode->IsRemoved();
3961 return B_OK;
3964 return B_BAD_VALUE;
3968 extern "C" fs_volume*
3969 volume_for_vnode(fs_vnode* _vnode)
3971 if (_vnode == NULL)
3972 return NULL;
3974 struct vnode* vnode = static_cast<struct vnode*>(_vnode);
3975 return vnode->mount->volume;
3979 extern "C" status_t
3980 check_access_permissions(int accessMode, mode_t mode, gid_t nodeGroupID,
3981 uid_t nodeUserID)
3983 // get node permissions
3984 int userPermissions = (mode & S_IRWXU) >> 6;
3985 int groupPermissions = (mode & S_IRWXG) >> 3;
3986 int otherPermissions = mode & S_IRWXO;
3988 // get the node permissions for this uid/gid
3989 int permissions = 0;
3990 uid_t uid = geteuid();
3992 if (uid == 0) {
3993 // user is root
3994 // root has always read/write permission, but at least one of the
3995 // X bits must be set for execute permission
3996 permissions = userPermissions | groupPermissions | otherPermissions
3997 | S_IROTH | S_IWOTH;
3998 if (S_ISDIR(mode))
3999 permissions |= S_IXOTH;
4000 } else if (uid == nodeUserID) {
4001 // user is node owner
4002 permissions = userPermissions;
4003 } else if (is_user_in_group(nodeGroupID)) {
4004 // user is in owning group
4005 permissions = groupPermissions;
4006 } else {
4007 // user is one of the others
4008 permissions = otherPermissions;
4011 return (accessMode & ~permissions) == 0 ? B_OK : B_PERMISSION_DENIED;
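// Worked example (sketch): a file with mode 0644 that belongs to another
// user. A caller that is merely a member of the owning group is granted
// the group bits only (read), so:
//
//    check_access_permissions(W_OK, 0644, fileGroup, fileOwner);
//        // -> B_PERMISSION_DENIED (group permissions lack the write bit)
//    check_access_permissions(R_OK, 0644, fileGroup, fileOwner);
//        // -> B_OK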
4015 #if 0
4016 extern "C" status_t
4017 read_pages(int fd, off_t pos, const iovec* vecs, size_t count,
4018 size_t* _numBytes)
4020 struct file_descriptor* descriptor;
4021 struct vnode* vnode;
4023 descriptor = get_fd_and_vnode(fd, &vnode, true);
4024 if (descriptor == NULL)
4025 return B_FILE_ERROR;
4027 status_t status = vfs_read_pages(vnode, descriptor->cookie, pos, vecs,
4028 count, 0, _numBytes);
4030 put_fd(descriptor);
4031 return status;
4035 extern "C" status_t
4036 write_pages(int fd, off_t pos, const iovec* vecs, size_t count,
4037 size_t* _numBytes)
4039 struct file_descriptor* descriptor;
4040 struct vnode* vnode;
4042 descriptor = get_fd_and_vnode(fd, &vnode, true);
4043 if (descriptor == NULL)
4044 return B_FILE_ERROR;
4046 status_t status = vfs_write_pages(vnode, descriptor->cookie, pos, vecs,
4047 count, 0, _numBytes);
4049 put_fd(descriptor);
4050 return status;
4052 #endif
4055 extern "C" status_t
4056 read_file_io_vec_pages(int fd, const file_io_vec* fileVecs, size_t fileVecCount,
4057 const iovec* vecs, size_t vecCount, uint32* _vecIndex, size_t* _vecOffset,
4058 size_t* _bytes)
4060 struct file_descriptor* descriptor;
4061 struct vnode* vnode;
4063 descriptor = get_fd_and_vnode(fd, &vnode, true);
4064 if (descriptor == NULL)
4065 return B_FILE_ERROR;
4067 status_t status = common_file_io_vec_pages(vnode, descriptor->cookie,
4068 fileVecs, fileVecCount, vecs, vecCount, _vecIndex, _vecOffset, _bytes,
4069 false);
4071 put_fd(descriptor);
4072 return status;
4076 extern "C" status_t
4077 write_file_io_vec_pages(int fd, const file_io_vec* fileVecs, size_t fileVecCount,
4078 const iovec* vecs, size_t vecCount, uint32* _vecIndex, size_t* _vecOffset,
4079 size_t* _bytes)
4081 struct file_descriptor* descriptor;
4082 struct vnode* vnode;
4084 descriptor = get_fd_and_vnode(fd, &vnode, true);
4085 if (descriptor == NULL)
4086 return B_FILE_ERROR;
4088 status_t status = common_file_io_vec_pages(vnode, descriptor->cookie,
4089 fileVecs, fileVecCount, vecs, vecCount, _vecIndex, _vecOffset, _bytes,
4090 true);
4092 put_fd(descriptor);
4093 return status;
4097 extern "C" status_t
4098 entry_cache_add(dev_t mountID, ino_t dirID, const char* name, ino_t nodeID)
4100 // lookup mount -- the caller is required to make sure that the mount
4101 // won't go away
4102 MutexLocker locker(sMountMutex);
4103 struct fs_mount* mount = find_mount(mountID);
4104 if (mount == NULL)
4105 return B_BAD_VALUE;
4106 locker.Unlock();
4108 return mount->entry_cache.Add(dirID, name, nodeID, false);
4112 extern "C" status_t
4113 entry_cache_add_missing(dev_t mountID, ino_t dirID, const char* name)
4115 // lookup mount -- the caller is required to make sure that the mount
4116 // won't go away
4117 MutexLocker locker(sMountMutex);
4118 struct fs_mount* mount = find_mount(mountID);
4119 if (mount == NULL)
4120 return B_BAD_VALUE;
4121 locker.Unlock();
4123 return mount->entry_cache.Add(dirID, name, -1, true);
4127 extern "C" status_t
4128 entry_cache_remove(dev_t mountID, ino_t dirID, const char* name)
4130 // lookup mount -- the caller is required to make sure that the mount
4131 // won't go away
4132 MutexLocker locker(sMountMutex);
4133 struct fs_mount* mount = find_mount(mountID);
4134 if (mount == NULL)
4135 return B_BAD_VALUE;
4136 locker.Unlock();
4138 return mount->entry_cache.Remove(dirID, name);
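// Usage sketch for file systems: after resolving a directory entry, cache
// the result; after a failed lookup, cache the negative result; after
// unlinking, drop the entry again:
//
//    entry_cache_add(volume->id, dirID, "config", nodeID);
//    entry_cache_add_missing(volume->id, dirID, "no-such-file");
//    entry_cache_remove(volume->id, dirID, "config");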
4142 // #pragma mark - private VFS API
4143 // Functions the VFS exports for other parts of the kernel
4146 /*! Acquires another reference to the vnode that has to be released
4147 by calling vfs_put_vnode().
4149 void
4150 vfs_acquire_vnode(struct vnode* vnode)
4152 inc_vnode_ref_count(vnode);
4156 /*! This is currently called from file_cache_create() only.
4157 It's probably a temporary solution as long as devfs requires that
4158 fs_read_pages()/fs_write_pages() are called with the standard
4159 open cookie and not with a device cookie.
4160 If that's done differently, remove this call; it has no other
4161 purpose.
4163 extern "C" status_t
4164 vfs_get_cookie_from_fd(int fd, void** _cookie)
4166 struct file_descriptor* descriptor;
4168 descriptor = get_fd(get_current_io_context(true), fd);
4169 if (descriptor == NULL)
4170 return B_FILE_ERROR;
4172 *_cookie = descriptor->cookie;
4173 return B_OK;
4177 extern "C" status_t
4178 vfs_get_vnode_from_fd(int fd, bool kernel, struct vnode** vnode)
4180 *vnode = get_vnode_from_fd(fd, kernel);
4182 if (*vnode == NULL)
4183 return B_FILE_ERROR;
4185 return B_NO_ERROR;
4189 extern "C" status_t
4190 vfs_get_vnode_from_path(const char* path, bool kernel, struct vnode** _vnode)
4192 TRACE(("vfs_get_vnode_from_path: entry. path = '%s', kernel %d\n",
4193 path, kernel));
4195 KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
4196 if (pathBuffer.InitCheck() != B_OK)
4197 return B_NO_MEMORY;
4199 char* buffer = pathBuffer.LockBuffer();
4200 strlcpy(buffer, path, pathBuffer.BufferSize());
4202 struct vnode* vnode;
4203 status_t status = path_to_vnode(buffer, true, &vnode, NULL, kernel);
4204 if (status != B_OK)
4205 return status;
4207 *_vnode = vnode;
4208 return B_OK;
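// Usage sketch (illustrative only, hypothetical path): resolving a path to
// a vnode from kernel code and releasing the reference when done:
//
//	struct vnode* vnode;
//	if (vfs_get_vnode_from_path("/boot/system/settings/network", true,
//			&vnode) == B_OK) {
//		// ... use the vnode ...
//		vfs_put_vnode(vnode);
//	}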
4212 extern "C" status_t
4213 vfs_get_vnode(dev_t mountID, ino_t vnodeID, bool canWait, struct vnode** _vnode)
4215 struct vnode* vnode = NULL;
4217 status_t status = get_vnode(mountID, vnodeID, &vnode, canWait, false);
4218 if (status != B_OK)
4219 return status;
4221 *_vnode = vnode;
4222 return B_OK;
4226 extern "C" status_t
4227 vfs_entry_ref_to_vnode(dev_t mountID, ino_t directoryID,
4228 const char* name, struct vnode** _vnode)
4230 return entry_ref_to_vnode(mountID, directoryID, name, false, true, _vnode);
4234 extern "C" void
4235 vfs_vnode_to_node_ref(struct vnode* vnode, dev_t* _mountID, ino_t* _vnodeID)
4237 *_mountID = vnode->device;
4238 *_vnodeID = vnode->id;
4243 Helper function abstracting the process of "converting" a given
4244 vnode-pointer to a fs_vnode-pointer.
4245 Currently only used in bindfs.
4247 extern "C" fs_vnode*
4248 vfs_fsnode_for_vnode(struct vnode* vnode)
4250 return vnode;
4255 Calls fs_open() on the given vnode and returns a new
4256 file descriptor for it
4259 vfs_open_vnode(struct vnode* vnode, int openMode, bool kernel)
4261 return open_vnode(vnode, openMode, kernel);
4265 /*! Looks up a vnode with the given mount and vnode ID.
4266 Must only be used with "in-use" vnodes as it doesn't grab a reference
4267 to the node.
4268 It's currently only used by file_cache_create().
4270 extern "C" status_t
4271 vfs_lookup_vnode(dev_t mountID, ino_t vnodeID, struct vnode** _vnode)
4273 rw_lock_read_lock(&sVnodeLock);
4274 struct vnode* vnode = lookup_vnode(mountID, vnodeID);
4275 rw_lock_read_unlock(&sVnodeLock);
4277 if (vnode == NULL)
4278 return B_ERROR;
4280 *_vnode = vnode;
4281 return B_OK;
4285 extern "C" status_t
4286 vfs_get_fs_node_from_path(fs_volume* volume, const char* path,
4287 bool traverseLeafLink, bool kernel, void** _node)
4289 TRACE(("vfs_get_fs_node_from_path(volume = %p, path = \"%s\", kernel %d)\n",
4290 volume, path, kernel));
4292 KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
4293 if (pathBuffer.InitCheck() != B_OK)
4294 return B_NO_MEMORY;
4296 fs_mount* mount;
4297 status_t status = get_mount(volume->id, &mount);
4298 if (status != B_OK)
4299 return status;
4301 char* buffer = pathBuffer.LockBuffer();
4302 strlcpy(buffer, path, pathBuffer.BufferSize());
4304 struct vnode* vnode = mount->root_vnode;
4306 if (buffer[0] == '/')
4307 status = path_to_vnode(buffer, traverseLeafLink, &vnode, NULL, kernel);
4308 else {
4309 inc_vnode_ref_count(vnode);
4310 // vnode_path_to_vnode() releases a reference to the starting vnode
4311 status = vnode_path_to_vnode(vnode, buffer, traverseLeafLink, 0,
4312 kernel, &vnode, NULL);
4315 put_mount(mount);
4317 if (status != B_OK)
4318 return status;
4320 if (vnode->device != volume->id) {
4321 // wrong mount ID - must not gain access on foreign file system nodes
4322 put_vnode(vnode);
4323 return B_BAD_VALUE;
4326 // Use get_vnode() to resolve the cookie for the right layer.
4327 status = get_vnode(volume, vnode->id, _node);
4328 put_vnode(vnode);
4330 return status;
4334 status_t
4335 vfs_read_stat(int fd, const char* path, bool traverseLeafLink,
4336 struct stat* stat, bool kernel)
4338 status_t status;
4340 if (path != NULL) {
4341 // path given: get the stat of the node referred to by (fd, path)
4342 KPath pathBuffer(path, KPath::DEFAULT, B_PATH_NAME_LENGTH + 1);
4343 if (pathBuffer.InitCheck() != B_OK)
4344 return B_NO_MEMORY;
4346 status = common_path_read_stat(fd, pathBuffer.LockBuffer(),
4347 traverseLeafLink, stat, kernel);
4348 } else {
4349 // no path given: get the FD and use the FD operation
4350 struct file_descriptor* descriptor
4351 = get_fd(get_current_io_context(kernel), fd);
4352 if (descriptor == NULL)
4353 return B_FILE_ERROR;
4355 if (descriptor->ops->fd_read_stat)
4356 status = descriptor->ops->fd_read_stat(descriptor, stat);
4357 else
4358 status = B_UNSUPPORTED;
4360 put_fd(descriptor);
4363 return status;
4367 /*! Finds the full path to the file that contains the module \a moduleName,
4368 puts it into \a pathBuffer, and returns B_OK for success.
4369 If \a pathBuffer was too small, it returns \c B_BUFFER_OVERFLOW, and
4370 \c B_ENTRY_NOT_FOUND if no file could be found.
4371 \a pathBuffer is clobbered in any case and must not be relied on if this
4372 function returns unsuccessfully.
4373 \a basePath and \a pathBuffer must not point to the same space.
4375 status_t
4376 vfs_get_module_path(const char* basePath, const char* moduleName,
4377 char* pathBuffer, size_t bufferSize)
4379 struct vnode* dir;
4380 struct vnode* file;
4381 status_t status;
4382 size_t length;
4383 char* path;
4385 if (bufferSize == 0
4386 || strlcpy(pathBuffer, basePath, bufferSize) >= bufferSize)
4387 return B_BUFFER_OVERFLOW;
4389 status = path_to_vnode(pathBuffer, true, &dir, NULL, true);
4390 if (status != B_OK)
4391 return status;
4393 // the path buffer had been clobbered by the above call
4394 length = strlcpy(pathBuffer, basePath, bufferSize);
4395 if (pathBuffer[length - 1] != '/')
4396 pathBuffer[length++] = '/';
4398 path = pathBuffer + length;
4399 bufferSize -= length;
4401 while (moduleName) {
4402 char* nextPath = strchr(moduleName, '/');
4403 if (nextPath == NULL)
4404 length = strlen(moduleName);
4405 else {
4406 length = nextPath - moduleName;
4407 nextPath++;
4410 if (length + 1 >= bufferSize) {
4411 status = B_BUFFER_OVERFLOW;
4412 goto err;
4415 memcpy(path, moduleName, length);
4416 path[length] = '\0';
4417 moduleName = nextPath;
4419 status = vnode_path_to_vnode(dir, path, true, 0, true, &file, NULL);
4420 if (status != B_OK) {
4421 // vnode_path_to_vnode() has already released the reference to dir
4422 return status;
4425 if (S_ISDIR(file->Type())) {
4426 // go on to the next directory
4427 path[length] = '/';
4428 path[length + 1] = '\0';
4429 path += length + 1;
4430 bufferSize -= length + 1;
4432 dir = file;
4433 } else if (S_ISREG(file->Type())) {
4434 // it's a file so it should be what we've searched for
4435 put_vnode(file);
4437 return B_OK;
4438 } else {
4439 TRACE(("vfs_get_module_path(): something is strange here: "
4440 "0x%08" B_PRIx32 "...\n", file->Type()));
4441 status = B_ERROR;
4442 dir = file;
4443 goto err;
4447 // if we got here, the moduleName just pointed to a directory, not to
4448 // a real module - what should we do in this case?
4449 status = B_ENTRY_NOT_FOUND;
4451 err:
4452 put_vnode(dir);
4453 return status;
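// Worked example (hypothetical names): with basePath
// "/boot/system/add-ons/kernel" and moduleName "bus_managers/pci/v1", the
// loop above first descends into "bus_managers"; if "pci" then turns out to
// be a regular file, the function returns B_OK with pathBuffer holding
// "/boot/system/add-ons/kernel/bus_managers/pci". If the components run out
// while still on a directory, B_ENTRY_NOT_FOUND is returned instead.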
4457 /*! \brief Normalizes a given path.
4459 The path must refer to an existing or non-existing entry in an existing
4460 directory; that is, after chopping off the leaf component, the remaining
4461 path must refer to an existing directory.
4463 The returned path will be canonical in that it will be absolute, will not
4464 contain any "." or ".." components or duplicate occurrences of '/'s,
4465 and none of the directory components will be symbolic links.
4467 Any two paths referring to the same entry will result in the same
4468 normalized path (well, that is pretty much the definition of `normalized',
4469 isn't it :-).
4471 \param path The path to be normalized.
4472 \param buffer The buffer into which the normalized path will be written.
4473 May be the same one as \a path.
4474 \param bufferSize The size of \a buffer.
4475 \param traverseLink If \c true, the function also resolves leaf symlinks.
4476 \param kernel \c true, if the IO context of the kernel shall be used,
4477 otherwise that of the team this thread belongs to. Only relevant
4478 if the path is relative (to get the CWD).
4479 \return \c B_OK if everything went fine, another error code otherwise.
4481 status_t
4482 vfs_normalize_path(const char* path, char* buffer, size_t bufferSize,
4483 bool traverseLink, bool kernel)
4485 if (!path || !buffer || bufferSize < 1)
4486 return B_BAD_VALUE;
4488 if (path != buffer) {
4489 if (strlcpy(buffer, path, bufferSize) >= bufferSize)
4490 return B_BUFFER_OVERFLOW;
4493 return normalize_path(buffer, bufferSize, traverseLink, kernel);
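// Example (illustrative): assuming the intermediate directories exist and
// no symlinks are involved, both of these calls yield "/boot/home/config":
//
//	char buffer[B_PATH_NAME_LENGTH];
//	vfs_normalize_path("/boot/home/./config", buffer, sizeof(buffer),
//		true, true);
//	vfs_normalize_path("/boot//home/Desktop/../config", buffer,
//		sizeof(buffer), true, true);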
4497 /*! \brief Gets the parent of the passed in node.
4499 Gets the parent of the passed in node, and correctly resolves covered
4500 nodes.
4502 extern "C" status_t
4503 vfs_resolve_parent(struct vnode* parent, dev_t* device, ino_t* node)
4505 return resolve_covered_parent(parent, device, node,
4506 get_current_io_context(true));
4510 /*! \brief Creates a special node in the file system.
4512 The caller gets a reference to the newly created node (which is passed
4513 back through \a _createdVnode) and is responsible for releasing it.
4515 \param path The path where to create the entry for the node. Can be \c NULL,
4516 in which case the node is created without an entry in the root FS -- it
4517 will automatically be deleted when the last reference has been released.
4518 \param subVnode The definition of the subnode. Can be \c NULL, in which case
4519 the target file system will just create the node with its standard
4520 operations. Depending on the type of the node a subnode might be created
4521 automatically, though.
4522 \param mode The type and permissions for the node to be created.
4523 \param flags Flags to be passed to the creating FS.
4524 \param kernel \c true, if called in the kernel context (relevant only if
4525 \a path is not \c NULL and not absolute).
4526 \param _superVnode Pointer to a pre-allocated structure to be filled by the
4527 file system creating the node, with the private data pointer and
4528 operations for the super node. Can be \c NULL.
4529 \param _createdVnode Pointer to pre-allocated storage in which to store the
4530 pointer to the newly created node.
4531 \return \c B_OK, if everything went fine, another error code otherwise.
4533 status_t
4534 vfs_create_special_node(const char* path, fs_vnode* subVnode, mode_t mode,
4535 uint32 flags, bool kernel, fs_vnode* _superVnode,
4536 struct vnode** _createdVnode)
4538 struct vnode* dirNode;
4539 char _leaf[B_FILE_NAME_LENGTH];
4540 char* leaf = NULL;
4542 if (path) {
4543 // We've got a path. Get the dir vnode and the leaf name.
4544 KPath tmpPathBuffer(B_PATH_NAME_LENGTH + 1);
4545 if (tmpPathBuffer.InitCheck() != B_OK)
4546 return B_NO_MEMORY;
4548 char* tmpPath = tmpPathBuffer.LockBuffer();
4549 if (strlcpy(tmpPath, path, B_PATH_NAME_LENGTH) >= B_PATH_NAME_LENGTH)
4550 return B_NAME_TOO_LONG;
4552 // get the dir vnode and the leaf name
4553 leaf = _leaf;
4554 status_t error = path_to_dir_vnode(tmpPath, &dirNode, leaf, kernel);
4555 if (error != B_OK)
4556 return error;
4557 } else {
4558 // No path. Create the node in the root FS.
4559 dirNode = sRoot;
4560 inc_vnode_ref_count(dirNode);
4563 VNodePutter _(dirNode);
4565 // check support for creating special nodes
4566 if (!HAS_FS_CALL(dirNode, create_special_node))
4567 return B_UNSUPPORTED;
4569 // create the node
4570 fs_vnode superVnode;
4571 ino_t nodeID;
4572 status_t status = FS_CALL(dirNode, create_special_node, leaf, subVnode,
4573 mode, flags, _superVnode != NULL ? _superVnode : &superVnode, &nodeID);
4574 if (status != B_OK)
4575 return status;
4577 // lookup the node
4578 rw_lock_read_lock(&sVnodeLock);
4579 *_createdVnode = lookup_vnode(dirNode->mount->id, nodeID);
4580 rw_lock_read_unlock(&sVnodeLock);
4582 if (*_createdVnode == NULL) {
4583 panic("vfs_create_special_node(): lookup of node failed");
4584 return B_ERROR;
4587 return B_OK;
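// Sketch (illustrative, hypothetical path): creating a named FIFO through
// this API. Passing NULL for \a subVnode lets the target FS create the node
// with its standard operations; the caller owns one reference afterwards:
//
//	struct vnode* createdVnode;
//	status_t error = vfs_create_special_node("/var/run/my_fifo", NULL,
//		S_IFIFO | 0666, 0, true, NULL, &createdVnode);
//	if (error == B_OK)
//		vfs_put_vnode(createdVnode);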
4591 extern "C" void
4592 vfs_put_vnode(struct vnode* vnode)
4594 put_vnode(vnode);
4598 extern "C" status_t
4599 vfs_get_cwd(dev_t* _mountID, ino_t* _vnodeID)
4601 // Get current working directory from io context
4602 struct io_context* context = get_current_io_context(false);
4603 status_t status = B_OK;
4605 mutex_lock(&context->io_mutex);
4607 if (context->cwd != NULL) {
4608 *_mountID = context->cwd->device;
4609 *_vnodeID = context->cwd->id;
4610 } else
4611 status = B_ERROR;
4613 mutex_unlock(&context->io_mutex);
4614 return status;
4618 status_t
4619 vfs_unmount(dev_t mountID, uint32 flags)
4621 return fs_unmount(NULL, mountID, flags, true);
4625 extern "C" status_t
4626 vfs_disconnect_vnode(dev_t mountID, ino_t vnodeID)
4628 struct vnode* vnode;
4630 status_t status = get_vnode(mountID, vnodeID, &vnode, true, true);
4631 if (status != B_OK)
4632 return status;
4634 disconnect_mount_or_vnode_fds(vnode->mount, vnode);
4635 put_vnode(vnode);
4636 return B_OK;
4640 extern "C" void
4641 vfs_free_unused_vnodes(int32 level)
4643 vnode_low_resource_handler(NULL,
4644 B_KERNEL_RESOURCE_PAGES | B_KERNEL_RESOURCE_MEMORY
4645 | B_KERNEL_RESOURCE_ADDRESS_SPACE,
4646 level);
4650 extern "C" bool
4651 vfs_can_page(struct vnode* vnode, void* cookie)
4653 FUNCTION(("vfs_canpage: vnode %p\n", vnode));
4655 if (HAS_FS_CALL(vnode, can_page))
4656 return FS_CALL(vnode, can_page, cookie);
4657 return false;
4661 extern "C" status_t
4662 vfs_read_pages(struct vnode* vnode, void* cookie, off_t pos,
4663 const generic_io_vec* vecs, size_t count, uint32 flags,
4664 generic_size_t* _numBytes)
4666 FUNCTION(("vfs_read_pages: vnode %p, vecs %p, pos %" B_PRIdOFF "\n", vnode,
4667 vecs, pos));
4669 #if VFS_PAGES_IO_TRACING
4670 generic_size_t bytesRequested = *_numBytes;
4671 #endif
4673 IORequest request;
4674 status_t status = request.Init(pos, vecs, count, *_numBytes, false, flags);
4675 if (status == B_OK) {
4676 status = vfs_vnode_io(vnode, cookie, &request);
4677 if (status == B_OK)
4678 status = request.Wait();
4679 *_numBytes = request.TransferredBytes();
4682 TPIO(ReadPages(vnode, cookie, pos, vecs, count, flags, bytesRequested,
4683 status, *_numBytes));
4685 return status;
4689 extern "C" status_t
4690 vfs_write_pages(struct vnode* vnode, void* cookie, off_t pos,
4691 const generic_io_vec* vecs, size_t count, uint32 flags,
4692 generic_size_t* _numBytes)
4694 FUNCTION(("vfs_write_pages: vnode %p, vecs %p, pos %" B_PRIdOFF "\n", vnode,
4695 vecs, pos));
4697 #if VFS_PAGES_IO_TRACING
4698 generic_size_t bytesRequested = *_numBytes;
4699 #endif
4701 IORequest request;
4702 status_t status = request.Init(pos, vecs, count, *_numBytes, true, flags);
4703 if (status == B_OK) {
4704 status = vfs_vnode_io(vnode, cookie, &request);
4705 if (status == B_OK)
4706 status = request.Wait();
4707 *_numBytes = request.TransferredBytes();
4710 TPIO(WritePages(vnode, cookie, pos, vecs, count, flags, bytesRequested,
4711 status, *_numBytes));
4713 return status;
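// Usage sketch (illustrative; "vnode", "cookie" and "physicalAddress" are
// assumed to be provided by the caller): reading one page into a physical
// buffer, which is what the B_PHYSICAL_IO_REQUEST flag requests:
//
//	generic_io_vec vec;
//	vec.base = physicalAddress;
//	vec.length = B_PAGE_SIZE;
//	generic_size_t bytes = B_PAGE_SIZE;
//	status_t error = vfs_read_pages(vnode, cookie, 0, &vec, 1,
//		B_PHYSICAL_IO_REQUEST, &bytes);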
4717 /*! Gets the vnode's VMCache object. If the vnode doesn't have one yet, it
4718 will be created, provided \a allocate is \c true.
4719 On success, the function also acquires a reference to the cache
4720 it returns.
4722 extern "C" status_t
4723 vfs_get_vnode_cache(struct vnode* vnode, VMCache** _cache, bool allocate)
4725 if (vnode->cache != NULL) {
4726 vnode->cache->AcquireRef();
4727 *_cache = vnode->cache;
4728 return B_OK;
4731 rw_lock_read_lock(&sVnodeLock);
4732 vnode->Lock();
4734 status_t status = B_OK;
4736 // The cache could have been created in the meantime
4737 if (vnode->cache == NULL) {
4738 if (allocate) {
4739 // TODO: actually the vnode needs to be busy already here, or
4740 // else this won't work...
4741 bool wasBusy = vnode->IsBusy();
4742 vnode->SetBusy(true);
4744 vnode->Unlock();
4745 rw_lock_read_unlock(&sVnodeLock);
4747 status = vm_create_vnode_cache(vnode, &vnode->cache);
4749 rw_lock_read_lock(&sVnodeLock);
4750 vnode->Lock();
4751 vnode->SetBusy(wasBusy);
4752 } else
4753 status = B_BAD_VALUE;
4756 vnode->Unlock();
4757 rw_lock_read_unlock(&sVnodeLock);
4759 if (status == B_OK) {
4760 vnode->cache->AcquireRef();
4761 *_cache = vnode->cache;
4764 return status;
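// Sketch (illustrative): a typical caller, e.g. code mapping a file, asks
// for the cache to be created on demand and drops its reference later:
//
//	VMCache* cache;
//	if (vfs_get_vnode_cache(vnode, &cache, true) == B_OK) {
//		// ... use the cache ...
//		cache->ReleaseRef();
//	}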
4768 status_t
4769 vfs_get_file_map(struct vnode* vnode, off_t offset, size_t size,
4770 file_io_vec* vecs, size_t* _count)
4772 FUNCTION(("vfs_get_file_map: vnode %p, vecs %p, offset %" B_PRIdOFF
4773 ", size = %" B_PRIuSIZE "\n", vnode, vecs, offset, size));
4775 return FS_CALL(vnode, get_file_map, offset, size, vecs, _count);
4779 status_t
4780 vfs_stat_vnode(struct vnode* vnode, struct stat* stat)
4782 status_t status = FS_CALL(vnode, read_stat, stat);
4784 // fill in the st_dev and st_ino fields
4785 if (status == B_OK) {
4786 stat->st_dev = vnode->device;
4787 stat->st_ino = vnode->id;
4788 // the rdev field must stay unset for non-special files
4789 if (!S_ISBLK(stat->st_mode) && !S_ISCHR(stat->st_mode))
4790 stat->st_rdev = -1;
4793 return status;
4797 status_t
4798 vfs_stat_node_ref(dev_t device, ino_t inode, struct stat* stat)
4800 struct vnode* vnode;
4801 status_t status = get_vnode(device, inode, &vnode, true, false);
4802 if (status != B_OK)
4803 return status;
4805 status = vfs_stat_vnode(vnode, stat);
4807 put_vnode(vnode);
4808 return status;
4812 status_t
4813 vfs_get_vnode_name(struct vnode* vnode, char* name, size_t nameSize)
4815 return get_vnode_name(vnode, NULL, name, nameSize, true);
4819 status_t
4820 vfs_entry_ref_to_path(dev_t device, ino_t inode, const char* leaf,
4821 bool kernel, char* path, size_t pathLength)
4823 struct vnode* vnode;
4824 status_t status;
4826 // filter invalid leaf names
4827 if (leaf != NULL && (leaf[0] == '\0' || strchr(leaf, '/')))
4828 return B_BAD_VALUE;
4830 // get the vnode matching the dir's node_ref
4831 if (leaf && (strcmp(leaf, ".") == 0 || strcmp(leaf, "..") == 0)) {
4832 // special cases "." and "..": we can directly get the vnode of the
4833 // referenced directory
4834 status = entry_ref_to_vnode(device, inode, leaf, false, kernel, &vnode);
4835 leaf = NULL;
4836 } else
4837 status = get_vnode(device, inode, &vnode, true, false);
4838 if (status != B_OK)
4839 return status;
4841 // get the directory path
4842 status = dir_vnode_to_path(vnode, path, pathLength, kernel);
4843 put_vnode(vnode);
4844 // we don't need the vnode anymore
4845 if (status != B_OK)
4846 return status;
4848 // append the leaf name
4849 if (leaf) {
4850 // insert a directory separator if this is not the file system root
4851 if ((strcmp(path, "/") && strlcat(path, "/", pathLength)
4852 >= pathLength)
4853 || strlcat(path, leaf, pathLength) >= pathLength) {
4854 return B_NAME_TOO_LONG;
4858 return B_OK;
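// Example (hypothetical values): given the node_ref of "/etc" and the leaf
// name "passwd", this builds the absolute path "/etc/passwd":
//
//	char path[B_PATH_NAME_LENGTH];
//	status_t error = vfs_entry_ref_to_path(device, directoryInode, "passwd",
//		true, path, sizeof(path));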
4862 /*! If the given descriptor locked its vnode, that lock will be released. */
4863 void
4864 vfs_unlock_vnode_if_locked(struct file_descriptor* descriptor)
4866 struct vnode* vnode = fd_vnode(descriptor);
4868 if (vnode != NULL && vnode->mandatory_locked_by == descriptor)
4869 vnode->mandatory_locked_by = NULL;
4873 /*! Closes all file descriptors of the specified I/O context that
4874 have the O_CLOEXEC flag set.
4876 void
4877 vfs_exec_io_context(io_context* context)
4879 uint32 i;
4881 for (i = 0; i < context->table_size; i++) {
4882 mutex_lock(&context->io_mutex);
4884 struct file_descriptor* descriptor = context->fds[i];
4885 bool remove = false;
4887 if (descriptor != NULL && fd_close_on_exec(context, i)) {
4888 context->fds[i] = NULL;
4889 context->num_used_fds--;
4891 remove = true;
4894 mutex_unlock(&context->io_mutex);
4896 if (remove) {
4897 close_fd(descriptor);
4898 put_fd(descriptor);
4904 /*! Sets up a new io_context structure, and inherits the properties
4905 of the parent io_context if one is given.
4907 io_context*
4908 vfs_new_io_context(io_context* parentContext, bool purgeCloseOnExec)
4910 io_context* context = (io_context*)malloc(sizeof(io_context));
4911 if (context == NULL)
4912 return NULL;
4914 TIOC(NewIOContext(context, parentContext));
4916 memset(context, 0, sizeof(io_context));
4917 context->ref_count = 1;
4919 MutexLocker parentLocker;
4921 size_t tableSize;
4922 if (parentContext != NULL) {
4923 parentLocker.SetTo(parentContext->io_mutex, false);
4924 tableSize = parentContext->table_size;
4925 } else
4926 tableSize = DEFAULT_FD_TABLE_SIZE;
4928 // allocate space for FDs and their close-on-exec flag
4929 context->fds = (file_descriptor**)malloc(
4930 sizeof(struct file_descriptor*) * tableSize
4931 + sizeof(struct select_sync*) * tableSize
4932 + (tableSize + 7) / 8);
4933 if (context->fds == NULL) {
4934 free(context);
4935 return NULL;
4938 context->select_infos = (select_info**)(context->fds + tableSize);
4939 context->fds_close_on_exec = (uint8*)(context->select_infos + tableSize);
4941 memset(context->fds, 0, sizeof(struct file_descriptor*) * tableSize
4942 + sizeof(struct select_sync*) * tableSize
4943 + (tableSize + 7) / 8);
4945 mutex_init(&context->io_mutex, "I/O context");
4947 // Copy all parent file descriptors
4949 if (parentContext != NULL) {
4950 size_t i;
4952 mutex_lock(&sIOContextRootLock);
4953 context->root = parentContext->root;
4954 if (context->root)
4955 inc_vnode_ref_count(context->root);
4956 mutex_unlock(&sIOContextRootLock);
4958 context->cwd = parentContext->cwd;
4959 if (context->cwd)
4960 inc_vnode_ref_count(context->cwd);
4962 if (parentContext->inherit_fds) {
4963 for (i = 0; i < tableSize; i++) {
4964 struct file_descriptor* descriptor = parentContext->fds[i];
4966 if (descriptor != NULL
4967 && (descriptor->open_mode & O_DISCONNECTED) == 0) {
4968 bool closeOnExec = fd_close_on_exec(parentContext, i);
4969 if (closeOnExec && purgeCloseOnExec)
4970 continue;
4972 TFD(InheritFD(context, i, descriptor, parentContext));
4974 context->fds[i] = descriptor;
4975 context->num_used_fds++;
4976 atomic_add(&descriptor->ref_count, 1);
4977 atomic_add(&descriptor->open_count, 1);
4979 if (closeOnExec)
4980 fd_set_close_on_exec(context, i, true);
4985 parentLocker.Unlock();
4986 } else {
4987 context->root = sRoot;
4988 context->cwd = sRoot;
4990 if (context->root)
4991 inc_vnode_ref_count(context->root);
4993 if (context->cwd)
4994 inc_vnode_ref_count(context->cwd);
4997 context->table_size = tableSize;
4998 context->inherit_fds = parentContext != NULL;
5000 list_init(&context->node_monitors);
5001 context->max_monitors = DEFAULT_NODE_MONITORS;
5003 return context;
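// Sketch (illustrative): the two typical uses. A fork()-style caller
// inherits everything; a caller spawning a fresh team can purge
// close-on-exec descriptors right away (an exec() on an existing team goes
// through vfs_exec_io_context() instead):
//
//	io_context* forkContext = vfs_new_io_context(parentContext, false);
//	io_context* spawnContext = vfs_new_io_context(parentContext, true);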
5007 void
5008 vfs_get_io_context(io_context* context)
5010 atomic_add(&context->ref_count, 1);
5014 void
5015 vfs_put_io_context(io_context* context)
5017 if (atomic_add(&context->ref_count, -1) == 1)
5018 free_io_context(context);
5022 status_t
5023 vfs_resize_fd_table(struct io_context* context, uint32 newSize)
5025 if (newSize == 0 || newSize > MAX_FD_TABLE_SIZE)
5026 return B_BAD_VALUE;
5028 TIOC(ResizeIOContext(context, newSize));
5030 MutexLocker _(context->io_mutex);
5032 uint32 oldSize = context->table_size;
5033 int oldCloseOnExitBitmapSize = (oldSize + 7) / 8;
5034 int newCloseOnExitBitmapSize = (newSize + 7) / 8;
5036 // If the tables shrink, make sure none of the fds being dropped are in use.
5037 if (newSize < oldSize) {
5038 for (uint32 i = oldSize; i-- > newSize;) {
5039 if (context->fds[i])
5040 return B_BUSY;
5044 // store pointers to the old tables
5045 file_descriptor** oldFDs = context->fds;
5046 select_info** oldSelectInfos = context->select_infos;
5047 uint8* oldCloseOnExecTable = context->fds_close_on_exec;
5049 // allocate new tables
5050 file_descriptor** newFDs = (file_descriptor**)malloc(
5051 sizeof(struct file_descriptor*) * newSize
5052 + sizeof(struct select_sync*) * newSize
5053 + newCloseOnExitBitmapSize);
5054 if (newFDs == NULL)
5055 return B_NO_MEMORY;
5057 context->fds = newFDs;
5058 context->select_infos = (select_info**)(context->fds + newSize);
5059 context->fds_close_on_exec = (uint8*)(context->select_infos + newSize);
5060 context->table_size = newSize;
5062 // copy entries from old tables
5063 uint32 toCopy = min_c(oldSize, newSize);
5065 memcpy(context->fds, oldFDs, sizeof(void*) * toCopy);
5066 memcpy(context->select_infos, oldSelectInfos, sizeof(void*) * toCopy);
5067 memcpy(context->fds_close_on_exec, oldCloseOnExecTable,
5068 min_c(oldCloseOnExitBitmapSize, newCloseOnExitBitmapSize));
5070 // clear additional entries, if the tables grow
5071 if (newSize > oldSize) {
5072 memset(context->fds + oldSize, 0, sizeof(void*) * (newSize - oldSize));
5073 memset(context->select_infos + oldSize, 0,
5074 sizeof(void*) * (newSize - oldSize));
5075 memset(context->fds_close_on_exec + oldCloseOnExitBitmapSize, 0,
5076 newCloseOnExitBitmapSize - oldCloseOnExitBitmapSize);
5079 free(oldFDs);
5081 return B_OK;
5085 /*! \brief Resolves a vnode to the vnode it is covered by, if any.
5087 Given an arbitrary vnode (identified by mount and node ID), the function
5088 checks whether the vnode is covered by another vnode. If it is, the
5089 function returns the mount and node ID of the covering vnode. Otherwise
5090 it simply returns the supplied mount and node ID.
5092 In case of error (e.g. the supplied node could not be found) the variables
5093 for storing the resolved mount and node ID remain untouched and an error
5094 code is returned.
5096 \param mountID The mount ID of the vnode in question.
5097 \param nodeID The node ID of the vnode in question.
5098 \param resolvedMountID Pointer to storage for the resolved mount ID.
5099 \param resolvedNodeID Pointer to storage for the resolved node ID.
5100 \return
5101 - \c B_OK, if everything went fine,
5102 - another error code, if something went wrong.
5104 status_t
5105 vfs_resolve_vnode_to_covering_vnode(dev_t mountID, ino_t nodeID,
5106 dev_t* resolvedMountID, ino_t* resolvedNodeID)
5108 // get the node
5109 struct vnode* node;
5110 status_t error = get_vnode(mountID, nodeID, &node, true, false);
5111 if (error != B_OK)
5112 return error;
5114 // resolve the node
5115 if (Vnode* coveringNode = get_covering_vnode(node)) {
5116 put_vnode(node);
5117 node = coveringNode;
5120 // set the return values
5121 *resolvedMountID = node->device;
5122 *resolvedNodeID = node->id;
5124 put_vnode(node);
5126 return B_OK;
5130 status_t
5131 vfs_get_mount_point(dev_t mountID, dev_t* _mountPointMountID,
5132 ino_t* _mountPointNodeID)
5134 ReadLocker nodeLocker(sVnodeLock);
5135 MutexLocker mountLocker(sMountMutex);
5137 struct fs_mount* mount = find_mount(mountID);
5138 if (mount == NULL)
5139 return B_BAD_VALUE;
5141 Vnode* mountPoint = mount->covers_vnode;
5143 *_mountPointMountID = mountPoint->device;
5144 *_mountPointNodeID = mountPoint->id;
5146 return B_OK;
5150 status_t
5151 vfs_bind_mount_directory(dev_t mountID, ino_t nodeID, dev_t coveredMountID,
5152 ino_t coveredNodeID)
5154 // get the vnodes
5155 Vnode* vnode;
5156 status_t error = get_vnode(mountID, nodeID, &vnode, true, false);
5157 if (error != B_OK)
5158 return B_BAD_VALUE;
5159 VNodePutter vnodePutter(vnode);
5161 Vnode* coveredVnode;
5162 error = get_vnode(coveredMountID, coveredNodeID, &coveredVnode, true,
5163 false);
5164 if (error != B_OK)
5165 return B_BAD_VALUE;
5166 VNodePutter coveredVnodePutter(coveredVnode);
5168 // establish the covered/covering links
5169 WriteLocker locker(sVnodeLock);
5171 if (vnode->covers != NULL || coveredVnode->covered_by != NULL
5172 || vnode->mount->unmounting || coveredVnode->mount->unmounting) {
5173 return B_BUSY;
5176 vnode->covers = coveredVnode;
5177 vnode->SetCovering(true);
5179 coveredVnode->covered_by = vnode;
5180 coveredVnode->SetCovered(true);
5182 	// the two vnodes now reference each other
5183 inc_vnode_ref_count(vnode);
5184 inc_vnode_ref_count(coveredVnode);
5186 return B_OK;
5191 vfs_getrlimit(int resource, struct rlimit* rlp)
5193 if (!rlp)
5194 return B_BAD_ADDRESS;
5196 switch (resource) {
5197 case RLIMIT_NOFILE:
5199 struct io_context* context = get_current_io_context(false);
5200 MutexLocker _(context->io_mutex);
5202 rlp->rlim_cur = context->table_size;
5203 rlp->rlim_max = MAX_FD_TABLE_SIZE;
5204 return 0;
5207 case RLIMIT_NOVMON:
5209 struct io_context* context = get_current_io_context(false);
5210 MutexLocker _(context->io_mutex);
5212 rlp->rlim_cur = context->max_monitors;
5213 rlp->rlim_max = MAX_NODE_MONITORS;
5214 return 0;
5217 default:
5218 return B_BAD_VALUE;
5224 vfs_setrlimit(int resource, const struct rlimit* rlp)
5226 if (!rlp)
5227 return B_BAD_ADDRESS;
5229 switch (resource) {
5230 case RLIMIT_NOFILE:
5231 /* TODO: check getuid() */
5232 if (rlp->rlim_max != RLIM_SAVED_MAX
5233 && rlp->rlim_max != MAX_FD_TABLE_SIZE)
5234 return B_NOT_ALLOWED;
5236 return vfs_resize_fd_table(get_current_io_context(false),
5237 rlp->rlim_cur);
5239 case RLIMIT_NOVMON:
5240 /* TODO: check getuid() */
5241 if (rlp->rlim_max != RLIM_SAVED_MAX
5242 && rlp->rlim_max != MAX_NODE_MONITORS)
5243 return B_NOT_ALLOWED;
5245 return resize_monitor_table(get_current_io_context(false),
5246 rlp->rlim_cur);
5248 default:
5249 return B_BAD_VALUE;
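// Usage sketch (illustrative, userland view): growing the FD table via the
// POSIX call that ends up here; rlim_max must be RLIM_SAVED_MAX or the
// allowed maximum for the request to be accepted:
//
//	struct rlimit rl;
//	rl.rlim_cur = 1024;
//	rl.rlim_max = RLIM_SAVED_MAX;
//	setrlimit(RLIMIT_NOFILE, &rl);
//		// ends up in vfs_setrlimit()/vfs_resize_fd_table()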
5254 status_t
5255 vfs_init(kernel_args* args)
5257 vnode::StaticInit();
5259 sVnodeTable = new(std::nothrow) VnodeTable();
5260 if (sVnodeTable == NULL || sVnodeTable->Init(VNODE_HASH_TABLE_SIZE) != B_OK)
5261 panic("vfs_init: error creating vnode hash table\n");
5263 struct vnode dummy_vnode;
5264 list_init_etc(&sUnusedVnodeList, offset_of_member(dummy_vnode, unused_link));
5266 struct fs_mount dummyMount;
5267 sMountsTable = new(std::nothrow) MountTable();
5268 if (sMountsTable == NULL
5269 || sMountsTable->Init(MOUNTS_HASH_TABLE_SIZE) != B_OK)
5270 panic("vfs_init: error creating mounts hash table\n");
5272 node_monitor_init();
5274 sRoot = NULL;
5276 recursive_lock_init(&sMountOpLock, "vfs_mount_op_lock");
5278 if (block_cache_init() != B_OK)
5279 return B_ERROR;
5281 #ifdef ADD_DEBUGGER_COMMANDS
5282 // add some debugger commands
5283 add_debugger_command_etc("vnode", &dump_vnode,
5284 "Print info about the specified vnode",
5285 "[ \"-p\" ] ( <vnode> | <devID> <nodeID> )\n"
5286 "Prints information about the vnode specified by address <vnode> or\n"
5287 "<devID>, <vnodeID> pair. If \"-p\" is given, a path of the vnode is\n"
5288 "constructed and printed. It might not be possible to construct a\n"
5289 "complete path, though.\n",
5291 add_debugger_command("vnodes", &dump_vnodes,
5292 "list all vnodes (from the specified device)");
5293 add_debugger_command("vnode_caches", &dump_vnode_caches,
5294 "list all vnode caches");
5295 add_debugger_command("mount", &dump_mount,
5296 "info about the specified fs_mount");
5297 add_debugger_command("mounts", &dump_mounts, "list all fs_mounts");
5298 add_debugger_command("io_context", &dump_io_context,
5299 "info about the I/O context");
5300 add_debugger_command("vnode_usage", &dump_vnode_usage,
5301 "info about vnode usage");
5302 #endif
5304 register_low_resource_handler(&vnode_low_resource_handler, NULL,
5305 B_KERNEL_RESOURCE_PAGES | B_KERNEL_RESOURCE_MEMORY
5306 | B_KERNEL_RESOURCE_ADDRESS_SPACE,
5309 fifo_init();
5310 file_map_init();
5312 return file_cache_init();
5316 // #pragma mark - fd_ops implementations
5320 Calls fs_open() on the given vnode and returns a new
5321 file descriptor for it
5323 static int
5324 open_vnode(struct vnode* vnode, int openMode, bool kernel)
5326 void* cookie;
5327 status_t status = FS_CALL(vnode, open, openMode, &cookie);
5328 if (status != B_OK)
5329 return status;
5331 int fd = get_new_fd(FDTYPE_FILE, NULL, vnode, cookie, openMode, kernel);
5332 if (fd < 0) {
5333 FS_CALL(vnode, close, cookie);
5334 FS_CALL(vnode, free_cookie, cookie);
5336 return fd;
5341 Calls the FS's create() hook (or opens an already existing entry, unless
5342 O_EXCL is set) and returns a new file descriptor for it
5344 static int
5345 create_vnode(struct vnode* directory, const char* name, int openMode,
5346 int perms, bool kernel)
5348 bool traverse = ((openMode & (O_NOTRAVERSE | O_NOFOLLOW)) == 0);
5349 status_t status = B_ERROR;
5350 struct vnode* vnode;
5351 void* cookie;
5352 ino_t newID;
5354 // This is somewhat tricky: If the entry already exists, the FS responsible
5355 // for the directory might not necessarily also be the one responsible for
5356 // the node the entry refers to (e.g. in case of mount points or FIFOs). So
5357 // we can actually never call the create() hook without O_EXCL. Instead we
5358 // try to look the entry up first. If it already exists, we just open the
5359 // node (unless O_EXCL), otherwise we call create() with O_EXCL. This
5360 // introduces a race condition, since someone else might have created the
5361 // entry in the meantime. We hope the respective FS returns the correct
5362 // error code, in which case we retry (up to 3 times).
5364 for (int i = 0; i < 3 && status != B_OK; i++) {
5365 // look the node up
5366 status = lookup_dir_entry(directory, name, &vnode);
5367 if (status == B_OK) {
5368 VNodePutter putter(vnode);
5370 if ((openMode & O_EXCL) != 0)
5371 return B_FILE_EXISTS;
5373 // If the node is a symlink, we have to follow it, unless
5374 // O_NOTRAVERSE is set.
5375 if (S_ISLNK(vnode->Type()) && traverse) {
5376 putter.Put();
5377 char clonedName[B_FILE_NAME_LENGTH + 1];
5378 if (strlcpy(clonedName, name, B_FILE_NAME_LENGTH)
5379 >= B_FILE_NAME_LENGTH) {
5380 return B_NAME_TOO_LONG;
5383 inc_vnode_ref_count(directory);
5384 status = vnode_path_to_vnode(directory, clonedName, true, 0,
5385 kernel, &vnode, NULL);
5386 if (status != B_OK)
5387 return status;
5389 putter.SetTo(vnode);
5392 if ((openMode & O_NOFOLLOW) != 0 && S_ISLNK(vnode->Type()))
5393 return B_LINK_LIMIT;
5395 int fd = open_vnode(vnode, openMode & ~O_CREAT, kernel);
5396 // on success keep the vnode reference for the FD
5397 if (fd >= 0)
5398 putter.Detach();
5400 return fd;
5403 // it doesn't exist yet -- try to create it
5405 if (!HAS_FS_CALL(directory, create))
5406 return B_READ_ONLY_DEVICE;
5408 status = FS_CALL(directory, create, name, openMode | O_EXCL, perms,
5409 &cookie, &newID);
5410 if (status != B_OK
5411 && ((openMode & O_EXCL) != 0 || status != B_FILE_EXISTS)) {
5412 return status;
5416 if (status != B_OK)
5417 return status;
5419 // the node has been created successfully
5421 rw_lock_read_lock(&sVnodeLock);
5422 vnode = lookup_vnode(directory->device, newID);
5423 rw_lock_read_unlock(&sVnodeLock);
5425 if (vnode == NULL) {
5426 panic("vfs: fs_create() returned success but there is no vnode, "
5427 "mount ID %" B_PRIdDEV "!\n", directory->device);
5428 return B_BAD_VALUE;
5431 int fd = get_new_fd(FDTYPE_FILE, NULL, vnode, cookie, openMode, kernel);
5432 if (fd >= 0)
5433 return fd;
5435 status = fd;
5437 // something went wrong, clean up
5439 FS_CALL(vnode, close, cookie);
5440 FS_CALL(vnode, free_cookie, cookie);
5441 put_vnode(vnode);
5443 FS_CALL(directory, unlink, name);
5445 return status;
5449 /*! Calls fs open_dir() on the given vnode and returns a new
5450 file descriptor for it
5452 static int
5453 open_dir_vnode(struct vnode* vnode, bool kernel)
5455 void* cookie;
5456 status_t status = FS_CALL(vnode, open_dir, &cookie);
5457 if (status != B_OK)
5458 return status;
5460 // directory is opened, create a fd
5461 status = get_new_fd(FDTYPE_DIR, NULL, vnode, cookie, O_CLOEXEC, kernel);
5462 if (status >= 0)
5463 return status;
5465 FS_CALL(vnode, close_dir, cookie);
5466 FS_CALL(vnode, free_dir_cookie, cookie);
5468 return status;
5472 /*! Calls fs open_attr_dir() on the given vnode and returns a new
5473 file descriptor for it.
5474 Used by attr_dir_open() and attr_dir_open_fd().
5476 static int
5477 open_attr_dir_vnode(struct vnode* vnode, bool kernel)
5479 if (!HAS_FS_CALL(vnode, open_attr_dir))
5480 return B_UNSUPPORTED;
5482 void* cookie;
5483 status_t status = FS_CALL(vnode, open_attr_dir, &cookie);
5484 if (status != B_OK)
5485 return status;
5487 // directory is opened, create a fd
5488 status = get_new_fd(FDTYPE_ATTR_DIR, NULL, vnode, cookie, O_CLOEXEC,
5489 kernel);
5490 if (status >= 0)
5491 return status;
5493 FS_CALL(vnode, close_attr_dir, cookie);
5494 FS_CALL(vnode, free_attr_dir_cookie, cookie);
5496 return status;
5500 static int
5501 file_create_entry_ref(dev_t mountID, ino_t directoryID, const char* name,
5502 int openMode, int perms, bool kernel)
5504 FUNCTION(("file_create_entry_ref: name = '%s', omode %x, perms %d, "
5505 "kernel %d\n", name, openMode, perms, kernel));
5507 // get directory to put the new file in
5508 struct vnode* directory;
5509 status_t status = get_vnode(mountID, directoryID, &directory, true, false);
5510 if (status != B_OK)
5511 return status;
5513 status = create_vnode(directory, name, openMode, perms, kernel);
5514 put_vnode(directory);
5516 return status;
5520 static int
5521 file_create(int fd, char* path, int openMode, int perms, bool kernel)
5523 FUNCTION(("file_create: path '%s', omode %x, perms %d, kernel %d\n", path,
5524 openMode, perms, kernel));
5526 // get directory to put the new file in
5527 char name[B_FILE_NAME_LENGTH];
5528 struct vnode* directory;
5529 status_t status = fd_and_path_to_dir_vnode(fd, path, &directory, name,
5530 kernel);
5531 if (status < 0)
5532 return status;
5534 status = create_vnode(directory, name, openMode, perms, kernel);
5536 put_vnode(directory);
5537 return status;
5541 static int
5542 file_open_entry_ref(dev_t mountID, ino_t directoryID, const char* name,
5543 int openMode, bool kernel)
5545 if (name == NULL || *name == '\0')
5546 return B_BAD_VALUE;
5548 FUNCTION(("file_open_entry_ref(ref = (%" B_PRId32 ", %" B_PRId64 ", %s), "
5549 "openMode = %d)\n", mountID, directoryID, name, openMode));
5551 bool traverse = (openMode & (O_NOTRAVERSE | O_NOFOLLOW)) == 0;
5553 // get the vnode matching the entry_ref
5554 struct vnode* vnode;
5555 status_t status = entry_ref_to_vnode(mountID, directoryID, name, traverse,
5556 kernel, &vnode);
5557 if (status != B_OK)
5558 return status;
5560 if ((openMode & O_NOFOLLOW) != 0 && S_ISLNK(vnode->Type())) {
5561 put_vnode(vnode);
5562 return B_LINK_LIMIT;
5565 int newFD = open_vnode(vnode, openMode, kernel);
5566 if (newFD >= 0) {
5567 // The vnode reference has been transferred to the FD
5568 cache_node_opened(vnode, FDTYPE_FILE, vnode->cache, mountID,
5569 directoryID, vnode->id, name);
5570 } else
5571 put_vnode(vnode);
5573 return newFD;
5577 static int
5578 file_open(int fd, char* path, int openMode, bool kernel)
5580 bool traverse = (openMode & (O_NOTRAVERSE | O_NOFOLLOW)) == 0;
5582 FUNCTION(("file_open: fd: %d, entry path = '%s', omode %d, kernel %d\n",
5583 fd, path, openMode, kernel));
5585 // get the vnode matching the vnode + path combination
5586 struct vnode* vnode;
5587 ino_t parentID;
5588 status_t status = fd_and_path_to_vnode(fd, path, traverse, &vnode,
5589 &parentID, kernel);
5590 if (status != B_OK)
5591 return status;
5593 if ((openMode & O_NOFOLLOW) != 0 && S_ISLNK(vnode->Type())) {
5594 put_vnode(vnode);
5595 return B_LINK_LIMIT;
5598 // open the vnode
5599 int newFD = open_vnode(vnode, openMode, kernel);
5600 if (newFD >= 0) {
5601 // The vnode reference has been transferred to the FD
5602 cache_node_opened(vnode, FDTYPE_FILE, vnode->cache,
5603 vnode->device, parentID, vnode->id, NULL);
5604 } else
5605 put_vnode(vnode);
5607 return newFD;
5611 static status_t
5612 file_close(struct file_descriptor* descriptor)
5614 struct vnode* vnode = descriptor->u.vnode;
5615 status_t status = B_OK;
5617 FUNCTION(("file_close(descriptor = %p)\n", descriptor));
5619 cache_node_closed(vnode, FDTYPE_FILE, vnode->cache, vnode->device,
5620 vnode->id);
5621 if (HAS_FS_CALL(vnode, close)) {
5622 status = FS_CALL(vnode, close, descriptor->cookie);
5625 if (status == B_OK) {
5626 // remove all outstanding locks for this team
5627 if (HAS_FS_CALL(vnode, release_lock))
5628 status = FS_CALL(vnode, release_lock, descriptor->cookie, NULL);
5629 else
5630 status = release_advisory_lock(vnode, NULL);
5632 return status;
5636 static void
5637 file_free_fd(struct file_descriptor* descriptor)
5639 struct vnode* vnode = descriptor->u.vnode;
5641 if (vnode != NULL) {
5642 FS_CALL(vnode, free_cookie, descriptor->cookie);
5643 put_vnode(vnode);
5648 static status_t
5649 file_read(struct file_descriptor* descriptor, off_t pos, void* buffer,
5650 size_t* length)
5652 struct vnode* vnode = descriptor->u.vnode;
5653 FUNCTION(("file_read: buf %p, pos %" B_PRIdOFF ", len %p = %ld\n", buffer,
5654 pos, length, *length));
5656 if (S_ISDIR(vnode->Type()))
5657 return B_IS_A_DIRECTORY;
5659 return FS_CALL(vnode, read, descriptor->cookie, pos, buffer, length);
5663 static status_t
5664 file_write(struct file_descriptor* descriptor, off_t pos, const void* buffer,
5665 size_t* length)
5667 struct vnode* vnode = descriptor->u.vnode;
5668 FUNCTION(("file_write: buf %p, pos %" B_PRIdOFF ", len %p\n", buffer, pos,
5669 length));
5671 if (S_ISDIR(vnode->Type()))
5672 return B_IS_A_DIRECTORY;
5673 if (!HAS_FS_CALL(vnode, write))
5674 return B_READ_ONLY_DEVICE;
5676 return FS_CALL(vnode, write, descriptor->cookie, pos, buffer, length);
5680 static off_t
5681 file_seek(struct file_descriptor* descriptor, off_t pos, int seekType)
5683 struct vnode* vnode = descriptor->u.vnode;
5684 off_t offset;
5685 bool isDevice = false;
5687 FUNCTION(("file_seek(pos = %" B_PRIdOFF ", seekType = %d)\n", pos,
5688 seekType));
5690 // some kinds of files are not seekable
5691 switch (vnode->Type() & S_IFMT) {
5692 case S_IFIFO:
5693 case S_IFSOCK:
5694 return ESPIPE;
5696 		// drivers publish block devices as character devices, so handle both
5697 case S_IFBLK:
5698 case S_IFCHR:
5699 isDevice = true;
5700 break;
5701 		// The Open Group Base Specs don't treat any file types besides pipes,
5702 		// FIFOs, and sockets specially, so we allow seeking all other types.
5703 case S_IFREG:
5704 case S_IFDIR:
5705 case S_IFLNK:
5706 break;
5709 switch (seekType) {
5710 case SEEK_SET:
5711 offset = 0;
5712 break;
5713 case SEEK_CUR:
5714 offset = descriptor->pos;
5715 break;
5716 case SEEK_END:
5718 // stat() the node
5719 if (!HAS_FS_CALL(vnode, read_stat))
5720 return B_UNSUPPORTED;
5722 struct stat stat;
5723 status_t status = FS_CALL(vnode, read_stat, &stat);
5724 if (status != B_OK)
5725 return status;
5727 offset = stat.st_size;
5729 if (offset == 0 && isDevice) {
5730 // stat() on regular drivers doesn't report size
5731 device_geometry geometry;
5733 if (HAS_FS_CALL(vnode, ioctl)) {
5734 status = FS_CALL(vnode, ioctl, descriptor->cookie,
5735 B_GET_GEOMETRY, &geometry, sizeof(geometry));
5736 if (status == B_OK)
5737 offset = (off_t)geometry.bytes_per_sector
5738 * geometry.sectors_per_track
5739 * geometry.cylinder_count
5740 * geometry.head_count;
5744 break;
5746 default:
5747 return B_BAD_VALUE;
5750 // assumes off_t is 64 bits wide
5751 if (offset > 0 && LONGLONG_MAX - offset < pos)
5752 return B_BUFFER_OVERFLOW;
5754 pos += offset;
5755 if (pos < 0)
5756 return B_BAD_VALUE;
5758 return descriptor->pos = pos;
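// Worked example for the device branch above (hypothetical geometry): with
// 512 bytes per sector, 63 sectors per track, 1024 cylinders and 16 heads,
// SEEK_END resolves the offset to 512 * 63 * 1024 * 16 = 528482304 bytes.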
5762 static status_t
5763 file_select(struct file_descriptor* descriptor, uint8 event,
5764 struct selectsync* sync)
5766 FUNCTION(("file_select(%p, %u, %p)\n", descriptor, event, sync));
5768 struct vnode* vnode = descriptor->u.vnode;
5770 // If the FS has no select() hook, notify select() now.
5771 if (!HAS_FS_CALL(vnode, select))
5772 return notify_select_event(sync, event);
5774 return FS_CALL(vnode, select, descriptor->cookie, event, sync);
5778 static status_t
5779 file_deselect(struct file_descriptor* descriptor, uint8 event,
5780 struct selectsync* sync)
5782 struct vnode* vnode = descriptor->u.vnode;
5784 if (!HAS_FS_CALL(vnode, deselect))
5785 return B_OK;
5787 return FS_CALL(vnode, deselect, descriptor->cookie, event, sync);
5791 static status_t
5792 dir_create_entry_ref(dev_t mountID, ino_t parentID, const char* name, int perms,
5793 bool kernel)
5795 struct vnode* vnode;
5796 status_t status;
5798 if (name == NULL || *name == '\0')
5799 return B_BAD_VALUE;
5801 FUNCTION(("dir_create_entry_ref(dev = %" B_PRId32 ", ino = %" B_PRId64 ", "
5802 "name = '%s', perms = %d)\n", mountID, parentID, name, perms));
5804 status = get_vnode(mountID, parentID, &vnode, true, false);
5805 if (status != B_OK)
5806 return status;
5808 if (HAS_FS_CALL(vnode, create_dir))
5809 status = FS_CALL(vnode, create_dir, name, perms);
5810 else
5811 status = B_READ_ONLY_DEVICE;
5813 put_vnode(vnode);
5814 return status;
5818 static status_t
5819 dir_create(int fd, char* path, int perms, bool kernel)
5821 char filename[B_FILE_NAME_LENGTH];
5822 struct vnode* vnode;
5823 status_t status;
5825 FUNCTION(("dir_create: path '%s', perms %d, kernel %d\n", path, perms,
5826 kernel));
5828 status = fd_and_path_to_dir_vnode(fd, path, &vnode, filename, kernel);
5829 if (status < 0)
5830 return status;
5832 if (HAS_FS_CALL(vnode, create_dir)) {
5833 status = FS_CALL(vnode, create_dir, filename, perms);
5834 } else
5835 status = B_READ_ONLY_DEVICE;
5837 put_vnode(vnode);
5838 return status;
5842 static int
5843 dir_open_entry_ref(dev_t mountID, ino_t parentID, const char* name, bool kernel)
5845 FUNCTION(("dir_open_entry_ref()\n"));
5847 if (name && name[0] == '\0')
5848 return B_BAD_VALUE;
5850 // get the vnode matching the entry_ref/node_ref
5851 struct vnode* vnode;
5852 status_t status;
5853 if (name) {
5854 status = entry_ref_to_vnode(mountID, parentID, name, true, kernel,
5855 &vnode);
5856 } else
5857 status = get_vnode(mountID, parentID, &vnode, true, false);
5858 if (status != B_OK)
5859 return status;
5861 int newFD = open_dir_vnode(vnode, kernel);
5862 if (newFD >= 0) {
5863 // The vnode reference has been transferred to the FD
5864 cache_node_opened(vnode, FDTYPE_DIR, vnode->cache, mountID, parentID,
5865 vnode->id, name);
5866 } else
5867 put_vnode(vnode);
5869 return newFD;
5873 static int
5874 dir_open(int fd, char* path, bool kernel)
5876 FUNCTION(("dir_open: fd: %d, entry path = '%s', kernel %d\n", fd, path,
5877 kernel));
5879 // get the vnode matching the vnode + path combination
5880 struct vnode* vnode = NULL;
5881 ino_t parentID;
5882 status_t status = fd_and_path_to_vnode(fd, path, true, &vnode, &parentID,
5883 kernel);
5884 if (status != B_OK)
5885 return status;
5887 // open the dir
5888 int newFD = open_dir_vnode(vnode, kernel);
5889 if (newFD >= 0) {
5890 // The vnode reference has been transferred to the FD
5891 cache_node_opened(vnode, FDTYPE_DIR, vnode->cache, vnode->device,
5892 parentID, vnode->id, NULL);
5893 } else
5894 put_vnode(vnode);
5896 return newFD;
5900 static status_t
5901 dir_close(struct file_descriptor* descriptor)
5903 struct vnode* vnode = descriptor->u.vnode;
5905 FUNCTION(("dir_close(descriptor = %p)\n", descriptor));
5907 cache_node_closed(vnode, FDTYPE_DIR, vnode->cache, vnode->device,
5908 vnode->id);
5909 if (HAS_FS_CALL(vnode, close_dir))
5910 return FS_CALL(vnode, close_dir, descriptor->cookie);
5912 return B_OK;
5916 static void
5917 dir_free_fd(struct file_descriptor* descriptor)
5919 struct vnode* vnode = descriptor->u.vnode;
5921 if (vnode != NULL) {
5922 FS_CALL(vnode, free_dir_cookie, descriptor->cookie);
5923 put_vnode(vnode);
5928 static status_t
5929 dir_read(struct io_context* ioContext, struct file_descriptor* descriptor,
5930 struct dirent* buffer, size_t bufferSize, uint32* _count)
5932 return dir_read(ioContext, descriptor->u.vnode, descriptor->cookie, buffer,
5933 bufferSize, _count);
5937 static status_t
5938 fix_dirent(struct vnode* parent, struct dirent* entry,
5939 struct io_context* ioContext)
5941 // set d_pdev and d_pino
5942 entry->d_pdev = parent->device;
5943 entry->d_pino = parent->id;
5945 	// If this is the ".." entry and the directory is covering another vnode,
5946 // we need to replace d_dev and d_ino with the actual values.
5947 if (strcmp(entry->d_name, "..") == 0 && parent->IsCovering()) {
5948 return resolve_covered_parent(parent, &entry->d_dev, &entry->d_ino,
5949 ioContext);
5952 // resolve covered vnodes
5953 ReadLocker _(&sVnodeLock);
5955 struct vnode* vnode = lookup_vnode(entry->d_dev, entry->d_ino);
5956 if (vnode != NULL && vnode->covered_by != NULL) {
5957 do {
5958 vnode = vnode->covered_by;
5959 } while (vnode->covered_by != NULL);
5961 entry->d_dev = vnode->device;
5962 entry->d_ino = vnode->id;
5965 return B_OK;
5969 static status_t
5970 dir_read(struct io_context* ioContext, struct vnode* vnode, void* cookie,
5971 struct dirent* buffer, size_t bufferSize, uint32* _count)
5973 if (!HAS_FS_CALL(vnode, read_dir))
5974 return B_UNSUPPORTED;
5976 status_t error = FS_CALL(vnode, read_dir, cookie, buffer, bufferSize,
5977 _count);
5978 if (error != B_OK)
5979 return error;
5981 // we need to adjust the read dirents
5982 uint32 count = *_count;
5983 for (uint32 i = 0; i < count; i++) {
5984 error = fix_dirent(vnode, buffer, ioContext);
5985 if (error != B_OK)
5986 return error;
5988 buffer = (struct dirent*)((uint8*)buffer + buffer->d_reclen);
5991 return error;
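// Sketch (illustrative): the d_reclen walk above is the same pattern a
// consumer of the filled buffer would use to visit each entry:
//
//	struct dirent* entry = buffer;
//	for (uint32 i = 0; i < count; i++) {
//		// ... use entry->d_name, entry->d_dev, entry->d_ino ...
//		entry = (struct dirent*)((uint8*)entry + entry->d_reclen);
//	}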
5995 static status_t
5996 dir_rewind(struct file_descriptor* descriptor)
5998 struct vnode* vnode = descriptor->u.vnode;
6000 if (HAS_FS_CALL(vnode, rewind_dir)) {
6001 return FS_CALL(vnode, rewind_dir, descriptor->cookie);
6004 return B_UNSUPPORTED;
6008 static status_t
6009 dir_remove(int fd, char* path, bool kernel)
6011 char name[B_FILE_NAME_LENGTH];
6012 struct vnode* directory;
6013 status_t status;
6015 if (path != NULL) {
6016 		// we need to make sure our path name doesn't end in "/", ".",
6017 		// or ".."
6018 char* lastSlash;
6019 while ((lastSlash = strrchr(path, '/')) != NULL) {
6020 char* leaf = lastSlash + 1;
6021 if (!strcmp(leaf, ".."))
6022 return B_NOT_ALLOWED;
6024 // omit multiple slashes
6025 while (lastSlash > path && lastSlash[-1] == '/')
6026 lastSlash--;
6028 if (leaf[0]
6029 && strcmp(leaf, ".")) {
6030 break;
6032 // "name/" -> "name", or "name/." -> "name"
6033 lastSlash[0] = '\0';
6036 if (!strcmp(path, ".") || !strcmp(path, ".."))
6037 return B_NOT_ALLOWED;
6040 status = fd_and_path_to_dir_vnode(fd, path, &directory, name, kernel);
6041 if (status != B_OK)
6042 return status;
6044 if (HAS_FS_CALL(directory, remove_dir))
6045 status = FS_CALL(directory, remove_dir, name);
6046 else
6047 status = B_READ_ONLY_DEVICE;
6049 put_vnode(directory);
6050 return status;
6054 static status_t
6055 common_ioctl(struct file_descriptor* descriptor, ulong op, void* buffer,
6056 size_t length)
6058 struct vnode* vnode = descriptor->u.vnode;
6060 if (HAS_FS_CALL(vnode, ioctl))
6061 return FS_CALL(vnode, ioctl, descriptor->cookie, op, buffer, length);
6063 return B_DEV_INVALID_IOCTL;
6067 static status_t
6068 common_fcntl(int fd, int op, size_t argument, bool kernel)
6070 struct flock flock;
6072 FUNCTION(("common_fcntl(fd = %d, op = %d, argument = %lx, %s)\n",
6073 fd, op, argument, kernel ? "kernel" : "user"));
6075 struct file_descriptor* descriptor = get_fd(get_current_io_context(kernel),
6076 fd);
6077 if (descriptor == NULL)
6078 return B_FILE_ERROR;
6080 struct vnode* vnode = fd_vnode(descriptor);
6082 status_t status = B_OK;
6084 if (op == F_SETLK || op == F_SETLKW || op == F_GETLK) {
6085 if (descriptor->type != FDTYPE_FILE)
6086 status = B_BAD_VALUE;
6087 else if (user_memcpy(&flock, (struct flock*)argument,
6088 sizeof(struct flock)) != B_OK)
6089 status = B_BAD_ADDRESS;
6091 if (status != B_OK) {
6092 put_fd(descriptor);
6093 return status;
6097 switch (op) {
6098 case F_SETFD:
6100 struct io_context* context = get_current_io_context(kernel);
6101 // Set file descriptor flags
6103 			// FD_CLOEXEC is the only flag available at this time
6104 mutex_lock(&context->io_mutex);
6105 fd_set_close_on_exec(context, fd, (argument & FD_CLOEXEC) != 0);
6106 mutex_unlock(&context->io_mutex);
6108 status = B_OK;
6109 break;
6112 case F_GETFD:
6114 struct io_context* context = get_current_io_context(kernel);
6116 // Get file descriptor flags
6117 mutex_lock(&context->io_mutex);
6118 status = fd_close_on_exec(context, fd) ? FD_CLOEXEC : 0;
6119 mutex_unlock(&context->io_mutex);
6120 break;
6123 case F_SETFL:
6124 // Set file descriptor open mode
6126 // we only accept changes to O_APPEND and O_NONBLOCK
6127 argument &= O_APPEND | O_NONBLOCK;
6128 if (descriptor->ops->fd_set_flags != NULL) {
6129 status = descriptor->ops->fd_set_flags(descriptor, argument);
6130 } else if (vnode != NULL && HAS_FS_CALL(vnode, set_flags)) {
6131 status = FS_CALL(vnode, set_flags, descriptor->cookie,
6132 (int)argument);
6133 } else
6134 status = B_UNSUPPORTED;
6136 if (status == B_OK) {
6137 // update this descriptor's open_mode field
6138 descriptor->open_mode = (descriptor->open_mode
6139 & ~(O_APPEND | O_NONBLOCK)) | argument;
6142 break;
6144 case F_GETFL:
6145 // Get file descriptor open mode
6146 status = descriptor->open_mode;
6147 break;
6149 case F_DUPFD:
6150 case F_DUPFD_CLOEXEC:
6152 struct io_context* context = get_current_io_context(kernel);
6154 status = new_fd_etc(context, descriptor, (int)argument);
6155 if (status >= 0) {
6156 mutex_lock(&context->io_mutex);
6157 fd_set_close_on_exec(context, fd, op == F_DUPFD_CLOEXEC);
6158 mutex_unlock(&context->io_mutex);
6160 atomic_add(&descriptor->ref_count, 1);
6162 break;
6165 case F_GETLK:
6166 if (vnode != NULL) {
6167 struct flock normalizedLock;
6169 memcpy(&normalizedLock, &flock, sizeof(struct flock));
6170 status = normalize_flock(descriptor, &normalizedLock);
6171 if (status != B_OK)
6172 break;
6174 if (HAS_FS_CALL(vnode, test_lock)) {
6175 status = FS_CALL(vnode, test_lock, descriptor->cookie,
6176 &normalizedLock);
6177 } else
6178 status = test_advisory_lock(vnode, &normalizedLock);
6179 if (status == B_OK) {
6180 if (normalizedLock.l_type == F_UNLCK) {
6181 // no conflicting lock found, copy back the same struct
6182 // we were given except change type to F_UNLCK
6183 flock.l_type = F_UNLCK;
6184 status = user_memcpy((struct flock*)argument, &flock,
6185 sizeof(struct flock));
6186 } else {
6187 // a conflicting lock was found, copy back its range and
6188 // type
6189 if (normalizedLock.l_len == OFF_MAX)
6190 normalizedLock.l_len = 0;
6192 status = user_memcpy((struct flock*)argument,
6193 &normalizedLock, sizeof(struct flock));
6196 } else
6197 status = B_BAD_VALUE;
6198 break;
6200 case F_SETLK:
6201 case F_SETLKW:
6202 status = normalize_flock(descriptor, &flock);
6203 if (status != B_OK)
6204 break;
6206 if (vnode == NULL) {
6207 status = B_BAD_VALUE;
6208 } else if (flock.l_type == F_UNLCK) {
6209 if (HAS_FS_CALL(vnode, release_lock)) {
6210 status = FS_CALL(vnode, release_lock, descriptor->cookie,
6211 &flock);
6212 } else
6213 status = release_advisory_lock(vnode, &flock);
6214 } else {
6215 // the open mode must match the lock type
6216 if (((descriptor->open_mode & O_RWMASK) == O_RDONLY
6217 && flock.l_type == F_WRLCK)
6218 || ((descriptor->open_mode & O_RWMASK) == O_WRONLY
6219 && flock.l_type == F_RDLCK))
6220 status = B_FILE_ERROR;
6221 else {
6222 if (HAS_FS_CALL(vnode, acquire_lock)) {
6223 status = FS_CALL(vnode, acquire_lock,
6224 descriptor->cookie, &flock, op == F_SETLKW);
6225 } else {
6226 status = acquire_advisory_lock(vnode, -1,
6227 &flock, op == F_SETLKW);
6231 break;
6233 // ToDo: add support for more ops?
6235 default:
6236 status = B_BAD_VALUE;
6239 put_fd(descriptor);
6240 return status;
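// Usage sketch (illustrative, userland view): the operations handled above
// as they are typically issued:
//
//	fcntl(fd, F_SETFD, FD_CLOEXEC);
//		// mark close-on-exec
//	fcntl(fd, F_SETFL, O_NONBLOCK);
//		// only O_APPEND/O_NONBLOCK may be changed
//	int clone = fcntl(fd, F_DUPFD, 10);
//		// returns the lowest free descriptor >= 10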
6244 static status_t
6245 common_sync(int fd, bool kernel)
6247 struct file_descriptor* descriptor;
6248 struct vnode* vnode;
6249 status_t status;
6251 FUNCTION(("common_fsync: entry. fd %d kernel %d\n", fd, kernel));
6253 descriptor = get_fd_and_vnode(fd, &vnode, kernel);
6254 if (descriptor == NULL)
6255 return B_FILE_ERROR;
6257 if (HAS_FS_CALL(vnode, fsync))
6258 status = FS_CALL_NO_PARAMS(vnode, fsync);
6259 else
6260 status = B_UNSUPPORTED;
6262 put_fd(descriptor);
6263 return status;
6267 static status_t
6268 common_lock_node(int fd, bool kernel)
6270 struct file_descriptor* descriptor;
6271 struct vnode* vnode;
6273 descriptor = get_fd_and_vnode(fd, &vnode, kernel);
6274 if (descriptor == NULL)
6275 return B_FILE_ERROR;
6277 status_t status = B_OK;
6279 // We need to set the locking atomically - someone
6280 // else might set one at the same time
6281 if (atomic_pointer_test_and_set(&vnode->mandatory_locked_by, descriptor,
6282 (file_descriptor*)NULL) != NULL)
6283 status = B_BUSY;
6285 put_fd(descriptor);
6286 return status;
6290 static status_t
6291 common_unlock_node(int fd, bool kernel)
6293 struct file_descriptor* descriptor;
6294 struct vnode* vnode;
6296 descriptor = get_fd_and_vnode(fd, &vnode, kernel);
6297 if (descriptor == NULL)
6298 return B_FILE_ERROR;
6300 status_t status = B_OK;
6302 // We need to set the locking atomically - someone
6303 // else might set one at the same time
6304 if (atomic_pointer_test_and_set(&vnode->mandatory_locked_by,
6305 (file_descriptor*)NULL, descriptor) != descriptor)
6306 status = B_BAD_VALUE;
6308 put_fd(descriptor);
6309 return status;
6313 static status_t
6314 common_read_link(int fd, char* path, char* buffer, size_t* _bufferSize,
6315 bool kernel)
6317 struct vnode* vnode;
6318 status_t status;
6320 status = fd_and_path_to_vnode(fd, path, false, &vnode, NULL, kernel);
6321 if (status != B_OK)
6322 return status;
6324 if (HAS_FS_CALL(vnode, read_symlink)) {
6325 status = FS_CALL(vnode, read_symlink, buffer, _bufferSize);
6326 } else
6327 status = B_BAD_VALUE;
6329 put_vnode(vnode);
6330 return status;
6334 static status_t
6335 common_create_symlink(int fd, char* path, const char* toPath, int mode,
6336 bool kernel)
6338 // path validity checks have to be in the calling function!
6339 char name[B_FILE_NAME_LENGTH];
6340 struct vnode* vnode;
6341 status_t status;
6343 FUNCTION(("common_create_symlink(fd = %d, path = %s, toPath = %s, "
6344 "mode = %d, kernel = %d)\n", fd, path, toPath, mode, kernel));
6346 status = fd_and_path_to_dir_vnode(fd, path, &vnode, name, kernel);
6347 if (status != B_OK)
6348 return status;
6350 if (HAS_FS_CALL(vnode, create_symlink))
6351 status = FS_CALL(vnode, create_symlink, name, toPath, mode);
6352 else {
6353 status = HAS_FS_CALL(vnode, write)
6354 ? B_UNSUPPORTED : B_READ_ONLY_DEVICE;
6357 put_vnode(vnode);
6359 return status;
6363 static status_t
6364 common_create_link(int pathFD, char* path, int toFD, char* toPath,
6365 bool traverseLeafLink, bool kernel)
6367 // path validity checks have to be in the calling function!
6369 FUNCTION(("common_create_link(path = %s, toPath = %s, kernel = %d)\n", path,
6370 toPath, kernel));
6372 char name[B_FILE_NAME_LENGTH];
6373 struct vnode* directory;
6374 status_t status = fd_and_path_to_dir_vnode(pathFD, path, &directory, name,
6375 kernel);
6376 if (status != B_OK)
6377 return status;
6379 struct vnode* vnode;
6380 status = fd_and_path_to_vnode(toFD, toPath, traverseLeafLink, &vnode, NULL,
6381 kernel);
6382 if (status != B_OK)
6383 goto err;
6385 if (directory->mount != vnode->mount) {
6386 status = B_CROSS_DEVICE_LINK;
6387 goto err1;
6390 if (HAS_FS_CALL(directory, link))
6391 status = FS_CALL(directory, link, name, vnode);
6392 else
6393 status = B_READ_ONLY_DEVICE;
6395 err1:
6396 put_vnode(vnode);
6397 err:
6398 put_vnode(directory);
6400 return status;
6404 static status_t
6405 common_unlink(int fd, char* path, bool kernel)
6407 char filename[B_FILE_NAME_LENGTH];
6408 struct vnode* vnode;
6409 status_t status;
6411 FUNCTION(("common_unlink: fd: %d, path '%s', kernel %d\n", fd, path,
6412 kernel));
6414 status = fd_and_path_to_dir_vnode(fd, path, &vnode, filename, kernel);
6415 if (status < 0)
6416 return status;
6418 if (HAS_FS_CALL(vnode, unlink))
6419 status = FS_CALL(vnode, unlink, filename);
6420 else
6421 status = B_READ_ONLY_DEVICE;
6423 put_vnode(vnode);
6425 return status;
6429 static status_t
6430 common_access(int fd, char* path, int mode, bool effectiveUserGroup, bool kernel)
6432 struct vnode* vnode;
6433 status_t status;
6435 // TODO: honor effectiveUserGroup argument
6437 status = fd_and_path_to_vnode(fd, path, true, &vnode, NULL, kernel);
6438 if (status != B_OK)
6439 return status;
6441 if (HAS_FS_CALL(vnode, access))
6442 status = FS_CALL(vnode, access, mode);
6443 else
6444 status = B_OK;
6446 put_vnode(vnode);
6448 return status;
6452 static status_t
6453 common_rename(int fd, char* path, int newFD, char* newPath, bool kernel)
6455 struct vnode* fromVnode;
6456 struct vnode* toVnode;
6457 char fromName[B_FILE_NAME_LENGTH];
6458 char toName[B_FILE_NAME_LENGTH];
6459 status_t status;
6461 FUNCTION(("common_rename(fd = %d, path = %s, newFD = %d, newPath = %s, "
6462 "kernel = %d)\n", fd, path, newFD, newPath, kernel));
6464 status = fd_and_path_to_dir_vnode(fd, path, &fromVnode, fromName, kernel);
6465 if (status != B_OK)
6466 return status;
6468 status = fd_and_path_to_dir_vnode(newFD, newPath, &toVnode, toName, kernel);
6469 if (status != B_OK)
6470 goto err1;
6472 if (fromVnode->device != toVnode->device) {
6473 status = B_CROSS_DEVICE_LINK;
6474 goto err2;
6477 if (fromName[0] == '\0' || toName[0] == '\0'
6478 || !strcmp(fromName, ".") || !strcmp(fromName, "..")
6479 || !strcmp(toName, ".") || !strcmp(toName, "..")
6480 || (fromVnode == toVnode && !strcmp(fromName, toName))) {
6481 status = B_BAD_VALUE;
6482 goto err2;
6485 if (HAS_FS_CALL(fromVnode, rename))
6486 status = FS_CALL(fromVnode, rename, fromName, toVnode, toName);
6487 else
6488 status = B_READ_ONLY_DEVICE;
6490 err2:
6491 put_vnode(toVnode);
6492 err1:
6493 put_vnode(fromVnode);
6495 return status;
6499 static status_t
6500 common_read_stat(struct file_descriptor* descriptor, struct stat* stat)
6502 struct vnode* vnode = descriptor->u.vnode;
6504 FUNCTION(("common_read_stat: stat %p\n", stat));
6506 // TODO: remove this once all file systems properly set them!
6507 stat->st_crtim.tv_nsec = 0;
6508 stat->st_ctim.tv_nsec = 0;
6509 stat->st_mtim.tv_nsec = 0;
6510 stat->st_atim.tv_nsec = 0;
6512 return vfs_stat_vnode(vnode, stat);
6516 static status_t
6517 common_write_stat(struct file_descriptor* descriptor, const struct stat* stat,
6518 int statMask)
6520 struct vnode* vnode = descriptor->u.vnode;
6522 FUNCTION(("common_write_stat(vnode = %p, stat = %p, statMask = %d)\n",
6523 vnode, stat, statMask));
6525 if (!HAS_FS_CALL(vnode, write_stat))
6526 return B_READ_ONLY_DEVICE;
6528 return FS_CALL(vnode, write_stat, stat, statMask);
6532 static status_t
6533 common_path_read_stat(int fd, char* path, bool traverseLeafLink,
6534 struct stat* stat, bool kernel)
6536 FUNCTION(("common_path_read_stat: fd: %d, path '%s', stat %p\n", fd, path,
6537 stat));
6539 struct vnode* vnode;
6540 status_t status = fd_and_path_to_vnode(fd, path, traverseLeafLink, &vnode,
6541 NULL, kernel);
6542 if (status != B_OK)
6543 return status;
6545 status = vfs_stat_vnode(vnode, stat);
6547 put_vnode(vnode);
6548 return status;
6552 static status_t
6553 common_path_write_stat(int fd, char* path, bool traverseLeafLink,
6554 const struct stat* stat, int statMask, bool kernel)
6556 FUNCTION(("common_path_write_stat: fd: %d, path '%s', stat %p, stat_mask %d, "
6557 "kernel %d\n", fd, path, stat, statMask, kernel));
6559 struct vnode* vnode;
6560 status_t status = fd_and_path_to_vnode(fd, path, traverseLeafLink, &vnode,
6561 NULL, kernel);
6562 if (status != B_OK)
6563 return status;
6565 if (HAS_FS_CALL(vnode, write_stat))
6566 status = FS_CALL(vnode, write_stat, stat, statMask);
6567 else
6568 status = B_READ_ONLY_DEVICE;
6570 put_vnode(vnode);
6572 return status;
6576 static int
6577 attr_dir_open(int fd, char* path, bool traverseLeafLink, bool kernel)
6579 FUNCTION(("attr_dir_open(fd = %d, path = '%s', kernel = %d)\n", fd, path,
6580 kernel));
6582 struct vnode* vnode;
6583 status_t status = fd_and_path_to_vnode(fd, path, traverseLeafLink, &vnode,
6584 NULL, kernel);
6585 if (status != B_OK)
6586 return status;
6588 status = open_attr_dir_vnode(vnode, kernel);
6589 if (status < 0)
6590 put_vnode(vnode);
6592 return status;
6596 static status_t
6597 attr_dir_close(struct file_descriptor* descriptor)
6599 struct vnode* vnode = descriptor->u.vnode;
6601 FUNCTION(("attr_dir_close(descriptor = %p)\n", descriptor));
6603 if (HAS_FS_CALL(vnode, close_attr_dir))
6604 return FS_CALL(vnode, close_attr_dir, descriptor->cookie);
6606 return B_OK;
6610 static void
6611 attr_dir_free_fd(struct file_descriptor* descriptor)
6613 struct vnode* vnode = descriptor->u.vnode;
6615 if (vnode != NULL) {
6616 FS_CALL(vnode, free_attr_dir_cookie, descriptor->cookie);
6617 put_vnode(vnode);
6622 static status_t
6623 attr_dir_read(struct io_context* ioContext, struct file_descriptor* descriptor,
6624 struct dirent* buffer, size_t bufferSize, uint32* _count)
6626 struct vnode* vnode = descriptor->u.vnode;
6628 FUNCTION(("attr_dir_read(descriptor = %p)\n", descriptor));
6630 if (HAS_FS_CALL(vnode, read_attr_dir))
6631 return FS_CALL(vnode, read_attr_dir, descriptor->cookie, buffer,
6632 bufferSize, _count);
6634 return B_UNSUPPORTED;
6638 static status_t
6639 attr_dir_rewind(struct file_descriptor* descriptor)
6641 struct vnode* vnode = descriptor->u.vnode;
6643 FUNCTION(("attr_dir_rewind(descriptor = %p)\n", descriptor));
6645 if (HAS_FS_CALL(vnode, rewind_attr_dir))
6646 return FS_CALL(vnode, rewind_attr_dir, descriptor->cookie);
6648 return B_UNSUPPORTED;
6652 static int
6653 attr_create(int fd, char* path, const char* name, uint32 type,
6654 int openMode, bool kernel)
6656 if (name == NULL || *name == '\0')
6657 return B_BAD_VALUE;
6659 bool traverse = (openMode & (O_NOTRAVERSE | O_NOFOLLOW)) == 0;
6660 struct vnode* vnode;
6661 status_t status = fd_and_path_to_vnode(fd, path, traverse, &vnode, NULL,
6662 kernel);
6663 if (status != B_OK)
6664 return status;
6666 if ((openMode & O_NOFOLLOW) != 0 && S_ISLNK(vnode->Type())) {
6667 status = B_LINK_LIMIT;
6668 goto err;
6671 if (!HAS_FS_CALL(vnode, create_attr)) {
6672 status = B_READ_ONLY_DEVICE;
6673 goto err;
6676 void* cookie;
6677 status = FS_CALL(vnode, create_attr, name, type, openMode, &cookie);
6678 if (status != B_OK)
6679 goto err;
6681 fd = get_new_fd(FDTYPE_ATTR, NULL, vnode, cookie, openMode, kernel);
6682 if (fd >= 0)
6683 return fd;
6685 status = fd;
6687 FS_CALL(vnode, close_attr, cookie);
6688 FS_CALL(vnode, free_attr_cookie, cookie);
6690 FS_CALL(vnode, remove_attr, name);
6692 err:
6693 put_vnode(vnode);
6695 return status;
6699 static int
6700 attr_open(int fd, char* path, const char* name, int openMode, bool kernel)
6702 if (name == NULL || *name == '\0')
6703 return B_BAD_VALUE;
6705 bool traverse = (openMode & (O_NOTRAVERSE | O_NOFOLLOW)) == 0;
6706 struct vnode* vnode;
6707 status_t status = fd_and_path_to_vnode(fd, path, traverse, &vnode, NULL,
6708 kernel);
6709 if (status != B_OK)
6710 return status;
6712 if ((openMode & O_NOFOLLOW) != 0 && S_ISLNK(vnode->Type())) {
6713 status = B_LINK_LIMIT;
6714 goto err;
6717 if (!HAS_FS_CALL(vnode, open_attr)) {
6718 status = B_UNSUPPORTED;
6719 goto err;
6722 void* cookie;
6723 status = FS_CALL(vnode, open_attr, name, openMode, &cookie);
6724 if (status != B_OK)
6725 goto err;
6727 // now we only need a file descriptor for this attribute and we're done
6728 fd = get_new_fd(FDTYPE_ATTR, NULL, vnode, cookie, openMode, kernel);
6729 if (fd >= 0)
6730 return fd;
6732 status = fd;
6734 FS_CALL(vnode, close_attr, cookie);
6735 FS_CALL(vnode, free_attr_cookie, cookie);
6737 err:
6738 put_vnode(vnode);
6740 return status;
6744 static status_t
6745 attr_close(struct file_descriptor* descriptor)
6747 struct vnode* vnode = descriptor->u.vnode;
6749 FUNCTION(("attr_close(descriptor = %p)\n", descriptor));
6751 if (HAS_FS_CALL(vnode, close_attr))
6752 return FS_CALL(vnode, close_attr, descriptor->cookie);
6754 return B_OK;
6758 static void
6759 attr_free_fd(struct file_descriptor* descriptor)
6761 struct vnode* vnode = descriptor->u.vnode;
6763 if (vnode != NULL) {
6764 FS_CALL(vnode, free_attr_cookie, descriptor->cookie);
6765 put_vnode(vnode);
6770 static status_t
6771 attr_read(struct file_descriptor* descriptor, off_t pos, void* buffer,
6772 size_t* length)
6774 struct vnode* vnode = descriptor->u.vnode;
6776 FUNCTION(("attr_read: buf %p, pos %" B_PRIdOFF ", len %p = %ld\n", buffer,
6777 pos, length, *length));
6779 if (!HAS_FS_CALL(vnode, read_attr))
6780 return B_UNSUPPORTED;
6782 return FS_CALL(vnode, read_attr, descriptor->cookie, pos, buffer, length);
6786 static status_t
6787 attr_write(struct file_descriptor* descriptor, off_t pos, const void* buffer,
6788 size_t* length)
6790 struct vnode* vnode = descriptor->u.vnode;
6792 FUNCTION(("attr_write: buf %p, pos %" B_PRIdOFF ", len %p\n", buffer, pos,
6793 length));
6795 if (!HAS_FS_CALL(vnode, write_attr))
6796 return B_UNSUPPORTED;
6798 return FS_CALL(vnode, write_attr, descriptor->cookie, pos, buffer, length);
6802 static off_t
6803 attr_seek(struct file_descriptor* descriptor, off_t pos, int seekType)
6805 off_t offset;
6807 switch (seekType) {
6808 case SEEK_SET:
6809 offset = 0;
6810 break;
6811 case SEEK_CUR:
6812 offset = descriptor->pos;
6813 break;
6814 case SEEK_END:
6816 struct vnode* vnode = descriptor->u.vnode;
6817 if (!HAS_FS_CALL(vnode, read_attr_stat))
6818 return B_UNSUPPORTED;
6820 struct stat stat;
6821 status_t status = FS_CALL(vnode, read_attr_stat, descriptor->cookie,
6822 &stat);
6823 if (status != B_OK)
6824 return status;
6826 offset = stat.st_size;
6827 break;
6829 default:
6830 return B_BAD_VALUE;
6833 // assumes off_t is 64 bits wide
6834 if (offset > 0 && LONGLONG_MAX - offset < pos)
6835 return B_BUFFER_OVERFLOW;
6837 pos += offset;
6838 if (pos < 0)
6839 return B_BAD_VALUE;
6841 return descriptor->pos = pos;
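// Worked example of the overflow guard above: with offset == stat.st_size
// and a caller-supplied pos, "offset > 0 && LONGLONG_MAX - offset < pos"
// rejects any sum that would wrap past the 64-bit maximum. For offset ==
// LONGLONG_MAX and pos == 1, LONGLONG_MAX - offset == 0 < 1, so
// B_BUFFER_OVERFLOW is returned instead of a wrapped, negative position.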
6845 static status_t
6846 attr_read_stat(struct file_descriptor* descriptor, struct stat* stat)
6848 struct vnode* vnode = descriptor->u.vnode;
6850 FUNCTION(("attr_read_stat: stat 0x%p\n", stat));
6852 if (!HAS_FS_CALL(vnode, read_attr_stat))
6853 return B_UNSUPPORTED;
6855 return FS_CALL(vnode, read_attr_stat, descriptor->cookie, stat);
6859 static status_t
6860 attr_write_stat(struct file_descriptor* descriptor, const struct stat* stat,
6861 int statMask)
6863 struct vnode* vnode = descriptor->u.vnode;
6865 FUNCTION(("attr_write_stat: stat = %p, statMask %d\n", stat, statMask));
6867 if (!HAS_FS_CALL(vnode, write_attr_stat))
6868 return B_READ_ONLY_DEVICE;
6870 return FS_CALL(vnode, write_attr_stat, descriptor->cookie, stat, statMask);
6874 static status_t
6875 attr_remove(int fd, const char* name, bool kernel)
6877 struct file_descriptor* descriptor;
6878 struct vnode* vnode;
6879 status_t status;
6881 if (name == NULL || *name == '\0')
6882 return B_BAD_VALUE;
6884 FUNCTION(("attr_remove: fd = %d, name = \"%s\", kernel %d\n", fd, name,
6885 kernel));
6887 descriptor = get_fd_and_vnode(fd, &vnode, kernel);
6888 if (descriptor == NULL)
6889 return B_FILE_ERROR;
6891 if (HAS_FS_CALL(vnode, remove_attr))
6892 status = FS_CALL(vnode, remove_attr, name);
6893 else
6894 status = B_READ_ONLY_DEVICE;
6896 put_fd(descriptor);
6898 return status;
6902 static status_t
6903 attr_rename(int fromFD, const char* fromName, int toFD, const char* toName,
6904 bool kernel)
6906 struct file_descriptor* fromDescriptor;
6907 struct file_descriptor* toDescriptor;
6908 struct vnode* fromVnode;
6909 struct vnode* toVnode;
6910 status_t status;
6912 if (fromName == NULL || *fromName == '\0' || toName == NULL
6913 || *toName == '\0')
6914 return B_BAD_VALUE;
6916 FUNCTION(("attr_rename: from fd = %d, from name = \"%s\", to fd = %d, to "
6917 "name = \"%s\", kernel %d\n", fromFD, fromName, toFD, toName, kernel));
6919 fromDescriptor = get_fd_and_vnode(fromFD, &fromVnode, kernel);
6920 if (fromDescriptor == NULL)
6921 return B_FILE_ERROR;
6923 toDescriptor = get_fd_and_vnode(toFD, &toVnode, kernel);
6924 if (toDescriptor == NULL) {
6925 status = B_FILE_ERROR;
6926 goto err;
6929 // are the files on the same volume?
6930 if (fromVnode->device != toVnode->device) {
6931 status = B_CROSS_DEVICE_LINK;
6932 goto err1;
6935 if (HAS_FS_CALL(fromVnode, rename_attr)) {
6936 status = FS_CALL(fromVnode, rename_attr, fromName, toVnode, toName);
6937 } else
6938 status = B_READ_ONLY_DEVICE;
6940 err1:
6941 put_fd(toDescriptor);
6942 err:
6943 put_fd(fromDescriptor);
6945 return status;
6949 static int
6950 index_dir_open(dev_t mountID, bool kernel)
6952 struct fs_mount* mount;
6953 void* cookie;
6955 FUNCTION(("index_dir_open(mountID = %" B_PRId32 ", kernel = %d)\n", mountID,
6956 kernel));
6958 status_t status = get_mount(mountID, &mount);
6959 if (status != B_OK)
6960 return status;
6962 if (!HAS_FS_MOUNT_CALL(mount, open_index_dir)) {
6963 status = B_UNSUPPORTED;
6964 goto error;
6967 status = FS_MOUNT_CALL(mount, open_index_dir, &cookie);
6968 if (status != B_OK)
6969 goto error;
6971 // get fd for the index directory
6972 int fd;
6973 fd = get_new_fd(FDTYPE_INDEX_DIR, mount, NULL, cookie, O_CLOEXEC, kernel);
6974 if (fd >= 0)
6975 return fd;
6977 // something went wrong
6978 FS_MOUNT_CALL(mount, close_index_dir, cookie);
6979 FS_MOUNT_CALL(mount, free_index_dir_cookie, cookie);
6981 status = fd;
6983 error:
6984 put_mount(mount);
6985 return status;
6989 static status_t
6990 index_dir_close(struct file_descriptor* descriptor)
6992 struct fs_mount* mount = descriptor->u.mount;
6994 FUNCTION(("index_dir_close(descriptor = %p)\n", descriptor));
6996 if (HAS_FS_MOUNT_CALL(mount, close_index_dir))
6997 return FS_MOUNT_CALL(mount, close_index_dir, descriptor->cookie);
6999 return B_OK;
7003 static void
7004 index_dir_free_fd(struct file_descriptor* descriptor)
7006 struct fs_mount* mount = descriptor->u.mount;
7008 if (mount != NULL) {
7009 FS_MOUNT_CALL(mount, free_index_dir_cookie, descriptor->cookie);
7010 put_mount(mount);
7015 static status_t
7016 index_dir_read(struct io_context* ioContext, struct file_descriptor* descriptor,
7017 struct dirent* buffer, size_t bufferSize, uint32* _count)
7019 struct fs_mount* mount = descriptor->u.mount;
7021 if (HAS_FS_MOUNT_CALL(mount, read_index_dir)) {
7022 return FS_MOUNT_CALL(mount, read_index_dir, descriptor->cookie, buffer,
7023 bufferSize, _count);
7026 return B_UNSUPPORTED;
7030 static status_t
7031 index_dir_rewind(struct file_descriptor* descriptor)
7033 struct fs_mount* mount = descriptor->u.mount;
7035 if (HAS_FS_MOUNT_CALL(mount, rewind_index_dir))
7036 return FS_MOUNT_CALL(mount, rewind_index_dir, descriptor->cookie);
7038 return B_UNSUPPORTED;
7042 static status_t
7043 index_create(dev_t mountID, const char* name, uint32 type, uint32 flags,
7044 bool kernel)
7046 FUNCTION(("index_create(mountID = %" B_PRId32 ", name = %s, kernel = %d)\n",
7047 mountID, name, kernel));
7049 struct fs_mount* mount;
7050 status_t status = get_mount(mountID, &mount);
7051 if (status != B_OK)
7052 return status;
7054 if (!HAS_FS_MOUNT_CALL(mount, create_index)) {
7055 status = B_READ_ONLY_DEVICE;
7056 goto out;
7059 status = FS_MOUNT_CALL(mount, create_index, name, type, flags);
7061 out:
7062 put_mount(mount);
7063 return status;
7067 #if 0
7068 static status_t
7069 index_read_stat(struct file_descriptor* descriptor, struct stat* stat)
7071 struct vnode* vnode = descriptor->u.vnode;
7073 // ToDo: currently unused!
7074 FUNCTION(("index_read_stat: stat 0x%p\n", stat));
7075 if (!HAS_FS_CALL(vnode, read_index_stat))
7076 return B_UNSUPPORTED;
7078 return B_UNSUPPORTED;
7079 //return FS_CALL(vnode, read_index_stat, descriptor->cookie, stat);
7083 static void
7084 index_free_fd(struct file_descriptor* descriptor)
7086 struct vnode* vnode = descriptor->u.vnode;
7088 if (vnode != NULL) {
7089 FS_CALL(vnode, free_index_cookie, descriptor->cookie);
7090 put_vnode(vnode);
7093 #endif
7096 static status_t
7097 index_name_read_stat(dev_t mountID, const char* name, struct stat* stat,
7098 bool kernel)
7100 FUNCTION(("index_remove(mountID = %" B_PRId32 ", name = %s, kernel = %d)\n",
7101 mountID, name, kernel));
7103 struct fs_mount* mount;
7104 status_t status = get_mount(mountID, &mount);
7105 if (status != B_OK)
7106 return status;
7108 if (!HAS_FS_MOUNT_CALL(mount, read_index_stat)) {
7109 status = B_UNSUPPORTED;
7110 goto out;
7113 status = FS_MOUNT_CALL(mount, read_index_stat, name, stat);
7115 out:
7116 put_mount(mount);
7117 return status;
7121 static status_t
7122 index_remove(dev_t mountID, const char* name, bool kernel)
7124 FUNCTION(("index_remove(mountID = %" B_PRId32 ", name = %s, kernel = %d)\n",
7125 mountID, name, kernel));
7127 struct fs_mount* mount;
7128 status_t status = get_mount(mountID, &mount);
7129 if (status != B_OK)
7130 return status;
7132 if (!HAS_FS_MOUNT_CALL(mount, remove_index)) {
7133 status = B_READ_ONLY_DEVICE;
7134 goto out;
7137 status = FS_MOUNT_CALL(mount, remove_index, name);
7139 out:
7140 put_mount(mount);
7141 return status;
7145 /*! TODO: the query FS API is still pretty much the same as in R5.
7146 It would be nice if the file systems got some more kernel support
7147 for queries.
7148 For example, query parsing should be moved into the kernel.
7150 static int
7151 query_open(dev_t device, const char* query, uint32 flags, port_id port,
7152 int32 token, bool kernel)
7154 struct fs_mount* mount;
7155 void* cookie;
7157 FUNCTION(("query_open(device = %" B_PRId32 ", query = \"%s\", kernel = %d)\n",
7158 device, query, kernel));
7160 status_t status = get_mount(device, &mount);
7161 if (status != B_OK)
7162 return status;
7164 if (!HAS_FS_MOUNT_CALL(mount, open_query)) {
7165 status = B_UNSUPPORTED;
7166 goto error;
7169 status = FS_MOUNT_CALL(mount, open_query, query, flags, port, token,
7170 &cookie);
7171 if (status != B_OK)
7172 goto error;
7174 // get fd for the query
7175 int fd;
7176 fd = get_new_fd(FDTYPE_QUERY, mount, NULL, cookie, O_CLOEXEC, kernel);
7177 if (fd >= 0)
7178 return fd;
7180 status = fd;
7182 // something went wrong
7183 FS_MOUNT_CALL(mount, close_query, cookie);
7184 FS_MOUNT_CALL(mount, free_query_cookie, cookie);
7186 error:
7187 put_mount(mount);
7188 return status;
7192 static status_t
7193 query_close(struct file_descriptor* descriptor)
7195 struct fs_mount* mount = descriptor->u.mount;
7197 FUNCTION(("query_close(descriptor = %p)\n", descriptor));
7199 if (HAS_FS_MOUNT_CALL(mount, close_query))
7200 return FS_MOUNT_CALL(mount, close_query, descriptor->cookie);
7202 return B_OK;
7206 static void
7207 query_free_fd(struct file_descriptor* descriptor)
7209 struct fs_mount* mount = descriptor->u.mount;
7211 if (mount != NULL) {
7212 FS_MOUNT_CALL(mount, free_query_cookie, descriptor->cookie);
7213 put_mount(mount);
7218 static status_t
7219 query_read(struct io_context* ioContext, struct file_descriptor* descriptor,
7220 struct dirent* buffer, size_t bufferSize, uint32* _count)
7222 struct fs_mount* mount = descriptor->u.mount;
7224 if (HAS_FS_MOUNT_CALL(mount, read_query)) {
7225 return FS_MOUNT_CALL(mount, read_query, descriptor->cookie, buffer,
7226 bufferSize, _count);
7229 return B_UNSUPPORTED;
7233 static status_t
7234 query_rewind(struct file_descriptor* descriptor)
7236 struct fs_mount* mount = descriptor->u.mount;
7238 if (HAS_FS_MOUNT_CALL(mount, rewind_query))
7239 return FS_MOUNT_CALL(mount, rewind_query, descriptor->cookie);
7241 return B_UNSUPPORTED;
7245 // #pragma mark - General File System functions
7248 static dev_t
7249 fs_mount(char* path, const char* device, const char* fsName, uint32 flags,
7250 const char* args, bool kernel)
7252 struct ::fs_mount* mount;
7253 status_t status = B_OK;
7254 fs_volume* volume = NULL;
7255 int32 layer = 0;
7256 Vnode* coveredNode = NULL;
7258 FUNCTION(("fs_mount: path = '%s', device = '%s', fs_name = '%s', flags = %#"
7259 B_PRIx32 ", args = '%s'\n", path, device, fsName, flags, args));
7261 // The path is always safe, we just have to make sure that fsName is
7262 // almost valid - we can't make any assumptions about args, though.
7263 // A NULL fsName is OK, if a device was given and the FS is not virtual.
7264 // We'll get it from the DDM later.
7265 if (fsName == NULL) {
7266 if (!device || flags & B_MOUNT_VIRTUAL_DEVICE)
7267 return B_BAD_VALUE;
7268 } else if (fsName[0] == '\0')
7269 return B_BAD_VALUE;
7271 RecursiveLocker mountOpLocker(sMountOpLock);
7273 // Helper to delete a newly created file device on failure.
7274 // Not exactly beautiful, but helps to keep the code below cleaner.
7275 struct FileDeviceDeleter {
7276 FileDeviceDeleter() : id(-1) {}
7277 ~FileDeviceDeleter()
7279 KDiskDeviceManager::Default()->DeleteFileDevice(id);
7282 partition_id id;
7283 } fileDeviceDeleter;
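// The deleter works RAII-style: if one of the error returns below is
// taken while fileDeviceDeleter.id is still set, its destructor deletes
// the newly created file device again; on success the id is reset to -1
// later on, so nothing is torn down.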
7285 // If the file system is not a "virtual" one, the device argument should
7286 // point to a real file/device (if given at all).
7287 // get the partition
7288 KDiskDeviceManager* ddm = KDiskDeviceManager::Default();
7289 KPartition* partition = NULL;
7290 KPath normalizedDevice;
7291 bool newlyCreatedFileDevice = false;
7293 if (!(flags & B_MOUNT_VIRTUAL_DEVICE) && device != NULL) {
7294 // normalize the device path
7295 status = normalizedDevice.SetTo(device, true);
7296 if (status != B_OK)
7297 return status;
7299 // get a corresponding partition from the DDM
7300 partition = ddm->RegisterPartition(normalizedDevice.Path());
7301 if (partition == NULL) {
7302 // Partition not found: This either means the user supplied
7303 // an invalid path, or the path refers to an image file. We try
7304 // to let the DDM create a file device for the path.
7305 partition_id deviceID = ddm->CreateFileDevice(
7306 normalizedDevice.Path(), &newlyCreatedFileDevice);
7307 if (deviceID >= 0) {
7308 partition = ddm->RegisterPartition(deviceID);
7309 if (newlyCreatedFileDevice)
7310 fileDeviceDeleter.id = deviceID;
7314 if (!partition) {
7315 TRACE(("fs_mount(): Partition `%s' not found.\n",
7316 normalizedDevice.Path()));
7317 return B_ENTRY_NOT_FOUND;
7320 device = normalizedDevice.Path();
7321 // correct path to file device
7323 PartitionRegistrar partitionRegistrar(partition, true);
7325 // Write lock the partition's device. For the time being, we keep the lock
7326 until we're done mounting -- not nice, but it ensures that no one is
7327 // interfering.
7328 // TODO: Just mark the partition busy while mounting!
7329 KDiskDevice* diskDevice = NULL;
7330 if (partition) {
7331 diskDevice = ddm->WriteLockDevice(partition->Device()->ID());
7332 if (!diskDevice) {
7333 TRACE(("fs_mount(): Failed to lock disk device!\n"));
7334 return B_ERROR;
7338 DeviceWriteLocker writeLocker(diskDevice, true);
7339 // this takes over the write lock acquired before
7341 if (partition != NULL) {
7342 // make sure that the partition is not busy
7343 if (partition->IsBusy()) {
7344 TRACE(("fs_mount(): Partition is busy.\n"));
7345 return B_BUSY;
7348 // if no FS name had been supplied, we get it from the partition
7349 if (fsName == NULL) {
7350 KDiskSystem* diskSystem = partition->DiskSystem();
7351 if (!diskSystem) {
7352 TRACE(("fs_mount(): No FS name was given, and the DDM didn't "
7353 "recognize it.\n"));
7354 return B_BAD_VALUE;
7357 if (!diskSystem->IsFileSystem()) {
7358 TRACE(("fs_mount(): No FS name was given, and the DDM found a "
7359 "partitioning system.\n"));
7360 return B_BAD_VALUE;
7363 // The disk system name will not change, and the KDiskSystem
7364 // object will not go away while the disk device is locked (and
7365 // the partition has a reference to it), so this is safe.
7366 fsName = diskSystem->Name();
7370 mount = new(std::nothrow) (struct ::fs_mount);
7371 if (mount == NULL)
7372 return B_NO_MEMORY;
7374 mount->device_name = strdup(device);
7375 // "device" can be NULL
7377 status = mount->entry_cache.Init();
7378 if (status != B_OK)
7379 goto err1;
7381 // initialize structure
7382 mount->id = sNextMountID++;
7383 mount->partition = NULL;
7384 mount->root_vnode = NULL;
7385 mount->covers_vnode = NULL;
7386 mount->unmounting = false;
7387 mount->owns_file_device = false;
7388 mount->volume = NULL;
7390 // build up the volume(s)
7391 while (true) {
7392 char* layerFSName = get_file_system_name_for_layer(fsName, layer);
7393 if (layerFSName == NULL) {
7394 if (layer == 0) {
7395 status = B_NO_MEMORY;
7396 goto err1;
7399 break;
7401 MemoryDeleter layerFSNameDeleter(layerFSName);
7403 volume = (fs_volume*)malloc(sizeof(fs_volume));
7404 if (volume == NULL) {
7405 status = B_NO_MEMORY;
7406 goto err1;
7409 volume->id = mount->id;
7410 volume->partition = partition != NULL ? partition->ID() : -1;
7411 volume->layer = layer++;
7412 volume->private_volume = NULL;
7413 volume->ops = NULL;
7414 volume->sub_volume = NULL;
7415 volume->super_volume = NULL;
7416 volume->file_system = NULL;
7417 volume->file_system_name = NULL;
7419 volume->file_system_name = get_file_system_name(layerFSName);
7420 if (volume->file_system_name == NULL) {
7421 status = B_NO_MEMORY;
7422 free(volume);
7423 goto err1;
7426 volume->file_system = get_file_system(layerFSName);
7427 if (volume->file_system == NULL) {
7428 status = B_DEVICE_NOT_FOUND;
7429 free(volume->file_system_name);
7430 free(volume);
7431 goto err1;
7434 if (mount->volume == NULL)
7435 mount->volume = volume;
7436 else {
7437 volume->super_volume = mount->volume;
7438 mount->volume->sub_volume = volume;
7439 mount->volume = volume;
7443 // insert mount struct into list before we call FS's mount() function
7444 // so that vnodes can be created for this mount
7445 mutex_lock(&sMountMutex);
7446 sMountsTable->Insert(mount);
7447 mutex_unlock(&sMountMutex);
7449 ino_t rootID;
7451 if (!sRoot) {
7452 // we haven't mounted anything yet
7453 if (strcmp(path, "/") != 0) {
7454 status = B_ERROR;
7455 goto err2;
7458 status = mount->volume->file_system->mount(mount->volume, device, flags,
7459 args, &rootID);
7460 if (status != 0)
7461 goto err2;
7462 } else {
7463 status = path_to_vnode(path, true, &coveredNode, NULL, kernel);
7464 if (status != B_OK)
7465 goto err2;
7467 mount->covers_vnode = coveredNode;
7469 // make sure covered_vnode is a directory
7470 if (!S_ISDIR(coveredNode->Type())) {
7471 status = B_NOT_A_DIRECTORY;
7472 goto err3;
7475 if (coveredNode->IsCovered()) {
7476 // this is already a covered vnode
7477 status = B_BUSY;
7478 goto err3;
7481 // mount it/them
7482 fs_volume* volume = mount->volume;
7483 while (volume) {
7484 status = volume->file_system->mount(volume, device, flags, args,
7485 &rootID);
7486 if (status != B_OK) {
7487 if (volume->sub_volume)
7488 goto err4;
7489 goto err3;
7492 volume = volume->super_volume;
7495 volume = mount->volume;
7496 while (volume) {
7497 if (volume->ops->all_layers_mounted != NULL)
7498 volume->ops->all_layers_mounted(volume);
7499 volume = volume->super_volume;
7503 // the root node is supposed to be owned by the file system - it must
7504 // exist at this point
7505 mount->root_vnode = lookup_vnode(mount->id, rootID);
7506 if (mount->root_vnode == NULL || mount->root_vnode->ref_count != 1) {
7507 panic("fs_mount: file system does not own its root node!\n");
7508 status = B_ERROR;
7509 goto err4;
7512 // set up the links between the root vnode and the vnode it covers
7513 rw_lock_write_lock(&sVnodeLock);
7514 if (coveredNode != NULL) {
7515 if (coveredNode->IsCovered()) {
7516 // the vnode is covered now
7517 status = B_BUSY;
7518 rw_lock_write_unlock(&sVnodeLock);
7519 goto err4;
7522 mount->root_vnode->covers = coveredNode;
7523 mount->root_vnode->SetCovering(true);
7525 coveredNode->covered_by = mount->root_vnode;
7526 coveredNode->SetCovered(true);
7528 rw_lock_write_unlock(&sVnodeLock);
7530 if (!sRoot) {
7531 sRoot = mount->root_vnode;
7532 mutex_lock(&sIOContextRootLock);
7533 get_current_io_context(true)->root = sRoot;
7534 mutex_unlock(&sIOContextRootLock);
7535 inc_vnode_ref_count(sRoot);
7538 // supply the partition (if any) with the mount cookie and mark it mounted
7539 if (partition) {
7540 partition->SetMountCookie(mount->volume->private_volume);
7541 partition->SetVolumeID(mount->id);
7543 // keep a partition reference as long as the partition is mounted
7544 partitionRegistrar.Detach();
7545 mount->partition = partition;
7546 mount->owns_file_device = newlyCreatedFileDevice;
7547 fileDeviceDeleter.id = -1;
7550 notify_mount(mount->id,
7551 coveredNode != NULL ? coveredNode->device : -1,
7552 coveredNode ? coveredNode->id : -1);
7554 return mount->id;
7556 err4:
7557 FS_MOUNT_CALL_NO_PARAMS(mount, unmount);
7558 err3:
7559 if (coveredNode != NULL)
7560 put_vnode(coveredNode);
7561 err2:
7562 mutex_lock(&sMountMutex);
7563 sMountsTable->Remove(mount);
7564 mutex_unlock(&sMountMutex);
7565 err1:
7566 delete mount;
7568 return status;
7572 static status_t
7573 fs_unmount(char* path, dev_t mountID, uint32 flags, bool kernel)
7575 struct fs_mount* mount;
7576 status_t err;
7578 FUNCTION(("fs_unmount(path '%s', dev %" B_PRId32 ", kernel %d\n", path,
7579 mountID, kernel));
7581 struct vnode* pathVnode = NULL;
7582 if (path != NULL) {
7583 err = path_to_vnode(path, true, &pathVnode, NULL, kernel);
7584 if (err != B_OK)
7585 return B_ENTRY_NOT_FOUND;
7588 RecursiveLocker mountOpLocker(sMountOpLock);
7590 // this lock is not strictly necessary, but it is here to keep the
7591 // ASSERT in find_mount() working in the KDEBUG case.
7592 KDEBUG_ONLY(mutex_lock(&sMountMutex));
7593 mount = find_mount(path != NULL ? pathVnode->device : mountID);
7594 KDEBUG_ONLY(mutex_unlock(&sMountMutex));
7595 if (mount == NULL) {
7596 panic("fs_unmount: find_mount() failed on root vnode @%p of mount\n",
7597 pathVnode);
7600 if (path != NULL) {
7601 put_vnode(pathVnode);
7603 if (mount->root_vnode != pathVnode) {
7604 // not mountpoint
7605 return B_BAD_VALUE;
7609 // if the volume is associated with a partition, lock the device of the
7610 // partition as long as we are unmounting
7611 KDiskDeviceManager* ddm = KDiskDeviceManager::Default();
7612 KPartition* partition = mount->partition;
7613 KDiskDevice* diskDevice = NULL;
7614 if (partition != NULL) {
7615 if (partition->Device() == NULL) {
7616 dprintf("fs_unmount(): There is no device!\n");
7617 return B_ERROR;
7619 diskDevice = ddm->WriteLockDevice(partition->Device()->ID());
7620 if (!diskDevice) {
7621 TRACE(("fs_unmount(): Failed to lock disk device!\n"));
7622 return B_ERROR;
7625 DeviceWriteLocker writeLocker(diskDevice, true);
7627 // make sure that the partition is not busy
7628 if (partition != NULL) {
7629 if ((flags & B_UNMOUNT_BUSY_PARTITION) == 0 && partition->IsBusy()) {
7630 TRACE(("fs_unmount(): Partition is busy.\n"));
7631 return B_BUSY;
7635 // grab the vnode master mutex to keep someone from creating
7636 // a vnode while we're figuring out if we can continue
7637 WriteLocker vnodesWriteLocker(&sVnodeLock);
7639 bool disconnectedDescriptors = false;
7641 while (true) {
7642 bool busy = false;
7644 // cycle through the list of vnodes associated with this mount and
7645 // make sure all of them are not busy or have refs on them
7646 VnodeList::Iterator iterator = mount->vnodes.GetIterator();
7647 while (struct vnode* vnode = iterator.Next()) {
7648 if (vnode->IsBusy()) {
7649 busy = true;
7650 break;
7653 // check the vnode's ref count -- subtract additional references for
7654 // covering
7655 int32 refCount = vnode->ref_count;
7656 if (vnode->covers != NULL)
7657 refCount--;
7658 if (vnode->covered_by != NULL)
7659 refCount--;
7661 if (refCount != 0) {
7662 // there are still vnodes in use on this mount, so we cannot
7663 // unmount yet
7664 busy = true;
7665 break;
7669 if (!busy)
7670 break;
7672 if ((flags & B_FORCE_UNMOUNT) == 0)
7673 return B_BUSY;
7675 if (disconnectedDescriptors) {
7676 // wait a bit until the last access is finished, and then try again
7677 vnodesWriteLocker.Unlock();
7678 snooze(100000);
7679 // TODO: if there is some kind of bug that prevents the ref counts
7680 // from getting back to zero, this will fall into an endless loop...
7681 vnodesWriteLocker.Lock();
7682 continue;
7685 // the file system is still busy - but we're forced to unmount it,
7686 // so let's disconnect all open file descriptors
7688 mount->unmounting = true;
7689 // prevent new vnodes from being created
7691 vnodesWriteLocker.Unlock();
7693 disconnect_mount_or_vnode_fds(mount, NULL);
7694 disconnectedDescriptors = true;
7696 vnodesWriteLocker.Lock();
7699 // We can safely continue. Mark all of the vnodes busy and this mount
7700 // structure in unmounting state. Also undo the vnode covers/covered_by
7701 // links.
7702 mount->unmounting = true;
7704 VnodeList::Iterator iterator = mount->vnodes.GetIterator();
7705 while (struct vnode* vnode = iterator.Next()) {
7706 // Remove all covers/covered_by links from other mounts' nodes to this
7707 // vnode and adjust the node ref count accordingly. We will release the
7708 // references to the external vnodes below.
7709 if (Vnode* coveredNode = vnode->covers) {
7710 if (Vnode* coveringNode = vnode->covered_by) {
7711 // We have both covered and covering vnodes, so just remove us
7712 // from the chain.
7713 coveredNode->covered_by = coveringNode;
7714 coveringNode->covers = coveredNode;
7715 vnode->ref_count -= 2;
7717 vnode->covered_by = NULL;
7718 vnode->covers = NULL;
7719 vnode->SetCovering(false);
7720 vnode->SetCovered(false);
7721 } else {
7722 // We only have a covered vnode. Remove its link to us.
7723 coveredNode->covered_by = NULL;
7724 coveredNode->SetCovered(false);
7725 vnode->ref_count--;
7727 // If the other node is an external vnode, we keep its link
7728 // around so we can put the reference later on. Otherwise
7729 // we get rid of it right now.
7730 if (coveredNode->mount == mount) {
7731 vnode->covers = NULL;
7732 coveredNode->ref_count--;
7735 } else if (Vnode* coveringNode = vnode->covered_by) {
7736 // We only have a covering vnode. Remove its link to us.
7737 coveringNode->covers = NULL;
7738 coveringNode->SetCovering(false);
7739 vnode->ref_count--;
7741 // If the other node is an external vnode, we keep its link
7742 // around so we can put the reference later on. Otherwise
7743 // we get rid of it right now.
7744 if (coveringNode->mount == mount) {
7745 vnode->covered_by = NULL;
7746 coveringNode->ref_count--;
7750 vnode->SetBusy(true);
7751 vnode_to_be_freed(vnode);
7754 vnodesWriteLocker.Unlock();
7756 // Free all vnodes associated with this mount.
7757 // They will be removed from the mount list by free_vnode(), so
7758 // we don't have to do this.
7759 while (struct vnode* vnode = mount->vnodes.Head()) {
7760 // Put the references to external covered/covering vnodes we kept above.
7761 if (Vnode* coveredNode = vnode->covers)
7762 put_vnode(coveredNode);
7763 if (Vnode* coveringNode = vnode->covered_by)
7764 put_vnode(coveringNode);
7766 free_vnode(vnode, false);
7769 // remove the mount structure from the hash table
7770 mutex_lock(&sMountMutex);
7771 sMountsTable->Remove(mount);
7772 mutex_unlock(&sMountMutex);
7774 mountOpLocker.Unlock();
7776 FS_MOUNT_CALL_NO_PARAMS(mount, unmount);
7777 notify_unmount(mount->id);
7779 // dereference the partition and mark it unmounted
7780 if (partition) {
7781 partition->SetVolumeID(-1);
7782 partition->SetMountCookie(NULL);
7784 if (mount->owns_file_device)
7785 KDiskDeviceManager::Default()->DeleteFileDevice(partition->ID());
7786 partition->Unregister();
7789 delete mount;
7790 return B_OK;
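// Unmounting thus runs in two phases: a check loop that only succeeds
// once no vnode of the mount is busy or externally referenced (with
// B_FORCE_UNMOUNT, open descriptors are disconnected and the loop is
// retried), followed by a commit phase that marks every vnode busy,
// unhooks the covers/covered_by links, and frees the nodes.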
7794 static status_t
7795 fs_sync(dev_t device)
7797 struct fs_mount* mount;
7798 status_t status = get_mount(device, &mount);
7799 if (status != B_OK)
7800 return status;
7802 struct vnode marker;
7803 memset(&marker, 0, sizeof(marker));
7804 marker.SetBusy(true);
7805 marker.SetRemoved(true);
7807 // First, synchronize all file caches
7809 while (true) {
7810 WriteLocker locker(sVnodeLock);
7811 // Note: That's the easy way. Which is probably OK for sync(),
7812 // since it's a relatively rare call and doesn't need to allow for
7813 // a lot of concurrency. Using a read lock would be possible, but
7814 also more involved, since we would have to lock the individual nodes
7815 // and take care of the locking order, which we might not want to
7816 // do while holding fs_mount::rlock.
7818 // synchronize access to vnode list
7819 recursive_lock_lock(&mount->rlock);
7821 struct vnode* vnode;
7822 if (!marker.IsRemoved()) {
7823 vnode = mount->vnodes.GetNext(&marker);
7824 mount->vnodes.Remove(&marker);
7825 marker.SetRemoved(true);
7826 } else
7827 vnode = mount->vnodes.First();
7829 while (vnode != NULL && (vnode->cache == NULL
7830 || vnode->IsRemoved() || vnode->IsBusy())) {
7831 // TODO: we could track writes (and writable mapped vnodes)
7832 // and have a simple flag that we could test for here
7833 vnode = mount->vnodes.GetNext(vnode);
7836 if (vnode != NULL) {
7837 // insert marker vnode again
7838 mount->vnodes.Insert(mount->vnodes.GetNext(vnode), &marker);
7839 marker.SetRemoved(false);
7842 recursive_lock_unlock(&mount->rlock);
7844 if (vnode == NULL)
7845 break;
7847 vnode = lookup_vnode(mount->id, vnode->id);
7848 if (vnode == NULL || vnode->IsBusy())
7849 continue;
7851 if (vnode->ref_count == 0) {
7852 // this vnode has been unused before
7853 vnode_used(vnode);
7855 inc_vnode_ref_count(vnode);
7857 locker.Unlock();
7859 if (vnode->cache != NULL && !vnode->IsRemoved())
7860 vnode->cache->WriteModified();
7862 put_vnode(vnode);
7865 // And then, let the file systems do their synchronizing work
7867 if (HAS_FS_MOUNT_CALL(mount, sync))
7868 status = FS_MOUNT_CALL_NO_PARAMS(mount, sync);
7870 put_mount(mount);
7871 return status;
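// The marker vnode above is the usual pattern for iterating a list that
// has to be unlocked between steps: a dummy node is inserted right after
// the vnode just visited, the locks are dropped while that vnode's cache
// is written back, and the next pass resumes at GetNext(&marker). Only
// the marker's busy/removed flags are ever inspected, so a zeroed struct
// vnode on the stack is sufficient.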
7875 static status_t
7876 fs_read_info(dev_t device, struct fs_info* info)
7878 struct fs_mount* mount;
7879 status_t status = get_mount(device, &mount);
7880 if (status != B_OK)
7881 return status;
7883 memset(info, 0, sizeof(struct fs_info));
7885 if (HAS_FS_MOUNT_CALL(mount, read_fs_info))
7886 status = FS_MOUNT_CALL(mount, read_fs_info, info);
7888 // fill in info the file system doesn't (have to) know about
7889 if (status == B_OK) {
7890 info->dev = mount->id;
7891 info->root = mount->root_vnode->id;
7893 fs_volume* volume = mount->volume;
7894 while (volume->super_volume != NULL)
7895 volume = volume->super_volume;
7897 strlcpy(info->fsh_name, volume->file_system_name,
7898 sizeof(info->fsh_name));
7899 if (mount->device_name != NULL) {
7900 strlcpy(info->device_name, mount->device_name,
7901 sizeof(info->device_name));
7905 // if the call is not supported by the file system, there are still
7906 // the parts that we filled out ourselves
7908 put_mount(mount);
7909 return status;
7913 static status_t
7914 fs_write_info(dev_t device, const struct fs_info* info, int mask)
7916 struct fs_mount* mount;
7917 status_t status = get_mount(device, &mount);
7918 if (status != B_OK)
7919 return status;
7921 if (HAS_FS_MOUNT_CALL(mount, write_fs_info))
7922 status = FS_MOUNT_CALL(mount, write_fs_info, info, mask);
7923 else
7924 status = B_READ_ONLY_DEVICE;
7926 put_mount(mount);
7927 return status;
7931 static dev_t
7932 fs_next_device(int32* _cookie)
7934 struct fs_mount* mount = NULL;
7935 dev_t device = *_cookie;
7937 mutex_lock(&sMountMutex);
7939 // Since device IDs are assigned sequentially, this algorithm
7940 // works well enough. It makes sure that the device list
7941 // returned is sorted, and that no device is skipped when an
7942 // already visited device has been unmounted.
7944 while (device < sNextMountID) {
7945 mount = find_mount(device++);
7946 if (mount != NULL && mount->volume->private_volume != NULL)
7947 break;
7950 *_cookie = device;
7952 if (mount != NULL)
7953 device = mount->id;
7954 else
7955 device = B_BAD_VALUE;
7957 mutex_unlock(&sMountMutex);
7959 return device;
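// Iteration sketch: the cookie starts at 0 and always advances past the
// last ID tried, so a device unmounted mid-scan is simply skipped.
//
//	int32 cookie = 0;
//	dev_t device;
//	while ((device = fs_next_device(&cookie)) >= 0) {
//		// ... visit device ...
//	}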
7963 ssize_t
7964 fs_read_attr(int fd, const char *attribute, uint32 type, off_t pos,
7965 void *buffer, size_t readBytes)
7967 int attrFD = attr_open(fd, NULL, attribute, O_RDONLY, true);
7968 if (attrFD < 0)
7969 return attrFD;
7971 ssize_t bytesRead = _kern_read(attrFD, pos, buffer, readBytes);
7973 _kern_close(attrFD);
7975 return bytesRead;
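// Usage sketch; the attribute name, buffer size, and the B_STRING_TYPE
// constant are illustrative only. The type argument is not used by this
// implementation.
//
//	char buffer[256];
//	ssize_t length = fs_read_attr(fd, "BEOS:TYPE", B_STRING_TYPE, 0,
//		buffer, sizeof(buffer));
//	if (length < 0)
//		;	// error from attr_open() or _kern_read()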
7979 static status_t
7980 get_cwd(char* buffer, size_t size, bool kernel)
7982 // Get current working directory from io context
7983 struct io_context* context = get_current_io_context(kernel);
7984 status_t status;
7986 FUNCTION(("vfs_get_cwd: buf %p, size %ld\n", buffer, size));
7988 mutex_lock(&context->io_mutex);
7990 struct vnode* vnode = context->cwd;
7991 if (vnode)
7992 inc_vnode_ref_count(vnode);
7994 mutex_unlock(&context->io_mutex);
7996 if (vnode) {
7997 status = dir_vnode_to_path(vnode, buffer, size, kernel);
7998 put_vnode(vnode);
7999 } else
8000 status = B_ERROR;
8002 return status;
8006 static status_t
8007 set_cwd(int fd, char* path, bool kernel)
8009 struct io_context* context;
8010 struct vnode* vnode = NULL;
8011 struct vnode* oldDirectory;
8012 status_t status;
8014 FUNCTION(("set_cwd: path = \'%s\'\n", path));
8016 // Get vnode for passed path, and bail if it failed
8017 status = fd_and_path_to_vnode(fd, path, true, &vnode, NULL, kernel);
8018 if (status < 0)
8019 return status;
8021 if (!S_ISDIR(vnode->Type())) {
8022 // nope, can't cwd to here
8023 status = B_NOT_A_DIRECTORY;
8024 goto err;
8027 // We need to have the permission to enter the directory, too
8028 if (HAS_FS_CALL(vnode, access)) {
8029 status = FS_CALL(vnode, access, X_OK);
8030 if (status != B_OK)
8031 goto err;
8034 // Get current io context and lock
8035 context = get_current_io_context(kernel);
8036 mutex_lock(&context->io_mutex);
8038 // save the old current working directory first
8039 oldDirectory = context->cwd;
8040 context->cwd = vnode;
8042 mutex_unlock(&context->io_mutex);
8044 if (oldDirectory)
8045 put_vnode(oldDirectory);
8047 return B_NO_ERROR;
8049 err:
8050 put_vnode(vnode);
8051 return status;
8055 // #pragma mark - kernel mirrored syscalls
8058 dev_t
8059 _kern_mount(const char* path, const char* device, const char* fsName,
8060 uint32 flags, const char* args, size_t argsLength)
8062 KPath pathBuffer(path, KPath::DEFAULT, B_PATH_NAME_LENGTH + 1);
8063 if (pathBuffer.InitCheck() != B_OK)
8064 return B_NO_MEMORY;
8066 return fs_mount(pathBuffer.LockBuffer(), device, fsName, flags, args, true);
8070 status_t
8071 _kern_unmount(const char* path, uint32 flags)
8073 KPath pathBuffer(path, KPath::DEFAULT, B_PATH_NAME_LENGTH + 1);
8074 if (pathBuffer.InitCheck() != B_OK)
8075 return B_NO_MEMORY;
8077 return fs_unmount(pathBuffer.LockBuffer(), -1, flags, true);
8081 status_t
8082 _kern_read_fs_info(dev_t device, struct fs_info* info)
8084 if (info == NULL)
8085 return B_BAD_VALUE;
8087 return fs_read_info(device, info);
8091 status_t
8092 _kern_write_fs_info(dev_t device, const struct fs_info* info, int mask)
8094 if (info == NULL)
8095 return B_BAD_VALUE;
8097 return fs_write_info(device, info, mask);
8101 status_t
8102 _kern_sync(void)
8104 // Note: _kern_sync() is also called from _user_sync()
8105 int32 cookie = 0;
8106 dev_t device;
8107 while ((device = next_dev(&cookie)) >= 0) {
8108 status_t status = fs_sync(device);
8109 if (status != B_OK && status != B_BAD_VALUE) {
8110 dprintf("sync: device %" B_PRIdDEV " couldn't sync: %s\n", device,
8111 strerror(status));
8115 return B_OK;
8119 dev_t
8120 _kern_next_device(int32* _cookie)
8122 return fs_next_device(_cookie);
8126 status_t
8127 _kern_get_next_fd_info(team_id teamID, uint32* _cookie, fd_info* info,
8128 size_t infoSize)
8130 if (infoSize != sizeof(fd_info))
8131 return B_BAD_VALUE;
8133 // get the team
8134 Team* team = Team::Get(teamID);
8135 if (team == NULL)
8136 return B_BAD_TEAM_ID;
8137 BReference<Team> teamReference(team, true);
8139 // now that we have a team reference, its I/O context won't go away
8140 io_context* context = team->io_context;
8141 MutexLocker contextLocker(context->io_mutex);
8143 uint32 slot = *_cookie;
8145 struct file_descriptor* descriptor;
8146 while (slot < context->table_size
8147 && (descriptor = context->fds[slot]) == NULL) {
8148 slot++;
8151 if (slot >= context->table_size)
8152 return B_ENTRY_NOT_FOUND;
8154 info->number = slot;
8155 info->open_mode = descriptor->open_mode;
8157 struct vnode* vnode = fd_vnode(descriptor);
8158 if (vnode != NULL) {
8159 info->device = vnode->device;
8160 info->node = vnode->id;
8161 } else if (descriptor->u.mount != NULL) {
8162 info->device = descriptor->u.mount->id;
8163 info->node = -1;
8166 *_cookie = slot + 1;
8167 return B_OK;
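// Enumeration sketch: the cookie is simply the next FD slot to inspect,
// so callers loop until B_ENTRY_NOT_FOUND is returned.
//
//	uint32 cookie = 0;
//	fd_info info;
//	while (_kern_get_next_fd_info(teamID, &cookie, &info,
//			sizeof(info)) == B_OK) {
//		// ... use info.number, info.open_mode, info.device, info.node ...
//	}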
8172 _kern_open_entry_ref(dev_t device, ino_t inode, const char* name, int openMode,
8173 int perms)
8175 if ((openMode & O_CREAT) != 0) {
8176 return file_create_entry_ref(device, inode, name, openMode, perms,
8177 true);
8180 return file_open_entry_ref(device, inode, name, openMode, true);
8184 /*! \brief Opens a node specified by a FD + path pair.
8186 At least one of \a fd and \a path must be specified.
8187 If only \a fd is given, the function opens the node identified by this
8188 FD. If only a path is given, this path is opened. If both are given and
8189 the path is absolute, \a fd is ignored; a relative path is reckoned off
8190 of the directory (!) identified by \a fd.
8192 \param fd The FD. May be < 0.
8193 \param path The absolute or relative path. May be \c NULL.
8194 \param openMode The open mode.
8195 \return A FD referring to the newly opened node, or an error code,
8196 if an error occurs.
8199 _kern_open(int fd, const char* path, int openMode, int perms)
8201 KPath pathBuffer(path, KPath::LAZY_ALLOC, B_PATH_NAME_LENGTH + 1);
8202 if (pathBuffer.InitCheck() != B_OK)
8203 return B_NO_MEMORY;
8205 if ((openMode & O_CREAT) != 0)
8206 return file_create(fd, pathBuffer.LockBuffer(), openMode, perms, true);
8208 return file_open(fd, pathBuffer.LockBuffer(), openMode, true);
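// Two calls illustrating the FD + path convention documented above (the
// paths and dirFD are examples only): a relative path is resolved against
// the directory FD, while an absolute path makes the FD irrelevant.
//
//	int fd1 = _kern_open(dirFD, "data/log.txt", O_RDWR | O_CREAT, 0644);
//	int fd2 = _kern_open(-1, "/boot/home/data/log.txt", O_RDONLY, 0);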
8212 /*! \brief Opens a directory specified by entry_ref or node_ref.
8214 The supplied name may be \c NULL, in which case directory identified
8215 by \a device and \a inode will be opened. Otherwise \a device and
8216 \a inode identify the parent directory of the directory to be opened
8217 and \a name its entry name.
8219 \param device If \a name is specified the ID of the device the parent
8220 directory of the directory to be opened resides on, otherwise
8221 the device of the directory itself.
8222 \param inode If \a name is specified the node ID of the parent
8223 directory of the directory to be opened, otherwise node ID of the
8224 directory itself.
8225 \param name The entry name of the directory to be opened. If \c NULL,
8226 the \a device + \a inode pair identify the node to be opened.
8227 \return The FD of the newly opened directory or an error code, if
8228 something went wrong.
8231 _kern_open_dir_entry_ref(dev_t device, ino_t inode, const char* name)
8233 return dir_open_entry_ref(device, inode, name, true);
8237 /*! \brief Opens a directory specified by a FD + path pair.
8239 At least one of \a fd and \a path must be specified.
8240 If only \a fd is given, the function opens the directory identified by this
8241 FD. If only a path is given, this path is opened. If both are given and
8242 the path is absolute, \a fd is ignored; a relative path is reckoned off
8243 of the directory (!) identified by \a fd.
8245 \param fd The FD. May be < 0.
8246 \param path The absolute or relative path. May be \c NULL.
8247 \return A FD referring to the newly opened directory, or an error code,
8248 if an error occurs.
8251 _kern_open_dir(int fd, const char* path)
8253 KPath pathBuffer(path, KPath::LAZY_ALLOC, B_PATH_NAME_LENGTH + 1);
8254 if (pathBuffer.InitCheck() != B_OK)
8255 return B_NO_MEMORY;
8257 return dir_open(fd, pathBuffer.LockBuffer(), true);
8261 status_t
8262 _kern_fcntl(int fd, int op, size_t argument)
8264 return common_fcntl(fd, op, argument, true);
8268 status_t
8269 _kern_fsync(int fd)
8271 return common_sync(fd, true);
8275 status_t
8276 _kern_lock_node(int fd)
8278 return common_lock_node(fd, true);
8282 status_t
8283 _kern_unlock_node(int fd)
8285 return common_unlock_node(fd, true);
8289 status_t
8290 _kern_create_dir_entry_ref(dev_t device, ino_t inode, const char* name,
8291 int perms)
8293 return dir_create_entry_ref(device, inode, name, perms, true);
8297 /*! \brief Creates a directory specified by a FD + path pair.
8299 \a path must always be specified (it contains the name of the new directory
8300 at least). If only a path is given, this path identifies the location at
8301 which the directory shall be created. If both \a fd and \a path are given
8302 and the path is absolute, \a fd is ignored; a relative path is reckoned off
8303 of the directory (!) identified by \a fd.
8305 \param fd The FD. May be < 0.
8306 \param path The absolute or relative path. Must not be \c NULL.
8307 \param perms The access permissions the new directory shall have.
8308 \return \c B_OK, if the directory has been created successfully, another
8309 error code otherwise.
8311 status_t
8312 _kern_create_dir(int fd, const char* path, int perms)
8314 KPath pathBuffer(path, KPath::DEFAULT, B_PATH_NAME_LENGTH + 1);
8315 if (pathBuffer.InitCheck() != B_OK)
8316 return B_NO_MEMORY;
8318 return dir_create(fd, pathBuffer.LockBuffer(), perms, true);
8322 status_t
8323 _kern_remove_dir(int fd, const char* path)
8325 KPath pathBuffer(path, KPath::LAZY_ALLOC, B_PATH_NAME_LENGTH + 1);
8326 if (pathBuffer.InitCheck() != B_OK)
8327 return B_NO_MEMORY;
8329 return dir_remove(fd, pathBuffer.LockBuffer(), true);
8333 /*! \brief Reads the contents of a symlink referred to by a FD + path pair.
8335 At least one of \a fd and \a path must be specified.
8336 If only \a fd is given, the symlink to be read is the node
8337 identified by this FD. If only a path is given, this path identifies the
8338 symlink to be read. If both are given and the path is absolute, \a fd is
8339 ignored; a relative path is reckoned off of the directory (!) identified
8340 by \a fd.
8341 If this function fails with B_BUFFER_OVERFLOW, the \a _bufferSize pointer
8342 will still be updated to reflect the required buffer size.
8344 \param fd The FD. May be < 0.
8345 \param path The absolute or relative path. May be \c NULL.
8346 \param buffer The buffer into which the contents of the symlink shall be
8347 written.
8348 \param _bufferSize A pointer to the size of the supplied buffer.
8349 \return The length of the link on success or an appropriate error code
8351 status_t
8352 _kern_read_link(int fd, const char* path, char* buffer, size_t* _bufferSize)
8354 KPath pathBuffer(path, KPath::LAZY_ALLOC, B_PATH_NAME_LENGTH + 1);
8355 if (pathBuffer.InitCheck() != B_OK)
8356 return B_NO_MEMORY;
8358 return common_read_link(fd, pathBuffer.LockBuffer(),
8359 buffer, _bufferSize, true);
8363 /*! \brief Creates a symlink specified by a FD + path pair.
8365 \a path must always be specified (it contains the name of the new symlink
8366 at least). If only a path is given, this path identifies the location at
8367 which the symlink shall be created. If both \a fd and \a path are given and
8368 the path is absolute, \a fd is ignored; a relative path is reckoned off
8369 of the directory (!) identified by \a fd.
8371 \param fd The FD. May be < 0.
8372 \param toPath The absolute or relative path. Must not be \c NULL.
8373 \param mode The access permissions the new symlink shall have.
8374 \return \c B_OK, if the symlink has been created successfully, another
8375 error code otherwise.
8377 status_t
8378 _kern_create_symlink(int fd, const char* path, const char* toPath, int mode)
8380 KPath pathBuffer(path, KPath::DEFAULT, B_PATH_NAME_LENGTH + 1);
8381 if (pathBuffer.InitCheck() != B_OK)
8382 return B_NO_MEMORY;
8384 return common_create_symlink(fd, pathBuffer.LockBuffer(),
8385 toPath, mode, true);
8389 status_t
8390 _kern_create_link(int pathFD, const char* path, int toFD, const char* toPath,
8391 bool traverseLeafLink)
8393 KPath pathBuffer(path, KPath::DEFAULT, B_PATH_NAME_LENGTH + 1);
8394 KPath toPathBuffer(toPath, KPath::DEFAULT, B_PATH_NAME_LENGTH + 1);
8395 if (pathBuffer.InitCheck() != B_OK || toPathBuffer.InitCheck() != B_OK)
8396 return B_NO_MEMORY;
8398 return common_create_link(pathFD, pathBuffer.LockBuffer(), toFD,
8399 toPathBuffer.LockBuffer(), traverseLeafLink, true);
8403 /*! \brief Removes an entry specified by a FD + path pair from its directory.
8405 \a path must always be specified (it contains at least the name of the entry
8406 to be deleted). If only a path is given, this path identifies the entry
8407 directly. If both \a fd and \a path are given and the path is absolute,
8408 \a fd is ignored; a relative path is reckoned off of the directory (!)
8409 identified by \a fd.
8411 \param fd The FD. May be < 0.
8412 \param path The absolute or relative path. Must not be \c NULL.
8413 \return \c B_OK, if the entry has been removed successfully, another
8414 error code otherwise.
8416 status_t
8417 _kern_unlink(int fd, const char* path)
8419 KPath pathBuffer(path, KPath::DEFAULT, B_PATH_NAME_LENGTH + 1);
8420 if (pathBuffer.InitCheck() != B_OK)
8421 return B_NO_MEMORY;
8423 return common_unlink(fd, pathBuffer.LockBuffer(), true);
8427 /*! \brief Moves an entry specified by a FD + path pair to an entry specified
8428 by another FD + path pair.
8430 \a oldPath and \a newPath must always be specified (they contain at least
8431 the name of the entry). If only a path is given, this path identifies the
8432 entry directly. If both a FD and a path are given and the path is absolute,
8433 the FD is ignored; a relative path is reckoned off of the directory (!)
8434 identified by the respective FD.
8436 \param oldFD The FD of the old location. May be < 0.
8437 \param oldPath The absolute or relative path of the old location. Must not
8438 be \c NULL.
8439 \param newFD The FD of the new location. May be < 0.
8440 \param newPath The absolute or relative path of the new location. Must not
8441 be \c NULL.
8442 \return \c B_OK, if the entry has been moved successfully, another
8443 error code otherwise.
8445 status_t
8446 _kern_rename(int oldFD, const char* oldPath, int newFD, const char* newPath)
8448 KPath oldPathBuffer(oldPath, KPath::DEFAULT, B_PATH_NAME_LENGTH + 1);
8449 KPath newPathBuffer(newPath, KPath::DEFAULT, B_PATH_NAME_LENGTH + 1);
8450 if (oldPathBuffer.InitCheck() != B_OK || newPathBuffer.InitCheck() != B_OK)
8451 return B_NO_MEMORY;
8453 return common_rename(oldFD, oldPathBuffer.LockBuffer(),
8454 newFD, newPathBuffer.LockBuffer(), true);
8458 status_t
8459 _kern_access(int fd, const char* path, int mode, bool effectiveUserGroup)
8461 KPath pathBuffer(path, KPath::LAZY_ALLOC, B_PATH_NAME_LENGTH + 1);
8462 if (pathBuffer.InitCheck() != B_OK)
8463 return B_NO_MEMORY;
8465 return common_access(fd, pathBuffer.LockBuffer(), mode, effectiveUserGroup,
8466 true);
8470 /*! \brief Reads stat data of an entity specified by a FD + path pair.
8472 If only \a fd is given, the stat operation associated with the type
8473 of the FD (node, attr, attr dir etc.) is performed. If only \a path is
8474 given, this path identifies the entry for whose node to retrieve the
8475 stat data. If both \a fd and \a path are given and the path is absolute,
8476 \a fd is ignored; a relative path is reckoned off of the directory (!)
8477 identified by \a fd and specifies the entry whose stat data shall be
8478 retrieved.
8480 \param fd The FD. May be < 0.
8481 \param path The absolute or relative path. May be \c NULL.
8482 \param traverseLeafLink If \a path is given, \c true specifies that the
8483 function shall not stick to symlinks, but traverse them.
8484 \param stat The buffer the stat data shall be written into.
8485 \param statSize The size of the supplied stat buffer.
8486 \return \c B_OK, if the stat data has been read successfully, another
8487 error code otherwise.
8489 status_t
8490 _kern_read_stat(int fd, const char* path, bool traverseLeafLink,
8491 struct stat* stat, size_t statSize)
8493 struct stat completeStat;
8494 struct stat* originalStat = NULL;
8495 status_t status;
8497 if (statSize > sizeof(struct stat))
8498 return B_BAD_VALUE;
8500 // this supports different stat extensions
8501 if (statSize < sizeof(struct stat)) {
8502 originalStat = stat;
8503 stat = &completeStat;
8506 status = vfs_read_stat(fd, path, traverseLeafLink, stat, true);
8508 if (status == B_OK && originalStat != NULL)
8509 memcpy(originalStat, stat, statSize);
8511 return status;
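// Illustrative only -- a hedged sketch of the statSize compatibility
// mechanism above: a caller built against an older, shorter stat layout
// passes its own structure size and gets back just that prefix. The
// "old_stat" layout and the example function are hypothetical.
#if 0
struct old_stat {
	dev_t	st_dev;
	ino_t	st_ino;
	mode_t	st_mode;
	nlink_t	st_nlink;
	// ... the older layout ends before the newer fields
};

static status_t
example_read_old_stat(const char* path, struct old_stat* oldStat)
{
	// the kernel fills a complete struct stat internally and copies back
	// only sizeof(struct old_stat) bytes
	return _kern_read_stat(-1, path, true, (struct stat*)oldStat,
		sizeof(struct old_stat));
}
#endif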
8515 /*! \brief Writes stat data of an entity specified by a FD + path pair.
8517 If only \a fd is given, the stat operation associated with the type
8518 of the FD (node, attr, attr dir etc.) is performed. If only \a path is
8519 given, this path identifies the entry for whose node to write the
8520 stat data. If both \a fd and \a path are given and the path is absolute,
8521 \a fd is ignored; a relative path is reckoned off of the directory (!)
8522 identified by \a fd and specifies the entry whose stat data shall be
8523 written.
8525 \param fd The FD. May be < 0.
8526 \param path The absolute or relative path. May be \c NULL.
8527 \param traverseLeafLink If \a path is given, \c true specifies that the
8528 function shall not stick to symlinks, but traverse them.
8529 \param stat The buffer containing the stat data to be written.
8530 \param statSize The size of the supplied stat buffer.
8531 \param statMask A mask specifying which parts of the stat data shall be
8532 written.
8533 \return \c B_OK, if the stat data has been written successfully,
8534 another error code otherwise.
8536 status_t
8537 _kern_write_stat(int fd, const char* path, bool traverseLeafLink,
8538 const struct stat* stat, size_t statSize, int statMask)
8540 struct stat completeStat;
8542 if (statSize > sizeof(struct stat))
8543 return B_BAD_VALUE;
8545 // this supports different stat extensions
8546 if (statSize < sizeof(struct stat)) {
8547 memset((uint8*)&completeStat + statSize, 0,
8548 sizeof(struct stat) - statSize);
8549 memcpy(&completeStat, stat, statSize);
8550 stat = &completeStat;
8553 status_t status;
8555 if (path != NULL) {
8556 // path given: write the stat of the node referred to by (fd, path)
8557 KPath pathBuffer(path, KPath::DEFAULT, B_PATH_NAME_LENGTH + 1);
8558 if (pathBuffer.InitCheck() != B_OK)
8559 return B_NO_MEMORY;
8561 status = common_path_write_stat(fd, pathBuffer.LockBuffer(),
8562 traverseLeafLink, stat, statMask, true);
8563 } else {
8564 // no path given: get the FD and use the FD operation
8565 struct file_descriptor* descriptor
8566 = get_fd(get_current_io_context(true), fd);
8567 if (descriptor == NULL)
8568 return B_FILE_ERROR;
8570 if (descriptor->ops->fd_write_stat)
8571 status = descriptor->ops->fd_write_stat(descriptor, stat, statMask);
8572 else
8573 status = B_UNSUPPORTED;
8575 put_fd(descriptor);
8578 return status;
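// Illustrative only -- a partial stat update via statMask: only the mode
// bits are written, the remaining fields of the supplied buffer are ignored.
// B_STAT_MODE comes from <NodeMonitor.h>; the example function is
// hypothetical.
#if 0
static status_t
example_chmod(const char* path, mode_t mode)
{
	struct stat stat;
	stat.st_mode = mode;
		// the other fields may stay uninitialized -- the mask excludes them
	return _kern_write_stat(-1, path, true, &stat, sizeof(stat),
		B_STAT_MODE);
}
#endif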
8582 int
8583 _kern_open_attr_dir(int fd, const char* path, bool traverseLeafLink)
8585 KPath pathBuffer(path, KPath::LAZY_ALLOC, B_PATH_NAME_LENGTH + 1);
8586 if (pathBuffer.InitCheck() != B_OK)
8587 return B_NO_MEMORY;
8589 return attr_dir_open(fd, pathBuffer.LockBuffer(), traverseLeafLink, true);
8593 int
8594 _kern_open_attr(int fd, const char* path, const char* name, uint32 type,
8595 int openMode)
8597 KPath pathBuffer(path, KPath::LAZY_ALLOC, B_PATH_NAME_LENGTH + 1);
8598 if (pathBuffer.InitCheck() != B_OK)
8599 return B_NO_MEMORY;
8601 if ((openMode & O_CREAT) != 0) {
8602 return attr_create(fd, pathBuffer.LockBuffer(), name, type, openMode,
8603 true);
8606 return attr_open(fd, pathBuffer.LockBuffer(), name, openMode, true);
8610 status_t
8611 _kern_remove_attr(int fd, const char* name)
8613 return attr_remove(fd, name, true);
8617 status_t
8618 _kern_rename_attr(int fromFile, const char* fromName, int toFile,
8619 const char* toName)
8621 return attr_rename(fromFile, fromName, toFile, toName, true);
8625 int
8626 _kern_open_index_dir(dev_t device)
8628 return index_dir_open(device, true);
8632 status_t
8633 _kern_create_index(dev_t device, const char* name, uint32 type, uint32 flags)
8635 return index_create(device, name, type, flags, true);
8639 status_t
8640 _kern_read_index_stat(dev_t device, const char* name, struct stat* stat)
8642 return index_name_read_stat(device, name, stat, true);
8646 status_t
8647 _kern_remove_index(dev_t device, const char* name)
8649 return index_remove(device, name, true);
8653 status_t
8654 _kern_getcwd(char* buffer, size_t size)
8656 TRACE(("_kern_getcwd: buf %p, %ld\n", buffer, size));
8658 // Call vfs to get current working directory
8659 return get_cwd(buffer, size, true);
8663 status_t
8664 _kern_setcwd(int fd, const char* path)
8666 KPath pathBuffer(path, KPath::LAZY_ALLOC, B_PATH_NAME_LENGTH + 1);
8667 if (pathBuffer.InitCheck() != B_OK)
8668 return B_NO_MEMORY;
8670 return set_cwd(fd, pathBuffer.LockBuffer(), true);
8674 // #pragma mark - userland syscalls
8677 dev_t
8678 _user_mount(const char* userPath, const char* userDevice,
8679 const char* userFileSystem, uint32 flags, const char* userArgs,
8680 size_t argsLength)
8682 char fileSystem[B_FILE_NAME_LENGTH];
8683 KPath path, device;
8684 char* args = NULL;
8685 status_t status;
8687 if (!IS_USER_ADDRESS(userPath)
8688 || !IS_USER_ADDRESS(userFileSystem)
8689 || !IS_USER_ADDRESS(userDevice))
8690 return B_BAD_ADDRESS;
8692 if (path.InitCheck() != B_OK || device.InitCheck() != B_OK)
8693 return B_NO_MEMORY;
8695 if (user_strlcpy(path.LockBuffer(), userPath, B_PATH_NAME_LENGTH) < B_OK)
8696 return B_BAD_ADDRESS;
8698 if (userFileSystem != NULL
8699 && user_strlcpy(fileSystem, userFileSystem, sizeof(fileSystem)) < B_OK)
8700 return B_BAD_ADDRESS;
8702 if (userDevice != NULL
8703 && user_strlcpy(device.LockBuffer(), userDevice, B_PATH_NAME_LENGTH)
8704 < B_OK)
8705 return B_BAD_ADDRESS;
8707 if (userArgs != NULL && argsLength > 0) {
8708 // this is a safety restriction
8709 if (argsLength >= 65536)
8710 return B_NAME_TOO_LONG;
8712 args = (char*)malloc(argsLength + 1);
8713 if (args == NULL)
8714 return B_NO_MEMORY;
8716 if (user_strlcpy(args, userArgs, argsLength + 1) < B_OK) {
8717 free(args);
8718 return B_BAD_ADDRESS;
8721 path.UnlockBuffer();
8722 device.UnlockBuffer();
8724 status = fs_mount(path.LockBuffer(),
8725 userDevice != NULL ? device.Path() : NULL,
8726 userFileSystem != NULL ? fileSystem : NULL, flags, args, false);
8728 free(args);
8729 return status;
8733 status_t
8734 _user_unmount(const char* userPath, uint32 flags)
8736 KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
8737 if (pathBuffer.InitCheck() != B_OK)
8738 return B_NO_MEMORY;
8740 char* path = pathBuffer.LockBuffer();
8742 if (user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK)
8743 return B_BAD_ADDRESS;
8745 return fs_unmount(path, -1, flags & ~B_UNMOUNT_BUSY_PARTITION, false);
8749 status_t
8750 _user_read_fs_info(dev_t device, struct fs_info* userInfo)
8752 struct fs_info info;
8753 status_t status;
8755 if (userInfo == NULL)
8756 return B_BAD_VALUE;
8758 if (!IS_USER_ADDRESS(userInfo))
8759 return B_BAD_ADDRESS;
8761 status = fs_read_info(device, &info);
8762 if (status != B_OK)
8763 return status;
8765 if (user_memcpy(userInfo, &info, sizeof(struct fs_info)) != B_OK)
8766 return B_BAD_ADDRESS;
8768 return B_OK;
8772 status_t
8773 _user_write_fs_info(dev_t device, const struct fs_info* userInfo, int mask)
8775 struct fs_info info;
8777 if (userInfo == NULL)
8778 return B_BAD_VALUE;
8780 if (!IS_USER_ADDRESS(userInfo)
8781 || user_memcpy(&info, userInfo, sizeof(struct fs_info)) != B_OK)
8782 return B_BAD_ADDRESS;
8784 return fs_write_info(device, &info, mask);
8788 dev_t
8789 _user_next_device(int32* _userCookie)
8791 int32 cookie;
8792 dev_t device;
8794 if (!IS_USER_ADDRESS(_userCookie)
8795 || user_memcpy(&cookie, _userCookie, sizeof(int32)) != B_OK)
8796 return B_BAD_ADDRESS;
8798 device = fs_next_device(&cookie);
8800 if (device >= B_OK) {
8801 // update user cookie
8802 if (user_memcpy(_userCookie, &cookie, sizeof(int32)) != B_OK)
8803 return B_BAD_ADDRESS;
8806 return device;
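// Illustrative only -- the cookie-based iteration this syscall implements,
// seen from userland through the public next_dev()/fs_stat_dev() wrappers
// declared in <fs_info.h>; the example function is hypothetical.
#if 0
static void
example_list_volumes()
{
	int32 cookie = 0;
	dev_t device;
	while ((device = next_dev(&cookie)) >= 0) {
		fs_info info;
		if (fs_stat_dev(device, &info) == 0)
			printf("volume %s on %s\n", info.volume_name, info.device_name);
	}
}
#endif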
8810 status_t
8811 _user_sync(void)
8813 return _kern_sync();
8817 status_t
8818 _user_get_next_fd_info(team_id team, uint32* userCookie, fd_info* userInfo,
8819 size_t infoSize)
8821 struct fd_info info;
8822 uint32 cookie;
8824 // only root can do this (or should root's group be enough?)
8825 if (geteuid() != 0)
8826 return B_NOT_ALLOWED;
8828 if (infoSize != sizeof(fd_info))
8829 return B_BAD_VALUE;
8831 if (!IS_USER_ADDRESS(userCookie) || !IS_USER_ADDRESS(userInfo)
8832 || user_memcpy(&cookie, userCookie, sizeof(uint32)) != B_OK)
8833 return B_BAD_ADDRESS;
8835 status_t status = _kern_get_next_fd_info(team, &cookie, &info, infoSize);
8836 if (status != B_OK)
8837 return status;
8839 if (user_memcpy(userCookie, &cookie, sizeof(uint32)) != B_OK
8840 || user_memcpy(userInfo, &info, infoSize) != B_OK)
8841 return B_BAD_ADDRESS;
8843 return status;
8847 status_t
8848 _user_entry_ref_to_path(dev_t device, ino_t inode, const char* leaf,
8849 char* userPath, size_t pathLength)
8851 if (!IS_USER_ADDRESS(userPath))
8852 return B_BAD_ADDRESS;
8854 KPath path(B_PATH_NAME_LENGTH + 1);
8855 if (path.InitCheck() != B_OK)
8856 return B_NO_MEMORY;
8858 // copy the leaf name onto the stack
8859 char stackLeaf[B_FILE_NAME_LENGTH];
8860 if (leaf != NULL) {
8861 if (!IS_USER_ADDRESS(leaf))
8862 return B_BAD_ADDRESS;
8864 int length = user_strlcpy(stackLeaf, leaf, B_FILE_NAME_LENGTH);
8865 if (length < 0)
8866 return length;
8867 if (length >= B_FILE_NAME_LENGTH)
8868 return B_NAME_TOO_LONG;
8870 leaf = stackLeaf;
8873 status_t status = vfs_entry_ref_to_path(device, inode, leaf,
8874 false, path.LockBuffer(), path.BufferSize());
8875 if (status != B_OK)
8876 return status;
8878 path.UnlockBuffer();
8880 int length = user_strlcpy(userPath, path.Path(), pathLength);
8881 if (length < 0)
8882 return length;
8883 if (length >= (int)pathLength)
8884 return B_BUFFER_OVERFLOW;
8886 return B_OK;
8890 status_t
8891 _user_normalize_path(const char* userPath, bool traverseLink, char* buffer)
8893 if (userPath == NULL || buffer == NULL)
8894 return B_BAD_VALUE;
8895 if (!IS_USER_ADDRESS(userPath) || !IS_USER_ADDRESS(buffer))
8896 return B_BAD_ADDRESS;
8898 // copy path from userland
8899 KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
8900 if (pathBuffer.InitCheck() != B_OK)
8901 return B_NO_MEMORY;
8902 char* path = pathBuffer.LockBuffer();
8904 if (user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK)
8905 return B_BAD_ADDRESS;
8907 status_t error = normalize_path(path, pathBuffer.BufferSize(), traverseLink,
8908 false);
8909 if (error != B_OK)
8910 return error;
8912 // copy back to userland
8913 int len = user_strlcpy(buffer, path, B_PATH_NAME_LENGTH);
8914 if (len < 0)
8915 return len;
8916 if (len >= B_PATH_NAME_LENGTH)
8917 return B_BUFFER_OVERFLOW;
8919 return B_OK;
8923 int
8924 _user_open_entry_ref(dev_t device, ino_t inode, const char* userName,
8925 int openMode, int perms)
8927 char name[B_FILE_NAME_LENGTH];
8929 if (userName == NULL || device < 0 || inode < 0)
8930 return B_BAD_VALUE;
8931 if (!IS_USER_ADDRESS(userName)
8932 || user_strlcpy(name, userName, sizeof(name)) < B_OK)
8933 return B_BAD_ADDRESS;
8935 if ((openMode & O_CREAT) != 0) {
8936 return file_create_entry_ref(device, inode, name, openMode, perms,
8937 false);
8940 return file_open_entry_ref(device, inode, name, openMode, false);
8944 int
8945 _user_open(int fd, const char* userPath, int openMode, int perms)
8947 KPath path(B_PATH_NAME_LENGTH + 1);
8948 if (path.InitCheck() != B_OK)
8949 return B_NO_MEMORY;
8951 char* buffer = path.LockBuffer();
8953 if (!IS_USER_ADDRESS(userPath)
8954 || user_strlcpy(buffer, userPath, B_PATH_NAME_LENGTH) < B_OK)
8955 return B_BAD_ADDRESS;
8957 if ((openMode & O_CREAT) != 0)
8958 return file_create(fd, buffer, openMode, perms, false);
8960 return file_open(fd, buffer, openMode, false);
8964 int
8965 _user_open_dir_entry_ref(dev_t device, ino_t inode, const char* userName)
8967 if (userName != NULL) {
8968 char name[B_FILE_NAME_LENGTH];
8970 if (!IS_USER_ADDRESS(userName)
8971 || user_strlcpy(name, userName, sizeof(name)) < B_OK)
8972 return B_BAD_ADDRESS;
8974 return dir_open_entry_ref(device, inode, name, false);
8976 return dir_open_entry_ref(device, inode, NULL, false);
8980 int
8981 _user_open_dir(int fd, const char* userPath)
8983 if (userPath == NULL)
8984 return dir_open(fd, NULL, false);
8986 KPath path(B_PATH_NAME_LENGTH + 1);
8987 if (path.InitCheck() != B_OK)
8988 return B_NO_MEMORY;
8990 char* buffer = path.LockBuffer();
8992 if (!IS_USER_ADDRESS(userPath)
8993 || user_strlcpy(buffer, userPath, B_PATH_NAME_LENGTH) < B_OK)
8994 return B_BAD_ADDRESS;
8996 return dir_open(fd, buffer, false);
9000 /*! \brief Opens a directory's parent directory and returns the entry name
9001 of the former.
9003 Aside from also returning the directory's entry name, this method is
9004 equivalent to \code _user_open_dir(fd, "..") \endcode. It is exactly
9005 equivalent if \a userName is \c NULL.
9007 If a name buffer is supplied and the name does not fit the buffer, the
9008 function fails. A buffer of size \c B_FILE_NAME_LENGTH should be safe.
9010 \param fd A FD referring to a directory.
9011 \param userName Buffer the directory's entry name shall be written into.
9012 May be \c NULL.
9013 \param nameLength Size of the name buffer.
9014 \return The file descriptor of the opened parent directory, if everything
9015 went fine, an error code otherwise.
9017 int
9018 _user_open_parent_dir(int fd, char* userName, size_t nameLength)
9020 bool kernel = false;
9022 if (userName && !IS_USER_ADDRESS(userName))
9023 return B_BAD_ADDRESS;
9025 // open the parent dir
9026 int parentFD = dir_open(fd, (char*)"..", kernel);
9027 if (parentFD < 0)
9028 return parentFD;
9029 FDCloser fdCloser(parentFD, kernel);
9031 if (userName) {
9032 // get the vnodes
9033 struct vnode* parentVNode = get_vnode_from_fd(parentFD, kernel);
9034 struct vnode* dirVNode = get_vnode_from_fd(fd, kernel);
9035 VNodePutter parentVNodePutter(parentVNode);
9036 VNodePutter dirVNodePutter(dirVNode);
9037 if (!parentVNode || !dirVNode)
9038 return B_FILE_ERROR;
9040 // get the vnode name
9041 char _buffer[sizeof(struct dirent) + B_FILE_NAME_LENGTH];
9042 struct dirent* buffer = (struct dirent*)_buffer;
9043 status_t status = get_vnode_name(dirVNode, parentVNode, buffer,
9044 sizeof(_buffer), get_current_io_context(false));
9045 if (status != B_OK)
9046 return status;
9048 // copy the name to the userland buffer
9049 int len = user_strlcpy(userName, buffer->d_name, nameLength);
9050 if (len < 0)
9051 return len;
9052 if (len >= (int)nameLength)
9053 return B_BUFFER_OVERFLOW;
9056 return fdCloser.Detach();
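// Illustrative only -- how the call above can be chained to walk from a
// directory up to the root, collecting the entry names on the way; written
// against the _kern_open_parent_dir()/_kern_close() syscall stubs a
// userland program sees. The sketch takes over ownership of dirFD; the walk
// ends as soon as a call fails, which also stops it at the root.
#if 0
static void
example_walk_to_root(int dirFD)
{
	char name[B_FILE_NAME_LENGTH];
	int parentFD;
	while ((parentFD = _kern_open_parent_dir(dirFD, name,
			sizeof(name))) >= 0) {
		// "name" now holds the entry name of the directory we just left
		_kern_close(dirFD);
		dirFD = parentFD;
	}
	_kern_close(dirFD);
}
#endif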
9060 status_t
9061 _user_fcntl(int fd, int op, size_t argument)
9063 status_t status = common_fcntl(fd, op, argument, false);
9064 if (op == F_SETLKW)
9065 syscall_restart_handle_post(status);
9067 return status;
9071 status_t
9072 _user_fsync(int fd)
9074 return common_sync(fd, false);
9078 status_t
9079 _user_flock(int fd, int operation)
9081 FUNCTION(("_user_fcntl(fd = %d, op = %d)\n", fd, operation));
9083 // Check if the operation is valid
9084 switch (operation & ~LOCK_NB) {
9085 case LOCK_UN:
9086 case LOCK_SH:
9087 case LOCK_EX:
9088 break;
9090 default:
9091 return B_BAD_VALUE;
9094 struct file_descriptor* descriptor;
9095 struct vnode* vnode;
9096 descriptor = get_fd_and_vnode(fd, &vnode, false);
9097 if (descriptor == NULL)
9098 return B_FILE_ERROR;
9100 if (descriptor->type != FDTYPE_FILE) {
9101 put_fd(descriptor);
9102 return B_BAD_VALUE;
9105 struct flock flock;
9106 flock.l_start = 0;
9107 flock.l_len = OFF_MAX;
9108 flock.l_whence = SEEK_SET;
9109 flock.l_type = (operation & LOCK_SH) != 0 ? F_RDLCK : F_WRLCK;
9111 status_t status;
9112 if ((operation & LOCK_UN) != 0)
9113 status = release_advisory_lock(vnode, &flock);
9114 else {
9115 status = acquire_advisory_lock(vnode,
9116 thread_get_current_thread()->team->session_id, &flock,
9117 (operation & LOCK_NB) == 0);
9120 syscall_restart_handle_post(status);
9122 put_fd(descriptor);
9123 return status;
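// Illustrative only -- the mapping above from the caller's side, using the
// public flock() wrapper from <sys/file.h>: a non-blocking exclusive lock,
// which the code above turns into an F_WRLCK advisory lock over the whole
// file. The example function is hypothetical.
#if 0
static bool
example_try_exclusive_lock(int fd)
{
	if (flock(fd, LOCK_EX | LOCK_NB) != 0)
		return false;	// somebody else holds the lock

	// ... critical section ...

	flock(fd, LOCK_UN);
	return true;
}
#endif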
9127 status_t
9128 _user_lock_node(int fd)
9130 return common_lock_node(fd, false);
9134 status_t
9135 _user_unlock_node(int fd)
9137 return common_unlock_node(fd, false);
9141 status_t
9142 _user_create_dir_entry_ref(dev_t device, ino_t inode, const char* userName,
9143 int perms)
9145 char name[B_FILE_NAME_LENGTH];
9146 status_t status;
9148 if (!IS_USER_ADDRESS(userName))
9149 return B_BAD_ADDRESS;
9151 status = user_strlcpy(name, userName, sizeof(name));
9152 if (status < 0)
9153 return status;
9155 return dir_create_entry_ref(device, inode, name, perms, false);
9159 status_t
9160 _user_create_dir(int fd, const char* userPath, int perms)
9162 KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
9163 if (pathBuffer.InitCheck() != B_OK)
9164 return B_NO_MEMORY;
9166 char* path = pathBuffer.LockBuffer();
9168 if (!IS_USER_ADDRESS(userPath)
9169 || user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK)
9170 return B_BAD_ADDRESS;
9172 return dir_create(fd, path, perms, false);
9176 status_t
9177 _user_remove_dir(int fd, const char* userPath)
9179 KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
9180 if (pathBuffer.InitCheck() != B_OK)
9181 return B_NO_MEMORY;
9183 char* path = pathBuffer.LockBuffer();
9185 if (userPath != NULL) {
9186 if (!IS_USER_ADDRESS(userPath)
9187 || user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK)
9188 return B_BAD_ADDRESS;
9191 return dir_remove(fd, userPath ? path : NULL, false);
9195 status_t
9196 _user_read_link(int fd, const char* userPath, char* userBuffer,
9197 size_t* userBufferSize)
9199 KPath pathBuffer(B_PATH_NAME_LENGTH + 1), linkBuffer;
9200 if (pathBuffer.InitCheck() != B_OK || linkBuffer.InitCheck() != B_OK)
9201 return B_NO_MEMORY;
9203 size_t bufferSize;
9205 if (!IS_USER_ADDRESS(userBuffer) || !IS_USER_ADDRESS(userBufferSize)
9206 || user_memcpy(&bufferSize, userBufferSize, sizeof(size_t)) != B_OK)
9207 return B_BAD_ADDRESS;
9209 char* path = pathBuffer.LockBuffer();
9210 char* buffer = linkBuffer.LockBuffer();
9212 if (userPath) {
9213 if (!IS_USER_ADDRESS(userPath)
9214 || user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK)
9215 return B_BAD_ADDRESS;
9217 if (bufferSize > B_PATH_NAME_LENGTH)
9218 bufferSize = B_PATH_NAME_LENGTH;
9221 status_t status = common_read_link(fd, userPath ? path : NULL, buffer,
9222 &bufferSize, false);
9224 // we also update the bufferSize in case of errors
9225 // (the real length will be returned in case of B_BUFFER_OVERFLOW)
9226 if (user_memcpy(userBufferSize, &bufferSize, sizeof(size_t)) != B_OK)
9227 return B_BAD_ADDRESS;
9229 if (status != B_OK)
9230 return status;
9232 if (user_memcpy(userBuffer, buffer, bufferSize) != B_OK)
9233 return B_BAD_ADDRESS;
9235 return B_OK;
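// Illustrative only -- the size-reporting contract noted above: on
// B_BUFFER_OVERFLOW the returned size is the real link length, so a caller
// can size a second attempt exactly. Written against the _kern_read_link()
// syscall stub; the example function is hypothetical.
#if 0
static ssize_t
example_link_length(const char* path)
{
	char dummy[1];
	size_t size = sizeof(dummy);
	status_t status = _kern_read_link(-1, path, dummy, &size);
	if (status != B_OK && status != B_BUFFER_OVERFLOW)
		return status;
	return (ssize_t)size;	// the length actually needed
}
#endif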
9239 status_t
9240 _user_create_symlink(int fd, const char* userPath, const char* userToPath,
9241 int mode)
9243 KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
9244 KPath toPathBuffer(B_PATH_NAME_LENGTH + 1);
9245 if (pathBuffer.InitCheck() != B_OK || toPathBuffer.InitCheck() != B_OK)
9246 return B_NO_MEMORY;
9248 char* path = pathBuffer.LockBuffer();
9249 char* toPath = toPathBuffer.LockBuffer();
9251 if (!IS_USER_ADDRESS(userPath)
9252 || !IS_USER_ADDRESS(userToPath)
9253 || user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK
9254 || user_strlcpy(toPath, userToPath, B_PATH_NAME_LENGTH) < B_OK)
9255 return B_BAD_ADDRESS;
9257 return common_create_symlink(fd, path, toPath, mode, false);
9261 status_t
9262 _user_create_link(int pathFD, const char* userPath, int toFD,
9263 const char* userToPath, bool traverseLeafLink)
9265 KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
9266 KPath toPathBuffer(B_PATH_NAME_LENGTH + 1);
9267 if (pathBuffer.InitCheck() != B_OK || toPathBuffer.InitCheck() != B_OK)
9268 return B_NO_MEMORY;
9270 char* path = pathBuffer.LockBuffer();
9271 char* toPath = toPathBuffer.LockBuffer();
9273 if (!IS_USER_ADDRESS(userPath)
9274 || !IS_USER_ADDRESS(userToPath)
9275 || user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK
9276 || user_strlcpy(toPath, userToPath, B_PATH_NAME_LENGTH) < B_OK)
9277 return B_BAD_ADDRESS;
9279 status_t status = check_path(toPath);
9280 if (status != B_OK)
9281 return status;
9283 return common_create_link(pathFD, path, toFD, toPath, traverseLeafLink,
9284 false);
9288 status_t
9289 _user_unlink(int fd, const char* userPath)
9291 KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
9292 if (pathBuffer.InitCheck() != B_OK)
9293 return B_NO_MEMORY;
9295 char* path = pathBuffer.LockBuffer();
9297 if (!IS_USER_ADDRESS(userPath)
9298 || user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK)
9299 return B_BAD_ADDRESS;
9301 return common_unlink(fd, path, false);
9305 status_t
9306 _user_rename(int oldFD, const char* userOldPath, int newFD,
9307 const char* userNewPath)
9309 KPath oldPathBuffer(B_PATH_NAME_LENGTH + 1);
9310 KPath newPathBuffer(B_PATH_NAME_LENGTH + 1);
9311 if (oldPathBuffer.InitCheck() != B_OK || newPathBuffer.InitCheck() != B_OK)
9312 return B_NO_MEMORY;
9314 char* oldPath = oldPathBuffer.LockBuffer();
9315 char* newPath = newPathBuffer.LockBuffer();
9317 if (!IS_USER_ADDRESS(userOldPath) || !IS_USER_ADDRESS(userNewPath)
9318 || user_strlcpy(oldPath, userOldPath, B_PATH_NAME_LENGTH) < B_OK
9319 || user_strlcpy(newPath, userNewPath, B_PATH_NAME_LENGTH) < B_OK)
9320 return B_BAD_ADDRESS;
9322 return common_rename(oldFD, oldPath, newFD, newPath, false);
9326 status_t
9327 _user_create_fifo(int fd, const char* userPath, mode_t perms)
9329 KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
9330 if (pathBuffer.InitCheck() != B_OK)
9331 return B_NO_MEMORY;
9333 char* path = pathBuffer.LockBuffer();
9335 if (!IS_USER_ADDRESS(userPath)
9336 || user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK) {
9337 return B_BAD_ADDRESS;
9340 // split into directory vnode and filename path
9341 char filename[B_FILE_NAME_LENGTH];
9342 struct vnode* dir;
9343 status_t status = fd_and_path_to_dir_vnode(fd, path, &dir, filename, false);
9344 if (status != B_OK)
9345 return status;
9347 VNodePutter _(dir);
9349 // the underlying FS needs to support creating FIFOs
9350 if (!HAS_FS_CALL(dir, create_special_node))
9351 return B_UNSUPPORTED;
9353 // create the entry -- the FIFO sub node is set up automatically
9354 fs_vnode superVnode;
9355 ino_t nodeID;
9356 status = FS_CALL(dir, create_special_node, filename, NULL,
9357 S_IFIFO | (perms & S_IUMSK), 0, &superVnode, &nodeID);
9359 // create_special_node() acquired a reference for us that we don't need.
9360 if (status == B_OK)
9361 put_vnode(dir->mount->volume, nodeID);
9363 return status;
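// Illustrative only -- from userland this code is normally reached through
// POSIX mkfifo() (<sys/stat.h>), whose path ends up in the
// create_special_node() hook used above; the path here is hypothetical.
#if 0
static bool
example_make_fifo()
{
	return mkfifo("/tmp/example-fifo", 0600) == 0;
}
#endif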
9367 status_t
9368 _user_create_pipe(int* userFDs)
9370 // rootfs should support creating FIFOs, but let's be sure
9371 if (!HAS_FS_CALL(sRoot, create_special_node))
9372 return B_UNSUPPORTED;
9374 // create the node -- the FIFO sub node is set up automatically
9375 fs_vnode superVnode;
9376 ino_t nodeID;
9377 status_t status = FS_CALL(sRoot, create_special_node, NULL, NULL,
9378 S_IFIFO | S_IRUSR | S_IWUSR, 0, &superVnode, &nodeID);
9379 if (status != B_OK)
9380 return status;
9382 // We've got one reference to the node and need another one.
9383 struct vnode* vnode;
9384 status = get_vnode(sRoot->mount->id, nodeID, &vnode, true, false);
9385 if (status != B_OK) {
9386 // that should not happen
9387 dprintf("_user_create_pipe(): Failed to lookup vnode (%" B_PRIdDEV ", "
9388 "%" B_PRIdINO ")\n", sRoot->mount->id, sRoot->id);
9389 return status;
9392 // Everything looks good so far. Open two FDs, one for reading and one
9393 // for writing.
9394 int fds[2];
9395 fds[0] = open_vnode(vnode, O_RDONLY, false);
9396 fds[1] = open_vnode(vnode, O_WRONLY, false);
9398 FDCloser closer0(fds[0], false);
9399 FDCloser closer1(fds[1], false);
9401 status = (fds[0] >= 0 ? (fds[1] >= 0 ? B_OK : fds[1]) : fds[0]);
9403 // copy FDs to userland
9404 if (status == B_OK) {
9405 if (!IS_USER_ADDRESS(userFDs)
9406 || user_memcpy(userFDs, fds, sizeof(fds)) != B_OK) {
9407 status = B_BAD_ADDRESS;
9411 // keep FDs, if everything went fine
9412 if (status == B_OK) {
9413 closer0.Detach();
9414 closer1.Detach();
9417 return status;
9421 status_t
9422 _user_access(int fd, const char* userPath, int mode, bool effectiveUserGroup)
9424 KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
9425 if (pathBuffer.InitCheck() != B_OK)
9426 return B_NO_MEMORY;
9428 char* path = pathBuffer.LockBuffer();
9430 if (!IS_USER_ADDRESS(userPath)
9431 || user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK)
9432 return B_BAD_ADDRESS;
9434 return common_access(fd, path, mode, effectiveUserGroup, false);
9438 status_t
9439 _user_read_stat(int fd, const char* userPath, bool traverseLink,
9440 struct stat* userStat, size_t statSize)
9442 struct stat stat;
9443 status_t status;
9445 if (statSize > sizeof(struct stat))
9446 return B_BAD_VALUE;
9448 if (!IS_USER_ADDRESS(userStat))
9449 return B_BAD_ADDRESS;
9451 if (userPath != NULL) {
9452 // path given: get the stat of the node referred to by (fd, path)
9453 if (!IS_USER_ADDRESS(userPath))
9454 return B_BAD_ADDRESS;
9456 KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
9457 if (pathBuffer.InitCheck() != B_OK)
9458 return B_NO_MEMORY;
9460 char* path = pathBuffer.LockBuffer();
9462 ssize_t length = user_strlcpy(path, userPath, B_PATH_NAME_LENGTH);
9463 if (length < B_OK)
9464 return length;
9465 if (length >= B_PATH_NAME_LENGTH)
9466 return B_NAME_TOO_LONG;
9468 status = common_path_read_stat(fd, path, traverseLink, &stat, false);
9469 } else {
9470 // no path given: get the FD and use the FD operation
9471 struct file_descriptor* descriptor
9472 = get_fd(get_current_io_context(false), fd);
9473 if (descriptor == NULL)
9474 return B_FILE_ERROR;
9476 if (descriptor->ops->fd_read_stat)
9477 status = descriptor->ops->fd_read_stat(descriptor, &stat);
9478 else
9479 status = B_UNSUPPORTED;
9481 put_fd(descriptor);
9484 if (status != B_OK)
9485 return status;
9487 return user_memcpy(userStat, &stat, statSize);
9491 status_t
9492 _user_write_stat(int fd, const char* userPath, bool traverseLeafLink,
9493 const struct stat* userStat, size_t statSize, int statMask)
9495 if (statSize > sizeof(struct stat))
9496 return B_BAD_VALUE;
9498 struct stat stat;
9500 if (!IS_USER_ADDRESS(userStat)
9501 || user_memcpy(&stat, userStat, statSize) < B_OK)
9502 return B_BAD_ADDRESS;
9504 // clear additional stat fields
9505 if (statSize < sizeof(struct stat))
9506 memset((uint8*)&stat + statSize, 0, sizeof(struct stat) - statSize);
9508 status_t status;
9510 if (userPath != NULL) {
9511 // path given: write the stat of the node referred to by (fd, path)
9512 if (!IS_USER_ADDRESS(userPath))
9513 return B_BAD_ADDRESS;
9515 KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
9516 if (pathBuffer.InitCheck() != B_OK)
9517 return B_NO_MEMORY;
9519 char* path = pathBuffer.LockBuffer();
9521 ssize_t length = user_strlcpy(path, userPath, B_PATH_NAME_LENGTH);
9522 if (length < B_OK)
9523 return length;
9524 if (length >= B_PATH_NAME_LENGTH)
9525 return B_NAME_TOO_LONG;
9527 status = common_path_write_stat(fd, path, traverseLeafLink, &stat,
9528 statMask, false);
9529 } else {
9530 // no path given: get the FD and use the FD operation
9531 struct file_descriptor* descriptor
9532 = get_fd(get_current_io_context(false), fd);
9533 if (descriptor == NULL)
9534 return B_FILE_ERROR;
9536 if (descriptor->ops->fd_write_stat) {
9537 status = descriptor->ops->fd_write_stat(descriptor, &stat,
9538 statMask);
9539 } else
9540 status = B_UNSUPPORTED;
9542 put_fd(descriptor);
9545 return status;
9549 int
9550 _user_open_attr_dir(int fd, const char* userPath, bool traverseLeafLink)
9552 KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
9553 if (pathBuffer.InitCheck() != B_OK)
9554 return B_NO_MEMORY;
9556 char* path = pathBuffer.LockBuffer();
9558 if (userPath != NULL) {
9559 if (!IS_USER_ADDRESS(userPath)
9560 || user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK)
9561 return B_BAD_ADDRESS;
9564 return attr_dir_open(fd, userPath ? path : NULL, traverseLeafLink, false);
9568 ssize_t
9569 _user_read_attr(int fd, const char* attribute, off_t pos, void* userBuffer,
9570 size_t readBytes)
9572 int attr = attr_open(fd, NULL, attribute, O_RDONLY, false);
9573 if (attr < 0)
9574 return attr;
9576 ssize_t bytes = _user_read(attr, pos, userBuffer, readBytes);
9577 _user_close(attr);
9579 return bytes;
9583 ssize_t
9584 _user_write_attr(int fd, const char* attribute, uint32 type, off_t pos,
9585 const void* buffer, size_t writeBytes)
9587 // Try to support the BeOS-typical truncation semantics (pos == 0
9588 // truncates the attribute) as well as the position argument
9589 int attr = attr_create(fd, NULL, attribute, type,
9590 O_CREAT | O_WRONLY | (pos != 0 ? 0 : O_TRUNC), false);
9591 if (attr < 0)
9592 return attr;
9594 ssize_t bytes = _user_write(attr, pos, buffer, writeBytes);
9595 _user_close(attr);
9597 return bytes;
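// Illustrative only -- the BeOS-style semantics from the caller's side,
// using the public fs_write_attr() wrapper (<fs_attr.h>); the attribute
// name, its contents, and B_STRING_TYPE (<TypeConstants.h>) are just for
// demonstration.
#if 0
static void
example_write_attr(int fd)
{
	// pos == 0: the attribute is created/truncated first (O_TRUNC)
	fs_write_attr(fd, "example:comment", B_STRING_TYPE, 0, "hello", 6);

	// pos != 0: no truncation; the data is written at the given offset,
	// leaving "hello world" in the attribute
	fs_write_attr(fd, "example:comment", B_STRING_TYPE, 5, " world", 7);
}
#endif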
9601 status_t
9602 _user_stat_attr(int fd, const char* attribute, struct attr_info* userAttrInfo)
9604 int attr = attr_open(fd, NULL, attribute, O_RDONLY, false);
9605 if (attr < 0)
9606 return attr;
9608 struct file_descriptor* descriptor
9609 = get_fd(get_current_io_context(false), attr);
9610 if (descriptor == NULL) {
9611 _user_close(attr);
9612 return B_FILE_ERROR;
9615 struct stat stat;
9616 status_t status;
9617 if (descriptor->ops->fd_read_stat)
9618 status = descriptor->ops->fd_read_stat(descriptor, &stat);
9619 else
9620 status = B_UNSUPPORTED;
9622 put_fd(descriptor);
9623 _user_close(attr);
9625 if (status == B_OK) {
9626 attr_info info;
9627 info.type = stat.st_type;
9628 info.size = stat.st_size;
9630 if (user_memcpy(userAttrInfo, &info, sizeof(struct attr_info)) != B_OK)
9631 return B_BAD_ADDRESS;
9634 return status;
9638 int
9639 _user_open_attr(int fd, const char* userPath, const char* userName,
9640 uint32 type, int openMode)
9642 char name[B_FILE_NAME_LENGTH];
9644 if (!IS_USER_ADDRESS(userName)
9645 || user_strlcpy(name, userName, B_FILE_NAME_LENGTH) < B_OK)
9646 return B_BAD_ADDRESS;
9648 KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
9649 if (pathBuffer.InitCheck() != B_OK)
9650 return B_NO_MEMORY;
9652 char* path = pathBuffer.LockBuffer();
9654 if (userPath != NULL) {
9655 if (!IS_USER_ADDRESS(userPath)
9656 || user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK)
9657 return B_BAD_ADDRESS;
9660 if ((openMode & O_CREAT) != 0) {
9661 return attr_create(fd, userPath ? path : NULL, name, type, openMode,
9662 false);
9665 return attr_open(fd, userPath ? path : NULL, name, openMode, false);
9669 status_t
9670 _user_remove_attr(int fd, const char* userName)
9672 char name[B_FILE_NAME_LENGTH];
9674 if (!IS_USER_ADDRESS(userName)
9675 || user_strlcpy(name, userName, B_FILE_NAME_LENGTH) < B_OK)
9676 return B_BAD_ADDRESS;
9678 return attr_remove(fd, name, false);
9682 status_t
9683 _user_rename_attr(int fromFile, const char* userFromName, int toFile,
9684 const char* userToName)
9686 if (!IS_USER_ADDRESS(userFromName)
9687 || !IS_USER_ADDRESS(userToName))
9688 return B_BAD_ADDRESS;
9690 KPath fromNameBuffer(B_FILE_NAME_LENGTH);
9691 KPath toNameBuffer(B_FILE_NAME_LENGTH);
9692 if (fromNameBuffer.InitCheck() != B_OK || toNameBuffer.InitCheck() != B_OK)
9693 return B_NO_MEMORY;
9695 char* fromName = fromNameBuffer.LockBuffer();
9696 char* toName = toNameBuffer.LockBuffer();
9698 if (user_strlcpy(fromName, userFromName, B_FILE_NAME_LENGTH) < B_OK
9699 || user_strlcpy(toName, userToName, B_FILE_NAME_LENGTH) < B_OK)
9700 return B_BAD_ADDRESS;
9702 return attr_rename(fromFile, fromName, toFile, toName, false);
9706 int
9707 _user_open_index_dir(dev_t device)
9709 return index_dir_open(device, false);
9713 status_t
9714 _user_create_index(dev_t device, const char* userName, uint32 type,
9715 uint32 flags)
9717 char name[B_FILE_NAME_LENGTH];
9719 if (!IS_USER_ADDRESS(userName)
9720 || user_strlcpy(name, userName, B_FILE_NAME_LENGTH) < B_OK)
9721 return B_BAD_ADDRESS;
9723 return index_create(device, name, type, flags, false);
9727 status_t
9728 _user_read_index_stat(dev_t device, const char* userName, struct stat* userStat)
9730 char name[B_FILE_NAME_LENGTH];
9731 struct stat stat;
9732 status_t status;
9734 if (!IS_USER_ADDRESS(userName)
9735 || !IS_USER_ADDRESS(userStat)
9736 || user_strlcpy(name, userName, B_FILE_NAME_LENGTH) < B_OK)
9737 return B_BAD_ADDRESS;
9739 status = index_name_read_stat(device, name, &stat, false);
9740 if (status == B_OK) {
9741 if (user_memcpy(userStat, &stat, sizeof(stat)) != B_OK)
9742 return B_BAD_ADDRESS;
9745 return status;
9749 status_t
9750 _user_remove_index(dev_t device, const char* userName)
9752 char name[B_FILE_NAME_LENGTH];
9754 if (!IS_USER_ADDRESS(userName)
9755 || user_strlcpy(name, userName, B_FILE_NAME_LENGTH) < B_OK)
9756 return B_BAD_ADDRESS;
9758 return index_remove(device, name, false);
9762 status_t
9763 _user_getcwd(char* userBuffer, size_t size)
9765 if (size == 0)
9766 return B_BAD_VALUE;
9767 if (!IS_USER_ADDRESS(userBuffer))
9768 return B_BAD_ADDRESS;
9770 if (size > kMaxPathLength)
9771 size = kMaxPathLength;
9773 KPath pathBuffer(size);
9774 if (pathBuffer.InitCheck() != B_OK)
9775 return B_NO_MEMORY;
9777 TRACE(("user_getcwd: buf %p, %ld\n", userBuffer, size));
9779 char* path = pathBuffer.LockBuffer();
9781 status_t status = get_cwd(path, size, false);
9782 if (status != B_OK)
9783 return status;
9785 // Copy back the result
9786 if (user_strlcpy(userBuffer, path, size) < B_OK)
9787 return B_BAD_ADDRESS;
9789 return status;
9793 status_t
9794 _user_setcwd(int fd, const char* userPath)
9796 TRACE(("user_setcwd: path = %p\n", userPath));
9798 KPath pathBuffer(B_PATH_NAME_LENGTH);
9799 if (pathBuffer.InitCheck() != B_OK)
9800 return B_NO_MEMORY;
9802 char* path = pathBuffer.LockBuffer();
9804 if (userPath != NULL) {
9805 if (!IS_USER_ADDRESS(userPath)
9806 || user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK)
9807 return B_BAD_ADDRESS;
9810 return set_cwd(fd, userPath != NULL ? path : NULL, false);
9814 status_t
9815 _user_change_root(const char* userPath)
9817 // only root is allowed to chroot()
9818 if (geteuid() != 0)
9819 return B_NOT_ALLOWED;
9821 // alloc path buffer
9822 KPath pathBuffer(B_PATH_NAME_LENGTH);
9823 if (pathBuffer.InitCheck() != B_OK)
9824 return B_NO_MEMORY;
9826 // copy userland path to kernel
9827 char* path = pathBuffer.LockBuffer();
9828 if (userPath != NULL) {
9829 if (!IS_USER_ADDRESS(userPath)
9830 || user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK)
9831 return B_BAD_ADDRESS;
9834 // get the vnode
9835 struct vnode* vnode;
9836 status_t status = path_to_vnode(path, true, &vnode, NULL, false);
9837 if (status != B_OK)
9838 return status;
9840 // set the new root
9841 struct io_context* context = get_current_io_context(false);
9842 mutex_lock(&sIOContextRootLock);
9843 struct vnode* oldRoot = context->root;
9844 context->root = vnode;
9845 mutex_unlock(&sIOContextRootLock);
9847 put_vnode(oldRoot);
9849 return B_OK;
9853 int
9854 _user_open_query(dev_t device, const char* userQuery, size_t queryLength,
9855 uint32 flags, port_id port, int32 token)
9857 char* query;
9859 if (device < 0 || userQuery == NULL || queryLength == 0)
9860 return B_BAD_VALUE;
9862 // this is a safety restriction
9863 if (queryLength >= 65536)
9864 return B_NAME_TOO_LONG;
9866 query = (char*)malloc(queryLength + 1);
9867 if (query == NULL)
9868 return B_NO_MEMORY;
9869 if (user_strlcpy(query, userQuery, queryLength + 1) < B_OK) {
9870 free(query);
9871 return B_BAD_ADDRESS;
9874 int fd = query_open(device, query, flags, port, token, false);
9876 free(query);
9877 return fd;
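// Illustrative only -- what the port/token pair above enables: a live query
// whose update notifications are delivered as messages to the given port.
// Sketched with the public wrappers from <fs_query.h>; the query string and
// the example function are hypothetical.
#if 0
static void
example_live_query(dev_t device, port_id port, int32 token)
{
	DIR* query = fs_open_live_query(device, "size > 1000000", B_LIVE_QUERY,
		port, token);
	if (query == NULL)
		return;

	struct dirent* entry;
	while ((entry = fs_read_query(query)) != NULL) {
		// iterate over the initial matches; later changes arrive on "port"
		printf("match: %s\n", entry->d_name);
	}

	fs_close_query(query);
}
#endif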
9881 #include "vfs_request_io.cpp"