btrfs: [] on the end of a struct field is a variable length array.
[haiku.git] / src / add-ons / kernel / file_systems / bfs / Inode.cpp
blobcd5287b476e17de58a61fdd6d0b34dfb23bce7c5
1 /*
2 * Copyright 2001-2017, Axel Dörfler, axeld@pinc-software.de.
3 * This file may be used under the terms of the MIT License.
4 */
7 //! Inode access functions
10 #include "Debug.h"
11 #include "Inode.h"
12 #include "BPlusTree.h"
13 #include "Index.h"
16 #if BFS_TRACING && !defined(FS_SHELL) && !defined(_BOOT_MODE)
17 namespace BFSInodeTracing {
19 class Create : public AbstractTraceEntry {
20 public:
21 Create(Inode* inode, Inode* parent, const char* name, int32 mode,
22 int openMode, uint32 type)
24 fInode(inode),
25 fID(inode->ID()),
26 fParent(parent),
27 fParentID(parent != NULL ? parent->ID() : 0),
28 fMode(mode),
29 fOpenMode(openMode),
30 fType(type)
32 if (name != NULL)
33 strlcpy(fName, name, sizeof(fName));
34 else
35 fName[0] = '\0';
37 Initialized();
40 virtual void AddDump(TraceOutput& out)
42 out.Print("bfs:Create %Ld (%p), parent %Ld (%p), \"%s\", "
43 "mode %lx, omode %x, type %lx", fID, fInode, fParentID,
44 fParent, fName, fMode, fOpenMode, fType);
47 private:
48 Inode* fInode;
49 ino_t fID;
50 Inode* fParent;
51 ino_t fParentID;
52 char fName[32];
53 int32 fMode;
54 int fOpenMode;
55 uint32 fType;
58 class Remove : public AbstractTraceEntry {
59 public:
60 Remove(Inode* inode, const char* name)
62 fInode(inode),
63 fID(inode->ID())
65 strlcpy(fName, name, sizeof(fName));
66 Initialized();
69 virtual void AddDump(TraceOutput& out)
71 out.Print("bfs:Remove %Ld (%p), \"%s\"", fID, fInode, fName);
74 private:
75 Inode* fInode;
76 ino_t fID;
77 char fName[32];
80 class Action : public AbstractTraceEntry {
81 public:
82 Action(const char* action, Inode* inode)
84 fInode(inode),
85 fID(inode->ID())
87 strlcpy(fAction, action, sizeof(fAction));
88 Initialized();
91 virtual void AddDump(TraceOutput& out)
93 out.Print("bfs:%s %Ld (%p)\n", fAction, fID, fInode);
96 private:
97 Inode* fInode;
98 ino_t fID;
99 char fAction[16];
102 class Resize : public AbstractTraceEntry {
103 public:
104 Resize(Inode* inode, off_t oldSize, off_t newSize, bool trim)
106 fInode(inode),
107 fID(inode->ID()),
108 fOldSize(oldSize),
109 fNewSize(newSize),
110 fTrim(trim)
112 Initialized();
115 virtual void AddDump(TraceOutput& out)
117 out.Print("bfs:%s %Ld (%p), %Ld -> %Ld", fTrim ? "Trim" : "Resize",
118 fID, fInode, fOldSize, fNewSize);
121 private:
122 Inode* fInode;
123 ino_t fID;
124 off_t fOldSize;
125 off_t fNewSize;
126 bool fTrim;
129 } // namespace BFSInodeTracing
131 # define T(x) new(std::nothrow) BFSInodeTracing::x;
132 #else
133 # define T(x) ;
134 #endif
137 /*! A helper class used by Inode::Create() to keep track of the belongings
138 of an inode creation in progress.
139 This class will make sure everything is cleaned up properly.
141 class InodeAllocator {
142 public:
143 InodeAllocator(Transaction& transaction);
144 ~InodeAllocator();
146 status_t New(block_run* parentRun, mode_t mode, uint32 flags,
147 block_run& run, fs_vnode_ops* vnodeOps,
148 Inode** _inode);
149 status_t CreateTree();
150 status_t Keep(fs_vnode_ops* vnodeOps, uint32 publishFlags);
152 private:
153 static void _TransactionListener(int32 id, int32 event,
154 void* _inode);
156 Transaction* fTransaction;
157 block_run fRun;
158 Inode* fInode;
162 InodeAllocator::InodeAllocator(Transaction& transaction)
164 fTransaction(&transaction),
165 fInode(NULL)
170 InodeAllocator::~InodeAllocator()
172 if (fTransaction != NULL) {
173 Volume* volume = fTransaction->GetVolume();
175 if (fInode != NULL) {
176 fInode->Node().flags &= ~HOST_ENDIAN_TO_BFS_INT32(INODE_IN_USE);
177 // this unblocks any pending bfs_read_vnode() calls
178 fInode->Free(*fTransaction);
180 if (fInode->fTree != NULL)
181 fTransaction->RemoveListener(fInode->fTree);
182 fTransaction->RemoveListener(fInode);
184 remove_vnode(volume->FSVolume(), fInode->ID());
185 } else
186 volume->Free(*fTransaction, fRun);
189 delete fInode;
193 status_t
194 InodeAllocator::New(block_run* parentRun, mode_t mode, uint32 publishFlags,
195 block_run& run, fs_vnode_ops* vnodeOps, Inode** _inode)
197 Volume* volume = fTransaction->GetVolume();
199 status_t status = volume->AllocateForInode(*fTransaction, parentRun, mode,
200 fRun);
201 if (status < B_OK) {
202 // don't free the space in the destructor, because
203 // the allocation failed
204 fTransaction = NULL;
205 RETURN_ERROR(status);
208 run = fRun;
209 fInode = new(std::nothrow) Inode(volume, *fTransaction,
210 volume->ToVnode(run), mode, run);
211 if (fInode == NULL)
212 RETURN_ERROR(B_NO_MEMORY);
214 if (!volume->IsInitializing()
215 && (publishFlags & BFS_DO_NOT_PUBLISH_VNODE) == 0) {
216 status = new_vnode(volume->FSVolume(), fInode->ID(), fInode,
217 vnodeOps != NULL ? vnodeOps : &gBFSVnodeOps);
218 if (status < B_OK) {
219 delete fInode;
220 fInode = NULL;
221 RETURN_ERROR(status);
225 fInode->WriteLockInTransaction(*fTransaction);
226 *_inode = fInode;
227 return B_OK;
231 status_t
232 InodeAllocator::CreateTree()
234 Volume* volume = fTransaction->GetVolume();
236 // force S_STR_INDEX to be set, if no type is set
237 if ((fInode->Mode() & S_INDEX_TYPES) == 0)
238 fInode->Node().mode |= HOST_ENDIAN_TO_BFS_INT32(S_STR_INDEX);
240 BPlusTree* tree = new(std::nothrow) BPlusTree(*fTransaction, fInode);
241 if (tree == NULL)
242 return B_ERROR;
244 status_t status = tree->InitCheck();
245 if (status != B_OK) {
246 delete tree;
247 return status;
250 fInode->fTree = tree;
252 if (fInode->IsRegularNode()) {
253 if (tree->Insert(*fTransaction, ".", fInode->ID()) < B_OK
254 || tree->Insert(*fTransaction, "..",
255 volume->ToVnode(fInode->Parent())) < B_OK)
256 return B_ERROR;
258 return B_OK;
262 status_t
263 InodeAllocator::Keep(fs_vnode_ops* vnodeOps, uint32 publishFlags)
265 ASSERT(fInode != NULL && fTransaction != NULL);
266 Volume* volume = fTransaction->GetVolume();
268 status_t status = fInode->WriteBack(*fTransaction);
269 if (status < B_OK) {
270 FATAL(("writing new inode %" B_PRIdINO " failed!\n", fInode->ID()));
271 return status;
274 // Symbolic links are not published -- the caller needs to do this once
275 // the contents have been written.
276 if (!fInode->IsSymLink() && !volume->IsInitializing()
277 && (publishFlags & BFS_DO_NOT_PUBLISH_VNODE) == 0) {
278 status = publish_vnode(volume->FSVolume(), fInode->ID(), fInode,
279 vnodeOps != NULL ? vnodeOps : &gBFSVnodeOps, fInode->Mode(),
280 publishFlags);
283 if (status == B_OK) {
284 cache_add_transaction_listener(volume->BlockCache(), fTransaction->ID(),
285 TRANSACTION_ABORTED, &_TransactionListener, fInode);
288 fTransaction = NULL;
289 fInode = NULL;
291 return status;
295 /*static*/ void
296 InodeAllocator::_TransactionListener(int32 id, int32 event, void* _inode)
298 Inode* inode = (Inode*)_inode;
300 if (event == TRANSACTION_ABORTED)
301 panic("transaction %d aborted, inode %p still around!\n", (int)id, inode);
305 // #pragma mark -
308 status_t
309 bfs_inode::InitCheck(Volume* volume) const
311 if (Magic1() != INODE_MAGIC1
312 || !(Flags() & INODE_IN_USE)
313 || inode_num.Length() != 1
314 // matches inode size?
315 || (uint32)InodeSize() != volume->InodeSize()
316 // parent resides on disk?
317 || parent.AllocationGroup() > int32(volume->AllocationGroups())
318 || parent.AllocationGroup() < 0
319 || parent.Start() > (1L << volume->AllocationGroupShift())
320 || parent.Length() != 1
321 // attributes, too?
322 || attributes.AllocationGroup() > int32(volume->AllocationGroups())
323 || attributes.AllocationGroup() < 0
324 || attributes.Start() > (1L << volume->AllocationGroupShift()))
325 RETURN_ERROR(B_BAD_DATA);
327 if (Flags() & INODE_DELETED)
328 return B_NOT_ALLOWED;
330 // TODO: Add some tests to check the integrity of the other stuff here,
331 // especially for the data_stream!
333 return B_OK;
337 // #pragma mark - Inode
340 Inode::Inode(Volume* volume, ino_t id)
342 fVolume(volume),
343 fID(id),
344 fTree(NULL),
345 fAttributes(NULL),
346 fCache(NULL),
347 fMap(NULL)
349 PRINT(("Inode::Inode(volume = %p, id = %Ld) @ %p\n", volume, id, this));
351 rw_lock_init(&fLock, "bfs inode");
352 recursive_lock_init(&fSmallDataLock, "bfs inode small data");
354 if (UpdateNodeFromDisk() != B_OK) {
355 // TODO: the error code gets eaten
356 return;
359 // these two will help to maintain the indices
360 fOldSize = Size();
361 fOldLastModified = LastModified();
363 if (IsContainer())
364 fTree = new(std::nothrow) BPlusTree(this);
365 if (NeedsFileCache()) {
366 SetFileCache(file_cache_create(fVolume->ID(), ID(), Size()));
367 SetMap(file_map_create(volume->ID(), ID(), Size()));
372 Inode::Inode(Volume* volume, Transaction& transaction, ino_t id, mode_t mode,
373 block_run& run)
375 fVolume(volume),
376 fID(id),
377 fTree(NULL),
378 fAttributes(NULL),
379 fCache(NULL),
380 fMap(NULL)
382 PRINT(("Inode::Inode(volume = %p, transaction = %p, id = %Ld) @ %p\n",
383 volume, &transaction, id, this));
385 rw_lock_init(&fLock, "bfs inode");
386 recursive_lock_init(&fSmallDataLock, "bfs inode small data");
388 NodeGetter node(volume, transaction, this, true);
389 if (node.Node() == NULL) {
390 FATAL(("Could not read inode block %" B_PRId64 "!\n", BlockNumber()));
391 return;
394 memset(&fNode, 0, sizeof(bfs_inode));
396 // Initialize the bfs_inode structure -- it's not written back to disk
397 // here, because it will be done so already when the inode could be
398 // created completely.
400 Node().magic1 = HOST_ENDIAN_TO_BFS_INT32(INODE_MAGIC1);
401 Node().inode_num = run;
402 Node().mode = HOST_ENDIAN_TO_BFS_INT32(mode);
403 Node().flags = HOST_ENDIAN_TO_BFS_INT32(INODE_IN_USE);
405 Node().create_time = Node().last_modified_time = Node().status_change_time
406 = HOST_ENDIAN_TO_BFS_INT64(bfs_inode::ToInode(real_time_clock_usecs()));
408 Node().inode_size = HOST_ENDIAN_TO_BFS_INT32(volume->InodeSize());
410 // these two will help to maintain the indices
411 fOldSize = Size();
412 fOldLastModified = LastModified();
416 Inode::~Inode()
418 PRINT(("Inode::~Inode() @ %p\n", this));
420 file_cache_delete(FileCache());
421 file_map_delete(Map());
422 delete fTree;
424 rw_lock_destroy(&fLock);
425 recursive_lock_destroy(&fSmallDataLock);
429 status_t
430 Inode::InitCheck(bool checkNode) const
432 // test inode magic and flags
433 if (checkNode) {
434 status_t status = Node().InitCheck(fVolume);
435 if (status == B_BUSY)
436 return B_BUSY;
438 if (status != B_OK) {
439 FATAL(("inode at block %" B_PRIdOFF " corrupt!\n", BlockNumber()));
440 RETURN_ERROR(B_BAD_DATA);
444 if (IsContainer()) {
445 // inodes that have a B+tree
446 if (fTree == NULL)
447 RETURN_ERROR(B_NO_MEMORY);
449 status_t status = fTree->InitCheck();
450 if (status != B_OK) {
451 FATAL(("inode tree at block %" B_PRIdOFF " corrupt!\n",
452 BlockNumber()));
453 RETURN_ERROR(B_BAD_DATA);
457 if (NeedsFileCache() && (fCache == NULL || fMap == NULL))
458 return B_NO_MEMORY;
460 return B_OK;
464 /*! Adds this inode to the specified transaction. This means that the inode will
465 be write locked until the transaction ended.
466 To ensure that the inode will stay valid until that point, an extra reference
467 is acquired to it as long as this transaction stays active.
469 void
470 Inode::WriteLockInTransaction(Transaction& transaction)
472 // These flags can only change while holding the transaction lock
473 if ((Flags() & INODE_IN_TRANSACTION) != 0)
474 return;
476 // We share the same list link with the removed list, so we have to remove
477 // the inode from that list here (and add it back when we no longer need it)
478 if ((Flags() & INODE_DELETED) != 0)
479 fVolume->RemovedInodes().Remove(this);
481 if (!fVolume->IsInitializing())
482 acquire_vnode(fVolume->FSVolume(), ID());
484 rw_lock_write_lock(&Lock());
485 Node().flags |= HOST_ENDIAN_TO_BFS_INT32(INODE_IN_TRANSACTION);
487 transaction.AddListener(this);
491 status_t
492 Inode::WriteBack(Transaction& transaction)
494 NodeGetter node(fVolume, transaction, this);
495 if (node.WritableNode() == NULL)
496 return B_IO_ERROR;
498 memcpy(node.WritableNode(), &Node(), sizeof(bfs_inode));
499 return B_OK;
503 status_t
504 Inode::UpdateNodeFromDisk()
506 NodeGetter node(fVolume, this);
507 if (node.Node() == NULL) {
508 FATAL(("Failed to read block %" B_PRId64 " from disk!\n",
509 BlockNumber()));
510 return B_IO_ERROR;
513 memcpy(&fNode, node.Node(), sizeof(bfs_inode));
514 fNode.flags &= HOST_ENDIAN_TO_BFS_INT32(INODE_PERMANENT_FLAGS);
515 return B_OK;
519 status_t
520 Inode::CheckPermissions(int accessMode) const
522 // you never have write access to a read-only volume
523 if ((accessMode & W_OK) != 0 && fVolume->IsReadOnly())
524 return B_READ_ONLY_DEVICE;
526 return check_access_permissions(accessMode, Mode(), (gid_t)fNode.GroupID(),
527 (uid_t)fNode.UserID());
531 // #pragma mark - attributes
534 void
535 Inode::_AddIterator(AttributeIterator* iterator)
537 RecursiveLocker _(fSmallDataLock);
538 fIterators.Add(iterator);
542 void
543 Inode::_RemoveIterator(AttributeIterator* iterator)
545 RecursiveLocker _(fSmallDataLock);
546 fIterators.Remove(iterator);
550 /*! Tries to free up "bytes" space in the small_data section by moving
551 attributes to real files. Used for system attributes like the name.
552 You need to hold the fSmallDataLock when you call this method
554 status_t
555 Inode::_MakeSpaceForSmallData(Transaction& transaction, bfs_inode* node,
556 const char* name, int32 bytes)
558 ASSERT_LOCKED_RECURSIVE(&fSmallDataLock);
560 while (bytes > 0) {
561 small_data* item = node->SmallDataStart();
562 small_data* max = NULL;
563 int32 index = 0, maxIndex = 0;
564 for (; !item->IsLast(node); item = item->Next(), index++) {
565 // should not remove those
566 if (*item->Name() == FILE_NAME_NAME || !strcmp(name, item->Name()))
567 continue;
569 if (max == NULL || max->Size() < item->Size()) {
570 maxIndex = index;
571 max = item;
574 // Remove the first one large enough to free the needed amount of
575 // bytes
576 if (bytes < (int32)item->Size())
577 break;
580 if (item->IsLast(node) || (int32)item->Size() < bytes)
581 return B_ERROR;
583 bytes -= max->Size();
585 // Move the attribute to a real attribute file
586 // Luckily, this doesn't cause any index updates
588 Inode* attribute;
589 status_t status = CreateAttribute(transaction, item->Name(),
590 item->Type(), &attribute);
591 if (status != B_OK)
592 RETURN_ERROR(status);
594 size_t length = item->DataSize();
595 status = attribute->WriteAt(transaction, 0, item->Data(), &length);
597 ReleaseAttribute(attribute);
599 if (status != B_OK) {
600 Vnode vnode(fVolume, Attributes());
601 Inode* attributes;
602 if (vnode.Get(&attributes) < B_OK
603 || attributes->Remove(transaction, name) < B_OK) {
604 FATAL(("Could not remove newly created attribute!\n"));
607 RETURN_ERROR(status);
610 _RemoveSmallData(node, max, maxIndex);
612 return B_OK;
616 /*! Private function which removes the given attribute from the small_data
617 section.
618 You need to hold the fSmallDataLock when you call this method
620 status_t
621 Inode::_RemoveSmallData(bfs_inode* node, small_data* item, int32 index)
623 ASSERT_LOCKED_RECURSIVE(&fSmallDataLock);
625 small_data* next = item->Next();
626 if (!next->IsLast(node)) {
627 // find the last attribute
628 small_data* last = next;
629 while (!last->IsLast(node))
630 last = last->Next();
632 int32 size = (uint8*)last - (uint8*)next;
633 if (size < 0
634 || size > (uint8*)node + fVolume->BlockSize() - (uint8*)next)
635 return B_BAD_DATA;
637 memmove(item, next, size);
639 // Move the "last" one to its new location and
640 // correctly terminate the small_data section
641 last = (small_data*)((uint8*)last - ((uint8*)next - (uint8*)item));
642 memset(last, 0, (uint8*)node + fVolume->BlockSize() - (uint8*)last);
643 } else
644 memset(item, 0, item->Size());
646 // update all current iterators
647 SinglyLinkedList<AttributeIterator>::Iterator iterator
648 = fIterators.GetIterator();
649 while (iterator.HasNext()) {
650 iterator.Next()->Update(index, -1);
653 return B_OK;
657 //! Removes the given attribute from the small_data section.
658 status_t
659 Inode::_RemoveSmallData(Transaction& transaction, NodeGetter& nodeGetter,
660 const char* name)
662 if (name == NULL)
663 return B_BAD_VALUE;
665 bfs_inode* node = nodeGetter.WritableNode();
666 RecursiveLocker locker(fSmallDataLock);
668 // search for the small_data item
670 small_data* item = node->SmallDataStart();
671 int32 index = 0;
672 while (!item->IsLast(node) && strcmp(item->Name(), name)) {
673 item = item->Next();
674 index++;
677 if (item->IsLast(node))
678 return B_ENTRY_NOT_FOUND;
680 nodeGetter.MakeWritable(transaction);
682 status_t status = _RemoveSmallData(node, item, index);
683 if (status == B_OK) {
684 Node().status_change_time = HOST_ENDIAN_TO_BFS_INT64(
685 bfs_inode::ToInode(real_time_clock_usecs()));
687 status = WriteBack(transaction);
690 return status;
694 /*! Try to place the given attribute in the small_data section - if the
695 new attribute is too big to fit in that section, it returns B_DEVICE_FULL.
696 In that case, the attribute should be written to a real attribute file;
697 it's the caller's responsibility to remove any existing attributes in the
698 small data section if that's the case.
700 Note that you need to write back the inode yourself after having called that
701 method - it's a bad API decision that it needs a transaction but enforces
702 you to write back the inode all by yourself, but it's just more efficient
703 in most cases...
705 status_t
706 Inode::_AddSmallData(Transaction& transaction, NodeGetter& nodeGetter,
707 const char* name, uint32 type, off_t pos, const uint8* data, size_t length,
708 bool force)
710 bfs_inode* node = nodeGetter.WritableNode();
712 if (node == NULL || name == NULL || data == NULL)
713 return B_BAD_VALUE;
715 // reject any requests that can't fit into the small_data section
716 uint32 nameLength = strlen(name);
717 uint32 spaceNeeded = sizeof(small_data) + nameLength + 3 + pos + length + 1;
718 if (spaceNeeded > fVolume->InodeSize() - sizeof(bfs_inode))
719 return B_DEVICE_FULL;
721 nodeGetter.MakeWritable(transaction);
722 RecursiveLocker locker(fSmallDataLock);
724 // Find the last item or one with the same name we have to add
725 small_data* item = node->SmallDataStart();
726 int32 index = 0;
727 while (!item->IsLast(node) && strcmp(item->Name(), name)) {
728 item = item->Next();
729 index++;
732 // is the attribute already in the small_data section?
733 // then just replace the data part of that one
734 if (!item->IsLast(node)) {
735 // find last attribute
736 small_data* last = item;
737 while (!last->IsLast(node))
738 last = last->Next();
740 // try to change the attributes value
741 if (item->data_size > pos + length
742 || force
743 || ((uint8*)last + pos + length - item->DataSize())
744 <= ((uint8*)node + fVolume->InodeSize())) {
745 // Make room for the new attribute if needed (and we are forced
746 // to do so)
747 if (force && ((uint8*)last + pos + length - item->DataSize())
748 > ((uint8*)node + fVolume->InodeSize())) {
749 // We also take the free space at the end of the small_data
750 // section into account, and request only what's really needed
751 uint32 needed = pos + length - item->DataSize() -
752 (uint32)((uint8*)node + fVolume->InodeSize()
753 - (uint8*)last);
755 if (_MakeSpaceForSmallData(transaction, node, name, needed)
756 != B_OK)
757 return B_ERROR;
759 // reset our pointers
760 item = node->SmallDataStart();
761 index = 0;
762 while (!item->IsLast(node) && strcmp(item->Name(), name)) {
763 item = item->Next();
764 index++;
767 last = item;
768 while (!last->IsLast(node))
769 last = last->Next();
772 size_t oldDataSize = item->DataSize();
774 // Normally, we can just overwrite the attribute data as the size
775 // is specified by the type and does not change that often
776 if (pos + length != item->DataSize()) {
777 // move the attributes after the current one
778 small_data* next = item->Next();
779 if (!next->IsLast(node)) {
780 memmove((uint8*)item + spaceNeeded, next,
781 (uint8*)last - (uint8*)next);
784 // Move the "last" one to its new location and
785 // correctly terminate the small_data section
786 last = (small_data*)((uint8*)last
787 - ((uint8*)next - ((uint8*)item + spaceNeeded)));
788 if ((uint8*)last < (uint8*)node + fVolume->BlockSize()) {
789 memset(last, 0, (uint8*)node + fVolume->BlockSize()
790 - (uint8*)last);
793 item->data_size = HOST_ENDIAN_TO_BFS_INT16(pos + length);
796 item->type = HOST_ENDIAN_TO_BFS_INT32(type);
798 if ((uint64)oldDataSize < (uint64)pos) {
799 // Fill gap with zeros
800 memset(item->Data() + oldDataSize, 0, pos - oldDataSize);
802 memcpy(item->Data() + pos, data, length);
803 item->Data()[pos + length] = '\0';
805 return B_OK;
808 return B_DEVICE_FULL;
811 // try to add the new attribute!
813 if ((uint8*)item + spaceNeeded > (uint8*)node + fVolume->InodeSize()) {
814 // there is not enough space for it!
815 if (!force)
816 return B_DEVICE_FULL;
818 // make room for the new attribute
819 if (_MakeSpaceForSmallData(transaction, node, name, spaceNeeded) < B_OK)
820 return B_ERROR;
822 // get new last item!
823 item = node->SmallDataStart();
824 index = 0;
825 while (!item->IsLast(node)) {
826 item = item->Next();
827 index++;
831 memset(item, 0, spaceNeeded);
832 item->type = HOST_ENDIAN_TO_BFS_INT32(type);
833 item->name_size = HOST_ENDIAN_TO_BFS_INT16(nameLength);
834 item->data_size = HOST_ENDIAN_TO_BFS_INT16(length);
835 strcpy(item->Name(), name);
836 memcpy(item->Data() + pos, data, length);
838 // correctly terminate the small_data section
839 item = item->Next();
840 if (!item->IsLast(node))
841 memset(item, 0, (uint8*)node + fVolume->InodeSize() - (uint8*)item);
843 // update all current iterators
844 SinglyLinkedList<AttributeIterator>::Iterator iterator
845 = fIterators.GetIterator();
846 while (iterator.HasNext()) {
847 iterator.Next()->Update(index, 1);
850 return B_OK;
854 /*! Iterates through the small_data section of an inode.
855 To start at the beginning of this section, you let smallData
856 point to NULL, like:
857 small_data* data = NULL;
858 while (inode->GetNextSmallData(&data) { ... }
860 This function is reentrant and doesn't allocate any memory;
861 you can safely stop calling it at any point (you don't need
862 to iterate through the whole list).
863 You need to hold the fSmallDataLock when you call this method
865 status_t
866 Inode::_GetNextSmallData(bfs_inode* node, small_data** _smallData) const
868 if (node == NULL)
869 RETURN_ERROR(B_BAD_VALUE);
871 ASSERT_LOCKED_RECURSIVE(&fSmallDataLock);
873 small_data* data = *_smallData;
875 // begin from the start?
876 if (data == NULL)
877 data = node->SmallDataStart();
878 else
879 data = data->Next();
881 // is already last item?
882 if (data->IsLast(node))
883 return B_ENTRY_NOT_FOUND;
885 *_smallData = data;
887 return B_OK;
891 /*! Finds the attribute "name" in the small data section, and
892 returns a pointer to it (or NULL if it doesn't exist).
893 You need to hold the fSmallDataLock when you call this method
895 small_data*
896 Inode::FindSmallData(const bfs_inode* node, const char* name) const
898 ASSERT_LOCKED_RECURSIVE(&fSmallDataLock);
900 small_data* smallData = NULL;
901 while (_GetNextSmallData(const_cast<bfs_inode*>(node), &smallData)
902 == B_OK) {
903 if (!strcmp(smallData->Name(), name))
904 return smallData;
906 return NULL;
910 /*! Returns a pointer to the node's name if present in the small data
911 section, NULL otherwise.
912 You need to hold the fSmallDataLock when you call this method
914 const char*
915 Inode::Name(const bfs_inode* node) const
917 ASSERT_LOCKED_RECURSIVE(&fSmallDataLock);
919 small_data* smallData = NULL;
920 while (_GetNextSmallData((bfs_inode*)node, &smallData) == B_OK) {
921 if (*smallData->Name() == FILE_NAME_NAME
922 && smallData->NameSize() == FILE_NAME_NAME_LENGTH)
923 return (const char*)smallData->Data();
925 return NULL;
929 /*! Copies the node's name into the provided buffer.
930 The buffer should be B_FILE_NAME_LENGTH bytes large.
932 status_t
933 Inode::GetName(char* buffer, size_t size) const
935 NodeGetter node(fVolume, this);
936 if (node.Node() == NULL)
937 return B_IO_ERROR;
939 RecursiveLocker locker(fSmallDataLock);
941 const char* name = Name(node.Node());
942 if (name == NULL)
943 return B_ENTRY_NOT_FOUND;
945 strlcpy(buffer, name, size);
946 return B_OK;
950 /*! Changes or set the name of a file: in the inode small_data section only, it
951 doesn't change it in the parent directory's b+tree.
952 Note that you need to write back the inode yourself after having called
953 that method. It suffers from the same API decision as AddSmallData() does
954 (and for the same reason).
956 status_t
957 Inode::SetName(Transaction& transaction, const char* name)
959 if (name == NULL || *name == '\0')
960 return B_BAD_VALUE;
962 NodeGetter node(fVolume, transaction, this);
963 if (node.Node() == NULL)
964 return B_IO_ERROR;
966 const char nameTag[2] = {FILE_NAME_NAME, 0};
968 return _AddSmallData(transaction, node, nameTag, FILE_NAME_TYPE, 0,
969 (uint8*)name, strlen(name), true);
973 status_t
974 Inode::_RemoveAttribute(Transaction& transaction, const char* name,
975 bool hasIndex, Index* index)
977 // remove the attribute file if it exists
978 Vnode vnode(fVolume, Attributes());
979 Inode* attributes;
980 status_t status = vnode.Get(&attributes);
981 if (status < B_OK)
982 return status;
984 // update index
985 if (index != NULL) {
986 Inode* attribute;
987 if ((hasIndex || fVolume->CheckForLiveQuery(name))
988 && GetAttribute(name, &attribute) == B_OK) {
989 uint8 data[MAX_INDEX_KEY_LENGTH];
990 size_t length = MAX_INDEX_KEY_LENGTH;
991 if (attribute->ReadAt(0, data, &length) == B_OK) {
992 index->Update(transaction, name, attribute->Type(), data,
993 length, NULL, 0, this);
996 ReleaseAttribute(attribute);
1000 if ((status = attributes->Remove(transaction, name)) < B_OK)
1001 return status;
1003 if (attributes->IsEmpty()) {
1004 attributes->WriteLockInTransaction(transaction);
1006 // remove attribute directory (don't fail if that can't be done)
1007 if (remove_vnode(fVolume->FSVolume(), attributes->ID()) == B_OK) {
1008 // update the inode, so that no one will ever doubt it's deleted :-)
1009 attributes->Node().flags |= HOST_ENDIAN_TO_BFS_INT32(INODE_DELETED);
1010 if (attributes->WriteBack(transaction) == B_OK) {
1011 Attributes().SetTo(0, 0, 0);
1012 WriteBack(transaction);
1013 } else {
1014 unremove_vnode(fVolume->FSVolume(), attributes->ID());
1015 attributes->Node().flags
1016 &= ~HOST_ENDIAN_TO_BFS_INT32(INODE_DELETED);
1021 return status;
1025 /*! Reads data from the specified attribute.
1026 This is a high-level attribute function that understands attributes
1027 in the small_data section as well as real attribute files.
1029 status_t
1030 Inode::ReadAttribute(const char* name, int32 type, off_t pos, uint8* buffer,
1031 size_t* _length)
1033 if (pos < 0)
1034 pos = 0;
1036 // search in the small_data section (which has to be locked first)
1038 NodeGetter node(fVolume, this);
1039 if (node.Node() == NULL)
1040 return B_IO_ERROR;
1042 RecursiveLocker locker(fSmallDataLock);
1044 small_data* smallData = FindSmallData(node.Node(), name);
1045 if (smallData != NULL) {
1046 size_t length = *_length;
1047 if (pos >= smallData->data_size) {
1048 *_length = 0;
1049 return B_OK;
1051 if (length + pos > smallData->DataSize())
1052 length = smallData->DataSize() - pos;
1054 memcpy(buffer, smallData->Data() + pos, length);
1055 *_length = length;
1056 return B_OK;
1060 // search in the attribute directory
1061 Inode* attribute;
1062 status_t status = GetAttribute(name, &attribute);
1063 if (status == B_OK) {
1064 status = attribute->ReadAt(pos, (uint8*)buffer, _length);
1066 ReleaseAttribute(attribute);
1069 RETURN_ERROR(status);
1073 /*! Writes data to the specified attribute.
1074 This is a high-level attribute function that understands attributes
1075 in the small_data section as well as real attribute files.
1077 status_t
1078 Inode::WriteAttribute(Transaction& transaction, const char* name, int32 type,
1079 off_t pos, const uint8* buffer, size_t* _length, bool* _created)
1081 if (pos < 0)
1082 return B_BAD_VALUE;
1084 // needed to maintain the index
1085 uint8 oldBuffer[MAX_INDEX_KEY_LENGTH];
1086 uint8* oldData = NULL;
1087 size_t oldLength = 0;
1088 bool created = false;
1090 // TODO: we actually depend on that the contents of "buffer" are constant.
1091 // If they get changed during the write (hey, user programs), we may mess
1092 // up our index trees!
1093 // TODO: for attribute files, we need to log the first
1094 // MAX_INDEX_KEY_LENGTH bytes of the data stream, or the same as above
1095 // might happen.
1097 Index index(fVolume);
1098 bool hasIndex = index.SetTo(name) == B_OK;
1100 Inode* attribute = NULL;
1101 status_t status = B_OK;
1103 if (GetAttribute(name, &attribute) != B_OK) {
1104 // No attribute inode exists yet
1106 // save the old attribute data
1107 NodeGetter node(fVolume, transaction, this);
1108 if (node.Node() == NULL)
1109 return B_IO_ERROR;
1111 recursive_lock_lock(&fSmallDataLock);
1113 small_data* smallData = FindSmallData(node.Node(), name);
1114 if (smallData != NULL) {
1115 oldLength = smallData->DataSize();
1116 if (oldLength > 0) {
1117 if (oldLength > MAX_INDEX_KEY_LENGTH)
1118 oldLength = MAX_INDEX_KEY_LENGTH;
1119 memcpy(oldData = oldBuffer, smallData->Data(), oldLength);
1121 } else
1122 created = true;
1124 recursive_lock_unlock(&fSmallDataLock);
1126 // if the attribute doesn't exist yet (as a file), try to put it in the
1127 // small_data section first - if that fails (due to insufficent space),
1128 // create a real attribute file
1129 status = _AddSmallData(transaction, node, name, type, pos, buffer,
1130 *_length);
1131 if (status == B_DEVICE_FULL) {
1132 if (smallData != NULL) {
1133 // remove the old attribute from the small data section - there
1134 // is no space left for the new data
1135 status = _RemoveSmallData(transaction, node, name);
1136 } else
1137 status = B_OK;
1139 if (status == B_OK)
1140 status = CreateAttribute(transaction, name, type, &attribute);
1141 if (status != B_OK)
1142 RETURN_ERROR(status);
1144 created = true;
1145 } else if (status == B_OK) {
1146 // Update status time on attribute write
1147 Node().status_change_time = HOST_ENDIAN_TO_BFS_INT64(
1148 bfs_inode::ToInode(real_time_clock_usecs()));
1150 status = WriteBack(transaction);
1154 if (attribute != NULL) {
1155 WriteLocker writeLocker(attribute->fLock);
1157 if (hasIndex || fVolume->CheckForLiveQuery(name)) {
1158 // Save the old attribute data (if this fails, oldLength will
1159 // reflect it)
1160 while (attribute->Size() > 0) {
1161 bigtime_t oldModified = attribute->LastModified();
1162 writeLocker.Unlock();
1164 oldLength = MAX_INDEX_KEY_LENGTH;
1165 if (attribute->ReadAt(0, oldBuffer, &oldLength) == B_OK)
1166 oldData = oldBuffer;
1168 writeLocker.Lock();
1170 // Read until the data hasn't changed in between
1171 if (oldModified == attribute->LastModified())
1172 break;
1174 oldLength = 0;
1178 // check if the data fits into the small_data section again
1179 NodeGetter node(fVolume, transaction, this);
1180 if (node.Node() == NULL)
1181 return B_IO_ERROR;
1183 status = _AddSmallData(transaction, node, name, type, pos, buffer,
1184 *_length);
1186 if (status == B_OK) {
1187 // it does - remove its file
1188 writeLocker.Unlock();
1189 status = _RemoveAttribute(transaction, name, false, NULL);
1190 } else {
1191 // The attribute type might have been changed - we need to
1192 // adopt the new one
1193 attribute->Node().type = HOST_ENDIAN_TO_BFS_INT32(type);
1194 status = attribute->WriteBack(transaction);
1195 writeLocker.Unlock();
1197 if (status == B_OK) {
1198 status = attribute->WriteAt(transaction, pos, buffer,
1199 _length);
1203 if (status == B_OK) {
1204 // Update status time on attribute write
1205 Node().status_change_time = HOST_ENDIAN_TO_BFS_INT64(
1206 bfs_inode::ToInode(real_time_clock_usecs()));
1208 status = WriteBack(transaction);
1211 attribute->WriteLockInTransaction(transaction);
1212 ReleaseAttribute(attribute);
1215 // TODO: find a better way than this "pos" thing (the begin of the old key
1216 // must be copied to the start of the new one for a comparison)
1217 if (status == B_OK && pos == 0) {
1218 // Index only the first MAX_INDEX_KEY_LENGTH bytes
1219 uint16 length = *_length;
1220 if (length > MAX_INDEX_KEY_LENGTH)
1221 length = MAX_INDEX_KEY_LENGTH;
1223 // Update index. Note, Index::Update() may be called even if
1224 // initializing the index failed - it will just update the live
1225 // queries in this case
1226 if (pos < length || (uint64)pos < (uint64)oldLength) {
1227 index.Update(transaction, name, type, oldData, oldLength, buffer,
1228 length, this);
1232 if (_created != NULL)
1233 *_created = created;
1235 return status;
1239 /*! Removes the specified attribute from the inode.
1240 This is a high-level attribute function that understands attributes
1241 in the small_data section as well as real attribute files.
1243 status_t
1244 Inode::RemoveAttribute(Transaction& transaction, const char* name)
1246 Index index(fVolume);
1247 bool hasIndex = index.SetTo(name) == B_OK;
1248 NodeGetter node(fVolume, this);
1249 if (node.Node() == NULL)
1250 return B_IO_ERROR;
1252 // update index for attributes in the small_data section
1254 RecursiveLocker _(fSmallDataLock);
1256 small_data* smallData = FindSmallData(node.Node(), name);
1257 if (smallData != NULL) {
1258 uint32 length = smallData->DataSize();
1259 if (length > MAX_INDEX_KEY_LENGTH)
1260 length = MAX_INDEX_KEY_LENGTH;
1261 index.Update(transaction, name, smallData->Type(),
1262 smallData->Data(), length, NULL, 0, this);
1266 status_t status = _RemoveSmallData(transaction, node, name);
1267 if (status == B_ENTRY_NOT_FOUND && !Attributes().IsZero()) {
1268 // remove the attribute file if it exists
1269 status = _RemoveAttribute(transaction, name, hasIndex, &index);
1270 if (status == B_OK) {
1271 Node().status_change_time = HOST_ENDIAN_TO_BFS_INT64(
1272 bfs_inode::ToInode(real_time_clock_usecs()));
1273 WriteBack(transaction);
1277 return status;
1281 /*! Returns the attribute inode with the specified \a name, in case it exists.
1282 This method can only return real attribute files; the attributes in the
1283 small data section are ignored.
1285 status_t
1286 Inode::GetAttribute(const char* name, Inode** _attribute)
1288 // does this inode even have attributes?
1289 if (Attributes().IsZero())
1290 return B_ENTRY_NOT_FOUND;
1292 Vnode vnode(fVolume, Attributes());
1293 Inode* attributes;
1294 if (vnode.Get(&attributes) < B_OK) {
1295 FATAL(("get_vnode() failed in Inode::GetAttribute(name = \"%s\")\n",
1296 name));
1297 return B_ERROR;
1300 BPlusTree* tree = attributes->Tree();
1301 if (tree == NULL)
1302 return B_BAD_VALUE;
1304 InodeReadLocker locker(attributes);
1306 ino_t id;
1307 status_t status = tree->Find((uint8*)name, (uint16)strlen(name), &id);
1308 if (status == B_OK) {
1309 Vnode vnode(fVolume, id);
1310 Inode* inode;
1311 // Check if the attribute is really an attribute
1312 if (vnode.Get(&inode) != B_OK || !inode->IsAttribute())
1313 return B_ERROR;
1315 *_attribute = inode;
1316 vnode.Keep();
1317 return B_OK;
1320 return status;
1324 void
1325 Inode::ReleaseAttribute(Inode* attribute)
1327 if (attribute == NULL)
1328 return;
1330 put_vnode(fVolume->FSVolume(), attribute->ID());
1334 status_t
1335 Inode::CreateAttribute(Transaction& transaction, const char* name, uint32 type,
1336 Inode** attribute)
1338 // do we need to create the attribute directory first?
1339 if (Attributes().IsZero()) {
1340 status_t status = Inode::Create(transaction, this, NULL,
1341 S_ATTR_DIR | S_DIRECTORY | 0666, 0, 0, NULL);
1342 if (status < B_OK)
1343 RETURN_ERROR(status);
1345 Vnode vnode(fVolume, Attributes());
1346 Inode* attributes;
1347 if (vnode.Get(&attributes) < B_OK)
1348 return B_ERROR;
1350 // Inode::Create() locks the inode for us
1351 return Inode::Create(transaction, attributes, name,
1352 S_ATTR | S_FILE | 0666, 0, type, NULL, NULL, attribute);
1356 // #pragma mark - directory tree
1359 bool
1360 Inode::IsEmpty()
1362 TreeIterator iterator(fTree);
1364 uint32 count = 0;
1365 char name[MAX_INDEX_KEY_LENGTH + 1];
1366 uint16 length;
1367 ino_t id;
1368 while (iterator.GetNextEntry(name, &length, MAX_INDEX_KEY_LENGTH + 1,
1369 &id) == B_OK) {
1370 if ((Mode() & (S_ATTR_DIR | S_INDEX_DIR)) != 0)
1371 return false;
1373 // Unlike index and attribute directories, directories
1374 // for standard files always contain ".", and "..", so
1375 // we need to ignore those two
1376 if (++count > 2 || (strcmp(".", name) != 0 && strcmp("..", name) != 0))
1377 return false;
1379 return true;
1383 status_t
1384 Inode::ContainerContentsChanged(Transaction& transaction)
1386 ASSERT(!InLastModifiedIndex());
1388 Node().last_modified_time = Node().status_change_time
1389 = HOST_ENDIAN_TO_BFS_INT64(bfs_inode::ToInode(real_time_clock_usecs()));
1391 return WriteBack(transaction);
1395 // #pragma mark - data stream
1398 /*! Computes the number of bytes this inode occupies in the file system.
1399 This includes the file meta blocks used for maintaining its data stream.
1401 TODO: However, the attributes in extra files are not really accounted for;
1402 depending on the speed penalty, this should be changed, though (the value
1403 could be cached in the inode structure or Inode object, though).
1405 off_t
1406 Inode::AllocatedSize() const
1408 if (IsSymLink() && (Flags() & INODE_LONG_SYMLINK) == 0) {
1409 // This symlink does not have a data stream
1410 return Node().InodeSize();
1413 const data_stream& data = Node().data;
1414 uint32 blockSize = fVolume->BlockSize();
1415 off_t size = blockSize;
1417 if (data.MaxDoubleIndirectRange() != 0) {
1418 off_t doubleIndirectSize = data.MaxDoubleIndirectRange()
1419 - data.MaxIndirectRange();
1420 int32 indirectSize = double_indirect_max_indirect_size(
1421 data.double_indirect.Length(), fVolume->BlockSize());
1423 size += (2 * data.double_indirect.Length()
1424 + doubleIndirectSize / indirectSize)
1425 * blockSize + data.MaxDoubleIndirectRange();
1426 } else if (data.MaxIndirectRange() != 0)
1427 size += data.indirect.Length() + data.MaxIndirectRange();
1428 else
1429 size += data.MaxDirectRange();
1431 if (!Node().attributes.IsZero()) {
1432 // TODO: to make this exact, we'd had to count all attributes
1433 size += 2 * blockSize;
1434 // 2 blocks, one for the attributes inode, one for its B+tree
1437 return size;
1441 /*! Finds the block_run where "pos" is located in the data_stream of
1442 the inode.
1443 If successful, "offset" will then be set to the file offset
1444 of the block_run returned; so "pos - offset" is for the block_run
1445 what "pos" is for the whole stream.
1446 The caller has to make sure that "pos" is inside the stream.
1448 status_t
1449 Inode::FindBlockRun(off_t pos, block_run& run, off_t& offset)
1451 data_stream* data = &Node().data;
1453 // find matching block run
1455 if (data->MaxIndirectRange() > 0 && pos >= data->MaxDirectRange()) {
1456 if (data->MaxDoubleIndirectRange() > 0
1457 && pos >= data->MaxIndirectRange()) {
1458 // access to double indirect blocks
1460 CachedBlock cached(fVolume);
1462 int32 runsPerBlock;
1463 int32 directSize;
1464 int32 indirectSize;
1465 get_double_indirect_sizes(data->double_indirect.Length(),
1466 fVolume->BlockSize(), runsPerBlock, directSize, indirectSize);
1467 if (directSize <= 0 || indirectSize <= 0)
1468 RETURN_ERROR(B_BAD_DATA);
1470 off_t start = pos - data->MaxIndirectRange();
1471 int32 index = start / indirectSize;
1473 block_run* indirect = (block_run*)cached.SetTo(
1474 fVolume->ToBlock(data->double_indirect) + index / runsPerBlock);
1475 if (indirect == NULL)
1476 RETURN_ERROR(B_ERROR);
1478 int32 current = (start % indirectSize) / directSize;
1480 indirect = (block_run*)cached.SetTo(
1481 fVolume->ToBlock(indirect[index % runsPerBlock])
1482 + current / runsPerBlock);
1483 if (indirect == NULL)
1484 RETURN_ERROR(B_ERROR);
1486 run = indirect[current % runsPerBlock];
1487 if (run.Length() != data->double_indirect.Length())
1488 RETURN_ERROR(B_BAD_DATA);
1490 offset = data->MaxIndirectRange() + (index * indirectSize)
1491 + (current * directSize);
1492 } else {
1493 // access to indirect blocks
1495 int32 runsPerBlock = fVolume->BlockSize() / sizeof(block_run);
1496 off_t runBlockEnd = data->MaxDirectRange();
1498 CachedBlock cached(fVolume);
1499 off_t block = fVolume->ToBlock(data->indirect);
1501 for (int32 i = 0; i < data->indirect.Length(); i++) {
1502 block_run* indirect = (block_run*)cached.SetTo(block + i);
1503 if (indirect == NULL)
1504 RETURN_ERROR(B_IO_ERROR);
1506 int32 current = -1;
1507 while (++current < runsPerBlock) {
1508 if (indirect[current].IsZero())
1509 break;
1511 runBlockEnd += (uint32)indirect[current].Length()
1512 << cached.BlockShift();
1513 if (runBlockEnd > pos) {
1514 run = indirect[current];
1515 offset = runBlockEnd - ((uint32)run.Length()
1516 << cached.BlockShift());
1517 return fVolume->ValidateBlockRun(run);
1521 RETURN_ERROR(B_ERROR);
1523 } else {
1524 // access from direct blocks
1526 off_t runBlockEnd = 0LL;
1527 int32 current = -1;
1529 while (++current < NUM_DIRECT_BLOCKS) {
1530 if (data->direct[current].IsZero())
1531 break;
1533 runBlockEnd += (uint32)data->direct[current].Length()
1534 << fVolume->BlockShift();
1535 if (runBlockEnd > pos) {
1536 run = data->direct[current];
1537 offset = runBlockEnd
1538 - ((uint32)run.Length() << fVolume->BlockShift());
1539 return fVolume->ValidateBlockRun(run);
1543 return B_ENTRY_NOT_FOUND;
1545 return fVolume->ValidateBlockRun(run);
1549 status_t
1550 Inode::ReadAt(off_t pos, uint8* buffer, size_t* _length)
1552 size_t length = *_length;
1554 // set/check boundaries for pos/length
1555 if (pos < 0)
1556 return B_BAD_VALUE;
1558 InodeReadLocker locker(this);
1560 if (pos >= Size() || length == 0) {
1561 *_length = 0;
1562 return B_NO_ERROR;
1565 locker.Unlock();
1567 return file_cache_read(FileCache(), NULL, pos, buffer, _length);
1571 status_t
1572 Inode::WriteAt(Transaction& transaction, off_t pos, const uint8* buffer,
1573 size_t* _length)
1575 InodeReadLocker locker(this);
1577 // update the last modification time in memory, it will be written
1578 // back to the inode, and the index when the file is closed
1579 // TODO: should update the internal last modified time only at this point!
1580 Node().last_modified_time = Node().status_change_time
1581 = HOST_ENDIAN_TO_BFS_INT64(bfs_inode::ToInode(real_time_clock_usecs()));
1583 // TODO: support INODE_LOGGED!
1585 size_t length = *_length;
1586 bool changeSize = (uint64)pos + (uint64)length > (uint64)Size();
1588 // set/check boundaries for pos/length
1589 if (pos < 0)
1590 return B_BAD_VALUE;
1592 locker.Unlock();
1594 // the transaction doesn't have to be started already
1595 if (changeSize && !transaction.IsStarted())
1596 transaction.Start(fVolume, BlockNumber());
1598 WriteLocker writeLocker(fLock);
1600 // Work around possible race condition: Someone might have shrunken the file
1601 // while we had no lock.
1602 if (!transaction.IsStarted()
1603 && (uint64)pos + (uint64)length > (uint64)Size()) {
1604 writeLocker.Unlock();
1605 transaction.Start(fVolume, BlockNumber());
1606 writeLocker.Lock();
1609 off_t oldSize = Size();
1611 if ((uint64)pos + (uint64)length > (uint64)oldSize) {
1612 // let's grow the data stream to the size needed
1613 status_t status = SetFileSize(transaction, pos + length);
1614 if (status != B_OK) {
1615 *_length = 0;
1616 WriteLockInTransaction(transaction);
1617 RETURN_ERROR(status);
1619 // TODO: In theory we would need to update the file size
1620 // index here as part of the current transaction - this might just
1621 // be a bit too expensive, but worth a try.
1623 // we need to write back the inode here because it has to
1624 // go into this transaction (we cannot wait until the file
1625 // is closed)
1626 status = WriteBack(transaction);
1627 if (status != B_OK) {
1628 WriteLockInTransaction(transaction);
1629 return status;
1633 writeLocker.Unlock();
1635 if (oldSize < pos)
1636 FillGapWithZeros(oldSize, pos);
1638 // If we don't want to write anything, we can now return (we may
1639 // just have changed the file size using the position parameter)
1640 if (length == 0)
1641 return B_OK;
1643 status_t status = file_cache_write(FileCache(), NULL, pos, buffer, _length);
1645 if (transaction.IsStarted())
1646 WriteLockInTransaction(transaction);
1648 return status;
1652 /*! Fills the gap between the old file size and the new file size
1653 with zeros.
1654 It's more or less a copy of Inode::WriteAt() but it can handle
1655 length differences of more than just 4 GB, and it never uses
1656 the log, even if the INODE_LOGGED flag is set.
1658 status_t
1659 Inode::FillGapWithZeros(off_t pos, off_t newSize)
1661 while (pos < newSize) {
1662 size_t size;
1663 if (newSize > pos + 1024 * 1024 * 1024)
1664 size = 1024 * 1024 * 1024;
1665 else
1666 size = newSize - pos;
1668 status_t status = file_cache_write(FileCache(), NULL, pos, NULL, &size);
1669 if (status < B_OK)
1670 return status;
1672 pos += size;
1675 return B_OK;
1679 /*! Allocates \a length blocks, and clears their contents. Growing
1680 the indirect and double indirect range uses this method.
1681 The allocated block_run is saved in "run"
1683 status_t
1684 Inode::_AllocateBlockArray(Transaction& transaction, block_run& run,
1685 size_t length, bool variableSize)
1687 if (!run.IsZero())
1688 return B_BAD_VALUE;
1690 status_t status = fVolume->Allocate(transaction, this, length, run,
1691 variableSize ? 1 : length);
1692 if (status != B_OK)
1693 return status;
1695 // make sure those blocks are empty
1696 CachedBlock cached(fVolume);
1697 off_t block = fVolume->ToBlock(run);
1699 for (int32 i = 0; i < run.Length(); i++) {
1700 block_run* runs = (block_run*)cached.SetToWritable(transaction,
1701 block + i, true);
1702 if (runs == NULL)
1703 return B_IO_ERROR;
1705 return B_OK;
1709 /*! Grows the stream to \a size, and fills the direct/indirect/double indirect
1710 ranges with the runs.
1711 This method will also determine the size of the preallocation, if any.
1713 status_t
1714 Inode::_GrowStream(Transaction& transaction, off_t size)
1716 data_stream* data = &Node().data;
1718 // is the data stream already large enough to hold the new size?
1719 // (can be the case with preallocated blocks)
1720 if (size < data->MaxDirectRange()
1721 || size < data->MaxIndirectRange()
1722 || size < data->MaxDoubleIndirectRange()) {
1723 data->size = HOST_ENDIAN_TO_BFS_INT64(size);
1724 return B_OK;
1727 // how many bytes are still needed? (unused ranges are always zero)
1728 uint16 minimum = 1;
1729 off_t bytes;
1730 if (data->Size() < data->MaxDoubleIndirectRange()) {
1731 bytes = size - data->MaxDoubleIndirectRange();
1732 // The double indirect range can only handle multiples of
1733 // its base length
1734 minimum = data->double_indirect.Length();
1735 } else if (data->Size() < data->MaxIndirectRange())
1736 bytes = size - data->MaxIndirectRange();
1737 else if (data->Size() < data->MaxDirectRange())
1738 bytes = size - data->MaxDirectRange();
1739 else {
1740 // no preallocation left to be used
1741 bytes = size - data->Size();
1742 if (data->MaxDoubleIndirectRange() > 0)
1743 minimum = data->double_indirect.Length();
1746 // do we have enough free blocks on the disk?
1747 off_t blocksNeeded = (bytes + fVolume->BlockSize() - 1)
1748 >> fVolume->BlockShift();
1749 if (blocksNeeded > fVolume->FreeBlocks())
1750 return B_DEVICE_FULL;
1752 off_t blocksRequested = blocksNeeded;
1753 // because of preallocations and partial allocations, the number of
1754 // blocks we need to allocate may be different from the one we request
1755 // from the block allocator
1757 // Should we preallocate some blocks?
1758 // Attributes, attribute directories, and long symlinks usually won't get
1759 // that big, and should stay close to the inode - preallocating could be
1760 // counterproductive.
1761 // Also, if free disk space is tight, don't preallocate.
1762 if (!IsAttribute() && !IsAttributeDirectory() && !IsSymLink()
1763 && fVolume->FreeBlocks() > 128) {
1764 off_t roundTo = 0;
1765 if (IsFile()) {
1766 // Request preallocated blocks depending on the file size and growth
1767 if (size < 1 * 1024 * 1024 && bytes < 512 * 1024) {
1768 // Preallocate 64 KB for file sizes <1 MB and grow rates <512 KB
1769 roundTo = 65536 >> fVolume->BlockShift();
1770 } else if (size < 32 * 1024 * 1024 && bytes <= 1 * 1024 * 1024) {
1771 // Preallocate 512 KB for file sizes between 1 MB and 32 MB, and
1772 // grow rates smaller than 1 MB
1773 roundTo = (512 * 1024) >> fVolume->BlockShift();
1774 } else {
1775 // Preallocate 1/16 of the file size (ie. 4 MB for 64 MB,
1776 // 64 MB for 1 GB)
1777 roundTo = size >> (fVolume->BlockShift() + 4);
1779 } else if (IsIndex()) {
1780 // Always preallocate 64 KB for index directories
1781 roundTo = 65536 >> fVolume->BlockShift();
1782 } else {
1783 // Preallocate only 4 KB - directories only get trimmed when their
1784 // vnode is flushed, which might not happen very often.
1785 roundTo = 4096 >> fVolume->BlockShift();
1787 if (roundTo > 1) {
1788 // Round to next "roundTo" block count
1789 blocksRequested = ((blocksNeeded + roundTo) / roundTo) * roundTo;
1793 while (blocksNeeded > 0) {
1794 // the requested blocks do not need to be returned with a
1795 // single allocation, so we need to iterate until we have
1796 // enough blocks allocated
1797 if (minimum > 1) {
1798 // make sure that "blocks" is a multiple of minimum
1799 blocksRequested = round_up(blocksRequested, minimum);
1802 block_run run;
1803 status_t status = fVolume->Allocate(transaction, this, blocksRequested,
1804 run, minimum);
1805 if (status != B_OK)
1806 return status;
1808 // okay, we have the needed blocks, so just distribute them to the
1809 // different ranges of the stream (direct, indirect & double indirect)
1811 blocksNeeded -= run.Length();
1812 // don't preallocate if the first allocation was already too small
1813 blocksRequested = blocksNeeded;
1815 // Direct block range
1817 if (data->Size() <= data->MaxDirectRange()) {
1818 // let's try to put them into the direct block range
1819 int32 free = 0;
1820 for (; free < NUM_DIRECT_BLOCKS; free++) {
1821 if (data->direct[free].IsZero())
1822 break;
1825 if (free < NUM_DIRECT_BLOCKS) {
1826 // can we merge the last allocated run with the new one?
1827 int32 last = free - 1;
1828 if (free > 0 && data->direct[last].MergeableWith(run)) {
1829 data->direct[last].length = HOST_ENDIAN_TO_BFS_INT16(
1830 data->direct[last].Length() + run.Length());
1831 } else
1832 data->direct[free] = run;
1834 data->max_direct_range = HOST_ENDIAN_TO_BFS_INT64(
1835 data->MaxDirectRange()
1836 + run.Length() * fVolume->BlockSize());
1837 data->size = HOST_ENDIAN_TO_BFS_INT64(blocksNeeded > 0
1838 ? data->max_direct_range : size);
1839 continue;
1843 // Indirect block range
1845 if (data->Size() <= data->MaxIndirectRange()
1846 || !data->MaxIndirectRange()) {
1847 CachedBlock cached(fVolume);
1848 block_run* runs = NULL;
1849 uint32 free = 0;
1850 off_t block;
1852 // if there is no indirect block yet, create one
1853 if (data->indirect.IsZero()) {
1854 status = _AllocateBlockArray(transaction, data->indirect,
1855 NUM_ARRAY_BLOCKS, true);
1856 if (status != B_OK)
1857 return status;
1859 data->max_indirect_range = HOST_ENDIAN_TO_BFS_INT64(
1860 data->MaxDirectRange());
1861 // insert the block_run in the first block
1862 runs = (block_run*)cached.SetTo(data->indirect);
1863 } else {
1864 uint32 numberOfRuns = fVolume->BlockSize() / sizeof(block_run);
1865 block = fVolume->ToBlock(data->indirect);
1867 // search first empty entry
1868 int32 i = 0;
1869 for (; i < data->indirect.Length(); i++) {
1870 if ((runs = (block_run*)cached.SetTo(block + i)) == NULL)
1871 return B_IO_ERROR;
1873 for (free = 0; free < numberOfRuns; free++)
1874 if (runs[free].IsZero())
1875 break;
1877 if (free < numberOfRuns)
1878 break;
1880 if (i == data->indirect.Length())
1881 runs = NULL;
1884 if (runs != NULL) {
1885 // try to insert the run to the last one - note that this
1886 // doesn't take block borders into account, so it could be
1887 // further optimized
1888 cached.MakeWritable(transaction);
1890 int32 last = free - 1;
1891 if (free > 0 && runs[last].MergeableWith(run)) {
1892 runs[last].length = HOST_ENDIAN_TO_BFS_INT16(
1893 runs[last].Length() + run.Length());
1894 } else
1895 runs[free] = run;
1897 data->max_indirect_range = HOST_ENDIAN_TO_BFS_INT64(
1898 data->MaxIndirectRange()
1899 + ((uint32)run.Length() << fVolume->BlockShift()));
1900 data->size = HOST_ENDIAN_TO_BFS_INT64(blocksNeeded > 0
1901 ? data->MaxIndirectRange() : size);
1902 continue;
1906 // Double indirect block range
1908 if (data->Size() <= data->MaxDoubleIndirectRange()
1909 || !data->max_double_indirect_range) {
1910 // We make sure here that we have this minimum allocated, so if
1911 // the allocation succeeds, we don't run into an endless loop.
1912 if (!data->max_double_indirect_range)
1913 minimum = _DoubleIndirectBlockLength();
1914 else
1915 minimum = data->double_indirect.Length();
1917 if ((run.Length() % minimum) != 0) {
1918 // The number of allocated blocks isn't a multiple of 'minimum',
1919 // so we have to change this. This can happen the first time the
1920 // stream grows into the double indirect range.
1921 // First, free the remaining blocks that don't fit into this
1922 // multiple.
1923 int32 rest = run.Length() % minimum;
1924 run.length = HOST_ENDIAN_TO_BFS_INT16(run.Length() - rest);
1926 status = fVolume->Free(transaction,
1927 block_run::Run(run.AllocationGroup(),
1928 run.Start() + run.Length(), rest));
1929 if (status != B_OK)
1930 return status;
1932 blocksNeeded += rest;
1933 blocksRequested = round_up(blocksNeeded, minimum);
1935 // Are there any blocks left in the run? If not, allocate
1936 // a new one
1937 if (run.length == 0)
1938 continue;
1941 // if there is no double indirect block yet, create one
1942 if (data->double_indirect.IsZero()) {
1943 status = _AllocateBlockArray(transaction,
1944 data->double_indirect, _DoubleIndirectBlockLength());
1945 if (status != B_OK)
1946 return status;
1948 data->max_double_indirect_range = data->max_indirect_range;
1951 // calculate the index where to insert the new blocks
1953 int32 runsPerBlock;
1954 int32 directSize;
1955 int32 indirectSize;
1956 get_double_indirect_sizes(data->double_indirect.Length(),
1957 fVolume->BlockSize(), runsPerBlock, directSize, indirectSize);
1958 if (directSize <= 0 || indirectSize <= 0)
1959 return B_BAD_DATA;
1961 off_t start = data->MaxDoubleIndirectRange()
1962 - data->MaxIndirectRange();
1963 int32 indirectIndex = start / indirectSize;
1964 int32 index = (start % indirectSize) / directSize;
1965 int32 runsPerArray = runsPerBlock * minimum;
1967 // distribute the blocks to the array and allocate
1968 // new array blocks when needed
1970 CachedBlock cached(fVolume);
1971 CachedBlock cachedDirect(fVolume);
1972 block_run* array = NULL;
1973 uint32 runLength = run.Length();
1975 while (run.length != 0) {
1976 // get the indirect array block
1977 if (array == NULL) {
1978 uint32 block = indirectIndex / runsPerBlock;
1979 if (block >= minimum)
1980 return EFBIG;
1982 array = (block_run*)cached.SetTo(fVolume->ToBlock(
1983 data->double_indirect) + block);
1984 if (array == NULL)
1985 return B_IO_ERROR;
1988 do {
1989 // do we need a new array block?
1990 if (array[indirectIndex % runsPerBlock].IsZero()) {
1991 cached.MakeWritable(transaction);
1993 status = _AllocateBlockArray(transaction,
1994 array[indirectIndex % runsPerBlock],
1995 data->double_indirect.Length());
1996 if (status != B_OK)
1997 return status;
2000 block_run* runs = (block_run*)cachedDirect.SetToWritable(
2001 transaction, fVolume->ToBlock(array[indirectIndex
2002 % runsPerBlock]) + index / runsPerBlock);
2003 if (runs == NULL)
2004 return B_IO_ERROR;
2006 do {
2007 // insert the block_run into the array
2008 runs[index % runsPerBlock] = run;
2009 runs[index % runsPerBlock].length
2010 = HOST_ENDIAN_TO_BFS_INT16(minimum);
2012 // alter the remaining block_run
2013 run.start = HOST_ENDIAN_TO_BFS_INT16(run.Start()
2014 + minimum);
2015 run.length = HOST_ENDIAN_TO_BFS_INT16(run.Length()
2016 - minimum);
2017 } while ((++index % runsPerBlock) != 0 && run.length);
2018 } while ((index % runsPerArray) != 0 && run.length);
2020 if (index == runsPerArray)
2021 index = 0;
2022 if (++indirectIndex % runsPerBlock == 0) {
2023 array = NULL;
2024 index = 0;
2028 data->max_double_indirect_range = HOST_ENDIAN_TO_BFS_INT64(
2029 data->MaxDoubleIndirectRange()
2030 + (runLength << fVolume->BlockShift()));
2031 data->size = blocksNeeded > 0 ? HOST_ENDIAN_TO_BFS_INT64(
2032 data->max_double_indirect_range) : size;
2034 continue;
2037 RETURN_ERROR(EFBIG);
2039 // update the size of the data stream
2040 data->size = HOST_ENDIAN_TO_BFS_INT64(size);
2042 return B_OK;
2046 size_t
2047 Inode::_DoubleIndirectBlockLength() const
2049 if (fVolume->BlockSize() > DOUBLE_INDIRECT_ARRAY_SIZE)
2050 return 1;
2052 return DOUBLE_INDIRECT_ARRAY_SIZE / fVolume->BlockSize();
2056 /*! Frees the statically sized array of the double indirect part of a data
2057 stream.
2059 status_t
2060 Inode::_FreeStaticStreamArray(Transaction& transaction, int32 level,
2061 block_run run, off_t size, off_t offset, off_t& max)
2063 int32 indirectSize;
2064 if (level == 0) {
2065 indirectSize = double_indirect_max_indirect_size(run.Length(),
2066 fVolume->BlockSize());
2067 } else {
2068 indirectSize = double_indirect_max_direct_size(run.Length(),
2069 fVolume->BlockSize());
2071 if (indirectSize <= 0)
2072 return B_BAD_DATA;
2074 off_t start;
2075 if (size > offset)
2076 start = size - offset;
2077 else
2078 start = 0;
2080 int32 index = start / indirectSize;
2081 int32 runsPerBlock = fVolume->BlockSize() / sizeof(block_run);
2083 CachedBlock cached(fVolume);
2084 off_t blockNumber = fVolume->ToBlock(run);
2086 // set the file offset to the current block run
2087 offset += (off_t)index * indirectSize;
2089 for (int32 i = index / runsPerBlock; i < run.Length(); i++) {
2090 block_run* array = (block_run*)cached.SetToWritable(transaction,
2091 blockNumber + i);
2092 if (array == NULL)
2093 RETURN_ERROR(B_ERROR);
2095 for (index = index % runsPerBlock; index < runsPerBlock; index++) {
2096 if (array[index].IsZero()) {
2097 // we also want to break out of the outer loop
2098 i = run.Length();
2099 break;
2102 status_t status = B_OK;
2103 if (level == 0) {
2104 status = _FreeStaticStreamArray(transaction, 1, array[index],
2105 size, offset, max);
2106 } else if (offset >= size)
2107 status = fVolume->Free(transaction, array[index]);
2108 else
2109 max = HOST_ENDIAN_TO_BFS_INT64(offset + indirectSize);
2111 if (status < B_OK)
2112 RETURN_ERROR(status);
2114 if (offset >= size)
2115 array[index].SetTo(0, 0, 0);
2117 offset += indirectSize;
2119 index = 0;
2121 return B_OK;
2125 /*! Frees all block_runs in the array which come after the specified size.
2126 It also trims the last block_run that contain the size.
2127 "offset" and "max" are maintained until the last block_run that doesn't
2128 have to be freed - after this, the values won't be correct anymore, but
2129 will still assure correct function for all subsequent calls.
2130 "max" is considered to be in file system byte order.
2132 status_t
2133 Inode::_FreeStreamArray(Transaction& transaction, block_run* array,
2134 uint32 arrayLength, off_t size, off_t& offset, off_t& max)
2136 PRINT(("FreeStreamArray: arrayLength %lu, size %Ld, offset %Ld, max %Ld\n",
2137 arrayLength, size, offset, max));
2139 off_t newOffset = offset;
2140 uint32 i = 0;
2141 for (; i < arrayLength; i++, offset = newOffset) {
2142 if (array[i].IsZero())
2143 break;
2145 newOffset += (off_t)array[i].Length() << fVolume->BlockShift();
2146 if (newOffset <= size)
2147 continue;
2149 block_run run = array[i];
2151 // determine the block_run to be freed
2152 if (newOffset > size && offset < size) {
2153 // free partial block_run (and update the original block_run)
2154 run.start = HOST_ENDIAN_TO_BFS_INT16(array[i].Start()
2155 + ((size + fVolume->BlockSize() - 1 - offset)
2156 >> fVolume->BlockShift()));
2157 // round to next block
2158 array[i].length = HOST_ENDIAN_TO_BFS_INT16(run.Start()
2159 - array[i].Start());
2160 run.length = HOST_ENDIAN_TO_BFS_INT16(run.Length()
2161 - array[i].Length());
2163 if (run.length == 0)
2164 continue;
2166 // update maximum range
2167 max = HOST_ENDIAN_TO_BFS_INT64(offset + ((off_t)array[i].Length()
2168 << fVolume->BlockShift()));
2169 } else {
2170 // free the whole block_run
2171 array[i].SetTo(0, 0, 0);
2173 if ((off_t)BFS_ENDIAN_TO_HOST_INT64(max) > offset)
2174 max = HOST_ENDIAN_TO_BFS_INT64(offset);
2177 if (fVolume->Free(transaction, run) < B_OK)
2178 return B_IO_ERROR;
2180 return B_OK;
2184 status_t
2185 Inode::_ShrinkStream(Transaction& transaction, off_t size)
2187 data_stream* data = &Node().data;
2188 status_t status;
2190 if (data->MaxDoubleIndirectRange() > size) {
2191 off_t* maxDoubleIndirect = &data->max_double_indirect_range;
2192 // gcc 4 work-around: "error: cannot bind packed field
2193 // 'data->data_stream::max_double_indirect_range' to 'off_t&'"
2194 status = _FreeStaticStreamArray(transaction, 0, data->double_indirect,
2195 size, data->MaxIndirectRange(), *maxDoubleIndirect);
2196 if (status != B_OK)
2197 return status;
2199 if (size <= data->MaxIndirectRange()) {
2200 fVolume->Free(transaction, data->double_indirect);
2201 data->double_indirect.SetTo(0, 0, 0);
2202 data->max_double_indirect_range = 0;
2206 if (data->MaxIndirectRange() > size) {
2207 CachedBlock cached(fVolume);
2208 off_t block = fVolume->ToBlock(data->indirect);
2209 off_t offset = data->MaxDirectRange();
2211 for (int32 i = 0; i < data->indirect.Length(); i++) {
2212 block_run* array = (block_run*)cached.SetToWritable(transaction,
2213 block + i);
2214 if (array == NULL)
2215 break;
2217 off_t* maxIndirect = &data->max_indirect_range;
2218 // gcc 4 work-around: "error: cannot bind packed field
2219 // 'data->data_stream::max_indirect_range' to 'off_t&'"
2220 if (_FreeStreamArray(transaction, array, fVolume->BlockSize()
2221 / sizeof(block_run), size, offset, *maxIndirect) != B_OK)
2222 return B_IO_ERROR;
2224 if (data->max_direct_range == data->max_indirect_range) {
2225 fVolume->Free(transaction, data->indirect);
2226 data->indirect.SetTo(0, 0, 0);
2227 data->max_indirect_range = 0;
2231 if (data->MaxDirectRange() > size) {
2232 off_t offset = 0;
2233 off_t *maxDirect = &data->max_direct_range;
2234 // gcc 4 work-around: "error: cannot bind packed field
2235 // 'data->data_stream::max_direct_range' to 'off_t&'"
2236 status = _FreeStreamArray(transaction, data->direct, NUM_DIRECT_BLOCKS,
2237 size, offset, *maxDirect);
2238 if (status < B_OK)
2239 return status;
2242 data->size = HOST_ENDIAN_TO_BFS_INT64(size);
2243 return B_OK;
2247 status_t
2248 Inode::SetFileSize(Transaction& transaction, off_t size)
2250 if (size < 0)
2251 return B_BAD_VALUE;
2253 off_t oldSize = Size();
2255 if (size == oldSize)
2256 return B_OK;
2258 T(Resize(this, oldSize, size, false));
2260 // should the data stream grow or shrink?
2261 status_t status;
2262 if (size > oldSize) {
2263 status = _GrowStream(transaction, size);
2264 if (status < B_OK) {
2265 // if the growing of the stream fails, the whole operation
2266 // fails, so we should shrink the stream to its former size
2267 _ShrinkStream(transaction, oldSize);
2269 } else
2270 status = _ShrinkStream(transaction, size);
2272 if (status < B_OK)
2273 return status;
2275 file_cache_set_size(FileCache(), size);
2276 file_map_set_size(Map(), size);
2278 return WriteBack(transaction);
2282 status_t
2283 Inode::Append(Transaction& transaction, off_t bytes)
2285 return SetFileSize(transaction, Size() + bytes);
2289 /*! Checks whether or not this inode's data stream needs to be trimmed
2290 because of an earlier preallocation.
2291 Returns true if there are any blocks to be trimmed.
2293 bool
2294 Inode::NeedsTrimming() const
2296 // We never trim preallocated index blocks to make them grow as smooth as
2297 // possible. There are only few indices anyway, so this doesn't hurt.
2298 // Also, if an inode is already in deleted state, we don't bother trimming
2299 // it.
2300 if (IsIndex() || IsDeleted()
2301 || (IsSymLink() && (Flags() & INODE_LONG_SYMLINK) == 0))
2302 return false;
2304 off_t roundedSize = round_up(Size(), fVolume->BlockSize());
2306 return Node().data.MaxDirectRange() > roundedSize
2307 || Node().data.MaxIndirectRange() > roundedSize
2308 || Node().data.MaxDoubleIndirectRange() > roundedSize;
2312 status_t
2313 Inode::TrimPreallocation(Transaction& transaction)
2315 T(Resize(this, max_c(Node().data.MaxDirectRange(),
2316 Node().data.MaxIndirectRange()), Size(), true));
2318 status_t status = _ShrinkStream(transaction, Size());
2319 if (status < B_OK)
2320 return status;
2322 return WriteBack(transaction);
2326 //! Frees the file's data stream and removes all attributes
2327 status_t
2328 Inode::Free(Transaction& transaction)
2330 FUNCTION();
2332 // Perhaps there should be an implementation of Inode::ShrinkStream() that
2333 // just frees the data_stream, but doesn't change the inode (since it is
2334 // freed anyway) - that would make an undelete command possible
2335 if (!IsSymLink() || (Flags() & INODE_LONG_SYMLINK) != 0) {
2336 status_t status = SetFileSize(transaction, 0);
2337 if (status < B_OK)
2338 return status;
2341 // Free all attributes, and remove their indices
2343 // We have to limit the scope of AttributeIterator, so that its
2344 // destructor is not called after the inode is deleted
2345 AttributeIterator iterator(this);
2347 char name[B_FILE_NAME_LENGTH];
2348 uint32 type;
2349 size_t length;
2350 ino_t id;
2351 while (iterator.GetNext(name, &length, &type, &id) == B_OK) {
2352 RemoveAttribute(transaction, name);
2356 if (WriteBack(transaction) < B_OK)
2357 return B_IO_ERROR;
2359 return fVolume->Free(transaction, BlockRun());
2363 //! Synchronizes (writes back to disk) the file stream of the inode.
2364 status_t
2365 Inode::Sync()
2367 if (FileCache())
2368 return file_cache_sync(FileCache());
2370 // We may also want to flush the attribute's data stream to
2371 // disk here... (do we?)
2373 if (IsSymLink() && (Flags() & INODE_LONG_SYMLINK) == 0)
2374 return B_OK;
2376 InodeReadLocker locker(this);
2378 data_stream* data = &Node().data;
2379 status_t status = B_OK;
2381 // flush direct range
2383 for (int32 i = 0; i < NUM_DIRECT_BLOCKS; i++) {
2384 if (data->direct[i].IsZero())
2385 return B_OK;
2387 status = block_cache_sync_etc(fVolume->BlockCache(),
2388 fVolume->ToBlock(data->direct[i]), data->direct[i].Length());
2389 if (status != B_OK)
2390 return status;
2393 // flush indirect range
2395 if (data->max_indirect_range == 0)
2396 return B_OK;
2398 CachedBlock cached(fVolume);
2399 off_t block = fVolume->ToBlock(data->indirect);
2400 int32 count = fVolume->BlockSize() / sizeof(block_run);
2402 for (int32 j = 0; j < data->indirect.Length(); j++) {
2403 block_run* runs = (block_run*)cached.SetTo(block + j);
2404 if (runs == NULL)
2405 break;
2407 for (int32 i = 0; i < count; i++) {
2408 if (runs[i].IsZero())
2409 return B_OK;
2411 status = block_cache_sync_etc(fVolume->BlockCache(),
2412 fVolume->ToBlock(runs[i]), runs[i].Length());
2413 if (status != B_OK)
2414 return status;
2418 // flush double indirect range
2420 if (data->max_double_indirect_range == 0)
2421 return B_OK;
2423 off_t indirectBlock = fVolume->ToBlock(data->double_indirect);
2425 for (int32 l = 0; l < data->double_indirect.Length(); l++) {
2426 block_run* indirectRuns = (block_run*)cached.SetTo(indirectBlock + l);
2427 if (indirectRuns == NULL)
2428 return B_FILE_ERROR;
2430 CachedBlock directCached(fVolume);
2432 for (int32 k = 0; k < count; k++) {
2433 if (indirectRuns[k].IsZero())
2434 return B_OK;
2436 block = fVolume->ToBlock(indirectRuns[k]);
2437 for (int32 j = 0; j < indirectRuns[k].Length(); j++) {
2438 block_run* runs = (block_run*)directCached.SetTo(block + j);
2439 if (runs == NULL)
2440 return B_FILE_ERROR;
2442 for (int32 i = 0; i < count; i++) {
2443 if (runs[i].IsZero())
2444 return B_OK;
2446 // TODO: combine single block_runs to bigger ones when
2447 // they are adjacent
2448 status = block_cache_sync_etc(fVolume->BlockCache(),
2449 fVolume->ToBlock(runs[i]), runs[i].Length());
2450 if (status != B_OK)
2451 return status;
2456 return B_OK;
2460 // #pragma mark - TransactionListener implementation
2463 void
2464 Inode::TransactionDone(bool success)
2466 if (!success) {
2467 // Revert any changes made to the cached bfs_inode
2468 // TODO: return code gets eaten
2469 UpdateNodeFromDisk();
2474 void
2475 Inode::RemovedFromTransaction()
2477 Node().flags &= ~HOST_ENDIAN_TO_BFS_INT32(INODE_IN_TRANSACTION);
2479 // See AddInode() why we do this here
2480 if ((Flags() & INODE_DELETED) != 0)
2481 fVolume->RemovedInodes().Add(this);
2483 rw_lock_write_unlock(&Lock());
2485 if (!fVolume->IsInitializing())
2486 put_vnode(fVolume->FSVolume(), ID());
2490 // #pragma mark - creation/deletion
2493 status_t
2494 Inode::Remove(Transaction& transaction, const char* name, ino_t* _id,
2495 bool isDirectory, bool force)
2497 if (fTree == NULL)
2498 RETURN_ERROR(B_BAD_VALUE);
2500 WriteLockInTransaction(transaction);
2502 // does the file even exist?
2503 off_t id;
2504 if (fTree->Find((uint8*)name, (uint16)strlen(name), &id) < B_OK)
2505 return B_ENTRY_NOT_FOUND;
2507 if (_id)
2508 *_id = id;
2510 Vnode vnode(fVolume, id);
2511 Inode* inode;
2512 status_t status = vnode.Get(&inode);
2513 if (status < B_OK) {
2514 REPORT_ERROR(status);
2515 return B_ENTRY_NOT_FOUND;
2518 T(Remove(inode, name));
2519 inode->WriteLockInTransaction(transaction);
2521 // Inode::IsContainer() is true also for indices (furthermore, the S_IFDIR
2522 // bit is set for indices in BFS, not for attribute directories) - but you
2523 // should really be able to do whatever you want with your indices
2524 // without having to remove all files first :)
2525 if (!inode->IsIndex() && !force) {
2526 // if it's not of the correct type, don't delete it!
2527 if (inode->IsContainer() != isDirectory)
2528 return isDirectory ? B_NOT_A_DIRECTORY : B_IS_A_DIRECTORY;
2530 // only delete empty directories
2531 if (isDirectory && !inode->IsEmpty())
2532 return B_DIRECTORY_NOT_EMPTY;
2535 // remove_vnode() allows the inode to be accessed until the last put_vnode()
2536 status = remove_vnode(fVolume->FSVolume(), id);
2537 if (status != B_OK)
2538 return status;
2540 if (fTree->Remove(transaction, name, id) != B_OK && !force) {
2541 unremove_vnode(fVolume->FSVolume(), id);
2542 RETURN_ERROR(B_ERROR);
2545 #ifdef DEBUG
2546 if (fTree->Find((uint8*)name, (uint16)strlen(name), &id) == B_OK) {
2547 DIE(("deleted entry still there"));
2549 #endif
2551 ContainerContentsChanged(transaction);
2553 // update the inode, so that no one will ever doubt it's deleted :-)
2554 inode->Node().flags |= HOST_ENDIAN_TO_BFS_INT32(INODE_DELETED);
2555 inode->Node().flags &= ~HOST_ENDIAN_TO_BFS_INT32(INODE_IN_USE);
2557 // In balance to the Inode::Create() method, the main indices
2558 // are updated here (name, size, & last_modified)
2560 Index index(fVolume);
2561 if (inode->InNameIndex()) {
2562 index.RemoveName(transaction, name, inode);
2563 // If removing from the index fails, it is not regarded as a
2564 // fatal error and will not be reported back!
2565 // Deleted inodes won't be visible in queries anyway.
2568 if (inode->InSizeIndex())
2569 index.RemoveSize(transaction, inode);
2570 if (inode->InLastModifiedIndex())
2571 index.RemoveLastModified(transaction, inode);
2573 return inode->WriteBack(transaction);
2577 /*! Creates the inode with the specified \a parent directory, and automatically
2578 adds the created inode to that parent directory. If an attribute directory
2579 is created, it will also automatically be added to the \a parent inode as
2580 such. However, the indices root node, and the regular root node won't be
2581 added to the superblock.
2582 It will also create the initial B+tree for the inode if it's a directory
2583 of any kind.
2584 \a name may be \c NULL, but only if no \a parent is given.
2585 If the "_id" or "_inode" variable is given and non-NULL to store the
2586 inode's ID, the inode stays locked - you have to call put_vnode() if you
2587 don't use it anymore.
2588 If the node already exists, this method will fail if \c O_EXCL is set, or
2589 it's a directory or a symlink. Otherwise, it will just be returned.
2590 If \c O_TRUNC has been specified, the file will also be truncated.
2592 status_t
2593 Inode::Create(Transaction& transaction, Inode* parent, const char* name,
2594 int32 mode, int openMode, uint32 type, bool* _created, ino_t* _id,
2595 Inode** _inode, fs_vnode_ops* vnodeOps, uint32 publishFlags)
2597 FUNCTION_START(("name = %s, mode = %ld\n", name, mode));
2599 block_run parentRun = parent ? parent->BlockRun() : block_run::Run(0, 0, 0);
2600 Volume* volume = transaction.GetVolume();
2601 BPlusTree* tree = NULL;
2603 if (parent != NULL && (mode & S_ATTR_DIR) == 0 && parent->IsContainer()) {
2604 // check if the file already exists in the directory
2605 tree = parent->Tree();
2608 if (parent != NULL) {
2609 // the parent directory is locked during the whole inode creation
2610 parent->WriteLockInTransaction(transaction);
2613 if (parent != NULL && !volume->IsInitializing() && parent->IsContainer()) {
2614 // don't create anything in removed directories
2615 bool removed;
2616 if (get_vnode_removed(volume->FSVolume(), parent->ID(), &removed)
2617 == B_OK && removed) {
2618 RETURN_ERROR(B_ENTRY_NOT_FOUND);
2622 if (tree != NULL) {
2623 // Does the file already exist?
2624 off_t offset;
2625 if (tree->Find((uint8*)name, (uint16)strlen(name), &offset) == B_OK) {
2626 // Return if the file should be a directory/link or opened in
2627 // exclusive mode
2628 if (S_ISDIR(mode) || S_ISLNK(mode) || (openMode & O_EXCL) != 0)
2629 return B_FILE_EXISTS;
2631 Vnode vnode(volume, offset);
2632 Inode* inode;
2633 status_t status = vnode.Get(&inode);
2634 if (status != B_OK) {
2635 REPORT_ERROR(status);
2636 return B_ENTRY_NOT_FOUND;
2639 if (inode->IsDirectory() && (openMode & O_RWMASK) != O_RDONLY)
2640 return B_IS_A_DIRECTORY;
2641 if ((openMode & O_DIRECTORY) != 0 && !inode->IsDirectory())
2642 return B_NOT_A_DIRECTORY;
2644 // we want to open the file, so we should have the rights to do so
2645 if (inode->CheckPermissions(open_mode_to_access(openMode)
2646 | ((openMode & O_TRUNC) != 0 ? W_OK : 0)) != B_OK)
2647 return B_NOT_ALLOWED;
2649 if ((openMode & O_TRUNC) != 0) {
2650 // truncate the existing file
2651 inode->WriteLockInTransaction(transaction);
2653 status_t status = inode->SetFileSize(transaction, 0);
2654 if (status == B_OK)
2655 status = inode->WriteBack(transaction);
2657 if (status != B_OK)
2658 return status;
2661 if (_created)
2662 *_created = false;
2663 if (_id)
2664 *_id = inode->ID();
2665 if (_inode)
2666 *_inode = inode;
2668 // Only keep the vnode in memory if the _id or _inode pointer is
2669 // provided
2670 if (_id != NULL || _inode != NULL)
2671 vnode.Keep();
2673 return B_OK;
2675 } else if (parent != NULL && (mode & S_ATTR_DIR) == 0) {
2676 return B_BAD_VALUE;
2677 } else if ((openMode & O_DIRECTORY) != 0) {
2678 // TODO: we might need to return B_NOT_A_DIRECTORY here
2679 return B_ENTRY_NOT_FOUND;
2682 status_t status;
2684 // do we have the power to create new files at all?
2685 if (parent != NULL && (status = parent->CheckPermissions(W_OK)) != B_OK)
2686 RETURN_ERROR(status);
2688 // allocate space for the new inode
2689 InodeAllocator allocator(transaction);
2690 block_run run;
2691 Inode* inode;
2692 status = allocator.New(&parentRun, mode, publishFlags, run, vnodeOps,
2693 &inode);
2694 if (status < B_OK)
2695 return status;
2697 T(Create(inode, parent, name, mode, openMode, type));
2699 // Initialize the parts of the bfs_inode structure that
2700 // InodeAllocator::New() hasn't touched yet
2702 bfs_inode* node = &inode->Node();
2704 if (parent == NULL) {
2705 // we set the parent to itself in this case
2706 // (only happens for the root and indices node)
2707 node->parent = run;
2708 } else
2709 node->parent = parentRun;
2711 node->uid = HOST_ENDIAN_TO_BFS_INT32(geteuid());
2712 node->gid = HOST_ENDIAN_TO_BFS_INT32(parent
2713 ? parent->Node().GroupID() : getegid());
2714 // the group ID is inherited from the parent, if available
2716 node->type = HOST_ENDIAN_TO_BFS_INT32(type);
2718 inode->WriteBack(transaction);
2719 // make sure the initialized node is available to others
2721 // only add the name to regular files, directories, or symlinks
2722 // don't add it to attributes, or indices
2723 if (tree && inode->IsRegularNode()
2724 && inode->SetName(transaction, name) != B_OK)
2725 return B_ERROR;
2727 // Initialize b+tree if it's a directory (and add "." & ".." if it's
2728 // a standard directory for files - not for attributes or indices)
2729 if (inode->IsContainer()) {
2730 status = allocator.CreateTree();
2731 if (status != B_OK)
2732 return status;
2735 // Add a link to the inode from the parent, depending on its type
2736 // (the vnode is not published yet, so it is safe to make the inode
2737 // accessable to the file system here)
2738 if (tree != NULL) {
2739 status = tree->Insert(transaction, name, inode->ID());
2740 } else if (parent != NULL && (mode & S_ATTR_DIR) != 0) {
2741 parent->Attributes() = run;
2742 status = parent->WriteBack(transaction);
2745 // Note, we only care if the inode could be made accessable for the
2746 // two cases above; the root node or the indices root node must
2747 // handle this case on their own (or other cases where "parent" is
2748 // NULL)
2749 if (status != B_OK)
2750 RETURN_ERROR(status);
2752 // Update the main indices (name, size & last_modified)
2753 // (live queries might want to access us after this)
2755 Index index(volume);
2756 if (inode->InNameIndex() && name != NULL) {
2757 // the name index only contains regular files
2758 // (but not the root node where name == NULL)
2759 status = index.InsertName(transaction, name, inode);
2760 if (status != B_OK && status != B_BAD_INDEX) {
2761 // We have to remove the node from the parent at this point,
2762 // because the InodeAllocator destructor can't handle this
2763 // case (and if it fails, we can't do anything about it...)
2764 if (tree)
2765 tree->Remove(transaction, name, inode->ID());
2766 else if (parent != NULL && (mode & S_ATTR_DIR) != 0)
2767 parent->Node().attributes.SetTo(0, 0, 0);
2769 RETURN_ERROR(status);
2773 if (parent != NULL && parent->IsContainer())
2774 parent->ContainerContentsChanged(transaction);
2776 inode->UpdateOldLastModified();
2778 // The "size" & "last_modified" indices don't contain directories.
2779 // If adding to these indices fails, the inode creation will not be
2780 // harmed; they are considered less important than the "name" index.
2781 if (inode->InSizeIndex())
2782 index.InsertSize(transaction, inode);
2783 if (inode->InLastModifiedIndex())
2784 index.InsertLastModified(transaction, inode);
2786 if (inode->NeedsFileCache()) {
2787 inode->SetFileCache(file_cache_create(volume->ID(), inode->ID(),
2788 inode->Size()));
2789 inode->SetMap(file_map_create(volume->ID(), inode->ID(),
2790 inode->Size()));
2792 if (inode->FileCache() == NULL || inode->Map() == NULL)
2793 return B_NO_MEMORY;
2796 // Everything worked well until this point, we have a fully
2797 // initialized inode, and we want to keep it
2798 allocator.Keep(vnodeOps, publishFlags);
2800 if (_created)
2801 *_created = true;
2802 if (_id != NULL)
2803 *_id = inode->ID();
2804 if (_inode != NULL)
2805 *_inode = inode;
2807 // if either _id or _inode is passed, we will keep the inode locked
2808 if (_id == NULL && _inode == NULL)
2809 put_vnode(volume->FSVolume(), inode->ID());
2811 return B_OK;
2815 // #pragma mark - AttributeIterator
2818 AttributeIterator::AttributeIterator(Inode* inode)
2820 fCurrentSmallData(0),
2821 fInode(inode),
2822 fAttributes(NULL),
2823 fIterator(NULL),
2824 fBuffer(NULL)
2826 inode->_AddIterator(this);
2830 AttributeIterator::~AttributeIterator()
2832 if (fAttributes)
2833 put_vnode(fAttributes->GetVolume()->FSVolume(), fAttributes->ID());
2835 delete fIterator;
2836 fInode->_RemoveIterator(this);
2840 status_t
2841 AttributeIterator::Rewind()
2843 fCurrentSmallData = 0;
2845 if (fIterator != NULL)
2846 fIterator->Rewind();
2848 return B_OK;
2852 status_t
2853 AttributeIterator::GetNext(char* name, size_t* _length, uint32* _type,
2854 ino_t* _id)
2856 // read attributes out of the small data section
2858 if (fCurrentSmallData >= 0) {
2859 NodeGetter nodeGetter(fInode->GetVolume(), fInode);
2860 if (nodeGetter.Node() == NULL)
2861 return B_IO_ERROR;
2863 const bfs_inode* node = nodeGetter.Node();
2864 const small_data* item = ((bfs_inode*)node)->SmallDataStart();
2866 RecursiveLocker _(&fInode->SmallDataLock());
2868 int32 index = 0;
2869 for (; !item->IsLast(node); item = item->Next(), index++) {
2870 if (item->NameSize() == FILE_NAME_NAME_LENGTH
2871 && *item->Name() == FILE_NAME_NAME)
2872 continue;
2874 if (index >= fCurrentSmallData)
2875 break;
2878 if (!item->IsLast(node)) {
2879 strncpy(name, item->Name(), B_FILE_NAME_LENGTH);
2880 *_type = item->Type();
2881 *_length = item->NameSize();
2882 *_id = (ino_t)index;
2884 fCurrentSmallData = index + 1;
2885 return B_OK;
2888 // stop traversing the small_data section
2889 fCurrentSmallData = -1;
2892 // read attributes out of the attribute directory
2894 if (fInode->Attributes().IsZero())
2895 return B_ENTRY_NOT_FOUND;
2897 Volume* volume = fInode->GetVolume();
2899 // if you haven't yet access to the attributes directory, get it
2900 if (fAttributes == NULL) {
2901 if (get_vnode(volume->FSVolume(), volume->ToVnode(fInode->Attributes()),
2902 (void**)&fAttributes) != B_OK) {
2903 FATAL(("get_vnode() failed in AttributeIterator::GetNext(ino_t"
2904 " = %" B_PRIdINO ",name = \"%s\")\n", fInode->ID(), name));
2905 return B_ENTRY_NOT_FOUND;
2908 BPlusTree* tree = fAttributes->Tree();
2909 if (tree == NULL
2910 || (fIterator = new(std::nothrow) TreeIterator(tree)) == NULL) {
2911 FATAL(("could not get tree in AttributeIterator::GetNext(ino_t"
2912 " = %" B_PRIdINO ",name = \"%s\")\n", fInode->ID(), name));
2913 return B_ENTRY_NOT_FOUND;
2917 uint16 length;
2918 ino_t id;
2919 status_t status = fIterator->GetNextEntry(name, &length,
2920 B_FILE_NAME_LENGTH, &id);
2921 if (status != B_OK)
2922 return status;
2924 Vnode vnode(volume, id);
2925 Inode* attribute;
2926 if ((status = vnode.Get(&attribute)) == B_OK) {
2927 *_type = attribute->Type();
2928 *_length = length;
2929 *_id = id;
2932 return status;
2936 void
2937 AttributeIterator::Update(uint16 index, int8 change)
2939 // fCurrentSmallData points already to the next item
2940 if (index < fCurrentSmallData)
2941 fCurrentSmallData += change;