2 * Copyright 2001-2017, Axel Dörfler, axeld@pinc-software.de.
3 * This file may be used under the terms of the MIT License.
7 //! Inode access functions
12 #include "BPlusTree.h"
16 #if BFS_TRACING && !defined(FS_SHELL) && !defined(_BOOT_MODE)
17 namespace BFSInodeTracing
{
19 class Create
: public AbstractTraceEntry
{
21 Create(Inode
* inode
, Inode
* parent
, const char* name
, int32 mode
,
22 int openMode
, uint32 type
)
27 fParentID(parent
!= NULL
? parent
->ID() : 0),
33 strlcpy(fName
, name
, sizeof(fName
));
40 virtual void AddDump(TraceOutput
& out
)
42 out
.Print("bfs:Create %Ld (%p), parent %Ld (%p), \"%s\", "
43 "mode %lx, omode %x, type %lx", fID
, fInode
, fParentID
,
44 fParent
, fName
, fMode
, fOpenMode
, fType
);
58 class Remove
: public AbstractTraceEntry
{
60 Remove(Inode
* inode
, const char* name
)
65 strlcpy(fName
, name
, sizeof(fName
));
69 virtual void AddDump(TraceOutput
& out
)
71 out
.Print("bfs:Remove %Ld (%p), \"%s\"", fID
, fInode
, fName
);
80 class Action
: public AbstractTraceEntry
{
82 Action(const char* action
, Inode
* inode
)
87 strlcpy(fAction
, action
, sizeof(fAction
));
91 virtual void AddDump(TraceOutput
& out
)
93 out
.Print("bfs:%s %Ld (%p)\n", fAction
, fID
, fInode
);
102 class Resize
: public AbstractTraceEntry
{
104 Resize(Inode
* inode
, off_t oldSize
, off_t newSize
, bool trim
)
115 virtual void AddDump(TraceOutput
& out
)
117 out
.Print("bfs:%s %Ld (%p), %Ld -> %Ld", fTrim
? "Trim" : "Resize",
118 fID
, fInode
, fOldSize
, fNewSize
);
129 } // namespace BFSInodeTracing
131 # define T(x) new(std::nothrow) BFSInodeTracing::x;
137 /*! A helper class used by Inode::Create() to keep track of the belongings
138 of an inode creation in progress.
139 This class will make sure everything is cleaned up properly.
141 class InodeAllocator
{
143 InodeAllocator(Transaction
& transaction
);
146 status_t
New(block_run
* parentRun
, mode_t mode
, uint32 flags
,
147 block_run
& run
, fs_vnode_ops
* vnodeOps
,
149 status_t
CreateTree();
150 status_t
Keep(fs_vnode_ops
* vnodeOps
, uint32 publishFlags
);
153 static void _TransactionListener(int32 id
, int32 event
,
156 Transaction
* fTransaction
;
162 InodeAllocator::InodeAllocator(Transaction
& transaction
)
164 fTransaction(&transaction
),
170 InodeAllocator::~InodeAllocator()
172 if (fTransaction
!= NULL
) {
173 Volume
* volume
= fTransaction
->GetVolume();
175 if (fInode
!= NULL
) {
176 fInode
->Node().flags
&= ~HOST_ENDIAN_TO_BFS_INT32(INODE_IN_USE
);
177 // this unblocks any pending bfs_read_vnode() calls
178 fInode
->Free(*fTransaction
);
180 if (fInode
->fTree
!= NULL
)
181 fTransaction
->RemoveListener(fInode
->fTree
);
182 fTransaction
->RemoveListener(fInode
);
184 remove_vnode(volume
->FSVolume(), fInode
->ID());
186 volume
->Free(*fTransaction
, fRun
);
194 InodeAllocator::New(block_run
* parentRun
, mode_t mode
, uint32 publishFlags
,
195 block_run
& run
, fs_vnode_ops
* vnodeOps
, Inode
** _inode
)
197 Volume
* volume
= fTransaction
->GetVolume();
199 status_t status
= volume
->AllocateForInode(*fTransaction
, parentRun
, mode
,
202 // don't free the space in the destructor, because
203 // the allocation failed
205 RETURN_ERROR(status
);
209 fInode
= new(std::nothrow
) Inode(volume
, *fTransaction
,
210 volume
->ToVnode(run
), mode
, run
);
212 RETURN_ERROR(B_NO_MEMORY
);
214 if (!volume
->IsInitializing()
215 && (publishFlags
& BFS_DO_NOT_PUBLISH_VNODE
) == 0) {
216 status
= new_vnode(volume
->FSVolume(), fInode
->ID(), fInode
,
217 vnodeOps
!= NULL
? vnodeOps
: &gBFSVnodeOps
);
221 RETURN_ERROR(status
);
225 fInode
->WriteLockInTransaction(*fTransaction
);
232 InodeAllocator::CreateTree()
234 Volume
* volume
= fTransaction
->GetVolume();
236 // force S_STR_INDEX to be set, if no type is set
237 if ((fInode
->Mode() & S_INDEX_TYPES
) == 0)
238 fInode
->Node().mode
|= HOST_ENDIAN_TO_BFS_INT32(S_STR_INDEX
);
240 BPlusTree
* tree
= new(std::nothrow
) BPlusTree(*fTransaction
, fInode
);
244 status_t status
= tree
->InitCheck();
245 if (status
!= B_OK
) {
250 fInode
->fTree
= tree
;
252 if (fInode
->IsRegularNode()) {
253 if (tree
->Insert(*fTransaction
, ".", fInode
->ID()) < B_OK
254 || tree
->Insert(*fTransaction
, "..",
255 volume
->ToVnode(fInode
->Parent())) < B_OK
)
263 InodeAllocator::Keep(fs_vnode_ops
* vnodeOps
, uint32 publishFlags
)
265 ASSERT(fInode
!= NULL
&& fTransaction
!= NULL
);
266 Volume
* volume
= fTransaction
->GetVolume();
268 status_t status
= fInode
->WriteBack(*fTransaction
);
270 FATAL(("writing new inode %" B_PRIdINO
" failed!\n", fInode
->ID()));
274 // Symbolic links are not published -- the caller needs to do this once
275 // the contents have been written.
276 if (!fInode
->IsSymLink() && !volume
->IsInitializing()
277 && (publishFlags
& BFS_DO_NOT_PUBLISH_VNODE
) == 0) {
278 status
= publish_vnode(volume
->FSVolume(), fInode
->ID(), fInode
,
279 vnodeOps
!= NULL
? vnodeOps
: &gBFSVnodeOps
, fInode
->Mode(),
283 if (status
== B_OK
) {
284 cache_add_transaction_listener(volume
->BlockCache(), fTransaction
->ID(),
285 TRANSACTION_ABORTED
, &_TransactionListener
, fInode
);
296 InodeAllocator::_TransactionListener(int32 id
, int32 event
, void* _inode
)
298 Inode
* inode
= (Inode
*)_inode
;
300 if (event
== TRANSACTION_ABORTED
)
301 panic("transaction %d aborted, inode %p still around!\n", (int)id
, inode
);
309 bfs_inode::InitCheck(Volume
* volume
) const
311 if (Magic1() != INODE_MAGIC1
312 || !(Flags() & INODE_IN_USE
)
313 || inode_num
.Length() != 1
314 // matches inode size?
315 || (uint32
)InodeSize() != volume
->InodeSize()
316 // parent resides on disk?
317 || parent
.AllocationGroup() > int32(volume
->AllocationGroups())
318 || parent
.AllocationGroup() < 0
319 || parent
.Start() > (1L << volume
->AllocationGroupShift())
320 || parent
.Length() != 1
322 || attributes
.AllocationGroup() > int32(volume
->AllocationGroups())
323 || attributes
.AllocationGroup() < 0
324 || attributes
.Start() > (1L << volume
->AllocationGroupShift()))
325 RETURN_ERROR(B_BAD_DATA
);
327 if (Flags() & INODE_DELETED
)
328 return B_NOT_ALLOWED
;
330 // TODO: Add some tests to check the integrity of the other stuff here,
331 // especially for the data_stream!
337 // #pragma mark - Inode
340 Inode::Inode(Volume
* volume
, ino_t id
)
349 PRINT(("Inode::Inode(volume = %p, id = %Ld) @ %p\n", volume
, id
, this));
351 rw_lock_init(&fLock
, "bfs inode");
352 recursive_lock_init(&fSmallDataLock
, "bfs inode small data");
354 if (UpdateNodeFromDisk() != B_OK
) {
355 // TODO: the error code gets eaten
359 // these two will help to maintain the indices
361 fOldLastModified
= LastModified();
364 fTree
= new(std::nothrow
) BPlusTree(this);
365 if (NeedsFileCache()) {
366 SetFileCache(file_cache_create(fVolume
->ID(), ID(), Size()));
367 SetMap(file_map_create(volume
->ID(), ID(), Size()));
372 Inode::Inode(Volume
* volume
, Transaction
& transaction
, ino_t id
, mode_t mode
,
382 PRINT(("Inode::Inode(volume = %p, transaction = %p, id = %Ld) @ %p\n",
383 volume
, &transaction
, id
, this));
385 rw_lock_init(&fLock
, "bfs inode");
386 recursive_lock_init(&fSmallDataLock
, "bfs inode small data");
388 NodeGetter
node(volume
, transaction
, this, true);
389 if (node
.Node() == NULL
) {
390 FATAL(("Could not read inode block %" B_PRId64
"!\n", BlockNumber()));
394 memset(&fNode
, 0, sizeof(bfs_inode
));
396 // Initialize the bfs_inode structure -- it's not written back to disk
397 // here, because it will be done so already when the inode could be
398 // created completely.
400 Node().magic1
= HOST_ENDIAN_TO_BFS_INT32(INODE_MAGIC1
);
401 Node().inode_num
= run
;
402 Node().mode
= HOST_ENDIAN_TO_BFS_INT32(mode
);
403 Node().flags
= HOST_ENDIAN_TO_BFS_INT32(INODE_IN_USE
);
405 Node().create_time
= Node().last_modified_time
= Node().status_change_time
406 = HOST_ENDIAN_TO_BFS_INT64(bfs_inode::ToInode(real_time_clock_usecs()));
408 Node().inode_size
= HOST_ENDIAN_TO_BFS_INT32(volume
->InodeSize());
410 // these two will help to maintain the indices
412 fOldLastModified
= LastModified();
418 PRINT(("Inode::~Inode() @ %p\n", this));
420 file_cache_delete(FileCache());
421 file_map_delete(Map());
424 rw_lock_destroy(&fLock
);
425 recursive_lock_destroy(&fSmallDataLock
);
430 Inode::InitCheck(bool checkNode
) const
432 // test inode magic and flags
434 status_t status
= Node().InitCheck(fVolume
);
435 if (status
== B_BUSY
)
438 if (status
!= B_OK
) {
439 FATAL(("inode at block %" B_PRIdOFF
" corrupt!\n", BlockNumber()));
440 RETURN_ERROR(B_BAD_DATA
);
445 // inodes that have a B+tree
447 RETURN_ERROR(B_NO_MEMORY
);
449 status_t status
= fTree
->InitCheck();
450 if (status
!= B_OK
) {
451 FATAL(("inode tree at block %" B_PRIdOFF
" corrupt!\n",
453 RETURN_ERROR(B_BAD_DATA
);
457 if (NeedsFileCache() && (fCache
== NULL
|| fMap
== NULL
))
464 /*! Adds this inode to the specified transaction. This means that the inode will
465 be write locked until the transaction ended.
466 To ensure that the inode will stay valid until that point, an extra reference
467 is acquired to it as long as this transaction stays active.
470 Inode::WriteLockInTransaction(Transaction
& transaction
)
472 // These flags can only change while holding the transaction lock
473 if ((Flags() & INODE_IN_TRANSACTION
) != 0)
476 // We share the same list link with the removed list, so we have to remove
477 // the inode from that list here (and add it back when we no longer need it)
478 if ((Flags() & INODE_DELETED
) != 0)
479 fVolume
->RemovedInodes().Remove(this);
481 if (!fVolume
->IsInitializing())
482 acquire_vnode(fVolume
->FSVolume(), ID());
484 rw_lock_write_lock(&Lock());
485 Node().flags
|= HOST_ENDIAN_TO_BFS_INT32(INODE_IN_TRANSACTION
);
487 transaction
.AddListener(this);
492 Inode::WriteBack(Transaction
& transaction
)
494 NodeGetter
node(fVolume
, transaction
, this);
495 if (node
.WritableNode() == NULL
)
498 memcpy(node
.WritableNode(), &Node(), sizeof(bfs_inode
));
504 Inode::UpdateNodeFromDisk()
506 NodeGetter
node(fVolume
, this);
507 if (node
.Node() == NULL
) {
508 FATAL(("Failed to read block %" B_PRId64
" from disk!\n",
513 memcpy(&fNode
, node
.Node(), sizeof(bfs_inode
));
514 fNode
.flags
&= HOST_ENDIAN_TO_BFS_INT32(INODE_PERMANENT_FLAGS
);
520 Inode::CheckPermissions(int accessMode
) const
522 // you never have write access to a read-only volume
523 if ((accessMode
& W_OK
) != 0 && fVolume
->IsReadOnly())
524 return B_READ_ONLY_DEVICE
;
526 return check_access_permissions(accessMode
, Mode(), (gid_t
)fNode
.GroupID(),
527 (uid_t
)fNode
.UserID());
531 // #pragma mark - attributes
535 Inode::_AddIterator(AttributeIterator
* iterator
)
537 RecursiveLocker
_(fSmallDataLock
);
538 fIterators
.Add(iterator
);
543 Inode::_RemoveIterator(AttributeIterator
* iterator
)
545 RecursiveLocker
_(fSmallDataLock
);
546 fIterators
.Remove(iterator
);
550 /*! Tries to free up "bytes" space in the small_data section by moving
551 attributes to real files. Used for system attributes like the name.
552 You need to hold the fSmallDataLock when you call this method
555 Inode::_MakeSpaceForSmallData(Transaction
& transaction
, bfs_inode
* node
,
556 const char* name
, int32 bytes
)
558 ASSERT_LOCKED_RECURSIVE(&fSmallDataLock
);
561 small_data
* item
= node
->SmallDataStart();
562 small_data
* max
= NULL
;
563 int32 index
= 0, maxIndex
= 0;
564 for (; !item
->IsLast(node
); item
= item
->Next(), index
++) {
565 // should not remove those
566 if (*item
->Name() == FILE_NAME_NAME
|| !strcmp(name
, item
->Name()))
569 if (max
== NULL
|| max
->Size() < item
->Size()) {
574 // Remove the first one large enough to free the needed amount of
576 if (bytes
< (int32
)item
->Size())
580 if (item
->IsLast(node
) || (int32
)item
->Size() < bytes
)
583 bytes
-= max
->Size();
585 // Move the attribute to a real attribute file
586 // Luckily, this doesn't cause any index updates
589 status_t status
= CreateAttribute(transaction
, item
->Name(),
590 item
->Type(), &attribute
);
592 RETURN_ERROR(status
);
594 size_t length
= item
->DataSize();
595 status
= attribute
->WriteAt(transaction
, 0, item
->Data(), &length
);
597 ReleaseAttribute(attribute
);
599 if (status
!= B_OK
) {
600 Vnode
vnode(fVolume
, Attributes());
602 if (vnode
.Get(&attributes
) < B_OK
603 || attributes
->Remove(transaction
, name
) < B_OK
) {
604 FATAL(("Could not remove newly created attribute!\n"));
607 RETURN_ERROR(status
);
610 _RemoveSmallData(node
, max
, maxIndex
);
616 /*! Private function which removes the given attribute from the small_data
618 You need to hold the fSmallDataLock when you call this method
621 Inode::_RemoveSmallData(bfs_inode
* node
, small_data
* item
, int32 index
)
623 ASSERT_LOCKED_RECURSIVE(&fSmallDataLock
);
625 small_data
* next
= item
->Next();
626 if (!next
->IsLast(node
)) {
627 // find the last attribute
628 small_data
* last
= next
;
629 while (!last
->IsLast(node
))
632 int32 size
= (uint8
*)last
- (uint8
*)next
;
634 || size
> (uint8
*)node
+ fVolume
->BlockSize() - (uint8
*)next
)
637 memmove(item
, next
, size
);
639 // Move the "last" one to its new location and
640 // correctly terminate the small_data section
641 last
= (small_data
*)((uint8
*)last
- ((uint8
*)next
- (uint8
*)item
));
642 memset(last
, 0, (uint8
*)node
+ fVolume
->BlockSize() - (uint8
*)last
);
644 memset(item
, 0, item
->Size());
646 // update all current iterators
647 SinglyLinkedList
<AttributeIterator
>::Iterator iterator
648 = fIterators
.GetIterator();
649 while (iterator
.HasNext()) {
650 iterator
.Next()->Update(index
, -1);
657 //! Removes the given attribute from the small_data section.
659 Inode::_RemoveSmallData(Transaction
& transaction
, NodeGetter
& nodeGetter
,
665 bfs_inode
* node
= nodeGetter
.WritableNode();
666 RecursiveLocker
locker(fSmallDataLock
);
668 // search for the small_data item
670 small_data
* item
= node
->SmallDataStart();
672 while (!item
->IsLast(node
) && strcmp(item
->Name(), name
)) {
677 if (item
->IsLast(node
))
678 return B_ENTRY_NOT_FOUND
;
680 nodeGetter
.MakeWritable(transaction
);
682 status_t status
= _RemoveSmallData(node
, item
, index
);
683 if (status
== B_OK
) {
684 Node().status_change_time
= HOST_ENDIAN_TO_BFS_INT64(
685 bfs_inode::ToInode(real_time_clock_usecs()));
687 status
= WriteBack(transaction
);
694 /*! Try to place the given attribute in the small_data section - if the
695 new attribute is too big to fit in that section, it returns B_DEVICE_FULL.
696 In that case, the attribute should be written to a real attribute file;
697 it's the caller's responsibility to remove any existing attributes in the
698 small data section if that's the case.
700 Note that you need to write back the inode yourself after having called that
701 method - it's a bad API decision that it needs a transaction but forces
702 you to write back the inode all by yourself, but it's just more efficient
706 Inode::_AddSmallData(Transaction
& transaction
, NodeGetter
& nodeGetter
,
707 const char* name
, uint32 type
, off_t pos
, const uint8
* data
, size_t length
,
710 bfs_inode
* node
= nodeGetter
.WritableNode();
712 if (node
== NULL
|| name
== NULL
|| data
== NULL
)
715 // reject any requests that can't fit into the small_data section
716 uint32 nameLength
= strlen(name
);
717 uint32 spaceNeeded
= sizeof(small_data
) + nameLength
+ 3 + pos
+ length
+ 1;
718 if (spaceNeeded
> fVolume
->InodeSize() - sizeof(bfs_inode
))
719 return B_DEVICE_FULL
;
721 nodeGetter
.MakeWritable(transaction
);
722 RecursiveLocker
locker(fSmallDataLock
);
724 // Find the last item or one with the same name we have to add
725 small_data
* item
= node
->SmallDataStart();
727 while (!item
->IsLast(node
) && strcmp(item
->Name(), name
)) {
732 // is the attribute already in the small_data section?
733 // then just replace the data part of that one
734 if (!item
->IsLast(node
)) {
735 // find last attribute
736 small_data
* last
= item
;
737 while (!last
->IsLast(node
))
740 // try to change the attribute's value
741 if (item
->data_size
> pos
+ length
743 || ((uint8
*)last
+ pos
+ length
- item
->DataSize())
744 <= ((uint8
*)node
+ fVolume
->InodeSize())) {
745 // Make room for the new attribute if needed (and we are forced
747 if (force
&& ((uint8
*)last
+ pos
+ length
- item
->DataSize())
748 > ((uint8
*)node
+ fVolume
->InodeSize())) {
749 // We also take the free space at the end of the small_data
750 // section into account, and request only what's really needed
751 uint32 needed
= pos
+ length
- item
->DataSize() -
752 (uint32
)((uint8
*)node
+ fVolume
->InodeSize()
755 if (_MakeSpaceForSmallData(transaction
, node
, name
, needed
)
759 // reset our pointers
760 item
= node
->SmallDataStart();
762 while (!item
->IsLast(node
) && strcmp(item
->Name(), name
)) {
768 while (!last
->IsLast(node
))
772 size_t oldDataSize
= item
->DataSize();
774 // Normally, we can just overwrite the attribute data as the size
775 // is specified by the type and does not change that often
776 if (pos
+ length
!= item
->DataSize()) {
777 // move the attributes after the current one
778 small_data
* next
= item
->Next();
779 if (!next
->IsLast(node
)) {
780 memmove((uint8
*)item
+ spaceNeeded
, next
,
781 (uint8
*)last
- (uint8
*)next
);
784 // Move the "last" one to its new location and
785 // correctly terminate the small_data section
786 last
= (small_data
*)((uint8
*)last
787 - ((uint8
*)next
- ((uint8
*)item
+ spaceNeeded
)));
788 if ((uint8
*)last
< (uint8
*)node
+ fVolume
->BlockSize()) {
789 memset(last
, 0, (uint8
*)node
+ fVolume
->BlockSize()
793 item
->data_size
= HOST_ENDIAN_TO_BFS_INT16(pos
+ length
);
796 item
->type
= HOST_ENDIAN_TO_BFS_INT32(type
);
798 if ((uint64
)oldDataSize
< (uint64
)pos
) {
799 // Fill gap with zeros
800 memset(item
->Data() + oldDataSize
, 0, pos
- oldDataSize
);
802 memcpy(item
->Data() + pos
, data
, length
);
803 item
->Data()[pos
+ length
] = '\0';
808 return B_DEVICE_FULL
;
811 // try to add the new attribute!
813 if ((uint8
*)item
+ spaceNeeded
> (uint8
*)node
+ fVolume
->InodeSize()) {
814 // there is not enough space for it!
816 return B_DEVICE_FULL
;
818 // make room for the new attribute
819 if (_MakeSpaceForSmallData(transaction
, node
, name
, spaceNeeded
) < B_OK
)
822 // get new last item!
823 item
= node
->SmallDataStart();
825 while (!item
->IsLast(node
)) {
831 memset(item
, 0, spaceNeeded
);
832 item
->type
= HOST_ENDIAN_TO_BFS_INT32(type
);
833 item
->name_size
= HOST_ENDIAN_TO_BFS_INT16(nameLength
);
834 item
->data_size
= HOST_ENDIAN_TO_BFS_INT16(length
);
835 strcpy(item
->Name(), name
);
836 memcpy(item
->Data() + pos
, data
, length
);
838 // correctly terminate the small_data section
840 if (!item
->IsLast(node
))
841 memset(item
, 0, (uint8
*)node
+ fVolume
->InodeSize() - (uint8
*)item
);
843 // update all current iterators
844 SinglyLinkedList
<AttributeIterator
>::Iterator iterator
845 = fIterators
.GetIterator();
846 while (iterator
.HasNext()) {
847 iterator
.Next()->Update(index
, 1);
854 /*! Iterates through the small_data section of an inode.
855 To start at the beginning of this section, you let smallData
857 small_data* data = NULL;
858 while (inode->GetNextSmallData(&data) { ... }
860 This function is reentrant and doesn't allocate any memory;
861 you can safely stop calling it at any point (you don't need
862 to iterate through the whole list).
863 You need to hold the fSmallDataLock when you call this method
866 Inode::_GetNextSmallData(bfs_inode
* node
, small_data
** _smallData
) const
869 RETURN_ERROR(B_BAD_VALUE
);
871 ASSERT_LOCKED_RECURSIVE(&fSmallDataLock
);
873 small_data
* data
= *_smallData
;
875 // begin from the start?
877 data
= node
->SmallDataStart();
881 // is already last item?
882 if (data
->IsLast(node
))
883 return B_ENTRY_NOT_FOUND
;
891 /*! Finds the attribute "name" in the small data section, and
892 returns a pointer to it (or NULL if it doesn't exist).
893 You need to hold the fSmallDataLock when you call this method
896 Inode::FindSmallData(const bfs_inode
* node
, const char* name
) const
898 ASSERT_LOCKED_RECURSIVE(&fSmallDataLock
);
900 small_data
* smallData
= NULL
;
901 while (_GetNextSmallData(const_cast<bfs_inode
*>(node
), &smallData
)
903 if (!strcmp(smallData
->Name(), name
))
910 /*! Returns a pointer to the node's name if present in the small data
911 section, NULL otherwise.
912 You need to hold the fSmallDataLock when you call this method
915 Inode::Name(const bfs_inode
* node
) const
917 ASSERT_LOCKED_RECURSIVE(&fSmallDataLock
);
919 small_data
* smallData
= NULL
;
920 while (_GetNextSmallData((bfs_inode
*)node
, &smallData
) == B_OK
) {
921 if (*smallData
->Name() == FILE_NAME_NAME
922 && smallData
->NameSize() == FILE_NAME_NAME_LENGTH
)
923 return (const char*)smallData
->Data();
929 /*! Copies the node's name into the provided buffer.
930 The buffer should be B_FILE_NAME_LENGTH bytes large.
933 Inode::GetName(char* buffer
, size_t size
) const
935 NodeGetter
node(fVolume
, this);
936 if (node
.Node() == NULL
)
939 RecursiveLocker
locker(fSmallDataLock
);
941 const char* name
= Name(node
.Node());
943 return B_ENTRY_NOT_FOUND
;
945 strlcpy(buffer
, name
, size
);
950 /*! Changes or sets the name of a file: in the inode small_data section only, it
951 doesn't change it in the parent directory's b+tree.
952 Note that you need to write back the inode yourself after having called
953 that method. It suffers from the same API decision as AddSmallData() does
954 (and for the same reason).
957 Inode::SetName(Transaction
& transaction
, const char* name
)
959 if (name
== NULL
|| *name
== '\0')
962 NodeGetter
node(fVolume
, transaction
, this);
963 if (node
.Node() == NULL
)
966 const char nameTag
[2] = {FILE_NAME_NAME
, 0};
968 return _AddSmallData(transaction
, node
, nameTag
, FILE_NAME_TYPE
, 0,
969 (uint8
*)name
, strlen(name
), true);
974 Inode::_RemoveAttribute(Transaction
& transaction
, const char* name
,
975 bool hasIndex
, Index
* index
)
977 // remove the attribute file if it exists
978 Vnode
vnode(fVolume
, Attributes());
980 status_t status
= vnode
.Get(&attributes
);
987 if ((hasIndex
|| fVolume
->CheckForLiveQuery(name
))
988 && GetAttribute(name
, &attribute
) == B_OK
) {
989 uint8 data
[MAX_INDEX_KEY_LENGTH
];
990 size_t length
= MAX_INDEX_KEY_LENGTH
;
991 if (attribute
->ReadAt(0, data
, &length
) == B_OK
) {
992 index
->Update(transaction
, name
, attribute
->Type(), data
,
993 length
, NULL
, 0, this);
996 ReleaseAttribute(attribute
);
1000 if ((status
= attributes
->Remove(transaction
, name
)) < B_OK
)
1003 if (attributes
->IsEmpty()) {
1004 attributes
->WriteLockInTransaction(transaction
);
1006 // remove attribute directory (don't fail if that can't be done)
1007 if (remove_vnode(fVolume
->FSVolume(), attributes
->ID()) == B_OK
) {
1008 // update the inode, so that no one will ever doubt it's deleted :-)
1009 attributes
->Node().flags
|= HOST_ENDIAN_TO_BFS_INT32(INODE_DELETED
);
1010 if (attributes
->WriteBack(transaction
) == B_OK
) {
1011 Attributes().SetTo(0, 0, 0);
1012 WriteBack(transaction
);
1014 unremove_vnode(fVolume
->FSVolume(), attributes
->ID());
1015 attributes
->Node().flags
1016 &= ~HOST_ENDIAN_TO_BFS_INT32(INODE_DELETED
);
1025 /*! Reads data from the specified attribute.
1026 This is a high-level attribute function that understands attributes
1027 in the small_data section as well as real attribute files.
1030 Inode::ReadAttribute(const char* name
, int32 type
, off_t pos
, uint8
* buffer
,
1036 // search in the small_data section (which has to be locked first)
1038 NodeGetter
node(fVolume
, this);
1039 if (node
.Node() == NULL
)
1042 RecursiveLocker
locker(fSmallDataLock
);
1044 small_data
* smallData
= FindSmallData(node
.Node(), name
);
1045 if (smallData
!= NULL
) {
1046 size_t length
= *_length
;
1047 if (pos
>= smallData
->data_size
) {
1051 if (length
+ pos
> smallData
->DataSize())
1052 length
= smallData
->DataSize() - pos
;
1054 memcpy(buffer
, smallData
->Data() + pos
, length
);
1060 // search in the attribute directory
1062 status_t status
= GetAttribute(name
, &attribute
);
1063 if (status
== B_OK
) {
1064 status
= attribute
->ReadAt(pos
, (uint8
*)buffer
, _length
);
1066 ReleaseAttribute(attribute
);
1069 RETURN_ERROR(status
);
1073 /*! Writes data to the specified attribute.
1074 This is a high-level attribute function that understands attributes
1075 in the small_data section as well as real attribute files.
1078 Inode::WriteAttribute(Transaction
& transaction
, const char* name
, int32 type
,
1079 off_t pos
, const uint8
* buffer
, size_t* _length
, bool* _created
)
1084 // needed to maintain the index
1085 uint8 oldBuffer
[MAX_INDEX_KEY_LENGTH
];
1086 uint8
* oldData
= NULL
;
1087 size_t oldLength
= 0;
1088 bool created
= false;
1090 // TODO: we actually depend on that the contents of "buffer" are constant.
1091 // If they get changed during the write (hey, user programs), we may mess
1092 // up our index trees!
1093 // TODO: for attribute files, we need to log the first
1094 // MAX_INDEX_KEY_LENGTH bytes of the data stream, or the same as above
1097 Index
index(fVolume
);
1098 bool hasIndex
= index
.SetTo(name
) == B_OK
;
1100 Inode
* attribute
= NULL
;
1101 status_t status
= B_OK
;
1103 if (GetAttribute(name
, &attribute
) != B_OK
) {
1104 // No attribute inode exists yet
1106 // save the old attribute data
1107 NodeGetter
node(fVolume
, transaction
, this);
1108 if (node
.Node() == NULL
)
1111 recursive_lock_lock(&fSmallDataLock
);
1113 small_data
* smallData
= FindSmallData(node
.Node(), name
);
1114 if (smallData
!= NULL
) {
1115 oldLength
= smallData
->DataSize();
1116 if (oldLength
> 0) {
1117 if (oldLength
> MAX_INDEX_KEY_LENGTH
)
1118 oldLength
= MAX_INDEX_KEY_LENGTH
;
1119 memcpy(oldData
= oldBuffer
, smallData
->Data(), oldLength
);
1124 recursive_lock_unlock(&fSmallDataLock
);
1126 // if the attribute doesn't exist yet (as a file), try to put it in the
1127 // small_data section first - if that fails (due to insufficient space),
1128 // create a real attribute file
1129 status
= _AddSmallData(transaction
, node
, name
, type
, pos
, buffer
,
1131 if (status
== B_DEVICE_FULL
) {
1132 if (smallData
!= NULL
) {
1133 // remove the old attribute from the small data section - there
1134 // is no space left for the new data
1135 status
= _RemoveSmallData(transaction
, node
, name
);
1140 status
= CreateAttribute(transaction
, name
, type
, &attribute
);
1142 RETURN_ERROR(status
);
1145 } else if (status
== B_OK
) {
1146 // Update status time on attribute write
1147 Node().status_change_time
= HOST_ENDIAN_TO_BFS_INT64(
1148 bfs_inode::ToInode(real_time_clock_usecs()));
1150 status
= WriteBack(transaction
);
1154 if (attribute
!= NULL
) {
1155 WriteLocker
writeLocker(attribute
->fLock
);
1157 if (hasIndex
|| fVolume
->CheckForLiveQuery(name
)) {
1158 // Save the old attribute data (if this fails, oldLength will
1160 while (attribute
->Size() > 0) {
1161 bigtime_t oldModified
= attribute
->LastModified();
1162 writeLocker
.Unlock();
1164 oldLength
= MAX_INDEX_KEY_LENGTH
;
1165 if (attribute
->ReadAt(0, oldBuffer
, &oldLength
) == B_OK
)
1166 oldData
= oldBuffer
;
1170 // Read until the data hasn't changed in between
1171 if (oldModified
== attribute
->LastModified())
1178 // check if the data fits into the small_data section again
1179 NodeGetter
node(fVolume
, transaction
, this);
1180 if (node
.Node() == NULL
)
1183 status
= _AddSmallData(transaction
, node
, name
, type
, pos
, buffer
,
1186 if (status
== B_OK
) {
1187 // it does - remove its file
1188 writeLocker
.Unlock();
1189 status
= _RemoveAttribute(transaction
, name
, false, NULL
);
1191 // The attribute type might have been changed - we need to
1192 // adopt the new one
1193 attribute
->Node().type
= HOST_ENDIAN_TO_BFS_INT32(type
);
1194 status
= attribute
->WriteBack(transaction
);
1195 writeLocker
.Unlock();
1197 if (status
== B_OK
) {
1198 status
= attribute
->WriteAt(transaction
, pos
, buffer
,
1203 if (status
== B_OK
) {
1204 // Update status time on attribute write
1205 Node().status_change_time
= HOST_ENDIAN_TO_BFS_INT64(
1206 bfs_inode::ToInode(real_time_clock_usecs()));
1208 status
= WriteBack(transaction
);
1211 attribute
->WriteLockInTransaction(transaction
);
1212 ReleaseAttribute(attribute
);
1215 // TODO: find a better way than this "pos" thing (the begin of the old key
1216 // must be copied to the start of the new one for a comparison)
1217 if (status
== B_OK
&& pos
== 0) {
1218 // Index only the first MAX_INDEX_KEY_LENGTH bytes
1219 uint16 length
= *_length
;
1220 if (length
> MAX_INDEX_KEY_LENGTH
)
1221 length
= MAX_INDEX_KEY_LENGTH
;
1223 // Update index. Note, Index::Update() may be called even if
1224 // initializing the index failed - it will just update the live
1225 // queries in this case
1226 if (pos
< length
|| (uint64
)pos
< (uint64
)oldLength
) {
1227 index
.Update(transaction
, name
, type
, oldData
, oldLength
, buffer
,
1232 if (_created
!= NULL
)
1233 *_created
= created
;
1239 /*! Removes the specified attribute from the inode.
1240 This is a high-level attribute function that understands attributes
1241 in the small_data section as well as real attribute files.
1244 Inode::RemoveAttribute(Transaction
& transaction
, const char* name
)
1246 Index
index(fVolume
);
1247 bool hasIndex
= index
.SetTo(name
) == B_OK
;
1248 NodeGetter
node(fVolume
, this);
1249 if (node
.Node() == NULL
)
1252 // update index for attributes in the small_data section
1254 RecursiveLocker
_(fSmallDataLock
);
1256 small_data
* smallData
= FindSmallData(node
.Node(), name
);
1257 if (smallData
!= NULL
) {
1258 uint32 length
= smallData
->DataSize();
1259 if (length
> MAX_INDEX_KEY_LENGTH
)
1260 length
= MAX_INDEX_KEY_LENGTH
;
1261 index
.Update(transaction
, name
, smallData
->Type(),
1262 smallData
->Data(), length
, NULL
, 0, this);
1266 status_t status
= _RemoveSmallData(transaction
, node
, name
);
1267 if (status
== B_ENTRY_NOT_FOUND
&& !Attributes().IsZero()) {
1268 // remove the attribute file if it exists
1269 status
= _RemoveAttribute(transaction
, name
, hasIndex
, &index
);
1270 if (status
== B_OK
) {
1271 Node().status_change_time
= HOST_ENDIAN_TO_BFS_INT64(
1272 bfs_inode::ToInode(real_time_clock_usecs()));
1273 WriteBack(transaction
);
1281 /*! Returns the attribute inode with the specified \a name, in case it exists.
1282 This method can only return real attribute files; the attributes in the
1283 small data section are ignored.
1286 Inode::GetAttribute(const char* name
, Inode
** _attribute
)
1288 // does this inode even have attributes?
1289 if (Attributes().IsZero())
1290 return B_ENTRY_NOT_FOUND
;
1292 Vnode
vnode(fVolume
, Attributes());
1294 if (vnode
.Get(&attributes
) < B_OK
) {
1295 FATAL(("get_vnode() failed in Inode::GetAttribute(name = \"%s\")\n",
1300 BPlusTree
* tree
= attributes
->Tree();
1304 InodeReadLocker
locker(attributes
);
1307 status_t status
= tree
->Find((uint8
*)name
, (uint16
)strlen(name
), &id
);
1308 if (status
== B_OK
) {
1309 Vnode
vnode(fVolume
, id
);
1311 // Check if the attribute is really an attribute
1312 if (vnode
.Get(&inode
) != B_OK
|| !inode
->IsAttribute())
1315 *_attribute
= inode
;
1325 Inode::ReleaseAttribute(Inode
* attribute
)
1327 if (attribute
== NULL
)
1330 put_vnode(fVolume
->FSVolume(), attribute
->ID());
1335 Inode::CreateAttribute(Transaction
& transaction
, const char* name
, uint32 type
,
1338 // do we need to create the attribute directory first?
1339 if (Attributes().IsZero()) {
1340 status_t status
= Inode::Create(transaction
, this, NULL
,
1341 S_ATTR_DIR
| S_DIRECTORY
| 0666, 0, 0, NULL
);
1343 RETURN_ERROR(status
);
1345 Vnode
vnode(fVolume
, Attributes());
1347 if (vnode
.Get(&attributes
) < B_OK
)
1350 // Inode::Create() locks the inode for us
1351 return Inode::Create(transaction
, attributes
, name
,
1352 S_ATTR
| S_FILE
| 0666, 0, type
, NULL
, NULL
, attribute
);
1356 // #pragma mark - directory tree
1362 TreeIterator
iterator(fTree
);
1365 char name
[MAX_INDEX_KEY_LENGTH
+ 1];
1368 while (iterator
.GetNextEntry(name
, &length
, MAX_INDEX_KEY_LENGTH
+ 1,
1370 if ((Mode() & (S_ATTR_DIR
| S_INDEX_DIR
)) != 0)
1373 // Unlike index and attribute directories, directories
1374 // for standard files always contain ".", and "..", so
1375 // we need to ignore those two
1376 if (++count
> 2 || (strcmp(".", name
) != 0 && strcmp("..", name
) != 0))
1384 Inode::ContainerContentsChanged(Transaction
& transaction
)
1386 ASSERT(!InLastModifiedIndex());
1388 Node().last_modified_time
= Node().status_change_time
1389 = HOST_ENDIAN_TO_BFS_INT64(bfs_inode::ToInode(real_time_clock_usecs()));
1391 return WriteBack(transaction
);
1395 // #pragma mark - data stream
1398 /*! Computes the number of bytes this inode occupies in the file system.
1399 This includes the file meta blocks used for maintaining its data stream.
1401 TODO: However, the attributes in extra files are not really accounted for;
1402 depending on the speed penalty, this should be changed, though (the value
1403 could be cached in the inode structure or Inode object, though).
1406 Inode::AllocatedSize() const
1408 if (IsSymLink() && (Flags() & INODE_LONG_SYMLINK
) == 0) {
1409 // This symlink does not have a data stream
1410 return Node().InodeSize();
1413 const data_stream
& data
= Node().data
;
1414 uint32 blockSize
= fVolume
->BlockSize();
1415 off_t size
= blockSize
;
1417 if (data
.MaxDoubleIndirectRange() != 0) {
1418 off_t doubleIndirectSize
= data
.MaxDoubleIndirectRange()
1419 - data
.MaxIndirectRange();
1420 int32 indirectSize
= double_indirect_max_indirect_size(
1421 data
.double_indirect
.Length(), fVolume
->BlockSize());
1423 size
+= (2 * data
.double_indirect
.Length()
1424 + doubleIndirectSize
/ indirectSize
)
1425 * blockSize
+ data
.MaxDoubleIndirectRange();
1426 } else if (data
.MaxIndirectRange() != 0)
1427 size
+= data
.indirect
.Length() + data
.MaxIndirectRange();
1429 size
+= data
.MaxDirectRange();
1431 if (!Node().attributes
.IsZero()) {
1432 // TODO: to make this exact, we'd had to count all attributes
1433 size
+= 2 * blockSize
;
1434 // 2 blocks, one for the attributes inode, one for its B+tree
1441 /*! Finds the block_run where "pos" is located in the data_stream of
1443 If successful, "offset" will then be set to the file offset
1444 of the block_run returned; so "pos - offset" is for the block_run
1445 what "pos" is for the whole stream.
1446 The caller has to make sure that "pos" is inside the stream.
1449 Inode::FindBlockRun(off_t pos
, block_run
& run
, off_t
& offset
)
1451 data_stream
* data
= &Node().data
;
1453 // find matching block run
1455 if (data
->MaxIndirectRange() > 0 && pos
>= data
->MaxDirectRange()) {
1456 if (data
->MaxDoubleIndirectRange() > 0
1457 && pos
>= data
->MaxIndirectRange()) {
1458 // access to double indirect blocks
1460 CachedBlock
cached(fVolume
);
1465 get_double_indirect_sizes(data
->double_indirect
.Length(),
1466 fVolume
->BlockSize(), runsPerBlock
, directSize
, indirectSize
);
1467 if (directSize
<= 0 || indirectSize
<= 0)
1468 RETURN_ERROR(B_BAD_DATA
);
1470 off_t start
= pos
- data
->MaxIndirectRange();
1471 int32 index
= start
/ indirectSize
;
1473 block_run
* indirect
= (block_run
*)cached
.SetTo(
1474 fVolume
->ToBlock(data
->double_indirect
) + index
/ runsPerBlock
);
1475 if (indirect
== NULL
)
1476 RETURN_ERROR(B_ERROR
);
1478 int32 current
= (start
% indirectSize
) / directSize
;
1480 indirect
= (block_run
*)cached
.SetTo(
1481 fVolume
->ToBlock(indirect
[index
% runsPerBlock
])
1482 + current
/ runsPerBlock
);
1483 if (indirect
== NULL
)
1484 RETURN_ERROR(B_ERROR
);
1486 run
= indirect
[current
% runsPerBlock
];
1487 if (run
.Length() != data
->double_indirect
.Length())
1488 RETURN_ERROR(B_BAD_DATA
);
1490 offset
= data
->MaxIndirectRange() + (index
* indirectSize
)
1491 + (current
* directSize
);
1493 // access to indirect blocks
1495 int32 runsPerBlock
= fVolume
->BlockSize() / sizeof(block_run
);
1496 off_t runBlockEnd
= data
->MaxDirectRange();
1498 CachedBlock
cached(fVolume
);
1499 off_t block
= fVolume
->ToBlock(data
->indirect
);
1501 for (int32 i
= 0; i
< data
->indirect
.Length(); i
++) {
1502 block_run
* indirect
= (block_run
*)cached
.SetTo(block
+ i
);
1503 if (indirect
== NULL
)
1504 RETURN_ERROR(B_IO_ERROR
);
1507 while (++current
< runsPerBlock
) {
1508 if (indirect
[current
].IsZero())
1511 runBlockEnd
+= (uint32
)indirect
[current
].Length()
1512 << cached
.BlockShift();
1513 if (runBlockEnd
> pos
) {
1514 run
= indirect
[current
];
1515 offset
= runBlockEnd
- ((uint32
)run
.Length()
1516 << cached
.BlockShift());
1517 return fVolume
->ValidateBlockRun(run
);
1521 RETURN_ERROR(B_ERROR
);
1524 // access from direct blocks
1526 off_t runBlockEnd
= 0LL;
1529 while (++current
< NUM_DIRECT_BLOCKS
) {
1530 if (data
->direct
[current
].IsZero())
1533 runBlockEnd
+= (uint32
)data
->direct
[current
].Length()
1534 << fVolume
->BlockShift();
1535 if (runBlockEnd
> pos
) {
1536 run
= data
->direct
[current
];
1537 offset
= runBlockEnd
1538 - ((uint32
)run
.Length() << fVolume
->BlockShift());
1539 return fVolume
->ValidateBlockRun(run
);
1543 return B_ENTRY_NOT_FOUND
;
1545 return fVolume
->ValidateBlockRun(run
);
1550 Inode::ReadAt(off_t pos
, uint8
* buffer
, size_t* _length
)
1552 size_t length
= *_length
;
1554 // set/check boundaries for pos/length
1558 InodeReadLocker
locker(this);
1560 if (pos
>= Size() || length
== 0) {
1567 return file_cache_read(FileCache(), NULL
, pos
, buffer
, _length
);
/*!	Writes \a *_length bytes from \a buffer at position \a pos through the
	file cache of this inode, growing the data stream first if the write
	extends beyond the current file size.
	The \a transaction is started here if the file needs to grow and it has
	not been started yet. If the position lies beyond the old end of the file,
	the gap in between is filled with zeros.
	Returns \c B_BAD_VALUE for a negative \a pos. If growing the file fails,
	\a *_length is set to 0 and the error is returned.
*/
status_t
Inode::WriteAt(Transaction& transaction, off_t pos, const uint8* buffer,
	size_t* _length)
{
	InodeReadLocker locker(this);

	// update the last modification time in memory, it will be written
	// back to the inode, and the index when the file is closed
	// TODO: should update the internal last modified time only at this point!
	Node().last_modified_time = Node().status_change_time
		= HOST_ENDIAN_TO_BFS_INT64(bfs_inode::ToInode(real_time_clock_usecs()));

	// TODO: support INODE_LOGGED!

	size_t length = *_length;
	// computed in unsigned 64 bit, so that pos + length cannot overflow
	bool changeSize = (uint64)pos + (uint64)length > (uint64)Size();

	// set/check boundaries for pos/length
	if (pos < 0)
		return B_BAD_VALUE;

	// the read lock is dropped before the transaction is started, and
	// before the write lock is acquired
	locker.Unlock();

	// the transaction doesn't have to be started already
	if (changeSize && !transaction.IsStarted())
		transaction.Start(fVolume, BlockNumber());

	WriteLocker writeLocker(fLock);

	// Work around possible race condition: Someone might have shrunken the file
	// while we had no lock.
	if (!transaction.IsStarted()
		&& (uint64)pos + (uint64)length > (uint64)Size()) {
		// the transaction is started without holding the inode lock
		writeLocker.Unlock();
		transaction.Start(fVolume, BlockNumber());
		writeLocker.Lock();
	}

	off_t oldSize = Size();

	if ((uint64)pos + (uint64)length > (uint64)oldSize) {
		// let's grow the data stream to the size needed
		status_t status = SetFileSize(transaction, pos + length);
		if (status != B_OK) {
			*_length = 0;
			WriteLockInTransaction(transaction);
			RETURN_ERROR(status);
		}
		// TODO: In theory we would need to update the file size
		// index here as part of the current transaction - this might just
		// be a bit too expensive, but worth a try.

		// we need to write back the inode here because it has to
		// go into this transaction (we cannot wait until the file
		// is closed)
		status = WriteBack(transaction);
		if (status != B_OK) {
			WriteLockInTransaction(transaction);
			return status;
		}
	}

	// the file cache is accessed without holding the inode lock
	writeLocker.Unlock();

	// clear the area between the old end of the file and the write position
	if (oldSize < pos)
		FillGapWithZeros(oldSize, pos);

	// If we don't want to write anything, we can now return (we may
	// just have changed the file size using the position parameter)
	if (length == 0)
		return B_OK;

	status_t status = file_cache_write(FileCache(), NULL, pos, buffer, _length);

	if (transaction.IsStarted())
		WriteLockInTransaction(transaction);

	return status;
}
1652 /*! Fills the gap between the old file size and the new file size
1654 It's more or less a copy of Inode::WriteAt() but it can handle
1655 length differences of more than just 4 GB, and it never uses
1656 the log, even if the INODE_LOGGED flag is set.
1659 Inode::FillGapWithZeros(off_t pos
, off_t newSize
)
1661 while (pos
< newSize
) {
1663 if (newSize
> pos
+ 1024 * 1024 * 1024)
1664 size
= 1024 * 1024 * 1024;
1666 size
= newSize
- pos
;
1668 status_t status
= file_cache_write(FileCache(), NULL
, pos
, NULL
, &size
);
1679 /*! Allocates \a length blocks, and clears their contents. Growing
1680 the indirect and double indirect range uses this method.
1681 The allocated block_run is saved in "run"
1684 Inode::_AllocateBlockArray(Transaction
& transaction
, block_run
& run
,
1685 size_t length
, bool variableSize
)
1690 status_t status
= fVolume
->Allocate(transaction
, this, length
, run
,
1691 variableSize
? 1 : length
);
1695 // make sure those blocks are empty
1696 CachedBlock
cached(fVolume
);
1697 off_t block
= fVolume
->ToBlock(run
);
1699 for (int32 i
= 0; i
< run
.Length(); i
++) {
1700 block_run
* runs
= (block_run
*)cached
.SetToWritable(transaction
,
1709 /*! Grows the stream to \a size, and fills the direct/indirect/double indirect
1710 ranges with the runs.
1711 This method will also determine the size of the preallocation, if any.
1714 Inode::_GrowStream(Transaction
& transaction
, off_t size
)
1716 data_stream
* data
= &Node().data
;
1718 // is the data stream already large enough to hold the new size?
1719 // (can be the case with preallocated blocks)
1720 if (size
< data
->MaxDirectRange()
1721 || size
< data
->MaxIndirectRange()
1722 || size
< data
->MaxDoubleIndirectRange()) {
1723 data
->size
= HOST_ENDIAN_TO_BFS_INT64(size
);
1727 // how many bytes are still needed? (unused ranges are always zero)
1730 if (data
->Size() < data
->MaxDoubleIndirectRange()) {
1731 bytes
= size
- data
->MaxDoubleIndirectRange();
1732 // The double indirect range can only handle multiples of
1734 minimum
= data
->double_indirect
.Length();
1735 } else if (data
->Size() < data
->MaxIndirectRange())
1736 bytes
= size
- data
->MaxIndirectRange();
1737 else if (data
->Size() < data
->MaxDirectRange())
1738 bytes
= size
- data
->MaxDirectRange();
1740 // no preallocation left to be used
1741 bytes
= size
- data
->Size();
1742 if (data
->MaxDoubleIndirectRange() > 0)
1743 minimum
= data
->double_indirect
.Length();
1746 // do we have enough free blocks on the disk?
1747 off_t blocksNeeded
= (bytes
+ fVolume
->BlockSize() - 1)
1748 >> fVolume
->BlockShift();
1749 if (blocksNeeded
> fVolume
->FreeBlocks())
1750 return B_DEVICE_FULL
;
1752 off_t blocksRequested
= blocksNeeded
;
1753 // because of preallocations and partial allocations, the number of
1754 // blocks we need to allocate may be different from the one we request
1755 // from the block allocator
1757 // Should we preallocate some blocks?
1758 // Attributes, attribute directories, and long symlinks usually won't get
1759 // that big, and should stay close to the inode - preallocating could be
1760 // counterproductive.
1761 // Also, if free disk space is tight, don't preallocate.
1762 if (!IsAttribute() && !IsAttributeDirectory() && !IsSymLink()
1763 && fVolume
->FreeBlocks() > 128) {
1766 // Request preallocated blocks depending on the file size and growth
1767 if (size
< 1 * 1024 * 1024 && bytes
< 512 * 1024) {
1768 // Preallocate 64 KB for file sizes <1 MB and grow rates <512 KB
1769 roundTo
= 65536 >> fVolume
->BlockShift();
1770 } else if (size
< 32 * 1024 * 1024 && bytes
<= 1 * 1024 * 1024) {
1771 // Preallocate 512 KB for file sizes between 1 MB and 32 MB, and
1772 // grow rates smaller than 1 MB
1773 roundTo
= (512 * 1024) >> fVolume
->BlockShift();
1775 // Preallocate 1/16 of the file size (ie. 4 MB for 64 MB,
1777 roundTo
= size
>> (fVolume
->BlockShift() + 4);
1779 } else if (IsIndex()) {
1780 // Always preallocate 64 KB for index directories
1781 roundTo
= 65536 >> fVolume
->BlockShift();
1783 // Preallocate only 4 KB - directories only get trimmed when their
1784 // vnode is flushed, which might not happen very often.
1785 roundTo
= 4096 >> fVolume
->BlockShift();
1788 // Round to next "roundTo" block count
1789 blocksRequested
= ((blocksNeeded
+ roundTo
) / roundTo
) * roundTo
;
1793 while (blocksNeeded
> 0) {
1794 // the requested blocks do not need to be returned with a
1795 // single allocation, so we need to iterate until we have
1796 // enough blocks allocated
1798 // make sure that "blocks" is a multiple of minimum
1799 blocksRequested
= round_up(blocksRequested
, minimum
);
1803 status_t status
= fVolume
->Allocate(transaction
, this, blocksRequested
,
1808 // okay, we have the needed blocks, so just distribute them to the
1809 // different ranges of the stream (direct, indirect & double indirect)
1811 blocksNeeded
-= run
.Length();
1812 // don't preallocate if the first allocation was already too small
1813 blocksRequested
= blocksNeeded
;
1815 // Direct block range
1817 if (data
->Size() <= data
->MaxDirectRange()) {
1818 // let's try to put them into the direct block range
1820 for (; free
< NUM_DIRECT_BLOCKS
; free
++) {
1821 if (data
->direct
[free
].IsZero())
1825 if (free
< NUM_DIRECT_BLOCKS
) {
1826 // can we merge the last allocated run with the new one?
1827 int32 last
= free
- 1;
1828 if (free
> 0 && data
->direct
[last
].MergeableWith(run
)) {
1829 data
->direct
[last
].length
= HOST_ENDIAN_TO_BFS_INT16(
1830 data
->direct
[last
].Length() + run
.Length());
1832 data
->direct
[free
] = run
;
1834 data
->max_direct_range
= HOST_ENDIAN_TO_BFS_INT64(
1835 data
->MaxDirectRange()
1836 + run
.Length() * fVolume
->BlockSize());
1837 data
->size
= HOST_ENDIAN_TO_BFS_INT64(blocksNeeded
> 0
1838 ? data
->max_direct_range
: size
);
1843 // Indirect block range
1845 if (data
->Size() <= data
->MaxIndirectRange()
1846 || !data
->MaxIndirectRange()) {
1847 CachedBlock
cached(fVolume
);
1848 block_run
* runs
= NULL
;
1852 // if there is no indirect block yet, create one
1853 if (data
->indirect
.IsZero()) {
1854 status
= _AllocateBlockArray(transaction
, data
->indirect
,
1855 NUM_ARRAY_BLOCKS
, true);
1859 data
->max_indirect_range
= HOST_ENDIAN_TO_BFS_INT64(
1860 data
->MaxDirectRange());
1861 // insert the block_run in the first block
1862 runs
= (block_run
*)cached
.SetTo(data
->indirect
);
1864 uint32 numberOfRuns
= fVolume
->BlockSize() / sizeof(block_run
);
1865 block
= fVolume
->ToBlock(data
->indirect
);
1867 // search first empty entry
1869 for (; i
< data
->indirect
.Length(); i
++) {
1870 if ((runs
= (block_run
*)cached
.SetTo(block
+ i
)) == NULL
)
1873 for (free
= 0; free
< numberOfRuns
; free
++)
1874 if (runs
[free
].IsZero())
1877 if (free
< numberOfRuns
)
1880 if (i
== data
->indirect
.Length())
1885 // try to insert the run to the last one - note that this
1886 // doesn't take block borders into account, so it could be
1887 // further optimized
1888 cached
.MakeWritable(transaction
);
1890 int32 last
= free
- 1;
1891 if (free
> 0 && runs
[last
].MergeableWith(run
)) {
1892 runs
[last
].length
= HOST_ENDIAN_TO_BFS_INT16(
1893 runs
[last
].Length() + run
.Length());
1897 data
->max_indirect_range
= HOST_ENDIAN_TO_BFS_INT64(
1898 data
->MaxIndirectRange()
1899 + ((uint32
)run
.Length() << fVolume
->BlockShift()));
1900 data
->size
= HOST_ENDIAN_TO_BFS_INT64(blocksNeeded
> 0
1901 ? data
->MaxIndirectRange() : size
);
1906 // Double indirect block range
1908 if (data
->Size() <= data
->MaxDoubleIndirectRange()
1909 || !data
->max_double_indirect_range
) {
1910 // We make sure here that we have this minimum allocated, so if
1911 // the allocation succeeds, we don't run into an endless loop.
1912 if (!data
->max_double_indirect_range
)
1913 minimum
= _DoubleIndirectBlockLength();
1915 minimum
= data
->double_indirect
.Length();
1917 if ((run
.Length() % minimum
) != 0) {
1918 // The number of allocated blocks isn't a multiple of 'minimum',
1919 // so we have to change this. This can happen the first time the
1920 // stream grows into the double indirect range.
1921 // First, free the remaining blocks that don't fit into this
1923 int32 rest
= run
.Length() % minimum
;
1924 run
.length
= HOST_ENDIAN_TO_BFS_INT16(run
.Length() - rest
);
1926 status
= fVolume
->Free(transaction
,
1927 block_run::Run(run
.AllocationGroup(),
1928 run
.Start() + run
.Length(), rest
));
1932 blocksNeeded
+= rest
;
1933 blocksRequested
= round_up(blocksNeeded
, minimum
);
1935 // Are there any blocks left in the run? If not, allocate
1937 if (run
.length
== 0)
1941 // if there is no double indirect block yet, create one
1942 if (data
->double_indirect
.IsZero()) {
1943 status
= _AllocateBlockArray(transaction
,
1944 data
->double_indirect
, _DoubleIndirectBlockLength());
1948 data
->max_double_indirect_range
= data
->max_indirect_range
;
1951 // calculate the index where to insert the new blocks
1956 get_double_indirect_sizes(data
->double_indirect
.Length(),
1957 fVolume
->BlockSize(), runsPerBlock
, directSize
, indirectSize
);
1958 if (directSize
<= 0 || indirectSize
<= 0)
1961 off_t start
= data
->MaxDoubleIndirectRange()
1962 - data
->MaxIndirectRange();
1963 int32 indirectIndex
= start
/ indirectSize
;
1964 int32 index
= (start
% indirectSize
) / directSize
;
1965 int32 runsPerArray
= runsPerBlock
* minimum
;
1967 // distribute the blocks to the array and allocate
1968 // new array blocks when needed
1970 CachedBlock
cached(fVolume
);
1971 CachedBlock
cachedDirect(fVolume
);
1972 block_run
* array
= NULL
;
1973 uint32 runLength
= run
.Length();
1975 while (run
.length
!= 0) {
1976 // get the indirect array block
1977 if (array
== NULL
) {
1978 uint32 block
= indirectIndex
/ runsPerBlock
;
1979 if (block
>= minimum
)
1982 array
= (block_run
*)cached
.SetTo(fVolume
->ToBlock(
1983 data
->double_indirect
) + block
);
1989 // do we need a new array block?
1990 if (array
[indirectIndex
% runsPerBlock
].IsZero()) {
1991 cached
.MakeWritable(transaction
);
1993 status
= _AllocateBlockArray(transaction
,
1994 array
[indirectIndex
% runsPerBlock
],
1995 data
->double_indirect
.Length());
2000 block_run
* runs
= (block_run
*)cachedDirect
.SetToWritable(
2001 transaction
, fVolume
->ToBlock(array
[indirectIndex
2002 % runsPerBlock
]) + index
/ runsPerBlock
);
2007 // insert the block_run into the array
2008 runs
[index
% runsPerBlock
] = run
;
2009 runs
[index
% runsPerBlock
].length
2010 = HOST_ENDIAN_TO_BFS_INT16(minimum
);
2012 // alter the remaining block_run
2013 run
.start
= HOST_ENDIAN_TO_BFS_INT16(run
.Start()
2015 run
.length
= HOST_ENDIAN_TO_BFS_INT16(run
.Length()
2017 } while ((++index
% runsPerBlock
) != 0 && run
.length
);
2018 } while ((index
% runsPerArray
) != 0 && run
.length
);
2020 if (index
== runsPerArray
)
2022 if (++indirectIndex
% runsPerBlock
== 0) {
2028 data
->max_double_indirect_range
= HOST_ENDIAN_TO_BFS_INT64(
2029 data
->MaxDoubleIndirectRange()
2030 + (runLength
<< fVolume
->BlockShift()));
2031 data
->size
= blocksNeeded
> 0 ? HOST_ENDIAN_TO_BFS_INT64(
2032 data
->max_double_indirect_range
) : size
;
2037 RETURN_ERROR(EFBIG
);
2039 // update the size of the data stream
2040 data
->size
= HOST_ENDIAN_TO_BFS_INT64(size
);
2047 Inode::_DoubleIndirectBlockLength() const
2049 if (fVolume
->BlockSize() > DOUBLE_INDIRECT_ARRAY_SIZE
)
2052 return DOUBLE_INDIRECT_ARRAY_SIZE
/ fVolume
->BlockSize();
/*!	Frees the statically sized array of the double indirect part of a data
	stream.

	\param transaction the transaction the changes are made in
	\param level 0 if \a run is the double indirect array itself (its entries
		are then processed recursively with level 1), anything else if the
		entries of \a run point to data runs
	\param run the array to process
	\param size the new size of the stream; everything beyond it is freed
	\param offset the file offset at which the range covered by \a run starts
	\param max is updated to the end of the last entry that is kept; the
		value is stored in file system byte order
*/
status_t
Inode::_FreeStaticStreamArray(Transaction& transaction, int32 level,
	block_run run, off_t size, off_t offset, off_t& max)
{
	// the number of bytes a single entry of this array covers
	int32 indirectSize;
	if (level == 0) {
		indirectSize = double_indirect_max_indirect_size(run.Length(),
			fVolume->BlockSize());
	} else {
		indirectSize = double_indirect_max_direct_size(run.Length(),
			fVolume->BlockSize());
	}
	if (indirectSize <= 0)
		return B_BAD_DATA;

	// position of the new size relative to the start of this array
	off_t start;
	if (size > offset)
		start = size - offset;
	else
		start = 0;

	// the first entry that is affected by the new size
	int32 index = start / indirectSize;
	int32 runsPerBlock = fVolume->BlockSize() / sizeof(block_run);

	CachedBlock cached(fVolume);
	off_t blockNumber = fVolume->ToBlock(run);

	// set the file offset to the current block run
	offset += (off_t)index * indirectSize;

	for (int32 i = index / runsPerBlock; i < run.Length(); i++) {
		block_run* array = (block_run*)cached.SetToWritable(transaction,
			blockNumber + i);
		if (array == NULL)
			RETURN_ERROR(B_ERROR);

		for (index = index % runsPerBlock; index < runsPerBlock; index++) {
			if (array[index].IsZero()) {
				// we also want to break out of the outer loop
				i = run.Length();
				break;
			}

			status_t status = B_OK;
			if (level == 0) {
				// descend into the array this entry points to
				status = _FreeStaticStreamArray(transaction, 1, array[index],
					size, offset, max);
			} else if (offset >= size)
				status = fVolume->Free(transaction, array[index]);
			else
				max = HOST_ENDIAN_TO_BFS_INT64(offset + indirectSize);

			if (status < B_OK)
				RETURN_ERROR(status);

			// entries that start at or beyond the new size are cleared
			if (offset >= size)
				array[index].SetTo(0, 0, 0);

			offset += indirectSize;
		}
		// all following blocks are processed from their first entry on
		index = 0;
	}
	return B_OK;
}
/*!	Frees all block_runs in the array which come after the specified size.
	It also trims the last block_run that contains the size.
	"offset" and "max" are maintained until the last block_run that doesn't
	have to be freed - after this, the values won't be correct anymore, but
	will still assure correct function for all subsequent calls.
	"max" is considered to be in file system byte order.

	\param transaction the transaction the blocks are freed in
	\param array the block_run array to process
	\param arrayLength the number of entries in \a array
	\param size the new size of the stream
	\param offset the file offset at which \a array starts; it is advanced
		past every run that has been looked at
	\param max the maximum range that belongs to \a array
	\return \c B_OK on success, \c B_IO_ERROR if freeing a run failed.
*/
status_t
Inode::_FreeStreamArray(Transaction& transaction, block_run* array,
	uint32 arrayLength, off_t size, off_t& offset, off_t& max)
{
	PRINT(("FreeStreamArray: arrayLength %lu, size %Ld, offset %Ld, max %Ld\n",
		arrayLength, size, offset, max));

	// "offset" is the start of the current run, "newOffset" is its end
	off_t newOffset = offset;
	uint32 i = 0;
	for (; i < arrayLength; i++, offset = newOffset) {
		if (array[i].IsZero())
			break;

		newOffset += (off_t)array[i].Length() << fVolume->BlockShift();
		// runs that end at or before the new size are kept untouched
		if (newOffset <= size)
			continue;

		block_run run = array[i];

		// determine the block_run to be freed
		if (newOffset > size && offset < size) {
			// free partial block_run (and update the original block_run)
			run.start = HOST_ENDIAN_TO_BFS_INT16(array[i].Start()
				+ ((size + fVolume->BlockSize() - 1 - offset)
					>> fVolume->BlockShift()));
				// round to next block
			array[i].length = HOST_ENDIAN_TO_BFS_INT16(run.Start()
				- array[i].Start());
			run.length = HOST_ENDIAN_TO_BFS_INT16(run.Length()
				- array[i].Length());

			// nothing left to free after rounding up to the next block
			if (run.length == 0)
				continue;

			// update maximum range
			max = HOST_ENDIAN_TO_BFS_INT64(offset + ((off_t)array[i].Length()
				<< fVolume->BlockShift()));
		} else {
			// free the whole block_run
			array[i].SetTo(0, 0, 0);

			if ((off_t)BFS_ENDIAN_TO_HOST_INT64(max) > offset)
				max = HOST_ENDIAN_TO_BFS_INT64(offset);
		}

		if (fVolume->Free(transaction, run) < B_OK)
			return B_IO_ERROR;
	}
	return B_OK;
}
/*!	Shrinks the data stream to \a size by freeing all blocks beyond it.
	The ranges are processed from the double indirect range down to the
	direct range; the array blocks of the double indirect and the indirect
	range are freed as well once the respective range is no longer in use.
	Finally, the size of the stream is set to \a size.
*/
status_t
Inode::_ShrinkStream(Transaction& transaction, off_t size)
{
	data_stream* data = &Node().data;
	status_t status;

	if (data->MaxDoubleIndirectRange() > size) {
		off_t* maxDoubleIndirect = &data->max_double_indirect_range;
			// gcc 4 work-around: "error: cannot bind packed field
			// 'data->data_stream::max_double_indirect_range' to 'off_t&'"
		status = _FreeStaticStreamArray(transaction, 0, data->double_indirect,
			size, data->MaxIndirectRange(), *maxDoubleIndirect);
		if (status != B_OK)
			return status;

		// the double indirect range is not used at all anymore
		if (size <= data->MaxIndirectRange()) {
			fVolume->Free(transaction, data->double_indirect);
			data->double_indirect.SetTo(0, 0, 0);
			data->max_double_indirect_range = 0;
		}
	}

	if (data->MaxIndirectRange() > size) {
		CachedBlock cached(fVolume);
		off_t block = fVolume->ToBlock(data->indirect);
		// the indirect range starts where the direct range ends
		off_t offset = data->MaxDirectRange();

		for (int32 i = 0; i < data->indirect.Length(); i++) {
			block_run* array = (block_run*)cached.SetToWritable(transaction,
				block + i);
			if (array == NULL)
				break;

			off_t* maxIndirect = &data->max_indirect_range;
				// gcc 4 work-around: "error: cannot bind packed field
				// 'data->data_stream::max_indirect_range' to 'off_t&'"
			if (_FreeStreamArray(transaction, array, fVolume->BlockSize()
					/ sizeof(block_run), size, offset, *maxIndirect) != B_OK)
				return B_IO_ERROR;
		}
		// the indirect range is empty now, so its array can be freed, too
		if (data->max_direct_range == data->max_indirect_range) {
			fVolume->Free(transaction, data->indirect);
			data->indirect.SetTo(0, 0, 0);
			data->max_indirect_range = 0;
		}
	}

	if (data->MaxDirectRange() > size) {
		off_t offset = 0;
		off_t *maxDirect = &data->max_direct_range;
			// gcc 4 work-around: "error: cannot bind packed field
			// 'data->data_stream::max_direct_range' to 'off_t&'"
		status = _FreeStreamArray(transaction, data->direct, NUM_DIRECT_BLOCKS,
			size, offset, *maxDirect);
		if (status != B_OK)
			return status;
	}

	data->size = HOST_ENDIAN_TO_BFS_INT64(size);
	return B_OK;
}
2248 Inode::SetFileSize(Transaction
& transaction
, off_t size
)
2253 off_t oldSize
= Size();
2255 if (size
== oldSize
)
2258 T(Resize(this, oldSize
, size
, false));
2260 // should the data stream grow or shrink?
2262 if (size
> oldSize
) {
2263 status
= _GrowStream(transaction
, size
);
2264 if (status
< B_OK
) {
2265 // if the growing of the stream fails, the whole operation
2266 // fails, so we should shrink the stream to its former size
2267 _ShrinkStream(transaction
, oldSize
);
2270 status
= _ShrinkStream(transaction
, size
);
2275 file_cache_set_size(FileCache(), size
);
2276 file_map_set_size(Map(), size
);
2278 return WriteBack(transaction
);
2283 Inode::Append(Transaction
& transaction
, off_t bytes
)
2285 return SetFileSize(transaction
, Size() + bytes
);
2289 /*! Checks whether or not this inode's data stream needs to be trimmed
2290 because of an earlier preallocation.
2291 Returns true if there are any blocks to be trimmed.
2294 Inode::NeedsTrimming() const
2296 // We never trim preallocated index blocks to make them grow as smooth as
2297 // possible. There are only few indices anyway, so this doesn't hurt.
2298 // Also, if an inode is already in deleted state, we don't bother trimming
2300 if (IsIndex() || IsDeleted()
2301 || (IsSymLink() && (Flags() & INODE_LONG_SYMLINK
) == 0))
2304 off_t roundedSize
= round_up(Size(), fVolume
->BlockSize());
2306 return Node().data
.MaxDirectRange() > roundedSize
2307 || Node().data
.MaxIndirectRange() > roundedSize
2308 || Node().data
.MaxDoubleIndirectRange() > roundedSize
;
2313 Inode::TrimPreallocation(Transaction
& transaction
)
2315 T(Resize(this, max_c(Node().data
.MaxDirectRange(),
2316 Node().data
.MaxIndirectRange()), Size(), true));
2318 status_t status
= _ShrinkStream(transaction
, Size());
2322 return WriteBack(transaction
);
2326 //! Frees the file's data stream and removes all attributes
2328 Inode::Free(Transaction
& transaction
)
2332 // Perhaps there should be an implementation of Inode::ShrinkStream() that
2333 // just frees the data_stream, but doesn't change the inode (since it is
2334 // freed anyway) - that would make an undelete command possible
2335 if (!IsSymLink() || (Flags() & INODE_LONG_SYMLINK
) != 0) {
2336 status_t status
= SetFileSize(transaction
, 0);
2341 // Free all attributes, and remove their indices
2343 // We have to limit the scope of AttributeIterator, so that its
2344 // destructor is not called after the inode is deleted
2345 AttributeIterator
iterator(this);
2347 char name
[B_FILE_NAME_LENGTH
];
2351 while (iterator
.GetNext(name
, &length
, &type
, &id
) == B_OK
) {
2352 RemoveAttribute(transaction
, name
);
2356 if (WriteBack(transaction
) < B_OK
)
2359 return fVolume
->Free(transaction
, BlockRun());
/*!	Synchronizes (writes back to disk) the file stream of the inode.
	If the inode has a file cache, syncing is delegated to it. Otherwise, the
	blocks of all runs in the direct, indirect, and double indirect range are
	flushed from the block cache one run at a time.
	The first empty run found marks the end of the stream, and ends the
	method successfully.
*/
status_t
Inode::Sync()
{
	if (FileCache())
		return file_cache_sync(FileCache());

	// We may also want to flush the attribute's data stream to
	// disk here... (do we?)

	// short symlinks do not have a data stream
	if (IsSymLink() && (Flags() & INODE_LONG_SYMLINK) == 0)
		return B_OK;

	InodeReadLocker locker(this);

	data_stream* data = &Node().data;
	status_t status = B_OK;

	// flush direct range

	for (int32 i = 0; i < NUM_DIRECT_BLOCKS; i++) {
		if (data->direct[i].IsZero())
			return B_OK;

		status = block_cache_sync_etc(fVolume->BlockCache(),
			fVolume->ToBlock(data->direct[i]), data->direct[i].Length());
		if (status != B_OK)
			return status;
	}

	// flush indirect range

	if (data->max_indirect_range == 0)
		return B_OK;

	CachedBlock cached(fVolume);
	off_t block = fVolume->ToBlock(data->indirect);
	// the number of block_runs that fit into a single block
	int32 count = fVolume->BlockSize() / sizeof(block_run);

	for (int32 j = 0; j < data->indirect.Length(); j++) {
		block_run* runs = (block_run*)cached.SetTo(block + j);
		if (runs == NULL)
			break;

		for (int32 i = 0; i < count; i++) {
			if (runs[i].IsZero())
				return B_OK;

			status = block_cache_sync_etc(fVolume->BlockCache(),
				fVolume->ToBlock(runs[i]), runs[i].Length());
			if (status != B_OK)
				return status;
		}
	}

	// flush double indirect range

	if (data->max_double_indirect_range == 0)
		return B_OK;

	off_t indirectBlock = fVolume->ToBlock(data->double_indirect);

	for (int32 l = 0; l < data->double_indirect.Length(); l++) {
		// first level: the runs in here point to further array blocks
		block_run* indirectRuns = (block_run*)cached.SetTo(indirectBlock + l);
		if (indirectRuns == NULL)
			return B_FILE_ERROR;

		CachedBlock directCached(fVolume);

		for (int32 k = 0; k < count; k++) {
			if (indirectRuns[k].IsZero())
				return B_OK;

			// second level: the runs in here point to the actual data
			block = fVolume->ToBlock(indirectRuns[k]);
			for (int32 j = 0; j < indirectRuns[k].Length(); j++) {
				block_run* runs = (block_run*)directCached.SetTo(block + j);
				if (runs == NULL)
					return B_FILE_ERROR;

				for (int32 i = 0; i < count; i++) {
					if (runs[i].IsZero())
						return B_OK;

					// TODO: combine single block_runs to bigger ones when
					// they are adjacent
					status = block_cache_sync_etc(fVolume->BlockCache(),
						fVolume->ToBlock(runs[i]), runs[i].Length());
					if (status != B_OK)
						return status;
				}
			}
		}
	}
	return B_OK;
}
2460 // #pragma mark - TransactionListener implementation
2464 Inode::TransactionDone(bool success
)
2467 // Revert any changes made to the cached bfs_inode
2468 // TODO: return code gets eaten
2469 UpdateNodeFromDisk();
2475 Inode::RemovedFromTransaction()
2477 Node().flags
&= ~HOST_ENDIAN_TO_BFS_INT32(INODE_IN_TRANSACTION
);
2479 // See AddInode() why we do this here
2480 if ((Flags() & INODE_DELETED
) != 0)
2481 fVolume
->RemovedInodes().Add(this);
2483 rw_lock_write_unlock(&Lock());
2485 if (!fVolume
->IsInitializing())
2486 put_vnode(fVolume
->FSVolume(), ID());
2490 // #pragma mark - creation/deletion
/*!	Removes the entry \a name from this directory, and marks its inode as
	deleted.

	\param transaction the transaction the changes are made in; both this
		directory and the removed inode are write locked in it
	\param name the name of the entry to remove
	\param _id if not \c NULL, receives the ID of the inode that was found
	\param isDirectory whether the entry is expected to be a directory; the
		removal is refused if the type does not match, or if the directory
		is not empty
	\param force skips the type and emptiness checks, and ignores a failing
		removal from the B+tree
*/
status_t
Inode::Remove(Transaction& transaction, const char* name, ino_t* _id,
	bool isDirectory, bool force)
{
	if (fTree == NULL)
		RETURN_ERROR(B_BAD_VALUE);

	WriteLockInTransaction(transaction);

	// does the file even exist?
	off_t id;
	if (fTree->Find((uint8*)name, (uint16)strlen(name), &id) < B_OK)
		return B_ENTRY_NOT_FOUND;

	if (_id)
		*_id = id;

	Vnode vnode(fVolume, id);
	Inode* inode;
	status_t status = vnode.Get(&inode);
	if (status < B_OK) {
		REPORT_ERROR(status);
		return B_ENTRY_NOT_FOUND;
	}

	T(Remove(inode, name));
	inode->WriteLockInTransaction(transaction);

	// Inode::IsContainer() is true also for indices (furthermore, the S_IFDIR
	// bit is set for indices in BFS, not for attribute directories) - but you
	// should really be able to do whatever you want with your indices
	// without having to remove all files first :)
	if (!inode->IsIndex() && !force) {
		// if it's not of the correct type, don't delete it!
		if (inode->IsContainer() != isDirectory)
			return isDirectory ? B_NOT_A_DIRECTORY : B_IS_A_DIRECTORY;

		// only delete empty directories
		if (isDirectory && !inode->IsEmpty())
			return B_DIRECTORY_NOT_EMPTY;
	}

	// remove_vnode() allows the inode to be accessed until the last put_vnode()
	status = remove_vnode(fVolume->FSVolume(), id);
	if (status != B_OK)
		return status;

	if (fTree->Remove(transaction, name, id) != B_OK && !force) {
		// the entry is still there, so the vnode must not go away either
		unremove_vnode(fVolume->FSVolume(), id);
		RETURN_ERROR(B_ERROR);
	}

#ifdef DEBUG
	if (fTree->Find((uint8*)name, (uint16)strlen(name), &id) == B_OK) {
		DIE(("deleted entry still there"));
	}
#endif

	// update the time stamps of this directory
	ContainerContentsChanged(transaction);

	// update the inode, so that no one will ever doubt it's deleted :-)
	inode->Node().flags |= HOST_ENDIAN_TO_BFS_INT32(INODE_DELETED);
	inode->Node().flags &= ~HOST_ENDIAN_TO_BFS_INT32(INODE_IN_USE);

	// In balance to the Inode::Create() method, the main indices
	// are updated here (name, size, & last_modified)

	Index index(fVolume);
	if (inode->InNameIndex()) {
		index.RemoveName(transaction, name, inode);
			// If removing from the index fails, it is not regarded as a
			// fatal error and will not be reported back!
			// Deleted inodes won't be visible in queries anyway.
	}

	if (inode->InSizeIndex())
		index.RemoveSize(transaction, inode);
	if (inode->InLastModifiedIndex())
		index.RemoveLastModified(transaction, inode);

	return inode->WriteBack(transaction);
}
2577 /*! Creates the inode with the specified \a parent directory, and automatically
2578 adds the created inode to that parent directory. If an attribute directory
2579 is created, it will also automatically be added to the \a parent inode as
2580 such. However, the indices root node, and the regular root node won't be
2581 added to the superblock.
2582 It will also create the initial B+tree for the inode if it's a directory.
2584 \a name may be \c NULL, but only if no \a parent is given.
2585 If the "_id" or "_inode" variable is given and non-NULL to store the
2586 inode's ID, the inode stays locked - you have to call put_vnode() if you
2587 don't use it anymore.
2588 If the node already exists, this method will fail if \c O_EXCL is set, or
2589 it's a directory or a symlink. Otherwise, it will just be returned.
2590 If \c O_TRUNC has been specified, the file will also be truncated.
// Creates a new inode called \a name below \a parent or, if the parent's
// B+tree already contains an entry with that name, opens the existing inode.
//
// Visible flow:
//	- \a parent (if given) is write-locked in \a transaction,
//	- nothing is created in a parent that get_vnode_removed() reports as
//	  removed (B_ENTRY_NOT_FOUND),
//	- existing entry: B_FILE_EXISTS for directories, symlinks and O_EXCL;
//	  otherwise type and permission checks, plus truncation on O_TRUNC,
//	- new entry: W_OK check on the parent, allocation via InodeAllocator,
//	  initialization of the bfs_inode fields, linking into the parent and
//	  updating the name, size and last_modified indices.
//
// \param transaction Transaction all changes are made in; it also provides
//		the Volume (transaction.GetVolume()).
// \param parent Inode to create the node in; may be NULL, in which case a
//		zero block_run is used as the initial parent run.
// \param name Entry name; looked up in, and inserted into, the parent's tree.
// \param mode Tested against S_ATTR_DIR, S_ISDIR() and S_ISLNK(), and handed
//		to InodeAllocator::New().
// \param openMode Open flags; O_EXCL, O_RWMASK, O_DIRECTORY and O_TRUNC are
//		evaluated here.
// \param type Stored in the new inode's "type" field.
// \param _created Not referenced in the lines visible here — presumably
//		reports whether a new inode was created; TODO confirm.
// \param _id, _inode If both are NULL, the vnode reference is released with
//		put_vnode() before returning.
// \param vnodeOps, publishFlags Passed on to InodeAllocator::New() and
//		InodeAllocator::Keep().
// \return Among others B_ENTRY_NOT_FOUND, B_FILE_EXISTS, B_IS_A_DIRECTORY,
//		B_NOT_A_DIRECTORY, B_NOT_ALLOWED, or the error of a failed step.
2593 Inode::Create(Transaction
& transaction
, Inode
* parent
, const char* name
,
2594 int32 mode
, int openMode
, uint32 type
, bool* _created
, ino_t
* _id
,
2595 Inode
** _inode
, fs_vnode_ops
* vnodeOps
, uint32 publishFlags
)
2597 FUNCTION_START(("name = %s, mode = %ld\n", name
, mode
));
// without a parent, start out with an all-zero block_run
2599 block_run parentRun
= parent
? parent
->BlockRun() : block_run::Run(0, 0, 0);
2600 Volume
* volume
= transaction
.GetVolume();
// stays NULL unless the parent is a container and we're not creating an
// attribute directory
2601 BPlusTree
* tree
= NULL
;
2603 if (parent
!= NULL
&& (mode
& S_ATTR_DIR
) == 0 && parent
->IsContainer()) {
2604 // check if the file already exists in the directory
2605 tree
= parent
->Tree();
2608 if (parent
!= NULL
) {
2609 // the parent directory is locked during the whole inode creation
2610 parent
->WriteLockInTransaction(transaction
);
// this check is skipped while the volume is being initialized
2613 if (parent
!= NULL
&& !volume
->IsInitializing() && parent
->IsContainer()) {
2614 // don't create anything in removed directories
2616 if (get_vnode_removed(volume
->FSVolume(), parent
->ID(), &removed
)
2617 == B_OK
&& removed
) {
2618 RETURN_ERROR(B_ENTRY_NOT_FOUND
);
2623 // Does the file already exist?
// on success, "offset" receives the value stored for \a name in the tree; it
// is used as the vnode ID below
2625 if (tree
->Find((uint8
*)name
, (uint16
)strlen(name
), &offset
) == B_OK
) {
2626 // Return if the file should be a directory/link or opened in exclusive mode (O_EXCL)
2628 if (S_ISDIR(mode
) || S_ISLNK(mode
) || (openMode
& O_EXCL
) != 0)
2629 return B_FILE_EXISTS
;
// get hold of the existing inode
2631 Vnode
vnode(volume
, offset
);
2633 status_t status
= vnode
.Get(&inode
);
2634 if (status
!= B_OK
) {
2635 REPORT_ERROR(status
);
2636 return B_ENTRY_NOT_FOUND
;
// directories can only be opened read-only
2639 if (inode
->IsDirectory() && (openMode
& O_RWMASK
) != O_RDONLY
)
2640 return B_IS_A_DIRECTORY
;
// O_DIRECTORY demands that the existing node is a directory
2641 if ((openMode
& O_DIRECTORY
) != 0 && !inode
->IsDirectory())
2642 return B_NOT_A_DIRECTORY
;
2644 // we want to open the file, so we should have the rights to do so
// (truncating additionally requires write access)
2645 if (inode
->CheckPermissions(open_mode_to_access(openMode
)
2646 | ((openMode
& O_TRUNC
) != 0 ? W_OK
: 0)) != B_OK
)
2647 return B_NOT_ALLOWED
;
2649 if ((openMode
& O_TRUNC
) != 0) {
2650 // truncate the existing file
2651 inode
->WriteLockInTransaction(transaction
);
2653 status_t status
= inode
->SetFileSize(transaction
, 0);
// write the changed inode back (the guard and error handling around this
// call are not visible in this extract)
2655 status
= inode
->WriteBack(transaction
);
2668 // Only keep the vnode in memory if the _id or _inode pointer is provided (non-NULL)
2670 if (_id
!= NULL
|| _inode
!= NULL
)
// NOTE(review): the body of the following branch is not visible in this
// extract; it covers a parent that is given without a usable tree while no
// attribute directory is requested
2675 } else if (parent
!= NULL
&& (mode
& S_ATTR_DIR
) == 0) {
2677 } else if ((openMode
& O_DIRECTORY
) != 0) {
2678 // TODO: we might need to return B_NOT_A_DIRECTORY here
2679 return B_ENTRY_NOT_FOUND
;
2684 // do we have the power to create new files at all?
2685 if (parent
!= NULL
&& (status
= parent
->CheckPermissions(W_OK
)) != B_OK
)
2686 RETURN_ERROR(status
);
2688 // allocate space for the new inode
// NOTE(review): per the InodeAllocator class comment, it cleans up an inode
// creation in progress; Keep() is called at the end to retain the result
2689 InodeAllocator
allocator(transaction
);
2692 status
= allocator
.New(&parentRun
, mode
, publishFlags
, run
, vnodeOps
,
// record a tracing entry (only if the T() macro is enabled)
2697 T(Create(inode
, parent
, name
, mode
, openMode
, type
));
2699 // Initialize the parts of the bfs_inode structure that
2700 // InodeAllocator::New() hasn't touched yet
2702 bfs_inode
* node
= &inode
->Node();
2704 if (parent
== NULL
) {
2705 // we set the parent to itself in this case
2706 // (only happens for the root and indices node)
2709 node
->parent
= parentRun
;
// the owner is the effective user of the caller
2711 node
->uid
= HOST_ENDIAN_TO_BFS_INT32(geteuid());
2712 node
->gid
= HOST_ENDIAN_TO_BFS_INT32(parent
2713 ? parent
->Node().GroupID() : getegid());
2714 // the group ID is inherited from the parent, if available
2716 node
->type
= HOST_ENDIAN_TO_BFS_INT32(type
);
2718 inode
->WriteBack(transaction
);
2719 // make sure the initialized node is available to others
2721 // only add the name to regular files, directories, or symlinks
2722 // don't add it to attributes, or indices
// (the statement executed when SetName() fails is not visible here)
2723 if (tree
&& inode
->IsRegularNode()
2724 && inode
->SetName(transaction
, name
) != B_OK
)
2727 // Initialize b+tree if it's a directory (and add "." & ".." if it's
2728 // a standard directory for files - not for attributes or indices)
2729 if (inode
->IsContainer()) {
2730 status
= allocator
.CreateTree();
2735 // Add a link to the inode from the parent, depending on its type
2736 // (the vnode is not published yet, so it is safe to make the inode
2737 // accessible to the file system here)
2739 status
= tree
->Insert(transaction
, name
, inode
->ID());
// an attribute directory is linked through the parent's "attributes" run
2740 } else if (parent
!= NULL
&& (mode
& S_ATTR_DIR
) != 0) {
2741 parent
->Attributes() = run
;
2742 status
= parent
->WriteBack(transaction
);
2745 // Note, we only care if the inode could be made accessible for the
2746 // two cases above; the root node or the indices root node must
2747 // handle this case on their own (or other cases where "parent" is NULL)
2750 RETURN_ERROR(status
);
2752 // Update the main indices (name, size & last_modified)
2753 // (live queries might want to access us after this)
2755 Index
index(volume
);
2756 if (inode
->InNameIndex() && name
!= NULL
) {
2757 // the name index only contains regular files
2758 // (but not the root node where name == NULL)
2759 status
= index
.InsertName(transaction
, name
, inode
);
// B_BAD_INDEX is tolerated here; any other failure undoes the link made above
2760 if (status
!= B_OK
&& status
!= B_BAD_INDEX
) {
2761 // We have to remove the node from the parent at this point,
2762 // because the InodeAllocator destructor can't handle this
2763 // case (and if it fails, we can't do anything about it...)
2765 tree
->Remove(transaction
, name
, inode
->ID());
2766 else if (parent
!= NULL
&& (mode
& S_ATTR_DIR
) != 0)
2767 parent
->Node().attributes
.SetTo(0, 0, 0);
2769 RETURN_ERROR(status
);
// let the parent directory know that its contents changed
2773 if (parent
!= NULL
&& parent
->IsContainer())
2774 parent
->ContainerContentsChanged(transaction
);
2776 inode
->UpdateOldLastModified();
2778 // The "size" & "last_modified" indices don't contain directories.
2779 // If adding to these indices fails, the inode creation will not be
2780 // harmed; they are considered less important than the "name" index.
2781 if (inode
->InSizeIndex())
2782 index
.InsertSize(transaction
, inode
);
2783 if (inode
->InLastModifiedIndex())
2784 index
.InsertLastModified(transaction
, inode
);
// set up the file cache and file map for inodes that need them; a NULL result
// of either is checked below (the failure handling is not visible here)
2786 if (inode
->NeedsFileCache()) {
2787 inode
->SetFileCache(file_cache_create(volume
->ID(), inode
->ID(),
2789 inode
->SetMap(file_map_create(volume
->ID(), inode
->ID(),
2792 if (inode
->FileCache() == NULL
|| inode
->Map() == NULL
)
2796 // Everything worked well until this point, we have a fully
2797 // initialized inode, and we want to keep it
2798 allocator
.Keep(vnodeOps
, publishFlags
);
2807 // if either _id or _inode is passed, we will keep the inode locked
2808 if (_id
== NULL
&& _inode
== NULL
)
2809 put_vnode(volume
->FSVolume(), inode
->ID());
2815 // #pragma mark - AttributeIterator
// Sets up an attribute iterator for \a inode. The iteration starts at the
// first small_data item (fCurrentSmallData = 0), and the iterator registers
// itself with the inode through _AddIterator().
// NOTE(review): the remaining member initializers are not visible in this
// extract — confirm that fInode, fAttributes and fIterator are initialized.
2818 AttributeIterator::AttributeIterator(Inode
* inode
)
2820 fCurrentSmallData(0),
// register with the inode; the destructor undoes this via _RemoveIterator()
2826 inode
->_AddIterator(this);
// Tears the iterator down: releases the vnode reference held on the attribute
// directory (fAttributes, acquired with get_vnode() in GetNext()) and
// unregisters the iterator from its inode.
2830 AttributeIterator::~AttributeIterator()
// NOTE(review): fAttributes is only set once GetNext() has reached the
// attribute directory — presumably a NULL check guards this call; confirm.
2833 put_vnode(fAttributes
->GetVolume()->FSVolume(), fAttributes
->ID());
// counterpart of the _AddIterator() call in the constructor
2836 fInode
->_RemoveIterator(this);
// Restarts the iteration: the next GetNext() call begins again with the first
// small_data item, and the attribute directory iterator is rewound as well if
// it has been created already.
2841 AttributeIterator::Rewind()
// a non-negative value makes GetNext() scan the small_data section again
2843 fCurrentSmallData
= 0;
// fIterator only exists after GetNext() has opened the attribute directory
2845 if (fIterator
!= NULL
)
2846 fIterator
->Rewind();
// Retrieves the next attribute of fInode. As long as fCurrentSmallData is not
// negative, the attributes stored in the inode's small_data section are
// reported; once that section is exhausted, the entries of the separate
// attribute directory are walked using fIterator.
//
// \param name Buffer receiving the attribute name; B_FILE_NAME_LENGTH is used
//		as the limit for both strncpy() and GetNextEntry().
// \param _length Set to the name size of a small_data item.
// \param _type Set to the type of the small_data item or attribute inode.
// \param _id Set to the index of the small_data item (its declaration is on a
//		signature line that is not visible in this extract).
// \return B_ENTRY_NOT_FOUND if there is no attribute directory, or if it or
//		its tree iterator could not be obtained; other return paths are not
//		visible in this extract.
2853 AttributeIterator::GetNext(char* name
, size_t* _length
, uint32
* _type
,
2856 // read attributes out of the small data section
2858 if (fCurrentSmallData
>= 0) {
// the NodeGetter provides access to the inode's on-disk structure
2859 NodeGetter
nodeGetter(fInode
->GetVolume(), fInode
);
2860 if (nodeGetter
.Node() == NULL
)
2863 const bfs_inode
* node
= nodeGetter
.Node();
2864 const small_data
* item
= ((bfs_inode
*)node
)->SmallDataStart();
// hold the small_data lock while walking the items
2866 RecursiveLocker
_(&fInode
->SmallDataLock());
// advance to the item at position fCurrentSmallData, counting items in "index"
2869 for (; !item
->IsLast(node
); item
= item
->Next(), index
++) {
// NOTE(review): the item carrying the special FILE_NAME_NAME name is singled
// out here; the guarded statement is not visible — presumably it is skipped,
// confirm.
2870 if (item
->NameSize() == FILE_NAME_NAME_LENGTH
2871 && *item
->Name() == FILE_NAME_NAME
)
2874 if (index
>= fCurrentSmallData
)
// found a small_data item to report
2878 if (!item
->IsLast(node
)) {
// NOTE(review): strncpy() does not NUL-terminate when the source is
// B_FILE_NAME_LENGTH bytes or longer — presumably small_data names are
// always shorter; confirm.
2879 strncpy(name
, item
->Name(), B_FILE_NAME_LENGTH
);
2880 *_type
= item
->Type();
2881 *_length
= item
->NameSize();
2882 *_id
= (ino_t
)index
;
// remember where to continue on the next call
2884 fCurrentSmallData
= index
+ 1;
2888 // stop traversing the small_data section
2889 fCurrentSmallData
= -1;
2892 // read attributes out of the attribute directory
// a zero block_run means the inode has no attribute directory
2894 if (fInode
->Attributes().IsZero())
2895 return B_ENTRY_NOT_FOUND
;
2897 Volume
* volume
= fInode
->GetVolume();
2899 // if you haven't yet access to the attributes directory, get it
// (the reference acquired here is released by put_vnode() in the destructor)
2900 if (fAttributes
== NULL
) {
2901 if (get_vnode(volume
->FSVolume(), volume
->ToVnode(fInode
->Attributes()),
2902 (void**)&fAttributes
) != B_OK
) {
2903 FATAL(("get_vnode() failed in AttributeIterator::GetNext(ino_t"
2904 " = %" B_PRIdINO
",name = \"%s\")\n", fInode
->ID(), name
));
2905 return B_ENTRY_NOT_FOUND
;
// create the iterator over the attribute directory's B+tree; a failed
// allocation yields NULL because of new(std::nothrow)
2908 BPlusTree
* tree
= fAttributes
->Tree();
2910 || (fIterator
= new(std::nothrow
) TreeIterator(tree
)) == NULL
) {
2911 FATAL(("could not get tree in AttributeIterator::GetNext(ino_t"
2912 " = %" B_PRIdINO
",name = \"%s\")\n", fInode
->ID(), name
));
2913 return B_ENTRY_NOT_FOUND
;
// fetch the next directory entry: its name, name length and inode ID
2919 status_t status
= fIterator
->GetNextEntry(name
, &length
,
2920 B_FILE_NAME_LENGTH
, &id
);
// load the attribute's inode to find out its type
2924 Vnode
vnode(volume
, id
);
2926 if ((status
= vnode
.Get(&attribute
)) == B_OK
) {
2927 *_type
= attribute
->Type();
// Shifts the saved small_data position by \a change if the item at \a index
// lies before it, so that the iterator keeps referring to the same next item.
// NOTE(review): presumably called when small_data items are inserted or
// removed — confirm against the callers.
2937 AttributeIterator::Update(uint16 index
, int8 change
)
2939 // fCurrentSmallData points already to the next item
2940 if (index
< fCurrentSmallData
)
2941 fCurrentSmallData
+= change
;