// Copyright 2015 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "chrome/utility/safe_browsing/mac/hfs.h"

#include <libkern/OSByteOrder.h>
#include <stddef.h>
#include <stdint.h>
#include <string.h>
#include <sys/stat.h>

#include <algorithm>
#include <map>
#include <set>
#include <vector>

#include "base/logging.h"
#include "base/macros.h"
#include "base/numerics/safe_math.h"
#include "base/strings/utf_string_conversions.h"
#include "chrome/utility/safe_browsing/mac/convert_big_endian.h"
#include "chrome/utility/safe_browsing/mac/read_stream.h"
namespace safe_browsing {

// UTF-16 character for file path separator.
static const uint16_t kFilePathSeparator = '/';
static void ConvertBigEndian(HFSPlusForkData* fork) {
  ConvertBigEndian(&fork->logicalSize);
  ConvertBigEndian(&fork->clumpSize);
  ConvertBigEndian(&fork->totalBlocks);
  for (size_t i = 0; i < arraysize(fork->extents); ++i) {
    ConvertBigEndian(&fork->extents[i].startBlock);
    ConvertBigEndian(&fork->extents[i].blockCount);
  }
}
static void ConvertBigEndian(HFSPlusVolumeHeader* header) {
  ConvertBigEndian(&header->signature);
  ConvertBigEndian(&header->version);
  ConvertBigEndian(&header->attributes);
  ConvertBigEndian(&header->lastMountedVersion);
  ConvertBigEndian(&header->journalInfoBlock);
  ConvertBigEndian(&header->createDate);
  ConvertBigEndian(&header->modifyDate);
  ConvertBigEndian(&header->backupDate);
  ConvertBigEndian(&header->checkedDate);
  ConvertBigEndian(&header->fileCount);
  ConvertBigEndian(&header->folderCount);
  ConvertBigEndian(&header->blockSize);
  ConvertBigEndian(&header->totalBlocks);
  ConvertBigEndian(&header->freeBlocks);
  ConvertBigEndian(&header->nextAllocation);
  ConvertBigEndian(&header->rsrcClumpSize);
  ConvertBigEndian(&header->dataClumpSize);
  ConvertBigEndian(&header->nextCatalogID);
  ConvertBigEndian(&header->writeCount);
  ConvertBigEndian(&header->encodingsBitmap);
  ConvertBigEndian(&header->allocationFile);
  ConvertBigEndian(&header->extentsFile);
  ConvertBigEndian(&header->catalogFile);
  ConvertBigEndian(&header->attributesFile);
  ConvertBigEndian(&header->startupFile);
}
static void ConvertBigEndian(BTHeaderRec* header) {
  ConvertBigEndian(&header->treeDepth);
  ConvertBigEndian(&header->rootNode);
  ConvertBigEndian(&header->leafRecords);
  ConvertBigEndian(&header->firstLeafNode);
  ConvertBigEndian(&header->lastLeafNode);
  ConvertBigEndian(&header->nodeSize);
  ConvertBigEndian(&header->maxKeyLength);
  ConvertBigEndian(&header->totalNodes);
  ConvertBigEndian(&header->freeNodes);
  ConvertBigEndian(&header->reserved1);
  ConvertBigEndian(&header->clumpSize);
  ConvertBigEndian(&header->attributes);
}
static void ConvertBigEndian(BTNodeDescriptor* node) {
  ConvertBigEndian(&node->fLink);
  ConvertBigEndian(&node->bLink);
  ConvertBigEndian(&node->numRecords);
}
static void ConvertBigEndian(HFSPlusCatalogFolder* folder) {
  ConvertBigEndian(&folder->recordType);
  ConvertBigEndian(&folder->flags);
  ConvertBigEndian(&folder->valence);
  ConvertBigEndian(&folder->folderID);
  ConvertBigEndian(&folder->createDate);
  ConvertBigEndian(&folder->contentModDate);
  ConvertBigEndian(&folder->attributeModDate);
  ConvertBigEndian(&folder->accessDate);
  ConvertBigEndian(&folder->backupDate);
  ConvertBigEndian(&folder->bsdInfo.ownerID);
  ConvertBigEndian(&folder->bsdInfo.groupID);
  ConvertBigEndian(&folder->bsdInfo.fileMode);
  ConvertBigEndian(&folder->textEncoding);
  ConvertBigEndian(&folder->folderCount);
}
static void ConvertBigEndian(HFSPlusCatalogFile* file) {
  ConvertBigEndian(&file->recordType);
  ConvertBigEndian(&file->flags);
  ConvertBigEndian(&file->reserved1);
  ConvertBigEndian(&file->fileID);
  ConvertBigEndian(&file->createDate);
  ConvertBigEndian(&file->contentModDate);
  ConvertBigEndian(&file->attributeModDate);
  ConvertBigEndian(&file->accessDate);
  ConvertBigEndian(&file->backupDate);
  ConvertBigEndian(&file->bsdInfo.ownerID);
  ConvertBigEndian(&file->bsdInfo.groupID);
  ConvertBigEndian(&file->bsdInfo.fileMode);
  ConvertBigEndian(&file->userInfo.fdType);
  ConvertBigEndian(&file->userInfo.fdCreator);
  ConvertBigEndian(&file->userInfo.fdFlags);
  ConvertBigEndian(&file->textEncoding);
  ConvertBigEndian(&file->reserved2);
  ConvertBigEndian(&file->dataFork);
  ConvertBigEndian(&file->resourceFork);
}
// A ReadStream implementation for an HFS+ fork. This only consults the eight
// fork extents. This does not consult the extent overflow file.
class HFSForkReadStream : public ReadStream {
 public:
  HFSForkReadStream(HFSIterator* hfs, const HFSPlusForkData& fork);
  ~HFSForkReadStream() override;

  bool Read(uint8_t* buffer, size_t buffer_size, size_t* bytes_read) override;
  // Seek only supports SEEK_SET.
  off_t Seek(off_t offset, int whence) override;

 private:
  HFSIterator* const hfs_;  // The HFS+ iterator.
  const HFSPlusForkData fork_;  // The fork to be read.
  uint8_t current_extent_;  // The current extent index in the fork.
  bool read_current_extent_;  // Whether the current_extent_ has been read.
  std::vector<uint8_t> current_extent_data_;  // Data for |current_extent_|.
  size_t fork_logical_offset_;  // The logical offset into the fork.

  DISALLOW_COPY_AND_ASSIGN(HFSForkReadStream);
};
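
// Example (illustrative only): reading an entire fork through
// HFSForkReadStream. |hfs| and |fork| are hypothetical values supplied by a
// caller that has already opened and byte-swapped the volume structures.
//
//   HFSForkReadStream stream(hfs, fork);
//   std::vector<uint8_t> data(fork.logicalSize);
//   size_t bytes_read = 0;
//   if (stream.Read(&data[0], data.size(), &bytes_read))
//     DCHECK_EQ(bytes_read, data.size());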
// HFSBTreeIterator iterates over the HFS+ catalog file.
class HFSBTreeIterator {
 public:
  struct Entry {
    uint16_t record_type;  // Catalog folder item type.
    base::string16 path;  // Full path to the item.
    bool unexported;  // Whether this is HFS+ private data.
    union {
      HFSPlusCatalogFile* file;
      HFSPlusCatalogFolder* folder;
    };
  };

  HFSBTreeIterator();
  ~HFSBTreeIterator();

  bool Init(ReadStream* stream);

  bool HasNext();
  bool Next();

  const Entry* current_record() const { return &current_record_; }

 private:
  // Seeks |stream_| to the catalog node ID.
  bool SeekToNode(uint32_t node_id);

  // If required, reads the current leaf into |leaf_data_| and updates the
  // leaf-tracking state.
  bool ReadCurrentLeaf();

  // Returns a pointer to data at |current_leaf_offset_| in |leaf_data_|. This
  // then advances the offset by the size of the object being returned.
  template <typename T> T* GetLeafData();

  // Checks if the HFS+ catalog key is a Mac OS X reserved key that should not
  // have it or its contents iterated over.
  bool IsKeyUnexported(const base::string16& path);

  ReadStream* stream_;  // The stream backing the catalog file.
  BTHeaderRec header_;  // The header B-tree node.

  // Maps CNIDs to their full path. This is used to construct full paths for
  // items that descend from the folders in this map.
  std::map<uint32_t, base::string16> folder_cnid_map_;

  // CNIDs of the non-exported folders reserved by OS X. If an item has this
  // CNID as a parent, it should be skipped.
  std::set<uint32_t> unexported_parents_;

  // The total number of leaf records read from all the leaf nodes.
  uint32_t leaf_records_read_;

  // The number of records read from the current leaf node.
  uint32_t current_leaf_records_read_;
  uint32_t current_leaf_number_;  // The node ID of the leaf being read.
  // Whether the |current_leaf_number_|'s data has been read into the
  // |leaf_data_| buffer.
  bool read_current_leaf_;
  // The node data for |current_leaf_number_| copied from |stream_|.
  std::vector<uint8_t> leaf_data_;
  size_t current_leaf_offset_;  // The offset in |leaf_data_|.

  // Pointer to |leaf_data_| as a BTNodeDescriptor.
  const BTNodeDescriptor* current_leaf_;
  Entry current_record_;  // The record read at |current_leaf_offset_|.

  // Constant, string16 versions of the __APPLE_API_PRIVATE values.
  const base::string16 kHFSMetadataFolder =
      base::UTF8ToUTF16(base::StringPiece("\x0\x0\x0\x0HFS+ Private Data", 21));
  const base::string16 kHFSDirMetadataFolder =
      base::UTF8ToUTF16(".HFS+ Private Directory Data\xd");

  DISALLOW_COPY_AND_ASSIGN(HFSBTreeIterator);
};
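
// Example (illustrative only): walking the catalog with HFSBTreeIterator.
// |catalog_stream| is a hypothetical ReadStream over the catalog fork, of
// the kind ReadCatalogFile() constructs below.
//
//   HFSBTreeIterator iterator;
//   if (iterator.Init(catalog_stream)) {
//     while (iterator.HasNext() && iterator.Next()) {
//       const HFSBTreeIterator::Entry* entry = iterator.current_record();
//       // Inspect entry->record_type, entry->path, entry->unexported.
//     }
//   }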
HFSIterator::HFSIterator(ReadStream* stream)
    : stream_(stream),
      volume_header_() {
}

HFSIterator::~HFSIterator() {}
bool HFSIterator::Open() {
  if (stream_->Seek(1024, SEEK_SET) != 1024)
    return false;

  if (!stream_->ReadType(&volume_header_)) {
    DLOG(ERROR) << "Failed to read volume header";
    return false;
  }
  ConvertBigEndian(&volume_header_);

  if (volume_header_.signature != kHFSPlusSigWord &&
      volume_header_.signature != kHFSXSigWord) {
    DLOG(ERROR) << "Unrecognized volume header signature "
                << volume_header_.signature;
    return false;
  }

  if (!ReadCatalogFile())
    return false;

  return true;
}
bool HFSIterator::Next() {
  if (!catalog_->HasNext())
    return false;

  // The iterator should only stop on file and folders, skipping over "thread
  // records". In addition, unexported private files and directories should be
  // skipped as well.
  bool keep_going = false;
  do {
    keep_going = catalog_->Next();
    if (keep_going) {
      if (!catalog_->current_record()->unexported &&
          (catalog_->current_record()->record_type == kHFSPlusFolderRecord ||
           catalog_->current_record()->record_type == kHFSPlusFileRecord)) {
        keep_going = false;
      } else {
        keep_going = catalog_->HasNext();
      }
    }
  } while (keep_going);

  return true;
}
bool HFSIterator::IsDirectory() {
  return catalog_->current_record()->record_type == kHFSPlusFolderRecord;
}
bool HFSIterator::IsSymbolicLink() {
  if (IsDirectory())
    return S_ISLNK(catalog_->current_record()->folder->bsdInfo.fileMode);
  else
    return S_ISLNK(catalog_->current_record()->file->bsdInfo.fileMode);
}
bool HFSIterator::IsHardLink() {
  if (IsDirectory())
    return false;
  const HFSPlusCatalogFile* file = catalog_->current_record()->file;
  return file->userInfo.fdType == kHardLinkFileType &&
         file->userInfo.fdCreator == kHFSPlusCreator;
}
bool HFSIterator::IsDecmpfsCompressed() {
  if (IsDirectory())
    return false;
  const HFSPlusCatalogFile* file = catalog_->current_record()->file;
  return file->bsdInfo.ownerFlags & UF_COMPRESSED;
}
base::string16 HFSIterator::GetPath() {
  return catalog_->current_record()->path;
}
scoped_ptr<ReadStream> HFSIterator::GetReadStream() {
  if (IsDirectory() || IsHardLink())
    return nullptr;

  DCHECK_EQ(kHFSPlusFileRecord, catalog_->current_record()->record_type);
  return make_scoped_ptr(
      new HFSForkReadStream(this, catalog_->current_record()->file->dataFork));
}
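
// Example (illustrative only): end-to-end use of the public HFSIterator API.
// FileReadStream is assumed to be the fd-backed ReadStream from
// read_stream.h; |fd| is a hypothetical descriptor for a raw HFS+ volume
// image.
//
//   FileReadStream volume_stream(fd);
//   HFSIterator iterator(&volume_stream);
//   if (iterator.Open()) {
//     while (iterator.Next()) {
//       if (!iterator.IsDirectory() && !iterator.IsHardLink()) {
//         scoped_ptr<ReadStream> file = iterator.GetReadStream();
//         // Read the file's data fork via |file|.
//       }
//     }
//   }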
bool HFSIterator::SeekToBlock(uint64_t block) {
  uint64_t offset = block * volume_header_.blockSize;
  off_t rv = stream_->Seek(offset, SEEK_SET);
  return rv >= 0 && static_cast<uint64_t>(rv) == offset;
}
bool HFSIterator::ReadCatalogFile() {
  catalog_file_.reset(new HFSForkReadStream(this, volume_header_.catalogFile));
  catalog_.reset(new HFSBTreeIterator());
  return catalog_->Init(catalog_file_.get());
}
HFSForkReadStream::HFSForkReadStream(HFSIterator* hfs,
                                     const HFSPlusForkData& fork)
    : hfs_(hfs),
      fork_(fork),
      current_extent_(0),
      read_current_extent_(false),
      current_extent_data_(),
      fork_logical_offset_(0) {
}

HFSForkReadStream::~HFSForkReadStream() {}
bool HFSForkReadStream::Read(uint8_t* buffer,
                             size_t buffer_size,
                             size_t* bytes_read) {
  size_t buffer_space_remaining = buffer_size;
  *bytes_read = 0;

  if (fork_logical_offset_ == fork_.logicalSize)
    return true;

  for (; current_extent_ < arraysize(fork_.extents); ++current_extent_) {
    // If the buffer is out of space, do not attempt any reads. Check this
    // here, so that current_extent_ is advanced by the loop if the last
    // extent was fully read.
    if (buffer_space_remaining == 0)
      break;

    const HFSPlusExtentDescriptor* extent = &fork_.extents[current_extent_];

    // A zero-length extent means end-of-fork.
    if (extent->startBlock == 0 && extent->blockCount == 0)
      break;

    auto extent_size =
        base::CheckedNumeric<size_t>(extent->blockCount) * hfs_->block_size();
    if (!extent_size.IsValid()) {
      DLOG(ERROR) << "Extent blockCount overflows";
      return false;
    }

    // Read the entire extent now, to avoid excessive seeking and re-reading.
    if (!read_current_extent_) {
      hfs_->SeekToBlock(extent->startBlock);
      current_extent_data_.resize(extent_size.ValueOrDie());
      if (!hfs_->stream()->ReadExact(&current_extent_data_[0],
                                     extent_size.ValueOrDie())) {
        DLOG(ERROR) << "Failed to read extent " << current_extent_;
        return false;
      }

      read_current_extent_ = true;
    }

    size_t extent_offset = fork_logical_offset_ % extent_size.ValueOrDie();
    size_t bytes_to_copy =
        std::min(std::min(static_cast<size_t>(fork_.logicalSize) -
                              fork_logical_offset_,
                          extent_size.ValueOrDie() - extent_offset),
                 buffer_space_remaining);

    memcpy(&buffer[buffer_size - buffer_space_remaining],
           &current_extent_data_[extent_offset],
           bytes_to_copy);

    buffer_space_remaining -= bytes_to_copy;
    *bytes_read += bytes_to_copy;
    fork_logical_offset_ += bytes_to_copy;

    // If the fork's data have been read, then end the loop.
    if (fork_logical_offset_ == fork_.logicalSize)
      return true;

    // If this extent still has data to be copied out, then the read was
    // partial and the buffer is full. Do not advance to the next extent.
    if (extent_offset < current_extent_data_.size())
      break;

    // Advance to the next extent, so reset the state.
    read_current_extent_ = false;
  }

  return true;
}
off_t HFSForkReadStream::Seek(off_t offset, int whence) {
  DCHECK_EQ(SEEK_SET, whence);
  DCHECK_GE(offset, 0);
  DCHECK_LT(static_cast<uint64_t>(offset), fork_.logicalSize);
  size_t target_block = offset / hfs_->block_size();
  size_t block_count = 0;
  for (size_t i = 0; i < arraysize(fork_.extents); ++i) {
    const HFSPlusExtentDescriptor* extent = &fork_.extents[i];

    // An empty extent indicates end-of-fork.
    if (extent->startBlock == 0 && extent->blockCount == 0)
      break;

    base::CheckedNumeric<size_t> new_block_count(block_count);
    new_block_count += extent->blockCount;
    if (!new_block_count.IsValid()) {
      DLOG(ERROR) << "Seek offset block count overflows";
      return -1;
    }

    if (target_block < new_block_count.ValueOrDie()) {
      if (current_extent_ != i) {
        read_current_extent_ = false;
        current_extent_ = i;
      }
      auto iterator_block_offset =
          base::CheckedNumeric<size_t>(block_count) * hfs_->block_size();
      if (!iterator_block_offset.IsValid()) {
        DLOG(ERROR) << "Seek block offset overflows";
        return -1;
      }
      fork_logical_offset_ = offset;
      return offset;
    }

    block_count = new_block_count.ValueOrDie();
  }

  return -1;
}
HFSBTreeIterator::HFSBTreeIterator()
    : stream_(),
      header_(),
      leaf_records_read_(0),
      current_leaf_records_read_(0),
      current_leaf_number_(0),
      read_current_leaf_(false),
      leaf_data_(),
      current_leaf_offset_(0),
      current_leaf_(nullptr) {
}

HFSBTreeIterator::~HFSBTreeIterator() {}
bool HFSBTreeIterator::Init(ReadStream* stream) {
  DCHECK(!stream_);
  stream_ = stream;

  if (stream_->Seek(0, SEEK_SET) != 0) {
    DLOG(ERROR) << "Failed to seek to header node";
    return false;
  }

  BTNodeDescriptor node;
  if (!stream_->ReadType(&node)) {
    DLOG(ERROR) << "Failed to read BTNodeDescriptor";
    return false;
  }
  ConvertBigEndian(&node);

  if (node.kind != kBTHeaderNode) {
    DLOG(ERROR) << "Initial node is not a header node";
    return false;
  }

  if (!stream_->ReadType(&header_)) {
    DLOG(ERROR) << "Failed to read BTHeaderRec";
    return false;
  }
  ConvertBigEndian(&header_);

  current_leaf_number_ = header_.firstLeafNode;
  leaf_data_.resize(header_.nodeSize);

  return true;
}
bool HFSBTreeIterator::HasNext() {
  return leaf_records_read_ < header_.leafRecords;
}
bool HFSBTreeIterator::Next() {
  if (!ReadCurrentLeaf())
    return false;

  GetLeafData<uint16_t>();  // keyLength
  auto parent_id = OSSwapBigToHostInt32(*GetLeafData<uint32_t>());
  auto key_string_length = OSSwapBigToHostInt16(*GetLeafData<uint16_t>());
  auto key_string =
      reinterpret_cast<uint16_t*>(&leaf_data_[current_leaf_offset_]);
  for (uint16_t i = 0;
       i < key_string_length;
       ++i, current_leaf_offset_ += sizeof(uint16_t)) {
    key_string[i] = OSSwapBigToHostInt16(key_string[i]);
  }
  base::string16 key(key_string, key_string_length);

  // Read the record type and then rewind as the field is part of the catalog
  // structure that is read next.
  current_record_.record_type = OSSwapBigToHostInt16(*GetLeafData<int16_t>());
  current_record_.unexported = false;
  current_leaf_offset_ -= sizeof(int16_t);
  switch (current_record_.record_type) {
    case kHFSPlusFolderRecord: {
      auto folder = GetLeafData<HFSPlusCatalogFolder>();
      ConvertBigEndian(folder);
      ++leaf_records_read_;
      ++current_leaf_records_read_;

      // If this key is unexported, or the parent folder is, then mark the
      // record as such.
      if (IsKeyUnexported(key) ||
          unexported_parents_.find(parent_id) != unexported_parents_.end()) {
        unexported_parents_.insert(folder->folderID);
        current_record_.unexported = true;
      }

      // Update the CNID map to construct the path tree.
      if (parent_id != 0) {
        auto parent_name = folder_cnid_map_.find(parent_id);
        if (parent_name != folder_cnid_map_.end())
          key = parent_name->second + kFilePathSeparator + key;
      }
      folder_cnid_map_[folder->folderID] = key;

      current_record_.path = key;
      current_record_.folder = folder;
      break;
    }
    case kHFSPlusFileRecord: {
      auto file = GetLeafData<HFSPlusCatalogFile>();
      ConvertBigEndian(file);
      ++leaf_records_read_;
      ++current_leaf_records_read_;

      base::string16 path =
          folder_cnid_map_[parent_id] + kFilePathSeparator + key;
      current_record_.path = path;
      current_record_.file = file;
      current_record_.unexported =
          unexported_parents_.find(parent_id) != unexported_parents_.end();
      break;
    }
    case kHFSPlusFolderThreadRecord:
    case kHFSPlusFileThreadRecord: {
      // Thread records are used to quickly locate a file or folder just by
      // CNID. As these are not necessary for the iterator, skip past the data.
      GetLeafData<uint16_t>();  // recordType
      GetLeafData<uint16_t>();  // reserved
      GetLeafData<uint32_t>();  // parentID
      auto string_length = OSSwapBigToHostInt16(*GetLeafData<uint16_t>());
      for (uint16_t i = 0; i < string_length; ++i)
        GetLeafData<uint16_t>();
      ++leaf_records_read_;
      ++current_leaf_records_read_;
      break;
    }
    default:
      DLOG(ERROR) << "Unknown record type " << current_record_.record_type;
      return false;
  }

  // If all the records from this leaf have been read, follow the forward link
  // to the next B-Tree leaf node.
  if (current_leaf_records_read_ >= current_leaf_->numRecords) {
    current_leaf_number_ = current_leaf_->fLink;
    read_current_leaf_ = false;
  }

  return true;
}
bool HFSBTreeIterator::SeekToNode(uint32_t node_id) {
  if (node_id >= header_.totalNodes)
    return false;
  size_t offset = node_id * header_.nodeSize;
  if (stream_->Seek(offset, SEEK_SET) != -1) {
    current_leaf_number_ = node_id;
    return true;
  }
  return false;
}
bool HFSBTreeIterator::ReadCurrentLeaf() {
  if (read_current_leaf_)
    return true;

  if (!SeekToNode(current_leaf_number_)) {
    DLOG(ERROR) << "Failed to seek to node " << current_leaf_number_;
    return false;
  }

  if (!stream_->ReadExact(&leaf_data_[0], header_.nodeSize)) {
    DLOG(ERROR) << "Failed to read node " << current_leaf_number_;
    return false;
  }

  auto leaf = reinterpret_cast<BTNodeDescriptor*>(&leaf_data_[0]);
  ConvertBigEndian(leaf);
  if (leaf->kind != kBTLeafNode) {
    DLOG(ERROR) << "Node " << current_leaf_number_ << " is not a leaf";
    return false;
  }

  current_leaf_ = leaf;
  current_leaf_offset_ = sizeof(BTNodeDescriptor);
  current_leaf_records_read_ = 0;
  read_current_leaf_ = true;
  return true;
}
template <typename T>
T* HFSBTreeIterator::GetLeafData() {
  base::CheckedNumeric<size_t> size = sizeof(T);
  auto new_offset = size + current_leaf_offset_;
  if (!new_offset.IsValid() || new_offset.ValueOrDie() >= leaf_data_.size())
    return nullptr;
  T* object = reinterpret_cast<T*>(&leaf_data_[current_leaf_offset_]);
  current_leaf_offset_ = new_offset.ValueOrDie();
  return object;
}
bool HFSBTreeIterator::IsKeyUnexported(const base::string16& key) {
  return key == kHFSDirMetadataFolder ||
         key == kHFSMetadataFolder;
}

}  // namespace safe_browsing