Roll src/third_party/WebKit d9c6159:8139f33 (svn 201974:201975)
[chromium-blink-merge.git] / chrome / utility / safe_browsing / mac / hfs.cc
blob185d11bb43cf4cf11ce9e85d77a38c466030e0c3
1 // Copyright 2015 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "chrome/utility/safe_browsing/mac/hfs.h"
7 #include <libkern/OSByteOrder.h>
8 #include <sys/stat.h>
10 #include <map>
11 #include <set>
12 #include <vector>
14 #include "base/logging.h"
15 #include "base/numerics/safe_math.h"
16 #include "base/strings/utf_string_conversions.h"
17 #include "chrome/utility/safe_browsing/mac/convert_big_endian.h"
18 #include "chrome/utility/safe_browsing/mac/read_stream.h"
20 namespace safe_browsing {
21 namespace dmg {
// UTF-16 code unit used as the separator between file path components.
static constexpr uint16_t kFilePathSeparator = '/';
26 static void ConvertBigEndian(HFSPlusForkData* fork) {
27 ConvertBigEndian(&fork->logicalSize);
28 ConvertBigEndian(&fork->clumpSize);
29 ConvertBigEndian(&fork->totalBlocks);
30 for (size_t i = 0; i < arraysize(fork->extents); ++i) {
31 ConvertBigEndian(&fork->extents[i].startBlock);
32 ConvertBigEndian(&fork->extents[i].blockCount);
36 static void ConvertBigEndian(HFSPlusVolumeHeader* header) {
37 ConvertBigEndian(&header->signature);
38 ConvertBigEndian(&header->version);
39 ConvertBigEndian(&header->attributes);
40 ConvertBigEndian(&header->lastMountedVersion);
41 ConvertBigEndian(&header->journalInfoBlock);
42 ConvertBigEndian(&header->createDate);
43 ConvertBigEndian(&header->modifyDate);
44 ConvertBigEndian(&header->backupDate);
45 ConvertBigEndian(&header->checkedDate);
46 ConvertBigEndian(&header->fileCount);
47 ConvertBigEndian(&header->folderCount);
48 ConvertBigEndian(&header->blockSize);
49 ConvertBigEndian(&header->totalBlocks);
50 ConvertBigEndian(&header->freeBlocks);
51 ConvertBigEndian(&header->nextAllocation);
52 ConvertBigEndian(&header->rsrcClumpSize);
53 ConvertBigEndian(&header->dataClumpSize);
54 ConvertBigEndian(&header->nextCatalogID);
55 ConvertBigEndian(&header->writeCount);
56 ConvertBigEndian(&header->encodingsBitmap);
57 ConvertBigEndian(&header->allocationFile);
58 ConvertBigEndian(&header->extentsFile);
59 ConvertBigEndian(&header->catalogFile);
60 ConvertBigEndian(&header->attributesFile);
61 ConvertBigEndian(&header->startupFile);
64 static void ConvertBigEndian(BTHeaderRec* header) {
65 ConvertBigEndian(&header->treeDepth);
66 ConvertBigEndian(&header->rootNode);
67 ConvertBigEndian(&header->leafRecords);
68 ConvertBigEndian(&header->firstLeafNode);
69 ConvertBigEndian(&header->lastLeafNode);
70 ConvertBigEndian(&header->nodeSize);
71 ConvertBigEndian(&header->maxKeyLength);
72 ConvertBigEndian(&header->totalNodes);
73 ConvertBigEndian(&header->freeNodes);
74 ConvertBigEndian(&header->reserved1);
75 ConvertBigEndian(&header->clumpSize);
76 ConvertBigEndian(&header->attributes);
79 static void ConvertBigEndian(BTNodeDescriptor* node) {
80 ConvertBigEndian(&node->fLink);
81 ConvertBigEndian(&node->bLink);
82 ConvertBigEndian(&node->numRecords);
85 static void ConvertBigEndian(HFSPlusCatalogFolder* folder) {
86 ConvertBigEndian(&folder->recordType);
87 ConvertBigEndian(&folder->flags);
88 ConvertBigEndian(&folder->valence);
89 ConvertBigEndian(&folder->folderID);
90 ConvertBigEndian(&folder->createDate);
91 ConvertBigEndian(&folder->contentModDate);
92 ConvertBigEndian(&folder->attributeModDate);
93 ConvertBigEndian(&folder->accessDate);
94 ConvertBigEndian(&folder->backupDate);
95 ConvertBigEndian(&folder->bsdInfo.ownerID);
96 ConvertBigEndian(&folder->bsdInfo.groupID);
97 ConvertBigEndian(&folder->bsdInfo.fileMode);
98 ConvertBigEndian(&folder->textEncoding);
99 ConvertBigEndian(&folder->folderCount);
102 static void ConvertBigEndian(HFSPlusCatalogFile* file) {
103 ConvertBigEndian(&file->recordType);
104 ConvertBigEndian(&file->flags);
105 ConvertBigEndian(&file->reserved1);
106 ConvertBigEndian(&file->fileID);
107 ConvertBigEndian(&file->createDate);
108 ConvertBigEndian(&file->contentModDate);
109 ConvertBigEndian(&file->attributeModDate);
110 ConvertBigEndian(&file->accessDate);
111 ConvertBigEndian(&file->backupDate);
112 ConvertBigEndian(&file->bsdInfo.ownerID);
113 ConvertBigEndian(&file->bsdInfo.groupID);
114 ConvertBigEndian(&file->bsdInfo.fileMode);
115 ConvertBigEndian(&file->userInfo.fdType);
116 ConvertBigEndian(&file->userInfo.fdCreator);
117 ConvertBigEndian(&file->userInfo.fdFlags);
118 ConvertBigEndian(&file->textEncoding);
119 ConvertBigEndian(&file->reserved2);
120 ConvertBigEndian(&file->dataFork);
121 ConvertBigEndian(&file->resourceFork);
124 // A ReadStream implementation for an HFS+ fork. This only consults the eight
125 // fork extents. This does not consult the extent overflow file.
126 class HFSForkReadStream : public ReadStream {
127 public:
128 HFSForkReadStream(HFSIterator* hfs, const HFSPlusForkData& fork);
129 ~HFSForkReadStream() override;
131 bool Read(uint8_t* buffer, size_t buffer_size, size_t* bytes_read) override;
132 // Seek only supports SEEK_SET.
133 off_t Seek(off_t offset, int whence) override;
135 private:
136 HFSIterator* const hfs_; // The HFS+ iterator.
137 const HFSPlusForkData fork_; // The fork to be read.
138 uint8_t current_extent_; // The current extent index in the fork.
139 bool read_current_extent_; // Whether the current_extent_ has been read.
140 std::vector<uint8_t> current_extent_data_; // Data for |current_extent_|.
141 size_t fork_logical_offset_; // The logical offset into the fork.
143 DISALLOW_COPY_AND_ASSIGN(HFSForkReadStream);
146 // HFSBTreeIterator iterates over the HFS+ catalog file.
147 class HFSBTreeIterator {
148 public:
149 struct Entry {
150 uint16_t record_type; // Catalog folder item type.
151 base::string16 path; // Full path to the item.
152 bool unexported; // Whether this is HFS+ private data.
153 union {
154 HFSPlusCatalogFile* file;
155 HFSPlusCatalogFolder* folder;
159 HFSBTreeIterator();
160 ~HFSBTreeIterator();
162 bool Init(ReadStream* stream);
164 bool HasNext();
165 bool Next();
167 const Entry* current_record() const { return &current_record_; }
169 private:
170 // Seeks |stream_| to the catalog node ID.
171 bool SeekToNode(uint32_t node_id);
173 // If required, reads the current leaf into |leaf_data_| and updates the
174 // buffer offsets.
175 bool ReadCurrentLeaf();
177 // Returns a pointer to data at |current_leaf_offset_| in |leaf_data_|. This
178 // then advances the offset by the size of the object being returned.
179 template <typename T> T* GetLeafData();
181 // Checks if the HFS+ catalog key is a Mac OS X reserved key that should not
182 // have it or its contents iterated over.
183 bool IsKeyUnexported(const base::string16& path);
185 ReadStream* stream_; // The stream backing the catalog file.
186 BTHeaderRec header_; // The header B-tree node.
188 // Maps CNIDs to their full path. This is used to construct full paths for
189 // items that descend from the folders in this map.
190 std::map<uint32_t, base::string16> folder_cnid_map_;
192 // CNIDs of the non-exported folders reserved by OS X. If an item has this
193 // CNID as a parent, it should be skipped.
194 std::set<uint32_t> unexported_parents_;
196 // The total number of leaf records read from all the leaf nodes.
197 uint32_t leaf_records_read_;
199 // The number of records read from the current leaf node.
200 uint32_t current_leaf_records_read_;
201 uint32_t current_leaf_number_; // The node ID of the leaf being read.
202 // Whether the |current_leaf_number_|'s data has been read into the
203 // |leaf_data_| buffer.
204 bool read_current_leaf_;
205 // The node data for |current_leaf_number_| copied from |stream_|.
206 std::vector<uint8_t> leaf_data_;
207 size_t current_leaf_offset_; // The offset in |leaf_data_|.
209 // Pointer to |leaf_data_| as a BTNodeDescriptor.
210 const BTNodeDescriptor* current_leaf_;
211 Entry current_record_; // The record read at |current_leaf_offset_|.
213 // Constant, string16 versions of the __APPLE_API_PRIVATE values.
214 const base::string16 kHFSMetadataFolder =
215 base::UTF8ToUTF16(base::StringPiece("\x0\x0\x0\x0HFS+ Private Data", 21));
216 const base::string16 kHFSDirMetadataFolder =
217 base::UTF8ToUTF16(".HFS+ Private Directory Data\xd");
219 DISALLOW_COPY_AND_ASSIGN(HFSBTreeIterator);
222 HFSIterator::HFSIterator(ReadStream* stream)
223 : stream_(stream),
224 volume_header_() {
227 HFSIterator::~HFSIterator() {}
229 bool HFSIterator::Open() {
230 if (stream_->Seek(1024, SEEK_SET) != 1024)
231 return false;
233 if (!stream_->ReadType(&volume_header_)) {
234 DLOG(ERROR) << "Failed to read volume header";
235 return false;
237 ConvertBigEndian(&volume_header_);
239 if (volume_header_.signature != kHFSPlusSigWord &&
240 volume_header_.signature != kHFSXSigWord) {
241 DLOG(ERROR) << "Unrecognized volume header signature "
242 << volume_header_.signature;
243 return false;
246 if (!ReadCatalogFile())
247 return false;
249 return true;
252 bool HFSIterator::Next() {
253 if (!catalog_->HasNext())
254 return false;
256 // The iterator should only stop on file and folders, skipping over "thread
257 // records". In addition, unexported private files and directories should be
258 // skipped as well.
259 bool keep_going = false;
260 do {
261 keep_going = catalog_->Next();
262 if (keep_going) {
263 if (!catalog_->current_record()->unexported &&
264 (catalog_->current_record()->record_type == kHFSPlusFolderRecord ||
265 catalog_->current_record()->record_type == kHFSPlusFileRecord)) {
266 return true;
268 keep_going = catalog_->HasNext();
270 } while (keep_going);
272 return keep_going;
275 bool HFSIterator::IsDirectory() {
276 return catalog_->current_record()->record_type == kHFSPlusFolderRecord;
279 bool HFSIterator::IsSymbolicLink() {
280 if (IsDirectory())
281 return S_ISLNK(catalog_->current_record()->folder->bsdInfo.fileMode);
282 else
283 return S_ISLNK(catalog_->current_record()->file->bsdInfo.fileMode);
286 bool HFSIterator::IsHardLink() {
287 if (IsDirectory())
288 return false;
289 const HFSPlusCatalogFile* file = catalog_->current_record()->file;
290 return file->userInfo.fdType == kHardLinkFileType &&
291 file->userInfo.fdCreator == kHFSPlusCreator;
294 bool HFSIterator::IsDecmpfsCompressed() {
295 if (IsDirectory())
296 return false;
297 const HFSPlusCatalogFile* file = catalog_->current_record()->file;
298 return file->bsdInfo.ownerFlags & UF_COMPRESSED;
301 base::string16 HFSIterator::GetPath() {
302 return catalog_->current_record()->path;
305 scoped_ptr<ReadStream> HFSIterator::GetReadStream() {
306 if (IsDirectory() || IsHardLink())
307 return nullptr;
309 DCHECK_EQ(kHFSPlusFileRecord, catalog_->current_record()->record_type);
310 return make_scoped_ptr(
311 new HFSForkReadStream(this, catalog_->current_record()->file->dataFork));
314 bool HFSIterator::SeekToBlock(uint64_t block) {
315 uint64_t offset = block * volume_header_.blockSize;
316 off_t rv = stream_->Seek(offset, SEEK_SET);
317 return rv >= 0 && static_cast<uint64_t>(rv) == offset;
320 bool HFSIterator::ReadCatalogFile() {
321 catalog_file_.reset(new HFSForkReadStream(this, volume_header_.catalogFile));
322 catalog_.reset(new HFSBTreeIterator());
323 return catalog_->Init(catalog_file_.get());
326 HFSForkReadStream::HFSForkReadStream(HFSIterator* hfs,
327 const HFSPlusForkData& fork)
328 : hfs_(hfs),
329 fork_(fork),
330 current_extent_(0),
331 read_current_extent_(false),
332 current_extent_data_(),
333 fork_logical_offset_(0) {
336 HFSForkReadStream::~HFSForkReadStream() {}
338 bool HFSForkReadStream::Read(uint8_t* buffer,
339 size_t buffer_size,
340 size_t* bytes_read) {
341 size_t buffer_space_remaining = buffer_size;
342 *bytes_read = 0;
344 if (fork_logical_offset_ == fork_.logicalSize)
345 return true;
347 for (; current_extent_ < arraysize(fork_.extents); ++current_extent_) {
348 // If the buffer is out of space, do not attempt any reads. Check this
349 // here, so that current_extent_ is advanced by the loop if the last
350 // extent was fully read.
351 if (buffer_space_remaining == 0)
352 break;
354 const HFSPlusExtentDescriptor* extent = &fork_.extents[current_extent_];
356 // A zero-length extent means end-of-fork.
357 if (extent->startBlock == 0 && extent->blockCount == 0)
358 break;
360 auto extent_size =
361 base::CheckedNumeric<size_t>(extent->blockCount) * hfs_->block_size();
362 if (!extent_size.IsValid()) {
363 DLOG(ERROR) << "Extent blockCount overflows";
364 return false;
367 // Read the entire extent now, to avoid excessive seeking and re-reading.
368 if (!read_current_extent_) {
369 hfs_->SeekToBlock(extent->startBlock);
370 current_extent_data_.resize(extent_size.ValueOrDie());
371 if (!hfs_->stream()->ReadExact(&current_extent_data_[0],
372 extent_size.ValueOrDie())) {
373 DLOG(ERROR) << "Failed to read extent " << current_extent_;
374 return false;
377 read_current_extent_ = true;
380 size_t extent_offset = fork_logical_offset_ % extent_size.ValueOrDie();
381 size_t bytes_to_copy =
382 std::min(std::min(static_cast<size_t>(fork_.logicalSize) -
383 fork_logical_offset_,
384 extent_size.ValueOrDie() - extent_offset),
385 buffer_space_remaining);
387 memcpy(&buffer[buffer_size - buffer_space_remaining],
388 &current_extent_data_[extent_offset],
389 bytes_to_copy);
391 buffer_space_remaining -= bytes_to_copy;
392 *bytes_read += bytes_to_copy;
393 fork_logical_offset_ += bytes_to_copy;
395 // If the fork's data have been read, then end the loop.
396 if (fork_logical_offset_ == fork_.logicalSize)
397 return true;
399 // If this extent still has data to be copied out, then the read was
400 // partial and the buffer is full. Do not advance to the next extent.
401 if (extent_offset < current_extent_data_.size())
402 break;
404 // Advance to the next extent, so reset the state.
405 read_current_extent_ = false;
408 return true;
411 off_t HFSForkReadStream::Seek(off_t offset, int whence) {
412 DCHECK_EQ(SEEK_SET, whence);
413 DCHECK_GE(offset, 0);
414 DCHECK(offset == 0 || static_cast<uint64_t>(offset) < fork_.logicalSize);
415 size_t target_block = offset / hfs_->block_size();
416 size_t block_count = 0;
417 for (size_t i = 0; i < arraysize(fork_.extents); ++i) {
418 const HFSPlusExtentDescriptor* extent = &fork_.extents[i];
420 // An empty extent indicates end-of-fork.
421 if (extent->startBlock == 0 && extent->blockCount == 0)
422 break;
424 base::CheckedNumeric<size_t> new_block_count(block_count);
425 new_block_count += extent->blockCount;
426 if (!new_block_count.IsValid()) {
427 DLOG(ERROR) << "Seek offset block count overflows";
428 return false;
431 if (target_block < new_block_count.ValueOrDie()) {
432 if (current_extent_ != i) {
433 read_current_extent_ = false;
434 current_extent_ = i;
436 auto iterator_block_offset =
437 base::CheckedNumeric<size_t>(block_count) * hfs_->block_size();
438 if (!iterator_block_offset.IsValid()) {
439 DLOG(ERROR) << "Seek block offset overflows";
440 return false;
442 fork_logical_offset_ = offset;
443 return offset;
446 block_count = new_block_count.ValueOrDie();
448 return -1;
451 HFSBTreeIterator::HFSBTreeIterator()
452 : stream_(),
453 header_(),
454 leaf_records_read_(0),
455 current_leaf_records_read_(0),
456 current_leaf_number_(0),
457 read_current_leaf_(false),
458 leaf_data_(),
459 current_leaf_offset_(0),
460 current_leaf_() {
463 HFSBTreeIterator::~HFSBTreeIterator() {}
465 bool HFSBTreeIterator::Init(ReadStream* stream) {
466 DCHECK(!stream_);
467 stream_ = stream;
469 if (stream_->Seek(0, SEEK_SET) != 0) {
470 DLOG(ERROR) << "Failed to seek to header node";
471 return false;
474 BTNodeDescriptor node;
475 if (!stream_->ReadType(&node)) {
476 DLOG(ERROR) << "Failed to read BTNodeDescriptor";
477 return false;
479 ConvertBigEndian(&node);
481 if (node.kind != kBTHeaderNode) {
482 DLOG(ERROR) << "Initial node is not a header node";
483 return false;
486 if (!stream_->ReadType(&header_)) {
487 DLOG(ERROR) << "Failed to read BTHeaderRec";
488 return false;
490 ConvertBigEndian(&header_);
492 current_leaf_number_ = header_.firstLeafNode;
493 leaf_data_.resize(header_.nodeSize);
495 return true;
498 bool HFSBTreeIterator::HasNext() {
499 return leaf_records_read_ < header_.leafRecords;
502 bool HFSBTreeIterator::Next() {
503 if (!ReadCurrentLeaf())
504 return false;
506 GetLeafData<uint16_t>(); // keyLength
507 auto parent_id = OSSwapBigToHostInt32(*GetLeafData<uint32_t>());
508 auto key_string_length = OSSwapBigToHostInt16(*GetLeafData<uint16_t>());
509 auto key_string =
510 reinterpret_cast<uint16_t*>(&leaf_data_[current_leaf_offset_]);
511 for (uint16_t i = 0;
512 i < key_string_length;
513 ++i, current_leaf_offset_ += sizeof(uint16_t)) {
514 key_string[i] = OSSwapBigToHostInt16(key_string[i]);
516 base::string16 key(key_string, key_string_length);
518 // Read the record type and then rewind as the field is part of the catalog
519 // structure that is read next.
520 current_record_.record_type = OSSwapBigToHostInt16(*GetLeafData<int16_t>());
521 current_record_.unexported = false;
522 current_leaf_offset_ -= sizeof(int16_t);
523 switch (current_record_.record_type) {
524 case kHFSPlusFolderRecord: {
525 auto folder = GetLeafData<HFSPlusCatalogFolder>();
526 ConvertBigEndian(folder);
527 ++leaf_records_read_;
528 ++current_leaf_records_read_;
530 // If this key is unexported, or the parent folder is, then mark the
531 // record as such.
532 if (IsKeyUnexported(key) ||
533 unexported_parents_.find(parent_id) != unexported_parents_.end()) {
534 unexported_parents_.insert(folder->folderID);
535 current_record_.unexported = true;
538 // Update the CNID map to construct the path tree.
539 if (parent_id != 0) {
540 auto parent_name = folder_cnid_map_.find(parent_id);
541 if (parent_name != folder_cnid_map_.end())
542 key = parent_name->second + kFilePathSeparator + key;
544 folder_cnid_map_[folder->folderID] = key;
546 current_record_.path = key;
547 current_record_.folder = folder;
548 break;
550 case kHFSPlusFileRecord: {
551 auto file = GetLeafData<HFSPlusCatalogFile>();
552 ConvertBigEndian(file);
553 ++leaf_records_read_;
554 ++current_leaf_records_read_;
556 base::string16 path =
557 folder_cnid_map_[parent_id] + kFilePathSeparator + key;
558 current_record_.path = path;
559 current_record_.file = file;
560 current_record_.unexported =
561 unexported_parents_.find(parent_id) != unexported_parents_.end();
562 break;
564 case kHFSPlusFolderThreadRecord:
565 case kHFSPlusFileThreadRecord: {
566 // Thread records are used to quickly locate a file or folder just by
567 // CNID. As these are not necessary for the iterator, skip past the data.
568 GetLeafData<uint16_t>(); // recordType
569 GetLeafData<uint16_t>(); // reserved
570 GetLeafData<uint32_t>(); // parentID
571 auto string_length = OSSwapBigToHostInt16(*GetLeafData<uint16_t>());
572 for (uint16_t i = 0; i < string_length; ++i)
573 GetLeafData<uint16_t>();
574 ++leaf_records_read_;
575 ++current_leaf_records_read_;
576 break;
578 default:
579 DLOG(ERROR) << "Unknown record type " << current_record_.record_type;
580 return false;
583 // If all the records from this leaf have been read, follow the forward link
584 // to the next B-Tree leaf node.
585 if (current_leaf_records_read_ >= current_leaf_->numRecords) {
586 current_leaf_number_ = current_leaf_->fLink;
587 read_current_leaf_ = false;
590 return true;
593 bool HFSBTreeIterator::SeekToNode(uint32_t node_id) {
594 if (node_id >= header_.totalNodes)
595 return false;
596 size_t offset = node_id * header_.nodeSize;
597 if (stream_->Seek(offset, SEEK_SET) != -1) {
598 current_leaf_number_ = node_id;
599 return true;
601 return false;
604 bool HFSBTreeIterator::ReadCurrentLeaf() {
605 if (read_current_leaf_)
606 return true;
608 if (!SeekToNode(current_leaf_number_)) {
609 DLOG(ERROR) << "Failed to seek to node " << current_leaf_number_;
610 return false;
613 if (!stream_->ReadExact(&leaf_data_[0], header_.nodeSize)) {
614 DLOG(ERROR) << "Failed to read node " << current_leaf_number_;
615 return false;
618 auto leaf = reinterpret_cast<BTNodeDescriptor*>(&leaf_data_[0]);
619 ConvertBigEndian(leaf);
620 if (leaf->kind != kBTLeafNode) {
621 DLOG(ERROR) << "Node " << current_leaf_number_ << " is not a leaf";
622 return false;
624 current_leaf_ = leaf;
625 current_leaf_offset_ = sizeof(BTNodeDescriptor);
626 current_leaf_records_read_ = 0;
627 read_current_leaf_ = true;
628 return true;
631 template <typename T>
632 T* HFSBTreeIterator::GetLeafData() {
633 base::CheckedNumeric<size_t> size = sizeof(T);
634 auto new_offset = size + current_leaf_offset_;
635 if (!new_offset.IsValid() || new_offset.ValueOrDie() >= leaf_data_.size())
636 return nullptr;
637 T* object = reinterpret_cast<T*>(&leaf_data_[current_leaf_offset_]);
638 current_leaf_offset_ = new_offset.ValueOrDie();
639 return object;
642 bool HFSBTreeIterator::IsKeyUnexported(const base::string16& key) {
643 return key == kHFSDirMetadataFolder ||
644 key == kHFSMetadataFolder;
647 } // namespace dmg
648 } // namespace safe_browsing