// Copyright (c) 2012 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "ui/base/resource/data_pack.h"

#include "base/files/file_util.h"
#include "base/files/memory_mapped_file.h"
#include "base/logging.h"
#include "base/memory/ref_counted_memory.h"
#include "base/metrics/histogram.h"
#include "base/strings/string_piece.h"

// For details of the file layout, see
// http://dev.chromium.org/developers/design-documents/linuxresourcesandlocalizedstrings
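//
// For quick reference, the version-4 layout that the code below reads and
// writes is (all integers little endian):
//
//   uint32  version         -- must equal kFileFormatVersion (4)
//   uint32  resource_count
//   uint8   text_encoding   -- one of BINARY, UTF8, UTF16
//   (resource_count + 1) index entries, packed to 6 bytes each:
//     uint16  resource_id
//     uint32  file_offset
//   raw resource bytes, concatenated in index order
//
// The extra trailing index entry exists so the length of the last resource
// can be computed as the difference between consecutive file offsets.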

namespace {

static const uint32 kFileFormatVersion = 4;
// Length of file header: version, entry count and text encoding type.
static const size_t kHeaderLength = 2 * sizeof(uint32) + sizeof(uint8);

#pragma pack(push, 2)
struct DataPackEntry {
  uint16 resource_id;
  uint32 file_offset;

  static int CompareById(const void* void_key, const void* void_entry) {
    uint16 key = *reinterpret_cast<const uint16*>(void_key);
    const DataPackEntry* entry =
        reinterpret_cast<const DataPackEntry*>(void_entry);
    if (key < entry->resource_id) {
      return -1;
    } else if (key > entry->resource_id) {
      return 1;
    } else {
      return 0;
    }
  }
};
#pragma pack(pop)

static_assert(sizeof(DataPackEntry) == 6, "size of entry must be six");
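// The 2-byte packing above is what makes this assertion hold: with default
// alignment the struct would be padded to 8 bytes and the on-disk index
// arithmetic (kHeaderLength + i * sizeof(DataPackEntry)) would be wrong.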

// We're crashing when trying to load a pak file on Windows. Add some error
// codes for logging.
// http://crbug.com/58056
enum LoadErrors {
  INIT_FAILED = 1,
  BAD_VERSION,
  INDEX_TRUNCATED,
  ENTRY_NOT_FOUND,
  HEADER_TRUNCATED,
  WRONG_ENCODING,
  INIT_FAILED_FROM_FILE,

  LOAD_ERRORS_COUNT,
};

}  // namespace

namespace ui {

DataPack::DataPack(ui::ScaleFactor scale_factor)
    : resource_count_(0),
      text_encoding_type_(BINARY),
      scale_factor_(scale_factor) {
}

DataPack::~DataPack() {
}

bool DataPack::LoadFromPath(const base::FilePath& path) {
  mmap_.reset(new base::MemoryMappedFile);
  if (!mmap_->Initialize(path)) {
    DLOG(ERROR) << "Failed to mmap datapack";
    UMA_HISTOGRAM_ENUMERATION("DataPack.Load", INIT_FAILED,
                              LOAD_ERRORS_COUNT);
    mmap_.reset();
    return false;
  }

  return LoadImpl();
}

bool DataPack::LoadFromFile(base::File file) {
  return LoadFromFileRegion(file.Pass(),
                            base::MemoryMappedFile::Region::kWholeFile);
}

bool DataPack::LoadFromFileRegion(
    base::File file,
    const base::MemoryMappedFile::Region& region) {
  mmap_.reset(new base::MemoryMappedFile);
  if (!mmap_->Initialize(file.Pass(), region)) {
    DLOG(ERROR) << "Failed to mmap datapack";
    UMA_HISTOGRAM_ENUMERATION("DataPack.Load", INIT_FAILED_FROM_FILE,
                              LOAD_ERRORS_COUNT);
    mmap_.reset();
    return false;
  }

  return LoadImpl();
}

bool DataPack::LoadImpl() {
  // Sanity check the header of the file.
  if (kHeaderLength > mmap_->length()) {
    DLOG(ERROR) << "Data pack file corruption: incomplete file header.";
    UMA_HISTOGRAM_ENUMERATION("DataPack.Load", HEADER_TRUNCATED,
                              LOAD_ERRORS_COUNT);
    mmap_.reset();
    return false;
  }

  // Parse the header of the file.
  // First uint32: version; second: resource count.
  const uint32* ptr = reinterpret_cast<const uint32*>(mmap_->data());
  uint32 version = ptr[0];
  if (version != kFileFormatVersion) {
    LOG(ERROR) << "Bad data pack version: got " << version << ", expected "
               << kFileFormatVersion;
    UMA_HISTOGRAM_ENUMERATION("DataPack.Load", BAD_VERSION,
                              LOAD_ERRORS_COUNT);
    mmap_.reset();
    return false;
  }
  resource_count_ = ptr[1];

  // Third: text encoding.
  const uint8* ptr_encoding = reinterpret_cast<const uint8*>(ptr + 2);
  text_encoding_type_ = static_cast<TextEncodingType>(*ptr_encoding);
  if (text_encoding_type_ != UTF8 && text_encoding_type_ != UTF16 &&
      text_encoding_type_ != BINARY) {
    LOG(ERROR) << "Bad data pack text encoding: got " << text_encoding_type_
               << ", expected between " << BINARY << " and " << UTF16;
    UMA_HISTOGRAM_ENUMERATION("DataPack.Load", WRONG_ENCODING,
                              LOAD_ERRORS_COUNT);
    mmap_.reset();
    return false;
  }

  // Sanity check the file.
  // 1) Check we have enough entries. There's an extra entry after the last
  // item which gives the length of the last item.
  if (kHeaderLength + (resource_count_ + 1) * sizeof(DataPackEntry) >
      mmap_->length()) {
    LOG(ERROR) << "Data pack file corruption: too short for number of "
                  "entries specified.";
    UMA_HISTOGRAM_ENUMERATION("DataPack.Load", INDEX_TRUNCATED,
                              LOAD_ERRORS_COUNT);
    mmap_.reset();
    return false;
  }
  // 2) Verify the entries are within the appropriate bounds. There's an extra
  // entry after the last item which gives us the length of the last item.
  for (size_t i = 0; i < resource_count_ + 1; ++i) {
    const DataPackEntry* entry = reinterpret_cast<const DataPackEntry*>(
        mmap_->data() + kHeaderLength + (i * sizeof(DataPackEntry)));
    if (entry->file_offset > mmap_->length()) {
      LOG(ERROR) << "Entry #" << i << " in data pack points off end of file. "
                 << "Was the file corrupted?";
      UMA_HISTOGRAM_ENUMERATION("DataPack.Load", ENTRY_NOT_FOUND,
                                LOAD_ERRORS_COUNT);
      mmap_.reset();
      return false;
    }
  }

  return true;
}

bool DataPack::HasResource(uint16 resource_id) const {
  return !!bsearch(&resource_id, mmap_->data() + kHeaderLength, resource_count_,
                   sizeof(DataPackEntry), DataPackEntry::CompareById);
}

bool DataPack::GetStringPiece(uint16 resource_id,
                              base::StringPiece* data) const {
  // It won't be hard to make this endian-agnostic, but it's not worth
  // bothering to do right now.
#if defined(__BYTE_ORDER)
  // Linux check
  static_assert(__BYTE_ORDER == __LITTLE_ENDIAN,
                "datapack assumes little endian");
#elif defined(__BIG_ENDIAN__)
  // Mac check
  #error DataPack assumes little endian
#endif

  const DataPackEntry* target = reinterpret_cast<const DataPackEntry*>(
      bsearch(&resource_id, mmap_->data() + kHeaderLength, resource_count_,
              sizeof(DataPackEntry), DataPackEntry::CompareById));
  if (!target) {
    return false;
  }

  const DataPackEntry* next_entry = target + 1;
  // If the next entry points beyond the end of the file this data pack's entry
  // table is corrupt. Log an error and return false. See
  // http://crbug.com/371301.
  if (next_entry->file_offset > mmap_->length()) {
    size_t entry_index = target -
        reinterpret_cast<const DataPackEntry*>(mmap_->data() + kHeaderLength);
    LOG(ERROR) << "Entry #" << entry_index << " in data pack points off end "
               << "of file. This should have been caught when loading. Was the "
               << "file modified?";
    return false;
  }

  size_t length = next_entry->file_offset - target->file_offset;
  data->set(reinterpret_cast<const char*>(mmap_->data() + target->file_offset),
            length);
  return true;
}
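
// Example (illustrative offsets, not from the original source): if the
// matched entry has file_offset 100 and the next entry has file_offset 140,
// the resource is the 40 bytes at [100, 140) of the mapped file; the
// returned StringPiece aliases the mmap and copies nothing.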

base::RefCountedStaticMemory* DataPack::GetStaticMemory(
    uint16 resource_id) const {
  base::StringPiece piece;
  if (!GetStringPiece(resource_id, &piece))
    return NULL;

  return new base::RefCountedStaticMemory(piece.data(), piece.length());
}

ResourceHandle::TextEncodingType DataPack::GetTextEncodingType() const {
  return text_encoding_type_;
}

ui::ScaleFactor DataPack::GetScaleFactor() const {
  return scale_factor_;
}
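
// A minimal write/read round-trip sketch (illustrative only: error handling
// is elided, and the file name and resource ids are hypothetical):
//
//   std::map<uint16, base::StringPiece> resources;
//   resources[1] = base::StringPiece("hello");
//   resources[2] = base::StringPiece("world");
//   ui::DataPack::WritePack(base::FilePath(FILE_PATH_LITERAL("test.pak")),
//                           resources, ui::DataPack::UTF8);
//
//   ui::DataPack pack(ui::SCALE_FACTOR_100P);
//   if (pack.LoadFromPath(base::FilePath(FILE_PATH_LITERAL("test.pak")))) {
//     base::StringPiece data;
//     if (pack.GetStringPiece(1, &data)) {
//       // |data| now points at "hello" inside the mapped file.
//     }
//   }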

// static
bool DataPack::WritePack(const base::FilePath& path,
                         const std::map<uint16, base::StringPiece>& resources,
                         TextEncodingType textEncodingType) {
  FILE* file = base::OpenFile(path, "wb");
  if (!file)
    return false;

  if (fwrite(&kFileFormatVersion, sizeof(kFileFormatVersion), 1, file) != 1) {
    LOG(ERROR) << "Failed to write file version";
    base::CloseFile(file);
    return false;
  }

  // Note: the python version of this function explicitly sorted keys, but
  // std::map is a sorted associative container, so we shouldn't have to do
  // that here.
  uint32 entry_count = resources.size();
  if (fwrite(&entry_count, sizeof(entry_count), 1, file) != 1) {
    LOG(ERROR) << "Failed to write entry count";
    base::CloseFile(file);
    return false;
  }

  if (textEncodingType != UTF8 && textEncodingType != UTF16 &&
      textEncodingType != BINARY) {
    LOG(ERROR) << "Invalid text encoding type, got " << textEncodingType
               << ", expected between " << BINARY << " and " << UTF16;
    base::CloseFile(file);
    return false;
  }

  uint8 write_buffer = static_cast<uint8>(textEncodingType);
  if (fwrite(&write_buffer, sizeof(uint8), 1, file) != 1) {
    LOG(ERROR) << "Failed to write file text resources encoding";
    base::CloseFile(file);
    return false;
  }

  // Each entry is a uint16 + a uint32. We have an extra entry after the last
  // item so we can compute the size of the last item.
  uint32 index_length = (entry_count + 1) * sizeof(DataPackEntry);
  uint32 data_offset = kHeaderLength + index_length;
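  // Worked example (illustrative numbers, not from the original source):
  // with two resources of 4 and 7 bytes, entry_count == 2, index_length ==
  // 3 * 6 == 18, and data starts at kHeaderLength + 18 == 27. The index
  // records offsets 27 and 31, and the terminal entry records 38, so both
  // lengths (4 and 7) are recoverable by subtracting consecutive offsets.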
  for (std::map<uint16, base::StringPiece>::const_iterator it =
           resources.begin();
       it != resources.end(); ++it) {
    uint16 resource_id = it->first;
    if (fwrite(&resource_id, sizeof(resource_id), 1, file) != 1) {
      LOG(ERROR) << "Failed to write id for " << resource_id;
      base::CloseFile(file);
      return false;
    }

    if (fwrite(&data_offset, sizeof(data_offset), 1, file) != 1) {
      LOG(ERROR) << "Failed to write offset for " << resource_id;
      base::CloseFile(file);
      return false;
    }

    data_offset += it->second.length();
  }

  // We place an extra entry after the last item that allows us to read the
  // size of the last item.
  uint16 resource_id = 0;
  if (fwrite(&resource_id, sizeof(resource_id), 1, file) != 1) {
    LOG(ERROR) << "Failed to write extra resource id.";
    base::CloseFile(file);
    return false;
  }
  if (fwrite(&data_offset, sizeof(data_offset), 1, file) != 1) {
    LOG(ERROR) << "Failed to write extra offset.";
    base::CloseFile(file);
    return false;
  }

  for (std::map<uint16, base::StringPiece>::const_iterator it =
           resources.begin();
       it != resources.end(); ++it) {
    if (fwrite(it->second.data(), it->second.length(), 1, file) != 1) {
      LOG(ERROR) << "Failed to write data for " << it->first;
      base::CloseFile(file);
      return false;
    }
  }

  base::CloseFile(file);
  return true;
}

}  // namespace ui