Elim cr-checkbox
[chromium-blink-merge.git] / chrome / browser / safe_browsing / incident_reporting / module_integrity_verifier_win.cc
blob55ea230e501dd62138cc46cc81445985e9b9e18a
// Copyright 2014 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
5 #include "chrome/browser/safe_browsing/incident_reporting/module_integrity_verifier_win.h"
7 #include <algorithm>
8 #include <string>
9 #include <vector>
11 #include "base/files/file_path.h"
12 #include "base/files/memory_mapped_file.h"
13 #include "base/metrics/sparse_histogram.h"
14 #include "base/scoped_native_library.h"
15 #include "base/strings/utf_string_conversions.h"
16 #include "base/win/pe_image.h"
17 #include "chrome/common/safe_browsing/csd.pb.h"
19 namespace safe_browsing {
21 namespace {
// Cap on how many modified bytes are copied into any single modification
// record reported by VerifyModule; longer runs are still counted in full but
// only this many of their bytes are stored.
const int kMaxModuleModificationBytes = 256;
// A named export of a module: the address the export resolves to and the name
// it is exported under. Exports compare by address only, so a collection of
// them can be sorted and binary-searched by address.
struct Export {
  Export(void* export_addr, const std::string& export_name);
  ~Export();

  // Orders exports by ascending address; names do not take part.
  bool operator<(const Export& other) const {
    return other.addr > addr;
  }

  void* addr;
  std::string name;
};

Export::Export(void* export_addr, const std::string& export_name)
    : addr(export_addr),
      name(export_name) {
}

Export::~Export() {
}
44 struct ModuleVerificationState {
45 explicit ModuleVerificationState(HMODULE hModule);
46 ~ModuleVerificationState();
48 base::win::PEImageAsData disk_peimage;
50 // The module's preferred base address minus the base address it actually
51 // loaded at.
52 intptr_t image_base_delta;
54 // The location of the disk_peimage module's code section minus that of the
55 // mem_peimage module's code section.
56 intptr_t code_section_delta;
58 // Set true if the relocation table contains a reloc of type that we don't
59 // currently handle.
60 bool unknown_reloc_type;
62 // The start of the code section of the in-memory binary.
63 uint8_t* mem_code_addr;
65 // The start of the code section of the on-disk binary.
66 uint8_t* disk_code_addr;
68 // The size of the binary's code section.
69 uint32_t code_size;
71 // The exports of the DLL, sorted by address in ascending order.
72 std::vector<Export> exports;
74 // The location in the in-memory binary of the latest reloc encountered by
75 // |EnumRelocsCallback|.
76 uint8_t* last_mem_reloc_position;
78 // The location in the on-disk binary of the latest reloc encountered by
79 // |EnumRelocsCallback|.
80 uint8_t* last_disk_reloc_position;
82 // The number of bytes with a different value on disk and in memory, as
83 // computed by |VerifyModule|.
84 int bytes_different;
86 // The module state protobuf object that |VerifyModule| will populate.
87 ClientIncidentReport_EnvironmentData_Process_ModuleState* module_state;
89 private:
90 DISALLOW_COPY_AND_ASSIGN(ModuleVerificationState);
93 ModuleVerificationState::ModuleVerificationState(HMODULE hModule)
94 : disk_peimage(hModule),
95 image_base_delta(0),
96 code_section_delta(0),
97 unknown_reloc_type(false),
98 mem_code_addr(nullptr),
99 disk_code_addr(nullptr),
100 code_size(0),
101 last_mem_reloc_position(nullptr),
102 last_disk_reloc_position(nullptr),
103 bytes_different(0),
104 module_state(nullptr) {
107 ModuleVerificationState::~ModuleVerificationState() {
110 // Find which export a modification at address |mem_address| is in. Looks for
111 // the largest export address still smaller than |mem_address|. |start| and
112 // |end| must come from a sorted collection.
113 std::vector<Export>::const_iterator FindModifiedExport(
114 uint8_t* mem_address,
115 std::vector<Export>::const_iterator start,
116 std::vector<Export>::const_iterator end) {
117 // We get the largest export address still smaller than |addr|. It is
118 // possible that |addr| belongs to some nonexported function located
119 // between this export and the following one.
120 Export addr(reinterpret_cast<void*>(mem_address), std::string());
121 return std::upper_bound(start, end, addr);
124 // Checks each byte in a subsection of the module's code section against the
125 // corresponding byte on disk, returning the number of bytes differing between
126 // the two. |state.exports| must be sorted.
127 int ExamineByteRangeDiff(uint8_t* disk_start,
128 uint8_t* mem_start,
129 ptrdiff_t range_size,
130 ModuleVerificationState* state) {
131 int bytes_different = 0;
132 std::vector<Export>::const_iterator export_it = state->exports.begin();
134 for (uint8_t* end = mem_start + range_size; mem_start < end;
135 ++mem_start, ++disk_start) {
136 if (*disk_start == *mem_start)
137 continue;
139 auto modification = state->module_state->add_modification();
140 // Store the address at which the modification starts on disk, relative to
141 // the beginning of the image.
142 modification->set_file_offset(
143 disk_start - reinterpret_cast<uint8_t*>(state->disk_peimage.module()));
145 // Find the export containing this modification.
146 std::vector<Export>::const_iterator modified_export_it =
147 FindModifiedExport(mem_start, export_it, state->exports.end());
148 // No later byte can belong to an earlier export.
149 export_it = modified_export_it;
150 if (modified_export_it != state->exports.begin())
151 modification->set_export_name((modified_export_it - 1)->name);
153 const uint8_t* range_start = mem_start;
154 while (mem_start < end && *disk_start != *mem_start) {
155 ++disk_start;
156 ++mem_start;
158 int bytes_in_modification = mem_start - range_start;
159 bytes_different += bytes_in_modification;
160 modification->set_byte_count(bytes_in_modification);
161 modification->set_modified_bytes(
162 range_start,
163 std::min(bytes_in_modification, kMaxModuleModificationBytes));
165 return bytes_different;
// Returns true when |address| lies inside the half-open range
// [code_addr, code_addr + code_size).
bool AddrIsInCodeSection(void* address,
                         uint8_t* code_addr,
                         uint32_t code_size) {
  if (address < code_addr)
    return false;
  return address < code_addr + code_size;
}
174 bool EnumRelocsCallback(const base::win::PEImage& mem_peimage,
175 WORD type,
176 void* address,
177 void* cookie) {
178 ModuleVerificationState* state =
179 reinterpret_cast<ModuleVerificationState*>(cookie);
181 // If not in the code section return true to continue to the next reloc.
182 if (!AddrIsInCodeSection(address, state->mem_code_addr, state->code_size))
183 return true;
185 switch (type) {
186 case IMAGE_REL_BASED_ABSOLUTE: // 0
187 break;
188 case IMAGE_REL_BASED_HIGHLOW: // 3
190 // The range to inspect is from the last reloc to the current one at
191 // |ptr|
192 uint8_t* ptr = reinterpret_cast<uint8_t*>(address);
194 // If the last relocation was not before this one in the binary,
195 // there's an issue in the reloc section. We can't really recover from
196 // that so flag state as such so the error can be logged.
197 if (ptr < state->last_mem_reloc_position)
198 return false;
200 // Check which bytes of the relocation are not accounted for by the
201 // rebase. If the beginning of the relocation is modified by something
202 // other than the rebase, extend the verification range to include those
203 // bytes since they are considered part of a modification.
204 uint32_t relocated = *reinterpret_cast<uint32_t*>(ptr);
205 uint32_t original = relocated + state->image_base_delta;
206 uint8_t* original_reloc_bytes = reinterpret_cast<uint8_t*>(&original);
207 uint8_t* reloc_disk_position = ptr + state->code_section_delta;
208 size_t unaccounted_reloc_bytes = 0;
209 while (unaccounted_reloc_bytes < sizeof(uint32_t) &&
210 original_reloc_bytes[unaccounted_reloc_bytes] !=
211 reloc_disk_position[unaccounted_reloc_bytes]) {
212 ++unaccounted_reloc_bytes;
215 // If the entire reloc was modified, return true to let the next
216 // EnumReloc track it as part of a larger modification.
217 if (unaccounted_reloc_bytes == sizeof(uint32_t))
218 return true;
220 ptrdiff_t range_size = ptr +
221 unaccounted_reloc_bytes -
222 state->last_mem_reloc_position;
224 state->bytes_different += ExamineByteRangeDiff(
225 state->last_disk_reloc_position,
226 state->last_mem_reloc_position,
227 range_size,
228 state);
230 // Starting after the verified range, check if the relocation ends with
231 // modified bytes. If it does, include them in the following range to be
232 // verified as they're considered modified. Otherwise, the following
233 // range will start right after the current reloc.
234 size_t unmodified_reloc_byte_count = unaccounted_reloc_bytes;
235 while (unmodified_reloc_byte_count < sizeof(uint32_t) &&
236 original_reloc_bytes[unmodified_reloc_byte_count] ==
237 reloc_disk_position[unmodified_reloc_byte_count]) {
238 ++unmodified_reloc_byte_count;
240 state->last_disk_reloc_position +=
241 range_size + unmodified_reloc_byte_count;
242 state->last_mem_reloc_position +=
243 range_size + unmodified_reloc_byte_count;
245 break;
246 case IMAGE_REL_BASED_DIR64: // 10
247 break;
248 default:
249 // TODO(robertshield): Find a reliable description of the behaviour of the
250 // remaining types of relocation and handle them.
251 UMA_HISTOGRAM_SPARSE_SLOWLY("SafeBrowsing.ModuleBaseRelocation", type);
252 state->unknown_reloc_type = true;
253 break;
255 return true;
258 bool EnumExportsCallback(const base::win::PEImage& mem_peimage,
259 DWORD ordinal,
260 DWORD hint,
261 LPCSTR name,
262 PVOID function_addr,
263 LPCSTR forward,
264 PVOID cookie) {
265 std::vector<Export>* exports = reinterpret_cast<std::vector<Export>*>(cookie);
266 if (name)
267 exports->push_back(Export(function_addr, std::string(name)));
268 return true;
271 } // namespace
273 bool GetCodeAddrsAndSize(const base::win::PEImage& mem_peimage,
274 const base::win::PEImageAsData& disk_peimage,
275 uint8_t** mem_code_addr,
276 uint8_t** disk_code_addr,
277 uint32_t* code_size) {
278 DWORD base_of_code = mem_peimage.GetNTHeaders()->OptionalHeader.BaseOfCode;
280 // Get the address and size of the code section in the loaded module image.
281 PIMAGE_SECTION_HEADER mem_code_header =
282 mem_peimage.GetImageSectionFromAddr(mem_peimage.RVAToAddr(base_of_code));
283 if (mem_code_header == NULL)
284 return false;
285 *mem_code_addr = reinterpret_cast<uint8_t*>(
286 mem_peimage.RVAToAddr(mem_code_header->VirtualAddress));
287 // If the section is padded with zeros when mapped then |VirtualSize| can be
288 // larger. Alternatively, |SizeOfRawData| can be rounded up to align
289 // according to OptionalHeader.FileAlignment.
290 *code_size = std::min(mem_code_header->Misc.VirtualSize,
291 mem_code_header->SizeOfRawData);
293 // Get the address of the code section in the module mapped as data from disk.
294 DWORD disk_code_offset = 0;
295 if (!mem_peimage.ImageAddrToOnDiskOffset(
296 reinterpret_cast<void*>(*mem_code_addr), &disk_code_offset))
297 return false;
298 *disk_code_addr =
299 reinterpret_cast<uint8_t*>(disk_peimage.module()) + disk_code_offset;
300 return true;
303 bool VerifyModule(
304 const wchar_t* module_name,
305 ClientIncidentReport_EnvironmentData_Process_ModuleState* module_state,
306 int* num_bytes_different) {
307 using ModuleState = ClientIncidentReport_EnvironmentData_Process_ModuleState;
308 *num_bytes_different = 0;
309 module_state->set_name(base::WideToUTF8(module_name));
310 module_state->set_modified_state(ModuleState::MODULE_STATE_UNKNOWN);
312 // Get module handle, load a copy from disk as data and create PEImages.
313 HMODULE module_handle = NULL;
314 if (!GetModuleHandleEx(0, module_name, &module_handle))
315 return false;
316 base::ScopedNativeLibrary native_library(module_handle);
318 WCHAR module_path[MAX_PATH] = {};
319 DWORD length =
320 GetModuleFileName(module_handle, module_path, arraysize(module_path));
321 if (!length || length == arraysize(module_path))
322 return false;
324 base::MemoryMappedFile mapped_module;
325 if (!mapped_module.Initialize(base::FilePath(module_path)))
326 return false;
327 ModuleVerificationState state(
328 reinterpret_cast<HMODULE>(const_cast<uint8*>(mapped_module.data())));
330 base::win::PEImage mem_peimage(module_handle);
331 if (!mem_peimage.VerifyMagic() || !state.disk_peimage.VerifyMagic())
332 return false;
334 // Get the list of exports and sort them by address for efficient lookups.
335 mem_peimage.EnumExports(EnumExportsCallback, &state.exports);
336 std::sort(state.exports.begin(), state.exports.end());
338 // Get the addresses of the code sections then calculate |code_section_delta|
339 // and |image_base_delta|.
340 if (!GetCodeAddrsAndSize(mem_peimage,
341 state.disk_peimage,
342 &state.mem_code_addr,
343 &state.disk_code_addr,
344 &state.code_size))
345 return false;
347 state.module_state = module_state;
348 state.last_mem_reloc_position = state.mem_code_addr;
349 state.last_disk_reloc_position = state.disk_code_addr;
350 state.code_section_delta = state.disk_code_addr - state.mem_code_addr;
352 uint8_t* preferred_image_base = reinterpret_cast<uint8_t*>(
353 state.disk_peimage.GetNTHeaders()->OptionalHeader.ImageBase);
354 state.image_base_delta =
355 preferred_image_base - reinterpret_cast<uint8_t*>(mem_peimage.module());
357 state.last_mem_reloc_position = state.mem_code_addr;
358 state.last_disk_reloc_position = state.disk_code_addr;
360 // Enumerate relocations and verify the bytes between them.
361 bool scan_complete = mem_peimage.EnumRelocs(EnumRelocsCallback, &state);
363 if (scan_complete) {
364 size_t range_size =
365 state.code_size - (state.last_mem_reloc_position - state.mem_code_addr);
366 // Inspect the last chunk spanning from the furthest relocation to the end
367 // of the code section.
368 state.bytes_different += ExamineByteRangeDiff(
369 state.last_disk_reloc_position,
370 state.last_mem_reloc_position,
371 range_size,
372 &state);
374 *num_bytes_different = state.bytes_different;
376 // Report STATE_MODIFIED if any difference was found, regardless of whether or
377 // not the entire module was scanned. Report STATE_UNMODIFIED only if the
378 // entire module was scanned and understood.
379 if (state.bytes_different)
380 module_state->set_modified_state(ModuleState::MODULE_STATE_MODIFIED);
381 else if (!state.unknown_reloc_type && scan_complete)
382 module_state->set_modified_state(ModuleState::MODULE_STATE_UNMODIFIED);
384 return scan_complete;
387 } // namespace safe_browsing