1 // Copyright 2014 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "chrome/browser/safe_browsing/incident_reporting/module_integrity_verifier_win.h"
10 #include "base/containers/hash_tables.h"
11 #include "base/files/file_path.h"
12 #include "base/files/memory_mapped_file.h"
13 #include "base/metrics/sparse_histogram.h"
14 #include "base/scoped_native_library.h"
15 #include "base/win/pe_image.h"
16 #include "build/build_config.h"
18 namespace safe_browsing
{
22 // The maximum number of bytes that can be reported as modified for a single
// modification: ExamineByteRangeDiff clamps the length it passes to
// set_modified_bytes() to this value, while set_byte_count() still receives
// the full length.
24 const int kMaxModuleModificationBytes
= 256;
// Creates an export record from an address and a name.
27 Export(void* addr
, const std::string
& name
);
// Orders exports by |addr| only; |name| takes no part in the comparison. This
// is the ordering used when a collection of Export is sorted or searched.
30 bool operator<(const Export
& other
) const {
31 return addr
< other
.addr
;
// Copies |addr| and |name| into the members of the same names.
38 Export::Export(void* addr
, const std::string
& name
) : addr(addr
), name(name
) {
// Working state for one module verification. The verification entry points
// fill it in and hand it to the relocation-enumeration callbacks through their
// cookie argument.
44 struct ModuleVerificationState
{
45 explicit ModuleVerificationState(HMODULE hModule
);
46 ~ModuleVerificationState();
// PE view over the copy of the module that was mapped from disk as data.
48 base::win::PEImageAsData disk_peimage
;
50 // The module's preferred base address minus the base address it actually
// has in memory, i.e. the on-disk OptionalHeader.ImageBase minus
// mem_peimage.module().
52 intptr_t image_base_delta
;
54 // The location of the disk_peimage module's code section minus that of the
55 // mem_peimage module's code section.
56 intptr_t code_section_delta
;
58 // The bytes corrected by relocs.
59 base::hash_set
<uintptr_t> reloc_addr
;
61 // Set true if the relocation table contains a reloc of type that we don't
// handle (the fallback branch of the relocation callbacks sets it).
63 bool unknown_reloc_type
;
65 // The start of the code section of the in-memory binary.
66 uint8_t* mem_code_addr
;
68 // The start of the code section of the on-disk binary.
69 uint8_t* disk_code_addr
;
71 // The size of the binary's code section.
74 // The exports of the DLL, sorted by address in ascending order.
75 std::vector
<Export
> exports
;
77 // The location in the in-memory binary of the latest reloc encountered by
78 // |NewEnumRelocsCallback|.
79 uint8_t* last_mem_reloc_position
;
81 // The location in the on-disk binary of the latest reloc encountered by
82 // |NewEnumRelocsCallback|.
83 uint8_t* last_disk_reloc_position
;
85 // The number of bytes with a different value on disk and in memory, as
86 // computed by |NewVerifyModule|.
89 // The module state protobuf object that |NewVerifyModule| will populate.
90 ClientIncidentReport_EnvironmentData_Process_ModuleState
* module_state
;
93 DISALLOW_COPY_AND_ASSIGN(ModuleVerificationState
);
// Wraps |hModule| in |disk_peimage| and starts the bookkeeping fields listed
// below at zero / false / nullptr; callers fill them in before use.
96 ModuleVerificationState::ModuleVerificationState(HMODULE hModule
)
97 : disk_peimage(hModule
),
99 code_section_delta(0),
101 unknown_reloc_type(false),
102 mem_code_addr(nullptr),
103 disk_code_addr(nullptr),
105 last_mem_reloc_position(nullptr),
106 last_disk_reloc_position(nullptr),
108 module_state(nullptr) {
// Out-of-line destructor for ModuleVerificationState.
111 ModuleVerificationState::~ModuleVerificationState() {
// Returns true if the address |byte_addr| is present in |state.reloc_addr|,
// i.e. it was recorded as a byte corrected by a relocation.
114 bool ByteAccountedForByReloc(uint8_t* byte_addr
,
115 const ModuleVerificationState
& state
) {
116 return ((state
.reloc_addr
.count(reinterpret_cast<uintptr_t>(byte_addr
))) > 0);
119 // Locates the export that a modification at address |mem_address| falls in.
120 // Returns the first element of [start, end) whose address is greater than
121 // |mem_address| (the std::upper_bound result). The element just before the
// returned iterator, if there is one, is the export with the largest address
// not above |mem_address|; callers read it through |it - 1| after checking
// that |it| is not the beginning of the collection. |start| and |end| must
// come from a collection sorted by address.
122 std::vector
<Export
>::const_iterator
FindModifiedExport(
123 uint8_t* mem_address
,
124 std::vector
<Export
>::const_iterator start
,
125 std::vector
<Export
>::const_iterator end
) {
126 // The probe only needs an address because Export ordering compares |addr|
127 // alone. It is possible that |mem_address| belongs to some nonexported
128 // function located between the preceding export and the returned one.
129 Export
addr(reinterpret_cast<void*>(mem_address
), std::string());
130 return std::upper_bound(start
, end
, addr
);
133 // Checks each byte in a subsection of the module's code section against the
134 // corresponding byte on disk, returning the number of bytes differing between
135 // the two. |state.exports| must be sorted.
// Every run of differing bytes is recorded as one modification entry on
// |state->module_state|: its file offset, the name of the preceding export
// (when there is one), its byte count and its modified bytes.
136 int ExamineByteRangeDiff(uint8_t* disk_start
,
138 ptrdiff_t range_size
,
139 ModuleVerificationState
* state
) {
140 int bytes_different
= 0;
// Export lookups resume from the last match rather than from the beginning.
141 std::vector
<Export
>::const_iterator export_it
= state
->exports
.begin();
// The memory and disk cursors advance in lockstep over |range_size| bytes.
143 for (uint8_t* end
= mem_start
+ range_size
; mem_start
< end
;
144 ++mem_start
, ++disk_start
) {
145 if (*disk_start
== *mem_start
)
// A differing byte starts a new modification entry in the report.
148 auto modification
= state
->module_state
->add_modification();
149 // Store the address at which the modification starts on disk, relative to
150 // the beginning of the image.
151 modification
->set_file_offset(
152 disk_start
- reinterpret_cast<uint8_t*>(state
->disk_peimage
.module()));
154 // Find the export containing this modification.
155 std::vector
<Export
>::const_iterator modified_export_it
=
156 FindModifiedExport(mem_start
, export_it
, state
->exports
.end());
157 // No later byte can belong to an earlier export.
158 export_it
= modified_export_it
;
// Only name an export when at least one export address precedes the byte.
159 if (modified_export_it
!= state
->exports
.begin())
160 modification
->set_export_name((modified_export_it
- 1)->name
);
// Remember where the run starts; its length is measured once the loop below
// (which runs while bytes still differ and |end| is not reached) is done.
162 const uint8_t* range_start
= mem_start
;
163 while (mem_start
< end
&& *disk_start
!= *mem_start
) {
167 int bytes_in_modification
= mem_start
- range_start
;
168 bytes_different
+= bytes_in_modification
;
169 modification
->set_byte_count(bytes_in_modification
);
// The length handed to set_modified_bytes is capped at
// kMaxModuleModificationBytes; set_byte_count above got the full length.
170 modification
->set_modified_bytes(
172 std::min(bytes_in_modification
, kMaxModuleModificationBytes
));
174 return bytes_different
;
177 // Checks each byte in the module's code section against the corresponding
178 // byte on disk, returning the number of bytes differing between the two. Also
179 // adds the names of any modified functions exported by name to
180 // |modified_exports|. |state.exports| must be sorted.
// A differing byte is only attributed to an export when it is not accounted
// for by a relocation (see ByteAccountedForByReloc).
181 int ExamineBytesDiffInMemory(uint8_t* disk_code_start
,
182 uint8_t* mem_code_start
,
184 const ModuleVerificationState
& state
,
185 std::set
<std::string
>* modified_exports
) {
186 int bytes_different
= 0;
// Export lookups resume from the last match rather than from the beginning.
187 std::vector
<Export
>::const_iterator export_it
= state
.exports
.begin();
189 for (uint8_t* end
= mem_code_start
+ code_size
; mem_code_start
!= end
;
// |disk_code_start| is advanced as part of the comparison itself.
191 if ((*disk_code_start
++ != *mem_code_start
) &&
192 !ByteAccountedForByReloc(mem_code_start
, state
)) {
193 std::vector
<Export
>::const_iterator modified_export_it
=
194 FindModifiedExport(mem_code_start
, export_it
, state
.exports
.end());
// The export just before the returned iterator is the one considered
// modified; there is none when the iterator is the first export.
196 if (modified_export_it
!= state
.exports
.begin())
197 modified_exports
->insert((modified_export_it
- 1)->name
);
200 // No later byte can belong to an earlier export.
201 export_it
= modified_export_it
;
204 return bytes_different
;
207 // Adds to |state->reloc_addr| the bytes of the pointer at |address| that are
208 // corrected by adding |image_base_delta|.
209 void AddBytesCorrectedByReloc(uintptr_t address
,
210 ModuleVerificationState
* state
) {
// OFFSET(i) turns a byte index within the pointer into an offset from
// |address|; its definition is selected by the target's endianness.
211 #if defined(ARCH_CPU_LITTLE_ENDIAN)
// NOTE(review): this form maps index 0 to sizeof(uintptr_t), which is one past
// the last byte of the pointer, and it does not parenthesize its argument.
// Confirm the intended mapping.
214 # define OFFSET(i) (sizeof(uintptr_t) - i)
// The value currently in memory, that value shifted by |image_base_delta|, and
// the value at the matching position in the on-disk copy.
217 uintptr_t orig_mem_value
= *reinterpret_cast<uintptr_t*>(address
);
218 uintptr_t fixed_mem_value
= orig_mem_value
+ state
->image_base_delta
;
219 uintptr_t disk_value
=
220 *reinterpret_cast<uintptr_t*>(address
+ state
->code_section_delta
);
// |diff_before| has a bit set where memory and disk disagree as-is;
// |shared_after| has a bit set where they agree once the delta is applied.
// Their intersection marks the bits the relocation explains.
222 uintptr_t diff_before
= orig_mem_value
^ disk_value
;
223 uintptr_t shared_after
= ~(fixed_mem_value
^ disk_value
);
// Walk that mask one byte (8 bits) per iteration until no set bits remain.
225 for (uintptr_t fixed
= diff_before
& shared_after
; fixed
; fixed
>>= 8, ++i
) {
227 state
->reloc_addr
.insert(address
+ OFFSET(i
));
// Returns true when |address| lies in the half-open range
// [code_addr, code_addr + code_size).
232 bool AddrIsInCodeSection(void* address
,
234 uint32_t code_size
) {
235 return (code_addr
<= address
&& address
< code_addr
+ code_size
);
// Relocation callback that NewVerifyModule passes to PEImage::EnumRelocs, with
// a ModuleVerificationState as |cookie|. For a HIGHLOW relocation inside the
// code section it verifies the bytes between the previous relocation and this
// one with ExamineByteRangeDiff, then moves the last_*_reloc_position cursors
// past the part of this relocation that matches the disk contents. Relocation
// types reaching the final branch are logged to a histogram and flagged
// through |state->unknown_reloc_type|.
238 bool NewEnumRelocsCallback(const base::win::PEImage
& mem_peimage
,
242 ModuleVerificationState
* state
=
243 reinterpret_cast<ModuleVerificationState
*>(cookie
);
245 // If not in the code section return true to continue to the next reloc.
246 if (!AddrIsInCodeSection(address
, state
->mem_code_addr
, state
->code_size
))
250 case IMAGE_REL_BASED_ABSOLUTE
: // 0
252 case IMAGE_REL_BASED_HIGHLOW
: // 3
254 // The range to inspect is from the last reloc to the current one at
256 uint8_t* ptr
= reinterpret_cast<uint8_t*>(address
);
258 // If the last relocation was not before this one in the binary,
259 // there's an issue in the reloc section. We can't really recover from
260 // that so flag state as such so the error can be logged.
261 if (ptr
< state
->last_mem_reloc_position
)
264 // Check which bytes of the relocation are not accounted for by the
265 // rebase. If the beginning of the relocation is modified by something
266 // other than the rebase, extend the verification range to include those
267 // bytes since they are considered part of a modification.
268 uint32_t relocated
= *reinterpret_cast<uint32_t*>(ptr
);
// |original| is the in-memory value with |image_base_delta| added; it is
// compared byte by byte with the bytes at the matching on-disk position.
269 uint32_t original
= relocated
+ state
->image_base_delta
;
270 uint8_t* original_reloc_bytes
= reinterpret_cast<uint8_t*>(&original
);
271 uint8_t* reloc_disk_position
= ptr
+ state
->code_section_delta
;
// Count the leading bytes of the relocation that still differ from disk.
272 size_t unaccounted_reloc_bytes
= 0;
273 while (unaccounted_reloc_bytes
< sizeof(uint32_t) &&
274 original_reloc_bytes
[unaccounted_reloc_bytes
] !=
275 reloc_disk_position
[unaccounted_reloc_bytes
]) {
276 ++unaccounted_reloc_bytes
;
279 // If the entire reloc was modified, return true to let the next
280 // EnumReloc track it as part of a larger modification.
281 if (unaccounted_reloc_bytes
== sizeof(uint32_t))
// The range runs from the previous cursor up to and including the leading
// unaccounted bytes of this relocation.
284 ptrdiff_t range_size
= ptr
+
285 unaccounted_reloc_bytes
-
286 state
->last_mem_reloc_position
;
288 state
->bytes_different
+= ExamineByteRangeDiff(
289 state
->last_disk_reloc_position
,
290 state
->last_mem_reloc_position
,
294 // Starting after the verified range, check if the relocation ends with
295 // modified bytes. If it does, include them in the following range to be
296 // verified as they're considered modified. Otherwise, the following
297 // range will start right after the current reloc.
298 size_t unmodified_reloc_byte_count
= unaccounted_reloc_bytes
;
299 while (unmodified_reloc_byte_count
< sizeof(uint32_t) &&
300 original_reloc_bytes
[unmodified_reloc_byte_count
] ==
301 reloc_disk_position
[unmodified_reloc_byte_count
]) {
302 ++unmodified_reloc_byte_count
;
// Both cursors advance by the same distance so they stay aligned.
304 state
->last_disk_reloc_position
+=
305 range_size
+ unmodified_reloc_byte_count
;
306 state
->last_mem_reloc_position
+=
307 range_size
+ unmodified_reloc_byte_count
;
310 case IMAGE_REL_BASED_DIR64
: // 10
313 // TODO(robertshield): Find a reliable description of the behaviour of the
314 // remaining types of relocation and handle them.
315 UMA_HISTOGRAM_SPARSE_SLOWLY("SafeBrowsing.ModuleBaseRelocation", type
);
316 state
->unknown_reloc_type
= true;
// Relocation callback that VerifyModule passes to PEImage::EnumRelocs, with a
// ModuleVerificationState as |cookie|. It looks up the code section bounds on
// every call and, for HIGHLOW relocations inside the code section, records the
// bytes explained by the rebase through AddBytesCorrectedByReloc. Relocation
// types reaching the final branch are logged to a histogram and flagged
// through |state->unknown_reloc_type|.
322 bool EnumRelocsCallback(const base::win::PEImage
& mem_peimage
,
326 ModuleVerificationState
* state
=
327 reinterpret_cast<ModuleVerificationState
*>(cookie
);
329 uint8_t* mem_code_addr
= NULL
;
330 uint8_t* disk_code_addr
= NULL
;
331 uint32_t code_size
= 0;
332 if (!GetCodeAddrsAndSize(mem_peimage
,
339 // If not in the code section return true to continue to the next reloc.
340 if (!AddrIsInCodeSection(address
, mem_code_addr
, code_size
))
344 case IMAGE_REL_BASED_ABSOLUTE
: // 0
345 // Absolute type relocations are a noop, sometimes used to pad a section
348 case IMAGE_REL_BASED_HIGHLOW
: // 3
349 // The base relocation applies all 32 bits of the difference to the 32-bit
351 AddBytesCorrectedByReloc(reinterpret_cast<uintptr_t>(address
), state
);
353 case IMAGE_REL_BASED_DIR64
: // 10
354 // The base relocation applies the difference to the 64-bit field at
356 // TODO(robertshield): Handle this type of reloc.
359 // TODO(robertshield): Find a reliable description of the behaviour of the
360 // remaining types of relocation and handle them.
361 UMA_HISTOGRAM_SPARSE_SLOWLY("SafeBrowsing.ModuleBaseRelocation", type
);
362 state
->unknown_reloc_type
= true;
// Export callback that VerifyModule passes to PEImage::EnumExports. |cookie|
// is the std::vector<Export> to fill; one Export built from |function_addr|
// and |name| is appended per call.
368 bool EnumExportsCallback(const base::win::PEImage
& mem_peimage
,
375 std::vector
<Export
>* exports
= reinterpret_cast<std::vector
<Export
>*>(cookie
);
377 exports
->push_back(Export(function_addr
, std::string(name
)));
// Locates the code section (the section containing OptionalHeader.BaseOfCode)
// of the loaded module and of its on-disk copy.
//   |mem_code_addr|  receives the start of the section in the loaded image.
//   |code_size|      receives min(VirtualSize, SizeOfRawData) of that section.
//   |disk_code_addr| presumably receives disk_peimage.module() plus the file
//                    offset of the in-memory code start; the assignment target
//                    is not shown here, so confirm.
// Callers treat a false return value as failure.
383 bool GetCodeAddrsAndSize(const base::win::PEImage
& mem_peimage
,
384 const base::win::PEImageAsData
& disk_peimage
,
385 uint8_t** mem_code_addr
,
386 uint8_t** disk_code_addr
,
387 uint32_t* code_size
) {
388 DWORD base_of_code
= mem_peimage
.GetNTHeaders()->OptionalHeader
.BaseOfCode
;
390 // Get the address and size of the code section in the loaded module image.
391 PIMAGE_SECTION_HEADER mem_code_header
=
392 mem_peimage
.GetImageSectionFromAddr(mem_peimage
.RVAToAddr(base_of_code
));
393 if (mem_code_header
== NULL
)
395 *mem_code_addr
= reinterpret_cast<uint8_t*>(
396 mem_peimage
.RVAToAddr(mem_code_header
->VirtualAddress
));
397 // If the section is padded with zeros when mapped then |VirtualSize| can be
398 // larger. Alternatively, |SizeOfRawData| can be rounded up to align
399 // according to OptionalHeader.FileAlignment.
// Taking the smaller of the two keeps the compared range inside both copies.
400 *code_size
= std::min(mem_code_header
->Misc
.VirtualSize
,
401 mem_code_header
->SizeOfRawData
);
403 // Get the address of the code section in the module mapped as data from disk.
404 DWORD disk_code_offset
= 0;
405 if (!mem_peimage
.ImageAddrToOnDiskOffset(
406 reinterpret_cast<void*>(*mem_code_addr
), &disk_code_offset
))
409 reinterpret_cast<uint8_t*>(disk_peimage
.module()) + disk_code_offset
;
// Compares the code section of the loaded module |module_name| with a copy of
// the same file mapped from disk, walking the relocations with
// NewEnumRelocsCallback and recording each modification on |module_state|.
// The result starts as MODULE_STATE_UNKNOWN with zero bytes different. It
// becomes MODULE_STATE_MODIFIED as soon as any differing byte was found, and
// MODULE_STATE_UNMODIFIED only when relocation enumeration completed and no
// unhandled relocation type was seen.
413 VerificationResult
NewVerifyModule(
414 const wchar_t* module_name
,
415 ClientIncidentReport_EnvironmentData_Process_ModuleState
* module_state
) {
416 VerificationResult result
= { MODULE_STATE_UNKNOWN
, 0, false, };
418 // Get module handle, load a copy from disk as data and create PEImages.
419 HMODULE module_handle
= NULL
;
420 if (!GetModuleHandleEx(0, module_name
, &module_handle
))
// Presumably owns the module reference obtained above for the rest of the
// function; confirm against ScopedNativeLibrary.
422 base::ScopedNativeLibrary
native_library(module_handle
);
424 WCHAR module_path
[MAX_PATH
] = {};
426 GetModuleFileName(module_handle
, module_path
, arraysize(module_path
));
// A zero length, or one that fills the whole buffer (presumably a truncated
// path), is rejected.
427 if (!length
|| length
== arraysize(module_path
))
430 base::MemoryMappedFile mapped_module
;
431 if (!mapped_module
.Initialize(base::FilePath(module_path
)))
// The mapped file bytes are viewed as a module so they can be parsed as a PE
// image (state.disk_peimage).
433 ModuleVerificationState
state(
434 reinterpret_cast<HMODULE
>(const_cast<uint8
*>(mapped_module
.data())));
436 base::win::PEImage
mem_peimage(module_handle
);
437 if (!mem_peimage
.VerifyMagic() || !state
.disk_peimage
.VerifyMagic())
440 // Get the addresses of the code sections then calculate |code_section_delta|
441 // and |image_base_delta|.
442 if (!GetCodeAddrsAndSize(mem_peimage
,
444 &state
.mem_code_addr
,
445 &state
.disk_code_addr
,
// NOTE(review): nothing visible in this function fills |state.exports|, so
// ExamineByteRangeDiff would see an empty export list and never set an export
// name. Confirm whether exports should be enumerated and sorted here.
449 state
.module_state
= module_state
;
450 state
.last_mem_reloc_position
= state
.mem_code_addr
;
451 state
.last_disk_reloc_position
= state
.disk_code_addr
;
452 state
.code_section_delta
= state
.disk_code_addr
- state
.mem_code_addr
;
454 uint8_t* preferred_image_base
= reinterpret_cast<uint8_t*>(
455 state
.disk_peimage
.GetNTHeaders()->OptionalHeader
.ImageBase
);
456 state
.image_base_delta
=
457 preferred_image_base
- reinterpret_cast<uint8_t*>(mem_peimage
.module());
// NOTE(review): these two assignments repeat the ones made a few lines above
// with the same values; the second pair looks redundant.
459 state
.last_mem_reloc_position
= state
.mem_code_addr
;
460 state
.last_disk_reloc_position
= state
.disk_code_addr
;
462 // Enumerate relocations and verify the bytes between them.
463 result
.verification_completed
=
464 mem_peimage
.EnumRelocs(NewEnumRelocsCallback
, &state
);
466 if (result
.verification_completed
) {
// What is left of the code section after the last verified position.
468 state
.code_size
- (state
.last_mem_reloc_position
- state
.mem_code_addr
);
469 // Inspect the last chunk spanning from the furthest relocation to the end
470 // of the code section.
471 state
.bytes_different
+= ExamineByteRangeDiff(
472 state
.last_disk_reloc_position
,
473 state
.last_mem_reloc_position
,
477 result
.num_bytes_different
= state
.bytes_different
;
479 // Report STATE_MODIFIED if any difference was found, regardless of whether or
480 // not the entire module was scanned. Report STATE_UNMODIFIED only if the
481 // entire module was scanned and understood.
482 if (state
.bytes_different
)
483 result
.state
= MODULE_STATE_MODIFIED
;
484 else if (!state
.unknown_reloc_type
&& result
.verification_completed
)
485 result
.state
= MODULE_STATE_UNMODIFIED
;
// Compares the code section of the loaded module |module_name| with a copy of
// the same file mapped from disk.
//   |modified_exports|    receives the names of exports found to be modified.
//   |num_bytes_different| receives the count returned by
//                         ExamineBytesDiffInMemory.
// Returns MODULE_STATE_UNKNOWN when the module cannot be located, mapped or
// parsed, or when an unhandled relocation type is present. Otherwise returns
// MODULE_STATE_MODIFIED if any byte differs and MODULE_STATE_UNMODIFIED if
// none does.
490 ModuleState
VerifyModule(const wchar_t* module_name
,
491 std::set
<std::string
>* modified_exports
,
492 int* num_bytes_different
) {
493 // Get module handle, load a copy from disk as data and create PEImages.
494 HMODULE module_handle
= NULL
;
495 if (!GetModuleHandleEx(0, module_name
, &module_handle
))
496 return MODULE_STATE_UNKNOWN
;
// Presumably owns the module reference obtained above for the rest of the
// function; confirm against ScopedNativeLibrary.
497 base::ScopedNativeLibrary
native_library(module_handle
);
499 WCHAR module_path
[MAX_PATH
] = {};
501 GetModuleFileName(module_handle
, module_path
, arraysize(module_path
));
// A zero length, or one that fills the whole buffer (presumably a truncated
// path), is rejected.
502 if (!length
|| length
== arraysize(module_path
))
503 return MODULE_STATE_UNKNOWN
;
505 base::MemoryMappedFile mapped_module
;
506 if (!mapped_module
.Initialize(base::FilePath(module_path
)))
507 return MODULE_STATE_UNKNOWN
;
// The mapped file bytes are viewed as a module so they can be parsed as a PE
// image (state.disk_peimage).
508 ModuleVerificationState
state(
509 reinterpret_cast<HMODULE
>(const_cast<uint8
*>(mapped_module
.data())));
511 base::win::PEImage
mem_peimage(module_handle
);
512 if (!mem_peimage
.VerifyMagic() || !state
.disk_peimage
.VerifyMagic())
513 return MODULE_STATE_UNKNOWN
;
515 // Get the list of exports.
516 mem_peimage
.EnumExports(EnumExportsCallback
, &state
.exports
);
// Sorting (by address, via Export::operator<) is required by the export
// lookups done while diffing.
517 std::sort(state
.exports
.begin(), state
.exports
.end());
519 // Get the addresses of the code sections then calculate |code_section_delta|
520 // and |image_base_delta|.
521 uint8_t* mem_code_addr
= NULL
;
522 uint8_t* disk_code_addr
= NULL
;
523 uint32_t code_size
= 0;
524 if (!GetCodeAddrsAndSize(mem_peimage
,
529 return MODULE_STATE_UNKNOWN
;
531 state
.code_section_delta
= disk_code_addr
- mem_code_addr
;
533 uint8_t* preferred_image_base
= reinterpret_cast<uint8_t*>(
534 state
.disk_peimage
.GetNTHeaders()->OptionalHeader
.ImageBase
);
535 state
.image_base_delta
=
536 preferred_image_base
- reinterpret_cast<uint8_t*>(mem_peimage
.module());
538 // Get the relocations.
539 mem_peimage
.EnumRelocs(EnumRelocsCallback
, &state
);
// With an unhandled relocation type the diff cannot be interpreted, so no
// verdict is given.
540 if (state
.unknown_reloc_type
)
541 return MODULE_STATE_UNKNOWN
;
543 // Count the modified bytes (after accounting for relocs) and get the set of
544 // modified functions.
545 *num_bytes_different
= ExamineBytesDiffInMemory(disk_code_addr
,
551 return *num_bytes_different
? MODULE_STATE_MODIFIED
: MODULE_STATE_UNMODIFIED
;
554 } // namespace safe_browsing