// Copyright 2014 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "chrome/browser/safe_browsing/incident_reporting/module_integrity_verifier_win.h"

#include <stddef.h>
#include <stdint.h>

#include <algorithm>
#include <functional>
#include <string>
#include <vector>

#include "base/files/file_path.h"
#include "base/files/memory_mapped_file.h"
#include "base/metrics/sparse_histogram.h"
#include "base/scoped_native_library.h"
#include "base/strings/utf_string_conversions.h"
#include "base/win/pe_image.h"
#include "chrome/common/safe_browsing/csd.pb.h"
19 namespace safe_browsing
{
// The maximum number of modified bytes that are copied into a single reported
// modification. Longer modifications are still counted in full; only the
// reported byte payload is capped at this size.
const int kMaxModuleModificationBytes = 256;
// An exported symbol of a module: the address the export resolves to and the
// name it is exported under.
struct Export {
  Export(void* addr, const std::string& name);

  // Orders exports by address so that a collection of them can be sorted and
  // binary-searched. std::less is used rather than the built-in operator<
  // because the latter only yields an unspecified result for pointers that do
  // not point into the same object, whereas std::less guarantees a strict
  // total order over all pointer values.
  bool operator<(const Export& other) const {
    return std::less<void*>()(addr, other.addr);
  }

  void* addr;
  std::string name;
};

Export::Export(void* addr, const std::string& name) : addr(addr), name(name) {
}
44 struct ModuleVerificationState
{
45 explicit ModuleVerificationState(HMODULE hModule
);
46 ~ModuleVerificationState();
48 base::win::PEImageAsData disk_peimage
;
50 // The module's preferred base address minus the base address it actually
52 intptr_t image_base_delta
;
54 // The location of the disk_peimage module's code section minus that of the
55 // mem_peimage module's code section.
56 intptr_t code_section_delta
;
58 // Set true if the relocation table contains a reloc of type that we don't
60 bool unknown_reloc_type
;
62 // The start of the code section of the in-memory binary.
63 uint8_t* mem_code_addr
;
65 // The start of the code section of the on-disk binary.
66 uint8_t* disk_code_addr
;
68 // The size of the binary's code section.
71 // The exports of the DLL, sorted by address in ascending order.
72 std::vector
<Export
> exports
;
74 // The location in the in-memory binary of the latest reloc encountered by
75 // |EnumRelocsCallback|.
76 uint8_t* last_mem_reloc_position
;
78 // The location in the on-disk binary of the latest reloc encountered by
79 // |EnumRelocsCallback|.
80 uint8_t* last_disk_reloc_position
;
82 // The number of bytes with a different value on disk and in memory, as
83 // computed by |VerifyModule|.
86 // The module state protobuf object that |VerifyModule| will populate.
87 ClientIncidentReport_EnvironmentData_Process_ModuleState
* module_state
;
90 DISALLOW_COPY_AND_ASSIGN(ModuleVerificationState
);
93 ModuleVerificationState::ModuleVerificationState(HMODULE hModule
)
94 : disk_peimage(hModule
),
96 code_section_delta(0),
97 unknown_reloc_type(false),
98 mem_code_addr(nullptr),
99 disk_code_addr(nullptr),
101 last_mem_reloc_position(nullptr),
102 last_disk_reloc_position(nullptr),
104 module_state(nullptr) {
107 ModuleVerificationState::~ModuleVerificationState() {
110 // Find which export a modification at address |mem_address| is in. Looks for
111 // the largest export address still smaller than |mem_address|. |start| and
112 // |end| must come from a sorted collection.
113 std::vector
<Export
>::const_iterator
FindModifiedExport(
114 uint8_t* mem_address
,
115 std::vector
<Export
>::const_iterator start
,
116 std::vector
<Export
>::const_iterator end
) {
117 // We get the largest export address still smaller than |addr|. It is
118 // possible that |addr| belongs to some nonexported function located
119 // between this export and the following one.
120 Export
addr(reinterpret_cast<void*>(mem_address
), std::string());
121 return std::upper_bound(start
, end
, addr
);
124 // Checks each byte in a subsection of the module's code section against the
125 // corresponding byte on disk, returning the number of bytes differing between
126 // the two. |state.exports| must be sorted.
127 int ExamineByteRangeDiff(uint8_t* disk_start
,
129 ptrdiff_t range_size
,
130 ModuleVerificationState
* state
) {
131 int bytes_different
= 0;
132 std::vector
<Export
>::const_iterator export_it
= state
->exports
.begin();
134 for (uint8_t* end
= mem_start
+ range_size
; mem_start
< end
;
135 ++mem_start
, ++disk_start
) {
136 if (*disk_start
== *mem_start
)
139 auto modification
= state
->module_state
->add_modification();
140 // Store the address at which the modification starts on disk, relative to
141 // the beginning of the image.
142 modification
->set_file_offset(
143 disk_start
- reinterpret_cast<uint8_t*>(state
->disk_peimage
.module()));
145 // Find the export containing this modification.
146 std::vector
<Export
>::const_iterator modified_export_it
=
147 FindModifiedExport(mem_start
, export_it
, state
->exports
.end());
148 // No later byte can belong to an earlier export.
149 export_it
= modified_export_it
;
150 if (modified_export_it
!= state
->exports
.begin())
151 modification
->set_export_name((modified_export_it
- 1)->name
);
153 const uint8_t* range_start
= mem_start
;
154 while (mem_start
< end
&& *disk_start
!= *mem_start
) {
158 int bytes_in_modification
= mem_start
- range_start
;
159 bytes_different
+= bytes_in_modification
;
160 modification
->set_byte_count(bytes_in_modification
);
161 modification
->set_modified_bytes(
163 std::min(bytes_in_modification
, kMaxModuleModificationBytes
));
165 return bytes_different
;
// Returns true if |address| lies within the half-open range
// [code_addr, code_addr + code_size), i.e. inside the code section that starts
// at |code_addr| and spans |code_size| bytes. An empty section contains no
// address.
bool AddrIsInCodeSection(void* address,
                         uint8_t* code_addr,
                         uint32_t code_size) {
  return (code_addr <= address && address < code_addr + code_size);
}
174 bool EnumRelocsCallback(const base::win::PEImage
& mem_peimage
,
178 ModuleVerificationState
* state
=
179 reinterpret_cast<ModuleVerificationState
*>(cookie
);
181 // If not in the code section return true to continue to the next reloc.
182 if (!AddrIsInCodeSection(address
, state
->mem_code_addr
, state
->code_size
))
186 case IMAGE_REL_BASED_ABSOLUTE
: // 0
188 case IMAGE_REL_BASED_HIGHLOW
: // 3
190 // The range to inspect is from the last reloc to the current one at
192 uint8_t* ptr
= reinterpret_cast<uint8_t*>(address
);
194 // If the last relocation was not before this one in the binary,
195 // there's an issue in the reloc section. We can't really recover from
196 // that so flag state as such so the error can be logged.
197 if (ptr
< state
->last_mem_reloc_position
)
200 // Check which bytes of the relocation are not accounted for by the
201 // rebase. If the beginning of the relocation is modified by something
202 // other than the rebase, extend the verification range to include those
203 // bytes since they are considered part of a modification.
204 uint32_t relocated
= *reinterpret_cast<uint32_t*>(ptr
);
205 uint32_t original
= relocated
+ state
->image_base_delta
;
206 uint8_t* original_reloc_bytes
= reinterpret_cast<uint8_t*>(&original
);
207 uint8_t* reloc_disk_position
= ptr
+ state
->code_section_delta
;
208 size_t unaccounted_reloc_bytes
= 0;
209 while (unaccounted_reloc_bytes
< sizeof(uint32_t) &&
210 original_reloc_bytes
[unaccounted_reloc_bytes
] !=
211 reloc_disk_position
[unaccounted_reloc_bytes
]) {
212 ++unaccounted_reloc_bytes
;
215 // If the entire reloc was modified, return true to let the next
216 // EnumReloc track it as part of a larger modification.
217 if (unaccounted_reloc_bytes
== sizeof(uint32_t))
220 ptrdiff_t range_size
= ptr
+
221 unaccounted_reloc_bytes
-
222 state
->last_mem_reloc_position
;
224 state
->bytes_different
+= ExamineByteRangeDiff(
225 state
->last_disk_reloc_position
,
226 state
->last_mem_reloc_position
,
230 // Starting after the verified range, check if the relocation ends with
231 // modified bytes. If it does, include them in the following range to be
232 // verified as they're considered modified. Otherwise, the following
233 // range will start right after the current reloc.
234 size_t unmodified_reloc_byte_count
= unaccounted_reloc_bytes
;
235 while (unmodified_reloc_byte_count
< sizeof(uint32_t) &&
236 original_reloc_bytes
[unmodified_reloc_byte_count
] ==
237 reloc_disk_position
[unmodified_reloc_byte_count
]) {
238 ++unmodified_reloc_byte_count
;
240 state
->last_disk_reloc_position
+=
241 range_size
+ unmodified_reloc_byte_count
;
242 state
->last_mem_reloc_position
+=
243 range_size
+ unmodified_reloc_byte_count
;
246 case IMAGE_REL_BASED_DIR64
: // 10
249 // TODO(robertshield): Find a reliable description of the behaviour of the
250 // remaining types of relocation and handle them.
251 UMA_HISTOGRAM_SPARSE_SLOWLY("SafeBrowsing.ModuleBaseRelocation", type
);
252 state
->unknown_reloc_type
= true;
258 bool EnumExportsCallback(const base::win::PEImage
& mem_peimage
,
265 std::vector
<Export
>* exports
= reinterpret_cast<std::vector
<Export
>*>(cookie
);
267 exports
->push_back(Export(function_addr
, std::string(name
)));
273 bool GetCodeAddrsAndSize(const base::win::PEImage
& mem_peimage
,
274 const base::win::PEImageAsData
& disk_peimage
,
275 uint8_t** mem_code_addr
,
276 uint8_t** disk_code_addr
,
277 uint32_t* code_size
) {
278 DWORD base_of_code
= mem_peimage
.GetNTHeaders()->OptionalHeader
.BaseOfCode
;
280 // Get the address and size of the code section in the loaded module image.
281 PIMAGE_SECTION_HEADER mem_code_header
=
282 mem_peimage
.GetImageSectionFromAddr(mem_peimage
.RVAToAddr(base_of_code
));
283 if (mem_code_header
== NULL
)
285 *mem_code_addr
= reinterpret_cast<uint8_t*>(
286 mem_peimage
.RVAToAddr(mem_code_header
->VirtualAddress
));
287 // If the section is padded with zeros when mapped then |VirtualSize| can be
288 // larger. Alternatively, |SizeOfRawData| can be rounded up to align
289 // according to OptionalHeader.FileAlignment.
290 *code_size
= std::min(mem_code_header
->Misc
.VirtualSize
,
291 mem_code_header
->SizeOfRawData
);
293 // Get the address of the code section in the module mapped as data from disk.
294 DWORD disk_code_offset
= 0;
295 if (!mem_peimage
.ImageAddrToOnDiskOffset(
296 reinterpret_cast<void*>(*mem_code_addr
), &disk_code_offset
))
299 reinterpret_cast<uint8_t*>(disk_peimage
.module()) + disk_code_offset
;
304 const wchar_t* module_name
,
305 ClientIncidentReport_EnvironmentData_Process_ModuleState
* module_state
,
306 int* num_bytes_different
) {
307 using ModuleState
= ClientIncidentReport_EnvironmentData_Process_ModuleState
;
308 *num_bytes_different
= 0;
309 module_state
->set_name(base::WideToUTF8(module_name
));
310 module_state
->set_modified_state(ModuleState::MODULE_STATE_UNKNOWN
);
312 // Get module handle, load a copy from disk as data and create PEImages.
313 HMODULE module_handle
= NULL
;
314 if (!GetModuleHandleEx(0, module_name
, &module_handle
))
316 base::ScopedNativeLibrary
native_library(module_handle
);
318 WCHAR module_path
[MAX_PATH
] = {};
320 GetModuleFileName(module_handle
, module_path
, arraysize(module_path
));
321 if (!length
|| length
== arraysize(module_path
))
324 base::MemoryMappedFile mapped_module
;
325 if (!mapped_module
.Initialize(base::FilePath(module_path
)))
327 ModuleVerificationState
state(
328 reinterpret_cast<HMODULE
>(const_cast<uint8
*>(mapped_module
.data())));
330 base::win::PEImage
mem_peimage(module_handle
);
331 if (!mem_peimage
.VerifyMagic() || !state
.disk_peimage
.VerifyMagic())
334 // Get the list of exports and sort them by address for efficient lookups.
335 mem_peimage
.EnumExports(EnumExportsCallback
, &state
.exports
);
336 std::sort(state
.exports
.begin(), state
.exports
.end());
338 // Get the addresses of the code sections then calculate |code_section_delta|
339 // and |image_base_delta|.
340 if (!GetCodeAddrsAndSize(mem_peimage
,
342 &state
.mem_code_addr
,
343 &state
.disk_code_addr
,
347 state
.module_state
= module_state
;
348 state
.last_mem_reloc_position
= state
.mem_code_addr
;
349 state
.last_disk_reloc_position
= state
.disk_code_addr
;
350 state
.code_section_delta
= state
.disk_code_addr
- state
.mem_code_addr
;
352 uint8_t* preferred_image_base
= reinterpret_cast<uint8_t*>(
353 state
.disk_peimage
.GetNTHeaders()->OptionalHeader
.ImageBase
);
354 state
.image_base_delta
=
355 preferred_image_base
- reinterpret_cast<uint8_t*>(mem_peimage
.module());
357 state
.last_mem_reloc_position
= state
.mem_code_addr
;
358 state
.last_disk_reloc_position
= state
.disk_code_addr
;
360 // Enumerate relocations and verify the bytes between them.
361 bool scan_complete
= mem_peimage
.EnumRelocs(EnumRelocsCallback
, &state
);
365 state
.code_size
- (state
.last_mem_reloc_position
- state
.mem_code_addr
);
366 // Inspect the last chunk spanning from the furthest relocation to the end
367 // of the code section.
368 state
.bytes_different
+= ExamineByteRangeDiff(
369 state
.last_disk_reloc_position
,
370 state
.last_mem_reloc_position
,
374 *num_bytes_different
= state
.bytes_different
;
376 // Report STATE_MODIFIED if any difference was found, regardless of whether or
377 // not the entire module was scanned. Report STATE_UNMODIFIED only if the
378 // entire module was scanned and understood.
379 if (state
.bytes_different
)
380 module_state
->set_modified_state(ModuleState::MODULE_STATE_MODIFIED
);
381 else if (!state
.unknown_reloc_type
&& scan_complete
)
382 module_state
->set_modified_state(ModuleState::MODULE_STATE_UNMODIFIED
);
384 return scan_complete
;
387 } // namespace safe_browsing