1 // Copyright (c) 2007, Google Inc.
2 // All rights reserved.
4 // Redistribution and use in source and binary forms, with or without
5 // modification, are permitted provided that the following conditions are
8 // * Redistributions of source code must retain the above copyright
9 // notice, this list of conditions and the following disclaimer.
10 // * Redistributions in binary form must reproduce the above
11 // copyright notice, this list of conditions and the following disclaimer
12 // in the documentation and/or other materials provided with the
14 // * Neither the name of Google Inc. nor the names of its
15 // contributors may be used to endorse or promote products derived from
16 // this software without specific prior written permission.
18 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 // Author: Craig Silverstein
33 // The main purpose of this file is to patch the libc allocation
34 // routines (malloc and friends, but also _msize and other
35 // windows-specific libc-style routines). However, we also patch
36 // windows routines to do accounting. We do better at the former than
37 // the latter. Here are some comments from Paul Pluzhnikov about what
38 // it might take to do a really good job patching windows routines to
39 // keep track of memory usage:
41 // "You should intercept at least the following:
42 // HeapCreate HeapDestroy HeapAlloc HeapReAlloc HeapFree
43 // RtlCreateHeap RtlDestroyHeap RtlAllocateHeap RtlFreeHeap
44 // malloc calloc realloc free
45 // malloc_dbg calloc_dbg realloc_dbg free_dbg
46 // Some of these call the other ones (but not always), sometimes
47 // recursively (i.e. HeapCreate may call HeapAlloc on a different
50 // Since Paul didn't mention VirtualAllocEx, he may not have even been
51 // considering all the mmap-like functions that windows has (or he may
52 // just be ignoring it because he's seen we already patch it). Of the
53 // above, we do not patch the *_dbg functions, and of the windows
54 // functions, we only patch HeapAlloc and HeapFree.
56 // The *_dbg functions come into play with /MDd, /MTd, and /MLd,
57 // probably. It may be ok to just turn off tcmalloc in those cases --
58 // if the user wants the windows debug malloc, they probably don't
59 // want tcmalloc! We should also test with all of /MD, /MT, and /ML,
60 // which we're not currently doing.
62 // TODO(csilvers): try to do better here? Paul does conclude:
63 // "Keeping track of all of this was a nightmare."
66 # error You should only be including windows/patch_functions.cc in a windows environment!
71 #ifdef WIN32_OVERRIDE_ALLOCATORS
72 #error This file is intended for patching allocators - use override_functions.cc instead.
75 // We use psapi. Non-MSVC systems will have to link this in themselves.
77 #pragma comment(lib, "Psapi.lib")
80 // Make sure we always use the 'old' names of the psapi functions.
82 #define PSAPI_VERSION 1
87 #include <malloc.h> // for _msize and _expand
88 #include <Psapi.h> // for EnumProcessModules, GetModuleInformation, etc.
92 #include <base/logging.h>
93 #include "base/spinlock.h"
94 #include "gperftools/malloc_hook.h"
95 #include "malloc_hook-inl.h"
96 #include "preamble_patcher.h"
98 // The maximum number of modules we allow to be in one executable
99 const int kMaxModules
= 8182;
101 // These are hard-coded, unfortunately. :-( They are also probably
102 // compiler specific. See get_mangled_names.cc, in this directory,
103 // for instructions on how to update these names for your compiler.
104 const char kMangledNew
[] = "??2@YAPAXI@Z";
105 const char kMangledNewArray
[] = "??_U@YAPAXI@Z";
106 const char kMangledDelete
[] = "??3@YAXPAX@Z";
107 const char kMangledDeleteArray
[] = "??_V@YAXPAX@Z";
108 const char kMangledNewNothrow
[] = "??2@YAPAXIABUnothrow_t@std@@@Z";
109 const char kMangledNewArrayNothrow
[] = "??_U@YAPAXIABUnothrow_t@std@@@Z";
110 const char kMangledDeleteNothrow
[] = "??3@YAXPAXABUnothrow_t@std@@@Z";
111 const char kMangledDeleteArrayNothrow
[] = "??_V@YAXPAXABUnothrow_t@std@@@Z";
113 // This is an unused but exported symbol that we can use to tell the
114 // MSVC linker to bring in libtcmalloc, via the /INCLUDE linker flag.
115 // Without this, the linker will likely decide that libtcmalloc.dll
116 // doesn't add anything to the executable (since it does all its work
117 // through patching, which the linker can't see), and ignore it
118 // entirely. (The name 'tcmalloc' is already reserved for a
119 // namespace. I'd rather export a variable named "_tcmalloc", but I
120 // couldn't figure out how to get that to work. This function exports
121 // the symbol "__tcmalloc".)
122 extern "C" PERFTOOLS_DLL_DECL
void _tcmalloc();
125 // This is the version needed for windows x64, which has a different
126 // decoration scheme which doesn't auto-add a leading underscore.
127 extern "C" PERFTOOLS_DLL_DECL
void __tcmalloc();
128 void __tcmalloc() { }
130 namespace { // most everything here is in an unnamed namespace
132 typedef void (*GenericFnPtr
)();
134 using sidestep::PreamblePatcher
;
136 struct ModuleEntryCopy
; // defined below
138 // These functions are how we override the memory allocation
139 // functions, just like tcmalloc.cc and malloc_hook.cc do.
141 // This is information about the routines we're patching, for a given
142 // module that implements libc memory routines. A single executable
143 // can have several libc implementations running about (in different
144 // .dll's), and we need to patch/unpatch them all. This defines
145 // everything except the new functions we're patching in, which
146 // are defined in LibcFunctions, below.
150 memset(this, 0, sizeof(*this)); // easiest way to initialize the array
153 bool patched() const { return is_valid(); }
154 void set_is_valid(bool b
) { is_valid_
= b
; }
155 // According to http://msdn.microsoft.com/en-us/library/ms684229(VS.85).aspx:
156 // "The load address of a module (lpBaseOfDll) is the same as the HMODULE
158 HMODULE
hmodule() const {
159 return reinterpret_cast<HMODULE
>(const_cast<void*>(module_base_address_
));
162 // Populates all the windows_fn_[] vars based on our module info.
163 // Returns false if windows_fn_ is all NULL's, because there's
164 // nothing to patch. Also populates the rest of the module_entry
165 // info, such as the module's name.
166 bool PopulateWindowsFn(const ModuleEntryCopy
& module_entry
);
169 void CopyFrom(const LibcInfo
& that
) {
172 this->is_valid_
= that
.is_valid_
;
173 memcpy(this->windows_fn_
, that
.windows_fn_
, sizeof(windows_fn_
));
174 this->module_base_address_
= that
.module_base_address_
;
175 this->module_base_size_
= that
.module_base_size_
;
179 kMalloc
, kFree
, kRealloc
, kCalloc
,
180 kNew
, kNewArray
, kDelete
, kDeleteArray
,
181 kNewNothrow
, kNewArrayNothrow
, kDeleteNothrow
, kDeleteArrayNothrow
,
182 // These are windows-only functions from malloc.h
184 // A MS CRT "internal" function, implemented using _calloc_impl
189 // I'd like to put these together in a struct (perhaps in the
190 // subclass, so we can put in perftools_fn_ as well), but vc8 seems
191 // to have a bug where it doesn't initialize the struct properly if
192 // we try to take the address of a function that's not yet loaded
193 // from a dll, as is the common case for static_fn_. So we need
194 // each to be in its own array. :-(
195 static const char* const function_name_
[kNumFunctions
];
197 // This function is only used when statically linking the binary.
198 // In that case, loading malloc/etc from the dll (via
199 // PatchOneModule) won't work, since there are no dlls. Instead,
200 // you just want to be taking the address of malloc/etc directly.
201 // In the common, non-static-link case, these pointers will all be
202 // NULL, since this initializer runs before msvcrt.dll is loaded.
203 static const GenericFnPtr static_fn_
[kNumFunctions
];
205 // This is the address of the function we are going to patch
206 // (malloc, etc). Other info about the function is in the
207 // patch-specific subclasses, below.
208 GenericFnPtr windows_fn_
[kNumFunctions
];
210 // This is set to true when this structure is initialized (because
211 // we're patching a new library) and set to false when it's
212 // uninitialized (because we've freed that library).
215 const void *module_base_address_
;
216 size_t module_base_size_
;
219 // These shouldn't have to be public, since only subclasses of
220 // LibcInfo need it, but they do. Maybe something to do with
221 // templates. Shrug. I hide them down here so users won't see
222 // them. :-) (OK, I also need to define ctrgProcAddress late.)
223 bool is_valid() const { return is_valid_
; }
224 GenericFnPtr
windows_fn(int ifunction
) const {
225 return windows_fn_
[ifunction
];
227 // These three are needed by ModuleEntryCopy.
228 static const int ctrgProcAddress
= kNumFunctions
;
229 static GenericFnPtr
static_fn(int ifunction
) {
230 return static_fn_
[ifunction
];
232 static const char* const function_name(int ifunction
) {
233 return function_name_
[ifunction
];
237 // Template trickiness: logically, a LibcInfo would include
238 // Windows_malloc_, origstub_malloc_, and Perftools_malloc_: for a
239 // given module, these three go together. And in fact,
240 // Perftools_malloc_ may need to call origstub_malloc_, which means we
241 // either need to change Perftools_malloc_ to take origstub_malloc_ as
242 // an argument -- unfortunately impossible since it needs to keep the
243 // same API as normal malloc -- or we need to write a different
244 // version of Perftools_malloc_ for each LibcInfo instance we create.
245 // We choose the second route, and use templates to implement it (we
246 // could have also used macros). So to get multiple versions
247 // of the struct, we say "struct<1> var1; struct<2> var2;". The price
248 // we pay is some code duplication, and more annoying, each instance
249 // of this var is a separate type.
250 template<int> class LibcInfoWithPatchFunctions
: public LibcInfo
{
252 // me_info should have had PopulateWindowsFn() called on it, so the
253 // module_* vars and windows_fn_ are set up.
254 bool Patch(const LibcInfo
& me_info
);
258 // This holds the original function contents after we patch the function.
259 // This has to be defined static in the subclass, because the perftools_fns
260 // reference origstub_fn_.
261 static GenericFnPtr origstub_fn_
[kNumFunctions
];
263 // This is the function we want to patch in
264 static const GenericFnPtr perftools_fn_
[kNumFunctions
];
266 static void* Perftools_malloc(size_t size
) __THROW
;
267 static void Perftools_free(void* ptr
) __THROW
;
268 static void* Perftools_realloc(void* ptr
, size_t size
) __THROW
;
269 static void* Perftools_calloc(size_t nmemb
, size_t size
) __THROW
;
270 static void* Perftools_new(size_t size
);
271 static void* Perftools_newarray(size_t size
);
272 static void Perftools_delete(void *ptr
);
273 static void Perftools_deletearray(void *ptr
);
274 static void* Perftools_new_nothrow(size_t size
,
275 const std::nothrow_t
&) __THROW
;
276 static void* Perftools_newarray_nothrow(size_t size
,
277 const std::nothrow_t
&) __THROW
;
278 static void Perftools_delete_nothrow(void *ptr
,
279 const std::nothrow_t
&) __THROW
;
280 static void Perftools_deletearray_nothrow(void *ptr
,
281 const std::nothrow_t
&) __THROW
;
282 static size_t Perftools__msize(void *ptr
) __THROW
;
283 static void* Perftools__expand(void *ptr
, size_t size
) __THROW
;
284 // malloc.h also defines these functions:
285 // _aligned_malloc, _aligned_free,
286 // _recalloc, _aligned_offset_malloc, _aligned_realloc, _aligned_recalloc
287 // _aligned_offset_realloc, _aligned_offset_recalloc, _malloca, _freea
288 // But they seem pretty obscure, and I'm fine not overriding them for now.
289 // It may be they all call into malloc/free anyway.
292 // This is a subset of MODULEENTRY32 that we need for patching.
293 struct ModuleEntryCopy
{
294 LPVOID modBaseAddr
; // the same as hmodule
296 // This is not part of MODULEENTRY32, but is needed to avoid making
297 // windows syscalls while we're holding patch_all_modules_lock (see
298 // lock-inversion comments at patch_all_modules_lock definition, below).
299 GenericFnPtr rgProcAddresses
[LibcInfo::ctrgProcAddress
];
304 for (int i
= 0; i
< sizeof(rgProcAddresses
)/sizeof(*rgProcAddresses
); i
++)
305 rgProcAddresses
[i
] = LibcInfo::static_fn(i
);
307 ModuleEntryCopy(const MODULEINFO
& mi
) {
308 this->modBaseAddr
= mi
.lpBaseOfDll
;
309 this->modBaseSize
= mi
.SizeOfImage
;
310 LPVOID modEndAddr
= (char*)mi
.lpBaseOfDll
+ mi
.SizeOfImage
;
311 for (int i
= 0; i
< sizeof(rgProcAddresses
)/sizeof(*rgProcAddresses
); i
++) {
312 FARPROC target
= ::GetProcAddress(
313 reinterpret_cast<const HMODULE
>(mi
.lpBaseOfDll
),
314 LibcInfo::function_name(i
));
315 // Sometimes a DLL forwards a function to a function in another
316 // DLL. We don't want to patch those forwarded functions --
317 // they'll get patched when the other DLL is processed.
318 if (target
>= modBaseAddr
&& target
< modEndAddr
)
319 rgProcAddresses
[i
] = (GenericFnPtr
)target
;
321 rgProcAddresses
[i
] = (GenericFnPtr
)NULL
;
326 // This class is easier because there's only one of them.
333 // TODO(csilvers): should we be patching GlobalAlloc/LocalAlloc instead,
334 // for pre-XP systems?
336 kHeapAlloc
, kHeapFree
, kVirtualAllocEx
, kVirtualFreeEx
,
337 kMapViewOfFileEx
, kUnmapViewOfFile
, kLoadLibraryExW
, kFreeLibrary
,
341 struct FunctionInfo
{
342 const char* const name
; // name of fn in a module (eg "malloc")
343 GenericFnPtr windows_fn
; // the fn whose name we call (&malloc)
344 GenericFnPtr origstub_fn
; // original fn contents after we patch
345 const GenericFnPtr perftools_fn
; // fn we want to patch in
348 static FunctionInfo function_info_
[kNumFunctions
];
350 // A Windows-API equivalent of malloc and free
351 static LPVOID WINAPI
Perftools_HeapAlloc(HANDLE hHeap
, DWORD dwFlags
,
353 static BOOL WINAPI
Perftools_HeapFree(HANDLE hHeap
, DWORD dwFlags
,
355 // A Windows-API equivalent of mmap and munmap, for "anonymous regions"
356 static LPVOID WINAPI
Perftools_VirtualAllocEx(HANDLE process
, LPVOID address
,
357 SIZE_T size
, DWORD type
,
359 static BOOL WINAPI
Perftools_VirtualFreeEx(HANDLE process
, LPVOID address
,
360 SIZE_T size
, DWORD type
);
361 // A Windows-API equivalent of mmap and munmap, for actual files
362 static LPVOID WINAPI
Perftools_MapViewOfFileEx(HANDLE hFileMappingObject
,
363 DWORD dwDesiredAccess
,
364 DWORD dwFileOffsetHigh
,
365 DWORD dwFileOffsetLow
,
366 SIZE_T dwNumberOfBytesToMap
,
367 LPVOID lpBaseAddress
);
368 static BOOL WINAPI
Perftools_UnmapViewOfFile(LPCVOID lpBaseAddress
);
369 // We don't need the other 3 variants because they all call this one.
370 static HMODULE WINAPI
Perftools_LoadLibraryExW(LPCWSTR lpFileName
,
373 static BOOL WINAPI
Perftools_FreeLibrary(HMODULE hLibModule
);
376 // If you run out, just add a few more to the array. You'll also need
377 // to update the switch statement in PatchOneModule(), and the list in
378 // UnpatchWindowsFunctions().
379 // main_executable and main_executable_windows are two windows into
380 // the same executable. One is responsible for patching the libc
381 // routines that live in the main executable (if any) to use tcmalloc;
382 // the other is responsible for patching the windows routines like
383 // HeapAlloc/etc to use tcmalloc.
384 static LibcInfoWithPatchFunctions
<0> main_executable
;
385 static LibcInfoWithPatchFunctions
<1> libc1
;
386 static LibcInfoWithPatchFunctions
<2> libc2
;
387 static LibcInfoWithPatchFunctions
<3> libc3
;
388 static LibcInfoWithPatchFunctions
<4> libc4
;
389 static LibcInfoWithPatchFunctions
<5> libc5
;
390 static LibcInfoWithPatchFunctions
<6> libc6
;
391 static LibcInfoWithPatchFunctions
<7> libc7
;
392 static LibcInfoWithPatchFunctions
<8> libc8
;
393 static LibcInfo
* g_module_libcs
[] = {
394 &libc1
, &libc2
, &libc3
, &libc4
, &libc5
, &libc6
, &libc7
, &libc8
396 static WindowsInfo main_executable_windows
;
398 const char* const LibcInfo::function_name_
[] = {
399 "malloc", "free", "realloc", "calloc",
400 kMangledNew
, kMangledNewArray
, kMangledDelete
, kMangledDeleteArray
,
401 // Ideally we should patch the nothrow versions of new/delete, but
402 // at least in msvcrt, nothrow-new machine-code is of a type we
403 // can't patch. Since these are relatively rare, I'm hoping it's ok
404 // not to patch them. (NULL name turns off patching.)
405 NULL
, // kMangledNewNothrow,
406 NULL
, // kMangledNewArrayNothrow,
407 NULL
, // kMangledDeleteNothrow,
408 NULL
, // kMangledDeleteArrayNothrow,
409 "_msize", "_expand", "_calloc_crt",
412 // For mingw, I can't patch the new/delete here, because the
413 // instructions are too small to patch. Luckily, they're so small
414 // because all they do is call into malloc/free, so they still end up
415 // calling tcmalloc routines, and we don't actually lose anything
416 // (except maybe some stacktrace goodness) by not patching.
417 const GenericFnPtr
LibcInfo::static_fn_
[] = {
418 (GenericFnPtr
)&::malloc
,
419 (GenericFnPtr
)&::free
,
420 (GenericFnPtr
)&::realloc
,
421 (GenericFnPtr
)&::calloc
,
423 NULL
, NULL
, NULL
, NULL
, NULL
, NULL
, NULL
, NULL
,
425 (GenericFnPtr
)(void*(*)(size_t))&::operator new,
426 (GenericFnPtr
)(void*(*)(size_t))&::operator new[],
427 (GenericFnPtr
)(void(*)(void*))&::operator delete,
428 (GenericFnPtr
)(void(*)(void*))&::operator delete[],
430 (void*(*)(size_t, struct std::nothrow_t
const &))&::operator new,
432 (void*(*)(size_t, struct std::nothrow_t
const &))&::operator new[],
434 (void(*)(void*, struct std::nothrow_t
const &))&::operator delete,
436 (void(*)(void*, struct std::nothrow_t
const &))&::operator delete[],
438 (GenericFnPtr
)&::_msize
,
439 (GenericFnPtr
)&::_expand
,
440 (GenericFnPtr
)&::calloc
,
443 template<int T
> GenericFnPtr LibcInfoWithPatchFunctions
<T
>::origstub_fn_
[] = {
444 // This will get filled in at run-time, as patching is done.
448 const GenericFnPtr LibcInfoWithPatchFunctions
<T
>::perftools_fn_
[] = {
449 (GenericFnPtr
)&Perftools_malloc
,
450 (GenericFnPtr
)&Perftools_free
,
451 (GenericFnPtr
)&Perftools_realloc
,
452 (GenericFnPtr
)&Perftools_calloc
,
453 (GenericFnPtr
)&Perftools_new
,
454 (GenericFnPtr
)&Perftools_newarray
,
455 (GenericFnPtr
)&Perftools_delete
,
456 (GenericFnPtr
)&Perftools_deletearray
,
457 (GenericFnPtr
)&Perftools_new_nothrow
,
458 (GenericFnPtr
)&Perftools_newarray_nothrow
,
459 (GenericFnPtr
)&Perftools_delete_nothrow
,
460 (GenericFnPtr
)&Perftools_deletearray_nothrow
,
461 (GenericFnPtr
)&Perftools__msize
,
462 (GenericFnPtr
)&Perftools__expand
,
463 (GenericFnPtr
)&Perftools_calloc
,
466 /*static*/ WindowsInfo::FunctionInfo
WindowsInfo::function_info_
[] = {
467 { "HeapAlloc", NULL
, NULL
, (GenericFnPtr
)&Perftools_HeapAlloc
},
468 { "HeapFree", NULL
, NULL
, (GenericFnPtr
)&Perftools_HeapFree
},
469 { "VirtualAllocEx", NULL
, NULL
, (GenericFnPtr
)&Perftools_VirtualAllocEx
},
470 { "VirtualFreeEx", NULL
, NULL
, (GenericFnPtr
)&Perftools_VirtualFreeEx
},
471 { "MapViewOfFileEx", NULL
, NULL
, (GenericFnPtr
)&Perftools_MapViewOfFileEx
},
472 { "UnmapViewOfFile", NULL
, NULL
, (GenericFnPtr
)&Perftools_UnmapViewOfFile
},
473 { "LoadLibraryExW", NULL
, NULL
, (GenericFnPtr
)&Perftools_LoadLibraryExW
},
474 { "FreeLibrary", NULL
, NULL
, (GenericFnPtr
)&Perftools_FreeLibrary
},
477 bool LibcInfo::PopulateWindowsFn(const ModuleEntryCopy
& module_entry
) {
478 // First, store the location of the function to patch before
479 // patching it. If none of these functions are found in the module,
480 // then this module has no libc in it, and we just return false.
481 for (int i
= 0; i
< kNumFunctions
; i
++) {
482 if (!function_name_
[i
]) // we can turn off patching by unsetting name
484 // The ::GetProcAddress calls were done in the ModuleEntryCopy
485 // constructor, so we don't have to make any windows calls here.
486 const GenericFnPtr fn
= module_entry
.rgProcAddresses
[i
];
488 windows_fn_
[i
] = PreamblePatcher::ResolveTarget(fn
);
492 // Some modules use the same function pointer for new and new[]. If
493 // we find that, set one of the pointers to NULL so we don't double-
494 // patch. Same may happen with new and nothrow-new, or even new[]
495 // and nothrow-new. It's easiest just to check each fn-ptr against
497 for (int i
= 0; i
< kNumFunctions
; i
++) {
498 for (int j
= i
+1; j
< kNumFunctions
; j
++) {
499 if (windows_fn_
[i
] == windows_fn_
[j
]) {
500 // We NULL the later one (j), so as to minimize the chances we
501 // NULL kFree and kRealloc. See comments below. This is fragile!
502 windows_fn_
[j
] = NULL
;
507 // There's always a chance that our module uses the same function
508 // as another module that we've already loaded. In that case, we
509 // need to set our windows_fn to NULL, to avoid double-patching.
510 for (int ifn
= 0; ifn
< kNumFunctions
; ifn
++) {
512 imod
< sizeof(g_module_libcs
)/sizeof(*g_module_libcs
); imod
++) {
513 if (g_module_libcs
[imod
]->is_valid() &&
514 this->windows_fn(ifn
) == g_module_libcs
[imod
]->windows_fn(ifn
)) {
515 windows_fn_
[ifn
] = NULL
;
520 bool found_non_null
= false;
521 for (int i
= 0; i
< kNumFunctions
; i
++) {
523 found_non_null
= true;
528 // It's important we didn't NULL out windows_fn_[kFree] or [kRealloc].
529 // The reason is, if those are NULL-ed out, we'll never patch them
530 // and thus never get an origstub_fn_ value for them, and when we
531 // try to call origstub_fn_[kFree/kRealloc] in Perftools_free and
532 // Perftools_realloc, below, it will fail. We could work around
533 // that by adding a pointer from one patch-unit to the other, but we
534 // haven't needed to yet.
535 CHECK(windows_fn_
[kFree
]);
536 CHECK(windows_fn_
[kRealloc
]);
538 // OK, we successfully populated. Let's store our member information.
539 module_base_address_
= module_entry
.modBaseAddr
;
540 module_base_size_
= module_entry
.modBaseSize
;
545 bool LibcInfoWithPatchFunctions
<T
>::Patch(const LibcInfo
& me_info
) {
546 CopyFrom(me_info
); // copies the module_entry and the windows_fn_ array
547 for (int i
= 0; i
< kNumFunctions
; i
++) {
548 if (windows_fn_
[i
] && windows_fn_
[i
] != perftools_fn_
[i
]) {
549 // if origstub_fn_ is not NULL, it's left around from a previous
550 // patch. We need to set it to NULL for the new Patch call.
551 // Since we've patched Unpatch() not to delete origstub_fn_ (it
552 // causes problems in some contexts, though obviously not this
553 // one), we should delete it now, before setting it to NULL.
554 // NOTE: casting from a function to a pointer is contra the C++
555 // spec. It's not safe on IA64, but is on i386. We use
556 // a C-style cast here to emphasize this is not legal C++.
557 delete[] (char*)(origstub_fn_
[i
]);
558 origstub_fn_
[i
] = NULL
; // Patch() will fill this in
559 CHECK_EQ(sidestep::SIDESTEP_SUCCESS
,
560 PreamblePatcher::Patch(windows_fn_
[i
], perftools_fn_
[i
],
569 void LibcInfoWithPatchFunctions
<T
>::Unpatch() {
570 // We have to cast our GenericFnPtrs to void* for unpatch. This is
571 // contra the C++ spec; we use C-style casts to emphasize that.
572 for (int i
= 0; i
< kNumFunctions
; i
++) {
574 CHECK_EQ(sidestep::SIDESTEP_SUCCESS
,
575 PreamblePatcher::Unpatch((void*)windows_fn_
[i
],
576 (void*)perftools_fn_
[i
],
577 (void*)origstub_fn_
[i
]));
582 void WindowsInfo::Patch() {
583 HMODULE hkernel32
= ::GetModuleHandleA("kernel32");
584 CHECK_NE(hkernel32
, NULL
);
586 // Unlike for libc, we know these exist in our module, so we can get
587 // and patch at the same time.
588 for (int i
= 0; i
< kNumFunctions
; i
++) {
589 function_info_
[i
].windows_fn
= (GenericFnPtr
)
590 ::GetProcAddress(hkernel32
, function_info_
[i
].name
);
591 // If origstub_fn is not NULL, it's left around from a previous
592 // patch. We need to set it to NULL for the new Patch call.
593 // Since we've patched Unpatch() not to delete origstub_fn_ (it
594 // causes problems in some contexts, though obviously not this
595 // one), we should delete it now, before setting it to NULL.
596 // NOTE: casting from a function to a pointer is contra the C++
597 // spec. It's not safe on IA64, but is on i386. We use
598 // a C-style cast here to emphasize this is not legal C++.
599 delete[] (char*)(function_info_
[i
].origstub_fn
);
600 function_info_
[i
].origstub_fn
= NULL
; // Patch() will fill this in
601 CHECK_EQ(sidestep::SIDESTEP_SUCCESS
,
602 PreamblePatcher::Patch(function_info_
[i
].windows_fn
,
603 function_info_
[i
].perftools_fn
,
604 &function_info_
[i
].origstub_fn
));
608 void WindowsInfo::Unpatch() {
609 // We have to cast our GenericFnPtrs to void* for unpatch. This is
610 // contra the C++ spec; we use C-style casts to empahsize that.
611 for (int i
= 0; i
< kNumFunctions
; i
++) {
612 CHECK_EQ(sidestep::SIDESTEP_SUCCESS
,
613 PreamblePatcher::Unpatch((void*)function_info_
[i
].windows_fn
,
614 (void*)function_info_
[i
].perftools_fn
,
615 (void*)function_info_
[i
].origstub_fn
));
619 // You should hold the patch_all_modules_lock when calling this.
620 void PatchOneModuleLocked(const LibcInfo
& me_info
) {
621 // If we don't already have info on this module, let's add it. This
622 // is where we're sad that each libcX has a different type, so we
623 // can't use an array; instead, we have to use a switch statement.
624 // Patch() returns false if there were no libc functions in the module.
625 for (int i
= 0; i
< sizeof(g_module_libcs
)/sizeof(*g_module_libcs
); i
++) {
626 if (!g_module_libcs
[i
]->is_valid()) { // found an empty spot to add!
628 case 0: libc1
.Patch(me_info
); return;
629 case 1: libc2
.Patch(me_info
); return;
630 case 2: libc3
.Patch(me_info
); return;
631 case 3: libc4
.Patch(me_info
); return;
632 case 4: libc5
.Patch(me_info
); return;
633 case 5: libc6
.Patch(me_info
); return;
634 case 6: libc7
.Patch(me_info
); return;
635 case 7: libc8
.Patch(me_info
); return;
639 printf("PERFTOOLS ERROR: Too many modules containing libc in this executable\n");
642 void PatchMainExecutableLocked() {
643 if (main_executable
.patched())
644 return; // main executable has already been patched
645 ModuleEntryCopy fake_module_entry
; // make a fake one to pass into Patch()
646 // No need to call PopulateModuleEntryProcAddresses on the main executable.
647 main_executable
.PopulateWindowsFn(fake_module_entry
);
648 main_executable
.Patch(main_executable
);
651 // This lock is subject to a subtle and annoying lock inversion
652 // problem: it may interact badly with unknown internal windows locks.
653 // In particular, windows may be holding a lock when it calls
654 // LoadLibraryExW and FreeLibrary, which we've patched. We have those
655 // routines call PatchAllModules, which acquires this lock. If we
656 // make windows system calls while holding this lock, those system
657 // calls may need the internal windows locks that are being held in
658 // the call to LoadLibraryExW, resulting in deadlock. The solution is
659 // to be very careful not to call *any* windows routines while holding
660 // patch_all_modules_lock, inside PatchAllModules().
661 static SpinLock
patch_all_modules_lock(SpinLock::LINKER_INITIALIZED
);
663 // last_loaded: The set of modules that were loaded the last time
664 // PatchAllModules was called. This is an optimization for only
665 // looking at modules that were added or removed from the last call.
666 static std::set
<HMODULE
> *g_last_loaded
;
668 // Iterates over all the modules currently loaded by the executable,
669 // according to windows, and makes sure they're all patched. Most
670 // modules will already be in g_last_loaded, meaning we have already
671 // loaded and either patched them or determined they did not need to
672 // be patched. Others will not, which means we need to patch them
673 // (if necessary). Finally, we have to go through the existing
674 // g_module_libcs and see if any of those are *not* in the modules
675 // currently loaded by the executable. If so, we need to invalidate
676 // them. Returns true if we did any work (patching or invalidating),
677 // false if we were a noop. May update g_last_loaded as well.
678 // NOTE: you must hold the patch_all_modules_lock to access g_last_loaded.
679 bool PatchAllModules() {
680 std::vector
<ModuleEntryCopy
> modules
;
681 bool made_changes
= false;
683 const HANDLE hCurrentProcess
= GetCurrentProcess();
684 DWORD num_modules
= 0;
685 HMODULE hModules
[kMaxModules
]; // max # of modules we support in one process
686 if (!::EnumProcessModules(hCurrentProcess
, hModules
, sizeof(hModules
),
690 // EnumProcessModules actually sets the bytes written into hModules,
691 // so we need to divide to make num_modules actually be a module-count.
692 num_modules
/= sizeof(*hModules
);
693 if (num_modules
>= kMaxModules
) {
694 printf("PERFTOOLS ERROR: Too many modules in this executable to try"
695 " to patch them all (if you need to, raise kMaxModules in"
696 " patch_functions.cc).\n");
697 num_modules
= kMaxModules
;
700 // Now we handle the unpatching of modules we have in g_module_libcs
701 // but that were not found in EnumProcessModules. We need to
702 // invalidate them. To speed that up, we store the EnumProcessModules
704 // At the same time, we prepare for the adding of new modules, by
705 // removing from hModules all the modules we know we've already
706 // patched (or decided don't need to be patched). At the end,
707 // hModules will hold only the modules that we need to consider patching.
708 std::set
<HMODULE
> currently_loaded_modules
;
710 SpinLockHolder
h(&patch_all_modules_lock
);
711 if (!g_last_loaded
) g_last_loaded
= new std::set
<HMODULE
>;
712 // At the end of this loop, currently_loaded_modules contains the
713 // full list of EnumProcessModules, and hModules just the ones we
714 // haven't handled yet.
715 for (int i
= 0; i
< num_modules
; ) {
716 currently_loaded_modules
.insert(hModules
[i
]);
717 if (g_last_loaded
->count(hModules
[i
]) > 0) {
718 hModules
[i
] = hModules
[--num_modules
]; // replace element i with tail
720 i
++; // keep element i
723 // Now we do the unpatching/invalidation.
724 for (int i
= 0; i
< sizeof(g_module_libcs
)/sizeof(*g_module_libcs
); i
++) {
725 if (g_module_libcs
[i
]->patched() &&
726 currently_loaded_modules
.count(g_module_libcs
[i
]->hmodule()) == 0) {
727 // Means g_module_libcs[i] is no longer loaded (no me32 matched).
728 // We could call Unpatch() here, but why bother? The module
729 // has gone away, so nobody is going to call into it anyway.
730 g_module_libcs
[i
]->set_is_valid(false);
734 // Update the loaded module cache.
735 g_last_loaded
->swap(currently_loaded_modules
);
738 // Now that we know what modules are new, let's get the info we'll
739 // need to patch them. Note this *cannot* be done while holding the
740 // lock, since it needs to make windows calls (see the lock-inversion
741 // comments before the definition of patch_all_modules_lock).
743 for (int i
= 0; i
< num_modules
; i
++) {
744 if (::GetModuleInformation(hCurrentProcess
, hModules
[i
], &mi
, sizeof(mi
)))
745 modules
.push_back(ModuleEntryCopy(mi
));
748 // Now we can do the patching of new modules.
750 SpinLockHolder
h(&patch_all_modules_lock
);
751 for (std::vector
<ModuleEntryCopy
>::iterator it
= modules
.begin();
752 it
!= modules
.end(); ++it
) {
754 if (libc_info
.PopulateWindowsFn(*it
)) { // true==module has libc routines
755 PatchOneModuleLocked(libc_info
);
760 // Now that we've dealt with the modules (dlls), update the main
761 // executable. We do this last because PatchMainExecutableLocked
762 // wants to look at how other modules were patched.
763 if (!main_executable
.patched()) {
764 PatchMainExecutableLocked();
768 // TODO(csilvers): for this to be reliable, we need to also take
769 // into account if we *would* have patched any modules had they not
770 // already been loaded. (That is, made_changes should ignore
776 } // end unnamed namespace
// ---------------------------------------------------------------------
// Now that we've done all the patching machinery, let's actually
// define the functions we're patching in.  Mostly these are
// simple wrappers around the do_* routines in tcmalloc.cc.
//
// In fact, we #include tcmalloc.cc to get at the tcmalloc internal
// do_* functions, the better to write our own hook functions.
// U-G-L-Y, I know.  But the alternatives are, perhaps, worse.  This
// also lets us define _msize(), _expand(), and other windows-specific
// functions here, using tcmalloc internals, without polluting
// tcmalloc.cc.
// ---------------------------------------------------------------------

// TODO(csilvers): refactor tcmalloc.cc into two files, so I can link
// against the file with do_malloc, and ignore the one with malloc.
793 #include "tcmalloc.cc"
796 void* LibcInfoWithPatchFunctions
<T
>::Perftools_malloc(size_t size
) __THROW
{
797 void* result
= do_malloc_or_cpp_alloc(size
);
798 MallocHook::InvokeNewHook(result
, size
);
803 void LibcInfoWithPatchFunctions
<T
>::Perftools_free(void* ptr
) __THROW
{
804 MallocHook::InvokeDeleteHook(ptr
);
805 // This calls the windows free if do_free decides ptr was not
806 // allocated by tcmalloc. Note it calls the origstub_free from
807 // *this* templatized instance of LibcInfo. See "template
808 // trickiness" above.
809 do_free_with_callback(ptr
, (void (*)(void*))origstub_fn_
[kFree
]);
813 void* LibcInfoWithPatchFunctions
<T
>::Perftools_realloc(
814 void* old_ptr
, size_t new_size
) __THROW
{
815 if (old_ptr
== NULL
) {
816 void* result
= do_malloc_or_cpp_alloc(new_size
);
817 MallocHook::InvokeNewHook(result
, new_size
);
821 MallocHook::InvokeDeleteHook(old_ptr
);
822 do_free_with_callback(old_ptr
,
823 (void (*)(void*))origstub_fn_
[kFree
]);
826 return do_realloc_with_callback(
828 (void (*)(void*))origstub_fn_
[kFree
],
829 (size_t (*)(const void*))origstub_fn_
[k_Msize
]);
833 void* LibcInfoWithPatchFunctions
<T
>::Perftools_calloc(
834 size_t n
, size_t elem_size
) __THROW
{
835 void* result
= do_calloc(n
, elem_size
);
836 MallocHook::InvokeNewHook(result
, n
* elem_size
);
841 void* LibcInfoWithPatchFunctions
<T
>::Perftools_new(size_t size
) {
842 void* p
= cpp_alloc(size
, false);
843 MallocHook::InvokeNewHook(p
, size
);
848 void* LibcInfoWithPatchFunctions
<T
>::Perftools_newarray(size_t size
) {
849 void* p
= cpp_alloc(size
, false);
850 MallocHook::InvokeNewHook(p
, size
);
855 void LibcInfoWithPatchFunctions
<T
>::Perftools_delete(void *p
) {
856 MallocHook::InvokeDeleteHook(p
);
857 do_free_with_callback(p
, (void (*)(void*))origstub_fn_
[kFree
]);
861 void LibcInfoWithPatchFunctions
<T
>::Perftools_deletearray(void *p
) {
862 MallocHook::InvokeDeleteHook(p
);
863 do_free_with_callback(p
, (void (*)(void*))origstub_fn_
[kFree
]);
867 void* LibcInfoWithPatchFunctions
<T
>::Perftools_new_nothrow(
868 size_t size
, const std::nothrow_t
&) __THROW
{
869 void* p
= cpp_alloc(size
, true);
870 MallocHook::InvokeNewHook(p
, size
);
875 void* LibcInfoWithPatchFunctions
<T
>::Perftools_newarray_nothrow(
876 size_t size
, const std::nothrow_t
&) __THROW
{
877 void* p
= cpp_alloc(size
, true);
878 MallocHook::InvokeNewHook(p
, size
);
883 void LibcInfoWithPatchFunctions
<T
>::Perftools_delete_nothrow(
884 void *p
, const std::nothrow_t
&) __THROW
{
885 MallocHook::InvokeDeleteHook(p
);
886 do_free_with_callback(p
, (void (*)(void*))origstub_fn_
[kFree
]);
890 void LibcInfoWithPatchFunctions
<T
>::Perftools_deletearray_nothrow(
891 void *p
, const std::nothrow_t
&) __THROW
{
892 MallocHook::InvokeDeleteHook(p
);
893 do_free_with_callback(p
, (void (*)(void*))origstub_fn_
[kFree
]);
897 // _msize() lets you figure out how much space is reserved for a
898 // pointer, in Windows. Even if applications don't call it, any DLL
899 // with global constructors will call (transitively) something called
900 // __dllonexit_lk in order to make sure the destructors get called
901 // when the dll unloads. And that will call msize -- horrible things
902 // can ensue if this is not hooked. Other parts of libc may also call
906 size_t LibcInfoWithPatchFunctions
<T
>::Perftools__msize(void* ptr
) __THROW
{
907 return GetSizeWithCallback(ptr
, (size_t (*)(const void*))origstub_fn_
[k_Msize
]);
910 // We need to define this because internal windows functions like to
911 // call into it(?). _expand() is like realloc but doesn't move the
912 // pointer. We punt, which will cause callers to fall back on realloc.
914 void* LibcInfoWithPatchFunctions
<T
>::Perftools__expand(void *ptr
,
915 size_t size
) __THROW
{
919 LPVOID WINAPI
WindowsInfo::Perftools_HeapAlloc(HANDLE hHeap
, DWORD dwFlags
,
921 LPVOID result
= ((LPVOID (WINAPI
*)(HANDLE
, DWORD
, DWORD_PTR
))
922 function_info_
[kHeapAlloc
].origstub_fn
)(
923 hHeap
, dwFlags
, dwBytes
);
924 MallocHook::InvokeNewHook(result
, dwBytes
);
928 BOOL WINAPI
WindowsInfo::Perftools_HeapFree(HANDLE hHeap
, DWORD dwFlags
,
930 MallocHook::InvokeDeleteHook(lpMem
);
931 return ((BOOL (WINAPI
*)(HANDLE
, DWORD
, LPVOID
))
932 function_info_
[kHeapFree
].origstub_fn
)(
933 hHeap
, dwFlags
, lpMem
);
936 LPVOID WINAPI
WindowsInfo::Perftools_VirtualAllocEx(HANDLE process
,
938 SIZE_T size
, DWORD type
,
940 LPVOID result
= ((LPVOID (WINAPI
*)(HANDLE
, LPVOID
, SIZE_T
, DWORD
, DWORD
))
941 function_info_
[kVirtualAllocEx
].origstub_fn
)(
942 process
, address
, size
, type
, protect
);
943 // VirtualAllocEx() seems to be the Windows equivalent of mmap()
944 MallocHook::InvokeMmapHook(result
, address
, size
, protect
, type
, -1, 0);
948 BOOL WINAPI
WindowsInfo::Perftools_VirtualFreeEx(HANDLE process
, LPVOID address
,
949 SIZE_T size
, DWORD type
) {
950 MallocHook::InvokeMunmapHook(address
, size
);
951 return ((BOOL (WINAPI
*)(HANDLE
, LPVOID
, SIZE_T
, DWORD
))
952 function_info_
[kVirtualFreeEx
].origstub_fn
)(
953 process
, address
, size
, type
);
956 LPVOID WINAPI
WindowsInfo::Perftools_MapViewOfFileEx(
957 HANDLE hFileMappingObject
, DWORD dwDesiredAccess
, DWORD dwFileOffsetHigh
,
958 DWORD dwFileOffsetLow
, SIZE_T dwNumberOfBytesToMap
, LPVOID lpBaseAddress
) {
959 // For this function pair, you always deallocate the full block of
960 // data that you allocate, so NewHook/DeleteHook is the right API.
961 LPVOID result
= ((LPVOID (WINAPI
*)(HANDLE
, DWORD
, DWORD
, DWORD
,
963 function_info_
[kMapViewOfFileEx
].origstub_fn
)(
964 hFileMappingObject
, dwDesiredAccess
, dwFileOffsetHigh
,
965 dwFileOffsetLow
, dwNumberOfBytesToMap
, lpBaseAddress
);
966 MallocHook::InvokeNewHook(result
, dwNumberOfBytesToMap
);
970 BOOL WINAPI
WindowsInfo::Perftools_UnmapViewOfFile(LPCVOID lpBaseAddress
) {
971 MallocHook::InvokeDeleteHook(lpBaseAddress
);
972 return ((BOOL (WINAPI
*)(LPCVOID
))
973 function_info_
[kUnmapViewOfFile
].origstub_fn
)(
977 // g_load_map holds a copy of windows' refcount for how many times
978 // each currently loaded module has been loaded and unloaded. We use
979 // it as an optimization when the same module is loaded more than
980 // once: as long as the refcount stays above 1, we don't need to worry
981 // about patching because it's already patched. Likewise, we don't
982 // need to unpatch until the refcount drops to 0. load_map is
983 // maintained in LoadLibraryExW and FreeLibrary, and only covers
984 // modules explicitly loaded/freed via those interfaces.
985 static std::map
<HMODULE
, int>* g_load_map
= NULL
;
987 HMODULE WINAPI
WindowsInfo::Perftools_LoadLibraryExW(LPCWSTR lpFileName
,
991 // Check to see if the modules is already loaded, flag 0 gets a
992 // reference if it was loaded. If it was loaded no need to call
993 // PatchAllModules, just increase the reference count to match
994 // what GetModuleHandleExW does internally inside windows.
995 if (::GetModuleHandleExW(0, lpFileName
, &rv
)) {
998 // Not already loaded, so load it.
999 rv
= ((HMODULE (WINAPI
*)(LPCWSTR
, HANDLE
, DWORD
))
1000 function_info_
[kLoadLibraryExW
].origstub_fn
)(
1001 lpFileName
, hFile
, dwFlags
);
1002 // This will patch any newly loaded libraries, if patching needs
1010 BOOL WINAPI
WindowsInfo::Perftools_FreeLibrary(HMODULE hLibModule
) {
1011 BOOL rv
= ((BOOL (WINAPI
*)(HMODULE
))
1012 function_info_
[kFreeLibrary
].origstub_fn
)(hLibModule
);
1014 // Check to see if the module is still loaded by passing the base
1015 // address and seeing if it comes back with the same address. If it
1016 // is the same address it's still loaded, so the FreeLibrary() call
1017 // was a noop, and there's no need to redo the patching.
1018 HMODULE owner
= NULL
;
1019 BOOL result
= ::GetModuleHandleExW(
1020 (GET_MODULE_HANDLE_EX_FLAG_FROM_ADDRESS
|
1021 GET_MODULE_HANDLE_EX_FLAG_UNCHANGED_REFCOUNT
),
1022 (LPCWSTR
)hLibModule
,
1024 if (result
&& owner
== hLibModule
)
1027 PatchAllModules(); // this will fix up the list of patched libraries
1032 // ---------------------------------------------------------------------
1033 // PatchWindowsFunctions()
1034 // This is the function that is exposed to the outside world.
1035 // It should be called before the program becomes multi-threaded,
1036 // since main_executable_windows.Patch() is not thread-safe.
1037 // ---------------------------------------------------------------------
1039 void PatchWindowsFunctions() {
1040 // This does the libc patching in every module, and the main executable.
1042 main_executable_windows
.Patch();
// It's possible to unpatch all the functions when we are exiting.
//
// The idea is to handle properly windows-internal data that is
// allocated before PatchWindowsFunctions is called.  If all
// destruction happened in reverse order from construction, then we
// could call UnpatchWindowsFunctions at just the right time, so that
// that early-allocated data would be freed using the windows
// allocation functions rather than tcmalloc.  The problem is that
// windows allocates some structures lazily, so it would allocate them
// late (using tcmalloc) and then try to deallocate them late as well.
// So instead of unpatching, we just modify all the tcmalloc routines
// so they call through to the libc routines if the memory in
// question doesn't seem to have been allocated with tcmalloc.  I keep
// this unpatch code around for reference.
1061 void UnpatchWindowsFunctions() {
1062 // We need to go back to the system malloc/etc at global destruct time,
1063 // so objects that were constructed before tcmalloc, using the system
1064 // malloc, can destroy themselves using the system free. This depends
1065 // on DLLs unloading in the reverse order in which they load!
1067 // We also go back to the default HeapAlloc/etc, just for consistency.
1068 // Who knows, it may help avoid weird bugs in some situations.
1069 main_executable_windows
.Unpatch();
1070 main_executable
.Unpatch();
1071 if (libc1
.is_valid()) libc1
.Unpatch();
1072 if (libc2
.is_valid()) libc2
.Unpatch();
1073 if (libc3
.is_valid()) libc3
.Unpatch();
1074 if (libc4
.is_valid()) libc4
.Unpatch();
1075 if (libc5
.is_valid()) libc5
.Unpatch();
1076 if (libc6
.is_valid()) libc6
.Unpatch();
1077 if (libc7
.is_valid()) libc7
.Unpatch();
1078 if (libc8
.is_valid()) libc8
.Unpatch();