1 // Copyright (c) 2005, Google Inc.
2 // All rights reserved.
4 // Redistribution and use in source and binary forms, with or without
5 // modification, are permitted provided that the following conditions are
8 // * Redistributions of source code must retain the above copyright
9 // notice, this list of conditions and the following disclaimer.
10 // * Redistributions in binary form must reproduce the above
11 // copyright notice, this list of conditions and the following disclaimer
12 // in the documentation and/or other materials provided with the
14 // * Neither the name of Google Inc. nor the names of its
15 // contributors may be used to endorse or promote products derived from
16 // this software without specific prior written permission.
18 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 // Author: Sanjay Ghemawat
34 #include <errno.h> // for EAGAIN, errno
35 #include <fcntl.h> // for open, O_RDWR
36 #include <stddef.h> // for size_t, NULL, ptrdiff_t
37 #if defined HAVE_STDINT_H
38 #include <stdint.h> // for uintptr_t, intptr_t
39 #elif defined HAVE_INTTYPES_H
42 #include <sys/types.h>
45 #include <sys/mman.h> // for munmap, mmap, MADV_DONTNEED, etc
48 #include <unistd.h> // for sbrk, getpagesize, off_t
50 #include <new> // for operator new
51 #include <gperftools/malloc_extension.h>
52 #include "base/basictypes.h"
53 #include "base/commandlineflags.h"
54 #include "base/spinlock.h" // for SpinLockHolder, SpinLock, etc
56 #include "internal_logging.h"
58 // On systems (like freebsd) that don't define MAP_ANONYMOUS, use the old
59 // form of the name instead.
61 # define MAP_ANONYMOUS MAP_ANON
64 // MADV_FREE is specifically designed for use by malloc(), but only
65 // FreeBSD supports it; in linux we fall back to the somewhat inferior
67 #if !defined(MADV_FREE) && defined(MADV_DONTNEED)
68 # define MADV_FREE MADV_DONTNEED
71 // Solaris has a bug where it doesn't declare madvise() for C++.
72 // http://www.opensolaris.org/jive/thread.jspa?threadID=21035&tstart=0
73 #if defined(__sun) && defined(__SVR4)
74 # include <sys/types.h> // for caddr_t
75 extern "C" { extern int madvise(caddr_t
, size_t, int); }
78 // Set kDebugMode so that we can use C++ conditionals
79 // instead of preprocessor conditionals.
81 static const bool kDebugMode
= false;
83 static const bool kDebugMode
= true;
86 // TODO(sanjay): Move the code below into the tcmalloc namespace
90 // Anonymous namespace to avoid name conflicts on "CheckAddressBits".
93 // Check that no bit is set at position ADDRESS_BITS or higher.
94 template <int ADDRESS_BITS
> bool CheckAddressBits(uintptr_t ptr
) {
95 return (ptr
>> ADDRESS_BITS
) == 0;
98 // Specialize for the bit width of a pointer to avoid undefined shift.
99 template <> bool CheckAddressBits
<8 * sizeof(void*)>(uintptr_t ptr
) {
103 #if (defined(OS_LINUX) || defined(OS_CHROMEOS)) && defined(__x86_64__)
104 #define ASLR_IS_SUPPORTED
107 #if defined(ASLR_IS_SUPPORTED)
108 // From libdieharder, public domain library by Bob Jenkins (rngav.c).
109 // Described at http://burtleburtle.net/bob/rand/smallprng.html.
110 // Not cryptographically secure, but good enough for what we need.
119 #define rot(x,k) (((x)<<(k))|((x)>>(32-(k))))
121 u4
ranval(ranctx
* x
) {
122 /* xxx: the generator being tested */
123 u4 e
= x
->a
- rot(x
->b
, 27);
124 x
->a
= x
->b
^ rot(x
->c
, 17);
131 void raninit(ranctx
* x
, u4 seed
) {
134 x
->b
= x
->c
= x
->d
= seed
;
135 for (i
= 0; i
< 20; ++i
) {
140 // If the kernel cannot honor the hint in arch_get_unmapped_area_topdown, it
141 // will simply ignore it. So we give a hint that has a good chance of
143 // The mmap top-down allocator will normally allocate below TASK_SIZE - gap,
144 // with a gap that depends on the max stack size. See x86/mm/mmap.c. We
145 // should make allocations that are below this area, which would be
147 // We use 0x3ffffffff000 as the mask so that we only "pollute" half of the
148 // address space. In the unlikely case where fragmentation would become an
149 // issue, the kernel will still have another half to use.
150 const uint64_t kRandomAddressMask
= 0x3ffffffff000ULL
;
152 #endif // defined(ASLR_IS_SUPPORTED)
154 // Give a random "hint" that is suitable for use with mmap(). This cannot make
155 // mmap fail, as the kernel will simply not follow the hint if it can't.
156 // However, this will create address space fragmentation. Currently, we only
157 // implement it on x86_64, where we have a 47 bits userland address space and
158 // fragmentation is not an issue.
159 void* GetRandomAddrHint() {
160 #if !defined(ASLR_IS_SUPPORTED)
163 // Note: we are protected by the general TCMalloc_SystemAlloc spinlock. Given
164 // the nature of what we're doing, it wouldn't be critical if we weren't for
165 // ctx, but it is for the "initialized" variable.
166 // It's nice to share the state between threads, because scheduling will add
167 // some randomness to the succession of ranval() calls.
169 static bool initialized
= false;
172 // We really want this to be a stack variable and don't want any compiler
173 // optimization. We're using its address as a poor man's source of
176 // Pre-initialize our seed with a "random" address in case /dev/urandom is
178 uint32_t seed
= (reinterpret_cast<uint64_t>(&c
) >> 32) ^
179 reinterpret_cast<uint64_t>(&c
);
180 int urandom_fd
= open("/dev/urandom", O_RDONLY
);
181 if (urandom_fd
>= 0) {
183 len
= read(urandom_fd
, &seed
, sizeof(seed
));
184 ASSERT(len
== sizeof(seed
));
185 int ret
= close(urandom_fd
);
190 uint64_t random_address
= (static_cast<uint64_t>(ranval(&ctx
)) << 32) |
192 // A bit-wise "and" won't bias our random distribution.
193 random_address
&= kRandomAddressMask
;
194 return reinterpret_cast<void*>(random_address
);
195 #endif // ASLR_IS_SUPPORTED
198 // Allocate |length| bytes of memory using mmap(). The memory will be
199 // readable and writeable, but not executable.
200 // Like mmap(), we will return MAP_FAILED on failure.
201 // |is_aslr_enabled| controls address space layout randomization. When true, we
202 // will put the first mapping at a random address and will then try to grow it.
203 // If it's not possible to grow an existing mapping, a new one will be created.
204 void* AllocWithMmap(size_t length
, bool is_aslr_enabled
) {
205 // Note: we are protected by the general TCMalloc_SystemAlloc spinlock.
206 static void* address_hint
= NULL
;
207 #if defined(ASLR_IS_SUPPORTED)
208 if (is_aslr_enabled
&&
210 reinterpret_cast<uint64_t>(address_hint
) & ~kRandomAddressMask
)) {
211 address_hint
= GetRandomAddrHint();
213 #endif // ASLR_IS_SUPPORTED
215 // address_hint is likely to make us grow an existing mapping.
216 void* result
= mmap(address_hint
, length
, PROT_READ
|PROT_WRITE
,
217 MAP_PRIVATE
|MAP_ANONYMOUS
, -1, 0);
218 #if defined(ASLR_IS_SUPPORTED)
219 if (result
== address_hint
) {
220 // If mmap() succeeded at address_hint, our next mmap() will try to grow
221 // the current mapping as long as it's compatible with our ASLR mask.
222 // This has been done for performance reasons, see crbug.com/173371.
223 // It should be possible to strike a better balance between performance
224 // and security but will be done at a later date.
225 // If this overflows, it could only set address_hint to NULL, which is
226 // what we want (and can't happen on the currently supported architecture).
227 address_hint
= static_cast<char*>(result
) + length
;
229 // mmap failed or a collision prevented the kernel from honoring the hint,
233 #endif // ASLR_IS_SUPPORTED
237 } // Anonymous namespace to avoid name conflicts on "CheckAddressBits".
239 COMPILE_ASSERT(kAddressBits
<= 8 * sizeof(void*),
240 address_bits_larger_than_pointer_size
);
242 // Structure for discovering alignment
243 union MemoryAligner
{
249 static SpinLock
spinlock(SpinLock::LINKER_INITIALIZED
);
251 #if defined(HAVE_MMAP) || defined(MADV_FREE)
252 #ifdef HAVE_GETPAGESIZE
253 static size_t pagesize
= 0;
257 // The current system allocator
258 SysAllocator
* sys_alloc
= NULL
;
260 // Configuration parameters.
// malloc_devmem_start: default comes from the TCMALLOC_DEVMEM_START
// environment variable (second EnvToInt argument, 0, is presumably the
// fallback when it is unset -- confirm against EnvToInt).
// DevMemSysAllocator::Alloc treats a value of 0 as "do not use /dev/mem" and
// otherwise multiplies the value by 1024*1024 to obtain the first physical
// offset to map. TCMalloc_SystemRelease also tests this flag before it
// attempts madvise().
261 DEFINE_int32(malloc_devmem_start
,
262 EnvToInt("TCMALLOC_DEVMEM_START", 0),
263 "Physical memory starting location in MB for /dev/mem allocation."
264 " Setting this to 0 disables /dev/mem allocation");
// malloc_devmem_limit: default comes from TCMALLOC_DEVMEM_LIMIT.
// DevMemSysAllocator::Alloc multiplies it by 1024*1024 and, when the result
// is non-zero, compares each request against (limit - current base).
265 DEFINE_int32(malloc_devmem_limit
,
266 EnvToInt("TCMALLOC_DEVMEM_LIMIT", 0),
267 "Physical memory limit location in MB for /dev/mem allocation."
268 " Setting this to 0 means no limit.");
// malloc_skip_sbrk: default comes from TCMALLOC_SKIP_SBRK (false).
// SbrkSysAllocator::Alloc tests this flag before it calls sbrk().
// NOTE(review): the flag name and the comments in that method indicate that
// "true" means sbrk is skipped, which is the opposite of how the help string
// below reads -- confirm before relying on the help text.
269 DEFINE_bool(malloc_skip_sbrk
,
270 EnvToBool("TCMALLOC_SKIP_SBRK", false),
271 "Whether sbrk can be used to obtain memory.");
// malloc_skip_mmap: default comes from TCMALLOC_SKIP_MMAP (false).
// MmapSysAllocator::Alloc tests this flag before it maps any memory.
// NOTE(review): as with malloc_skip_sbrk, "true" appears to mean mmap is
// skipped, despite the wording of the help string -- confirm.
272 DEFINE_bool(malloc_skip_mmap
,
273 EnvToBool("TCMALLOC_SKIP_MMAP", false),
274 "Whether mmap can be used to obtain memory.");
276 DEFINE_bool(malloc_random_allocator
,
277 #if defined(ASLR_IS_SUPPORTED)
278 EnvToBool("TCMALLOC_ASLR", true),
280 EnvToBool("TCMALLOC_ASLR", false),
282 "Whether to randomize the address space via mmap().");
285 class SbrkSysAllocator
: public SysAllocator
{
287 SbrkSysAllocator() : SysAllocator() {
289 void* Alloc(size_t size
, size_t *actual_size
, size_t alignment
);
291 static char sbrk_space
[sizeof(SbrkSysAllocator
)];
293 class MmapSysAllocator
: public SysAllocator
{
295 MmapSysAllocator() : SysAllocator() {
297 void* Alloc(size_t size
, size_t *actual_size
, size_t alignment
);
299 static char mmap_space
[sizeof(MmapSysAllocator
)];
301 class DevMemSysAllocator
: public SysAllocator
{
303 DevMemSysAllocator() : SysAllocator() {
305 void* Alloc(size_t size
, size_t *actual_size
, size_t alignment
);
308 class DefaultSysAllocator
: public SysAllocator
{
310 DefaultSysAllocator() : SysAllocator() {
311 for (int i
= 0; i
< kMaxAllocators
; i
++) {
317 void SetChildAllocator(SysAllocator
* alloc
, unsigned int index
,
319 if (index
< kMaxAllocators
&& alloc
!= NULL
) {
320 allocs_
[index
] = alloc
;
321 failed_
[index
] = false;
322 names_
[index
] = name
;
325 void* Alloc(size_t size
, size_t *actual_size
, size_t alignment
);
328 static const int kMaxAllocators
= 2;
329 bool failed_
[kMaxAllocators
];
330 SysAllocator
* allocs_
[kMaxAllocators
];
331 const char* names_
[kMaxAllocators
];
// Raw byte buffer sized to hold exactly one DefaultSysAllocator.
// InitSystemAllocators constructs the allocator in this buffer with
// placement new instead of allocating it dynamically.
333 static char default_space
[sizeof(DefaultSysAllocator
)];
// Name strings handed to DefaultSysAllocator::SetChildAllocator together with
// the sbrk and mmap child allocators; SetChildAllocator stores the pointer in
// its names_ array.
334 static const char sbrk_name
[] = "SbrkSysAllocator";
335 static const char mmap_name
[] = "MmapSysAllocator";
338 void* SbrkSysAllocator::Alloc(size_t size
, size_t *actual_size
,
343 // Check if we should use sbrk allocation.
344 // FLAGS_malloc_skip_sbrk starts out as false (its uninitialized
345 // state) and eventually gets initialized to the specified value. Note
346 // that this code runs for a while before the flags are initialized.
347 // That means that even if this flag is set to true, some (initial)
348 // memory will be allocated with sbrk before the flag takes effect.
349 if (FLAGS_malloc_skip_sbrk
) {
353 // sbrk will release memory if passed a negative number, so we do
354 // a strict check here
355 if (static_cast<ptrdiff_t>(size
+ alignment
) < 0) return NULL
;
357 // This doesn't overflow because TCMalloc_SystemAlloc has already
358 // tested for overflow at the alignment boundary.
359 size
= ((size
+ alignment
- 1) / alignment
) * alignment
;
361 // "actual_size" indicates that the bytes from the returned pointer
362 // p up to and including (p + actual_size - 1) have been allocated.
367 // Check that we're not asking for so much more memory that we'd
368 // wrap around the end of the virtual address space. (This seems
369 // like something sbrk() should check for us, and indeed opensolaris
370 // does, but glibc does not:
371 // http://src.opensolaris.org/source/xref/onnv/onnv-gate/usr/src/lib/libc/port/sys/sbrk.c?a=true
372 // http://sourceware.org/cgi-bin/cvsweb.cgi/~checkout~/libc/misc/sbrk.c?rev=1.1.2.1&content-type=text/plain&cvsroot=glibc
373 // Without this check, sbrk may succeed when it ought to fail.)
374 if (reinterpret_cast<intptr_t>(sbrk(0)) + size
< size
) {
378 void* result
= sbrk(size
);
379 if (result
== reinterpret_cast<void*>(-1)) {
384 uintptr_t ptr
= reinterpret_cast<uintptr_t>(result
);
385 if ((ptr
& (alignment
-1)) == 0) return result
;
387 // Try to get more memory for alignment
388 size_t extra
= alignment
- (ptr
& (alignment
-1));
389 void* r2
= sbrk(extra
);
390 if (reinterpret_cast<uintptr_t>(r2
) == (ptr
+ size
)) {
391 // Contiguous with previous result
392 return reinterpret_cast<void*>(ptr
+ extra
);
395 // Give up and ask for "size + alignment - 1" bytes so
396 // that we can find an aligned region within it.
397 result
= sbrk(size
+ alignment
- 1);
398 if (result
== reinterpret_cast<void*>(-1)) {
401 ptr
= reinterpret_cast<uintptr_t>(result
);
402 if ((ptr
& (alignment
-1)) != 0) {
403 ptr
+= alignment
- (ptr
& (alignment
-1));
405 return reinterpret_cast<void*>(ptr
);
409 void* MmapSysAllocator::Alloc(size_t size
, size_t *actual_size
,
414 // Check if we should use mmap allocation.
415 // FLAGS_malloc_skip_mmap starts out as false (its uninitialized
416 // state) and eventually gets initialized to the specified value. Note
417 // that this code runs for a while before the flags are initialized.
418 // Chances are we never get here before the flags are initialized since
419 // sbrk is used until the heap is exhausted (before mmap is used).
420 if (FLAGS_malloc_skip_mmap
) {
424 // Enforce page alignment
425 if (pagesize
== 0) pagesize
= getpagesize();
426 if (alignment
< pagesize
) alignment
= pagesize
;
427 size_t aligned_size
= ((size
+ alignment
- 1) / alignment
) * alignment
;
428 if (aligned_size
< size
) {
433 // "actual_size" indicates that the bytes from the returned pointer
434 // p up to and including (p + actual_size - 1) have been allocated.
439 // Ask for extra memory if alignment > pagesize
441 if (alignment
> pagesize
) {
442 extra
= alignment
- pagesize
;
445 // Note: size + extra does not overflow since:
446 // size + alignment < (1<<NBITS).
447 // and extra <= alignment
448 // therefore size + extra < (1<<NBITS)
449 void* result
= AllocWithMmap(size
+ extra
, FLAGS_malloc_random_allocator
);
450 if (result
== reinterpret_cast<void*>(MAP_FAILED
)) {
454 // Adjust the return memory so it is aligned
455 uintptr_t ptr
= reinterpret_cast<uintptr_t>(result
);
457 if ((ptr
& (alignment
- 1)) != 0) {
458 adjust
= alignment
- (ptr
& (alignment
- 1));
461 // Return the unused memory to the system
463 munmap(reinterpret_cast<void*>(ptr
), adjust
);
465 if (adjust
< extra
) {
466 munmap(reinterpret_cast<void*>(ptr
+ adjust
+ size
), extra
- adjust
);
470 return reinterpret_cast<void*>(ptr
);
474 void* DevMemSysAllocator::Alloc(size_t size
, size_t *actual_size
,
479 static bool initialized
= false;
480 static off_t physmem_base
; // next physical memory address to allocate
481 static off_t physmem_limit
; // maximum physical address allowed
482 static int physmem_fd
; // file descriptor for /dev/mem
484 // Check if we should use /dev/mem allocation. Note that it may take
485 // a while to get this flag initialized, so meanwhile we fall back to
486 // the next allocator. (It looks like 7MB gets allocated before
487 // this flag gets initialized -khr.)
488 if (FLAGS_malloc_devmem_start
== 0) {
489 // NOTE: not a devmem_failure - we'd like TCMalloc_SystemAlloc to
490 // try us again next time.
495 physmem_fd
= open("/dev/mem", O_RDWR
);
496 if (physmem_fd
< 0) {
499 physmem_base
= FLAGS_malloc_devmem_start
*1024LL*1024LL;
500 physmem_limit
= FLAGS_malloc_devmem_limit
*1024LL*1024LL;
504 // Enforce page alignment
505 if (pagesize
== 0) pagesize
= getpagesize();
506 if (alignment
< pagesize
) alignment
= pagesize
;
507 size_t aligned_size
= ((size
+ alignment
- 1) / alignment
) * alignment
;
508 if (aligned_size
< size
) {
513 // "actual_size" indicates that the bytes from the returned pointer
514 // p up to and including (p + actual_size - 1) have been allocated.
519 // Ask for extra memory if alignment > pagesize
521 if (alignment
> pagesize
) {
522 extra
= alignment
- pagesize
;
525 // check to see if we have any memory left
526 if (physmem_limit
!= 0 &&
527 ((size
+ extra
) > (physmem_limit
- physmem_base
))) {
531 // Note: size + extra does not overflow since:
532 // size + alignment < (1<<NBITS).
533 // and extra <= alignment
534 // therefore size + extra < (1<<NBITS)
535 void *result
= mmap(0, size
+ extra
, PROT_WRITE
|PROT_READ
,
536 MAP_SHARED
, physmem_fd
, physmem_base
);
537 if (result
== reinterpret_cast<void*>(MAP_FAILED
)) {
540 uintptr_t ptr
= reinterpret_cast<uintptr_t>(result
);
542 // Adjust the return memory so it is aligned
544 if ((ptr
& (alignment
- 1)) != 0) {
545 adjust
= alignment
- (ptr
& (alignment
- 1));
548 // Return the unused virtual memory to the system
550 munmap(reinterpret_cast<void*>(ptr
), adjust
);
552 if (adjust
< extra
) {
553 munmap(reinterpret_cast<void*>(ptr
+ adjust
+ size
), extra
- adjust
);
557 physmem_base
+= adjust
+ size
;
559 return reinterpret_cast<void*>(ptr
);
563 void* DefaultSysAllocator::Alloc(size_t size
, size_t *actual_size
,
565 for (int i
= 0; i
< kMaxAllocators
; i
++) {
566 if (!failed_
[i
] && allocs_
[i
] != NULL
) {
567 void* result
= allocs_
[i
]->Alloc(size
, actual_size
, alignment
);
568 if (result
!= NULL
) {
574 // After both failed, reset "failed_" to false so that a single failed
575 // allocation won't make the allocator never work again.
576 for (int i
= 0; i
< kMaxAllocators
; i
++) {
582 static bool system_alloc_inited
= false;
583 void InitSystemAllocators(void) {
584 MmapSysAllocator
*mmap
= new (mmap_space
) MmapSysAllocator();
585 SbrkSysAllocator
*sbrk
= new (sbrk_space
) SbrkSysAllocator();
587 // In 64-bit debug mode, place the mmap allocator first since it
588 // allocates pointers that do not fit in 32 bits and therefore gives
589 // us better testing of code's 64-bit correctness. It also leads to
590 // fewer false negatives in heap-checking code. (Numbers are less
591 // likely to look like pointers and therefore the conservative gc in
592 // the heap-checker is less likely to misinterpret a number as a
594 DefaultSysAllocator
*sdef
= new (default_space
) DefaultSysAllocator();
595 // Unfortunately, this code runs before flags are initialized. So
596 // we can't use FLAGS_malloc_random_allocator.
597 #if defined(ASLR_IS_SUPPORTED)
598 // Our only random allocator is mmap.
599 sdef
->SetChildAllocator(mmap
, 0, mmap_name
);
601 if (kDebugMode
&& sizeof(void*) > 4) {
602 sdef
->SetChildAllocator(mmap
, 0, mmap_name
);
603 sdef
->SetChildAllocator(sbrk
, 1, sbrk_name
);
605 sdef
->SetChildAllocator(sbrk
, 0, sbrk_name
);
606 sdef
->SetChildAllocator(mmap
, 1, mmap_name
);
608 #endif // ASLR_IS_SUPPORTED
612 void* TCMalloc_SystemAlloc(size_t size
, size_t *actual_size
,
614 // Discard requests that overflow
615 if (size
+ alignment
< size
) return NULL
;
617 SpinLockHolder
lock_holder(&spinlock
);
619 if (!system_alloc_inited
) {
620 InitSystemAllocators();
621 system_alloc_inited
= true;
624 // Enforce minimum alignment
625 if (alignment
< sizeof(MemoryAligner
)) alignment
= sizeof(MemoryAligner
);
627 void* result
= sys_alloc
->Alloc(size
, actual_size
, alignment
);
628 if (result
!= NULL
) {
630 CheckAddressBits
<kAddressBits
>(
631 reinterpret_cast<uintptr_t>(result
) + *actual_size
- 1);
633 CheckAddressBits
<kAddressBits
>(
634 reinterpret_cast<uintptr_t>(result
) + size
- 1);
640 size_t TCMalloc_SystemAddGuard(void* start
, size_t size
) {
641 #ifdef HAVE_GETPAGESIZE
643 pagesize
= getpagesize();
645 if (size
< pagesize
|| (reinterpret_cast<size_t>(start
) % pagesize
) != 0)
648 if (!mprotect(start
, pagesize
, PROT_NONE
))
655 void TCMalloc_SystemRelease(void* start
, size_t length
) {
657 if (FLAGS_malloc_devmem_start
) {
658 // It's not safe to use MADV_FREE/MADV_DONTNEED if we've been
659 // mapping /dev/mem for heap memory.
662 if (pagesize
== 0) pagesize
= getpagesize();
663 const size_t pagemask
= pagesize
- 1;
665 size_t new_start
= reinterpret_cast<size_t>(start
);
666 size_t end
= new_start
+ length
;
667 size_t new_end
= end
;
669 // Round up the starting address and round down the ending address
670 // to be page aligned:
671 new_start
= (new_start
+ pagesize
- 1) & ~pagemask
;
672 new_end
= new_end
& ~pagemask
;
674 ASSERT((new_start
& pagemask
) == 0);
675 ASSERT((new_end
& pagemask
) == 0);
676 ASSERT(new_start
>= reinterpret_cast<size_t>(start
));
677 ASSERT(new_end
<= end
);
679 if (new_end
> new_start
) {
680 // Note -- ignoring most return codes, because if this fails it
682 while (madvise(reinterpret_cast<char*>(new_start
), new_end
- new_start
,
691 void TCMalloc_SystemCommit(void* start
, size_t length
) {
692 // Nothing to do here. TCMalloc_SystemRelease does not alter pages
693 // such that they need to be re-committed before they can be used by the