// Copyright (c) 2005, 2007, Google Inc.
// All rights reserved.
// Copyright (C) 2005, 2006, 2007, 2008, 2009, 2011 Apple Inc. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
//
//     * Redistributions of source code must retain the above copyright
//       notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
//       copyright notice, this list of conditions and the following disclaimer
//       in the documentation and/or other materials provided with the
//     * Neither the name of Google Inc. nor the names of its
//       contributors may be used to endorse or promote products derived from
//       this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Author: Sanjay Ghemawat <opensource@google.com>
// A malloc that uses a per-thread cache to satisfy small malloc requests.
// (The time for malloc/free of a small object drops from 300 ns to 50 ns.)
//
// See doc/tcmalloc.html for a high-level
// description of how this malloc works.
// 1. The thread-specific lists are accessed without acquiring any locks.
//    This is safe because each such list is only accessed by one thread.
// 2. We have a lock per central free-list, and hold it while manipulating
//    the central free list for a particular size.
// 3. The central page allocator is protected by "pageheap_lock".
// 4. The pagemap (which maps from page-number to descriptor),
//    can be read without holding any locks, and written while holding
//    the "pageheap_lock".
// 5. To improve performance, a subset of the information one can get
//    from the pagemap is cached in a data structure, pagemap_cache_,
//    that atomically reads and writes its entries. This cache can be
//    read and written without locking.
//
// This multi-threaded access to the pagemap is safe for fairly
// subtle reasons. We basically assume that when an object X is
// allocated by thread A and deallocated by thread B, there must
// have been appropriate synchronization in the handoff of object
// X from thread A to thread B. The same logic applies to pagemap_cache_.
// THE PAGEID-TO-SIZECLASS CACHE
// Hot PageID-to-sizeclass mappings are held by pagemap_cache_. If this cache
// returns 0 for a particular PageID then that means "no information," not that
// the sizeclass is 0. The cache may have stale information for pages that do
// not hold the beginning of any free()'able object. Staleness is eliminated
// in Populate() for pages with sizeclass > 0 objects, and in do_malloc() and
// do_memalign() for all other relevant pages.
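// For example, a lookup such as pagemap_cache_.GetOrDefault(p, 0) (see
// GetSizeClassIfCached() below) that yields 0 must be treated as a cache miss
// and resolved against the authoritative pagemap, never as "size class 0".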
// TODO: Bias reclamation to larger addresses
// TODO: implement mallinfo/mallopt
// TODO: Better testing
//
// 9/28/2003 (new page-level allocator replaces ptmalloc2):
// * malloc/free of small objects goes from ~300 ns to ~50 ns.
// * allocation of a reasonably complicated struct
//   goes from about 1100 ns to about 300 ns.
#include "FastMalloc.h"

#include "Assertions.h"
#if ENABLE(WTF_MULTIPLE_THREADS)
#include <pthread.h>
#endif
#include <wtf/StdLibExtras.h>

#ifndef NO_TCMALLOC_SAMPLES
#define NO_TCMALLOC_SAMPLES
#endif

#if !(defined(USE_SYSTEM_MALLOC) && USE_SYSTEM_MALLOC) && defined(NDEBUG)
#define FORCE_SYSTEM_MALLOC 0
#else
#define FORCE_SYSTEM_MALLOC 1
#endif

// Use a background thread to periodically scavenge memory to release back to the system
#define USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY 0
#if ENABLE(WTF_MULTIPLE_THREADS)
static pthread_key_t isForbiddenKey;
static pthread_once_t isForbiddenKeyOnce = PTHREAD_ONCE_INIT;
static void initializeIsForbiddenKey()
{
    pthread_key_create(&isForbiddenKey, 0);
}

static bool isForbidden()
{
    pthread_once(&isForbiddenKeyOnce, initializeIsForbiddenKey);
    return !!pthread_getspecific(isForbiddenKey);
}

void fastMallocForbid()
{
    pthread_once(&isForbiddenKeyOnce, initializeIsForbiddenKey);
    pthread_setspecific(isForbiddenKey, &isForbiddenKey);
}

void fastMallocAllow()
{
    pthread_once(&isForbiddenKeyOnce, initializeIsForbiddenKey);
    pthread_setspecific(isForbiddenKey, 0);
}

#else

static bool staticIsForbidden;
static bool isForbidden()
{
    return staticIsForbidden;
}

void fastMallocForbid()
{
    staticIsForbidden = true;
}

void fastMallocAllow()
{
    staticIsForbidden = false;
}
#endif // ENABLE(WTF_MULTIPLE_THREADS)
namespace Internal {
#if !ENABLE(WTF_MALLOC_VALIDATION)
void fastMallocMatchFailed(void*);
#else
COMPILE_ASSERT(((sizeof(ValidationHeader) % sizeof(AllocAlignmentInteger)) == 0), ValidationHeader_must_produce_correct_alignment);
#endif

void fastMallocMatchFailed(void*)
{
    CRASH();
}

} // namespace Internal
void* fastZeroedMalloc(size_t n)
{
    void* result = fastMalloc(n);
    memset(result, 0, n);
    return result;
}

char* fastStrDup(const char* src)
{
    size_t len = strlen(src) + 1;
    char* dup = static_cast<char*>(fastMalloc(len));
    memcpy(dup, src, len);
    return dup;
}

TryMallocReturnValue tryFastZeroedMalloc(size_t n)
{
    void* result;
    if (!tryFastMalloc(n).getValue(result))
        return 0;
    memset(result, 0, n);
    return result;
}
#if FORCE_SYSTEM_MALLOC

#if PLATFORM(BREWMP)
#include "brew/SystemMallocBrew.h"
#endif

#if OS(DARWIN)
#include <malloc/malloc.h>
#endif
TryMallocReturnValue tryFastMalloc(size_t n)
{
    ASSERT(!isForbidden());

#if ENABLE(WTF_MALLOC_VALIDATION)
    if (std::numeric_limits<size_t>::max() - Internal::ValidationBufferSize <= n)  // If overflow would occur...
        return 0;

    void* result = malloc(n + Internal::ValidationBufferSize);
    if (!result)
        return 0;

    Internal::ValidationHeader* header = static_cast<Internal::ValidationHeader*>(result);
    header->m_size = n;
    header->m_type = Internal::AllocTypeMalloc;
    header->m_prefix = static_cast<unsigned>(Internal::ValidationPrefix);
    result = header + 1;
    *Internal::fastMallocValidationSuffix(result) = Internal::ValidationSuffix;
    fastMallocValidate(result);
    return result;
#else
    return malloc(n);
#endif
}
void* fastMalloc(size_t n)
{
    ASSERT(!isForbidden());

#if ENABLE(WTF_MALLOC_VALIDATION)
    TryMallocReturnValue returnValue = tryFastMalloc(n);
    void* result;
    if (!returnValue.getValue(result))
        CRASH();
#else
    void* result = malloc(n);
#endif

    if (!result) {
        // The behavior of malloc(0) is implementation defined.
        // To make sure that fastMalloc never returns 0, retry with fastMalloc(1).
        if (!n)
            return fastMalloc(1);
        CRASH();
    }

    return result;
}
TryMallocReturnValue tryFastCalloc(size_t n_elements, size_t element_size)
{
    ASSERT(!isForbidden());

#if ENABLE(WTF_MALLOC_VALIDATION)
    size_t totalBytes = n_elements * element_size;
    if (n_elements > 1 && element_size && (totalBytes / element_size) != n_elements)
        return 0;

    TryMallocReturnValue returnValue = tryFastMalloc(totalBytes);
    void* result;
    if (!returnValue.getValue(result))
        return 0;
    memset(result, 0, totalBytes);
    fastMallocValidate(result);
    return result;
#else
    return calloc(n_elements, element_size);
#endif
}
void* fastCalloc(size_t n_elements, size_t element_size)
{
    ASSERT(!isForbidden());

#if ENABLE(WTF_MALLOC_VALIDATION)
    TryMallocReturnValue returnValue = tryFastCalloc(n_elements, element_size);
    void* result;
    if (!returnValue.getValue(result))
        CRASH();
#else
    void* result = calloc(n_elements, element_size);
#endif

    if (!result) {
        // If either n_elements or element_size is 0, the behavior of calloc is implementation defined.
        // To make sure that fastCalloc never returns 0, retry with fastCalloc(1, 1).
        if (!n_elements || !element_size)
            return fastCalloc(1, 1);
        CRASH();
    }

    return result;
}
void fastFree(void* p)
{
    ASSERT(!isForbidden());

#if ENABLE(WTF_MALLOC_VALIDATION)
    if (!p)
        return;

    fastMallocMatchValidateFree(p, Internal::AllocTypeMalloc);
    Internal::ValidationHeader* header = Internal::fastMallocValidationHeader(p);
    memset(p, 0xCC, header->m_size);
    free(header);
#else
    free(p);
#endif
}
TryMallocReturnValue tryFastRealloc(void* p, size_t n)
{
    ASSERT(!isForbidden());

#if ENABLE(WTF_MALLOC_VALIDATION)
    if (p) {
        if (std::numeric_limits<size_t>::max() - Internal::ValidationBufferSize <= n)  // If overflow would occur...
            return 0;
        fastMallocValidate(p);
        Internal::ValidationHeader* result = static_cast<Internal::ValidationHeader*>(realloc(Internal::fastMallocValidationHeader(p), n + Internal::ValidationBufferSize));
        if (!result)
            return 0;
        result->m_size = n;
        result = result + 1;
        *fastMallocValidationSuffix(result) = Internal::ValidationSuffix;
        fastMallocValidate(result);
        return result;
    }

    return fastMalloc(n);
#else
    return realloc(p, n);
#endif
}
void* fastRealloc(void* p, size_t n)
{
    ASSERT(!isForbidden());

#if ENABLE(WTF_MALLOC_VALIDATION)
    TryMallocReturnValue returnValue = tryFastRealloc(p, n);
    void* result;
    if (!returnValue.getValue(result))
        CRASH();
#else
    void* result = realloc(p, n);
#endif

    if (!result)
        CRASH();
    return result;
}
void releaseFastMallocFreeMemory() { }

FastMallocStatistics fastMallocStatistics()
{
    FastMallocStatistics statistics = { 0, 0, 0 };
    return statistics;
}

size_t fastMallocSize(const void* p)
{
#if ENABLE(WTF_MALLOC_VALIDATION)
    return Internal::fastMallocValidationHeader(const_cast<void*>(p))->m_size;
#elif OS(DARWIN)
    return malloc_size(p);
#elif OS(WINDOWS) && !PLATFORM(BREWMP)
    // Brew MP uses its own memory allocator, so _msize does not work on the Brew MP simulator.
    return _msize(const_cast<void*>(p));
#endif
}

// This symbol is present in the JavaScriptCore exports file even when FastMalloc is disabled.
// It will never be used in this case, so its type and value are less interesting than its presence.
extern "C" const int jscore_fastmalloc_introspection = 0;
#else // FORCE_SYSTEM_MALLOC

#if HAVE(STDINT_H)
#include <stdint.h>
#elif HAVE(INTTYPES_H)
#include <inttypes.h>
#else
#include <sys/types.h>
#endif

#include "AlwaysInline.h"
#include "Assertions.h"
#include "TCPackedCache.h"
#include "TCPageMap.h"
#include "TCSpinLock.h"
#include "TCSystemAlloc.h"
#ifndef WIN32_LEAN_AND_MEAN
#define WIN32_LEAN_AND_MEAN
#endif

#include "MallocZoneSupport.h"
#include <wtf/HashSet.h>
#include <wtf/Vector.h>

#if HAVE(HEADER_DETECTION_H)
#include "HeaderDetection.h"
#endif

#if HAVE(DISPATCH_H)
#include <dispatch/dispatch.h>
#endif

#if HAVE(PTHREAD_MACHDEP_H)
#include <System/pthread_machdep.h>

#if defined(__PTK_FRAMEWORK_JAVASCRIPTCORE_KEY0)
#define WTF_USE_PTHREAD_GETSPECIFIC_DIRECT 1
#endif
#endif
// Calling pthread_getspecific through a global function pointer is faster than a normal
// call to the function on Mac OS X, and it's used in performance-critical code. So we
// use a function pointer. But that's not necessarily faster on other platforms, and we had
// problems with this technique on Windows, so we'll do this only on Mac OS X.
#if !USE(PTHREAD_GETSPECIFIC_DIRECT)
static void* (*pthread_getspecific_function_pointer)(pthread_key_t) = pthread_getspecific;
#define pthread_getspecific(key) pthread_getspecific_function_pointer(key)
#else
#define pthread_getspecific(key) _pthread_getspecific_direct(key)
#define pthread_setspecific(key, val) _pthread_setspecific_direct(key, (val))
#endif
#define DEFINE_VARIABLE(type, name, value, meaning) \
  namespace FLAG__namespace_do_not_use_directly_use_DECLARE_##type##_instead {  \
  type FLAGS_##name(value);                                                     \
  char FLAGS_no##name;                                                          \
  }                                                                             \
  using FLAG__namespace_do_not_use_directly_use_DECLARE_##type##_instead::FLAGS_##name

#define DEFINE_int64(name, value, meaning) \
  DEFINE_VARIABLE(int64_t, name, value, meaning)

#define DEFINE_double(name, value, meaning) \
  DEFINE_VARIABLE(double, name, value, meaning)
#define malloc fastMalloc
#define calloc fastCalloc
#define free fastFree
#define realloc fastRealloc

#define MESSAGE LOG_ERROR
#define CHECK_CONDITION ASSERT
class TCMalloc_Central_FreeListPadded;
class TCMalloc_PageHeap;
class TCMalloc_ThreadCache;
template <typename T> class PageHeapAllocator;

class FastMallocZone {
    static kern_return_t enumerate(task_t, void*, unsigned typeMmask, vm_address_t zoneAddress, memory_reader_t, vm_range_recorder_t);
    static size_t goodSize(malloc_zone_t*, size_t size) { return size; }
    static boolean_t check(malloc_zone_t*) { return true; }
    static void print(malloc_zone_t*, boolean_t) { }
    static void log(malloc_zone_t*, void*) { }
    static void forceLock(malloc_zone_t*) { }
    static void forceUnlock(malloc_zone_t*) { }
    static void statistics(malloc_zone_t*, malloc_statistics_t* stats) { memset(stats, 0, sizeof(malloc_statistics_t)); }

    FastMallocZone(TCMalloc_PageHeap*, TCMalloc_ThreadCache**, TCMalloc_Central_FreeListPadded*, PageHeapAllocator<Span>*, PageHeapAllocator<TCMalloc_ThreadCache>*);
    static size_t size(malloc_zone_t*, const void*);
    static void* zoneMalloc(malloc_zone_t*, size_t);
    static void* zoneCalloc(malloc_zone_t*, size_t numItems, size_t size);
    static void zoneFree(malloc_zone_t*, void*);
    static void* zoneRealloc(malloc_zone_t*, void*, size_t);
    static void* zoneValloc(malloc_zone_t*, size_t) { LOG_ERROR("valloc is not supported"); return 0; }
    static void zoneDestroy(malloc_zone_t*) { }

    malloc_zone_t m_zone;
    TCMalloc_PageHeap* m_pageHeap;
    TCMalloc_ThreadCache** m_threadHeaps;
    TCMalloc_Central_FreeListPadded* m_centralCaches;
    PageHeapAllocator<Span>* m_spanAllocator;
    PageHeapAllocator<TCMalloc_ThreadCache>* m_pageHeapAllocator;
};
// This #ifdef should almost never be set.  Set NO_TCMALLOC_SAMPLES if
// you're porting to a system where you really can't get a stacktrace.
#ifdef NO_TCMALLOC_SAMPLES
// We use #define so code compiles even if you #include stacktrace.h somehow.
# define GetStackTrace(stack, depth, skip)  (0)
#else
# include <google/stacktrace.h>
#endif
// Even if we have support for thread-local storage in the compiler
// and linker, the OS may not support it.  We need to check that at
// runtime.  Right now, we have to keep a manual set of "bad" OSes.
#if defined(HAVE_TLS)
  static bool kernel_supports_tls = false;      // be conservative
  static inline bool KernelSupportsTLS() {
    return kernel_supports_tls;
  }
# if !HAVE_DECL_UNAME    // if too old for uname, probably too old for TLS
    static void CheckIfKernelSupportsTLS() {
      kernel_supports_tls = false;
    }
# else
#   include <sys/utsname.h>    // DECL_UNAME checked for <sys/utsname.h> too
    static void CheckIfKernelSupportsTLS() {
      struct utsname buf;
      if (uname(&buf) != 0) {   // should be impossible
        MESSAGE("uname failed assuming no TLS support (errno=%d)\n", errno);
        kernel_supports_tls = false;
      } else if (strcasecmp(buf.sysname, "linux") == 0) {
        // The linux case: the first kernel to support TLS was 2.6.0
        if (buf.release[0] < '2' && buf.release[1] == '.')    // 0.x or 1.x
          kernel_supports_tls = false;
        else if (buf.release[0] == '2' && buf.release[1] == '.' &&
                 buf.release[2] >= '0' && buf.release[2] < '6' &&
                 buf.release[3] == '.')                       // 2.0 - 2.5
          kernel_supports_tls = false;
        else
          kernel_supports_tls = true;
      } else {    // some other kernel, we'll be optimistic
        kernel_supports_tls = true;
      }
      // TODO(csilvers): VLOG(1) the tls status once we support RAW_VLOG
    }
# endif  // HAVE_DECL_UNAME
#endif  // HAVE_TLS
// __THROW is defined in glibc systems.  It means, counter-intuitively,
// "This function will never throw an exception."  It's an optional
// optimization tool, but we may need to use it to match glibc prototypes.
#ifndef __THROW    // I guess we're not on a glibc system
# define __THROW   // __THROW is just an optimization, so ok to make it ""
#endif
//-------------------------------------------------------------------
// Configuration
//-------------------------------------------------------------------

// Not all possible combinations of the following parameters make
// sense.  In particular, if kMaxSize increases, you may have to
// increase kNumClasses as well.
static const size_t kPageShift  = 12;
static const size_t kPageSize   = 1 << kPageShift;
static const size_t kMaxSize    = 8u * kPageSize;
static const size_t kAlignShift = 3;
static const size_t kAlignment  = 1 << kAlignShift;
static const size_t kNumClasses = 68;

// Allocates a big block of memory for the pagemap once we reach more than
// 128MB
static const size_t kPageMapBigAllocationThreshold = 128 << 20;

// Minimum number of pages to fetch from system at a time.  Must be
// significantly bigger than kPageSize to amortize system-call
// overhead, and also to reduce external fragmentation.  Also, we
// should keep this value big because various incarnations of Linux
// have small limits on the number of mmap() regions per
// address-space.
static const size_t kMinSystemAlloc = 1 << (20 - kPageShift);

// Number of objects to move between a per-thread list and a central
// list in one shot.  We want this to be not too small so we can
// amortize the lock overhead for accessing the central list.  Making
// it too big may temporarily cause unnecessary memory wastage in the
// per-thread free list until the scavenger cleans up the list.
static int num_objects_to_move[kNumClasses];

// Maximum length we allow a per-thread free-list to have before we
// move objects from it into the corresponding central free-list.  We
// want this big to avoid locking the central free-list too often.  It
// should not hurt to make this list somewhat big because the
// scavenging code will shrink it down when its contents are not in use.
static const int kMaxFreeListLength = 256;

// Lower and upper bounds on the per-thread cache sizes
static const size_t kMinThreadCacheSize = kMaxSize * 2;
static const size_t kMaxThreadCacheSize = 512 * 1024;

// Default bound on the total amount of thread caches
static const size_t kDefaultOverallThreadCacheSize = 16 << 20;

// For all span-lengths < kMaxPages we keep an exact-size list.
// REQUIRED: kMaxPages >= kMinSystemAlloc;
static const size_t kMaxPages = kMinSystemAlloc;
/* The smallest prime > 2^n */
static int primes_list[] = {
    // Small values might cause high rates of sampling
    // and hence commented out.
    // 2, 5, 11, 17, 37, 67, 131, 257,
    // 521, 1031, 2053, 4099, 8209, 16411,
    32771, 65537, 131101, 262147, 524309, 1048583,
    2097169, 4194319, 8388617, 16777259, 33554467 };
// Twice the approximate gap between sampling actions.
// I.e., we take one sample approximately once every
//      tcmalloc_sample_parameter/2
// bytes of allocation, i.e., ~ once every 128KB.
// Must be a prime number.
#ifdef NO_TCMALLOC_SAMPLES
DEFINE_int64(tcmalloc_sample_parameter, 0,
             "Unused: code is compiled with NO_TCMALLOC_SAMPLES");
static size_t sample_period = 0;
#else
DEFINE_int64(tcmalloc_sample_parameter, 262147,
             "Twice the approximate gap between sampling actions."
             " Must be a prime number. Otherwise will be rounded up to a "
             " larger prime number");
static size_t sample_period = 262147;
#endif

// Protects sample_period above
static SpinLock sample_period_lock = SPINLOCK_INITIALIZER;
// Parameters for controlling how fast memory is returned to the OS.

DEFINE_double(tcmalloc_release_rate, 1,
              "Rate at which we release unused memory to the system. "
              "Zero means we never release memory back to the system. "
              "Increase this flag to return memory faster; decrease it "
              "to return memory slower. Reasonable rates are in the "
              "range [0,10]");
//-------------------------------------------------------------------
// Mapping from size to size_class and vice versa
//-------------------------------------------------------------------

// Sizes <= 1024 have an alignment >= 8.  So for such sizes we have an
// array indexed by ceil(size/8).  Sizes > 1024 have an alignment >= 128.
// So for these larger sizes we have an array indexed by ceil(size/128).
//
// We flatten both logical arrays into one physical array and use
// arithmetic to compute an appropriate index.  The constants used by
// ClassIndex() were selected to make the flattening work.
//
//   Size       Expression                      Index
//   -------------------------------------------------------
//   ...
//   1024       (1024 + 7) / 8                  128
//   1025       (1025 + 127 + (120<<7)) / 128   129
//   ...
//   32768      (32768 + 127 + (120<<7)) / 128  376
static const size_t kMaxSmallSize = 1024;
static const int shift_amount[2] = { 3, 7 };  // For divides by 8 or 128
static const int add_amount[2] = { 7, 127 + (120 << 7) };
static unsigned char class_array[377];
// Compute index of the class_array[] entry for a given size
static inline int ClassIndex(size_t s) {
  const int i = (s > kMaxSmallSize);
  return static_cast<int>((s + add_amount[i]) >> shift_amount[i]);
}
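// Illustrative sketch (not part of the original code and not compiled into the
// build; the helper name is hypothetical): spot-checks ClassIndex() against the
// example table above.
#if 0
static void CheckClassIndexExamples() {
  ASSERT(ClassIndex(1024) == 128);   // (1024 + 7) >> 3
  ASSERT(ClassIndex(1025) == 129);   // (1025 + 127 + (120 << 7)) >> 7
  ASSERT(ClassIndex(32768) == 376);  // (32768 + 127 + (120 << 7)) >> 7
}
#endif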
// Mapping from size class to max size storable in that class
static size_t class_to_size[kNumClasses];

// Mapping from size class to number of pages to allocate at a time
static size_t class_to_pages[kNumClasses];

// TransferCache is used to cache transfers of num_objects_to_move[size_class]
// back and forth between thread caches and the central cache for a given size
// class.
struct TCEntry {
  void *head;  // Head of chain of objects.
  void *tail;  // Tail of chain of objects.
};
// A central cache freelist can have anywhere from 0 to kNumTransferEntries
// slots to put link list chains into.  To keep memory usage bounded the total
// number of TCEntries across size classes is fixed.  Currently each size
// class is initially given one TCEntry which also means that the maximum any
// one class can have is kNumClasses.
static const int kNumTransferEntries = kNumClasses;
// Note: the following only works for "n"s that fit in 32-bits, but
// that is fine since we only use it for small sizes.
static inline int LgFloor(size_t n) {
  int log = 0;
  for (int i = 4; i >= 0; --i) {
    int shift = (1 << i);
    size_t x = n >> shift;
    if (x != 0) {
      n = x;
      log += shift;
    }
  }
  ASSERT(n == 1);
  return log;
}
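// For example, LgFloor(1) == 0, LgFloor(1024) == 10, and LgFloor(1500) == 10.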
// Some very basic linked list functions for dealing with using void * as
// storage.

static inline void *SLL_Next(void *t) {
  return *(reinterpret_cast<void**>(t));
}

static inline void SLL_SetNext(void *t, void *n) {
  *(reinterpret_cast<void**>(t)) = n;
}

static inline void SLL_Push(void **list, void *element) {
  SLL_SetNext(element, *list);
  *list = element;
}

static inline void *SLL_Pop(void **list) {
  void *result = *list;
  *list = SLL_Next(*list);
  return result;
}

// Remove N elements from a linked list to which head points.  head will be
// modified to point to the new head.  start and end will point to the first
// and last nodes of the range.  Note that end will point to NULL after this
// function is called.
static inline void SLL_PopRange(void **head, int N, void **start, void **end) {
  if (N == 0) {
    *start = NULL;
    *end = NULL;
    return;
  }

  void *tmp = *head;
  for (int i = 1; i < N; ++i) {
    tmp = SLL_Next(tmp);
  }

  *start = *head;
  *end = tmp;
  *head = SLL_Next(tmp);
  // Unlink range from list.
  SLL_SetNext(tmp, NULL);
}

static inline void SLL_PushRange(void **head, void *start, void *end) {
  if (!start) return;
  SLL_SetNext(end, *head);
  *head = start;
}

static inline size_t SLL_Size(void *head) {
  int count = 0;
  while (head) {
    count++;
    head = SLL_Next(head);
  }
  return count;
}
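// Illustrative sketch (not part of the original code and not compiled into the
// build; the function name and local buffers are hypothetical): shows how the
// SLL_* helpers thread a LIFO free list through the first word of each object.
#if 0
static void ExampleSLLUsage() {
  void* freeList = NULL;
  void* objectA[2];                       // any chunk >= sizeof(void*) works
  void* objectB[2];
  SLL_Push(&freeList, objectA);           // freeList -> objectA
  SLL_Push(&freeList, objectB);           // freeList -> objectB -> objectA
  ASSERT(SLL_Size(freeList) == 2);
  ASSERT(SLL_Pop(&freeList) == objectB);  // LIFO order
  ASSERT(SLL_Size(freeList) == 1);
}
#endif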
// Setup helper functions.

static ALWAYS_INLINE size_t SizeClass(size_t size) {
  return class_array[ClassIndex(size)];
}

// Get the byte-size for a specified class
static ALWAYS_INLINE size_t ByteSizeForClass(size_t cl) {
  return class_to_size[cl];
}

static int NumMoveSize(size_t size) {
  if (size == 0) return 0;
  // Use approx 64k transfers between thread and central caches.
  int num = static_cast<int>(64.0 * 1024.0 / size);
  if (num < 2) num = 2;
  // Clamp well below kMaxFreeListLength to avoid ping pong between central
  // and thread caches.
  if (num > static_cast<int>(0.8 * kMaxFreeListLength))
    num = static_cast<int>(0.8 * kMaxFreeListLength);

  // Also, avoid bringing in too many objects into small object free
  // lists.  There are lots of such lists, and if we allow each one to
  // fetch too many at a time, we end up having to scavenge too often
  // (especially when there are lots of threads and each thread gets a
  // small allowance for its thread cache).
  //
  // TODO: Make thread cache free list sizes dynamic so that we do not
  // have to equally divide a fixed resource amongst lots of threads.
  if (num > 32) num = 32;

  return num;
}
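// For example, a 1KB size class yields 64 * 1024 / 1024 = 64 objects, which the
// final clamp reduces to 32; a 32KB size class yields 2, the minimum batch size.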
850 // Initialize the mapping arrays
851 static void InitSizeClasses() {
852 // Do some sanity checking on add_amount[]/shift_amount[]/class_array[]
853 if (ClassIndex(0) < 0) {
854 MESSAGE("Invalid class index %d for size 0\n", ClassIndex(0));
857 if (static_cast<size_t>(ClassIndex(kMaxSize
)) >= sizeof(class_array
)) {
858 MESSAGE("Invalid class index %d for kMaxSize\n", ClassIndex(kMaxSize
));
862 // Compute the size classes we want to use
863 size_t sc
= 1; // Next size class to assign
864 unsigned char alignshift
= kAlignShift
;
866 for (size_t size
= kAlignment
; size
<= kMaxSize
; size
+= (1 << alignshift
)) {
867 int lg
= LgFloor(size
);
869 // Increase alignment every so often.
871 // Since we double the alignment every time size doubles and
872 // size >= 128, this means that space wasted due to alignment is
873 // at most 16/128 i.e., 12.5%. Plus we cap the alignment at 256
874 // bytes, so the space wasted as a percentage starts falling for
876 if ((lg
>= 7) && (alignshift
< 8)) {
882 // Allocate enough pages so leftover is less than 1/8 of total.
883 // This bounds wasted space to at most 12.5%.
884 size_t psize
= kPageSize
;
885 while ((psize
% size
) > (psize
>> 3)) {
888 const size_t my_pages
= psize
>> kPageShift
;
890 if (sc
> 1 && my_pages
== class_to_pages
[sc
-1]) {
891 // See if we can merge this into the previous class without
892 // increasing the fragmentation of the previous class.
893 const size_t my_objects
= (my_pages
<< kPageShift
) / size
;
894 const size_t prev_objects
= (class_to_pages
[sc
-1] << kPageShift
)
895 / class_to_size
[sc
-1];
896 if (my_objects
== prev_objects
) {
897 // Adjust last class to include this size
898 class_to_size
[sc
-1] = size
;
904 class_to_pages
[sc
] = my_pages
;
905 class_to_size
[sc
] = size
;
908 if (sc
!= kNumClasses
) {
909 MESSAGE("wrong number of size classes: found %" PRIuS
" instead of %d\n",
910 sc
, int(kNumClasses
));
914 // Initialize the mapping arrays
916 for (unsigned char c
= 1; c
< kNumClasses
; c
++) {
917 const size_t max_size_in_class
= class_to_size
[c
];
918 for (size_t s
= next_size
; s
<= max_size_in_class
; s
+= kAlignment
) {
919 class_array
[ClassIndex(s
)] = c
;
921 next_size
= static_cast<int>(max_size_in_class
+ kAlignment
);
924 // Double-check sizes just to be safe
925 for (size_t size
= 0; size
<= kMaxSize
; size
++) {
926 const size_t sc
= SizeClass(size
);
928 MESSAGE("Bad size class %" PRIuS
" for %" PRIuS
"\n", sc
, size
);
931 if (sc
> 1 && size
<= class_to_size
[sc
-1]) {
932 MESSAGE("Allocating unnecessarily large class %" PRIuS
" for %" PRIuS
936 if (sc
>= kNumClasses
) {
937 MESSAGE("Bad size class %" PRIuS
" for %" PRIuS
"\n", sc
, size
);
940 const size_t s
= class_to_size
[sc
];
942 MESSAGE("Bad size %" PRIuS
" for %" PRIuS
" (sc = %" PRIuS
")\n", s
, size
, sc
);
946 MESSAGE("Bad size %" PRIuS
" for %" PRIuS
" (sc = %" PRIuS
")\n", s
, size
, sc
);
951 // Initialize the num_objects_to_move array.
952 for (size_t cl
= 1; cl
< kNumClasses
; ++cl
) {
953 num_objects_to_move
[cl
] = NumMoveSize(ByteSizeForClass(cl
));
958 // Dump class sizes and maximum external wastage per size class
959 for (size_t cl
= 1; cl
< kNumClasses
; ++cl
) {
960 const int alloc_size
= class_to_pages
[cl
] << kPageShift
;
961 const int alloc_objs
= alloc_size
/ class_to_size
[cl
];
962 const int min_used
= (class_to_size
[cl
-1] + 1) * alloc_objs
;
963 const int max_waste
= alloc_size
- min_used
;
964 MESSAGE("SC %3d [ %8d .. %8d ] from %8d ; %2.0f%% maxwaste\n",
966 int(class_to_size
[cl
-1] + 1),
967 int(class_to_size
[cl
]),
968 int(class_to_pages
[cl
] << kPageShift
),
969 max_waste
* 100.0 / alloc_size
976 // -------------------------------------------------------------------------
977 // Simple allocator for objects of a specified type. External locking
978 // is required before accessing one of these objects.
979 // -------------------------------------------------------------------------
981 // Metadata allocator -- keeps stats about how many bytes allocated
982 static uint64_t metadata_system_bytes
= 0;
983 static void* MetaDataAlloc(size_t bytes
) {
984 void* result
= TCMalloc_SystemAlloc(bytes
, 0);
985 if (result
!= NULL
) {
986 metadata_system_bytes
+= bytes
;
992 class PageHeapAllocator
{
994 // How much to allocate from system at a time
995 static const size_t kAllocIncrement
= 32 << 10;
998 static const size_t kAlignedSize
999 = (((sizeof(T
) + kAlignment
- 1) / kAlignment
) * kAlignment
);
1001 // Free area from which to carve new objects
1005 // Linked list of all regions allocated by this allocator
1006 void* allocated_regions_
;
1008 // Free list of already carved objects
1011 // Number of allocated but unfreed objects
1016 ASSERT(kAlignedSize
<= kAllocIncrement
);
1018 allocated_regions_
= 0;
1025 // Consult free list
1027 if (free_list_
!= NULL
) {
1028 result
= free_list_
;
1029 free_list_
= *(reinterpret_cast<void**>(result
));
1031 if (free_avail_
< kAlignedSize
) {
1033 char* new_allocation
= reinterpret_cast<char*>(MetaDataAlloc(kAllocIncrement
));
1034 if (!new_allocation
)
1037 *reinterpret_cast_ptr
<void**>(new_allocation
) = allocated_regions_
;
1038 allocated_regions_
= new_allocation
;
1039 free_area_
= new_allocation
+ kAlignedSize
;
1040 free_avail_
= kAllocIncrement
- kAlignedSize
;
1042 result
= free_area_
;
1043 free_area_
+= kAlignedSize
;
1044 free_avail_
-= kAlignedSize
;
1047 return reinterpret_cast<T
*>(result
);
1051 *(reinterpret_cast<void**>(p
)) = free_list_
;
1056 int inuse() const { return inuse_
; }
1058 #if defined(WTF_CHANGES) && OS(DARWIN)
1059 template <class Recorder
>
1060 void recordAdministrativeRegions(Recorder
& recorder
, const RemoteMemoryReader
& reader
)
1062 for (void* adminAllocation
= allocated_regions_
; adminAllocation
; adminAllocation
= reader
.nextEntryInLinkedList(reinterpret_cast<void**>(adminAllocation
)))
1063 recorder
.recordRegion(reinterpret_cast<vm_address_t
>(adminAllocation
), kAllocIncrement
);
// -------------------------------------------------------------------------
// Span - a contiguous run of pages
// -------------------------------------------------------------------------

// Type that can hold a page number
typedef uintptr_t PageID;

// Type that can hold the length of a run of pages
typedef uintptr_t Length;

static const Length kMaxValidPages = (~static_cast<Length>(0)) >> kPageShift;

// Convert byte size into pages.  This won't overflow, but may return
// an unreasonably large value if bytes is huge enough.
static inline Length pages(size_t bytes) {
  return (bytes >> kPageShift) +
      ((bytes & (kPageSize - 1)) > 0 ? 1 : 0);
}

// Convert a user size into the number of bytes that will actually be
// allocated
static size_t AllocationSize(size_t bytes) {
  if (bytes > kMaxSize) {
    // Large object: we allocate an integral number of pages
    ASSERT(bytes <= (kMaxValidPages << kPageShift));
    return pages(bytes) << kPageShift;
  } else {
    // Small object: find the size class to which it belongs
    return ByteSizeForClass(SizeClass(bytes));
  }
}
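// For example, with kPageShift == 12 (4KB pages): pages(1) == 1,
// pages(4096) == 1, and pages(4097) == 2. AllocationSize() likewise rounds a
// small request up to its size class and a large request up to whole pages.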
1100 // Information kept for a span (a contiguous run of pages).
1102 PageID start
; // Starting page number
1103 Length length
; // Number of pages in span
1104 Span
* next
; // Used when in link list
1105 Span
* prev
; // Used when in link list
1106 void* objects
; // Linked list of free objects
1107 unsigned int free
: 1; // Is the span free
1108 #ifndef NO_TCMALLOC_SAMPLES
1109 unsigned int sample
: 1; // Sampled object?
1111 unsigned int sizeclass
: 8; // Size-class for small objects (or 0)
1112 unsigned int refcount
: 11; // Number of non-free objects
1113 bool decommitted
: 1;
1117 // For debugging, we can keep a log events per span
1124 #define ASSERT_SPAN_COMMITTED(span) ASSERT(!span->decommitted)
1127 void Event(Span
* span
, char op
, int v
= 0) {
1128 span
->history
[span
->nexthistory
] = op
;
1129 span
->value
[span
->nexthistory
] = v
;
1130 span
->nexthistory
++;
1131 if (span
->nexthistory
== sizeof(span
->history
)) span
->nexthistory
= 0;
1134 #define Event(s,o,v) ((void) 0)
1137 // Allocator/deallocator for spans
1138 static PageHeapAllocator
<Span
> span_allocator
;
1139 static Span
* NewSpan(PageID p
, Length len
) {
1140 Span
* result
= span_allocator
.New();
1141 memset(result
, 0, sizeof(*result
));
1143 result
->length
= len
;
1145 result
->nexthistory
= 0;
1150 static inline void DeleteSpan(Span
* span
) {
1152 // In debug mode, trash the contents of deleted Spans
1153 memset(span
, 0x3f, sizeof(*span
));
1155 span_allocator
.Delete(span
);
1158 // -------------------------------------------------------------------------
1159 // Doubly linked list of spans.
1160 // -------------------------------------------------------------------------
1162 static inline void DLL_Init(Span
* list
) {
1167 static inline void DLL_Remove(Span
* span
) {
1168 span
->prev
->next
= span
->next
;
1169 span
->next
->prev
= span
->prev
;
1174 static ALWAYS_INLINE
bool DLL_IsEmpty(const Span
* list
) {
1175 return list
->next
== list
;
1178 static int DLL_Length(const Span
* list
) {
1180 for (Span
* s
= list
->next
; s
!= list
; s
= s
->next
) {
1186 #if 0 /* Not needed at the moment -- causes compiler warnings if not used */
1187 static void DLL_Print(const char* label
, const Span
* list
) {
1188 MESSAGE("%-10s %p:", label
, list
);
1189 for (const Span
* s
= list
->next
; s
!= list
; s
= s
->next
) {
1190 MESSAGE(" <%p,%u,%u>", s
, s
->start
, s
->length
);
1196 static inline void DLL_Prepend(Span
* list
, Span
* span
) {
1197 ASSERT(span
->next
== NULL
);
1198 ASSERT(span
->prev
== NULL
);
1199 span
->next
= list
->next
;
1201 list
->next
->prev
= span
;
1205 // -------------------------------------------------------------------------
1206 // Stack traces kept for sampled allocations
1207 // The following state is protected by pageheap_lock_.
1208 // -------------------------------------------------------------------------
1210 // size/depth are made the same size as a pointer so that some generic
1211 // code below can conveniently cast them back and forth to void*.
1212 static const int kMaxStackDepth
= 31;
1214 uintptr_t size
; // Size of object
1215 uintptr_t depth
; // Number of PC values stored in array below
1216 void* stack
[kMaxStackDepth
];
1218 static PageHeapAllocator
<StackTrace
> stacktrace_allocator
;
1219 static Span sampled_objects
;
1221 // -------------------------------------------------------------------------
1222 // Map from page-id to per-page data
1223 // -------------------------------------------------------------------------
1225 // We use PageMap2<> for 32-bit and PageMap3<> for 64-bit machines.
1226 // We also use a simple one-level cache for hot PageID-to-sizeclass mappings,
1227 // because sometimes the sizeclass is all the information we need.
1229 // Selector class -- general selector uses 3-level map
1230 template <int BITS
> class MapSelector
{
1232 typedef TCMalloc_PageMap3
<BITS
-kPageShift
> Type
;
1233 typedef PackedCache
<BITS
, uint64_t> CacheType
;
1236 #if defined(WTF_CHANGES)
1238 // On all known X86-64 platforms, the upper 16 bits are always unused and therefore
1239 // can be excluded from the PageMap key.
1240 // See http://en.wikipedia.org/wiki/X86-64#Virtual_address_space_details
1242 static const size_t kBitsUnusedOn64Bit
= 16;
1244 static const size_t kBitsUnusedOn64Bit
= 0;
1247 // A three-level map for 64-bit machines
1248 template <> class MapSelector
<64> {
1250 typedef TCMalloc_PageMap3
<64 - kPageShift
- kBitsUnusedOn64Bit
> Type
;
1251 typedef PackedCache
<64, uint64_t> CacheType
;
1255 // A two-level map for 32-bit machines
1256 template <> class MapSelector
<32> {
1258 typedef TCMalloc_PageMap2
<32 - kPageShift
> Type
;
1259 typedef PackedCache
<32 - kPageShift
, uint16_t> CacheType
;
1262 // -------------------------------------------------------------------------
1263 // Page-level allocator
1264 // * Eager coalescing
1266 // Heap for page-level allocation. We allow allocating and freeing a
1267 // contiguous runs of pages (called a "span").
1268 // -------------------------------------------------------------------------
#if USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY
// The page heap maintains a free list for spans that are no longer in use by
// the central cache or any thread caches. We use a background thread to
// periodically scan the free list and release a percentage of it back to the OS.
//
// If free_committed_pages_ exceeds kMinimumFreeCommittedPageCount, the
// background thread:
// - wakes up
// - pauses for kScavengeDelayInSeconds
// - returns to the OS a percentage of the memory that remained unused during
//   that pause (kScavengePercentage * min_free_committed_pages_since_last_scavenge_)
// The goal of this strategy is to reduce memory pressure in a timely fashion
// while avoiding thrashing the OS allocator.
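// For example, if min_free_committed_pages_since_last_scavenge_ is 1000 pages
// when the timer fires, roughly kScavengePercentage * 1000 = 500 of those pages
// are eligible to be released in that pass (see scavenge() below).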

// Time delay before the page heap scavenger will consider returning pages to
// the OS.
static const int kScavengeDelayInSeconds = 2;

// Approximate percentage of free committed pages to return to the OS in one
// scavenge.
static const float kScavengePercentage = .5f;

// Number of span lists to keep spans in when memory is returned.
static const int kMinSpanListsWithSpans = 32;

// Number of free committed pages that we want to keep around. The minimum number of pages used when there
// is 1 span in each of the first kMinSpanListsWithSpans spanlists. Currently 528 pages.
static const size_t kMinimumFreeCommittedPageCount = kMinSpanListsWithSpans * ((1.0f+kMinSpanListsWithSpans) / 2.0f);
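// With kMinSpanListsWithSpans == 32 this evaluates to 32 * (1 + 32) / 2 = 528
// pages, matching the comment above.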
#endif

static SpinLock pageheap_lock = SPINLOCK_INITIALIZER;
1303 class TCMalloc_PageHeap
{
1307 // Allocate a run of "n" pages. Returns zero if out of memory.
1308 Span
* New(Length n
);
1310 // Delete the span "[p, p+n-1]".
1311 // REQUIRES: span was returned by earlier call to New() and
1312 // has not yet been deleted.
1313 void Delete(Span
* span
);
1315 // Mark an allocated span as being used for small objects of the
1316 // specified size-class.
1317 // REQUIRES: span was returned by an earlier call to New()
1318 // and has not yet been deleted.
1319 void RegisterSizeClass(Span
* span
, size_t sc
);
1321 // Split an allocated span into two spans: one of length "n" pages
1322 // followed by another span of length "span->length - n" pages.
1323 // Modifies "*span" to point to the first span of length "n" pages.
1324 // Returns a pointer to the second span.
1326 // REQUIRES: "0 < n < span->length"
1327 // REQUIRES: !span->free
1328 // REQUIRES: span->sizeclass == 0
1329 Span
* Split(Span
* span
, Length n
);
1331 // Return the descriptor for the specified page.
1332 inline Span
* GetDescriptor(PageID p
) const {
1333 return reinterpret_cast<Span
*>(pagemap_
.get(p
));
1337 inline Span
* GetDescriptorEnsureSafe(PageID p
)
1339 pagemap_
.Ensure(p
, 1);
1340 return GetDescriptor(p
);
1343 size_t ReturnedBytes() const;
1346 // Dump state to stderr
1348 void Dump(TCMalloc_Printer
* out
);
1351 // Return number of bytes allocated from system
1352 inline uint64_t SystemBytes() const { return system_bytes_
; }
1354 // Return number of free bytes in heap
1355 uint64_t FreeBytes() const {
1356 return (static_cast<uint64_t>(free_pages_
) << kPageShift
);
1360 bool CheckList(Span
* list
, Length min_pages
, Length max_pages
, bool decommitted
);
1362 // Release all pages on the free list for reuse by the OS:
1363 void ReleaseFreePages();
1365 // Return 0 if we have no information, or else the correct sizeclass for p.
1366 // Reads and writes to pagemap_cache_ do not require locking.
1367 // The entries are 64 bits on 64-bit hardware and 16 bits on
1368 // 32-bit hardware, and we don't mind raciness as long as each read of
1369 // an entry yields a valid entry, not a partially updated entry.
1370 size_t GetSizeClassIfCached(PageID p
) const {
1371 return pagemap_cache_
.GetOrDefault(p
, 0);
1373 void CacheSizeClass(PageID p
, size_t cl
) const { pagemap_cache_
.Put(p
, cl
); }
1376 // Pick the appropriate map and cache types based on pointer size
1377 typedef MapSelector
<8*sizeof(uintptr_t)>::Type PageMap
;
1378 typedef MapSelector
<8*sizeof(uintptr_t)>::CacheType PageMapCache
;
1380 mutable PageMapCache pagemap_cache_
;
1382 // We segregate spans of a given size into two circular linked
1383 // lists: one for normal spans, and one for spans whose memory
1384 // has been returned to the system.
1390 // List of free spans of length >= kMaxPages
1393 // Array mapping from span length to a doubly linked list of free spans
1394 SpanList free_
[kMaxPages
];
1396 // Number of pages kept in free lists
1397 uintptr_t free_pages_
;
1399 // Bytes allocated from system
1400 uint64_t system_bytes_
;
1402 #if USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY
1403 // Number of pages kept in free lists that are still committed.
1404 Length free_committed_pages_
;
1406 // Minimum number of free committed pages since last scavenge. (Can be 0 if
1407 // we've committed new pages since the last scavenge.)
1408 Length min_free_committed_pages_since_last_scavenge_
;
1411 bool GrowHeap(Length n
);
1413 // REQUIRES span->length >= n
1414 // Remove span from its free list, and move any leftover part of
1415 // span into appropriate free lists. Also update "span" to have
1416 // length exactly "n" and mark it as non-free so it can be returned
1419 // "released" is true iff "span" was found on a "returned" list.
1420 void Carve(Span
* span
, Length n
, bool released
);
1422 void RecordSpan(Span
* span
) {
1423 pagemap_
.set(span
->start
, span
);
1424 if (span
->length
> 1) {
1425 pagemap_
.set(span
->start
+ span
->length
- 1, span
);
1429 // Allocate a large span of length == n. If successful, returns a
1430 // span of exactly the specified length. Else, returns NULL.
1431 Span
* AllocLarge(Length n
);
1433 #if !USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY
1434 // Incrementally release some memory to the system.
1435 // IncrementalScavenge(n) is called whenever n pages are freed.
1436 void IncrementalScavenge(Length n
);
1439 // Number of pages to deallocate before doing more scavenging
1440 int64_t scavenge_counter_
;
1442 // Index of last free list we scavenged
1443 size_t scavenge_index_
;
1445 #if defined(WTF_CHANGES) && OS(DARWIN)
1446 friend class FastMallocZone
;
1449 #if USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY
1450 void initializeScavenger();
1451 ALWAYS_INLINE
void signalScavenger();
1453 ALWAYS_INLINE
bool shouldScavenge() const;
1455 #if HAVE(DISPATCH_H) || OS(WINDOWS)
1456 void periodicScavenge();
1457 ALWAYS_INLINE
bool isScavengerSuspended();
1458 ALWAYS_INLINE
void scheduleScavenger();
1459 ALWAYS_INLINE
void rescheduleScavenger();
1460 ALWAYS_INLINE
void suspendScavenger();
1463 #if HAVE(DISPATCH_H)
1464 dispatch_queue_t m_scavengeQueue
;
1465 dispatch_source_t m_scavengeTimer
;
1466 bool m_scavengingSuspended
;
1468 static void CALLBACK
scavengerTimerFired(void*, BOOLEAN
);
1469 HANDLE m_scavengeQueueTimer
;
1471 static NO_RETURN_WITH_VALUE
void* runScavengerThread(void*);
1472 NO_RETURN
void scavengerThread();
1474 // Keeps track of whether the background thread is actively scavenging memory every kScavengeDelayInSeconds, or
1475 // it's blocked waiting for more pages to be deleted.
1476 bool m_scavengeThreadActive
;
1478 pthread_mutex_t m_scavengeMutex
;
1479 pthread_cond_t m_scavengeCondition
;
1482 #endif // USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY
1485 void TCMalloc_PageHeap::init()
1487 pagemap_
.init(MetaDataAlloc
);
1488 pagemap_cache_
= PageMapCache(0);
1492 #if USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY
1493 free_committed_pages_
= 0;
1494 min_free_committed_pages_since_last_scavenge_
= 0;
1495 #endif // USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY
1497 scavenge_counter_
= 0;
1498 // Start scavenging at kMaxPages list
1499 scavenge_index_
= kMaxPages
-1;
1500 COMPILE_ASSERT(kNumClasses
<= (1 << PageMapCache::kValuebits
), valuebits
);
1501 DLL_Init(&large_
.normal
);
1502 DLL_Init(&large_
.returned
);
1503 for (size_t i
= 0; i
< kMaxPages
; i
++) {
1504 DLL_Init(&free_
[i
].normal
);
1505 DLL_Init(&free_
[i
].returned
);
1508 #if USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY
1509 initializeScavenger();
1510 #endif // USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY
1513 #if USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY
1515 #if HAVE(DISPATCH_H)
1517 void TCMalloc_PageHeap::initializeScavenger()
1519 m_scavengeQueue
= dispatch_queue_create("com.apple.JavaScriptCore.FastMallocSavenger", NULL
);
1520 m_scavengeTimer
= dispatch_source_create(DISPATCH_SOURCE_TYPE_TIMER
, 0, 0, m_scavengeQueue
);
1521 dispatch_time_t startTime
= dispatch_time(DISPATCH_TIME_NOW
, kScavengeDelayInSeconds
* NSEC_PER_SEC
);
1522 dispatch_source_set_timer(m_scavengeTimer
, startTime
, kScavengeDelayInSeconds
* NSEC_PER_SEC
, 1000 * NSEC_PER_USEC
);
1523 dispatch_source_set_event_handler(m_scavengeTimer
, ^{ periodicScavenge(); });
1524 m_scavengingSuspended
= true;
1527 ALWAYS_INLINE
bool TCMalloc_PageHeap::isScavengerSuspended()
1529 ASSERT(pageheap_lock
.IsHeld());
1530 return m_scavengingSuspended
;
1533 ALWAYS_INLINE
void TCMalloc_PageHeap::scheduleScavenger()
1535 ASSERT(pageheap_lock
.IsHeld());
1536 m_scavengingSuspended
= false;
1537 dispatch_resume(m_scavengeTimer
);
1540 ALWAYS_INLINE
void TCMalloc_PageHeap::rescheduleScavenger()
1542 // Nothing to do here for libdispatch.
1545 ALWAYS_INLINE
void TCMalloc_PageHeap::suspendScavenger()
1547 ASSERT(pageheap_lock
.IsHeld());
1548 m_scavengingSuspended
= true;
1549 dispatch_suspend(m_scavengeTimer
);
1554 void TCMalloc_PageHeap::scavengerTimerFired(void* context
, BOOLEAN
)
1556 static_cast<TCMalloc_PageHeap
*>(context
)->periodicScavenge();
1559 void TCMalloc_PageHeap::initializeScavenger()
1561 m_scavengeQueueTimer
= 0;
1564 ALWAYS_INLINE
bool TCMalloc_PageHeap::isScavengerSuspended()
1566 ASSERT(IsHeld(pageheap_lock
));
1567 return !m_scavengeQueueTimer
;
1570 ALWAYS_INLINE
void TCMalloc_PageHeap::scheduleScavenger()
1572 // We need to use WT_EXECUTEONLYONCE here and reschedule the timer, because
1573 // Windows will fire the timer event even when the function is already running.
1574 ASSERT(IsHeld(pageheap_lock
));
1575 CreateTimerQueueTimer(&m_scavengeQueueTimer
, 0, scavengerTimerFired
, this, kScavengeDelayInSeconds
* 1000, 0, WT_EXECUTEONLYONCE
);
1578 ALWAYS_INLINE
void TCMalloc_PageHeap::rescheduleScavenger()
1580 // We must delete the timer and create it again, because it is not possible to retrigger a timer on Windows.
1582 scheduleScavenger();
1585 ALWAYS_INLINE
void TCMalloc_PageHeap::suspendScavenger()
1587 ASSERT(IsHeld(pageheap_lock
));
1588 HANDLE scavengeQueueTimer
= m_scavengeQueueTimer
;
1589 m_scavengeQueueTimer
= 0;
1590 DeleteTimerQueueTimer(0, scavengeQueueTimer
, 0);
1595 void TCMalloc_PageHeap::initializeScavenger()
1597 // Create a non-recursive mutex.
1598 #if !defined(PTHREAD_MUTEX_NORMAL) || PTHREAD_MUTEX_NORMAL == PTHREAD_MUTEX_DEFAULT
1599 pthread_mutex_init(&m_scavengeMutex
, 0);
1601 pthread_mutexattr_t attr
;
1602 pthread_mutexattr_init(&attr
);
1603 pthread_mutexattr_settype(&attr
, PTHREAD_MUTEX_NORMAL
);
1605 pthread_mutex_init(&m_scavengeMutex
, &attr
);
1607 pthread_mutexattr_destroy(&attr
);
1610 pthread_cond_init(&m_scavengeCondition
, 0);
1611 m_scavengeThreadActive
= true;
1613 pthread_create(&thread
, 0, runScavengerThread
, this);
1616 void* TCMalloc_PageHeap::runScavengerThread(void* context
)
1618 static_cast<TCMalloc_PageHeap
*>(context
)->scavengerThread();
1619 #if (COMPILER(MSVC) || COMPILER(SUNCC))
1620 // Without this, Visual Studio and Sun Studio will complain that this method does not return a value.
1625 ALWAYS_INLINE
void TCMalloc_PageHeap::signalScavenger()
1627 // m_scavengeMutex should be held before accessing m_scavengeThreadActive.
1628 ASSERT(pthread_mutex_trylock(m_scavengeMutex
));
1629 if (!m_scavengeThreadActive
&& shouldScavenge())
1630 pthread_cond_signal(&m_scavengeCondition
);
1635 void TCMalloc_PageHeap::scavenge()
1637 size_t pagesToRelease
= min_free_committed_pages_since_last_scavenge_
* kScavengePercentage
;
1638 size_t targetPageCount
= std::max
<size_t>(kMinimumFreeCommittedPageCount
, free_committed_pages_
- pagesToRelease
);
1640 Length lastFreeCommittedPages
= free_committed_pages_
;
1641 while (free_committed_pages_
> targetPageCount
) {
1643 for (int i
= kMaxPages
; i
> 0 && free_committed_pages_
>= targetPageCount
; i
--) {
1644 SpanList
* slist
= (static_cast<size_t>(i
) == kMaxPages
) ? &large_
: &free_
[i
];
1645 // If the span size is bigger than kMinSpanListsWithSpans pages return all the spans in the list, else return all but 1 span.
1646 // Return only 50% of a spanlist at a time so spans of size 1 are not the only ones left.
1647 size_t length
= DLL_Length(&slist
->normal
);
1648 size_t numSpansToReturn
= (i
> kMinSpanListsWithSpans
) ? length
: length
/ 2;
1649 for (int j
= 0; static_cast<size_t>(j
) < numSpansToReturn
&& !DLL_IsEmpty(&slist
->normal
) && free_committed_pages_
> targetPageCount
; j
++) {
1650 Span
* s
= slist
->normal
.prev
;
1652 ASSERT(!s
->decommitted
);
1653 if (!s
->decommitted
) {
1654 TCMalloc_SystemRelease(reinterpret_cast<void*>(s
->start
<< kPageShift
),
1655 static_cast<size_t>(s
->length
<< kPageShift
));
1656 ASSERT(free_committed_pages_
>= s
->length
);
1657 free_committed_pages_
-= s
->length
;
1658 s
->decommitted
= true;
1660 DLL_Prepend(&slist
->returned
, s
);
1664 if (lastFreeCommittedPages
== free_committed_pages_
)
1666 lastFreeCommittedPages
= free_committed_pages_
;
1669 min_free_committed_pages_since_last_scavenge_
= free_committed_pages_
;
1672 ALWAYS_INLINE
bool TCMalloc_PageHeap::shouldScavenge() const
1674 return free_committed_pages_
> kMinimumFreeCommittedPageCount
;
1677 #endif // USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY
1679 inline Span
* TCMalloc_PageHeap::New(Length n
) {
1683 // Find first size >= n that has a non-empty list
1684 for (Length s
= n
; s
< kMaxPages
; s
++) {
1686 bool released
= false;
1687 if (!DLL_IsEmpty(&free_
[s
].normal
)) {
1688 // Found normal span
1689 ll
= &free_
[s
].normal
;
1690 } else if (!DLL_IsEmpty(&free_
[s
].returned
)) {
1691 // Found returned span; reallocate it
1692 ll
= &free_
[s
].returned
;
1695 // Keep looking in larger classes
1699 Span
* result
= ll
->next
;
1700 Carve(result
, n
, released
);
1701 #if USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY
1702 // The newly allocated memory is from a span that's in the normal span list (already committed). Update the
1703 // free committed pages count.
1704 ASSERT(free_committed_pages_
>= n
);
1705 free_committed_pages_
-= n
;
1706 if (free_committed_pages_
< min_free_committed_pages_since_last_scavenge_
)
1707 min_free_committed_pages_since_last_scavenge_
= free_committed_pages_
;
1708 #endif // USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY
1714 Span
* result
= AllocLarge(n
);
1715 if (result
!= NULL
) {
1716 ASSERT_SPAN_COMMITTED(result
);
1720 // Grow the heap and try again
1726 return AllocLarge(n
);
1729 Span
* TCMalloc_PageHeap::AllocLarge(Length n
) {
1730 // find the best span (closest to n in size).
1731 // The following loops implements address-ordered best-fit.
1732 bool from_released
= false;
1735 // Search through normal list
1736 for (Span
* span
= large_
.normal
.next
;
1737 span
!= &large_
.normal
;
1738 span
= span
->next
) {
1739 if (span
->length
>= n
) {
1741 || (span
->length
< best
->length
)
1742 || ((span
->length
== best
->length
) && (span
->start
< best
->start
))) {
1744 from_released
= false;
1749 // Search through released list in case it has a better fit
1750 for (Span
* span
= large_
.returned
.next
;
1751 span
!= &large_
.returned
;
1752 span
= span
->next
) {
1753 if (span
->length
>= n
) {
1755 || (span
->length
< best
->length
)
1756 || ((span
->length
== best
->length
) && (span
->start
< best
->start
))) {
1758 from_released
= true;
1764 Carve(best
, n
, from_released
);
1765 #if USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY
1766 // The newly allocated memory is from a span that's in the normal span list (already committed). Update the
1767 // free committed pages count.
1768 ASSERT(free_committed_pages_
>= n
);
1769 free_committed_pages_
-= n
;
1770 if (free_committed_pages_
< min_free_committed_pages_since_last_scavenge_
)
1771 min_free_committed_pages_since_last_scavenge_
= free_committed_pages_
;
1772 #endif // USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY
1780 Span
* TCMalloc_PageHeap::Split(Span
* span
, Length n
) {
1782 ASSERT(n
< span
->length
);
1783 ASSERT(!span
->free
);
1784 ASSERT(span
->sizeclass
== 0);
1785 Event(span
, 'T', n
);
1787 const Length extra
= span
->length
- n
;
1788 Span
* leftover
= NewSpan(span
->start
+ n
, extra
);
1789 Event(leftover
, 'U', extra
);
1790 RecordSpan(leftover
);
1791 pagemap_
.set(span
->start
+ n
- 1, span
); // Update map from pageid to span
1797 inline void TCMalloc_PageHeap::Carve(Span
* span
, Length n
, bool released
) {
1801 Event(span
, 'A', n
);
1804 // If the span chosen to carve from is decommited, commit the entire span at once to avoid committing spans 1 page at a time.
1805 ASSERT(span
->decommitted
);
1806 TCMalloc_SystemCommit(reinterpret_cast<void*>(span
->start
<< kPageShift
), static_cast<size_t>(span
->length
<< kPageShift
));
1807 span
->decommitted
= false;
1808 #if USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY
1809 free_committed_pages_
+= span
->length
;
1813 const int extra
= static_cast<int>(span
->length
- n
);
1816 Span
* leftover
= NewSpan(span
->start
+ n
, extra
);
1818 leftover
->decommitted
= false;
1819 Event(leftover
, 'S', extra
);
1820 RecordSpan(leftover
);
1822 // Place leftover span on appropriate free list
1823 SpanList
* listpair
= (static_cast<size_t>(extra
) < kMaxPages
) ? &free_
[extra
] : &large_
;
1824 Span
* dst
= &listpair
->normal
;
1825 DLL_Prepend(dst
, leftover
);
1828 pagemap_
.set(span
->start
+ n
- 1, span
);
1832 static ALWAYS_INLINE
void mergeDecommittedStates(Span
* destination
, Span
* other
)
1834 if (destination
->decommitted
&& !other
->decommitted
) {
1835 TCMalloc_SystemRelease(reinterpret_cast<void*>(other
->start
<< kPageShift
),
1836 static_cast<size_t>(other
->length
<< kPageShift
));
1837 } else if (other
->decommitted
&& !destination
->decommitted
) {
1838 TCMalloc_SystemRelease(reinterpret_cast<void*>(destination
->start
<< kPageShift
),
1839 static_cast<size_t>(destination
->length
<< kPageShift
));
1840 destination
->decommitted
= true;
1844 inline void TCMalloc_PageHeap::Delete(Span
* span
) {
1846 ASSERT(!span
->free
);
1847 ASSERT(span
->length
> 0);
1848 ASSERT(GetDescriptor(span
->start
) == span
);
1849 ASSERT(GetDescriptor(span
->start
+ span
->length
- 1) == span
);
1850 span
->sizeclass
= 0;
1851 #ifndef NO_TCMALLOC_SAMPLES
1855 // Coalesce -- we guarantee that "p" != 0, so no bounds checking
1856 // necessary. We do not bother resetting the stale pagemap
1857 // entries for the pieces we are merging together because we only
1858 // care about the pagemap entries for the boundaries.
1859 #if USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY
1860 // Track the total size of the neighboring free spans that are committed.
1861 Length neighboringCommittedSpansLength
= 0;
1863 const PageID p
= span
->start
;
1864 const Length n
= span
->length
;
1865 Span
* prev
= GetDescriptor(p
-1);
1866 if (prev
!= NULL
&& prev
->free
) {
1867 // Merge preceding span into this span
1868 ASSERT(prev
->start
+ prev
->length
== p
);
1869 const Length len
= prev
->length
;
1870 #if USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY
1871 if (!prev
->decommitted
)
1872 neighboringCommittedSpansLength
+= len
;
1874 mergeDecommittedStates(span
, prev
);
1878 span
->length
+= len
;
1879 pagemap_
.set(span
->start
, span
);
1880 Event(span
, 'L', len
);
1882 Span
* next
= GetDescriptor(p
+n
);
1883 if (next
!= NULL
&& next
->free
) {
1884 // Merge next span into this span
1885 ASSERT(next
->start
== p
+n
);
1886 const Length len
= next
->length
;
1887 #if USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY
1888 if (!next
->decommitted
)
1889 neighboringCommittedSpansLength
+= len
;
1891 mergeDecommittedStates(span
, next
);
1894 span
->length
+= len
;
1895 pagemap_
.set(span
->start
+ span
->length
- 1, span
);
1896 Event(span
, 'R', len
);
1899 Event(span
, 'D', span
->length
);
1901 if (span
->decommitted
) {
1902 if (span
->length
< kMaxPages
)
1903 DLL_Prepend(&free_
[span
->length
].returned
, span
);
1905 DLL_Prepend(&large_
.returned
, span
);
1907 if (span
->length
< kMaxPages
)
1908 DLL_Prepend(&free_
[span
->length
].normal
, span
);
1910 DLL_Prepend(&large_
.normal
, span
);
1914 #if USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY
1915 if (span
->decommitted
) {
1916 // If the merged span is decommitted, that means we decommitted any neighboring spans that were
1917 // committed. Update the free committed pages count.
1918 free_committed_pages_
-= neighboringCommittedSpansLength
;
1919 if (free_committed_pages_
< min_free_committed_pages_since_last_scavenge_
)
1920 min_free_committed_pages_since_last_scavenge_
= free_committed_pages_
;
1922 // If the merged span remains committed, add the deleted span's size to the free committed pages count.
1923 free_committed_pages_
+= n
;
1926 // Make sure the scavenge thread becomes active if we have enough freed pages to release some back to the system.
1929 IncrementalScavenge(n
);
1935 #if !USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY
1936 void TCMalloc_PageHeap::IncrementalScavenge(Length n
) {
1937 // Fast path; not yet time to release memory
1938 scavenge_counter_
-= n
;
1939 if (scavenge_counter_
>= 0) return; // Not yet time to scavenge
1941 static const size_t kDefaultReleaseDelay
= 64;
1943 // Find index of free list to scavenge
1944 size_t index
= scavenge_index_
+ 1;
1945 for (size_t i
= 0; i
< kMaxPages
+1; i
++) {
1946 if (index
> kMaxPages
) index
= 0;
1947 SpanList
* slist
= (index
== kMaxPages
) ? &large_
: &free_
[index
];
1948 if (!DLL_IsEmpty(&slist
->normal
)) {
1949 // Release the last span on the normal portion of this list
1950 Span
* s
= slist
->normal
.prev
;
1952 TCMalloc_SystemRelease(reinterpret_cast<void*>(s
->start
<< kPageShift
),
1953 static_cast<size_t>(s
->length
<< kPageShift
));
1954 s
->decommitted
= true;
1955 DLL_Prepend(&slist
->returned
, s
);
1957 scavenge_counter_
= std::max
<size_t>(16UL, std::min
<size_t>(kDefaultReleaseDelay
, kDefaultReleaseDelay
- (free_pages_
/ kDefaultReleaseDelay
)));
1959 if (index
== kMaxPages
&& !DLL_IsEmpty(&slist
->normal
))
1960 scavenge_index_
= index
- 1;
1962 scavenge_index_
= index
;
1968 // Nothing to scavenge, delay for a while
1969 scavenge_counter_
= kDefaultReleaseDelay
;
1973 void TCMalloc_PageHeap::RegisterSizeClass(Span
* span
, size_t sc
) {
1974 // Associate span object with all interior pages as well
1975 ASSERT(!span
->free
);
1976 ASSERT(GetDescriptor(span
->start
) == span
);
1977 ASSERT(GetDescriptor(span
->start
+span
->length
-1) == span
);
1978 Event(span
, 'C', sc
);
1979 span
->sizeclass
= static_cast<unsigned int>(sc
);
1980 for (Length i
= 1; i
< span
->length
-1; i
++) {
1981 pagemap_
.set(span
->start
+i
, span
);
1986 size_t TCMalloc_PageHeap::ReturnedBytes() const {
1988 for (unsigned s
= 0; s
< kMaxPages
; s
++) {
1989 const int r_length
= DLL_Length(&free_
[s
].returned
);
1990 unsigned r_pages
= s
* r_length
;
1991 result
+= r_pages
<< kPageShift
;
1994 for (Span
* s
= large_
.returned
.next
; s
!= &large_
.returned
; s
= s
->next
)
1995 result
+= s
->length
<< kPageShift
;
2001 static double PagesToMB(uint64_t pages
) {
2002 return (pages
<< kPageShift
) / 1048576.0;
2005 void TCMalloc_PageHeap::Dump(TCMalloc_Printer
* out
) {
2006 int nonempty_sizes
= 0;
2007 for (int s
= 0; s
< kMaxPages
; s
++) {
2008 if (!DLL_IsEmpty(&free_
[s
].normal
) || !DLL_IsEmpty(&free_
[s
].returned
)) {
2012 out
->printf("------------------------------------------------\n");
2013 out
->printf("PageHeap: %d sizes; %6.1f MB free\n",
2014 nonempty_sizes
, PagesToMB(free_pages_
));
2015 out
->printf("------------------------------------------------\n");
2016 uint64_t total_normal
= 0;
2017 uint64_t total_returned
= 0;
2018 for (int s
= 0; s
< kMaxPages
; s
++) {
2019 const int n_length
= DLL_Length(&free_
[s
].normal
);
2020 const int r_length
= DLL_Length(&free_
[s
].returned
);
2021 if (n_length
+ r_length
> 0) {
2022 uint64_t n_pages
= s
* n_length
;
2023 uint64_t r_pages
= s
* r_length
;
2024 total_normal
+= n_pages
;
2025 total_returned
+= r_pages
;
2026 out
->printf("%6u pages * %6u spans ~ %6.1f MB; %6.1f MB cum"
2027 "; unmapped: %6.1f MB; %6.1f MB cum\n",
2029 (n_length
+ r_length
),
2030 PagesToMB(n_pages
+ r_pages
),
2031 PagesToMB(total_normal
+ total_returned
),
2033 PagesToMB(total_returned
));
2037 uint64_t n_pages
= 0;
2038 uint64_t r_pages
= 0;
2041 out
->printf("Normal large spans:\n");
2042 for (Span
* s
= large_
.normal
.next
; s
!= &large_
.normal
; s
= s
->next
) {
2043 out
->printf(" [ %6" PRIuS
" pages ] %6.1f MB\n",
2044 s
->length
, PagesToMB(s
->length
));
2045 n_pages
+= s
->length
;
2048 out
->printf("Unmapped large spans:\n");
2049 for (Span
* s
= large_
.returned
.next
; s
!= &large_
.returned
; s
= s
->next
) {
2050 out
->printf(" [ %6" PRIuS
" pages ] %6.1f MB\n",
2051 s
->length
, PagesToMB(s
->length
));
2052 r_pages
+= s
->length
;
2055 total_normal
+= n_pages
;
2056 total_returned
+= r_pages
;
2057 out
->printf(">255 large * %6u spans ~ %6.1f MB; %6.1f MB cum"
2058 "; unmapped: %6.1f MB; %6.1f MB cum\n",
2059 (n_spans
+ r_spans
),
2060 PagesToMB(n_pages
+ r_pages
),
2061 PagesToMB(total_normal
+ total_returned
),
2063 PagesToMB(total_returned
));
2067 bool TCMalloc_PageHeap::GrowHeap(Length n
) {
2068 ASSERT(kMaxPages
>= kMinSystemAlloc
);
2069 if (n
> kMaxValidPages
) return false;
2070 Length ask
= (n
>kMinSystemAlloc
) ? n
: static_cast<Length
>(kMinSystemAlloc
);
2072 void* ptr
= TCMalloc_SystemAlloc(ask
<< kPageShift
, &actual_size
, kPageSize
);
2075 // Try growing just "n" pages
2077 ptr
= TCMalloc_SystemAlloc(ask
<< kPageShift
, &actual_size
, kPageSize
);
2079 if (ptr
== NULL
) return false;
2081 ask
= actual_size
>> kPageShift
;
2083 uint64_t old_system_bytes
= system_bytes_
;
2084 system_bytes_
+= (ask
<< kPageShift
);
2085 const PageID p
= reinterpret_cast<uintptr_t>(ptr
) >> kPageShift
;
2088 // If we have already a lot of pages allocated, just pre allocate a bunch of
2089 // memory for the page map. This prevents fragmentation by pagemap metadata
2090 // when a program keeps allocating and freeing large blocks.
2092 if (old_system_bytes
< kPageMapBigAllocationThreshold
2093 && system_bytes_
>= kPageMapBigAllocationThreshold
) {
2094 pagemap_
.PreallocateMoreMemory();
2097 // Make sure pagemap_ has entries for all of the new pages.
2098 // Plus ensure one before and one after so coalescing code
2099 // does not need bounds-checking.
2100 if (pagemap_
.Ensure(p
-1, ask
+2)) {
2101 // Pretend the new area is allocated and then Delete() it to
2102 // cause any necessary coalescing to occur.
2104 // We do not adjust free_pages_ here since Delete() will do it for us.
2105 Span
* span
= NewSpan(p
, ask
);
2111 // We could not allocate memory within "pagemap_"
2112 // TODO: Once we can return memory to the system, return the new span
2117 bool TCMalloc_PageHeap::Check() {
2118 ASSERT(free_
[0].normal
.next
== &free_
[0].normal
);
2119 ASSERT(free_
[0].returned
.next
== &free_
[0].returned
);
2120 CheckList(&large_
.normal
, kMaxPages
, 1000000000, false);
2121 CheckList(&large_
.returned
, kMaxPages
, 1000000000, true);
2122 for (Length s
= 1; s
< kMaxPages
; s
++) {
2123 CheckList(&free_
[s
].normal
, s
, s
, false);
2124 CheckList(&free_
[s
].returned
, s
, s
, true);
2130 bool TCMalloc_PageHeap::CheckList(Span
*, Length
, Length
, bool) {
2134 bool TCMalloc_PageHeap::CheckList(Span
* list
, Length min_pages
, Length max_pages
, bool decommitted
) {
2135 for (Span
* s
= list
->next
; s
!= list
; s
= s
->next
) {
2136 CHECK_CONDITION(s
->free
);
2137 CHECK_CONDITION(s
->length
>= min_pages
);
2138 CHECK_CONDITION(s
->length
<= max_pages
);
2139 CHECK_CONDITION(GetDescriptor(s
->start
) == s
);
2140 CHECK_CONDITION(GetDescriptor(s
->start
+s
->length
-1) == s
);
2141 CHECK_CONDITION(s
->decommitted
== decommitted
);
2147 static void ReleaseFreeList(Span
* list
, Span
* returned
) {
2148 // Walk backwards through list so that when we push these
2149 // spans on the "returned" list, we preserve the order.
2150 while (!DLL_IsEmpty(list
)) {
2151 Span
* s
= list
->prev
;
2153 s
->decommitted
= true;
2154 DLL_Prepend(returned
, s
);
2155 TCMalloc_SystemRelease(reinterpret_cast<void*>(s
->start
<< kPageShift
),
2156 static_cast<size_t>(s
->length
<< kPageShift
));
2160 void TCMalloc_PageHeap::ReleaseFreePages() {
2161 for (Length s
= 0; s
< kMaxPages
; s
++) {
2162 ReleaseFreeList(&free_
[s
].normal
, &free_
[s
].returned
);
2164 ReleaseFreeList(&large_
.normal
, &large_
.returned
);
2168 //-------------------------------------------------------------------
2170 //-------------------------------------------------------------------
2172 class TCMalloc_ThreadCache_FreeList
{
2174 void* list_
; // Linked list of nodes
2175 uint16_t length_
; // Current length
2176 uint16_t lowater_
; // Low water mark for list length
2185 // Return current length of list
2186 int length() const {
2191 bool empty() const {
2192 return list_
== NULL
;
2195 // Low-water mark management
2196 int lowwatermark() const { return lowater_
; }
2197 void clear_lowwatermark() { lowater_
= length_
; }
2199 ALWAYS_INLINE
void Push(void* ptr
) {
2200 SLL_Push(&list_
, ptr
);
2204 void PushRange(int N
, void *start
, void *end
) {
2205 SLL_PushRange(&list_
, start
, end
);
2206 length_
= length_
+ static_cast<uint16_t>(N
);
2209 void PopRange(int N
, void **start
, void **end
) {
2210 SLL_PopRange(&list_
, N
, start
, end
);
2211 ASSERT(length_
>= N
);
2212 length_
= length_
- static_cast<uint16_t>(N
);
2213 if (length_
< lowater_
) lowater_
= length_
;
2216 ALWAYS_INLINE
void* Pop() {
2217 ASSERT(list_
!= NULL
);
2219 if (length_
< lowater_
) lowater_
= length_
;
2220 return SLL_Pop(&list_
);
2224 template <class Finder
, class Reader
>
2225 void enumerateFreeObjects(Finder
& finder
, const Reader
& reader
)
2227 for (void* nextObject
= list_
; nextObject
; nextObject
= reader
.nextEntryInLinkedList(reinterpret_cast<void**>(nextObject
)))
2228 finder
.visit(nextObject
);
2233 //-------------------------------------------------------------------
2234 // Data kept per thread
2235 //-------------------------------------------------------------------
2237 class TCMalloc_ThreadCache
{
2239 typedef TCMalloc_ThreadCache_FreeList FreeList
;
2241 typedef DWORD ThreadIdentifier
;
2243 typedef pthread_t ThreadIdentifier
;
2246 size_t size_
; // Combined size of data
2247 ThreadIdentifier tid_
; // Which thread owns it
2248 bool in_setspecific_
; // Called pthread_setspecific?
2249 FreeList list_
[kNumClasses
]; // Array indexed by size-class
2251 // We sample allocations, biased by the size of the allocation
2252 uint32_t rnd_
; // Cheap random number generator
2253 size_t bytes_until_sample_
; // Bytes until we sample next
2255 // Allocate a new heap. REQUIRES: pageheap_lock is held.
2256 static inline TCMalloc_ThreadCache
* NewHeap(ThreadIdentifier tid
);
2258 // Use only as pthread thread-specific destructor function.
2259 static void DestroyThreadCache(void* ptr
);
2261 // All ThreadCache objects are kept in a linked list (for stats collection)
2262 TCMalloc_ThreadCache
* next_
;
2263 TCMalloc_ThreadCache
* prev_
;
2265 void Init(ThreadIdentifier tid
);
2268 // Accessors (mostly just for printing stats)
2269 int freelist_length(size_t cl
) const { return list_
[cl
].length(); }
2271 // Total byte size in cache
2272 size_t Size() const { return size_
; }
2274 ALWAYS_INLINE
void* Allocate(size_t size
);
2275 void Deallocate(void* ptr
, size_t size_class
);
2277 ALWAYS_INLINE
void FetchFromCentralCache(size_t cl
, size_t allocationSize
);
2278 void ReleaseToCentralCache(size_t cl
, int N
);
2282 // Record allocation of "k" bytes. Return true iff allocation
2283 // should be sampled
2284 bool SampleAllocation(size_t k
);
2286 // Pick next sampling point
2287 void PickNextSample(size_t k
);
2289 static void InitModule();
2290 static void InitTSD();
2291 static TCMalloc_ThreadCache
* GetThreadHeap();
2292 static TCMalloc_ThreadCache
* GetCache();
2293 static TCMalloc_ThreadCache
* GetCacheIfPresent();
2294 static TCMalloc_ThreadCache
* CreateCacheIfNecessary();
2295 static void DeleteCache(TCMalloc_ThreadCache
* heap
);
2296 static void BecomeIdle();
2297 static void RecomputeThreadCacheSize();
2300 template <class Finder
, class Reader
>
2301 void enumerateFreeObjects(Finder
& finder
, const Reader
& reader
)
2303 for (unsigned sizeClass
= 0; sizeClass
< kNumClasses
; sizeClass
++)
2304 list_
[sizeClass
].enumerateFreeObjects(finder
, reader
);
2309 //-------------------------------------------------------------------
2310 // Data kept per size-class in central cache
2311 //-------------------------------------------------------------------
2313 class TCMalloc_Central_FreeList
{
2315 void Init(size_t cl
);
2317 // These methods all do internal locking.
2319 // Insert the specified range into the central freelist. N is the number of
2320 // elements in the range.
2321 void InsertRange(void *start
, void *end
, int N
);
2323 // Returns the actual number of fetched elements into N.
2324 void RemoveRange(void **start
, void **end
, int *N
);
2326 // Returns the number of free objects in cache.
2328 SpinLockHolder
h(&lock_
);
2332 // Returns the number of free objects in the transfer cache.
2334 SpinLockHolder
h(&lock_
);
2335 return used_slots_
* num_objects_to_move
[size_class_
];
2339 template <class Finder
, class Reader
>
2340 void enumerateFreeObjects(Finder
& finder
, const Reader
& reader
, TCMalloc_Central_FreeList
* remoteCentralFreeList
)
2342 for (Span
* span
= &empty_
; span
&& span
!= &empty_
; span
= (span
->next
? reader(span
->next
) : 0))
2343 ASSERT(!span
->objects
);
2345 ASSERT(!nonempty_
.objects
);
2346 static const ptrdiff_t nonemptyOffset
= reinterpret_cast<const char*>(&nonempty_
) - reinterpret_cast<const char*>(this);
2348 Span
* remoteNonempty
= reinterpret_cast<Span
*>(reinterpret_cast<char*>(remoteCentralFreeList
) + nonemptyOffset
);
2349 Span
* remoteSpan
= nonempty_
.next
;
2351 for (Span
* span
= reader(remoteSpan
); span
&& remoteSpan
!= remoteNonempty
; remoteSpan
= span
->next
, span
= (span
->next
? reader(span
->next
) : 0)) {
2352 for (void* nextObject
= span
->objects
; nextObject
; nextObject
= reader
.nextEntryInLinkedList(reinterpret_cast<void**>(nextObject
)))
2353 finder
.visit(nextObject
);
2359 // REQUIRES: lock_ is held
2360 // Remove object from cache and return.
2361 // Return NULL if no free entries in cache.
2362 void* FetchFromSpans();
2364 // REQUIRES: lock_ is held
2365 // Remove object from cache and return. Fetches
2366 // from pageheap if cache is empty. Only returns
2367 // NULL on allocation failure.
2368 void* FetchFromSpansSafe();
2370 // REQUIRES: lock_ is held
2371 // Release a linked list of objects to spans.
2372 // May temporarily release lock_.
2373 void ReleaseListToSpans(void *start
);
2375 // REQUIRES: lock_ is held
2376 // Release an object to spans.
2377 // May temporarily release lock_.
2378 ALWAYS_INLINE
void ReleaseToSpans(void* object
);
2380 // REQUIRES: lock_ is held
2381 // Populate cache by fetching from the page heap.
2382 // May temporarily release lock_.
2383 ALWAYS_INLINE
void Populate();
2385 // REQUIRES: lock is held.
2386 // Tries to make room for a TCEntry. If the cache is full it will try to
2387 // expand it at the cost of some other cache size. Return false if there is
2389 bool MakeCacheSpace();
2391 // REQUIRES: lock_ for locked_size_class is held.
2392 // Picks a "random" size class to steal TCEntry slot from. In reality it
2393 // just iterates over the sizeclasses but does so without taking a lock.
2394 // Returns true on success.
2395 // May temporarily lock a "random" size class.
2396 static ALWAYS_INLINE
bool EvictRandomSizeClass(size_t locked_size_class
, bool force
);
2398 // REQUIRES: lock_ is *not* held.
2399 // Tries to shrink the Cache. If force is true it will relase objects to
2400 // spans if it allows it to shrink the cache. Return false if it failed to
2401 // shrink the cache. Decrements cache_size_ on succeess.
2402 // May temporarily take lock_. If it takes lock_, the locked_size_class
2403 // lock is released to the thread from holding two size class locks
2404 // concurrently which could lead to a deadlock.
2405 bool ShrinkCache(int locked_size_class
, bool force
);
2407 // This lock protects all the data members. cached_entries and cache_size_
2408 // may be looked at without holding the lock.
2411 // We keep linked lists of empty and non-empty spans.
2412 size_t size_class_
; // My size class
2413 Span empty_
; // Dummy header for list of empty spans
2414 Span nonempty_
; // Dummy header for list of non-empty spans
2415 size_t counter_
; // Number of free objects in cache entry
2417 // Here we reserve space for TCEntry cache slots. Since one size class can
2418 // end up getting all the TCEntries quota in the system we just preallocate
2419 // sufficient number of entries here.
2420 TCEntry tc_slots_
[kNumTransferEntries
];
2422 // Number of currently used cached entries in tc_slots_. This variable is
2423 // updated under a lock but can be read without one.
2424 int32_t used_slots_
;
2425 // The current number of slots for this size class. This is an
2426 // adaptive value that is increased if there is lots of traffic
2427 // on a given size class.
2428 int32_t cache_size_
;
2431 // Pad each CentralCache object to multiple of 64 bytes
2432 class TCMalloc_Central_FreeListPadded
: public TCMalloc_Central_FreeList
{
2434 char pad_
[(64 - (sizeof(TCMalloc_Central_FreeList
) % 64)) % 64];
2437 //-------------------------------------------------------------------
2439 //-------------------------------------------------------------------
2441 // Central cache -- a collection of free-lists, one per size-class.
2442 // We have a separate lock per free-list to reduce contention.
2443 static TCMalloc_Central_FreeListPadded central_cache
[kNumClasses
];
2445 // Page-level allocator
2446 static AllocAlignmentInteger pageheap_memory
[(sizeof(TCMalloc_PageHeap
) + sizeof(AllocAlignmentInteger
) - 1) / sizeof(AllocAlignmentInteger
)];
2447 static bool phinited
= false;
2449 // Avoid extra level of indirection by making "pageheap" be just an alias
2450 // of pageheap_memory.
2453 TCMalloc_PageHeap
* m_pageHeap
;
2456 static inline TCMalloc_PageHeap
* getPageHeap()
2458 PageHeapUnion u
= { &pageheap_memory
[0] };
2459 return u
.m_pageHeap
;
2462 #define pageheap getPageHeap()
2464 #if USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY
2466 #if HAVE(DISPATCH_H) || OS(WINDOWS)
2468 void TCMalloc_PageHeap::periodicScavenge()
2470 SpinLockHolder
h(&pageheap_lock
);
2471 pageheap
->scavenge();
2473 if (shouldScavenge()) {
2474 rescheduleScavenger();
2481 ALWAYS_INLINE
void TCMalloc_PageHeap::signalScavenger()
2483 ASSERT(pageheap_lock
.IsHeld());
2484 if (isScavengerSuspended() && shouldScavenge())
2485 scheduleScavenger();
2490 void TCMalloc_PageHeap::scavengerThread()
2492 #if HAVE(PTHREAD_SETNAME_NP)
2493 pthread_setname_np("JavaScriptCore: FastMalloc scavenger");
2497 if (!shouldScavenge()) {
2498 pthread_mutex_lock(&m_scavengeMutex
);
2499 m_scavengeThreadActive
= false;
2500 // Block until there are enough free committed pages to release back to the system.
2501 pthread_cond_wait(&m_scavengeCondition
, &m_scavengeMutex
);
2502 m_scavengeThreadActive
= true;
2503 pthread_mutex_unlock(&m_scavengeMutex
);
2505 sleep(kScavengeDelayInSeconds
);
2507 SpinLockHolder
h(&pageheap_lock
);
2508 pageheap
->scavenge();
2517 // If TLS is available, we also store a copy
2518 // of the per-thread object in a __thread variable
2519 // since __thread variables are faster to read
2520 // than pthread_getspecific(). We still need
2521 // pthread_setspecific() because __thread
2522 // variables provide no way to run cleanup
2523 // code when a thread is destroyed.
2525 static __thread TCMalloc_ThreadCache
*threadlocal_heap
;
2527 // Thread-specific key. Initialization here is somewhat tricky
2528 // because some Linux startup code invokes malloc() before it
2529 // is in a good enough state to handle pthread_keycreate().
2530 // Therefore, we use TSD keys only after tsd_inited is set to true.
2531 // Until then, we use a slow path to get the heap object.
2532 static bool tsd_inited
= false;
2533 #if USE(PTHREAD_GETSPECIFIC_DIRECT)
2534 static const pthread_key_t heap_key
= __PTK_FRAMEWORK_JAVASCRIPTCORE_KEY0
;
2536 static pthread_key_t heap_key
;
2539 DWORD tlsIndex
= TLS_OUT_OF_INDEXES
;
2542 static ALWAYS_INLINE
void setThreadHeap(TCMalloc_ThreadCache
* heap
)
2544 #if USE(PTHREAD_GETSPECIFIC_DIRECT)
2545 // Can't have two libraries both doing this in the same process,
2546 // so check and make this crash right away.
2547 if (pthread_getspecific(heap_key
))
2551 // Still do pthread_setspecific even if there's an alternate form
2552 // of thread-local storage in use, to benefit from the delete callback.
2553 pthread_setspecific(heap_key
, heap
);
2556 TlsSetValue(tlsIndex
, heap
);
2560 // Allocator for thread heaps
2561 static PageHeapAllocator
<TCMalloc_ThreadCache
> threadheap_allocator
;
2563 // Linked list of heap objects. Protected by pageheap_lock.
2564 static TCMalloc_ThreadCache
* thread_heaps
= NULL
;
2565 static int thread_heap_count
= 0;
2567 // Overall thread cache size. Protected by pageheap_lock.
2568 static size_t overall_thread_cache_size
= kDefaultOverallThreadCacheSize
;
2570 // Global per-thread cache size. Writes are protected by
2571 // pageheap_lock. Reads are done without any locking, which should be
2572 // fine as long as size_t can be written atomically and we don't place
2573 // invariants between this variable and other pieces of state.
2574 static volatile size_t per_thread_cache_size
= kMaxThreadCacheSize
;
2576 //-------------------------------------------------------------------
2577 // Central cache implementation
2578 //-------------------------------------------------------------------
2580 void TCMalloc_Central_FreeList::Init(size_t cl
) {
2584 DLL_Init(&nonempty_
);
2589 ASSERT(cache_size_
<= kNumTransferEntries
);
2592 void TCMalloc_Central_FreeList::ReleaseListToSpans(void* start
) {
2594 void *next
= SLL_Next(start
);
2595 ReleaseToSpans(start
);
2600 ALWAYS_INLINE
void TCMalloc_Central_FreeList::ReleaseToSpans(void* object
) {
2601 const PageID p
= reinterpret_cast<uintptr_t>(object
) >> kPageShift
;
2602 Span
* span
= pageheap
->GetDescriptor(p
);
2603 ASSERT(span
!= NULL
);
2604 ASSERT(span
->refcount
> 0);
2606 // If span is empty, move it to non-empty list
2607 if (span
->objects
== NULL
) {
2609 DLL_Prepend(&nonempty_
, span
);
2610 Event(span
, 'N', 0);
2613 // The following check is expensive, so it is disabled by default
2615 // Check that object does not occur in list
2617 for (void* p
= span
->objects
; p
!= NULL
; p
= *((void**) p
)) {
2618 ASSERT(p
!= object
);
2621 ASSERT(got
+ span
->refcount
==
2622 (span
->length
<<kPageShift
)/ByteSizeForClass(span
->sizeclass
));
2627 if (span
->refcount
== 0) {
2628 Event(span
, '#', 0);
2629 counter_
-= (span
->length
<<kPageShift
) / ByteSizeForClass(span
->sizeclass
);
2632 // Release central list lock while operating on pageheap
2635 SpinLockHolder
h(&pageheap_lock
);
2636 pageheap
->Delete(span
);
2640 *(reinterpret_cast<void**>(object
)) = span
->objects
;
2641 span
->objects
= object
;
2645 ALWAYS_INLINE
bool TCMalloc_Central_FreeList::EvictRandomSizeClass(
2646 size_t locked_size_class
, bool force
) {
2647 static int race_counter
= 0;
2648 int t
= race_counter
++; // Updated without a lock, but who cares.
2649 if (t
>= static_cast<int>(kNumClasses
)) {
2650 while (t
>= static_cast<int>(kNumClasses
)) {
2656 ASSERT(t
< static_cast<int>(kNumClasses
));
2657 if (t
== static_cast<int>(locked_size_class
)) return false;
2658 return central_cache
[t
].ShrinkCache(static_cast<int>(locked_size_class
), force
);
2661 bool TCMalloc_Central_FreeList::MakeCacheSpace() {
2662 // Is there room in the cache?
2663 if (used_slots_
< cache_size_
) return true;
2664 // Check if we can expand this cache?
2665 if (cache_size_
== kNumTransferEntries
) return false;
2666 // Ok, we'll try to grab an entry from some other size class.
2667 if (EvictRandomSizeClass(size_class_
, false) ||
2668 EvictRandomSizeClass(size_class_
, true)) {
2669 // Succeeded in evicting, we're going to make our cache larger.
2678 class LockInverter
{
2680 SpinLock
*held_
, *temp_
;
2682 inline explicit LockInverter(SpinLock
* held
, SpinLock
*temp
)
2683 : held_(held
), temp_(temp
) { held_
->Unlock(); temp_
->Lock(); }
2684 inline ~LockInverter() { temp_
->Unlock(); held_
->Lock(); }
2688 bool TCMalloc_Central_FreeList::ShrinkCache(int locked_size_class
, bool force
) {
2689 // Start with a quick check without taking a lock.
2690 if (cache_size_
== 0) return false;
2691 // We don't evict from a full cache unless we are 'forcing'.
2692 if (force
== false && used_slots_
== cache_size_
) return false;
2694 // Grab lock, but first release the other lock held by this thread. We use
2695 // the lock inverter to ensure that we never hold two size class locks
2696 // concurrently. That can create a deadlock because there is no well
2697 // defined nesting order.
2698 LockInverter
li(¢ral_cache
[locked_size_class
].lock_
, &lock_
);
2699 ASSERT(used_slots_
<= cache_size_
);
2700 ASSERT(0 <= cache_size_
);
2701 if (cache_size_
== 0) return false;
2702 if (used_slots_
== cache_size_
) {
2703 if (force
== false) return false;
2704 // ReleaseListToSpans releases the lock, so we have to make all the
2705 // updates to the central list before calling it.
2708 ReleaseListToSpans(tc_slots_
[used_slots_
].head
);
2715 void TCMalloc_Central_FreeList::InsertRange(void *start
, void *end
, int N
) {
2716 SpinLockHolder
h(&lock_
);
2717 if (N
== num_objects_to_move
[size_class_
] &&
2719 int slot
= used_slots_
++;
2721 ASSERT(slot
< kNumTransferEntries
);
2722 TCEntry
*entry
= &tc_slots_
[slot
];
2723 entry
->head
= start
;
2727 ReleaseListToSpans(start
);
2730 void TCMalloc_Central_FreeList::RemoveRange(void **start
, void **end
, int *N
) {
2734 SpinLockHolder
h(&lock_
);
2735 if (num
== num_objects_to_move
[size_class_
] && used_slots_
> 0) {
2736 int slot
= --used_slots_
;
2738 TCEntry
*entry
= &tc_slots_
[slot
];
2739 *start
= entry
->head
;
2744 // TODO: Prefetch multiple TCEntries?
2745 void *tail
= FetchFromSpansSafe();
2747 // We are completely out of memory.
2748 *start
= *end
= NULL
;
2753 SLL_SetNext(tail
, NULL
);
2756 while (count
< num
) {
2757 void *t
= FetchFromSpans();
2768 void* TCMalloc_Central_FreeList::FetchFromSpansSafe() {
2769 void *t
= FetchFromSpans();
2772 t
= FetchFromSpans();
2777 void* TCMalloc_Central_FreeList::FetchFromSpans() {
2778 if (DLL_IsEmpty(&nonempty_
)) return NULL
;
2779 Span
* span
= nonempty_
.next
;
2781 ASSERT(span
->objects
!= NULL
);
2782 ASSERT_SPAN_COMMITTED(span
);
2784 void* result
= span
->objects
;
2785 span
->objects
= *(reinterpret_cast<void**>(result
));
2786 if (span
->objects
== NULL
) {
2787 // Move to empty list
2789 DLL_Prepend(&empty_
, span
);
2790 Event(span
, 'E', 0);
2796 // Fetch memory from the system and add to the central cache freelist.
2797 ALWAYS_INLINE
void TCMalloc_Central_FreeList::Populate() {
2798 // Release central list lock while operating on pageheap
2800 const size_t npages
= class_to_pages
[size_class_
];
2804 SpinLockHolder
h(&pageheap_lock
);
2805 span
= pageheap
->New(npages
);
2806 if (span
) pageheap
->RegisterSizeClass(span
, size_class_
);
2810 MESSAGE("allocation failed: %d\n", errno
);
2812 MESSAGE("allocation failed: %d\n", ::GetLastError());
2814 MESSAGE("allocation failed\n");
2819 ASSERT_SPAN_COMMITTED(span
);
2820 ASSERT(span
->length
== npages
);
2821 // Cache sizeclass info eagerly. Locking is not necessary.
2822 // (Instead of being eager, we could just replace any stale info
2823 // about this span, but that seems to be no better in practice.)
2824 for (size_t i
= 0; i
< npages
; i
++) {
2825 pageheap
->CacheSizeClass(span
->start
+ i
, size_class_
);
2828 // Split the block into pieces and add to the free-list
2829 // TODO: coloring of objects to avoid cache conflicts?
2830 void** tail
= &span
->objects
;
2831 char* ptr
= reinterpret_cast<char*>(span
->start
<< kPageShift
);
2832 char* limit
= ptr
+ (npages
<< kPageShift
);
2833 const size_t size
= ByteSizeForClass(size_class_
);
2836 while ((nptr
= ptr
+ size
) <= limit
) {
2838 tail
= reinterpret_cast_ptr
<void**>(ptr
);
2842 ASSERT(ptr
<= limit
);
2844 span
->refcount
= 0; // No sub-object in use yet
2846 // Add span to list of non-empty spans
2848 DLL_Prepend(&nonempty_
, span
);
2852 //-------------------------------------------------------------------
2853 // TCMalloc_ThreadCache implementation
2854 //-------------------------------------------------------------------
2856 inline bool TCMalloc_ThreadCache::SampleAllocation(size_t k
) {
2857 if (bytes_until_sample_
< k
) {
2861 bytes_until_sample_
-= k
;
2866 void TCMalloc_ThreadCache::Init(ThreadIdentifier tid
) {
2871 in_setspecific_
= false;
2872 for (size_t cl
= 0; cl
< kNumClasses
; ++cl
) {
2876 // Initialize RNG -- run it for a bit to get to good values
2877 bytes_until_sample_
= 0;
2878 rnd_
= static_cast<uint32_t>(reinterpret_cast<uintptr_t>(this));
2879 for (int i
= 0; i
< 100; i
++) {
2880 PickNextSample(static_cast<size_t>(FLAGS_tcmalloc_sample_parameter
* 2));
2884 void TCMalloc_ThreadCache::Cleanup() {
2885 // Put unused memory back into central cache
2886 for (size_t cl
= 0; cl
< kNumClasses
; ++cl
) {
2887 if (list_
[cl
].length() > 0) {
2888 ReleaseToCentralCache(cl
, list_
[cl
].length());
2893 ALWAYS_INLINE
void* TCMalloc_ThreadCache::Allocate(size_t size
) {
2894 ASSERT(size
<= kMaxSize
);
2895 const size_t cl
= SizeClass(size
);
2896 FreeList
* list
= &list_
[cl
];
2897 size_t allocationSize
= ByteSizeForClass(cl
);
2898 if (list
->empty()) {
2899 FetchFromCentralCache(cl
, allocationSize
);
2900 if (list
->empty()) return NULL
;
2902 size_
-= allocationSize
;
2906 inline void TCMalloc_ThreadCache::Deallocate(void* ptr
, size_t cl
) {
2907 size_
+= ByteSizeForClass(cl
);
2908 FreeList
* list
= &list_
[cl
];
2910 // If enough data is free, put back into central cache
2911 if (list
->length() > kMaxFreeListLength
) {
2912 ReleaseToCentralCache(cl
, num_objects_to_move
[cl
]);
2914 if (size_
>= per_thread_cache_size
) Scavenge();
2917 // Remove some objects of class "cl" from central cache and add to thread heap
2918 ALWAYS_INLINE
void TCMalloc_ThreadCache::FetchFromCentralCache(size_t cl
, size_t allocationSize
) {
2919 int fetch_count
= num_objects_to_move
[cl
];
2921 central_cache
[cl
].RemoveRange(&start
, &end
, &fetch_count
);
2922 list_
[cl
].PushRange(fetch_count
, start
, end
);
2923 size_
+= allocationSize
* fetch_count
;
2926 // Remove some objects of class "cl" from thread heap and add to central cache
2927 inline void TCMalloc_ThreadCache::ReleaseToCentralCache(size_t cl
, int N
) {
2929 FreeList
* src
= &list_
[cl
];
2930 if (N
> src
->length()) N
= src
->length();
2931 size_
-= N
*ByteSizeForClass(cl
);
2933 // We return prepackaged chains of the correct size to the central cache.
2934 // TODO: Use the same format internally in the thread caches?
2935 int batch_size
= num_objects_to_move
[cl
];
2936 while (N
> batch_size
) {
2938 src
->PopRange(batch_size
, &head
, &tail
);
2939 central_cache
[cl
].InsertRange(head
, tail
, batch_size
);
2943 src
->PopRange(N
, &head
, &tail
);
2944 central_cache
[cl
].InsertRange(head
, tail
, N
);
2947 // Release idle memory to the central cache
2948 inline void TCMalloc_ThreadCache::Scavenge() {
2949 // If the low-water mark for the free list is L, it means we would
2950 // not have had to allocate anything from the central cache even if
2951 // we had reduced the free list size by L. We aim to get closer to
2952 // that situation by dropping L/2 nodes from the free list. This
2953 // may not release much memory, but if so we will call scavenge again
2954 // pretty soon and the low-water marks will be high on that call.
2955 //int64 start = CycleClock::Now();
2957 for (size_t cl
= 0; cl
< kNumClasses
; cl
++) {
2958 FreeList
* list
= &list_
[cl
];
2959 const int lowmark
= list
->lowwatermark();
2961 const int drop
= (lowmark
> 1) ? lowmark
/2 : 1;
2962 ReleaseToCentralCache(cl
, drop
);
2964 list
->clear_lowwatermark();
2967 //int64 finish = CycleClock::Now();
2969 //MESSAGE("GC: %.0f ns\n", ct.CyclesToUsec(finish-start)*1000.0);
2972 void TCMalloc_ThreadCache::PickNextSample(size_t k
) {
2973 // Make next "random" number
2974 // x^32+x^22+x^2+x^1+1 is a primitive polynomial for random numbers
2975 static const uint32_t kPoly
= (1 << 22) | (1 << 2) | (1 << 1) | (1 << 0);
2977 rnd_
= (r
<< 1) ^ ((static_cast<int32_t>(r
) >> 31) & kPoly
);
2979 // Next point is "rnd_ % (sample_period)". I.e., average
2980 // increment is "sample_period/2".
2981 const int flag_value
= static_cast<int>(FLAGS_tcmalloc_sample_parameter
);
2982 static int last_flag_value
= -1;
2984 if (flag_value
!= last_flag_value
) {
2985 SpinLockHolder
h(&sample_period_lock
);
2987 for (i
= 0; i
< (static_cast<int>(sizeof(primes_list
)/sizeof(primes_list
[0])) - 1); i
++) {
2988 if (primes_list
[i
] >= flag_value
) {
2992 sample_period
= primes_list
[i
];
2993 last_flag_value
= flag_value
;
2996 bytes_until_sample_
+= rnd_
% sample_period
;
2998 if (k
> (static_cast<size_t>(-1) >> 2)) {
2999 // If the user has asked for a huge allocation then it is possible
3000 // for the code below to loop infinitely. Just return (note that
3001 // this throws off the sampling accuracy somewhat, but a user who
3002 // is allocating more than 1G of memory at a time can live with a
3003 // minor inaccuracy in profiling of small allocations, and also
3004 // would rather not wait for the loop below to terminate).
3008 while (bytes_until_sample_
< k
) {
3009 // Increase bytes_until_sample_ by enough average sampling periods
3010 // (sample_period >> 1) to allow us to sample past the current
3012 bytes_until_sample_
+= (sample_period
>> 1);
3015 bytes_until_sample_
-= k
;
3018 void TCMalloc_ThreadCache::InitModule() {
3019 // There is a slight potential race here because of double-checked
3020 // locking idiom. However, as long as the program does a small
3021 // allocation before switching to multi-threaded mode, we will be
3022 // fine. We increase the chances of doing such a small allocation
3023 // by doing one in the constructor of the module_enter_exit_hook
3024 // object declared below.
3025 SpinLockHolder
h(&pageheap_lock
);
3031 threadheap_allocator
.Init();
3032 span_allocator
.Init();
3033 span_allocator
.New(); // Reduce cache conflicts
3034 span_allocator
.New(); // Reduce cache conflicts
3035 stacktrace_allocator
.Init();
3036 DLL_Init(&sampled_objects
);
3037 for (size_t i
= 0; i
< kNumClasses
; ++i
) {
3038 central_cache
[i
].Init(i
);
3042 #if defined(WTF_CHANGES) && OS(DARWIN)
3043 FastMallocZone::init();
3048 inline TCMalloc_ThreadCache
* TCMalloc_ThreadCache::NewHeap(ThreadIdentifier tid
) {
3049 // Create the heap and add it to the linked list
3050 TCMalloc_ThreadCache
*heap
= threadheap_allocator
.New();
3052 heap
->next_
= thread_heaps
;
3054 if (thread_heaps
!= NULL
) thread_heaps
->prev_
= heap
;
3055 thread_heaps
= heap
;
3056 thread_heap_count
++;
3057 RecomputeThreadCacheSize();
3061 inline TCMalloc_ThreadCache
* TCMalloc_ThreadCache::GetThreadHeap() {
3063 // __thread is faster, but only when the kernel supports it
3064 if (KernelSupportsTLS())
3065 return threadlocal_heap
;
3067 return static_cast<TCMalloc_ThreadCache
*>(TlsGetValue(tlsIndex
));
3069 return static_cast<TCMalloc_ThreadCache
*>(pthread_getspecific(heap_key
));
3073 inline TCMalloc_ThreadCache
* TCMalloc_ThreadCache::GetCache() {
3074 TCMalloc_ThreadCache
* ptr
= NULL
;
3078 ptr
= GetThreadHeap();
3080 if (ptr
== NULL
) ptr
= CreateCacheIfNecessary();
3084 // In deletion paths, we do not try to create a thread-cache. This is
3085 // because we may be in the thread destruction code and may have
3086 // already cleaned up the cache for this thread.
3087 inline TCMalloc_ThreadCache
* TCMalloc_ThreadCache::GetCacheIfPresent() {
3088 if (!tsd_inited
) return NULL
;
3089 void* const p
= GetThreadHeap();
3090 return reinterpret_cast<TCMalloc_ThreadCache
*>(p
);
3093 void TCMalloc_ThreadCache::InitTSD() {
3094 ASSERT(!tsd_inited
);
3095 #if USE(PTHREAD_GETSPECIFIC_DIRECT)
3096 pthread_key_init_np(heap_key
, DestroyThreadCache
);
3098 pthread_key_create(&heap_key
, DestroyThreadCache
);
3101 tlsIndex
= TlsAlloc();
3106 // We may have used a fake pthread_t for the main thread. Fix it.
3108 memset(&zero
, 0, sizeof(zero
));
3111 SpinLockHolder
h(&pageheap_lock
);
3113 ASSERT(pageheap_lock
.IsHeld());
3115 for (TCMalloc_ThreadCache
* h
= thread_heaps
; h
!= NULL
; h
= h
->next_
) {
3118 h
->tid_
= GetCurrentThreadId();
3121 if (pthread_equal(h
->tid_
, zero
)) {
3122 h
->tid_
= pthread_self();
3128 TCMalloc_ThreadCache
* TCMalloc_ThreadCache::CreateCacheIfNecessary() {
3129 // Initialize per-thread data if necessary
3130 TCMalloc_ThreadCache
* heap
= NULL
;
3132 SpinLockHolder
h(&pageheap_lock
);
3139 me
= GetCurrentThreadId();
3142 // Early on in glibc's life, we cannot even call pthread_self()
3145 memset(&me
, 0, sizeof(me
));
3147 me
= pthread_self();
3151 // This may be a recursive malloc call from pthread_setspecific()
3152 // In that case, the heap for this thread has already been created
3153 // and added to the linked list. So we search for that first.
3154 for (TCMalloc_ThreadCache
* h
= thread_heaps
; h
!= NULL
; h
= h
->next_
) {
3156 if (h
->tid_
== me
) {
3158 if (pthread_equal(h
->tid_
, me
)) {
3165 if (heap
== NULL
) heap
= NewHeap(me
);
3168 // We call pthread_setspecific() outside the lock because it may
3169 // call malloc() recursively. The recursive call will never get
3170 // here again because it will find the already allocated heap in the
3171 // linked list of heaps.
3172 if (!heap
->in_setspecific_
&& tsd_inited
) {
3173 heap
->in_setspecific_
= true;
3174 setThreadHeap(heap
);
3179 void TCMalloc_ThreadCache::BecomeIdle() {
3180 if (!tsd_inited
) return; // No caches yet
3181 TCMalloc_ThreadCache
* heap
= GetThreadHeap();
3182 if (heap
== NULL
) return; // No thread cache to remove
3183 if (heap
->in_setspecific_
) return; // Do not disturb the active caller
3185 heap
->in_setspecific_
= true;
3186 setThreadHeap(NULL
);
3188 // Also update the copy in __thread
3189 threadlocal_heap
= NULL
;
3191 heap
->in_setspecific_
= false;
3192 if (GetThreadHeap() == heap
) {
3193 // Somehow heap got reinstated by a recursive call to malloc
3194 // from pthread_setspecific. We give up in this case.
3198 // We can now get rid of the heap
3202 void TCMalloc_ThreadCache::DestroyThreadCache(void* ptr
) {
3203 // Note that "ptr" cannot be NULL since pthread promises not
3204 // to invoke the destructor on NULL values, but for safety,
3206 if (ptr
== NULL
) return;
3208 // Prevent fast path of GetThreadHeap() from returning heap.
3209 threadlocal_heap
= NULL
;
3211 DeleteCache(reinterpret_cast<TCMalloc_ThreadCache
*>(ptr
));
3214 void TCMalloc_ThreadCache::DeleteCache(TCMalloc_ThreadCache
* heap
) {
3215 // Remove all memory from heap
3218 // Remove from linked list
3219 SpinLockHolder
h(&pageheap_lock
);
3220 if (heap
->next_
!= NULL
) heap
->next_
->prev_
= heap
->prev_
;
3221 if (heap
->prev_
!= NULL
) heap
->prev_
->next_
= heap
->next_
;
3222 if (thread_heaps
== heap
) thread_heaps
= heap
->next_
;
3223 thread_heap_count
--;
3224 RecomputeThreadCacheSize();
3226 threadheap_allocator
.Delete(heap
);
3229 void TCMalloc_ThreadCache::RecomputeThreadCacheSize() {
3230 // Divide available space across threads
3231 int n
= thread_heap_count
> 0 ? thread_heap_count
: 1;
3232 size_t space
= overall_thread_cache_size
/ n
;
3234 // Limit to allowed range
3235 if (space
< kMinThreadCacheSize
) space
= kMinThreadCacheSize
;
3236 if (space
> kMaxThreadCacheSize
) space
= kMaxThreadCacheSize
;
3238 per_thread_cache_size
= space
;
3241 void TCMalloc_ThreadCache::Print() const {
3242 for (size_t cl
= 0; cl
< kNumClasses
; ++cl
) {
3243 MESSAGE(" %5" PRIuS
" : %4d len; %4d lo\n",
3244 ByteSizeForClass(cl
),
3246 list_
[cl
].lowwatermark());
3250 // Extract interesting stats
3251 struct TCMallocStats
{
3252 uint64_t system_bytes
; // Bytes alloced from system
3253 uint64_t thread_bytes
; // Bytes in thread caches
3254 uint64_t central_bytes
; // Bytes in central cache
3255 uint64_t transfer_bytes
; // Bytes in central transfer cache
3256 uint64_t pageheap_bytes
; // Bytes in page heap
3257 uint64_t metadata_bytes
; // Bytes alloced for metadata
3261 // Get stats into "r". Also get per-size-class counts if class_count != NULL
3262 static void ExtractStats(TCMallocStats
* r
, uint64_t* class_count
) {
3263 r
->central_bytes
= 0;
3264 r
->transfer_bytes
= 0;
3265 for (int cl
= 0; cl
< kNumClasses
; ++cl
) {
3266 const int length
= central_cache
[cl
].length();
3267 const int tc_length
= central_cache
[cl
].tc_length();
3268 r
->central_bytes
+= static_cast<uint64_t>(ByteSizeForClass(cl
)) * length
;
3269 r
->transfer_bytes
+=
3270 static_cast<uint64_t>(ByteSizeForClass(cl
)) * tc_length
;
3271 if (class_count
) class_count
[cl
] = length
+ tc_length
;
3274 // Add stats from per-thread heaps
3275 r
->thread_bytes
= 0;
3277 SpinLockHolder
h(&pageheap_lock
);
3278 for (TCMalloc_ThreadCache
* h
= thread_heaps
; h
!= NULL
; h
= h
->next_
) {
3279 r
->thread_bytes
+= h
->Size();
3281 for (size_t cl
= 0; cl
< kNumClasses
; ++cl
) {
3282 class_count
[cl
] += h
->freelist_length(cl
);
3289 SpinLockHolder
h(&pageheap_lock
);
3290 r
->system_bytes
= pageheap
->SystemBytes();
3291 r
->metadata_bytes
= metadata_system_bytes
;
3292 r
->pageheap_bytes
= pageheap
->FreeBytes();
3298 // WRITE stats to "out"
3299 static void DumpStats(TCMalloc_Printer
* out
, int level
) {
3300 TCMallocStats stats
;
3301 uint64_t class_count
[kNumClasses
];
3302 ExtractStats(&stats
, (level
>= 2 ? class_count
: NULL
));
3305 out
->printf("------------------------------------------------\n");
3306 uint64_t cumulative
= 0;
3307 for (int cl
= 0; cl
< kNumClasses
; ++cl
) {
3308 if (class_count
[cl
] > 0) {
3309 uint64_t class_bytes
= class_count
[cl
] * ByteSizeForClass(cl
);
3310 cumulative
+= class_bytes
;
3311 out
->printf("class %3d [ %8" PRIuS
" bytes ] : "
3312 "%8" PRIu64
" objs; %5.1f MB; %5.1f cum MB\n",
3313 cl
, ByteSizeForClass(cl
),
3315 class_bytes
/ 1048576.0,
3316 cumulative
/ 1048576.0);
3320 SpinLockHolder
h(&pageheap_lock
);
3321 pageheap
->Dump(out
);
3324 const uint64_t bytes_in_use
= stats
.system_bytes
3325 - stats
.pageheap_bytes
3326 - stats
.central_bytes
3327 - stats
.transfer_bytes
3328 - stats
.thread_bytes
;
3330 out
->printf("------------------------------------------------\n"
3331 "MALLOC: %12" PRIu64
" Heap size\n"
3332 "MALLOC: %12" PRIu64
" Bytes in use by application\n"
3333 "MALLOC: %12" PRIu64
" Bytes free in page heap\n"
3334 "MALLOC: %12" PRIu64
" Bytes free in central cache\n"
3335 "MALLOC: %12" PRIu64
" Bytes free in transfer cache\n"
3336 "MALLOC: %12" PRIu64
" Bytes free in thread caches\n"
3337 "MALLOC: %12" PRIu64
" Spans in use\n"
3338 "MALLOC: %12" PRIu64
" Thread heaps in use\n"
3339 "MALLOC: %12" PRIu64
" Metadata allocated\n"
3340 "------------------------------------------------\n",
3343 stats
.pageheap_bytes
,
3344 stats
.central_bytes
,
3345 stats
.transfer_bytes
,
3347 uint64_t(span_allocator
.inuse()),
3348 uint64_t(threadheap_allocator
.inuse()),
3349 stats
.metadata_bytes
);
3352 static void PrintStats(int level
) {
3353 const int kBufferSize
= 16 << 10;
3354 char* buffer
= new char[kBufferSize
];
3355 TCMalloc_Printer
printer(buffer
, kBufferSize
);
3356 DumpStats(&printer
, level
);
3357 write(STDERR_FILENO
, buffer
, strlen(buffer
));
3361 static void** DumpStackTraces() {
3362 // Count how much space we need
3363 int needed_slots
= 0;
3365 SpinLockHolder
h(&pageheap_lock
);
3366 for (Span
* s
= sampled_objects
.next
; s
!= &sampled_objects
; s
= s
->next
) {
3367 StackTrace
* stack
= reinterpret_cast<StackTrace
*>(s
->objects
);
3368 needed_slots
+= 3 + stack
->depth
;
3370 needed_slots
+= 100; // Slop in case sample grows
3371 needed_slots
+= needed_slots
/8; // An extra 12.5% slop
3374 void** result
= new void*[needed_slots
];
3375 if (result
== NULL
) {
3376 MESSAGE("tcmalloc: could not allocate %d slots for stack traces\n",
3381 SpinLockHolder
h(&pageheap_lock
);
3383 for (Span
* s
= sampled_objects
.next
; s
!= &sampled_objects
; s
= s
->next
) {
3384 ASSERT(used_slots
< needed_slots
); // Need to leave room for terminator
3385 StackTrace
* stack
= reinterpret_cast<StackTrace
*>(s
->objects
);
3386 if (used_slots
+ 3 + stack
->depth
>= needed_slots
) {
3391 result
[used_slots
+0] = reinterpret_cast<void*>(static_cast<uintptr_t>(1));
3392 result
[used_slots
+1] = reinterpret_cast<void*>(stack
->size
);
3393 result
[used_slots
+2] = reinterpret_cast<void*>(stack
->depth
);
3394 for (int d
= 0; d
< stack
->depth
; d
++) {
3395 result
[used_slots
+3+d
] = stack
->stack
[d
];
3397 used_slots
+= 3 + stack
->depth
;
3399 result
[used_slots
] = reinterpret_cast<void*>(static_cast<uintptr_t>(0));
3406 // TCMalloc's support for extra malloc interfaces
3407 class TCMallocImplementation
: public MallocExtension
{
3409 virtual void GetStats(char* buffer
, int buffer_length
) {
3410 ASSERT(buffer_length
> 0);
3411 TCMalloc_Printer
printer(buffer
, buffer_length
);
3413 // Print level one stats unless lots of space is available
3414 if (buffer_length
< 10000) {
3415 DumpStats(&printer
, 1);
3417 DumpStats(&printer
, 2);
3421 virtual void** ReadStackTraces() {
3422 return DumpStackTraces();
3425 virtual bool GetNumericProperty(const char* name
, size_t* value
) {
3426 ASSERT(name
!= NULL
);
3428 if (strcmp(name
, "generic.current_allocated_bytes") == 0) {
3429 TCMallocStats stats
;
3430 ExtractStats(&stats
, NULL
);
3431 *value
= stats
.system_bytes
3432 - stats
.thread_bytes
3433 - stats
.central_bytes
3434 - stats
.pageheap_bytes
;
3438 if (strcmp(name
, "generic.heap_size") == 0) {
3439 TCMallocStats stats
;
3440 ExtractStats(&stats
, NULL
);
3441 *value
= stats
.system_bytes
;
3445 if (strcmp(name
, "tcmalloc.slack_bytes") == 0) {
3446 // We assume that bytes in the page heap are not fragmented too
3447 // badly, and are therefore available for allocation.
3448 SpinLockHolder
l(&pageheap_lock
);
3449 *value
= pageheap
->FreeBytes();
3453 if (strcmp(name
, "tcmalloc.max_total_thread_cache_bytes") == 0) {
3454 SpinLockHolder
l(&pageheap_lock
);
3455 *value
= overall_thread_cache_size
;
3459 if (strcmp(name
, "tcmalloc.current_total_thread_cache_bytes") == 0) {
3460 TCMallocStats stats
;
3461 ExtractStats(&stats
, NULL
);
3462 *value
= stats
.thread_bytes
;
3469 virtual bool SetNumericProperty(const char* name
, size_t value
) {
3470 ASSERT(name
!= NULL
);
3472 if (strcmp(name
, "tcmalloc.max_total_thread_cache_bytes") == 0) {
3473 // Clip the value to a reasonable range
3474 if (value
< kMinThreadCacheSize
) value
= kMinThreadCacheSize
;
3475 if (value
> (1<<30)) value
= (1<<30); // Limit to 1GB
3477 SpinLockHolder
l(&pageheap_lock
);
3478 overall_thread_cache_size
= static_cast<size_t>(value
);
3479 TCMalloc_ThreadCache::RecomputeThreadCacheSize();
3486 virtual void MarkThreadIdle() {
3487 TCMalloc_ThreadCache::BecomeIdle();
3490 virtual void ReleaseFreeMemory() {
3491 SpinLockHolder
h(&pageheap_lock
);
3492 pageheap
->ReleaseFreePages();
3497 // The constructor allocates an object to ensure that initialization
3498 // runs before main(), and therefore we do not have a chance to become
3499 // multi-threaded before initialization. We also create the TSD key
3500 // here. Presumably by the time this constructor runs, glibc is in
3501 // good enough shape to handle pthread_key_create().
3503 // The constructor also takes the opportunity to tell STL to use
3504 // tcmalloc. We want to do this early, before construct time, so
3505 // all user STL allocations go through tcmalloc (which works really
3508 // The destructor prints stats when the program exits.
3509 class TCMallocGuard
{
3513 #ifdef HAVE_TLS // this is true if the cc/ld/libc combo support TLS
3514 // Check whether the kernel also supports TLS (needs to happen at runtime)
3515 CheckIfKernelSupportsTLS();
3518 #ifdef WIN32 // patch the windows VirtualAlloc, etc.
3519 PatchWindowsFunctions(); // defined in windows/patch_functions.cc
3523 TCMalloc_ThreadCache::InitTSD();
3526 MallocExtension::Register(new TCMallocImplementation
);
3532 const char* env
= getenv("MALLOCSTATS");
3534 int level
= atoi(env
);
3535 if (level
< 1) level
= 1;
3539 UnpatchWindowsFunctions();
3546 static TCMallocGuard module_enter_exit_hook
;
3550 //-------------------------------------------------------------------
3551 // Helpers for the exported routines below
3552 //-------------------------------------------------------------------
3556 static Span
* DoSampledAllocation(size_t size
) {
3558 // Grab the stack trace outside the heap lock
3560 tmp
.depth
= GetStackTrace(tmp
.stack
, kMaxStackDepth
, 1);
3563 SpinLockHolder
h(&pageheap_lock
);
3565 Span
*span
= pageheap
->New(pages(size
== 0 ? 1 : size
));
3570 // Allocate stack trace
3571 StackTrace
*stack
= stacktrace_allocator
.New();
3572 if (stack
== NULL
) {
3573 // Sampling failed because of lack of memory
3579 span
->objects
= stack
;
3580 DLL_Prepend(&sampled_objects
, span
);
3586 static inline bool CheckCachedSizeClass(void *ptr
) {
3587 PageID p
= reinterpret_cast<uintptr_t>(ptr
) >> kPageShift
;
3588 size_t cached_value
= pageheap
->GetSizeClassIfCached(p
);
3589 return cached_value
== 0 ||
3590 cached_value
== pageheap
->GetDescriptor(p
)->sizeclass
;
3593 static inline void* CheckedMallocResult(void *result
)
3595 ASSERT(result
== 0 || CheckCachedSizeClass(result
));
3599 static inline void* SpanToMallocResult(Span
*span
) {
3600 ASSERT_SPAN_COMMITTED(span
);
3601 pageheap
->CacheSizeClass(span
->start
, 0);
3603 CheckedMallocResult(reinterpret_cast<void*>(span
->start
<< kPageShift
));
3607 template <bool crashOnFailure
>
3609 static ALWAYS_INLINE
void* do_malloc(size_t size
) {
3613 ASSERT(!isForbidden());
3616 // The following call forces module initialization
3617 TCMalloc_ThreadCache
* heap
= TCMalloc_ThreadCache::GetCache();
3619 if ((FLAGS_tcmalloc_sample_parameter
> 0) && heap
->SampleAllocation(size
)) {
3620 Span
* span
= DoSampledAllocation(size
);
3622 ret
= SpanToMallocResult(span
);
3626 if (size
> kMaxSize
) {
3627 // Use page-level allocator
3628 SpinLockHolder
h(&pageheap_lock
);
3629 Span
* span
= pageheap
->New(pages(size
));
3631 ret
= SpanToMallocResult(span
);
3634 // The common case, and also the simplest. This just pops the
3635 // size-appropriate freelist, afer replenishing it if it's empty.
3636 ret
= CheckedMallocResult(heap
->Allocate(size
));
3640 if (crashOnFailure
) // This branch should be optimized out by the compiler.
3649 static ALWAYS_INLINE
void do_free(void* ptr
) {
3650 if (ptr
== NULL
) return;
3651 ASSERT(pageheap
!= NULL
); // Should not call free() before malloc()
3652 const PageID p
= reinterpret_cast<uintptr_t>(ptr
) >> kPageShift
;
3654 size_t cl
= pageheap
->GetSizeClassIfCached(p
);
3657 span
= pageheap
->GetDescriptor(p
);
3658 cl
= span
->sizeclass
;
3659 pageheap
->CacheSizeClass(p
, cl
);
3662 #ifndef NO_TCMALLOC_SAMPLES
3663 ASSERT(!pageheap
->GetDescriptor(p
)->sample
);
3665 TCMalloc_ThreadCache
* heap
= TCMalloc_ThreadCache::GetCacheIfPresent();
3667 heap
->Deallocate(ptr
, cl
);
3669 // Delete directly into central cache
3670 SLL_SetNext(ptr
, NULL
);
3671 central_cache
[cl
].InsertRange(ptr
, ptr
, 1);
3674 SpinLockHolder
h(&pageheap_lock
);
3675 ASSERT(reinterpret_cast<uintptr_t>(ptr
) % kPageSize
== 0);
3676 ASSERT(span
!= NULL
&& span
->start
== p
);
3677 #ifndef NO_TCMALLOC_SAMPLES
3680 stacktrace_allocator
.Delete(reinterpret_cast<StackTrace
*>(span
->objects
));
3681 span
->objects
= NULL
;
3684 pageheap
->Delete(span
);
3689 // For use by exported routines below that want specific alignments
3691 // Note: this code can be slow, and can significantly fragment memory.
3692 // The expectation is that memalign/posix_memalign/valloc/pvalloc will
3693 // not be invoked very often. This requirement simplifies our
3694 // implementation and allows us to tune for expected allocation
3696 static void* do_memalign(size_t align
, size_t size
) {
3697 ASSERT((align
& (align
- 1)) == 0);
3699 if (pageheap
== NULL
) TCMalloc_ThreadCache::InitModule();
3701 // Allocate at least one byte to avoid boundary conditions below
3702 if (size
== 0) size
= 1;
3704 if (size
<= kMaxSize
&& align
< kPageSize
) {
3705 // Search through acceptable size classes looking for one with
3706 // enough alignment. This depends on the fact that
3707 // InitSizeClasses() currently produces several size classes that
3708 // are aligned at powers of two. We will waste time and space if
3709 // we miss in the size class array, but that is deemed acceptable
3710 // since memalign() should be used rarely.
3711 size_t cl
= SizeClass(size
);
3712 while (cl
< kNumClasses
&& ((class_to_size
[cl
] & (align
- 1)) != 0)) {
3715 if (cl
< kNumClasses
) {
3716 TCMalloc_ThreadCache
* heap
= TCMalloc_ThreadCache::GetCache();
3717 return CheckedMallocResult(heap
->Allocate(class_to_size
[cl
]));
3721 // We will allocate directly from the page heap
3722 SpinLockHolder
h(&pageheap_lock
);
3724 if (align
<= kPageSize
) {
3725 // Any page-level allocation will be fine
3726 // TODO: We could put the rest of this page in the appropriate
3727 // TODO: cache but it does not seem worth it.
3728 Span
* span
= pageheap
->New(pages(size
));
3729 return span
== NULL
? NULL
: SpanToMallocResult(span
);
3732 // Allocate extra pages and carve off an aligned portion
3733 const Length alloc
= pages(size
+ align
);
3734 Span
* span
= pageheap
->New(alloc
);
3735 if (span
== NULL
) return NULL
;
3737 // Skip starting portion so that we end up aligned
3739 while ((((span
->start
+skip
) << kPageShift
) & (align
- 1)) != 0) {
3742 ASSERT(skip
< alloc
);
3744 Span
* rest
= pageheap
->Split(span
, skip
);
3745 pageheap
->Delete(span
);
3749 // Skip trailing portion that we do not need to return
3750 const Length needed
= pages(size
);
3751 ASSERT(span
->length
>= needed
);
3752 if (span
->length
> needed
) {
3753 Span
* trailer
= pageheap
->Split(span
, needed
);
3754 pageheap
->Delete(trailer
);
3756 return SpanToMallocResult(span
);
3760 // Helpers for use by exported routines below:
3763 static inline void do_malloc_stats() {
3768 static inline int do_mallopt(int, int) {
3769 return 1; // Indicates error
3772 #ifdef HAVE_STRUCT_MALLINFO // mallinfo isn't defined on freebsd, for instance
3773 static inline struct mallinfo
do_mallinfo() {
3774 TCMallocStats stats
;
3775 ExtractStats(&stats
, NULL
);
3777 // Just some of the fields are filled in.
3778 struct mallinfo info
;
3779 memset(&info
, 0, sizeof(info
));
3781 // Unfortunately, the struct contains "int" field, so some of the
3782 // size values will be truncated.
3783 info
.arena
= static_cast<int>(stats
.system_bytes
);
3784 info
.fsmblks
= static_cast<int>(stats
.thread_bytes
3785 + stats
.central_bytes
3786 + stats
.transfer_bytes
);
3787 info
.fordblks
= static_cast<int>(stats
.pageheap_bytes
);
3788 info
.uordblks
= static_cast<int>(stats
.system_bytes
3789 - stats
.thread_bytes
3790 - stats
.central_bytes
3791 - stats
.transfer_bytes
3792 - stats
.pageheap_bytes
);
3798 //-------------------------------------------------------------------
3799 // Exported routines
3800 //-------------------------------------------------------------------
3802 // CAVEAT: The code structure below ensures that MallocHook methods are always
3803 // called from the stack frame of the invoked allocation function.
3804 // heap-checker.cc depends on this to start a stack trace from
3805 // the call to the (de)allocation function.

#define do_malloc do_malloc<crashOnFailure>

template <bool crashOnFailure>
ALWAYS_INLINE void* malloc(size_t);

void* fastMalloc(size_t size)
{
    return malloc<true>(size);
}

TryMallocReturnValue tryFastMalloc(size_t size)
{
    return malloc<false>(size);
}

template <bool crashOnFailure>
ALWAYS_INLINE
void* malloc(size_t size) {
#if ENABLE(WTF_MALLOC_VALIDATION)
    if (std::numeric_limits<size_t>::max() - Internal::ValidationBufferSize <= size)  // If overflow would occur...
        return 0;
    void* result = do_malloc(size + Internal::ValidationBufferSize);
    if (!result)
        return 0;

    Internal::ValidationHeader* header = static_cast<Internal::ValidationHeader*>(result);
    header->m_size = size;
    header->m_type = Internal::AllocTypeMalloc;
    header->m_prefix = static_cast<unsigned>(Internal::ValidationPrefix);
    result = header + 1;
    *Internal::fastMallocValidationSuffix(result) = Internal::ValidationSuffix;
    fastMallocValidate(result);
#else
    void* result = do_malloc(size);
#endif

#ifndef WTF_CHANGES
    MallocHook::InvokeNewHook(result, size);
#endif
    return result;
}
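
// For illustration, when WTF_MALLOC_VALIDATION is enabled the raw allocation
// produced above is laid out as
//   [ ValidationHeader | user bytes (header->m_size of them) | ValidationSuffix ]
// and the pointer handed back to the caller is "header + 1", i.e. the start of
// the user bytes; fastMallocValidationHeader()/fastMallocValidationSuffix()
// step backwards and forwards from that user pointer.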

void free(void* ptr) {
#ifndef WTF_CHANGES
  MallocHook::InvokeDeleteHook(ptr);
#endif

#if ENABLE(WTF_MALLOC_VALIDATION)
    if (!ptr)
        return;

    fastMallocValidate(ptr);
    Internal::ValidationHeader* header = Internal::fastMallocValidationHeader(ptr);
    memset(ptr, 0xCC, header->m_size);
    do_free(header);
#else
    do_free(ptr);
#endif
}

template <bool crashOnFailure>
ALWAYS_INLINE void* calloc(size_t, size_t);

void* fastCalloc(size_t n, size_t elem_size)
{
    void* result = calloc<true>(n, elem_size);
#if ENABLE(WTF_MALLOC_VALIDATION)
    fastMallocValidate(result);
#endif
    return result;
}

TryMallocReturnValue tryFastCalloc(size_t n, size_t elem_size)
{
    void* result = calloc<false>(n, elem_size);
#if ENABLE(WTF_MALLOC_VALIDATION)
    fastMallocValidate(result);
#endif
    return result;
}

template <bool crashOnFailure>
ALWAYS_INLINE
void* calloc(size_t n, size_t elem_size) {
  size_t totalBytes = n * elem_size;

  // Protect against overflow
  if (n > 1 && elem_size && (totalBytes / elem_size) != n)
    return 0;
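
  // For illustration: on a 32-bit build, n == 65537 and elem_size == 65537
  // gives totalBytes == (65537 * 65537) mod 2^32 == 131073, and
  // 131073 / 65537 == 1 != n, so the request is correctly rejected instead of
  // returning a buffer that is far too small.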

#if ENABLE(WTF_MALLOC_VALIDATION)
    void* result = malloc<crashOnFailure>(totalBytes);
    if (!result)
        return 0;

    memset(result, 0, totalBytes);
    fastMallocValidate(result);
#else
    void* result = do_malloc(totalBytes);
    if (result != NULL) {
        memset(result, 0, totalBytes);
    }
#endif

#ifndef WTF_CHANGES
    MallocHook::InvokeNewHook(result, totalBytes);
#endif
    return result;
}

// Since cfree isn't used anywhere, we don't compile it in.
#ifndef WTF_CHANGES
void cfree(void* ptr) {
  MallocHook::InvokeDeleteHook(ptr);
  do_free(ptr);
}
#endif

template <bool crashOnFailure>
ALWAYS_INLINE void* realloc(void*, size_t);

void* fastRealloc(void* old_ptr, size_t new_size)
{
#if ENABLE(WTF_MALLOC_VALIDATION)
    fastMallocValidate(old_ptr);
#endif
    void* result = realloc<true>(old_ptr, new_size);
#if ENABLE(WTF_MALLOC_VALIDATION)
    fastMallocValidate(result);
#endif
    return result;
}

TryMallocReturnValue tryFastRealloc(void* old_ptr, size_t new_size)
{
#if ENABLE(WTF_MALLOC_VALIDATION)
    fastMallocValidate(old_ptr);
#endif
    void* result = realloc<false>(old_ptr, new_size);
#if ENABLE(WTF_MALLOC_VALIDATION)
    fastMallocValidate(result);
#endif
    return result;
}

template <bool crashOnFailure>
ALWAYS_INLINE
void* realloc(void* old_ptr, size_t new_size) {
  if (old_ptr == NULL) {
#if ENABLE(WTF_MALLOC_VALIDATION)
    void* result = malloc<crashOnFailure>(new_size);
#else
    void* result = do_malloc(new_size);
#ifndef WTF_CHANGES
    MallocHook::InvokeNewHook(result, new_size);
#endif
#endif
    return result;
  }
  if (new_size == 0) {
#ifndef WTF_CHANGES
    MallocHook::InvokeDeleteHook(old_ptr);
#endif
    free(old_ptr);
    return NULL;
  }

#if ENABLE(WTF_MALLOC_VALIDATION)
    if (std::numeric_limits<size_t>::max() - Internal::ValidationBufferSize <= new_size)  // If overflow would occur...
        return 0;
    Internal::ValidationHeader* header = Internal::fastMallocValidationHeader(old_ptr);
    fastMallocValidate(old_ptr);
    old_ptr = header;
    header->m_size = new_size;
    new_size += Internal::ValidationBufferSize;
#endif

  // Get the size of the old entry
  const PageID p = reinterpret_cast<uintptr_t>(old_ptr) >> kPageShift;
  size_t cl = pageheap->GetSizeClassIfCached(p);
  Span* span = NULL;
  size_t old_size;
  if (cl == 0) {
    span = pageheap->GetDescriptor(p);
    cl = span->sizeclass;
    pageheap->CacheSizeClass(p, cl);
  }
  if (cl != 0) {
    old_size = ByteSizeForClass(cl);
  } else {
    ASSERT(span != NULL);
    old_size = span->length << kPageShift;
  }

  // Reallocate if the new size is larger than the old size,
  // or if the new size is significantly smaller than the old size.
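  // For illustration: with an old allocation of 4096 bytes, growing to 5000
  // bytes always reallocates, while shrinking to 4000 bytes does not, because
  // AllocationSize(4000) would typically round back up to a 4096-byte
  // allocation; only a shrink small enough to land in a smaller size class
  // (or fewer pages) triggers the copy below.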
  if ((new_size > old_size) || (AllocationSize(new_size) < old_size)) {
    // Need to reallocate
    void* new_ptr = do_malloc(new_size);
    if (new_ptr == NULL) {
      return NULL;
    }
#ifndef WTF_CHANGES
    MallocHook::InvokeNewHook(new_ptr, new_size);
#endif
    memcpy(new_ptr, old_ptr, ((old_size < new_size) ? old_size : new_size));
#ifndef WTF_CHANGES
    MallocHook::InvokeDeleteHook(old_ptr);
#endif
    // We could use a variant of do_free() that leverages the fact
    // that we already know the sizeclass of old_ptr.  The benefit
    // would be small, so don't bother.
    do_free(old_ptr);
#if ENABLE(WTF_MALLOC_VALIDATION)
    new_ptr = static_cast<Internal::ValidationHeader*>(new_ptr) + 1;
    *Internal::fastMallocValidationSuffix(new_ptr) = Internal::ValidationSuffix;
#endif
    return new_ptr;
  } else {
#if ENABLE(WTF_MALLOC_VALIDATION)
    old_ptr = static_cast<Internal::ValidationHeader*>(old_ptr) + 1; // Set old_ptr back to the user pointer.
    *Internal::fastMallocValidationSuffix(old_ptr) = Internal::ValidationSuffix;
#endif
    return old_ptr;
  }
}

static SpinLock set_new_handler_lock = SPINLOCK_INITIALIZER;

static inline void* cpp_alloc(size_t size, bool nothrow) {
  for (;;) {
    void* p = do_malloc(size);
    if (p == NULL) {  // allocation failed
      // Get the current new handler.  NB: this function is not
      // thread-safe.  We make a feeble stab at making it so here, but
      // this lock only protects against tcmalloc interfering with
      // itself, not with other libraries calling set_new_handler.
      std::new_handler nh;
      {
        SpinLockHolder h(&set_new_handler_lock);
        nh = std::set_new_handler(0);
        (void) std::set_new_handler(nh);
      }
      // If no new_handler is established, the allocation failed.
      if (!nh) {
        if (nothrow) return 0;
        throw std::bad_alloc();
      }
      // Otherwise, try the new_handler.  If it returns, retry the
      // allocation.  If it throws std::bad_alloc, fail the allocation.
      // if it throws something else, don't interfere.
      try {
        (*nh)();
      } catch (const std::bad_alloc&) {
        if (!nothrow) throw;
        return p;
      }
    } else {  // allocation success
      return p;
    }
  }
}
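
// For illustration: a client that wants the retry path above to do something
// useful can install its own handler through the standard
// std::set_new_handler interface, e.g.
//
//   static void dropCachesHandler()
//   {
//       // Release some application-level cache, or give up:
//       std::set_new_handler(0);   // the next failure will throw / return 0
//   }
//   ...
//   std::set_new_handler(dropCachesHandler);
//
// cpp_alloc() re-reads the installed handler on every failed attempt, calls
// it, and then retries the allocation.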

#if ENABLE(GLOBAL_FASTMALLOC_NEW)

void* operator new(size_t size) {
  void* p = cpp_alloc(size, false);
  // We keep this next instruction out of cpp_alloc for a reason: when
  // it's in, and new just calls cpp_alloc, the optimizer may fold the
  // new call into cpp_alloc, which messes up our whole section-based
  // stacktracing (see ATTRIBUTE_SECTION, above).  This ensures cpp_alloc
  // isn't the last thing this fn calls, and prevents the folding.
  MallocHook::InvokeNewHook(p, size);
  return p;
}

void* operator new(size_t size, const std::nothrow_t&) __THROW {
  void* p = cpp_alloc(size, true);
  MallocHook::InvokeNewHook(p, size);
  return p;
}

void operator delete(void* p) __THROW {
  MallocHook::InvokeDeleteHook(p);
  do_free(p);
}

void operator delete(void* p, const std::nothrow_t&) __THROW {
  MallocHook::InvokeDeleteHook(p);
  do_free(p);
}

void* operator new[](size_t size) {
  void* p = cpp_alloc(size, false);
  // We keep this next instruction out of cpp_alloc for a reason: when
  // it's in, and new just calls cpp_alloc, the optimizer may fold the
  // new call into cpp_alloc, which messes up our whole section-based
  // stacktracing (see ATTRIBUTE_SECTION, above).  This ensures cpp_alloc
  // isn't the last thing this fn calls, and prevents the folding.
  MallocHook::InvokeNewHook(p, size);
  return p;
}

void* operator new[](size_t size, const std::nothrow_t&) __THROW {
  void* p = cpp_alloc(size, true);
  MallocHook::InvokeNewHook(p, size);
  return p;
}

void operator delete[](void* p) __THROW {
  MallocHook::InvokeDeleteHook(p);
  do_free(p);
}

void operator delete[](void* p, const std::nothrow_t&) __THROW {
  MallocHook::InvokeDeleteHook(p);
  do_free(p);
}

#endif

extern "C" void* memalign(size_t align, size_t size) __THROW {
  void* result = do_memalign(align, size);
  MallocHook::InvokeNewHook(result, size);
  return result;
}

extern "C" int posix_memalign(void** result_ptr, size_t align, size_t size)
    __THROW {
  if (((align % sizeof(void*)) != 0) ||
      ((align & (align - 1)) != 0) ||
      (align == 0)) {
    return EINVAL;
  }

  void* result = do_memalign(align, size);
  MallocHook::InvokeNewHook(result, size);
  if (result == NULL) {
    return ENOMEM;
  } else {
    *result_ptr = result;
    return 0;
  }
}
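
// Usage sketch (standard POSIX calling convention, shown for illustration):
//
//   void* buffer = 0;
//   if (posix_memalign(&buffer, 64, 1024) == 0) {
//       // buffer is 1024 bytes, 64-byte aligned; release it with free().
//       free(buffer);
//   }
//
// The alignment must be a power of two and a multiple of sizeof(void*),
// otherwise the checks above return EINVAL.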

static size_t pagesize = 0;

extern "C" void* valloc(size_t size) __THROW {
  // Allocate page-aligned object of length >= size bytes
  if (pagesize == 0) pagesize = getpagesize();
  void* result = do_memalign(pagesize, size);
  MallocHook::InvokeNewHook(result, size);
  return result;
}

extern "C" void* pvalloc(size_t size) __THROW {
  // Round up size to a multiple of pagesize
  if (pagesize == 0) pagesize = getpagesize();
  size = (size + pagesize - 1) & ~(pagesize - 1);
  void* result = do_memalign(pagesize, size);
  MallocHook::InvokeNewHook(result, size);
  return result;
}
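
// For illustration, the rounding expression above works because pagesize is a
// power of two: with pagesize == 4096, a request of 100 bytes becomes
// (100 + 4095) & ~4095 == 4096, and a request of exactly 4096 stays 4096.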

extern "C" void malloc_stats(void) {
  do_malloc_stats();
}

extern "C" int mallopt(int cmd, int value) {
  return do_mallopt(cmd, value);
}

#ifdef HAVE_STRUCT_MALLINFO
extern "C" struct mallinfo mallinfo(void) {
  return do_mallinfo();
}
#endif

//-------------------------------------------------------------------
// Some library routines on RedHat 9 allocate memory using malloc()
// and free it using __libc_free() (or vice-versa).  Since we provide
// our own implementations of malloc/free, we need to make sure that
// the __libc_XXX variants (defined as part of glibc) also point to
// the same implementations.
//-------------------------------------------------------------------

#if defined(__GLIBC__)
extern "C" {
#if COMPILER(GCC) && !defined(__MACH__) && defined(HAVE___ATTRIBUTE__)
  // Potentially faster variants that use the gcc alias extension.
  // Mach-O (Darwin) does not support weak aliases, hence the __MACH__ check.
# define ALIAS(x) __attribute__ ((weak, alias (x)))
  void* __libc_malloc(size_t size)              ALIAS("malloc");
  void  __libc_free(void* ptr)                  ALIAS("free");
  void* __libc_realloc(void* ptr, size_t size)  ALIAS("realloc");
  void* __libc_calloc(size_t n, size_t size)    ALIAS("calloc");
  void  __libc_cfree(void* ptr)                 ALIAS("cfree");
  void* __libc_memalign(size_t align, size_t s) ALIAS("memalign");
  void* __libc_valloc(size_t size)              ALIAS("valloc");
  void* __libc_pvalloc(size_t size)             ALIAS("pvalloc");
  int __posix_memalign(void** r, size_t a, size_t s) ALIAS("posix_memalign");
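
  // For illustration, ALIAS("malloc") expands (under gcc) to
  //   __attribute__ ((weak, alias ("malloc")))
  // so a declaration such as
  //   void* __libc_malloc(size_t size) ALIAS("malloc");
  // makes __libc_malloc a weak symbol that resolves to this file's malloc,
  // without the cost of an extra call through a wrapper function.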
# else   /* not __GNUC__ */
  // Portable wrappers
  void* __libc_malloc(size_t size)              { return malloc(size);       }
  void  __libc_free(void* ptr)                  { free(ptr);                 }
  void* __libc_realloc(void* ptr, size_t size)  { return realloc(ptr, size); }
  void* __libc_calloc(size_t n, size_t size)    { return calloc(n, size);    }
  void  __libc_cfree(void* ptr)                 { cfree(ptr);                }
  void* __libc_memalign(size_t align, size_t s) { return memalign(align, s); }
  void* __libc_valloc(size_t size)              { return valloc(size);       }
  void* __libc_pvalloc(size_t size)             { return pvalloc(size);      }
  int __posix_memalign(void** r, size_t a, size_t s) {
    return posix_memalign(r, a, s);
  }
# endif  /* __GNUC__ */
}   // extern "C"
#endif   /* __GLIBC__ */

// Override __libc_memalign in libc on linux boxes specially.
// They have a bug in libc that causes them to (very rarely) allocate
// with __libc_memalign() yet deallocate with free() and the
// definitions above don't catch it.
// This function is an exception to the rule of calling MallocHook method
// from the stack frame of the allocation function;
// heap-checker handles this special case explicitly.
static void *MemalignOverride(size_t align, size_t size, const void *caller)
    __THROW {
  void* result = do_memalign(align, size);
  MallocHook::InvokeNewHook(result, size);
  return result;
}
void *(*__memalign_hook)(size_t, size_t, const void *) = MemalignOverride;

void releaseFastMallocFreeMemory()
{
    // Flush free pages in the current thread cache back to the page heap.
    // Low watermark mechanism in Scavenge() prevents full return on the first pass.
    // The second pass flushes everything.
    if (TCMalloc_ThreadCache* threadCache = TCMalloc_ThreadCache::GetCacheIfPresent()) {
        threadCache->Scavenge();
        threadCache->Scavenge();
    }

    SpinLockHolder h(&pageheap_lock);
    pageheap->ReleaseFreePages();
}

FastMallocStatistics fastMallocStatistics()
{
    FastMallocStatistics statistics;

    SpinLockHolder lockHolder(&pageheap_lock);
    statistics.reservedVMBytes = static_cast<size_t>(pageheap->SystemBytes());
    statistics.committedVMBytes = statistics.reservedVMBytes - pageheap->ReturnedBytes();

    statistics.freeListBytes = 0;
    for (unsigned cl = 0; cl < kNumClasses; ++cl) {
        const int length = central_cache[cl].length();
        const int tc_length = central_cache[cl].tc_length();

        statistics.freeListBytes += ByteSizeForClass(cl) * (length + tc_length);
    }
    for (TCMalloc_ThreadCache* threadCache = thread_heaps; threadCache; threadCache = threadCache->next_)
        statistics.freeListBytes += threadCache->Size();

    return statistics;
}

size_t fastMallocSize(const void* ptr)
{
#if ENABLE(WTF_MALLOC_VALIDATION)
    return Internal::fastMallocValidationHeader(const_cast<void*>(ptr))->m_size;
#else
    const PageID p = reinterpret_cast<uintptr_t>(ptr) >> kPageShift;
    Span* span = pageheap->GetDescriptorEnsureSafe(p);

    if (!span || span->free)
        return 0;

    for (void* free = span->objects; free != NULL; free = *((void**) free)) {
        if (ptr == free)
            return 0;
    }

    if (size_t cl = span->sizeclass)
        return ByteSizeForClass(cl);

    return span->length << kPageShift;
#endif
}

#if OS(DARWIN)

class FreeObjectFinder {
    const RemoteMemoryReader& m_reader;
    HashSet<void*> m_freeObjects;

public:
    FreeObjectFinder(const RemoteMemoryReader& reader) : m_reader(reader) { }

    void visit(void* ptr) { m_freeObjects.add(ptr); }
    bool isFreeObject(void* ptr) const { return m_freeObjects.contains(ptr); }
    bool isFreeObject(vm_address_t ptr) const { return isFreeObject(reinterpret_cast<void*>(ptr)); }
    size_t freeObjectCount() const { return m_freeObjects.size(); }

    void findFreeObjects(TCMalloc_ThreadCache* threadCache)
    {
        for (; threadCache; threadCache = (threadCache->next_ ? m_reader(threadCache->next_) : 0))
            threadCache->enumerateFreeObjects(*this, m_reader);
    }

    void findFreeObjects(TCMalloc_Central_FreeListPadded* centralFreeList, size_t numSizes, TCMalloc_Central_FreeListPadded* remoteCentralFreeList)
    {
        for (unsigned i = 0; i < numSizes; i++)
            centralFreeList[i].enumerateFreeObjects(*this, m_reader, remoteCentralFreeList + i);
    }
};

class PageMapFreeObjectFinder {
    const RemoteMemoryReader& m_reader;
    FreeObjectFinder& m_freeObjectFinder;

public:
    PageMapFreeObjectFinder(const RemoteMemoryReader& reader, FreeObjectFinder& freeObjectFinder)
        : m_reader(reader)
        , m_freeObjectFinder(freeObjectFinder)
    { }

    int visit(void* ptr) const
    {
        if (!ptr)
            return 1;

        Span* span = m_reader(reinterpret_cast<Span*>(ptr));
        if (!span)
            return 1;

        if (span->free) {
            void* ptr = reinterpret_cast<void*>(span->start << kPageShift);
            m_freeObjectFinder.visit(ptr);
        } else if (span->sizeclass) {
            // Walk the free list of the small-object span, keeping track of each object seen
            for (void* nextObject = span->objects; nextObject; nextObject = m_reader.nextEntryInLinkedList(reinterpret_cast<void**>(nextObject)))
                m_freeObjectFinder.visit(nextObject);
        }
        return span->length;
    }
};

class PageMapMemoryUsageRecorder {
    task_t m_task;
    void* m_context;
    unsigned m_typeMask;
    vm_range_recorder_t* m_recorder;
    const RemoteMemoryReader& m_reader;
    const FreeObjectFinder& m_freeObjectFinder;

    HashSet<void*> m_seenPointers;
    Vector<Span*> m_coalescedSpans;

public:
    PageMapMemoryUsageRecorder(task_t task, void* context, unsigned typeMask, vm_range_recorder_t* recorder, const RemoteMemoryReader& reader, const FreeObjectFinder& freeObjectFinder)
        : m_task(task)
        , m_context(context)
        , m_typeMask(typeMask)
        , m_recorder(recorder)
        , m_reader(reader)
        , m_freeObjectFinder(freeObjectFinder)
    { }

    ~PageMapMemoryUsageRecorder()
    {
        ASSERT(!m_coalescedSpans.size());
    }

    void recordPendingRegions()
    {
        Span* lastSpan = m_coalescedSpans[m_coalescedSpans.size() - 1];
        vm_range_t ptrRange = { m_coalescedSpans[0]->start << kPageShift, 0 };
        ptrRange.size = (lastSpan->start << kPageShift) - ptrRange.address + (lastSpan->length * kPageSize);

        // Mark the memory region the spans represent as a candidate for containing pointers
        if (m_typeMask & MALLOC_PTR_REGION_RANGE_TYPE)
            (*m_recorder)(m_task, m_context, MALLOC_PTR_REGION_RANGE_TYPE, &ptrRange, 1);

        if (!(m_typeMask & MALLOC_PTR_IN_USE_RANGE_TYPE)) {
            m_coalescedSpans.clear();
            return;
        }

        Vector<vm_range_t, 1024> allocatedPointers;
        for (size_t i = 0; i < m_coalescedSpans.size(); ++i) {
            Span* theSpan = m_coalescedSpans[i];
            if (theSpan->free)
                continue;

            vm_address_t spanStartAddress = theSpan->start << kPageShift;
            vm_size_t spanSizeInBytes = theSpan->length * kPageSize;

            if (!theSpan->sizeclass) {
                // If it's an allocated large object span, mark it as in use
                if (!m_freeObjectFinder.isFreeObject(spanStartAddress))
                    allocatedPointers.append((vm_range_t){spanStartAddress, spanSizeInBytes});
            } else {
                const size_t objectSize = ByteSizeForClass(theSpan->sizeclass);

                // Mark each allocated small object within the span as in use
                const vm_address_t endOfSpan = spanStartAddress + spanSizeInBytes;
                for (vm_address_t object = spanStartAddress; object + objectSize <= endOfSpan; object += objectSize) {
                    if (!m_freeObjectFinder.isFreeObject(object))
                        allocatedPointers.append((vm_range_t){object, objectSize});
                }
            }
        }

        (*m_recorder)(m_task, m_context, MALLOC_PTR_IN_USE_RANGE_TYPE, allocatedPointers.data(), allocatedPointers.size());

        m_coalescedSpans.clear();
    }

    int visit(void* ptr)
    {
        if (!ptr)
            return 1;

        Span* span = m_reader(reinterpret_cast<Span*>(ptr));
        if (!span || !span->start)
            return 1;

        if (m_seenPointers.contains(ptr))
            return span->length;
        m_seenPointers.add(ptr);

        if (!m_coalescedSpans.size()) {
            m_coalescedSpans.append(span);
            return span->length;
        }

        Span* previousSpan = m_coalescedSpans[m_coalescedSpans.size() - 1];
        vm_address_t previousSpanStartAddress = previousSpan->start << kPageShift;
        vm_size_t previousSpanSizeInBytes = previousSpan->length * kPageSize;

        // If the new span is adjacent to the previous span, do nothing for now.
        vm_address_t spanStartAddress = span->start << kPageShift;
        if (spanStartAddress == previousSpanStartAddress + previousSpanSizeInBytes) {
            m_coalescedSpans.append(span);
            return span->length;
        }

        // New span is not adjacent to previous span, so record the spans coalesced so far.
        recordPendingRegions();
        m_coalescedSpans.append(span);

        return span->length;
    }
};
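
// For illustration of the coalescing above (assuming 4 KB pages): a span
// starting at page 0x100 with length 4 ends at page 0x104, so a span starting
// at page 0x104 is simply appended to m_coalescedSpans, while a span starting
// at page 0x110 first forces the pending run beginning at page 0x100 to be
// reported via recordPendingRegions() and then starts a new run.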

class AdminRegionRecorder {
    task_t m_task;
    void* m_context;
    unsigned m_typeMask;
    vm_range_recorder_t* m_recorder;
    const RemoteMemoryReader& m_reader;

    Vector<vm_range_t, 1024> m_pendingRegions;

public:
    AdminRegionRecorder(task_t task, void* context, unsigned typeMask, vm_range_recorder_t* recorder, const RemoteMemoryReader& reader)
        : m_task(task)
        , m_context(context)
        , m_typeMask(typeMask)
        , m_recorder(recorder)
        , m_reader(reader)
    { }

    void recordRegion(vm_address_t ptr, size_t size)
    {
        if (m_typeMask & MALLOC_ADMIN_REGION_RANGE_TYPE)
            m_pendingRegions.append((vm_range_t){ ptr, size });
    }

    void visit(void *ptr, size_t size)
    {
        recordRegion(reinterpret_cast<vm_address_t>(ptr), size);
    }

    void recordPendingRegions()
    {
        if (m_pendingRegions.size()) {
            (*m_recorder)(m_task, m_context, MALLOC_ADMIN_REGION_RANGE_TYPE, m_pendingRegions.data(), m_pendingRegions.size());
            m_pendingRegions.clear();
        }
    }

    ~AdminRegionRecorder()
    {
        ASSERT(!m_pendingRegions.size());
    }
};

kern_return_t FastMallocZone::enumerate(task_t task, void* context, unsigned typeMask, vm_address_t zoneAddress, memory_reader_t reader, vm_range_recorder_t recorder)
{
    RemoteMemoryReader memoryReader(task, reader);

    FastMallocZone* mzone = memoryReader(reinterpret_cast<FastMallocZone*>(zoneAddress));
    TCMalloc_PageHeap* pageHeap = memoryReader(mzone->m_pageHeap);
    TCMalloc_ThreadCache** threadHeapsPointer = memoryReader(mzone->m_threadHeaps);
    TCMalloc_ThreadCache* threadHeaps = memoryReader(*threadHeapsPointer);

    TCMalloc_Central_FreeListPadded* centralCaches = memoryReader(mzone->m_centralCaches, sizeof(TCMalloc_Central_FreeListPadded) * kNumClasses);

    FreeObjectFinder finder(memoryReader);
    finder.findFreeObjects(threadHeaps);
    finder.findFreeObjects(centralCaches, kNumClasses, mzone->m_centralCaches);

    TCMalloc_PageHeap::PageMap* pageMap = &pageHeap->pagemap_;
    PageMapFreeObjectFinder pageMapFinder(memoryReader, finder);
    pageMap->visitValues(pageMapFinder, memoryReader);

    PageMapMemoryUsageRecorder usageRecorder(task, context, typeMask, recorder, memoryReader, finder);
    pageMap->visitValues(usageRecorder, memoryReader);
    usageRecorder.recordPendingRegions();

    AdminRegionRecorder adminRegionRecorder(task, context, typeMask, recorder, memoryReader);
    pageMap->visitAllocations(adminRegionRecorder, memoryReader);

    PageHeapAllocator<Span>* spanAllocator = memoryReader(mzone->m_spanAllocator);
    PageHeapAllocator<TCMalloc_ThreadCache>* pageHeapAllocator = memoryReader(mzone->m_pageHeapAllocator);

    spanAllocator->recordAdministrativeRegions(adminRegionRecorder, memoryReader);
    pageHeapAllocator->recordAdministrativeRegions(adminRegionRecorder, memoryReader);

    adminRegionRecorder.recordPendingRegions();

    return 0;
}
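
// For illustration, enumerate() above works in three passes over the target
// task's heap, all through RemoteMemoryReader so another process can be
// inspected: (1) FreeObjectFinder collects every object sitting on a thread
// cache or central free list, (2) PageMapMemoryUsageRecorder walks the page
// map and reports span ranges plus the individual objects that are NOT in
// that free set, and (3) AdminRegionRecorder reports the allocator's own
// bookkeeping memory (the span and thread-cache allocators).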

size_t FastMallocZone::size(malloc_zone_t*, const void*)
{
    return 0;
}

void* FastMallocZone::zoneMalloc(malloc_zone_t*, size_t)
{
    return 0;
}

void* FastMallocZone::zoneCalloc(malloc_zone_t*, size_t, size_t)
{
    return 0;
}

void FastMallocZone::zoneFree(malloc_zone_t*, void* ptr)
{
    // Due to <rdar://problem/5671357> zoneFree may be called by the system free even if the pointer
    // is not in this zone.  When this happens, the pointer being freed was not allocated by any
    // zone so we need to print a useful error for the application developer.
    malloc_printf("*** error for object %p: pointer being freed was not allocated\n", ptr);
}

void* FastMallocZone::zoneRealloc(malloc_zone_t*, void*, size_t)
{
    return 0;
}

malloc_introspection_t jscore_fastmalloc_introspection = { &FastMallocZone::enumerate, &FastMallocZone::goodSize, &FastMallocZone::check, &FastMallocZone::print,
    &FastMallocZone::log, &FastMallocZone::forceLock, &FastMallocZone::forceUnlock, &FastMallocZone::statistics
    , 0 // zone_locked will not be called on the zone unless it advertises itself as version five or higher.
    , 0, 0, 0, 0 // These members will not be used unless the zone advertises itself as version seven or higher.
    };

FastMallocZone::FastMallocZone(TCMalloc_PageHeap* pageHeap, TCMalloc_ThreadCache** threadHeaps, TCMalloc_Central_FreeListPadded* centralCaches, PageHeapAllocator<Span>* spanAllocator, PageHeapAllocator<TCMalloc_ThreadCache>* pageHeapAllocator)
    : m_pageHeap(pageHeap)
    , m_threadHeaps(threadHeaps)
    , m_centralCaches(centralCaches)
    , m_spanAllocator(spanAllocator)
    , m_pageHeapAllocator(pageHeapAllocator)
{
    memset(&m_zone, 0, sizeof(m_zone));
    m_zone.zone_name = "JavaScriptCore FastMalloc";
    m_zone.size = &FastMallocZone::size;
    m_zone.malloc = &FastMallocZone::zoneMalloc;
    m_zone.calloc = &FastMallocZone::zoneCalloc;
    m_zone.realloc = &FastMallocZone::zoneRealloc;
    m_zone.free = &FastMallocZone::zoneFree;
    m_zone.valloc = &FastMallocZone::zoneValloc;
    m_zone.destroy = &FastMallocZone::zoneDestroy;
    m_zone.introspect = &jscore_fastmalloc_introspection;
    malloc_zone_register(&m_zone);
}

void FastMallocZone::init()
{
    static FastMallocZone zone(pageheap, &thread_heaps, static_cast<TCMalloc_Central_FreeListPadded*>(central_cache), &span_allocator, &threadheap_allocator);
}

#endif // OS(DARWIN)

#endif // WTF_CHANGES

#endif // FORCE_SYSTEM_MALLOC