X-Git-Url: https://git.saurik.com/apple/javascriptcore.git/blobdiff_plain/9dae56ea45a0f5f8136a5c93d6f3a7f99399ca73..14957cd040308e3eeec43d26bae5d76da13fcd85:/wtf/FastMalloc.cpp?ds=sidebyside diff --git a/wtf/FastMalloc.cpp b/wtf/FastMalloc.cpp index 468aaac..c42c3c4 100644 --- a/wtf/FastMalloc.cpp +++ b/wtf/FastMalloc.cpp @@ -1,6 +1,6 @@ // Copyright (c) 2005, 2007, Google Inc. // All rights reserved. -// Copyright (C) 2005, 2006, 2007, 2008 Apple Inc. All rights reserved. +// Copyright (C) 2005, 2006, 2007, 2008, 2009, 2011 Apple Inc. All rights reserved. // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are @@ -78,9 +78,11 @@ #include "FastMalloc.h" #include "Assertions.h" -#if ENABLE(JSC_MULTIPLE_THREADS) +#include +#if ENABLE(WTF_MULTIPLE_THREADS) #include #endif +#include #ifndef NO_TCMALLOC_SAMPLES #ifdef WTF_CHANGES @@ -88,18 +90,19 @@ #endif #endif -#if !defined(USE_SYSTEM_MALLOC) && defined(NDEBUG) +#if !(defined(USE_SYSTEM_MALLOC) && USE_SYSTEM_MALLOC) && defined(NDEBUG) #define FORCE_SYSTEM_MALLOC 0 #else #define FORCE_SYSTEM_MALLOC 1 #endif -#define TCMALLOC_TRACK_DECOMMITED_SPANS (HAVE(VIRTUALALLOC)) +// Use a background thread to periodically scavenge memory to release back to the system +#define USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY 0 #ifndef NDEBUG namespace WTF { -#if ENABLE(JSC_MULTIPLE_THREADS) +#if ENABLE(WTF_MULTIPLE_THREADS) static pthread_key_t isForbiddenKey; static pthread_once_t isForbiddenKeyOnce = PTHREAD_ONCE_INIT; static void initializeIsForbiddenKey() @@ -107,11 +110,13 @@ static void initializeIsForbiddenKey() pthread_key_create(&isForbiddenKey, 0); } +#if !ASSERT_DISABLED static bool isForbidden() { pthread_once(&isForbiddenKeyOnce, initializeIsForbiddenKey); return !!pthread_getspecific(isForbiddenKey); } +#endif void fastMallocForbid() { @@ -142,7 +147,7 @@ void fastMallocAllow() { staticIsForbidden = false; } -#endif // ENABLE(JSC_MULTIPLE_THREADS) +#endif // ENABLE(WTF_MULTIPLE_THREADS) } // namespace WTF #endif // NDEBUG @@ -151,17 +156,40 @@ void fastMallocAllow() namespace WTF { + +namespace Internal { +#if !ENABLE(WTF_MALLOC_VALIDATION) +void fastMallocMatchFailed(void*); +#else +COMPILE_ASSERT(((sizeof(ValidationHeader) % sizeof(AllocAlignmentInteger)) == 0), ValidationHeader_must_produce_correct_alignment); +#endif +void fastMallocMatchFailed(void*) +{ + CRASH(); +} + +} // namespace Internal + + void* fastZeroedMalloc(size_t n) { void* result = fastMalloc(n); memset(result, 0, n); return result; } - -void* tryFastZeroedMalloc(size_t n) + +char* fastStrDup(const char* src) { - void* result = tryFastMalloc(n); - if (!result) + size_t len = strlen(src) + 1; + char* dup = static_cast(fastMalloc(len)); + memcpy(dup, src, len); + return dup; +} + +TryMallocReturnValue tryFastZeroedMalloc(size_t n) +{ + void* result; + if (!tryFastMalloc(n).getValue(result)) return 0; memset(result, 0, n); return result; @@ -171,61 +199,170 @@ void* tryFastZeroedMalloc(size_t n) #if FORCE_SYSTEM_MALLOC -#include -#if !PLATFORM(WIN_OS) - #include -#else - #include "windows.h" +#if PLATFORM(BREWMP) +#include "brew/SystemMallocBrew.h" +#endif + +#if OS(DARWIN) +#include +#elif OS(WINDOWS) +#include #endif namespace WTF { -void* tryFastMalloc(size_t n) +TryMallocReturnValue tryFastMalloc(size_t n) { ASSERT(!isForbidden()); + +#if ENABLE(WTF_MALLOC_VALIDATION) + if (std::numeric_limits::max() - Internal::ValidationBufferSize <= n) // If overflow would occur... 
+ return 0; + + void* result = malloc(n + Internal::ValidationBufferSize); + if (!result) + return 0; + Internal::ValidationHeader* header = static_cast(result); + header->m_size = n; + header->m_type = Internal::AllocTypeMalloc; + header->m_prefix = static_cast(Internal::ValidationPrefix); + result = header + 1; + *Internal::fastMallocValidationSuffix(result) = Internal::ValidationSuffix; + fastMallocValidate(result); + return result; +#else return malloc(n); +#endif } void* fastMalloc(size_t n) { ASSERT(!isForbidden()); + +#if ENABLE(WTF_MALLOC_VALIDATION) + TryMallocReturnValue returnValue = tryFastMalloc(n); + void* result; + if (!returnValue.getValue(result)) + CRASH(); +#else void* result = malloc(n); - if (!result) +#endif + + if (!result) { +#if PLATFORM(BREWMP) + // The behavior of malloc(0) is implementation defined. + // To make sure that fastMalloc never returns 0, retry with fastMalloc(1). + if (!n) + return fastMalloc(1); +#endif CRASH(); + } + return result; } -void* tryFastCalloc(size_t n_elements, size_t element_size) +TryMallocReturnValue tryFastCalloc(size_t n_elements, size_t element_size) { ASSERT(!isForbidden()); + +#if ENABLE(WTF_MALLOC_VALIDATION) + size_t totalBytes = n_elements * element_size; + if (n_elements > 1 && element_size && (totalBytes / element_size) != n_elements) + return 0; + + TryMallocReturnValue returnValue = tryFastMalloc(totalBytes); + void* result; + if (!returnValue.getValue(result)) + return 0; + memset(result, 0, totalBytes); + fastMallocValidate(result); + return result; +#else return calloc(n_elements, element_size); +#endif } void* fastCalloc(size_t n_elements, size_t element_size) { ASSERT(!isForbidden()); + +#if ENABLE(WTF_MALLOC_VALIDATION) + TryMallocReturnValue returnValue = tryFastCalloc(n_elements, element_size); + void* result; + if (!returnValue.getValue(result)) + CRASH(); +#else void* result = calloc(n_elements, element_size); - if (!result) +#endif + + if (!result) { +#if PLATFORM(BREWMP) + // If either n_elements or element_size is 0, the behavior of calloc is implementation defined. + // To make sure that fastCalloc never returns 0, retry with fastCalloc(1, 1). + if (!n_elements || !element_size) + return fastCalloc(1, 1); +#endif CRASH(); + } + return result; } void fastFree(void* p) { ASSERT(!isForbidden()); + +#if ENABLE(WTF_MALLOC_VALIDATION) + if (!p) + return; + + fastMallocMatchValidateFree(p, Internal::AllocTypeMalloc); + Internal::ValidationHeader* header = Internal::fastMallocValidationHeader(p); + memset(p, 0xCC, header->m_size); + free(header); +#else free(p); +#endif } -void* tryFastRealloc(void* p, size_t n) +TryMallocReturnValue tryFastRealloc(void* p, size_t n) { ASSERT(!isForbidden()); + +#if ENABLE(WTF_MALLOC_VALIDATION) + if (p) { + if (std::numeric_limits::max() - Internal::ValidationBufferSize <= n) // If overflow would occur... 
+ return 0; + fastMallocValidate(p); + Internal::ValidationHeader* result = static_cast(realloc(Internal::fastMallocValidationHeader(p), n + Internal::ValidationBufferSize)); + if (!result) + return 0; + result->m_size = n; + result = result + 1; + *fastMallocValidationSuffix(result) = Internal::ValidationSuffix; + fastMallocValidate(result); + return result; + } else { + return fastMalloc(n); + } +#else return realloc(p, n); +#endif } void* fastRealloc(void* p, size_t n) { ASSERT(!isForbidden()); + +#if ENABLE(WTF_MALLOC_VALIDATION) + TryMallocReturnValue returnValue = tryFastRealloc(p, n); + void* result; + if (!returnValue.getValue(result)) + CRASH(); +#else void* result = realloc(p, n); +#endif + if (!result) CRASH(); return result; @@ -235,13 +372,27 @@ void releaseFastMallocFreeMemory() { } FastMallocStatistics fastMallocStatistics() { - FastMallocStatistics statistics = { 0, 0, 0, 0 }; + FastMallocStatistics statistics = { 0, 0, 0 }; return statistics; } +size_t fastMallocSize(const void* p) +{ +#if ENABLE(WTF_MALLOC_VALIDATION) + return Internal::fastMallocValidationHeader(const_cast(p))->m_size; +#elif OS(DARWIN) + return malloc_size(p); +#elif OS(WINDOWS) && !PLATFORM(BREWMP) + // Brew MP uses its own memory allocator, so _msize does not work on the Brew MP simulator. + return _msize(const_cast(p)); +#else + return 1; +#endif +} + } // namespace WTF -#if PLATFORM(DARWIN) +#if OS(DARWIN) // This symbol is present in the JavaScriptCore exports file even when FastMalloc is disabled. // It will never be used in this case, so it's type and value are less interesting than its presence. extern "C" const int jscore_fastmalloc_introspection = 0; @@ -264,24 +415,46 @@ extern "C" const int jscore_fastmalloc_introspection = 0; #include "TCSpinLock.h" #include "TCSystemAlloc.h" #include -#include -#include +#include #include #include #include #include -#if COMPILER(MSVC) +#if HAVE(ERRNO_H) +#include +#endif +#if OS(UNIX) +#include +#endif +#if OS(WINDOWS) #ifndef WIN32_LEAN_AND_MEAN #define WIN32_LEAN_AND_MEAN #endif #include #endif -#if WTF_CHANGES +#ifdef WTF_CHANGES -#if PLATFORM(DARWIN) +#if OS(DARWIN) #include "MallocZoneSupport.h" #include +#include +#endif + +#if HAVE(HEADER_DETECTION_H) +#include "HeaderDetection.h" +#endif + +#if HAVE(DISPATCH_H) +#include +#endif + +#if HAVE(PTHREAD_MACHDEP_H) +#include + +#if defined(__PTK_FRAMEWORK_JAVASCRIPTCORE_KEY0) +#define WTF_USE_PTHREAD_GETSPECIFIC_DIRECT 1 +#endif #endif #ifndef PRIuS @@ -292,9 +465,14 @@ extern "C" const int jscore_fastmalloc_introspection = 0; // call to the function on Mac OS X, and it's used in performance-critical code. So we // use a function pointer. But that's not necessarily faster on other platforms, and we had // problems with this technique on Windows, so we'll do this only on Mac OS X. 
-#if PLATFORM(DARWIN) +#if OS(DARWIN) +#if !USE(PTHREAD_GETSPECIFIC_DIRECT) static void* (*pthread_getspecific_function_pointer)(pthread_key_t) = pthread_getspecific; #define pthread_getspecific(key) pthread_getspecific_function_pointer(key) +#else +#define pthread_getspecific(key) _pthread_getspecific_direct(key) +#define pthread_setspecific(key, val) _pthread_setspecific_direct(key, (val)) +#endif #endif #define DEFINE_VARIABLE(type, name, value, meaning) \ @@ -320,10 +498,12 @@ namespace WTF { #define MESSAGE LOG_ERROR #define CHECK_CONDITION ASSERT -#if PLATFORM(DARWIN) +#if OS(DARWIN) +struct Span; +class TCMalloc_Central_FreeListPadded; class TCMalloc_PageHeap; class TCMalloc_ThreadCache; -class TCMalloc_Central_FreeListPadded; +template class PageHeapAllocator; class FastMallocZone { public: @@ -339,7 +519,7 @@ public: static void statistics(malloc_zone_t*, malloc_statistics_t* stats) { memset(stats, 0, sizeof(malloc_statistics_t)); } private: - FastMallocZone(TCMalloc_PageHeap*, TCMalloc_ThreadCache**, TCMalloc_Central_FreeListPadded*); + FastMallocZone(TCMalloc_PageHeap*, TCMalloc_ThreadCache**, TCMalloc_Central_FreeListPadded*, PageHeapAllocator*, PageHeapAllocator*); static size_t size(malloc_zone_t*, const void*); static void* zoneMalloc(malloc_zone_t*, size_t); static void* zoneCalloc(malloc_zone_t*, size_t numItems, size_t size); @@ -352,6 +532,8 @@ private: TCMalloc_PageHeap* m_pageHeap; TCMalloc_ThreadCache** m_threadHeaps; TCMalloc_Central_FreeListPadded* m_centralCaches; + PageHeapAllocator* m_spanAllocator; + PageHeapAllocator* m_pageHeapAllocator; }; #endif @@ -432,7 +614,7 @@ static const size_t kNumClasses = 68; static const size_t kPageMapBigAllocationThreshold = 128 << 20; // Minimum number of pages to fetch from system at a time. Must be -// significantly bigger than kBlockSize to amortize system-call +// significantly bigger than kPageSize to amortize system-call // overhead, and also to reduce external fragementation. 
Also, we // should keep this value big because various incarnations of Linux // have small limits on the number of mmap() regions per @@ -820,6 +1002,9 @@ class PageHeapAllocator { char* free_area_; size_t free_avail_; + // Linked list of all regions allocated by this allocator + void* allocated_regions_; + // Free list of already carved objects void* free_list_; @@ -830,6 +1015,7 @@ class PageHeapAllocator { void Init() { ASSERT(kAlignedSize <= kAllocIncrement); inuse_ = 0; + allocated_regions_ = 0; free_area_ = NULL; free_avail_ = 0; free_list_ = NULL; @@ -844,9 +1030,14 @@ class PageHeapAllocator { } else { if (free_avail_ < kAlignedSize) { // Need more room - free_area_ = reinterpret_cast(MetaDataAlloc(kAllocIncrement)); - if (free_area_ == NULL) CRASH(); - free_avail_ = kAllocIncrement; + char* new_allocation = reinterpret_cast(MetaDataAlloc(kAllocIncrement)); + if (!new_allocation) + CRASH(); + + *reinterpret_cast_ptr(new_allocation) = allocated_regions_; + allocated_regions_ = new_allocation; + free_area_ = new_allocation + kAlignedSize; + free_avail_ = kAllocIncrement - kAlignedSize; } result = free_area_; free_area_ += kAlignedSize; @@ -863,6 +1054,15 @@ class PageHeapAllocator { } int inuse() const { return inuse_; } + +#if defined(WTF_CHANGES) && OS(DARWIN) + template + void recordAdministrativeRegions(Recorder& recorder, const RemoteMemoryReader& reader) + { + for (void* adminAllocation = allocated_regions_; adminAllocation; adminAllocation = reader.nextEntryInLinkedList(reinterpret_cast(adminAllocation))) + recorder.recordRegion(reinterpret_cast(adminAllocation), kAllocIncrement); + } +#endif }; // ------------------------------------------------------------------------- @@ -921,11 +1121,7 @@ struct Span { #endif }; -#if TCMALLOC_TRACK_DECOMMITED_SPANS #define ASSERT_SPAN_COMMITTED(span) ASSERT(!span->decommitted) -#else -#define ASSERT_SPAN_COMMITTED(span) -#endif #ifdef SPAN_HISTORY void Event(Span* span, char op, int v = 0) { @@ -1038,7 +1234,7 @@ template class MapSelector { }; #if defined(WTF_CHANGES) -#if PLATFORM(X86_64) +#if CPU(X86_64) // On all known X86-64 platforms, the upper 16 bits are always unused and therefore // can be excluded from the PageMap key. // See http://en.wikipedia.org/wiki/X86-64#Virtual_address_space_details @@ -1071,6 +1267,39 @@ template <> class MapSelector<32> { // contiguous runs of pages (called a "span"). // ------------------------------------------------------------------------- +#if USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY +// The page heap maintains a free list for spans that are no longer in use by +// the central cache or any thread caches. We use a background thread to +// periodically scan the free list and release a percentage of it back to the OS. + +// If free_committed_pages_ exceeds kMinimumFreeCommittedPageCount, the +// background thread: +// - wakes up +// - pauses for kScavengeDelayInSeconds +// - returns to the OS a percentage of the memory that remained unused during +// that pause (kScavengePercentage * min_free_committed_pages_since_last_scavenge_) +// The goal of this strategy is to reduce memory pressure in a timely fashion +// while avoiding thrashing the OS allocator. + +// Time delay before the page heap scavenger will consider returning pages to +// the OS. +static const int kScavengeDelayInSeconds = 2; + +// Approximate percentage of free committed pages to return to the OS in one +// scavenge. +static const float kScavengePercentage = .5f; + +// number of span lists to keep spans in when memory is returned. 
+static const int kMinSpanListsWithSpans = 32; + +// Number of free committed pages that we want to keep around. The minimum number of pages used when there +// is 1 span in each of the first kMinSpanListsWithSpans spanlists. Currently 528 pages. +static const size_t kMinimumFreeCommittedPageCount = kMinSpanListsWithSpans * ((1.0f+kMinSpanListsWithSpans) / 2.0f); + +#endif + +static SpinLock pageheap_lock = SPINLOCK_INITIALIZER; + class TCMalloc_PageHeap { public: void init(); @@ -1128,7 +1357,7 @@ class TCMalloc_PageHeap { } bool Check(); - bool CheckList(Span* list, Length min_pages, Length max_pages); + bool CheckList(Span* list, Length min_pages, Length max_pages, bool decommitted); // Release all pages on the free list for reuse by the OS: void ReleaseFreePages(); @@ -1170,6 +1399,15 @@ class TCMalloc_PageHeap { // Bytes allocated from system uint64_t system_bytes_; +#if USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY + // Number of pages kept in free lists that are still committed. + Length free_committed_pages_; + + // Minimum number of free committed pages since last scavenge. (Can be 0 if + // we've committed new pages since the last scavenge.) + Length min_free_committed_pages_since_last_scavenge_; +#endif + bool GrowHeap(Length n); // REQUIRES span->length >= n @@ -1192,9 +1430,11 @@ class TCMalloc_PageHeap { // span of exactly the specified length. Else, returns NULL. Span* AllocLarge(Length n); +#if !USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY // Incrementally release some memory to the system. // IncrementalScavenge(n) is called whenever n pages are freed. void IncrementalScavenge(Length n); +#endif // Number of pages to deallocate before doing more scavenging int64_t scavenge_counter_; @@ -1202,9 +1442,44 @@ class TCMalloc_PageHeap { // Index of last free list we scavenged size_t scavenge_index_; -#if defined(WTF_CHANGES) && PLATFORM(DARWIN) +#if defined(WTF_CHANGES) && OS(DARWIN) friend class FastMallocZone; #endif + +#if USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY + void initializeScavenger(); + ALWAYS_INLINE void signalScavenger(); + void scavenge(); + ALWAYS_INLINE bool shouldScavenge() const; + +#if HAVE(DISPATCH_H) || OS(WINDOWS) + void periodicScavenge(); + ALWAYS_INLINE bool isScavengerSuspended(); + ALWAYS_INLINE void scheduleScavenger(); + ALWAYS_INLINE void rescheduleScavenger(); + ALWAYS_INLINE void suspendScavenger(); +#endif + +#if HAVE(DISPATCH_H) + dispatch_queue_t m_scavengeQueue; + dispatch_source_t m_scavengeTimer; + bool m_scavengingSuspended; +#elif OS(WINDOWS) + static void CALLBACK scavengerTimerFired(void*, BOOLEAN); + HANDLE m_scavengeQueueTimer; +#else + static NO_RETURN_WITH_VALUE void* runScavengerThread(void*); + NO_RETURN void scavengerThread(); + + // Keeps track of whether the background thread is actively scavenging memory every kScavengeDelayInSeconds, or + // it's blocked waiting for more pages to be deleted. 
+ bool m_scavengeThreadActive; + + pthread_mutex_t m_scavengeMutex; + pthread_cond_t m_scavengeCondition; +#endif + +#endif // USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY }; void TCMalloc_PageHeap::init() @@ -1213,6 +1488,12 @@ void TCMalloc_PageHeap::init() pagemap_cache_ = PageMapCache(0); free_pages_ = 0; system_bytes_ = 0; + +#if USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY + free_committed_pages_ = 0; + min_free_committed_pages_since_last_scavenge_ = 0; +#endif // USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY + scavenge_counter_ = 0; // Start scavenging at kMaxPages list scavenge_index_ = kMaxPages-1; @@ -1223,8 +1504,178 @@ void TCMalloc_PageHeap::init() DLL_Init(&free_[i].normal); DLL_Init(&free_[i].returned); } + +#if USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY + initializeScavenger(); +#endif // USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY +} + +#if USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY + +#if HAVE(DISPATCH_H) + +void TCMalloc_PageHeap::initializeScavenger() +{ + m_scavengeQueue = dispatch_queue_create("com.apple.JavaScriptCore.FastMallocSavenger", NULL); + m_scavengeTimer = dispatch_source_create(DISPATCH_SOURCE_TYPE_TIMER, 0, 0, m_scavengeQueue); + dispatch_time_t startTime = dispatch_time(DISPATCH_TIME_NOW, kScavengeDelayInSeconds * NSEC_PER_SEC); + dispatch_source_set_timer(m_scavengeTimer, startTime, kScavengeDelayInSeconds * NSEC_PER_SEC, 1000 * NSEC_PER_USEC); + dispatch_source_set_event_handler(m_scavengeTimer, ^{ periodicScavenge(); }); + m_scavengingSuspended = true; +} + +ALWAYS_INLINE bool TCMalloc_PageHeap::isScavengerSuspended() +{ + ASSERT(pageheap_lock.IsHeld()); + return m_scavengingSuspended; +} + +ALWAYS_INLINE void TCMalloc_PageHeap::scheduleScavenger() +{ + ASSERT(pageheap_lock.IsHeld()); + m_scavengingSuspended = false; + dispatch_resume(m_scavengeTimer); +} + +ALWAYS_INLINE void TCMalloc_PageHeap::rescheduleScavenger() +{ + // Nothing to do here for libdispatch. +} + +ALWAYS_INLINE void TCMalloc_PageHeap::suspendScavenger() +{ + ASSERT(pageheap_lock.IsHeld()); + m_scavengingSuspended = true; + dispatch_suspend(m_scavengeTimer); +} + +#elif OS(WINDOWS) + +void TCMalloc_PageHeap::scavengerTimerFired(void* context, BOOLEAN) +{ + static_cast(context)->periodicScavenge(); +} + +void TCMalloc_PageHeap::initializeScavenger() +{ + m_scavengeQueueTimer = 0; +} + +ALWAYS_INLINE bool TCMalloc_PageHeap::isScavengerSuspended() +{ + ASSERT(IsHeld(pageheap_lock)); + return !m_scavengeQueueTimer; +} + +ALWAYS_INLINE void TCMalloc_PageHeap::scheduleScavenger() +{ + // We need to use WT_EXECUTEONLYONCE here and reschedule the timer, because + // Windows will fire the timer event even when the function is already running. + ASSERT(IsHeld(pageheap_lock)); + CreateTimerQueueTimer(&m_scavengeQueueTimer, 0, scavengerTimerFired, this, kScavengeDelayInSeconds * 1000, 0, WT_EXECUTEONLYONCE); +} + +ALWAYS_INLINE void TCMalloc_PageHeap::rescheduleScavenger() +{ + // We must delete the timer and create it again, because it is not possible to retrigger a timer on Windows. + suspendScavenger(); + scheduleScavenger(); +} + +ALWAYS_INLINE void TCMalloc_PageHeap::suspendScavenger() +{ + ASSERT(IsHeld(pageheap_lock)); + HANDLE scavengeQueueTimer = m_scavengeQueueTimer; + m_scavengeQueueTimer = 0; + DeleteTimerQueueTimer(0, scavengeQueueTimer, 0); +} + +#else + +void TCMalloc_PageHeap::initializeScavenger() +{ + // Create a non-recursive mutex. 
+#if !defined(PTHREAD_MUTEX_NORMAL) || PTHREAD_MUTEX_NORMAL == PTHREAD_MUTEX_DEFAULT + pthread_mutex_init(&m_scavengeMutex, 0); +#else + pthread_mutexattr_t attr; + pthread_mutexattr_init(&attr); + pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_NORMAL); + + pthread_mutex_init(&m_scavengeMutex, &attr); + + pthread_mutexattr_destroy(&attr); +#endif + + pthread_cond_init(&m_scavengeCondition, 0); + m_scavengeThreadActive = true; + pthread_t thread; + pthread_create(&thread, 0, runScavengerThread, this); +} + +void* TCMalloc_PageHeap::runScavengerThread(void* context) +{ + static_cast(context)->scavengerThread(); +#if (COMPILER(MSVC) || COMPILER(SUNCC)) + // Without this, Visual Studio and Sun Studio will complain that this method does not return a value. + return 0; +#endif +} + +ALWAYS_INLINE void TCMalloc_PageHeap::signalScavenger() +{ + // m_scavengeMutex should be held before accessing m_scavengeThreadActive. + ASSERT(pthread_mutex_trylock(m_scavengeMutex)); + if (!m_scavengeThreadActive && shouldScavenge()) + pthread_cond_signal(&m_scavengeCondition); +} + +#endif + +void TCMalloc_PageHeap::scavenge() +{ + size_t pagesToRelease = min_free_committed_pages_since_last_scavenge_ * kScavengePercentage; + size_t targetPageCount = std::max(kMinimumFreeCommittedPageCount, free_committed_pages_ - pagesToRelease); + + Length lastFreeCommittedPages = free_committed_pages_; + while (free_committed_pages_ > targetPageCount) { + ASSERT(Check()); + for (int i = kMaxPages; i > 0 && free_committed_pages_ >= targetPageCount; i--) { + SpanList* slist = (static_cast(i) == kMaxPages) ? &large_ : &free_[i]; + // If the span size is bigger than kMinSpanListsWithSpans pages return all the spans in the list, else return all but 1 span. + // Return only 50% of a spanlist at a time so spans of size 1 are not the only ones left. + size_t length = DLL_Length(&slist->normal); + size_t numSpansToReturn = (i > kMinSpanListsWithSpans) ? length : length / 2; + for (int j = 0; static_cast(j) < numSpansToReturn && !DLL_IsEmpty(&slist->normal) && free_committed_pages_ > targetPageCount; j++) { + Span* s = slist->normal.prev; + DLL_Remove(s); + ASSERT(!s->decommitted); + if (!s->decommitted) { + TCMalloc_SystemRelease(reinterpret_cast(s->start << kPageShift), + static_cast(s->length << kPageShift)); + ASSERT(free_committed_pages_ >= s->length); + free_committed_pages_ -= s->length; + s->decommitted = true; + } + DLL_Prepend(&slist->returned, s); + } + } + + if (lastFreeCommittedPages == free_committed_pages_) + break; + lastFreeCommittedPages = free_committed_pages_; + } + + min_free_committed_pages_since_last_scavenge_ = free_committed_pages_; } +ALWAYS_INLINE bool TCMalloc_PageHeap::shouldScavenge() const +{ + return free_committed_pages_ > kMinimumFreeCommittedPageCount; +} + +#endif // USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY + inline Span* TCMalloc_PageHeap::New(Length n) { ASSERT(Check()); ASSERT(n > 0); @@ -1247,12 +1698,14 @@ inline Span* TCMalloc_PageHeap::New(Length n) { Span* result = ll->next; Carve(result, n, released); -#if TCMALLOC_TRACK_DECOMMITED_SPANS - if (result->decommitted) { - TCMalloc_SystemCommit(reinterpret_cast(result->start << kPageShift), static_cast(n << kPageShift)); - result->decommitted = false; - } -#endif +#if USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY + // The newly allocated memory is from a span that's in the normal span list (already committed). Update the + // free committed pages count. 
+ ASSERT(free_committed_pages_ >= n); + free_committed_pages_ -= n; + if (free_committed_pages_ < min_free_committed_pages_since_last_scavenge_) + min_free_committed_pages_since_last_scavenge_ = free_committed_pages_; +#endif // USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY ASSERT(Check()); free_pages_ -= n; return result; @@ -1309,12 +1762,14 @@ Span* TCMalloc_PageHeap::AllocLarge(Length n) { if (best != NULL) { Carve(best, n, from_released); -#if TCMALLOC_TRACK_DECOMMITED_SPANS - if (best->decommitted) { - TCMalloc_SystemCommit(reinterpret_cast(best->start << kPageShift), static_cast(n << kPageShift)); - best->decommitted = false; - } -#endif +#if USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY + // The newly allocated memory is from a span that's in the normal span list (already committed). Update the + // free committed pages count. + ASSERT(free_committed_pages_ >= n); + free_committed_pages_ -= n; + if (free_committed_pages_ < min_free_committed_pages_since_last_scavenge_) + min_free_committed_pages_since_last_scavenge_ = free_committed_pages_; +#endif // USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY ASSERT(Check()); free_pages_ -= n; return best; @@ -1339,33 +1794,34 @@ Span* TCMalloc_PageHeap::Split(Span* span, Length n) { return leftover; } -#if !TCMALLOC_TRACK_DECOMMITED_SPANS -static ALWAYS_INLINE void propagateDecommittedState(Span*, Span*) { } -#else -static ALWAYS_INLINE void propagateDecommittedState(Span* destination, Span* source) -{ - destination->decommitted = source->decommitted; -} -#endif - inline void TCMalloc_PageHeap::Carve(Span* span, Length n, bool released) { ASSERT(n > 0); DLL_Remove(span); span->free = 0; Event(span, 'A', n); + if (released) { + // If the span chosen to carve from is decommited, commit the entire span at once to avoid committing spans 1 page at a time. + ASSERT(span->decommitted); + TCMalloc_SystemCommit(reinterpret_cast(span->start << kPageShift), static_cast(span->length << kPageShift)); + span->decommitted = false; +#if USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY + free_committed_pages_ += span->length; +#endif + } + const int extra = static_cast(span->length - n); ASSERT(extra >= 0); if (extra > 0) { Span* leftover = NewSpan(span->start + n, extra); leftover->free = 1; - propagateDecommittedState(leftover, span); + leftover->decommitted = false; Event(leftover, 'S', extra); RecordSpan(leftover); // Place leftover span on appropriate free list SpanList* listpair = (static_cast(extra) < kMaxPages) ? &free_[extra] : &large_; - Span* dst = released ? &listpair->returned : &listpair->normal; + Span* dst = &listpair->normal; DLL_Prepend(dst, leftover); span->length = n; @@ -1373,15 +1829,17 @@ inline void TCMalloc_PageHeap::Carve(Span* span, Length n, bool released) { } } -#if !TCMALLOC_TRACK_DECOMMITED_SPANS -static ALWAYS_INLINE void mergeDecommittedStates(Span*, Span*) { } -#else static ALWAYS_INLINE void mergeDecommittedStates(Span* destination, Span* other) { - if (other->decommitted) + if (destination->decommitted && !other->decommitted) { + TCMalloc_SystemRelease(reinterpret_cast(other->start << kPageShift), + static_cast(other->length << kPageShift)); + } else if (other->decommitted && !destination->decommitted) { + TCMalloc_SystemRelease(reinterpret_cast(destination->start << kPageShift), + static_cast(destination->length << kPageShift)); destination->decommitted = true; + } } -#endif inline void TCMalloc_PageHeap::Delete(Span* span) { ASSERT(Check()); @@ -1398,10 +1856,10 @@ inline void TCMalloc_PageHeap::Delete(Span* span) { // necessary. 
We do not bother resetting the stale pagemap // entries for the pieces we are merging together because we only // care about the pagemap entries for the boundaries. - // - // Note that the spans we merge into "span" may come out of - // a "returned" list. For simplicity, we move these into the - // "normal" list of the appropriate size class. +#if USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY + // Track the total size of the neighboring free spans that are committed. + Length neighboringCommittedSpansLength = 0; +#endif const PageID p = span->start; const Length n = span->length; Span* prev = GetDescriptor(p-1); @@ -1409,6 +1867,10 @@ inline void TCMalloc_PageHeap::Delete(Span* span) { // Merge preceding span into this span ASSERT(prev->start + prev->length == p); const Length len = prev->length; +#if USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY + if (!prev->decommitted) + neighboringCommittedSpansLength += len; +#endif mergeDecommittedStates(span, prev); DLL_Remove(prev); DeleteSpan(prev); @@ -1422,6 +1884,10 @@ inline void TCMalloc_PageHeap::Delete(Span* span) { // Merge next span into this span ASSERT(next->start == p+n); const Length len = next->length; +#if USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY + if (!next->decommitted) + neighboringCommittedSpansLength += len; +#endif mergeDecommittedStates(span, next); DLL_Remove(next); DeleteSpan(next); @@ -1432,17 +1898,41 @@ inline void TCMalloc_PageHeap::Delete(Span* span) { Event(span, 'D', span->length); span->free = 1; - if (span->length < kMaxPages) { - DLL_Prepend(&free_[span->length].normal, span); + if (span->decommitted) { + if (span->length < kMaxPages) + DLL_Prepend(&free_[span->length].returned, span); + else + DLL_Prepend(&large_.returned, span); } else { - DLL_Prepend(&large_.normal, span); + if (span->length < kMaxPages) + DLL_Prepend(&free_[span->length].normal, span); + else + DLL_Prepend(&large_.normal, span); } free_pages_ += n; +#if USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY + if (span->decommitted) { + // If the merged span is decommitted, that means we decommitted any neighboring spans that were + // committed. Update the free committed pages count. + free_committed_pages_ -= neighboringCommittedSpansLength; + if (free_committed_pages_ < min_free_committed_pages_since_last_scavenge_) + min_free_committed_pages_since_last_scavenge_ = free_committed_pages_; + } else { + // If the merged span remains committed, add the deleted span's size to the free committed pages count. + free_committed_pages_ += n; + } + + // Make sure the scavenge thread becomes active if we have enough freed pages to release some back to the system. 
+ signalScavenger(); +#else IncrementalScavenge(n); +#endif + ASSERT(Check()); } +#if !USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY void TCMalloc_PageHeap::IncrementalScavenge(Length n) { // Fast path; not yet time to release memory scavenge_counter_ -= n; @@ -1461,9 +1951,7 @@ void TCMalloc_PageHeap::IncrementalScavenge(Length n) { DLL_Remove(s); TCMalloc_SystemRelease(reinterpret_cast(s->start << kPageShift), static_cast(s->length << kPageShift)); -#if TCMALLOC_TRACK_DECOMMITED_SPANS s->decommitted = true; -#endif DLL_Prepend(&slist->returned, s); scavenge_counter_ = std::max(16UL, std::min(kDefaultReleaseDelay, kDefaultReleaseDelay - (free_pages_ / kDefaultReleaseDelay))); @@ -1480,6 +1968,7 @@ void TCMalloc_PageHeap::IncrementalScavenge(Length n) { // Nothing to scavenge, delay for a while scavenge_counter_ = kDefaultReleaseDelay; } +#endif void TCMalloc_PageHeap::RegisterSizeClass(Span* span, size_t sc) { // Associate span object with all interior pages as well @@ -1628,27 +2117,28 @@ bool TCMalloc_PageHeap::GrowHeap(Length n) { bool TCMalloc_PageHeap::Check() { ASSERT(free_[0].normal.next == &free_[0].normal); ASSERT(free_[0].returned.next == &free_[0].returned); - CheckList(&large_.normal, kMaxPages, 1000000000); - CheckList(&large_.returned, kMaxPages, 1000000000); + CheckList(&large_.normal, kMaxPages, 1000000000, false); + CheckList(&large_.returned, kMaxPages, 1000000000, true); for (Length s = 1; s < kMaxPages; s++) { - CheckList(&free_[s].normal, s, s); - CheckList(&free_[s].returned, s, s); + CheckList(&free_[s].normal, s, s, false); + CheckList(&free_[s].returned, s, s, true); } return true; } #if ASSERT_DISABLED -bool TCMalloc_PageHeap::CheckList(Span*, Length, Length) { +bool TCMalloc_PageHeap::CheckList(Span*, Length, Length, bool) { return true; } #else -bool TCMalloc_PageHeap::CheckList(Span* list, Length min_pages, Length max_pages) { +bool TCMalloc_PageHeap::CheckList(Span* list, Length min_pages, Length max_pages, bool decommitted) { for (Span* s = list->next; s != list; s = s->next) { CHECK_CONDITION(s->free); CHECK_CONDITION(s->length >= min_pages); CHECK_CONDITION(s->length <= max_pages); CHECK_CONDITION(GetDescriptor(s->start) == s); CHECK_CONDITION(GetDescriptor(s->start+s->length-1) == s); + CHECK_CONDITION(s->decommitted == decommitted); } return true; } @@ -1660,6 +2150,7 @@ static void ReleaseFreeList(Span* list, Span* returned) { while (!DLL_IsEmpty(list)) { Span* s = list->prev; DLL_Remove(s); + s->decommitted = true; DLL_Prepend(returned, s); TCMalloc_SystemRelease(reinterpret_cast(s->start << kPageShift), static_cast(s->length << kPageShift)); @@ -1733,7 +2224,7 @@ class TCMalloc_ThreadCache_FreeList { template void enumerateFreeObjects(Finder& finder, const Reader& reader) { - for (void* nextObject = list_; nextObject; nextObject = *reader(reinterpret_cast(nextObject))) + for (void* nextObject = list_; nextObject; nextObject = reader.nextEntryInLinkedList(reinterpret_cast(nextObject))) finder.visit(nextObject); } #endif @@ -1746,7 +2237,7 @@ class TCMalloc_ThreadCache_FreeList { class TCMalloc_ThreadCache { private: typedef TCMalloc_ThreadCache_FreeList FreeList; -#if COMPILER(MSVC) +#if OS(WINDOWS) typedef DWORD ThreadIdentifier; #else typedef pthread_t ThreadIdentifier; @@ -1780,10 +2271,10 @@ class TCMalloc_ThreadCache { // Total byte size in cache size_t Size() const { return size_; } - void* Allocate(size_t size); + ALWAYS_INLINE void* Allocate(size_t size); void Deallocate(void* ptr, size_t size_class); - void FetchFromCentralCache(size_t cl, size_t 
allocationSize); + ALWAYS_INLINE void FetchFromCentralCache(size_t cl, size_t allocationSize); void ReleaseToCentralCache(size_t cl, int N); void Scavenge(); void Print() const; @@ -1858,7 +2349,7 @@ class TCMalloc_Central_FreeList { Span* remoteSpan = nonempty_.next; for (Span* span = reader(remoteSpan); span && remoteSpan != remoteNonempty; remoteSpan = span->next, span = (span->next ? reader(span->next) : 0)) { - for (void* nextObject = span->objects; nextObject; nextObject = *reader(reinterpret_cast(nextObject))) + for (void* nextObject = span->objects; nextObject; nextObject = reader.nextEntryInLinkedList(reinterpret_cast(nextObject))) finder.visit(nextObject); } } @@ -1884,12 +2375,12 @@ class TCMalloc_Central_FreeList { // REQUIRES: lock_ is held // Release an object to spans. // May temporarily release lock_. - void ReleaseToSpans(void* object); + ALWAYS_INLINE void ReleaseToSpans(void* object); // REQUIRES: lock_ is held // Populate cache by fetching from the page heap. // May temporarily release lock_. - void Populate(); + ALWAYS_INLINE void Populate(); // REQUIRES: lock is held. // Tries to make room for a TCEntry. If the cache is full it will try to @@ -1902,7 +2393,7 @@ class TCMalloc_Central_FreeList { // just iterates over the sizeclasses but does so without taking a lock. // Returns true on success. // May temporarily lock a "random" size class. - static bool EvictRandomSizeClass(size_t locked_size_class, bool force); + static ALWAYS_INLINE bool EvictRandomSizeClass(size_t locked_size_class, bool force); // REQUIRES: lock_ is *not* held. // Tries to shrink the Cache. If force is true it will relase objects to @@ -1952,13 +2443,7 @@ class TCMalloc_Central_FreeListPadded : public TCMalloc_Central_FreeList { static TCMalloc_Central_FreeListPadded central_cache[kNumClasses]; // Page-level allocator -static SpinLock pageheap_lock = SPINLOCK_INITIALIZER; - -#if PLATFORM(ARM) -static void* pageheap_memory[(sizeof(TCMalloc_PageHeap) + sizeof(void*) - 1) / sizeof(void*)] __attribute__((aligned)); -#else -static void* pageheap_memory[(sizeof(TCMalloc_PageHeap) + sizeof(void*) - 1) / sizeof(void*)]; -#endif +static AllocAlignmentInteger pageheap_memory[(sizeof(TCMalloc_PageHeap) + sizeof(AllocAlignmentInteger) - 1) / sizeof(AllocAlignmentInteger)]; static bool phinited = false; // Avoid extra level of indirection by making "pageheap" be just an alias @@ -1976,6 +2461,59 @@ static inline TCMalloc_PageHeap* getPageHeap() #define pageheap getPageHeap() +#if USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY + +#if HAVE(DISPATCH_H) || OS(WINDOWS) + +void TCMalloc_PageHeap::periodicScavenge() +{ + SpinLockHolder h(&pageheap_lock); + pageheap->scavenge(); + + if (shouldScavenge()) { + rescheduleScavenger(); + return; + } + + suspendScavenger(); +} + +ALWAYS_INLINE void TCMalloc_PageHeap::signalScavenger() +{ + ASSERT(pageheap_lock.IsHeld()); + if (isScavengerSuspended() && shouldScavenge()) + scheduleScavenger(); +} + +#else + +void TCMalloc_PageHeap::scavengerThread() +{ +#if HAVE(PTHREAD_SETNAME_NP) + pthread_setname_np("JavaScriptCore: FastMalloc scavenger"); +#endif + + while (1) { + if (!shouldScavenge()) { + pthread_mutex_lock(&m_scavengeMutex); + m_scavengeThreadActive = false; + // Block until there are enough free committed pages to release back to the system. 
+ pthread_cond_wait(&m_scavengeCondition, &m_scavengeMutex); + m_scavengeThreadActive = true; + pthread_mutex_unlock(&m_scavengeMutex); + } + sleep(kScavengeDelayInSeconds); + { + SpinLockHolder h(&pageheap_lock); + pageheap->scavenge(); + } + } +} + +#endif + +#endif + // If TLS is available, we also store a copy // of the per-thread object in a __thread variable // since __thread variables are faster to read @@ -1992,17 +2530,29 @@ static __thread TCMalloc_ThreadCache *threadlocal_heap; // Therefore, we use TSD keys only after tsd_inited is set to true. // Until then, we use a slow path to get the heap object. static bool tsd_inited = false; +#if USE(PTHREAD_GETSPECIFIC_DIRECT) +static const pthread_key_t heap_key = __PTK_FRAMEWORK_JAVASCRIPTCORE_KEY0; +#else static pthread_key_t heap_key; -#if COMPILER(MSVC) +#endif +#if OS(WINDOWS) DWORD tlsIndex = TLS_OUT_OF_INDEXES; #endif static ALWAYS_INLINE void setThreadHeap(TCMalloc_ThreadCache* heap) { - // still do pthread_setspecific when using MSVC fast TLS to - // benefit from the delete callback. +#if USE(PTHREAD_GETSPECIFIC_DIRECT) + // Can't have two libraries both doing this in the same process, + // so check and make this crash right away. + if (pthread_getspecific(heap_key)) + CRASH(); +#endif + + // Still do pthread_setspecific even if there's an alternate form + // of thread-local storage in use, to benefit from the delete callback. pthread_setspecific(heap_key, heap); -#if COMPILER(MSVC) + +#if OS(WINDOWS) TlsSetValue(tlsIndex, heap); #endif } @@ -2063,7 +2613,7 @@ ALWAYS_INLINE void TCMalloc_Central_FreeList::ReleaseToSpans(void* object) { // The following check is expensive, so it is disabled by default if (false) { // Check that object does not occur in list - int got = 0; + unsigned got = 0; for (void* p = span->objects; p != NULL; p = *((void**) p)) { ASSERT(p != object); got++; @@ -2256,7 +2806,13 @@ ALWAYS_INLINE void TCMalloc_Central_FreeList::Populate() { if (span) pageheap->RegisterSizeClass(span, size_class_); } if (span == NULL) { +#if HAVE(ERRNO_H) MESSAGE("allocation failed: %d\n", errno); +#elif OS(WINDOWS) + MESSAGE("allocation failed: %d\n", ::GetLastError()); +#else + MESSAGE("allocation failed\n"); +#endif lock_.Lock(); return; } @@ -2279,7 +2835,7 @@ ALWAYS_INLINE void TCMalloc_Central_FreeList::Populate() { char* nptr; while ((nptr = ptr + size) <= limit) { *tail = ptr; - tail = reinterpret_cast(ptr); + tail = reinterpret_cast_ptr(ptr); ptr = nptr; num++; } @@ -2483,7 +3039,7 @@ void TCMalloc_ThreadCache::InitModule() { } pageheap->init(); phinited = 1; -#if defined(WTF_CHANGES) && PLATFORM(DARWIN) +#if defined(WTF_CHANGES) && OS(DARWIN) FastMallocZone::init(); #endif } @@ -2507,7 +3063,7 @@ inline TCMalloc_ThreadCache* TCMalloc_ThreadCache::GetThreadHeap() { // __thread is faster, but only when the kernel supports it if (KernelSupportsTLS()) return threadlocal_heap; -#elif COMPILER(MSVC) +#elif OS(WINDOWS) return static_cast(TlsGetValue(tlsIndex)); #else return static_cast(pthread_getspecific(heap_key)); @@ -2536,13 +3092,17 @@ inline TCMalloc_ThreadCache* TCMalloc_ThreadCache::GetCacheIfPresent() { void TCMalloc_ThreadCache::InitTSD() { ASSERT(!tsd_inited); +#if USE(PTHREAD_GETSPECIFIC_DIRECT) + pthread_key_init_np(heap_key, DestroyThreadCache); +#else pthread_key_create(&heap_key, DestroyThreadCache); -#if COMPILER(MSVC) +#endif +#if OS(WINDOWS) tlsIndex = TlsAlloc(); #endif tsd_inited = true; -#if !COMPILER(MSVC) +#if !OS(WINDOWS) // We may have used a fake pthread_t for the main thread. Fix it. 
pthread_t zero; memset(&zero, 0, sizeof(zero)); @@ -2553,7 +3113,7 @@ void TCMalloc_ThreadCache::InitTSD() { ASSERT(pageheap_lock.IsHeld()); #endif for (TCMalloc_ThreadCache* h = thread_heaps; h != NULL; h = h->next_) { -#if COMPILER(MSVC) +#if OS(WINDOWS) if (h->tid_ == 0) { h->tid_ = GetCurrentThreadId(); } @@ -2571,7 +3131,7 @@ TCMalloc_ThreadCache* TCMalloc_ThreadCache::CreateCacheIfNecessary() { { SpinLockHolder h(&pageheap_lock); -#if COMPILER(MSVC) +#if OS(WINDOWS) DWORD me; if (!tsd_inited) { me = 0; @@ -2592,7 +3152,7 @@ TCMalloc_ThreadCache* TCMalloc_ThreadCache::CreateCacheIfNecessary() { // In that case, the heap for this thread has already been created // and added to the linked list. So we search for that first. for (TCMalloc_ThreadCache* h = thread_heaps; h != NULL; h = h->next_) { -#if COMPILER(MSVC) +#if OS(WINDOWS) if (h->tid_ == me) { #else if (pthread_equal(h->tid_, me)) { @@ -2623,7 +3183,7 @@ void TCMalloc_ThreadCache::BecomeIdle() { if (heap->in_setspecific_) return; // Do not disturb the active caller heap->in_setspecific_ = true; - pthread_setspecific(heap_key, NULL); + setThreadHeap(NULL); #ifdef HAVE_TLS // Also update the copy in __thread threadlocal_heap = NULL; @@ -3250,14 +3810,14 @@ extern "C" #define do_malloc do_malloc template -void* malloc(size_t); +ALWAYS_INLINE void* malloc(size_t); void* fastMalloc(size_t size) { return malloc(size); } -void* tryFastMalloc(size_t size) +TryMallocReturnValue tryFastMalloc(size_t size) { return malloc(size); } @@ -3266,7 +3826,24 @@ template ALWAYS_INLINE #endif void* malloc(size_t size) { - void* result = do_malloc(size); +#if ENABLE(WTF_MALLOC_VALIDATION) + if (std::numeric_limits::max() - Internal::ValidationBufferSize <= size) // If overflow would occur... + return 0; + void* result = do_malloc(size + Internal::ValidationBufferSize); + if (!result) + return 0; + + Internal::ValidationHeader* header = static_cast(result); + header->m_size = size; + header->m_type = Internal::AllocTypeMalloc; + header->m_prefix = static_cast(Internal::ValidationPrefix); + result = header + 1; + *Internal::fastMallocValidationSuffix(result) = Internal::ValidationSuffix; + fastMallocValidate(result); +#else + void* result = do_malloc(size); +#endif + #ifndef WTF_CHANGES MallocHook::InvokeNewHook(result, size); #endif @@ -3280,39 +3857,68 @@ void free(void* ptr) { #ifndef WTF_CHANGES MallocHook::InvokeDeleteHook(ptr); #endif - do_free(ptr); + +#if ENABLE(WTF_MALLOC_VALIDATION) + if (!ptr) + return; + + fastMallocValidate(ptr); + Internal::ValidationHeader* header = Internal::fastMallocValidationHeader(ptr); + memset(ptr, 0xCC, header->m_size); + do_free(header); +#else + do_free(ptr); +#endif } #ifndef WTF_CHANGES extern "C" #else template -void* calloc(size_t, size_t); +ALWAYS_INLINE void* calloc(size_t, size_t); void* fastCalloc(size_t n, size_t elem_size) { - return calloc(n, elem_size); + void* result = calloc(n, elem_size); +#if ENABLE(WTF_MALLOC_VALIDATION) + fastMallocValidate(result); +#endif + return result; } -void* tryFastCalloc(size_t n, size_t elem_size) +TryMallocReturnValue tryFastCalloc(size_t n, size_t elem_size) { - return calloc(n, elem_size); + void* result = calloc(n, elem_size); +#if ENABLE(WTF_MALLOC_VALIDATION) + fastMallocValidate(result); +#endif + return result; } template ALWAYS_INLINE #endif void* calloc(size_t n, size_t elem_size) { - const size_t totalBytes = n * elem_size; + size_t totalBytes = n * elem_size; // Protect against overflow if (n > 1 && elem_size && (totalBytes / elem_size) != n) return 0; - - 
void* result = do_malloc(totalBytes); - if (result != NULL) { + +#if ENABLE(WTF_MALLOC_VALIDATION) + void* result = malloc(totalBytes); + if (!result) + return 0; + memset(result, 0, totalBytes); - } + fastMallocValidate(result); +#else + void* result = do_malloc(totalBytes); + if (result != NULL) { + memset(result, 0, totalBytes); + } +#endif + #ifndef WTF_CHANGES MallocHook::InvokeNewHook(result, totalBytes); #endif @@ -3336,16 +3942,30 @@ void cfree(void* ptr) { extern "C" #else template -void* realloc(void*, size_t); +ALWAYS_INLINE void* realloc(void*, size_t); void* fastRealloc(void* old_ptr, size_t new_size) { - return realloc(old_ptr, new_size); +#if ENABLE(WTF_MALLOC_VALIDATION) + fastMallocValidate(old_ptr); +#endif + void* result = realloc(old_ptr, new_size); +#if ENABLE(WTF_MALLOC_VALIDATION) + fastMallocValidate(result); +#endif + return result; } -void* tryFastRealloc(void* old_ptr, size_t new_size) +TryMallocReturnValue tryFastRealloc(void* old_ptr, size_t new_size) { - return realloc(old_ptr, new_size); +#if ENABLE(WTF_MALLOC_VALIDATION) + fastMallocValidate(old_ptr); +#endif + void* result = realloc(old_ptr, new_size); +#if ENABLE(WTF_MALLOC_VALIDATION) + fastMallocValidate(result); +#endif + return result; } template @@ -3353,9 +3973,13 @@ ALWAYS_INLINE #endif void* realloc(void* old_ptr, size_t new_size) { if (old_ptr == NULL) { +#if ENABLE(WTF_MALLOC_VALIDATION) + void* result = malloc(new_size); +#else void* result = do_malloc(new_size); #ifndef WTF_CHANGES MallocHook::InvokeNewHook(result, new_size); +#endif #endif return result; } @@ -3367,6 +3991,16 @@ void* realloc(void* old_ptr, size_t new_size) { return NULL; } +#if ENABLE(WTF_MALLOC_VALIDATION) + if (std::numeric_limits::max() - Internal::ValidationBufferSize <= new_size) // If overflow would occur... + return 0; + Internal::ValidationHeader* header = Internal::fastMallocValidationHeader(old_ptr); + fastMallocValidate(old_ptr); + old_ptr = header; + header->m_size = new_size; + new_size += Internal::ValidationBufferSize; +#endif + // Get the size of the old entry const PageID p = reinterpret_cast(old_ptr) >> kPageShift; size_t cl = pageheap->GetSizeClassIfCached(p); @@ -3403,8 +4037,16 @@ void* realloc(void* old_ptr, size_t new_size) { // that we already know the sizeclass of old_ptr. The benefit // would be small, so don't bother. do_free(old_ptr); +#if ENABLE(WTF_MALLOC_VALIDATION) + new_ptr = static_cast(new_ptr) + 1; + *Internal::fastMallocValidationSuffix(new_ptr) = Internal::ValidationSuffix; +#endif return new_ptr; } else { +#if ENABLE(WTF_MALLOC_VALIDATION) + old_ptr = static_cast(old_ptr) + 1; // Set old_ptr back to the user pointer. 
+ *Internal::fastMallocValidationSuffix(old_ptr) = Internal::ValidationSuffix; +#endif return old_ptr; } } @@ -3453,6 +4095,8 @@ static inline void* cpp_alloc(size_t size, bool nothrow) { } } +#if ENABLE(GLOBAL_FASTMALLOC_NEW) + void* operator new(size_t size) { void* p = cpp_alloc(size, false); // We keep this next instruction out of cpp_alloc for a reason: when @@ -3507,6 +4151,8 @@ void operator delete[](void* p, const std::nothrow_t&) __THROW { do_free(p); } +#endif + extern "C" void* memalign(size_t align, size_t size) __THROW { void* result = do_memalign(align, size); MallocHook::InvokeNewHook(result, size); @@ -3574,7 +4220,7 @@ extern "C" struct mallinfo mallinfo(void) { #if defined(__GLIBC__) extern "C" { -# if defined(__GNUC__) && !defined(__MACH__) && defined(HAVE___ATTRIBUTE__) +#if COMPILER(GCC) && !defined(__MACH__) && defined(HAVE___ATTRIBUTE__) // Potentially faster variants that use the gcc alias extension. // Mach-O (Darwin) does not support weak aliases, hence the __MACH__ check. # define ALIAS(x) __attribute__ ((weak, alias (x))) @@ -3622,7 +4268,66 @@ void *(*__memalign_hook)(size_t, size_t, const void *) = MemalignOverride; #endif -#if defined(WTF_CHANGES) && PLATFORM(DARWIN) +#ifdef WTF_CHANGES +void releaseFastMallocFreeMemory() +{ + // Flush free pages in the current thread cache back to the page heap. + // Low watermark mechanism in Scavenge() prevents full return on the first pass. + // The second pass flushes everything. + if (TCMalloc_ThreadCache* threadCache = TCMalloc_ThreadCache::GetCacheIfPresent()) { + threadCache->Scavenge(); + threadCache->Scavenge(); + } + + SpinLockHolder h(&pageheap_lock); + pageheap->ReleaseFreePages(); +} + +FastMallocStatistics fastMallocStatistics() +{ + FastMallocStatistics statistics; + + SpinLockHolder lockHolder(&pageheap_lock); + statistics.reservedVMBytes = static_cast(pageheap->SystemBytes()); + statistics.committedVMBytes = statistics.reservedVMBytes - pageheap->ReturnedBytes(); + + statistics.freeListBytes = 0; + for (unsigned cl = 0; cl < kNumClasses; ++cl) { + const int length = central_cache[cl].length(); + const int tc_length = central_cache[cl].tc_length(); + + statistics.freeListBytes += ByteSizeForClass(cl) * (length + tc_length); + } + for (TCMalloc_ThreadCache* threadCache = thread_heaps; threadCache ; threadCache = threadCache->next_) + statistics.freeListBytes += threadCache->Size(); + + return statistics; +} + +size_t fastMallocSize(const void* ptr) +{ +#if ENABLE(WTF_MALLOC_VALIDATION) + return Internal::fastMallocValidationHeader(const_cast(ptr))->m_size; +#else + const PageID p = reinterpret_cast(ptr) >> kPageShift; + Span* span = pageheap->GetDescriptorEnsureSafe(p); + + if (!span || span->free) + return 0; + + for (void* free = span->objects; free != NULL; free = *((void**) free)) { + if (ptr == free) + return 0; + } + + if (size_t cl = span->sizeclass) + return ByteSizeForClass(cl); + + return span->length << kPageShift; +#endif +} + +#if OS(DARWIN) class FreeObjectFinder { const RemoteMemoryReader& m_reader; @@ -3633,6 +4338,7 @@ public: void visit(void* ptr) { m_freeObjects.add(ptr); } bool isFreeObject(void* ptr) const { return m_freeObjects.contains(ptr); } + bool isFreeObject(vm_address_t ptr) const { return isFreeObject(reinterpret_cast(ptr)); } size_t freeObjectCount() const { return m_freeObjects.size(); } void findFreeObjects(TCMalloc_ThreadCache* threadCache) @@ -3664,12 +4370,15 @@ public: return 1; Span* span = m_reader(reinterpret_cast(ptr)); + if (!span) + return 1; + if (span->free) { 
void* ptr = reinterpret_cast(span->start << kPageShift); m_freeObjectFinder.visit(ptr); } else if (span->sizeclass) { // Walk the free list of the small-object span, keeping track of each object seen - for (void* nextObject = span->objects; nextObject; nextObject = *m_reader(reinterpret_cast(nextObject))) + for (void* nextObject = span->objects; nextObject; nextObject = m_reader.nextEntryInLinkedList(reinterpret_cast(nextObject))) m_freeObjectFinder.visit(nextObject); } return span->length; @@ -3683,7 +4392,9 @@ class PageMapMemoryUsageRecorder { vm_range_recorder_t* m_recorder; const RemoteMemoryReader& m_reader; const FreeObjectFinder& m_freeObjectFinder; - mutable HashSet m_seenPointers; + + HashSet m_seenPointers; + Vector m_coalescedSpans; public: PageMapMemoryUsageRecorder(task_t task, void* context, unsigned typeMask, vm_range_recorder_t* recorder, const RemoteMemoryReader& reader, const FreeObjectFinder& freeObjectFinder) @@ -3695,51 +4406,133 @@ public: , m_freeObjectFinder(freeObjectFinder) { } - int visit(void* ptr) const + ~PageMapMemoryUsageRecorder() + { + ASSERT(!m_coalescedSpans.size()); + } + + void recordPendingRegions() + { + Span* lastSpan = m_coalescedSpans[m_coalescedSpans.size() - 1]; + vm_range_t ptrRange = { m_coalescedSpans[0]->start << kPageShift, 0 }; + ptrRange.size = (lastSpan->start << kPageShift) - ptrRange.address + (lastSpan->length * kPageSize); + + // Mark the memory region the spans represent as a candidate for containing pointers + if (m_typeMask & MALLOC_PTR_REGION_RANGE_TYPE) + (*m_recorder)(m_task, m_context, MALLOC_PTR_REGION_RANGE_TYPE, &ptrRange, 1); + + if (!(m_typeMask & MALLOC_PTR_IN_USE_RANGE_TYPE)) { + m_coalescedSpans.clear(); + return; + } + + Vector allocatedPointers; + for (size_t i = 0; i < m_coalescedSpans.size(); ++i) { + Span *theSpan = m_coalescedSpans[i]; + if (theSpan->free) + continue; + + vm_address_t spanStartAddress = theSpan->start << kPageShift; + vm_size_t spanSizeInBytes = theSpan->length * kPageSize; + + if (!theSpan->sizeclass) { + // If it's an allocated large object span, mark it as in use + if (!m_freeObjectFinder.isFreeObject(spanStartAddress)) + allocatedPointers.append((vm_range_t){spanStartAddress, spanSizeInBytes}); + } else { + const size_t objectSize = ByteSizeForClass(theSpan->sizeclass); + + // Mark each allocated small object within the span as in use + const vm_address_t endOfSpan = spanStartAddress + spanSizeInBytes; + for (vm_address_t object = spanStartAddress; object + objectSize <= endOfSpan; object += objectSize) { + if (!m_freeObjectFinder.isFreeObject(object)) + allocatedPointers.append((vm_range_t){object, objectSize}); + } + } + } + + (*m_recorder)(m_task, m_context, MALLOC_PTR_IN_USE_RANGE_TYPE, allocatedPointers.data(), allocatedPointers.size()); + + m_coalescedSpans.clear(); + } + + int visit(void* ptr) { if (!ptr) return 1; Span* span = m_reader(reinterpret_cast(ptr)); + if (!span || !span->start) + return 1; + if (m_seenPointers.contains(ptr)) return span->length; m_seenPointers.add(ptr); - // Mark the memory used for the Span itself as an administrative region - vm_range_t ptrRange = { reinterpret_cast(ptr), sizeof(Span) }; - if (m_typeMask & (MALLOC_PTR_REGION_RANGE_TYPE | MALLOC_ADMIN_REGION_RANGE_TYPE)) - (*m_recorder)(m_task, m_context, MALLOC_ADMIN_REGION_RANGE_TYPE, &ptrRange, 1); + if (!m_coalescedSpans.size()) { + m_coalescedSpans.append(span); + return span->length; + } - ptrRange.address = span->start << kPageShift; - ptrRange.size = span->length * kPageSize; + Span* previousSpan = 
m_coalescedSpans[m_coalescedSpans.size() - 1]; + vm_address_t previousSpanStartAddress = previousSpan->start << kPageShift; + vm_size_t previousSpanSizeInBytes = previousSpan->length * kPageSize; - // Mark the memory region the span represents as candidates for containing pointers - if (m_typeMask & (MALLOC_PTR_REGION_RANGE_TYPE | MALLOC_ADMIN_REGION_RANGE_TYPE)) - (*m_recorder)(m_task, m_context, MALLOC_PTR_REGION_RANGE_TYPE, &ptrRange, 1); + // If the new span is adjacent to the previous span, do nothing for now. + vm_address_t spanStartAddress = span->start << kPageShift; + if (spanStartAddress == previousSpanStartAddress + previousSpanSizeInBytes) { + m_coalescedSpans.append(span); + return span->length; + } - if (!span->free && (m_typeMask & MALLOC_PTR_IN_USE_RANGE_TYPE)) { - // If it's an allocated large object span, mark it as in use - if (span->sizeclass == 0 && !m_freeObjectFinder.isFreeObject(reinterpret_cast(ptrRange.address))) - (*m_recorder)(m_task, m_context, MALLOC_PTR_IN_USE_RANGE_TYPE, &ptrRange, 1); - else if (span->sizeclass) { - const size_t byteSize = ByteSizeForClass(span->sizeclass); - unsigned totalObjects = (span->length << kPageShift) / byteSize; - ASSERT(span->refcount <= totalObjects); - char* ptr = reinterpret_cast(span->start << kPageShift); + // New span is not adjacent to previous span, so record the spans coalesced so far. + recordPendingRegions(); + m_coalescedSpans.append(span); - // Mark each allocated small object within the span as in use - for (unsigned i = 0; i < totalObjects; i++) { - char* thisObject = ptr + (i * byteSize); - if (m_freeObjectFinder.isFreeObject(thisObject)) - continue; + return span->length; + } +}; - vm_range_t objectRange = { reinterpret_cast(thisObject), byteSize }; - (*m_recorder)(m_task, m_context, MALLOC_PTR_IN_USE_RANGE_TYPE, &objectRange, 1); - } - } +class AdminRegionRecorder { + task_t m_task; + void* m_context; + unsigned m_typeMask; + vm_range_recorder_t* m_recorder; + const RemoteMemoryReader& m_reader; + + Vector m_pendingRegions; + +public: + AdminRegionRecorder(task_t task, void* context, unsigned typeMask, vm_range_recorder_t* recorder, const RemoteMemoryReader& reader) + : m_task(task) + , m_context(context) + , m_typeMask(typeMask) + , m_recorder(recorder) + , m_reader(reader) + { } + + void recordRegion(vm_address_t ptr, size_t size) + { + if (m_typeMask & MALLOC_ADMIN_REGION_RANGE_TYPE) + m_pendingRegions.append((vm_range_t){ ptr, size }); + } + + void visit(void *ptr, size_t size) + { + recordRegion(reinterpret_cast(ptr), size); + } + + void recordPendingRegions() + { + if (m_pendingRegions.size()) { + (*m_recorder)(m_task, m_context, MALLOC_ADMIN_REGION_RANGE_TYPE, m_pendingRegions.data(), m_pendingRegions.size()); + m_pendingRegions.clear(); } + } - return span->length; + ~AdminRegionRecorder() + { + ASSERT(!m_pendingRegions.size()); } }; @@ -3762,10 +4555,22 @@ kern_return_t FastMallocZone::enumerate(task_t task, void* context, unsigned typ TCMalloc_PageHeap::PageMap* pageMap = &pageHeap->pagemap_; PageMapFreeObjectFinder pageMapFinder(memoryReader, finder); - pageMap->visit(pageMapFinder, memoryReader); + pageMap->visitValues(pageMapFinder, memoryReader); PageMapMemoryUsageRecorder usageRecorder(task, context, typeMask, recorder, memoryReader, finder); - pageMap->visit(usageRecorder, memoryReader); + pageMap->visitValues(usageRecorder, memoryReader); + usageRecorder.recordPendingRegions(); + + AdminRegionRecorder adminRegionRecorder(task, context, typeMask, recorder, memoryReader); + 
pageMap->visitAllocations(adminRegionRecorder, memoryReader); + + PageHeapAllocator* spanAllocator = memoryReader(mzone->m_spanAllocator); + PageHeapAllocator* pageHeapAllocator = memoryReader(mzone->m_pageHeapAllocator); + + spanAllocator->recordAdministrativeRegions(adminRegionRecorder, memoryReader); + pageHeapAllocator->recordAdministrativeRegions(adminRegionRecorder, memoryReader); + + adminRegionRecorder.recordPendingRegions(); return 0; } @@ -3806,15 +4611,23 @@ void* FastMallocZone::zoneRealloc(malloc_zone_t*, void*, size_t) extern "C" { malloc_introspection_t jscore_fastmalloc_introspection = { &FastMallocZone::enumerate, &FastMallocZone::goodSize, &FastMallocZone::check, &FastMallocZone::print, - &FastMallocZone::log, &FastMallocZone::forceLock, &FastMallocZone::forceUnlock, &FastMallocZone::statistics }; + &FastMallocZone::log, &FastMallocZone::forceLock, &FastMallocZone::forceUnlock, &FastMallocZone::statistics + + , 0 // zone_locked will not be called on the zone unless it advertises itself as version five or higher. + , 0, 0, 0, 0 // These members will not be used unless the zone advertises itself as version seven or higher. + + }; } -FastMallocZone::FastMallocZone(TCMalloc_PageHeap* pageHeap, TCMalloc_ThreadCache** threadHeaps, TCMalloc_Central_FreeListPadded* centralCaches) +FastMallocZone::FastMallocZone(TCMalloc_PageHeap* pageHeap, TCMalloc_ThreadCache** threadHeaps, TCMalloc_Central_FreeListPadded* centralCaches, PageHeapAllocator* spanAllocator, PageHeapAllocator* pageHeapAllocator) : m_pageHeap(pageHeap) , m_threadHeaps(threadHeaps) , m_centralCaches(centralCaches) + , m_spanAllocator(spanAllocator) + , m_pageHeapAllocator(pageHeapAllocator) { memset(&m_zone, 0, sizeof(m_zone)); + m_zone.version = 4; m_zone.zone_name = "JavaScriptCore FastMalloc"; m_zone.size = &FastMallocZone::size; m_zone.malloc = &FastMallocZone::zoneMalloc; @@ -3830,47 +4643,12 @@ FastMallocZone::FastMallocZone(TCMalloc_PageHeap* pageHeap, TCMalloc_ThreadCache void FastMallocZone::init() { - static FastMallocZone zone(pageheap, &thread_heaps, static_cast(central_cache)); + static FastMallocZone zone(pageheap, &thread_heaps, static_cast(central_cache), &span_allocator, &threadheap_allocator); } -#endif - -#if WTF_CHANGES -void releaseFastMallocFreeMemory() -{ - // Flush free pages in the current thread cache back to the page heap. - // Low watermark mechanism in Scavenge() prevents full return on the first pass. - // The second pass flushes everything. 
-    if (TCMalloc_ThreadCache* threadCache = TCMalloc_ThreadCache::GetCacheIfPresent()) {
-        threadCache->Scavenge();
-        threadCache->Scavenge();
-    }
-
-    SpinLockHolder h(&pageheap_lock);
-    pageheap->ReleaseFreePages();
-}
-
-FastMallocStatistics fastMallocStatistics()
-{
-    FastMallocStatistics statistics;
-    {
-        SpinLockHolder lockHolder(&pageheap_lock);
-        statistics.heapSize = static_cast<size_t>(pageheap->SystemBytes());
-        statistics.freeSizeInHeap = static_cast<size_t>(pageheap->FreeBytes());
-        statistics.returnedSize = pageheap->ReturnedBytes();
-        statistics.freeSizeInCaches = 0;
-        for (TCMalloc_ThreadCache* threadCache = thread_heaps; threadCache ; threadCache = threadCache->next_)
-            statistics.freeSizeInCaches += threadCache->Size();
-    }
-    for (unsigned cl = 0; cl < kNumClasses; ++cl) {
-        const int length = central_cache[cl].length();
-        const int tc_length = central_cache[cl].tc_length();
-        statistics.freeSizeInCaches += ByteSizeForClass(cl) * (length + tc_length);
-    }
-    return statistics;
-}
+#endif // OS(DARWIN)
 
 } // namespace WTF
 
-#endif
+#endif // WTF_CHANGES
 
 #endif // FORCE_SYSTEM_MALLOC
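
The ENABLE(WTF_MALLOC_VALIDATION) paths added in this patch bracket each allocation with a header (requested size, allocation type, magic prefix) and a magic suffix word, and scrub the payload with 0xCC when it is freed. Below is a minimal, self-contained sketch of that guard-band idea; GuardHeader, kPrefix, kSuffix, guardedMalloc and guardedFree are illustrative names, not the real Internal::ValidationHeader / fastMallocValidate API.

// Guard-band allocation sketch (illustrative names, not the WTF API).
#include <cstddef>
#include <cstdint>
#include <cstdlib>
#include <cstring>
#include <limits>

namespace {

struct GuardHeader {
    size_t size;      // requested payload size; needed to find the suffix and to scrub on free
    uint32_t prefix;  // magic value written just before the payload
};

const uint32_t kPrefix = 0xF00DFACEu; // hypothetical magic values
const uint32_t kSuffix = 0x0BADBEEFu;

void writeSuffix(void* payload, size_t size, uint32_t value)
{
    std::memcpy(static_cast<char*>(payload) + size, &value, sizeof value);
}

uint32_t readSuffix(const void* payload, size_t size)
{
    uint32_t value;
    std::memcpy(&value, static_cast<const char*>(payload) + size, sizeof value);
    return value;
}

void* guardedMalloc(size_t size)
{
    // Refuse sizes that would overflow once the guards are added.
    if (size > std::numeric_limits<size_t>::max() - sizeof(GuardHeader) - sizeof(uint32_t))
        return 0;

    void* raw = std::malloc(sizeof(GuardHeader) + size + sizeof(uint32_t));
    if (!raw)
        return 0;

    GuardHeader* header = static_cast<GuardHeader*>(raw);
    header->size = size;
    header->prefix = kPrefix;

    void* payload = header + 1;
    writeSuffix(payload, size, kSuffix);
    return payload;
}

void guardedFree(void* payload)
{
    if (!payload)
        return;

    GuardHeader* header = static_cast<GuardHeader*>(payload) - 1;
    // A damaged prefix or suffix indicates a buffer under- or overrun.
    if (header->prefix != kPrefix || readSuffix(payload, header->size) != kSuffix)
        std::abort();

    std::memset(payload, 0xCC, header->size); // poison the freed payload
    std::free(header);
}

} // namespace

int main()
{
    char* p = static_cast<char*>(guardedMalloc(16));
    if (!p)
        return 1;
    std::memset(p, 0, 16); // stays inside the guards
    guardedFree(p);        // verifies both guards before releasing
    return 0;
}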
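
tryFastCalloc above protects the n * element_size multiplication by dividing the product back and comparing it with n. A stand-alone sketch of just that overflow check over a plain malloc backend (checkedCalloc is a hypothetical name):

// Overflow-checked calloc sketch.
#include <cstddef>
#include <cstdlib>
#include <cstring>

void* checkedCalloc(size_t n, size_t elementSize)
{
    size_t totalBytes = n * elementSize;
    // If the multiplication wrapped around, dividing back cannot recover n.
    if (n > 1 && elementSize && totalBytes / elementSize != n)
        return 0;

    void* result = std::malloc(totalBytes);
    if (result)
        std::memset(result, 0, totalBytes);
    return result;
}

int main()
{
    void* ok = checkedCalloc(16, 32);                // normal allocation
    void* overflowed = checkedCalloc(~size_t(0), 8); // product wraps, rejected
    std::free(ok);
    return overflowed ? 1 : 0;
}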
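
The entry points above now return TryMallocReturnValue instead of a raw pointer, so callers retrieve the result through getValue() and are nudged into an explicit success check. The following is a simplified stand-in for that pattern, not the WTF implementation; TryResult and tryAlloc are illustrative names.

// Forced-check allocation result sketch.
#include <cstddef>
#include <cstdio>
#include <cstdlib>

class TryResult {
public:
    explicit TryResult(void* data)
        : m_data(data)
    {
    }

    // Returns false on allocation failure; on success hands the pointer out.
    template<typename T> bool getValue(T*& data)
    {
        data = static_cast<T*>(m_data);
        return m_data != 0;
    }

private:
    void* m_data;
};

TryResult tryAlloc(size_t size)
{
    return TryResult(std::malloc(size));
}

int main()
{
    char* buffer;
    if (!tryAlloc(64).getValue(buffer)) {
        std::puts("allocation failed");
        return 1;
    }
    buffer[0] = '\0';
    std::free(buffer);
    return 0;
}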
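
The comment block introduced with USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY describes the scavenging policy: once the committed free pages exceed kMinimumFreeCommittedPageCount (32 * 16.5 = 528 pages), each pass returns kScavengePercentage of the pages that stayed unused since the previous pass, never dropping below that floor. The toy model below exercises only that bookkeeping; ModelPageHeap is a stand-in, not a TCMalloc class, and the real scavenge() walks span free lists and calls TCMalloc_SystemRelease.

// Scavenging-policy model (bookkeeping only).
#include <algorithm>
#include <cstdio>

typedef unsigned long Length;

static const Length kMinimumFreeCommittedPageCount = 528;
static const float kScavengePercentage = .5f;

struct ModelPageHeap {
    Length freeCommittedPages;
    Length minFreeCommittedPagesSinceLastScavenge;

    bool shouldScavenge() const
    {
        return freeCommittedPages > kMinimumFreeCommittedPageCount;
    }

    // One pass: release a fraction of the pages that stayed unused, down to the floor.
    void scavenge()
    {
        Length pagesToRelease =
            static_cast<Length>(minFreeCommittedPagesSinceLastScavenge * kScavengePercentage);
        Length target = std::max(kMinimumFreeCommittedPageCount, freeCommittedPages - pagesToRelease);

        // The real code decommits spans here; the model only updates the counters.
        freeCommittedPages = std::min(freeCommittedPages, target);
        minFreeCommittedPagesSinceLastScavenge = freeCommittedPages;
    }
};

int main()
{
    ModelPageHeap heap = { 4096, 4096 };
    while (heap.shouldScavenge()) {
        heap.scavenge();
        std::printf("free committed pages: %lu\n", heap.freeCommittedPages);
    }
    return 0;
}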
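
TCMalloc_Central_FreeList::Populate() carves a freshly fetched span into equal-sized objects and threads them onto an intrusive singly linked free list, reusing each object's first word as the "next" pointer. A stand-alone sketch of that carving loop, with illustrative sizes (the real code works on spans obtained from the page heap, not on a malloc'd buffer):

// Intrusive free-list carving sketch.
#include <cstddef>
#include <cstdio>
#include <cstdlib>

int main()
{
    const size_t objectSize = 64;   // must be at least sizeof(void*)
    const size_t spanBytes = 4096;  // stand-in for a page-sized span

    char* span = static_cast<char*>(std::malloc(spanBytes));
    if (!span)
        return 1;

    // Thread every object in the span onto a singly linked free list.
    void* freeList = 0;
    void** tail = &freeList;
    char* ptr = span;
    char* limit = span + spanBytes;
    size_t count = 0;
    while (ptr + objectSize <= limit) {
        *tail = ptr;                          // link the previous node to this object
        tail = reinterpret_cast<void**>(ptr); // this object's first word becomes the new tail
        ptr += objectSize;
        ++count;
    }
    *tail = 0; // terminate the list

    // Popping the head is a constant-time allocation.
    void* object = freeList;
    freeList = *static_cast<void**>(object);

    std::printf("threaded %zu objects; first pop returned %p, next head %p\n",
                count, object, freeList);
    std::free(span);
    return 0;
}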