// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*-
// Copyright (c) 2008, Google Inc.
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
//     * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

// ---
// Author: Sanjay Ghemawat

#include <config.h>
#ifdef HAVE_INTTYPES_H
#include <inttypes.h>                         // for PRIuPTR
#endif
#include <errno.h>                            // for ENOMEM, errno
#include <gperftools/malloc_extension.h>      // for MallocRange, etc
#include "base/basictypes.h"
#include "base/commandlineflags.h"
#include "internal_logging.h"                 // for ASSERT, TCMalloc_Printer, etc
#include "page_heap_allocator.h"              // for PageHeapAllocator
#include "static_vars.h"                      // for Static
#include "system-alloc.h"                     // for TCMalloc_SystemAlloc, etc

DEFINE_double(tcmalloc_release_rate,
              EnvToDouble("TCMALLOC_RELEASE_RATE", 1.0),
              "Rate at which we release unused memory to the system. "
              "Zero means we never release memory back to the system. "
              "Increase this flag to return memory faster; decrease it "
              "to return memory slower. Reasonable rates are in the "
              "range [0,10]");

DEFINE_int64(tcmalloc_heap_limit_mb,
             EnvToInt("TCMALLOC_HEAP_LIMIT_MB", 0),
             "Limit total size of the process heap to the "
             "specified number of MiB. "
             "When we approach the limit the memory is released "
             "to the system more aggressively (more minor page faults). "
             "Zero means to allocate as long as system allows.");

namespace tcmalloc {

PageHeap::PageHeap()
    : pagemap_(MetaDataAlloc),
      pagemap_cache_(0),
      scavenge_counter_(0),
      // Start scavenging at kMaxPages list
      release_index_(kMaxPages),
      aggressive_decommit_(false) {
  COMPILE_ASSERT(kNumClasses <= (1 << PageMapCache::kValuebits), valuebits);
  DLL_Init(&large_.normal);
  DLL_Init(&large_.returned);
  for (int i = 0; i < kMaxPages; i++) {
    DLL_Init(&free_[i].normal);
    DLL_Init(&free_[i].returned);
  }
}

Span* PageHeap::SearchFreeAndLargeLists(Length n) {
  ASSERT(Check());
  ASSERT(n > 0);

  // Find first size >= n that has a non-empty list
  for (Length s = n; s < kMaxPages; s++) {
    Span* ll = &free_[s].normal;
    // If we're lucky, ll is non-empty, meaning it has a suitable span.
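    // (free_[s] holds spans of exactly s pages; the 'normal' list tracks spans
    // that are still committed, while the 'returned' list tracks spans whose
    // pages have been released back to the OS.)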
    if (!DLL_IsEmpty(ll)) {
      ASSERT(ll->next->location == Span::ON_NORMAL_FREELIST);
      return Carve(ll->next, n);
    }
    // Alternatively, maybe there's a usable returned span.
    ll = &free_[s].returned;
    if (!DLL_IsEmpty(ll)) {
      // We did not call EnsureLimit before, to avoid releasing a span
      // that would be taken back immediately.
      // Calling EnsureLimit here is not very expensive, as it fails only if
      // there are no more normal spans (and it fails efficiently)
      // or SystemRelease does not work (there are probably no returned spans).
      if (EnsureLimit(n)) {
        // ll may have become empty due to coalescing
        if (!DLL_IsEmpty(ll)) {
          ASSERT(ll->next->location == Span::ON_RETURNED_FREELIST);
          return Carve(ll->next, n);
        }
      }
    }
  }
  // No luck in free lists, our last chance is in a larger class.
  return AllocLarge(n);  // May be NULL
}

static const size_t kForcedCoalesceInterval = 128*1024*1024;

Span* PageHeap::New(Length n) {
  ASSERT(Check());
  ASSERT(n > 0);
  Span* result = SearchFreeAndLargeLists(n);
  if (result != NULL)
    return result;

  if (stats_.free_bytes != 0 && stats_.unmapped_bytes != 0
      && stats_.free_bytes + stats_.unmapped_bytes >= stats_.system_bytes / 4
      && (stats_.system_bytes / kForcedCoalesceInterval
          != (stats_.system_bytes + (n << kPageShift)) / kForcedCoalesceInterval)) {
    // We're about to grow the heap, but there are lots of free pages.
    // tcmalloc's design decision to keep unmapped and free spans
    // separately and never coalesce them means that sometimes there
    // can be a free run of pages of sufficient total size, but it consists
    // of "segments" of different types, so the page heap search cannot find
    // it. In order to prevent growing the heap and wasting memory in such
    // a case, we're going to unmap all free pages, so that all free
    // spans are maximally coalesced.
    //
    // We're also limiting the 'rate' of going into this path to be at
    // most once per 128 megs of heap growth. Otherwise programs that
    // grow the heap frequently (and that means by small amounts) could be
    // penalized with a higher count of minor page faults.
    //
    // See also large_heap_fragmentation_unittest.cc and
    // https://code.google.com/p/gperftools/issues/detail?id=368
    ReleaseAtLeastNPages(static_cast<Length>(0x7fffffff));

    // Then try again. If we are forced to grow the heap because of large-span
    // fragmentation and not because of the problem described above,
    // then at the very least we've just unmapped free but
    // insufficiently big large spans back to the OS. So in case of really
    // unlucky memory fragmentation we'll be consuming virtual address
    // space, but not real memory.
    result = SearchFreeAndLargeLists(n);
    if (result != NULL) return result;
  }

  // Grow the heap and try again.
  if (!GrowHeap(n)) {
    ASSERT(stats_.unmapped_bytes + stats_.committed_bytes == stats_.system_bytes);
    ASSERT(Check());
    // The underlying SysAllocator likely set ENOMEM, but we can get here
    // due to EnsureLimit, so we set it here too.
    //
    // Setting errno to ENOMEM here allows us to avoid dealing with it
    // in the fast path.
    errno = ENOMEM;
    return NULL;
  }
  return SearchFreeAndLargeLists(n);
}

Span* PageHeap::AllocLarge(Length n) {
  // Find the best span (closest to n in size).
  // The following loops implement address-ordered best-fit.
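  // "Best" means the shortest span that is at least n pages long; ties are
  // broken by choosing the lower start address (hence "address-ordered").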
  Span *best = NULL;

  // Search through normal list
  for (Span* span = large_.normal.next;
       span != &large_.normal;
       span = span->next) {
    if (span->length >= n) {
      if ((best == NULL)
          || (span->length < best->length)
          || ((span->length == best->length) && (span->start < best->start))) {
        best = span;
        ASSERT(best->location == Span::ON_NORMAL_FREELIST);
      }
    }
  }

  Span *bestNormal = best;

  // Search through released list in case it has a better fit
  for (Span* span = large_.returned.next;
       span != &large_.returned;
       span = span->next) {
    if (span->length >= n) {
      if ((best == NULL)
          || (span->length < best->length)
          || ((span->length == best->length) && (span->start < best->start))) {
        best = span;
        ASSERT(best->location == Span::ON_RETURNED_FREELIST);
      }
    }
  }

  if (best == bestNormal) {
    return best == NULL ? NULL : Carve(best, n);
  }

  // best comes from the returned list.

  if (EnsureLimit(n, false)) {
    return Carve(best, n);
  }

  if (EnsureLimit(n, true)) {
    // best could have been destroyed by coalescing.
    // bestNormal is not a best-fit, and it could be destroyed as well.
    // We retry, since the limit is already ensured:
    return AllocLarge(n);
  }

  // If bestNormal existed, EnsureLimit would have succeeded:
  ASSERT(bestNormal == NULL);
  // We are not allowed to take best from the returned list.
  return NULL;
}

Span* PageHeap::Split(Span* span, Length n) {
  ASSERT(0 < n);
  ASSERT(n < span->length);
  ASSERT(span->location == Span::IN_USE);
  ASSERT(span->sizeclass == 0);
  Event(span, 'T', n);

  const int extra = span->length - n;
  Span* leftover = NewSpan(span->start + n, extra);
  ASSERT(leftover->location == Span::IN_USE);
  Event(leftover, 'U', extra);
  RecordSpan(leftover);
  pagemap_.set(span->start + n - 1, span);  // Update map from pageid to span
  span->length = n;

  return leftover;
}

void PageHeap::CommitSpan(Span* span) {
  TCMalloc_SystemCommit(reinterpret_cast<void*>(span->start << kPageShift),
                        static_cast<size_t>(span->length << kPageShift));
  stats_.committed_bytes += span->length << kPageShift;
}

bool PageHeap::DecommitSpan(Span* span) {
  bool rv = TCMalloc_SystemRelease(reinterpret_cast<void*>(span->start << kPageShift),
                                   static_cast<size_t>(span->length << kPageShift));
  if (rv) {
    stats_.committed_bytes -= span->length << kPageShift;
  }
  return rv;
}

Span* PageHeap::Carve(Span* span, Length n) {
  ASSERT(n > 0);
  ASSERT(span->location != Span::IN_USE);
  const int old_location = span->location;
  RemoveFromFreeList(span);
  span->location = Span::IN_USE;
  Event(span, 'A', n);

  const int extra = span->length - n;
  ASSERT(extra >= 0);
  if (extra > 0) {
    Span* leftover = NewSpan(span->start + n, extra);
    leftover->location = old_location;
    Event(leftover, 'S', extra);
    RecordSpan(leftover);

    // The previous span of |leftover| was just split -- no need to
    // coalesce them. The next span of |leftover| was not previously coalesced
    // with |span|, i.e. it is NULL or has a location other than |old_location|.
#ifndef NDEBUG
    const PageID p = leftover->start;
    const Length len = leftover->length;
    Span* next = GetDescriptor(p+len);
    ASSERT(next == NULL ||
           next->location == Span::IN_USE ||
           next->location != leftover->location);
#endif

    PrependToFreeList(leftover);  // Skip coalescing - no candidates possible
    span->length = n;
    pagemap_.set(span->start + n - 1, span);
  }
  ASSERT(Check());
  if (old_location == Span::ON_RETURNED_FREELIST) {
    // We need to recommit this address space.
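    // The span's pages were previously released to the OS, so commit them
    // again (and account for them in committed_bytes) before handing the
    // span out.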
    CommitSpan(span);
  }
  ASSERT(span->location == Span::IN_USE);
  ASSERT(span->length == n);
  ASSERT(stats_.unmapped_bytes + stats_.committed_bytes == stats_.system_bytes);
  return span;
}

void PageHeap::Delete(Span* span) {
  ASSERT(Check());
  ASSERT(span->location == Span::IN_USE);
  ASSERT(span->length > 0);
  ASSERT(GetDescriptor(span->start) == span);
  ASSERT(GetDescriptor(span->start + span->length - 1) == span);
  const Length n = span->length;
  span->sizeclass = 0;
  span->sample = 0;
  span->location = Span::ON_NORMAL_FREELIST;
  Event(span, 'D', span->length);
  MergeIntoFreeList(span);  // Coalesces if possible
  IncrementalScavenge(n);
  ASSERT(stats_.unmapped_bytes + stats_.committed_bytes == stats_.system_bytes);
  ASSERT(Check());
}

bool PageHeap::MayMergeSpans(Span *span, Span *other) {
  if (aggressive_decommit_) {
    return other->location != Span::IN_USE;
  }
  return span->location == other->location;
}

void PageHeap::MergeIntoFreeList(Span* span) {
  ASSERT(span->location != Span::IN_USE);

  // Coalesce -- we guarantee that "p" != 0, so no bounds checking
  // necessary. We do not bother resetting the stale pagemap
  // entries for the pieces we are merging together because we only
  // care about the pagemap entries for the boundaries.
  //
  // Note: depending on aggressive_decommit_ mode we allow only
  // similar spans to be coalesced.
  //
  // The following applies if aggressive_decommit_ is enabled:
  //
  // Note that the adjacent spans we merge into "span" may come out of a
  // "normal" (committed) list, and cleanly merge with our IN_USE span, which
  // is implicitly committed. If the adjacent spans are on the "returned"
  // (decommitted) list, then we must get both spans into the same state before
  // or after we coalesce them. The current code always decommits. This is
  // achieved by blindly decommitting the entire coalesced region, which may
  // include any combination of committed and decommitted spans, at the end of
  // the method.
  //
  // TODO(jar): "Always decommit" causes some extra calls to commit when we are
  // called in GrowHeap() during an allocation :-/. We need to eval the cost of
  // that oscillation, and possibly do something to reduce it.
  //
  // TODO(jar): We need a better strategy for deciding to commit, or decommit,
  // based on memory usage and free heap sizes.

  uint64_t temp_committed = 0;

  const PageID p = span->start;
  const Length n = span->length;
  Span* prev = GetDescriptor(p-1);
  if (prev != NULL && MayMergeSpans(span, prev)) {
    // Merge preceding span into this span
    ASSERT(prev->start + prev->length == p);
    const Length len = prev->length;
    if (aggressive_decommit_ && prev->location == Span::ON_RETURNED_FREELIST) {
      // We're about to put the merged span into the returned freelist and call
      // DecommitSpan() on it, which will mark the entire span including this
      // one as released and decrease stats_.committed_bytes by the size of the
      // merged span. To make the math work out we temporarily increase the
      // stats_.committed_bytes amount.
      temp_committed = prev->length << kPageShift;
    }
    RemoveFromFreeList(prev);
    DeleteSpan(prev);
    span->start -= len;
    span->length += len;
    pagemap_.set(span->start, span);
    Event(span, 'L', len);
  }
  Span* next = GetDescriptor(p+n);
  if (next != NULL && MayMergeSpans(span, next)) {
    // Merge next span into this span
    ASSERT(next->start == p+n);
    const Length len = next->length;
    if (aggressive_decommit_ && next->location == Span::ON_RETURNED_FREELIST) {
      // See the comment in the matching 'prev' block above for explanation.
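      // (In short: next's bytes were already subtracted from committed_bytes
      // when it was decommitted, and the blanket DecommitSpan() below will
      // subtract them again, so pre-credit them via temp_committed.)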
      temp_committed += next->length << kPageShift;
    }
    RemoveFromFreeList(next);
    DeleteSpan(next);
    span->length += len;
    pagemap_.set(span->start + span->length - 1, span);
    Event(span, 'R', len);
  }

  if (aggressive_decommit_) {
    if (DecommitSpan(span)) {
      span->location = Span::ON_RETURNED_FREELIST;
      stats_.committed_bytes += temp_committed;
    } else {
      ASSERT(temp_committed == 0);
    }
  }
  PrependToFreeList(span);
}

void PageHeap::PrependToFreeList(Span* span) {
  ASSERT(span->location != Span::IN_USE);
  SpanList* list = (span->length < kMaxPages) ? &free_[span->length] : &large_;
  if (span->location == Span::ON_NORMAL_FREELIST) {
    stats_.free_bytes += (span->length << kPageShift);
    DLL_Prepend(&list->normal, span);
  } else {
    stats_.unmapped_bytes += (span->length << kPageShift);
    DLL_Prepend(&list->returned, span);
  }
}

void PageHeap::RemoveFromFreeList(Span* span) {
  ASSERT(span->location != Span::IN_USE);
  if (span->location == Span::ON_NORMAL_FREELIST) {
    stats_.free_bytes -= (span->length << kPageShift);
  } else {
    stats_.unmapped_bytes -= (span->length << kPageShift);
  }
  DLL_Remove(span);
}

void PageHeap::IncrementalScavenge(Length n) {
  // Fast path; not yet time to release memory
  scavenge_counter_ -= n;
  if (scavenge_counter_ >= 0) return;  // Not yet time to scavenge

  const double rate = FLAGS_tcmalloc_release_rate;
  if (rate <= 1e-6) {
    // Tiny release rate means that releasing is disabled.
    scavenge_counter_ = kDefaultReleaseDelay;
    return;
  }

  Length released_pages = ReleaseAtLeastNPages(1);

  if (released_pages == 0) {
    // Nothing to scavenge, delay for a while.
    scavenge_counter_ = kDefaultReleaseDelay;
  } else {
    // Compute how long to wait until we return memory.
    // FLAGS_tcmalloc_release_rate==1 means wait for 1000 pages
    // after releasing one page.
    const double mult = 1000.0 / rate;
    double wait = mult * static_cast<double>(released_pages);
    if (wait > kMaxReleaseDelay) {
      // Avoid overflow and bound to reasonable range.
      wait = kMaxReleaseDelay;
    }
    scavenge_counter_ = static_cast<int64_t>(wait);
  }
}

Length PageHeap::ReleaseLastNormalSpan(SpanList* slist) {
  Span* s = slist->normal.prev;
  ASSERT(s->location == Span::ON_NORMAL_FREELIST);

  if (DecommitSpan(s)) {
    RemoveFromFreeList(s);
    const Length n = s->length;
    s->location = Span::ON_RETURNED_FREELIST;
    MergeIntoFreeList(s);  // Coalesces if possible.
    return n;
  }

  return 0;
}

Length PageHeap::ReleaseAtLeastNPages(Length num_pages) {
  Length released_pages = 0;

  // Round robin through the lists of free spans, releasing the last
  // span in each list. Stop after releasing at least num_pages
  // or when there is nothing more to release.
  while (released_pages < num_pages && stats_.free_bytes > 0) {
    for (int i = 0; i < kMaxPages+1 && released_pages < num_pages;
         i++, release_index_++) {
      if (release_index_ > kMaxPages) release_index_ = 0;
      SpanList* slist = (release_index_ == kMaxPages) ?
          &large_ : &free_[release_index_];
      if (!DLL_IsEmpty(&slist->normal)) {
        Length released_len = ReleaseLastNormalSpan(slist);
        // Some systems do not support release
        if (released_len == 0) return released_pages;
        released_pages += released_len;
      }
    }
  }
  return released_pages;
}

bool PageHeap::EnsureLimit(Length n, bool withRelease) {
  Length limit = (FLAGS_tcmalloc_heap_limit_mb*1024*1024) >> kPageShift;
  if (limit == 0) return true;  // there is no limit

  // We do not use stats_.system_bytes because it does not take
  // MetaDataAllocs into account.
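  // TCMalloc_SystemTaken counts every byte obtained from the system allocator,
  // including metadata allocations, so it is the conservative figure to check
  // against the limit.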
  Length takenPages = TCMalloc_SystemTaken >> kPageShift;
  // XXX takenPages may be slightly bigger than limit for two reasons:
  // * MetaDataAllocs ignore the limit (it is not easy to handle
  //   out of memory there)
  // * sys_alloc may round the allocation up to the huge page size,
  //   although a smaller limit was ensured

  ASSERT(takenPages >= stats_.unmapped_bytes >> kPageShift);
  takenPages -= stats_.unmapped_bytes >> kPageShift;

  if (takenPages + n > limit && withRelease) {
    takenPages -= ReleaseAtLeastNPages(takenPages + n - limit);
  }

  return takenPages + n <= limit;
}

void PageHeap::RegisterSizeClass(Span* span, size_t sc) {
  // Associate span object with all interior pages as well
  ASSERT(span->location == Span::IN_USE);
  ASSERT(GetDescriptor(span->start) == span);
  ASSERT(GetDescriptor(span->start+span->length-1) == span);
  Event(span, 'C', sc);
  span->sizeclass = sc;
  for (Length i = 1; i < span->length-1; i++) {
    pagemap_.set(span->start+i, span);
  }
}

void PageHeap::GetSmallSpanStats(SmallSpanStats* result) {
  for (int s = 0; s < kMaxPages; s++) {
    result->normal_length[s] = DLL_Length(&free_[s].normal);
    result->returned_length[s] = DLL_Length(&free_[s].returned);
  }
}

void PageHeap::GetLargeSpanStats(LargeSpanStats* result) {
  result->spans = 0;
  result->normal_pages = 0;
  result->returned_pages = 0;
  for (Span* s = large_.normal.next; s != &large_.normal; s = s->next) {
    result->normal_pages += s->length;
    result->spans++;
  }
  for (Span* s = large_.returned.next; s != &large_.returned; s = s->next) {
    result->returned_pages += s->length;
    result->spans++;
  }
}

bool PageHeap::GetNextRange(PageID start, base::MallocRange* r) {
  Span* span = reinterpret_cast<Span*>(pagemap_.Next(start));
  if (span == NULL) {
    return false;
  }
  r->address = span->start << kPageShift;
  r->length = span->length << kPageShift;
  r->fraction = 0;
  switch (span->location) {
    case Span::IN_USE:
      r->type = base::MallocRange::INUSE;
      r->fraction = 1;
      if (span->sizeclass > 0) {
        // Only some of the objects in this span may be in use.
        const size_t osize = Static::sizemap()->class_to_size(span->sizeclass);
        r->fraction = (1.0 * osize * span->refcount) / r->length;
      }
      break;
    case Span::ON_NORMAL_FREELIST:
      r->type = base::MallocRange::FREE;
      break;
    case Span::ON_RETURNED_FREELIST:
      r->type = base::MallocRange::UNMAPPED;
      break;
    default:
      r->type = base::MallocRange::UNKNOWN;
      break;
  }
  return true;
}

static void RecordGrowth(size_t growth) {
  StackTrace* t = Static::stacktrace_allocator()->New();
  t->depth = GetStackTrace(t->stack, kMaxStackDepth-1, 3);
  t->size = growth;
  t->stack[kMaxStackDepth-1] = reinterpret_cast<void*>(Static::growth_stacks());
  Static::set_growth_stacks(t);
}

bool PageHeap::GrowHeap(Length n) {
  ASSERT(kMaxPages >= kMinSystemAlloc);
  if (n > kMaxValidPages) return false;
  Length ask = (n > kMinSystemAlloc) ? n : static_cast<Length>(kMinSystemAlloc);
  size_t actual_size;
  void* ptr = NULL;
  if (EnsureLimit(ask)) {
    ptr = TCMalloc_SystemAlloc(ask << kPageShift, &actual_size, kPageSize);
  }
  if (ptr == NULL) {
    if (n < ask) {
      // Try growing just "n" pages
      ask = n;
      if (EnsureLimit(ask)) {
        ptr = TCMalloc_SystemAlloc(ask << kPageShift, &actual_size, kPageSize);
      }
    }
    if (ptr == NULL) return false;
  }
  ask = actual_size >> kPageShift;
  RecordGrowth(ask << kPageShift);

  uint64_t old_system_bytes = stats_.system_bytes;
  stats_.system_bytes += (ask << kPageShift);
  stats_.committed_bytes += (ask << kPageShift);
  const PageID p = reinterpret_cast<uintptr_t>(ptr) >> kPageShift;
  ASSERT(p > 0);

  // If we already have a lot of pages allocated, just preallocate a bunch of
  // memory for the page map.
  // This prevents fragmentation by pagemap metadata when a program keeps
  // allocating and freeing large blocks.
  if (old_system_bytes < kPageMapBigAllocationThreshold
      && stats_.system_bytes >= kPageMapBigAllocationThreshold) {
    pagemap_.PreallocateMoreMemory();
  }

  // Make sure pagemap_ has entries for all of the new pages.
  // Plus ensure one before and one after so coalescing code
  // does not need bounds-checking.
  if (pagemap_.Ensure(p-1, ask+2)) {
    // Pretend the new area is allocated and then Delete() it to cause
    // any necessary coalescing to occur.
    Span* span = NewSpan(p, ask);
    RecordSpan(span);
    Delete(span);
    ASSERT(stats_.unmapped_bytes + stats_.committed_bytes == stats_.system_bytes);
    ASSERT(Check());
    return true;
  } else {
    // We could not allocate memory within "pagemap_"
    // TODO: Once we can return memory to the system, return the new span
    return false;
  }
}

bool PageHeap::Check() {
  ASSERT(free_[0].normal.next == &free_[0].normal);
  ASSERT(free_[0].returned.next == &free_[0].returned);
  return true;
}

bool PageHeap::CheckExpensive() {
  bool result = Check();
  CheckList(&large_.normal, kMaxPages, 1000000000, Span::ON_NORMAL_FREELIST);
  CheckList(&large_.returned, kMaxPages, 1000000000, Span::ON_RETURNED_FREELIST);
  for (Length s = 1; s < kMaxPages; s++) {
    CheckList(&free_[s].normal, s, s, Span::ON_NORMAL_FREELIST);
    CheckList(&free_[s].returned, s, s, Span::ON_RETURNED_FREELIST);
  }
  return result;
}

bool PageHeap::CheckList(Span* list, Length min_pages, Length max_pages,
                         int freelist) {
  for (Span* s = list->next; s != list; s = s->next) {
    CHECK_CONDITION(s->location == freelist);  // NORMAL or RETURNED
    CHECK_CONDITION(s->length >= min_pages);
    CHECK_CONDITION(s->length <= max_pages);
    CHECK_CONDITION(GetDescriptor(s->start) == s);
    CHECK_CONDITION(GetDescriptor(s->start+s->length-1) == s);
  }
  return true;
}

}  // namespace tcmalloc
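
// Illustrative example of the scavenge pacing in IncrementalScavenge() above:
// with the default FLAGS_tcmalloc_release_rate of 1.0, releasing a 4-page span
// sets scavenge_counter_ = (1000.0 / 1.0) * 4 = 4000, i.e. roughly 4000 more
// pages must pass through Delete() before the next release attempt.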