thread_cache.cc

// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*-
// Copyright (c) 2008, Google Inc.
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
//     * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

// ---
// Author: Ken Ashcraft <opensource@google.com>

#include <config.h>
#include "thread_cache.h"

#include <errno.h>
#include <string.h>                     // for memcpy
#include <algorithm>                    // for max, min

#include "base/commandlineflags.h"      // for SpinLockHolder
#include "base/spinlock.h"              // for SpinLockHolder
#include "getenv_safe.h"                // for TCMallocGetenvSafe
#include "central_freelist.h"           // for CentralFreeListPadded
#include "maybe_threads.h"

using std::min;
using std::max;
// Note: this is initialized manually in InitModule to ensure that
// it's configured at the right time.
//
// DEFINE_int64(tcmalloc_max_total_thread_cache_bytes,
//              EnvToInt64("TCMALLOC_MAX_TOTAL_THREAD_CACHE_BYTES",
//                         kDefaultOverallThreadCacheSize),
//              "Bound on the total amount of bytes allocated to "
//              "thread caches. This bound is not strict, so it is possible "
//              "for the cache to go over this bound in certain circumstances. "
//              "Maximum value of this flag is capped to 1 GB.");
namespace tcmalloc {

static bool phinited = false;

volatile size_t ThreadCache::per_thread_cache_size_ = kMaxThreadCacheSize;
size_t ThreadCache::overall_thread_cache_size_ = kDefaultOverallThreadCacheSize;
ssize_t ThreadCache::unclaimed_cache_space_ = kDefaultOverallThreadCacheSize;
PageHeapAllocator<ThreadCache> threadcache_allocator;
ThreadCache* ThreadCache::thread_heaps_ = NULL;
int ThreadCache::thread_heap_count_ = 0;
ThreadCache* ThreadCache::next_memory_steal_ = NULL;
#ifdef HAVE_TLS
__thread ThreadCache::ThreadLocalData ThreadCache::threadlocal_data_
    ATTR_INITIAL_EXEC
    = {0, 0};
#endif
bool ThreadCache::tsd_inited_ = false;
pthread_key_t ThreadCache::heap_key_;

void ThreadCache::Init(pthread_t tid) {
  size_ = 0;

  max_size_ = 0;
  IncreaseCacheLimitLocked();
  if (max_size_ == 0) {
    // There isn't enough memory to go around.  Just give the minimum to
    // this thread.
    max_size_ = kMinThreadCacheSize;

    // Take unclaimed_cache_space_ negative.
    unclaimed_cache_space_ -= kMinThreadCacheSize;
    ASSERT(unclaimed_cache_space_ < 0);
  }

  next_ = NULL;
  prev_ = NULL;
  tid_ = tid;
  in_setspecific_ = false;
  for (size_t cl = 0; cl < kNumClasses; ++cl) {
    list_[cl].Init();
  }

  uint32_t sampler_seed;
  memcpy(&sampler_seed, &tid, sizeof(sampler_seed));
  sampler_.Init(sampler_seed);
}

void ThreadCache::Cleanup() {
  // Put unused memory back into central cache
  for (int cl = 0; cl < kNumClasses; ++cl) {
    if (list_[cl].length() > 0) {
      ReleaseToCentralCache(&list_[cl], cl, list_[cl].length());
    }
  }
}
// Remove some objects of class "cl" from central cache and add to thread heap.
// On success, return the first object for immediate use; otherwise return NULL.
void* ThreadCache::FetchFromCentralCache(size_t cl, size_t byte_size) {
  FreeList* list = &list_[cl];
  ASSERT(list->empty());
  const int batch_size = Static::sizemap()->num_objects_to_move(cl);

  const int num_to_move = min<int>(list->max_length(), batch_size);
  void *start, *end;
  int fetch_count = Static::central_cache()[cl].RemoveRange(
      &start, &end, num_to_move);

  ASSERT((start == NULL) == (fetch_count == 0));
  if (--fetch_count >= 0) {
    size_ += byte_size * fetch_count;
    list->PushRange(fetch_count, SLL_Next(start), end);
  }

  // Increase max length slowly up to batch_size.  After that,
  // increase by batch_size in one shot so that the length is a
  // multiple of batch_size.
  if (list->max_length() < batch_size) {
    list->set_max_length(list->max_length() + 1);
  } else {
    // Don't let the list get too long.  In 32 bit builds, the length
    // is represented by a 16 bit int, so we need to watch out for
    // integer overflow.
    int new_length = min<int>(list->max_length() + batch_size,
                              kMaxDynamicFreeListLength);
    // The list's max_length must always be a multiple of batch_size,
    // and kMaxDynamicFreeListLength is not necessarily a multiple
    // of batch_size.
    new_length -= new_length % batch_size;
    ASSERT(new_length % batch_size == 0);
    list->set_max_length(new_length);
  }
  return start;
}
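
// Illustrative example (editor's note, not part of the original source):
// suppose num_objects_to_move(cl) is 32.  A freshly created list starts
// with a small max_length(), so the first misses each grow max_length by
// one (1, 2, 3, ..., 32).  Once max_length reaches the batch size,
// further misses grow it by whole batches (64, 96, ...), keeping it a
// multiple of 32 and capped at kMaxDynamicFreeListLength rounded down to
// a multiple of the batch size.
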
void ThreadCache::ListTooLong(FreeList* list, size_t cl) {
  const int batch_size = Static::sizemap()->num_objects_to_move(cl);
  ReleaseToCentralCache(list, cl, batch_size);

  // If the list is too long, we need to transfer some number of
  // objects to the central cache.  Ideally, we would transfer
  // num_objects_to_move, so the code below tries to make max_length
  // converge on num_objects_to_move.

  if (list->max_length() < batch_size) {
    // Slow start the max_length so we don't overreserve.
    list->set_max_length(list->max_length() + 1);
  } else if (list->max_length() > batch_size) {
    // If we consistently go over max_length, shrink max_length.  If we don't
    // shrink it, some amount of memory will always stay in this freelist.
    list->set_length_overages(list->length_overages() + 1);
    if (list->length_overages() > kMaxOverages) {
      ASSERT(list->max_length() > batch_size);
      list->set_max_length(list->max_length() - batch_size);
      list->set_length_overages(0);
    }
  }
}
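
// Illustrative example (editor's note, not part of the original source):
// with batch_size == 32, a hypothetical kMaxOverages of 3, and
// max_length() == 96, each ListTooLong() call returns one batch of 32
// objects to the central cache and bumps length_overages().  On the
// fourth consecutive overage, max_length drops from 96 to 64 and the
// overage counter resets, so a freelist that is persistently too long
// slowly converges back toward a single batch.
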
// Remove some objects of class "cl" from thread heap and add to central cache
void ThreadCache::ReleaseToCentralCache(FreeList* src, size_t cl, int N) {
  ASSERT(src == &list_[cl]);
  if (N > src->length()) N = src->length();
  size_t delta_bytes = N * Static::sizemap()->ByteSizeForClass(cl);

  // We return prepackaged chains of the correct size to the central cache.
  // TODO: Use the same format internally in the thread caches?
  int batch_size = Static::sizemap()->num_objects_to_move(cl);
  while (N > batch_size) {
    void *tail, *head;
    src->PopRange(batch_size, &head, &tail);
    Static::central_cache()[cl].InsertRange(head, tail, batch_size);
    N -= batch_size;
  }
  void *tail, *head;
  src->PopRange(N, &head, &tail);
  Static::central_cache()[cl].InsertRange(head, tail, N);
  size_ -= delta_bytes;
}
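
// Illustrative example (editor's note, not part of the original source):
// releasing N == 100 objects with batch_size == 32 hands the central
// freelist three full chains of 32 followed by one final chain of 4,
// and then subtracts 100 * ByteSizeForClass(cl) from this thread
// cache's size_.
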
// Release idle memory to the central cache
void ThreadCache::Scavenge() {
  // If the low-water mark for the free list is L, it means we would
  // not have had to allocate anything from the central cache even if
  // we had reduced the free list size by L.  We aim to get closer to
  // that situation by dropping L/2 nodes from the free list.  This
  // may not release much memory, but if so we will call scavenge again
  // pretty soon and the low-water marks will be high on that call.
  for (int cl = 0; cl < kNumClasses; cl++) {
    FreeList* list = &list_[cl];
    const int lowmark = list->lowwatermark();
    if (lowmark > 0) {
      const int drop = (lowmark > 1) ? lowmark/2 : 1;
      ReleaseToCentralCache(list, cl, drop);

      // Shrink the max length if it isn't used.  Only shrink down to
      // batch_size -- if the thread was active enough to get the max_length
      // above batch_size, it will likely be that active again.  If
      // max_length shrinks below batch_size, the thread will have to
      // go through the slow-start behavior again.  The slow-start is useful
      // mainly for threads that stay relatively idle for their entire
      // lifetime.
      const int batch_size = Static::sizemap()->num_objects_to_move(cl);
      if (list->max_length() > batch_size) {
        list->set_max_length(
            max<int>(list->max_length() - batch_size, batch_size));
      }
    }
    list->clear_lowwatermark();
  }

  IncreaseCacheLimit();
}
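
// Illustrative example (editor's note, not part of the original source):
// if a free list never dropped below 40 objects since the last scavenge
// (lowwatermark() == 40), those 40 objects were effectively idle, so
// this pass returns 20 of them to the central cache.  If the list is
// still over-provisioned, the next scavenge sees another high low-water
// mark and trims roughly half again (20 -> 10 -> 5 ...), approaching
// the thread's working set gradually rather than in one aggressive step.
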
void ThreadCache::IncreaseCacheLimit() {
  SpinLockHolder h(Static::pageheap_lock());
  IncreaseCacheLimitLocked();
}

void ThreadCache::IncreaseCacheLimitLocked() {
  if (unclaimed_cache_space_ > 0) {
    // Possibly make unclaimed_cache_space_ negative.
    unclaimed_cache_space_ -= kStealAmount;
    max_size_ += kStealAmount;
    return;
  }
  // Don't hold pageheap_lock too long.  Try to steal from 10 other
  // threads before giving up.  The i < 10 condition also prevents an
  // infinite loop in case none of the existing thread heaps are
  // suitable places to steal from.
  for (int i = 0; i < 10;
       ++i, next_memory_steal_ = next_memory_steal_->next_) {
    // Reached the end of the linked list.  Start at the beginning.
    if (next_memory_steal_ == NULL) {
      ASSERT(thread_heaps_ != NULL);
      next_memory_steal_ = thread_heaps_;
    }
    if (next_memory_steal_ == this ||
        next_memory_steal_->max_size_ <= kMinThreadCacheSize) {
      continue;
    }
    next_memory_steal_->max_size_ -= kStealAmount;
    max_size_ += kStealAmount;
    next_memory_steal_ = next_memory_steal_->next_;
    return;
  }
}
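
// Illustrative scenario (editor's note, not part of the original source):
// once unclaimed_cache_space_ is exhausted, a thread that keeps missing
// in its cache walks the round-robin next_memory_steal_ cursor and takes
// kStealAmount bytes of budget from the first other thread whose
// max_size_ is still above kMinThreadCacheSize.  The 10-iteration cap
// bounds the time spent under pageheap_lock; an unlucky call may simply
// give up and leave max_size_ unchanged.
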
int ThreadCache::GetSamplePeriod() {
  return sampler_.GetSamplePeriod();
}

void ThreadCache::InitModule() {
  SpinLockHolder h(Static::pageheap_lock());
  if (!phinited) {
    const char *tcb = TCMallocGetenvSafe("TCMALLOC_MAX_TOTAL_THREAD_CACHE_BYTES");
    if (tcb) {
      set_overall_thread_cache_size(strtoll(tcb, NULL, 10));
    }
    Static::InitStaticVars();
    threadcache_allocator.Init();
    phinited = true;
  }
}
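
// Illustrative usage (editor's note, not part of the original source):
// the overall cache budget can be set from the environment before the
// program starts, e.g.
//   TCMALLOC_MAX_TOTAL_THREAD_CACHE_BYTES=67108864 ./my_program
// which makes InitModule() call set_overall_thread_cache_size(64 << 20)
// on first use; values outside [kMinThreadCacheSize, 1 GB] are clipped
// by the setter further down in this file.
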
void ThreadCache::InitTSD() {
  ASSERT(!tsd_inited_);
  perftools_pthread_key_create(&heap_key_, DestroyThreadCache);
  tsd_inited_ = true;

#ifdef PTHREADS_CRASHES_IF_RUN_TOO_EARLY
  // We may have used a fake pthread_t for the main thread.  Fix it.
  pthread_t zero;
  memset(&zero, 0, sizeof(zero));
  SpinLockHolder h(Static::pageheap_lock());
  for (ThreadCache* h = thread_heaps_; h != NULL; h = h->next_) {
    if (h->tid_ == zero) {
      h->tid_ = pthread_self();
    }
  }
#endif
}

ThreadCache* ThreadCache::CreateCacheIfNecessary() {
  // Initialize per-thread data if necessary
  ThreadCache* heap = NULL;
  {
    SpinLockHolder h(Static::pageheap_lock());
    // On some old glibc's, and on freebsd's libc (as of freebsd 8.1),
    // calling pthread routines (even pthread_self) too early could
    // cause a segfault.  Since we can call pthreads quite early, we
    // have to protect against that in such situations by making a
    // 'fake' pthread.  This is not ideal since it doesn't work well
    // when linking tcmalloc statically with apps that create threads
    // before main, so we only do it if we have to.
#ifdef PTHREADS_CRASHES_IF_RUN_TOO_EARLY
    pthread_t me;
    if (!tsd_inited_) {
      memset(&me, 0, sizeof(me));
    } else {
      me = pthread_self();
    }
#else
    const pthread_t me = pthread_self();
#endif

    // This may be a recursive malloc call from pthread_setspecific().
    // In that case, the heap for this thread has already been created
    // and added to the linked list.  So we search for that first.
    for (ThreadCache* h = thread_heaps_; h != NULL; h = h->next_) {
      if (h->tid_ == me) {
        heap = h;
        break;
      }
    }

    if (heap == NULL) heap = NewHeap(me);
  }

  // We call pthread_setspecific() outside the lock because it may
  // call malloc() recursively.  We check for the recursive call using
  // the "in_setspecific_" flag so that we can avoid calling
  // pthread_setspecific() if we are already inside pthread_setspecific().
  if (!heap->in_setspecific_ && tsd_inited_) {
    heap->in_setspecific_ = true;
    perftools_pthread_setspecific(heap_key_, heap);
#ifdef HAVE_TLS
    // Also keep a copy in __thread for faster retrieval
    threadlocal_data_.heap = heap;
    SetMinSizeForSlowPath(kMaxSize + 1);
#endif
    heap->in_setspecific_ = false;
  }
  return heap;
}
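
// Illustrative scenario (editor's note, not part of the original source):
// on a thread's first allocation, pthread_setspecific() may itself call
// malloc() to allocate TSD bookkeeping.  That inner malloc can re-enter
// CreateCacheIfNecessary(), find the heap already on thread_heaps_ by
// its tid, and see in_setspecific_ == true, so it skips the second
// pthread_setspecific() call and simply returns the existing heap,
// avoiding unbounded recursion.
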
ThreadCache* ThreadCache::NewHeap(pthread_t tid) {
  // Create the heap and add it to the linked list
  ThreadCache *heap = threadcache_allocator.New();
  heap->Init(tid);
  heap->next_ = thread_heaps_;
  heap->prev_ = NULL;
  if (thread_heaps_ != NULL) {
    thread_heaps_->prev_ = heap;
  } else {
    // This is the only thread heap at the moment.
    ASSERT(next_memory_steal_ == NULL);
    next_memory_steal_ = heap;
  }
  thread_heaps_ = heap;
  thread_heap_count_++;
  return heap;
}
void ThreadCache::BecomeIdle() {
  if (!tsd_inited_) return;              // No caches yet
  ThreadCache* heap = GetThreadHeap();
  if (heap == NULL) return;              // No thread cache to remove
  if (heap->in_setspecific_) return;     // Do not disturb the active caller

  heap->in_setspecific_ = true;
  perftools_pthread_setspecific(heap_key_, NULL);
#ifdef HAVE_TLS
  // Also update the copy in __thread
  threadlocal_data_.heap = NULL;
  SetMinSizeForSlowPath(0);
#endif
  heap->in_setspecific_ = false;
  if (GetThreadHeap() == heap) {
    // Somehow heap got reinstated by a recursive call to malloc
    // from pthread_setspecific.  We give up in this case.
    return;
  }

  // We can now get rid of the heap
  DeleteCache(heap);
}

void ThreadCache::BecomeTemporarilyIdle() {
  ThreadCache* heap = GetCacheIfPresent();
  if (heap)
    heap->Cleanup();
}

void ThreadCache::DestroyThreadCache(void* ptr) {
  // Note that "ptr" cannot be NULL since pthread promises not
  // to invoke the destructor on NULL values, but for safety,
  // we check anyway.
  if (ptr == NULL) return;
#ifdef HAVE_TLS
  // Prevent fast path of GetThreadHeap() from returning heap.
  threadlocal_data_.heap = NULL;
  SetMinSizeForSlowPath(0);
#endif
  DeleteCache(reinterpret_cast<ThreadCache*>(ptr));
}

void ThreadCache::DeleteCache(ThreadCache* heap) {
  // Remove all memory from heap
  heap->Cleanup();

  // Remove from linked list
  SpinLockHolder h(Static::pageheap_lock());
  if (heap->next_ != NULL) heap->next_->prev_ = heap->prev_;
  if (heap->prev_ != NULL) heap->prev_->next_ = heap->next_;
  if (thread_heaps_ == heap) thread_heaps_ = heap->next_;
  thread_heap_count_--;

  if (next_memory_steal_ == heap) next_memory_steal_ = heap->next_;
  if (next_memory_steal_ == NULL) next_memory_steal_ = thread_heaps_;
  unclaimed_cache_space_ += heap->max_size_;

  threadcache_allocator.Delete(heap);
}

void ThreadCache::RecomputePerThreadCacheSize() {
  // Divide available space across threads
  int n = thread_heap_count_ > 0 ? thread_heap_count_ : 1;
  size_t space = overall_thread_cache_size_ / n;

  // Limit to allowed range
  if (space < kMinThreadCacheSize) space = kMinThreadCacheSize;
  if (space > kMaxThreadCacheSize) space = kMaxThreadCacheSize;

  double ratio = space / max<double>(1, per_thread_cache_size_);
  size_t claimed = 0;
  for (ThreadCache* h = thread_heaps_; h != NULL; h = h->next_) {
    // Increasing the total cache size should not circumvent the
    // slow-start growth of max_size_.
    if (ratio < 1.0) {
      h->max_size_ = static_cast<size_t>(h->max_size_ * ratio);
    }
    claimed += h->max_size_;
  }
  unclaimed_cache_space_ = overall_thread_cache_size_ - claimed;
  per_thread_cache_size_ = space;
}
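
// Illustrative example (editor's note, not part of the original source):
// halving the overall budget gives ratio == 0.5, so a thread whose
// max_size_ was 2 MB is scaled down to 1 MB, and whatever remains of the
// new budget after summing the scaled per-thread budgets becomes
// unclaimed_cache_space_.  Growing the budget (ratio >= 1.0) leaves each
// max_size_ alone and only enlarges the unclaimed pool, so threads still
// have to earn the extra space through IncreaseCacheLimit().
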
void ThreadCache::GetThreadStats(uint64_t* total_bytes, uint64_t* class_count) {
  for (ThreadCache* h = thread_heaps_; h != NULL; h = h->next_) {
    *total_bytes += h->Size();
    if (class_count) {
      for (int cl = 0; cl < kNumClasses; ++cl) {
        class_count[cl] += h->freelist_length(cl);
      }
    }
  }
}

void ThreadCache::set_overall_thread_cache_size(size_t new_size) {
  // Clip the value to a reasonable range
  if (new_size < kMinThreadCacheSize) new_size = kMinThreadCacheSize;
  if (new_size > (1<<30)) new_size = (1<<30);     // Limit to 1GB
  overall_thread_cache_size_ = new_size;

  RecomputePerThreadCacheSize();
}

}  // namespace tcmalloc