// common.h
  1. // -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*-
  2. // Copyright (c) 2008, Google Inc.
  3. // All rights reserved.
  4. //
  5. // Redistribution and use in source and binary forms, with or without
  6. // modification, are permitted provided that the following conditions are
  7. // met:
  8. //
  9. // * Redistributions of source code must retain the above copyright
  10. // notice, this list of conditions and the following disclaimer.
  11. // * Redistributions in binary form must reproduce the above
  12. // copyright notice, this list of conditions and the following disclaimer
  13. // in the documentation and/or other materials provided with the
  14. // distribution.
  15. // * Neither the name of Google Inc. nor the names of its
  16. // contributors may be used to endorse or promote products derived from
  17. // this software without specific prior written permission.
  18. //
  19. // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  20. // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  21. // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  22. // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
  23. // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  24. // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  25. // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  26. // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  27. // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  28. // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  29. // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  30. // ---
  31. // Author: Sanjay Ghemawat <opensource@google.com>
  32. //
  33. // Common definitions for tcmalloc code.
  34. #ifndef TCMALLOC_COMMON_H_
  35. #define TCMALLOC_COMMON_H_
  36. #include "config.h"
  37. #include <stddef.h> // for size_t
  38. #ifdef HAVE_STDINT_H
  39. #include <stdint.h> // for uintptr_t, uint64_t
  40. #endif
  41. #include "internal_logging.h" // for ASSERT, etc
  42. #include "base/basictypes.h" // for LIKELY, etc
  43. #ifdef HAVE_BUILTIN_EXPECT
  44. #define LIKELY(x) __builtin_expect(!!(x), 1)
  45. #define UNLIKELY(x) __builtin_expect(!!(x), 0)
  46. #else
  47. #define LIKELY(x) (x)
  48. #define UNLIKELY(x) (x)
  49. #endif
// Type that can hold a page number.
typedef uintptr_t PageID;

// Type that can hold the length of a run of pages.
typedef uintptr_t Length;

//-------------------------------------------------------------------
// Configuration
//-------------------------------------------------------------------

#if defined(TCMALLOC_ALIGN_8BYTES)
// Unless we are forced to use 8-byte alignment we use an alignment of
// at least 16 bytes to satisfy requirements for some SSE types.
// Keep in mind that with 16-byte alignment the space wasted due to
// alignment can reach 25% (e.g. a malloc of 24 bytes gets 32 bytes).
static const size_t kMinAlign = 8;
// Number of size classes created until the class size reaches 128.
static const size_t kBaseClasses = 16;
#else
static const size_t kMinAlign = 16;
static const size_t kBaseClasses = 9;
#endif

// Using large pages speeds up the execution at a cost of larger memory use.
// Deallocation may speed up by a factor as the page map gets 8x smaller, so
// lookups in the page map result in fewer L2 cache misses, which translates to
// speedup for application/platform combinations with high L2 cache pressure.
// As the number of size classes increases with large pages, we increase
// the thread cache allowance to avoid passing more free ranges to and from
// central lists.  Also, larger pages are less likely to get freed.
// These two factors cause a bounded increase in memory use.
#if defined(TCMALLOC_32K_PAGES)
static const size_t kPageShift = 15;
static const size_t kNumClasses = kBaseClasses + 69;
#elif defined(TCMALLOC_64K_PAGES)
static const size_t kPageShift = 16;
static const size_t kNumClasses = kBaseClasses + 73;
#else
// Default configuration: 8 KiB pages.
static const size_t kPageShift = 13;
static const size_t kNumClasses = kBaseClasses + 79;
#endif

// Upper bound on the size of a single thread's cache.
static const size_t kMaxThreadCacheSize = 4 << 20;

static const size_t kPageSize = 1 << kPageShift;
// Requests larger than kMaxSize bypass the size classes and are served
// directly by the page-level allocator.
static const size_t kMaxSize = 256 * 1024;
static const size_t kAlignment = 8;
// Pseudo size class used for allocations handled by the page heap
// rather than by a real size class.
static const size_t kLargeSizeClass = 0;

// For all span-lengths < kMaxPages we keep an exact-size list.
static const size_t kMaxPages = 1 << (20 - kPageShift);

// Default bound on the total amount of thread caches.
#ifdef TCMALLOC_SMALL_BUT_SLOW
// Make the overall thread cache no bigger than that of a single thread
// for the small memory footprint case.
static const size_t kDefaultOverallThreadCacheSize = kMaxThreadCacheSize;
#else
static const size_t kDefaultOverallThreadCacheSize = 8u * kMaxThreadCacheSize;
#endif

// Lower bound on the per-thread cache sizes.
static const size_t kMinThreadCacheSize = kMaxSize * 2;

// The number of bytes one ThreadCache will steal from another when
// the first ThreadCache is forced to Scavenge(), delaying the
// next call to Scavenge for this thread.
static const size_t kStealAmount = 1 << 16;

// The number of times that a deallocation can cause a freelist to
// go over its max_length() before shrinking max_length().
static const int kMaxOverages = 3;

// Maximum length we allow a per-thread free-list to have before we
// move objects from it into the corresponding central free-list.  We
// want this big to avoid locking the central free-list too often.  It
// should not hurt to make this list somewhat big because the
// scavenging code will shrink it down when its contents are not in use.
static const int kMaxDynamicFreeListLength = 8192;

// Largest page count representable in a Length without overflow.
static const Length kMaxValidPages = (~static_cast<Length>(0)) >> kPageShift;

#if defined __x86_64__
// All current and planned x86_64 processors only look at the lower 48 bits
// in virtual to physical address translation.  The top 16 are thus unused.
// TODO(rus): Under what operating systems can we increase it safely to 17?
// This lets us use smaller page maps.  On first allocation, a 36-bit page map
// uses only 96 KB instead of the 4.5 MB used by a 52-bit page map.
static const int kAddressBits = (sizeof(void*) < 8 ? (8 * sizeof(void*)) : 48);
#else
static const int kAddressBits = 8 * sizeof(void*);
#endif
  128. namespace tcmalloc {
  129. // Convert byte size into pages. This won't overflow, but may return
  130. // an unreasonably large value if bytes is huge enough.
  131. inline Length pages(size_t bytes) {
  132. return (bytes >> kPageShift) +
  133. ((bytes & (kPageSize - 1)) > 0 ? 1 : 0);
  134. }
  135. // For larger allocation sizes, we use larger memory alignments to
  136. // reduce the number of size classes.
  137. int AlignmentForSize(size_t size);
  138. // Size-class information + mapping
  139. class SizeMap {
  140. private:
  141. // Number of objects to move between a per-thread list and a central
  142. // list in one shot. We want this to be not too small so we can
  143. // amortize the lock overhead for accessing the central list. Making
  144. // it too big may temporarily cause unnecessary memory wastage in the
  145. // per-thread free list until the scavenger cleans up the list.
  146. int num_objects_to_move_[kNumClasses];
  147. //-------------------------------------------------------------------
  148. // Mapping from size to size_class and vice versa
  149. //-------------------------------------------------------------------
  150. // Sizes <= 1024 have an alignment >= 8. So for such sizes we have an
  151. // array indexed by ceil(size/8). Sizes > 1024 have an alignment >= 128.
  152. // So for these larger sizes we have an array indexed by ceil(size/128).
  153. //
  154. // We flatten both logical arrays into one physical array and use
  155. // arithmetic to compute an appropriate index. The constants used by
  156. // ClassIndex() were selected to make the flattening work.
  157. //
  158. // Examples:
  159. // Size Expression Index
  160. // -------------------------------------------------------
  161. // 0 (0 + 7) / 8 0
  162. // 1 (1 + 7) / 8 1
  163. // ...
  164. // 1024 (1024 + 7) / 8 128
  165. // 1025 (1025 + 127 + (120<<7)) / 128 129
  166. // ...
  167. // 32768 (32768 + 127 + (120<<7)) / 128 376
  168. static const int kMaxSmallSize = 1024;
  169. static const size_t kClassArraySize =
  170. ((kMaxSize + 127 + (120 << 7)) >> 7) + 1;
  171. unsigned char class_array_[kClassArraySize];
  172. static inline size_t SmallSizeClass(size_t s) {
  173. return (static_cast<uint32_t>(s) + 7) >> 3;
  174. }
  175. static inline size_t LargeSizeClass(size_t s) {
  176. return (static_cast<uint32_t>(s) + 127 + (120 << 7)) >> 7;
  177. }
  178. // Compute index of the class_array[] entry for a given size
  179. static inline size_t ClassIndex(size_t s) {
  180. // Use unsigned arithmetic to avoid unnecessary sign extensions.
  181. ASSERT(0 <= s);
  182. ASSERT(s <= kMaxSize);
  183. if (LIKELY(s <= kMaxSmallSize)) {
  184. return SmallSizeClass(s);
  185. } else {
  186. return LargeSizeClass(s);
  187. }
  188. }
  189. int NumMoveSize(size_t size);
  190. // Mapping from size class to max size storable in that class
  191. size_t class_to_size_[kNumClasses];
  192. // Mapping from size class to number of pages to allocate at a time
  193. size_t class_to_pages_[kNumClasses];
  194. public:
  195. // Constructor should do nothing since we rely on explicit Init()
  196. // call, which may or may not be called before the constructor runs.
  197. SizeMap() { }
  198. // Initialize the mapping arrays
  199. void Init();
  200. inline int SizeClass(size_t size) {
  201. return class_array_[ClassIndex(size)];
  202. }
  203. inline bool MaybeSizeClass(size_t size, size_t *size_class) {
  204. size_t class_idx;
  205. if (LIKELY(size <= kMaxSmallSize)) {
  206. class_idx = SmallSizeClass(size);
  207. } else if (size <= kMaxSize) {
  208. class_idx = LargeSizeClass(size);
  209. } else {
  210. return false;
  211. }
  212. *size_class = class_array_[class_idx];
  213. return true;
  214. }
  215. // Get the byte-size for a specified class
  216. inline size_t ByteSizeForClass(size_t cl) {
  217. return class_to_size_[cl];
  218. }
  219. // Mapping from size class to max size storable in that class
  220. inline size_t class_to_size(size_t cl) {
  221. return class_to_size_[cl];
  222. }
  223. // Mapping from size class to number of pages to allocate at a time
  224. inline size_t class_to_pages(size_t cl) {
  225. return class_to_pages_[cl];
  226. }
  227. // Number of objects to move between a per-thread list and a central
  228. // list in one shot. We want this to be not too small so we can
  229. // amortize the lock overhead for accessing the central list. Making
  230. // it too big may temporarily cause unnecessary memory wastage in the
  231. // per-thread free list until the scavenger cleans up the list.
  232. inline int num_objects_to_move(size_t cl) {
  233. return num_objects_to_move_[cl];
  234. }
  235. };
  236. // Allocates "bytes" worth of memory and returns it. Increments
  237. // metadata_system_bytes appropriately. May return NULL if allocation
  238. // fails. Requires pageheap_lock is held.
  239. void* MetaDataAlloc(size_t bytes);
  240. // Returns the total number of bytes allocated from the system.
  241. // Requires pageheap_lock is held.
  242. uint64_t metadata_system_bytes();
// size/depth are made the same size as a pointer so that some generic
// code below can conveniently cast them back and forth to void*.
static const int kMaxStackDepth = 31;

// A captured call stack together with the size of the allocation it
// describes.  The stack array holds at most kMaxStackDepth PC values;
// only the first `depth` entries are meaningful.
struct StackTrace {
  uintptr_t size;    // Size of object
  uintptr_t depth;   // Number of PC values stored in array below
  void* stack[kMaxStackDepth];
};
  251. } // namespace tcmalloc
  252. #endif // TCMALLOC_COMMON_H_