  1. /* Copyright (C) 2014 Stony Brook University
  2. This file is part of Graphene Library OS.
  3. Graphene Library OS is free software: you can redistribute it and/or
  4. modify it under the terms of the GNU Lesser General Public License
  5. as published by the Free Software Foundation, either version 3 of the
  6. License, or (at your option) any later version.
  7. Graphene Library OS is distributed in the hope that it will be useful,
  8. but WITHOUT ANY WARRANTY; without even the implied warranty of
  9. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  10. GNU Lesser General Public License for more details.
  11. You should have received a copy of the GNU Lesser General Public License
  12. along with this program. If not, see <http://www.gnu.org/licenses/>. */
  13. /*
  14. * shim_brk.c
  15. *
  16. * Implementation of system call "brk".
  17. */
  18. #include <shim_internal.h>
  19. #include <shim_utils.h>
  20. #include <shim_table.h>
  21. #include <shim_vma.h>
  22. #include <shim_checkpoint.h>
  23. #include <shim_profile.h>
  24. #include <pal.h>
  25. #include <sys/mman.h>
  26. #define BRK_SIZE 4096
  27. struct shim_brk_info {
  28. size_t data_segment_size;
  29. void* brk_start;
  30. void* brk_end;
  31. void* brk_current;
  32. };
  33. static struct shim_brk_info region;
  34. DEFINE_PROFILE_OCCURENCE(brk, memory);
  35. DEFINE_PROFILE_OCCURENCE(brk_count, memory);
  36. DEFINE_PROFILE_OCCURENCE(brk_migrate_count, memory);
  37. void get_brk_region (void ** start, void ** end, void ** current)
  38. {
  39. MASTER_LOCK();
  40. *start = region.brk_start;
  41. *end = region.brk_end;
  42. *current = region.brk_current;
  43. MASTER_UNLOCK();
  44. }
/*
 * init_brk_region - set up the brk region for this process.
 *
 * brk_region:        preferred start address for the brk region, or NULL to
 *                    let the library OS pick a slot on the internal heap.
 * data_segment_size: size of the executable's data segment; it is counted
 *                    against RLIMIT_DATA together with the brk area.
 *
 * Returns 0 on success (or if already initialized), a negative errno-style
 * value on failure.
 */
int init_brk_region(void* brk_region, size_t data_segment_size) {
    /* Already initialized (e.g. restored from a checkpoint) -- nothing to do. */
    if (region.brk_start)
        return 0;

    data_segment_size = ALLOC_ALIGN_UP(data_segment_size);
    uint64_t brk_max_size = DEFAULT_BRK_MAX_SIZE;

    /* The manifest may override the maximum brk size via "sys.brk.size". */
    if (root_config) {
        char brk_cfg[CONFIG_MAX];
        if (get_config(root_config, "sys.brk.size", brk_cfg, sizeof(brk_cfg)) > 0)
            brk_max_size = parse_int(brk_cfg);
    }

    /* RLIMIT_DATA covers both the data segment and the brk area. */
    set_rlimit_cur(RLIMIT_DATA, brk_max_size + data_segment_size);

    int flags = MAP_PRIVATE|MAP_ANONYMOUS;
    bool brk_on_heap = true;
    const int TRIES = 10;

    /*
     * Chia-Che 8/24/2017
     * Adding an argument to specify the initial starting
     * address of brk region.
     * The general assumption of Linux is that the brk region
     * should be within [exec-data-end, exec-data-end + 0x2000000)
     */
    if (brk_region) {
        size_t max_brk = 0;
        if (PAL_CB(user_address.end) >= PAL_CB(executable_range.end))
            max_brk = PAL_CB(user_address.end) - PAL_CB(executable_range.end);

        /* If the PAL reports a hole in the user address space, keep brk
         * after it and shrink the usable range accordingly. */
        if (PAL_CB(user_address_hole.end) - PAL_CB(user_address_hole.start) > 0) {
            /* XXX: This assumes that we always want brk to be after the hole. */
            brk_region = MAX(brk_region, PAL_CB(user_address_hole.end));
            max_brk = MIN(max_brk, (size_t) (PAL_CB(user_address.end) - PAL_CB(user_address_hole.end)));
        }

        /* Check whether the brk region can potentially be located after exec at all. */
        if (brk_max_size <= max_brk) {
            int try;
            for (try = TRIES; try > 0; try--) {
                uint32_t rand = 0;
#if ENABLE_ASLR == 1
                /* Randomize the brk start (ASLR), bounded by both the
                 * 0x2000000 window and the end of the user address space. */
                int ret = DkRandomBitsRead(&rand, sizeof(rand));
                if (ret < 0)
                    return -convert_pal_errno(-ret);
                rand %= MIN((size_t)0x2000000,
                            (size_t)(PAL_CB(user_address.end) - brk_region - brk_max_size));
                rand = ALLOC_ALIGN_DOWN(rand);
                if (brk_region + rand + brk_max_size >= PAL_CB(user_address.end))
                    continue;
#else
                /* Without randomization there is no point to retry here */
                if (brk_region + rand + brk_max_size >= PAL_CB(user_address.end))
                    break;
#endif
                struct shim_vma_val vma;
                if (lookup_overlap_vma(brk_region + rand, brk_max_size, &vma) == -ENOENT) {
                    /* Found a place for brk */
                    brk_region += rand;
                    brk_on_heap = false;
                    break;
                }
#if !(ENABLE_ASLR == 1)
                /* Without randomization, try memory directly after the overlapping block */
                brk_region = vma.addr + vma.length;
#endif
            }
        }
    }

    if (brk_on_heap) {
        /* No fixed candidate worked: let the VMA bookkeeping pick a heap slot. */
        brk_region = bkeep_unmapped_heap(brk_max_size, PROT_READ|PROT_WRITE,
                                         flags|VMA_UNMAPPED, NULL, 0, "brk");
        if (!brk_region) {
            return -ENOMEM;
        }
    } else {
        /*
         * Create the bookkeeping before allocating the brk region.
         * The bookkeeping should never fail because we've already confirmed
         * the availability.
         */
        if (bkeep_mmap(brk_region, brk_max_size, PROT_READ|PROT_WRITE,
                       flags|VMA_UNMAPPED, NULL, 0, "brk") < 0)
            BUG();
    }

    void * end_brk_region = NULL;

    /* Allocate the whole brk region */
    void * ret = (void *) DkVirtualMemoryAlloc(brk_region, brk_max_size, 0,
                                               PAL_PROT_READ|PAL_PROT_WRITE);

    /* Checking if the PAL call succeeds. */
    if (!ret) {
        /* Roll back the bookkeeping made above before failing. */
        bkeep_munmap(brk_region, brk_max_size, flags);
        return -ENOMEM;
    }

    ADD_PROFILE_OCCURENCE(brk, brk_max_size);
    INC_PROFILE_OCCURENCE(brk_count);

    /* Only the first BRK_SIZE bytes are initially usable; the remainder up to
     * brk_max_size stays reserved (VMA_UNMAPPED) until the break grows. */
    end_brk_region = brk_region + BRK_SIZE;

    region.data_segment_size = data_segment_size;
    region.brk_start = brk_region;
    region.brk_end = end_brk_region;
    region.brk_current = brk_region;

    debug("brk area: %p - %p\n", brk_region, end_brk_region);
    debug("brk reserved area: %p - %p\n", end_brk_region,
          brk_region + brk_max_size);

    /*
     * Create another bookkeeping for the current brk region. The remaining
     * space will be marked as unmapped so that the library OS can reuse the
     * space for other purpose.
     */
    if (bkeep_mmap(brk_region, BRK_SIZE, PROT_READ|PROT_WRITE, flags,
                   NULL, 0, "brk") < 0)
        BUG();

    return 0;
}
  153. int reset_brk (void)
  154. {
  155. MASTER_LOCK();
  156. if (!region.brk_start) {
  157. MASTER_UNLOCK();
  158. return 0;
  159. }
  160. int ret = shim_do_munmap(region.brk_start,
  161. region.brk_end - region.brk_start);
  162. if (ret < 0) {
  163. MASTER_UNLOCK();
  164. return ret;
  165. }
  166. region.brk_start = region.brk_end = region.brk_current = NULL;
  167. MASTER_UNLOCK();
  168. return 0;
  169. }
/*
 * shim_do_brk - implementation of the brk(2) system call.
 *
 * brk: requested new program break, or NULL to query the current break.
 *
 * Returns the (possibly unchanged) current break on success; returns the
 * old break when the request is out of range, and NULL on initialization
 * or resource-limit failure. Mirrors Linux semantics where brk() returns
 * the resulting break rather than an errno.
 */
void* shim_do_brk (void* brk) {
    MASTER_LOCK();

    /* Lazily initialize; NULL/0 means "no executable", so defaults apply. */
    if (init_brk_region(NULL, 0) < 0) { // If brk is never initialized, assume no executable
        debug("Failed to initialize brk!\n");
        brk = NULL;
        goto out;
    }

    /* brk(NULL) queries the current break without changing it. */
    if (!brk) {
    unchanged:
        brk = region.brk_current;
        goto out;
    }

    /* Requests below the region start leave the break unchanged. */
    if (brk < region.brk_start)
        goto unchanged;

    if (brk > region.brk_end) {
        /* Growing past the committed end: enforce RLIMIT_DATA first. */
        uint64_t rlim_data = get_rlimit_cur(RLIMIT_DATA);
        // Check if there is enough space within the system limit
        if (rlim_data < region.data_segment_size) {
            brk = NULL;
            goto out;
        }

        uint64_t brk_max_size = rlim_data - region.data_segment_size;
        if (brk > region.brk_start + brk_max_size)
            goto unchanged;

        /* Extend the committed end in BRK_SIZE increments to cover brk. */
        void * brk_end = region.brk_end;
        while (brk_end < brk)
            brk_end += BRK_SIZE;

        debug("brk area: %p - %p\n", region.brk_start, brk_end);
        debug("brk reserved area: %p - %p\n", brk_end,
              region.brk_start + brk_max_size);

        /* Re-bookkeep the grown area as a mapped (no longer VMA_UNMAPPED) VMA.
         * The memory itself was allocated up-front in init_brk_region. */
        bkeep_mmap(region.brk_start, brk_end - region.brk_start,
                   PROT_READ|PROT_WRITE,
                   MAP_ANONYMOUS|MAP_PRIVATE, NULL, 0, "brk");

        region.brk_current = brk;
        region.brk_end = brk_end;
        goto out;
    }

    /* Shrink or in-range move: just record the new break. */
    region.brk_current = brk;

out:
    MASTER_UNLOCK();
    return brk;
}
  212. BEGIN_CP_FUNC(brk)
  213. {
  214. __UNUSED(obj);
  215. __UNUSED(size);
  216. __UNUSED(objp);
  217. if (region.brk_start) {
  218. ADD_CP_FUNC_ENTRY((ptr_t)region.brk_start);
  219. ADD_CP_ENTRY(ADDR, region.brk_current);
  220. ADD_CP_ENTRY(SIZE, region.brk_end - region.brk_start);
  221. ADD_CP_ENTRY(SIZE, region.data_segment_size);
  222. }
  223. }
  224. END_CP_FUNC(bek)
/*
 * Restore function for the brk region: rebuilds the global bookkeeping from
 * the checkpoint entries (start, current, committed size, data segment size)
 * and re-reserves the uncommitted remainder of the brk area in the child.
 */
BEGIN_RS_FUNC(brk)
{
    __UNUSED(rebase);
    region.brk_start    = (void *) GET_CP_FUNC_ENTRY();
    region.brk_current  = (void *) GET_CP_ENTRY(ADDR);
    region.brk_end      = region.brk_start + GET_CP_ENTRY(SIZE);
    region.data_segment_size = GET_CP_ENTRY(SIZE);

    debug("brk area: %p - %p\n", region.brk_start, region.brk_end);

    size_t brk_size = region.brk_end - region.brk_start;
    uint64_t rlim_data = get_rlimit_cur(RLIMIT_DATA);
    assert(rlim_data > region.data_segment_size);
    uint64_t brk_max_size = rlim_data - region.data_segment_size;

    /* If the committed part is smaller than the maximum, re-reserve the
     * remainder so later brk growth finds the space available. */
    if (brk_size < brk_max_size) {
        void * alloc_addr = region.brk_end;
        size_t alloc_size = brk_max_size - brk_size;
        struct shim_vma_val vma;

        if (!lookup_overlap_vma(alloc_addr, alloc_size, &vma)) {
            /* if memory are already allocated here, adjust RLIMIT_DATA */
            alloc_size = vma.addr - alloc_addr;
            set_rlimit_cur(RLIMIT_DATA, (uint64_t)brk_size + alloc_size + region.data_segment_size);
        }

        int ret = bkeep_mmap(alloc_addr, alloc_size,
                             PROT_READ|PROT_WRITE,
                             MAP_ANONYMOUS|MAP_PRIVATE|VMA_UNMAPPED,
                             NULL, 0, "brk");
        if (ret < 0)
            return ret;

        /* Allocate the reserved remainder; must land exactly at alloc_addr. */
        void * ptr = DkVirtualMemoryAlloc(alloc_addr, alloc_size, 0,
                                          PAL_PROT_READ|PAL_PROT_WRITE);
        __UNUSED(ptr);
        assert(ptr == alloc_addr);

        ADD_PROFILE_OCCURENCE(brk, alloc_size);
        INC_PROFILE_OCCURENCE(brk_migrate_count);

        debug("brk reserved area: %p - %p\n", alloc_addr,
              alloc_addr + alloc_size);
    }

    DEBUG_RS("current=%p,region=%p-%p", region.brk_current, region.brk_start,
             region.brk_end);
}
END_RS_FUNC(brk)