/* Copyright (C) 2014 Stony Brook University
   This file is part of Graphene Library OS.

   Graphene Library OS is free software: you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public License
   as published by the Free Software Foundation, either version 3 of the
   License, or (at your option) any later version.

   Graphene Library OS is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public License
   along with this program.  If not, see <http://www.gnu.org/licenses/>. */

/*
 * shim_brk.c
 *
 * Implementation of system call "brk".
 */

#include <shim_internal.h>
#include <shim_utils.h>
#include <shim_table.h>
#include <shim_vma.h>
#include <shim_checkpoint.h>
#include <shim_profile.h>

#include <pal.h>
#include <pal_error.h>

#define BRK_SIZE 4096

struct shim_brk_info {
    size_t data_segment_size;
    void* brk_start;
    void* brk_end;
    void* brk_current;
};

static struct shim_brk_info region;

DEFINE_PROFILE_OCCURENCE(brk, memory);
DEFINE_PROFILE_OCCURENCE(brk_count, memory);
DEFINE_PROFILE_OCCURENCE(brk_migrate_count, memory);

void get_brk_region (void ** start, void ** end, void ** current)
{
    MASTER_LOCK();
    *start   = region.brk_start;
    *end     = region.brk_end;
    *current = region.brk_current;
    MASTER_UNLOCK();
}
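/*
 * Illustrative sketch (not part of the original file): a consumer of
 * get_brk_region() can use the snapshot it returns to classify an address.
 * The helper name addr_in_brk_region is hypothetical; it only uses the
 * accessor defined above, so it takes the master lock exactly once.
 */
static inline bool addr_in_brk_region (const void * addr)
{
    void * start, * end, * current;
    get_brk_region(&start, &end, &current);
    /* The brk region is the half-open interval [brk_start, brk_end). */
    return addr >= start && addr < end;
}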
int init_brk_region (void * brk_region, size_t data_segment_size)
{
    if (region.brk_start)
        return 0;

    data_segment_size = ALIGN_UP(data_segment_size);
    uint64_t brk_max_size = DEFAULT_BRK_MAX_SIZE;

    if (root_config) {
        char brk_cfg[CONFIG_MAX];
        if (get_config(root_config, "sys.brk.size", brk_cfg, CONFIG_MAX) > 0)
            brk_max_size = parse_int(brk_cfg);
    }

    set_rlimit_cur(RLIMIT_DATA, brk_max_size + data_segment_size);

    int flags = MAP_PRIVATE|MAP_ANONYMOUS;
    bool brk_on_heap = true;
    const int TRIES = 10;

    /*
     * Chia-Che 8/24/2017
     * Adding an argument to specify the initial starting address of the
     * brk region. The general assumption of Linux is that the brk region
     * should be within [exec-data-end, exec-data-end + 0x2000000).
     */
    if (brk_region) {
        size_t max_brk = 0;
        if (PAL_CB(user_address.end) >= PAL_CB(executable_range.end))
            max_brk = PAL_CB(user_address.end) - PAL_CB(executable_range.end);

        /* Check whether the brk region can potentially be located after exec at all. */
        if (brk_max_size <= max_brk) {
            int try;
            for (try = TRIES; try > 0; try--) {
                uint32_t rand = 0;
#if ENABLE_ASLR == 1
                int ret = DkRandomBitsRead(&rand, sizeof(rand));
                if (ret < 0)
                    return -convert_pal_errno(-ret);
                rand %= MIN((size_t)0x2000000,
                            (size_t)(PAL_CB(user_address.end) - brk_region
                                     - brk_max_size));
                rand = ALIGN_DOWN(rand);

                if (brk_region + rand + brk_max_size >= PAL_CB(user_address.end))
                    continue;
#else
                /* Without randomization there is no point to retry here */
                if (brk_region + rand + brk_max_size >= PAL_CB(user_address.end))
                    break;
#endif

                struct shim_vma_val vma;
                if (lookup_overlap_vma(brk_region + rand, brk_max_size, &vma)
                        == -ENOENT) {
                    /* Found a place for brk */
                    brk_region += rand;
                    brk_on_heap = false;
                    break;
                }
#if !(ENABLE_ASLR == 1)
                /* Without randomization, try memory directly after the
                 * overlapping block */
                brk_region = vma.addr + vma.length;
#endif
            }
        }
    }

    if (brk_on_heap) {
        brk_region = bkeep_unmapped_heap(brk_max_size, PROT_READ|PROT_WRITE,
                                         flags|VMA_UNMAPPED, NULL, 0, "brk");
        if (!brk_region) {
            return -ENOMEM;
        }
    } else {
        /*
         * Create the bookkeeping before allocating the brk region.
         * The bookkeeping should never fail because we've already confirmed
         * the availability.
         */
        if (bkeep_mmap(brk_region, brk_max_size, PROT_READ|PROT_WRITE,
                       flags|VMA_UNMAPPED, NULL, 0, "brk") < 0)
            BUG();
    }

    void * end_brk_region = NULL;

    /* Allocate the whole brk region */
    void * ret = (void *) DkVirtualMemoryAlloc(brk_region, brk_max_size, 0,
                                               PAL_PROT_READ|PAL_PROT_WRITE);

    /* Check whether the PAL call succeeded. */
    if (!ret) {
        bkeep_munmap(brk_region, brk_max_size, flags);
        return -ENOMEM;
    }

    ADD_PROFILE_OCCURENCE(brk, brk_max_size);
    INC_PROFILE_OCCURENCE(brk_count);

    end_brk_region = brk_region + BRK_SIZE;

    region.data_segment_size = data_segment_size;
    region.brk_start   = brk_region;
    region.brk_end     = end_brk_region;
    region.brk_current = brk_region;

    debug("brk area: %p - %p\n", brk_region, end_brk_region);
    debug("brk reserved area: %p - %p\n", end_brk_region,
          brk_region + brk_max_size);

    /*
     * Create another bookkeeping for the current brk region. The remaining
     * space will be marked as unmapped so that the library OS can reuse the
     * space for other purposes.
     */
    if (bkeep_mmap(brk_region, BRK_SIZE, PROT_READ|PROT_WRITE, flags,
                   NULL, 0, "brk") < 0)
        BUG();

    return 0;
}

int reset_brk (void)
{
    MASTER_LOCK();

    if (!region.brk_start) {
        MASTER_UNLOCK();
        return 0;
    }

    int ret = shim_do_munmap(region.brk_start,
                             region.brk_end - region.brk_start);

    if (ret < 0) {
        MASTER_UNLOCK();
        return ret;
    }

    region.brk_start = region.brk_end = region.brk_current = NULL;

    MASTER_UNLOCK();
    return 0;
}

void * shim_do_brk (void * brk)
{
    MASTER_LOCK();

    if (init_brk_region(NULL, 0) < 0) {
        // If brk is never initialized, assume no executable
        debug("Failed to initialize brk!\n");
        brk = NULL;
        goto out;
    }

    if (!brk) {
unchanged:
        brk = region.brk_current;
        goto out;
    }

    if (brk < region.brk_start)
        goto unchanged;

    if (brk > region.brk_end) {
        uint64_t rlim_data = get_rlimit_cur(RLIMIT_DATA);

        // Check if there is enough space within the system limit
        if (rlim_data < region.data_segment_size) {
            brk = NULL;
            goto out;
        }

        uint64_t brk_max_size = rlim_data - region.data_segment_size;

        if (brk > region.brk_start + brk_max_size)
            goto unchanged;

        void * brk_end = region.brk_end;
        while (brk_end < brk)
            brk_end += BRK_SIZE;

        debug("brk area: %p - %p\n", region.brk_start, brk_end);
        debug("brk reserved area: %p - %p\n", brk_end,
              region.brk_start + brk_max_size);

        bkeep_mmap(region.brk_start, brk_end - region.brk_start,
                   PROT_READ|PROT_WRITE, MAP_ANONYMOUS|MAP_PRIVATE,
                   NULL, 0, "brk");

        region.brk_current = brk;
        region.brk_end = brk_end;
        goto out;
    }

    region.brk_current = brk;

out:
    MASTER_UNLOCK();
    return brk;
}
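/*
 * Illustrative sketch (hypothetical helper, not part of the original file):
 * the classic sbrk() interface can be emulated on top of shim_do_brk(),
 * relying on the Linux convention implemented above, namely that brk(NULL)
 * returns the current break and a rejected request returns the old break
 * unchanged.
 */
static inline void * shim_emulated_sbrk (ssize_t increment)
{
    void * old_brk = shim_do_brk(NULL);     /* query the current break */
    if (!old_brk)
        return (void *) -1;                 /* brk region never initialized */
    if (!increment)
        return old_brk;

    void * new_brk = shim_do_brk((char *) old_brk + increment);
    if (new_brk != (char *) old_brk + increment)
        return (void *) -1;                 /* the request was rejected */

    /* Like sbrk(), return the previous break on success. */
    return old_brk;
}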
BEGIN_CP_FUNC(brk)
{
    __UNUSED(obj);
    __UNUSED(size);
    __UNUSED(objp);

    if (region.brk_start) {
        ADD_CP_FUNC_ENTRY((ptr_t)region.brk_start);
        ADD_CP_ENTRY(ADDR, region.brk_current);
        ADD_CP_ENTRY(SIZE, region.brk_end - region.brk_start);
        ADD_CP_ENTRY(SIZE, region.data_segment_size);
    }
}
END_CP_FUNC(brk)

BEGIN_RS_FUNC(brk)
{
    __UNUSED(rebase);
    region.brk_start   = (void *) GET_CP_FUNC_ENTRY();
    region.brk_current = (void *) GET_CP_ENTRY(ADDR);
    region.brk_end     = region.brk_start + GET_CP_ENTRY(SIZE);
    region.data_segment_size = GET_CP_ENTRY(SIZE);

    debug("brk area: %p - %p\n", region.brk_start, region.brk_end);

    size_t brk_size = region.brk_end - region.brk_start;
    uint64_t rlim_data = get_rlimit_cur(RLIMIT_DATA);
    assert(rlim_data > region.data_segment_size);
    uint64_t brk_max_size = rlim_data - region.data_segment_size;

    if (brk_size < brk_max_size) {
        void * alloc_addr = region.brk_end;
        size_t alloc_size = brk_max_size - brk_size;
        struct shim_vma_val vma;

        if (!lookup_overlap_vma(alloc_addr, alloc_size, &vma)) {
            /* if memory is already allocated here, adjust RLIMIT_DATA */
            alloc_size = vma.addr - alloc_addr;
            set_rlimit_cur(RLIMIT_DATA, (uint64_t)brk_size + alloc_size
                           + region.data_segment_size);
        }

        int ret = bkeep_mmap(alloc_addr, alloc_size, PROT_READ|PROT_WRITE,
                             MAP_ANONYMOUS|MAP_PRIVATE|VMA_UNMAPPED,
                             NULL, 0, "brk");
        if (ret < 0)
            return ret;

        void * ptr = DkVirtualMemoryAlloc(alloc_addr, alloc_size, 0,
                                          PAL_PROT_READ|PAL_PROT_WRITE);
        assert(ptr == alloc_addr);

        ADD_PROFILE_OCCURENCE(brk, alloc_size);
        INC_PROFILE_OCCURENCE(brk_migrate_count);

        debug("brk reserved area: %p - %p\n", alloc_addr,
              alloc_addr + alloc_size);
    }

    DEBUG_RS("current=%p,region=%p-%p", region.brk_current,
             region.brk_start, region.brk_end);
}
END_RS_FUNC(brk)
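/*
 * Note on the checkpoint record layout (descriptive comment, added for
 * clarity): the brk record stores brk_start as the function entry, followed
 * by one ADDR entry for brk_current and two SIZE entries, first the mapped
 * region size (brk_end - brk_start) and then data_segment_size. The restore
 * function above consumes the entries in exactly that order, then re-reserves
 * any remaining space up to RLIMIT_DATA minus the data segment size, shrinking
 * RLIMIT_DATA if another VMA already occupies part of the reserved range.
 */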