/* -*- mode:c; c-file-style:"k&r"; c-basic-offset: 4; tab-width:4; indent-tabs-mode:nil; mode:auto-fill; fill-column:78; -*- */
/* vim: set ts=4 sw=4 et tw=78 fo=cqt wm=0: */

/* Copyright (C) 2014 Stony Brook University
   This file is part of Graphene Library OS.

   Graphene Library OS is free software: you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public License
   as published by the Free Software Foundation, either version 3 of the
   License, or (at your option) any later version.

   Graphene Library OS is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public License
   along with this program.  If not, see <http://www.gnu.org/licenses/>. */

/*
 * shim_vma.c
 *
 * This file contains code to maintain bookkeeping of VMAs in library OS.
 */

#include <shim_internal.h>
#include <shim_table.h>
#include <shim_thread.h>
#include <shim_handle.h>
#include <shim_vma.h>
#include <shim_checkpoint.h>
#include <shim_fs.h>

#include <pal.h>
#include <list.h>

#include <asm/mman.h>
#include <errno.h>

/*
 * Internal bookkeeping for VMAs (virtual memory areas). This data
 * structure can only be accessed in this source file, with vma_list_lock
 * held. No reference counting needed in this data structure.
 */
DEFINE_LIST(shim_vma);
/* struct shim_vma tracks the area of [start, end) */
struct shim_vma {
    LIST_TYPE(shim_vma) list;
    void * start;
    void * end;
    int prot;
    int flags;
    uint64_t offset;
    struct shim_handle * file;
    char comment[VMA_COMMENT_LEN];
};

#define VMA_MGR_ALLOC   DEFAULT_VMA_COUNT
#define PAGE_SIZE       allocsize
#define RESERVED_VMAS   4

static struct shim_vma * reserved_vmas[RESERVED_VMAS];
static struct shim_vma early_vmas[RESERVED_VMAS];

static void * __bkeep_unmapped (void * top_addr, void * bottom_addr,
                                uint64_t length, int prot, int flags,
                                struct shim_handle * file, uint64_t offset,
                                const char * comment);

/*
 * Because the default system_malloc() must create VMA(s), we need
 * a new system_malloc() to avoid a circular dependency. This __malloc()
 * stores the VMA address and size in the current thread to delay the
 * bookkeeping until the allocator finishes extension.
 */
static inline void * __malloc (size_t size)
{
    void * addr;
    size = ALIGN_UP(size);

    /*
     * Chia-Che 3/3/18: We must enforce the policy that all VMAs have to
     * be created before issuing the PAL calls.
     */
    addr = __bkeep_unmapped(PAL_CB(user_address.end),
                            PAL_CB(user_address.start),
                            size, PROT_READ|PROT_WRITE,
                            MAP_PRIVATE|MAP_ANONYMOUS|VMA_INTERNAL,
                            NULL, 0, "vma");

    debug("allocate %p-%p for vmas\n", addr, addr + size);

    return (void *) DkVirtualMemoryAlloc(addr, size, 0,
                                         PAL_PROT_WRITE|PAL_PROT_READ);
}

#undef system_malloc
#define system_malloc __malloc

#define OBJ_TYPE struct shim_vma
#include <memmgr.h>

/*
 * "vma_mgr" has no specific lock. "vma_list_lock" must be held when
 * allocating or freeing any VMAs.
 */
static MEM_MGR vma_mgr = NULL;

/*
 * "vma_list" contains a sorted list of non-overlapping VMAs.
 * "vma_list_lock" must be held when accessing either the vma_list or any
 * field of a VMA.
 */
DEFINE_LISTP(shim_vma);
static LISTP_TYPE(shim_vma) vma_list = LISTP_INIT;
static LOCKTYPE vma_list_lock;

/*
 * Return true if [s, e) is exactly the area represented by vma.
 */
static inline bool test_vma_equal (struct shim_vma * vma, void * s, void * e)
{
    return vma->start == s && vma->end == e;
}
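/*
 * Note: the test_vma_* predicates below treat both the VMA and the queried
 * range as half-open intervals [s, e). For instance, a VMA covering
 * [0x1000, 0x3000) contains [0x2000, 0x3000), while a range such as
 * [0x2800, 0x3800) merely overlaps with it (the addresses here are
 * illustrative only).
 */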
/*
 * Return true if [s, e) is part of the area represented by vma.
 */
static inline bool test_vma_contain (struct shim_vma * vma, void * s, void * e)
{
    return vma->start <= s && vma->end >= e;
}

/*
 * Return true if [s, e) contains the starting address of vma.
 */
static inline bool test_vma_startin (struct shim_vma * vma, void * s, void * e)
{
    return vma->start >= s && vma->start < e;
}

/*
 * Return true if [s, e) contains the ending address of vma.
 */
static inline bool test_vma_endin (struct shim_vma * vma, void * s, void * e)
{
    return vma->end > s && vma->end <= e;
}

/*
 * Return true if [s, e) overlaps with the area represented by vma.
 */
static inline bool test_vma_overlap (struct shim_vma * vma, void * s, void * e)
{
    return test_vma_contain(vma, s, s + 1) ||
           test_vma_contain(vma, e - 1, e) ||
           test_vma_startin(vma, s, e);
}

static inline void __assert_vma_list (void)
{
    struct shim_vma * tmp;
    struct shim_vma * prev __attribute__((unused)) = NULL;

    listp_for_each_entry(tmp, &vma_list, list) {
        /* Assert we are really sorted */
        assert(tmp->end > tmp->start);
        assert(!prev || prev->end <= tmp->start);
        prev = tmp;
    }
}

// In a debug build only, assert that the VMA list is
// sorted. This should be called with the vma_list_lock held.
static inline void assert_vma_list (void)
{
#ifdef DEBUG
    __assert_vma_list();
#endif
}

/*
 * __lookup_vma() returns the VMA that contains the address; otherwise,
 * returns NULL. "pprev" returns the highest VMA below the address.
 * __lookup_vma() fills "pprev" even when the function cannot find a
 * matching vma for "addr".
 *
 * vma_list_lock must be held when calling this function.
 */
static inline struct shim_vma *
__lookup_vma (void * addr, struct shim_vma ** pprev)
{
    struct shim_vma * vma, * prev = NULL;

    listp_for_each_entry(vma, &vma_list, list) {
        if (addr < vma->start)
            goto none;
        if (test_vma_contain(vma, addr, addr + 1))
            goto out;

        assert(vma->end > vma->start);
        assert(!prev || prev->end <= vma->start);
        prev = vma;
    }

none:
    vma = NULL;
out:
    if (pprev)
        *pprev = prev;
    return vma;
}

/*
 * __insert_vma() places "vma" after "prev", or at the beginning of
 * vma_list if "prev" is NULL. vma_list_lock must be held when calling
 * this function.
 */
static inline void
__insert_vma (struct shim_vma * vma, struct shim_vma * prev)
{
    assert(!prev || prev->end <= vma->start);
    assert(vma != prev);

    /* check the next entry */
    struct shim_vma * next = prev ?
        listp_next_entry(prev, &vma_list, list) :
        listp_first_entry(&vma_list, struct shim_vma, list);
    assert(!next || vma->end <= next->start);

    if (prev)
        listp_add_after(vma, prev, &vma_list, list);
    else
        listp_add(vma, &vma_list, list);
}

/*
 * __remove_vma() removes "vma" after "prev", or at the beginning of
 * vma_list if "prev" is NULL. vma_list_lock must be held when calling
 * this function.
 */
static inline void
__remove_vma (struct shim_vma * vma, struct shim_vma * prev)
{
    assert(vma != prev);
    listp_del(vma, &vma_list, list);
}

/*
 * Storing a cursor pointing to the current heap top. With ASLR, the cursor
 * is randomized at initialization. The cursor is monotonically decremented
 * when allocating user VMAs. Updating this cursor requires holding
 * vma_list_lock.
 */
static void * current_heap_top;

static int __bkeep_mmap (struct shim_vma * prev,
                         void * start, void * end, int prot, int flags,
                         struct shim_handle * file, uint64_t offset,
                         const char * comment);

static int __bkeep_munmap (struct shim_vma ** prev,
                           void * start, void * end, int flags);

static int __bkeep_mprotect (struct shim_vma * prev,
                             void * start, void * end, int prot, int flags);

static int
__bkeep_preloaded (void * start, void * end, int prot, int flags,
                   const char * comment)
{
    if (!start || !end || start == end)
        return 0;

    struct shim_vma * prev = NULL;
    __lookup_vma(start, &prev);
    return __bkeep_mmap(prev, start, end, prot, flags, NULL, 0, comment);
}

int init_vma (void)
{
    int ret;

    for (int i = 0 ; i < RESERVED_VMAS ; i++)
        reserved_vmas[i] = &early_vmas[i];

    /* Bookkeeping for preloaded areas */

    ret = __bkeep_preloaded(PAL_CB(executable_range.start),
                            PAL_CB(executable_range.end),
                            PROT_NONE,
                            MAP_PRIVATE|MAP_ANONYMOUS|VMA_UNMAPPED,
                            "exec");
    if (ret < 0)
        return ret;

    ret = __bkeep_preloaded(PAL_CB(manifest_preload.start),
                            PAL_CB(manifest_preload.end),
                            PROT_READ,
                            MAP_PRIVATE|MAP_ANONYMOUS|VMA_INTERNAL,
                            "manifest");
    if (ret < 0)
        return ret;

    /* Initialize the allocator */

    if (!(vma_mgr = create_mem_mgr(init_align_up(VMA_MGR_ALLOC)))) {
        debug("failed creating the VMA allocator\n");
        return -ENOMEM;
    }

    for (int i = 0 ; i < RESERVED_VMAS ; i++) {
        if (!reserved_vmas[i]) {
            struct shim_vma * new = get_mem_obj_from_mgr(vma_mgr);
            assert(new);
            struct shim_vma * e = &early_vmas[i];
            struct shim_vma * prev = listp_prev_entry(e, &vma_list, list);
            debug("Converting early VMA [%p] %p-%p\n", e, e->start, e->end);
            memcpy(new, e, sizeof(*e));
            INIT_LIST_HEAD(new, list);
            __remove_vma(e, prev);
            __insert_vma(new, prev);
        }

        /* replace all reserved VMAs */
        reserved_vmas[i] = get_mem_obj_from_mgr(vma_mgr);
        assert(reserved_vmas[i]);
    }

    create_lock(vma_list_lock);

    current_heap_top = PAL_CB(user_address.end);

#if ENABLE_ASLR == 1
    /*
     * Randomize the heap top in top 5/6 of the user address space.
     * This is a simplified version of the mmap_base() logic in the Linux
     * kernel: https://elixir.bootlin.com/linux/v4.8/ident/mmap_base
     */
    uint64_t addr_rand_size =
        (PAL_CB(user_address.end) - PAL_CB(user_address.start)) * 5 / 6;
    uint64_t rand;
    getrand(&rand, sizeof(rand));
    current_heap_top -= ALIGN_DOWN(rand % addr_rand_size);
#endif

    debug("heap top adjusted to %p\n", current_heap_top);
    return 0;
}

static inline struct shim_vma * __get_new_vma (void)
{
    struct shim_vma * tmp;

    if (vma_mgr) {
        tmp = get_mem_obj_from_mgr(vma_mgr);
        if (tmp)
            goto out;
    }

    for (int i = 0 ; i < RESERVED_VMAS ; i++)
        if (reserved_vmas[i]) {
            tmp = reserved_vmas[i];
            reserved_vmas[i] = NULL;
            goto out;
        }

    /* Should never reach here; if this happens, increase RESERVED_VMAS */
    debug("failed to allocate new vma\n");
    bug();
    return NULL;

out:
    memset(tmp, 0, sizeof(*tmp));
    INIT_LIST_HEAD(tmp, list);
    return tmp;
}

static inline void __restore_reserved_vmas (void)
{
    bool nothing_reserved;
    do {
        nothing_reserved = true;
        for (int i = 0 ; i < RESERVED_VMAS ; i++)
            if (!reserved_vmas[i]) {
                struct shim_vma * new =
                    get_mem_obj_from_mgr_enlarge(vma_mgr,
                                                 size_align_up(VMA_MGR_ALLOC));

                /* this allocation must succeed */
                assert(new);

                reserved_vmas[i] = new;
                nothing_reserved = false;
            }
    } while (!nothing_reserved);
}

static inline void __drop_vma (struct shim_vma * vma)
{
    if (vma->file)
        put_handle(vma->file);

    for (int i = 0 ; i < RESERVED_VMAS ; i++)
        if (!reserved_vmas[i]) {
            reserved_vmas[i] = vma;
            return;
        }

    free_mem_obj_to_mgr(vma_mgr, vma);
}

static inline void
__assert_vma_flags (const struct shim_vma * vma, int flags)
{
    if (!(vma->flags & VMA_UNMAPPED)
        && VMA_TYPE(vma->flags) != VMA_TYPE(flags)) {
        debug("Check vma flag failure: vma flags %x, checked flags %x\n",
              vma->flags, flags);
        bug();
    }
}

static inline void
__set_vma_comment (struct shim_vma * vma, const char * comment)
{
    if (!comment) {
        vma->comment[0] = 0;
        return;
    }

    uint64_t len = strlen(comment);

    if (len > VMA_COMMENT_LEN - 1)
        len = VMA_COMMENT_LEN - 1;

    memcpy(vma->comment, comment, len);
    vma->comment[len] = 0;
}

/*
 * Add bookkeeping for mmap(). "prev" must point to the immediately
 * precedent vma of the address to map, or be NULL if no vma is lower than
 * the address. If the bookkeeping area overlaps with some existing vmas,
 * we must check whether the caller (from user, internal code, or
 * checkpointing procedure) is allowed to overwrite the existing vmas.
 *
 * Bookkeeping convention (must follow):
 * Create the bookkeeping BEFORE any allocation PAL calls
 * (DkVirtualMemoryAlloc() or DkStreamMap()).
 */
static int __bkeep_mmap (struct shim_vma * prev,
                         void * start, void * end, int prot, int flags,
                         struct shim_handle * file, uint64_t offset,
                         const char * comment)
{
    int ret = 0;
    struct shim_vma * new = __get_new_vma();

    /* First, remove any overlapping VMAs */
    ret = __bkeep_munmap(&prev, start, end, flags);
    if (ret < 0) {
        __drop_vma(new);
        return ret;
    }

    /* Inserting the new VMA */
    new->start  = start;
    new->end    = end;
    new->prot   = prot;
    new->flags  = flags|((file && (prot & PROT_WRITE)) ? VMA_TAINTED : 0);
    new->file   = file;
    if (new->file)
        get_handle(new->file);
    new->offset = offset;
    __set_vma_comment(new, comment);
    __insert_vma(new, prev);
    return 0;
}

int bkeep_mmap (void * addr, uint64_t length, int prot, int flags,
                struct shim_handle * file, uint64_t offset,
                const char * comment)
{
    if (!addr || !length)
        return -EINVAL;

    if (comment && !comment[0])
        comment = NULL;

    debug("bkeep_mmap: %p-%p\n", addr, addr + length);

    lock(vma_list_lock);
    struct shim_vma * prev = NULL;
    __lookup_vma(addr, &prev);
    int ret = __bkeep_mmap(prev, addr, addr + length, prot, flags, file,
                           offset, comment);
    assert_vma_list();
    __restore_reserved_vmas();
    unlock(vma_list_lock);
    return ret;
}
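/*
 * Illustrative sketch (hypothetical caller, not part of the original file):
 * following the convention documented above, a handler for an anonymous
 * mmap() at a known address would reserve the range in the bookkeeping
 * first and only then issue the PAL allocation, undoing the bookkeeping if
 * the allocation fails. The exact flags and error handling below are
 * assumptions.
 *
 *     int ret = bkeep_mmap(addr, length, prot, MAP_PRIVATE|MAP_ANONYMOUS,
 *                          NULL, 0, "mmap");
 *     if (ret < 0)
 *         return ret;
 *
 *     if (!DkVirtualMemoryAlloc(addr, length, 0, PAL_PROT(prot, 0))) {
 *         bkeep_munmap(addr, length, MAP_PRIVATE|MAP_ANONYMOUS);
 *         return -ENOMEM;
 *     }
 */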
/*
 * __shrink_vma() removes the area in a VMA that overlaps with [start, end).
 * The function deals with three cases:
 * (1) [start, end) overlaps with the beginning of the VMA.
 * (2) [start, end) overlaps with the ending of the VMA.
 * (3) [start, end) overlaps with the middle of the VMA. In this case, the
 *     VMA is split into two. The new VMA is stored in 'tailptr'.
 * In any of these cases, "vma" is the only one changed among vma_list.
 */
static inline void
__shrink_vma (struct shim_vma * vma, void * start, void * end,
              struct shim_vma ** tailptr)
{
    /*
     * Dealing with the head: if the starting address of "vma" is in
     * [start, end), move the starting address.
     */
    if (test_vma_startin(vma, start, end)) {
        if (end < vma->end) {
            if (vma->file) /* must adjust offset */
                vma->offset += end - vma->start;
            vma->start = end;
        } else {
            if (vma->file) /* must adjust offset */
                vma->offset += vma->end - vma->start;
            vma->start = vma->end;
        }

        goto finish;
    }

    /*
     * Dealing with the tail: if the ending address of "vma" is in
     * [start, end), move the ending address.
     */
    if (test_vma_endin(vma, start, end)) {
        if (start > vma->start) {
            vma->end = start;
        } else {
            vma->end = vma->start;
        }
        /* offset is not affected */

        goto finish;
    }

    /*
     * If [start, end) is inside the range of "vma", divide up
     * the VMA. A new VMA is created to represent the remaining tail.
     */
    if (test_vma_contain(vma, start, end)) {
        void * old_end = vma->end;
        vma->end = start;

        /* Remaining space after [start, end), creating a new VMA */
        if (old_end > end) {
            struct shim_vma * tail = __get_new_vma();

            tail->start = end;
            tail->end   = old_end;
            tail->prot  = vma->prot;
            tail->flags = vma->flags;
            tail->file  = vma->file;
            if (tail->file) {
                get_handle(tail->file);
                tail->offset = vma->offset + (tail->start - vma->start);
            } else {
                tail->offset = 0;
            }
            memcpy(tail->comment, vma->comment, VMA_COMMENT_LEN);
            *tailptr = tail;
        }

        goto finish;
    }

    /* Never reach here */
    bug();

finish:
    assert(!test_vma_overlap(vma, start, end));
    assert(vma->start < vma->end);
}

/*
 * Update bookkeeping for munmap(). "*pprev" must point to the immediately
 * precedent vma of the address to unmap, or be NULL if no vma is lower than
 * the address. If the bookkeeping area overlaps with some existing vmas,
 * we must check whether the caller (from user, internal code, or
 * checkpointing procedure) is allowed to overwrite the existing vmas.
 * "pprev" can be updated if a new vma lower than the unmapping address is
 * added.
 *
 * Bookkeeping convention (must follow):
 * Make deallocation PAL calls (DkVirtualMemoryFree() or DkStreamUnmap())
 * BEFORE updating the bookkeeping.
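 *
 * As an illustrative sketch (hypothetical caller, not from this file), the
 * expected ordering is therefore:
 *
 *     DkVirtualMemoryFree(addr, length);     release the memory first,
 *     bkeep_munmap(addr, length, flags);     then drop the bookkeeping.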
*/ static int __bkeep_munmap (struct shim_vma ** pprev, void * start, void * end, int flags) { struct shim_vma * prev = *pprev; struct shim_vma * cur, * next; if (!prev) { cur = listp_first_entry(&vma_list, struct shim_vma, list); if (!cur) return 0; } else { cur = listp_next_entry(prev, &vma_list, list); } next = cur ? listp_next_entry(cur, &vma_list, list) : NULL; while (cur) { struct shim_vma * tail = NULL; /* Stop unmapping if "cur" no longer overlaps with [start, end) */ if (!test_vma_overlap(cur, start, end)) break; if (VMA_TYPE(cur->flags) != VMA_TYPE(flags)) return -EACCES; /* If [start, end) contains the VMA, just drop the VMA. */ if (start <= cur->start && cur->end <= end) { __remove_vma(cur, prev); __drop_vma(cur); goto cont; } __shrink_vma(cur, start, end, &tail); if (cur->end <= start) { prev = cur; if (tail) { __insert_vma(tail, cur); /* insert "tail" after "cur" */ cur = tail; /* "tail" is the new "cur" */ break; } } else if (cur->start >= end) { /* __shrink_vma() only creates a new VMA when the beginning of the * original VMA is preserved. */ assert(!tail); break; } else { /* __shrink_vma() should never allow this case. */ bug(); } cont: cur = next; next = cur ? listp_next_entry(cur, &vma_list, list) : NULL; } if (prev) assert(cur == listp_next_entry(prev, &vma_list, list)); else assert(cur == listp_first_entry(&vma_list, struct shim_vma, list)); assert(!prev || prev->end <= start); assert(!cur || end <= cur->start); *pprev = prev; return 0; } int bkeep_munmap (void * addr, uint64_t length, int flags) { if (!length) return -EINVAL; debug("bkeep_munmap: %p-%p\n", addr, addr + length); lock(vma_list_lock); struct shim_vma * prev = NULL; __lookup_vma(addr, &prev); int ret = __bkeep_munmap(&prev, addr, addr + length, flags); assert_vma_list(); __restore_reserved_vmas(); unlock(vma_list_lock); return ret; } /* * Update bookkeeping for mprotect(). "prev" must point to the immediately * precedent vma of the address to protect, or be NULL if no vma is lower than * the address. If the bookkeeping area overlaps with some existing vmas, * we must check whether the caller (from user, internal code, or checkpointing * procedure) is allowed to overwrite the existing vmas. * * Bookkeeping convention (must follow): * Update the bookkeeping BEFORE calling DkVirtualMemoryProtect(). */ static int __bkeep_mprotect (struct shim_vma * prev, void * start, void * end, int prot, int flags) { struct shim_vma * cur, * next; if (!prev) { cur = listp_first_entry(&vma_list, struct shim_vma, list); if (!cur) return 0; } else { cur = listp_next_entry(prev, &vma_list, list); } next = cur ? listp_next_entry(cur, &vma_list, list) : NULL; while (cur) { struct shim_vma * new, * tail = NULL; /* Stop protecting if "cur" no longer overlaps with [start, end) */ if (!test_vma_overlap(cur, start, end)) break; if (VMA_TYPE(cur->flags) != VMA_TYPE(flags)) /* For now, just shout loudly. */ return -EACCES; /* If protection doesn't change anything, move on to the next */ if (cur->prot == prot) goto cont; /* If [start, end) contains the VMA, just update its protection. */ if (start <= cur->start && cur->end <= end) { cur->prot = prot; goto cont; } /* Create a new VMA for the protected area */ new = __get_new_vma(); new->start = cur->start > start ? cur->start : start; new->end = cur->end < end ? 
cur->end : end; new->prot = prot; new->flags = cur->flags; new->file = cur->file; if (new->file) { get_handle(new->file); new->offset = cur->offset + (new->start - cur->start); } else { new->offset = 0; } memcpy(new->comment, cur->comment, VMA_COMMENT_LEN); /* Like unmapping, shrink (and potentially split) the VMA first. */ __shrink_vma(cur, start, end, &tail); if (cur->end <= start) { prev = cur; if (tail) { __insert_vma(tail, cur); /* insert "tail" after "cur" */ cur = tail; /* "tail" is the new "cur" */ /* "next" is the same */ } } else if (cur->start >= end) { /* __shrink_vma() only creates a new VMA when the beginning of the * original VMA is preserved. */ assert(!tail); } else { /* __shrink_vma() should never allow this case. */ bug(); } /* Now insert the new protected vma between prev and cur */ __insert_vma(new, prev); assert(!prev || prev->end <= new->end); assert(new->start < new->end); cont: prev = cur; cur = next; next = cur ? listp_next_entry(cur, &vma_list, list) : NULL; } return 0; } int bkeep_mprotect (void * addr, uint64_t length, int prot, int flags) { if (!addr || !length) return -EINVAL; debug("bkeep_mprotect: %p-%p\n", addr, addr + length); lock(vma_list_lock); struct shim_vma * prev = NULL; __lookup_vma(addr, &prev); int ret = __bkeep_mprotect(prev, addr, addr + length, prot, flags); assert_vma_list(); __restore_reserved_vmas(); unlock(vma_list_lock); return ret; } /* * Search for an unmapped area within [bottom, top) that is big enough * to allocate "length" bytes. The search approach is top-down. * If this function returns a non-NULL address, the corresponding VMA is * added to the VMA list. */ static void * __bkeep_unmapped (void * top_addr, void * bottom_addr, uint64_t length, int prot, int flags, struct shim_handle * file, uint64_t offset, const char * comment) { assert(top_addr > bottom_addr); if (!length || length > top_addr - bottom_addr) return NULL; struct shim_vma * prev = NULL; struct shim_vma * cur = __lookup_vma(top_addr, &prev); while (true) { /* Set the range for searching */ void * end = cur ? cur->start : top_addr; void * start = (prev && prev->end > bottom_addr) ? prev->end : bottom_addr; assert(start <= end); /* Check if there is enough space between prev and cur */ if (length <= end - start) { /* create a new VMA at the top of the range */ __bkeep_mmap(prev, end - length, end, prot, flags, file, offset, comment); assert_vma_list(); debug("bkeep_unmapped: %p-%p%s%s\n", end - length, end, comment ? " => " : "", comment ? : ""); return end - length; } if (!prev || prev->start <= bottom_addr) break; cur = prev; prev = listp_prev_entry(cur, &vma_list, list); } return NULL; } void * bkeep_unmapped (void * top_addr, void * bottom_addr, uint64_t length, int prot, int flags, struct shim_handle * file, uint64_t offset, const char * comment) { lock(vma_list_lock); void * addr = __bkeep_unmapped(top_addr, bottom_addr, length, prot, flags, file, offset, comment); assert_vma_list(); __restore_reserved_vmas(); unlock(vma_list_lock); return addr; } void * bkeep_unmapped_heap (uint64_t length, int prot, int flags, struct shim_handle * file, uint64_t offset, const char * comment) { lock(vma_list_lock); void * bottom_addr = PAL_CB(user_address.start); void * top_addr = current_heap_top; void * heap_max = PAL_CB(user_address.end); void * addr = NULL; #ifdef MAP_32BIT /* * If MAP_32BIT is given in the flags, force the searching range to * be lower than 1ULL << 32. 
*/ #define ADDR_32BIT ((void *) (1ULL << 32)) if (flags & MAP_32BIT) { /* Try the lower 4GB memory space */ if (heap_max > ADDR_32BIT) heap_max = ADDR_32BIT; if (top_addr > heap_max) top_addr = heap_max; } #endif if (top_addr <= bottom_addr) goto again; /* Try first time */ addr = __bkeep_unmapped(top_addr, bottom_addr, length, prot, flags, file, offset, comment); assert_vma_list(); if (addr) { /* * we only update the current heap top when we get the * address from [bottom_addr, current_heap_top). */ if (top_addr == current_heap_top) { debug("heap top adjusted to %p\n", addr); current_heap_top = addr; } goto out; } again: if (top_addr >= heap_max) goto out; /* Try to allocate above the current heap top */ addr = __bkeep_unmapped(heap_max, bottom_addr, length, prot, flags, file, offset, comment); assert_vma_list(); out: __restore_reserved_vmas(); unlock(vma_list_lock); #ifdef MAP_32BIT assert(!(flags & MAP_32BIT) || !addr || addr + length <= ADDR_32BIT); #endif return addr; } static inline void __dump_vma (struct shim_vma_val * val, const struct shim_vma * vma) { val->addr = vma->start; val->length = vma->end - vma->start; val->prot = vma->prot; val->flags = vma->flags; val->file = vma->file; if (val->file) get_handle(val->file); val->offset = vma->offset; memcpy(val->comment, vma->comment, VMA_COMMENT_LEN); } int lookup_vma (void * addr, struct shim_vma_val * res) { lock(vma_list_lock); struct shim_vma * vma = __lookup_vma(addr, NULL); if (!vma) { unlock(vma_list_lock); return -ENOENT; } if (res) __dump_vma(res, vma); unlock(vma_list_lock); return 0; } int lookup_overlap_vma (void * addr, uint64_t length, struct shim_vma_val * res) { struct shim_vma * tmp, * vma = NULL; lock(vma_list_lock); listp_for_each_entry(tmp, &vma_list, list) if (test_vma_overlap (tmp, addr, addr + length)) { vma = tmp; break; } if (!vma) { unlock(vma_list_lock); return -ENOENT; } if (res) __dump_vma(res, vma); unlock(vma_list_lock); return 0; } int dump_all_vmas (struct shim_vma_val * vmas, size_t max_count) { struct shim_vma_val * val = vmas; struct shim_vma * vma; int cnt = 0; lock(vma_list_lock); listp_for_each_entry(vma, &vma_list, list) { if (VMA_TYPE(vma->flags)) continue; if (vma->flags & VMA_UNMAPPED) continue; if (cnt == max_count) { cnt = -EOVERFLOW; for (int i = 0 ; i < max_count ; i++) if (vmas[i].file) put_handle(vmas[i].file); break; } __dump_vma(val, vma); cnt++; val++; } unlock(vma_list_lock); return cnt; } BEGIN_CP_FUNC(vma) { assert(size == sizeof(struct shim_vma_val)); struct shim_vma_val * vma = (struct shim_vma_val *) obj; struct shim_vma_val * new_vma = NULL; PAL_FLG pal_prot = PAL_PROT(vma->prot, 0); ptr_t off = GET_FROM_CP_MAP(obj); if (!off) { off = ADD_CP_OFFSET(sizeof(*vma)); ADD_TO_CP_MAP(obj, off); new_vma = (struct shim_vma_val *) (base + off); memcpy(new_vma, vma, sizeof(*vma)); if (vma->file) DO_CP(handle, vma->file, &new_vma->file); void * need_mapped = vma->addr; #if MIGRATE_MORE_GIPC == 1 if (store->use_gipc ? !NEED_MIGRATE_MEMORY_IF_GIPC(vma) : !NEED_MIGRATE_MEMORY(vma)) #else if (!NEED_MIGRATE_MEMORY(vma)) #endif goto no_mem; void * send_addr = vma->addr; uint64_t send_size = vma->length; if (vma->file) { /* * Chia-Che 8/13/2017: * A fix for cloning a private VMA which maps a file to a process. * * (1) Application can access any page backed by the file, wholly * or partially. * * (2) Access beyond the last file-backed page will cause SIGBUS. * For reducing fork latency, the following code truncates the * memory size for migrating a process. 
The memory size is * truncated to the file size, round up to pages. * * (3) Data in the last file-backed page is valid before or after * forking. Has to be included in process migration. */ uint64_t file_len = get_file_size(vma->file); if (file_len >= 0 && vma->offset + vma->length > file_len) { send_size = file_len > vma->offset ? file_len - vma->offset : 0; send_size = ALIGN_UP(send_size); } } if (!send_size) goto no_mem; #if HASH_GIPC == 1 if (!(pal_prot & PAL_PROT_READ)) { #else if (!store->use_gipc && !(pal_prot & PAL_PROT_READ)) { #endif /* Make the area readable */ DkVirtualMemoryProtect(send_addr, send_size, pal_prot|PAL_PROT_READ); } if (store->use_gipc) { struct shim_gipc_entry * gipc; DO_CP_SIZE(gipc, send_addr, send_size, &gipc); gipc->mem.prot = pal_prot; } else { struct shim_mem_entry * mem; DO_CP_SIZE(memory, send_addr, send_size, &mem); mem->prot = pal_prot; } need_mapped = vma->addr + vma->length; #if HASH_GIPC == 1 if (store->use_gipc && !(pal_prot & PAL_PROT_READ)) DkVirtualMemoryProtect(send_addr, send_size, pal_prot); #endif no_mem: ADD_CP_FUNC_ENTRY(off); ADD_CP_ENTRY(ADDR, need_mapped); } else { new_vma = (struct shim_vma_val *) (base + off); } if (objp) *objp = (void *) new_vma; } END_CP_FUNC(vma) DEFINE_PROFILE_CATAGORY(inside_rs_vma, resume_func); DEFINE_PROFILE_INTERVAL(vma_add_bookkeep, inside_rs_vma); DEFINE_PROFILE_INTERVAL(vma_map_file, inside_rs_vma); DEFINE_PROFILE_INTERVAL(vma_map_anonymous, inside_rs_vma); BEGIN_RS_FUNC(vma) { struct shim_vma_val * vma = (void *) (base + GET_CP_FUNC_ENTRY()); void * need_mapped = (void *) GET_CP_ENTRY(ADDR); BEGIN_PROFILE_INTERVAL(); CP_REBASE(vma->file); int ret = bkeep_mmap(vma->addr, vma->length, vma->prot, vma->flags, vma->file, vma->offset, vma->comment); if (ret < 0) return ret; SAVE_PROFILE_INTERVAL(vma_add_bookkeep); DEBUG_RS("vma: %p-%p flags %x prot %p\n", vma->addr, vma->addr + vma->length, vma->flags, vma->prot); if (!(vma->flags & VMA_UNMAPPED)) { if (vma->file) { struct shim_mount * fs = vma->file->fs; get_handle(vma->file); if (need_mapped < vma->addr + vma->length) { /* first try, use hstat to force it resumes pal handle */ assert(vma->file->fs && vma->file->fs->fs_ops && vma->file->fs->fs_ops->mmap); void * addr = need_mapped; int ret = fs->fs_ops->mmap(vma->file, &addr, vma->addr + vma->length - need_mapped, vma->prot, vma->flags, vma->offset + (need_mapped - vma->addr)); if (ret < 0) return ret; if (!addr) return -ENOMEM; if (addr != need_mapped) return -EACCES; need_mapped += vma->length; SAVE_PROFILE_INTERVAL(vma_map_file); } } if (need_mapped < vma->addr + vma->length) { int pal_alloc_type = 0; int pal_prot = vma->prot; if (DkVirtualMemoryAlloc(need_mapped, vma->addr + vma->length - need_mapped, pal_alloc_type, pal_prot)) { need_mapped += vma->length; SAVE_PROFILE_INTERVAL(vma_map_anonymous); } } if (need_mapped < vma->addr + vma->length) sys_printf("vma %p-%p cannot be allocated!\n", need_mapped, vma->addr + vma->length); } if (vma->file) get_handle(vma->file); if (vma->file) DEBUG_RS("%p-%p,size=%d,prot=%08x,flags=%08x,off=%d,path=%s,uri=%s", vma->addr, vma->addr + vma->length, vma->length, vma->prot, vma->flags, vma->offset, qstrgetstr(&vma->file->path), qstrgetstr(&vma->file->uri)); else DEBUG_RS("%p-%p,size=%d,prot=%08x,flags=%08x,off=%d", vma->addr, vma->addr + vma->length, vma->length, vma->prot, vma->flags, vma->offset); } END_RS_FUNC(vma) BEGIN_CP_FUNC(all_vmas) { size_t count = DEFAULT_VMA_COUNT; struct shim_vma_val * vmas = malloc(sizeof(*vmas) * count); int ret; if (!vmas) return -ENOMEM; 
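    /*
     * dump_all_vmas() returns -EOVERFLOW when the supplied array is too
     * small for the current VMA list; in that case, double the array and
     * retry until the snapshot fits.
     */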
retry_dump_vmas: ret = dump_all_vmas(vmas, count); if (ret == -EOVERFLOW) { struct shim_vma_val * new_vmas = malloc(sizeof(*new_vmas) * count * 2); if (!new_vmas) { free(vmas); return -ENOMEM; } free(vmas); vmas = new_vmas; count *= 2; goto retry_dump_vmas; } if (ret < 0) return ret; count = ret; for (struct shim_vma_val * vma = &vmas[count - 1] ; vma >= vmas ; vma--) DO_CP(vma, vma, NULL); free_vma_val_array(vmas, count); } END_CP_FUNC_NO_RS(all_vmas) void debug_print_vma_list (void) { sys_printf("vma bookkeeping:\n"); struct shim_vma * vma; listp_for_each_entry(vma, &vma_list, list) { const char * type = "", * name = ""; if (vma->file) { if (!qstrempty(&vma->file->path)) { type = " path="; name = qstrgetstr(&vma->file->path); } else if (!qstrempty(&vma->file->uri)) { type = " uri="; name = qstrgetstr(&vma->file->uri); } } sys_printf("[%p-%p] prot=%08x flags=%08x%s%s offset=%d%s%s%s%s\n", vma->start, vma->end, vma->prot, vma->flags & ~(VMA_INTERNAL|VMA_UNMAPPED|VMA_TAINTED|VMA_CP), type, name, vma->offset, vma->flags & VMA_INTERNAL ? " (internal)" : "", vma->flags & VMA_UNMAPPED ? " (unmapped)" : "", vma->comment[0] ? " comment=" : "", vma->comment[0] ? vma->comment : ""); } }
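/*
 * Illustrative usage sketch (hypothetical, not part of the original file):
 * a caller can use lookup_overlap_vma() to check whether a requested range
 * collides with existing bookkeeping before overwriting it. Note that a
 * successful lookup takes a reference on the backing file handle, which the
 * caller must drop.
 *
 *     struct shim_vma_val existing;
 *     if (lookup_overlap_vma(addr, length, &existing) == 0) {
 *         debug("%p-%p overlaps an existing VMA at %p\n",
 *               addr, addr + length, existing.addr);
 *         if (existing.file)
 *             put_handle(existing.file);
 *     }
 */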