Browse Source

Cleaning up and rewriting VMA bookkeeping code (#183)

1. Redesign of the VMA bookkeeping logic in the library OS
2. ASLR reimplementation
3. Support the MAP_32BIT flag for mmap()
4. Safeguarding library OS internal memory from user memory and checkpoint buffers
5. Eliminating race conditions at VMA lookup and bookkeeping
6. Enable early VMA bookkeeping during initialization
7. Adding documentation for the VMA implementation
Chia-Che Tsai 4 years ago
parent
commit
f8bf469018
73 changed files with 2094 additions and 1945 deletions
  1. 4 4
      LibOS/shim/include/shim_checkpoint.h
  2. 12 0
      LibOS/shim/include/shim_defs.h
  3. 8 6
      LibOS/shim/include/shim_fs.h
  4. 3 3
      LibOS/shim/include/shim_internal.h
  5. 3 3
      LibOS/shim/include/shim_ipc.h
  6. 2 1
      LibOS/shim/include/shim_signal.h
  7. 0 9
      LibOS/shim/include/shim_thread.h
  8. 1 1
      LibOS/shim/include/shim_types.h
  9. 25 21
      LibOS/shim/include/shim_utils.h
  10. 67 22
      LibOS/shim/include/shim_vma.h
  11. 26 27
      LibOS/shim/src/bookkeep/shim_handle.c
  12. 10 18
      LibOS/shim/src/bookkeep/shim_signal.c
  13. 6 8
      LibOS/shim/src/bookkeep/shim_thread.c
  14. 721 862
      LibOS/shim/src/bookkeep/shim_vma.c
  15. 11 19
      LibOS/shim/src/elf/shim_rtld.c
  16. 122 95
      LibOS/shim/src/fs/chroot/fs.c
  17. 5 6
      LibOS/shim/src/fs/dev/fs.c
  18. 3 4
      LibOS/shim/src/fs/proc/info.c
  19. 90 21
      LibOS/shim/src/fs/proc/thread.c
  20. 1 0
      LibOS/shim/src/fs/shim_dcache.c
  21. 71 46
      LibOS/shim/src/fs/shim_fs.c
  22. 2 2
      LibOS/shim/src/fs/shim_fs_hash.c
  23. 17 4
      LibOS/shim/src/fs/shim_namei.c
  24. 0 1
      LibOS/shim/src/fs/str/fs.c
  25. 1 2
      LibOS/shim/src/ipc/shim_ipc.c
  26. 3 7
      LibOS/shim/src/ipc/shim_ipc_nsimpl.h
  27. 3 3
      LibOS/shim/src/ipc/shim_ipc_pid.c
  28. 2 1
      LibOS/shim/src/ipc/shim_ipc_sysv.c
  29. 1 1
      LibOS/shim/src/shim-debug.map
  30. 178 88
      LibOS/shim/src/shim_checkpoint.c
  31. 6 0
      LibOS/shim/src/shim_debug.c
  32. 105 81
      LibOS/shim/src/shim_init.c
  33. 68 187
      LibOS/shim/src/shim_malloc.c
  34. 6 25
      LibOS/shim/src/shim_random.c
  35. 0 17
      LibOS/shim/src/shim_syscalls.c
  36. 62 31
      LibOS/shim/src/sys/shim_brk.c
  37. 8 12
      LibOS/shim/src/sys/shim_clone.c
  38. 48 3
      LibOS/shim/src/sys/shim_exec.c
  39. 1 1
      LibOS/shim/src/sys/shim_migrate.c
  40. 95 74
      LibOS/shim/src/sys/shim_mmap.c
  41. 8 5
      LibOS/shim/src/sys/shim_open.c
  42. 1 1
      LibOS/shim/src/utils/strobjs.c
  43. 5 0
      LibOS/shim/test/apps/lmbench/Makefile.lmbench
  44. 4 0
      LibOS/shim/test/apps/lmbench/hello.manifest.template
  45. 7 9
      LibOS/shim/test/apps/lmbench/sh.manifest.template
  46. 1 1
      LibOS/shim/test/apps/ltp/fetch.py
  47. 4 4
      LibOS/shim/test/regression/Makefile
  48. 1 1
      Pal/lib/api.h
  49. 0 1
      Pal/lib/assert.h
  50. 2 2
      Pal/lib/graphene/config.c
  51. 41 26
      Pal/lib/list.h
  52. 35 12
      Pal/lib/memmgr.h
  53. 97 44
      Pal/lib/slabmgr.h
  54. 6 6
      Pal/regression/Makefile
  55. 5 5
      Pal/src/db_main.c
  56. 3 3
      Pal/src/db_rtld.c
  57. 1 1
      Pal/src/db_streams.c
  58. 2 2
      Pal/src/host/FreeBSD/db_files.c
  59. 13 34
      Pal/src/host/FreeBSD/db_misc.c
  60. 2 2
      Pal/src/host/FreeBSD/db_streams.c
  61. 6 1
      Pal/src/host/Linux-SGX/db_files.c
  62. 1 1
      Pal/src/host/Linux-SGX/db_mutex.c
  63. 2 2
      Pal/src/host/Linux-SGX/db_streams.c
  64. 1 2
      Pal/src/host/Linux-SGX/enclave_framework.c
  65. 8 8
      Pal/src/host/Linux-SGX/enclave_ocalls.c
  66. 12 6
      Pal/src/host/Linux-SGX/enclave_pages.c
  67. 9 37
      Pal/src/host/Linux-SGX/sgx_main.c
  68. 2 2
      Pal/src/host/Linux/db_files.c
  69. 1 1
      Pal/src/host/Linux/db_sockets.c
  70. 2 2
      Pal/src/host/Linux/db_streams.c
  71. 1 1
      Pal/src/pal_internal.h
  72. 13 6
      Pal/src/slab.c
  73. 1 1
      Runtime/Makefile

+ 4 - 4
LibOS/shim/include/shim_checkpoint.h

@@ -92,7 +92,7 @@ struct shim_cp_entry
 struct shim_mem_entry {
     struct shim_mem_entry * prev;
     void * addr;
-    int size;
+    size_t size;
     void ** paddr;
     int prot;
     void * data;
@@ -118,7 +118,7 @@ struct shim_cp_store {
     struct shim_handle * cp_file;
 
     /* allocation method for check point area */
-    void * (*alloc) (struct shim_cp_store * store, void * mem, int size);
+    void * (*alloc) (struct shim_cp_store *, void *, size_t);
 
     /* check point area */
     ptr_t base, offset, bound;
@@ -179,8 +179,8 @@ enum {
                 new_bound *= 2;                                     \
                                                                     \
             void * buf = store->alloc(store,                        \
-                                      (void *) store->base + store->bound, \
-                                      new_bound - store->bound);    \
+                            (void *) store->base + store->bound,    \
+                            new_bound - store->bound);              \
             if (!buf)                                               \
                 return -ENOMEM;                                     \
                                                                     \

+ 12 - 0
LibOS/shim/include/shim_defs.h

@@ -8,6 +8,14 @@
 
 #define HASH_GIPC                   0
 
+/*
+ * If CPSTORE_DERANDOMIZATION is enabled, the library OS will try to
+ * load the checkpoint (either from the parent or a file) at the
+ * exact address where it was created. This option is currently disabled
+ * to prevent internal fragmentation of the virtual memory space.
+ */
+#define CPSTORE_DERANDOMIZATION     0
+
 #define DEFAULT_HEAP_MIN_SIZE       (256 * 1024 * 1024) /* 256MB */
 #define DEFAULT_MEM_MAX_NPAGES      (1024 * 1024)       /* 4GB */
 #define DEFAULT_BRK_MAX_SIZE        (256 * 1024)        /* 256KB */
@@ -17,8 +25,12 @@
 
 #define EXECVE_RTLD                 1
 
+#define ENABLE_ASLR                 1
+
 /* debug message printout */
 #define DEBUGBUF_SIZE               256
 #define DEBUGBUF_BREAK              0
 
+#define DEFAULT_VMA_COUNT           64
+
 #endif /* _SHIM_DEFS_H_ */

+ 8 - 6
LibOS/shim/include/shim_fs.h

@@ -26,6 +26,8 @@
 #ifndef _SHIM_FS_H_
 #define _SHIM_FS_H_
 
+#include <stdbool.h>
+
 #include <shim_types.h>
 #include <shim_defs.h>
 #include <shim_handle.h>
@@ -229,7 +231,7 @@ struct shim_d_ops {
 
 DEFINE_LIST(shim_mount);
 struct shim_mount {
-    char type[8];
+    char type[8];  // Null-terminated.
 
     struct shim_dentry * mount_point;
 
@@ -310,7 +312,7 @@ const char * get_file_name (const char * path, size_t len);
 /* file system operations */
 int mount_fs (const char * mount_type, const char * mount_uri,
               const char * mount_point, struct shim_dentry *parent,
-              struct shim_dentry **dentp, int make_ancestor);
+              struct shim_dentry **dentp, bool make_ancestor);
 int unmount_fs (const char * mount_point);
 int search_builtin_fs (const char * type, struct shim_mount ** fs);
 
@@ -374,7 +376,7 @@ int lookup_dentry (struct shim_dentry * parent, const char * name, int namelen,
  */
 int __path_lookupat (struct shim_dentry * start, const char * path, int flags,
                      struct shim_dentry ** dent, int link_depth,
-                     struct shim_mount *fs, int make_ancestor);
+                     struct shim_mount *fs, bool make_ancestor);
 
 /* Just wraps __path_lookupat, but also acquires and releases the dcache_lock.
  */
@@ -438,7 +440,7 @@ void get_dentry (struct shim_dentry * dent);
 /* Decrement the reference count on dent */
 void put_dentry (struct shim_dentry * dent);
 
-static_inline
+static_always_inline
 void fast_pathcpy (char * dst, const char * src, int size, char ** ptr)
 {
     char * d = dst;
@@ -448,7 +450,7 @@ void fast_pathcpy (char * dst, const char * src, int size, char ** ptr)
     *ptr = d;
 }
 
-static_inline
+static_always_inline
 char * dentry_get_path (struct shim_dentry * dent, bool on_stack,
                         int * sizeptr)
 {
@@ -491,7 +493,7 @@ char * dentry_get_path (struct shim_dentry * dent, bool on_stack,
     return buffer;
 }
 
-static inline __attribute__((always_inline))
+static_always_inline
 const char * dentry_get_name (struct shim_dentry * dent)
 {
     return qstrgetstr(&dent->name);

+ 3 - 3
LibOS/shim/include/shim_internal.h

@@ -35,7 +35,7 @@
 #define extern_alias(name) \
     extern __typeof(name) shim_##name __attribute ((alias (alias_str(name))))
 
-#define static_inline static inline __attribute__((always_inline))
+#define static_always_inline static inline __attribute__((always_inline))
 
 #include <shim_types.h>
 #include <shim_defs.h>
@@ -555,11 +555,11 @@ extern LOCKTYPE __master_lock;
 # define master_lock()                                              \
     do {                                                            \
         lock(__master_lock);                                        \
-        pal_printf("maste lock " __FILE__ ":%d\n", __LINE__);       \
+        pal_printf("master lock " __FILE__ ":%d\n", __LINE__);       \
     } while (0)
 # define master_unlock()                                            \
     do {                                                            \
-        pal_printf("maste unlock " __FILE__ ":%d\n", __LINE__);     \
+        pal_printf("master unlock " __FILE__ ":%d\n", __LINE__);     \
         unlock(__master_lock);                                      \
     } while (0)
 #else

+ 3 - 3
LibOS/shim/include/shim_ipc.h

@@ -540,7 +540,7 @@ struct shim_ipc_info * discover_client (struct shim_ipc_port * port,
 int __init_ipc_msg (struct shim_ipc_msg * msg, int code, int size, IDTYPE dest);
 struct shim_ipc_msg * create_ipc_msg (int code, int size, IDTYPE dest);
 
-static_inline
+static_always_inline
 struct shim_ipc_msg * create_ipc_msg_on_stack (int code, int size, IDTYPE dest)
 {
     struct shim_ipc_msg * msg = __alloca(IPC_MSG_SIZE(size));
@@ -553,7 +553,7 @@ int __init_ipc_msg_duplex (struct shim_ipc_msg_obj * msg, int code, int size,
 struct shim_ipc_msg_obj *
 create_ipc_msg_duplex (int code, int size, IDTYPE dest);
 
-static_inline
+static_always_inline
 struct shim_ipc_msg_obj *
 create_ipc_msg_duplex_on_stack (int code, int size, IDTYPE dest)
 {
@@ -568,7 +568,7 @@ int __init_ipc_resp_msg (struct shim_ipc_msg * resp, int ret,
 struct shim_ipc_msg *
 create_ipc_resp_msg (int ret, IDTYPE dest, unsigned long seq);
 
-static_inline
+static_always_inline
 struct shim_ipc_msg *
 create_ipc_resp_msg_on_stack (int ret, IDTYPE dest, unsigned long seq)
 {

+ 2 - 1
LibOS/shim/include/shim_signal.h

@@ -111,7 +111,8 @@ struct shim_signal_log {
 
 extern const char * const siglist[NUM_KNOWN_SIGS + 1];
 
-static_inline const char * signal_name (int sig)
+static_always_inline
+const char * signal_name (int sig)
 {
     if (sig <= NUM_KNOWN_SIGS)
         return siglist[sig];

+ 0 - 9
LibOS/shim/include/shim_thread.h

@@ -88,9 +88,6 @@ struct shim_thread {
     bool user_tcb; /* is tcb assigned by user? */
     void * frameptr;
 
-    /* to save vma bookkeeping */
-    struct { void * addr; uint64_t length; } delayed_bkeep_mmap;
-
     REFTYPE ref_count;
     LOCKTYPE lock;
 
@@ -172,12 +169,6 @@ void set_cur_thread (struct shim_thread * thread)
     shim_tcb_t * tcb = SHIM_GET_TLS();
     IDTYPE tid = 0;
 
-#ifndef container_of
-# define container_of(ptr, type, member) ({                 \
-    const typeof( ((type *)0)->member ) *__mptr = (ptr);    \
-    (type *)( (char *)__mptr - offsetof(type,member) );})
-#endif
-
     if (thread) {
         if (tcb->tp && tcb->tp != thread)
             put_thread(tcb->tp);

+ 1 - 1
LibOS/shim/include/shim_types.h

@@ -483,7 +483,7 @@ struct shim_str {
 
 #define QSTR_SIZE   32
 
-/* Use qstr for names. This has fix size string + string object
+/* Use qstr for names. This has fixed size string + string object
  * if len >= QSTR_SIZE then use the overflow string */
 struct shim_qstr {
     HASHTYPE    hash;

+ 25 - 21
LibOS/shim/include/shim_utils.h

@@ -21,8 +21,8 @@
  * shim_utils.h
  */
 
-#ifndef _SHIM_UTILITIES_H_
-#define _SHIM_UTILITIES_H_
+#ifndef _SHIM_UTILS_H_
+#define _SHIM_UTILS_H_
 
 #include <shim_handle.h>
 
@@ -72,8 +72,11 @@ static inline char * qstrsetstr (struct shim_qstr * qstr,
     char * buf = qstr->name;
 
     if (size >= QSTR_SIZE) {
-        if (!qstr->oflow)
+        if (!qstr->oflow) {
             qstr->oflow = get_str_obj();
+            if (!qstr->oflow)
+                return NULL;
+        }
         buf = qstr->oflow->str;
     } else {
         if (qstr->oflow) {
@@ -82,13 +85,9 @@ static inline char * qstrsetstr (struct shim_qstr * qstr,
         }
     }
 
-    qstr->len = 0;
-    if (str) {
-        if (size)
-            memcpy(buf, str, size);
-        buf[size] = 0;
-        qstr->len = size;
-    }
+    memcpy(buf, str, size);
+    buf[size] = 0;
+    qstr->len = size;
 
     return buf;
 }
@@ -108,8 +107,12 @@ static inline char * qstrsetstrs (struct shim_qstr * qstr,
     char * buf = qstr->name;
 
     if (total_size >= QSTR_SIZE) {
-        if (!qstr->oflow)
+        if (!qstr->oflow) {
+            // TODO: alloc proper size.
             qstr->oflow = get_str_obj();
+            if (!qstr->oflow)
+                return NULL;
+        }
         buf = qstr->oflow->str;
     }
 
@@ -153,21 +156,23 @@ static inline int qstrcmpstr (const struct shim_qstr * qstr,
 
 /* heap allocation functions */
 int init_slab (void);
+
 #if defined(SLAB_DEBUG_PRINT) || defined(SLAB_DEBUG_TRACE)
 void * __malloc_debug (size_t size, const char * file, int line);
 #define malloc(size) __malloc_debug((size), __FILE__, __LINE__)
 void __free_debug (void * mem, const char * file, int line);
 #define free(mem) __free_debug((mem), __FILE__, __LINE__)
-void * __remalloc_debug (const void * mem, size_t size,
-                         const char * file, int line);
-#define remalloc(mem, size) __remalloc_debug((mem), (size), __FILE__, __LINE__)
+void * __malloc_copy_debug (const void * mem, size_t size,
+                             const char * file, int line);
+#define malloc_copy(mem, size) __malloc_copy_debug((mem), (size), __FILE__, __LINE__)
 #else
 void * malloc (size_t size);
 void free (void * mem);
-void * remalloc (const void * mem, size_t size);
+void * malloc_copy (const void * mem, size_t size);
 #endif
 
-static_inline char * qstrtostr (struct shim_qstr * qstr, bool on_stack)
+static_always_inline
+char * qstrtostr (struct shim_qstr * qstr, bool on_stack)
 {
     int len = qstr->len;
     char * buf = on_stack ? __alloca(len + 1) : malloc(len + 1);
@@ -175,8 +180,7 @@ static_inline char * qstrtostr (struct shim_qstr * qstr, bool on_stack)
     if (!buf)
         return NULL;
 
-    if (len)
-        memcpy(buf, qstrgetstr(qstr), len);
+    memcpy(buf, qstrgetstr(qstr), len);
 
     buf[len] = 0;
     return buf;
@@ -204,8 +208,8 @@ void md5_final (struct shim_md5_ctx * mdContext);
 /* prompt user for confirmation */
 int message_confirm (const char * message, const char * options);
 
-/* get random number */
-int getrand (void * buffer, size_t size);
+/* get random bytes (not for crypto!) */
+void getrand (void * buffer, size_t size);
 
 /* ELF binary loading */
 int check_elf_object (struct shim_handle * file);
@@ -243,4 +247,4 @@ int terminate_async_helper (void);
 
 extern struct config_store * root_config;
 
-#endif /* _SHIM_UTILITIES_H */
+#endif /* _SHIM_UTILS_H */

+ 67 - 22
LibOS/shim/include/shim_vma.h

@@ -31,6 +31,7 @@
 #include <shim_handle.h>
 
 #include <pal.h>
+#include <api.h>
 #include <list.h>
 
 #include <asm/mman.h>
@@ -39,28 +40,52 @@ struct shim_handle;
 
 #define VMA_COMMENT_LEN     16
 
-DEFINE_LIST(shim_vma);
-struct shim_vma {
-    REFTYPE                 ref_count;
+/*
+ * struct shim_vma_val is the published version of struct shim_vma
+ * (struct shim_vma is defined in bookkeep/shim_vma.c).
+ */
+struct shim_vma_val {
     void *                  addr;
     uint64_t                length;
     int                     prot;
     int                     flags;
     uint64_t                offset;
     struct shim_handle *    file;
-    LIST_TYPE(shim_vma)     list;
     char                    comment[VMA_COMMENT_LEN];
 };
 
+static inline
+void free_vma_val_array (struct shim_vma_val * vmas, size_t count)
+{
+    for (int i = 0 ; i < count ; i++) {
+        /* need to release the file handle */
+        if (vmas[i].file)
+            put_handle(vmas[i].file);
+    }
+
+    free(vmas);
+}
+
 /* an additional flag */
 #define VMA_UNMAPPED 0x10000000   /* vma is kept for bookkeeping, but the
                                      memory is not actually allocated */
-#define VMA_INTERNAL 0x20000000
+#define VMA_INTERNAL 0x20000000   /* vma is used internally */
 
 #define VMA_TAINTED  0x40000000   /* vma has been protected as writeable,
                                      so it has to be checkpointed during
                                      migration */
 
+#define VMA_CP       0x80000000   /* vma is used for dumping checkpoint
+                                     data */
+
+#define VMA_TYPE(flags)     ((flags) & (VMA_INTERNAL | VMA_CP))
+
+/*
+ * We distinguish checkpoint VMAs from user VMAs and other internal VMAs,
+ * to prevent corrupting internal data when creating processes.
+ */
+#define CP_VMA_FLAGS  (MAP_PRIVATE|MAP_ANONYMOUS|VMA_INTERNAL|VMA_CP)
+
 #define NEED_MIGRATE_MEMORY(vma)                                \
         (((vma)->flags & VMA_TAINTED || !(vma)->file) &&        \
         !((vma)->flags & VMA_UNMAPPED))
@@ -90,8 +115,10 @@ static inline PAL_FLG PAL_PROT (int prot, int flags)
 int init_vma (void);
 
 /* Bookkeeping mmap() system call */
-int bkeep_mmap (void * addr, uint64_t length, int prot, int flags,
-                struct shim_handle * file, uint64_t offset, const char * comment);
+int bkeep_mmap (void * addr, uint64_t length,
+                int prot, int flags,
+                struct shim_handle * file, uint64_t offset,
+                const char * comment);
 
 /* Bookkeeping munmap() system call */
 int bkeep_munmap (void * addr, uint64_t length, int flags);
@@ -99,30 +126,48 @@ int bkeep_munmap (void * addr, uint64_t length, int flags);
 /* Bookkeeping mprotect() system call */
 int bkeep_mprotect (void * addr, uint64_t length, int prot, int flags);
 
-/* Get vma bookkeeping handle */
-void get_vma (struct shim_vma * vma);
-void put_vma (struct shim_vma * vma);
-
-/* Returns 0 on success, -E* on failure.
-   Calls `get_vma` on the result before returning it.
-*/
-int lookup_supervma (const void * addr, uint64_t len, struct shim_vma ** vma);
-int lookup_overlap_vma (const void * addr, uint64_t len, struct shim_vma ** vma);
+/* Look up the VMA that contains addr */
+int lookup_vma (void * addr, struct shim_vma_val * vma);
 
-struct shim_vma * next_vma (struct shim_vma * vma);
+/* Look up a VMA that overlaps with [addr, addr + length) */
+int lookup_overlap_vma (void * addr, uint64_t length,
+                        struct shim_vma_val * vma);
 
-void * get_unmapped_vma (uint64_t len, int flags);
-void * get_unmapped_vma_for_cp (uint64_t len);
+/*
+ * Looking for an unmapped space and then adding the corresponding bookkeeping
+ * (more info in bookkeep/shim_vma.c).
+ *
+ * Note: the first argument is "top_addr" because the search is top-down.
+ */
+void * bkeep_unmapped (void * top_addr, void * bottom_addr, uint64_t length,
+                       int prot, int flags, struct shim_handle * file,
+                       uint64_t offset, const char * comment);
+
+static inline void *
+bkeep_unmapped_any (uint64_t length, int prot, int flags,
+                    struct shim_handle * file, uint64_t offset,
+                    const char * comment)
+{
+    return bkeep_unmapped(PAL_CB(user_address.end),
+                          PAL_CB(user_address.start),
+                          length, prot, flags, file, offset, comment);
+}
 
-int dump_all_vmas (struct shim_thread * thread, char * buf, uint64_t size);
+void * bkeep_unmapped_heap (uint64_t length, int prot, int flags,
+                            struct shim_handle * file, uint64_t offset,
+                            const char * comment);
 
-void unmap_all_vmas (void);
+/*
+ * Dumping all *non-internal* VMAs into a user-allocated buffer ("max_count" is
+ * the maximal number of entries in the buffer). Return number of filled entries
+ * if succeeded, or -EOVERFLOW if the buffer is too small.
+ */
+int dump_all_vmas (struct shim_vma_val * vmas, size_t max_count);
 
 /* Debugging */
 void debug_print_vma_list (void);
 
 /* Constants */
-extern unsigned long mem_max_npages;
 extern unsigned long brk_max_size;
 extern unsigned long sys_stack_size;
 

+ 26 - 27
LibOS/shim/src/bookkeep/shim_handle.c

@@ -424,7 +424,7 @@ extend:
         ret = fd;
 out:
     unlock(handle_map->lock);
-    return fd;
+    return ret;
 }
 
 void flush_handle (struct shim_handle * hdl)
@@ -603,23 +603,18 @@ void dup_fd_handle (struct shim_handle_map * map,
 static struct shim_handle_map * get_new_handle_map (FDTYPE size)
 {
     struct shim_handle_map * handle_map =
-                    malloc(sizeof(struct shim_handle_map));
+        calloc(1, sizeof(struct shim_handle_map));
 
-    if (handle_map == NULL)
+    if (!handle_map)
         return NULL;
 
-    memset(handle_map, 0, sizeof(struct shim_handle_map));
-
-    handle_map->map = malloc(sizeof(struct shim_fd_handle) * size);
+    handle_map->map = calloc(size, sizeof(struct shim_fd_handle));
 
-    if (handle_map->map == NULL) {
+    if (!handle_map->map) {
         free(handle_map);
         return NULL;
     }
 
-    memset(handle_map->map, 0,
-           sizeof(struct shim_fd_handle) * size);
-
     handle_map->fd_top  = FD_NULL;
     handle_map->fd_size = size;
     create_lock(handle_map->lock);
@@ -631,25 +626,19 @@ static struct shim_handle_map * __enlarge_handle_map
                      (struct shim_handle_map * map, FDTYPE size)
 {
     if (size <= map->fd_size)
-        return NULL;
+        return map;
 
-    struct shim_fd_handle ** old_map = map->map;
+    struct shim_fd_handle ** new_map = calloc(size, sizeof(new_map[0]));
 
-    map->map = malloc(sizeof(struct shim_fd_handle *) * size);
-
-    if (map->map == NULL) {
-        map->map = old_map;
+    if (!new_map)
         return NULL;
-    }
 
-    size_t copy_size = sizeof(struct shim_fd_handle *) * map->fd_size;
+    memcpy(new_map, map->map, map->fd_size * sizeof(new_map[0]));
+    memset(new_map + map->fd_size, 0,
+           (size - map->fd_size) * sizeof(new_map[0]));
+    free(map->map);
+    map->map = new_map;
     map->fd_size = size;
-    memset(map->map, 0, sizeof(struct shim_fd_handle *) * size);
-    if (old_map) {
-        if (copy_size)
-            memcpy(map->map, old_map, copy_size);
-        free(old_map);
-    }
     return map;
 }
 
@@ -668,7 +657,7 @@ int dup_handle_map (struct shim_handle_map ** new,
     if (old_map->fd_top == FD_NULL)
         goto done;
 
-    for (int i = 0 ; i <= old_map->fd_top ; i++) {
+    for (int i = 0; i <= old_map->fd_top; i++) {
         struct shim_fd_handle * fd_old = old_map->map[i];
         struct shim_fd_handle * fd_new;
 
@@ -678,8 +667,19 @@ int dup_handle_map (struct shim_handle_map ** new,
             /* first, get the handle to prevent it from being deleted */
             struct shim_handle * hdl = fd_old->handle;
             open_handle(hdl);
-            /* DP: I assume we really need a deep copy of the handle map? */
+
             fd_new = malloc(sizeof(struct shim_fd_handle));
+            if (!fd_new) {
+                for (int j = 0; j < i; j++) {
+                    close_handle(new_map->map[j]->handle);
+                    free(new_map->map[j]);
+                }
+                unlock(old_map->lock);
+                *new = NULL;
+                return -ENOMEM;
+            }
+
+            /* DP: I assume we really need a deep copy of the handle map? */
             new_map->map[i] = fd_new;
             fd_new->vfd    = fd_old->vfd;
             fd_new->handle = hdl;
@@ -690,7 +690,6 @@ int dup_handle_map (struct shim_handle_map ** new,
 done:
     unlock(old_map->lock);
     *new = new_map;
-
     return 0;
 }
 

+ 10 - 18
LibOS/shim/src/bookkeep/shim_signal.c

@@ -173,7 +173,7 @@ void deliver_signal (siginfo_t * info, PAL_CONTEXT * context)
 
 delay:
     {
-        if (!(signal = remalloc(signal,sizeof(struct shim_signal))))
+        if (!(signal = malloc_copy(signal,sizeof(struct shim_signal))))
             goto out;
 
         struct shim_signal ** signal_log = allocate_signal_log(cur_thread, sig);
@@ -256,24 +256,23 @@ internal:
     if (context)
         debug("memory fault at %p (IP = %p)\n", arg, context->IP);
 
-    struct shim_vma * vma = NULL;
+    struct shim_vma_val vma;
     int signo = SIGSEGV;
     int code;
     if (!arg) {
         code = SEGV_MAPERR;
-    } else if (!lookup_supervma((void *) arg, 0, &vma)) {
-        if (vma->flags & VMA_INTERNAL) {
-            put_vma(vma);
+    } else if (!lookup_vma((void *) arg, &vma)) {
+        if (vma.flags & VMA_INTERNAL) {
             goto internal;
         }
-        if (vma->file && vma->file->type == TYPE_FILE) {
+        if (vma.file && vma.file->type == TYPE_FILE) {
             /* DEP 3/3/17: If the mapping exceeds end of a file (but is in the VMA)
              * then return a SIGBUS. */
-            uint64_t eof_in_vma = (uint64_t) vma->addr + vma->offset + vma->file->info.file.size;
+            uint64_t eof_in_vma = (uint64_t) vma.addr + vma.offset + vma.file->info.file.size;
             if (arg > eof_in_vma) {
                 signo = SIGBUS;
                 code = BUS_ADRERR;
-            } else if ((context->err & 4) && !(vma->flags & PROT_WRITE)) {
+            } else if ((context->err & 4) && !(vma.flags & PROT_WRITE)) {
                 /* DEP 3/3/17: If the page fault gives a write error, and
                  * the VMA is read-only, return SIGSEGV+SEGV_ACCERR */
                 signo = SIGSEGV;
@@ -286,7 +285,6 @@ internal:
         } else {
             code = SEGV_ACCERR;
         }
-        put_vma(vma);
     } else {
         code = SEGV_MAPERR;
     }
@@ -306,21 +304,15 @@ internal:
         goto ret_exception;
     }
 
-    struct shim_vma * vma = NULL;
+    struct shim_vma_val vma;
 
-    if (!(lookup_supervma((void *) arg, 0, &vma)) &&
-        !(vma->flags & VMA_INTERNAL)) {
+    if (!(lookup_vma((void *) arg, &vma)) &&
+        !(vma.flags & VMA_INTERNAL)) {
         if (context)
             debug("illegal instruction at %p\n", context->IP);
 
-        if (vma)
-            put_vma(vma);
-
         deliver_signal(ALLOC_SIGINFO(SIGILL, ILL_ILLOPC, si_addr, (void *) arg), context);
     } else {
-        if (vma)
-            put_vma(vma);
-
         goto internal;
     }
 

+ 6 - 8
LibOS/shim/src/bookkeep/shim_thread.c

@@ -29,6 +29,7 @@
 #include <shim_vma.h>
 #include <shim_fs.h>
 #include <shim_checkpoint.h>
+#include <shim_utils.h>
 
 #include <pal.h>
 #include <list.h>
@@ -152,11 +153,10 @@ static IDTYPE get_internal_pid (void)
 
 struct shim_thread * alloc_new_thread (void)
 {
-    struct shim_thread * thread = malloc(sizeof(struct shim_thread));
+    struct shim_thread * thread = calloc(1, sizeof(struct shim_thread));
     if (!thread)
         return NULL;
 
-    memset(thread, 0, sizeof(struct shim_thread));
     REF_SET(thread->ref_count, 1);
     INIT_LISTP(&thread->children);
     INIT_LIST_HEAD(thread, siblings);
@@ -204,8 +204,8 @@ struct shim_thread * get_new_thread (IDTYPE new_tid)
                 continue;
 
             thread->signal_handles[i].action =
-                    remalloc(cur_thread->signal_handles[i].action,
-                             sizeof(struct shim_signal_handle));
+                    malloc_copy(cur_thread->signal_handles[i].action,
+                                sizeof(struct shim_signal_handle));
         }
 
         memcpy(&thread->signal_mask, &cur_thread->signal_mask,
@@ -340,10 +340,8 @@ void put_thread (struct shim_thread * thread)
             DkObjectClose(thread->child_exit_event);
         destroy_lock(thread->lock);
 
-        if (MEMORY_MIGRATED(thread))
-            memset(thread, 0, sizeof(struct shim_thread));
-        else
-            free(thread);
+        free(thread->signal_logs);
+        free(thread);
     }
 }
 

File diff suppressed because it is too large
+ 721 - 862
LibOS/shim/src/bookkeep/shim_vma.c


+ 11 - 19
LibOS/shim/src/elf/shim_rtld.c

@@ -187,13 +187,13 @@ static int protect_page (struct link_map * l, void * addr, size_t size)
     }
 
     if ((prot & (PROT_READ|PROT_WRITE)) == (PROT_READ|PROT_WRITE)) {
-        struct shim_vma * vma = NULL;
+        struct shim_vma_val vma;
+
         /* the actual protection of the vma might be changed */
-        if (lookup_supervma(addr, size, &vma) < 0)
+        if (lookup_vma(addr, &vma) < 0)
             return 0;
 
-        prot = vma->prot;
-        put_vma(vma);
+        prot = vma.prot;
 
         if ((prot & (PROT_READ|PROT_WRITE)) == (PROT_READ|PROT_WRITE))
             return 0;
@@ -522,8 +522,11 @@ call_lose:
             if (addr)
                 mappref = (ElfW(Addr)) c->mapstart + (ElfW(Addr)) addr;
             else
-                mappref = (ElfW(Addr)) get_unmapped_vma(ALIGN_UP(maplength),
-                                            MAP_PRIVATE|MAP_ANONYMOUS);
+                mappref = (ElfW(Addr))
+                    bkeep_unmapped_heap(ALIGN_UP(maplength), c->prot,
+                                        c->flags|MAP_PRIVATE|
+                                        (type == OBJECT_INTERNAL ? VMA_INTERNAL : 0),
+                                        file, c->mapoff, NULL);
 
             /* Remember which part of the address space this object uses.  */
             errval = (*mmap) (file, (void **) &mappref, ALIGN_UP(maplength),
@@ -540,17 +543,6 @@ map_error:
 
         l->l_map_start = mappref;
         l->l_map_end = l->l_map_start + maplength;
-
-#if BOOKKEEP_INTERNAL_OBJ == 0
-        if (type != OBJECT_INTERNAL && type != OBJECT_USER)
-#else
-        if (type != OBJECT_USER)
-#endif
-            bkeep_mmap((void *) mappref, ALIGN_UP(maplength), c->prot,
-                       c->flags|MAP_PRIVATE|
-                       (type == OBJECT_INTERNAL ? VMA_INTERNAL : 0),
-                       file, c->mapoff, NULL);
-
         l->l_addr = l->l_map_start - c->mapstart;
 
         if (has_holes) {
@@ -693,7 +685,7 @@ postmap:
         }
     } else {
         l->l_real_ld = (ElfW(Dyn) *) RELOCATE(l, l->l_ld);
-        l->l_ld = remalloc(l->l_real_ld, sizeof(ElfW(Dyn)) * l->l_ldnum);
+        l->l_ld = malloc_copy(l->l_real_ld, sizeof(ElfW(Dyn)) * l->l_ldnum);
     }
 
     elf_get_dynamic_info(l);
@@ -704,7 +696,7 @@ postmap:
         /* DEP 3/12/18: This string is not stable; copy it. */
         char * tmp = (char *) (D_PTR (l->l_info[DT_STRTAB])
                               + D_PTR (l->l_info[DT_SONAME]));
-        l->l_soname = remalloc(tmp, strlen(tmp) + 1);
+        l->l_soname = malloc_copy(tmp, strlen(tmp) + 1);
     }
 
     if (l->l_phdr == NULL) {

+ 122 - 95
LibOS/shim/src/fs/chroot/fs.c

@@ -147,12 +147,10 @@ static inline int concat_uri (char * buffer, int size, int type,
    handle is not linked to a dentry */
 static struct shim_file_data * __create_data (void)
 {
-    struct shim_file_data * data = malloc(sizeof(struct shim_file_data));
-
+    struct shim_file_data * data = calloc(1, sizeof(struct shim_file_data));
     if (!data)
         return NULL;
 
-    memset(data, 0, sizeof(struct shim_file_data));
     create_lock(data->lock);
     return data;
 }
@@ -251,7 +249,7 @@ static int __query_attr (struct shim_dentry * dent,
                 return ret;
             }
         }
-        
+
         /* DEP 3/18/17: If we have a directory, we need to find out how many
          * children it has by hand. */
         /* XXX: Keep coherent with rmdir/mkdir/creat, etc */
@@ -264,7 +262,6 @@ static int __query_attr (struct shim_dentry * dent,
             for (d = dbuf; d; d = d->next)
                 nlink++;
             free(dbuf);
-            debug("Querying a directory; I count %d links.\n", nlink);
         } else
             nlink = 2; // Educated guess...
         data->nlink = nlink;
@@ -274,7 +271,7 @@ static int __query_attr (struct shim_dentry * dent,
          */ 
         data->nlink = 1;
     }
-    
+
     data->queried = true;
 
     return 0;
@@ -351,7 +348,6 @@ static int query_dentry (struct shim_dentry * dent, PAL_HANDLE pal_handle,
         stat->st_ctime  = (time_t) data->ctime;
         stat->st_nlink  = data->nlink;
 
-        
         switch (data->type) {
             case FILE_REGULAR:
                 stat->st_mode |= S_IFREG;
@@ -365,7 +361,6 @@ static int query_dentry (struct shim_dentry * dent, PAL_HANDLE pal_handle,
                 break;
             default:            break;
         }
-        debug("Stat: Returning link count %d\n", stat->st_nlink);
     }
 
     unlock(data->lock);
@@ -567,7 +562,6 @@ static int chroot_recreate (struct shim_handle * hdl)
     }
 
     /*
-     * Chia-Che Tsai 8/24/2017:
      * when recreating a file handle after migration, the file should
      * not be created again.
      */
@@ -623,7 +617,9 @@ static int chroot_flush (struct shim_handle * hdl)
 
         if (mapbuf) {
             DkStreamUnmap(mapbuf, mapsize);
-            bkeep_munmap(mapbuf, mapsize, VMA_INTERNAL);
+
+            if (bkeep_munmap(mapbuf, mapsize, VMA_INTERNAL) < 0)
+                bug();
         }
     }
 
@@ -640,7 +636,9 @@ static inline int __map_buffer (struct shim_handle * hdl, int size)
             return 0;
 
         DkStreamUnmap(file->mapbuf, file->mapsize);
-        bkeep_munmap(file->mapbuf, file->mapsize, VMA_INTERNAL);
+
+        if (bkeep_munmap(file->mapbuf, file->mapsize, VMA_INTERNAL) < 0)
+            bug();
 
         file->mapbuf    = NULL;
         file->mapoffset = 0;
@@ -648,23 +646,34 @@ static inline int __map_buffer (struct shim_handle * hdl, int size)
 
     /* second, reallocate the buffer */
     uint64_t bufsize = file->mapsize ? : FILE_BUFMAP_SIZE;
-    int prot = PAL_PROT_READ;
     uint64_t mapoff = file->marker & ~(bufsize - 1);
-    uint64_t maplen = bufsize;	
+    uint64_t maplen = bufsize;
+    int flags = MAP_FILE | MAP_PRIVATE | VMA_INTERNAL;
+    int prot = PROT_READ;
 
-    if (hdl->acc_mode & MAY_WRITE)
-        prot |= PAL_PROT_WRITE;
+    if (hdl->acc_mode & MAY_WRITE) {
+        flags = MAP_FILE | MAP_SHARED | VMA_INTERNAL;
+        prot |= PROT_WRITE;
+    }
 
     while (mapoff + maplen < file->marker + size)
         maplen *= 2;
 
-    void * mapbuf =
-        (void *) DkStreamMap(hdl->pal_handle, NULL, prot, mapoff, maplen);
+    /* create the bookkeeping before allocating the memory */
+    void * mapbuf = bkeep_unmapped_any(maplen, prot, flags, hdl, mapoff,
+                                       "filebuf");
     if (!mapbuf)
+        return -ENOMEM;
+
+    PAL_PTR mapped = DkStreamMap(hdl->pal_handle, mapbuf, PAL_PROT(prot, flags),
+                                 mapoff, maplen);
+
+    if (!mapped) {
+        bkeep_munmap(mapbuf, maplen, flags);
         return -PAL_ERRNO;
+    }
 
-    bkeep_mmap(mapbuf, maplen, prot, MAP_FILE|MAP_SHARED|VMA_INTERNAL,
-               hdl, mapoff, NULL);
+    assert((void *) mapped == mapbuf);
 
     file->mapbuf    = mapbuf;
     file->mapoffset = mapoff;
@@ -955,116 +964,133 @@ static int chroot_dput (struct shim_dentry * dent)
     return 0;
 }
 
-#define DEFAULT_DBUF_SIZE   1024
-
 static int chroot_readdir (struct shim_dentry * dent,
                            struct shim_dirent ** dirent)
 {
+    /*
+     * List the host directory behind 'dent' and return its entries through
+     * '*dirent' as a chain of shim_dirent records packed into one malloc'ed
+     * buffer (linked by 'next'; the last 'next' is NULL). '*dirent' is left
+     * untouched when the host reports end-of-stream without data or when an
+     * error occurs. Returns a negative error code on failure.
+     */
-    int ret;
     struct shim_file_data * data;
+    int ret;
+
     if ((ret = try_create_data(dent, NULL, 0, &data)) < 0)
         return ret;
 
     chroot_update_ino(dent);
+    const char * uri = qstrgetstr(&data->host_uri);
+    assert(strpartcmp_static(uri, "dir:"));
 
-    assert(strpartcmp_static(qstrgetstr(&data->host_uri), "dir:"));
-
-    PAL_HANDLE pal_hdl = DkStreamOpen(qstrgetstr(&data->host_uri),
-                                      PAL_ACCESS_RDONLY, 0, 0, 0);
+    PAL_HANDLE pal_hdl = DkStreamOpen(uri, PAL_ACCESS_RDONLY, 0, 0, 0);
     if (!pal_hdl)
         return -PAL_ERRNO;
 
-    int buf_size = 0, new_size = MAX_PATH;
-    int bytes;
-    char * buf = NULL, * new_buf;
-
-    int dbufsize = MAX_PATH;
-    struct shim_dirent * dbuf = malloc(dbufsize);
-    struct shim_dirent * d = dbuf, ** last = NULL;
-
-retry:
-    new_buf = __alloca(new_size);
-    if (buf)
-        memcpy(new_buf, buf, buf_size);
-    buf_size = new_size;
-    buf = new_buf;
-
-    while (1) {
-        bytes = DkStreamRead(pal_hdl, 0, buf_size, buf, NULL, 0);
+    size_t buf_size = MAX_PATH, bytes = 0;
+    char * buf = malloc(buf_size);
+    if (!buf) {
+        ret = -ENOMEM;
+        goto out_hdl;
+    }
 
-        if (bytes == 0) {
-            if (PAL_NATIVE_ERRNO == PAL_ERROR_ENDOFSTREAM)
-                break;
+    /*
+     * Try to read the directory list from the host. DkStreamRead
+     * does not accept an offset for directory listing. Therefore, we
+     * retry with a larger buffer if the current one is not large enough.
+     */
+retry_read:
+    bytes = DkStreamRead(pal_hdl, 0, buf_size, buf, NULL, 0);
+    if (!bytes) {
+        ret = 0;
+        if (PAL_NATIVE_ERRNO == PAL_ERROR_ENDOFSTREAM)
+            goto out;
 
-            if (PAL_NATIVE_ERRNO == PAL_ERROR_OVERFLOW) {
-                new_size = buf_size * 2;
-                goto retry;
+        if (PAL_NATIVE_ERRNO == PAL_ERROR_OVERFLOW) {
+            char * new_buf = malloc(buf_size * 2);
+            if (!new_buf) {
+                ret = -ENOMEM;
+                goto out;
             }
 
-            ret = -PAL_ERRNO;
-            goto out;
+            free(buf);
+            buf_size *= 2;
+            buf = new_buf;
+            goto retry_read;
         }
 
-        char * b = buf, * next_b;
-        int blen;
-
-        while (b < buf + bytes) {
-            blen = strlen(b);
-            next_b = b + blen + 1;
-            bool isdir = false;
-
-            if (b[blen - 1] == '/') {
-                isdir = true;
-                b[blen - 1] = 0;
-                blen--;
-            }
+        ret = -PAL_ERRNO;
+        goto out;
+    }
 
-            int dsize = sizeof(struct shim_dirent) + blen + 1;
+    /* Now emitting the dirent data */
+    size_t dbuf_size = MAX_PATH;
+    struct shim_dirent * dbuf = malloc(dbuf_size);
+    if (!dbuf) {
+        /* ret still holds the earlier success value; report the failure
+           instead of returning success with '*dirent' never written. */
+        ret = -ENOMEM;
+        goto out;
+    }
 
-            if ((void *) d + dsize > (void *) dbuf + dbufsize) {
-                int newsize = dbufsize * 2;
-                while ((void *) d + dsize > (void *) dbuf + newsize)
-                    newsize *= 2;
+    struct shim_dirent * d = dbuf, ** last = NULL;
+    char * b = buf, * next_b;
+    int blen;
+
+    /* Scanning the directory names in the buffer */
+    while (b < buf + bytes) {
+        blen = strlen(b);
+        next_b = b + blen + 1;
+        bool isdir = false;
+
+        /* PAL convention: a name that ends with "/" denotes
+           a directory. */
+        if (b[blen - 1] == '/') {
+            isdir = true;
+            b[blen - 1] = 0;
+            blen--;
+        }
 
-                struct shim_dirent * new_dbuf = malloc(newsize);
+        /* Populating a dirent */
+        int dsize = sizeof(struct shim_dirent) + blen + 1;
 
-                memcpy(new_dbuf, dbuf, (void *) d - (void *) dbuf);
-                struct shim_dirent * d1 = new_dbuf;
-                struct shim_dirent * d2 = dbuf;
-                while (d2 != d) {
-                    d1->next = (void *) d1 + ((void *) d2->next - (void *) d2);
-                    d1 = d1->next;
-                    d2 = d2->next;
-                }
+        /* dbuf is not large enough, reallocate the dirent buffer */
+        if ((void *) d + dsize > (void *) dbuf + dbuf_size) {
+            int newsize = dbuf_size * 2;
+            while ((void *) d + dsize > (void *) dbuf + newsize)
+                newsize *= 2;
 
+            struct shim_dirent * new_dbuf = malloc(newsize);
+            if (!new_dbuf) {
+                ret = -ENOMEM;
                 free(dbuf);
-                dbuf = new_dbuf;
-                d = d1;
-                dbufsize = newsize;
+                goto out;
             }
 
-            HASHTYPE hash = rehash_name(dent->ino, b, blen);
-
-            d->next = (void *) (d + 1) + blen + 1;
-            d->ino = hash;
-            d->type = isdir ? LINUX_DT_DIR : LINUX_DT_REG;
-            memcpy(d->name, b, blen + 1);
+            memcpy(new_dbuf, dbuf, (void *) d - (void *) dbuf);
+            struct shim_dirent * d1 = new_dbuf;
+            struct shim_dirent * d2 = dbuf;
+            while (d2 != d) {
+                d1->next = (void *) d1 + ((void *) d2->next - (void *) d2);
+                d1 = d1->next;
+                d2 = d2->next;
+            }
 
-            b = next_b;
-            last = &d->next;
-            d = d->next;
+            free(dbuf);
+            dbuf = new_dbuf;
+            d = d1;
+            dbuf_size = newsize;
         }
-    }
 
-    if (!last) {
-        free(dbuf);
-        goto out;
+        /* Fill up the dirent buffer */
+        HASHTYPE hash = rehash_name(dent->ino, b, blen);
+
+        d->next = (void *) (d + 1) + blen + 1;
+        d->ino = hash;
+        d->type = isdir ? LINUX_DT_DIR : LINUX_DT_REG;
+        memcpy(d->name, b, blen + 1);
+
+        b = next_b;
+        last = &d->next;
+        d = d->next;
     }
 
     *last = NULL;
     *dirent = dbuf;
 
 out:
+    free(buf);
+out_hdl:
     DkObjectClose(pal_hdl);
     return ret;
 }
@@ -1082,7 +1108,6 @@ static int chroot_checkout (struct shim_handle * hdl)
 
     if (hdl->pal_handle) {
         /*
-         * Chia-Che 8/24/2017:
          * if the file still exists in the host, no need to send
          * the handle over RPC; otherwise, send it.
          */
@@ -1113,6 +1138,8 @@ static int chroot_migrate (void * checkpoint, void ** mount_data)
                     sizeof(struct mount_data) + 1;
 
     void * new_data = malloc(alloc_len);
+    if (!new_data)
+        return -ENOMEM;
 
     memcpy(new_data, mdata, alloc_len);
     *mount_data = new_data;

+ 5 - 6
LibOS/shim/src/fs/dev/fs.c

@@ -115,19 +115,18 @@ static int dev_random_mode (const char * name, mode_t * mode)
 }
 
 static int dev_random_read (struct shim_handle * hdl, void * buf,
-                             size_t count)
+                            size_t count)
 {
-    int rv;
-    rv = DkRandomBitsRead(buf, count);
+    int rv = DkRandomBitsRead(buf, count);
     return rv;
 }
 
 static int dev_urandom_read (struct shim_handle * hdl, void * buf,
                              size_t count)
 {
-    int rv;
-    rv = getrand(buf, count);
-    return rv;
+    // THIS IS NOT CRYPTO-SECURE, FIX!!!
+    getrand(buf, count);
+    return count;
 }
 
 static int dev_random_stat (const char * name, struct stat * stat)

+ 3 - 4
LibOS/shim/src/fs/proc/info.c

@@ -50,9 +50,9 @@ static int proc_meminfo_open (struct shim_handle * hdl, const char * name,
         };
 
 retry:
-    if (str) free(str);
     max *= 2;
     len = 0;
+    free(str);
     str = malloc(max);
     if (!str)
         return -ENOMEM;
@@ -105,9 +105,9 @@ static int proc_cpuinfo_open (struct shim_handle * hdl, const char * name,
         };
 
 retry:
-    if (str) free(str);
     max *= 2;
     len = 0;
+    free(str);
     str = malloc(max);
     if (!str)
         return -ENOMEM;
@@ -132,13 +132,12 @@ retry:
         str[len] = 0;
     }
 
-    struct shim_str_data * data = malloc(sizeof(struct shim_str_data));
+    struct shim_str_data * data = calloc(1, sizeof(struct shim_str_data));
     if (!data) {
         free(str);
         return -ENOMEM;
     }
 
-    memset(data, 0, sizeof(struct shim_str_data));
     data->str = str;
     data->len = len;
     hdl->type = TYPE_STR;

+ 90 - 21
LibOS/shim/src/fs/proc/thread.c

@@ -21,8 +21,6 @@
 #include <asm/unistd.h>
 #include <asm/prctl.h>
 
-#define DEFAULT_BUFFER_SIZE 256
-
 static int parse_thread_name (const char * name,
                               const char ** next, int * next_len,
                               const char ** nextnext)
@@ -482,6 +480,7 @@ static int proc_thread_maps_open (struct shim_handle * hdl,
     const char * next;
     int next_len;
     int pid = parse_thread_name(name, &next, &next_len, NULL);
+    int ret = 0;
 
     if (pid < 0)
         return pid;
@@ -491,54 +490,124 @@ static int proc_thread_maps_open (struct shim_handle * hdl,
     if (!thread)
         return -ENOENT;
 
-    int size = DEFAULT_BUFFER_SIZE;
-    char * strbuf = malloc(size);
-    int ret = 0, len = 0;
+    size_t count = DEFAULT_VMA_COUNT;
+    struct shim_vma_val * vmas = malloc(sizeof(struct shim_vma_val) * count);
 
-    if (!strbuf) {
+    if (!vmas) {
         ret = -ENOMEM;
         goto out;
     }
 
-retry:
-    ret = dump_all_vmas(thread, strbuf, size);
+retry_dump_vmas:
+    ret = dump_all_vmas(vmas, count);
 
     if (ret == -EOVERFLOW) {
-        char * newbuf = malloc(size * 2);
-        if (!newbuf) {
+        struct shim_vma_val * new_vmas
+                = malloc(sizeof(struct shim_vma_val) * count * 2);
+        if (!new_vmas) {
             ret = -ENOMEM;
             goto err;
         }
-        free(strbuf);
-        strbuf = newbuf;
-        size *= 2;
-        goto retry;
+        free(vmas);
+        vmas = new_vmas;
+        count *= 2;
+        goto retry_dump_vmas;
     }
 
     if (ret < 0)
         goto err;
 
-    len = ret;
+#define DEFAULT_VMA_BUFFER_SIZE     256
+
+    count = ret;
+    size_t buffer_size = DEFAULT_VMA_BUFFER_SIZE, offset = 0;
+    char * buffer = malloc(buffer_size);
+    if (!buffer) {
+        ret = -ENOMEM;
+        goto err;
+    }
+
+    for (struct shim_vma_val * vma = vmas ; vma < vmas + count ; vma++) {
+        size_t old_offset = offset;
+        uint64_t start = (uint64_t) vma->addr;
+        uint64_t end   = (uint64_t) vma->addr + vma->length;
+        char pt[3] = {
+            (vma->prot & PROT_READ)  ? 'r' : '-',
+            (vma->prot & PROT_WRITE) ? 'w' : '-',
+            (vma->prot & PROT_EXEC)  ? 'x' : '-',
+        };
+        char pr = (vma->flags & MAP_PRIVATE) ? 'p' : 's';
+
+#define ADDR_FMT(addr) ((addr) > 0xffffffff ? "%lx" : "%08x")
+#define EMIT(fmt ...)                                                   \
+        do {                                                            \
+            offset += snprintf(buffer + offset, buffer_size - offset,   \
+                               fmt);                                    \
+        } while (0)
+
+retry_emit_vma:
+        if (vma->file) {
+            int dev_major = 0, dev_minor = 0;
+            unsigned long ino = vma->file->dentry ? vma->file->dentry->ino : 0;
+            const char * name = "[unknown]";
+
+            if (!qstrempty(&vma->file->path))
+                name = qstrgetstr(&vma->file->path);
+
+            EMIT(ADDR_FMT(start), start);
+            EMIT("-");
+            EMIT(ADDR_FMT(end),   end);
+            EMIT(" %c%c%c%c %08lx %02d:%02d %u %s\n", pt[0], pt[1], pt[2], pr,
+                 vma->offset, dev_major, dev_minor, ino, name);
+        } else {
+            EMIT(ADDR_FMT(start), start);
+            EMIT("-");
+            EMIT(ADDR_FMT(end),   end);
+            if (vma->comment[0])
+                EMIT(" %c%c%c%c 00000000 00:00 0 %s\n", pt[0], pt[1], pt[2], pr,
+                     vma->comment);
+            else
+                EMIT(" %c%c%c%c 00000000 00:00 0\n", pt[0], pt[1], pt[2], pr);
+        }
+
+        if (offset >= buffer_size) {
+            char * new_buffer = malloc(buffer_size * 2);
+            if (!new_buffer) {
+                ret = -ENOMEM;
+                goto err;
+            }
+
+            offset = old_offset;
+            memcpy(new_buffer, buffer, old_offset);
+            free(buffer);
+            buffer = new_buffer;
+            buffer_size *= 2;
+            goto retry_emit_vma;
+        }
+    }
 
-    struct shim_str_data * data = malloc(sizeof(struct shim_str_data));
+    struct shim_str_data * data = calloc(1, sizeof(struct shim_str_data));
     if (!data) {
         ret = -ENOMEM;
         goto err;
     }
 
-    memset(data, 0, sizeof(struct shim_str_data));
-    data->str = strbuf;
-    data->len = len;
-    hdl->type = TYPE_STR;
+    data->str  = buffer;
+    data->len  = offset;
+    hdl->type  = TYPE_STR;
     hdl->flags = flags & ~O_RDONLY;
     hdl->acc_mode = MAY_READ;
     hdl->info.str.data = data;
     ret = 0;
 out:
     put_thread(thread);
+    if (vmas)
+        free_vma_val_array(vmas, count);
     return ret;
+
 err:
-    free(strbuf);
+    if (buffer)
+        free(buffer);
     goto out;
 }
 

+ 1 - 0
LibOS/shim/src/fs/shim_dcache.c

@@ -61,6 +61,7 @@ static struct shim_dentry * alloc_dentry (void)
 
     memset(dent, 0, sizeof(struct shim_dentry));
 
+    REF_SET(dent->ref_count, 0);
     dent->mode = NO_MODE;
 
     INIT_LIST_HEAD(dent, hlist);

+ 71 - 46
LibOS/shim/src/fs/shim_fs.c

@@ -105,18 +105,15 @@ static int __mount_root (struct shim_dentry ** root)
         debug("mounting root filesystem: %s from %s\n", type, uri);
         if ((ret = mount_fs(type, uri, "/", NULL, root, 0)) < 0) {
             debug("mounting root filesystem failed (%d)\n", ret);
-            goto out;
+            return ret;
         }
-        goto out;
+        return ret;
     }
 
     debug("mounting default root filesystem\n");
     if ((ret = mount_fs("chroot", "file:", "/", NULL, root, 0)) < 0) {
         debug("mounting root filesystem failed (%d)\n", ret);
-        goto out;
     }
-
-out:
     return ret;
 }
 
@@ -188,6 +185,9 @@ static int __mount_one_other (const char * key, int keylen)
 
 static int __mount_others (void)
 {
+    char * keybuf;
+    int ret = 0;
+
     if (!root_config)
         return 0;
 
@@ -197,11 +197,14 @@ static int __mount_others (void)
     if (keybuf_size < 0)
         return 0;
 
-    char * keybuf = __alloca(keybuf_size);
+    keybuf = malloc(keybuf_size);
+    if (!keybuf)
+        return -ENOMEM;
+
     nkeys = get_config_entries(root_config, "fs.mount", keybuf, keybuf_size);
 
-    if (nkeys < 0)
-        return 0;
+    if (nkeys <= 0)
+        goto out;
 
     const char * key = keybuf, * next = NULL;
     for (int n = 0 ; n < nkeys ; key = next, n++) {
@@ -209,10 +212,12 @@ static int __mount_others (void)
         next++;
         int ret = __mount_one_other(key, next - key - 1);
         if (ret < 0)
-            return ret;
+            goto out;
     }
 
-    return 0;
+out:
+    free(keybuf);
+    return ret;
 }
 
 int init_mount_root (void)
@@ -248,7 +253,7 @@ int init_mount (void)
 static inline struct shim_fs * find_fs (const char * type)
 {
     struct shim_fs * fs = NULL;
-    int len = strlen(type);
+    size_t len = strlen(type);
 
     for (int i = 0 ; i < NUM_MOUNTABLE_FS ; i++)
         if (!memcmp(type, mountable_fs[i].name, len + 1)) {
@@ -261,7 +266,7 @@ static inline struct shim_fs * find_fs (const char * type)
 
 int search_builtin_fs (const char * type, struct shim_mount ** fs)
 {
-    int len = strlen(type);
+    size_t len = strlen(type);
 
     for (int i = 0 ; i < NUM_BUILTIN_FS ; i++)
         if (!memcmp(type, builtin_fs[i]->type, len + 1)) {
@@ -291,6 +296,10 @@ int __mount_fs (struct shim_mount * mount, struct shim_dentry * dent)
         if (ret < 0) {
             /* Try getting rid of ESKIPPED case */
             assert (ret != -ESKIPPED);
+            // TODO: `mount_root` leaks here, but fixing this would require
+            // fixing `get_new_dentry` semantics (its result sometimes has
+            // its refcount set to 0).
+            // put_dentry(mount_root);
             return ret;
         }
         mount->root = mount_root;
@@ -340,6 +349,34 @@ int __mount_fs (struct shim_mount * mount, struct shim_dentry * dent)
     return 0;
 }
 
+// Finds the last component of `path` (a pointer into `path`, trailing slashes
+// excluded). If there is none, `*last_comp` is NULL and `*last_comp_len` is 0.
+static void find_last_component(const char* path, const char** last_comp,
+                                size_t* last_comp_len) {
+    *last_comp = NULL;
+    size_t last_len = 0;
+    size_t path_len = strlen(path);
+    if (path_len == 0)
+        goto out;
+
+    // Drop any trailing slashes; a path made only of slashes has no component.
+    const char* last = path + path_len - 1;
+    while (last > path && *last == '/')
+        last--;
+    if (*last == '/')
+        goto out;
+
+    // Walk back to the first character of the last component, counting it.
+    last_len = 1;
+    while (last > path && *(last-1) != '/') {
+        last--;
+        last_len++;
+    }
+    *last_comp = last;
+out:
+    *last_comp_len = last_len;
+}
+
 /* Parent is optional, but helpful.
  * dentp (optional) memoizes the dentry of the newly-mounted FS, on success. 
  *
@@ -349,7 +386,7 @@ int __mount_fs (struct shim_mount * mount, struct shim_dentry * dent)
  */
 int mount_fs (const char * type, const char * uri, const char * mount_point,
               struct shim_dentry *parent, struct shim_dentry **dentp,
-              int make_ancestor)
+              bool make_ancestor)
 {
     int ret = 0;
     struct shim_fs * fs = find_fs(type);
@@ -360,37 +397,23 @@ int mount_fs (const char * type, const char * uri, const char * mount_point,
     }
 
     /* Split the mount point into the prefix and atom */
-    int mount_point_len = strlen(mount_point);
-    const char * last = &mount_point[mount_point_len - 1];
-    int left = mount_point_len;
-    int last_len = 1;
-    // Drop any trailing slashes
-    while (left && *last == '/') {
-        left--;
-        last--;
-        if (last_len != 0)
-            last_len--;
-    }
-    // Skip the atom
-    while (left && *last != '/') {
-        left--;
-        last--;
-        last_len++;
-    }
-    if (*last == '/') {
-        // Move forward one
-        last++;
-        last_len--;
+    size_t mount_point_len = strlen(mount_point);
+    if (mount_point_len == 0) {
+        ret = -EINVAL;
+        goto out;
     }
+    const char* last;
+    size_t last_len;
+    find_last_component(mount_point, &last, &last_len);
 
     if (!parent) {
         // See if we are not at the root mount
-        if (mount_point_len != 1 || mount_point[0] != '/') {
+        if (last_len > 0) {
             // Look up the parent
-            char * parent_path = __alloca(mount_point_len);
-            memset(parent_path, 0, mount_point_len);
-            assert(last_len >= 1 && (mount_point_len - last_len) >= 0);
-            memcpy(parent_path, mount_point, mount_point_len - last_len);
+            size_t parent_len = last - mount_point;
+            char * parent_path = __alloca(parent_len + 1);
+            memcpy(parent_path, mount_point, parent_len);
+            parent_path[parent_len] = 0;
             if ((ret = __path_lookupat(dentry_root, parent_path, 0, &parent, 0,
                                        dentry_root->fs, make_ancestor)) < 0) {
                 debug("Path lookup failed %d\n", ret);
@@ -406,10 +429,10 @@ int mount_fs (const char * type, const char * uri, const char * mount_point,
 
     /* call fs-specific mount to allocate mount_data */
     if ((ret = fs->fs_ops->mount(uri, mount_point, &mount_data)) < 0)
-        goto out;
+        goto out_with_unlock;
 
 
-    int uri_len = uri ? strlen(uri) : 0;
+    size_t uri_len = uri ? strlen(uri) : 0;
     qstrsetstr(&mount->path, mount_point, mount_point_len);
     qstrsetstr(&mount->uri, uri, uri_len);
     memcpy(mount->type, fs->name, sizeof(fs->name));
@@ -420,14 +443,14 @@ int mount_fs (const char * type, const char * uri, const char * mount_point,
     /* Get the negative dentry from the cache, if one exists */
     struct shim_dentry * dent, *dent2;
     /* Special case the root */
-    if (mount_point_len == 1 && mount_point[0] == '/')
+    if (last_len == 0)
         dent = dentry_root;
     else {
         dent = __lookup_dcache(parent, last,
                                last_len,
                                NULL, 0, NULL);
 
-        if(!dent) {
+        if (!dent) {
             dent = get_new_dentry(mount, parent, last, last_len, NULL);
             get_dentry(dent);
         }
@@ -443,7 +466,7 @@ int mount_fs (const char * type, const char * uri, const char * mount_point,
     /*Now go ahead and do a lookup so the dentry is valid */
     if ((ret = __path_lookupat(dentry_root, mount_point, 0, &dent2, 0,
                                parent ? parent->fs : mount, make_ancestor)) < 0) 
-        goto out;
+        goto out_with_unlock;
 
     assert(dent == dent2);
 
@@ -456,7 +479,7 @@ int mount_fs (const char * type, const char * uri, const char * mount_point,
 
     // If we made it this far and the dentry is still negative, clear
     // the negative flag from the dentry.
-    if ((!ret) && (dent->state & DENTRY_NEGATIVE)) 
+    if (!ret && (dent->state & DENTRY_NEGATIVE))
         dent->state &= ~DENTRY_NEGATIVE;
     
     /* Set the file system at the mount point properly */
@@ -464,8 +487,10 @@ int mount_fs (const char * type, const char * uri, const char * mount_point,
     
     if (dentp && !ret)
         *dentp = dent;
-out:
+
+out_with_unlock:
     unlock(dcache_lock);
+out:
     return ret;
 }
 

+ 2 - 2
LibOS/shim/src/fs/shim_fs_hash.c

@@ -176,7 +176,7 @@ HASHTYPE rehash_name (HASHTYPE parent_hbuf,
     return ret;
 }
 
-HASHTYPE rehash_path (HASHTYPE ancester_hbuf,
+HASHTYPE rehash_path (HASHTYPE ancestor_hbuf,
                       const char * path, int size, const char * sep)
 {
     HASHTYPE ctx = 0;
@@ -203,6 +203,6 @@ HASHTYPE rehash_path (HASHTYPE ancester_hbuf,
         digest ^= ctx;
     }
 
-    hbuf = ancester_hbuf ^ digest;
+    hbuf = ancestor_hbuf ^ digest;
     return hbuf;
 }

+ 17 - 4
LibOS/shim/src/fs/shim_namei.c

@@ -25,6 +25,8 @@
  * directory cache.
  */
 
+#include <stdbool.h>
+
 #include <shim_internal.h>
 #include <shim_utils.h>
 #include <shim_thread.h>
@@ -175,6 +177,8 @@ int lookup_dentry (struct shim_dentry * parent, const char * name, int namelen,
 
     if (!dent) {
         dent = get_new_dentry(fs, parent, name, namelen, NULL);
+        if (!dent)
+            return -ENOMEM;
         do_fs_lookup = 1;
         // In the case we make a new dentry, go ahead and increment the
         // ref count; in other cases, __lookup_dcache does this
@@ -257,7 +261,7 @@ int lookup_dentry (struct shim_dentry * parent, const char * name, int namelen,
  */
 int __path_lookupat (struct shim_dentry * start, const char * path, int flags,
                      struct shim_dentry ** dent, int link_depth,
-                     struct shim_mount * fs, int make_ancestor)
+                     struct shim_mount * fs, bool make_ancestor)
 {
     // Basic idea: recursively iterate over path, peeling off one atom at a
     // time.
@@ -390,6 +394,8 @@ int __path_lookupat (struct shim_dentry * start, const char * path, int flags,
             get_mount(my_dent->fs);
             err = __path_lookupat (my_dent, my_path, flags, dent, link_depth,
                                    my_dent->fs, make_ancestor);
+            if (err < 0)
+                goto out;
             /* If we aren't returning a live reference to the target dentry, go
              * ahead and release the ref count when we unwind the recursion.
              */
@@ -401,7 +407,8 @@ int __path_lookupat (struct shim_dentry * start, const char * path, int flags,
                 my_dent->state |= DENTRY_ANCESTOR;
                 my_dent->state |= DENTRY_ISDIRECTORY;
                 my_dent->state &= ~DENTRY_NEGATIVE;
-                if (err == -ENOENT) err = 0;
+                if (err == -ENOENT)
+                    err = 0;
             }
             base_case = 1;
         }
@@ -595,6 +602,10 @@ int dentry_open (struct shim_handle * hdl, struct shim_dentry * dent,
         hdl->info.dir.ptr = (void *)-1;
     }
     path = dentry_get_path(dent, true, &size);
+    if (!path) {
+        ret = -ENOMEM;
+        goto out;
+    }
     qstrsetstr(&hdl->path, path, size);
 
     /* truncate the file if O_TRUNC is given */
@@ -676,8 +687,10 @@ int list_directory_dentry (struct shim_dentry *dent) {
 
     struct shim_dirent * dirent = NULL;
 
-    if ((ret = fs->d_ops->readdir(dent, &dirent)) < 0 || !dirent)
+    if ((ret = fs->d_ops->readdir(dent, &dirent)) < 0 || !dirent) {
+        dirent = NULL;
         goto done_read;
+    }
     
     struct shim_dirent * d = dirent;
     for ( ; d ; d = d->next) {
@@ -697,11 +710,11 @@ int list_directory_dentry (struct shim_dentry *dent) {
         child->ino = d->ino;
     }
 
-    free(dirent);
     dent->state |= DENTRY_LISTED;
 
 done_read:
     unlock(dcache_lock);
+    free(dirent);
     return ret;
 }
 

+ 0 - 1
LibOS/shim/src/fs/str/fs.c

@@ -163,7 +163,6 @@ int str_write (struct shim_handle * hdl, const void * buf,
         }
 
         char * newbuf = malloc(newlen);
-
         if (!newbuf)
             return -ENOMEM;
 

+ 1 - 2
LibOS/shim/src/ipc/shim_ipc.c

@@ -254,11 +254,10 @@ struct shim_ipc_info * discover_client (struct shim_ipc_port * port,
 
 struct shim_process * create_new_process (bool inherit_parent)
 {
-    struct shim_process * new_process = malloc(sizeof(struct shim_process));
+    struct shim_process * new_process = calloc(1, sizeof(struct shim_process));
     if (!new_process)
         return NULL;
 
-    memset(new_process, 0, sizeof(struct shim_process));
     new_process->parent = get_new_ipc_info(cur_process.vmid, NULL, 0);
 
     if (!inherit_parent)

+ 3 - 7
LibOS/shim/src/ipc/shim_ipc_nsimpl.h

@@ -174,12 +174,11 @@ static int __extend_range_bitmap (int expected)
     if (range_map)
         size = range_map->map_size;
 
-    while(size <= expected)
+    while (size <= expected)
         size *= 2;
 
     struct range_bitmap * new_map = malloc(sizeof(struct range_bitmap) +
                                            size / BITS);
-
     if (!new_map)
         return -ENOMEM;
 
@@ -353,7 +352,6 @@ int CONCAT3(add, NS, subrange) (IDTYPE idx, IDTYPE owner,
     int off = (idx - 1) / RANGE_SIZE, err = 0;
     IDTYPE base = off * RANGE_SIZE + 1;
     struct subrange * s = malloc(sizeof(struct subrange));
-
     if (!s)
         return -ENOMEM;
 
@@ -383,12 +381,11 @@ int CONCAT3(add, NS, subrange) (IDTYPE idx, IDTYPE owner,
     }
 
     if (!r->subranges) {
-        r->subranges = malloc(sizeof(struct sub_map));
+        r->subranges = calloc(1, sizeof(struct sub_map));
         if (!r->subranges) {
             err = -ENOMEM;
             goto failed;
         }
-        memset(r->subranges, 0, sizeof(struct sub_map));
     }
 
     struct subrange ** m = &r->subranges->map[idx - base];
@@ -645,10 +642,9 @@ IDTYPE CONCAT2(allocate, NS) (IDTYPE min, IDTYPE max)
         if (idx < base)
             idx = base;
         if (!r->used) {
-            r->used = malloc(sizeof(struct idx_bitmap));
+            r->used = calloc(1, sizeof(struct idx_bitmap));
             if (!r->used)
                 continue;
-            memset(r->used, 0, sizeof(struct idx_bitmap));
         }
 
         int i = (idx - base) / BITS;

+ 3 - 3
LibOS/shim/src/ipc/shim_ipc_pid.c

@@ -298,8 +298,8 @@ int ipc_pid_retstatus_callback (IPC_CALLBACK_ARGS)
         struct pid_status ** status = (struct pid_status **) obj->private;
 
         if (status) {
-            *status = remalloc(msgin->status, sizeof(struct pid_status) *
-                               msgin->nstatus);
+            *status = malloc_copy(msgin->status, sizeof(struct pid_status) *
+                                  msgin->nstatus);
 
             obj->retval = msgin->nstatus;
         }
@@ -619,7 +619,7 @@ int ipc_pid_retmeta_callback (IPC_CALLBACK_ARGS)
 
         if (data)
             *data = msgin->datasize ?
-                    remalloc(msgin->data, msgin->datasize) : NULL;
+                    malloc_copy(msgin->data, msgin->datasize) : NULL;
 
         obj->retval = msgin->datasize;
 

+ 2 - 1
LibOS/shim/src/ipc/shim_ipc_sysv.c

@@ -962,7 +962,8 @@ int ipc_sysv_semreply_callback (IPC_CALLBACK_ARGS)
 
     PAL_NUM ** semids = obj->private;
     if (semids)
-        *semids = remalloc(msgin->host_sem_ids, sizeof(PAL_NUM) * msgin->nsems);
+        *semids = malloc_copy(msgin->host_sem_ids,
+                              sizeof(PAL_NUM) * msgin->nsems);
     obj->retval = msgin->nsems;
 
     if (obj->thread)

+ 1 - 1
LibOS/shim/src/shim-debug.map

@@ -7,5 +7,5 @@ SHIM {
         memcpy; memmove; memset; memcmp;
         __htonl; __ntohl; __htons; __ntohs; inet_pton;
         vfputchar; vfputs; vfprintf; snprintf;
-        malloc; free; remalloc;
+        malloc; free; malloc_copy;
 };

+ 178 - 88
LibOS/shim/src/shim_checkpoint.c

@@ -468,12 +468,12 @@ static int send_checkpoint_on_stream (PAL_HANDLE stream,
         }
     }
 
-    int total_bytes = store->offset;
-    int bytes = 0;
+    size_t total_bytes = store->offset;
+    size_t bytes = 0;
 
     do {
-        int ret = DkStreamWrite(stream, 0, total_bytes - bytes,
-                                (void *) store->base + bytes, NULL);
+        size_t ret = DkStreamWrite(stream, 0, total_bytes - bytes,
+                                   (void *) store->base + bytes, NULL);
 
         if (!ret)
             return -PAL_ERRNO;
@@ -484,18 +484,21 @@ static int send_checkpoint_on_stream (PAL_HANDLE stream,
     ADD_PROFILE_OCCURENCE(migrate_send_on_stream, total_bytes);
 
     for (int i = 0 ; i < mem_nentries ; i++) {
-        int mem_size = mem_entries[i]->size;
+        size_t mem_size = mem_entries[i]->size;
         void * mem_addr = mem_entries[i]->addr;
         bytes = 0;
         do {
-            int ret = DkStreamWrite(stream, 0, mem_size - bytes,
-                                    mem_addr + bytes, NULL);
+            size_t ret = DkStreamWrite(stream, 0, mem_size - bytes,
+                                       mem_addr + bytes, NULL);
             if (!ret)
                 return -PAL_ERRNO;
 
             bytes += ret;
         } while (bytes < mem_entries[i]->size);
 
+        if (!(mem_entries[i]->prot & PAL_PROT_READ))
+            DkVirtualMemoryProtect(mem_addr, mem_size, mem_entries[i]->prot);
+
         mem_entries[i]->size = mem_size;
         ADD_PROFILE_OCCURENCE(migrate_send_on_stream, mem_size);
     }
@@ -613,8 +616,8 @@ int restore_checkpoint (struct cp_header * cphdr, struct mem_header * memhdr,
         rs_func rs = (&__rs_func) [cpent->cp_type - CP_FUNC_BASE];
         ret = (*rs) (cpent, base, offset, rebase);
         if (ret < 0) {
-            debug("restoring %s failed at %p (err=%d)\n", CP_FUNC_NAME(cpent->cp_type),
-                  base + offset, -ret);
+            sys_printf("restore_checkpoint() at %s (%d)\n",
+                       CP_FUNC_NAME(cpent->cp_type), ret);
             return ret;
         }
 next:
@@ -801,36 +804,55 @@ int receive_handles_on_stream (struct palhdl_header * hdr, ptr_t base,
     return 0;
 }
 
-static void * cp_alloc (struct shim_cp_store * store, void * addr, int size)
+static void * cp_alloc (struct shim_cp_store * store, void * addr, size_t size)
 {
     if (addr) {
-        // Caller specified an exact region to alloc.
-        struct shim_vma * vma;
-        bool found = !lookup_overlap_vma(addr, size, &vma);
-        if (found) {
-            bool allocable = vma->addr == addr && vma->length == size
-                             && (vma->flags & VMA_UNMAPPED);
-            if (!allocable) {
-                put_vma(vma);
-                return NULL;
-            }
-        }
-        return DkVirtualMemoryAlloc(addr, size, 0,
-                                    PAL_PROT_READ|PAL_PROT_WRITE);
+        /*
+         * If the checkpoint needs more space, try to extend the checkpoint
+         * store at the current address.
+         */
+        debug("try extend checkpoint store: %p-%p (size = %ld)\n",
+              addr, addr + size, size);
+
+        if (bkeep_mmap(addr, size, PROT_READ|PROT_WRITE, CP_VMA_FLAGS,
+                       NULL, 0, "cpstore") < 0)
+            return NULL;
     } else {
-        // Alloc on any address, with specified size.
-        // We need to retry because `get_unmapped_vma_for_cp` is randomized.
-        // TODO: Fix this to remove the need for retrying.
-        while (true) {
-            addr = get_unmapped_vma_for_cp(size);
-            if (!addr)
-                return NULL;
-            addr = (void *) DkVirtualMemoryAlloc(addr, size, 0,
-                                                 PAL_PROT_READ|PAL_PROT_WRITE);
-            if (addr)
-                return addr;
-        }
+        /*
+         * Here we use a strategy to reduce internal fragmentation of virtual
+         * memory space. Because we need a relatively large, contiguous space
+         * for dumping the checkpoint data, internal fragmentation can cause
+         * the process to drain the virtual address space after forking a few
+         * times. The previous space used for checkpoint may be fragmented
+         * at the next fork.
+         *
+         * A simple trick we use here is to reserve some space right after the
+         * checkpoint space. The reserved space is half of the size of the
+         * checkpoint space, but can be further fine-tuned.
+         */
+        size_t reserve_size = ALIGN_UP(size >> 1);
+
+        debug("try allocate checkpoint store (size = %ld, reserve = %ld)\n",
+              size, reserve_size);
+
+        /*
+         * Allocating the checkpoint space at the first space found from the
+         * top of the virtual address space.
+         */
+        addr = bkeep_unmapped_any(size + reserve_size, PROT_READ|PROT_WRITE,
+                                  CP_VMA_FLAGS, NULL, 0, "cpstore");
+        if (!addr)
+            return NULL;
+
+        bkeep_munmap(addr + size, reserve_size, CP_VMA_FLAGS);
     }
+
+    void * mem = (void *) DkVirtualMemoryAlloc(addr, size, 0,
+                                               PAL_PROT_READ|PAL_PROT_WRITE);
+    if (!mem) /* roll back the bookkeeping at the address reserved above */
+        bkeep_munmap(addr, size, CP_VMA_FLAGS);
+
+    return mem;
 }
 
 DEFINE_PROFILE_CATAGORY(migrate_proc, migrate);
@@ -847,6 +869,18 @@ DEFINE_PROFILE_INTERVAL(migrate_send_pal_handles, migrate_proc);
 DEFINE_PROFILE_INTERVAL(migrate_free_checkpoint,  migrate_proc);
 DEFINE_PROFILE_INTERVAL(migrate_wait_response,    migrate_proc);
 
+static bool warn_no_gipc __attribute_migratable = true;
+
+/*
+ * Create a new process and migrate the process states to the new process.
+ *
+ * @migrate: migration function defined by the caller
+ * @exec: the executable to load in the new process
+ * @argv: arguments passed to the new process
+ * @thread: thread handle to be migrated to the new process
+ *
+ * The remaining arguments are passed into the migration function.
+ */
 int do_migrate_process (int (*migrate) (struct shim_cp_store *,
                                         struct shim_thread *,
                                         struct shim_process *, va_list),
@@ -867,6 +901,12 @@ int do_migrate_process (int (*migrate) (struct shim_cp_store *,
 #endif
     BEGIN_PROFILE_INTERVAL();
 
+    /*
+     * Create the process first. The new process requires some time
+     * to initialize before starting to receive checkpoint data.
+     * Parallelizing the process creation and checkpointing can improve
+     * the latency of forking.
+     */
     PAL_HANDLE proc = DkProcessCreate(exec ? qstrgetstr(&exec->uri) :
                                       pal_control.executable,
                                       0, argv);
@@ -878,6 +918,11 @@ int do_migrate_process (int (*migrate) (struct shim_cp_store *,
 
     SAVE_PROFILE_INTERVAL(migrate_create_process);
 
+    /*
+     * Detect if GIPC is supported by the host. If GIPC is not supported
+     * forking may be slow because we have to use RPC streams for migrating
+     * user memory.
+     */
     bool use_gipc = false;
     PAL_NUM gipc_key;
     PAL_HANDLE gipc_hdl = DkCreatePhysicalMemoryChannel(&gipc_key);
@@ -887,10 +932,14 @@ int do_migrate_process (int (*migrate) (struct shim_cp_store *,
         use_gipc = true;
         SAVE_PROFILE_INTERVAL(migrate_create_gipc);
     } else {
-        sys_printf("WARNING: no physical memory support, process creation "
-                   "will be slow.\n");
+        if (warn_no_gipc) {
+            warn_no_gipc = false;
+            sys_printf("WARNING: no physical memory support, process creation "
+                       "may be slow.\n");
+        }
     }
 
+    /* Create process and IPC bookkeepings */
     if (!(new_process = create_new_process(true))) {
         ret = -ENOMEM;
         goto err;
@@ -903,6 +952,7 @@ int do_migrate_process (int (*migrate) (struct shim_cp_store *,
 
     SAVE_PROFILE_INTERVAL(migrate_connect_ipc);
 
+    /* Allocate a space for dumping the checkpoint data. */
     cpstore = __alloca(sizeof(struct shim_cp_store));
     memset(cpstore, 0, sizeof(struct shim_cp_store));
     cpstore->alloc    = cp_alloc;
@@ -910,10 +960,14 @@ int do_migrate_process (int (*migrate) (struct shim_cp_store *,
     cpstore->bound    = CP_INIT_VMA_SIZE;
 
     while (1) {
-        debug("try allocate checkpoint store (size = %d)\n", cpstore->bound);
+        /*
+         * Try allocating a space of a certain size. If the allocation fails,
+         * continue to try with smaller sizes.
+         */
         cpstore->base = (ptr_t) cp_alloc(cpstore, 0, cpstore->bound);
         if (cpstore->base)
             break;
+
         cpstore->bound >>= 1;
         if (cpstore->bound < allocsize)
             break;
@@ -927,6 +981,7 @@ int do_migrate_process (int (*migrate) (struct shim_cp_store *,
 
     SAVE_PROFILE_INTERVAL(migrate_init_checkpoint);
 
+    /* Calling the migration function defined by the caller. */
     va_list ap;
     va_start(ap, thread);
     ret = (*migrate) (cpstore, thread, new_process, ap);
@@ -941,6 +996,7 @@ int do_migrate_process (int (*migrate) (struct shim_cp_store *,
     unsigned long checkpoint_time = GET_PROFILE_INTERVAL();
     unsigned long checkpoint_size = cpstore->offset + cpstore->mem_size;
 
+    /* Checkpoint data created. */
     debug("checkpoint of %u bytes created, %lu microsecond is spent.\n",
           checkpoint_size, checkpoint_time);
 
@@ -976,6 +1032,10 @@ int do_migrate_process (int (*migrate) (struct shim_cp_store *,
     hdr.write_proc_time = GET_PROFILE_INTERVAL();
 #endif
 
+    /*
+     * Sending a header to the new process through the RPC stream to
+     * notify the process to start receiving the checkpoint.
+     */
     bytes = DkStreamWrite(proc, 0, sizeof(struct newproc_header), &hdr, NULL);
     if (!bytes) {
         ret = -PAL_ERRNO;
@@ -989,6 +1049,7 @@ int do_migrate_process (int (*migrate) (struct shim_cp_store *,
     ADD_PROFILE_OCCURENCE(migrate_send_on_stream, bytes);
     SAVE_PROFILE_INTERVAL(migrate_send_header);
 
+    /* Sending the checkpoint either through GIPC or the RPC stream */
     ret = cpstore->use_gipc ? send_checkpoint_by_gipc(gipc_hdl, cpstore) :
           send_checkpoint_on_stream(proc, cpstore);
 
@@ -999,14 +1060,27 @@ int do_migrate_process (int (*migrate) (struct shim_cp_store *,
 
     SAVE_PROFILE_INTERVAL(migrate_send_checkpoint);
 
+    /*
+     * For socket and RPC streams, we need to migrate the PAL handles
+     * to the new process using PAL calls.
+     */
     if ((ret = send_handles_on_stream(proc, cpstore)) < 0)
         goto err;
 
     SAVE_PROFILE_INTERVAL(migrate_send_pal_handles);
 
-    system_free((void *) cpstore->base, cpstore->bound);
+    /* Free the checkpoint space */
+    if ((ret = bkeep_munmap((void *) cpstore->base, cpstore->bound,
+                            CP_VMA_FLAGS)) < 0) {
+        debug("failed unmaping checkpoint (ret = %d)\n", ret);
+        goto err;
+    }
+
+    DkVirtualMemoryFree((PAL_PTR) cpstore->base, cpstore->bound);
+
     SAVE_PROFILE_INTERVAL(migrate_free_checkpoint);
 
+    /* Wait for the response from the new process */
     struct newproc_response res;
     bytes = DkStreamRead(proc, 0, sizeof(struct newproc_response), &res,
                          NULL, 0);
@@ -1020,10 +1094,12 @@ int do_migrate_process (int (*migrate) (struct shim_cp_store *,
     if (gipc_hdl)
         DkObjectClose(gipc_hdl);
 
+    /* Notify the namespace manager regarding the subleasing of TID */
     ipc_pid_sublease_send(res.child_vmid, thread->tid,
                           qstrgetstr(&new_process->self->uri),
                           NULL);
 
+    /* Listen on the RPC stream to the new process */
     add_ipc_port_by_id(res.child_vmid, proc,
                        IPC_PORT_DIRCLD|IPC_PORT_LISTEN|IPC_PORT_KEEPALIVE,
                        &ipc_child_exit,
@@ -1043,73 +1119,92 @@ err:
     return ret;
 }
 
+/*
+ * Loading the checkpoint from the parent process or a checkpoint file
+ *
+ * @hdr: checkpoint header
+ * @cpptr: returning the pointer of the loaded checkpoint
+ */
 int do_migration (struct newproc_cp_header * hdr, void ** cpptr)
 {
-    ptr_t base = (ptr_t) hdr->hdr.addr;
-    int   size = hdr->hdr.size;
+    void * base = NULL;
+    size_t size = hdr->hdr.size;
     PAL_PTR mapaddr;
     PAL_NUM mapsize;
-    unsigned long mapoff;
     long rebase;
     bool use_gipc = !!hdr->gipc.uri[0];
     PAL_HANDLE gipc_store;
     int ret = 0;
+    BEGIN_PROFILE_INTERVAL();
 
-    debug("checkpoint detected (%d bytes, expected at %p)\n",
-          size, base);
-
-    if (base && lookup_overlap_vma((void *) base, size, NULL) == -ENOENT) {
+    /*
+     * Allocate a large enough space to load the checkpoint data.
+     *
+     * If CPSTORE_DERANDOMIZATION is enabled, try to allocate the space
+     * at the exact address where the checkpoint is created. Otherwise,
+     * just allocate at the first space we found from the top of the virtual
+     * memory space.
+     */
+
+#if CPSTORE_DERANDOMIZATION == 1
+    if (hdr->hdr.addr
+        && lookup_overlap_vma(hdr->hdr.addr, size, NULL) == -ENOENT) {
+
+        /* Try to load the checkpoint at the same address */
+        base = hdr->hdr.addr;
         mapaddr = (PAL_PTR) ALIGN_DOWN(base);
         mapsize = (PAL_PTR) ALIGN_UP(base + size) - mapaddr;
-        mapoff  = base - (ptr_t) mapaddr;
-    } else {
-        mapaddr = (PAL_PTR) 0;
-        mapsize = ALIGN_UP(size);
-        mapoff  = 0;
+
+        /* Need to create VMA before allocation */
+        ret = bkeep_mmap((void *) mapaddr, mapsize,
+                         PROT_READ|PROT_WRITE, CP_VMA_FLAGS,
+                         NULL, 0, "cpstore");
+        if (ret < 0)
+            base = NULL;
     }
+#endif
 
-    BEGIN_PROFILE_INTERVAL();
+    if (!base) {
+        base = bkeep_unmapped_any(ALIGN_UP(size),
+                                  PROT_READ|PROT_WRITE, CP_VMA_FLAGS,
+                                  NULL, 0, "cpstore");
+        if (!base)
+            return -ENOMEM;
+
+        mapaddr = (PAL_PTR) base;
+        mapsize = (PAL_NUM) ALIGN_UP(size);
+    }
+
+    debug("checkpoint mapped at %p-%p\n", base, base + size);
+
+    PAL_FLG pal_prot = PAL_PROT_READ|PAL_PROT_WRITE;
+    PAL_PTR mapped = mapaddr;
 
     if (use_gipc) {
         debug("open gipc store: %s\n", hdr->gipc.uri);
 
-        PAL_FLG mapprot = PAL_PROT_READ|PAL_PROT_WRITE;
         gipc_store = DkStreamOpen(hdr->gipc.uri, 0, 0, 0, 0);
         if (!gipc_store ||
-            !DkPhysicalMemoryMap(gipc_store, 1, &mapaddr, &mapsize, &mapprot))
+            !DkPhysicalMemoryMap(gipc_store, 1, &mapped, &mapsize, &pal_prot))
             return -PAL_ERRNO;
 
         SAVE_PROFILE_INTERVAL(child_load_checkpoint_by_gipc);
     } else {
-        void * mapped = NULL;
-
-        for (int tries = 3 ; tries ; tries--) {
-            if ((mapped = DkVirtualMemoryAlloc(mapaddr, mapsize, 0,
-                                               PAL_PROT_READ|PAL_PROT_WRITE)))
-                break;
-
-            debug("cannot map address %p-%p\n", mapaddr, mapaddr + mapsize);
-            ret =-PAL_ERRNO;
-            mapaddr = NULL;
-        }
-
+        void * mapped = DkVirtualMemoryAlloc(mapaddr, mapsize, 0, pal_prot);
         if (!mapped)
-            return ret;
-
-        mapaddr = mapped;
+            return -PAL_ERRNO;
     }
 
-    bkeep_mmap((void *) mapaddr, mapsize,
-               PROT_READ|PROT_WRITE,
-               MAP_PRIVATE|MAP_ANONYMOUS|VMA_INTERNAL,
-               NULL, 0, NULL);
-
-    base = (ptr_t) mapaddr + mapoff;
-    rebase = (long) base - (long) hdr->hdr.addr;
-    debug("checkpoint loaded at %p\n", base);
+    assert(mapaddr == mapped);
+    /*
+     * If the checkpoint is loaded at a different address from where it is
+     * created, we need to rebase the pointers in the checkpoint.
+     */
+    rebase = (long) ((uintptr_t) base - (uintptr_t) hdr->hdr.addr);
 
+    /* Load the memory data sent separately over GIPC or the RPC stream. */
     if (use_gipc) {
-        if ((ret = restore_gipc(gipc_store, &hdr->gipc, base, rebase)) < 0)
+        if ((ret = restore_gipc(gipc_store, &hdr->gipc, (ptr_t) base, rebase)) < 0)
             return ret;
 
         SAVE_PROFILE_INTERVAL(child_load_memory_by_gipc);
@@ -1131,17 +1226,12 @@ int do_migration (struct newproc_cp_header * hdr, void ** cpptr)
         debug("%d bytes read on stream\n", total_bytes);
     }
 
-    struct newproc_response res;
-    res.child_vmid = cur_process.vmid;
-    res.failure = 0;
-    int bytes = DkStreamWrite(PAL_CB(parent_process), 0,
-                              sizeof(struct newproc_response),
-                              &res, NULL);
-    if (!bytes)
-        return -PAL_ERRNO;
-
-    if ((ret = receive_handles_on_stream(&hdr->palhdl, base, rebase)) < 0)
+    /* Receive socket or RPC handles from the parent process. */
+    ret = receive_handles_on_stream(&hdr->palhdl, (ptr_t) base, rebase);
+    if (ret < 0) {
+        /* TODO: unload the checkpoint space */
         return ret;
+    }
 
     SAVE_PROFILE_INTERVAL(child_receive_handles);
 

+ 6 - 0
LibOS/shim/src/shim_debug.c

@@ -104,9 +104,15 @@ void remove_r_debug (void * addr)
 void append_r_debug (const char * uri, void * addr, void * dyn_addr)
 {
     struct gdb_link_map * new = malloc(sizeof(struct gdb_link_map));
+    if (!new)
+        return;
 
     int uri_len = strlen(uri);
     char * new_uri = malloc(uri_len + 1);
+    if (!new_uri) {
+        free(new);
+        return;
+    }
     memcpy(new_uri, uri, uri_len + 1);
 
     new->l_addr = addr;

+ 105 - 81
LibOS/shim/src/shim_init.c

@@ -175,7 +175,7 @@ void * migrated_shim_addr;
 void * initial_stack;
 const char ** initial_envp __attribute_migratable;
 
-const char ** library_paths;
+char ** library_paths;
 
 LOCKTYPE __master_lock;
 bool lock_enabled;
@@ -248,15 +248,22 @@ void * allocate_stack (size_t size, size_t protect_size, bool user)
 
     /* preserve a non-readable, non-writeable page below the user
        stack to stop the user program from clobbering other vmas */
-    void * stack = user ?
-                   get_unmapped_vma(size + protect_size, STACK_FLAGS) :
-                   NULL;
+    void * stack = NULL;
+    int flags = STACK_FLAGS|(user ? 0 : VMA_INTERNAL);
 
-    if (user)
-        stack = (void *) DkVirtualMemoryAlloc(stack, size + protect_size,
-                                0, PAL_PROT_READ|PAL_PROT_WRITE);
-    else
+    if (user) {
+        stack = bkeep_unmapped_heap(size + protect_size, PROT_NONE,
+                                    flags, NULL, 0, "stack");
+
+        if (!stack)
+            return NULL;
+
+        stack = (void *)
+            DkVirtualMemoryAlloc(stack, size + protect_size,
+                                 0, PAL_PROT_NONE);
+    } else {
         stack = system_malloc(size + protect_size);
+    }
 
     if (!stack)
         return NULL;
@@ -264,22 +271,11 @@ void * allocate_stack (size_t size, size_t protect_size, bool user)
     ADD_PROFILE_OCCURENCE(alloc_stack, size + protect_size);
     INC_PROFILE_OCCURENCE(alloc_stack_count);
 
-    if (protect_size &&
-        !DkVirtualMemoryProtect(stack, protect_size, PAL_PROT_NONE))
-        return NULL;
-
     stack += protect_size;
+    DkVirtualMemoryProtect(stack, size, PAL_PROT_READ|PAL_PROT_WRITE);
 
-    if (user) {
-        if (bkeep_mmap(stack, size, PROT_READ|PROT_WRITE,
-                       STACK_FLAGS, NULL, 0, "stack") < 0)
-            return NULL;
-
-        if (protect_size &&
-            bkeep_mmap(stack - protect_size, protect_size, 0,
-                       STACK_FLAGS, NULL, 0, NULL) < 0)
-            return NULL;
-    }
+    if (bkeep_mprotect(stack, size, PROT_READ|PROT_WRITE, flags) < 0)
+        return NULL;
 
     debug("allocated stack at %p (size = %d)\n", stack, size);
     return stack;
@@ -392,39 +388,38 @@ int init_stack (const char ** argv, const char ** envp, const char *** argpp,
 int read_environs (const char ** envp)
 {
     for (const char ** e = envp ; *e ; e++) {
-        switch ((*e)[0]) {
-            case 'L': {
-                if (strpartcmp_static(*e, "LD_LIBRARY_PATH=")) {
-                    const char * s = *e + static_strlen("LD_LIBRARY_PATH=");
-                    int npaths = 0;
-                    for (const char * tmp = s ; *tmp ; tmp++)
-                        if (*tmp == ':')
-                            npaths++;
-                    const char ** paths = malloc(sizeof(const char *) *
-                                                 (npaths + 1));
-                    if (!paths)
-                        return -ENOMEM;
-
-                    int cnt = 0;
-                    while (*s) {
-                        const char * next;
-                        for (next = s ; *next && *next != ':' ; next++);
-                        int len = next - s;
-                        char * str = malloc(len + 1);
-                        if (!str)
-                            return -ENOMEM;
-                        memcpy(str, s, len);
-                        str[len] = 0;
-                        paths[cnt++] = str;
-                        s = *next ? next + 1 : next;
-                    }
-
-                    paths[cnt] = NULL;
-                    library_paths = paths;
-                    break;
+        if (strpartcmp_static(*e, "LD_LIBRARY_PATH=")) {
+            const char * s = *e + static_strlen("LD_LIBRARY_PATH=");
+            size_t npaths = 2; // One for the first entry, one for the last
+                               // NULL.
+            for (const char * tmp = s ; *tmp ; tmp++)
+                if (*tmp == ':')
+                    npaths++;
+            char** paths = malloc(sizeof(const char *) *
+                                  npaths);
+            if (!paths)
+                return -ENOMEM;
+
+            size_t cnt = 0;
+            while (*s) {
+                const char * next;
+                for (next = s ; *next && *next != ':' ; next++);
+                size_t len = next - s;
+                char * str = malloc(len + 1);
+                if (!str) {
+                    for (size_t i = 0; i < cnt; i++)
+                        free(paths[cnt]);
+                    return -ENOMEM;
                 }
-                break;
+                memcpy(str, s, len);
+                str[len] = 0;
+                paths[cnt++] = str;
+                s = *next ? next + 1 : next;
             }
+
+            paths[cnt] = NULL;
+            library_paths = paths;
+            return 0;
         }
     }
 
@@ -445,8 +440,11 @@ static void __free (void * mem)
 
 int init_manifest (PAL_HANDLE manifest_handle)
 {
-    void * addr;
-    unsigned int size;
+    int ret = 0;
+    void * addr = NULL;
+    size_t size = 0, map_size = 0;
+
+#define MAP_FLAGS (MAP_PRIVATE|MAP_ANONYMOUS|VMA_INTERNAL)
 
     if (PAL_CB(manifest_preload.start)) {
         addr = PAL_CB(manifest_preload.start);
@@ -457,34 +455,53 @@ int init_manifest (PAL_HANDLE manifest_handle)
             return -PAL_ERRNO;
 
         size = attr.pending_size;
-        addr = (void *) DkStreamMap(manifest_handle, NULL,
-                                  PAL_PROT_READ, 0,
-                                  ALIGN_UP(size));
-
+        map_size = ALIGN_UP(size);
+        addr = bkeep_unmapped_any(map_size, PROT_READ, MAP_FLAGS,
+                                  NULL, 0, "manifest");
         if (!addr)
-            return -PAL_ERRNO;
+            return -ENOMEM;
+
+        void * ret_addr = DkStreamMap(manifest_handle, addr,
+                                      PAL_PROT_READ, 0,
+                                      ALIGN_UP(size));
+
+        if (!ret_addr) {
+            bkeep_munmap(addr, map_size, MAP_FLAGS);
+            return -ENOMEM;
+        } else {
+            assert(addr == ret_addr);
+        }
     }
 
-    bkeep_mmap(addr, ALIGN_UP(size), PROT_READ,
-               MAP_PRIVATE|MAP_ANONYMOUS|VMA_INTERNAL, NULL, 0,
-               "manifest");
+    struct config_store * new_root_config = malloc(sizeof(struct config_store));
+    if (!new_root_config) {
+        ret = -ENOMEM;
+        goto fail;
+    }
 
-    root_config = malloc(sizeof(struct config_store));
-    root_config->raw_data = addr;
-    root_config->raw_size = size;
-    root_config->malloc = __malloc;
-    root_config->free = __free;
+    new_root_config->raw_data = addr;
+    new_root_config->raw_size = size;
+    new_root_config->malloc = __malloc;
+    new_root_config->free = __free;
 
     const char * errstring = "Unexpected error";
-    int ret = 0;
 
-    if ((ret = read_config(root_config, NULL, &errstring)) < 0) {
-        root_config = NULL;
+    if ((ret = read_config(new_root_config, NULL, &errstring)) < 0) {
         sys_printf("Unable to read manifest file: %s\n", errstring);
-        return ret;
+        goto fail;
     }
 
+    root_config = new_root_config;
     return 0;
+
+fail:
+    if (map_size) {
+        DkStreamUnmap(addr, map_size);
+        if (bkeep_munmap(addr, map_size, MAP_FLAGS) < 0)
+            bug();
+    }
+    free(new_root_config);
+    return ret;
 }
 
 #ifdef PROFILE
@@ -606,11 +623,10 @@ DEFINE_PROFILE_INTERVAL(pal_child_creation_time,        pal);
 
 DEFINE_PROFILE_CATAGORY(init, );
 DEFINE_PROFILE_INTERVAL(init_randgen,               init);
-DEFINE_PROFILE_INTERVAL(init_heap,                  init);
+DEFINE_PROFILE_INTERVAL(init_vma,                   init);
 DEFINE_PROFILE_INTERVAL(init_slab,                  init);
 DEFINE_PROFILE_INTERVAL(init_str_mgr,               init);
 DEFINE_PROFILE_INTERVAL(init_internal_map,          init);
-DEFINE_PROFILE_INTERVAL(init_vma,                   init);
 DEFINE_PROFILE_INTERVAL(init_fs,                    init);
 DEFINE_PROFILE_INTERVAL(init_dcache,                init);
 DEFINE_PROFILE_INTERVAL(init_handle,                init);
@@ -638,7 +654,7 @@ DEFINE_PROFILE_INTERVAL(init_signal,                init);
     do {                                                                \
         int _err = CALL_INIT(func, ##__VA_ARGS__);                      \
         if (_err < 0) {                                                 \
-            debug("initialization failed in " #func " (%d)\n", _err);   \
+            sys_printf("shim_init() in " #func " (%d)\n", _err);        \
             shim_terminate();                                           \
         }                                                               \
         SAVE_PROFILE_INTERVAL(func);                                    \
@@ -693,12 +709,11 @@ int shim_init (int argc, void * args, void ** return_stack)
 
     BEGIN_PROFILE_INTERVAL();
     RUN_INIT(init_randgen);
-    RUN_INIT(init_heap);
+    RUN_INIT(init_vma);
     RUN_INIT(init_slab);
     RUN_INIT(read_environs, envp);
     RUN_INIT(init_str_mgr);
     RUN_INIT(init_internal_map);
-    RUN_INIT(init_vma);
     RUN_INIT(init_fs);
     RUN_INIT(init_dcache);
     RUN_INIT(init_handle);
@@ -751,6 +766,17 @@ restore:
     RUN_INIT(init_ipc_helper);
     RUN_INIT(init_signal);
 
+    if (PAL_CB(parent_process)) {
+        /* Notify the parent process */
+        struct newproc_response res;
+        res.child_vmid = cur_process.vmid;
+        res.failure = 0;
+        if (!DkStreamWrite(PAL_CB(parent_process), 0,
+                           sizeof(struct newproc_response),
+                           &res, NULL))
+            return -PAL_ERRNO;
+    }
+
     debug("shim process initialized\n");
 
 #ifdef PROFILE
@@ -819,8 +845,7 @@ static int name_pipe (char * uri, size_t size, void * id)
 {
     IDTYPE pipeid;
     int len;
-    if (getrand(&pipeid, sizeof(IDTYPE)) < sizeof(IDTYPE))
-        return -EACCES;
+    getrand(&pipeid, sizeof(pipeid));
     debug("creating pipe: pipe.srv:%u\n", pipeid);
     if ((len = snprintf(uri, size, "pipe.srv:%u", pipeid)) == size)
         return -ERANGE;
@@ -869,8 +894,7 @@ static int name_path (char * path, size_t size, void * id)
     unsigned int suffix;
     int prefix_len = strlen(path);
     int len;
-    if (getrand(&suffix, sizeof(unsigned int)) < sizeof(unsigned int))
-        return -EACCES;
+    getrand(&suffix, sizeof(suffix));
     len = snprintf(path + prefix_len, size - prefix_len, "%08x", suffix);
     if (len == size)
         return -ERANGE;

+ 68 - 187
LibOS/shim/src/shim_malloc.c

@@ -25,22 +25,6 @@
  * 
  * When existing slabs are not sufficient, or a large (4k or greater) 
  * allocation is requested, it ends up here (__system_alloc and __system_free).
- * 
- * There are two modes this file executes in: early initialization (before
- * VMAs are available), and post-initialization.  
- * 
- * Before VMAs are available, allocations