Browse Source

Cleaning up and rewriting VMA bookkeeping code (#183)

1. Redesign of the VMA bookkeeping logic in the library OS
2. ASLR reimplementation
3. Support the MAP_32BIT flag for mmap()
4. Safeguarding library OS internal memory from user memory and checkpoint buffers
5. Eliminating race conditions at VMA lookup and bookkeeping
6. Enable early VMA bookkeeping during initialization
7. Adding documentation for the VMA implementation
Chia-Che Tsai 6 years ago
parent
commit
f8bf469018
73 changed files with 2094 additions and 1945 deletions
  1. 4 4
      LibOS/shim/include/shim_checkpoint.h
  2. 12 0
      LibOS/shim/include/shim_defs.h
  3. 8 6
      LibOS/shim/include/shim_fs.h
  4. 3 3
      LibOS/shim/include/shim_internal.h
  5. 3 3
      LibOS/shim/include/shim_ipc.h
  6. 2 1
      LibOS/shim/include/shim_signal.h
  7. 0 9
      LibOS/shim/include/shim_thread.h
  8. 1 1
      LibOS/shim/include/shim_types.h
  9. 25 21
      LibOS/shim/include/shim_utils.h
  10. 67 22
      LibOS/shim/include/shim_vma.h
  11. 26 27
      LibOS/shim/src/bookkeep/shim_handle.c
  12. 10 18
      LibOS/shim/src/bookkeep/shim_signal.c
  13. 6 8
      LibOS/shim/src/bookkeep/shim_thread.c
  14. 721 862
      LibOS/shim/src/bookkeep/shim_vma.c
  15. 11 19
      LibOS/shim/src/elf/shim_rtld.c
  16. 122 95
      LibOS/shim/src/fs/chroot/fs.c
  17. 5 6
      LibOS/shim/src/fs/dev/fs.c
  18. 3 4
      LibOS/shim/src/fs/proc/info.c
  19. 90 21
      LibOS/shim/src/fs/proc/thread.c
  20. 1 0
      LibOS/shim/src/fs/shim_dcache.c
  21. 71 46
      LibOS/shim/src/fs/shim_fs.c
  22. 2 2
      LibOS/shim/src/fs/shim_fs_hash.c
  23. 17 4
      LibOS/shim/src/fs/shim_namei.c
  24. 0 1
      LibOS/shim/src/fs/str/fs.c
  25. 1 2
      LibOS/shim/src/ipc/shim_ipc.c
  26. 3 7
      LibOS/shim/src/ipc/shim_ipc_nsimpl.h
  27. 3 3
      LibOS/shim/src/ipc/shim_ipc_pid.c
  28. 2 1
      LibOS/shim/src/ipc/shim_ipc_sysv.c
  29. 1 1
      LibOS/shim/src/shim-debug.map
  30. 178 88
      LibOS/shim/src/shim_checkpoint.c
  31. 6 0
      LibOS/shim/src/shim_debug.c
  32. 105 81
      LibOS/shim/src/shim_init.c
  33. 68 187
      LibOS/shim/src/shim_malloc.c
  34. 6 25
      LibOS/shim/src/shim_random.c
  35. 0 17
      LibOS/shim/src/shim_syscalls.c
  36. 62 31
      LibOS/shim/src/sys/shim_brk.c
  37. 8 12
      LibOS/shim/src/sys/shim_clone.c
  38. 48 3
      LibOS/shim/src/sys/shim_exec.c
  39. 1 1
      LibOS/shim/src/sys/shim_migrate.c
  40. 95 74
      LibOS/shim/src/sys/shim_mmap.c
  41. 8 5
      LibOS/shim/src/sys/shim_open.c
  42. 1 1
      LibOS/shim/src/utils/strobjs.c
  43. 5 0
      LibOS/shim/test/apps/lmbench/Makefile.lmbench
  44. 4 0
      LibOS/shim/test/apps/lmbench/hello.manifest.template
  45. 7 9
      LibOS/shim/test/apps/lmbench/sh.manifest.template
  46. 1 1
      LibOS/shim/test/apps/ltp/fetch.py
  47. 4 4
      LibOS/shim/test/regression/Makefile
  48. 1 1
      Pal/lib/api.h
  49. 0 1
      Pal/lib/assert.h
  50. 2 2
      Pal/lib/graphene/config.c
  51. 41 26
      Pal/lib/list.h
  52. 35 12
      Pal/lib/memmgr.h
  53. 97 44
      Pal/lib/slabmgr.h
  54. 6 6
      Pal/regression/Makefile
  55. 5 5
      Pal/src/db_main.c
  56. 3 3
      Pal/src/db_rtld.c
  57. 1 1
      Pal/src/db_streams.c
  58. 2 2
      Pal/src/host/FreeBSD/db_files.c
  59. 13 34
      Pal/src/host/FreeBSD/db_misc.c
  60. 2 2
      Pal/src/host/FreeBSD/db_streams.c
  61. 6 1
      Pal/src/host/Linux-SGX/db_files.c
  62. 1 1
      Pal/src/host/Linux-SGX/db_mutex.c
  63. 2 2
      Pal/src/host/Linux-SGX/db_streams.c
  64. 1 2
      Pal/src/host/Linux-SGX/enclave_framework.c
  65. 8 8
      Pal/src/host/Linux-SGX/enclave_ocalls.c
  66. 12 6
      Pal/src/host/Linux-SGX/enclave_pages.c
  67. 9 37
      Pal/src/host/Linux-SGX/sgx_main.c
  68. 2 2
      Pal/src/host/Linux/db_files.c
  69. 1 1
      Pal/src/host/Linux/db_sockets.c
  70. 2 2
      Pal/src/host/Linux/db_streams.c
  71. 1 1
      Pal/src/pal_internal.h
  72. 13 6
      Pal/src/slab.c
  73. 1 1
      Runtime/Makefile

+ 4 - 4
LibOS/shim/include/shim_checkpoint.h

@@ -92,7 +92,7 @@ struct shim_cp_entry
 struct shim_mem_entry {
 struct shim_mem_entry {
     struct shim_mem_entry * prev;
     struct shim_mem_entry * prev;
     void * addr;
     void * addr;
-    int size;
+    size_t size;
     void ** paddr;
     void ** paddr;
     int prot;
     int prot;
     void * data;
     void * data;
@@ -118,7 +118,7 @@ struct shim_cp_store {
     struct shim_handle * cp_file;
     struct shim_handle * cp_file;
 
 
     /* allocation method for check point area */
     /* allocation method for check point area */
-    void * (*alloc) (struct shim_cp_store * store, void * mem, int size);
+    void * (*alloc) (struct shim_cp_store *, void *, size_t);
 
 
     /* check point area */
     /* check point area */
     ptr_t base, offset, bound;
     ptr_t base, offset, bound;
@@ -179,8 +179,8 @@ enum {
                 new_bound *= 2;                                     \
                 new_bound *= 2;                                     \
                                                                     \
                                                                     \
             void * buf = store->alloc(store,                        \
             void * buf = store->alloc(store,                        \
-                                      (void *) store->base + store->bound, \
-                                      new_bound - store->bound);    \
+                            (void *) store->base + store->bound,    \
+                            new_bound - store->bound);              \
             if (!buf)                                               \
             if (!buf)                                               \
                 return -ENOMEM;                                     \
                 return -ENOMEM;                                     \
                                                                     \
                                                                     \

+ 12 - 0
LibOS/shim/include/shim_defs.h

@@ -8,6 +8,14 @@
 
 
 #define HASH_GIPC                   0
 #define HASH_GIPC                   0
 
 
+/*
+ * If CPSTORE_DERANDOMIZATION is enabled, the library OS will try to
+ * load the checkpoint (either from the parent or a file) at the
+ * exact address where it was created. Currently this option is disabled
+ * to prevent internal fragmentation of the virtual memory space.
+ */
+#define CPSTORE_DERANDOMIZATION     0
+
 #define DEFAULT_HEAP_MIN_SIZE       (256 * 1024 * 1024) /* 256MB */
 #define DEFAULT_HEAP_MIN_SIZE       (256 * 1024 * 1024) /* 256MB */
 #define DEFAULT_MEM_MAX_NPAGES      (1024 * 1024)       /* 4GB */
 #define DEFAULT_MEM_MAX_NPAGES      (1024 * 1024)       /* 4GB */
 #define DEFAULT_BRK_MAX_SIZE        (256 * 1024)        /* 256KB */
 #define DEFAULT_BRK_MAX_SIZE        (256 * 1024)        /* 256KB */
@@ -17,8 +25,12 @@
 
 
 #define EXECVE_RTLD                 1
 #define EXECVE_RTLD                 1
 
 
+#define ENABLE_ASLR                 1
+
 /* debug message printout */
 /* debug message printout */
 #define DEBUGBUF_SIZE               256
 #define DEBUGBUF_SIZE               256
 #define DEBUGBUF_BREAK              0
 #define DEBUGBUF_BREAK              0
 
 
+#define DEFAULT_VMA_COUNT           64
+
 #endif /* _SHIM_DEFS_H_ */
 #endif /* _SHIM_DEFS_H_ */

+ 8 - 6
LibOS/shim/include/shim_fs.h

@@ -26,6 +26,8 @@
 #ifndef _SHIM_FS_H_
 #ifndef _SHIM_FS_H_
 #define _SHIM_FS_H_
 #define _SHIM_FS_H_
 
 
+#include <stdbool.h>
+
 #include <shim_types.h>
 #include <shim_types.h>
 #include <shim_defs.h>
 #include <shim_defs.h>
 #include <shim_handle.h>
 #include <shim_handle.h>
@@ -229,7 +231,7 @@ struct shim_d_ops {
 
 
 DEFINE_LIST(shim_mount);
 DEFINE_LIST(shim_mount);
 struct shim_mount {
 struct shim_mount {
-    char type[8];
+    char type[8];  // Null-terminated.
 
 
     struct shim_dentry * mount_point;
     struct shim_dentry * mount_point;
 
 
@@ -310,7 +312,7 @@ const char * get_file_name (const char * path, size_t len);
 /* file system operations */
 /* file system operations */
 int mount_fs (const char * mount_type, const char * mount_uri,
 int mount_fs (const char * mount_type, const char * mount_uri,
               const char * mount_point, struct shim_dentry *parent,
               const char * mount_point, struct shim_dentry *parent,
-              struct shim_dentry **dentp, int make_ancestor);
+              struct shim_dentry **dentp, bool make_ancestor);
 int unmount_fs (const char * mount_point);
 int unmount_fs (const char * mount_point);
 int search_builtin_fs (const char * type, struct shim_mount ** fs);
 int search_builtin_fs (const char * type, struct shim_mount ** fs);
 
 
@@ -374,7 +376,7 @@ int lookup_dentry (struct shim_dentry * parent, const char * name, int namelen,
  */
  */
 int __path_lookupat (struct shim_dentry * start, const char * path, int flags,
 int __path_lookupat (struct shim_dentry * start, const char * path, int flags,
                      struct shim_dentry ** dent, int link_depth,
                      struct shim_dentry ** dent, int link_depth,
-                     struct shim_mount *fs, int make_ancestor);
+                     struct shim_mount *fs, bool make_ancestor);
 
 
 /* Just wraps __path_lookupat, but also acquires and releases the dcache_lock.
 /* Just wraps __path_lookupat, but also acquires and releases the dcache_lock.
  */
  */
@@ -438,7 +440,7 @@ void get_dentry (struct shim_dentry * dent);
 /* Decrement the reference count on dent */
 /* Decrement the reference count on dent */
 void put_dentry (struct shim_dentry * dent);
 void put_dentry (struct shim_dentry * dent);
 
 
-static_inline
+static_always_inline
 void fast_pathcpy (char * dst, const char * src, int size, char ** ptr)
 void fast_pathcpy (char * dst, const char * src, int size, char ** ptr)
 {
 {
     char * d = dst;
     char * d = dst;
@@ -448,7 +450,7 @@ void fast_pathcpy (char * dst, const char * src, int size, char ** ptr)
     *ptr = d;
     *ptr = d;
 }
 }
 
 
-static_inline
+static_always_inline
 char * dentry_get_path (struct shim_dentry * dent, bool on_stack,
 char * dentry_get_path (struct shim_dentry * dent, bool on_stack,
                         int * sizeptr)
                         int * sizeptr)
 {
 {
@@ -491,7 +493,7 @@ char * dentry_get_path (struct shim_dentry * dent, bool on_stack,
     return buffer;
     return buffer;
 }
 }
 
 
-static inline __attribute__((always_inline))
+static_always_inline
 const char * dentry_get_name (struct shim_dentry * dent)
 const char * dentry_get_name (struct shim_dentry * dent)
 {
 {
     return qstrgetstr(&dent->name);
     return qstrgetstr(&dent->name);

+ 3 - 3
LibOS/shim/include/shim_internal.h

@@ -35,7 +35,7 @@
 #define extern_alias(name) \
 #define extern_alias(name) \
     extern __typeof(name) shim_##name __attribute ((alias (alias_str(name))))
     extern __typeof(name) shim_##name __attribute ((alias (alias_str(name))))
 
 
-#define static_inline static inline __attribute__((always_inline))
+#define static_always_inline static inline __attribute__((always_inline))
 
 
 #include <shim_types.h>
 #include <shim_types.h>
 #include <shim_defs.h>
 #include <shim_defs.h>
@@ -555,11 +555,11 @@ extern LOCKTYPE __master_lock;
 # define master_lock()                                              \
 # define master_lock()                                              \
     do {                                                            \
     do {                                                            \
         lock(__master_lock);                                        \
         lock(__master_lock);                                        \
-        pal_printf("maste lock " __FILE__ ":%d\n", __LINE__);       \
+        pal_printf("master lock " __FILE__ ":%d\n", __LINE__);       \
     } while (0)
     } while (0)
 # define master_unlock()                                            \
 # define master_unlock()                                            \
     do {                                                            \
     do {                                                            \
-        pal_printf("maste unlock " __FILE__ ":%d\n", __LINE__);     \
+        pal_printf("master unlock " __FILE__ ":%d\n", __LINE__);     \
         unlock(__master_lock);                                      \
         unlock(__master_lock);                                      \
     } while (0)
     } while (0)
 #else
 #else

+ 3 - 3
LibOS/shim/include/shim_ipc.h

@@ -540,7 +540,7 @@ struct shim_ipc_info * discover_client (struct shim_ipc_port * port,
 int __init_ipc_msg (struct shim_ipc_msg * msg, int code, int size, IDTYPE dest);
 int __init_ipc_msg (struct shim_ipc_msg * msg, int code, int size, IDTYPE dest);
 struct shim_ipc_msg * create_ipc_msg (int code, int size, IDTYPE dest);
 struct shim_ipc_msg * create_ipc_msg (int code, int size, IDTYPE dest);
 
 
-static_inline
+static_always_inline
 struct shim_ipc_msg * create_ipc_msg_on_stack (int code, int size, IDTYPE dest)
 struct shim_ipc_msg * create_ipc_msg_on_stack (int code, int size, IDTYPE dest)
 {
 {
     struct shim_ipc_msg * msg = __alloca(IPC_MSG_SIZE(size));
     struct shim_ipc_msg * msg = __alloca(IPC_MSG_SIZE(size));
@@ -553,7 +553,7 @@ int __init_ipc_msg_duplex (struct shim_ipc_msg_obj * msg, int code, int size,
 struct shim_ipc_msg_obj *
 struct shim_ipc_msg_obj *
 create_ipc_msg_duplex (int code, int size, IDTYPE dest);
 create_ipc_msg_duplex (int code, int size, IDTYPE dest);
 
 
-static_inline
+static_always_inline
 struct shim_ipc_msg_obj *
 struct shim_ipc_msg_obj *
 create_ipc_msg_duplex_on_stack (int code, int size, IDTYPE dest)
 create_ipc_msg_duplex_on_stack (int code, int size, IDTYPE dest)
 {
 {
@@ -568,7 +568,7 @@ int __init_ipc_resp_msg (struct shim_ipc_msg * resp, int ret,
 struct shim_ipc_msg *
 struct shim_ipc_msg *
 create_ipc_resp_msg (int ret, IDTYPE dest, unsigned long seq);
 create_ipc_resp_msg (int ret, IDTYPE dest, unsigned long seq);
 
 
-static_inline
+static_always_inline
 struct shim_ipc_msg *
 struct shim_ipc_msg *
 create_ipc_resp_msg_on_stack (int ret, IDTYPE dest, unsigned long seq)
 create_ipc_resp_msg_on_stack (int ret, IDTYPE dest, unsigned long seq)
 {
 {

+ 2 - 1
LibOS/shim/include/shim_signal.h

@@ -111,7 +111,8 @@ struct shim_signal_log {
 
 
 extern const char * const siglist[NUM_KNOWN_SIGS + 1];
 extern const char * const siglist[NUM_KNOWN_SIGS + 1];
 
 
-static_inline const char * signal_name (int sig)
+static_always_inline
+const char * signal_name (int sig)
 {
 {
     if (sig <= NUM_KNOWN_SIGS)
     if (sig <= NUM_KNOWN_SIGS)
         return siglist[sig];
         return siglist[sig];

+ 0 - 9
LibOS/shim/include/shim_thread.h

@@ -88,9 +88,6 @@ struct shim_thread {
     bool user_tcb; /* is tcb assigned by user? */
     bool user_tcb; /* is tcb assigned by user? */
     void * frameptr;
     void * frameptr;
 
 
-    /* to save vma bookkeeping */
-    struct { void * addr; uint64_t length; } delayed_bkeep_mmap;
-
     REFTYPE ref_count;
     REFTYPE ref_count;
     LOCKTYPE lock;
     LOCKTYPE lock;
 
 
@@ -172,12 +169,6 @@ void set_cur_thread (struct shim_thread * thread)
     shim_tcb_t * tcb = SHIM_GET_TLS();
     shim_tcb_t * tcb = SHIM_GET_TLS();
     IDTYPE tid = 0;
     IDTYPE tid = 0;
 
 
-#ifndef container_of
-# define container_of(ptr, type, member) ({                 \
-    const typeof( ((type *)0)->member ) *__mptr = (ptr);    \
-    (type *)( (char *)__mptr - offsetof(type,member) );})
-#endif
-
     if (thread) {
     if (thread) {
         if (tcb->tp && tcb->tp != thread)
         if (tcb->tp && tcb->tp != thread)
             put_thread(tcb->tp);
             put_thread(tcb->tp);

+ 1 - 1
LibOS/shim/include/shim_types.h

@@ -483,7 +483,7 @@ struct shim_str {
 
 
 #define QSTR_SIZE   32
 #define QSTR_SIZE   32
 
 
-/* Use qstr for names. This has fix size string + string object
+/* Use qstr for names. This has fixed size string + string object
  * if len > SHIM_QSTR_SIZE then use overflow string */
  * if len > SHIM_QSTR_SIZE then use overflow string */
 struct shim_qstr {
 struct shim_qstr {
     HASHTYPE    hash;
     HASHTYPE    hash;

+ 25 - 21
LibOS/shim/include/shim_utils.h

@@ -21,8 +21,8 @@
  * shim_utils.h
  * shim_utils.h
  */
  */
 
 
-#ifndef _SHIM_UTILITIES_H_
-#define _SHIM_UTILITIES_H_
+#ifndef _SHIM_UTILS_H_
+#define _SHIM_UTILS_H_
 
 
 #include <shim_handle.h>
 #include <shim_handle.h>
 
 
@@ -72,8 +72,11 @@ static inline char * qstrsetstr (struct shim_qstr * qstr,
     char * buf = qstr->name;
     char * buf = qstr->name;
 
 
     if (size >= QSTR_SIZE) {
     if (size >= QSTR_SIZE) {
-        if (!qstr->oflow)
+        if (!qstr->oflow) {
             qstr->oflow = get_str_obj();
             qstr->oflow = get_str_obj();
+            if (!qstr->oflow)
+                return NULL;
+        }
         buf = qstr->oflow->str;
         buf = qstr->oflow->str;
     } else {
     } else {
         if (qstr->oflow) {
         if (qstr->oflow) {
@@ -82,13 +85,9 @@ static inline char * qstrsetstr (struct shim_qstr * qstr,
         }
         }
     }
     }
 
 
-    qstr->len = 0;
-    if (str) {
-        if (size)
-            memcpy(buf, str, size);
-        buf[size] = 0;
-        qstr->len = size;
-    }
+    memcpy(buf, str, size);
+    buf[size] = 0;
+    qstr->len = size;
 
 
     return buf;
     return buf;
 }
 }
@@ -108,8 +107,12 @@ static inline char * qstrsetstrs (struct shim_qstr * qstr,
     char * buf = qstr->name;
     char * buf = qstr->name;
 
 
     if (total_size >= QSTR_SIZE) {
     if (total_size >= QSTR_SIZE) {
-        if (!qstr->oflow)
+        if (!qstr->oflow) {
+            // TODO: alloc proper size.
             qstr->oflow = get_str_obj();
             qstr->oflow = get_str_obj();
+            if (!qstr->oflow)
+                return NULL;
+        }
         buf = qstr->oflow->str;
         buf = qstr->oflow->str;
     }
     }
 
 
@@ -153,21 +156,23 @@ static inline int qstrcmpstr (const struct shim_qstr * qstr,
 
 
 /* heap allocation functions */
 /* heap allocation functions */
 int init_slab (void);
 int init_slab (void);
+
 #if defined(SLAB_DEBUG_PRINT) || defined(SLAB_DEBUG_TRACE)
 #if defined(SLAB_DEBUG_PRINT) || defined(SLAB_DEBUG_TRACE)
 void * __malloc_debug (size_t size, const char * file, int line);
 void * __malloc_debug (size_t size, const char * file, int line);
 #define malloc(size) __malloc_debug((size), __FILE__, __LINE__)
 #define malloc(size) __malloc_debug((size), __FILE__, __LINE__)
 void __free_debug (void * mem, const char * file, int line);
 void __free_debug (void * mem, const char * file, int line);
 #define free(mem) __free_debug((mem), __FILE__, __LINE__)
 #define free(mem) __free_debug((mem), __FILE__, __LINE__)
-void * __remalloc_debug (const void * mem, size_t size,
-                         const char * file, int line);
-#define remalloc(mem, size) __remalloc_debug((mem), (size), __FILE__, __LINE__)
+void * __malloc_copy_debug (const void * mem, size_t size,
+                             const char * file, int line);
+#define malloc_copy(mem, size) __malloc_copy_debug((mem), (size), __FILE__, __LINE__)
 #else
 #else
 void * malloc (size_t size);
 void * malloc (size_t size);
 void free (void * mem);
 void free (void * mem);
-void * remalloc (const void * mem, size_t size);
+void * malloc_copy (const void * mem, size_t size);
 #endif
 #endif
 
 
-static_inline char * qstrtostr (struct shim_qstr * qstr, bool on_stack)
+static_always_inline
+char * qstrtostr (struct shim_qstr * qstr, bool on_stack)
 {
 {
     int len = qstr->len;
     int len = qstr->len;
     char * buf = on_stack ? __alloca(len + 1) : malloc(len + 1);
     char * buf = on_stack ? __alloca(len + 1) : malloc(len + 1);
@@ -175,8 +180,7 @@ static_inline char * qstrtostr (struct shim_qstr * qstr, bool on_stack)
     if (!buf)
     if (!buf)
         return NULL;
         return NULL;
 
 
-    if (len)
-        memcpy(buf, qstrgetstr(qstr), len);
+    memcpy(buf, qstrgetstr(qstr), len);
 
 
     buf[len] = 0;
     buf[len] = 0;
     return buf;
     return buf;
@@ -204,8 +208,8 @@ void md5_final (struct shim_md5_ctx * mdContext);
 /* prompt user for confirmation */
 /* prompt user for confirmation */
 int message_confirm (const char * message, const char * options);
 int message_confirm (const char * message, const char * options);
 
 
-/* get random number */
-int getrand (void * buffer, size_t size);
+/* get random bytes (not for crypto!) */
+void getrand (void * buffer, size_t size);
 
 
 /* ELF binary loading */
 /* ELF binary loading */
 int check_elf_object (struct shim_handle * file);
 int check_elf_object (struct shim_handle * file);
@@ -243,4 +247,4 @@ int terminate_async_helper (void);
 
 
 extern struct config_store * root_config;
 extern struct config_store * root_config;
 
 
-#endif /* _SHIM_UTILITIES_H */
+#endif /* _SHIM_UTILS_H_ */

+ 67 - 22
LibOS/shim/include/shim_vma.h

@@ -31,6 +31,7 @@
 #include <shim_handle.h>
 #include <shim_handle.h>
 
 
 #include <pal.h>
 #include <pal.h>
+#include <api.h>
 #include <list.h>
 #include <list.h>
 
 
 #include <asm/mman.h>
 #include <asm/mman.h>
@@ -39,28 +40,52 @@ struct shim_handle;
 
 
 #define VMA_COMMENT_LEN     16
 #define VMA_COMMENT_LEN     16
 
 
-DEFINE_LIST(shim_vma);
-struct shim_vma {
-    REFTYPE                 ref_count;
+/*
+ * struct shim_vma_val is the published version of struct shim_vma
+ * (struct shim_vma is defined in bookkeep/shim_vma.c).
+ */
+struct shim_vma_val {
     void *                  addr;
     void *                  addr;
     uint64_t                length;
     uint64_t                length;
     int                     prot;
     int                     prot;
     int                     flags;
     int                     flags;
     uint64_t                offset;
     uint64_t                offset;
     struct shim_handle *    file;
     struct shim_handle *    file;
-    LIST_TYPE(shim_vma)     list;
     char                    comment[VMA_COMMENT_LEN];
     char                    comment[VMA_COMMENT_LEN];
 };
 };
 
 
+static inline
+void free_vma_val_array (struct shim_vma_val * vmas, size_t count)
+{
+    for (int i = 0 ; i < count ; i++) {
+        /* need to release the file handle */
+        if (vmas[i].file)
+            put_handle(vmas[i].file);
+    }
+
+    free(vmas);
+}
+
 /* an additional flag */
 /* an additional flag */
 #define VMA_UNMAPPED 0x10000000   /* vma is kept for bookkeeping, but the
 #define VMA_UNMAPPED 0x10000000   /* vma is kept for bookkeeping, but the
                                      memory is not actually allocated */
                                      memory is not actually allocated */
-#define VMA_INTERNAL 0x20000000
+#define VMA_INTERNAL 0x20000000   /* vma is used internally */
 
 
 #define VMA_TAINTED  0x40000000   /* vma has been protected as writeable,
 #define VMA_TAINTED  0x40000000   /* vma has been protected as writeable,
                                      so it has to be checkpointed during
                                      so it has to be checkpointed during
                                      migration */
                                      migration */
 
 
+#define VMA_CP       0x80000000   /* vma is used for dumping checkpoint
+                                     data */
+
+#define VMA_TYPE(flags)     ((flags) & (VMA_INTERNAL | VMA_CP))
+
+/*
+ * We distinguish checkpoint VMAs from user VMAs and other internal VMAs,
+ * to prevent corrupting internal data when creating processes.
+ */
+#define CP_VMA_FLAGS  (MAP_PRIVATE|MAP_ANONYMOUS|VMA_INTERNAL|VMA_CP)
+
 #define NEED_MIGRATE_MEMORY(vma)                                \
 #define NEED_MIGRATE_MEMORY(vma)                                \
         (((vma)->flags & VMA_TAINTED || !(vma)->file) &&        \
         (((vma)->flags & VMA_TAINTED || !(vma)->file) &&        \
         !((vma)->flags & VMA_UNMAPPED))
         !((vma)->flags & VMA_UNMAPPED))
@@ -90,8 +115,10 @@ static inline PAL_FLG PAL_PROT (int prot, int flags)
 int init_vma (void);
 int init_vma (void);
 
 
 /* Bookkeeping mmap() system call */
 /* Bookkeeping mmap() system call */
-int bkeep_mmap (void * addr, uint64_t length, int prot, int flags,
-                struct shim_handle * file, uint64_t offset, const char * comment);
+int bkeep_mmap (void * addr, uint64_t length,
+                int prot, int flags,
+                struct shim_handle * file, uint64_t offset,
+                const char * comment);
 
 
 /* Bookkeeping munmap() system call */
 /* Bookkeeping munmap() system call */
 int bkeep_munmap (void * addr, uint64_t length, int flags);
 int bkeep_munmap (void * addr, uint64_t length, int flags);
@@ -99,30 +126,48 @@ int bkeep_munmap (void * addr, uint64_t length, int flags);
 /* Bookkeeping mprotect() system call */
 /* Bookkeeping mprotect() system call */
 int bkeep_mprotect (void * addr, uint64_t length, int prot, int flags);
 int bkeep_mprotect (void * addr, uint64_t length, int prot, int flags);
 
 
-/* Get vma bookkeeping handle */
-void get_vma (struct shim_vma * vma);
-void put_vma (struct shim_vma * vma);
-
-/* Returns 0 on success, -E* on failure.
-   Calls `get_vma` on the result before returning it.
-*/
-int lookup_supervma (const void * addr, uint64_t len, struct shim_vma ** vma);
-int lookup_overlap_vma (const void * addr, uint64_t len, struct shim_vma ** vma);
+/* Look up the VMA that contains the address addr */
+int lookup_vma (void * addr, struct shim_vma_val * vma);
 
 
-struct shim_vma * next_vma (struct shim_vma * vma);
+/* Look up a VMA that overlaps with [addr, addr + length) */
+int lookup_overlap_vma (void * addr, uint64_t length,
+                        struct shim_vma_val * vma);
 
 
-void * get_unmapped_vma (uint64_t len, int flags);
-void * get_unmapped_vma_for_cp (uint64_t len);
+/*
+ * Looking for an unmapped space and then adding the corresponding bookkeeping
+ * (more info in bookkeep/shim_vma.c).
+ *
+ * Note: the first argument is "top_addr" because the search is top-down.
+ */
+void * bkeep_unmapped (void * top_addr, void * bottom_addr, uint64_t length,
+                       int prot, int flags, struct shim_handle * file,
+                       uint64_t offset, const char * comment);
+
+static inline void *
+bkeep_unmapped_any (uint64_t length, int prot, int flags,
+                    struct shim_handle * file, uint64_t offset,
+                    const char * comment)
+{
+    return bkeep_unmapped(PAL_CB(user_address.end),
+                          PAL_CB(user_address.start),
+                          length, prot, flags, file, offset, comment);
+}
 
 
-int dump_all_vmas (struct shim_thread * thread, char * buf, uint64_t size);
+void * bkeep_unmapped_heap (uint64_t length, int prot, int flags,
+                            struct shim_handle * file, uint64_t offset,
+                            const char * comment);
 
 
-void unmap_all_vmas (void);
+/*
+ * Dumping all *non-internal* VMAs into a user-allocated buffer ("max_count" is
+ * the maximal number of entries in the buffer). Return number of filled entries
+ * if succeeded, or -EOVERFLOW if the buffer is too small.
+ */
+int dump_all_vmas (struct shim_vma_val * vmas, size_t max_count);
 
 
 /* Debugging */
 /* Debugging */
 void debug_print_vma_list (void);
 void debug_print_vma_list (void);
 
 
 /* Constants */
 /* Constants */
-extern unsigned long mem_max_npages;
 extern unsigned long brk_max_size;
 extern unsigned long brk_max_size;
 extern unsigned long sys_stack_size;
 extern unsigned long sys_stack_size;
 
 

+ 26 - 27
LibOS/shim/src/bookkeep/shim_handle.c

@@ -424,7 +424,7 @@ extend:
         ret = fd;
         ret = fd;
 out:
 out:
     unlock(handle_map->lock);
     unlock(handle_map->lock);
-    return fd;
+    return ret;
 }
 }
 
 
 void flush_handle (struct shim_handle * hdl)
 void flush_handle (struct shim_handle * hdl)
@@ -603,23 +603,18 @@ void dup_fd_handle (struct shim_handle_map * map,
 static struct shim_handle_map * get_new_handle_map (FDTYPE size)
 static struct shim_handle_map * get_new_handle_map (FDTYPE size)
 {
 {
     struct shim_handle_map * handle_map =
     struct shim_handle_map * handle_map =
-                    malloc(sizeof(struct shim_handle_map));
+        calloc(1, sizeof(struct shim_handle_map));
 
 
-    if (handle_map == NULL)
+    if (!handle_map)
         return NULL;
         return NULL;
 
 
-    memset(handle_map, 0, sizeof(struct shim_handle_map));
-
-    handle_map->map = malloc(sizeof(struct shim_fd_handle) * size);
+    handle_map->map = calloc(size, sizeof(struct shim_fd_handle));
 
 
-    if (handle_map->map == NULL) {
+    if (!handle_map->map) {
         free(handle_map);
         free(handle_map);
         return NULL;
         return NULL;
     }
     }
 
 
-    memset(handle_map->map, 0,
-           sizeof(struct shim_fd_handle) * size);
-
     handle_map->fd_top  = FD_NULL;
     handle_map->fd_top  = FD_NULL;
     handle_map->fd_size = size;
     handle_map->fd_size = size;
     create_lock(handle_map->lock);
     create_lock(handle_map->lock);
@@ -631,25 +626,19 @@ static struct shim_handle_map * __enlarge_handle_map
                      (struct shim_handle_map * map, FDTYPE size)
                      (struct shim_handle_map * map, FDTYPE size)
 {
 {
     if (size <= map->fd_size)
     if (size <= map->fd_size)
-        return NULL;
+        return map;
 
 
-    struct shim_fd_handle ** old_map = map->map;
+    struct shim_fd_handle ** new_map = calloc(size, sizeof(new_map[0]));
 
 
-    map->map = malloc(sizeof(struct shim_fd_handle *) * size);
-
-    if (map->map == NULL) {
-        map->map = old_map;
+    if (!new_map)
         return NULL;
         return NULL;
-    }
 
 
-    size_t copy_size = sizeof(struct shim_fd_handle *) * map->fd_size;
+    memcpy(new_map, map->map, map->fd_size * sizeof(new_map[0]));
+    memset(new_map + map->fd_size, 0,
+           (size - map->fd_size) * sizeof(new_map[0]));
+    free(map->map);
+    map->map = new_map;
     map->fd_size = size;
     map->fd_size = size;
-    memset(map->map, 0, sizeof(struct shim_fd_handle *) * size);
-    if (old_map) {
-        if (copy_size)
-            memcpy(map->map, old_map, copy_size);
-        free(old_map);
-    }
     return map;
     return map;
 }
 }
 
 
@@ -668,7 +657,7 @@ int dup_handle_map (struct shim_handle_map ** new,
     if (old_map->fd_top == FD_NULL)
     if (old_map->fd_top == FD_NULL)
         goto done;
         goto done;
 
 
-    for (int i = 0 ; i <= old_map->fd_top ; i++) {
+    for (int i = 0; i <= old_map->fd_top; i++) {
         struct shim_fd_handle * fd_old = old_map->map[i];
         struct shim_fd_handle * fd_old = old_map->map[i];
         struct shim_fd_handle * fd_new;
         struct shim_fd_handle * fd_new;
 
 
@@ -678,8 +667,19 @@ int dup_handle_map (struct shim_handle_map ** new,
             /* first, get the handle to prevent it from being deleted */
             /* first, get the handle to prevent it from being deleted */
             struct shim_handle * hdl = fd_old->handle;
             struct shim_handle * hdl = fd_old->handle;
             open_handle(hdl);
             open_handle(hdl);
-            /* DP: I assume we really need a deep copy of the handle map? */
+
             fd_new = malloc(sizeof(struct shim_fd_handle));
             fd_new = malloc(sizeof(struct shim_fd_handle));
+            if (!fd_new) {
+                for (int j = 0; j < i; j++) {
+                    close_handle(new_map->map[j]->handle);
+                    free(new_map->map[j]);
+                }
+                unlock(old_map->lock);
+                *new = NULL;
+                return -ENOMEM;
+            }
+
+            /* DP: I assume we really need a deep copy of the handle map? */
             new_map->map[i] = fd_new;
             new_map->map[i] = fd_new;
             fd_new->vfd    = fd_old->vfd;
             fd_new->vfd    = fd_old->vfd;
             fd_new->handle = hdl;
             fd_new->handle = hdl;
@@ -690,7 +690,6 @@ int dup_handle_map (struct shim_handle_map ** new,
 done:
 done:
     unlock(old_map->lock);
     unlock(old_map->lock);
     *new = new_map;
     *new = new_map;
-
     return 0;
     return 0;
 }
 }
 
 

+ 10 - 18
LibOS/shim/src/bookkeep/shim_signal.c

@@ -173,7 +173,7 @@ void deliver_signal (siginfo_t * info, PAL_CONTEXT * context)
 
 
 delay:
 delay:
     {
     {
-        if (!(signal = remalloc(signal,sizeof(struct shim_signal))))
+        if (!(signal = malloc_copy(signal,sizeof(struct shim_signal))))
             goto out;
             goto out;
 
 
         struct shim_signal ** signal_log = allocate_signal_log(cur_thread, sig);
         struct shim_signal ** signal_log = allocate_signal_log(cur_thread, sig);
@@ -256,24 +256,23 @@ internal:
     if (context)
     if (context)
         debug("memory fault at %p (IP = %p)\n", arg, context->IP);
         debug("memory fault at %p (IP = %p)\n", arg, context->IP);
 
 
-    struct shim_vma * vma = NULL;
+    struct shim_vma_val vma;
     int signo = SIGSEGV;
     int signo = SIGSEGV;
     int code;
     int code;
     if (!arg) {
     if (!arg) {
         code = SEGV_MAPERR;
         code = SEGV_MAPERR;
-    } else if (!lookup_supervma((void *) arg, 0, &vma)) {
-        if (vma->flags & VMA_INTERNAL) {
-            put_vma(vma);
+    } else if (!lookup_vma((void *) arg, &vma)) {
+        if (vma.flags & VMA_INTERNAL) {
             goto internal;
             goto internal;
         }
         }
-        if (vma->file && vma->file->type == TYPE_FILE) {
+        if (vma.file && vma.file->type == TYPE_FILE) {
             /* DEP 3/3/17: If the mapping exceeds end of a file (but is in the VMA)
             /* DEP 3/3/17: If the mapping exceeds end of a file (but is in the VMA)
              * then return a SIGBUS. */
              * then return a SIGBUS. */
-            uint64_t eof_in_vma = (uint64_t) vma->addr + vma->offset + vma->file->info.file.size;
+            uint64_t eof_in_vma = (uint64_t) vma.addr + vma.offset + vma.file->info.file.size;
             if (arg > eof_in_vma) {
             if (arg > eof_in_vma) {
                 signo = SIGBUS;
                 signo = SIGBUS;
                 code = BUS_ADRERR;
                 code = BUS_ADRERR;
-            } else if ((context->err & 4) && !(vma->flags & PROT_WRITE)) {
+            } else if ((context->err & 4) && !(vma.flags & PROT_WRITE)) {
                 /* DEP 3/3/17: If the page fault gives a write error, and
                 /* DEP 3/3/17: If the page fault gives a write error, and
                  * the VMA is read-only, return SIGSEGV+SEGV_ACCERR */
                  * the VMA is read-only, return SIGSEGV+SEGV_ACCERR */
                 signo = SIGSEGV;
                 signo = SIGSEGV;
@@ -286,7 +285,6 @@ internal:
         } else {
         } else {
             code = SEGV_ACCERR;
             code = SEGV_ACCERR;
         }
         }
-        put_vma(vma);
     } else {
     } else {
         code = SEGV_MAPERR;
         code = SEGV_MAPERR;
     }
     }
@@ -306,21 +304,15 @@ internal:
         goto ret_exception;
         goto ret_exception;
     }
     }
 
 
-    struct shim_vma * vma = NULL;
+    struct shim_vma_val vma;
 
 
-    if (!(lookup_supervma((void *) arg, 0, &vma)) &&
-        !(vma->flags & VMA_INTERNAL)) {
+    if (!(lookup_vma((void *) arg, &vma)) &&
+        !(vma.flags & VMA_INTERNAL)) {
         if (context)
         if (context)
             debug("illegal instruction at %p\n", context->IP);
             debug("illegal instruction at %p\n", context->IP);
 
 
-        if (vma)
-            put_vma(vma);
-
         deliver_signal(ALLOC_SIGINFO(SIGILL, ILL_ILLOPC, si_addr, (void *) arg), context);
         deliver_signal(ALLOC_SIGINFO(SIGILL, ILL_ILLOPC, si_addr, (void *) arg), context);
     } else {
     } else {
-        if (vma)
-            put_vma(vma);
-
         goto internal;
         goto internal;
     }
     }
 
 

+ 6 - 8
LibOS/shim/src/bookkeep/shim_thread.c

@@ -29,6 +29,7 @@
 #include <shim_vma.h>
 #include <shim_vma.h>
 #include <shim_fs.h>
 #include <shim_fs.h>
 #include <shim_checkpoint.h>
 #include <shim_checkpoint.h>
+#include <shim_utils.h>
 
 
 #include <pal.h>
 #include <pal.h>
 #include <list.h>
 #include <list.h>
@@ -152,11 +153,10 @@ static IDTYPE get_internal_pid (void)
 
 
 struct shim_thread * alloc_new_thread (void)
 struct shim_thread * alloc_new_thread (void)
 {
 {
-    struct shim_thread * thread = malloc(sizeof(struct shim_thread));
+    struct shim_thread * thread = calloc(1, sizeof(struct shim_thread));
     if (!thread)
     if (!thread)
         return NULL;
         return NULL;
 
 
-    memset(thread, 0, sizeof(struct shim_thread));
     REF_SET(thread->ref_count, 1);
     REF_SET(thread->ref_count, 1);
     INIT_LISTP(&thread->children);
     INIT_LISTP(&thread->children);
     INIT_LIST_HEAD(thread, siblings);
     INIT_LIST_HEAD(thread, siblings);
@@ -204,8 +204,8 @@ struct shim_thread * get_new_thread (IDTYPE new_tid)
                 continue;
                 continue;
 
 
             thread->signal_handles[i].action =
             thread->signal_handles[i].action =
-                    remalloc(cur_thread->signal_handles[i].action,
-                             sizeof(struct shim_signal_handle));
+                    malloc_copy(cur_thread->signal_handles[i].action,
+                                sizeof(struct shim_signal_handle));
         }
         }
 
 
         memcpy(&thread->signal_mask, &cur_thread->signal_mask,
         memcpy(&thread->signal_mask, &cur_thread->signal_mask,
@@ -340,10 +340,8 @@ void put_thread (struct shim_thread * thread)
             DkObjectClose(thread->child_exit_event);
             DkObjectClose(thread->child_exit_event);
         destroy_lock(thread->lock);
         destroy_lock(thread->lock);
 
 
-        if (MEMORY_MIGRATED(thread))
-            memset(thread, 0, sizeof(struct shim_thread));
-        else
-            free(thread);
+        free(thread->signal_logs);
+        free(thread);
     }
     }
 }
 }
 
 

File diff suppressed because it is too large
+ 721 - 862
LibOS/shim/src/bookkeep/shim_vma.c


+ 11 - 19
LibOS/shim/src/elf/shim_rtld.c

@@ -187,13 +187,13 @@ static int protect_page (struct link_map * l, void * addr, size_t size)
     }
     }
 
 
     if ((prot & (PROT_READ|PROT_WRITE)) == (PROT_READ|PROT_WRITE)) {
     if ((prot & (PROT_READ|PROT_WRITE)) == (PROT_READ|PROT_WRITE)) {
-        struct shim_vma * vma = NULL;
+        struct shim_vma_val vma;
+
         /* the actual protection of the vma might be changed */
         /* the actual protection of the vma might be changed */
-        if (lookup_supervma(addr, size, &vma) < 0)
+        if (lookup_vma(addr, &vma) < 0)
             return 0;
             return 0;
 
 
-        prot = vma->prot;
-        put_vma(vma);
+        prot = vma.prot;
 
 
         if ((prot & (PROT_READ|PROT_WRITE)) == (PROT_READ|PROT_WRITE))
         if ((prot & (PROT_READ|PROT_WRITE)) == (PROT_READ|PROT_WRITE))
             return 0;
             return 0;
@@ -522,8 +522,11 @@ call_lose:
             if (addr)
             if (addr)
                 mappref = (ElfW(Addr)) c->mapstart + (ElfW(Addr)) addr;
                 mappref = (ElfW(Addr)) c->mapstart + (ElfW(Addr)) addr;
             else
             else
-                mappref = (ElfW(Addr)) get_unmapped_vma(ALIGN_UP(maplength),
-                                            MAP_PRIVATE|MAP_ANONYMOUS);
+                mappref = (ElfW(Addr))
+                    bkeep_unmapped_heap(ALIGN_UP(maplength), c->prot,
+                                        c->flags|MAP_PRIVATE|
+                                        (type == OBJECT_INTERNAL ? VMA_INTERNAL : 0),
+                                        file, c->mapoff, NULL);
 
 
             /* Remember which part of the address space this object uses.  */
             /* Remember which part of the address space this object uses.  */
             errval = (*mmap) (file, (void **) &mappref, ALIGN_UP(maplength),
             errval = (*mmap) (file, (void **) &mappref, ALIGN_UP(maplength),
@@ -540,17 +543,6 @@ map_error:
 
 
         l->l_map_start = mappref;
         l->l_map_start = mappref;
         l->l_map_end = l->l_map_start + maplength;
         l->l_map_end = l->l_map_start + maplength;
-
-#if BOOKKEEP_INTERNAL_OBJ == 0
-        if (type != OBJECT_INTERNAL && type != OBJECT_USER)
-#else
-        if (type != OBJECT_USER)
-#endif
-            bkeep_mmap((void *) mappref, ALIGN_UP(maplength), c->prot,
-                       c->flags|MAP_PRIVATE|
-                       (type == OBJECT_INTERNAL ? VMA_INTERNAL : 0),
-                       file, c->mapoff, NULL);
-
         l->l_addr = l->l_map_start - c->mapstart;
         l->l_addr = l->l_map_start - c->mapstart;
 
 
         if (has_holes) {
         if (has_holes) {
@@ -693,7 +685,7 @@ postmap:
         }
         }
     } else {
     } else {
         l->l_real_ld = (ElfW(Dyn) *) RELOCATE(l, l->l_ld);
         l->l_real_ld = (ElfW(Dyn) *) RELOCATE(l, l->l_ld);
-        l->l_ld = remalloc(l->l_real_ld, sizeof(ElfW(Dyn)) * l->l_ldnum);
+        l->l_ld = malloc_copy(l->l_real_ld, sizeof(ElfW(Dyn)) * l->l_ldnum);
     }
     }
 
 
     elf_get_dynamic_info(l);
     elf_get_dynamic_info(l);
@@ -704,7 +696,7 @@ postmap:
         /* DEP 3/12/18: This string is not stable; copy it. */
         /* DEP 3/12/18: This string is not stable; copy it. */
         char * tmp = (char *) (D_PTR (l->l_info[DT_STRTAB])
         char * tmp = (char *) (D_PTR (l->l_info[DT_STRTAB])
                               + D_PTR (l->l_info[DT_SONAME]));
                               + D_PTR (l->l_info[DT_SONAME]));
-        l->l_soname = remalloc(tmp, strlen(tmp) + 1);
+        l->l_soname = malloc_copy(tmp, strlen(tmp) + 1);
     }
     }
 
 
     if (l->l_phdr == NULL) {
     if (l->l_phdr == NULL) {

+ 122 - 95
LibOS/shim/src/fs/chroot/fs.c

@@ -147,12 +147,10 @@ static inline int concat_uri (char * buffer, int size, int type,
    handle is not linked to a dentry */
    handle is not linked to a dentry */
 static struct shim_file_data * __create_data (void)
 static struct shim_file_data * __create_data (void)
 {
 {
-    struct shim_file_data * data = malloc(sizeof(struct shim_file_data));
-
+    struct shim_file_data * data = calloc(1, sizeof(struct shim_file_data));
     if (!data)
     if (!data)
         return NULL;
         return NULL;
 
 
-    memset(data, 0, sizeof(struct shim_file_data));
     create_lock(data->lock);
     create_lock(data->lock);
     return data;
     return data;
 }
 }
@@ -251,7 +249,7 @@ static int __query_attr (struct shim_dentry * dent,
                 return ret;
                 return ret;
             }
             }
         }
         }
-        
+
         /* DEP 3/18/17: If we have a directory, we need to find out how many
         /* DEP 3/18/17: If we have a directory, we need to find out how many
          * children it has by hand. */
          * children it has by hand. */
         /* XXX: Keep coherent with rmdir/mkdir/creat, etc */
         /* XXX: Keep coherent with rmdir/mkdir/creat, etc */
@@ -264,7 +262,6 @@ static int __query_attr (struct shim_dentry * dent,
             for (d = dbuf; d; d = d->next)
             for (d = dbuf; d; d = d->next)
                 nlink++;
                 nlink++;
             free(dbuf);
             free(dbuf);
-            debug("Querying a directory; I count %d links.\n", nlink);
         } else
         } else
             nlink = 2; // Educated guess...
             nlink = 2; // Educated guess...
         data->nlink = nlink;
         data->nlink = nlink;
@@ -274,7 +271,7 @@ static int __query_attr (struct shim_dentry * dent,
          */ 
          */ 
         data->nlink = 1;
         data->nlink = 1;
     }
     }
-    
+
     data->queried = true;
     data->queried = true;
 
 
     return 0;
     return 0;
@@ -351,7 +348,6 @@ static int query_dentry (struct shim_dentry * dent, PAL_HANDLE pal_handle,
         stat->st_ctime  = (time_t) data->ctime;
         stat->st_ctime  = (time_t) data->ctime;
         stat->st_nlink  = data->nlink;
         stat->st_nlink  = data->nlink;
 
 
-        
         switch (data->type) {
         switch (data->type) {
             case FILE_REGULAR:
             case FILE_REGULAR:
                 stat->st_mode |= S_IFREG;
                 stat->st_mode |= S_IFREG;
@@ -365,7 +361,6 @@ static int query_dentry (struct shim_dentry * dent, PAL_HANDLE pal_handle,
                 break;
                 break;
             default:            break;
             default:            break;
         }
         }
-        debug("Stat: Returning link count %d\n", stat->st_nlink);
     }
     }
 
 
     unlock(data->lock);
     unlock(data->lock);
@@ -567,7 +562,6 @@ static int chroot_recreate (struct shim_handle * hdl)
     }
     }
 
 
     /*
     /*
-     * Chia-Che Tsai 8/24/2017:
      * when recreating a file handle after migration, the file should
      * when recreating a file handle after migration, the file should
      * not be created again.
      * not be created again.
      */
      */
@@ -623,7 +617,9 @@ static int chroot_flush (struct shim_handle * hdl)
 
 
         if (mapbuf) {
         if (mapbuf) {
             DkStreamUnmap(mapbuf, mapsize);
             DkStreamUnmap(mapbuf, mapsize);
-            bkeep_munmap(mapbuf, mapsize, VMA_INTERNAL);
+
+            if (bkeep_munmap(mapbuf, mapsize, VMA_INTERNAL) < 0)
+                bug();
         }
         }
     }
     }
 
 
@@ -640,7 +636,9 @@ static inline int __map_buffer (struct shim_handle * hdl, int size)
             return 0;
             return 0;
 
 
         DkStreamUnmap(file->mapbuf, file->mapsize);
         DkStreamUnmap(file->mapbuf, file->mapsize);
-        bkeep_munmap(file->mapbuf, file->mapsize, VMA_INTERNAL);
+
+        if (bkeep_munmap(file->mapbuf, file->mapsize, VMA_INTERNAL) < 0)
+            bug();
 
 
         file->mapbuf    = NULL;
         file->mapbuf    = NULL;
         file->mapoffset = 0;
         file->mapoffset = 0;
@@ -648,23 +646,34 @@ static inline int __map_buffer (struct shim_handle * hdl, int size)
 
 
     /* second, reallocate the buffer */
     /* second, reallocate the buffer */
     uint64_t bufsize = file->mapsize ? : FILE_BUFMAP_SIZE;
     uint64_t bufsize = file->mapsize ? : FILE_BUFMAP_SIZE;
-    int prot = PAL_PROT_READ;
     uint64_t mapoff = file->marker & ~(bufsize - 1);
     uint64_t mapoff = file->marker & ~(bufsize - 1);
-    uint64_t maplen = bufsize;	
+    uint64_t maplen = bufsize;
+    int flags = MAP_FILE | MAP_PRIVATE | VMA_INTERNAL;
+    int prot = PROT_READ;
 
 
-    if (hdl->acc_mode & MAY_WRITE)
-        prot |= PAL_PROT_WRITE;
+    if (hdl->acc_mode & MAY_WRITE) {
+        flags = MAP_FILE | MAP_SHARED | VMA_INTERNAL;
+        prot |= PROT_WRITE;
+    }
 
 
     while (mapoff + maplen < file->marker + size)
     while (mapoff + maplen < file->marker + size)
         maplen *= 2;
         maplen *= 2;
 
 
-    void * mapbuf =
-        (void *) DkStreamMap(hdl->pal_handle, NULL, prot, mapoff, maplen);
+    /* create the bookkeeping before allocating the memory */
+    void * mapbuf = bkeep_unmapped_any(maplen, prot, flags, hdl, mapoff,
+                                       "filebuf");
     if (!mapbuf)
     if (!mapbuf)
+        return -ENOMEM;
+
+    PAL_PTR mapped = DkStreamMap(hdl->pal_handle, mapbuf, PAL_PROT(prot, flags),
+                                 mapoff, maplen);
+
+    if (!mapped) {
+        bkeep_munmap(mapbuf, maplen, flags);
         return -PAL_ERRNO;
         return -PAL_ERRNO;
+    }
 
 
-    bkeep_mmap(mapbuf, maplen, prot, MAP_FILE|MAP_SHARED|VMA_INTERNAL,
-               hdl, mapoff, NULL);
+    assert((void *) mapped == mapbuf);
 
 
     file->mapbuf    = mapbuf;
     file->mapbuf    = mapbuf;
     file->mapoffset = mapoff;
     file->mapoffset = mapoff;
@@ -955,116 +964,133 @@ static int chroot_dput (struct shim_dentry * dent)
     return 0;
     return 0;
 }
 }
 
 
-#define DEFAULT_DBUF_SIZE   1024
-
 static int chroot_readdir (struct shim_dentry * dent,
 static int chroot_readdir (struct shim_dentry * dent,
                            struct shim_dirent ** dirent)
                            struct shim_dirent ** dirent)
 {
 {
-    int ret;
     struct shim_file_data * data;
     struct shim_file_data * data;
+    int ret;
+
     if ((ret = try_create_data(dent, NULL, 0, &data)) < 0)
     if ((ret = try_create_data(dent, NULL, 0, &data)) < 0)
         return ret;
         return ret;
 
 
     chroot_update_ino(dent);
     chroot_update_ino(dent);
+    const char * uri = qstrgetstr(&data->host_uri);
+    assert(strpartcmp_static(uri, "dir:"));
 
 
-    assert(strpartcmp_static(qstrgetstr(&data->host_uri), "dir:"));
-
-    PAL_HANDLE pal_hdl = DkStreamOpen(qstrgetstr(&data->host_uri),
-                                      PAL_ACCESS_RDONLY, 0, 0, 0);
+    PAL_HANDLE pal_hdl = DkStreamOpen(uri, PAL_ACCESS_RDONLY, 0, 0, 0);
     if (!pal_hdl)
     if (!pal_hdl)
         return -PAL_ERRNO;
         return -PAL_ERRNO;
 
 
-    int buf_size = 0, new_size = MAX_PATH;
-    int bytes;
-    char * buf = NULL, * new_buf;
-
-    int dbufsize = MAX_PATH;
-    struct shim_dirent * dbuf = malloc(dbufsize);
-    struct shim_dirent * d = dbuf, ** last = NULL;
-
-retry:
-    new_buf = __alloca(new_size);
-    if (buf)
-        memcpy(new_buf, buf, buf_size);
-    buf_size = new_size;
-    buf = new_buf;
-
-    while (1) {
-        bytes = DkStreamRead(pal_hdl, 0, buf_size, buf, NULL, 0);
+    size_t buf_size = MAX_PATH, bytes = 0;
+    char * buf = malloc(buf_size);
+    if (!buf) {
+        ret = -ENOMEM;
+        goto out_hdl;
+    }
 
 
-        if (bytes == 0) {
-            if (PAL_NATIVE_ERRNO == PAL_ERROR_ENDOFSTREAM)
-                break;
+    /*
+     * Try to read the directory list from the host. DkStreamRead
+     * does not accept an offset for directory listings, so if the buffer
+     * is too small we retry the whole read with a buffer twice as large.
+     */
+retry_read:
+    bytes = DkStreamRead(pal_hdl, 0, buf_size, buf, NULL, 0);
+    if (!bytes) {
+        ret = 0;
+        if (PAL_NATIVE_ERRNO == PAL_ERROR_ENDOFSTREAM)
+            goto out;
 
 
-            if (PAL_NATIVE_ERRNO == PAL_ERROR_OVERFLOW) {
-                new_size = buf_size * 2;
-                goto retry;
+        if (PAL_NATIVE_ERRNO == PAL_ERROR_OVERFLOW) {
+            char * new_buf = malloc(buf_size * 2);
+            if (!new_buf) {
+                ret = -ENOMEM;
+                goto out;
             }
             }
 
 
-            ret = -PAL_ERRNO;
-            goto out;
+            free(buf);
+            buf_size *= 2;
+            buf = new_buf;
+            goto retry_read;
         }
         }
 
 
-        char * b = buf, * next_b;
-        int blen;
-
-        while (b < buf + bytes) {
-            blen = strlen(b);
-            next_b = b + blen + 1;
-            bool isdir = false;
-
-            if (b[blen - 1] == '/') {
-                isdir = true;
-                b[blen - 1] = 0;
-                blen--;
-            }
+        ret = -PAL_ERRNO;
+        goto out;
+    }
 
 
-            int dsize = sizeof(struct shim_dirent) + blen + 1;
+    /* Now emitting the dirent data */
+    size_t dbuf_size = MAX_PATH;
+    struct shim_dirent * dbuf = malloc(dbuf_size);
+    if (!dbuf)
+        goto out;
 
 
-            if ((void *) d + dsize > (void *) dbuf + dbufsize) {
-                int newsize = dbufsize * 2;
-                while ((void *) d + dsize > (void *) dbuf + newsize)
-                    newsize *= 2;
+    struct shim_dirent * d = dbuf, ** last = NULL;
+    char * b = buf, * next_b;
+    int blen;
+
+    /* Scanning the directory names in the buffer */
+    while (b < buf + bytes) {
+        blen = strlen(b);
+        next_b = b + blen + 1;
+        bool isdir = false;
+
+        /* PAL convention: a name that ends with "/" denotes
+           a directory. */
+        if (b[blen - 1] == '/') {
+            isdir = true;
+            b[blen - 1] = 0;
+            blen--;
+        }
 
 
-                struct shim_dirent * new_dbuf = malloc(newsize);
+        /* Populating a dirent */
+        int dsize = sizeof(struct shim_dirent) + blen + 1;
 
 
-                memcpy(new_dbuf, dbuf, (void *) d - (void *) dbuf);
-                struct shim_dirent * d1 = new_dbuf;
-                struct shim_dirent * d2 = dbuf;
-                while (d2 != d) {
-                    d1->next = (void *) d1 + ((void *) d2->next - (void *) d2);
-                    d1 = d1->next;
-                    d2 = d2->next;
-                }
+        /* dbuf is not large enough, reallocate the dirent buffer */
+        if ((void *) d + dsize > (void *) dbuf + dbuf_size) {
+            int newsize = dbuf_size * 2;
+            while ((void *) d + dsize > (void *) dbuf + newsize)
+                newsize *= 2;
 
 
+            struct shim_dirent * new_dbuf = malloc(newsize);
+            if (!new_dbuf) {
+                ret = -ENOMEM;
                 free(dbuf);
                 free(dbuf);
-                dbuf = new_dbuf;
-                d = d1;
-                dbufsize = newsize;
+                goto out;
             }
             }
 
 
-            HASHTYPE hash = rehash_name(dent->ino, b, blen);
-
-            d->next = (void *) (d + 1) + blen + 1;
-            d->ino = hash;
-            d->type = isdir ? LINUX_DT_DIR : LINUX_DT_REG;
-            memcpy(d->name, b, blen + 1);
+            memcpy(new_dbuf, dbuf, (void *) d - (void *) dbuf);
+            struct shim_dirent * d1 = new_dbuf;
+            struct shim_dirent * d2 = dbuf;
+            while (d2 != d) {
+                d1->next = (void *) d1 + ((void *) d2->next - (void *) d2);
+                d1 = d1->next;
+                d2 = d2->next;
+            }
 
 
-            b = next_b;
-            last = &d->next;
-            d = d->next;
+            free(dbuf);
+            dbuf = new_dbuf;
+            d = d1;
+            dbuf_size = newsize;
         }
         }
-    }
 
 
-    if (!last) {
-        free(dbuf);
-        goto out;
+        /* Fill up the dirent buffer */
+        HASHTYPE hash = rehash_name(dent->ino, b, blen);
+
+        d->next = (void *) (d + 1) + blen + 1;
+        d->ino = hash;
+        d->type = isdir ? LINUX_DT_DIR : LINUX_DT_REG;
+        memcpy(d->name, b, blen + 1);
+
+        b = next_b;
+        last = &d->next;
+        d = d->next;
     }
     }
 
 
     *last = NULL;
     *last = NULL;
     *dirent = dbuf;
     *dirent = dbuf;
 
 
 out:
 out:
+    free(buf);
+out_hdl:
     DkObjectClose(pal_hdl);
     DkObjectClose(pal_hdl);
     return ret;
     return ret;
 }
 }
@@ -1082,7 +1108,6 @@ static int chroot_checkout (struct shim_handle * hdl)
 
 
     if (hdl->pal_handle) {
     if (hdl->pal_handle) {
         /*
         /*
-         * Chia-Che 8/24/2017:
          * if the file still exists in the host, no need to send
          * if the file still exists in the host, no need to send
          * the handle over RPC; otherwise, send it.
          * the handle over RPC; otherwise, send it.
          */
          */
@@ -1113,6 +1138,8 @@ static int chroot_migrate (void * checkpoint, void ** mount_data)
                     sizeof(struct mount_data) + 1;
                     sizeof(struct mount_data) + 1;
 
 
     void * new_data = malloc(alloc_len);
     void * new_data = malloc(alloc_len);
+    if (!new_data)
+        return -ENOMEM;
 
 
     memcpy(new_data, mdata, alloc_len);
     memcpy(new_data, mdata, alloc_len);
     *mount_data = new_data;
     *mount_data = new_data;

+ 5 - 6
LibOS/shim/src/fs/dev/fs.c

@@ -115,19 +115,18 @@ static int dev_random_mode (const char * name, mode_t * mode)
 }
 }
 
 
 static int dev_random_read (struct shim_handle * hdl, void * buf,
 static int dev_random_read (struct shim_handle * hdl, void * buf,
-                             size_t count)
+                            size_t count)
 {
 {
-    int rv;
-    rv = DkRandomBitsRead(buf, count);
+    int rv = DkRandomBitsRead(buf, count);
     return rv;
     return rv;
 }
 }
 
 
 static int dev_urandom_read (struct shim_handle * hdl, void * buf,
 static int dev_urandom_read (struct shim_handle * hdl, void * buf,
                              size_t count)
                              size_t count)
 {
 {
-    int rv;
-    rv = getrand(buf, count);
-    return rv;
+    // FIXME: this source of randomness is NOT cryptographically secure.
+    getrand(buf, count);
+    return count;
 }
 }
 
 
 static int dev_random_stat (const char * name, struct stat * stat)
 static int dev_random_stat (const char * name, struct stat * stat)

+ 3 - 4
LibOS/shim/src/fs/proc/info.c

@@ -50,9 +50,9 @@ static int proc_meminfo_open (struct shim_handle * hdl, const char * name,
         };
         };
 
 
 retry:
 retry:
-    if (str) free(str);
     max *= 2;
     max *= 2;
     len = 0;
     len = 0;
+    free(str);
     str = malloc(max);
     str = malloc(max);
     if (!str)
     if (!str)
         return -ENOMEM;
         return -ENOMEM;
@@ -105,9 +105,9 @@ static int proc_cpuinfo_open (struct shim_handle * hdl, const char * name,
         };
         };
 
 
 retry:
 retry:
-    if (str) free(str);
     max *= 2;
     max *= 2;
     len = 0;
     len = 0;
+    free(str);
     str = malloc(max);
     str = malloc(max);
     if (!str)
     if (!str)
         return -ENOMEM;
         return -ENOMEM;
@@ -132,13 +132,12 @@ retry:
         str[len] = 0;
         str[len] = 0;
     }
     }
 
 
-    struct shim_str_data * data = malloc(sizeof(struct shim_str_data));
+    struct shim_str_data * data = calloc(1, sizeof(struct shim_str_data));
     if (!data) {
     if (!data) {
         free(str);
         free(str);
         return -ENOMEM;
         return -ENOMEM;
     }
     }
 
 
-    memset(data, 0, sizeof(struct shim_str_data));
     data->str = str;
     data->str = str;
     data->len = len;
     data->len = len;
     hdl->type = TYPE_STR;
     hdl->type = TYPE_STR;

+ 90 - 21
LibOS/shim/src/fs/proc/thread.c

@@ -21,8 +21,6 @@
 #include <asm/unistd.h>
 #include <asm/unistd.h>
 #include <asm/prctl.h>
 #include <asm/prctl.h>
 
 
-#define DEFAULT_BUFFER_SIZE 256
-
 static int parse_thread_name (const char * name,
 static int parse_thread_name (const char * name,
                               const char ** next, int * next_len,
                               const char ** next, int * next_len,
                               const char ** nextnext)
                               const char ** nextnext)
@@ -482,6 +480,7 @@ static int proc_thread_maps_open (struct shim_handle * hdl,
     const char * next;
     const char * next;
     int next_len;
     int next_len;
     int pid = parse_thread_name(name, &next, &next_len, NULL);
     int pid = parse_thread_name(name, &next, &next_len, NULL);
+    int ret = 0;
 
 
     if (pid < 0)
     if (pid < 0)
         return pid;
         return pid;
@@ -491,54 +490,124 @@ static int proc_thread_maps_open (struct shim_handle * hdl,
     if (!thread)
     if (!thread)
         return -ENOENT;
         return -ENOENT;
 
 
-    int size = DEFAULT_BUFFER_SIZE;
-    char * strbuf = malloc(size);
-    int ret = 0, len = 0;
+    size_t count = DEFAULT_VMA_COUNT;
+    struct shim_vma_val * vmas = malloc(sizeof(struct shim_vma_val) * count);
 
 
-    if (!strbuf) {
+    if (!vmas) {
         ret = -ENOMEM;
         ret = -ENOMEM;
         goto out;
         goto out;
     }
     }
 
 
-retry:
-    ret = dump_all_vmas(thread, strbuf, size);
+retry_dump_vmas:
+    ret = dump_all_vmas(vmas, count);
 
 
     if (ret == -EOVERFLOW) {
     if (ret == -EOVERFLOW) {
-        char * newbuf = malloc(size * 2);
-        if (!newbuf) {
+        struct shim_vma_val * new_vmas
+                = malloc(sizeof(struct shim_vma_val) * count * 2);
+        if (!new_vmas) {
             ret = -ENOMEM;
             ret = -ENOMEM;
             goto err;
             goto err;
         }
         }
-        free(strbuf);
-        strbuf = newbuf;
-        size *= 2;
-        goto retry;
+        free(vmas);
+        vmas = new_vmas;
+        count *= 2;
+        goto retry_dump_vmas;
     }
     }
 
 
     if (ret < 0)
     if (ret < 0)
         goto err;
         goto err;
 
 
-    len = ret;
+#define DEFAULT_VMA_BUFFER_SIZE     256
+
+    count = ret;
+    size_t buffer_size = DEFAULT_VMA_BUFFER_SIZE, offset = 0;
+    char * buffer = malloc(buffer_size);
+    if (!buffer) {
+        ret = -ENOMEM;
+        goto err;
+    }
+
+    for (struct shim_vma_val * vma = vmas ; vma < vmas + count ; vma++) {
+        size_t old_offset = offset;
+        uint64_t start = (uint64_t) vma->addr;
+        uint64_t end   = (uint64_t) vma->addr + vma->length;
+        char pt[3] = {
+            (vma->prot & PROT_READ)  ? 'r' : '-',
+            (vma->prot & PROT_WRITE) ? 'w' : '-',
+            (vma->prot & PROT_EXEC)  ? 'x' : '-',
+        };
+        char pr = (vma->flags & MAP_PRIVATE) ? 'p' : 's';
+
+#define ADDR_FMT(addr) ((addr) > 0xffffffff ? "%lx" : "%08x")
+#define EMIT(fmt ...)                                                   \
+        do {                                                            \
+            offset += snprintf(buffer + offset, buffer_size - offset,   \
+                               fmt);                                    \
+        } while (0)
+
+retry_emit_vma:
+        if (vma->file) {
+            int dev_major = 0, dev_minor = 0;
+            unsigned long ino = vma->file->dentry ? vma->file->dentry->ino : 0;
+            const char * name = "[unknown]";
+
+            if (!qstrempty(&vma->file->path))
+                name = qstrgetstr(&vma->file->path);
+
+            EMIT(ADDR_FMT(start), start);
+            EMIT("-");
+            EMIT(ADDR_FMT(end),   end);
+            EMIT(" %c%c%c%c %08lx %02d:%02d %u %s\n", pt[0], pt[1], pt[2], pr,
+                 vma->offset, dev_major, dev_minor, ino, name);
+        } else {
+            EMIT(ADDR_FMT(start), start);
+            EMIT("-");
+            EMIT(ADDR_FMT(end),   end);
+            if (vma->comment[0])
+                EMIT(" %c%c%c%c 00000000 00:00 0 %s\n", pt[0], pt[1], pt[2], pr,
+                     vma->comment);
+            else
+                EMIT(" %c%c%c%c 00000000 00:00 0\n", pt[0], pt[1], pt[2], pr);
+        }
+
+        if (offset >= buffer_size) {
+            char * new_buffer = malloc(buffer_size * 2);
+            if (!new_buffer) {
+                ret = -ENOMEM;
+                goto err;
+            }
+
+            offset = old_offset;
+            memcpy(new_buffer, buffer, old_offset);
+            free(buffer);
+            buffer = new_buffer;
+            buffer_size *= 2;
+            goto retry_emit_vma;
+        }
+    }
 
 
-    struct shim_str_data * data = malloc(sizeof(struct shim_str_data));
+    struct shim_str_data * data = calloc(1, sizeof(struct shim_str_data));
     if (!data) {
     if (!data) {
         ret = -ENOMEM;
         ret = -ENOMEM;
         goto err;
         goto err;
     }
     }
 
 
-    memset(data, 0, sizeof(struct shim_str_data));
-    data->str = strbuf;
-    data->len = len;
-    hdl->type = TYPE_STR;
+    data->str  = buffer;
+    data->len  = offset;
+    hdl->type  = TYPE_STR;
     hdl->flags = flags & ~O_RDONLY;
     hdl->flags = flags & ~O_RDONLY;
     hdl->acc_mode = MAY_READ;
     hdl->acc_mode = MAY_READ;
     hdl->info.str.data = data;
     hdl->info.str.data = data;
     ret = 0;
     ret = 0;
 out:
 out:
     put_thread(thread);
     put_thread(thread);
+    if (vmas)
+        free_vma_val_array(vmas, count);
     return ret;
     return ret;
+
 err:
 err:
-    free(strbuf);
+    if (buffer)
+        free(buffer);
     goto out;
     goto out;
 }
 }
 
 

+ 1 - 0
LibOS/shim/src/fs/shim_dcache.c

@@ -61,6 +61,7 @@ static struct shim_dentry * alloc_dentry (void)
 
 
     memset(dent, 0, sizeof(struct shim_dentry));
     memset(dent, 0, sizeof(struct shim_dentry));
 
 
+    REF_SET(dent->ref_count, 0);
     dent->mode = NO_MODE;
     dent->mode = NO_MODE;
 
 
     INIT_LIST_HEAD(dent, hlist);
     INIT_LIST_HEAD(dent, hlist);

+ 71 - 46
LibOS/shim/src/fs/shim_fs.c

@@ -105,18 +105,15 @@ static int __mount_root (struct shim_dentry ** root)
         debug("mounting root filesystem: %s from %s\n", type, uri);
         debug("mounting root filesystem: %s from %s\n", type, uri);
         if ((ret = mount_fs(type, uri, "/", NULL, root, 0)) < 0) {
         if ((ret = mount_fs(type, uri, "/", NULL, root, 0)) < 0) {
             debug("mounting root filesystem failed (%d)\n", ret);
             debug("mounting root filesystem failed (%d)\n", ret);
-            goto out;
+            return ret;
         }
         }
-        goto out;
+        return ret;
     }
     }
 
 
     debug("mounting default root filesystem\n");
     debug("mounting default root filesystem\n");
     if ((ret = mount_fs("chroot", "file:", "/", NULL, root, 0)) < 0) {
     if ((ret = mount_fs("chroot", "file:", "/", NULL, root, 0)) < 0) {
         debug("mounting root filesystem failed (%d)\n", ret);
         debug("mounting root filesystem failed (%d)\n", ret);
-        goto out;
     }
     }
-
-out:
     return ret;
     return ret;
 }
 }
 
 
@@ -188,6 +185,9 @@ static int __mount_one_other (const char * key, int keylen)
 
 
 static int __mount_others (void)
 static int __mount_others (void)
 {
 {
+    char * keybuf;
+    int ret = 0;
+
     if (!root_config)
     if (!root_config)
         return 0;
         return 0;
 
 
@@ -197,11 +197,14 @@ static int __mount_others (void)
     if (keybuf_size < 0)
     if (keybuf_size < 0)
         return 0;
         return 0;
 
 
-    char * keybuf = __alloca(keybuf_size);
+    keybuf = malloc(keybuf_size);
+    if (!keybuf)
+        return -ENOMEM;
+
     nkeys = get_config_entries(root_config, "fs.mount", keybuf, keybuf_size);
     nkeys = get_config_entries(root_config, "fs.mount", keybuf, keybuf_size);
 
 
-    if (nkeys < 0)
-        return 0;
+    if (nkeys <= 0)
+        goto out;
 
 
     const char * key = keybuf, * next = NULL;
     const char * key = keybuf, * next = NULL;
     for (int n = 0 ; n < nkeys ; key = next, n++) {
     for (int n = 0 ; n < nkeys ; key = next, n++) {
@@ -209,10 +212,12 @@ static int __mount_others (void)
         next++;
         next++;
         int ret = __mount_one_other(key, next - key - 1);
         int ret = __mount_one_other(key, next - key - 1);
         if (ret < 0)
         if (ret < 0)
-            return ret;
+            goto out;
     }
     }
 
 
-    return 0;
+out:
+    free(keybuf);
+    return ret;
 }
 }
 
 
 int init_mount_root (void)
 int init_mount_root (void)
@@ -248,7 +253,7 @@ int init_mount (void)
 static inline struct shim_fs * find_fs (const char * type)
 static inline struct shim_fs * find_fs (const char * type)
 {
 {
     struct shim_fs * fs = NULL;
     struct shim_fs * fs = NULL;
-    int len = strlen(type);
+    size_t len = strlen(type);
 
 
     for (int i = 0 ; i < NUM_MOUNTABLE_FS ; i++)
     for (int i = 0 ; i < NUM_MOUNTABLE_FS ; i++)
         if (!memcmp(type, mountable_fs[i].name, len + 1)) {
         if (!memcmp(type, mountable_fs[i].name, len + 1)) {
@@ -261,7 +266,7 @@ static inline struct shim_fs * find_fs (const char * type)
 
 
 int search_builtin_fs (const char * type, struct shim_mount ** fs)
 int search_builtin_fs (const char * type, struct shim_mount ** fs)
 {
 {
-    int len = strlen(type);
+    size_t len = strlen(type);
 
 
     for (int i = 0 ; i < NUM_BUILTIN_FS ; i++)
     for (int i = 0 ; i < NUM_BUILTIN_FS ; i++)
         if (!memcmp(type, builtin_fs[i]->type, len + 1)) {
         if (!memcmp(type, builtin_fs[i]->type, len + 1)) {
@@ -291,6 +296,10 @@ int __mount_fs (struct shim_mount * mount, struct shim_dentry * dent)
         if (ret < 0) {
         if (ret < 0) {
             /* Try getting rid of ESKIPPED case */
             /* Try getting rid of ESKIPPED case */
             assert (ret != -ESKIPPED);
             assert (ret != -ESKIPPED);
+            // TODO: `mount_root` leaks here, but fixing this would require
+            // fixing `get_new_dentry` semantics (its result sometimes has
+            // its refcount set to 0).
+            // put_dentry(mount_root);
             return ret;
             return ret;
         }
         }
         mount->root = mount_root;
         mount->root = mount_root;
@@ -340,6 +349,34 @@ int __mount_fs (struct shim_mount * mount, struct shim_dentry * dent)
     return 0;
     return 0;
 }
 }
 
 
+// Extracts the last component of the `path`. If there's none, `*last_comp_len`
+// is set to 0 and `*last_comp` is set to NULL.
+static void find_last_component(const char* path, const char** last_comp,
+                                size_t* last_comp_len) {
+    *last_comp = NULL;
+    size_t last_len = 0;
+    size_t path_len = strlen(path);
+    if (path_len == 0)
+        goto out;
+
+    // Drop any trailing slashes.
+    const char* last = path + path_len - 1;
+    while (last > path && *last == '/')
+        last--;
+    if (*last == '/')
+        goto out;
+
+    // Skip the last component.
+    last_len = 1;
+    while (last > path && *(last-1) != '/') {
+        last--;
+        last_len++;
+    }
+    *last_comp = last;
+out:
+    *last_comp_len = last_len;
+}
+
 /* Parent is optional, but helpful.
 /* Parent is optional, but helpful.
  * dentp (optional) memoizes the dentry of the newly-mounted FS, on success. 
  * dentp (optional) memoizes the dentry of the newly-mounted FS, on success. 
  *
  *
@@ -349,7 +386,7 @@ int __mount_fs (struct shim_mount * mount, struct shim_dentry * dent)
  */
  */
 int mount_fs (const char * type, const char * uri, const char * mount_point,
 int mount_fs (const char * type, const char * uri, const char * mount_point,
               struct shim_dentry *parent, struct shim_dentry **dentp,
               struct shim_dentry *parent, struct shim_dentry **dentp,
-              int make_ancestor)
+              bool make_ancestor)
 {
 {
     int ret = 0;
     int ret = 0;
     struct shim_fs * fs = find_fs(type);
     struct shim_fs * fs = find_fs(type);
@@ -360,37 +397,23 @@ int mount_fs (const char * type, const char * uri, const char * mount_point,
     }
     }
 
 
     /* Split the mount point into the prefix and atom */
     /* Split the mount point into the prefix and atom */
-    int mount_point_len = strlen(mount_point);
-    const char * last = &mount_point[mount_point_len - 1];
-    int left = mount_point_len;
-    int last_len = 1;
-    // Drop any trailing slashes
-    while (left && *last == '/') {
-        left--;
-        last--;
-        if (last_len != 0)
-            last_len--;
-    }
-    // Skip the atom
-    while (left && *last != '/') {
-        left--;
-        last--;
-        last_len++;
-    }
-    if (*last == '/') {
-        // Move forward one
-        last++;
-        last_len--;
+    size_t mount_point_len = strlen(mount_point);
+    if (mount_point_len == 0) {
+        ret = -EINVAL;
+        goto out;
     }
     }
+    const char* last;
+    size_t last_len;
+    find_last_component(mount_point, &last, &last_len);
 
 
     if (!parent) {
     if (!parent) {
         // See if we are not at the root mount
         // See if we are not at the root mount
-        if (mount_point_len != 1 || mount_point[0] != '/') {
+        if (last_len > 0) {
             // Look up the parent
             // Look up the parent
-            char * parent_path = __alloca(mount_point_len);
-            memset(parent_path, 0, mount_point_len);
-            assert(last_len >= 1 && (mount_point_len - last_len) >= 0);
-            memcpy(parent_path, mount_point, mount_point_len - last_len);
+            size_t parent_len = last - mount_point;
+            char * parent_path = __alloca(parent_len + 1);
+            memcpy(parent_path, mount_point, parent_len);
+            parent_path[parent_len] = 0;
             if ((ret = __path_lookupat(dentry_root, parent_path, 0, &parent, 0,
             if ((ret = __path_lookupat(dentry_root, parent_path, 0, &parent, 0,
                                        dentry_root->fs, make_ancestor)) < 0) {
                                        dentry_root->fs, make_ancestor)) < 0) {
                 debug("Path lookup failed %d\n", ret);
                 debug("Path lookup failed %d\n", ret);
@@ -406,10 +429,10 @@ int mount_fs (const char * type, const char * uri, const char * mount_point,
 
 
     /* call fs-specific mount to allocate mount_data */
     /* call fs-specific mount to allocate mount_data */
     if ((ret = fs->fs_ops->mount(uri, mount_point, &mount_data)) < 0)
     if ((ret = fs->fs_ops->mount(uri, mount_point, &mount_data)) < 0)
-        goto out;
+        goto out_with_unlock;
 
 
 
 
-    int uri_len = uri ? strlen(uri) : 0;
+    size_t uri_len = uri ? strlen(uri) : 0;
     qstrsetstr(&mount->path, mount_point, mount_point_len);
     qstrsetstr(&mount->path, mount_point, mount_point_len);
     qstrsetstr(&mount->uri, uri, uri_len);
     qstrsetstr(&mount->uri, uri, uri_len);
     memcpy(mount->type, fs->name, sizeof(fs->name));
     memcpy(mount->type, fs->name, sizeof(fs->name));
@@ -420,14 +443,14 @@ int mount_fs (const char * type, const char * uri, const char * mount_point,
     /* Get the negative dentry from the cache, if one exists */
     /* Get the negative dentry from the cache, if one exists */
     struct shim_dentry * dent, *dent2;
     struct shim_dentry * dent, *dent2;
     /* Special case the root */
     /* Special case the root */
-    if (mount_point_len == 1 && mount_point[0] == '/')
+    if (last_len == 0)
         dent = dentry_root;
         dent = dentry_root;
     else {
     else {
         dent = __lookup_dcache(parent, last,
         dent = __lookup_dcache(parent, last,
                                last_len,
                                last_len,
                                NULL, 0, NULL);
                                NULL, 0, NULL);
 
 
-        if(!dent) {
+        if (!dent) {
             dent = get_new_dentry(mount, parent, last, last_len, NULL);
             dent = get_new_dentry(mount, parent, last, last_len, NULL);
             get_dentry(dent);
             get_dentry(dent);
         }
         }
@@ -443,7 +466,7 @@ int mount_fs (const char * type, const char * uri, const char * mount_point,
     /*Now go ahead and do a lookup so the dentry is valid */
     /*Now go ahead and do a lookup so the dentry is valid */
     if ((ret = __path_lookupat(dentry_root, mount_point, 0, &dent2, 0,
     if ((ret = __path_lookupat(dentry_root, mount_point, 0, &dent2, 0,
                                parent ? parent->fs : mount, make_ancestor)) < 0) 
                                parent ? parent->fs : mount, make_ancestor)) < 0) 
-        goto out;
+        goto out_with_unlock;
 
 
     assert(dent == dent2);
     assert(dent == dent2);
 
 
@@ -456,7 +479,7 @@ int mount_fs (const char * type, const char * uri, const char * mount_point,
 
 
     // If we made it this far and the dentry is still negative, clear
     // If we made it this far and the dentry is still negative, clear
     // the negative flag from the dentry. 
     // the negative flag from the dentry. 
-    if ((!ret) && (dent->state & DENTRY_NEGATIVE)) 
+    if (!ret && (dent->state & DENTRY_NEGATIVE))
         dent->state &= ~DENTRY_NEGATIVE;
         dent->state &= ~DENTRY_NEGATIVE;
     
     
     /* Set the file system at the mount point properly */
     /* Set the file system at the mount point properly */
@@ -464,8 +487,10 @@ int mount_fs (const char * type, const char * uri, const char * mount_point,
     
     
     if (dentp && !ret)
     if (dentp && !ret)
         *dentp = dent;
         *dentp = dent;
-out:
+
+out_with_unlock:
     unlock(dcache_lock);
     unlock(dcache_lock);
+out:
     return ret;
     return ret;
 }
 }
 
 

+ 2 - 2
LibOS/shim/src/fs/shim_fs_hash.c

@@ -176,7 +176,7 @@ HASHTYPE rehash_name (HASHTYPE parent_hbuf,
     return ret;
     return ret;
 }
 }
 
 
-HASHTYPE rehash_path (HASHTYPE ancester_hbuf,
+HASHTYPE rehash_path (HASHTYPE ancestor_hbuf,
                       const char * path, int size, const char * sep)
                       const char * path, int size, const char * sep)
 {
 {
     HASHTYPE ctx = 0;
     HASHTYPE ctx = 0;
@@ -203,6 +203,6 @@ HASHTYPE rehash_path (HASHTYPE ancester_hbuf,
         digest ^= ctx;
         digest ^= ctx;
     }
     }
 
 
-    hbuf = ancester_hbuf ^ digest;
+    hbuf = ancestor_hbuf ^ digest;
     return hbuf;
     return hbuf;
 }
 }

+ 17 - 4
LibOS/shim/src/fs/shim_namei.c

@@ -25,6 +25,8 @@
  * directory cache.
  * directory cache.
  */
  */
 
 
+#include <stdbool.h>
+
 #include <shim_internal.h>
 #include <shim_internal.h>
 #include <shim_utils.h>
 #include <shim_utils.h>
 #include <shim_thread.h>
 #include <shim_thread.h>
@@ -175,6 +177,8 @@ int lookup_dentry (struct shim_dentry * parent, const char * name, int namelen,
 
 
     if (!dent) {
     if (!dent) {
         dent = get_new_dentry(fs, parent, name, namelen, NULL);
         dent = get_new_dentry(fs, parent, name, namelen, NULL);
+        if (!dent)
+            return -ENOMEM;
         do_fs_lookup = 1;
         do_fs_lookup = 1;
         // In the case we make a new dentry, go ahead and increment the
         // In the case we make a new dentry, go ahead and increment the
         // ref count; in other cases, __lookup_dcache does this
         // ref count; in other cases, __lookup_dcache does this
@@ -257,7 +261,7 @@ int lookup_dentry (struct shim_dentry * parent, const char * name, int namelen,
  */
  */
 int __path_lookupat (struct shim_dentry * start, const char * path, int flags,
 int __path_lookupat (struct shim_dentry * start, const char * path, int flags,
                      struct shim_dentry ** dent, int link_depth,
                      struct shim_dentry ** dent, int link_depth,
-                     struct shim_mount * fs, int make_ancestor)
+                     struct shim_mount * fs, bool make_ancestor)
 {
 {
     // Basic idea: recursively iterate over path, peeling off one atom at a
     // Basic idea: recursively iterate over path, peeling off one atom at a
     // time.
     // time.
@@ -390,6 +394,8 @@ int __path_lookupat (struct shim_dentry * start, const char * path, int flags,
             get_mount(my_dent->fs);
             get_mount(my_dent->fs);
             err = __path_lookupat (my_dent, my_path, flags, dent, link_depth,
             err = __path_lookupat (my_dent, my_path, flags, dent, link_depth,
                                    my_dent->fs, make_ancestor);
                                    my_dent->fs, make_ancestor);
+            if (err < 0)
+                goto out;
             /* If we aren't returning a live reference to the target dentry, go
             /* If we aren't returning a live reference to the target dentry, go
              * ahead and release the ref count when we unwind the recursion.
              * ahead and release the ref count when we unwind the recursion.
              */
              */
@@ -401,7 +407,8 @@ int __path_lookupat (struct shim_dentry * start, const char * path, int flags,
                 my_dent->state |= DENTRY_ANCESTOR;
                 my_dent->state |= DENTRY_ANCESTOR;
                 my_dent->state |= DENTRY_ISDIRECTORY;
                 my_dent->state |= DENTRY_ISDIRECTORY;
                 my_dent->state &= ~DENTRY_NEGATIVE;
                 my_dent->state &= ~DENTRY_NEGATIVE;
-                if (err == -ENOENT) err = 0;
+                if (err == -ENOENT)
+                    err = 0;
             }
             }
             base_case = 1;
             base_case = 1;
         }
         }
@@ -595,6 +602,10 @@ int dentry_open (struct shim_handle * hdl, struct shim_dentry * dent,
         hdl->info.dir.ptr = (void *)-1;
         hdl->info.dir.ptr = (void *)-1;
     }
     }
     path = dentry_get_path(dent, true, &size);
     path = dentry_get_path(dent, true, &size);
+    if (!path) {
+        ret = -ENOMEM;
+        goto out;
+    }
     qstrsetstr(&hdl->path, path, size);
     qstrsetstr(&hdl->path, path, size);
 
 
     /* truncate the file if O_TRUNC is given */
     /* truncate the file if O_TRUNC is given */
@@ -676,8 +687,10 @@ int list_directory_dentry (struct shim_dentry *dent) {
 
 
     struct shim_dirent * dirent = NULL;
     struct shim_dirent * dirent = NULL;
 
 
-    if ((ret = fs->d_ops->readdir(dent, &dirent)) < 0 || !dirent)
+    if ((ret = fs->d_ops->readdir(dent, &dirent)) < 0 || !dirent) {
+        dirent = NULL;
         goto done_read;
         goto done_read;
+    }
     
     
     struct shim_dirent * d = dirent;
     struct shim_dirent * d = dirent;
     for ( ; d ; d = d->next) {
     for ( ; d ; d = d->next) {
@@ -697,11 +710,11 @@ int list_directory_dentry (struct shim_dentry *dent) {
         child->ino = d->ino;
         child->ino = d->ino;
     }
     }
 
 
-    free(dirent);
     dent->state |= DENTRY_LISTED;
     dent->state |= DENTRY_LISTED;
 
 
 done_read:
 done_read:
     unlock(dcache_lock);
     unlock(dcache_lock);
+    free(dirent);
     return ret;
     return ret;
 }
 }
 
 

+ 0 - 1
LibOS/shim/src/fs/str/fs.c

@@ -163,7 +163,6 @@ int str_write (struct shim_handle * hdl, const void * buf,
         }
         }
 
 
         char * newbuf = malloc(newlen);
         char * newbuf = malloc(newlen);
-
         if (!newbuf)
         if (!newbuf)
             return -ENOMEM;
             return -ENOMEM;
 
 

+ 1 - 2
LibOS/shim/src/ipc/shim_ipc.c

@@ -254,11 +254,10 @@ struct shim_ipc_info * discover_client (struct shim_ipc_port * port,
 
 
 struct shim_process * create_new_process (bool inherit_parent)
 struct shim_process * create_new_process (bool inherit_parent)
 {
 {
-    struct shim_process * new_process = malloc(sizeof(struct shim_process));
+    struct shim_process * new_process = calloc(1, sizeof(struct shim_process));
     if (!new_process)
     if (!new_process)
         return NULL;
         return NULL;
 
 
-    memset(new_process, 0, sizeof(struct shim_process));
     new_process->parent = get_new_ipc_info(cur_process.vmid, NULL, 0);
     new_process->parent = get_new_ipc_info(cur_process.vmid, NULL, 0);
 
 
     if (!inherit_parent)
     if (!inherit_parent)

+ 3 - 7
LibOS/shim/src/ipc/shim_ipc_nsimpl.h

@@ -174,12 +174,11 @@ static int __extend_range_bitmap (int expected)
     if (range_map)
     if (range_map)
         size = range_map->map_size;
         size = range_map->map_size;
 
 
-    while(size <= expected)
+    while (size <= expected)
         size *= 2;
         size *= 2;
 
 
     struct range_bitmap * new_map = malloc(sizeof(struct range_bitmap) +
     struct range_bitmap * new_map = malloc(sizeof(struct range_bitmap) +
                                            size / BITS);
                                            size / BITS);
-
     if (!new_map)
     if (!new_map)
         return -ENOMEM;
         return -ENOMEM;
 
 
@@ -353,7 +352,6 @@ int CONCAT3(add, NS, subrange) (IDTYPE idx, IDTYPE owner,
     int off = (idx - 1) / RANGE_SIZE, err = 0;
     int off = (idx - 1) / RANGE_SIZE, err = 0;
     IDTYPE base = off * RANGE_SIZE + 1;
     IDTYPE base = off * RANGE_SIZE + 1;
     struct subrange * s = malloc(sizeof(struct subrange));
     struct subrange * s = malloc(sizeof(struct subrange));
-
     if (!s)
     if (!s)
         return -ENOMEM;
         return -ENOMEM;
 
 
@@ -383,12 +381,11 @@ int CONCAT3(add, NS, subrange) (IDTYPE idx, IDTYPE owner,
     }
     }
 
 
     if (!r->subranges) {
     if (!r->subranges) {
-        r->subranges = malloc(sizeof(struct sub_map));
+        r->subranges = calloc(1, sizeof(struct sub_map));
         if (!r->subranges) {
         if (!r->subranges) {
             err = -ENOMEM;
             err = -ENOMEM;
             goto failed;
             goto failed;
         }
         }
-        memset(r->subranges, 0, sizeof(struct sub_map));
     }
     }
 
 
     struct subrange ** m = &r->subranges->map[idx - base];
     struct subrange ** m = &r->subranges->map[idx - base];
@@ -645,10 +642,9 @@ IDTYPE CONCAT2(allocate, NS) (IDTYPE min, IDTYPE max)
         if (idx < base)
         if (idx < base)
             idx = base;
             idx = base;
         if (!r->used) {
         if (!r->used) {
-            r->used = malloc(sizeof(struct idx_bitmap));
+            r->used = calloc(1, sizeof(struct idx_bitmap));
             if (!r->used)
             if (!r->used)
                 continue;
                 continue;
-            memset(r->used, 0, sizeof(struct idx_bitmap));
         }
         }
 
 
         int i = (idx - base) / BITS;
         int i = (idx - base) / BITS;

+ 3 - 3
LibOS/shim/src/ipc/shim_ipc_pid.c

@@ -298,8 +298,8 @@ int ipc_pid_retstatus_callback (IPC_CALLBACK_ARGS)
         struct pid_status ** status = (struct pid_status **) obj->private;
         struct pid_status ** status = (struct pid_status **) obj->private;
 
 
         if (status) {
         if (status) {
-            *status = remalloc(msgin->status, sizeof(struct pid_status) *
-                               msgin->nstatus);
+            *status = malloc_copy(msgin->status, sizeof(struct pid_status) *
+                                  msgin->nstatus);
 
 
             obj->retval = msgin->nstatus;
             obj->retval = msgin->nstatus;
         }
         }
@@ -619,7 +619,7 @@ int ipc_pid_retmeta_callback (IPC_CALLBACK_ARGS)
 
 
         if (data)
         if (data)
             *data = msgin->datasize ?
             *data = msgin->datasize ?
-                    remalloc(msgin->data, msgin->datasize) : NULL;
+                    malloc_copy(msgin->data, msgin->datasize) : NULL;
 
 
         obj->retval = msgin->datasize;
         obj->retval = msgin->datasize;
 
 

+ 2 - 1
LibOS/shim/src/ipc/shim_ipc_sysv.c

@@ -962,7 +962,8 @@ int ipc_sysv_semreply_callback (IPC_CALLBACK_ARGS)
 
 
     PAL_NUM ** semids = obj->private;
     PAL_NUM ** semids = obj->private;
     if (semids)
     if (semids)
-        *semids = remalloc(msgin->host_sem_ids, sizeof(PAL_NUM) * msgin->nsems);
+        *semids = malloc_copy(msgin->host_sem_ids,
+                              sizeof(PAL_NUM) * msgin->nsems);
     obj->retval = msgin->nsems;
     obj->retval = msgin->nsems;
 
 
     if (obj->thread)
     if (obj->thread)

+ 1 - 1
LibOS/shim/src/shim-debug.map

@@ -7,5 +7,5 @@ SHIM {
         memcpy; memmove; memset; memcmp;
         memcpy; memmove; memset; memcmp;
         __htonl; __ntohl; __htons; __ntohs; inet_pton;
         __htonl; __ntohl; __htons; __ntohs; inet_pton;
         vfputchar; vfputs; vfprintf; snprintf;
         vfputchar; vfputs; vfprintf; snprintf;
-        malloc; free; remalloc;
+        malloc; free; malloc_copy;
 };
 };

+ 178 - 88
LibOS/shim/src/shim_checkpoint.c

@@ -468,12 +468,12 @@ static int send_checkpoint_on_stream (PAL_HANDLE stream,
         }
         }
     }
     }
 
 
-    int total_bytes = store->offset;
-    int bytes = 0;
+    size_t total_bytes = store->offset;
+    size_t bytes = 0;
 
 
     do {
     do {
-        int ret = DkStreamWrite(stream, 0, total_bytes - bytes,
-                                (void *) store->base + bytes, NULL);
+        size_t ret = DkStreamWrite(stream, 0, total_bytes - bytes,
+                                   (void *) store->base + bytes, NULL);
 
 
         if (!ret)
         if (!ret)
             return -PAL_ERRNO;
             return -PAL_ERRNO;
@@ -484,18 +484,21 @@ static int send_checkpoint_on_stream (PAL_HANDLE stream,
     ADD_PROFILE_OCCURENCE(migrate_send_on_stream, total_bytes);
     ADD_PROFILE_OCCURENCE(migrate_send_on_stream, total_bytes);
 
 
     for (int i = 0 ; i < mem_nentries ; i++) {
     for (int i = 0 ; i < mem_nentries ; i++) {
-        int mem_size = mem_entries[i]->size;
+        size_t mem_size = mem_entries[i]->size;
         void * mem_addr = mem_entries[i]->addr;
         void * mem_addr = mem_entries[i]->addr;
         bytes = 0;
         bytes = 0;
         do {
         do {
-            int ret = DkStreamWrite(stream, 0, mem_size - bytes,
-                                    mem_addr + bytes, NULL);
+            size_t ret = DkStreamWrite(stream, 0, mem_size - bytes,
+                                       mem_addr + bytes, NULL);
             if (!ret)
             if (!ret)
                 return -PAL_ERRNO;
                 return -PAL_ERRNO;
 
 
             bytes += ret;
             bytes += ret;
         } while (bytes < mem_entries[i]->size);
         } while (bytes < mem_entries[i]->size);
 
 
+        if (!(mem_entries[i]->prot & PAL_PROT_READ))
+            DkVirtualMemoryProtect(mem_addr, mem_size, mem_entries[i]->prot);
+
         mem_entries[i]->size = mem_size;
         mem_entries[i]->size = mem_size;
         ADD_PROFILE_OCCURENCE(migrate_send_on_stream, mem_size);
         ADD_PROFILE_OCCURENCE(migrate_send_on_stream, mem_size);
     }
     }
@@ -613,8 +616,8 @@ int restore_checkpoint (struct cp_header * cphdr, struct mem_header * memhdr,
         rs_func rs = (&__rs_func) [cpent->cp_type - CP_FUNC_BASE];
         rs_func rs = (&__rs_func) [cpent->cp_type - CP_FUNC_BASE];
         ret = (*rs) (cpent, base, offset, rebase);
         ret = (*rs) (cpent, base, offset, rebase);
         if (ret < 0) {
         if (ret < 0) {
-            debug("restoring %s failed at %p (err=%d)\n", CP_FUNC_NAME(cpent->cp_type),
-                  base + offset, -ret);
+            sys_printf("restore_checkpoint() at %s (%d)\n",
+                       CP_FUNC_NAME(cpent->cp_type), ret);
             return ret;
             return ret;
         }
         }
 next:
 next:
@@ -801,36 +804,55 @@ int receive_handles_on_stream (struct palhdl_header * hdr, ptr_t base,
     return 0;
     return 0;
 }
 }
 
 
-static void * cp_alloc (struct shim_cp_store * store, void * addr, int size)
+static void * cp_alloc (struct shim_cp_store * store, void * addr, size_t size)
 {
 {
     if (addr) {
     if (addr) {
-        // Caller specified an exact region to alloc.
-        struct shim_vma * vma;
-        bool found = !lookup_overlap_vma(addr, size, &vma);
-        if (found) {
-            bool allocable = vma->addr == addr && vma->length == size
-                             && (vma->flags & VMA_UNMAPPED);
-            if (!allocable) {
-                put_vma(vma);
-                return NULL;
-            }
-        }
-        return DkVirtualMemoryAlloc(addr, size, 0,
-                                    PAL_PROT_READ|PAL_PROT_WRITE);
+        /*
+         * If the checkpoint needs more space, try to extend the checkpoint
+         * store at the current address.
+         */
+        debug("try extend checkpoint store: %p-%p (size = %ld)\n",
+              addr, addr + size, size);
+
+        if (bkeep_mmap(addr, size, PROT_READ|PROT_WRITE, CP_VMA_FLAGS,
+                       NULL, 0, "cpstore") < 0)
+            return NULL;
     } else {
     } else {
-        // Alloc on any address, with specified size.
-        // We need to retry because `get_unmapped_vma_for_cp` is randomized.
-        // TODO: Fix this to remove the need for retrying.
-        while (true) {
-            addr = get_unmapped_vma_for_cp(size);
-            if (!addr)
-                return NULL;
-            addr = (void *) DkVirtualMemoryAlloc(addr, size, 0,
-                                                 PAL_PROT_READ|PAL_PROT_WRITE);
-            if (addr)
-                return addr;
-        }
+        /*
+         * Here we use a strategy to reduce internal fragmentation of virtual
+         * memory space. Because we need a relatively large, contiguous space
+         * for dumping the checkpoint data, internal fragmentation can cause
+         * the process to exhaust the virtual address space after forking a few
+         * times. The previous space used for checkpoint may be fragmented
+         * at the next fork.
+         *
+         * A simple trick we use here is to reserve some space right after the
+         * checkpoint space. The reserved space is half of the size of the
+         * checkpoint space, but can be further fine-tuned.
+         */
+        size_t reserve_size = ALIGN_UP(size >> 1);
+
+        debug("try allocate checkpoint store (size = %ld, reserve = %ld)\n",
+              size, reserve_size);
+
+        /*
+         * Allocating the checkpoint space at the first space found from the
+         * top of the virtual address space.
+         */
+        addr = bkeep_unmapped_any(size + reserve_size, PROT_READ|PROT_WRITE,
+                                  CP_VMA_FLAGS, NULL, 0, "cpstore");
+        if (!addr)
+            return NULL;
+
+        bkeep_munmap(addr + size, reserve_size, CP_VMA_FLAGS);
     }
     }
+
+    addr = (void *) DkVirtualMemoryAlloc(addr, size, 0,
+                                         PAL_PROT_READ|PAL_PROT_WRITE);
+    if (!addr)
+        bkeep_munmap(addr, size, CP_VMA_FLAGS);
+
+    return addr;
 }
 }
 
 
 DEFINE_PROFILE_CATAGORY(migrate_proc, migrate);
 DEFINE_PROFILE_CATAGORY(migrate_proc, migrate);
@@ -847,6 +869,18 @@ DEFINE_PROFILE_INTERVAL(migrate_send_pal_handles, migrate_proc);
 DEFINE_PROFILE_INTERVAL(migrate_free_checkpoint,  migrate_proc);
 DEFINE_PROFILE_INTERVAL(migrate_free_checkpoint,  migrate_proc);
 DEFINE_PROFILE_INTERVAL(migrate_wait_response,    migrate_proc);
 DEFINE_PROFILE_INTERVAL(migrate_wait_response,    migrate_proc);
 
 
+static bool warn_no_gipc __attribute_migratable = true;
+
+/*
+ * Create a new process and migrate the process states to the new process.
+ *
+ * @migrate: migration function defined by the caller
+ * @exec: the executable to load in the new process
+ * @argv: arguments passed to the new process
+ * @thread: thread handle to be migrated to the new process
+ *
+ * The remaining arguments are passed into the migration function.
+ */
 int do_migrate_process (int (*migrate) (struct shim_cp_store *,
 int do_migrate_process (int (*migrate) (struct shim_cp_store *,
                                         struct shim_thread *,
                                         struct shim_thread *,
                                         struct shim_process *, va_list),
                                         struct shim_process *, va_list),
@@ -867,6 +901,12 @@ int do_migrate_process (int (*migrate) (struct shim_cp_store *,
 #endif
 #endif
     BEGIN_PROFILE_INTERVAL();
     BEGIN_PROFILE_INTERVAL();
 
 
+    /*
+     * Create the process first. The new process requires some time
+     * to initialize before starting to receive checkpoint data.
+     * Parallelizing the process creation and checkpointing can improve
+     * the latency of forking.
+     */
     PAL_HANDLE proc = DkProcessCreate(exec ? qstrgetstr(&exec->uri) :
     PAL_HANDLE proc = DkProcessCreate(exec ? qstrgetstr(&exec->uri) :
                                       pal_control.executable,
                                       pal_control.executable,
                                       0, argv);
                                       0, argv);
@@ -878,6 +918,11 @@ int do_migrate_process (int (*migrate) (struct shim_cp_store *,
 
 
     SAVE_PROFILE_INTERVAL(migrate_create_process);
     SAVE_PROFILE_INTERVAL(migrate_create_process);
 
 
+    /*
+     * Detect if GIPC is supported by the host. If GIPC is not supported
+     * forking may be slow because we have to use RPC streams for migrating
+     * user memory.
+     */
     bool use_gipc = false;
     bool use_gipc = false;
     PAL_NUM gipc_key;
     PAL_NUM gipc_key;
     PAL_HANDLE gipc_hdl = DkCreatePhysicalMemoryChannel(&gipc_key);
     PAL_HANDLE gipc_hdl = DkCreatePhysicalMemoryChannel(&gipc_key);
@@ -887,10 +932,14 @@ int do_migrate_process (int (*migrate) (struct shim_cp_store *,
         use_gipc = true;
         use_gipc = true;
         SAVE_PROFILE_INTERVAL(migrate_create_gipc);
         SAVE_PROFILE_INTERVAL(migrate_create_gipc);
     } else {
     } else {
-        sys_printf("WARNING: no physical memory support, process creation "
-                   "will be slow.\n");
+        if (warn_no_gipc) {
+            warn_no_gipc = false;
+            sys_printf("WARNING: no physical memory support, process creation "
+                       "may be slow.\n");
+        }
     }
     }
 
 
+    /* Create process and IPC bookkeeping */
     if (!(new_process = create_new_process(true))) {
     if (!(new_process = create_new_process(true))) {
         ret = -ENOMEM;
         ret = -ENOMEM;
         goto err;
         goto err;
@@ -903,6 +952,7 @@ int do_migrate_process (int (*migrate) (struct shim_cp_store *,
 
 
     SAVE_PROFILE_INTERVAL(migrate_connect_ipc);
     SAVE_PROFILE_INTERVAL(migrate_connect_ipc);
 
 
+    /* Allocate a space for dumping the checkpoint data. */
     cpstore = __alloca(sizeof(struct shim_cp_store));
     cpstore = __alloca(sizeof(struct shim_cp_store));
     memset(cpstore, 0, sizeof(struct shim_cp_store));
     memset(cpstore, 0, sizeof(struct shim_cp_store));
     cpstore->alloc    = cp_alloc;
     cpstore->alloc    = cp_alloc;
@@ -910,10 +960,14 @@ int do_migrate_process (int (*migrate) (struct shim_cp_store *,
     cpstore->bound    = CP_INIT_VMA_SIZE;
     cpstore->bound    = CP_INIT_VMA_SIZE;
 
 
     while (1) {
     while (1) {
-        debug("try allocate checkpoint store (size = %d)\n", cpstore->bound);
+        /*
+         * Try allocating a space of a certain size. If the allocation fails,
+         * continue to try with smaller sizes.
+         */
         cpstore->base = (ptr_t) cp_alloc(cpstore, 0, cpstore->bound);
         cpstore->base = (ptr_t) cp_alloc(cpstore, 0, cpstore->bound);
         if (cpstore->base)
         if (cpstore->base)
             break;
             break;
+
         cpstore->bound >>= 1;
         cpstore->bound >>= 1;
         if (cpstore->bound < allocsize)
         if (cpstore->bound < allocsize)
             break;
             break;
@@ -927,6 +981,7 @@ int do_migrate_process (int (*migrate) (struct shim_cp_store *,
 
 
     SAVE_PROFILE_INTERVAL(migrate_init_checkpoint);
     SAVE_PROFILE_INTERVAL(migrate_init_checkpoint);
 
 
+    /* Calling the migration function defined by the caller. */
     va_list ap;
     va_list ap;
     va_start(ap, thread);
     va_start(ap, thread);
     ret = (*migrate) (cpstore, thread, new_process, ap);
     ret = (*migrate) (cpstore, thread, new_process, ap);
@@ -941,6 +996,7 @@ int do_migrate_process (int (*migrate) (struct shim_cp_store *,
     unsigned long checkpoint_time = GET_PROFILE_INTERVAL();
     unsigned long checkpoint_time = GET_PROFILE_INTERVAL();
     unsigned long checkpoint_size = cpstore->offset + cpstore->mem_size;
     unsigned long checkpoint_size = cpstore->offset + cpstore->mem_size;
 
 
+    /* Checkpoint data created. */
     debug("checkpoint of %u bytes created, %lu microsecond is spent.\n",
     debug("checkpoint of %u bytes created, %lu microsecond is spent.\n",
           checkpoint_size, checkpoint_time);
           checkpoint_size, checkpoint_time);
 
 
@@ -976,6 +1032,10 @@ int do_migrate_process (int (*migrate) (struct shim_cp_store *,
     hdr.write_proc_time = GET_PROFILE_INTERVAL();
     hdr.write_proc_time = GET_PROFILE_INTERVAL();
 #endif
 #endif
 
 
+    /*
+     * Sending a header to the new process through the RPC stream to
+     * notify the process to start receiving the checkpoint.
+     */
     bytes = DkStreamWrite(proc, 0, sizeof(struct newproc_header), &hdr, NULL);
     bytes = DkStreamWrite(proc, 0, sizeof(struct newproc_header), &hdr, NULL);
     if (!bytes) {
     if (!bytes) {
         ret = -PAL_ERRNO;
         ret = -PAL_ERRNO;
@@ -989,6 +1049,7 @@ int do_migrate_process (int (*migrate) (struct shim_cp_store *,
     ADD_PROFILE_OCCURENCE(migrate_send_on_stream, bytes);
     ADD_PROFILE_OCCURENCE(migrate_send_on_stream, bytes);
     SAVE_PROFILE_INTERVAL(migrate_send_header);
     SAVE_PROFILE_INTERVAL(migrate_send_header);
 
 
+    /* Sending the checkpoint either through GIPC or the RPC stream */
     ret = cpstore->use_gipc ? send_checkpoint_by_gipc(gipc_hdl, cpstore) :
     ret = cpstore->use_gipc ? send_checkpoint_by_gipc(gipc_hdl, cpstore) :
           send_checkpoint_on_stream(proc, cpstore);
           send_checkpoint_on_stream(proc, cpstore);
 
 
@@ -999,14 +1060,27 @@ int do_migrate_process (int (*migrate) (struct shim_cp_store *,
 
 
     SAVE_PROFILE_INTERVAL(migrate_send_checkpoint);
     SAVE_PROFILE_INTERVAL(migrate_send_checkpoint);
 
 
+    /*
+     * For socket and RPC streams, we need to migrate the PAL handles
+     * to the new process using PAL calls.
+     */
     if ((ret = send_handles_on_stream(proc, cpstore)) < 0)
     if ((ret = send_handles_on_stream(proc, cpstore)) < 0)
         goto err;
         goto err;
 
 
     SAVE_PROFILE_INTERVAL(migrate_send_pal_handles);
     SAVE_PROFILE_INTERVAL(migrate_send_pal_handles);
 
 
-    system_free((void *) cpstore->base, cpstore->bound);
+    /* Free the checkpoint space */
+    if ((ret = bkeep_munmap((void *) cpstore->base, cpstore->bound,
+                            CP_VMA_FLAGS)) < 0) {
+        debug("failed unmaping checkpoint (ret = %d)\n", ret);
+        goto err;
+    }
+
+    DkVirtualMemoryFree((PAL_PTR) cpstore->base, cpstore->bound);
+
     SAVE_PROFILE_INTERVAL(migrate_free_checkpoint);
     SAVE_PROFILE_INTERVAL(migrate_free_checkpoint);
 
 
+    /* Wait for the response from the new process */
     struct newproc_response res;
     struct newproc_response res;
     bytes = DkStreamRead(proc, 0, sizeof(struct newproc_response), &res,
     bytes = DkStreamRead(proc, 0, sizeof(struct newproc_response), &res,
                          NULL, 0);
                          NULL, 0);
@@ -1020,10 +1094,12 @@ int do_migrate_process (int (*migrate) (struct shim_cp_store *,
     if (gipc_hdl)
     if (gipc_hdl)
         DkObjectClose(gipc_hdl);
         DkObjectClose(gipc_hdl);
 
 
+    /* Notify the namespace manager regarding the subleasing of TID */
     ipc_pid_sublease_send(res.child_vmid, thread->tid,
     ipc_pid_sublease_send(res.child_vmid, thread->tid,
                           qstrgetstr(&new_process->self->uri),
                           qstrgetstr(&new_process->self->uri),
                           NULL);
                           NULL);
 
 
+    /* Listen on the RPC stream to the new process */
     add_ipc_port_by_id(res.child_vmid, proc,
     add_ipc_port_by_id(res.child_vmid, proc,
                        IPC_PORT_DIRCLD|IPC_PORT_LISTEN|IPC_PORT_KEEPALIVE,
                        IPC_PORT_DIRCLD|IPC_PORT_LISTEN|IPC_PORT_KEEPALIVE,
                        &ipc_child_exit,
                        &ipc_child_exit,
@@ -1043,73 +1119,92 @@ err:
     return ret;
     return ret;
 }
 }
 
 
+/*
+ * Loading the checkpoint from the parent process or a checkpoint file
+ *
+ * @hdr: checkpoint header
+ * @cpptr: returning the pointer of the loaded checkpoint
+ */
 int do_migration (struct newproc_cp_header * hdr, void ** cpptr)
 int do_migration (struct newproc_cp_header * hdr, void ** cpptr)
 {
 {
-    ptr_t base = (ptr_t) hdr->hdr.addr;
-    int   size = hdr->hdr.size;
+    void * base = NULL;
+    size_t size = hdr->hdr.size;
     PAL_PTR mapaddr;
     PAL_PTR mapaddr;
     PAL_NUM mapsize;
     PAL_NUM mapsize;
-    unsigned long mapoff;
     long rebase;
     long rebase;
     bool use_gipc = !!hdr->gipc.uri[0];
     bool use_gipc = !!hdr->gipc.uri[0];
     PAL_HANDLE gipc_store;
     PAL_HANDLE gipc_store;
     int ret = 0;
     int ret = 0;
+    BEGIN_PROFILE_INTERVAL();
 
 
-    debug("checkpoint detected (%d bytes, expected at %p)\n",
-          size, base);
-
-    if (base && lookup_overlap_vma((void *) base, size, NULL) == -ENOENT) {
+    /*
+     * Allocate a large enough space to load the checkpoint data.
+     *
+     * If CPSTORE_DERANDOMIZATION is enabled, try to allocate the space
+     * at the exact address where the checkpoint was created. Otherwise,
+     * just allocate the first free space found from the top of the virtual
+     * memory space.
+     */
+
+#if CPSTORE_DERANDOMIZATION == 1
+    if (hdr->hdr.addr
+        && lookup_overlap_vma(hdr->hdr.addr, size, NULL) == -ENOENT) {
+
+        /* Try to load the checkpoint at the same address */
+        base = hdr->hdr.addr;
         mapaddr = (PAL_PTR) ALIGN_DOWN(base);
         mapaddr = (PAL_PTR) ALIGN_DOWN(base);
         mapsize = (PAL_PTR) ALIGN_UP(base + size) - mapaddr;
         mapsize = (PAL_PTR) ALIGN_UP(base + size) - mapaddr;
-        mapoff  = base - (ptr_t) mapaddr;
-    } else {
-        mapaddr = (PAL_PTR) 0;
-        mapsize = ALIGN_UP(size);
-        mapoff  = 0;
+
+        /* Need to create VMA before allocation */
+        ret = bkeep_mmap((void *) mapaddr, mapsize,
+                         PROT_READ|PROT_WRITE, CP_VMA_FLAGS,
+                         NULL, 0, "cpstore");
+        if (ret < 0)
+            base = NULL;
     }
     }
+#endif
 
 
-    BEGIN_PROFILE_INTERVAL();
+    if (!base) {
+        base = bkeep_unmapped_any(ALIGN_UP(size),
+                                  PROT_READ|PROT_WRITE, CP_VMA_FLAGS,
+                                  NULL, 0, "cpstore");
+        if (!base)
+            return -ENOMEM;
+
+        mapaddr = (PAL_PTR) base;
+        mapsize = (PAL_NUM) ALIGN_UP(size);
+    }
+
+    debug("checkpoint mapped at %p-%p\n", base, base + size);
+
+    PAL_FLG pal_prot = PAL_PROT_READ|PAL_PROT_WRITE;
+    PAL_PTR mapped = mapaddr;
 
 
     if (use_gipc) {
     if (use_gipc) {
         debug("open gipc store: %s\n", hdr->gipc.uri);
         debug("open gipc store: %s\n", hdr->gipc.uri);
 
 
-        PAL_FLG mapprot = PAL_PROT_READ|PAL_PROT_WRITE;
         gipc_store = DkStreamOpen(hdr->gipc.uri, 0, 0, 0, 0);
         gipc_store = DkStreamOpen(hdr->gipc.uri, 0, 0, 0, 0);
         if (!gipc_store ||
         if (!gipc_store ||
-            !DkPhysicalMemoryMap(gipc_store, 1, &mapaddr, &mapsize, &mapprot))
+            !DkPhysicalMemoryMap(gipc_store, 1, &mapped, &mapsize, &pal_prot))
             return -PAL_ERRNO;
             return -PAL_ERRNO;
 
 
         SAVE_PROFILE_INTERVAL(child_load_checkpoint_by_gipc);
         SAVE_PROFILE_INTERVAL(child_load_checkpoint_by_gipc);
     } else {
     } else {
-        void * mapped = NULL;
-
-        for (int tries = 3 ; tries ; tries--) {
-            if ((mapped = DkVirtualMemoryAlloc(mapaddr, mapsize, 0,
-                                               PAL_PROT_READ|PAL_PROT_WRITE)))
-                break;
-
-            debug("cannot map address %p-%p\n", mapaddr, mapaddr + mapsize);
-            ret =-PAL_ERRNO;
-            mapaddr = NULL;
-        }
-
+        void * mapped = DkVirtualMemoryAlloc(mapaddr, mapsize, 0, pal_prot);
         if (!mapped)
         if (!mapped)
-            return ret;
-
-        mapaddr = mapped;
+            return -PAL_ERRNO;
     }
     }
 
 
-    bkeep_mmap((void *) mapaddr, mapsize,
-               PROT_READ|PROT_WRITE,
-               MAP_PRIVATE|MAP_ANONYMOUS|VMA_INTERNAL,
-               NULL, 0, NULL);
-
-    base = (ptr_t) mapaddr + mapoff;
-    rebase = (long) base - (long) hdr->hdr.addr;
-    debug("checkpoint loaded at %p\n", base);
+    assert(mapaddr == mapped);
+    /*
+     * If the checkpoint is loaded at a different address from where it was
+     * created, we need to rebase the pointers in the checkpoint.
+     */
+    rebase = (long) ((uintptr_t) base - (uintptr_t) hdr->hdr.addr);
 
 
+    /* Load the memory data sent separately over GIPC or the RPC stream. */
     if (use_gipc) {
     if (use_gipc) {
-        if ((ret = restore_gipc(gipc_store, &hdr->gipc, base, rebase)) < 0)
+        if ((ret = restore_gipc(gipc_store, &hdr->gipc, (ptr_t) base, rebase)) < 0)
             return ret;
             return ret;
 
 
         SAVE_PROFILE_INTERVAL(child_load_memory_by_gipc);
         SAVE_PROFILE_INTERVAL(child_load_memory_by_gipc);
@@ -1131,17 +1226,12 @@ int do_migration (struct newproc_cp_header * hdr, void ** cpptr)
         debug("%d bytes read on stream\n", total_bytes);
         debug("%d bytes read on stream\n", total_bytes);
     }
     }
 
 
-    struct newproc_response res;
-    res.child_vmid = cur_process.vmid;
-    res.failure = 0;
-    int bytes = DkStreamWrite(PAL_CB(parent_process), 0,
-                              sizeof(struct newproc_response),
-                              &res, NULL);
-    if (!bytes)
-        return -PAL_ERRNO;
-
-    if ((ret = receive_handles_on_stream(&hdr->palhdl, base, rebase)) < 0)
+    /* Receive socket or RPC handles from the parent process. */
+    ret = receive_handles_on_stream(&hdr->palhdl, (ptr_t) base, rebase);
+    if (ret < 0) {
+        /* TODO: unload the checkpoint space */
         return ret;
         return ret;
+    }
 
 
     SAVE_PROFILE_INTERVAL(child_receive_handles);
     SAVE_PROFILE_INTERVAL(child_receive_handles);
 
 

+ 6 - 0
LibOS/shim/src/shim_debug.c

@@ -104,9 +104,15 @@ void remove_r_debug (void * addr)
 void append_r_debug (const char * uri, void * addr, void * dyn_addr)
 void append_r_debug (const char * uri, void * addr, void * dyn_addr)
 {
 {
     struct gdb_link_map * new = malloc(sizeof(struct gdb_link_map));
     struct gdb_link_map * new = malloc(sizeof(struct gdb_link_map));
+    if (!new)
+        return;
 
 
     int uri_len = strlen(uri);
     int uri_len = strlen(uri);
     char * new_uri = malloc(uri_len + 1);
     char * new_uri = malloc(uri_len + 1);
+    if (!new_uri) {
+        free(new);
+        return;
+    }
     memcpy(new_uri, uri, uri_len + 1);
     memcpy(new_uri, uri, uri_len + 1);
 
 
     new->l_addr = addr;
     new->l_addr = addr;

+ 105 - 81
LibOS/shim/src/shim_init.c

@@ -175,7 +175,7 @@ void * migrated_shim_addr;
 void * initial_stack;
 void * initial_stack;
 const char ** initial_envp __attribute_migratable;
 const char ** initial_envp __attribute_migratable;
 
 
-const char ** library_paths;
+char ** library_paths;
 
 
 LOCKTYPE __master_lock;
 LOCKTYPE __master_lock;
 bool lock_enabled;
 bool lock_enabled;
@@ -248,15 +248,22 @@ void * allocate_stack (size_t size, size_t protect_size, bool user)
 
 
     /* preserve a non-readable, non-writeable page below the user
     /* preserve a non-readable, non-writeable page below the user
        stack to stop user program to clobber other vmas */
        stack to stop user program to clobber other vmas */
-    void * stack = user ?
-                   get_unmapped_vma(size + protect_size, STACK_FLAGS) :
-                   NULL;
+    void * stack = NULL;
+    int flags = STACK_FLAGS|(user ? 0 : VMA_INTERNAL);
 
 
-    if (user)
-        stack = (void *) DkVirtualMemoryAlloc(stack, size + protect_size,
-                                0, PAL_PROT_READ|PAL_PROT_WRITE);
-    else
+    if (user) {
+        stack = bkeep_unmapped_heap(size + protect_size, PROT_NONE,
+                                    flags, NULL, 0, "stack");
+
+        if (!stack)
+            return NULL;
+
+        stack = (void *)
+            DkVirtualMemoryAlloc(stack, size + protect_size,
+                                 0, PAL_PROT_NONE);
+    } else {
         stack = system_malloc(size + protect_size);
         stack = system_malloc(size + protect_size);
+    }
 
 
     if (!stack)
     if (!stack)
         return NULL;
         return NULL;
@@ -264,22 +271,11 @@ void * allocate_stack (size_t size, size_t protect_size, bool user)
     ADD_PROFILE_OCCURENCE(alloc_stack, size + protect_size);
     ADD_PROFILE_OCCURENCE(alloc_stack, size + protect_size);
     INC_PROFILE_OCCURENCE(alloc_stack_count);
     INC_PROFILE_OCCURENCE(alloc_stack_count);
 
 
-    if (protect_size &&
-        !DkVirtualMemoryProtect(stack, protect_size, PAL_PROT_NONE))
-        return NULL;
-
     stack += protect_size;
     stack += protect_size;
+    DkVirtualMemoryProtect(stack, size, PAL_PROT_READ|PAL_PROT_WRITE);
 
 
-    if (user) {
-        if (bkeep_mmap(stack, size, PROT_READ|PROT_WRITE,
-                       STACK_FLAGS, NULL, 0, "stack") < 0)
-            return NULL;
-
-        if (protect_size &&
-            bkeep_mmap(stack - protect_size, protect_size, 0,
-                       STACK_FLAGS, NULL, 0, NULL) < 0)
-            return NULL;
-    }
+    if (bkeep_mprotect(stack, size, PROT_READ|PROT_WRITE, flags) < 0)
+        return NULL;
 
 
     debug("allocated stack at %p (size = %d)\n", stack, size);
     debug("allocated stack at %p (size = %d)\n", stack, size);
     return stack;
     return stack;
@@ -392,39 +388,38 @@ int init_stack (const char ** argv, const char ** envp, const char *** argpp,
 int read_environs (const char ** envp)
 int read_environs (const char ** envp)
 {
 {
     for (const char ** e = envp ; *e ; e++) {
     for (const char ** e = envp ; *e ; e++) {
-        switch ((*e)[0]) {
-            case 'L': {
-                if (strpartcmp_static(*e, "LD_LIBRARY_PATH=")) {
-                    const char * s = *e + static_strlen("LD_LIBRARY_PATH=");
-                    int npaths = 0;
-                    for (const char * tmp = s ; *tmp ; tmp++)
-                        if (*tmp == ':')
-                            npaths++;
-                    const char ** paths = malloc(sizeof(const char *) *
-                                                 (npaths + 1));
-                    if (!paths)
-                        return -ENOMEM;
-
-                    int cnt = 0;
-                    while (*s) {
-                        const char * next;
-                        for (next = s ; *next && *next != ':' ; next++);
-                        int len = next - s;
-                        char * str = malloc(len + 1);
-                        if (!str)
-                            return -ENOMEM;
-                        memcpy(str, s, len);
-                        str[len] = 0;
-                        paths[cnt++] = str;
-                        s = *next ? next + 1 : next;
-                    }
-
-                    paths[cnt] = NULL;
-                    library_paths = paths;
-                    break;
+        if (strpartcmp_static(*e, "LD_LIBRARY_PATH=")) {
+            const char * s = *e + static_strlen("LD_LIBRARY_PATH=");
+            size_t npaths = 2; // One for the first entry, one for the last
+                               // NULL.
+            for (const char * tmp = s ; *tmp ; tmp++)
+                if (*tmp == ':')
+                    npaths++;
+            char** paths = malloc(sizeof(const char *) *
+                                  npaths);
+            if (!paths)
+                return -ENOMEM;
+
+            size_t cnt = 0;
+            while (*s) {
+                const char * next;
+                for (next = s ; *next && *next != ':' ; next++);
+                size_t len = next - s;
+                char * str = malloc(len + 1);
+                if (!str) {
+                    for (size_t i = 0; i < cnt; i++)
+                        free(paths[cnt]);
+                    return -ENOMEM;
                 }
                 }
-                break;
+                memcpy(str, s, len);
+                str[len] = 0;
+                paths[cnt++] = str;
+                s = *next ? next + 1 : next;
             }
             }
+
+            paths[cnt] = NULL;
+            library_paths = paths;
+            return 0;
         }
         }
     }
     }
 
 
@@ -445,8 +440,11 @@ static void __free (void * mem)
 
 
 int init_manifest (PAL_HANDLE manifest_handle)
 int init_manifest (PAL_HANDLE manifest_handle)
 {
 {
-    void * addr;
-    unsigned int size;
+    int ret = 0;
+    void * addr = NULL;
+    size_t size = 0, map_size = 0;
+
+#define MAP_FLAGS (MAP_PRIVATE|MAP_ANONYMOUS|VMA_INTERNAL)
 
 
     if (PAL_CB(manifest_preload.start)) {
     if (PAL_CB(manifest_preload.start)) {
         addr = PAL_CB(manifest_preload.start);
         addr = PAL_CB(manifest_preload.start);
@@ -457,34 +455,53 @@ int init_manifest (PAL_HANDLE manifest_handle)
             return -PAL_ERRNO;
             return -PAL_ERRNO;
 
 
         size = attr.pending_size;
         size = attr.pending_size;
-        addr = (void *) DkStreamMap(manifest_handle, NULL,
-                                  PAL_PROT_READ, 0,
-                                  ALIGN_UP(size));
-
+        map_size = ALIGN_UP(size);
+        addr = bkeep_unmapped_any(map_size, PROT_READ, MAP_FLAGS,
+                                  NULL, 0, "manifest");
         if (!addr)
         if (!addr)
-            return -PAL_ERRNO;
+            return -ENOMEM;
+
+        void * ret_addr = DkStreamMap(manifest_handle, addr,
+                                      PAL_PROT_READ, 0,
+                                      ALIGN_UP(size));
+
+        if (!ret_addr) {
+            bkeep_munmap(addr, map_size, MAP_FLAGS);
+            return -ENOMEM;
+        } else {
+            assert(addr == ret_addr);
+        }
     }
     }
 
 
-    bkeep_mmap(addr, ALIGN_UP(size), PROT_READ,
-               MAP_PRIVATE|MAP_ANONYMOUS|VMA_INTERNAL, NULL, 0,
-               "manifest");
+    struct config_store * new_root_config = malloc(sizeof(struct config_store));
+    if (!new_root_config) {
+        ret = -ENOMEM;
+        goto fail;
+    }
 
 
-    root_config = malloc(sizeof(struct config_store));
-    root_config->raw_data = addr;
-    root_config->raw_size = size;
-    root_config->malloc = __malloc;
-    root_config->free = __free;
+    new_root_config->raw_data = addr;
+    new_root_config->raw_size = size;
+    new_root_config->malloc = __malloc;
+    new_root_config->free = __free;
 
 
     const char * errstring = "Unexpected error";
     const char * errstring = "Unexpected error";
-    int ret = 0;
 
 
-    if ((ret = read_config(root_config, NULL, &errstring)) < 0) {
-        root_config = NULL;
+    if ((ret = read_config(new_root_config, NULL, &errstring)) < 0) {
         sys_printf("Unable to read manifest file: %s\n", errstring);
         sys_printf("Unable to read manifest file: %s\n", errstring);
-        return ret;
+        goto fail;
     }
     }
 
 
+    root_config = new_root_config;
     return 0;
     return 0;
+
+fail:
+    if (map_size) {
+        DkStreamUnmap(addr, map_size);
+        if (bkeep_munmap(addr, map_size, MAP_FLAGS) < 0)
+            bug();
+    }
+    free(new_root_config);
+    return ret;
 }
 }
 
 
 #ifdef PROFILE
 #ifdef PROFILE
@@ -606,11 +623,10 @@ DEFINE_PROFILE_INTERVAL(pal_child_creation_time,        pal);
 
 
 DEFINE_PROFILE_CATAGORY(init, );
 DEFINE_PROFILE_CATAGORY(init, );
 DEFINE_PROFILE_INTERVAL(init_randgen,               init);
 DEFINE_PROFILE_INTERVAL(init_randgen,               init);
-DEFINE_PROFILE_INTERVAL(init_heap,                  init);
+DEFINE_PROFILE_INTERVAL(init_vma,                   init);
 DEFINE_PROFILE_INTERVAL(init_slab,                  init);
 DEFINE_PROFILE_INTERVAL(init_slab,                  init);
 DEFINE_PROFILE_INTERVAL(init_str_mgr,               init);
 DEFINE_PROFILE_INTERVAL(init_str_mgr,               init);
 DEFINE_PROFILE_INTERVAL(init_internal_map,          init);
 DEFINE_PROFILE_INTERVAL(init_internal_map,          init);
-DEFINE_PROFILE_INTERVAL(init_vma,                   init);
 DEFINE_PROFILE_INTERVAL(init_fs,                    init);
 DEFINE_PROFILE_INTERVAL(init_fs,                    init);
 DEFINE_PROFILE_INTERVAL(init_dcache,                init);
 DEFINE_PROFILE_INTERVAL(init_dcache,                init);
 DEFINE_PROFILE_INTERVAL(init_handle,                init);
 DEFINE_PROFILE_INTERVAL(init_handle,                init);
@@ -638,7 +654,7 @@ DEFINE_PROFILE_INTERVAL(init_signal,                init);
     do {                                                                \
     do {                                                                \
         int _err = CALL_INIT(func, ##__VA_ARGS__);                      \
         int _err = CALL_INIT(func, ##__VA_ARGS__);                      \
         if (_err < 0) {                                                 \
         if (_err < 0) {                                                 \
-            debug("initialization failed in " #func " (%d)\n", _err);   \
+            sys_printf("shim_init() in " #func " (%d)\n", _err);        \
             shim_terminate();                                           \
             shim_terminate();                                           \
         }                                                               \
         }                                                               \
         SAVE_PROFILE_INTERVAL(func);                                    \
         SAVE_PROFILE_INTERVAL(func);                                    \
@@ -693,12 +709,11 @@ int shim_init (int argc, void * args, void ** return_stack)
 
 
     BEGIN_PROFILE_INTERVAL();
     BEGIN_PROFILE_INTERVAL();
     RUN_INIT(init_randgen);
     RUN_INIT(init_randgen);
-    RUN_INIT(init_heap);
+    RUN_INIT(init_vma);
     RUN_INIT(init_slab);
     RUN_INIT(init_slab);
     RUN_INIT(read_environs, envp);
     RUN_INIT(read_environs, envp);
     RUN_INIT(init_str_mgr);
     RUN_INIT(init_str_mgr);
     RUN_INIT(init_internal_map);
     RUN_INIT(init_internal_map);
-    RUN_INIT(init_vma);
     RUN_INIT(init_fs);
     RUN_INIT(init_fs);
     RUN_INIT(init_dcache);
     RUN_INIT(init_dcache);
     RUN_INIT(init_handle);
     RUN_INIT(init_handle);
@@ -751,6 +766,17 @@ restore:
     RUN_INIT(init_ipc_helper);
     RUN_INIT(init_ipc_helper);
     RUN_INIT(init_signal);
     RUN_INIT(init_signal);
 
 
+    if (PAL_CB(parent_process)) {
+        /* Notify the parent process */
+        struct newproc_response res;
+        res.child_vmid = cur_process.vmid;
+        res.failure = 0;
+        if (!DkStreamWrite(PAL_CB(parent_process), 0,
+                           sizeof(struct newproc_response),
+                           &res, NULL))
+            return -PAL_ERRNO;
+    }
+
     debug("shim process initialized\n");
     debug("shim process initialized\n");
 
 
 #ifdef PROFILE
 #ifdef PROFILE
@@ -819,8 +845,7 @@ static int name_pipe (char * uri, size_t size, void * id)
 {
 {
     IDTYPE pipeid;
     IDTYPE pipeid;
     int len;
     int len;
-    if (getrand(&pipeid, sizeof(IDTYPE)) < sizeof(IDTYPE))
-        return -EACCES;
+    getrand(&pipeid, sizeof(pipeid));
     debug("creating pipe: pipe.srv:%u\n", pipeid);
     debug("creating pipe: pipe.srv:%u\n", pipeid);
     if ((len = snprintf(uri, size, "pipe.srv:%u", pipeid)) == size)
     if ((len = snprintf(uri, size, "pipe.srv:%u", pipeid)) == size)
         return -ERANGE;
         return -ERANGE;
@@ -869,8 +894,7 @@ static int name_path (char * path, size_t size, void * id)
     unsigned int suffix;
     unsigned int suffix;
     int prefix_len = strlen(path);
     int prefix_len = strlen(path);
     int len;
     int len;
-    if (getrand(&suffix, sizeof(unsigned int)) < sizeof(unsigned int))
-        return -EACCES;
+    getrand(&suffix, sizeof(suffix));
     len = snprintf(path + prefix_len, size - prefix_len, "%08x", suffix);
     len = snprintf(path + prefix_len, size - prefix_len, "%08x", suffix);
     if (len == size)
     if (len == size)
         return -ERANGE;
         return -ERANGE;

+ 68 - 187
LibOS/shim/src/shim_malloc.c

@@ -25,22 +25,6 @@
  * 
  * 
  * When existing slabs are not sufficient, or a large (4k or greater) 
  * When existing slabs are not sufficient, or a large (4k or greater) 
  * allocation is requested, it ends up here (__system_alloc and __system_free).
  * allocation is requested, it ends up here (__system_alloc and __system_free).
- * 
- * There are two modes this file executes in: early initialization (before
- * VMAs are available), and post-initialization.  
- * 
- * Before VMAs are available, allocations are tracked in the shim_heap_areas
- * array.  
- *
- * Once VMAs initialized, the contents of shim_heap_areas are added to the VMA
- * list.  In order to reduce the risk of virtual address collisions, the VMA 
- * for the shim_heap_area is never removed, but the pages themselves are
- * freed.   This approach effectively reserves part of the address space for
- * initialization-time bookkeeping.
- * 
- * After initialization, all allocations and frees just call
- * DkVirtualMemoryAlloc and DkVirtualMemory Free, and add/remove VMAs for the
- * results.
  */
  */
 
 
 #include <shim_internal.h>
 #include <shim_internal.h>
@@ -65,195 +49,52 @@ static LOCKTYPE slab_mgr_lock;
 #endif
 #endif
 
 
 #define SLAB_CANARY
 #define SLAB_CANARY
-#define STARTUP_SIZE    4
+#define STARTUP_SIZE    16
 
 
 #include <slabmgr.h>
 #include <slabmgr.h>
 
 
 static SLAB_MGR slab_mgr = NULL;
 static SLAB_MGR slab_mgr = NULL;
 
 
-#define MIN_SHIM_HEAP_PAGES      64
-#define MAX_SHIM_HEAP_AREAS      32
-
-#define INIT_SHIM_HEAP     256 * allocsize
-
-static int vmas_initialized = 0;
-
-static struct shim_heap {
-    void * start;
-    void * current;
-    void * end;
-} shim_heap_areas[MAX_SHIM_HEAP_AREAS];
-
-static LOCKTYPE shim_heap_lock;
-
 DEFINE_PROFILE_CATAGORY(memory, );
 DEFINE_PROFILE_CATAGORY(memory, );
 
 
-static struct shim_heap * __alloc_enough_heap (size_t size)
-{
-    struct shim_heap * heap = NULL, * first_empty = NULL, * smallest = NULL;
-    size_t smallest_size = 0;
-
-    for (int i = 0 ; i < MAX_SHIM_HEAP_AREAS ; i++)
-        if (shim_heap_areas[i].start) {
-            if (shim_heap_areas[i].end >= shim_heap_areas[i].current + size)
-                return &shim_heap_areas[i];
-
-            if (!smallest ||
-                shim_heap_areas[i].end <=
-                shim_heap_areas[i].current + smallest_size) {
-                smallest = &shim_heap_areas[i];
-                smallest_size = shim_heap_areas[i].end -
-                                shim_heap_areas[i].current;
-            }
-        } else {
-            if (!first_empty)
-                first_empty = &shim_heap_areas[i];
-        }
-
-    if (!heap) {
-        size_t heap_size = MIN_SHIM_HEAP_PAGES * allocsize;
-        void * start = NULL;
-        heap = first_empty ? : smallest;
-        assert(heap);
-
-        while (size > heap_size)
-            heap_size *= 2;
-
-        if (!(start = (void *) DkVirtualMemoryAlloc(NULL, heap_size, 0,
-                                    PAL_PROT_WRITE|PAL_PROT_READ)))
-            return NULL;
-
-        debug("allocate internal heap at %p - %p\n", start, start + heap_size);
-
-        if (heap == smallest && heap->current != heap->end) {
-            DkVirtualMemoryFree(heap->current, heap->end - heap->current);
-            int flags = VMA_INTERNAL;
-            unlock(shim_heap_lock);
-            bkeep_munmap(heap->current, heap->end - heap->current, flags);
-            lock(shim_heap_lock);
-        }
-
-        heap->start = heap->current = start;
-        heap->end = start + heap_size;
-
-        unlock(shim_heap_lock);
-        bkeep_mmap(start, heap_size, PROT_READ|PROT_WRITE,
-                   MAP_PRIVATE|MAP_ANONYMOUS|VMA_INTERNAL, NULL, 0, NULL);
-        lock(shim_heap_lock);
-    }
-
-    return heap;
-}
-
 /* Returns NULL on failure */
 /* Returns NULL on failure */
 void * __system_malloc (size_t size)
 void * __system_malloc (size_t size)
 {
 {
     size_t alloc_size = ALIGN_UP(size);
     size_t alloc_size = ALIGN_UP(size);
-    void *addr, *addr_new;
-    
-    lock(shim_heap_lock);
-
-    if (vmas_initialized) {
-        /* If vmas are initialized, we need to request a free address range
-         * using get_unmapped_vma().  The current mmap code uses this function
-         * to synchronize all address allocation, via a "publication"
-         * pattern.  It is not safe to just call DkVirtualMemoryAlloc directly
-         * without reserving the vma region first.
-         */
-        int flags = MAP_PRIVATE|MAP_ANONYMOUS|VMA_INTERNAL;
-        addr = get_unmapped_vma(alloc_size, flags);
-        if (!addr) {
-            unlock(shim_heap_lock);
-            return NULL;
-        }
-        addr_new = (void *) DkVirtualMemoryAlloc(addr, alloc_size, 0,
-                                                 PAL_PROT_WRITE|PAL_PROT_READ);
-        if (!addr_new) {
-            bkeep_munmap(addr, alloc_size, flags);
-            unlock(shim_heap_lock);
-            return NULL;
-        }
-        assert (addr == addr_new);
-        bkeep_mmap(addr, alloc_size, PROT_READ|PROT_WRITE,
-                   flags, NULL, 0, NULL);
-    } else {
-
-        struct shim_heap * heap = __alloc_enough_heap(alloc_size);
+    void * addr;
+    int flags = MAP_PRIVATE|MAP_ANONYMOUS|VMA_INTERNAL;
+
+    /*
+     * If vmas are initialized, we need to request a free address range
+     * using bkeep_unmapped_any().  The current mmap code uses this function
+     * to synchronize all address allocation, via a "publication"
+     * pattern.  It is not safe to just call DkVirtualMemoryAlloc directly
+     * without reserving the vma region first.
+     */
+    addr = bkeep_unmapped_any(alloc_size, PROT_READ|PROT_WRITE, flags,
+                              NULL, 0, "slab");
+
+    if (!addr)
+        return NULL;
 
 
-        if (!heap) {
-            unlock(shim_heap_lock);
-            return NULL;
-        }
+    void * ret_addr = DkVirtualMemoryAlloc(addr, alloc_size, 0,
+                                           PAL_PROT_WRITE|PAL_PROT_READ);
 
 
-        addr = heap->current;
-        heap->current += alloc_size;
+    if (!ret_addr) {
+        bkeep_munmap(addr, alloc_size, flags);
+        return NULL;
     }
     }
 
 
-    unlock(shim_heap_lock);
-
+    assert(addr == ret_addr);
     return addr;
     return addr;
 }
 }
 
 
 void __system_free (void * addr, size_t size)
 void __system_free (void * addr, size_t size)
 {
 {
-    int in_reserved_area = 0;
     DkVirtualMemoryFree(addr, ALIGN_UP(size));
     DkVirtualMemoryFree(addr, ALIGN_UP(size));
-    int flags = VMA_INTERNAL;
-    for (int i = 0 ; i < MAX_SHIM_HEAP_AREAS ; i++)
-        if (shim_heap_areas[i].start) {
-            /* Here we assume that any allocation from the 
-             * shim_heap_area is a strict inclusion.  Allocations
-             * cannot partially overlap.
-             */
-            if (addr >= shim_heap_areas[i].start
-                && addr <= shim_heap_areas[i].end)
-                in_reserved_area = 1;
-        }
-    
-    if (! in_reserved_area)
-        bkeep_munmap(addr, ALIGN_UP(size), flags);
-}
-
-int init_heap (void)
-{
-    create_lock(shim_heap_lock);
-
-    void * start = (void *) DkVirtualMemoryAlloc(NULL, INIT_SHIM_HEAP, 0,
-                                    PAL_PROT_WRITE|PAL_PROT_READ);
-    if (!start)
-        return -ENOMEM;
-
-    debug("allocate internal heap at %p - %p\n", start,
-          start + INIT_SHIM_HEAP);
-
-    shim_heap_areas[0].start = shim_heap_areas[0].current = start;
-    shim_heap_areas[0].end = start + INIT_SHIM_HEAP;
-
-    return 0;
-}
 
 
-int bkeep_shim_heap (void)
-{
-    lock(shim_heap_lock);
-    
-    for (int i = 0 ; i < MAX_SHIM_HEAP_AREAS ; i++)
-        if (shim_heap_areas[i].start) {
-            /* Add a VMA for the active region */
-            bkeep_mmap(shim_heap_areas[i].start,
-                       shim_heap_areas[i].current - shim_heap_areas[i].start,
-                       PROT_READ|PROT_WRITE,
-                       MAP_PRIVATE|MAP_ANONYMOUS|VMA_INTERNAL, NULL, 0, NULL);
-            /* Go ahead and free the reserved region */
-            if (shim_heap_areas[i].current < shim_heap_areas[i].end) {
-                DkVirtualMemoryFree(shim_heap_areas[i].current,
-                                    ALIGN_UP(((long unsigned int) shim_heap_areas[i].end) - ((long unsigned int) shim_heap_areas[i].current)));
-                shim_heap_areas[i].end = shim_heap_areas[i].current;
-            }
-        }
-    vmas_initialized = 1;
-    
-    unlock(shim_heap_lock);
-    return 0;
+    if (bkeep_munmap(addr, ALIGN_UP(size), VMA_INTERNAL) < 0)
+        bug();
 }
 }
 
 
 int init_slab (void)
 int init_slab (void)
@@ -336,6 +177,16 @@ void * malloc (size_t size)
     void * mem = slab_alloc(slab_mgr, size);
     void * mem = slab_alloc(slab_mgr, size);
 #endif
 #endif
 
 
+    if (!mem) {
+        /*
+         * Normally, the library OS should not run out of memory.
+         * If malloc() failed internally, we cannot handle the
+         * condition and must terminate the current process.
+         */
+        sys_printf("******** Out-of-memory in library OS ********\n");
+        __abort();
+    }
+
 #ifdef SLAB_DEBUG_PRINT
 #ifdef SLAB_DEBUG_PRINT
     debug("malloc(%d) = %p (%s:%d)\n", size, mem, file, line);
     debug("malloc(%d) = %p (%s:%d)\n", size, mem, file, line);
 #endif
 #endif
@@ -358,11 +209,41 @@ void * calloc (size_t nmemb, size_t size)
 }
 }
 extern_alias(calloc);
 extern_alias(calloc);
 
 
+#if 0 /* Temporarily disabling this code */
+void * realloc(void * ptr, size_t new_size)
+{
+    /* TODO: We can't deal with this case right now */
+    assert(!MEMORY_MIGRATED(ptr));
+
+    size_t old_size = slab_get_buf_size(slab_mgr, ptr);
+
+    /*
+     * TODO: this realloc() implementation follows the GLIBC design, which
+     * will avoid reallocation when the buffer is large enough. Potentially
+     * this design can cause memory draining if the user resizes an extremely
+     * large object to a much smaller one.
+     */
+    if (old_size >= new_size)
+        return ptr;
+
+    void * new_buf = malloc(new_size);
+    if (!new_buf)
+        return NULL;
+
+    memcpy(new_buf, ptr, old_size);
+    /* realloc() does not zero the rest of the object */
+    free(ptr);
+    return new_buf;
+}
+extern_alias(realloc);
+#endif
+
+// Copies data from `mem` to a newly allocated buffer of a specified size.
 #if defined(SLAB_DEBUG_PRINT) || defined(SLABD_DEBUG_TRACE)
 #if defined(SLAB_DEBUG_PRINT) || defined(SLABD_DEBUG_TRACE)
-void * __remalloc_debug (const void * mem, size_t size,
-                   const char * file, int line)
+void * __malloc_copy_debug (const void * mem, size_t size,
+                         const char * file, int line)
 #else
 #else
-void * remalloc (const void * mem, size_t size)
+void * malloc_copy (const void * mem, size_t size)
 #endif
 #endif
 {
 {
 #if defined(SLAB_DEBUG_PRINT) || defined(SLABD_DEBUG_TRACE)
 #if defined(SLAB_DEBUG_PRINT) || defined(SLABD_DEBUG_TRACE)
@@ -375,7 +256,7 @@ void * remalloc (const void * mem, size_t size)
     return buff;
     return buff;
 }
 }
 #if !defined(SLAB_DEBUG_PRINT) && !defined(SLABD_DEBUG_TRACE)
 #if !defined(SLAB_DEBUG_PRINT) && !defined(SLABD_DEBUG_TRACE)
-extern_alias(remalloc);
+extern_alias(malloc_copy);
 #endif
 #endif
 
 
 DEFINE_PROFILE_OCCURENCE(free_0, memory);
 DEFINE_PROFILE_OCCURENCE(free_0, memory);

+ 6 - 25
LibOS/shim/src/shim_random.c

@@ -42,41 +42,22 @@ int init_randgen (void)
     return 0;
     return 0;
 }
 }
 
 
-int getrand (void * buffer, size_t size)
+void getrand (void * buffer, size_t size)
 {
 {
-    unsigned long old_randval = randval;
-    int bytes = 0;
+    size_t bytes = 0;
     lock(randgen_lock);
     lock(randgen_lock);
 
 
-    while (bytes + sizeof(unsigned long) <= size) {
-        *(unsigned long *) (buffer + bytes) = randval;
-        bytes += sizeof(unsigned long);
+    while (bytes + sizeof(uint64_t) <= size) {
+        *(uint64_t *) (buffer + bytes) = randval;
+        bytes += sizeof(uint64_t);
         randval = hash64(randval);
         randval = hash64(randval);
     }
     }
 
 
     if (bytes < size) {
     if (bytes < size) {
-        switch (size - bytes) {
-            case 4:
-                *(uint32_t *) (buffer + bytes) = randval & 0xffffffff;
-                bytes += 4;
-                break;
-
-            case 2:
-                *(uint16_t *) (buffer + bytes) = randval & 0xffff;
-                bytes += 2;
-                break;
-
-            case 1:
-                *(uint8_t *) (buffer + bytes) = randval & 0xff;
-                bytes++;
-                break;
-
-            default: break;
-        }
+        memcpy(buffer + bytes, &randval, size - bytes);
         randval = hash64(randval);
         randval = hash64(randval);
     }
     }
 
 
     unlock(randgen_lock);
     unlock(randgen_lock);
-    return bytes;
 }
 }
 extern_alias(getrand);
 extern_alias(getrand);

+ 0 - 17
LibOS/shim/src/shim_syscalls.c

@@ -179,23 +179,6 @@ DEFINE_SHIM_SYSCALL (munmap, 2, shim_do_munmap, int, void *, addr, size_t, len)
 
 
 DEFINE_SHIM_SYSCALL (brk, 1, shim_do_brk, void *, void *, brk)
 DEFINE_SHIM_SYSCALL (brk, 1, shim_do_brk, void *, void *, brk)
 
 
-#if 0 /* implemented */
-void * shim_do_brk (void * brk)
-{
-    brk = NULL; /* fix the warning */
-
-    /* lets return 0 ;
-     * libc falls back to mmap options if brk fails
-
-     * Following are comments from libc / malloc.c
-     *
-     *    If you'd like mmap to ALWAYS be, used, you can define MORECORE to be
-     *       a function that always returns MORECORE_FAILURE.
-     */
-    return (void *) -ENOMEM;
-}
-#endif
-
 /* rt_sigaction: sys/shim_sigaction.c */
 /* rt_sigaction: sys/shim_sigaction.c */
 DEFINE_SHIM_SYSCALL (rt_sigaction, 4, shim_do_sigaction, int, int, signum,
 DEFINE_SHIM_SYSCALL (rt_sigaction, 4, shim_do_sigaction, int, int, signum,
                      const struct __kernel_sigaction *, act,
                      const struct __kernel_sigaction *, act,

+ 62 - 31
LibOS/shim/src/sys/shim_brk.c

@@ -73,6 +73,8 @@ int init_brk_region (void * brk_region)
             brk_max_size = DEFAULT_BRK_MAX_SIZE;
             brk_max_size = DEFAULT_BRK_MAX_SIZE;
     }
     }
 
 
+    int flags = MAP_PRIVATE|MAP_ANONYMOUS;
+
     /*
     /*
      * Chia-Che 8/24/2017
      * Chia-Che 8/24/2017
      * Adding an argument to specify the initial starting
      * Adding an argument to specify the initial starting
@@ -87,30 +89,42 @@ int init_brk_region (void * brk_region)
             rand %= 0x2000000;
             rand %= 0x2000000;
             rand = ALIGN_UP(rand);
             rand = ALIGN_UP(rand);
 
 
-            struct shim_vma * vma;
+            struct shim_vma_val vma;
             if (lookup_overlap_vma(brk_region + rand, brk_max_size, &vma)
             if (lookup_overlap_vma(brk_region + rand, brk_max_size, &vma)
                 == -ENOENT) {
                 == -ENOENT) {
                 brk_region += rand;
                 brk_region += rand;
                 break;
                 break;
             }
             }
 
 
-            brk_region = vma->addr + vma->length;
-            put_vma(vma);
+            brk_region = vma.addr + vma.length;
         }
         }
+
+        /*
+         * Create the bookkeeping before allocating the brk region.
+         * The bookkeeping should never fail because we've already confirmed
+         * the availability.
+         */
+        if (bkeep_mmap(brk_region, brk_max_size, PROT_READ|PROT_WRITE,
+                       flags|VMA_UNMAPPED, NULL, 0, "brk") < 0)
+            bug();
     } else {
     } else {
-        brk_region = get_unmapped_vma(brk_max_size,
-                                      MAP_PRIVATE|MAP_ANONYMOUS);
+        brk_region = bkeep_unmapped_heap(brk_max_size, PROT_READ|PROT_WRITE,
+                                         flags|VMA_UNMAPPED, NULL, 0, "brk");
         if (!brk_region)
         if (!brk_region)
             return -ENOMEM;
             return -ENOMEM;
     }
     }
 
 
     void * end_brk_region = NULL;
     void * end_brk_region = NULL;
 
 
-    // brk region assigned
-    brk_region = (void *) DkVirtualMemoryAlloc(brk_region, brk_max_size, 0,
-                                    PAL_PROT_READ|PAL_PROT_WRITE);
-    if (!brk_region)
+    /* Allocate the whole brk region */
+    void * ret = (void *) DkVirtualMemoryAlloc(brk_region, brk_max_size, 0,
+                                               PAL_PROT_READ|PAL_PROT_WRITE);
+
+    /* Check whether the PAL call succeeded. */
+    if (!ret) {
+        bkeep_munmap(brk_region, brk_max_size, flags);
         return -ENOMEM;
         return -ENOMEM;
+    }
 
 
     ADD_PROFILE_OCCURENCE(brk, brk_max_size);
     ADD_PROFILE_OCCURENCE(brk, brk_max_size);
     INC_PROFILE_OCCURENCE(brk_count);
     INC_PROFILE_OCCURENCE(brk_count);
@@ -125,12 +139,14 @@ int init_brk_region (void * brk_region)
     debug("brk reserved area: %p - %p\n", end_brk_region,
     debug("brk reserved area: %p - %p\n", end_brk_region,
           brk_region + brk_max_size);
           brk_region + brk_max_size);
 
 
-    bkeep_mmap(brk_region, BRK_SIZE, PROT_READ|PROT_WRITE,
-               MAP_ANONYMOUS|MAP_PRIVATE, NULL, 0, "[heap]");
-    bkeep_mmap(end_brk_region, brk_max_size - BRK_SIZE,
-               PROT_READ|PROT_WRITE,
-               MAP_ANONYMOUS|MAP_PRIVATE|VMA_UNMAPPED,
-               NULL, 0, NULL);
+    /*
+     * Create another bookkeeping for the current brk region. The remaining
+     * space will be marked as unmapped so that the library OS can reuse the
+     * space for other purpose.
+     */
+    if (bkeep_mmap(brk_region, BRK_SIZE, PROT_READ|PROT_WRITE, flags,
+                   NULL, 0, "brk") < 0)
+        bug();
 
 
     return 0;
     return 0;
 }
 }
@@ -161,7 +177,12 @@ int reset_brk (void)
 void * shim_do_brk (void * brk)
 void * shim_do_brk (void * brk)
 {
 {
     master_lock();
     master_lock();
-    init_brk_region(NULL);
+
+    if (init_brk_region(NULL) < 0) {
+        debug("Failed to initialize brk!\n");
+        brk = NULL;
+        goto out;
+    }
 
 
     if (!brk) {
     if (!brk) {
 unchanged:
 unchanged:
@@ -186,7 +207,7 @@ unchanged:
 
 
         bkeep_mmap(region.brk_start, brk_end - region.brk_start,
         bkeep_mmap(region.brk_start, brk_end - region.brk_start,
                    PROT_READ|PROT_WRITE,
                    PROT_READ|PROT_WRITE,
-                   MAP_ANONYMOUS|MAP_PRIVATE, NULL, 0, "heap");
+                   MAP_ANONYMOUS|MAP_PRIVATE, NULL, 0, "brk");
 
 
         region.brk_current = brk;
         region.brk_current = brk;
         region.brk_end = brk_end;
         region.brk_end = brk_end;
@@ -220,25 +241,35 @@ BEGIN_RS_FUNC(brk)
 
 
     debug("brk area: %p - %p\n", region.brk_start, region.brk_end);
     debug("brk area: %p - %p\n", region.brk_start, region.brk_end);
 
 
-    unsigned long brk_size = region.brk_end - region.brk_start;
+    size_t brk_size = region.brk_end - region.brk_start;
 
 
     if (brk_size < brk_max_size) {
     if (brk_size < brk_max_size) {
-        void * brk_region = (void *) DkVirtualMemoryAlloc(region.brk_end,
-                                            brk_max_size - brk_size, 0,
-                                            PAL_PROT_READ|PAL_PROT_WRITE);
-        if (brk_region != region.brk_end)
-            return -EACCES;
+        void * alloc_addr = region.brk_end;
+        size_t alloc_size = brk_max_size - brk_size;
+        struct shim_vma_val vma;
+
+        if (!lookup_overlap_vma(alloc_addr, alloc_size, &vma)) {
+            /* if memory is already allocated here, adjust brk_max_size */
+            alloc_size = vma.addr - alloc_addr;
+            brk_max_size = brk_size + alloc_size;
+        }
 
 
-        ADD_PROFILE_OCCURENCE(brk, brk_max_size - brk_size);
-        INC_PROFILE_OCCURENCE(brk_migrate_count);
+        int ret = bkeep_mmap(alloc_addr, alloc_size,
+                             PROT_READ|PROT_WRITE,
+                             MAP_ANONYMOUS|MAP_PRIVATE|VMA_UNMAPPED,
+                             NULL, 0, "brk");
+        if (ret < 0)
+            return ret;
 
 
-        debug("brk reserved area: %p - %p\n", region.brk_end,
-              region.brk_start + brk_max_size);
+        void * ptr = DkVirtualMemoryAlloc(alloc_addr, alloc_size, 0,
+                                          PAL_PROT_READ|PAL_PROT_WRITE);
 
 
-        bkeep_mmap(region.brk_end, brk_max_size - brk_size,
-                   PROT_READ|PROT_WRITE,
-                   MAP_ANONYMOUS|MAP_PRIVATE|VMA_UNMAPPED, NULL, 0,
-                   NULL);
+        assert(ptr == alloc_addr);
+        ADD_PROFILE_OCCURENCE(brk, alloc_size);
+        INC_PROFILE_OCCURENCE(brk_migrate_count);
+
+        debug("brk reserved area: %p - %p\n", alloc_addr,
+              alloc_addr + alloc_size);
     }
     }
 
 
     DEBUG_RS("current=%p,region=%p-%p", region.brk_current, region.brk_start,
     DEBUG_RS("current=%p,region=%p-%p", region.brk_current, region.brk_start,

+ 8 - 12
LibOS/shim/src/sys/shim_clone.c

@@ -122,13 +122,10 @@ int clone_implementation_wrapper(struct clone_args * arg)
     void * stack = pcargs->stack;
     void * stack = pcargs->stack;
     void * return_pc = pcargs->return_pc;
     void * return_pc = pcargs->return_pc;
 
 
-    struct shim_vma * vma = NULL;
-    lookup_supervma(ALIGN_DOWN(stack), allocsize, &vma);
-    assert(vma);
-    my_thread->stack_top = vma->addr + vma->length;
-    my_thread->stack_red = my_thread->stack = vma->addr;
-    snprintf(vma->comment, VMA_COMMENT_LEN, "stack:%d", my_thread->tid);
-    put_vma(vma);
+    struct shim_vma_val vma;
+    lookup_vma(ALIGN_DOWN(stack), &vma);
+    my_thread->stack_top = vma.addr + vma.length;
+    my_thread->stack_red = my_thread->stack = vma.addr;
 
 
     /* Don't signal the initialize event until we are actually init-ed */ 
     /* Don't signal the initialize event until we are actually init-ed */ 
     DkEventSet(pcargs->initialize_event);
     DkEventSet(pcargs->initialize_event);
@@ -255,11 +252,10 @@ int shim_do_clone (int flags, void * user_stack_addr, int * parent_tidptr,
         }
         }
 
 
         if (user_stack_addr) {
         if (user_stack_addr) {
-            struct shim_vma * vma = NULL;
-            lookup_supervma(ALIGN_DOWN(user_stack_addr), allocsize, &vma);
-            assert(vma);
-            thread->stack_top = vma->addr + vma->length;
-            thread->stack_red = thread->stack = vma->addr;
+            struct shim_vma_val vma;
+            lookup_vma(ALIGN_DOWN(user_stack_addr), &vma);
+            thread->stack_top = vma.addr + vma.length;
+            thread->stack_red = thread->stack = vma.addr;
             tcb->shim_tcb.context.sp = user_stack_addr;
             tcb->shim_tcb.context.sp = user_stack_addr;
             tcb->shim_tcb.context.ret_ip = *(void **) user_stack_addr;
             tcb->shim_tcb.context.ret_ip = *(void **) user_stack_addr;
         }
         }

+ 48 - 3
LibOS/shim/src/sys/shim_exec.c

@@ -121,15 +121,60 @@ int shim_do_execve_rtld (struct shim_handle * hdl, const char ** argv,
 
 
     DkVirtualMemoryFree(old_stack, old_stack_top - old_stack);
     DkVirtualMemoryFree(old_stack, old_stack_top - old_stack);
     DkVirtualMemoryFree(old_stack_red, old_stack - old_stack_red);
     DkVirtualMemoryFree(old_stack_red, old_stack - old_stack_red);
-    bkeep_munmap(old_stack, old_stack_top - old_stack, /*flags=*/0);
-    bkeep_munmap(old_stack_red, old_stack - old_stack_red, /*flags=*/0);
+
+    if (bkeep_munmap(old_stack, old_stack_top - old_stack, 0) < 0 ||
+        bkeep_munmap(old_stack_red, old_stack - old_stack_red, 0) < 0)
+        bug();
 
 
     remove_loaded_libraries();
     remove_loaded_libraries();
     clean_link_map_list();
     clean_link_map_list();
     SAVE_PROFILE_INTERVAL(unmap_loaded_binaries_for_exec);
     SAVE_PROFILE_INTERVAL(unmap_loaded_binaries_for_exec);
 
 
     reset_brk();
     reset_brk();
-    unmap_all_vmas();
+
+    size_t count = DEFAULT_VMA_COUNT;
+    struct shim_vma_val * vmas = malloc(sizeof(struct shim_vma_val) * count);
+
+    if (!vmas)
+        return -ENOMEM;
+
+retry_dump_vmas:
+    ret = dump_all_vmas(vmas, count);
+
+    if (ret == -EOVERFLOW) {
+        struct shim_vma_val * new_vmas
+                = malloc(sizeof(struct shim_vma_val) * count * 2);
+        if (!new_vmas) {
+            free(vmas);
+            return -ENOMEM;
+        }
+        free(vmas);
+        vmas = new_vmas;
+        count *= 2;
+        goto retry_dump_vmas;
+    }
+
+    if (ret < 0) {
+        free(vmas);
+        return ret;
+    }
+
+    count = ret;
+    for (struct shim_vma_val * vma = vmas ; vma < vmas + count ; vma++) {
+        /* Don't free the current stack */
+        if (vma->addr == cur_thread->stack)
+            continue;
+
+        /* Free all the mapped VMAs */
+        if (!(vma->flags & VMA_UNMAPPED))
+            DkVirtualMemoryFree(vma->addr, vma->length);
+
+        /* Remove the VMAs */
+        bkeep_munmap(vma->addr, vma->length, vma->flags);
+    }
+
+    free_vma_val_array(vmas, count);
+
     SAVE_PROFILE_INTERVAL(unmap_all_vmas_for_exec);
     SAVE_PROFILE_INTERVAL(unmap_all_vmas_for_exec);
 
 
     if ((ret = load_elf_object(cur_thread->exec, NULL, 0)) < 0)
     if ((ret = load_elf_object(cur_thread->exec, NULL, 0)) < 0)

+ 1 - 1
LibOS/shim/src/sys/shim_migrate.c

@@ -209,7 +209,7 @@ int join_checkpoint (struct shim_thread * thread, ucontext_t * context,
     return ret;
     return ret;
 }
 }
 
 
-static void * file_alloc (struct shim_cp_store * store, void * addr, int size)
+static void * file_alloc (struct shim_cp_store * store, void * addr, size_t size)
 {
 {
     assert(store->cp_file);
     assert(store->cp_file);
     struct shim_mount * fs = store->cp_file->fs;
     struct shim_mount * fs = store->cp_file->fs;

+ 95 - 74
LibOS/shim/src/sys/shim_mmap.c

@@ -42,22 +42,34 @@ void * shim_do_mmap (void * addr, size_t length, int prot, int flags, int fd,
                      off_t offset)
                      off_t offset)
 {
 {
     struct shim_handle * hdl = NULL;
     struct shim_handle * hdl = NULL;
-    long ret = -ENOMEM;
-    bool reserved = false;
+    long ret = 0;
 
 
-    if (addr + length < addr) {
+    /*
+     * According to the manpage, both addr and offset have to be page-aligned,
+     * but not the length. mmap() will automatically round up the length.
+     */
+    if (addr && !ALIGNED(addr))
         return (void *) -EINVAL;
         return (void *) -EINVAL;
-    }
 
 
-    assert(!(flags & (VMA_UNMAPPED|VMA_TAINTED)));
+    if (fd >= 0 && !ALIGNED(offset))
+        return (void *) -EINVAL;
+
+    if (!ALIGNED(length))
+        length = ALIGN_UP(length);
+
+    if (addr + length < addr)
+        return (void *) -EINVAL;
+
+    /* ignore MAP_32BIT when MAP_FIXED is set */
+    if ((flags & (MAP_32BIT|MAP_FIXED)) == (MAP_32BIT|MAP_FIXED))
+        flags &= ~MAP_32BIT;
 
 
-    if (flags & MAP_32BIT)
-        return (void *) -ENOSYS;
+    assert(!(flags & (VMA_UNMAPPED|VMA_TAINTED)));
 
 
     int pal_alloc_type = 0;
     int pal_alloc_type = 0;
 
 
     if ((flags & MAP_FIXED) || addr) {
     if ((flags & MAP_FIXED) || addr) {
-        struct shim_vma * tmp = NULL;
+        struct shim_vma_val tmp;
 
 
         if (!lookup_overlap_vma(addr, length, &tmp)) {
         if (!lookup_overlap_vma(addr, length, &tmp)) {
             debug("mmap: allowing overlapping MAP_FIXED allocation at %p with length %lu\n",
             debug("mmap: allowing overlapping MAP_FIXED allocation at %p with length %lu\n",
@@ -68,106 +80,115 @@ void * shim_do_mmap (void * addr, size_t length, int prot, int flags, int fd,
         }
         }
     }
     }
 
 
-    if (!addr) {
-        addr = get_unmapped_vma(ALIGN_UP(length), flags);
+    if ((flags & (MAP_ANONYMOUS|MAP_FILE)) == MAP_FILE) {
+        if (fd < 0)
+            return (void *) -EINVAL;
+
+        hdl = get_fd_handle(fd, NULL, NULL);
+        if (!hdl)
+            return (void *) -EBADF;
 
 
-        if (addr) {
-            reserved = true;
-            // Approximate check only, to help root out bugs.
-            void * cur_stack = current_stack();
-            assert(cur_stack < addr || cur_stack > addr + length);
+        if (!hdl->fs || !hdl->fs->fs_ops || !hdl->fs->fs_ops->mmap) {
+            put_handle(hdl);
+            return (void *) -ENODEV;
         }
         }
     }
     }
 
 
-    void * mapped = ALIGN_DOWN((void *) addr);
-    void * mapped_end = ALIGN_UP((void *) addr + length);
+    if (addr) {
+        bkeep_mmap(addr, length, prot, flags, hdl, offset, NULL);
+    } else {
+        addr = bkeep_unmapped_heap(length, prot, flags, hdl, offset, NULL);
+        /*
+         * Let the library OS manage the address space. If we can't find
+         * proper space to allocate the memory, simply return failure.
+         */
+        if (!addr)
+            return (void *) -ENOMEM;
+    }
 
 
-    addr = mapped;
-    length = mapped_end - mapped;
+    // Approximate check only, to help root out bugs.
+    void * cur_stack = current_stack();
+    assert(cur_stack < addr || cur_stack > addr + length);
 
 
-    if (flags & MAP_ANONYMOUS) {
+    if (!hdl) {
         addr = (void *) DkVirtualMemoryAlloc(addr, length, pal_alloc_type,
         addr = (void *) DkVirtualMemoryAlloc(addr, length, pal_alloc_type,
                                              PAL_PROT(prot, 0));
                                              PAL_PROT(prot, 0));
 
 
         if (!addr) {
         if (!addr) {
-            ret = (PAL_NATIVE_ERRNO == PAL_ERROR_DENIED) ? -EPERM : -PAL_ERRNO;
-            goto free_reserved;
+            if (PAL_NATIVE_ERRNO == PAL_ERROR_DENIED)
+                ret = -EPERM;
+            else
+                ret = -PAL_ERRNO;
         }
         }
-
-        ADD_PROFILE_OCCURENCE(mmap, length);
     } else {
     } else {
-        if (fd < 0) {
-            ret = -EINVAL;
-            goto free_reserved;
-        }
-
-        hdl = get_fd_handle(fd, NULL, NULL);
-        if (!hdl) {
-            ret = -EBADF;
-            goto free_reserved;
-        }
-
-        if (!hdl->fs || !hdl->fs->fs_ops || !hdl->fs->fs_ops->mmap) {
-            put_handle(hdl);
-            ret = -ENODEV;
-            goto free_reserved;
-        }
-
-        if ((ret = hdl->fs->fs_ops->mmap(hdl, &addr, length, PAL_PROT(prot, flags),
-                                         flags, offset)) < 0) {
-            put_handle(hdl);
-            goto free_reserved;
-        }
+        ret = hdl->fs->fs_ops->mmap(hdl, &addr, length, PAL_PROT(prot, flags),
+                                    flags, offset);
     }
     }
 
 
-    if (addr != mapped) {
-        mapped = ALIGN_DOWN((void *) addr);
-        mapped_end = ALIGN_UP((void *) addr + length);
-    }
-
-    ret = bkeep_mmap((void *) mapped, mapped_end - mapped, prot,
-                     flags, hdl, offset, NULL);
-    assert(!ret);
     if (hdl)
     if (hdl)
         put_handle(hdl);
         put_handle(hdl);
-    return addr;
 
 
-free_reserved:
-    if (reserved)
-        bkeep_munmap((void *) mapped, mapped_end - mapped, flags);
-    return (void *) ret;
+    if (ret < 0) {
+        bkeep_munmap(addr, length, flags);
+        return (void *) ret;
+    }
+
+    ADD_PROFILE_OCCURENCE(mmap, length);
+    return addr;
 }
 }
 
 
-int shim_do_mprotect (void * addr, size_t len, int prot)
+int shim_do_mprotect (void * addr, size_t length, int prot)
 {
 {
-    uintptr_t mapped = ALIGN_DOWN((uintptr_t) addr);
-    uintptr_t mapped_end = ALIGN_UP((uintptr_t) addr + len);
-    if (bkeep_mprotect((void *) mapped, mapped_end - mapped, prot, /*flags=*/0) < 0)
-        return -EACCES;
+    /*
+     * According to the manpage, addr has to be page-aligned, but not the
+     * length. mprotect() will automatically round up the length.
+     */
+    if (!addr || !ALIGNED(addr))
+        return -EINVAL;
+
+    if (!ALIGNED(length))
+        length = ALIGN_UP(length);
+
+    if (bkeep_mprotect(addr, length, prot, 0) < 0)
+        return -EPERM;
 
 
-    if (!DkVirtualMemoryProtect((void *) mapped, mapped_end - mapped, prot))
+    if (!DkVirtualMemoryProtect(addr, length, prot))
         return -PAL_ERRNO;
         return -PAL_ERRNO;
 
 
     return 0;
     return 0;
 }
 }
 
 
-int shim_do_munmap (void * addr, size_t len)
+int shim_do_munmap (void * addr, size_t length)
 {
 {
-    struct shim_vma * tmp = NULL;
+    /*
+     * According to the manpage, addr has to be page-aligned, but not the
+     * length. munmap() will automatically round up the length.
+     */
+    if (!addr || !ALIGNED(addr))
+        return -EINVAL;
 
 
-    if (lookup_overlap_vma(addr, len, &tmp) < 0) {
+    if (!ALIGNED(length))
+        length = ALIGN_UP(length);
+
+    struct shim_vma_val vma;
+
+    if (lookup_overlap_vma(addr, length, &vma) < 0) {
         debug("can't find addr %p - %p in map, quit unmapping\n",
         debug("can't find addr %p - %p in map, quit unmapping\n",
-              addr, addr + len);
+              addr, addr + length);
 
 
         /* Really not an error */
         /* Really not an error */
         return -EFAULT;
         return -EFAULT;
     }
     }
 
 
-    uintptr_t mapped = ALIGN_DOWN((uintptr_t) addr);
-    uintptr_t mapped_end = ALIGN_UP((uintptr_t) addr + len);
-    if (bkeep_munmap((void *) mapped, mapped_end - mapped, /*flags=*/0) < 0)
-        return -EACCES;
+    /* Protect first to make sure there is no overlap with internal
+     * mappings */
+    if (bkeep_mprotect(addr, length, PROT_NONE, 0) < 0)
+        return -EPERM;
+
+    DkVirtualMemoryFree(addr, length);
+
+    if (bkeep_munmap(addr, length, 0) < 0)
+        bug();
 
 
-    DkVirtualMemoryFree((void *) mapped, mapped_end - mapped);
     return 0;
     return 0;
 }
 }

+ 8 - 5
LibOS/shim/src/sys/shim_open.c

@@ -318,13 +318,13 @@ size_t shim_do_getdents (int fd, struct linux_dirent * buf, size_t count)
 
 
     if (hdl->type != TYPE_DIR) {
     if (hdl->type != TYPE_DIR) {
         ret = -ENOTDIR;
         ret = -ENOTDIR;
-        goto out;
+        goto out_no_unlock;
     }
     }
 
 
     /* DEP 3/3/17: Properly handle an unlinked directory */
     /* DEP 3/3/17: Properly handle an unlinked directory */
     if (hdl->dentry->state & DENTRY_NEGATIVE) {
     if (hdl->dentry->state & DENTRY_NEGATIVE) {
         ret = -ENOENT;
         ret = -ENOENT;
-        goto out;
+        goto out_no_unlock;
     }
     }
     
     
     /* we are grabbing the lock because the handle content is actually
     /* we are grabbing the lock because the handle content is actually
@@ -339,7 +339,8 @@ size_t shim_do_getdents (int fd, struct linux_dirent * buf, size_t count)
     /* If we haven't listed the directory, do this first */
     /* If we haven't listed the directory, do this first */
     if (!(dent->state & DENTRY_LISTED)) {
     if (!(dent->state & DENTRY_LISTED)) {
         ret = list_directory_dentry(dent);
         ret = list_directory_dentry(dent);
-        if (ret) goto out;
+        if (ret < 0)
+            goto out;
     }
     }
 
 
 #define DIRENT_SIZE(len)  (sizeof(struct linux_dirent) +                \
 #define DIRENT_SIZE(len)  (sizeof(struct linux_dirent) +                \
@@ -382,7 +383,8 @@ size_t shim_do_getdents (int fd, struct linux_dirent * buf, size_t count)
 
 
     if (dirhdl->ptr == (void *) -1) {
     if (dirhdl->ptr == (void *) -1) {
         ret = list_directory_handle(dent, hdl);
         ret = list_directory_handle(dent, hdl);
-        if (ret) goto out;
+        if (ret < 0)
+            goto out;
     }
     }
     
     
     while (dirhdl->ptr && *dirhdl->ptr) {
     while (dirhdl->ptr && *dirhdl->ptr) {
@@ -404,8 +406,9 @@ done:
     if (bytes == 0 && (dirhdl->dot || dirhdl->dotdot || 
     if (bytes == 0 && (dirhdl->dot || dirhdl->dotdot || 
                        (dirhdl->ptr && *dirhdl->ptr)))
                        (dirhdl->ptr && *dirhdl->ptr)))
         ret = -EINVAL;
         ret = -EINVAL;
-    unlock(hdl->lock);
 out:
 out:
+    unlock(hdl->lock);
+out_no_unlock:
     put_handle(hdl);
     put_handle(hdl);
     return ret;
     return ret;
 }
 }

+ 1 - 1
LibOS/shim/src/utils/strobjs.c

@@ -55,7 +55,7 @@ struct shim_str * get_str_obj (void)
 int free_str_obj (struct shim_str * str)
 int free_str_obj (struct shim_str * str)
 {
 {
     if (str == NULL)
     if (str == NULL)
-        return -ENOMEM;
+        return 0;
 
 
     if (MEMORY_MIGRATED(str)) {
     if (MEMORY_MIGRATED(str)) {
         memset(str, 0, sizeof(struct shim_str));
         memset(str, 0, sizeof(struct shim_str));

+ 5 - 0
LibOS/shim/test/apps/lmbench/Makefile.lmbench

@@ -13,5 +13,10 @@ target = $(manifests)
 level = ../../../../../
 level = ../../../../../
 include ../../../../../Makefile
 include ../../../../../Makefile
 
 
+sh.manifest.sgx: /tmp/hello
+
+/tmp/hello: hello
+	cp -f $< $@
+
 $(addsuffix .template,$(manifests)): %: ../../../%
 $(addsuffix .template,$(manifests)): %: ../../../%
 	ln -sf $< $@
 	ln -sf $< $@

+ 4 - 0
LibOS/shim/test/apps/lmbench/hello.manifest.template

@@ -7,6 +7,10 @@ fs.mount.lib.type = chroot
 fs.mount.lib.path = /lib
 fs.mount.lib.path = /lib
 fs.mount.lib.uri = file:$(LIBCDIR)
 fs.mount.lib.uri = file:$(LIBCDIR)
 
 
+fs.mount.tmp.type = chroot
+fs.mount.tmp.path = /tmp
+fs.mount.tmp.uri = file:
+
 sys.brk.size = 32M
 sys.brk.size = 32M
 sys.stack.size = 4M
 sys.stack.size = 4M
 
 

+ 7 - 9
LibOS/shim/test/apps/lmbench/sh.manifest.template

@@ -3,28 +3,26 @@ loader.exec = file:/bin/sh
 loader.env.LD_LIBRARY_PATH = /lib:/lib64
 loader.env.LD_LIBRARY_PATH = /lib:/lib64
 loader.debug_type = none
 loader.debug_type = none
 
 
-fs.mount.tmp1.type = chroot
-fs.mount.tmp1.path = /tmp
-fs.mount.tmp1.uri = file:/tmp
-
-fs.mount.tmp2.type = chroot
-fs.mount.tmp2.path = /var/tmp
-fs.mount.tmp2.uri = file:/var/tmp
+fs.mount.tmp.type = chroot
+fs.mount.tmp.path = /tmp
+fs.mount.tmp.uri = file:/tmp
 
 
 fs.mount.lib.type = chroot
 fs.mount.lib.type = chroot
 fs.mount.lib.path = /lib
 fs.mount.lib.path = /lib
 fs.mount.lib.uri = file:$(LIBCDIR)
 fs.mount.lib.uri = file:$(LIBCDIR)
 
 
-sgx.enclave_size = 128M
+glibc.heap_size = 16M
 sys.brk.size = 32M
 sys.brk.size = 32M
 sys.stack.size = 4M
 sys.stack.size = 4M
 
 
+sgx.enclave_size = 512M
+
 sgx.trusted_files.ld = file:$(LIBCDIR)/ld-linux-x86-64.so.2
 sgx.trusted_files.ld = file:$(LIBCDIR)/ld-linux-x86-64.so.2
 sgx.trusted_files.libc = file:$(LIBCDIR)/libc.so.6
 sgx.trusted_files.libc = file:$(LIBCDIR)/libc.so.6
 sgx.trusted_files.libdl = file:$(LIBCDIR)/libdl.so.2
 sgx.trusted_files.libdl = file:$(LIBCDIR)/libdl.so.2
 sgx.trusted_files.libm = file:$(LIBCDIR)/libm.so.6
 sgx.trusted_files.libm = file:$(LIBCDIR)/libm.so.6
 sgx.trusted_files.libpthread = file:$(LIBCDIR)/libpthread.so.0
 sgx.trusted_files.libpthread = file:$(LIBCDIR)/libpthread.so.0
 
 
-sgx.trusted_files.hello = file:hello
+sgx.trusted_files.hello = file:/tmp/hello
 
 
 sgx.trusted_children.hello = file:hello.sig
 sgx.trusted_children.hello = file:hello.sig

+ 1 - 1
LibOS/shim/test/apps/ltp/fetch.py

@@ -53,7 +53,7 @@ def finish(result):
                     continue
                     continue
 
 
                 # Drop this line so that we get consistent offsets
                 # Drop this line so that we get consistent offsets
-                if output == "WARNING: no physical memory support, process creation will be slow.\n":
+                if output == "WARNING: no physical memory support, process creation may be slow.\n":
                     continue
                     continue
 
 
                 if tokens[1].isdigit():
                 if tokens[1].isdigit():

+ 4 - 4
LibOS/shim/test/regression/Makefile

@@ -50,13 +50,13 @@ endif
 .PHONY: regression
 .PHONY: regression
 regression: $(target)
 regression: $(target)
 	@echo "\n\nBasic Bootstrapping:"
 	@echo "\n\nBasic Bootstrapping:"
-	@for f in $(wildcard 00_*.py); do env $(PYTHONENV) python $$f $(RUNTIME)/pal-$(PAL_HOST) || exit $?; done
+	@for f in $(wildcard 00_*.py); do env $(PYTHONENV) python $$f $(RUNTIME)/pal-$(PAL_HOST) || exit $$?; done
 	@echo "\n\nSyscall Support:"
 	@echo "\n\nSyscall Support:"
-	@for f in $(wildcard 30_*.py); do env $(PYTHONENV) python $$f $(RUNTIME)/pal-$(PAL_HOST) || exit $?; done
+	@for f in $(wildcard 30_*.py); do env $(PYTHONENV) python $$f $(RUNTIME)/pal-$(PAL_HOST) || exit $$?; done
 	@echo "\n\nSocket Support:"
 	@echo "\n\nSocket Support:"
-	@for f in $(wildcard 80_*.py); do env $(PYTHONENV) python $$f $(RUNTIME)/pal-$(PAL_HOST) || exit $?; done
+	@for f in $(wildcard 80_*.py); do env $(PYTHONENV) python $$f $(RUNTIME)/pal-$(PAL_HOST) || exit $$?; done
 	@echo "\n\nLarge File Support:"
 	@echo "\n\nLarge File Support:"
-	@for f in $(wildcard 90_*.py); do env $(PYTHONENV) python $$f $(RUNTIME)/pal-$(PAL_HOST) || exit $?; done
+	@for f in $(wildcard 90_*.py); do env $(PYTHONENV) python $$f $(RUNTIME)/pal-$(PAL_HOST) || exit $$?; done
 
 
 .PHONY: clean-tmp
 .PHONY: clean-tmp
 clean-tmp:
 clean-tmp:

+ 1 - 1
Pal/lib/api.h

@@ -135,7 +135,7 @@ int copy_config (struct config_store * store, struct config_store * new_store);
 int write_config (void * file, int (*write) (void *, void *, int),
 int write_config (void * file, int (*write) (void *, void *, int),
                   struct config_store * store);
                   struct config_store * store);
 ssize_t get_config (struct config_store * cfg, const char * key,
 ssize_t get_config (struct config_store * cfg, const char * key,
-                    char * val_buf, size_t size);
+                    char * val_buf, size_t buf_size);
 int get_config_entries (struct config_store * cfg, const char * key,
 int get_config_entries (struct config_store * cfg, const char * key,
                         char * key_buf, size_t key_bufsize);
                         char * key_buf, size_t key_bufsize);
 ssize_t get_config_entries_size (struct config_store * cfg, const char * key);
 ssize_t get_config_entries_size (struct config_store * cfg, const char * key);

+ 0 - 1
Pal/lib/assert.h

@@ -32,4 +32,3 @@ void __abort(void);
     })
     })
 
 
 #endif
 #endif
-

+ 2 - 2
Pal/lib/graphene/config.c

@@ -132,14 +132,14 @@ next:
 }
 }
 
 
 ssize_t get_config (struct config_store * store, const char * key,
 ssize_t get_config (struct config_store * store, const char * key,
-                    char * val_buf, size_t size)
+                    char * val_buf, size_t buf_size)
 {
 {
     struct config * e = __get_config(store, key);
     struct config * e = __get_config(store, key);
 
 
     if (!e || !e->val)
     if (!e || !e->val)
         return -PAL_ERROR_INVAL;
         return -PAL_ERROR_INVAL;
 
 
-    if (e->vlen >= size)
+    if (e->vlen >= buf_size)
         return -PAL_ERROR_TOOLONG;
         return -PAL_ERROR_TOOLONG;
 
 
     memcpy(val_buf, e->val, e->vlen);
     memcpy(val_buf, e->val, e->vlen);

+ 41 - 26
Pal/lib/list.h

@@ -119,6 +119,8 @@
 // There are a few places where knowing the listp for deletion is cumbersome;
 // There are a few places where knowing the listp for deletion is cumbersome;
 //    maybe drop this requirement?
 //    maybe drop this requirement?
 
 
+#include <stdbool.h>
+
 #ifdef DEBUG
 #ifdef DEBUG
 #include <assert.h>
 #include <assert.h>
 #define LIST_ASSERT(cond) assert(cond)
 #define LIST_ASSERT(cond) assert(cond)
@@ -232,7 +234,7 @@
         listp_del(NODE, HEAD, FIELD);           \
         listp_del(NODE, HEAD, FIELD);           \
         INIT_LIST_HEAD(NODE, FIELD);            \
         INIT_LIST_HEAD(NODE, FIELD);            \
     } while(0)
     } while(0)
-            
+
 /* Keep vestigial TYPE and FIELD parameters to minimize disruption
 /* Keep vestigial TYPE and FIELD parameters to minimize disruption
  * when switching from Linux list implementation */
  * when switching from Linux list implementation */
 #define listp_first_entry(LISTP, TYPE, FIELD) ((LISTP)->first)
 #define listp_first_entry(LISTP, TYPE, FIELD) ((LISTP)->first)
@@ -240,38 +242,51 @@
 /* New API: return last entry in list */
 /* New API: return last entry in list */
 #define listp_last_entry(LISTP, TYPE, FIELD) ((LISTP)->first->FIELD.prev)
 #define listp_last_entry(LISTP, TYPE, FIELD) ((LISTP)->first->FIELD.prev)
 
 
+/* New API: return next entry in list */
+#define listp_next_entry(NODE, LISTP, FIELD)                            \
+        ((NODE) == (LISTP)->first->FIELD.prev ? NULL : (NODE)->FIELD.next)
+
+/* New API: return previous entry in list */
+#define listp_prev_entry(NODE, LISTP, FIELD)                            \
+        ((NODE) == (LISTP)->first ? NULL : (NODE)->FIELD.prev)
+
 /* Vestigial - for compat with Linux list code; rename to listp?
 /* Vestigial - for compat with Linux list code; rename to listp?
  */
  */
 #define list_entry(LISTP, TYPE, FIELD) (LISTP)
 #define list_entry(LISTP, TYPE, FIELD) (LISTP)
 
 
 #define listp_for_each_entry(CURSOR, HEAD, FIELD)                       \
 #define listp_for_each_entry(CURSOR, HEAD, FIELD)                       \
-    for(int first_iter = ({ (CURSOR) = (HEAD)->first;                   \
-                (HEAD)->first ? 1 : 0; });                              \
-        first_iter || (CURSOR) != (HEAD)->first;                        \
-        (CURSOR) = (CURSOR)->FIELD.next, first_iter = 0)
-
-#define listp_for_each_entry_reverse(CURSOR, HEAD, FIELD)              \
-    for(int first_iter = ({(CURSOR) = ((HEAD)->first                   \
-                    ? (HEAD)->first->FIELD.prev :                      \
-                    (HEAD)->first); (HEAD)->first ? 1 : 0; });         \
-        first_iter || ((CURSOR) && (CURSOR)->FIELD.next != (HEAD)->first); \
-        (CURSOR) = (CURSOR)->FIELD.prev, first_iter = 0)
-
-#define listp_for_each_entry_safe(CURSOR, TMP, HEAD, FIELD)             \
-    for(int first_iter = ({(CURSOR) = (HEAD)->first;                    \
-                    (TMP) = ((CURSOR) ? (CURSOR)->FIELD.next : (CURSOR)); \
-                (HEAD)->first ? 1 : 0; });                              \
-        (first_iter || (CURSOR) != (HEAD)->first) && (HEAD)->first;     \
-        first_iter = (first_iter && (TMP) != (CURSOR) && (HEAD)->first == (TMP) ? \
-                      1: 0),                                            \
-            (CURSOR) = (TMP), (TMP) = (TMP)->FIELD.next)
+    for (bool first_iter = ((CURSOR) = (HEAD)->first,                   \
+                            !!(HEAD)->first);                           \
+         first_iter || (CURSOR) != (HEAD)->first;                       \
+         (CURSOR) = (CURSOR)->FIELD.next, first_iter = false)
+
+#define listp_for_each_entry_reverse(CURSOR, HEAD, FIELD)                   \
+    for (bool first_iter = ((CURSOR) = ((HEAD)->first                       \
+                                       ? (HEAD)->first->FIELD.prev          \
+                                       : (HEAD)->first),                    \
+                           !!(HEAD)->first);                                \
+         first_iter || ((CURSOR) && (CURSOR)->FIELD.next != (HEAD)->first); \
+         (CURSOR) = (CURSOR)->FIELD.prev, first_iter = false)
+
+#define listp_for_each_entry_safe(CURSOR, TMP, HEAD, FIELD)                 \
+    for (bool first_iter = ((CURSOR) = (HEAD)->first,                       \
+                            (TMP) = ((CURSOR)                               \
+                                     ? (CURSOR)->FIELD.next                 \
+                                     : (CURSOR)),                           \
+                            !!(HEAD)->first);                               \
+         (HEAD)->first && (first_iter || (CURSOR) != (HEAD)->first);        \
+         /* Handle the case where the first element was removed. */         \
+         first_iter = first_iter && (TMP) != (CURSOR) && (HEAD)->first == (TMP), \
+         (CURSOR) = (TMP),                                                  \
+         (TMP) = (TMP)->FIELD.next)
 
 
 /* Continue safe iteration with CURSOR->next */
 /* Continue safe iteration with CURSOR->next */
-#define listp_for_each_entry_safe_continue(CURSOR, TMP, HEAD, FIELD)    \
-    for((CURSOR) = (CURSOR)->FIELD.next,                                \
-        (TMP) = (CURSOR)->FIELD.next;                                   \
-        (CURSOR) != (HEAD)->first && (HEAD)->first;                     \
-        (CURSOR) = (TMP), (TMP) = (TMP)->FIELD.next)
+#define listp_for_each_entry_safe_continue(CURSOR, TMP, HEAD, FIELD)     \
+    for ((CURSOR) = (CURSOR)->FIELD.next,                                \
+         (TMP) = (CURSOR)->FIELD.next;                                   \
+         (CURSOR) != (HEAD)->first && (HEAD)->first;                     \
+         (CURSOR) = (TMP),                                               \
+         (TMP) = (TMP)->FIELD.next)
 
 
 /* Assertion code written in Graphene project */
 /* Assertion code written in Graphene project */
 #define check_list_head(TYPE, head, FIELD)                              \
 #define check_list_head(TYPE, head, FIELD)                              \

+ 35 - 12
Pal/lib/memmgr.h

@@ -20,7 +20,7 @@
 /*
 /*
  * memmgr.h
  * memmgr.h
  *
  *
- * This file contains implementation of fix-sized memory allocator.
+ * This file contains the implementation of a fixed-size memory allocator.
  */
  */
 
 
 #ifndef MEMMGR_H
 #ifndef MEMMGR_H
@@ -34,10 +34,10 @@
 #endif
 #endif
 
 
 #ifndef system_malloc
 #ifndef system_malloc
-#error "macro \"void * system_malloc(int size)\" not declared"
+#error "macro \"void * system_malloc (size_t size)\" not declared"
 #endif
 #endif
 #ifndef system_free
 #ifndef system_free
-#error "macro \"void * system_free(void * ptr, int size)\" not declared"
+#error "macro \"void * system_free (void * ptr, size_t size)\" not declared"
 #endif
 #endif
 #ifndef system_lock
 #ifndef system_lock
 #define system_lock() ({})
 #define system_lock() ({})
@@ -66,7 +66,9 @@ DEFINE_LISTP(mem_obj);
 typedef struct mem_mgr {
 typedef struct mem_mgr {
     LISTP_TYPE(mem_area) area_list;
     LISTP_TYPE(mem_area) area_list;
     LISTP_TYPE(mem_obj) free_list;
     LISTP_TYPE(mem_obj) free_list;
+    size_t size;
     MEM_OBJ_TYPE * obj, * obj_top;
     MEM_OBJ_TYPE * obj, * obj_top;
+    MEM_AREA active_area;
 } MEM_MGR_TYPE, * MEM_MGR;
 } MEM_MGR_TYPE, * MEM_MGR;
 
 
 #define __SUM_OBJ_SIZE(size) (sizeof(MEM_OBJ_TYPE) * (size))
 #define __SUM_OBJ_SIZE(size) (sizeof(MEM_OBJ_TYPE) * (size))
@@ -107,22 +109,25 @@ static inline int init_align_up (int size)
 }
 }
 #endif
 #endif
 
 
-static inline void __set_free_mem_area (MEM_AREA area, MEM_MGR mgr, int size)
+static inline void __set_free_mem_area (MEM_AREA area, MEM_MGR mgr)
 {
 {
+    mgr->size += area->size;
     mgr->obj = area->objs;
     mgr->obj = area->objs;
     mgr->obj_top = area->objs + area->size;
     mgr->obj_top = area->objs + area->size;
+    mgr->active_area = area;
 }
 }
 
 
 static inline MEM_MGR create_mem_mgr (unsigned int size)
 static inline MEM_MGR create_mem_mgr (unsigned int size)
 {
 {
-    unsigned long mem = (unsigned long) system_malloc(__MAX_MEM_SIZE(size));
+    void * mem = system_malloc(__MAX_MEM_SIZE(size));
     MEM_AREA area;
     MEM_AREA area;
     MEM_MGR mgr;
     MEM_MGR mgr;
 
 
-    if (mem <= 0)
+    if (!mem)
         return NULL;
         return NULL;
 
 
     mgr = (MEM_MGR) mem;
     mgr = (MEM_MGR) mem;
+    mgr->size = 0;
     area = (MEM_AREA) (mem + sizeof(MEM_MGR_TYPE));
     area = (MEM_AREA) (mem + sizeof(MEM_MGR_TYPE));
     area->size = size;
     area->size = size;
 
 
@@ -131,7 +136,7 @@ static inline MEM_MGR create_mem_mgr (unsigned int size)
     listp_add(area, &mgr->area_list, __list);
     listp_add(area, &mgr->area_list, __list);
 
 
     INIT_LISTP(&mgr->free_list);
     INIT_LISTP(&mgr->free_list);
-    __set_free_mem_area(area, mgr, size);
+    __set_free_mem_area(area, mgr);
 
 
     return mgr;
     return mgr;
 }
 }
@@ -142,14 +147,14 @@ static inline MEM_MGR enlarge_mem_mgr (MEM_MGR mgr, unsigned int size)
 
 
     area = (MEM_AREA) system_malloc(sizeof(MEM_AREA_TYPE) +
     area = (MEM_AREA) system_malloc(sizeof(MEM_AREA_TYPE) +
                                     __SUM_OBJ_SIZE(size));
                                     __SUM_OBJ_SIZE(size));
-    if (area <= 0)
+    if (!area)
         return NULL;
         return NULL;
 
 
     system_lock();
     system_lock();
     area->size = size;
     area->size = size;
     INIT_LIST_HEAD(area, __list);
     INIT_LIST_HEAD(area, __list);
     listp_add(area, &mgr->area_list, __list);
     listp_add(area, &mgr->area_list, __list);
-    __set_free_mem_area(area, mgr, size);
+    __set_free_mem_area(area, mgr);
     system_unlock();
     system_unlock();
     return mgr;
     return mgr;
 }
 }
@@ -161,13 +166,14 @@ static inline void destroy_mem_mgr (MEM_MGR mgr)
     first = tmp = listp_first_entry(&mgr->area_list, MEM_AREA_TYPE, __list);
     first = tmp = listp_first_entry(&mgr->area_list, MEM_AREA_TYPE, __list);
 
 
     if (!first)
     if (!first)
-        return;
+        goto free_mgr;
 
 
     listp_for_each_entry_safe_continue(tmp, n, &mgr->area_list, __list) {
     listp_for_each_entry_safe_continue(tmp, n, &mgr->area_list, __list) {
         listp_del(tmp, &mgr->area_list, __list);
         listp_del(tmp, &mgr->area_list, __list);
         system_free(tmp, sizeof(MEM_AREA_TYPE) + __SUM_OBJ_SIZE(tmp->size));
         system_free(tmp, sizeof(MEM_AREA_TYPE) + __SUM_OBJ_SIZE(tmp->size));
     }
     }
 
 
+free_mgr:
     system_free(mgr, __MAX_MEM_SIZE(first->size));
     system_free(mgr, __MAX_MEM_SIZE(first->size));
 }
 }
 
 
@@ -199,12 +205,23 @@ static inline OBJ_TYPE * get_mem_obj_from_mgr_enlarge (MEM_MGR mgr,
 
 
     system_lock();
     system_lock();
     if (mgr->obj == mgr->obj_top && listp_empty(&mgr->free_list)) {
     if (mgr->obj == mgr->obj_top && listp_empty(&mgr->free_list)) {
+        size_t mgr_size = mgr->size;
+        MEM_AREA area;
+
+        /* If there is a previously allocated area, just activate it. */
+        area = listp_prev_entry(mgr->active_area, &mgr->area_list, __list);
+        if (area) {
+            __set_free_mem_area(area, mgr);
+            goto alloc;
+        }
+
         system_unlock();
         system_unlock();
 
 
         if (!size)
         if (!size)
             return NULL;
             return NULL;
 
 
-        MEM_AREA area;
+        /* There can be concurrent attempts to enlarge the
+           allocator, but we prevent deadlocks or crashes. */
         area = (MEM_AREA) system_malloc(sizeof(MEM_AREA_TYPE) +
         area = (MEM_AREA) system_malloc(sizeof(MEM_AREA_TYPE) +
                                         __SUM_OBJ_SIZE(size));
                                         __SUM_OBJ_SIZE(size));
         if (!area)
         if (!area)
@@ -213,10 +230,16 @@ static inline OBJ_TYPE * get_mem_obj_from_mgr_enlarge (MEM_MGR mgr,
         system_lock();
         system_lock();
         area->size = size;
         area->size = size;
         INIT_LIST_HEAD(area, __list);
         INIT_LIST_HEAD(area, __list);
+
+        /* There can be concurrent operations to extend the manager. In case
+         * someone has already enlarged the space, we just add the new area to
+         * the list for later use. */
         listp_add(area, &mgr->area_list, __list);
         listp_add(area, &mgr->area_list, __list);
-        __set_free_mem_area(area, mgr, size);
+        if (mgr_size == mgr->size) /* check if the size has changed */
+            __set_free_mem_area(area, mgr);
     }
     }
 
 
+alloc:
     if (!listp_empty(&mgr->free_list)) {
     if (!listp_empty(&mgr->free_list)) {
         mobj = listp_first_entry(&mgr->free_list, MEM_OBJ_TYPE, __list);
         mobj = listp_first_entry(&mgr->free_list, MEM_OBJ_TYPE, __list);
         listp_del_init(mobj, &mgr->free_list, __list);
         listp_del_init(mobj, &mgr->free_list, __list);

+ 97 - 44
Pal/lib/slabmgr.h

@@ -29,8 +29,11 @@
 #include "list.h"
 #include "list.h"
 #include <pal_debug.h>
 #include <pal_debug.h>
 #include <assert.h>
 #include <assert.h>
+#include <errno.h>
 #include <sys/mman.h>
 #include <sys/mman.h>
 
 
+// Before calling any of `system_malloc` and `system_free` this library will
+// acquire `system_lock` (the system_* implementation must not do it).
 #ifndef system_malloc
 #ifndef system_malloc
 #error "macro \"void * system_malloc(int size)\" not declared"
 #error "macro \"void * system_malloc(int size)\" not declared"
 #endif
 #endif
@@ -137,20 +140,23 @@ struct slab_debug {
 # endif
 # endif
 #endif
 #endif
 
 
-static int slab_levels[SLAB_LEVEL] = { SLAB_LEVEL_SIZES };
+// User buffer sizes on each level (not counting mandatory header
+// (SLAB_HDR_SIZE)).
+static const int slab_levels[SLAB_LEVEL] = { SLAB_LEVEL_SIZES };
 
 
 DEFINE_LISTP(slab_obj);
 DEFINE_LISTP(slab_obj);
 DEFINE_LISTP(slab_area);
 DEFINE_LISTP(slab_area);
 typedef struct slab_mgr {
 typedef struct slab_mgr {
     LISTP_TYPE(slab_area) area_list[SLAB_LEVEL];
     LISTP_TYPE(slab_area) area_list[SLAB_LEVEL];
     LISTP_TYPE(slab_obj) free_list[SLAB_LEVEL];
     LISTP_TYPE(slab_obj) free_list[SLAB_LEVEL];
-    unsigned int size[SLAB_LEVEL];
+    size_t size[SLAB_LEVEL];
     void * addr[SLAB_LEVEL], * addr_top[SLAB_LEVEL];
     void * addr[SLAB_LEVEL], * addr_top[SLAB_LEVEL];
+    SLAB_AREA active_area[SLAB_LEVEL];
 } SLAB_MGR_TYPE, * SLAB_MGR;
 } SLAB_MGR_TYPE, * SLAB_MGR;
 
 
 typedef struct __attribute__((packed)) large_mem_obj {
 typedef struct __attribute__((packed)) large_mem_obj {
     // offset 0
     // offset 0
-    unsigned long size;
+    unsigned long size;  // User buffer size (i.e. excluding control structures)
     unsigned char large_padding[LARGE_OBJ_PADDING];
     unsigned char large_padding[LARGE_OBJ_PADDING];
     // offset 16
     // offset 16
     unsigned char level;
     unsigned char level;
@@ -167,7 +173,7 @@ typedef struct __attribute__((packed)) large_mem_obj {
 #endif
 #endif
 
 
 #define RAW_TO_LEVEL(raw_ptr) \
 #define RAW_TO_LEVEL(raw_ptr) \
-            (*((unsigned char *) (raw_ptr) - OBJ_PADDING - 1))
+            (*((const unsigned char *) (raw_ptr) - OBJ_PADDING - 1))
 #define RAW_TO_OBJ(raw_ptr, type) container_of((raw_ptr), type, raw)
 #define RAW_TO_OBJ(raw_ptr, type) container_of((raw_ptr), type, raw)
 
 
 #define __SUM_OBJ_SIZE(slab_size, size) \
 #define __SUM_OBJ_SIZE(slab_size, size) \
@@ -228,22 +234,28 @@ static inline void __set_free_slab_area (SLAB_AREA area, SLAB_MGR mgr,
     mgr->addr[level] = (void *) area->raw;
     mgr->addr[level] = (void *) area->raw;
     mgr->addr_top[level] = (void *) area->raw + (area->size * slab_size);
     mgr->addr_top[level] = (void *) area->raw + (area->size * slab_size);
     mgr->size[level] += area->size;
     mgr->size[level] += area->size;
+    mgr->active_area[level] = area;
 }
 }
 
 
 static inline SLAB_MGR create_slab_mgr (void)
 static inline SLAB_MGR create_slab_mgr (void)
 {
 {
 #ifdef PAGE_SIZE
 #ifdef PAGE_SIZE
-    int size = init_size_align_up(STARTUP_SIZE);
+    size_t size = init_size_align_up(STARTUP_SIZE);
 #else
 #else
-    int size = STARTUP_SIZE;
+    size_t size = STARTUP_SIZE;
 #endif
 #endif
-    unsigned long mem;
+    void * mem = NULL;
     SLAB_AREA area;
     SLAB_AREA area;
     SLAB_MGR mgr;
     SLAB_MGR mgr;
 
 
-    mem = (unsigned long) system_malloc(__INIT_MAX_MEM_SIZE(size));
+    /* If the allocation failed, always try smaller sizes */
+    for (; size > 0; size >>= 1) {
+        mem = system_malloc(__INIT_MAX_MEM_SIZE(size));
+        if (mem)
+            break;
+    }
 
 
-    if (mem <= 0)
+    if (!mem)
         return NULL;
         return NULL;
 
 
     mgr = (SLAB_MGR) mem;
     mgr = (SLAB_MGR) mem;
@@ -252,7 +264,7 @@ static inline SLAB_MGR create_slab_mgr (void)
     int i;
     int i;
     for (i = 0 ; i < SLAB_LEVEL ; i++) {
     for (i = 0 ; i < SLAB_LEVEL ; i++) {
         area = (SLAB_AREA) addr;
         area = (SLAB_AREA) addr;
-        area->size = STARTUP_SIZE;
+        area->size = size;
 
 
         INIT_LIST_HEAD(area, __list);
         INIT_LIST_HEAD(area, __list);
         INIT_LISTP(&mgr->area_list[i]);
         INIT_LISTP(&mgr->area_list[i]);
@@ -288,39 +300,54 @@ static inline void destroy_slab_mgr (SLAB_MGR mgr)
     system_free(mgr, addr - (void *) mgr);
     system_free(mgr, addr - (void *) mgr);
 }
 }
 
 
-static inline SLAB_MGR enlarge_slab_mgr (SLAB_MGR mgr, int level)
+// system_lock needs to be held by the caller on entry.
+static inline int enlarge_slab_mgr (SLAB_MGR mgr, int level)
 {
 {
-    SLAB_AREA area;
-    int size;
-
-    /* DEP 11/24/17: I don't see how this case is possible.
-     * Either way, we should be consistent with whether to
-     * return with system_lock held or not.
-     * Commenting for now and replacing with an assert */
-    /*if (level >= SLAB_LEVEL) {
-        system_lock();
-        goto out;
-        }*/
     assert(level < SLAB_LEVEL);
     assert(level < SLAB_LEVEL);
-
     /* DEP 11/24/17: This strategy basically doubles a level's size 
     /* DEP 11/24/17: This strategy basically doubles a level's size 
      * every time it grows.  The assumption if we get this far is that
      * every time it grows.  The assumption if we get this far is that
      * mgr->addr == mgr->top_addr */
      * mgr->addr == mgr->top_addr */
-    assert (mgr->addr[level] == mgr->addr_top[level]);
-    size = mgr->size[level];
-    area = (SLAB_AREA) system_malloc(__MAX_MEM_SIZE(slab_levels[level], size));
-    if (area <= 0)
-        return NULL;
+    assert(mgr->addr[level] == mgr->addr_top[level]);
+
+    size_t size = mgr->size[level];
+    SLAB_AREA area;
+
+    /* If there is a previously allocated area, just activate it. */
+    area = listp_prev_entry(mgr->active_area[level], &mgr->area_list[level], __list);
+    if (area) {
+        __set_free_slab_area(area, mgr, level);
+        return 0;
+    }
+
+    /* system_malloc() may be blocking, so we release the lock before
+     * allocating more memory */
+    system_unlock();
+
+    /* If the allocation failed, always try smaller sizes */
+    for (; size > 0; size >>= 1) {
+        area = (SLAB_AREA) system_malloc(__MAX_MEM_SIZE(slab_levels[level], size));
+        if (area)
+            break;
+    }
+
+    if (!area) {
+        system_lock();
+        return -ENOMEM;
+    }
 
 
     system_lock();
     system_lock();
+
     area->size = size;
     area->size = size;
     INIT_LIST_HEAD(area, __list);
     INIT_LIST_HEAD(area, __list);
+
+    /* There can be concurrent operations to extend the SLAB manager. In case
+     * someone has already enlarged the space, we just add the new area to the
+     * list for later use. */
     listp_add(area, &mgr->area_list[level], __list);
     listp_add(area, &mgr->area_list[level], __list);
-    __set_free_slab_area(area, mgr, level);
-    system_unlock();
+    if (mgr->size[level] == size) /* check if the size has changed */
+        __set_free_slab_area(area, mgr, level);
 
 
-//out:
-    return mgr;
+    return 0;
 }
 }
 
 
 static inline void * slab_alloc (SLAB_MGR mgr, int size)
 static inline void * slab_alloc (SLAB_MGR mgr, int size)
@@ -350,10 +377,12 @@ static inline void * slab_alloc (SLAB_MGR mgr, int size)
     system_lock();
     system_lock();
     assert(mgr->addr[level] <= mgr->addr_top[level]);
     assert(mgr->addr[level] <= mgr->addr_top[level]);
     if (mgr->addr[level] == mgr->addr_top[level] &&
     if (mgr->addr[level] == mgr->addr_top[level] &&
-        listp_empty(&mgr->free_list[level])) {
-        system_unlock();
-        enlarge_slab_mgr(mgr, level);
-        system_lock();
+          listp_empty(&mgr->free_list[level])) {
+        int ret = enlarge_slab_mgr(mgr, level);
+        if (ret < 0) {
+            system_unlock();
+            return NULL;
+        }
     }
     }
 
 
     if (!listp_empty(&mgr->free_list[level])) {
     if (!listp_empty(&mgr->free_list[level])) {
@@ -402,14 +431,39 @@ static inline void * slab_alloc_debug (SLAB_MGR mgr, int size,
 }
 }
 #endif
 #endif
 
 
+// Returns user buffer size (i.e. excluding size of control structures).
+static inline size_t slab_get_buf_size(SLAB_MGR mgr, const void * ptr)
+{
+    assert(ptr);
+
+    unsigned char level = RAW_TO_LEVEL(ptr);
+
+    if (level == (unsigned char) -1) {
+        LARGE_MEM_OBJ mem = RAW_TO_OBJ(ptr, LARGE_MEM_OBJ_TYPE);
+        return mem->size;
+    }
+
+    if (level >= SLAB_LEVEL) {
+        pal_printf("Heap corruption detected: invalid heap level %u\n", level);
+        __abort();
+    }
+
+#ifdef SLAB_CANARY
+    const unsigned long * m = (const unsigned long *)(ptr + slab_levels[level]);
+    assert((*m) == SLAB_CANARY_STRING);
+#endif
+
+    return slab_levels[level];
+}
+
 static inline void slab_free (SLAB_MGR mgr, void * obj)
 static inline void slab_free (SLAB_MGR mgr, void * obj)
 {
 {
     /* In a general purpose allocator, free of NULL is allowed (and is a 
     /* In a general purpose allocator, free of NULL is allowed (and is a 
      * nop). We might want to enforce stricter rules for our allocator if
      * nop). We might want to enforce stricter rules for our allocator if
      * we're sure that no clients rely on being able to free NULL. */
      * we're sure that no clients rely on being able to free NULL. */
-    if (obj == NULL)
+    if (!obj)
         return;
         return;
-    
+
     unsigned char level = RAW_TO_LEVEL(obj);
     unsigned char level = RAW_TO_LEVEL(obj);
 
 
     if (level == (unsigned char) -1) {
     if (level == (unsigned char) -1) {
@@ -426,8 +480,8 @@ static inline void slab_free (SLAB_MGR mgr, void * obj)
      * more likely to be detected by adding a non-zero offset to the level,
      * more likely to be detected by adding a non-zero offset to the level,
      * so a level of 0 in the header would no longer be a valid level. */
      * so a level of 0 in the header would no longer be a valid level. */
     if (level >= SLAB_LEVEL) {
     if (level >= SLAB_LEVEL) {
-        pal_printf("Heap corruption detected: invalid heap level %ud\n", level);
-        assert(0); // panic
+        pal_printf("Heap corruption detected: invalid heap level %d\n", level);
+        __abort();
     }
     }
 
 
 #ifdef SLAB_CANARY
 #ifdef SLAB_CANARY
@@ -444,16 +498,15 @@ static inline void slab_free (SLAB_MGR mgr, void * obj)
 }
 }
 
 
 #ifdef SLAB_DEBUG
 #ifdef SLAB_DEBUG
-
 static inline void slab_free_debug (SLAB_MGR mgr, void * obj,
 static inline void slab_free_debug (SLAB_MGR mgr, void * obj,
                                     const char * file, int line)
                                     const char * file, int line)
 {
 {
-    if (obj == NULL)
+    if (!obj)
         return;
         return;
-    
+
     unsigned char level = RAW_TO_LEVEL(obj);
     unsigned char level = RAW_TO_LEVEL(obj);
 
 
-    if (level < SLAB_LEVEL) {
+    if (level < SLAB_LEVEL && level != (unsigned char) -1) {
         struct slab_debug * debug =
         struct slab_debug * debug =
                 (struct slab_debug *) (obj + slab_levels[level] +
                 (struct slab_debug *) (obj + slab_levels[level] +
                                        SLAB_CANARY_SIZE);
                                        SLAB_CANARY_SIZE);

+ 6 - 6
Pal/regression/Makefile

@@ -73,17 +73,17 @@ endif
 
 
 regression: $(call expand_target,$(target))
 regression: $(call expand_target,$(target))
 	@printf "\n\nBasic Bootstrapping:\n"
 	@printf "\n\nBasic Bootstrapping:\n"
-	@for f in $(wildcard 00_*.py); do env $(PYTHONENV) python $$f || exit $?; done
+	@for f in $(wildcard 00_*.py); do env $(PYTHONENV) python $$f || exit $$?; done
 	@printf "\n\nException Handling:\n"
 	@printf "\n\nException Handling:\n"
-	@for f in $(wildcard 01_*.py); do env $(PYTHONENV) python $$f || exit $?; done
+	@for f in $(wildcard 01_*.py); do env $(PYTHONENV) python $$f || exit $$?; done
 	@printf "\n\nSingle-Process Functionalities:\n"
 	@printf "\n\nSingle-Process Functionalities:\n"
-	@for f in $(wildcard 02_*.py); do env $(PYTHONENV) python $$f || exit $?; done
+	@for f in $(wildcard 02_*.py); do env $(PYTHONENV) python $$f || exit $$?; done
 	@printf "\n\nProcess Creation:\n"
 	@printf "\n\nProcess Creation:\n"
-	@for f in $(wildcard 03_*.py); do env $(PYTHONENV) python $$f || exit $?; done
+	@for f in $(wildcard 03_*.py); do env $(PYTHONENV) python $$f || exit $$?; done
 	@printf "\n\nMulti-Process Functionalities:\n"
 	@printf "\n\nMulti-Process Functionalities:\n"
-	@for f in $(wildcard 04_*.py); do env $(PYTHONENV) python $$f || exit $?; done
+	@for f in $(wildcard 04_*.py); do env $(PYTHONENV) python $$f || exit $$?; done
 	@printf "\n\nReference Monitor (Optional):\n"
 	@printf "\n\nReference Monitor (Optional):\n"
-	@for f in $(wildcard 05_*.py); do env $(PYTHONENV) python $$f || exit $?; done
+	@for f in $(wildcard 05_*.py); do env $(PYTHONENV) python $$f || exit $$?; done
 	@printf "\n\n"
 	@printf "\n\n"
 
 
 .PHONY: clean
 .PHONY: clean

+ 5 - 5
Pal/src/db_main.c

@@ -265,7 +265,7 @@ void pal_main (
         if (ret < 0)
         if (ret < 0)
             init_fail(-ret, "cannot get executable name");
             init_fail(-ret, "cannot get executable name");
 
 
-        exec_uri = remalloc(uri_buf, ret + 1);
+        exec_uri = malloc_copy(uri_buf, ret + 1);
     }
     }
 
 
     if (manifest_handle) {
     if (manifest_handle) {
@@ -273,7 +273,7 @@ void pal_main (
         if (ret < 0)
         if (ret < 0)
             init_fail(-ret, "cannot get manifest name");
             init_fail(-ret, "cannot get manifest name");
 
 
-        manifest_uri = remalloc(uri_buf, ret + 1);
+        manifest_uri = malloc_copy(uri_buf, ret + 1);
         goto has_manifest;
         goto has_manifest;
     }
     }
 
 
@@ -306,7 +306,7 @@ void pal_main (
 #endif
 #endif
 
 
     /* well, there is no manifest file, leave it alone */
     /* well, there is no manifest file, leave it alone */
-    printf("Can't fine any manifest, will run without one.\n");
+    printf("Can't find any manifest, will run without one.\n");
 
 
 has_manifest:
 has_manifest:
     /* load manifest if there is one */
     /* load manifest if there is one */
@@ -352,7 +352,7 @@ has_manifest:
         ret = get_config(pal_state.root_config, "loader.exec",
         ret = get_config(pal_state.root_config, "loader.exec",
                          uri_buf, URI_MAX);
                          uri_buf, URI_MAX);
         if (ret > 0) {
         if (ret > 0) {
-            exec_uri = remalloc(uri_buf, ret + 1);
+            exec_uri = malloc_copy(uri_buf, ret + 1);
             ret = _DkStreamOpen(&exec_handle, exec_uri, PAL_ACCESS_RDONLY,
             ret = _DkStreamOpen(&exec_handle, exec_uri, PAL_ACCESS_RDONLY,
                                 0, 0, 0);
                                 0, 0, 0);
             if (ret < 0)
             if (ret < 0)
@@ -412,7 +412,7 @@ has_manifest:
         ret = get_config(pal_state.root_config, "loader.execname", cfgbuf,
         ret = get_config(pal_state.root_config, "loader.execname", cfgbuf,
                          CONFIG_MAX);
                          CONFIG_MAX);
         if (ret > 0)
         if (ret > 0)
-            first_argument = remalloc(cfgbuf, ret + 1);
+            first_argument = malloc_copy(cfgbuf, ret + 1);
     }
     }
 
 
     read_environments(&environments);
     read_environments(&environments);

+ 3 - 3
Pal/src/db_rtld.c

@@ -86,7 +86,7 @@ new_elf_object (const char * realname, enum object_type type)
     memset(new, 0, sizeof(struct link_map));
     memset(new, 0, sizeof(struct link_map));
 
 
     new->l_name = realname ?
     new->l_name = realname ?
-                  remalloc(realname, strlen(realname) + 1) :
+                  malloc_copy(realname, strlen(realname) + 1) :
                   NULL;
                   NULL;
     new->l_type = type;
     new->l_type = type;
     return new;
     return new;
@@ -422,7 +422,7 @@ postmap:
             (ElfW(Dyn) *) ((ElfW(Addr)) l->l_ld + l->l_addr);
             (ElfW(Dyn) *) ((ElfW(Addr)) l->l_ld + l->l_addr);
 
 
         if (do_copy_dyn)
         if (do_copy_dyn)
-            l->l_ld = remalloc(l->l_ld, sizeof(ElfW(Dyn)) * l->l_ldnum);
+            l->l_ld = malloc_copy(l->l_ld, sizeof(ElfW(Dyn)) * l->l_ldnum);
     }
     }
 
 
     elf_get_dynamic_info(l->l_ld, l->l_info, l->l_addr);
     elf_get_dynamic_info(l->l_ld, l->l_info, l->l_addr);
@@ -557,7 +557,7 @@ int add_elf_object(void * addr, PAL_HANDLE handle, int type)
 
 
     map->l_real_ld = (ElfW(Dyn) *)
     map->l_real_ld = (ElfW(Dyn) *)
             ((char *) map->l_addr + (unsigned long) map->l_ld);
             ((char *) map->l_addr + (unsigned long) map->l_ld);
-    map->l_ld = remalloc(map->l_real_ld, sizeof(ElfW(Dyn)) * map->l_ldnum);
+    map->l_ld = malloc_copy(map->l_real_ld, sizeof(ElfW(Dyn)) * map->l_ldnum);
 
 
     elf_get_dynamic_info(map->l_ld, map->l_info, map->l_addr);
     elf_get_dynamic_info(map->l_ld, map->l_info, map->l_addr);
     setup_elf_hash(map);
     setup_elf_hash(map);

+ 1 - 1
Pal/src/db_streams.c

@@ -724,7 +724,7 @@ PAL_BOL DkSendHandle(PAL_HANDLE handle, PAL_HANDLE cargo)
       the new process environment? Should we initialize/modify some 
       the new process environment? Should we initialize/modify some 
       attibutes of the handle?
       attibutes of the handle?
     Ans - Yes, Initialize and make it compatibile in the target process
     Ans - Yes, Initialize and make it compatibile in the target process
-   3. Should remalloc be done or the process shares the same references?
+   3. Should malloc_copy be done or the process shares the same references?
     Ans - Variables members have to allocated data again.
     Ans - Variables members have to allocated data again.
 */
 */
 PAL_HANDLE DkReceiveHandle (PAL_HANDLE handle)
 PAL_HANDLE DkReceiveHandle (PAL_HANDLE handle)

+ 2 - 2
Pal/src/host/FreeBSD/db_files.c

@@ -275,7 +275,7 @@ static int file_rename (PAL_HANDLE handle, const char * type,
     if (IS_ERR(ret))
     if (IS_ERR(ret))
         return unix_to_pal_error(ERRNO(ret));
         return unix_to_pal_error(ERRNO(ret));
 
 
-    handle->file.realpath = remalloc(uri, strlen(uri));
+    handle->file.realpath = malloc_copy(uri, strlen(uri));
     return 0;
     return 0;
 }
 }
 
 
@@ -494,7 +494,7 @@ static int dir_rename (PAL_HANDLE handle, const char * type,
     if (IS_ERR(ret))
     if (IS_ERR(ret))
         return unix_to_pal_error(ERRNO(ret));
         return unix_to_pal_error(ERRNO(ret));
 
 
-    handle->dir.realpath = remalloc(uri, strlen(uri));
+    handle->dir.realpath = malloc_copy(uri, strlen(uri));
     return 0;
     return 0;
 }
 }
 
 

+ 13 - 34
Pal/src/host/FreeBSD/db_misc.c

@@ -199,54 +199,33 @@ static int init_randgen (void)
     _DkInternalUnlock(&lock);
     _DkInternalUnlock(&lock);
     return 0;
     return 0;
 }
 }
-int getrand (void * buffer, int size)
+
+void getrand (void * buffer, size_t size)
 {
 {
     unsigned long val;
     unsigned long val;
-    int bytes = 0;
+    size_t bytes = 0;
 
 
-    _DkInternalLock(&lock);
-    while (!randval) {
-        _DkInternalUnlock(&lock);
-        if (init_randgen() < 0)
-            return -PAL_ERROR_DENIED;
-        _DkInternalLock(&lock);
-    }
+    int ret = init_randgen();
+    if (ret < 0)
+        return ret;
 
 
+    _DkInternalLock(&lock);
     val = randval;
     val = randval;
-    randval++;
+    randval = hash64(~randval);
     _DkInternalUnlock(&lock);
     _DkInternalUnlock(&lock);
 
 
-    while (bytes + sizeof(unsigned long) <= size) {
-        *(unsigned long *) (buffer + bytes) = val;
+    while (bytes + sizeof(uint64_t) <= size) {
+        *(uint64_t *) (buffer + bytes) = val;
         val = hash64(val);
         val = hash64(val);
-        bytes += sizeof(unsigned long);
+        bytes += sizeof(uint64_t);
     }
     }
 
 
     if (bytes < size) {
     if (bytes < size) {
-        switch (size - bytes) {
-            case 4:
-                *(unsigned int *) (buffer + bytes) = randval & 0xffffffff;
-                bytes += 4;
-                break;
-
-            case 2:
-                *(unsigned short *) (buffer + bytes) = randval & 0xffff;
-                bytes += 2;
-                break;
-
-            case 1:
-                *(unsigned char *) (buffer + bytes) = randval & 0xff;
-                bytes++;
-                break;
-
-            default: break;
-        }
-        randval = hash64(randval);
+        memcpy(buffer + bytes, &val, size - bytes);
+        val = hash64(val);
     }
     }
 
 
     _DkInternalLock(&lock);
     _DkInternalLock(&lock);
     randval = val;
     randval = val;
     _DkInternalUnlock(&lock);
     _DkInternalUnlock(&lock);
-
-    return bytes;
 }
 }

+ 2 - 2
Pal/src/host/FreeBSD/db_streams.c

@@ -204,7 +204,7 @@ int handle_deserialize (PAL_HANDLE * handle, const void * data, int size)
         case pal_type_pipesrv:
         case pal_type_pipesrv:
         case pal_type_pipecli:
         case pal_type_pipecli:
         case pal_type_pipeprv:
         case pal_type_pipeprv:
-            hdl = remalloc(hdl_data, hdlsz);
+            hdl = malloc_copy(hdl_data, hdlsz);
             break;
             break;
         case pal_type_dev: {
         case pal_type_dev: {
             int l = hdl_data->dev.realpath ? strlen((const char *) data) + 1 : 0;
             int l = hdl_data->dev.realpath ? strlen((const char *) data) + 1 : 0;
@@ -255,7 +255,7 @@ int handle_deserialize (PAL_HANDLE * handle, const void * data, int size)
         }
         }
         case pal_type_gipc:
         case pal_type_gipc:
         case pal_type_process:
         case pal_type_process:
-            hdl = remalloc(hdl_data, hdlsz);
+            hdl = malloc_copy(hdl_data, hdlsz);
             break;
             break;
         default :
         default :
             return -PAL_ERROR_BADHANDLE;
             return -PAL_ERROR_BADHANDLE;

+ 6 - 1
Pal/src/host/Linux-SGX/db_files.c

@@ -186,7 +186,12 @@ static int file_map (PAL_HANDLE handle, void ** addr, int prot,
     void * umem;
     void * umem;
     int ret;
     int ret;
 
 
-    if (!stubs && !(prot & PAL_PROT_WRITECOPY)) {
+    /*
+     * If the file is listed in the manifest as an "allowed" file,
+     * we allow mapping the file outside the enclave, if the library OS
+     * does not request a specific address.
+     */
+    if (!mem && !stubs && !(prot & PAL_PROT_WRITECOPY)) {
         ret = ocall_map_untrusted(handle->file.fd, offset, size,
         ret = ocall_map_untrusted(handle->file.fd, offset, size,
                                   HOST_PROT(prot), &mem);
                                   HOST_PROT(prot), &mem);
         if (!ret)
         if (!ret)

+ 1 - 1
Pal/src/host/Linux-SGX/db_mutex.c

@@ -123,7 +123,7 @@ int _DkMutexUnlock (struct mutex_handle * m)
     int need_wake;
     int need_wake;
 
 
     /* Unlock */
     /* Unlock */
-    *(m->locked) = 0;
+    *(m->locked) = MUTEX_UNLOCKED; // TODO: this is not atomic!
     /* We need to make sure the write to locked is visible to lock-ers
     /* We need to make sure the write to locked is visible to lock-ers
      * before we read the waiter count. */
      * before we read the waiter count. */
     mb();
     mb();

+ 2 - 2
Pal/src/host/Linux-SGX/db_streams.c

@@ -194,7 +194,7 @@ int handle_deserialize (PAL_HANDLE * handle, const void * data, int size)
         case pal_type_pipesrv:
         case pal_type_pipesrv:
         case pal_type_pipecli:
         case pal_type_pipecli:
         case pal_type_pipeprv:
         case pal_type_pipeprv:
-            hdl = remalloc(hdl_data, hdlsz);
+            hdl = malloc_copy(hdl_data, hdlsz);
             break;
             break;
         case pal_type_dev: {
         case pal_type_dev: {
             int l = hdl_data->dev.realpath ? strlen((const char *) data) + 1 : 0;
             int l = hdl_data->dev.realpath ? strlen((const char *) data) + 1 : 0;
@@ -245,7 +245,7 @@ int handle_deserialize (PAL_HANDLE * handle, const void * data, int size)
         }
         }
         case pal_type_gipc:
         case pal_type_gipc:
         case pal_type_process:
         case pal_type_process:
-            hdl = remalloc(hdl_data, hdlsz);
+            hdl = malloc_copy(hdl_data, hdlsz);
             break;
             break;
         default :
         default :
             return -PAL_ERROR_BADHANDLE;
             return -PAL_ERROR_BADHANDLE;

+ 1 - 2
Pal/src/host/Linux-SGX/enclave_framework.c

@@ -21,8 +21,7 @@ static int register_trusted_file (const char * uri, const char * checksum_str);
 
 
 bool sgx_is_within_enclave (const void * addr, uint64_t size)
 bool sgx_is_within_enclave (const void * addr, uint64_t size)
 {
 {
-    return (addr >= enclave_base &&
-            addr + size <= enclave_top) ? 1 : 0;
+    return enclave_base <= addr && addr + size <= enclave_top;
 }
 }
 
 
 void * sgx_ocalloc (uint64_t size)
 void * sgx_ocalloc (uint64_t size)

+ 8 - 8
Pal/src/host/Linux-SGX/enclave_ocalls.c

@@ -19,7 +19,7 @@
     void * _tmp = sgx_ocalloc(len);     \
     void * _tmp = sgx_ocalloc(len);     \
     if (_tmp == NULL) {                 \
     if (_tmp == NULL) {                 \
         OCALL_EXIT();                   \
         OCALL_EXIT();                   \
-        return -PAL_ERROR_DENIED;       \
+        return -PAL_ERROR_DENIED;  /* TODO: remove this control-flow obfuscation */  \
     }                                   \
     }                                   \
     (val) = (type) _tmp;                \
     (val) = (type) _tmp;                \
 } while (0)
 } while (0)
@@ -33,20 +33,20 @@ int printf(const char * fmt, ...);
         sgx_ocfree();                                   \
         sgx_ocfree();                                   \
     } while (0)
     } while (0)
 
 
-#define ALLOC_IN_USER(var, size)                    \
+#define ALLOC_IN_USER(ptr, size)                    \
     ({                                              \
     ({                                              \
-        typeof(var) tmp = var;                      \
-        if (sgx_is_within_enclave(var, size)) {     \
+        typeof(ptr) tmp = ptr;                      \
+        if (sgx_is_within_enclave(ptr, size)) {     \
             OCALLOC(tmp, typeof(tmp), size);        \
             OCALLOC(tmp, typeof(tmp), size);        \
         }; tmp;                                     \
         }; tmp;                                     \
     })
     })
 
 
-#define COPY_TO_USER(var, size)                     \
+#define COPY_TO_USER(ptr, size)                     \
     ({                                              \
     ({                                              \
-        typeof(var) tmp = var;                      \
-        if (sgx_is_within_enclave(var, size)) {     \
+        typeof(ptr) tmp = ptr;                      \
+        if (sgx_is_within_enclave(ptr, size)) {     \
             OCALLOC(tmp, typeof(tmp), size);        \
             OCALLOC(tmp, typeof(tmp), size);        \
-            memcpy((void *) tmp, var, size);        \
+            memcpy((void *) tmp, ptr, size);        \
         }; tmp;                                     \
         }; tmp;                                     \
     })
     })
 
 

+ 12 - 6
Pal/src/host/Linux-SGX/enclave_pages.c

@@ -9,6 +9,8 @@
 
 
 #include <list.h>
 #include <list.h>
 
 
+#include <stdint.h>
+
 static unsigned long pgsz = PRESET_PAGESIZE;
 static unsigned long pgsz = PRESET_PAGESIZE;
 void * heap_base;
 void * heap_base;
 static uint64_t heap_size;
 static uint64_t heap_size;
@@ -83,8 +85,8 @@ void * get_reserved_pages(void * addr, uint64_t size)
     if (size & (pgsz - 1))
     if (size & (pgsz - 1))
         size = ((size + pgsz - 1) & ~(pgsz - 1));
         size = ((size + pgsz - 1) & ~(pgsz - 1));
 
 
-    if ((unsigned long) addr & (pgsz - 1))
-        addr = (void *) ((unsigned long) addr & ~(pgsz - 1));
+    if ((uintptr_t) addr & (pgsz - 1))
+        addr = (void *) ((uintptr_t) addr & ~(pgsz - 1));
 
 
     SGX_DBG(DBG_M, "allocate %d bytes at %p\n", size, addr);
     SGX_DBG(DBG_M, "allocate %d bytes at %p\n", size, addr);
 
 
@@ -223,6 +225,10 @@ allocated:
 
 
     if (!vma) {
     if (!vma) {
         vma = malloc(sizeof(struct heap_vma));
         vma = malloc(sizeof(struct heap_vma));
+        if (!vma) {
+            _DkInternalUnlock(&heap_vma_lock);
+            return NULL;
+        }
         vma->top = addr + size;
         vma->top = addr + size;
         vma->bottom = addr;
         vma->bottom = addr;
         INIT_LIST_HEAD(vma, list);
         INIT_LIST_HEAD(vma, list);
@@ -252,11 +258,11 @@ void free_pages(void * addr, uint64_t size)
     if (!addr || !size)
     if (!addr || !size)
         return;
         return;
 
 
-    if ((unsigned long) addr_top & (pgsz - 1))
-        addr = (void *) (((unsigned long) addr_top + pgsz + 1) & ~(pgsz - 1));
+    if ((uintptr_t) addr_top & (pgsz - 1))
+        addr = (void *) (((uintptr_t) addr_top + pgsz + 1) & ~(pgsz - 1));
 
 
-    if ((unsigned long) addr & (pgsz - 1))
-        addr = (void *) ((unsigned long) addr & ~(pgsz - 1));
+    if ((uintptr_t) addr & (pgsz - 1))
+        addr = (void *) ((uintptr_t) addr & ~(pgsz - 1));
 
 
     if (addr >= heap_base + heap_size)
     if (addr >= heap_base + heap_size)
         return;
         return;

+ 9 - 37
Pal/src/host/Linux-SGX/sgx_main.c

@@ -590,57 +590,29 @@ static int mcast_c (int port)
 
 
 static unsigned long randval = 0;
 static unsigned long randval = 0;
 
 
-int getrand (void * buffer, int size)
+void getrand (void * buffer, size_t size)
 {
 {
-    unsigned long val;
-    int bytes = 0;
+    size_t bytes = 0;
 
 
-    val = randval;
-    randval++;
-
-    while (bytes + sizeof(unsigned long) <= size) {
-        *(unsigned long *) (buffer + bytes) = val;
-        val = hash64(val);
-        bytes += sizeof(unsigned long);
+    while (bytes + sizeof(uint64_t) <= size) {
+        *(uint64_t*) (buffer + bytes) = randval;
+        randval = hash64(randval);
+        bytes += sizeof(uint64_t);
     }
     }
 
 
     if (bytes < size) {
     if (bytes < size) {
-        switch (size - bytes) {
-            case 4:
-                *(unsigned int *) (buffer + bytes) = randval & 0xffffffff;
-                bytes += 4;
-                break;
-
-            case 2:
-                *(unsigned short *) (buffer + bytes) = randval & 0xffff;
-                bytes += 2;
-                break;
-
-            case 1:
-                *(unsigned char *) (buffer + bytes) = randval & 0xff;
-                bytes++;
-                break;
-
-            default: break;
-        }
+        memcpy(buffer + bytes, &randval, size - bytes);
         randval = hash64(randval);
         randval = hash64(randval);
     }
     }
-
-    randval = val;
-    return bytes;
 }
 }
 
 
-static int create_instance (struct pal_sec * pal_sec)
+static void create_instance (struct pal_sec * pal_sec)
 {
 {
     unsigned int id;
     unsigned int id;
-    if (!getrand(&id, sizeof(unsigned int))) {
-        SGX_DBG(DBG_E, "Unable to generate random numbers\n");
-        return -PAL_ERROR_DENIED;
-    }
+    getrand(&id, sizeof(id));
     snprintf(pal_sec->pipe_prefix, sizeof(pal_sec->pipe_prefix),
     snprintf(pal_sec->pipe_prefix, sizeof(pal_sec->pipe_prefix),
              "/graphene/%x/", id);
              "/graphene/%x/", id);
     pal_sec->instance_id = id;
     pal_sec->instance_id = id;
-    return 0;
 }
 }
 
 
 int load_manifest (int fd, struct config_store ** config_ptr)
 int load_manifest (int fd, struct config_store ** config_ptr)

+ 2 - 2
Pal/src/host/Linux/db_files.c

@@ -273,7 +273,7 @@ static int file_rename (PAL_HANDLE handle, const char * type,
     if (IS_ERR(ret))
     if (IS_ERR(ret))
         return unix_to_pal_error(ERRNO(ret));
         return unix_to_pal_error(ERRNO(ret));
 
 
-    handle->file.realpath = remalloc(uri, strlen(uri));
+    handle->file.realpath = malloc_copy(uri, strlen(uri));
     return 0;
     return 0;
 }
 }
 
 
@@ -491,7 +491,7 @@ static int dir_rename (PAL_HANDLE handle, const char * type,
     if (IS_ERR(ret))
     if (IS_ERR(ret))
         return unix_to_pal_error(ERRNO(ret));
         return unix_to_pal_error(ERRNO(ret));
 
 
-    handle->dir.realpath = remalloc(uri, strlen(uri));
+    handle->dir.realpath = malloc_copy(uri, strlen(uri));
     return 0;
     return 0;
 }
 }
 
 

+ 1 - 1
Pal/src/host/Linux/db_sockets.c

@@ -1370,7 +1370,7 @@ PAL_HANDLE _DkBroadcastStreamOpen (void)
     hdl->mcast.cli = cli;
     hdl->mcast.cli = cli;
     hdl->mcast.port = (PAL_NUM) pal_sec.mcast_port;
     hdl->mcast.port = (PAL_NUM) pal_sec.mcast_port;
     hdl->mcast.nonblocking = PAL_FALSE;
     hdl->mcast.nonblocking = PAL_FALSE;
-    hdl->mcast.addr = (PAL_PTR) remalloc(&addr, sizeof(addr));
+    hdl->mcast.addr = (PAL_PTR) malloc_copy(&addr, sizeof(addr));
     return hdl;
     return hdl;
 
 
 err_cli:
 err_cli:

+ 2 - 2
Pal/src/host/Linux/db_streams.c

@@ -207,7 +207,7 @@ int handle_deserialize (PAL_HANDLE * handle, const void * data, int size)
         case pal_type_pipesrv:
         case pal_type_pipesrv:
         case pal_type_pipecli:
         case pal_type_pipecli:
         case pal_type_pipeprv:
         case pal_type_pipeprv:
-            hdl = remalloc(hdl_data, hdlsz);
+            hdl = malloc_copy(hdl_data, hdlsz);
             break;
             break;
         case pal_type_dev: {
         case pal_type_dev: {
             int l = hdl_data->dev.realpath ? strlen((const char *) data) + 1 : 0;
             int l = hdl_data->dev.realpath ? strlen((const char *) data) + 1 : 0;
@@ -258,7 +258,7 @@ int handle_deserialize (PAL_HANDLE * handle, const void * data, int size)
         }
         }
         case pal_type_gipc:
         case pal_type_gipc:
         case pal_type_process:
         case pal_type_process:
-            hdl = remalloc(hdl_data, hdlsz);
+            hdl = malloc_copy(hdl_data, hdlsz);
             break;
             break;
         default :
         default :
             return -PAL_ERROR_BADHANDLE;
             return -PAL_ERROR_BADHANDLE;

+ 1 - 1
Pal/src/pal_internal.h

@@ -366,7 +366,7 @@ int add_elf_object(void * addr, PAL_HANDLE handle, int type);
 #ifndef NO_INTERNAL_ALLOC
 #ifndef NO_INTERNAL_ALLOC
 void init_slab_mgr (int alignment);
 void init_slab_mgr (int alignment);
 void * malloc (size_t size);
 void * malloc (size_t size);
-void * remalloc (const void * mem, size_t size);
+void * malloc_copy(const void * mem, size_t size);
 void * calloc (size_t nmem, size_t size);
 void * calloc (size_t nmem, size_t size);
 char * strdup(const char *source);
 char * strdup(const char *source);
 void free (void * mem);
 void free (void * mem);

+ 13 - 6
Pal/src/slab.c

@@ -49,6 +49,7 @@ static void *mem_pool_end = &mem_pool[POOL_SIZE];
 
 
 #define STARTUP_SIZE    2
 #define STARTUP_SIZE    2
 
 
+/* This function is protected by slab_mgr_lock. */
 static inline void * __malloc (int size)
 static inline void * __malloc (int size)
 {
 {
     void * addr = NULL;
     void * addr = NULL;
@@ -117,18 +118,24 @@ void * malloc (size_t size)
         memset(ptr, 0xa5, size);
         memset(ptr, 0xa5, size);
 #endif
 #endif
 
 
+    if (!ptr) {
+        /*
+         * Normally, the PAL should not run out of memory.
+         * If malloc() failed internally, we cannot handle the
+         * condition and must terminate the current process.
+         */
+        printf("******** Out-of-memory in PAL ********\n");
+        _DkProcessExit(-1);
+    }
+
 #if PROFILING == 1
 #if PROFILING == 1
     pal_state.slab_time += _DkSystemTimeQuery() - before_slab;
     pal_state.slab_time += _DkSystemTimeQuery() - before_slab;
 #endif
 #endif
     return ptr;
     return ptr;
 }
 }
 
 
-/* This function is not realloc(). remalloc() allocates a new buffer
- * with with a provided size and copies the contents of the old buffer
- * to the new buffer. The old buffer is not freed. The old buffer must
- * be at least size bytes long. This function should probably be renamed
- * to something less likely to be confused with realloc. */
-void * remalloc (const void * mem, size_t size)
+// Copies data from `mem` to a newly allocated buffer of a specified size.
+void * malloc_copy (const void * mem, size_t size)
 {
 {
     void * nmem = malloc(size);
     void * nmem = malloc(size);
 
 

+ 1 - 1
Runtime/Makefile

@@ -1,6 +1,6 @@
 .PHONY: clean
 .PHONY: clean
 clean:
 clean:
-	rm -f libc.so.6 ld-linux-x86-64.so.2 libpthread.so.0 libm.so.6 libdl.so.2 libutil.so.1 crt1.o crti.o crtn.o liblibos.so.1 libnss_dns.so.2 libresolv.so.2 pal_gdb* pal-* pal_sec*
+	rm -f *.a *.o *.so *.so.* pal_gdb* pal-* pal_sec*
 
 
 .PHONY: all
 .PHONY: all
 all:
 all:

Some files were not shown because too many files changed in this diff