Browse Source

[LibOS] Emulate vfork() via fork()

The previous implementation of vfork() was both buggy (worked only in simple
cases) and not compatible with the new IPC implementation. We take a
shortcut for now and emulate vfork() via fork(); this is allowed by
POSIX. This commit also adds a LibOS regression test for vfork().
Dmitrii Kuvaiskii 4 years ago
parent
commit
ea234412c9

+ 27 - 0
LibOS/shim/include/shim_defs.h

@@ -13,6 +13,33 @@
  */
 #define CPSTORE_DERANDOMIZATION     0
 
+/* This macro disables the current vfork implementation and aliases it to fork.
+ *
+ * Rationale:
+ * Current vfork() implementation is broken and works only in simple cases.
+ * The implementation creates a new thread in the same process and runs it
+ * in place of the previous (parent) thread which called vfork(). When the
+ * "pseudo-process" new thread reaches execve(), it silently dies and
+ * switches execution back to the suspended parent thread (as per vfork
+ * semantics). Because execve() emulation creates a new host-OS process,
+ * this vfork implementation works in simple benign cases.
+ *
+ * However, this co-existence of the "pseudo-process" thread with threads
+ * of the parent process leads to bugs elsewhere in Graphene. In general,
+ * the rest of Graphene is not aware of a situation where two processes
+ * co-exist in the same Graphene instance and share memory. If the new
+ * "pseudo-process" thread makes syscalls in-between vfork() and execve(),
+ * abnormally dies, or receives a signal, Graphene may hang, segfault,
+ * or end up with inconsistent internal state.
+ *
+ * Therefore, instead of trying to support Linux semantics for vfork() --
+ * which requires adding corner-cases in signal handling and syscalls --
+ * we simply redirect vfork() to fork(). We assume that the performance hit
+ * is negligible (Graphene has to migrate internal state anyway, which is
+ * slow) and apps do not rely on insane Linux-specific semantics of vfork().
+ */
+#define ALIAS_VFORK_AS_FORK 1
+
 #define DEFAULT_HEAP_MIN_SIZE       (256 * 1024 * 1024) /* 256MB */
 #define DEFAULT_MEM_MAX_NPAGES      (1024 * 1024)       /* 4GB */
 #define DEFAULT_BRK_MAX_SIZE        (256 * 1024)        /* 256KB */

+ 4 - 1
LibOS/shim/include/shim_thread.h

@@ -37,7 +37,7 @@ struct shim_thread {
     struct shim_thread * parent;
     /* thread leader */
     struct shim_thread * leader;
-    /* dummy thread */
+    /* dummy thread: stores blocked parent thread for vfork */
     struct shim_thread * dummy;
     /* child handles; protected by thread->lock */
     LISTP_TYPE(shim_thread) children;
@@ -264,7 +264,10 @@ void add_simple_thread (struct shim_simple_thread * thread);
 void del_simple_thread (struct shim_simple_thread * thread);
 
 int check_last_thread (struct shim_thread * self);
+
+#ifndef ALIAS_VFORK_AS_FORK
 void switch_dummy_thread (struct shim_thread * thread);
+#endif
 
 int walk_thread_list (int (*callback) (struct shim_thread *, void *, bool *),
                       void * arg);

+ 2 - 0
LibOS/shim/src/bookkeep/shim_thread.c

@@ -564,6 +564,7 @@ out:
     return ret;
 }
 
+#ifndef ALIAS_VFORK_AS_FORK
 void switch_dummy_thread (struct shim_thread * thread)
 {
     struct shim_thread * real_thread = thread->dummy;
@@ -596,6 +597,7 @@ void switch_dummy_thread (struct shim_thread * thread)
                        "a"(child)
                      : "memory");
 }
+#endif
 
 BEGIN_CP_FUNC(thread)
 {

+ 4 - 0
LibOS/shim/src/sys/shim_exit.c

@@ -173,11 +173,13 @@ noreturn int shim_do_exit_group (int error_code)
     if (debug_handle)
         sysparser_printf("---- shim_exit_group (returning %d)\n", error_code);
 
+#ifndef ALIAS_VFORK_AS_FORK
     if (cur_thread->dummy) {
         cur_thread->term_signal = 0;
         thread_exit(cur_thread, true);
         switch_dummy_thread(cur_thread);
     }
+#endif
 
     debug("now kill other threads in the process\n");
     do_kill_proc(cur_thread->tgid, cur_thread->tgid, SIGKILL, false);
@@ -202,11 +204,13 @@ noreturn int shim_do_exit (int error_code)
     if (debug_handle)
         sysparser_printf("---- shim_exit (returning %d)\n", error_code);
 
+#ifndef ALIAS_VFORK_AS_FORK
     if (cur_thread->dummy) {
         cur_thread->term_signal = 0;
         thread_exit(cur_thread, true);
         switch_dummy_thread(cur_thread);
     }
+#endif
 
     try_process_exit(error_code, 0);
 

+ 6 - 5
LibOS/shim/src/sys/shim_vfork.c

@@ -35,13 +35,13 @@
 #include <linux/futex.h>
 #include <errno.h>
 
-struct vfork_args {
-    PAL_HANDLE create_event;
-    struct shim_thread * thread;
-};
-
 int shim_do_vfork (void)
 {
+#ifdef ALIAS_VFORK_AS_FORK
+    debug("vfork() is an alias to fork() in Graphene, calling fork() now\n");
+    return shim_do_fork();
+#else
+    /* NOTE: leaving this old implementation for historical reference */
     INC_PROFILE_OCCURENCE(syscall_use_ipc);
 
     /* DEP 7/7/12 - Why r13?
@@ -114,4 +114,5 @@ int shim_do_vfork (void)
 
     /* here we return immediately, no letting the hooks mes up our stack */
     return 0;
+#endif
 }

+ 12 - 2
LibOS/shim/test/regression/00_bootstrap.py

@@ -40,10 +40,20 @@ regression.add_check(name="2 page child binary",
 rv = regression.run_checks()
 if rv: sys.exit(rv)
 
-# Running Fork and Exec
+# Running fork and exec
 regression = Regression(loader, "fork_and_exec")
 
-regression.add_check(name="Fork and exec 2 page child binary",
+regression.add_check(name="fork and exec 2 page child binary",
+    check=lambda res: "child exited with status: 0" in res[0].out and \
+                      "test completed successfully" in res[0].out)
+
+rv = regression.run_checks()
+if rv: sys.exit(rv)
+
+# Running vfork and exec
+regression = Regression(loader, "vfork_and_exec")
+
+regression.add_check(name="vfork and exec 2 page child binary",
     check=lambda res: "child exited with status: 0" in res[0].out and \
                       "test completed successfully" in res[0].out)
 

+ 15 - 6
LibOS/shim/test/regression/fork_and_exec.c

@@ -1,9 +1,9 @@
-#include <stdlib.h>
-#include <stdio.h>
 #include <errno.h>
-#include <unistd.h>
+#include <stdio.h>
+#include <stdlib.h>
 #include <sys/types.h>
 #include <sys/wait.h>
+#include <unistd.h>
 
 int main(int argc, const char** argv, const char** envp) {
     pid_t child_pid;
@@ -11,12 +11,21 @@ int main(int argc, const char** argv, const char** envp) {
     /* duplicate STDOUT into newfd and pass it as exec_victim argument
      * (it will be inherited by exec_victim) */
     int newfd = dup(1);
-    char fd_argv[4];
-    snprintf(fd_argv, 4, "%d", newfd);
+    if (newfd < 0) {
+        perror("dup failed");
+        return 1;
+    }
+
+    char fd_argv[12];
+    snprintf(fd_argv, 12, "%d", newfd);
     char* const new_argv[] = {"./exec_victim", fd_argv, NULL};
 
     /* set environment variable to test that it is inherited by exec_victim */
-    setenv("IN_EXECVE", "1", 1);
+    int ret = setenv("IN_EXECVE", "1", 1);
+    if (ret < 0) {
+        perror("setenv failed");
+        return 1;
+    }
 
     child_pid = fork();
 

+ 55 - 0
LibOS/shim/test/regression/vfork_and_exec.c

@@ -0,0 +1,55 @@
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <unistd.h>
+
+/* Regression test: vfork() a child that execs ./exec_victim, then reap it.
+ * Prints "child exited with status: N" and, on success, a completion line. */
+int main(int argc, const char** argv, const char** envp) {
+    /* duplicate STDOUT into newfd and pass it as exec_victim argument
+     * (it will be inherited by exec_victim) */
+    int newfd = dup(1);
+    if (newfd < 0) {
+        perror("dup failed");
+        return 1;
+    }
+
+    char fd_argv[12]; /* enough for a 32-bit int in decimal plus NUL */
+    snprintf(fd_argv, sizeof(fd_argv), "%d", newfd);
+    char* const new_argv[] = {"./exec_victim", fd_argv, NULL};
+
+    /* set environment variable to test that it is inherited by exec_victim */
+    if (setenv("IN_EXECVE", "1", 1) < 0) {
+        perror("setenv failed");
+        return 1;
+    }
+
+    pid_t child_pid = vfork();
+
+    if (child_pid == 0) {
+        /* child performs execve(exec_victim); on failure it must leave via
+         * _exit(): returning from the function that called vfork() is undefined */
+        execv(new_argv[0], new_argv);
+        perror("execve failed");
+        _exit(1);
+    } else if (child_pid > 0) {
+        /* parent waits for child termination */
+        int status;
+        pid_t pid = wait(&status);
+        if (pid < 0) {
+            perror("wait failed");
+            return 1;
+        }
+        if (WIFEXITED(status))
+            printf("child exited with status: %d\n", WEXITSTATUS(status));
+    } else {
+        /* error */
+        perror("vfork failed");
+        return 1;
+    }
+
+    puts("test completed successfully");
+    return 0;
+}