Forráskód Böngészése

[LibOS] Reorganize fields in shim_context, shim_regs, and shim_tls

* move syscall_nr from shim_context to shim_regs::orig_rax

* move ret_ip from shim_context to shim_regs::rip as cleanup

* move sp from shim_context to shim_regs

* context->regs == NULL no longer makes sense for restore_context() because
  that case never worked correctly: it tries to execute address 0, which
  probably results in SEGV or something equally bad.
  Also includes some code cleanup and simplification.

Signed-off-by: Isaku Yamahata <isaku.yamahata@gmail.com>
Isaku Yamahata 5 éve
szülő
commit
5564cb0a50

+ 0 - 1
LibOS/shim/include/shim_thread.h

@@ -313,7 +313,6 @@ struct clone_args {
     PAL_HANDLE initialize_event;
     struct shim_thread * parent, * thread;
     void * stack;
-    void * return_pc;
 };
 
 int clone_implementation_wrapper(struct clone_args * arg);

+ 3 - 3
LibOS/shim/include/shim_tls.h

@@ -20,6 +20,8 @@ struct lock_record {
 #define NUM_LOCK_RECORD_MASK (NUM_LOCK_RECORD - 1)
 
 struct shim_regs {
+    unsigned long           orig_rax;
+    unsigned long           rsp;
     unsigned long           r15;
     unsigned long           r14;
     unsigned long           r13;
@@ -35,12 +37,10 @@ struct shim_regs {
     unsigned long           rbx;
     unsigned long           rbp;
     unsigned long           rflags;
+    unsigned long           rip;
 };
 
 struct shim_context {
-    unsigned long           syscall_nr;
-    void *                  sp;
-    void *                  ret_ip;
     struct shim_regs *      regs;
     struct shim_context *   next;
     uint64_t                enter_time;

+ 1 - 1
LibOS/shim/src/Makefile

@@ -132,7 +132,7 @@ elf/shim_rtld.o: $(wildcard elf/*.h)
 	@echo [ $@ ]
 	@$(AS) $(ASFLAGS) $(defs) -E $< -o $@
 
-syscallas.S: asm-offsets.h
+syscallas.S shim_checkpoint.c: asm-offsets.h
 
 include ../../../Makefile.rules
 

+ 5 - 6
LibOS/shim/src/bookkeep/shim_signal.c

@@ -106,14 +106,12 @@ void __store_context (shim_tcb_t * tcb, PAL_CONTEXT * pal_context,
 {
     ucontext_t * context = &signal->context;
 
-    if (tcb && tcb->context.syscall_nr) {
+    if (tcb && tcb->context.regs && tcb->context.regs->orig_rax) {
         struct shim_context * ct = &tcb->context;
 
-        context->uc_mcontext.gregs[REG_RSP] = (unsigned long) ct->sp;
-        context->uc_mcontext.gregs[REG_RIP] = (unsigned long) ct->ret_ip;
-
         if (ct->regs) {
             struct shim_regs * regs = ct->regs;
+            context->uc_mcontext.gregs[REG_RIP] = regs->rip;
             context->uc_mcontext.gregs[REG_EFL] = regs->rflags;
             context->uc_mcontext.gregs[REG_R15] = regs->r15;
             context->uc_mcontext.gregs[REG_R14] = regs->r14;
@@ -129,6 +127,7 @@ void __store_context (shim_tcb_t * tcb, PAL_CONTEXT * pal_context,
             context->uc_mcontext.gregs[REG_RDI] = regs->rdi;
             context->uc_mcontext.gregs[REG_RBX] = regs->rbx;
             context->uc_mcontext.gregs[REG_RBP] = regs->rbp;
+            context->uc_mcontext.gregs[REG_RSP] = regs->rsp;
         }
 
         signal->context_stored = true;
@@ -643,10 +642,10 @@ __handle_one_signal (shim_tcb_t * tcb, int sig, struct shim_signal * signal)
 
     struct shim_context * context = NULL;
 
-    if (tcb->context.syscall_nr) {
+    if (tcb->context.regs && tcb->context.regs->orig_rax) {
         context = __alloca(sizeof(struct shim_context));
         memcpy(context, &tcb->context, sizeof(struct shim_context));
-        tcb->context.syscall_nr = 0;
+        tcb->context.regs->orig_rax = 0;
         tcb->context.next = context;
     }
 

+ 2 - 2
LibOS/shim/src/bookkeep/shim_thread.c

@@ -717,7 +717,7 @@ int resume_wrapper (void * param)
     __libc_tcb_t * libc_tcb = thread->tcb;
     assert(libc_tcb);
     shim_tcb_t * tcb = &libc_tcb->shim_tcb;
-    assert(tcb->context.sp);
+    assert(tcb->context.regs && tcb->context.regs->rsp);
 
     thread->in_vm = thread->is_alive = true;
     allocate_tls(libc_tcb, thread->user_tcb, thread);
@@ -759,7 +759,7 @@ BEGIN_RS_FUNC(running_thread)
 
         if (libc_tcb) {
             shim_tcb_t * tcb = &libc_tcb->shim_tcb;
-            assert(tcb->context.sp);
+            assert(tcb->context.regs && tcb->context.regs->rsp);
             tcb->debug_buf = shim_get_tls()->debug_buf;
             allocate_tls(libc_tcb, thread->user_tcb, thread);
             /* Temporarily disable preemption until the thread resumes. */

+ 3 - 3
LibOS/shim/src/generated-offsets.c

@@ -8,10 +8,10 @@
 void dummy(void)
 {
     OFFSET_T(SHIM_TCB_OFFSET, __libc_tcb_t, shim_tcb);
-    OFFSET_T(TCB_SYSCALL_NR, shim_tcb_t, context.syscall_nr);
-    OFFSET_T(TCB_SP, shim_tcb_t, context.sp);
-    OFFSET_T(TCB_RET_IP, shim_tcb_t, context.ret_ip);
     OFFSET_T(TCB_REGS, shim_tcb_t, context.regs);
+    OFFSET(SHIM_REGS_RSP, shim_regs, rsp);
+    OFFSET(SHIM_REGS_R15, shim_regs, r15);
+    OFFSET(SHIM_REGS_RIP, shim_regs, rip);
     DEFINE(SHIM_REGS_SIZE, sizeof(struct shim_regs));
 
     /* definitions */

+ 8 - 13
LibOS/shim/src/shim_checkpoint.c

@@ -20,6 +20,7 @@
  * This file contains codes for checkpoint / migration scheme of library OS.
  */
 
+#include "asm-offsets.h"
 #include <shim_internal.h>
 #include <shim_utils.h>
 #include <shim_thread.h>
@@ -1250,20 +1251,13 @@ int do_migration (struct newproc_cp_header * hdr, void ** cpptr)
 
 void restore_context (struct shim_context * context)
 {
-    int nregs = sizeof(struct shim_regs) / sizeof(void *);
-    void * regs[nregs + 1];
+    assert(context->regs);
+    struct shim_regs regs = *context->regs;
+    debug("restore context: SP = 0x%08lx, IP = 0x%08lx\n", regs.rsp, regs.rip);
 
-    if (context->regs)
-        memcpy(regs, context->regs, sizeof(struct shim_regs));
-    else
-        memset(regs, 0, sizeof(struct shim_regs));
-
-    debug("restore context: SP = %p, IP = %p\n", context->sp, context->ret_ip);
-
-    regs[nregs] = (void *) context->sp;
     /* don't clobber redzone. If sigaltstack is used,
      * this area won't be clobbered by signal context */
-    *(void **) (context->sp - 128 - 8) = context->ret_ip;
+    *(unsigned long*) (regs.rsp - RED_ZONE_SIZE - 8) = regs.rip;
 
     /* Ready to resume execution, re-enable preemption. */
     shim_tcb_t * tcb = shim_get_tls();
@@ -1272,6 +1266,7 @@ void restore_context (struct shim_context * context)
     memset(context, 0, sizeof(struct shim_context));
 
     __asm__ volatile("movq %0, %%rsp\r\n"
+                     "addq $2 * 8, %%rsp\r\n"    /* skip orig_rax and rsp */
                      "popq %%r15\r\n"
                      "popq %%r14\r\n"
                      "popq %%r13\r\n"
@@ -1287,8 +1282,8 @@ void restore_context (struct shim_context * context)
                      "popq %%rbx\r\n"
                      "popq %%rbp\r\n"
                      "popfq\r\n"
-                     "popq %%rsp\r\n"
+                     "movq "XSTRINGIFY(SHIM_REGS_RSP)" - "XSTRINGIFY(SHIM_REGS_RIP)"(%%rsp), %%rsp\r\n"
                      "movq $0, %%rax\r\n"
-                     "jmp *-128-8(%%rsp)\r\n"
+                     "jmp *-"XSTRINGIFY(RED_ZONE_SIZE)"-8(%%rsp)\r\n"
                      :: "g"(&regs) : "memory");
 }

+ 2 - 2
LibOS/shim/src/shim_init.c

@@ -799,7 +799,7 @@ restore:
     shim_tcb_t * cur_tcb = shim_get_tls();
     struct shim_thread * cur_thread = (struct shim_thread *) cur_tcb->tp;
 
-    if (cur_tcb->context.sp)
+    if (cur_tcb->context.regs && cur_tcb->context.regs->rsp)
         restore_context(&cur_tcb->context);
 
     if (cur_thread->exec)
@@ -1121,7 +1121,7 @@ int shim_clean (int err)
 
 #ifdef PROFILE
     if (ENTER_TIME) {
-        switch (shim_get_tls()->context.syscall_nr) {
+        switch (shim_get_tls()->context.regs ? shim_get_tls()->context.regs->orig_rax : 0) {
             case __NR_exit_group:
                 SAVE_PROFILE_INTERVAL_SINCE(syscall_exit_group, ENTER_TIME);
                 break;

+ 19 - 14
LibOS/shim/src/sys/shim_clone.c

@@ -53,12 +53,17 @@ void __attribute__((weak)) syscall_wrapper_after_syscalldb(void)
  * child thread can _not_ use parent stack. So return right after syscall
  * instruction as if syscall_wrapper is executed.
  */
-static void fixup_child_context(struct shim_context * context)
+static void fixup_child_context(struct shim_regs * regs)
 {
-    if (context->ret_ip == &syscall_wrapper_after_syscalldb) {
-        context->sp += RED_ZONE_SIZE;
-        context->regs->rflags = context->regs->r11;
-        context->ret_ip = (void*)context->regs->rcx;
+    if (regs->rip == (unsigned long)&syscall_wrapper_after_syscalldb) {
+        /*
+         * we don't need to emulate stack pointer change because %rsp is
+         * initialized to new child user stack passed to clone() system call.
+         * See the caller of fixup_child_context().
+         */
+        /* regs->rsp += RED_ZONE_SIZE; */
+        regs->rflags = regs->r11;
+        regs->rip = regs->rcx;
     }
 }
 
@@ -142,7 +147,6 @@ int clone_implementation_wrapper(struct clone_args * arg)
     }
 
     void * stack = arg->stack;
-    void * return_pc = arg->return_pc;
 
     struct shim_vma_val vma;
     lookup_vma(ALIGN_DOWN(stack), &vma);
@@ -161,13 +165,12 @@ int clone_implementation_wrapper(struct clone_args * arg)
     //user_stack_addr[0] ==> user provided function address
     //user_stack_addr[1] ==> arguments to user provided function.
 
-    debug("child swapping stack to %p return %p: %d\n",
-          stack, return_pc, my_thread->tid);
+    debug("child swapping stack to %p return 0x%lx: %d\n",
+          stack, regs.rip, my_thread->tid);
 
     tcb->context.regs = &regs;
-    tcb->context.sp = stack;
-    tcb->context.ret_ip = return_pc;
-    fixup_child_context(&tcb->context);
+    fixup_child_context(tcb->context.regs);
+    tcb->context.regs->rsp = (unsigned long)stack;
 
     restore_context(&tcb->context);
     return 0;
@@ -319,6 +322,7 @@ int shim_do_clone (int flags, void * user_stack_addr, int * parent_tidptr,
     if (!(flags & CLONE_VM)) {
         __libc_tcb_t * tcb;
         shim_tcb_t * old_shim_tcb = NULL;
+        void * parent_stack = NULL;
 
         if (thread->tcb) {
             tcb = thread->tcb;
@@ -334,8 +338,8 @@ int shim_do_clone (int flags, void * user_stack_addr, int * parent_tidptr,
             lookup_vma(ALIGN_DOWN(user_stack_addr), &vma);
             thread->stack_top = vma.addr + vma.length;
             thread->stack_red = thread->stack = vma.addr;
-            tcb->shim_tcb.context.sp = user_stack_addr;
-            tcb->shim_tcb.context.ret_ip = shim_get_tls()->context.ret_ip;
+            parent_stack = (void *)tcb->shim_tcb.context.regs->rsp;
+            tcb->shim_tcb.context.regs->rsp = (unsigned long)user_stack_addr;
         }
 
         thread->is_alive = true;
@@ -346,6 +350,8 @@ int shim_do_clone (int flags, void * user_stack_addr, int * parent_tidptr,
         ret = do_migrate_process(&migrate_fork, NULL, NULL, thread);
         if (old_shim_tcb)
             memcpy(&tcb->shim_tcb, old_shim_tcb, sizeof(tcb->shim_tcb));
+        if (parent_stack)
+            tcb->shim_tcb.context.regs->rsp = (unsigned long)parent_stack;
         if (ret < 0)
             goto failed;
 
@@ -384,7 +390,6 @@ int shim_do_clone (int flags, void * user_stack_addr, int * parent_tidptr,
     new_args.thread    = thread;
     new_args.parent    = self;
     new_args.stack     = user_stack_addr;
-    new_args.return_pc = shim_get_tls()->context.ret_ip;
 
     // Invoke DkThreadCreate to spawn off a child process using the actual
     // "clone" system call. DkThreadCreate allocates a stack for the child

+ 1 - 1
LibOS/shim/src/sys/shim_sigaction.c

@@ -162,7 +162,7 @@ int shim_do_sigaltstack (const stack_t * ss, stack_t * oss)
     if (oss)
         *oss = *cur_ss;
 
-    void * sp = shim_get_tls()->context.sp;
+    void * sp = (void *)shim_get_tls()->context.regs->rsp;
     /* check if thread is currently executing on an active altstack */
     if (!(cur_ss->ss_flags & SS_DISABLE) &&
         sp &&

+ 6 - 10
LibOS/shim/src/syscallas.S

@@ -59,11 +59,14 @@ syscalldb:
         pushq %r13
         pushq %r14
         pushq %r15
+        leaq SHIM_REGS_SIZE - SHIM_REGS_R15(%rsp), %rbx
+        pushq %rbx
+        pushq %rax
         # shim_regs struct ends here.
 
         movq %rsp, %rbp
-        .cfi_def_cfa_offset SHIM_REGS_SIZE+8  # +8 for ret_addr
-        .cfi_offset %rbp, -3 * 8    # saved_rbp is at CFA-24 (ret + saved_rflags + saved_rbp)
+        .cfi_def_cfa_offset SHIM_REGS_SIZE
+        .cfi_offset %rbp, -3 * 8    # saved_rbp is at CFA-24 (rip + saved_rflags + saved_rbp)
         .cfi_def_cfa_register %rbp  # %rbp
 
         cmp $LIBOS_SYSCALL_BOUND, %rax
@@ -74,11 +77,6 @@ syscalldb:
         cmp $0, %rbx
         je isundef
 
-        movq %rax, %fs:(SHIM_TCB_OFFSET + TCB_SYSCALL_NR)
-        leaq SHIM_REGS_SIZE+8(%rbp), %rax
-        movq %rax, %fs:(SHIM_TCB_OFFSET + TCB_SP)
-        movq SHIM_REGS_SIZE(%rbp), %rax
-        movq %rax, %fs:(SHIM_TCB_OFFSET + TCB_RET_IP)
         movq %rbp, %fs:(SHIM_TCB_OFFSET + TCB_REGS)
 
         /* Translating x86_64 kernel calling convention to user-space
@@ -87,13 +85,11 @@ syscalldb:
         andq $~0xF, %rsp  # Required by System V AMD64 ABI.
         call *%rbx
 
-        movq $0, %fs:(SHIM_TCB_OFFSET + TCB_SYSCALL_NR)
-        movq $0, %fs:(SHIM_TCB_OFFSET + TCB_SP)
-        movq $0, %fs:(SHIM_TCB_OFFSET + TCB_RET_IP)
         movq $0, %fs:(SHIM_TCB_OFFSET + TCB_REGS)
 
 ret:
         movq %rbp, %rsp
+        addq $2 * 8, %rsp   # skip orig_rax and rsp
         popq %r15
         popq %r14
         popq %r13