Переглянути джерело

Align stack to 16 when calling C from asm

Michał Kowalczyk 5 років тому
батько
коміт
d7b3d6074a

+ 1 - 1
LibOS/shim/include/shim_table.h

@@ -394,7 +394,7 @@ int shim_do_clone (int flags, void * user_stack_addr, int * parent_tidptr,
 int shim_do_fork (void);
 int shim_do_vfork (void);
 int shim_do_execve (const char * file, const char ** argv, const char ** envp);
-int shim_do_exit (int error_code);
+__attribute__((noreturn)) int shim_do_exit (int error_code);
 pid_t shim_do_wait4 (pid_t pid, int * stat_addr, int option,
                      struct __kernel_rusage * ru);
 int shim_do_kill (pid_t pid, int sig);

+ 3 - 3
LibOS/shim/include/shim_utils.h

@@ -213,9 +213,9 @@ int check_elf_object (struct shim_handle * file);
 int load_elf_object (struct shim_handle * file, void * addr, size_t mapped);
 int load_elf_interp (struct shim_handle * exec);
 int free_elf_interp (void);
-int execute_elf_object (struct shim_handle * exec,
-                        int * argcp, const char ** argp,
-                        int nauxv, elf_auxv_t * auxp);
+void execute_elf_object (struct shim_handle * exec,
+                         int * argcp, const char ** argp,
+                         int nauxv, elf_auxv_t * auxp);
 int remove_loaded_libraries (void);
 
 /* gdb debugging support */

+ 4 - 5
LibOS/shim/src/elf/shim_rtld.c

@@ -1558,12 +1558,13 @@ int register_library (const char * name, unsigned long load_address)
     return 0;
 }
 
-int execute_elf_object (struct shim_handle * exec,
-                        int * argcp, const char ** argp,
-                        int nauxv, ElfW(auxv_t) * auxp)
+void execute_elf_object (struct shim_handle * exec,
+                         int * argcp, const char ** argp,
+                         int nauxv, ElfW(auxv_t) * auxp)
 {
     struct link_map * exec_map = __search_map_by_handle(exec);
     assert(exec_map);
+    assert((uintptr_t)argcp % 16 == 0);  // Stack should be aligned to 16 on entry point.
     assert((void*)argcp + sizeof(long) == argp || argp == NULL);
 
     auxp[0].a_type = AT_PHDR;
@@ -1597,8 +1598,6 @@ int execute_elf_object (struct shim_handle * exec,
 #else
 # error "architecture not supported"
 #endif
-    shim_do_exit(0);
-    return 0;
 }
 
 BEGIN_CP_FUNC(library)

+ 1 - 0
LibOS/shim/src/generated-offsets.c

@@ -12,5 +12,6 @@ void dummy(void)
     OFFSET_T(TCB_SP, shim_tcb_t, context.sp);
     OFFSET_T(TCB_RET_IP, shim_tcb_t, context.ret_ip);
     OFFSET_T(TCB_REGS, shim_tcb_t, context.regs);
+    DEFINE(SHIM_REGS_SIZE, sizeof(struct shim_regs));
 }
 

+ 8 - 17
LibOS/shim/src/shim_init.c

@@ -24,6 +24,7 @@
  */
 
 #include <shim_internal.h>
+#include <shim_table.h>
 #include <shim_tls.h>
 #include <shim_thread.h>
 #include <shim_handle.h>
@@ -172,7 +173,6 @@ void * migrated_memory_start;
 void * migrated_memory_end;
 void * migrated_shim_addr;
 
-void * initial_stack;
 const char ** initial_envp __attribute_migratable;
 
 char ** library_paths;
@@ -272,6 +272,8 @@ void * allocate_stack (size_t size, size_t protect_size, bool user)
     INC_PROFILE_OCCURENCE(alloc_stack_count);
 
     stack += protect_size;
+    // Ensure proper alignment for process' initial stack pointer value.
+    stack += (16 - (uintptr_t)stack % 16) % 16;
     DkVirtualMemoryProtect(stack, size, PAL_PROT_READ|PAL_PROT_WRITE);
 
     if (bkeep_mprotect(stack, size, PROT_READ|PROT_WRITE, flags) < 0)
@@ -533,7 +535,7 @@ struct shim_profile profile_root;
     } while (0)
 
 
-static void * __process_auxv (elf_auxv_t * auxp)
+static elf_auxv_t* __process_auxv (elf_auxv_t * auxp)
 {
     elf_auxv_t * av;
 
@@ -545,13 +547,6 @@ static void * __process_auxv (elf_auxv_t * auxp)
     return av + 1;
 }
 
-#define FIND_LAST_STACK(stack)                          \
-    do {                                                \
-        /* check if exist a NULL end */                 \
-        assert(*(uint64_t *) stack == 0);               \
-        stack += sizeof(uint64_t);                      \
-    } while (0)
-
 #ifdef PROFILE
 static void set_profile_enabled (const char ** envp)
 {
@@ -675,7 +670,7 @@ DEFINE_PROFILE_INTERVAL(init_signal,                init);
 
 extern PAL_HANDLE thread_start_event;
 
-int shim_init (int argc, void * args, void ** return_stack)
+__attribute__((noreturn)) void* shim_init (int argc, void * args)
 {
     debug_handle = PAL_CB(debug_stream);
     cur_process.vmid = (IDTYPE) PAL_CB(process_id);
@@ -709,9 +704,7 @@ int shim_init (int argc, void * args, void ** return_stack)
 
     /* call to figure out where the arguments are */
     FIND_ARG_COMPONENTS(args, argc, argv, envp, auxp);
-    initial_stack = __process_auxv(auxp);
-    int nauxv = (elf_auxv_t *) initial_stack - auxp;
-    FIND_LAST_STACK(initial_stack);
+    int nauxv = __process_auxv(auxp) - auxp;
 
 #ifdef PROFILE
     set_profile_enabled(envp);
@@ -789,7 +782,7 @@ restore:
         if (!DkStreamWrite(PAL_CB(parent_process), 0,
                            sizeof(struct newproc_response),
                            &res, NULL))
-            return -PAL_ERRNO;
+            shim_do_exit(-PAL_ERRNO);
     }
 
     debug("shim process initialized\n");
@@ -827,9 +820,7 @@ restore:
     if (cur_thread->exec)
         execute_elf_object(cur_thread->exec,
                            argcp, argp, nauxv, auxp);
-
-    *return_stack = initial_stack;
-    return 0;
+    shim_do_exit(0);
 }
 
 static int create_unique (int (*mkname) (char *, size_t, void *),

+ 8 - 25
LibOS/shim/src/start.S

@@ -27,35 +27,18 @@
 shim_start:
     .cfi_startproc
 
-/* Clear the frame pointer.  The ABI suggests this be done, to mark
-   the outermost frame obviously.  */
+    # Clear the frame pointer.  The ABI suggests this be done, to mark
+    # the outermost frame obviously.
     xorq %rbp, %rbp
-    movq %rsp, %rbp
 
-/* Extract the arguments as encoded on the stack and set up
-   the arguments for shim_init (int, void *, void **),
-   The arguments are passed via registers and on the stack:
-   argc:         %rdi
-   argv:         %rsi
-   stack:        %rdx
-*/
+    # Arguments for shim_init:
+    movq 0(%rsp), %rdi  # argc
+    leaq 8(%rsp), %rsi  # args
 
-    /* Align the stack to a 16 byte boundary to follow the ABI.  */
-    andq  $~15, %rsp
-
-    movq %rdi, %rcx         /* Possibly the stack has to be switched */
-    movq 0(%rbp), %rdi      /* Pop the argument count.  */
-    leaq 8(%rbp), %rsi      /* argv starts just at the current stack top.  */
-
-/* Provide the highest stack address to the user code (for stacks
-   which grow downwards).  */
-
-    pushq %rbp
-    movq %rsp, %rdx
+    # Required by System V AMD64 ABI.
+    andq  $~0xF, %rsp
 
     callq *shim_init@GOTPCREL(%rip)
 
-    popq %rbp
-    leaveq
-    retq
+    # TODO: Call initial %rdi to execute atexit callbacks.
     .cfi_endproc

+ 1 - 2
LibOS/shim/src/sys/shim_exit.c

@@ -196,7 +196,7 @@ int shim_do_exit_group (int error_code)
     return 0;
 }
 
-int shim_do_exit (int error_code)
+__attribute__((noreturn)) int shim_do_exit (int error_code)
 {
     INC_PROFILE_OCCURENCE(syscall_use_ipc);
     struct shim_thread * cur_thread = get_cur_thread();
@@ -219,5 +219,4 @@ int shim_do_exit (int error_code)
 #endif
 
     DkThreadExit();
-    return 0;
 }

+ 23 - 22
LibOS/shim/src/syscallas.S

@@ -33,24 +33,9 @@
 syscalldb:
         .cfi_startproc
 
-        # DEP 7/9/12: Push a stack pointer so clone can find the return address
+        # Create shim_regs struct on the stack.
         pushq %rbp
-        .cfi_def_cfa_offset 16
-        movq %rsp, %rbp
-        .cfi_offset 6,-16
-        .cfi_def_cfa_register 6
-
         pushq %rbx
-
-        cmp $LIBOS_SYSCALL_BOUND, %rax
-        jae isundef
-
-        movq shim_table@GOTPCREL(%rip), %rbx
-        movq (%rbx,%rax,8), %rbx
-        cmp $0, %rbx
-        je isundef
-
-isdef:
         pushq %rdi
         pushq %rsi
         pushq %rdx
@@ -63,17 +48,32 @@ isdef:
         pushq %r13
         pushq %r14
         pushq %r15
+        # shim_regs struct ends here.
+
+        movq %rsp, %rbp
+        .cfi_def_cfa_offset SHIM_REGS_SIZE+8  # +8 for ret_addr
+        .cfi_offset 6,-16        # saved_rbp is at CFA-16 (ret + saved_rbp)
+        .cfi_def_cfa_register 6  # %rbp
+
+        cmp $LIBOS_SYSCALL_BOUND, %rax
+        jae isundef
+
+        movq shim_table@GOTPCREL(%rip), %rbx
+        movq (%rbx,%rax,8), %rbx
+        cmp $0, %rbx
+        je isundef
 
         movq %rax, %fs:(SHIM_TCB_OFFSET + TCB_SYSCALL_NR)
-        leaq 16(%rbp), %rax
+        leaq SHIM_REGS_SIZE+8(%rbp), %rax
         movq %rax, %fs:(SHIM_TCB_OFFSET + TCB_SP)
-        movq 8(%rbp), %rax
+        movq SHIM_REGS_SIZE(%rbp), %rax
         movq %rax, %fs:(SHIM_TCB_OFFSET + TCB_RET_IP)
-        movq %rsp, %fs:(SHIM_TCB_OFFSET + TCB_REGS)
+        movq %rbp, %fs:(SHIM_TCB_OFFSET + TCB_REGS)
 
         /* Translating x86_64 kernel calling convention to user-space
          * calling convention */
         movq %r10, %rcx
+        andq $~0xF, %rsp  # Required by System V AMD64 ABI.
         call *%rbx
 
         movq $0, %fs:(SHIM_TCB_OFFSET + TCB_SYSCALL_NR)
@@ -81,6 +81,8 @@ isdef:
         movq $0, %fs:(SHIM_TCB_OFFSET + TCB_RET_IP)
         movq $0, %fs:(SHIM_TCB_OFFSET + TCB_REGS)
 
+ret:
+        movq %rbp, %rsp
         popq %r15
         popq %r14
         popq %r13
@@ -93,8 +95,6 @@ isdef:
         popq %rdx
         popq %rsi
         popq %rdi
-
-ret:
         popq %rbx
         popq %rbp
         retq
@@ -102,9 +102,10 @@ ret:
 isundef:
 #ifdef DEBUG
         mov %rax, %rdi
+        andq $~0xF, %rsp  # Required by System V AMD64 ABI.
         call *debug_unsupp@GOTPCREL(%rip)
 #endif
-        movq $-38, %rax
+        movq $-38, %rax  # ENOSYS
         jmp ret
 
         .cfi_endproc

+ 2 - 1
Pal/src/db_threading.c

@@ -79,11 +79,12 @@ void DkThreadYieldExecution (void)
 
 /* PAL call DkThreadExit: simply exit the current thread
    no matter what */
-void DkThreadExit (void)
+__attribute__((noreturn)) void DkThreadExit (void)
 {
     ENTER_PAL_CALL(DkThreadExit);
     _DkThreadExit();
     _DkRaiseFailure(PAL_ERROR_NOTKILLABLE);
+    while (1) {}
     LEAVE_PAL_CALL();
 }
 

+ 8 - 0
Pal/src/host/Linux-SGX/sgx_entry.S

@@ -47,8 +47,16 @@ sgx_entry:
 	leaq ocall_table(%rip), %rbx
 	movq (%rbx,%rdi,8), %rbx
 	movq %rsi, %rdi
+
+	pushq %rbp
+	movq %rsp, %rbp
+	andq $~0xF, %rsp  # Required by System V AMD64 ABI.
+
 	callq *%rbx
 
+	movq %rbp, %rsp
+	popq %rbp
+
 	movq %rax, %rdi
 	# Not interrupted
 	xorq %rsi, %rsi

+ 1 - 1
Pal/src/pal.h

@@ -398,7 +398,7 @@ DkThreadDelayExecution (PAL_NUM duration);
 void
 DkThreadYieldExecution (void);
 
-void
+__attribute__((noreturn)) void
 DkThreadExit (void);
 
 PAL_BOL

+ 1 - 0
Pal/src/security/Linux/main.c

@@ -427,6 +427,7 @@ __asm__ (".global start\r\n"
    messed up by function calls */
 __asm__ ("start:\r\n"
      "  movq %rsp, %rdi\r\n"
+     "  andq $~0xF, %rsp\r\n"
      "  call do_main\r\n");
 
 void do_main (void * args)

+ 3 - 2
Pal/src/user_start.S

@@ -31,9 +31,10 @@ _start:
 	mov %rsp, %rsi
 
 	/* Align the stack to a 16 byte boundary to follow the ABI.  */
-	and  $~15, %rsp
+	and  $~0xF, %rsp
 
-	/* push the exit address on the stack */
+	/* push the exit address on the stack and preserve %rsp alignment */
+	subq $8, %rsp
 	pushq %rdx
 
 	/* find the environs */