Prechádzať zdrojové kódy

[Pal/Linux-SGX] Use dedicated signal stack with correct XSAVE area

Previously, the Linux-SGX PAL re-used the application's normal stack for
signal handling, i.e., the signal stack was emulated on the normal stack
in Linux-SGX (note that the Linux PAL already has correct signal-stack
emulation). This could lead to catastrophic failures if the normal stack
got corrupted or overflowed. It also precluded implementations of
user-level threading.

This commit creates a dedicated signal stack so that signal handling
does not happen on the normal stack. In particular, signal-stack enclave
pages are added during enclave creation, the XSAVE area is correctly
saved/restored during signal handling, and FP registers are copied
to and from the signal-handler context. The XSAVE area is also reset
to its default value before EEXIT to prevent data leaks.

Since this commit introduces a dedicated signal stack, there is no need
for the old and brittle "PAL frame unwinding" mechanism; it is removed.
Isaku Yamahata 6 rokov pred
rodič
commit
0219349264

+ 133 - 127
Pal/src/host/Linux-SGX/db_exception.c

@@ -39,30 +39,25 @@
 typedef struct exception_event {
     PAL_IDX             event_num;
     PAL_CONTEXT *       context;
-    struct pal_frame *  frame;
 } PAL_EVENT;
 
 static void _DkGenericEventTrigger (PAL_IDX event_num, PAL_EVENT_HANDLER upcall,
-                                    PAL_NUM arg, struct pal_frame * frame,
-                                    PAL_CONTEXT * context)
+                                    PAL_NUM arg, PAL_CONTEXT* context)
 {
     struct exception_event event;
 
     event.event_num = event_num;
     event.context = context;
-    event.frame = frame;
 
     (*upcall) ((PAL_PTR) &event, arg, context);
 }
 
 static bool
-_DkGenericSignalHandle (int event_num, PAL_NUM arg, struct pal_frame * frame,
-                        PAL_CONTEXT * context)
-{
+_DkGenericSignalHandle(int event_num, PAL_NUM arg, PAL_CONTEXT* context) {
     PAL_EVENT_HANDLER upcall = _DkGetExceptionHandler(event_num);
 
     if (upcall) {
-        _DkGenericEventTrigger(event_num, upcall, arg, frame, context);
+        _DkGenericEventTrigger(event_num, upcall, arg, context);
         return true;
     }
 
@@ -72,44 +67,92 @@ _DkGenericSignalHandle (int event_num, PAL_NUM arg, struct pal_frame * frame,
 #define ADDR_IN_PAL(addr)  \
         ((void*)(addr) > TEXT_START && (void*)(addr) < TEXT_END)
 
-static struct pal_frame * get_frame (sgx_cpu_context_t * uc)
-{
-    unsigned long rbp;
-
-    if (uc) {
-        unsigned long rip = uc->rip;
-        rbp = uc->rbp;
-
-        if (!ADDR_IN_PAL(rip))
-            return NULL;
-    } else {
-        __asm__ volatile ("movq %%rbp, %0" : "=r"(rbp) :: "memory");
-    }
-
-    while (ADDR_IN_PAL(((unsigned long *) rbp)[1]))
-        rbp = *(unsigned long *) rbp;
-
-    struct pal_frame * frame = (struct pal_frame *) rbp - 1;
-
-    for (int i = 0 ; i < 8 ; i++) {
-        if (frame->identifier == PAL_FRAME_IDENTIFIER)
-            return frame;
-
-        frame = (struct pal_frame *) ((void *) frame - 8);
-    }
-
-    return NULL;
-}
-
 /*
  * Restore an sgx_cpu_context_t as generated by .Lhandle_exception. Execution will
  * continue as specified by the rip in the context.
  */
-noreturn static void restore_sgx_context(sgx_cpu_context_t* uc) {
-    SGX_DBG(DBG_E, "uc %p rsp 0x%08lx &rsp: %p rip 0x%08lx &rip: %p\n",
-            uc, uc->rsp, &uc->rsp, uc->rip, &uc->rip);
+noreturn static void restore_sgx_context(sgx_cpu_context_t* uc,
+                                         PAL_XREGS_STATE* xregs_state) {
+    SGX_DBG(DBG_E, "uc %p rsp 0x%08lx &rsp %p rip 0x%08lx +0x%08lx &rip %p\n",
+            uc, uc->rsp, &uc->rsp, uc->rip, uc->rip - (uintptr_t)TEXT_START, &uc->rip);
+
+    if (xregs_state == NULL)
+        xregs_state = (PAL_XREGS_STATE*)xsave_reset_state;
+    _restore_sgx_context(uc, xregs_state);
+}
+
+noreturn static void restore_pal_context(sgx_cpu_context_t* uc, PAL_CONTEXT* ctx) {
+    uc->rax = ctx->rax;
+    uc->rbx = ctx->rbx;
+    uc->rcx = ctx->rcx;
+    uc->rdx = ctx->rdx;
+    uc->rsp = ctx->rsp;
+    uc->rbp = ctx->rbp;
+    uc->rsi = ctx->rsi;
+    uc->rdi = ctx->rdi;
+    uc->r8  = ctx->r8;
+    uc->r9  = ctx->r9;
+    uc->r10 = ctx->r10;
+    uc->r11 = ctx->r11;
+    uc->r12 = ctx->r12;
+    uc->r13 = ctx->r13;
+    uc->r14 = ctx->r14;
+    uc->r15 = ctx->r15;
+    uc->rflags = ctx->efl;
+    uc->rip = ctx->rip;
+
+    restore_sgx_context(uc, ctx->fpregs);
+}
 
-    _restore_sgx_context(uc);
+static void save_pal_context(PAL_CONTEXT* ctx, sgx_cpu_context_t* uc,
+                             PAL_XREGS_STATE* xregs_state) {
+    memset(ctx, 0, sizeof(*ctx));
+
+    ctx->rax = uc->rax;
+    ctx->rbx = uc->rbx;
+    ctx->rcx = uc->rcx;
+    ctx->rdx = uc->rdx;
+    ctx->rsp = uc->rsp;
+    ctx->rbp = uc->rbp;
+    ctx->rsi = uc->rsi;
+    ctx->rdi = uc->rdi;
+    ctx->r8  = uc->r8;
+    ctx->r9  = uc->r9;
+    ctx->r10 = uc->r10;
+    ctx->r11 = uc->r11;
+    ctx->r12 = uc->r12;
+    ctx->r13 = uc->r13;
+    ctx->r14 = uc->r14;
+    ctx->r15 = uc->r15;
+    ctx->efl = uc->rflags;
+    ctx->rip = uc->rip;
+    union pal_csgsfs csgsfs = {
+        .cs = 0x33, // __USER_CS(5) | 0(GDT) | 3(RPL)
+        .fs = 0,
+        .gs = 0,
+        .ss = 0x2b, // __USER_DS(6) | 0(GDT) | 3(RPL)
+    };
+    ctx->csgsfs = csgsfs.csgsfs;
+
+    assert(xregs_state);
+    ctx->fpregs = xregs_state;
+
+    /* Emulate format for fp registers Linux sets up as signal frame.
+     * https://elixir.bootlin.com/linux/v5.4.13/source/arch/x86/kernel/fpu/signal.c#L86
+     * https://elixir.bootlin.com/linux/v5.4.13/source/arch/x86/kernel/fpu/signal.c#L459
+     */
+    PAL_FPX_SW_BYTES* fpx_sw = &xregs_state->fpstate.sw_reserved;
+    fpx_sw->magic1 = PAL_FP_XSTATE_MAGIC1;
+    fpx_sw->extended_size = xsave_size;
+    fpx_sw->xfeatures = xsave_features;
+    memset(fpx_sw->padding, 0, sizeof(fpx_sw->padding));
+    if (xsave_enabled) {
+        fpx_sw->xstate_size = xsave_size + PAL_FP_XSTATE_MAGIC2_SIZE;
+        *(__typeof__(PAL_FP_XSTATE_MAGIC2)*)((void*)xregs_state + xsave_size) =
+            PAL_FP_XSTATE_MAGIC2;
+    } else {
+        fpx_sw->xstate_size = xsave_size;
+    }
 }
 
 /*
@@ -151,8 +194,10 @@ static bool handle_ud(sgx_cpu_context_t * uc)
     return false;
 }
 
-void _DkExceptionHandler (unsigned int exit_info, sgx_cpu_context_t * uc)
-{
+void _DkExceptionHandler(
+    unsigned int exit_info, sgx_cpu_context_t* uc, PAL_XREGS_STATE* xregs_state) {
+    assert(IS_ALIGNED_PTR(xregs_state, PAL_XSTATE_ALIGN));
+
     union {
         sgx_arch_exit_info_t info;
         unsigned int intval;
@@ -169,8 +214,8 @@ void _DkExceptionHandler (unsigned int exit_info, sgx_cpu_context_t * uc)
             break;
         case SGX_EXCEPTION_VECTOR_UD:
             if (handle_ud(uc)) {
-                restore_sgx_context(uc);
-                return;
+                restore_sgx_context(uc, xregs_state);
+                /* NOTREACHED */
             }
             event_num = PAL_EVENT_ILLEGAL;
             break;
@@ -185,8 +230,8 @@ void _DkExceptionHandler (unsigned int exit_info, sgx_cpu_context_t * uc)
         case SGX_EXCEPTION_VECTOR_DB:
         case SGX_EXCEPTION_VECTOR_BP:
         default:
-            restore_sgx_context(uc);
-            return;
+            restore_sgx_context(uc, xregs_state);
+            /* NOTREACHED */
         }
     }
 
@@ -219,25 +264,13 @@ void _DkExceptionHandler (unsigned int exit_info, sgx_cpu_context_t * uc)
     }
 
     PAL_CONTEXT ctx;
-    memset(&ctx, 0, sizeof(ctx));
-    ctx.rax = uc->rax;
-    ctx.rbx = uc->rbx;
-    ctx.rcx = uc->rcx;
-    ctx.rdx = uc->rdx;
-    ctx.rsp = uc->rsp;
-    ctx.rbp = uc->rbp;
-    ctx.rsi = uc->rsi;
-    ctx.rdi = uc->rdi;
-    ctx.r8  = uc->r8;
-    ctx.r9  = uc->r9;
-    ctx.r10 = uc->r10;
-    ctx.r11 = uc->r11;
-    ctx.r12 = uc->r12;
-    ctx.r13 = uc->r13;
-    ctx.r14 = uc->r14;
-    ctx.r15 = uc->r15;
-    ctx.efl = uc->rflags;
-    ctx.rip = uc->rip;
+    save_pal_context(&ctx, uc, xregs_state);
+
+    /* TODO: save EXINFO from MISC region and populate below fields */
+    ctx.err = 0;
+    ctx.trapno = ei.info.valid ? ei.info.vector : event_num;
+    ctx.oldmask = 0;
+    ctx.cr2 = 0;
 
     PAL_NUM arg = 0;
     switch (event_num) {
@@ -254,8 +287,8 @@ void _DkExceptionHandler (unsigned int exit_info, sgx_cpu_context_t * uc)
         /* nothing */
         break;
     }
-    _DkGenericSignalHandle(event_num, arg, NULL, &ctx);
-    restore_sgx_context(uc);
+    _DkGenericSignalHandle(event_num, arg, &ctx);
+    restore_pal_context(uc, &ctx);
 }
 
 void _DkRaiseFailure (int error)
@@ -268,78 +301,51 @@ void _DkRaiseFailure (int error)
     PAL_EVENT event;
     event.event_num = PAL_EVENT_FAILURE;
     event.context   = NULL;
-    event.frame     = NULL;
 
     (*upcall) ((PAL_PTR) &event, error, NULL);
 }
 
-void _DkExceptionReturn (void * event)
-{
-    PAL_EVENT * e = event;
-    PAL_CONTEXT * ctx = e->context;
+void _DkExceptionReturn(void* event) {
+    PAL_EVENT* e = event;
+    PAL_CONTEXT* ctx = e->context;
 
     if (!ctx) {
-        struct pal_frame * frame = e->frame;
-        if (!frame)
-            return;
-
-        __clear_frame(frame);
-        arch_restore_frame(&frame->arch);
-
-        __asm__ volatile (
-                      "xor %%rax, %%rax\r\n"
-                      "leaveq\r\n"
-                      "retq\r\n" ::: "memory");
+        return;
     }
 
-    // Allocate sgx_cpu_context_t just below the "normal" stack (honoring the red
-    // zone) and then copy the content of ctx there. This is needed by
-    // restore_sgx_context.
-    sgx_cpu_context_t * uc = (void *)ctx->rsp - sizeof(sgx_cpu_context_t) - RED_ZONE_SIZE;
-    uc->rax = ctx->rax;
-    uc->rbx = ctx->rbx;
-    uc->rcx = ctx->rcx;
-    uc->rdx = ctx->rdx;
-    uc->rsp = ctx->rsp;
-    uc->rbp = ctx->rbp;
-    uc->rsi = ctx->rsi;
-    uc->rdi = ctx->rdi;
-    uc->r8  = ctx->r8;
-    uc->r9  = ctx->r9;
-    uc->r10 = ctx->r10;
-    uc->r11 = ctx->r11;
-    uc->r12 = ctx->r12;
-    uc->r13 = ctx->r13;
-    uc->r14 = ctx->r14;
-    uc->r15 = ctx->r15;
-    uc->rflags = ctx->efl;
-    uc->rip = ctx->rip;
-
-    restore_sgx_context(uc);
+    sgx_cpu_context_t uc;
+    restore_pal_context(&uc, ctx);
 }
 
-void _DkHandleExternalEvent (PAL_NUM event, sgx_cpu_context_t * uc)
-{
-    struct pal_frame * frame = get_frame(uc);
-
-    if (event == PAL_EVENT_RESUME && frame &&
-        (frame->func == DkSynchronizationObjectWait || frame->func == DkStreamsWaitEvents))
-        return;
-
-    if (!frame) {
-        frame = __alloca(sizeof(struct pal_frame));
-        frame->identifier = PAL_FRAME_IDENTIFIER;
-        frame->func = &_DkHandleExternalEvent;
-        frame->funcname = "_DkHandleExternalEvent";
-        arch_store_frame(&frame->arch);
-    }
+noreturn void _DkHandleExternalEvent(
+    PAL_NUM event, sgx_cpu_context_t* uc, PAL_XREGS_STATE* xregs_state) {
+    assert(event);
+    assert(IS_ALIGNED_PTR(xregs_state, PAL_XSTATE_ALIGN));
 
     /* We only end up in _DkHandleExternalEvent() if interrupted during
-     * host syscall; Dk* function will be unwound, so we must inform LibOS
-     * layer that PAL was interrupted (by setting PAL_ERRNO). */
+     * host syscall; Inform LibOS layer that PAL was interrupted (by setting PAL_ERRNO). */
     _DkRaiseFailure(PAL_ERROR_INTERRUPTED);
 
-    if (!_DkGenericSignalHandle(event, 0, frame, NULL)
-        && event != PAL_EVENT_RESUME)
+    PAL_CONTEXT ctx;
+    save_pal_context(&ctx, uc, xregs_state);
+    ctx.err = 0;
+    /* TODO: event is a PAL event; is that what LibOS/app wants to see? */
+    ctx.trapno = event;
+    ctx.oldmask = 0;
+    ctx.cr2 = 0;
+
+    if (!_DkGenericSignalHandle(event, 0, &ctx) && event != PAL_EVENT_RESUME) {
         _DkThreadExit(/*clear_child_tid=*/NULL);
+    }
+
+    /*
+     * The modification to PAL_CONTEXT is discarded.
+     * It is assumed that LibOS won't change context (GPRs, fp registers)
+     * if RIP is in PAL.
+     *
+     * TODO: in long term, record the signal and trigger the signal handler
+     * when returning from PAL by the use of
+     * ENTER_PAL_CALL/LEAVE_PAL_CALL/LEAVE_PAL_CALL_RETURN.
+     */
+    restore_sgx_context(uc, xregs_state);
 }

+ 124 - 93
Pal/src/host/Linux-SGX/enclave_entry.S

@@ -10,6 +10,14 @@
 	jmp .Lfail_loop\@
 .endm
 
+.macro CHECK_IF_SIGNAL_STACK_IS_USED stack_reg, label_on_stack, label_out_of_stack
+	cmpq %gs:SGX_SIG_STACK_LOW, \stack_reg
+	jb \label_out_of_stack
+	cmpq %gs:SGX_SIG_STACK_HIGH, \stack_reg
+	ja \label_out_of_stack
+	jmp \label_on_stack
+.endm
+
 	.global enclave_entry
 	.type enclave_entry, @function
 
@@ -102,7 +110,7 @@ enclave_entry:
 	#       (e.g., no clearing of YMM/ZMM regs). This is because we didn't read
 	#       the value of XFRM yet, so we don't know whether XRSTOR is safe at
 	#       this point.
-	leaq .Lxrstor_init_arg(%rip), %rax
+	leaq xsave_reset_state(%rip), %rax
 	fxrstor (%rax)
 	xorq %rax, %rax
 
@@ -398,11 +406,40 @@ enclave_entry:
 .Lemulate_tmp_rip_end:
 
 	movq SGX_GPR_RSP(%rbx), %rsi
-	subq $(SGX_CPU_CONTEXT_SIZE + RED_ZONE_SIZE), %rsi
+
+	CHECK_IF_SIGNAL_STACK_IS_USED %rsi, .Lon_signal_stack, .Lout_of_signal_stack
+
+.Lout_of_signal_stack:
+	movq %gs:SGX_SIG_STACK_HIGH, %rsi
+	# When switching to the not yet used signal stack we don't need to reserve
+	# a redzone. So move the stack pointer up here to undo the move down below.
+	addq $RED_ZONE_SIZE, %rsi
+
+	# Setup stack for the signal handler, _DkExceptionHandler().
+	# _restore_sgx_context() must be used to return back to the
+	# original context.
+	# Stack layout:
+	#     8-bytes padding: (8 mod 16) bytes aligned for x86 ABI
+	#                      NOTE: there is no saved rip to return.
+	#     sgx_cpu_context_t: 144 bytes
+	#     xsave area: PAL_XSTATE_ALIGN=64 bytes aligned
+	#     padding if necessary
+	#     RED_ZONE unless newly switching to signal stack
+#define STACK_PADDING_SIZE (PAL_FP_XSTATE_MAGIC2_SIZE + 8)
+#define STACK_FRAME_SUB \
+	(SGX_CPU_CONTEXT_SIZE + RED_ZONE_SIZE + STACK_PADDING_SIZE)
+.Lon_signal_stack:
+	movl xsave_size(%rip), %eax
+	addq $STACK_FRAME_SUB, %rax
+	subq %rax, %rsi
+
+	# Align xsave area to 64 bytes after sgx_cpu_context_t
+	andq $~(PAL_XSTATE_ALIGN - 1), %rsi
+	subq $SGX_CPU_CONTEXT_XSTATE_ALIGN_SUB, %rsi
 
 	# we have exitinfo in RDI, swap with the one on GPR
 	# and dump into the context
-	xchgq %rdi, SGX_GPR_RDI(%rbx)
+	xchgq %rdi, SGX_GPR_RDI(%rbx) # 1st argument for _DkExceptionHandler()
 	movq %rdi, SGX_CPU_CONTEXT_RDI(%rsi)
 
 	# dump the rest of context
@@ -442,14 +479,16 @@ enclave_entry:
 	movq SGX_GPR_RIP(%rbx), %rdi
 	movq %rdi, SGX_CPU_CONTEXT_RIP(%rsi)
 
-	# Pass pointer to sgx_cpu_context_t to _DkExceptionHandler
-	movq %rsi, SGX_GPR_RSI(%rbx)
+	# Pass pointer to sgx_cpu_context_t and PAL_XREGS_STATE to _DkExceptionHandler
+	movq %rsi, SGX_GPR_RSI(%rbx) # 2nd argument for _DkExceptionHandler()
+	movq %rsi, SGX_GPR_RDX(%rbx)
+	addq $SGX_CPU_CONTEXT_SIZE, SGX_GPR_RDX(%rbx) # 3rd argument for _DkExceptionHandler()
+	# TODO: save EXINFO in MISC region
 
 	# x86-64 sysv abi requires 16B alignment of stack before call instruction
 	# which implies a (8 mod 16)B alignment on function entry (due to implicit
-	# push %rip).
-	# Align the stack for _DkExceptionHandler according to this requirement.
-	andq $STACK_ALIGN, %rsi
+	# push %rip). Since we already aligned xsave area above, this requirement
+	# is satisfied.
 	subq $8, %rsi
 	movq %rsi, SGX_GPR_RSP(%rbx)
 
@@ -461,6 +500,13 @@ enclave_entry:
 	leaq _DkExceptionHandler(%rip), %rdi
 	movq %rdi, SGX_GPR_RIP(%rbx)
 
+	movq %rdx, %rbx
+	leaq SGX_CPU_CONTEXT_SIZE + 8(%rsi), %rdi
+	leaq 1f(%rip), %r11
+	jmp __save_xregs
+1:
+	movq %rbx, %rdx
+
 .Leexit_exception:
 	# clear the registers
 	xorq %rdi, %rdi
@@ -474,6 +520,23 @@ enclave_entry:
 	.type sgx_ocall, @function
 
 sgx_ocall:
+	# arguments:
+	#   RDI: OCALL number (code)
+	#   RSI: OCALL args on untrusted stack (ms)
+	#
+	# sgx_cpu_context_t:
+	#   RAX = 0: place holder
+	#   RCX
+	#   ...
+	#   RFLAGS
+	#   RIP
+	# xsave area
+	#   xregs
+	# (padding)
+	# --- stack may be non-contiguous as we may switch the stack to signal stack
+	# previous RBP
+	# previous RIP: pushed by callq
+
 	.cfi_startproc
 	pushq %rbp
 	.cfi_adjust_cfa_offset 8
@@ -481,8 +544,27 @@ sgx_ocall:
 	.cfi_offset %rbp, -16
 	.cfi_def_cfa_register %rbp
 
+	CHECK_IF_SIGNAL_STACK_IS_USED %rsp, .Lon_signal_stack_ocall, .Lout_of_signal_stack_ocall
+
+.Lout_of_signal_stack_ocall:
+	movq %gs:SGX_SIG_STACK_HIGH, %rsp
+
+.Lon_signal_stack_ocall:
+	movl xsave_size(%rip), %eax
+	addq $STACK_PADDING_SIZE, %rax
+	subq %rax, %rsp
+	andq $~(PAL_XSTATE_ALIGN - 1), %rsp
+
+	pushq %rdx
+	pushq %rdi
+	movq %rsp, %rdi
+	addq $2 * 8, %rdi # adjust pushq %rdx; pushq %rdi above
+	callq save_xregs
+	popq %rdi
+	popq %rdx
+
 	movq 8(%rbp), %rax
-	pushq %rax	# previous RIP
+	pushq %rax # previous RIP
 	pushfq
 
 	# Under GDB, single-stepping sets Trap Flag (TP) of EFLAGS,
@@ -501,24 +583,13 @@ sgx_ocall:
 	pushq %rdi
 	pushq %rsi
 	movq (%rbp), %rax
-	pushq %rax	# previous RBP
+	pushq %rax # previous RBP
 	leaq 16(%rbp), %rax
-	pushq %rax	# previous RSP
+	pushq %rax # previous RSP
 	pushq %rbx
 	pushq %rdx
 	pushq %rcx
-	# no RAX
-
-	movq %rsp, %rbp
-
-	# CFA shifted away from RBP=RSP by the size of GPR context except RAX
-	.cfi_adjust_cfa_offset SGX_CPU_CONTEXT_SIZE - 8
-
-	subq $XSAVE_SIZE,  %rsp
-	andq $XSAVE_ALIGN, %rsp
-	fxsave (%rsp)
-
-	pushq %rbp
+	pushq $0 # placeholder for RAX
 
 	# OCALL_EXIT should never return (see sgx_ocall_exit(): it always exits
 	# the thread). Skip setting SGX_OCALL_PREPARED to land in special-case
@@ -576,25 +647,19 @@ __morestack:
 #endif
 
 	.cfi_startproc
-	# CFA is away from RBP by ret_addr + saved_rbp + GPR context except RAX
-	.cfi_def_cfa %rbp, SGX_CPU_CONTEXT_SIZE - 8 + 16
-	.cfi_offset %rbp, -16
 
 	# Clear "extended" state (FPU aka x87, SSE, AVX, ...).
 
-	leaq .Lxrstor_init_arg(%rip), %rcx
 	# pal_sec.enclave_attributes.xfrm will always be zero before
 	# init_enclave has been called by pal_linux_main. So during early init
 	# nothing should use features not covered by fxrstor, like AVX.
-	movq (pal_sec + PAL_SEC_ENCLAVE_ATTRIBUTES + SGX_ATTRIBUTES_XFRM)(%rip), %rax
-	testq $XSAVE_NON_FX_MASK, %rax
-	je 1f
-	mov $0xffffffff, %edx
-	mov $0xffffffff, %eax
-	xrstor (%rcx)
-	jmp 2f
+
+	movq %rdi, %r10
+	leaq xsave_reset_state(%rip), %rdi
+	leaq 1f(%rip), %r11
+	jmp __restore_xregs
 1:
-	fxrstor (%rcx)
+	movq %r10, %rdi
 2:
 
 	# %rax is argument to EEXIT
@@ -628,31 +693,6 @@ __morestack:
 	ud2 # We should never get here.
 	.cfi_endproc
 
-	# fxsave/xsave area to reset extended state.
-	#
-	# The first 512 B are used by fxrstor. We set FCW = 0x037f and MXCSR =
-	# 0x1f80 and the rest to 0 (same values as xrstor uses in
-	# initialization mode).
-	#
-	# The fxsave area is followed by the 64 B xsave header. We use the
-	# "compact" format (XCOMP_BV[63] = 1). Since the rest of XSTATE_BV and
-	# XCOMP_BV are 0s, xrstor initializes all components (assuming it's
-	# called with RFBM set to all 1s). The fxsave area is ignored (because
-	# we request initialization not restore). And thanks to the compact
-	# format we don't need to provide anything after the header.
-.section .rodata
-	.balign 64
-.Lxrstor_init_arg:
-	.byte 0x7f, 0x03        # FCW
-	.skip 22, 0             # FSW, FTW, FOP, etc: all zero-initialized
-	.byte 0x80, 0x1f, 0, 0  # MXCSR
-	.skip 484, 0            # rest of fxstore area
-
-	.skip 15, 0	 	# XSTATE_BV and XCOMP_BV[55:0]
-	.byte 0x80	 	# XCOMP_BV[63:56] i.e. "compact" format
-	.skip 48, 0	 	# rest of xsave header
-.previous
-
 .Lreturn_from_ocall:
 	# PAL convention:
 	# RDI - return value
@@ -665,7 +705,8 @@ __morestack:
 	movq $0, %gs:SGX_OCALL_PREPARED
 .Lreturn_from_ocall_after_clear_ocall_prepared:
 
-	movq %rdi, %rax
+	# sgx_cpu_context_t::rax = %rdi
+	movq %rdi, SGX_CPU_CONTEXT_RAX(%rsp) # return value
 
 	# restore FSBASE if necessary
 	movq %gs:SGX_FSBASE, %rbx
@@ -674,46 +715,33 @@ __morestack:
 	.byte 0xf3, 0x48, 0x0f, 0xae, 0xd3 /* WRFSBASE %RBX */
 .Lno_fsbase:
 
-	popq %rbp
-	fxrstor (%rsp)
-	movq %rbp, %rsp
-
+	# Check if there was a signal
 	cmpq $0, %rsi
-	je .Lno_external_event
+	jne .Lexternal_event
+	movq %rsp, %rdi # %rdi = sgx_cpu_context_t* uc
+	movq %rsp, %rsi
+	addq $SGX_CPU_CONTEXT_SIZE, %rsi # %rsi = PAL_XREGS_STATE* xregs_state
+	# _restore_sgx_context restores rflags and fp registers. So we don't have to
+	# sanitize them like below.
+	jmp _restore_sgx_context
+	# NOTREACHED
 
+.Lexternal_event:
 	# clear the Alignment Check flag (%rFLAGS.AC) to prevent #AC-fault side channel;
-	# this overrides 8B on enclave stack but these 8B will be overwritten with RAX anyway
 	pushfq
 	andq $(~RFLAGS_AC), (%rsp)
 	popfq
 
-	pushq %rax
-	movq %rsi, %rdi
-	movq %rsp, %rsi
-	callq _DkHandleExternalEvent
-	popq %rax
-.Lno_external_event:
+	leaq xsave_reset_state(%rip), %rdi
+	callq restore_xregs
 
-	popq %rcx
-	popq %rdx
-	popq %rbx
-	addq $16, %rsp	# skip RSP and RBP
-	popq %rsi
-	popq %rdi
-	popq %r8
-	popq %r9
-	popq %r10
-	popq %r11
-	popq %r12
-	popq %r13
-	popq %r14
-	popq %r15
-	popfq
-	addq $8, %rsp	# skip RIP
-	popq %rbp
-	retq
+	movq %rsi, %rdi # 1st argument = PAL_NUM event
+	movq %rsp, %rsi # 2nd argument = sgx_cpu_context_t* uc
+	leaq SGX_CPU_CONTEXT_SIZE(%rsp), %rdx # 3rd argument = PAL_XREGS_STATE* xregs_state
+	callq _DkHandleExternalEvent
+	# NOTREACHED
 
-	# noreturn void _restore_sgx_context(sgx_cpu_context_t* uc);
+	# noreturn void _restore_sgx_context(sgx_cpu_context_t* uc, PAL_XREGS_STATE* xsave_area);
 	# Restore an sgx_cpu_context_t as generated by .Lhandle_exception. Execution will
 	# continue as specified by the rip in the context.
 	# If RDI (uc) points into the signal stack we need to ensure that
@@ -726,7 +754,10 @@ __morestack:
 	.type _restore_sgx_context, @function
 _restore_sgx_context:
 	.cfi_startproc
-	movq %rdi, %r15
+	xchgq %rdi, %rsi
+	callq restore_xregs
+
+	movq %rsi, %r15
 
 	movq SGX_CPU_CONTEXT_RAX(%r15), %rax
 	movq SGX_CPU_CONTEXT_RCX(%r15), %rcx

+ 5 - 0
Pal/src/host/Linux-SGX/generated-offsets.c

@@ -69,6 +69,8 @@ void dummy(void)
     OFFSET_T(SGX_CPU_CONTEXT_RFLAGS, sgx_cpu_context_t, rflags);
     OFFSET_T(SGX_CPU_CONTEXT_RIP, sgx_cpu_context_t, rip);
     DEFINE(SGX_CPU_CONTEXT_SIZE, sizeof(sgx_cpu_context_t));
+    DEFINE(SGX_CPU_CONTEXT_XSTATE_ALIGN_SUB,
+           sizeof(sgx_cpu_context_t) % PAL_XSTATE_ALIGN);
 
     /* struct enclave_tls */
     OFFSET(SGX_COMMON_SELF, enclave_tls, common.self);
@@ -77,6 +79,8 @@ void dummy(void)
     OFFSET(SGX_INITIAL_STACK_OFFSET, enclave_tls, initial_stack_offset);
     OFFSET(SGX_TMP_RIP, enclave_tls, tmp_rip);
     OFFSET(SGX_ECALL_RETURN_ADDR, enclave_tls, ecall_return_addr);
+    OFFSET(SGX_SIG_STACK_LOW, enclave_tls, sig_stack_low);
+    OFFSET(SGX_SIG_STACK_HIGH, enclave_tls, sig_stack_high);
     OFFSET(SGX_SSA, enclave_tls, ssa);
     OFFSET(SGX_GPR, enclave_tls, gpr);
     OFFSET(SGX_EXIT_TARGET, enclave_tls, exit_target);
@@ -144,6 +148,7 @@ void dummy(void)
     DEFINE(SSAFRAMENUM, SSAFRAMENUM);
     DEFINE(MEMORY_GAP, MEMORY_GAP);
     DEFINE(ENCLAVE_STACK_SIZE, ENCLAVE_STACK_SIZE);
+    DEFINE(ENCLAVE_SIG_STACK_SIZE, ENCLAVE_SIG_STACK_SIZE);
     DEFINE(DEFAULT_HEAP_MIN, DEFAULT_HEAP_MIN);
 
     /* pal_linux.h */

+ 0 - 97
Pal/src/host/Linux-SGX/pal_host.h

@@ -177,101 +177,4 @@ typedef struct pal_handle
 
 #define HANDLE_TYPE(handle)  ((handle)->hdr.type)
 
-struct arch_frame {
-#ifdef __x86_64__
-    unsigned long rsp, rbp, rbx, rsi, rdi, r12, r13, r14, r15;
-#else
-# error "unsupported architecture"
-#endif
-};
-
-#ifdef __x86_64__
-# define store_register(reg, var)     \
-    __asm__ volatile ("movq %%" #reg ", %0" : "=g" (var) :: "memory");
-
-# define store_register_in_frame(reg, f)     store_register(reg, (f)->reg)
-
-# define arch_store_frame(f)                     \
-    store_register_in_frame(rsp, f)              \
-    store_register_in_frame(rbp, f)              \
-    store_register_in_frame(rbx, f)              \
-    store_register_in_frame(rsi, f)              \
-    store_register_in_frame(rdi, f)              \
-    store_register_in_frame(r12, f)              \
-    store_register_in_frame(r13, f)              \
-    store_register_in_frame(r14, f)              \
-    store_register_in_frame(r15, f)
-
-# define restore_register(reg, var, clobber...)  \
-    __asm__ volatile ("movq %0, %%" #reg :: "g" (var) : "memory", ##clobber);
-
-# define restore_register_in_frame(reg, f)       \
-    restore_register(reg, (f)->reg,              \
-                     "r15", "r14", "r13", "r12", "rdi", "rsi", "rbx")
-
-# define arch_restore_frame(f)                   \
-    restore_register_in_frame(r15, f)            \
-    restore_register_in_frame(r14, f)            \
-    restore_register_in_frame(r13, f)            \
-    restore_register_in_frame(r12, f)            \
-    restore_register_in_frame(rdi, f)            \
-    restore_register_in_frame(rsi, f)            \
-    restore_register_in_frame(rbx, f)            \
-    restore_register_in_frame(rbp, f)            \
-    restore_register_in_frame(rsp, f)
-#else /* __x86_64__ */
-# error "unsupported architecture"
-#endif
-
-#define PAL_FRAME_IDENTIFIER 0xdeaddeadbeefbeef
-
-struct pal_frame {
-    volatile uint64_t           identifier;
-    void *                      func;
-    const char *                funcname;
-    struct arch_frame           arch;
-};
-
-/* DEP 12/25/17: This frame storage thing is important to mark volatile.
- * The compiler should not optimize out any of these changes, and
- * because some accesses can happen during an exception, these are not
- * visible to the compiler in an otherwise stack-local variable (so the
- * compiler will try to optimize out these assignments.
- */
-static inline
-void __store_frame (volatile struct pal_frame * frame,
-                    void * func, const char * funcname)
-{
-    arch_store_frame(&frame->arch)
-    frame->func = func;
-    frame->funcname = funcname;
-    __asm__ volatile ("nop" ::: "memory");
-    frame->identifier = PAL_FRAME_IDENTIFIER;
-}
-
-#define ENTER_PAL_CALL(name)                \
-    struct pal_frame frame;                 \
-    __store_frame(&frame, &(name), #name)
-
-
-static inline
-void __clear_frame (volatile struct pal_frame * frame)
-{
-    if (frame->identifier == PAL_FRAME_IDENTIFIER) {
-        __asm__ volatile ("nop" ::: "memory");
-        frame->identifier = 0;
-    }
-}
-
-#define LEAVE_PAL_CALL()                    \
-    do {                                    \
-        __clear_frame(&frame);              \
-    } while (0)
-
-#define LEAVE_PAL_CALL_RETURN(retval)       \
-    do {                                    \
-        __clear_frame(&frame);              \
-        return (retval);                    \
-    } while (0)
-
 #endif /* PAL_HOST_H */

+ 1 - 1
Pal/src/host/Linux-SGX/pal_linux.h

@@ -122,7 +122,7 @@ extern const uint32_t xsave_reset_state[];
 void init_xsave_size(uint64_t xfrm);
 void save_xregs(PAL_XREGS_STATE* xsave_area);
 void restore_xregs(const PAL_XREGS_STATE* xsave_area);
-noreturn void _restore_sgx_context(sgx_cpu_context_t* uc);
+noreturn void _restore_sgx_context(sgx_cpu_context_t* uc, PAL_XREGS_STATE* xsave_area);
 
 int init_trusted_files (void);
 

+ 1 - 0
Pal/src/host/Linux-SGX/pal_linux_defs.h

@@ -8,6 +8,7 @@
 #define SSAFRAMENUM         2
 #define MEMORY_GAP          PRESET_PAGESIZE
 #define ENCLAVE_STACK_SIZE  (PRESET_PAGESIZE * 64)
+#define ENCLAVE_SIG_STACK_SIZE (PRESET_PAGESIZE * 16)
 #define DEFAULT_HEAP_MIN    0x10000
 #define TRACE_ECALL         1
 #define TRACE_OCALL         1

+ 22 - 2
Pal/src/host/Linux-SGX/sgx_main.c

@@ -347,8 +347,13 @@ int initialize_enclave (struct pal_enclave * enclave)
         enum sgx_page_type type;
     };
 
-    struct mem_area * areas =
-        __alloca(sizeof(areas[0]) * (10 + enclave->thread_num));
+    /*
+     * 10 for manifest, SSA, TCS, etc
+     * + enclave->thread_num for normal stack
+     * + enclave->thread_num for signal stack
+     */
+    int area_num_max = 10 + enclave->thread_num * 2;
+    struct mem_area * areas = __alloca(sizeof(areas[0]) * area_num_max);
     int area_num = 0;
 
     /* The manifest needs to be allocated at the upper end of the enclave
@@ -393,6 +398,16 @@ int initialize_enclave (struct pal_enclave * enclave)
         area_num++;
     }
 
+    struct mem_area* sig_stack_areas = &areas[area_num]; /* memorize for later use */
+    for (uint32_t t = 0; t < enclave->thread_num; t++) {
+        areas[area_num] = (struct mem_area) {
+            .desc = "sig_stack", .skip_eextend = false, .fd = -1,
+            .is_binary = false, .addr = 0, .size = ENCLAVE_SIG_STACK_SIZE,
+            .prot = PROT_READ | PROT_WRITE, .type = SGX_PAGE_REG
+        };
+        area_num++;
+    }
+
     areas[area_num] = (struct mem_area) {
         .desc = "pal", .skip_eextend = false, .fd = enclave_image,
         .is_binary = true, .addr = 0, .size = 0 /* set below */,
@@ -498,6 +513,11 @@ int initialize_enclave (struct pal_enclave * enclave)
                 gs->tcs_offset = tcs_area->addr + g_page_size * t;
                 gs->initial_stack_offset =
                     stack_areas[t].addr + ENCLAVE_STACK_SIZE;
+                gs->sig_stack_low =
+                    sig_stack_areas[t].addr + enclave_secs.base;
+                gs->sig_stack_high =
+                    sig_stack_areas[t].addr + ENCLAVE_SIG_STACK_SIZE +
+                    enclave_secs.base;
                 gs->ssa = (void *) ssa_area->addr +
                     enclave->ssaframesize * SSAFRAMENUM * t +
                     enclave_secs.base;

+ 2 - 0
Pal/src/host/Linux-SGX/sgx_tls.h

@@ -17,6 +17,8 @@ struct enclave_tls {
         uint64_t tcs_offset;
         uint64_t initial_stack_offset;
         uint64_t tmp_rip;
+        uint64_t sig_stack_low;
+        uint64_t sig_stack_high;
         void*    ecall_return_addr;
         void*    ssa;
         sgx_pal_gpr_t* gpr;

+ 7 - 0
Pal/src/host/Linux-SGX/signer/pal-sgx-sign

@@ -385,6 +385,9 @@ def get_memory_areas(attr, args):
     for _ in range(attr['thread_num']):
         areas.append(MemoryArea('stack', size=offs.ENCLAVE_STACK_SIZE,
                                 flags=PAGEINFO_R | PAGEINFO_W | PAGEINFO_REG))
+    for _ in range(attr['thread_num']):
+        areas.append(MemoryArea('sig_stack', size=offs.ENCLAVE_SIG_STACK_SIZE,
+                                flags=PAGEINFO_R | PAGEINFO_W | PAGEINFO_REG))
 
     areas.append(MemoryArea('pal', file=args['libpal'], flags=PAGEINFO_REG))
 
@@ -438,6 +441,7 @@ def gen_area_content(attr, areas):
     tcs_area = find_area(areas, 'tcs')
     tls_area = find_area(areas, 'tls')
     stacks = find_areas(areas, 'stack')
+    sig_stacks = find_areas(areas, 'sig_stack')
 
     tcs_data = bytearray(tcs_area.size)
 
@@ -479,6 +483,9 @@ def gen_area_content(attr, areas):
         set_tls_field(t, offs.SGX_TCS_OFFSET, tcs_area.addr + offs.TCS_SIZE * t)
         set_tls_field(t, offs.SGX_INITIAL_STACK_OFFSET,
                       stacks[t].addr + stacks[t].size)
+        set_tls_field(t, offs.SGX_SIG_STACK_LOW, baseaddr() + sig_stacks[t].addr)
+        set_tls_field(t, offs.SGX_SIG_STACK_HIGH,
+                      baseaddr() + sig_stacks[t].addr + sig_stacks[t].size)
         set_tls_field(t, offs.SGX_SSA, ssa)
         set_tls_field(t, offs.SGX_GPR, ssa + SSAFRAMESIZE - offs.SGX_GPR_SIZE)
         set_tls_field(t, offs.SGX_MANIFEST_SIZE,