瀏覽代碼

[Pal/Linux-SGX] Increase limit of buffer size on untrusted stack

Linux-SGX PAL had the following logic in its read/write/recv/send OCALLs:
if the buffer was larger than 4KB, it was allocated on the untrusted heap
instead of the untrusted stack. This limit is too restrictive and leads to a high
number of untrusted-mmap OCALLs, hurting performance (e.g., Redis
receives/sends messages in batches of ~32KB, thus each receive/send
has to allocate on untrusted heap via untrusted-mmap OCALL). This
commit simply increases the limit to 512KB instead of 4KB (which is
still conservative given that Linux creates 8MB stacks).
Dmitrii Kuvaiskii 4 年之前
父節點
當前提交
786b5a7242

+ 11 - 4
Pal/src/host/Linux-SGX/enclave_ocalls.c

@@ -11,6 +11,13 @@
 #include <api.h>
 #include <asm/errno.h>
 
+/* Upper bound (in bytes) on a buffer that may be placed on the untrusted stack: the
+ * read/write/recv/send OCALLs compare the requested size against this limit and, if it is
+ * exceeded, allocate the buffer on the untrusted heap instead. The limit is conservatively set
+ * to 1/4 of the actual stack size. Currently THREAD_STACK_SIZE = 2MB, so this limit is 512KB.
+ * Note that the main thread is special in that its stack is handled by Linux, with the typical
+ * stack size of 8MB. Thus, the 512KB limit also works well for the main thread. */
+#define MAX_UNTRUSTED_STACK_BUF (THREAD_STACK_SIZE / 4)
+
 noreturn void ocall_exit(int exitcode, int is_exitgroup)
 {
     ms_ocall_exit_t * ms;
@@ -220,7 +227,7 @@ int ocall_read (int fd, void * buf, unsigned int count)
     void * obuf = NULL;
     ms_ocall_read_t * ms;
 
-    if (count > PRESET_PAGESIZE) {
+    if (count > MAX_UNTRUSTED_STACK_BUF) {
         retval = ocall_alloc_untrusted(ALLOC_ALIGNUP(count), &obuf);
         if (IS_ERR(retval))
             return retval;
@@ -271,7 +278,7 @@ int ocall_write (int fd, const void * buf, unsigned int count)
         obuf = (void*)buf;
     } else if (sgx_is_completely_within_enclave(buf, count)) {
         /* typical case of buf inside of enclave memory */
-        if (count > PRESET_PAGESIZE) {
+        if (count > MAX_UNTRUSTED_STACK_BUF) {
             /* buf is too big and may overflow untrusted stack, so use untrusted heap */
             retval = ocall_alloc_untrusted(ALLOC_ALIGNUP(count), &obuf);
             if (IS_ERR(retval))
@@ -762,7 +769,7 @@ int ocall_sock_recv (int sockfd, void * buf, unsigned int count,
     unsigned int len = addrlen ? *addrlen : 0;
     ms_ocall_sock_recv_t * ms;
 
-    if ((count + len) > PRESET_PAGESIZE) {
+    if ((count + len) > MAX_UNTRUSTED_STACK_BUF) {
         retval = ocall_alloc_untrusted(ALLOC_ALIGNUP(count), &obuf);
         if (IS_ERR(retval))
             return retval;
@@ -825,7 +832,7 @@ int ocall_sock_send (int sockfd, const void * buf, unsigned int count,
         obuf = (void*)buf;
     } else if (sgx_is_completely_within_enclave(buf, count)) {
         /* typical case of buf inside of enclave memory */
-        if ((count + addrlen) > PRESET_PAGESIZE) {
+        if ((count + addrlen) > MAX_UNTRUSTED_STACK_BUF) {
             /* buf is too big and may overflow untrusted stack, so use untrusted heap */
             retval = ocall_alloc_untrusted(ALLOC_ALIGNUP(count), &obuf);
             if (IS_ERR(retval))

+ 1 - 1
Pal/src/host/Linux-SGX/pal_linux_defs.h

@@ -1,7 +1,7 @@
 #ifndef PAL_LINUX_DEFS_H
 #define PAL_LINUX_DEFS_H
 
-#define THREAD_STACK_SIZE (PRESET_PAGESIZE * 2)
+#define THREAD_STACK_SIZE (PRESET_PAGESIZE * 512)  /* 2MB untrusted stack */
 #define ALT_STACK_SIZE    PRESET_PAGESIZE
 
 #define ENCLAVE_HIGH_ADDRESS    0x800000000

+ 17 - 0
Pal/src/host/Linux-SGX/sgx_main.c

@@ -967,6 +967,20 @@ static int load_enclave (struct pal_enclave * enclave,
     return 0;
 }
 
+/* Grow stack of main thread to THREAD_STACK_SIZE by allocating a large dummy array and probing
+ * each stack page (Linux dynamically grows the stack of the main thread but gets confused with
+ * huge-jump stack accesses coming from within the enclave). Note that other, non-main threads
+ * are created manually via clone(.., THREAD_STACK_SIZE, ..) and thus do not need this hack.
+ *
+ * Takes no arguments and returns nothing; its only effect is the sequence of reads described
+ * below. It is marked noinline, presumably so that the dummy array occupies a stack frame of
+ * its own instead of being merged into the caller's frame. */
+static void __attribute__ ((noinline)) force_linux_to_grow_stack() {
+    char dummy[THREAD_STACK_SIZE];
+    for (uint64_t i = 0; i < sizeof(dummy); i += PRESET_PAGESIZE) {
+        /* touch each page on the stack just to make sure it is not optimized away: the volatile
+         * asm copies &dummy[i] into %rbx, loads 8 bytes from that address back into %rbx
+         * (declared as clobbered) and discards the value; one probe per PRESET_PAGESIZE step */
+        __asm__ volatile("movq %0, %%rbx\r\n"
+                         "movq (%%rbx), %%rbx\r\n"
+                         : : "r"(&dummy[i]) : "%rbx");
+    }
+}
+
 int main (int argc, char ** argv, char ** envp)
 {
     char * manifest_uri = NULL;
@@ -977,6 +991,9 @@ int main (int argc, char ** argv, char ** envp)
     bool exec_uri_inferred = false; // Handle the case where the exec uri is
                                     // inferred from the manifest name somewhat
                                     // differently
+
+    force_linux_to_grow_stack();
+
     argc--;
     argv++;
 

+ 13 - 0
Pal/src/host/Linux-SGX/sgx_thread.c

@@ -181,6 +181,19 @@ int clone_thread(void) {
     if (IS_ERR_P(stack))
         return -ENOMEM;
 
+    /* Stack layout for the new thread looks like this (recall that stacks grow towards lower
+     * addresses on Linux on x86-64):
+     *
+     *       stack +--> +-------------------+
+     *                  |  child stack      | THREAD_STACK_SIZE
+     * child_stack +--> +-------------------+
+     *                  |  alternate stack  | ALT_STACK_SIZE - sizeof(PAL_TCB_LINUX)
+     *         tcb +--> +-------------------+
+     *                  |  PAL TCB          | sizeof(PAL_TCB_LINUX)
+     *                  +-------------------+
+     *
+     * Note that this whole memory region is zeroed out because we use mmap(). */
+
     void* child_stack_top = stack + THREAD_STACK_SIZE;
 
     /* initialize TCB at the top of the alternative stack */

+ 17 - 1
Pal/src/host/Linux/db_threading.c

@@ -90,7 +90,23 @@ int _DkThreadCreate (PAL_HANDLE * handle, int (*callback) (void *),
         ret = -ENOMEM;
         goto err;
     }
-    memset(stack, 0, THREAD_STACK_SIZE + ALT_STACK_SIZE);
+
+    /* Stack layout for the new thread looks like this (recall that stacks grow towards lower
+     * addresses on Linux on x86-64):
+     *
+     *       stack +--> +-------------------+
+     *                  |  child stack      | THREAD_STACK_SIZE
+     * child_stack +--> +-------------------+
+     *                  |  alternate stack  | ALT_STACK_SIZE - sizeof(PAL_TCB_LINUX)
+     *         tcb +--> +-------------------+
+     *                  |  PAL TCB          | sizeof(PAL_TCB_LINUX)
+     *                  +-------------------+
+     *
+     * We zero out only the first page of the main stack (to comply with the requirement of
+     * gcc ABI, in particular that the initial stack frame's return address must be NULL).
+     * We zero out the whole altstack (since it is small anyway) and also the PAL TCB. */
+    memset(stack + THREAD_STACK_SIZE - PRESET_PAGESIZE, 0, PRESET_PAGESIZE);
+    memset(stack + THREAD_STACK_SIZE, 0, ALT_STACK_SIZE);
 
     void * child_stack = stack + THREAD_STACK_SIZE;