Browse Source

- add linux-test-project to apps
- add memcached to apps
- merge #33
- merge #35
- fix futex() implementation in libOS
- fix partial alarm() implementation in libOS
- fix memory corruption in handle_map
- correct DkNotificationEventCreate usage
- add syscall chown()
- add syscall rt_sigsuspend()
- add syscall fchownat()
- fix getrlimit() implementation in libOS
- add syscall setrlimit()
- add ioctl(FIONBIO) implementation
- fix partial polling behavior
- implement AF_UNSPEC for sockets
- implement ipv4 mapped ipv6 addresses
- use AES-NI for encryption in SGX PAL
- rewrite mutex implementation in Linux PAL and SGX PAL
- rewrite exception handling in Linux PAL and SGX PAL
- fix issue #27

Chia-Che Tsai 7 years ago
parent
commit
b6e06b5f79
63 changed files with 2490 additions and 587 deletions
  1. 3 2
      LibOS/shim/include/shim_handle.h
  2. 5 0
      LibOS/shim/include/shim_table.h
  3. 3 1
      LibOS/shim/include/shim_thread.h
  4. 1 1
      LibOS/shim/include/shim_utils.h
  5. 5 5
      LibOS/shim/src/bookkeep/shim_handle.c
  6. 1 2
      LibOS/shim/src/bookkeep/shim_signal.c
  7. 8 8
      LibOS/shim/src/bookkeep/shim_thread.c
  8. 1 1
      LibOS/shim/src/fs/proc/thread.c
  9. 93 39
      LibOS/shim/src/shim_async.c
  10. 2 0
      LibOS/shim/src/shim_checkpoint.c
  11. 1 1
      LibOS/shim/src/shim_init.c
  12. 7 1
      LibOS/shim/src/shim_parser.c
  13. 6 6
      LibOS/shim/src/shim_syscalls.c
  14. 138 139
      LibOS/shim/src/sys/shim_alarm.c
  15. 2 2
      LibOS/shim/src/sys/shim_clone.c
  16. 58 0
      LibOS/shim/src/sys/shim_fs.c
  17. 126 48
      LibOS/shim/src/sys/shim_futex.c
  18. 18 3
      LibOS/shim/src/sys/shim_getrlimit.c
  19. 20 5
      LibOS/shim/src/sys/shim_ioctl.c
  20. 1 1
      LibOS/shim/src/sys/shim_migrate.c
  21. 1 1
      LibOS/shim/src/sys/shim_msgget.c
  22. 3 2
      LibOS/shim/src/sys/shim_poll.c
  23. 1 1
      LibOS/shim/src/sys/shim_semget.c
  24. 24 0
      LibOS/shim/src/sys/shim_sigaction.c
  25. 55 16
      LibOS/shim/src/sys/shim_socket.c
  26. 2 2
      LibOS/shim/src/syscallas.S
  27. 6 5
      LibOS/shim/test/Makefile
  28. 1 1
      LibOS/shim/test/apps/lighttpd/Makefile
  29. 9 0
      LibOS/shim/test/apps/lmbench/lmbench-2.5/src/lat_proc.c
  30. 21 0
      LibOS/shim/test/apps/lmbench/lmbench-2.5/src/lat_syscall.c
  31. 36 0
      LibOS/shim/test/apps/ltp/BLOCKED
  32. 36 0
      LibOS/shim/test/apps/ltp/Makefile
  33. 290 0
      LibOS/shim/test/apps/ltp/PASSED
  34. 17 0
      LibOS/shim/test/apps/ltp/block_tests.awk
  35. 18 0
      LibOS/shim/test/apps/ltp/manifest.template
  36. 19 0
      LibOS/shim/test/apps/ltp/passed_tests_only.awk
  37. 29 0
      LibOS/shim/test/apps/ltp/run_in_graphene.awk
  38. 18 0
      LibOS/shim/test/apps/ltp/runltp.patch
  39. 42 0
      LibOS/shim/test/apps/memcached/Makefile
  40. 54 0
      LibOS/shim/test/apps/memcached/memcached.manifest.template
  41. 3 2
      LibOS/shim/test/inline/Makefile
  42. 2 2
      Pal/src/host/Linux-SGX/Makefile
  43. 116 0
      Pal/src/host/Linux-SGX/crypto/aes.c
  44. 816 0
      Pal/src/host/Linux-SGX/crypto/aes_ni.S
  45. 7 3
      Pal/src/host/Linux-SGX/db_exception.c
  46. 4 4
      Pal/src/host/Linux-SGX/db_files.c
  47. 2 0
      Pal/src/host/Linux-SGX/db_main.c
  48. 82 76
      Pal/src/host/Linux-SGX/db_mutex.c
  49. 3 4
      Pal/src/host/Linux-SGX/db_semaphore.c
  50. 20 15
      Pal/src/host/Linux-SGX/db_sockets.c
  51. 40 56
      Pal/src/host/Linux-SGX/enclave_framework.c
  52. 19 6
      Pal/src/host/Linux-SGX/pal_host.h
  53. 2 2
      Pal/src/host/Linux-SGX/pal_linux.h
  54. 4 0
      Pal/src/host/Linux-SGX/pal_linux_defs.h
  55. 9 1
      Pal/src/host/Linux-SGX/sgx-driver/gsgx_main.c
  56. 16 0
      Pal/src/host/Linux-SGX/sgx_enclave.c
  57. 15 10
      Pal/src/host/Linux-SGX/sgx_framework.c
  58. 6 2
      Pal/src/host/Linux/db_exception.c
  59. 61 88
      Pal/src/host/Linux/db_mutex.c
  60. 15 8
      Pal/src/host/Linux/db_pipes.c
  61. 3 4
      Pal/src/host/Linux/db_semaphore.c
  62. 46 3
      Pal/src/host/Linux/db_sockets.c
  63. 18 8
      Pal/src/host/Linux/pal_host.h

+ 3 - 2
LibOS/shim/include/shim_handle.h

@@ -278,7 +278,6 @@ struct shim_sem_handle {
 };
 
 struct shim_futex_handle {
-    PAL_HANDLE          event;
     unsigned int *      uaddr;
     struct list_head    waiters;
     struct shim_vma *   vma;
@@ -369,7 +368,9 @@ struct shim_fd_handle {
     struct shim_handle * handle;
 };
 
-#define MAX_FDS     1024
+#define MAX_MAX_FDS         (65536)
+#define DEFAULT_MAX_FDS     (1024)
+extern unsigned int max_fds;
 
 struct shim_handle_map {
     /* the top of created file descriptors */

+ 5 - 0
LibOS/shim/include/shim_table.h

@@ -409,6 +409,8 @@ int shim_do_unlink (const char * file);
 int shim_do_readlink (const char * file, char * buf, int bufsize);
 int shim_do_chmod (const char * filename, mode_t mode);
 int shim_do_fchmod (int fd, mode_t mode);
+int shim_do_chown (const char * filename, uid_t user, gid_t group);
+int shim_do_fchown (int fd, uid_t user, gid_t group);
 mode_t shim_do_umask (mode_t mask);
 int shim_do_gettimeofday (struct __kernel_timeval * tv,
                           struct __kernel_timezone * tz);
@@ -425,6 +427,7 @@ pid_t shim_do_getpgrp (void);
 int shim_do_setsid (void);
 int shim_do_getpgid (pid_t pid);
 int shim_do_getsid (pid_t pid);
+int shim_do_sigsuspend (const __sigset_t * mask);
 void * shim_do_arch_prctl (int code, void * addr);
 int shim_do_setrlimit (int resource, struct __kernel_rlimit * rlim);
 int shim_do_chroot (const char * filename);
@@ -454,6 +457,8 @@ int shim_do_unlinkat (int dfd, const char * pathname, int flag);
 int shim_do_renameat (int olddfd, const char * pathname, int newdfd,
                       const char * newname);
 int shim_do_fchmodat (int dfd, const char * filename, mode_t mode);
+int shim_do_fchownat (int dfd, const char * filename, uid_t user, gid_t group,
+                      int flags);
 int shim_do_faccessat (int dfd, const char * filename, mode_t mode);
 int shim_do_pselect6 (int nfds, fd_set * readfds, fd_set * writefds,
                       fd_set * exceptfds, const struct __kernel_timespec * tsp,

+ 3 - 1
LibOS/shim/include/shim_thread.h

@@ -57,6 +57,7 @@ struct shim_thread {
     struct shim_signal_handle signal_handles[NUM_SIGS];
     struct shim_atomic has_signal;
     struct shim_signal_log * signal_logs;
+    bool suspend_on_signal;
 
     /* futex robust list */
     void * robust_list;
@@ -201,7 +202,8 @@ static inline void thread_setwait (struct shim_thread ** queue,
         thread = get_cur_thread();
     get_thread(thread);
     DkEventClear(thread->scheduler_event);
-    *queue = thread;
+    if (queue)
+        *queue = thread;
 }
 
 static inline void thread_sleep (void)

+ 1 - 1
LibOS/shim/include/shim_utils.h

@@ -235,7 +235,7 @@ int create_handle (const char * prefix, char * path, size_t size,
 
 /* Asynchronous event support */
 int init_async (void);
-int install_async_event (unsigned long time,
+int install_async_event (PAL_HANDLE object, unsigned long time,
                          void (*callback) (IDTYPE caller, void * arg),
                          void * arg);
 int create_async_helper (void);

+ 5 - 5
LibOS/shim/src/bookkeep/shim_handle.c

@@ -627,12 +627,12 @@ static struct shim_handle_map * __enlarge_handle_map
 
     size_t copy_size = sizeof(struct shim_fd_handle *) * map->fd_size;
     map->fd_size = size;
-    if (old_map && copy_size)
-        memcpy(map->map, old_map, copy_size);
-    memset(&map->map[map->fd_size], 0,
-           (sizeof(struct shim_fd_handle *) * size) - copy_size);
-    if (old_map)
+    memset(map->map, 0, sizeof(struct shim_fd_handle *) * size);
+    if (old_map) {
+        if (copy_size)
+            memcpy(map->map, old_map, copy_size);
         free(old_map);
+    }
     return map;
 }
 

+ 1 - 2
LibOS/shim/src/bookkeep/shim_signal.c

@@ -124,7 +124,6 @@ void __store_context (shim_tcb_t * tcb, PAL_CONTEXT * pal_context,
             context->uc_mcontext.gregs[REG_RDX] = regs->rdx;
             context->uc_mcontext.gregs[REG_RSI] = regs->rsi;
             context->uc_mcontext.gregs[REG_RDI] = regs->rdi;
-            context->uc_mcontext.gregs[REG_R12] = regs->r12;
             context->uc_mcontext.gregs[REG_RBX] = regs->rbx;
             context->uc_mcontext.gregs[REG_RBP] = regs->rbp;
         }
@@ -249,7 +248,7 @@ internal:
     }
 
     if (context)
-        debug("memory fault at %p (IP = %p)\n", arg, context->IP);
+        pal_printf("memory fault at %p (IP = %p)\n", arg, context->IP);
 
     struct shim_vma * vma = NULL;
     if (!lookup_supervma((void *) arg, 0, &vma)) {

+ 8 - 8
LibOS/shim/src/bookkeep/shim_thread.c

@@ -221,9 +221,9 @@ struct shim_thread * get_new_thread (IDTYPE new_tid)
                                  NUM_SIGS);
     thread->vmid = cur_process.vmid;
     create_lock(thread->lock);
-    thread->scheduler_event = DkNotificationEventCreate(1);
-    thread->exit_event = DkNotificationEventCreate(0);
-    thread->child_exit_event = DkNotificationEventCreate(0);
+    thread->scheduler_event = DkNotificationEventCreate(PAL_TRUE);
+    thread->exit_event = DkNotificationEventCreate(PAL_FALSE);
+    thread->child_exit_event = DkNotificationEventCreate(PAL_FALSE);
     return thread;
 }
 
@@ -240,7 +240,7 @@ struct shim_thread * get_new_internal_thread (void)
     thread->tid   = new_tid;
     thread->in_vm = thread->is_alive = true;
     create_lock(thread->lock);
-    thread->exit_event = DkNotificationEventCreate(0);
+    thread->exit_event = DkNotificationEventCreate(PAL_FALSE);
     return thread;
 }
 
@@ -278,7 +278,7 @@ struct shim_simple_thread * get_new_simple_thread (void)
     INIT_LIST_HEAD(&thread->list);
 
     create_lock(thread->lock);
-    thread->exit_event = DkNotificationEventCreate(0);
+    thread->exit_event = DkNotificationEventCreate(PAL_FALSE);
 
     return thread;
 }
@@ -625,9 +625,9 @@ BEGIN_RS_FUNC(thread)
     CP_REBASE(thread->signal_handles);
 
     create_lock(thread->lock);
-    thread->scheduler_event = DkNotificationEventCreate(1);
-    thread->exit_event = DkNotificationEventCreate(0);
-    thread->child_exit_event = DkNotificationEventCreate(0);
+    thread->scheduler_event = DkNotificationEventCreate(PAL_TRUE);
+    thread->exit_event = DkNotificationEventCreate(PAL_FALSE);
+    thread->child_exit_event = DkNotificationEventCreate(PAL_FALSE);
 
     add_thread(thread);
 

+ 1 - 1
LibOS/shim/src/fs/proc/thread.c

@@ -242,7 +242,7 @@ static int parse_thread_fd (const char * name, const char ** rest,
         if (*p < '0' || *p > '9')
             return -ENOENT;
         fd = fd * 10 + *p - '0';
-        if (fd >= MAX_FDS)
+        if (fd >= max_fds)
             return -ENOENT;
     }
 

+ 93 - 39
LibOS/shim/src/shim_async.c

@@ -35,6 +35,7 @@ struct async_event {
     struct list_head    list;
     void                (*callback) (IDTYPE caller, void * arg);
     void *              arg;
+    PAL_HANDLE          object;
     unsigned long       install_time;
     unsigned long       expire_time;
 };
@@ -43,13 +44,13 @@ static LIST_HEAD(async_list);
 
 enum {  HELPER_NOTALIVE, HELPER_ALIVE };
 
-static struct shim_atomic       async_helper_state;
-static struct shim_thread *     async_helper_thread;
-static PAL_HANDLE               async_helper_event;
+static struct shim_atomic   async_helper_state;
+static struct shim_thread * async_helper_thread;
+static AEVENTTYPE           async_helper_event;
 
 static LOCKTYPE async_helper_lock;
 
-int install_async_event (unsigned long time,
+int install_async_event (PAL_HANDLE object, unsigned long time,
                          void (*callback) (IDTYPE caller, void * arg),
                          void * arg)
 {
@@ -63,8 +64,9 @@ int install_async_event (unsigned long time,
     event->callback     = callback;
     event->arg          = arg;
     event->caller       = get_cur_tid();
-    event->install_time = install_time;
-    event->expire_time  = install_time + time;
+    event->object       = object;
+    event->install_time = time ? install_time : 0;
+    event->expire_time  = time ? install_time + time : 0;
 
     lock(async_helper_lock);
 
@@ -72,7 +74,7 @@ int install_async_event (unsigned long time,
     struct list_head * prev = &async_list;
 
     list_for_each_entry(tmp, &async_list, list) {
-        if (tmp->expire_time > event->expire_time)
+        if (event->expire_time && tmp->expire_time > event->expire_time)
             break;
         prev = &tmp->list;
     }
@@ -85,7 +87,7 @@ int install_async_event (unsigned long time,
     if (atomic_read(&async_helper_state) == HELPER_NOTALIVE)
         create_async_helper();
 
-    DkEventSet(async_helper_event);
+    set_event(&async_helper_event, 1);
     return 0;
 }
 
@@ -93,7 +95,7 @@ int init_async (void)
 {
     atomic_set(&async_helper_state, HELPER_NOTALIVE);
     create_lock(async_helper_lock);
-    async_helper_event = DkSynchronizationEventCreate(0);
+    create_event(&async_helper_event);
     return 0;
 }
 
@@ -112,8 +114,10 @@ static void shim_async_helper (void * arg)
     debug("set tcb to %p\n", &tcb);
 
     lock(async_helper_lock);
+    bool notme = (self != async_helper_thread);
+    unlock(async_helper_lock);
 
-    if (self != async_helper_thread) {
+    if (notme) {
         put_thread(self);
         DkThreadExit();
         return;
@@ -126,37 +130,102 @@ static void shim_async_helper (void * arg)
        swap any stack */
     unsigned long idle_cycles = 0;
     unsigned long latest_time;
-    struct async_event * next_event, * finished_event = NULL;
+    struct async_event * next_event = NULL;
+    PAL_HANDLE async_event_handle = event_handle(&async_helper_event);
+
+    int object_list_size = 32, object_num;
+    PAL_HANDLE polled;
+    PAL_HANDLE * local_objects =
+            malloc(sizeof(PAL_HANDLE) * (1 + object_list_size));
+    local_objects[0] = async_event_handle;
 
-    goto update;
+    goto update_status;
 
     while (atomic_read(&async_helper_state) == HELPER_ALIVE) {
+        unsigned long sleep_time;
+        if (next_event) {
+            sleep_time = next_event->expire_time - latest_time;
+            idle_cycles = 0;
+        } else if (object_num) {
+            sleep_time = NO_TIMEOUT;
+            idle_cycles = 0;
+        } else {
+            sleep_time = IDLE_SLEEP_TIME;
+            idle_cycles++;
+        }
+
+        polled = DkObjectsWaitAny(object_num + 1, local_objects, sleep_time);
+
+        if (!polled) {
+            if (next_event) {
+                debug("async event trigger at %llu\n",
+                      next_event->expire_time);
+
+                next_event->callback(next_event->caller, next_event->arg);
+
+                lock(async_helper_lock);
+                list_del(&next_event->list);
+                free(next_event);
+                goto update_list;
+            }
+            continue;
+        }
+
+        if (polled == async_event_handle) {
+            clear_event(&async_helper_event);
+update_status:
+            latest_time = DkSystemTimeQuery();
+            if (atomic_read(&async_helper_state) == HELPER_NOTALIVE) {
+                break;
+            } else {
+                lock(async_helper_lock);
+                goto update_list;
+            }
+        }
+
+        struct async_event * tmp, * n;
+
         lock(async_helper_lock);
-update:
-        latest_time = DkSystemTimeQuery();
-        next_event = NULL;
 
-        if (!list_empty(&async_list)) {
-            if (finished_event) {
-                list_del(&finished_event->list);
-                free(finished_event);
-                finished_event = NULL;
+        list_for_each_entry_safe(tmp, n, &async_list, list) {
+            if (tmp->object == polled) {
+                debug("async event trigger at %llu\n",
+                      latest_time);
+                unlock(async_helper_lock);
+                tmp->callback(tmp->caller, tmp->arg);
+                lock(async_helper_lock);
+                break;
             }
+        }
 
+update_list:
+        next_event = NULL;
+        object_num = 0;
+
+        if (!list_empty(&async_list)) {
             struct async_event * tmp, * n;
 
             list_for_each_entry_safe(tmp, n, &async_list, list) {
+                if (tmp->object) {
+                    local_objects[object_num + 1] = tmp->object;
+                    object_num++;
+                }
+
+                if (!tmp->install_time)
+                    continue;
+
                 if (tmp->expire_time > latest_time) {
                     next_event = tmp;
                     break;
                 }
 
-                debug("async event trigger at %llu (expect expiring at %llu)\n",
+                debug("async event trigger at %llu (expire at %llu)\n",
                       latest_time, tmp->expire_time);
-
                 list_del(&tmp->list);
+                unlock(async_helper_lock);
                 tmp->callback(tmp->caller, tmp->arg);
                 free(tmp);
+                lock(async_helper_lock);
             }
 
             idle_cycles = 0;
@@ -164,27 +233,12 @@ update:
 
         unlock(async_helper_lock);
 
-        if (!next_event && idle_cycles++ == MAX_IDLE_CYCLES) {
+        if (idle_cycles++ == MAX_IDLE_CYCLES) {
             debug("async helper thread reach helper cycle\n");
             /* walking away, if someone is issuing an event,
                they have to create another thread */
             break;
         }
-
-        unsigned long sleep_time = next_event ?
-                                   next_event->expire_time - latest_time :
-                                   IDLE_SLEEP_TIME;
-
-        PAL_HANDLE notify = DkObjectsWaitAny(1, &async_helper_event,
-                                             sleep_time);
-
-        /* if we are not waken up by someone, the waiting has finished */
-        if (!notify && next_event) {
-            debug("async event trigger at %llu\n", next_event->expire_time);
-
-            finished_event = next_event;
-            next_event->callback(next_event->caller, next_event->arg);
-        }
     }
 
     atomic_set(&async_helper_state, HELPER_NOTALIVE);
@@ -245,6 +299,6 @@ int terminate_async_helper (void)
     lock(async_helper_lock);
     atomic_xchg(&async_helper_state, HELPER_NOTALIVE);
     unlock(async_helper_lock);
-    DkEventSet(async_helper_event);
+    set_event(&async_helper_event, 1);
     return 0;
 }

+ 2 - 0
LibOS/shim/src/shim_checkpoint.c

@@ -1181,6 +1181,8 @@ void restore_context (struct shim_context * context)
                  "popq %%r14\r\n"
                  "popq %%r13\r\n"
                  "popq %%r12\r\n"
+                 "popq %%r11\r\n"
+                 "popq %%r10\r\n"
                  "popq %%r9\r\n"
                  "popq %%r8\r\n"
                  "popq %%rcx\r\n"

+ 1 - 1
LibOS/shim/src/shim_init.c

@@ -723,7 +723,7 @@ int shim_init (int argc, void * args, void ** return_stack)
 
     if (cpaddr) {
 restore:
-        thread_start_event = DkNotificationEventCreate(0);
+        thread_start_event = DkNotificationEventCreate(PAL_FALSE);
         RUN_INIT(restore_checkpoint,
                  &hdr.checkpoint.hdr, &hdr.checkpoint.mem,
                  (ptr_t) cpaddr, 0);

+ 7 - 1
LibOS/shim/src/shim_parser.c

@@ -216,7 +216,7 @@ struct parser_table {
     { .slow = 0, .parser = { NULL } }, /* rt_sigpending */
     { .slow = 0, .parser = { NULL } }, /* rt_sigtimedwait */
     { .slow = 0, .parser = { NULL } }, /* rt_sigqueueinfo */
-    { .slow = 0, .parser = { NULL } }, /* rt_sigsuspend */
+    { .slow = 1, .parser = { NULL } }, /* rt_sigsuspend */
     { .slow = 0, .parser = { NULL } }, /* sigaltstack */
     { .slow = 0, .parser = { NULL } }, /* utime */
     { .slow = 0, .parser = { NULL } }, /* mknod */
@@ -915,9 +915,15 @@ static void parse_futexop (const char * type, va_list * ap)
         case FUTEX_WAIT:
             PUTS("FUTEX_WAIT");
             break;
+        case FUTEX_WAIT_BITSET:
+            PUTS("FUTEX_WAIT_BITSET");
+            break;
         case FUTEX_WAKE:
             PUTS("FUTEX_WAKE");
             break;
+        case FUTEX_WAKE_BITSET:
+            PUTS("FUTEX_WAKE_BITSET");
+            break;
         case FUTEX_FD:
             PUTS("FUTEX_FD");
             break;

+ 6 - 6
LibOS/shim/src/shim_syscalls.c

@@ -476,10 +476,10 @@ DEFINE_SHIM_SYSCALL (chmod, 2, shim_do_chmod, int, const char *, filename,
 
 DEFINE_SHIM_SYSCALL (fchmod, 2, shim_do_fchmod, int, int, fd, mode_t, mode)
 
-SHIM_SYSCALL_PASSTHROUGH (chown, 3, int, const char *, filename,
-                          uid_t, user, gid_t, group)
+DEFINE_SHIM_SYSCALL (chown, 3, shim_do_chown, int, const char *, filename,
+                     uid_t, user, gid_t, group)
 
-SHIM_SYSCALL_PASSTHROUGH (fchown, 3, int, int, fd, uid_t, user, gid_t, group)
+DEFINE_SHIM_SYSCALL (fchown, 3, shim_do_fchown, int, int, fd, uid_t, user, gid_t, group)
 
 SHIM_SYSCALL_PASSTHROUGH (lchown, 3, int, const char *, filename,
                           uid_t, user, gid_t, group)
@@ -588,7 +588,7 @@ SHIM_SYSCALL_PASSTHROUGH (rt_sigtimedwait, 4, int, const __sigset_t *, uthese,
 SHIM_SYSCALL_PASSTHROUGH (rt_sigqueueinfo, 3, int, int, pid, int, sig,
                           siginfo_t *, uinfo)
 
-SHIM_SYSCALL_PASSTHROUGH (rt_sigsuspend, 1, int, const __sigset_t *, mask)
+DEFINE_SHIM_SYSCALL (rt_sigsuspend, 1, shim_do_sigsuspend, int, const __sigset_t *, mask)
 
 SHIM_SYSCALL_PASSTHROUGH (sigaltstack, 2, int, const stack_t *, ss, stack_t *,
                           oss)
@@ -1012,8 +1012,8 @@ DEFINE_SHIM_SYSCALL (mkdirat, 3, shim_do_mkdirat, int, int, dfd,
 SHIM_SYSCALL_PASSTHROUGH (mknodat, 4, int, int, dfd, const char *, filename,
                           int, mode, unsigned, dev)
 
-SHIM_SYSCALL_PASSTHROUGH (fchownat, 5, int, int, dfd, const char *, filename,
-                          uid_t, user, gid_t, group, int, flag)
+DEFINE_SHIM_SYSCALL (fchownat, 5, shim_do_fchownat, int, int, dfd,
+                     const char *, filename, uid_t, user, gid_t, group, int, flag)
 
 SHIM_SYSCALL_PASSTHROUGH (futimesat, 3, int, int, dfd, const char *, filename,
                           struct timeval *, utimes)

+ 138 - 139
LibOS/shim/src/sys/shim_alarm.c

@@ -1,139 +1,138 @@
-/* -*- mode:c; c-file-style:"k&r"; c-basic-offset: 4; tab-width:4; indent-tabs-mode:nil; mode:auto-fill; fill-column:78; -*- */
-/* vim: set ts=4 sw=4 et tw=78 fo=cqt wm=0: */
-
-/* Copyright (C) 2014 OSCAR lab, Stony Brook University
-   This file is part of Graphene Library OS.
-
-   Graphene Library OS is free software: you can redistribute it and/or
-   modify it under the terms of the GNU General Public License
-   as published by the Free Software Foundation, either version 3 of the
-   License, or (at your option) any later version.
-
-   Graphene Library OS is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-   GNU General Public License for more details.
-
-   You should have received a copy of the GNU General Public License
-   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
-
-/*
- * shim_alarm.c
- *
- * Implementation of system call "alarm", "setitmer" and "getitimer".
- */
-
-#include <shim_internal.h>
-#include <shim_table.h>
-#include <shim_thread.h>
-#include <shim_utils.h>
-#include <shim_signal.h>
-
-void signal_alarm (IDTYPE target, void * arg)
-{
-    debug("alarm goes off, signaling thread %u\n", target);
-
-    struct shim_thread * thread = lookup_thread(target);
-    if (!thread)
-        return;
-
-    append_signal(thread, SIGALRM, NULL, true);
-}
-
-int shim_do_alarm (unsigned int seconds)
-{
-    return install_async_event(seconds * 1000000,
-                               &signal_alarm,
-                               NULL);
-}
-
-static struct {
-    unsigned long   timeout;
-    unsigned long   reset;
-} real_itimer;
-
-void signal_itimer (IDTYPE target, void * arg)
-{
-    master_lock();
-
-    if (real_itimer.timeout != (unsigned long) arg) {
-        master_unlock();
-        return;
-    }
-
-    real_itimer.timeout += real_itimer.reset;
-    real_itimer.reset = 0;
-    master_unlock();
-}
-
-#ifndef ITIMER_REAL
-# define ITIMER_REAL 0
-#endif
-
-int shim_do_setitimer (int which, struct __kernel_itimerval * value,
-                       struct __kernel_itimerval * ovalue)
-{
-    if (which != ITIMER_REAL)
-        return -ENOSYS;
-
-    if (!value)
-        return -EFAULT;
-
-    unsigned long setup_time = DkSystemTimeQuery();
-
-    unsigned long next_value = value->it_value.tv_sec * 1000000
-                               + value->it_value.tv_usec;
-    unsigned long next_reset = value->it_interval.tv_sec * 1000000
-                               + value->it_interval.tv_usec;
-
-    master_lock();
-
-    unsigned long current_timeout = real_itimer.timeout > setup_time ?
-                                    real_itimer.timeout - setup_time : 0;
-    unsigned long current_reset = real_itimer.reset;
-
-    int ret = install_async_event(next_value, &signal_itimer,
-                                  (void *) (setup_time + next_value));
-
-    if (ret < 0) {
-        master_unlock();
-        return ret;
-    }
-
-    real_itimer.timeout = setup_time + next_value;
-    real_itimer.reset = next_reset;
-
-    master_unlock();
-
-    if (ovalue) {
-        ovalue->it_interval.tv_sec = current_reset / 1000000;
-        ovalue->it_interval.tv_usec = current_reset % 1000000;
-        ovalue->it_value.tv_sec = current_timeout / 1000000;
-        ovalue->it_value.tv_usec = current_timeout % 1000000;
-    }
-
-    return 0;
-}
-
-int shim_do_getitimer (int which, struct __kernel_itimerval * value)
-{
-    if (which != ITIMER_REAL)
-        return -ENOSYS;
-
-    if (!value)
-        return -EFAULT;
-
-    unsigned long setup_time = DkSystemTimeQuery();
-
-    master_lock();
-    unsigned long current_timeout = real_itimer.timeout > setup_time ?
-                                    real_itimer.timeout - setup_time : 0;
-    unsigned long current_reset = real_itimer.reset;
-    master_unlock();
-
-    value->it_interval.tv_sec = current_reset / 1000000;
-    value->it_interval.tv_usec = current_reset % 1000000;
-    value->it_value.tv_sec = current_timeout / 1000000;
-    value->it_value.tv_usec = current_timeout % 1000000;
-    return 0;
-}
+/* -*- mode:c; c-file-style:"k&r"; c-basic-offset: 4; tab-width:4; indent-tabs-mode:nil; mode:auto-fill; fill-column:78; -*- */
+/* vim: set ts=4 sw=4 et tw=78 fo=cqt wm=0: */
+
+/* Copyright (C) 2014 OSCAR lab, Stony Brook University
+   This file is part of Graphene Library OS.
+
+   Graphene Library OS is free software: you can redistribute it and/or
+   modify it under the terms of the GNU General Public License
+   as published by the Free Software Foundation, either version 3 of the
+   License, or (at your option) any later version.
+
+   Graphene Library OS is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
+
+/*
+ * shim_alarm.c
+ *
+ * Implementation of system calls "alarm", "setitimer" and "getitimer".
+ */
+
+#include <shim_internal.h>
+#include <shim_table.h>
+#include <shim_thread.h>
+#include <shim_utils.h>
+#include <shim_signal.h>
+
+void signal_alarm (IDTYPE target, void * arg)
+{
+    debug("alarm goes off, signaling thread %u\n", target);
+
+    struct shim_thread * thread = lookup_thread(target);
+    if (!thread)
+        return;
+
+    append_signal(thread, SIGALRM, NULL, true);
+}
+
+int shim_do_alarm (unsigned int seconds)
+{
+    uint64_t usecs = 1000000ULL * seconds;
+    return install_async_event(NULL, usecs, &signal_alarm, NULL);
+}
+
+static struct {
+    unsigned long   timeout;
+    unsigned long   reset;
+} real_itimer;
+
+void signal_itimer (IDTYPE target, void * arg)
+{
+    master_lock();
+
+    if (real_itimer.timeout != (unsigned long) arg) {
+        master_unlock();
+        return;
+    }
+
+    real_itimer.timeout += real_itimer.reset;
+    real_itimer.reset = 0;
+    master_unlock();
+}
+
+#ifndef ITIMER_REAL
+# define ITIMER_REAL 0
+#endif
+
+int shim_do_setitimer (int which, struct __kernel_itimerval * value,
+                       struct __kernel_itimerval * ovalue)
+{
+    if (which != ITIMER_REAL)
+        return -ENOSYS;
+
+    if (!value)
+        return -EFAULT;
+
+    unsigned long setup_time = DkSystemTimeQuery();
+
+    unsigned long next_value = value->it_value.tv_sec * 1000000
+                               + value->it_value.tv_usec;
+    unsigned long next_reset = value->it_interval.tv_sec * 1000000
+                               + value->it_interval.tv_usec;
+
+    master_lock();
+
+    unsigned long current_timeout = real_itimer.timeout > setup_time ?
+                                    real_itimer.timeout - setup_time : 0;
+    unsigned long current_reset = real_itimer.reset;
+
+    int ret = install_async_event(NULL, next_value, &signal_itimer,
+                                  (void *) (setup_time + next_value));
+
+    if (ret < 0) {
+        master_unlock();
+        return ret;
+    }
+
+    real_itimer.timeout = setup_time + next_value;
+    real_itimer.reset = next_reset;
+
+    master_unlock();
+
+    if (ovalue) {
+        ovalue->it_interval.tv_sec = current_reset / 1000000;
+        ovalue->it_interval.tv_usec = current_reset % 1000000;
+        ovalue->it_value.tv_sec = current_timeout / 1000000;
+        ovalue->it_value.tv_usec = current_timeout % 1000000;
+    }
+
+    return 0;
+}
+
+int shim_do_getitimer (int which, struct __kernel_itimerval * value)
+{
+    if (which != ITIMER_REAL)
+        return -ENOSYS;
+
+    if (!value)
+        return -EFAULT;
+
+    unsigned long setup_time = DkSystemTimeQuery();
+
+    master_lock();
+    unsigned long current_timeout = real_itimer.timeout > setup_time ?
+                                    real_itimer.timeout - setup_time : 0;
+    unsigned long current_reset = real_itimer.reset;
+    master_unlock();
+
+    value->it_interval.tv_sec = current_reset / 1000000;
+    value->it_interval.tv_usec = current_reset % 1000000;
+    value->it_value.tv_sec = current_timeout / 1000000;
+    value->it_value.tv_usec = current_timeout % 1000000;
+    return 0;
+}

+ 2 - 2
LibOS/shim/src/sys/shim_clone.c

@@ -285,13 +285,13 @@ int shim_do_clone (int flags, void * user_stack_addr, int * parent_tidptr,
     struct clone_args * new_args = __alloca(sizeof(struct clone_args));
     memset(new_args, 0, sizeof(struct clone_args));
 
-    new_args->create_event = DkNotificationEventCreate(0);
+    new_args->create_event = DkNotificationEventCreate(PAL_FALSE);
     if (!new_args->create_event) {
         ret = -PAL_ERRNO;
         goto clone_thread_failed;
     }
 
-    new_args->initialize_event = DkNotificationEventCreate(0);
+    new_args->initialize_event = DkNotificationEventCreate(PAL_FALSE);
     if (!new_args->initialize_event) {
         ret = -PAL_ERRNO;
         goto clone_thread_failed;

+ 58 - 0
LibOS/shim/src/sys/shim_fs.c

@@ -267,6 +267,64 @@ out:
     return ret;
 }
 
+/*
+ * chown() stub: no ownership change is performed, uid and gid are ignored.
+ * The path is still looked up, so a failing lookup is reported to the
+ * caller.
+ *
+ * Returns the negative error from path_lookupat() on failure, otherwise the
+ * (non-negative) lookup result.
+ */
+int shim_do_chown (const char * path, uid_t uid, gid_t gid)
+{
+    struct shim_dentry * dent = NULL;
+    int ret = path_lookupat(NULL, path, LOOKUP_OPEN, &dent);
+
+    if (ret < 0)
+        return ret;
+
+    /* do nothing; only release the dentry obtained by the lookup */
+    put_dentry(dent);
+    return ret;
+}
+
+/*
+ * fchownat() stub: no ownership change is performed; uid, gid and flags are
+ * ignored and only the path lookup is carried out.
+ *
+ * Returns -EINVAL for a NULL filename and the negative error from
+ * path_startat() or path_lookupat() when the lookup fails; otherwise the
+ * (non-negative) lookup result. An absolute filename is handed to
+ * shim_do_chown() and dfd is not consulted.
+ */
+int shim_do_fchownat (int dfd, const char * filename, uid_t uid, gid_t gid,
+                      int flags)
+{
+    if (!filename)
+        return -EINVAL;
+
+    if (*filename == '/')
+        return shim_do_chown(filename, uid, gid);
+
+    struct shim_dentry * dir = NULL, * dent = NULL;
+    int ret = 0;
+
+    if ((ret = path_startat(dfd, &dir)) < 0)
+        return ret;
+
+    if ((ret = path_lookupat(dir, filename, LOOKUP_OPEN, &dent)) < 0)
+        goto out;
+
+    /* do nothing; only release the dentry obtained by the lookup */
+    put_dentry(dent);
+out:
+    put_dentry(dir);
+    return ret;
+}
+
+/*
+ * fchown() stub: no ownership change is performed, uid and gid are ignored.
+ * Only the descriptor is validated.
+ *
+ * Returns -EBADF when fd does not resolve to a handle, 0 otherwise.
+ */
+int shim_do_fchown (int fd, uid_t uid, gid_t gid)
+{
+    struct shim_handle * hdl = get_fd_handle(fd, NULL, NULL);
+    if (!hdl)
+        return -EBADF;
+
+    /* do nothing; only release the handle obtained above */
+    put_handle(hdl);
+    return 0;
+}
+
 #define MAP_SIZE    (allocsize * 4)
 #define BUF_SIZE    (2048)
 

+ 126 - 48
LibOS/shim/src/sys/shim_futex.c

@@ -45,6 +45,7 @@
 
 struct futex_waiter {
     struct shim_thread * thread;
+    uint32_t bitset;            /* wake mask, tested against the waker's */
     struct list_head list;
 };
 
@@ -54,8 +55,11 @@ static LOCKTYPE futex_list_lock;
 int shim_do_futex (unsigned int * uaddr, int op, int val, void * utime,
                    unsigned int * uaddr2, int val3)
 {
-    struct shim_futex_handle * tmp = NULL, * futex = NULL;
-    struct shim_handle * hdl;
+    struct shim_futex_handle * tmp = NULL, * futex = NULL, * futex2 = NULL;
+    struct shim_handle * hdl = NULL, * hdl2 = NULL;
+    uint32_t futex_op = (op & FUTEX_CMD_MASK);
+
+    uint32_t val2 = 0;
     int ret = 0;
 
     if (!uaddr || ((uintptr_t) uaddr % sizeof(unsigned int)))
@@ -88,28 +92,69 @@ int shim_do_futex (unsigned int * uaddr, int op, int val, void * utime,
         list_add_tail(&futex->list, &futex_list);
     }
 
+    if (futex_op == FUTEX_WAKE_OP || futex_op == FUTEX_REQUEUE) {
+        list_for_each_entry(tmp, &futex_list, list)
+            if (tmp->uaddr == uaddr2) {
+                futex2 = tmp;
+                break;
+            }
+
+        if (futex2) {
+            hdl2 = container_of(futex2, struct shim_handle, info.futex);
+            get_handle(hdl2);
+        } else {
+            if (!(hdl2 = get_new_handle())) {
+                unlock(futex_list_lock);
+                return -ENOMEM;
+            }
+
+            hdl2->type = TYPE_FUTEX;
+            futex2 = &hdl2->info.futex;
+            futex2->uaddr = uaddr2;
+            get_handle(hdl2);
+            INIT_LIST_HEAD(&futex2->waiters);
+            INIT_LIST_HEAD(&futex2->list);
+            list_add_tail(&futex2->list, &futex_list);
+        }
+
+        val2 = (uint32_t)(uint64_t) utime;
+    }
+
     unlock(futex_list_lock);
     lock(hdl->lock);
 
-    switch (op & FUTEX_CMD_MASK) {
+    switch (futex_op) {
         case FUTEX_WAIT:
-            debug("FUTEX_WAIT: %p (val = %d) vs %d\n", uaddr, *uaddr, val);
-            if (*uaddr != val)
+        case FUTEX_WAIT_BITSET: {
+            uint32_t bitset = (futex_op == FUTEX_WAIT_BITSET) ? val3 :
+                              0xffffffff;
+            debug("FUTEX_WAIT: %p (val = %d) vs %d mask = %08x\n",
+                  uaddr, *uaddr, val, bitset);
+
+            if (*uaddr != val) {
+                ret = -EAGAIN;
                 break;
+            }
 
             struct futex_waiter waiter;
             thread_setwait(&waiter.thread, NULL);
             INIT_LIST_HEAD(&waiter.list);
+            waiter.bitset = (futex_op == FUTEX_WAIT_BITSET) ? val3 : 0xffffffff;
             list_add_tail(&waiter.list, &futex->waiters);
 
             unlock(hdl->lock);
             thread_sleep();
             lock(hdl->lock);
             break;
+        }
 
-        case FUTEX_WAKE: {
-            debug("FUTEX_WAKE: %p (val = %d)\n", uaddr, *uaddr);
-            int cnt;
+        case FUTEX_WAKE:
+        case FUTEX_WAKE_BITSET: {
+            uint32_t bitset = (futex_op == FUTEX_WAKE_BITSET) ? val3 :
+                              0xffffffff;
+            debug("FUTEX_WAKE: %p (val = %d) count = %d mask = %08x\n",
+                  uaddr, *uaddr, val, bitset);
+            int cnt, nwaken = 0;
             for (cnt = 0 ; cnt < val ; cnt++) {
                 if (list_empty(&futex->waiters))
                     break;
@@ -118,55 +163,96 @@ int shim_do_futex (unsigned int * uaddr, int op, int val, void * utime,
                                                           struct futex_waiter,
                                                           list);
 
+                if (!(bitset & waiter->bitset))
+                    continue;
+
                 debug("FUTEX_WAKE wake thread %d: %p (val = %d)\n",
                       waiter->thread->tid, uaddr, *uaddr);
                 list_del(&waiter->list);
                 thread_wakeup(waiter->thread);
+                nwaken++;
             }
-            ret = cnt;
+
+            ret = nwaken;
             debug("FUTEX_WAKE done: %p (val = %d)\n", uaddr, *uaddr);
             break;
         }
 
-        case FUTEX_CMP_REQUEUE:
-            if (*uaddr != val3) {
-                ret = -EAGAIN;
-                break;
+        case FUTEX_WAKE_OP: {
+            assert(futex2);
+            int oldval = *(int *) uaddr2, newval, cmpval;
+
+            newval = (val3 >> 12) & 0xfff;
+            switch ((val3 >> 28) & 0xf) {
+                case FUTEX_OP_SET:  break;
+                case FUTEX_OP_ADD:  newval = oldval + newval;  break;
+                case FUTEX_OP_OR:   newval = oldval | newval;  break;
+                case FUTEX_OP_ANDN: newval = oldval & ~newval; break;
+                case FUTEX_OP_XOR:  newval = oldval ^ newval;  break;
             }
 
-        case FUTEX_REQUEUE: {
-            struct shim_futex_handle * futex2 = NULL;
-            struct shim_handle * hdl2;
-
-            lock(futex_list_lock);
+            cmpval = val3 & 0xfff;
+            switch ((val3 >> 24) & 0xf) {
+                case FUTEX_OP_CMP_EQ: cmpval = (oldval == cmpval); break;
+                case FUTEX_OP_CMP_NE: cmpval = (oldval != cmpval); break;
+                case FUTEX_OP_CMP_LT: cmpval = (oldval < cmpval);  break;
+                case FUTEX_OP_CMP_LE: cmpval = (oldval <= cmpval); break;
+                case FUTEX_OP_CMP_GT: cmpval = (oldval > cmpval);  break;
+                case FUTEX_OP_CMP_GE: cmpval = (oldval >= cmpval); break;
+            }
 
-            list_for_each_entry(tmp, &futex_list, list)
-                if (tmp->uaddr == uaddr2) {
-                    futex2 = tmp;
+            *(int *) uaddr2 = newval;
+            int cnt, nwaken = 0;
+            debug("FUTEX_WAKE: %p (val = %d) count = %d\n", uaddr, *uaddr, val);
+            for (cnt = 0 ; cnt < val ; cnt++) {
+                if (list_empty(&futex->waiters))
                     break;
-                }
 
-            if (futex2) {
-                hdl2 = container_of(futex2, struct shim_handle, info.futex);
-                get_handle(hdl2);
-            } else {
-                if (!(hdl2 = get_new_handle())) {
-                    unlock(futex_list_lock);
-                    ret = -ENOMEM;
-                    goto out;
-                }
+                struct futex_waiter * waiter = list_entry(futex->waiters.next,
+                                                          struct futex_waiter,
+                                                          list);
 
-                hdl2->type = TYPE_FUTEX;
-                futex2 = &hdl2->info.futex;
-                futex2->uaddr = uaddr2;
-                get_handle(hdl2);
-                INIT_LIST_HEAD(&futex2->waiters);
-                INIT_LIST_HEAD(&futex2->list);
-                list_add_tail(&futex2->list, &futex_list);
+                debug("FUTEX_WAKE wake thread %d: %p (val = %d)\n",
+                      waiter->thread->tid, uaddr, *uaddr);
+                list_del(&waiter->list);
+                thread_wakeup(waiter->thread);
+                nwaken++;
             }
 
-            unlock(futex_list_lock);
+            if (cmpval) {
+                unlock(hdl->lock);
+                put_handle(hdl);
+                hdl = hdl2;
+                lock(hdl->lock);
+                debug("FUTEX_WAKE: %p (val = %d) count = %d\n", uaddr2,
+                      *uaddr2, val2);
+                for (cnt = 0 ; cnt < val2 ; cnt++) {
+                    if (list_empty(&futex2->waiters))
+                        break;
+
+                    struct futex_waiter * waiter = list_entry(futex2->waiters.next,
+                                                              struct futex_waiter,
+                                                              list);
+
+                    debug("FUTEX_WAKE wake thread %d: %p (val = %d)\n",
+                          waiter->thread->tid, uaddr2, *uaddr2);
+                    list_del(&waiter->list);
+                    thread_wakeup(waiter->thread);
+                    nwaken++;
+                }
+            }
+            ret = nwaken;
+            break;
+        }
 
+        case FUTEX_CMP_REQUEUE:
+            if (*uaddr != val3) {
+                ret = -EAGAIN;
+                break;
+            }
+
+        case FUTEX_REQUEUE: {
+            assert(futex2);
             int cnt;
             for (cnt = 0 ; cnt < val ; cnt++) {
                 if (list_empty(&futex->waiters))
@@ -181,14 +267,7 @@ int shim_do_futex (unsigned int * uaddr, int op, int val, void * utime,
             }
 
             lock(hdl2->lock);
-            while (!list_empty(&futex->waiters)) {
-                struct futex_waiter * waiter = list_entry(futex->waiters.next,
-                                                          struct futex_waiter,
-                                                          list);
-
-                list_del(&waiter->list);
-                list_add_tail(&waiter->list, &futex2->waiters);
-            }
+            list_splice_init(&futex->waiters, &futex2->waiters);
             unlock(hdl2->lock);
             put_handle(hdl2);
             ret = cnt;
@@ -206,7 +285,6 @@ int shim_do_futex (unsigned int * uaddr, int op, int val, void * utime,
     }
 
     unlock(hdl->lock);
-out:
     put_handle(hdl);
     return ret;
 }

+ 18 - 3
LibOS/shim/src/sys/shim_getrlimit.c

@@ -30,12 +30,14 @@
 
 #include <asm/resource.h>
 
+unsigned int max_fds = DEFAULT_MAX_FDS;
+
 int shim_do_getrlimit (int resource, struct __kernel_rlimit * rlim)
 {
     switch (resource) {
         case RLIMIT_NOFILE:
-            rlim->rlim_cur = MAX_FDS;
-            rlim->rlim_max = MAX_FDS;
+            rlim->rlim_cur = max_fds;
+            rlim->rlim_max = MAX_MAX_FDS;
             return 0;
 
         case RLIMIT_RSS:
@@ -60,5 +62,18 @@ int shim_do_getrlimit (int resource, struct __kernel_rlimit * rlim)
 
 int shim_do_setrlimit (int resource, struct __kernel_rlimit * rlim)
 {
-    return -EPERM;
+    switch (resource) {
+        case RLIMIT_NOFILE:
+            if (rlim->rlim_cur > MAX_MAX_FDS)
+                return -EINVAL;
+            max_fds = rlim->rlim_cur;
+            return 0;
+
+        case RLIMIT_STACK:
+            sys_stack_size = rlim->rlim_cur;
+            return 0;
+
+        default:
+            return -ENOSYS;
+    }
 }

+ 20 - 5
LibOS/shim/src/sys/shim_ioctl.c

@@ -132,8 +132,6 @@ static int ioctl_termios (struct shim_handle * hdl, unsigned int cmd,
         case TIOCSSERIAL:
         /* 0x00005420 TIOCPKT const int * */
         case TIOCPKT:
-        /* 0x00005421 FIONBIO const int * */
-        case FIONBIO:
         /* 0x00005422 TIOCNOTTY void */
         case TIOCNOTTY:
         /* 0x00005423 TIOCSETD const int * */
@@ -142,8 +140,6 @@ static int ioctl_termios (struct shim_handle * hdl, unsigned int cmd,
         case TIOCGETD:
         /* 0x00005425 TCSBRKP int */
         case TCSBRKP:
-        /* 0x00005452 FIOASYNC const int * */
-        case FIOASYNC:
         /* 0x00005453 TIOCSERCONFIG void */
         case TIOCSERCONFIG:
         /* 0x00005454 TIOCSERGWILD int * */
@@ -294,6 +290,17 @@ static int ioctl_netdevice (struct shim_handle * hdl, unsigned int cmd,
     return -EAGAIN;
 }
 
+/*
+ * Async-event callback (installed by the FIOASYNC ioctl): queue SIGIO for
+ * the thread whose ID is `target`. `arg` is unused. When no thread with that
+ * ID is found the event is dropped without further action.
+ */
+void signal_io (IDTYPE target, void * arg)
+{
+    debug("detecting input, signaling thread %u\n", target);
+
+    /* NOTE(review): if lookup_thread() hands back a counted reference, it is
+     * never released in this function -- confirm against its definition */
+    struct shim_thread * thread = lookup_thread(target);
+    if (!thread)
+        return;
+
+    append_signal(thread, SIGIO, NULL, true);
+}
+
 int shim_do_ioctl (int fd, int cmd, unsigned long arg)
 {
     struct shim_handle * hdl = get_fd_handle(fd, NULL, NULL);
@@ -334,13 +341,19 @@ int shim_do_ioctl (int fd, int cmd, unsigned long arg)
         case TIOCGSERIAL:
         case TIOCSSERIAL:
         case TIOCPKT:
-        case FIONBIO:
         case TIOCNOTTY:
         case TIOCSETD:
         case TIOCGETD:
         case TCSBRKP:
             ret = ioctl_termios(hdl, cmd, arg);
             break;
+        case FIONBIO:
+            if (hdl->fs && hdl->fs->fs_ops &&
+                hdl->fs->fs_ops->setflags)
+                hdl->fs->fs_ops->setflags(hdl, hdl->flags | O_NONBLOCK);
+            hdl->flags |= O_NONBLOCK;
+            ret = 0;
+            break;
         case FIONCLEX:
             hdl->flags &= ~FD_CLOEXEC;
             ret = 0;
@@ -350,6 +363,8 @@ int shim_do_ioctl (int fd, int cmd, unsigned long arg)
             ret = 0;
             break;
         case FIOASYNC:
+            ret = install_async_event(hdl->pal_handle, 0, &signal_io, NULL);
+            break;
         case TIOCSERCONFIG:
         case TIOCSERGWILD:
         case TIOCSERSWILD:

+ 1 - 1
LibOS/shim/src/sys/shim_migrate.c

@@ -67,7 +67,7 @@ int create_checkpoint (const char * cpdir, IDTYPE * sid)
 
     INIT_LIST_HEAD(&cpsession->registered_threads);
     INIT_LIST_HEAD(&cpsession->list);
-    cpsession->finish_event = DkNotificationEventCreate(0);
+    cpsession->finish_event = DkNotificationEventCreate(PAL_FALSE);
     cpsession->cpfile = NULL;
 
     int len = strlen(cpdir);

+ 1 - 1
LibOS/shim/src/sys/shim_msgget.c

@@ -100,7 +100,7 @@ static int __add_msg_handle (unsigned long key, IDTYPE msqid, bool owned,
     msgq->owned     = owned;
     msgq->deleted   = false;
     msgq->currentsize = 0;
-    msgq->event     = DkSynchronizationEventCreate(0);
+    msgq->event     = DkSynchronizationEventCreate(PAL_FALSE);
 
     msgq->queue     = malloc(MSG_QOBJ_SIZE * DEFAULT_MSG_QUEUE_SIZE);
     msgq->queuesize = DEFAULT_MSG_QUEUE_SIZE;

+ 3 - 2
LibOS/shim/src/sys/shim_poll.c

@@ -225,19 +225,20 @@ no_op:
                     if (polled & FS_POLL_ER) {
                         debug("fd %d known to have error\n", p->fd);
                         p->flags |= KNOWN_R|KNOWN_W|RET_E;
+                        do_r = do_w = false;
                     }
 
                     if ((polled & FS_POLL_RD)) {
                         debug("fd %d known to be readable\n", p->fd);
                         p->flags |= KNOWN_R|RET_R;
+                        do_r = false;
                     }
 
                     if (polled & FS_POLL_WR) {
                         debug("fd %d known to be writeable\n", p->fd);
                         p->flags |= KNOWN_W|RET_W;
+                        do_w = false;
                     }
-
-                    do_r = do_w = false;
                 }
             }
 

+ 1 - 1
LibOS/shim/src/sys/shim_semget.c

@@ -90,7 +90,7 @@ static int __add_sem_handle (unsigned long key, IDTYPE semid,
     tmp->semkey = key;
     tmp->semid  = semid;
     tmp->owned  = owned;
-    tmp->event  = DkNotificationEventCreate(0);
+    tmp->event  = DkNotificationEventCreate(PAL_FALSE);
 
     if (owned && nsems) {
         tmp->nsems  = nsems;

+ 24 - 0
LibOS/shim/src/sys/shim_sigaction.c

@@ -137,6 +137,30 @@ out:
     return err;
 }
 
+/*
+ * sigsuspend() emulation: temporarily replace the calling thread's signal
+ * mask with `mask`, sleep until the thread is woken up, then restore the
+ * previous mask.
+ *
+ * Returns -EFAULT for a NULL mask. Otherwise it returns -EINTR once the
+ * thread has been woken: sigsuspend() has no successful return value.
+ */
+int shim_do_sigsuspend (const __sigset_t * mask)
+{
+    if (!mask)
+        return -EFAULT;
+
+    __sigset_t * old, tmp;
+    struct shim_thread * cur = get_cur_thread();
+
+    lock(cur->lock);
+
+    /* keep a private copy of the current mask so it can be restored later */
+    old = get_sig_mask(cur);
+    memcpy(&tmp, old, sizeof(__sigset_t));
+    old = &tmp;
+
+    set_sig_mask(cur, mask);
+    cur->suspend_on_signal = true;
+    thread_setwait(NULL, NULL);
+    /* NOTE(review): cur->lock is still held while sleeping and the mask is
+     * restored only after unlocking; ordering kept as in the original --
+     * confirm this is intended */
+    thread_sleep();
+
+    unlock(cur->lock);
+    set_sig_mask(cur, old);
+
+    return -EINTR;
+}
+
 struct walk_arg {
     struct shim_thread * current;
     IDTYPE sender;

+ 55 - 16
LibOS/shim/src/sys/shim_socket.c

@@ -61,6 +61,8 @@
 #define TCP_CONGESTION      13  /* Congestion control algorithm.  */
 #define TCP_MD5SIG          14  /* TCP MD5 Signature (RFC2385) */
 
+#define AF_UNSPEC       0
+
 #define SOCK_URI_SIZE   108
 
 static int rebase_on_lo __attribute_migratable = -1;
@@ -316,6 +318,28 @@ static inline void unix_copy_addr (struct sockaddr * saddr,
     memcpy(un->sun_path, path, size + 1);
 }
 
+/*
+ * Check that a caller-supplied address is acceptable for a socket of the
+ * given domain.
+ *
+ * AF_INET:  the address family must be AF_INET and addrlen must equal
+ *           sizeof(struct sockaddr_in).
+ * AF_INET6: the address family may be AF_INET6 or AF_INET; addrlen must
+ *           equal the size of the matching sockaddr structure.
+ * Any other domain is rejected.
+ *
+ * Returns true when the address passes, false otherwise. addr is
+ * dereferenced unconditionally, so it must not be NULL.
+ */
+static bool inet_check_addr (int domain, struct sockaddr * addr, int addrlen)
+{
+    if (domain == AF_INET) {
+        if (addr->sa_family != AF_INET ||
+            addrlen != sizeof(struct sockaddr_in))
+            return false;
+        return true;
+    }
+
+    if (domain == AF_INET6) {
+        /* an IPv6 socket may also be handed a plain IPv4 address */
+        if (addr->sa_family != AF_INET && addr->sa_family != AF_INET6)
+            return false;
+        if (addrlen != ((addr->sa_family == AF_INET) ?
+                        sizeof(struct sockaddr_in) :
+                        sizeof(struct sockaddr_in6)))
+            return false;
+        return true;
+    }
+
+    return false;
+}
+
 static int inet_copy_addr (int domain, struct sockaddr * saddr,
                            const struct addr_inet * addr)
 {
@@ -349,9 +373,18 @@ static void inet_save_addr (int domain, struct addr_inet * addr,
     }
 
     if (domain == AF_INET6) {
-        const struct sockaddr_in6 * in6 = (const struct sockaddr_in6 *) saddr;
-        addr->port = __ntohs(in6->sin6_port);
-        addr->addr.v6 = in6->sin6_addr;
+        if (saddr->sa_family == AF_INET) {
+            const struct sockaddr_in * in = (const struct sockaddr_in *) saddr;
+            addr->port = __ntohs(in->sin_port);
+            ((uint32_t *) &addr->addr.v6.s6_addr)[0] = 0;
+            ((uint32_t *) &addr->addr.v6.s6_addr)[1] = 0;
+            ((uint32_t *) &addr->addr.v6.s6_addr)[2] = 0xffff0000;
+            ((uint32_t *) &addr->addr.v6.s6_addr)[3] = in->sin_addr.s_addr;
+        } else {
+            const struct sockaddr_in6 * in6 = (const struct sockaddr_in6 *) saddr;
+            addr->port = __ntohs(in6->sin6_port);
+            addr->addr.v6 = in6->sin6_addr;
+        }
         return;
     }
 }
@@ -451,12 +484,8 @@ int shim_do_bind (int sockfd, struct sockaddr * addr, socklen_t addrlen)
         sock->addr.un.dentry = dent;
 
     } else if (sock->domain == AF_INET || sock->domain == AF_INET6) {
-
-        if (addrlen != ((sock->domain == AF_INET) ? sizeof(struct sockaddr_in) :
-                        sizeof(struct sockaddr_in6))) {
+        if (!inet_check_addr(sock->domain, addr, addrlen))
             goto out;
-        }
-        
         inet_save_addr(sock->domain, &sock->addr.in.bind, addr);
         inet_rebase_port(false, sock->domain, &sock->addr.in.bind, true);
     }
@@ -657,6 +686,18 @@ int shim_do_connect (int sockfd, struct sockaddr * addr, int addrlen)
     int ret = -EINVAL;
 
     if (state == SOCK_CONNECTED) {
+        if (addr->sa_family == AF_UNSPEC) {
+            sock->sock_state = SOCK_CREATED;
+            if (sock->sock_type == SOCK_STREAM && hdl->pal_handle) {
+                DkStreamDelete(hdl->pal_handle, 0);
+                DkObjectClose(hdl->pal_handle);
+                hdl->pal_handle = NULL;
+            }
+            debug("shim_connect: reconnect on a stream socket\n");
+            ret = 0;
+            goto out;
+        }
+
         debug("shim_connect: reconnect on a stream socket\n");
         goto out;
     }
@@ -702,14 +743,12 @@ int shim_do_connect (int sockfd, struct sockaddr * addr, int addrlen)
         hdl->pal_handle = NULL;
     }
 
-	if (sock->domain != AF_UNIX) {
-		if (addrlen != ((sock->domain == AF_INET) ? sizeof(struct sockaddr_in) :
-					sizeof(struct sockaddr_in6)))
-			goto out;
-
-		inet_save_addr(sock->domain, &sock->addr.in.conn, addr);
-		inet_rebase_port(false, sock->domain, &sock->addr.in.conn, false);
-	}
+    if (sock->domain != AF_UNIX) {
+        if (!inet_check_addr(sock->domain, addr, addrlen))
+            goto out;
+        inet_save_addr(sock->domain, &sock->addr.in.conn, addr);
+        inet_rebase_port(false, sock->domain, &sock->addr.in.conn, false);
+    }
 
     sock->sock_state = (state == SOCK_BOUND) ? SOCK_BOUNDCONNECTED :
                                                SOCK_CONNECTED;

+ 2 - 2
LibOS/shim/src/syscallas.S

@@ -63,9 +63,9 @@ isdef:
         pushq %r15
 
         movq %rax, %fs:(SHIM_TCB_OFFSET + 24)
-        leaq 8(%rbp), %rax
+        leaq 16(%rbp), %rax
         movq %rax, %fs:(SHIM_TCB_OFFSET + 32)
-        movq (%rbp), %rax
+        movq 8(%rbp), %rax
         movq %rax, %fs:(SHIM_TCB_OFFSET + 40)
         movq %rsp, %fs:(SHIM_TCB_OFFSET + 48)
 

+ 6 - 5
LibOS/shim/test/Makefile

@@ -10,12 +10,11 @@ LIBCDIR = $(level)../../glibc-build
 CC = gcc
 CXX = g++
 CFLAGS 	= -std=gnu99
-CFLAGS-debug = $(CFLAGS) -I$(PALDIR)/../include/pal -I$(PALDIR)/../lib
+CFLAGS-debug = $(CFLAGS) -I$(SHIMDIR)/../include -I$(PALDIR)/../include/pal -I$(PALDIR)/../lib
 LDFLAGS	=
-LDFLAGS-debug = $(LDFLAGS) -L$(SHIMDIR) \
+LDFLAGS-debug = $(LDFLAGS) -L$(SHIMDIR) -L$(PALDIR) \
 		-Wl,-rpath-link=$(abspath $(SHIMDIR)) \
-		-Wl,-rpath-link=$(abspath $(PALDIR)) \
-		-lpal -lsysdb_debug
+		-Wl,-rpath-link=$(abspath $(PALDIR))
 
 default: all
 include $(PALDIR)/Makefile.Test
@@ -37,9 +36,11 @@ manifest_rules = \
 	-e 's:\$$(LIBCDIR):'$$RELDIR'$(LIBCDIR):g' \
 	$(extra_rules)
 
+# $(call relative-to,FROM,TO): path of TO expressed relative to directory FROM.
+# print(...) with a single argument works under both Python 2 and Python 3.
+relative-to = $(shell python -c "import os.path; print(os.path.relpath(\"$(abspath $2)\",\"$(abspath $1)\"))")
+
 %manifest: %manifest.template
 	@echo [ $@ ]
-	RELDIR=$(filter-out ./,$(python -c "import os.path; print os.path.relpath($(abspath $(dir $@)) $(PWD))")) && \
+	RELDIR=$(filter-out ./,$(call relative-to,$(dir $@),$(shell pwd))/) && \
 	sed $(manifest_rules) $< > $@
 	(grep -q '^#!' $@ && chmod +x $@) || true
 

+ 1 - 1
LibOS/shim/test/apps/lighttpd/Makefile

@@ -55,7 +55,7 @@ start-multithreaded-native-server:
                 $(if $(CONF),$(CONF),lighttpd-multithreaded.conf)
 
 start-graphene-server:
-	$(PREFIX) ./lighttpd.manifest -D -m /lighttpd -f \
+	$(PREFIX) ./lighttpd.manifest$(if $(SGX_RUN),.sgx,) -D -m /lighttpd -f \
 		$(if $(CONF),$(CONF),lighttpd.conf)
 
 start-multithreaded-graphene-server:

+ 9 - 0
LibOS/shim/test/apps/lmbench/lmbench-2.5/src/lat_proc.c

@@ -180,6 +180,15 @@ main(int ac, char **av)
 		micro("Static Process fork+exit", get_n());
 #else
 		micro("Process fork+exit", get_n());
+#endif
+	} else if (!strcmp("fork-size", av[1])) {
+		size_t size = bytes(av[2]);
+		memset(malloc(size), 0, size);
+		BENCH(do_fork(), 0);
+#ifdef STATIC
+		micro("Static Process fork+exit", get_n());
+#else
+		micro("Process fork+exit", get_n());
 #endif
 	} else if (!strcmp("dfork", av[1])) {
 		BENCH(do_dfork(), 0);

+ 21 - 0
LibOS/shim/test/apps/lmbench/lmbench-2.5/src/lat_syscall.c

@@ -33,6 +33,15 @@ do_read(int fd, char *s)
 	}
 }
 
+/*
+ * Benchmark body: issue a single read() of up to `size` bytes from fd into
+ * buf. When read() returns -1 the failure is reported through perror(s);
+ * the function itself returns nothing either way.
+ */
+void
+do_read_size(int fd, void * buf, size_t size, char *s)
+{
+	if (read(fd, buf, size) == -1) {
+		perror(s);
+		return;
+	}
+}
+
 void
 do_stat(char *s)
 {
@@ -100,6 +109,18 @@ main(int ac, char **av)
 		BENCH(do_read(fd, file), 0);
 		micro("Simple read", get_n());
 		close(fd);
+	} else if (!strcmp("read-size", av[1])) {
+		size_t size = bytes(av[2]);
+		void * buf = malloc(size);
+		file = av[3] ? av[3] : "/dev/zero";
+		fd = open(file, 0);
+		if (fd == -1) {
+			fprintf(stderr, "Read from %s: %s\n", file, strerror(errno));
+			return(1);
+		}
+		BENCH(do_read_size(fd, buf, size, file), 0);
+		micro("Simple read", get_n());
+		close(fd);
 	} else if (!strcmp("stat", av[1])) {
 		BENCH(do_stat(file), 0);
 		micro("Simple stat", get_n());

+ 36 - 0
LibOS/shim/test/apps/ltp/BLOCKED

@@ -0,0 +1,36 @@
+getresuid02
+getresuid03
+epoll-ltp
+epoll_pwait01
+fork06
+fork12
+fork14
+getitimer02
+getresgid02
+getresgid03
+getrlimit02
+getrusage02
+gettimeofday01
+gettimeofday02
+kill02
+kill06
+kill08
+kill10
+kill12
+nosleep04
+nanosleep04
+pipe04
+pipe05
+pipe07
+process_vm_readv02
+process_vm_readv03
+process_vm01 -r
+prot_hsymlinks
+setitimer02
+set_robust_list01
+signal01
+trace05
+uname02
+waitpid04
+futex_wait01
+futex_wake02

+ 36 - 0
LibOS/shim/test/apps/ltp/Makefile

@@ -0,0 +1,36 @@
+# Downloads, builds and installs the Linux Test Project below $(BUILDDIR),
+# then adds the files needed to run its test cases through pal_loader.
+SRCDIR = ltp-master
+BUILDDIR = opt/ltp
+TESTCASEDIR = $(BUILDDIR)/testcases/bin
+
+target = $(BUILDDIR)/bin/run_in_graphene.awk $(TESTCASEDIR)/pal_loader $(TESTCASEDIR)/manifest
+exec_target =
+
+clean-extra = clean-build
+
+level = ../../
+include ../../Makefile
+
+master.zip:
+	wget https://github.com/linux-test-project/ltp/archive/master.zip
+
+# $(MAKE) instead of a literal "make" keeps the jobserver and the flags of
+# the invoking make for the nested builds.
+$(SRCDIR)/configure: master.zip
+	unzip master.zip
+	cd $(SRCDIR) && $(MAKE) autotools
+
+# DESTDIR uses $(CURDIR), which make sets itself, rather than the inherited
+# PWD environment variable (stale when invoked as "make -C <dir>").
+# NOTE(review): BUILDDIR presumes LTP installs to /opt/ltp below DESTDIR.
+$(BUILDDIR)/runltp: $(SRCDIR)/configure
+	cd $(SRCDIR) && ./configure
+	cd $(SRCDIR) && $(MAKE) all
+	cd $(SRCDIR) && $(MAKE) "DESTDIR=$(CURDIR)" SKIP_IDCHECK=1 install
+	patch -d $(dir $@) < runltp.patch
+
+$(BUILDDIR)/bin/run_in_graphene.awk: run_in_graphene.awk $(BUILDDIR)/runltp
+	cp -f $< $@
+
+$(TESTCASEDIR)/pal_loader: $(BUILDDIR)/runltp
+	ln -sf $(call relative-to,$(dir $@),../pal_loader) $@
+
+$(TESTCASEDIR)/manifest.template: manifest.template
+	cp -f $< $@
+
+clean-build:
+	rm -rf $(BUILDDIR)

+ 290 - 0
LibOS/shim/test/apps/ltp/PASSED

@@ -0,0 +1,290 @@
+Test,Subtest number
+accept01,1
+accept01,2
+alarm01,1
+alarm02,1
+alarm02,2
+alarm02,3
+clone01,1
+clone03,1
+clone04,1
+clone06,1
+clone07,1
+close02,1
+confstr01,1
+confstr01,2
+confstr01,3
+confstr01,4
+confstr01,5
+confstr01,6
+confstr01,7
+confstr01,8
+confstr01,9
+confstr01,10
+confstr01,11
+confstr01,12
+confstr01,13
+confstr01,14
+confstr01,15
+confstr01,16
+confstr01,17
+confstr01,18
+confstr01,19
+fcntl13,6
+fork02,1
+fork03,1
+fork11,1
+fstat05,1
+getcontext01,1
+getdomainname01,1
+getdtablesize01,1
+getegid01,1
+geteuid01,1
+getgid01,1
+gethostbyname_r01,1
+gethostname01,1
+getitimer01,1
+getitimer01,3
+getitimer01,5
+getpagesize01,1
+getpeername01,1
+getpeername01,2
+getpgid01,1
+getpgid01,2
+getpgrp01,1
+getpid01,1
+getpid02,1
+getppid01,1
+getrlimit01,4
+getrlimit01,6
+getrlimit01,8
+getrlimit01,10
+getsid01,1
+getsockopt01,1
+getsockopt01,2
+gettid01,1
+getuid01,1
+kill03,1
+kill09,1
+listen01,1
+listen01,2
+memset01,1
+memset01,2
+memcmp1,1
+memcmp1,2
+memcpy1,1
+memcpy1,2
+memcpy1,3
+mlock03,1
+nanosleep03,1
+pathconf01,1
+pathconf01,2
+pathconf01,3
+pathconf01,4
+pathconf01,5
+pathconf01,6
+pathconf01,7
+personality02,1
+pipe09,1
+pipe10,1
+poll01,1
+poll01,2
+process_vm_writev,2
+process_vm_writev,4
+process_vm_writev,6
+process_vm_writev,8
+process_vm_writev,11
+process_vm_writev,13
+process_vm_writev,15
+process_vm_writev,17
+process_vm_writev,19
+process_vm_writev,21
+ptrace05,1
+sbrk01,1
+sbrk01,2
+sbrk02,1
+sched_yield01,1
+select02,1
+setgid01,1
+setitimer01,1
+setpgid01,1
+setpgid01,2
+setpgrp01,1
+setpgrp02,1
+setrlimit03,1
+setsid01,1
+setsockopt01,1
+setsockopt01,2
+settimeofday01,3
+setuid01,1
+sigaction01,4
+sigaction02,1
+sigaction02,2
+sigaction02,3
+signal02,1
+signal02,2
+signal02,3
+signal03,1
+signal03,2
+signal03,3
+signal03,4
+signal03,5
+signal03,6
+signal03,7
+signal03,8
+signal03,9
+signal03,10
+signal03,11
+signal03,12
+signal03,13
+signal03,14
+signal03,15
+signal03,16
+signal03,17
+signal03,18
+signal03,19
+signal03,20
+signal03,21
+signal03,22
+signal03,23
+signal03,24
+signal03,25
+signal03,26
+signal03,27
+signal03,28
+signal03,29
+signal03,30
+signal03,31
+signal04,1
+signal04,2
+signal04,3
+signal04,4
+signal04,5
+signal04,6
+signal04,7
+signal04,8
+signal04,9
+signal04,10
+signal04,11
+signal04,12
+signal04,13
+signal04,14
+signal04,15
+signal04,16
+signal04,17
+signal04,18
+signal04,19
+signal04,20
+signal04,21
+signal04,22
+signal04,23
+signal04,24
+signal04,25
+signal04,26
+signal04,27
+signal04,28
+signal05,1
+signal05,2
+signal05,3
+signal05,4
+signal05,5
+signal05,6
+signal05,7
+signal05,8
+signal05,9
+signal05,10
+signal05,11
+signal05,12
+signal05,13
+signal05,14
+signal05,15
+signal05,16
+signal05,17
+signal05,18
+signal05,19
+signal05,20
+signal05,21
+signal05,22
+signal05,23
+signal05,24
+signal05,25
+signal05,26
+signal05,27
+signal05,28
+signal05,29
+signal05,30
+signal05,31
+signal06,1
+signal06,2
+signal06,3
+signal06,4
+signal06,5
+socketcall01,1
+socketcall02,1
+socketcall03,1
+socketcall04,1
+splice02,1
+string01,1
+sync01,1
+sysconf01,1
+sysconf01,2
+sysconf01,4
+sysconf01,5
+sysconf01,6
+sysconf01,7
+sysconf01,8
+sysconf01,9
+sysconf01,10
+sysconf01,11
+sysconf01,12
+sysconf01,13
+sysconf01,14
+sysconf01,15
+sysconf01,16
+sysconf01,17
+sysconf01,18
+sysconf01,19
+sysconf01,21
+sysconf01,24
+sysconf01,25
+sysconf01,27
+sysconf01,28
+sysconf01,29
+sysconf01,30
+sysconf01,31
+sysconf01,32
+sysconf01,33
+sysconf01,34
+sysconf01,35
+sysconf01,36
+sysconf01,37
+sysconf01,38
+sysconf01,39
+sysconf01,41
+sysconf01,42
+sysconf01,44
+sysconf01,45
+sysconf01,46
+sysconf01,47
+sysconf01,48
+sysconf01,50
+sysconf01,51
+sysconf01,52
+sysconf01,53
+sysconf01,56
+time01,1
+time02,1
+times01,1
+umask01,1
+umask02,1
+uname01,1
+uname03,1
+ustat02,1
+wait01,1
+wait401,1
+wait401,2
+waitpid01,1
+waitpid02,1
+waitpid02,3
+waitpid03,1
+waitpid03,2

+ 17 - 0
LibOS/shim/test/apps/ltp/block_tests.awk

@@ -0,0 +1,17 @@
+#!/usr/bin/awk -f
+# Rewrites a test list: every non-empty, non-comment line whose command is
+# not listed in the file BLOCKED is printed as
+#     <field 1>_graphene ./pal_loader <field 2> ... <last field>
+BEGIN{
+	# getline returns -1 when the file cannot be read; testing "> 0"
+	# ends the loop in that case instead of spinning forever
+	while((getline < "BLOCKED") > 0) {
+		test = $1$2$3
+		blocked[test]
+	}
+}
+NF && ! /^#/ {
+	# key is the command plus its first argument, as in the BEGIN block
+	test = $2$3
+	if(!(test in blocked)) {
+		s=$1 "_graphene ./pal_loader"
+		for (i=2; i<=NF; i++) {
+			s = s " " $i
+		}
+		print s
+	}
+}

+ 18 - 0
LibOS/shim/test/apps/ltp/manifest.template

@@ -0,0 +1,18 @@
+loader.preload = file:$(SHIMPATH)
+loader.env.LD_LIBRARY_PATH = /lib:/lib64:/usr/lib:/usr/lib64
+loader.debug_type = none
+
+fs.mount.tmp1.type = chroot
+fs.mount.tmp1.path = /tmp
+fs.mount.tmp1.uri = file:/tmp
+
+fs.mount.lib.type = chroot
+fs.mount.lib.path = /lib
+fs.mount.lib.uri = file:$(LIBCDIR)
+
+fs.mount.usr.type = chroot
+fs.mount.usr.path = /usr
+fs.mount.usr.uri = file:/usr
+
+sys.brk.size = 32M
+sys.stack.size = 4M

+ 19 - 0
LibOS/shim/test/apps/ltp/passed_tests_only.awk

@@ -0,0 +1,19 @@
+#!/usr/bin/awk -f
+# Rewrites a test list: every non-empty, non-comment line whose command is
+# listed in the file PASSED is printed as
+#     <field 1>_graphene ./pal_loader <field 2> ... <last field>
+BEGIN{
+	# the first line of PASSED is a column header; read and discard it
+	getline < "PASSED"
+	# getline returns -1 when the file cannot be read; testing "> 0"
+	# ends the loop in that case instead of spinning forever
+	while((getline < "PASSED") > 0) {
+		# keep only the test name in front of the first comma
+		split($1$2$3, a, ",")
+		test = a[1]
+		passed[test]
+	}
+}
+NF && ! /^#/ {
+	test = $2$3
+	if(test in passed) {
+		s=$1 "_graphene ./pal_loader"
+		for (i=2; i<=NF; i++) {
+			s = s " " $i
+		}
+		print s
+	}
+}

+ 29 - 0
LibOS/shim/test/apps/ltp/run_in_graphene.awk

@@ -0,0 +1,29 @@
+#!/usr/bin/awk -f
+#
+#    Script for rewriting test commands so that they run in Graphene.
+#
+#    Copyright (C) 2016, Graphene library OS.
+#
+#    This program is free software; you can redistribute it and/or modify
+#    it under the terms of the GNU General Public License as published by
+#    the Free Software Foundation; either version 2 of the License, or
+#    (at your option) any later version.
+#
+#    This program is distributed in the hope that it will be useful,
+#    but WITHOUT ANY WARRANTY; without even the implied warranty of
+#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+#    GNU General Public License for more details.
+#
+#    You should have received a copy of the GNU General Public License along
+#    with this program; if not, write to the Free Software Foundation, Inc.,
+#    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+#
+# Chia-Che Tsai, Fall 2016
+#
+# For every non-empty line that does not start with '#', print
+#     <field 1>_graphene ./pal_loader <field 2> ... <last field>
+# i.e. the first field gets a "_graphene" suffix and the remaining fields
+# are passed to ./pal_loader.
+NF && ! /^#/ {
+	s=$1 "_graphene ./pal_loader"
+	for (i = 2; i <= NF; i++) {
+		s = s " " $i
+	}
+	print s
+}

+ 18 - 0
LibOS/shim/test/apps/ltp/runltp.patch

@@ -0,0 +1,18 @@
+--- runltp.old	2016-10-24 17:04:25.557950335 -0400
++++ runltp	2016-10-24 18:20:37.168448299 -0400
+@@ -813,6 +813,15 @@ main()
+ 		fi
+ 	fi
+ 
++	## Run all tests in Graphene if $GRAPHENE_RUN is set (non-empty)
++	if [ $GRAPHENE_RUN ]; then
++		awk -f ${LTPROOT}/bin/run_in_graphene.awk \
++			${TMP}/alltests > \
++			${TMP}/alltests.tmp
++		cp ${TMP}/alltests.tmp ${TMP}/alltests
++		rm -rf ${TMP}/alltests.tmp
++	fi
++
+ 	## Valgrind Check will work only when Kernel Fault Injection is not expected,
+ 	## We do not want to test Faults when valgrind is running
+ 	if [ $VALGRIND_CHECK ]; then

+ 42 - 0
LibOS/shim/test/apps/memcached/Makefile

@@ -0,0 +1,42 @@
+manifests = memcached.manifest
+SRCDIR = memcached-master
+HOST = $(firstword $(shell ifconfig | grep 'inet addr:'| grep -v '127.0.0.1' -m 1 | cut -d: -f2))
+PORT = 8000
+CORES = 4
+MAXMEM = 256
+MAXTHREADS = 16
+
+target = memcached $(manifests)
+exec_target = memcached
+
+clean-extra = clean-build
+
+extra_rules = \
+	-e 's:\$$(PAL):../pal_loader:g' \
+	-e 's:\$$(HOST):$(HOST):g' \
+	-e 's:\$$(PORT):$(PORT):g' \
+	-e 's:\$$(MEMSIZE):$(shell expr $(MAXMEM) + $(MAXMEM))M:g' \
+	-e 's:\$$(THREADNUM):$(shell expr 2 + $(MAXTHREADS)):g'
+
+level = ../../
+include ../../Makefile
+
+master.zip:	# download the upstream memcached master snapshot
+	wget https://github.com/memcached/memcached/archive/master.zip
+
+$(SRCDIR)/configure: master.zip	# unpack the snapshot and generate ./configure
+	unzip master.zip
+	cd $(SRCDIR) && ./autogen.sh
+
+$(SRCDIR)/memcached: $(SRCDIR)/configure	# configure and build inside the source tree
+	cd $(SRCDIR) && ./configure
+	cd $(SRCDIR) && make
+
+memcached: $(SRCDIR)/memcached	# copy the built binary into this directory
+	cp -f $< $@
+
+clean-build:	# NOTE(review): BUILDDIR is not assigned in this file -- confirm the included Makefile defines it
+	rm -rf $(BUILDDIR)
+
+distclean: clean	# additionally removes the unpacked source tree (master.zip is left in place)
+	rm -rf $(SRCDIR)

+ 54 - 0
LibOS/shim/test/apps/memcached/memcached.manifest.template

@@ -0,0 +1,54 @@
+#!$(PAL)
+
+loader.preload = file:$(SHIMPATH)
+loader.exec = file:memcached
+loader.env.LD_LIBRARY_PATH = /lib:/usr/lib:/lib/x86_64-linux-gnu:/usr/lib/x86_64-linux-gnu
+loader.debug_type = none
+
+fs.mount.lib.type = chroot
+fs.mount.lib.path = /lib
+fs.mount.lib.uri = file:$(LIBCDIR)
+
+fs.mount.lib2.type = chroot
+fs.mount.lib2.path = /lib/x86_64-linux-gnu
+fs.mount.lib2.uri = file:/lib/x86_64-linux-gnu
+
+fs.mount.usr.type = chroot
+fs.mount.usr.path = /usr
+fs.mount.usr.uri = file:/usr
+
+fs.mount.passwd.type = chroot
+fs.mount.passwd.path = /etc/passwd
+fs.mount.passwd.uri = file:/etc/passwd
+
+fs.mount.shadow.type = chroot
+fs.mount.shadow.path = /etc/shadow
+fs.mount.shadow.uri = file:/etc/shadow
+
+fs.mount.tmp.type = chroot
+fs.mount.tmp.path = /tmp
+fs.mount.tmp.uri = file:/tmp
+
+# allow binding to this address and port for listening
+net.allow_bind.1 = $(HOST):$(PORT)
+
+sys.stack.size = 1m
+sys.brk.size = 64M
+glibc.heap_size = 16M
+
+sgx.enclave_size = $(MEMSIZE)
+sgx.thread_num = $(THREADNUM)
+
+sgx.trusted_files.ld = file:$(LIBCDIR)/ld-linux-x86-64.so.2
+sgx.trusted_files.libc = file:$(LIBCDIR)/libc.so.6
+sgx.trusted_files.libdl = file:$(LIBCDIR)/libdl.so.2
+sgx.trusted_files.libm = file:$(LIBCDIR)/libm.so.6
+sgx.trusted_files.libpthread = file:$(LIBCDIR)/libpthread.so.0
+sgx.trusted_files.libnss1 = file:/lib/x86_64-linux-gnu/libnss_compat.so.2
+sgx.trusted_files.libnss2 = file:/lib/x86_64-linux-gnu/libnss_files.so.2
+sgx.trusted_files.libnss3 = file:/lib/x86_64-linux-gnu/libnss_nis.so.2
+sgx.trusted_files.libnsl = file:/lib/x86_64-linux-gnu/libnsl.so.1
+sgx.trusted_files.libevent = file:/usr/lib/x86_64-linux-gnu/libevent-2.0.so.5
+
+sgx.allowed_files.passwd = file:/etc/passwd
+sgx.allowed_files.shadow = file:/etc/shadow

+ 3 - 2
LibOS/shim/test/inline/Makefile

@@ -11,11 +11,12 @@ CFLAGS-debug += -fno-builtin -nostdlib
 ifeq ($(SYS),x86_64-linux-gnu)
 $(c_executables): %: %.c $(libs) $(level)../../../Pal/src/user_start.o
 	@echo [ $@ ]
-	@$(CC) $(CFLAGS-debug) $(LDFLAGS-debug) -o $@ $^
+	$(CC) $(CFLAGS-debug) $(LDFLAGS-debug) -o $@ $^ -lpal -lsysdb_debug
+
 
 $(cxx_executables): %: %.cpp $(libs) $(level)../../../Pal/src/user_start.o
 	@echo [ $@ ]
-	@$(CXX) $(CFLAGS-debug) $(LDFLAGS-debug) -o $@ $^
+	$(CXX) $(CFLAGS-debug) $(LDFLAGS-debug) -o $@ $^ -lpal -lsysdb_debug
 
 .PHONY: pack
 pack: $(c_executables) $(cxx_executables)

+ 2 - 2
Pal/src/host/Linux-SGX/Makefile

@@ -6,7 +6,7 @@ LD	= ld
 include Makefile.am
 
 CFLAGS	= -Wall -fPIC -O2 -std=gnu99 -fgnu89-inline -U_FORTIFY_SOURCE \
-	  -fno-omit-frame-pointer \
+	  -fno-omit-frame-pointer -maes \
 	  -fno-stack-protector -fno-builtin \
 	  -I. -Iinclude -I../.. -I../../../include -I../../../lib
 ASFLAGS = -DPIC -DSHARED -fPIC -DASSEMBLER -Wa,--noexecstack \
@@ -23,7 +23,7 @@ enclave-objs = $(addprefix db_,files devices pipes sockets streams memory \
 		 exception misc ipc spinlock) \
 	       $(addprefix enclave_,ocalls ecalls framework pages untrusted) \
 	       $(patsubst %.c,%,$(wildcard crypto/*.c))
-enclave-asm-objs = enclave_entry
+enclave-asm-objs = enclave_entry $(patsubst %.S,%,$(wildcard crypto/*.S))
 urts-objs = $(addprefix sgx_,enclave framework main rtld thread process exception graphene)
 urts-asm-objs = sgx_entry
 graphene_lib = ../../.lib/graphene-lib.a

+ 116 - 0
Pal/src/host/Linux-SGX/crypto/aes.c

@@ -25,6 +25,7 @@
 #include "aes.h"
 #include "error-crypt.h"
 #include "api.h"
+#include "pal_linux_defs.h"
 
 #define XMEMSET memset
 #define XMEMCPY memcpy
@@ -756,6 +757,105 @@ static const word32 Td[5][256] = {
 
 #define GETBYTE(x, y) (word32)((byte)((x) >> (8  *(y))))
 
+#if USE_AES_NI == 1
+
+#include <wmmintrin.h>
+
+/* tell C compiler these are asm functions in case any mix up of ABI underscore
+   prefix between clang/gcc/llvm etc */
+void AES_CBC_encrypt(const unsigned char* in, unsigned char* out,
+                     unsigned char* ivec, unsigned long length,
+                     const unsigned char* KS, int nr);
+
+
+void AES_CBC_decrypt(const unsigned char* in, unsigned char* out,
+                     unsigned char* ivec, unsigned long length,
+                     const unsigned char* KS, int nr);
+
+void AES_ECB_encrypt(const unsigned char* in, unsigned char* out,
+                     unsigned long length, const unsigned char* KS, int nr);
+
+
+void AES_ECB_decrypt(const unsigned char* in, unsigned char* out,
+                     unsigned long length, const unsigned char* KS, int nr);
+
+void AES_128_Key_Expansion(const unsigned char* userkey,
+                           unsigned char* key_schedule);
+
+void AES_192_Key_Expansion(const unsigned char* userkey,
+                           unsigned char* key_schedule);
+
+void AES_256_Key_Expansion(const unsigned char* userkey,
+                           unsigned char* key_schedule);
+
+
+static int AES_set_encrypt_key(const unsigned char *userKey, const int bits,
+                               AES* aes)
+{
+    /* Expand userKey into aes->key using the AES-NI assembly routine that
+     * matches the key size; any other size or a NULL argument is rejected
+     * with BAD_FUNC_ARG. */
+    if (!userKey || !aes)
+        return BAD_FUNC_ARG;
+
+    switch (bits) {
+    case 128: AES_128_Key_Expansion(userKey, (byte*)aes->key); break;
+    case 192: AES_192_Key_Expansion(userKey, (byte*)aes->key); break;
+    case 256: AES_256_Key_Expansion(userKey, (byte*)aes->key); break;
+    default:  return BAD_FUNC_ARG;
+    }
+
+    /* 128/192/256-bit keys use 10/12/14 rounds, i.e. bits / 32 + 6. */
+    aes->rounds = bits / 32 + 6;
+    return 0;
+}
+
+
+static int AES_set_decrypt_key(const unsigned char* userKey, const int bits,
+                               AES* aes)
+{
+    /* Build the decryption key schedule in aes->key: the encryption round
+     * keys in reverse order, with every key except the first and last passed
+     * through AESIMC (InvMixColumns), which is the form the AESDEC
+     * instruction expects.  Returns 0 on success or BAD_FUNC_ARG for a NULL
+     * argument or an unsupported key size. */
+    int i, nr, ret;
+    AES temp_key;
+    __m128i *Key_Schedule;
+    __m128i *Temp_Key_Schedule;
+
+    if (!userKey || !aes)
+        return BAD_FUNC_ARG;
+
+    /* Only derive pointers from aes once it is known to be non-NULL.
+     * NOTE(review): the __m128i accesses below assume both key arrays are
+     * 16-byte aligned -- confirm against the AES struct definition. */
+    Key_Schedule = (__m128i*)aes->key;
+    Temp_Key_Schedule = (__m128i*)temp_key.key;
+
+    ret = AES_set_encrypt_key(userKey, bits, &temp_key);
+    if (ret != 0)
+        return ret;
+
+    nr = temp_key.rounds;
+    aes->rounds = nr;
+
+    /* First and last round keys swap places unchanged ... */
+    Key_Schedule[nr] = Temp_Key_Schedule[0];
+    Key_Schedule[0] = Temp_Key_Schedule[nr];
+
+    /* ... and the nr - 1 inner keys are reversed and transformed. */
+    for (i = 1; i < nr; i++)
+        Key_Schedule[nr - i] = _mm_aesimc_si128(Temp_Key_Schedule[i]);
+
+    /* Do not leave a copy of the expanded key on the stack. */
+    XMEMSET(&temp_key, 0, sizeof(temp_key));
+
+    return 0;
+}
+
+#endif /* USE_AES_NI == 1 */
+
 void AESEncrypt(AES *aes, const byte *inBlock, byte *outBlock)
 {
     word32 s0, s1, s2, s3;
@@ -766,6 +866,14 @@ void AESEncrypt(AES *aes, const byte *inBlock, byte *outBlock)
     if (r > 7 || r == 0)
         return;  /* stop instead of segfaulting, set up your keys! */
 
+#if USE_AES_NI == 1
+    /* check alignment, decrypt doesn't need alignment */
+    if (!((uint64_t) inBlock % 16)) {
+        AES_ECB_encrypt(inBlock, outBlock, AES_BLOCK_SIZE, (byte*)aes->key,
+                        aes->rounds);
+        return;
+    }
+#endif
     /*
       *map byte array block to cipher state
       *and add initial round key:
@@ -898,6 +1006,14 @@ void AESDecrypt(AES *aes, const byte *inBlock, byte *outBlock)
     if (r > 7 || r == 0)
         return;  /* stop instead of segfaulting, set up your keys! */
 
+#if USE_AES_NI == 1
+    /* if input and output same will overwrite input iv */
+    XMEMCPY(aes->tmp, inBlock, AES_BLOCK_SIZE);
+    AES_ECB_decrypt(inBlock, outBlock, AES_BLOCK_SIZE, (byte*)aes->key,
+                    aes->rounds);
+    return;
+#endif
+
     /*
       *map byte array block to cipher state
       *and add initial round key:

+ 816 - 0
Pal/src/host/Linux-SGX/crypto/aes_ni.S

@@ -0,0 +1,816 @@
+/* aes_asm.s
+ *
+ * Copyright (C) 2006-2014 wolfSSL Inc.
+ *
+ * This file is part of CyaSSL.
+ *
+ * CyaSSL is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * CyaSSL is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+ */
+
+
+/* See Intel® Advanced Encryption Standard (AES) Instructions Set White Paper
+ * by Intel Mobility Group, Israel Development Center, Israel Shay Gueron
+ */
+
+/* This file is in at&t asm syntax, see .asm for intel syntax */
+
+
+/*
+AES_CBC_encrypt (const unsigned char *in,
+	unsigned char *out,
+	unsigned char ivec[16],
+	unsigned long length,
+	const unsigned char *KS,
+	int nr)
+*/
+.globl AES_CBC_encrypt
+AES_CBC_encrypt:	/* CBC-encrypt from in to out, one 16-byte block per LOOP iteration */
+# parameter 1: %rdi (in)
+# parameter 2: %rsi (out)
+# parameter 3: %rdx (ivec)
+# parameter 4: %rcx (length)
+# parameter 5: %r8 (KS)
+# parameter 6: %r9d (nr)
+movq	%rcx, %r10
+shrq	$4, %rcx	/* rcx = length / 16 */
+shlq	$60, %r10	/* ZF set iff the low 4 bits of length are zero */
+je	NO_PARTS
+addq	$1, %rcx	/* length not a multiple of 16: round the block count up */
+NO_PARTS:
+subq	$16, %rsi	/* pre-bias out; LOOP advances it before each store */
+movdqa	(%rdx), %xmm1	/* xmm1 = IV; aligned load, so ivec must be 16-byte aligned */
+LOOP:	/* NOTE(review): entered unconditionally, so length == 0 would wrap rcx below -- confirm callers never pass 0 */
+pxor	(%rdi), %xmm1	/* chain: xmm1 ^= input block (NOTE(review): memory-operand pxor presumably needs in 16-byte aligned -- confirm) */
+pxor	(%r8), %xmm1	/* xmm1 ^= round key 0 */
+addq	$16,%rsi
+addq	$16,%rdi
+cmpl	$12, %r9d	/* flags are consumed by the first `jb LAST`; the instructions in between leave them untouched */
+aesenc	16(%r8),%xmm1
+aesenc	32(%r8),%xmm1
+aesenc	48(%r8),%xmm1
+aesenc	64(%r8),%xmm1
+aesenc	80(%r8),%xmm1
+aesenc	96(%r8),%xmm1
+aesenc	112(%r8),%xmm1
+aesenc	128(%r8),%xmm1
+aesenc	144(%r8),%xmm1
+movdqa	160(%r8),%xmm2	/* last round key when nr < 12 */
+jb	LAST
+cmpl	$14, %r9d	/* flags are consumed by the second `jb LAST` */
+
+aesenc	160(%r8),%xmm1
+aesenc	176(%r8),%xmm1
+movdqa	192(%r8),%xmm2	/* last round key when nr < 14 */
+jb	LAST
+aesenc	192(%r8),%xmm1
+aesenc	208(%r8),%xmm1
+movdqa	224(%r8),%xmm2	/* last round key otherwise */
+LAST:
+decq	%rcx	/* sets ZF for the `jne LOOP` below */
+aesenclast %xmm2,%xmm1	/* xmm1 = ciphertext block; it stays in xmm1 as the next chaining value */
+movdqu	%xmm1,(%rsi)	/* unaligned store to out */
+jne	LOOP
+ret	/* the final chaining value is not written back to ivec */
+
+
+
+
+/*
+AES_CBC_decrypt (const unsigned char *in,
+  unsigned char *out,
+  unsigned char ivec[16],
+  unsigned long length,
+  const unsigned char *KS,
+  int nr)
+*/
+.globl AES_CBC_decrypt
+AES_CBC_decrypt:
+# parameter 1: %rdi
+# parameter 2: %rsi
+# parameter 3: %rdx
+# parameter 4: %rcx
+# parameter 5: %r8
+# parameter 6: %r9d
+
+movq    %rcx, %r10
+shrq $4, %rcx
+shlq   $60, %r10
+je    DNO_PARTS_4
+addq    $1, %rcx
+DNO_PARTS_4:
+movq   %rcx, %r10
+shlq    $62, %r10
+shrq  $62, %r10
+shrq  $2, %rcx
+movdqu (%rdx),%xmm5
+je DREMAINDER_4
+subq   $64, %rsi
+DLOOP_4:
+movdqu (%rdi), %xmm1
+movdqu  16(%rdi), %xmm2
+movdqu  32(%rdi), %xmm3
+movdqu  48(%rdi), %xmm4
+movdqa  %xmm1, %xmm6
+movdqa %xmm2, %xmm7
+movdqa %xmm3, %xmm8
+movdqa %xmm4, %xmm15
+movdqa    (%r8), %xmm9
+movdqa 16(%r8), %xmm10
+movdqa  32(%r8), %xmm11
+movdqa  48(%r8), %xmm12
+pxor    %xmm9, %xmm1
+pxor   %xmm9, %xmm2
+pxor   %xmm9, %xmm3
+
+pxor    %xmm9, %xmm4
+aesdec %xmm10, %xmm1
+aesdec    %xmm10, %xmm2
+aesdec    %xmm10, %xmm3
+aesdec    %xmm10, %xmm4
+aesdec    %xmm11, %xmm1
+aesdec    %xmm11, %xmm2
+aesdec    %xmm11, %xmm3
+aesdec    %xmm11, %xmm4
+aesdec    %xmm12, %xmm1
+aesdec    %xmm12, %xmm2
+aesdec    %xmm12, %xmm3
+aesdec    %xmm12, %xmm4
+movdqa    64(%r8), %xmm9
+movdqa   80(%r8), %xmm10
+movdqa  96(%r8), %xmm11
+movdqa  112(%r8), %xmm12
+aesdec %xmm9, %xmm1
+aesdec %xmm9, %xmm2
+aesdec %xmm9, %xmm3
+aesdec %xmm9, %xmm4
+aesdec %xmm10, %xmm1
+aesdec    %xmm10, %xmm2
+aesdec    %xmm10, %xmm3
+aesdec    %xmm10, %xmm4
+aesdec    %xmm11, %xmm1
+aesdec    %xmm11, %xmm2
+aesdec    %xmm11, %xmm3
+aesdec    %xmm11, %xmm4
+aesdec    %xmm12, %xmm1
+aesdec    %xmm12, %xmm2
+aesdec    %xmm12, %xmm3
+aesdec    %xmm12, %xmm4
+movdqa    128(%r8), %xmm9
+movdqa  144(%r8), %xmm10
+movdqa 160(%r8), %xmm11
+cmpl   $12, %r9d
+aesdec  %xmm9, %xmm1
+aesdec %xmm9, %xmm2
+aesdec %xmm9, %xmm3
+aesdec %xmm9, %xmm4
+aesdec %xmm10, %xmm1
+aesdec    %xmm10, %xmm2
+aesdec    %xmm10, %xmm3
+aesdec    %xmm10, %xmm4
+jb    DLAST_4
+movdqa  160(%r8), %xmm9
+movdqa  176(%r8), %xmm10
+movdqa 192(%r8), %xmm11
+cmpl   $14, %r9d
+aesdec  %xmm9, %xmm1
+aesdec %xmm9, %xmm2
+aesdec %xmm9, %xmm3
+aesdec %xmm9, %xmm4
+aesdec %xmm10, %xmm1
+aesdec    %xmm10, %xmm2
+aesdec    %xmm10, %xmm3
+aesdec    %xmm10, %xmm4
+jb    DLAST_4
+
+movdqa  192(%r8), %xmm9
+movdqa  208(%r8), %xmm10
+movdqa 224(%r8), %xmm11
+aesdec %xmm9, %xmm1
+aesdec %xmm9, %xmm2
+aesdec %xmm9, %xmm3
+aesdec %xmm9, %xmm4
+aesdec %xmm10, %xmm1
+aesdec    %xmm10, %xmm2
+aesdec    %xmm10, %xmm3
+aesdec    %xmm10, %xmm4
+DLAST_4:
+addq   $64, %rdi
+addq    $64, %rsi
+decq  %rcx
+aesdeclast %xmm11, %xmm1
+aesdeclast %xmm11, %xmm2
+aesdeclast %xmm11, %xmm3
+aesdeclast %xmm11, %xmm4
+pxor   %xmm5 ,%xmm1
+pxor    %xmm6 ,%xmm2
+pxor   %xmm7 ,%xmm3
+pxor   %xmm8 ,%xmm4
+movdqu %xmm1, (%rsi)
+movdqu    %xmm2, 16(%rsi)
+movdqu  %xmm3, 32(%rsi)
+movdqu  %xmm4, 48(%rsi)
+movdqa  %xmm15,%xmm5
+jne    DLOOP_4
+addq    $64, %rsi
+DREMAINDER_4:
+cmpq    $0, %r10
+je  DEND_4
+DLOOP_4_2:
+movdqu  (%rdi), %xmm1
+movdqa    %xmm1 ,%xmm15
+addq  $16, %rdi
+pxor  (%r8), %xmm1
+movdqu 160(%r8), %xmm2
+cmpl    $12, %r9d
+aesdec    16(%r8), %xmm1
+aesdec   32(%r8), %xmm1
+aesdec   48(%r8), %xmm1
+aesdec   64(%r8), %xmm1
+aesdec   80(%r8), %xmm1
+aesdec   96(%r8), %xmm1
+aesdec   112(%r8), %xmm1
+aesdec  128(%r8), %xmm1
+aesdec  144(%r8), %xmm1
+jb  DLAST_4_2
+movdqu    192(%r8), %xmm2
+cmpl    $14, %r9d
+aesdec    160(%r8), %xmm1
+aesdec  176(%r8), %xmm1
+jb  DLAST_4_2
+movdqu    224(%r8), %xmm2
+aesdec  192(%r8), %xmm1
+aesdec  208(%r8), %xmm1
+DLAST_4_2:
+aesdeclast %xmm2, %xmm1
+pxor    %xmm5, %xmm1
+movdqa %xmm15, %xmm5
+movdqu    %xmm1, (%rsi)
+
+addq    $16, %rsi
+decq    %r10
+jne DLOOP_4_2
+DEND_4:
+ret
+
+
+/*
+AES_ECB_encrypt (const unsigned char *in,
+	unsigned char *out,
+	unsigned long length,
+	const unsigned char *KS,
+	int nr)
+*/
+.globl AES_ECB_encrypt
+AES_ECB_encrypt:
+# parameter 1: %rdi
+# parameter 2: %rsi
+# parameter 3: %rdx
+# parameter 4: %rcx
+# parameter 5: %r8d
+        movq    %rdx, %r10
+        shrq    $4, %rdx
+        shlq    $60, %r10
+        je      EECB_NO_PARTS_4
+        addq    $1, %rdx
+EECB_NO_PARTS_4:
+        movq    %rdx, %r10
+        shlq    $62, %r10
+        shrq    $62, %r10
+        shrq    $2, %rdx
+        je      EECB_REMAINDER_4
+        subq    $64, %rsi
+EECB_LOOP_4:
+        movdqu  (%rdi), %xmm1
+        movdqu  16(%rdi), %xmm2
+        movdqu  32(%rdi), %xmm3
+        movdqu  48(%rdi), %xmm4
+        movdqa  (%rcx), %xmm9
+        movdqa  16(%rcx), %xmm10
+        movdqa  32(%rcx), %xmm11
+        movdqa  48(%rcx), %xmm12
+        pxor    %xmm9, %xmm1
+        pxor    %xmm9, %xmm2
+        pxor    %xmm9, %xmm3
+        pxor    %xmm9, %xmm4
+        aesenc  %xmm10, %xmm1
+        aesenc  %xmm10, %xmm2
+        aesenc  %xmm10, %xmm3
+        aesenc  %xmm10, %xmm4
+        aesenc  %xmm11, %xmm1
+        aesenc  %xmm11, %xmm2
+        aesenc  %xmm11, %xmm3
+        aesenc  %xmm11, %xmm4
+        aesenc  %xmm12, %xmm1
+        aesenc  %xmm12, %xmm2
+        aesenc  %xmm12, %xmm3
+        aesenc  %xmm12, %xmm4
+        movdqa  64(%rcx), %xmm9
+        movdqa  80(%rcx), %xmm10
+        movdqa  96(%rcx), %xmm11
+        movdqa  112(%rcx), %xmm12
+        aesenc  %xmm9, %xmm1
+        aesenc  %xmm9, %xmm2
+        aesenc  %xmm9, %xmm3
+        aesenc  %xmm9, %xmm4
+        aesenc  %xmm10, %xmm1
+        aesenc  %xmm10, %xmm2
+        aesenc  %xmm10, %xmm3
+        aesenc  %xmm10, %xmm4
+        aesenc  %xmm11, %xmm1
+        aesenc  %xmm11, %xmm2
+        aesenc  %xmm11, %xmm3
+        aesenc  %xmm11, %xmm4
+        aesenc  %xmm12, %xmm1
+        aesenc  %xmm12, %xmm2
+        aesenc  %xmm12, %xmm3
+        aesenc  %xmm12, %xmm4
+        movdqa  128(%rcx), %xmm9
+        movdqa  144(%rcx), %xmm10
+        movdqa  160(%rcx), %xmm11
+        cmpl    $12, %r8d
+        aesenc  %xmm9, %xmm1
+        aesenc  %xmm9, %xmm2
+        aesenc  %xmm9, %xmm3
+        aesenc  %xmm9, %xmm4
+        aesenc  %xmm10, %xmm1
+        aesenc  %xmm10, %xmm2
+        aesenc  %xmm10, %xmm3
+        aesenc  %xmm10, %xmm4
+        jb      EECB_LAST_4
+        movdqa  160(%rcx), %xmm9
+        movdqa  176(%rcx), %xmm10
+        movdqa  192(%rcx), %xmm11
+        cmpl    $14, %r8d
+        aesenc  %xmm9, %xmm1
+        aesenc  %xmm9, %xmm2
+        aesenc  %xmm9, %xmm3
+        aesenc  %xmm9, %xmm4
+        aesenc  %xmm10, %xmm1
+        aesenc  %xmm10, %xmm2
+        aesenc  %xmm10, %xmm3
+        aesenc  %xmm10, %xmm4
+        jb      EECB_LAST_4
+        movdqa  192(%rcx), %xmm9
+        movdqa  208(%rcx), %xmm10
+        movdqa  224(%rcx), %xmm11
+        aesenc  %xmm9, %xmm1
+        aesenc  %xmm9, %xmm2
+        aesenc  %xmm9, %xmm3
+        aesenc  %xmm9, %xmm4
+        aesenc  %xmm10, %xmm1
+        aesenc  %xmm10, %xmm2
+        aesenc  %xmm10, %xmm3
+        aesenc  %xmm10, %xmm4
+EECB_LAST_4:
+        addq    $64, %rdi
+        addq    $64, %rsi
+        decq    %rdx
+        aesenclast %xmm11, %xmm1
+        aesenclast %xmm11, %xmm2
+        aesenclast %xmm11, %xmm3
+        aesenclast %xmm11, %xmm4
+        movdqu  %xmm1, (%rsi)
+        movdqu  %xmm2, 16(%rsi)
+        movdqu  %xmm3, 32(%rsi)
+        movdqu  %xmm4, 48(%rsi)
+        jne     EECB_LOOP_4
+        addq    $64, %rsi
+EECB_REMAINDER_4:
+        cmpq    $0, %r10
+        je      EECB_END_4
+EECB_LOOP_4_2:
+        movdqu  (%rdi), %xmm1
+        addq    $16, %rdi
+        pxor    (%rcx), %xmm1
+        movdqu  160(%rcx), %xmm2
+        aesenc  16(%rcx), %xmm1
+        aesenc  32(%rcx), %xmm1
+        aesenc  48(%rcx), %xmm1
+        aesenc  64(%rcx), %xmm1
+        aesenc  80(%rcx), %xmm1
+        aesenc  96(%rcx), %xmm1
+        aesenc  112(%rcx), %xmm1
+        aesenc  128(%rcx), %xmm1
+        aesenc  144(%rcx), %xmm1
+        cmpl    $12, %r8d
+        jb      EECB_LAST_4_2
+        movdqu  192(%rcx), %xmm2
+        aesenc  160(%rcx), %xmm1
+        aesenc  176(%rcx), %xmm1
+        cmpl    $14, %r8d
+        jb      EECB_LAST_4_2
+        movdqu  224(%rcx), %xmm2
+        aesenc  192(%rcx), %xmm1
+        aesenc  208(%rcx), %xmm1
+EECB_LAST_4_2:
+        aesenclast %xmm2, %xmm1
+        movdqu  %xmm1, (%rsi)
+        addq    $16, %rsi
+        decq    %r10
+        jne     EECB_LOOP_4_2
+EECB_END_4:
+        ret
+
+
+/*
+AES_ECB_decrypt (const unsigned char *in,
+  unsigned char *out,
+  unsigned long length,
+  const unsigned char *KS,
+  int nr)
+*/
+.globl AES_ECB_decrypt
+AES_ECB_decrypt:
+# parameter 1: %rdi
+# parameter 2: %rsi
+# parameter 3: %rdx
+# parameter 4: %rcx
+# parameter 5: %r8d
+
+        movq    %rdx, %r10
+        shrq    $4, %rdx
+        shlq    $60, %r10
+        je      DECB_NO_PARTS_4
+        addq    $1, %rdx
+DECB_NO_PARTS_4:
+        movq    %rdx, %r10
+        shlq    $62, %r10
+        shrq    $62, %r10
+        shrq    $2, %rdx
+        je      DECB_REMAINDER_4
+        subq    $64, %rsi
+DECB_LOOP_4:
+        movdqu  (%rdi), %xmm1
+        movdqu  16(%rdi), %xmm2
+        movdqu  32(%rdi), %xmm3
+        movdqu  48(%rdi), %xmm4
+        movdqa  (%rcx), %xmm9
+        movdqa  16(%rcx), %xmm10
+        movdqa  32(%rcx), %xmm11
+        movdqa  48(%rcx), %xmm12
+        pxor    %xmm9, %xmm1
+        pxor    %xmm9, %xmm2
+        pxor    %xmm9, %xmm3
+        pxor    %xmm9, %xmm4
+        aesdec  %xmm10, %xmm1
+        aesdec  %xmm10, %xmm2
+        aesdec  %xmm10, %xmm3
+        aesdec  %xmm10, %xmm4
+        aesdec  %xmm11, %xmm1
+        aesdec  %xmm11, %xmm2
+        aesdec  %xmm11, %xmm3
+        aesdec  %xmm11, %xmm4
+        aesdec  %xmm12, %xmm1
+        aesdec  %xmm12, %xmm2
+        aesdec  %xmm12, %xmm3
+        aesdec  %xmm12, %xmm4
+        movdqa  64(%rcx), %xmm9
+        movdqa  80(%rcx), %xmm10
+        movdqa  96(%rcx), %xmm11
+        movdqa  112(%rcx), %xmm12
+        aesdec  %xmm9, %xmm1
+        aesdec  %xmm9, %xmm2
+        aesdec  %xmm9, %xmm3
+        aesdec  %xmm9, %xmm4
+        aesdec  %xmm10, %xmm1
+        aesdec  %xmm10, %xmm2
+        aesdec  %xmm10, %xmm3
+        aesdec  %xmm10, %xmm4
+        aesdec  %xmm11, %xmm1
+        aesdec  %xmm11, %xmm2
+        aesdec  %xmm11, %xmm3
+        aesdec  %xmm11, %xmm4
+        aesdec  %xmm12, %xmm1
+        aesdec  %xmm12, %xmm2
+        aesdec  %xmm12, %xmm3
+        aesdec  %xmm12, %xmm4
+        movdqa  128(%rcx), %xmm9
+        movdqa  144(%rcx), %xmm10
+        movdqa  160(%rcx), %xmm11
+        cmpl    $12, %r8d
+        aesdec  %xmm9, %xmm1
+        aesdec  %xmm9, %xmm2
+        aesdec  %xmm9, %xmm3
+        aesdec  %xmm9, %xmm4
+        aesdec  %xmm10, %xmm1
+        aesdec  %xmm10, %xmm2
+        aesdec  %xmm10, %xmm3
+        aesdec  %xmm10, %xmm4
+        jb      DECB_LAST_4
+        movdqa  160(%rcx), %xmm9
+        movdqa  176(%rcx), %xmm10
+        movdqa  192(%rcx), %xmm11
+        cmpl    $14, %r8d
+        aesdec  %xmm9, %xmm1
+        aesdec  %xmm9, %xmm2
+        aesdec  %xmm9, %xmm3
+        aesdec  %xmm9, %xmm4
+        aesdec  %xmm10, %xmm1
+        aesdec  %xmm10, %xmm2
+        aesdec  %xmm10, %xmm3
+        aesdec  %xmm10, %xmm4
+        jb      DECB_LAST_4
+        movdqa  192(%rcx), %xmm9
+        movdqa  208(%rcx), %xmm10
+        movdqa  224(%rcx), %xmm11
+        aesdec  %xmm9, %xmm1
+        aesdec  %xmm9, %xmm2
+        aesdec  %xmm9, %xmm3
+        aesdec  %xmm9, %xmm4
+        aesdec  %xmm10, %xmm1
+        aesdec  %xmm10, %xmm2
+        aesdec  %xmm10, %xmm3
+        aesdec  %xmm10, %xmm4
+DECB_LAST_4:
+        addq    $64, %rdi
+        addq    $64, %rsi
+        decq    %rdx
+        aesdeclast %xmm11, %xmm1
+        aesdeclast %xmm11, %xmm2
+        aesdeclast %xmm11, %xmm3
+        aesdeclast %xmm11, %xmm4
+        movdqu  %xmm1, (%rsi)
+        movdqu  %xmm2, 16(%rsi)
+        movdqu  %xmm3, 32(%rsi)
+        movdqu  %xmm4, 48(%rsi)
+        jne     DECB_LOOP_4
+        addq    $64, %rsi
+DECB_REMAINDER_4:
+        cmpq    $0, %r10
+        je      DECB_END_4
+DECB_LOOP_4_2:
+        movdqu  (%rdi), %xmm1
+        addq    $16, %rdi
+        pxor    (%rcx), %xmm1
+        movdqu  160(%rcx), %xmm2
+        cmpl    $12, %r8d
+        aesdec  16(%rcx), %xmm1
+        aesdec  32(%rcx), %xmm1
+        aesdec  48(%rcx), %xmm1
+        aesdec  64(%rcx), %xmm1
+        aesdec  80(%rcx), %xmm1
+        aesdec  96(%rcx), %xmm1
+        aesdec  112(%rcx), %xmm1
+        aesdec  128(%rcx), %xmm1
+        aesdec  144(%rcx), %xmm1
+        jb      DECB_LAST_4_2
+        cmpl    $14, %r8d
+        movdqu  192(%rcx), %xmm2
+        aesdec  160(%rcx), %xmm1
+        aesdec  176(%rcx), %xmm1
+        jb      DECB_LAST_4_2
+        movdqu  224(%rcx), %xmm2
+        aesdec  192(%rcx), %xmm1
+        aesdec  208(%rcx), %xmm1
+DECB_LAST_4_2:
+        aesdeclast %xmm2, %xmm1
+        movdqu  %xmm1, (%rsi)
+        addq    $16, %rsi
+        decq    %r10
+        jne     DECB_LOOP_4_2
+DECB_END_4:
+        ret
+
+
+
+
+/*
+void AES_128_Key_Expansion(const unsigned char* userkey,
+   unsigned char* key_schedule);
+*/
+.align  16,0x90
+.globl AES_128_Key_Expansion
+AES_128_Key_Expansion:	/* expand a 16-byte user key into 11 round keys at key_schedule */
+# parameter 1: %rdi (userkey)
+# parameter 2: %rsi (key_schedule)
+movl    $10, 240(%rsi)	/* store 10 at byte offset 240 (presumably the rounds field after the key array -- TODO confirm) */
+
+movdqu  (%rdi), %xmm1	/* unaligned load of the user key */
+movdqa    %xmm1, (%rsi)	/* round key 0 = user key; aligned store, so key_schedule must be 16-byte aligned */
+
+
+ASSISTS:	/* ten steps: aeskeygenassist with the next round constant, helper call, store at 16 * round */
+aeskeygenassist $1, %xmm1, %xmm2
+call PREPARE_ROUNDKEY_128
+movdqa %xmm1, 16(%rsi)
+aeskeygenassist $2, %xmm1, %xmm2
+call PREPARE_ROUNDKEY_128
+movdqa %xmm1, 32(%rsi)
+aeskeygenassist $4, %xmm1, %xmm2
+call PREPARE_ROUNDKEY_128
+movdqa %xmm1, 48(%rsi)
+aeskeygenassist $8, %xmm1, %xmm2
+call PREPARE_ROUNDKEY_128
+movdqa %xmm1, 64(%rsi)
+aeskeygenassist $16, %xmm1, %xmm2
+call PREPARE_ROUNDKEY_128
+movdqa %xmm1, 80(%rsi)
+aeskeygenassist $32, %xmm1, %xmm2
+call PREPARE_ROUNDKEY_128
+movdqa %xmm1, 96(%rsi)
+aeskeygenassist $64, %xmm1, %xmm2
+call PREPARE_ROUNDKEY_128
+movdqa %xmm1, 112(%rsi)
+aeskeygenassist $0x80, %xmm1, %xmm2
+call PREPARE_ROUNDKEY_128
+movdqa %xmm1, 128(%rsi)
+aeskeygenassist $0x1b, %xmm1, %xmm2
+call PREPARE_ROUNDKEY_128
+movdqa %xmm1, 144(%rsi)
+aeskeygenassist $0x36, %xmm1, %xmm2
+call PREPARE_ROUNDKEY_128
+movdqa %xmm1, 160(%rsi)	/* round key 10, the last one for a 128-bit key */
+ret
+
+PREPARE_ROUNDKEY_128:	/* in: xmm1 = previous round key, xmm2 = aeskeygenassist result; out: xmm1 = next round key; clobbers xmm2, xmm3 */
+pshufd $255, %xmm2, %xmm2	/* broadcast dword 3 of xmm2 to all four dwords */
+movdqa %xmm1, %xmm3
+pslldq $4, %xmm3	/* xmm3 = key shifted left by 4 bytes */
+pxor %xmm3, %xmm1
+pslldq $4, %xmm3	/* ... by 8 bytes in total */
+pxor %xmm3, %xmm1
+pslldq $4, %xmm3	/* ... by 12 bytes in total */
+pxor %xmm3, %xmm1	/* each dword of xmm1 is now the XOR of itself and all lower dwords */
+pxor %xmm2, %xmm1	/* mix in the broadcast key-assist word */
+ret
+
+
+/*
+void AES_192_Key_Expansion (const unsigned char *userkey,
+  unsigned char *key)
+*/
+.globl AES_192_Key_Expansion
+AES_192_Key_Expansion:
+# parameter 1: %rdi
+# parameter 2: %rsi
+
+movdqu (%rdi), %xmm1
+movdqu 16(%rdi), %xmm3
+movdqa %xmm1, (%rsi)
+movdqa %xmm3, %xmm5
+
+aeskeygenassist $0x1, %xmm3, %xmm2
+call PREPARE_ROUNDKEY_192
+shufpd $0, %xmm1, %xmm5
+movdqa %xmm5, 16(%rsi)
+movdqa %xmm1, %xmm6
+shufpd $1, %xmm3, %xmm6
+movdqa %xmm6, 32(%rsi)
+
+aeskeygenassist $0x2, %xmm3, %xmm2
+call PREPARE_ROUNDKEY_192
+movdqa %xmm1, 48(%rsi)
+movdqa %xmm3, %xmm5
+
+aeskeygenassist $0x4, %xmm3, %xmm2
+call PREPARE_ROUNDKEY_192
+shufpd $0, %xmm1, %xmm5
+movdqa %xmm5, 64(%rsi)
+movdqa %xmm1, %xmm6
+shufpd $1, %xmm3, %xmm6
+movdqa %xmm6, 80(%rsi)
+
+aeskeygenassist $0x8, %xmm3, %xmm2
+call PREPARE_ROUNDKEY_192
+movdqa %xmm1, 96(%rsi)
+movdqa %xmm3, %xmm5
+
+aeskeygenassist $0x10, %xmm3, %xmm2
+call PREPARE_ROUNDKEY_192
+shufpd $0, %xmm1, %xmm5
+movdqa %xmm5, 112(%rsi)
+movdqa %xmm1, %xmm6
+shufpd $1, %xmm3, %xmm6
+movdqa %xmm6, 128(%rsi)
+
+aeskeygenassist $0x20, %xmm3, %xmm2
+call PREPARE_ROUNDKEY_192
+movdqa %xmm1, 144(%rsi)
+movdqa %xmm3, %xmm5
+
+aeskeygenassist $0x40, %xmm3, %xmm2
+call PREPARE_ROUNDKEY_192
+shufpd $0, %xmm1, %xmm5
+movdqa %xmm5, 160(%rsi)
+movdqa %xmm1, %xmm6
+shufpd $1, %xmm3, %xmm6
+movdqa %xmm6, 176(%rsi)
+
+aeskeygenassist $0x80, %xmm3, %xmm2
+call PREPARE_ROUNDKEY_192
+movdqa %xmm1, 192(%rsi)
+movdqa %xmm3, 208(%rsi)
+ret
+
+PREPARE_ROUNDKEY_192:	/* in: xmm1, xmm3 = current key words, xmm2 = aeskeygenassist result; updates xmm1 and xmm3; clobbers xmm2, xmm4 */
+pshufd $0x55, %xmm2, %xmm2	/* broadcast dword 1 of xmm2 to all four dwords */
+movdqu %xmm1, %xmm4
+pslldq $4, %xmm4	/* xmm4 = xmm1 shifted left by 4 bytes */
+pxor   %xmm4, %xmm1
+
+pslldq $4, %xmm4	/* ... by 8 bytes in total */
+pxor   %xmm4, %xmm1
+pslldq $4, %xmm4	/* ... by 12 bytes in total */
+pxor  %xmm4, %xmm1	/* each dword of xmm1 is now the XOR of itself and all lower dwords */
+pxor   %xmm2, %xmm1	/* mix in the broadcast key-assist word */
+pshufd $0xff, %xmm1, %xmm2	/* broadcast dword 3 of the updated xmm1 */
+movdqu %xmm3, %xmm4
+pslldq $4, %xmm4	/* xmm4 = xmm3 shifted left by 4 bytes */
+pxor   %xmm4, %xmm3
+pxor   %xmm2, %xmm3	/* mix the broadcast word into xmm3 */
+ret
+ 
+
+/*
+void AES_256_Key_Expansion (const unsigned char *userkey,
+  unsigned char *key)
+*/
+.globl AES_256_Key_Expansion
+AES_256_Key_Expansion:
+# parameter 1: %rdi
+# parameter 2: %rsi
+
+movdqu (%rdi), %xmm1
+movdqu 16(%rdi), %xmm3
+movdqa %xmm1, (%rsi)
+movdqa %xmm3, 16(%rsi)
+
+aeskeygenassist $0x1, %xmm3, %xmm2
+call MAKE_RK256_a
+movdqa %xmm1, 32(%rsi)
+aeskeygenassist $0x0, %xmm1, %xmm2
+call MAKE_RK256_b
+movdqa %xmm3, 48(%rsi)
+aeskeygenassist $0x2, %xmm3, %xmm2
+call MAKE_RK256_a
+movdqa %xmm1, 64(%rsi)
+aeskeygenassist $0x0, %xmm1, %xmm2
+call MAKE_RK256_b
+movdqa %xmm3, 80(%rsi)
+aeskeygenassist $0x4, %xmm3, %xmm2
+call MAKE_RK256_a
+movdqa %xmm1, 96(%rsi)
+aeskeygenassist $0x0, %xmm1, %xmm2
+call MAKE_RK256_b
+movdqa %xmm3, 112(%rsi)
+aeskeygenassist $0x8, %xmm3, %xmm2
+call MAKE_RK256_a
+movdqa %xmm1, 128(%rsi)
+aeskeygenassist $0x0, %xmm1, %xmm2
+call MAKE_RK256_b
+movdqa %xmm3, 144(%rsi)
+aeskeygenassist $0x10, %xmm3, %xmm2
+call MAKE_RK256_a
+movdqa %xmm1, 160(%rsi)
+aeskeygenassist $0x0, %xmm1, %xmm2
+call MAKE_RK256_b
+movdqa %xmm3, 176(%rsi)
+aeskeygenassist $0x20, %xmm3, %xmm2
+call MAKE_RK256_a
+movdqa %xmm1, 192(%rsi)
+
+aeskeygenassist $0x0, %xmm1, %xmm2
+call MAKE_RK256_b
+movdqa %xmm3, 208(%rsi)
+aeskeygenassist $0x40, %xmm3, %xmm2
+call MAKE_RK256_a
+movdqa %xmm1, 224(%rsi)
+
+ret
+
+MAKE_RK256_a:	/* in: xmm1 = round key to update, xmm2 = aeskeygenassist result; out: xmm1 updated; clobbers xmm2, xmm4 */
+pshufd $0xff, %xmm2, %xmm2	/* broadcast dword 3 of xmm2 to all four dwords */
+movdqa %xmm1, %xmm4
+pslldq $4, %xmm4	/* xmm4 = xmm1 shifted left by 4 bytes */
+pxor   %xmm4, %xmm1
+pslldq $4, %xmm4	/* ... by 8 bytes in total */
+pxor  %xmm4, %xmm1
+pslldq $4, %xmm4	/* ... by 12 bytes in total */
+pxor  %xmm4, %xmm1	/* each dword of xmm1 is now the XOR of itself and all lower dwords */
+pxor   %xmm2, %xmm1	/* mix in the broadcast key-assist word */
+ret
+
+MAKE_RK256_b:	/* in: xmm3 = round key to update, xmm2 = aeskeygenassist result; out: xmm3 updated; clobbers xmm2, xmm4 */
+pshufd $0xaa, %xmm2, %xmm2	/* broadcast dword 2 of xmm2 to all four dwords */
+movdqa %xmm3, %xmm4
+pslldq $4, %xmm4	/* xmm4 = xmm3 shifted left by 4 bytes */
+pxor   %xmm4, %xmm3
+pslldq $4, %xmm4	/* ... by 8 bytes in total */
+pxor  %xmm4, %xmm3
+pslldq $4, %xmm4	/* ... by 12 bytes in total */
+pxor  %xmm4, %xmm3	/* each dword of xmm3 is now the XOR of itself and all lower dwords */
+pxor   %xmm2, %xmm3	/* mix in the broadcast key-assist word */
+ret
+

+ 7 - 3
Pal/src/host/Linux-SGX/db_exception.c

@@ -102,7 +102,7 @@ static struct pal_frame * get_frame (sgx_context_t * uc)
     struct pal_frame * frame = (struct pal_frame *) rbp - 1;
 
     for (int i = 0 ; i < 8 ; i++) {
-        if (frame->self == frame)
+        if (frame->identifier == PAL_FRAME_IDENTIFIER)
             return frame;
 
         frame = (struct pal_frame *) ((void *) frame - 8);
@@ -136,7 +136,7 @@ void _DkExceptionRealHandler (int event, PAL_NUM arg, struct pal_frame * frame,
 {
     if (frame) {
         frame = __alloca(sizeof(struct pal_frame));
-        frame->self     = frame;
+        frame->identifier = PAL_FRAME_IDENTIFIER;
         frame->func     = &_DkExceptionRealHandler;
         frame->funcname = "_DkExceptionRealHandler";
 
@@ -351,9 +351,13 @@ void _DkCheckExternalEvent (void)
     ENCLAVE_TLS(external_event) = 0;
     struct pal_frame * frame = get_frame(NULL);
 
+    if (event == PAL_EVENT_RESUME &&
+        frame && frame->func == DkObjectsWaitAny)
+        return;
+
     if (!frame) {
         frame = __alloca(sizeof(struct pal_frame));
-        frame->self = frame;
+        frame->identifier = PAL_FRAME_IDENTIFIER;
         frame->func = &_DkCheckExternalEvent;
         frame->funcname = "DkCheckExternalEvent";
         arch_store_frame(&frame->arch);

+ 4 - 4
Pal/src/host/Linux-SGX/db_files.c

@@ -65,11 +65,11 @@ static int file_open (PAL_HANDLE * handle, const char * type, const char * uri,
     get_norm_path(uri, path, 0, len + 1);
     hdl->file.realpath = (PAL_STR) path;
 
-    sgx_checksum_t * stubs;
+    sgx_arch_mac_t * stubs;
     uint64_t total;
     int ret = load_trusted_file(hdl, &stubs, &total);
     if (ret < 0) {
-        SGX_DBG(DBG_E, "Accessing file:%s is denied. (%d)"
+        SGX_DBG(DBG_E, "Accessing file:%s is denied. (%e) "
                 "This file is not trusted or allowed.\n", hdl->file.realpath, ret);
         free(hdl);
         return -PAL_ERROR_DENIED;
@@ -85,7 +85,7 @@ static int file_open (PAL_HANDLE * handle, const char * type, const char * uri,
 static int file_read (PAL_HANDLE handle, int offset, int count,
                       void * buffer)
 {
-    sgx_checksum_t * stubs = (sgx_checksum_t *) handle->file.stubs;
+    sgx_arch_mac_t * stubs = (sgx_arch_mac_t *) handle->file.stubs;
     unsigned int total = handle->file.total;
     int ret;
 
@@ -178,7 +178,7 @@ static int file_delete (PAL_HANDLE handle, int access)
 static int file_map (PAL_HANDLE handle, void ** addr, int prot,
                      uint64_t offset, uint64_t size)
 {
-    sgx_checksum_t * stubs = (sgx_checksum_t *) handle->file.stubs;
+    sgx_arch_mac_t * stubs = (sgx_arch_mac_t *) handle->file.stubs;
     unsigned int total = handle->file.total;
     void * mem = *addr;
     void * umem;

+ 2 - 0
Pal/src/host/Linux-SGX/db_main.c

@@ -84,6 +84,7 @@ static struct link_map pal_map;
 
 int init_untrusted_slab_mgr (int pagesize);
 int init_enclave (void);
+int init_enclave_key (void);
 int init_child_process (PAL_HANDLE * parent_handle);
 
 static PAL_HANDLE setup_file_handle (const char * name, int fd)
@@ -139,6 +140,7 @@ void pal_linux_main(const char ** arguments, const char ** environments,
     init_slab_mgr(pagesz);
     init_untrusted_slab_mgr(pagesz);
     init_pages();
+    init_enclave_key();
 
     /* now we can add a link map for PAL itself */
     setup_pal_map(&pal_map);

+ 82 - 76
Pal/src/host/Linux-SGX/db_mutex.c

@@ -39,126 +39,132 @@
 #include <atomic.h>
 #include <linux/time.h>
 
-#define MUTEX_SPINLOCK_TIMES    20
-
-int _DkMutexLockTimeout (struct mutex_handle * mut, int timeout)
+#ifdef __i386__
+# define barrier()       asm volatile("" ::: "memory");
+# define rmb()           asm volatile("lock; addl $0,0(%%esp)" ::: "memory")
+# define cpu_relax()     asm volatile("rep; nop" ::: "memory");
+#endif
+
+#ifdef __x86_64__
+# include <unistd.h>
+# define barrier()       asm volatile("" ::: "memory");
+# define rmb()           asm volatile("lfence" ::: "memory")
+# define cpu_relax()     asm volatile("rep; nop" ::: "memory");
+#endif
+
+#define MUTEX_SPINLOCK_TIMES    100
+
+int _DkMutexLockTimeout (struct mutex_handle * m, int timeout)
 {
-    int i, c = 0;
+    int ret = 0;
+#ifdef DEBUG_MUTEX
+    int tid = INLINE_SYSCALL(gettid, 0);
+#endif
 
     if (timeout == -1)
-        return -_DkMutexLock(mut);
-
-    struct atomic_int * m = &mut->value;
-
-    /* Spin and try to take lock */
-    for (i = 0 ; i < MUTEX_SPINLOCK_TIMES ; i++)
-    {
-        c = atomic_dec_and_test(m);
-        if (c)
-            goto success;
-        cpu_relax();
-    }
-
-    /* The lock is now contended */
+        return -_DkMutexLock(m);
 
-    int ret;
+    if (!xchg(&m->b.locked, 1))
+        goto success;
 
     if (timeout == 0) {
-        ret = c ? 0 : -PAL_ERROR_TRYAGAIN;
+        ret = -PAL_ERROR_TRYAGAIN;
         goto out;
     }
 
     unsigned long waittime = timeout;
 
-    while (!c) {
-        int val = atomic_read(m);
-        if (val == 1)
-            goto again;
-
-        ret = ocall_futex((int *) &m->counter, FUTEX_WAIT, 2, timeout ? &waittime : NULL);
-        if (ret < 0)
+    while (xchg(&m->u, 257) & 1) {
+        ret = ocall_futex((int *) m, FUTEX_WAIT, 257, timeout ? &waittime : NULL);
+        if (ret < 0) {
+            if (ret == -PAL_ERROR_TRYAGAIN) {
+                xchg(&m->b.contended, 0);
+                goto out;
+            }
+#ifdef DEBUG_MUTEX
+            printf("futex failed (err = %d)\n", ERRNO(ret));
+#endif
             goto out;
-
-again:
-        /* Upon wakeup, we still need to check whether mutex is unlocked or
-         * someone else took it.
-         * If c==0 upon return from xchg (i.e., the older value of m==0), we
-         * will exit the loop. Else, we sleep again (through a futex call).
-         */
-        c = atomic_dec_and_test(m);
+        }
     }
 
 success:
+#ifdef DEBUG_MUTEX
+    m->owner = tid;
+#endif
     ret = 0;
 out:
+#ifdef DEBUG_MUTEX
+    if (ret < 0)
+        printf("mutex failed (%e, tid = %d)\n", -ret, tid);
+#endif
     return ret;
 }
 
-int _DkMutexLock (struct mutex_handle * mut)
+int _DkMutexLock (struct mutex_handle * m)
 {
-    int i, c = 0;
-    int ret;
-    struct atomic_int * m = &mut->value;
+    int ret = 0, i;
+#ifdef DEBUG_MUTEX
+    int tid = INLINE_SYSCALL(gettid, 0);
+#endif
 
     /* Spin and try to take lock */
     for (i = 0; i < MUTEX_SPINLOCK_TIMES; i++) {
-        c = atomic_dec_and_test(m);
-        if (c)
+        if (!xchg(&m->b.locked, 1))
             goto success;
         cpu_relax();
     }
 
-    /* The lock is now contended */
-
-    while (!c) {
-        int val = atomic_read(m);
-        if (val == 1)
-            goto again;
-
-        ret = ocall_futex((int *) &m->counter, FUTEX_WAIT, 2, NULL);
-        if (ret < 0)
+    while (xchg(&m->u, 257) & 1) {
+        ret = ocall_futex((int *) m, FUTEX_WAIT, 257, NULL);
+        if (ret < 0 &&
+            ret != -PAL_ERROR_TRYAGAIN) {
+#ifdef DEBUG_MUTEX
+            printf("futex failed (err = %d)\n", ERRNO(ret));
+#endif
             goto out;
-
-again:
-        /* Upon wakeup, we still need to check whether mutex is unlocked or
-         * someone else took it.
-         * If c==0 upon return from xchg (i.e., the older value of m==0), we
-         * will exit the loop. Else, we sleep again (through a futex call).
-         */
-        c = atomic_dec_and_test(m);
+        }
     }
 
-
 success:
+#ifdef DEBUG_MUTEX
+    m->owner = tid;
+#endif
     ret = 0;
 out:
+#ifdef DEBUG_MUTEX
+    if (ret < 0)
+        printf("mutex failed (%e, tid = %d)\n", -ret, tid);
+#endif
     return ret;
 }
 
-int _DkMutexUnlock (struct mutex_handle * mut)
+int _DkMutexUnlock (struct mutex_handle * m)
 {
-    int ret = 0;
-    int must_wake = 0;
-    struct atomic_int * m = &mut->value;
+    int ret = 0, i;
 
-    /* Unlock, and if not contended then exit. */
-    if (atomic_read(m) < 0)
-        must_wake = 1;
+#ifdef DEBUG_MUTEX
+    m->owner = 0;
+#endif
 
-    atomic_set(m, 1);
+    /* Unlock, and if not contended then exit. */
+    if ((m->u == 1) && (cmpxchg(&m->u, 1, 0) == 1)) return 0;
+    m->b.locked = 0;
+    barrier();
 
-    if (must_wake) {
-        /* We need to wake someone up */
-        ret = ocall_futex((int *) &m->counter, FUTEX_WAKE, 1, NULL);
-        if (ret < 0)
-            goto out;
+    /* Spin briefly: if another thread takes the lock, skip the futex wake */
+    for (i = 0; i < MUTEX_SPINLOCK_TIMES * 2; i++) {
+        if (m->b.locked)
+            goto success;
+        cpu_relax();
     }
 
-    if (ret < 0) {
-        ret = -PAL_ERROR_TRYAGAIN;
-        goto out;
-    }
+    m->b.contended = 0;
+
+    /* We need to wake someone up */
+    ocall_futex((int *) m, FUTEX_WAKE, 1, NULL);
 
+success:
     ret = 0;
 out:
     return ret;

+ 3 - 4
Pal/src/host/Linux-SGX/db_semaphore.c

@@ -68,7 +68,7 @@ _DkSemaphoreCreate (PAL_HANDLE handle, int initialCount, int maxCount)
 
     /* optimization: if maxCount == 1, we make it into mutex */
     if (handle->semaphore.max_value == 1) {
-        atomic_set(&handle->semaphore.value.mut.value, 1 - initialCount);
+        handle->semaphore.value.mut.u = initialCount;
     } else {
         atomic_set(&handle->semaphore.value.i, maxCount - initialCount);
     }
@@ -246,9 +246,8 @@ void _DkSemaphoreRelease (PAL_HANDLE sem, int count)
 int _DkSemaphoreGetCurrentCount (PAL_HANDLE sem)
 {
     if (sem->semaphore.max_value == 1) {
-        struct mutex_handle * mut =
-            &sem->semaphore.value.mut;
-        return atomic_read(&mut->value);
+        struct mutex_handle * m = &sem->semaphore.value.mut;
+        return m->b.locked;
     }
 
     int c = atomic_read(&sem->semaphore.value.i);

+ 20 - 15
Pal/src/host/Linux-SGX/db_sockets.c

@@ -42,6 +42,18 @@ typedef __kernel_pid_t pid_t;
 #include <asm/fcntl.h>
 #include <asm-generic/socket.h>
 
+#ifndef SOL_TCP
+# define SOL_TCP 6
+#endif
+
+#ifndef TCP_NODELAY
+# define TCP_NODELAY 1
+#endif
+
+#ifndef TCP_CORK
+# define TCP_CORK 3
+#endif
+
 /* 96 bytes is the minimal size of buffer needed to store an IPv4/IPv6
    address */
 #define PAL_SOCKADDR_SIZE   96
@@ -755,7 +767,14 @@ static int socket_attrquerybyhdl (PAL_HANDLE handle, PAL_STREAM_ATTR  * attr)
 
     int fd = handle->sock.fd, ret;
 
-    if (handle->sock.conn) {
+    if (IS_HANDLE_TYPE(handle, tcpsrv)) {
+        struct pollfd pfd = { .fd = fd, .events = POLLIN, .revents = 0 };
+        unsigned long waittime = 0;
+        int ret = ocall_poll(&pfd, 1, &waittime);
+        if (ret < 0)
+            return ret;
+        attr->readable = (ret == 1 && pfd.revents == POLLIN);
+    } else {
         /* try to use ioctl FIONREAD to get the pending size of the socket */
         ret = ocall_fionread(fd);
         if (ret < 0)
@@ -763,25 +782,11 @@ static int socket_attrquerybyhdl (PAL_HANDLE handle, PAL_STREAM_ATTR  * attr)
 
         attr->pending_size = ret;
         attr->readable = !!attr->pending_size > 0;
-    } else {
-        attr->readable = !attr->disconnected;
     }
 
     return 0;
 }
 
-#ifndef SOL_TCP
-# define SOL_TCP 6
-#endif
-
-#ifndef TCP_NODELAY
-# define TCP_NODELAY 1
-#endif
-
-#ifndef TCP_CORK
-# define TCP_CORK 3
-#endif
-
 static int socket_attrsetbyhdl (PAL_HANDLE handle, PAL_STREAM_ATTR  * attr)
 {
     if (handle->sock.fd == PAL_IDX_POISON)

+ 40 - 56
Pal/src/host/Linux-SGX/enclave_framework.c

@@ -71,6 +71,8 @@ int sgx_get_report (sgx_arch_hash_t * mrenclave,
 
 #include "crypto/cmac.h"
 
+static sgx_arch_key128_t enclave_key;
+
 int sgx_verify_report (sgx_arch_report_t * report)
 {
     sgx_arch_keyrequest_t keyrequest;
@@ -78,19 +80,30 @@ int sgx_verify_report (sgx_arch_report_t * report)
     keyrequest.keyname = REPORT_KEY;
     memcpy(keyrequest.keyid, report->keyid, sizeof(keyrequest.keyid));
 
-    sgx_arch_key128_t key;
-    int ret = sgx_getkey(&keyrequest, &key);
+    int ret = sgx_getkey(&keyrequest, &enclave_key);
     if (ret) {
         SGX_DBG(DBG_S, "Can't get report key\n");
         return -PAL_ERROR_DENIED;
     }
-    SGX_DBG(DBG_S, "Get report key for verification: %s\n", hex2str(key));
 
-    sgx_arch_mac_t mac;
-    AES_CMAC((void *) &key, (void *) report, SGX_REPORT_SIGNED_SIZE, mac);
-    SGX_DBG(DBG_S, "Generated mac: %s\n", hex2str(mac));
+    SGX_DBG(DBG_S, "Get report key for verification: %s\n", hex2str(enclave_key));
+    return 0;
+}
 
-    return memcmp(mac, report->mac, sizeof(sgx_arch_mac_t)) ? 1 : 0;
+/*
+ * Fetch the enclave's sealing key (keyname SEAL_KEY) through sgx_getkey()
+ * and store it in the file-scope enclave_key.
+ *
+ * Returns 0 on success, or -PAL_ERROR_DENIED if sgx_getkey() reports failure.
+ *
+ * NOTE(review): sgx_verify_report() also writes its report key into
+ * enclave_key, and the key is printed in hex at DBG_S level -- confirm
+ * both are intended.
+ */
+int init_enclave_key (void)
+{
+    sgx_arch_keyrequest_t keyrequest;
+    memset(&keyrequest, 0, sizeof(sgx_arch_keyrequest_t));
+    keyrequest.keyname = SEAL_KEY;
+
+    int ret = sgx_getkey(&keyrequest, &enclave_key);
+    if (ret) {
+        /* this request is for the sealing key, not the report key */
+        SGX_DBG(DBG_S, "Can't get sealing key\n");
+        return -PAL_ERROR_DENIED;
+    }
+
+    SGX_DBG(DBG_S, "Get sealing key: %s\n", hex2str(enclave_key));
+    return 0;
 }
 
 struct trusted_file {
@@ -99,7 +112,8 @@ struct trusted_file {
     uint64_t size;
     int uri_len;
     char uri[URI_MAX];
-    sgx_checksum_t checksum, * stubs;
+    sgx_checksum_t checksum;
+    sgx_arch_mac_t * stubs;
 };
 
 static LIST_HEAD(trusted_file_list);
@@ -108,7 +122,7 @@ static int trusted_file_indexes = 0;
 
 #include <crypto/sha256.h>
 
-int load_trusted_file (PAL_HANDLE file, sgx_checksum_t ** stubptr,
+int load_trusted_file (PAL_HANDLE file, sgx_arch_mac_t ** stubptr,
                        uint64_t * sizeptr)
 {
     struct trusted_file * tf = NULL, * tmp;
@@ -149,11 +163,13 @@ int load_trusted_file (PAL_HANDLE file, sgx_checksum_t ** stubptr,
     if (tf->index < 0) 
         return tf->index;
 
+#if CACHE_FILE_STUBS == 1
     if (tf->index && tf->stubs) {
         *stubptr = tf->stubs;
         *sizeptr = tf->size;
         return 0;
     }
+#endif
 
     if (!tf->index) {
         *stubptr = NULL;
@@ -169,10 +185,11 @@ int load_trusted_file (PAL_HANDLE file, sgx_checksum_t ** stubptr,
     int nstubs = tf->size / TRUSTED_STUB_SIZE +
                 (tf->size % TRUSTED_STUB_SIZE ? 1 : 0);
 
-    sgx_checksum_t * stubs = malloc(sizeof(sgx_checksum_t) * nstubs);
-    if (!tf) 
+    sgx_arch_mac_t * stubs = malloc(sizeof(sgx_arch_mac_t) * nstubs);
+    if (!tf)
         return -PAL_ERROR_NOMEM;
 
+    sgx_arch_mac_t * s = stubs;
     uint64_t offset = 0;
     SHA256 sha;
     void * umem;
@@ -181,7 +198,7 @@ int load_trusted_file (PAL_HANDLE file, sgx_checksum_t ** stubptr,
     if (ret < 0)
         goto failed;
 
-    for (; offset < tf->size ; offset += TRUSTED_STUB_SIZE) {
+    for (; offset < tf->size ; offset += TRUSTED_STUB_SIZE, s++) {
         uint64_t mapping_size = tf->size - offset;
         if (mapping_size > TRUSTED_STUB_SIZE)
             mapping_size = TRUSTED_STUB_SIZE;
@@ -190,20 +207,7 @@ int load_trusted_file (PAL_HANDLE file, sgx_checksum_t ** stubptr,
         if (ret < 0)
             goto unmap;
 
-        /* calculate stub checksum */
-        SHA256 stub_sha;
-        ret = SHA256Init(&stub_sha);
-        if (ret < 0)
-            goto unmap;
-
-        ret = SHA256Update(&stub_sha, umem, mapping_size);
-        if (ret < 0)
-            goto unmap;
-
-        ret = SHA256Final(&stub_sha,
-                          (uint8_t *) stubs[offset / TRUSTED_STUB_SIZE].bytes);
-        if (ret < 0)
-            goto unmap;
+        AES_CMAC((void *) &enclave_key, umem, mapping_size, (uint8_t *) s);
 
         /* update the file checksum */
         ret = SHA256Update(&sha, umem, mapping_size);
@@ -226,12 +230,9 @@ unmap:
     }
 
     _DkSpinLock(&trusted_file_lock);
-    if (tf->stubs || tf->index == -PAL_ERROR_DENIED) {
-        free(stubs);
-        *stubptr = tf->stubs;
-    } else {
-        *stubptr = tf->stubs = stubs;
-    }
+    if (tf->stubs || tf->index == -PAL_ERROR_DENIED)
+        free(tf->stubs);
+    *stubptr = tf->stubs = stubs;
     *sizeptr = tf->size;
     ret = tf->index;
     _DkSpinUnlock(&trusted_file_lock);
@@ -255,12 +256,11 @@ failed:
 
 int verify_trusted_file (const char * uri, void * mem,
                          unsigned int offset, unsigned int size,
-                         sgx_checksum_t * stubs,
+                         sgx_arch_mac_t * stubs,
                          unsigned int total_size)
 {
     unsigned long checking = offset;
-    sgx_checksum_t * s = stubs + checking / TRUSTED_STUB_SIZE;
-    char checksum_text[sizeof(sgx_checksum_t) * 2 + 1] = "\0";
+    sgx_arch_mac_t * s = stubs + checking / TRUSTED_STUB_SIZE;
     int ret;
 
     for (; checking < offset + size ; checking += TRUSTED_STUB_SIZE, s++) {
@@ -268,29 +268,13 @@ int verify_trusted_file (const char * uri, void * mem,
         if (checking_size > total_size - checking)
             checking_size = total_size - checking;
 
-        /* calculate stub checksum */
-        sgx_checksum_t checksum;
-        SHA256 stub_sha;
-        ret = SHA256Init(&stub_sha);
-        if (ret < 0)
-            return -PAL_ERROR_DENIED;
-
-        ret = SHA256Update(&stub_sha, mem + checking - offset,
-                           checking_size);
-        if (ret < 0)
-            return -PAL_ERROR_DENIED;
-
-        ret = SHA256Final(&stub_sha, (uint8_t *) checksum.bytes);
-        if (ret < 0)
-            return -PAL_ERROR_DENIED;
-
-        for (int i = 0 ; i < sizeof(sgx_checksum_t) ; i++)
-            snprintf(checksum_text + i * 2, 3, "%02x",
-                     checksum.bytes[i]);
+        sgx_arch_mac_t mac;
+        AES_CMAC((void *) &enclave_key, mem + checking - offset,
+                 checking_size, (uint8_t *) &mac);
 
-        if (memcmp(s, &checksum, sizeof(sgx_checksum_t))) {
+        if (memcmp(s, &mac, sizeof(sgx_arch_mac_t))) {
             SGX_DBG(DBG_E, "Accesing file:%s is denied. "
-                    "Does not match with its checksum.\n", uri);
+                    "Does not match with its MAC.\n", uri);
             return -PAL_ERROR_DENIED;
         }
     }

+ 19 - 6
Pal/src/host/Linux-SGX/pal_host.h

@@ -53,9 +53,19 @@ void free_untrusted (void * mem);
    because it is required by futex call. If DEBUG_MUTEX is defined,
    mutex_handle will record the owner of mutex locking. */
 struct mutex_handle {
-    struct atomic_int value;
+    union {
+        unsigned int u;
+        struct {
+            unsigned char locked;
+            unsigned char contended;
+        } b;
+    };
 };
 
+/* Mutex initializers: u == 0 (unlocked); (m) is parenthesized for any pointer expression */
+#define MUTEX_HANDLE_INIT    { .u = 0 }
+#define INIT_MUTEX_HANDLE(m)  do { (m)->u = 0; } while (0)
+
 typedef union pal_handle
 {
     /* TSAI: Here we define the internal types of PAL_HANDLE
@@ -228,8 +238,10 @@ struct arch_frame {
 # error "unsupported architecture"
 #endif
 
+#define PAL_FRAME_IDENTIFIER    (0xdeaddeadbeefbeef)
+
 struct pal_frame {
-    volatile struct pal_frame * self;
+    volatile uint64_t           identifier;
     void *                      func;
     const char *                funcname;
     struct arch_frame           arch;
@@ -239,10 +251,11 @@ static inline
 void __store_frame (struct pal_frame * frame,
                     void * func, const char * funcname)
 {
-    *(volatile void **) &frame->self = frame;
+    arch_store_frame(&frame->arch)
     frame->func = func;
     frame->funcname = funcname;
-    arch_store_frame(&frame->arch)
+    asm volatile ("nop" ::: "memory");
+    frame->identifier = PAL_FRAME_IDENTIFIER;
 }
 
 #define ENTER_PAL_CALL(name)                \
@@ -253,9 +266,9 @@ void __store_frame (struct pal_frame * frame,
 static inline
 void __clear_frame (struct pal_frame * frame)
 {
-    if (*(volatile void **) &frame->self == frame) {
+    if (frame->identifier == PAL_FRAME_IDENTIFIER) {
         asm volatile ("nop" ::: "memory");
-        *(volatile void **) &frame->self = NULL;
+        frame->identifier = 0;
     }
 }
 

+ 2 - 2
Pal/src/host/Linux-SGX/pal_linux.h

@@ -97,10 +97,10 @@ typedef struct { unsigned char bytes[32]; } sgx_checksum_t;
 
 int init_trusted_files (void);
 int load_trusted_file
-    (PAL_HANDLE file, sgx_checksum_t ** stubptr, uint64_t * sizeptr);
+    (PAL_HANDLE file, sgx_arch_mac_t ** stubptr, uint64_t * sizeptr);
 int verify_trusted_file
     (const char * uri, void * mem, unsigned int offset, unsigned int size,
-     sgx_checksum_t * stubs, unsigned int total_size);
+     sgx_arch_mac_t * stubs, unsigned int total_size);
 
 int init_trusted_children (void);
 int register_trusted_child (const char * uri, const char * mrenclave_str);

+ 4 - 0
Pal/src/host/Linux-SGX/pal_linux_defs.h

@@ -18,4 +18,8 @@
 
 #define TRUSTED_STUB_SIZE   (PRESET_PAGESIZE * 32)
 
+#define CACHE_FILE_STUBS    (1)
+
+#define USE_AES_NI          (1)
+
 #endif /* PAL_LINUX_DEFS_H */

+ 9 - 1
Pal/src/host/Linux-SGX/sgx-driver/gsgx_main.c

@@ -28,6 +28,13 @@ MODULE_VERSION(DRV_VERSION);
 
 IMPORT_KSYM(dac_mmap_min_addr);
 
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 0, 0)
+/*
+ * Set the X86_CR4_FSGSBASE bit in CR4 on the CPU that executes this call.
+ * The void * argument is unused; the signature lets the function be handed
+ * to smp_call_function() as well as being called directly.
+ */
+static void __enable_fsgsbase(void *v)
+{
+	write_cr4(read_cr4() | X86_CR4_FSGSBASE);
+}
+#endif
+
 static long gsgx_ioctl_enclave_create(struct file *filep, unsigned int cmd,
 				      unsigned long arg)
 {
@@ -43,7 +50,8 @@ static long gsgx_ioctl_enclave_create(struct file *filep, unsigned int cmd,
 	}
 
 #if LINUX_VERSION_CODE < KERNEL_VERSION(4, 0, 0)
-	write_cr4(read_cr4() | X86_CR4_FSGSBASE);
+	__enable_fsgsbase(NULL);
+	smp_call_function(__enable_fsgsbase, NULL, 1);
 #endif
 
 	isgx_create.src = createp->src;

+ 16 - 0
Pal/src/host/Linux-SGX/sgx_enclave.c

@@ -13,9 +13,14 @@
 #include <asm/signal.h>
 #include <linux/fs.h>
 #include <linux/in.h>
+#include <linux/in6.h>
 #include <math.h>
 #include <asm/errno.h>
 
+#ifndef SOL_IPV6
+# define SOL_IPV6 41
+#endif
+
 #define PAL_SEC() (&current_enclave->pal_sec)
 
 #define ODEBUG(code, ms) do {} while (0)
@@ -294,6 +299,11 @@ static int sgx_ocall_sock_listen(void * pms)
     }
 
     fd = ret;
+    if (ms->ms_addr->sa_family == AF_INET6) {
+        int ipv6only = 1;
+        INLINE_SYSCALL(setsockopt, 5, fd, SOL_IPV6, IPV6_V6ONLY, &ipv6only,
+                       sizeof(int));
+    }
     /* must set the socket to be reuseable */
     int reuseaddr = 1;
     INLINE_SYSCALL(setsockopt, 5, fd, SOL_SOCKET, SO_REUSEADDR, &reuseaddr,
@@ -368,6 +378,12 @@ static int sgx_ocall_sock_connect(void * pms)
     }
 
     fd = ret;
+    if (ms->ms_addr->sa_family == AF_INET6) {
+        int ipv6only = 1;
+        INLINE_SYSCALL(setsockopt, 5, fd, SOL_IPV6, IPV6_V6ONLY, &ipv6only,
+                       sizeof(int));
+    }
+
     if (ms->ms_bind_addr && ms->ms_bind_addr->sa_family) {
         ret = INLINE_SYSCALL(bind, 3, fd, ms->ms_bind_addr,
                              ms->ms_bind_addrlen);

+ 15 - 10
Pal/src/host/Linux-SGX/sgx_framework.c

@@ -150,23 +150,28 @@ int create_enclave(sgx_arch_secs_t * secs,
 
     struct gsgx_enclave_create param;
     if (baseaddr) {
-        secs->baseaddr = (unsigned long) baseaddr & ~(secs->size - 1);
+        secs->baseaddr = (uint64_t) baseaddr & ~(secs->size - 1);
         flags |= MAP_FIXED;
-    } else 
+    } else {
         secs->baseaddr = 0ULL;
+    }
+
+    uint64_t addr = INLINE_SYSCALL(mmap, 6, secs->baseaddr, size,
+                                   PROT_READ|PROT_WRITE|PROT_EXEC, flags,
+                                   isgx_device, 0);
 
-    secs->baseaddr = INLINE_SYSCALL(mmap, 6, secs->baseaddr, size,
-                                    PROT_READ|PROT_WRITE|PROT_EXEC, flags,
-                                    isgx_device, 0);
+    if (IS_ERR_P(addr)) {
+        if (ERRNO_P(addr) == 1 && (flags | MAP_FIXED))
+            pal_printf("Permission denied on mapping enclave. "
+                       "You may need to set sysctl vm.mmap_min_addr to zero\n");
 
-    if (IS_ERR_P(secs->baseaddr)) {
-        if (ERRNO_P(secs->baseaddr) == 1 && (flags | MAP_FIXED))
-            pal_printf("Permission denied on mapping enclave.  You may need to set sysctl vm.mmap_min_addr to zero\n");
-        SGX_DBG(DBG_I, "enclave ECREATE failed in allocating EPC memory - %d\n", ERRNO_P(secs->baseaddr));
+        SGX_DBG(DBG_I, "enclave ECREATE failed in allocating EPC memory "
+                "(errno = %d)\n", ERRNO_P(addr));
         return -ENOMEM;
     }
 
-    param.src = (unsigned long) secs;
+    secs->baseaddr = addr;
+    param.src = (uint64_t) secs;
     int ret = INLINE_SYSCALL(ioctl, 3, gsgx_device, GSGX_IOCTL_ENCLAVE_CREATE,
                          &param);
     

+ 6 - 2
Pal/src/host/Linux/db_exception.c

@@ -197,7 +197,7 @@ static struct pal_frame * get_frame (ucontext_t * uc)
 {
     unsigned long rip = uc->uc_mcontext.gregs[REG_RIP];
     unsigned long rbp = uc->uc_mcontext.gregs[REG_RBP];
-    unsigned long last_rbp = rbp - 1024;
+    unsigned long last_rbp = rbp - 64;
 
     if (!ADDR_IN_PAL(rip))
         return NULL;
@@ -211,7 +211,7 @@ static struct pal_frame * get_frame (ucontext_t * uc)
     for (unsigned long ptr = rbp - sizeof(unsigned long) ;
          ptr > last_rbp ; ptr -= 8) {
         struct pal_frame * frame = (struct pal_frame *) ptr;
-        if (frame->self == frame)
+        if (frame->identifier == PAL_FRAME_IDENTIFIER)
             return frame;
     }
 
@@ -245,12 +245,16 @@ static void _DkGenericSighandler (int signum, siginfo_t * info,
         msg[20] = '0' + (pid / 10) % 10;
         msg[21] = '0' + pid % 10;
         INLINE_SYSCALL(write, 3, 1, msg, 24);
+        while(1);
     }
 #endif
 
     struct pal_frame * frame = get_frame(uc);
     void * eframe;
 
+    if (signum == SIGCONT && frame && frame->func == DkObjectsWaitAny)
+        return;
+
     asm volatile ("movq %%rbp, %0" : "=r"(eframe));
 
     if (frame && frame->func != &_DkGenericSighandler &&

+ 61 - 88
Pal/src/host/Linux/db_mutex.c

@@ -42,167 +42,140 @@
 #include <unistd.h>
 
 #ifdef __i386__
+# define barrier()       asm volatile("" ::: "memory");
 # define rmb()           asm volatile("lock; addl $0,0(%%esp)" ::: "memory")
 # define cpu_relax()     asm volatile("rep; nop" ::: "memory");
 #endif
 
 #ifdef __x86_64__
 # include <unistd.h>
+# define barrier()       asm volatile("" ::: "memory");
 # define rmb()           asm volatile("lfence" ::: "memory")
 # define cpu_relax()     asm volatile("rep; nop" ::: "memory");
 #endif
 
-#define MUTEX_SPINLOCK_TIMES    20
+#define MUTEX_SPINLOCK_TIMES    100
 
-int _DkMutexLockTimeout (struct mutex_handle * mut, int timeout)
+int _DkMutexLockTimeout (struct mutex_handle * m, int timeout)
 {
-    int i, c = 0;
+    int ret = 0;
+#ifdef DEBUG_MUTEX
+    int tid = INLINE_SYSCALL(gettid, 0);
+#endif
 
     if (timeout == -1)
-        return -_DkMutexLock(mut);
+        return -_DkMutexLock(m);
 
-    struct atomic_int * m = &mut->value;
-
-    /* Spin and try to take lock */
-    for (i = 0 ; i < MUTEX_SPINLOCK_TIMES ; i++)
-    {
-        c = atomic_dec_and_test(m);
-        if (c)
-            goto success;
-        cpu_relax();
-    }
-
-    /* The lock is now contended */
-
-    int ret;
+    if (!xchg(&m->b.locked, 1))
+        goto success;
 
     if (timeout == 0) {
-        ret = c ? 0 : -PAL_ERROR_TRYAGAIN;
+        ret = -PAL_ERROR_TRYAGAIN;
         goto out;
     }
 
-    while (!c) {
-        int val = atomic_read(m);
-        if (val == 1)
-            goto again;
-
+    while (xchg(&m->u, 257) & 1) {
         struct timespec waittime;
         long sec = timeout / 1000000;
         long microsec = timeout - (sec * 1000000);
         waittime.tv_sec = sec;
         waittime.tv_nsec = microsec * 1000;
 
-        ret = INLINE_SYSCALL(futex, 6, m, FUTEX_WAIT, val, &waittime, NULL, 0);
+        ret = INLINE_SYSCALL(futex, 6, m, FUTEX_WAIT, 257, &waittime, NULL, 0);
 
-        if (IS_ERR(ret) &&
-            ERRNO(ret) != EWOULDBLOCK &&
-            ERRNO(ret) != EINTR) {
+        if (IS_ERR(ret)) {
+            if (ERRNO(ret) == EWOULDBLOCK) {
+                xchg(&m->b.contended, 0);
+                ret = -PAL_ERROR_TRYAGAIN;
+                goto out;
+            }
+#ifdef DEBUG_MUTEX
+            printf("futex failed (err = %d)\n", ERRNO(ret));
+#endif
             ret = unix_to_pal_error(ERRNO(ret));
             goto out;
         }
-
-#ifdef DEBUG_MUTEX
-        if (IS_ERR(ret))
-            printf("mutex held by thread %d\n", mut->owner);
-#endif
-
-again:
-        /* Upon wakeup, we still need to check whether mutex is unlocked or
-         * someone else took it.
-         * If c==0 upon return from xchg (i.e., the older value of m==0), we
-         * will exit the loop. Else, we sleep again (through a futex call).
-         */
-        c = atomic_dec_and_test(m);
     }
 
 success:
 #ifdef DEBUG_MUTEX
-    mut->owner = INLINE_SYSCALL(gettid, 0);
+    m->owner = tid;
 #endif
     ret = 0;
 out:
+#ifdef DEBUG_MUTEX
+    if (ret < 0)
+        printf("mutex failed (%e, tid = %d)\n", -ret, tid);
+#endif
     return ret;
 }
 
-int _DkMutexLock (struct mutex_handle * mut)
+int _DkMutexLock (struct mutex_handle * m)
 {
-    int i, c = 0;
-    int ret;
-    struct atomic_int * m = &mut->value;
+    int ret = 0, i;
+#ifdef DEBUG_MUTEX
+    int tid = INLINE_SYSCALL(gettid, 0);
+#endif
 
     /* Spin and try to take lock */
     for (i = 0; i < MUTEX_SPINLOCK_TIMES; i++) {
-        c = atomic_dec_and_test(m);
-        if (c)
+        if (!xchg(&m->b.locked, 1))
             goto success;
         cpu_relax();
     }
 
-    /* The lock is now contended */
-
-    while (!c) {
-        int val = atomic_read(m);
-        if (val == 1)
-            goto again;
-
-        ret = INLINE_SYSCALL(futex, 6, m, FUTEX_WAIT, val, NULL, NULL, 0);
+    while (xchg(&m->u, 257) & 1) {
+        ret = INLINE_SYSCALL(futex, 6, m, FUTEX_WAIT, 257, NULL, NULL, 0);
 
         if (IS_ERR(ret) &&
-            ERRNO(ret) != EWOULDBLOCK &&
-            ERRNO(ret) != EINTR) {
+            ERRNO(ret) != EWOULDBLOCK) {
+#ifdef DEBUG_MUTEX
+            printf("futex failed (err = %d)\n", ERRNO(ret));
+#endif
             ret = unix_to_pal_error(ERRNO(ret));
             goto out;
         }
-
-#ifdef DEBUG_MUTEX
-        if (IS_ERR(ret))
-            printf("mutex held by thread %d\n", mut->owner);
-#endif
-
-again:
-        /* Upon wakeup, we still need to check whether mutex is unlocked or
-         * someone else took it.
-         * If c==0 upon return from xchg (i.e., the older value of m==0), we
-         * will exit the loop. Else, we sleep again (through a futex call).
-         */
-        c = atomic_dec_and_test(m);
     }
 
 success:
 #ifdef DEBUG_MUTEX
-    mut->owner = INLINE_SYSCALL(gettid, 0);
+    m->owner = tid;
 #endif
     ret = 0;
 out:
+#ifdef DEBUG_MUTEX
+    if (ret < 0)
+        printf("mutex failed (%e, tid = %d)\n", -ret, tid);
+#endif
     return ret;
 }
 
-int _DkMutexUnlock (struct mutex_handle * mut)
+int _DkMutexUnlock (struct mutex_handle * m)
 {
-    int ret = 0;
-    int must_wake = 0;
-    struct atomic_int * m = &mut->value;
+    int ret = 0, i;
 
 #ifdef DEBUG_MUTEX
-    mut->owner = 0;
+    m->owner = 0;
 #endif
 
     /* Unlock, and if not contended then exit. */
-    if (atomic_read(m) < 0)
-        must_wake = 1;
-
-    atomic_set(m, 1);
+    if ((m->u == 1) && (cmpxchg(&m->u, 1, 0) == 1)) return 0;
+    m->b.locked = 0;
+    barrier();
 
-    if (must_wake) {
-        /* We need to wake someone up */
-        ret = INLINE_SYSCALL(futex, 6, m, FUTEX_WAKE, 1, NULL, NULL, 0);
+    /* Spin briefly: if another thread takes the lock, skip the futex wake */
+    for (i = 0; i < MUTEX_SPINLOCK_TIMES * 2; i++) {
+        if (m->b.locked)
+            goto success;
+        cpu_relax();
     }
 
-    if (IS_ERR(ret)) {
-        ret = -PAL_ERROR_TRYAGAIN;
-        goto out;
-    }
+    m->b.contended = 0;
 
+    /* We need to wake someone up */
+    INLINE_SYSCALL(futex, 6, m, FUTEX_WAKE, 1, NULL, NULL, 0);
+
+success:
     ret = 0;
 out:
     return ret;

+ 15 - 8
Pal/src/host/Linux/db_pipes.c

@@ -529,22 +529,29 @@ static int pipe_attrquerybyhdl (PAL_HANDLE handle, PAL_STREAM_ATTR * attr)
 
     attr->handle_type  = PAL_GET_TYPE(handle);
 
-    if (attr->handle_type == pal_type_pipe) {
+    if (attr->handle_type != pal_type_pipesrv) {
         ret = INLINE_SYSCALL(ioctl, 3, HANDLE_HDR(handle)->fds[0], FIONREAD, &val);
         if (IS_ERR(ret)) {
             return unix_to_pal_error(ERRNO(ret));
         }
+
+        attr->readable     = val > 0;
+        attr->pending_size = val;
+        attr->writeable    = HANDLE_HDR(handle)->flags & (
+            (PAL_GET_TYPE(handle) == pal_type_pipeprv) ? WRITEABLE(1) :
+            WRITEABLE(0));
+    } else {
+        struct pollfd pfd = { .fd = HANDLE_HDR(handle)->fds[0], .events = POLLIN, .revents = 0 };
+        struct timespec tp = { 0, 0 };
+        ret = INLINE_SYSCALL(ppoll, 5, &pfd, 1, &tp, NULL, 0);
+        attr->readable = (ret == 1 && pfd.revents == POLLIN);
+        attr->pending_size = 0;
+        attr->writeable    = PAL_FALSE;
     }
 
     attr->disconnected = HANDLE_HDR(handle)->flags & ERROR(0);
-    attr->nonblocking  = (HANDLE_HDR(handle)->type == pal_type_pipeprv) ?
+    attr->nonblocking  = (PAL_GET_TYPE(handle) == pal_type_pipeprv) ?
                          handle->pipeprv.nonblocking : handle->pipe.nonblocking;
-    attr->readable     = val > 0;
-    if (PAL_GET_TYPE(handle) == pal_type_pipeprv)
-        attr->writeable = HANDLE_HDR(handle)->flags & WRITEABLE(1);
-    else
-        attr->writeable = HANDLE_HDR(handle)->flags & WRITEABLE(0);
-    attr->pending_size = val;
     return 0;
 }
 

+ 3 - 4
Pal/src/host/Linux/db_semaphore.c

@@ -69,7 +69,7 @@ _DkSemaphoreCreate (PAL_HANDLE handle, int initialCount, int maxCount)
 
     /* optimization: if maxCount == 1, we make it into mutex */
     if (handle->semaphore.max_value == 1) {
-        atomic_set(&handle->semaphore.value.mut.value, 1 - initialCount);
+        handle->semaphore.value.mut.u = initialCount;
     } else {
         atomic_set(&handle->semaphore.value.i, maxCount - initialCount);
     }
@@ -255,9 +255,8 @@ void _DkSemaphoreRelease (PAL_HANDLE sem, int count)
 int _DkSemaphoreGetCurrentCount (PAL_HANDLE sem)
 {
     if (sem->semaphore.max_value == 1) {
-        struct mutex_handle * mut =
-            &sem->semaphore.value.mut;
-        return atomic_read(&mut->value);
+        struct mutex_handle * m = &sem->semaphore.value.mut;
+        return m->b.locked;
     }
 
     int c = atomic_read(&sem->semaphore.value.i);

+ 46 - 3
Pal/src/host/Linux/db_sockets.c

@@ -45,6 +45,22 @@ typedef __kernel_pid_t pid_t;
 #include <netinet/tcp.h>
 #include <asm/errno.h>
 
+#ifndef SOL_TCP
+# define SOL_TCP 6
+#endif
+
+#ifndef TCP_NODELAY
+# define TCP_NODELAY 1
+#endif
+
+#ifndef TCP_CORK
+# define TCP_CORK 3
+#endif
+
+#ifndef SOL_IPV6
+# define SOL_IPV6 41
+#endif
+
 /* 96 bytes is the minimal size of buffer to store a IPv4/IPv6
    address */
 #define PAL_SOCKADDR_SIZE   96
@@ -345,6 +361,12 @@ static int tcp_listen (PAL_HANDLE * handle, char * uri, int options)
     if (IS_ERR(fd))
         return -PAL_ERROR_DENIED;
 
+    if (bind_addr->sa_family == AF_INET6) {
+        int ipv6only = 1;
+        INLINE_SYSCALL(setsockopt, 5, fd, SOL_IPV6, IPV6_V6ONLY, &ipv6only,
+                       sizeof(int));
+    }
+
     /* must set the socket to be reuseable */
     int reuseaddr = 1;
     INLINE_SYSCALL(setsockopt, 5, fd, SOL_SOCKET, SO_REUSEADDR, &reuseaddr,
@@ -474,6 +496,12 @@ static int tcp_connect (PAL_HANDLE * handle, char * uri, int options)
         }
     }
 
+    if (dest_addr->sa_family == AF_INET6) {
+        int ipv6only = 1;
+        INLINE_SYSCALL(setsockopt, 5, fd, SOL_IPV6, IPV6_V6ONLY, &ipv6only,
+                       sizeof(int));
+    }
+
     ret = INLINE_SYSCALL(connect, 3, fd, dest_addr, dest_addrlen);
 
     if (IS_ERR(ret)) {
@@ -629,6 +657,12 @@ static int udp_bind (PAL_HANDLE * handle, char * uri, int options)
     if (IS_ERR(fd))
         return -PAL_ERROR_DENIED;
 
+    if (bind_addr->sa_family == AF_INET6) {
+        int ipv6only = 1;
+        INLINE_SYSCALL(setsockopt, 5, fd, SOL_IPV6, IPV6_V6ONLY, &ipv6only,
+                       sizeof(int));
+    }
+
     ret = INLINE_SYSCALL(bind, 3, fd, bind_addr, bind_addrlen);
 
     if (IS_ERR(ret)) {
@@ -678,6 +712,12 @@ static int udp_connect (PAL_HANDLE * handle, char * uri, int options)
     if (IS_ERR(fd))
         return -PAL_ERROR_DENIED;
 
+    if (dest_addr->sa_family == AF_INET6) {
+        int ipv6only = 1;
+        INLINE_SYSCALL(setsockopt, 5, fd, SOL_IPV6, IPV6_V6ONLY, &ipv6only,
+                       sizeof(int));
+    }
+
     if (bind_addr) {
         ret = INLINE_SYSCALL(bind, 3, fd, bind_addr, bind_addrlen);
 
@@ -1000,7 +1040,12 @@ static int socket_attrquerybyhdl (PAL_HANDLE handle, PAL_STREAM_ATTR  * attr)
 
     int fd = handle->sock.fd, ret, val;
 
-    if (handle->sock.conn) {
+    if (IS_HANDLE_TYPE(handle, tcpsrv)) {
+        struct pollfd pfd = { .fd = fd, .events = POLLIN, .revents = 0 };
+        struct timespec tp = { 0, 0 };
+        ret = INLINE_SYSCALL(ppoll, 5, &pfd, 1, &tp, NULL, 0);
+        attr->readable = (ret == 1 && (pfd.revents & POLLIN)); /* bit test: POLLHUP/POLLERR may be set too */
+    } else {
         /* try use ioctl FIONEAD to get the size of socket */
         ret = INLINE_SYSCALL(ioctl, 3, fd, FIONREAD, &val);
         if (IS_ERR(ret))
@@ -1008,8 +1053,6 @@ static int socket_attrquerybyhdl (PAL_HANDLE handle, PAL_STREAM_ATTR  * attr)
 
         attr->pending_size = val;
         attr->readable = !!attr->pending_size > 0;
-    } else {
-        attr->readable = !attr->disconnected;
     }
 
     return 0;

+ 18 - 8
Pal/src/host/Linux/pal_host.h

@@ -30,20 +30,27 @@
 # error "cannot be included outside PAL"
 #endif
 
+#define DEBUG_MUTEX 1
+
 /* internal Mutex design, the structure has to align at integer boundary
    because it is required by futex call. If DEBUG_MUTEX is defined,
    mutex_handle will record the owner of mutex locking. */
 typedef struct mutex_handle {
-    struct atomic_int value;
+    union {
+        unsigned int u;
+        struct {
+            unsigned char locked;
+            unsigned char contended;
+        } b;
+    };
 #ifdef DEBUG_MUTEX
     int owner;
 #endif
 } PAL_LOCK;
 
 /* Initializer of Mutexes */
-#define MUTEX_HANDLE_INIT    { .value = { .counter = 1 } }
-#define INIT_MUTEX_HANDLE(mut)  \
-    do { atomic_set(&(mut)->value, 1); } while (0)
+#define MUTEX_HANDLE_INIT    { .u = 0 }
+#define INIT_MUTEX_HANDLE(m)  do { (m)->u = 0; } while (0) /* (m): accept any pointer expression, e.g. &lock */
 
 #define LOCK_INIT MUTEX_HANDLE_INIT
 #define INIT_LOCK(lock) INIT_MUTEX_HANDLE(lock);
@@ -224,8 +231,10 @@ struct arch_frame {
 # error "unsupported architecture"
 #endif
 
+#define PAL_FRAME_IDENTIFIER    (0xdeaddeadbeefbeef)
+
 struct pal_frame {
-    volatile struct pal_frame * self;
+    volatile uint64_t           identifier;
     void *                      func;
     const char *                funcname;
     struct arch_frame           arch;
@@ -236,9 +245,10 @@ void __store_frame (struct pal_frame * frame,
                     void * func, const char * funcname)
 {
     arch_store_frame(&frame->arch)
-    *(volatile void **) &frame->self = frame;
     frame->func = func;
     frame->funcname = funcname;
+    asm volatile ("nop" ::: "memory");
+    frame->identifier = PAL_FRAME_IDENTIFIER;
 }
 
 #define ENTER_PAL_CALL(name)                \
@@ -249,9 +259,9 @@ void __store_frame (struct pal_frame * frame,
 static inline
 void __clear_frame (struct pal_frame * frame)
 {
-    if (*(volatile void **) &frame->self == frame) {
+    if (frame->identifier == PAL_FRAME_IDENTIFIER) {
         asm volatile ("nop" ::: "memory");
-        *(volatile void **) &frame->self = NULL;
+        frame->identifier = 0;
     }
 }