Browse Source

Replace the atomics implementation (#83)

Replace the atomics implementation.

* Change the PAL Semaphore to a Mutex, and fix some issues in the Mutex implementations

* Tweak the layout of a PAL Handle

* Rework some of the IPC helper synchronization

* Taking out waitpid03 - it is flaky, even on the commit where it was added to the PASSED list.
Don Porter 6 years ago
parent
commit
d04f172e89
59 changed files with 964 additions and 1934 deletions
  1. 0 131
      LibOS/shim/include/shim_atomic.h
  2. 2 2
      LibOS/shim/include/shim_handle.h
  3. 3 3
      LibOS/shim/include/shim_internal.h
  4. 0 1
      LibOS/shim/include/shim_ipc.h
  5. 1 1
      LibOS/shim/include/shim_signal.h
  6. 1 1
      LibOS/shim/include/shim_thread.h
  7. 1 9
      LibOS/shim/include/shim_types.h
  8. 1 1
      LibOS/shim/src/fs/shim_fs.c
  9. 1 1
      LibOS/shim/src/ipc/shim_ipc.c
  10. 85 49
      LibOS/shim/src/ipc/shim_ipc_helper.c
  11. 26 24
      LibOS/shim/src/shim_async.c
  12. 1 1
      LibOS/shim/src/shim_init.c
  13. 0 2
      LibOS/shim/test/apps/ltp/PASSED
  14. 0 110
      Pal/include/atomic.h
  15. 0 5
      Pal/include/cmpxchg.h
  16. 0 275
      Pal/include/cmpxchg_32.h
  17. 0 121
      Pal/include/cmpxchg_64.h
  18. 179 0
      Pal/lib/atomic.h
  19. 0 1
      Pal/lib/crypto/adapters/mbedtls_dh.c
  20. 1 1
      Pal/lib/crypto/mbedtls/mbedtls/config.h
  21. 2 2
      Pal/lib/graphene/config.c
  22. 25 0
      Pal/regression/00_Atomics.py
  23. 2 2
      Pal/regression/00_Symbols.py
  24. 2 8
      Pal/regression/02_Semaphore.py
  25. 66 0
      Pal/regression/AtomicMath.c
  26. 2 43
      Pal/regression/Semaphore.c
  27. 2 2
      Pal/regression/Symbols.c
  28. 1 1
      Pal/src/Makefile
  29. 21 19
      Pal/src/db_mutex.c
  30. 1 1
      Pal/src/db_object.c
  31. 2 2
      Pal/src/db_streams.c
  32. 1 1
      Pal/src/host/FreeBSD/pal_host.h
  33. 1 1
      Pal/src/host/Linux-SGX/Makefile
  34. 1 0
      Pal/src/host/Linux-SGX/db_devices.c
  35. 2 2
      Pal/src/host/Linux-SGX/db_files.c
  36. 70 93
      Pal/src/host/Linux-SGX/db_mutex.c
  37. 4 5
      Pal/src/host/Linux-SGX/db_object.c
  38. 5 4
      Pal/src/host/Linux-SGX/db_pipes.c
  39. 0 255
      Pal/src/host/Linux-SGX/db_semaphore.c
  40. 2 2
      Pal/src/host/Linux-SGX/db_sockets.c
  41. 2 2
      Pal/src/host/Linux-SGX/db_streams.c
  42. 1 1
      Pal/src/host/Linux-SGX/enclave_framework.c
  43. 2 2
      Pal/src/host/Linux-SGX/pal.map
  44. 109 119
      Pal/src/host/Linux-SGX/pal_host.h
  45. 12 14
      Pal/src/host/Linux-SGX/pal_linux.h
  46. 1 1
      Pal/src/host/Linux/Makefile
  47. 1 0
      Pal/src/host/Linux/db_devices.c
  48. 2 2
      Pal/src/host/Linux/db_files.c
  49. 107 83
      Pal/src/host/Linux/db_mutex.c
  50. 4 5
      Pal/src/host/Linux/db_object.c
  51. 11 10
      Pal/src/host/Linux/db_pipes.c
  52. 0 263
      Pal/src/host/Linux/db_semaphore.c
  53. 2 2
      Pal/src/host/Linux/db_sockets.c
  54. 3 3
      Pal/src/host/Linux/db_streams.c
  55. 2 2
      Pal/src/host/Linux/pal.map
  56. 103 120
      Pal/src/host/Linux/pal_host.h
  57. 61 57
      Pal/src/host/Skeleton/pal_host.h
  58. 18 33
      Pal/src/pal.h
  59. 9 33
      Pal/src/pal_internal.h

+ 0 - 131
LibOS/shim/include/shim_atomic.h

@@ -1,131 +0,0 @@
-/* -*- mode:c; c-file-style:"k&r"; c-basic-offset: 4; tab-width:4; indent-tabs-mode:nil; mode:auto-fill; fill-column:78; -*- */
-/* vim: set ts=4 sw=4 et tw=78 fo=cqt wm=0: */
-
-/* Copyright (C) 2014 OSCAR lab, Stony Brook University
-   This file is part of Graphene Library OS.
-
-   Graphene Library OS is free software: you can redistribute it and/or
-   modify it under the terms of the GNU General Public License
-   as published by the Free Software Foundation, either version 3 of the
-   License, or (at your option) any later version.
-
-   Graphene Library OS is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-   GNU General Public License for more details.
-
-   You should have received a copy of the GNU General Public License
-   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
-
-/*
- * shim_atomic.h
- *
- * This file contains functions and macros for atomic operations.
- */
-
-#ifndef _SHIM_ATOMIC_H_
-#define _SHIM_ATOMIC_H_
-
-#include "shim_types.h"
-
-/* Optimization barrier */
-/* The "volatile" is due to gcc bugs */
-#define barrier() __asm__ __volatile__("": : :"memory")
-
-#ifdef __x86_64__
-/*
- * Some non-Intel clones support  of order store. wmb() ceases to be a
- * nop for these.
- */
-# define cpu_relax()    asm volatile ("rep; nop" ::: "memory")
-# define mb()    asm volatile ("mfence" ::: "memory")
-# define rmb()   asm volatile ("lfence" ::: "memory")
-# define wmb()   asm volatile ("sfence" ::: "memory")
-#endif
-
-#define LOCK_PREFIX     "\n\tlock; "
-
-#define ATOMIC_INIT(i)      { (i) }
-
-static inline int atomic_read (const struct shim_atomic * v)
-{
-    return (*(volatile long *)&(v)->counter);
-}
-
-static inline void atomic_set (struct shim_atomic * v, int i)
-{
-    v->counter = i;
-}
-
-static inline void atomic_add (int i, struct shim_atomic * v)
-{
-    asm volatile(LOCK_PREFIX "addl %1,%0"
-                 : "+m" (v->counter)
-                 : "ir" (i));
-}
-
-static inline void atomic_sub (int i, struct shim_atomic * v)
-{
-    asm volatile(LOCK_PREFIX "subl %1,%0"
-                 : "+m" (v->counter)
-                 : "ir" (i));
-}
-
-static inline int atomic_sub_and_test (int i, struct shim_atomic * v)
-{
-    unsigned char c;
-    asm volatile(LOCK_PREFIX "subl %2,%0; sete %1"
-                 : "+m" (v->counter), "=qm" (c)
-                 : "ir" (i) : "memory");
-    return c;
-}
-
-static inline void atomic_inc (struct shim_atomic * v)
-{
-    asm volatile(LOCK_PREFIX "incl %0"
-                 : "+m" (v->counter));
-}
-
-static inline int atomic_inc_and_test (struct shim_atomic * v)
-{
-    unsigned char c;
-    asm volatile(LOCK_PREFIX "incl %0; sete %1"
-                 : "+m" (v->counter), "=qm" (c)
-                 : : "memory");
-    return c != 0;
-}
-
-static inline void atomic_dec (struct shim_atomic * v)
-{
-    asm volatile(LOCK_PREFIX "decl %0"
-                 : "+m" (v->counter));
-}
-
-static inline int atomic_dec_and_test (struct shim_atomic * v)
-{
-    unsigned char c;
-    asm volatile(LOCK_PREFIX "decl %0; sete %1"
-                 : "+m" (v->counter), "=qm" (c)
-                 : : "memory");
-    return c != 0;
-}
-
-#undef LOCK_PREFIX
-
-#ifndef __i386__
-# include "cmpxchg_64.h"
-#else
-# include "cmpxchg_32.h"
-#endif
-
-static inline int atomic_cmpxchg (struct shim_atomic * v, int old, int new)
-{
-    return cmpxchg(&v->counter, old, new);
-}
-
-static inline int atomic_xchg (struct shim_atomic * v, int new)
-{
-    return xchg(&v->counter, new);
-}
-
-#endif /* _SHIM_ATOMIC_H_ */

+ 2 - 2
LibOS/shim/include/shim_handle.h

@@ -69,11 +69,11 @@ enum shim_file_type {
 
 struct shim_file_data {
     LOCKTYPE            lock;
-    struct shim_atomic  version;
+    struct atomic_int   version;
     bool                queried;
     enum shim_file_type type;
     mode_t     mode;
-    struct shim_atomic  size;
+    struct atomic_int   size;
     struct shim_qstr    host_uri;
     unsigned long       atime;
     unsigned long       mtime;

+ 3 - 3
LibOS/shim/include/shim_internal.h

@@ -39,7 +39,7 @@
 
 #include <shim_types.h>
 #include <shim_defs.h>
-#include <shim_atomic.h>
+#include <atomic.h>
 #include <shim_tls.h>
 
 /* important macros */
@@ -474,7 +474,7 @@ static inline void enable_preempt (shim_tcb_t * tcb)
 
 #define create_lock(l)                          \
     do {                                        \
-        (l).lock = DkSemaphoreCreate(0, 1);     \
+        (l).lock = DkMutexCreate(0);               \
         /* (l).owner = LOCK_FREE;               */ \
         /* (l).reowned = 0;                     */ \
     } while (0)
@@ -532,7 +532,7 @@ static inline void __unlock (LOCKTYPE * l)
 #endif
 
     l->owner = 0;
-    DkSemaphoreRelease(l->lock, 1);
+    DkMutexRelease(l->lock);
     enable_preempt(tcb);
 }
 

+ 0 - 1
LibOS/shim/include/shim_ipc.h

@@ -604,7 +604,6 @@ void ipc_parent_exit  (struct shim_ipc_port * port, IDTYPE vmid,
 void ipc_child_exit   (struct shim_ipc_port * port, IDTYPE vmid,
                        unsigned int exitcode, unsigned int term_signal);
 
-int create_ipc_helper (void);
 int exit_with_ipc_helper (bool handover);
 
 #define IPC_FORCE_RECONNECT     ((void *) -1)

+ 1 - 1
LibOS/shim/include/shim_signal.h

@@ -105,7 +105,7 @@ struct shim_signal {
 #define MAX_SIGNAL_LOG      32
 
 struct shim_signal_log {
-    struct shim_atomic head, tail;
+    struct atomic_int head, tail;
     struct shim_signal * logs[MAX_SIGNAL_LOG];
 };
 

+ 1 - 1
LibOS/shim/include/shim_thread.h

@@ -57,7 +57,7 @@ struct shim_thread {
     /* signal handling */
     __sigset_t signal_mask;
     struct shim_signal_handle signal_handles[NUM_SIGS];
-    struct shim_atomic has_signal;
+    struct atomic_int has_signal;
     struct shim_signal_log * signal_logs;
     bool suspend_on_signal;
     stack_t signal_altstack;

+ 1 - 9
LibOS/shim/include/shim_types.h

@@ -462,15 +462,7 @@ typedef uint16_t FDTYPE;
 typedef unsigned long LEASETYPE;
 typedef unsigned long HASHTYPE;
 
-struct shim_atomic {
-#ifndef __i386__
-    long counter;
-#else
-    int counter;
-#endif
-};
-
-typedef struct shim_atomic REFTYPE;
+typedef struct atomic_int REFTYPE;
 
 #include <pal.h>
 

+ 1 - 1
LibOS/shim/src/fs/shim_fs.c

@@ -447,7 +447,7 @@ int mount_fs (const char * type, const char * uri, const char * mount_point,
      * entry (until the unmount).  But we shouldn't also hold the reference on
      * dent from the validation step.  Drop it here */
     put_dentry(dent2);
-    
+
     ret = __mount_fs(mount, dent);
 
     // If we made it this far and the dentry is still negative, clear

+ 1 - 1
LibOS/shim/src/ipc/shim_ipc.c

@@ -399,7 +399,7 @@ int close_ipc_message_duplex (struct shim_ipc_msg_obj * msg,
     return 0;
 }
 
-static struct shim_atomic ipc_seq_counter;
+static struct atomic_int ipc_seq_counter;
 
 int send_ipc_message_duplex (struct shim_ipc_msg_obj * msg,
                              struct shim_ipc_port * port, bool save,

+ 85 - 49
LibOS/shim/src/ipc/shim_ipc_helper.c

@@ -55,12 +55,17 @@ static LISTP_TYPE(shim_ipc_port) pobj_list;
 /* This points to a list of shim_ipc_port objects (by the hlist field) */
 static LISTP_TYPE(shim_ipc_port) ipc_port_pool [PID_HASH_NUM];
 
-enum {
+/* This variable can be read without the ipc_helper_lock held, but
+ * should be modified with the ipc_helper_lock held (and in some cases,
+ * the value should be re-checked after acquiring the lock).
+ * For reads in a loop without the lock, some caution should be taken to
+ * use compiler barriers to ensure that a stale value isn't cached.
+ */
+static enum {
     HELPER_UNINITIALIZED, HELPER_DELAYED, HELPER_NOTALIVE,
     HELPER_ALIVE, HELPER_HANDEDOVER,
-};
+} ipc_helper_state;
 
-static struct shim_atomic    ipc_helper_state;
 static struct shim_thread *  ipc_helper_thread;
 static bool                  ipc_helper_update;
 static AEVENTTYPE            ipc_helper_event;
@@ -142,14 +147,29 @@ int init_ipc_ports (void)
     return 0;
 }
 
+
+static int create_ipc_helper (void);
+
+/* This function should be called as part of init, before locks or atomics are
+ * required */
 int init_ipc_helper (void)
 {
-    bool need_helper = (atomic_read(&ipc_helper_state) == HELPER_DELAYED);
-    atomic_set(&ipc_helper_state, HELPER_NOTALIVE);
+    bool need_helper = (ipc_helper_state == HELPER_DELAYED);
+    ipc_helper_state = HELPER_NOTALIVE;
     create_lock(ipc_helper_lock);
     create_event(&ipc_helper_event);
-    if (need_helper)
+    if (need_helper) {
+        /*
+         * we are enabling multi-threading, must turn on threading
+         * before grabbing any lock
+         */
+        enable_locking();
+
+        /* Go ahead and lock the ipc helper lock here, for consistency */
+        lock(ipc_helper_lock);
         create_ipc_helper();
+        unlock(ipc_helper_lock);
+    }
     return 0;
 }
 
@@ -184,11 +204,12 @@ static void __put_ipc_port (struct shim_ipc_port * pobj)
     }
 }
 
+/* This should be called with the ipc_helper_lock held */
 static inline void restart_ipc_helper (bool need_create)
 {
-    switch (atomic_read(&ipc_helper_state)) {
+    switch (ipc_helper_state) {
         case HELPER_UNINITIALIZED:
-            atomic_set(&ipc_helper_state, HELPER_DELAYED);
+            ipc_helper_state = HELPER_DELAYED;
         case HELPER_DELAYED:
             return;
         case HELPER_NOTALIVE:
@@ -274,10 +295,10 @@ void add_ipc_port (struct shim_ipc_port * port, IDTYPE vmid, int type,
 
     lock(ipc_helper_lock);
     bool need_restart = __add_ipc_port(port, vmid, type, fini);
-    unlock(ipc_helper_lock);
-
     if (need_restart)
         restart_ipc_helper(true);
+
+    unlock(ipc_helper_lock);
 }
 
 static struct shim_ipc_port * __get_new_ipc_port (PAL_HANDLE hdl)
@@ -330,7 +351,7 @@ void add_ipc_port_by_id (IDTYPE vmid, PAL_HANDLE hdl, int type,
 
     if (!port && !(port = __get_new_ipc_port(hdl))) {
         *portptr = NULL;
-        return;
+        goto out;
     }
 
     bool need_restart = __add_ipc_port(port, vmid, type, fini);
@@ -340,10 +361,11 @@ void add_ipc_port_by_id (IDTYPE vmid, PAL_HANDLE hdl, int type,
     else
         __put_ipc_port(port);
 
-    unlock(ipc_helper_lock);
-
     if (need_restart)
         restart_ipc_helper(true);
+
+out:
+    unlock(ipc_helper_lock);
 }
 
 static bool __del_ipc_port (struct shim_ipc_port * port, int type)
@@ -391,10 +413,11 @@ void del_ipc_port (struct shim_ipc_port * port, int type)
 {
     lock(ipc_helper_lock);
     bool need_restart = __del_ipc_port(port, type);
-    unlock(ipc_helper_lock);
 
     if (need_restart)
         restart_ipc_helper(false);
+
+    unlock(ipc_helper_lock);
 }
 
 void del_ipc_port_by_id (IDTYPE vmid, int type)
@@ -415,10 +438,9 @@ void del_ipc_port_by_id (IDTYPE vmid, int type)
         }
     }
 
-    unlock(ipc_helper_lock);
-
     if (need_restart)
         restart_ipc_helper(false);
+    unlock(ipc_helper_lock);
 }
 
 void del_ipc_port_fini (struct shim_ipc_port * port, unsigned int exitcode)
@@ -457,13 +479,12 @@ void del_ipc_port_fini (struct shim_ipc_port * port, unsigned int exitcode)
         }
     }
 
-    unlock(port->msgs_lock);
-
     put_ipc_port(port);
     assert(REF_GET(port->ref_count) > 0);
 
     if (need_restart)
         restart_ipc_helper(false);
+    unlock(port->msgs_lock);
 }
 
 static struct shim_ipc_port * __lookup_ipc_port (IDTYPE vmid, int type)
@@ -511,10 +532,10 @@ void del_all_ipc_ports (int type)
         if (pobj->pal_handle && __del_ipc_port(pobj, type))
             need_restart = true;
 
-    unlock(ipc_helper_lock);
-
     if (need_restart)
         restart_ipc_helper(false);
+
+    unlock(ipc_helper_lock);
 }
 
 int broadcast_ipc (struct shim_ipc_msg * msg, struct shim_ipc_port ** exclude,
@@ -799,11 +820,16 @@ static void shim_ipc_helper (void * arg)
 
     goto update_status;
 
-    while (atomic_read(&ipc_helper_state) == HELPER_ALIVE ||
+    /* The compiler should be careful not to cache the ipc_helper_state or
+     * else this loop could fail to terminate on update.  Use a compiler
+     * barrier to force a re-read after sleeping. */
+    while ((ipc_helper_state == HELPER_ALIVE) ||
            nalive) {
         /* do a global poll on all the ports */
         polled = DkObjectsWaitAny(port_num + 1, local_ports, NO_TIMEOUT);
 
+        barrier();
+        
         if (!polled)
             continue;
 
@@ -812,7 +838,8 @@ static void shim_ipc_helper (void * arg)
         if (polled == ipc_event_handle) {
             clear_event(&ipc_helper_event);
 update_status:
-            if (atomic_read(&ipc_helper_state) == HELPER_NOTALIVE)
+            barrier();
+            if (ipc_helper_state == HELPER_NOTALIVE)
                 goto end;
             else
                 goto update_list;
@@ -962,13 +989,18 @@ end:
     if (self->handle_map)
         put_handle_map(self->handle_map);
 
-    if (atomic_read(&ipc_helper_state) == HELPER_HANDEDOVER) {
+    /* shim_clean ultimately calls del_all_ipc_ports(), which reacquires the
+     * helper lock.  Err on the side of caution by adding a barrier to ensure 
+     * reading the latest ipc helper state.       
+     */
+    barrier();
+    if (ipc_helper_state == HELPER_HANDEDOVER) {
         debug("ipc helper thread is the last thread, process exiting\n");
         shim_clean();
     }
 
-    atomic_xchg(&ipc_helper_state, HELPER_NOTALIVE);
     lock(ipc_helper_lock);
+    ipc_helper_state = HELPER_NOTALIVE;
     ipc_helper_thread = NULL;
     unlock(ipc_helper_lock);
     put_thread(self);
@@ -977,42 +1009,30 @@ end:
     DkThreadExit();
 }
 
-int create_ipc_helper (void)
+/* This function should be called with the ipc_helper_lock held */
+static int create_ipc_helper (void)
 {
     int ret = 0;
 
-    if (atomic_read(&ipc_helper_state) == HELPER_ALIVE)
+    /* If we are holding the lock, no barrier is needed here, as 
+     * the lock (and new function) form an implicit barrier, and
+     * any "recent" changes should have come from this thread */
+    if (ipc_helper_state == HELPER_ALIVE)
         return 0;
 
-    /*
-     * we are enabling multi-threading, must turn on threading
-     * before grabbing any lock
-     */
-    enable_locking();
-
     struct shim_thread * new = get_new_internal_thread();
     if (!new)
         return -ENOMEM;
 
-    lock(ipc_helper_lock);
-    if (atomic_read(&ipc_helper_state) == HELPER_ALIVE) {
-        unlock(ipc_helper_lock);
-        put_thread(new);
-        return 0;
-    }
-
     ipc_helper_thread = new;
-    atomic_xchg(&ipc_helper_state, HELPER_ALIVE);
-    unlock(ipc_helper_lock);
+    ipc_helper_state = HELPER_ALIVE;
 
     PAL_HANDLE handle = thread_create(shim_ipc_helper, new, 0);
 
     if (!handle) {
         ret = -PAL_ERRNO;
-        lock(ipc_helper_lock);
         ipc_helper_thread = NULL;
-        atomic_xchg(&ipc_helper_state, HELPER_NOTALIVE);
-        unlock(ipc_helper_lock);
+        ipc_helper_state = HELPER_NOTALIVE;
         put_thread(new);
         return ret;
     }
@@ -1023,7 +1043,7 @@ int create_ipc_helper (void)
 
 int exit_with_ipc_helper (bool handover)
 {
-    if (IN_HELPER() || atomic_read(&ipc_helper_state) != HELPER_ALIVE)
+    if (IN_HELPER() || ipc_helper_state != HELPER_ALIVE)
         return 0;
 
     lock(ipc_helper_lock);
@@ -1036,7 +1056,6 @@ int exit_with_ipc_helper (bool handover)
                 break;
             }
     }
-    unlock(ipc_helper_lock);
 
     int new_state = HELPER_NOTALIVE;
     if (handover) {
@@ -1046,10 +1065,27 @@ int exit_with_ipc_helper (bool handover)
         debug("exiting ipc helper\n");
     }
 
-    atomic_xchg(&ipc_helper_state, new_state);
+    ipc_helper_state = new_state;
+    unlock(ipc_helper_lock);
+
     set_event(&ipc_helper_event, 1);
 
-    return (new_state == HELPER_NOTALIVE) ? 0 : -EAGAIN;
+    if (new_state != HELPER_NOTALIVE) {
+        return -EAGAIN;
+    } else {
+        /* We could get here via a signal handler invoked during
+         * receive_ipc_message. Let that complete so that whoever
+         * generated the signal doesn't hang waiting for IPC_RESP. */
+        int loops = 0;
+        while (ipc_helper_thread != NULL && loops++ < 2000) {
+            barrier();
+            DkThreadDelayExecution(1000);
+        }
+        if (ipc_helper_thread != NULL) {
+            debug("timed out waiting for ipc helper to exit\n");
+        }
+        return 0;
+    }
 }
 
 int terminate_ipc_helper (void)
@@ -1063,7 +1099,7 @@ int terminate_ipc_helper (void)
     }
 
     debug("terminating ipc helper\n");
-    atomic_xchg(&ipc_helper_state, HELPER_NOTALIVE);
+    ipc_helper_state = HELPER_NOTALIVE;
     set_event(&ipc_helper_event, 1);
     unlock(ipc_helper_lock);
     return 0;

+ 26 - 24
LibOS/shim/src/shim_async.c

@@ -44,9 +44,10 @@ struct async_event {
 DEFINE_LISTP(async_event);
 static LISTP_TYPE(async_event) async_list;
 
-enum {  HELPER_NOTALIVE, HELPER_ALIVE };
+/* This variable can be read without the async_helper_lock held, but is always
+ * modified with it held. */
+static enum {  HELPER_NOTALIVE, HELPER_ALIVE } async_helper_state;
 
-static struct shim_atomic   async_helper_state;
 static struct shim_thread * async_helper_thread;
 static AEVENTTYPE           async_helper_event;
 
@@ -110,13 +111,17 @@ int install_async_event (PAL_HANDLE object, unsigned long time,
     if (atomic_read(&async_helper_state) == HELPER_NOTALIVE)
         create_async_helper();
 
+    unlock(async_helper_lock);
+    
     set_event(&async_helper_event, 1);
     return 0;
 }
 
 int init_async (void)
 {
-    atomic_set(&async_helper_state, HELPER_NOTALIVE);
+    /* This is early enough in init that we can write this variable without
+     * the lock. */
+    async_helper_state = HELPER_NOTALIVE;
     create_lock(async_helper_lock);
     create_event(&async_helper_event);
     return 0;
@@ -164,7 +169,10 @@ static void shim_async_helper (void * arg)
 
     goto update_status;
 
-    while (atomic_read(&async_helper_state) == HELPER_ALIVE) {
+    /* This loop should be careful to use a barrier after sleeping
+     * to ensure that the while breaks once async_helper_state changes.
+     */
+    while (async_helper_state == HELPER_ALIVE) {
         unsigned long sleep_time;
         if (next_event) {
             sleep_time = next_event->expire_time - latest_time;
@@ -178,7 +186,8 @@ static void shim_async_helper (void * arg)
         }
 
         polled = DkObjectsWaitAny(object_num + 1, local_objects, sleep_time);
-
+        barrier();
+        
         if (!polled) {
             if (next_event) {
                 debug("async event trigger at %llu\n",
@@ -199,7 +208,7 @@ static void shim_async_helper (void * arg)
             clear_event(&async_helper_event);
 update_status:
             latest_time = DkSystemTimeQuery();
-            if (atomic_read(&async_helper_state) == HELPER_NOTALIVE) {
+            if (async_helper_state == HELPER_NOTALIVE) {
                 break;
             } else {
                 lock(async_helper_lock);
@@ -265,8 +274,8 @@ update_list:
         }
     }
 
-    atomic_set(&async_helper_state, HELPER_NOTALIVE);
     lock(async_helper_lock);
+    async_helper_state = HELPER_NOTALIVE;
     async_helper_thread = NULL;
     unlock(async_helper_lock);
     put_thread(self);
@@ -275,11 +284,12 @@ update_list:
     DkThreadExit();
 }
 
+/* This should be called with the async_helper_lock held */
 int create_async_helper (void)
 {
     int ret = 0;
 
-    if (atomic_read(&async_helper_state) == HELPER_ALIVE)
+    if (async_helper_state == HELPER_ALIVE)
         return 0;
 
     enable_locking();
@@ -288,40 +298,32 @@ int create_async_helper (void)
     if (!new)
         return -ENOMEM;
 
-    lock(async_helper_lock);
-    if (atomic_read(&async_helper_state) == HELPER_ALIVE) {
-        unlock(async_helper_lock);
-        put_thread(new);
-        return 0;
-    }
-
-    async_helper_thread = new;
-    atomic_xchg(&async_helper_state, HELPER_ALIVE);
-    unlock(async_helper_lock);
-
     PAL_HANDLE handle = thread_create(shim_async_helper, new, 0);
 
     if (!handle) {
         ret = -PAL_ERRNO;
-        lock(async_helper_lock);
         async_helper_thread = NULL;
-        atomic_xchg(&async_helper_state, HELPER_NOTALIVE);
-        unlock(async_helper_lock);
+        async_helper_state = HELPER_NOTALIVE;
         put_thread(new);
         return ret;
     }
 
     new->pal_handle = handle;
+
+    /* Publish new and update the state once fully initialized */
+    async_helper_thread = new;
+    async_helper_state = HELPER_ALIVE;
+    
     return 0;
 }
 
 int terminate_async_helper (void)
 {
-    if (atomic_read(&async_helper_state) != HELPER_ALIVE)
+    if (async_helper_state != HELPER_ALIVE)
         return 0;
 
     lock(async_helper_lock);
-    atomic_xchg(&async_helper_state, HELPER_NOTALIVE);
+    async_helper_state = HELPER_NOTALIVE;
     unlock(async_helper_lock);
     set_event(&async_helper_event, 1);
     return 0;

+ 1 - 1
LibOS/shim/src/shim_init.c

@@ -1074,7 +1074,7 @@ static void print_profile_result (PAL_HANDLE hdl, struct shim_profile * root,
 }
 #endif /* PROFILE */
 
-static struct shim_atomic in_terminate = { .counter = 0, };
+static struct atomic_int in_terminate = { .counter = 0, };
 
 int shim_terminate (void)
 {

+ 0 - 2
LibOS/shim/test/apps/ltp/PASSED

@@ -945,8 +945,6 @@ waitpid01,2
 waitpid02,1
 waitpid02,2
 waitpid02,3
-waitpid03,1
-waitpid03,2
 waitpid05,1
 waitpid05,2
 waitpid05,3

+ 0 - 110
Pal/include/atomic.h

@@ -1,110 +0,0 @@
-/* -*- mode:c; c-file-style:"k&r"; c-basic-offset: 4; tab-width:4; indent-tabs-mode:nil; mode:auto-fill; fill-column:78; -*- */
-/* vim: set ts=4 sw=4 et tw=78 fo=cqt wm=0: */
-
-#ifndef _SHIM_ATOMIC_H_
-#define _SHIM_ATOMIC_H_
-
-#define LOCK_PREFIX     "\n\tlock; "
-
-#define ATOMIC_INIT(i)      { (i) }
-
-static inline volatile int atomic_read (const struct atomic_int * v)
-{
-    return v->counter;
-}
-
-static inline void atomic_set (struct atomic_int * v, int i)
-{
-    v->counter = i;
-}
-
-static inline void atomic_add (int i, struct atomic_int * v)
-{
-    asm volatile(LOCK_PREFIX "addl %1,%0"
-                 : "+m" (v->counter)
-                 : "ir" (i));
-}
-
-static inline void atomic_sub (int i, struct atomic_int * v)
-{
-    asm volatile(LOCK_PREFIX "subl %1,%0"
-                 : "+m" (v->counter)
-                 : "ir" (i));
-}
-
-static inline int atomic_sub_and_test (int i, struct atomic_int * v)
-{
-    unsigned char c;
-    asm volatile(LOCK_PREFIX "subl %2,%0; sete %1"
-                 : "+m" (v->counter), "=qm" (c)
-                 : "ir" (i) : "memory");
-    return c;
-}
-
-/* Return 0 if the value drops below zero, 1 if >= 0 */
-static inline int atomic_sub_and_test_nonnegative (int i, struct atomic_int * v)
-{
-    unsigned char c;
-    asm volatile(LOCK_PREFIX "subl %2,%0; setns %1"
-                 : "+m" (v->counter), "=qm" (c)
-                 : "ir" (i) : "memory");
-    return c;
-}
-
-static inline void atomic_inc (struct atomic_int * v)
-{
-    asm volatile(LOCK_PREFIX "incl %0"
-                 : "+m" (v->counter));
-}
-
-static inline int atomic_inc_and_test (struct atomic_int * v)
-{
-    unsigned char c;
-    asm volatile(LOCK_PREFIX "incl %0; sete %1"
-                 : "+m" (v->counter), "=qm" (c)
-                 : : "memory");
-    return c != 0;
-}
-
-static inline void atomic_dec (struct atomic_int * v)
-{
-    asm volatile(LOCK_PREFIX "decl %0"
-                 : "+m" (v->counter));
-}
-
-static inline int atomic_dec_and_test (struct atomic_int * v)
-{
-    unsigned char c;
-    asm volatile(LOCK_PREFIX "decl %0; sete %1"
-                 : "+m" (v->counter), "=qm" (c)
-                 : : "memory");
-    return c != 0;
-}
-
-/* Return 0 if the value drops below zero, 1 if >= 0 */
-static inline int atomic_dec_and_test_nonnegative (struct atomic_int * v)
-{
-    unsigned char c;
-    asm volatile(LOCK_PREFIX "decl %0; setns %1"
-                 : "+m" (v->counter), "=qm" (c)
-                 : : "memory");
-    return c;
-}
-
-#ifndef __i386__
-# include "cmpxchg_64.h"
-#else
-# include "cmpxchg_32.h"
-#endif
-
-static inline int atomic_cmpxchg (struct atomic_int * v, int old, int new)
-{
-    return cmpxchg((&v->counter), old, new);
-}
-
-static inline int atomic_xchg (struct atomic_int * v, int new)
-{
-    return xchg((&v->counter), new);
-}
-
-#endif /* _ATOMIC_INT_H_ */

+ 0 - 5
Pal/include/cmpxchg.h

@@ -1,5 +0,0 @@
-#ifdef CONFIG_X86_32
-# include "cmpxchg_32.h"
-#else
-# include "cmpxchg_64.h"
-#endif

+ 0 - 275
Pal/include/cmpxchg_32.h

@@ -1,275 +0,0 @@
-#ifndef _ASM_X86_CMPXCHG_32_H
-#define _ASM_X86_CMPXCHG_32_H
-
-#define LOCK_PREFIX "\n\tlock; "
-/*
- * Note: if you use set64_bit(), __cmpxchg64(), or their variants, you
- *       you need to test for the feature in boot_cpu_data.
- */
-
-extern void __xchg_wrong_size(void);
-
-/*
- * Note: no "lock" prefix even on SMP: xchg always implies lock anyway
- * Note 2: xchg has side effect, so that attribute volatile is necessary,
- *	  but generally the primitive is invalid, *ptr is output argument. --ANK
- */
-
-struct __xchg_dummy {
-	unsigned long a[100];
-};
-#define __xg(x) ((struct __xchg_dummy *)(x))
-
-#define __xchg(x, ptr, size)						\
-({									\
-	__typeof(*(ptr)) __x = (x);					\
-	switch (size) {							\
-	case 1:								\
-	  asm volatile("lock; xchgb %b0,%1"				\
-			     : "=q" (__x), "+m" (*__xg(ptr))		\
-			     : "0" (__x)				\
-			     : "memory");				\
-		break;							\
-	case 2:								\
-	  asm volatile("lock; xchgw %w0,%1"				\
-			     : "=r" (__x), "+m" (*__xg(ptr))		\
-			     : "0" (__x)				\
-			     : "memory");				\
-		break;							\
-	case 4:								\
-	  asm volatile("lock; xchgl %0,%1"				\
-			     : "=r" (__x), "+m" (*__xg(ptr))		\
-			     : "0" (__x)				\
-			     : "memory");				\
-		break;							\
-	default:							\
-		__xchg_wrong_size();					\
-	}								\
-	__x;								\
-})
-
-#define xchg(ptr, v)							\
-	__xchg((v), (ptr), sizeof(*ptr))
-
-/*
- * CMPXCHG8B only writes to the target if we had the previous
- * value in registers, otherwise it acts as a read and gives us the
- * "new previous" value.  That is why there is a loop.  Preloading
- * EDX:EAX is a performance optimization: in the common case it means
- * we need only one locked operation.
- *
- * A SIMD/3DNOW!/MMX/FPU 64-bit store here would require at the very
- * least an FPU save and/or %cr0.ts manipulation.
- *
- * cmpxchg8b must be used with the lock prefix here to allow the
- * instruction to be executed atomically.  We need to have the reader
- * side to see the coherent 64bit value.
- */
-static inline void set_64bit(volatile u64 *ptr, u64 value)
-{
-	u32 low  = value;
-	u32 high = value >> 32;
-	u64 prev = *ptr;
-
-	asm volatile("\n1:\t"
-		     LOCK_PREFIX "cmpxchg8b %0\n\t"
-		     "jnz 1b"
-		     : "=m" (*ptr), "+A" (prev)
-		     : "b" (low), "c" (high)
-		     : "memory");
-}
-
-extern void __cmpxchg_wrong_size(void);
-
-/*
- * Atomic compare and exchange.  Compare OLD with MEM, if identical,
- * store NEW in MEM.  Return the initial value in MEM.  Success is
- * indicated by comparing RETURN with OLD.
- */
-#define __raw_cmpxchg(ptr, old, new, size, lock)			\
-({									\
-	__typeof__(*(ptr)) __ret;					\
-	__typeof__(*(ptr)) __old = (old);				\
-	__typeof__(*(ptr)) __new = (new);				\
-	switch (size) {							\
-	case 1:								\
-		asm volatile(lock "cmpxchgb %b2,%1"			\
-			     : "=a" (__ret), "+m" (*__xg(ptr))		\
-			     : "q" (__new), "0" (__old)			\
-			     : "memory");				\
-		break;							\
-	case 2:								\
-		asm volatile(lock "cmpxchgw %w2,%1"			\
-			     : "=a" (__ret), "+m" (*__xg(ptr))		\
-			     : "r" (__new), "0" (__old)			\
-			     : "memory");				\
-		break;							\
-	case 4:								\
-		asm volatile(lock "cmpxchgl %2,%1"			\
-			     : "=a" (__ret), "+m" (*__xg(ptr))		\
-			     : "r" (__new), "0" (__old)			\
-			     : "memory");				\
-		break;							\
-	default:							\
-		__cmpxchg_wrong_size();					\
-	}								\
-	__ret;								\
-})
-
-#define __cmpxchg(ptr, old, new, size)					\
-	__raw_cmpxchg((ptr), (old), (new), (size), LOCK_PREFIX)
-
-#define __sync_cmpxchg(ptr, old, new, size)				\
-	__raw_cmpxchg((ptr), (old), (new), (size), "lock; ")
-
-#define __cmpxchg_local(ptr, old, new, size)				\
-	__raw_cmpxchg((ptr), (old), (new), (size), "")
-
-#ifdef CONFIG_X86_CMPXCHG
-#define __HAVE_ARCH_CMPXCHG 1
-
-#define cmpxchg(ptr, old, new)						\
-	__cmpxchg((ptr), (old), (new), sizeof(*ptr))
-
-#define sync_cmpxchg(ptr, old, new)					\
-	__sync_cmpxchg((ptr), (old), (new), sizeof(*ptr))
-
-#define cmpxchg_local(ptr, old, new)					\
-	__cmpxchg_local((ptr), (old), (new), sizeof(*ptr))
-#endif
-
-#ifdef CONFIG_X86_CMPXCHG64
-#define cmpxchg64(ptr, o, n)						\
-	((__typeof__(*(ptr)))__cmpxchg64((ptr), (unsigned long long)(o), \
-					 (unsigned long long)(n)))
-#define cmpxchg64_local(ptr, o, n)					\
-	((__typeof__(*(ptr)))__cmpxchg64_local((ptr), (unsigned long long)(o), \
-					       (unsigned long long)(n)))
-#endif
-
-static inline unsigned long long __cmpxchg64(volatile void *ptr,
-					     unsigned long long old,
-					     unsigned long long new)
-{
-	unsigned long long prev;
-	asm volatile(LOCK_PREFIX "cmpxchg8b %1"
-		     : "=A" (prev),
-		       "+m" (*__xg(ptr))
-		     : "b" ((unsigned long)new),
-		       "c" ((unsigned long)(new >> 32)),
-		       "0" (old)
-		     : "memory");
-	return prev;
-}
-
-static inline unsigned long long __cmpxchg64_local(volatile void *ptr,
-						   unsigned long long old,
-						   unsigned long long new)
-{
-	unsigned long long prev;
-	asm volatile("cmpxchg8b %1"
-		     : "=A" (prev),
-		       "+m" (*__xg(ptr))
-		     : "b" ((unsigned long)new),
-		       "c" ((unsigned long)(new >> 32)),
-		       "0" (old)
-		     : "memory");
-	return prev;
-}
-
-#ifndef CONFIG_X86_CMPXCHG
-/*
- * Building a kernel capable running on 80386. It may be necessary to
- * simulate the cmpxchg on the 80386 CPU. For that purpose we define
- * a function for each of the sizes we support.
- */
-
-extern unsigned long cmpxchg_386_u8(volatile void *, u8, u8);
-extern unsigned long cmpxchg_386_u16(volatile void *, u16, u16);
-extern unsigned long cmpxchg_386_u32(volatile void *, u32, u32);
-
-static inline unsigned long cmpxchg_386(volatile void *ptr, unsigned long old,
-					unsigned long new, int size)
-{
-	switch (size) {
-	case 1:
-		return cmpxchg_386_u8(ptr, old, new);
-	case 2:
-		return cmpxchg_386_u16(ptr, old, new);
-	case 4:
-		return cmpxchg_386_u32(ptr, old, new);
-	}
-	return old;
-}
-
-#define cmpxchg(ptr, o, n)						\
-({									\
-	__typeof__(*(ptr)) __ret;					\
-	if (likely(boot_cpu_data.x86 > 3))				\
-		__ret = (__typeof__(*(ptr)))__cmpxchg((ptr),		\
-				(unsigned long)(o), (unsigned long)(n),	\
-				sizeof(*(ptr)));			\
-	else								\
-		__ret = (__typeof__(*(ptr)))cmpxchg_386((ptr),		\
-				(unsigned long)(o), (unsigned long)(n),	\
-				sizeof(*(ptr)));			\
-	__ret;								\
-})
-#define cmpxchg_local(ptr, o, n)					\
-({									\
-	__typeof__(*(ptr)) __ret;					\
-	if (likely(boot_cpu_data.x86 > 3))				\
-		__ret = (__typeof__(*(ptr)))__cmpxchg_local((ptr),	\
-				(unsigned long)(o), (unsigned long)(n),	\
-				sizeof(*(ptr)));			\
-	else								\
-		__ret = (__typeof__(*(ptr)))cmpxchg_386((ptr),		\
-				(unsigned long)(o), (unsigned long)(n),	\
-				sizeof(*(ptr)));			\
-	__ret;								\
-})
-#endif
-
-#ifndef CONFIG_X86_CMPXCHG64
-/*
- * Building a kernel capable running on 80386 and 80486. It may be necessary
- * to simulate the cmpxchg8b on the 80386 and 80486 CPU.
- */
-
-extern unsigned long long cmpxchg_486_u64(volatile void *, u64, u64);
-
-#define cmpxchg64(ptr, o, n)					\
-({								\
-	__typeof__(*(ptr)) __ret;				\
-	__typeof__(*(ptr)) __old = (o);				\
-	__typeof__(*(ptr)) __new = (n);				\
-	alternative_io(LOCK_PREFIX_HERE				\
-			"call cmpxchg8b_emu",			\
-			"lock; cmpxchg8b (%%esi)" ,		\
-		       X86_FEATURE_CX8,				\
-		       "=A" (__ret),				\
-		       "S" ((ptr)), "0" (__old),		\
-		       "b" ((unsigned int)__new),		\
-		       "c" ((unsigned int)(__new>>32))		\
-		       : "memory");				\
-	__ret; })
-
-
-
-#define cmpxchg64_local(ptr, o, n)					\
-({									\
-	__typeof__(*(ptr)) __ret;					\
-	if (likely(boot_cpu_data.x86 > 4))				\
-		__ret = (__typeof__(*(ptr)))__cmpxchg64_local((ptr),	\
-				(unsigned long long)(o),		\
-				(unsigned long long)(n));		\
-	else								\
-		__ret = (__typeof__(*(ptr)))cmpxchg_486_u64((ptr),	\
-				(unsigned long long)(o),		\
-				(unsigned long long)(n));		\
-	__ret;								\
-})
-
-#endif
-
-#endif /* _ASM_X86_CMPXCHG_32_H */

+ 0 - 121
Pal/include/cmpxchg_64.h

@@ -1,121 +0,0 @@
-#ifndef _ASM_X86_CMPXCHG_64_H
-#define _ASM_X86_CMPXCHG_64_H
-
-//#include <asm/alternative.h> /* Provides LOCK_PREFIX */
-
-/*
-  Including the definition of LOCK_PREFIX directly here
-*/
-#define LOCK_PREFIX "\n\tlock; "
-
-#define __xg(x) ((volatile char *)(x))
-
-/*static inline void set_64bit(volatile u64 *ptr, u64 val)
-{
-	*ptr = val;
-}*/
-
-extern void __xchg_wrong_size(void);
-extern void __cmpxchg_wrong_size(void);
-
-/*
- * Note: no "lock" prefix even on SMP: xchg always implies lock anyway
- * Note 2: xchg has side effect, so that attribute volatile is necessary,
- *	  but generally the primitive is invalid, *ptr is output argument. --ANK
- */
-#define __xchg(x, ptr, size)						\
-({									\
-	__typeof(*(ptr)) __x = (x);					\
-	switch (size) {							\
-	case 1:								\
-	  asm volatile("lock; xchgb %b0,%1"				\
-			     : "=q" (__x), "+m" (*__xg(ptr))		\
-			     : "0" (__x)				\
-			     : "memory");				\
-		break;							\
-	case 2:								\
-	  asm volatile("lock; xchgw %w0,%1"				\
-			     : "=r" (__x), "+m" (*__xg(ptr))		\
-			     : "0" (__x)				\
-			     : "memory");				\
-		break;							\
-	case 4:								\
-	  asm volatile("lock; xchgl %k0,%1"				\
-			     : "=r" (__x), "+m" (*__xg(ptr))		\
-			     : "0" (__x)				\
-			     : "memory");				\
-		break;							\
-	case 8:								\
-	  asm volatile("lock; xchgq %0,%1"				\
-			     : "=r" (__x), "+m" (*__xg(ptr))		\
-			     : "0" (__x)				\
-			     : "memory");				\
-		break;							\
-	default:							\
-		__xchg_wrong_size();					\
-	}								\
-	__x;								\
-})
-
-#define xchg(ptr, v)							\
-	__xchg((v), (ptr), sizeof(*ptr))
-
-#define __HAVE_ARCH_CMPXCHG 1
-
-/*
- * Atomic compare and exchange.  Compare OLD with MEM, if identical,
- * store NEW in MEM.  Return the initial value in MEM.  Success is
- * indicated by comparing RETURN with OLD.
- */
-#define __raw_cmpxchg(ptr, old, new, size)				\
-({									\
-	__typeof__(*(ptr)) __ret;					\
-	__typeof__(*(ptr)) __old = (old);				\
-	__typeof__(*(ptr)) __new = (new);				\
-	switch (size) {							\
-	case 1:								\
-	  asm volatile(LOCK_PREFIX "cmpxchgb %b2,%1"			\
-			     : "=a" (__ret), "+m" (*__xg(ptr))		\
-			     : "q" (__new), "0" (__old)			\
-			     : "memory");				\
-		break;							\
-	case 2:								\
-	  asm volatile(LOCK_PREFIX "cmpxchgw %w2,%1"			\
-			     : "=a" (__ret), "+m" (*__xg(ptr))		\
-			     : "r" (__new), "0" (__old)			\
-			     : "memory");				\
-		break;							\
-	case 4:								\
-	  asm volatile(LOCK_PREFIX "cmpxchgl %k2,%1"			\
-			     : "=a" (__ret), "+m" (*__xg(ptr))		\
-			     : "r" (__new), "0" (__old)			\
-			     : "memory");				\
-		break;							\
-	case 8:								\
-	  asm volatile(LOCK_PREFIX "cmpxchgq %2,%1"			\
-			     : "=a" (__ret), "+m" (*__xg(ptr))		\
-			     : "r" (__new), "0" (__old)			\
-			     : "memory");				\
-		break;							\
-	default:							\
-		__cmpxchg_wrong_size();					\
-	}								\
-	__ret;								\
-})
-
-#define cmpxchg(ptr, old, new)						\
-	__raw_cmpxchg((ptr), (old), (new), sizeof(*ptr))
-
-#define cmpxchg64(ptr, o, n)						\
-({									\
-	BUILD_BUG_ON(sizeof(*(ptr)) != 8);				\
-	cmpxchg((ptr), (o), (n));					\
-})
-
-#define cmpxchg64_local(ptr, o, n)					\
-({									\
-	BUILD_BUG_ON(sizeof(*(ptr)) != 8);				\
-	cmpxchg_local((ptr), (o), (n));					\
-})
-
-#endif /* _ASM_X86_CMPXCHG_64_H */

+ 179 - 0
Pal/lib/atomic.h

@@ -0,0 +1,179 @@
+/* -*- mode:c; c-file-style:"k&r"; c-basic-offset: 4; tab-width:4; indent-tabs-mode:nil; mode:auto-fill; fill-column:78; -*- */
+/* vim: set ts=4 sw=4 et tw=78 fo=cqt wm=0: */
+
+#ifndef _SHIM_ATOMIC_H_
+#define _SHIM_ATOMIC_H_
+
+/* Copyright (C) 2014 OSCAR lab, Stony Brook University
+ * Copyright (C) 2017 Fortanix Inc, and University of North Carolina 
+ * at Chapel Hill.
+ *
+ * This file defines atomic operations (and barriers) for use in
+ * Graphene.
+ * 
+ * The atomic operation assembly code is taken from musl libc, which 
+ * is subject to the MIT license.
+ * 
+ * At this point, we primarily focus on x86_64; there are some vestigial
+ * 32-bit definitions here, but a more portable version would need to 
+ * move and reimplement portions of this for 32-bit x86 (or other architectures).
+ */
+
+/*
+/----------------------------------------------------------------------
+Copyright (C) 2005-2014 Rich Felker, et al.
+
+    Permission is hereby granted, free of charge, to any person obtaining
+    a copy of this software and associated documentation files (the
+    "Software"), to deal in the Software without restriction, including
+    without limitation the rights to use, copy, modify, merge, publish,
+    distribute, sublicense, and/or sell copies of the Software, and to
+    permit persons to whom the Software is furnished to do so, subject to
+    the following conditions:
+
+    The above copyright notice and this permission notice shall be
+    included in all copies or substantial portions of the Software.
+
+    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+    EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+    MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+    IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+    CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+    TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+    SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+    ----------------------------------------------------------------------
+*/
+
+/* Compiler-only optimization barrier: an empty asm statement with a "memory"
+ * clobber.  It emits no instruction; it only stops the compiler from caching
+ * or reordering memory accesses across this point. */
+#define barrier()    __asm__ __volatile__("": : :"memory")
+/* Spin-wait hint for busy loops ("rep; nop" is the encoding of PAUSE).
+ * Deliberately defined WITHOUT a trailing semicolon so that `cpu_relax();`
+ * is exactly one statement and stays safe inside an unbraced if/else. */
+#define cpu_relax()  __asm__ __volatile__("rep; nop" ::: "memory")
+
+#ifdef __i386__
+/* Read barrier for 32-bit x86: a locked add of 0 to the word at the top of
+ * the stack, plus a "memory" clobber for the compiler. */
+# define rmb()      __asm__ __volatile__("lock; addl $0,0(%%esp)" ::: "memory")
+
+/* Atomic counter for 32-bit x86: a 32-bit value aligned to 4 bytes.
+ * NOTE(review): the accessor functions in this header take and return
+ * int64_t, and cmpxchg() takes an int64_t pointer, so they do not match
+ * this 32-bit layout; the 32-bit definitions are vestigial. */
+struct atomic_int {
+    volatile int32_t counter;
+}
+#ifdef __GNUC__
+__attribute__((aligned(sizeof(uint32_t))))
+#endif
+;
+#endif
+
+
+/* The return types below effectively assume we are dealing with a 64-bit
+ * signed value.
+ */
+#ifdef __x86_64__
+/*
+ * Full, read and write memory barriers for x86_64, implemented with
+ * mfence / lfence / sfence.  Each also carries a "memory" clobber, so it
+ * acts as a compiler barrier as well.
+ *
+ * Some non-Intel clones support out of order store. wmb() ceases to be a
+ * nop for these.
+ */
+# define mb()    __asm__ __volatile__ ("mfence" ::: "memory")
+# define rmb()   __asm__ __volatile__ ("lfence" ::: "memory")
+# define wmb()   __asm__ __volatile__ ("sfence" ::: "memory")
+
+/* Atomic counter for x86_64: a 64-bit signed value aligned to 8 bytes.
+ * Access it only through the atomic_* functions in this header. */
+struct atomic_int {
+    volatile int64_t counter;
+}
+#ifdef __GNUC__
+__attribute__((aligned(sizeof(uint64_t))))
+#endif
+;
+#endif
+
+/* Assembler "lock" prefix preceded by a newline and tab, for pasting in front
+ * of an instruction in an inline-asm template.  The functions in this header
+ * spell the prefix out directly instead of using this macro. */
+#define LOCK_PREFIX     "\n\tlock; "
+
+/* Static initializer for a struct atomic_int, e.g.
+ *     struct atomic_int refs = ATOMIC_INIT(1);  */
+#define ATOMIC_INIT(i)      { (i) }
+
+/* Read the value currently stored in the atomic_int.
+ *
+ * v: the atomic counter to read (not modified).
+ * Returns the counter value observed by a single load. */
+static inline int64_t atomic_read (const struct atomic_int * v)
+{
+    //  Effectively:
+    //      return v->counter;
+    int64_t i;
+    /* Use inline assembly to ensure this is one instruction.  The "memory"
+     * clobber additionally keeps the compiler from moving other memory
+     * accesses across the load. */
+    __asm__ __volatile__("mov %1, %0"
+                         : "=r"(i) :
+                           "m"(v->counter) : "memory");
+    return i;
+}
+
+/* Does a blind write to the atomic variable: the previous value is neither
+ * read nor compared.
+ *
+ * v: the atomic counter to overwrite.
+ * i: the value to store. */
+static inline void atomic_set (struct atomic_int * v, int64_t i)
+{
+    //  Effectively:
+    //      v->counter = i;
+    /* Use inline assembly to ensure this is one instruction.  The template
+     * references only %0 (the destination) and %2 (the new value); the "m"
+     * input operand %1 is listed but not used by the template. */
+    __asm__ __volatile__("mov %2, %0"
+                         : "=m"(v->counter) :
+                           "m"(v->counter), "r"(i) : "memory");
+}
+
+/* Helper function that atomically adds a value to an atomic_int,
+ * and returns the _new_ value.
+ *
+ * i: the amount to add (may be negative).
+ * v: the atomic counter to update.
+ *
+ * "lock xadd" stores counter + i back to memory and leaves the counter's
+ * previous value in the register, so after the instruction `i` holds the
+ * old value; adding the saved increment reconstructs the new value. */
+static inline int64_t _atomic_add (int64_t i, struct atomic_int * v)
+{
+    int64_t increment = i;
+    __asm__ __volatile__(
+        "lock ; xadd %0, %1"
+        : "=r"(i), "=m"(v->counter) : "0"(i) : "memory" );
+    return i + increment;
+}
+
+/* Atomically adds i to v.  Does not return a value.
+ *
+ * Thin wrapper around _atomic_add() that discards the new counter value. */
+static inline void atomic_add (int64_t i, struct atomic_int * v)
+{
+    _atomic_add(i, v);
+}
+
+/* Atomically subtracts i from v.  Does not return a value.
+ *
+ * The negation is performed in unsigned arithmetic.  For i == INT64_MIN the
+ * expression -i overflows a signed 64-bit integer, which is undefined
+ * behavior in C; 0 - (uint64_t) i wraps instead and yields the same
+ * two's-complement bit pattern for every other input. */
+static inline void atomic_sub (int64_t i, struct atomic_int * v)
+{
+    _atomic_add((int64_t) (0 - (uint64_t) i), v);
+}
+
+/* Atomically adds 1 to v.  Does not return a value.
+ *
+ * Goes through _atomic_add(), whose xadd is sized by its 64-bit register
+ * operand.  A bare "incl" is a 32-bit instruction: applied to the 64-bit
+ * counter it would update only the low 32 bits and drop the carry into the
+ * upper half (e.g. -1 + 1 would not produce 0). */
+static inline void atomic_inc (struct atomic_int * v)
+{
+    _atomic_add(1, v);
+}
+
+/* Atomically subtracts 1 from v.  Does not return a value.
+ *
+ * Goes through _atomic_add(), whose xadd is sized by its 64-bit register
+ * operand.  A bare "decl" is a 32-bit instruction: applied to the 64-bit
+ * counter it would update only the low 32 bits, so decrementing 0 would
+ * leave 4294967295 in the counter instead of -1. */
+static inline void atomic_dec (struct atomic_int * v)
+{
+    _atomic_add(-1, v);
+}
+
+/* Atomically decrements v by 1 and reports whether the counter hit zero:
+ * returns 1 if the new value is 0, and 0 otherwise. */
+static inline int64_t atomic_dec_and_test (struct atomic_int * v)
+{
+    return _atomic_add(-1, v) == 0;
+}
+ 
+/* Helper function to atomically compare-and-swap the value pointed to by p.
+ * t is the old (expected) value, s is the new value.  Returns
+ * the value originally in p.
+ *
+ * "lock cmpxchg" compares the accumulator (holding t) with *p.  If they are
+ * equal, s is stored to *p; otherwise *p is loaded into the accumulator.
+ * In both cases t ends up holding the value *p had before the instruction,
+ * so the swap succeeded exactly when the return value equals the t that
+ * was passed in. */
+static inline int64_t cmpxchg(volatile int64_t *p, int64_t t, int64_t s)
+{
+    __asm__ __volatile__ (
+        "lock ; cmpxchg %3, %1"
+        : "=a"(t), "=m"(*p) : "a"(t), "r"(s) : "memory" );
+    return t;
+}
+
+/* Helper function to atomically compare-and-swap the value in v.
+ * If v == old, it sets v = new.
+ * Returns the value originally in v; the swap took place if and only if
+ * that return value equals old.
+ *
+ * Thin wrapper that applies cmpxchg() to v->counter. */
+static inline int64_t atomic_cmpxchg (struct atomic_int * v, int64_t old, int64_t new)
+{
+    return cmpxchg(&v->counter, old, new);
+}
+
+#endif /* _SHIM_ATOMIC_H_ */

+ 0 - 1
Pal/lib/crypto/adapters/mbedtls_dh.c

@@ -107,4 +107,3 @@ void lib_DhFinal(LIB_DH_CONTEXT *context)
     /* This call zeros out context for us. */
     mbedtls_dhm_free(context);
 }
-

+ 1 - 1
Pal/lib/crypto/mbedtls/mbedtls/config.h

@@ -1,5 +1,5 @@
 /* Copyright (C) 2017 Fortanix, Inc.
-   
+
    This file is part of Graphene Library OS.
 
    Graphene Library OS is free software: you can redistribute it and/or

+ 2 - 2
Pal/lib/graphene/config.c

@@ -177,8 +177,8 @@ int get_config_entries_size (struct config_store * store, const char * key)
 }
 
 static int __del_config (struct config_store * store,
-                         LISTP_TYPE(config) * root, struct config * p, 
-                         const char * key)
+                         LISTP_TYPE(config) * root,
+                         struct config * p, const char * key)
 {
     struct config * e, * found = NULL;
     int len = 0;

+ 25 - 0
Pal/regression/00_Atomics.py

@@ -0,0 +1,25 @@
+#!/usr/bin/python
+
+import os, sys, mmap
+from regression import Regression
+
+loader = os.environ['PAL_LOADER']
+is_sgx = 'SGX_RUN' in os.environ and os.environ['SGX_RUN'] == '1'
+success = True
+
+def manifest_file(file):
+    if is_sgx:
+        return file + '.manifest.sgx'
+    else:
+        return file + '.manifest'
+
+# Running AtomicMath
+regression = Regression(loader, "AtomicMath")
+
+regression.add_check(name="Atomic Math",
+    check=lambda res: "Subtract INT_MIN: Both values match 2147483648" in res[0].log and \
+                     "Subtract INT_MAX: Both values match -2147483647" in res[0].log and \
+                     "Subtract LLONG_MIN: Both values match -9223372036854775808" in res[0].log and \
+                     "Subtract LLONG_MAX: Both values match -9223372036854775807" in res[0].log)
+
+regression.run_checks()

+ 2 - 2
Pal/regression/00_Symbols.py

@@ -37,8 +37,8 @@ all_symbols = [
     'DkThreadResume',
     'DkSetExceptionHandler',
     'DkExceptionReturn',
-    'DkSemaphoreCreate',
-    'DkSemaphoreRelease',
+    'DkMutexCreate',
+    'DkMutexRelease',
     'DkNotificationEventCreate',
     'DkSynchronizationEventCreate',
     'DkEventSet',

+ 2 - 8
Pal/regression/02_Semaphore.py

@@ -10,17 +10,11 @@ regression = Regression(loader, "Semaphore")
 
 regression.add_check(name="Semaphore: Timeout on Locked Semaphores",
     check=lambda res: "Locked binary semaphore timed out (1000)." in res[0].log and
-                      "Locked non-binary semaphore timed out (1000)." in res[0].log and
-                      "Two locked semaphores timed out (1000)." in res[0].log and
-                      "Locked binary semaphore timed out (0)." in res[0].log and
-                      "Locked non-binary semaphore timed out (0)." in res[0].log and
-                      "Two locked semaphores timed out (0)." in res[0].log)
+                      "Locked binary semaphore timed out (0)." in res[0].log)
 
 regression.add_check(name="Semaphore: Acquire Unlocked Semaphores",
     check=lambda res: "Locked binary semaphore successfully (-1)." in res[0].log and
-                      "Locked non-binary semaphore successfully (-1)." in res[0].log and
-                      "Locked binary semaphore successfully (0)." in res[0].log and
-                      "Locked non-binary semaphore successfully (0)." in res[0].log)
+                      "Locked binary semaphore successfully (0)." in res[0].log)
 
 rv = regression.run_checks()
 if rv: sys.exit(rv)

+ 66 - 0
Pal/regression/AtomicMath.c

@@ -0,0 +1,66 @@
+/* -*- mode:c; c-file-style:"k&r"; c-basic-offset: 4; tab-width:4; indent-tabs-mode:nil; mode:auto-fill; fill-column:78; -*- */
+/* vim: set ts=4 sw=4 et tw=78 fo=cqt wm=0: */
+
+#include "pal.h"
+#include "pal_debug.h"
+#include <atomic.h>
+
+#include <string.h>
+#include <limits.h>
+
+/* Regression test for atomic_sub(): for several boundary values, subtract the
+ * value from a plain int64_t and from an atomic_int that both start at 0, then
+ * print whether the two results agree.  The exact output lines are matched by
+ * 00_Atomics.py, so the message text must not change.
+ *
+ * Always returns 0; success or failure is conveyed through the printed text. */
+int main (int argc, char ** argv, char ** envp)
+{
+  int64_t my_int = 0;
+  struct atomic_int a_int;
+  atomic_set(&a_int, 0);
+
+  /* Check that INT_MIN and INT_MAX wrap around consistently
+   * with atomic values.
+   *
+   * Check atomic_sub specifically.
+   */
+  my_int -= INT_MIN;
+  atomic_sub(INT_MIN, &a_int);
+
+  if (my_int == atomic_read(&a_int))
+    pal_printf("Subtract INT_MIN: Both values match %lld\n", my_int);
+  else
+    pal_printf("Subtract INT_MIN: Values do not match %lld, %lld\n", my_int, atomic_read(&a_int));
+
+  atomic_set(&a_int, 0);
+  my_int = 0;
+
+  my_int -= INT_MAX;
+  atomic_sub(INT_MAX, &a_int);
+
+  if (my_int == atomic_read(&a_int))
+    pal_printf("Subtract INT_MAX: Both values match %lld\n", my_int);
+  else
+    pal_printf("Subtract INT_MAX: Values do not match %lld, %lld\n", my_int, atomic_read(&a_int));
+
+  /* Check that 64-bit signed values also wrap properly. */
+  atomic_set(&a_int, 0);
+  my_int = 0;
+
+  /* 0 - LLONG_MIN is not representable in a signed 64-bit integer, so
+   * "my_int -= LLONG_MIN" would be signed overflow (undefined behavior).
+   * Compute the expected wrapped value in unsigned arithmetic instead. */
+  my_int = (int64_t) (0 - (uint64_t) LLONG_MIN);
+  atomic_sub(LLONG_MIN, &a_int);
+
+  if (my_int == atomic_read(&a_int))
+    pal_printf("Subtract LLONG_MIN: Both values match %lld\n", my_int);
+  else
+    pal_printf("Subtract LLONG_MIN: Values do not match %lld, %lld\n", my_int, atomic_read(&a_int));
+
+  atomic_set(&a_int, 0);
+  my_int = 0;
+
+  my_int -= LLONG_MAX;
+  atomic_sub(LLONG_MAX, &a_int);
+
+  if (my_int == atomic_read(&a_int))
+    pal_printf("Subtract LLONG_MAX: Both values match %lld\n", my_int);
+  else
+    pal_printf("Subtract LLONG_MAX: Values do not match %lld, %lld\n", my_int, atomic_read(&a_int));
+
+
+  return 0;
+}

+ 2 - 43
Pal/regression/Semaphore.c

@@ -8,7 +8,7 @@
 void helper_timeout(PAL_NUM timeout) {
     /* Create a binary semaphore */
 
-    PAL_HANDLE sem1 = DkSemaphoreCreate(1, 1);
+    PAL_HANDLE sem1 = DkMutexCreate(1);
 
     if(!sem1) {
         pal_printf("Failed to create a binary semaphore\n");
@@ -22,39 +22,13 @@ void helper_timeout(PAL_NUM timeout) {
     else 
         pal_printf("Acquired locked binary semaphore!?! Got back %p; sem1 is %p (%d)\n", rv, sem1, timeout);
     
-    PAL_HANDLE sem2 = DkSemaphoreCreate(2, 2);
-
-    if(!sem2) {
-        pal_printf("Failed to create a non-binary semaphore\n");
-        return;
-    }
-
-    /* Wait on the non-binary semaphore with a timeout */
-    rv = DkObjectsWaitAny(1, &sem2, timeout);
-    if (rv == NULL)
-        pal_printf("Locked non-binary semaphore timed out (%d).\n", timeout);
-    else 
-        pal_printf("Acquired locked non-binary semaphore!?! Got back %p; sem2 is %p (%d)\n", rv, sem2, timeout);
-
-    /* Try waiting on both */
-    PAL_HANDLE hdls[2];
-    hdls[0] = sem1;
-    hdls[1] = sem2;
-
-    rv = DkObjectsWaitAny(2, hdls, timeout);
-    if (rv == NULL)
-        pal_printf("Two locked semaphores timed out (%d).\n", timeout);
-    else 
-        pal_printf("Somehow locked one of two locked semaphore handles? %p (%d)\n", rv, timeout);
-
     DkObjectClose(sem1);
-    DkObjectClose(sem2);
 }
 
 void helper_success(PAL_NUM timeout) {
     /* Create a binary semaphore */
 
-    PAL_HANDLE sem1 = DkSemaphoreCreate(0, 1);
+    PAL_HANDLE sem1 = DkMutexCreate(0);
 
     if(!sem1) {
         pal_printf("Failed to create a binary semaphore\n");
@@ -68,22 +42,7 @@ void helper_success(PAL_NUM timeout) {
     else 
         pal_printf("Failed to lock binary semaphore: Got back %p; sem1 is %p\n", rv, sem1);
     
-    PAL_HANDLE sem2 = DkSemaphoreCreate(0, 2);
-
-    if(!sem2) {
-        pal_printf("Failed to create a non-binary semaphore\n");
-        return;
-    }
-
-    /* Wait on the non-binary semaphore with a timeout */
-    rv = DkObjectsWaitAny(1, &sem2, timeout);
-    if (rv == sem2)
-        pal_printf("Locked non-binary semaphore successfully (%d).\n", timeout);
-    else 
-        pal_printf("Failed to lock non-binary semaphore Got back %p; sem2 is %p\n", rv, sem2);
-
     DkObjectClose(sem1);
-    DkObjectClose(sem2);
 }
 
 

+ 2 - 2
Pal/regression/Symbols.c

@@ -48,8 +48,8 @@ int main (int argc, char ** argv, char ** envp)
     print_symbol(DkSetExceptionHandler);
     print_symbol(DkExceptionReturn);
 
-    print_symbol(DkSemaphoreCreate);
-    print_symbol(DkSemaphoreRelease);
+    print_symbol(DkMutexCreate);
+    print_symbol(DkMutexRelease);
     print_symbol(DkNotificationEventCreate);
     print_symbol(DkSynchronizationEventCreate);
     print_symbol(DkEventSet);

+ 1 - 1
Pal/src/Makefile

@@ -33,7 +33,7 @@ files_to_build = $(pal_lib) $(pal_lib_post) $(pal_static) \
 
 defs	= -DIN_PAL -DHOST_TYPE="$(PAL_HOST)" -D$(PAL_HOST_MACRO) -DPAL_DIR=$(PAL_DIR) \
 	  -DRUNTIME_DIR=$(RUNTIME_DIR)
-objs	= $(addprefix db_,streams memory threading semaphore events process \
+objs	= $(addprefix db_,streams memory threading mutex events process \
 	    object main misc ipc exception rtld) slab printf
 graphene_lib = .lib/graphene-lib.a
 host_lib = host/$(PAL_HOST)/libpal-$(PAL_HOST).a

+ 21 - 19
Pal/src/db_semaphore.c → Pal/src/db_mutex.c

@@ -18,9 +18,9 @@
    along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
 
 /*
- * db_semaphore.c
+ * db_mutex.c
  *
- * This file contains APIs that provides operations of semaphores.
+ * This file contains APIs that provides operations of mutexes.
  */
 
 #include "pal_defs.h"
@@ -30,13 +30,14 @@
 #include "api.h"
 
 PAL_HANDLE
-DkSemaphoreCreate (PAL_NUM initialCount, PAL_NUM maxCount)
+DkMutexCreate (PAL_NUM initialCount)
 {
-    ENTER_PAL_CALL(DkSemaphoreCreate);
+    ENTER_PAL_CALL(DkMutexCreate);
 
-    PAL_HANDLE handle = (PAL_HANDLE) malloc(HANDLE_SIZE(semaphore));
-
-    int ret = _DkSemaphoreCreate(handle, initialCount, maxCount);
+    PAL_HANDLE handle = (PAL_HANDLE) malloc(HANDLE_SIZE(mutex));
+    SET_HANDLE_TYPE(handle, mutex);
+    
+    int ret = _DkMutexCreate(handle, initialCount);
 
     if (ret < 0) {
         free(handle);
@@ -48,38 +49,39 @@ DkSemaphoreCreate (PAL_NUM initialCount, PAL_NUM maxCount)
 }
 
 void
-DkSemaphoreDestroy (PAL_HANDLE semaphoreHandle)
+DkMutexDestroy (PAL_HANDLE handle)
 {
-    ENTER_PAL_CALL(DkSemaphoreDestroy);
+    ENTER_PAL_CALL(DkMutexDestroy);
 
-    if (!semaphoreHandle) {
+    if (!handle) {
         _DkRaiseFailure(PAL_ERROR_INVAL);
         LEAVE_PAL_CALL();
     }
 
-    _DkSemaphoreDestroy(semaphoreHandle);
+    _DkMutexDestroy(handle);
+    free(handle);
     LEAVE_PAL_CALL();
 }
 
-void DkSemaphoreRelease (PAL_HANDLE handle, PAL_NUM count)
+void DkMutexRelease (PAL_HANDLE handle)
 {
-    ENTER_PAL_CALL(DkSemaphoreRelease);
+    ENTER_PAL_CALL(DkMutexRelease);
 
     if (!handle ||
-        !IS_HANDLE_TYPE(handle, semaphore)) {
+        !IS_HANDLE_TYPE(handle, mutex)) {
         _DkRaiseFailure(PAL_ERROR_INVAL);
         LEAVE_PAL_CALL();
     }
 
-    _DkSemaphoreRelease (handle, count);
+    _DkMutexRelease (handle);
     LEAVE_PAL_CALL();
 }
 
-static int sem_wait (PAL_HANDLE handle, uint64_t timeout)
+static int mutex_wait (PAL_HANDLE handle, uint64_t timeout)
 {
-    return _DkSemaphoreAcquireTimeout(handle, 1, timeout);
+    return _DkMutexAcquireTimeout(handle, timeout);
 }
 
-struct handle_ops sem_ops = {
-        .wait               = &sem_wait,
+struct handle_ops mutex_ops = {
+        .wait               = &mutex_wait,
     };

+ 1 - 1
Pal/src/db_object.c

@@ -57,7 +57,7 @@ int _DkObjectClose (PAL_HANDLE objectHandle)
     if (!objectHandle || UNKNOWN_HANDLE(objectHandle))
         return -PAL_ERROR_INVAL;
 
-    if (atomic_dec_and_test_nonnegative(&HANDLE_HDR(objectHandle)->ref))
+    if (!atomic_dec_and_test(&HANDLE_HDR(objectHandle)->ref))
         return 0;
 
     const struct handle_ops * ops = HANDLE_OPS(objectHandle);

+ 2 - 2
Pal/src/db_streams.c

@@ -45,7 +45,7 @@ extern struct handle_ops udpsrv_ops;
 extern struct hadnle_ops udppacket_ops;
 extern struct handle_ops thread_ops;
 extern struct handle_ops proc_ops;
-extern struct handle_ops sem_ops;
+extern struct handle_ops mutex_ops;
 extern struct handle_ops event_ops;
 extern struct handle_ops gipc_ops;
 extern struct handle_ops mcast_ops;
@@ -65,7 +65,7 @@ const struct handle_ops * pal_handle_ops [PAL_HANDLE_TYPE_BOUND] = {
             [pal_type_process]   = &proc_ops,
             [pal_type_mcast]     = &mcast_ops,
             [pal_type_thread]    = &thread_ops,
-            [pal_type_semaphore] = &sem_ops,
+            [pal_type_mutex]     = &mutex_ops,
             [pal_type_event]     = &event_ops,
             [pal_type_gipc]      = &gipc_ops,
         };

+ 1 - 1
Pal/src/host/FreeBSD/pal_host.h

@@ -60,8 +60,8 @@ typedef union pal_handle
 
     struct {
         PAL_IDX type;
-        PAL_REF ref;
         PAL_FLG flags;
+        PAL_REF ref;
         PAL_IDX fds[];
     } __in;
 

+ 1 - 1
Pal/src/host/Linux-SGX/Makefile

@@ -7,7 +7,7 @@ host_files = libpal-Linux-SGX.a pal-sgx debugger/sgx_gdb.so
 
 defs	= -DIN_PAL -DPAL_DIR=$(PAL_DIR) -DRUNTIME_DIR=$(RUNTIME_DIR)
 enclave-objs = $(addprefix db_,files devices pipes sockets streams memory \
-		 threading semaphore mutex events process object main rtld \
+		 threading mutex events process object main rtld \
 		 exception misc ipc spinlock) \
 	       $(addprefix enclave_,ocalls ecalls framework pages untrusted) 
 enclave-asm-objs = enclave_entry 

+ 1 - 0
Pal/src/host/Linux-SGX/db_devices.c

@@ -220,6 +220,7 @@ static int dev_open (PAL_HANDLE * handle, const char * type, const char * uri,
             return -PAL_ERROR_NOTSUPPORT;
 
     PAL_HANDLE hdl = malloc(HANDLE_SIZE(dev));
+    SET_HANDLE_TYPE(hdl, dev);
     hdl->dev.fd_in  = PAL_IDX_POISON;
     hdl->dev.fd_out = PAL_IDX_POISON;
     *handle = hdl;

+ 2 - 2
Pal/src/host/Linux-SGX/db_files.c

@@ -316,7 +316,7 @@ static int file_attrquery (const char * type, const char * uri,
 static int file_attrquerybyhdl (PAL_HANDLE handle,
                                 PAL_STREAM_ATTR * attr)
 {
-    int fd = HANDLE_HDR(handle)->fds[0];
+    int fd = handle->file.fd;
     struct stat stat_buf;
 
     int ret = ocall_fstat(fd, &stat_buf);
@@ -330,7 +330,7 @@ static int file_attrquerybyhdl (PAL_HANDLE handle,
 static int file_attrsetbyhdl (PAL_HANDLE handle,
                               PAL_STREAM_ATTR * attr)
 {
-    int fd = HANDLE_HDR(handle)->fds[0];
+    int fd = handle->file.fd;
     int ret = ocall_fchmod(fd, attr->share_flags | 0600);
     if (ret < 0)
         return ret;

+ 70 - 93
Pal/src/host/Linux-SGX/db_mutex.c

@@ -30,6 +30,7 @@
 #include "pal.h"
 #include "pal_internal.h"
 #include "pal_linux.h"
+#include "pal_linux_error.h"
 #include "pal_error.h"
 #include "pal_debug.h"
 #include "api.h"
@@ -38,33 +39,39 @@
 #include <limits.h>
 #include <atomic.h>
 #include <linux/time.h>
+#include <errno.h>
+#include <asm/errno.h>
 
-#ifdef __i386__
-# define barrier()       asm volatile("" ::: "memory");
-# define rmb()           asm volatile("lock; addl $0,0(%%esp)" ::: "memory")
-# define cpu_relax()     asm volatile("rep; nop" ::: "memory");
-#endif
+#define MUTEX_SPINLOCK_TIMES    100
+#define MUTEX_UNLOCKED            0
+#define MUTEX_LOCKED              1
 
-#ifdef __x86_64__
-# include <unistd.h>
-# define barrier()       asm volatile("" ::: "memory");
-# define rmb()           asm volatile("lfence" ::: "memory")
-# define cpu_relax()     asm volatile("rep; nop" ::: "memory");
-#endif
 
-#define MUTEX_SPINLOCK_TIMES    100
+int
+_DkMutexCreate (PAL_HANDLE handle, int initialCount)
+{
+    /*
+     * The handle is allocated outside of host-specific code; this only sets up
+     * the host-specific mutex state, including how initialCount is encoded.
+     * NOTE(review): _DkMutexDestroy() below free()s the handle; confirm vs. caller.
+     */
+    SET_HANDLE_TYPE(handle, mutex);
+    atomic_set(&handle->mutex.mut.nwaiters, 0);
+    handle->mutex.mut.locked = initialCount;
+    return 0;
+}
 
-int _DkMutexLockTimeout (struct mutex_handle * m, int timeout)
+void _DkMutexDestroy (PAL_HANDLE handle)
 {
-    int ret = 0;
-#ifdef DEBUG_MUTEX
-    int tid = INLINE_SYSCALL(gettid, 0);
-#endif
+    free(handle);
+}
 
-    if (timeout == -1)
-        return -_DkMutexLock(m);
 
-    if (!xchg(&m->b.locked, 1))
+int _DkMutexLockTimeout (struct mutex_handle * m, uint64_t timeout)
+{
+    int ret = 0;
+
+    if (MUTEX_UNLOCKED == cmpxchg(&m->locked, MUTEX_UNLOCKED, MUTEX_LOCKED))
         goto success;
 
     if (timeout == 0) {
@@ -72,104 +79,74 @@ int _DkMutexLockTimeout (struct mutex_handle * m, int timeout)
         goto out;
     }
 
-    unsigned long waittime = timeout;
+    // Bump up the waiters count; we are probably going to block
+    atomic_inc(&m->nwaiters);
+
+    while (MUTEX_LOCKED == cmpxchg(&m->locked, MUTEX_UNLOCKED, MUTEX_LOCKED)) {
+
+        // This is broken. The mutex is in enclave memory, the URTS can't
+        // do FUTEX_WAIT on it. This call will always fail and the next level
+        // up needs to retry.
+        ret = ocall_futex((int *) m, FUTEX_WAIT, MUTEX_LOCKED, timeout == -1 ? NULL : &timeout);
 
-    while (xchg(&m->u, 257) & 1) {
-        ret = ocall_futex((int *) m, FUTEX_WAIT, 257, timeout ? &waittime : NULL);
         if (ret < 0) {
-            if (ret == -PAL_ERROR_TRYAGAIN) {
-                xchg(&m->b.contended, 0);
+            if (-ret == EWOULDBLOCK) {
+                ret = -PAL_ERROR_TRYAGAIN;
+                atomic_dec(&m->nwaiters);
                 goto out;
             }
-#ifdef DEBUG_MUTEX
-            printf("futex failed (err = %d)\n", ERRNO(ret));
-#endif
+            ret = unix_to_pal_error(ERRNO(ret));
+            atomic_dec(&m->nwaiters);
             goto out;
         }
     }
 
+    atomic_dec(&m->nwaiters);
+
 success:
-#ifdef DEBUG_MUTEX
-    m->owner = tid;
-#endif
     ret = 0;
 out:
-#ifdef DEBUG_MUTEX
-    if (ret < 0)
-        printf("mutex failed (%s, tid = %d)\n", PAL_STRERROR(ret), tid);
-#endif
     return ret;
 }
 
 int _DkMutexLock (struct mutex_handle * m)
 {
     int ret = 0, i;
-#ifdef DEBUG_MUTEX
-    int tid = INLINE_SYSCALL(gettid, 0);
-#endif
-
-    /* Spin and try to take lock */
-    for (i = 0; i < MUTEX_SPINLOCK_TIMES; i++) {
-        if (!xchg(&m->b.locked, 1))
-            goto success;
-        cpu_relax();
-    }
-
-    // Mutex is union of u8 array and u32; this assumes a little-endian machine.
-    while (xchg(&m->u, 257) & 1) {
-        // This is broken. The mutex is in enclave memory, the URTS can't
-        // do FUTEX_WAIT on it. This call will always fail and the next level
-        // up needs to retry.
-        ret = ocall_futex((int *) m, FUTEX_WAIT, 257, NULL);
-        if (ret < 0 &&
-            ret != -PAL_ERROR_TRYAGAIN) {
-#ifdef DEBUG_MUTEX
-            printf("futex failed (err = %d)\n", ERRNO(ret));
-#endif
-            goto out;
-        }
-    }
+    return _DkMutexLockTimeout(m, -1);
+}
 
-success:
-#ifdef DEBUG_MUTEX
-    m->owner = tid;
-#endif
-    ret = 0;
-out:
-#ifdef DEBUG_MUTEX
-    if (ret < 0)
-        printf("mutex failed (%s, tid = %d)\n", PAL_STRERROR(ret), tid);
-#endif
-    return ret;
+int _DkMutexAcquireTimeout (PAL_HANDLE handle, int _timeout)
+{
+    struct mutex_handle * mut = &handle->mutex.mut;
+    return _DkMutexLockTimeout(mut, _timeout);
 }
 
 int _DkMutexUnlock (struct mutex_handle * m)
 {
-    int ret = 0, i;
-
-#ifdef DEBUG_MUTEX
-    m->owner = 0;
-#endif
-
-    /* Unlock, and if not contended then exit. */
-    if ((m->u == 1) && (cmpxchg(&m->u, 1, 0) == 1)) return 0;
-    m->b.locked = 0;
-    barrier();
+    int ret = 0;
+    int need_wake;
 
-    /* See if somebody else takes the lock */
-    for (i = 0; i < MUTEX_SPINLOCK_TIMES * 2; i++) {
-        if (m->b.locked)
-            goto success;
-        cpu_relax();
-    }
+    /* Unlock */
+    m->locked = 0;
+    /* We need to make sure the write to locked is visible to lock-ers
+     * before we read the waiter count. */
+    mb();
 
-    m->b.contended = 0;
+    need_wake= atomic_read(&m->nwaiters);
 
-    /* Nobody took it, we need to wake someone up */
-    ocall_futex((int *) m, FUTEX_WAKE, 1, NULL);
+    /* If we need to wake someone up... */
+    if (need_wake)
+        ocall_futex((int *) m, FUTEX_WAKE, 1, NULL);
 
-success:
-    ret = 0;
-out:
     return ret;
 }
+
+void _DkMutexRelease (PAL_HANDLE handle)
+{
+    struct mutex_handle * mut = &handle->mutex.mut;
+    int ret = _DkMutexUnlock(mut);
+    if (ret < 0)
+        _DkRaiseFailure(ret);
+    return;
+}
+

+ 4 - 5
Pal/src/host/Linux-SGX/db_object.c

@@ -36,7 +36,6 @@
 #include <linux/poll.h>
 #include <linux/wait.h>
 #include <atomic.h>
-#include <cmpxchg.h>
 
 #define DEFAULT_QUANTUM 500
 
@@ -63,7 +62,7 @@ static int _DkObjectWaitOne (PAL_HANDLE handle, uint64_t timeout)
                 events |= POLLOUT;
 
             if (events) {
-                fds[nfds].fd = HANDLE_HDR(handle)->fds[i];
+                fds[nfds].fd = handle->generic.fds[i];
                 fds[nfds].events = events|POLLHUP|POLLERR;
                 fds[nfds].revents = 0;
                 off[nfds] = i;
@@ -167,8 +166,8 @@ int _DkObjectsWaitAny (int count, PAL_HANDLE * handleArray, uint64_t timeout,
                 !(HANDLE_HDR(hdl)->flags & ERROR(j)))
                 events |= POLLOUT;
 
-            if (events && HANDLE_HDR(hdl)->fds[j] != PAL_IDX_POISON) {
-                fds[nfds].fd = HANDLE_HDR(hdl)->fds[j];
+            if (events && hdl->generic.fds[j] != PAL_IDX_POISON) {
+                fds[nfds].fd = hdl->generic.fds[j];
                 fds[nfds].events = events|POLLHUP|POLLERR;
                 fds[nfds].revents = 0;
                 hdls[nfds] = hdl;
@@ -205,7 +204,7 @@ int _DkObjectsWaitAny (int count, PAL_HANDLE * handleArray, uint64_t timeout,
 
         for (j = 0 ; j < MAX_FDS ; j++)
             if ((HANDLE_HDR(hdl)->flags & (RFD(j)|WFD(j))) &&
-                HANDLE_HDR(hdl)->fds[j] == fds[i].fd)
+                hdl->generic.fds[j] == fds[i].fd)
                 break;
 
         if (j == MAX_FDS)

+ 5 - 4
Pal/src/host/Linux-SGX/db_pipes.c

@@ -324,12 +324,12 @@ static int pipe_delete (PAL_HANDLE handle, int access)
 
 static int pipe_attrquerybyhdl (PAL_HANDLE handle, PAL_STREAM_ATTR * attr)
 {
-    if (HANDLE_HDR(handle)->fds[0] == PAL_IDX_POISON)
+    if (handle->generic.fds[0] == PAL_IDX_POISON)
         return -PAL_ERROR_BADHANDLE;
 
     attr->handle_type  = PAL_GET_TYPE(handle);
 
-    int read_fd = HANDLE_HDR(handle)->fds[0];
+    int read_fd = handle->generic.fds[0];
     int flags = HANDLE_HDR(handle)->flags;
 
     if (!IS_HANDLE_TYPE(handle, pipesrv)) {
@@ -350,6 +350,7 @@ static int pipe_attrquerybyhdl (PAL_HANDLE handle, PAL_STREAM_ATTR * attr)
     int ret = ocall_poll(&pfd, 1, &waittime);
     if (ret < 0)
         return ret;
+    
     attr->readable = (ret == 1 && pfd.revents == POLLIN);
 
     attr->disconnected = flags & ERROR(0);
@@ -361,7 +362,7 @@ static int pipe_attrquerybyhdl (PAL_HANDLE handle, PAL_STREAM_ATTR * attr)
 
 static int pipe_attrsetbyhdl (PAL_HANDLE handle, PAL_STREAM_ATTR * attr)
 {
-    if (HANDLE_HDR(handle)->fds[0] == PAL_IDX_POISON)
+    if (handle->generic.fds[0] == PAL_IDX_POISON)
         return -PAL_ERROR_BADHANDLE;
 
     PAL_BOL * nonblocking = (HANDLE_HDR(handle)->type == pal_type_pipeprv) ?
@@ -369,7 +370,7 @@ static int pipe_attrsetbyhdl (PAL_HANDLE handle, PAL_STREAM_ATTR * attr)
                             &handle->pipe.nonblocking;
 
     if (attr->nonblocking != *nonblocking) {
-        int ret = ocall_fsetnonblock(HANDLE_HDR(handle)->fds[0], attr->nonblocking);
+        int ret = ocall_fsetnonblock(handle->generic.fds[0], attr->nonblocking);
         if (ret < 0)
             return ret;
 

+ 0 - 255
Pal/src/host/Linux-SGX/db_semaphore.c

@@ -1,255 +0,0 @@
-/* -*- mode:c; c-file-style:"k&r"; c-basic-offset: 4; tab-width:4; indent-tabs-mode:nil; mode:auto-fill; fill-column:78; -*- */
-/* vim: set ts=4 sw=4 et tw=78 fo=cqt wm=0: */
-
-/* Copyright (C) 2014 OSCAR lab, Stony Brook University
-   This file is part of Graphene Library OS.
-
-   Graphene Library OS is free software: you can redistribute it and/or
-   modify it under the terms of the GNU General Public License
-   as published by the Free Software Foundation, either version 3 of the
-   License, or (at your option) any later version.
-
-   Graphene Library OS is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-   GNU General Public License for more details.
-
-   You should have received a copy of the GNU General Public License
-   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
-
-/*
- * db_semaphore.c
- *
- * This file contains APIs that provides operations of semaphores.
- */
-
-#include "pal_defs.h"
-#include "pal_linux_defs.h"
-#include "pal.h"
-#include "pal_internal.h"
-#include "pal_linux.h"
-#include "pal_error.h"
-#include "api.h"
-
-#include <cmpxchg.h>
-#include <atomic.h>
-#include <linux/futex.h>
-#include <limits.h>
-#include <linux/time.h>
-
-static inline int atomic_dec_if_positive (struct atomic_int *v)
-{
-    int c, old, dec;
-    c = atomic_read(v);
-    for (;;) {
-        dec = c - 1;
-        if (dec < 0)
-            break;
-        old = atomic_cmpxchg((v), c, dec);
-        if (old == c)
-            break;
-        c = old;
-    }
-    return dec;
-}
-
-int
-_DkSemaphoreCreate (PAL_HANDLE handle, int initialCount, int maxCount)
-{
-    /*
-     * 1. Allocate memory for db_sem (this includes a futex variable).
-     * 2. Pack it into a PAL_HANDLE
-     * 3. Set the semaphore object with the argument values (count, maxCount)
-     */
-
-    SET_HANDLE_TYPE(handle, semaphore);
-    atomic_set(&handle->semaphore.nwaiters, 0);
-    handle->semaphore.max_value = maxCount;
-
-    /* optimization: if maxCount == 1, we make it into mutex */
-    if (handle->semaphore.max_value == 1) {
-        handle->semaphore.value.mut.u = initialCount;
-    } else {
-        atomic_set(&handle->semaphore.value.i, maxCount - initialCount);
-    }
-
-    return 0;
-}
-
-void _DkSemaphoreDestroy (PAL_HANDLE semaphoreHandle)
-{
-    /* do nothing */
-}
-
-int _DkMutexLockTimeout (struct mutex_handle * mut, int timeout);
-
-int _DkSemaphoreAcquire (PAL_HANDLE sem, int count)
-{
-    /* optimization: use it as a mutex */
-    if (sem->semaphore.max_value == 1) {
-        struct mutex_handle * mut = &sem->semaphore.value.mut;
-        return _DkMutexLock(mut);
-    }
-
-    if (count > sem->semaphore.max_value)
-        return -PAL_ERROR_INVAL;
-
-    struct atomic_int * value = &sem->semaphore.value.i;
-    int c = 0;
-
-    if (!value)
-        return -PAL_ERROR_BADHANDLE;
-
-    if (count == 1)
-        c = atomic_dec_and_test_nonnegative (value);
-    else
-        c = atomic_sub_and_test_nonnegative (count, value);
-
-    if (c)
-        return 0;
-
-    /* We didn't get the lock.  Bump the count back up. */
-    if (count == 1)
-        atomic_inc (value);
-    else
-        atomic_add (count, value);
-
-    int ret = 0;
-    atomic_inc (&sem->semaphore.nwaiters);
-
-    while (1) {
-        ret = ocall_futex((int *) &value->counter, FUTEX_WAIT, 0, NULL);
-
-        if (ret < 0) {
-            if (ret == -PAL_ERROR_TRYAGAIN)
-                ret = 0;
-            else
-                break;
-        }
-
-        if (count == 1)
-            c = atomic_dec_and_test_nonnegative (value);
-        else
-            c = atomic_sub_and_test_nonnegative (count, value);
-
-        if (c)
-            break;
-
-        /* We didn't get the lock.  Bump the count back up. */
-        if (count == 1)
-            atomic_inc (value);
-        else
-            atomic_add (count, value);
-    }
-
-    atomic_dec (&sem->semaphore.nwaiters);
-    return ret;
-}
-
-int _DkSemaphoreAcquireTimeout (PAL_HANDLE sem, int count, uint64_t timeout)
-{
-    /* Pass it up to the no-timeout version if no timeout requested */
-    if (timeout == -1)
-        return _DkSemaphoreAcquire(sem, count);
-
-    /* optimization: use it as a mutex */
-    if (sem->semaphore.max_value == 1) {
-        struct mutex_handle * mut = & sem->semaphore.value.mut;
-        return _DkMutexLockTimeout(mut, timeout);
-    }
-
-    if (count > sem->semaphore.max_value)
-        return -PAL_ERROR_INVAL;
-
-    struct atomic_int * value = &sem->semaphore.value.i;
-    int c = 0;
-
-    if (!value)
-        return -PAL_ERROR_BADHANDLE;
-
-    if (count == 1)
-        c = atomic_dec_and_test_nonnegative (value);
-    else
-        c = atomic_sub_and_test_nonnegative (count, value);
-
-    if (c)
-        return 0;
-
-    /* We didn't get the lock.  Bump the count back up. */
-    if (count == 1)
-        atomic_inc ((struct atomic_int *) value);
-    else
-        atomic_add (count, (struct atomic_int *) value);
-
-    if (!timeout)
-        return -PAL_ERROR_TRYAGAIN;
-
-    unsigned long waittime = timeout;
-    int ret = 0;
-
-    atomic_inc (&sem->semaphore.nwaiters);
-
-    while (1) {
-        ret = ocall_futex((int *) &value->counter, FUTEX_WAIT, 0, timeout >= 0 ? &waittime : NULL);
-
-        if (ret < 0) {
-            if (ret == -PAL_ERROR_TRYAGAIN)
-                ret = 0;
-            else
-                break;
-        }
-
-        if (count == 1)
-            c = atomic_dec_and_test_nonnegative (value);
-        else
-            c = atomic_sub_and_test_nonnegative (count, value);
-
-        if (c)
-            break;
-    }
-
-    /* We didn't get the lock.  Bump the count back up. */
-    if (count == 1)
-        atomic_inc (value);
-    else
-        atomic_add (count, value);
-
-    atomic_dec (&sem->semaphore.nwaiters);
-    return ret;
-}
-
-void _DkSemaphoreRelease (PAL_HANDLE sem, int count)
-{
-    /* optimization: use it as a mutex */
-    if (sem->semaphore.max_value == 1) {
-        struct mutex_handle * mut =
-            &sem->semaphore.value.mut;
-
-        int ret = _DkMutexUnlock(mut);
-        if (ret < 0)
-            _DkRaiseFailure(ret);
-        return;
-    }
-
-    struct atomic_int * value = &sem->semaphore.value.i;
-
-    if (count == 1)
-        atomic_inc (value);
-    else
-        atomic_add (count, value);
-
-    int nwaiters = atomic_read (&sem->semaphore.nwaiters);
-    if (nwaiters > 0)
-        ocall_futex((int *) &value->counter, FUTEX_WAKE, nwaiters, NULL);
-}
-
-int _DkSemaphoreGetCurrentCount (PAL_HANDLE sem)
-{
-    if (sem->semaphore.max_value == 1) {
-        struct mutex_handle * m = &sem->semaphore.value.mut;
-        return m->b.locked;
-    }
-
-    int c = atomic_read(&sem->semaphore.value.i);
-    return sem->semaphore.max_value - c;
-}

+ 2 - 2
Pal/src/host/Linux-SGX/db_sockets.c

@@ -239,8 +239,8 @@ PAL_HANDLE socket_create_handle (int type, int fd, int options,
     if (!hdl)
         return NULL;
 
-    memset(hdl, 0, sizeof(union pal_handle));
-    HANDLE_TYPE(hdl) = type;
+    memset(hdl, 0, sizeof(struct pal_handle));
+    init_handle_hdr(HANDLE_HDR(hdl), type);
     HANDLE_HDR(hdl)->flags |= RFD(0)|(type != pal_type_tcpsrv ? WFD(0) : 0);
     hdl->sock.fd = fd;
     void * addr = (void *) hdl + HANDLE_SIZE(sock);

+ 2 - 2
Pal/src/host/Linux-SGX/db_streams.c

@@ -292,7 +292,7 @@ int _DkSendHandle (PAL_HANDLE hdl, PAL_HANDLE cargo)
     for (int i = 0 ; i < MAX_FDS ; i++)
         if (HANDLE_HDR(cargo)->flags & (RFD(i)|WFD(1))) {
             hdl_hdr.fds |= 1U << i;
-            fds[nfds++] = HANDLE_HDR(cargo)->fds[i];
+            fds[nfds++] = cargo->generic.fds[i];
         }
 
     // ~ Initialize common parameter formessage passing
@@ -365,7 +365,7 @@ int _DkReceiveHandle(PAL_HANDLE hdl, PAL_HANDLE * cargo)
     for (int i = 0 ; i < MAX_FDS ; i++)
         if (hdl_hdr.fds & (1U << i)) {
             if (n < nfds) {
-                HANDLE_HDR(handle)->fds[i] = fds[n++];
+                handle->generic.fds[i] = fds[n++];
             } else {
                 HANDLE_HDR(handle)->flags &= ~(RFD(i)|WFD(i));
             }

+ 1 - 1
Pal/src/host/Linux-SGX/enclave_framework.c

@@ -127,7 +127,7 @@ int load_trusted_file (PAL_HANDLE file, sgx_stub_t ** stubptr,
     struct trusted_file * tf = NULL, * tmp;
     char uri[URI_MAX];
     char normpath[URI_MAX];
-    int ret, fd = HANDLE_HDR(file)->fds[0], uri_len, len;
+    int ret, fd = file->file.fd, uri_len, len;
 
     if (!(HANDLE_HDR(file)->flags & RFD(0))) 
         return -PAL_ERROR_DENIED;

+ 2 - 2
Pal/src/host/Linux-SGX/pal.map

@@ -5,9 +5,9 @@ PAL {
         DkThreadCreate; DkThreadDelayExecution;
         DkThreadYieldExecution; DkThreadExit; DkThreadResume;
 
-        DkSemaphoreCreate; DkNotificationEventCreate;
+        DkMutexCreate; DkNotificationEventCreate;
         DkSynchronizationEventCreate;
-        DkSemaphoreRelease;
+        DkMutexRelease;
         DkEventSet;  DkEventClear;
         DkObjectsWaitAny;
 

+ 109 - 119
Pal/src/host/Linux-SGX/pal_host.h

@@ -49,22 +49,25 @@ void free_untrusted (void * mem);
 
 #include <list.h>
 
-/* internal Mutex design, the structure has to align at integer boundary
-   because it is required by futex call. If DEBUG_MUTEX is defined,
-   mutex_handle will record the owner of mutex locking. */
+/* Simpler mutex design: a single variable that tracks whether the mutex
+ * is locked (just waste a 64 bit word for now).  State is 1 (locked) or
+ * 0 (unlocked).
+ *
+ * Keep a count of how many threads are waiting on the mutex.
+ *
+ * If DEBUG_MUTEX is defined, mutex_handle will record the owner of
+ * mutex locking. */
 struct mutex_handle {
-    union {
-        unsigned int u;
-        struct {
-            unsigned char locked;
-            unsigned char contended;
-        } b;
-    };
+    volatile int64_t locked;
+    struct atomic_int nwaiters;
+#ifdef DEBUG_MUTEX
+    int owner;
+#endif
 };
 
 /* Initializer of Mutexes */
 #define MUTEX_HANDLE_INIT    { .u = 0 }
-#define INIT_MUTEX_HANDLE(m)  do { m->u = 0; } while (0)
+#define INIT_MUTEX_HANDLE(m)  do { (m)->u = 0; } while (0)
 
 DEFINE_LIST(pal_handle_thread);
 struct pal_handle_thread {
@@ -75,117 +78,104 @@ struct pal_handle_thread {
     void * param;
 };
 
-typedef union pal_handle
+typedef struct pal_handle
 {
-    /* TSAI: Here we define the internal types of PAL_HANDLE
-     * in PAL design, user has not to access the content inside the
-     * handle, also there is no need to allocate the internal
-     * handles, so we hide the type name of these handles on purpose.
+    /*
+     * Here we define the internal structure of PAL_HANDLE.
+     * Users have no access to the content inside these handles.
      */
 
-    struct {
-        PAL_IDX type;
-        PAL_REF ref;
-        PAL_FLG flags;
-        PAL_IDX fds[];
-    } hdr;
-
-    struct {
-        PAL_HDR reserved;
-        PAL_IDX fd;
-        PAL_BOL append;
-        PAL_BOL pass;
-        PAL_STR realpath;
-        PAL_NUM total;
-        PAL_PTR stubs;
-    } file;
-
-    struct {
-        PAL_HDR reserved;
-        PAL_IDX fd;
-        PAL_NUM pipeid;
-        PAL_BOL nonblocking;
-    } pipe;
-
-    struct {
-        PAL_HDR reserved;
-        PAL_IDX fds[2];
-        PAL_BOL nonblocking;
-    } pipeprv;
-
-    struct {
-        PAL_HDR reserved;
-        PAL_IDX fd_in, fd_out;
-        PAL_IDX dev_type;
-        PAL_BOL destroy;
-        PAL_STR realpath;
-    } dev;
-
-    struct {
-        PAL_HDR reserved;
-        PAL_IDX fd;
-        PAL_STR realpath;
-        PAL_PTR buf;
-        PAL_PTR ptr;
-        PAL_PTR end;
-        PAL_BOL endofstream;
-    } dir;
-
-    struct {
-        PAL_HDR reserved;
-        PAL_IDX fd;
-    } gipc;
-
-    struct {
-        PAL_HDR reserved;
-        PAL_IDX fd;
-        PAL_PTR bind;
-        PAL_PTR conn;
-        PAL_BOL nonblocking;
-        PAL_NUM linger;
-        PAL_NUM receivebuf;
-        PAL_NUM sendbuf;
-        PAL_NUM receivetimeout;
-        PAL_NUM sendtimeout;
-        PAL_BOL tcp_cork;
-        PAL_BOL tcp_keepalive;
-        PAL_BOL tcp_nodelay;
-    } sock;
-
-    struct {
-        PAL_HDR reserved;
-        PAL_IDX stream_in, stream_out;
-        PAL_IDX cargo;
-        PAL_IDX pid;
-        PAL_BOL nonblocking;
-    } process;
-
-    struct {
-        PAL_HDR reserved;
-        PAL_IDX cli;
-        PAL_IDX srv;
-        PAL_IDX port;
-        PAL_BOL nonblocking;
-    } mcast;
-
-    struct pal_handle_thread thread;
-
-    struct {
-        PAL_HDR reserved;
-        struct atomic_int nwaiters;
-        PAL_NUM max_value;
-        union {
-            struct mutex_handle mut;
-            struct atomic_int i;
-        } value;
-    } semaphore;
-
-    struct {
-        PAL_HDR reserved;
-        struct atomic_int signaled;
-        struct atomic_int nwaiters;
-        PAL_BOL isnotification;
-    } event;
+    PAL_HDR hdr;
+    union {
+        struct {
+            PAL_IDX fds[2];
+        } generic;
+
+        struct {
+            PAL_IDX fd;
+            PAL_BOL append;
+            PAL_BOL pass;
+            PAL_STR realpath;
+            PAL_NUM total;
+            PAL_PTR stubs;
+        } file;
+
+        struct {
+            PAL_IDX fd;
+            PAL_NUM pipeid;
+            PAL_BOL nonblocking;
+        } pipe;
+
+        struct {
+            PAL_IDX fds[2];
+            PAL_BOL nonblocking;
+        } pipeprv;
+
+        struct {
+            PAL_IDX fd_in, fd_out;
+            PAL_IDX dev_type;
+            PAL_BOL destroy;
+            PAL_STR realpath;
+        } dev;
+
+        struct {
+            PAL_IDX fd;
+            PAL_STR realpath;
+            PAL_PTR buf;
+            PAL_PTR ptr;
+            PAL_PTR end;
+            PAL_BOL endofstream;
+        } dir;
+
+        struct {
+            PAL_IDX fd;
+        } gipc;
+
+        struct {
+            PAL_IDX fd;
+            PAL_PTR bind;
+            PAL_PTR conn;
+            PAL_BOL nonblocking;
+            PAL_NUM linger;
+            PAL_NUM receivebuf;
+            PAL_NUM sendbuf;
+            PAL_NUM receivetimeout;
+            PAL_NUM sendtimeout;
+            PAL_BOL tcp_cork;
+            PAL_BOL tcp_keepalive;
+            PAL_BOL tcp_nodelay;
+        } sock;
+
+        struct {
+            PAL_IDX stream_in, stream_out;
+            PAL_IDX cargo;
+            PAL_IDX pid;
+            PAL_BOL nonblocking;
+        } process;
+
+        struct {
+            PAL_IDX cli;
+            PAL_IDX srv;
+            PAL_IDX port;
+            PAL_BOL nonblocking;
+        } mcast;
+
+        struct pal_handle_thread thread;
+
+        struct {
+            struct atomic_int nwaiters;
+            PAL_NUM max_value;
+            union {
+                struct mutex_handle mut;
+            } mutex;
+
+            struct {
+                struct atomic_int signaled;
+                struct atomic_int nwaiters;
+                PAL_BOL isnotification;
+            } event;
+        };
+    };
 } * PAL_HANDLE;
 
 #define RFD(n)          (00001 << (n))

+ 12 - 14
Pal/src/host/Linux-SGX/pal_linux.h

@@ -31,6 +31,15 @@
 #include "sgx_api.h"
 #include "enclave_ocalls.h"
 
+#ifdef __x86_64__
+# include "sysdep-x86_64.h"
+#endif
+
+#define IS_ERR INTERNAL_SYSCALL_ERROR
+#define IS_ERR_P INTERNAL_SYSCALL_ERROR_P
+#define ERRNO INTERNAL_SYSCALL_ERRNO
+#define ERRNO_P INTERNAL_SYSCALL_ERRNO_P
+
 extern struct pal_linux_state {
     PAL_NUM         parent_process_id;
     PAL_NUM         process_id;
@@ -75,11 +84,11 @@ bool stataccess (struct stat * stats, int acc);
 #ifdef IN_ENCLAVE
 
 /* Locking and unlocking of Mutexes */
-int _DkMutexCreate (struct mutex_handle * mut);
+int __DkMutexCreate (struct mutex_handle * mut);
 int _DkMutexAtomicCreate (struct mutex_handle * mut);
-int _DkMutexDestroy (struct mutex_handle * mut);
+int __DkMutexDestroy (struct mutex_handle * mut);
 int _DkMutexLock (struct mutex_handle * mut);
-int _DkMutexLockTimeout (struct mutex_handle * mut, int timeout);
+int _DkMutexLockTimeout (struct mutex_handle * mut, uint64_t timeout);
 int _DkMutexUnlock (struct mutex_handle * mut);
 
 int * get_futex (void);
@@ -213,15 +222,4 @@ int pal_printf(const char * fmt, ...);
     do { if ((class) & DBG_LEVEL) pal_printf(fmt); } while (0)
 #endif
 
-#ifdef __i386__
-# define rmb()           asm volatile("lock; addl $0,0(%%esp)" ::: "memory")
-# define cpu_relax()     asm volatile("rep; nop" ::: "memory");
-#endif
-
-#ifdef __x86_64__
-# include <unistd.h>
-# define rmb()           asm volatile("lfence" ::: "memory")
-# define cpu_relax()     asm volatile("rep; nop" ::: "memory");
-#endif
-
 #endif /* PAL_LINUX_H */

+ 1 - 1
Pal/src/host/Linux/Makefile

@@ -13,7 +13,7 @@ host_files = libpal-Linux.a
 
 defs	= -DIN_PAL -DPAL_DIR=$(PAL_DIR) -DRUNTIME_DIR=$(RUNTIME_DIR)
 objs	= $(addprefix db_,files devices pipes sockets streams memory threading \
-	    semaphore mutex events process object main rtld misc ipc \
+	    mutex events process object main rtld misc ipc \
 	    exception) manifest clone-x86_64 gettimeofday-x86_64
 graphene_lib = ../../.lib/graphene-lib.a
 headers	= $(wildcard *.h) $(wildcard ../../*.h) $(wildcard ../../../lib/*.h)

+ 1 - 0
Pal/src/host/Linux/db_devices.c

@@ -231,6 +231,7 @@ static int dev_open (PAL_HANDLE * handle, const char * type, const char * uri,
             return -PAL_ERROR_NOTSUPPORT;
 
     PAL_HANDLE hdl = malloc(HANDLE_SIZE(dev));
+    SET_HANDLE_TYPE(hdl, dev);
     hdl->dev.fd_in  = PAL_IDX_POISON;
     hdl->dev.fd_out = PAL_IDX_POISON;
     *handle = hdl;

+ 2 - 2
Pal/src/host/Linux/db_files.c

@@ -241,7 +241,7 @@ static int file_attrquery (const char * type, const char * uri,
 static int file_attrquerybyhdl (PAL_HANDLE handle,
                                 PAL_STREAM_ATTR * attr)
 {
-    int fd = HANDLE_HDR(handle)->fds[0];
+    int fd = handle->generic.fds[0];
     struct stat stat_buf;
 
     int ret = INLINE_SYSCALL(fstat, 2, fd, &stat_buf);
@@ -256,7 +256,7 @@ static int file_attrquerybyhdl (PAL_HANDLE handle,
 static int file_attrsetbyhdl (PAL_HANDLE handle,
                               PAL_STREAM_ATTR * attr)
 {
-    int fd = HANDLE_HDR(handle)->fds[0], ret;
+    int fd = handle->generic.fds[0], ret;
 
     ret = INLINE_SYSCALL(fchmod, 2, fd, attr->share_flags | 0600);
     if (IS_ERR(ret))

+ 107 - 83
Pal/src/host/Linux/db_mutex.c

@@ -36,73 +36,128 @@
 #include <linux/futex.h>
 #include <limits.h>
 #include <atomic.h>
-#include <cmpxchg.h>
 #include <asm/errno.h>
 #include <linux/time.h>
 #include <unistd.h>
 
-#ifdef __i386__
-# define barrier()       asm volatile("" ::: "memory");
-# define rmb()           asm volatile("lock; addl $0,0(%%esp)" ::: "memory")
-# define cpu_relax()     asm volatile("rep; nop" ::: "memory");
-#endif
-
 #ifdef __x86_64__
 # include <unistd.h>
-# define barrier()       asm volatile("" ::: "memory");
-# define rmb()           asm volatile("lfence" ::: "memory")
-# define cpu_relax()     asm volatile("rep; nop" ::: "memory");
 #endif
 
 #define MUTEX_SPINLOCK_TIMES    100
+#define MUTEX_UNLOCKED 0
+#define MUTEX_LOCKED   1
+
+/* Interplay between locked and nwaiters:
+ * 
+ * If lock is unlocked and uncontended, just set the locked state.
+ * 
+ * Important possible interleavings of lock and unlock:
+ * 
+ * Case 1: 
+ * 
+ * Owner:                Locker:
+ *                       Try lock and fail; increment nwaiters; sleep
+ * Set state to unlocked
+ * Read nwaiters; wake
+ *                       Try again and succeed.
+ *
+ * ***************************************************
+ *
+ * Case 2: 
+ * 
+ * Owner:                Locker:
+ *                       Try lock and fail
+ * Set state to unlocked
+ * Read nwaiters (=0)
+ *                      Increment nwaiters.
+ *                      Can't go to sleep here; will cmpxchg locked and succeed
+ * Don't wake anyone
+ */
+
+int
+_DkMutexCreate (PAL_HANDLE handle, int initialCount)
+{
+    /*
+     * Allocation and free of the handle are done outside of host-specific code.
+     * This code initializes the mutex state that is host-specific,
+     * including how initialCount is encoded.
+     */
+    atomic_set(&handle->mutex.mut.nwaiters, 0);
+    handle->mutex.mut.locked = initialCount;
+    return 0;
+}
+
+void _DkMutexDestroy (PAL_HANDLE handle)
+{
+    // Do nothing; handled in higher-level code
+}
+
 
 int _DkMutexLockTimeout (struct mutex_handle * m, uint64_t timeout)
 {
-    int ret = 0;
+    int i, ret = 0;
 #ifdef DEBUG_MUTEX
     int tid = INLINE_SYSCALL(gettid, 0);
 #endif
+    /* If this is a trylock-style call, break more quickly. */
+    int iterations = (timeout == 0) ? 1 : MUTEX_SPINLOCK_TIMES;
 
-    if (timeout == -1)
-        return -_DkMutexLock(m);
-
-    if (!xchg(&m->b.locked, 1))
-        goto success;
+    /* Spin and try to take lock.  Ignore any contribution this makes toward
+     * the timeout.*/
+    for (i = 0; i < iterations; i++) {
+        if (MUTEX_UNLOCKED == cmpxchg(&m->locked, MUTEX_UNLOCKED, MUTEX_LOCKED))
+            goto success;
+        cpu_relax();
+    }
 
     if (timeout == 0) {
         ret = -PAL_ERROR_TRYAGAIN;
         goto out;
     }
 
-    while (xchg(&m->u, 257) & 1) {
-        struct timespec waittime;
-        long sec = timeout / 1000000;
-        long microsec = timeout - (sec * 1000000);
-        waittime.tv_sec = sec;
-        waittime.tv_nsec = microsec * 1000;
+    // Bump up the waiters count; we are probably going to block
+    atomic_inc(&m->nwaiters);
+
+    while (MUTEX_LOCKED == cmpxchg(&m->locked, MUTEX_UNLOCKED, MUTEX_LOCKED)) {
+        struct timespec waittime, *waittimep = NULL;
+        if (timeout != NO_TIMEOUT) {
+            long sec = timeout / 1000000;
+            long microsec = timeout - (sec * 1000000);
+            waittime.tv_sec = sec;
+            waittime.tv_nsec = microsec * 1000;
+            waittimep = &waittime;
+        }
 
-        ret = INLINE_SYSCALL(futex, 6, m, FUTEX_WAIT, 257, &waittime, NULL, 0);
+        ret = INLINE_SYSCALL(futex, 6, m, FUTEX_WAIT, MUTEX_LOCKED, waittimep, NULL, 0);
 
         if (IS_ERR(ret)) {
             if (ERRNO(ret) == EWOULDBLOCK) {
-                xchg(&m->b.contended, 0);
-                ret = -PAL_ERROR_TRYAGAIN;
-                goto out;
-            }
+                if (timeout != NO_TIMEOUT) {
+                    ret = -PAL_ERROR_TRYAGAIN;
+                    atomic_dec(&m->nwaiters);
+                    goto out;
+                }
+            } else {
 #ifdef DEBUG_MUTEX
-            printf("futex failed (err = %d)\n", ERRNO(ret));
+                printf("futex failed (err = %d)\n", ERRNO(ret));
 #endif
-            ret = unix_to_pal_error(ERRNO(ret));
-            goto out;
+                ret = unix_to_pal_error(ERRNO(ret));
+                atomic_dec(&m->nwaiters);
+                goto out;
+            }
         }
     }
 
+    atomic_dec(&m->nwaiters);
+
 success:
 #ifdef DEBUG_MUTEX
     m->owner = tid;
 #endif
     ret = 0;
 out:
+
 #ifdef DEBUG_MUTEX
     if (ret < 0)
         printf("mutex failed (%s, tid = %d)\n", PAL_STRERROR(ret), tid);
@@ -112,71 +167,40 @@ out:
 
 int _DkMutexLock (struct mutex_handle * m)
 {
-    int ret = 0, i;
-#ifdef DEBUG_MUTEX
-    int tid = INLINE_SYSCALL(gettid, 0);
-#endif
-
-    /* Spin and try to take lock */
-    for (i = 0; i < MUTEX_SPINLOCK_TIMES; i++) {
-        if (!xchg(&m->b.locked, 1))
-            goto success;
-        cpu_relax();
-    }
-
-    while (xchg(&m->u, 257) & 1) {
-        ret = INLINE_SYSCALL(futex, 6, m, FUTEX_WAIT, 257, NULL, NULL, 0);
-
-        if (IS_ERR(ret) &&
-            ERRNO(ret) != EWOULDBLOCK) {
-#ifdef DEBUG_MUTEX
-            printf("futex failed (err = %d)\n", ERRNO(ret));
-#endif
-            ret = unix_to_pal_error(ERRNO(ret));
-            goto out;
-        }
-    }
+    return _DkMutexLockTimeout(m, -1);
+}
 
-success:
-#ifdef DEBUG_MUTEX
-    m->owner = tid;
-#endif
-    ret = 0;
-out:
-#ifdef DEBUG_MUTEX
-    if (ret < 0)
-        printf("mutex failed (%s, tid = %d)\n", PAL_STRERROR(ret), tid);
-#endif
-    return ret;
+/* Acquire the mutex stored in a PAL mutex handle, forwarding `timeout`
+ * unchanged to _DkMutexLockTimeout and returning its result. */
+int _DkMutexAcquireTimeout (PAL_HANDLE handle, int timeout)
+{
+    struct mutex_handle * mut = &handle->mutex.mut;
+
+    return _DkMutexLockTimeout(mut, timeout);
 }
 
 int _DkMutexUnlock (struct mutex_handle * m)
 {
-    int ret = 0, i;
+    int ret = 0;
+    int need_wake;
 
 #ifdef DEBUG_MUTEX
     m->owner = 0;
 #endif
 
-    /* Unlock, and if not contended then exit. */
-    if ((m->u == 1) && (cmpxchg(&m->u, 1, 0) == 1)) return 0;
-    m->b.locked = 0;
-    barrier();
+    /* Unlock: store 0 into the lock word to release the mutex.  After the
+     * barrier below, one futex waiter is woken if any thread is counted in
+     * nwaiters.  The function always returns 0. */
+    m->locked = 0;
+    /* We need to make sure the write to locked is visible to lock-ers
+     * before we read the waiter count; mb() is placed between the two
+     * accesses for that purpose. */
+    mb();
 
-    /* Spin and try to take lock */
-    for (i = 0; i < MUTEX_SPINLOCK_TIMES * 2; i++) {
-        if (m->b.locked)
-            goto success;
-        cpu_relax();
-    }
+    need_wake = atomic_read(&m->nwaiters);
 
-    m->b.contended = 0;
+    /* If we need to wake someone up: nwaiters was non-zero when sampled, so
+     * ask the kernel to wake at most one thread blocked on this futex (the
+     * wake count argument is 1).  The syscall's result is ignored. */
+    if (need_wake)
+        INLINE_SYSCALL(futex, 6, m, FUTEX_WAKE, 1, NULL, NULL, 0);
 
-    /* We need to wake someone up */
-    INLINE_SYSCALL(futex, 6, m, FUTEX_WAKE, 1, NULL, NULL, 0);
-
-success:
-    ret = 0;
-out:
     return ret;
 }
+
+/* Release the mutex stored in a PAL mutex handle.  The status returned by
+ * _DkMutexUnlock is dropped because this entry point returns nothing. */
+void _DkMutexRelease (PAL_HANDLE handle)
+{
+    struct mutex_handle * mut = &handle->mutex.mut;
+
+    _DkMutexUnlock(mut);
+}

+ 4 - 5
Pal/src/host/Linux/db_object.c

@@ -36,7 +36,6 @@
 #include <linux/poll.h>
 #include <linux/wait.h>
 #include <atomic.h>
-#include <cmpxchg.h>
 #include <asm/errno.h>
 
 #define DEFAULT_QUANTUM 500
@@ -77,7 +76,7 @@ static int _DkObjectWaitOne (PAL_HANDLE handle, uint64_t timeout)
                 events |= POLLOUT;
 
             if (events) {
-                fds[nfds].fd = HANDLE_HDR(handle)->fds[i];
+                fds[nfds].fd = handle->generic.fds[i];
                 fds[nfds].events = events|POLLHUP|POLLERR;
                 fds[nfds].revents = 0;
                 off[nfds] = i;
@@ -191,8 +190,8 @@ int _DkObjectsWaitAny (int count, PAL_HANDLE * handleArray, uint64_t timeout,
                 !(HANDLE_HDR(hdl)->flags & ERROR(j)))
                 events |= POLLOUT;
 
-            if (events && HANDLE_HDR(hdl)->fds[j] != PAL_IDX_POISON) {
-                fds[nfds].fd = HANDLE_HDR(hdl)->fds[j];
+            if (events && hdl->generic.fds[j] != PAL_IDX_POISON) {
+                fds[nfds].fd = hdl->generic.fds[j];
                 fds[nfds].events = events|POLLHUP|POLLERR;
                 fds[nfds].revents = 0;
                 hdls[nfds] = hdl;
@@ -246,7 +245,7 @@ int _DkObjectsWaitAny (int count, PAL_HANDLE * handleArray, uint64_t timeout,
 
         for (j = 0 ; j < MAX_FDS ; j++)
             if ((HANDLE_HDR(hdl)->flags & (RFD(j)|WFD(j))) &&
-                HANDLE_HDR(hdl)->fds[j] == fds[i].fd)
+                hdl->generic.fds[j] == fds[i].fd)
                 break;
 
         if (j == MAX_FDS)

+ 11 - 10
Pal/src/host/Linux/db_pipes.c

@@ -524,28 +524,29 @@ static int pipe_attrquerybyhdl (PAL_HANDLE handle, PAL_STREAM_ATTR * attr)
 {
     int ret, val;
 
-    if (HANDLE_HDR(handle)->fds[0] == PAL_IDX_POISON)
+    if (handle->generic.fds[0] == PAL_IDX_POISON)
         return -PAL_ERROR_BADHANDLE;
 
     attr->handle_type  = PAL_GET_TYPE(handle);
 
-    int read_fd = HANDLE_HDR(handle)->fds[0];
-    int flags = HANDLE_HDR(handle)->flags;
-
-    if (!IS_HANDLE_TYPE(handle, pipesrv)) {
-        ret = INLINE_SYSCALL(ioctl, 3, read_fd, FIONREAD, &val);
+    if (attr->handle_type != pal_type_pipesrv) {
+        ret = INLINE_SYSCALL(ioctl, 3, handle->generic.fds[0], FIONREAD, &val);
         if (IS_ERR(ret)) {
             return unix_to_pal_error(ERRNO(ret));
         }
         attr->pending_size = val;
-        attr->writeable    = flags & (
+        attr->writeable    = HANDLE_HDR(handle)->flags & (
             IS_HANDLE_TYPE(handle, pipeprv) ? WRITEABLE(1) : WRITEABLE(0));
     } else {
+        /* Server pipes report no pending bytes and are never writeable.
+         * Readability is not probed here: the zero-timeout ppoll() right
+         * after this if/else sets attr->readable for every handle type. */
         attr->pending_size = 0;
         attr->writeable    = PAL_FALSE;
     }
 
-    struct pollfd pfd = { .fd = read_fd, .events = POLLIN, .revents = 0 };
+    struct pollfd pfd = { .fd = handle->generic.fds[0], .events = POLLIN, .revents = 0 };
     struct timespec tp = { 0, 0 };
     ret = INLINE_SYSCALL(ppoll, 5, &pfd, 1, &tp, NULL, 0);
     attr->readable = (ret == 1 && pfd.revents == POLLIN);
@@ -558,7 +559,7 @@ static int pipe_attrquerybyhdl (PAL_HANDLE handle, PAL_STREAM_ATTR * attr)
 
 static int pipe_attrsetbyhdl (PAL_HANDLE handle, PAL_STREAM_ATTR * attr)
 {
-    if (HANDLE_HDR(handle)->fds[0] == PAL_IDX_POISON)
+    if (handle->generic.fds[0] == PAL_IDX_POISON)
         return -PAL_ERROR_BADHANDLE;
 
     int ret;
@@ -567,7 +568,7 @@ static int pipe_attrsetbyhdl (PAL_HANDLE handle, PAL_STREAM_ATTR * attr)
                             &handle->pipe.nonblocking;
 
     if (attr->nonblocking != *nonblocking) {
-        ret = INLINE_SYSCALL(fcntl, 3, HANDLE_HDR(handle)->fds[0], F_SETFL,
+        ret = INLINE_SYSCALL(fcntl, 3, handle->generic.fds[0], F_SETFL,
                              attr->nonblocking ? O_NONBLOCK : 0);
 
         if (IS_ERR(ret))

+ 0 - 263
Pal/src/host/Linux/db_semaphore.c

@@ -1,263 +0,0 @@
-/* -*- mode:c; c-file-style:"k&r"; c-basic-offset: 4; tab-width:4; indent-tabs-mode:nil; mode:auto-fill; fill-column:78; -*- */
-/* vim: set ts=4 sw=4 et tw=78 fo=cqt wm=0: */
-
-/* Copyright (C) 2014 OSCAR lab, Stony Brook University
-   This file is part of Graphene Library OS.
-
-   Graphene Library OS is free software: you can redistribute it and/or
-   modify it under the terms of the GNU General Public License
-   as published by the Free Software Foundation, either version 3 of the
-   License, or (at your option) any later version.
-
-   Graphene Library OS is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-   GNU General Public License for more details.
-
-   You should have received a copy of the GNU General Public License
-   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
-
-/*
- * db_semaphore.c
- *
- * This file contains APIs that provides operations of semaphores.
- */
-
-#include "pal_defs.h"
-#include "pal_linux_defs.h"
-#include "pal.h"
-#include "pal_internal.h"
-#include "pal_linux.h"
-#include "pal_error.h"
-#include "api.h"
-
-#include <cmpxchg.h>
-#include <atomic.h>
-#include <linux/futex.h>
-#include <limits.h>
-#include <errno.h>
-#include <linux/time.h>
-
-static inline int atomic_dec_if_positive (struct atomic_int *v)
-{
-    int c, old, dec;
-    c = atomic_read(v);
-    for (;;) {
-        dec = c - 1;
-        if (unlikely(dec < 0))
-            break;
-        old = atomic_cmpxchg((v), c, dec);
-        if (likely(old == c))
-            break;
-        c = old;
-    }
-    return dec;
-}
-
-int
-_DkSemaphoreCreate (PAL_HANDLE handle, int initialCount, int maxCount)
-{
-    /*
-     * 1. Allocate memory for db_sem (this includes a futex variable).
-     * 2. Pack it into a PAL_HANDLE
-     * 3. Set the semaphore object with the argument values (count, maxCount)
-     */
-
-    SET_HANDLE_TYPE(handle, semaphore);
-    atomic_set(&handle->semaphore.nwaiters, 0);
-    handle->semaphore.max_value = maxCount;
-
-    /* optimization: if maxCount == 1, we make it into mutex */
-    if (handle->semaphore.max_value == 1) {
-        handle->semaphore.value.mut.u = initialCount;
-    } else {
-        atomic_set(&handle->semaphore.value.i, maxCount - initialCount);
-    }
-
-    return 0;
-}
-
-void _DkSemaphoreDestroy (PAL_HANDLE semaphoreHandle)
-{
-    free(semaphoreHandle);
-}
-
-int _DkMutexLockTimeout (struct mutex_handle * mut, uint64_t timeout);
-
-int _DkSemaphoreAcquire (PAL_HANDLE sem, int count)
-{
-    /* optimization: use it as a mutex */
-    if (sem->semaphore.max_value == 1) {
-        struct mutex_handle * mut = &sem->semaphore.value.mut;
-        _DkMutexLock(mut);
-        return 0;
-    }
-
-    if (count > sem->semaphore.max_value)
-        return -PAL_ERROR_INVAL;
-
-    struct atomic_int * value = &sem->semaphore.value.i;
-    int c = 0;
-
-    if (!value)
-        return -PAL_ERROR_BADHANDLE;
-
-    if (count == 1)
-        c = atomic_dec_and_test_nonnegative (value);
-    else
-        c = atomic_sub_and_test_nonnegative (count, value);
-
-    if (c)
-        return 0;
-
-    /* We didn't get the lock.  Bump the count back up. */
-    if (count == 1)
-        atomic_inc (value);
-    else
-        atomic_add (count, value);
-
-    int ret = 0;
-    atomic_inc (&sem->semaphore.nwaiters);
-
-    while (1) {
-        ret = INLINE_SYSCALL(futex, 6, value, FUTEX_WAIT, 0,
-                             NULL, NULL, 0);
-
-        if (IS_ERR(ret)) {
-            if (ERRNO(ret) == EWOULDBLOCK) {
-                ret = 0;
-            } else {
-                ret = unix_to_pal_error(ERRNO(ret));
-                break;
-            }
-        }
-
-        if (count == 1)
-            c = atomic_dec_and_test_nonnegative (value);
-        else
-            c = atomic_sub_and_test_nonnegative (count, value);
-
-        if (c)
-            break;
-
-        /* We didn't get the lock.  Bump the count back up. */
-        if (count == 1)
-            atomic_inc (value);
-        else
-            atomic_add (count, value);
-    }
-
-    atomic_dec (&sem->semaphore.nwaiters);
-    return ret;
-}
-
-int _DkSemaphoreAcquireTimeout (PAL_HANDLE sem, int count, uint64_t timeout)
-{
-    /* Pass it up to the no-timeout version if no timeout requested */
-    if (timeout == NO_TIMEOUT)
-        return _DkSemaphoreAcquire(sem, count);
-
-    /* optimization: use it as a mutex */
-    if (sem->semaphore.max_value == 1) {
-        struct mutex_handle * mut = & sem->semaphore.value.mut;
-        return _DkMutexLockTimeout(mut, timeout);
-    }
-
-    if (count > sem->semaphore.max_value)
-        return -PAL_ERROR_INVAL;
-
-    struct atomic_int * value = &sem->semaphore.value.i;
-    int c = 0;
-
-    if (!value)
-        return -PAL_ERROR_BADHANDLE;
-
-    if (count == 1)
-        c = atomic_dec_and_test_nonnegative (value);
-    else
-        c = atomic_sub_and_test_nonnegative (count, value);
-
-    if (c)
-        return 0;
-
-    /* We didn't get the lock.  Bump the count back up. */
-    if (count == 1)
-        atomic_inc (value);
-    else
-        atomic_add (count, value);
-
-    if (!timeout)
-        return -PAL_ERROR_TRYAGAIN;
-
-    struct timespec waittime;
-    long sec = timeout / 1000000;
-    long microsec = timeout - (sec * 1000000);
-    waittime.tv_sec = sec;
-    waittime.tv_nsec = microsec * 1000;
-    int ret = 0;
-    atomic_inc (&sem->semaphore.nwaiters);
-
-    while (1) {
-        ret = INLINE_SYSCALL(futex, 6, value, FUTEX_WAIT, 0,
-                             &waittime, NULL, 0);
-
-        if (ERRNO(ret) == EWOULDBLOCK) {
-            ret = 0;
-        } else {
-            ret = unix_to_pal_error(ERRNO(ret));
-            break;
-        }
-
-        if (count == 1)
-            c = atomic_dec_and_test_nonnegative (value);
-        else
-            c = atomic_sub_and_test_nonnegative (count, value);
-
-        if (c)
-            break;
-    }
-
-    /* We didn't get the lock.  Bump the count back up. */
-    if (count == 1)
-        atomic_inc (value);
-    else
-        atomic_add (count, value);
-
-    atomic_dec (&sem->semaphore.nwaiters);
-    return ret;
-}
-
-void _DkSemaphoreRelease (PAL_HANDLE sem, int count)
-{
-    /* optimization: use it as a mutex */
-    if (sem->semaphore.max_value == 1) {
-        struct mutex_handle * mut =
-            &sem->semaphore.value.mut;
-
-        _DkMutexUnlock(mut);
-        return;
-    }
-
-    struct atomic_int * value = &sem->semaphore.value.i;
-
-    if (count == 1)
-        atomic_inc (value);
-    else
-        atomic_add (count, value);
-
-    int nwaiters = atomic_read (&sem->semaphore.nwaiters);
-
-    if (nwaiters > 0)
-        INLINE_SYSCALL(futex, 6, value, FUTEX_WAKE, nwaiters, NULL, NULL, 0);
-}
-
-int _DkSemaphoreGetCurrentCount (PAL_HANDLE sem)
-{
-    if (sem->semaphore.max_value == 1) {
-        struct mutex_handle * m = &sem->semaphore.value.mut;
-        return m->b.locked;
-    }
-
-    int c = atomic_read(&sem->semaphore.value.i);
-    return sem->semaphore.max_value - c;
-}

+ 2 - 2
Pal/src/host/Linux/db_sockets.c

@@ -243,8 +243,8 @@ PAL_HANDLE socket_create_handle (int type, int fd, int options,
     if (!hdl)
         return NULL;
 
-    memset(hdl, 0, sizeof(union pal_handle));
-    PAL_GET_TYPE(hdl) = type;
+    memset(hdl, 0, sizeof(struct pal_handle));
+    init_handle_hdr(HANDLE_HDR(hdl), type);
     HANDLE_HDR(hdl)->flags |= RFD(0)|(type != pal_type_tcpsrv ? WFD(0) : 0);
     hdl->sock.fd = fd;
     void * addr = (void *) hdl + HANDLE_SIZE(sock);

+ 3 - 3
Pal/src/host/Linux/db_streams.c

@@ -92,7 +92,7 @@ int handle_set_cloexec (PAL_HANDLE handle, bool enable)
         if (HANDLE_HDR(handle)->flags & (RFD(i)|WFD(i))) {
             long flags = enable ? FD_CLOEXEC : 0;
             int ret = INLINE_SYSCALL(fcntl, 3,
-                                     HANDLE_HDR(handle)->fds[i], F_SETFD,
+                                     handle->generic.fds[i], F_SETFD,
                                      flags);
             if (IS_ERR(ret) && ERRNO(ret) != EBADF)
                 return -PAL_ERROR_DENIED;
@@ -305,7 +305,7 @@ int _DkSendHandle (PAL_HANDLE hdl, PAL_HANDLE cargo)
     for (int i = 0 ; i < MAX_FDS ; i++)
-        if (HANDLE_HDR(cargo)->flags & (RFD(i)|WFD(1))) {
+        /* Test slot i's own read and write flags; the receive side clears
+         * RFD(i)|WFD(i) per slot, so the send side must select the same way. */
+        if (HANDLE_HDR(cargo)->flags & (RFD(i)|WFD(i))) {
             hdl_hdr.fds |= 1U << i;
-            fds[nfds++] = HANDLE_HDR(cargo)->fds[i];
+            fds[nfds++] = cargo->generic.fds[i];
         }
 
     // ~ Initialize common parameter formessage passing
@@ -457,7 +457,7 @@ int _DkReceiveHandle(PAL_HANDLE hdl, PAL_HANDLE * cargo)
     for (int i = 0 ; i < MAX_FDS ; i++)
         if (hdl_hdr.fds & (1U << i)) {
             if (n < total_fds) {
-                HANDLE_HDR(handle)->fds[i] = ((int *) CMSG_DATA(chdr))[n++];
+                handle->generic.fds[i] = ((int *) CMSG_DATA(chdr))[n++];
             } else {
                 HANDLE_HDR(handle)->flags &= ~(RFD(i)|WFD(i));
             }

+ 2 - 2
Pal/src/host/Linux/pal.map

@@ -9,9 +9,9 @@ PAL {
         DkThreadCreate; DkThreadDelayExecution;
         DkThreadYieldExecution; DkThreadExit; DkThreadResume;
 
-        DkSemaphoreCreate; DkNotificationEventCreate;
+        DkMutexCreate; DkNotificationEventCreate;
         DkSynchronizationEventCreate;
-        DkSemaphoreRelease;
+        DkMutexRelease;
         DkEventSet;  DkEventClear;
         DkObjectsWaitAny;
 

+ 103 - 120
Pal/src/host/Linux/pal_host.h

@@ -30,25 +30,25 @@
 # error "cannot be included outside PAL"
 #endif
 
-/* internal Mutex design, the structure has to align at integer boundary
-   because it is required by futex call. If DEBUG_MUTEX is defined,
-   mutex_handle will record the owner of mutex locking. */
+#include <atomic.h>
+
+/* Simpler mutex design: a single variable tracks whether the mutex is
+ * locked (this just wastes a 64-bit word for now).  State is 1 (locked) or
+ * 0 (unlocked).
+ * A separate counter tracks how many threads are waiting on the mutex.
+ * If DEBUG_MUTEX is defined, mutex_handle also records the owner of the
+ * locked mutex. */
 typedef struct mutex_handle {
-    union {
-        unsigned int u;
-        struct {
-            unsigned char locked;
-            unsigned char contended;
-        } b;
-    };
+    volatile int64_t locked;
+    struct atomic_int nwaiters;
 #ifdef DEBUG_MUTEX
     int owner;
 #endif
 } PAL_LOCK;
 
 /* Initializer of Mutexes */
-#define MUTEX_HANDLE_INIT    { .u = 0 }
-#define INIT_MUTEX_HANDLE(m)  do { m->u = 0; } while (0)
+#define MUTEX_HANDLE_INIT    { .locked = 0, .nwaiters.counter = 0 }
+/* Reset a mutex at run time.  `m` is parenthesized so that any pointer
+ * expression (not just a plain identifier) expands correctly. */
+#define INIT_MUTEX_HANDLE(m) \
+    do { (m)->locked = 0; atomic_set(&(m)->nwaiters, 0); } while (0)
 
 #define LOCK_INIT MUTEX_HANDLE_INIT
 #define INIT_LOCK(lock) INIT_MUTEX_HANDLE(lock);
@@ -56,122 +56,105 @@ typedef struct mutex_handle {
 #define _DkInternalLock _DkMutexLock
 #define _DkInternalUnlock _DkMutexUnlock
 
-typedef union pal_handle
+typedef struct pal_handle
 {
     /* TSAI: Here we define the internal types of PAL_HANDLE
      * in PAL design, user has not to access the content inside the
      * handle, also there is no need to allocate the internal
      * handles, so we hide the type name of these handles on purpose.
      */
+    PAL_HDR hdr;
+    
+    union {
+        struct {
+            PAL_IDX fds[2];
+        } generic;
+
+        struct {
+            PAL_IDX fd;
+            PAL_NUM offset;
+            PAL_BOL append;
+            PAL_BOL pass;
+            PAL_STR realpath;
+        } file;
+        
+        struct {
+            PAL_IDX fd;
+            PAL_NUM pipeid;
+            PAL_BOL nonblocking;
+        } pipe;
+
+        struct {
+            PAL_IDX fds[2];
+            PAL_BOL nonblocking;
+        } pipeprv;
+
+        struct {
+            PAL_IDX fd_in, fd_out;
+            PAL_IDX dev_type;
+            PAL_BOL destroy;
+            PAL_STR realpath;
+        } dev;
+
+        struct {
+            PAL_IDX fd;
+            PAL_STR realpath;
+            PAL_PTR buf;
+            PAL_PTR ptr;
+            PAL_PTR end;
+            PAL_BOL endofstream;
+        } dir;
 
-    struct {
-        PAL_IDX type;
-        PAL_REF ref;
-        PAL_FLG flags;
-        PAL_IDX fds[];
-    } hdr;
-
-    struct {
-        PAL_HDR reserved;
-        PAL_IDX fd;
-        PAL_NUM offset;
-        PAL_BOL append;
-        PAL_BOL pass;
-        PAL_STR realpath;
-    } file;
-
-    struct {
-        PAL_HDR reserved;
-        PAL_IDX fd;
-        PAL_NUM pipeid;
-        PAL_BOL nonblocking;
-    } pipe;
-
-    struct {
-        PAL_HDR reserved;
-        PAL_IDX fds[2];
-        PAL_BOL nonblocking;
-    } pipeprv;
-
-    struct {
-        PAL_HDR reserved;
-        PAL_IDX fd_in, fd_out;
-        PAL_IDX dev_type;
-        PAL_BOL destroy;
-        PAL_STR realpath;
-    } dev;
-
-    struct {
-        PAL_HDR reserved;
-        PAL_IDX fd;
-        PAL_STR realpath;
-        PAL_PTR buf;
-        PAL_PTR ptr;
-        PAL_PTR end;
-        PAL_BOL endofstream;
-    } dir;
-
-    struct {
-        PAL_HDR reserved;
-        PAL_IDX fd;
-        PAL_NUM token;
-    } gipc;
-
-    struct {
-        PAL_HDR reserved;
-        PAL_IDX fd;
-        PAL_PTR bind;
-        PAL_PTR conn;
-        PAL_BOL nonblocking;
-        PAL_BOL reuseaddr;
-        PAL_NUM linger;
-        PAL_NUM receivebuf;
-        PAL_NUM sendbuf;
-        PAL_NUM receivetimeout;
-        PAL_NUM sendtimeout;
-        PAL_BOL tcp_cork;
-        PAL_BOL tcp_keepalive;
-        PAL_BOL tcp_nodelay;
-    } sock;
-
-    struct {
-        PAL_HDR reserved;
-        PAL_IDX stream_in, stream_out;
-        PAL_IDX cargo;
-        PAL_IDX pid;
-        PAL_BOL nonblocking;
-    } process;
-
-    struct {
-        PAL_HDR reserved;
-        PAL_IDX cli;
-        PAL_IDX srv;
-        PAL_IDX port;
-        PAL_BOL nonblocking;
-        PAL_PTR addr;
-    } mcast;
-
-    struct {
-        PAL_HDR reserved;
-        PAL_IDX tid;
-    } thread;
-
-    struct {
-        PAL_HDR reserved;
-        struct atomic_int nwaiters;
-        PAL_NUM max_value;
-        union {
+        struct {
+            PAL_IDX fd;
+            PAL_NUM token;
+        } gipc;
+
+        struct {
+            PAL_IDX fd;
+            PAL_PTR bind;
+            PAL_PTR conn;
+            PAL_BOL nonblocking;
+            PAL_BOL reuseaddr;
+            PAL_NUM linger;
+            PAL_NUM receivebuf;
+            PAL_NUM sendbuf;
+            PAL_NUM receivetimeout;
+            PAL_NUM sendtimeout;
+            PAL_BOL tcp_cork;
+            PAL_BOL tcp_keepalive;
+            PAL_BOL tcp_nodelay;
+        } sock;
+
+        struct {
+            PAL_IDX stream_in, stream_out;
+            PAL_IDX cargo;
+            PAL_IDX pid;
+            PAL_BOL nonblocking;
+        } process;
+
+        struct {
+            PAL_IDX cli;
+            PAL_IDX srv;
+            PAL_IDX port;
+            PAL_BOL nonblocking;
+            PAL_PTR addr;
+        } mcast;
+
+        struct {
+            PAL_IDX tid;
+        } thread;
+
+        struct {
             struct mutex_handle mut;
-            struct atomic_int i;
-        } value;
-    } semaphore;
-
-    struct {
-        PAL_HDR reserved;
-        struct atomic_int signaled;
-        struct atomic_int nwaiters;
-        PAL_BOL isnotification;
-    } event;
+        } mutex;
+
+        struct {
+            struct atomic_int signaled;
+            struct atomic_int nwaiters;
+            PAL_BOL isnotification;
+        } event;
+    };
 } * PAL_HANDLE;
 
 #define RFD(n)          (00001 << (n))

+ 61 - 57
Pal/src/host/Skeleton/pal_host.h

@@ -33,69 +33,73 @@
 typedef int PAL_LOCK;
 #define LOCK_INIT   (0)
 
-typedef union pal_handle
+typedef struct pal_handle
 {
     /* TSAI: Here we define the internal types of PAL_HANDLE
      * in PAL design, user has not to access the content inside the
      * handle, also there is no need to allocate the internal
      * handles, so we hide the type name of these handles on purpose.
      */
-
-    struct {
-        PAL_IDX type;
-        PAL_REF ref;
-        PAL_FLG flags;
-        PAL_IDX fds[];
-    } hdr;
-
-    struct {
-        PAL_HDR reserved;
-    } file;
-
-    struct {
-        PAL_HDR reserved;
-    } pipe;
-
-    struct {
-        PAL_HDR reserved;
-    } pipeprv;
-
-    struct {
-        PAL_HDR reserved;
-        PAL_IDX dev_type;
-    } dev;
-
-    struct {
-        PAL_HDR reserved;
-    } dir;
-
-    struct {
-        PAL_HDR reserved;
-    } gipc;
-
-    struct {
-        PAL_HDR reserved;
-    } sock;
-
-    struct {
-        PAL_HDR reserved;
-    } process;
-
-    struct {
-        PAL_HDR reserved;
-    } mcast;
-
-    struct {
-        PAL_HDR reserved;
-    } thread;
-
-    struct {
-        PAL_HDR reserved;
-    } semaphore;
-
-    struct {
-        PAL_HDR reserved;
-    } event;
+    PAL_HDR hdr;
+    
+    union {
+        struct {
+            PAL_IDX fds[2];
+        } generic;
+
+        /* DP: Here we just define a placeholder fd; place your details here.
+         * Not every type requires an fd either - this is up to your
+         * host-specific code.
+         */
+        struct {
+            PAL_IDX fd;
+        } file;
+        
+        struct {
+            PAL_IDX fd;
+        } pipe;
+        
+        struct {
+            PAL_IDX fd;
+        } pipeprv;
+        
+        struct {
+            PAL_IDX fd;
+            PAL_IDX dev_type;
+        } dev;
+        
+        struct {
+            PAL_IDX fd;
+        } dir;
+        
+        struct {
+            PAL_IDX fd;
+        } gipc;
+        
+        struct {
+            PAL_IDX fd;
+        } sock;
+        
+        struct {
+            PAL_IDX fd;
+        } process;
+        
+        struct {
+            PAL_IDX fd;
+        } mcast;
+        
+        struct {
+            PAL_IDX fd;
+        } thread;
+        
+        struct {
+            PAL_IDX fd;
+        } semaphore;
+        
+        struct {
+            PAL_IDX fd;
+        } event;
+    };
 } * PAL_HANDLE;
 
 #endif /* PAL_HOST_H */

+ 18 - 33
Pal/src/pal.h

@@ -33,19 +33,12 @@
 typedef unsigned long PAL_NUM;
 typedef const char *  PAL_STR;
 typedef void *        PAL_PTR;
-typedef unsigned int  PAL_FLG;
-typedef unsigned int  PAL_IDX;
+typedef uint32_t      PAL_FLG;
+typedef uint32_t      PAL_IDX;
 typedef bool          PAL_BOL;
 
 #ifdef IN_PAL
-struct atomic_int {
-    volatile int counter;
-}
-#ifdef __GNUC__
-__attribute__((aligned(sizeof(int))))
-#endif
-;
-
+#include <atomic.h>
 typedef struct atomic_int PAL_REF;
 
 typedef struct {
@@ -60,12 +53,14 @@ typedef struct {
 #  define HANDLE_HDR(handle) (&((handle)->hdr))
 # endif
 
-# define SET_HANDLE_TYPE(handle, t)             \
-    do {                                        \
-        HANDLE_HDR(handle)->type = pal_type_##t;\
-        HANDLE_HDR(handle)->ref.counter = 0;    \
-        HANDLE_HDR(handle)->flags = 0;          \
-    } while (0)
+/* Fill in the common header of a handle: record its type, clear all
+ * flags, and start the reference count at 1. */
+static inline void init_handle_hdr(PAL_HDR *hdr, int pal_type)
+{
+    hdr->flags = 0;
+    hdr->ref.counter = 1;
+    hdr->type = pal_type;
+}
+
+# define SET_HANDLE_TYPE(handle, t) \
+    init_handle_hdr(HANDLE_HDR(handle), pal_type_##t)
 
 # define IS_HANDLE_TYPE(handle, t)              \
     (HANDLE_HDR(handle)->type == pal_type_##t)
@@ -115,7 +110,7 @@ enum {
     pal_type_process,
     pal_type_mcast,
     pal_type_thread,
-    pal_type_semaphore,
+    pal_type_mutex,
     pal_type_event,
     pal_type_gipc,
     PAL_HANDLE_TYPE_BOUND,
@@ -428,26 +423,16 @@ void DkExceptionReturn (PAL_PTR event);
  * We may want to replace it with a PAL_HANDLE. Ideally, either use PAL_HANDLE
  * or threadHandle.
  */
-/* maxcount sets the number of threads allowed to hold the semaphore
- * at once.  For 1, this becomes a mutex.
- * initialCount of 0 is totally unlocked; an initialCount that
- * equals maxCount means that all resources are taken. */ 
+/* Create a mutex.
+ * An initialCount of 0 creates the mutex unlocked; an initialCount of 1
+ * creates it locked. */
 PAL_HANDLE
-DkSemaphoreCreate (PAL_NUM initialCount, PAL_NUM maxCount);
-
-/* DkSemaphoreDestroy deprecated, replaced by DkObjectClose */
+DkMutexCreate (PAL_NUM initialCount);
 
-/* TSAI: I preserve this API because DkObjectsWaitAny can't acquire multiple
- * counts of a semaphore. Acquiring multiple counts is required for
- * implementing a read-write-lock. To make this API complementary to
- * DkObjectsWaitAny, I added a 'timeout' to its arguments */
-
-/* DkSemaphoreAcquire deprecated */
+/* Destroy a mutex using DkObjectClose */
 
 void
-DkSemaphoreRelease (PAL_HANDLE semaphoreHandle, PAL_NUM count);
-
-/* DkSemaphoreGetCurrentCount deprecated */
+DkMutexRelease (PAL_HANDLE mutexHandle);
 
 PAL_HANDLE
 DkNotificationEventCreate (PAL_BOL initialState);

+ 9 - 33
Pal/src/pal_internal.h

@@ -155,38 +155,14 @@ static inline unsigned int hash64 (unsigned long key)
 /* We allow dynamic size handle allocation. Here is some macro to help
    deciding the actual size of the handle */
 extern PAL_HANDLE _h;
-#define HANDLE_SIZE(type)  (sizeof(_h->type))
+#define HANDLE_SIZE(type)  (sizeof(*_h))
 
 #define UNKNOWN_HANDLE(handle)     \
     (PAL_GET_TYPE(handle) == 0 || PAL_GET_TYPE(handle) >= PAL_HANDLE_TYPE_BOUND)
 
 static inline int handle_size (PAL_HANDLE handle)
 {
-    static int handle_sizes[PAL_HANDLE_TYPE_BOUND]
-            = { 0,
-                [pal_type_file]      = sizeof(handle->file),
-                [pal_type_pipe]      = sizeof(handle->pipe),
-                [pal_type_pipesrv]   = sizeof(handle->pipe),
-                [pal_type_pipecli]   = sizeof(handle->pipe),
-                [pal_type_pipeprv]   = sizeof(handle->pipeprv),
-                [pal_type_dev]       = sizeof(handle->dev),
-                [pal_type_dir]       = sizeof(handle->dir),
-                [pal_type_tcp]       = sizeof(handle->sock),
-                [pal_type_tcpsrv]    = sizeof(handle->sock),
-                [pal_type_udp]       = sizeof(handle->sock),
-                [pal_type_udpsrv]    = sizeof(handle->sock),
-                [pal_type_process]   = sizeof(handle->process),
-                [pal_type_mcast]     = sizeof(handle->mcast),
-                [pal_type_thread]    = sizeof(handle->thread),
-                [pal_type_semaphore] = sizeof(handle->semaphore),
-                [pal_type_event]     = sizeof(handle->event),
-                [pal_type_gipc]      = sizeof(handle->gipc),
-            };
-
-    if (UNKNOWN_HANDLE(handle))
-        return 0;
-    else
-        return handle_sizes[PAL_GET_TYPE(handle)];
+    return sizeof(*handle);
 }
 
 #ifndef ENTER_PAL_CALL
@@ -324,13 +300,13 @@ int _DkProcessCreate (PAL_HANDLE * handle, const char * uri,
 void _DkProcessExit (int exitCode);
 int _DkProcessSandboxCreate (const char * manifest, int flags);
 
-/* DkSemaphore calls */
-int _DkSemaphoreCreate (PAL_HANDLE handle, int initialCount, int maxCount);
-void _DkSemaphoreDestroy (PAL_HANDLE semaphoreHandle);
-int _DkSemaphoreAcquire (PAL_HANDLE sem, int count);
-int _DkSemaphoreAcquireTimeout (PAL_HANDLE sem, int count, uint64_t timeout);
-void _DkSemaphoreRelease (PAL_HANDLE sem, int count);
-int _DkSemaphoreGetCurrentCount (PAL_HANDLE sem);
+/* DkMutex calls.  Parameter names match the definitions, which call the
+ * mutex handle `handle`. */
+int _DkMutexCreate (PAL_HANDLE handle, int initialCount);
+void _DkMutexDestroy (PAL_HANDLE handle);
+int _DkMutexAcquire (PAL_HANDLE handle);
+int _DkMutexAcquireTimeout (PAL_HANDLE handle, int timeout);
+void _DkMutexRelease (PAL_HANDLE handle);
+int _DkMutexGetCurrentCount (PAL_HANDLE handle);
 
 /* DkEvent calls */
 int _DkEventCreate (PAL_HANDLE * event, bool initialState,