Browse Source

[LibOS] Optimize shim_get_tcb() and its variants

- apply SHIM_TCB_GET() and SHIM_TCB_SET()
- remove dead code
Isaku Yamahata 4 years ago
parent
commit
a7cb199b9a

+ 4 - 6
LibOS/shim/include/shim_internal.h

@@ -44,12 +44,11 @@
 noreturn void shim_clean_and_exit(int exit_code);
 
 /* important macros and static inline functions */
-static inline unsigned int get_cur_tid(void)
-{
-    return shim_get_tcb()->tid;
+static inline unsigned int get_cur_tid(void) {
+    return SHIM_TCB_GET(tid);
 }
 
-#define PAL_NATIVE_ERRNO        (shim_get_tcb()->pal_errno)
+#define PAL_NATIVE_ERRNO        (SHIM_TCB_GET(pal_errno))
 
 #define INTERNAL_TID_BASE       ((IDTYPE) 1 << (sizeof(IDTYPE) * 8 - 1))
 
@@ -583,8 +582,7 @@ static inline bool locked(struct shim_lock* l)
     if (!lock_enabled || !l->lock)
         return false;
 
-    shim_tcb_t* tcb = shim_get_tcb();
-    return tcb->tid == l->owner;
+    return get_cur_tid() == l->owner;
 }
 
 #define DEBUG_MASTER_LOCK 0

+ 98 - 11
LibOS/shim/include/shim_tcb.h

@@ -1,7 +1,9 @@
 #ifndef _SHIM_TCB_H_
 #define _SHIM_TCB_H_
 
+#include <assert.h>
 #include <atomic.h>
+#include <pal.h>
 
 #define SHIM_TCB_CANARY 0xdeadbeef
 
@@ -56,20 +58,105 @@ struct shim_tcb {
     } test_range;
 };
 
-void init_tcb (shim_tcb_t * tcb);
+/*
+ * SHIM_TCB_GET(member): read one member of the current thread's shim_tcb_t
+ * with a single %gs-relative mov, at the constant offset
+ * offsetof(PAL_TCB, libos_tcb) + offsetof(shim_tcb_t, member).
+ *
+ * Only members of 8, 4, 2 or 1 byte(s) are supported; anything else is
+ * rejected at compile time by the static_assert. The macro is a GNU statement
+ * expression that evaluates to the value read, with the member's own type.
+ *
+ * The "memory" clobber tells the compiler that the asm accesses memory it
+ * cannot see through the operands. Without it, the load could be merged with
+ * an earlier identical load or moved across a store to the same TCB done
+ * through SHIM_TCB_SET() or through a shim_tcb_t pointer.
+ */
+#define SHIM_TCB_GET(member)                                            \
+    ({                                                                  \
+        shim_tcb_t* tcb;                                                \
+        __typeof__(tcb->member) ret;                                    \
+        static_assert(sizeof(ret) == 8 ||                               \
+                      sizeof(ret) == 4 ||                               \
+                      sizeof(ret) == 2 ||                               \
+                      sizeof(ret) == 1,                                 \
+                      "SHIM_TCB_GET can be used only for "              \
+                      "8, 4, 2, or 1-byte(s) members");                 \
+        switch (sizeof(ret)) {                                          \
+        case 8:                                                         \
+            __asm__("movq %%gs:%c1, %0\n"                               \
+                    : "=r"(ret)                                         \
+                    : "i" (offsetof(PAL_TCB, libos_tcb) +               \
+                           offsetof(shim_tcb_t, member))                \
+                    : "memory");                                        \
+            break;                                                      \
+        case 4:                                                         \
+            __asm__("movl %%gs:%c1, %0\n"                               \
+                    : "=r"(ret)                                         \
+                    : "i" (offsetof(PAL_TCB, libos_tcb) +               \
+                           offsetof(shim_tcb_t, member))                \
+                    : "memory");                                        \
+            break;                                                      \
+        case 2:                                                         \
+            __asm__("movw %%gs:%c1, %0\n"                               \
+                    : "=r"(ret)                                         \
+                    : "i" (offsetof(PAL_TCB, libos_tcb) +               \
+                           offsetof(shim_tcb_t, member))                \
+                    : "memory");                                        \
+            break;                                                      \
+        case 1:                                                         \
+            __asm__("movb %%gs:%c1, %0\n"                               \
+                    : "=r"(ret)                                         \
+                    : "i" (offsetof(PAL_TCB, libos_tcb) +               \
+                           offsetof(shim_tcb_t, member))                \
+                    : "memory");                                        \
+            break;                                                      \
+        default:                                                        \
+            __abort();                                                  \
+        }                                                               \
+        ret;                                                            \
+    })
 
-static inline shim_tcb_t * shim_get_tcb(void)
-{
-    /* TODO: optimize to use single movq %gs:<offset> */
-    PAL_TCB * tcb = pal_get_tcb();
-    return (shim_tcb_t*)tcb->libos_tcb;
+/*
+ * SHIM_TCB_SET(member, value): store `value` into one member of the current
+ * thread's shim_tcb_t with a single %gs-relative mov, at the constant offset
+ * offsetof(PAL_TCB, libos_tcb) + offsetof(shim_tcb_t, member).
+ *
+ * Only members of 8, 4, 2 or 1 byte(s) are supported; anything else is
+ * rejected at compile time by the static_assert.
+ *
+ * `value` is evaluated exactly once and converted to the member's type before
+ * the store, so the register/immediate operand always has the member's width.
+ * The macro declares no local named `tcb`, so a caller expression such as
+ * `tcb->field` refers to the caller's variable. The 8-byte case uses the "e"
+ * constraint because movq to memory only accepts a sign-extended 32-bit
+ * immediate. The "memory" clobber keeps the compiler from reordering the store
+ * against other accesses to the same TCB that it cannot see through the asm
+ * operands.
+ */
+#define SHIM_TCB_SET(member, value)                                     \
+    do {                                                                \
+        __typeof__(((shim_tcb_t*)0)->member) shim_tcb_set_val_ =        \
+            (__typeof__(((shim_tcb_t*)0)->member))(value);              \
+        static_assert(sizeof(shim_tcb_set_val_) == 8 ||                 \
+                      sizeof(shim_tcb_set_val_) == 4 ||                 \
+                      sizeof(shim_tcb_set_val_) == 2 ||                 \
+                      sizeof(shim_tcb_set_val_) == 1,                   \
+                      "SHIM_TCB_SET can be used only for "              \
+                      "8, 4, 2, or 1-byte(s) members");                 \
+        switch (sizeof(shim_tcb_set_val_)) {                            \
+        case 8:                                                         \
+            __asm__("movq %0, %%gs:%c1\n"                               \
+                    :: "er"(shim_tcb_set_val_),                         \
+                     "i"(offsetof(PAL_TCB, libos_tcb) +                 \
+                         offsetof(shim_tcb_t, member))                  \
+                    : "memory");                                        \
+            break;                                                      \
+        case 4:                                                         \
+            __asm__("movl %0, %%gs:%c1\n"                               \
+                    :: "ir"(shim_tcb_set_val_),                         \
+                     "i"(offsetof(PAL_TCB, libos_tcb) +                 \
+                         offsetof(shim_tcb_t, member))                  \
+                    : "memory");                                        \
+            break;                                                      \
+        case 2:                                                         \
+            __asm__("movw %0, %%gs:%c1\n"                               \
+                    :: "ir"(shim_tcb_set_val_),                         \
+                     "i"(offsetof(PAL_TCB, libos_tcb) +                 \
+                         offsetof(shim_tcb_t, member))                  \
+                    : "memory");                                        \
+            break;                                                      \
+        case 1:                                                         \
+            __asm__("movb %0, %%gs:%c1\n"                               \
+                    :: "ir"(shim_tcb_set_val_),                         \
+                     "i"(offsetof(PAL_TCB, libos_tcb) +                 \
+                         offsetof(shim_tcb_t, member))                  \
+                    : "memory");                                        \
+            break;                                                      \
+        default:                                                        \
+            __abort();                                                  \
+        }                                                               \
+    } while (0)
+
+/* Stamp the identifying fields of the given shim TCB: its self pointer and its canary.
+ * No other field of *shim_tcb is written. */
+static inline void __shim_tcb_init(shim_tcb_t* shim_tcb) {
+    shim_tcb->self   = shim_tcb;
+    shim_tcb->canary = SHIM_TCB_CANARY;
+}
+
+/*
+ * Reset the calling thread's shim TCB, i.e. the libos_tcb area of the PAL TCB returned by
+ * pal_get_tcb(): the whole structure is zeroed and then its canary and self pointer are set.
+ * Call this function at the beginning of thread execution.
+ */
+static inline void shim_tcb_init(void) {
+    shim_tcb_t* cur = (shim_tcb_t*)pal_get_tcb()->libos_tcb;
+
+    memset(cur, 0, sizeof(shim_tcb_t));
+    __shim_tcb_init(cur);
+}
+
+static inline shim_tcb_t* shim_get_tcb(void) {
+    return SHIM_TCB_GET(self);
 }
 
-static inline bool shim_tcb_check_canary(void)
-{
-    /* TODO: optimize to use single movq %gs:<offset> */
-    shim_tcb_t * shim_tcb = shim_get_tcb();
-    return shim_tcb->canary == SHIM_TCB_CANARY;
+static inline bool shim_tcb_check_canary(void) {
+    return SHIM_TCB_GET(canary) == SHIM_TCB_CANARY;
 }
 
 #endif /* _SHIM_TCB_H_ */

+ 2 - 18
LibOS/shim/include/shim_thread.h

@@ -133,21 +133,6 @@ struct shim_simple_thread {
 
 int init_thread (void);
 
-static inline struct shim_thread * shim_thread_self(void)
-{
-    /* TODO: optimize to use single movq %gs:<offset> */
-    shim_tcb_t * shim_tcb = shim_get_tcb();
-    return shim_tcb->tp;
-}
-
-static inline struct shim_thread * save_shim_thread_self(struct shim_thread * __self)
-{
-    /* TODO: optimize to use single movq %gs:<offset> */
-    shim_tcb_t * shim_tcb = shim_get_tcb();
-    shim_tcb->tp = __self;
-    return __self;
-}
-
 static inline bool is_internal(struct shim_thread *thread)
 {
     return thread->tid >= INTERNAL_TID_BASE;
@@ -178,9 +163,8 @@ void debug_setbuf (shim_tcb_t * tcb, bool on_stack)
 
 static inline
 __attribute__((always_inline))
-struct shim_thread * get_cur_thread (void)
-{
-    return shim_thread_self();
+struct shim_thread* get_cur_thread (void) {
+    return SHIM_TCB_GET(tp);
 }
 
 static inline

+ 2 - 11
LibOS/shim/src/bookkeep/shim_thread.c

@@ -97,16 +97,6 @@ struct shim_thread* lookup_thread(IDTYPE tid) {
     return thread;
 }
 
-struct shim_thread * __get_cur_thread (void)
-{
-    return shim_thread_self();
-}
-
-shim_tcb_t * __get_cur_tcb (void)
-{
-    return shim_get_tcb();
-}
-
 IDTYPE get_pid (void)
 {
     IDTYPE idx;
@@ -759,6 +749,7 @@ static int resume_wrapper (void * param)
 
     /* initialize the current shim_tcb_t (= shim_get_tcb())
        based on saved thread->shim_tcb */
+    shim_tcb_init();
     shim_tcb_t* saved_tcb = thread->shim_tcb;
     assert(saved_tcb->context.regs && saved_tcb->context.regs->rsp);
     unsigned long fs_base = saved_tcb->context.fs_base;
@@ -813,6 +804,7 @@ BEGIN_RS_FUNC(running_thread)
             /* fork case */
             shim_tcb_t* tcb = shim_get_tcb();
             memcpy(tcb, saved_tcb, sizeof(*tcb));
+            __shim_tcb_init(tcb);
 
             assert(tcb->context.regs && tcb->context.regs->rsp);
             init_fs_base(tcb->context.fs_base, thread);
@@ -831,7 +823,6 @@ BEGIN_RS_FUNC(running_thread)
              * in_vm = false
              */
             thread->shim_tcb = shim_get_tcb();
-            init_tcb(thread->shim_tcb);
             debug_setbuf(thread->shim_tcb, false);
             set_cur_thread(thread);
         }

+ 1 - 0
LibOS/shim/src/ipc/shim_ipc_helper.c

@@ -792,6 +792,7 @@ static void shim_ipc_helper_prepare(void* arg) {
     if (!arg)
         return;
 
+    shim_tcb_init();
     init_fs_base(0, self);
     debug_setbuf(shim_get_tcb(), true);
 

+ 1 - 0
LibOS/shim/src/shim_async.c

@@ -144,6 +144,7 @@ static void shim_async_helper(void * arg) {
     if (!arg)
         return;
 
+    shim_tcb_init();
     init_fs_base(0, self);
     debug_setbuf(shim_get_tcb(), true);
 

+ 1 - 7
LibOS/shim/src/shim_init.c

@@ -199,17 +199,10 @@ char ** library_paths = NULL;
 struct shim_lock __master_lock;
 bool lock_enabled;
 
-void init_tcb (shim_tcb_t * tcb)
-{
-    tcb->canary = SHIM_TCB_CANARY;
-    tcb->self = tcb;
-}
-
 /* This function is used to allocate TLS before the interpreter starts running */
 void init_fs_base (unsigned long fs_base, struct shim_thread * thread)
 {
     shim_tcb_t * shim_tcb = shim_get_tcb();
-    init_tcb(shim_tcb);
 
     if (thread) {
         thread->shim_tcb = shim_tcb;
@@ -675,6 +668,7 @@ noreturn void* shim_init (int argc, void * args)
     cur_process.vmid = (IDTYPE) PAL_CB(process_id);
 
     /* create the initial TCB, shim can not be run without a tcb */
+    shim_tcb_init();
     init_fs_base(0, NULL);
     __disable_preempt(shim_get_tcb()); // Temporarily disable preemption for delaying any signal
                                        // that arrives during initialization

+ 1 - 0
LibOS/shim/src/sys/shim_clone.c

@@ -118,6 +118,7 @@ static int clone_implementation_wrapper(struct shim_clone_args * arg)
     struct shim_thread* my_thread = arg->thread;
     assert(my_thread);
 
+    shim_tcb_init();
     init_fs_base(arg->fs_base, my_thread);
     shim_tcb_t * tcb = my_thread->shim_tcb;