  1. /* Copyright (C) 2014 Stony Brook University
  2. This file is part of Graphene Library OS.
  3. Graphene Library OS is free software: you can redistribute it and/or
  4. modify it under the terms of the GNU Lesser General Public License
  5. as published by the Free Software Foundation, either version 3 of the
  6. License, or (at your option) any later version.
  7. Graphene Library OS is distributed in the hope that it will be useful,
  8. but WITHOUT ANY WARRANTY; without even the implied warranty of
  9. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  10. GNU Lesser General Public License for more details.
  11. You should have received a copy of the GNU Lesser General Public License
  12. along with this program. If not, see <http://www.gnu.org/licenses/>. */
/*
 * db_threading.c
 *
 * This file contains APIs to create, exit, and yield a thread.
 */
  18. #include "api.h"
  19. #include "pal.h"
  20. #include "pal_debug.h"
  21. #include "pal_defs.h"
  22. #include "pal_error.h"
  23. #include "pal_internal.h"
  24. #include "pal_linux.h"
  25. #include "pal_linux_defs.h"
  26. #include "spinlock.h"
  27. #include <errno.h>
  28. #include <linux/mman.h>
  29. #include <linux/sched.h>
  30. #include <linux/signal.h>
  31. #include <linux/types.h>
  32. #include <linux/wait.h>
  33. #if defined(__i386__)
  34. #include <asm/ldt.h>
  35. #else
  36. #include <asm/prctl.h>
  37. #endif
/* Linux PAL cannot use mmap/unmap to manage thread stacks because this may overlap with
 * pal_control.user_address. Linux PAL also cannot just use malloc/free because DkThreadExit
 * needs to use raw system calls and inline asm. Thus, we resort to recycling thread stacks
 * allocated by previous threads and not used anymore. This still leaks memory, but at least
 * the leak is bounded by the maximum number of simultaneously executing threads. Note that
 * the main thread is not a part of this mechanism (it only allocates a tiny altstack). */
/* One entry per thread stack ever allocated by get_thread_stack(). */
struct thread_stack_map_t {
    void* stack; /* base of a THREAD_STACK_SIZE + ALT_STACK_SIZE allocation */
    bool used;   /* true while some thread runs on this stack; reset in _DkThreadExit() */
};

/* Registry of recyclable thread stacks; all three variables below are protected by
 * g_thread_stack_lock. */
static struct thread_stack_map_t* g_thread_stack_map = NULL;
static size_t g_thread_stack_num = 0;  /* number of valid entries in the map */
static size_t g_thread_stack_size = 0; /* allocated capacity of the map (grows in steps of 8) */
static spinlock_t g_thread_stack_lock = INIT_SPINLOCK_UNLOCKED;
  52. static void* get_thread_stack(void) {
  53. void* ret = NULL;
  54. spinlock_lock(&g_thread_stack_lock);
  55. for (size_t i = 0; i < g_thread_stack_num; i++) {
  56. if (!g_thread_stack_map[i].used) {
  57. /* found allocated and unused stack -- use it */
  58. g_thread_stack_map[i].used = true;
  59. ret = g_thread_stack_map[i].stack;
  60. goto out;
  61. }
  62. }
  63. if (g_thread_stack_num == g_thread_stack_size) {
  64. /* realloc g_thread_stack_map to accommodate more objects (includes the very first time) */
  65. g_thread_stack_size += 8;
  66. struct thread_stack_map_t* tmp = malloc(g_thread_stack_size * sizeof(*tmp));
  67. if (!tmp)
  68. goto out;
  69. memcpy(tmp, g_thread_stack_map, g_thread_stack_num * sizeof(*tmp));
  70. free(g_thread_stack_map);
  71. g_thread_stack_map = tmp;
  72. }
  73. ret = malloc(THREAD_STACK_SIZE + ALT_STACK_SIZE);
  74. if (!ret)
  75. goto out;
  76. g_thread_stack_map[g_thread_stack_num].stack = ret;
  77. g_thread_stack_map[g_thread_stack_num].used = true;
  78. g_thread_stack_num++;
  79. out:
  80. spinlock_unlock(&g_thread_stack_lock);
  81. return ret;
  82. }
  83. /*
  84. * pal_thread_init(): An initialization wrapper of a newly-created thread (including
  85. * the first thread). This function accepts a TCB pointer to be set to the GS register
  86. * of the thread. The rest of the TCB is used as the alternative stack for signal
  87. * handling.
  88. */
  89. int pal_thread_init (void * tcbptr)
  90. {
  91. PAL_TCB_LINUX * tcb = tcbptr;
  92. int ret;
  93. ret = INLINE_SYSCALL(arch_prctl, 2, ARCH_SET_GS, tcb);
  94. if (IS_ERR(ret))
  95. return -ERRNO(ret);
  96. if (tcb->alt_stack) {
  97. // Align stack to 16 bytes
  98. void* alt_stack_top = ALIGN_DOWN_PTR(tcb, 16);
  99. assert(alt_stack_top > tcb->alt_stack);
  100. stack_t ss;
  101. ss.ss_sp = alt_stack_top;
  102. ss.ss_flags = 0;
  103. ss.ss_size = alt_stack_top - tcb->alt_stack;
  104. ret = INLINE_SYSCALL(sigaltstack, 2, &ss, NULL);
  105. if (IS_ERR(ret))
  106. return -ERRNO(ret);
  107. }
  108. if (tcb->callback)
  109. return (*tcb->callback) (tcb->param);
  110. return 0;
  111. }
  112. /* _DkThreadCreate for internal use. Create an internal thread
  113. inside the current process. The arguments callback and param
  114. specify the starting function and parameters */
  115. int _DkThreadCreate (PAL_HANDLE * handle, int (*callback) (void *),
  116. const void * param)
  117. {
  118. int ret = 0;
  119. PAL_HANDLE hdl = NULL;
  120. void* stack = get_thread_stack();
  121. if (!stack) {
  122. ret = -ENOMEM;
  123. goto err;
  124. }
  125. /* Stack layout for the new thread looks like this (recall that stacks grow towards lower
  126. * addresses on Linux on x86-64):
  127. *
  128. * stack +--> +-------------------+
  129. * | child stack | THREAD_STACK_SIZE
  130. * child_stack +--> +-------------------+
  131. * | alternate stack | ALT_STACK_SIZE - sizeof(PAL_TCB_LINUX)
  132. * tcb +--> +-------------------+
  133. * | PAL TCB | sizeof(PAL_TCB_LINUX)
  134. * +-------------------+
  135. *
  136. * We zero out only the first page of the main stack (to comply with the requirement of
  137. * gcc ABI, in particular that the initial stack frame's return address must be NULL).
  138. * We zero out the whole altstack (since it is small anyway) and also the PAL TCB. */
  139. memset(stack + THREAD_STACK_SIZE - PRESET_PAGESIZE, 0, PRESET_PAGESIZE);
  140. memset(stack + THREAD_STACK_SIZE, 0, ALT_STACK_SIZE);
  141. void * child_stack = stack + THREAD_STACK_SIZE;
  142. hdl = malloc(HANDLE_SIZE(thread));
  143. if (!hdl) {
  144. ret = -ENOMEM;
  145. goto err;
  146. }
  147. SET_HANDLE_TYPE(hdl, thread);
  148. // Initialize TCB at the top of the alternative stack.
  149. PAL_TCB_LINUX * tcb = child_stack + ALT_STACK_SIZE - sizeof(PAL_TCB_LINUX);
  150. tcb->common.self = &tcb->common;
  151. tcb->handle = hdl;
  152. tcb->alt_stack = child_stack; // Stack bottom
  153. tcb->callback = callback;
  154. tcb->param = (void *) param;
  155. /* align child_stack to 16 */
  156. child_stack = ALIGN_DOWN_PTR(child_stack, 16);
  157. ret = clone(pal_thread_init, child_stack,
  158. CLONE_VM|CLONE_FS|CLONE_FILES|CLONE_SYSVSEM|
  159. CLONE_THREAD|CLONE_SIGHAND|CLONE_PTRACE|
  160. CLONE_PARENT_SETTID,
  161. (void *) tcb, &hdl->thread.tid, NULL);
  162. if (IS_ERR(ret)) {
  163. ret = -PAL_ERROR_DENIED;
  164. goto err;
  165. }
  166. hdl->thread.stack = stack;
  167. *handle = hdl;
  168. return 0;
  169. err:
  170. free(stack);
  171. free(hdl);
  172. return ret;
  173. }
  174. int _DkThreadDelayExecution (unsigned long * duration)
  175. {
  176. struct timespec sleeptime;
  177. struct timespec remainingtime;
  178. const unsigned long VERY_LONG_TIME_IN_US = 1000000L * 60 * 60 * 24 * 365 * 128;
  179. if (*duration > VERY_LONG_TIME_IN_US) {
  180. /* avoid overflow with time_t */
  181. sleeptime.tv_sec = VERY_LONG_TIME_IN_US / 1000000;
  182. sleeptime.tv_nsec = 0;
  183. } else {
  184. sleeptime.tv_sec = *duration / 1000000;
  185. sleeptime.tv_nsec = (*duration - sleeptime.tv_sec * 1000000) * 1000;
  186. }
  187. int ret = INLINE_SYSCALL(nanosleep, 2, &sleeptime, &remainingtime);
  188. if (IS_ERR(ret)) {
  189. PAL_NUM remaining = remainingtime.tv_sec * 1000000 +
  190. remainingtime.tv_nsec / 1000;
  191. *duration -= remaining;
  192. return -PAL_ERROR_INTERRUPTED;
  193. }
  194. return 0;
  195. }
/* PAL call DkThreadYieldExecution. Yield the execution
   of the current thread. */
void _DkThreadYieldExecution (void)
{
    /* best-effort: sched_yield(2) always succeeds, so no error to propagate */
    INLINE_SYSCALL(sched_yield, 0);
}
/* _DkThreadExit for internal use: Thread exiting. Recycles this thread's stack into
 * g_thread_stack_map and performs the final exit(2) from inline asm, since no C code may
 * run once the stack is eligible for reuse. Never returns. */
noreturn void _DkThreadExit(int* clear_child_tid) {
    PAL_TCB_LINUX* tcb = get_tcb_linux();
    PAL_HANDLE handle = tcb->handle;
    assert(handle);

    /* no signal handlers may run from here on: the altstack is about to be torn down */
    block_async_signals(true);
    if (tcb->alt_stack) {
        stack_t ss;
        ss.ss_sp    = NULL;
        ss.ss_flags = SS_DISABLE;
        ss.ss_size  = 0;

        // Take precautions to unset the TCB and alternative stack first.
        INLINE_SYSCALL(arch_prctl, 2, ARCH_SET_GS, 0);
        INLINE_SYSCALL(sigaltstack, 2, &ss, NULL);
    }

    /* we do not free thread stack but instead mark it as recycled, see get_thread_stack() */
    spinlock_lock(&g_thread_stack_lock);
    for (size_t i = 0; i < g_thread_stack_num; i++) {
        if (g_thread_stack_map[i].stack == handle->thread.stack) {
            g_thread_stack_map[i].used = false;
            break;
        }
    }

    /* we might still be using the stack we just marked as unused until we enter the asm mode,
     * so we do not unlock now but rather in asm below */

    /* To make sure the compiler doesn't touch the stack after it was freed, need inline asm:
     *   1. Unlock g_thread_stack_lock (so that other threads can start re-using this stack)
     *   2. Set *clear_child_tid = 0 if clear_child_tid != NULL
     *      (we thus inform LibOS, where async helper thread is waiting on this to wake up parent)
     *   3. Exit thread */

    /* both asm stores below are 32-bit `movl`, so the operands must be exactly 4 bytes */
    static_assert(sizeof(g_thread_stack_lock) == 4, "unexpected g_thread_stack_lock size");
    static_assert(sizeof(*clear_child_tid) == 4, "unexpected clear_child_tid size");
    __asm__ volatile("movl $0, (%%rdx) \n\t"   /* spinlock_unlock(&g_thread_stack_lock) */
                     "cmpq $0, %%rbx \n\t"     /* check if clear_child_tid != NULL */
                     "je 1f \n\t"
                     "movl $0, (%%rbx) \n\t"   /* set *clear_child_tid = 0 */
                     "1: \n\t"
                     "syscall \n\t"            /* rdi arg is already prepared, call exit */
                     : /* no output regs since we don't return from exit */
                     : "a"(__NR_exit), "D"(0), /* rdi = exit status == 0 */
                       "d"(&g_thread_stack_lock), "b"(clear_child_tid)
                     : "cc", "rcx", "r11", "memory" /* syscall instr clobbers cc, rcx, and r11 */
    );

    /* unreachable: exit(2) above never returns; loop placates noreturn analysis */
    while (true) {
        /* nothing */
    }
}
  249. int _DkThreadResume (PAL_HANDLE threadHandle)
  250. {
  251. int ret = INLINE_SYSCALL(tgkill, 3,
  252. linux_state.pid,
  253. threadHandle->thread.tid,
  254. SIGCONT);
  255. if (IS_ERR(ret))
  256. return -PAL_ERROR_DENIED;
  257. return 0;
  258. }
/* Thread handles support none of the generic handle operations (read/write/close/etc.),
 * hence the empty operations table. */
struct handle_ops thread_ops = {
    /* nothing */
};