shim_futex.c 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487
  1. /* Copyright (C) 2014 Stony Brook University
  2. This file is part of Graphene Library OS.
  3. Graphene Library OS is free software: you can redistribute it and/or
  4. modify it under the terms of the GNU Lesser General Public License
  5. as published by the Free Software Foundation, either version 3 of the
  6. License, or (at your option) any later version.
  7. Graphene Library OS is distributed in the hope that it will be useful,
  8. but WITHOUT ANY WARRANTY; without even the implied warranty of
  9. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  10. GNU Lesser General Public License for more details.
  11. You should have received a copy of the GNU Lesser General Public License
  12. along with this program. If not, see <http://www.gnu.org/licenses/>. */
  13. /*
  14. * shim_futex.c
  15. *
  16. * Implementation of system call "futex", "set_robust_list" and
  17. * "get_robust_list".
  18. */
  19. #include <asm/prctl.h>
  20. #include <errno.h>
  21. #include <linux/futex.h>
  22. #include <list.h>
  23. #include <pal.h>
  24. #include <pal_error.h>
  25. #include <shim_checkpoint.h>
  26. #include <shim_internal.h>
  27. #include <shim_table.h>
  28. #include <shim_thread.h>
  29. #include <shim_utils.h>
  30. #include <sys/mman.h>
  31. #include <sys/syscall.h>
#define FUTEX_MIN_VALUE 0
#define FUTEX_MAX_VALUE 255

/* NOTE(review): FUTEX_MIN_VALUE / FUTEX_MAX_VALUE are not referenced anywhere
 * in this file -- confirm they are still needed. */

/* futex_waiters are linked off of shim_futex_handle by the waiters
 * listp */
struct futex_waiter {
    struct shim_thread* thread;   /* blocked thread; ref held while queued */
    uint32_t bitset;              /* FUTEX_*_BITSET mask (0xffffffff = match any) */
    LIST_TYPE(futex_waiter) list; /* node on shim_futex_handle.waiters */
};

// Links shim_futex_handle by the list field
DEFINE_LISTP(shim_futex_handle);
/* Global registry of all known futexes, keyed by user address (uaddr). */
static LISTP_TYPE(shim_futex_handle) futex_list = LISTP_INIT;
/* Protects futex_list; created lazily via create_lock_runtime(). */
static struct shim_lock futex_list_lock;
  45. static void add_futex_waiter(struct futex_waiter* waiter,
  46. struct shim_futex_handle* futex,
  47. uint32_t bitset) {
  48. thread_setwait(&waiter->thread, NULL);
  49. INIT_LIST_HEAD(waiter, list);
  50. waiter->bitset = bitset;
  51. LISTP_ADD_TAIL(waiter, &futex->waiters, list);
  52. }
  53. static void del_futex_waiter(struct futex_waiter* waiter, struct shim_futex_handle* futex) {
  54. LISTP_DEL_INIT(waiter, &futex->waiters, list);
  55. assert(waiter->thread);
  56. put_thread(waiter->thread);
  57. }
  58. static void del_futex_waiter_wakeup(struct futex_waiter* waiter, struct shim_futex_handle* futex) {
  59. LISTP_DEL_INIT(waiter, &futex->waiters, list);
  60. assert(waiter->thread);
  61. thread_wakeup(waiter->thread);
  62. put_thread(waiter->thread);
  63. }
/*
 * Implementation of the futex syscall.
 *
 * uaddr/uaddr2: user futex words (uaddr must be non-NULL, int-aligned);
 * op: futex operation (flag bits are stripped via FUTEX_CMD_MASK);
 * val/val3/utime: operation-specific arguments (for two-futex operations,
 * utime is reinterpreted as the integer val2).
 *
 * Returns 0 or a positive wake count on success, negative errno on failure.
 *
 * Futexes are backed by shim handles registered on the global futex_list,
 * keyed by user address; a handle is created on first use of an address.
 */
int shim_do_futex(int* uaddr, int op, int val, void* utime, int* uaddr2, int val3) {
    struct shim_futex_handle* tmp = NULL;
    struct shim_futex_handle* futex = NULL;
    struct shim_futex_handle* futex2 = NULL;
    struct shim_handle* hdl = NULL;
    struct shim_handle* hdl2 = NULL;
    uint32_t futex_op = (op & FUTEX_CMD_MASK); /* strip PRIVATE/CLOCK flag bits */
    uint32_t val2 = 0;
    int ret = 0;

    /* The futex word must be a valid, int-aligned address. */
    if (!uaddr || !IS_ALIGNED_PTR(uaddr, sizeof(unsigned int)))
        return -EINVAL;

    create_lock_runtime(&futex_list_lock);
    lock(&futex_list_lock);

    /* Look up the futex handle for uaddr, or create one on first use. */
    LISTP_FOR_EACH_ENTRY(tmp, &futex_list, list) {
        if (tmp->uaddr == uaddr) {
            futex = tmp;
            break;
        }
    }

    if (futex) {
        hdl = container_of(futex, struct shim_handle, info.futex);
        get_handle(hdl);
    } else {
        if (!(hdl = get_new_handle())) {
            unlock(&futex_list_lock);
            return -ENOMEM;
        }

        hdl->type = TYPE_FUTEX;
        futex = &hdl->info.futex;
        futex->uaddr = uaddr;
        get_handle(hdl);
        INIT_LISTP(&futex->waiters);
        INIT_LIST_HEAD(futex, list);
        LISTP_ADD_TAIL(futex, &futex_list, list);
    }

    /* Two-futex operations also need a handle for uaddr2; for these ops the
     * utime argument actually carries the integer val2. */
    if (futex_op == FUTEX_WAKE_OP || futex_op == FUTEX_REQUEUE || futex_op == FUTEX_CMP_REQUEUE) {
        LISTP_FOR_EACH_ENTRY(tmp, &futex_list, list) {
            if (tmp->uaddr == uaddr2) {
                futex2 = tmp;
                break;
            }
        }

        if (futex2) {
            hdl2 = container_of(futex2, struct shim_handle, info.futex);
            get_handle(hdl2);
        } else {
            if (!(hdl2 = get_new_handle())) {
                unlock(&futex_list_lock);
                return -ENOMEM;
            }

            hdl2->type = TYPE_FUTEX;
            futex2 = &hdl2->info.futex;
            futex2->uaddr = uaddr2;
            get_handle(hdl2);
            INIT_LISTP(&futex2->waiters);
            INIT_LIST_HEAD(futex2, list);
            LISTP_ADD_TAIL(futex2, &futex_list, list);
        }

        val2 = (uint32_t)(uint64_t)utime;
    }

    unlock(&futex_list_lock);
    lock(&hdl->lock);

    uint64_t timeout_us = NO_TIMEOUT;

    switch (futex_op) {
        case FUTEX_WAIT_BITSET:
            /* FUTEX_WAIT_BITSET takes an absolute timeout; convert it to a
             * relative one by subtracting the current time. */
            if (utime && timeout_us == NO_TIMEOUT) {
                struct timespec* ts = (struct timespec*)utime;
                // Round to microsecs
                timeout_us = (ts->tv_sec * 1000000) + (ts->tv_nsec / 1000);
                /* Check for the CLOCK_REALTIME flag
                 * DEP 1/28/17: Should really differentiate clocks, but
                 * Graphene only has one for now.
                 * if (futex_op & FUTEX_CLOCK_REALTIME) { */
                uint64_t current_time = DkSystemTimeQuery();
                if (current_time == 0) {
                    ret = -EINVAL;
                    break;
                }
                timeout_us -= current_time;
            }
            /* Note: for FUTEX_WAIT, timeout is interpreted as a relative
             * value. This differs from other futex operations, where
             * timeout is interpreted as an absolute value. To obtain the
             * equivalent of FUTEX_WAIT with an absolute timeout, employ
             * FUTEX_WAIT_BITSET with val3 specified as
             * FUTEX_BITSET_MATCH_ANY. */
            /* FALLTHROUGH */
        case FUTEX_WAIT:
            /* Guard `timeout_us == NO_TIMEOUT` ensures the conversion above
             * (BITSET fallthrough) is not redone here. */
            if (utime && timeout_us == NO_TIMEOUT) {
                struct timespec* ts = (struct timespec*)utime;
                // Round to microsecs
                timeout_us = (ts->tv_sec * 1000000) + (ts->tv_nsec / 1000);
            }

            {
                uint32_t bitset = (futex_op == FUTEX_WAIT_BITSET) ? (uint32_t)val3 : 0xffffffff;
                debug("FUTEX_WAIT: %p (val = %d) vs %d mask = %08x, timeout ptr %p\n", uaddr,
                      *uaddr, val, bitset, utime);

                /* Re-check the futex word under the handle lock; if it no
                 * longer holds the expected value, do not sleep. */
                if (*uaddr != val) {
                    ret = -EAGAIN;
                    break;
                }

                struct futex_waiter waiter = { 0 };
                add_futex_waiter(&waiter, futex, bitset);

                /* Sleep without the handle lock so wakers can make progress. */
                unlock(&hdl->lock);
                ret = thread_sleep(timeout_us);
                /* DEP 1/28/17: Should return ETIMEDOUT, not EAGAIN, on timeout. */
                if (ret == -EAGAIN)
                    ret = -ETIMEDOUT;
                lock(&hdl->lock);

                /* Chia-Che 10/17/17: FUTEX_WAKE should remove the waiter
                 * from the list; if not, we should remove it now. */
                if (!LIST_EMPTY(&waiter, list)) {
                    del_futex_waiter(&waiter, futex);
                }
                break;
            }

        case FUTEX_WAKE:
        case FUTEX_WAKE_BITSET: {
            struct futex_waiter* waiter;
            struct futex_waiter* wtmp;
            int nwaken = 0;
            uint32_t bitset = (futex_op == FUTEX_WAKE_BITSET) ? (uint32_t)val3 : 0xffffffff;
            debug("FUTEX_WAKE: %p (val = %d) count = %d mask = %08x\n", uaddr, *uaddr, val, bitset);

            /* Wake up to `val` waiters whose bitset intersects ours. */
            LISTP_FOR_EACH_ENTRY_SAFE(waiter, wtmp, &futex->waiters, list) {
                if (!(bitset & waiter->bitset))
                    continue;

                debug("FUTEX_WAKE wake thread %d: %p (val = %d)\n", waiter->thread->tid, uaddr,
                      *uaddr);
                del_futex_waiter_wakeup(waiter, futex);
                nwaken++;
                if (nwaken >= val)
                    break;
            }

            ret = nwaken;
            debug("FUTEX_WAKE done: %p (val = %d) woke %d threads\n", uaddr, *uaddr, ret);
            break;
        }

        case FUTEX_WAKE_OP: {
            assert(futex2);
            /* Decode op/cmp encoded in val3, compute and store the new value
             * for *uaddr2, wake waiters on uaddr, and -- if the comparison
             * against the old *uaddr2 succeeds -- wake waiters on uaddr2 too. */
            int oldval = *(int*)uaddr2, newval, cmpval;

            newval = (val3 >> 12) & 0xfff; /* oparg field */
            switch ((val3 >> 28) & 0xf) {  /* op field */
                case FUTEX_OP_SET:
                    break;
                case FUTEX_OP_ADD:
                    newval = oldval + newval;
                    break;
                case FUTEX_OP_OR:
                    newval = oldval | newval;
                    break;
                case FUTEX_OP_ANDN:
                    newval = oldval & ~newval;
                    break;
                case FUTEX_OP_XOR:
                    newval = oldval ^ newval;
                    break;
            }

            cmpval = val3 & 0xfff;         /* cmparg field */
            switch ((val3 >> 24) & 0xf) {  /* cmp field */
                case FUTEX_OP_CMP_EQ:
                    cmpval = (oldval == cmpval);
                    break;
                case FUTEX_OP_CMP_NE:
                    cmpval = (oldval != cmpval);
                    break;
                case FUTEX_OP_CMP_LT:
                    cmpval = (oldval < cmpval);
                    break;
                case FUTEX_OP_CMP_LE:
                    cmpval = (oldval <= cmpval);
                    break;
                case FUTEX_OP_CMP_GT:
                    cmpval = (oldval > cmpval);
                    break;
                case FUTEX_OP_CMP_GE:
                    cmpval = (oldval >= cmpval);
                    break;
            }

            *(int*)uaddr2 = newval;

            struct futex_waiter* waiter;
            struct futex_waiter* wtmp;
            int nwaken = 0;
            debug("FUTEX_WAKE_OP: %p (val = %d) count = %d\n", uaddr, *uaddr, val);
            /* NOTE(review): this wakes ALL waiters on uaddr; Linux wakes at
             * most `val` of them -- confirm whether this is intended. */
            LISTP_FOR_EACH_ENTRY_SAFE(waiter, wtmp, &futex->waiters, list) {
                debug("FUTEX_WAKE_OP wake thread %d: %p (val = %d)\n", waiter->thread->tid, uaddr,
                      *uaddr);
                del_futex_waiter_wakeup(waiter, futex);
                nwaken++;
            }

            if (cmpval) {
                /* Comparison succeeded: also wake waiters on the second futex.
                 * hdl is switched to hdl2 so the common epilogue below
                 * unlocks/releases the second handle. */
                unlock(&hdl->lock);
                put_handle(hdl);
                hdl = hdl2;
                lock(&hdl->lock);
                debug("FUTEX_WAKE: %p (val = %d) count = %d\n", uaddr2, *uaddr2, val2);
                LISTP_FOR_EACH_ENTRY_SAFE(waiter, wtmp, &futex2->waiters, list) {
                    debug("FUTEX_WAKE_OP(2) wake thread %d: %p (val = %d)\n", waiter->thread->tid,
                          uaddr2, *uaddr2);
                    del_futex_waiter_wakeup(waiter, futex2);
                    nwaken++;
                }
            }
            /* NOTE(review): if cmpval is false, the reference on hdl2 taken
             * earlier is never released -- possible handle leak; verify. */
            ret = nwaken;
            break;
        }

        case FUTEX_CMP_REQUEUE:
            /* Only requeue if the futex word still holds the expected val3. */
            if (*uaddr != val3) {
                ret = -EAGAIN;
                /* NOTE(review): hdl2's reference taken earlier is not released
                 * on this early exit -- possible handle leak; verify. */
                break;
            }
            /* FALLTHROUGH */
        case FUTEX_REQUEUE: {
            assert(futex2);
            struct futex_waiter* waiter;
            struct futex_waiter* wtmp;
            int nwaken = 0;

            /* Wake up to `val` waiters on uaddr... */
            LISTP_FOR_EACH_ENTRY_SAFE(waiter, wtmp, &futex->waiters, list) {
                del_futex_waiter_wakeup(waiter, futex);
                nwaken++;
                if (nwaken >= val)
                    break;
            }

            /* ...and move the remaining waiters onto uaddr2's wait list. */
            lock(&hdl2->lock);
            LISTP_SPLICE_INIT(&futex->waiters, &futex2->waiters, list, futex_waiter);
            unlock(&hdl2->lock);
            put_handle(hdl2);
            ret = nwaken;
            break;
        }

        case FUTEX_FD:
            /* Expose this futex handle through a new file descriptor. */
            ret = set_new_fd_handle(hdl, 0, NULL);
            break;

        default:
            debug("unsupported futex op: 0x%x\n", op);
            ret = -ENOSYS;
            break;
    }

    unlock(&hdl->lock);
    put_handle(hdl);
    return ret;
}
  305. int shim_do_set_robust_list(struct robust_list_head* head, size_t len) {
  306. struct shim_thread* self = get_cur_thread();
  307. assert(self);
  308. if (len != sizeof(struct robust_list_head))
  309. return -EINVAL;
  310. self->robust_list = head;
  311. return 0;
  312. }
  313. int shim_do_get_robust_list(pid_t pid, struct robust_list_head** head, size_t* len) {
  314. if (!head)
  315. return -EFAULT;
  316. struct shim_thread* thread;
  317. if (pid) {
  318. thread = lookup_thread(pid);
  319. if (!thread)
  320. return -ESRCH;
  321. } else {
  322. thread = get_cur_thread();
  323. get_thread(thread);
  324. }
  325. *head = (struct robust_list_head*)thread->robust_list;
  326. *len = sizeof(struct robust_list_head);
  327. put_thread(thread);
  328. return 0;
  329. }
/*
 * Walk a thread's robust-futex list: for each entry, clear the futex word to
 * 0 and wake every waiter blocked on it, so robust mutexes held by an exiting
 * thread do not leave other threads blocked forever.
 *
 * NOTE(review): the list entries live in user memory; this assumes they are
 * still mapped and readable when called -- confirm against callers.
 */
void release_robust_list(struct robust_list_head* head) {
    long futex_offset = head->futex_offset;
    struct robust_list* robust;
    struct robust_list* prev = &head->list;

    create_lock_runtime(&futex_list_lock);

    /* Traverse the circular singly-linked list; stop on a NULL link or when
     * it wraps back around to the head. */
    for (robust = prev->next; robust && robust != prev; prev = robust, robust = robust->next) {
        /* Each futex word sits at a fixed offset from its list entry. */
        void* futex_addr = (void*)robust + futex_offset;
        struct shim_futex_handle* tmp;
        struct shim_futex_handle* futex = NULL;

        lock(&futex_list_lock);
        LISTP_FOR_EACH_ENTRY(tmp, &futex_list, list) {
            if (tmp->uaddr == futex_addr) {
                futex = tmp;
                break;
            }
        }
        unlock(&futex_list_lock);

        /* No futex handle for this address means nobody ever waited on it. */
        if (!futex)
            continue;

        struct futex_waiter* waiter;
        struct futex_waiter* wtmp;
        struct shim_handle* hdl = container_of(futex, struct shim_handle, info.futex);
        get_handle(hdl);
        lock(&hdl->lock);

        debug("release robust list: %p\n", futex_addr);
        /* Clear the futex word, then wake all waiters. */
        *(int*)futex_addr = 0;
        LISTP_FOR_EACH_ENTRY_SAFE(waiter, wtmp, &futex->waiters, list) {
            del_futex_waiter_wakeup(waiter, futex);
        }

        unlock(&hdl->lock);
        put_handle(hdl);
    }
}
/* Function is called by Async Helper thread to wait on clear_child_tid_val_pal to be set to 0
 * (PAL does it when child thread finally exits). Next, *clear_child_tid is set to 0 and parent
 * threads are woken up. Since it is a callback to Async Helper thread, it must follow the
 * `void (*callback) (IDTYPE caller, void * arg)` signature even though we don't use caller. */
void release_clear_child_id(IDTYPE caller, void* clear_child_tids) {
    __UNUSED(caller);

    struct clear_child_tid_struct* child = (struct clear_child_tid_struct*)clear_child_tids;
    if (!child || !child->clear_child_tid)
        goto out;

    /* wait on clear_child_tid_val_pal; this signals that PAL layer exited child thread */
    while (__atomic_load_n(&child->clear_child_tid_val_pal, __ATOMIC_RELAXED) != 0) {
        __asm__ volatile ("pause"); /* x86 spin-loop hint while busy-waiting */
    }

    /* child thread exited, now parent can wake up; note that PAL layer can't set clear_child_tid
     * itself, because parent thread could spuriously wake up, notice 0 on clear_child_tid, and
     * continue its execution without waiting for this function to succeed first */
    __atomic_store_n(child->clear_child_tid, 0, __ATOMIC_RELAXED);

    /* at this point, child thread finally exited, can wake up parents if any */
    create_lock_runtime(&futex_list_lock);

    struct shim_futex_handle* tmp;
    struct shim_futex_handle* futex = NULL;

    /* Find the futex handle (if any) registered at the clear_child_tid
     * address; parents doing FUTEX_WAIT on it would have created one. */
    lock(&futex_list_lock);
    LISTP_FOR_EACH_ENTRY(tmp, &futex_list, list) {
        if (tmp->uaddr == (void*)child->clear_child_tid) {
            futex = tmp;
            break;
        }
    }
    unlock(&futex_list_lock);

    if (!futex) {
        /* no parent threads waiting on this child to exit */
        goto out;
    }

    debug("release futex at %p\n", child->clear_child_tid);

    struct futex_waiter* waiter;
    struct futex_waiter* wtmp;
    struct shim_handle* hdl = container_of(futex, struct shim_handle, info.futex);
    get_handle(hdl);
    lock(&hdl->lock);

    LISTP_FOR_EACH_ENTRY_SAFE(waiter, wtmp, &futex->waiters, list) {
        /* wake up every parent waiting on this child */
        del_futex_waiter_wakeup(waiter, futex);
    }

    unlock(&hdl->lock);
    put_handle(hdl);

out:
    /* ownership of `child` was transferred to this callback; free it on all paths */
    free(child);
}