shim_futex.c 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434
  1. /* -*- mode:c; c-file-style:"k&r"; c-basic-offset: 4; tab-width:4; indent-tabs-mode:nil; mode:auto-fill; fill-column:78; -*- */
  2. /* vim: set ts=4 sw=4 et tw=78 fo=cqt wm=0: */
  3. /* Copyright (C) 2014 Stony Brook University
  4. This file is part of Graphene Library OS.
  5. Graphene Library OS is free software: you can redistribute it and/or
  6. modify it under the terms of the GNU Lesser General Public License
  7. as published by the Free Software Foundation, either version 3 of the
  8. License, or (at your option) any later version.
  9. Graphene Library OS is distributed in the hope that it will be useful,
  10. but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  12. GNU Lesser General Public License for more details.
  13. You should have received a copy of the GNU Lesser General Public License
  14. along with this program. If not, see <http://www.gnu.org/licenses/>. */
  15. /*
  16. * shim_futex.c
  17. *
  18. * Implementation of system call "futex", "set_robust_list" and
  19. * "get_robust_list".
  20. */
  21. #include <shim_internal.h>
  22. #include <shim_table.h>
  23. #include <shim_thread.h>
  24. #include <shim_checkpoint.h>
  25. #include <shim_utils.h>
  26. #include <pal.h>
  27. #include <pal_error.h>
  28. #include <list.h>
  29. #include <sys/syscall.h>
  30. #include <sys/mman.h>
  31. #include <asm/prctl.h>
  32. #include <linux/futex.h>
  33. #include <errno.h>
/* Bounds for a futex value. NOTE(review): neither macro is referenced in the
 * visible portion of this file -- confirm whether they are still needed. */
#define FUTEX_MIN_VALUE 0
#define FUTEX_MAX_VALUE 255
/* One thread blocked on a futex. futex_waiters are linked off of
 * shim_futex_handle by the waiters listp. shim_do_futex() places the waiter
 * on the stack of the sleeping thread, so a waiter must be unlinked before
 * that thread returns from the system call. */
struct futex_waiter {
    /* Sleeping thread; filled in by thread_setwait() and handed to
     * thread_wakeup() by the waking side. */
    struct shim_thread * thread;
    /* Wake mask: FUTEX_WAKE/FUTEX_WAKE_BITSET skip this waiter unless their
     * own mask shares at least one bit with it. */
    uint32_t bitset;
    /* Linkage into shim_futex_handle's `waiters` list. */
    LIST_TYPE(futex_waiter) list;
};
// Links shim_futex_handle by the list field
DEFINE_LISTP(shim_futex_handle);
/* Every futex handle created by shim_do_futex() is appended here; lookups
 * match on the handle's uaddr. Nothing in this file removes an entry. */
static LISTP_TYPE(shim_futex_handle) futex_list = LISTP_INIT;
/* Held around every traversal or modification of futex_list. Each user calls
 * create_lock_runtime() on it before the first lock(). */
static LOCKTYPE futex_list_lock;
  47. int shim_do_futex (unsigned int * uaddr, int op, int val, void * utime,
  48. unsigned int * uaddr2, int val3)
  49. {
  50. struct shim_futex_handle * tmp = NULL, * futex = NULL, * futex2 = NULL;
  51. struct shim_handle * hdl = NULL, * hdl2 = NULL;
  52. uint32_t futex_op = (op & FUTEX_CMD_MASK);
  53. uint32_t val2 = 0;
  54. int ret = 0;
  55. if (!uaddr || ((uintptr_t) uaddr % sizeof(unsigned int)))
  56. return -EINVAL;
  57. create_lock_runtime(&futex_list_lock);
  58. lock(futex_list_lock);
  59. listp_for_each_entry(tmp, &futex_list, list)
  60. if (tmp->uaddr == uaddr) {
  61. futex = tmp;
  62. break;
  63. }
  64. if (futex) {
  65. hdl = container_of(futex, struct shim_handle, info.futex);
  66. get_handle(hdl);
  67. } else {
  68. if (!(hdl = get_new_handle())) {
  69. unlock(futex_list_lock);
  70. return -ENOMEM;
  71. }
  72. hdl->type = TYPE_FUTEX;
  73. futex = &hdl->info.futex;
  74. futex->uaddr = uaddr;
  75. get_handle(hdl);
  76. INIT_LISTP(&futex->waiters);
  77. INIT_LIST_HEAD(futex, list);
  78. listp_add_tail(futex, &futex_list, list);
  79. }
  80. if (futex_op == FUTEX_WAKE_OP || futex_op == FUTEX_REQUEUE ||
  81. futex_op == FUTEX_CMP_REQUEUE) {
  82. listp_for_each_entry(tmp, &futex_list, list)
  83. if (tmp->uaddr == uaddr2) {
  84. futex2 = tmp;
  85. break;
  86. }
  87. if (futex2) {
  88. hdl2 = container_of(futex2, struct shim_handle, info.futex);
  89. get_handle(hdl2);
  90. } else {
  91. if (!(hdl2 = get_new_handle())) {
  92. unlock(futex_list_lock);
  93. return -ENOMEM;
  94. }
  95. hdl2->type = TYPE_FUTEX;
  96. futex2 = &hdl2->info.futex;
  97. futex2->uaddr = uaddr2;
  98. get_handle(hdl2);
  99. INIT_LISTP(&futex2->waiters);
  100. INIT_LIST_HEAD(futex2, list);
  101. listp_add_tail(futex2, &futex_list, list);
  102. }
  103. val2 = (uint32_t)(uint64_t) utime;
  104. }
  105. unlock(futex_list_lock);
  106. lock(hdl->lock);
  107. uint64_t timeout_us = NO_TIMEOUT;
  108. switch (futex_op) {
  109. case FUTEX_WAIT_BITSET:
  110. if (utime && timeout_us == NO_TIMEOUT) {
  111. struct timespec *ts = (struct timespec*) utime;
  112. // Round to microsecs
  113. timeout_us = (ts->tv_sec * 1000000) + (ts->tv_nsec / 1000);
  114. // Check for the CLOCK_REALTIME flag
  115. if (futex_op == FUTEX_WAIT_BITSET) {
  116. // DEP 1/28/17: Should really differentiate clocks, but
  117. // Graphene only has one for now.
  118. //&& 0 != (op & FUTEX_CLOCK_REALTIME)) {
  119. uint64_t current_time = DkSystemTimeQuery();
  120. if (current_time == 0) {
  121. ret = -EINVAL;
  122. break;
  123. }
  124. timeout_us -= current_time;
  125. }
  126. }
  127. /* Note: for FUTEX_WAIT, timeout is interpreted as a relative
  128. * value. This differs from other futex operations, where
  129. * timeout is interpreted as an absolute value. To obtain the
  130. * equivalent of FUTEX_WAIT with an absolute timeout, employ
  131. * FUTEX_WAIT_BITSET with val3 specified as
  132. * FUTEX_BITSET_MATCH_ANY. */
  133. case FUTEX_WAIT:
  134. if (utime && timeout_us == NO_TIMEOUT) {
  135. struct timespec *ts = (struct timespec*) utime;
  136. // Round to microsecs
  137. timeout_us = (ts->tv_sec * 1000000) + (ts->tv_nsec / 1000);
  138. }
  139. {
  140. uint32_t bitset = (futex_op == FUTEX_WAIT_BITSET) ? val3 :
  141. 0xffffffff;
  142. debug("FUTEX_WAIT: %p (val = %d) vs %d mask = %08x, timeout ptr %p\n",
  143. uaddr, *uaddr, val, bitset, utime);
  144. if (*uaddr != val) {
  145. ret = -EAGAIN;
  146. break;
  147. }
  148. struct futex_waiter waiter;
  149. thread_setwait(&waiter.thread, NULL);
  150. INIT_LIST_HEAD(&waiter, list);
  151. waiter.bitset = bitset;
  152. listp_add_tail(&waiter, &futex->waiters, list);
  153. unlock(hdl->lock);
  154. ret = thread_sleep(timeout_us);
  155. /* DEP 1/28/17: Should return ETIMEDOUT, not EAGAIN, on timeout. */
  156. if (ret == -EAGAIN)
  157. ret = -ETIMEDOUT;
  158. if (ret == -ETIMEDOUT)
  159. listp_del(&waiter, &futex->waiters, list);
  160. lock(hdl->lock);
  161. /* Chia-Che 10/17/17: FUTEX_WAKE should remove the waiter
  162. * from the list; if not, we should remove it now. */
  163. if (!list_empty(&waiter, list))
  164. listp_del(&waiter, &futex->waiters, list);
  165. break;
  166. }
  167. case FUTEX_WAKE:
  168. case FUTEX_WAKE_BITSET: {
  169. struct futex_waiter * waiter, * wtmp;
  170. int nwaken = 0;
  171. uint32_t bitset = (futex_op == FUTEX_WAKE_BITSET) ? val3 :
  172. 0xffffffff;
  173. debug("FUTEX_WAKE: %p (val = %d) count = %d mask = %08x\n",
  174. uaddr, *uaddr, val, bitset);
  175. listp_for_each_entry_safe(waiter, wtmp, &futex->waiters, list) {
  176. if (!(bitset & waiter->bitset))
  177. continue;
  178. debug("FUTEX_WAKE wake thread %d: %p (val = %d)\n",
  179. waiter->thread->tid, uaddr, *uaddr);
  180. listp_del_init(waiter, &futex->waiters, list);
  181. thread_wakeup(waiter->thread);
  182. nwaken++;
  183. if (nwaken >= val) break;
  184. }
  185. ret = nwaken;
  186. debug("FUTEX_WAKE done: %p (val = %d) woke %d threads\n", uaddr, *uaddr, ret);
  187. break;
  188. }
  189. case FUTEX_WAKE_OP: {
  190. assert(futex2);
  191. int oldval = *(int *) uaddr2, newval, cmpval;
  192. newval = (val3 >> 12) & 0xfff;
  193. switch ((val3 >> 28) & 0xf) {
  194. case FUTEX_OP_SET: break;
  195. case FUTEX_OP_ADD: newval = oldval + newval; break;
  196. case FUTEX_OP_OR: newval = oldval | newval; break;
  197. case FUTEX_OP_ANDN: newval = oldval & ~newval; break;
  198. case FUTEX_OP_XOR: newval = oldval ^ newval; break;
  199. }
  200. cmpval = val3 & 0xfff;
  201. switch ((val3 >> 24) & 0xf) {
  202. case FUTEX_OP_CMP_EQ: cmpval = (oldval == cmpval); break;
  203. case FUTEX_OP_CMP_NE: cmpval = (oldval != cmpval); break;
  204. case FUTEX_OP_CMP_LT: cmpval = (oldval < cmpval); break;
  205. case FUTEX_OP_CMP_LE: cmpval = (oldval <= cmpval); break;
  206. case FUTEX_OP_CMP_GT: cmpval = (oldval > cmpval); break;
  207. case FUTEX_OP_CMP_GE: cmpval = (oldval >= cmpval); break;
  208. }
  209. *(int *) uaddr2 = newval;
  210. struct futex_waiter * waiter, * wtmp;
  211. int nwaken = 0;
  212. debug("FUTEX_WAKE_OP: %p (val = %d) count = %d\n", uaddr, *uaddr, val);
  213. listp_for_each_entry_safe(waiter, wtmp, &futex->waiters, list) {
  214. debug("FUTEX_WAKE_OP wake thread %d: %p (val = %d)\n",
  215. waiter->thread->tid, uaddr, *uaddr);
  216. listp_del_init(waiter, &futex->waiters, list);
  217. thread_wakeup(waiter->thread);
  218. nwaken++;
  219. }
  220. if (cmpval) {
  221. unlock(hdl->lock);
  222. put_handle(hdl);
  223. hdl = hdl2;
  224. lock(hdl->lock);
  225. debug("FUTEX_WAKE: %p (val = %d) count = %d\n", uaddr2,
  226. *uaddr2, val2);
  227. listp_for_each_entry_safe(waiter, wtmp, &futex2->waiters, list) {
  228. debug("FUTEX_WAKE_OP(2) wake thread %d: %p (val = %d)\n",
  229. waiter->thread->tid, uaddr2, *uaddr2);
  230. listp_del_init(waiter, &futex2->waiters, list);
  231. thread_wakeup(waiter->thread);
  232. nwaken++;
  233. }
  234. }
  235. ret = nwaken;
  236. break;
  237. }
  238. case FUTEX_CMP_REQUEUE:
  239. if (*uaddr != val3) {
  240. ret = -EAGAIN;
  241. break;
  242. }
  243. case FUTEX_REQUEUE: {
  244. assert(futex2);
  245. struct futex_waiter * waiter, * wtmp;
  246. int nwaken = 0;
  247. listp_for_each_entry_safe(waiter, wtmp, &futex->waiters, list) {
  248. listp_del_init(waiter, &futex->waiters, list);
  249. thread_wakeup(waiter->thread);
  250. nwaken++;
  251. if (nwaken >= val)
  252. break;
  253. }
  254. lock(hdl2->lock);
  255. listp_splice_init(&futex->waiters, &futex2->waiters, list, futex_waiter);
  256. unlock(hdl2->lock);
  257. put_handle(hdl2);
  258. ret = nwaken;
  259. break;
  260. }
  261. case FUTEX_FD:
  262. ret = set_new_fd_handle(hdl, 0, NULL);
  263. break;
  264. default:
  265. debug("unsupported futex op: 0x%x\n", op);
  266. ret = -ENOSYS;
  267. break;
  268. }
  269. unlock(hdl->lock);
  270. put_handle(hdl);
  271. return ret;
  272. }
  273. int shim_do_set_robust_list (struct robust_list_head * head, size_t len)
  274. {
  275. struct shim_thread * self = get_cur_thread();
  276. assert(self);
  277. if (len != sizeof(struct robust_list_head))
  278. return -EINVAL;
  279. self->robust_list = head;
  280. return 0;
  281. }
  282. int shim_do_get_robust_list (pid_t pid, struct robust_list_head ** head,
  283. size_t * len)
  284. {
  285. if (!head)
  286. return -EFAULT;
  287. struct shim_thread * thread;
  288. if (pid) {
  289. thread = lookup_thread(pid);
  290. if (!thread)
  291. return -ESRCH;
  292. } else {
  293. thread = get_cur_thread();
  294. }
  295. *head = (struct robust_list_head *) thread->robust_list;
  296. *len = sizeof(struct robust_list_head);
  297. return 0;
  298. }
  299. void release_robust_list (struct robust_list_head * head)
  300. {
  301. long futex_offset = head->futex_offset;
  302. struct robust_list * robust, * prev = &head->list;
  303. create_lock_runtime(&futex_list_lock);
  304. for (robust = prev->next ; robust && robust != prev ;
  305. prev = robust, robust = robust->next) {
  306. void * futex_addr = (void *) robust + futex_offset;
  307. struct shim_futex_handle * tmp, * futex = NULL;
  308. lock(futex_list_lock);
  309. listp_for_each_entry(tmp, &futex_list, list)
  310. if (tmp->uaddr == futex_addr) {
  311. futex = tmp;
  312. break;
  313. }
  314. unlock(futex_list_lock);
  315. if (!futex)
  316. continue;
  317. struct futex_waiter * waiter, * wtmp;
  318. struct shim_handle * hdl =
  319. container_of(futex, struct shim_handle, info.futex);
  320. get_handle(hdl);
  321. lock(hdl->lock);
  322. debug("release robust list: %p\n", futex_addr);
  323. *(int *) futex_addr = 0;
  324. listp_for_each_entry_safe(waiter, wtmp, &futex->waiters, list) {
  325. listp_del_init(waiter, &futex->waiters, list);
  326. thread_wakeup(waiter->thread);
  327. }
  328. unlock(hdl->lock);
  329. put_handle(hdl);
  330. }
  331. }
  332. void release_clear_child_id (int * clear_child_tid)
  333. {
  334. debug("clear child tid at %p\n", clear_child_tid);
  335. *clear_child_tid = 0;
  336. create_lock_runtime(&futex_list_lock);
  337. struct shim_futex_handle * tmp, * futex = NULL;
  338. lock(futex_list_lock);
  339. listp_for_each_entry(tmp, &futex_list, list)
  340. if (tmp->uaddr == (void *) clear_child_tid) {
  341. futex = tmp;
  342. break;
  343. }
  344. unlock(futex_list_lock);
  345. if (!futex)
  346. return;
  347. struct futex_waiter * waiter, * wtmp;
  348. struct shim_handle * hdl =
  349. container_of(futex, struct shim_handle, info.futex);
  350. get_handle(hdl);
  351. lock(hdl->lock);
  352. debug("release futex at %p\n", clear_child_tid);
  353. *clear_child_tid = 0;
  354. listp_for_each_entry_safe(waiter, wtmp, &futex->waiters, list) {
  355. listp_del_init(waiter, &futex->waiters, list);
  356. thread_wakeup(waiter->thread);
  357. }
  358. unlock(hdl->lock);
  359. put_handle(hdl);
  360. }