shim_futex.c 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432
  1. /* Copyright (C) 2014 Stony Brook University
  2. This file is part of Graphene Library OS.
  3. Graphene Library OS is free software: you can redistribute it and/or
  4. modify it under the terms of the GNU Lesser General Public License
  5. as published by the Free Software Foundation, either version 3 of the
  6. License, or (at your option) any later version.
  7. Graphene Library OS is distributed in the hope that it will be useful,
  8. but WITHOUT ANY WARRANTY; without even the implied warranty of
  9. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  10. GNU Lesser General Public License for more details.
  11. You should have received a copy of the GNU Lesser General Public License
  12. along with this program. If not, see <http://www.gnu.org/licenses/>. */
  13. /*
  14. * shim_futex.c
  15. *
  16. * Implementation of system call "futex", "set_robust_list" and
  17. * "get_robust_list".
  18. */
  19. #include <shim_internal.h>
  20. #include <shim_table.h>
  21. #include <shim_thread.h>
  22. #include <shim_checkpoint.h>
  23. #include <shim_utils.h>
  24. #include <pal.h>
  25. #include <pal_error.h>
  26. #include <list.h>
  27. #include <sys/syscall.h>
  28. #include <sys/mman.h>
  29. #include <asm/prctl.h>
  30. #include <linux/futex.h>
  31. #include <errno.h>
  32. #define FUTEX_MIN_VALUE 0
  33. #define FUTEX_MAX_VALUE 255
  34. /* futex_waiters are linked off of shim_futex_handle by the waiters
  35. * listp */
  36. struct futex_waiter {
  37. struct shim_thread * thread;
  38. uint32_t bitset;
  39. LIST_TYPE(futex_waiter) list;
  40. };
  41. // Links shim_futex_handle by the list field
  42. DEFINE_LISTP(shim_futex_handle);
  43. static LISTP_TYPE(shim_futex_handle) futex_list = LISTP_INIT;
  44. static struct shim_lock futex_list_lock;
  45. int shim_do_futex (int * uaddr, int op, int val, void * utime,
  46. int * uaddr2, int val3)
  47. {
  48. struct shim_futex_handle * tmp = NULL, * futex = NULL, * futex2 = NULL;
  49. struct shim_handle * hdl = NULL, * hdl2 = NULL;
  50. uint32_t futex_op = (op & FUTEX_CMD_MASK);
  51. uint32_t val2 = 0;
  52. int ret = 0;
  53. if (!uaddr || ((uintptr_t) uaddr % sizeof(unsigned int)))
  54. return -EINVAL;
  55. create_lock_runtime(&futex_list_lock);
  56. lock(&futex_list_lock);
  57. LISTP_FOR_EACH_ENTRY(tmp, &futex_list, list)
  58. if (tmp->uaddr == uaddr) {
  59. futex = tmp;
  60. break;
  61. }
  62. if (futex) {
  63. hdl = container_of(futex, struct shim_handle, info.futex);
  64. get_handle(hdl);
  65. } else {
  66. if (!(hdl = get_new_handle())) {
  67. unlock(&futex_list_lock);
  68. return -ENOMEM;
  69. }
  70. hdl->type = TYPE_FUTEX;
  71. futex = &hdl->info.futex;
  72. futex->uaddr = uaddr;
  73. get_handle(hdl);
  74. INIT_LISTP(&futex->waiters);
  75. INIT_LIST_HEAD(futex, list);
  76. LISTP_ADD_TAIL(futex, &futex_list, list);
  77. }
  78. if (futex_op == FUTEX_WAKE_OP || futex_op == FUTEX_REQUEUE ||
  79. futex_op == FUTEX_CMP_REQUEUE) {
  80. LISTP_FOR_EACH_ENTRY(tmp, &futex_list, list)
  81. if (tmp->uaddr == uaddr2) {
  82. futex2 = tmp;
  83. break;
  84. }
  85. if (futex2) {
  86. hdl2 = container_of(futex2, struct shim_handle, info.futex);
  87. get_handle(hdl2);
  88. } else {
  89. if (!(hdl2 = get_new_handle())) {
  90. unlock(&futex_list_lock);
  91. return -ENOMEM;
  92. }
  93. hdl2->type = TYPE_FUTEX;
  94. futex2 = &hdl2->info.futex;
  95. futex2->uaddr = uaddr2;
  96. get_handle(hdl2);
  97. INIT_LISTP(&futex2->waiters);
  98. INIT_LIST_HEAD(futex2, list);
  99. LISTP_ADD_TAIL(futex2, &futex_list, list);
  100. }
  101. val2 = (uint32_t)(uint64_t) utime;
  102. }
  103. unlock(&futex_list_lock);
  104. lock(&hdl->lock);
  105. uint64_t timeout_us = NO_TIMEOUT;
  106. switch (futex_op) {
  107. case FUTEX_WAIT_BITSET:
  108. if (utime && timeout_us == NO_TIMEOUT) {
  109. struct timespec *ts = (struct timespec*) utime;
  110. // Round to microsecs
  111. timeout_us = (ts->tv_sec * 1000000) + (ts->tv_nsec / 1000);
  112. /* Check for the CLOCK_REALTIME flag
  113. * DEP 1/28/17: Should really differentiate clocks, but
  114. * Graphene only has one for now.
  115. * if (futex_op & FUTEX_CLOCK_REALTIME) { */
  116. uint64_t current_time = DkSystemTimeQuery();
  117. if (current_time == 0) {
  118. ret = -EINVAL;
  119. break;
  120. }
  121. timeout_us -= current_time;
  122. }
  123. /* Note: for FUTEX_WAIT, timeout is interpreted as a relative
  124. * value. This differs from other futex operations, where
  125. * timeout is interpreted as an absolute value. To obtain the
  126. * equivalent of FUTEX_WAIT with an absolute timeout, employ
  127. * FUTEX_WAIT_BITSET with val3 specified as
  128. * FUTEX_BITSET_MATCH_ANY. */
  129. /* FALLTHROUGH */
  130. case FUTEX_WAIT:
  131. if (utime && timeout_us == NO_TIMEOUT) {
  132. struct timespec *ts = (struct timespec*) utime;
  133. // Round to microsecs
  134. timeout_us = (ts->tv_sec * 1000000) + (ts->tv_nsec / 1000);
  135. }
  136. {
  137. uint32_t bitset = (futex_op == FUTEX_WAIT_BITSET) ? val3 :
  138. 0xffffffff;
  139. debug("FUTEX_WAIT: %p (val = %d) vs %d mask = %08x, timeout ptr %p\n",
  140. uaddr, *uaddr, val, bitset, utime);
  141. if (*uaddr != val) {
  142. ret = -EAGAIN;
  143. break;
  144. }
  145. struct futex_waiter waiter;
  146. thread_setwait(&waiter.thread, NULL);
  147. INIT_LIST_HEAD(&waiter, list);
  148. waiter.bitset = bitset;
  149. LISTP_ADD_TAIL(&waiter, &futex->waiters, list);
  150. unlock(&hdl->lock);
  151. ret = thread_sleep(timeout_us);
  152. /* DEP 1/28/17: Should return ETIMEDOUT, not EAGAIN, on timeout. */
  153. if (ret == -EAGAIN)
  154. ret = -ETIMEDOUT;
  155. if (ret == -ETIMEDOUT)
  156. LISTP_DEL(&waiter, &futex->waiters, list);
  157. lock(&hdl->lock);
  158. /* Chia-Che 10/17/17: FUTEX_WAKE should remove the waiter
  159. * from the list; if not, we should remove it now. */
  160. if (!LIST_EMPTY(&waiter, list))
  161. LISTP_DEL(&waiter, &futex->waiters, list);
  162. break;
  163. }
  164. case FUTEX_WAKE:
  165. case FUTEX_WAKE_BITSET: {
  166. struct futex_waiter * waiter, * wtmp;
  167. int nwaken = 0;
  168. uint32_t bitset = (futex_op == FUTEX_WAKE_BITSET) ? val3 :
  169. 0xffffffff;
  170. debug("FUTEX_WAKE: %p (val = %d) count = %d mask = %08x\n",
  171. uaddr, *uaddr, val, bitset);
  172. LISTP_FOR_EACH_ENTRY_SAFE(waiter, wtmp, &futex->waiters, list) {
  173. if (!(bitset & waiter->bitset))
  174. continue;
  175. debug("FUTEX_WAKE wake thread %d: %p (val = %d)\n",
  176. waiter->thread->tid, uaddr, *uaddr);
  177. LISTP_DEL_INIT(waiter, &futex->waiters, list);
  178. thread_wakeup(waiter->thread);
  179. nwaken++;
  180. if (nwaken >= val) break;
  181. }
  182. ret = nwaken;
  183. debug("FUTEX_WAKE done: %p (val = %d) woke %d threads\n", uaddr, *uaddr, ret);
  184. break;
  185. }
  186. case FUTEX_WAKE_OP: {
  187. assert(futex2);
  188. int oldval = *(int *) uaddr2, newval, cmpval;
  189. newval = (val3 >> 12) & 0xfff;
  190. switch ((val3 >> 28) & 0xf) {
  191. case FUTEX_OP_SET: break;
  192. case FUTEX_OP_ADD: newval = oldval + newval; break;
  193. case FUTEX_OP_OR: newval = oldval | newval; break;
  194. case FUTEX_OP_ANDN: newval = oldval & ~newval; break;
  195. case FUTEX_OP_XOR: newval = oldval ^ newval; break;
  196. }
  197. cmpval = val3 & 0xfff;
  198. switch ((val3 >> 24) & 0xf) {
  199. case FUTEX_OP_CMP_EQ: cmpval = (oldval == cmpval); break;
  200. case FUTEX_OP_CMP_NE: cmpval = (oldval != cmpval); break;
  201. case FUTEX_OP_CMP_LT: cmpval = (oldval < cmpval); break;
  202. case FUTEX_OP_CMP_LE: cmpval = (oldval <= cmpval); break;
  203. case FUTEX_OP_CMP_GT: cmpval = (oldval > cmpval); break;
  204. case FUTEX_OP_CMP_GE: cmpval = (oldval >= cmpval); break;
  205. }
  206. *(int *) uaddr2 = newval;
  207. struct futex_waiter * waiter, * wtmp;
  208. int nwaken = 0;
  209. debug("FUTEX_WAKE_OP: %p (val = %d) count = %d\n", uaddr, *uaddr, val);
  210. LISTP_FOR_EACH_ENTRY_SAFE(waiter, wtmp, &futex->waiters, list) {
  211. debug("FUTEX_WAKE_OP wake thread %d: %p (val = %d)\n",
  212. waiter->thread->tid, uaddr, *uaddr);
  213. LISTP_DEL_INIT(waiter, &futex->waiters, list);
  214. thread_wakeup(waiter->thread);
  215. nwaken++;
  216. }
  217. if (cmpval) {
  218. unlock(&hdl->lock);
  219. put_handle(hdl);
  220. hdl = hdl2;
  221. lock(&hdl->lock);
  222. debug("FUTEX_WAKE: %p (val = %d) count = %d\n", uaddr2,
  223. *uaddr2, val2);
  224. LISTP_FOR_EACH_ENTRY_SAFE(waiter, wtmp, &futex2->waiters, list) {
  225. debug("FUTEX_WAKE_OP(2) wake thread %d: %p (val = %d)\n",
  226. waiter->thread->tid, uaddr2, *uaddr2);
  227. LISTP_DEL_INIT(waiter, &futex2->waiters, list);
  228. thread_wakeup(waiter->thread);
  229. nwaken++;
  230. }
  231. }
  232. ret = nwaken;
  233. break;
  234. }
  235. case FUTEX_CMP_REQUEUE:
  236. if (*uaddr != val3) {
  237. ret = -EAGAIN;
  238. break;
  239. }
  240. /* FALLTHROUGH */
  241. case FUTEX_REQUEUE: {
  242. assert(futex2);
  243. struct futex_waiter * waiter, * wtmp;
  244. int nwaken = 0;
  245. LISTP_FOR_EACH_ENTRY_SAFE(waiter, wtmp, &futex->waiters, list) {
  246. LISTP_DEL_INIT(waiter, &futex->waiters, list);
  247. thread_wakeup(waiter->thread);
  248. nwaken++;
  249. if (nwaken >= val)
  250. break;
  251. }
  252. lock(&hdl2->lock);
  253. LISTP_SPLICE_INIT(&futex->waiters, &futex2->waiters, list, futex_waiter);
  254. unlock(&hdl2->lock);
  255. put_handle(hdl2);
  256. ret = nwaken;
  257. break;
  258. }
  259. case FUTEX_FD:
  260. ret = set_new_fd_handle(hdl, 0, NULL);
  261. break;
  262. default:
  263. debug("unsupported futex op: 0x%x\n", op);
  264. ret = -ENOSYS;
  265. break;
  266. }
  267. unlock(&hdl->lock);
  268. put_handle(hdl);
  269. return ret;
  270. }
  271. int shim_do_set_robust_list (struct robust_list_head * head, size_t len)
  272. {
  273. struct shim_thread * self = get_cur_thread();
  274. assert(self);
  275. if (len != sizeof(struct robust_list_head))
  276. return -EINVAL;
  277. self->robust_list = head;
  278. return 0;
  279. }
  280. int shim_do_get_robust_list (pid_t pid, struct robust_list_head ** head,
  281. size_t * len)
  282. {
  283. if (!head)
  284. return -EFAULT;
  285. struct shim_thread * thread;
  286. if (pid) {
  287. thread = lookup_thread(pid);
  288. if (!thread)
  289. return -ESRCH;
  290. } else {
  291. thread = get_cur_thread();
  292. }
  293. *head = (struct robust_list_head *) thread->robust_list;
  294. *len = sizeof(struct robust_list_head);
  295. return 0;
  296. }
  297. void release_robust_list (struct robust_list_head * head)
  298. {
  299. long futex_offset = head->futex_offset;
  300. struct robust_list * robust, * prev = &head->list;
  301. create_lock_runtime(&futex_list_lock);
  302. for (robust = prev->next ; robust && robust != prev ;
  303. prev = robust, robust = robust->next) {
  304. void * futex_addr = (void *) robust + futex_offset;
  305. struct shim_futex_handle * tmp, * futex = NULL;
  306. lock(&futex_list_lock);
  307. LISTP_FOR_EACH_ENTRY(tmp, &futex_list, list)
  308. if (tmp->uaddr == futex_addr) {
  309. futex = tmp;
  310. break;
  311. }
  312. unlock(&futex_list_lock);
  313. if (!futex)
  314. continue;
  315. struct futex_waiter * waiter, * wtmp;
  316. struct shim_handle * hdl =
  317. container_of(futex, struct shim_handle, info.futex);
  318. get_handle(hdl);
  319. lock(&hdl->lock);
  320. debug("release robust list: %p\n", futex_addr);
  321. *(int *) futex_addr = 0;
  322. LISTP_FOR_EACH_ENTRY_SAFE(waiter, wtmp, &futex->waiters, list) {
  323. LISTP_DEL_INIT(waiter, &futex->waiters, list);
  324. thread_wakeup(waiter->thread);
  325. }
  326. unlock(&hdl->lock);
  327. put_handle(hdl);
  328. }
  329. }
  330. void release_clear_child_id (int * clear_child_tid)
  331. {
  332. debug("clear child tid at %p\n", clear_child_tid);
  333. *clear_child_tid = 0;
  334. create_lock_runtime(&futex_list_lock);
  335. struct shim_futex_handle * tmp, * futex = NULL;
  336. lock(&futex_list_lock);
  337. LISTP_FOR_EACH_ENTRY(tmp, &futex_list, list)
  338. if (tmp->uaddr == (void *) clear_child_tid) {
  339. futex = tmp;
  340. break;
  341. }
  342. unlock(&futex_list_lock);
  343. if (!futex)
  344. return;
  345. struct futex_waiter * waiter, * wtmp;
  346. struct shim_handle * hdl =
  347. container_of(futex, struct shim_handle, info.futex);
  348. get_handle(hdl);
  349. lock(&hdl->lock);
  350. debug("release futex at %p\n", clear_child_tid);
  351. *clear_child_tid = 0;
  352. LISTP_FOR_EACH_ENTRY_SAFE(waiter, wtmp, &futex->waiters, list) {
  353. LISTP_DEL_INIT(waiter, &futex->waiters, list);
  354. thread_wakeup(waiter->thread);
  355. }
  356. unlock(&hdl->lock);
  357. put_handle(hdl);
  358. }