shim_futex.c 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431
  1. /* -*- mode:c; c-file-style:"k&r"; c-basic-offset: 4; tab-width:4; indent-tabs-mode:nil; mode:auto-fill; fill-column:78; -*- */
  2. /* vim: set ts=4 sw=4 et tw=78 fo=cqt wm=0: */
  3. /* Copyright (C) 2014 OSCAR lab, Stony Brook University
  4. This file is part of Graphene Library OS.
  5. Graphene Library OS is free software: you can redistribute it and/or
  6. modify it under the terms of the GNU General Public License
  7. as published by the Free Software Foundation, either version 3 of the
  8. License, or (at your option) any later version.
  9. Graphene Library OS is distributed in the hope that it will be useful,
  10. but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  12. GNU General Public License for more details.
  13. You should have received a copy of the GNU General Public License
  14. along with this program. If not, see <http://www.gnu.org/licenses/>. */
  15. /*
  16. * shim_futex.c
  17. *
  18. * Implementation of system call "futex", "set_robust_list" and
  19. * "get_robust_list".
  20. */
  21. #include <shim_internal.h>
  22. #include <shim_table.h>
  23. #include <shim_thread.h>
  24. #include <shim_checkpoint.h>
  25. #include <shim_utils.h>
  26. #include <pal.h>
  27. #include <pal_error.h>
  28. #include <linux_list.h>
  29. #include <sys/syscall.h>
  30. #include <sys/mman.h>
  31. #include <asm/prctl.h>
  32. #include <linux/futex.h>
  33. #include <errno.h>
  34. #define FUTEX_MIN_VALUE 0
  35. #define FUTEX_MAX_VALUE 255
  36. struct futex_waiter {
  37. struct shim_thread * thread;
  38. uint32_t bitset;
  39. struct list_head list;
  40. };
  41. static LIST_HEAD(futex_list);
  42. static LOCKTYPE futex_list_lock;
  43. int shim_do_futex (unsigned int * uaddr, int op, int val, void * utime,
  44. unsigned int * uaddr2, int val3)
  45. {
  46. struct shim_futex_handle * tmp = NULL, * futex = NULL, * futex2 = NULL;
  47. struct shim_handle * hdl = NULL, * hdl2 = NULL;
  48. uint32_t futex_op = (op & FUTEX_CMD_MASK);
  49. uint32_t val2 = 0;
  50. int ret = 0;
  51. if (!uaddr || ((uintptr_t) uaddr % sizeof(unsigned int)))
  52. return -EINVAL;
  53. create_lock_runtime(&futex_list_lock);
  54. lock(futex_list_lock);
  55. list_for_each_entry(tmp, &futex_list, list)
  56. if (tmp->uaddr == uaddr) {
  57. futex = tmp;
  58. break;
  59. }
  60. if (futex) {
  61. hdl = container_of(futex, struct shim_handle, info.futex);
  62. get_handle(hdl);
  63. } else {
  64. if (!(hdl = get_new_handle())) {
  65. unlock(futex_list_lock);
  66. return -ENOMEM;
  67. }
  68. hdl->type = TYPE_FUTEX;
  69. futex = &hdl->info.futex;
  70. futex->uaddr = uaddr;
  71. get_handle(hdl);
  72. INIT_LIST_HEAD(&futex->waiters);
  73. INIT_LIST_HEAD(&futex->list);
  74. list_add_tail(&futex->list, &futex_list);
  75. }
  76. if (futex_op == FUTEX_WAKE_OP || futex_op == FUTEX_REQUEUE) {
  77. list_for_each_entry(tmp, &futex_list, list)
  78. if (tmp->uaddr == uaddr2) {
  79. futex2 = tmp;
  80. break;
  81. }
  82. if (futex2) {
  83. hdl2 = container_of(futex2, struct shim_handle, info.futex);
  84. get_handle(hdl2);
  85. } else {
  86. if (!(hdl2 = get_new_handle())) {
  87. unlock(futex_list_lock);
  88. return -ENOMEM;
  89. }
  90. hdl2->type = TYPE_FUTEX;
  91. futex2 = &hdl2->info.futex;
  92. futex2->uaddr = uaddr2;
  93. get_handle(hdl2);
  94. INIT_LIST_HEAD(&futex2->waiters);
  95. INIT_LIST_HEAD(&futex2->list);
  96. list_add_tail(&futex2->list, &futex_list);
  97. }
  98. val2 = (uint32_t)(uint64_t) utime;
  99. }
  100. unlock(futex_list_lock);
  101. lock(hdl->lock);
  102. switch (futex_op) {
  103. case FUTEX_WAIT:
  104. case FUTEX_WAIT_BITSET: {
  105. uint32_t bitset = (futex_op == FUTEX_WAIT_BITSET) ? val3 :
  106. 0xffffffff;
  107. uint64_t timeout_us = NO_TIMEOUT;
  108. debug("FUTEX_WAIT: %p (val = %d) vs %d mask = %08x, timeout ptr %p\n",
  109. uaddr, *uaddr, val, bitset, utime);
  110. if (*uaddr != val) {
  111. ret = -EAGAIN;
  112. break;
  113. }
  114. struct futex_waiter waiter;
  115. thread_setwait(&waiter.thread, NULL);
  116. INIT_LIST_HEAD(&waiter.list);
  117. waiter.bitset = bitset;
  118. list_add_tail(&waiter.list, &futex->waiters);
  119. unlock(hdl->lock);
  120. if (utime) {
  121. struct timespec *ts = (struct timespec*) utime;
  122. // Round to microsecs
  123. timeout_us = (ts->tv_sec * 1000000) + (ts->tv_nsec / 1000);
  124. // Check for the CLOCK_REALTIME flag
  125. if (futex_op == FUTEX_WAIT_BITSET) {
  126. // DEP 1/28/17: Should really differentiate clocks, but
  127. // Graphene only has one for now.
  128. //&& 0 != (op & FUTEX_CLOCK_REALTIME)) {
  129. uint64_t current_time = DkSystemTimeQuery();
  130. if (current_time == 0) {
  131. ret = -EINVAL;
  132. break;
  133. }
  134. timeout_us -= current_time;
  135. }
  136. }
  137. ret = thread_sleep(timeout_us);
  138. /* DEP 1/28/17: Should return ETIMEDOUT, not EAGAIN, on timeout. */
  139. if (ret == -EAGAIN)
  140. ret = -ETIMEDOUT;
  141. lock(hdl->lock);
  142. break;
  143. }
  144. case FUTEX_WAKE:
  145. case FUTEX_WAKE_BITSET: {
  146. uint32_t bitset = (futex_op == FUTEX_WAKE_BITSET) ? val3 :
  147. 0xffffffff;
  148. struct list_head *cursor;
  149. debug("FUTEX_WAKE: %p (val = %d) count = %d mask = %08x\n",
  150. uaddr, *uaddr, val, bitset);
  151. int cnt, nwaken = 0;
  152. list_for_each(cursor, &futex->waiters) {
  153. struct futex_waiter * waiter = list_entry(cursor,
  154. struct futex_waiter,
  155. list);
  156. if (!(bitset & waiter->bitset))
  157. continue;
  158. debug("FUTEX_WAKE wake thread %d: %p (val = %d)\n",
  159. waiter->thread->tid, uaddr, *uaddr);
  160. list_del(&waiter->list);
  161. thread_wakeup(waiter->thread);
  162. nwaken++;
  163. if (nwaken >= val) break;
  164. }
  165. ret = nwaken;
  166. debug("FUTEX_WAKE done: %p (val = %d) woke %d threads\n", uaddr, *uaddr, ret);
  167. break;
  168. }
  169. case FUTEX_WAKE_OP: {
  170. assert(futex2);
  171. int oldval = *(int *) uaddr2, newval, cmpval;
  172. newval = (val3 >> 12) & 0xfff;
  173. switch ((val3 >> 28) & 0xf) {
  174. case FUTEX_OP_SET: break;
  175. case FUTEX_OP_ADD: newval = oldval + newval; break;
  176. case FUTEX_OP_OR: newval = oldval | newval; break;
  177. case FUTEX_OP_ANDN: newval = oldval & ~newval; break;
  178. case FUTEX_OP_XOR: newval = oldval ^ newval; break;
  179. }
  180. cmpval = val3 & 0xfff;
  181. switch ((val3 >> 24) & 0xf) {
  182. case FUTEX_OP_CMP_EQ: cmpval = (oldval == cmpval); break;
  183. case FUTEX_OP_CMP_NE: cmpval = (oldval != cmpval); break;
  184. case FUTEX_OP_CMP_LT: cmpval = (oldval < cmpval); break;
  185. case FUTEX_OP_CMP_LE: cmpval = (oldval <= cmpval); break;
  186. case FUTEX_OP_CMP_GT: cmpval = (oldval > cmpval); break;
  187. case FUTEX_OP_CMP_GE: cmpval = (oldval >= cmpval); break;
  188. }
  189. *(int *) uaddr2 = newval;
  190. int cnt, nwaken = 0;
  191. debug("FUTEX_WAKE: %p (val = %d) count = %d\n", uaddr, *uaddr, val);
  192. for (cnt = 0 ; cnt < val ; cnt++) {
  193. if (list_empty(&futex->waiters))
  194. break;
  195. struct futex_waiter * waiter = list_entry(futex->waiters.next,
  196. struct futex_waiter,
  197. list);
  198. debug("FUTEX_WAKE wake thread %d: %p (val = %d)\n",
  199. waiter->thread->tid, uaddr, *uaddr);
  200. list_del(&waiter->list);
  201. thread_wakeup(waiter->thread);
  202. nwaken++;
  203. }
  204. if (cmpval) {
  205. unlock(hdl->lock);
  206. put_handle(hdl);
  207. hdl = hdl2;
  208. lock(hdl->lock);
  209. debug("FUTEX_WAKE: %p (val = %d) count = %d\n", uaddr2,
  210. *uaddr2, val2);
  211. for (cnt = 0 ; cnt < val2 ; cnt++) {
  212. if (list_empty(&futex2->waiters))
  213. break;
  214. struct futex_waiter * waiter = list_entry(futex2->waiters.next,
  215. struct futex_waiter,
  216. list);
  217. debug("FUTEX_WAKE wake thread %d: %p (val = %d)\n",
  218. waiter->thread->tid, uaddr2, *uaddr2);
  219. list_del(&waiter->list);
  220. thread_wakeup(waiter->thread);
  221. nwaken++;
  222. }
  223. }
  224. ret = nwaken;
  225. break;
  226. }
  227. case FUTEX_CMP_REQUEUE:
  228. if (*uaddr != val3) {
  229. ret = -EAGAIN;
  230. break;
  231. }
  232. case FUTEX_REQUEUE: {
  233. assert(futex2);
  234. int cnt;
  235. for (cnt = 0 ; cnt < val ; cnt++) {
  236. if (list_empty(&futex->waiters))
  237. break;
  238. struct futex_waiter * waiter = list_entry(futex->waiters.next,
  239. struct futex_waiter,
  240. list);
  241. list_del(&waiter->list);
  242. thread_wakeup(waiter->thread);
  243. }
  244. lock(hdl2->lock);
  245. list_splice_init(&futex->waiters, &futex2->waiters);
  246. unlock(hdl2->lock);
  247. put_handle(hdl2);
  248. ret = cnt;
  249. break;
  250. }
  251. case FUTEX_FD:
  252. ret = set_new_fd_handle(hdl, 0, NULL);
  253. break;
  254. default:
  255. debug("unsupported futex op: 0x%x\n", op);
  256. ret = -ENOSYS;
  257. break;
  258. }
  259. unlock(hdl->lock);
  260. put_handle(hdl);
  261. return ret;
  262. }
  263. int shim_do_set_robust_list (struct robust_list_head * head, size_t len)
  264. {
  265. struct shim_thread * self = get_cur_thread();
  266. assert(self);
  267. if (len != sizeof(struct robust_list_head))
  268. return -EINVAL;
  269. self->robust_list = head;
  270. return 0;
  271. }
  272. int shim_do_get_robust_list (pid_t pid, struct robust_list_head ** head,
  273. size_t * len)
  274. {
  275. if (!head)
  276. return -EFAULT;
  277. struct shim_thread * thread;
  278. if (pid) {
  279. thread = lookup_thread(pid);
  280. if (!thread)
  281. return -ESRCH;
  282. } else {
  283. thread = get_cur_thread();
  284. }
  285. *head = (struct robust_list_head *) thread->robust_list;
  286. *len = sizeof(struct robust_list_head);
  287. return 0;
  288. }
  289. void release_robust_list (struct robust_list_head * head)
  290. {
  291. long futex_offset = head->futex_offset;
  292. struct robust_list * robust, * prev = &head->list;
  293. create_lock_runtime(&futex_list_lock);
  294. for (robust = prev->next ; robust && robust != prev ;
  295. prev = robust, robust = robust->next) {
  296. void * futex_addr = (void *) robust + futex_offset;
  297. struct shim_futex_handle * tmp, * futex = NULL;
  298. lock(futex_list_lock);
  299. list_for_each_entry(tmp, &futex_list, list)
  300. if (tmp->uaddr == futex_addr) {
  301. futex = tmp;
  302. break;
  303. }
  304. unlock(futex_list_lock);
  305. if (!futex)
  306. continue;
  307. struct shim_handle * hdl =
  308. container_of(futex, struct shim_handle, info.futex);
  309. get_handle(hdl);
  310. lock(hdl->lock);
  311. debug("release robust list: %p\n", futex_addr);
  312. *(int *) futex_addr = 0;
  313. while (!list_empty(&futex->waiters)) {
  314. struct futex_waiter * waiter = list_entry(futex->waiters.next,
  315. struct futex_waiter,
  316. list);
  317. list_del(&waiter->list);
  318. thread_wakeup(waiter->thread);
  319. }
  320. unlock(hdl->lock);
  321. put_handle(hdl);
  322. }
  323. }
  324. void release_clear_child_id (int * clear_child_tid)
  325. {
  326. debug("clear child tid at %p\n", clear_child_tid);
  327. *clear_child_tid = 0;
  328. create_lock_runtime(&futex_list_lock);
  329. struct shim_futex_handle * tmp, * futex = NULL;
  330. lock(futex_list_lock);
  331. list_for_each_entry(tmp, &futex_list, list)
  332. if (tmp->uaddr == (void *) clear_child_tid) {
  333. futex = tmp;
  334. break;
  335. }
  336. unlock(futex_list_lock);
  337. if (!futex)
  338. return;
  339. struct shim_handle * hdl =
  340. container_of(futex, struct shim_handle, info.futex);
  341. get_handle(hdl);
  342. lock(hdl->lock);
  343. debug("release futex at %p\n", clear_child_tid);
  344. *clear_child_tid = 0;
  345. while (!list_empty(&futex->waiters)) {
  346. struct futex_waiter * waiter = list_entry(futex->waiters.next,
  347. struct futex_waiter,
  348. list);
  349. list_del(&waiter->list);
  350. thread_wakeup(waiter->thread);
  351. }
  352. unlock(hdl->lock);
  353. put_handle(hdl);
  354. }