shim_futex.c 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452
  1. /* Copyright (C) 2014 Stony Brook University
  2. This file is part of Graphene Library OS.
  3. Graphene Library OS is free software: you can redistribute it and/or
  4. modify it under the terms of the GNU Lesser General Public License
  5. as published by the Free Software Foundation, either version 3 of the
  6. License, or (at your option) any later version.
  7. Graphene Library OS is distributed in the hope that it will be useful,
  8. but WITHOUT ANY WARRANTY; without even the implied warranty of
  9. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  10. GNU Lesser General Public License for more details.
  11. You should have received a copy of the GNU Lesser General Public License
  12. along with this program. If not, see <http://www.gnu.org/licenses/>. */
  13. /*
  14. * shim_futex.c
  15. *
  16. * Implementation of system call "futex", "set_robust_list" and
  17. * "get_robust_list".
  18. */
  19. #include <asm/prctl.h>
  20. #include <errno.h>
  21. #include <linux/futex.h>
  22. #include <list.h>
  23. #include <pal.h>
  24. #include <pal_error.h>
  25. #include <shim_checkpoint.h>
  26. #include <shim_internal.h>
  27. #include <shim_table.h>
  28. #include <shim_thread.h>
  29. #include <shim_utils.h>
  30. #include <sys/mman.h>
  31. #include <sys/syscall.h>
  32. #define FUTEX_MIN_VALUE 0
  33. #define FUTEX_MAX_VALUE 255
  34. /* futex_waiters are linked off of shim_futex_handle by the waiters
  35. * listp */
  36. struct futex_waiter {
  37. struct shim_thread* thread;
  38. uint32_t bitset;
  39. LIST_TYPE(futex_waiter) list;
  40. };
  41. // Links shim_futex_handle by the list field
  42. DEFINE_LISTP(shim_futex_handle);
  43. static LISTP_TYPE(shim_futex_handle) futex_list = LISTP_INIT;
  44. static struct shim_lock futex_list_lock;
  45. int shim_do_futex(int* uaddr, int op, int val, void* utime, int* uaddr2, int val3) {
  46. struct shim_futex_handle* tmp = NULL;
  47. struct shim_futex_handle* futex = NULL;
  48. struct shim_futex_handle* futex2 = NULL;
  49. struct shim_handle* hdl = NULL;
  50. struct shim_handle* hdl2 = NULL;
  51. uint32_t futex_op = (op & FUTEX_CMD_MASK);
  52. uint32_t val2 = 0;
  53. int ret = 0;
  54. if (!uaddr || ((uintptr_t)uaddr % sizeof(unsigned int)))
  55. return -EINVAL;
  56. create_lock_runtime(&futex_list_lock);
  57. lock(&futex_list_lock);
  58. LISTP_FOR_EACH_ENTRY(tmp, &futex_list, list) {
  59. if (tmp->uaddr == uaddr) {
  60. futex = tmp;
  61. break;
  62. }
  63. }
  64. if (futex) {
  65. hdl = container_of(futex, struct shim_handle, info.futex);
  66. get_handle(hdl);
  67. } else {
  68. if (!(hdl = get_new_handle())) {
  69. unlock(&futex_list_lock);
  70. return -ENOMEM;
  71. }
  72. hdl->type = TYPE_FUTEX;
  73. futex = &hdl->info.futex;
  74. futex->uaddr = uaddr;
  75. get_handle(hdl);
  76. INIT_LISTP(&futex->waiters);
  77. INIT_LIST_HEAD(futex, list);
  78. LISTP_ADD_TAIL(futex, &futex_list, list);
  79. }
  80. if (futex_op == FUTEX_WAKE_OP || futex_op == FUTEX_REQUEUE || futex_op == FUTEX_CMP_REQUEUE) {
  81. LISTP_FOR_EACH_ENTRY(tmp, &futex_list, list) {
  82. if (tmp->uaddr == uaddr2) {
  83. futex2 = tmp;
  84. break;
  85. }
  86. }
  87. if (futex2) {
  88. hdl2 = container_of(futex2, struct shim_handle, info.futex);
  89. get_handle(hdl2);
  90. } else {
  91. if (!(hdl2 = get_new_handle())) {
  92. unlock(&futex_list_lock);
  93. return -ENOMEM;
  94. }
  95. hdl2->type = TYPE_FUTEX;
  96. futex2 = &hdl2->info.futex;
  97. futex2->uaddr = uaddr2;
  98. get_handle(hdl2);
  99. INIT_LISTP(&futex2->waiters);
  100. INIT_LIST_HEAD(futex2, list);
  101. LISTP_ADD_TAIL(futex2, &futex_list, list);
  102. }
  103. val2 = (uint32_t)(uint64_t)utime;
  104. }
  105. unlock(&futex_list_lock);
  106. lock(&hdl->lock);
  107. uint64_t timeout_us = NO_TIMEOUT;
  108. switch (futex_op) {
  109. case FUTEX_WAIT_BITSET:
  110. if (utime && timeout_us == NO_TIMEOUT) {
  111. struct timespec* ts = (struct timespec*)utime;
  112. // Round to microsecs
  113. timeout_us = (ts->tv_sec * 1000000) + (ts->tv_nsec / 1000);
  114. /* Check for the CLOCK_REALTIME flag
  115. * DEP 1/28/17: Should really differentiate clocks, but
  116. * Graphene only has one for now.
  117. * if (futex_op & FUTEX_CLOCK_REALTIME) { */
  118. uint64_t current_time = DkSystemTimeQuery();
  119. if (current_time == 0) {
  120. ret = -EINVAL;
  121. break;
  122. }
  123. timeout_us -= current_time;
  124. }
  125. /* Note: for FUTEX_WAIT, timeout is interpreted as a relative
  126. * value. This differs from other futex operations, where
  127. * timeout is interpreted as an absolute value. To obtain the
  128. * equivalent of FUTEX_WAIT with an absolute timeout, employ
  129. * FUTEX_WAIT_BITSET with val3 specified as
  130. * FUTEX_BITSET_MATCH_ANY. */
  131. /* FALLTHROUGH */
  132. case FUTEX_WAIT:
  133. if (utime && timeout_us == NO_TIMEOUT) {
  134. struct timespec* ts = (struct timespec*)utime;
  135. // Round to microsecs
  136. timeout_us = (ts->tv_sec * 1000000) + (ts->tv_nsec / 1000);
  137. }
  138. {
  139. uint32_t bitset = (futex_op == FUTEX_WAIT_BITSET) ? (uint32_t)val3 : 0xffffffff;
  140. debug("FUTEX_WAIT: %p (val = %d) vs %d mask = %08x, timeout ptr %p\n", uaddr,
  141. *uaddr, val, bitset, utime);
  142. if (*uaddr != val) {
  143. ret = -EAGAIN;
  144. break;
  145. }
  146. struct futex_waiter waiter;
  147. thread_setwait(&waiter.thread, NULL);
  148. INIT_LIST_HEAD(&waiter, list);
  149. waiter.bitset = bitset;
  150. LISTP_ADD_TAIL(&waiter, &futex->waiters, list);
  151. unlock(&hdl->lock);
  152. ret = thread_sleep(timeout_us);
  153. /* DEP 1/28/17: Should return ETIMEDOUT, not EAGAIN, on timeout. */
  154. if (ret == -EAGAIN)
  155. ret = -ETIMEDOUT;
  156. if (ret == -ETIMEDOUT)
  157. LISTP_DEL(&waiter, &futex->waiters, list);
  158. lock(&hdl->lock);
  159. /* Chia-Che 10/17/17: FUTEX_WAKE should remove the waiter
  160. * from the list; if not, we should remove it now. */
  161. if (!LIST_EMPTY(&waiter, list))
  162. LISTP_DEL(&waiter, &futex->waiters, list);
  163. break;
  164. }
  165. case FUTEX_WAKE:
  166. case FUTEX_WAKE_BITSET: {
  167. struct futex_waiter* waiter;
  168. struct futex_waiter* wtmp;
  169. int nwaken = 0;
  170. uint32_t bitset = (futex_op == FUTEX_WAKE_BITSET) ? (uint32_t)val3 : 0xffffffff;
  171. debug("FUTEX_WAKE: %p (val = %d) count = %d mask = %08x\n", uaddr, *uaddr, val, bitset);
  172. LISTP_FOR_EACH_ENTRY_SAFE(waiter, wtmp, &futex->waiters, list) {
  173. if (!(bitset & waiter->bitset))
  174. continue;
  175. debug("FUTEX_WAKE wake thread %d: %p (val = %d)\n", waiter->thread->tid, uaddr,
  176. *uaddr);
  177. LISTP_DEL_INIT(waiter, &futex->waiters, list);
  178. thread_wakeup(waiter->thread);
  179. nwaken++;
  180. if (nwaken >= val)
  181. break;
  182. }
  183. ret = nwaken;
  184. debug("FUTEX_WAKE done: %p (val = %d) woke %d threads\n", uaddr, *uaddr, ret);
  185. break;
  186. }
  187. case FUTEX_WAKE_OP: {
  188. assert(futex2);
  189. int oldval = *(int*)uaddr2, newval, cmpval;
  190. newval = (val3 >> 12) & 0xfff;
  191. switch ((val3 >> 28) & 0xf) {
  192. case FUTEX_OP_SET:
  193. break;
  194. case FUTEX_OP_ADD:
  195. newval = oldval + newval;
  196. break;
  197. case FUTEX_OP_OR:
  198. newval = oldval | newval;
  199. break;
  200. case FUTEX_OP_ANDN:
  201. newval = oldval & ~newval;
  202. break;
  203. case FUTEX_OP_XOR:
  204. newval = oldval ^ newval;
  205. break;
  206. }
  207. cmpval = val3 & 0xfff;
  208. switch ((val3 >> 24) & 0xf) {
  209. case FUTEX_OP_CMP_EQ:
  210. cmpval = (oldval == cmpval);
  211. break;
  212. case FUTEX_OP_CMP_NE:
  213. cmpval = (oldval != cmpval);
  214. break;
  215. case FUTEX_OP_CMP_LT:
  216. cmpval = (oldval < cmpval);
  217. break;
  218. case FUTEX_OP_CMP_LE:
  219. cmpval = (oldval <= cmpval);
  220. break;
  221. case FUTEX_OP_CMP_GT:
  222. cmpval = (oldval > cmpval);
  223. break;
  224. case FUTEX_OP_CMP_GE:
  225. cmpval = (oldval >= cmpval);
  226. break;
  227. }
  228. *(int*)uaddr2 = newval;
  229. struct futex_waiter* waiter;
  230. struct futex_waiter* wtmp;
  231. int nwaken = 0;
  232. debug("FUTEX_WAKE_OP: %p (val = %d) count = %d\n", uaddr, *uaddr, val);
  233. LISTP_FOR_EACH_ENTRY_SAFE(waiter, wtmp, &futex->waiters, list) {
  234. debug("FUTEX_WAKE_OP wake thread %d: %p (val = %d)\n", waiter->thread->tid, uaddr,
  235. *uaddr);
  236. LISTP_DEL_INIT(waiter, &futex->waiters, list);
  237. thread_wakeup(waiter->thread);
  238. nwaken++;
  239. }
  240. if (cmpval) {
  241. unlock(&hdl->lock);
  242. put_handle(hdl);
  243. hdl = hdl2;
  244. lock(&hdl->lock);
  245. debug("FUTEX_WAKE: %p (val = %d) count = %d\n", uaddr2, *uaddr2, val2);
  246. LISTP_FOR_EACH_ENTRY_SAFE(waiter, wtmp, &futex2->waiters, list) {
  247. debug("FUTEX_WAKE_OP(2) wake thread %d: %p (val = %d)\n", waiter->thread->tid,
  248. uaddr2, *uaddr2);
  249. LISTP_DEL_INIT(waiter, &futex2->waiters, list);
  250. thread_wakeup(waiter->thread);
  251. nwaken++;
  252. }
  253. }
  254. ret = nwaken;
  255. break;
  256. }
  257. case FUTEX_CMP_REQUEUE:
  258. if (*uaddr != val3) {
  259. ret = -EAGAIN;
  260. break;
  261. }
  262. /* FALLTHROUGH */
  263. case FUTEX_REQUEUE: {
  264. assert(futex2);
  265. struct futex_waiter* waiter;
  266. struct futex_waiter* wtmp;
  267. int nwaken = 0;
  268. LISTP_FOR_EACH_ENTRY_SAFE(waiter, wtmp, &futex->waiters, list) {
  269. LISTP_DEL_INIT(waiter, &futex->waiters, list);
  270. thread_wakeup(waiter->thread);
  271. nwaken++;
  272. if (nwaken >= val)
  273. break;
  274. }
  275. lock(&hdl2->lock);
  276. LISTP_SPLICE_INIT(&futex->waiters, &futex2->waiters, list, futex_waiter);
  277. unlock(&hdl2->lock);
  278. put_handle(hdl2);
  279. ret = nwaken;
  280. break;
  281. }
  282. case FUTEX_FD:
  283. ret = set_new_fd_handle(hdl, 0, NULL);
  284. break;
  285. default:
  286. debug("unsupported futex op: 0x%x\n", op);
  287. ret = -ENOSYS;
  288. break;
  289. }
  290. unlock(&hdl->lock);
  291. put_handle(hdl);
  292. return ret;
  293. }
  294. int shim_do_set_robust_list(struct robust_list_head* head, size_t len) {
  295. struct shim_thread* self = get_cur_thread();
  296. assert(self);
  297. if (len != sizeof(struct robust_list_head))
  298. return -EINVAL;
  299. self->robust_list = head;
  300. return 0;
  301. }
  302. int shim_do_get_robust_list(pid_t pid, struct robust_list_head** head, size_t* len) {
  303. if (!head)
  304. return -EFAULT;
  305. struct shim_thread* thread;
  306. if (pid) {
  307. thread = lookup_thread(pid);
  308. if (!thread)
  309. return -ESRCH;
  310. } else {
  311. thread = get_cur_thread();
  312. }
  313. *head = (struct robust_list_head*)thread->robust_list;
  314. *len = sizeof(struct robust_list_head);
  315. return 0;
  316. }
  317. void release_robust_list(struct robust_list_head* head) {
  318. long futex_offset = head->futex_offset;
  319. struct robust_list* robust;
  320. struct robust_list* prev = &head->list;
  321. create_lock_runtime(&futex_list_lock);
  322. for (robust = prev->next; robust && robust != prev; prev = robust, robust = robust->next) {
  323. void* futex_addr = (void*)robust + futex_offset;
  324. struct shim_futex_handle* tmp;
  325. struct shim_futex_handle* futex = NULL;
  326. lock(&futex_list_lock);
  327. LISTP_FOR_EACH_ENTRY(tmp, &futex_list, list) {
  328. if (tmp->uaddr == futex_addr) {
  329. futex = tmp;
  330. break;
  331. }
  332. }
  333. unlock(&futex_list_lock);
  334. if (!futex)
  335. continue;
  336. struct futex_waiter* waiter;
  337. struct futex_waiter* wtmp;
  338. struct shim_handle* hdl = container_of(futex, struct shim_handle, info.futex);
  339. get_handle(hdl);
  340. lock(&hdl->lock);
  341. debug("release robust list: %p\n", futex_addr);
  342. *(int*)futex_addr = 0;
  343. LISTP_FOR_EACH_ENTRY_SAFE(waiter, wtmp, &futex->waiters, list) {
  344. LISTP_DEL_INIT(waiter, &futex->waiters, list);
  345. thread_wakeup(waiter->thread);
  346. }
  347. unlock(&hdl->lock);
  348. put_handle(hdl);
  349. }
  350. }
  351. void release_clear_child_id(int* clear_child_tid) {
  352. debug("clear child tid at %p\n", clear_child_tid);
  353. *clear_child_tid = 0;
  354. create_lock_runtime(&futex_list_lock);
  355. struct shim_futex_handle* tmp;
  356. struct shim_futex_handle* futex = NULL;
  357. lock(&futex_list_lock);
  358. LISTP_FOR_EACH_ENTRY(tmp, &futex_list, list) {
  359. if (tmp->uaddr == (void*)clear_child_tid) {
  360. futex = tmp;
  361. break;
  362. }
  363. }
  364. unlock(&futex_list_lock);
  365. if (!futex)
  366. return;
  367. struct futex_waiter* waiter;
  368. struct futex_waiter* wtmp;
  369. struct shim_handle* hdl = container_of(futex, struct shim_handle, info.futex);
  370. get_handle(hdl);
  371. lock(&hdl->lock);
  372. debug("release futex at %p\n", clear_child_tid);
  373. *clear_child_tid = 0;
  374. LISTP_FOR_EACH_ENTRY_SAFE(waiter, wtmp, &futex->waiters, list) {
  375. LISTP_DEL_INIT(waiter, &futex->waiters, list);
  376. thread_wakeup(waiter->thread);
  377. }
  378. unlock(&hdl->lock);
  379. put_handle(hdl);
  380. }