123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487 |
- /* Copyright (C) 2014 Stony Brook University
- This file is part of Graphene Library OS.
- Graphene Library OS is free software: you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public License
- as published by the Free Software Foundation, either version 3 of the
- License, or (at your option) any later version.
- Graphene Library OS is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU Lesser General Public License for more details.
- You should have received a copy of the GNU Lesser General Public License
- along with this program. If not, see <http://www.gnu.org/licenses/>. */
- /*
- * shim_futex.c
- *
- * Implementation of system call "futex", "set_robust_list" and
- * "get_robust_list".
- */
- #include <asm/prctl.h>
- #include <errno.h>
- #include <linux/futex.h>
- #include <list.h>
- #include <pal.h>
- #include <pal_error.h>
- #include <shim_checkpoint.h>
- #include <shim_internal.h>
- #include <shim_table.h>
- #include <shim_thread.h>
- #include <shim_utils.h>
- #include <sys/mman.h>
- #include <sys/syscall.h>
/* NOTE(review): presumably the lower and upper bound of some futex-related value;
 * neither macro is referenced in the visible part of this file -- confirm the
 * intended use before relying on them. */
#define FUTEX_MIN_VALUE 0
#define FUTEX_MAX_VALUE 255
- /* futex_waiters are linked off of shim_futex_handle by the waiters
- * listp */
- struct futex_waiter {
- struct shim_thread* thread;
- uint32_t bitset;
- LIST_TYPE(futex_waiter) list;
- };
- // Links shim_futex_handle by the list field
- DEFINE_LISTP(shim_futex_handle);
- static LISTP_TYPE(shim_futex_handle) futex_list = LISTP_INIT;
- static struct shim_lock futex_list_lock;
- static void add_futex_waiter(struct futex_waiter* waiter,
- struct shim_futex_handle* futex,
- uint32_t bitset) {
- thread_setwait(&waiter->thread, NULL);
- INIT_LIST_HEAD(waiter, list);
- waiter->bitset = bitset;
- LISTP_ADD_TAIL(waiter, &futex->waiters, list);
- }
- static void del_futex_waiter(struct futex_waiter* waiter, struct shim_futex_handle* futex) {
- LISTP_DEL_INIT(waiter, &futex->waiters, list);
- assert(waiter->thread);
- put_thread(waiter->thread);
- }
- static void del_futex_waiter_wakeup(struct futex_waiter* waiter, struct shim_futex_handle* futex) {
- LISTP_DEL_INIT(waiter, &futex->waiters, list);
- assert(waiter->thread);
- thread_wakeup(waiter->thread);
- put_thread(waiter->thread);
- }
- int shim_do_futex(int* uaddr, int op, int val, void* utime, int* uaddr2, int val3) {
- struct shim_futex_handle* tmp = NULL;
- struct shim_futex_handle* futex = NULL;
- struct shim_futex_handle* futex2 = NULL;
- struct shim_handle* hdl = NULL;
- struct shim_handle* hdl2 = NULL;
- uint32_t futex_op = (op & FUTEX_CMD_MASK);
- uint32_t val2 = 0;
- int ret = 0;
- if (!uaddr || !IS_ALIGNED_PTR(uaddr, sizeof(unsigned int)))
- return -EINVAL;
- create_lock_runtime(&futex_list_lock);
- lock(&futex_list_lock);
- LISTP_FOR_EACH_ENTRY(tmp, &futex_list, list) {
- if (tmp->uaddr == uaddr) {
- futex = tmp;
- break;
- }
- }
- if (futex) {
- hdl = container_of(futex, struct shim_handle, info.futex);
- get_handle(hdl);
- } else {
- if (!(hdl = get_new_handle())) {
- unlock(&futex_list_lock);
- return -ENOMEM;
- }
- hdl->type = TYPE_FUTEX;
- futex = &hdl->info.futex;
- futex->uaddr = uaddr;
- get_handle(hdl);
- INIT_LISTP(&futex->waiters);
- INIT_LIST_HEAD(futex, list);
- LISTP_ADD_TAIL(futex, &futex_list, list);
- }
- if (futex_op == FUTEX_WAKE_OP || futex_op == FUTEX_REQUEUE || futex_op == FUTEX_CMP_REQUEUE) {
- LISTP_FOR_EACH_ENTRY(tmp, &futex_list, list) {
- if (tmp->uaddr == uaddr2) {
- futex2 = tmp;
- break;
- }
- }
- if (futex2) {
- hdl2 = container_of(futex2, struct shim_handle, info.futex);
- get_handle(hdl2);
- } else {
- if (!(hdl2 = get_new_handle())) {
- unlock(&futex_list_lock);
- return -ENOMEM;
- }
- hdl2->type = TYPE_FUTEX;
- futex2 = &hdl2->info.futex;
- futex2->uaddr = uaddr2;
- get_handle(hdl2);
- INIT_LISTP(&futex2->waiters);
- INIT_LIST_HEAD(futex2, list);
- LISTP_ADD_TAIL(futex2, &futex_list, list);
- }
- val2 = (uint32_t)(uint64_t)utime;
- }
- unlock(&futex_list_lock);
- lock(&hdl->lock);
- uint64_t timeout_us = NO_TIMEOUT;
- switch (futex_op) {
- case FUTEX_WAIT_BITSET:
- if (utime && timeout_us == NO_TIMEOUT) {
- struct timespec* ts = (struct timespec*)utime;
- // Round to microsecs
- timeout_us = (ts->tv_sec * 1000000) + (ts->tv_nsec / 1000);
- /* Check for the CLOCK_REALTIME flag
- * DEP 1/28/17: Should really differentiate clocks, but
- * Graphene only has one for now.
- * if (futex_op & FUTEX_CLOCK_REALTIME) { */
- uint64_t current_time = DkSystemTimeQuery();
- if (current_time == 0) {
- ret = -EINVAL;
- break;
- }
- timeout_us -= current_time;
- }
- /* Note: for FUTEX_WAIT, timeout is interpreted as a relative
- * value. This differs from other futex operations, where
- * timeout is interpreted as an absolute value. To obtain the
- * equivalent of FUTEX_WAIT with an absolute timeout, employ
- * FUTEX_WAIT_BITSET with val3 specified as
- * FUTEX_BITSET_MATCH_ANY. */
- /* FALLTHROUGH */
- case FUTEX_WAIT:
- if (utime && timeout_us == NO_TIMEOUT) {
- struct timespec* ts = (struct timespec*)utime;
- // Round to microsecs
- timeout_us = (ts->tv_sec * 1000000) + (ts->tv_nsec / 1000);
- }
- {
- uint32_t bitset = (futex_op == FUTEX_WAIT_BITSET) ? (uint32_t)val3 : 0xffffffff;
- debug("FUTEX_WAIT: %p (val = %d) vs %d mask = %08x, timeout ptr %p\n", uaddr,
- *uaddr, val, bitset, utime);
- if (*uaddr != val) {
- ret = -EAGAIN;
- break;
- }
- struct futex_waiter waiter = { 0 };
- add_futex_waiter(&waiter, futex, bitset);
- unlock(&hdl->lock);
- ret = thread_sleep(timeout_us);
- /* DEP 1/28/17: Should return ETIMEDOUT, not EAGAIN, on timeout. */
- if (ret == -EAGAIN)
- ret = -ETIMEDOUT;
- lock(&hdl->lock);
- /* Chia-Che 10/17/17: FUTEX_WAKE should remove the waiter
- * from the list; if not, we should remove it now. */
- if (!LIST_EMPTY(&waiter, list)) {
- del_futex_waiter(&waiter, futex);
- }
- break;
- }
- case FUTEX_WAKE:
- case FUTEX_WAKE_BITSET: {
- struct futex_waiter* waiter;
- struct futex_waiter* wtmp;
- int nwaken = 0;
- uint32_t bitset = (futex_op == FUTEX_WAKE_BITSET) ? (uint32_t)val3 : 0xffffffff;
- debug("FUTEX_WAKE: %p (val = %d) count = %d mask = %08x\n", uaddr, *uaddr, val, bitset);
- LISTP_FOR_EACH_ENTRY_SAFE(waiter, wtmp, &futex->waiters, list) {
- if (!(bitset & waiter->bitset))
- continue;
- debug("FUTEX_WAKE wake thread %d: %p (val = %d)\n", waiter->thread->tid, uaddr,
- *uaddr);
- del_futex_waiter_wakeup(waiter, futex);
- nwaken++;
- if (nwaken >= val)
- break;
- }
- ret = nwaken;
- debug("FUTEX_WAKE done: %p (val = %d) woke %d threads\n", uaddr, *uaddr, ret);
- break;
- }
- case FUTEX_WAKE_OP: {
- assert(futex2);
- int oldval = *(int*)uaddr2, newval, cmpval;
- newval = (val3 >> 12) & 0xfff;
- switch ((val3 >> 28) & 0xf) {
- case FUTEX_OP_SET:
- break;
- case FUTEX_OP_ADD:
- newval = oldval + newval;
- break;
- case FUTEX_OP_OR:
- newval = oldval | newval;
- break;
- case FUTEX_OP_ANDN:
- newval = oldval & ~newval;
- break;
- case FUTEX_OP_XOR:
- newval = oldval ^ newval;
- break;
- }
- cmpval = val3 & 0xfff;
- switch ((val3 >> 24) & 0xf) {
- case FUTEX_OP_CMP_EQ:
- cmpval = (oldval == cmpval);
- break;
- case FUTEX_OP_CMP_NE:
- cmpval = (oldval != cmpval);
- break;
- case FUTEX_OP_CMP_LT:
- cmpval = (oldval < cmpval);
- break;
- case FUTEX_OP_CMP_LE:
- cmpval = (oldval <= cmpval);
- break;
- case FUTEX_OP_CMP_GT:
- cmpval = (oldval > cmpval);
- break;
- case FUTEX_OP_CMP_GE:
- cmpval = (oldval >= cmpval);
- break;
- }
- *(int*)uaddr2 = newval;
- struct futex_waiter* waiter;
- struct futex_waiter* wtmp;
- int nwaken = 0;
- debug("FUTEX_WAKE_OP: %p (val = %d) count = %d\n", uaddr, *uaddr, val);
- LISTP_FOR_EACH_ENTRY_SAFE(waiter, wtmp, &futex->waiters, list) {
- debug("FUTEX_WAKE_OP wake thread %d: %p (val = %d)\n", waiter->thread->tid, uaddr,
- *uaddr);
- del_futex_waiter_wakeup(waiter, futex);
- nwaken++;
- }
- if (cmpval) {
- unlock(&hdl->lock);
- put_handle(hdl);
- hdl = hdl2;
- lock(&hdl->lock);
- debug("FUTEX_WAKE: %p (val = %d) count = %d\n", uaddr2, *uaddr2, val2);
- LISTP_FOR_EACH_ENTRY_SAFE(waiter, wtmp, &futex2->waiters, list) {
- debug("FUTEX_WAKE_OP(2) wake thread %d: %p (val = %d)\n", waiter->thread->tid,
- uaddr2, *uaddr2);
- del_futex_waiter_wakeup(waiter, futex2);
- nwaken++;
- }
- }
- ret = nwaken;
- break;
- }
- case FUTEX_CMP_REQUEUE:
- if (*uaddr != val3) {
- ret = -EAGAIN;
- break;
- }
- /* FALLTHROUGH */
- case FUTEX_REQUEUE: {
- assert(futex2);
- struct futex_waiter* waiter;
- struct futex_waiter* wtmp;
- int nwaken = 0;
- LISTP_FOR_EACH_ENTRY_SAFE(waiter, wtmp, &futex->waiters, list) {
- del_futex_waiter_wakeup(waiter, futex);
- nwaken++;
- if (nwaken >= val)
- break;
- }
- lock(&hdl2->lock);
- LISTP_SPLICE_INIT(&futex->waiters, &futex2->waiters, list, futex_waiter);
- unlock(&hdl2->lock);
- put_handle(hdl2);
- ret = nwaken;
- break;
- }
- case FUTEX_FD:
- ret = set_new_fd_handle(hdl, 0, NULL);
- break;
- default:
- debug("unsupported futex op: 0x%x\n", op);
- ret = -ENOSYS;
- break;
- }
- unlock(&hdl->lock);
- put_handle(hdl);
- return ret;
- }
- int shim_do_set_robust_list(struct robust_list_head* head, size_t len) {
- struct shim_thread* self = get_cur_thread();
- assert(self);
- if (len != sizeof(struct robust_list_head))
- return -EINVAL;
- self->robust_list = head;
- return 0;
- }
- int shim_do_get_robust_list(pid_t pid, struct robust_list_head** head, size_t* len) {
- if (!head)
- return -EFAULT;
- struct shim_thread* thread;
- if (pid) {
- thread = lookup_thread(pid);
- if (!thread)
- return -ESRCH;
- } else {
- thread = get_cur_thread();
- get_thread(thread);
- }
- *head = (struct robust_list_head*)thread->robust_list;
- *len = sizeof(struct robust_list_head);
- put_thread(thread);
- return 0;
- }
- void release_robust_list(struct robust_list_head* head) {
- long futex_offset = head->futex_offset;
- struct robust_list* robust;
- struct robust_list* prev = &head->list;
- create_lock_runtime(&futex_list_lock);
- for (robust = prev->next; robust && robust != prev; prev = robust, robust = robust->next) {
- void* futex_addr = (void*)robust + futex_offset;
- struct shim_futex_handle* tmp;
- struct shim_futex_handle* futex = NULL;
- lock(&futex_list_lock);
- LISTP_FOR_EACH_ENTRY(tmp, &futex_list, list) {
- if (tmp->uaddr == futex_addr) {
- futex = tmp;
- break;
- }
- }
- unlock(&futex_list_lock);
- if (!futex)
- continue;
- struct futex_waiter* waiter;
- struct futex_waiter* wtmp;
- struct shim_handle* hdl = container_of(futex, struct shim_handle, info.futex);
- get_handle(hdl);
- lock(&hdl->lock);
- debug("release robust list: %p\n", futex_addr);
- *(int*)futex_addr = 0;
- LISTP_FOR_EACH_ENTRY_SAFE(waiter, wtmp, &futex->waiters, list) {
- del_futex_waiter_wakeup(waiter, futex);
- }
- unlock(&hdl->lock);
- put_handle(hdl);
- }
- }
- /* Function is called by Async Helper thread to wait on clear_child_tid_val_pal to be set to 0
- * (PAL does it when child thread finally exits). Next, *clear_child_tid is set to 0 and parent
- * threads are woken up. Since it is a callback to Async Helper thread, it must follow the
- * `void (*callback) (IDTYPE caller, void * arg)` signature even though we don't use caller. */
- void release_clear_child_id(IDTYPE caller, void* clear_child_tids) {
- __UNUSED(caller);
- struct clear_child_tid_struct* child = (struct clear_child_tid_struct*)clear_child_tids;
- if (!child || !child->clear_child_tid)
- goto out;
- /* wait on clear_child_tid_val_pal; this signals that PAL layer exited child thread */
- while (__atomic_load_n(&child->clear_child_tid_val_pal, __ATOMIC_RELAXED) != 0) {
- __asm__ volatile ("pause");
- }
- /* child thread exited, now parent can wake up; note that PAL layer can't set clear_child_tid
- * itself, because parent thread could spuriously wake up, notice 0 on clear_child_tid, and
- * continue its execution without waiting for this function to succeed first */
- __atomic_store_n(child->clear_child_tid, 0, __ATOMIC_RELAXED);
- /* at this point, child thread finally exited, can wake up parents if any */
- create_lock_runtime(&futex_list_lock);
- struct shim_futex_handle* tmp;
- struct shim_futex_handle* futex = NULL;
- lock(&futex_list_lock);
- LISTP_FOR_EACH_ENTRY(tmp, &futex_list, list) {
- if (tmp->uaddr == (void*)child->clear_child_tid) {
- futex = tmp;
- break;
- }
- }
- unlock(&futex_list_lock);
- if (!futex) {
- /* no parent threads waiting on this child to exit */
- goto out;
- }
- debug("release futex at %p\n", child->clear_child_tid);
- struct futex_waiter* waiter;
- struct futex_waiter* wtmp;
- struct shim_handle* hdl = container_of(futex, struct shim_handle, info.futex);
- get_handle(hdl);
- lock(&hdl->lock);
- LISTP_FOR_EACH_ENTRY_SAFE(waiter, wtmp, &futex->waiters, list) {
- /* wake up every parent waiting on this child */
- del_futex_waiter_wakeup(waiter, futex);
- }
- unlock(&hdl->lock);
- put_handle(hdl);
- out:
- free(child);
- }
|