/* Copyright (C) 2014 Stony Brook University This file is part of Graphene Library OS. Graphene Library OS is free software: you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. Graphene Library OS is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public License along with this program. If not, see . */ /* * shim_epoll.c * * Implementation of system call "epoll_create", "epoll_create1", "epoll_ctl" * and "epoll_wait". */ #include #include #include #include #include #include #include #include #include #include /* Avoid duplicated definitions */ #ifndef EPOLLIN #define EPOLLIN 0x001 #define EPOLLPRI 0x002 #define EPOLLOUT 0x004 #define EPOLLRDNORM 0x040 #define EPOLLRDBAND 0x080 #define EPOLLWRNORM 0x100 #define EPOLLERBAND 0x200 #define EPOLLERR 0x008 #define EPOLLHUP 0x010 #define EPOLLRDHUP 0x2000 #endif #define MAX_EPOLL_FDS 1024 struct shim_mount epoll_builtin_fs; /* shim_epoll_fds are linked as a list (by the list field), * hanging off of a shim_epoll_handle (by the fds field) */ struct shim_epoll_fd { FDTYPE fd; unsigned int events; __u64 data; unsigned int revents; struct shim_handle* handle; struct shim_handle* epoll; PAL_HANDLE pal_handle; LIST_TYPE(shim_epoll_fd) list; LIST_TYPE(shim_epoll_fd) back; }; int shim_do_epoll_create1(int flags) { if ((flags & ~EPOLL_CLOEXEC)) return -EINVAL; struct shim_handle* hdl = get_new_handle(); if (!hdl) return -ENOMEM; struct shim_epoll_handle* epoll = &hdl->info.epoll; hdl->type = TYPE_EPOLL; set_handle_fs(hdl, &epoll_builtin_fs); epoll->maxfds = MAX_EPOLL_FDS; epoll->nfds = 0; epoll->pal_fds = malloc(sizeof(FDTYPE) * MAX_EPOLL_FDS); epoll->pal_handles = malloc(sizeof(PAL_HANDLE) * MAX_EPOLL_FDS); create_event(&epoll->event); INIT_LISTP(&epoll->fds); int vfd = set_new_fd_handle(hdl, (flags & EPOLL_CLOEXEC) ? FD_CLOEXEC : 0, NULL); put_handle(hdl); return vfd; } /* the 'size' argument of epoll_create is not used */ int shim_do_epoll_create(int size) { if (size < 0) return -EINVAL; return shim_do_epoll_create1(0); } static void update_epoll(struct shim_epoll_handle* epoll) { struct shim_epoll_fd* tmp; int npals = 0; epoll->nread = 0; LISTP_FOR_EACH_ENTRY(tmp, &epoll->fds, list) { if (!tmp->pal_handle) continue; debug("found handle %p (pal handle %p) from epoll handle %p\n", tmp->handle, tmp->pal_handle, epoll); epoll->pal_fds[npals] = tmp->fd; epoll->pal_handles[npals] = tmp->pal_handle; npals++; if (tmp->handle->acc_mode & MAY_READ) epoll->nread++; } epoll->npals = npals; if (epoll->nwaiters) set_event(&epoll->event, epoll->nwaiters); } int delete_from_epoll_handles(struct shim_handle* handle) { while (1) { lock(&handle->lock); if (LISTP_EMPTY(&handle->epolls)) { unlock(&handle->lock); break; } struct shim_epoll_fd* epoll_fd = LISTP_FIRST_ENTRY(&handle->epolls, struct shim_epoll_fd, back); LISTP_DEL(epoll_fd, &handle->epolls, back); unlock(&handle->lock); put_handle(handle); struct shim_handle* epoll_hdl = epoll_fd->epoll; struct shim_epoll_handle* epoll = &epoll_hdl->info.epoll; debug("delete handle %p from epoll handle %p\n", handle, &epoll_hdl->info.epoll); lock(&epoll_hdl->lock); LISTP_DEL(epoll_fd, &epoll->fds, list); free(epoll_fd); epoll_hdl->info.epoll.nfds--; update_epoll(&epoll_hdl->info.epoll); unlock(&epoll_hdl->lock); put_handle(epoll_hdl); } return 0; } int shim_do_epoll_ctl(int epfd, int op, int fd, struct __kernel_epoll_event* event) { struct shim_thread* cur = get_cur_thread(); int ret = 0; struct shim_handle* epoll_hdl = get_fd_handle(epfd, NULL, cur->handle_map); if (!epoll_hdl) return -EBADF; if (epoll_hdl->type != TYPE_EPOLL) { put_handle(epoll_hdl); return -EINVAL; } struct shim_epoll_handle* epoll = &epoll_hdl->info.epoll; struct shim_epoll_fd* epoll_fd; lock(&epoll_hdl->lock); switch (op) { case EPOLL_CTL_ADD: { LISTP_FOR_EACH_ENTRY(epoll_fd, &epoll->fds, list) { if (epoll_fd->fd == fd) { ret = -EEXIST; goto out; } } struct shim_handle* hdl = get_fd_handle(fd, NULL, cur->handle_map); if (!hdl) { ret = -EBADF; goto out; } if ((hdl->type != TYPE_PIPE && hdl->type != TYPE_SOCK) || !hdl->pal_handle) { ret = -EPERM; put_handle(hdl); goto out; } if (epoll->nfds == MAX_EPOLL_FDS) { ret = -ENOSPC; put_handle(hdl); goto out; } debug("add handle %p to epoll handle %p\n", hdl, epoll); epoll_fd = malloc(sizeof(struct shim_epoll_fd)); epoll_fd->fd = fd; epoll_fd->events = event->events; epoll_fd->data = event->data; epoll_fd->revents = 0; epoll_fd->handle = hdl; epoll_fd->epoll = epoll_hdl; epoll_fd->pal_handle = hdl->pal_handle; /* Register the epoll handle */ get_handle(epoll_hdl); lock(&hdl->lock); INIT_LIST_HEAD(epoll_fd, back); LISTP_ADD_TAIL(epoll_fd, &hdl->epolls, back); unlock(&hdl->lock); INIT_LIST_HEAD(epoll_fd, list); LISTP_ADD_TAIL(epoll_fd, &epoll->fds, list); epoll->nfds++; goto update; } case EPOLL_CTL_MOD: { LISTP_FOR_EACH_ENTRY(epoll_fd, &epoll->fds, list) { if (epoll_fd->fd == fd) { epoll_fd->events = event->events; epoll_fd->data = event->data; goto update; } } ret = -ENOENT; goto out; } case EPOLL_CTL_DEL: { LISTP_FOR_EACH_ENTRY(epoll_fd, &epoll->fds, list) { if (epoll_fd->fd == fd) { struct shim_handle* hdl = epoll_fd->handle; /* Unregister the epoll handle */ lock(&hdl->lock); LISTP_DEL(epoll_fd, &hdl->epolls, back); unlock(&hdl->lock); debug("delete handle %p from epoll handle %p\n", hdl, epoll); put_handle(epoll_hdl); put_handle(hdl); LISTP_DEL(epoll_fd, &epoll->fds, list); epoll->nfds--; free(epoll_fd); goto update; } } ret = -ENOENT; goto out; } default: ret = -ENOSYS; goto out; } update: update_epoll(epoll); out: unlock(&epoll_hdl->lock); put_handle(epoll_hdl); return ret; } int shim_do_epoll_wait(int epfd, struct __kernel_epoll_event* events, int maxevents, int timeout_ms) { int ret = 0; struct shim_handle* epoll_hdl = get_fd_handle(epfd, NULL, NULL); if (!epoll_hdl) return -EBADF; if (epoll_hdl->type != TYPE_EPOLL) { put_handle(epoll_hdl); return -EINVAL; } struct shim_epoll_handle* epoll = &epoll_hdl->info.epoll; struct shim_epoll_fd* epoll_fd; int nevents = 0; int npals, nread; bool need_update = false; lock(&epoll_hdl->lock); retry: if (!(npals = epoll->npals)) goto reply; PAL_HANDLE* pal_handles = __alloca(sizeof(PAL_HANDLE) * (npals + 1)); FDTYPE* fds = __alloca(sizeof(FDTYPE) * npals); memcpy(fds, epoll->pal_fds, sizeof(FDTYPE) * npals); memcpy(pal_handles, epoll->pal_handles, sizeof(PAL_HANDLE) * npals); pal_handles[npals] = epoll->event.event; if ((nread = epoll->nread)) epoll->nwaiters++; unlock(&epoll_hdl->lock); PAL_NUM pal_timeout = timeout_ms == -1 ? NO_TIMEOUT : (PAL_NUM)timeout_ms * 1000; PAL_HANDLE polled = DkObjectsWaitAny(nread ? npals + 1 : npals, pal_handles, nread ? pal_timeout : 0); lock(&epoll_hdl->lock); if (nread) epoll->nwaiters--; if (!polled) goto reply; if (polled == epoll->event.event) { wait_event(&epoll->event); goto retry; } PAL_STREAM_ATTR attr; if (!DkStreamAttributesQueryByHandle(polled, &attr)) goto reply; LISTP_FOR_EACH_ENTRY(epoll_fd, &epoll->fds, list) { if (polled == epoll_fd->pal_handle) { debug("epoll: fd %d (handle %p) polled\n", epoll_fd->fd, epoll_fd->handle); if (attr.disconnected) { epoll_fd->revents |= EPOLLERR | EPOLLHUP | EPOLLRDHUP; epoll_fd->pal_handle = NULL; need_update = true; } if (attr.readable) epoll_fd->revents |= EPOLLIN; if (attr.writable) epoll_fd->revents |= EPOLLOUT; break; } } reply: LISTP_FOR_EACH_ENTRY(epoll_fd, &epoll->fds, list) { if (nevents == maxevents) break; if ((epoll_fd->events | EPOLLERR | EPOLLHUP) & epoll_fd->revents) { events[nevents].events = (epoll_fd->events | EPOLLERR | EPOLLHUP) & epoll_fd->revents; events[nevents].data = epoll_fd->data; nevents++; epoll_fd->revents &= ~epoll_fd->events; } } if (need_update) update_epoll(epoll); unlock(&epoll_hdl->lock); ret = nevents; put_handle(epoll_hdl); return ret; } int shim_do_epoll_pwait(int epfd, struct __kernel_epoll_event* events, int maxevents, int timeout_ms, const __sigset_t* sigmask, size_t sigsetsize) { __UNUSED(sigmask); __UNUSED(sigsetsize); int ret = shim_do_epoll_wait(epfd, events, maxevents, timeout_ms); return ret; } static int epoll_close(struct shim_handle* hdl) { __UNUSED(hdl); return 0; } struct shim_fs_ops epoll_fs_ops = { .close = &epoll_close, }; struct shim_mount epoll_builtin_fs = { .type = "epoll", .fs_ops = &epoll_fs_ops, }; BEGIN_CP_FUNC(epoll_fd) { assert(size == sizeof(LISTP_TYPE(shim_epoll_fd))); LISTP_TYPE(shim_epoll_fd)* old_list = (LISTP_TYPE(shim_epoll_fd)*)obj; LISTP_TYPE(shim_epoll_fd)* new_list = (LISTP_TYPE(shim_epoll_fd)*)objp; struct shim_epoll_fd* epoll_fd; debug("checkpoint epoll: %p -> %p (base = 0x%08lx)\n", old_list, new_list, base); INIT_LISTP(new_list); LISTP_FOR_EACH_ENTRY(epoll_fd, old_list, list) { ptr_t off = ADD_CP_OFFSET(sizeof(struct shim_epoll_fd)); struct shim_epoll_fd* new_epoll_fd = (struct shim_epoll_fd*)(base + off); new_epoll_fd->fd = epoll_fd->fd; new_epoll_fd->events = epoll_fd->events; new_epoll_fd->data = epoll_fd->data; new_epoll_fd->revents = epoll_fd->revents; new_epoll_fd->pal_handle = NULL; LISTP_ADD(new_epoll_fd, new_list, list); DO_CP(handle, epoll_fd->handle, &new_epoll_fd->handle); } ADD_CP_FUNC_ENTRY((ptr_t)objp - base); } END_CP_FUNC(epoll_fd) BEGIN_RS_FUNC(epoll_fd) { __UNUSED(offset); LISTP_TYPE(shim_epoll_fd)* list = (void*)(base + GET_CP_FUNC_ENTRY()); struct shim_epoll_fd* epoll_fd; CP_REBASE(*list); LISTP_FOR_EACH_ENTRY(epoll_fd, list, list) { CP_REBASE(epoll_fd->handle); CP_REBASE(epoll_fd->back); epoll_fd->pal_handle = epoll_fd->handle->pal_handle; CP_REBASE(epoll_fd->list); DEBUG_RS("fd=%d,path=%s,type=%s,uri=%s", epoll_fd->fd, qstrgetstr(&epoll_fd->handle->path), epoll_fd->handle->fs_type, qstrgetstr(&epoll_fd->handle->uri)); } } END_RS_FUNC(epoll_fd)