|
@@ -17,547 +17,267 @@
|
|
/*
|
|
/*
|
|
* shim_poll.c
|
|
* shim_poll.c
|
|
*
|
|
*
|
|
- * Implementation of system call "poll", "ppoll", "select" and "pselect6".
|
|
|
|
|
|
+ * Implementation of system calls "poll", "ppoll", "select" and "pselect6".
|
|
*/
|
|
*/
|
|
|
|
|
|
|
|
+#include <errno.h>
|
|
|
|
+#include <linux/fcntl.h>
|
|
|
|
+#include <pal.h>
|
|
|
|
+#include <pal_error.h>
|
|
|
|
+#include <shim_fs.h>
|
|
|
|
+#include <shim_handle.h>
|
|
#include <shim_internal.h>
|
|
#include <shim_internal.h>
|
|
#include <shim_table.h>
|
|
#include <shim_table.h>
|
|
-#include <shim_utils.h>
|
|
|
|
#include <shim_thread.h>
|
|
#include <shim_thread.h>
|
|
-#include <shim_handle.h>
|
|
|
|
-#include <shim_fs.h>
|
|
|
|
-#include <shim_profile.h>
|
|
|
|
-
|
|
|
|
-#include <pal.h>
|
|
|
|
-#include <pal_error.h>
|
|
|
|
-#include <list.h>
|
|
|
|
-
|
|
|
|
-#include <errno.h>
|
|
|
|
-
|
|
|
|
-#include <linux/fcntl.h>
|
|
|
|
-
|
|
|
|
-noreturn void
|
|
|
|
-fortify_fail (const char *msg)
|
|
|
|
-{
|
|
|
|
- /* The loop is added only to keep gcc happy. */
|
|
|
|
- while (1)
|
|
|
|
- debug("*** %s ***\n", msg);
|
|
|
|
-}
|
|
|
|
|
|
+#include <shim_utils.h>
|
|
|
|
|
|
-noreturn void
|
|
|
|
-chk_fail (void)
|
|
|
|
-{
|
|
|
|
- fortify_fail ("buffer overflow detected");
|
|
|
|
-}
|
|
|
|
|
|
+typedef long int __fd_mask;
|
|
|
|
|
|
-static inline __attribute__((always_inline))
|
|
|
|
-void * __try_alloca (struct shim_thread * cur, int size)
|
|
|
|
-{
|
|
|
|
- if (!size)
|
|
|
|
- return NULL;
|
|
|
|
|
|
+#ifndef __NFDBITS
|
|
|
|
+#define __NFDBITS (8 * (int)sizeof(__fd_mask))
|
|
|
|
+#endif
|
|
|
|
+#ifndef __FDS_BITS
|
|
|
|
+#define __FDS_BITS(set) ((set)->fds_bits)
|
|
|
|
+#endif
|
|
|
|
|
|
- if (check_stack_size(cur, size))
|
|
|
|
- return __alloca(size);
|
|
|
|
- else
|
|
|
|
- return malloc(size);
|
|
|
|
-}
|
|
|
|
|
|
+# define __FD_ZERO(set) \
|
|
|
|
+ do { \
|
|
|
|
+ unsigned int __i; \
|
|
|
|
+ fd_set *__arr = (set); \
|
|
|
|
+ for (__i = 0; __i < sizeof (fd_set) / sizeof (__fd_mask); ++__i) \
|
|
|
|
+ __FDS_BITS (__arr)[__i] = 0; \
|
|
|
|
+ } while (0)
|
|
|
|
|
|
-static inline __attribute__((always_inline))
|
|
|
|
-void __try_free (struct shim_thread * cur, void * mem)
|
|
|
|
-{
|
|
|
|
- if (mem && !check_on_stack(cur, mem))
|
|
|
|
- free(mem);
|
|
|
|
-}
|
|
|
|
|
|
+#define __FD_ELT(d) ((d) / __NFDBITS)
|
|
|
|
+#define __FD_MASK(d) ((__fd_mask)1 << ((d) % __NFDBITS))
|
|
|
|
|
|
-DEFINE_PROFILE_CATEGORY(__do_poll, select);
|
|
|
|
-DEFINE_PROFILE_INTERVAL(do_poll_get_handle, __do_poll);
|
|
|
|
-DEFINE_PROFILE_INTERVAL(do_poll_search_repeat, __do_poll);
|
|
|
|
-DEFINE_PROFILE_INTERVAL(do_poll_set_bookkeeping, __do_poll);
|
|
|
|
-DEFINE_PROFILE_INTERVAL(do_poll_check_accmode, __do_poll);
|
|
|
|
-DEFINE_PROFILE_INTERVAL(do_poll_vfs_polling, __do_poll);
|
|
|
|
-DEFINE_PROFILE_INTERVAL(do_poll_update_bookkeeping, __do_poll);
|
|
|
|
-DEFINE_PROFILE_INTERVAL(do_poll_first_loop, __do_poll);
|
|
|
|
-DEFINE_PROFILE_INTERVAL(do_poll_second_loop, __do_poll);
|
|
|
|
-DEFINE_PROFILE_INTERVAL(do_poll_wait_any, __do_poll);
|
|
|
|
-DEFINE_PROFILE_INTERVAL(do_poll_wait_any_peek, __do_poll);
|
|
|
|
-DEFINE_PROFILE_INTERVAL(do_poll_third_loop, __do_poll);
|
|
|
|
-DEFINE_PROFILE_INTERVAL(do_poll_fourth_loop, __do_poll);
|
|
|
|
-
|
|
|
|
-#define DO_R 0001
|
|
|
|
-#define DO_W 0002
|
|
|
|
-#define KNOWN_R 0004
|
|
|
|
-#define KNOWN_W 0010
|
|
|
|
-#define RET_R 0020
|
|
|
|
-#define RET_W 0040
|
|
|
|
-#define RET_E 0100
|
|
|
|
-#define POLL_R 0200
|
|
|
|
-#define POLL_W 0400
|
|
|
|
-
|
|
|
|
-struct poll_handle {
|
|
|
|
- unsigned short flags;
|
|
|
|
- FDTYPE fd;
|
|
|
|
- struct shim_handle * handle;
|
|
|
|
- struct poll_handle * next;
|
|
|
|
- struct poll_handle * children;
|
|
|
|
-};
|
|
|
|
|
|
+#define __FD_SET(d, set) \
|
|
|
|
+ ((void)(__FDS_BITS(set)[__FD_ELT(d)] |= __FD_MASK(d)))
|
|
|
|
+#define __FD_CLR(d, set) \
|
|
|
|
+ ((void)(__FDS_BITS(set)[__FD_ELT(d)] &= ~__FD_MASK(d)))
|
|
|
|
+#define __FD_ISSET(d, set) \
|
|
|
|
+ ((__FDS_BITS(set)[__FD_ELT(d)] & __FD_MASK(d)) != 0)
|
|
|
|
|
|
#define POLL_NOTIMEOUT ((uint64_t)-1)
|
|
#define POLL_NOTIMEOUT ((uint64_t)-1)
|
|
|
|
|
|
-static int __do_poll(int npolls, struct poll_handle* polls, uint64_t timeout_us)
|
|
|
|
-{
|
|
|
|
- struct shim_thread * cur = get_cur_thread();
|
|
|
|
-
|
|
|
|
- struct shim_handle_map * map = cur->handle_map;
|
|
|
|
- int npals = 0;
|
|
|
|
- bool has_r = false, has_known = false;
|
|
|
|
- struct poll_handle * polling = NULL;
|
|
|
|
- struct poll_handle * p, ** n, * q;
|
|
|
|
- PAL_HANDLE * pals = NULL;
|
|
|
|
- int ret = 0;
|
|
|
|
-
|
|
|
|
-#ifdef PROFILE
|
|
|
|
- unsigned long begin_time = GET_PROFILE_INTERVAL();
|
|
|
|
- BEGIN_PROFILE_INTERVAL_SET(begin_time);
|
|
|
|
-#endif
|
|
|
|
-
|
|
|
|
- lock(&map->lock);
|
|
|
|
-
|
|
|
|
- for (p = polls ; p < polls + npolls ; p++) {
|
|
|
|
- bool do_r = p->flags & DO_R;
|
|
|
|
- bool do_w = p->flags & DO_W;
|
|
|
|
|
|
+int shim_do_poll(struct pollfd* fds, nfds_t nfds, int timeout_ms) {
|
|
|
|
+ if (!fds || test_user_memory(fds, sizeof(*fds) * nfds, true))
|
|
|
|
+ return -EFAULT;
|
|
|
|
|
|
- if (!do_r && !do_w) {
|
|
|
|
-no_op:
|
|
|
|
- p->flags = 0;
|
|
|
|
- p->handle = NULL;
|
|
|
|
- UPDATE_PROFILE_INTERVAL();
|
|
|
|
- continue;
|
|
|
|
- }
|
|
|
|
|
|
+ if ((uint64_t)nfds > get_rlimit_cur(RLIMIT_NOFILE))
|
|
|
|
+ return -EINVAL;
|
|
|
|
|
|
- struct shim_handle * hdl = __get_fd_handle(p->fd, NULL, map);
|
|
|
|
- if (!hdl->fs || !hdl->fs->fs_ops)
|
|
|
|
- goto no_op;
|
|
|
|
|
|
+ struct shim_handle_map* map = get_cur_thread()->handle_map;
|
|
|
|
|
|
- SAVE_PROFILE_INTERVAL(do_poll_get_handle);
|
|
|
|
|
|
+ uint64_t timeout_us = timeout_ms < 0 ? POLL_NOTIMEOUT : timeout_ms * 1000ULL;
|
|
|
|
|
|
- /* search for a repeated entry */
|
|
|
|
- struct poll_handle * rep = polling;
|
|
|
|
- for ( ; rep ; rep = rep->next)
|
|
|
|
- if (rep->handle == hdl)
|
|
|
|
- break;
|
|
|
|
|
|
+ /* nfds is the upper limit for actual number of handles */
|
|
|
|
+ PAL_HANDLE* pals = malloc(nfds * sizeof(PAL_HANDLE));
|
|
|
|
+ if (!pals)
|
|
|
|
+ return -ENOMEM;
|
|
|
|
|
|
- SAVE_PROFILE_INTERVAL(do_poll_search_repeat);
|
|
|
|
-
|
|
|
|
- p->flags = (do_r ? DO_R : 0)|(do_w ? DO_W : 0);
|
|
|
|
- p->handle = NULL;
|
|
|
|
- p->next = NULL;
|
|
|
|
- p->children = NULL;
|
|
|
|
|
|
+ /* for bookkeeping, need to have a mapping FD -> handle */
|
|
|
|
+ struct shim_handle** fds_to_hdls = malloc(nfds * sizeof(struct shim_handle*));
|
|
|
|
+ if (!fds_to_hdls) {
|
|
|
|
+ free(pals);
|
|
|
|
+ return -ENOMEM;
|
|
|
|
+ }
|
|
|
|
|
|
- if (rep) {
|
|
|
|
- /* if there is repeated handles and we already know the
|
|
|
|
- result, let's skip them */
|
|
|
|
- if (rep->flags & (KNOWN_R|POLL_R)) {
|
|
|
|
- p->flags = rep->flags & (KNOWN_R|RET_R|RET_E|POLL_R);
|
|
|
|
- do_r = false;
|
|
|
|
- }
|
|
|
|
|
|
+ nfds_t npals = 0;
|
|
|
|
+ nfds_t nrevents = 0;
|
|
|
|
|
|
- if (rep->flags & (KNOWN_W|POLL_W)) {
|
|
|
|
- p->flags = rep->flags & (KNOWN_W|RET_W|RET_E|POLL_W);
|
|
|
|
- do_w = false;
|
|
|
|
- }
|
|
|
|
|
|
+ lock(&map->lock);
|
|
|
|
|
|
- p->next = rep->children;
|
|
|
|
- rep->children = p;
|
|
|
|
|
|
+ /* collect PAL handles that correspond to user-supplied FDs (only those that can be polled) */
|
|
|
|
+ for (nfds_t i = 0; i < nfds; i++) {
|
|
|
|
+ fds[i].revents = 0;
|
|
|
|
+ fds_to_hdls[i] = NULL;
|
|
|
|
|
|
- if (!do_r && !do_w) {
|
|
|
|
- SAVE_PROFILE_INTERVAL(do_poll_set_bookkeeping);
|
|
|
|
- continue;
|
|
|
|
- }
|
|
|
|
- } else {
|
|
|
|
- get_handle(hdl);
|
|
|
|
- p->handle = hdl;
|
|
|
|
- p->next = polling;
|
|
|
|
- polling = p;
|
|
|
|
|
|
+ if (fds[i].fd < 0) {
|
|
|
|
+ /* FD is negative, must be ignored */
|
|
|
|
+ continue;
|
|
}
|
|
}
|
|
|
|
|
|
- SAVE_PROFILE_INTERVAL(do_poll_set_bookkeeping);
|
|
|
|
-
|
|
|
|
- /* do the easiest check, check handle's access mode */
|
|
|
|
- if (do_r && !(hdl->acc_mode & MAY_READ)) {
|
|
|
|
- p->flags |= KNOWN_R;
|
|
|
|
- debug("fd %d known to be not readable\n", p->fd);
|
|
|
|
- do_r = false;
|
|
|
|
|
|
+ if (!(fds[i].events & (POLLIN|POLLRDNORM)) &&
|
|
|
|
+ !(fds[i].events & (POLLOUT|POLLWRNORM))) {
|
|
|
|
+ /* user didn't ask for read or write, ignore this FD */
|
|
|
|
+ continue;
|
|
}
|
|
}
|
|
|
|
|
|
- if (do_w && !(hdl->acc_mode & MAY_WRITE)) {
|
|
|
|
- p->flags |= KNOWN_W;
|
|
|
|
- debug("fd %d known to be not writable\n", p->fd);
|
|
|
|
- do_w = false;
|
|
|
|
|
|
+ struct shim_handle* hdl = __get_fd_handle(fds[i].fd, NULL, map);
|
|
|
|
+ if (!hdl || !hdl->fs || !hdl->fs->fs_ops) {
|
|
|
|
+ /* the corresponding handle doesn't exist or doesn't provide FS-like semantics */
|
|
|
|
+ continue;
|
|
}
|
|
}
|
|
|
|
|
|
- SAVE_PROFILE_INTERVAL(do_poll_check_accmode);
|
|
|
|
-
|
|
|
|
- if (!do_r && !do_w)
|
|
|
|
- goto done_finding;
|
|
|
|
-
|
|
|
|
- /* if fs provides a poll operator, let's try it. */
|
|
|
|
- if (hdl->fs->fs_ops->poll) {
|
|
|
|
- int need_poll = 0;
|
|
|
|
-
|
|
|
|
- if (do_r && !(p->flags & POLL_R))
|
|
|
|
- need_poll |= FS_POLL_RD;
|
|
|
|
- if (do_w && !(p->flags & POLL_W))
|
|
|
|
- need_poll |= FS_POLL_WR;
|
|
|
|
-
|
|
|
|
- if (need_poll) {
|
|
|
|
- int polled = hdl->fs->fs_ops->poll(hdl, need_poll);
|
|
|
|
-
|
|
|
|
- if (polled < 0) {
|
|
|
|
- if (polled != -EAGAIN) {
|
|
|
|
- unlock(&map->lock);
|
|
|
|
- ret = polled;
|
|
|
|
- goto done_polling;
|
|
|
|
- }
|
|
|
|
- } else {
|
|
|
|
- if (polled & FS_POLL_ER) {
|
|
|
|
- debug("fd %d known to have error\n", p->fd);
|
|
|
|
- p->flags |= KNOWN_R|KNOWN_W|RET_E;
|
|
|
|
- do_r = do_w = false;
|
|
|
|
- }
|
|
|
|
-
|
|
|
|
- if ((polled & FS_POLL_RD)) {
|
|
|
|
- debug("fd %d known to be readable\n", p->fd);
|
|
|
|
- p->flags |= KNOWN_R|RET_R;
|
|
|
|
- do_r = false;
|
|
|
|
- }
|
|
|
|
-
|
|
|
|
- if (polled & FS_POLL_WR) {
|
|
|
|
- debug("fd %d known to be writable\n", p->fd);
|
|
|
|
- p->flags |= KNOWN_W|RET_W;
|
|
|
|
- do_w = false;
|
|
|
|
- }
|
|
|
|
- }
|
|
|
|
- }
|
|
|
|
-
|
|
|
|
- SAVE_PROFILE_INTERVAL(do_poll_vfs_polling);
|
|
|
|
-
|
|
|
|
- if (!do_r && !do_w)
|
|
|
|
- goto done_finding;
|
|
|
|
|
|
+ int allowed_events = 2; /* read + write */
|
|
|
|
+ if ((fds[i].events & (POLLIN|POLLRDNORM)) && !(hdl->acc_mode & MAY_READ))
|
|
|
|
+ allowed_events -= 1; /* minus read */
|
|
|
|
+ if ((fds[i].events & (POLLOUT|POLLWRNORM)) && !(hdl->acc_mode & MAY_WRITE))
|
|
|
|
+ allowed_events -= 1; /* minus write */
|
|
|
|
+ if (!allowed_events) {
|
|
|
|
+ /* the corresponding handle cannot be read or written */
|
|
|
|
+ continue;
|
|
}
|
|
}
|
|
|
|
|
|
- struct poll_handle * to_poll = rep ? : p;
|
|
|
|
-
|
|
|
|
- if (!(to_poll->flags & (POLL_R|POLL_W))) {
|
|
|
|
- if (!hdl->pal_handle) {
|
|
|
|
- p->flags |= KNOWN_R|KNOWN_W|RET_E;
|
|
|
|
- do_r = do_w = false;
|
|
|
|
- goto done_finding;
|
|
|
|
|
|
+ if (!(fds[i].events & (POLLIN|POLLRDNORM)) && (fds[i].events & (POLLOUT|POLLWRNORM))) {
|
|
|
|
+ /* special case: user is interested only in write event on this handle, and whether
|
|
|
|
+ * write event occurs is always known in PAL layer, so simply consult PAL and
|
|
|
|
+ * update revents and skip this handle for polling (note that otherwise PAL could get
|
|
|
|
+ * stuck in host poll() because PAL always polls on read events) */
|
|
|
|
+ PAL_STREAM_ATTR attr;
|
|
|
|
+ if (!DkStreamAttributesQueryByHandle(hdl->pal_handle, &attr)) {
|
|
|
|
+ /* something went wrong with this handle, silently skip this handle */
|
|
|
|
+ continue;
|
|
}
|
|
}
|
|
|
|
|
|
- debug("polling fd %d\n", to_poll->fd);
|
|
|
|
- npals++;
|
|
|
|
- }
|
|
|
|
-
|
|
|
|
- to_poll->flags |= (do_r ? POLL_R : 0)|(do_w ? POLL_W : 0);
|
|
|
|
-
|
|
|
|
-done_finding:
|
|
|
|
- /* feedback the new knowledge of repeated handles */
|
|
|
|
- if (rep)
|
|
|
|
- rep->flags |= p->flags &
|
|
|
|
- (KNOWN_R|KNOWN_W|RET_R|RET_W|RET_E|POLL_R|POLL_W);
|
|
|
|
-
|
|
|
|
- if (do_r)
|
|
|
|
- has_r = true;
|
|
|
|
-
|
|
|
|
- if (p->flags & (RET_R|RET_W|RET_E))
|
|
|
|
- has_known = true;
|
|
|
|
-
|
|
|
|
- SAVE_PROFILE_INTERVAL(do_poll_update_bookkeeping);
|
|
|
|
- }
|
|
|
|
-
|
|
|
|
- unlock(&map->lock);
|
|
|
|
-
|
|
|
|
- SAVE_PROFILE_INTERVAL_SINCE(do_poll_first_loop, begin_time);
|
|
|
|
-
|
|
|
|
- if (!npals) {
|
|
|
|
- ret = 0;
|
|
|
|
- goto done_polling;
|
|
|
|
- }
|
|
|
|
-
|
|
|
|
- pals = __try_alloca(cur, sizeof(PAL_HANDLE) * npals);
|
|
|
|
- npals = 0;
|
|
|
|
-
|
|
|
|
- n = &polling;
|
|
|
|
- for (p = polling ; p ; p = p->next) {
|
|
|
|
- assert(p->handle);
|
|
|
|
|
|
+ if (attr.writable)
|
|
|
|
+ fds[i].revents |= (fds[i].events & (POLLOUT|POLLWRNORM));
|
|
|
|
+ if (attr.disconnected)
|
|
|
|
+ fds[i].revents |= (POLLERR|POLLHUP);
|
|
|
|
|
|
- if (!(p->flags & (POLL_R|POLL_W))) {
|
|
|
|
- *n = p->next;
|
|
|
|
- put_handle(p->handle);
|
|
|
|
- p->handle = NULL;
|
|
|
|
|
|
+ if (fds[i].revents)
|
|
|
|
+ nrevents++;
|
|
continue;
|
|
continue;
|
|
}
|
|
}
|
|
|
|
|
|
- pals[npals++] = p->handle->pal_handle;
|
|
|
|
- n = &p->next;
|
|
|
|
|
|
+ get_handle(hdl);
|
|
|
|
+ fds_to_hdls[i] = hdl;
|
|
|
|
+ pals[npals] = hdl->pal_handle;
|
|
|
|
+ npals++;
|
|
}
|
|
}
|
|
|
|
|
|
- SAVE_PROFILE_INTERVAL(do_poll_second_loop);
|
|
|
|
|
|
+ unlock(&map->lock);
|
|
|
|
|
|
|
|
+ /* TODO: This loop is highly inefficient, since DkObjectsWaitAny returns only one (random)
|
|
|
|
+ * handle out of the whole array of handles-waiting-for-events. We must replace this
|
|
|
|
+ * loop with a single DkObjectsWaitEvents(). */
|
|
while (npals) {
|
|
while (npals) {
|
|
- int pal_timeout_us = (has_r && !has_known) ? timeout_us : 0;
|
|
|
|
- PAL_HANDLE polled = DkObjectsWaitAny(npals, pals, pal_timeout_us);
|
|
|
|
-
|
|
|
|
- if (pal_timeout_us)
|
|
|
|
- SAVE_PROFILE_INTERVAL(do_poll_wait_any);
|
|
|
|
- else
|
|
|
|
- SAVE_PROFILE_INTERVAL(do_poll_wait_any_peek);
|
|
|
|
-
|
|
|
|
|
|
+ PAL_HANDLE polled = DkObjectsWaitAny(npals, pals, timeout_us);
|
|
if (!polled)
|
|
if (!polled)
|
|
break;
|
|
break;
|
|
|
|
|
|
PAL_STREAM_ATTR attr;
|
|
PAL_STREAM_ATTR attr;
|
|
if (!DkStreamAttributesQueryByHandle(polled, &attr))
|
|
if (!DkStreamAttributesQueryByHandle(polled, &attr))
|
|
- break;
|
|
|
|
|
|
+ continue;
|
|
|
|
|
|
- n = &polling;
|
|
|
|
- for (p = polling ; p ; p = p->next) {
|
|
|
|
- if (p->handle->pal_handle == polled)
|
|
|
|
|
|
+ for (nfds_t i = 0; i < nfds; i++) {
|
|
|
|
+ if (fds_to_hdls[i]->pal_handle == polled) {
|
|
|
|
+ /* found user-supplied FD, update it with returned events */
|
|
|
|
+ fds[i].revents = 0;
|
|
|
|
+ if (attr.readable)
|
|
|
|
+ fds[i].revents |= (fds[i].events & (POLLIN|POLLRDNORM));
|
|
|
|
+ if (attr.writable)
|
|
|
|
+ fds[i].revents |= (fds[i].events & (POLLOUT|POLLWRNORM));
|
|
|
|
+ if (attr.disconnected)
|
|
|
|
+ fds[i].revents |= (POLLERR|POLLHUP);
|
|
|
|
+
|
|
|
|
+ if (fds[i].revents)
|
|
|
|
+ nrevents++;
|
|
break;
|
|
break;
|
|
- n = &p->next;
|
|
|
|
|
|
+ }
|
|
}
|
|
}
|
|
|
|
|
|
- if (!p)
|
|
|
|
- break;
|
|
|
|
-
|
|
|
|
- debug("handle %s is polled\n", qstrgetstr(&p->handle->uri));
|
|
|
|
-
|
|
|
|
- p->flags |= KNOWN_R|KNOWN_W;
|
|
|
|
-
|
|
|
|
- if (attr.disconnected) {
|
|
|
|
- debug("handle is polled to be disconnected\n");
|
|
|
|
- p->flags |= RET_E;
|
|
|
|
- }
|
|
|
|
- if (attr.readable) {
|
|
|
|
- debug("handle is polled to be readable\n");
|
|
|
|
- p->flags |= RET_R;
|
|
|
|
- }
|
|
|
|
- if (attr.writable) {
|
|
|
|
- debug("handle is polled to be writable\n");
|
|
|
|
- p->flags |= RET_W;
|
|
|
|
|
|
+ /* done with this PAL handle, remove it from array on which to DkObjectsWaitAny */
|
|
|
|
+ nfds_t skip = 0;
|
|
|
|
+ for (nfds_t i = 0; i < npals; i++) {
|
|
|
|
+ if (pals[i] == polled)
|
|
|
|
+ skip = 1;
|
|
|
|
+ else
|
|
|
|
+ pals[i - skip] = pals[i];
|
|
}
|
|
}
|
|
-
|
|
|
|
- for (q = p->children ; q ; q = q->next)
|
|
|
|
- q->flags |= p->flags & (KNOWN_R|KNOWN_W|RET_W|RET_R|RET_E);
|
|
|
|
-
|
|
|
|
- if ((p->flags & (POLL_R|KNOWN_R)) != (POLL_R|KNOWN_R) &&
|
|
|
|
- (p->flags & (POLL_W|KNOWN_W)) != (POLL_W|KNOWN_W))
|
|
|
|
- continue;
|
|
|
|
-
|
|
|
|
- has_known = true;
|
|
|
|
- *n = p->next;
|
|
|
|
- put_handle(p->handle);
|
|
|
|
- p->handle = NULL;
|
|
|
|
-
|
|
|
|
- int nskip = 0;
|
|
|
|
- for (int i = 0 ; i < npals ; i++)
|
|
|
|
- if (pals[i] == polled) {
|
|
|
|
- nskip = 1;
|
|
|
|
- } else if (nskip) {
|
|
|
|
- pals[i - nskip] = pals[i];
|
|
|
|
- }
|
|
|
|
- npals -= nskip;
|
|
|
|
-
|
|
|
|
- SAVE_PROFILE_INTERVAL(do_poll_third_loop);
|
|
|
|
|
|
+ npals -= skip;
|
|
}
|
|
}
|
|
|
|
|
|
- ret = 0;
|
|
|
|
-done_polling:
|
|
|
|
- for (p = polling ; p ; p = p->next)
|
|
|
|
- put_handle(p->handle);
|
|
|
|
-
|
|
|
|
- SAVE_PROFILE_INTERVAL(do_poll_fourth_loop);
|
|
|
|
|
|
+ for (nfds_t i = 0; i < nfds; i++)
|
|
|
|
+ if (fds_to_hdls[i])
|
|
|
|
+ put_handle(fds_to_hdls[i]);
|
|
|
|
+ free(pals);
|
|
|
|
+ free(fds_to_hdls);
|
|
|
|
|
|
- if (pals)
|
|
|
|
- __try_free(cur, pals);
|
|
|
|
-
|
|
|
|
- return ret;
|
|
|
|
|
|
+ return nrevents;
|
|
}
|
|
}
|
|
|
|
|
|
-int shim_do_poll (struct pollfd * fds, nfds_t nfds, int timeout_ms)
|
|
|
|
-{
|
|
|
|
- struct shim_thread * cur = get_cur_thread();
|
|
|
|
-
|
|
|
|
- struct poll_handle * polls =
|
|
|
|
- __try_alloca(cur, sizeof(struct poll_handle) * nfds);
|
|
|
|
-
|
|
|
|
- for (size_t i = 0 ; i < nfds ; i++) {
|
|
|
|
- polls[i].fd = fds[i].fd;
|
|
|
|
- polls[i].flags = 0;
|
|
|
|
- if (fds[i].events & (POLLIN|POLLRDNORM))
|
|
|
|
- polls[i].flags |= DO_R;
|
|
|
|
- if (fds[i].events & (POLLOUT|POLLWRNORM))
|
|
|
|
- polls[i].flags |= DO_W;
|
|
|
|
- }
|
|
|
|
-
|
|
|
|
- int ret = __do_poll(nfds, polls,
|
|
|
|
- timeout_ms < 0 ? POLL_NOTIMEOUT : timeout_ms * 1000ULL);
|
|
|
|
-
|
|
|
|
- if (ret < 0)
|
|
|
|
- goto out;
|
|
|
|
-
|
|
|
|
- ret = 0;
|
|
|
|
-
|
|
|
|
- for (size_t i = 0 ; i < nfds ; i++) {
|
|
|
|
- fds[i].revents = 0;
|
|
|
|
-
|
|
|
|
- if (polls[i].flags & RET_R)
|
|
|
|
- fds[i].revents |= (fds[i].events & (POLLIN|POLLRDNORM));
|
|
|
|
- if (polls[i].flags & RET_W)
|
|
|
|
- fds[i].revents |= (fds[i].events & (POLLOUT|POLLWRNORM));
|
|
|
|
- if (polls[i].flags & RET_E)
|
|
|
|
- fds[i].revents |= (POLLERR|POLLHUP);
|
|
|
|
-
|
|
|
|
- if (fds[i].revents)
|
|
|
|
- ret++;
|
|
|
|
- }
|
|
|
|
-
|
|
|
|
-out:
|
|
|
|
- __try_free(cur, polls);
|
|
|
|
-
|
|
|
|
- return ret;
|
|
|
|
-}
|
|
|
|
-
|
|
|
|
-int shim_do_ppoll (struct pollfd * fds, int nfds, struct timespec * tsp,
|
|
|
|
- const __sigset_t * sigmask, size_t sigsetsize)
|
|
|
|
-{
|
|
|
|
|
|
+int shim_do_ppoll(struct pollfd* fds, int nfds, struct timespec* tsp,
|
|
|
|
+ const __sigset_t* sigmask, size_t sigsetsize) {
|
|
__UNUSED(sigmask);
|
|
__UNUSED(sigmask);
|
|
__UNUSED(sigsetsize);
|
|
__UNUSED(sigsetsize);
|
|
- struct shim_thread * cur = get_cur_thread();
|
|
|
|
-
|
|
|
|
- struct poll_handle * polls =
|
|
|
|
- __try_alloca(cur, sizeof(struct poll_handle) * nfds);
|
|
|
|
-
|
|
|
|
- for (int i = 0 ; i < nfds ; i++) {
|
|
|
|
- polls[i].fd = fds[i].fd;
|
|
|
|
- polls[i].flags = 0;
|
|
|
|
- if (fds[i].events & (POLLIN|POLLRDNORM))
|
|
|
|
- polls[i].flags |= DO_R;
|
|
|
|
- if (fds[i].events & (POLLOUT|POLLWRNORM))
|
|
|
|
- polls[i].flags |= DO_W;
|
|
|
|
- }
|
|
|
|
-
|
|
|
|
- uint64_t timeout_us = tsp ? tsp->tv_sec * 1000000ULL + tsp->tv_nsec / 1000 : POLL_NOTIMEOUT;
|
|
|
|
- int ret = __do_poll(nfds, polls, timeout_us);
|
|
|
|
-
|
|
|
|
- if (ret < 0)
|
|
|
|
- goto out;
|
|
|
|
|
|
|
|
- ret = 0;
|
|
|
|
-
|
|
|
|
- for (int i = 0 ; i < nfds ; i++) {
|
|
|
|
- fds[i].revents = 0;
|
|
|
|
-
|
|
|
|
- if (polls[i].flags & RET_R)
|
|
|
|
- fds[i].revents |= (fds[i].events & (POLLIN|POLLRDNORM));
|
|
|
|
- if (polls[i].flags & RET_W)
|
|
|
|
- fds[i].revents |= (fds[i].events & (POLLOUT|POLLWRNORM));
|
|
|
|
- if (polls[i].flags & RET_E)
|
|
|
|
- fds[i].revents |= (fds[i].events & (POLLERR|POLLHUP));
|
|
|
|
-
|
|
|
|
- if (fds[i].revents)
|
|
|
|
- ret++;
|
|
|
|
- }
|
|
|
|
-
|
|
|
|
-out:
|
|
|
|
- __try_free(cur, polls);
|
|
|
|
-
|
|
|
|
- return ret;
|
|
|
|
|
|
+ uint64_t timeout_ms = tsp ? tsp->tv_sec * 1000ULL + tsp->tv_nsec / 1000000 : POLL_NOTIMEOUT;
|
|
|
|
+ return shim_do_poll(fds, nfds, timeout_ms);
|
|
}
|
|
}
|
|
|
|
|
|
-typedef long int __fd_mask;
|
|
|
|
-
|
|
|
|
-#ifndef __NFDBITS
|
|
|
|
-#define __NFDBITS (8 * (int)sizeof(__fd_mask))
|
|
|
|
-#endif
|
|
|
|
-#ifndef __FDS_BITS
|
|
|
|
-#define __FDS_BITS(set) ((set)->fds_bits)
|
|
|
|
-#endif
|
|
|
|
-
|
|
|
|
-/* We don't use `memset' because this would require a prototype and
|
|
|
|
- the array isn't too big. */
|
|
|
|
-# define __FD_ZERO(set) \
|
|
|
|
- do { \
|
|
|
|
- unsigned int __i; \
|
|
|
|
- fd_set *__arr = (set); \
|
|
|
|
- for (__i = 0; __i < sizeof (fd_set) / sizeof (__fd_mask); ++__i) \
|
|
|
|
- __FDS_BITS (__arr)[__i] = 0; \
|
|
|
|
- } while (0)
|
|
|
|
-
|
|
|
|
-#define __FD_ELT(d) ((d) / __NFDBITS)
|
|
|
|
-#define __FD_MASK(d) ((__fd_mask)1 << ((d) % __NFDBITS))
|
|
|
|
-
|
|
|
|
-#define __FD_SET(d, set) \
|
|
|
|
- ((void)(__FDS_BITS(set)[__FD_ELT(d)] |= __FD_MASK(d)))
|
|
|
|
-#define __FD_CLR(d, set) \
|
|
|
|
- ((void)(__FDS_BITS(set)[__FD_ELT(d)] &= ~__FD_MASK(d)))
|
|
|
|
-#define __FD_ISSET(d, set) \
|
|
|
|
- ((__FDS_BITS(set)[__FD_ELT(d)] & __FD_MASK(d)) != 0)
|
|
|
|
-
|
|
|
|
-DEFINE_PROFILE_CATEGORY(select, );
|
|
|
|
-DEFINE_PROFILE_INTERVAL(select_tryalloca_1, select);
|
|
|
|
-DEFINE_PROFILE_INTERVAL(select_setup_array, select);
|
|
|
|
-DEFINE_PROFILE_INTERVAL(select_do_poll, select);
|
|
|
|
-DEFINE_PROFILE_INTERVAL(select_fd_zero, select);
|
|
|
|
-DEFINE_PROFILE_INTERVAL(select_fd_sets, select);
|
|
|
|
-DEFINE_PROFILE_INTERVAL(select_try_free, select);
|
|
|
|
|
|
+int shim_do_select(int nfds, fd_set* readfds, fd_set* writefds,
|
|
|
|
+ fd_set* errorfds, struct __kernel_timeval* tsv) {
|
|
|
|
+ if (tsv && (tsv->tv_sec < 0 || tsv->tv_usec < 0))
|
|
|
|
+ return -EINVAL;
|
|
|
|
|
|
-int shim_do_select (int nfds, fd_set * readfds, fd_set * writefds,
|
|
|
|
- fd_set * errorfds, struct __kernel_timeval * tsv)
|
|
|
|
-{
|
|
|
|
- BEGIN_PROFILE_INTERVAL();
|
|
|
|
|
|
+ if (nfds < 0 || (uint64_t)nfds > get_rlimit_cur(RLIMIT_NOFILE))
|
|
|
|
+ return -EINVAL;
|
|
|
|
|
|
if (!nfds) {
|
|
if (!nfds) {
|
|
if (!tsv)
|
|
if (!tsv)
|
|
return -EINVAL;
|
|
return -EINVAL;
|
|
|
|
|
|
|
|
+ /* special case of select(0, ..., tsv) used for sleep */
|
|
struct __kernel_timespec tsp;
|
|
struct __kernel_timespec tsp;
|
|
tsp.tv_sec = tsv->tv_sec;
|
|
tsp.tv_sec = tsv->tv_sec;
|
|
tsp.tv_nsec = tsv->tv_usec * 1000;
|
|
tsp.tv_nsec = tsv->tv_usec * 1000;
|
|
- return shim_do_nanosleep (&tsp, NULL);
|
|
|
|
|
|
+ return shim_do_nanosleep(&tsp, NULL);
|
|
}
|
|
}
|
|
|
|
|
|
- struct shim_thread * cur = get_cur_thread();
|
|
|
|
-
|
|
|
|
- struct poll_handle * polls =
|
|
|
|
- __try_alloca(cur, sizeof(struct poll_handle) * nfds);
|
|
|
|
- int npolls = 0;
|
|
|
|
-
|
|
|
|
- SAVE_PROFILE_INTERVAL(select_tryalloca_1);
|
|
|
|
-
|
|
|
|
- for (int fd = 0 ; fd < nfds ; fd++) {
|
|
|
|
- bool do_r = (readfds && __FD_ISSET(fd, readfds));
|
|
|
|
- bool do_w = (writefds && __FD_ISSET(fd, writefds));
|
|
|
|
- if (!do_r && !do_w)
|
|
|
|
- continue;
|
|
|
|
- debug("poll fd %d %s%s\n", fd, do_r ? "R" : "", do_w ? "W" : "");
|
|
|
|
- polls[npolls].fd = fd;
|
|
|
|
- polls[npolls].flags = (do_r ? DO_R : 0)|(do_w ? DO_W : 0);
|
|
|
|
- npolls++;
|
|
|
|
|
|
+ if (nfds < __NFDBITS) {
|
|
|
|
+ /* interesting corner case: Linux always checks at least 64 first FDs */
|
|
|
|
+ nfds = __NFDBITS;
|
|
}
|
|
}
|
|
|
|
|
|
- SAVE_PROFILE_INTERVAL(select_setup_array);
|
|
|
|
|
|
+ /* nfds is the upper limit for actual number of fds for poll */
|
|
|
|
+ struct pollfd* fds_poll = malloc(nfds * sizeof(struct pollfd));
|
|
|
|
+ if (!fds_poll)
|
|
|
|
+ return -ENOMEM;
|
|
|
|
+
|
|
|
|
+ /* populate array of pollfd's based on user-supplied readfds & writefds */
|
|
|
|
+ nfds_t nfds_poll = 0;
|
|
|
|
+ for (int fd = 0; fd < nfds; fd++) {
|
|
|
|
+ short events = 0;
|
|
|
|
+ if (readfds && __FD_ISSET(fd, readfds))
|
|
|
|
+ events |= POLLIN;
|
|
|
|
+ if (writefds && __FD_ISSET(fd, writefds))
|
|
|
|
+ events |= POLLOUT;
|
|
|
|
+
|
|
|
|
+ if (!events)
|
|
|
|
+ continue;
|
|
|
|
|
|
- uint64_t timeout_us = tsv ? tsv->tv_sec * 1000000ULL + tsv->tv_usec : POLL_NOTIMEOUT;
|
|
|
|
- int ret = __do_poll(npolls, polls, timeout_us);
|
|
|
|
|
|
+ fds_poll[nfds_poll].fd = fd;
|
|
|
|
+ fds_poll[nfds_poll].events = events;
|
|
|
|
+ fds_poll[nfds_poll].revents = 0;
|
|
|
|
+ nfds_poll++;
|
|
|
|
+ }
|
|
|
|
|
|
- SAVE_PROFILE_INTERVAL(select_do_poll);
|
|
|
|
|
|
+ /* select()/pselect() return -EBADF if invalid FD was given by user in readfds/writefds;
|
|
|
|
+ * note that poll()/ppoll() don't have this error code, so we return this code only here */
|
|
|
|
+ struct shim_handle_map* map = get_cur_thread()->handle_map;
|
|
|
|
+ lock(&map->lock);
|
|
|
|
+ for (nfds_t i = 0; i < nfds_poll; i++) {
|
|
|
|
+ struct shim_handle* hdl = __get_fd_handle(fds_poll[i].fd, NULL, map);
|
|
|
|
+ if (!hdl || !hdl->fs || !hdl->fs->fs_ops) {
|
|
|
|
+ /* the corresponding handle doesn't exist or doesn't provide FS-like semantics */
|
|
|
|
+ free(fds_poll);
|
|
|
|
+ unlock(&map->lock);
|
|
|
|
+ return -EBADF;
|
|
|
|
+ }
|
|
|
|
+ }
|
|
|
|
+ unlock(&map->lock);
|
|
|
|
|
|
- if (ret < 0)
|
|
|
|
- goto out;
|
|
|
|
|
|
+ uint64_t timeout_ms = tsv ? tsv->tv_sec * 1000ULL + tsv->tv_usec / 1000 : POLL_NOTIMEOUT;
|
|
|
|
+ int ret = shim_do_poll(fds_poll, nfds_poll, timeout_ms);
|
|
|
|
|
|
- ret = 0;
|
|
|
|
|
|
+ if (ret < 0) {
|
|
|
|
+ free(fds_poll);
|
|
|
|
+ return ret;
|
|
|
|
+ }
|
|
|
|
|
|
|
|
+ /* modify readfds, writefds, and errorfds in-place with returned events */
|
|
if (readfds)
|
|
if (readfds)
|
|
__FD_ZERO(readfds);
|
|
__FD_ZERO(readfds);
|
|
if (writefds)
|
|
if (writefds)
|
|
@@ -565,85 +285,37 @@ int shim_do_select (int nfds, fd_set * readfds, fd_set * writefds,
|
|
if (errorfds)
|
|
if (errorfds)
|
|
__FD_ZERO(errorfds);
|
|
__FD_ZERO(errorfds);
|
|
|
|
|
|
- SAVE_PROFILE_INTERVAL(select_fd_zero);
|
|
|
|
-
|
|
|
|
- for (int i = 0 ; i < npolls ; i++) {
|
|
|
|
- if (readfds && ((polls[i].flags & (DO_R|RET_R)) == (DO_R|RET_R))) {
|
|
|
|
- __FD_SET(polls[i].fd, readfds);
|
|
|
|
|
|
+ ret = 0;
|
|
|
|
+ for (nfds_t i = 0; i < nfds_poll; i++) {
|
|
|
|
+ if (readfds && (fds_poll[i].revents & POLLIN)) {
|
|
|
|
+ __FD_SET(fds_poll[i].fd, readfds);
|
|
ret++;
|
|
ret++;
|
|
}
|
|
}
|
|
- if (writefds && ((polls[i].flags & (DO_W|RET_W)) == (DO_W|RET_W))) {
|
|
|
|
- __FD_SET(polls[i].fd, writefds);
|
|
|
|
|
|
+ if (writefds && (fds_poll[i].revents & POLLOUT)) {
|
|
|
|
+ __FD_SET(fds_poll[i].fd, writefds);
|
|
ret++;
|
|
ret++;
|
|
}
|
|
}
|
|
- if (errorfds && ((polls[i].flags & (DO_R|DO_W|RET_E)) > RET_E)) {
|
|
|
|
- __FD_SET(polls[i].fd, errorfds);
|
|
|
|
|
|
+ if (errorfds && (fds_poll[i].revents & POLLERR)) {
|
|
|
|
+ __FD_SET(fds_poll[i].fd, errorfds);
|
|
ret++;
|
|
ret++;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
- SAVE_PROFILE_INTERVAL(select_fd_sets);
|
|
|
|
|
|
|
|
-out:
|
|
|
|
- __try_free(cur, polls);
|
|
|
|
- SAVE_PROFILE_INTERVAL(select_try_free);
|
|
|
|
|
|
+ free(fds_poll);
|
|
return ret;
|
|
return ret;
|
|
}
|
|
}
|
|
|
|
|
|
-int shim_do_pselect6 (int nfds, fd_set * readfds, fd_set * writefds,
|
|
|
|
- fd_set * errorfds, const struct __kernel_timespec * tsp,
|
|
|
|
- const __sigset_t * sigmask)
|
|
|
|
-{
|
|
|
|
|
|
+int shim_do_pselect6(int nfds, fd_set* readfds, fd_set* writefds,
|
|
|
|
+ fd_set* errorfds, const struct __kernel_timespec* tsp,
|
|
|
|
+ const __sigset_t* sigmask) {
|
|
__UNUSED(sigmask);
|
|
__UNUSED(sigmask);
|
|
- if (!nfds)
|
|
|
|
- return tsp ? shim_do_nanosleep (tsp, NULL) : -EINVAL;
|
|
|
|
-
|
|
|
|
- struct shim_thread * cur = get_cur_thread();
|
|
|
|
-
|
|
|
|
- struct poll_handle * polls =
|
|
|
|
- __try_alloca(cur, sizeof(struct poll_handle) * nfds);
|
|
|
|
- int npolls = 0;
|
|
|
|
|
|
|
|
- for (int fd = 0 ; fd < nfds ; fd++) {
|
|
|
|
- bool do_r = (readfds && __FD_ISSET(fd, readfds));
|
|
|
|
- bool do_w = (writefds && __FD_ISSET(fd, writefds));
|
|
|
|
- if (!do_r && !do_w)
|
|
|
|
- continue;
|
|
|
|
- polls[npolls].fd = fd;
|
|
|
|
- polls[npolls].flags = (do_r ? DO_R : 0)|(do_w ? DO_W : 0);
|
|
|
|
- npolls++;
|
|
|
|
|
|
+ if (tsp) {
|
|
|
|
+ struct __kernel_timeval tsv;
|
|
|
|
+ tsv.tv_sec = tsp->tv_sec;
|
|
|
|
+ tsv.tv_usec = tsp->tv_nsec / 1000;
|
|
|
|
+ return shim_do_select(nfds, readfds, writefds, errorfds, &tsv);
|
|
}
|
|
}
|
|
|
|
|
|
- uint64_t timeout_us = tsp ? tsp->tv_sec * 1000000ULL + tsp->tv_nsec / 1000 : POLL_NOTIMEOUT;
|
|
|
|
- int ret = __do_poll(npolls, polls, timeout_us);
|
|
|
|
-
|
|
|
|
- if (ret < 0)
|
|
|
|
- goto out;
|
|
|
|
-
|
|
|
|
- ret = 0;
|
|
|
|
-
|
|
|
|
- if (readfds)
|
|
|
|
- __FD_ZERO(readfds);
|
|
|
|
- if (writefds)
|
|
|
|
- __FD_ZERO(writefds);
|
|
|
|
- if (errorfds)
|
|
|
|
- __FD_ZERO(errorfds);
|
|
|
|
-
|
|
|
|
- for (int i = 0 ; i < npolls ; i++) {
|
|
|
|
- if (readfds && ((polls[i].flags & (DO_R|RET_R)) == (DO_R|RET_R))) {
|
|
|
|
- __FD_SET(polls[i].fd, readfds);
|
|
|
|
- ret++;
|
|
|
|
- }
|
|
|
|
- if (writefds && ((polls[i].flags & (DO_W|RET_W)) == (DO_W|RET_W))) {
|
|
|
|
- __FD_SET(polls[i].fd, writefds);
|
|
|
|
- ret++;
|
|
|
|
- }
|
|
|
|
- if (errorfds && ((polls[i].flags & (DO_R|DO_W|RET_E)) > RET_E)) {
|
|
|
|
- __FD_SET(polls[i].fd, errorfds);
|
|
|
|
- ret++;
|
|
|
|
- }
|
|
|
|
- }
|
|
|
|
-
|
|
|
|
-out:
|
|
|
|
- __try_free(cur, polls);
|
|
|
|
- return ret;
|
|
|
|
|
|
+ return shim_do_select(nfds, readfds, writefds, errorfds, NULL);
|
|
}
|
|
}
|