/* Copyright (C) 2014 Stony Brook University
   This file is part of Graphene Library OS.

   Graphene Library OS is free software: you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public License
   as published by the Free Software Foundation, either version 3 of the
   License, or (at your option) any later version.

   Graphene Library OS is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public License
   along with this program.  If not, see <http://www.gnu.org/licenses/>. */

/*
 * shim_poll.c
 *
 * Implementation of system calls "poll", "ppoll", "select" and "pselect6".
 */

#include <errno.h>
#include <linux/fcntl.h>

#include <pal.h>
#include <pal_error.h>
#include <shim_fs.h>
#include <shim_handle.h>
#include <shim_internal.h>
#include <shim_table.h>
#include <shim_thread.h>
#include <shim_utils.h>
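
/* local definitions of the fd_set bit-manipulation macros; the select()/pselect6() emulation
 * below uses them to inspect and update the user-supplied fd_set bitmaps */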
typedef long int __fd_mask;

#ifndef __NFDBITS
#define __NFDBITS (8 * (int)sizeof(__fd_mask))
#endif

#ifndef __FDS_BITS
#define __FDS_BITS(set) ((set)->fds_bits)
#endif

#define __FD_ZERO(set)                                                 \
    do {                                                               \
        unsigned int __i;                                              \
        fd_set* __arr = (set);                                         \
        for (__i = 0; __i < sizeof(fd_set) / sizeof(__fd_mask); ++__i) \
            __FDS_BITS(__arr)[__i] = 0;                                \
    } while (0)

#define __FD_ELT(d)  ((d) / __NFDBITS)
#define __FD_MASK(d) ((__fd_mask)1 << ((d) % __NFDBITS))

#define __FD_SET(d, set)   ((void)(__FDS_BITS(set)[__FD_ELT(d)] |= __FD_MASK(d)))
#define __FD_CLR(d, set)   ((void)(__FDS_BITS(set)[__FD_ELT(d)] &= ~__FD_MASK(d)))
#define __FD_ISSET(d, set) ((__FDS_BITS(set)[__FD_ELT(d)] & __FD_MASK(d)) != 0)
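
/* sentinel "no timeout" value: a negative poll() timeout and a NULL ppoll()/select() timeout
 * argument are both mapped to this value before it is handed to DkObjectsWaitAny() */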
#define POLL_NOTIMEOUT ((uint64_t)-1)
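
/* Core implementation shared by poll()/ppoll() and, after pollfd conversion, by
 * select()/pselect6(): collects the PAL handles behind the pollable user FDs, waits on them via
 * DkObjectsWaitAny(), and translates the resulting PAL stream attributes back into poll revents.
 * Returns the number of FDs with non-zero revents, or a negative error code. */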
int shim_do_poll(struct pollfd* fds, nfds_t nfds, int timeout_ms) {
    if (!fds || test_user_memory(fds, sizeof(*fds) * nfds, true))
        return -EFAULT;

    if ((uint64_t)nfds > get_rlimit_cur(RLIMIT_NOFILE))
        return -EINVAL;

    struct shim_handle_map* map = get_cur_thread()->handle_map;
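
    /* a negative timeout means "block indefinitely"; otherwise the millisecond timeout is
     * converted to microseconds before being passed to DkObjectsWaitAny() */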
    uint64_t timeout_us = timeout_ms < 0 ? POLL_NOTIMEOUT : timeout_ms * 1000ULL;

    /* nfds is the upper limit for the actual number of handles */
    PAL_HANDLE* pals = malloc(nfds * sizeof(PAL_HANDLE));
    if (!pals)
        return -ENOMEM;

    /* for bookkeeping, we need a mapping FD -> handle */
    struct shim_handle** fds_to_hdls = malloc(nfds * sizeof(struct shim_handle*));
    if (!fds_to_hdls) {
        free(pals);
        return -ENOMEM;
    }
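
    /* npals counts the PAL handles actually submitted for waiting; nrevents counts the FDs for
     * which non-zero revents are reported back to the user */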
    nfds_t npals = 0;
    nfds_t nrevents = 0;

    lock(&map->lock);

    /* collect PAL handles that correspond to user-supplied FDs (only those that can be polled) */
    for (nfds_t i = 0; i < nfds; i++) {
        fds[i].revents = 0;
        fds_to_hdls[i] = NULL;

        if (fds[i].fd < 0) {
            /* FD is negative, must be ignored */
            continue;
        }

        if (!(fds[i].events & (POLLIN | POLLRDNORM)) &&
            !(fds[i].events & (POLLOUT | POLLWRNORM))) {
            /* user didn't ask for read or write events, ignore this FD */
            continue;
        }

        struct shim_handle* hdl = __get_fd_handle(fds[i].fd, NULL, map);
        if (!hdl || !hdl->fs || !hdl->fs->fs_ops) {
            /* the corresponding handle doesn't exist or doesn't provide FS-like semantics */
            continue;
        }

        int allowed_events = 2; /* read + write */
        if ((fds[i].events & (POLLIN | POLLRDNORM)) && !(hdl->acc_mode & MAY_READ))
            allowed_events -= 1; /* minus read */
        if ((fds[i].events & (POLLOUT | POLLWRNORM)) && !(hdl->acc_mode & MAY_WRITE))
            allowed_events -= 1; /* minus write */
        if (!allowed_events) {
            /* the corresponding handle cannot be read or written */
            continue;
        }

        if (!(fds[i].events & (POLLIN | POLLRDNORM)) && (fds[i].events & (POLLOUT | POLLWRNORM))) {
            /* special case: the user is interested only in write events on this handle; whether
             * a write event occurred is always known at the PAL layer, so simply consult PAL,
             * update revents, and skip this handle for polling (note that otherwise PAL could
             * get stuck in host poll() because PAL always polls on read events) */
            PAL_STREAM_ATTR attr;
            if (!DkStreamAttributesQueryByHandle(hdl->pal_handle, &attr)) {
                /* something went wrong with this handle, silently skip it */
                continue;
            }

            if (attr.writable)
                fds[i].revents |= (fds[i].events & (POLLOUT | POLLWRNORM));
            if (attr.disconnected)
                fds[i].revents |= (POLLERR | POLLHUP);
            if (fds[i].revents)
                nrevents++;

            continue;
        }

        get_handle(hdl);
        fds_to_hdls[i] = hdl;
        pals[npals] = hdl->pal_handle;
        npals++;
    }

    unlock(&map->lock);

    /* TODO: This loop is highly inefficient, since DkObjectsWaitAny() returns only one (random)
     *       handle out of the whole array of handles-waiting-for-events. We must replace this
     *       loop with a single DkObjectsWaitEvents(). */
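    /* each iteration blocks until one of the collected PAL handles reports an event, converts
     * that handle's stream attributes into revents for the matching user FD, and then removes
     * the handle from the wait set */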
    while (npals) {
        PAL_HANDLE polled = DkObjectsWaitAny(npals, pals, timeout_us);
        if (!polled)
            break;

        PAL_STREAM_ATTR attr;
        if (!DkStreamAttributesQueryByHandle(polled, &attr))
            continue;

        for (nfds_t i = 0; i < nfds; i++) {
            if (fds_to_hdls[i] && fds_to_hdls[i]->pal_handle == polled) {
                /* found the user-supplied FD, update it with the returned events */
                fds[i].revents = 0;
                if (attr.readable)
                    fds[i].revents |= (fds[i].events & (POLLIN | POLLRDNORM));
                if (attr.writable)
                    fds[i].revents |= (fds[i].events & (POLLOUT | POLLWRNORM));
                if (attr.disconnected)
                    fds[i].revents |= (POLLERR | POLLHUP);

                if (fds[i].revents)
                    nrevents++;
                break;
            }
        }

        /* done with this PAL handle, remove it from the array passed to DkObjectsWaitAny() */
        nfds_t skip = 0;
        for (nfds_t i = 0; i < npals; i++) {
            if (pals[i] == polled)
                skip = 1;
            else
                pals[i - skip] = pals[i];
        }
        npals -= skip;
    }
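
    /* release the handle references taken while collecting pollable FDs, and free the temporary
     * arrays */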
    for (nfds_t i = 0; i < nfds; i++)
        if (fds_to_hdls[i])
            put_handle(fds_to_hdls[i]);
    free(pals);
    free(fds_to_hdls);
    return nrevents;
}
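
/* ppoll() emulation: the signal mask and its size are ignored; the timespec timeout is converted
 * to milliseconds and forwarded to shim_do_poll() */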
int shim_do_ppoll(struct pollfd* fds, int nfds, struct timespec* tsp, const __sigset_t* sigmask,
                  size_t sigsetsize) {
    __UNUSED(sigmask);
    __UNUSED(sigsetsize);

    uint64_t timeout_ms = tsp ? tsp->tv_sec * 1000ULL + tsp->tv_nsec / 1000000 : POLL_NOTIMEOUT;
    return shim_do_poll(fds, nfds, timeout_ms);
}
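
/* select() emulation on top of shim_do_poll(): readfds/writefds are converted into an array of
 * struct pollfd, and the returned revents are folded back into the fd_set bitmaps; errorfds is
 * never waited on directly and is only filled in when POLLERR is reported */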
int shim_do_select(int nfds, fd_set* readfds, fd_set* writefds, fd_set* errorfds,
                   struct __kernel_timeval* tsv) {
    if (tsv && (tsv->tv_sec < 0 || tsv->tv_usec < 0))
        return -EINVAL;

    if (nfds < 0 || (uint64_t)nfds > get_rlimit_cur(RLIMIT_NOFILE))
        return -EINVAL;

    if (!nfds) {
        if (!tsv)
            return -EINVAL;

        /* special case of select(0, ..., tsv) used for sleep */
        struct __kernel_timespec tsp;
        tsp.tv_sec  = tsv->tv_sec;
        tsp.tv_nsec = tsv->tv_usec * 1000;
        return shim_do_nanosleep(&tsp, NULL);
    }

    if (nfds < __NFDBITS) {
        /* interesting corner case: Linux always checks at least the first 64 FDs */
        nfds = __NFDBITS;
    }

    /* nfds is the upper limit for the actual number of FDs to poll */
    struct pollfd* fds_poll = malloc(nfds * sizeof(struct pollfd));
    if (!fds_poll)
        return -ENOMEM;

    /* populate array of pollfd's based on user-supplied readfds & writefds */
    nfds_t nfds_poll = 0;
    for (int fd = 0; fd < nfds; fd++) {
        short events = 0;
        if (readfds && __FD_ISSET(fd, readfds))
            events |= POLLIN;
        if (writefds && __FD_ISSET(fd, writefds))
            events |= POLLOUT;

        if (!events)
            continue;

        fds_poll[nfds_poll].fd      = fd;
        fds_poll[nfds_poll].events  = events;
        fds_poll[nfds_poll].revents = 0;
        nfds_poll++;
    }

    /* select()/pselect() return -EBADF if an invalid FD was given by the user in
     * readfds/writefds; note that poll()/ppoll() don't have this error code, so we return this
     * code only here */
    struct shim_handle_map* map = get_cur_thread()->handle_map;
    lock(&map->lock);
    for (nfds_t i = 0; i < nfds_poll; i++) {
        struct shim_handle* hdl = __get_fd_handle(fds_poll[i].fd, NULL, map);
        if (!hdl || !hdl->fs || !hdl->fs->fs_ops) {
            /* the corresponding handle doesn't exist or doesn't provide FS-like semantics */
            free(fds_poll);
            unlock(&map->lock);
            return -EBADF;
        }
    }
    unlock(&map->lock);

    uint64_t timeout_ms = tsv ? tsv->tv_sec * 1000ULL + tsv->tv_usec / 1000 : POLL_NOTIMEOUT;
    int ret = shim_do_poll(fds_poll, nfds_poll, timeout_ms);
    if (ret < 0) {
        free(fds_poll);
        return ret;
    }

    /* modify readfds, writefds, and errorfds in-place with returned events */
    if (readfds)
        __FD_ZERO(readfds);
    if (writefds)
        __FD_ZERO(writefds);
    if (errorfds)
        __FD_ZERO(errorfds);

    ret = 0;
    for (nfds_t i = 0; i < nfds_poll; i++) {
        if (readfds && (fds_poll[i].revents & POLLIN)) {
            __FD_SET(fds_poll[i].fd, readfds);
            ret++;
        }
        if (writefds && (fds_poll[i].revents & POLLOUT)) {
            __FD_SET(fds_poll[i].fd, writefds);
            ret++;
        }
        if (errorfds && (fds_poll[i].revents & POLLERR)) {
            __FD_SET(fds_poll[i].fd, errorfds);
            ret++;
        }
    }

    free(fds_poll);
    return ret;
}
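
/* pselect6() emulation: the signal mask is ignored, and the timespec timeout is converted to a
 * timeval before delegating to shim_do_select() */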
int shim_do_pselect6(int nfds, fd_set* readfds, fd_set* writefds, fd_set* errorfds,
                     const struct __kernel_timespec* tsp, const __sigset_t* sigmask) {
    __UNUSED(sigmask);

    if (tsp) {
        struct __kernel_timeval tsv;
        tsv.tv_sec  = tsp->tv_sec;
        tsv.tv_usec = tsp->tv_nsec / 1000;
        return shim_do_select(nfds, readfds, writefds, errorfds, &tsv);
    }

    return shim_do_select(nfds, readfds, writefds, errorfds, NULL);
}