/* Copyright (C) 2014 Stony Brook University
   This file is part of Graphene Library OS.

   Graphene Library OS is free software: you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public License
   as published by the Free Software Foundation, either version 3 of the
   License, or (at your option) any later version.

   Graphene Library OS is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
   GNU Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public License
   along with this program. If not, see <http://www.gnu.org/licenses/>. */

/*
 * shim_poll.c
 *
 * Implementation of system calls "poll", "ppoll", "select" and "pselect6".
 */

#include <errno.h>
#include <linux/fcntl.h>

#include <pal.h>
#include <pal_error.h>
#include <shim_fs.h>
#include <shim_handle.h>
#include <shim_internal.h>
#include <shim_table.h>
#include <shim_thread.h>
#include <shim_utils.h>
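
/* Bit-manipulation helpers for fd_set, equivalent to the standard FD_ZERO/FD_SET/FD_CLR/FD_ISSET
 * macros (same bit layout as glibc's fd_set); used by the select()/pselect6() emulation below. */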
typedef long int __fd_mask;

#ifndef __NFDBITS
#define __NFDBITS (8 * (int)sizeof(__fd_mask))
#endif

#ifndef __FDS_BITS
#define __FDS_BITS(set) ((set)->fds_bits)
#endif

#define __FD_ZERO(set)                                           \
    do {                                                         \
        unsigned int i;                                          \
        fd_set* arr = (set);                                     \
        for (i = 0; i < sizeof(fd_set) / sizeof(__fd_mask); i++) \
            __FDS_BITS(arr)[i] = 0;                              \
    } while (0)

#define __FD_ELT(d)        ((d) / __NFDBITS)
#define __FD_MASK(d)       ((__fd_mask)1 << ((d) % __NFDBITS))
#define __FD_SET(d, set)   ((void)(__FDS_BITS(set)[__FD_ELT(d)] |= __FD_MASK(d)))
#define __FD_CLR(d, set)   ((void)(__FDS_BITS(set)[__FD_ELT(d)] &= ~__FD_MASK(d)))
#define __FD_ISSET(d, set) ((__FDS_BITS(set)[__FD_ELT(d)] & __FD_MASK(d)) != 0)

#define POLL_NOTIMEOUT ((uint64_t)-1)
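
/* Emulation of poll(2). Waits on the PAL handles that back the user-supplied FDs and translates
 * the PAL events back into revents bits. A negative timeout_ms means "block indefinitely".
 * Returns the number of pollfd entries with nonzero revents, or a negative errno code. */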
int shim_do_poll(struct pollfd* fds, nfds_t nfds, int timeout_ms) {
    if (!fds || test_user_memory(fds, sizeof(*fds) * nfds, true))
        return -EFAULT;

    if ((uint64_t)nfds > get_rlimit_cur(RLIMIT_NOFILE))
        return -EINVAL;

    struct shim_handle_map* map = get_cur_thread()->handle_map;

    uint64_t timeout_us = timeout_ms < 0 ? POLL_NOTIMEOUT : timeout_ms * 1000ULL;

    /* nfds is the upper limit for actual number of handles */
    PAL_HANDLE* pals = malloc(nfds * sizeof(PAL_HANDLE));
    if (!pals)
        return -ENOMEM;

    /* for bookkeeping, need to have a mapping FD -> {shim handle, index-in-pals} */
    struct fds_mapping_t {
        struct shim_handle* hdl; /* NULL if no mapping (handle is not used in polling) */
        nfds_t idx;              /* index from fds array to pals array */
    };
    struct fds_mapping_t* fds_mapping = malloc(nfds * sizeof(struct fds_mapping_t));
    if (!fds_mapping) {
        free(pals);
        return -ENOMEM;
    }

    /* allocate one memory region to hold two PAL_FLG arrays: events and revents */
    PAL_FLG* pal_events = malloc(nfds * sizeof(PAL_FLG) * 2);
    if (!pal_events) {
        free(pals);
        free(fds_mapping);
        return -ENOMEM;
    }
    PAL_FLG* ret_events = pal_events + nfds;

    nfds_t pal_cnt = 0;
    nfds_t nrevents = 0;

    lock(&map->lock);

    /* collect PAL handles that correspond to user-supplied FDs (only those that can be polled) */
    for (nfds_t i = 0; i < nfds; i++) {
        fds[i].revents = 0;
        fds_mapping[i].hdl = NULL;

        if (fds[i].fd < 0) {
            /* FD is negative, must be ignored */
            continue;
        }

        struct shim_handle* hdl = __get_fd_handle(fds[i].fd, NULL, map);
        if (!hdl || !hdl->fs || !hdl->fs->fs_ops) {
            /* The corresponding handle doesn't exist or doesn't provide FS-like semantics; do not
             * include it in handles-to-poll array but notify user about invalid request. */
            fds[i].revents = POLLNVAL;
            nrevents++;
            continue;
        }

        if (hdl->type == TYPE_FILE || hdl->type == TYPE_DEV) {
            /* Files and devs are special cases: their poll is emulated at LibOS level; do not
             * include them in handles-to-poll array but instead use handle-specific callback. */
            int shim_events = 0;
            if ((fds[i].events & (POLLIN | POLLRDNORM)) && (hdl->acc_mode & MAY_READ))
                shim_events |= FS_POLL_RD;
            if ((fds[i].events & (POLLOUT | POLLWRNORM)) && (hdl->acc_mode & MAY_WRITE))
                shim_events |= FS_POLL_WR;

            int shim_revents = hdl->fs->fs_ops->poll(hdl, shim_events);

            fds[i].revents = 0;
            if (shim_revents & FS_POLL_RD)
                fds[i].revents |= fds[i].events & (POLLIN | POLLRDNORM);
            if (shim_revents & FS_POLL_WR)
                fds[i].revents |= fds[i].events & (POLLOUT | POLLWRNORM);

            nrevents++;
            continue;
        }

        PAL_FLG allowed_events = 0;
        if ((fds[i].events & (POLLIN | POLLRDNORM)) && (hdl->acc_mode & MAY_READ))
            allowed_events |= PAL_WAIT_READ;
        if ((fds[i].events & (POLLOUT | POLLWRNORM)) && (hdl->acc_mode & MAY_WRITE))
            allowed_events |= PAL_WAIT_WRITE;

        if ((fds[i].events & (POLLIN | POLLRDNORM | POLLOUT | POLLWRNORM)) && !allowed_events) {
            /* If user requested read/write events but they are not allowed on this handle, ignore
             * this handle (but note that user may only be interested in errors, and this is a
             * valid request). */
            continue;
        }

        get_handle(hdl);
        fds_mapping[i].hdl = hdl;
        fds_mapping[i].idx = pal_cnt;
        pals[pal_cnt] = hdl->pal_handle;
        pal_events[pal_cnt] = allowed_events;
        ret_events[pal_cnt] = 0;
        pal_cnt++;
    }

    unlock(&map->lock);
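
    /* ask the PAL to wait on all collected handles at once; ret_events receives the events that
     * fired per handle, and `polled` reports whether anything fired before the timeout expired */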
    PAL_BOL polled = DkStreamsWaitEvents(pal_cnt, pals, pal_events, ret_events, timeout_us);

    /* update fds.revents, but only if something was actually polled */
    if (polled) {
        for (nfds_t i = 0; i < nfds; i++) {
            if (!fds_mapping[i].hdl)
                continue;

            fds[i].revents = 0;
            if (ret_events[fds_mapping[i].idx] & PAL_WAIT_ERROR)
                fds[i].revents |= POLLERR | POLLHUP;
            if (ret_events[fds_mapping[i].idx] & PAL_WAIT_READ)
                fds[i].revents |= fds[i].events & (POLLIN | POLLRDNORM);
            if (ret_events[fds_mapping[i].idx] & PAL_WAIT_WRITE)
                fds[i].revents |= fds[i].events & (POLLOUT | POLLWRNORM);

            if (fds[i].revents)
                nrevents++;
        }
    }

    /* release handle references taken during collection; this must happen even if nothing was
     * polled (e.g., on timeout), otherwise the reference counts would leak */
    for (nfds_t i = 0; i < nfds; i++) {
        if (fds_mapping[i].hdl)
            put_handle(fds_mapping[i].hdl);
    }

    free(pals);
    free(pal_events);
    free(fds_mapping);
    return nrevents;
}
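
/* Emulation of ppoll(2) on top of shim_do_poll(). The signal-mask arguments are accepted but
 * currently ignored; the timespec timeout is converted to milliseconds (NULL means "block
 * indefinitely"). */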
int shim_do_ppoll(struct pollfd* fds, int nfds, struct timespec* tsp, const __sigset_t* sigmask,
                  size_t sigsetsize) {
    __UNUSED(sigmask);
    __UNUSED(sigsetsize);

    uint64_t timeout_ms = tsp ? tsp->tv_sec * 1000ULL + tsp->tv_nsec / 1000000 : POLL_NOTIMEOUT;
    return shim_do_poll(fds, nfds, timeout_ms);
}
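
/* Emulation of select(2): readfds/writefds are translated into a pollfd array, shim_do_poll()
 * does the actual waiting, and the results are translated back into the user's fd_set bitmaps.
 * Returns the total number of bits set across readfds/writefds/errorfds, or a negative errno
 * code. */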
int shim_do_select(int nfds, fd_set* readfds, fd_set* writefds, fd_set* errorfds,
                   struct __kernel_timeval* tsv) {
    if (tsv && (tsv->tv_sec < 0 || tsv->tv_usec < 0))
        return -EINVAL;

    if (nfds < 0 || (uint64_t)nfds > get_rlimit_cur(RLIMIT_NOFILE))
        return -EINVAL;

    if (!nfds) {
        if (!tsv)
            return -EINVAL;

        /* special case of select(0, ..., tsv) used for sleep */
        struct __kernel_timespec tsp;
        tsp.tv_sec = tsv->tv_sec;
        tsp.tv_nsec = tsv->tv_usec * 1000;
        return shim_do_nanosleep(&tsp, NULL);
    }

    if (nfds < __NFDBITS) {
        /* interesting corner case: Linux always checks at least 64 first FDs */
        nfds = __NFDBITS;
    }

    /* nfds is the upper limit for actual number of fds for poll */
    struct pollfd* fds_poll = malloc(nfds * sizeof(struct pollfd));
    if (!fds_poll)
        return -ENOMEM;

    /* populate array of pollfd's based on user-supplied readfds & writefds */
    nfds_t nfds_poll = 0;
    for (int fd = 0; fd < nfds; fd++) {
        short events = 0;
        if (readfds && __FD_ISSET(fd, readfds))
            events |= POLLIN;
        if (writefds && __FD_ISSET(fd, writefds))
            events |= POLLOUT;

        if (!events)
            continue;

        fds_poll[nfds_poll].fd = fd;
        fds_poll[nfds_poll].events = events;
        fds_poll[nfds_poll].revents = 0;
        nfds_poll++;
    }

    /* select()/pselect() return -EBADF if invalid FD was given by user in readfds/writefds;
     * note that poll()/ppoll() don't have this error code, so we return this code only here */
    struct shim_handle_map* map = get_cur_thread()->handle_map;
    lock(&map->lock);
    for (nfds_t i = 0; i < nfds_poll; i++) {
        struct shim_handle* hdl = __get_fd_handle(fds_poll[i].fd, NULL, map);
        if (!hdl || !hdl->fs || !hdl->fs->fs_ops) {
            /* the corresponding handle doesn't exist or doesn't provide FS-like semantics */
            free(fds_poll);
            unlock(&map->lock);
            return -EBADF;
        }
    }
    unlock(&map->lock);
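
    /* convert select()'s seconds + microseconds timeout into the milliseconds that
     * shim_do_poll() expects; a NULL timeout means "block indefinitely" */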
    uint64_t timeout_ms = tsv ? tsv->tv_sec * 1000ULL + tsv->tv_usec / 1000 : POLL_NOTIMEOUT;
    int ret = shim_do_poll(fds_poll, nfds_poll, timeout_ms);

    if (ret < 0) {
        free(fds_poll);
        return ret;
    }

    /* modify readfds, writefds, and errorfds in-place with returned events */
    if (readfds)
        __FD_ZERO(readfds);
    if (writefds)
        __FD_ZERO(writefds);
    if (errorfds)
        __FD_ZERO(errorfds);

    ret = 0;
    for (nfds_t i = 0; i < nfds_poll; i++) {
        if (readfds && (fds_poll[i].revents & POLLIN)) {
            __FD_SET(fds_poll[i].fd, readfds);
            ret++;
        }
        if (writefds && (fds_poll[i].revents & POLLOUT)) {
            __FD_SET(fds_poll[i].fd, writefds);
            ret++;
        }
        if (errorfds && (fds_poll[i].revents & POLLERR)) {
            __FD_SET(fds_poll[i].fd, errorfds);
            ret++;
        }
    }

    free(fds_poll);
    return ret;
}
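
/* Emulation of pselect6(2) via shim_do_select(). The signal mask is currently ignored; the
 * nanosecond-precision timespec is converted (truncating) to the microsecond-precision timeval
 * expected by shim_do_select(). */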
int shim_do_pselect6(int nfds, fd_set* readfds, fd_set* writefds, fd_set* errorfds,
                     const struct __kernel_timespec* tsp, const __sigset_t* sigmask) {
    __UNUSED(sigmask);

    if (tsp) {
        struct __kernel_timeval tsv;
        tsv.tv_sec = tsp->tv_sec;
        tsv.tv_usec = tsp->tv_nsec / 1000;
        return shim_do_select(nfds, readfds, writefds, errorfds, &tsv);
    }

    return shim_do_select(nfds, readfds, writefds, errorfds, NULL);
}
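
/* Illustrative sketch (hypothetical guest code): the entry points above are reached when an
 * application running on the LibOS issues the corresponding syscalls. For example, the common
 * idiom of sleeping for a fraction of a second via select():
 *
 *     struct timeval tv = {.tv_sec = 0, .tv_usec = 200 * 1000};
 *     select(0, NULL, NULL, NULL, &tv);
 *
 * lands in shim_do_select() with nfds == 0 and is forwarded to shim_do_nanosleep(), while a
 * select() with actual FDs follows the fd_set -> pollfd -> DkStreamsWaitEvents() path. */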