/* -*- mode:c; c-file-style:"k&r"; c-basic-offset: 4; tab-width:4;
   indent-tabs-mode:nil; mode:auto-fill; fill-column:78; -*- */
/* vim: set ts=4 sw=4 et tw=78 fo=cqt wm=0: */

/* Copyright (C) 2014 OSCAR lab, Stony Brook University

   This file is part of Graphene Library OS.

   Graphene Library OS is free software: you can redistribute it and/or
   modify it under the terms of the GNU General Public License
   as published by the Free Software Foundation, either version 3 of the
   License, or (at your option) any later version.

   Graphene Library OS is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program.  If not, see <http://www.gnu.org/licenses/>. */

/*
 * shim_poll.c
 *
 * Implementation of the system calls "poll", "ppoll", "select" and
 * "pselect6".
 */

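/*
 * All four entry points funnel into __do_poll(), which makes three passes
 * over an array of struct poll_handle:
 *
 *   1. resolve each fd to a shim handle, fold duplicates of the same
 *      handle together, and settle what can be settled cheaply (access
 *      mode, the file system's own poll operator);
 *   2. collect the PAL handles that still need real polling;
 *   3. wait on them with DkObjectsWaitAny() and translate the reported
 *      stream attributes back into RET_R/RET_W/RET_E flags.
 *
 * Timeouts are tracked in microseconds throughout; POLL_NOTIMEOUT means
 * block indefinitely.
 */
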
#include <shim_internal.h>
#include <shim_table.h>
#include <shim_utils.h>
#include <shim_thread.h>
#include <shim_handle.h>
#include <shim_fs.h>
#include <shim_profile.h>

#include <pal.h>
#include <pal_error.h>
#include <linux_list.h>

#include <fcntl.h>
#include <errno.h>
#include <sys/poll.h>
#include <sys/select.h>

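/* Stand-ins for glibc's fortification hooks (_FORTIFY_SOURCE); they back
 * the FD_SET/FD_ISSET bounds check in __fdelt_chk() below. */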
void __attribute__ ((noreturn))
__fortify_fail (const char * msg)
{
    /* The loop is added only to keep gcc happy. */
    while (1)
        debug("*** %s ***\n", msg);
}

void __attribute__ ((noreturn))
__chk_fail (void)
{
    __fortify_fail("buffer overflow detected");
}

unsigned long int
__fdelt_chk (unsigned long int d)
{
    /* d == FD_SETSIZE already indexes past the end of an fd_set, so it
       must be rejected as well */
    if (d >= FD_SETSIZE)
        __chk_fail();

    return d / __NFDBITS;
}

static inline __attribute__((always_inline))
void * __try_alloca (struct shim_thread * cur, int size)
{
    if (!size)
        return NULL;

    if (check_stack_size(cur, size))
        return __alloca(size);
    else
        return malloc(size);
}

static inline __attribute__((always_inline))
void __try_free (struct shim_thread * cur, void * mem)
{
    if (mem && !check_on_stack(cur, mem))
        free(mem);
}

DEFINE_PROFILE_CATAGORY(__do_poll, select);
DEFINE_PROFILE_INTERVAL(do_poll_get_handle,         __do_poll);
DEFINE_PROFILE_INTERVAL(do_poll_search_repeat,      __do_poll);
DEFINE_PROFILE_INTERVAL(do_poll_set_bookkeeping,    __do_poll);
DEFINE_PROFILE_INTERVAL(do_poll_check_accmode,      __do_poll);
DEFINE_PROFILE_INTERVAL(do_poll_vfs_polling,        __do_poll);
DEFINE_PROFILE_INTERVAL(do_poll_update_bookkeeping, __do_poll);
DEFINE_PROFILE_INTERVAL(do_poll_first_loop,         __do_poll);
DEFINE_PROFILE_INTERVAL(do_poll_second_loop,        __do_poll);
DEFINE_PROFILE_INTERVAL(do_poll_wait_any,           __do_poll);
DEFINE_PROFILE_INTERVAL(do_poll_wait_any_peek,      __do_poll);
DEFINE_PROFILE_INTERVAL(do_poll_third_loop,         __do_poll);
DEFINE_PROFILE_INTERVAL(do_poll_fourth_loop,        __do_poll);

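/* Per-fd state bits: DO_* record what the caller asked for, KNOWN_* mark
 * directions whose outcome is already decided, RET_* are the events to
 * report back, and POLL_* mark directions handed to the PAL for actual
 * polling. */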
#define DO_R        0001
#define DO_W        0002
#define KNOWN_R     0004
#define KNOWN_W     0010
#define RET_R       0020
#define RET_W       0040
#define RET_E       0100
#define POLL_R      0200
#define POLL_W      0400

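/* One entry per polled fd. Entries that refer to the same shim handle are
 * chained: the first occurrence goes on the "polling" list, and later
 * duplicates hang off its "children" list so that results are shared. */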
struct poll_handle {
    unsigned short       flags;
    FDTYPE               fd;
    struct shim_handle * handle;
    struct poll_handle * next;
    struct poll_handle * children;
} __attribute__((packed));

#define POLL_NOTIMEOUT  ((unsigned long) -1)

static int __do_poll (int npolls, struct poll_handle * polls,
                      unsigned long timeout)
{
    struct shim_thread * cur = get_cur_thread();
    struct shim_handle_map * map = cur->handle_map;

    int npals = 0;
    bool has_r = false, has_known = false;
    struct poll_handle * polling = NULL;
    struct poll_handle * p, ** n, * q;
    PAL_HANDLE * pals = NULL;

#ifdef PROFILE
    unsigned long begin_time = GET_PROFILE_INTERVAL();
    BEGIN_PROFILE_INTERVAL_SET(begin_time);
#endif

    lock(map->lock);

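    /* First pass: resolve every fd, fold duplicates into the handle they
     * repeat, and settle whatever can be settled without blocking. */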
    for (p = polls ; p < &polls[npolls] ; p++) {
        bool do_r = p->flags & DO_R;
        bool do_w = p->flags & DO_W;

        if (!do_r && !do_w) {
no_op:
            p->flags  = 0;
            p->handle = NULL;
            UPDATE_PROFILE_INTERVAL();
            continue;
        }

        struct shim_handle * hdl = __get_fd_handle(p->fd, NULL, map);
        if (!hdl || !hdl->fs || !hdl->fs->fs_ops)
            goto no_op;
        SAVE_PROFILE_INTERVAL(do_poll_get_handle);

        /* search for a repeated entry */
        struct poll_handle * rep = polling;
        for ( ; rep ; rep = rep->next)
            if (rep->handle == hdl)
                break;
        SAVE_PROFILE_INTERVAL(do_poll_search_repeat);

        p->flags    = (do_r ? DO_R : 0)|(do_w ? DO_W : 0);
        p->handle   = NULL;
        p->next     = NULL;
        p->children = NULL;

        if (rep) {
            /* if there are repeated handles and we already know the
               result, let's skip them (inherit the known flags, but
               keep our own DO_* request bits) */
            if (rep->flags & (KNOWN_R|POLL_R)) {
                p->flags |= rep->flags & (KNOWN_R|RET_R|RET_E|POLL_R);
                do_r = false;
            }

            if (rep->flags & (KNOWN_W|POLL_W)) {
                p->flags |= rep->flags & (KNOWN_W|RET_W|RET_E|POLL_W);
                do_w = false;
            }

            p->next = rep->children;
            rep->children = p;

            if (!do_r && !do_w) {
                SAVE_PROFILE_INTERVAL(do_poll_set_bookkeeping);
                continue;
            }
        } else {
            get_handle(hdl);
            p->handle = hdl;
            p->next = polling;
            polling = p;
        }
        SAVE_PROFILE_INTERVAL(do_poll_set_bookkeeping);

        /* do the easiest check first: the handle's access mode */
        if (do_r && !(hdl->acc_mode & MAY_READ)) {
            p->flags |= KNOWN_R;
            debug("fd %d known to be not readable\n", p->fd);
            do_r = false;
        }

        if (do_w && !(hdl->acc_mode & MAY_WRITE)) {
            p->flags |= KNOWN_W;
            debug("fd %d known to be not writeable\n", p->fd);
            do_w = false;
        }
        SAVE_PROFILE_INTERVAL(do_poll_check_accmode);

        if (!do_r && !do_w)
            goto done_finding;

        /* if the fs provides a poll operator, let's try it */
        if (hdl->fs->fs_ops->poll) {
            int need_poll = 0;

            if (do_r && !(p->flags & POLL_R))
                need_poll |= FS_POLL_RD;
            if (do_w && !(p->flags & POLL_W))
                need_poll |= FS_POLL_WR;

            if (need_poll) {
                int polled = hdl->fs->fs_ops->poll(hdl, need_poll);

                if (polled != -EAGAIN) {
                    if (polled & FS_POLL_ER) {
                        debug("fd %d known to have error\n", p->fd);
                        p->flags |= KNOWN_R|KNOWN_W|RET_E;
                    }

                    if (do_r && (polled & FS_POLL_RD)) {
                        debug("fd %d known to be readable\n", p->fd);
                        p->flags |= KNOWN_R|RET_R;
                        do_r = false;
                    }

                    if (do_w && (polled & FS_POLL_WR)) {
                        debug("fd %d known to be writeable\n", p->fd);
                        p->flags |= KNOWN_W|RET_W;
                        do_w = false;
                    }
                }
            }
            SAVE_PROFILE_INTERVAL(do_poll_vfs_polling);

            if (!do_r && !do_w)
                goto done_finding;
        }

        struct poll_handle * to_poll = rep ? : p;

        if (!(to_poll->flags & (POLL_R|POLL_W))) {
            if (!hdl->pal_handle) {
                /* no PAL handle to wait on; report an error instead */
                p->flags |= (KNOWN_R|KNOWN_W|RET_E);
                do_r = do_w = false;
                goto done_finding;
            }

            debug("polling fd %d\n", to_poll->fd);
            npals++;
        }

        to_poll->flags |= (do_r ? POLL_R : 0)|(do_w ? POLL_W : 0);

done_finding:
        /* feed the new knowledge back to the repeated handle */
        if (rep)
            rep->flags |= p->flags &
                          (KNOWN_R|KNOWN_W|RET_R|RET_W|RET_E|POLL_R|POLL_W);

        if (do_r)
            has_r = true;

        if (p->flags & (RET_R|RET_W|RET_E))
            has_known = true;

        SAVE_PROFILE_INTERVAL(do_poll_update_bookkeeping);
    }

    unlock(map->lock);
    SAVE_PROFILE_INTERVAL_SINCE(do_poll_first_loop, begin_time);

    if (!npals)
        goto done_polling;

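    /* Second pass: drop entries with nothing left to poll and gather the
     * PAL handles for the rest. */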
    pals = __try_alloca(cur, sizeof(PAL_HANDLE) * npals);
    npals = 0;

    n = &polling;
    for (p = polling ; p ; p = *n) {
        if (!(p->flags & (POLL_R|POLL_W))) {
            /* unlink this entry; n must stay where it is so that it does
               not end up pointing into the removed node */
            *n = p->next;
            put_handle(p->handle);
            p->handle = NULL;
            continue;
        }

        pals[npals++] = p->handle->pal_handle;
        n = &p->next;
    }
    SAVE_PROFILE_INTERVAL(do_poll_second_loop);

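    /* Third pass: wait on the remaining PAL handles. If something is
     * already known (or nothing waits for reads), only peek with a zero
     * timeout; each fully resolved handle is unlinked from the list and
     * compacted out of the PAL handle array. */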
    while (npals) {
        /* keep the full unsigned range: a large microsecond timeout (or
           POLL_NOTIMEOUT) must not be truncated through an int */
        unsigned long pal_timeout = (has_r && !has_known) ? timeout : 0;
        PAL_HANDLE polled = DkObjectsWaitAny(npals, pals, pal_timeout);

        if (pal_timeout)
            SAVE_PROFILE_INTERVAL(do_poll_wait_any);
        else
            SAVE_PROFILE_INTERVAL(do_poll_wait_any_peek);

        if (!polled)
            break;

        PAL_STREAM_ATTR attr;
        if (!DkStreamAttributesQuerybyHandle(polled, &attr))
            break;

        for (n = &polling, p = polling ; p ; n = &p->next, p = p->next)
            if (p->handle->pal_handle == polled)
                break;

        if (!p)
            break;

        debug("handle %s is polled\n", qstrgetstr(&p->handle->uri));

        if (attr.disconnected) {
            debug("handle is polled to be disconnected\n");
            p->flags |= (KNOWN_R|KNOWN_W|RET_E);
        }
        if (attr.readable) {
            debug("handle is polled to be readable\n");
            p->flags |= (KNOWN_R|RET_R);
        }
        if (attr.writeable) {
            debug("handle is polled to be writeable\n");
            p->flags |= (KNOWN_W|RET_W);
        }

        /* propagate the result to all duplicates of this handle */
        for (q = p->children ; q ; q = q->next)
            q->flags |= p->flags & (KNOWN_R|KNOWN_W|RET_W|RET_R|RET_E);

        /* keep polling this handle until every direction asked of it is
           resolved */
        if ((p->flags & (POLL_R|KNOWN_R)) != (POLL_R|KNOWN_R) &&
            (p->flags & (POLL_W|KNOWN_W)) != (POLL_W|KNOWN_W))
            continue;

        has_known = true;
        *n = p->next;
        put_handle(p->handle);
        p->handle = NULL;

        int nskip = 0;
        for (int i = 0 ; i < npals ; i++)
            if (pals[i] == polled) {
                nskip = 1;
            } else if (nskip) {
                pals[i - nskip] = pals[i];
            }

        npals -= nskip;
        SAVE_PROFILE_INTERVAL(do_poll_third_loop);
    }

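    /* Done: release the handle references still held by the polling list. */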
done_polling:
    for (p = polling ; p ; p = p->next)
        put_handle(p->handle);
    SAVE_PROFILE_INTERVAL(do_poll_fourth_loop);

    if (pals)
        __try_free(cur, pals);

    return 0;
}

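/* poll(2): the millisecond timeout is converted to microseconds for
 * __do_poll(); a negative timeout means block indefinitely. */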
int shim_do_poll (struct pollfd * fds, nfds_t nfds, int timeout)
{
    struct shim_thread * cur = get_cur_thread();

    struct poll_handle * polls =
            __try_alloca(cur, sizeof(struct poll_handle) * nfds);

    for (int i = 0 ; i < nfds ; i++) {
        polls[i].fd = fds[i].fd;
        polls[i].flags = 0;
        if (fds[i].events & (POLLIN|POLLRDNORM))
            polls[i].flags |= DO_R;
        if (fds[i].events & (POLLOUT|POLLWRNORM))
            polls[i].flags |= DO_W;
    }

    int ret = __do_poll(nfds, polls,
                        timeout < 0 ? POLL_NOTIMEOUT : timeout * 1000ULL);

    if (ret < 0)
        goto out;

    ret = 0;

    for (int i = 0 ; i < nfds ; i++) {
        fds[i].revents = 0;

        if (polls[i].flags & RET_R)
            fds[i].revents |= (fds[i].events & (POLLIN|POLLRDNORM));
        if (polls[i].flags & RET_W)
            fds[i].revents |= (fds[i].events & (POLLOUT|POLLWRNORM));
        if (polls[i].flags & RET_E)
            fds[i].revents |= (fds[i].events & (POLLERR|POLLHUP));

        if (fds[i].revents)
            ret++;
    }
out:
    __try_free(cur, polls);
    return ret;
}

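/* ppoll(2): like poll(2), but with a nanosecond-resolution timeout
 * (converted to microseconds here). Note that the signal mask arguments
 * are received but never applied. */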
int shim_do_ppoll (struct pollfd * fds, int nfds, struct timespec * tsp,
                   const sigset_t * sigmask, size_t sigsetsize)
{
    struct shim_thread * cur = get_cur_thread();

    struct poll_handle * polls =
            __try_alloca(cur, sizeof(struct poll_handle) * nfds);

    for (int i = 0 ; i < nfds ; i++) {
        polls[i].fd = fds[i].fd;
        polls[i].flags = 0;
        if (fds[i].events & (POLLIN|POLLRDNORM))
            polls[i].flags |= DO_R;
        if (fds[i].events & (POLLOUT|POLLWRNORM))
            polls[i].flags |= DO_W;
    }

    unsigned long timeout = tsp ?
                            tsp->tv_sec * 1000000ULL + tsp->tv_nsec / 1000 :
                            POLL_NOTIMEOUT;

    int ret = __do_poll(nfds, polls, timeout);

    if (ret < 0)
        goto out;

    ret = 0;

    for (int i = 0 ; i < nfds ; i++) {
        fds[i].revents = 0;

        if (polls[i].flags & RET_R)
            fds[i].revents |= (fds[i].events & (POLLIN|POLLRDNORM));
        if (polls[i].flags & RET_W)
            fds[i].revents |= (fds[i].events & (POLLOUT|POLLWRNORM));
        if (polls[i].flags & RET_E)
            fds[i].revents |= (fds[i].events & (POLLERR|POLLHUP));

        if (fds[i].revents)
            ret++;
    }
out:
    __try_free(cur, polls);
    return ret;
}

DEFINE_PROFILE_CATAGORY(select, );
DEFINE_PROFILE_INTERVAL(select_tryalloca_1, select);
DEFINE_PROFILE_INTERVAL(select_setup_array, select);
DEFINE_PROFILE_INTERVAL(select_do_poll,     select);
DEFINE_PROFILE_INTERVAL(select_fd_zero,     select);
DEFINE_PROFILE_INTERVAL(select_fd_sets,     select);
DEFINE_PROFILE_INTERVAL(select_try_free,    select);

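/* select(2): fds below nfds are scanned against the read/write sets and
 * polled through __do_poll(); the timeval timeout is converted to
 * microseconds. An fd is reported in errorfds only if it was also part of
 * the read or write set. */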
int shim_do_select (int nfds, fd_set * readfds, fd_set * writefds,
                    fd_set * errorfds, struct __kernel_timeval * tsv)
{
    BEGIN_PROFILE_INTERVAL();

    if (!nfds) {
        if (!tsv)
            return -EINVAL;

        /* with no fds to watch, select degenerates into a sleep */
        struct __kernel_timespec tsp;
        tsp.tv_sec  = tsv->tv_sec;
        tsp.tv_nsec = tsv->tv_usec * 1000;
        return shim_do_nanosleep(&tsp, NULL);
    }

    struct shim_thread * cur = get_cur_thread();

    struct poll_handle * polls =
            __try_alloca(cur, sizeof(struct poll_handle) * nfds);
    int npolls = 0;
    SAVE_PROFILE_INTERVAL(select_tryalloca_1);

    for (int fd = 0 ; fd < nfds ; fd++) {
        bool do_r = (readfds  && FD_ISSET(fd, readfds));
        bool do_w = (writefds && FD_ISSET(fd, writefds));
        if (!do_r && !do_w)
            continue;
        polls[npolls].fd = fd;
        polls[npolls].flags = (do_r ? DO_R : 0)|(do_w ? DO_W : 0);
        npolls++;
    }
    SAVE_PROFILE_INTERVAL(select_setup_array);

    unsigned long timeout = tsv ?
                            tsv->tv_sec * 1000000ULL + tsv->tv_usec :
                            POLL_NOTIMEOUT;

    int ret = __do_poll(npolls, polls, timeout);
    SAVE_PROFILE_INTERVAL(select_do_poll);

    if (ret < 0)
        goto out;

    ret = 0;

    if (readfds)
        FD_ZERO(readfds);
    if (writefds)
        FD_ZERO(writefds);
    if (errorfds)
        FD_ZERO(errorfds);
    SAVE_PROFILE_INTERVAL(select_fd_zero);

    for (int i = 0 ; i < npolls ; i++) {
        if (readfds && ((polls[i].flags & (DO_R|RET_R)) == (DO_R|RET_R))) {
            FD_SET(polls[i].fd, readfds);
            ret++;
        }
        if (writefds && ((polls[i].flags & (DO_W|RET_W)) == (DO_W|RET_W))) {
            FD_SET(polls[i].fd, writefds);
            ret++;
        }
        /* RET_E plus at least one DO_* bit: report the error */
        if (errorfds && ((polls[i].flags & (DO_R|DO_W|RET_E)) > RET_E)) {
            FD_SET(polls[i].fd, errorfds);
            ret++;
        }
    }
    SAVE_PROFILE_INTERVAL(select_fd_sets);
out:
    __try_free(cur, polls);
    SAVE_PROFILE_INTERVAL(select_try_free);
    return ret;
}

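/* pselect6(2): same mapping as select(2), with a timespec timeout; as with
 * ppoll, the signal mask argument is received but never applied. */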
int shim_do_pselect6 (int nfds, fd_set * readfds, fd_set * writefds,
                      fd_set * errorfds, const struct __kernel_timespec * tsp,
                      const sigset_t * sigmask)
{
    if (!nfds)
        return tsp ? shim_do_nanosleep(tsp, NULL) : -EINVAL;

    struct shim_thread * cur = get_cur_thread();

    struct poll_handle * polls =
            __try_alloca(cur, sizeof(struct poll_handle) * nfds);
    int npolls = 0;

    for (int fd = 0 ; fd < nfds ; fd++) {
        bool do_r = (readfds  && FD_ISSET(fd, readfds));
        bool do_w = (writefds && FD_ISSET(fd, writefds));
        if (!do_r && !do_w)
            continue;
        polls[npolls].fd = fd;
        polls[npolls].flags = (do_r ? DO_R : 0)|(do_w ? DO_W : 0);
        npolls++;
    }

    unsigned long timeout = tsp ?
                            tsp->tv_sec * 1000000ULL + tsp->tv_nsec / 1000 :
                            POLL_NOTIMEOUT;

    int ret = __do_poll(npolls, polls, timeout);

    if (ret < 0)
        goto out;

    ret = 0;

    if (readfds)
        FD_ZERO(readfds);
    if (writefds)
        FD_ZERO(writefds);
    if (errorfds)
        FD_ZERO(errorfds);

    for (int i = 0 ; i < npolls ; i++) {
        if (readfds && ((polls[i].flags & (DO_R|RET_R)) == (DO_R|RET_R))) {
            FD_SET(polls[i].fd, readfds);
            ret++;
        }
        if (writefds && ((polls[i].flags & (DO_W|RET_W)) == (DO_W|RET_W))) {
            FD_SET(polls[i].fd, writefds);
            ret++;
        }
        if (errorfds && ((polls[i].flags & (DO_R|DO_W|RET_E)) > RET_E)) {
            FD_SET(polls[i].fd, errorfds);
            ret++;
        }
    }
out:
    __try_free(cur, polls);
    return ret;
}