db_process.c 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579
  1. /* Copyright (C) 2014 Stony Brook University
  2. This file is part of Graphene Library OS.
  3. Graphene Library OS is free software: you can redistribute it and/or
  4. modify it under the terms of the GNU Lesser General Public License
  5. as published by the Free Software Foundation, either version 3 of the
  6. License, or (at your option) any later version.
  7. Graphene Library OS is distributed in the hope that it will be useful,
  8. but WITHOUT ANY WARRANTY; without even the implied warranty of
  9. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  10. GNU Lesser General Public License for more details.
  11. You should have received a copy of the GNU Lesser General Public License
  12. along with this program. If not, see <http://www.gnu.org/licenses/>. */
  /*
   * db_process.c
   *
   * This source file contains functions to create a child process and terminate
   * the running process. A child does not inherit any objects or memory from its
   * parent process. A parent process may not modify the execution of its
   * children. It can wait for a child to exit using its handle. Also, parent and
   * child may communicate through I/O streams provided by the parent to the child
   * at creation.
   */
  23. #include "api.h"
  24. #include "pal.h"
  25. #include "pal_debug.h"
  26. #include "pal_defs.h"
  27. #include "pal_error.h"
  28. #include "pal_internal.h"
  29. #include "pal_linux.h"
  30. #include "pal_linux_defs.h"
  31. #include "pal_rtld.h"
  32. #include "pal_security.h"
  33. typedef __kernel_pid_t pid_t;
  34. #include <asm/errno.h>
  35. #include <asm/fcntl.h>
  36. #include <asm/poll.h>
  37. #include <linux/sched.h>
  38. #include <linux/time.h>
  39. #include <linux/types.h>
  40. #include <sys/socket.h>
  41. static inline int create_process_handle (PAL_HANDLE * parent,
  42. PAL_HANDLE * child)
  43. {
  44. PAL_HANDLE phdl = NULL, chdl = NULL;
  45. int fds[4] = { -1, -1, -1, -1 };
  46. int socktype = SOCK_STREAM | SOCK_CLOEXEC;
  47. int ret;
  48. if (IS_ERR((ret = INLINE_SYSCALL(socketpair, 4, AF_UNIX, socktype, 0, &fds[0]))) ||
  49. IS_ERR((ret = INLINE_SYSCALL(socketpair, 4, AF_UNIX, socktype, 0, &fds[2])))) {
  50. ret = -PAL_ERROR_DENIED;
  51. goto out;
  52. }
  53. phdl = malloc(HANDLE_SIZE(process));
  54. if (!phdl) {
  55. ret = -PAL_ERROR_NOMEM;
  56. goto out;
  57. }
  58. SET_HANDLE_TYPE(phdl, process);
  59. HANDLE_HDR(phdl)->flags |= RFD(0)|WFD(0)|RFD(1)|WFD(1);
  60. phdl->process.stream = fds[0];
  61. phdl->process.cargo = fds[2];
  62. phdl->process.pid = linux_state.pid;
  63. phdl->process.nonblocking = PAL_FALSE;
  64. chdl = malloc(HANDLE_SIZE(process));
  65. if (!chdl) {
  66. ret = -PAL_ERROR_NOMEM;
  67. goto out;
  68. }
  69. SET_HANDLE_TYPE(chdl, process);
  70. HANDLE_HDR(chdl)->flags |= RFD(0)|WFD(0)|RFD(1)|WFD(1);
  71. chdl->process.stream = fds[1];
  72. chdl->process.cargo = fds[3];
  73. chdl->process.pid = 0; /* unknown yet */
  74. chdl->process.nonblocking = PAL_FALSE;
  75. *parent = phdl;
  76. *child = chdl;
  77. ret = 0;
  78. out:
  79. if (ret < 0) {
  80. if (phdl)
  81. _DkObjectClose(phdl);
  82. if (chdl)
  83. _DkObjectClose(chdl);
  84. for (int i = 0; i < 4; i++)
  85. if (fds[i] != -1)
  86. INLINE_SYSCALL(close, 1, fds[i]);
  87. }
  88. return ret;
  89. }
  90. struct proc_param {
  91. PAL_HANDLE parent;
  92. PAL_HANDLE exec;
  93. PAL_HANDLE manifest;
  94. const char ** argv;
  95. };
  96. struct proc_args {
  97. PAL_NUM parent_process_id;
  98. struct pal_sec pal_sec;
  99. #if PROFILING == 1
  100. unsigned long process_create_time;
  101. #endif
  102. unsigned long memory_quota;
  103. unsigned int parent_data_size;
  104. unsigned int exec_data_size;
  105. unsigned int manifest_data_size;
  106. };
  107. /*
  108. * vfork() shares stack between child and parent. Any stack modifications in
  109. * child are reflected in parent's stack. Compiler may unwittingly modify
  110. * child's stack for its own purposes and thus corrupt parent's stack
  111. * (e.g., GCC re-uses the same stack area for local vars with non-overlapping
  112. * lifetimes).
  113. * Introduce noinline function with stack area used only by child.
  114. * Make this function non-local to keep function signature.
  115. * NOTE: more tricks may be needed to prevent unexpected optimization for
  116. * future compiler.
  117. */
  118. static int __attribute_noinline
  119. child_process (struct proc_param * proc_param)
  120. {
  121. int ret = ARCH_VFORK();
  122. if (ret)
  123. return ret;
  124. /* child */
  125. ret = INLINE_SYSCALL(dup2, 2, proc_param->parent->process.stream, PROC_INIT_FD);
  126. if (IS_ERR(ret))
  127. goto failed;
  128. if (proc_param->parent)
  129. handle_set_cloexec(proc_param->parent, false);
  130. if (proc_param->exec)
  131. handle_set_cloexec(proc_param->exec, false);
  132. if (proc_param->manifest)
  133. handle_set_cloexec(proc_param->manifest, false);
  134. INLINE_SYSCALL(execve, 3, PAL_LOADER, proc_param->argv,
  135. linux_state.environ);
  136. failed:
  137. /* fail is it gets here */
  138. return -PAL_ERROR_DENIED;
  139. }
  140. int _DkProcessCreate (PAL_HANDLE * handle, const char * uri, const char ** args)
  141. {
  142. PAL_HANDLE exec = NULL;
  143. PAL_HANDLE parent_handle = NULL, child_handle = NULL;
  144. int ret;
  145. #if PROFILING == 1
  146. unsigned long before_create = _DkSystemTimeQuery();
  147. #endif
  148. /* step 1: open uri and check whether it is an executable */
  149. if (uri) {
  150. if ((ret = _DkStreamOpen(&exec, uri, PAL_ACCESS_RDONLY, 0, 0, 0)) < 0)
  151. return ret;
  152. if (check_elf_object(exec) < 0) {
  153. ret = -PAL_ERROR_INVAL;
  154. goto out;
  155. }
  156. /* If this process creation is for fork emulation,
  157. * map address of executable is already determined.
  158. * tell its address to forked process.
  159. */
  160. size_t len;
  161. const char * file_uri = URI_PREFIX_FILE;
  162. if (exec_map && exec_map->l_name &&
  163. (len = strlen(uri)) >= URI_PREFIX_FILE_LEN && !memcmp(uri, file_uri, URI_PREFIX_FILE_LEN) &&
  164. /* skip "file:"*/
  165. strlen(exec_map->l_name) == len - URI_PREFIX_FILE_LEN &&
  166. /* + 1 for lasting * NUL */
  167. !memcmp(exec_map->l_name, uri + URI_PREFIX_FILE_LEN, len - URI_PREFIX_FILE_LEN + 1))
  168. exec->file.map_start = (PAL_PTR)exec_map->l_map_start;
  169. }
  170. /* step 2: create parant and child process handle */
  171. struct proc_param param;
  172. ret = create_process_handle(&parent_handle, &child_handle);
  173. if (ret < 0)
  174. goto out;
  175. param.parent = parent_handle;
  176. param.exec = exec;
  177. param.manifest = pal_state.manifest_handle;
  178. /* step 3: compose process parameter */
  179. size_t parent_datasz = 0, exec_datasz = 0, manifest_datasz = 0;
  180. void * parent_data = NULL;
  181. void * exec_data = NULL;
  182. void * manifest_data = NULL;
  183. ret = handle_serialize(parent_handle, &parent_data);
  184. if (ret < 0)
  185. goto out;
  186. parent_datasz = (size_t)ret;
  187. if (exec) {
  188. ret = handle_serialize(exec, &exec_data);
  189. if (ret < 0) {
  190. free(parent_data);
  191. goto out;
  192. }
  193. exec_datasz = (size_t)ret;
  194. }
  195. if (pal_state.manifest_handle) {
  196. ret = handle_serialize(pal_state.manifest_handle, &manifest_data);
  197. if (ret < 0) {
  198. free(parent_data);
  199. free(exec_data);
  200. goto out;
  201. }
  202. manifest_datasz = (size_t)ret;
  203. }
  204. size_t datasz = parent_datasz + exec_datasz + manifest_datasz;
  205. struct proc_args * proc_args = __alloca(sizeof(struct proc_args) + datasz);
  206. proc_args->parent_process_id = linux_state.parent_process_id;
  207. memcpy(&proc_args->pal_sec, &pal_sec, sizeof(struct pal_sec));
  208. proc_args->pal_sec._dl_debug_state = NULL;
  209. proc_args->pal_sec._r_debug = NULL;
  210. proc_args->memory_quota = linux_state.memory_quota;
  211. void * data = (void *) (proc_args + 1);
  212. memcpy(data, parent_data, parent_datasz);
  213. data += (proc_args->parent_data_size = parent_datasz);
  214. free(parent_data);
  215. if (exec_data) {
  216. memcpy(data, exec_data, exec_datasz);
  217. data += (proc_args->exec_data_size = exec_datasz);
  218. free(exec_data);
  219. } else {
  220. proc_args->exec_data_size = 0;
  221. }
  222. if (manifest_data) {
  223. memcpy(data, manifest_data, manifest_datasz);
  224. data += (proc_args->manifest_data_size = manifest_datasz);
  225. free(manifest_data);
  226. } else {
  227. proc_args->manifest_data_size = 0;
  228. }
  229. /* step 4: create a child thread which will execve in the future */
  230. /* the first arguement must be the PAL */
  231. int argc = 0;
  232. if (args)
  233. for (; args[argc] ; argc++);
  234. param.argv = __alloca(sizeof(const char *) * (argc + 2));
  235. param.argv[0] = PAL_LOADER;
  236. if (args)
  237. memcpy(&param.argv[1], args, sizeof(const char *) * argc);
  238. param.argv[argc + 1] = NULL;
  239. #if PROFILING == 1
  240. proc_args->process_create_time = before_create;
  241. #endif
  242. /* Child's signal handler may mess with parent's memory during vfork(),
  243. * so block signals
  244. */
  245. ret = block_async_signals(true);
  246. if (ret < 0)
  247. goto out;
  248. ret = child_process(&param);
  249. if (IS_ERR(ret)) {
  250. ret = -PAL_ERROR_DENIED;
  251. goto out;
  252. }
  253. proc_args->pal_sec.process_id = ret;
  254. child_handle->process.pid = ret;
  255. /* children unblock async signals by signal_setup() */
  256. ret = block_async_signals(false);
  257. if (ret < 0)
  258. goto out;
  259. /* step 4: send parameters over the process handle */
  260. ret = INLINE_SYSCALL(write, 3,
  261. child_handle->process.stream,
  262. proc_args,
  263. sizeof(struct proc_args) + datasz);
  264. if (IS_ERR(ret) || (size_t)ret < sizeof(struct proc_args) + datasz) {
  265. ret = -PAL_ERROR_DENIED;
  266. goto out;
  267. }
  268. *handle = child_handle;
  269. ret = 0;
  270. out:
  271. if (parent_handle)
  272. _DkObjectClose(parent_handle);
  273. if (exec)
  274. _DkObjectClose(exec);
  275. if (ret < 0) {
  276. if (child_handle)
  277. _DkObjectClose(child_handle);
  278. }
  279. return ret;
  280. }
  281. void init_child_process (PAL_HANDLE * parent_handle,
  282. PAL_HANDLE * exec_handle,
  283. PAL_HANDLE * manifest_handle)
  284. {
  285. int ret = 0;
  286. /* try to do a very large reading, so it doesn't have to be read for the
  287. second time */
  288. struct proc_args * proc_args = __alloca(sizeof(struct proc_args));
  289. struct proc_args * new_proc_args;
  290. int bytes = INLINE_SYSCALL(read, 3, PROC_INIT_FD, proc_args,
  291. sizeof(*proc_args));
  292. if (IS_ERR(bytes)) {
  293. if (ERRNO(bytes) != EBADF)
  294. INIT_FAIL(PAL_ERROR_DENIED, "communication fail with parent");
  295. /* in the first process */
  296. /* occupy PROC_INIT_FD so no one will use it */
  297. INLINE_SYSCALL(dup2, 2, 0, PROC_INIT_FD);
  298. return;
  299. }
  300. /* a child must have parent handle and an executable */
  301. if (!proc_args->parent_data_size)
  302. INIT_FAIL(PAL_ERROR_INVAL, "invalid process created");
  303. int datasz = proc_args->parent_data_size + proc_args->exec_data_size +
  304. proc_args->manifest_data_size;
  305. if (!datasz)
  306. goto no_data;
  307. new_proc_args = __alloca(sizeof(*proc_args) + datasz);
  308. memcpy(new_proc_args, proc_args, sizeof(*proc_args));
  309. proc_args = new_proc_args;
  310. void * data = (void *) (proc_args + 1);
  311. bytes = INLINE_SYSCALL(read, 3, PROC_INIT_FD, data, datasz);
  312. if (IS_ERR(bytes))
  313. INIT_FAIL(PAL_ERROR_DENIED, "communication fail with parent");
  314. /* now deserialize the parent_handle */
  315. PAL_HANDLE parent = NULL;
  316. ret = handle_deserialize(&parent, data, proc_args->parent_data_size);
  317. if (ret < 0)
  318. INIT_FAIL(-ret, "cannot deseilaize parent process handle");
  319. data += proc_args->parent_data_size;
  320. *parent_handle = parent;
  321. /* occupy PROC_INIT_FD so no one will use it */
  322. INLINE_SYSCALL(dup2, 2, 0, PROC_INIT_FD);
  323. /* deserialize the executable handle */
  324. if (proc_args->exec_data_size) {
  325. PAL_HANDLE exec = NULL;
  326. ret = handle_deserialize(&exec, data,
  327. proc_args->exec_data_size);
  328. if (ret < 0)
  329. INIT_FAIL(-ret, "cannot deserialize executable handle");
  330. data += proc_args->exec_data_size;
  331. *exec_handle = exec;
  332. }
  333. /* deserialize the manifest handle, if there is one */
  334. if (proc_args->manifest_data_size) {
  335. PAL_HANDLE manifest = NULL;
  336. ret = handle_deserialize(&manifest, data,
  337. proc_args->manifest_data_size);
  338. if (ret < 0)
  339. INIT_FAIL(-ret, "cannot deserialize manifest handle");
  340. data += proc_args->manifest_data_size;
  341. *manifest_handle = manifest;
  342. }
  343. no_data:
  344. linux_state.parent_process_id = proc_args->parent_process_id;
  345. linux_state.memory_quota = proc_args->memory_quota;
  346. #if PROFILING == 1
  347. pal_state.process_create_time = proc_args->process_create_time;
  348. #endif
  349. memcpy(&pal_sec, &proc_args->pal_sec, sizeof(struct pal_sec));
  350. }
  351. noreturn void _DkProcessExit (int exitcode)
  352. {
  353. if (exitcode == PAL_WAIT_FOR_CHILDREN_EXIT) {
  354. /* this is a "temporary" process exiting after execve'ing a child process: it must still
  355. * be around until the child finally exits (because its parent in turn may wait on it) */
  356. int wstatus;
  357. INLINE_SYSCALL(wait4, 4, /*any child*/-1, &wstatus, /*options=*/0, /*rusage=*/NULL);
  358. exitcode = wstatus;
  359. }
  360. INLINE_SYSCALL(exit_group, 1, exitcode);
  361. while (true) {
  362. /* nothing */;
  363. }
  364. }
  365. static int64_t proc_read (PAL_HANDLE handle, uint64_t offset, uint64_t count,
  366. void * buffer)
  367. {
  368. if (offset)
  369. return -PAL_ERROR_INVAL;
  370. int64_t bytes = INLINE_SYSCALL(read, 3, handle->process.stream, buffer, count);
  371. if (IS_ERR(bytes))
  372. switch(ERRNO(bytes)) {
  373. case EWOULDBLOCK:
  374. return -PAL_ERROR_TRYAGAIN;
  375. case EINTR:
  376. return -PAL_ERROR_INTERRUPTED;
  377. default:
  378. return -PAL_ERROR_DENIED;
  379. }
  380. return bytes;
  381. }
  382. static int64_t proc_write (PAL_HANDLE handle, uint64_t offset, uint64_t count,
  383. const void * buffer)
  384. {
  385. if (offset)
  386. return -PAL_ERROR_INVAL;
  387. int64_t bytes = INLINE_SYSCALL(write, 3, handle->process.stream, buffer, count);
  388. if (IS_ERR(bytes))
  389. switch(ERRNO(bytes)) {
  390. case EWOULDBLOCK:
  391. return -PAL_ERROR_TRYAGAIN;
  392. case EINTR:
  393. return -PAL_ERROR_INTERRUPTED;
  394. default:
  395. return -PAL_ERROR_DENIED;
  396. }
  397. assert(!IS_ERR(bytes));
  398. return bytes;
  399. }
  400. static int proc_close (PAL_HANDLE handle)
  401. {
  402. if (handle->process.stream != PAL_IDX_POISON) {
  403. INLINE_SYSCALL(close, 1, handle->process.stream);
  404. handle->process.stream = PAL_IDX_POISON;
  405. }
  406. if (handle->process.cargo != PAL_IDX_POISON) {
  407. INLINE_SYSCALL(close, 1, handle->process.cargo);
  408. handle->process.cargo = PAL_IDX_POISON;
  409. }
  410. return 0;
  411. }
  412. static int proc_delete (PAL_HANDLE handle, int access)
  413. {
  414. int shutdown;
  415. switch (access) {
  416. case 0:
  417. shutdown = SHUT_RDWR;
  418. break;
  419. case PAL_DELETE_RD:
  420. shutdown = SHUT_RD;
  421. break;
  422. case PAL_DELETE_WR:
  423. shutdown = SHUT_WR;
  424. break;
  425. default:
  426. return -PAL_ERROR_INVAL;
  427. }
  428. if (handle->process.stream != PAL_IDX_POISON)
  429. INLINE_SYSCALL(shutdown, 2, handle->process.stream, shutdown);
  430. if (handle->process.cargo != PAL_IDX_POISON)
  431. INLINE_SYSCALL(shutdown, 2, handle->process.cargo, shutdown);
  432. return 0;
  433. }
  434. static int proc_attrquerybyhdl(PAL_HANDLE handle, PAL_STREAM_ATTR* attr) {
  435. int ret;
  436. int val;
  437. if (handle->process.stream == PAL_IDX_POISON)
  438. return -PAL_ERROR_BADHANDLE;
  439. attr->handle_type = HANDLE_HDR(handle)->type;
  440. attr->nonblocking = handle->process.nonblocking;
  441. attr->disconnected = HANDLE_HDR(handle)->flags & ERROR(0);
  442. /* get number of bytes available for reading */
  443. ret = INLINE_SYSCALL(ioctl, 3, handle->process.stream, FIONREAD, &val);
  444. if (IS_ERR(ret))
  445. return unix_to_pal_error(ERRNO(ret));
  446. attr->pending_size = val;
  447. /* query if there is data available for reading */
  448. struct pollfd pfd = {.fd = handle->process.stream, .events = POLLIN | POLLOUT, .revents = 0};
  449. struct timespec tp = {0, 0};
  450. ret = INLINE_SYSCALL(ppoll, 5, &pfd, 1, &tp, NULL, 0);
  451. if (IS_ERR(ret))
  452. return unix_to_pal_error(ERRNO(ret));
  453. attr->readable = ret == 1 && (pfd.revents & (POLLIN | POLLERR | POLLHUP)) == POLLIN;
  454. attr->writable = ret == 1 && (pfd.revents & (POLLOUT | POLLERR | POLLHUP)) == POLLOUT;
  455. return 0;
  456. }
  457. static int proc_attrsetbyhdl (PAL_HANDLE handle, PAL_STREAM_ATTR * attr)
  458. {
  459. if (handle->process.stream == PAL_IDX_POISON)
  460. return -PAL_ERROR_BADHANDLE;
  461. int ret;
  462. if (attr->nonblocking != handle->process.nonblocking) {
  463. ret = INLINE_SYSCALL(fcntl, 3, handle->process.stream, F_SETFL,
  464. handle->process.nonblocking ? O_NONBLOCK : 0);
  465. if (IS_ERR(ret))
  466. return unix_to_pal_error(ERRNO(ret));
  467. handle->process.nonblocking = attr->nonblocking;
  468. }
  469. return 0;
  470. }
  471. struct handle_ops proc_ops = {
  472. .read = &proc_read,
  473. .write = &proc_write,
  474. .close = &proc_close,
  475. .delete = &proc_delete,
  476. .attrquerybyhdl = &proc_attrquerybyhdl,
  477. .attrsetbyhdl = &proc_attrsetbyhdl,
  478. };