shim_clone.c 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357
  1. /* -*- mode:c; c-file-style:"k&r"; c-basic-offset: 4; tab-width:4; indent-tabs-mode:nil; mode:auto-fill; fill-column:78; -*- */
  2. /* vim: set ts=4 sw=4 et tw=78 fo=cqt wm=0: */
  3. /* Copyright (C) 2014 Stony Brook University
  4. This file is part of Graphene Library OS.
  5. Graphene Library OS is free software: you can redistribute it and/or
  6. modify it under the terms of the GNU Lesser General Public License
  7. as published by the Free Software Foundation, either version 3 of the
  8. License, or (at your option) any later version.
  9. Graphene Library OS is distributed in the hope that it will be useful,
  10. but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  12. GNU Lesser General Public License for more details.
  13. You should have received a copy of the GNU Lesser General Public License
  14. along with this program. If not, see <http://www.gnu.org/licenses/>. */
  15. /*
  16. * shim_clone.c
  17. *
  18. * Implementation of system call "clone". (using "clone" as "fork" is not
  19. * implemented yet.)
  20. */
  21. #include <shim_types.h>
  22. #include <shim_internal.h>
  23. #include <shim_table.h>
  24. #include <shim_thread.h>
  25. #include <shim_utils.h>
  26. #include <shim_checkpoint.h>
  27. #include <shim_profile.h>
  28. #include <pal.h>
  29. #include <pal_error.h>
  30. #include <errno.h>
  31. #include <sys/syscall.h>
  32. #include <sys/mman.h>
  33. #include <linux/sched.h>
  34. #include <asm/prctl.h>
  35. /* from **sysdeps/unix/sysv/linux/x86_64/clone.S:
  36. The userland implementation is:
  37. int clone (int (*fn)(void *arg), void *child_stack, int flags, void *arg),
  38. the kernel entry is:
  39. int clone (long flags, void *child_stack).
  40. The parameters are passed in register and on the stack from userland:
  41. rdi: fn
  42. rsi: child_stack
  43. rdx: flags
  44. rcx: arg
  45. r8d: TID field in parent
  46. r9d: thread pointer
  47. %esp+8: TID field in child
  48. The kernel expects:
  49. rax: system call number
  50. rdi: flags
  51. rsi: child_stack
  52. rdx: TID field in parent
  53. r10: TID field in child
  54. r8: thread pointer
  55. */
/*
 * clone_implementation_wrapper() is a wrapper around the user-provided
 * function. The code flow for clone is as follows:
 *
 * 1) The user application allocates a stack for the child and calls clone.
 *    The clone code sets up the user function address and the argument
 *    address on the child stack.
 * 2) We hijack the clone call, and control flows to shim_do_clone().
 * 3) In shim_do_clone() we just call the DK API to create a thread,
 *    providing a wrapper function around the user-provided function.
 * 4) The PAL layer allocates a stack and then invokes the clone syscall.
 * 5) PAL runs the thread_init function on the PAL-allocated stack.
 * 6) thread_init calls our wrapper and gives it the user-provided stack
 *    address.
 * 7) In the wrapper function, we just switch to the user-provided stack
 *    and execute the user-provided function.
 */
  72. /* glibc needs space offset by fs. In the absence of a good way to predict
  73. * how big the struct pthread will be (defined in nptl/descr.h),
  74. * let's just define a value that over-shoots it.
  75. */
  76. #define PTHREAD_PADDING 2048
  77. int clone_implementation_wrapper(struct clone_args * arg)
  78. {
  79. //The child thread created by PAL is now running on the
  80. //PAL allocated stack. We need to switch the stack to use
  81. //the user provided stack.
  82. struct clone_args *pcargs = arg;
  83. int stack_allocated = 0;
  84. DkObjectsWaitAny(1, &pcargs->create_event, NO_TIMEOUT);
  85. DkObjectClose(pcargs->create_event);
  86. struct shim_thread * my_thread = pcargs->thread;
  87. assert(my_thread);
  88. get_thread(my_thread);
  89. if (!my_thread->tcb) {
  90. stack_allocated = 1;
  91. my_thread->tcb = __alloca(sizeof(__libc_tcb_t) + PTHREAD_PADDING);
  92. }
  93. allocate_tls(my_thread->tcb, my_thread->user_tcb, my_thread);
  94. shim_tcb_t * tcb = &((__libc_tcb_t *) my_thread->tcb)->shim_tcb;
  95. __disable_preempt(tcb); // Temporarily disable preemption, because the preemption
  96. // will be re-enabled when the thread starts.
  97. debug_setbuf(tcb, true);
  98. debug("set tcb to %p (stack allocated? %d)\n", my_thread->tcb, stack_allocated);
  99. struct shim_regs regs;
  100. regs = *((__libc_tcb_t *) arg->parent->tcb)->shim_tcb.context.regs;
  101. if (my_thread->set_child_tid)
  102. *(my_thread->set_child_tid) = my_thread->tid;
  103. void * stack = pcargs->stack;
  104. void * return_pc = pcargs->return_pc;
  105. struct shim_vma_val vma;
  106. lookup_vma(ALIGN_DOWN(stack), &vma);
  107. my_thread->stack_top = vma.addr + vma.length;
  108. my_thread->stack_red = my_thread->stack = vma.addr;
  109. /* until now we're not ready to be exposed to other thread */
  110. add_thread(my_thread);
  111. set_as_child(arg->parent, my_thread);
  112. /* Don't signal the initialize event until we are actually init-ed */
  113. DkEventSet(pcargs->initialize_event);
  114. /***** From here down, we are switching to the user-provided stack ****/
  115. //user_stack_addr[0] ==> user provided function address
  116. //user_stack_addr[1] ==> arguments to user provided function.
  117. debug("child swapping stack to %p return %p: %d\n",
  118. stack, return_pc, my_thread->tid);
  119. tcb->context.regs = &regs;
  120. tcb->context.sp = stack;
  121. tcb->context.ret_ip = return_pc;
  122. restore_context(&tcb->context);
  123. return 0;
  124. }
  125. int migrate_fork (struct shim_cp_store * cpstore,
  126. struct shim_thread * thread,
  127. struct shim_process * process, va_list ap);
  128. /* long int __arg0 - flags
  129. * long int __arg1 - 16 bytes ( 2 words ) offset into the child stack allocated
  130. * by the parent */
  131. int shim_do_clone (int flags, void * user_stack_addr, int * parent_tidptr,
  132. int * child_tidptr, void * tls)
  133. {
  134. //The Clone Implementation in glibc has setup the child's stack
  135. //with the function pointer and the argument to the funciton.
  136. INC_PROFILE_OCCURENCE(syscall_use_ipc);
  137. struct shim_thread * self = get_cur_thread();
  138. assert(self);
  139. int * set_parent_tid = NULL;
  140. int ret = 0;
  141. /* special case for vfork. some runtime uses clone() for vfork */
  142. if (flags == (CLONE_VFORK | CLONE_VM | SIGCHLD) &&
  143. user_stack_addr == NULL && parent_tidptr == NULL &&
  144. child_tidptr == NULL && tls == NULL) {
  145. return shim_do_vfork();
  146. }
  147. assert((flags & ~(CLONE_PARENT_SETTID|CLONE_CHILD_SETTID|
  148. CLONE_CHILD_CLEARTID|CLONE_SETTLS|
  149. CLONE_VM|CLONE_FILES|
  150. CLONE_FS|CLONE_SIGHAND|CLONE_THREAD|
  151. CLONE_DETACHED| // Unused
  152. #ifdef CLONE_PTRACE
  153. CLONE_PTRACE| // Unused
  154. #endif
  155. CLONE_SYSVSEM|CSIGNAL)) == 0);
  156. if (!(flags & CLONE_FS))
  157. debug("clone without CLONE_FS is not yet implemented\n");
  158. if (!(flags & CLONE_SIGHAND))
  159. debug("clone without CLONE_SIGHAND is not yet implemented\n");
  160. if (!(flags & CLONE_SYSVSEM))
  161. debug("clone without CLONE_SYSVSEM is not yet implemented\n");
  162. if (flags & CLONE_PARENT_SETTID) {
  163. if (!parent_tidptr)
  164. return -EINVAL;
  165. set_parent_tid = parent_tidptr;
  166. }
  167. struct shim_thread * thread = get_new_thread(0);
  168. if (!thread) {
  169. ret = -ENOMEM;
  170. goto failed;
  171. }
  172. IDTYPE tid = thread->tid;
  173. if (flags & CLONE_CHILD_SETTID) {
  174. if (!child_tidptr) {
  175. ret = -EINVAL;
  176. goto failed;
  177. }
  178. thread->set_child_tid = child_tidptr;
  179. }
  180. if (flags & CLONE_CHILD_CLEARTID)
  181. /* Implemented in shim_futex.c: release_clear_child_id */
  182. thread->clear_child_tid = parent_tidptr;
  183. if (flags & CLONE_SETTLS) {
  184. if (!tls) {
  185. ret = -EINVAL;
  186. goto failed;
  187. }
  188. thread->tcb = tls;
  189. thread->user_tcb = true;
  190. } else {
  191. thread->tcb = NULL;
  192. }
  193. if (!(flags & CLONE_THREAD))
  194. thread->tgid = thread->tid;
  195. struct shim_handle_map * handle_map = get_cur_handle_map(self);
  196. if (flags & CLONE_FILES) {
  197. set_handle_map(thread, handle_map);
  198. } else {
  199. /* if CLONE_FILES is not given, the new thread should receive
  200. a copy of current descriptor table */
  201. struct shim_handle_map * new_map = NULL;
  202. get_handle_map(handle_map);
  203. dup_handle_map(&new_map, handle_map);
  204. set_handle_map(thread, new_map);
  205. put_handle_map(handle_map);
  206. }
  207. if (!(flags & CLONE_VM)) {
  208. __libc_tcb_t * tcb;
  209. shim_tcb_t * old_shim_tcb = NULL;
  210. if (thread->tcb) {
  211. tcb = (__libc_tcb_t *) thread->tcb;
  212. } else {
  213. thread->tcb = tcb = (__libc_tcb_t *) self->tcb;
  214. old_shim_tcb = __alloca(sizeof(shim_tcb_t));
  215. memcpy(old_shim_tcb, &tcb->shim_tcb, sizeof(shim_tcb_t));
  216. }
  217. if (user_stack_addr) {
  218. struct shim_vma_val vma;
  219. lookup_vma(ALIGN_DOWN(user_stack_addr), &vma);
  220. thread->stack_top = vma.addr + vma.length;
  221. thread->stack_red = thread->stack = vma.addr;
  222. tcb->shim_tcb.context.sp = user_stack_addr;
  223. tcb->shim_tcb.context.ret_ip = *(void **) user_stack_addr;
  224. }
  225. thread->is_alive = true;
  226. thread->in_vm = false;
  227. add_thread(thread);
  228. set_as_child(self, thread);
  229. if ((ret = do_migrate_process(&migrate_fork, NULL, NULL, thread)) < 0)
  230. goto failed;
  231. if (old_shim_tcb)
  232. memcpy(&tcb->shim_tcb, old_shim_tcb, sizeof(shim_tcb_t));
  233. lock(thread->lock);
  234. handle_map = thread->handle_map;
  235. thread->handle_map = NULL;
  236. unlock(thread->lock);
  237. if (handle_map)
  238. put_handle_map(handle_map);
  239. if (set_parent_tid)
  240. *set_parent_tid = tid;
  241. put_thread(thread);
  242. return tid;
  243. }
  244. enable_locking();
  245. struct clone_args new_args;
  246. memset(&new_args, 0, sizeof(new_args));
  247. new_args.create_event = DkNotificationEventCreate(PAL_FALSE);
  248. if (!new_args.create_event) {
  249. ret = -PAL_ERRNO;
  250. goto clone_thread_failed;
  251. }
  252. new_args.initialize_event = DkNotificationEventCreate(PAL_FALSE);
  253. if (!new_args.initialize_event) {
  254. ret = -PAL_ERRNO;
  255. goto clone_thread_failed;
  256. }
  257. new_args.thread = thread;
  258. new_args.parent = self;
  259. new_args.stack = user_stack_addr;
  260. new_args.return_pc = *(void **) user_stack_addr;
  261. // Invoke DkThreadCreate to spawn off a child process using the actual
  262. // "clone" system call. DkThreadCreate allocates a stack for the child
  263. // and then runs the given function on that stack However, we want our
  264. // child to run on the Parent allocated stack , so once the DkThreadCreate
  265. // returns .The parent comes back here - however, the child is Happily
  266. // running the function we gave to DkThreadCreate.
  267. PAL_HANDLE pal_handle = thread_create(clone_implementation_wrapper,
  268. &new_args, flags);
  269. if (!pal_handle) {
  270. ret = -PAL_ERRNO;
  271. goto clone_thread_failed;
  272. }
  273. thread->pal_handle = pal_handle;
  274. thread->in_vm = thread->is_alive = true;
  275. if (set_parent_tid)
  276. *set_parent_tid = tid;
  277. DkEventSet(new_args.create_event);
  278. DkObjectsWaitAny(1, &new_args.initialize_event, NO_TIMEOUT);
  279. DkObjectClose(new_args.initialize_event);
  280. put_thread(thread);
  281. return tid;
  282. clone_thread_failed:
  283. if (new_args.create_event)
  284. DkObjectClose(new_args.create_event);
  285. if (new_args.initialize_event)
  286. DkObjectClose(new_args.initialize_event);
  287. failed:
  288. if (thread)
  289. put_thread(thread);
  290. return ret;
  291. }