shim_init.c 32 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104
  1. /* -*- mode:c; c-file-style:"k&r"; c-basic-offset: 4; tab-width:4; indent-tabs-mode:nil; mode:auto-fill; fill-column:78; -*- */
  2. /* vim: set ts=4 sw=4 et tw=78 fo=cqt wm=0: */
  3. /* Copyright (C) 2014 OSCAR lab, Stony Brook University
  4. This file is part of Graphene Library OS.
  5. Graphene Library OS is free software: you can redistribute it and/or
  6. modify it under the terms of the GNU General Public License
  7. as published by the Free Software Foundation, either version 3 of the
  8. License, or (at your option) any later version.
  9. Graphene Library OS is distributed in the hope that it will be useful,
  10. but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  12. GNU General Public License for more details.
  13. You should have received a copy of the GNU General Public License
  14. along with this program. If not, see <http://www.gnu.org/licenses/>. */
  15. /*
  16. * shim_init.c
  17. *
  18. * This file contains entry and exit functions of library OS.
  19. */
  20. #include <shim_internal.h>
  21. #include <shim_tls.h>
  22. #include <shim_thread.h>
  23. #include <shim_handle.h>
  24. #include <shim_vma.h>
  25. #include <shim_checkpoint.h>
  26. #include <shim_fs.h>
  27. #include <shim_ipc.h>
  28. #include <shim_profile.h>
  29. #include <pal.h>
  30. #include <pal_debug.h>
  31. #include <pal_error.h>
  32. #include <sys/mman.h>
  33. #include <asm/unistd.h>
  34. #include <asm/fcntl.h>
  /* Host allocation granularity. All three values are filled in by shim_init()
     from PAL_CB(alloc_align): allocsize is the alignment itself, allocshift is
     (allocsize - 1) -- a low-bit mask despite its name -- and allocmask is the
     bitwise complement of allocshift. */
  35. unsigned long allocsize;
  36. unsigned long allocshift;
  37. unsigned long allocmask;
  38. /* The following constants will help matching glibc version with compatible
  39. SHIM libraries */
  40. #include "glibc-version.h"
  /* Exported copy of GLIBC_VERSION_2_17 as defined by glibc-version.h. */
  41. const unsigned int glibc_vers_2_17 = GLIBC_VERSION_2_17;
  42. static void handle_failure (PAL_PTR event, PAL_NUM arg, PAL_CONTEXT * context)
  43. {
  44. SHIM_GET_TLS()->pal_errno = (arg <= PAL_ERROR_BOUND) ? arg : 0;
  45. }
  /*
   * Assertion-failure hook. Prints "assert failed <file>:<line> <expression>",
   * calls pause(), and then shuts the library OS down through
   * shim_terminate(). The `function` argument is accepted but not printed.
   */
  void __assert_fail (const char * assertion, const char * file,
                      unsigned int line, const char * function)
  {
      __sys_printf("assert failed %s:%d %s\n",
                   file, line, assertion);

      pause();

      shim_terminate();
  }
  /* Presumably the hook the compiler's stack protector calls on a corrupted
     canary. The body is empty, so a call simply returns to its caller.
     NOTE(review): confirm that silently continuing is intended here. */
  53. void __stack_chk_fail (void)
  54. {
  55. }
  /* Translation table from PAL error codes to errno values, indexed by the PAL
     code (0 .. PAL_ERROR_BOUND) and read by convert_pal_errno(). The comment on
     each row names the PAL code that row is meant for; a value of 0 means the
     code has no errno equivalent in this table. The rows are positional, so
     their order must follow the numeric order of the PAL error codes. */
  56. static int pal_errno_to_unix_errno [PAL_ERROR_BOUND + 1] = {
  57. /* reserved */ 0,
  58. /* PAL_ERROR_NOTIMPLEMENTED */ ENOSYS,
  59. /* PAL_ERROR_NOTDEFINED */ ENOSYS,
  60. /* PAL_ERROR_NOTSUPPORT */ EACCES,
  61. /* PAL_ERROR_INVAL */ EINVAL,
  62. /* PAL_ERROR_TOOLONG */ ENAMETOOLONG,
  63. /* PAL_ERROR_DENIED */ EACCES,
  64. /* PAL_ERROR_BADHANDLE */ EFAULT,
  65. /* PAL_ERROR_STREAMEXIST */ EEXIST,
  66. /* PAL_ERROR_STREAMNOTEXIST */ ENOENT,
  67. /* PAL_ERROR_STREAMISFILE */ ENOTDIR,
  68. /* PAL_ERROR_STREAMISDIR */ EISDIR,
  69. /* PAL_ERROR_STREAMISDEVICE */ ESPIPE,
  70. /* PAL_ERROR_INTERRUPTED */ EINTR,
  71. /* PAL_ERROR_OVERFLOW */ EFAULT,
  72. /* PAL_ERROR_BADADDR */ EFAULT,
  73. /* PAL_ERROR_NOMEM */ ENOMEM,
  74. /* PAL_ERROR_NOTKILLABLE */ EACCES,
  75. /* PAL_ERROR_INCONSIST */ EFAULT,
  76. /* PAL_ERROR_TRYAGAIN */ EAGAIN,
  77. /* PAL_ERROR_ENDOFSTREAM */ 0,
  78. /* PAL_ERROR_NOTSERVER */ EINVAL,
  79. /* PAL_ERROR_NOTCONNECTION */ ENOTCONN,
  80. /* PAL_ERROR_ZEROSIZE */ 0,
  81. /* PAL_ERROR_CONNFAILED */ ECONNRESET,
  82. /* PAL_ERROR_ADDRNOTEXIST */ EADDRNOTAVAIL,
  83. };
  84. long convert_pal_errno (long err)
  85. {
  86. return (err >= 0 && err <= PAL_ERROR_BOUND) ?
  87. pal_errno_to_unix_errno[err] : 0;
  88. }
  /* Position on the host-provided stack just past the auxiliary vector and the
     zero word that follows it; computed in shim_init() and handed back to the
     caller through *return_stack. */
  89. void * initial_stack = NULL;
  /* When non-NULL, init_stack() uses this in place of its envp argument, and
     it stores the relocated environment array back here afterwards. */
  90. const char ** initial_envp __attribute_migratable = NULL;
  /* The next three objects are not referenced in this part of the file. */
  91. void * migrated_memory_start = 0;
  92. void * migrated_memory_end = 0;
  93. extern void * migrated_shim_addr;
  /* NULL-terminated list built by read_environs() from LD_LIBRARY_PATH. */
  94. const char ** library_paths = NULL;
  /* Set by read_environs() when the environment contains IN_GDB=1. */
  95. bool in_gdb = false;
  /* Created in shim_init() with create_lock(). */
  96. LOCKTYPE __master_lock;
  /* Not referenced in this part of the file. */
  97. bool lock_enabled = false;
  98. void init_tcb (shim_tcb_t * tcb)
  99. {
  100. tcb->canary = SHIM_TLS_CANARY;
  101. tcb->self = tcb;
  102. }
  103. void copy_tcb (shim_tcb_t * new_tcb, const shim_tcb_t * old_tcb)
  104. {
  105. memset(new_tcb, 0, sizeof(shim_tcb_t));
  106. new_tcb->canary = SHIM_TLS_CANARY;
  107. new_tcb->self = new_tcb;
  108. new_tcb->tp = old_tcb->tp;
  109. memcpy(&new_tcb->context, &old_tcb->context, sizeof(struct shim_context));
  110. new_tcb->tid = old_tcb->tid;
  111. new_tcb->debug_buf = old_tcb->debug_buf;
  112. }
  113. /* This function is used to allocate tls before interpreter start running */
  114. void allocate_tls (void * tcb_location, struct shim_thread * thread)
  115. {
  116. __libc_tcb_t * tcb = tcb_location;
  117. assert(tcb);
  118. tcb->tcb = tcb;
  119. init_tcb(&tcb->shim_tcb);
  120. if (thread) {
  121. thread->tcb = tcb;
  122. tcb->shim_tcb.tp = thread;
  123. tcb->shim_tcb.tid = thread->tid;
  124. } else {
  125. tcb->shim_tcb.tp = NULL;
  126. tcb->shim_tcb.tid = 0;
  127. }
  128. DkThreadPrivate(tcb);
  129. assert(SHIM_TLS_CHECK_CANARY());
  130. }
  131. void populate_tls (void * tcb_location)
  132. {
  133. __libc_tcb_t * tcb = (__libc_tcb_t *) tcb_location;
  134. assert(tcb);
  135. tcb->tcb = tcb;
  136. copy_tcb(&tcb->shim_tcb, SHIM_GET_TLS());
  137. struct shim_thread * thread = (struct shim_thread *) tcb->shim_tcb.tp;
  138. if (thread)
  139. thread->tcb = tcb;
  140. DkThreadPrivate(tcb);
  141. assert(SHIM_TLS_CHECK_CANARY());
  142. }
  143. DEFINE_PROFILE_OCCURENCE(alloc_stack, memory);
  144. DEFINE_PROFILE_OCCURENCE(alloc_stack_count, memory);
  145. #define STACK_FLAGS (MAP_PRIVATE|MAP_ANONYMOUS|VMA_INTERNAL)
  146. void * allocate_stack (size_t size, size_t protect_size, bool user)
  147. {
  148. size = ALIGN_UP(size);
  149. protect_size = ALIGN_UP(protect_size);
  150. /* preserve a non-readable, non-writeable page below the user
  151. stack to stop user program to clobber other vmas */
  152. void * stack = user ?
  153. get_unmapped_vma(size + protect_size, STACK_FLAGS) :
  154. NULL;
  155. if (user)
  156. stack = DkVirtualMemoryAlloc(stack, size + protect_size,
  157. 0, PAL_PROT_READ|PAL_PROT_WRITE);
  158. else
  159. stack = system_malloc(size + protect_size);
  160. if (!stack)
  161. return NULL;
  162. ADD_PROFILE_OCCURENCE(alloc_stack, size + protect_size);
  163. INC_PROFILE_OCCURENCE(alloc_stack_count);
  164. if (protect_size &&
  165. !DkVirtualMemoryProtect(stack, protect_size, PAL_PROT_NONE))
  166. return NULL;
  167. stack += protect_size;
  168. if (user) {
  169. if (bkeep_mmap(stack, size, PROT_READ|PROT_WRITE,
  170. STACK_FLAGS, NULL, 0, "stack") < 0)
  171. return NULL;
  172. if (protect_size &&
  173. bkeep_mmap(stack - protect_size, protect_size, 0,
  174. STACK_FLAGS, NULL, 0, NULL) < 0)
  175. return NULL;
  176. }
  177. debug("allocated stack at %p (size = %d)\n", stack, size);
  178. return stack;
  179. }
  /*
   * Lays out argv, envp and the auxiliary vector inside [stack, stack +
   * stack_size).
   *
   * Two cursors are used. Pointer arrays (argv[], then envp[], each
   * NULL-terminated) are built upward from stack_bottom, while the strings
   * they point to, a zero word, and the copy of the auxiliary vector are
   * carved downward from stack_top. Once everything is in place, the pointer
   * arrays are moved so that they end exactly at stack_top.
   *
   * On success returns 0 and updates *argvp / *envpp to the relocated arrays
   * (only when the corresponding input was non-NULL) and *auxpp to the copied
   * vector (only when nauxv is non-zero). Returns -ENOMEM as soon as the two
   * cursors would cross.
   */
  180. int populate_user_stack (void * stack, size_t stack_size,
  181. int nauxv, elf_auxv_t ** auxpp,
  182. const char *** argvp, const char *** envpp)
  183. {
  184. const char ** argv = *argvp, ** envp = *envpp;
  185. const char ** new_argv = NULL, ** new_envp = NULL;
  186. void * stack_bottom = stack;
  187. void * stack_top = stack + stack_size;
  /* Both helpers below are statement expressions that return -ENOMEM from the
     enclosing function when the request does not fit. ALLOCATE_TOP yields the
     new (lowered) top; ALLOCATE_BOTTOM yields the start of the reserved slot. */
  188. #define ALLOCATE_TOP(size) \
  189. ({ if ((stack_top -= (size)) < stack_bottom) return -ENOMEM; \
  190. stack_top; })
  191. #define ALLOCATE_BOTTOM(size) \
  192. ({ if ((stack_bottom += (size)) > stack_top) return -ENOMEM; \
  193. stack_bottom - (size); })
  /* No argv: emit just its NULL terminator, then jump into the loop body at
     the point where the environment is picked up. */
  194. if (!argv) {
  195. *(const char **) ALLOCATE_BOTTOM(sizeof(const char *)) = NULL;
  196. goto copy_envp;
  197. }
  198. new_argv = stack_bottom;
  /* The loop body runs once for argv and, if envp is non-NULL, a second time
     with argv re-pointed at envp. */
  199. while (argv) {
  200. for (const char ** a = argv ; *a ; a++) {
  201. const char ** t = ALLOCATE_BOTTOM(sizeof(const char *));
  202. int len = strlen(*a) + 1;
  203. char * abuf = ALLOCATE_TOP(len);
  204. memcpy(abuf, *a, len);
  205. *t = abuf;
  206. }
  207. *((const char **) ALLOCATE_BOTTOM(sizeof(const char *))) = NULL;
  208. copy_envp:
  209. if (!envp)
  210. break;
  211. new_envp = stack_bottom;
  212. argv = envp;
  213. envp = NULL;
  214. }
  /* No envp array was built: still emit its NULL terminator. */
  215. if (!new_envp)
  216. *(const char **) ALLOCATE_BOTTOM(sizeof(const char *)) = NULL;
  /* Clears the low three bits of stack_bottom, i.e. rounds it DOWN to a
     multiple of 8. */
  217. stack_bottom = (void *) ((unsigned long) stack_bottom & ~7UL);
  /* A zero word is placed above the auxiliary vector copy. */
  218. *((unsigned long *) ALLOCATE_TOP(sizeof(unsigned long))) = 0;
  219. if (nauxv) {
  220. elf_auxv_t * old_auxp = *auxpp;
  221. *auxpp = ALLOCATE_TOP(sizeof(elf_auxv_t) * nauxv);
  222. if (old_auxp)
  223. memcpy(*auxpp, old_auxp, nauxv * sizeof(elf_auxv_t));
  224. }
  /* Slide the pointer arrays up so they end at stack_top; the regions may
     overlap, hence memmove. */
  225. memmove(stack_top - (stack_bottom - stack), stack, stack_bottom - stack);
  /* Rebase the returned array pointers by the distance the block was moved. */
  226. if (new_argv)
  227. *argvp = (void *) new_argv + (stack_top - stack_bottom);
  228. if (new_envp)
  229. *envpp = (void *) new_envp + (stack_top - stack_bottom);
  230. return 0;
  231. }
  232. unsigned long sys_stack_size = 0;
  233. int init_stack (const char ** argv, const char ** envp, const char *** argpp,
  234. int nauxv, elf_auxv_t ** auxpp)
  235. {
  236. if (!sys_stack_size) {
  237. sys_stack_size = DEFAULT_SYS_STACK_SIZE;
  238. if (root_config) {
  239. char stack_cfg[CONFIG_MAX];
  240. if (get_config(root_config, "sys.stack.size", stack_cfg,
  241. CONFIG_MAX) > 0)
  242. sys_stack_size = ALIGN_UP(atoi(stack_cfg));
  243. }
  244. }
  245. struct shim_thread * cur_thread = get_cur_thread();
  246. if (!cur_thread || cur_thread->stack)
  247. return 0;
  248. void * stack = allocate_stack(sys_stack_size, allocsize, true);
  249. if (!stack)
  250. return -ENOMEM;
  251. if (initial_envp)
  252. envp = initial_envp;
  253. int ret = populate_user_stack(stack, sys_stack_size,
  254. nauxv, auxpp, &argv, &envp);
  255. if (ret < 0)
  256. return ret;
  257. *argpp = argv;
  258. initial_envp = envp;
  259. cur_thread->stack_top = stack + sys_stack_size;
  260. cur_thread->stack = stack;
  261. cur_thread->stack_red = stack - allocsize;
  262. return 0;
  263. }
  264. int read_environs (const char ** envp)
  265. {
  266. for (const char ** e = envp ; *e ; e++) {
  267. switch ((*e)[0]) {
  268. case 'L': {
  269. if (!memcmp(*e, "LD_LIBRARY_PATH=", 16)) {
  270. int npaths = 0;
  271. for (const char * s = (*e) + 16 ; *s ; s++)
  272. if (*s == ':')
  273. npaths++;
  274. const char ** paths = malloc(sizeof(const char *) *
  275. (npaths + 1));
  276. if (!paths)
  277. return -ENOMEM;
  278. const char * s = (*e) + 16, * next;
  279. int cnt = 0;
  280. while (*s) {
  281. for (next = s ; *next && *next != ':' ; next++);
  282. int len = next - s;
  283. char * str = malloc(len + 1);
  284. if (!str)
  285. return -ENOMEM;
  286. memcpy(str, s, len);
  287. str[len] = 0;
  288. paths[cnt++] = str;
  289. s = *next ? next + 1 : next;
  290. }
  291. paths[cnt] = NULL;
  292. library_paths = paths;
  293. break;
  294. }
  295. break;
  296. }
  297. case 'I': {
  298. if (!memcmp(*e, "IN_GDB=1", 8)) {
  299. in_gdb = true;
  300. break;
  301. }
  302. break;
  303. }
  304. }
  305. }
  306. return 0;
  307. }
  /* Parsed manifest. NULL until init_manifest() has allocated and filled it;
     init_manifest() resets it to NULL again if parsing fails. */
  308. struct config_store * root_config = NULL;
  /* Allocation callback installed into root_config->malloc by init_manifest();
     forwards to malloc(). */
  309. static void * __malloc (int size)
  310. {
  311. return malloc(size);
  312. }
  /* Release callback installed into root_config->free by init_manifest();
     forwards to free(). */
  313. static void __free (void * mem)
  314. {
  315. free(mem);
  316. }
  317. extern bool ask_for_checkpoint;
  318. int init_manifest (PAL_HANDLE manifest_handle)
  319. {
  320. PAL_STREAM_ATTR attr;
  321. if (!DkStreamAttributesQuerybyHandle(manifest_handle, &attr))
  322. return -PAL_ERRNO;
  323. size_t cfg_size = attr.size;
  324. void * cfg_addr = DkStreamMap(manifest_handle, NULL,
  325. PAL_PROT_READ|PAL_PROT_WRITECOPY, 0,
  326. ALIGN_UP(cfg_size));
  327. if (!cfg_addr)
  328. return -PAL_ERRNO;
  329. root_config = malloc(sizeof(struct config_store));
  330. root_config->raw_data = cfg_addr;
  331. root_config->raw_size = cfg_size;
  332. root_config->malloc = __malloc;
  333. root_config->free = __free;
  334. const char * errstring = "Unexpected error";
  335. int ret = 0;
  336. if ((ret = read_config(root_config, NULL, &errstring)) < 0) {
  337. root_config = NULL;
  338. sys_printf("Unable to read manifest file: %s\n", errstring);
  339. return ret;
  340. }
  341. char cfgbuf[CONFIG_MAX];
  342. if (get_config(root_config, "sys.ask_for_checkpoint", cfgbuf,
  343. CONFIG_MAX) > 0 &&
  344. cfgbuf[0] == '1' && !cfgbuf[1])
  345. ask_for_checkpoint = true;
  346. return 0;
  347. }
  /* Root node handed to print_profile_result() by shim_clean(); only present
     in PROFILE builds. */
  348. #ifdef PROFILE
  349. struct shim_profile profile_root;
  350. #endif
  /*
   * Splits the start-up vector at `cookie` into its parts:
   *   argv - the first slot (argc pointers followed by one terminator slot)
   *   envp - the slot right after argv's terminator
   *   auxp - the slot right after the first NULL found at or after envp
   * argv, envp and auxp must be assignable from void *.
   */
  #define FIND_ARG_COMPONENTS(cookie, argc, argv, envp, auxp)          \
      do {                                                             \
          char ** _slot = (char **) (cookie);                          \
          (argv) = (void *) _slot;                                     \
          _slot += (argc) + 1;                                         \
          (envp) = (void *) _slot;                                     \
          while (*_slot)                                               \
              _slot++;                                                 \
          (auxp) = (void *) (_slot + 1);                               \
      } while (0)
  360. static void * __process_auxv (elf_auxv_t * auxp)
  361. {
  362. elf_auxv_t * av;
  363. for (av = auxp; av->a_type != AT_NULL; av++)
  364. switch (av->a_type) {
  365. default: break;
  366. }
  367. return av + 1;
  368. }
  /*
   * Asserts that the 64-bit word at `stack` is zero and advances `stack`
   * (a void * lvalue) past that word.
   */
  #define FIND_LAST_STACK(stack)                                  \
      do {                                                        \
          /* check if exist a NULL end */                         \
          assert(*(uint64_t *) (stack) == 0);                     \
          (stack) = (void *) ((uint64_t *) (stack) + 1);          \
      } while (0)
  #ifdef PROFILE
  /*
   * Applies the PROFILE_ENABLED=<name>,<name>,... environment setting.
   *
   * Without that variable nothing changes. With it, every profile is first
   * disabled and then:
   *   1. profiles whose name matches a listed name exactly are enabled;
   *   2. enabling spreads downward: a disabled profile whose root is enabled
   *      becomes enabled, repeated for as long as a category was newly enabled
   *      in the previous round;
   *   3. enabling spreads upward: for every enabled non-category profile, the
   *      chain of disabled roots above it is enabled, stopping at the first
   *      root that is already enabled or at the top-level sentinel profile_.
   */
  static void set_profile_enabled (const char ** envp)
  {
      const char ** env = envp;

      while (*env && memcmp(*env, "PROFILE_ENABLED=", 16))
          env++;

      if (!(*env))
          return;

      for (int i = 0 ; i < N_PROFILE ; i++)
          PROFILES[i].disabled = true;

      bool category_enabled = false;
      const char * cursor = (*env) + 16;

      /* step 1: walk the comma-separated list */
      while (*cursor) {
          const char * stop = cursor;
          while ((*stop) && (*stop) != ',')
              stop++;

          if (stop > cursor) {
              int len = stop - cursor;

              for (int i = 0 ; i < N_PROFILE ; i++) {
                  struct shim_profile * prof = &PROFILES[i];

                  if (memcmp(prof->name, cursor, len) || prof->name[len])
                      continue;

                  prof->disabled = false;
                  if (prof->type == CATAGORY)
                      category_enabled = true;
              }
          }

          cursor = (*stop) ? stop + 1 : stop;
      }

      /* step 2: push "enabled" down from categories to their members */
      while (category_enabled) {
          category_enabled = false;

          for (int i = 0 ; i < N_PROFILE ; i++) {
              struct shim_profile * prof = &PROFILES[i];

              if (prof->disabled && prof->root != &profile_ &&
                  !prof->root->disabled) {
                  prof->disabled = false;
                  if (prof->type == CATAGORY)
                      category_enabled = true;
              }
          }
      }

      /* step 3: pull "enabled" up from leaves to their ancestors */
      for (int i = 0 ; i < N_PROFILE ; i++) {
          struct shim_profile * prof = &PROFILES[i];

          if (prof->type == CATAGORY || prof->disabled)
              continue;

          prof = prof->root;
          while (prof != &profile_ && prof->disabled) {
              prof->disabled = false;
              prof = prof->root;
          }
      }
  }
  #endif
  428. DEFINE_PROFILE_CATAGORY(resume, );
  429. DEFINE_PROFILE_INTERVAL(child_created_in_new_process, resume);
  430. DEFINE_PROFILE_INTERVAL(child_receive_header, resume);
  431. DEFINE_PROFILE_INTERVAL(child_total_migration_time, resume);
  432. static int init_newproc (struct newproc_header * hdr)
  433. {
  434. int bytes = DkStreamRead(PAL_CB(parent_process), 0,
  435. sizeof(struct newproc_header), hdr,
  436. NULL, 0);
  437. if (!bytes)
  438. return -PAL_ERRNO;
  439. SAVE_PROFILE_INTERVAL_SINCE(child_receive_header, hdr->write_proc_time);
  440. return hdr->failure;
  441. }
  /* Profiling declarations for the start-up path: the `init` category plus one
     interval per initialisation step. RUN_INIT(func, ...) ends with
     SAVE_PROFILE_INTERVAL(func), so the interval names below mirror the names
     of the functions shim_init() launches through RUN_INIT. */
  442. DEFINE_PROFILE_CATAGORY(init, );
  443. DEFINE_PROFILE_INTERVAL(init_signal, init);
  444. DEFINE_PROFILE_INTERVAL(init_heap, init);
  445. DEFINE_PROFILE_INTERVAL(init_slab, init);
  446. DEFINE_PROFILE_INTERVAL(init_str_mgr, init);
  447. DEFINE_PROFILE_INTERVAL(init_internal_map, init);
  448. DEFINE_PROFILE_INTERVAL(init_vma, init);
  449. DEFINE_PROFILE_INTERVAL(init_fs, init);
  450. DEFINE_PROFILE_INTERVAL(init_handle, init);
  451. DEFINE_PROFILE_INTERVAL(init_randgen, init);
  452. DEFINE_PROFILE_INTERVAL(read_from_checkpoint, init);
  453. DEFINE_PROFILE_INTERVAL(read_from_file, init);
  454. DEFINE_PROFILE_INTERVAL(init_newproc, init);
  455. DEFINE_PROFILE_INTERVAL(init_checkpoint, init);
  456. DEFINE_PROFILE_INTERVAL(init_mount_root, init);
  457. DEFINE_PROFILE_INTERVAL(restore_from_checkpoint, init);
  458. DEFINE_PROFILE_INTERVAL(restore_from_file, init);
  459. DEFINE_PROFILE_INTERVAL(restore_from_stack, init);
  460. DEFINE_PROFILE_INTERVAL(init_manifest, init);
  461. DEFINE_PROFILE_INTERVAL(init_ipc, init);
  462. DEFINE_PROFILE_INTERVAL(init_thread, init);
  463. DEFINE_PROFILE_INTERVAL(init_important_handles, init);
  464. DEFINE_PROFILE_INTERVAL(init_mount, init);
  465. DEFINE_PROFILE_INTERVAL(init_async, init);
  466. DEFINE_PROFILE_INTERVAL(init_stack, init);
  467. DEFINE_PROFILE_INTERVAL(read_environs, init);
  468. DEFINE_PROFILE_INTERVAL(init_loader, init);
  469. DEFINE_PROFILE_INTERVAL(init_ipc_helper, init);
  /* CALL_INIT simply invokes func with the given arguments. */
  470. #define CALL_INIT(func, args ...) func(args)
  /* RUN_INIT runs one initialisation step. If the step returns a negative
     value, a message naming the step (and the negated value via "%e") is
     printed and shim_terminate() is called; otherwise the profiling interval
     named after the step is saved. */
  471. #define RUN_INIT(func, ...) \
  472. do { \
  473. int _err = CALL_INIT(func, ##__VA_ARGS__); \
  474. if (_err < 0) { \
  475. sys_printf("shim initialization failed in " #func " (%e)", \
  476. -_err); \
  477. shim_terminate(); \
  478. } \
  479. SAVE_PROFILE_INTERVAL(func); \
  480. } while (0)
  481. extern PAL_HANDLE thread_start_event;
  /*
   * Entry point of the library OS.
   *
   * argc / args describe the start-up vector (argv, envp and the auxiliary
   * vector are located inside it with FIND_ARG_COMPONENTS). The function
   * installs a temporary on-stack TCB, derives the allocation constants from
   * PAL_CB(alloc_align), and then runs the initialisation steps in a fixed
   * order through RUN_INIT, which terminates the process if a step fails.
   *
   * Start-up has four flavours, chosen in this order:
   *   - argv[0] is exactly "-resume":      restore from a checkpoint named by
   *                                        argv[1];
   *   - argv[0] is exactly "-resume-file": restore from a file named by
   *                                        argv[1];
   *   - PAL_CB(parent_process) is set:     read the header sent by the parent
   *                                        and, if it announces checkpoint
   *                                        data, load that checkpoint;
   *   - otherwise:                         plain start.
   *
   * At the end, a saved context (context.sp != 0) is resumed with
   * restore_context(), or the thread's executable is started with
   * execute_elf_object(). If control comes back from both, *return_stack is
   * set to initial_stack and 0 is returned.
   */
  482. int shim_init (int argc, void * args, void ** return_stack)
  483. {
  484. debug_handle = PAL_CB(debug_stream);
  485. /* create the initial TCB, shim can not be run without a tcb */
  486. __libc_tcb_t tcb;
  487. memset(&tcb, 0, sizeof(__libc_tcb_t));
  488. allocate_tls(&tcb, NULL);
  489. debug_setbuf(&tcb.shim_tcb, true);
  490. #ifdef PROFILE
  491. unsigned long begin_time = GET_PROFILE_INTERVAL();
  492. #endif
  /* From here on PAL failures are recorded in TLS by handle_failure(). */
  493. DkSetExceptionHandler(&handle_failure, PAL_EVENT_FAILURE, 0);
  /* allocshift is the low-bit mask (allocsize - 1), allocmask its inverse. */
  494. allocsize = PAL_CB(alloc_align);
  495. allocshift = allocsize - 1;
  496. allocmask = ~allocshift;
  497. create_lock(__master_lock);
  498. const char ** argv, ** envp, ** argp = NULL;
  499. elf_auxv_t * auxp;
  500. /* call to figure out where the arguments are */
  501. FIND_ARG_COMPONENTS(args, argc, argv, envp, auxp);
  /* initial_stack first points just past the AT_NULL entry, which also gives
     the number of auxv entries (terminator included); FIND_LAST_STACK then
     steps over the zero word that follows. */
  502. initial_stack = __process_auxv(auxp);
  503. int nauxv = (elf_auxv_t *) initial_stack - auxp;
  504. FIND_LAST_STACK(initial_stack);
  505. #ifdef PROFILE
  506. set_profile_enabled(envp);
  507. #endif
  /* NOTE(review): hdr is not initialised here; it is only filled in by the
     restore / new-process paths below. */
  508. struct newproc_header hdr;
  509. void * cpaddr = NULL;
  510. #ifdef PROFILE
  511. unsigned long begin_create_time = 0;
  512. #endif
  513. BEGIN_PROFILE_INTERVAL();
  514. RUN_INIT(init_signal);
  515. RUN_INIT(init_heap);
  516. RUN_INIT(init_slab);
  517. RUN_INIT(init_str_mgr);
  518. RUN_INIT(init_internal_map);
  519. RUN_INIT(init_vma);
  520. RUN_INIT(init_fs);
  521. RUN_INIT(init_handle);
  522. RUN_INIT(init_randgen);
  523. debug("shim loaded at %p, ready to initialize\n", &__load_address);
  /* The memcmp lengths (8 and 13) include the terminating NUL, so only an
     exact "-resume" / "-resume-file" matches. NOTE(review): for a shorter
     argv[0] the comparison length exceeds the string. */
  524. if (argc && argv[0][0] == '-') {
  525. if (!memcmp(argv[0], "-resume", 8) && argc >= 2) {
  526. const char * filename = *(argv + 1);
  527. argc -= 2;
  528. argv += 2;
  529. RUN_INIT(init_mount_root);
  530. RUN_INIT(restore_from_checkpoint, filename, &hdr.checkpoint,
  531. &cpaddr);
  532. goto restore;
  533. }
  534. if (!memcmp(argv[0], "-resume-file", 13) && argc >= 2) {
  535. const char * filename = *(argv + 1);
  536. argc -= 2;
  537. argv += 2;
  538. RUN_INIT(init_mount_root);
  539. RUN_INIT(restore_from_file, filename, &hdr.checkpoint, &cpaddr);
  540. goto restore;
  541. }
  542. }
  /* Started by a parent process: fetch its header and, when it carries
     checkpoint data, load the checkpoint. */
  543. if (PAL_CB(parent_process)) {
  544. RUN_INIT(init_newproc, &hdr);
  545. SAVE_PROFILE_INTERVAL_SET(child_created_in_new_process,
  546. hdr.create_time, begin_time);
  547. #ifdef PROFILE
  548. begin_create_time = hdr.begin_create_time;
  549. #endif
  550. if (hdr.checkpoint.data.cpsize)
  551. RUN_INIT(init_checkpoint, &hdr.checkpoint, &cpaddr);
  552. }
  /* The two -resume paths jump straight to the label inside this block,
     bypassing the cpaddr test. */
  553. if (cpaddr) {
  554. restore:
  555. thread_start_event = DkNotificationEventCreate(0);
  556. RUN_INIT(restore_from_stack, cpaddr, &hdr.checkpoint.data, 0);
  557. }
  558. if (PAL_CB(manifest_handle))
  559. RUN_INIT(init_manifest, PAL_CB(manifest_handle));
  /* init_mount_root runs here as well, i.e. a second time on the -resume
     paths. */
  560. RUN_INIT(init_mount_root);
  561. RUN_INIT(init_ipc);
  562. RUN_INIT(init_thread);
  563. RUN_INIT(init_important_handles);
  564. RUN_INIT(init_mount);
  565. RUN_INIT(init_async);
  566. RUN_INIT(init_stack, argv, envp, &argp, nauxv, &auxp);
  567. RUN_INIT(read_environs, envp);
  568. RUN_INIT(init_loader);
  569. RUN_INIT(init_ipc_helper);
  570. debug("shim process initialized\n");
  571. #ifdef PROFILE
  572. if (begin_create_time)
  573. SAVE_PROFILE_INTERVAL_SINCE(child_total_migration_time,
  574. begin_create_time);
  575. #endif
  /* The event only exists when a restore took place above. */
  576. if (thread_start_event)
  577. DkEventSet(thread_start_event);
  578. shim_tcb_t * cur_tcb = SHIM_GET_TLS();
  579. struct shim_thread * cur_thread = (struct shim_thread *) cur_tcb->tp;
  580. if (cur_tcb->context.sp)
  581. restore_context(&cur_tcb->context);
  /* NOTE(review): cur_thread is dereferenced without a NULL check; presumably
     init_thread() guarantees a current thread by this point -- confirm. */
  582. if (cur_thread->exec)
  583. execute_elf_object(cur_thread->exec,
  584. argc, argp, nauxv, auxp);
  585. *return_stack = initial_stack;
  586. return 0;
  587. }
  /*
   * Generic "pick a name until creation succeeds" loop.
   *
   *   mkname(name, size, id)  writes a candidate name and returns its length,
   *                           or a negative error
   *   create(name, obj)       returns 0 on success, a positive value when the
   *                           name should be retried, or a negative error
   *   output(name, size, id, qstr)
   *                           optional; when given, its result is returned
   *
   * Without `output`, the name is stored into qstr (if non-NULL) and the
   * length reported by mkname is returned. Any negative result from mkname or
   * create is returned unchanged.
   */
  static int create_unique (int (*mkname) (char *, size_t, void *),
                            int (*create) (const char *, void *),
                            int (*output) (char *, size_t, const void *,
                                           struct shim_qstr *),
                            char * name, size_t size, void * id, void * obj,
                            struct shim_qstr * qstr)
  {
      for (;;) {
          int len = mkname(name, size, id);
          if (len < 0)
              return len;

          int ret = create(name, obj);
          if (ret < 0)
              return ret;

          if (ret == 0) {
              if (output)
                  return output(name, size, id, qstr);

              if (qstr)
                  qstrsetstr(qstr, name, len);

              return len;
          }

          /* positive result: the candidate was rejected, try another one */
      }
  }
  611. static int name_pipe (char * uri, size_t size, void * id)
  612. {
  613. IDTYPE pipeid;
  614. int len;
  615. if (getrand(&pipeid, sizeof(IDTYPE)) < sizeof(IDTYPE))
  616. return -EACCES;
  617. if ((len = snprintf(uri, size, "pipe.srv:%u", pipeid)) == size)
  618. return -ERANGE;
  619. *((IDTYPE *) id) = pipeid;
  620. return len;
  621. }
  622. static int open_pipe (const char * uri, void * obj)
  623. {
  624. PAL_HANDLE pipe = DkStreamOpen(uri, 0, 0, 0, 0);
  625. if (!pipe)
  626. return PAL_NATIVE_ERRNO == PAL_ERROR_STREAMEXIST ? 1 :
  627. -PAL_ERRNO;
  628. if (obj)
  629. *((PAL_HANDLE *) obj) = pipe;
  630. else
  631. DkObjectClose(pipe);
  632. return 0;
  633. }
  634. static int pipe_addr (char * uri, size_t size, const void * id,
  635. struct shim_qstr * qstr)
  636. {
  637. IDTYPE pipeid = *((IDTYPE *) id);
  638. int len;
  639. if ((len = snprintf(uri, size, "pipe:%u", pipeid)) == size)
  640. return -ERANGE;
  641. if (qstr)
  642. qstrsetstr(qstr, uri, len);
  643. return len;
  644. }
  645. int create_pipe (IDTYPE * id, char * uri, size_t size, PAL_HANDLE * hdl,
  646. struct shim_qstr * qstr)
  647. {
  648. IDTYPE pipeid;
  649. int ret = create_unique(&name_pipe, &open_pipe, &pipe_addr,
  650. uri, size, &pipeid, hdl, qstr);
  651. if (ret > 0 && id)
  652. *id = pipeid;
  653. return ret;
  654. }
  /*
   * Name generator for create_dir() / create_file() / create_handle():
   * appends eight hex digits of a random number to the string already in
   * `path` and stores that number through `id` (an unsigned int *).
   *
   * Returns the total length of the resulting path; -EACCES if not enough
   * random bytes could be obtained; -ERANGE if the existing prefix plus the
   * suffix does not fit into `size` bytes.
   *
   * NOTE(review): the prefix length is re-measured on every call, so when
   * create_unique() retries, the new suffix is appended after the previous
   * one instead of replacing it -- confirm whether that is intended.
   */
  static int name_path (char * path, size_t size, void * id)
  {
      size_t prefix_len = strlen(path);

      /* guard the subtraction below against wrapping around */
      if (prefix_len >= size)
          return -ERANGE;

      size_t room = size - prefix_len;
      unsigned int suffix;

      /* keep the result signed so that an error return is not mistaken for a
         large byte count when compared against sizeof */
      int got = getrand(&suffix, sizeof(unsigned int));
      if (got < 0 || (size_t) got < sizeof(unsigned int))
          return -EACCES;

      /* truncation has to be judged against the space that was actually
         offered to snprintf, not against the size of the whole buffer */
      int len = snprintf(path + prefix_len, room, "%08x", suffix);
      if (len < 0 || (size_t) len >= room)
          return -ERANGE;

      *((unsigned int *) id) = suffix;
      return (int) prefix_len + len;
  }
  668. static int open_dir (const char * path, void * obj)
  669. {
  670. struct shim_handle * dir = NULL;
  671. if (obj) {
  672. dir = get_new_handle();
  673. if (!dir)
  674. return -ENOMEM;
  675. }
  676. int ret = open_namei(dir, NULL, path, O_CREAT|O_EXCL|O_DIRECTORY, 0700,
  677. NULL);
  678. if (ret < 0)
  679. return ret = -EEXIST ? 1 : ret;
  680. if (obj)
  681. *((struct shim_handle **) obj) = dir;
  682. return 0;
  683. }
  /*
   * Creation callback for create_file(): creates the file `path` exclusively
   * (O_CREAT|O_EXCL|O_RDWR, mode 0600).
   *
   * When obj is non-NULL it is a struct shim_handle ** that receives a new
   * handle for the file. Returns 0 on success, 1 when the path already exists
   * (so that create_unique() tries another name), -ENOMEM when no handle can
   * be allocated, or the negative error from open_namei().
   *
   * NOTE(review): the handle obtained from get_new_handle() is not released
   * when open_namei() fails; presumably it should be dropped -- confirm.
   */
  static int open_file (const char * path, void * obj)
  {
      struct shim_handle * file = NULL;

      if (obj) {
          file = get_new_handle();
          if (!file)
              return -ENOMEM;
      }

      int ret = open_namei(file, NULL, path, O_CREAT|O_EXCL|O_RDWR, 0600,
                           NULL);

      /* Only "already exists" asks for a retry. The previous form assigned
         (-EEXIST ? 1 : ret), which is always 1, so every error was retried
         forever. */
      if (ret < 0)
          return ret == -EEXIST ? 1 : ret;

      if (obj)
          *((struct shim_handle **) obj) = file;

      return 0;
  }
  700. static int open_pal_handle (const char * uri, void * obj)
  701. {
  702. PAL_HANDLE hdl;
  703. if (!memcmp(uri, "dir:", 4))
  704. hdl = DkStreamOpen(uri, 0,
  705. PAL_SHARE_OWNER_X|PAL_SHARE_OWNER_W|
  706. PAL_SHARE_OWNER_R,
  707. PAL_CREAT_TRY|PAL_CREAT_ALWAYS,
  708. 0);
  709. else
  710. hdl = DkStreamOpen(uri, PAL_ACCESS_RDWR,
  711. PAL_SHARE_OWNER_W|PAL_SHARE_OWNER_R,
  712. PAL_CREAT_TRY|PAL_CREAT_ALWAYS,
  713. 0);
  714. if (!hdl) {
  715. if (PAL_NATIVE_ERRNO == PAL_ERROR_STREAMEXIST)
  716. return 0;
  717. else
  718. return -PAL_ERRNO;
  719. }
  720. if (obj)
  721. *((PAL_HANDLE *) obj) = hdl;
  722. return 0;
  723. }
  /*
   * Output callback shared by create_dir() / create_file() /
   * create_handle(): reports the length of the finished path and, when qstr
   * is non-NULL, copies the path into it. `size` and `id` are not used.
   */
  static int output_path (char * path, size_t size, const void * id,
                          struct shim_qstr * qstr)
  {
      int path_len = strlen(path);

      if (!qstr)
          return path_len;

      qstrsetstr(qstr, path, path_len);
      return path_len;
  }
  732. int create_dir (const char * prefix, char * path, size_t size,
  733. struct shim_handle ** hdl)
  734. {
  735. unsigned int suffix;
  736. if (prefix) {
  737. int len = strlen(prefix);
  738. if (len >= size)
  739. return -ERANGE;
  740. memcpy(path, prefix, len + 1);
  741. }
  742. return create_unique(&name_path, &open_dir, &output_path, path, size,
  743. &suffix, hdl, NULL);
  744. }
  745. int create_file (const char * prefix, char * path, size_t size,
  746. struct shim_handle ** hdl)
  747. {
  748. unsigned int suffix;
  749. if (prefix) {
  750. int len = strlen(prefix);
  751. if (len >= size)
  752. return -ERANGE;
  753. memcpy(path, prefix, len + 1);
  754. }
  755. return create_unique(&name_path, &open_file, &output_path, path, size,
  756. &suffix, hdl, NULL);
  757. }
  758. int create_handle (const char * prefix, char * uri, size_t size,
  759. PAL_HANDLE * hdl, unsigned int * id)
  760. {
  761. unsigned int suffix;
  762. if (prefix) {
  763. int len = strlen(prefix);
  764. if (len >= size)
  765. return -ERANGE;
  766. memcpy(uri, prefix, len + 1);
  767. }
  768. return create_unique(&name_path, &open_pal_handle, &output_path, uri, size,
  769. id ? : &suffix, hdl, NULL);
  770. }
  771. void check_stack_hook (void)
  772. {
  773. struct shim_thread * cur_thread = get_cur_thread();
  774. void * rsp;
  775. asm volatile ("movq %%rsp, %0" : "=r"(rsp) :: "memory");
  776. if (rsp <= cur_thread->stack_top && rsp > cur_thread->stack) {
  777. if (rsp - cur_thread->stack < PAL_CB(pagesize))
  778. sys_printf("*** stack is almost drained (RSP = %p, stack = %p-%p) ***\n",
  779. rsp, cur_thread->stack, cur_thread->stack_top);
  780. } else {
  781. sys_printf("*** context dismateched with thread stack (RSP = %p, stack = %p-%p) ***\n",
  782. rsp, cur_thread->stack, cur_thread->stack_top);
  783. }
  784. }
  #ifdef PROFILE
  /*
   * Prints the enabled profiles whose root is `root` to hdl, one line each,
   * preceded by `level` indentation units; categories are followed by their
   * own members, printed recursively one level deeper.
   *
   *   occurrence: "- <name>: <count> times"              (only if count != 0)
   *   interval:   "- (<time>) <name>: <count> times, <time / count> msec"
   *                                                      (only if count != 0)
   *   category:   "- <name>:"
   *
   * After the list, a "total" line sums up the intervals printed at this
   * level, if there were any.
   */
  static void print_profile_result (PAL_HANDLE hdl, struct shim_profile * root,
                                    int level)
  {
      unsigned long total_interval_time = 0;
      unsigned long total_interval_count = 0;

      for (int i = 0 ; i < N_PROFILE ; i++) {
          struct shim_profile * profile = &PROFILES[i];
          if (profile->root != root || profile->disabled)
              continue;

          switch (profile->type) {
              case OCCURENCE: {
                  unsigned int count =
                      atomic_read(&profile->val.occurence.count);
                  if (count) {
                      for (int j = 0 ; j < level ; j++)
                          __sys_fprintf(hdl, " ");
                      __sys_fprintf(hdl, "- %s: %u times\n", profile->name, count);
                  }
                  break;
              }
              case INTERVAL: {
                  unsigned int count =
                      atomic_read(&profile->val.interval.count);
                  if (count) {
                      unsigned long time =
                          atomic_read(&profile->val.interval.time);
                      unsigned long ind_time = time / count;
                      total_interval_time += time;
                      total_interval_count += count;
                      for (int j = 0 ; j < level ; j++)
                          __sys_fprintf(hdl, " ");
                      __sys_fprintf(hdl, "- (%11.11lu) %s: %u times, %lu msec\n",
                                    time, profile->name, count, ind_time);
                  }
                  break;
              }
              case CATAGORY:
                  for (int j = 0 ; j < level ; j++)
                      __sys_fprintf(hdl, " ");
                  __sys_fprintf(hdl, "- %s:\n", profile->name);
                  print_profile_result(hdl, profile, level + 1);
                  break;
          }
      }

      if (total_interval_count) {
          /* both totals are unsigned long, so they need the 'l' length
             modifier just like the per-interval line above */
          __sys_fprintf(hdl, "- (%11.11lu) total: %lu times, %lu msec\n",
                        total_interval_time, total_interval_count,
                        total_interval_time / total_interval_count);
      }
  }
  #endif /* PROFILE */
  /* Counts entries into shim_clean(); only the caller that raises it to 1
     performs the cleanup. */
  837. static struct shim_atomic in_terminate = { .counter = 0, };
  /* Terminates the whole process. shim_clean() normally ends in
     DkProcessExit(cur_process.exit_code) itself; the DkProcessExit(0) below
     is only reached when shim_clean() returns early because a cleanup had
     already been started. */
  838. int shim_terminate (void)
  839. {
  840. debug("teminating the whole process\n");
  841. /* do last clean-up of the process */
  842. shim_clean();
  843. DkProcessExit(0);
  844. return 0;
  845. }
  /*
   * Final process clean-up. In order: persists messages with
   * store_all_msg_persist(); in PROFILE builds, records the exit system-call
   * interval and prints the profiling report if ipc_cld_profile_send()
   * returns non-zero; removes all IPC ports; closes shim_stdio; and finally
   * exits through DkProcessExit(cur_process.exit_code).
   *
   * Returns 0 without doing anything when a cleanup has already been started.
   */
  846. int shim_clean (void)
  847. {
  848. /* preventing multiple cleanup, this is mostly caused by
  849. assertion in shim_clean */
  /* NOTE(review): the increment and the read are two separate operations. */
  850. atomic_inc(&in_terminate);
  851. if (atomic_read(&in_terminate) > 1)
  852. return 0;
  853. store_all_msg_persist();
  854. #ifdef PROFILE
  /* Close the interval of the exit / exit_group system call, if one is being
     timed (ENTER_TIME non-zero). */
  855. if (ENTER_TIME) {
  856. switch (SHIM_GET_TLS()->context.syscall_nr) {
  857. case __NR_exit_group:
  858. SAVE_PROFILE_INTERVAL_SINCE(syscall_exit_group, ENTER_TIME);
  859. break;
  860. case __NR_exit:
  861. SAVE_PROFILE_INTERVAL_SINCE(syscall_exit, ENTER_TIME);
  862. break;
  863. }
  864. }
  /* The report is printed under the master lock, and only if a stdio handle
     can be opened. */
  865. if (ipc_cld_profile_send()) {
  866. master_lock();
  867. PAL_HANDLE hdl = __open_shim_stdio();
  868. if (hdl) {
  869. __sys_fprintf(hdl, "******************************\n");
  870. __sys_fprintf(hdl, "profiling:\n");
  871. print_profile_result(hdl, &profile_root, 0);
  872. __sys_fprintf(hdl, "******************************\n");
  873. }
  874. master_unlock();
  875. }
  876. #endif
  877. del_all_ipc_ports(0);
  /* (PAL_HANDLE) -1 is skipped as well as NULL when closing shim_stdio. */
  878. if (shim_stdio && shim_stdio != (PAL_HANDLE) -1)
  879. DkObjectClose(shim_stdio);
  880. shim_stdio = NULL;
  881. debug("process %u successfully terminated\n", cur_process.vmid);
  /* The master lock is taken and never released before exiting. */
  882. master_lock();
  883. DkProcessExit(cur_process.exit_code);
  884. return 0;
  885. }
  886. int message_confirm (const char * message, const char * options)
  887. {
  888. char answer;
  889. int noptions = strlen(options);
  890. char * option_str = __alloca(noptions * 2 + 3), * str = option_str;
  891. int ret = 0;
  892. *(str++) = ' ';
  893. *(str++) = '[';
  894. for (int i = 0 ; i < noptions ; i++) {
  895. *(str++) = options[i];
  896. *(str++) = '/';
  897. }
  898. str--;
  899. *(str++) = ']';
  900. *(str++) = ' ';
  901. master_lock();
  902. PAL_HANDLE hdl = __open_shim_stdio();
  903. if (!hdl) {
  904. master_unlock();
  905. return -EACCES;
  906. }
  907. #define WRITE(buf, len) \
  908. ({ int _ret = DkStreamWrite(hdl, 0, len, buf, NULL); \
  909. _ret ? : -PAL_ERRNO; })
  910. #define READ(buf, len) \
  911. ({ int _ret = DkStreamRead(hdl, 0, len, buf, NULL, 0); \
  912. _ret ? : -PAL_ERRNO; })
  913. if ((ret = WRITE(message, strlen(message))) < 0)
  914. goto out;
  915. if ((ret = WRITE(option_str, noptions * 2 + 3)) < 0)
  916. goto out;
  917. if ((ret = READ(&answer, 1)) < 0)
  918. goto out;
  919. out:
  920. master_unlock();
  921. return (ret < 0) ? ret : answer;
  922. }