shim_init.c 35 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511661167116811691170117111721173117411751176117711781179118011811182118311841185118611871188
  1. /* -*- mode:c; c-file-style:"k&r"; c-basic-offset: 4; tab-width:4; indent-tabs-mode:nil; mode:auto-fill; fill-column:78; -*- */
  2. /* vim: set ts=4 sw=4 et tw=78 fo=cqt wm=0: */
  3. /* Copyright (C) 2014 Stony Brook University
  4. This file is part of Graphene Library OS.
  5. Graphene Library OS is free software: you can redistribute it and/or
  6. modify it under the terms of the GNU Lesser General Public License
  7. as published by the Free Software Foundation, either version 3 of the
  8. License, or (at your option) any later version.
  9. Graphene Library OS is distributed in the hope that it will be useful,
  10. but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  12. GNU Lesser General Public License for more details.
  13. You should have received a copy of the GNU Lesser General Public License
  14. along with this program. If not, see <http://www.gnu.org/licenses/>. */
  15. /*
  16. * shim_init.c
  17. *
  18. * This file contains entry and exit functions of library OS.
  19. */
  20. #include <shim_internal.h>
  21. #include <shim_tls.h>
  22. #include <shim_thread.h>
  23. #include <shim_handle.h>
  24. #include <shim_vma.h>
  25. #include <shim_checkpoint.h>
  26. #include <shim_fs.h>
  27. #include <shim_ipc.h>
  28. #include <shim_profile.h>
  29. #include <pal.h>
  30. #include <pal_debug.h>
  31. #include <pal_error.h>
  32. #include <sys/mman.h>
  33. #include <asm/unistd.h>
  34. #include <asm/fcntl.h>
  /* Allocation granularity and the values derived from it.  shim_init() in
     this file sets allocsize from PAL_CB(alloc_align), allocshift to
     allocsize - 1 and allocmask to ~allocshift. */
  35. unsigned long allocsize;
  36. unsigned long allocshift;
  37. unsigned long allocmask;
  38. /* The following constant helps match the glibc version against compatible
  39. SHIM libraries. */
  40. #include "glibc-version.h"
  41. const unsigned int glibc_version = GLIBC_VERSION;
  42. static void handle_failure (PAL_PTR event, PAL_NUM arg, PAL_CONTEXT * context)
  43. {
  44. SHIM_GET_TLS()->pal_errno = (arg <= PAL_ERROR_BOUND) ? arg : 0;
  45. }
  /* Abort path: calls pause() first and then shim_terminate(). */
  void __abort (void)
  {
      pause();
      shim_terminate();
  }
  /*
   * printf-style warning helper: forwards the format string and the variadic
   * arguments to __sys_vprintf().
   */
  void warn (const char *format, ...)
  {
      va_list ap;

      va_start(ap, format);
      __sys_vprintf(format, &ap);
      va_end(ap);
  }
  /* Hook with the stack-protector failure name; this implementation does
     nothing and simply returns to its caller. */
  void __stack_chk_fail (void)
  {
      /* intentionally empty body, as in the original */
  }
  /*
   * Translation table used by convert_pal_errno(): the array is indexed by a
   * PAL error number (0 .. PAL_ERROR_BOUND) and holds the Unix errno reported
   * for it; an entry of 0 means no errno is reported.  The comment on each
   * entry names the PAL error the slot is meant for.
   * NOTE(review): the entry order has to follow the numeric values of the
   * PAL_ERROR_* constants, which are defined outside this file -- confirm
   * whenever a PAL error is added.
   */
  60. static int pal_errno_to_unix_errno [PAL_ERROR_BOUND + 1] = {
  61. /* reserved */ 0,
  62. /* PAL_ERROR_NOTIMPLEMENTED */ ENOSYS,
  63. /* PAL_ERROR_NOTDEFINED */ ENOSYS,
  64. /* PAL_ERROR_NOTSUPPORT */ EACCES,
  65. /* PAL_ERROR_INVAL */ EINVAL,
  66. /* PAL_ERROR_TOOLONG */ ENAMETOOLONG,
  67. /* PAL_ERROR_DENIED */ EACCES,
  68. /* PAL_ERROR_BADHANDLE */ EFAULT,
  69. /* PAL_ERROR_STREAMEXIST */ EEXIST,
  70. /* PAL_ERROR_STREAMNOTEXIST */ ENOENT,
  71. /* PAL_ERROR_STREAMISFILE */ ENOTDIR,
  72. /* PAL_ERROR_STREAMISDIR */ EISDIR,
  73. /* PAL_ERROR_STREAMISDEVICE */ ESPIPE,
  74. /* PAL_ERROR_INTERRUPTED */ EINTR,
  75. /* PAL_ERROR_OVERFLOW */ EFAULT,
  76. /* PAL_ERROR_BADADDR */ EFAULT,
  77. /* PAL_ERROR_NOMEM */ ENOMEM,
  78. /* PAL_ERROR_NOTKILLABLE */ EACCES,
  79. /* PAL_ERROR_INCONSIST */ EFAULT,
  80. /* PAL_ERROR_TRYAGAIN */ EAGAIN,
  81. /* PAL_ERROR_ENDOFSTREAM */ 0,
  82. /* PAL_ERROR_NOTSERVER */ EINVAL,
  83. /* PAL_ERROR_NOTCONNECTION */ ENOTCONN,
  84. /* PAL_ERROR_ZEROSIZE */ 0,
  85. /* PAL_ERROR_CONNFAILED */ ECONNRESET,
  86. /* PAL_ERROR_ADDRNOTEXIST */ EADDRNOTAVAIL,
  87. };
  88. long convert_pal_errno (long err)
  89. {
  90. return (err >= 0 && err <= PAL_ERROR_BOUND) ?
  91. pal_errno_to_unix_errno[err] : 0;
  92. }
  /*
   * Parse an unsigned integer with an optional radix prefix and size suffix.
   *
   *   - a leading "0" selects octal, a leading "0x" or "0X" selects
   *     hexadecimal, anything else is decimal;
   *   - parsing stops at the first character that is not a digit of the
   *     selected radix;
   *   - if that character is G/g, M/m or K/k the value is multiplied by
   *     2^30, 2^20 or 2^10 respectively.
   *
   * Returns the parsed value; a NULL or non-numeric string yields 0.
   * Overflow is not detected.
   */
  unsigned long parse_int (const char * str)
  {
      unsigned long num = 0;
      int radix = 10;
      char c;

      if (!str)
          return 0;

      if (str[0] == '0') {
          str++;
          radix = 8;
          /* accept both spellings of the hexadecimal prefix */
          if (str[0] == 'x' || str[0] == 'X') {
              str++;
              radix = 16;
          }
      }

      while ((c = *(str++))) {
          int val;

          if (c >= 'A' && c <= 'F')
              val = c - 'A' + 10;
          else if (c >= 'a' && c <= 'f')
              val = c - 'a' + 10;
          else if (c >= '0' && c <= '9')
              val = c - '0';
          else
              break;

          if (val >= radix)
              break;

          num = num * radix + val;
      }

      /* c now holds the character that ended the digits (or 0);
         the multipliers are computed in unsigned long, not int */
      if (c == 'G' || c == 'g')
          num *= 1024UL * 1024UL * 1024UL;
      else if (c == 'M' || c == 'm')
          num *= 1024UL * 1024UL;
      else if (c == 'K' || c == 'k')
          num *= 1024UL;

      return num;
  }
  128. long int glibc_option (const char * opt)
  129. {
  130. char cfg[CONFIG_MAX];
  131. if (strcmp_static(opt, "heap_size")) {
  132. ssize_t ret = get_config(root_config, "glibc.heap_size", cfg, CONFIG_MAX);
  133. if (ret <= 0) {
  134. debug("no glibc option: %s (err=%d)\n", opt, ret);
  135. return -ENOENT;
  136. }
  137. long int heap_size = parse_int(cfg);
  138. debug("glibc option: heap_size = %ld\n", heap_size);
  139. return (long int) heap_size;
  140. }
  141. return -EINVAL;
  142. }
  /* Globals shared with the rest of the library OS.
     - migrated_memory_start/end, migrated_shim_addr: not written anywhere in
       this part of the file (presumably filled in by the checkpoint/migration
       code -- TODO confirm).
     - initial_stack: set by shim_init() to the address just past the
       auxiliary vector and the terminating zero word of the stack it was
       started with, and handed back through *return_stack.
     - initial_envp: the environment array; init_stack() prefers it over its
       envp argument when it is already set, and stores the relocated array
       here afterwards.
     - library_paths: NULL-terminated list built by read_environs() from
       LD_LIBRARY_PATH.
     - __master_lock: created by shim_init().
     - lock_enabled: not touched in this part of the file. */
  143. void * migrated_memory_start;
  144. void * migrated_memory_end;
  145. void * migrated_shim_addr;
  146. void * initial_stack;
  147. const char ** initial_envp __attribute_migratable;
  148. const char ** library_paths;
  149. LOCKTYPE __master_lock;
  150. bool lock_enabled;
  151. void init_tcb (shim_tcb_t * tcb)
  152. {
  153. tcb->canary = SHIM_TLS_CANARY;
  154. tcb->self = tcb;
  155. }
  156. void copy_tcb (shim_tcb_t * new_tcb, const shim_tcb_t * old_tcb)
  157. {
  158. memset(new_tcb, 0, sizeof(shim_tcb_t));
  159. new_tcb->canary = SHIM_TLS_CANARY;
  160. new_tcb->self = new_tcb;
  161. new_tcb->tp = old_tcb->tp;
  162. memcpy(&new_tcb->context, &old_tcb->context, sizeof(struct shim_context));
  163. new_tcb->tid = old_tcb->tid;
  164. new_tcb->debug_buf = old_tcb->debug_buf;
  165. }
  166. /* This function is used to allocate tls before interpreter start running */
  167. void allocate_tls (void * tcb_location, bool user, struct shim_thread * thread)
  168. {
  169. __libc_tcb_t * tcb = tcb_location;
  170. assert(tcb);
  171. tcb->tcb = tcb;
  172. init_tcb(&tcb->shim_tcb);
  173. if (thread) {
  174. thread->tcb = tcb;
  175. thread->user_tcb = user;
  176. tcb->shim_tcb.tp = thread;
  177. tcb->shim_tcb.tid = thread->tid;
  178. } else {
  179. tcb->shim_tcb.tp = NULL;
  180. tcb->shim_tcb.tid = 0;
  181. }
  182. DkSegmentRegister(PAL_SEGMENT_FS, tcb);
  183. assert(SHIM_TLS_CHECK_CANARY());
  184. }
  185. void populate_tls (void * tcb_location, bool user)
  186. {
  187. __libc_tcb_t * tcb = (__libc_tcb_t *) tcb_location;
  188. assert(tcb);
  189. tcb->tcb = tcb;
  190. copy_tcb(&tcb->shim_tcb, SHIM_GET_TLS());
  191. struct shim_thread * thread = (struct shim_thread *) tcb->shim_tcb.tp;
  192. if (thread) {
  193. thread->tcb = tcb;
  194. thread->user_tcb = user;
  195. }
  196. DkSegmentRegister(PAL_SEGMENT_FS, tcb);
  197. assert(SHIM_TLS_CHECK_CANARY());
  198. }
  199. DEFINE_PROFILE_OCCURENCE(alloc_stack, memory);
  200. DEFINE_PROFILE_OCCURENCE(alloc_stack_count, memory);
  201. #define STACK_FLAGS (MAP_PRIVATE|MAP_ANONYMOUS)
  202. void * allocate_stack (size_t size, size_t protect_size, bool user)
  203. {
  204. size = ALIGN_UP(size);
  205. protect_size = ALIGN_UP(protect_size);
  206. /* preserve a non-readable, non-writeable page below the user
  207. stack to stop user program to clobber other vmas */
  208. void * stack = user ?
  209. get_unmapped_vma(size + protect_size, STACK_FLAGS) :
  210. NULL;
  211. if (user)
  212. stack = (void *) DkVirtualMemoryAlloc(stack, size + protect_size,
  213. 0, PAL_PROT_READ|PAL_PROT_WRITE);
  214. else
  215. stack = system_malloc(size + protect_size);
  216. if (!stack)
  217. return NULL;
  218. ADD_PROFILE_OCCURENCE(alloc_stack, size + protect_size);
  219. INC_PROFILE_OCCURENCE(alloc_stack_count);
  220. if (protect_size &&
  221. !DkVirtualMemoryProtect(stack, protect_size, PAL_PROT_NONE))
  222. return NULL;
  223. stack += protect_size;
  224. if (user) {
  225. if (bkeep_mmap(stack, size, PROT_READ|PROT_WRITE,
  226. STACK_FLAGS, NULL, 0, "stack") < 0)
  227. return NULL;
  228. if (protect_size &&
  229. bkeep_mmap(stack - protect_size, protect_size, 0,
  230. STACK_FLAGS, NULL, 0, NULL) < 0)
  231. return NULL;
  232. }
  233. debug("allocated stack at %p (size = %d)\n", stack, size);
  234. return stack;
  235. }
  /*
   * Lay out argv, envp and the auxiliary vector inside the buffer
   * [stack, stack + stack_size).
   *
   * Pointer arrays are first built upwards from the bottom of the buffer
   * while the strings they point to are copied downwards from the top.  A
   * zero word and then the auxiliary vector (nauxv entries) are placed below
   * the strings, and finally the pointer arrays are moved up so that they sit
   * directly below the auxiliary vector.
   *
   * On success *argvp and *envpp are updated to the relocated arrays (only
   * when the corresponding input array was non-NULL), *auxpp points at the
   * copied auxiliary vector (only when nauxv is non-zero) and 0 is returned.
   * Returns -ENOMEM as soon as the bottom and top regions would collide.
   */
  236. int populate_user_stack (void * stack, size_t stack_size,
  237. int nauxv, elf_auxv_t ** auxpp,
  238. const char *** argvp, const char *** envpp)
  239. {
  240. const char ** argv = *argvp, ** envp = *envpp;
  241. const char ** new_argv = NULL, ** new_envp = NULL;
  242. void * stack_bottom = stack;
  243. void * stack_top = stack + stack_size;
  /* Carve `size` bytes off the top; evaluates to the new top.  Returns
     -ENOMEM from the enclosing function on collision. */
  244. #define ALLOCATE_TOP(size) \
  245. ({ if ((stack_top -= (size)) < stack_bottom) return -ENOMEM; \
  246. stack_top; })
  /* Carve `size` bytes off the bottom; evaluates to the start of the carved
     area.  Returns -ENOMEM from the enclosing function on collision. */
  247. #define ALLOCATE_BOTTOM(size) \
  248. ({ if ((stack_bottom += (size)) > stack_top) return -ENOMEM; \
  249. stack_bottom - (size); })
  /* no argv: emit just its NULL terminator and continue with envp */
  250. if (!argv) {
  251. *(const char **) ALLOCATE_BOTTOM(sizeof(const char *)) = NULL;
  252. goto copy_envp;
  253. }
  254. new_argv = stack_bottom;
  /* The loop body runs once for argv and, if envp is present, once more
     with argv re-pointed at envp. */
  255. while (argv) {
  256. for (const char ** a = argv ; *a ; a++) {
  257. const char ** t = ALLOCATE_BOTTOM(sizeof(const char *));
  258. int len = strlen(*a) + 1;
  259. char * abuf = ALLOCATE_TOP(len);
  260. memcpy(abuf, *a, len);
  261. *t = abuf;
  262. }
  263. *((const char **) ALLOCATE_BOTTOM(sizeof(const char *))) = NULL;
  264. copy_envp:
  265. if (!envp)
  266. break;
  267. new_envp = stack_bottom;
  268. argv = envp;
  269. envp = NULL;
  270. }
  /* no envp: still emit its NULL terminator */
  271. if (!new_envp)
  272. *(const char **) ALLOCATE_BOTTOM(sizeof(const char *)) = NULL;
  /* round the end of the pointer area down to a multiple of 8 */
  273. stack_bottom = (void *) ((unsigned long) stack_bottom & ~7UL);
  /* zero word placed above the auxiliary vector */
  274. *((unsigned long *) ALLOCATE_TOP(sizeof(unsigned long))) = 0;
  275. if (nauxv) {
  276. elf_auxv_t * old_auxp = *auxpp;
  277. *auxpp = ALLOCATE_TOP(sizeof(elf_auxv_t) * nauxv);
  278. if (old_auxp)
  279. memcpy(*auxpp, old_auxp, nauxv * sizeof(elf_auxv_t));
  280. }
  /* slide the pointer arrays up against the data allocated from the top;
     the arrays move by (stack_top - stack_bottom) bytes */
  281. memmove(stack_top - (stack_bottom - stack), stack, stack_bottom - stack);
  282. if (new_argv)
  283. *argvp = (void *) new_argv + (stack_top - stack_bottom);
  284. if (new_envp)
  285. *envpp = (void *) new_envp + (stack_top - stack_bottom);
  286. return 0;
  287. }
  288. unsigned long sys_stack_size = 0;
  289. int init_stack (const char ** argv, const char ** envp, const char *** argpp,
  290. int nauxv, elf_auxv_t ** auxpp)
  291. {
  292. if (!sys_stack_size) {
  293. sys_stack_size = DEFAULT_SYS_STACK_SIZE;
  294. if (root_config) {
  295. char stack_cfg[CONFIG_MAX];
  296. if (get_config(root_config, "sys.stack.size", stack_cfg,
  297. CONFIG_MAX) > 0)
  298. sys_stack_size = ALIGN_UP(parse_int(stack_cfg));
  299. }
  300. }
  301. struct shim_thread * cur_thread = get_cur_thread();
  302. if (!cur_thread || cur_thread->stack)
  303. return 0;
  304. void * stack = allocate_stack(sys_stack_size, allocsize, true);
  305. if (!stack)
  306. return -ENOMEM;
  307. if (initial_envp)
  308. envp = initial_envp;
  309. int ret = populate_user_stack(stack, sys_stack_size,
  310. nauxv, auxpp, &argv, &envp);
  311. if (ret < 0)
  312. return ret;
  313. *argpp = argv;
  314. initial_envp = envp;
  315. cur_thread->stack_top = stack + sys_stack_size;
  316. cur_thread->stack = stack;
  317. cur_thread->stack_red = stack - allocsize;
  318. return 0;
  319. }
  320. int read_environs (const char ** envp)
  321. {
  322. for (const char ** e = envp ; *e ; e++) {
  323. switch ((*e)[0]) {
  324. case 'L': {
  325. if (strpartcmp_static(*e, "LD_LIBRARY_PATH=")) {
  326. const char * s = *e + static_strlen("LD_LIBRARY_PATH=");
  327. int npaths = 0;
  328. for (const char * tmp = s ; *tmp ; tmp++)
  329. if (*tmp == ':')
  330. npaths++;
  331. const char ** paths = malloc(sizeof(const char *) *
  332. (npaths + 1));
  333. if (!paths)
  334. return -ENOMEM;
  335. int cnt = 0;
  336. while (*s) {
  337. const char * next;
  338. for (next = s ; *next && *next != ':' ; next++);
  339. int len = next - s;
  340. char * str = malloc(len + 1);
  341. if (!str)
  342. return -ENOMEM;
  343. memcpy(str, s, len);
  344. str[len] = 0;
  345. paths[cnt++] = str;
  346. s = *next ? next + 1 : next;
  347. }
  348. paths[cnt] = NULL;
  349. library_paths = paths;
  350. break;
  351. }
  352. break;
  353. }
  354. }
  355. }
  356. return 0;
  357. }
  /* Parsed manifest configuration; NULL until init_manifest() in this file
     has loaded a manifest successfully. */
  358. struct config_store * root_config = NULL;
  /* Allocation callback handed to the config store by init_manifest();
     forwards to malloc(). */
  static void * __malloc (size_t size)
  {
      void * mem = malloc(size);
      return mem;
  }
  /* Release callback handed to the config store by init_manifest();
     forwards to free(). */
  static void __free (void * mem)
  {
      free(mem);
      return;
  }
  367. int init_manifest (PAL_HANDLE manifest_handle)
  368. {
  369. void * addr;
  370. unsigned int size;
  371. if (PAL_CB(manifest_preload.start)) {
  372. addr = PAL_CB(manifest_preload.start);
  373. size = PAL_CB(manifest_preload.end) - PAL_CB(manifest_preload.start);
  374. } else {
  375. PAL_STREAM_ATTR attr;
  376. if (!DkStreamAttributesQuerybyHandle(manifest_handle, &attr))
  377. return -PAL_ERRNO;
  378. size = attr.pending_size;
  379. addr = (void *) DkStreamMap(manifest_handle, NULL,
  380. PAL_PROT_READ, 0,
  381. ALIGN_UP(size));
  382. if (!addr)
  383. return -PAL_ERRNO;
  384. }
  385. bkeep_mmap(addr, ALIGN_UP(size), PROT_READ,
  386. MAP_PRIVATE|MAP_ANONYMOUS|VMA_INTERNAL, NULL, 0,
  387. "manifest");
  388. root_config = malloc(sizeof(struct config_store));
  389. root_config->raw_data = addr;
  390. root_config->raw_size = size;
  391. root_config->malloc = __malloc;
  392. root_config->free = __free;
  393. const char * errstring = "Unexpected error";
  394. int ret = 0;
  395. if ((ret = read_config(root_config, NULL, &errstring)) < 0) {
  396. root_config = NULL;
  397. sys_printf("Unable to read manifest file: %s\n", errstring);
  398. return ret;
  399. }
  400. return 0;
  401. }
  /* Profile object defined only in PROFILE builds.
     NOTE(review): the code below in this file compares profile roots against
     `profile_`, not `profile_root` -- confirm how the two relate. */
  402. #ifdef PROFILE
  403. struct shim_profile profile_root;
  404. #endif
  /*
   * Split the start-up argument area at `cookie` into its three parts:
   *   argv  = cookie itself,
   *   envp  = the slot after argv's (argc + 1) pointers,
   *   auxp  = the slot after the NULL pointer that ends envp.
   * argv, envp and auxp must be assignable expressions.
   */
  405. # define FIND_ARG_COMPONENTS(cookie, argc, argv, envp, auxp) \
  406. do { \
  407. void *_tmp = (cookie); \
  408. (argv) = _tmp; \
  409. _tmp += sizeof(char *) * ((argc) + 1); \
  410. (envp) = _tmp; \
  411. for ( ; *(char **) _tmp; _tmp += sizeof(char *)); \
  412. (auxp) = _tmp + sizeof(char *); \
  413. } while (0)
  414. static void * __process_auxv (elf_auxv_t * auxp)
  415. {
  416. elf_auxv_t * av;
  417. for (av = auxp; av->a_type != AT_NULL; av++)
  418. switch (av->a_type) {
  419. default: break;
  420. }
  421. return av + 1;
  422. }
  /*
   * Step `stack` (an assignable pointer expression) over one 64-bit word,
   * after asserting that the word is zero.  The argument is parenthesised so
   * that the cast and the dereference apply to the whole expression.
   */
  #define FIND_LAST_STACK(stack)                          \
      do {                                                \
          /* check if exist a NULL end */                 \
          assert(*(uint64_t *) (stack) == 0);             \
          (stack) += sizeof(uint64_t);                    \
      } while (0)
  #ifdef PROFILE
  /*
   * Apply the PROFILE_ENABLED=<name>[,<name>...] environment variable.
   *
   * Without the variable nothing changes.  With it, every profile is first
   * disabled, then:
   *   1. each profile whose name matches a listed name is enabled;
   *   2. enablement is propagated downwards: a disabled profile whose root
   *      is enabled becomes enabled, repeated for as long as a category was
   *      enabled in the previous pass;
   *   3. enablement is propagated upwards: for each enabled non-category
   *      profile, its disabled ancestors are enabled, stopping at the first
   *      ancestor that is already enabled or at &profile_.
   */
  static void set_profile_enabled (const char ** envp)
  {
      const char ** p;
      for (p = envp ; (*p) ; p++)
          if (strpartcmp_static(*p, "PROFILE_ENABLED="))
              break;
      if (!(*p))
          return;

      for (int i = 0 ; i < N_PROFILE ; i++)
          PROFILES[i].disabled = true;

      /* skip the variable name; derived from the literal rather than a
         hand-counted 16 */
      const char * str = (*p) + static_strlen("PROFILE_ENABLED=");
      bool enabled = false;

      while (*str) {
          const char * next = str;
          for ( ; (*next) && (*next) != ',' ; next++);

          if (next > str) {
              int len = next - str;
              for (int i = 0 ; i < N_PROFILE ; i++) {
                  struct shim_profile * profile = &PROFILES[i];
                  if (!memcmp(profile->name, str, len) && !profile->name[len]) {
                      profile->disabled = false;
                      if (profile->type == CATAGORY)
                          enabled = true;
                  }
              }
          }

          str = (*next) ? next + 1 : next;
      }

      /* downward propagation, one tree level per pass */
      while (enabled) {
          enabled = false;
          for (int i = 0 ; i < N_PROFILE ; i++) {
              struct shim_profile * profile = &PROFILES[i];
              if (!profile->disabled || profile->root == &profile_)
                  continue;
              if (!profile->root->disabled) {
                  profile->disabled = false;
                  if (profile->type == CATAGORY)
                      enabled = true;
              }
          }
      }

      /* upward propagation from enabled non-category profiles */
      for (int i = 0 ; i < N_PROFILE ; i++) {
          struct shim_profile * profile = &PROFILES[i];
          if (profile->type == CATAGORY || profile->disabled)
              continue;
          for (profile = profile->root ;
               profile != &profile_ && profile->disabled ;
               profile = profile->root)
              profile->disabled = false;
      }
  }
  #endif
  482. static int init_newproc (struct newproc_header * hdr)
  483. {
  484. BEGIN_PROFILE_INTERVAL();
  485. int bytes = DkStreamRead(PAL_CB(parent_process), 0,
  486. sizeof(struct newproc_header), hdr,
  487. NULL, 0);
  488. if (!bytes)
  489. return -PAL_ERRNO;
  490. SAVE_PROFILE_INTERVAL(child_wait_header);
  491. SAVE_PROFILE_INTERVAL_SINCE(child_receive_header, hdr->write_proc_time);
  492. return hdr->failure;
  493. }
  /*
   * Profiling categories "pal" and "init" and the interval counters that
   * belong to them.
   *  - The pal_* intervals are filled at the end of shim_init() from the
   *    corresponding pal_control timing fields.
   *  - Several init intervals carry the name of an initialisation function:
   *    RUN_INIT(func, ...) saves the interval named `func` after calling it.
   */
  494. DEFINE_PROFILE_CATAGORY(pal, );
  495. DEFINE_PROFILE_INTERVAL(pal_startup_time, pal);
  496. DEFINE_PROFILE_INTERVAL(pal_host_specific_startup_time, pal);
  497. DEFINE_PROFILE_INTERVAL(pal_relocation_time, pal);
  498. DEFINE_PROFILE_INTERVAL(pal_linking_time, pal);
  499. DEFINE_PROFILE_INTERVAL(pal_manifest_loading_time, pal);
  500. DEFINE_PROFILE_INTERVAL(pal_allocation_time, pal);
  501. DEFINE_PROFILE_INTERVAL(pal_tail_startup_time, pal);
  502. DEFINE_PROFILE_INTERVAL(pal_child_creation_time, pal);
  503. DEFINE_PROFILE_CATAGORY(init, );
  504. DEFINE_PROFILE_INTERVAL(init_randgen, init);
  505. DEFINE_PROFILE_INTERVAL(init_heap, init);
  506. DEFINE_PROFILE_INTERVAL(init_slab, init);
  507. DEFINE_PROFILE_INTERVAL(init_str_mgr, init);
  508. DEFINE_PROFILE_INTERVAL(init_internal_map, init);
  509. DEFINE_PROFILE_INTERVAL(init_vma, init);
  510. DEFINE_PROFILE_INTERVAL(init_fs, init);
  511. DEFINE_PROFILE_INTERVAL(init_dcache, init);
  512. DEFINE_PROFILE_INTERVAL(init_handle, init);
  513. DEFINE_PROFILE_INTERVAL(read_from_checkpoint, init);
  514. DEFINE_PROFILE_INTERVAL(read_from_file, init);
  515. DEFINE_PROFILE_INTERVAL(init_newproc, init);
  516. DEFINE_PROFILE_INTERVAL(init_mount_root, init);
  517. DEFINE_PROFILE_INTERVAL(init_from_checkpoint_file, init);
  518. DEFINE_PROFILE_INTERVAL(restore_from_file, init);
  519. DEFINE_PROFILE_INTERVAL(init_manifest, init);
  520. DEFINE_PROFILE_INTERVAL(init_ipc, init);
  521. DEFINE_PROFILE_INTERVAL(init_thread, init);
  522. DEFINE_PROFILE_INTERVAL(init_important_handles, init);
  523. DEFINE_PROFILE_INTERVAL(init_mount, init);
  524. DEFINE_PROFILE_INTERVAL(init_async, init);
  525. DEFINE_PROFILE_INTERVAL(init_stack, init);
  526. DEFINE_PROFILE_INTERVAL(read_environs, init);
  527. DEFINE_PROFILE_INTERVAL(init_loader, init);
  528. DEFINE_PROFILE_INTERVAL(init_ipc_helper, init);
  529. DEFINE_PROFILE_INTERVAL(init_signal, init);
  /*
   * RUN_INIT(func, args...) calls func(args...) as one initialisation step.
   * A negative return value is logged with debug() and followed by
   * shim_terminate().  After the call the profiling interval named `func` is
   * saved with SAVE_PROFILE_INTERVAL.
   * CALL_INIT is the plain call that RUN_INIT wraps.
   */
  530. #define CALL_INIT(func, args ...) func(args)
  531. #define RUN_INIT(func, ...) \
  532. do { \
  533. int _err = CALL_INIT(func, ##__VA_ARGS__); \
  534. if (_err < 0) { \
  535. debug("initialization failed in " #func " (%d)\n", _err); \
  536. shim_terminate(); \
  537. } \
  538. SAVE_PROFILE_INTERVAL(func); \
  539. } while (0)
  /* Event created on the restore path of shim_init() and set once
     initialisation has finished; defined outside this part of the file. */
  540. extern PAL_HANDLE thread_start_event;
  /*
   * Entry point of the library OS.
   *
   * argc/args describe the start-up argument area (argv, envp and auxiliary
   * vector, located with FIND_ARG_COMPONENTS).  The function installs a
   * temporary TCB on its own stack, runs the initialisation steps through
   * RUN_INIT (each step terminates the process on failure), optionally
   * restores a checkpoint -- either from a file named after "-resume" on the
   * command line or from the parent process -- and finally either restores a
   * saved context, executes the thread's ELF object, or returns.
   *
   * When it returns, *return_stack holds initial_stack and the result is 0.
   */
  541. int shim_init (int argc, void * args, void ** return_stack)
  542. {
  543. debug_handle = PAL_CB(debug_stream);
  544. cur_process.vmid = (IDTYPE) PAL_CB(process_id);
  545. /* create the initial TCB, shim can not be run without a tcb */
  546. __libc_tcb_t tcb;
  547. memset(&tcb, 0, sizeof(__libc_tcb_t));
  548. allocate_tls(&tcb, false, NULL);
  549. debug_setbuf(&tcb.shim_tcb, true);
  550. debug("set tcb to %p\n", &tcb);
  551. #ifdef PROFILE
  552. unsigned long begin_time = GET_PROFILE_INTERVAL();
  553. #endif
  554. debug("host: %s\n", PAL_CB(host_type));
  555. DkSetExceptionHandler(&handle_failure, PAL_EVENT_FAILURE, 0);
  /* allocshift is allocsize - 1 and allocmask its complement */
  556. allocsize = PAL_CB(alloc_align);
  557. allocshift = allocsize - 1;
  558. allocmask = ~allocshift;
  559. create_lock(__master_lock);
  560. const char ** argv, ** envp, ** argp = NULL;
  561. elf_auxv_t * auxp;
  562. /* call to figure out where the arguments are */
  563. FIND_ARG_COMPONENTS(args, argc, argv, envp, auxp);
  /* __process_auxv() returns the address just past the AT_NULL entry, so
     nauxv counts the entries including AT_NULL */
  564. initial_stack = __process_auxv(auxp);
  565. int nauxv = (elf_auxv_t *) initial_stack - auxp;
  566. FIND_LAST_STACK(initial_stack);
  567. #ifdef PROFILE
  568. set_profile_enabled(envp);
  569. #endif
  570. struct newproc_header hdr;
  571. void * cpaddr = NULL;
  572. #ifdef PROFILE
  573. unsigned long begin_create_time = 0;
  574. #endif
  575. BEGIN_PROFILE_INTERVAL();
  576. RUN_INIT(init_randgen);
  577. RUN_INIT(init_heap);
  578. RUN_INIT(init_slab);
  579. RUN_INIT(read_environs, envp);
  580. RUN_INIT(init_str_mgr);
  581. RUN_INIT(init_internal_map);
  582. RUN_INIT(init_vma);
  583. RUN_INIT(init_fs);
  584. RUN_INIT(init_dcache);
  585. RUN_INIT(init_handle);
  586. debug("shim loaded at %p, ready to initialize\n", &__load_address);
  /* "-resume <file>": consume both arguments, load the checkpoint file and
     jump straight to the restore step */
  587. if (argc && argv[0][0] == '-') {
  588. if (strcmp_static(argv[0], "-resume") && argc >= 2) {
  589. const char * filename = *(argv + 1);
  590. argc -= 2;
  591. argv += 2;
  592. RUN_INIT(init_mount_root);
  593. RUN_INIT(init_from_checkpoint_file, filename, &hdr.checkpoint,
  594. &cpaddr);
  595. goto restore;
  596. }
  597. }
  /* started by a parent process: read its header and, if it carries a
     checkpoint, receive the migrated state */
  598. if (PAL_CB(parent_process)) {
  599. RUN_INIT(init_newproc, &hdr);
  600. SAVE_PROFILE_INTERVAL_SET(child_created_in_new_process,
  601. hdr.create_time, begin_time);
  602. #ifdef PROFILE
  603. begin_create_time = hdr.begin_create_time;
  604. #endif
  605. if (hdr.checkpoint.hdr.size)
  606. RUN_INIT(do_migration, &hdr.checkpoint, &cpaddr);
  607. }
  /* the `restore` label lets the -resume path enter this block directly */
  608. if (cpaddr) {
  609. restore:
  610. thread_start_event = DkNotificationEventCreate(PAL_FALSE);
  611. RUN_INIT(restore_checkpoint,
  612. &hdr.checkpoint.hdr, &hdr.checkpoint.mem,
  613. (ptr_t) cpaddr, 0);
  614. }
  615. if (PAL_CB(manifest_handle))
  616. RUN_INIT(init_manifest, PAL_CB(manifest_handle));
  617. RUN_INIT(init_mount_root);
  618. RUN_INIT(init_ipc);
  619. RUN_INIT(init_thread);
  620. RUN_INIT(init_mount);
  621. RUN_INIT(init_important_handles);
  622. RUN_INIT(init_async);
  623. RUN_INIT(init_stack, argv, envp, &argp, nauxv, &auxp);
  624. RUN_INIT(init_loader);
  625. RUN_INIT(init_ipc_helper);
  626. RUN_INIT(init_signal);
  627. debug("shim process initialized\n");
  628. #ifdef PROFILE
  629. if (begin_create_time)
  630. SAVE_PROFILE_INTERVAL_SINCE(child_total_migration_time,
  631. begin_create_time);
  632. #endif
  /* copy the PAL's own start-up timings into the pal_* intervals */
  633. SAVE_PROFILE_INTERVAL_SET(pal_startup_time, 0, pal_control.startup_time);
  634. SAVE_PROFILE_INTERVAL_SET(pal_host_specific_startup_time, 0,
  635. pal_control.host_specific_startup_time);
  636. SAVE_PROFILE_INTERVAL_SET(pal_relocation_time, 0,
  637. pal_control.relocation_time);
  638. SAVE_PROFILE_INTERVAL_SET(pal_linking_time, 0, pal_control.linking_time);
  639. SAVE_PROFILE_INTERVAL_SET(pal_manifest_loading_time, 0,
  640. pal_control.manifest_loading_time);
  641. SAVE_PROFILE_INTERVAL_SET(pal_allocation_time, 0,
  642. pal_control.allocation_time);
  643. SAVE_PROFILE_INTERVAL_SET(pal_tail_startup_time, 0,
  644. pal_control.tail_startup_time);
  645. SAVE_PROFILE_INTERVAL_SET(pal_child_creation_time, 0,
  646. pal_control.child_creation_time);
  647. if (thread_start_event)
  648. DkEventSet(thread_start_event);
  649. shim_tcb_t * cur_tcb = SHIM_GET_TLS();
  650. struct shim_thread * cur_thread = (struct shim_thread *) cur_tcb->tp;
  /* a saved stack pointer in the TCB context means there is a context to
     resume */
  651. if (cur_tcb->context.sp)
  652. restore_context(&cur_tcb->context);
  /* NOTE(review): cur_thread is dereferenced without a NULL check; confirm
     that the TCB always carries a thread pointer at this point */
  653. if (cur_thread->exec)
  654. execute_elf_object(cur_thread->exec,
  655. argc, argp, nauxv, auxp);
  656. *return_stack = initial_stack;
  657. return 0;
  658. }
  /*
   * Generic "pick a name, try to create it, retry on collision" loop.
   *
   *   mkname(name, size, id)  writes a candidate name; a negative result
   *                           aborts the loop and is returned.
   *   create(name, obj)       tries to create the object; negative aborts,
   *                           a positive result asks for another candidate,
   *                           0 means success.
   *   output(name, size, id, qstr)
   *                           optional; when given, its result is returned
   *                           after a successful create.
   *
   * Without `output`, the name is stored in `qstr` (if non-NULL) and the
   * length reported by mkname is returned.
   */
  static int create_unique (int (*mkname) (char *, size_t, void *),
                            int (*create) (const char *, void *),
                            int (*output) (char *, size_t, const void *,
                                           struct shim_qstr *),
                            char * name, size_t size, void * id, void * obj,
                            struct shim_qstr * qstr)
  {
      for (;;) {
          int len = mkname(name, size, id);
          if (len < 0)
              return len;

          int created = create(name, obj);
          if (created < 0)
              return created;
          if (created != 0)
              continue;           /* name taken: try the next candidate */

          if (output)
              return output(name, size, id, qstr);

          if (qstr)
              qstrsetstr(qstr, name, len);
          return len;
      }
  }
  682. static int name_pipe (char * uri, size_t size, void * id)
  683. {
  684. IDTYPE pipeid;
  685. int len;
  686. if (getrand(&pipeid, sizeof(IDTYPE)) < sizeof(IDTYPE))
  687. return -EACCES;
  688. debug("creating pipe: pipe.srv:%u\n", pipeid);
  689. if ((len = snprintf(uri, size, "pipe.srv:%u", pipeid)) == size)
  690. return -ERANGE;
  691. *((IDTYPE *) id) = pipeid;
  692. return len;
  693. }
  694. static int open_pipe (const char * uri, void * obj)
  695. {
  696. PAL_HANDLE pipe = DkStreamOpen(uri, 0, 0, 0, 0);
  697. if (!pipe)
  698. return PAL_NATIVE_ERRNO == PAL_ERROR_STREAMEXIST ? 1 :
  699. -PAL_ERRNO;
  700. if (obj)
  701. *((PAL_HANDLE *) obj) = pipe;
  702. else
  703. DkObjectClose(pipe);
  704. return 0;
  705. }
  706. static int pipe_addr (char * uri, size_t size, const void * id,
  707. struct shim_qstr * qstr)
  708. {
  709. IDTYPE pipeid = *((IDTYPE *) id);
  710. int len;
  711. if ((len = snprintf(uri, size, "pipe:%u", pipeid)) == size)
  712. return -ERANGE;
  713. if (qstr)
  714. qstrsetstr(qstr, uri, len);
  715. return len;
  716. }
  717. int create_pipe (IDTYPE * id, char * uri, size_t size, PAL_HANDLE * hdl,
  718. struct shim_qstr * qstr)
  719. {
  720. IDTYPE pipeid;
  721. int ret = create_unique(&name_pipe, &open_pipe, &pipe_addr,
  722. uri, size, &pipeid, hdl, qstr);
  723. if (ret > 0 && id)
  724. *id = pipeid;
  725. return ret;
  726. }
  727. static int name_path (char * path, size_t size, void * id)
  728. {
  729. unsigned int suffix;
  730. int prefix_len = strlen(path);
  731. int len;
  732. if (getrand(&suffix, sizeof(unsigned int)) < sizeof(unsigned int))
  733. return -EACCES;
  734. len = snprintf(path + prefix_len, size - prefix_len, "%08x", suffix);
  735. if (len == size)
  736. return -ERANGE;
  737. *((unsigned int *) id) = suffix;
  738. return prefix_len + len;
  739. }
  740. static int open_dir (const char * path, void * obj)
  741. {
  742. struct shim_handle * dir = NULL;
  743. if (obj) {
  744. dir = get_new_handle();
  745. if (!dir)
  746. return -ENOMEM;
  747. }
  748. int ret = open_namei(dir, NULL, path, O_CREAT|O_EXCL|O_DIRECTORY, 0700,
  749. NULL);
  750. if (ret < 0)
  751. return ret = -EEXIST ? 1 : ret;
  752. if (obj)
  753. *((struct shim_handle **) obj) = dir;
  754. return 0;
  755. }
  /*
   * create callback for create_file(): exclusively create the file `path`
   * for reading and writing with mode 0600.
   *
   * A handle is allocated only when `obj` (a struct shim_handle **) is
   * non-NULL; it is stored there on success.
   *
   * Returns 0 on success, 1 when the path already exists (so the caller
   * tries another name), -ENOMEM when no handle can be allocated, or the
   * negative error from open_namei().
   *
   * NOTE(review): on an open_namei() failure the handle obtained from
   * get_new_handle() is not released here -- confirm whether it should be.
   */
  static int open_file (const char * path, void * obj)
  {
      struct shim_handle * file = NULL;

      if (obj) {
          file = get_new_handle();
          if (!file)
              return -ENOMEM;
      }

      int ret = open_namei(file, NULL, path, O_CREAT|O_EXCL|O_RDWR, 0600,
                           NULL);
      /* compare (==), do not assign: only -EEXIST may turn into a retry,
         every other error has to reach the caller */
      if (ret < 0)
          return ret == -EEXIST ? 1 : ret;

      if (obj)
          *((struct shim_handle **) obj) = file;

      return 0;
  }
  772. static int open_pal_handle (const char * uri, void * obj)
  773. {
  774. PAL_HANDLE hdl;
  775. if (strpartcmp_static(uri, "dev:"))
  776. hdl = DkStreamOpen(uri, 0,
  777. PAL_SHARE_OWNER_X|PAL_SHARE_OWNER_W|
  778. PAL_SHARE_OWNER_R,
  779. PAL_CREAT_TRY|PAL_CREAT_ALWAYS,
  780. 0);
  781. else
  782. hdl = DkStreamOpen(uri, PAL_ACCESS_RDWR,
  783. PAL_SHARE_OWNER_W|PAL_SHARE_OWNER_R,
  784. PAL_CREAT_TRY|PAL_CREAT_ALWAYS,
  785. 0);
  786. if (!hdl) {
  787. if (PAL_NATIVE_ERRNO == PAL_ERROR_STREAMEXIST)
  788. return 0;
  789. else
  790. return -PAL_ERRNO;
  791. }
  792. if (obj) {
  793. *((PAL_HANDLE *) obj) = hdl;
  794. } else {
  795. DkObjectClose(hdl);
  796. }
  797. return 0;
  798. }
  /*
   * output callback for the path-based creators: report the length of the
   * final `path` and copy it into `qstr` when that is non-NULL.  `size` and
   * `id` are unused.
   */
  static int output_path (char * path, size_t size, const void * id,
                          struct shim_qstr * qstr)
  {
      int path_len = strlen(path);

      if (qstr != NULL)
          qstrsetstr(qstr, path, path_len);

      return path_len;
  }
  807. int create_dir (const char * prefix, char * path, size_t size,
  808. struct shim_handle ** hdl)
  809. {
  810. unsigned int suffix;
  811. if (prefix) {
  812. int len = strlen(prefix);
  813. if (len >= size)
  814. return -ERANGE;
  815. memcpy(path, prefix, len + 1);
  816. }
  817. return create_unique(&name_path, &open_dir, &output_path, path, size,
  818. &suffix, hdl, NULL);
  819. }
  820. int create_file (const char * prefix, char * path, size_t size,
  821. struct shim_handle ** hdl)
  822. {
  823. unsigned int suffix;
  824. if (prefix) {
  825. int len = strlen(prefix);
  826. if (len >= size)
  827. return -ERANGE;
  828. memcpy(path, prefix, len + 1);
  829. }
  830. return create_unique(&name_path, &open_file, &output_path, path, size,
  831. &suffix, hdl, NULL);
  832. }
  833. int create_handle (const char * prefix, char * uri, size_t size,
  834. PAL_HANDLE * hdl, unsigned int * id)
  835. {
  836. unsigned int suffix;
  837. if (prefix) {
  838. int len = strlen(prefix);
  839. if (len >= size)
  840. return -ERANGE;
  841. memcpy(uri, prefix, len + 1);
  842. }
  843. return create_unique(&name_path, &open_pal_handle, &output_path, uri, size,
  844. id ? : &suffix, hdl, NULL);
  845. }
  846. void check_stack_hook (void)
  847. {
  848. struct shim_thread * cur_thread = get_cur_thread();
  849. void * rsp;
  850. asm volatile ("movq %%rsp, %0" : "=r"(rsp) :: "memory");
  851. if (rsp <= cur_thread->stack_top && rsp > cur_thread->stack) {
  852. if (rsp - cur_thread->stack < PAL_CB(pagesize))
  853. sys_printf("*** stack is almost drained (RSP = %p, stack = %p-%p) ***\n",
  854. rsp, cur_thread->stack, cur_thread->stack_top);
  855. } else {
  856. sys_printf("*** context dismatched with thread stack (RSP = %p, stack = %p-%p) ***\n",
  857. rsp, cur_thread->stack, cur_thread->stack_top);
  858. }
  859. }
  860. #ifdef PROFILE
  861. static void print_profile_result (PAL_HANDLE hdl, struct shim_profile * root,
  862. int level)
  863. {
  864. unsigned long total_interval_time = 0;
  865. unsigned long total_interval_count = 0;
  866. for (int i = 0 ; i < N_PROFILE ; i++) {
  867. struct shim_profile * profile = &PROFILES[i];
  868. if (profile->root != root || profile->disabled)
  869. continue;
  870. switch (profile->type) {
  871. case OCCURENCE: {
  872. unsigned int count =
  873. atomic_read(&profile->val.occurence.count);
  874. if (count) {
  875. for (int j = 0 ; j < level ; j++)
  876. __sys_fprintf(hdl, " ");
  877. __sys_fprintf(hdl, "- %s: %u times\n", profile->name, count);
  878. }
  879. break;
  880. }
  881. case INTERVAL: {
  882. unsigned int count =
  883. atomic_read(&profile->val.interval.count);
  884. if (count) {
  885. unsigned long time =
  886. atomic_read(&profile->val.interval.time);
  887. unsigned long ind_time = time / count;
  888. total_interval_time += time;
  889. total_interval_count += count;
  890. for (int j = 0 ; j < level ; j++)
  891. __sys_fprintf(hdl, " ");
  892. __sys_fprintf(hdl, "- (%11.11lu) %s: %u times, %lu msec\n",
  893. time, profile->name, count, ind_time);
  894. }
  895. break;
  896. }
  897. case CATAGORY:
  898. for (int j = 0 ; j < level ; j++)
  899. __sys_fprintf(hdl, " ");
  900. __sys_fprintf(hdl, "- %s:\n", profile->name);
  901. print_profile_result(hdl, profile, level + 1);
  902. break;
  903. }
  904. }
  905. if (total_interval_count) {
  906. __sys_fprintf(hdl, " - (%11.11u) total: %u times, %lu msec\n",
  907. total_interval_time, total_interval_count,
  908. total_interval_time / total_interval_count);
  909. }
  910. }
  911. #endif /* PROFILE */
  912. static struct atomic_int in_terminate = { .counter = 0, };
  913. int shim_terminate (void)
  914. {
  915. debug("teminating the whole process\n");
  916. /* do last clean-up of the process */
  917. shim_clean();
  918. DkProcessExit(0);
  919. return 0;
  920. }
  921. int shim_clean (void)
  922. {
  923. /* preventing multiple cleanup, this is mostly caused by
  924. assertion in shim_clean */
  925. atomic_inc(&in_terminate);
  926. if (atomic_read(&in_terminate) > 1)
  927. return 0;
  928. store_all_msg_persist();
  929. #ifdef PROFILE
  930. if (ENTER_TIME) {
  931. switch (SHIM_GET_TLS()->context.syscall_nr) {
  932. case __NR_exit_group:
  933. SAVE_PROFILE_INTERVAL_SINCE(syscall_exit_group, ENTER_TIME);
  934. break;
  935. case __NR_exit:
  936. SAVE_PROFILE_INTERVAL_SINCE(syscall_exit, ENTER_TIME);
  937. break;
  938. }
  939. }
  940. if (ipc_cld_profile_send()) {
  941. master_lock();
  942. PAL_HANDLE hdl = __open_shim_stdio();
  943. if (hdl) {
  944. __sys_fprintf(hdl, "******************************\n");
  945. __sys_fprintf(hdl, "profiling:\n");
  946. print_profile_result(hdl, &profile_root, 0);
  947. __sys_fprintf(hdl, "******************************\n");
  948. }
  949. master_unlock();
  950. DkObjectClose(hdl);
  951. }
  952. #endif
  953. del_all_ipc_ports(0);
  954. if (shim_stdio && shim_stdio != (PAL_HANDLE) -1)
  955. DkObjectClose(shim_stdio);
  956. shim_stdio = NULL;
  957. debug("process %u successfully terminated\n", cur_process.vmid);
  958. master_lock();
  959. DkProcessExit(cur_process.exit_code);
  960. return 0;
  961. }
  962. int message_confirm (const char * message, const char * options)
  963. {
  964. char answer;
  965. int noptions = strlen(options);
  966. char * option_str = __alloca(noptions * 2 + 3), * str = option_str;
  967. int ret = 0;
  968. *(str++) = ' ';
  969. *(str++) = '[';
  970. for (int i = 0 ; i < noptions ; i++) {
  971. *(str++) = options[i];
  972. *(str++) = '/';
  973. }
  974. str--;
  975. *(str++) = ']';
  976. *(str++) = ' ';
  977. master_lock();
  978. PAL_HANDLE hdl = __open_shim_stdio();
  979. if (!hdl) {
  980. master_unlock();
  981. return -EACCES;
  982. }
  983. #define WRITE(buf, len) \
  984. ({ int _ret = DkStreamWrite(hdl, 0, len, (void *) buf, NULL); \
  985. _ret ? : -PAL_ERRNO; })
  986. #define READ(buf, len) \
  987. ({ int _ret = DkStreamRead(hdl, 0, len, buf, NULL, 0); \
  988. _ret ? : -PAL_ERRNO; })
  989. if ((ret = WRITE(message, strlen(message))) < 0)
  990. goto out;
  991. if ((ret = WRITE(option_str, noptions * 2 + 3)) < 0)
  992. goto out;
  993. if ((ret = READ(&answer, 1)) < 0)
  994. goto out;
  995. out:
  996. DkObjectClose(hdl);
  997. master_unlock();
  998. return (ret < 0) ? ret : answer;
  999. }